Diffstat
-rw-r--r--  contrib/llvm/include/llvm-c/Analysis.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/BitReader.h | 45
-rw-r--r--  contrib/llvm/include/llvm-c/BitWriter.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Core.h | 135
-rw-r--r--  contrib/llvm/include/llvm-c/ErrorHandling.h | 51
-rw-r--r--  contrib/llvm/include/llvm-c/ExecutionEngine.h | 26
-rw-r--r--  contrib/llvm/include/llvm-c/IRReader.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Initialization.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Linker.h | 24
-rw-r--r--  contrib/llvm/include/llvm-c/Object.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/OrcBindings.h | 134
-rw-r--r--  contrib/llvm/include/llvm-c/Support.h | 20
-rw-r--r--  contrib/llvm/include/llvm-c/Target.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/TargetMachine.h | 4
-rw-r--r--  contrib/llvm/include/llvm-c/Transforms/IPO.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Transforms/Scalar.h | 2
-rw-r--r--  contrib/llvm/include/llvm-c/Transforms/Vectorize.h | 3
-rw-r--r--  contrib/llvm/include/llvm-c/Types.h | 124
-rw-r--r--  contrib/llvm/include/llvm-c/lto.h | 4
-rw-r--r--  contrib/llvm/include/llvm/ADT/APFloat.h | 8
-rw-r--r--  contrib/llvm/include/llvm/ADT/APInt.h | 7
-rw-r--r--  contrib/llvm/include/llvm/ADT/APSInt.h | 6
-rw-r--r--  contrib/llvm/include/llvm/ADT/ArrayRef.h | 21
-rw-r--r--  contrib/llvm/include/llvm/ADT/BitVector.h | 10
-rw-r--r--  contrib/llvm/include/llvm/ADT/DeltaAlgorithm.h | 2
-rw-r--r--  contrib/llvm/include/llvm/ADT/DenseMap.h | 26
-rw-r--r--  contrib/llvm/include/llvm/ADT/DenseMapInfo.h | 28
-rw-r--r--  contrib/llvm/include/llvm/ADT/DenseSet.h | 5
-rw-r--r--  contrib/llvm/include/llvm/ADT/DepthFirstIterator.h | 22
-rw-r--r--  contrib/llvm/include/llvm/ADT/FoldingSet.h | 37
-rw-r--r--  contrib/llvm/include/llvm/ADT/ImmutableList.h | 12
-rw-r--r--  contrib/llvm/include/llvm/ADT/ImmutableMap.h | 101
-rw-r--r--  contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h | 4
-rw-r--r--  contrib/llvm/include/llvm/ADT/Optional.h | 19
-rw-r--r--  contrib/llvm/include/llvm/ADT/PackedVector.h | 28
-rw-r--r--  contrib/llvm/include/llvm/ADT/PointerIntPair.h | 178
-rw-r--r--  contrib/llvm/include/llvm/ADT/PointerUnion.h | 848
-rw-r--r--  contrib/llvm/include/llvm/ADT/PostOrderIterator.h | 8
-rw-r--r--  contrib/llvm/include/llvm/ADT/STLExtras.h | 52
-rw-r--r--  contrib/llvm/include/llvm/ADT/ScopedHashTable.h | 40
-rw-r--r--  contrib/llvm/include/llvm/ADT/SetOperations.h | 2
-rw-r--r--  contrib/llvm/include/llvm/ADT/SetVector.h | 30
-rw-r--r--  contrib/llvm/include/llvm/ADT/SmallBitVector.h | 23
-rw-r--r--  contrib/llvm/include/llvm/ADT/SmallPtrSet.h | 13
-rw-r--r--  contrib/llvm/include/llvm/ADT/SmallSet.h | 4
-rw-r--r--  contrib/llvm/include/llvm/ADT/SmallVector.h | 7
-rw-r--r--  contrib/llvm/include/llvm/ADT/SparseBitVector.h | 48
-rw-r--r--  contrib/llvm/include/llvm/ADT/Statistic.h | 5
-rw-r--r--  contrib/llvm/include/llvm/ADT/StringMap.h | 22
-rw-r--r--  contrib/llvm/include/llvm/ADT/StringRef.h | 37
-rw-r--r--  contrib/llvm/include/llvm/ADT/StringSet.h | 5
-rw-r--r--  contrib/llvm/include/llvm/ADT/StringSwitch.h | 50
-rw-r--r--  contrib/llvm/include/llvm/ADT/TinyPtrVector.h | 3
-rw-r--r--  contrib/llvm/include/llvm/ADT/Triple.h | 97
-rw-r--r--  contrib/llvm/include/llvm/ADT/UniqueVector.h | 1
-rw-r--r--  contrib/llvm/include/llvm/ADT/ilist.h | 152
-rw-r--r--  contrib/llvm/include/llvm/ADT/ilist_node.h | 89
-rw-r--r--  contrib/llvm/include/llvm/ADT/iterator_range.h | 12
-rw-r--r--  contrib/llvm/include/llvm/Analysis/AliasAnalysis.h | 1057
-rw-r--r--  contrib/llvm/include/llvm/Analysis/AliasSetTracker.h | 15
-rw-r--r--  contrib/llvm/include/llvm/Analysis/AssumptionCache.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 223
-rw-r--r--  contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h | 42
-rw-r--r--  contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 88
-rw-r--r--  contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 91
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CFG.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CFLAliasAnalysis.h | 158
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CallGraph.h | 33
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h | 17
-rw-r--r--  contrib/llvm/include/llvm/Analysis/CaptureTracking.h | 7
-rw-r--r--  contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h | 34
-rw-r--r--  contrib/llvm/include/llvm/Analysis/DemandedBits.h | 75
-rw-r--r--  contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h | 61
-rw-r--r--  contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h | 48
-rw-r--r--  contrib/llvm/include/llvm/Analysis/EHPersonalities.h | 94
-rw-r--r--  contrib/llvm/include/llvm/Analysis/GlobalsModRef.h | 160
-rw-r--r--  contrib/llvm/include/llvm/Analysis/IVUsers.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Analysis/InlineCost.h | 65
-rw-r--r--  contrib/llvm/include/llvm/Analysis/InstructionSimplify.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LazyCallGraph.h | 113
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LazyValueInfo.h | 17
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h | 71
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LibCallSemantics.h | 225
-rw-r--r--  contrib/llvm/include/llvm/Analysis/Loads.h | 13
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 224
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LoopInfo.h | 111
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h | 8
-rw-r--r--  contrib/llvm/include/llvm/Analysis/LoopPass.h | 19
-rw-r--r--  contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h | 5
-rw-r--r--  contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h | 39
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h | 102
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h | 287
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h (renamed from contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h) | 8
-rw-r--r--  contrib/llvm/include/llvm/Analysis/OrderedBasicBlock.h | 66
-rw-r--r--  contrib/llvm/include/llvm/Analysis/PHITransAddr.h | 23
-rw-r--r--  contrib/llvm/include/llvm/Analysis/Passes.h | 74
-rw-r--r--  contrib/llvm/include/llvm/Analysis/RegionInfo.h | 57
-rw-r--r--  contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h | 68
-rw-r--r--  contrib/llvm/include/llvm/Analysis/RegionPrinter.h | 45
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ScalarEvolution.h | 1005
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 79
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h | 36
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h | 149
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h | 92
-rw-r--r--  contrib/llvm/include/llvm/Analysis/SparsePropagation.h | 95
-rw-r--r--  contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def | 90
-rw-r--r--  contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h | 8
-rw-r--r--  contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h | 362
-rw-r--r--  contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 93
-rw-r--r--  contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h | 93
-rw-r--r--  contrib/llvm/include/llvm/Analysis/ValueTracking.h | 180
-rw-r--r--  contrib/llvm/include/llvm/Analysis/VectorUtils.h | 52
-rw-r--r--  contrib/llvm/include/llvm/AsmParser/Parser.h | 12
-rw-r--r--  contrib/llvm/include/llvm/AsmParser/SlotMapping.h | 12
-rw-r--r--  contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h | 16
-rw-r--r--  contrib/llvm/include/llvm/Bitcode/BitstreamReader.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h | 125
-rw-r--r--  contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h | 122
-rw-r--r--  contrib/llvm/include/llvm/Bitcode/ReaderWriter.h | 64
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/Analysis.h | 8
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/AsmPrinter.h | 33
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h | 57
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h | 77
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h | 7
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/CallingConvLower.h | 19
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/CommandFlags.h | 36
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h | 70
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/DIE.h | 126
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/FastISel.h | 28
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 25
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/GCMetadata.h | 4
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/GCStrategy.h | 4
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h | 49
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h | 69
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LiveInterval.h | 30
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h | 28
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h | 8
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h | 2
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LiveRegMatrix.h | 2
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h | 138
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h | 6
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h | 202
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h | 468
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h | 31
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineCombinerPattern.h | 31
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h | 47
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineDominators.h | 32
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h | 46
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineFunction.h | 61
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstr.h | 92
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h | 85
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h | 61
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h | 12
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h | 42
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h | 138
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 118
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineScheduler.h | 30
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/MachineValueType.h | 250
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ParallelCG.h | 43
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/Passes.h | 14
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h | 235
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h | 2
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h | 7
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/RegisterPressure.h | 89
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h | 19
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h | 8
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h | 53
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 56
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h | 6
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/SelectionDAG.h | 39
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 210
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/SlotIndexes.h | 91
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/StackMaps.h | 1
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h | 12
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/TargetSchedule.h | 6
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ValueTypes.h | 20
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/ValueTypes.td | 113
-rw-r--r--  contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h | 169
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h | 367
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h | 39
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h | 78
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/FunctionId.h | 56
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/Line.h | 124
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h | 43
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h | 68
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h | 35
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h | 176
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h | 270
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h | 57
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h | 37
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h | 60
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DIContext.h | 33
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 9
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 21
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 3
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 59
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 3
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 10
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 49
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 81
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 3
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h | 31
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h | 47
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h | 53
-rw-r--r--  contrib/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h | 105
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h | 17
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Interpreter.h | 12
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h | 373
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h | 1
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h | 108
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h | 2
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h | 312
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h | 8
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h | 40
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 57
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h | 8
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h | 96
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h | 31
-rw-r--r--  contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h | 25
-rw-r--r--  contrib/llvm/include/llvm/IR/Argument.h | 9
-rw-r--r--  contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h | 3
-rw-r--r--  contrib/llvm/include/llvm/IR/Attributes.h | 89
-rw-r--r--  contrib/llvm/include/llvm/IR/Attributes.td | 192
-rw-r--r--  contrib/llvm/include/llvm/IR/BasicBlock.h | 44
-rw-r--r--  contrib/llvm/include/llvm/IR/CFG.h | 150
-rw-r--r--  contrib/llvm/include/llvm/IR/CallSite.h | 197
-rw-r--r--  contrib/llvm/include/llvm/IR/CallingConv.h | 27
-rw-r--r--  contrib/llvm/include/llvm/IR/Comdat.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/Constant.h | 51
-rw-r--r--  contrib/llvm/include/llvm/IR/ConstantRange.h | 15
-rw-r--r--  contrib/llvm/include/llvm/IR/Constants.h | 30
-rw-r--r--  contrib/llvm/include/llvm/IR/DIBuilder.h | 107
-rw-r--r--  contrib/llvm/include/llvm/IR/DataLayout.h | 7
-rw-r--r--  contrib/llvm/include/llvm/IR/DebugInfo.h | 15
-rw-r--r--  contrib/llvm/include/llvm/IR/DebugInfoFlags.def | 1
-rw-r--r--  contrib/llvm/include/llvm/IR/DebugInfoMetadata.h | 564
-rw-r--r--  contrib/llvm/include/llvm/IR/DerivedTypes.h | 100
-rw-r--r--  contrib/llvm/include/llvm/IR/DiagnosticInfo.h | 133
-rw-r--r--  contrib/llvm/include/llvm/IR/DiagnosticPrinter.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/Dominators.h | 49
-rw-r--r--  contrib/llvm/include/llvm/IR/Function.h | 156
-rw-r--r--  contrib/llvm/include/llvm/IR/FunctionInfo.h | 241
-rw-r--r--  contrib/llvm/include/llvm/IR/GVMaterializer.h | 23
-rw-r--r--  contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/GlobalAlias.h | 25
-rw-r--r--  contrib/llvm/include/llvm/IR/GlobalObject.h | 8
-rw-r--r--  contrib/llvm/include/llvm/IR/GlobalValue.h | 26
-rw-r--r--  contrib/llvm/include/llvm/IR/GlobalVariable.h | 24
-rw-r--r--  contrib/llvm/include/llvm/IR/IRBuilder.h | 159
-rw-r--r--  contrib/llvm/include/llvm/IR/IRPrintingPasses.h | 6
-rw-r--r--  contrib/llvm/include/llvm/IR/InlineAsm.h | 65
-rw-r--r--  contrib/llvm/include/llvm/IR/InstIterator.h | 29
-rw-r--r--  contrib/llvm/include/llvm/IR/InstVisitor.h | 6
-rw-r--r--  contrib/llvm/include/llvm/IR/InstrTypes.h | 840
-rw-r--r--  contrib/llvm/include/llvm/IR/Instruction.def | 173
-rw-r--r--  contrib/llvm/include/llvm/IR/Instruction.h | 88
-rw-r--r--  contrib/llvm/include/llvm/IR/Instructions.h | 940
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicInst.h | 35
-rw-r--r--  contrib/llvm/include/llvm/IR/Intrinsics.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/Intrinsics.td | 74
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td | 3
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 72
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsARM.td | 44
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td | 4411
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td | 18
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 6
-rw-r--r--  contrib/llvm/include/llvm/IR/IntrinsicsX86.td | 2508
-rw-r--r--  contrib/llvm/include/llvm/IR/LLVMContext.h | 25
-rw-r--r--  contrib/llvm/include/llvm/IR/LegacyPassManagers.h | 75
-rw-r--r--  contrib/llvm/include/llvm/IR/MDBuilder.h | 3
-rw-r--r--  contrib/llvm/include/llvm/IR/Mangler.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/Metadata.def | 72
-rw-r--r--  contrib/llvm/include/llvm/IR/Metadata.h | 115
-rw-r--r--  contrib/llvm/include/llvm/IR/MetadataTracking.h | 99
-rw-r--r--  contrib/llvm/include/llvm/IR/Module.h | 117
-rw-r--r--  contrib/llvm/include/llvm/IR/ModuleSlotTracker.h | 8
-rw-r--r--  contrib/llvm/include/llvm/IR/PassManager.h | 3
-rw-r--r--  contrib/llvm/include/llvm/IR/PatternMatch.h | 40
-rw-r--r--  contrib/llvm/include/llvm/IR/Statepoint.h | 29
-rw-r--r--  contrib/llvm/include/llvm/IR/SymbolTableListTraits.h | 64
-rw-r--r--  contrib/llvm/include/llvm/IR/TrackingMDRef.h | 6
-rw-r--r--  contrib/llvm/include/llvm/IR/Type.h | 117
-rw-r--r--  contrib/llvm/include/llvm/IR/TypeFinder.h | 2
-rw-r--r--  contrib/llvm/include/llvm/IR/Use.h | 1
-rw-r--r--  contrib/llvm/include/llvm/IR/UseListOrder.h | 4
-rw-r--r--  contrib/llvm/include/llvm/IR/User.h | 37
-rw-r--r--  contrib/llvm/include/llvm/IR/Value.def | 3
-rw-r--r--  contrib/llvm/include/llvm/IR/Value.h | 142
-rw-r--r--  contrib/llvm/include/llvm/IR/ValueHandle.h | 33
-rw-r--r--  contrib/llvm/include/llvm/IR/ValueMap.h | 4
-rw-r--r--  contrib/llvm/include/llvm/IR/ValueSymbolTable.h | 44
-rw-r--r--  contrib/llvm/include/llvm/IRReader/IRReader.h | 9
-rw-r--r--  contrib/llvm/include/llvm/InitializePasses.h | 47
-rw-r--r--  contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h | 153
-rw-r--r--  contrib/llvm/include/llvm/LTO/LTOModule.h | 51
-rw-r--r--  contrib/llvm/include/llvm/LibDriver/LibDriver.h | 2
-rw-r--r--  contrib/llvm/include/llvm/LinkAllPasses.h | 29
-rw-r--r--  contrib/llvm/include/llvm/Linker/IRMover.h | 76
-rw-r--r--  contrib/llvm/include/llvm/Linker/Linker.h | 102
-rw-r--r--  contrib/llvm/include/llvm/MC/ConstantPools.h | 10
-rw-r--r--  contrib/llvm/include/llvm/MC/MCAsmBackend.h | 5
-rw-r--r--  contrib/llvm/include/llvm/MC/MCAsmInfo.h | 9
-rw-r--r--  contrib/llvm/include/llvm/MC/MCAssembler.h | 512
-rw-r--r--  contrib/llvm/include/llvm/MC/MCContext.h | 36
-rw-r--r--  contrib/llvm/include/llvm/MC/MCDirectives.h | 4
-rw-r--r--  contrib/llvm/include/llvm/MC/MCDwarf.h | 44
-rw-r--r--  contrib/llvm/include/llvm/MC/MCELFObjectWriter.h | 2
-rw-r--r--  contrib/llvm/include/llvm/MC/MCELFStreamer.h | 13
-rw-r--r--  contrib/llvm/include/llvm/MC/MCExpr.h | 6
-rw-r--r--  contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h | 2
-rw-r--r--  contrib/llvm/include/llvm/MC/MCFragment.h | 506
-rw-r--r--  contrib/llvm/include/llvm/MC/MCInstrDesc.h | 32
-rw-r--r--  contrib/llvm/include/llvm/MC/MCInstrItineraries.h | 2
-rw-r--r--  contrib/llvm/include/llvm/MC/MCLinkerOptimizationHint.h | 2
-rw-r--r--  contrib/llvm/include/llvm/MC/MCMachObjectWriter.h | 22
-rw-r--r--  contrib/llvm/include/llvm/MC/MCObjectFileInfo.h | 36
-rw-r--r--  contrib/llvm/include/llvm/MC/MCObjectStreamer.h | 7
-rw-r--r--  contrib/llvm/include/llvm/MC/MCObjectWriter.h | 38
-rw-r--r--  contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h | 3
-rw-r--r--  contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 40
-rw-r--r--  contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h | 3
-rw-r--r--  contrib/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h | 10
-rw-r--r--  contrib/llvm/include/llvm/MC/MCRegisterInfo.h | 8
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSchedule.h | 5
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSection.h | 16
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSectionCOFF.h | 109
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSectionELF.h | 27
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSectionMachO.h | 27
-rw-r--r--  contrib/llvm/include/llvm/MC/MCStreamer.h | 31
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSubtargetInfo.h | 12
-rw-r--r--  contrib/llvm/include/llvm/MC/MCSymbol.h | 89
-rw-r--r--  contrib/llvm/include/llvm/MC/MCTargetAsmParser.h | 28
-rw-r--r--  contrib/llvm/include/llvm/MC/MCTargetOptions.h | 4
-rw-r--r--  contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | 15
-rw-r--r--  contrib/llvm/include/llvm/MC/MCValue.h | 5
-rw-r--r--  contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h | 2
-rw-r--r--  contrib/llvm/include/llvm/MC/MachineLocation.h | 4
-rw-r--r--  contrib/llvm/include/llvm/MC/SectionKind.h | 59
-rw-r--r--  contrib/llvm/include/llvm/MC/StringTableBuilder.h | 40
-rw-r--r--  contrib/llvm/include/llvm/MC/SubtargetFeature.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Object/Archive.h | 67
-rw-r--r--  contrib/llvm/include/llvm/Object/ArchiveWriter.h | 13
-rw-r--r--  contrib/llvm/include/llvm/Object/Binary.h | 10
-rw-r--r--  contrib/llvm/include/llvm/Object/COFF.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Object/COFFImportFile.h | 74
-rw-r--r--  contrib/llvm/include/llvm/Object/ELF.h | 871
-rw-r--r--  contrib/llvm/include/llvm/Object/ELFObjectFile.h | 171
-rw-r--r--  contrib/llvm/include/llvm/Object/ELFTypes.h | 37
-rw-r--r--  contrib/llvm/include/llvm/Object/Error.h | 1
-rw-r--r--  contrib/llvm/include/llvm/Object/FunctionIndexObjectFile.h | 110
-rw-r--r--  contrib/llvm/include/llvm/Object/MachO.h | 26
-rw-r--r--  contrib/llvm/include/llvm/Object/ObjectFile.h | 13
-rw-r--r--  contrib/llvm/include/llvm/Object/SymbolicFile.h | 8
-rw-r--r--  contrib/llvm/include/llvm/Option/Arg.h | 1
-rw-r--r--  contrib/llvm/include/llvm/Option/ArgList.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Option/OptTable.h | 8
-rw-r--r--  contrib/llvm/include/llvm/Option/Option.h | 1
-rw-r--r--  contrib/llvm/include/llvm/PassAnalysisSupport.h | 33
-rw-r--r--  contrib/llvm/include/llvm/PassInfo.h | 36
-rw-r--r--  contrib/llvm/include/llvm/PassRegistry.h | 1
-rw-r--r--  contrib/llvm/include/llvm/PassSupport.h | 2
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/CoverageMapping.h | 6
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProf.h | 552
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProfData.inc | 735
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProfReader.h | 175
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h | 23
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/SampleProf.h | 301
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/SampleProfReader.h | 245
-rw-r--r--  contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h | 114
-rw-r--r--  contrib/llvm/include/llvm/Support/ARMTargetParser.def | 223
-rw-r--r--  contrib/llvm/include/llvm/Support/AlignOf.h | 37
-rw-r--r--  contrib/llvm/include/llvm/Support/Allocator.h | 15
-rw-r--r--  contrib/llvm/include/llvm/Support/BlockFrequency.h | 26
-rw-r--r--  contrib/llvm/include/llvm/Support/BranchProbability.h | 176
-rw-r--r--  contrib/llvm/include/llvm/Support/CBindingWrapping.h | 1
-rw-r--r--  contrib/llvm/include/llvm/Support/COFF.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Support/CommandLine.h | 32
-rw-r--r--  contrib/llvm/include/llvm/Support/Compiler.h | 74
-rw-r--r--  contrib/llvm/include/llvm/Support/CrashRecoveryContext.h | 35
-rw-r--r--  contrib/llvm/include/llvm/Support/DOTGraphTraits.h | 11
-rw-r--r--  contrib/llvm/include/llvm/Support/Debug.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Support/Dwarf.def | 10
-rw-r--r--  contrib/llvm/include/llvm/Support/Dwarf.h | 54
-rw-r--r--  contrib/llvm/include/llvm/Support/ELF.h | 69
-rw-r--r--  contrib/llvm/include/llvm/Support/ELFRelocs/AVR.def | 40
-rw-r--r--  contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC.def | 62
-rw-r--r--  contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC64.def | 93
-rw-r--r--  contrib/llvm/include/llvm/Support/Endian.h | 143
-rw-r--r--  contrib/llvm/include/llvm/Support/ErrorHandling.h | 32
-rw-r--r--  contrib/llvm/include/llvm/Support/ErrorOr.h | 11
-rw-r--r--  contrib/llvm/include/llvm/Support/FileOutputBuffer.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Support/FileSystem.h | 51
-rw-r--r--  contrib/llvm/include/llvm/Support/Format.h | 5
-rw-r--r--  contrib/llvm/include/llvm/Support/GCOV.h | 19
-rw-r--r--  contrib/llvm/include/llvm/Support/GenericDomTree.h | 14
-rw-r--r--  contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h | 18
-rw-r--r--  contrib/llvm/include/llvm/Support/GraphWriter.h | 10
-rw-r--r--  contrib/llvm/include/llvm/Support/JamCRC.h | 48
-rw-r--r--  contrib/llvm/include/llvm/Support/MachO.h | 13
-rw-r--r--  contrib/llvm/include/llvm/Support/ManagedStatic.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Support/MathExtras.h | 96
-rw-r--r--  contrib/llvm/include/llvm/Support/Memory.h | 33
-rw-r--r--  contrib/llvm/include/llvm/Support/MemoryBuffer.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Support/OnDiskHashTable.h | 210
-rw-r--r--  contrib/llvm/include/llvm/Support/Options.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Support/OutputBuffer.h | 166
-rw-r--r--  contrib/llvm/include/llvm/Support/Path.h | 35
-rw-r--r--  contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h | 67
-rw-r--r--  contrib/llvm/include/llvm/Support/PrettyStackTrace.h | 12
-rw-r--r--  contrib/llvm/include/llvm/Support/Printable.h | 52
-rw-r--r--  contrib/llvm/include/llvm/Support/Program.h | 3
-rw-r--r--  contrib/llvm/include/llvm/Support/Recycler.h | 80
-rw-r--r--  contrib/llvm/include/llvm/Support/Registry.h | 14
-rw-r--r--  contrib/llvm/include/llvm/Support/SMLoc.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Support/ScaledNumber.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Support/Signals.h | 3
-rw-r--r--  contrib/llvm/include/llvm/Support/StreamingMemoryObject.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Support/StringSaver.h | 16
-rw-r--r--  contrib/llvm/include/llvm/Support/TargetParser.h | 276
-rw-r--r--  contrib/llvm/include/llvm/Support/TargetRegistry.h | 15
-rw-r--r--  contrib/llvm/include/llvm/Support/TargetSelect.h | 27
-rw-r--r--  contrib/llvm/include/llvm/Support/ThreadPool.h | 136
-rw-r--r--  contrib/llvm/include/llvm/Support/Threading.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Support/Timer.h | 63
-rw-r--r--  contrib/llvm/include/llvm/Support/TrailingObjects.h | 349
-rw-r--r--  contrib/llvm/include/llvm/Support/UnicodeCharRanges.h | 5
-rw-r--r--  contrib/llvm/include/llvm/Support/Valgrind.h | 39
-rw-r--r--  contrib/llvm/include/llvm/Support/YAMLParser.h | 7
-rw-r--r--  contrib/llvm/include/llvm/Support/YAMLTraits.h | 49
-rw-r--r--  contrib/llvm/include/llvm/Support/circular_raw_ostream.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Support/raw_ostream.h | 60
-rw-r--r--  contrib/llvm/include/llvm/Support/thread.h | 66
-rw-r--r--  contrib/llvm/include/llvm/Support/type_traits.h | 9
-rw-r--r--  contrib/llvm/include/llvm/TableGen/Record.h | 10
-rw-r--r--  contrib/llvm/include/llvm/Target/CostTable.h | 60
-rw-r--r--  contrib/llvm/include/llvm/Target/Target.td | 38
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetCallingConv.h | 5
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetFrameLowering.h | 39
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetInstrInfo.h | 225
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetItinerary.td | 16
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetLowering.h | 304
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h | 16
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetMachine.h | 42
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetOpcodes.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetOptions.h | 64
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetRecip.h | 14
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetRegisterInfo.h | 441
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetSelectionDAG.td | 61
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h | 45
-rw-r--r--  contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h | 6
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO.h | 15
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h | 35
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h | 43
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h | 38
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h | 13
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/LowerBitSets.h | 7
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h | 10
-rw-r--r--  contrib/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h | 34
-rw-r--r--  contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h | 12
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Instrumentation.h | 42
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Scalar.h | 18
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Scalar/ADCE.h | 38
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Scalar/SROA.h | 129
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 40
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/Cloning.h | 32
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/Local.h | 49
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h | 161
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/LoopVersioning.h | 56
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 4
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h | 2
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h | 11
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h | 3
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/SplitModule.h | 43
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 9
-rw-r--r--  contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h | 48
-rw-r--r--  contrib/llvm/include/llvm/module.modulemap | 12
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 615
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp | 173
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 54
-rw-r--r--  contrib/llvm/lib/Analysis/AliasDebugger.cpp | 136
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 44
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp | 32
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 1094
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 93
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 12
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 397
-rw-r--r--  contrib/llvm/lib/Analysis/CFG.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp | 253
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraph.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallGraph.cpp) | 42
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp) | 0
-rw-r--r--  contrib/llvm/lib/Analysis/CallPrinter.cpp (renamed from contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp) | 0
-rw-r--r--  contrib/llvm/lib/Analysis/CaptureTracking.cpp | 98
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 136
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/DemandedBits.cpp | 392
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 182
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 115
-rw-r--r--  contrib/llvm/lib/Analysis/EHPersonalities.cpp | 106
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp | 1002
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp | 609
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/IPA.cpp | 30
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/InlineCost.cpp (renamed from contrib/llvm/lib/Analysis/IPA/InlineCost.cpp) | 102
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 81
-rw-r--r--  contrib/llvm/lib/Analysis/LazyCallGraph.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 345
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp | 141
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallSemantics.cpp | 89
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 303
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 24
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 551
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 69
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 139
-rw-r--r--  contrib/llvm/lib/Analysis/MemDepPrinter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp | 18
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 174
-rw-r--r--  contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp | 95
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp (renamed from contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp) | 130
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp | 28
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp (renamed from contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp) | 8
-rw-r--r--  contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp | 85
-rw-r--r--  contrib/llvm/lib/Analysis/RegionInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPrinter.cpp | 151
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 3265
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 178
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 361
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 202
-rw-r--r--  contrib/llvm/lib/Analysis/SparsePropagation.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 42
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 234
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 654
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 1164
-rw-r--r--  contrib/llvm/lib/Analysis/VectorUtils.cpp | 199
-rw-r--r--  contrib/llvm/lib/AsmParser/LLLexer.cpp | 21
-rw-r--r--  contrib/llvm/lib/AsmParser/LLParser.cpp | 732
-rw-r--r--  contrib/llvm/lib/AsmParser/LLParser.h | 59
-rw-r--r--  contrib/llvm/lib/AsmParser/LLToken.h | 16
-rw-r--r--  contrib/llvm/lib/AsmParser/Parser.cpp | 12
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitReader.cpp | 105
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1931
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 741
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 18
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 96
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 61
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 98
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 459
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 126
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 59
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 84
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 98
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 1230
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 348
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 284
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 39
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 1325
-rw-r--r--  contrib/llvm/lib/CodeGen/CoreCLRGC.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 87
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 138
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 189
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 201
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 76
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 405
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 117
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 129
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 451
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 102
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 1595
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 432
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 767
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 505
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 360
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 202
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 88
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 89
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 86
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 232
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 326
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp | 96
-rw-r--r--  contrib/llvm/lib/CodeGen/Passes.cpp | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1017
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 149
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp | 140
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 53
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 256
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 373
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 313
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2644
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 142
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 196
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1283
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 272
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 304
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 124
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 275
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 530
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1048
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 133
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 137
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 267
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 175
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 115
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 310
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 148
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 185
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 302
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 166
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 3506
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp | 165
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/Line.cpp | 22
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp | 31
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp | 35
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp | 49
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp | 113
-rw-r--r--  contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp | 217
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 93
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 103
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp | 18
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 44
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 168
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp | 1
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDB.cpp | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp | 21
-rw-r--r--  contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 69
-rw-r--r--  contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 254
-rw-r--r--  contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h | 82
-rw-r--r--  contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 456
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp | 69
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp | 46
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp | 76
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 10
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp | 7
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h | 2
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp | 23
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h | 17
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 25
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp | 97
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 43
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 282
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 44
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp | 275
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 165
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp | 15
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 25
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 460
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 13
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 69
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 80
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 8
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h | 201
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h | 32
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h | 23
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 34
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h | 48
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h | 69
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp | 105
-rw-r--r--  contrib/llvm/lib/IR/AsmWriter.cpp | 407
-rw-r--r--  contrib/llvm/lib/IR/AttributeImpl.h | 37
-rw-r--r--  contrib/llvm/lib/IR/Attributes.cpp | 213
-rw-r--r--  contrib/llvm/lib/IR/AttributesCompatFunc.td | 1
-rw-r--r--  contrib/llvm/lib/IR/AutoUpgrade.cpp | 103
-rw-r--r--  contrib/llvm/lib/IR/BasicBlock.cpp | 42
-rw-r--r--  contrib/llvm/lib/IR/ConstantFold.cpp | 35
-rw-r--r--  contrib/llvm/lib/IR/ConstantRange.cpp | 53
-rw-r--r--  contrib/llvm/lib/IR/Constants.cpp | 326
-rw-r--r--  contrib/llvm/lib/IR/ConstantsContext.h | 47
-rw-r--r--  contrib/llvm/lib/IR/Core.cpp | 113
-rw-r--r--  contrib/llvm/lib/IR/DIBuilder.cpp | 124
-rw-r--r--  contrib/llvm/lib/IR/DataLayout.cpp | 13
-rw-r--r--  contrib/llvm/lib/IR/DebugInfo.cpp | 55
-rw-r--r--  contrib/llvm/lib/IR/DebugInfoMetadata.cpp | 90
-rw-r--r--  contrib/llvm/lib/IR/DiagnosticInfo.cpp | 41
-rw-r--r--  contrib/llvm/lib/IR/Dominators.cpp | 32
-rw-r--r--  contrib/llvm/lib/IR/Function.cpp | 211
-rw-r--r--  contrib/llvm/lib/IR/FunctionInfo.cpp | 67
-rw-r--r--  contrib/llvm/lib/IR/GCOV.cpp | 4
-rw-r--r--  contrib/llvm/lib/IR/Globals.cpp | 78
-rw-r--r--  contrib/llvm/lib/IR/IRBuilder.cpp | 107
-rw-r--r--  contrib/llvm/lib/IR/InlineAsm.cpp | 27
-rw-r--r--  contrib/llvm/lib/IR/Instruction.cpp | 40
-rw-r--r--  contrib/llvm/lib/IR/Instructions.cpp | 420
-rw-r--r--  contrib/llvm/lib/IR/LLVMContext.cpp | 48
-rw-r--r--  contrib/llvm/lib/IR/LLVMContextImpl.cpp | 24
-rw-r--r--  contrib/llvm/lib/IR/LLVMContextImpl.h | 170
-rw-r--r--  contrib/llvm/lib/IR/LegacyPassManager.cpp | 157
-rw-r--r--  contrib/llvm/lib/IR/MDBuilder.cpp | 48
-rw-r--r--  contrib/llvm/lib/IR/Metadata.cpp | 89
-rw-r--r--  contrib/llvm/lib/IR/MetadataImpl.h | 13
-rw-r--r--  contrib/llvm/lib/IR/MetadataTracking.cpp | 55
-rw-r--r--  contrib/llvm/lib/IR/Module.cpp | 60
-rw-r--r--  contrib/llvm/lib/IR/Statepoint.cpp | 5
-rw-r--r--  contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h | 50
-rw-r--r--  contrib/llvm/lib/IR/Type.cpp | 154
-rw-r--r--  contrib/llvm/lib/IR/TypeFinder.cpp | 14
-rw-r--r--  contrib/llvm/lib/IR/User.cpp | 64
-rw-r--r--  contrib/llvm/lib/IR/Value.cpp | 13
-rw-r--r--  contrib/llvm/lib/IR/ValueSymbolTable.cpp | 50
-rw-r--r--  contrib/llvm/lib/IR/ValueTypes.cpp | 27
-rw-r--r--  contrib/llvm/lib/IR/Verifier.cpp | 682
-rw-r--r--  contrib/llvm/lib/IRReader/IRReader.cpp | 12
-rw-r--r--  contrib/llvm/lib/LTO/LTOCodeGenerator.cpp | 329
-rw-r--r--  contrib/llvm/lib/LTO/LTOModule.cpp | 155
-rw-r--r--  contrib/llvm/lib/LibDriver/LibDriver.cpp | 11
-rw-r--r--  contrib/llvm/lib/LibDriver/Options.td | 2
-rw-r--r--  contrib/llvm/lib/Linker/IRMover.cpp | 1657
-rw-r--r--  contrib/llvm/lib/Linker/LinkDiagnosticInfo.h | 25
-rw-r--r--  contrib/llvm/lib/Linker/LinkModules.cpp | 1894
-rw-r--r--  contrib/llvm/lib/MC/ConstantPools.cpp | 10
-rw-r--r--  contrib/llvm/lib/MC/ELFObjectWriter.cpp | 144
-rw-r--r--  contrib/llvm/lib/MC/MCAsmBackend.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp | 3
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp | 5
-rw-r--r--  contrib/llvm/lib/MC/MCAsmStreamer.cpp | 95
-rw-r--r--  contrib/llvm/lib/MC/MCAssembler.cpp | 527
-rw-r--r--  contrib/llvm/lib/MC/MCContext.cpp | 56
-rw-r--r--  contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCDwarf.cpp | 399
-rw-r--r--  contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp | 16
-rw-r--r--  contrib/llvm/lib/MC/MCELFStreamer.cpp | 60
-rw-r--r--  contrib/llvm/lib/MC/MCExpr.cpp | 74
-rw-r--r--  contrib/llvm/lib/MC/MCFragment.cpp | 458
-rw-r--r--  contrib/llvm/lib/MC/MCInst.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCInstrDesc.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCMachOStreamer.cpp | 47
-rw-r--r--  contrib/llvm/lib/MC/MCObjectFileInfo.cpp | 230
-rw-r--r--  contrib/llvm/lib/MC/MCObjectStreamer.cpp | 96
-rw-r--r--  contrib/llvm/lib/MC/MCObjectWriter.cpp | 10
-rw-r--r--  contrib/llvm/lib/MC/MCParser/AsmLexer.cpp | 16
-rw-r--r--  contrib/llvm/lib/MC/MCParser/AsmParser.cpp | 270
-rw-r--r--  contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp | 11
-rw-r--r--  contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 78
-rw-r--r--  contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp | 22
-rw-r--r--  contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp | 17
-rw-r--r--  contrib/llvm/lib/MC/MCSection.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCSectionCOFF.cpp | 1
-rw-r--r--  contrib/llvm/lib/MC/MCSectionELF.cpp | 9
-rw-r--r--  contrib/llvm/lib/MC/MCSectionMachO.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCStreamer.cpp | 31
-rw-r--r--  contrib/llvm/lib/MC/MCSubtargetInfo.cpp | 17
-rw-r--r--  contrib/llvm/lib/MC/MCSymbol.cpp | 7
-rw-r--r--  contrib/llvm/lib/MC/MCTargetOptions.cpp | 7
-rw-r--r--  contrib/llvm/lib/MC/MCWinEH.cpp | 8
-rw-r--r--  contrib/llvm/lib/MC/MachObjectWriter.cpp | 135
-rw-r--r--  contrib/llvm/lib/MC/StringTableBuilder.cpp | 116
-rw-r--r--  contrib/llvm/lib/MC/SubtargetFeature.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp | 193
-rw-r--r--  contrib/llvm/lib/MC/WinCOFFStreamer.cpp | 41
-rw-r--r--  contrib/llvm/lib/Object/Archive.cpp | 208
-rw-r--r--  contrib/llvm/lib/Object/ArchiveWriter.cpp | 117
-rw-r--r--  contrib/llvm/lib/Object/COFFObjectFile.cpp | 56
-rw-r--r--  contrib/llvm/lib/Object/COFFYAML.cpp | 2
-rw-r--r--  contrib/llvm/lib/Object/ELF.cpp | 1
-rw-r--r--  contrib/llvm/lib/Object/ELFYAML.cpp | 33
-rw-r--r--  contrib/llvm/lib/Object/Error.cpp | 2
-rw-r--r--  contrib/llvm/lib/Object/FunctionIndexObjectFile.cpp | 143
-rw-r--r--  contrib/llvm/lib/Object/IRObjectFile.cpp | 8
-rw-r--r--  contrib/llvm/lib/Object/MachOObjectFile.cpp | 175
-rw-r--r--  contrib/llvm/lib/Object/MachOUniversal.cpp | 16
-rw-r--r--  contrib/llvm/lib/Object/Object.cpp | 4
-rw-r--r--  contrib/llvm/lib/Object/ObjectFile.cpp | 6
-rw-r--r--  contrib/llvm/lib/Object/SymbolicFile.cpp | 5
-rw-r--r--  contrib/llvm/lib/Option/Arg.cpp | 21
-rw-r--r--  contrib/llvm/lib/Option/ArgList.cpp | 25
-rw-r--r--  contrib/llvm/lib/Option/OptTable.cpp | 12
-rw-r--r--  contrib/llvm/lib/Option/Option.cpp | 31
-rw-r--r--  contrib/llvm/lib/Passes/PassBuilder.cpp | 6
-rw-r--r--  contrib/llvm/lib/Passes/PassRegistry.def | 7
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMapping.cpp | 18
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 101
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProf.cpp | 434
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProfIndexed.h | 56
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProfReader.cpp | 452
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProfWriter.cpp | 175
-rw-r--r--  contrib/llvm/lib/ProfileData/SampleProf.cpp | 106
-rw-r--r--  contrib/llvm/lib/ProfileData/SampleProfReader.cpp | 742
-rw-r--r--  contrib/llvm/lib/ProfileData/SampleProfWriter.cpp | 184
-rw-r--r--  contrib/llvm/lib/Support/APFloat.cpp | 46
-rw-r--r--  contrib/llvm/lib/Support/BlockFrequency.cpp | 36
-rw-r--r--  contrib/llvm/lib/Support/BranchProbability.cpp | 49
-rw-r--r--  contrib/llvm/lib/Support/CommandLine.cpp | 60
-rw-r--r--  contrib/llvm/lib/Support/CrashRecoveryContext.cpp | 26
-rw-r--r--  contrib/llvm/lib/Support/Dwarf.cpp | 37
-rw-r--r--  contrib/llvm/lib/Support/ErrorHandling.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/FileOutputBuffer.cpp | 18
-rw-r--r--  contrib/llvm/lib/Support/FoldingSet.cpp | 20
-rw-r--r--  contrib/llvm/lib/Support/GraphWriter.cpp | 74
-rw-r--r--  contrib/llvm/lib/Support/Host.cpp | 43
-rw-r--r--  contrib/llvm/lib/Support/JamCRC.cpp | 96
-rw-r--r--  contrib/llvm/lib/Support/Locale.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/ManagedStatic.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/MemoryBuffer.cpp | 5
-rw-r--r--  contrib/llvm/lib/Support/Path.cpp | 110
-rw-r--r--  contrib/llvm/lib/Support/PrettyStackTrace.cpp | 16
-rw-r--r--  contrib/llvm/lib/Support/Signals.cpp | 140
-rw-r--r--  contrib/llvm/lib/Support/Statistic.cpp | 18
-rw-r--r--  contrib/llvm/lib/Support/StringRef.cpp | 103
-rw-r--r--  contrib/llvm/lib/Support/StringSaver.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/TargetParser.cpp | 504
-rw-r--r--  contrib/llvm/lib/Support/ThreadPool.cpp | 155
-rw-r--r--  contrib/llvm/lib/Support/TimeValue.cpp | 6
-rw-r--r--  contrib/llvm/lib/Support/Timer.cpp | 74
-rw-r--r--  contrib/llvm/lib/Support/Triple.cpp | 209
-rw-r--r--  contrib/llvm/lib/Support/Unix/Memory.inc | 15
-rw-r--r--  contrib/llvm/lib/Support/Unix/Path.inc | 120
-rw-r--r--  contrib/llvm/lib/Support/Unix/Process.inc | 13
-rw-r--r--  contrib/llvm/lib/Support/Unix/Program.inc | 7
-rw-r--r--  contrib/llvm/lib/Support/Unix/Signals.inc | 151
-rw-r--r--  contrib/llvm/lib/Support/Unix/Unix.h | 13
-rw-r--r--  contrib/llvm/lib/Support/Valgrind.cpp | 21
-rw-r--r--  contrib/llvm/lib/Support/Windows/COM.inc | 2
-rw-r--r--  contrib/llvm/lib/Support/Windows/DynamicLibrary.inc | 4
-rw-r--r--  contrib/llvm/lib/Support/Windows/Memory.inc | 4
-rw-r--r--  contrib/llvm/lib/Support/Windows/Path.inc | 107
-rw-r--r--  contrib/llvm/lib/Support/Windows/Process.inc | 11
-rw-r--r--  contrib/llvm/lib/Support/Windows/Program.inc | 25
-rw-r--r--  contrib/llvm/lib/Support/Windows/Signals.inc | 129
-rw-r--r--  contrib/llvm/lib/Support/Windows/WindowsSupport.h | 18
-rw-r--r--  contrib/llvm/lib/Support/YAMLParser.cpp | 18
-rw-r--r--  contrib/llvm/lib/Support/YAMLTraits.cpp | 16
-rw-r--r--  contrib/llvm/lib/Support/raw_ostream.cpp | 83
-rw-r--r--  contrib/llvm/lib/TableGen/Record.cpp | 10
-rw-r--r--  contrib/llvm/lib/TableGen/SetTheory.cpp | 2
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.cpp | 30
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.h | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.td | 43
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp | 27
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h | 38
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td | 19
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 32
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 22
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp | 20
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp | 221
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 93
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h | 6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 246
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1446
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h | 110
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td | 1076
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 211
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 12
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td | 483
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 1304
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h | 42
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 17
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 56
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 5
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 31
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h | 27
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 150
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 39
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 201
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 46
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 10
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h | 26
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 88
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h | 2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 33
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 41
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPU.h | 16
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPU.td | 10
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 126
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 84
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 200
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h | 48
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h | 11
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 479
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 195
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h15
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp373
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp77
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp87
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h51
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp102
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp266
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CIInstructions.td340
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h40
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp51
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp15
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Processors.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp229
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp204
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp243
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp660
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp88
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td44
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1332
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h154
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td837
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td485
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp36
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp109
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h232
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp193
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp271
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td117
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SISchedule.td18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp61
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp97
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstructions.td61
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td708
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp119
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp174
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp64
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp112
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp202
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp275
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp2101
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h49
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td440
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td163
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td95
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td55
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp504
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td37
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td1046
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp56
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp175
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp50
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp170
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp204
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp113
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp389
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp54
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp99
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp291
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h36
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp47
-rw-r--r--contrib/llvm/lib/Target/AVR/AVR.td563
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRCallingConv.td65
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRConfig.h15
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h73
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td216
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp4
-rw-r--r--contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.td7
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp25
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp91
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp2152
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.h16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp1010
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td82
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp435
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp2778
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp33
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp43
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp1063
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp435
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp319
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp160
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp776
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h48
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td462
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td1019
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td46
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td238
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp3980
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h390
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td65
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td130
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td2241
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td43
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td836
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp60
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp53
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOperands.td368
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp150
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp94
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td81
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp91
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td170
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td310
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp50
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp1209
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp616
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp92
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp97
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h70
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp1975
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h114
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp52
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h65
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp268
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h119
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp581
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h218
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp29
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp126
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp49
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h35
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp228
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp52
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp90
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h52
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp17
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp2226
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp318
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h3
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp73
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h25
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp82
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td579
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td887
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td86
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td119
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td244
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td528
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td28
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td81
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td174
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td17
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp202
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td120
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td298
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td91
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp13
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.cpp16
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td25
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td38
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td378
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td84
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td192
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFastISel.cpp59
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp125
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h37
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td42
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td510
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td210
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp69
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h51
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp59
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp241
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp22
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp85
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td68
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td392
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h19
-rw-r--r--contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h18
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp132
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h15
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp10
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp18
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp183
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h17
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp66
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp353
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp102
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSection.h9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp60
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp105
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h21
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXVector.td58
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h25
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp267
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp253
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp35
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp87
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp242
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp140
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp502
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h22
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp131
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td20
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td612
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp131
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp230
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp114
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp141
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h32
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp53
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp89
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp248
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp52
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h1
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp154
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp183
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp295
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h18
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp39
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td146
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp64
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td54
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/README.txt6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp86
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp159
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td55
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td34
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h11
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp57
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp20
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp46
-rw-r--r--contrib/llvm/lib/Target/TargetRecip.cpp12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp103
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp100
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp41
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h16
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt68
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Relooper.cpp984
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Relooper.h186
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp110
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp285
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp468
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp81
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp117
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp57
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp602
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h50
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td82
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td125
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td117
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp133
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h20
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td115
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td105
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td529
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp133
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp106
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h45
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h68
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp76
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp1066
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp86
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp175
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp109
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp265
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp60
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td32
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp124
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp1
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp96
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt311
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp274
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h10
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp319
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h19
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp7
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp47
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp645
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp83
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h48
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h69
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp184
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h13
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp99
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp199
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h27
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td655
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp25
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp109
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td79
-rw-r--r--contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp21
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp168
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp89
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp76
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1162
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h65
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp505
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp6416
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h124
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td3696
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td69
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td216
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td13
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td252
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td136
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td336
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp1514
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h142
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td257
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td761
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td140
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td91
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td115
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h980
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp216
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp326
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp318
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h25
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td41
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp46
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h57
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp17
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp879
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h64
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp234
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp38
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp26
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp9
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp128
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp83
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp121
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp1873
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp433
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp629
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp937
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp27
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp150
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp20
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp589
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp560
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp125
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp (renamed from contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp)736
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp62
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp58
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp443
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp930
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp353
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp242
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h12
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp136
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp26
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp113
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp78
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp121
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp346
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp253
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h217
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp84
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp49
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp289
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp39
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp434
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp718
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp491
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp117
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h242
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h74
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp28
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/PtrState.h8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp69
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp350
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp120
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp78
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp264
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp269
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp49
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp319
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp724
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp263
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp65
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp113
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp1347
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp79
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp566
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp141
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp447
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp828
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp278
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp455
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp174
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp199
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp109
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp1826
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp991
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp251
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp42
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp28
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp122
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp89
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp21
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp431
-rw-r--r--contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp58
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp76
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp270
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp118
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp276
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp79
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp93
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp69
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp57
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp706
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp130
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp445
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SplitModule.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp256
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp175
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2475
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp489
-rw-r--r--contrib/llvm/tools/bugpoint/BugDriver.cpp6
-rw-r--r--contrib/llvm/tools/bugpoint/BugDriver.h19
-rw-r--r--contrib/llvm/tools/bugpoint/CrashDebugger.cpp265
-rw-r--r--contrib/llvm/tools/bugpoint/ExecutionDriver.cpp41
-rw-r--r--contrib/llvm/tools/bugpoint/ExtractFunction.cpp71
-rw-r--r--contrib/llvm/tools/bugpoint/ListReducer.h12
-rw-r--r--contrib/llvm/tools/bugpoint/Miscompilation.cpp277
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.cpp217
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.h50
-rw-r--r-- contrib/llvm/tools/bugpoint/bugpoint.cpp | 10
-rw-r--r-- contrib/llvm/tools/llc/llc.cpp | 55
-rw-r--r-- contrib/llvm/tools/lli/OrcLazyJIT.cpp | 62
-rw-r--r-- contrib/llvm/tools/lli/OrcLazyJIT.h | 54
-rw-r--r-- contrib/llvm/tools/lli/RemoteTarget.cpp | 4
-rw-r--r-- contrib/llvm/tools/lli/lli.cpp | 5
-rw-r--r-- contrib/llvm/tools/llvm-ar/llvm-ar.cpp | 104
-rw-r--r-- contrib/llvm/tools/llvm-as/llvm-as.cpp | 7
-rw-r--r-- contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 67
-rw-r--r-- contrib/llvm/tools/llvm-cov/CoverageReport.cpp | 31
-rw-r--r-- contrib/llvm/tools/llvm-cov/CoverageViewOptions.h | 1
-rw-r--r-- contrib/llvm/tools/llvm-cov/gcov.cpp | 6
-rw-r--r-- contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp | 104
-rw-r--r-- contrib/llvm/tools/llvm-diff/DiffLog.cpp | 3
-rw-r--r-- contrib/llvm/tools/llvm-diff/DiffLog.h | 10
-rw-r--r-- contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp | 4
-rw-r--r-- contrib/llvm/tools/llvm-dis/llvm-dis.cpp | 2
-rw-r--r-- contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 107
-rw-r--r-- contrib/llvm/tools/llvm-extract/llvm-extract.cpp | 53
-rw-r--r-- contrib/llvm/tools/llvm-link/llvm-link.cpp | 194
-rw-r--r-- contrib/llvm/tools/llvm-lto/llvm-lto.cpp | 269
-rw-r--r-- contrib/llvm/tools/llvm-mc/llvm-mc.cpp | 14
-rw-r--r-- contrib/llvm/tools/llvm-nm/llvm-nm.cpp | 258
-rw-r--r-- contrib/llvm/tools/llvm-objdump/COFFDump.cpp | 85
-rw-r--r-- contrib/llvm/tools/llvm-objdump/ELFDump.cpp | 32
-rw-r--r-- contrib/llvm/tools/llvm-objdump/MachODump.cpp | 814
-rw-r--r-- contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp | 562
-rw-r--r-- contrib/llvm/tools/llvm-objdump/llvm-objdump.h | 8
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.cpp | 66
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.h | 2
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/LinePrinter.cpp | 104
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/LinePrinter.h | 13
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp | 299
-rw-r--r-- contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h | 3
-rw-r--r-- contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp | 271
-rw-r--r-- contrib/llvm/tools/llvm-readobj/ARMAttributeParser.cpp | 72
-rw-r--r-- contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h | 64
-rw-r--r-- contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 5
-rw-r--r-- contrib/llvm/tools/llvm-readobj/COFFDumper.cpp | 120
-rw-r--r-- contrib/llvm/tools/llvm-readobj/COFFImportDumper.cpp | 52
-rw-r--r-- contrib/llvm/tools/llvm-readobj/ELFDumper.cpp | 907
-rw-r--r-- contrib/llvm/tools/llvm-readobj/MachODumper.cpp | 240
-rw-r--r-- contrib/llvm/tools/llvm-readobj/ObjDumper.h | 19
-rw-r--r-- contrib/llvm/tools/llvm-readobj/StreamWriter.h | 32
-rw-r--r-- contrib/llvm/tools/llvm-readobj/Win64EHDumper.cpp | 21
-rw-r--r-- contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp | 145
-rw-r--r-- contrib/llvm/tools/llvm-readobj/llvm-readobj.h | 4
-rw-r--r-- contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp | 235
-rw-r--r-- contrib/llvm/tools/llvm-stress/llvm-stress.cpp | 5
-rw-r--r-- contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp | 532
-rw-r--r-- contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.h | 144
-rw-r--r-- contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 50
-rw-r--r-- contrib/llvm/tools/macho-dump/macho-dump.cpp | 434
-rw-r--r-- contrib/llvm/tools/opt/opt.cpp | 56
-rw-r--r-- contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp | 372
-rw-r--r-- contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp | 109
-rw-r--r-- contrib/llvm/utils/TableGen/Attributes.cpp | 156
-rw-r--r-- contrib/llvm/utils/TableGen/CallingConvEmitter.cpp | 8
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 613
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h | 16
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenInstruction.cpp | 1
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenIntrinsics.h | 5
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenMapTable.cpp | 2
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenRegisters.cpp | 31
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenSchedule.cpp | 91
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenSchedule.h | 49
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenTarget.cpp | 12
-rw-r--r-- contrib/llvm/utils/TableGen/CodeGenTarget.h | 2
-rw-r--r-- contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 36
-rw-r--r-- contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 704
-rw-r--r-- contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp | 5
-rw-r--r-- contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp | 189
-rw-r--r-- contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp | 50
-rw-r--r-- contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp | 136
-rw-r--r-- contrib/llvm/utils/TableGen/OptParserEmitter.cpp | 20
-rw-r--r-- contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp | 41
-rw-r--r-- contrib/llvm/utils/TableGen/SubtargetEmitter.cpp | 17
-rw-r--r-- contrib/llvm/utils/TableGen/TableGen.cpp | 8
-rw-r--r-- contrib/llvm/utils/TableGen/TableGenBackends.h | 1
-rw-r--r-- contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp | 17
1818 files changed, 174413 insertions, 73899 deletions
diff --git a/contrib/llvm/include/llvm-c/Analysis.h b/contrib/llvm/include/llvm-c/Analysis.h
index f0bdddc..36dcb89 100644
--- a/contrib/llvm/include/llvm-c/Analysis.h
+++ b/contrib/llvm/include/llvm-c/Analysis.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_ANALYSIS_H
#define LLVM_C_ANALYSIS_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/BitReader.h b/contrib/llvm/include/llvm-c/BitReader.h
index f3b388b..d1fc302 100644
--- a/contrib/llvm/include/llvm-c/BitReader.h
+++ b/contrib/llvm/include/llvm-c/BitReader.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_BITREADER_H
#define LLVM_C_BITREADER_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
@@ -34,36 +34,45 @@ extern "C" {
/* Builds a module from the bitcode in the specified memory buffer, returning a
reference to the module via the OutModule parameter. Returns 0 on success.
- Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage);
+ Optionally returns a human-readable error message via OutMessage.
+ This is deprecated. Use LLVMParseBitcode2. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule,
+ char **OutMessage);
+
+/* Builds a module from the bitcode in the specified memory buffer, returning a
+ reference to the module via the OutModule parameter. Returns 0 on success. */
+LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule);
+
+/* This is deprecated. Use LLVMParseBitcodeInContext2. */
LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule, char **OutMessage);
+LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule);
+
/** Reads a module from the specified path, returning via the OutMP parameter
a module provider which performs lazy deserialization. Returns 0 on success.
- Optionally returns a human-readable error message via OutMessage. */
+ Optionally returns a human-readable error message via OutMessage.
+ This is deprecated. Use LLVMGetBitcodeModuleInContext2. */
LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutM,
- char **OutMessage);
+ LLVMModuleRef *OutM, char **OutMessage);
+/** Reads a module from the given memory buffer, returning via the OutM
+ * parameter a module which performs lazy deserialization. Returns 0 on success. */
+LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM);
+
+/* This is deprecated. Use LLVMGetBitcodeModule2. */
LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage);
-
-/** Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
-LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage);
-
-/** Deprecated: Use LLVMGetBitcodeModule instead. */
-LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage);
+LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM);
/**
* @}
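
For context, a minimal caller of the new two-argument parser could look like
the sketch below (not part of the patch; the input file name demo.bc is
illustrative, and with the *2 variants failures surface through the context's
diagnostic handler rather than an OutMessage string):

#include <cstdio>
#include "llvm-c/BitReader.h"
#include "llvm-c/Core.h"

int main() {
  LLVMMemoryBufferRef Buf;
  char *Err = nullptr;
  // Load demo.bc (hypothetical input) into a memory buffer.
  if (LLVMCreateMemoryBufferWithContentsOfFile("demo.bc", &Buf, &Err)) {
    std::fprintf(stderr, "open failed: %s\n", Err);
    LLVMDisposeMessage(Err);
    return 1;
  }
  LLVMModuleRef M;
  // Returns 0 on success; note there is no OutMessage parameter anymore.
  if (LLVMParseBitcode2(Buf, &M)) {
    std::fprintf(stderr, "bitcode parse failed\n");
    return 1;
  }
  LLVMDumpModule(M);
  LLVMDisposeModule(M);
  return 0;
}
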
diff --git a/contrib/llvm/include/llvm-c/BitWriter.h b/contrib/llvm/include/llvm-c/BitWriter.h
index f25ad3a..797d031 100644
--- a/contrib/llvm/include/llvm-c/BitWriter.h
+++ b/contrib/llvm/include/llvm-c/BitWriter.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_BITWRITER_H
#define LLVM_C_BITWRITER_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h
index 9dbcbfe..c8fda15 100644
--- a/contrib/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm/include/llvm-c/Core.h
@@ -15,7 +15,8 @@
#ifndef LLVM_C_CORE_H
#define LLVM_C_CORE_H
-#include "llvm-c/Support.h"
+#include "llvm-c/ErrorHandling.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
@@ -40,15 +41,6 @@ extern "C" {
* the LLVM intermediate representation as well as other related types
* and utilities.
*
- * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore
- * parameters must be passed as base types. Despite the declared types, most
- * of the functions provided operate only on branches of the type hierarchy.
- * The declared parameter names are descriptive and specify which type is
- * required. Additionally, each type hierarchy is documented along with the
- * functions that operate upon it. For more detail, refer to LLVM's C++ code.
- * If in doubt, refer to Core.cpp, which performs parameter downcasts in the
- * form unwrap<RequiredType>(Param).
- *
* Many exotic languages can interoperate with C code but have a harder time
* with C++ due to name mangling. So in addition to C, this interface enables
* tools written in such languages.
@@ -62,74 +54,6 @@ extern "C" {
* @{
*/
-/* Opaque types. */
-
-/**
- * The top-level container for all LLVM global data. See the LLVMContext class.
- */
-typedef struct LLVMOpaqueContext *LLVMContextRef;
-
-/**
- * The top-level container for all other LLVM Intermediate Representation (IR)
- * objects.
- *
- * @see llvm::Module
- */
-typedef struct LLVMOpaqueModule *LLVMModuleRef;
-
-/**
- * Each value in the LLVM IR has a type, an LLVMTypeRef.
- *
- * @see llvm::Type
- */
-typedef struct LLVMOpaqueType *LLVMTypeRef;
-
-/**
- * Represents an individual value in LLVM IR.
- *
- * This models llvm::Value.
- */
-typedef struct LLVMOpaqueValue *LLVMValueRef;
-
-/**
- * Represents a basic block of instructions in LLVM IR.
- *
- * This models llvm::BasicBlock.
- */
-typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
-
-/**
- * Represents an LLVM basic block builder.
- *
- * This models llvm::IRBuilder.
- */
-typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
-
-/**
- * Interface used to provide a module to JIT or interpreter.
- * This is now just a synonym for llvm::Module, but we have to keep using the
- * different type to keep binary compatibility.
- */
-typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef;
-
-/** @see llvm::PassManagerBase */
-typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
-
-/** @see llvm::PassRegistry */
-typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
-
-/**
- * Used to get the users and usees of a Value.
- *
- * @see llvm::Use */
-typedef struct LLVMOpaqueUse *LLVMUseRef;
-
-
-/**
- * @see llvm::DiagnosticInfo
- */
-typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef;
-
typedef enum {
LLVMZExtAttribute = 1<<0,
LLVMSExtAttribute = 1<<1,
@@ -248,8 +172,12 @@ typedef enum {
/* Exception Handling Operators */
LLVMResume = 58,
- LLVMLandingPad = 59
-
+ LLVMLandingPad = 59,
+ LLVMCleanupRet = 61,
+ LLVMCatchRet = 62,
+ LLVMCatchPad = 63,
+ LLVMCleanupPad = 64,
+ LLVMCatchSwitch = 65
} LLVMOpcode;
typedef enum {
@@ -268,7 +196,8 @@ typedef enum {
LLVMPointerTypeKind, /**< Pointers */
LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */
LLVMMetadataTypeKind, /**< Metadata */
- LLVMX86_MMXTypeKind /**< X86 MMX */
+ LLVMX86_MMXTypeKind, /**< X86 MMX */
+ LLVMTokenTypeKind /**< Tokens */
} LLVMTypeKind;
typedef enum {
@@ -428,36 +357,11 @@ void LLVMInitializeCore(LLVMPassRegistryRef R);
@see ManagedStatic */
void LLVMShutdown(void);
-
/*===-- Error handling ----------------------------------------------------===*/
char *LLVMCreateMessage(const char *Message);
void LLVMDisposeMessage(char *Message);
-typedef void (*LLVMFatalErrorHandler)(const char *Reason);
-
-/**
- * Install a fatal error handler. By default, if LLVM detects a fatal error, it
- * will call exit(1). This may not be appropriate in many contexts. For example,
- * doing exit(1) will bypass many crash reporting/tracing system tools. This
- * function allows you to install a callback that will be invoked prior to the
- * call to exit(1).
- */
-void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler);
-
-/**
- * Reset the fatal error handler. This resets LLVM's fatal error handling
- * behavior to the default.
- */
-void LLVMResetFatalErrorHandler(void);
-
-/**
- * Enable LLVM's built-in stack trace code. This intercepts the OS's crash
- * signals and prints which component of LLVM you were in at the time if the
- * crash.
- */
-void LLVMEnablePrettyStackTrace(void);
-
/**
* @defgroup LLVMCCoreContext Contexts
*
@@ -808,6 +712,7 @@ LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits);
/**
@@ -819,6 +724,7 @@ LLVMTypeRef LLVMInt8Type(void);
LLVMTypeRef LLVMInt16Type(void);
LLVMTypeRef LLVMInt32Type(void);
LLVMTypeRef LLVMInt64Type(void);
+LLVMTypeRef LLVMInt128Type(void);
LLVMTypeRef LLVMIntType(unsigned NumBits);
unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy);
@@ -1022,7 +928,6 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
* @}
*/
-
/**
* @defgroup LLVMCCoreTypeSequential Sequential Types
*
@@ -1178,6 +1083,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ConstantInt) \
macro(ConstantPointerNull) \
macro(ConstantStruct) \
+ macro(ConstantTokenNone) \
macro(ConstantVector) \
macro(GlobalValue) \
macro(GlobalAlias) \
@@ -1215,6 +1121,11 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(SwitchInst) \
macro(UnreachableInst) \
macro(ResumeInst) \
+ macro(CleanupReturnInst) \
+ macro(CatchReturnInst) \
+ macro(FuncletPadInst) \
+ macro(CatchPadInst) \
+ macro(CleanupPadInst) \
macro(UnaryInstruction) \
macro(AllocaInst) \
macro(CastInst) \
@@ -1950,7 +1861,7 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name);
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
/**
- * Add a target-dependent attribute to a fuction
+ * Add a target-dependent attribute to a function
* @see llvm::AttrBuilder::addAttribute()
*/
void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
@@ -2427,7 +2338,7 @@ void LLVMInstructionEraseFromParent(LLVMValueRef Inst);
*
* @see llvm::Instruction::getOpCode()
*/
-LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst);
+LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst);
/**
* Obtain the predicate of an instruction.
@@ -2780,6 +2691,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
const char *Name);
LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst);
void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile);
+LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst);
+void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering);
/* Casts */
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val,
@@ -3020,6 +2933,6 @@ LLVMBool LLVMIsMultithreaded(void);
#ifdef __cplusplus
}
-#endif /* !defined(__cplusplus) */
+#endif
-#endif /* !defined(LLVM_C_CORE_H) */
+#endif /* LLVM_C_CORE_H */
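
A quick illustration of one of the Core.h additions above (a sketch, not from
the patch): the new 128-bit integer constructor behaves like the existing
fixed-width ones.

#include <cstdio>
#include "llvm-c/Core.h"

int main() {
  LLVMContextRef Ctx = LLVMContextCreate();
  // i128 is now reachable directly, without LLVMIntTypeInContext(Ctx, 128).
  LLVMTypeRef I128 = LLVMInt128TypeInContext(Ctx);
  std::printf("width = %u\n", LLVMGetIntTypeWidth(I128)); // prints 128
  LLVMContextDispose(Ctx);
  return 0;
}
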
diff --git a/contrib/llvm/include/llvm-c/ErrorHandling.h b/contrib/llvm/include/llvm-c/ErrorHandling.h
new file mode 100644
index 0000000..5a80bc5
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/ErrorHandling.h
@@ -0,0 +1,51 @@
+/*===-- llvm-c/ErrorHandling.h - Error Handling C Interface -------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file defines the C interface to LLVM's error handling mechanism. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ERROR_HANDLING_H
+#define LLVM_C_ERROR_HANDLING_H
+
+#include "llvm-c/Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*LLVMFatalErrorHandler)(const char *Reason);
+
+/**
+ * Install a fatal error handler. By default, if LLVM detects a fatal error, it
+ * will call exit(1). This may not be appropriate in many contexts. For example,
+ * doing exit(1) will bypass many crash reporting/tracing system tools. This
+ * function allows you to install a callback that will be invoked prior to the
+ * call to exit(1).
+ */
+void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler);
+
+/**
+ * Reset the fatal error handler. This resets LLVM's fatal error handling
+ * behavior to the default.
+ */
+void LLVMResetFatalErrorHandler(void);
+
+/**
+ * Enable LLVM's built-in stack trace code. This intercepts the OS's crash
+ * signals and prints which component of LLVM you were in at the time of the
+ * crash.
+ */
+void LLVMEnablePrettyStackTrace(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
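
Typical use of these relocated entry points (sketch; the handler name LogFatal
is invented for illustration):

#include <cstdio>
#include "llvm-c/ErrorHandling.h"

// Hypothetical handler: record the reason before LLVM calls exit(1).
static void LogFatal(const char *Reason) {
  std::fprintf(stderr, "llvm fatal error: %s\n", Reason);
  std::fflush(stderr);
}

int main() {
  LLVMInstallFatalErrorHandler(LogFatal);
  // ... do LLVM work; on a fatal error, LogFatal runs before exit(1) ...
  LLVMResetFatalErrorHandler(); // restore the default behavior
  return 0;
}
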
diff --git a/contrib/llvm/include/llvm-c/ExecutionEngine.h b/contrib/llvm/include/llvm-c/ExecutionEngine.h
index eb3ecab..b72a91a 100644
--- a/contrib/llvm/include/llvm-c/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm-c/ExecutionEngine.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_EXECUTIONENGINE_H
#define LLVM_C_EXECUTIONENGINE_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#include "llvm-c/Target.h"
#include "llvm-c/TargetMachine.h"
@@ -110,22 +110,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
struct LLVMMCJITCompilerOptions *Options, size_t SizeOfOptions,
char **OutError);
-/** Deprecated: Use LLVMCreateExecutionEngineForModule instead. */
-LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError);
-
-/** Deprecated: Use LLVMCreateInterpreterForModule instead. */
-LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError);
-
-/** Deprecated: Use LLVMCreateJITCompilerForModule instead. */
-LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError);
-
void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE);
void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE);
@@ -144,17 +128,9 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F);
void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M);
-/** Deprecated: Use LLVMAddModule instead. */
-void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP);
-
LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
LLVMModuleRef *OutMod, char **OutError);
-/** Deprecated: Use LLVMRemoveModule instead. */
-LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
- LLVMModuleProviderRef MP,
- LLVMModuleRef *OutMod, char **OutError);
-
LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
LLVMValueRef *OutFn);
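
With the module-provider variants removed, engine construction always starts
from a module. A minimal MCJIT setup under that model might read as follows
(sketch; assumes a native-target build and elides any IR in the module):

#include <cstdio>
#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm-c/Target.h"

int main() {
  LLVMLinkInMCJIT();
  LLVMInitializeNativeTarget();
  LLVMInitializeNativeAsmPrinter();

  LLVMModuleRef M = LLVMModuleCreateWithName("jit_mod");
  LLVMMCJITCompilerOptions Opts;
  LLVMInitializeMCJITCompilerOptions(&Opts, sizeof(Opts));
  Opts.OptLevel = 2;

  LLVMExecutionEngineRef EE;
  char *Err = nullptr;
  if (LLVMCreateMCJITCompilerForModule(&EE, M, &Opts, sizeof(Opts), &Err)) {
    std::fprintf(stderr, "MCJIT: %s\n", Err);
    LLVMDisposeMessage(Err);
    return 1;
  }
  LLVMDisposeExecutionEngine(EE); // the engine owns M and frees it too
  return 0;
}
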
diff --git a/contrib/llvm/include/llvm-c/IRReader.h b/contrib/llvm/include/llvm-c/IRReader.h
index 5001afb..5b58d99 100644
--- a/contrib/llvm/include/llvm-c/IRReader.h
+++ b/contrib/llvm/include/llvm-c/IRReader.h
@@ -14,7 +14,7 @@
#ifndef LLVM_C_IRREADER_H
#define LLVM_C_IRREADER_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/Initialization.h b/contrib/llvm/include/llvm-c/Initialization.h
index 44194f8..90c8396 100644
--- a/contrib/llvm/include/llvm-c/Initialization.h
+++ b/contrib/llvm/include/llvm-c/Initialization.h
@@ -16,7 +16,7 @@
#ifndef LLVM_C_INITIALIZATION_H
#define LLVM_C_INITIALIZATION_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/Linker.h b/contrib/llvm/include/llvm-c/Linker.h
index 9f98a33..4d9bd46 100644
--- a/contrib/llvm/include/llvm-c/Linker.h
+++ b/contrib/llvm/include/llvm-c/Linker.h
@@ -14,7 +14,7 @@
#ifndef LLVM_C_LINKER_H
#define LLVM_C_LINKER_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
@@ -27,17 +27,27 @@ typedef enum {
should not be used. */
} LLVMLinkerMode;
-/* Links the source module into the destination module, taking ownership
- * of the source module away from the caller. Optionally returns a
- * human-readable description of any errors that occurred in linking.
- * OutMessage must be disposed with LLVMDisposeMessage. The return value
- * is true if an error occurred, false otherwise.
+/* Links the source module into the destination module. The source module is
+ * damaged. The only thing that can be done is to destroy it. Optionally returns a
+ * human-readable description of any errors that occurred in linking. OutMessage
+ * must be disposed with LLVMDisposeMessage. The return value is true if an
+ * error occurred, false otherwise.
*
* Note that the linker mode parameter \p Unused is no longer used, and has
- * no effect. */
+ * no effect.
+ *
+ * This function is deprecated. Use LLVMLinkModules2 instead.
+ */
LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
LLVMLinkerMode Unused, char **OutMessage);
+/* Links the source module into the destination module. The source module is
+ * destroyed.
+ * The return value is true if an error occurred, false otherwise.
+ * Use the diagnostic handler to get any diagnostic message.
+ */
+LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src);
+
#ifdef __cplusplus
}
#endif
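
A sketch of migrating a caller to the replacement API (the wrapper name
linkInto is invented for illustration):

#include "llvm-c/Core.h"
#include "llvm-c/Linker.h"

// Src is consumed by the call whether or not linking succeeds; diagnostics
// arrive through the context's diagnostic handler, not an OutMessage string.
static bool linkInto(LLVMModuleRef Dest, LLVMModuleRef Src) {
  return LLVMLinkModules2(Dest, Src) == 0; // LLVMBool: nonzero means error
}
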
diff --git a/contrib/llvm/include/llvm-c/Object.h b/contrib/llvm/include/llvm-c/Object.h
index 9cab5c4..a2980e8 100644
--- a/contrib/llvm/include/llvm-c/Object.h
+++ b/contrib/llvm/include/llvm-c/Object.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_OBJECT_H
#define LLVM_C_OBJECT_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#include "llvm/Config/llvm-config.h"
#ifdef __cplusplus
diff --git a/contrib/llvm/include/llvm-c/OrcBindings.h b/contrib/llvm/include/llvm-c/OrcBindings.h
new file mode 100644
index 0000000..f6aff91
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/OrcBindings.h
@@ -0,0 +1,134 @@
+/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMOrcJIT.a, which implements *|
+|* JIT compilation of LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+|* Note: This interface is experimental. It is *NOT* stable, and may be *|
+|* changed without warning. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ORCBINDINGS_H
+#define LLVM_C_ORCBINDINGS_H
+
+#include "llvm-c/Object.h"
+#include "llvm-c/Support.h"
+#include "llvm-c/TargetMachine.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef;
+typedef uint32_t LLVMOrcModuleHandle;
+typedef uint64_t LLVMOrcTargetAddress;
+typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name,
+ void *LookupCtx);
+typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack,
+ void *CallbackCtx);
+
+/**
+ * Create an ORC JIT stack.
+ *
+ * The client owns the resulting stack, and must call LLVMOrcDisposeInstance(...)
+ * to destroy it and free its memory. The JIT stack will take ownership of the
+ * TargetMachine, which will be destroyed when the stack is destroyed. The
+ * client should not attempt to dispose of the Target Machine, or it will result
+ * in a double-free.
+ */
+LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM);
+
+/**
+ * Mangle the given symbol.
+ * Memory will be allocated for MangledSymbol to hold the result. The client
+ * is responsible for freeing it with LLVMOrcDisposeMangledSymbol().
+ */
+void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledSymbol,
+ const char *Symbol);
+
+/**
+ * Dispose of a mangled symbol.
+ */
+
+void LLVMOrcDisposeMangledSymbol(char *MangledSymbol);
+
+/**
+ * Create a lazy compile callback.
+ */
+LLVMOrcTargetAddress
+LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
+ LLVMOrcLazyCompileCallbackFn Callback,
+ void *CallbackCtx);
+
+/**
+ * Create a named indirect call stub.
+ */
+void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr);
+
+/**
+ * Set the pointer for the given indirect stub.
+ */
+void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr);
+
+/**
+ * Add module to be eagerly compiled.
+ */
+LLVMOrcModuleHandle
+LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
+
+/**
+ * Add module to be lazily compiled one function at a time.
+ */
+LLVMOrcModuleHandle
+LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
+
+/**
+ * Add an object file.
+ */
+LLVMOrcModuleHandle
+LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, LLVMObjectFileRef Obj,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
+
+/**
+ * Remove a module set from the JIT.
+ *
+ * This works for all modules that can be added via LLVMOrcAdd*, including object
+ * files.
+ */
+void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H);
+
+/**
+ * Get symbol address from JIT instance.
+ */
+LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ const char *SymbolName);
+
+/**
+ * Dispose of an ORC JIT stack.
+ */
+void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
+
+#ifdef __cplusplus
+}
+#endif /* extern "C" */
+
+#endif /* LLVM_C_ORCBINDINGS_H */
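
Taken together, the new bindings support a JIT lifecycle along these lines (a
sketch under stated assumptions: TM comes from LLVMCreateTargetMachine, Mod
defines a niladic main, and the null resolver suffices because nothing
external is referenced):

#include "llvm-c/OrcBindings.h"

// Hypothetical resolver: nothing outside the added modules gets resolved.
static uint64_t NullResolver(const char *Name, void *Ctx) { return 0; }

static void jitAndRun(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
  LLVMOrcJITStackRef JIT = LLVMOrcCreateInstance(TM); // the stack now owns TM
  LLVMOrcModuleHandle H =
      LLVMOrcAddEagerlyCompiledIR(JIT, Mod, NullResolver, nullptr);

  char *Mangled = nullptr;
  LLVMOrcGetMangledSymbol(JIT, &Mangled, "main");
  LLVMOrcTargetAddress Addr = LLVMOrcGetSymbolAddress(JIT, Mangled);
  LLVMOrcDisposeMangledSymbol(Mangled);

  if (Addr)
    ((int (*)())Addr)(); // jump to the JIT'd entry point
  LLVMOrcRemoveModule(JIT, H);
  LLVMOrcDisposeInstance(JIT);
}
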
diff --git a/contrib/llvm/include/llvm-c/Support.h b/contrib/llvm/include/llvm-c/Support.h
index eca3b7a..735d1fb 100644
--- a/contrib/llvm/include/llvm-c/Support.h
+++ b/contrib/llvm/include/llvm-c/Support.h
@@ -15,31 +15,13 @@
#define LLVM_C_SUPPORT_H
#include "llvm/Support/DataTypes.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
- * @defgroup LLVMCSupportTypes Types and Enumerations
- *
- * @{
- */
-
-typedef int LLVMBool;
-
-/**
- * Used to pass regions of memory through LLVM interfaces.
- *
- * @see llvm::MemoryBuffer
- */
-typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
-
-/**
- * @}
- */
-
-/**
* This function permanently loads the dynamic library at the given path.
* It is safe to call this function multiple times for the same library.
*
diff --git a/contrib/llvm/include/llvm-c/Target.h b/contrib/llvm/include/llvm-c/Target.h
index b465b4b..24d2cb4 100644
--- a/contrib/llvm/include/llvm-c/Target.h
+++ b/contrib/llvm/include/llvm-c/Target.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_TARGET_H
#define LLVM_C_TARGET_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#include "llvm/Config/llvm-config.h"
#if defined(_MSC_VER) && !defined(inline)
diff --git a/contrib/llvm/include/llvm-c/TargetMachine.h b/contrib/llvm/include/llvm-c/TargetMachine.h
index 8cf1f43..3037080 100644
--- a/contrib/llvm/include/llvm-c/TargetMachine.h
+++ b/contrib/llvm/include/llvm-c/TargetMachine.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_TARGETMACHINE_H
#define LLVM_C_TARGETMACHINE_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#include "llvm-c/Target.h"
#ifdef __cplusplus
@@ -115,7 +115,7 @@ char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T);
LLVMDisposeMessage. */
char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T);
-/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */
+/** Returns the llvm::DataLayout used for this llvm::TargetMachine. */
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);
/** Set the target machine's ASM verbosity. */
diff --git a/contrib/llvm/include/llvm-c/Transforms/IPO.h b/contrib/llvm/include/llvm-c/Transforms/IPO.h
index 4480780..3af7425 100644
--- a/contrib/llvm/include/llvm-c/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm-c/Transforms/IPO.h
@@ -15,7 +15,7 @@
#ifndef LLVM_C_TRANSFORMS_IPO_H
#define LLVM_C_TRANSFORMS_IPO_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
index 3d7a9d6..69786b3 100644
--- a/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -14,7 +14,7 @@
#ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
#define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef;
diff --git a/contrib/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
index 48c19a6..c989ee8 100644
--- a/contrib/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
@@ -19,7 +19,7 @@
#ifndef LLVM_C_TRANSFORMS_SCALAR_H
#define LLVM_C_TRANSFORMS_SCALAR_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
diff --git a/contrib/llvm/include/llvm-c/Transforms/Vectorize.h b/contrib/llvm/include/llvm-c/Transforms/Vectorize.h
index c9102da..a82ef49 100644
--- a/contrib/llvm/include/llvm-c/Transforms/Vectorize.h
+++ b/contrib/llvm/include/llvm-c/Transforms/Vectorize.h
@@ -20,7 +20,7 @@
#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H
#define LLVM_C_TRANSFORMS_VECTORIZE_H
-#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
#ifdef __cplusplus
extern "C" {
@@ -51,4 +51,3 @@ void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM);
#endif /* defined(__cplusplus) */
#endif
-
diff --git a/contrib/llvm/include/llvm-c/Types.h b/contrib/llvm/include/llvm-c/Types.h
new file mode 100644
index 0000000..1902958
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Types.h
@@ -0,0 +1,124 @@
+/*===-- llvm-c/Types.h - C Interface Types declarations -----------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file defines types used by the C interface to LLVM. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TYPES_H
+#define LLVM_C_TYPES_H
+
+#include "llvm/Support/DataTypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCSupportTypes Types and Enumerations
+ *
+ * @{
+ */
+
+typedef int LLVMBool;
+
+/* Opaque types. */
+
+/**
+ * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore
+ * parameters must be passed as base types. Despite the declared types, most
+ * of the functions provided operate only on branches of the type hierarchy.
+ * The declared parameter names are descriptive and specify which type is
+ * required. Additionally, each type hierarchy is documented along with the
+ * functions that operate upon it. For more detail, refer to LLVM's C++ code.
+ * If in doubt, refer to Core.cpp, which performs parameter downcasts in the
+ * form unwrap<RequiredType>(Param).
+ */
+
+/**
+ * Used to pass regions of memory through LLVM interfaces.
+ *
+ * @see llvm::MemoryBuffer
+ */
+typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
+
+/**
+ * The top-level container for all LLVM global data. See the LLVMContext class.
+ */
+typedef struct LLVMOpaqueContext *LLVMContextRef;
+
+/**
+ * The top-level container for all other LLVM Intermediate Representation (IR)
+ * objects.
+ *
+ * @see llvm::Module
+ */
+typedef struct LLVMOpaqueModule *LLVMModuleRef;
+
+/**
+ * Each value in the LLVM IR has a type, an LLVMTypeRef.
+ *
+ * @see llvm::Type
+ */
+typedef struct LLVMOpaqueType *LLVMTypeRef;
+
+/**
+ * Represents an individual value in LLVM IR.
+ *
+ * This models llvm::Value.
+ */
+typedef struct LLVMOpaqueValue *LLVMValueRef;
+
+/**
+ * Represents a basic block of instructions in LLVM IR.
+ *
+ * This models llvm::BasicBlock.
+ */
+typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
+
+/**
+ * Represents an LLVM basic block builder.
+ *
+ * This models llvm::IRBuilder.
+ */
+typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
+
+/**
+ * Interface used to provide a module to JIT or interpreter.
+ * This is now just a synonym for llvm::Module, but we have to keep using the
+ * different type to keep binary compatibility.
+ */
+typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef;
+
+/** @see llvm::PassManagerBase */
+typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
+
+/** @see llvm::PassRegistry */
+typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
+
+/**
+ * Used to get the users and usees of a Value.
+ *
+ * @see llvm::Use */
+typedef struct LLVMOpaqueUse *LLVMUseRef;
+
+/**
+ * @see llvm::DiagnosticInfo
+ */
+typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef;
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
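
From the C side, the opaque-handle pattern these typedefs encode looks like
this (sketch):

#include "llvm-c/Core.h"

int main() {
  // Each ref is a pointer to an undefined struct: it can be held and passed
  // around, but never dereferenced by the client.
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);
  LLVMDisposeModule(M);
  LLVMContextDispose(Ctx);
  return 0;
}
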
diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h
index cb3a691..691a0cd 100644
--- a/contrib/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm/include/llvm-c/lto.h
@@ -374,8 +374,8 @@ extern lto_bool_t
lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod);
/**
- * Sets the object module for code generation. This will transfer the ownship of
- * the module to code generator.
+ * Sets the object module for code generation. This will transfer the ownership
+ * of the module to the code generator.
*
* \c cg and \c mod must both be in the same context.
*
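
A sketch of the ownership rule described in that comment (the function name
compileWholeProgram and the single-module flow are illustrative only):

#include "llvm-c/lto.h"

static bool compileWholeProgram(const char *Path) {
  lto_code_gen_t CG = lto_codegen_create();
  lto_module_t M = lto_module_create(Path);
  if (!M)
    return false;
  lto_codegen_set_module(CG, M); // CG now owns M; do not dispose of M here
  size_t Len;
  const void *Obj = lto_codegen_compile(CG, &Len); // native object bytes
  lto_codegen_dispose(CG);
  return Obj != nullptr;
}
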
diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h
index 76615af..3fe0406 100644
--- a/contrib/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm/include/llvm/ADT/APFloat.h
@@ -142,6 +142,9 @@ public:
/// @}
static unsigned int semanticsPrecision(const fltSemantics &);
+ static ExponentType semanticsMinExponent(const fltSemantics &);
+ static ExponentType semanticsMaxExponent(const fltSemantics &);
+ static unsigned int semanticsSizeInBits(const fltSemantics &);
/// IEEE-754R 5.11: Floating Point Comparison Relations.
enum cmpResult {
@@ -296,7 +299,7 @@ public:
/// IEEE remainder.
opStatus remainder(const APFloat &);
/// C fmod, or llvm frem.
- opStatus mod(const APFloat &, roundingMode);
+ opStatus mod(const APFloat &);
opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
opStatus roundToIntegral(roundingMode);
/// IEEE-754R 5.3.1: nextUp/nextDown.
@@ -445,6 +448,9 @@ public:
/// Returns true if and only if the number has the largest possible finite
/// magnitude in the current semantics.
bool isLargest() const;
+
+ /// Returns true if and only if the number is an exact integer.
+ bool isInteger() const;
/// @}
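
To illustrate the revised interface (a sketch, not from the patch): mod() now
follows C fmod semantics and no longer takes a rounding mode, and isInteger()
tests for exact integral values.

#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

static bool demo() {
  APFloat A(5.5), B(2.0);
  A.mod(B);                        // A becomes 1.5, like fmod(5.5, 2.0)
  return APFloat(4.0).isInteger(); // true: 4.0 is exactly an integer
}
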
diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h
index 5013f29..e2a0cb5 100644
--- a/contrib/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm/include/llvm/ADT/APInt.h
@@ -294,11 +294,12 @@ public:
delete[] pVal;
}
- /// \brief Default constructor that creates an uninitialized APInt.
+ /// \brief Default constructor that creates an uninteresting APInt
+ /// representing a 1-bit zero value.
///
/// This is useful for object deserialization (pair this with the static
/// method Read).
- explicit APInt() : BitWidth(1) {}
+ explicit APInt() : BitWidth(1), VAL(0) {}
/// \brief Returns whether this instance allocated memory.
bool needsCleanup() const { return !isSingleWord(); }
@@ -1528,7 +1529,7 @@ public:
/// \returns the nearest log base 2 of this APInt. Ties round up.
///
/// NOTE: When we have a BitWidth of 1, we define:
- ///
+ ///
/// log2(0) = UINT32_MAX
/// log2(1) = 0
///
diff --git a/contrib/llvm/include/llvm/ADT/APSInt.h b/contrib/llvm/include/llvm/ADT/APSInt.h
index a187515..a6552d0 100644
--- a/contrib/llvm/include/llvm/ADT/APSInt.h
+++ b/contrib/llvm/include/llvm/ADT/APSInt.h
@@ -21,6 +21,7 @@ namespace llvm {
class APSInt : public APInt {
bool IsUnsigned;
+
public:
/// Default constructor that creates an uninitialized APInt.
explicit APSInt() : IsUnsigned(false) {}
@@ -246,8 +247,7 @@ public:
return this->operator|(RHS);
}
-
- APSInt operator^(const APSInt& RHS) const {
+ APSInt operator^(const APSInt &RHS) const {
assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
return APSInt(static_cast<const APInt&>(*this) ^ RHS, IsUnsigned);
}
@@ -286,7 +286,7 @@ public:
}
/// \brief Determine if two APSInts have the same value, zero- or
- /// sign-extending as needed.
+ /// sign-extending as needed.
static bool isSameValue(const APSInt &I1, const APSInt &I2) {
return !compareValues(I1, I2);
}
diff --git a/contrib/llvm/include/llvm/ADT/ArrayRef.h b/contrib/llvm/include/llvm/ADT/ArrayRef.h
index c8795fd..517ba39 100644
--- a/contrib/llvm/include/llvm/ADT/ArrayRef.h
+++ b/contrib/llvm/include/llvm/ADT/ArrayRef.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ADT_ARRAYREF_H
#define LLVM_ADT_ARRAYREF_H
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>
@@ -85,7 +86,7 @@ namespace llvm {
/// Construct an ArrayRef from a std::initializer_list.
/*implicit*/ ArrayRef(const std::initializer_list<T> &Vec)
- : Data(Vec.begin() == Vec.end() ? (T*)0 : Vec.begin()),
+ : Data(Vec.begin() == Vec.end() ? (T*)nullptr : Vec.begin()),
Length(Vec.size()) {}
/// Construct an ArrayRef<const T*> from ArrayRef<T*>. This uses SFINAE to
@@ -148,7 +149,7 @@ namespace llvm {
// copy - Allocate copy in Allocator and return ArrayRef<T> to it.
template <typename Allocator> ArrayRef<T> copy(Allocator &A) {
T *Buff = A.template Allocate<T>(Length);
- std::copy(begin(), end(), Buff);
+ std::uninitialized_copy(begin(), end(), Buff);
return ArrayRef<T>(Buff, Length);
}
@@ -156,8 +157,6 @@ namespace llvm {
bool equals(ArrayRef RHS) const {
if (Length != RHS.Length)
return false;
- if (Length == 0)
- return true;
return std::equal(begin(), end(), RHS.begin());
}
@@ -339,6 +338,16 @@ namespace llvm {
return Vec;
}
+ /// Construct an ArrayRef from an ArrayRef (no-op) (const)
+ template <typename T> ArrayRef<T> makeArrayRef(const ArrayRef<T> &Vec) {
+ return Vec;
+ }
+
+ /// Construct an ArrayRef from an ArrayRef (no-op)
+ template <typename T> ArrayRef<T> &makeArrayRef(ArrayRef<T> &Vec) {
+ return Vec;
+ }
+
/// Construct an ArrayRef from a C array.
template<typename T, size_t N>
ArrayRef<T> makeArrayRef(const T (&Arr)[N]) {
@@ -366,6 +375,10 @@ namespace llvm {
template <typename T> struct isPodLike<ArrayRef<T> > {
static const bool value = true;
};
+
+ template <typename T> hash_code hash_value(ArrayRef<T> S) {
+ return hash_combine_range(S.begin(), S.end());
+ }
}
#endif
diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h
index f58dd73..ad00d51 100644
--- a/contrib/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm/include/llvm/ADT/BitVector.h
@@ -34,7 +34,7 @@ class BitVector {
BitWord *Bits; // Actual bits.
unsigned Size; // Size of bitvector in bits.
- unsigned Capacity; // Size of allocated memory in BitWord.
+ unsigned Capacity; // Number of BitWords allocated in the Bits array.
public:
typedef unsigned size_type;
@@ -566,8 +566,16 @@ private:
if (AddBits)
clear_unused_bits();
}
+
+public:
+ /// Return the size (in bytes) of the bit vector.
+ size_t getMemorySize() const { return Capacity * sizeof(BitWord); }
};
+static inline size_t capacity_in_bytes(const BitVector &X) {
+ return X.getMemorySize();
+}
+
} // End llvm namespace
namespace std {
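
A small sketch of the new accessor (illustrative helper name): it reports the
heap footprint of the bit storage itself, i.e. allocated BitWords rather than
bits in use.

#include <cstdio>
#include "llvm/ADT/BitVector.h"

static void report(const llvm::BitVector &BV) {
  std::printf("%u bits in %zu bytes\n", BV.size(), BV.getMemorySize());
}
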
diff --git a/contrib/llvm/include/llvm/ADT/DeltaAlgorithm.h b/contrib/llvm/include/llvm/ADT/DeltaAlgorithm.h
index 21bc1e8..a26f37d 100644
--- a/contrib/llvm/include/llvm/ADT/DeltaAlgorithm.h
+++ b/contrib/llvm/include/llvm/ADT/DeltaAlgorithm.h
@@ -68,7 +68,7 @@ private:
/// \return - True on success.
bool Search(const changeset_ty &Changes, const changesetlist_ty &Sets,
changeset_ty &Res);
-
+
protected:
/// UpdatedSearchState - Callback used when the search state changes.
virtual void UpdatedSearchState(const changeset_ty &Changes,
diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h
index 27f7315..6ee1960 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMap.h
@@ -282,7 +282,7 @@ protected:
"# initial buckets must be a power of two!");
const KeyT EmptyKey = getEmptyKey();
for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B)
- new (&B->getFirst()) KeyT(EmptyKey);
+ ::new (&B->getFirst()) KeyT(EmptyKey);
}
void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) {
@@ -300,7 +300,7 @@ protected:
(void)FoundVal; // silence warning.
assert(!FoundVal && "Key already in new map?");
DestBucket->getFirst() = std::move(B->getFirst());
- new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond()));
+ ::new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond()));
incrementNumEntries();
// Free the value.
@@ -324,11 +324,11 @@ protected:
getNumBuckets() * sizeof(BucketT));
else
for (size_t i = 0; i < getNumBuckets(); ++i) {
- new (&getBuckets()[i].getFirst())
+ ::new (&getBuckets()[i].getFirst())
KeyT(other.getBuckets()[i].getFirst());
if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) &&
!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey()))
- new (&getBuckets()[i].getSecond())
+ ::new (&getBuckets()[i].getSecond())
ValueT(other.getBuckets()[i].getSecond());
}
}
@@ -402,7 +402,7 @@ private:
TheBucket = InsertIntoBucketImpl(Key, TheBucket);
TheBucket->getFirst() = Key;
- new (&TheBucket->getSecond()) ValueT(Value);
+ ::new (&TheBucket->getSecond()) ValueT(Value);
return TheBucket;
}
@@ -411,7 +411,7 @@ private:
TheBucket = InsertIntoBucketImpl(Key, TheBucket);
TheBucket->getFirst() = Key;
- new (&TheBucket->getSecond()) ValueT(std::move(Value));
+ ::new (&TheBucket->getSecond()) ValueT(std::move(Value));
return TheBucket;
}
@@ -419,7 +419,7 @@ private:
TheBucket = InsertIntoBucketImpl(Key, TheBucket);
TheBucket->getFirst() = std::move(Key);
- new (&TheBucket->getSecond()) ValueT(std::move(Value));
+ ::new (&TheBucket->getSecond()) ValueT(std::move(Value));
return TheBucket;
}
@@ -766,10 +766,10 @@ public:
// Swap separately and handle any asymmetry.
std::swap(LHSB->getFirst(), RHSB->getFirst());
if (hasLHSValue) {
- new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond()));
+ ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond()));
LHSB->getSecond().~ValueT();
} else if (hasRHSValue) {
- new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond()));
+ ::new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond()));
RHSB->getSecond().~ValueT();
}
}
@@ -795,11 +795,11 @@ public:
for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
BucketT *NewB = &LargeSide.getInlineBuckets()[i],
*OldB = &SmallSide.getInlineBuckets()[i];
- new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
+ ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
OldB->getFirst().~KeyT();
if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
!KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
- new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
+ ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
OldB->getSecond().~ValueT();
}
}
@@ -866,8 +866,8 @@ public:
!KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) {
assert(size_t(TmpEnd - TmpBegin) < InlineBuckets &&
"Too many inline buckets!");
- new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst()));
- new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond()));
+ ::new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst()));
+ ::new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond()));
++TmpEnd;
P->getSecond().~ValueT();
}
diff --git a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
index b0a0530..a844ebc 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ADT_DENSEMAPINFO_H
#define LLVM_ADT_DENSEMAPINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
@@ -58,7 +59,7 @@ template<> struct DenseMapInfo<char> {
return LHS == RHS;
}
};
-
+
// Provide DenseMapInfo for unsigned ints.
template<> struct DenseMapInfo<unsigned> {
static inline unsigned getEmptyKey() { return ~0U; }
@@ -190,6 +191,31 @@ template <> struct DenseMapInfo<StringRef> {
}
};
+// Provide DenseMapInfo for ArrayRefs.
+template <typename T> struct DenseMapInfo<ArrayRef<T>> {
+ static inline ArrayRef<T> getEmptyKey() {
+ return ArrayRef<T>(reinterpret_cast<const T *>(~static_cast<uintptr_t>(0)),
+ size_t(0));
+ }
+ static inline ArrayRef<T> getTombstoneKey() {
+ return ArrayRef<T>(reinterpret_cast<const T *>(~static_cast<uintptr_t>(1)),
+ size_t(0));
+ }
+ static unsigned getHashValue(ArrayRef<T> Val) {
+ assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!");
+ assert(Val.data() != getTombstoneKey().data() &&
+ "Cannot hash the tombstone key!");
+ return (unsigned)(hash_value(Val));
+ }
+ static bool isEqual(ArrayRef<T> LHS, ArrayRef<T> RHS) {
+ if (RHS.data() == getEmptyKey().data())
+ return LHS.data() == getEmptyKey().data();
+ if (RHS.data() == getTombstoneKey().data())
+ return LHS.data() == getTombstoneKey().data();
+ return LHS == RHS;
+ }
+};
+
} // end namespace llvm
#endif
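
With the new specialization, an ArrayRef can key a DenseMap directly, as in
the sketch below (not from the patch). The map stores only the (pointer,
length) pair, so the referenced elements must outlive the map.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"

static const unsigned Sig1[] = {1, 2, 3};
static const unsigned Sig2[] = {4, 5};

static unsigned countDistinct() {
  llvm::DenseMap<llvm::ArrayRef<unsigned>, int> Seen;
  ++Seen[llvm::makeArrayRef(Sig1)];
  ++Seen[llvm::makeArrayRef(Sig2)];
  ++Seen[llvm::makeArrayRef(Sig1)]; // equal contents hash and compare equal
  return Seen.size();              // 2
}
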
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h
index d340240..ef09dce 100644
--- a/contrib/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm/include/llvm/ADT/DenseSet.h
@@ -42,6 +42,7 @@ class DenseSet {
static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT),
"DenseMap buckets unexpectedly large!");
MapTy TheMap;
+
public:
typedef ValueT key_type;
typedef ValueT value_type;
@@ -79,6 +80,7 @@ public:
class Iterator {
typename MapTy::iterator I;
friend class DenseSet;
+
public:
typedef typename MapTy::iterator::difference_type difference_type;
typedef ValueT value_type;
@@ -99,6 +101,7 @@ public:
class ConstIterator {
typename MapTy::const_iterator I;
friend class DenseSet;
+
public:
typedef typename MapTy::const_iterator::difference_type difference_type;
typedef ValueT value_type;
@@ -148,7 +151,7 @@ public:
detail::DenseSetEmpty Empty;
return TheMap.insert(std::make_pair(V, Empty));
}
-
+
// Range insertion of values.
template<typename InputIt>
void insert(InputIt I, InputIt E) {
diff --git a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
index d79b9ac..c9317b8 100644
--- a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -58,7 +58,6 @@ public:
SetType &Visited;
};
-
// Generic Depth First Iterator
template<class GraphT,
class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
@@ -76,21 +75,22 @@ class df_iterator : public std::iterator<std::forward_iterator_tag,
// VisitStack - Used to maintain the ordering. Top = current block
// First element is node pointer, second is the 'next child' to visit
// if the int in PointerIntTy is 0, the 'next child' to visit is invalid
- std::vector<std::pair<PointerIntTy, ChildItTy> > VisitStack;
+ std::vector<std::pair<PointerIntTy, ChildItTy>> VisitStack;
+
private:
inline df_iterator(NodeType *Node) {
this->Visited.insert(Node);
- VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0),
- GT::child_begin(Node)));
+ VisitStack.push_back(
+ std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node)));
}
- inline df_iterator() {
- // End is when stack is empty
+ inline df_iterator() {
+ // End is when stack is empty
}
inline df_iterator(NodeType *Node, SetType &S)
: df_iterator_storage<SetType, ExtStorage>(S) {
if (!S.count(Node)) {
- VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0),
- GT::child_begin(Node)));
+ VisitStack.push_back(
+ std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node)));
this->Visited.insert(Node);
}
}
@@ -115,8 +115,8 @@ private:
// Has our next sibling been visited?
if (Next && this->Visited.insert(Next).second) {
// No, do it now.
- VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0),
- GT::child_begin(Next)));
+ VisitStack.push_back(
+ std::make_pair(PointerIntTy(Next, 0), GT::child_begin(Next)));
return;
}
}
@@ -195,7 +195,6 @@ public:
}
};
-
// Provide global constructors that automatically figure out correct types...
//
template <class T>
@@ -237,7 +236,6 @@ iterator_range<df_ext_iterator<T, SetTy>> depth_first_ext(const T& G,
return make_range(df_ext_begin(G, S), df_ext_end(G, S));
}
-
// Provide global definitions of inverse depth first iterators...
template <class T,
class SetTy = llvm::SmallPtrSet<typename GraphTraits<T>::NodeType*, 8>,
diff --git a/contrib/llvm/include/llvm/ADT/FoldingSet.h b/contrib/llvm/include/llvm/ADT/FoldingSet.h
index 52d10c1..c920539 100644
--- a/contrib/llvm/include/llvm/ADT/FoldingSet.h
+++ b/contrib/llvm/include/llvm/ADT/FoldingSet.h
@@ -122,9 +122,10 @@ protected:
/// is greater than twice the number of buckets.
unsigned NumNodes;
- ~FoldingSetImpl();
-
explicit FoldingSetImpl(unsigned Log2InitSize = 6);
+ FoldingSetImpl(FoldingSetImpl &&Arg);
+ FoldingSetImpl &operator=(FoldingSetImpl &&RHS);
+ ~FoldingSetImpl();
public:
//===--------------------------------------------------------------------===//
@@ -137,7 +138,6 @@ public:
void *NextInFoldingSetBucket;
public:
-
Node() : NextInFoldingSetBucket(nullptr) {}
// Accessors
@@ -182,13 +182,11 @@ public:
bool empty() const { return NumNodes == 0; }
private:
-
/// GrowHashTable - Double the size of the hash table and rehash everything.
///
void GrowHashTable();
protected:
-
/// GetNodeProfile - Instantiations of the FoldingSet template implement
/// this function to gather data bits for the given node.
virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0;
@@ -269,6 +267,7 @@ template<typename T, typename Ctx> struct ContextualFoldingSetTrait
class FoldingSetNodeIDRef {
const unsigned *Data;
size_t Size;
+
public:
FoldingSetNodeIDRef() : Data(nullptr), Size(0) {}
FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
@@ -393,6 +392,10 @@ DefaultContextualFoldingSetTrait<T, Ctx>::ComputeHash(T &X,
/// implementation of the folding set to the node class T. T must be a
/// subclass of FoldingSetNode and implement a Profile function.
///
+/// Note that this set type is movable and move-assignable. However, its
+/// moved-from state is not a valid state for anything other than
+/// move-assigning and destroying. This is primarily to enable movable APIs
+/// that incorporate these objects.
template <class T> class FoldingSet final : public FoldingSetImpl {
private:
/// GetNodeProfile - Each instantiation of the FoldingSet needs to provide a
@@ -417,8 +420,13 @@ private:
public:
explicit FoldingSet(unsigned Log2InitSize = 6)
- : FoldingSetImpl(Log2InitSize)
- {}
+ : FoldingSetImpl(Log2InitSize) {}
+
+ FoldingSet(FoldingSet &&Arg) : FoldingSetImpl(std::move(Arg)) {}
+ FoldingSet &operator=(FoldingSet &&RHS) {
+ (void)FoldingSetImpl::operator=(std::move(RHS));
+ return *this;
+ }
typedef FoldingSetIterator<T> iterator;
iterator begin() { return iterator(Buckets); }
@@ -498,7 +506,6 @@ public:
Ctx getContext() const { return Context; }
-
typedef FoldingSetIterator<T> iterator;
iterator begin() { return iterator(Buckets); }
iterator end() { return iterator(Buckets+NumBuckets); }
@@ -614,9 +621,7 @@ public:
}
};
-
-template<class T>
-class FoldingSetIterator : public FoldingSetIteratorImpl {
+template <class T> class FoldingSetIterator : public FoldingSetIteratorImpl {
public:
explicit FoldingSetIterator(void **Bucket) : FoldingSetIteratorImpl(Bucket) {}
@@ -666,8 +671,7 @@ public:
}
};
-
-template<class T>
+template <class T>
class FoldingSetBucketIterator : public FoldingSetBucketIteratorImpl {
public:
explicit FoldingSetBucketIterator(void **Bucket) :
@@ -694,6 +698,7 @@ public:
template <typename T>
class FoldingSetNodeWrapper : public FoldingSetNode {
T data;
+
public:
template <typename... Ts>
explicit FoldingSetNodeWrapper(Ts &&... Args)
@@ -716,12 +721,12 @@ public:
/// information that would otherwise only be required for recomputing an ID.
class FastFoldingSetNode : public FoldingSetNode {
FoldingSetNodeID FastID;
+
protected:
explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {}
+
public:
- void Profile(FoldingSetNodeID &ID) const {
- ID.AddNodeID(FastID);
- }
+ void Profile(FoldingSetNodeID &ID) const { ID.AddNodeID(FastID); }
};
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableList.h b/contrib/llvm/include/llvm/ADT/ImmutableList.h
index 748d3e4..a1d26bd 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableList.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableList.h
@@ -28,7 +28,7 @@ class ImmutableListImpl : public FoldingSetNode {
T Head;
const ImmutableListImpl* Tail;
- ImmutableListImpl(const T& head, const ImmutableListImpl* tail = 0)
+ ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr)
: Head(head), Tail(tail) {}
friend class ImmutableListFactory<T>;
@@ -72,7 +72,7 @@ public:
// This constructor should normally only be called by ImmutableListFactory<T>.
// There may be cases, however, when one needs to extract the internal pointer
// and reconstruct a list object from that pointer.
- ImmutableList(const ImmutableListImpl<T>* x = 0) : X(x) {}
+ ImmutableList(const ImmutableListImpl<T>* x = nullptr) : X(x) {}
const ImmutableListImpl<T>* getInternalPointer() const {
return X;
@@ -81,7 +81,7 @@ public:
class iterator {
const ImmutableListImpl<T>* L;
public:
- iterator() : L(0) {}
+ iterator() : L(nullptr) {}
iterator(ImmutableList l) : L(l.getInternalPointer()) {}
iterator& operator++() { L = L->getTail(); return *this; }
@@ -128,7 +128,7 @@ public:
/// getTail - Returns the tail of the list, which is another (possibly empty)
/// ImmutableList.
ImmutableList getTail() {
- return X ? X->getTail() : 0;
+ return X ? X->getTail() : nullptr;
}
void Profile(FoldingSetNodeID& ID) const {
@@ -190,7 +190,7 @@ public:
}
ImmutableList<T> getEmptyList() const {
- return ImmutableList<T>(0);
+ return ImmutableList<T>(nullptr);
}
ImmutableList<T> create(const T& X) {
@@ -226,4 +226,4 @@ struct isPodLike<ImmutableList<T> > { static const bool value = true; };
} // end llvm namespace
-#endif
+#endif // LLVM_ADT_IMMUTABLELIST_H
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableMap.h b/contrib/llvm/include/llvm/ADT/ImmutableMap.h
index 438dec2..7480cd7 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableMap.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableMap.h
@@ -55,7 +55,6 @@ struct ImutKeyValueInfo {
}
};
-
template <typename KeyT, typename ValT,
typename ValInfo = ImutKeyValueInfo<KeyT,ValT> >
class ImmutableMap {
@@ -79,9 +78,11 @@ public:
explicit ImmutableMap(const TreeTy* R) : Root(const_cast<TreeTy*>(R)) {
if (Root) { Root->retain(); }
}
+
ImmutableMap(const ImmutableMap &X) : Root(X.Root) {
if (Root) { Root->retain(); }
}
+
ImmutableMap &operator=(const ImmutableMap &X) {
if (Root != X.Root) {
if (X.Root) { X.Root->retain(); }
@@ -90,6 +91,7 @@ public:
}
return *this;
}
+
~ImmutableMap() {
if (Root) { Root->release(); }
}
@@ -99,11 +101,10 @@ public:
const bool Canonicalize;
public:
- Factory(bool canonicalize = true)
- : Canonicalize(canonicalize) {}
-
- Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
- : F(Alloc), Canonicalize(canonicalize) {}
+ Factory(bool canonicalize = true) : Canonicalize(canonicalize) {}
+
+ Factory(BumpPtrAllocator &Alloc, bool canonicalize = true)
+ : F(Alloc), Canonicalize(canonicalize) {}
ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); }
@@ -143,14 +144,12 @@ public:
return Root;
}
- TreeTy *getRootWithoutRetain() const {
- return Root;
- }
-
+ TreeTy *getRootWithoutRetain() const { return Root; }
+
void manualRetain() {
if (Root) Root->retain();
}
-
+
void manualRelease() {
if (Root) Root->release();
}
@@ -224,7 +223,7 @@ public:
return nullptr;
}
-
+
/// getMaxElement - Returns the <key,value> pair in the ImmutableMap for
/// which key is the highest in the ordering of keys in the map. This
/// method returns NULL if the map is empty.
@@ -260,20 +259,21 @@ public:
typedef typename ValInfo::data_type_ref data_type_ref;
typedef ImutAVLTree<ValInfo> TreeTy;
typedef typename TreeTy::Factory FactoryTy;
-
+
protected:
TreeTy *Root;
FactoryTy *Factory;
-
+
public:
/// Constructs a map from a pointer to a tree root. In general one
/// should use a Factory object to create maps instead of directly
/// invoking the constructor, but there are cases where making this
/// constructor public is useful.
- explicit ImmutableMapRef(const TreeTy* R, FactoryTy *F)
- : Root(const_cast<TreeTy*>(R)),
- Factory(F) {
- if (Root) { Root->retain(); }
+ explicit ImmutableMapRef(const TreeTy *R, FactoryTy *F)
+ : Root(const_cast<TreeTy *>(R)), Factory(F) {
+ if (Root) {
+ Root->retain();
+ }
}
explicit ImmutableMapRef(const ImmutableMap<KeyT, ValT> &X,
@@ -282,21 +282,21 @@ public:
Factory(F.getTreeFactory()) {
if (Root) { Root->retain(); }
}
-
- ImmutableMapRef(const ImmutableMapRef &X)
- : Root(X.Root),
- Factory(X.Factory) {
- if (Root) { Root->retain(); }
+
+ ImmutableMapRef(const ImmutableMapRef &X) : Root(X.Root), Factory(X.Factory) {
+ if (Root) {
+ Root->retain();
+ }
}
ImmutableMapRef &operator=(const ImmutableMapRef &X) {
if (Root != X.Root) {
if (X.Root)
X.Root->retain();
-
+
if (Root)
Root->release();
-
+
Root = X.Root;
Factory = X.Factory;
}
@@ -307,7 +307,7 @@ public:
if (Root)
Root->release();
}
-
+
static inline ImmutableMapRef getEmptyMap(FactoryTy *F) {
return ImmutableMapRef(0, F);
}
@@ -329,31 +329,34 @@ public:
TreeTy *NewT = Factory->remove(Root, K);
return ImmutableMapRef(NewT, Factory);
}
-
+
bool contains(key_type_ref K) const {
return Root ? Root->contains(K) : false;
}
-
+
ImmutableMap<KeyT, ValT> asImmutableMap() const {
return ImmutableMap<KeyT, ValT>(Factory->getCanonicalTree(Root));
}
-
+
bool operator==(const ImmutableMapRef &RHS) const {
return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root;
}
-
+
bool operator!=(const ImmutableMapRef &RHS) const {
return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root;
}
-
+
bool isEmpty() const { return !Root; }
-
+
//===--------------------------------------------------===//
// For testing.
//===--------------------------------------------------===//
-
- void verify() const { if (Root) Root->verify(); }
-
+
+ void verify() const {
+ if (Root)
+ Root->verify();
+ }
+
//===--------------------------------------------------===//
// Iterators.
//===--------------------------------------------------===//
@@ -370,38 +373,36 @@ public:
iterator begin() const { return iterator(Root); }
iterator end() const { return iterator(); }
-
- data_type* lookup(key_type_ref K) const {
+
+ data_type *lookup(key_type_ref K) const {
if (Root) {
TreeTy* T = Root->find(K);
if (T) return &T->getValue().second;
}
-
- return 0;
+
+ return nullptr;
}
-
+
/// getMaxElement - Returns the <key,value> pair in the ImmutableMap for
/// which key is the highest in the ordering of keys in the map. This
/// method returns NULL if the map is empty.
value_type* getMaxElement() const {
return Root ? &(Root->getMaxElement()->getValue()) : 0;
}
-
+
//===--------------------------------------------------===//
// Utility methods.
//===--------------------------------------------------===//
-
+
unsigned getHeight() const { return Root ? Root->getHeight() : 0; }
-
- static inline void Profile(FoldingSetNodeID& ID, const ImmutableMapRef &M) {
+
+ static inline void Profile(FoldingSetNodeID &ID, const ImmutableMapRef &M) {
ID.AddPointer(M.Root);
}
-
- inline void Profile(FoldingSetNodeID& ID) const {
- return Profile(ID, *this);
- }
+
+ inline void Profile(FoldingSetNodeID &ID) const { return Profile(ID, *this); }
};
-
+
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_IMMUTABLEMAP_H
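
For orientation, a minimal sketch of the usage pattern ImmutableMapRef supports. The add() member does not appear in this hunk, so its presence is an assumption based on the remove() shown above; names and values are illustrative only.

#include "llvm/ADT/ImmutableMap.h"
using namespace llvm;

void immutableMapRefDemo() {
  // The factory owns the AVL trees; the ref type only borrows it.
  ImmutableMap<int, int>::Factory F;
  ImmutableMapRef<int, int> M(F.getEmptyMap(), F);
  // Each update returns a new persistent map; M itself is unchanged.
  ImmutableMapRef<int, int> M2 = M.add(1, 10); // add() assumed, mirrors remove()
  if (int *V = M2.lookup(1))
    (void)*V;                                  // *V == 10
  ImmutableMap<int, int> Canon = M2.asImmutableMap();
  (void)Canon;
}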
diff --git a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 65b2da7..8057ec1 100644
--- a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -154,7 +154,7 @@ public:
template <class X>
IntrusiveRefCntPtr(IntrusiveRefCntPtr<X>&& S) : Obj(S.get()) {
- S.Obj = 0;
+ S.Obj = nullptr;
}
template <class X>
@@ -190,7 +190,7 @@ public:
}
void resetWithoutRelease() {
- Obj = 0;
+ Obj = nullptr;
}
private:
diff --git a/contrib/llvm/include/llvm/ADT/Optional.h b/contrib/llvm/include/llvm/ADT/Optional.h
index 855ab89..d9acaf6 100644
--- a/contrib/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm/include/llvm/ADT/Optional.h
@@ -159,6 +159,25 @@ template <typename T> struct isPodLike<Optional<T> > {
template<typename T, typename U>
void operator==(const Optional<T> &X, const Optional<U> &Y);
+template<typename T>
+bool operator==(const Optional<T> &X, NoneType) {
+ return !X.hasValue();
+}
+
+template<typename T>
+bool operator==(NoneType, const Optional<T> &X) {
+ return X == None;
+}
+
+template<typename T>
+bool operator!=(const Optional<T> &X, NoneType) {
+ return !(X == None);
+}
+
+template<typename T>
+bool operator!=(NoneType, const Optional<T> &X) {
+ return X != None;
+}
/// \brief Poison comparison between two \c Optional objects. Clients needs to
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
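
A short sketch of what the NoneType comparison operators added above permit; the surrounding names are illustrative only.

#include "llvm/ADT/Optional.h"
using namespace llvm;

void optionalNoneDemo() {
  Optional<int> X;            // disengaged
  bool Empty = (X == None);   // true: uses the new operator==(Optional, NoneType)
  X = 42;
  bool Engaged = (X != None); // true: an engaged Optional is not None
  (void)Empty; (void)Engaged;
}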
diff --git a/contrib/llvm/include/llvm/ADT/PackedVector.h b/contrib/llvm/include/llvm/ADT/PackedVector.h
index 1ae2a77..0926717 100644
--- a/contrib/llvm/include/llvm/ADT/PackedVector.h
+++ b/contrib/llvm/include/llvm/ADT/PackedVector.h
@@ -83,9 +83,9 @@ public:
PackedVector &Vec;
const unsigned Idx;
- reference(); // Undefined
+ reference(); // Undefined
public:
- reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) { }
+ reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) {}
reference &operator=(T val) {
Vec.setValue(Vec.Bits, Idx, val);
@@ -96,16 +96,16 @@ public:
}
};
- PackedVector() { }
+ PackedVector() = default;
explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) { }
bool empty() const { return Bits.empty(); }
- unsigned size() const { return Bits.size() >> (BitNum-1); }
-
+ unsigned size() const { return Bits.size() >> (BitNum - 1); }
+
void clear() { Bits.clear(); }
-
- void resize(unsigned N) { Bits.resize(N << (BitNum-1)); }
+
+ void resize(unsigned N) { Bits.resize(N << (BitNum - 1)); }
void reserve(unsigned N) { Bits.reserve(N << (BitNum-1)); }
@@ -135,24 +135,14 @@ public:
return Bits != RHS.Bits;
}
- const PackedVector &operator=(const PackedVector &RHS) {
- Bits = RHS.Bits;
- return *this;
- }
-
PackedVector &operator|=(const PackedVector &RHS) {
Bits |= RHS.Bits;
return *this;
}
-
- void swap(PackedVector &RHS) {
- Bits.swap(RHS.Bits);
- }
};
-// Leave BitNum=0 undefined.
-template <typename T>
-class PackedVector<T, 0>;
+// Leave BitNum=0 undefined.
+template <typename T> class PackedVector<T, 0>;
} // end llvm namespace
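
For context, a minimal sketch of how PackedVector is used. push_back() is not shown in this hunk, so it is assumed from the rest of the header; the values are illustrative.

#include "llvm/ADT/PackedVector.h"
using namespace llvm;

void packedVectorDemo() {
  PackedVector<unsigned, 2> Vec; // each element is packed into 2 bits
  Vec.push_back(3);              // assumed member; values must fit in 2 bits
  Vec.push_back(1);
  unsigned First = Vec[0];       // 3, read through the const accessor
  Vec[1] = 2;                    // the reference proxy writes through setValue()
  (void)First;
}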
diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
index 45a40db..0058d85 100644
--- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
@@ -21,8 +21,10 @@
namespace llvm {
-template<typename T>
-struct DenseMapInfo;
+template <typename T> struct DenseMapInfo;
+
+template <typename PointerT, unsigned IntBits, typename PtrTraits>
+struct PointerIntPairInfo;
/// PointerIntPair - This class implements a pair of a pointer and small
/// integer. It is designed to represent this in the space required by one
@@ -38,83 +40,35 @@ struct DenseMapInfo;
/// PointerIntPair<PointerIntPair<void*, 1, bool>, 1, bool>
/// ... and the two bools will land in different bits.
///
-template <typename PointerTy, unsigned IntBits, typename IntType=unsigned,
- typename PtrTraits = PointerLikeTypeTraits<PointerTy> >
+template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
+ typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
+ typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
class PointerIntPair {
intptr_t Value;
- static_assert(PtrTraits::NumLowBitsAvailable <
- std::numeric_limits<uintptr_t>::digits,
- "cannot use a pointer type that has all bits free");
- static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
- "PointerIntPair with integer size too large for pointer");
- enum : uintptr_t {
- /// PointerBitMask - The bits that come from the pointer.
- PointerBitMask =
- ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable)-1),
- /// IntShift - The number of low bits that we reserve for other uses, and
- /// keep zero.
- IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable-IntBits,
-
- /// IntMask - This is the unshifted mask for valid bits of the int type.
- IntMask = (uintptr_t)(((intptr_t)1 << IntBits)-1),
-
- // ShiftedIntMask - This is the bits for the integer shifted in place.
- ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
- };
public:
PointerIntPair() : Value(0) {}
PointerIntPair(PointerTy PtrVal, IntType IntVal) {
setPointerAndInt(PtrVal, IntVal);
}
- explicit PointerIntPair(PointerTy PtrVal) {
- initWithPointer(PtrVal);
- }
+ explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); }
- PointerTy getPointer() const {
- return PtrTraits::getFromVoidPointer(
- reinterpret_cast<void*>(Value & PointerBitMask));
- }
+ PointerTy getPointer() const { return Info::getPointer(Value); }
- IntType getInt() const {
- return (IntType)((Value >> IntShift) & IntMask);
- }
+ IntType getInt() const { return (IntType)Info::getInt(Value); }
void setPointer(PointerTy PtrVal) {
- intptr_t PtrWord
- = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
- assert((PtrWord & ~PointerBitMask) == 0 &&
- "Pointer is not sufficiently aligned");
- // Preserve all low bits, just update the pointer.
- Value = PtrWord | (Value & ~PointerBitMask);
+ Value = Info::updatePointer(Value, PtrVal);
}
- void setInt(IntType IntVal) {
- intptr_t IntWord = static_cast<intptr_t>(IntVal);
- assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
-
- // Preserve all bits other than the ones we are updating.
- Value &= ~ShiftedIntMask; // Remove integer field.
- Value |= IntWord << IntShift; // Set new integer.
- }
+ void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); }
void initWithPointer(PointerTy PtrVal) {
- intptr_t PtrWord
- = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
- assert((PtrWord & ~PointerBitMask) == 0 &&
- "Pointer is not sufficiently aligned");
- Value = PtrWord;
+ Value = Info::updatePointer(0, PtrVal);
}
void setPointerAndInt(PointerTy PtrVal, IntType IntVal) {
- intptr_t PtrWord
- = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
- assert((PtrWord & ~PointerBitMask) == 0 &&
- "Pointer is not sufficiently aligned");
- intptr_t IntWord = static_cast<intptr_t>(IntVal);
- assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
-
- Value = PtrWord | (IntWord << IntShift);
+ Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal);
}
PointerTy const *getAddrOfPointer() const {
@@ -128,11 +82,15 @@ public:
return reinterpret_cast<PointerTy *>(&Value);
}
- void *getOpaqueValue() const { return reinterpret_cast<void*>(Value); }
- void setFromOpaqueValue(void *Val) { Value = reinterpret_cast<intptr_t>(Val);}
+ void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
+ void setFromOpaqueValue(void *Val) {
+ Value = reinterpret_cast<intptr_t>(Val);
+ }
static PointerIntPair getFromOpaqueValue(void *V) {
- PointerIntPair P; P.setFromOpaqueValue(V); return P;
+ PointerIntPair P;
+ P.setFromOpaqueValue(V);
+ return P;
}
// Allow PointerIntPairs to be created from const void * if and only if the
@@ -142,23 +100,81 @@ public:
return getFromOpaqueValue(const_cast<void *>(V));
}
- bool operator==(const PointerIntPair &RHS) const {return Value == RHS.Value;}
- bool operator!=(const PointerIntPair &RHS) const {return Value != RHS.Value;}
- bool operator<(const PointerIntPair &RHS) const {return Value < RHS.Value;}
- bool operator>(const PointerIntPair &RHS) const {return Value > RHS.Value;}
- bool operator<=(const PointerIntPair &RHS) const {return Value <= RHS.Value;}
- bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;}
+ bool operator==(const PointerIntPair &RHS) const {
+ return Value == RHS.Value;
+ }
+ bool operator!=(const PointerIntPair &RHS) const {
+ return Value != RHS.Value;
+ }
+ bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; }
+ bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; }
+ bool operator<=(const PointerIntPair &RHS) const {
+ return Value <= RHS.Value;
+ }
+ bool operator>=(const PointerIntPair &RHS) const {
+ return Value >= RHS.Value;
+ }
+};
+
+template <typename PointerT, unsigned IntBits, typename PtrTraits>
+struct PointerIntPairInfo {
+ static_assert(PtrTraits::NumLowBitsAvailable <
+ std::numeric_limits<uintptr_t>::digits,
+ "cannot use a pointer type that has all bits free");
+ static_assert(IntBits <= PtrTraits::NumLowBitsAvailable,
+ "PointerIntPair with integer size too large for pointer");
+ enum : uintptr_t {
+ /// PointerBitMask - The bits that come from the pointer.
+ PointerBitMask =
+ ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1),
+
+ /// IntShift - The number of low bits that we reserve for other uses, and
+ /// keep zero.
+ IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits,
+
+ /// IntMask - This is the unshifted mask for valid bits of the int type.
+ IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1),
+
+ // ShiftedIntMask - This is the bits for the integer shifted in place.
+ ShiftedIntMask = (uintptr_t)(IntMask << IntShift)
+ };
+
+ static PointerT getPointer(intptr_t Value) {
+ return PtrTraits::getFromVoidPointer(
+ reinterpret_cast<void *>(Value & PointerBitMask));
+ }
+
+ static intptr_t getInt(intptr_t Value) {
+ return (Value >> IntShift) & IntMask;
+ }
+
+ static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) {
+ intptr_t PtrWord =
+ reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
+ assert((PtrWord & ~PointerBitMask) == 0 &&
+ "Pointer is not sufficiently aligned");
+ // Preserve all low bits, just update the pointer.
+ return PtrWord | (OrigValue & ~PointerBitMask);
+ }
+
+ static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) {
+ intptr_t IntWord = static_cast<intptr_t>(Int);
+ assert((IntWord & ~IntMask) == 0 && "Integer too large for field");
+
+ // Preserve all bits other than the ones we are updating.
+ return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift;
+ }
};
template <typename T> struct isPodLike;
-template<typename PointerTy, unsigned IntBits, typename IntType>
-struct isPodLike<PointerIntPair<PointerTy, IntBits, IntType> > {
- static const bool value = true;
+template <typename PointerTy, unsigned IntBits, typename IntType>
+struct isPodLike<PointerIntPair<PointerTy, IntBits, IntType>> {
+ static const bool value = true;
};
-
+
// Provide specialization of DenseMapInfo for PointerIntPair.
-template<typename PointerTy, unsigned IntBits, typename IntType>
-struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType> > {
+template <typename PointerTy, unsigned IntBits, typename IntType>
+struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
typedef PointerIntPair<PointerTy, IntBits, IntType> Ty;
static Ty getEmptyKey() {
uintptr_t Val = static_cast<uintptr_t>(-1);
@@ -178,10 +194,10 @@ struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType> > {
};
// Teach SmallPtrSet that PointerIntPair is "basically a pointer".
-template<typename PointerTy, unsigned IntBits, typename IntType,
- typename PtrTraits>
-class PointerLikeTypeTraits<PointerIntPair<PointerTy, IntBits, IntType,
- PtrTraits> > {
+template <typename PointerTy, unsigned IntBits, typename IntType,
+ typename PtrTraits>
+class PointerLikeTypeTraits<
+ PointerIntPair<PointerTy, IntBits, IntType, PtrTraits>> {
public:
static inline void *
getAsVoidPointer(const PointerIntPair<PointerTy, IntBits, IntType> &P) {
@@ -195,9 +211,7 @@ public:
getFromVoidPointer(const void *P) {
return PointerIntPair<PointerTy, IntBits, IntType>::getFromOpaqueValue(P);
}
- enum {
- NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits
- };
+ enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits };
};
} // end namespace llvm
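
A small sketch of the packing behavior that the refactored PointerIntPairInfo now centralizes; names are illustrative.

#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

void pointerIntPairDemo() {
  static int Slot = 0;
  // One bool is stored in a low alignment bit of the int*.
  PointerIntPair<int *, 1, bool> P(&Slot, true);
  int *Ptr = P.getPointer(); // &Slot, with the flag bit masked off
  bool Flag = P.getInt();    // true
  P.setInt(false);           // updates only the integer field
  (void)Ptr; (void)Flag;
}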
diff --git a/contrib/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm/include/llvm/ADT/PointerUnion.h
index f27b811..6b3fe57 100644
--- a/contrib/llvm/include/llvm/ADT/PointerUnion.h
+++ b/contrib/llvm/include/llvm/ADT/PointerUnion.h
@@ -21,492 +21,454 @@
namespace llvm {
- template <typename T>
- struct PointerUnionTypeSelectorReturn {
- typedef T Return;
+template <typename T> struct PointerUnionTypeSelectorReturn {
+ typedef T Return;
+};
+
+/// Get a type based on whether two types are the same or not.
+///
+/// For:
+///
+/// \code
+/// typedef typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return Ret;
+/// \endcode
+///
+/// Ret will be EQ type if T1 is same as T2 or NE type otherwise.
+template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
+struct PointerUnionTypeSelector {
+ typedef typename PointerUnionTypeSelectorReturn<RET_NE>::Return Return;
+};
+
+template <typename T, typename RET_EQ, typename RET_NE>
+struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> {
+ typedef typename PointerUnionTypeSelectorReturn<RET_EQ>::Return Return;
+};
+
+template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
+struct PointerUnionTypeSelectorReturn<
+ PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>> {
+ typedef
+ typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return Return;
+};
+
+/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
+/// for the two template arguments.
+template <typename PT1, typename PT2> class PointerUnionUIntTraits {
+public:
+ static inline void *getAsVoidPointer(void *P) { return P; }
+ static inline void *getFromVoidPointer(void *P) { return P; }
+ enum {
+ PT1BitsAv = (int)(PointerLikeTypeTraits<PT1>::NumLowBitsAvailable),
+ PT2BitsAv = (int)(PointerLikeTypeTraits<PT2>::NumLowBitsAvailable),
+ NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv
};
-
- /// \brief Get a type based on whether two types are the same or not. For:
- /// @code
- /// typedef typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return Ret;
- /// @endcode
- /// Ret will be EQ type if T1 is same as T2 or NE type otherwise.
- template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
- struct PointerUnionTypeSelector {
- typedef typename PointerUnionTypeSelectorReturn<RET_NE>::Return Return;
- };
-
- template <typename T, typename RET_EQ, typename RET_NE>
- struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> {
- typedef typename PointerUnionTypeSelectorReturn<RET_EQ>::Return Return;
- };
-
- template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
- struct PointerUnionTypeSelectorReturn<
- PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE> > {
- typedef typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return
- Return;
+};
+
+/// A discriminated union of two pointer types, with the discriminator in the
+/// low bit of the pointer.
+///
+/// This implementation is extremely efficient in space due to leveraging the
+/// low bits of the pointer, while exposing a natural and type-safe API.
+///
+/// Common use patterns would be something like this:
+/// PointerUnion<int*, float*> P;
+/// P = (int*)0;
+/// printf("%d %d", P.is<int*>(), P.is<float*>()); // prints "1 0"
+/// X = P.get<int*>(); // ok.
+/// Y = P.get<float*>(); // runtime assertion failure.
+/// Z = P.get<double*>(); // compile time failure.
+/// P = (float*)0;
+/// Y = P.get<float*>(); // ok.
+/// X = P.get<int*>(); // runtime assertion failure.
+template <typename PT1, typename PT2> class PointerUnion {
+public:
+ typedef PointerIntPair<void *, 1, bool, PointerUnionUIntTraits<PT1, PT2>>
+ ValTy;
+
+private:
+ ValTy Val;
+
+ struct IsPT1 {
+ static const int Num = 0;
};
-
- /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
- /// for the two template arguments.
- template <typename PT1, typename PT2>
- class PointerUnionUIntTraits {
- public:
- static inline void *getAsVoidPointer(void *P) { return P; }
- static inline void *getFromVoidPointer(void *P) { return P; }
- enum {
- PT1BitsAv = (int)(PointerLikeTypeTraits<PT1>::NumLowBitsAvailable),
- PT2BitsAv = (int)(PointerLikeTypeTraits<PT2>::NumLowBitsAvailable),
- NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv
- };
+ struct IsPT2 {
+ static const int Num = 1;
};
+ template <typename T> struct UNION_DOESNT_CONTAIN_TYPE {};
+
+public:
+ PointerUnion() {}
+
+ PointerUnion(PT1 V)
+ : Val(const_cast<void *>(
+ PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {}
+ PointerUnion(PT2 V)
+ : Val(const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)),
+ 1) {}
+
+ /// Test if the pointer held in the union is null, regardless of
+ /// which type it is.
+ bool isNull() const {
+ // Convert from the void* to one of the pointer types, to make sure that
+ // we recursively strip off low bits if we have a nested PointerUnion.
+ return !PointerLikeTypeTraits<PT1>::getFromVoidPointer(Val.getPointer());
+ }
+ explicit operator bool() const { return !isNull(); }
+
+ /// Test if the Union currently holds the type matching T.
+ template <typename T> int is() const {
+ typedef typename ::llvm::PointerUnionTypeSelector<
+ PT1, T, IsPT1, ::llvm::PointerUnionTypeSelector<
+ PT2, T, IsPT2, UNION_DOESNT_CONTAIN_TYPE<T>>>::Return
+ Ty;
+ int TyNo = Ty::Num;
+ return static_cast<int>(Val.getInt()) == TyNo;
+ }
- /// PointerUnion - This implements a discriminated union of two pointer types,
- /// and keeps the discriminator bit-mangled into the low bits of the pointer.
- /// This allows the implementation to be extremely efficient in space, but
- /// permits a very natural and type-safe API.
+ /// Returns the value of the specified pointer type.
///
- /// Common use patterns would be something like this:
- /// PointerUnion<int*, float*> P;
- /// P = (int*)0;
- /// printf("%d %d", P.is<int*>(), P.is<float*>()); // prints "1 0"
- /// X = P.get<int*>(); // ok.
- /// Y = P.get<float*>(); // runtime assertion failure.
- /// Z = P.get<double*>(); // compile time failure.
- /// P = (float*)0;
- /// Y = P.get<float*>(); // ok.
- /// X = P.get<int*>(); // runtime assertion failure.
- template <typename PT1, typename PT2>
- class PointerUnion {
- public:
- typedef PointerIntPair<void*, 1, bool,
- PointerUnionUIntTraits<PT1,PT2> > ValTy;
- private:
- ValTy Val;
-
- struct IsPT1 {
- static const int Num = 0;
- };
- struct IsPT2 {
- static const int Num = 1;
- };
- template <typename T>
- struct UNION_DOESNT_CONTAIN_TYPE { };
-
- public:
- PointerUnion() {}
-
- PointerUnion(PT1 V) : Val(
- const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {
- }
- PointerUnion(PT2 V) : Val(
- const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)), 1) {
- }
-
- /// isNull - Return true if the pointer held in the union is null,
- /// regardless of which type it is.
- bool isNull() const {
- // Convert from the void* to one of the pointer types, to make sure that
- // we recursively strip off low bits if we have a nested PointerUnion.
- return !PointerLikeTypeTraits<PT1>::getFromVoidPointer(Val.getPointer());
- }
- explicit operator bool() const { return !isNull(); }
-
- /// is<T>() return true if the Union currently holds the type matching T.
- template<typename T>
- int is() const {
- typedef typename
- ::llvm::PointerUnionTypeSelector<PT1, T, IsPT1,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsPT2,
- UNION_DOESNT_CONTAIN_TYPE<T> > >::Return Ty;
- int TyNo = Ty::Num;
- return static_cast<int>(Val.getInt()) == TyNo;
- }
-
- /// get<T>() - Return the value of the specified pointer type. If the
- /// specified pointer type is incorrect, assert.
- template<typename T>
- T get() const {
- assert(is<T>() && "Invalid accessor called");
- return PointerLikeTypeTraits<T>::getFromVoidPointer(Val.getPointer());
- }
+ /// If the specified pointer type is incorrect, assert.
+ template <typename T> T get() const {
+ assert(is<T>() && "Invalid accessor called");
+ return PointerLikeTypeTraits<T>::getFromVoidPointer(Val.getPointer());
+ }
- /// dyn_cast<T>() - If the current value is of the specified pointer type,
- /// return it, otherwise return null.
- template<typename T>
- T dyn_cast() const {
- if (is<T>()) return get<T>();
- return T();
- }
+ /// Returns the current pointer if it is of the specified pointer type,
+ /// otherwise returns null.
+ template <typename T> T dyn_cast() const {
+ if (is<T>())
+ return get<T>();
+ return T();
+ }
- /// \brief If the union is set to the first pointer type get an address
- /// pointing to it.
- PT1 const *getAddrOfPtr1() const {
- return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
- }
+ /// If the union is set to the first pointer type get an address pointing to
+ /// it.
+ PT1 const *getAddrOfPtr1() const {
+ return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
+ }
- /// \brief If the union is set to the first pointer type get an address
- /// pointing to it.
- PT1 *getAddrOfPtr1() {
- assert(is<PT1>() && "Val is not the first pointer");
- assert(get<PT1>() == Val.getPointer() &&
- "Can't get the address because PointerLikeTypeTraits changes the ptr");
- return (PT1 *)Val.getAddrOfPointer();
- }
+ /// If the union is set to the first pointer type get an address pointing to
+ /// it.
+ PT1 *getAddrOfPtr1() {
+ assert(is<PT1>() && "Val is not the first pointer");
+ assert(
+ get<PT1>() == Val.getPointer() &&
+ "Can't get the address because PointerLikeTypeTraits changes the ptr");
+ return (PT1 *)Val.getAddrOfPointer();
+ }
- /// \brief Assignment from nullptr which just clears the union.
- const PointerUnion &operator=(std::nullptr_t) {
- Val.initWithPointer(nullptr);
- return *this;
- }
+ /// Assignment from nullptr which just clears the union.
+ const PointerUnion &operator=(std::nullptr_t) {
+ Val.initWithPointer(nullptr);
+ return *this;
+ }
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion &operator=(const PT1 &RHS) {
- Val.initWithPointer(
- const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
- return *this;
- }
- const PointerUnion &operator=(const PT2 &RHS) {
- Val.setPointerAndInt(
+ /// Assignment operators - Allow assigning into this union from either
+ /// pointer type, setting the discriminator to remember what it came from.
+ const PointerUnion &operator=(const PT1 &RHS) {
+ Val.initWithPointer(
+ const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
+ return *this;
+ }
+ const PointerUnion &operator=(const PT2 &RHS) {
+ Val.setPointerAndInt(
const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)),
1);
- return *this;
- }
-
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
- static inline PointerUnion getFromOpaqueValue(void *VP) {
- PointerUnion V;
- V.Val = ValTy::getFromOpaqueValue(VP);
- return V;
- }
- };
-
- template<typename PT1, typename PT2>
- static bool operator==(PointerUnion<PT1, PT2> lhs,
- PointerUnion<PT1, PT2> rhs) {
- return lhs.getOpaqueValue() == rhs.getOpaqueValue();
+ return *this;
}
- template<typename PT1, typename PT2>
- static bool operator!=(PointerUnion<PT1, PT2> lhs,
- PointerUnion<PT1, PT2> rhs) {
- return lhs.getOpaqueValue() != rhs.getOpaqueValue();
+ void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+ static inline PointerUnion getFromOpaqueValue(void *VP) {
+ PointerUnion V;
+ V.Val = ValTy::getFromOpaqueValue(VP);
+ return V;
}
+};
- template<typename PT1, typename PT2>
- static bool operator<(PointerUnion<PT1, PT2> lhs,
- PointerUnion<PT1, PT2> rhs) {
- return lhs.getOpaqueValue() < rhs.getOpaqueValue();
- }
+template <typename PT1, typename PT2>
+static bool operator==(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+ return lhs.getOpaqueValue() == rhs.getOpaqueValue();
+}
- // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
- // # low bits available = min(PT1bits,PT2bits)-1.
- template<typename PT1, typename PT2>
- class PointerLikeTypeTraits<PointerUnion<PT1, PT2> > {
- public:
- static inline void *
- getAsVoidPointer(const PointerUnion<PT1, PT2> &P) {
- return P.getOpaqueValue();
- }
- static inline PointerUnion<PT1, PT2>
- getFromVoidPointer(void *P) {
- return PointerUnion<PT1, PT2>::getFromOpaqueValue(P);
- }
+template <typename PT1, typename PT2>
+static bool operator!=(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+ return lhs.getOpaqueValue() != rhs.getOpaqueValue();
+}
+
+template <typename PT1, typename PT2>
+static bool operator<(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+ return lhs.getOpaqueValue() < rhs.getOpaqueValue();
+}
+
+// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
+// # low bits available = min(PT1bits,PT2bits)-1.
+template <typename PT1, typename PT2>
+class PointerLikeTypeTraits<PointerUnion<PT1, PT2>> {
+public:
+ static inline void *getAsVoidPointer(const PointerUnion<PT1, PT2> &P) {
+ return P.getOpaqueValue();
+ }
+ static inline PointerUnion<PT1, PT2> getFromVoidPointer(void *P) {
+ return PointerUnion<PT1, PT2>::getFromOpaqueValue(P);
+ }
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable =
- PointerLikeTypeTraits<typename PointerUnion<PT1,PT2>::ValTy>
- ::NumLowBitsAvailable
- };
+ // The number of bits available are the min of the two pointer types.
+ enum {
+ NumLowBitsAvailable = PointerLikeTypeTraits<
+ typename PointerUnion<PT1, PT2>::ValTy>::NumLowBitsAvailable
};
+};
+/// A pointer union of three pointer types. See documentation for PointerUnion
+/// for usage.
+template <typename PT1, typename PT2, typename PT3> class PointerUnion3 {
+public:
+ typedef PointerUnion<PT1, PT2> InnerUnion;
+ typedef PointerUnion<InnerUnion, PT3> ValTy;
- /// PointerUnion3 - This is a pointer union of three pointer types. See
- /// documentation for PointerUnion for usage.
- template <typename PT1, typename PT2, typename PT3>
- class PointerUnion3 {
- public:
- typedef PointerUnion<PT1, PT2> InnerUnion;
- typedef PointerUnion<InnerUnion, PT3> ValTy;
- private:
- ValTy Val;
+private:
+ ValTy Val;
- struct IsInnerUnion {
- ValTy Val;
- IsInnerUnion(ValTy val) : Val(val) { }
- template<typename T>
- int is() const {
- return Val.template is<InnerUnion>() &&
- Val.template get<InnerUnion>().template is<T>();
- }
- template<typename T>
- T get() const {
- return Val.template get<InnerUnion>().template get<T>();
- }
- };
-
- struct IsPT3 {
- ValTy Val;
- IsPT3(ValTy val) : Val(val) { }
- template<typename T>
- int is() const {
- return Val.template is<T>();
- }
- template<typename T>
- T get() const {
- return Val.template get<T>();
- }
- };
-
- public:
- PointerUnion3() {}
-
- PointerUnion3(PT1 V) {
- Val = InnerUnion(V);
- }
- PointerUnion3(PT2 V) {
- Val = InnerUnion(V);
+ struct IsInnerUnion {
+ ValTy Val;
+ IsInnerUnion(ValTy val) : Val(val) {}
+ template <typename T> int is() const {
+ return Val.template is<InnerUnion>() &&
+ Val.template get<InnerUnion>().template is<T>();
}
- PointerUnion3(PT3 V) {
- Val = V;
+ template <typename T> T get() const {
+ return Val.template get<InnerUnion>().template get<T>();
}
+ };
- /// isNull - Return true if the pointer held in the union is null,
- /// regardless of which type it is.
- bool isNull() const { return Val.isNull(); }
- explicit operator bool() const { return !isNull(); }
-
- /// is<T>() return true if the Union currently holds the type matching T.
- template<typename T>
- int is() const {
- // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
- typedef typename
- ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
- >::Return Ty;
- return Ty(Val).template is<T>();
- }
+ struct IsPT3 {
+ ValTy Val;
+ IsPT3(ValTy val) : Val(val) {}
+ template <typename T> int is() const { return Val.template is<T>(); }
+ template <typename T> T get() const { return Val.template get<T>(); }
+ };
- /// get<T>() - Return the value of the specified pointer type. If the
- /// specified pointer type is incorrect, assert.
- template<typename T>
- T get() const {
- assert(is<T>() && "Invalid accessor called");
- // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
- typedef typename
- ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
- >::Return Ty;
- return Ty(Val).template get<T>();
- }
+public:
+ PointerUnion3() {}
+
+ PointerUnion3(PT1 V) { Val = InnerUnion(V); }
+ PointerUnion3(PT2 V) { Val = InnerUnion(V); }
+ PointerUnion3(PT3 V) { Val = V; }
+
+ /// Test if the pointer held in the union is null, regardless of
+ /// which type it is.
+ bool isNull() const { return Val.isNull(); }
+ explicit operator bool() const { return !isNull(); }
+
+ /// Test if the Union currently holds the type matching T.
+ template <typename T> int is() const {
+ // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
+ typedef typename ::llvm::PointerUnionTypeSelector<
+ PT1, T, IsInnerUnion,
+ ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return
+ Ty;
+ return Ty(Val).template is<T>();
+ }
- /// dyn_cast<T>() - If the current value is of the specified pointer type,
- /// return it, otherwise return null.
- template<typename T>
- T dyn_cast() const {
- if (is<T>()) return get<T>();
- return T();
- }
+ /// Returns the value of the specified pointer type.
+ ///
+ /// If the specified pointer type is incorrect, assert.
+ template <typename T> T get() const {
+ assert(is<T>() && "Invalid accessor called");
+ // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
+ typedef typename ::llvm::PointerUnionTypeSelector<
+ PT1, T, IsInnerUnion,
+ ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return
+ Ty;
+ return Ty(Val).template get<T>();
+ }
- /// \brief Assignment from nullptr which just clears the union.
- const PointerUnion3 &operator=(std::nullptr_t) {
- Val = nullptr;
- return *this;
- }
+ /// Returns the current pointer if it is of the specified pointer type,
+ /// otherwise returns null.
+ template <typename T> T dyn_cast() const {
+ if (is<T>())
+ return get<T>();
+ return T();
+ }
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion3 &operator=(const PT1 &RHS) {
- Val = InnerUnion(RHS);
- return *this;
- }
- const PointerUnion3 &operator=(const PT2 &RHS) {
- Val = InnerUnion(RHS);
- return *this;
- }
- const PointerUnion3 &operator=(const PT3 &RHS) {
- Val = RHS;
- return *this;
- }
+ /// Assignment from nullptr which just clears the union.
+ const PointerUnion3 &operator=(std::nullptr_t) {
+ Val = nullptr;
+ return *this;
+ }
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
- static inline PointerUnion3 getFromOpaqueValue(void *VP) {
- PointerUnion3 V;
- V.Val = ValTy::getFromOpaqueValue(VP);
- return V;
- }
- };
+ /// Assignment operators - Allow assigning into this union from either
+ /// pointer type, setting the discriminator to remember what it came from.
+ const PointerUnion3 &operator=(const PT1 &RHS) {
+ Val = InnerUnion(RHS);
+ return *this;
+ }
+ const PointerUnion3 &operator=(const PT2 &RHS) {
+ Val = InnerUnion(RHS);
+ return *this;
+ }
+ const PointerUnion3 &operator=(const PT3 &RHS) {
+ Val = RHS;
+ return *this;
+ }
- // Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has
- // # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
- template<typename PT1, typename PT2, typename PT3>
- class PointerLikeTypeTraits<PointerUnion3<PT1, PT2, PT3> > {
- public:
- static inline void *
- getAsVoidPointer(const PointerUnion3<PT1, PT2, PT3> &P) {
- return P.getOpaqueValue();
- }
- static inline PointerUnion3<PT1, PT2, PT3>
- getFromVoidPointer(void *P) {
- return PointerUnion3<PT1, PT2, PT3>::getFromOpaqueValue(P);
- }
+ void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+ static inline PointerUnion3 getFromOpaqueValue(void *VP) {
+ PointerUnion3 V;
+ V.Val = ValTy::getFromOpaqueValue(VP);
+ return V;
+ }
+};
+
+// Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has
+// # low bits available = min(PT1bits,PT2bits,PT3bits)-2.
+template <typename PT1, typename PT2, typename PT3>
+class PointerLikeTypeTraits<PointerUnion3<PT1, PT2, PT3>> {
+public:
+ static inline void *getAsVoidPointer(const PointerUnion3<PT1, PT2, PT3> &P) {
+ return P.getOpaqueValue();
+ }
+ static inline PointerUnion3<PT1, PT2, PT3> getFromVoidPointer(void *P) {
+ return PointerUnion3<PT1, PT2, PT3>::getFromOpaqueValue(P);
+ }
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable =
- PointerLikeTypeTraits<typename PointerUnion3<PT1, PT2, PT3>::ValTy>
- ::NumLowBitsAvailable
- };
+ // The number of bits available are the min of the two pointer types.
+ enum {
+ NumLowBitsAvailable = PointerLikeTypeTraits<
+ typename PointerUnion3<PT1, PT2, PT3>::ValTy>::NumLowBitsAvailable
};
+};
+
+/// A pointer union of four pointer types. See documentation for PointerUnion
+/// for usage.
+template <typename PT1, typename PT2, typename PT3, typename PT4>
+class PointerUnion4 {
+public:
+ typedef PointerUnion<PT1, PT2> InnerUnion1;
+ typedef PointerUnion<PT3, PT4> InnerUnion2;
+ typedef PointerUnion<InnerUnion1, InnerUnion2> ValTy;
+
+private:
+ ValTy Val;
+
+public:
+ PointerUnion4() {}
+
+ PointerUnion4(PT1 V) { Val = InnerUnion1(V); }
+ PointerUnion4(PT2 V) { Val = InnerUnion1(V); }
+ PointerUnion4(PT3 V) { Val = InnerUnion2(V); }
+ PointerUnion4(PT4 V) { Val = InnerUnion2(V); }
+
+ /// Test if the pointer held in the union is null, regardless of
+ /// which type it is.
+ bool isNull() const { return Val.isNull(); }
+ explicit operator bool() const { return !isNull(); }
+
+ /// Test if the Union currently holds the type matching T.
+ template <typename T> int is() const {
+ // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+ typedef typename ::llvm::PointerUnionTypeSelector<
+ PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector<
+ PT2, T, InnerUnion1, InnerUnion2>>::Return Ty;
+ return Val.template is<Ty>() && Val.template get<Ty>().template is<T>();
+ }
- /// PointerUnion4 - This is a pointer union of four pointer types. See
- /// documentation for PointerUnion for usage.
- template <typename PT1, typename PT2, typename PT3, typename PT4>
- class PointerUnion4 {
- public:
- typedef PointerUnion<PT1, PT2> InnerUnion1;
- typedef PointerUnion<PT3, PT4> InnerUnion2;
- typedef PointerUnion<InnerUnion1, InnerUnion2> ValTy;
- private:
- ValTy Val;
- public:
- PointerUnion4() {}
-
- PointerUnion4(PT1 V) {
- Val = InnerUnion1(V);
- }
- PointerUnion4(PT2 V) {
- Val = InnerUnion1(V);
- }
- PointerUnion4(PT3 V) {
- Val = InnerUnion2(V);
- }
- PointerUnion4(PT4 V) {
- Val = InnerUnion2(V);
- }
-
- /// isNull - Return true if the pointer held in the union is null,
- /// regardless of which type it is.
- bool isNull() const { return Val.isNull(); }
- explicit operator bool() const { return !isNull(); }
-
- /// is<T>() return true if the Union currently holds the type matching T.
- template<typename T>
- int is() const {
- // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
- typedef typename
- ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
- ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
- >::Return Ty;
- return Val.template is<Ty>() &&
- Val.template get<Ty>().template is<T>();
- }
+ /// Returns the value of the specified pointer type.
+ ///
+ /// If the specified pointer type is incorrect, assert.
+ template <typename T> T get() const {
+ assert(is<T>() && "Invalid accessor called");
+ // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+ typedef typename ::llvm::PointerUnionTypeSelector<
+ PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector<
+ PT2, T, InnerUnion1, InnerUnion2>>::Return Ty;
+ return Val.template get<Ty>().template get<T>();
+ }
- /// get<T>() - Return the value of the specified pointer type. If the
- /// specified pointer type is incorrect, assert.
- template<typename T>
- T get() const {
- assert(is<T>() && "Invalid accessor called");
- // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
- typedef typename
- ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
- ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
- >::Return Ty;
- return Val.template get<Ty>().template get<T>();
- }
+ /// Returns the current pointer if it is of the specified pointer type,
+ /// otherwise returns null.
+ template <typename T> T dyn_cast() const {
+ if (is<T>())
+ return get<T>();
+ return T();
+ }
- /// dyn_cast<T>() - If the current value is of the specified pointer type,
- /// return it, otherwise return null.
- template<typename T>
- T dyn_cast() const {
- if (is<T>()) return get<T>();
- return T();
- }
+ /// Assignment from nullptr which just clears the union.
+ const PointerUnion4 &operator=(std::nullptr_t) {
+ Val = nullptr;
+ return *this;
+ }
- /// \brief Assignment from nullptr which just clears the union.
- const PointerUnion4 &operator=(std::nullptr_t) {
- Val = nullptr;
- return *this;
- }
+ /// Assignment operators - Allow assigning into this union from either
+ /// pointer type, setting the discriminator to remember what it came from.
+ const PointerUnion4 &operator=(const PT1 &RHS) {
+ Val = InnerUnion1(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT2 &RHS) {
+ Val = InnerUnion1(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT3 &RHS) {
+ Val = InnerUnion2(RHS);
+ return *this;
+ }
+ const PointerUnion4 &operator=(const PT4 &RHS) {
+ Val = InnerUnion2(RHS);
+ return *this;
+ }
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion4 &operator=(const PT1 &RHS) {
- Val = InnerUnion1(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT2 &RHS) {
- Val = InnerUnion1(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT3 &RHS) {
- Val = InnerUnion2(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT4 &RHS) {
- Val = InnerUnion2(RHS);
- return *this;
- }
+ void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+ static inline PointerUnion4 getFromOpaqueValue(void *VP) {
+ PointerUnion4 V;
+ V.Val = ValTy::getFromOpaqueValue(VP);
+ return V;
+ }
+};
+
+// Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
+// # low bits available = min(PT1bits,PT2bits,PT3bits,PT4bits)-2.
+template <typename PT1, typename PT2, typename PT3, typename PT4>
+class PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4>> {
+public:
+ static inline void *
+ getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
+ return P.getOpaqueValue();
+ }
+ static inline PointerUnion4<PT1, PT2, PT3, PT4> getFromVoidPointer(void *P) {
+ return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
+ }
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
- static inline PointerUnion4 getFromOpaqueValue(void *VP) {
- PointerUnion4 V;
- V.Val = ValTy::getFromOpaqueValue(VP);
- return V;
- }
+ // The number of bits available are the min of the two pointer types.
+ enum {
+ NumLowBitsAvailable = PointerLikeTypeTraits<
+ typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>::NumLowBitsAvailable
};
+};
- // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
- // # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
- template<typename PT1, typename PT2, typename PT3, typename PT4>
- class PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4> > {
- public:
- static inline void *
- getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
- return P.getOpaqueValue();
- }
- static inline PointerUnion4<PT1, PT2, PT3, PT4>
- getFromVoidPointer(void *P) {
- return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
- }
+// Teach DenseMap how to use PointerUnions as keys.
+template <typename T, typename U> struct DenseMapInfo<PointerUnion<T, U>> {
+ typedef PointerUnion<T, U> Pair;
+ typedef DenseMapInfo<T> FirstInfo;
+ typedef DenseMapInfo<U> SecondInfo;
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable =
- PointerLikeTypeTraits<typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>
- ::NumLowBitsAvailable
- };
- };
-
- // Teach DenseMap how to use PointerUnions as keys.
- template<typename T, typename U>
- struct DenseMapInfo<PointerUnion<T, U> > {
- typedef PointerUnion<T, U> Pair;
- typedef DenseMapInfo<T> FirstInfo;
- typedef DenseMapInfo<U> SecondInfo;
+ static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); }
+ static inline Pair getTombstoneKey() {
+ return Pair(FirstInfo::getTombstoneKey());
+ }
+ static unsigned getHashValue(const Pair &PairVal) {
+ intptr_t key = (intptr_t)PairVal.getOpaqueValue();
+ return DenseMapInfo<intptr_t>::getHashValue(key);
+ }
+ static bool isEqual(const Pair &LHS, const Pair &RHS) {
+ return LHS.template is<T>() == RHS.template is<T>() &&
+ (LHS.template is<T>() ? FirstInfo::isEqual(LHS.template get<T>(),
+ RHS.template get<T>())
+ : SecondInfo::isEqual(LHS.template get<U>(),
+ RHS.template get<U>()));
+ }
+};
- static inline Pair getEmptyKey() {
- return Pair(FirstInfo::getEmptyKey());
- }
- static inline Pair getTombstoneKey() {
- return Pair(FirstInfo::getTombstoneKey());
- }
- static unsigned getHashValue(const Pair &PairVal) {
- intptr_t key = (intptr_t)PairVal.getOpaqueValue();
- return DenseMapInfo<intptr_t>::getHashValue(key);
- }
- static bool isEqual(const Pair &LHS, const Pair &RHS) {
- return LHS.template is<T>() == RHS.template is<T>() &&
- (LHS.template is<T>() ?
- FirstInfo::isEqual(LHS.template get<T>(),
- RHS.template get<T>()) :
- SecondInfo::isEqual(LHS.template get<U>(),
- RHS.template get<U>()));
- }
- };
}
#endif
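
A compilable variant of the usage pattern from the PointerUnion class comment above; a sketch only, with illustrative names.

#include "llvm/ADT/PointerUnion.h"
using namespace llvm;

void pointerUnionDemo(int *IP, float *FP) {
  PointerUnion<int *, float *> P = IP; // low discriminator bit records int*
  bool HoldsInt = P.is<int *>();       // true
  P = FP;                              // reassignment flips the discriminator
  float *F = P.dyn_cast<float *>();    // FP
  int *I = P.dyn_cast<int *>();        // null: the union holds float* now
  (void)HoldsInt; (void)F; (void)I;
}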
diff --git a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
index 759a2db2..ce343a1 100644
--- a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -215,8 +215,8 @@ struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External > {
};
template <class T>
-ipo_iterator<T> ipo_begin(const T &G, bool Reverse = false) {
- return ipo_iterator<T>::begin(G, Reverse);
+ipo_iterator<T> ipo_begin(const T &G) {
+ return ipo_iterator<T>::begin(G);
}
template <class T>
@@ -225,8 +225,8 @@ ipo_iterator<T> ipo_end(const T &G){
}
template <class T>
-iterator_range<ipo_iterator<T>> inverse_post_order(const T &G, bool Reverse = false) {
- return make_range(ipo_begin(G, Reverse), ipo_end(G));
+iterator_range<ipo_iterator<T>> inverse_post_order(const T &G) {
+ return make_range(ipo_begin(G), ipo_end(G));
}
// Provide global definitions of external inverse postorder iterators...
diff --git a/contrib/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm/include/llvm/ADT/STLExtras.h
index b68345a..d4360fa 100644
--- a/contrib/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm/include/llvm/ADT/STLExtras.h
@@ -196,6 +196,41 @@ inline mapped_iterator<ItTy, FuncTy> map_iterator(const ItTy &I, FuncTy F) {
return mapped_iterator<ItTy, FuncTy>(I, F);
}
+/// \brief Metafunction to determine if type T has a member called rbegin().
+template <typename T> struct has_rbegin {
+ template <typename U> static char(&f(const U &, decltype(&U::rbegin)))[1];
+ static char(&f(...))[2];
+ const static bool value = sizeof(f(std::declval<T>(), nullptr)) == 1;
+};
+
+// Returns an iterator_range over the given container which iterates in reverse.
+// Note that the container must have rbegin()/rend() methods for this to work.
+template <typename ContainerTy>
+auto reverse(ContainerTy &&C,
+ typename std::enable_if<has_rbegin<ContainerTy>::value>::type * =
+ nullptr) -> decltype(make_range(C.rbegin(), C.rend())) {
+ return make_range(C.rbegin(), C.rend());
+}
+
+// Returns a std::reverse_iterator wrapped around the given iterator.
+template <typename IteratorTy>
+std::reverse_iterator<IteratorTy> make_reverse_iterator(IteratorTy It) {
+ return std::reverse_iterator<IteratorTy>(It);
+}
+
+// Returns an iterator_range over the given container which iterates in reverse.
+// Note that the container must have begin()/end() methods which return
+// bidirectional iterators for this to work.
+template <typename ContainerTy>
+auto reverse(
+ ContainerTy &&C,
+ typename std::enable_if<!has_rbegin<ContainerTy>::value>::type * = nullptr)
+ -> decltype(make_range(llvm::make_reverse_iterator(std::end(C)),
+ llvm::make_reverse_iterator(std::begin(C)))) {
+ return make_range(llvm::make_reverse_iterator(std::end(C)),
+ llvm::make_reverse_iterator(std::begin(C)));
+}
+
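
A quick sketch of the two reverse() overloads added above: containers with rbegin()/rend() use them directly, everything else goes through std::reverse_iterator.

#include "llvm/ADT/STLExtras.h"
#include <vector>

void reverseDemo() {
  std::vector<int> V = {1, 2, 3};
  // std::vector has rbegin()/rend(), so the first overload is selected.
  for (int X : llvm::reverse(V))
    (void)X; // visits 3, 2, 1
}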
//===----------------------------------------------------------------------===//
// Extra additions to <utility>
//===----------------------------------------------------------------------===//
@@ -329,13 +364,28 @@ void DeleteContainerSeconds(Container &C) {
}
/// Provide wrappers to std::all_of which take ranges instead of having to pass
-/// being/end explicitly.
+/// begin/end explicitly.
template<typename R, class UnaryPredicate>
bool all_of(R &&Range, UnaryPredicate &&P) {
return std::all_of(Range.begin(), Range.end(),
std::forward<UnaryPredicate>(P));
}
+/// Provide wrappers to std::any_of which take ranges instead of having to pass
+/// begin/end explicitly.
+template <typename R, class UnaryPredicate>
+bool any_of(R &&Range, UnaryPredicate &&P) {
+ return std::any_of(Range.begin(), Range.end(),
+ std::forward<UnaryPredicate>(P));
+}
+
+/// Provide wrappers to std::find which take ranges instead of having to pass
+/// begin/end explicitly.
+template<typename R, class T>
+auto find(R &&Range, const T &val) -> decltype(Range.begin()) {
+ return std::find(Range.begin(), Range.end(), val);
+}
+
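
The range wrappers above compose as one would expect; a brief sketch with illustrative data.

#include "llvm/ADT/STLExtras.h"
#include <vector>

void rangeWrapperDemo() {
  std::vector<int> V = {1, 2, 3};
  bool AllPositive = llvm::all_of(V, [](int X) { return X > 0; }); // true
  bool HasTwo = llvm::any_of(V, [](int X) { return X == 2; });     // true
  auto It = llvm::find(V, 3); // iterator to the 3, or V.end() if absent
  (void)AllPositive; (void)HasTwo; (void)It;
}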
//===----------------------------------------------------------------------===//
// Extra additions to <memory>
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/ADT/ScopedHashTable.h b/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
index 5abe76c..4af3d6d 100644
--- a/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
+++ b/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
@@ -1,4 +1,4 @@
-//===- ScopedHashTable.h - A simple scoped hash table ---------------------===//
+//===- ScopedHashTable.h - A simple scoped hash table -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -47,8 +47,8 @@ class ScopedHashTableVal {
K Key;
V Val;
ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {}
-public:
+public:
const K &getKey() const { return Key; }
const V &getValue() const { return Val; }
V &getValue() { return Val; }
@@ -56,7 +56,7 @@ public:
ScopedHashTableVal *getNextForKey() { return NextForKey; }
const ScopedHashTableVal *getNextForKey() const { return NextForKey; }
ScopedHashTableVal *getNextInScope() { return NextInScope; }
-
+
template <typename AllocatorTy>
static ScopedHashTableVal *Create(ScopedHashTableVal *nextInScope,
ScopedHashTableVal *nextForKey,
@@ -66,12 +66,11 @@ public:
// Set up the value.
new (New) ScopedHashTableVal(key, val);
New->NextInScope = nextInScope;
- New->NextForKey = nextForKey;
+ New->NextForKey = nextForKey;
return New;
}
-
- template <typename AllocatorTy>
- void Destroy(AllocatorTy &Allocator) {
+
+ template <typename AllocatorTy> void Destroy(AllocatorTy &Allocator) {
// Free memory referenced by the item.
this->~ScopedHashTableVal();
Allocator.Deallocate(this);
@@ -90,15 +89,16 @@ class ScopedHashTableScope {
/// LastValInScope - This is the last value that was inserted for this scope
/// or null if none have been inserted yet.
ScopedHashTableVal<K, V> *LastValInScope;
- void operator=(ScopedHashTableScope&) = delete;
- ScopedHashTableScope(ScopedHashTableScope&) = delete;
+ void operator=(ScopedHashTableScope &) = delete;
+ ScopedHashTableScope(ScopedHashTableScope &) = delete;
+
public:
ScopedHashTableScope(ScopedHashTable<K, V, KInfo, AllocatorTy> &HT);
~ScopedHashTableScope();
ScopedHashTableScope *getParentScope() { return PrevScope; }
const ScopedHashTableScope *getParentScope() const { return PrevScope; }
-
+
private:
friend class ScopedHashTable<K, V, KInfo, AllocatorTy>;
ScopedHashTableVal<K, V> *getLastValInScope() {
@@ -109,10 +109,10 @@ private:
}
};
-
-template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
+template <typename K, typename V, typename KInfo = DenseMapInfo<K>>
class ScopedHashTableIterator {
ScopedHashTableVal<K, V> *Node;
+
public:
ScopedHashTableIterator(ScopedHashTableVal<K, V> *node) : Node(node) {}
@@ -141,7 +141,6 @@ public:
}
};
-
template <typename K, typename V, typename KInfo, typename AllocatorTy>
class ScopedHashTable {
public:
@@ -149,23 +148,24 @@ public:
/// to the name of the scope for this hash table.
typedef ScopedHashTableScope<K, V, KInfo, AllocatorTy> ScopeTy;
typedef unsigned size_type;
+
private:
typedef ScopedHashTableVal<K, V> ValTy;
DenseMap<K, ValTy*, KInfo> TopLevelMap;
ScopeTy *CurScope;
-
+
AllocatorTy Allocator;
-
- ScopedHashTable(const ScopedHashTable&); // NOT YET IMPLEMENTED
- void operator=(const ScopedHashTable&); // NOT YET IMPLEMENTED
+
+ ScopedHashTable(const ScopedHashTable &); // NOT YET IMPLEMENTED
+ void operator=(const ScopedHashTable &); // NOT YET IMPLEMENTED
friend class ScopedHashTableScope<K, V, KInfo, AllocatorTy>;
+
public:
ScopedHashTable() : CurScope(nullptr) {}
ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {}
~ScopedHashTable() {
assert(!CurScope && TopLevelMap.empty() && "Scope imbalance!");
}
-
/// Access to the allocator.
AllocatorTy &getAllocator() { return Allocator; }
@@ -180,7 +180,7 @@ public:
typename DenseMap<K, ValTy*, KInfo>::iterator I = TopLevelMap.find(Key);
if (I != TopLevelMap.end())
return I->second->getValue();
-
+
return V();
}
@@ -198,7 +198,7 @@ public:
if (I == TopLevelMap.end()) return end();
return iterator(I->second);
}
-
+
ScopeTy *getCurScope() { return CurScope; }
const ScopeTy *getCurScope() const { return CurScope; }
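
For context, a sketch of the scope discipline this class enforces. insert() and lookup() are assumed from the full header, since only fragments of it appear in this hunk.

#include "llvm/ADT/ScopedHashTable.h"
using namespace llvm;

void scopedHashTableDemo() {
  ScopedHashTable<int, int> HT;
  {
    ScopedHashTable<int, int>::ScopeTy Outer(HT);
    HT.insert(1, 100);
    {
      ScopedHashTable<int, int>::ScopeTy Inner(HT);
      HT.insert(1, 200);  // shadows the outer binding
      (void)HT.lookup(1); // 200
    }                     // Inner ends: the shadowing entry is removed
    (void)HT.lookup(1);   // 100 again
  }                       // Outer ends: table is empty, per the dtor assert
}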
diff --git a/contrib/llvm/include/llvm/ADT/SetOperations.h b/contrib/llvm/include/llvm/ADT/SetOperations.h
index 71f5db3..7c9f2fb 100644
--- a/contrib/llvm/include/llvm/ADT/SetOperations.h
+++ b/contrib/llvm/include/llvm/ADT/SetOperations.h
@@ -39,7 +39,7 @@ bool set_union(S1Ty &S1, const S2Ty &S2) {
template <class S1Ty, class S2Ty>
void set_intersect(S1Ty &S1, const S2Ty &S2) {
for (typename S1Ty::iterator I = S1.begin(); I != S1.end();) {
- const typename S1Ty::key_type &E = *I;
+ const auto &E = *I;
++I;
if (!S2.count(E)) S1.erase(E); // Erase element if not in S2
}
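
The switch to `auto` above does not change behavior; a minimal usage sketch with illustrative values follows.

#include "llvm/ADT/SetOperations.h"
#include <set>

void setIntersectDemo() {
  std::set<int> S1 = {1, 2, 3};
  std::set<int> S2 = {2, 3, 4};
  // Erases from S1 every element not present in S2; S2 is read-only here.
  llvm::set_intersect(S1, S2); // S1 == {2, 3}
}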
diff --git a/contrib/llvm/include/llvm/ADT/SetVector.h b/contrib/llvm/include/llvm/ADT/SetVector.h
index a7fd408..bc56357 100644
--- a/contrib/llvm/include/llvm/ADT/SetVector.h
+++ b/contrib/llvm/include/llvm/ADT/SetVector.h
@@ -20,6 +20,7 @@
#ifndef LLVM_ADT_SETVECTOR_H
#define LLVM_ADT_SETVECTOR_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include <algorithm>
#include <cassert>
@@ -33,7 +34,7 @@ namespace llvm {
/// property of a deterministic iteration order. The order of iteration is the
/// order of insertion.
template <typename T, typename Vector = std::vector<T>,
- typename Set = SmallSet<T, 16> >
+ typename Set = DenseSet<T>>
class SetVector {
public:
typedef T value_type;
@@ -44,6 +45,8 @@ public:
typedef Vector vector_type;
typedef typename vector_type::const_iterator iterator;
typedef typename vector_type::const_iterator const_iterator;
+ typedef typename vector_type::const_reverse_iterator reverse_iterator;
+ typedef typename vector_type::const_reverse_iterator const_reverse_iterator;
typedef typename vector_type::size_type size_type;
/// \brief Construct an empty SetVector
@@ -55,6 +58,8 @@ public:
insert(Start, End);
}
+ ArrayRef<T> getArrayRef() const { return vector_; }
+
/// \brief Determine if the SetVector is empty or not.
bool empty() const {
return vector_.empty();
@@ -85,6 +90,26 @@ public:
return vector_.end();
}
+ /// \brief Get a reverse_iterator to the end of the SetVector.
+ reverse_iterator rbegin() {
+ return vector_.rbegin();
+ }
+
+ /// \brief Get a const_reverse_iterator to the end of the SetVector.
+ const_reverse_iterator rbegin() const {
+ return vector_.rbegin();
+ }
+
+ /// \brief Get a reverse_iterator to the beginning of the SetVector.
+ reverse_iterator rend() {
+ return vector_.rend();
+ }
+
+ /// \brief Get a const_reverse_iterator to the beginning of the SetVector.
+ const_reverse_iterator rend() const {
+ return vector_.rend();
+ }
+
/// \brief Return the last element of the SetVector.
const T &back() const {
assert(!empty() && "Cannot call back() on empty SetVector!");
@@ -150,7 +175,6 @@ public:
return true;
}
-
/// \brief Count the number of elements of a given key in the SetVector.
/// \returns 0 if the element is not in the SetVector, 1 if it is.
size_type count(const key_type &key) const {
@@ -169,7 +193,7 @@ public:
set_.erase(back());
vector_.pop_back();
}
-
+
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() {
T Ret = back();
pop_back();
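
A sketch of what the new reverse iterators enable on the (now DenseSet-backed) SetVector; values are illustrative.

#include "llvm/ADT/SetVector.h"
using namespace llvm;

void setVectorDemo() {
  SetVector<int> SV;
  SV.insert(1);
  SV.insert(2);
  SV.insert(2); // duplicate: rejected, insertion order stays 1, 2
  // rbegin()/rend() walk the insertion order backwards: 2, then 1.
  for (auto I = SV.rbegin(), E = SV.rend(); I != E; ++I)
    (void)*I;
}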
diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
index ae3d645..4aa3bc2 100644
--- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
@@ -551,19 +551,18 @@ public:
}
private:
- template<bool AddBits, bool InvertMask>
+ template <bool AddBits, bool InvertMask>
void applyMask(const uint32_t *Mask, unsigned MaskWords) {
- if (NumBaseBits == 64 && MaskWords >= 2) {
- uint64_t M = Mask[0] | (uint64_t(Mask[1]) << 32);
- if (InvertMask) M = ~M;
- if (AddBits) setSmallBits(getSmallBits() | M);
- else setSmallBits(getSmallBits() & ~M);
- } else {
- uint32_t M = Mask[0];
- if (InvertMask) M = ~M;
- if (AddBits) setSmallBits(getSmallBits() | M);
- else setSmallBits(getSmallBits() & ~M);
- }
+ assert(MaskWords <= sizeof(uintptr_t) && "Mask is larger than base!");
+ uintptr_t M = Mask[0];
+ if (NumBaseBits == 64)
+ M |= uint64_t(Mask[1]) << 32;
+ if (InvertMask)
+ M = ~M;
+ if (AddBits)
+ setSmallBits(getSmallBits() | M);
+ else
+ setSmallBits(getSmallBits() & ~M);
}
};
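
The unified applyMask() above is private; it is reached through mask helpers such as setBitsInMask()/clearBitsInMask(), which are assumed here because they do not appear in this hunk. A sketch of the semantics:

#include "llvm/ADT/SmallBitVector.h"
#include <cstdint>

void smallBitVectorMaskDemo() {
  llvm::SmallBitVector BV(32);
  uint32_t Mask[1] = {0x5};    // selects bits 0 and 2
  BV.setBitsInMask(Mask, 1);   // assumed wrapper; applyMask<true, false>: BV |= Mask
  BV.clearBitsInMask(Mask, 1); // assumed wrapper; applyMask<false, false>: BV &= ~Mask
}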
diff --git a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
index 3e3c9c1..3d98e8f 100644
--- a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -48,6 +48,7 @@ class SmallPtrSetIteratorImpl;
///
class SmallPtrSetImplBase {
friend class SmallPtrSetIteratorImpl;
+
protected:
/// SmallArray - Points to a fixed size set of buckets, used in 'small mode'.
const void **SmallArray;
@@ -133,6 +134,7 @@ private:
void Grow(unsigned NewSize);
void operator=(const SmallPtrSetImplBase &RHS) = delete;
+
protected:
/// swap - Swaps the elements of two sets.
/// Note: This method assumes that both sets have the same small size.
@@ -148,6 +150,7 @@ class SmallPtrSetIteratorImpl {
protected:
const void *const *Bucket;
const void *const *End;
+
public:
explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
: Bucket(BP), End(E) {
@@ -178,14 +181,14 @@ protected:
template<typename PtrTy>
class SmallPtrSetIterator : public SmallPtrSetIteratorImpl {
typedef PointerLikeTypeTraits<PtrTy> PtrTraits;
-
+
public:
typedef PtrTy value_type;
typedef PtrTy reference;
typedef PtrTy pointer;
typedef std::ptrdiff_t difference_type;
typedef std::forward_iterator_tag iterator_category;
-
+
explicit SmallPtrSetIterator(const void *const *BP, const void *const *E)
: SmallPtrSetIteratorImpl(BP, E) {}
@@ -231,7 +234,6 @@ template<unsigned N>
struct RoundUpToPowerOfTwo {
enum { Val = RoundUpToPowerOfTwoH<N, (N&(N-1)) == 0>::Val };
};
-
/// \brief A templated base class for \c SmallPtrSet which provides the
/// typesafe interface that is common across all small sizes.
@@ -242,7 +244,8 @@ template <typename PtrType>
class SmallPtrSetImpl : public SmallPtrSetImplBase {
typedef PointerLikeTypeTraits<PtrType> PtrTraits;
- SmallPtrSetImpl(const SmallPtrSetImpl&) = delete;
+ SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
+
protected:
// Constructors that forward to the base.
SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl &that)
@@ -303,6 +306,7 @@ class SmallPtrSet : public SmallPtrSetImpl<PtrType> {
enum { SmallSizePowTwo = RoundUpToPowerOfTwo<SmallSize>::Val };
/// SmallStorage - Fixed size storage used in 'small mode'.
const void *SmallStorage[SmallSizePowTwo];
+
public:
SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {}
SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {}
@@ -333,7 +337,6 @@ public:
SmallPtrSetImplBase::swap(RHS);
}
};
-
}
namespace std {
diff --git a/contrib/llvm/include/llvm/ADT/SmallSet.h b/contrib/llvm/include/llvm/ADT/SmallSet.h
index bc64935..39a57b8 100644
--- a/contrib/llvm/include/llvm/ADT/SmallSet.h
+++ b/contrib/llvm/include/llvm/ADT/SmallSet.h
@@ -37,6 +37,7 @@ class SmallSet {
std::set<T, C> Set;
typedef typename SmallVector<T, N>::const_iterator VIterator;
typedef typename SmallVector<T, N>::iterator mutable_iterator;
+
public:
typedef size_t size_type;
SmallSet() {}
@@ -92,7 +93,7 @@ public:
for (; I != E; ++I)
insert(*I);
}
-
+
bool erase(const T &V) {
if (!isSmall())
return Set.erase(V);
@@ -108,6 +109,7 @@ public:
Vector.clear();
Set.clear();
}
+
private:
bool isSmall() const { return Set.empty(); }
diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h
index b938470..d1062ac 100644
--- a/contrib/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallVector.h
@@ -109,9 +109,13 @@ public:
typedef const T *const_pointer;
// forward iterator creation methods.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
iterator begin() { return (iterator)this->BeginX; }
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
const_iterator begin() const { return (const_iterator)this->BeginX; }
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
iterator end() { return (iterator)this->EndX; }
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
const_iterator end() const { return (const_iterator)this->EndX; }
protected:
iterator capacity_ptr() { return (iterator)this->CapacityX; }
@@ -124,6 +128,7 @@ public:
reverse_iterator rend() { return reverse_iterator(begin()); }
const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
size_type size() const { return end()-begin(); }
size_type max_size() const { return size_type(-1) / sizeof(T); }
@@ -135,10 +140,12 @@ public:
/// Return a pointer to the vector's buffer, even if empty().
const_pointer data() const { return const_pointer(begin()); }
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
reference operator[](size_type idx) {
assert(idx < size());
return begin()[idx];
}
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
const_reference operator[](size_type idx) const {
assert(idx < size());
return begin()[idx];
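// For context, a rough sketch of what LLVM_ATTRIBUTE_ALWAYS_INLINE (from
// llvm/Support/Compiler.h) expands to; the point of annotating begin(),
// end(), size(), and operator[] is to keep these hot accessors inlined even
// in unoptimized builds. The exact definition is compiler-dependent.
#if defined(__GNUC__) || defined(__clang__)
#define ALWAYS_INLINE_SKETCH inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define ALWAYS_INLINE_SKETCH __forceinline
#else
#define ALWAYS_INLINE_SKETCH inline
#endif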
diff --git a/contrib/llvm/include/llvm/ADT/SparseBitVector.h b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
index 20cbe2c..e6e7241 100644
--- a/contrib/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
@@ -39,7 +39,6 @@ namespace llvm {
/// etc) do not perform as well in practice as a linked list with this iterator
/// kept up to date. They are also significantly more memory intensive.
-
template <unsigned ElementSize = 128>
struct SparseBitVectorElement
: public ilist_node<SparseBitVectorElement<ElementSize> > {
@@ -204,6 +203,7 @@ public:
BecameZero = allzero;
return changed;
}
+
// Intersect this Element with the complement of RHS and return true if this
// one changed. BecameZero is set to true if this element became all-zero
// bits.
@@ -226,6 +226,7 @@ public:
BecameZero = allzero;
return changed;
}
+
// Three argument version of intersectWithComplement that intersects
// RHS1 & ~RHS2 into this element
void intersectWithComplement(const SparseBitVectorElement &RHS1,
@@ -408,12 +409,13 @@ class SparseBitVector {
// bitmap.
return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber;
}
+
bool operator!=(const SparseBitVectorIterator &RHS) const {
return !(*this == RHS);
}
- SparseBitVectorIterator(): BitVector(NULL) {
- }
+ SparseBitVectorIterator(): BitVector(nullptr) {
+ }
SparseBitVectorIterator(const SparseBitVector<ElementSize> *RHS,
bool end = false):BitVector(RHS) {
@@ -453,6 +455,9 @@ public:
// Assignment
SparseBitVector& operator=(const SparseBitVector& RHS) {
+ if (this == &RHS)
+ return *this;
+
Elements.clear();
ElementListConstIter ElementIter = RHS.Elements.begin();
@@ -559,6 +564,9 @@ public:
// Union our bitmap with the RHS and return true if we changed.
bool operator|=(const SparseBitVector &RHS) {
+ if (this == &RHS)
+ return false;
+
bool changed = false;
ElementListIter Iter1 = Elements.begin();
ElementListConstIter Iter2 = RHS.Elements.begin();
@@ -587,6 +595,9 @@ public:
// Intersect our bitmap with the RHS and return true if ours changed.
bool operator&=(const SparseBitVector &RHS) {
+ if (this == &RHS)
+ return false;
+
bool changed = false;
ElementListIter Iter1 = Elements.begin();
ElementListConstIter Iter2 = RHS.Elements.begin();
@@ -619,9 +630,13 @@ public:
ElementListIter IterTmp = Iter1;
++Iter1;
Elements.erase(IterTmp);
+ changed = true;
}
}
- Elements.erase(Iter1, Elements.end());
+ if (Iter1 != Elements.end()) {
+ Elements.erase(Iter1, Elements.end());
+ changed = true;
+ }
CurrElementIter = Elements.begin();
return changed;
}
@@ -629,6 +644,14 @@ public:
// Intersect our bitmap with the complement of the RHS and return true
// if ours changed.
bool intersectWithComplement(const SparseBitVector &RHS) {
+ if (this == &RHS) {
+ if (!empty()) {
+ clear();
+ return true;
+ }
+ return false;
+ }
+
bool changed = false;
ElementListIter Iter1 = Elements.begin();
ElementListConstIter Iter2 = RHS.Elements.begin();
@@ -669,12 +692,20 @@ public:
return intersectWithComplement(*RHS);
}
-
// Three argument version of intersectWithComplement.
// Result of RHS1 & ~RHS2 is stored into this bitmap.
void intersectWithComplement(const SparseBitVector<ElementSize> &RHS1,
const SparseBitVector<ElementSize> &RHS2)
{
+ if (this == &RHS1) {
+ intersectWithComplement(RHS2);
+ return;
+ } else if (this == &RHS2) {
+ SparseBitVector RHS2Copy(RHS2);
+ intersectWithComplement(RHS1, RHS2Copy);
+ return;
+ }
+
Elements.clear();
CurrElementIter = Elements.begin();
ElementListConstIter Iter1 = RHS1.Elements.begin();
@@ -719,8 +750,6 @@ public:
Elements.push_back(NewElement);
++Iter1;
}
-
- return;
}
void intersectWithComplement(const SparseBitVector<ElementSize> *RHS1,
@@ -855,9 +884,6 @@ operator-(const SparseBitVector<ElementSize> &LHS,
return Result;
}
-
-
-
// Dump a SparseBitVector to a stream
template <unsigned ElementSize>
void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
@@ -875,4 +901,4 @@ void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
}
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SPARSEBITVECTOR_H
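// A minimal sketch of why the self-reference guards added above matter:
// without them, an operation such as A.intersectWithComplement(A) would
// erase elements from the same list it is iterating. With the guards each
// self-operation reduces to its mathematical result.
#include "llvm/ADT/SparseBitVector.h"

static void selfOps(llvm::SparseBitVector<> &A) {
  A |= A;                       // A | A == A: returns false, no change
  A &= A;                       // A & A == A: returns false, no change
  A.intersectWithComplement(A); // A & ~A is empty: clears A
}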
diff --git a/contrib/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm/include/llvm/ADT/Statistic.h
index d98abc3..7c84e3e 100644
--- a/contrib/llvm/include/llvm/ADT/Statistic.h
+++ b/contrib/llvm/include/llvm/ADT/Statistic.h
@@ -28,9 +28,11 @@
#include "llvm/Support/Atomic.h"
#include "llvm/Support/Valgrind.h"
+#include <memory>
namespace llvm {
class raw_ostream;
+class raw_fd_ostream;
class Statistic {
public:
@@ -170,6 +172,9 @@ void EnableStatistics();
/// \brief Check if statistics are enabled.
bool AreStatisticsEnabled();
+/// \brief Return a file stream to print our output on.
+std::unique_ptr<raw_fd_ostream> CreateInfoOutputFile();
+
/// \brief Print statistics to the file returned by CreateInfoOutputFile().
void PrintStatistics();
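// A hedged usage sketch of the new CreateInfoOutputFile() declaration: the
// caller now owns the returned stream, so it is flushed and closed when the
// unique_ptr goes out of scope. The report text is illustrative.
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"

static void writeReport() {
  std::unique_ptr<llvm::raw_fd_ostream> OS = llvm::CreateInfoOutputFile();
  *OS << "custom statistics report\n";
} // OS destroyed here; the underlying file is closed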
diff --git a/contrib/llvm/include/llvm/ADT/StringMap.h b/contrib/llvm/include/llvm/ADT/StringMap.h
index 9d03856..700bb9e 100644
--- a/contrib/llvm/include/llvm/ADT/StringMap.h
+++ b/contrib/llvm/include/llvm/ADT/StringMap.h
@@ -30,6 +30,7 @@ namespace llvm {
/// StringMapEntryBase - Shared base class of StringMapEntry instances.
class StringMapEntryBase {
unsigned StrLen;
+
public:
explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {}
@@ -48,6 +49,7 @@ protected:
unsigned NumItems;
unsigned NumTombstones;
unsigned ItemSize;
+
protected:
explicit StringMapImpl(unsigned itemSize)
: TheTable(nullptr),
@@ -85,8 +87,10 @@ protected:
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
StringMapEntryBase *RemoveKey(StringRef Key);
+
private:
void init(unsigned Size);
+
public:
static StringMapEntryBase *getTombstoneVal() {
return (StringMapEntryBase*)-1;
@@ -112,6 +116,7 @@ public:
template<typename ValueTy>
class StringMapEntry : public StringMapEntryBase {
StringMapEntry(StringMapEntry &E) = delete;
+
public:
ValueTy second;
@@ -205,7 +210,6 @@ public:
}
};
-
/// StringMap - This is an unconventional map that is specialized for handling
/// keys that are "strings", which are basically ranges of bytes. This does some
/// funky memory allocation and hashing things to make it extremely efficient,
@@ -213,9 +217,10 @@ public:
template<typename ValueTy, typename AllocatorTy = MallocAllocator>
class StringMap : public StringMapImpl {
AllocatorTy Allocator;
+
public:
typedef StringMapEntry<ValueTy> MapEntryTy;
-
+
StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
explicit StringMap(unsigned InitialSize)
: StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
@@ -227,6 +232,13 @@ public:
: StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))),
Allocator(A) {}
+ StringMap(std::initializer_list<std::pair<StringRef, ValueTy>> List)
+ : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {
+ for (const auto &P : List) {
+ insert(P);
+ }
+ }
+
StringMap(StringMap &&RHS)
: StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {}
@@ -386,11 +398,10 @@ public:
}
};
-
-template<typename ValueTy>
-class StringMapConstIterator {
+template <typename ValueTy> class StringMapConstIterator {
protected:
StringMapEntryBase **Ptr;
+
public:
typedef StringMapEntry<ValueTy> value_type;
@@ -447,7 +458,6 @@ public:
return static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
}
};
-
}
#endif
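// Usage sketch for the initializer-list constructor added above; each pair
// is a (StringRef key, value) entry inserted in order.
#include "llvm/ADT/StringMap.h"

static llvm::StringMap<int> makeColorMap() {
  return llvm::StringMap<int>({{"red", 0}, {"green", 1}, {"blue", 2}});
}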
diff --git a/contrib/llvm/include/llvm/ADT/StringRef.h b/contrib/llvm/include/llvm/ADT/StringRef.h
index 95660a4..350032b 100644
--- a/contrib/llvm/include/llvm/ADT/StringRef.h
+++ b/contrib/llvm/include/llvm/ADT/StringRef.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H
+#include "llvm/Support/Compiler.h"
#include <algorithm>
#include <cassert>
#include <cstring>
@@ -53,6 +54,7 @@ namespace llvm {
// Workaround memcmp issue with null pointers (undefined behavior)
// by providing a specialized version
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
if (Length == 0) { return 0; }
return ::memcmp(Lhs,Rhs,Length);
@@ -73,6 +75,7 @@ namespace llvm {
}
/// Construct a string ref from a pointer and length.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const char *data, size_t length)
: Data(data), Length(length) {
assert((data || length == 0) &&
@@ -80,6 +83,7 @@ namespace llvm {
}
/// Construct a string ref from an std::string.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const std::string &Str)
: Data(Str.data()), Length(Str.length()) {}
@@ -104,12 +108,15 @@ namespace llvm {
/// data - Get a pointer to the start of the string (which may not be null
/// terminated).
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
const char *data() const { return Data; }
/// empty - Check if the string is empty.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
bool empty() const { return Length == 0; }
/// size - Get the string size.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t size() const { return Length; }
/// front - Get the first character in the string.
@@ -133,6 +140,7 @@ namespace llvm {
/// equals - Check for string equality, this is more efficient than
/// compare() when the relative ordering of inequal strings isn't needed.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
bool equals(StringRef RHS) const {
return (Length == RHS.Length &&
compareMemory(Data, RHS.Data, RHS.Length) == 0);
@@ -145,6 +153,7 @@ namespace llvm {
/// compare - Compare two strings; the result is -1, 0, or 1 if this string
/// is lexicographically less than, equal to, or greater than the \p RHS.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
int compare(StringRef RHS) const {
// Check the prefix for a mismatch.
if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
@@ -212,6 +221,7 @@ namespace llvm {
/// @{
/// Check if this string starts with the given \p Prefix.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
bool startswith(StringRef Prefix) const {
return Length >= Prefix.Length &&
compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
@@ -221,6 +231,7 @@ namespace llvm {
bool startswith_lower(StringRef Prefix) const;
/// Check if this string ends with the given \p Suffix.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
bool endswith(StringRef Suffix) const {
return Length >= Suffix.Length &&
compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
@@ -237,6 +248,7 @@ namespace llvm {
///
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t find(char C, size_t From = 0) const {
size_t FindBegin = std::min(From, Length);
if (FindBegin < Length) { // Avoid calling memchr with nullptr.
@@ -402,6 +414,7 @@ namespace llvm {
/// \param N The number of characters to include in the substring. If N
/// exceeds the number of characters remaining in the string, the string
/// suffix (starting with \p Start) will be returned.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef substr(size_t Start, size_t N = npos) const {
Start = std::min(Start, Length);
return StringRef(Data + Start, std::min(N, Length - Start));
@@ -409,6 +422,7 @@ namespace llvm {
/// Return a StringRef equal to 'this' but with the first \p N elements
/// dropped.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_front(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(N);
@@ -416,6 +430,7 @@ namespace llvm {
/// Return a StringRef equal to 'this' but with the last \p N elements
/// dropped.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_back(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(0, size()-N);
@@ -431,6 +446,7 @@ namespace llvm {
/// substring. If this is npos, or less than \p Start, or exceeds the
/// number of characters remaining in the string, the string suffix
/// (starting with \p Start) will be returned.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef slice(size_t Start, size_t End) const {
Start = std::min(Start, Length);
End = std::min(std::max(Start, End), Length);
@@ -474,7 +490,7 @@ namespace llvm {
/// Split into substrings around the occurrences of a separator string.
///
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
- /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+ /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
/// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit
@@ -489,6 +505,23 @@ namespace llvm {
StringRef Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;
+ /// Split into substrings around the occurrences of a separator character.
+ ///
+ /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
+ /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
+ /// elements are added to A.
+ /// If \p KeepEmpty is false, empty strings are not added to \p A. They
+ /// still count when considering \p MaxSplit.
+ /// A useful invariant is that
+ /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
+ ///
+ /// \param A - Where to put the substrings.
+ /// \param Separator - The string to split on.
+ /// \param MaxSplit - The maximum number of times the string is split.
+ /// \param KeepEmpty - True if empty substrings should be added.
+ void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
+ bool KeepEmpty = true) const;
+
/// Split into two substrings around the last occurrence of a separator
/// character.
///
@@ -530,10 +563,12 @@ namespace llvm {
/// @name StringRef Comparison Operators
/// @{
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
inline bool operator==(StringRef LHS, StringRef RHS) {
return LHS.equals(RHS);
}
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
inline bool operator!=(StringRef LHS, StringRef RHS) {
return !(LHS == RHS);
}
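// A worked sketch of the corrected MaxSplit contract documented above: at
// most MaxSplit splits are performed, so at most MaxSplit + 1 elements are
// appended to the output vector.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

static void splitExample() {
  llvm::SmallVector<llvm::StringRef, 4> Parts;
  llvm::StringRef("a,b,c,d").split(Parts, ',', /*MaxSplit=*/2);
  // Two splits happen: Parts == {"a", "b", "c,d"}, i.e. 2 + 1 elements.
}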
diff --git a/contrib/llvm/include/llvm/ADT/StringSet.h b/contrib/llvm/include/llvm/ADT/StringSet.h
index 3e0cc20..08626dc 100644
--- a/contrib/llvm/include/llvm/ADT/StringSet.h
+++ b/contrib/llvm/include/llvm/ADT/StringSet.h
@@ -23,6 +23,11 @@ namespace llvm {
class StringSet : public llvm::StringMap<char, AllocatorTy> {
typedef llvm::StringMap<char, AllocatorTy> base;
public:
+ StringSet() = default;
+ StringSet(std::initializer_list<StringRef> S) {
+ for (StringRef X : S)
+ insert(X);
+ }
std::pair<typename base::iterator, bool> insert(StringRef Key) {
assert(!Key.empty());
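// Usage sketch for the initializer-list constructor added above; count()
// is inherited from the underlying StringMap.
#include "llvm/ADT/StringSet.h"

static bool isKeyword(llvm::StringRef S) {
  static const llvm::StringSet<> Keywords = {"if", "else", "while"};
  return Keywords.count(S) != 0;
}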
diff --git a/contrib/llvm/include/llvm/ADT/StringSwitch.h b/contrib/llvm/include/llvm/ADT/StringSwitch.h
index 0393a0c..42b0fc4 100644
--- a/contrib/llvm/include/llvm/ADT/StringSwitch.h
+++ b/contrib/llvm/include/llvm/ADT/StringSwitch.h
@@ -14,6 +14,7 @@
#define LLVM_ADT_STRINGSWITCH_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstring>
@@ -48,10 +49,12 @@ class StringSwitch {
const T *Result;
public:
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
explicit StringSwitch(StringRef S)
: Str(S), Result(nullptr) { }
template<unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Case(const char (&S)[N], const T& Value) {
if (!Result && N-1 == Str.size() &&
(std::memcmp(S, Str.data(), N-1) == 0)) {
@@ -62,6 +65,7 @@ public:
}
template<unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& EndsWith(const char (&S)[N], const T &Value) {
if (!Result && Str.size() >= N-1 &&
std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) {
@@ -72,6 +76,7 @@ public:
}
template<unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& StartsWith(const char (&S)[N], const T &Value) {
if (!Result && Str.size() >= N-1 &&
std::memcmp(S, Str.data(), N-1) == 0) {
@@ -82,32 +87,66 @@ public:
}
template<unsigned N0, unsigned N1>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
const T& Value) {
- return Case(S0, Value).Case(S1, Value);
+ if (!Result && (
+ (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
+ (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0))) {
+ Result = &Value;
+ }
+
+ return *this;
}
template<unsigned N0, unsigned N1, unsigned N2>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const T& Value) {
- return Case(S0, Value).Case(S1, Value).Case(S2, Value);
+ if (!Result && (
+ (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
+ (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
+ (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0))) {
+ Result = &Value;
+ }
+
+ return *this;
}
template<unsigned N0, unsigned N1, unsigned N2, unsigned N3>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const char (&S3)[N3],
const T& Value) {
- return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value);
+ if (!Result && (
+ (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
+ (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
+ (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) ||
+ (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0))) {
+ Result = &Value;
+ }
+
+ return *this;
}
template<unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const char (&S3)[N3],
const char (&S4)[N4], const T& Value) {
- return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value)
- .Case(S4, Value);
+ if (!Result && (
+ (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
+ (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
+ (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) ||
+ (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0) ||
+ (N4-1 == Str.size() && std::memcmp(S4, Str.data(), N4-1) == 0))) {
+ Result = &Value;
+ }
+
+ return *this;
}
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
R Default(const T& Value) const {
if (Result)
return *Result;
@@ -115,6 +154,7 @@ public:
return Value;
}
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
operator R() const {
assert(Result && "Fell off the end of a string-switch");
return *Result;
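// Typical use of StringSwitch; the Cases() bodies rewritten above preserve
// the first-match-wins behavior of the old Case() chains while letting the
// whole comparison sequence fold into a single always-inlined function.
#include "llvm/ADT/StringSwitch.h"

static int colorCode(llvm::StringRef S) {
  return llvm::StringSwitch<int>(S)
      .Case("red", 1)
      .Cases("green", "emerald", 2)
      .Default(0);
}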
diff --git a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
index f29608f..487aa46 100644
--- a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
+++ b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
@@ -15,7 +15,7 @@
#include "llvm/ADT/SmallVector.h"
namespace llvm {
-
+
/// TinyPtrVector - This class is specialized for cases where there are
/// normally 0 or 1 element in a vector, but is general enough to go beyond that
/// when required.
@@ -150,7 +150,6 @@ public:
return Val.getAddrOfPtr1();
return Val.template get<VecTy *>()->begin();
-
}
iterator end() {
if (Val.template is<EltTy>())
diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h
index 947812d..e01db0a 100644
--- a/contrib/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm/include/llvm/ADT/Triple.h
@@ -50,6 +50,7 @@ public:
armeb, // ARM (big endian): armeb
aarch64, // AArch64 (little endian): aarch64
aarch64_be, // AArch64 (big endian): aarch64_be
+ avr, // AVR: Atmel AVR microcontroller
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
hexagon, // Hexagon: hexagon
@@ -75,8 +76,8 @@ public:
xcore, // XCore: xcore
nvptx, // NVPTX: 32-bit
nvptx64, // NVPTX: 64-bit
- le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
- le64, // le64: generic little-endian 64-bit CPU (PNaCl / Emscripten)
+ le32, // le32: generic little-endian 32-bit CPU (PNaCl)
+ le64, // le64: generic little-endian 64-bit CPU (PNaCl)
amdil, // AMDIL
amdil64, // AMDIL with 64-bit pointers
hsail, // AMD HSAIL
@@ -92,12 +93,14 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v8_2a,
ARMSubArch_v8_1a,
ARMSubArch_v8,
ARMSubArch_v7,
ARMSubArch_v7em,
ARMSubArch_v7m,
ARMSubArch_v7s,
+ ARMSubArch_v7k,
ARMSubArch_v6,
ARMSubArch_v6m,
ARMSubArch_v6k,
@@ -124,7 +127,8 @@ public:
MipsTechnologies,
NVIDIA,
CSR,
- LastVendorType = CSR
+ Myriad,
+ LastVendorType = Myriad
};
enum OSType {
UnknownOS,
@@ -153,7 +157,10 @@ public:
NVCL, // NVIDIA OpenCL
AMDHSA, // AMD HSA Runtime
PS4,
- LastOSType = PS4
+ ELFIAMCU,
+ TvOS, // Apple tvOS
+ WatchOS, // Apple watchOS
+ LastOSType = WatchOS
};
enum EnvironmentType {
UnknownEnvironment,
@@ -170,7 +177,9 @@ public:
MSVC,
Itanium,
Cygnus,
- LastEnvironmentType = Cygnus
+ AMDOpenCL,
+ CoreCLR,
+ LastEnvironmentType = CoreCLR
};
enum ObjectFormatType {
UnknownObjectFormat,
@@ -205,7 +214,7 @@ public:
/// @name Constructors
/// @{
- /// \brief Default constructor is the same as an empty string and leaves all
+ /// Default constructor is the same as an empty string and leaves all
/// triple fields unknown.
Triple() : Data(), Arch(), Vendor(), OS(), Environment(), ObjectFormat() {}
@@ -231,7 +240,7 @@ public:
/// common case in which otherwise valid components are in the wrong order.
static std::string normalize(StringRef Str);
- /// \brief Return the normalized form of this triple's string.
+ /// Return the normalized form of this triple's string.
std::string normalize() const { return normalize(Data); }
/// @}
@@ -259,7 +268,7 @@ public:
/// getEnvironment - Get the parsed environment type of this triple.
EnvironmentType getEnvironment() const { return Environment; }
- /// \brief Parse the version number from the OS name component of the
+ /// Parse the version number from the OS name component of the
/// triple, if present.
///
/// For example, "fooos1.2.3" would return (1, 2, 3).
@@ -295,10 +304,15 @@ public:
unsigned &Micro) const;
/// getiOSVersion - Parse the version number as with getOSVersion. This should
- /// only be called with IOS triples.
+ /// only be called with IOS or generic triples.
void getiOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
+ /// getWatchOSVersion - Parse the version number as with getOSVersion. This
+ /// should only be called with WatchOS or generic triples.
+ void getWatchOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const;
+
/// @}
/// @name Direct Component Access
/// @{
@@ -331,7 +345,7 @@ public:
/// @name Convenience Predicates
/// @{
- /// \brief Test whether the architecture is 64-bit
+ /// Test whether the architecture is 64-bit
///
/// Note that this tests for 64-bit pointer width, and nothing else. Note
/// that we intentionally expose only three predicates, 64-bit, 32-bit, and
@@ -340,12 +354,12 @@ public:
/// system is provided.
bool isArch64Bit() const;
- /// \brief Test whether the architecture is 32-bit
+ /// Test whether the architecture is 32-bit
///
/// Note that this tests for 32-bit pointer width, and nothing else.
bool isArch32Bit() const;
- /// \brief Test whether the architecture is 16-bit
+ /// Test whether the architecture is 16-bit
///
/// Note that this tests for 16-bit pointer width, and nothing else.
bool isArch16Bit() const;
@@ -396,13 +410,27 @@ public:
}
/// Is this an iOS triple.
+ /// Note: This identifies tvOS as a variant of iOS. If that ever
+ /// changes, i.e., if the two operating systems diverge or their version
+ /// numbers get out of sync, that will need to be changed.
+ /// watchOS has completely different version numbers so it is not included.
bool isiOS() const {
- return getOS() == Triple::IOS;
+ return getOS() == Triple::IOS || isTvOS();
+ }
+
+ /// Is this an Apple tvOS triple.
+ bool isTvOS() const {
+ return getOS() == Triple::TvOS;
+ }
+
+ /// Is this an Apple watchOS triple.
+ bool isWatchOS() const {
+ return getOS() == Triple::WatchOS;
}
- /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS).
+ /// isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS).
bool isOSDarwin() const {
- return isMacOSX() || isiOS();
+ return isMacOSX() || isiOS() || isWatchOS();
}
bool isOSNetBSD() const {
@@ -427,16 +455,26 @@ public:
return getOS() == Triple::Bitrig;
}
+ bool isOSIAMCU() const {
+ return getOS() == Triple::ELFIAMCU;
+ }
+
+ /// Checks if the environment could be MSVC.
bool isWindowsMSVCEnvironment() const {
return getOS() == Triple::Win32 &&
(getEnvironment() == Triple::UnknownEnvironment ||
getEnvironment() == Triple::MSVC);
}
+ /// Checks if the environment is MSVC.
bool isKnownWindowsMSVCEnvironment() const {
return getOS() == Triple::Win32 && getEnvironment() == Triple::MSVC;
}
+ bool isWindowsCoreCLREnvironment() const {
+ return getOS() == Triple::Win32 && getEnvironment() == Triple::CoreCLR;
+ }
+
bool isWindowsItaniumEnvironment() const {
return getOS() == Triple::Win32 && getEnvironment() == Triple::Itanium;
}
@@ -449,60 +487,63 @@ public:
return getOS() == Triple::Win32 && getEnvironment() == Triple::GNU;
}
- /// \brief Tests for either Cygwin or MinGW OS
+ /// Tests for either Cygwin or MinGW OS
bool isOSCygMing() const {
return isWindowsCygwinEnvironment() || isWindowsGNUEnvironment();
}
- /// \brief Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
+ /// Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
bool isOSMSVCRT() const {
return isWindowsMSVCEnvironment() || isWindowsGNUEnvironment() ||
isWindowsItaniumEnvironment();
}
- /// \brief Tests whether the OS is Windows.
+ /// Tests whether the OS is Windows.
bool isOSWindows() const {
return getOS() == Triple::Win32;
}
- /// \brief Tests whether the OS is NaCl (Native Client)
+ /// Tests whether the OS is NaCl (Native Client)
bool isOSNaCl() const {
return getOS() == Triple::NaCl;
}
- /// \brief Tests whether the OS is Linux.
+ /// Tests whether the OS is Linux.
bool isOSLinux() const {
return getOS() == Triple::Linux;
}
- /// \brief Tests whether the OS uses the ELF binary format.
+ /// Tests whether the OS uses the ELF binary format.
bool isOSBinFormatELF() const {
return getObjectFormat() == Triple::ELF;
}
- /// \brief Tests whether the OS uses the COFF binary format.
+ /// Tests whether the OS uses the COFF binary format.
bool isOSBinFormatCOFF() const {
return getObjectFormat() == Triple::COFF;
}
- /// \brief Tests whether the environment is MachO.
+ /// Tests whether the environment is MachO.
bool isOSBinFormatMachO() const {
return getObjectFormat() == Triple::MachO;
}
- /// \brief Tests whether the target is the PS4 CPU
+ /// Tests whether the target is the PS4 CPU
bool isPS4CPU() const {
return getArch() == Triple::x86_64 &&
getVendor() == Triple::SCEI &&
getOS() == Triple::PS4;
}
- /// \brief Tests whether the target is the PS4 platform
+ /// Tests whether the target is the PS4 platform
bool isPS4() const {
return getVendor() == Triple::SCEI &&
getOS() == Triple::PS4;
}
+ /// Tests whether the target is Android
+ bool isAndroid() const { return getEnvironment() == Triple::Android; }
+
/// @}
/// @name Mutators
/// @{
@@ -553,7 +594,7 @@ public:
/// @name Helpers to build variants of a particular triple.
/// @{
- /// \brief Form a triple with a 32-bit variant of the current architecture.
+ /// Form a triple with a 32-bit variant of the current architecture.
///
/// This can be used to move across "families" of architectures where useful.
///
@@ -561,7 +602,7 @@ public:
/// architecture if no such variant can be found.
llvm::Triple get32BitArchVariant() const;
- /// \brief Form a triple with a 64-bit variant of the current architecture.
+ /// Form a triple with a 64-bit variant of the current architecture.
///
/// This can be used to move across "families" of architectures where useful.
///
@@ -589,7 +630,7 @@ public:
///
/// \param Arch the architecture name (e.g., "armv7s"). If it is an empty
/// string then the triple's arch name is used.
- const char* getARMCPUForArch(StringRef Arch = StringRef()) const;
+ StringRef getARMCPUForArch(StringRef Arch = StringRef()) const;
/// @}
/// @name Static helpers for IDs.
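// Usage sketch for the new OS predicates above: Darwin checks now cover
// watchOS, and isiOS() deliberately reports true for tvOS triples as well.
#include "llvm/ADT/Triple.h"

static bool targetsAppleMobile(const llvm::Triple &T) {
  return T.isiOS() || T.isWatchOS(); // isiOS() also matches tvOS
}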
diff --git a/contrib/llvm/include/llvm/ADT/UniqueVector.h b/contrib/llvm/include/llvm/ADT/UniqueVector.h
index a9cb2f5..e1ab4b5 100644
--- a/contrib/llvm/include/llvm/ADT/UniqueVector.h
+++ b/contrib/llvm/include/llvm/ADT/UniqueVector.h
@@ -11,6 +11,7 @@
#define LLVM_ADT_UNIQUEVECTOR_H
#include <cassert>
+#include <cstddef>
#include <map>
#include <vector>
diff --git a/contrib/llvm/include/llvm/ADT/ilist.h b/contrib/llvm/include/llvm/ADT/ilist.h
index a7b9306..3044a6c 100644
--- a/contrib/llvm/include/llvm/ADT/ilist.h
+++ b/contrib/llvm/include/llvm/ADT/ilist.h
@@ -104,6 +104,53 @@ struct ilist_sentinel_traits {
}
};
+template <typename NodeTy> class ilist_half_node;
+template <typename NodeTy> class ilist_node;
+
+/// Traits with an embedded ilist_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_embedded_sentinel_traits {
+ /// Get hold of the node that marks the end of the list.
+ NodeTy *createSentinel() const {
+ // Since i(p)lists always publicly derive from their corresponding traits,
+ // placing a data member in this class will augment the i(p)list. But since
+ // the NodeTy is expected to publicly derive from ilist_node<NodeTy>,
+ // there is a legal viable downcast from it to NodeTy. We use this trick to
+ // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
+ // sentinel. Dereferencing the sentinel is forbidden (save the
+ // ilist_node<NodeTy>), so no one will ever notice the superposition.
+ return static_cast<NodeTy *>(&Sentinel);
+ }
+ static void destroySentinel(NodeTy *) {}
+
+ NodeTy *provideInitialHead() const { return createSentinel(); }
+ NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+ static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+ mutable ilist_node<NodeTy> Sentinel;
+};
+
+/// Trait with an embedded ilist_half_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_half_embedded_sentinel_traits {
+ /// Get hold of the node that marks the end of the list.
+ NodeTy *createSentinel() const {
+ // See comment in ilist_embedded_sentinel_traits::createSentinel().
+ return static_cast<NodeTy *>(&Sentinel);
+ }
+ static void destroySentinel(NodeTy *) {}
+
+ NodeTy *provideInitialHead() const { return createSentinel(); }
+ NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+ static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+ mutable ilist_half_node<NodeTy> Sentinel;
+};
+
/// ilist_node_traits - A fragment for template traits for intrusive list
/// that provides default node related operations.
///
@@ -173,8 +220,8 @@ private:
template<class T> void operator-(T) const;
public:
- ilist_iterator(pointer NP) : NodePtr(NP) {}
- ilist_iterator(reference NR) : NodePtr(&NR) {}
+ explicit ilist_iterator(pointer NP) : NodePtr(NP) {}
+ explicit ilist_iterator(reference NR) : NodePtr(&NR) {}
ilist_iterator() : NodePtr(nullptr) {}
// This is templated so that we can allow constructing a const iterator from
@@ -191,8 +238,10 @@ public:
return *this;
}
+ void reset(pointer NP) { NodePtr = NP; }
+
// Accessors...
- operator pointer() const {
+ explicit operator pointer() const {
return NodePtr;
}
@@ -202,11 +251,11 @@ public:
pointer operator->() const { return &operator*(); }
// Comparison operators
- bool operator==(const ilist_iterator &RHS) const {
- return NodePtr == RHS.NodePtr;
+ template <class Y> bool operator==(const ilist_iterator<Y> &RHS) const {
+ return NodePtr == RHS.getNodePtrUnchecked();
}
- bool operator!=(const ilist_iterator &RHS) const {
- return NodePtr != RHS.NodePtr;
+ template <class Y> bool operator!=(const ilist_iterator<Y> &RHS) const {
+ return NodePtr != RHS.getNodePtrUnchecked();
}
// Increment and decrement operators...
@@ -422,7 +471,7 @@ public:
this->setPrev(CurNode, New);
this->addNodeToList(New); // Notify traits that we added a node...
- return New;
+ return iterator(New);
}
iterator insertAfter(iterator where, NodeTy *New) {
@@ -443,7 +492,7 @@ public:
else
Head = NextNode;
this->setPrev(NextNode, PrevNode);
- IT = NextNode;
+ IT.reset(NextNode);
this->removeNodeFromList(Node); // Notify traits that we removed a node...
// Set the next/prev pointers of the current node to null. This isn't
@@ -461,12 +510,18 @@ public:
return remove(MutIt);
}
+ NodeTy *remove(NodeTy *IT) { return remove(iterator(IT)); }
+ NodeTy *remove(NodeTy &IT) { return remove(iterator(IT)); }
+
// erase - remove a node from the controlled sequence... and delete it.
iterator erase(iterator where) {
this->deleteNode(remove(where));
return where;
}
+ iterator erase(NodeTy *IT) { return erase(iterator(IT)); }
+ iterator erase(NodeTy &IT) { return erase(iterator(IT)); }
+
/// Remove all nodes from the list like clear(), but do not call
/// removeNodeFromList() or deleteNode().
///
@@ -522,7 +577,7 @@ private:
this->setNext(Last, PosNext);
this->setPrev(PosNext, Last);
- this->transferNodesFromList(L2, First, PosNext);
+ this->transferNodesFromList(L2, iterator(First), iterator(PosNext));
// Now that everything is set, restore the pointers to the list sentinels.
L2.setTail(L2Sentinel);
@@ -579,6 +634,83 @@ public:
void splice(iterator where, iplist &L2, iterator first, iterator last) {
if (first != last) transfer(where, L2, first, last);
}
+ void splice(iterator where, iplist &L2, NodeTy &N) {
+ splice(where, L2, iterator(N));
+ }
+ void splice(iterator where, iplist &L2, NodeTy *N) {
+ splice(where, L2, iterator(N));
+ }
+
+ template <class Compare>
+ void merge(iplist &Right, Compare comp) {
+ if (this == &Right)
+ return;
+ iterator First1 = begin(), Last1 = end();
+ iterator First2 = Right.begin(), Last2 = Right.end();
+ while (First1 != Last1 && First2 != Last2) {
+ if (comp(*First2, *First1)) {
+ iterator Next = First2;
+ transfer(First1, Right, First2, ++Next);
+ First2 = Next;
+ } else {
+ ++First1;
+ }
+ }
+ if (First2 != Last2)
+ transfer(Last1, Right, First2, Last2);
+ }
+ void merge(iplist &Right) { return merge(Right, op_less); }
+
+ template <class Compare>
+ void sort(Compare comp) {
+ // The list is empty, vacuously sorted.
+ if (empty())
+ return;
+ // The list has a single element, vacuously sorted.
+ if (std::next(begin()) == end())
+ return;
+ // Find the split point for the list.
+ iterator Center = begin(), End = begin();
+ while (End != end() && std::next(End) != end()) {
+ Center = std::next(Center);
+ End = std::next(std::next(End));
+ }
+ // Split the list into two.
+ iplist RightHalf;
+ RightHalf.splice(RightHalf.begin(), *this, Center, end());
+
+ // Sort the two sublists.
+ sort(comp);
+ RightHalf.sort(comp);
+
+ // Merge the two sublists back together.
+ merge(RightHalf, comp);
+ }
+ void sort() { sort(op_less); }
+
+ /// \brief Get the previous node, or \c nullptr for the list head.
+ NodeTy *getPrevNode(NodeTy &N) const {
+ auto I = N.getIterator();
+ if (I == begin())
+ return nullptr;
+ return &*std::prev(I);
+ }
+ /// \brief Get the previous node, or \c nullptr for the list head.
+ const NodeTy *getPrevNode(const NodeTy &N) const {
+ return getPrevNode(const_cast<NodeTy &>(N));
+ }
+
+ /// \brief Get the next node, or \c nullptr for the list tail.
+ NodeTy *getNextNode(NodeTy &N) const {
+ auto Next = std::next(N.getIterator());
+ if (Next == end())
+ return nullptr;
+ return &*Next;
+ }
+ /// \brief Get the next node, or \c nullptr for the list tail.
+ const NodeTy *getNextNode(const NodeTy &N) const {
+ return getNextNode(const_cast<NodeTy &>(N));
+ }
};
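// The sort() added above is a merge sort specialized for intrusive lists:
// the midpoint is located with a one-step/two-step iterator walk, the back
// half is spliced into a temporary list, both halves are sorted recursively,
// and merge() re-splices the nodes in order without copying them. A hedged
// sketch with an illustrative node type:
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"

struct SortNode : llvm::ilist_node<SortNode> {
  int Key = 0;
  SortNode() = default;
  explicit SortNode(int K) : Key(K) {}
};

static void sortByKey(llvm::iplist<SortNode> &L) {
  L.sort([](const SortNode &A, const SortNode &B) { return A.Key < B.Key; });
}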
diff --git a/contrib/llvm/include/llvm/ADT/ilist_node.h b/contrib/llvm/include/llvm/ADT/ilist_node.h
index 26d0b55..7e5a0e0 100644
--- a/contrib/llvm/include/llvm/ADT/ilist_node.h
+++ b/contrib/llvm/include/llvm/ADT/ilist_node.h
@@ -19,12 +19,15 @@ namespace llvm {
template<typename NodeTy>
struct ilist_traits;
+template <typename NodeTy> struct ilist_embedded_sentinel_traits;
+template <typename NodeTy> struct ilist_half_embedded_sentinel_traits;
/// ilist_half_node - Base class that provides prev services for sentinels.
///
template<typename NodeTy>
class ilist_half_node {
friend struct ilist_traits<NodeTy>;
+ friend struct ilist_half_embedded_sentinel_traits<NodeTy>;
NodeTy *Prev;
protected:
NodeTy *getPrev() { return Prev; }
@@ -36,6 +39,8 @@ protected:
template<typename NodeTy>
struct ilist_nextprev_traits;
+template <typename NodeTy> class ilist_iterator;
+
/// ilist_node - Base class that provides next/prev services for nodes
/// that use ilist_nextprev_traits or ilist_default_traits.
///
@@ -43,6 +48,8 @@ template<typename NodeTy>
class ilist_node : private ilist_half_node<NodeTy> {
friend struct ilist_nextprev_traits<NodeTy>;
friend struct ilist_traits<NodeTy>;
+ friend struct ilist_half_embedded_sentinel_traits<NodeTy>;
+ friend struct ilist_embedded_sentinel_traits<NodeTy>;
NodeTy *Next;
NodeTy *getNext() { return Next; }
const NodeTy *getNext() const { return Next; }
@@ -51,53 +58,63 @@ protected:
ilist_node() : Next(nullptr) {}
public:
+ ilist_iterator<NodeTy> getIterator() {
+ // FIXME: Stop downcasting to create the iterator (potential UB).
+ return ilist_iterator<NodeTy>(static_cast<NodeTy *>(this));
+ }
+ ilist_iterator<const NodeTy> getIterator() const {
+ // FIXME: Stop downcasting to create the iterator (potential UB).
+ return ilist_iterator<const NodeTy>(static_cast<const NodeTy *>(this));
+ }
+};
+
+/// An ilist node that can access its parent list.
+///
+/// Requires \c NodeTy to have \a getParent() to find the parent node, and the
+/// \c ParentTy to have \a getSublistAccess() to get a reference to the list.
+template <typename NodeTy, typename ParentTy>
+class ilist_node_with_parent : public ilist_node<NodeTy> {
+protected:
+ ilist_node_with_parent() = default;
+
+private:
+ /// Forward to NodeTy::getParent().
+ ///
+ /// Note: do not use the name "getParent()". We want a compile error
+ /// (instead of recursion) when the subclass fails to implement \a
+ /// getParent().
+ const ParentTy *getNodeParent() const {
+ return static_cast<const NodeTy *>(this)->getParent();
+ }
+
+public:
/// @name Adjacent Node Accessors
/// @{
-
- /// \brief Get the previous node, or 0 for the list head.
+ /// \brief Get the previous node, or \c nullptr for the list head.
NodeTy *getPrevNode() {
- NodeTy *Prev = this->getPrev();
-
- // Check for sentinel.
- if (!Prev->getNext())
- return nullptr;
-
- return Prev;
+ // This could be factored into a reusable function, but then we couldn't
+ // use auto (and would need the type of the list).
+ const auto &List =
+ getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr));
+ return List.getPrevNode(*static_cast<NodeTy *>(this));
}
-
- /// \brief Get the previous node, or 0 for the list head.
+ /// \brief Get the previous node, or \c nullptr for the list head.
const NodeTy *getPrevNode() const {
- const NodeTy *Prev = this->getPrev();
-
- // Check for sentinel.
- if (!Prev->getNext())
- return nullptr;
-
- return Prev;
+ return const_cast<ilist_node_with_parent *>(this)->getPrevNode();
}
- /// \brief Get the next node, or 0 for the list tail.
+ /// \brief Get the next node, or \c nullptr for the list tail.
NodeTy *getNextNode() {
- NodeTy *Next = getNext();
-
- // Check for sentinel.
- if (!Next->getNext())
- return nullptr;
-
- return Next;
+ // This could be factored into a reusable function, but then we couldn't
+ // use auto (and would need the type of the list).
+ const auto &List =
+ getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr));
+ return List.getNextNode(*static_cast<NodeTy *>(this));
}
-
- /// \brief Get the next node, or 0 for the list tail.
+ /// \brief Get the next node, or \c nullptr for the list tail.
const NodeTy *getNextNode() const {
- const NodeTy *Next = getNext();
-
- // Check for sentinel.
- if (!Next->getNext())
- return nullptr;
-
- return Next;
+ return const_cast<ilist_node_with_parent *>(this)->getNextNode();
}
-
/// @}
};
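// A hedged sketch of the contract ilist_node_with_parent relies on: the
// node type provides getParent(), and the parent type provides a static
// getSublistAccess() returning a pointer-to-member for the containing list,
// which getPrevNode()/getNextNode() above dereference. All names here are
// illustrative, mirroring the Instruction/BasicBlock pattern.
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"

struct Func;
struct Instr : llvm::ilist_node_with_parent<Instr, Func> {
  Func *Parent = nullptr;
  Func *getParent() const { return Parent; }
};

struct Func {
  llvm::iplist<Instr> Body;
  // A pointer-to-member lets the node find the list without naming the field.
  static llvm::iplist<Instr> Func::*getSublistAccess(Instr *) {
    return &Func::Body;
  }
};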
diff --git a/contrib/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm/include/llvm/ADT/iterator_range.h
index 523a86f..3dd679b 100644
--- a/contrib/llvm/include/llvm/ADT/iterator_range.h
+++ b/contrib/llvm/include/llvm/ADT/iterator_range.h
@@ -20,6 +20,7 @@
#define LLVM_ADT_ITERATOR_RANGE_H
#include <utility>
+#include <iterator>
namespace llvm {
@@ -32,6 +33,12 @@ class iterator_range {
IteratorT begin_iterator, end_iterator;
public:
+ //TODO: Add SFINAE to test that the Container's iterators match the range's
+ // iterators.
+ template <typename Container>
+ iterator_range(Container &&c)
+ //TODO: Consider ADL/non-member begin/end calls.
+ : begin_iterator(c.begin()), end_iterator(c.end()) {}
iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
: begin_iterator(std::move(begin_iterator)),
end_iterator(std::move(end_iterator)) {}
@@ -51,6 +58,11 @@ template <class T> iterator_range<T> make_range(T x, T y) {
template <typename T> iterator_range<T> make_range(std::pair<T, T> p) {
return iterator_range<T>(std::move(p.first), std::move(p.second));
}
+
+template<typename T>
+iterator_range<decltype(begin(std::declval<T>()))> drop_begin(T &&t, int n) {
+ return make_range(std::next(begin(t), n), end(t));
+}
}
#endif
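// Usage sketch for drop_begin() added above: iterate a container while
// skipping its first n elements. Assumes V has at least one element, since
// advancing begin() past end() is undefined.
#include "llvm/ADT/iterator_range.h"
#include <vector>

static int sumTail(const std::vector<int> &V) {
  int Sum = 0;
  for (int X : llvm::drop_begin(V, 1)) // visits V[1] .. V.back()
    Sum += X;
  return Sum;
}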
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
index 36f8199..5cc840a 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -41,10 +41,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Analysis/MemoryLocation.h"
namespace llvm {
-
+class BasicAAResult;
class LoadInst;
class StoreInst;
class VAArgInst;
@@ -55,6 +56,7 @@ class AnalysisUsage;
class MemTransferInst;
class MemIntrinsic;
class DominatorTree;
+class OrderedBasicBlock;
/// The possible results of an alias query.
///
@@ -84,462 +86,871 @@ enum AliasResult {
MustAlias,
};
-class AliasAnalysis {
-protected:
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
+/// Flags indicating whether a memory access modifies or references memory.
+///
+/// This is no access at all, a modification, a reference, or both
+/// a modification and a reference. These are specifically structured such that
+/// they form a two bit matrix and bit-tests for 'mod' or 'ref' work with any
+/// of the possible values.
+enum ModRefInfo {
+ /// The access neither references nor modifies the value stored in memory.
+ MRI_NoModRef = 0,
+ /// The access references the value stored in memory.
+ MRI_Ref = 1,
+ /// The access modifies the value stored in memory.
+ MRI_Mod = 2,
+ /// The access both references and modifies the value stored in memory.
+ MRI_ModRef = MRI_Ref | MRI_Mod
+};
-private:
- AliasAnalysis *AA; // Previous Alias Analysis to chain to.
+/// The locations at which a function might access memory.
+///
+/// These are primarily used in conjunction with the \c AccessKind bits to
+/// describe both the nature of access and the locations of access for a
+/// function call.
+enum FunctionModRefLocation {
+ /// Base case is no access to memory.
+ FMRL_Nowhere = 0,
+ /// Access to memory via argument pointers.
+ FMRL_ArgumentPointees = 4,
+ /// Access to any memory.
+ FMRL_Anywhere = 8 | FMRL_ArgumentPointees
+};
-protected:
- /// InitializeAliasAnalysis - Subclasses must call this method to initialize
- /// the AliasAnalysis interface before any other methods are called. This is
- /// typically called by the run* methods of these subclasses. This may be
- /// called multiple times.
+/// Summary of how a function affects memory in the program.
+///
+/// Loads from constant globals are not considered memory accesses for this
+/// interface. Also, functions may freely modify stack space local to their
+/// invocation without having to report it through these interfaces.
+enum FunctionModRefBehavior {
+ /// This function does not perform any non-local loads or stores to memory.
///
- void InitializeAliasAnalysis(Pass *P, const DataLayout *DL);
-
- /// getAnalysisUsage - All alias analysis implementations should invoke this
- /// directly (using AliasAnalysis::getAnalysisUsage(AU)).
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ /// This property corresponds to the GCC 'const' attribute.
+ /// This property corresponds to the LLVM IR 'readnone' attribute.
+ /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
+ FMRB_DoesNotAccessMemory = FMRL_Nowhere | MRI_NoModRef,
-public:
- static char ID; // Class identification, replacement for typeinfo
- AliasAnalysis() : DL(nullptr), TLI(nullptr), AA(nullptr) {}
- virtual ~AliasAnalysis(); // We want to be subclassed
+ /// The only memory references in this function (if it has any) are
+ /// non-volatile loads from objects pointed to by its pointer-typed
+ /// arguments, with arbitrary offsets.
+ ///
+ /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
+ FMRB_OnlyReadsArgumentPointees = FMRL_ArgumentPointees | MRI_Ref,
- /// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo
- /// object, or null if no TargetLibraryInfo object is available.
+ /// The only memory references in this function (if it has any) are
+ /// non-volatile loads and stores from objects pointed to by its
+ /// pointer-typed arguments, with arbitrary offsets.
///
- const TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+ /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag.
+ FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef,
- /// getTypeStoreSize - Return the DataLayout store size for the given type,
- /// if known, or a conservative value otherwise.
+ /// This function does not perform any non-local stores or volatile loads,
+ /// but may read from any memory location.
///
- uint64_t getTypeStoreSize(Type *Ty);
+ /// This property corresponds to the GCC 'pure' attribute.
+ /// This property corresponds to the LLVM IR 'readonly' attribute.
+ /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
+ FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref,
+
+ /// This indicates that the function could not be classified into one of the
+ /// behaviors above.
+ FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef
+};
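// A sketch of the bit-matrix property described above: MRI_Ref and MRI_Mod
// occupy independent bits, and every FunctionModRefBehavior value embeds
// them, so the same mask tests work on both enums.
#include "llvm/Analysis/AliasAnalysis.h"

static bool mayWriteMemory(llvm::FunctionModRefBehavior MRB) {
  return (MRB & llvm::MRI_Mod) != 0; // false for FMRB_OnlyReadsMemory
}

static bool mayReadMemory(llvm::ModRefInfo MRI) {
  return (MRI & llvm::MRI_Ref) != 0; // true for MRI_Ref and MRI_ModRef
}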
+
+class AAResults {
+public:
+ // Make these results default constructible and movable. We have to spell
+ // these out because MSVC won't synthesize them.
+ AAResults() {}
+ AAResults(AAResults &&Arg);
+ AAResults &operator=(AAResults &&Arg);
+ ~AAResults();
+
+ /// Register a specific AA result.
+ template <typename AAResultT> void addAAResult(AAResultT &AAResult) {
+ // FIXME: We should use a much lighter weight system than the usual
+ // polymorphic pattern because we don't own AAResult. It should
+ // ideally involve two pointers and no separate allocation.
+ AAs.emplace_back(new Model<AAResultT>(AAResult, *this));
+ }
//===--------------------------------------------------------------------===//
- /// Alias Queries...
- ///
+ /// \name Alias Queries
+ /// @{
- /// alias - The main low level interface to the alias analysis implementation.
+ /// The main low level interface to the alias analysis implementation.
/// Returns an AliasResult indicating whether the two pointers are aliased to
- /// each other. This is the interface that must be implemented by specific
+ /// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
- virtual AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- /// alias - A convenience wrapper.
- AliasResult alias(const Value *V1, uint64_t V1Size,
- const Value *V2, uint64_t V2Size) {
+ /// A convenience wrapper around the primary \c alias interface.
+ AliasResult alias(const Value *V1, uint64_t V1Size, const Value *V2,
+ uint64_t V2Size) {
return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
- /// alias - A convenience wrapper.
+ /// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, const Value *V2) {
return alias(V1, MemoryLocation::UnknownSize, V2,
MemoryLocation::UnknownSize);
}
- /// isNoAlias - A trivial helper function to check to see if the specified
- /// pointers are no-alias.
+ /// A trivial helper function to check to see if the specified pointers are
+ /// no-alias.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == NoAlias;
}
- /// isNoAlias - A convenience wrapper.
- bool isNoAlias(const Value *V1, uint64_t V1Size,
- const Value *V2, uint64_t V2Size) {
+ /// A convenience wrapper around the \c isNoAlias helper interface.
+ bool isNoAlias(const Value *V1, uint64_t V1Size, const Value *V2,
+ uint64_t V2Size) {
return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
-
- /// isNoAlias - A convenience wrapper.
+
+ /// A convenience wrapper around the \c isNoAlias helper interface.
bool isNoAlias(const Value *V1, const Value *V2) {
return isNoAlias(MemoryLocation(V1), MemoryLocation(V2));
}
-
- /// isMustAlias - A convenience wrapper.
+
+ /// A trivial helper function to check to see if the specified pointers are
+ /// must-alias.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == MustAlias;
}
- /// isMustAlias - A convenience wrapper.
+ /// A convenience wrapper around the \c isMustAlias helper interface.
bool isMustAlias(const Value *V1, const Value *V2) {
return alias(V1, 1, V2, 1) == MustAlias;
}
-
- /// pointsToConstantMemory - If the specified memory location is
- /// known to be constant, return true. If OrLocal is true and the
- /// specified memory location is known to be "local" (derived from
- /// an alloca), return true. Otherwise return false.
- virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal = false);
- /// pointsToConstantMemory - A convenient wrapper.
+ /// Checks whether the given location points to constant memory, or if
+ /// \p OrLocal is true whether it points to a local alloca.
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false);
+
+ /// A convenience wrapper around the primary \c pointsToConstantMemory
+ /// interface.
bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
return pointsToConstantMemory(MemoryLocation(P), OrLocal);
}
+ /// @}
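// Usage sketch of the query surface: the convenience wrappers above all
// funnel into alias(MemoryLocation, MemoryLocation), so a pairwise check
// can be written directly against values.
#include "llvm/Analysis/AliasAnalysis.h"

static bool provablyDisjoint(llvm::AAResults &AA, const llvm::Value *A,
                             const llvm::Value *B) {
  return AA.isNoAlias(A, B); // NoAlias from the best registered result
}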
//===--------------------------------------------------------------------===//
- /// Simple mod/ref information...
- ///
-
- /// ModRefResult - Represent the result of a mod/ref query. Mod and Ref are
- /// bits which may be or'd together.
- ///
- enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };
-
- /// These values define additional bits used to define the
- /// ModRefBehavior values.
- enum { Nowhere = 0, ArgumentPointees = 4, Anywhere = 8 | ArgumentPointees };
-
- /// ModRefBehavior - Summary of how a function affects memory in the program.
- /// Loads from constant globals are not considered memory accesses for this
- /// interface. Also, functions may freely modify stack space local to their
- /// invocation without having to report it through these interfaces.
- enum ModRefBehavior {
- /// DoesNotAccessMemory - This function does not perform any non-local loads
- /// or stores to memory.
- ///
- /// This property corresponds to the GCC 'const' attribute.
- /// This property corresponds to the LLVM IR 'readnone' attribute.
- /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
- DoesNotAccessMemory = Nowhere | NoModRef,
-
- /// OnlyReadsArgumentPointees - The only memory references in this function
- /// (if it has any) are non-volatile loads from objects pointed to by its
- /// pointer-typed arguments, with arbitrary offsets.
- ///
- /// This property corresponds to the LLVM IR 'argmemonly' attribute combined
- /// with 'readonly' attribute.
- /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
- OnlyReadsArgumentPointees = ArgumentPointees | Ref,
-
- /// OnlyAccessesArgumentPointees - The only memory references in this
- /// function (if it has any) are non-volatile loads and stores from objects
- /// pointed to by its pointer-typed arguments, with arbitrary offsets.
- ///
- /// This property corresponds to the LLVM IR 'argmemonly' attribute.
- /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag.
- OnlyAccessesArgumentPointees = ArgumentPointees | ModRef,
-
- /// OnlyReadsMemory - This function does not perform any non-local stores or
- /// volatile loads, but may read from any memory location.
- ///
- /// This property corresponds to the GCC 'pure' attribute.
- /// This property corresponds to the LLVM IR 'readonly' attribute.
- /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
- OnlyReadsMemory = Anywhere | Ref,
-
- /// UnknownModRefBehavior - This indicates that the function could not be
- /// classified into one of the behaviors above.
- UnknownModRefBehavior = Anywhere | ModRef
- };
+ /// \name Simple mod/ref information
+ /// @{
/// Get the ModRef info associated with a pointer argument of a callsite. The
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information.
- virtual ModRefResult getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+ ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
- /// getModRefBehavior - Return the behavior when calling the given call site.
- virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ /// Return the behavior of the given call site.
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
- /// getModRefBehavior - Return the behavior when calling the given function.
- /// For use when the call site is not known.
- virtual ModRefBehavior getModRefBehavior(const Function *F);
+ /// Return the behavior when calling the given function.
+ FunctionModRefBehavior getModRefBehavior(const Function *F);
- /// doesNotAccessMemory - If the specified call is known to never read or
- /// write memory, return true. If the call only reads from known-constant
- /// memory, it is also legal to return true. Calls that unwind the stack
- /// are legal for this predicate.
+ /// Checks if the specified call is known to never read or write memory.
+ ///
+ /// Note that if the call only reads from known-constant memory, it is also
+ /// legal to return true. Also, calls that unwind the stack are legal for
+ /// this predicate.
///
/// Many optimizations (such as CSE and LICM) can be performed on such calls
/// without worrying about aliasing properties, and many calls have this
/// property (e.g. calls to 'sin' and 'cos').
///
/// This property corresponds to the GCC 'const' attribute.
- ///
bool doesNotAccessMemory(ImmutableCallSite CS) {
- return getModRefBehavior(CS) == DoesNotAccessMemory;
+ return getModRefBehavior(CS) == FMRB_DoesNotAccessMemory;
}
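
A hedged sketch of the CSE use case described above; the helper is hypothetical:

    // Two identical calls to a function that never reads or writes memory
    // compute the same value, so the second can be replaced by the first.
    static bool canCSEReadnoneCalls(AAResults &AA, const CallInst *C1,
                                    const CallInst *C2) {
      return C1->isIdenticalTo(C2) &&
             AA.doesNotAccessMemory(ImmutableCallSite(C1));
    }
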
- /// doesNotAccessMemory - If the specified function is known to never read or
- /// write memory, return true. For use when the call site is not known.
+ /// Checks if the specified function is known to never read or write memory.
+ ///
+ /// Note that if the function only reads from known-constant memory, it is
+ /// also legal to return true. Also, functions that unwind the stack are legal
+ /// for this predicate.
+ ///
+ /// Many optimizations (such as CSE and LICM) can be performed on calls
+ /// to such functions without worrying about aliasing properties, and many
+ /// functions have this property (e.g. 'sin' and 'cos').
///
+ /// This property corresponds to the GCC 'const' attribute.
bool doesNotAccessMemory(const Function *F) {
- return getModRefBehavior(F) == DoesNotAccessMemory;
+ return getModRefBehavior(F) == FMRB_DoesNotAccessMemory;
}
- /// onlyReadsMemory - If the specified call is known to only read from
- /// non-volatile memory (or not access memory at all), return true. Calls
- /// that unwind the stack are legal for this predicate.
+ /// Checks if the specified call is known to only read from non-volatile
+ /// memory (or not access memory at all).
+ ///
+ /// Calls that unwind the stack are legal for this predicate.
///
/// This property allows many common optimizations to be performed in the
/// absence of interfering store instructions, such as CSE of strlen calls.
///
/// This property corresponds to the GCC 'pure' attribute.
- ///
bool onlyReadsMemory(ImmutableCallSite CS) {
return onlyReadsMemory(getModRefBehavior(CS));
}
- /// onlyReadsMemory - If the specified function is known to only read from
- /// non-volatile memory (or not access memory at all), return true. For use
- /// when the call site is not known.
+ /// Checks if the specified function is known to only read from non-volatile
+ /// memory (or not access memory at all).
///
- bool onlyReadsMemory(const Function *F) {
- return onlyReadsMemory(getModRefBehavior(F));
- }
-
- /// onlyReadsMemory - Return true if functions with the specified behavior are
- /// known to only read from non-volatile memory (or not access memory at all).
+ /// Functions that unwind the stack are legal for this predicate.
///
- static bool onlyReadsMemory(ModRefBehavior MRB) {
- return !(MRB & Mod);
- }
-
- /// onlyAccessesArgPointees - Return true if functions with the specified
- /// behavior are known to read and write at most from objects pointed to by
- /// their pointer-typed arguments (with arbitrary offsets).
- ///
- static bool onlyAccessesArgPointees(ModRefBehavior MRB) {
- return !(MRB & Anywhere & ~ArgumentPointees);
- }
-
- /// doesAccessArgPointees - Return true if functions with the specified
- /// behavior are known to potentially read or write from objects pointed
- /// to be their pointer-typed arguments (with arbitrary offsets).
+ /// This property allows many common optimizations to be performed in the
+ /// absence of interfering store instructions, such as CSE of strlen calls.
///
- static bool doesAccessArgPointees(ModRefBehavior MRB) {
- return (MRB & ModRef) && (MRB & ArgumentPointees);
+ /// This property corresponds to the GCC 'pure' attribute.
+ bool onlyReadsMemory(const Function *F) {
+ return onlyReadsMemory(getModRefBehavior(F));
}
- /// getModRefInfo - Return information about whether or not an
- /// instruction may read or write memory (without regard to a
- /// specific location)
- ModRefResult getModRefInfo(const Instruction *I) {
- if (auto CS = ImmutableCallSite(I)) {
- auto MRB = getModRefBehavior(CS);
- if (MRB & ModRef)
- return ModRef;
- else if (MRB & Ref)
- return Ref;
- else if (MRB & Mod)
- return Mod;
- return NoModRef;
- }
-
- return getModRefInfo(I, MemoryLocation());
+ /// Checks if functions with the specified behavior are known to only read
+ /// from non-volatile memory (or not access memory at all).
+ static bool onlyReadsMemory(FunctionModRefBehavior MRB) {
+ return !(MRB & MRI_Mod);
}
- /// getModRefInfo - Return information about whether or not an instruction may
- /// read or write the specified memory location. An instruction
- /// that doesn't read or write memory may be trivially LICM'd for example.
- ModRefResult getModRefInfo(const Instruction *I, const MemoryLocation &Loc) {
- switch (I->getOpcode()) {
- case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc);
- case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc);
- case Instruction::Store: return getModRefInfo((const StoreInst*)I, Loc);
- case Instruction::Fence: return getModRefInfo((const FenceInst*)I, Loc);
- case Instruction::AtomicCmpXchg:
- return getModRefInfo((const AtomicCmpXchgInst*)I, Loc);
- case Instruction::AtomicRMW:
- return getModRefInfo((const AtomicRMWInst*)I, Loc);
- case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc);
- case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
- default: return NoModRef;
- }
+ /// Checks if functions with the specified behavior are known to read and
+ /// write at most from objects pointed to by their pointer-typed arguments
+ /// (with arbitrary offsets).
+ static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) {
+ return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees);
}
- /// getModRefInfo - A convenience wrapper.
- ModRefResult getModRefInfo(const Instruction *I,
- const Value *P, uint64_t Size) {
- return getModRefInfo(I, MemoryLocation(P, Size));
+ /// Checks if functions with the specified behavior are known to potentially
+ /// read or write from objects pointed to by their pointer-typed arguments
+ /// (with arbitrary offsets).
+ static bool doesAccessArgPointees(FunctionModRefBehavior MRB) {
+ return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees);
}
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
/// getModRefInfo (for call sites) - A convenience wrapper.
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, uint64_t Size) {
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const Value *P,
+ uint64_t Size) {
return getModRefInfo(CS, MemoryLocation(P, Size));
}
/// getModRefInfo (for calls) - Return information about whether
/// a particular call modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const CallInst *C, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const CallInst *C, const MemoryLocation &Loc) {
return getModRefInfo(ImmutableCallSite(C), Loc);
}
/// getModRefInfo (for calls) - A convenience wrapper.
- ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) {
+ ModRefInfo getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) {
return getModRefInfo(C, MemoryLocation(P, Size));
}
/// getModRefInfo (for invokes) - Return information about whether
/// a particular invoke modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) {
return getModRefInfo(ImmutableCallSite(I), Loc);
}
/// getModRefInfo (for invokes) - A convenience wrapper.
- ModRefResult getModRefInfo(const InvokeInst *I,
- const Value *P, uint64_t Size) {
+ ModRefInfo getModRefInfo(const InvokeInst *I, const Value *P, uint64_t Size) {
return getModRefInfo(I, MemoryLocation(P, Size));
}
/// getModRefInfo (for loads) - Return information about whether
/// a particular load modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const LoadInst *L, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc);
/// getModRefInfo (for loads) - A convenience wrapper.
- ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) {
+ ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) {
return getModRefInfo(L, MemoryLocation(P, Size));
}
/// getModRefInfo (for stores) - Return information about whether
/// a particular store modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const StoreInst *S, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc);
/// getModRefInfo (for stores) - A convenience wrapper.
- ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){
+ ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size) {
return getModRefInfo(S, MemoryLocation(P, Size));
}
/// getModRefInfo (for fences) - Return information about whether
/// a particular fence modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
// Conservatively correct. (We could possibly be a bit smarter if
// Loc is an alloca that doesn't escape.)
- return ModRef;
+ return MRI_ModRef;
}
/// getModRefInfo (for fences) - A convenience wrapper.
- ModRefResult getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size){
+ ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) {
return getModRefInfo(S, MemoryLocation(P, Size));
}
/// getModRefInfo (for cmpxchges) - Return information about whether
/// a particular cmpxchg modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX,
- const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc);
/// getModRefInfo (for cmpxchges) - A convenience wrapper.
- ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX,
- const Value *P, unsigned Size) {
+ ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P,
+ unsigned Size) {
return getModRefInfo(CX, MemoryLocation(P, Size));
}
/// getModRefInfo (for atomicrmws) - Return information about whether
/// a particular atomicrmw modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const AtomicRMWInst *RMW,
- const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc);
/// getModRefInfo (for atomicrmws) - A convenience wrapper.
- ModRefResult getModRefInfo(const AtomicRMWInst *RMW,
- const Value *P, unsigned Size) {
+ ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P,
+ unsigned Size) {
return getModRefInfo(RMW, MemoryLocation(P, Size));
}
/// getModRefInfo (for va_args) - Return information about whether
/// a particular va_arg modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc);
/// getModRefInfo (for va_args) - A convenience wrapper.
- ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){
+ ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, uint64_t Size) {
return getModRefInfo(I, MemoryLocation(P, Size));
}
- /// getModRefInfo - Return information about whether a call and an instruction
- /// may refer to the same memory locations.
- ModRefResult getModRefInfo(Instruction *I,
- ImmutableCallSite Call);
- /// getModRefInfo - Return information about whether two call sites may refer
- /// to the same set of memory locations. See
- /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- /// for details.
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2);
+ /// getModRefInfo (for catchpads) - Return information about whether
+ /// a particular catchpad modifies or reads the specified memory location.
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc);
- /// callCapturesBefore - Return information about whether a particular call
- /// site modifies or reads the specified memory location.
- ModRefResult callCapturesBefore(const Instruction *I,
- const MemoryLocation &MemLoc,
- DominatorTree *DT);
+ /// getModRefInfo (for catchpads) - A convenience wrapper.
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P,
+ uint64_t Size) {
+ return getModRefInfo(I, MemoryLocation(P, Size));
+ }
- /// callCapturesBefore - A convenience wrapper.
- ModRefResult callCapturesBefore(const Instruction *I, const Value *P,
- uint64_t Size, DominatorTree *DT) {
- return callCapturesBefore(I, MemoryLocation(P, Size), DT);
+ /// getModRefInfo (for catchrets) - Return information about whether
+ /// a particular catchret modifies or reads the specified memory location.
+ ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc);
+
+ /// getModRefInfo (for catchrets) - A convenience wrapper.
+ ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P,
+ uint64_t Size) {
+ return getModRefInfo(I, MemoryLocation(P, Size));
}
- //===--------------------------------------------------------------------===//
- /// Higher level methods for querying mod/ref information.
+ /// Check whether or not an instruction may read or write memory (without
+ /// regard to a specific location).
///
+ /// For function calls, this delegates to the alias-analysis-specific
+ /// call-site mod/ref behavior queries. Otherwise it delegates to the generic
+ /// mod/ref information query without a location.
+ ModRefInfo getModRefInfo(const Instruction *I) {
+ if (auto CS = ImmutableCallSite(I)) {
+ auto MRB = getModRefBehavior(CS);
+ if (MRB & MRI_ModRef)
+ return MRI_ModRef;
+ else if (MRB & MRI_Ref)
+ return MRI_Ref;
+ else if (MRB & MRI_Mod)
+ return MRI_Mod;
+ return MRI_NoModRef;
+ }
+
+ return getModRefInfo(I, MemoryLocation());
+ }
+
+ /// Check whether or not an instruction may read or write the specified
+ /// memory location.
+ ///
+ /// An instruction that doesn't read or write memory may be trivially LICM'd
+ /// for example.
+ ///
+ /// This primarily delegates to specific helpers above.
+ ModRefInfo getModRefInfo(const Instruction *I, const MemoryLocation &Loc) {
+ switch (I->getOpcode()) {
+ case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc);
+ case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc);
+ case Instruction::Store: return getModRefInfo((const StoreInst*)I, Loc);
+ case Instruction::Fence: return getModRefInfo((const FenceInst*)I, Loc);
+ case Instruction::AtomicCmpXchg:
+ return getModRefInfo((const AtomicCmpXchgInst*)I, Loc);
+ case Instruction::AtomicRMW:
+ return getModRefInfo((const AtomicRMWInst*)I, Loc);
+ case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc);
+ case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
+ case Instruction::CatchPad:
+ return getModRefInfo((const CatchPadInst *)I, Loc);
+ case Instruction::CatchRet:
+ return getModRefInfo((const CatchReturnInst *)I, Loc);
+ default:
+ return MRI_NoModRef;
+ }
+ }
+
+ /// A convenience wrapper for constructing the memory location.
+ ModRefInfo getModRefInfo(const Instruction *I, const Value *P,
+ uint64_t Size) {
+ return getModRefInfo(I, MemoryLocation(P, Size));
+ }
+
+ /// Return information about whether a call and an instruction may refer to
+ /// the same memory locations.
+ ModRefInfo getModRefInfo(Instruction *I, ImmutableCallSite Call);
+
+ /// Return information about whether two call sites may refer to the same set
+ /// of memory locations. See the AA documentation for details:
+ /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+
+ /// \brief Return information about whether a particular call site modifies
+ /// or reads the specified memory location \p MemLoc before instruction \p I
+ /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+ /// instruction ordering queries inside the BasicBlock containing \p I.
+ ModRefInfo callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc, DominatorTree *DT,
+ OrderedBasicBlock *OBB = nullptr);
+
+ /// \brief A convenience wrapper to synthesize a memory location.
+ ModRefInfo callCapturesBefore(const Instruction *I, const Value *P,
+ uint64_t Size, DominatorTree *DT,
+ OrderedBasicBlock *OBB = nullptr) {
+ return callCapturesBefore(I, MemoryLocation(P, Size), DT, OBB);
+ }
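
A usage sketch for the new OrderedBasicBlock parameter (the enclosing variables are assumed): reusing one OBB across repeated queries in the same block keeps instruction-ordering lookups from becoming quadratic.

    OrderedBasicBlock OBB(I->getParent());
    for (const MemoryLocation &MemLoc : Candidates) // Candidates: assumed
      if (AA.callCapturesBefore(I, MemLoc, &DT, &OBB) == MRI_NoModRef) {
        // No call before I captures and accesses MemLoc; act on it here.
      }
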
+
+ /// @}
+ //===--------------------------------------------------------------------===//
+ /// \name Higher level methods for querying mod/ref information.
+ /// @{
- /// canBasicBlockModify - Return true if it is possible for execution of the
- /// specified basic block to modify the location Loc.
+ /// Check if it is possible for execution of the specified basic block to
+ /// modify the location Loc.
bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc);
- /// canBasicBlockModify - A convenience wrapper.
- bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){
+ /// A convenience wrapper synthesizing a memory location.
+ bool canBasicBlockModify(const BasicBlock &BB, const Value *P,
+ uint64_t Size) {
return canBasicBlockModify(BB, MemoryLocation(P, Size));
}
- /// canInstructionRangeModRef - Return true if it is possible for the
- /// execution of the specified instructions to mod\ref (according to the
- /// mode) the location Loc. The instructions to consider are all
- /// of the instructions in the range of [I1,I2] INCLUSIVE.
- /// I1 and I2 must be in the same basic block.
+ /// Check if it is possible for the execution of the specified instructions
+ /// to mod/ref (according to the mode) the location Loc.
+ ///
+ /// The instructions to consider are all of the instructions in the range of
+ /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block.
bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
const MemoryLocation &Loc,
- const ModRefResult Mode);
+ const ModRefInfo Mode);
- /// canInstructionRangeModRef - A convenience wrapper.
- bool canInstructionRangeModRef(const Instruction &I1,
- const Instruction &I2, const Value *Ptr,
- uint64_t Size, const ModRefResult Mode) {
+ /// A convenience wrapper synthesizing a memory location.
+ bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
+ const Value *Ptr, uint64_t Size,
+ const ModRefInfo Mode) {
return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode);
}
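
And a sketch of the range query (identifiers assumed): before sinking a load below the inclusive range [First, Last] in its block, verify nothing in the range can write the loaded location.

    bool SafeToSink =
        !AA.canInstructionRangeModRef(*First, *Last,
                                      MemoryLocation::get(Load), MRI_Mod);
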
+private:
+ class Concept;
+ template <typename T> class Model;
+
+ template <typename T> friend class AAResultBase;
+
+ std::vector<std::unique_ptr<Concept>> AAs;
+};
+
+/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
+/// pointer or reference.
+typedef AAResults AliasAnalysis;
+
+/// A private abstract base class describing the concept of an individual alias
+/// analysis implementation.
+///
+/// This interface is implemented by any \c Model instantiation. It is also the
+/// interface which a type used to instantiate the model must provide.
+///
+/// Each of these methods models a method of the same name in the \c
+/// AAResults class. Only the differences in how the implementations are
+/// called are documented here.
+class AAResults::Concept {
+public:
+ virtual ~Concept() = 0;
+
+ /// An update API used internally by the AAResults to provide
+ /// a handle back to the top level aggregation.
+ virtual void setAAResults(AAResults *NewAAR) = 0;
+
//===--------------------------------------------------------------------===//
- /// Methods that clients should call when they transform the program to allow
- /// alias analyses to update their internal data structures. Note that these
- /// methods may be called on any instruction, regardless of whether or not
- /// they have pointer-analysis implications.
- ///
+ /// \name Alias Queries
+ /// @{
- /// deleteValue - This method should be called whenever an LLVM Value is
- /// deleted from the program, for example when an instruction is found to be
- /// redundant and is eliminated.
- ///
- virtual void deleteValue(Value *V);
+ /// The main low level interface to the alias analysis implementation.
+ /// Returns an AliasResult indicating whether the two pointers are aliased to
+ /// each other. This is the interface that must be implemented by specific
+ /// alias analysis implementations.
+ virtual AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) = 0;
- /// addEscapingUse - This method should be used whenever an escaping use is
- /// added to a pointer value. Analysis implementations may either return
- /// conservative responses for that value in the future, or may recompute
- /// some or all internal state to continue providing precise responses.
- ///
- /// Escaping uses are considered by anything _except_ the following:
- /// - GEPs or bitcasts of the pointer
- /// - Loads through the pointer
- /// - Stores through (but not of) the pointer
- virtual void addEscapingUse(Use &U);
-
- /// replaceWithNewValue - This method is the obvious combination of the two
- /// above, and it provided as a helper to simplify client code.
+ /// Checks whether the given location points to constant memory, or if
+ /// \p OrLocal is true whether it points to a local alloca.
+ virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) = 0;
+
+ /// @}
+ //===--------------------------------------------------------------------===//
+ /// \name Simple mod/ref information
+ /// @{
+
+ /// Get the ModRef info associated with a pointer argument of a callsite. The
+ /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
+ /// that these bits do not necessarily account for the overall behavior of
+ /// the function, but rather only provide additional per-argument
+ /// information.
+ virtual ModRefInfo getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) = 0;
+
+ /// Return the behavior of the given call site.
+ virtual FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) = 0;
+
+ /// Return the behavior when calling the given function.
+ virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0;
+
+ /// getModRefInfo (for call sites) - Return information about whether
+ /// a particular call site modifies or reads the specified memory location.
+ virtual ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) = 0;
+
+ /// Return information about whether two call sites may refer to the same set
+ /// of memory locations. See the AA documentation for details:
+ /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
+ virtual ModRefInfo getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) = 0;
+
+ /// @}
+};
+
+/// A private class template which derives from \c Concept and wraps some other
+/// type.
+///
+/// This models the concept by directly forwarding each interface point to the
+/// wrapped type, which must implement a compatible interface. This provides
+/// a type-erased binding.
+template <typename AAResultT> class AAResults::Model final : public Concept {
+ AAResultT &Result;
+
+public:
+ explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) {
+ Result.setAAResults(&AAR);
+ }
+ ~Model() override {}
+
+ void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
+
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override {
+ return Result.alias(LocA, LocB);
+ }
+
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override {
+ return Result.pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) override {
+ return Result.getArgModRefInfo(CS, ArgIdx);
+ }
+
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
+ return Result.getModRefBehavior(CS);
+ }
+
+ FunctionModRefBehavior getModRefBehavior(const Function *F) override {
+ return Result.getModRefBehavior(F);
+ }
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) override {
+ return Result.getModRefInfo(CS, Loc);
+ }
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override {
+ return Result.getModRefInfo(CS1, CS2);
+ }
+};
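
The Concept/Model pair above is the classic type-erasure (external polymorphism) pattern. A self-contained, stripped-down analogue, with all names invented, may make the shape clearer:

    #include <memory>

    class AnyPrinter {
      struct Concept {                        // the erased interface
        virtual ~Concept() {}
        virtual void print() = 0;
      };
      template <typename T> struct Model final : Concept {
        T &Impl;
        explicit Model(T &Impl) : Impl(Impl) {}
        void print() override { Impl.print(); } // forwards into the wrapped T
      };
      std::unique_ptr<Concept> C;

    public:
      template <typename T>
      explicit AnyPrinter(T &Impl) : C(new Model<T>(Impl)) {}
      void print() { C->print(); } // one virtual hop, then direct dispatch
    };
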
+
+/// A CRTP-driven "mixin" base class to help implement the function alias
+/// analysis results concept.
+///
+/// Because of the nature of many alias analysis implementations, they often
+/// only implement a subset of the interface. This base class will attempt to
+/// implement the remaining portions of the interface in terms of simpler forms
+/// of the interface where possible, and otherwise provide conservatively
+/// correct fallback implementations.
+///
+/// Implementors of an alias analysis should derive from this CRTP base class
+/// and then override the specific methods that they wish to customize. There
+/// is no need to use virtual anywhere; the CRTP base class does static
+/// dispatch to the derived type passed into it.
+template <typename DerivedT> class AAResultBase {
+ // Expose some parts of the interface only to the AAResults::Model
+ // for wrapping. Specifically, this allows the model to call our
+ // setAAResults method without exposing it as a fully public API.
+ friend class AAResults::Model<DerivedT>;
+
+ /// A pointer to the AAResults object that this AAResult is
+ /// aggregated within. May be null if not aggregated.
+ AAResults *AAR;
+
+ /// Helper to dispatch calls back through the derived type.
+ DerivedT &derived() { return static_cast<DerivedT &>(*this); }
+
+ /// A setter for the AAResults pointer, which is used to satisfy the
+ /// AAResults::Model contract.
+ void setAAResults(AAResults *NewAAR) { AAR = NewAAR; }
+
+protected:
+ /// This proxy class models a common pattern where we delegate to either the
+ /// top-level \c AAResults aggregation if one is registered, or to the
+ /// current result if none are registered.
+ class AAResultsProxy {
+ AAResults *AAR;
+ DerivedT &CurrentResult;
+
+ public:
+ AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult)
+ : AAR(AAR), CurrentResult(CurrentResult) {}
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB);
+ }
+
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
+ return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal)
+ : CurrentResult.pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ return AAR ? AAR->getArgModRefInfo(CS, ArgIdx)
+ : CurrentResult.getArgModRefInfo(CS, ArgIdx);
+ }
+
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return AAR ? AAR->getModRefBehavior(CS)
+ : CurrentResult.getModRefBehavior(CS);
+ }
+
+ FunctionModRefBehavior getModRefBehavior(const Function *F) {
+ return AAR ? AAR->getModRefBehavior(F)
+ : CurrentResult.getModRefBehavior(F);
+ }
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
+ return AAR ? AAR->getModRefInfo(CS, Loc)
+ : CurrentResult.getModRefInfo(CS, Loc);
+ }
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ return AAR ? AAR->getModRefInfo(CS1, CS2)
+ : CurrentResult.getModRefInfo(CS1, CS2);
+ }
+ };
+
+ const TargetLibraryInfo &TLI;
+
+ explicit AAResultBase(const TargetLibraryInfo &TLI) : TLI(TLI) {}
+
+ // Provide all the copy and move constructors so that derived types aren't
+ // constrained.
+ AAResultBase(const AAResultBase &Arg) : TLI(Arg.TLI) {}
+ AAResultBase(AAResultBase &&Arg) : TLI(Arg.TLI) {}
+
+ /// Get a proxy for the best AA result set to query at this time.
///
- void replaceWithNewValue(Value *Old, Value *New) {
- deleteValue(Old);
+ /// When this result is part of a larger aggregation, this will proxy to that
+ /// aggregation. When this result is used in isolation, it will just delegate
+ /// back to the derived class's implementation.
+ AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); }
+
+public:
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ return MayAlias;
+ }
+
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
+ return false;
+ }
+
+ ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ return MRI_ModRef;
+ }
+
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ if (!CS.hasOperandBundles())
+ // If CS has operand bundles then aliasing attributes from the function it
+ // calls do not directly apply to the CallSite. This can be made more
+ // precise in the future.
+ if (const Function *F = CS.getCalledFunction())
+ return getBestAAResults().getModRefBehavior(F);
+
+ return FMRB_UnknownModRefBehavior;
+ }
+
+ FunctionModRefBehavior getModRefBehavior(const Function *F) {
+ return FMRB_UnknownModRefBehavior;
}
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
};
+/// Synthesize \c ModRefInfo for a call site and memory location by examining
+/// the general behavior of the call site and any specific information for its
+/// arguments.
+///
+/// This essentially delegates across the alias analysis interface to collect
+/// information which may be enough to (conservatively) fulfill the query.
+template <typename DerivedT>
+ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ auto MRB = getBestAAResults().getModRefBehavior(CS);
+ if (MRB == FMRB_DoesNotAccessMemory)
+ return MRI_NoModRef;
+
+ ModRefInfo Mask = MRI_ModRef;
+ if (AAResults::onlyReadsMemory(MRB))
+ Mask = MRI_Ref;
+
+ if (AAResults::onlyAccessesArgPointees(MRB)) {
+ bool DoesAlias = false;
+ ModRefInfo AllArgsMask = MRI_NoModRef;
+ if (AAResults::doesAccessArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+ AE = CS.arg_end();
+ AI != AE; ++AI) {
+ const Value *Arg = *AI;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
+ MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI);
+ AliasResult ArgAlias = getBestAAResults().alias(ArgLoc, Loc);
+ if (ArgAlias != NoAlias) {
+ ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS, ArgIdx);
+ DoesAlias = true;
+ AllArgsMask = ModRefInfo(AllArgsMask | ArgMask);
+ }
+ }
+ }
+ if (!DoesAlias)
+ return MRI_NoModRef;
+ Mask = ModRefInfo(Mask & AllArgsMask);
+ }
+
+ // If Loc is a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & MRI_Mod) &&
+ getBestAAResults().pointsToConstantMemory(Loc, /*OrLocal*/ false))
+ Mask = ModRefInfo(Mask & ~MRI_Mod);
+
+ return Mask;
+}
+
+/// Synthesize \c ModRefInfo for two call sites by examining the general
+/// behavior of the call site and any specific information for its arguments.
+///
+/// This essentially delegates across the alias analysis interface to collect
+/// information which may be enough to (conservatively) fulfill the query.
+template <typename DerivedT>
+ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // If CS1 or CS2 are readnone, they don't interact.
+ auto CS1B = getBestAAResults().getModRefBehavior(CS1);
+ if (CS1B == FMRB_DoesNotAccessMemory)
+ return MRI_NoModRef;
+
+ auto CS2B = getBestAAResults().getModRefBehavior(CS2);
+ if (CS2B == FMRB_DoesNotAccessMemory)
+ return MRI_NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (AAResults::onlyReadsMemory(CS1B) && AAResults::onlyReadsMemory(CS2B))
+ return MRI_NoModRef;
+
+ ModRefInfo Mask = MRI_ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (AAResults::onlyReadsMemory(CS1B))
+ Mask = ModRefInfo(Mask & MRI_Ref);
+
+ // If CS2 only accesses memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (AAResults::onlyAccessesArgPointees(CS2B)) {
+ ModRefInfo R = MRI_NoModRef;
+ if (AAResults::doesAccessArgPointees(CS2B)) {
+ for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(),
+ E = CS2.arg_end();
+ I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
+ auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
+
+ // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence
+ // of CS1 on that location is the inverse.
+ ModRefInfo ArgMask =
+ getBestAAResults().getArgModRefInfo(CS2, CS2ArgIdx);
+ if (ArgMask == MRI_Mod)
+ ArgMask = MRI_ModRef;
+ else if (ArgMask == MRI_Ref)
+ ArgMask = MRI_Mod;
+
+ ArgMask = ModRefInfo(ArgMask &
+ getBestAAResults().getModRefInfo(CS1, CS2ArgLoc));
+
+ R = ModRefInfo((R | ArgMask) & Mask);
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (AAResults::onlyAccessesArgPointees(CS1B)) {
+ ModRefInfo R = MRI_NoModRef;
+ if (AAResults::doesAccessArgPointees(CS1B)) {
+ for (ImmutableCallSite::arg_iterator I = CS1.arg_begin(),
+ E = CS1.arg_end();
+ I != E; ++I) {
+ const Value *Arg = *I;
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
+ auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
+
+ // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod
+ // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1
+ // might Ref, then we care only about a Mod by CS2.
+ ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS1, CS1ArgIdx);
+ ModRefInfo ArgR = getBestAAResults().getModRefInfo(CS2, CS1ArgLoc);
+ if (((ArgMask & MRI_Mod) != MRI_NoModRef &&
+ (ArgR & MRI_ModRef) != MRI_NoModRef) ||
+ ((ArgMask & MRI_Ref) != MRI_NoModRef &&
+ (ArgR & MRI_Mod) != MRI_NoModRef))
+ R = ModRefInfo((R | ArgMask) & Mask);
+
+ if (R == Mask)
+ break;
+ }
+ }
+ return R;
+ }
+
+ return Mask;
+}
+
/// isNoAliasCall - Return true if this pointer is returned by a noalias
/// function.
bool isNoAliasCall(const Value *V);
@@ -564,6 +975,98 @@ bool isIdentifiedObject(const Value *V);
/// IdentifiedObjects.
bool isIdentifiedFunctionLocal(const Value *V);
+/// A manager for alias analyses.
+///
+/// This class can have analyses registered with it and when run, it will run
+/// all of them and aggregate their results into single AA results interface
+/// that dispatches across all of the alias analysis results available.
+///
+/// Note that the order in which analyses are registered is very significant.
+/// That is the order in which the results will be aggregated and queried.
+///
+/// This manager effectively wraps the AnalysisManager for registering alias
+/// analyses. When you register your alias analysis with this manager, it will
+/// ensure the analysis itself is registered with its AnalysisManager.
+class AAManager {
+public:
+ typedef AAResults Result;
+
+ // This type has value semantics. We have to spell these out because MSVC
+ // won't synthesize them.
+ AAManager() {}
+ AAManager(AAManager &&Arg)
+ : FunctionResultGetters(std::move(Arg.FunctionResultGetters)) {}
+ AAManager(const AAManager &Arg)
+ : FunctionResultGetters(Arg.FunctionResultGetters) {}
+ AAManager &operator=(AAManager &&RHS) {
+ FunctionResultGetters = std::move(RHS.FunctionResultGetters);
+ return *this;
+ }
+ AAManager &operator=(const AAManager &RHS) {
+ FunctionResultGetters = RHS.FunctionResultGetters;
+ return *this;
+ }
+
+ /// Register a specific AA result.
+ template <typename AnalysisT> void registerFunctionAnalysis() {
+ FunctionResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>);
+ }
+
+ Result run(Function &F, AnalysisManager<Function> &AM) {
+ Result R;
+ for (auto &Getter : FunctionResultGetters)
+ (*Getter)(F, AM, R);
+ return R;
+ }
+
+private:
+ SmallVector<void (*)(Function &F, AnalysisManager<Function> &AM,
+ AAResults &AAResults),
+ 4> FunctionResultGetters;
+
+ template <typename AnalysisT>
+ static void getFunctionAAResultImpl(Function &F,
+ AnalysisManager<Function> &AM,
+ AAResults &AAResults) {
+ AAResults.addAAResult(AM.template getResult<AnalysisT>(F));
+ }
+};
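
A sketch of wiring this up under the new pass manager; the registration of BasicAA with the underlying FunctionAnalysisManager is assumed to happen elsewhere:

    AAManager AA;
    AA.registerFunctionAnalysis<BasicAA>(); // queried in registration order
    // Later, for some Function F and AnalysisManager<Function> AM:
    AAResults AAR = AA.run(F, AM);
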
+
+/// A wrapper pass to provide the legacy pass manager access to a suitably
+/// prepared AAResults object.
+class AAResultsWrapperPass : public FunctionPass {
+ std::unique_ptr<AAResults> AAR;
+
+public:
+ static char ID;
+
+ AAResultsWrapperPass();
+
+ AAResults &getAAResults() { return *AAR; }
+ const AAResults &getAAResults() const { return *AAR; }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+FunctionPass *createAAResultsWrapperPass();
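
Under the legacy pass manager, the usual consumption idiom (with a hypothetical MyPass) is:

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AAResultsWrapperPass>();
    }

    bool MyPass::runOnFunction(Function &F) {
      AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
      // ... issue alias() and getModRefInfo() queries against AA ...
      return false;
    }
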
+
+/// A wrapper pass around a callback which can be used to populate the
+/// AAResults in the AAResultsWrapperPass from an external AA.
+///
+/// The callback provided here will be used each time we prepare an AAResults
+/// object, and will receive a reference to the function wrapper pass, the
+/// function, and the AAResults object to populate. This should be used when
+/// setting up a custom pass pipeline to inject a hook into the AA results.
+ImmutablePass *createExternalAAWrapperPass(
+ std::function<void(Pass &, Function &, AAResults &)> Callback);
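
For example (hypothetical names throughout), an out-of-tree analysis could be injected through this hook, with the caller owning the external result's lifetime:

    ImmutablePass *Hook = createExternalAAWrapperPass(
        [](Pass &P, Function &F, AAResults &AAR) {
          // getMyExternalAAResultFor is an invented accessor returning a
          // result object that outlives this AAResults aggregation.
          AAR.addAAResult(getMyExternalAAResultFor(F));
        });
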
+
+/// A helper for the legacy pass manager to create an \c AAResults
+/// object populated to the best of our ability for a particular function when
+/// inside of a \c ModulePass or a \c CallGraphSCCPass.
+AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
index 881699d..37fd69b 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -20,13 +20,13 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include <vector>
namespace llvm {
-class AliasAnalysis;
class LoadInst;
class StoreInst;
class VAArgInst;
@@ -42,13 +42,14 @@ class AliasSet : public ilist_node<AliasSet> {
AliasSet *AS;
uint64_t Size;
AAMDNodes AAInfo;
+
public:
PointerRec(Value *V)
: Val(V), PrevInList(nullptr), NextInList(nullptr), AS(nullptr), Size(0),
AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}
Value *getValue() const { return Val; }
-
+
PointerRec *getNext() const { return NextInList; }
bool hasAliasSet() const { return AS != nullptr; }
@@ -156,7 +157,7 @@ class AliasSet : public ilist_node<AliasSet> {
assert(i < UnknownInsts.size());
return UnknownInsts[i];
}
-
+
public:
/// Accessors...
bool isRef() const { return Access & RefAccess; }
@@ -190,6 +191,7 @@ public:
class iterator : public std::iterator<std::forward_iterator_tag,
PointerRec, ptrdiff_t> {
PointerRec *CurNode;
+
public:
explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {}
@@ -282,14 +284,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {
return OS;
}
-
class AliasSetTracker {
/// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be
/// notified whenever a Value is deleted.
- class ASTCallbackVH : public CallbackVH {
+ class ASTCallbackVH final : public CallbackVH {
AliasSetTracker *AST;
void deleted() override;
void allUsesReplacedWith(Value *) override;
+
public:
ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr);
ASTCallbackVH &operator=(Value *V);
@@ -347,7 +349,7 @@ public:
bool remove(Instruction *I);
void remove(AliasSet &AS);
bool removeUnknown(Instruction *I);
-
+
void clear();
/// getAliasSets - Return the alias sets that are active.
@@ -398,7 +400,6 @@ public:
///
void copyValue(Value *From, Value *To);
-
typedef ilist<AliasSet>::iterator iterator;
typedef ilist<AliasSet>::const_iterator const_iterator;
diff --git a/contrib/llvm/include/llvm/Analysis/AssumptionCache.h b/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
index 1f00b69..b903f96 100644
--- a/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -66,7 +66,7 @@ public:
/// \brief Add an @llvm.assume intrinsic to this function's cache.
///
- /// The call passed in must be an instruction within this fuction and must
+ /// The call passed in must be an instruction within this function and must
/// not already be in the cache.
void registerAssumption(CallInst *CI);
@@ -79,7 +79,7 @@ public:
}
/// \brief Access the list of assumption handles currently tracked for this
- /// fuction.
+ /// function.
///
/// Note that these produce weak handles that may be null. The caller must
/// handle that case.
@@ -140,7 +140,7 @@ public:
class AssumptionCacheTracker : public ImmutablePass {
/// A callback value handle applied to function objects, which we use to
/// delete our cache of intrinsics for a function when it is deleted.
- class FunctionCallbackVH : public CallbackVH {
+ class FunctionCallbackVH final : public CallbackVH {
AssumptionCacheTracker *ACT;
void deleted() override;
diff --git a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
new file mode 100644
index 0000000..181a932
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -0,0 +1,223 @@
+//===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for LLVM's primary stateless and local alias analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H
+#define LLVM_ANALYSIS_BASICALIASANALYSIS_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+class AssumptionCache;
+class DominatorTree;
+class LoopInfo;
+
+/// This is the AA result object for the basic, local, and stateless alias
+/// analysis. It implements the AA query interface in an entirely stateless
+/// manner. As one consequence, it is never invalidated. While it does retain
+/// some storage, that is used as an optimization and not to preserve
+/// information from query to query.
+class BasicAAResult : public AAResultBase<BasicAAResult> {
+ friend AAResultBase<BasicAAResult>;
+
+ const DataLayout &DL;
+ AssumptionCache &AC;
+ DominatorTree *DT;
+ LoopInfo *LI;
+
+public:
+ BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI,
+ AssumptionCache &AC, DominatorTree *DT = nullptr,
+ LoopInfo *LI = nullptr)
+ : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {}
+
+ BasicAAResult(const BasicAAResult &Arg)
+ : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {}
+ BasicAAResult(BasicAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT),
+ LI(Arg.LI) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+
+ /// Chases pointers until we either find a constant global or must give up.
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+
+ /// Get the location associated with a pointer argument of a callsite.
+ ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+
+ /// Returns the behavior when calling the given call site.
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// Returns the behavior when calling the given function. For use when the
+ /// call site is not known.
+ FunctionModRefBehavior getModRefBehavior(const Function *F);
+
+private:
+ // A linear transformation of a Value; this class represents ZExt(SExt(V,
+ // SExtBits), ZExtBits) * Scale + Offset.
+ struct VariableGEPIndex {
+
+ // An opaque Value - we can't decompose this further.
+ const Value *V;
+
+ // We need to track what extensions we've done as we consider the same Value
+ // with different extensions as different variables in a GEP's linear
+ // expression;
+ // e.g.: if V == -1, then sext(V) != zext(V).
+ unsigned ZExtBits;
+ unsigned SExtBits;
+
+ int64_t Scale;
+
+ bool operator==(const VariableGEPIndex &Other) const {
+ return V == Other.V && ZExtBits == Other.ZExtBits &&
+ SExtBits == Other.SExtBits && Scale == Other.Scale;
+ }
+
+ bool operator!=(const VariableGEPIndex &Other) const {
+ return !operator==(Other);
+ }
+ };
+
+ /// Track alias queries to guard against recursion.
+ typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
+ typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
+ AliasCacheTy AliasCache;
+
+ /// Tracks phi nodes we have visited.
+ ///
+ /// When interpreting "Value" pointer equality as value equality we need to make
+ /// sure that the "Value" is not part of a cycle. Otherwise, two uses could
+ /// come from different "iterations" of a cycle and see different values for
+ /// the same "Value" pointer.
+ ///
+ /// The following example shows the problem:
+ /// %p = phi(%alloca1, %addr2)
+ /// %l = load %ptr
+ /// %addr1 = gep, %alloca2, 0, %l
+ /// %addr2 = gep %alloca2, 0, (%l + 1)
+ /// alias(%p, %addr1) -> MayAlias !
+ /// store %l, ...
+ SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs;
+
+ /// Tracks instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value *, 16> Visited;
+
+ static const Value *
+ GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset,
+ unsigned &ZExtBits, unsigned &SExtBits,
+ const DataLayout &DL, unsigned Depth, AssumptionCache *AC,
+ DominatorTree *DT, bool &NSW, bool &NUW);
+
+ static const Value *
+ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ bool &MaxLookupReached, const DataLayout &DL,
+ AssumptionCache *AC, DominatorTree *DT);
+ /// \brief A heuristic for aliasGEP that searches for a constant offset
+ /// between the variables.
+ ///
+ /// GetLinearExpression has some limitations, as generally zext(%x + 1)
+ /// != zext(%x) + zext(1) if the arithmetic overflows. GetLinearExpression
+ /// will therefore conservatively refuse to decompose these expressions.
+ /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if
+ /// the addition overflows.
+ bool
+ constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ uint64_t V1Size, uint64_t V2Size, int64_t BaseOffset,
+ AssumptionCache *AC, DominatorTree *DT);
+
+ bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+ void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src);
+
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const AAMDNodes &V1AAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const AAMDNodes &PNAAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo);
+
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const AAMDNodes &SIAAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size, AAMDNodes V1AATag,
+ const Value *V2, uint64_t V2Size, AAMDNodes V2AATag);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class BasicAA {
+public:
+ typedef BasicAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ BasicAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "BasicAliasAnalysis"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the BasicAAResult object.
+class BasicAAWrapperPass : public FunctionPass {
+ std::unique_ptr<BasicAAResult> Result;
+
+ virtual void anchor();
+
+public:
+ static char ID;
+
+ BasicAAWrapperPass();
+
+ BasicAAResult &getResult() { return *Result; }
+ const BasicAAResult &getResult() const { return *Result; }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+FunctionPass *createBasicAAWrapperPass();
+
+/// A helper for the legacy pass manager to create a \c BasicAAResult object
+/// populated to the best of our ability for a particular function when inside
+/// of a \c ModulePass or a \c CallGraphSCCPass.
+BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F);
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
index f27c32d..6f2a2b5 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -21,26 +21,20 @@
namespace llvm {
class BranchProbabilityInfo;
+class LoopInfo;
template <class BlockT> class BlockFrequencyInfoImpl;
/// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to
/// estimate IR basic block frequencies.
-class BlockFrequencyInfo : public FunctionPass {
+class BlockFrequencyInfo {
typedef BlockFrequencyInfoImpl<BasicBlock> ImplType;
std::unique_ptr<ImplType> BFI;
public:
- static char ID;
-
BlockFrequencyInfo();
+ BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI);
- ~BlockFrequencyInfo() override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnFunction(Function &F) override;
- void releaseMemory() override;
- void print(raw_ostream &O, const Module *M) const override;
const Function *getFunction() const;
void view() const;
@@ -51,6 +45,13 @@ public:
/// floating points.
BlockFrequency getBlockFreq(const BasicBlock *BB) const;
+ /// Set the frequency of the given basic block.
+ void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
+
+ /// calculate - compute block frequency info for the given function.
+ void calculate(const Function &F, const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI);
+
// Print the block frequency Freq to OS using the current function's entry
// frequency to convert freq into a relative decimal form.
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const;
@@ -60,7 +61,28 @@ public:
raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const;
uint64_t getEntryFreq() const;
+ void releaseMemory();
+ void print(raw_ostream &OS) const;
+};
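
With the pass machinery split out, the analysis can now be driven directly; a sketch, assuming BPI and LI are valid BranchProbabilityInfo/LoopInfo results for F:

    BlockFrequencyInfo BFI;
    BFI.calculate(F, BPI, LI);
    for (const BasicBlock &BB : F)
      BFI.printBlockFreq(errs(), &BB) << "\n";
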
+/// \brief Legacy analysis pass which computes \c BlockFrequencyInfo.
+class BlockFrequencyInfoWrapperPass : public FunctionPass {
+ BlockFrequencyInfo BFI;
+
+public:
+ static char ID;
+
+ BlockFrequencyInfoWrapperPass();
+ ~BlockFrequencyInfoWrapperPass() override;
+
+ BlockFrequencyInfo &getBFI() { return BFI; }
+ const BlockFrequencyInfo &getBFI() const { return BFI; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M) const override;
};
}
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 32d9609..387e9a8 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -84,7 +84,7 @@ public:
/// \brief Add another mass.
///
/// Adds another mass, saturating at \a isFull() rather than overflowing.
- BlockMass &operator+=(const BlockMass &X) {
+ BlockMass &operator+=(BlockMass X) {
uint64_t Sum = Mass + X.Mass;
Mass = Sum < Mass ? UINT64_MAX : Sum;
return *this;
@@ -94,23 +94,23 @@ public:
///
/// Subtracts another mass, saturating at \a isEmpty() rather than
/// underflowing.
- BlockMass &operator-=(const BlockMass &X) {
+ BlockMass &operator-=(BlockMass X) {
uint64_t Diff = Mass - X.Mass;
Mass = Diff > Mass ? 0 : Diff;
return *this;
}
- BlockMass &operator*=(const BranchProbability &P) {
+ BlockMass &operator*=(BranchProbability P) {
Mass = P.scale(Mass);
return *this;
}
- bool operator==(const BlockMass &X) const { return Mass == X.Mass; }
- bool operator!=(const BlockMass &X) const { return Mass != X.Mass; }
- bool operator<=(const BlockMass &X) const { return Mass <= X.Mass; }
- bool operator>=(const BlockMass &X) const { return Mass >= X.Mass; }
- bool operator<(const BlockMass &X) const { return Mass < X.Mass; }
- bool operator>(const BlockMass &X) const { return Mass > X.Mass; }
+ bool operator==(BlockMass X) const { return Mass == X.Mass; }
+ bool operator!=(BlockMass X) const { return Mass != X.Mass; }
+ bool operator<=(BlockMass X) const { return Mass <= X.Mass; }
+ bool operator>=(BlockMass X) const { return Mass >= X.Mass; }
+ bool operator<(BlockMass X) const { return Mass < X.Mass; }
+ bool operator>(BlockMass X) const { return Mass > X.Mass; }
/// \brief Convert to scaled number.
///
@@ -122,20 +122,20 @@ public:
raw_ostream &print(raw_ostream &OS) const;
};
-inline BlockMass operator+(const BlockMass &L, const BlockMass &R) {
+inline BlockMass operator+(BlockMass L, BlockMass R) {
return BlockMass(L) += R;
}
-inline BlockMass operator-(const BlockMass &L, const BlockMass &R) {
+inline BlockMass operator-(BlockMass L, BlockMass R) {
return BlockMass(L) -= R;
}
-inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) {
+inline BlockMass operator*(BlockMass L, BranchProbability R) {
return BlockMass(L) *= R;
}
-inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) {
+inline BlockMass operator*(BranchProbability L, BlockMass R) {
return BlockMass(R) *= L;
}
-inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) {
+inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
return X.print(OS);
}
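
A trace of the saturating semantics above on raw integers, mirroring the arithmetic in operator+= rather than calling the class:

    uint64_t Mass = UINT64_MAX - 1;
    uint64_t Sum = Mass + 10;              // wraps around to 8
    Mass = Sum < Mass ? UINT64_MAX : Sum;  // += saturates at full instead
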
@@ -477,6 +477,8 @@ public:
BlockFrequency getBlockFreq(const BlockNode &Node) const;
+ void setBlockFreq(const BlockNode &Node, uint64_t Freq);
+
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
const BlockFrequency &Freq) const;
@@ -905,14 +907,15 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
public:
const FunctionT *getFunction() const { return F; }
- void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI,
- const LoopInfoT *LI);
+ void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
+ const LoopInfoT &LI);
BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}
using BlockFrequencyInfoImplBase::getEntryFreq;
BlockFrequency getBlockFreq(const BlockT *BB) const {
return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
}
+ void setBlockFreq(const BlockT *BB, uint64_t Freq);
Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
}
@@ -938,13 +941,13 @@ public:
};
template <class BT>
-void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
- const BranchProbabilityInfoT *BPI,
- const LoopInfoT *LI) {
+void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F,
+ const BranchProbabilityInfoT &BPI,
+ const LoopInfoT &LI) {
// Save the parameters.
- this->BPI = BPI;
- this->LI = LI;
- this->F = F;
+ this->BPI = &BPI;
+ this->LI = &LI;
+ this->F = &F;
// Clean up left-over data structures.
BlockFrequencyInfoImplBase::clear();
@@ -952,8 +955,8 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
Nodes.clear();
// Initialize.
- DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n================="
- << std::string(F->getName().size(), '=') << "\n");
+ DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n================="
+ << std::string(F.getName().size(), '=') << "\n");
initializeRPOT();
initializeLoops();
@@ -965,8 +968,23 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F,
finalizeMetrics();
}
+template <class BT>
+void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) {
+ if (Nodes.count(BB))
+ BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq);
+ else {
+ // If BB is a newly added block after BFI is done, we need to create a new
+ // BlockNode for it and assign it a new index. The index can be determined
+ // by the size of Freqs.
+ BlockNode NewNode(Freqs.size());
+ Nodes[BB] = NewNode;
+ Freqs.emplace_back();
+ BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq);
+ }
+}
+
template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
- const BlockT *Entry = F->begin();
+ const BlockT *Entry = &F->front();
RPOT.reserve(F->size());
std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
std::reverse(RPOT.begin(), RPOT.end());
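
The new setBlockFreq above appends to the dense Freqs table when it meets a block created after the analysis ran, using Freqs.size() as the next free index. The same append-or-update pattern in isolation (illustrative names only, not LLVM API):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Dense, index-addressed frequency table with late insertion, as in the
    // new BlockFrequencyInfoImpl<BT>::setBlockFreq.
    struct FreqTable {
      std::unordered_map<const void *, std::size_t> Nodes; // block -> index
      std::vector<uint64_t> Freqs;                         // index -> frequency

      void setFreq(const void *BB, uint64_t Freq) {
        auto It = Nodes.find(BB);
        if (It != Nodes.end()) {
          Freqs[It->second] = Freq;   // known block: update in place
        } else {
          Nodes[BB] = Freqs.size();   // new block: next free index
          Freqs.push_back(Freq);
        }
      }
    };
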
@@ -1155,6 +1173,13 @@ void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
updateLoopWithIrreducible(*OuterLoop);
}
+namespace {
+// A helper function that converts a branch probability into weight.
+inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) {
+ return Prob.getNumerator();
+}
+} // namespace
+
template <class BT>
bool
BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
@@ -1171,10 +1196,8 @@ BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
const BlockT *BB = getBlock(Node);
for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB);
SI != SE; ++SI)
- // Do not dereference SI, or getEdgeWeight() is linear in the number of
- // successors.
if (!addToDist(Dist, OuterLoop, Node, getNode(*SI),
- BPI->getEdgeWeight(BB, SI)))
+ getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
// Irreducible backedge.
return false;
}
@@ -1190,10 +1213,11 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const {
if (!F)
return OS;
OS << "block-frequency-info: " << F->getName() << "\n";
- for (const BlockT &BB : *F)
- OS << " - " << bfi_detail::getBlockName(&BB)
- << ": float = " << getFloatingBlockFreq(&BB)
- << ", int = " << getBlockFreq(&BB).getFrequency() << "\n";
+ for (const BlockT &BB : *F) {
+ OS << " - " << bfi_detail::getBlockName(&BB) << ": float = ";
+ getFloatingBlockFreq(&BB).print(OS, 5)
+ << ", int = " << getBlockFreq(&BB).getFrequency() << "\n";
+ }
// Add an extra newline for readability.
OS << "\n";
diff --git a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 9d86756..cfdf218 100644
--- a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -25,9 +25,9 @@ namespace llvm {
class LoopInfo;
class raw_ostream;
-/// \brief Analysis pass providing branch probability information.
+/// \brief Analysis providing branch probability information.
///
-/// This is a function analysis pass which provides information on the relative
+/// This is a function analysis which provides information on the relative
/// probabilities of each "edge" in the function's CFG where such an edge is
/// defined by a pair (PredBlock and an index in the successors). The
/// probability of an edge from one block is always relative to the
@@ -37,20 +37,14 @@ class raw_ostream;
/// identify an edge, since we can have multiple edges from Src to Dst.
/// As an example, we can have a switch which jumps to Dst with value 0 and
/// value 10.
-class BranchProbabilityInfo : public FunctionPass {
+class BranchProbabilityInfo {
public:
- static char ID;
-
- BranchProbabilityInfo() : FunctionPass(ID) {
- initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
+ BranchProbabilityInfo() {}
+ BranchProbabilityInfo(Function &F, const LoopInfo &LI) { calculate(F, LI); }
- void releaseMemory() override;
+ void releaseMemory();
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
+ void print(raw_ostream &OS) const;
/// \brief Get an edge's probability, relative to other out-edges of the Src.
///
@@ -67,6 +61,9 @@ public:
BranchProbability getEdgeProbability(const BasicBlock *Src,
const BasicBlock *Dst) const;
+ BranchProbability getEdgeProbability(const BasicBlock *Src,
+ succ_const_iterator Dst) const;
+
/// \brief Test if an edge is hot relative to other out-edges of the Src.
///
/// Check whether this edge out of the source block is 'hot'. We define hot
@@ -87,37 +84,22 @@ public:
raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src,
const BasicBlock *Dst) const;
- /// \brief Get the raw edge weight calculated for the edge.
+ /// \brief Set the raw edge probability for the given edge.
///
- /// This returns the raw edge weight. It is guaranteed to fall between 1 and
- /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation.
- /// This interface should be very carefully, and primarily by routines that
- /// are updating the analysis by later calling setEdgeWeight.
- uint32_t getEdgeWeight(const BasicBlock *Src,
- unsigned IndexInSuccessors) const;
-
- /// \brief Get the raw edge weight calculated for the block pair.
- ///
- /// This returns the sum of all raw edge weights from Src to Dst.
- /// It is guaranteed to fall between 1 and UINT32_MAX.
- uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const;
-
- uint32_t getEdgeWeight(const BasicBlock *Src,
- succ_const_iterator Dst) const;
-
- /// \brief Set the raw edge weight for a given edge.
- ///
- /// This allows a pass to explicitly set the edge weight for an edge. It can
- /// be used when updating the CFG to update and preserve the branch
+ /// This allows a pass to explicitly set the edge probability for an edge. It
+ /// can be used when updating the CFG to update and preserve the branch
/// probability information. Read the implementation of how these edge
- /// weights are calculated carefully before using!
- void setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors,
- uint32_t Weight);
+ /// probabilities are calculated carefully before using!
+ void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors,
+ BranchProbability Prob);
- static uint32_t getBranchWeightStackProtector(bool IsLikely) {
- return IsLikely ? (1u << 20) - 1 : 1;
+ static BranchProbability getBranchProbStackProtector(bool IsLikely) {
+ static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20);
+ return IsLikely ? LikelyProb : LikelyProb.getCompl();
}
+ void calculate(Function &F, const LoopInfo& LI);
+
private:
// Since we allow duplicate edges from one basic block to another, we use
// a pair (PredBlock and an index in the successors) to specify an edge.
@@ -131,10 +113,7 @@ private:
// weight to just "inherit" the non-zero weight of an adjacent successor.
static const uint32_t DEFAULT_WEIGHT = 16;
- DenseMap<Edge, uint32_t> Weights;
-
- /// \brief Handle to the LoopInfo analysis.
- LoopInfo *LI;
+ DenseMap<Edge, BranchProbability> Probs;
/// \brief Track the last function we run over for printing.
Function *LastF;
@@ -145,19 +124,37 @@ private:
/// \brief Track the set of blocks that always lead to a cold call.
SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall;
- /// \brief Get sum of the block successors' weights.
- uint32_t getSumForBlock(const BasicBlock *BB) const;
-
bool calcUnreachableHeuristics(BasicBlock *BB);
bool calcMetadataWeights(BasicBlock *BB);
bool calcColdCallHeuristics(BasicBlock *BB);
bool calcPointerHeuristics(BasicBlock *BB);
- bool calcLoopBranchHeuristics(BasicBlock *BB);
+ bool calcLoopBranchHeuristics(BasicBlock *BB, const LoopInfo &LI);
bool calcZeroHeuristics(BasicBlock *BB);
bool calcFloatingPointHeuristics(BasicBlock *BB);
bool calcInvokeHeuristics(BasicBlock *BB);
};
+/// \brief Legacy analysis pass which computes \c BranchProbabilityInfo.
+class BranchProbabilityInfoWrapperPass : public FunctionPass {
+ BranchProbabilityInfo BPI;
+
+public:
+ static char ID;
+
+ BranchProbabilityInfoWrapperPass() : FunctionPass(ID) {
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ BranchProbabilityInfo &getBPI() { return BPI; }
+ const BranchProbabilityInfo &getBPI() const { return BPI; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+};
+
}
#endif
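
Two things worth noting in the rewrite above: getBranchProbStackProtector(true) now returns the fraction (2^20 - 1) / 2^20 rather than a raw weight, and the analysis itself is an ordinary object that legacy passes reach through the new wrapper. A sketch of direct construction, using only signatures shown in this hunk plus succ_begin/succ_empty from llvm/IR/CFG.h:

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Probability of the entry block's first outgoing edge.
    static BranchProbability entryEdgeProb(Function &F, const LoopInfo &LI) {
      BranchProbabilityInfo BPI(F, LI); // constructor runs calculate(F, LI)
      const BasicBlock *Entry = &F.getEntryBlock();
      if (succ_empty(Entry))
        return BranchProbability::getOne(); // no edges to ask about
      return BPI.getEdgeProbability(Entry, succ_begin(Entry));
    }

    // Inside a legacy pass, the wrapper hands out the shared result instead:
    //   auto &BPI = getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
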
diff --git a/contrib/llvm/include/llvm/Analysis/CFG.h b/contrib/llvm/include/llvm/Analysis/CFG.h
index 7c4df78..35165f4 100644
--- a/contrib/llvm/include/llvm/Analysis/CFG.h
+++ b/contrib/llvm/include/llvm/Analysis/CFG.h
@@ -40,7 +40,7 @@ void FindFunctionBackedges(
/// Search for the specified successor of basic block BB and return its position
/// in the terminator instruction's list of successors. It is an error to call
/// this with a block that is not a successor.
-unsigned GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ);
+unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
/// Return true if the specified edge is a critical edge. Critical edges are
/// edges from a block with multiple successors to a block with multiple
diff --git a/contrib/llvm/include/llvm/Analysis/CFLAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/CFLAliasAnalysis.h
new file mode 100644
index 0000000..7473a45
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/CFLAliasAnalysis.h
@@ -0,0 +1,158 @@
+//===- CFLAliasAnalysis.h - CFL-Based Alias Analysis Interface ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for LLVM's CFL-based alias analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CFLALIASANALYSIS_H
+#define LLVM_ANALYSIS_CFLALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include <forward_list>
+
+namespace llvm {
+
+class CFLAAResult : public AAResultBase<CFLAAResult> {
+ friend AAResultBase<CFLAAResult>;
+
+ struct FunctionInfo;
+
+public:
+ explicit CFLAAResult(const TargetLibraryInfo &TLI);
+ CFLAAResult(CFLAAResult &&Arg);
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ /// \brief Inserts the given Function into the cache.
+ void scan(Function *Fn);
+
+ void evict(Function *Fn);
+
+ /// \brief Ensures that the given function is available in the cache.
+ /// Returns the appropriate entry from the cache.
+ const Optional<FunctionInfo> &ensureCached(Function *Fn);
+
+ AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ if (LocA.Ptr == LocB.Ptr) {
+ if (LocA.Size == LocB.Size) {
+ return MustAlias;
+ } else {
+ return PartialAlias;
+ }
+ }
+
+ // Comparisons between global variables and other constants should be
+ // handled by BasicAA.
+ // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
+ // a GlobalValue and ConstantExpr, but every query needs to have at least
+ // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
+ // are.
+ if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
+ return AAResultBase::alias(LocA, LocB);
+ }
+
+ AliasResult QueryResult = query(LocA, LocB);
+ if (QueryResult == MayAlias)
+ return AAResultBase::alias(LocA, LocB);
+
+ return QueryResult;
+ }
+
+private:
+ struct FunctionHandle final : public CallbackVH {
+ FunctionHandle(Function *Fn, CFLAAResult *Result)
+ : CallbackVH(Fn), Result(Result) {
+ assert(Fn != nullptr);
+ assert(Result != nullptr);
+ }
+
+ void deleted() override { removeSelfFromCache(); }
+ void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
+
+ private:
+ CFLAAResult *Result;
+
+ void removeSelfFromCache() {
+ assert(Result != nullptr);
+ auto *Val = getValPtr();
+ Result->evict(cast<Function>(Val));
+ setValPtr(nullptr);
+ }
+ };
+
+ /// \brief Cached mapping of Functions to their StratifiedSets.
+ /// If a function's sets are currently being built, it is marked
+ /// in the cache as an Optional without a value. This way, if we
+ /// have any kind of recursion, it is discernable from a function
+ /// that simply has empty sets.
+ DenseMap<Function *, Optional<FunctionInfo>> Cache;
+ std::forward_list<FunctionHandle> Handles;
+
+ FunctionInfo buildSetsFrom(Function *F);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+///
+/// FIXME: We really should refactor CFL to use the analysis more heavily, and
+/// in particular to leverage invalidation to trigger re-computation of sets.
+class CFLAA {
+public:
+ typedef CFLAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ CFLAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "CFLAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the CFLAAResult object.
+class CFLAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<CFLAAResult> Result;
+
+public:
+ static char ID;
+
+ CFLAAWrapperPass();
+
+ CFLAAResult &getResult() { return *Result; }
+ const CFLAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+//===--------------------------------------------------------------------===//
+//
+// createCFLAAWrapperPass - This pass implements a set-based approach to
+// alias analysis.
+//
+ImmutablePass *createCFLAAWrapperPass();
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 6a406cd..e7635eb 100644
--- a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -358,7 +358,7 @@ private:
/// returned PreservedAnalysis set.
class CGSCCAnalysisManagerFunctionProxy {
public:
- /// \brief Result proxy object for \c ModuleAnalysisManagerFunctionProxy.
+ /// \brief Result proxy object for \c CGSCCAnalysisManagerFunctionProxy.
class Result {
public:
explicit Result(const CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {}
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraph.h b/contrib/llvm/include/llvm/Analysis/CallGraph.h
index 662ae0e..5562e9b 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraph.h
@@ -75,7 +75,8 @@ class CallGraphNode;
class CallGraph {
Module &M;
- typedef std::map<const Function *, CallGraphNode *> FunctionMapTy;
+ typedef std::map<const Function *, std::unique_ptr<CallGraphNode>>
+ FunctionMapTy;
/// \brief A map from \c Function* to \c CallGraphNode*.
FunctionMapTy FunctionMap;
@@ -90,7 +91,7 @@ class CallGraph {
/// \brief This node has edges to it from all functions making indirect calls
/// or calling an external function.
- CallGraphNode *CallsExternalNode;
+ std::unique_ptr<CallGraphNode> CallsExternalNode;
/// \brief Replace the function represented by this node by another.
///
@@ -104,7 +105,8 @@ class CallGraph {
void addToCallGraph(Function *F);
public:
- CallGraph(Module &M);
+ explicit CallGraph(Module &M);
+ CallGraph(CallGraph &&Arg);
~CallGraph();
void print(raw_ostream &OS) const;
@@ -125,21 +127,23 @@ public:
inline const CallGraphNode *operator[](const Function *F) const {
const_iterator I = FunctionMap.find(F);
assert(I != FunctionMap.end() && "Function not in callgraph!");
- return I->second;
+ return I->second.get();
}
/// \brief Returns the call graph node for the provided function.
inline CallGraphNode *operator[](const Function *F) {
const_iterator I = FunctionMap.find(F);
assert(I != FunctionMap.end() && "Function not in callgraph!");
- return I->second;
+ return I->second.get();
}
/// \brief Returns the \c CallGraphNode which is used to represent
/// undetermined calls into the callgraph.
CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; }
- CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; }
+ CallGraphNode *getCallsExternalNode() const {
+ return CallsExternalNode.get();
+ }
//===---------------------------------------------------------------------
// Functions to keep a call graph up to date with a function that has been
@@ -444,8 +448,10 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> {
static NodeType *getEntryNode(CallGraph *CGN) {
return CGN->getExternalCallingNode(); // Start at the external node!
}
- typedef std::pair<const Function *, CallGraphNode *> PairTy;
- typedef std::pointer_to_unary_function<PairTy, CallGraphNode &> DerefFun;
+ typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>>
+ PairTy;
+ typedef std::pointer_to_unary_function<const PairTy &, CallGraphNode &>
+ DerefFun;
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator;
@@ -456,7 +462,7 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> {
return map_iterator(CG->end(), DerefFun(CGdereference));
}
- static CallGraphNode &CGdereference(PairTy P) { return *P.second; }
+ static CallGraphNode &CGdereference(const PairTy &P) { return *P.second; }
};
template <>
@@ -465,8 +471,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits<
static NodeType *getEntryNode(const CallGraph *CGN) {
return CGN->getExternalCallingNode(); // Start at the external node!
}
- typedef std::pair<const Function *, const CallGraphNode *> PairTy;
- typedef std::pointer_to_unary_function<PairTy, const CallGraphNode &>
+ typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>>
+ PairTy;
+ typedef std::pointer_to_unary_function<const PairTy &, const CallGraphNode &>
DerefFun;
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
@@ -478,7 +485,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits<
return map_iterator(CG->end(), DerefFun(CGdereference));
}
- static const CallGraphNode &CGdereference(PairTy P) { return *P.second; }
+ static const CallGraphNode &CGdereference(const PairTy &P) {
+ return *P.second;
+ }
};
} // End llvm namespace
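
Because FunctionMap now owns its nodes through std::unique_ptr, anything iterating the map by hand has to go through the smart pointer, as the operator[] changes above do with .get(). A sketch of the pattern (CallGraphNode::size() counts outgoing call edges):

    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Total number of call edges in the module's call graph.
    static unsigned countEdges(Module &M) {
      CallGraph CG(M);
      unsigned Edges = 0;
      // Each entry is pair<const Function *, std::unique_ptr<CallGraphNode>>.
      for (auto &Entry : CG)
        Edges += Entry.second->size();
      return Edges;
    }
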
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
index 667e171..9c7f7bd 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
@@ -30,7 +30,7 @@ class CallGraphNode;
class CallGraph;
class PMStack;
class CallGraphSCC;
-
+
class CallGraphSCCPass : public Pass {
public:
explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {}
@@ -79,25 +79,26 @@ public:
void getAnalysisUsage(AnalysisUsage &Info) const override;
};
-/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
+/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
class CallGraphSCC {
void *Context; // The CGPassManager object that is vending this.
std::vector<CallGraphNode*> Nodes;
+
public:
CallGraphSCC(void *context) : Context(context) {}
-
- void initialize(CallGraphNode*const*I, CallGraphNode*const*E) {
+
+ void initialize(CallGraphNode *const *I, CallGraphNode *const *E) {
Nodes.assign(I, E);
}
-
+
bool isSingular() const { return Nodes.size() == 1; }
unsigned size() const { return Nodes.size(); }
-
+
/// ReplaceNode - This informs the SCC and the pass manager that the specified
/// Old node has been deleted, and New is to be used in its place.
void ReplaceNode(CallGraphNode *Old, CallGraphNode *New);
-
- typedef std::vector<CallGraphNode*>::const_iterator iterator;
+
+ typedef std::vector<CallGraphNode *>::const_iterator iterator;
iterator begin() const { return Nodes.begin(); }
iterator end() const { return Nodes.end(); }
};
diff --git a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
index 8b7c7a9..8d2c095 100644
--- a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -20,6 +20,7 @@ namespace llvm {
class Use;
class Instruction;
class DominatorTree;
+ class OrderedBasicBlock;
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
@@ -41,10 +42,12 @@ namespace llvm {
/// it or not. The boolean StoreCaptures specified whether storing the value
/// (or part of it) into memory anywhere automatically counts as capturing it
/// or not. Captures by the provided instruction are considered if the
- /// final parameter is true.
+ /// final parameter is true. An ordered basic block in \p OBB could be used
+ /// to speed up capture-tracker queries.
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
- DominatorTree *DT, bool IncludeI = false);
+ DominatorTree *DT, bool IncludeI = false,
+ OrderedBasicBlock *OBB = nullptr);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
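
The new OBB parameter lets a caller that issues many capture queries against the same block share one instruction-order cache instead of rescanning the block each time. A sketch of the intended pattern, assuming OrderedBasicBlock is constructed from the queried block (llvm/Analysis/OrderedBasicBlock.h):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/Analysis/OrderedBasicBlock.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // True if any pointer in Ptrs may be captured before instruction I.
    static bool anyCapturedBefore(ArrayRef<const Value *> Ptrs,
                                  const Instruction *I, DominatorTree *DT) {
      OrderedBasicBlock OBB(I->getParent()); // shared across all queries
      for (const Value *P : Ptrs)
        if (PointerMayBeCapturedBefore(P, /*ReturnCaptures=*/true,
                                       /*StoreCaptures=*/true, I, DT,
                                       /*IncludeI=*/false, &OBB))
          return true;
      return false;
    }
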
diff --git a/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
index cb74e9f..ca50ee2 100644
--- a/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -36,8 +36,23 @@ public:
DOTGraphTraitsViewer(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
+ /// @brief Return true if this function should be processed.
+ ///
+ /// An implementation of this class may override this function to indicate that
+ /// only certain functions should be viewed.
+ ///
+ /// @param Analysis The current analysis result for this function.
+ virtual bool processFunction(Function &F, AnalysisT &Analysis) {
+ return true;
+ }
+
bool runOnFunction(Function &F) override {
- GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
+ auto &Analysis = getAnalysis<AnalysisT>();
+
+ if (!processFunction(F, Analysis))
+ return false;
+
+ GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
@@ -63,8 +78,23 @@ public:
DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
+ /// @brief Return true if this function should be processed.
+ ///
+ /// An implementation of this class may override this function to indicate that
+ /// only certain functions should be printed.
+ ///
+ /// @param Analysis The current analysis result for this function.
+ virtual bool processFunction(Function &F, AnalysisT &Analysis) {
+ return true;
+ }
+
bool runOnFunction(Function &F) override {
- GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
+ auto &Analysis = getAnalysis<AnalysisT>();
+
+ if (!processFunction(F, Analysis))
+ return false;
+
+ GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
std::string Filename = Name + "." + F.getName().str() + ".dot";
std::error_code EC;
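
The processFunction hook added in both hunks gives subclasses an early-out before any graph is built. A sketch of a filtering printer in the style of the in-tree dominator printers; the DomTreeGraphTraits mapper is defined here, but the DOTGraphTraits<DominatorTree *> specialization those printers rely on is assumed, not shown:

    #include "llvm/Analysis/DOTGraphTraitsPass.h"
    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // Maps the wrapper pass to the graph actually drawn.
    struct DomTreeGraphTraits {
      static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
        return &DTWP->getDomTree();
      }
    };

    // Hypothetical printer that only emits .dot files for functions whose
    // names start with "hot_".
    struct HotOnlyDomPrinter
        : public DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false,
                                       DominatorTree *, DomTreeGraphTraits> {
      static char ID;
      HotOnlyDomPrinter() : DOTGraphTraitsPrinter("dom-hot", ID) {}

      bool processFunction(Function &F,
                           DominatorTreeWrapperPass &DTWP) override {
        return F.getName().startswith("hot_");
      }
    };
    char HotOnlyDomPrinter::ID = 0;
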
diff --git a/contrib/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
new file mode 100644
index 0000000..42932bf
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
@@ -0,0 +1,75 @@
+//===-- llvm/Analysis/DemandedBits.h - Determine demanded bits --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow. For example in this sequence:
+//
+// %1 = add i32 %x, %y
+// %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H
+#define LLVM_ANALYSIS_DEMANDED_BITS_H
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class FunctionPass;
+class Function;
+class Instruction;
+class DominatorTree;
+class AssumptionCache;
+
+struct DemandedBits : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DemandedBits();
+
+ bool runOnFunction(Function& F) override;
+ void getAnalysisUsage(AnalysisUsage& AU) const override;
+ void print(raw_ostream &OS, const Module *M) const override;
+
+ /// Return the bits demanded from instruction I.
+ APInt getDemandedBits(Instruction *I);
+
+ /// Return true if, during analysis, I could not be reached.
+ bool isInstructionDead(Instruction *I);
+
+private:
+ void performAnalysis();
+ void determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2);
+
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ Function *F;
+ bool Analyzed;
+
+ // The set of visited instructions (non-integer-typed only).
+ SmallPtrSet<Instruction*, 128> Visited;
+ DenseMap<Instruction *, APInt> AliveBits;
+};
+
+/// Create a demanded bits analysis pass.
+FunctionPass *createDemandedBitsPass();
+
+} // End llvm namespace
+
+#endif
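
The header comment's IR example translates directly into machine arithmetic: once the result is truncated to i16, the high bits of the add can take any value without being observable. A standalone check of that claim (plain C++, no LLVM dependency):

    #include <cassert>
    #include <cstdint>

    // Only the low 16 bits of (X + Y) are demanded by the truncation, so
    // corrupting the undemanded high bits of X cannot change the result.
    int main() {
      uint32_t X = 0xDEAD1234u, Y = 0x0000F00Du;
      uint16_t Demanded  = static_cast<uint16_t>(X + Y);
      uint16_t Corrupted = static_cast<uint16_t>((X ^ 0xFFFF0000u) + Y);
      assert(Demanded == Corrupted);
    }
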
diff --git a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
index a08ce57..5290552 100644
--- a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -42,11 +42,11 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
namespace llvm {
- class AliasAnalysis;
class Loop;
class LoopInfo;
class ScalarEvolution;
@@ -69,6 +69,15 @@ namespace llvm {
/// as singly-linked lists, with the "next" fields stored in the dependence
/// itself.
class Dependence {
+ protected:
+ Dependence(const Dependence &) = default;
+
+ // FIXME: When we move to MSVC 2015 as the base compiler for Visual Studio
+ // support, uncomment this line to allow a defaulted move constructor for
+ // Dependence. Currently, FullDependence relies on the copy constructor, but
+ // that is acceptable given the triviality of the class.
+ // Dependence(Dependence &&) = default;
+
public:
Dependence(Instruction *Source,
Instruction *Destination) :
@@ -176,38 +185,30 @@ namespace llvm {
/// getNextPredecessor - Returns the value of the NextPredecessor
/// field.
- const Dependence *getNextPredecessor() const {
- return NextPredecessor;
- }
-
+ const Dependence *getNextPredecessor() const { return NextPredecessor; }
+
/// getNextSuccessor - Returns the value of the NextSuccessor
/// field.
- const Dependence *getNextSuccessor() const {
- return NextSuccessor;
- }
-
+ const Dependence *getNextSuccessor() const { return NextSuccessor; }
+
/// setNextPredecessor - Sets the value of the NextPredecessor
/// field.
- void setNextPredecessor(const Dependence *pred) {
- NextPredecessor = pred;
- }
-
+ void setNextPredecessor(const Dependence *pred) { NextPredecessor = pred; }
+
/// setNextSuccessor - Sets the value of the NextSuccessor
/// field.
- void setNextSuccessor(const Dependence *succ) {
- NextSuccessor = succ;
- }
-
+ void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; }
+
/// dump - For debugging purposes, dumps a dependence to OS.
///
void dump(raw_ostream &OS) const;
+
private:
Instruction *Src, *Dst;
const Dependence *NextPredecessor, *NextSuccessor;
friend class DependenceAnalysis;
};
-
/// FullDependence - This class represents a dependence between two memory
/// references in a function. It contains detailed information about the
/// dependence (direction vectors, etc.) and is used when the compiler is
@@ -216,11 +217,15 @@ namespace llvm {
/// (for output, flow, and anti dependences), the dependence implies an
/// ordering, where the source must precede the destination; in contrast,
/// input dependences are unordered.
- class FullDependence : public Dependence {
+ class FullDependence final : public Dependence {
public:
FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent,
unsigned Levels);
- ~FullDependence() override { delete[] DV; }
+
+ FullDependence(FullDependence &&RHS)
+ : Dependence(std::move(RHS)), Levels(RHS.Levels),
+ LoopIndependent(RHS.LoopIndependent), Consistent(RHS.Consistent),
+ DV(std::move(RHS.DV)) {}
/// isLoopIndependent - Returns true if this is a loop-independent
/// dependence.
@@ -268,16 +273,16 @@ namespace llvm {
unsigned short Levels;
bool LoopIndependent;
bool Consistent; // Init to true, then refine.
- DVEntry *DV;
+ std::unique_ptr<DVEntry[]> DV;
friend class DependenceAnalysis;
};
-
/// DependenceAnalysis - This class is the main dependence-analysis driver.
///
class DependenceAnalysis : public FunctionPass {
void operator=(const DependenceAnalysis &) = delete;
DependenceAnalysis(const DependenceAnalysis &) = delete;
+
public:
/// depends - Tests for a dependence between the Src and Dst instructions.
/// Returns NULL if no dependence; otherwise, returns a Dependence (or a
@@ -387,6 +392,7 @@ namespace llvm {
const SCEV *B;
const SCEV *C;
const Loop *AssociatedLoop;
+
public:
/// isEmpty - Return true if the constraint is of kind Empty.
bool isEmpty() const { return Kind == Empty; }
@@ -453,7 +459,6 @@ namespace llvm {
void dump(raw_ostream &OS) const;
};
-
/// establishNestingLevels - Examines the loop nesting of the Src and Dst
/// instructions and establishes their shared loops. Sets the variables
/// CommonLevels, SrcLevels, and MaxLevels.
@@ -521,10 +526,10 @@ namespace llvm {
/// in LoopNest.
bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const;
- /// Makes sure all subscript pairs share the same integer type by
+ /// Makes sure all subscript pairs share the same integer type by
/// sign-extending as necessary.
/// Sign-extending a subscript is safe because getelementptr assumes the
- /// array subscripts are signed.
+ /// array subscripts are signed.
void unifySubscriptType(ArrayRef<Subscript *> Pairs);
/// removeMatchingExtensions - Examines a subscript pair.
@@ -806,7 +811,6 @@ namespace llvm {
const SCEV *Delta) const;
/// testBounds - Returns true iff the current bounds are plausible.
- ///
bool testBounds(unsigned char DirKind,
unsigned Level,
BoundInfo *Bound,
@@ -913,9 +917,8 @@ namespace llvm {
void updateDirection(Dependence::DVEntry &Level,
const Constraint &CurConstraint) const;
- bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
- SmallVectorImpl<Subscript> &Pair,
- const SCEV *ElementSize);
+ bool tryDelinearize(Instruction *Src, Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair);
public:
static char ID; // Class identification, replacement for typeinfo
diff --git a/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
new file mode 100644
index 0000000..aa2de57
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -0,0 +1,48 @@
+//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The divergence analysis is an LLVM pass which can be used to find out
+// if a branch instruction in a GPU program is divergent or not. It can help
+// branch optimizations such as jump threading and loop unswitching to make
+// better decisions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+class Value;
+class DivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ DivergenceAnalysis() : FunctionPass(ID) {
+ initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent.
+ bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+
+ // Returns true if V is uniform/non-divergent.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+private:
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End llvm namespace
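
A client pass would declare the dependency in getAnalysisUsage and then query values directly; a minimal sketch using only the interface above:

    #include "llvm/Analysis/DivergenceAnalysis.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // True if some conditional branch in F depends on a divergent value,
    // i.e. threads of one GPU warp may take different directions.
    static bool hasDivergentBranch(Function &F, const DivergenceAnalysis &DA) {
      for (BasicBlock &BB : F)
        if (auto *BI = dyn_cast<BranchInst>(BB.getTerminator()))
          if (BI->isConditional() && DA.isDivergent(BI->getCondition()))
            return true;
      return false;
    }
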
diff --git a/contrib/llvm/include/llvm/Analysis/EHPersonalities.h b/contrib/llvm/include/llvm/Analysis/EHPersonalities.h
new file mode 100644
index 0000000..59e9672
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/EHPersonalities.h
@@ -0,0 +1,94 @@
+//===- EHPersonalities.h - Compute EH-related information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EHPERSONALITIES_H
+#define LLVM_ANALYSIS_EHPERSONALITIES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+class BasicBlock;
+class Function;
+class Value;
+
+enum class EHPersonality {
+ Unknown,
+ GNU_Ada,
+ GNU_C,
+ GNU_CXX,
+ GNU_ObjC,
+ MSVC_X86SEH,
+ MSVC_Win64SEH,
+ MSVC_CXX,
+ CoreCLR
+};
+
+/// \brief See if the given exception handling personality function is one
+/// that we understand. If so, return a description of it; otherwise return
+/// Unknown.
+EHPersonality classifyEHPersonality(const Value *Pers);
+
+/// \brief Returns true if this personality function catches asynchronous
+/// exceptions.
+inline bool isAsynchronousEHPersonality(EHPersonality Pers) {
+ // The two SEH personality functions can catch asynch exceptions. We assume
+ // unknown personalities don't catch asynch exceptions.
+ switch (Pers) {
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+/// \brief Returns true if this is a personality function that invokes
+/// handler funclets (which must return to it).
+inline bool isFuncletEHPersonality(EHPersonality Pers) {
+ switch (Pers) {
+ case EHPersonality::MSVC_CXX:
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::CoreCLR:
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+/// \brief Return true if this personality may be safely removed if there
+/// are no invoke instructions remaining in the current function.
+inline bool isNoOpWithoutInvoke(EHPersonality Pers) {
+ switch (Pers) {
+ case EHPersonality::Unknown:
+ return false;
+ // All known personalities currently have this behavior
+ default:
+ return true;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+bool canSimplifyInvokeNoUnwind(const Function *F);
+
+typedef TinyPtrVector<BasicBlock *> ColorVector;
+
+/// \brief If an EH funclet personality is in use (see isFuncletEHPersonality),
+/// this will recompute which blocks are in which funclet. It is possible that
+/// some blocks are in multiple funclets. Consider this analysis to be
+/// expensive.
+DenseMap<BasicBlock *, ColorVector> colorEHFunclets(Function &F);
+
+} // end namespace llvm
+
+#endif
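
Typical use is to classify a function's personality once and dispatch on the predicates above. A sketch, assuming Function::hasPersonalityFn()/getPersonalityFn(), which are declared elsewhere:

    #include "llvm/Analysis/EHPersonalities.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // True if F's exception handling uses outlined funclets (MSVC/CoreCLR
    // style) rather than a single landing-pad model.
    static bool usesFunclets(const Function &F) {
      if (!F.hasPersonalityFn())
        return false;
      return isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()));
    }
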
diff --git a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
new file mode 100644
index 0000000..bcd102e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -0,0 +1,160 @@
+//===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for a simple mod/ref and alias analysis over globals.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_GLOBALSMODREF_H
+#define LLVM_ANALYSIS_GLOBALSMODREF_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include <list>
+
+namespace llvm {
+
+/// An alias analysis result set for globals.
+///
+/// This focuses on handling aliasing properties of globals and interprocedural
+/// function call mod/ref information.
+class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
+ friend AAResultBase<GlobalsAAResult>;
+
+ class FunctionInfo;
+
+ const DataLayout &DL;
+
+ /// The globals that do not have their addresses taken.
+ SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
+
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ SmallPtrSet<const GlobalValue *, 8> IndirectGlobals;
+
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ DenseMap<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
+
+ /// For each function, keep track of what globals are modified or read.
+ DenseMap<const Function *, FunctionInfo> FunctionInfos;
+
+ /// A map of functions to SCC. The SCCs are described by a simple integer
+ /// ID that is only useful for comparing for equality (are two functions
+ /// in the same SCC or not?)
+ DenseMap<const Function *, unsigned> FunctionToSCCMap;
+
+ /// Handle to clear this analysis on deletion of values.
+ struct DeletionCallbackHandle final : CallbackVH {
+ GlobalsAAResult *GAR;
+ std::list<DeletionCallbackHandle>::iterator I;
+
+ DeletionCallbackHandle(GlobalsAAResult &GAR, Value *V)
+ : CallbackVH(V), GAR(&GAR) {}
+
+ void deleted() override;
+ };
+
+ /// List of callbacks for globals being tracked by this analysis. Note that
+ /// these objects are quite large, but we only anticipate having one per
+ /// global tracked by this analysis. There are numerous optimizations we
+ /// could perform to the memory utilization here if this becomes a problem.
+ std::list<DeletionCallbackHandle> Handles;
+
+ explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI);
+
+public:
+ GlobalsAAResult(GlobalsAAResult &&Arg);
+
+ static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+ CallGraph &CG);
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+
+ using AAResultBase::getModRefInfo;
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ FunctionModRefBehavior getModRefBehavior(const Function *F);
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+private:
+ FunctionInfo *getFunctionInfo(const Function *F);
+
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V,
+ SmallPtrSetImpl<Function *> *Readers = nullptr,
+ SmallPtrSetImpl<Function *> *Writers = nullptr,
+ GlobalValue *OkayStoreDest = nullptr);
+ bool AnalyzeIndirectGlobalMemory(GlobalVariable *GV);
+ void CollectSCCMembership(CallGraph &CG);
+
+ bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
+ ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
+ const GlobalValue *GV);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class GlobalsAA {
+public:
+ typedef GlobalsAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ GlobalsAAResult run(Module &M, AnalysisManager<Module> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "GlobalsAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the GlobalsAAResult object.
+class GlobalsAAWrapperPass : public ModulePass {
+ std::unique_ptr<GlobalsAAResult> Result;
+
+public:
+ static char ID;
+
+ GlobalsAAWrapperPass();
+
+ GlobalsAAResult &getResult() { return *Result; }
+ const GlobalsAAResult &getResult() const { return *Result; }
+
+ bool runOnModule(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+//===--------------------------------------------------------------------===//
+//
+// createGlobalsAAWrapperPass - This pass provides alias and mod/ref info for
+// global values that do not have their addresses taken.
+//
+ModulePass *createGlobalsAAWrapperPass();
+}
+
+#endif
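
Legacy clients reach the result through the wrapper pass after declaring AU.addRequired<GlobalsAAWrapperPass>(). A sketch of one query; the FMRB_DoesNotAccessMemory value is assumed from the reworked AliasAnalysis.h:

    #include "llvm/Analysis/GlobalsModRef.h"

    using namespace llvm;

    // Calls to F can be reordered freely around memory operations if the
    // globals-based analysis proves F never touches memory.
    static bool neverTouchesMemory(GlobalsAAResult &GAA, const Function *F) {
      return GAA.getModRefBehavior(F) == FMRB_DoesNotAccessMemory;
    }

    // In a ModulePass:
    //   GlobalsAAResult &GAA = getAnalysis<GlobalsAAWrapperPass>().getResult();
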
diff --git a/contrib/llvm/include/llvm/Analysis/IVUsers.h b/contrib/llvm/include/llvm/Analysis/IVUsers.h
index 00dbcbd..37d0149 100644
--- a/contrib/llvm/include/llvm/Analysis/IVUsers.h
+++ b/contrib/llvm/include/llvm/Analysis/IVUsers.h
@@ -34,7 +34,7 @@ class DataLayout;
/// The Expr member keeps track of the expression, User is the actual user
/// instruction of the operand, and 'OperandValToReplace' is the operand of
/// the User that is the use.
-class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
+class IVStrideUse final : public CallbackVH, public ilist_node<IVStrideUse> {
friend class IVUsers;
public:
IVStrideUse(IVUsers *P, Instruction* U, Value *O)
diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h
index 79ed74d..35f991c 100644
--- a/contrib/llvm/include/llvm/Analysis/InlineCost.h
+++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h
@@ -23,7 +23,7 @@ class AssumptionCacheTracker;
class CallSite;
class DataLayout;
class Function;
-class TargetTransformInfoWrapperPass;
+class TargetTransformInfo;
namespace InlineConstants {
// Various magic constants used to adjust heuristics.
@@ -98,46 +98,31 @@ public:
int getCostDelta() const { return Threshold - getCost(); }
};
-/// \brief Cost analyzer used by inliner.
-class InlineCostAnalysis : public CallGraphSCCPass {
- TargetTransformInfoWrapperPass *TTIWP;
- AssumptionCacheTracker *ACT;
-
-public:
- static char ID;
-
- InlineCostAnalysis();
- ~InlineCostAnalysis() override;
-
- // Pass interface implementation.
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- /// \brief Get an InlineCost object representing the cost of inlining this
- /// callsite.
- ///
- /// Note that threshold is passed into this function. Only costs below the
- /// threshold are computed with any accuracy. The threshold can be used to
- /// bound the computation necessary to determine whether the cost is
- /// sufficiently low to warrant inlining.
- ///
- /// Also note that calling this function *dynamically* computes the cost of
- /// inlining the callsite. It is an expensive, heavyweight call.
- InlineCost getInlineCost(CallSite CS, int Threshold);
-
- /// \brief Get an InlineCost with the callee explicitly specified.
- /// This allows you to calculate the cost of inlining a function via a
- /// pointer. This behaves exactly as the version with no explicit callee
- /// parameter in all other respects.
- //
- // Note: This is used by out-of-tree passes, please do not remove without
- // adding a replacement API.
- InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
-
- /// \brief Minimal filter to detect invalid constructs for inlining.
- bool isInlineViable(Function &Callee);
-};
+/// \brief Get an InlineCost object representing the cost of inlining this
+/// callsite.
+///
+/// Note that threshold is passed into this function. Only costs below the
+/// threshold are computed with any accuracy. The threshold can be used to
+/// bound the computation necessary to determine whether the cost is
+/// sufficiently low to warrant inlining.
+///
+/// Also note that calling this function *dynamically* computes the cost of
+/// inlining the callsite. It is an expensive, heavyweight call.
+InlineCost getInlineCost(CallSite CS, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT);
+
+/// \brief Get an InlineCost with the callee explicitly specified.
+/// This allows you to calculate the cost of inlining a function via a
+/// pointer. This behaves exactly as the version with no explicit callee
+/// parameter in all other respects.
+//
+InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT);
+/// \brief Minimal filter to detect invalid constructs for inlining.
+bool isInlineViable(Function &Callee);
}
#endif
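
Callers of the new free-function interface supply the callee's TTI and the assumption-cache tracker themselves. A sketch of the decision logic; isAlways()/isNever() are assumed from the unchanged part of InlineCost:

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/CallSite.h"

    using namespace llvm;

    // Inline when the computed cost comes in under the threshold;
    // getCostDelta() > 0 means there is margin to spare.
    static bool shouldInline(CallSite CS, int Threshold,
                             TargetTransformInfo &CalleeTTI,
                             AssumptionCacheTracker *ACT) {
      InlineCost IC = getInlineCost(CS, Threshold, CalleeTTI, ACT);
      if (IC.isAlways())
        return true;
      if (IC.isNever())
        return false;
      return IC.getCostDelta() > 0;
    }
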
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
index d44c5ff..ed313da 100644
--- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -207,7 +207,7 @@ namespace llvm {
const TargetLibraryInfo *TLI = nullptr,
const DominatorTree *DT = nullptr,
AssumptionCache *AC = nullptr,
- Instruction *CxtI = nullptr);
+ const Instruction *CxtI = nullptr);
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 5a339f1..a1ded25 100644
--- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -34,7 +34,7 @@ namespace llvm {
class BasicBlock;
template <class T> class DomTreeNodeBase;
typedef DomTreeNodeBase<BasicBlock> DomTreeNode;
-class DominatorTree;
+template <class T> class DominatorTreeBase;
/// \brief Determine the iterated dominance frontier, given a set of defining
/// blocks, and optionally, a set of live-in blocks.
@@ -47,7 +47,7 @@ class DominatorTree;
class IDFCalculator {
public:
- IDFCalculator(DominatorTree &DT) : DT(DT), useLiveIn(false) {}
+ IDFCalculator(DominatorTreeBase<BasicBlock> &DT) : DT(DT), useLiveIn(false) {}
/// \brief Give the IDF calculator the set of blocks in which the value is
/// defined. This is equivalent to the set of starting blocks it should be
@@ -85,7 +85,7 @@ public:
void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks);
private:
- DominatorTree &DT;
+ DominatorTreeBase<BasicBlock> &DT;
bool useLiveIn;
DenseMap<DomTreeNode *, unsigned> DomLevels;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
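
Widening IDFCalculator to DominatorTreeBase<BasicBlock> lets it run on post-dominator trees as well. Its main job is PHI placement: give it the blocks defining a variable and it returns the blocks that need PHIs. A sketch; the setter name setDefiningBlocks is assumed from the in-tree users:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/IteratedDominanceFrontier.h"
    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // Blocks where PHI nodes are needed for a variable defined in DefBlocks.
    static void phiBlocks(DominatorTree &DT, // a DominatorTreeBase<BasicBlock>
                          const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                          SmallVectorImpl<BasicBlock *> &PHIBlocks) {
      IDFCalculator IDF(DT);
      IDF.setDefiningBlocks(DefBlocks); // assumed setter (documented above)
      IDF.calculate(PHIBlocks);         // the iterated dominance frontier
    }
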
diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
index b0b9068..ef3d5e8 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -54,7 +54,7 @@ namespace llvm {
class PreservedAnalyses;
class raw_ostream;
-/// \brief A lazily constructed view of the call graph of a module.
+/// A lazily constructed view of the call graph of a module.
///
/// With the edges of this graph, the motivating constraint that we are
/// attempting to maintain is that function-local optimization, CGSCC-local
@@ -107,7 +107,7 @@ public:
typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT;
typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT;
- /// \brief A lazy iterator used for both the entry nodes and child nodes.
+ /// A lazy iterator used for both the entry nodes and child nodes.
///
/// When this iterator is dereferenced, if not yet available, a function will
/// be scanned for "calls" or uses of functions and its child information
@@ -152,7 +152,7 @@ public:
}
};
- /// \brief A node in the call graph.
+ /// A node in the call graph.
///
/// This represents a single node. Its primary roles are to cache the list of
/// callees, de-duplicate and provide fast testing of whether a function is
@@ -172,25 +172,23 @@ public:
mutable NodeVectorT Callees;
DenseMap<Function *, size_t> CalleeIndexMap;
- /// \brief Basic constructor implements the scanning of F into Callees and
+ /// Basic constructor implements the scanning of F into Callees and
/// CalleeIndexMap.
Node(LazyCallGraph &G, Function &F);
- /// \brief Internal helper to insert a callee.
+ /// Internal helper to insert a callee.
void insertEdgeInternal(Function &Callee);
- /// \brief Internal helper to insert a callee.
+ /// Internal helper to insert a callee.
void insertEdgeInternal(Node &CalleeN);
- /// \brief Internal helper to remove a callee from this node.
+ /// Internal helper to remove a callee from this node.
void removeEdgeInternal(Function &Callee);
public:
typedef LazyCallGraph::iterator iterator;
- Function &getFunction() const {
- return F;
- };
+ Function &getFunction() const { return F; }
iterator begin() const {
return iterator(*G, Callees.begin(), Callees.end());
@@ -202,7 +200,7 @@ public:
bool operator!=(const Node &N) const { return !operator==(N); }
};
- /// \brief An SCC of the call graph.
+ /// An SCC of the call graph.
///
/// This represents a Strongly Connected Component of the call graph as
/// a collection of call graph nodes. While the order of nodes in the SCC is
@@ -226,7 +224,8 @@ public:
public:
typedef SmallVectorImpl<Node *>::const_iterator iterator;
- typedef pointee_iterator<SmallPtrSet<SCC *, 1>::const_iterator> parent_iterator;
+ typedef pointee_iterator<SmallPtrSet<SCC *, 1>::const_iterator>
+ parent_iterator;
iterator begin() const { return Nodes.begin(); }
iterator end() const { return Nodes.end(); }
@@ -235,24 +234,24 @@ public:
parent_iterator parent_end() const { return ParentSCCs.end(); }
iterator_range<parent_iterator> parents() const {
- return iterator_range<parent_iterator>(parent_begin(), parent_end());
+ return make_range(parent_begin(), parent_end());
}
- /// \brief Test if this SCC is a parent of \a C.
+ /// Test if this SCC is a parent of \a C.
bool isParentOf(const SCC &C) const { return C.isChildOf(*this); }
- /// \brief Test if this SCC is an ancestor of \a C.
+ /// Test if this SCC is an ancestor of \a C.
bool isAncestorOf(const SCC &C) const { return C.isDescendantOf(*this); }
- /// \brief Test if this SCC is a child of \a C.
+ /// Test if this SCC is a child of \a C.
bool isChildOf(const SCC &C) const {
return ParentSCCs.count(const_cast<SCC *>(&C));
}
- /// \brief Test if this SCC is a descendant of \a C.
+ /// Test if this SCC is a descendant of \a C.
bool isDescendantOf(const SCC &C) const;
- /// \brief Short name useful for debugging or logging.
+ /// Short name useful for debugging or logging.
///
/// We use the name of the first function in the SCC to name the SCC for
/// the purposes of debugging and logging.
@@ -267,22 +266,21 @@ public:
/// Note that these methods sometimes have complex runtimes, so be careful
/// how you call them.
- /// \brief Insert an edge from one node in this SCC to another in this SCC.
+ /// Insert an edge from one node in this SCC to another in this SCC.
///
/// By the definition of an SCC, this does not change the nature or make-up
/// of any SCCs.
void insertIntraSCCEdge(Node &CallerN, Node &CalleeN);
- /// \brief Insert an edge whose tail is in this SCC and head is in some
- /// child SCC.
+ /// Insert an edge whose tail is in this SCC and head is in some child SCC.
///
/// There must be an existing path from the caller to the callee. This
/// operation is inexpensive and does not change the set of SCCs in the
/// graph.
void insertOutgoingEdge(Node &CallerN, Node &CalleeN);
- /// \brief Insert an edge whose tail is in a descendant SCC and head is in
- /// this SCC.
+ /// Insert an edge whose tail is in a descendant SCC and head is in this
+ /// SCC.
///
/// There must be an existing path from the callee to the caller in this
/// case. NB! This has the potential to be a very expensive function. It
@@ -297,7 +295,7 @@ public:
/// implementation for details, but that use case might impact users.
SmallVector<SCC *, 1> insertIncomingEdge(Node &CallerN, Node &CalleeN);
- /// \brief Remove an edge whose source is in this SCC and target is *not*.
+ /// Remove an edge whose source is in this SCC and target is *not*.
///
/// This removes an inter-SCC edge. All inter-SCC edges originating from
/// this SCC have been fully explored by any in-flight DFS SCC formation,
@@ -309,7 +307,7 @@ public:
/// them.
void removeInterSCCEdge(Node &CallerN, Node &CalleeN);
- /// \brief Remove an edge which is entirely within this SCC.
+ /// Remove an edge which is entirely within this SCC.
///
/// Both the \a Caller and the \a Callee must be within this SCC. Removing
/// such an edge may break cycles that form this SCC and thus this
@@ -346,7 +344,7 @@ public:
///@}
};
- /// \brief A post-order depth-first SCC iterator over the call graph.
+ /// A post-order depth-first SCC iterator over the call graph.
///
/// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for
/// the call graph, walking it lazily in depth-first post-order. That is, it
@@ -358,7 +356,7 @@ public:
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
- /// \brief Nonce type to select the constructor for the end iterator.
+ /// Nonce type to select the constructor for the end iterator.
struct IsAtEndT {};
LazyCallGraph *G;
@@ -387,7 +385,7 @@ public:
}
};
- /// \brief Construct a graph for the given module.
+ /// Construct a graph for the given module.
///
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
@@ -410,22 +408,20 @@ public:
}
iterator_range<postorder_scc_iterator> postorder_sccs() {
- return iterator_range<postorder_scc_iterator>(postorder_scc_begin(),
- postorder_scc_end());
+ return make_range(postorder_scc_begin(), postorder_scc_end());
}
- /// \brief Lookup a function in the graph which has already been scanned and
- /// added.
+ /// Lookup a function in the graph which has already been scanned and added.
Node *lookup(const Function &F) const { return NodeMap.lookup(&F); }
- /// \brief Lookup a function's SCC in the graph.
+ /// Lookup a function's SCC in the graph.
///
/// \returns null if the function hasn't been assigned an SCC via the SCC
/// iterator walk.
SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); }
- /// \brief Get a graph node for a given function, scanning it to populate the
- /// graph data as necessary.
+ /// Get a graph node for a given function, scanning it to populate the graph
+ /// data as necessary.
Node &get(Function &F) {
Node *&N = NodeMap[&F];
if (N)
@@ -444,18 +440,18 @@ public:
/// Once you begin manipulating a call graph's SCCs, you must perform all
/// mutation of the graph via the SCC methods.
- /// \brief Update the call graph after inserting a new edge.
+ /// Update the call graph after inserting a new edge.
void insertEdge(Node &Caller, Function &Callee);
- /// \brief Update the call graph after inserting a new edge.
+ /// Update the call graph after inserting a new edge.
void insertEdge(Function &Caller, Function &Callee) {
return insertEdge(get(Caller), Callee);
}
- /// \brief Update the call graph after deleting an edge.
+ /// Update the call graph after deleting an edge.
void removeEdge(Node &Caller, Function &Callee);
- /// \brief Update the call graph after deleting an edge.
+ /// Update the call graph after deleting an edge.
void removeEdge(Function &Caller, Function &Callee) {
return removeEdge(get(Caller), Callee);
}
@@ -463,57 +459,56 @@ public:
///@}
private:
- /// \brief Allocator that holds all the call graph nodes.
+ /// Allocator that holds all the call graph nodes.
SpecificBumpPtrAllocator<Node> BPA;
- /// \brief Maps function->node for fast lookup.
+ /// Maps function->node for fast lookup.
DenseMap<const Function *, Node *> NodeMap;
- /// \brief The entry nodes to the graph.
+ /// The entry nodes to the graph.
///
/// These nodes are reachable through "external" means. Put another way, they
/// escape at the module scope.
NodeVectorT EntryNodes;
- /// \brief Map of the entry nodes in the graph to their indices in
- /// \c EntryNodes.
+ /// Map of the entry nodes in the graph to their indices in \c EntryNodes.
DenseMap<Function *, size_t> EntryIndexMap;
- /// \brief Allocator that holds all the call graph SCCs.
+ /// Allocator that holds all the call graph SCCs.
SpecificBumpPtrAllocator<SCC> SCCBPA;
- /// \brief Maps Function -> SCC for fast lookup.
+ /// Maps Function -> SCC for fast lookup.
DenseMap<Node *, SCC *> SCCMap;
- /// \brief The leaf SCCs of the graph.
+ /// The leaf SCCs of the graph.
///
/// These are all of the SCCs which have no children.
SmallVector<SCC *, 4> LeafSCCs;
- /// \brief Stack of nodes in the DFS walk.
+ /// Stack of nodes in the DFS walk.
SmallVector<std::pair<Node *, iterator>, 4> DFSStack;
- /// \brief Set of entry nodes not-yet-processed into SCCs.
+ /// Set of entry nodes not-yet-processed into SCCs.
SmallVector<Function *, 4> SCCEntryNodes;
- /// \brief Stack of nodes the DFS has walked but not yet put into a SCC.
+ /// Stack of nodes the DFS has walked but not yet put into a SCC.
SmallVector<Node *, 4> PendingSCCStack;
- /// \brief Counter for the next DFS number to assign.
+ /// Counter for the next DFS number to assign.
int NextDFSNumber;
- /// \brief Helper to insert a new function, with an already looked-up entry in
+ /// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
- /// \brief Helper to update pointers back to the graph object during moves.
+ /// Helper to update pointers back to the graph object during moves.
void updateGraphPtrs();
- /// \brief Helper to form a new SCC out of the top of a DFSStack-like
+ /// Helper to form a new SCC out of the top of a DFSStack-like
/// structure.
SCC *formSCC(Node *RootN, SmallVectorImpl<Node *> &NodeStack);
- /// \brief Retrieve the next node in the post-order SCC walk of the call graph.
+ /// Retrieve the next node in the post-order SCC walk of the call graph.
SCC *getNextSCCInPostOrder();
};
@@ -535,17 +530,17 @@ template <> struct GraphTraits<LazyCallGraph *> {
static ChildIteratorType child_end(NodeType *N) { return N->end(); }
};
-/// \brief An analysis pass which computes the call graph for a module.
+/// An analysis pass which computes the call graph for a module.
class LazyCallGraphAnalysis {
public:
- /// \brief Inform generic clients of the result type.
+ /// Inform generic clients of the result type.
typedef LazyCallGraph Result;
static void *ID() { return (void *)&PassID; }
static StringRef name() { return "Lazy CallGraph Analysis"; }
- /// \brief Compute the \c LazyCallGraph for the module \c M.
+ /// Compute the \c LazyCallGraph for the module \c M.
///
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
@@ -555,7 +550,7 @@ private:
static char PassID;
};
-/// \brief A pass which prints the call graph to a \c raw_ostream.
+/// A pass which prints the call graph to a \c raw_ostream.
///
/// This is primarily useful for testing the analysis.
class LazyCallGraphPrinterPass {
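The interface above supports a lazy, bottom-up SCC walk. A minimal sketch of a
client, assuming an existing Module M and a Function F defined in it (the
names are illustrative, not part of the header):

    #include "llvm/Analysis/LazyCallGraph.h"
    using namespace llvm;

    void visitSCCs(Module &M, Function &F) {
      LazyCallGraph CG(M);                  // only entry points are scanned here
      LazyCallGraph::Node &FN = CG.get(F);  // lazily scans F's call sites
      for (LazyCallGraph::SCC &C : CG.postorder_sccs())
        (void)C;                            // callee SCCs are visited before callers
      // lookupSCC succeeds only once the walk above has assigned F to an SCC.
      LazyCallGraph::SCC *FC = CG.lookupSCC(FN);
      (void)FC;
    }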
diff --git a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
index 1051cff..4200206 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
class Instruction;
class TargetLibraryInfo;
class Value;
-
+
/// This pass computes, caches, and vends lazy value constraint information.
class LazyValueInfo : public FunctionPass {
AssumptionCache *AC;
@@ -45,23 +45,22 @@ public:
enum Tristate {
Unknown = -1, False = 0, True = 1
};
-
-
+
// Public query interface.
-
+
/// Determine whether the specified value comparison with a constant is known
/// to be true or false on the specified CFG edge.
/// Pred is a CmpInst predicate.
Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI = nullptr);
-
+
/// Determine whether the specified value comparison with a constant is known
/// to be true or false at the specified instruction
/// (from an assume intrinsic). Pred is a CmpInst predicate.
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI);
-
+
/// Determine whether the specified value is known to be a
/// constant at the end of the specified block. Return null if not.
Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr);
@@ -70,14 +69,14 @@ public:
/// constant on the specified edge. Return null if not.
Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI = nullptr);
-
+
/// Inform the analysis cache that we have threaded an edge from
/// PredBB to OldSucc to be from PredBB to NewSucc instead.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc);
-
+
/// Inform the analysis cache that we have erased a block.
void eraseBlock(BasicBlock *BB);
-
+
// Implementation boilerplate.
void getAnalysisUsage(AnalysisUsage &AU) const override;
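A minimal sketch of the query interface from inside a legacy pass that
declared AU.addRequired<LazyValueInfo>(); V (a Value *), and Pred and Succ
(the edge's BasicBlock *s) are assumed to be in scope:

    LazyValueInfo &LVI = getAnalysis<LazyValueInfo>();
    Constant *Zero = Constant::getNullValue(V->getType());
    // Is "V == 0" known to hold along the CFG edge Pred -> Succ?
    LazyValueInfo::Tristate R =
        LVI.getPredicateOnEdge(CmpInst::ICMP_EQ, V, Zero, Pred, Succ);
    bool KnownTrue = (R == LazyValueInfo::True);
    (void)KnownTrue; // a client could fold uses of V in Succ when true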
diff --git a/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h
deleted file mode 100644
index 6589ac1..0000000
--- a/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- LibCallAliasAnalysis.h - Implement AliasAnalysis for libcalls ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the LibCallAliasAnalysis class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H
-#define LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
- class LibCallInfo;
- struct LibCallFunctionInfo;
-
- /// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo.
- struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis {
- static char ID; // Class identification
-
- LibCallInfo *LCI;
-
- explicit LibCallAliasAnalysis(LibCallInfo *LC = nullptr)
- : FunctionPass(ID), LCI(LC) {
- initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
- explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC)
- : FunctionPass(ID), LCI(LC) {
- initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
- ~LibCallAliasAnalysis() override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- // TODO: Could compare two direct calls against each other if we cared to.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnFunction(Function &F) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- ImmutableCallSite CS,
- const MemoryLocation &Loc);
- };
-} // End of llvm namespace
-
-#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h b/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h
deleted file mode 100644
index b4bef31..0000000
--- a/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h
+++ /dev/null
@@ -1,225 +0,0 @@
-//===- LibCallSemantics.h - Describe library semantics --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines interfaces that can be used to describe language specific
-// runtime library interfaces (e.g. libc, libm, etc) to LLVM optimizers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_LIBCALLSEMANTICS_H
-#define LLVM_ANALYSIS_LIBCALLSEMANTICS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-
-namespace llvm {
-class InvokeInst;
-
- /// LibCallLocationInfo - This struct describes a set of memory locations that
- /// are accessed by libcalls. Identification of a location is done with a
- /// simple callback function.
- ///
- /// For example, the LibCallInfo may be set up to model the behavior of
- /// standard libm functions. The location that they may be interested in is
- /// an abstract location that represents errno for the current target. In
- /// this case, a location for errno is anything such that the predicate
- /// returns true. On Mac OS X, this predicate would return true if the
- /// pointer is the result of a call to "__error()".
- ///
- /// Locations can also be defined in a constant-sensitive way. For example,
- /// it is possible to define a location that returns true iff it is passed
- /// into the call as a specific argument. This is useful for modeling things
- /// like "printf", which can store to memory, but only through pointers passed
- /// with a '%n' constraint.
- ///
- struct LibCallLocationInfo {
- // TODO: Flags: isContextSensitive etc.
-
- /// isLocation - Return a LocResult if the specified pointer refers to this
- /// location for the specified call site. This returns "Yes" if we can tell
- /// that the pointer *does definitely* refer to the location, "No" if we can
- /// tell that the pointer *definitely does not* refer to the location, and
- /// returns "Unknown" if we cannot tell for certain.
- enum LocResult {
- Yes, No, Unknown
- };
- LocResult (*isLocation)(ImmutableCallSite CS, const MemoryLocation &Loc);
- };
-
- /// LibCallFunctionInfo - Each record in the array of FunctionInfo structs
- /// records the behavior of one libcall that is known by the optimizer. This
- /// captures things like the side effects of the call. Side effects are
- /// modeled both universally (in the readnone/readonly) sense, but also
- /// potentially against a set of abstract locations defined by the optimizer.
- /// This allows an optimizer to define that some libcall (e.g. sqrt) is
- /// side-effect free except that it might modify errno (thus, the call is
- /// *not* universally readonly). Or it might say that the side effects
- /// are unknown other than to say that errno is not modified.
- ///
- struct LibCallFunctionInfo {
- /// Name - This is the name of the libcall this describes.
- const char *Name;
-
- /// TODO: Constant folding function: Constant* vector -> Constant*.
-
- /// UniversalBehavior - This captures the absolute mod/ref behavior without
- /// any specific context knowledge. For example, if the function is known
- /// to be readonly, this would be set to 'ref'. If known to be readnone,
- /// this is set to NoModRef.
- AliasAnalysis::ModRefResult UniversalBehavior;
-
- /// LocationMRInfo - This pair captures info about whether a specific
- /// location is modified or referenced by a libcall.
- struct LocationMRInfo {
- /// LocationID - ID # of the accessed location or ~0U for array end.
- unsigned LocationID;
- /// MRInfo - Mod/Ref info for this location.
- AliasAnalysis::ModRefResult MRInfo;
- };
-
- /// DetailsType - Indicate the sense of the LocationDetails array. This
- /// controls how the LocationDetails array is interpreted.
- enum {
- /// DoesOnly - If DetailsType is set to DoesOnly, then we know that the
- /// *only* mod/ref behavior of this function is captured by the
- /// LocationDetails array. If we are trying to say that 'sqrt' can only
- /// modify errno, we'd have the {errnoloc,mod} in the LocationDetails
- /// array and have DetailsType set to DoesOnly.
- DoesOnly,
-
- /// DoesNot - If DetailsType is set to DoesNot, then the sense of the
- /// LocationDetails array is completely inverted. This means that we *do
- /// not* know everything about the side effects of this libcall, but we do
- /// know things that the libcall cannot do. This is useful for complex
- /// functions like 'ctime' which have crazy mod/ref behavior, but are
- /// known to never read or write errno. In this case, we'd have
- /// {errnoloc,modref} in the LocationDetails array and DetailsType would
- /// be set to DoesNot, indicating that ctime does not read or write the
- /// errno location.
- DoesNot
- } DetailsType;
-
- /// LocationDetails - This is a pointer to an array of LocationMRInfo
- /// structs which indicates the behavior of the libcall w.r.t. specific
- /// locations. For example, if this libcall is known to only modify
- /// 'errno', it would have a LocationDetails array with the errno ID and
- /// 'mod' in it. See the DetailsType field for how this is interpreted.
- ///
- /// In the "DoesOnly" case, this information is 'may' information for: there
- /// is no guarantee that the specified side effect actually does happen,
- /// just that it could. In the "DoesNot" case, this is 'must not' info.
- ///
- /// If this pointer is null, no details are known.
- ///
- const LocationMRInfo *LocationDetails;
- };
-
-
- /// LibCallInfo - Abstract interface to query about library call information.
- /// Instances of this class return known information about some set of
- /// libcalls.
- ///
- class LibCallInfo {
- // Implementation details of this object, private.
- mutable void *Impl;
- mutable const LibCallLocationInfo *Locations;
- mutable unsigned NumLocations;
- public:
- LibCallInfo() : Impl(nullptr), Locations(nullptr), NumLocations(0) {}
- virtual ~LibCallInfo();
-
- //===------------------------------------------------------------------===//
- // Accessor Methods: Efficient access to contained data.
- //===------------------------------------------------------------------===//
-
- /// getLocationInfo - Return information about the specified LocationID.
- const LibCallLocationInfo &getLocationInfo(unsigned LocID) const;
-
-
- /// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
- /// the specified function if we have it. If not, return null.
- const LibCallFunctionInfo *getFunctionInfo(const Function *F) const;
-
-
- //===------------------------------------------------------------------===//
- // Implementation Methods: Subclasses should implement these.
- //===------------------------------------------------------------------===//
-
- /// getLocationInfo - Return descriptors for the locations referenced by
- /// this set of libcalls.
- virtual unsigned getLocationInfo(const LibCallLocationInfo *&Array) const {
- return 0;
- }
-
- /// getFunctionInfoArray - Return an array of descriptors that describe the
- /// set of libcalls represented by this LibCallInfo object. This array is
- /// terminated by an entry with a NULL name.
- virtual const LibCallFunctionInfo *getFunctionInfoArray() const = 0;
- };
-
- enum class EHPersonality {
- Unknown,
- GNU_Ada,
- GNU_C,
- GNU_CXX,
- GNU_ObjC,
- MSVC_X86SEH,
- MSVC_Win64SEH,
- MSVC_CXX,
- };
-
- /// \brief See if the given exception handling personality function is one
- /// that we understand. If so, return a description of it; otherwise return
- /// Unknown.
- EHPersonality classifyEHPersonality(const Value *Pers);
-
- /// \brief Returns true if this personality function catches asynchronous
- /// exceptions.
- inline bool isAsynchronousEHPersonality(EHPersonality Pers) {
- // The two SEH personality functions can catch asynch exceptions. We assume
- // unknown personalities don't catch asynch exceptions.
- switch (Pers) {
- case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
- return true;
- default: return false;
- }
- llvm_unreachable("invalid enum");
- }
-
- /// \brief Returns true if this is an MSVC personality function.
- inline bool isMSVCEHPersonality(EHPersonality Pers) {
- // The two SEH personality functions can catch asynch exceptions. We assume
- // unknown personalities don't catch asynch exceptions.
- switch (Pers) {
- case EHPersonality::MSVC_CXX:
- case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
- return true;
- default: return false;
- }
- llvm_unreachable("invalid enum");
- }
-
- /// \brief Return true if this personality may be safely removed if there
- /// are no invoke instructions remaining in the current function.
- inline bool isNoOpWithoutInvoke(EHPersonality Pers) {
- switch (Pers) {
- case EHPersonality::Unknown:
- return false;
- // All known personalities currently have this behavior
- default: return true;
- }
- llvm_unreachable("invalid enum");
- }
-
- bool canSimplifyInvokeNoUnwind(const Function *F);
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h
index 42667d2..939663b 100644
--- a/contrib/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm/include/llvm/Analysis/Loads.h
@@ -14,11 +14,12 @@
#ifndef LLVM_ANALYSIS_LOADS_H
#define LLVM_ANALYSIS_LOADS_H
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
-class AliasAnalysis;
class DataLayout;
class MDNode;
@@ -29,15 +30,19 @@ class MDNode;
bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
unsigned Align);
+/// DefMaxInstsToScan - the default maximum number of instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+extern cl::opt<unsigned> DefMaxInstsToScan;
+
/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
/// the instruction before ScanFrom) checking to see if we have the value at
/// the memory address *Ptr locally available within a small number of
/// instructions. If the value is available, return it.
///
-/// If not, return the iterator for the last validated instruction that the
+/// If not, return the iterator for the last validated instruction that the
/// value would be live through. If we scanned the entire block and didn't
/// find something that invalidates *Ptr or provides it, ScanFrom would be
-/// left at begin() and this returns null. ScanFrom could also be left
+/// left at begin() and this returns null. ScanFrom could also be left
///
/// MaxInstsToScan specifies the maximum instructions to scan in the block.
/// If it is set to 0, it will scan the whole block. You can also optionally
@@ -48,7 +53,7 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
/// is found, it is left unmodified.
Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
- unsigned MaxInstsToScan = 6,
+ unsigned MaxInstsToScan = DefMaxInstsToScan,
AliasAnalysis *AA = nullptr,
AAMDNodes *AATags = nullptr);
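With the scan limit now defaulted from a cl::opt, callers can drop the
hard-coded constant. A minimal sketch, assuming LI is the LoadInst * we hope
to eliminate (getIterator() is the ilist accessor used elsewhere in this
tree):

    BasicBlock *BB = LI->getParent();
    BasicBlock::iterator ScanFrom = LI->getIterator();
    // Scans backwards from LI, up to DefMaxInstsToScan instructions.
    if (Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), BB, ScanFrom))
      LI->replaceAllUsesWith(V); // a real transform must also reconcile types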
diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 476e4b6..871d35e 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -29,10 +29,11 @@ namespace llvm {
class Value;
class DataLayout;
-class AliasAnalysis;
class ScalarEvolution;
class Loop;
class SCEV;
+class SCEVUnionPredicate;
+class LoopAccessInfo;
/// Optimization analysis message produced during vectorization. Messages inform
/// the user why vectorization did not occur.
@@ -136,6 +137,14 @@ public:
// We couldn't determine the direction or the distance.
Unknown,
// Lexically forward.
+ //
+ // FIXME: If we only have loop-independent forward dependences (e.g. a
+ // read and write of A[i]), LAA will locally deem the dependence "safe"
+ // without querying the MemoryDepChecker. Therefore we can miss
+ // enumerating loop-independent forward dependences in
+ // getDependences. Note that as soon as there are different
+ // indices used to access the same array, the MemoryDepChecker *is*
+ // queried and the dependence list is complete.
Forward,
// Forward, but if vectorized, is likely to prevent store-to-load
// forwarding.
@@ -162,13 +171,20 @@ public:
Dependence(unsigned Source, unsigned Destination, DepType Type)
: Source(Source), Destination(Destination), Type(Type) {}
+ /// \brief Return the source instruction of the dependence.
+ Instruction *getSource(const LoopAccessInfo &LAI) const;
+ /// \brief Return the destination instruction of the dependence.
+ Instruction *getDestination(const LoopAccessInfo &LAI) const;
+
/// \brief Dependence types that don't prevent vectorization.
static bool isSafeForVectorization(DepType Type);
- /// \brief Dependence types that can be queried from the analysis.
- static bool isInterestingDependence(DepType Type);
+ /// \brief Lexically forward dependence.
+ bool isForward() const;
+ /// \brief Lexically backward dependence.
+ bool isBackward() const;
- /// \brief Lexically backward dependence types.
+ /// \brief May be a lexically backward dependence type (includes Unknown).
bool isPossiblyBackward() const;
/// \brief Print the dependence. \p Instr is used to map the instruction
@@ -177,10 +193,10 @@ public:
const SmallVectorImpl<Instruction *> &Instrs) const;
};
- MemoryDepChecker(ScalarEvolution *Se, const Loop *L)
- : SE(Se), InnermostLoop(L), AccessIdx(0),
+ MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
+ : PSE(PSE), InnermostLoop(L), AccessIdx(0),
ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
- RecordInterestingDependences(true) {}
+ RecordDependences(true) {}
/// \brief Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -218,14 +234,14 @@ public:
/// vectorize the loop with a dynamic array access check.
bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
- /// \brief Returns the interesting dependences. If null is returned we
- /// exceeded the MaxInterestingDependence threshold and this information is
- /// not available.
- const SmallVectorImpl<Dependence> *getInterestingDependences() const {
- return RecordInterestingDependences ? &InterestingDependences : nullptr;
+ /// \brief Returns the memory dependences. If null is returned we exceeded
+ /// the MaxDependences threshold and this information is not
+ /// available.
+ const SmallVectorImpl<Dependence> *getDependences() const {
+ return RecordDependences ? &Dependences : nullptr;
}
- void clearInterestingDependences() { InterestingDependences.clear(); }
+ void clearDependences() { Dependences.clear(); }
/// \brief The vector of memory access instructions. The indices are used as
/// instruction identifiers in the Dependence class.
@@ -233,12 +249,29 @@ public:
return InstMap;
}
+ /// \brief Generate a mapping between the memory instructions and their
+ /// indices according to program order.
+ DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
+ DenseMap<Instruction *, unsigned> OrderMap;
+
+ for (unsigned I = 0; I < InstMap.size(); ++I)
+ OrderMap[InstMap[I]] = I;
+
+ return OrderMap;
+ }
+
/// \brief Find the set of instructions that read or write via \p Ptr.
SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
bool isWrite) const;
private:
- ScalarEvolution *SE;
+  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks and to
+  /// apply dynamic knowledge that simplifies SCEV expressions and converts
+  /// them to a more usable form. We need this in case assumptions about SCEV
+ /// expressions need to be made in order to avoid unknown dependences. For
+ /// example we might assume a unit stride for a pointer in order to prove
+ /// that a memory access is strided and doesn't wrap.
+ PredicatedScalarEvolution &PSE;
const Loop *InnermostLoop;
/// \brief Maps access locations (ptr, read/write) to program order.
@@ -261,15 +294,14 @@ private:
/// vectorization.
bool SafeForVectorization;
- //// \brief True if InterestingDependences reflects the dependences in the
- //// loop. If false we exceeded MaxInterestingDependence and
- //// InterestingDependences is invalid.
- bool RecordInterestingDependences;
+  /// \brief True if Dependences reflects the dependences in the
+  /// loop. If false, we exceeded MaxDependences and
+  /// Dependences is invalid.
+ bool RecordDependences;
- /// \brief Interesting memory dependences collected during the analysis as
- /// defined by isInterestingDependence. Only valid if
- /// RecordInterestingDependences is true.
- SmallVector<Dependence, 8> InterestingDependences;
+ /// \brief Memory dependences collected during the analysis. Only valid if
+ /// RecordDependences is true.
+ SmallVector<Dependence, 8> Dependences;
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
@@ -327,11 +359,17 @@ public:
void reset() {
Need = false;
Pointers.clear();
+ Checks.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
+  /// We need \p Preds in order to compute the SCEV expression of the pointer
+ /// according to the assumptions that we've made during the analysis.
+ /// The method might also version the pointer stride according to \p Strides,
+ /// and change \p Preds.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides);
+ unsigned ASId, const ValueToValueMap &Strides,
+ PredicatedScalarEvolution &PSE);
/// \brief No run-time memory checking is necessary.
bool empty() const { return Pointers.empty(); }
@@ -368,33 +406,38 @@ public:
SmallVector<unsigned, 2> Members;
};
- /// \brief Groups pointers such that a single memcheck is required
- /// between two different groups. This will clear the CheckingGroups vector
-  /// and re-compute it. We will only group dependencies if \p UseDependencies
- /// is true, otherwise we will create a separate group for each pointer.
- void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
- bool UseDependencies);
+  /// \brief A memcheck which is made up of a pair of grouped pointers.
+ ///
+ /// These *have* to be const for now, since checks are generated from
+ /// CheckingPtrGroups in LAI::addRuntimeChecks which is a const member
+ /// function. FIXME: once check-generation is moved inside this class (after
+ /// the PtrPartition hack is removed), we could drop const.
+ typedef std::pair<const CheckingPtrGroup *, const CheckingPtrGroup *>
+ PointerCheck;
+
+  /// \brief Generate the checks and store them. This also performs the grouping
+ /// of pointers to reduce the number of memchecks necessary.
+ void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
+ bool UseDependencies);
+
+ /// \brief Returns the checks that generateChecks created.
+ const SmallVector<PointerCheck, 4> &getChecks() const { return Checks; }
/// \brief Decide if we need to add a check between two groups of pointers,
/// according to needsChecking.
- bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N,
- const SmallVectorImpl<int> *PtrPartition) const;
-
- /// \brief Return true if any pointer requires run-time checking according
- /// to needsChecking.
- bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
+ bool needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const;
/// \brief Returns the number of run-time checks required according to
/// needsChecking.
- unsigned getNumberOfChecks(const SmallVectorImpl<int> *PtrPartition) const;
+ unsigned getNumberOfChecks() const { return Checks.size(); }
/// \brief Print the list of run-time memory checks necessary.
- ///
- /// If \p PtrPartition is set, it contains the partition number for
- /// pointers (-1 if the pointer belongs to multiple partitions). In this
- /// case omit checks between pointers belonging to the same partition.
- void print(raw_ostream &OS, unsigned Depth = 0,
- const SmallVectorImpl<int> *PtrPartition = nullptr) const;
+ void print(raw_ostream &OS, unsigned Depth = 0) const;
+
+ /// Print \p Checks.
+ void printChecks(raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+ unsigned Depth = 0) const;
/// This flag indicates if we need to add the runtime check.
bool Need;
@@ -405,18 +448,41 @@ public:
/// Holds a partitioning of pointers into "check groups".
SmallVector<CheckingPtrGroup, 2> CheckingGroups;
-private:
+  /// \brief Check if pointers are in the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers (-1 if the
+ /// pointer belongs to multiple partitions).
+ static bool
+ arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
+ unsigned PtrIdx1, unsigned PtrIdx2);
+
/// \brief Decide whether we need to issue a run-time check for pointer at
/// index \p I and \p J to prove their independence.
- ///
- /// If \p PtrPartition is set, it contains the partition number for
- /// pointers (-1 if the pointer belongs to multiple partitions). In this
- /// case omit checks between pointers belonging to the same partition.
- bool needsChecking(unsigned I, unsigned J,
- const SmallVectorImpl<int> *PtrPartition) const;
+ bool needsChecking(unsigned I, unsigned J) const;
+
+ /// \brief Return PointerInfo for pointer at index \p PtrIdx.
+ const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
+ return Pointers[PtrIdx];
+ }
+
+private:
+ /// \brief Groups pointers such that a single memcheck is required
+ /// between two different groups. This will clear the CheckingGroups vector
+  /// and re-compute it. We will only group dependencies if \p UseDependencies
+ /// is true, otherwise we will create a separate group for each pointer.
+ void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
+ bool UseDependencies);
+
+ /// Generate the checks and return them.
+ SmallVector<PointerCheck, 4>
+ generateChecks() const;
/// Holds a pointer to the ScalarEvolution analysis.
ScalarEvolution *SE;
+
+ /// \brief Set of run-time checks required to establish independence of
+ /// otherwise may-aliasing pointers in the loop.
+ SmallVector<PointerCheck, 4> Checks;
};
/// \brief Drive the analysis of memory accesses in the loop
@@ -433,6 +499,13 @@ private:
/// generates run-time checks to prove independence. This is done by
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
/// RuntimePointerCheck class.
+///
+/// If pointers can wrap or can't be expressed as affine AddRec expressions by
+/// ScalarEvolution, we will generate run-time checks by emitting a
+/// SCEVUnionPredicate.
+///
+/// Checks for both memory dependences and the SCEV predicates contained in the
+/// PSE must be emitted in order for the results of this analysis to be valid.
class LoopAccessInfo {
public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
@@ -450,9 +523,8 @@ public:
/// \brief Number of memchecks required to prove independence of otherwise
/// may-alias pointers.
- unsigned getNumRuntimePointerChecks(
- const SmallVectorImpl<int> *PtrPartition = nullptr) const {
- return PtrRtChecking.getNumberOfChecks(PtrPartition);
+ unsigned getNumRuntimePointerChecks() const {
+ return PtrRtChecking.getNumberOfChecks();
}
/// Return true if the block BB needs to be predicated in order for the loop
@@ -472,13 +544,18 @@ public:
/// Returns a pair of instructions where the first element is the first
/// instruction generated, possibly in a sequence of instructions, and the
/// second value is the final comparator value or NULL if no check is needed.
+ std::pair<Instruction *, Instruction *>
+ addRuntimeChecks(Instruction *Loc) const;
+
+  /// \brief Generate the instructions for the checks in \p PointerChecks.
///
- /// If \p PtrPartition is set, it contains the partition number for pointers
- /// (-1 if the pointer belongs to multiple partitions). In this case omit
- /// checks between pointers belonging to the same partition.
+ /// Returns a pair of instructions where the first element is the first
+  /// instruction generated, possibly in a sequence of instructions, and the
+ /// second value is the final comparator value or NULL if no check is needed.
std::pair<Instruction *, Instruction *>
- addRuntimeCheck(Instruction *Loc,
- const SmallVectorImpl<int> *PtrPartition = nullptr) const;
+ addRuntimeChecks(Instruction *Loc,
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck>
+ &PointerChecks) const;
/// \brief The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
@@ -510,6 +587,13 @@ public:
return StoreToLoopInvariantAddress;
}
+ /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
+ /// them to a more usable form. All SCEV expressions during the analysis
+ /// should be re-written (and therefore simplified) according to PSE.
+ /// A user of LoopAccessAnalysis will need to emit the runtime checks
+ /// associated with this predicate.
+ PredicatedScalarEvolution PSE;
+
private:
/// \brief Analyze the loop. Substitute symbolic strides using Strides.
void analyzeLoop(const ValueToValueMap &Strides);
@@ -529,7 +613,6 @@ private:
MemoryDepChecker DepChecker;
Loop *TheLoop;
- ScalarEvolution *SE;
const DataLayout &DL;
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -556,18 +639,24 @@ private:
Value *stripIntegerCast(Value *V);
///\brief Return the SCEV corresponding to a pointer with the symbolic stride
-///replaced with constant one.
+/// replaced with constant one, assuming \p Preds is true.
+///
+/// If necessary this method will version the stride of the pointer according
+/// to \p PtrToStride and therefore add a new predicate to \p Preds.
///
/// If \p OrigPtr is not null, use it to look up the stride value instead of \p
/// Ptr. \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
-const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr, Value *OrigPtr = nullptr);
/// \brief Check the stride of the pointer and ensure that it does not wrap in
-/// the address space.
-int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+/// the address space, assuming \p Preds is true.
+///
+/// If necessary this method will version the stride of the pointer according
+/// to \p PtrToStride and therefore add a new predicate to \p Preds.
+int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap);
/// \brief This analysis provides dependence information for the memory accesses
@@ -616,6 +705,17 @@ private:
DominatorTree *DT;
LoopInfo *LI;
};
+
+inline Instruction *MemoryDepChecker::Dependence::getSource(
+ const LoopAccessInfo &LAI) const {
+ return LAI.getDepChecker().getMemoryInstructions()[Source];
+}
+
+inline Instruction *MemoryDepChecker::Dependence::getDestination(
+ const LoopAccessInfo &LAI) const {
+ return LAI.getDepChecker().getMemoryInstructions()[Destination];
+}
+
} // End llvm namespace
#endif
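The renamed dependence API composes with the new getSource/getDestination
helpers. A minimal sketch, assuming a LoopAccessInfo LAI has already been
computed for the loop:

    if (const auto *Deps = LAI.getDepChecker().getDependences()) {
      for (const MemoryDepChecker::Dependence &D : *Deps) {
        Instruction *Src = D.getSource(LAI);       // index -> instruction
        Instruction *Dst = D.getDestination(LAI);
        // D.isForward() / D.isBackward() classify the lexical direction.
        (void)Src; (void)Dst;
      }
    } else {
      // A null result means MaxDependences was exceeded and recording was
      // turned off, so the dependence list is unavailable.
    }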
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
index 3ec83f2..c219bd8 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
@@ -37,6 +37,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include <algorithm>
@@ -72,6 +73,10 @@ class LoopBase {
SmallPtrSet<const BlockT*, 8> DenseBlockSet;
+  /// Indicator that this loop has been "unlooped", so there's no loop here
+ /// anymore.
+ bool IsUnloop = false;
+
LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
const LoopBase<BlockT, LoopT>&
operator=(const LoopBase<BlockT, LoopT> &) = delete;
@@ -140,12 +145,22 @@ public:
typedef typename std::vector<BlockT*>::const_iterator block_iterator;
block_iterator block_begin() const { return Blocks.begin(); }
block_iterator block_end() const { return Blocks.end(); }
+ inline iterator_range<block_iterator> blocks() const {
+ return make_range(block_begin(), block_end());
+ }
/// getNumBlocks - Get the number of blocks in this loop in constant time.
unsigned getNumBlocks() const {
return Blocks.size();
}
+ /// Mark this loop as having been unlooped - the last backedge was removed and
+ /// we no longer have a loop.
+ void markUnlooped() { IsUnloop = true; }
+
+ /// Return true if this no longer represents a loop.
+ bool isUnloop() const { return IsUnloop; }
+
/// isLoopExiting - True if terminator in the block can branch to another
/// block that is outside of the current loop.
///
@@ -398,6 +413,9 @@ public:
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool isLCSSAForm(DominatorTree &DT) const;
+ /// \brief Return true if this Loop and all inner subloops are in LCSSA form.
+ bool isRecursivelyLCSSAForm(DominatorTree &DT) const;
+
/// isLoopSimplifyForm - Return true if the Loop is in the form that
/// the LoopSimplify form transforms loops to, which is sometimes called
/// normal form.
@@ -622,7 +640,7 @@ public:
}
/// Create the loop forest using a stable algorithm.
- void Analyze(DominatorTreeBase<BlockT> &DomTree);
+ void analyze(const DominatorTreeBase<BlockT> &DomTree);
// Debugging
void print(raw_ostream &OS) const;
@@ -642,6 +660,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
LoopInfo(const LoopInfo &) = delete;
public:
LoopInfo() {}
+ explicit LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
LoopInfo &operator=(LoopInfo &&RHS) {
@@ -653,8 +672,9 @@ public:
/// updateUnloop - Update LoopInfo after removing the last backedge from a
/// loop--now the "unloop". This updates the loop forest and parent loops for
- /// each block so that Unloop is no longer referenced, but the caller must
- /// actually delete the Unloop object.
+ /// each block so that Unloop is no longer referenced, but does not actually
+ /// delete the Unloop object. Generally, the loop pass manager should manage
+ /// deleting the Unloop.
void updateUnloop(Loop *Unloop);
/// replacementPreservesLCSSAForm - Returns true if replacing From with To
@@ -677,6 +697,78 @@ public:
// it as a replacement will not break LCSSA form.
return ToLoop->contains(getLoopFor(From->getParent()));
}
+
+ /// \brief Checks if moving a specific instruction can break LCSSA in any
+ /// loop.
+ ///
+ /// Return true if moving \p Inst to before \p NewLoc will break LCSSA,
+ /// assuming that the function containing \p Inst and \p NewLoc is currently
+ /// in LCSSA form.
+ bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) {
+ assert(Inst->getFunction() == NewLoc->getFunction() &&
+ "Can't reason about IPO!");
+
+ auto *OldBB = Inst->getParent();
+ auto *NewBB = NewLoc->getParent();
+
+ // Movement within the same loop does not break LCSSA (the equality check is
+ // to avoid doing a hashtable lookup in case of intra-block movement).
+ if (OldBB == NewBB)
+ return true;
+
+ auto *OldLoop = getLoopFor(OldBB);
+ auto *NewLoop = getLoopFor(NewBB);
+
+ if (OldLoop == NewLoop)
+ return true;
+
+ // Check if Outer contains Inner; with the null loop counting as the
+ // "outermost" loop.
+ auto Contains = [](const Loop *Outer, const Loop *Inner) {
+ return !Outer || Outer->contains(Inner);
+ };
+
+ // To check that the movement of Inst to before NewLoc does not break LCSSA,
+ // we need to check two sets of uses for possible LCSSA violations at
+ // NewLoc: the users of NewInst, and the operands of NewInst.
+
+ // If we know we're hoisting Inst out of an inner loop to an outer loop,
+ // then the uses *of* Inst don't need to be checked.
+
+ if (!Contains(NewLoop, OldLoop)) {
+ for (Use &U : Inst->uses()) {
+ auto *UI = cast<Instruction>(U.getUser());
+ auto *UBB = isa<PHINode>(UI) ? cast<PHINode>(UI)->getIncomingBlock(U)
+ : UI->getParent();
+ if (UBB != NewBB && getLoopFor(UBB) != NewLoop)
+ return false;
+ }
+ }
+
+ // If we know we're sinking Inst from an outer loop into an inner loop, then
+ // the *operands* of Inst don't need to be checked.
+
+ if (!Contains(OldLoop, NewLoop)) {
+ // See below on why we can't handle phi nodes here.
+ if (isa<PHINode>(Inst))
+ return false;
+
+ for (Use &U : Inst->operands()) {
+ auto *DefI = dyn_cast<Instruction>(U.get());
+ if (!DefI)
+ return false;
+
+ // This would need adjustment if we allow Inst to be a phi node -- the
+ // new use block won't simply be NewBB.
+
+ auto *DefBlock = DefI->getParent();
+ if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop)
+ return false;
+ }
+ }
+
+ return true;
+ }
};
// Allow clients to walk the list of nested loops...
@@ -759,6 +851,19 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+/// \brief Pass for printing a loop's contents as LLVM's text IR assembly.
+class PrintLoopPass {
+ raw_ostream &OS;
+ std::string Banner;
+
+public:
+ PrintLoopPass();
+ PrintLoopPass(raw_ostream &OS, const std::string &Banner = "");
+
+ PreservedAnalyses run(Loop &L);
+ static StringRef name() { return "PrintLoopPass"; }
+};
+
} // End llvm namespace
#endif
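A minimal sketch of guarding code motion with the new LCSSA check; LI is a
LoopInfo &, Inst the instruction to move, and InsertPt the target position
(all assumed to be in scope):

    // Only move Inst if doing so provably keeps the function in LCSSA form.
    if (LI.movementPreservesLCSSAForm(Inst, InsertPt))
      Inst->moveBefore(InsertPt);
    // The new blocks() range tidies whole-loop iteration (L is some Loop *):
    for (BasicBlock *BB : L->blocks())
      (void)BB;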
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
index f5cc856..824fc7e 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -269,7 +269,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
// A non-header loop shouldn't be reachable from outside the loop,
// though it is permitted if the predecessor is not itself actually
// reachable.
- BlockT *EntryBB = BB->getParent()->begin();
+ BlockT *EntryBB = &BB->getParent()->front();
for (BlockT *CB : depth_first(EntryBB))
for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i)
assert(CB != OutsideLoopPreds[i] &&
@@ -345,7 +345,7 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth) const {
template<class BlockT, class LoopT>
static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT*> Backedges,
LoopInfoBase<BlockT, LoopT> *LI,
- DominatorTreeBase<BlockT> &DomTree) {
+ const DominatorTreeBase<BlockT> &DomTree) {
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
unsigned NumBlocks = 0;
@@ -468,10 +468,10 @@ void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) {
/// insertions per block.
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::
-Analyze(DominatorTreeBase<BlockT> &DomTree) {
+analyze(const DominatorTreeBase<BlockT> &DomTree) {
// Postorder traversal of the dominator tree.
- DomTreeNodeBase<BlockT>* DomRoot = DomTree.getRootNode();
+ const DomTreeNodeBase<BlockT> *DomRoot = DomTree.getRootNode();
for (auto DomNode : post_order(DomRoot)) {
BlockT *Header = DomNode->getBlock();
diff --git a/contrib/llvm/include/llvm/Analysis/LoopPass.h b/contrib/llvm/include/llvm/Analysis/LoopPass.h
index 8650000..2cf734e5 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopPass.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopPass.h
@@ -127,20 +127,9 @@ public:
}
public:
- // Delete loop from the loop queue and loop nest (LoopInfo).
- void deleteLoopFromQueue(Loop *L);
-
- // Insert loop into the loop queue and add it as a child of the
- // given parent.
- void insertLoop(Loop *L, Loop *ParentLoop);
-
- // Insert a loop into the loop queue.
- void insertLoopIntoQueue(Loop *L);
-
- // Reoptimize this loop. LPPassManager will re-insert this loop into the
- // queue. This allows LoopPass to change loop nest for the loop. This
- // utility may send LPPassManager into infinite loops so use caution.
- void redoLoop(Loop *L);
+ // Add a new loop into the loop queue as a child of the given parent, or at
+ // the top level if \c ParentLoop is null.
+ Loop &addLoop(Loop *ParentLoop);
//===--------------------------------------------------------------------===//
/// SimpleAnalysis - Provides simple interface to update analysis info
@@ -163,8 +152,6 @@ public:
private:
std::deque<Loop *> LQ;
- bool skipThisLoop;
- bool redoThisLoop;
LoopInfo *LI;
Loop *CurrentLoop;
};
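The removed queue-manipulation entry points collapse into a single
allocate-and-enqueue call. A minimal sketch from inside a LoopPass'
runOnLoop, where LPM is the LPPassManager parameter and CurLoop the current
Loop * (both assumed):

    // A null parent would add the new loop at the top level instead.
    Loop &NewLoop = LPM.addLoop(CurLoop->getParentLoop());
    (void)NewLoop; // the caller is responsible for populating its blocks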
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 805a43d..87fb3ef 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -60,11 +60,6 @@ bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
/// \brief Tests if a value is a call or invoke to a library function that
-/// reallocates memory (such as realloc).
-bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
- bool LookThroughBitCast = false);
-
-/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 5118980..daa1ba9 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -28,7 +28,6 @@ namespace llvm {
class FunctionPass;
class Instruction;
class CallSite;
- class AliasAnalysis;
class AssumptionCache;
class MemoryDependenceAnalysis;
class PredIteratorCache;
@@ -97,6 +96,7 @@ namespace llvm {
typedef PointerIntPair<Instruction*, 2, DepType> PairTy;
PairTy Value;
explicit MemDepResult(PairTy V) : Value(V) {}
+
public:
MemDepResult() : Value(nullptr, Invalid) {}
@@ -164,6 +164,7 @@ namespace llvm {
bool operator!=(const MemDepResult &M) const { return Value != M.Value; }
bool operator<(const MemDepResult &M) const { return Value < M.Value; }
bool operator>(const MemDepResult &M) const { return Value > M.Value; }
+
private:
friend class MemoryDependenceAnalysis;
/// Dirty - Entries with this marker occur in a LocalDeps map or
@@ -190,6 +191,7 @@ namespace llvm {
class NonLocalDepEntry {
BasicBlock *BB;
MemDepResult Result;
+
public:
NonLocalDepEntry(BasicBlock *bb, MemDepResult result)
: BB(bb), Result(result) {}
@@ -215,6 +217,7 @@ namespace llvm {
class NonLocalDepResult {
NonLocalDepEntry Entry;
Value *Address;
+
public:
NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address)
: Entry(bb, result), Address(address) {}
@@ -261,6 +264,7 @@ namespace llvm {
public:
typedef std::vector<NonLocalDepEntry> NonLocalDepInfo;
+
private:
/// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if
/// the dependence is a read only dependence, false if read/write.
@@ -302,7 +306,6 @@ namespace llvm {
SmallPtrSet<ValueIsLoadPair, 4> > ReverseNonLocalPtrDepTy;
ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps;
-
/// PerInstNLInfo - This is the instruction we keep for each cached access
/// that we have for an instruction. The pointer is an owning pointer and
/// the bool indicates whether we have any dirty bits in the set.
@@ -326,6 +329,7 @@ namespace llvm {
AliasAnalysis *AA;
DominatorTree *DT;
AssumptionCache *AC;
+ const TargetLibraryInfo *TLI;
PredIteratorCache PredCache;
public:
@@ -363,14 +367,13 @@ namespace llvm {
/// that.
const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS);
-
/// getNonLocalPointerDependency - Perform a full dependency query for an
/// access to the QueryInst's specified memory location, returning the set
/// of instructions that either define or clobber the value.
///
/// Warning: For a volatile query instruction, the dependencies will be
/// accurate, and thus usable for reordering, but it is never legal to
- /// remove the query instruction.
+ /// remove the query instruction.
///
/// This method assumes the pointer has a "NonLocal" dependency within
/// QueryInst's parent basic block.
@@ -394,12 +397,12 @@ namespace llvm {
/// critical edges.
void invalidateCachedPredecessors();
- /// getPointerDependencyFrom - Return the instruction on which a memory
- /// location depends. If isLoad is true, this routine ignores may-aliases
- /// with read-only operations. If isLoad is false, this routine ignores
- /// may-aliases with reads from read-only locations. If possible, pass
- /// the query instruction as well; this function may take advantage of
- /// the metadata annotated to the query instruction to refine the result.
+ /// \brief Return the instruction on which a memory location depends.
+ /// If isLoad is true, this routine ignores may-aliases with read-only
+ /// operations. If isLoad is false, this routine ignores may-aliases
+ /// with reads from read-only locations. If possible, pass the query
+ /// instruction as well; this function may take advantage of the metadata
+ /// annotated to the query instruction to refine the result.
///
/// Note that this is an uncached query, and thus may be inefficient.
///
@@ -409,6 +412,21 @@ namespace llvm {
BasicBlock *BB,
Instruction *QueryInst = nullptr);
+ MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
+ bool isLoad,
+ BasicBlock::iterator ScanIt,
+ BasicBlock *BB,
+ Instruction *QueryInst);
+
+ /// This analysis looks for other loads and stores with invariant.group
+ /// metadata and the same pointer operand. Returns Unknown if it does not
+ /// find anything, and Def if it can be assumed that 2 instructions load or
+ /// store the same value.
+ /// FIXME: This analysis works only on single block because of restrictions
+ /// at the call site.
+ MemDepResult getInvariantGroupPointerDependency(LoadInst *LI,
+ BasicBlock *BB);
+
/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, Offs,
/// and Size) and compares it against a load. If the specified load could
@@ -442,7 +460,6 @@ namespace llvm {
/// verifyRemoved - Verify that the specified instruction does not occur
/// in our internal data structures.
void verifyRemoved(Instruction *Inst) const;
-
};
} // End llvm namespace
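A minimal sketch of a cached local query against this interface from a pass
that required the analysis; getDependency is the usual entry point here, and
QueryLoad is an assumed LoadInst *:

    MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
    MemDepResult Dep = MD.getDependency(QueryLoad);
    if (Dep.isDef()) {
      // The instruction that defines the loaded memory, e.g. a prior store.
      Instruction *DefInst = Dep.getInst();
      (void)DefInst;
    }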
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
new file mode 100644
index 0000000..ac01154
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -0,0 +1,102 @@
+//===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H
+#define LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief This is a simple alias analysis implementation that uses knowledge
+/// of ARC constructs to answer queries.
+///
+/// TODO: This class could be generalized to know about other ObjC-specific
+/// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+/// even though their offsets are dynamic.
+class ObjCARCAAResult : public AAResultBase<ObjCARCAAResult> {
+ friend AAResultBase<ObjCARCAAResult>;
+
+ const DataLayout &DL;
+
+public:
+ explicit ObjCARCAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI), DL(DL) {}
+ ObjCARCAAResult(ObjCARCAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+
+ using AAResultBase::getModRefBehavior;
+ FunctionModRefBehavior getModRefBehavior(const Function *F);
+
+ using AAResultBase::getModRefInfo;
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class ObjCARCAA {
+public:
+ typedef ObjCARCAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ ObjCARCAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "ObjCARCAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the ObjCARCAAResult object.
+class ObjCARCAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<ObjCARCAAResult> Result;
+
+public:
+ static char ID;
+
+ ObjCARCAAWrapperPass();
+
+ ObjCARCAAResult &getResult() { return *Result; }
+ const ObjCARCAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif
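A minimal sketch of consuming the new result through the legacy wrapper;
pass registration is omitted and PtrA/PtrB are assumed Value *s:

    using namespace llvm;
    using namespace llvm::objcarc;

    // In getAnalysisUsage: AU.addRequired<ObjCARCAAWrapperPass>();
    ObjCARCAAResult &AAR = getAnalysis<ObjCARCAAWrapperPass>().getResult();
    AliasResult R = AAR.alias(MemoryLocation(PtrA), MemoryLocation(PtrB));
    if (R == NoAlias) {
      // ARC-specific knowledge proved the two locations disjoint.
    }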
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
new file mode 100644
index 0000000..29d99c9
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -0,0 +1,287 @@
+//===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines common analysis utilities used by the ObjC ARC Optimizer.
+/// ARC stands for Automatic Reference Counting and is a system for managing
+/// reference counts for objects in Objective C.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
+#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+extern bool EnableARCOpts;
+
+/// \brief Test if the given module looks interesting to run ARC optimization
+/// on.
+inline bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer") ||
+ M.getNamedValue("clang.arc.use");
+}
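As a sketch of the intended use (assumed caller, not from this patch), an ARC pass would typically bail out of modules that reference none of these entry points:

// Hedged sketch: skip modules that cannot contain ARC constructs.
static bool runARCOptSketch(llvm::Module &M) {
  if (!llvm::objcarc::EnableARCOpts || !llvm::objcarc::ModuleHasARC(M))
    return false; // no ARC entry points referenced; leave the IR untouched
  // ... ARC-aware transformations would run here ...
  return true;
}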
+
+/// \brief This is a wrapper around getUnderlyingObject which also knows how to
+/// look through objc_retain and objc_autorelease calls, which we know to return
+/// their argument verbatim.
+inline const Value *GetUnderlyingObjCPtr(const Value *V,
+ const DataLayout &DL) {
+ for (;;) {
+ V = GetUnderlyingObject(V, DL);
+ if (!IsForwarding(GetBasicARCInstKind(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// The RCIdentity root of a value \p V is a dominating value U for which
+/// retaining or releasing U is equivalent to retaining or releasing V. In other
+/// words, ARC operations on \p V are equivalent to ARC operations on \p U.
+///
+/// We use this in the ARC optimizer to make it easier to match up ARC
+/// operations by always mapping ARC operations to RCIdentityRoots instead of
+/// pointers themselves.
+///
+/// The two ways that we see RCIdentical values in ObjC are via:
+///
+/// 1. PointerCasts
+/// 2. Forwarding Calls that return their argument verbatim.
+///
+/// Thus this function strips off pointer casts and forwarding calls. *NOTE*
+/// This implies that two RCIdentical values must alias.
+inline const Value *GetRCIdentityRoot(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicARCInstKind(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// Helper that calls const Value *GetRCIdentityRoot(const Value *V) and just
+/// casts away the const of the result. For documentation about what an
+/// RCIdentityRoot is (and, by extension, what GetRCIdentityRoot does), see
+/// that function.
+inline Value *GetRCIdentityRoot(Value *V) {
+ return const_cast<Value *>(GetRCIdentityRoot((const Value *)V));
+}
+
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the RCIdentity root of the argument of
+/// the call.
+inline Value *GetArgRCIdentityRoot(Value *Inst) {
+ return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0));
+}
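Since two RCIdentical values must alias, a client can use the root as a cheap equivalence key. A small illustrative helper (hypothetical, not part of the patch):

// Hypothetical helper: true when two values share an RCIdentity root and
// therefore must alias (e.g. %y = call i8* @objc_retain(i8* %x) has the
// same root as %x).
static bool haveSameRCIdentityRoot(const llvm::Value *A,
                                   const llvm::Value *B) {
  return llvm::objcarc::GetRCIdentityRoot(A) ==
         llvm::objcarc::GetRCIdentityRoot(B);
}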
+
+inline bool IsNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+inline bool IsNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// \brief Test whether the given value is possibly a retainable object pointer.
+inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+ // Pointers to static or stack storage are not valid retainable object
+ // pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments cannot be valid retainable object pointers.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasInAllocaAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types.
+ //
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never retainable object pointers; however, clang
+ // occasionally bitcasts retainable object pointers to function-pointer type
+ // temporarily.
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty)
+ return false;
+ // Conservatively assume anything else is a potential retainable object
+ // pointer.
+ return true;
+}
+
+inline bool IsPotentialRetainableObjPtr(const Value *Op,
+ AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialRetainableObjPtr(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory do not point to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
+/// \brief Helper for GetARCInstKind. Determines what kind of construct CS
+/// is.
+inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialRetainableObjPtr(*I))
+ return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
+
+ return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
+}
+
+/// \brief Return true if this value refers to a distinct and identifiable
+/// object.
+///
+/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
+/// special knowledge of ObjC conventions.
+inline bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ GetRCIdentityRoot(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't point to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+
+ StringRef Section = GV->getSection();
+ if (Section.find("__message_refs") != StringRef::npos ||
+ Section.find("__objc_classrefs") != StringRef::npos ||
+ Section.find("__objc_superrefs") != StringRef::npos ||
+ Section.find("__objc_methname") != StringRef::npos ||
+ Section.find("__cstring") != StringRef::npos)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+enum class ARCMDKindID {
+ ImpreciseRelease,
+ CopyOnEscape,
+ NoObjCARCExceptions,
+};
+
+/// A cache of MDKinds used by various ARC optimizations.
+class ARCMDKindCache {
+ Module *M;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ llvm::Optional<unsigned> ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ llvm::Optional<unsigned> CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
+
+public:
+ void init(Module *Mod) {
+ M = Mod;
+ ImpreciseReleaseMDKind = NoneType::None;
+ CopyOnEscapeMDKind = NoneType::None;
+ NoObjCARCExceptionsMDKind = NoneType::None;
+ }
+
+ unsigned get(ARCMDKindID ID) {
+ switch (ID) {
+ case ARCMDKindID::ImpreciseRelease:
+ if (!ImpreciseReleaseMDKind)
+ ImpreciseReleaseMDKind =
+ M->getContext().getMDKindID("clang.imprecise_release");
+ return *ImpreciseReleaseMDKind;
+ case ARCMDKindID::CopyOnEscape:
+ if (!CopyOnEscapeMDKind)
+ CopyOnEscapeMDKind =
+ M->getContext().getMDKindID("clang.arc.copy_on_escape");
+ return *CopyOnEscapeMDKind;
+ case ARCMDKindID::NoObjCARCExceptions:
+ if (!NoObjCARCExceptionsMDKind)
+ NoObjCARCExceptionsMDKind =
+ M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+ return *NoObjCARCExceptionsMDKind;
+ }
+ llvm_unreachable("Covered switch isn't covered?!");
+ }
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
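A short sketch of the MDKind cache in use (assumed caller; Release stands in for an objc_release call already in hand):

#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

// Hedged sketch: resolve the clang.imprecise_release kind ID through the
// cache and tag a release call with it, the way the ARC optimizer does.
static void tagImpreciseRelease(llvm::Module *M, llvm::CallInst *Release) {
  llvm::objcarc::ARCMDKindCache Cache;
  Cache.init(M);
  unsigned ID = Cache.get(llvm::objcarc::ARCMDKindID::ImpreciseRelease);
  Release->setMetadata(ID, llvm::MDNode::get(M->getContext(), llvm::None));
}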
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
index 636c65c..13efb4b 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
@@ -1,4 +1,4 @@
-//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===//
+//===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
+#ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H
+#define LLVM_ANALYSIS_OBJCARCINSTKIND_H
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Function.h"
@@ -98,7 +98,7 @@ ARCInstKind GetFunctionClass(const Function *F);
/// This is similar to GetARCInstKind except that it only detects objc
/// runtime calls. This allows it to be faster.
///
-static inline ARCInstKind GetBasicARCInstKind(const Value *V) {
+inline ARCInstKind GetBasicARCInstKind(const Value *V) {
if (const CallInst *CI = dyn_cast<CallInst>(V)) {
if (const Function *F = CI->getCalledFunction())
return GetFunctionClass(F);
diff --git a/contrib/llvm/include/llvm/Analysis/OrderedBasicBlock.h b/contrib/llvm/include/llvm/Analysis/OrderedBasicBlock.h
new file mode 100644
index 0000000..5aa813e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/OrderedBasicBlock.h
@@ -0,0 +1,66 @@
+//===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the OrderedBasicBlock class. OrderedBasicBlock maintains
+// an interface where clients can query whether one instruction comes before
+// another in a BasicBlock. Since BasicBlock currently lacks a reliable way to
+// query the relative position of instructions, one can use OrderedBasicBlock
+// for such queries. OrderedBasicBlock is lazily built on a source BasicBlock
+// and maintains an internal Instruction -> Position map. An OrderedBasicBlock
+// instance should be discarded whenever the source BasicBlock changes.
+//
+// It's currently used by the CaptureTracker in order to find relative
+// positions of a pair of instructions inside a BasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_ORDEREDBASICBLOCK_H
+#define LLVM_ANALYSIS_ORDEREDBASICBLOCK_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/BasicBlock.h"
+
+namespace llvm {
+
+class Instruction;
+class BasicBlock;
+
+class OrderedBasicBlock {
+private:
+ /// \brief Map an instruction to its position in a BasicBlock.
+ SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts;
+
+ /// \brief Keep track of the last instruction inserted into \p NumberedInsts.
+ /// It speeds up queries for uncached instructions by providing a start point
+ /// for new queries in OrderedBasicBlock::comesBefore.
+ BasicBlock::const_iterator LastInstFound;
+
+ /// \brief The position/number to tag the next instruction to be found.
+ unsigned NextInstPos;
+
+ /// \brief The source BasicBlock to map.
+ const BasicBlock *BB;
+
+ /// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+ /// Cache and number instructions while walking \p BB.
+ bool comesBefore(const Instruction *A, const Instruction *B);
+
+public:
+ OrderedBasicBlock(const BasicBlock *BasicB);
+
+ /// \brief Find out whether \p A dominates \p B, meaning whether \p A
+ /// comes before \p B in \p BB. This is a simplification that considers
+ /// cached instruction positions and ignores other basic blocks; it is
+ /// only meaningful for comparing relative instruction positions inside \p BB.
+ bool dominates(const Instruction *A, const Instruction *B);
+};
+
+} // End llvm namespace
+
+#endif
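A usage sketch (hypothetical helper; rebuild the OrderedBasicBlock after any mutation of the block):

#include "llvm/Analysis/OrderedBasicBlock.h"

// Hedged sketch: answer "does A appear before B in BB?" with lazy,
// cached numbering instead of a linear scan per query.
static bool comesBeforeInBlock(const llvm::BasicBlock *BB,
                               const llvm::Instruction *A,
                               const llvm::Instruction *B) {
  llvm::OrderedBasicBlock OBB(BB);
  return OBB.dominates(A, B);
}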
diff --git a/contrib/llvm/include/llvm/Analysis/PHITransAddr.h b/contrib/llvm/include/llvm/Analysis/PHITransAddr.h
index cbdbb88..f0f34f3 100644
--- a/contrib/llvm/include/llvm/Analysis/PHITransAddr.h
+++ b/contrib/llvm/include/llvm/Analysis/PHITransAddr.h
@@ -48,6 +48,7 @@ class PHITransAddr {
/// InstInputs - The inputs for our symbolic address.
SmallVector<Instruction*, 4> InstInputs;
+
public:
PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC)
: Addr(addr), DL(DL), TLI(nullptr), AC(AC) {
@@ -55,9 +56,9 @@ public:
if (Instruction *I = dyn_cast<Instruction>(Addr))
InstInputs.push_back(I);
}
-
+
Value *getAddr() const { return Addr; }
-
+
/// NeedsPHITranslationFromBlock - Return true if moving from the specified
/// BasicBlock to its predecessors requires PHI translation.
bool NeedsPHITranslationFromBlock(BasicBlock *BB) const {
@@ -68,12 +69,12 @@ public:
return true;
return false;
}
-
+
/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
/// if we have some hope of doing it. This should be used as a filter to
/// avoid calling PHITranslateValue in hopeless situations.
bool IsPotentiallyPHITranslatable() const;
-
+
/// PHITranslateValue - PHI translate the current address up the CFG from
/// CurBB to Pred, updating our state to reflect any needed changes. If
/// 'MustDominate' is true, the translated value must dominate
@@ -90,18 +91,19 @@ public:
///
Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
const DominatorTree &DT,
- SmallVectorImpl<Instruction*> &NewInsts);
-
+ SmallVectorImpl<Instruction *> &NewInsts);
+
void dump() const;
-
+
/// Verify - Check internal consistency of this data structure. If the
/// structure is valid, it returns true. If invalid, it prints errors and
/// returns false.
bool Verify() const;
+
private:
Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB,
const DominatorTree *DT);
-
+
/// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
/// block. All newly created instructions are added to the NewInsts list.
@@ -109,8 +111,8 @@ private:
///
Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
BasicBlock *PredBB, const DominatorTree &DT,
- SmallVectorImpl<Instruction*> &NewInsts);
-
+ SmallVectorImpl<Instruction *> &NewInsts);
+
/// AddAsInput - If the specified value is an instruction, add it as an input.
Value *AddAsInput(Value *V) {
// If V is an instruction, it is now an input.
@@ -118,7 +120,6 @@ private:
InstInputs.push_back(VI);
return V;
}
-
};
} // end namespace llvm
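To illustrate the filter-style queries above (a hedged sketch; worthTranslating is not an API from this patch):

#include "llvm/Analysis/PHITransAddr.h"

// Hedged sketch: only attempt PHI translation when the address actually
// needs it in BB and there is some hope of success.
static bool worthTranslating(llvm::Value *Addr, llvm::BasicBlock *BB,
                             const llvm::DataLayout &DL,
                             llvm::AssumptionCache *AC) {
  llvm::PHITransAddr TA(Addr, DL, AC);
  return TA.NeedsPHITranslationFromBlock(BB) &&
         TA.IsPotentiallyPHITranslatable();
}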
diff --git a/contrib/llvm/include/llvm/Analysis/Passes.h b/contrib/llvm/include/llvm/Analysis/Passes.h
index d112ab1..da17457 100644
--- a/contrib/llvm/include/llvm/Analysis/Passes.h
+++ b/contrib/llvm/include/llvm/Analysis/Passes.h
@@ -22,27 +22,6 @@ namespace llvm {
class ModulePass;
class Pass;
class PassInfo;
- class LibCallInfo;
-
- //===--------------------------------------------------------------------===//
- //
- // createGlobalsModRefPass - This pass provides alias and mod/ref info for
- // global values that do not have their addresses taken.
- //
- Pass *createGlobalsModRefPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createAliasDebugger - This pass helps debug clients of AA
- //
- Pass *createAliasDebugger();
-
- //===--------------------------------------------------------------------===//
- //
- // createAliasAnalysisCounterPass - This pass counts alias queries and how the
- // alias analysis implementation responds.
- //
- ModulePass *createAliasAnalysisCounterPass();
//===--------------------------------------------------------------------===//
//
@@ -53,59 +32,10 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
- // createNoAAPass - This pass implements a "I don't know" alias analysis.
- //
- ImmutablePass *createNoAAPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createBasicAliasAnalysisPass - This pass implements the stateless alias
- // analysis.
- //
- ImmutablePass *createBasicAliasAnalysisPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createCFLAliasAnalysisPass - This pass implements a set-based approach to
- // alias analysis.
- //
- ImmutablePass *createCFLAliasAnalysisPass();
-
- //===--------------------------------------------------------------------===//
- //
- /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows
- /// about the semantics of a set of libcalls specified by LCI. The newly
- /// constructed pass takes ownership of the pointer that is provided.
- ///
- FunctionPass *createLibCallAliasAnalysisPass(LibCallInfo *LCI);
-
- //===--------------------------------------------------------------------===//
- //
- // createScalarEvolutionAliasAnalysisPass - This pass implements a simple
- // alias analysis using ScalarEvolution queries.
- //
- FunctionPass *createScalarEvolutionAliasAnalysisPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createTypeBasedAliasAnalysisPass - This pass implements metadata-based
- // type-based alias analysis.
- //
- ImmutablePass *createTypeBasedAliasAnalysisPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createScopedNoAliasAAPass - This pass implements metadata-based
- // scoped noalias analysis.
- //
- ImmutablePass *createScopedNoAliasAAPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createObjCARCAliasAnalysisPass - This pass implements ObjC-ARC-based
+ // createObjCARCAAWrapperPass - This pass implements ObjC-ARC-based
// alias analysis.
//
- ImmutablePass *createObjCARCAliasAnalysisPass();
+ ImmutablePass *createObjCARCAAWrapperPass();
FunctionPass *createPAEvalPass();
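Callers of the removed creator functions migrate to the renamed entry point; a minimal sketch of the legacy-pass-manager side (assumed usage):

#include "llvm/Analysis/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

// Hedged sketch: schedule the ObjC ARC alias-analysis wrapper where
// createObjCARCAliasAnalysisPass() used to be added.
static void addObjCARCAA(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createObjCARCAAWrapperPass());
}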
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
index 8560f1f..4988386 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
@@ -47,7 +47,7 @@
namespace llvm {
-// RegionTraits - Class to be specialized for different users of RegionInfo
+// Class to be specialized for different users of RegionInfo
// (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to
// pass around an unreasonable number of template parameters.
template <class FuncT_>
@@ -282,17 +282,16 @@ class RegionBase : public RegionNodeBase<Tr> {
// Save the BasicBlock RegionNodes that are element of this Region.
mutable BBNodeMapT BBNodeMap;
- /// verifyBBInRegion - Check if a BB is in this Region. This check also works
+ /// Check if a BB is in this Region. This check also works
/// if the region is incorrectly built. (EXPENSIVE!)
void verifyBBInRegion(BlockT *BB) const;
- /// verifyWalk - Walk over all the BBs of the region starting from BB and
+ /// Walk over all the BBs of the region starting from BB and
/// verify that all reachable basic blocks are elements of the region.
/// (EXPENSIVE!)
void verifyWalk(BlockT *BB, std::set<BlockT *> *visitedBB) const;
- /// verifyRegionNest - Verify if the region and its children are valid
- /// regions (EXPENSIVE!)
+ /// Verify if the region and its children are valid regions (EXPENSIVE!)
void verifyRegionNest() const;
public:
@@ -688,45 +687,50 @@ private:
/// Map every BB to the smallest region, that contains BB.
BBtoRegionMap BBtoRegion;
- // isCommonDomFrontier - Returns true if BB is in the dominance frontier of
+ // Check whether the entries of BBtoRegion for the BBs of region
+ // SR are correct. Triggers an assertion if not. Calls itself recursively for
+ // subregions.
+ void verifyBBMap(const RegionT *SR) const;
+
+ // Returns true if BB is in the dominance frontier of
// entry, because it was inherited from exit. In the other case there is an
// edge going from entry to BB without passing exit.
bool isCommonDomFrontier(BlockT *BB, BlockT *entry, BlockT *exit) const;
- // isRegion - Check if entry and exit surround a valid region, based on
+ // Check if entry and exit surround a valid region, based on
// dominance tree and dominance frontier.
bool isRegion(BlockT *entry, BlockT *exit) const;
- // insertShortCut - Saves a shortcut pointing from entry to exit.
+ // Saves a shortcut pointing from entry to exit.
// This function may extend this shortcut if possible.
void insertShortCut(BlockT *entry, BlockT *exit, BBtoBBMap *ShortCut) const;
- // getNextPostDom - Returns the next BB that postdominates N, while skipping
+ // Returns the next BB that postdominates N, while skipping
// all post dominators that cannot finish a canonical region.
DomTreeNodeT *getNextPostDom(DomTreeNodeT *N, BBtoBBMap *ShortCut) const;
- // isTrivialRegion - A region is trivial, if it contains only one BB.
+ // A region is trivial if it contains only one BB.
bool isTrivialRegion(BlockT *entry, BlockT *exit) const;
- // createRegion - Creates a single entry single exit region.
+ // Creates a single entry single exit region.
RegionT *createRegion(BlockT *entry, BlockT *exit);
- // findRegionsWithEntry - Detect all regions starting with bb 'entry'.
+ // Detect all regions starting with bb 'entry'.
void findRegionsWithEntry(BlockT *entry, BBtoBBMap *ShortCut);
- // scanForRegions - Detects regions in F.
+ // Detects regions in F.
void scanForRegions(FuncT &F, BBtoBBMap *ShortCut);
- // getTopMostParent - Get the top most parent with the same entry block.
+ // Get the top most parent with the same entry block.
RegionT *getTopMostParent(RegionT *region);
- // buildRegionsTree - build the region hierarchy after all region detected.
+ // Build the region hierarchy after all regions are detected.
void buildRegionsTree(DomTreeNodeT *N, RegionT *region);
- // updateStatistics - Update statistic about created regions.
+ // Update statistics about created regions.
virtual void updateStatistics(RegionT *R) = 0;
- // calculate - detect all regions in function and build the region tree.
+ // Detect all regions in the function and build the region tree.
void calculate(FuncT &F);
public:
@@ -796,12 +800,6 @@ public:
RegionT *getTopLevelRegion() const { return TopLevelRegion; }
- /// @brief Update RegionInfo after a basic block was split.
- ///
- /// @param NewBB The basic block that was created before OldBB.
- /// @param OldBB The old basic block.
- void splitBlock(BlockT *NewBB, BlockT *OldBB);
-
/// @brief Clear the Node Cache for all Regions.
///
/// @see Region::clearNodeCache()
@@ -847,6 +845,19 @@ public:
void recalculate(Function &F, DominatorTree *DT, PostDominatorTree *PDT,
DominanceFrontier *DF);
+
+#ifndef NDEBUG
+ /// @brief Opens a viewer to show the GraphViz visualization of the regions.
+ ///
+ /// Useful during debugging as an alternative to dump().
+ void view();
+
+ /// @brief Opens a viewer to show the GraphViz visualization of this region
+ /// without instructions in the BasicBlocks.
+ ///
+ /// Useful during debugging as an alternative to dump().
+ void viewOnly();
+#endif
};
class RegionInfoPass : public FunctionPass {
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h b/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
index b31eefc..134cd8f 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -236,7 +236,7 @@ std::string RegionBase<Tr>::getNameStr() const {
template <class Tr>
void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
if (!contains(BB))
- llvm_unreachable("Broken region found!");
+ llvm_unreachable("Broken region found: enumerated BB not in region!");
BlockT *entry = getEntry(), *exit = getExit();
@@ -244,7 +244,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
SE = BlockTraits::child_end(BB);
SI != SE; ++SI) {
if (!contains(*SI) && exit != *SI)
- llvm_unreachable("Broken region found!");
+ llvm_unreachable("Broken region found: edges leaving the region must go "
+ "to the exit node!");
}
if (entry != BB) {
@@ -252,7 +253,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
SE = InvBlockTraits::child_end(BB);
SI != SE; ++SI) {
if (!contains(*SI))
- llvm_unreachable("Broken region found!");
+ llvm_unreachable("Broken region found: edges entering the region must "
+ "go to the entry node!");
}
}
}
@@ -442,16 +444,14 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
if (NumSuccessors == 0)
return nullptr;
- for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
- PE = InvBlockTraits::child_end(getExit());
- PI != PE; ++PI) {
- if (!DT->dominates(getEntry(), *PI))
- return nullptr;
- }
-
RegionT *R = RI->getRegionFor(exit);
if (R->getEntry() != exit) {
+ for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+ PE = InvBlockTraits::child_end(getExit());
+ PI != PE; ++PI)
+ if (!contains(*PI))
+ return nullptr;
if (Tr::getNumSuccessors(exit) == 1)
return new RegionT(getEntry(), *BlockTraits::child_begin(exit), RI, DT);
return nullptr;
@@ -460,13 +460,11 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
while (R->getParent() && R->getParent()->getEntry() == exit)
R = R->getParent();
- if (!DT->dominates(getEntry(), R->getExit())) {
- for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
- PE = InvBlockTraits::child_end(getExit());
- PI != PE; ++PI) {
- if (!DT->dominates(R->getExit(), *PI))
- return nullptr;
- }
+ for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+ PE = InvBlockTraits::child_end(getExit());
+ PI != PE; ++PI) {
+ if (!(contains(*PI) || R->contains(*PI)))
+ return nullptr;
}
return new RegionT(getEntry(), R->getExit(), RI, DT);
@@ -542,6 +540,21 @@ RegionInfoBase<Tr>::~RegionInfoBase() {
}
template <class Tr>
+void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
+ assert(R && "R must be non-null");
+ for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
+ if (I->isSubRegion()) {
+ const RegionT *SR = I->template getNodeAs<RegionT>();
+ verifyBBMap(SR);
+ } else {
+ BlockT *BB = I->template getNodeAs<BlockT>();
+ if (getRegionFor(BB) != R)
+ llvm_unreachable("BB map does not match region nesting");
+ }
+ }
+}
+
+template <class Tr>
bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry,
BlockT *exit) const {
for (PredIterTy PI = InvBlockTraits::child_begin(BB),
@@ -786,7 +799,14 @@ void RegionInfoBase<Tr>::releaseMemory() {
template <class Tr>
void RegionInfoBase<Tr>::verifyAnalysis() const {
+ // Only verify regions if explicitly activated using XDEBUG or
+ // -verify-region-info.
+ if (!RegionInfoBase<Tr>::VerifyRegionInfo)
+ return;
+
TopLevelRegion->verifyRegionNest();
+
+ verifyBBMap(TopLevelRegion);
}
// Region pass manager support.
@@ -887,20 +907,6 @@ RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<BlockT *> &BBs) const {
}
template <class Tr>
-void RegionInfoBase<Tr>::splitBlock(BlockT *NewBB, BlockT *OldBB) {
- RegionT *R = getRegionFor(OldBB);
-
- setRegionFor(NewBB, R);
-
- while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
- R->replaceEntry(NewBB);
- R = R->getParent();
- }
-
- setRegionFor(OldBB, R);
-}
-
-template <class Tr>
void RegionInfoBase<Tr>::calculate(FuncT &F) {
typedef typename std::add_pointer<FuncT>::type FuncPtrT;
diff --git a/contrib/llvm/include/llvm/Analysis/RegionPrinter.h b/contrib/llvm/include/llvm/Analysis/RegionPrinter.h
index 758748a..8f0035c 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionPrinter.h
@@ -17,10 +17,55 @@
namespace llvm {
class FunctionPass;
+ class Function;
+ class RegionInfo;
+
FunctionPass *createRegionViewerPass();
FunctionPass *createRegionOnlyViewerPass();
FunctionPass *createRegionPrinterPass();
FunctionPass *createRegionOnlyPrinterPass();
+
+#ifndef NDEBUG
+ /// @brief Open a viewer to display the GraphViz visualization of the analysis
+ /// result.
+ ///
+ /// Useful to call in the debugger.
+ /// Includes the instructions in each BasicBlock.
+ ///
+ /// @param RI The analysis to display.
+ void viewRegion(llvm::RegionInfo *RI);
+
+ /// @brief Analyze the regions of a function and open its GraphViz
+ /// visualization in a viewer.
+ ///
+ /// Useful to call in the debugger.
+ /// Includes the instructions in each BasicBlock.
+ /// The result of a new analysis may differ from the RegionInfo the pass
+ /// manager currently holds.
+ ///
+ /// @param F Function to analyze.
+ void viewRegion(const llvm::Function *F);
+
+ /// @brief Open a viewer to display the GraphViz visualization of the analysis
+ /// result.
+ ///
+ /// Useful to call in the debugger.
+ /// Shows only the BasicBlock names without their instructions.
+ ///
+ /// @param RI The analysis to display.
+ void viewRegionOnly(llvm::RegionInfo *RI);
+
+ /// @brief Analyze the regions of a function and open its GraphViz
+ /// visualization in a viewer.
+ ///
+ /// Useful to call in the debugger.
+ /// Shows only the BasicBlock names without their instructions.
+ /// The result of a new analysis may differ from the RegionInfo the pass
+ /// manager currently holds.
+ ///
+ /// @param F Function to analyze.
+ void viewRegionOnly(const llvm::Function *F);
+#endif
} // End llvm namespace
#endif
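These entry points exist mainly to be invoked from a debugger, e.g. in gdb: call llvm::viewRegion(F). A sketch of the in-code equivalent (debug builds only, since the declarations are guarded by NDEBUG; debugShowRegions is a hypothetical helper):

#include "llvm/Analysis/RegionPrinter.h"

#ifndef NDEBUG
// Hedged sketch: show both visualizations for a function F.
static void debugShowRegions(const llvm::Function *F) {
  llvm::viewRegion(F);     // CFG regions with instructions
  llvm::viewRegionOnly(F); // block names only
}
#endif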
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
index d47cab8..c08335d 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -23,10 +23,12 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -44,30 +46,33 @@ namespace llvm {
class DataLayout;
class TargetLibraryInfo;
class LLVMContext;
- class Loop;
- class LoopInfo;
class Operator;
- class SCEVUnknown;
class SCEV;
- template<> struct FoldingSetTrait<SCEV>;
+ class SCEVAddRecExpr;
+ class SCEVConstant;
+ class SCEVExpander;
+ class SCEVPredicate;
+ class SCEVUnknown;
- /// SCEV - This class represents an analyzed expression in the program. These
- /// are opaque objects that the client is not allowed to do much with
- /// directly.
+ template <> struct FoldingSetTrait<SCEV>;
+ template <> struct FoldingSetTrait<SCEVPredicate>;
+
+ /// This class represents an analyzed expression in the program. These are
+ /// opaque objects that the client is not allowed to do much with directly.
///
class SCEV : public FoldingSetNode {
friend struct FoldingSetTrait<SCEV>;
- /// FastID - A reference to an Interned FoldingSetNodeID for this node.
- /// The ScalarEvolution's BumpPtrAllocator holds the data.
+ /// A reference to an Interned FoldingSetNodeID for this node. The
+ /// ScalarEvolution's BumpPtrAllocator holds the data.
FoldingSetNodeIDRef FastID;
// The SCEV baseclass this node corresponds to
const unsigned short SCEVType;
protected:
- /// SubclassData - This field is initialized to zero and may be used in
- /// subclasses to store miscellaneous information.
+ /// This field is initialized to zero and may be used in subclasses to store
+ /// miscellaneous information.
unsigned short SubclassData;
private:
@@ -104,37 +109,32 @@ namespace llvm {
unsigned getSCEVType() const { return SCEVType; }
- /// getType - Return the LLVM type of this SCEV expression.
+ /// Return the LLVM type of this SCEV expression.
///
Type *getType() const;
- /// isZero - Return true if the expression is a constant zero.
+ /// Return true if the expression is a constant zero.
///
bool isZero() const;
- /// isOne - Return true if the expression is a constant one.
+ /// Return true if the expression is a constant one.
///
bool isOne() const;
- /// isAllOnesValue - Return true if the expression is a constant
- /// all-ones value.
+ /// Return true if the expression is a constant all-ones value.
///
bool isAllOnesValue() const;
- /// isNonConstantNegative - Return true if the specified scev is negated,
- /// but not a constant.
+ /// Return true if the specified scev is negated, but not a constant.
bool isNonConstantNegative() const;
- /// print - Print out the internal representation of this scalar to the
- /// specified stream. This should really only be used for debugging
- /// purposes.
+ /// Print out the internal representation of this scalar to the specified
+ /// stream. This should really only be used for debugging purposes.
void print(raw_ostream &OS) const;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// dump - This method is used for debugging.
+ /// This method is used for debugging.
///
void dump() const;
-#endif
};
// Specialize FoldingSetTrait for SCEV to avoid needing to compute
@@ -157,11 +157,10 @@ namespace llvm {
return OS;
}
- /// SCEVCouldNotCompute - An object of this class is returned by queries that
- /// could not be answered. For example, if you ask for the number of
- /// iterations of a linked-list traversal loop, you will get one of these.
- /// None of the standard SCEV operations are valid on this class, it is just a
- /// marker.
+ /// An object of this class is returned by queries that could not be answered.
+ /// For example, if you ask for the number of iterations of a linked-list
+ /// traversal loop, you will get one of these. None of the standard SCEV
+ /// operations are valid on this class, it is just a marker.
struct SCEVCouldNotCompute : public SCEV {
SCEVCouldNotCompute();
@@ -169,22 +168,162 @@ namespace llvm {
static bool classof(const SCEV *S);
};
- /// ScalarEvolution - This class is the main scalar evolution driver. Because
- /// client code (intentionally) can't do much with the SCEV objects directly,
- /// they must ask this class for services.
- ///
- class ScalarEvolution : public FunctionPass {
+ /// SCEVPredicate - This class represents an assumption made using SCEV
+ /// expressions which can be checked at run-time.
+ class SCEVPredicate : public FoldingSetNode {
+ friend struct FoldingSetTrait<SCEVPredicate>;
+
+ /// A reference to an Interned FoldingSetNodeID for this node. The
+ /// ScalarEvolution's BumpPtrAllocator holds the data.
+ FoldingSetNodeIDRef FastID;
+
+ public:
+ enum SCEVPredicateKind { P_Union, P_Equal };
+
+ protected:
+ SCEVPredicateKind Kind;
+ ~SCEVPredicate() = default;
+ SCEVPredicate(const SCEVPredicate&) = default;
+ SCEVPredicate &operator=(const SCEVPredicate&) = default;
+
+ public:
+ SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind);
+
+ SCEVPredicateKind getKind() const { return Kind; }
+
+ /// \brief Returns the estimated complexity of this predicate.
+ /// This is roughly measured in the number of run-time checks required.
+ virtual unsigned getComplexity() const { return 1; }
+
+ /// \brief Returns true if the predicate is always true. This means that no
+ /// assumptions were made and nothing needs to be checked at run-time.
+ virtual bool isAlwaysTrue() const = 0;
+
+ /// \brief Returns true if this predicate implies \p N.
+ virtual bool implies(const SCEVPredicate *N) const = 0;
+
+ /// \brief Prints a textual representation of this predicate with an
+ /// indentation of \p Depth.
+ virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0;
+
+ /// \brief Returns the SCEV to which this predicate applies, or nullptr
+ /// if this is a SCEVUnionPredicate.
+ virtual const SCEV *getExpr() const = 0;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) {
+ P.print(OS);
+ return OS;
+ }
+
+ // Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute
+ // temporary FoldingSetNodeID values.
+ template <>
+ struct FoldingSetTrait<SCEVPredicate>
+ : DefaultFoldingSetTrait<SCEVPredicate> {
+
+ static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) {
+ ID = X.FastID;
+ }
+
+ static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ return ID == X.FastID;
+ }
+ static unsigned ComputeHash(const SCEVPredicate &X,
+ FoldingSetNodeID &TempID) {
+ return X.FastID.ComputeHash();
+ }
+ };
+
+ /// SCEVEqualPredicate - This class represents an assumption that two SCEV
+ /// expressions are equal, and this can be checked at run-time. We assume
+ /// that the left hand side is a SCEVUnknown and the right hand side a
+ /// constant.
+ class SCEVEqualPredicate final : public SCEVPredicate {
+ /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a
+ /// constant.
+ const SCEVUnknown *LHS;
+ const SCEVConstant *RHS;
+
public:
- /// LoopDisposition - An enum describing the relationship between a
- /// SCEV and a loop.
+ SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS,
+ const SCEVConstant *RHS);
+
+ /// Implementation of the SCEVPredicate interface
+ bool implies(const SCEVPredicate *N) const override;
+ void print(raw_ostream &OS, unsigned Depth = 0) const override;
+ bool isAlwaysTrue() const override;
+ const SCEV *getExpr() const override;
+
+ /// \brief Returns the left hand side of the equality.
+ const SCEVUnknown *getLHS() const { return LHS; }
+
+ /// \brief Returns the right hand side of the equality.
+ const SCEVConstant *getRHS() const { return RHS; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVPredicate *P) {
+ return P->getKind() == P_Equal;
+ }
+ };
+
+ /// SCEVUnionPredicate - This class represents a composition of other
+ /// SCEV predicates, and is the class that most clients will interact with.
+ /// This is equivalent to a logical "AND" of all the predicates in the union.
+ class SCEVUnionPredicate final : public SCEVPredicate {
+ private:
+ typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>
+ PredicateMap;
+
+ /// Vector with references to all predicates in this union.
+ SmallVector<const SCEVPredicate *, 16> Preds;
+ /// Maps SCEVs to predicates for quick look-ups.
+ PredicateMap SCEVToPreds;
+
+ public:
+ SCEVUnionPredicate();
+
+ const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
+ return Preds;
+ }
+
+ /// \brief Adds a predicate to this union.
+ void add(const SCEVPredicate *N);
+
+ /// \brief Returns a reference to a vector containing all predicates
+ /// which apply to \p Expr.
+ ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
+
+ /// Implementation of the SCEVPredicate interface
+ bool isAlwaysTrue() const override;
+ bool implies(const SCEVPredicate *N) const override;
+ void print(raw_ostream &OS, unsigned Depth) const override;
+ const SCEV *getExpr() const override;
+
+ /// \brief We estimate the complexity of a union predicate as the number
+ /// of predicates in the union.
+ unsigned getComplexity() const override { return Preds.size(); }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVPredicate *P) {
+ return P->getKind() == P_Union;
+ }
+ };
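A sketch of the intended flow (hypothetical caller, using only the interface declared above):

// Hedged sketch: accumulate assumptions into a union predicate and ask
// whether any run-time check is actually required.
static bool needsRuntimeCheck(llvm::SCEVUnionPredicate &Union,
                              const llvm::SCEVPredicate *Assumption) {
  Union.add(Assumption);        // logical AND with the existing predicates
  if (Union.isAlwaysTrue())
    return false;               // nothing to verify at run time
  return Union.getComplexity() > 0; // roughly the number of checks needed
}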
+
+ /// The main scalar evolution driver. Because client code (intentionally)
+ /// can't do much with the SCEV objects directly, they must ask this class
+ /// for services.
+ class ScalarEvolution {
+ public:
+ /// An enum describing the relationship between a SCEV and a loop.
enum LoopDisposition {
LoopVariant, ///< The SCEV is loop-variant (unknown).
LoopInvariant, ///< The SCEV is loop-invariant.
LoopComputable ///< The SCEV varies predictably with the loop.
};
- /// BlockDisposition - An enum describing the relationship between a
- /// SCEV and a basic block.
+ /// An enum describing the relationship between a SCEV and a basic block.
enum BlockDisposition {
DoesNotDominateBlock, ///< The SCEV does not dominate the block.
DominatesBlock, ///< The SCEV dominates the block.
@@ -207,9 +346,9 @@ namespace llvm {
}
private:
- /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
- /// notified whenever a Value is deleted.
- class SCEVCallbackVH : public CallbackVH {
+ /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
+ /// Value is deleted.
+ class SCEVCallbackVH final : public CallbackVH {
ScalarEvolution *SE;
void deleted() override;
void allUsesReplacedWith(Value *New) override;
@@ -221,35 +360,34 @@ namespace llvm {
friend class SCEVExpander;
friend class SCEVUnknown;
- /// F - The function we are analyzing.
+ /// The function we are analyzing.
///
- Function *F;
+ Function &F;
- /// The tracker for @llvm.assume intrinsics in this function.
- AssumptionCache *AC;
-
- /// LI - The loop information for the function we are currently analyzing.
+ /// The target library information for the target we are targeting.
///
- LoopInfo *LI;
+ TargetLibraryInfo &TLI;
+
+ /// The tracker for @llvm.assume intrinsics in this function.
+ AssumptionCache &AC;
- /// TLI - The target library information for the target we are targeting.
+ /// The dominator tree.
///
- TargetLibraryInfo *TLI;
+ DominatorTree &DT;
- /// DT - The dominator tree.
+ /// The loop information for the function we are currently analyzing.
///
- DominatorTree *DT;
+ LoopInfo &LI;
- /// CouldNotCompute - This SCEV is used to represent unknown trip
- /// counts and things.
- SCEVCouldNotCompute CouldNotCompute;
+ /// This SCEV is used to represent unknown trip counts and things.
+ std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute;
- /// ValueExprMapType - The typedef for ValueExprMap.
+ /// The typedef for ValueExprMap.
///
typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
ValueExprMapType;
- /// ValueExprMap - This is a cache of the values we have analyzed so far.
+ /// This is a cache of the values we have analyzed so far.
///
ValueExprMapType ValueExprMap;
@@ -260,10 +398,14 @@ namespace llvm {
/// conditions dominating the backedge of a loop.
bool WalkingBEDominatingConds;
- /// ExitLimit - Information about the number of loop iterations for which a
- /// loop exit's branch condition evaluates to the not-taken path. This is a
- /// temporary pair of exact and max expressions that are eventually
- /// summarized in ExitNotTakenInfo and BackedgeTakenInfo.
+ /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a
+ /// predicate by splitting it into a set of independent predicates.
+ bool ProvingSplitPredicate;
+
+ /// Information about the number of loop iterations for which a loop exit's
+ /// branch condition evaluates to the not-taken path. This is a temporary
+ /// pair of exact and max expressions that are eventually summarized in
+ /// ExitNotTakenInfo and BackedgeTakenInfo.
struct ExitLimit {
const SCEV *Exact;
const SCEV *Max;
@@ -272,16 +414,16 @@ namespace llvm {
ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {}
- /// hasAnyInfo - Test whether this ExitLimit contains any computed
- /// information, or whether it's all SCEVCouldNotCompute values.
+ /// Test whether this ExitLimit contains any computed information, or
+ /// whether it's all SCEVCouldNotCompute values.
bool hasAnyInfo() const {
return !isa<SCEVCouldNotCompute>(Exact) ||
!isa<SCEVCouldNotCompute>(Max);
}
};
- /// ExitNotTakenInfo - Information about the number of times a particular
- /// loop exit may be reached before exiting the loop.
+ /// Information about the number of times a particular loop exit may be
+ /// reached before exiting the loop.
struct ExitNotTakenInfo {
AssertingVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
@@ -289,14 +431,14 @@ namespace llvm {
ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {}
- /// isCompleteList - Return true if all loop exits are computable.
+ /// Return true if all loop exits are computable.
bool isCompleteList() const {
return NextExit.getInt() == 0;
}
void setIncomplete() { NextExit.setInt(1); }
- /// getNextExit - Return a pointer to the next exit's not-taken info.
+ /// Return a pointer to the next exit's not-taken info.
ExitNotTakenInfo *getNextExit() const {
return NextExit.getPointer();
}
@@ -304,16 +446,16 @@ namespace llvm {
void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); }
};
- /// BackedgeTakenInfo - Information about the backedge-taken count
- /// of a loop. This currently includes an exact count and a maximum count.
+ /// Information about the backedge-taken count of a loop. This currently
+ /// includes an exact count and a maximum count.
///
class BackedgeTakenInfo {
- /// ExitNotTaken - A list of computable exits and their not-taken counts.
- /// Loops almost never have more than one computable exit.
+ /// A list of computable exits and their not-taken counts. Loops almost
+ /// never have more than one computable exit.
ExitNotTakenInfo ExitNotTaken;
- /// Max - An expression indicating the least maximum backedge-taken
- /// count of the loop that is known, or a SCEVCouldNotCompute.
+ /// An expression indicating the least maximum backedge-taken count of the
+ /// loop that is known, or a SCEVCouldNotCompute.
const SCEV *Max;
public:
@@ -324,80 +466,78 @@ namespace llvm {
SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
bool Complete, const SCEV *MaxCount);
- /// hasAnyInfo - Test whether this BackedgeTakenInfo contains any
- /// computed information, or whether it's all SCEVCouldNotCompute
- /// values.
+ /// Test whether this BackedgeTakenInfo contains any computed information,
+ /// or whether it's all SCEVCouldNotCompute values.
bool hasAnyInfo() const {
return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max);
}
- /// getExact - Return an expression indicating the exact backedge-taken
- /// count of the loop if it is known, or SCEVCouldNotCompute
- /// otherwise. This is the number of times the loop header can be
- /// guaranteed to execute, minus one.
+ /// Return an expression indicating the exact backedge-taken count of the
+ /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the
+ /// number of times the loop header can be guaranteed to execute, minus
+ /// one.
const SCEV *getExact(ScalarEvolution *SE) const;
- /// getExact - Return the number of times this loop exit may fall through
- /// to the back edge, or SCEVCouldNotCompute. The loop is guaranteed not
- /// to exit via this block before this number of iterations, but may exit
- /// via another block.
+ /// Return the number of times this loop exit may fall through to the back
+ /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
+ /// this block before this number of iterations, but may exit via another
+ /// block.
const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
- /// getMax - Get the max backedge taken count for the loop.
+ /// Get the max backedge taken count for the loop.
const SCEV *getMax(ScalarEvolution *SE) const;
/// Return true if any backedge taken count expressions refer to the given
/// subexpression.
bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
- /// clear - Invalidate this result and free associated memory.
+ /// Invalidate this result and free associated memory.
void clear();
};
- /// BackedgeTakenCounts - Cache the backedge-taken count of the loops for
- /// this function as they are computed.
+ /// Cache the backedge-taken count of the loops for this function as they
+ /// are computed.
DenseMap<const Loop*, BackedgeTakenInfo> BackedgeTakenCounts;
- /// ConstantEvolutionLoopExitValue - This map contains entries for all of
- /// the PHI instructions that we attempt to compute constant evolutions for.
- /// This allows us to avoid potentially expensive recomputation of these
- /// properties. An instruction maps to null if we are unable to compute its
- /// exit value.
+ /// This map contains entries for all of the PHI instructions that we
+ /// attempt to compute constant evolutions for. This allows us to avoid
+ /// potentially expensive recomputation of these properties. An instruction
+ /// maps to null if we are unable to compute its exit value.
DenseMap<PHINode*, Constant*> ConstantEvolutionLoopExitValue;
- /// ValuesAtScopes - This map contains entries for all the expressions
- /// that we attempt to compute getSCEVAtScope information for, which can
- /// be expensive in extreme cases.
+ /// This map contains entries for all the expressions that we attempt to
+ /// compute getSCEVAtScope information for, which can be expensive in
+ /// extreme cases.
DenseMap<const SCEV *,
SmallVector<std::pair<const Loop *, const SCEV *>, 2> > ValuesAtScopes;
- /// LoopDispositions - Memoized computeLoopDisposition results.
+ /// Memoized computeLoopDisposition results.
DenseMap<const SCEV *,
SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
LoopDispositions;
- /// computeLoopDisposition - Compute a LoopDisposition value.
+ /// Compute a LoopDisposition value.
LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
- /// BlockDispositions - Memoized computeBlockDisposition results.
+ /// Memoized computeBlockDisposition results.
DenseMap<
const SCEV *,
SmallVector<PointerIntPair<const BasicBlock *, 2, BlockDisposition>, 2>>
BlockDispositions;
- /// computeBlockDisposition - Compute a BlockDisposition value.
+ /// Compute a BlockDisposition value.
BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
- /// UnsignedRanges - Memoized results from getRange
+ /// Memoized results from getRange
DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
- /// SignedRanges - Memoized results from getRange
+ /// Memoized results from getRange
DenseMap<const SCEV *, ConstantRange> SignedRanges;
- /// RangeSignHint - Used to parameterize getRange
+ /// Used to parameterize getRange
enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED };
- /// setRange - Set the memoized range for the given SCEV.
+ /// Set the memoized range for the given SCEV.
const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint,
const ConstantRange &CR) {
DenseMap<const SCEV *, ConstantRange> &Cache =
@@ -410,198 +550,275 @@ namespace llvm {
return Pair.first->second;
}
- /// getRange - Determine the range for a particular SCEV.
+ /// Determine the range for a particular SCEV.
ConstantRange getRange(const SCEV *S, RangeSignHint Hint);
- /// createSCEV - We know that there is no SCEV for the specified value.
- /// Analyze the expression.
+ /// We know that there is no SCEV for the specified value. Analyze the
+ /// expression.
const SCEV *createSCEV(Value *V);
- /// createNodeForPHI - Provide the special handling we need to analyze PHI
- /// SCEVs.
+ /// Provide the special handling we need to analyze PHI SCEVs.
const SCEV *createNodeForPHI(PHINode *PN);
- /// createNodeForGEP - Provide the special handling we need to analyze GEP
- /// SCEVs.
+ /// Helper function called from createNodeForPHI.
+ const SCEV *createAddRecFromPHI(PHINode *PN);
+
+ /// Helper function called from createNodeForPHI.
+ const SCEV *createNodeFromSelectLikePHI(PHINode *PN);
+
+ /// Provide special handling for a select-like instruction (currently this
+ /// is either a select instruction or a phi node). \p I is the instruction
+ /// being processed, and it is assumed equivalent to "Cond ? TrueVal :
+ /// FalseVal".
+ const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond,
+ Value *TrueVal, Value *FalseVal);
+
+ /// Provide the special handling we need to analyze GEP SCEVs.
const SCEV *createNodeForGEP(GEPOperator *GEP);
- /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called
- /// at most once for each SCEV+Loop pair.
+ /// Implementation code for getSCEVAtScope; called at most once for each
+ /// SCEV+Loop pair.
///
const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
- /// ForgetSymbolicValue - This looks up computed SCEV values for all
- /// instructions that depend on the given instruction and removes them from
- /// the ValueExprMap map if they reference SymName. This is used during PHI
- /// resolution.
+ /// This looks up computed SCEV values for all instructions that depend on
+ /// the given instruction and removes them from the ValueExprMap map if they
+ /// reference SymName. This is used during PHI resolution.
void ForgetSymbolicName(Instruction *I, const SCEV *SymName);
- /// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given
- /// loop, lazily computing new values if the loop hasn't been analyzed
- /// yet.
+ /// Return the BackedgeTakenInfo for the given loop, lazily computing new
+ /// values if the loop hasn't been analyzed yet.
const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
- /// ComputeBackedgeTakenCount - Compute the number of times the specified
- /// loop will iterate.
- BackedgeTakenInfo ComputeBackedgeTakenCount(const Loop *L);
+ /// Compute the number of times the specified loop will iterate.
+ BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L);
- /// ComputeExitLimit - Compute the number of times the backedge of the
- /// specified loop will execute if it exits via the specified block.
- ExitLimit ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock);
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if it exits via the specified block.
+ ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock);
- /// ComputeExitLimitFromCond - Compute the number of times the backedge of
- /// the specified loop will execute if its exit condition were a conditional
- /// branch of ExitCond, TBB, and FBB.
- ExitLimit ComputeExitLimitFromCond(const Loop *L,
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a conditional branch of ExitCond,
+ /// TBB, and FBB.
+ ExitLimit computeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
bool IsSubExpr);
- /// ComputeExitLimitFromICmp - Compute the number of times the backedge of
- /// the specified loop will execute if its exit condition were a conditional
- /// branch of the ICmpInst ExitCond, TBB, and FBB.
- ExitLimit ComputeExitLimitFromICmp(const Loop *L,
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a conditional branch of the ICmpInst
+ /// ExitCond, TBB, and FBB.
+ ExitLimit computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
bool IsSubExpr);
- /// ComputeExitLimitFromSingleExitSwitch - Compute the number of times the
- /// backedge of the specified loop will execute if its exit condition were a
- /// switch with a single exiting case to ExitingBB.
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a switch with a single exiting case
+ /// to ExitingBB.
ExitLimit
- ComputeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch,
+ computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch,
BasicBlock *ExitingBB, bool IsSubExpr);
- /// ComputeLoadConstantCompareExitLimit - Given an exit condition
- /// of 'icmp op load X, cst', try to see if we can compute the
- /// backedge-taken count.
- ExitLimit ComputeLoadConstantCompareExitLimit(LoadInst *LI,
+ /// Given an exit condition of 'icmp op load X, cst', try to see if we can
+ /// compute the backedge-taken count.
+ ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI,
Constant *RHS,
const Loop *L,
ICmpInst::Predicate p);
- /// ComputeExitCountExhaustively - If the loop is known to execute a
- /// constant number of times (the condition evolves only from constants),
- /// try to evaluate a few iterations of the loop until we get the exit
- /// condition gets a value of ExitWhen (true or false). If we cannot
- /// evaluate the exit count of the loop, return CouldNotCompute.
- const SCEV *ComputeExitCountExhaustively(const Loop *L,
+ /// Compute the exit limit of a loop that is controlled by a
+ /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip
+ /// count in these cases (since SCEV has no way of expressing them), but we
+ /// can still sometimes compute an upper bound.
+ ///
+ /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred
+ /// RHS`.
+ ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS,
+ const Loop *L,
+ ICmpInst::Predicate Pred);
+
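[Annotation: a plain-C analogue of the loop shape this handles. The exact trip count depends on the position of the highest set bit, which SCEV cannot express, but the bit width always gives a valid upper bound, which is what this routine recovers.]

  #include <cstdint>
  // "while ((iv >>= 1) != 0)": exact count is the MSB position, but
  // 63 (bit width minus one) is always an upper bound for a 64-bit IV.
  unsigned shiftsUntilZero(uint64_t IV) {
    unsigned N = 0;
    while ((IV >>= 1) != 0)
      ++N;
    return N;
  }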
+ /// If the loop is known to execute a constant number of times (the
+ /// condition evolves only from constants), try to evaluate a few iterations
+  /// of the loop until the exit condition gets a value of ExitWhen
+ /// (true or false). If we cannot evaluate the exit count of the loop,
+ /// return CouldNotCompute.
+ const SCEV *computeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen);
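[Annotation: the idea behind exhaustive evaluation, sketched over plain integers rather than IR; the recurrence {0,+,3}, the exit test, and the iteration budget are all illustrative.]

  #include <cstdint>
  // Step a constant-only recurrence until the exit condition takes the
  // requested value; give up after a small budget. Returns the number
  // of backedges taken, or -1 as an analogue of CouldNotCompute.
  int exitCountExhaustively(bool ExitWhen, unsigned MaxIters = 100) {
    uint64_t IV = 0;                 // evolves only from constants: {0,+,3}
    for (unsigned N = 0; N != MaxIters; ++N) {
      bool ExitCond = (IV == 12);    // the loop's exit condition
      if (ExitCond == ExitWhen)
        return (int)N;
      IV += 3;
    }
    return -1;
  }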
- /// HowFarToZero - Return the number of times an exit condition comparing
- /// the specified value to zero will execute. If not computable, return
- /// CouldNotCompute.
+ /// Return the number of times an exit condition comparing the specified
+ /// value to zero will execute. If not computable, return CouldNotCompute.
ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr);
- /// HowFarToNonZero - Return the number of times an exit condition checking
- /// the specified value for nonzero will execute. If not computable, return
+ /// Return the number of times an exit condition checking the specified
+ /// value for nonzero will execute. If not computable, return
/// CouldNotCompute.
ExitLimit HowFarToNonZero(const SCEV *V, const Loop *L);
- /// HowManyLessThans - Return the number of times an exit condition
- /// containing the specified less-than comparison will execute. If not
- /// computable, return CouldNotCompute. isSigned specifies whether the
- /// less-than is signed.
+ /// Return the number of times an exit condition containing the specified
+ /// less-than comparison will execute. If not computable, return
+ /// CouldNotCompute. isSigned specifies whether the less-than is signed.
ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool isSigned, bool IsSubExpr);
ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool isSigned, bool IsSubExpr);
- /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
- /// (which may not be an immediate predecessor) which has exactly one
- /// successor from which BB is reachable, or null if no such block is
- /// found.
+ /// Return a predecessor of BB (which may not be an immediate predecessor)
+ /// which has exactly one successor from which BB is reachable, or null if
+ /// no such block is found.
std::pair<BasicBlock *, BasicBlock *>
getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
- /// isImpliedCond - Test whether the condition described by Pred, LHS, and
- /// RHS is true whenever the given FoundCondValue value evaluates to true.
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the given FoundCondValue value evaluates to true.
bool isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse);
- /// isImpliedCondOperands - Test whether the condition described by Pred,
- /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
- /// and FoundRHS is true.
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+  /// whenever the condition described by FoundPred, FoundLHS, and FoundRHS is
+ /// true.
+ bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, ICmpInst::Predicate FoundPred,
+ const SCEV *FoundLHS, const SCEV *FoundRHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
bool isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS);
- /// isImpliedCondOperandsHelper - Test whether the condition described by
- /// Pred, LHS, and RHS is true whenever the condition described by Pred,
- /// FoundLHS, and FoundRHS is true.
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS);
- /// isImpliedCondOperandsViaRanges - Test whether the condition described by
- /// Pred, LHS, and RHS is true whenever the condition described by Pred,
- /// FoundLHS, and FoundRHS is true. Utility function used by
- /// isImpliedCondOperands.
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true. Utility function used by isImpliedCondOperands.
bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS);
- /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
- /// in the header of its containing loop, we know the loop executes a
- /// constant number of times, and the PHI node is just a recurrence
- /// involving constants, fold it.
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
+ ///
+ /// This routine tries to rule out certain kinds of integer overflow, and
+ /// then tries to reason about arithmetic properties of the predicates.
+ bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// If we know that the specified Phi is in the header of its containing
+ /// loop, we know the loop executes a constant number of times, and the PHI
+ /// node is just a recurrence involving constants, fold it.
Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
const Loop *L);
- /// isKnownPredicateWithRanges - Test if the given expression is known to
- /// satisfy the condition described by Pred and the known constant ranges
- /// of LHS and RHS.
+ /// Test if the given expression is known to satisfy the condition described
+ /// by Pred and the known constant ranges of LHS and RHS.
///
bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
- /// forgetMemoizedResults - Drop memoized information computed for S.
+ /// Try to prove the condition described by "LHS Pred RHS" by ruling out
+ /// integer overflow.
+ ///
+ /// For instance, this will return true for "A s< (A + C)<nsw>" if C is
+ /// positive.
+ bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to
+ /// prove them individually.
+ bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
+ /// Try to match the Expr as "(L + R)<Flags>".
+ bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
+ SCEV::NoWrapFlags &Flags);
+
+ /// Return true if More == (Less + C), where C is a constant. This is
+ /// intended to be used as a cheaper substitute for full SCEV subtraction.
+ bool computeConstantDifference(const SCEV *Less, const SCEV *More,
+ APInt &C);
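[Annotation: together these two helpers answer "is More just Less plus a constant?" without building a full subtraction. A hedged sketch; SE and N are assumed to exist, and the private helper is only described in a comment.]

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  void constantDifferenceExample(ScalarEvolution &SE, const SCEV *N) {
    Type *Ty = N->getType();
    const SCEV *Less = SE.getAddExpr(N, SE.getConstant(Ty, 8));
    const SCEV *More = SE.getAddExpr(N, SE.getConstant(Ty, 24));
    // computeConstantDifference(Less, More, C) would succeed here and
    // set C = 16, since More == (Less + 16); splitBinaryAdd is what
    // peels the "(n + k)" additions apart on each side.
    (void)Less; (void)More;
  }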
+
+ /// Drop memoized information computed for S.
void forgetMemoizedResults(const SCEV *S);
+ /// Return an existing SCEV for V if there is one, otherwise return nullptr.
+ const SCEV *getExistingSCEV(Value *V);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
- // Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be equal
- // to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is equivalent to
- // proving no signed (resp. unsigned) wrap in {`Start`,+,`Step`} if
- // `ExtendOpTy` is `SCEVSignExtendExpr` (resp. `SCEVZeroExtendExpr`).
- //
+ /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be
+ /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is
+ /// equivalent to proving no signed (resp. unsigned) wrap in
+ /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr`
+ /// (resp. `SCEVZeroExtendExpr`).
+ ///
template<typename ExtendOpTy>
bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
const Loop *L);
+ bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred, bool &Increasing);
+
+ /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
+ /// is monotonically increasing or decreasing. In the former case set
+ /// `Increasing` to true and in the latter case set `Increasing` to false.
+ ///
+  /// A predicate is said to be monotonically increasing if it may go from
+  /// being false to being true as the loop iterates, but never the other way
+  /// around. A predicate is said to be monotonically decreasing if it may go
+  /// from being true to being false as the loop iterates, but never the other
+  /// way around.
+ bool isMonotonicPredicate(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred, bool &Increasing);
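[Annotation: a concrete instance over plain integers. For the add-rec {0,+,1} and a loop-invariant bound N, "IV s< N" flips at most once, from true to false, so the predicate is monotonically decreasing and Increasing would be set to false.]

  #include <cstdio>
  int main() {
    const long long N = 3;                   // loop invariant
    for (long long IV = 0; IV < 6; ++IV)     // IV = {0,+,1}
      std::printf("IV=%lld: IV s< N is %d\n", IV, (int)(IV < N));
    // prints 1,1,1,0,0,0: one transition, true -> false
    return 0;
  }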
+
+  /// Return SCEV no-wrap flags that can be proven based on reasoning about
+  /// how poison produced from no-wrap flags on this value (e.g. a nuw add)
+  /// would trigger undefined behavior on overflow.
+ SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+
public:
- static char ID; // Pass identification, replacement for typeid
- ScalarEvolution();
+ ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC,
+ DominatorTree &DT, LoopInfo &LI);
+ ~ScalarEvolution();
+ ScalarEvolution(ScalarEvolution &&Arg);
- LLVMContext &getContext() const { return F->getContext(); }
+ LLVMContext &getContext() const { return F.getContext(); }
- /// isSCEVable - Test if values of the given type are analyzable within
- /// the SCEV framework. This primarily includes integer types, and it
- /// can optionally include pointer types if the ScalarEvolution class
- /// has access to target-specific information.
+ /// Test if values of the given type are analyzable within the SCEV
+ /// framework. This primarily includes integer types, and it can optionally
+ /// include pointer types if the ScalarEvolution class has access to
+ /// target-specific information.
bool isSCEVable(Type *Ty) const;
- /// getTypeSizeInBits - Return the size in bits of the specified type,
- /// for which isSCEVable must return true.
+ /// Return the size in bits of the specified type, for which isSCEVable must
+ /// return true.
uint64_t getTypeSizeInBits(Type *Ty) const;
- /// getEffectiveSCEVType - Return a type with the same bitwidth as
- /// the given type and which represents how SCEV will treat the given
- /// type, for which isSCEVable must return true. For pointer types,
- /// this is the pointer-sized integer type.
+ /// Return a type with the same bitwidth as the given type and which
+ /// represents how SCEV will treat the given type, for which isSCEVable must
+ /// return true. For pointer types, this is the pointer-sized integer type.
Type *getEffectiveSCEVType(Type *Ty) const;
- /// getSCEV - Return a SCEV expression for the full generality of the
- /// specified expression.
+ /// Return a SCEV expression for the full generality of the specified
+ /// expression.
const SCEV *getSCEV(Value *V);
const SCEV *getConstant(ConstantInt *V);
@@ -615,35 +832,24 @@ namespace llvm {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 2> Ops;
- Ops.push_back(LHS);
- Ops.push_back(RHS);
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getAddExpr(Ops, Flags);
}
const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 3> Ops;
- Ops.push_back(Op0);
- Ops.push_back(Op1);
- Ops.push_back(Op2);
+ SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
return getAddExpr(Ops, Flags);
}
const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap)
- {
- SmallVector<const SCEV *, 2> Ops;
- Ops.push_back(LHS);
- Ops.push_back(RHS);
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getMulExpr(Ops, Flags);
}
const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 3> Ops;
- Ops.push_back(Op0);
- Ops.push_back(Op1);
- Ops.push_back(Op2);
+ SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
return getMulExpr(Ops, Flags);
}
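[Annotation: a minimal usage sketch of these convenience overloads, which simply forward to the vector forms; SE and a SCEV N for some value n are assumed.]

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  // Build the expression 3*n + 42 from an existing SCEV for n.
  const SCEV *threeNPlus42(ScalarEvolution &SE, const SCEV *N) {
    Type *Ty = N->getType();
    return SE.getAddExpr(SE.getMulExpr(SE.getConstant(Ty, 3), N),
                         SE.getConstant(Ty, 42));
  }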
const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
@@ -675,81 +881,80 @@ namespace llvm {
const SCEV *getUnknown(Value *V);
const SCEV *getCouldNotCompute();
- /// getSizeOfExpr - Return an expression for sizeof AllocTy that is type
- /// IntTy
+ /// \brief Return a SCEV for the constant 0 of a specific type.
+ const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); }
+
+ /// \brief Return a SCEV for the constant 1 of a specific type.
+ const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); }
+
+  /// Return an expression for sizeof AllocTy that is of type IntTy
///
const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
- /// getOffsetOfExpr - Return an expression for offsetof on the given field
- /// with type IntTy
+ /// Return an expression for offsetof on the given field with type IntTy
///
const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);
- /// getNegativeSCEV - Return the SCEV object corresponding to -V.
+ /// Return the SCEV object corresponding to -V.
///
- const SCEV *getNegativeSCEV(const SCEV *V);
+ const SCEV *getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
- /// getNotSCEV - Return the SCEV object corresponding to ~V.
+ /// Return the SCEV object corresponding to ~V.
///
const SCEV *getNotSCEV(const SCEV *V);
- /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+ /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
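[Annotation: since SCEV has no dedicated subtraction node, LHS - RHS is represented as LHS + (-1)*RHS. A sketch of that desugaring using only public calls; this is conceptually what getMinusSCEV produces, modulo its wrap-flag handling.]

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  const SCEV *minusByHand(ScalarEvolution &SE, const SCEV *LHS,
                          const SCEV *RHS) {
    const SCEV *MinusOne = SE.getConstant(LHS->getType(), -1, /*isSigned=*/true);
    return SE.getAddExpr(LHS, SE.getMulExpr(MinusOne, RHS));
  }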
- /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
- /// of the input value to the specified type. If the type must be
- /// extended, it is zero extended.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is zero extended.
const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
- /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion
- /// of the input value to the specified type. If the type must be
- /// extended, it is sign extended.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is sign extended.
const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
- /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of
- /// the input value to the specified type. If the type must be extended,
- /// it is zero extended. The conversion must not be narrowing.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is zero extended. The
+ /// conversion must not be narrowing.
const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty);
- /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of
- /// the input value to the specified type. If the type must be extended,
- /// it is sign extended. The conversion must not be narrowing.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is sign extended. The
+ /// conversion must not be narrowing.
const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty);
- /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
- /// the input value to the specified type. If the type must be extended,
- /// it is extended with unspecified bits. The conversion must not be
- /// narrowing.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is extended with
+ /// unspecified bits. The conversion must not be narrowing.
const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty);
- /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
- /// input value to the specified type. The conversion must not be
- /// widening.
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. The conversion must not be widening.
const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty);
- /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
- /// the types using zero-extension, and then perform a umax operation
- /// with them.
+ /// Promote the operands to the wider of the types using zero-extension, and
+ /// then perform a umax operation with them.
const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS);
- /// getUMinFromMismatchedTypes - Promote the operands to the wider of
- /// the types using zero-extension, and then perform a umin operation
- /// with them.
+ /// Promote the operands to the wider of the types using zero-extension, and
+ /// then perform a umin operation with them.
const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS);
- /// getPointerBase - Transitively follow the chain of pointer-type operands
- /// until reaching a SCEV that does not have a single pointer operand. This
- /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
- /// but corner cases do exist.
+ /// Transitively follow the chain of pointer-type operands until reaching a
+ /// SCEV that does not have a single pointer operand. This returns a
+ /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner
+ /// cases do exist.
const SCEV *getPointerBase(const SCEV *V);
- /// getSCEVAtScope - Return a SCEV expression for the specified value
- /// at the specified scope in the program. The L value specifies a loop
- /// nest to evaluate the expression at, where null is the top-level or a
- /// specified loop is immediately inside of the loop.
+ /// Return a SCEV expression for the specified value at the specified scope
+ /// in the program. The L value specifies a loop nest to evaluate the
+  /// expression at, where null means the top-level scope and a specified
+  /// loop means the point immediately inside of that loop.
///
/// This method can be used to compute the exit value for a variable defined
/// in a loop by querying what the value will hold in the parent loop.
@@ -758,19 +963,17 @@ namespace llvm {
/// original value V is returned.
const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);
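[Annotation: a hedged usage sketch of the exit-value query described above: evaluate a value defined in L at L's parent scope, where a null parent means the top level.]

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  const SCEV *exitValue(ScalarEvolution &SE, Value *V, const Loop *L) {
    // If the loop's backedge-taken count is computable, this folds the
    // final value; otherwise the original expression comes back.
    return SE.getSCEVAtScope(SE.getSCEV(V), L->getParentLoop());
  }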
- /// getSCEVAtScope - This is a convenience function which does
- /// getSCEVAtScope(getSCEV(V), L).
+ /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L).
const SCEV *getSCEVAtScope(Value *V, const Loop *L);
- /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
- /// by a conditional between LHS and RHS. This is used to help avoid max
- /// expressions in loop trip counts, and to eliminate casts.
+ /// Test whether entry to the loop is protected by a conditional between LHS
+ /// and RHS. This is used to help avoid max expressions in loop trip
+ /// counts, and to eliminate casts.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
- /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
- /// protected by a conditional between LHS and RHS. This is used to
- /// to eliminate casts.
+ /// Test whether the backedge of the loop is protected by a conditional
+  /// between LHS and RHS. This is used to eliminate casts.
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
@@ -781,13 +984,13 @@ namespace llvm {
/// the single exiting block passed to it. See that routine for details.
unsigned getSmallConstantTripCount(Loop *L);
- /// getSmallConstantTripCount - Returns the maximum trip count of this loop
- /// as a normal unsigned value. Returns 0 if the trip count is unknown or
- /// not constant. This "trip count" assumes that control exits via
- /// ExitingBlock. More precisely, it is the number of times that control may
- /// reach ExitingBlock before taking the branch. For loops with multiple
- /// exits, it may not be the number times that the loop header executes if
- /// the loop exits prematurely via another branch.
+ /// Returns the maximum trip count of this loop as a normal unsigned
+ /// value. Returns 0 if the trip count is unknown or not constant. This
+ /// "trip count" assumes that control exits via ExitingBlock. More
+ /// precisely, it is the number of times that control may reach ExitingBlock
+ /// before taking the branch. For loops with multiple exits, it may not be
+  /// the number of times that the loop header executes if the loop exits
+ /// prematurely via another branch.
unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
/// \brief Returns the largest constant divisor of the trip count of the
@@ -798,25 +1001,25 @@ namespace llvm {
/// the single exiting block passed to it. See that routine for details.
unsigned getSmallConstantTripMultiple(Loop *L);
- /// getSmallConstantTripMultiple - Returns the largest constant divisor of
- /// the trip count of this loop as a normal unsigned value, if
- /// possible. This means that the actual trip count is always a multiple of
- /// the returned value (don't forget the trip count could very well be zero
- /// as well!). As explained in the comments for getSmallConstantTripCount,
- /// this assumes that control exits the loop via ExitingBlock.
+ /// Returns the largest constant divisor of the trip count of this loop as a
+ /// normal unsigned value, if possible. This means that the actual trip
+ /// count is always a multiple of the returned value (don't forget the trip
+ /// count could very well be zero as well!). As explained in the comments
+ /// for getSmallConstantTripCount, this assumes that control exits the loop
+ /// via ExitingBlock.
unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
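[Annotation: a typical consumer pattern, hedged; the unroll factor 4 is illustrative. A known trip multiple guarantees no remainder iterations are left over.]

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  bool canUnrollBy4WithoutEpilogue(ScalarEvolution &SE, Loop *L) {
    // Returns 1 when nothing is known, so this is conservatively false
    // unless the trip count is provably a multiple of 4.
    return SE.getSmallConstantTripMultiple(L) % 4 == 0;
  }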
- // getExitCount - Get the expression for the number of loop iterations for
- // which this loop is guaranteed not to exit via ExitingBlock. Otherwise
- // return SCEVCouldNotCompute.
+ /// Get the expression for the number of loop iterations for which this loop
+ /// is guaranteed not to exit via ExitingBlock. Otherwise return
+ /// SCEVCouldNotCompute.
const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock);
- /// getBackedgeTakenCount - If the specified loop has a predictable
- /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
- /// object. The backedge-taken count is the number of times the loop header
- /// will be branched to from within the loop. This is one less than the
- /// trip count of the loop, since it doesn't count the first iteration,
- /// when the header is branched to from outside the loop.
+ /// If the specified loop has a predictable backedge-taken count, return it,
+ /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
+ /// is the number of times the loop header will be branched to from within
+ /// the loop. This is one less than the trip count of the loop, since it
+ /// doesn't count the first iteration, when the header is branched to from
+ /// outside the loop.
///
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
@@ -824,24 +1027,23 @@ namespace llvm {
///
const SCEV *getBackedgeTakenCount(const Loop *L);
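[Annotation: a worked example of the off-by-one described above. For "for (i = 0; i != 16; ++i)" the backedge-taken count is 15 while the trip count is 16; a hedged sketch relating the two queries.]

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  void tripCountExample(ScalarEvolution &SE, Loop *L) {
    const SCEV *BTC = SE.getBackedgeTakenCount(L); // e.g. the constant 15
    unsigned TC = SE.getSmallConstantTripCount(L); // e.g. 16, or 0 if unknown
    (void)BTC; (void)TC;
  }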
- /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
- /// return the least SCEV value that is known never to be less than the
- /// actual backedge taken count.
+ /// Similar to getBackedgeTakenCount, except return the least SCEV value
+ /// that is known never to be less than the actual backedge taken count.
const SCEV *getMaxBackedgeTakenCount(const Loop *L);
- /// hasLoopInvariantBackedgeTakenCount - Return true if the specified loop
- /// has an analyzable loop-invariant backedge-taken count.
+ /// Return true if the specified loop has an analyzable loop-invariant
+ /// backedge-taken count.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
- /// forgetLoop - This method should be called by the client when it has
- /// changed a loop in a way that may effect ScalarEvolution's ability to
- /// compute a trip count, or if the loop is deleted. This call is
- /// potentially expensive for large loop bodies.
+ /// This method should be called by the client when it has changed a loop in
+  /// a way that may affect ScalarEvolution's ability to compute a trip count,
+ /// or if the loop is deleted. This call is potentially expensive for large
+ /// loop bodies.
void forgetLoop(const Loop *L);
- /// forgetValue - This method should be called by the client when it has
- /// changed a value in a way that may effect its value, or which may
- /// disconnect it from a def-use chain linking it to a loop.
+ /// This method should be called by the client when it has changed a value
+  /// in a way that may affect its value, or which may disconnect it from a
+ /// def-use chain linking it to a loop.
void forgetValue(Value *V);
/// \brief Called when the client has changed the disposition of values in
@@ -851,92 +1053,97 @@ namespace llvm {
/// recompute is simpler.
void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); }
- /// GetMinTrailingZeros - Determine the minimum number of zero bits that S
- /// is guaranteed to end in (at every loop iteration). It is, at the same
- /// time, the minimum number of times S is divisible by 2. For example,
- /// given {4,+,8} it returns 2. If S is guaranteed to be 0, it returns the
- /// bitwidth of S.
+ /// Determine the minimum number of zero bits that S is guaranteed to end in
+ /// (at every loop iteration). It is, at the same time, the minimum number
+ /// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
+ /// If S is guaranteed to be 0, it returns the bitwidth of S.
uint32_t GetMinTrailingZeros(const SCEV *S);
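[Annotation: the documented example checked numerically. Every value of {4,+,8} is congruent to 4 mod 8, hence divisible by 4, i.e. has at least two trailing zero bits.]

  #include <cassert>
  #include <cstdint>
  int main() {
    for (uint64_t i = 0, v = 4; i < 1000; ++i, v += 8)
      assert((v & 0x3) == 0 && "each value has >= 2 trailing zeros");
    return 0;
  }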
- /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+ /// Determine the unsigned range for a particular SCEV.
///
ConstantRange getUnsignedRange(const SCEV *S) {
return getRange(S, HINT_RANGE_UNSIGNED);
}
- /// getSignedRange - Determine the signed range for a particular SCEV.
+ /// Determine the signed range for a particular SCEV.
///
ConstantRange getSignedRange(const SCEV *S) {
return getRange(S, HINT_RANGE_SIGNED);
}
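[Annotation: a hedged sketch of how these range queries back the isKnown* predicates below; non-negativity is just a bound on the signed range's minimum.]

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  bool knownNonNegativeViaRange(ScalarEvolution &SE, const SCEV *S) {
    return SE.getSignedRange(S).getSignedMin().isNonNegative();
  }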
- /// isKnownNegative - Test if the given expression is known to be negative.
+ /// Test if the given expression is known to be negative.
///
bool isKnownNegative(const SCEV *S);
- /// isKnownPositive - Test if the given expression is known to be positive.
+ /// Test if the given expression is known to be positive.
///
bool isKnownPositive(const SCEV *S);
- /// isKnownNonNegative - Test if the given expression is known to be
- /// non-negative.
+ /// Test if the given expression is known to be non-negative.
///
bool isKnownNonNegative(const SCEV *S);
- /// isKnownNonPositive - Test if the given expression is known to be
- /// non-positive.
+ /// Test if the given expression is known to be non-positive.
///
bool isKnownNonPositive(const SCEV *S);
- /// isKnownNonZero - Test if the given expression is known to be
- /// non-zero.
+ /// Test if the given expression is known to be non-zero.
///
bool isKnownNonZero(const SCEV *S);
- /// isKnownPredicate - Test if the given expression is known to satisfy
- /// the condition described by Pred, LHS, and RHS.
+ /// Test if the given expression is known to satisfy the condition described
+ /// by Pred, LHS, and RHS.
///
bool isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
- /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
- /// predicate Pred. Return true iff any changes were made. If the
- /// operands are provably equal or unequal, LHS and RHS are set to
- /// the same value and Pred is set to either ICMP_EQ or ICMP_NE.
+ /// Return true if the result of the predicate LHS `Pred` RHS is loop
+  /// invariant with respect to L. Set InvariantPred, InvariantLHS and
+  /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the
+ /// loop invariant form of LHS `Pred` RHS.
+ bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const Loop *L,
+ ICmpInst::Predicate &InvariantPred,
+ const SCEV *&InvariantLHS,
+ const SCEV *&InvariantRHS);
+
+ /// Simplify LHS and RHS in a comparison with predicate Pred. Return true
+ /// iff any changes were made. If the operands are provably equal or
+ /// unequal, LHS and RHS are set to the same value and Pred is set to either
+ /// ICMP_EQ or ICMP_NE.
///
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS,
const SCEV *&RHS,
unsigned Depth = 0);
- /// getLoopDisposition - Return the "disposition" of the given SCEV with
- /// respect to the given loop.
+ /// Return the "disposition" of the given SCEV with respect to the given
+ /// loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
- /// isLoopInvariant - Return true if the value of the given SCEV is
- /// unchanging in the specified loop.
+ /// Return true if the value of the given SCEV is unchanging in the
+ /// specified loop.
bool isLoopInvariant(const SCEV *S, const Loop *L);
- /// hasComputableLoopEvolution - Return true if the given SCEV changes value
- /// in a known way in the specified loop. This property being true implies
- /// that the value is variant in the loop AND that we can emit an expression
- /// to compute the value of the expression at any particular loop iteration.
+ /// Return true if the given SCEV changes value in a known way in the
+ /// specified loop. This property being true implies that the value is
+ /// variant in the loop AND that we can emit an expression to compute the
+ /// value of the expression at any particular loop iteration.
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
- /// getLoopDisposition - Return the "disposition" of the given SCEV with
- /// respect to the given block.
+ /// Return the "disposition" of the given SCEV with respect to the given
+ /// block.
BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
- /// dominates - Return true if elements that makes up the given SCEV
- /// dominate the specified basic block.
+  /// Return true if the elements that make up the given SCEV dominate the
+ /// specified basic block.
bool dominates(const SCEV *S, const BasicBlock *BB);
- /// properlyDominates - Return true if elements that makes up the given SCEV
- /// properly dominate the specified basic block.
+  /// Return true if the elements that make up the given SCEV properly
+  /// dominate the specified basic block.
bool properlyDominates(const SCEV *S, const BasicBlock *BB);
- /// hasOperand - Test whether the given SCEV has Op as a direct or
- /// indirect operand.
+ /// Test whether the given SCEV has Op as a direct or indirect operand.
bool hasOperand(const SCEV *S, const SCEV *Op) const;
/// Return the size of an element read or written by Inst.
@@ -948,11 +1155,8 @@ namespace llvm {
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) const;
- bool runOnFunction(Function &F) override;
- void releaseMemory() override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &OS, const Module* = nullptr) const override;
- void verifyAnalysis() const override;
+ void print(raw_ostream &OS) const;
+ void verify() const;
/// Collect parametric terms occurring in step expressions.
void collectParametricTerms(const SCEV *Expr,
@@ -1034,6 +1238,18 @@ namespace llvm {
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize);
+ /// Return the DataLayout associated with the module this SCEV instance is
+ /// operating on.
+ const DataLayout &getDataLayout() const {
+ return F.getParent()->getDataLayout();
+ }
+
+ const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS,
+ const SCEVConstant *RHS);
+
+ /// Re-writes the SCEV according to the Predicates in \p Preds.
+ const SCEV *rewriteUsingPredicate(const SCEV *Scev, SCEVUnionPredicate &A);
+
private:
/// Compute the backedge taken count knowing the interval difference, the
/// stride and presence of the equality in the comparison.
@@ -1054,13 +1270,112 @@ namespace llvm {
private:
FoldingSet<SCEV> UniqueSCEVs;
+ FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
- /// FirstUnknown - The head of a linked list of all SCEVUnknown
- /// values that have been allocated. This is used by releaseMemory
- /// to locate them all and call their destructors.
+ /// The head of a linked list of all SCEVUnknown values that have been
+ /// allocated. This is used by releaseMemory to locate them all and call
+ /// their destructors.
SCEVUnknown *FirstUnknown;
};
+
+ /// \brief Analysis pass that exposes the \c ScalarEvolution for a function.
+ class ScalarEvolutionAnalysis {
+ static char PassID;
+
+ public:
+ typedef ScalarEvolution Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ /// \brief Provide a name for the analysis for debugging and logging.
+ static StringRef name() { return "ScalarEvolutionAnalysis"; }
+
+ ScalarEvolution run(Function &F, AnalysisManager<Function> *AM);
+ };
+
+ /// \brief Printer pass for the \c ScalarEvolutionAnalysis results.
+ class ScalarEvolutionPrinterPass {
+ raw_ostream &OS;
+
+ public:
+ explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
+
+ static StringRef name() { return "ScalarEvolutionPrinterPass"; }
+ };
+
+ class ScalarEvolutionWrapperPass : public FunctionPass {
+ std::unique_ptr<ScalarEvolution> SE;
+
+ public:
+ static char ID;
+
+ ScalarEvolutionWrapperPass();
+
+ ScalarEvolution &getSE() { return *SE; }
+ const ScalarEvolution &getSE() const { return *SE; }
+
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void verifyAnalysis() const override;
+ };
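[Annotation: a hedged sketch of how a legacy pass obtains ScalarEvolution through this wrapper; MyPass is a hypothetical client, not part of the patch.]

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Pass.h"
  using namespace llvm;
  namespace {
  struct MyPass : FunctionPass {
    static char ID;
    MyPass() : FunctionPass(ID) {}
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.setPreservesAll();
    }
    bool runOnFunction(Function &F) override {
      ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
      (void)SE; // issue SCEV queries here
      return false;
    }
  };
  char MyPass::ID = 0;
  }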
+
+ /// An interface layer with SCEV used to manage how we see SCEV expressions
+ /// for values in the context of existing predicates. We can add new
+ /// predicates, but we cannot remove them.
+ ///
+ /// This layer has multiple purposes:
+ /// - provides a simple interface for SCEV versioning.
+ /// - guarantees that the order of transformations applied on a SCEV
+ /// expression for a single Value is consistent across two different
+ /// getSCEV calls. This means that, for example, once we've obtained
+ /// an AddRec expression for a certain value through expression
+ /// rewriting, we will continue to get an AddRec expression for that
+ /// Value.
+ /// - lowers the number of expression rewrites.
+ class PredicatedScalarEvolution {
+ public:
+ PredicatedScalarEvolution(ScalarEvolution &SE);
+ const SCEVUnionPredicate &getUnionPredicate() const;
+ /// \brief Returns the SCEV expression of V, in the context of the current
+ /// SCEV predicate.
+ /// The order of transformations applied on the expression of V returned
+ /// by ScalarEvolution is guaranteed to be preserved, even when adding new
+ /// predicates.
+ const SCEV *getSCEV(Value *V);
+ /// \brief Adds a new predicate.
+ void addPredicate(const SCEVPredicate &Pred);
+ /// \brief Returns the ScalarEvolution analysis used.
+ ScalarEvolution *getSE() const { return &SE; }
+
+ private:
+ /// \brief Increments the version number of the predicate.
+ /// This needs to be called every time the SCEV predicate changes.
+ void updateGeneration();
+ /// Holds a SCEV and the version number of the SCEV predicate used to
+ /// perform the rewrite of the expression.
+ typedef std::pair<unsigned, const SCEV *> RewriteEntry;
+ /// Maps a SCEV to the rewrite result of that SCEV at a certain version
+ /// number. If this number doesn't match the current Generation, we will
+ /// need to do a rewrite. To preserve the transformation order of previous
+ /// rewrites, we will rewrite the previous result instead of the original
+ /// SCEV.
+ DenseMap<const SCEV *, RewriteEntry> RewriteMap;
+ /// The ScalarEvolution analysis.
+ ScalarEvolution &SE;
+ /// The SCEVPredicate that forms our context. We will rewrite all
+  /// expressions assuming that this predicate is true.
+ SCEVUnionPredicate Preds;
+ /// Marks the version of the SCEV predicate used. When rewriting a SCEV
+ /// expression we mark it with the version of the predicate. We use this to
+ /// figure out if the predicate has changed from the last rewrite of the
+ /// SCEV. If so, we need to perform a new rewrite.
+ unsigned Generation;
+ };
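[Annotation: a hedged usage sketch of the versioning guarantee: queries through the wrapper stay consistent as predicates accumulate. Pred is an assumed SCEVPredicate built elsewhere.]

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;
  void versionedQuery(ScalarEvolution &SE, Value *V, const SCEVPredicate &Pred) {
    PredicatedScalarEvolution PSE(SE);
    const SCEV *S1 = PSE.getSCEV(V); // rewritten under current predicates
    PSE.addPredicate(Pred);          // bumps the internal generation
    const SCEV *S2 = PSE.getSCEV(V); // S1's rewrite is preserved, then extended
    (void)S1; (void)S2;
  }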
}
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
new file mode 100644
index 0000000..7bbbf55
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -0,0 +1,79 @@
+//===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for a SCEV-based alias analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H
+#define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple alias analysis implementation that uses ScalarEvolution to answer
+/// queries.
+class SCEVAAResult : public AAResultBase<SCEVAAResult> {
+ ScalarEvolution &SE;
+
+public:
+ explicit SCEVAAResult(const TargetLibraryInfo &TLI, ScalarEvolution &SE)
+ : AAResultBase(TLI), SE(SE) {}
+ SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {}
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+
+private:
+ Value *GetBaseValue(const SCEV *S);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class SCEVAA {
+public:
+ typedef SCEVAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ SCEVAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "SCEVAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the SCEVAAResult object.
+class SCEVAAWrapperPass : public FunctionPass {
+ std::unique_ptr<SCEVAAResult> Result;
+
+public:
+ static char ID;
+
+ SCEVAAWrapperPass();
+
+ SCEVAAResult &getResult() { return *Result; }
+ const SCEVAAResult &getResult() const { return *Result; }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+/// Creates an instance of \c SCEVAAWrapperPass.
+FunctionPass *createSCEVAAWrapperPass();
+
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
index 8ec2078..b993916 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -117,9 +117,14 @@ namespace llvm {
/// \brief Return true for expressions that may incur non-trivial cost to
/// evaluate at runtime.
- bool isHighCostExpansion(const SCEV *Expr, Loop *L) {
+ ///
+  /// At is an optional parameter which specifies the point in the code where
+  /// the user is going to expand this expression. Sometimes this knowledge
+  /// can lead to a more accurate cost estimation.
+ bool isHighCostExpansion(const SCEV *Expr, Loop *L,
+ const Instruction *At = nullptr) {
SmallPtrSet<const SCEV *, 8> Processed;
- return isHighCostExpansionHelper(Expr, L, Processed);
+ return isHighCostExpansionHelper(Expr, L, At, Processed);
}
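[Annotation: hedged usage of the new parameter; a client can gate a rewrite on expansion cost at the intended insertion point. The function and variable names are illustrative.]

  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;
  bool worthMaterializing(SCEVExpander &Expander, const SCEV *Count,
                          Loop *L, const Instruction *At) {
    // Passing At lets the expander notice an equivalent value that is
    // already available there, lowering the estimated cost.
    return !Expander.isHighCostExpansion(Count, L, At);
  }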
/// \brief This method returns the canonical induction variable of the
@@ -146,6 +151,22 @@ namespace llvm {
/// block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
+ /// \brief Generates a code sequence that evaluates this predicate.
+ /// The inserted instructions will be at position \p Loc.
+ /// The result will be of type i1 and will have a value of 0 when the
+ /// predicate is false and 1 otherwise.
+ Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc);
+
+ /// \brief A specialized variant of expandCodeForPredicate, handling the
+ /// case when we are expanding code for a SCEVEqualPredicate.
+ Value *expandEqualPredicate(const SCEVEqualPredicate *Pred,
+ Instruction *Loc);
+
+ /// \brief A specialized variant of expandCodeForPredicate, handling the
+ /// case when we are expanding code for a SCEVUnionPredicate.
+ Value *expandUnionPredicate(const SCEVUnionPredicate *Pred,
+ Instruction *Loc);
+
/// \brief Set the current IV increment loop and position.
void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
assert(!CanonicalMode &&
@@ -193,11 +214,22 @@ namespace llvm {
void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
+ /// \brief Try to find LLVM IR value for S available at the point At.
+ ///
+ /// L is a hint which tells in which loop to look for the suitable value.
+ /// On success return value which is equivalent to the expanded S at point
+ /// At. Return nullptr if value was not found.
+ ///
+  /// Note that this function does not perform an exhaustive search, i.e.,
+  /// failure to find a value does not mean that no such value exists.
+ Value *findExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
+
private:
LLVMContext &getContext() const { return SE.getContext(); }
/// \brief Recursive helper function for isHighCostExpansion.
bool isHighCostExpansionHelper(const SCEV *S, Loop *L,
+ const Instruction *At,
SmallPtrSetImpl<const SCEV *> &Processed);
/// \brief Insert the specified binary operator, doing a small amount
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index da24de2..1699268 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -43,6 +43,7 @@ namespace llvm {
SCEV(ID, scConstant), V(v) {}
public:
ConstantInt *getValue() const { return V; }
+ const APInt &getAPInt() const { return getValue()->getValue(); }
Type *getType() const { return V->getType(); }
@@ -404,7 +405,7 @@ namespace llvm {
/// value, and only represent it as its LLVM Value. This is the "bottom"
/// value for the analysis.
///
- class SCEVUnknown : public SCEV, private CallbackVH {
+ class SCEVUnknown final : public SCEV, private CallbackVH {
friend class ScalarEvolution;
// Implement CallbackVH.
@@ -553,64 +554,56 @@ namespace llvm {
T.visitAll(Root);
}
- typedef DenseMap<const Value*, Value*> ValueToValueMap;
-
- /// The SCEVParameterRewriter takes a scalar evolution expression and updates
- /// the SCEVUnknown components following the Map (Value -> Value).
- struct SCEVParameterRewriter
- : public SCEVVisitor<SCEVParameterRewriter, const SCEV*> {
+ /// Recursively visits a SCEV expression and re-writes it.
+ template<typename SC>
+ class SCEVRewriteVisitor : public SCEVVisitor<SC, const SCEV *> {
+ protected:
+ ScalarEvolution &SE;
public:
- static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
- ValueToValueMap &Map,
- bool InterpretConsts = false) {
- SCEVParameterRewriter Rewriter(SE, Map, InterpretConsts);
- return Rewriter.visit(Scev);
- }
-
- SCEVParameterRewriter(ScalarEvolution &S, ValueToValueMap &M, bool C)
- : SE(S), Map(M), InterpretConsts(C) {}
+ SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}
const SCEV *visitConstant(const SCEVConstant *Constant) {
return Constant;
}
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
+ const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
return SE.getTruncateExpr(Operand, Expr->getType());
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
+ const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
return SE.getZeroExtendExpr(Operand, Expr->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
+ const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
return SE.getSignExtendExpr(Operand, Expr->getType());
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
+ Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
return SE.getAddExpr(Operands);
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
+ Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
return SE.getMulExpr(Operands);
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
+ return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()),
+ ((SC*)this)->visit(Expr->getRHS()));
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
+ Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
return SE.getAddRecExpr(Operands, Expr->getLoop(),
Expr->getNoWrapFlags());
}
@@ -618,18 +611,43 @@ namespace llvm {
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
+ Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
return SE.getSMaxExpr(Operands);
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
+ Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
return SE.getUMaxExpr(Operands);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+ };
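[Annotation: the casts through (SC*)this make the recursion dispatch through the derived class, so a subclass only overrides the cases it cares about. A minimal hypothetical subclass, assuming the base above: rebuild every add-rec with its no-wrap flags cleared.]

  class StripAddRecFlagsRewriter
      : public SCEVRewriteVisitor<StripAddRecFlagsRewriter> {
  public:
    StripAddRecFlagsRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {}
    const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
      SmallVector<const SCEV *, 2> Operands;
      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
        Operands.push_back(visit(Expr->getOperand(i)));
      // Same structure, but with all no-wrap flags dropped.
      return SE.getAddRecExpr(Operands, Expr->getLoop(), SCEV::FlagAnyWrap);
    }
  };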
+
+ typedef DenseMap<const Value*, Value*> ValueToValueMap;
+
+ /// The SCEVParameterRewriter takes a scalar evolution expression and updates
+ /// the SCEVUnknown components following the Map (Value -> Value).
+ class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> {
+ public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ ValueToValueMap &Map,
+ bool InterpretConsts = false) {
+ SCEVParameterRewriter Rewriter(SE, Map, InterpretConsts);
+ return Rewriter.visit(Scev);
+ }
+
+ SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C)
+ : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
Value *V = Expr->getValue();
if (Map.count(V)) {
Value *NV = Map[V];
@@ -640,68 +658,26 @@ namespace llvm {
return Expr;
}
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
- return Expr;
- }
-
private:
- ScalarEvolution &SE;
ValueToValueMap &Map;
bool InterpretConsts;
};
typedef DenseMap<const Loop*, const SCEV*> LoopToScevMapT;
- /// The SCEVApplyRewriter takes a scalar evolution expression and applies
+ /// The SCEVLoopAddRecRewriter takes a scalar evolution expression and applies
/// the Map (Loop -> SCEV) to all AddRecExprs.
- struct SCEVApplyRewriter
- : public SCEVVisitor<SCEVApplyRewriter, const SCEV*> {
+ class SCEVLoopAddRecRewriter
+ : public SCEVRewriteVisitor<SCEVLoopAddRecRewriter> {
public:
static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map,
ScalarEvolution &SE) {
- SCEVApplyRewriter Rewriter(SE, Map);
+ SCEVLoopAddRecRewriter Rewriter(SE, Map);
return Rewriter.visit(Scev);
}
- SCEVApplyRewriter(ScalarEvolution &S, LoopToScevMapT &M)
- : SE(S), Map(M) {}
-
- const SCEV *visitConstant(const SCEVConstant *Constant) {
- return Constant;
- }
-
- const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
- return SE.getTruncateExpr(Operand, Expr->getType());
- }
-
- const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
- return SE.getZeroExtendExpr(Operand, Expr->getType());
- }
-
- const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
- const SCEV *Operand = visit(Expr->getOperand());
- return SE.getSignExtendExpr(Operand, Expr->getType());
- }
-
- const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
- return SE.getAddExpr(Operands);
- }
-
- const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
- SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
- return SE.getMulExpr(Operands);
- }
-
- const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
- }
+ SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M)
+ : SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
@@ -714,41 +690,18 @@ namespace llvm {
if (0 == Map.count(L))
return Res;
- const SCEVAddRecExpr *Rec = (const SCEVAddRecExpr *) Res;
+ const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res);
return Rec->evaluateAtIteration(Map[L], SE);
}
- const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
- SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
- return SE.getSMaxExpr(Operands);
- }
-
- const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
- SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(visit(Expr->getOperand(i)));
- return SE.getUMaxExpr(Operands);
- }
-
- const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- return Expr;
- }
-
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
- return Expr;
- }
-
private:
- ScalarEvolution &SE;
LoopToScevMapT &Map;
};
/// Applies the Map (Loop -> SCEV) to the given Scev.
static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map,
ScalarEvolution &SE) {
- return SCEVApplyRewriter::rewrite(Scev, Map, SE);
+ return SCEVLoopAddRecRewriter::rewrite(Scev, Map, SE);
}
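[Annotation: hedged usage of apply: evaluate every add-rec of loop L at a fixed iteration; the iteration number 7 and the i64 type are illustrative.]

  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  using namespace llvm;
  const SCEV *valueAtIteration7(ScalarEvolution &SE, const SCEV *S,
                                const Loop *L) {
    LoopToScevMapT Map;
    Map[L] = SE.getConstant(Type::getInt64Ty(SE.getContext()), 7);
    return apply(S, Map, SE); // each {start,+,step}<L> becomes start + 7*step
  }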
}
diff --git a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
new file mode 100644
index 0000000..1755616
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -0,0 +1,92 @@
+//===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for a metadata-based scoped no-alias analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCOPEDNOALIASAA_H
+#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple AA result which uses scoped-noalias metadata to answer queries.
+class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> {
+ friend AAResultBase<ScopedNoAliasAAResult>;
+
+public:
+ explicit ScopedNoAliasAAResult(const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI) {}
+ ScopedNoAliasAAResult(ScopedNoAliasAAResult &&Arg)
+ : AAResultBase(std::move(Arg)) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+
+private:
+ bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
+ void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const;
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class ScopedNoAliasAA {
+public:
+ typedef ScopedNoAliasAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ ScopedNoAliasAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "ScopedNoAliasAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the ScopedNoAliasAAResult object.
+class ScopedNoAliasAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<ScopedNoAliasAAResult> Result;
+
+public:
+ static char ID;
+
+ ScopedNoAliasAAWrapperPass();
+
+ ScopedNoAliasAAResult &getResult() { return *Result; }
+ const ScopedNoAliasAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+//===--------------------------------------------------------------------===//
+//
+// createScopedNoAliasAAWrapperPass - This pass implements metadata-based
+// scoped noalias analysis.
+//
+ImmutablePass *createScopedNoAliasAAWrapperPass();
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
index 9ccae5f..2c7f5dd 100644
--- a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -21,19 +21,19 @@
#include <vector>
namespace llvm {
- class Value;
- class Constant;
- class Argument;
- class Instruction;
- class PHINode;
- class TerminatorInst;
- class BasicBlock;
- class Function;
- class SparseSolver;
- class raw_ostream;
-
- template<typename T> class SmallVectorImpl;
-
+class Value;
+class Constant;
+class Argument;
+class Instruction;
+class PHINode;
+class TerminatorInst;
+class BasicBlock;
+class Function;
+class SparseSolver;
+class raw_ostream;
+
+template <typename T> class SmallVectorImpl;
+
/// AbstractLatticeFunction - This class is implemented by the dataflow instance
/// to specify what the lattice values are and how they handle merges etc.
/// This gives the client the power to compute lattice values from instructions,
@@ -44,8 +44,10 @@ namespace llvm {
class AbstractLatticeFunction {
public:
typedef void *LatticeVal;
+
private:
LatticeVal UndefVal, OverdefinedVal, UntrackedVal;
+
public:
AbstractLatticeFunction(LatticeVal undefVal, LatticeVal overdefinedVal,
LatticeVal untrackedVal) {
@@ -54,18 +56,16 @@ public:
UntrackedVal = untrackedVal;
}
virtual ~AbstractLatticeFunction();
-
+
LatticeVal getUndefVal() const { return UndefVal; }
LatticeVal getOverdefinedVal() const { return OverdefinedVal; }
LatticeVal getUntrackedVal() const { return UntrackedVal; }
-
+
/// IsUntrackedValue - If the specified Value is something that is obviously
/// uninteresting to the analysis (and would always return UntrackedVal),
/// this function can return true to avoid pointless work.
- virtual bool IsUntrackedValue(Value *V) {
- return false;
- }
-
+ virtual bool IsUntrackedValue(Value *V) { return false; }
+
/// ComputeConstant - Given a constant value, compute and return a lattice
/// value corresponding to the specified constant.
virtual LatticeVal ComputeConstant(Constant *C) {
@@ -74,10 +74,8 @@ public:
/// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is
 /// one that we want to handle through ComputeInstructionState.
- virtual bool IsSpecialCasedPHI(PHINode *PN) {
- return false;
- }
-
+ virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; }
+
/// GetConstant - If the specified lattice value is representable as an LLVM
/// constant value, return it. Otherwise return null. The returned value
/// must be in the same LLVM type as Val.
@@ -90,42 +88,41 @@ public:
virtual LatticeVal ComputeArgument(Argument *I) {
return getOverdefinedVal(); // always safe
}
-
+
/// MergeValues - Compute and return the merge of the two specified lattice
/// values. Merging should only move one direction down the lattice to
/// guarantee convergence (toward overdefined).
virtual LatticeVal MergeValues(LatticeVal X, LatticeVal Y) {
return getOverdefinedVal(); // always safe, never useful.
}
-
+
/// ComputeInstructionState - Given an instruction and a vector of its operand
/// values, compute the result value of the instruction.
virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) {
return getOverdefinedVal(); // always safe, never useful.
}
-
+
/// PrintValue - Render the specified lattice value to the specified stream.
virtual void PrintValue(LatticeVal V, raw_ostream &OS);
};
-
/// SparseSolver - This class is a general purpose solver for Sparse Conditional
/// Propagation with a programmable lattice function.
///
class SparseSolver {
typedef AbstractLatticeFunction::LatticeVal LatticeVal;
-
+
 /// LatticeFunc - This is the object that knows the lattice and how to
 /// compute transfer functions.
AbstractLatticeFunction *LatticeFunc;
-
- DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
- SmallPtrSet<BasicBlock*, 16> BBExecutable; // The bbs that are executable.
-
- std::vector<Instruction*> InstWorkList; // Worklist of insts to process.
-
- std::vector<BasicBlock*> BBWorkList; // The BasicBlock work list
-
+
+ DenseMap<Value *, LatticeVal> ValueState; // The state each value is in.
+ SmallPtrSet<BasicBlock *, 16> BBExecutable; // The bbs that are executable.
+
+ std::vector<Instruction *> InstWorkList; // Worklist of insts to process.
+
+ std::vector<BasicBlock *> BBWorkList; // The BasicBlock work list
+
/// KnownFeasibleEdges - Entries in this set are edges which have already had
/// PHI nodes retriggered.
typedef std::pair<BasicBlock*,BasicBlock*> Edge;
@@ -133,17 +130,16 @@ class SparseSolver {
SparseSolver(const SparseSolver&) = delete;
void operator=(const SparseSolver&) = delete;
+
public:
explicit SparseSolver(AbstractLatticeFunction *Lattice)
- : LatticeFunc(Lattice) {}
- ~SparseSolver() {
- delete LatticeFunc;
- }
-
+ : LatticeFunc(Lattice) {}
+ ~SparseSolver() { delete LatticeFunc; }
+
/// Solve - Solve for constants and executable blocks.
///
void Solve(Function &F);
-
+
void Print(Function &F, raw_ostream &OS) const;
/// getLatticeState - Return the LatticeVal object that corresponds to the
@@ -153,7 +149,7 @@ public:
DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal();
}
-
+
/// getOrInitValueState - Return the LatticeVal object that corresponds to the
/// value, initializing the value's state if it hasn't been entered into the
/// map yet. This function is necessary because not all values should start
@@ -161,7 +157,7 @@ public:
/// constants should be marked as constants.
///
LatticeVal getOrInitValueState(Value *V);
-
+
/// isEdgeFeasible - Return true if the control flow edge from the 'From'
/// basic block to the 'To' basic block is currently feasible. If
/// AggressiveUndef is true, then this treats values with unknown lattice
@@ -176,29 +172,28 @@ public:
bool isBlockExecutable(BasicBlock *BB) const {
return BBExecutable.count(BB);
}
-
+
private:
/// UpdateState - When the state for some instruction is potentially updated,
/// this function notices and adds I to the worklist if needed.
void UpdateState(Instruction &Inst, LatticeVal V);
-
+
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
void MarkBlockExecutable(BasicBlock *BB);
-
+
/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
/// work list if it is not already executable.
void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest);
-
+
/// getFeasibleSuccessors - Return a vector of booleans to indicate which
/// successors are reachable from a given terminator instruction.
void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs,
bool AggressiveUndef);
-
+
void visitInst(Instruction &I);
void visitPHINode(PHINode &I);
void visitTerminatorInst(TerminatorInst &TI);
-
};
} // end namespace llvm
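
Since the hunks above are purely stylistic, a quick sketch of the API they reformat may help. This is a hypothetical client, not code from the change: AbstractLatticeFunction encodes lattice values as opaque void pointers, and SparseSolver takes ownership of the lattice object (its destructor deletes it, as shown above). Linking against the LLVM libraries is assumed.

#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/IR/Function.h"

namespace {
// Distinct addresses used as the three opaque LatticeVal sentinels.
char UndefTag, OverdefTag, UntrackedTag;

// Trivial lattice: the default virtual methods already send everything to
// getOverdefinedVal(), so this subclass only supplies the sentinel values.
class TrivialLattice : public llvm::AbstractLatticeFunction {
public:
  TrivialLattice()
      : AbstractLatticeFunction(&UndefTag, &OverdefTag, &UntrackedTag) {}
};
} // end anonymous namespace

void runTrivialSolver(llvm::Function &F) {
  llvm::SparseSolver Solver(new TrivialLattice()); // solver deletes the lattice
  Solver.Solve(F);
}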
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 1c1fdfe..7798e3c 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -27,6 +27,86 @@
#define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr,
#endif
+/// void *new(unsigned int);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_int)
+TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z")
+
+/// void *new(unsigned int, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z")
+
+/// void *new(unsigned long long);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong)
+TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z")
+
+/// void *new(unsigned long long, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z")
+
+/// void operator delete(void*);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z")
+
+/// void operator delete(void*, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z")
+
+/// void operator delete(void*, unsigned int);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z")
+
+/// void operator delete(void*);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z")
+
+/// void operator delete(void*, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z")
+
+/// void operator delete(void*, unsigned long long);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong)
+TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z")
+
+/// void *new[](unsigned int);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int)
+TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z")
+
+/// void *new[](unsigned int, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z")
+
+/// void *new[](unsigned long long);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong)
+TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z")
+
+/// void *new[](unsigned long long, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z")
+
+/// void operator delete[](void*);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z")
+
+/// void operator delete[](void*, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z")
+
+/// void operator delete[](void*, unsigned int);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z")
+
+/// void operator delete[](void*);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z")
+
+/// void operator delete[](void*, nothrow);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z")
+
+/// void operator delete[](void*, unsigned long long);
+TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong)
+TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z")
+
/// int _IO_getc(_IO_FILE * __fp);
TLI_DEFINE_ENUM_INTERNAL(under_IO_getc)
TLI_DEFINE_STRING_INTERNAL("_IO_getc")
@@ -406,6 +486,15 @@ TLI_DEFINE_STRING_INTERNAL("floorf")
/// long double floorl(long double x);
TLI_DEFINE_ENUM_INTERNAL(floorl)
TLI_DEFINE_STRING_INTERNAL("floorl")
+/// int fls(int i);
+TLI_DEFINE_ENUM_INTERNAL(fls)
+TLI_DEFINE_STRING_INTERNAL("fls")
+/// int flsl(long int i);
+TLI_DEFINE_ENUM_INTERNAL(flsl)
+TLI_DEFINE_STRING_INTERNAL("flsl")
+/// int flsll(long long int i);
+TLI_DEFINE_ENUM_INTERNAL(flsll)
+TLI_DEFINE_STRING_INTERNAL("flsll")
/// double fmax(double x, double y);
TLI_DEFINE_ENUM_INTERNAL(fmax)
TLI_DEFINE_STRING_INTERNAL("fmax")
@@ -664,6 +753,7 @@ TLI_DEFINE_STRING_INTERNAL("modff")
/// long double modfl(long double value, long double *iptr);
TLI_DEFINE_ENUM_INTERNAL(modfl)
TLI_DEFINE_STRING_INTERNAL("modfl")
+
/// double nearbyint(double x);
TLI_DEFINE_ENUM_INTERNAL(nearbyint)
TLI_DEFINE_STRING_INTERNAL("nearbyint")
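
The new entries above follow the .def file's X-macro protocol, visible in the TLI_DEFINE_ENUM_INTERNAL/TLI_DEFINE_STRING_INTERNAL guards at the top of the hunk. A condensed sketch of how consumers expand it, mirroring what TargetLibraryInfo.h and its .cpp do:

// Expanding the same .def twice: once into enumerators, once into a
// parallel table of external names (e.g. "??2@YAPAXI@Z", "fls").
namespace LibFunc {
enum Func {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
  NumLibFuncs
};
}

static const char *const StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};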
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index e0a1ee3..7becdf0 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -42,7 +42,7 @@ class PreservedAnalyses;
///
/// This class constructs tables that hold the target library information and
/// make it available. However, it is somewhat expensive to compute and only
-/// depends on the triple. So users typicaly interact with the \c
+/// depends on the triple. So users typically interact with the \c
/// TargetLibraryInfo wrapper below.
class TargetLibraryInfoImpl {
friend class TargetLibraryInfo;
@@ -201,13 +201,13 @@ public:
}
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
return Impl->isFunctionVectorizable(F, VF);
- };
+ }
bool isFunctionVectorizable(StringRef F) const {
return Impl->isFunctionVectorizable(F);
- };
+ }
StringRef getVectorizedFunction(StringRef F, unsigned VF) const {
return Impl->getVectorizedFunction(F, VF);
- };
+ }
/// \brief Tests if the function is both available and a candidate for
/// optimized code generation.
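
The semicolon fixes above touch the vectorization queries; as a usage note, a hypothetical call site (assuming a TargetLibraryInfo reference is already available) looks like:

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

// Returns the name of the VF-wide vector variant of Fn, or an empty
// StringRef when no variant is registered for that width.
llvm::StringRef vectorVariant(const llvm::TargetLibraryInfo &TLI,
                              llvm::StringRef Fn, unsigned VF) {
  if (TLI.isFunctionVectorizable(Fn, VF))
    return TLI.getVectorizedFunction(Fn, VF);
  return llvm::StringRef();
}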
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 01f0089..3913cc3 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -42,11 +42,13 @@ class Value;
/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
MemIntrinsicInfo()
- : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0),
+ : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
NumMemRefs(0), PtrVal(nullptr) {}
bool ReadMem;
bool WriteMem;
- bool Vol;
+ /// True only if this memory operation is non-volatile, non-atomic, and
+ /// unordered. (See LoadInst/StoreInst for details on each)
+ bool IsSimple;
// Same Id is set by the target for corresponding load/store intrinsics.
unsigned short MatchingId;
int NumMemRefs;
@@ -97,11 +99,14 @@ public:
///
/// Many APIs in this interface return a cost. This enum defines the
/// fundamental values that should be used to interpret (and produce) those
- /// costs. The costs are returned as an unsigned rather than a member of this
+ /// costs. The costs are returned as an int rather than a member of this
/// enumeration because it is expected that the cost of one IR instruction
/// may have a multiplicative factor to it or otherwise won't fit directly
/// into the enum. Moreover, it is common to sum or average costs which works
/// better as simple integral values. Thus this enum only provides constants.
+ /// Also note that the returned costs are signed integers to make it natural
+ /// to add, subtract, and test with zero (a common boundary condition). It is
+ /// not expected that 2^32 is a realistic cost to be modeling at any point.
///
/// Note that these costs should usually reflect the intersection of code-size
/// cost and execution cost. A free instruction is typically one that folds
@@ -128,15 +133,15 @@ public:
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- unsigned getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy = nullptr) const;
+ int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
/// \brief Estimate the cost of a GEP operation when lowered.
///
/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to
/// the GEP operation.
- unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
+ int getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) const;
/// \brief Estimate the cost of a function call when lowered.
///
@@ -147,31 +152,30 @@ public:
/// This is the most basic query for estimating call cost: it only knows the
/// function type and (potentially) the number of arguments at the call site.
/// The latter is only interesting for varargs function types.
- unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+ int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
/// \brief Estimate the cost of calling a specific function when lowered.
///
/// This overload adds the ability to reason about the particular function
/// being called in the event it is a library call with special lowering.
- unsigned getCallCost(const Function *F, int NumArgs = -1) const;
+ int getCallCost(const Function *F, int NumArgs = -1) const;
/// \brief Estimate the cost of calling a specific function when lowered.
///
/// This overload allows specifying a set of candidate argument values.
- unsigned getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const;
+ int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
/// \brief Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const;
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) const;
/// \brief Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const;
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const;
/// \brief Estimate the cost of a given IR user when lowered.
///
@@ -188,7 +192,7 @@ public:
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- unsigned getUserCost(const User *U) const;
+ int getUserCost(const User *U) const;
/// \brief Return true if branch divergence exists.
///
@@ -308,12 +312,17 @@ public:
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0) const;
- /// \brief Return true if the target works with masked instruction
- /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
- /// AVX-512 architecture will also allow masks for non-consecutive memory
- /// accesses.
- bool isLegalMaskedStore(Type *DataType, int Consecutive) const;
- bool isLegalMaskedLoad(Type *DataType, int Consecutive) const;
+ /// \brief Return true if the target supports masked load/store.
+ /// AVX2 and AVX-512 targets allow masks for consecutive load and store for
+ /// 32- and 64-bit elements.
+ bool isLegalMaskedStore(Type *DataType) const;
+ bool isLegalMaskedLoad(Type *DataType) const;
+
+ /// \brief Return true if the target supports masked gather/scatter.
+ /// AVX-512 fully supports gather and scatter for vectors with 32- and
+ /// 64-bit scalar types.
+ bool isLegalMaskedScatter(Type *DataType) const;
+ bool isLegalMaskedGather(Type *DataType) const;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
@@ -350,6 +359,9 @@ public:
/// \brief Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
+ /// \brief Enable matching of interleaved access groups.
+ bool enableInterleavedAccessVectorization() const;
+
/// \brief Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
@@ -358,19 +370,19 @@ public:
/// \brief Return the expected cost of supporting the floating point operation
/// of the specified type.
- unsigned getFPOpCost(Type *Ty) const;
+ int getFPOpCost(Type *Ty) const;
/// \brief Return the expected cost of materializing for the given integer
/// immediate of the specified type.
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+ int getIntImmCost(const APInt &Imm, Type *Ty) const;
/// \brief Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
- unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
+ int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
/// @}
/// \name Vector Target Information
@@ -410,43 +422,51 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
- unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const;
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds.
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
- Type *SubTp = nullptr) const;
+ int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
+ Type *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc.
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br.
- unsigned getCFInstrCost(unsigned Opcode) const;
+ int getCFInstrCost(unsigned Opcode) const;
/// \returns The expected cost of compare and select instructions.
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const;
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index = -1) const;
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const;
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const;
/// \return The cost of masked Load and Store instructions.
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const;
+ int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// \return The cost of a Gather or Scatter operation.
+ /// \p Opcode - the kind of memory access, Load or Store
+ /// \p DataTy - a vector type of the data to be loaded or stored
+ /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
+ /// \p VariableMask - true when the memory access is predicated with a mask
+ /// that is not a compile-time constant
+ /// \p Alignment - alignment of a single element
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask, unsigned Alignment) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
@@ -456,11 +476,9 @@ public:
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) const;
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace) const;
/// \brief Calculate the cost of performing a vector reduction.
///
@@ -475,16 +493,18 @@ public:
/// Split:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
- unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const;
+ int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
+
+  /// \returns The cost of Intrinsic instructions. Type analysis only.
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const;
- /// \returns The cost of Intrinsic instructions.
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) const;
+ /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) const;
/// \returns The cost of Call instructions.
- unsigned getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) const;
+ int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
@@ -497,7 +517,7 @@ public:
/// The 'IsComplex' parameter is a hint that the address computation is likely
/// to involve multiple instructions and as such unlikely to be merged into
/// the address indexing mode.
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
+ int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
@@ -521,8 +541,8 @@ public:
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
- bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const;
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
/// @}
@@ -542,18 +562,18 @@ class TargetTransformInfo::Concept {
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
- virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
- virtual unsigned getGEPCost(const Value *Ptr,
- ArrayRef<const Value *> Operands) = 0;
- virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
- virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
- virtual unsigned getCallCost(const Function *F,
+ virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
+ virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) = 0;
+ virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
+ virtual int getCallCost(const Function *F, int NumArgs) = 0;
+ virtual int getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) = 0;
+ virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) = 0;
+ virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments) = 0;
- virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) = 0;
- virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) = 0;
- virtual unsigned getUserCost(const User *U) = 0;
+ virtual int getUserCost(const User *U) = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
@@ -564,8 +584,10 @@ public:
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) = 0;
- virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
- virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
+ virtual bool isLegalMaskedStore(Type *DataType) = 0;
+ virtual bool isLegalMaskedLoad(Type *DataType) = 0;
+ virtual bool isLegalMaskedScatter(Type *DataType) = 0;
+ virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) = 0;
@@ -576,14 +598,15 @@ public:
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
+ virtual bool enableInterleavedAccessVectorization() = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
- virtual unsigned getFPOpCost(Type *Ty) = 0;
- virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
- virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
- virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) = 0;
+ virtual int getFPOpCost(Type *Ty) = 0;
+ virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
+ virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
+ virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
@@ -592,40 +615,44 @@ public:
OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) = 0;
- virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) = 0;
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
- virtual unsigned getCFInstrCost(unsigned Opcode) = 0;
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) = 0;
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) = 0;
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) = 0;
- virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) = 0;
+ virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+ virtual int getCFInstrCost(unsigned Opcode) = 0;
+ virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) = 0;
+ virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) = 0;
+ virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) = 0;
+ virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) = 0;
+ virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) = 0;
+ virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace) = 0;
- virtual unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) = 0;
- virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) = 0;
- virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) = 0;
- virtual unsigned getCallInstrCost(Function *F, Type *RetTy,
+ virtual int getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) = 0;
+ virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
+ virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) = 0;
+ virtual int getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
- virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
+ virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
- virtual bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const = 0;
+ virtual bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const = 0;
};
template <typename T>
@@ -640,32 +667,32 @@ public:
return Impl.getDataLayout();
}
- unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
+ int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
return Impl.getOperationCost(Opcode, Ty, OpTy);
}
- unsigned getGEPCost(const Value *Ptr,
- ArrayRef<const Value *> Operands) override {
- return Impl.getGEPCost(Ptr, Operands);
+ int getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) override {
+ return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
- unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
+ int getCallCost(FunctionType *FTy, int NumArgs) override {
return Impl.getCallCost(FTy, NumArgs);
}
- unsigned getCallCost(const Function *F, int NumArgs) override {
+ int getCallCost(const Function *F, int NumArgs) override {
return Impl.getCallCost(F, NumArgs);
}
- unsigned getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) override {
+ int getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) override {
return Impl.getCallCost(F, Arguments);
}
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) override {
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
}
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) override {
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) override {
return Impl.getIntrinsicCost(IID, RetTy, Arguments);
}
- unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); }
+ int getUserCost(const User *U) override { return Impl.getUserCost(U); }
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
@@ -688,11 +715,17 @@ public:
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
}
- bool isLegalMaskedStore(Type *DataType, int Consecutive) override {
- return Impl.isLegalMaskedStore(DataType, Consecutive);
+ bool isLegalMaskedStore(Type *DataType) override {
+ return Impl.isLegalMaskedStore(DataType);
+ }
+ bool isLegalMaskedLoad(Type *DataType) override {
+ return Impl.isLegalMaskedLoad(DataType);
+ }
+ bool isLegalMaskedScatter(Type *DataType) override {
+ return Impl.isLegalMaskedScatter(DataType);
}
- bool isLegalMaskedLoad(Type *DataType, int Consecutive) override {
- return Impl.isLegalMaskedLoad(DataType, Consecutive);
+ bool isLegalMaskedGather(Type *DataType) override {
+ return Impl.isLegalMaskedGather(DataType);
}
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
@@ -715,24 +748,25 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
+ bool enableInterleavedAccessVectorization() override {
+ return Impl.enableInterleavedAccessVectorization();
+ }
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
- unsigned getFPOpCost(Type *Ty) override {
- return Impl.getFPOpCost(Ty);
- }
+ int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
+ int getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}
- unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
+ int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
}
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned getNumberOfRegisters(bool Vector) override {
@@ -752,56 +786,62 @@ public:
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) override {
+ int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
return Impl.getCastInstrCost(Opcode, Dst, Src);
}
- unsigned getCFInstrCost(unsigned Opcode) override {
+ int getCFInstrCost(unsigned Opcode) override {
return Impl.getCFInstrCost(Opcode);
}
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) override {
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) override {
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) override {
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) override {
+ int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) override {
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) override {
+ return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+ }
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace);
}
- unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) override {
+ int getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) override {
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) override {
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
}
- unsigned getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) override {
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args);
+ }
+ int getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) override {
return Impl.getCallInstrCost(F, RetTy, Tys);
}
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override {
+ int getAddressComputationCost(Type *Ty, bool IsComplex) override {
return Impl.getAddressComputationCost(Ty, IsComplex);
}
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
@@ -815,9 +855,9 @@ public:
Type *ExpectedType) override {
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
- bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const override {
- return Impl.hasCompatibleFunctionAttributes(Caller, Callee);
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const override {
+ return Impl.areInlineCompatible(Caller, Callee);
}
};
@@ -856,7 +896,7 @@ public:
///
/// The callback will be called with a particular function for which the TTI
/// is needed and must return a TTI object for that function.
- TargetIRAnalysis(std::function<Result(Function &)> TTICallback);
+ TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
// Value semantics. We spell out the constructors for MSVC.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
@@ -872,7 +912,7 @@ public:
return *this;
}
- Result run(Function &F);
+ Result run(const Function &F);
private:
static char PassID;
@@ -887,10 +927,10 @@ private:
/// the analysis and thus use a function_ref which would be lighter weight.
/// This may also be less error prone as the callback is likely to reference
/// the external TargetMachine, and that reference needs to never dangle.
- std::function<Result(Function &)> TTICallback;
+ std::function<Result(const Function &)> TTICallback;
/// \brief Helper function used as the callback in the default constructor.
- static Result getDefaultTTI(Function &F);
+ static Result getDefaultTTI(const Function &F);
};
/// \brief Wrapper pass for TargetTransformInfo.
@@ -914,7 +954,7 @@ public:
explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
- TargetTransformInfo &getTTI(Function &F);
+ TargetTransformInfo &getTTI(const Function &F);
};
/// \brief Create an analysis pass wrapper around a TTI object.
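
To illustrate the unsigned-to-int cost migration above, a minimal sketch of a cost query loop (hypothetical helper; only getUserCost is from this header). Signed costs make the accumulate-and-compare-with-zero pattern natural:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"

// Sums the per-user cost of every instruction in F. With int costs the
// total can be compared against signed thresholds without casts.
int sumUserCosts(llvm::Function &F, const llvm::TargetTransformInfo &TTI) {
  int Cost = 0;
  for (llvm::BasicBlock &BB : F)
    for (llvm::Instruction &I : BB)
      Cost += TTI.getUserCost(&I);
  return Cost;
}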
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ecc7885..4381523 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -19,8 +19,10 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
+#include "llvm/Analysis/VectorUtils.h"
namespace llvm {
@@ -100,7 +102,8 @@ public:
}
}
- unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) {
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -145,9 +148,6 @@ public:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result:
case Intrinsic::experimental_gc_relocate:
// These intrinsics don't actually represent code after lowering.
@@ -207,9 +207,13 @@ public:
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
}
- bool isLegalMaskedStore(Type *DataType, int Consecutive) { return false; }
+ bool isLegalMaskedStore(Type *DataType) { return false; }
- bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; }
+ bool isLegalMaskedLoad(Type *DataType) { return false; }
+
+ bool isLegalMaskedScatter(Type *DataType) { return false; }
+
+ bool isLegalMaskedGather(Type *DataType) { return false; }
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
@@ -234,6 +238,8 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
+ bool enableInterleavedAccessVectorization() { return false; }
+
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
return TTI::PSK_Software;
}
@@ -295,6 +301,12 @@ public:
return 1;
}
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask,
+ unsigned Alignment) {
+ return 1;
+ }
+
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
@@ -307,6 +319,10 @@ public:
ArrayRef<Type *> Tys) {
return 1;
}
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) {
+ return 1;
+ }
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
return 1;
@@ -329,8 +345,8 @@ public:
return nullptr;
}
- bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const {
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
return (Caller->getFnAttribute("target-cpu") ==
Callee->getFnAttribute("target-cpu")) &&
(Caller->getFnAttribute("target-features") ==
@@ -386,6 +402,61 @@ public:
return static_cast<T *>(this)->getCallCost(F, Arguments.size());
}
+ using BaseT::getGEPCost;
+
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) {
+ const GlobalValue *BaseGV = nullptr;
+ if (Ptr != nullptr) {
+    // TODO: remove this once pointers have an opaque type.
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+ PointeeType &&
+ "explicit pointee type doesn't match operand's pointee type");
+ BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
+ }
+ bool HasBaseReg = (BaseGV == nullptr);
+ int64_t BaseOffset = 0;
+ int64_t Scale = 0;
+
+ // Assumes the address space is 0 when Ptr is nullptr.
+ unsigned AS =
+ (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
+ auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands);
+ for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
+ // We assume that the cost of Scalar GEP with constant index and the
+ // cost of Vector GEP with splat constant index are the same.
+ const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
+ if (!ConstIdx)
+ if (auto Splat = getSplatValue(*I))
+ ConstIdx = dyn_cast<ConstantInt>(Splat);
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (ConstIdx)
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ else {
+ // Needs scale register.
+ if (Scale != 0)
+ // No addressing mode takes two scale registers.
+ return TTI::TCC_Basic;
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+      // For structures, the index is always a splat or scalar constant.
+ assert(ConstIdx && "Unexpected GEP index");
+ uint64_t Field = ConstIdx->getZExtValue();
+ BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+
+ if (static_cast<T *>(this)->isLegalAddressingMode(
+ PointerType::get(*GTI, AS), const_cast<GlobalValue *>(BaseGV),
+ BaseOffset, HasBaseReg, Scale, AS)) {
+ return TTI::TCC_Free;
+ }
+ return TTI::TCC_Basic;
+ }
+
using BaseT::getIntrinsicCost;
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -405,9 +476,9 @@ public:
return TTI::TCC_Free; // Model all PHI nodes as free.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return static_cast<T *>(this)
- ->getGEPCost(GEP->getPointerOperand(), Indices);
+ SmallVector<Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
+ return static_cast<T *>(this)->getGEPCost(
+ GEP->getSourceElementType(), GEP->getPointerOperand(), Indices);
}
if (auto CS = ImmutableCallSite(U)) {
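
The getUserCost change above shows the new three-argument getGEPCost in action; a standalone sketch of the same call (hypothetical helper, using the explicit source element type this change threads through):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

// All-constant GEPs typically come back as TCC_Free: the default
// implementation folds them into an addressing mode (base + offset).
int gepCost(const llvm::TargetTransformInfo &TTI,
            const llvm::GetElementPtrInst *GEP) {
  llvm::SmallVector<const llvm::Value *, 4> Ops(GEP->idx_begin(),
                                                GEP->idx_end());
  return TTI.getGEPCost(GEP->getSourceElementType(),
                        GEP->getPointerOperand(), Ops);
}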
diff --git a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
new file mode 100644
index 0000000..7b44ac7
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -0,0 +1,93 @@
+//===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the interface for a metadata-based TBAA. See the source file for
+/// details on the algorithm.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H
+#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple AA result that uses TBAA metadata to answer queries.
+class TypeBasedAAResult : public AAResultBase<TypeBasedAAResult> {
+ friend AAResultBase<TypeBasedAAResult>;
+
+public:
+ explicit TypeBasedAAResult(const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI) {}
+ TypeBasedAAResult(TypeBasedAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const Function *F);
+ ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+
+private:
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class TypeBasedAA {
+public:
+ typedef TypeBasedAAResult Result;
+
+ /// \brief Opaque, unique identifier for this analysis pass.
+ static void *ID() { return (void *)&PassID; }
+
+ TypeBasedAAResult run(Function &F, AnalysisManager<Function> *AM);
+
+ /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "TypeBasedAA"; }
+
+private:
+ static char PassID;
+};
+
+/// Legacy wrapper pass to provide the TypeBasedAAResult object.
+class TypeBasedAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<TypeBasedAAResult> Result;
+
+public:
+ static char ID;
+
+ TypeBasedAAWrapperPass();
+
+ TypeBasedAAResult &getResult() { return *Result; }
+ const TypeBasedAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+//===--------------------------------------------------------------------===//
+//
+// createTypeBasedAAWrapperPass - This pass implements metadata-based
+// type-based alias analysis.
+//
+ImmutablePass *createTypeBasedAAWrapperPass();
+}
+
+#endif
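
As with ScopedNoAliasAA above, a short hypothetical query against the legacy wrapper's result; NoAlias and MemoryLocation come from AliasAnalysis.h, which this header includes:

#include "llvm/Analysis/TypeBasedAliasAnalysis.h"

// True only when TBAA metadata proves the two locations disjoint.
bool tbaaProvesNoAlias(llvm::TypeBasedAAWrapperPass &P,
                       const llvm::MemoryLocation &A,
                       const llvm::MemoryLocation &B) {
  return P.getResult().alias(A, B) == llvm::NoAlias;
}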
diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
index 653821d..8e02910 100644
--- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
@@ -16,20 +16,23 @@
#define LLVM_ANALYSIS_VALUETRACKING_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
- class Value;
- class Instruction;
class APInt;
- class DataLayout;
- class StringRef;
- class MDNode;
+ class AddOperator;
class AssumptionCache;
+ class DataLayout;
class DominatorTree;
- class TargetLibraryInfo;
+ class Instruction;
+ class Loop;
class LoopInfo;
+ class MDNode;
+ class StringRef;
+ class TargetLibraryInfo;
+ class Value;
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
@@ -46,9 +49,10 @@ namespace llvm {
const DominatorTree *DT = nullptr);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
+ /// \p KnownOne the set of bits that are known to be one
void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
- APInt &KnownZero);
- /// Returns true if LHS and RHS have no common bits set.
+ APInt &KnownZero, APInt &KnownOne);
+ /// Return true if LHS and RHS have no common bits set.
bool haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -66,7 +70,7 @@ namespace llvm {
/// exactly one bit set when defined. For vectors return true if every
/// element is known to be a power of two when defined. Supports values with
/// integer or pointer type and vectors of integers. If 'OrZero' is set then
- /// returns true if the given value is either a power of two or zero.
+ /// return true if the given value is either a power of two or zero.
bool isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL,
bool OrZero = false, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
@@ -82,6 +86,19 @@ namespace llvm {
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
+  /// Returns true if the given value is known to be non-negative.
+ bool isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+
+ /// isKnownNonEqual - Return true if the given values are known to be
+ /// non-equal when defined. Supports scalar integer types only.
+ bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be
/// zero for bits that V cannot have.
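
A usage sketch of the predicate documented above (hypothetical helper; assumes the MaskedValueIsZero overload taking a value, mask, and DataLayout, with the optional context arguments defaulted):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

// True when the low N bits of V are provably zero, e.g. for checking
// alignment-style facts on integer values.
bool lowBitsAreZero(llvm::Value *V, unsigned N, const llvm::DataLayout &DL) {
  unsigned BitWidth = DL.getTypeSizeInBits(V->getType());
  llvm::APInt Mask = llvm::APInt::getLowBitsSet(BitWidth, N);
  return llvm::MaskedValueIsZero(V, Mask, DL);
}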
@@ -118,12 +135,12 @@ namespace llvm {
bool LookThroughSExt = false,
unsigned Depth = 0);
- /// CannotBeNegativeZero - Return true if we can prove that the specified FP
+ /// CannotBeNegativeZero - Return true if we can prove that the specified FP
/// value is never equal to -0.0.
///
bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
- /// CannotBeOrderedLessThanZero - Return true if we can prove that the
+ /// CannotBeOrderedLessThanZero - Return true if we can prove that the
/// specified FP value is either a NaN or never less than 0.0.
///
bool CannotBeOrderedLessThanZero(const Value *V, unsigned Depth = 0);
@@ -134,7 +151,7 @@ namespace llvm {
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
Value *isBytewiseValue(Value *V);
-
+
  /// FindInsertedValue - Given an aggregate and a sequence of indices, see if
  /// the scalar value indexed is already around as a register, for example if
  /// it were inserted directly into the aggregate.
@@ -156,7 +173,7 @@ namespace llvm {
return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
DL);
}
-
+
/// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false. This
@@ -227,7 +244,17 @@ namespace llvm {
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
-
+
+  /// Returns true if V is always a dereferenceable pointer with alignment
+  /// greater than or equal to the requested alignment. If the context
+  /// instruction is specified, this performs context-sensitive analysis and
+  /// returns true if the pointer is dereferenceable at that instruction.
+ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const DataLayout &DL,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
+
/// isSafeToSpeculativelyExecute - Return true if the instruction does not
/// have any effects besides calculating the result and does not have
/// undefined behavior.
@@ -257,6 +284,16 @@ namespace llvm {
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
+  /// Returns true if the result or effects of the given instruction \p I
+  /// depend on or influence global memory.
+  /// Memory dependence arises, for example, if the instruction reads from
+  /// memory or may produce effects or undefined behavior. Memory-dependent
+  /// instructions generally cannot be reordered with respect to other
+  /// memory-dependent instructions or moved into non-dominated basic blocks.
+ /// Instructions which just compute a value based on the values of their
+ /// operands are not memory dependent.
+ bool mayBeMemoryDependent(const Instruction &I);
+
/// isKnownNonNull - Return true if this pointer couldn't possibly be null by
/// its definition. This returns true for allocas, non-extern-weak globals
/// and byval arguments.
@@ -288,16 +325,98 @@ namespace llvm {
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT);
-
+ OverflowResult computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+ /// This version also leverages the sign bit of Add if known.
+ OverflowResult computeOverflowForSignedAdd(AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+
+ /// Return true if this function can prove that the instruction I will
+ /// always transfer execution to one of its successors (including the next
+ /// instruction that follows within a basic block). E.g. this is not
+ /// guaranteed for function calls that could loop infinitely.
+ ///
+ /// In other words, this function returns false for instructions that may
+ /// transfer execution or fail to transfer execution in a way that is not
+  /// captured in the CFG or in the sequence of instructions within a basic
+ /// block.
+ ///
+ /// Undefined behavior is assumed not to happen, so e.g. division is
+ /// guaranteed to transfer execution to the following instruction even
+ /// though division by zero might cause undefined behavior.
+ bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
+
+ /// Return true if this function can prove that the instruction I
+ /// is executed for every iteration of the loop L.
+ ///
+ /// Note that this currently only considers the loop header.
+ bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L);
+
+ /// Return true if this function can prove that I is guaranteed to yield
+  /// full-poison (all bits poison) if at least one of its operands is
+  /// full-poison (all bits poison).
+ ///
+ /// The exact rules for how poison propagates through instructions have
+ /// not been settled as of 2015-07-10, so this function is conservative
+ /// and only considers poison to be propagated in uncontroversial
+ /// cases. There is no attempt to track values that may be only partially
+ /// poison.
+ bool propagatesFullPoison(const Instruction *I);
+
+ /// Return either nullptr or an operand of I such that I will trigger
+ /// undefined behavior if I is executed and that operand has a full-poison
+ /// value (all bits poison).
+ const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
+
+ /// Return true if this function can prove that if PoisonI is executed
+ /// and yields a full-poison value (all bits poison), then that will
+ /// trigger undefined behavior.
+ ///
+ /// Note that this currently only considers the basic block that is
+  /// the parent of \p PoisonI.
+ bool isKnownNotFullPoison(const Instruction *PoisonI);
+
/// \brief Specific patterns of select instructions we can match.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
- SPF_SMIN, // Signed minimum
- SPF_UMIN, // Unsigned minimum
- SPF_SMAX, // Signed maximum
- SPF_UMAX, // Unsigned maximum
- SPF_ABS, // Absolute value
- SPF_NABS // Negated absolute value
+ SPF_SMIN, /// Signed minimum
+ SPF_UMIN, /// Unsigned minimum
+ SPF_SMAX, /// Signed maximum
+ SPF_UMAX, /// Unsigned maximum
+ SPF_FMINNUM, /// Floating point minnum
+ SPF_FMAXNUM, /// Floating point maxnum
+ SPF_ABS, /// Absolute value
+ SPF_NABS /// Negated absolute value
+ };
+ /// \brief Behavior when a floating point min/max is given one NaN and one
+ /// non-NaN as input.
+ enum SelectPatternNaNBehavior {
+ SPNB_NA = 0, /// NaN behavior not applicable.
+ SPNB_RETURNS_NAN, /// Given one NaN input, returns the NaN.
+ SPNB_RETURNS_OTHER, /// Given one NaN input, returns the non-NaN.
+ SPNB_RETURNS_ANY /// Given one NaN input, can return either (or
+ /// it has been determined that no operands can
+ /// be NaN).
+ };
+ struct SelectPatternResult {
+ SelectPatternFlavor Flavor;
+ SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is
+ /// SPF_FMINNUM or SPF_FMAXNUM.
+ bool Ordered; /// When implementing this min/max pattern as
+ /// fcmp; select, does the fcmp have to be
+ /// ordered?
+
+ /// \brief Return true if \p SPF is a min or a max pattern.
+ static bool isMinOrMax(SelectPatternFlavor SPF) {
+ return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS);
+ }
};
/// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind
/// and providing the out parameter results if we successfully match.
@@ -314,9 +433,26 @@ namespace llvm {
///
/// -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt
///
- SelectPatternFlavor matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
+ SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp = nullptr);
+ /// Parse out a conservative ConstantRange from !range metadata.
+ ///
+ /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20).
+ ConstantRange getConstantRangeFromMetadata(MDNode &RangeMD);
+
+ /// Return true if RHS is known to be implied by LHS. A & B must be i1
+ /// (boolean) values or a vector of such values. Note that the truth table for
+ /// implication is the same as <=u on i1 values (but not <=s!). The truth
+ /// table for both is:
+ /// | T | F (B)
+ /// T | T | F
+ /// F | T | T
+ /// (A)
+ bool isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+ unsigned Depth = 0, AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
} // end namespace llvm
#endif
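
To make the interfaces added above concrete, here is a minimal, hedged sketch of how a client might consume SelectPatternResult and isImpliedCondition. The helper names are hypothetical and not part of the patch; only the matchSelectPattern and isImpliedCondition signatures come from the header.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Recognize a select as a min/max; for the FP flavors, be conservative and
// only accept selects whose NaN behavior is known not to matter.
static bool isLowerableMinMax(Value *V) {
  Value *LHS, *RHS;
  SelectPatternResult SPR = matchSelectPattern(V, LHS, RHS);
  if (!SelectPatternResult::isMinOrMax(SPR.Flavor))
    return false;
  if (SPR.Flavor == SPF_FMINNUM || SPR.Flavor == SPF_FMAXNUM)
    return SPR.NaNBehavior == SPNB_RETURNS_ANY;
  return true; // Integer min/max needs no NaN reasoning.
}

// A dominated branch on B is redundant when a taken branch on A implies it
// (the <=u truth table documented above).
static bool branchIsRedundant(Value *A, Value *B, const DataLayout &DL) {
  return isImpliedCondition(A, B, DL);
}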
diff --git a/contrib/llvm/include/llvm/Analysis/VectorUtils.h b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
index d8e9ca4..531803a 100644
--- a/contrib/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
@@ -14,15 +14,19 @@
#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
namespace llvm {
+struct DemandedBits;
class GetElementPtrInst;
class Loop;
class ScalarEvolution;
+class TargetTransformInfo;
class Type;
class Value;
@@ -62,8 +66,8 @@ Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI);
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
-/// \brief If the argument is a GEP, then returns the operand identified by
-/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// \brief If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
@@ -79,6 +83,50 @@ Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// The value may be extracted from a splat constant vector or from
+/// a sequence of instructions that broadcast a single value into a vector.
+const Value *getSplatValue(const Value *V);
+
+/// \brief Compute a map of integer instructions to their minimum legal type
+/// size.
+///
+/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
+/// type (e.g. i32) whenever arithmetic is performed on them.
+///
+/// For targets with native i8 or i16 operations, usually InstCombine can shrink
+/// the arithmetic type down again. However, InstCombine refuses to create
+/// illegal types, so for targets without i8 or i16 registers, the lengthening
+/// and shrinking remain.
+///
+/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
+/// their scalar equivalents do not, so during vectorization it is important to
+/// remove these extensions and truncations when deciding the profitability of
+/// vectorization.
+///
+/// This function analyzes the given range of instructions and determines the
+/// minimum type size each can be converted to. It attempts to remove or
+/// minimize type size changes across each def-use chain, so for example in the
+/// following code:
+///
+/// %1 = load i8, i8*
+/// %2 = add i8 %1, 2
+/// %3 = load i16, i16*
+/// %4 = zext i8 %2 to i32
+/// %5 = zext i16 %3 to i32
+/// %6 = add i32 %4, %5
+/// %7 = trunc i32 %6 to i16
+///
+/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
+/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
+///
+/// If the optional TargetTransformInfo is provided, this function tries harder
+/// to do less work by only looking at illegal types.
+MapVector<Instruction*, uint64_t>
+computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
+ DemandedBits &DB,
+ const TargetTransformInfo *TTI=nullptr);
+
} // llvm namespace
#endif
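
For the additions above, a small illustrative helper (hypothetical name; it assumes a pass already has DemandedBits and TargetTransformInfo at hand) that prints the result of computeMinimumValueSizes:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Dump the minimum legal width computed for each integer instruction in
// Blocks, e.g. "%6 = add i32 %4, %5 -> i16" for the example above.
static void dumpMinimumWidths(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                              const TargetTransformInfo *TTI) {
  MapVector<Instruction *, uint64_t> MinBWs =
      computeMinimumValueSizes(Blocks, DB, TTI);
  for (const auto &KV : MinBWs)
    errs() << *KV.first << " -> i" << KV.second << "\n";
}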
diff --git a/contrib/llvm/include/llvm/AsmParser/Parser.h b/contrib/llvm/include/llvm/AsmParser/Parser.h
index 5215140..96a15c1 100644
--- a/contrib/llvm/include/llvm/AsmParser/Parser.h
+++ b/contrib/llvm/include/llvm/AsmParser/Parser.h
@@ -18,6 +18,7 @@
namespace llvm {
+class Constant;
class LLVMContext;
class Module;
struct SlotMapping;
@@ -79,6 +80,17 @@ std::unique_ptr<Module> parseAssembly(MemoryBufferRef F, SMDiagnostic &Err,
bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err,
SlotMapping *Slots = nullptr);
+/// Parse a type and a constant value in the given string.
+///
+/// The constant value can be any LLVM constant, including a constant
+/// expression.
+///
+/// \param Slots The optional slot mapping that will restore the parsing state
+/// of the module.
+/// \return null on error.
+Constant *parseConstantValue(StringRef Asm, SMDiagnostic &Err, const Module &M,
+ const SlotMapping *Slots = nullptr);
+
} // End llvm namespace
#endif
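
A hedged sketch of the intended parseConstantValue flow (the wrapper is illustrative; only the declaration above is from the patch). The SlotMapping is expected to have been filled by an earlier parseAssembly or parseAssemblyInto call on M:

#include "llvm/AsmParser/Parser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Parse e.g. "i32 add (i32 3, i32 7)" against an already-parsed module,
// so that named types and numbered values resolve through Slots.
static Constant *parseConstantFor(StringRef Text, const Module &M,
                                  const SlotMapping *Slots) {
  SMDiagnostic Err;
  Constant *C = parseConstantValue(Text, Err, M, Slots);
  if (!C)
    Err.print("example", errs()); // Location-annotated diagnostic.
  return C;
}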
diff --git a/contrib/llvm/include/llvm/AsmParser/SlotMapping.h b/contrib/llvm/include/llvm/AsmParser/SlotMapping.h
index c5f61d2..bd7e8fc 100644
--- a/contrib/llvm/include/llvm/AsmParser/SlotMapping.h
+++ b/contrib/llvm/include/llvm/AsmParser/SlotMapping.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ASMPARSER_SLOTMAPPING_H
#define LLVM_ASMPARSER_SLOTMAPPING_H
+#include "llvm/ADT/StringMap.h"
#include "llvm/IR/TrackingMDRef.h"
#include <map>
#include <vector>
@@ -21,12 +22,19 @@
namespace llvm {
class GlobalValue;
+class Type;
-/// This struct contains the mapping from the slot numbers to unnamed metadata
-/// nodes and global values.
+/// This struct contains the mappings from the slot numbers to unnamed metadata
+/// nodes, global values and types. It also contains the mapping for the named
+/// types.
+/// It can be used to save the parsing state of an LLVM IR module so that the
+/// textual references to the values in the module can be parsed outside of the
+/// module's source.
struct SlotMapping {
std::vector<GlobalValue *> GlobalValues;
std::map<unsigned, TrackingMDNodeRef> MetadataNodes;
+ StringMap<Type *> NamedTypes;
+ std::map<unsigned, Type *> Types;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
index ae915c6..a1272cf 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -29,8 +29,12 @@ class PreservedAnalyses;
///
/// If \c ShouldPreserveUseListOrder, encode use-list order so it can be
/// reproduced when deserialized.
+///
+/// If \c EmitFunctionSummary, emit the function summary index (currently
+/// for use in ThinLTO optimization).
ModulePass *createBitcodeWriterPass(raw_ostream &Str,
- bool ShouldPreserveUseListOrder = false);
+ bool ShouldPreserveUseListOrder = false,
+ bool EmitFunctionSummary = false);
/// \brief Pass for writing a module of IR out to a bitcode file.
///
@@ -39,15 +43,21 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str,
class BitcodeWriterPass {
raw_ostream &OS;
bool ShouldPreserveUseListOrder;
+ bool EmitFunctionSummary;
public:
/// \brief Construct a bitcode writer pass around a particular output stream.
///
/// If \c ShouldPreserveUseListOrder, encode use-list order so it can be
/// reproduced when deserialized.
+ ///
+ /// If \c EmitFunctionSummary, emit the function summary index (currently
+ /// for use in ThinLTO optimization).
explicit BitcodeWriterPass(raw_ostream &OS,
- bool ShouldPreserveUseListOrder = false)
- : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
+ bool ShouldPreserveUseListOrder = false,
+ bool EmitFunctionSummary = false)
+ : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder),
+ EmitFunctionSummary(EmitFunctionSummary) {}
/// \brief Run the bitcode writer pass, and output the module to the selected
/// output stream.
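
A minimal sketch of driving the extended writer pass (assuming the legacy pass manager; the wrapper name is illustrative, not part of the patch):

#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Emit bitcode plus the per-module function summary block used by ThinLTO.
static void writeBitcodeWithSummary(Module &M, raw_ostream &OS) {
  legacy::PassManager PM;
  PM.add(createBitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false,
                                 /*EmitFunctionSummary=*/true));
  PM.run(M);
}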
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
index 4c040a7..c0cf6cd 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
@@ -325,6 +325,8 @@ public:
// If we run out of data, stop at the end of the stream.
if (BytesRead == 0) {
+ CurWord = 0;
+ BitsInCurWord = 0;
Size = NextChar;
return;
}
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
index 9f23023..438f4a6 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
@@ -15,6 +15,8 @@
#ifndef LLVM_BITCODE_BITSTREAMWRITER_H
#define LLVM_BITCODE_BITSTREAMWRITER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitCodes.h"
@@ -45,9 +47,9 @@ class BitstreamWriter {
struct Block {
unsigned PrevCodeSize;
- unsigned StartSizeWord;
+ size_t StartSizeWord;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
- Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+ Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
};
/// BlockScope - This tracks the current blocks that we have entered.
@@ -61,12 +63,6 @@ class BitstreamWriter {
};
std::vector<BlockInfo> BlockInfoRecords;
- // BackpatchWord - Backpatch a 32-bit word in the output with the specified
- // value.
- void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
- support::endian::write32le(&Out[ByteNo], NewWord);
- }
-
void WriteByte(unsigned char Value) {
Out.push_back(Value);
}
@@ -77,12 +73,10 @@ class BitstreamWriter {
reinterpret_cast<const char *>(&Value + 1));
}
- unsigned GetBufferOffset() const {
- return Out.size();
- }
+ size_t GetBufferOffset() const { return Out.size(); }
- unsigned GetWordIndex() const {
- unsigned Offset = GetBufferOffset();
+ size_t GetWordIndex() const {
+ size_t Offset = GetBufferOffset();
assert((Offset & 3) == 0 && "Not 32-bit aligned");
return Offset / 4;
}
@@ -99,10 +93,25 @@ public:
/// \brief Retrieve the current position in the stream, in bits.
uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
+ /// \brief Retrieve the number of bits currently used to encode an abbrev ID.
+ unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
+
//===--------------------------------------------------------------------===//
// Basic Primitives for emitting bits to the stream.
//===--------------------------------------------------------------------===//
+ /// Backpatch a 32-bit word in the output at the given bit offset
+ /// with the specified value.
+ void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
+ using namespace llvm::support;
+ unsigned ByteNo = BitNo / 8;
+ assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+ &Out[ByteNo], BitNo & 7)) &&
+ "Expected to be patching over 0-value placeholders");
+ endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+ &Out[ByteNo], NewWord, BitNo & 7);
+ }
+
void Emit(uint32_t Val, unsigned NumBits) {
assert(NumBits && NumBits <= 32 && "Invalid value size!");
assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
@@ -200,7 +209,7 @@ public:
EmitVBR(CodeLen, bitc::CodeLenWidth);
FlushToWord();
- unsigned BlockSizeWordIndex = GetWordIndex();
+ size_t BlockSizeWordIndex = GetWordIndex();
unsigned OldCodeSize = CurCodeSize;
// Emit a placeholder, which will be replaced when the block is popped.
@@ -231,11 +240,11 @@ public:
FlushToWord();
// Compute the size of the block, in words, not counting the size field.
- unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
- unsigned ByteNo = B.StartSizeWord*4;
+ size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
+ uint64_t BitNo = uint64_t(B.StartSizeWord) * 32;
// Update the block size field in the header of this sub-block.
- BackpatchWord(ByteNo, SizeInWords);
+ BackpatchWord(BitNo, SizeInWords);
// Restore the inner block's code size and abbrev table.
CurCodeSize = B.PrevCodeSize;
@@ -285,10 +294,12 @@ private:
/// EmitRecordWithAbbrevImpl - This is the core implementation of the record
/// emission code. If BlobData is non-null, then it specifies an array of
/// data that should be emitted as part of the Blob or Array operand that is
- /// known to exist at the end of the record.
- template<typename uintty>
- void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- StringRef Blob) {
+ /// known to exist at the end of the record. If Code is specified, then
+ /// it is the record code to emit before the Vals, which must not contain
+ /// the code.
+ template <typename uintty>
+ void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef<uintty> Vals,
+ StringRef Blob, Optional<unsigned> Code) {
const char *BlobData = Blob.data();
unsigned BlobLen = (unsigned) Blob.size();
unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
@@ -297,9 +308,23 @@ private:
EmitCode(Abbrev);
+ unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+ if (Code) {
+ assert(e && "Expected non-empty abbreviation");
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++);
+
+ if (Op.isLiteral())
+ EmitAbbreviatedLiteral(Op, Code.getValue());
+ else {
+ assert(Op.getEncoding() != BitCodeAbbrevOp::Array &&
+ Op.getEncoding() != BitCodeAbbrevOp::Blob &&
+ "Expected literal or scalar");
+ EmitAbbreviatedField(Op, Code.getValue());
+ }
+ }
+
unsigned RecordIdx = 0;
- for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
- i != e; ++i) {
+ for (; i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral()) {
assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
@@ -307,7 +332,7 @@ private:
++RecordIdx;
} else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
// Array case.
- assert(i+2 == e && "array op not second to last?");
+ assert(i + 2 == e && "array op not second to last?");
const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
// If this record has blob data, emit it, otherwise we must have record
@@ -381,32 +406,29 @@ public:
/// EmitRecord - Emit the specified record to the stream, using an abbrev if
/// we have one to compress the output.
- template<typename uintty>
- void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
- unsigned Abbrev = 0) {
+ template <typename Container>
+ void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) {
if (!Abbrev) {
// If we don't have an abbrev to use, emit this in its fully unabbreviated
// form.
+ auto Count = static_cast<uint32_t>(makeArrayRef(Vals).size());
EmitCode(bitc::UNABBREV_RECORD);
EmitVBR(Code, 6);
- EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
- for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
+ EmitVBR(Count, 6);
+ for (unsigned i = 0, e = Count; i != e; ++i)
EmitVBR64(Vals[i], 6);
return;
}
- // Insert the code into Vals to treat it uniformly.
- Vals.insert(Vals.begin(), Code);
-
- EmitRecordWithAbbrev(Abbrev, Vals);
+ EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code);
}
/// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
/// Unlike EmitRecord, the code for the record should be included in Vals as
/// the first entry.
- template<typename uintty>
- void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
+ template <typename Container>
+ void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) {
+ EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None);
}
/// EmitRecordWithBlob - Emit the specified record to the stream, using an
@@ -414,29 +436,30 @@ public:
/// specified by the pointer and length specified at the end. In contrast to
/// EmitRecord, this routine expects that the first entry in Vals is the code
/// of the record.
- template<typename uintty>
- void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+ template <typename Container>
+ void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
StringRef Blob) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
+ EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None);
}
- template<typename uintty>
- void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+ template <typename Container>
+ void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
const char *BlobData, unsigned BlobLen) {
- return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
+ return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
+ StringRef(BlobData, BlobLen), None);
}
/// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
/// that end with an array.
- template<typename uintty>
- void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- StringRef Array) {
- EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
+ template <typename Container>
+ void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
+ StringRef Array) {
+ EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None);
}
- template<typename uintty>
- void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
- const char *ArrayData, unsigned ArrayLen) {
- return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
- ArrayLen));
+ template <typename Container>
+ void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
+ const char *ArrayData, unsigned ArrayLen) {
+ return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
+ StringRef(ArrayData, ArrayLen), None);
}
//===--------------------------------------------------------------------===//
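
Since EmitRecord now accepts any makeArrayRef-compatible container and threads the record code separately (instead of splicing it into Vals), a minimal sketch of the new call-site shape:

#include "llvm/Bitcode/BitstreamWriter.h"
using namespace llvm;

// Vals is no longer mutated to prepend the code; plain arrays work too.
static void emitSampleRecord(BitstreamWriter &Stream) {
  uint64_t Vals[] = {42, 7};
  Stream.EmitRecord(/*Code=*/1, Vals); // Unabbreviated emission path.
}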
diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 7130ee7..bcc84be 100644
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -23,28 +23,52 @@
namespace llvm {
namespace bitc {
// The only top-level block type defined is for a module.
- enum BlockIDs {
- // Blocks
- MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
+enum BlockIDs {
+ // Blocks
+ MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
- // Module sub-block id's.
- PARAMATTR_BLOCK_ID,
- PARAMATTR_GROUP_BLOCK_ID,
+ // Module sub-block id's.
+ PARAMATTR_BLOCK_ID,
+ PARAMATTR_GROUP_BLOCK_ID,
- CONSTANTS_BLOCK_ID,
- FUNCTION_BLOCK_ID,
+ CONSTANTS_BLOCK_ID,
+ FUNCTION_BLOCK_ID,
- UNUSED_ID1,
+  // Block intended to contain information on the bitcode versioning.
+ // Can be used to provide better error messages when we fail to parse a
+ // bitcode file.
+ IDENTIFICATION_BLOCK_ID,
- VALUE_SYMTAB_BLOCK_ID,
- METADATA_BLOCK_ID,
- METADATA_ATTACHMENT_ID,
+ VALUE_SYMTAB_BLOCK_ID,
+ METADATA_BLOCK_ID,
+ METADATA_ATTACHMENT_ID,
- TYPE_BLOCK_ID_NEW,
+ TYPE_BLOCK_ID_NEW,
- USELIST_BLOCK_ID
- };
+ USELIST_BLOCK_ID,
+
+ MODULE_STRTAB_BLOCK_ID,
+ FUNCTION_SUMMARY_BLOCK_ID,
+
+ OPERAND_BUNDLE_TAGS_BLOCK_ID,
+
+ METADATA_KIND_BLOCK_ID
+};
+
+/// The identification block contains a string that describes the producer
+/// details, and an epoch that defines the auto-upgrade capability.
+enum IdentificationCodes {
+ IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
+ IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#]
+};
+/// The epoch that defines the auto-upgrade compatibility for the bitcode.
+///
+/// LLVM guarantees in a major release that a minor release can read bitcode
+/// generated by previous minor releases. We translate this by making the reader
+/// accept only bitcode with the same epoch, except for the X.0 release, which
+/// also accepts N-1.
+enum { BITCODE_CURRENT_EPOCH = 0 };
/// MODULE blocks have a number of optional fields and subblocks.
enum ModuleCodes {
@@ -66,13 +90,21 @@ namespace bitc {
MODULE_CODE_FUNCTION = 8,
// ALIAS: [alias type, aliasee val#, linkage, visibility]
- MODULE_CODE_ALIAS = 9,
+ MODULE_CODE_ALIAS_OLD = 9,
// MODULE_CODE_PURGEVALS: [numvals]
MODULE_CODE_PURGEVALS = 10,
MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N]
MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name]
+
+ MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset]
+
+ // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility]
+ MODULE_CODE_ALIAS = 14,
+
+ // METADATA_VALUES: [numvals]
+ MODULE_CODE_METADATA_VALUES = 15,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
@@ -121,7 +153,13 @@ namespace bitc {
TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N]
- TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N]
+ TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N]
+
+ TYPE_CODE_TOKEN = 22 // TOKEN
+ };
+
+ enum OperandBundleTagCode {
+ OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N]
};
// The type symbol table only has one code (TST_ENTRY_CODE).
@@ -129,10 +167,25 @@ namespace bitc {
TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N]
};
- // The value symbol table only has one code (VST_ENTRY_CODE).
+ // Value symbol table codes.
enum ValueSymtabCodes {
- VST_CODE_ENTRY = 1, // VST_ENTRY: [valid, namechar x N]
- VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N]
+ VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N]
+ VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N]
+ VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N]
+ // VST_COMBINED_FNENTRY: [offset, namechar x N]
+ VST_CODE_COMBINED_FNENTRY = 4
+ };
+
+ // The module path symbol table only has one code (MST_CODE_ENTRY).
+ enum ModulePathSymtabCodes {
+ MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
+ };
+
+ // The function summary section uses different codes in the per-module
+ // and combined index cases.
+ enum FunctionSummarySymtabCodes {
+ FS_CODE_PERMODULE_ENTRY = 1, // FS_ENTRY: [valueid, islocal, instcount]
+ FS_CODE_COMBINED_ENTRY = 2, // FS_ENTRY: [modid, instcount]
};
enum MetadataCodes {
@@ -167,7 +220,9 @@ namespace bitc {
METADATA_EXPRESSION = 29, // [distinct, n x element]
METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...]
METADATA_IMPORTED_ENTITY=31, // [distinct, tag, scope, entity, line, name]
- METADATA_MODULE=32, // [distinct, scope, name, ...]
+ METADATA_MODULE = 32, // [distinct, scope, name, ...]
+ METADATA_MACRO = 33, // [distinct, macinfo, line, name, value]
+ METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -287,6 +342,16 @@ namespace bitc {
SYNCHSCOPE_CROSSTHREAD = 1
};
+  /// Markers and flags for call instructions.
+ enum CallMarkersFlags {
+ CALL_TAIL = 0,
+ CALL_CCONV = 1,
+ CALL_MUSTTAIL = 14,
+ CALL_EXPLICIT_TYPE = 15,
+ CALL_NOTAIL = 16,
+ CALL_FMF = 17 // Call has optional fast-math-flags.
+ };
+
// The function body block (FUNCTION_BLOCK_ID) describes function bodies. It
// can contain a constant block (CONSTANTS_BLOCK_ID).
enum FunctionCodes {
@@ -354,6 +419,14 @@ namespace bitc {
FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align,
// vol,ordering,synchscope]
FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...]
+ FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#]
+ FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#]
+ FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...]
+ FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...]
+ FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb]
+ // 53 is unused.
+ // 54 is unused.
+ FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
};
enum UseListCodes {
@@ -407,7 +480,12 @@ namespace bitc {
ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42,
ATTR_KIND_CONVERGENT = 43,
ATTR_KIND_SAFESTACK = 44,
- ATTR_KIND_ARGMEMONLY = 45
+ ATTR_KIND_ARGMEMONLY = 45,
+ ATTR_KIND_SWIFT_SELF = 46,
+ ATTR_KIND_SWIFT_ERROR = 47,
+ ATTR_KIND_NO_RECURSE = 48,
+ ATTR_KIND_INACCESSIBLEMEM_ONLY = 49,
+ ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50
};
enum ComdatSelectionKindCodes {
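
The epoch rule documented above, restated as a hedged sketch (the helper and its IsDotZeroRelease flag are hypothetical, not reader code from this patch):

#include "llvm/Bitcode/LLVMBitCodes.h"

// Accept bitcode with a matching epoch; an X.0 reader additionally
// accepts the previous epoch (N-1).
static bool epochIsSupported(unsigned Epoch, bool IsDotZeroRelease) {
  if (Epoch == llvm::bitc::BITCODE_CURRENT_EPOCH)
    return true;
  return IsDotZeroRelease && Epoch + 1 == llvm::bitc::BITCODE_CURRENT_EPOCH;
}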
diff --git a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
index 452ec3b..60d865f 100644
--- a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
@@ -15,6 +15,7 @@
#define LLVM_BITCODE_READERWRITER_H
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -36,27 +37,54 @@ namespace llvm {
ErrorOr<std::unique_ptr<Module>>
getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler = nullptr,
bool ShouldLazyLoadMetadata = false);
/// Read the header of the specified stream and prepare for lazy
/// deserialization and streaming of function bodies.
- ErrorOr<std::unique_ptr<Module>> getStreamedBitcodeModule(
- StringRef Name, std::unique_ptr<DataStreamer> Streamer,
- LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler = nullptr);
+ ErrorOr<std::unique_ptr<Module>>
+ getStreamedBitcodeModule(StringRef Name,
+ std::unique_ptr<DataStreamer> Streamer,
+ LLVMContext &Context);
/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns a string. On error, this
/// returns "".
- std::string
- getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler = nullptr);
+ std::string getBitcodeTargetTriple(MemoryBufferRef Buffer,
+ LLVMContext &Context);
+
+ /// Read the header of the specified bitcode buffer and extract just the
+ /// producer string information. If successful, this returns a string. On
+ /// error, this returns "".
+ std::string getBitcodeProducerString(MemoryBufferRef Buffer,
+ LLVMContext &Context);
/// Read the specified bitcode file, returning the module.
- ErrorOr<std::unique_ptr<Module>>
- parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler = nullptr);
+ ErrorOr<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
+ LLVMContext &Context);
+
+ /// Check if the given bitcode buffer contains a function summary block.
+ bool hasFunctionSummary(MemoryBufferRef Buffer,
+ DiagnosticHandlerFunction DiagnosticHandler);
+
+ /// Parse the specified bitcode buffer, returning the function info index.
+ /// If IsLazy is true, parse the entire function summary into
+ /// the index. Otherwise skip the function summary section, and only create
+ /// an index object with a map from function name to function summary offset.
+ /// The index is used to perform lazy function summary reading later.
+ ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+ getFunctionInfoIndex(MemoryBufferRef Buffer,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy = false);
+
+ /// This method supports lazy reading of function summary data from the
+ /// combined index during function importing. When reading the combined index
+ /// file, getFunctionInfoIndex is first invoked with IsLazy=true.
+ /// Then this method is called for each function considered for importing,
+ /// to parse the summary information for the given function name into
+ /// the index.
+ std::error_code readFunctionSummary(
+ MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler,
+ StringRef FunctionName, std::unique_ptr<FunctionInfoIndex> Index);
/// \brief Write the specified module to the specified raw output stream.
///
@@ -66,8 +94,18 @@ namespace llvm {
/// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
/// Value in \c M. These will be reconstructed exactly when \a M is
/// deserialized.
+ ///
+ /// If \c EmitFunctionSummary, emit the function summary index (currently
+ /// for use in ThinLTO optimization).
void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
- bool ShouldPreserveUseListOrder = false);
+ bool ShouldPreserveUseListOrder = false,
+ bool EmitFunctionSummary = false);
+
+ /// Write the specified function summary index to the given raw output stream,
+ /// where it will be written in a new bitcode block. This is used when
+ /// writing the combined index file for ThinLTO.
+ void WriteFunctionSummaryToFile(const FunctionInfoIndex &Index,
+ raw_ostream &Out);
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
@@ -159,7 +197,7 @@ namespace llvm {
BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity,
const Twine &Msg);
void print(DiagnosticPrinter &DP) const override;
- std::error_code getError() const { return EC; };
+ std::error_code getError() const { return EC; }
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_Bitcode;
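
The lazy summary-reading sequence the comments above describe, as a hedged end-to-end sketch (error handling trimmed; DiagHandler stands in for any DiagnosticHandlerFunction):

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;

// Build a lazy index, then parse just one function's summary into it.
static void readOneSummary(MemoryBufferRef Buf, StringRef Fn,
                           DiagnosticHandlerFunction DiagHandler) {
  if (!hasFunctionSummary(Buf, DiagHandler))
    return;
  ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
      getFunctionInfoIndex(Buf, DiagHandler, /*IsLazy=*/true);
  if (!IndexOrErr)
    return;
  readFunctionSummary(Buf, DiagHandler, Fn, std::move(*IndexOrErr));
}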
diff --git a/contrib/llvm/include/llvm/CodeGen/Analysis.h b/contrib/llvm/include/llvm/CodeGen/Analysis.h
index 82d1e8a..38e64ad 100644
--- a/contrib/llvm/include/llvm/CodeGen/Analysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/Analysis.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_ANALYSIS_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/CallSite.h"
@@ -23,6 +24,8 @@
namespace llvm {
class GlobalValue;
+class MachineBasicBlock;
+class MachineFunction;
class TargetLoweringBase;
class TargetLowering;
class TargetMachine;
@@ -37,7 +40,7 @@ struct EVT;
/// Given an LLVM IR aggregate type and a sequence of insertvalue or
/// extractvalue indices that identify a member, return the linearized index of
/// the start of the member, i.e. the number of elements in memory before the
-/// seeked one. This is disconnected from the number of bytes.
+/// sought one. This is disconnected from the number of bytes.
///
/// \param Ty is the type indexed by \p Indices.
/// \param Indices is an optional pointer in the indices list to the current
@@ -115,6 +118,9 @@ bool returnTypeIsEligibleForTailCall(const Function *F,
// or we are in LTO.
bool canBeOmittedFromSymbolTable(const GlobalValue *GV);
+DenseMap<const MachineBasicBlock *, int>
+getFuncletMembership(const MachineFunction &MF);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
index fe7efae..f5e778b 100644
--- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -165,6 +165,9 @@ public:
/// Return information about data layout.
const DataLayout &getDataLayout() const;
+ /// Return the pointer size from the TargetMachine
+ unsigned getPointerSize() const;
+
/// Return information about subtarget.
const MCSubtargetInfo &getSubtargetInfo() const;
@@ -233,7 +236,12 @@ public:
/// Print assembly representations of the jump tables used by the current
/// function to the current output stream.
///
- void EmitJumpTableInfo();
+ virtual void EmitJumpTableInfo();
+
+ /// Emit the control variable for an emulated TLS variable.
+ virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
+ MCSymbol *EmittedSym,
+ bool AllZeroInitValue);
/// Emit the specified global variable to the .s file.
virtual void EmitGlobalVariable(const GlobalVariable *GV);
@@ -254,7 +262,7 @@ public:
const MCExpr *lowerConstant(const Constant *CV);
/// \brief Print a general LLVM constant to the .s file.
- void EmitGlobalConstant(const Constant *CV);
+ void EmitGlobalConstant(const DataLayout &DL, const Constant *CV);
/// \brief Unnamed constant global variables solely containing a pointer to
/// another global variable act like a global variable "proxy", or GOT
@@ -317,7 +325,9 @@ public:
/// Targets can override this to change how global constants that are part of
/// a C++ static/global constructor list are emitted.
- virtual void EmitXXStructor(const Constant *CV) { EmitGlobalConstant(CV); }
+ virtual void EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+ EmitGlobalConstant(DL, CV);
+ }
/// Return true if the basic block has exactly one predecessor and the control
/// transfer mechanism between the predecessor and this block is a
@@ -404,9 +414,6 @@ public:
void EmitULEB128(uint64_t Value, const char *Desc = nullptr,
unsigned PadTo = 0) const;
- /// Emit a .byte 42 directive for a DW_CFA_xxx value.
- void EmitCFAByte(unsigned Val) const;
-
/// Emit a .byte 42 directive that corresponds to an encoding. If verbose
/// assembly output is enabled, we output comments describing the encoding.
/// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
@@ -446,7 +453,16 @@ public:
void emitCFIInstruction(const MCCFIInstruction &Inst) const;
/// \brief Emit Dwarf abbreviation table.
- void emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const;
+ template <typename T> void emitDwarfAbbrevs(const T &Abbrevs) const {
+ // For each abbreviation.
+ for (const auto &Abbrev : Abbrevs)
+ emitDwarfAbbrev(*Abbrev);
+
+ // Mark end of abbreviations.
+ EmitULEB128(0, "EOM(3)");
+ }
+
+ void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const;
/// \brief Recursively emit Dwarf DIE tree.
void emitDwarfDIE(const DIE &Die) const;
@@ -532,7 +548,8 @@ private:
void EmitLLVMUsedList(const ConstantArray *InitList);
/// Emit llvm.ident metadata in an '.ident' directive.
void EmitModuleIdents(Module &M);
- void EmitXXStructorList(const Constant *List, bool isCtor);
+ void EmitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool isCtor);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C);
};
}
diff --git a/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h
new file mode 100644
index 0000000..ac18eac
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h
@@ -0,0 +1,57 @@
+//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+class Value;
+class AtomicRMWInst;
+
+
+/// Parameters (see the expansion example below):
+/// (the builder, %addr, %loaded, %new_val, ordering,
+/// /* OUT */ %success, /* OUT */ %new_loaded)
+typedef function_ref<void(IRBuilder<> &, Value *, Value *, Value *,
+ AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun;
+
+/// \brief Expand an atomic RMW instruction into a loop utilizing
+/// cmpxchg. You'll want to make sure your target machine likes cmpxchg
+/// instructions in the first place and that there isn't another, better,
+/// transformation available (for example AArch32/AArch64 have linked loads).
+///
+/// This is useful in passes which can't rewrite the more exotic RMW
+/// instructions directly into platform-specific intrinsics (because, say,
+/// those intrinsics don't exist). If such a pass is able to expand cmpxchg
+/// instructions directly however, then, with this function, it could avoid two
+/// extra module passes (avoiding passes by `-atomic-expand` and itself). A
+/// specific example would be PNaCl's `RewriteAtomics` pass.
+///
+/// Given: atomicrmw some_op iN* %addr, iN %incr ordering
+///
+/// The standard expansion we produce is:
+/// [...]
+/// %init_loaded = load atomic iN* %addr
+/// br label %loop
+/// loop:
+/// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+/// %new = some_op iN %loaded, %incr
+/// ; This is what -atomic-expand will produce using this function on i686 targets:
+/// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val
+/// %new_loaded = extractvalue { iN, i1 } %pair, 0
+/// %success = extractvalue { iN, i1 } %pair, 1
+/// ; End callback produced IR
+/// br i1 %success, label %atomicrmw.end, label %loop
+/// atomicrmw.end:
+/// [...]
+///
+/// Returns true if the containing function was modified.
+bool
+expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory);
+}
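
To make the callback contract concrete, a hedged sketch of a CreateCmpXchgInstFun (a monotonic failure ordering is chosen here for simplicity; a real target would likely derive it from Ord):

#include "llvm/CodeGen/AtomicExpandUtils.h"
using namespace llvm;

// Emit the cmpxchg and unpack the { iN, i1 } pair into the expansion
// loop's two out-parameters, mirroring the IR shown in the comment above.
static void createCmpXchg(IRBuilder<> &Builder, Value *Addr, Value *Loaded,
                          Value *NewVal, AtomicOrdering Ord, Value *&Success,
                          Value *&NewLoaded) {
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, Ord, /*FailureOrdering=*/AtomicOrdering::Monotonic);
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Success = Builder.CreateExtractValue(Pair, 1, "success");
}
// Typical use from a pass: expandAtomicRMWToCmpXchg(AI, createCmpXchg);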
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 9ba2516..d99054e 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -166,7 +166,7 @@ public:
}
if (IID == Intrinsic::ctlz) {
- if (getTLI()->isCheapToSpeculateCtlz())
+ if (getTLI()->isCheapToSpeculateCtlz())
return TargetTransformInfo::TCC_Basic;
return TargetTransformInfo::TCC_Expensive;
}
@@ -256,7 +256,7 @@ public:
for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
- ImmutableCallSite CS(J);
+ ImmutableCallSite CS(&*J);
if (const Function *F = CS.getCalledFunction()) {
if (!static_cast<T *>(this)->isLoweredToCall(F))
continue;
@@ -302,12 +302,8 @@ public:
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
// TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- return LT.first * 2 * OpCost;
- return LT.first * 1 * OpCost;
+ return LT.first * OpCost;
}
if (!TLI->isOperationExpand(ISD, LT.second)) {
@@ -496,13 +492,11 @@ public:
// itself. Unless the corresponding extending load or truncating store is
// legal, then this will scalarize.
TargetLowering::LegalizeAction LA = TargetLowering::Expand;
- EVT MemVT = getTLI()->getValueType(DL, Src, true);
- if (MemVT.isSimple() && MemVT != MVT::Other) {
- if (Opcode == Instruction::Store)
- LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
- else
- LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
- }
+ EVT MemVT = getTLI()->getValueType(DL, Src);
+ if (Opcode == Instruction::Store)
+ LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
+ else
+ LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
@@ -530,7 +524,8 @@ public:
VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
// Firstly, the cost of load/store operation.
- unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+ unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
+ Opcode, VecTy, Alignment, AddressSpace);
// Then plus the cost of interleave operation.
if (Opcode == Instruction::Load) {
@@ -545,18 +540,20 @@ public:
assert(Indices.size() <= Factor &&
"Interleaved memory op has too many members");
+
for (unsigned Index : Indices) {
assert(Index < Factor && "Invalid index for interleaved memory op");
// Extract elements from loaded vector for each sub vector.
for (unsigned i = 0; i < NumSubElts; i++)
- Cost += getVectorInstrCost(Instruction::ExtractElement, VT,
- Index + i * Factor);
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, VT, Index + i * Factor);
}
unsigned InsSubCost = 0;
for (unsigned i = 0; i < NumSubElts; i++)
- InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i);
+ InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, SubVT, i);
Cost += Indices.size() * InsSubCost;
} else {
@@ -571,17 +568,51 @@ public:
unsigned ExtSubCost = 0;
for (unsigned i = 0; i < NumSubElts; i++)
- ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
-
- Cost += Factor * ExtSubCost;
+ ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubVT, i);
+ Cost += ExtSubCost * Factor;
for (unsigned i = 0; i < NumElts; i++)
- Cost += getVectorInstrCost(Instruction::InsertElement, VT, i);
+ Cost += static_cast<T *>(this)
+ ->getVectorInstrCost(Instruction::InsertElement, VT, i);
}
return Cost;
}
+ /// Get intrinsic cost based on arguments
+ unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Value *> Args) {
+ switch (IID) {
+ default: {
+ SmallVector<Type *, 4> Types;
+ for (Value *Op : Args)
+ Types.push_back(Op->getType());
+ return getIntrinsicInstrCost(IID, RetTy, Types);
+ }
+ case Intrinsic::masked_scatter: {
+ Value *Mask = Args[3];
+ bool VarMask = !isa<Constant>(Mask);
+ unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
+ return
+ static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
+ Args[0]->getType(),
+ Args[1], VarMask,
+ Alignment);
+ }
+ case Intrinsic::masked_gather: {
+ Value *Mask = Args[2];
+ bool VarMask = !isa<Constant>(Mask);
+ unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
+ return
+ static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
+ RetTy, Args[0], VarMask,
+ Alignment);
+ }
+ }
+ }
+
+ /// Get intrinsic cost based on argument types
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) {
unsigned ISD = 0;
@@ -800,7 +831,7 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
const TargetLoweringBase *getTLI() const { return TLI; }
public:
- explicit BasicTTIImpl(const TargetMachine *ST, Function &F);
+ explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
// Provide value semantics. MSVC requires that we spell all of these out.
BasicTTIImpl(const BasicTTIImpl &Arg)
diff --git a/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index 91fb0a9..17c9415 100644
--- a/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -20,6 +20,7 @@ namespace llvm {
class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineLoopInfo;
+ class VirtRegMap;
/// \brief Normalize the spill weight of a live interval
///
@@ -51,6 +52,7 @@ namespace llvm {
private:
MachineFunction &MF;
LiveIntervals &LIS;
+ VirtRegMap *VRM;
const MachineLoopInfo &Loops;
const MachineBlockFrequencyInfo &MBFI;
DenseMap<unsigned, float> Hint;
@@ -58,10 +60,10 @@ namespace llvm {
public:
VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
- const MachineLoopInfo &loops,
+ VirtRegMap *vrm, const MachineLoopInfo &loops,
const MachineBlockFrequencyInfo &mbfi,
NormalizingFn norm = normalizeSpillWeight)
- : MF(mf), LIS(lis), Loops(loops), MBFI(mbfi), normalize(norm) {}
+ : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {}
/// \brief (re)compute li's spill weight and allocation hint.
void calculateSpillWeightAndHint(LiveInterval &li);
@@ -70,6 +72,7 @@ namespace llvm {
/// \brief Compute spill weights and allocation hints for all virtual register
/// live intervals.
void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF,
+ VirtRegMap *VRM,
const MachineLoopInfo &MLI,
const MachineBlockFrequencyInfo &MBFI,
VirtRegAuxInfo::NormalizingFn norm =
diff --git a/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h b/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
index 1fd4eeb..415abb9 100644
--- a/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@ private:
LLVMContext &Context;
unsigned StackOffset;
+ unsigned MaxStackArgAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
@@ -270,7 +271,18 @@ public:
CallingConv::ID getCallingConv() const { return CallingConv; }
bool isVarArg() const { return IsVarArg; }
- unsigned getNextStackOffset() const { return StackOffset; }
+ /// getNextStackOffset - Return the next stack offset such that all stack
+ /// slots satisfy their alignment requirements.
+ unsigned getNextStackOffset() const {
+ return StackOffset;
+ }
+
+  /// getAlignedCallFrameSize - Return the size of the call frame needed to
+  /// store all arguments such that the alignment requirement of each of the
+  /// arguments is satisfied.
+ unsigned getAlignedCallFrameSize() const {
+ return RoundUpToAlignment(StackOffset, MaxStackArgAlign);
+ }
/// isAllocated - Return true if the specified register (or an alias) is
/// allocated.
@@ -357,7 +369,7 @@ public:
/// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
/// registers. If this is not possible, return zero. Otherwise, return the first
/// register of the block, marking the entire block as allocated.
- unsigned AllocateRegBlock(ArrayRef<uint16_t> Regs, unsigned RegsRequired) {
+ unsigned AllocateRegBlock(ArrayRef<MCPhysReg> Regs, unsigned RegsRequired) {
if (RegsRequired > Regs.size())
return 0;
@@ -400,9 +412,10 @@ public:
/// and alignment.
unsigned AllocateStack(unsigned Size, unsigned Align) {
assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
- StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
+ StackOffset = RoundUpToAlignment(StackOffset, Align);
unsigned Result = StackOffset;
StackOffset += Size;
+ MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
MF.getFrameInfo()->ensureMaxAlignment(Align);
return Result;
}
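
The rounding behavior above, worked through in a stand-alone sketch (plain C++ modeling the two members, not real CCState code):

#include <algorithm>
#include <cassert>
#include <cstdio>

static unsigned StackOffset = 0, MaxStackArgAlign = 1;

// Mirrors AllocateStack: round up, record the slot, track the max align.
static unsigned AllocateStack(unsigned Size, unsigned Align) {
  assert(Align && ((Align - 1) & Align) == 0); // Power of 2.
  StackOffset = (StackOffset + Align - 1) & ~(Align - 1);
  unsigned Result = StackOffset;
  StackOffset += Size;
  MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
  return Result;
}

static unsigned getAlignedCallFrameSize() {
  return (StackOffset + MaxStackArgAlign - 1) & ~(MaxStackArgAlign - 1);
}

int main() {
  AllocateStack(4, 4);   // i32 at offset 0; StackOffset = 4.
  AllocateStack(16, 16); // v4i32 at offset 16; StackOffset = 32.
  AllocateStack(1, 1);   // i8 at offset 32; StackOffset = 33.
  printf("%u\n", getAlignedCallFrameSize()); // 33 rounds up to 48.
  return 0;
}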
diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
index bedb7d5..0d37dc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -182,6 +182,11 @@ OverrideStackAlignment("stack-alignment",
cl::desc("Override default stack alignment"),
cl::init(0));
+cl::opt<bool>
+StackRealign("stackrealign",
+ cl::desc("Force align the stack to the minimum alignment"),
+ cl::init(false));
+
cl::opt<std::string>
TrapFuncName("trap-func", cl::Hidden,
cl::desc("Emit a call to trap function rather than a trap instruction"),
@@ -219,6 +224,10 @@ FunctionSections("function-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
+cl::opt<bool> EmulatedTLS("emulated-tls",
+ cl::desc("Use emulated TLS model"),
+ cl::init(false));
+
cl::opt<bool> UniqueSectionNames("unique-section-names",
cl::desc("Give unique names to every section"),
cl::init(true));
@@ -238,6 +247,26 @@ JTableType("jump-table-type",
"Create one table per unique function type."),
clEnumValEnd));
+cl::opt<llvm::EABI> EABIVersion(
+ "meabi", cl::desc("Set EABI type (default depends on triple):"),
+ cl::init(EABI::Default),
+ cl::values(clEnumValN(EABI::Default, "default",
+ "Triple default EABI version"),
+ clEnumValN(EABI::EABI4, "4", "EABI version 4"),
+ clEnumValN(EABI::EABI5, "5", "EABI version 5"),
+ clEnumValN(EABI::GNU, "gnu", "EABI GNU"), clEnumValEnd));
+
+cl::opt<DebuggerKind>
+DebuggerTuningOpt("debugger-tune",
+ cl::desc("Tune debug info for a particular debugger"),
+ cl::init(DebuggerKind::Default),
+ cl::values(
+ clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
+ clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
+ clEnumValN(DebuggerKind::SCE, "sce",
+ "SCE targets (e.g. PS4)"),
+ clEnumValEnd));
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -260,11 +289,14 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.DataSections = DataSections;
Options.FunctionSections = FunctionSections;
Options.UniqueSectionNames = UniqueSectionNames;
+ Options.EmulatedTLS = EmulatedTLS;
Options.MCOptions = InitMCTargetOptionsFromFlags();
Options.JTType = JTableType;
Options.ThreadModel = TMModel;
+ Options.EABIVersion = EABIVersion;
+ Options.DebuggerTuning = DebuggerTuningOpt;
return Options;
}
@@ -325,6 +357,10 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
"disable-tail-calls",
toStringRef(DisableTailCalls));
+ if (StackRealign)
+ NewAttrs = NewAttrs.addAttribute(Ctx, AttributeSet::FunctionIndex,
+ "stackrealign");
+
if (TrapFuncName.getNumOccurrences() > 0)
for (auto &B : F)
for (auto &I : B)
diff --git a/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h b/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
index c44a7e0..40ec201 100644
--- a/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -40,22 +40,51 @@ class InstrItineraryData;
class DefaultVLIWScheduler;
class SUnit;
+// --------------------------------------------------------------------
+// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
+
+// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
+// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
+//
+// e.g. terms x resource bit combinations that fit in uint32_t:
+// 4 terms x 8 bits = 32 bits
+// 3 terms x 10 bits = 30 bits
+// 2 terms x 16 bits = 32 bits
+//
+// e.g. terms x resource bit combinations that fit in uint64_t:
+// 8 terms x 8 bits = 64 bits
+// 7 terms x 9 bits = 63 bits
+// 6 terms x 10 bits = 60 bits
+// 5 terms x 12 bits = 60 bits
+// 4 terms x 16 bits = 64 bits <--- current
+// 3 terms x 21 bits = 63 bits
+// 2 terms x 32 bits = 64 bits
+//
+#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
+#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
+
+typedef uint64_t DFAInput;
+typedef int64_t DFAStateInput;
+#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
+// --------------------------------------------------------------------
+
class DFAPacketizer {
private:
- typedef std::pair<unsigned, unsigned> UnsignPair;
+ typedef std::pair<unsigned, DFAInput> UnsignPair;
+
const InstrItineraryData *InstrItins;
int CurrentState;
- const int (*DFAStateInputTable)[2];
+ const DFAStateInput (*DFAStateInputTable)[2];
const unsigned *DFAStateEntryTable;
// CachedTable is a map from <FromState, Input> to ToState.
DenseMap<UnsignPair, unsigned> CachedTable;
// ReadTable - Read the DFA transition table and update CachedTable.
- void ReadTable(unsigned int state);
+ void ReadTable(unsigned state);
public:
- DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
const unsigned *SET);
// Reset the current state to make all resources available.
@@ -63,6 +92,12 @@ public:
CurrentState = 0;
}
+ // getInsnInput - Return the DFAInput for an instruction class.
+ DFAInput getInsnInput(unsigned InsnClass);
+
+ // getInsnInput - Return the DFAInput for an instruction class input vector.
+ static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass);
+
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool canReserveResources(const llvm::MCInstrDesc *MID);
@@ -93,6 +128,7 @@ class VLIWPacketizerList {
protected:
MachineFunction &MF;
const TargetInstrInfo *TII;
+ AliasAnalysis *AA;
// The VLIW Scheduler.
DefaultVLIWScheduler *VLIWScheduler;
@@ -106,7 +142,9 @@ protected:
std::map<MachineInstr*, SUnit*> MIToSUnit;
public:
- VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, bool IsPostRA);
+ // The AliasAnalysis parameter can be nullptr.
+ VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ AliasAnalysis *AA);
virtual ~VLIWPacketizerList();
@@ -126,8 +164,10 @@ public:
return MII;
}
- // endPacket - End the current packet.
- void endPacket(MachineBasicBlock *MBB, MachineInstr *MI);
+ // End the current packet and reset the state of the packetizer.
+ // Overriding this function allows the target-specific packetizer
+ // to perform custom finalization.
+ virtual void endPacket(MachineBasicBlock *MBB, MachineInstr *MI);
// initPacketizerState - perform initialization before packetizing
// an instruction. This function is supposed to be overridden by
@@ -135,14 +175,24 @@ public:
virtual void initPacketizerState() { return; }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- virtual bool ignorePseudoInstruction(MachineInstr *I,
- MachineBasicBlock *MBB) {
+ virtual bool ignorePseudoInstruction(const MachineInstr *I,
+ const MachineBasicBlock *MBB) {
return false;
}
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
- virtual bool isSoloInstruction(MachineInstr *MI) {
+ virtual bool isSoloInstruction(const MachineInstr *MI) {
+ return true;
+ }
+
+ // Check if the packetizer should try to add the given instruction to
+ // the current packet. One reason it may not be desirable to include an
+ // instruction in the current packet is that it would cause a stall.
+ // If this function returns "false", the current packet will be ended,
+ // and the instruction will be added to the next packet.
+ virtual bool shouldAddToPacket(const MachineInstr *MI) {
return true;
}
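A hypothetical target packetizer overriding the hooks above; the class name and stall heuristic are illustrative only:

  class MyTargetPacketizer : public VLIWPacketizerList {
  public:
    MyTargetPacketizer(MachineFunction &MF, MachineLoopInfo &MLI,
                       AliasAnalysis *AA)
        : VLIWPacketizerList(MF, MLI, AA) {} // AA may be nullptr

    bool shouldAddToPacket(const MachineInstr *MI) override {
      // E.g. refuse instructions that would stall the current packet.
      return !wouldStall(MI);
    }

    void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override {
      // Target-specific finalization, then the default bundling.
      VLIWPacketizerList::endPacket(MBB, MI);
    }

  private:
    // Assumed target-specific helper, not part of this patch.
    bool wouldStall(const MachineInstr *MI) const { return false; }
  };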
diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h
index f07712a..fa612d9 100644
--- a/contrib/llvm/include/llvm/CodeGen/DIE.h
+++ b/contrib/llvm/include/llvm/CodeGen/DIE.h
@@ -100,10 +100,8 @@ public:
///
void Emit(const AsmPrinter *AP) const;
-#ifndef NDEBUG
void print(raw_ostream &O);
void dump();
-#endif
};
//===--------------------------------------------------------------------===//
@@ -143,9 +141,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -164,9 +160,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -185,9 +179,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -203,9 +195,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -223,9 +213,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -252,9 +240,7 @@ public:
: sizeof(int32_t);
}
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -273,9 +259,7 @@ public:
return 8;
}
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -295,9 +279,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
@@ -444,10 +426,8 @@ public:
///
unsigned SizeOf(const AsmPrinter *AP) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
void dump() const;
-#endif
};
struct IntrusiveBackListNode {
@@ -566,64 +546,70 @@ class DIEValueList {
ListTy List;
public:
- bool empty() const { return List.empty(); }
-
- class const_iterator;
- class iterator
- : public iterator_adaptor_base<iterator, ListTy::iterator,
+ class const_value_iterator;
+ class value_iterator
+ : public iterator_adaptor_base<value_iterator, ListTy::iterator,
std::forward_iterator_tag, DIEValue> {
- friend class const_iterator;
- typedef iterator_adaptor_base<iterator, ListTy::iterator,
+ friend class const_value_iterator;
+ typedef iterator_adaptor_base<value_iterator, ListTy::iterator,
std::forward_iterator_tag,
DIEValue> iterator_adaptor;
public:
- iterator() = default;
- explicit iterator(ListTy::iterator X) : iterator_adaptor(X) {}
+ value_iterator() = default;
+ explicit value_iterator(ListTy::iterator X) : iterator_adaptor(X) {}
explicit operator bool() const { return bool(wrapped()); }
DIEValue &operator*() const { return wrapped()->V; }
};
- class const_iterator
- : public iterator_adaptor_base<const_iterator, ListTy::const_iterator,
- std::forward_iterator_tag,
- const DIEValue> {
- typedef iterator_adaptor_base<const_iterator, ListTy::const_iterator,
+ class const_value_iterator : public iterator_adaptor_base<
+ const_value_iterator, ListTy::const_iterator,
+ std::forward_iterator_tag, const DIEValue> {
+ typedef iterator_adaptor_base<const_value_iterator, ListTy::const_iterator,
std::forward_iterator_tag,
const DIEValue> iterator_adaptor;
public:
- const_iterator() = default;
- const_iterator(DIEValueList::iterator X) : iterator_adaptor(X.wrapped()) {}
- explicit const_iterator(ListTy::const_iterator X) : iterator_adaptor(X) {}
+ const_value_iterator() = default;
+ const_value_iterator(DIEValueList::value_iterator X)
+ : iterator_adaptor(X.wrapped()) {}
+ explicit const_value_iterator(ListTy::const_iterator X)
+ : iterator_adaptor(X) {}
explicit operator bool() const { return bool(wrapped()); }
const DIEValue &operator*() const { return wrapped()->V; }
};
- iterator insert(BumpPtrAllocator &Alloc, DIEValue V) {
+ typedef iterator_range<value_iterator> value_range;
+ typedef iterator_range<const_value_iterator> const_value_range;
+
+ value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue V) {
List.push_back(*new (Alloc) Node(V));
- return iterator(ListTy::toIterator(List.back()));
+ return value_iterator(ListTy::toIterator(List.back()));
}
- template <class... Ts>
- iterator emplace(BumpPtrAllocator &Alloc, Ts &&... Args) {
- return insert(Alloc, DIEValue(std::forward<Ts>(Args)...));
+ template <class T>
+ value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+ dwarf::Form Form, T &&Value) {
+ return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value)));
}
- iterator begin() { return iterator(List.begin()); }
- iterator end() { return iterator(List.end()); }
- const_iterator begin() const { return const_iterator(List.begin()); }
- const_iterator end() const { return const_iterator(List.end()); }
+ value_range values() {
+ return llvm::make_range(value_iterator(List.begin()),
+ value_iterator(List.end()));
+ }
+ const_value_range values() const {
+ return llvm::make_range(const_value_iterator(List.begin()),
+ const_value_iterator(List.end()));
+ }
};
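A hedged sketch of the renamed interface; the DIE, allocator, and string value are supplied by the caller (and, as the next hunk shows, DIE itself now inherits this list):

  void addNameAttr(DIE &D, BumpPtrAllocator &Alloc, DIEString Str) {
    // addValue() replaces the old insert()/emplace() pair.
    D.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, Str);
    // values() replaces begin()/end() iteration over attribute values.
    for (const DIEValue &V : D.values())
      (void)V; // e.g. inspect or print each attribute value
  }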
//===--------------------------------------------------------------------===//
/// DIE - A structured debug information entry. Has an abbreviation which
/// describes its organization.
-class DIE : IntrusiveBackListNode {
+class DIE : IntrusiveBackListNode, public DIEValueList {
friend class IntrusiveBackList<DIE>;
-protected:
/// Offset - Offset in debug info section.
///
unsigned Offset;
@@ -643,14 +629,7 @@ protected:
DIE *Parent = nullptr;
- /// Attribute values.
- ///
- DIEValueList Values;
-
-protected:
- DIE() : Offset(0), Size(0) {}
-
-private:
+ DIE() = delete;
explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {}
public:
@@ -677,20 +656,6 @@ public:
return llvm::make_range(Children.begin(), Children.end());
}
- typedef DIEValueList::iterator value_iterator;
- typedef iterator_range<value_iterator> value_range;
-
- value_range values() {
- return llvm::make_range(Values.begin(), Values.end());
- }
-
- typedef DIEValueList::const_iterator const_value_iterator;
- typedef iterator_range<const_value_iterator> const_value_range;
-
- const_value_range values() const {
- return llvm::make_range(Values.begin(), Values.end());
- }
-
DIE *getParent() const { return Parent; }
/// Generate the abbreviation for this DIE.
@@ -711,17 +676,6 @@ public:
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
- /// addValue - Add a value and attributes to a DIE.
- ///
- value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue Value) {
- return Values.insert(Alloc, Value);
- }
- template <class T>
- value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
- dwarf::Form Form, T &&Value) {
- return Values.emplace(Alloc, Attribute, Form, std::forward<T>(Value));
- }
-
/// Add a child to the DIE.
DIE &addChild(DIE *Child) {
assert(!Child->getParent() && "Child should be orphaned");
@@ -736,16 +690,14 @@ public:
/// gives \a DIEValue::isNone) if no such attribute exists.
DIEValue findAttribute(dwarf::Attribute Attribute) const;
-#ifndef NDEBUG
void print(raw_ostream &O, unsigned IndentCount = 0) const;
void dump();
-#endif
};
//===--------------------------------------------------------------------===//
/// DIELoc - Represents an expression location.
//
-class DIELoc : public DIE {
+class DIELoc : public DIEValueList {
mutable unsigned Size; // Size in bytes excluding size header.
public:
@@ -773,15 +725,13 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
//===--------------------------------------------------------------------===//
/// DIEBlock - Represents a block of values.
//
-class DIEBlock : public DIE {
+class DIEBlock : public DIEValueList {
mutable unsigned Size; // Size in bytes excluding size header.
public:
@@ -806,9 +756,7 @@ public:
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
-#ifndef NDEBUG
void print(raw_ostream &O) const;
-#endif
};
} // end llvm namespace
diff --git a/contrib/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm/include/llvm/CodeGen/FastISel.h
index f04a7cd..cc4e370 100644
--- a/contrib/llvm/include/llvm/CodeGen/FastISel.h
+++ b/contrib/llvm/include/llvm/CodeGen/FastISel.h
@@ -419,11 +419,11 @@ protected:
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm1, uint64_t Imm2);
- /// \brief Emit a MachineInstr with two register operands and a result
+ /// \brief Emit a MachineInstr with a floating-point immediate and a result
/// register in the given register class.
- unsigned fastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, const ConstantFP *FPImm);
+ unsigned fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm);
/// \brief Emit a MachineInstr with two register operands, an immediate, and a
/// result register in the given register class.
@@ -432,23 +432,11 @@ protected:
bool Op0IsKill, unsigned Op1, bool Op1IsKill,
uint64_t Imm);
- /// \brief Emit a MachineInstr with two register operands, two immediates
- /// operands, and a result register in the given register class.
- unsigned fastEmitInst_rrii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, unsigned Op1, bool Op1IsKill,
- uint64_t Imm1, uint64_t Imm2);
-
/// \brief Emit a MachineInstr with a single immediate operand, and a result
/// register in the given register class.
unsigned fastEmitInst_i(unsigned MachineInstrOpcode,
const TargetRegisterClass *RC, uint64_t Imm);
- /// \brief Emit a MachineInstr with a two immediate operands.
- unsigned fastEmitInst_ii(unsigned MachineInstrOpcode,
- const TargetRegisterClass *RC, uint64_t Imm1,
- uint64_t Imm2);
-
/// \brief Emit a MachineInstr for an extract_subreg from a specified index of
/// a superregister to a specified type.
unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -462,6 +450,11 @@ protected:
/// immediate (fall-through) successor, and update the CFG.
void fastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
+ /// Emit an unconditional branch to \p FalseMBB, obtain the branch weight,
+ /// and add TrueMBB and FalseMBB to the successor list.
+ void finishCondBranch(const BasicBlock *BranchBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB);
+
/// \brief Update the value map to include the new mapping for this
/// instruction, or insert an extra copy to get the result in a previous
/// determined register.
@@ -566,6 +559,9 @@ private:
/// across heavy instructions like calls.
void flushLocalValueMap();
+ /// \brief Removes dead local value instructions after SavedLastLocalValue.
+ void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue);
+
/// \brief Insertion point before trying to select the current instruction.
MachineBasicBlock::iterator SavedInsertPt;
diff --git a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 82c762e..09a9991 100644
--- a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -62,6 +62,9 @@ public:
/// registers.
bool CanLowerReturn;
+ /// True if part of the CSRs will be handled via explicit copies.
+ bool SplitCSR;
+
/// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
/// allocated to hold a pointer to the hidden sret parameter.
unsigned DemoteRegister;
@@ -72,7 +75,10 @@ public:
/// ValueMap - Since we emit code for the function a basic block at a time,
/// we must remember which virtual registers hold the values for
/// cross-basic-block values.
- DenseMap<const Value*, unsigned> ValueMap;
+ DenseMap<const Value *, unsigned> ValueMap;
+
+ /// Track virtual registers created for exception pointers.
+ DenseMap<const Value *, unsigned> CatchPadExceptionPointers;
// Keep track of frame indices allocated for statepoints as they could be used
// across basic block boundaries.
@@ -99,7 +105,7 @@ public:
/// RegFixups - Registers which need to be replaced after isel is done.
DenseMap<unsigned, unsigned> RegFixups;
- /// StatepointStackSlots - A list of temporary stack slots (frame indices)
+ /// StatepointStackSlots - A list of temporary stack slots (frame indices)
/// used to spill values at a statepoint. We store them here to enable
/// reuse of the same stack slots across different statepoints in different
/// basic blocks.
@@ -111,11 +117,6 @@ public:
/// MBB - The current insert position inside the current block.
MachineBasicBlock::iterator InsertPt;
-#ifndef NDEBUG
- SmallPtrSet<const Instruction *, 8> CatchInfoLost;
- SmallPtrSet<const Instruction *, 8> CatchInfoFound;
-#endif
-
struct LiveOutInfo {
unsigned NumSignBits : 31;
bool IsValid : 1;
@@ -161,10 +162,13 @@ public:
}
unsigned CreateReg(MVT VT);
-
+
unsigned CreateRegs(Type *Ty);
-
+
unsigned InitializeRegForValue(const Value *V) {
+ // Tokens never live in vregs.
+ if (V->getType()->isTokenTy())
+ return 0;
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
return R = CreateRegs(V->getType());
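The token check above means callers can rely on a zero return meaning "no register". A minimal illustration (the caller code is hypothetical):

  unsigned Reg = FuncInfo.InitializeRegForValue(V);
  if (Reg == 0) {
    // V has token type; tokens are never materialized in vregs.
  }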
@@ -231,6 +235,9 @@ public:
/// getArgumentFrameIndex - Get frame index for the byval argument.
int getArgumentFrameIndex(const Argument *A);
+ unsigned getCatchPadExceptionPointerVReg(const Value *CPI,
+ const TargetRegisterClass *RC);
+
private:
void addSEHHandlersForLPads(ArrayRef<const LandingPadInst *> LPads);
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
index e883bd1..163117b 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
@@ -160,9 +160,9 @@ class GCModuleInfo : public ImmutablePass {
public:
/// Lookup the GCStrategy object associated with the given gc name.
/// Objects are owned internally; no caller should attempt to delete the
- /// returned objects.
+ /// returned objects.
GCStrategy *getGCStrategy(const StringRef Name);
-
+
/// List of per function info objects. In theory, Each of these
/// may be associated with a different GC.
typedef std::vector<std::unique_ptr<GCFunctionInfo>> FuncInfoVec;
diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
index a1b8e89..3088a86 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
@@ -117,11 +117,11 @@ public:
/** @name Statepoint Specific Properties */
///@{
- /// If the value specified can be reliably distinguished, returns true for
+ /// If the type specified can be reliably distinguished, returns true for
/// pointers to GC managed locations and false for pointers to non-GC
/// managed locations. Note a GCStrategy can always return 'None' (i.e. an
/// empty optional) indicating it can't reliably distinguish.
- virtual Optional<bool> isGCManagedPointer(const Value *V) const {
+ virtual Optional<bool> isGCManagedPointer(const Type *Ty) const {
return None;
}
///@}
diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
index fa44301..158ff3c 100644
--- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -108,6 +108,10 @@ namespace ISD {
/// and returns an outchain.
EH_SJLJ_LONGJMP,
+ /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN)
+ /// The target initializes the dispatch table here.
+ EH_SJLJ_SETUP_DISPATCH,
+
/// TargetConstant* - Like Constant*, but the DAG does not do any folding,
/// simplification, or lowering of the constant. They are used for constants
/// which are known to fit in the immediate fields of their users, or for
@@ -332,7 +336,7 @@ namespace ISD {
SHL, SRA, SRL, ROTL, ROTR,
/// Byte Swap and Counting operators.
- BSWAP, CTTZ, CTLZ, CTPOP,
+ BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
/// Bit counting operators with an undefined result for zero inputs.
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
@@ -364,9 +368,14 @@ namespace ISD {
/// then the result type must also be a vector type.
SETCC,
+ /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but
+ /// op #2 is a *carry value*. This operator checks the result of
+ /// "LHS - RHS - Carry", and can be used to compare two wide integers:
+ /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers.
+ SETCCE,
+
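A hedged sketch of the wide-compare expansion described above, using SUBC on the low halves and folding the carry into SETCCE on the high halves; all variable names and the surrounding lowering context are illustrative:

  SDValue lowerWideSetCC(SelectionDAG &DAG, SDLoc DL, SDValue LoL,
                         SDValue LoR, SDValue HiL, SDValue HiR) {
    // Subtract the low halves; the second result carries the borrow.
    SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
    SDValue Lo = DAG.getNode(ISD::SUBC, DL, VTs, LoL, LoR);
    // Compare the high halves, consuming the carry from the low halves.
    return DAG.getNode(ISD::SETCCE, DL, MVT::i1, HiL, HiR, Lo.getValue(1),
                       DAG.getCondCode(ISD::SETULT));
  }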
/// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded
- /// integer shift operations, just like ADD/SUB_PARTS. The operation
- /// ordering is:
+ /// integer shift operations. The operation ordering is:
/// [Lo,Hi] = op [LoLHS,HiLHS], Amt
SHL_PARTS, SRA_PARTS, SRL_PARTS,
@@ -506,7 +515,15 @@ namespace ISD {
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
+ /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
+ /// values.
+ /// In the case where a single input is NaN, the non-NaN input is returned.
+ ///
+ /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
FMINNUM, FMAXNUM,
+ /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that
+ /// when a single input is NaN, NaN is returned.
+ FMINNAN, FMAXNAN,
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
@@ -575,6 +592,18 @@ namespace ISD {
/// take a chain as input and return a chain.
EH_LABEL,
+ /// CATCHPAD - Represents a catchpad instruction.
+ CATCHPAD,
+
+ /// CATCHRET - Represents a return from a catch block funclet. Used for
+ /// MSVC compatible exception handling. Takes a chain operand and a
+ /// destination basic block operand.
+ CATCHRET,
+
+ /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for
+ /// MSVC compatible exception handling. Takes only a chain operand.
+ CLEANUPRET,
+
/// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a
/// value, the same type as the pointer type for the system, and an output
/// chain.
@@ -618,9 +647,11 @@ namespace ISD {
PCMARKER,
/// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
- /// The only operand is a chain and a value and a chain are produced. The
- /// value is the contents of the architecture specific cycle counter like
- /// register (or other high accuracy low latency clock source)
+ /// It produces a chain and one i64 value. The only operand is a chain.
+ /// If i64 is not legal, the result will be expanded into smaller values.
+ /// Still, it returns an i64, so targets should set legality for i64.
+ /// The result is the content of the architecture-specific cycle
+ /// counter-like register (or other high accuracy low latency clock source).
READCYCLECOUNTER,
/// HANDLENODE node - Used as a handle for various purposes.
@@ -719,6 +750,12 @@ namespace ISD {
GC_TRANSITION_START,
GC_TRANSITION_END,
+ /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of
+ /// the most recent dynamic alloca. For most targets that would be 0, but
+ /// for some others (e.g. PowerPC, PowerPC64) it would be a nonzero
+ /// compile-time-known constant. The only operand here is the chain.
+ GET_DYNAMIC_AREA_OFFSET,
+
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
diff --git a/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
index 9e6ab7d..a404b9b 100644
--- a/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
@@ -19,41 +19,40 @@
#include "llvm/IR/Intrinsics.h"
namespace llvm {
- class CallInst;
- class Module;
- class DataLayout;
-
- class IntrinsicLowering {
- const DataLayout& DL;
-
-
- bool Warned;
- public:
- explicit IntrinsicLowering(const DataLayout &DL) :
- DL(DL), Warned(false) {}
-
- /// AddPrototypes - This method, if called, causes all of the prototypes
- /// that might be needed by an intrinsic lowering implementation to be
- /// inserted into the module specified.
- void AddPrototypes(Module &M);
-
- /// LowerIntrinsicCall - This method replaces a call with the LLVM function
- /// which should be used to implement the specified intrinsic function call.
- /// If an intrinsic function must be implemented by the code generator
- /// (such as va_start), this function should print a message and abort.
- ///
- /// Otherwise, if an intrinsic function call can be lowered, the code to
- /// implement it (often a call to a non-intrinsic function) is inserted
- /// _after_ the call instruction and the call is deleted. The caller must
- /// be capable of handling this kind of change.
- ///
- void LowerIntrinsicCall(CallInst *CI);
-
- /// LowerToByteSwap - Replace a call instruction into a call to bswap
- /// intrinsic. Return false if it has determined the call is not a
- /// simple integer bswap.
- static bool LowerToByteSwap(CallInst *CI);
- };
+class CallInst;
+class Module;
+class DataLayout;
+
+class IntrinsicLowering {
+ const DataLayout &DL;
+
+ bool Warned;
+
+public:
+ explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {}
+
+ /// AddPrototypes - This method, if called, causes all of the prototypes
+ /// that might be needed by an intrinsic lowering implementation to be
+ /// inserted into the module specified.
+ void AddPrototypes(Module &M);
+
+ /// LowerIntrinsicCall - This method replaces a call with the LLVM function
+ /// which should be used to implement the specified intrinsic function call.
+ /// If an intrinsic function must be implemented by the code generator
+ /// (such as va_start), this function should print a message and abort.
+ ///
+ /// Otherwise, if an intrinsic function call can be lowered, the code to
+ /// implement it (often a call to a non-intrinsic function) is inserted
+ /// _after_ the call instruction and the call is deleted. The caller must
+ /// be capable of handling this kind of change.
+ ///
+ void LowerIntrinsicCall(CallInst *CI);
+
+ /// LowerToByteSwap - Replace a call instruction into a call to bswap
+ /// intrinsic. Return false if it has determined the call is not a
+ /// simple integer bswap.
+ static bool LowerToByteSwap(CallInst *CI);
+};
}
#endif
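A hedged usage sketch of the class above (module setup elided; the wrapper function is illustrative):

  void lowerCall(IntrinsicLowering &IL, CallInst *CI) {
    // Try the cheap special case first: a simple integer bswap.
    if (IntrinsicLowering::LowerToByteSwap(CI))
      return;
    // Otherwise expand the intrinsic in place; CI is deleted on success.
    IL.LowerIntrinsicCall(CI);
  }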
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
index 9b8b91c..0157bf9 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <climits>
#include <set>
@@ -595,15 +596,15 @@ namespace llvm {
class SubRange : public LiveRange {
public:
SubRange *Next;
- unsigned LaneMask;
+ LaneBitmask LaneMask;
/// Constructs a new SubRange object.
- SubRange(unsigned LaneMask)
+ SubRange(LaneBitmask LaneMask)
: Next(nullptr), LaneMask(LaneMask) {
}
/// Constructs a new SubRange object by copying liveness from @p Other.
- SubRange(unsigned LaneMask, const LiveRange &Other,
+ SubRange(LaneBitmask LaneMask, const LiveRange &Other,
BumpPtrAllocator &Allocator)
: LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) {
}
@@ -677,7 +678,8 @@ namespace llvm {
/// Creates a new empty subregister live range. The range is added at the
/// beginning of the subrange list; subrange iterators stay valid.
- SubRange *createSubRange(BumpPtrAllocator &Allocator, unsigned LaneMask) {
+ SubRange *createSubRange(BumpPtrAllocator &Allocator,
+ LaneBitmask LaneMask) {
SubRange *Range = new (Allocator) SubRange(LaneMask);
appendSubRange(Range);
return Range;
@@ -685,7 +687,8 @@ namespace llvm {
/// Like createSubRange() but the new range is filled with a copy of the
/// liveness information in @p CopyFrom.
- SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, unsigned LaneMask,
+ SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator,
+ LaneBitmask LaneMask,
const LiveRange &CopyFrom) {
SubRange *Range = new (Allocator) SubRange(LaneMask, CopyFrom, Allocator);
appendSubRange(Range);
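A minimal sketch of the LaneBitmask-based subrange API above; the allocator, mask, and source range come from the caller:

  void seedSubRange(LiveInterval &LI, BumpPtrAllocator &Alloc,
                    LaneBitmask Mask, const LiveRange &CopyFrom) {
    // Copy existing liveness into a new subrange for the given lanes.
    LiveInterval::SubRange *SR = LI.createSubRangeFrom(Alloc, Mask, CopyFrom);
    (void)SR; // subrange iterators on LI remain valid after insertion
  }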
@@ -842,11 +845,6 @@ namespace llvm {
LiveIntervals &LIS;
IntEqClasses EqClass;
- // Note that values a and b are connected.
- void Connect(unsigned a, unsigned b);
-
- unsigned Renumber();
-
public:
explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {}
@@ -858,12 +856,12 @@ namespace llvm {
/// the equivalence class assigned the VNI.
unsigned getEqClass(const VNInfo *VNI) const { return EqClass[VNI->id]; }
- /// Distribute - Distribute values in LIV[0] into a separate LiveInterval
- /// for each connected component. LIV must have a LiveInterval for each
- /// connected component. The LiveIntervals in Liv[1..] must be empty.
- /// Instructions using LIV[0] are rewritten.
- void Distribute(LiveInterval *LIV[], MachineRegisterInfo &MRI);
-
+ /// Distribute values in \p LI into a separate LiveInterval
+ /// for each connected component. LIV must have an empty LiveInterval for
+ /// each additional connected component. The first connected component is
+ /// left in \p LI.
+ void Distribute(LiveInterval &LI, LiveInterval *LIV[],
+ MachineRegisterInfo &MRI);
};
}
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 9673f80..87421e2 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -36,7 +37,6 @@ namespace llvm {
extern cl::opt<bool> UseSegmentSetForPhysRegs;
- class AliasAnalysis;
class BitVector;
class BlockFrequency;
class LiveRangeCalc;
@@ -147,13 +147,12 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg,
MachineInstr* startInst);
- /// shrinkToUses - After removing some uses of a register, shrink its live
- /// range to just the remaining uses. This method does not compute reaching
- /// defs for new uses, and it doesn't remove dead defs.
- /// Dead PHIDef values are marked as unused.
- /// New dead machine instructions are added to the dead vector.
- /// Return true if the interval may have been separated into multiple
- /// connected components.
+ /// After removing some uses of a register, shrink its live range to just
+ /// the remaining uses. This method does not compute reaching defs for new
+ /// uses, and it doesn't remove dead defs.
+ /// Dead PHIDef values are marked as unused. New dead machine instructions
+ /// are added to the dead vector. Returns true if the interval may have been
+ /// separated into multiple connected components.
bool shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead = nullptr);
@@ -161,6 +160,8 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
/// shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead)
/// that works on a subregister live range and only looks at uses matching
/// the lane mask of the subregister range.
+ /// This may leave the subrange empty, which needs to be cleaned up with
+ /// LiveInterval::removeEmptySubranges() afterwards.
void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg);
/// extendToIndices - Extend the live range of LI to reach all points in
@@ -257,11 +258,6 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
Indexes->replaceMachineInstrInMaps(MI, NewMI);
}
- bool findLiveInMBBs(SlotIndex Start, SlotIndex End,
- SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
- return Indexes->findLiveInMBBs(Start, End, MBBs);
- }
-
VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -406,6 +402,10 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
/// that start at position @p Pos.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos);
+ /// Split the connected components of LiveInterval \p LI into separate intervals.
+ void splitSeparateComponents(LiveInterval &LI,
+ SmallVectorImpl<LiveInterval*> &SplitLIs);
+
private:
/// Compute live intervals for all virtual registers.
void computeVirtRegs();
@@ -440,7 +440,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
void repairOldRegInRange(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
const SlotIndex endIdx, LiveRange &LR,
- unsigned Reg, unsigned LaneMask = ~0u);
+ unsigned Reg, LaneBitmask LaneMask = ~0u);
class HMEditor;
};
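A hedged sketch of the new component-splitting helper declared above (pass context assumed):

  void splitComponents(LiveIntervals &LIS, LiveInterval &LI) {
    SmallVector<LiveInterval *, 8> SplitLIs;
    LIS.splitSeparateComponents(LI, SplitLIs);
    // SplitLIs now holds one new interval per extra connected component;
    // the first component stays in LI.
  }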
diff --git a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 6475e7b..3bdf5ae 100644
--- a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -109,7 +109,7 @@ public:
/// \brief Simulates liveness when stepping forward over an
/// instruction (bundle): Remove killed uses, add defs. This is not the
/// recommended way, because it depends on accurate kill flags. If possible,
- /// use stepBackwards() instead of this function.
+ /// use stepBackward() instead of this function.
/// The clobbers set will be the list of registers either defined or clobbered
/// by a regmask. The operand will identify whether this is a regmask or
/// register operand.
@@ -122,9 +122,9 @@ public:
void addLiveIns(const MachineBasicBlock *MBB, bool AddPristines = false);
/// \brief Adds all live-out registers of basic block @p MBB; After prologue/
- /// epilogue insertion \p AddPristines should be set to true to insert the
- /// pristine registers.
- void addLiveOuts(const MachineBasicBlock *MBB, bool AddPristines = false);
+ /// epilogue insertion \p AddPristinesAndCSRs should be set to true.
+ void addLiveOuts(const MachineBasicBlock *MBB,
+ bool AddPristinesAndCSRs = false);
typedef SparseSet<unsigned>::const_iterator const_iterator;
const_iterator begin() const { return LiveRegs.begin(); }
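A hedged sketch of the backward-stepping idiom the comments above recommend; TRI and MBB come from the surrounding pass:

  void computeLiveness(const TargetRegisterInfo *TRI,
                       const MachineBasicBlock &MBB) {
    LivePhysRegs LiveRegs(TRI);
    // Seed with live-outs, including pristine and callee-saved registers.
    LiveRegs.addLiveOuts(&MBB, /*AddPristinesAndCSRs=*/true);
    // stepBackward() does not rely on accurate kill flags.
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I)
      LiveRegs.stepBackward(*I);
  }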
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index c97c636..2271e33 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +29,6 @@
namespace llvm {
-class AliasAnalysis;
class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineLoopInfo;
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/contrib/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 86a0c7b..e169058 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -32,13 +32,11 @@ namespace llvm {
class LiveInterval;
class LiveIntervalAnalysis;
-class MachineRegisterInfo;
class TargetRegisterInfo;
class VirtRegMap;
class LiveRegMatrix : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
LiveIntervals *LIS;
VirtRegMap *VRM;
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
index f495507..3ffbe3d 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -25,76 +25,74 @@
namespace llvm {
- class LiveStacks : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
-
- /// Special pool allocator for VNInfo's (LiveInterval val#).
- ///
- VNInfo::Allocator VNInfoAllocator;
-
- /// S2IMap - Stack slot indices to live interval mapping.
- ///
- typedef std::unordered_map<int, LiveInterval> SS2IntervalMap;
- SS2IntervalMap S2IMap;
-
- /// S2RCMap - Stack slot indices to register class mapping.
- std::map<int, const TargetRegisterClass*> S2RCMap;
-
- public:
- static char ID; // Pass identification, replacement for typeid
- LiveStacks() : MachineFunctionPass(ID) {
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- }
-
- typedef SS2IntervalMap::iterator iterator;
- typedef SS2IntervalMap::const_iterator const_iterator;
- const_iterator begin() const { return S2IMap.begin(); }
- const_iterator end() const { return S2IMap.end(); }
- iterator begin() { return S2IMap.begin(); }
- iterator end() { return S2IMap.end(); }
-
- unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
-
- LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC);
-
- LiveInterval &getInterval(int Slot) {
- assert(Slot >= 0 && "Spill slot indice must be >= 0");
- SS2IntervalMap::iterator I = S2IMap.find(Slot);
- assert(I != S2IMap.end() && "Interval does not exist for stack slot");
- return I->second;
- }
-
- const LiveInterval &getInterval(int Slot) const {
- assert(Slot >= 0 && "Spill slot indice must be >= 0");
- SS2IntervalMap::const_iterator I = S2IMap.find(Slot);
- assert(I != S2IMap.end() && "Interval does not exist for stack slot");
- return I->second;
- }
-
- bool hasInterval(int Slot) const {
- return S2IMap.count(Slot);
- }
-
- const TargetRegisterClass *getIntervalRegClass(int Slot) const {
- assert(Slot >= 0 && "Spill slot indice must be >= 0");
- std::map<int, const TargetRegisterClass*>::const_iterator
- I = S2RCMap.find(Slot);
- assert(I != S2RCMap.end() &&
- "Register class info does not exist for stack slot");
- return I->second;
- }
-
- VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void releaseMemory() override;
-
- /// runOnMachineFunction - pass entry point
- bool runOnMachineFunction(MachineFunction&) override;
-
- /// print - Implement the dump method.
- void print(raw_ostream &O, const Module* = nullptr) const override;
- };
+class LiveStacks : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+
+ /// Special pool allocator for VNInfo's (LiveInterval val#).
+ ///
+ VNInfo::Allocator VNInfoAllocator;
+
+ /// S2IMap - Stack slot indices to live interval mapping.
+ ///
+ typedef std::unordered_map<int, LiveInterval> SS2IntervalMap;
+ SS2IntervalMap S2IMap;
+
+ /// S2RCMap - Stack slot indices to register class mapping.
+ std::map<int, const TargetRegisterClass *> S2RCMap;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ LiveStacks() : MachineFunctionPass(ID) {
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ }
+
+ typedef SS2IntervalMap::iterator iterator;
+ typedef SS2IntervalMap::const_iterator const_iterator;
+ const_iterator begin() const { return S2IMap.begin(); }
+ const_iterator end() const { return S2IMap.end(); }
+ iterator begin() { return S2IMap.begin(); }
+ iterator end() { return S2IMap.end(); }
+
+ unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
+
+ LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC);
+
+ LiveInterval &getInterval(int Slot) {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ assert(I != S2IMap.end() && "Interval does not exist for stack slot");
+ return I->second;
+ }
+
+ const LiveInterval &getInterval(int Slot) const {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::const_iterator I = S2IMap.find(Slot);
+ assert(I != S2IMap.end() && "Interval does not exist for stack slot");
+ return I->second;
+ }
+
+ bool hasInterval(int Slot) const { return S2IMap.count(Slot); }
+
+ const TargetRegisterClass *getIntervalRegClass(int Slot) const {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ std::map<int, const TargetRegisterClass *>::const_iterator I =
+ S2RCMap.find(Slot);
+ assert(I != S2RCMap.end() &&
+ "Register class info does not exist for stack slot");
+ return I->second;
+ }
+
+ VNInfo::Allocator &getVNInfoAllocator() { return VNInfoAllocator; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction &) override;
+
+ /// print - Implement the dump method.
+ void print(raw_ostream &O, const Module * = nullptr) const override;
+};
}
#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */
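A hedged usage sketch for the reformatted class above (Slot and RC come from the surrounding pass):

  void noteSpillSlot(LiveStacks &LS, int Slot,
                     const TargetRegisterClass *RC) {
    // Creates the interval on first use; later calls return the same one.
    LiveInterval &LI = LS.getOrCreateInterval(Slot, RC);
    (void)LI;
    assert(LS.hasInterval(Slot) && "interval was just created");
  }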
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index 67b756d..a569d5e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -1,4 +1,4 @@
-//===- MIRParser.h - MIR serialization format parser ----------------------===//
+//===- MIRParser.h - MIR serialization format parser ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,7 +37,7 @@ class MIRParser : public MachineFunctionInitializer {
public:
MIRParser(std::unique_ptr<MIRParserImpl> Impl);
MIRParser(const MIRParser &) = delete;
- ~MIRParser();
+ ~MIRParser() override;
/// Parse the optional LLVM IR module that's embedded in the MIR file.
///
@@ -78,4 +78,4 @@ createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context);
} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 9798e5c..14d3744 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -19,6 +19,7 @@
#define LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Support/YAMLTraits.h"
#include <vector>
@@ -72,54 +73,109 @@ template <> struct ScalarTraits<FlowStringValue> {
static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); }
};
+struct BlockStringValue {
+ StringValue Value;
+};
+
+template <> struct BlockScalarTraits<BlockStringValue> {
+ static void output(const BlockStringValue &S, void *Ctx, raw_ostream &OS) {
+ return ScalarTraits<StringValue>::output(S.Value, Ctx, OS);
+ }
+
+ static StringRef input(StringRef Scalar, void *Ctx, BlockStringValue &S) {
+ return ScalarTraits<StringValue>::input(Scalar, Ctx, S.Value);
+ }
+};
+
+/// A wrapper around unsigned which contains a source range that's being set
+/// during parsing.
+struct UnsignedValue {
+ unsigned Value;
+ SMRange SourceRange;
+
+ UnsignedValue() : Value(0) {}
+ UnsignedValue(unsigned Value) : Value(Value) {}
+
+ bool operator==(const UnsignedValue &Other) const {
+ return Value == Other.Value;
+ }
+};
+
+template <> struct ScalarTraits<UnsignedValue> {
+ static void output(const UnsignedValue &Value, void *Ctx, raw_ostream &OS) {
+ return ScalarTraits<unsigned>::output(Value.Value, Ctx, OS);
+ }
+
+ static StringRef input(StringRef Scalar, void *Ctx, UnsignedValue &Value) {
+ if (const auto *Node =
+ reinterpret_cast<yaml::Input *>(Ctx)->getCurrentNode())
+ Value.SourceRange = Node->getSourceRange();
+ return ScalarTraits<unsigned>::input(Scalar, Ctx, Value.Value);
+ }
+
+ static bool mustQuote(StringRef Scalar) {
+ return ScalarTraits<unsigned>::mustQuote(Scalar);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<MachineJumpTableInfo::JTEntryKind> {
+ static void enumeration(yaml::IO &IO,
+ MachineJumpTableInfo::JTEntryKind &EntryKind) {
+ IO.enumCase(EntryKind, "block-address",
+ MachineJumpTableInfo::EK_BlockAddress);
+ IO.enumCase(EntryKind, "gp-rel64-block-address",
+ MachineJumpTableInfo::EK_GPRel64BlockAddress);
+ IO.enumCase(EntryKind, "gp-rel32-block-address",
+ MachineJumpTableInfo::EK_GPRel32BlockAddress);
+ IO.enumCase(EntryKind, "label-difference32",
+ MachineJumpTableInfo::EK_LabelDifference32);
+ IO.enumCase(EntryKind, "inline", MachineJumpTableInfo::EK_Inline);
+ IO.enumCase(EntryKind, "custom32", MachineJumpTableInfo::EK_Custom32);
+ }
+};
+
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::StringValue)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::FlowStringValue)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::UnsignedValue)
namespace llvm {
namespace yaml {
struct VirtualRegisterDefinition {
- unsigned ID;
+ UnsignedValue ID;
StringValue Class;
- // TODO: Serialize the virtual register hints.
+ StringValue PreferredRegister;
+ // TODO: Serialize the target specific register hints.
};
template <> struct MappingTraits<VirtualRegisterDefinition> {
static void mapping(IO &YamlIO, VirtualRegisterDefinition &Reg) {
YamlIO.mapRequired("id", Reg.ID);
YamlIO.mapRequired("class", Reg.Class);
+ YamlIO.mapOptional("preferred-register", Reg.PreferredRegister,
+ StringValue()); // Don't print out when it's empty.
}
static const bool flow = true;
};
-struct MachineBasicBlock {
- unsigned ID;
- StringValue Name;
- unsigned Alignment = 0;
- bool IsLandingPad = false;
- bool AddressTaken = false;
- // TODO: Serialize the successor weights.
- std::vector<FlowStringValue> Successors;
- std::vector<FlowStringValue> LiveIns;
- std::vector<StringValue> Instructions;
+struct MachineFunctionLiveIn {
+ StringValue Register;
+ StringValue VirtualRegister;
};
-template <> struct MappingTraits<MachineBasicBlock> {
- static void mapping(IO &YamlIO, MachineBasicBlock &MBB) {
- YamlIO.mapRequired("id", MBB.ID);
- YamlIO.mapOptional("name", MBB.Name,
- StringValue()); // Don't print out an empty name.
- YamlIO.mapOptional("alignment", MBB.Alignment);
- YamlIO.mapOptional("isLandingPad", MBB.IsLandingPad);
- YamlIO.mapOptional("addressTaken", MBB.AddressTaken);
- YamlIO.mapOptional("successors", MBB.Successors);
- YamlIO.mapOptional("liveins", MBB.LiveIns);
- YamlIO.mapOptional("instructions", MBB.Instructions);
+template <> struct MappingTraits<MachineFunctionLiveIn> {
+ static void mapping(IO &YamlIO, MachineFunctionLiveIn &LiveIn) {
+ YamlIO.mapRequired("reg", LiveIn.Register);
+ YamlIO.mapOptional(
+ "virtual-reg", LiveIn.VirtualRegister,
+ StringValue()); // Don't print the virtual register when it's empty.
}
+
+ static const bool flow = true;
};
/// Serializable representation of stack object from the MachineFrameInfo class.
@@ -128,16 +184,21 @@ template <> struct MappingTraits<MachineBasicBlock> {
/// determined by the object's type and frame information flags.
/// Dead stack objects aren't serialized.
///
-/// TODO: Determine isPreallocated flag by mapping between objects and local
-/// objects (Serialize local objects).
+/// The 'isPreallocated' flag is determined by the local offset.
struct MachineStackObject {
enum ObjectType { DefaultType, SpillSlot, VariableSized };
- // TODO: Serialize LLVM alloca reference.
- unsigned ID;
+ UnsignedValue ID;
+ StringValue Name;
+ // TODO: Serialize unnamed LLVM alloca reference.
ObjectType Type = DefaultType;
int64_t Offset = 0;
uint64_t Size = 0;
unsigned Alignment = 0;
+ StringValue CalleeSavedRegister;
+ Optional<int64_t> LocalOffset;
+ StringValue DebugVar;
+ StringValue DebugExpr;
+ StringValue DebugLoc;
};
template <> struct ScalarEnumerationTraits<MachineStackObject::ObjectType> {
@@ -151,6 +212,8 @@ template <> struct ScalarEnumerationTraits<MachineStackObject::ObjectType> {
template <> struct MappingTraits<MachineStackObject> {
static void mapping(yaml::IO &YamlIO, MachineStackObject &Object) {
YamlIO.mapRequired("id", Object.ID);
+ YamlIO.mapOptional("name", Object.Name,
+ StringValue()); // Don't print out an empty name.
YamlIO.mapOptional(
"type", Object.Type,
MachineStackObject::DefaultType); // Don't print the default type.
@@ -158,6 +221,15 @@ template <> struct MappingTraits<MachineStackObject> {
if (Object.Type != MachineStackObject::VariableSized)
YamlIO.mapRequired("size", Object.Size);
YamlIO.mapOptional("alignment", Object.Alignment);
+ YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
+ StringValue()); // Don't print it out when it's empty.
+ YamlIO.mapOptional("local-offset", Object.LocalOffset);
+ YamlIO.mapOptional("di-variable", Object.DebugVar,
+ StringValue()); // Don't print it out when it's empty.
+ YamlIO.mapOptional("di-expression", Object.DebugExpr,
+ StringValue()); // Don't print it out when it's empty.
+ YamlIO.mapOptional("di-location", Object.DebugLoc,
+ StringValue()); // Don't print it out when it's empty.
}
static const bool flow = true;
@@ -167,13 +239,14 @@ template <> struct MappingTraits<MachineStackObject> {
/// MachineFrameInfo class.
struct FixedMachineStackObject {
enum ObjectType { DefaultType, SpillSlot };
- unsigned ID;
+ UnsignedValue ID;
ObjectType Type = DefaultType;
int64_t Offset = 0;
uint64_t Size = 0;
unsigned Alignment = 0;
bool IsImmutable = false;
bool IsAliased = false;
+ StringValue CalleeSavedRegister;
};
template <>
@@ -198,22 +271,64 @@ template <> struct MappingTraits<FixedMachineStackObject> {
YamlIO.mapOptional("isImmutable", Object.IsImmutable);
YamlIO.mapOptional("isAliased", Object.IsAliased);
}
+ YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
+ StringValue()); // Don't print it out when it's empty.
}
static const bool flow = true;
};
+struct MachineConstantPoolValue {
+ UnsignedValue ID;
+ StringValue Value;
+ unsigned Alignment = 0;
+};
+
+template <> struct MappingTraits<MachineConstantPoolValue> {
+ static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) {
+ YamlIO.mapRequired("id", Constant.ID);
+ YamlIO.mapOptional("value", Constant.Value);
+ YamlIO.mapOptional("alignment", Constant.Alignment);
+ }
+};
+
+struct MachineJumpTable {
+ struct Entry {
+ UnsignedValue ID;
+ std::vector<FlowStringValue> Blocks;
+ };
+
+ MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32;
+ std::vector<Entry> Entries;
+};
+
+template <> struct MappingTraits<MachineJumpTable::Entry> {
+ static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) {
+ YamlIO.mapRequired("id", Entry.ID);
+ YamlIO.mapOptional("blocks", Entry.Blocks);
+ }
+};
+
} // end namespace yaml
} // end namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineBasicBlock)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry)
namespace llvm {
namespace yaml {
+template <> struct MappingTraits<MachineJumpTable> {
+ static void mapping(IO &YamlIO, MachineJumpTable &JT) {
+ YamlIO.mapRequired("kind", JT.Kind);
+ YamlIO.mapOptional("entries", JT.Entries);
+ }
+};
+
/// Serializable representation of MachineFrameInfo.
///
/// Doesn't serialize attributes like 'StackAlignment', 'IsStackRealignable' and
@@ -231,14 +346,14 @@ struct MachineFrameInfo {
unsigned MaxAlignment = 0;
bool AdjustsStack = false;
bool HasCalls = false;
- // TODO: Serialize StackProtectorIdx and FunctionContextIdx
+ StringValue StackProtector;
+ // TODO: Serialize FunctionContextIdx
unsigned MaxCallFrameSize = 0;
- // TODO: Serialize callee saved info.
- // TODO: Serialize local frame objects.
bool HasOpaqueSPAdjustment = false;
bool HasVAStart = false;
bool HasMustTailInVarArgFunc = false;
- // TODO: Serialize save and restore MBB references.
+ StringValue SavePoint;
+ StringValue RestorePoint;
};
template <> struct MappingTraits<MachineFrameInfo> {
@@ -252,10 +367,16 @@ template <> struct MappingTraits<MachineFrameInfo> {
YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment);
YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack);
YamlIO.mapOptional("hasCalls", MFI.HasCalls);
+ YamlIO.mapOptional("stackProtector", MFI.StackProtector,
+ StringValue()); // Don't print it out when it's empty.
YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize);
YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment);
YamlIO.mapOptional("hasVAStart", MFI.HasVAStart);
YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc);
+ YamlIO.mapOptional("savePoint", MFI.SavePoint,
+ StringValue()); // Don't print it out when it's empty.
+ YamlIO.mapOptional("restorePoint", MFI.RestorePoint,
+ StringValue()); // Don't print it out when it's empty.
}
};
@@ -269,14 +390,16 @@ struct MachineFunction {
bool TracksRegLiveness = false;
bool TracksSubRegLiveness = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
+ std::vector<MachineFunctionLiveIn> LiveIns;
+ Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
// TODO: Serialize the various register masks.
- // TODO: Serialize live in registers.
// Frame information
MachineFrameInfo FrameInfo;
std::vector<FixedMachineStackObject> FixedStackObjects;
std::vector<MachineStackObject> StackObjects;
-
- std::vector<MachineBasicBlock> BasicBlocks;
+ std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
+ MachineJumpTable JumpTableInfo;
+ BlockStringValue Body;
};
template <> struct MappingTraits<MachineFunction> {
@@ -289,10 +412,15 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness);
YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness);
YamlIO.mapOptional("registers", MF.VirtualRegisters);
+ YamlIO.mapOptional("liveins", MF.LiveIns);
+ YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters);
YamlIO.mapOptional("frameInfo", MF.FrameInfo);
YamlIO.mapOptional("fixedStack", MF.FixedStackObjects);
YamlIO.mapOptional("stack", MF.StackObjects);
- YamlIO.mapOptional("body", MF.BasicBlocks);
+ YamlIO.mapOptional("constants", MF.Constants);
+ if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty())
+ YamlIO.mapOptional("jumpTable", MF.JumpTableInfo);
+ YamlIO.mapOptional("body", MF.Body);
}
};
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 5e5f45c..3d58c49 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -16,6 +16,8 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
@@ -25,11 +27,15 @@ class Pass;
class BasicBlock;
class MachineFunction;
class MCSymbol;
+class MIPrinter;
class SlotIndexes;
class StringRef;
class raw_ostream;
class MachineBranchProbabilityInfo;
+// Forward declaration to avoid circular include problem with TargetRegisterInfo
+typedef unsigned LaneBitmask;
+
template <>
struct ilist_traits<MachineInstr> : public ilist_default_traits<MachineInstr> {
private:
@@ -52,57 +58,76 @@ public:
void addNodeToList(MachineInstr* N);
void removeNodeFromList(MachineInstr* N);
void transferNodesFromList(ilist_traits &SrcTraits,
- ilist_iterator<MachineInstr> first,
- ilist_iterator<MachineInstr> last);
+ ilist_iterator<MachineInstr> First,
+ ilist_iterator<MachineInstr> Last);
void deleteNode(MachineInstr *N);
private:
void createNode(const MachineInstr &);
};
-class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
+class MachineBasicBlock
+ : public ilist_node_with_parent<MachineBasicBlock, MachineFunction> {
+public:
+ /// Pair of physical register and lane mask.
+ /// This is not simply a std::pair typedef because the members should be named
+ /// clearly as they both have an integer type.
+ struct RegisterMaskPair {
+ public:
+ MCPhysReg PhysReg;
+ LaneBitmask LaneMask;
+
+ RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask)
+ : PhysReg(PhysReg), LaneMask(LaneMask) {}
+ };
+
+private:
typedef ilist<MachineInstr> Instructions;
Instructions Insts;
const BasicBlock *BB;
int Number;
MachineFunction *xParent;
- /// Predecessors/Successors - Keep track of the predecessor / successor
- /// basicblocks.
+ /// Keep track of the predecessor / successor basic blocks.
std::vector<MachineBasicBlock *> Predecessors;
std::vector<MachineBasicBlock *> Successors;
- /// Weights - Keep track of the weights to the successors. This vector
- /// has the same order as Successors, or it is empty if we don't use it
- /// (disable optimization).
- std::vector<uint32_t> Weights;
- typedef std::vector<uint32_t>::iterator weight_iterator;
- typedef std::vector<uint32_t>::const_iterator const_weight_iterator;
+ /// Keep track of the probabilities to the successors. This vector has the
+ /// same order as Successors, or it is empty if we don't use it (disable
+ /// optimization).
+ std::vector<BranchProbability> Probs;
+ typedef std::vector<BranchProbability>::iterator probability_iterator;
+ typedef std::vector<BranchProbability>::const_iterator
+ const_probability_iterator;
+
+ /// Keep track of the physical registers that are live-in to the basic block.
+ typedef std::vector<RegisterMaskPair> LiveInVector;
+ LiveInVector LiveIns;
+
+ /// Alignment of the basic block. Zero if the basic block does not need to be
+ /// aligned. The alignment is specified as log2(bytes).
+ unsigned Alignment = 0;
- /// LiveIns - Keep track of the physical registers that are livein of
- /// the basicblock.
- std::vector<unsigned> LiveIns;
+ /// Indicate that this basic block is entered via an exception handler.
+ bool IsEHPad = false;
- /// Alignment - Alignment of the basic block. Zero if the basic block does
- /// not need to be aligned.
- /// The alignment is specified as log2(bytes).
- unsigned Alignment;
+ /// Indicate that this basic block is potentially the target of an indirect
+ /// branch.
+ bool AddressTaken = false;
- /// IsLandingPad - Indicate that this basic block is entered via an
- /// exception handler.
- bool IsLandingPad;
+ /// Indicate that this basic block is the entry block of an EH funclet.
+ bool IsEHFuncletEntry = false;
- /// AddressTaken - Indicate that this basic block is potentially the
- /// target of an indirect branch.
- bool AddressTaken;
+ /// Indicate that this basic block is the entry block of a cleanup funclet.
+ bool IsCleanupFuncletEntry = false;
/// \brief since getSymbol is a relatively heavy-weight operation, the symbol
/// is only computed once and is cached.
- mutable MCSymbol *CachedMCSymbol;
+ mutable MCSymbol *CachedMCSymbol = nullptr;
// Intrusive list support
MachineBasicBlock() {}
- explicit MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb);
+ explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB);
~MachineBasicBlock();
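An illustrative helper over the new live-in representation above; the accessor returning the RegisterMaskPair vector is assumed and not part of this patch:

  bool isLiveInLane(ArrayRef<MachineBasicBlock::RegisterMaskPair> LiveIns,
                    MCPhysReg Reg, LaneBitmask Mask) {
    for (const auto &P : LiveIns)
      if (P.PhysReg == Reg && (P.LaneMask & Mask))
        return true; // some queried lane of Reg is live-in
    return false;
  }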
@@ -110,50 +135,44 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
friend class MachineFunction;
public:
- /// getBasicBlock - Return the LLVM basic block that this instance
- /// corresponded to originally. Note that this may be NULL if this instance
- /// does not correspond directly to an LLVM basic block.
- ///
+ /// Return the LLVM basic block that this instance corresponded to originally.
+ /// Note that this may be NULL if this instance does not correspond directly
+ /// to an LLVM basic block.
const BasicBlock *getBasicBlock() const { return BB; }
- /// getName - Return the name of the corresponding LLVM basic block, or
- /// "(null)".
+ /// Return the name of the corresponding LLVM basic block, or "(null)".
StringRef getName() const;
- /// getFullName - Return a formatted string to identify this block and its
- /// parent function.
+ /// Return a formatted string to identify this block and its parent function.
std::string getFullName() const;
- /// hasAddressTaken - Test whether this block is potentially the target
- /// of an indirect branch.
+ /// Test whether this block is potentially the target of an indirect branch.
bool hasAddressTaken() const { return AddressTaken; }
- /// setHasAddressTaken - Set this block to reflect that it potentially
- /// is the target of an indirect branch.
+ /// Set this block to reflect that it potentially is the target of an indirect
+ /// branch.
void setHasAddressTaken() { AddressTaken = true; }
- /// getParent - Return the MachineFunction containing this basic block.
- ///
+ /// Return the MachineFunction containing this basic block.
const MachineFunction *getParent() const { return xParent; }
MachineFunction *getParent() { return xParent; }
-
- /// bundle_iterator - MachineBasicBlock iterator that automatically skips over
- /// MIs that are inside bundles (i.e. walk top level MIs only).
+ /// MachineBasicBlock iterator that automatically skips over MIs that are
+ /// inside bundles (i.e. walk top level MIs only).
template<typename Ty, typename IterTy>
class bundle_iterator
: public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> {
IterTy MII;
public:
- bundle_iterator(IterTy mii) : MII(mii) {}
+ bundle_iterator(IterTy MI) : MII(MI) {}
- bundle_iterator(Ty &mi) : MII(mi) {
- assert(!mi.isBundledWithPred() &&
+ bundle_iterator(Ty &MI) : MII(MI) {
+ assert(!MI.isBundledWithPred() &&
"It's not legal to initialize bundle_iterator with a bundled MI");
}
- bundle_iterator(Ty *mi) : MII(mi) {
- assert((!mi || !mi->isBundledWithPred()) &&
+ bundle_iterator(Ty *MI) : MII(MI) {
+ assert((!MI || !MI->isBundledWithPred()) &&
"It's not legal to initialize bundle_iterator with a bundled MI");
}
// Template allows conversion from const to nonconst.
@@ -165,13 +184,13 @@ public:
Ty &operator*() const { return *MII; }
Ty *operator->() const { return &operator*(); }
- operator Ty*() const { return MII; }
+ operator Ty *() const { return MII.getNodePtrUnchecked(); }
- bool operator==(const bundle_iterator &x) const {
- return MII == x.MII;
+ bool operator==(const bundle_iterator &X) const {
+ return MII == X.MII;
}
- bool operator!=(const bundle_iterator &x) const {
- return !operator==(x);
+ bool operator!=(const bundle_iterator &X) const {
+ return !operator==(X);
}
// Increment and decrement operators...
@@ -247,11 +266,16 @@ public:
reverse_iterator rend () { return instr_rend(); }
const_reverse_iterator rend () const { return instr_rend(); }
+ /// Support for MachineInstr::getNextNode().
+ static Instructions MachineBasicBlock::*getSublistAccess(MachineInstr *) {
+ return &MachineBasicBlock::Insts;
+ }
+
inline iterator_range<iterator> terminators() {
- return iterator_range<iterator>(getFirstTerminator(), end());
+ return make_range(getFirstTerminator(), end());
}
inline iterator_range<const_iterator> terminators() const {
- return iterator_range<const_iterator>(getFirstTerminator(), end());
+ return make_range(getFirstTerminator(), end());
}
// Machine-CFG iterators
@@ -301,16 +325,16 @@ public:
bool succ_empty() const { return Successors.empty(); }
inline iterator_range<pred_iterator> predecessors() {
- return iterator_range<pred_iterator>(pred_begin(), pred_end());
+ return make_range(pred_begin(), pred_end());
}
inline iterator_range<const_pred_iterator> predecessors() const {
- return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
+ return make_range(pred_begin(), pred_end());
}
inline iterator_range<succ_iterator> successors() {
- return iterator_range<succ_iterator>(succ_begin(), succ_end());
+ return make_range(succ_begin(), succ_end());
}
inline iterator_range<const_succ_iterator> successors() const {
- return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
+ return make_range(succ_begin(), succ_end());
}
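
Illustrative usage of the range accessors above (a sketch, not part of the diff; the helper name is hypothetical): the make_range-based predecessors()/successors() make the machine CFG directly usable with range-based for loops.

    // Count EH-pad successors of a block.
    static unsigned countEHPadSuccessors(const MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (const MachineBasicBlock *Succ : MBB.successors())
        if (Succ->isEHPad())
          ++N;
      return N;
    }
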
// LiveIn management methods.
@@ -318,131 +342,177 @@ public:
/// Adds the specified register as a live in. Note that it is an error to add
/// the same register to the same set more than once unless the intention is
/// to call sortUniqueLiveIns after all registers are added.
- void addLiveIn(unsigned Reg) { LiveIns.push_back(Reg); }
+ void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask = ~0u) {
+ LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
+ }
+ void addLiveIn(const RegisterMaskPair &RegMaskPair) {
+ LiveIns.push_back(RegMaskPair);
+ }
/// Sorts and uniques the LiveIns vector. It can be significantly faster to do
/// this than repeatedly calling isLiveIn before calling addLiveIn for every
/// LiveIn insertion.
- void sortUniqueLiveIns() {
- std::sort(LiveIns.begin(), LiveIns.end());
- LiveIns.erase(std::unique(LiveIns.begin(), LiveIns.end()), LiveIns.end());
- }
+ void sortUniqueLiveIns();
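
Usage sketch for the bulk-insertion pattern described above (the helper and register list are hypothetical):

    static void addBlockLiveIns(MachineBasicBlock &MBB,
                                ArrayRef<MCPhysReg> Regs) {
      for (MCPhysReg R : Regs)
        MBB.addLiveIn(R);      // duplicates are tolerated at this point
      MBB.sortUniqueLiveIns(); // one sort+unique pass instead of N isLiveIn calls
    }
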
/// Add PhysReg as live in to this block, and ensure that there is a copy of
/// PhysReg to a virtual register of class RC. Return the virtual register
/// that is a copy of the live in PhysReg.
- unsigned addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC);
+ unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC);
- /// removeLiveIn - Remove the specified register from the live in set.
- ///
- void removeLiveIn(unsigned Reg);
+ /// Remove the specified register from the live in set.
+ void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u);
- /// isLiveIn - Return true if the specified register is in the live in set.
- ///
- bool isLiveIn(unsigned Reg) const;
+ /// Return true if the specified register is in the live in set.
+ bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u) const;
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
- typedef std::vector<unsigned>::const_iterator livein_iterator;
+ typedef LiveInVector::const_iterator livein_iterator;
livein_iterator livein_begin() const { return LiveIns.begin(); }
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
+ iterator_range<livein_iterator> liveins() const {
+ return make_range(livein_begin(), livein_end());
+ }
- /// getAlignment - Return alignment of the basic block.
- /// The alignment is specified as log2(bytes).
- ///
+ /// Get the clobber mask for the start of this basic block. Funclets use this
+ /// to prevent register allocation across funclet transitions.
+ const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const;
+
+ /// Get the clobber mask for the end of the basic block.
+ /// \see getBeginClobberMask()
+ const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const;
+
+ /// Return alignment of the basic block. The alignment is specified as
+ /// log2(bytes).
unsigned getAlignment() const { return Alignment; }
- /// setAlignment - Set alignment of the basic block.
- /// The alignment is specified as log2(bytes).
- ///
+ /// Set alignment of the basic block. The alignment is specified as
+ /// log2(bytes).
void setAlignment(unsigned Align) { Alignment = Align; }
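
Since the alignment is stored as log2(bytes), a 16-byte alignment request looks like this (sketch, assuming an in-scope MachineBasicBlock *MBB):

    MBB->setAlignment(4);                            // log2(16) == 4
    unsigned AlignBytes = 1u << MBB->getAlignment(); // == 16
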
- /// isLandingPad - Returns true if the block is a landing pad. That is
- /// this basic block is entered via an exception handler.
- bool isLandingPad() const { return IsLandingPad; }
+  /// Returns true if the block is a landing pad. That is, this basic block is
+ /// entered via an exception handler.
+ bool isEHPad() const { return IsEHPad; }
- /// setIsLandingPad - Indicates the block is a landing pad. That is
- /// this basic block is entered via an exception handler.
- void setIsLandingPad(bool V = true) { IsLandingPad = V; }
+  /// Indicates the block is a landing pad. That is, this basic block is entered
+ /// via an exception handler.
+ void setIsEHPad(bool V = true) { IsEHPad = V; }
- /// getLandingPadSuccessor - If this block has a successor that is a landing
- /// pad, return it. Otherwise return NULL.
+ /// If this block has a successor that is a landing pad, return it. Otherwise
+ /// return NULL.
const MachineBasicBlock *getLandingPadSuccessor() const;
+ bool hasEHPadSuccessor() const;
+
+ /// Returns true if this is the entry block of an EH funclet.
+ bool isEHFuncletEntry() const { return IsEHFuncletEntry; }
+
+ /// Indicates if this is the entry block of an EH funclet.
+ void setIsEHFuncletEntry(bool V = true) { IsEHFuncletEntry = V; }
+
+ /// Returns true if this is the entry block of a cleanup funclet.
+ bool isCleanupFuncletEntry() const { return IsCleanupFuncletEntry; }
+
+ /// Indicates if this is the entry block of a cleanup funclet.
+ void setIsCleanupFuncletEntry(bool V = true) { IsCleanupFuncletEntry = V; }
+
// Code Layout methods.
- /// moveBefore/moveAfter - move 'this' block before or after the specified
- /// block. This only moves the block, it does not modify the CFG or adjust
- /// potential fall-throughs at the end of the block.
+ /// Move 'this' block before or after the specified block. This only moves
+  /// the block; it does not modify the CFG or adjust potential fall-throughs at
+ /// the end of the block.
void moveBefore(MachineBasicBlock *NewAfter);
void moveAfter(MachineBasicBlock *NewBefore);
- /// updateTerminator - Update the terminator instructions in block to account
- /// for changes to the layout. If the block previously used a fallthrough,
- /// it may now need a branch, and if it previously used branching it may now
- /// be able to use a fallthrough.
+ /// Update the terminator instructions in block to account for changes to the
+ /// layout. If the block previously used a fallthrough, it may now need a
+ /// branch, and if it previously used branching it may now be able to use a
+ /// fallthrough.
void updateTerminator();
// Machine-CFG mutators
- /// addSuccessor - Add succ as a successor of this MachineBasicBlock.
- /// The Predecessors list of succ is automatically updated. WEIGHT
- /// parameter is stored in Weights list and it may be used by
- /// MachineBranchProbabilityInfo analysis to calculate branch probability.
+ /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list
+  /// of Succ is automatically updated. The PROB parameter is stored in the
+  /// Probabilities list. The default probability is set as unknown. Mixing
+  /// known and unknown probabilities in the successor list is not allowed. When
+  /// all successors have unknown probabilities, 1 / N is returned as the
+ /// probability for each successor, where N is the number of successors.
///
/// Note that duplicate Machine CFG edges are not allowed.
- ///
- void addSuccessor(MachineBasicBlock *succ, uint32_t weight = 0);
-
- /// Set successor weight of a given iterator.
- void setSuccWeight(succ_iterator I, uint32_t weight);
-
- /// removeSuccessor - Remove successor from the successors list of this
- /// MachineBasicBlock. The Predecessors list of succ is automatically updated.
- ///
- void removeSuccessor(MachineBasicBlock *succ);
-
- /// removeSuccessor - Remove specified successor from the successors list of
- /// this MachineBasicBlock. The Predecessors list of succ is automatically
- /// updated. Return the iterator to the element after the one removed.
- ///
- succ_iterator removeSuccessor(succ_iterator I);
-
- /// replaceSuccessor - Replace successor OLD with NEW and update weight info.
- ///
+ void addSuccessor(MachineBasicBlock *Succ,
+ BranchProbability Prob = BranchProbability::getUnknown());
+
+ /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list
+ /// of Succ is automatically updated. The probability is not provided because
+ /// BPI is not available (e.g. -O0 is used), in which case edge probabilities
+ /// won't be used. Using this interface can save some space.
+ void addSuccessorWithoutProb(MachineBasicBlock *Succ);
+
+ /// Set successor probability of a given iterator.
+ void setSuccProbability(succ_iterator I, BranchProbability Prob);
+
+ /// Normalize probabilities of all successors so that the sum of them becomes
+ /// one. This is usually done when the current update on this MBB is done, and
+ /// the sum of its successors' probabilities is not guaranteed to be one. The
+ /// user is responsible for the correct use of this function.
+ /// MBB::removeSuccessor() has an option to do this automatically.
+ void normalizeSuccProbs() {
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ }
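
A sketch of the intended update pattern (blocks A and B are assumed to exist): attach possibly non-normalized probabilities while rewriting edges, then renormalize once the edits are done.

    MBB->addSuccessor(A, BranchProbability(1, 2)); // 1/2
    MBB->addSuccessor(B, BranchProbability(1, 3)); // 1/3; sum is 5/6, not 1
    MBB->normalizeSuccProbs();                     // rescale so the sum is 1
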
+
+  /// Validate successors' probabilities and check if the sum of them is
+  /// approximately one. This only works in DEBUG mode.
+ void validateSuccProbs() const;
+
+ /// Remove successor from the successors list of this MachineBasicBlock. The
+ /// Predecessors list of Succ is automatically updated.
+ /// If NormalizeSuccProbs is true, then normalize successors' probabilities
+ /// after the successor is removed.
+ void removeSuccessor(MachineBasicBlock *Succ,
+ bool NormalizeSuccProbs = false);
+
+ /// Remove specified successor from the successors list of this
+ /// MachineBasicBlock. The Predecessors list of Succ is automatically updated.
+ /// If NormalizeSuccProbs is true, then normalize successors' probabilities
+ /// after the successor is removed.
+ /// Return the iterator to the element after the one removed.
+ succ_iterator removeSuccessor(succ_iterator I,
+ bool NormalizeSuccProbs = false);
+
+ /// Replace successor OLD with NEW and update probability info.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
+  /// Transfers all the successors from FromMBB to this machine basic block
+  /// (i.e., copies all the successors from FromMBB and removes all the
+  /// successors from FromMBB).
+ void transferSuccessors(MachineBasicBlock *FromMBB);
- /// transferSuccessors - Transfers all the successors from MBB to this
- /// machine basic block (i.e., copies all the successors fromMBB and
- /// remove all the successors from fromMBB).
- void transferSuccessors(MachineBasicBlock *fromMBB);
+ /// Transfers all the successors, as in transferSuccessors, and update PHI
+ /// operands in the successor blocks which refer to FromMBB to refer to this.
+ void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB);
- /// transferSuccessorsAndUpdatePHIs - Transfers all the successors, as
- /// in transferSuccessors, and update PHI operands in the successor blocks
- /// which refer to fromMBB to refer to this.
- void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB);
+ /// Return true if any of the successors have probabilities attached to them.
+ bool hasSuccessorProbabilities() const { return !Probs.empty(); }
- /// isPredecessor - Return true if the specified MBB is a predecessor of this
- /// block.
+ /// Return true if the specified MBB is a predecessor of this block.
bool isPredecessor(const MachineBasicBlock *MBB) const;
- /// isSuccessor - Return true if the specified MBB is a successor of this
- /// block.
+ /// Return true if the specified MBB is a successor of this block.
bool isSuccessor(const MachineBasicBlock *MBB) const;
- /// isLayoutSuccessor - Return true if the specified MBB will be emitted
- /// immediately after this block, such that if this block exits by
- /// falling through, control will transfer to the specified MBB. Note
- /// that MBB need not be a successor at all, for example if this block
- /// ends with an unconditional branch to some other block.
+ /// Return true if the specified MBB will be emitted immediately after this
+ /// block, such that if this block exits by falling through, control will
+ /// transfer to the specified MBB. Note that MBB need not be a successor at
+ /// all, for example if this block ends with an unconditional branch to some
+ /// other block.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const;
- /// canFallThrough - Return true if the block can implicitly transfer
- /// control to the block after it by falling off the end of it. This should
- /// return false if it can reach the block after it, but it uses an explicit
- /// branch to do so (e.g., a table jump). True is a conservative answer.
+ /// Return true if the block can implicitly transfer control to the block
+ /// after it by falling off the end of it. This should return false if it can
+ /// reach the block after it, but it uses an explicit branch to do so (e.g., a
+ /// table jump). True is a conservative answer.
bool canFallThrough();
/// Returns a pointer to the first instruction in this block that is not a
@@ -452,40 +522,44 @@ public:
  /// Returns end() if there's no non-PHI instruction.
iterator getFirstNonPHI();
- /// SkipPHIsAndLabels - Return the first instruction in MBB after I that is
- /// not a PHI or a label. This is the correct point to insert copies at the
- /// beginning of a basic block.
+ /// Return the first instruction in MBB after I that is not a PHI or a label.
+ /// This is the correct point to insert copies at the beginning of a basic
+ /// block.
iterator SkipPHIsAndLabels(iterator I);
- /// getFirstTerminator - returns an iterator to the first terminator
- /// instruction of this basic block. If a terminator does not exist,
- /// it returns end()
+ /// Returns an iterator to the first terminator instruction of this basic
+ /// block. If a terminator does not exist, it returns end().
iterator getFirstTerminator();
const_iterator getFirstTerminator() const {
return const_cast<MachineBasicBlock *>(this)->getFirstTerminator();
}
- /// getFirstInstrTerminator - Same getFirstTerminator but it ignores bundles
- /// and return an instr_iterator instead.
+  /// Same as getFirstTerminator but it ignores bundles and returns an
+ /// instr_iterator instead.
instr_iterator getFirstInstrTerminator();
- /// getFirstNonDebugInstr - returns an iterator to the first non-debug
- /// instruction in the basic block, or end()
+ /// Returns an iterator to the first non-debug instruction in the basic block,
+ /// or end().
iterator getFirstNonDebugInstr();
const_iterator getFirstNonDebugInstr() const {
return const_cast<MachineBasicBlock *>(this)->getFirstNonDebugInstr();
}
- /// getLastNonDebugInstr - returns an iterator to the last non-debug
- /// instruction in the basic block, or end()
+ /// Returns an iterator to the last non-debug instruction in the basic block,
+ /// or end().
iterator getLastNonDebugInstr();
const_iterator getLastNonDebugInstr() const {
return const_cast<MachineBasicBlock *>(this)->getLastNonDebugInstr();
}
- /// SplitCriticalEdge - Split the critical edge from this block to the
- /// given successor block, and return the newly created block, or null
- /// if splitting is not possible.
+ /// Convenience function that returns true if the block ends in a return
+ /// instruction.
+ bool isReturnBlock() const {
+ return !empty() && back().isReturn();
+ }
+
+ /// Split the critical edge from this block to the given successor block, and
+ /// return the newly created block, or null if splitting is not possible.
///
/// This function updates LiveVariables, MachineDominatorTree, and
/// MachineLoopInfo, as applicable.
@@ -570,7 +644,7 @@ public:
/// remove_instr to remove individual instructions from a bundle.
MachineInstr *remove(MachineInstr *I) {
assert(!I->isBundled() && "Cannot remove bundled instructions");
- return Insts.remove(I);
+ return Insts.remove(instr_iterator(I));
}
/// Remove the possibly bundled instruction from the instruction list
@@ -605,30 +679,29 @@ public:
From.getInstrIterator(), To.getInstrIterator());
}
- /// removeFromParent - This method unlinks 'this' from the containing
- /// function, and returns it, but does not delete it.
+ /// This method unlinks 'this' from the containing function, and returns it,
+ /// but does not delete it.
MachineBasicBlock *removeFromParent();
- /// eraseFromParent - This method unlinks 'this' from the containing
- /// function and deletes it.
+ /// This method unlinks 'this' from the containing function and deletes it.
void eraseFromParent();
- /// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
- /// 'Old', change the code and CFG so that it branches to 'New' instead.
+ /// Given a machine basic block that branched to 'Old', change the code and
+ /// CFG so that it branches to 'New' instead.
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New);
- /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in
- /// the CFG to be inserted. If we have proven that MBB can only branch to
- /// DestA and DestB, remove any other MBB successors from the CFG. DestA and
- /// DestB can be null. Besides DestA and DestB, retain other edges leading
- /// to LandingPads (currently there can be only one; we don't check or require
- /// that here). Note it is possible that DestA and/or DestB are LandingPads.
+ /// Various pieces of code can cause excess edges in the CFG to be inserted.
+ /// If we have proven that MBB can only branch to DestA and DestB, remove any
+ /// other MBB successors from the CFG. DestA and DestB can be null. Besides
+ /// DestA and DestB, retain other edges leading to LandingPads (currently
+ /// there can be only one; we don't check or require that here). Note it is
+ /// possible that DestA and/or DestB are LandingPads.
bool CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock *DestB,
- bool isCond);
+ bool IsCond);
- /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
- /// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+ /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+ /// instructions. Return UnknownLoc if there is none.
DebugLoc findDebugLoc(instr_iterator MBBI);
DebugLoc findDebugLoc(iterator MBBI) {
return findDebugLoc(MBBI.getInstrIterator());
@@ -636,12 +709,9 @@ public:
/// Possible outcome of a register liveness query to computeRegisterLiveness()
enum LivenessQueryResult {
- LQR_Live, ///< Register is known to be live.
- LQR_OverlappingLive, ///< Register itself is not live, but some overlapping
- ///< register is.
- LQR_Dead, ///< Register is known to be dead.
- LQR_Unknown ///< Register liveness not decidable from local
- ///< neighborhood.
+ LQR_Live, ///< Register is known to be (at least partially) live.
+ LQR_Dead, ///< Register is known to be fully dead.
+ LQR_Unknown ///< Register liveness not decidable from local neighborhood.
};
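
A conservative way to consume the tri-state result (sketch; TRI, Reg and the iterator I are assumed to be in scope, matching the computeRegisterLiveness declaration that follows in this class):

    switch (MBB->computeRegisterLiveness(TRI, Reg, I)) {
    case MachineBasicBlock::LQR_Dead:
      // Known fully dead: safe to clobber Reg here.
      break;
    case MachineBasicBlock::LQR_Live:
    case MachineBasicBlock::LQR_Unknown:
      // Could not prove deadness: must treat Reg as live.
      break;
    }
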
/// Return whether (physical) register \p Reg has been <def>ined and not
@@ -666,49 +736,43 @@ public:
// Printing method used by LoopInfo.
void printAsOperand(raw_ostream &OS, bool PrintType = true) const;
- /// getNumber - MachineBasicBlocks are uniquely numbered at the function
- /// level, unless they're not in a MachineFunction yet, in which case this
- /// will return -1.
- ///
+ /// MachineBasicBlocks are uniquely numbered at the function level, unless
+ /// they're not in a MachineFunction yet, in which case this will return -1.
int getNumber() const { return Number; }
void setNumber(int N) { Number = N; }
- /// getSymbol - Return the MCSymbol for this basic block.
- ///
+ /// Return the MCSymbol for this basic block.
MCSymbol *getSymbol() const;
private:
- /// getWeightIterator - Return weight iterator corresponding to the I
- /// successor iterator.
- weight_iterator getWeightIterator(succ_iterator I);
- const_weight_iterator getWeightIterator(const_succ_iterator I) const;
+ /// Return probability iterator corresponding to the I successor iterator.
+ probability_iterator getProbabilityIterator(succ_iterator I);
+ const_probability_iterator
+ getProbabilityIterator(const_succ_iterator I) const;
friend class MachineBranchProbabilityInfo;
+ friend class MIPrinter;
- /// getSuccWeight - Return weight of the edge from this block to MBB. This
- /// method should NOT be called directly, but by using getEdgeWeight method
- /// from MachineBranchProbabilityInfo class.
- uint32_t getSuccWeight(const_succ_iterator Succ) const;
-
+ /// Return probability of the edge from this block to MBB. This method should
+ /// NOT be called directly, but by using getEdgeProbability method from
+ /// MachineBranchProbabilityInfo class.
+ BranchProbability getSuccProbability(const_succ_iterator Succ) const;
// Methods used to maintain doubly linked list of blocks...
friend struct ilist_traits<MachineBasicBlock>;
// Machine-CFG mutators
- /// addPredecessor - Remove pred as a predecessor of this MachineBasicBlock.
- /// Don't do this unless you know what you're doing, because it doesn't
- /// update pred's successors list. Use pred->addSuccessor instead.
- ///
- void addPredecessor(MachineBasicBlock *pred);
+ /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this
+ /// unless you know what you're doing, because it doesn't update Pred's
+ /// successors list. Use Pred->addSuccessor instead.
+ void addPredecessor(MachineBasicBlock *Pred);
- /// removePredecessor - Remove pred as a predecessor of this
- /// MachineBasicBlock. Don't do this unless you know what you're
- /// doing, because it doesn't update pred's successors list. Use
- /// pred->removeSuccessor instead.
- ///
- void removePredecessor(MachineBasicBlock *pred);
+ /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this
+ /// unless you know what you're doing, because it doesn't update Pred's
+ /// successors list. Use Pred->removeSuccessor instead.
+ void removePredecessor(MachineBasicBlock *Pred);
};
raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
@@ -726,7 +790,7 @@ struct MBB2NumberFunctor :
//===--------------------------------------------------------------------===//
// Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks...
+// MachineFunction as a graph of MachineBasicBlocks.
//
template <> struct GraphTraits<MachineBasicBlock *> {
@@ -756,7 +820,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
};
// Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks... and to walk it
+// MachineFunction as a graph of MachineBasicBlocks and to walk it
// in inverse order. Inverse order for a function is considered
// to be when traversing the predecessor edges of a MBB
// instead of the successor edges.
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7ba7495..81b0524 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include <climits>
+#include <numeric>
namespace llvm {
@@ -44,20 +45,15 @@ public:
AU.setPreservesAll();
}
- // Return edge weight. If we don't have any informations about it - return
- // DEFAULT_WEIGHT.
- uint32_t getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const;
-
- // Same thing, but using a const_succ_iterator from Src. This is faster when
- // the iterator is already available.
- uint32_t getEdgeWeight(const MachineBasicBlock *Src,
- MachineBasicBlock::const_succ_iterator Dst) const;
+ // Return edge probability.
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
- // Get sum of the block successors' weights, potentially scaling them to fit
- // within 32-bits. If scaling is required, sets Scale based on the necessary
- // adjustment. Any edge weights used with the sum should be divided by Scale.
- uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const;
+ // Same as above, but using a const_succ_iterator from Src. This is faster
+ // when the iterator is already available.
+ BranchProbability
+ getEdgeProbability(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const;
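
Usage sketch for the probability-based interface (MBPI, Src and Dst are assumed to be in scope):

    BranchProbability P = MBPI->getEdgeProbability(Src, Dst);
    // Equivalent to the isEdgeHot() check below: hot means P >= 4/5.
    bool Hot = P >= BranchProbability(4, 5);
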
  // A 'Hot' edge is an edge whose probability is >= 80%.
bool isEdgeHot(const MachineBasicBlock *Src,
@@ -67,15 +63,6 @@ public:
  // NB: This routine's complexity is linear in the number of successors.
MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const;
- // Return a probability as a fraction between 0 (0% probability) and
- // 1 (100% probability), however the value is never equal to 0, and can be 1
- // only iff SRC block has only one successor.
- // NB: This routine's complexity is linear on the number of successors of
- // Src. Querying sequentially for each successor's probability is a quadratic
- // query pattern.
- BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const;
-
// Print value between 0 (0% probability) and 1 (100% probability),
  // however the value is never equal to 0, and can be 1 only if the SRC block
// has only one successor.
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/contrib/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 176af14..f389122 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -17,13 +17,30 @@
namespace llvm {
-/// Enumeration of instruction pattern supported by machine combiner
-///
-///
-namespace MachineCombinerPattern {
-// Forward declaration
-enum MC_PATTERN : int;
-} // end namespace MachineCombinerPattern
+/// These are instruction patterns matched by the machine combiner pass.
+enum class MachineCombinerPattern {
+ // These are commutative variants for reassociating a computation chain. See
+ // the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp.
+ REASSOC_AX_BY,
+ REASSOC_AX_YB,
+ REASSOC_XA_BY,
+ REASSOC_XA_YB,
+
+ // These are multiply-add patterns matched by the AArch64 machine combiner.
+ MULADDW_OP1,
+ MULADDW_OP2,
+ MULSUBW_OP1,
+ MULSUBW_OP2,
+ MULADDWI_OP1,
+ MULSUBWI_OP1,
+ MULADDX_OP1,
+ MULADDX_OP2,
+ MULSUBX_OP1,
+ MULSUBX_OP2,
+ MULADDXI_OP1,
+ MULSUBXI_OP1
+};
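
Illustratively (not from this header), the REASSOC_* patterns shorten a serial dependence chain; the exact operand mapping of each variant is in the TargetInstrInfo.cpp comments referenced above:

    // Before: t1 waits for the (possibly long) chain producing X.
    //   t0 = X + A
    //   t1 = t0 + B
    // After a reassociation rewrite: A + B issues independently of X.
    //   t2 = A + B
    //   t1 = X + t2
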
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
index 6284003..d2036c4 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
@@ -46,13 +46,6 @@ public:
///
Type *getType() const { return Ty; }
-
- /// getRelocationInfo - This method classifies the entry according to
- /// whether or not it may generate a relocation entry. This must be
- /// conservative, so if it might codegen to a relocatable entry, it should say
- /// so. The return values are the same as Constant::getRelocationInfo().
- virtual unsigned getRelocationInfo() const = 0;
-
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) = 0;
@@ -67,7 +60,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
V.print(OS);
return OS;
}
-
/// This class is a data container for one entry in a MachineConstantPool.
/// It contains a pointer to the value and an offset from the start of
@@ -90,9 +82,9 @@ public:
Val.ConstVal = V;
}
MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
- : Alignment(A) {
- Val.MachineCPVal = V;
- Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
+ : Alignment(A) {
+ Val.MachineCPVal = V;
+ Alignment |= 1U << (sizeof(unsigned) * CHAR_BIT - 1);
}
/// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry
@@ -102,28 +94,20 @@ public:
return (int)Alignment < 0;
}
- int getAlignment() const {
- return Alignment & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
+ int getAlignment() const {
+ return Alignment & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
}
Type *getType() const;
-
- /// getRelocationInfo - This method classifies the entry according to
- /// whether or not it may generate a relocation entry. This must be
- /// conservative, so if it might codegen to a relocatable entry, it should say
- /// so. The return values are:
- ///
- /// 0: This constant pool entry is guaranteed to never have a relocation
- /// applied to it (because it holds a simple constant like '4').
- /// 1: This entry has relocations, but the entries are guaranteed to be
- /// resolvable by the static linker, so the dynamic linker will never see
- /// them.
- /// 2: This entry may have arbitrary relocations.
- unsigned getRelocationInfo() const;
+
+ /// This method classifies the entry according to whether or not it may
+ /// generate a relocation entry. This must be conservative, so if it might
+ /// codegen to a relocatable entry, it should say so.
+ bool needsRelocation() const;
SectionKind getSectionKind(const DataLayout *DL) const;
};
-
+
/// The MachineConstantPool class keeps track of constants referenced by a
/// function which must be spilled to memory. This is used for constants which
/// are unable to be used directly as operands to instructions, which typically
@@ -148,17 +132,18 @@ public:
explicit MachineConstantPool(const DataLayout &DL)
: PoolAlignment(1), DL(DL) {}
~MachineConstantPool();
-
+
/// getConstantPoolAlignment - Return the alignment required by
/// the whole constant pool, of which the first element must be aligned.
unsigned getConstantPoolAlignment() const { return PoolAlignment; }
-
+
/// getConstantPoolIndex - Create a new entry in the constant pool or return
/// an existing one. User must specify the minimum required alignment for
/// the object.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment);
- unsigned getConstantPoolIndex(MachineConstantPoolValue *V,unsigned Alignment);
-
+ unsigned getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment);
+
/// isEmpty - Return true if this constant pool contains no constants.
bool isEmpty() const { return Constants.empty(); }
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
index 735dd06..a69936f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -246,21 +246,29 @@ public:
/// iterable by generic graph iterators.
///
-template<class T> struct GraphTraits;
+template <class Node, class ChildIterator>
+struct MachineDomTreeGraphTraitsBase {
+ typedef Node NodeType;
+ typedef ChildIterator ChildIteratorType;
-template <> struct GraphTraits<MachineDomTreeNode *> {
- typedef MachineDomTreeNode NodeType;
- typedef NodeType::iterator ChildIteratorType;
-
- static NodeType *getEntryNode(NodeType *N) {
- return N;
- }
- static inline ChildIteratorType child_begin(NodeType* N) {
+ static NodeType *getEntryNode(NodeType *N) { return N; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
return N->begin();
}
- static inline ChildIteratorType child_end(NodeType* N) {
- return N->end();
- }
+ static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+};
+
+template <class T> struct GraphTraits;
+
+template <>
+struct GraphTraits<MachineDomTreeNode *>
+ : public MachineDomTreeGraphTraitsBase<MachineDomTreeNode,
+ MachineDomTreeNode::iterator> {};
+
+template <>
+struct GraphTraits<const MachineDomTreeNode *>
+ : public MachineDomTreeGraphTraitsBase<const MachineDomTreeNode,
+ MachineDomTreeNode::const_iterator> {
};
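
With these specializations the generic graph iterators can walk the dominator tree; a sketch (assumes llvm/ADT/DepthFirstIterator.h is included and a MachineDominatorTree MDT is in scope):

    for (MachineDomTreeNode *Node : depth_first(MDT.getRootNode())) {
      MachineBasicBlock *BB = Node->getBlock(); // blocks in dominance DFS order
      (void)BB;
    }
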
template <> struct GraphTraits<MachineDominatorTree*>
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index cbc4e66..48e8ca7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -101,6 +101,13 @@ class MachineFrameInfo {
// cannot alias any other memory objects.
bool isSpillSlot;
+ /// If true, this stack slot is used to spill a value (could be deopt
+ /// and/or GC related) over a statepoint. We know that the address of the
+    /// slot can't alias any LLVM IR value. This is very similar to a spill
+    /// slot, but is created by statepoint lowering in SelectionDAG, not the
+ /// register allocator.
+ bool isStatepointSpillSlot;
+
/// If this stack object is originated from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
const AllocaInst *Alloca;
@@ -118,13 +125,24 @@ class MachineFrameInfo {
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
bool isSS, const AllocaInst *Val, bool A)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
- isSpillSlot(isSS), Alloca(Val), PreAllocated(false), isAliased(A) {}
+ isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val),
+ PreAllocated(false), isAliased(A) {}
};
/// The alignment of the stack.
unsigned StackAlignment;
/// Can the stack be realigned.
+ /// Targets that set this to false don't have the ability to overalign
+ /// their stack frame, and thus, overaligned allocas are all treated
+ /// as dynamic allocations and the target must handle them as part
+ /// of DYNAMIC_STACKALLOC lowering.
+ /// FIXME: There is room for improvement in this case, in terms of
+ /// grouping overaligned allocas into a "secondary stack frame" and
+  /// then using only a single alloca to allocate this frame and only a
+  /// single virtual register to access it. Currently, without such an
+  /// optimization, each such alloca gets its own dynamic
+ /// realignment.
bool StackRealignable;
/// The list of stack objects allocated.
@@ -168,7 +186,7 @@ class MachineFrameInfo {
/// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set
/// to the distance between the initial SP and the value in FP. For many
/// targets, this value is only used when generating debug info (via
- /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the
+ /// TargetRegisterInfo::getFrameIndexReference); when generating code, the
/// corresponding adjustments are performed directly.
int OffsetAdjustment;
@@ -198,7 +216,7 @@ class MachineFrameInfo {
/// This contains the size of the largest call frame if the target uses frame
/// setup/destroy pseudo instructions (as defined in the TargetFrameInfo
/// class). This information is important for frame pointer elimination.
- /// If is only valid during and after prolog/epilog code insertion.
+ /// It is only valid during and after prolog/epilog code insertion.
unsigned MaxCallFrameSize;
/// The prolog/epilog code inserter fills in this vector with each
@@ -288,6 +306,7 @@ public:
/// Return the index for the stack protector object.
int getStackProtectorIndex() const { return StackProtectorIdx; }
void setStackProtectorIndex(int I) { StackProtectorIdx = I; }
+ bool hasStackProtectorIndex() const { return StackProtectorIdx != -1; }
/// Return the index for the function context object.
/// This object is used for SjLj exceptions.
@@ -337,14 +356,14 @@ public:
}
  /// Get the local offset mapping for an object.
- std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
+ std::pair<int, int64_t> getLocalFrameObjectMap(int i) const {
assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
"Invalid local object reference!");
return LocalFrameObjects[i];
}
/// Return the number of objects allocated into the local object block.
- int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
+ int64_t getLocalFrameObjectCount() const { return LocalFrameObjects.size(); }
/// Set the size of the local object blob.
void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; }
@@ -361,7 +380,9 @@ public:
/// Get whether the local allocation blob should be allocated together or
/// let PEI allocate the locals in it directly.
- bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;}
+ bool getUseLocalStackAllocationBlock() const {
+ return UseLocalStackAllocationBlock;
+ }
/// setUseLocalStackAllocationBlock - Set whether the local allocation blob
/// should be allocated together or let PEI allocate the locals in it
@@ -534,6 +555,12 @@ public:
return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;
}
+ bool isStatepointSpillSlotObjectIndex(int ObjectIdx) const {
+ assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot;
+ }
+
/// Returns true if the specified index corresponds to a dead object.
bool isDeadObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
@@ -549,6 +576,13 @@ public:
return Objects[ObjectIdx + NumFixedObjects].Size == 0;
}
+ void markAsStatepointSpillSlotObjectIndex(int ObjectIdx) {
+ assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot = true;
+ assert(isStatepointSpillSlotObjectIndex(ObjectIdx) && "inconsistent");
+ }
+
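
A hedged sketch of how statepoint lowering might use the new tag (size and alignment values are illustrative; MFI is an in-scope MachineFrameInfo*):

    int FI = MFI->CreateStackObject(/*Size=*/8, /*Alignment=*/8, /*isSS=*/true);
    MFI->markAsStatepointSpillSlotObjectIndex(FI);
    assert(MFI->isStatepointSpillSlotObjectIndex(FI));
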
/// Create a new statically sized stack object, returning
/// a nonnegative identifier to represent it.
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
index c15ee1c..82c30d3 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -38,10 +38,12 @@ class MachineJumpTableInfo;
class MachineModuleInfo;
class MCContext;
class Pass;
+class PseudoSourceValueManager;
class TargetMachine;
class TargetSubtargetInfo;
class TargetRegisterClass;
struct MachinePointerInfo;
+struct WinEHFuncInfo;
template <>
struct ilist_traits<MachineBasicBlock>
@@ -102,10 +104,14 @@ class MachineFunction {
// Keep track of constants which are spilled to memory
MachineConstantPool *ConstantPool;
-
+
// Keep track of jump tables for switch instructions
MachineJumpTableInfo *JumpTableInfo;
+ // Keeps track of Windows exception handling related data. This will be null
+ // for functions that aren't using a funclet-based EH personality.
+ WinEHFuncInfo *WinEHInfo = nullptr;
+
// Function-level unique numbering for MachineBasicBlocks. When a
  // MachineBasicBlock is inserted into a MachineFunction it is automatically
// numbered and this vector keeps track of the mapping from ID's to MBB's.
@@ -131,7 +137,7 @@ class MachineFunction {
/// this translation unit.
///
unsigned FunctionNumber;
-
+
/// Alignment - The alignment of the function.
unsigned Alignment;
@@ -145,6 +151,9 @@ class MachineFunction {
/// True if the function includes any inline assembly.
bool HasInlineAsm;
+ // Allocation management for pseudo source values.
+ std::unique_ptr<PseudoSourceValueManager> PSVManager;
+
MachineFunction(const MachineFunction &) = delete;
void operator=(const MachineFunction&) = delete;
public:
@@ -155,6 +164,8 @@ public:
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
+ PseudoSourceValueManager &getPSVManager() const { return *PSVManager; }
+
/// Return the DataLayout attached to the Module associated to this MF.
const DataLayout &getDataLayout() const;
@@ -198,7 +209,7 @@ public:
MachineFrameInfo *getFrameInfo() { return FrameInfo; }
const MachineFrameInfo *getFrameInfo() const { return FrameInfo; }
- /// getJumpTableInfo - Return the jump table info object for the current
+ /// getJumpTableInfo - Return the jump table info object for the current
/// function. This object contains information about jump tables in the
/// current function. If the current function has no jump tables, this will
/// return null.
@@ -209,13 +220,18 @@ public:
/// does already exist, allocate one.
MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind);
-
/// getConstantPool - Return the constant pool object for the current
/// function.
///
MachineConstantPool *getConstantPool() { return ConstantPool; }
const MachineConstantPool *getConstantPool() const { return ConstantPool; }
+ /// getWinEHFuncInfo - Return information about how the current function uses
+ /// Windows exception handling. Returns null for functions that don't use
+ /// funclets for exception handling.
+ const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; }
+ WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; }
+
/// getAlignment - Return the alignment (log2, not bytes) of the function.
///
unsigned getAlignment() const { return Alignment; }
@@ -284,14 +300,14 @@ public:
/// getNumBlockIDs - Return the number of MBB ID's allocated.
///
unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
-
+
/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
/// recomputes them. This guarantees that the MBB numbers are sequential,
/// dense, and match the ordering of the blocks within the function. If a
/// specific MachineBasicBlock is specified, only that block and those after
/// it are renumbered.
void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr);
-
+
/// print - Print out the MachineFunction in a format suitable for debugging
/// to the specified stream.
///
@@ -326,6 +342,12 @@ public:
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
+ /// Support for MachineBasicBlock::getNextNode().
+ static BasicBlockListType MachineFunction::*
+ getSublistAccess(MachineBasicBlock *) {
+ return &MachineFunction::BasicBlocks;
+ }
+
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC);
@@ -358,15 +380,21 @@ public:
void splice(iterator InsertPt, iterator MBBI) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI);
}
+ void splice(iterator InsertPt, MachineBasicBlock *MBB) {
+ BasicBlocks.splice(InsertPt, BasicBlocks, MBB);
+ }
void splice(iterator InsertPt, iterator MBBI, iterator MBBE) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE);
}
- void remove(iterator MBBI) {
- BasicBlocks.remove(MBBI);
- }
- void erase(iterator MBBI) {
- BasicBlocks.erase(MBBI);
+ void remove(iterator MBBI) { BasicBlocks.remove(MBBI); }
+ void remove(MachineBasicBlock *MBBI) { BasicBlocks.remove(MBBI); }
+ void erase(iterator MBBI) { BasicBlocks.erase(MBBI); }
+ void erase(MachineBasicBlock *MBBI) { BasicBlocks.erase(MBBI); }
+
+ template <typename Comp>
+ void sort(Comp comp) {
+ BasicBlocks.sort(comp);
}
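
The new sort() forwards a comparator to the underlying block list; for example (the comparator shown is illustrative), a pass could order blocks by their number:

    MF.sort([](const MachineBasicBlock &A, const MachineBasicBlock &B) {
      return A.getNumber() < B.getNumber();
    });
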
//===--------------------------------------------------------------------===//
@@ -425,7 +453,7 @@ public:
unsigned base_alignment,
const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr);
-
+
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, adjusting by an offset and using the given size.
/// MachineMemOperands are owned by the MachineFunction and need not be
@@ -475,16 +503,19 @@ public:
extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
MachineInstr::mmo_iterator End);
+ /// Allocate a string and populate it with the given external symbol name.
+ const char *createExternalSymbolName(StringRef Name);
+
//===--------------------------------------------------------------------===//
// Label Manipulation.
//
-
+
/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
/// normal 'L' label is returned.
- MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
+ MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate = false) const;
-
+
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *getPICBaseSymbol() const;
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index de7e0a2..978864e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
@@ -34,7 +35,6 @@
namespace llvm {
template <typename T> class SmallVectorImpl;
-class AliasAnalysis;
class TargetInstrInfo;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -48,7 +48,8 @@ class MachineMemOperand;
/// MachineFunction is deleted, all the contained MachineInstrs are deallocated
/// without having their destructor called.
///
-class MachineInstr : public ilist_node<MachineInstr> {
+class MachineInstr
+ : public ilist_node_with_parent<MachineInstr, MachineBasicBlock> {
public:
typedef MachineMemOperand **mmo_iterator;
@@ -64,8 +65,10 @@ public:
NoFlags = 0,
FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
- BundledPred = 1 << 1, // Instruction has bundled predecessors.
- BundledSucc = 1 << 2 // Instruction has bundled successors.
+ FrameDestroy = 1 << 1, // Instruction is used as a part of
+ // function frame destruction code.
+ BundledPred = 1 << 2, // Instruction has bundled predecessors.
+ BundledSucc = 1 << 3 // Instruction has bundled successors.
};
private:
const MCInstrDesc *MCID; // Instruction descriptor.
@@ -89,6 +92,12 @@ private:
// information to AsmPrinter.
uint8_t NumMemRefs; // Information on memory references.
+  // Note that MemRefs == nullptr means 'don't know', not 'no memory access'.
+ // Calling code must treat missing information conservatively. If the number
+ // of memory operands required to be precise exceeds the maximum value of
+ // NumMemRefs - currently 256 - we remove the operands entirely. Note also
+  // that this is a non-owning reference to a shared copy-on-write buffer owned
+ // by the MachineFunction and created via MF.allocateMemRefsArray.
mmo_iterator MemRefs;
DebugLoc debugLoc; // Source line information.
@@ -293,42 +302,46 @@ public:
const_mop_iterator operands_end() const { return Operands + NumOperands; }
iterator_range<mop_iterator> operands() {
- return iterator_range<mop_iterator>(operands_begin(), operands_end());
+ return make_range(operands_begin(), operands_end());
}
iterator_range<const_mop_iterator> operands() const {
- return iterator_range<const_mop_iterator>(operands_begin(), operands_end());
+ return make_range(operands_begin(), operands_end());
}
iterator_range<mop_iterator> explicit_operands() {
- return iterator_range<mop_iterator>(
- operands_begin(), operands_begin() + getNumExplicitOperands());
+ return make_range(operands_begin(),
+ operands_begin() + getNumExplicitOperands());
}
iterator_range<const_mop_iterator> explicit_operands() const {
- return iterator_range<const_mop_iterator>(
- operands_begin(), operands_begin() + getNumExplicitOperands());
+ return make_range(operands_begin(),
+ operands_begin() + getNumExplicitOperands());
}
iterator_range<mop_iterator> implicit_operands() {
- return iterator_range<mop_iterator>(explicit_operands().end(),
- operands_end());
+ return make_range(explicit_operands().end(), operands_end());
}
iterator_range<const_mop_iterator> implicit_operands() const {
- return iterator_range<const_mop_iterator>(explicit_operands().end(),
- operands_end());
+ return make_range(explicit_operands().end(), operands_end());
}
+ /// Returns a range over all explicit operands that are register definitions.
+ /// Implicit definition are not included!
iterator_range<mop_iterator> defs() {
- return iterator_range<mop_iterator>(
- operands_begin(), operands_begin() + getDesc().getNumDefs());
+ return make_range(operands_begin(),
+ operands_begin() + getDesc().getNumDefs());
}
+ /// \copydoc defs()
iterator_range<const_mop_iterator> defs() const {
- return iterator_range<const_mop_iterator>(
- operands_begin(), operands_begin() + getDesc().getNumDefs());
+ return make_range(operands_begin(),
+ operands_begin() + getDesc().getNumDefs());
}
+ /// Returns a range that includes all operands that are register uses.
+ /// This may include unrelated operands which are not register uses.
iterator_range<mop_iterator> uses() {
- return iterator_range<mop_iterator>(
- operands_begin() + getDesc().getNumDefs(), operands_end());
+ return make_range(operands_begin() + getDesc().getNumDefs(),
+ operands_end());
}
+ /// \copydoc uses()
iterator_range<const_mop_iterator> uses() const {
- return iterator_range<const_mop_iterator>(
- operands_begin() + getDesc().getNumDefs(), operands_end());
+ return make_range(operands_begin() + getDesc().getNumDefs(),
+ operands_end());
}
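
Per the comments above, uses() may include non-register operands, so callers filter explicitly (sketch, assuming an in-scope MachineInstr *MI):

    unsigned NumRegUses = 0;
    for (const MachineOperand &MO : MI->uses())
      if (MO.isReg() && MO.getReg())
        ++NumRegUses; // counts only real register uses
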
/// Returns the number of the operand iterator \p I points to.
@@ -339,13 +352,16 @@ public:
/// Access to memory operands of the instruction
mmo_iterator memoperands_begin() const { return MemRefs; }
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
+  /// Return true if we don't have any memory operands which describe the
+ /// memory access done by this instruction. If this is true, calling code
+ /// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; }
iterator_range<mmo_iterator> memoperands() {
- return iterator_range<mmo_iterator>(memoperands_begin(), memoperands_end());
+ return make_range(memoperands_begin(), memoperands_end());
}
iterator_range<mmo_iterator> memoperands() const {
- return iterator_range<mmo_iterator>(memoperands_begin(), memoperands_end());
+ return make_range(memoperands_begin(), memoperands_end());
}
/// Return true if this instruction has exactly one MachineMemOperand.
@@ -489,8 +505,8 @@ public:
}
/// Return true if this instruction is convergent.
- /// Convergent instructions can only be moved to locations that are
- /// control-equivalent to their initial position.
+  /// Convergent instructions cannot be made control-dependent on any
+ /// additional values.
bool isConvergent(QueryType Type = AnyInBundle) const {
return hasProperty(MCID::Convergent, Type);
}
@@ -897,6 +913,13 @@ public:
return (Idx == -1) ? nullptr : &getOperand(Idx);
}
+ const MachineOperand *findRegisterUseOperand(
+ unsigned Reg, bool isKill = false,
+ const TargetRegisterInfo *TRI = nullptr) const {
+ return const_cast<MachineInstr *>(this)->
+ findRegisterUseOperand(Reg, isKill, TRI);
+ }
+
/// Returns the operand index that is a def of the specified register or
/// -1 if it is not found. If isDead is true, defs that are not dead are
/// skipped. If Overlap is true, then it also looks for defs that merely
@@ -1048,7 +1071,7 @@ public:
/// Mark all subregister defs of register @p Reg with the undef flag.
/// This function is used when we determined to have a subregister def in an
/// otherwise undefined super register.
- void addRegisterDefReadUndef(unsigned Reg);
+ void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true);
/// We have determined MI defines a register. Make sure there is an operand
/// defining Reg.
@@ -1094,6 +1117,9 @@ public:
///
bool hasUnmodeledSideEffects() const;
+ /// Returns true if it is illegal to fold a load across this instruction.
+ bool isLoadFoldBarrier() const;
+
/// Return true if all the defs of this instruction are dead.
bool allDefsAreDead() const;
@@ -1159,8 +1185,11 @@ public:
assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
}
- /// Clear this MachineInstr's memory reference descriptor list.
- void clearMemRefs() {
+ /// Clear this MachineInstr's memory reference descriptor list. This resets
+ /// the memrefs to their most conservative state. This should be used only
+ /// as a last resort since it greatly pessimizes our knowledge of the memory
+ /// access performed by the instruction.
+ void dropMemRefs() {
MemRefs = nullptr;
NumMemRefs = 0;
}
@@ -1174,6 +1203,8 @@ public:
}
}
+ /// Add all implicit def and use operands to this instruction.
+ void addImplicitDefUseOperands(MachineFunction &MF);
private:
/// If this instruction is embedded into a MachineFunction, return the
@@ -1181,9 +1212,6 @@ private:
/// return null.
MachineRegisterInfo *getRegInfo();
- /// Add all implicit def and use operands to this instruction.
- void addImplicitDefUseOperands(MachineFunction &MF);
-
/// Unlink all of the register operands in this instruction from their
/// respective use lists. This requires that the operands already be on their
/// use lists.
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 4f68f38..aa5f4b2 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -49,11 +49,10 @@ public:
MachineInstrBuilder() : MF(nullptr), MI(nullptr) {}
/// Create a MachineInstrBuilder for manipulating an existing instruction.
- /// F must be the machine function that was used to allocate I.
+ /// F must be the machine function that was used to allocate I.
MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {}
/// Allow automatic conversion to the machine instruction we are working on.
- ///
operator MachineInstr*() const { return MI; }
MachineInstr *operator->() const { return MI; }
operator MachineBasicBlock::iterator() const { return MI; }
@@ -62,11 +61,9 @@ public:
/// explicitly.
MachineInstr *getInstr() const { return MI; }
- /// addReg - Add a new virtual register operand...
- ///
- const
- MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
- unsigned SubReg = 0) const {
+ /// Add a new virtual register operand.
+ const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
+ unsigned SubReg = 0) const {
assert((flags & 0x1) == 0 &&
"Passing in 'true' to addReg is forbidden! Use enums instead.");
MI->addOperand(*MF, MachineOperand::CreateReg(RegNo,
@@ -82,8 +79,7 @@ public:
return *this;
}
- /// addImm - Add a new immediate operand.
- ///
+ /// Add a new immediate operand.
const MachineInstrBuilder &addImm(int64_t Val) const {
MI->addOperand(*MF, MachineOperand::CreateImm(Val));
return *this;
@@ -204,44 +200,44 @@ public:
// Add a displacement from an existing MachineOperand with an added offset.
const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off,
unsigned char TargetFlags = 0) const {
+ // If caller specifies new TargetFlags then use it, otherwise the
+ // default behavior is to copy the target flags from the existing
+ // MachineOperand. This means if the caller wants to clear the
+ // target flags it needs to do so explicitly.
+ if (0 == TargetFlags)
+ TargetFlags = Disp.getTargetFlags();
+
switch (Disp.getType()) {
default:
llvm_unreachable("Unhandled operand type in addDisp()");
case MachineOperand::MO_Immediate:
return addImm(Disp.getImm() + off);
- case MachineOperand::MO_GlobalAddress: {
- // If caller specifies new TargetFlags then use it, otherwise the
- // default behavior is to copy the target flags from the existing
- // MachineOperand. This means if the caller wants to clear the
- // target flags it needs to do so explicitly.
- if (TargetFlags)
- return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
- TargetFlags);
+ case MachineOperand::MO_ConstantPoolIndex:
+ return addConstantPoolIndex(Disp.getIndex(), Disp.getOffset() + off,
+ TargetFlags);
+ case MachineOperand::MO_GlobalAddress:
return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
- Disp.getTargetFlags());
- }
+ TargetFlags);
}
}
/// Copy all the implicit operands from OtherMI onto this one.
- const MachineInstrBuilder &copyImplicitOps(const MachineInstr *OtherMI) {
+ const MachineInstrBuilder &
+ copyImplicitOps(const MachineInstr *OtherMI) const {
MI->copyImplicitOps(*MF, OtherMI);
return *this;
}
};
-/// BuildMI - Builder interface. Specify how to create the initial instruction
-/// itself.
-///
+/// Builder interface. Specify how to create the initial instruction itself.
inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
const MCInstrDesc &MCID) {
return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL));
}
-/// BuildMI - This version of the builder sets up the first operand as a
+/// This version of the builder sets up the first operand as a
/// destination virtual register.
-///
inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
const MCInstrDesc &MCID,
@@ -250,10 +246,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF,
.addReg(DestReg, RegState::Define);
}
-/// BuildMI - This version of the builder inserts the newly-built
-/// instruction before the given position in the given MachineBasicBlock, and
-/// sets up the first operand as a destination virtual register.
-///
+/// This version of the builder inserts the newly-built instruction before
+/// the given position in the given MachineBasicBlock, and sets up the first
+/// operand as a destination virtual register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
DebugLoc DL,
@@ -282,7 +277,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
const MCInstrDesc &MCID,
unsigned DestReg) {
if (I->isInsideBundle()) {
- MachineBasicBlock::instr_iterator MII = I;
+ MachineBasicBlock::instr_iterator MII(I);
return BuildMI(BB, MII, DL, MCID, DestReg);
}
@@ -290,10 +285,9 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return BuildMI(BB, MII, DL, MCID, DestReg);
}
-/// BuildMI - This version of the builder inserts the newly-built
-/// instruction before the given position in the given MachineBasicBlock, and
-/// does NOT take a destination register.
-///
+/// This version of the builder inserts the newly-built instruction before the
+/// given position in the given MachineBasicBlock, and does NOT take a
+/// destination register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
DebugLoc DL,
@@ -319,7 +313,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
DebugLoc DL,
const MCInstrDesc &MCID) {
if (I->isInsideBundle()) {
- MachineBasicBlock::instr_iterator MII = I;
+ MachineBasicBlock::instr_iterator MII(I);
return BuildMI(BB, MII, DL, MCID);
}
@@ -327,20 +321,17 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return BuildMI(BB, MII, DL, MCID);
}
-/// BuildMI - This version of the builder inserts the newly-built
-/// instruction at the end of the given MachineBasicBlock, and does NOT take a
-/// destination register.
-///
+/// This version of the builder inserts the newly-built instruction at the end
+/// of the given MachineBasicBlock, and does NOT take a destination register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
DebugLoc DL,
const MCInstrDesc &MCID) {
return BuildMI(*BB, BB->end(), DL, MCID);
}
-/// BuildMI - This version of the builder inserts the newly-built
-/// instruction at the end of the given MachineBasicBlock, and sets up the first
-/// operand as a destination virtual register.
-///
+/// This version of the builder inserts the newly-built instruction at the
+/// end of the given MachineBasicBlock, and sets up the first operand as a
+/// destination virtual register.
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
DebugLoc DL,
const MCInstrDesc &MCID,
@@ -348,11 +339,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
return BuildMI(*BB, BB->end(), DL, MCID, DestReg);
}
-/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic
+/// This version of the builder builds a DBG_VALUE intrinsic
/// for either a value in a register or a register-indirect+offset
/// address. The convention is that a DBG_VALUE is indirect iff the
/// second operand is an immediate.
-///
inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL,
const MCInstrDesc &MCID, bool IsIndirect,
unsigned Reg, unsigned Offset,
@@ -377,10 +367,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL,
}
}
-/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic
+/// This version of the builder builds a DBG_VALUE intrinsic
/// for either a value in a register or a register-indirect+offset
/// address and inserts it at position I.
-///
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I, DebugLoc DL,
const MCInstrDesc &MCID, bool IsIndirect,
@@ -476,7 +465,7 @@ public:
if (I == Begin) {
if (!empty())
MI->bundleWithSucc();
- Begin = MI;
+ Begin = MI->getIterator();
return *this;
}
if (I == End) {
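
[Note: the addDisp() change above is a semantic one: a TargetFlags argument of 0 now means "inherit Disp's flags", so callers can no longer clear flags by passing 0. A hedged sketch; Opc, DstReg, SrcReg, Base, MBB, InsertPt, DL, TII and MyFlag are placeholders supplied by the surrounding pass:

    MachineInstrBuilder MIB =
        BuildMI(*MBB, InsertPt, DL, TII->get(Opc), DstReg).addReg(SrcReg);
    MIB.addDisp(Base, /*off=*/8);           // inherits Base.getTargetFlags()
    MIB.addDisp(Base, /*off=*/8, MyFlag);   // a nonzero value overrides
]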
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index 1220224..4fbe206 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -28,7 +28,7 @@ namespace llvm {
void finalizeBundle(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator FirstMI,
MachineBasicBlock::instr_iterator LastMI);
-
+
/// finalizeBundle - Same functionality as the previous finalizeBundle except
/// the last instruction in the bundle is not provided as an input. This is
/// used in cases where bundles are pre-determined by marking instructions
@@ -44,23 +44,23 @@ bool finalizeBundles(MachineFunction &MF);
/// getBundleStart - Returns the first instruction in the bundle containing MI.
///
inline MachineInstr *getBundleStart(MachineInstr *MI) {
- MachineBasicBlock::instr_iterator I = MI;
+ MachineBasicBlock::instr_iterator I(MI);
while (I->isBundledWithPred())
--I;
- return I;
+ return &*I;
}
inline const MachineInstr *getBundleStart(const MachineInstr *MI) {
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I(MI);
while (I->isBundledWithPred())
--I;
- return I;
+ return &*I;
}
/// Return an iterator pointing beyond the bundle containing MI.
inline MachineBasicBlock::instr_iterator
getBundleEnd(MachineInstr *MI) {
- MachineBasicBlock::instr_iterator I = MI;
+ MachineBasicBlock::instr_iterator I(MI);
while (I->isBundledWithSucc())
++I;
return ++I;
@@ -69,7 +69,7 @@ getBundleEnd(MachineInstr *MI) {
/// Return an iterator pointing beyond the bundle containing MI.
inline MachineBasicBlock::const_instr_iterator
getBundleEnd(const MachineInstr *MI) {
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I(MI);
while (I->isBundledWithSucc())
++I;
return ++I;
@@ -116,10 +116,10 @@ protected:
///
explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) {
if (WholeBundle) {
- InstrI = getBundleStart(MI);
+ InstrI = getBundleStart(MI)->getIterator();
InstrE = MI->getParent()->instr_end();
} else {
- InstrI = InstrE = MI;
+ InstrI = InstrE = MI->getIterator();
++InstrE;
}
OpI = InstrI->operands_begin();
@@ -164,27 +164,32 @@ public:
bool Tied;
};
- /// PhysRegInfo - Information about a physical register used by a set of
+ /// Information about how a physical register Reg is used by a set of
/// operands.
struct PhysRegInfo {
- /// Clobbers - Reg or an overlapping register is defined, or a regmask
- /// clobbers Reg.
- bool Clobbers;
-
- /// Defines - Reg or a super-register is defined.
- bool Defines;
-
- /// Reads - Read or a super-register is read.
- bool Reads;
-
- /// ReadsOverlap - Reg or an overlapping register is read.
- bool ReadsOverlap;
-
- /// DefinesDead - All defs of a Reg or a super-register are dead.
- bool DefinesDead;
-
- /// There is a kill of Reg or a super-register.
- bool Kills;
+ /// There is a regmask operand indicating Reg is clobbered.
+ /// \see MachineOperand::CreateRegMask().
+ bool Clobbered;
+
+ /// Reg or one of its aliases is defined. The definition may only cover
+ /// parts of the register.
+ bool Defined;
+ /// Reg or a super-register is defined. The definition covers the full
+ /// register.
+ bool FullyDefined;
+
+ /// Reg or ont of its aliases is read. The register may only be read
+ /// partially.
+ bool Read;
+ /// Reg or a super-register is read. The full register is read.
+ bool FullyRead;
+
+ /// Reg is FullyDefined and all defs of reg or an overlapping register are
+ /// dead.
+ bool DeadDef;
+
+ /// There is a use operand of reg or a super-register with kill flag set.
+ bool Killed;
};
/// analyzeVirtReg - Analyze how the current instruction or bundle uses a
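
[Note: the bundle helpers now construct instr_iterators explicitly rather than relying on the implicit MachineInstr* conversion. A minimal sketch of walking a bundle with the functions declared in this header; MI and visit() are assumed to come from the caller:

    for (MachineBasicBlock::instr_iterator I(getBundleStart(MI)),
                                           E = getBundleEnd(MI);
         I != E; ++I)
      visit(*I);   // visit() is a placeholder for per-instruction work
]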
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
index a73b92f..1ca0d90 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -27,6 +27,7 @@ namespace llvm {
class FoldingSetNodeID;
class MDNode;
class raw_ostream;
+class MachineFunction;
class ModuleSlotTracker;
/// MachinePointerInfo - This class contains a discriminated union of
@@ -62,22 +63,23 @@ struct MachinePointerInfo {
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
- static MachinePointerInfo getConstantPool();
+ static MachinePointerInfo getConstantPool(MachineFunction &MF);
/// getFixedStack - Return a MachinePointerInfo record that refers to the
/// the specified FrameIndex.
- static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0);
+ static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI,
+ int64_t Offset = 0);
/// getJumpTable - Return a MachinePointerInfo record that refers to a
/// jump table entry.
- static MachinePointerInfo getJumpTable();
+ static MachinePointerInfo getJumpTable(MachineFunction &MF);
/// getGOT - Return a MachinePointerInfo record that refers to a
/// GOT entry.
- static MachinePointerInfo getGOT();
+ static MachinePointerInfo getGOT(MachineFunction &MF);
/// getStack - stack pointer relative access.
- static MachinePointerInfo getStack(int64_t Offset);
+ static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset);
};
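
[Note: every MachinePointerInfo factory now threads the MachineFunction through, which lets the pointer info reach the per-function PseudoSourceValue manager introduced later in this patch. A sketch of the updated call shape, assuming MF, FI, Size and Align come from the caller:

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FI),
        MachineMemOperand::MOLoad, Size, Align);
]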
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 4cdfe24..7757112 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -35,11 +35,12 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
@@ -59,7 +60,6 @@ class MachineFunction;
class Module;
class PointerType;
class StructType;
-struct WinEHFuncInfo;
struct SEHHandler {
// Filter or finally function. Null indicates a catch-all.
@@ -79,13 +79,10 @@ struct LandingPadInfo {
SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
SmallVector<SEHHandler, 1> SEHHandlers; // SEH handlers active at this lpad.
MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
- const Function *Personality; // Personality function.
std::vector<int> TypeIds; // List of type ids (filters negative).
- int WinEHState; // WinEH specific state number.
explicit LandingPadInfo(MachineBasicBlock *MBB)
- : LandingPadBlock(MBB), LandingPadLabel(nullptr), Personality(nullptr),
- WinEHState(-1) {}
+ : LandingPadBlock(MBB), LandingPadLabel(nullptr) {}
};
//===----------------------------------------------------------------------===//
@@ -163,6 +160,13 @@ class MachineModuleInfo : public ImmutablePass {
bool CallsEHReturn;
bool CallsUnwindInit;
+ bool HasEHFunclets;
+
+ // TODO: Ideally, what we'd like is to have a switch that allows emitting
+ // synchronous (precise at call-sites only) CFA into .eh_frame. However,
+  // even under this switch, we'd like .debug_frame to be precise when using
+ // -g. At this moment, there's no way to specify that some CFI directives
+ // go into .eh_frame only, while others go into .debug_frame only.
/// DbgInfoAvailable - True if debugging information is available
/// in this module.
@@ -182,8 +186,6 @@ class MachineModuleInfo : public ImmutablePass {
EHPersonality PersonalityTypeCache;
- DenseMap<const Function *, std::unique_ptr<WinEHFuncInfo>> FuncInfoMap;
-
public:
static char ID; // Pass identification, replacement for typeid
@@ -220,12 +222,6 @@ public:
void setModule(const Module *M) { TheModule = M; }
const Module *getModule() const { return TheModule; }
- const Function *getWinEHParent(const Function *F) const;
- WinEHFuncInfo &getWinEHFuncInfo(const Function *F);
- bool hasWinEHFuncInfo(const Function *F) const {
- return FuncInfoMap.count(getWinEHParent(F)) > 0;
- }
-
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
///
@@ -252,6 +248,9 @@ public:
bool callsUnwindInit() const { return CallsUnwindInit; }
void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
+ bool hasEHFunclets() const { return HasEHFunclets; }
+ void setHasEHFunclets(bool V) { HasEHFunclets = V; }
+
bool usesVAFloatArgument() const {
return UsesVAFloatArgument;
}
@@ -318,16 +317,8 @@ public:
/// addPersonality - Provide the personality function for the exception
/// information.
- void addPersonality(MachineBasicBlock *LandingPad,
- const Function *Personality);
void addPersonality(const Function *Personality);
- void addWinEHState(MachineBasicBlock *LandingPad, int State);
-
- /// getPersonalityIndex - Get index of the current personality function inside
- /// Personalitites array
- unsigned getPersonalityIndex() const;
-
/// getPersonalities - Return array of personality functions ever seen.
const std::vector<const Function *>& getPersonalities() const {
return Personalities;
@@ -426,13 +417,6 @@ public:
return FilterIds;
}
- /// getPersonality - Return a personality function if available. The presence
- /// of one is required to emit exception handling info.
- const Function *getPersonality() const;
-
- /// Classify the personality function amongst known EH styles.
- EHPersonality getPersonalityType();
-
/// setVariableDbgInfo - Collect information used to emit debugging
/// information of a variable.
void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
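
[Note: the new HasEHFunclets flag replaces the per-function WinEH queries removed above. A hedged sketch of how lowering code might set it, assuming Personality is in scope and the EHPersonalities.h classifiers included at the top of this file:

    if (isFuncletEHPersonality(classifyEHPersonality(Personality)))
      MMI.setHasEHFunclets(true);
]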
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
index a67f9b5..e747214 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -18,79 +18,71 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
namespace llvm {
- class MCSymbol;
-
- /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation
- /// for MachO targets.
- class MachineModuleInfoMachO : public MachineModuleInfoImpl {
- /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub",
- /// the value is something like "_foo".
- DenseMap<MCSymbol*, StubValueTy> FnStubs;
-
- /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
- /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit
- /// is true if this GV is external.
- DenseMap<MCSymbol*, StubValueTy> GVStubs;
-
- /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
- /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs
- /// these are for things with hidden visibility. The extra bit is true if
- /// this GV is external.
- DenseMap<MCSymbol*, StubValueTy> HiddenGVStubs;
-
- virtual void anchor(); // Out of line virtual method.
- public:
- MachineModuleInfoMachO(const MachineModuleInfo &) {}
-
- StubValueTy &getFnStubEntry(MCSymbol *Sym) {
- assert(Sym && "Key cannot be null");
- return FnStubs[Sym];
- }
-
- StubValueTy &getGVStubEntry(MCSymbol *Sym) {
- assert(Sym && "Key cannot be null");
- return GVStubs[Sym];
- }
-
- StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) {
- assert(Sym && "Key cannot be null");
- return HiddenGVStubs[Sym];
- }
-
- /// Accessor methods to return the set of stubs in sorted order.
- SymbolListTy GetFnStubList() {
- return getSortedStubs(FnStubs);
- }
- SymbolListTy GetGVStubList() {
- return getSortedStubs(GVStubs);
- }
- SymbolListTy GetHiddenGVStubList() {
- return getSortedStubs(HiddenGVStubs);
- }
- };
-
- /// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation
- /// for ELF targets.
- class MachineModuleInfoELF : public MachineModuleInfoImpl {
- /// GVStubs - These stubs are used to materialize global addresses in PIC
- /// mode.
- DenseMap<MCSymbol*, StubValueTy> GVStubs;
-
- virtual void anchor(); // Out of line virtual method.
- public:
- MachineModuleInfoELF(const MachineModuleInfo &) {}
-
- StubValueTy &getGVStubEntry(MCSymbol *Sym) {
- assert(Sym && "Key cannot be null");
- return GVStubs[Sym];
- }
-
- /// Accessor methods to return the set of stubs in sorted order.
-
- SymbolListTy GetGVStubList() {
- return getSortedStubs(GVStubs);
- }
- };
+class MCSymbol;
+
+/// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation
+/// for MachO targets.
+class MachineModuleInfoMachO : public MachineModuleInfoImpl {
+ /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub",
+ /// the value is something like "_foo".
+ DenseMap<MCSymbol *, StubValueTy> FnStubs;
+
+ /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+ /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit
+ /// is true if this GV is external.
+ DenseMap<MCSymbol *, StubValueTy> GVStubs;
+
+ /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+ /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs
+ /// these are for things with hidden visibility. The extra bit is true if
+ /// this GV is external.
+ DenseMap<MCSymbol *, StubValueTy> HiddenGVStubs;
+
+ virtual void anchor(); // Out of line virtual method.
+public:
+ MachineModuleInfoMachO(const MachineModuleInfo &) {}
+
+ StubValueTy &getFnStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return FnStubs[Sym];
+ }
+
+ StubValueTy &getGVStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return GVStubs[Sym];
+ }
+
+ StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return HiddenGVStubs[Sym];
+ }
+
+ /// Accessor methods to return the set of stubs in sorted order.
+ SymbolListTy GetFnStubList() { return getSortedStubs(FnStubs); }
+ SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
+ SymbolListTy GetHiddenGVStubList() { return getSortedStubs(HiddenGVStubs); }
+};
+
+/// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation
+/// for ELF targets.
+class MachineModuleInfoELF : public MachineModuleInfoImpl {
+ /// GVStubs - These stubs are used to materialize global addresses in PIC
+ /// mode.
+ DenseMap<MCSymbol *, StubValueTy> GVStubs;
+
+ virtual void anchor(); // Out of line virtual method.
+public:
+ MachineModuleInfoELF(const MachineModuleInfo &) {}
+
+ StubValueTy &getGVStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return GVStubs[Sym];
+ }
+
+ /// Accessor methods to return the set of stubs in sorted order.
+
+ SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
+};
} // end namespace llvm
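
[Note: the reindentation above leaves the stub-map API unchanged. For context, the usual AsmPrinter-side pattern looks like the sketch below; Sym, TargetSym and MMI are assumed, and StubValueTy is the PointerIntPair declared in MachineModuleInfoImpl:

    MachineModuleInfoMachO &MachOMMI =
        MMI.getObjFileInfo<MachineModuleInfoMachO>();
    MachineModuleInfoImpl::StubValueTy &Entry = MachOMMI.getGVStubEntry(Sym);
    if (!Entry.getPointer())   // first use: record the stub's target
      Entry = MachineModuleInfoImpl::StubValueTy(TargetSym,
                                                 /*isExternal=*/true);
]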
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 5e607cd..04191bc 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -73,7 +73,7 @@ private:
/// PhysRegUseDefLists - This is an array of the head of the use/def list for
/// physical registers.
- std::vector<MachineOperand *> PhysRegUseDefLists;
+ std::unique_ptr<MachineOperand *[]> PhysRegUseDefLists;
/// getRegUseDefListHead - Return the head pointer for the register use/def
/// list for the specified virtual or physical register.
@@ -95,20 +95,8 @@ private:
return MO->Contents.Reg.Next;
}
- /// UsedRegUnits - This is a bit vector that is computed and set by the
- /// register allocator, and must be kept up to date by passes that run after
- /// register allocation (though most don't modify this). This is used
- /// so that the code generator knows which callee save registers to save and
- /// for other target specific uses.
- /// This vector has bits set for register units that are modified in the
- /// current function. It doesn't include registers clobbered by function
- /// calls with register mask operands.
- BitVector UsedRegUnits;
-
/// UsedPhysRegMask - Additional used physregs including aliases.
/// This bit vector represents all the registers clobbered by function calls.
- /// It can model things that UsedRegUnits can't, such as function calls that
- /// clobber ymm7 but preserve the low half in xmm7.
BitVector UsedPhysRegMask;
/// ReservedRegs - This is a bit vector of reserved registers. The target
@@ -246,7 +234,7 @@ public:
static reg_iterator reg_end() { return reg_iterator(nullptr); }
inline iterator_range<reg_iterator> reg_operands(unsigned Reg) const {
- return iterator_range<reg_iterator>(reg_begin(Reg), reg_end());
+ return make_range(reg_begin(Reg), reg_end());
}
/// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses
@@ -262,8 +250,7 @@ public:
inline iterator_range<reg_instr_iterator>
reg_instructions(unsigned Reg) const {
- return iterator_range<reg_instr_iterator>(reg_instr_begin(Reg),
- reg_instr_end());
+ return make_range(reg_instr_begin(Reg), reg_instr_end());
}
/// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses
@@ -278,8 +265,7 @@ public:
}
inline iterator_range<reg_bundle_iterator> reg_bundles(unsigned Reg) const {
- return iterator_range<reg_bundle_iterator>(reg_bundle_begin(Reg),
- reg_bundle_end());
+ return make_range(reg_bundle_begin(Reg), reg_bundle_end());
}
/// reg_empty - Return true if there are no instructions using or defining the
@@ -299,8 +285,7 @@ public:
inline iterator_range<reg_nodbg_iterator>
reg_nodbg_operands(unsigned Reg) const {
- return iterator_range<reg_nodbg_iterator>(reg_nodbg_begin(Reg),
- reg_nodbg_end());
+ return make_range(reg_nodbg_begin(Reg), reg_nodbg_end());
}
/// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk
@@ -317,8 +302,7 @@ public:
inline iterator_range<reg_instr_nodbg_iterator>
reg_nodbg_instructions(unsigned Reg) const {
- return iterator_range<reg_instr_nodbg_iterator>(reg_instr_nodbg_begin(Reg),
- reg_instr_nodbg_end());
+ return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end());
}
/// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk
@@ -333,10 +317,9 @@ public:
return reg_bundle_nodbg_iterator(nullptr);
}
- inline iterator_range<reg_bundle_nodbg_iterator>
+ inline iterator_range<reg_bundle_nodbg_iterator>
reg_nodbg_bundles(unsigned Reg) const {
- return iterator_range<reg_bundle_nodbg_iterator>(reg_bundle_nodbg_begin(Reg),
- reg_bundle_nodbg_end());
+ return make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end());
}
/// reg_nodbg_empty - Return true if the only instructions using or defining
@@ -354,7 +337,7 @@ public:
static def_iterator def_end() { return def_iterator(nullptr); }
inline iterator_range<def_iterator> def_operands(unsigned Reg) const {
- return iterator_range<def_iterator>(def_begin(Reg), def_end());
+ return make_range(def_begin(Reg), def_end());
}
/// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the
@@ -370,8 +353,7 @@ public:
inline iterator_range<def_instr_iterator>
def_instructions(unsigned Reg) const {
- return iterator_range<def_instr_iterator>(def_instr_begin(Reg),
- def_instr_end());
+ return make_range(def_instr_begin(Reg), def_instr_end());
}
/// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the
@@ -386,8 +368,7 @@ public:
}
inline iterator_range<def_bundle_iterator> def_bundles(unsigned Reg) const {
- return iterator_range<def_bundle_iterator>(def_bundle_begin(Reg),
- def_bundle_end());
+ return make_range(def_bundle_begin(Reg), def_bundle_end());
}
/// def_empty - Return true if there are no instructions defining the
@@ -412,7 +393,7 @@ public:
static use_iterator use_end() { return use_iterator(nullptr); }
inline iterator_range<use_iterator> use_operands(unsigned Reg) const {
- return iterator_range<use_iterator>(use_begin(Reg), use_end());
+ return make_range(use_begin(Reg), use_end());
}
/// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the
@@ -428,8 +409,7 @@ public:
inline iterator_range<use_instr_iterator>
use_instructions(unsigned Reg) const {
- return iterator_range<use_instr_iterator>(use_instr_begin(Reg),
- use_instr_end());
+ return make_range(use_instr_begin(Reg), use_instr_end());
}
/// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the
@@ -444,8 +424,7 @@ public:
}
inline iterator_range<use_bundle_iterator> use_bundles(unsigned Reg) const {
- return iterator_range<use_bundle_iterator>(use_bundle_begin(Reg),
- use_bundle_end());
+ return make_range(use_bundle_begin(Reg), use_bundle_end());
}
/// use_empty - Return true if there are no instructions using the specified
@@ -474,8 +453,7 @@ public:
inline iterator_range<use_nodbg_iterator>
use_nodbg_operands(unsigned Reg) const {
- return iterator_range<use_nodbg_iterator>(use_nodbg_begin(Reg),
- use_nodbg_end());
+ return make_range(use_nodbg_begin(Reg), use_nodbg_end());
}
/// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk
@@ -492,8 +470,7 @@ public:
inline iterator_range<use_instr_nodbg_iterator>
use_nodbg_instructions(unsigned Reg) const {
- return iterator_range<use_instr_nodbg_iterator>(use_instr_nodbg_begin(Reg),
- use_instr_nodbg_end());
+ return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end());
}
/// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk
@@ -510,8 +487,7 @@ public:
inline iterator_range<use_bundle_nodbg_iterator>
use_nodbg_bundles(unsigned Reg) const {
- return iterator_range<use_bundle_nodbg_iterator>(use_bundle_nodbg_begin(Reg),
- use_bundle_nodbg_end());
+ return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end());
}
/// use_nodbg_empty - Return true if there are no non-Debug instructions
@@ -540,7 +516,7 @@ public:
/// apply sub registers to ToReg in order to obtain a final/proper physical
/// register.
void replaceRegWith(unsigned FromReg, unsigned ToReg);
-
+
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
@@ -626,6 +602,12 @@ public:
RegAllocHints[VReg].second = PrefReg;
}
+ /// Specify the preferred register allocation hint for the specified virtual
+ /// register.
+ void setSimpleHint(unsigned VReg, unsigned PrefReg) {
+ setRegAllocationHint(VReg, /*Type=*/0, PrefReg);
+ }
+
/// getRegAllocationHint - Return the register allocation hint for the
/// specified virtual register.
std::pair<unsigned, unsigned>
@@ -650,41 +632,15 @@ public:
/// Return true if the specified register is modified in this function.
/// This checks that no defining machine operands exist for the register or
/// any of its aliases. Definitions found on functions marked noreturn are
- /// ignored.
+ /// ignored. The register is also considered modified when it is set in the
+ /// UsedPhysRegMask.
bool isPhysRegModified(unsigned PhysReg) const;
- //===--------------------------------------------------------------------===//
- // Physical Register Use Info
- //===--------------------------------------------------------------------===//
-
- /// isPhysRegUsed - Return true if the specified register is used in this
- /// function. Also check for clobbered aliases and registers clobbered by
- /// function calls with register mask operands.
- ///
- /// This only works after register allocation.
- bool isPhysRegUsed(unsigned Reg) const {
- if (UsedPhysRegMask.test(Reg))
- return true;
- for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
- Units.isValid(); ++Units)
- if (UsedRegUnits.test(*Units))
- return true;
- return false;
- }
-
- /// Mark the specified register unit as used in this function.
- /// This should only be called during and after register allocation.
- void setRegUnitUsed(unsigned RegUnit) {
- UsedRegUnits.set(RegUnit);
- }
-
- /// setPhysRegUsed - Mark the specified register used in this function.
- /// This should only be called during and after register allocation.
- void setPhysRegUsed(unsigned Reg) {
- for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
- Units.isValid(); ++Units)
- UsedRegUnits.set(*Units);
- }
+ /// Return true if the specified register is modified or read in this
+ /// function. This checks that no machine operands exist for the register or
+ /// any of its aliases. The register is also considered used when it is set
+ /// in the UsedPhysRegMask.
+ bool isPhysRegUsed(unsigned PhysReg) const;
/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
@@ -692,15 +648,9 @@ public:
UsedPhysRegMask.setBitsNotInMask(RegMask);
}
- /// setPhysRegUnused - Mark the specified register unused in this function.
- /// This should only be called during and after register allocation.
- void setPhysRegUnused(unsigned Reg) {
- UsedPhysRegMask.reset(Reg);
- for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
- Units.isValid(); ++Units)
- UsedRegUnits.reset(*Units);
- }
+ const BitVector &getUsedPhysRegsMask() const { return UsedPhysRegMask; }
+ void setUsedPhysRegMask(BitVector &Mask) { UsedPhysRegMask = Mask; }
//===--------------------------------------------------------------------===//
// Reserved Register Info
@@ -797,7 +747,7 @@ public:
/// Returns a mask covering all bits that can appear in lane masks of
/// subregisters of the virtual register @p Reg.
- unsigned getMaxLaneMaskForVReg(unsigned Reg) const;
+ LaneBitmask getMaxLaneMaskForVReg(unsigned Reg) const;
/// defusechain_iterator - This class provides iterator support for machine
/// operands in the function that use or define a specific register. If
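
[Note: the many make_range() conversions above are mechanical, but they are what makes range-for over a register's def/use chains read naturally. A one-liner sketch, with MRI and Reg assumed:

    // Clear kill flags on every non-debug use of Reg.
    for (MachineOperand &MO : MRI.use_nodbg_operands(Reg))
      MO.setIsKill(false);
]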
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
index e80e14e..358fd5a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -77,6 +77,7 @@
#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
@@ -87,7 +88,6 @@ namespace llvm {
extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
-class AliasAnalysis;
class LiveIntervals;
class MachineDominatorTree;
class MachineLoopInfo;
@@ -156,8 +156,12 @@ struct MachineSchedPolicy {
bool OnlyTopDown;
bool OnlyBottomUp;
+ // Disable heuristic that tries to fetch nodes from long dependency chains
+ // first.
+ bool DisableLatencyHeuristic;
+
MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false),
- OnlyBottomUp(false) {}
+ OnlyBottomUp(false), DisableLatencyHeuristic(false) {}
};
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
@@ -175,6 +179,8 @@ public:
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {}
+ virtual void dumpPolicy() {}
+
/// Check if pressure tracking is needed before building the DAG and
/// initializing this strategy. Called after initPolicy.
virtual bool shouldTrackPressure() const { return true; }
@@ -222,6 +228,7 @@ public:
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
AliasAnalysis *AA;
+ LiveIntervals *LIS;
std::unique_ptr<MachineSchedStrategy> SchedImpl;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
@@ -248,11 +255,11 @@ protected:
#endif
public:
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
- bool IsPostRA)
- : ScheduleDAGInstrs(*C->MF, C->MLI, IsPostRA,
- /*RemoveKillFlags=*/IsPostRA, C->LIS),
- AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(),
- CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) {
+ bool RemoveKillFlags)
+ : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
+ LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU),
+ CurrentTop(), CurrentBottom(), NextClusterPred(nullptr),
+ NextClusterSucc(nullptr) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
@@ -261,6 +268,9 @@ public:
// Provide a vtable anchor
~ScheduleDAGMI() override;
+ // Returns LiveIntervals instance for use in DAG mutators and such.
+ LiveIntervals *getLIS() const { return LIS; }
+
/// Return true if this DAG supports VReg liveness and RegPressure.
virtual bool hasVRegLiveness() const { return false; }
@@ -380,7 +390,7 @@ protected:
public:
ScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S)
- : ScheduleDAGMI(C, std::move(S), /*IsPostRA=*/false),
+ : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
RegClassInfo(C->RegClassInfo), DFSResult(nullptr),
ShouldTrackPressure(false), RPTracker(RegPressure),
TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}
@@ -858,6 +868,8 @@ public:
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) override;
+ void dumpPolicy() override;
+
bool shouldTrackPressure() const override {
return RegionPolicy.ShouldTrackPressure;
}
@@ -915,7 +927,7 @@ public:
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) override {
/* no configurable policy */
- };
+ }
/// PostRA scheduling does not track pressure.
bool shouldTrackPressure() const override { return false; }
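
[Note: the ScheduleDAGMI constructor parameter is renamed from IsPostRA to RemoveKillFlags, reflecting what it actually controls. A sketch of plugging a custom strategy into the new signature; MyStrategy and createMyScheduler are hypothetical:

    static ScheduleDAGInstrs *createMyScheduler(MachineSchedContext *C) {
      return new ScheduleDAGMI(C, make_unique<MyStrategy>(C),
                               /*RemoveKillFlags=*/false);
    }
]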
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
index a728df3..04d6ee3 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
@@ -56,53 +56,66 @@ class MVT {
FIRST_FP_VALUETYPE = f16,
LAST_FP_VALUETYPE = ppcf128,
- v2i1 = 13, // 2 x i1
- v4i1 = 14, // 4 x i1
- v8i1 = 15, // 8 x i1
- v16i1 = 16, // 16 x i1
- v32i1 = 17, // 32 x i1
- v64i1 = 18, // 64 x i1
-
- v1i8 = 19, // 1 x i8
- v2i8 = 20, // 2 x i8
- v4i8 = 21, // 4 x i8
- v8i8 = 22, // 8 x i8
- v16i8 = 23, // 16 x i8
- v32i8 = 24, // 32 x i8
- v64i8 = 25, // 64 x i8
- v1i16 = 26, // 1 x i16
- v2i16 = 27, // 2 x i16
- v4i16 = 28, // 4 x i16
- v8i16 = 29, // 8 x i16
- v16i16 = 30, // 16 x i16
- v32i16 = 31, // 32 x i16
- v1i32 = 32, // 1 x i32
- v2i32 = 33, // 2 x i32
- v4i32 = 34, // 4 x i32
- v8i32 = 35, // 8 x i32
- v16i32 = 36, // 16 x i32
- v1i64 = 37, // 1 x i64
- v2i64 = 38, // 2 x i64
- v4i64 = 39, // 4 x i64
- v8i64 = 40, // 8 x i64
- v16i64 = 41, // 16 x i64
- v1i128 = 42, // 1 x i128
-
+ v2i1 = 13, // 2 x i1
+ v4i1 = 14, // 4 x i1
+ v8i1 = 15, // 8 x i1
+ v16i1 = 16, // 16 x i1
+ v32i1 = 17, // 32 x i1
+ v64i1 = 18, // 64 x i1
+ v512i1 = 19, // 512 x i1
+ v1024i1 = 20, // 1024 x i1
+
+ v1i8 = 21, // 1 x i8
+ v2i8 = 22, // 2 x i8
+ v4i8 = 23, // 4 x i8
+ v8i8 = 24, // 8 x i8
+ v16i8 = 25, // 16 x i8
+ v32i8 = 26, // 32 x i8
+ v64i8 = 27, // 64 x i8
+    v128i8  = 28,  // 128 x i8
+    v256i8  = 29,  // 256 x i8
+
+ v1i16 = 30, // 1 x i16
+ v2i16 = 31, // 2 x i16
+ v4i16 = 32, // 4 x i16
+ v8i16 = 33, // 8 x i16
+ v16i16 = 34, // 16 x i16
+ v32i16 = 35, // 32 x i16
+ v64i16 = 36, // 64 x i16
+    v128i16 = 37,  // 128 x i16
+
+ v1i32 = 38, // 1 x i32
+ v2i32 = 39, // 2 x i32
+ v4i32 = 40, // 4 x i32
+ v8i32 = 41, // 8 x i32
+ v16i32 = 42, // 16 x i32
+ v32i32 = 43, // 32 x i32
+ v64i32 = 44, // 64 x i32
+
+ v1i64 = 45, // 1 x i64
+ v2i64 = 46, // 2 x i64
+ v4i64 = 47, // 4 x i64
+ v8i64 = 48, // 8 x i64
+ v16i64 = 49, // 16 x i64
+ v32i64 = 50, // 32 x i64
+
+ v1i128 = 51, // 1 x i128
+
FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
LAST_INTEGER_VECTOR_VALUETYPE = v1i128,
- v2f16 = 43, // 2 x f16
- v4f16 = 44, // 4 x f16
- v8f16 = 45, // 8 x f16
- v1f32 = 46, // 1 x f32
- v2f32 = 47, // 2 x f32
- v4f32 = 48, // 4 x f32
- v8f32 = 49, // 8 x f32
- v16f32 = 50, // 16 x f32
- v1f64 = 51, // 1 x f64
- v2f64 = 52, // 2 x f64
- v4f64 = 53, // 4 x f64
- v8f64 = 54, // 8 x f64
+ v2f16 = 52, // 2 x f16
+ v4f16 = 53, // 4 x f16
+ v8f16 = 54, // 8 x f16
+ v1f32 = 55, // 1 x f32
+ v2f32 = 56, // 2 x f32
+ v4f32 = 57, // 4 x f32
+ v8f32 = 58, // 8 x f32
+ v16f32 = 59, // 16 x f32
+ v1f64 = 60, // 1 x f64
+ v2f64 = 61, // 2 x f64
+ v4f64 = 62, // 4 x f64
+ v8f64 = 63, // 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = v8f64,
@@ -110,23 +123,26 @@ class MVT {
FIRST_VECTOR_VALUETYPE = v2i1,
LAST_VECTOR_VALUETYPE = v8f64,
- x86mmx = 55, // This is an X86 MMX value
+ x86mmx = 64, // This is an X86 MMX value
- Glue = 56, // This glues nodes together during pre-RA sched
+ Glue = 65, // This glues nodes together during pre-RA sched
- isVoid = 57, // This has no value
+ isVoid = 66, // This has no value
- Untyped = 58, // This value takes a register, but has
+ Untyped = 67, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
FIRST_VALUETYPE = 0, // This is always the beginning of the list.
- LAST_VALUETYPE = 59, // This always remains at the end of the list.
+ LAST_VALUETYPE = 68, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
- MAX_ALLOWED_VALUETYPE = 64,
+ MAX_ALLOWED_VALUETYPE = 96,
+
+ // Token - A value of type llvm::TokenTy
+ token = 249,
// Metadata - This is MDNode or MDString.
Metadata = 250,
@@ -238,14 +254,23 @@ class MVT {
/// is512BitVector - Return true if this is a 512-bit vector type.
bool is512BitVector() const {
- return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 ||
- SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
- SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
+ return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
+ SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
+ SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
+ SimpleTy == MVT::v8i64);
}
/// is1024BitVector - Return true if this is a 1024-bit vector type.
bool is1024BitVector() const {
- return (SimpleTy == MVT::v16i64);
+ return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
+ SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 ||
+ SimpleTy == MVT::v16i64);
+ }
+
+  /// is2048BitVector - Return true if this is a 2048-bit vector type.
+ bool is2048BitVector() const {
+ return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
+ SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64);
}
/// isOverloaded - Return true if this is an overloaded type for TableGen.
@@ -282,35 +307,44 @@ class MVT {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
- case v2i1 :
- case v4i1 :
- case v8i1 :
- case v16i1 :
- case v32i1 :
- case v64i1: return i1;
- case v1i8 :
- case v2i8 :
- case v4i8 :
- case v8i8 :
+ case v2i1:
+ case v4i1:
+ case v8i1:
+ case v16i1:
+ case v32i1:
+ case v64i1:
+ case v512i1:
+ case v1024i1: return i1;
+ case v1i8:
+ case v2i8:
+ case v4i8:
+ case v8i8:
case v16i8:
case v32i8:
- case v64i8: return i8;
+ case v64i8:
+ case v128i8:
+ case v256i8: return i8;
case v1i16:
case v2i16:
case v4i16:
case v8i16:
case v16i16:
- case v32i16: return i16;
+ case v32i16:
+ case v64i16:
+ case v128i16: return i16;
case v1i32:
case v2i32:
case v4i32:
case v8i32:
- case v16i32: return i32;
+ case v16i32:
+ case v32i32:
+ case v64i32: return i32;
case v1i64:
case v2i64:
case v4i64:
case v8i64:
- case v16i64: return i64;
+ case v16i64:
+ case v32i64: return i64;
case v1i128: return i128;
case v2f16:
case v4f16:
@@ -331,19 +365,28 @@ class MVT {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
+ case v1024i1: return 1024;
+ case v512i1: return 512;
+ case v256i8: return 256;
+ case v128i8:
+ case v128i16: return 128;
+ case v64i1:
+ case v64i8:
+ case v64i16:
+ case v64i32: return 64;
case v32i1:
case v32i8:
- case v32i16: return 32;
- case v64i1:
- case v64i8: return 64;
+ case v32i16:
+ case v32i32:
+ case v32i64: return 32;
case v16i1:
case v16i8:
case v16i16:
case v16i32:
case v16i64:
case v16f32: return 16;
- case v8i1 :
- case v8i8 :
+ case v8i1:
+ case v8i8:
case v8i16:
case v8i32:
case v8i64:
@@ -390,6 +433,9 @@ class MVT {
case vAny:
case Any:
llvm_unreachable("Value type is overloaded.");
+ case token:
+ llvm_unreachable("Token type is a sentinel that cannot be used "
+ "in codegen and has no size");
case Metadata:
llvm_unreachable("Value type is metadata.");
case i1 : return 1;
@@ -440,13 +486,22 @@ class MVT {
case v4i64:
case v8f32:
case v4f64: return 256;
+ case v512i1:
case v64i8:
case v32i16:
case v16i32:
case v8i64:
case v16f32:
case v8f64: return 512;
- case v16i64:return 1024;
+ case v1024i1:
+ case v128i8:
+ case v64i16:
+ case v32i32:
+ case v16i64: return 1024;
+ case v256i8:
+ case v128i16:
+ case v64i32:
+ case v32i64: return 2048;
}
}
@@ -528,29 +583,35 @@ class MVT {
default:
break;
case MVT::i1:
- if (NumElements == 2) return MVT::v2i1;
- if (NumElements == 4) return MVT::v4i1;
- if (NumElements == 8) return MVT::v8i1;
- if (NumElements == 16) return MVT::v16i1;
- if (NumElements == 32) return MVT::v32i1;
- if (NumElements == 64) return MVT::v64i1;
+ if (NumElements == 2) return MVT::v2i1;
+ if (NumElements == 4) return MVT::v4i1;
+ if (NumElements == 8) return MVT::v8i1;
+ if (NumElements == 16) return MVT::v16i1;
+ if (NumElements == 32) return MVT::v32i1;
+ if (NumElements == 64) return MVT::v64i1;
+ if (NumElements == 512) return MVT::v512i1;
+ if (NumElements == 1024) return MVT::v1024i1;
break;
case MVT::i8:
- if (NumElements == 1) return MVT::v1i8;
- if (NumElements == 2) return MVT::v2i8;
- if (NumElements == 4) return MVT::v4i8;
- if (NumElements == 8) return MVT::v8i8;
- if (NumElements == 16) return MVT::v16i8;
- if (NumElements == 32) return MVT::v32i8;
- if (NumElements == 64) return MVT::v64i8;
+ if (NumElements == 1) return MVT::v1i8;
+ if (NumElements == 2) return MVT::v2i8;
+ if (NumElements == 4) return MVT::v4i8;
+ if (NumElements == 8) return MVT::v8i8;
+ if (NumElements == 16) return MVT::v16i8;
+ if (NumElements == 32) return MVT::v32i8;
+ if (NumElements == 64) return MVT::v64i8;
+ if (NumElements == 128) return MVT::v128i8;
+ if (NumElements == 256) return MVT::v256i8;
break;
case MVT::i16:
- if (NumElements == 1) return MVT::v1i16;
- if (NumElements == 2) return MVT::v2i16;
- if (NumElements == 4) return MVT::v4i16;
- if (NumElements == 8) return MVT::v8i16;
- if (NumElements == 16) return MVT::v16i16;
- if (NumElements == 32) return MVT::v32i16;
+ if (NumElements == 1) return MVT::v1i16;
+ if (NumElements == 2) return MVT::v2i16;
+ if (NumElements == 4) return MVT::v4i16;
+ if (NumElements == 8) return MVT::v8i16;
+ if (NumElements == 16) return MVT::v16i16;
+ if (NumElements == 32) return MVT::v32i16;
+ if (NumElements == 64) return MVT::v64i16;
+ if (NumElements == 128) return MVT::v128i16;
break;
case MVT::i32:
if (NumElements == 1) return MVT::v1i32;
@@ -558,6 +619,8 @@ class MVT {
if (NumElements == 4) return MVT::v4i32;
if (NumElements == 8) return MVT::v8i32;
if (NumElements == 16) return MVT::v16i32;
+ if (NumElements == 32) return MVT::v32i32;
+ if (NumElements == 64) return MVT::v64i32;
break;
case MVT::i64:
if (NumElements == 1) return MVT::v1i64;
@@ -565,6 +628,7 @@ class MVT {
if (NumElements == 4) return MVT::v4i64;
if (NumElements == 8) return MVT::v8i64;
if (NumElements == 16) return MVT::v16i64;
+ if (NumElements == 32) return MVT::v32i64;
break;
case MVT::i128:
if (NumElements == 1) return MVT::v1i128;
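
[Note: the widened getVectorVT() switch means the new types are reachable through the ordinary factory; both lines of this sketch follow directly from the tables above:

    MVT VT = MVT::getVectorVT(MVT::i32, 64);
    assert(VT == MVT::v64i32 && VT.getSizeInBits() == 2048);
]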
diff --git a/contrib/llvm/include/llvm/CodeGen/ParallelCG.h b/contrib/llvm/include/llvm/CodeGen/ParallelCG.h
new file mode 100644
index 0000000..fa7002f
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/ParallelCG.h
@@ -0,0 +1,43 @@
+//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PARALLELCG_H
+#define LLVM_CODEGEN_PARALLELCG_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class Module;
+class TargetOptions;
+class raw_pwrite_stream;
+
+/// Split M into OSs.size() partitions, and generate code for each. Writes
+/// OSs.size() output files to the output streams in OSs. The resulting output
+/// files, if linked together, are intended to be equivalent to the single
+/// output file that would have been code generated from M.
+///
+/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr<Module>().
+std::unique_ptr<Module>
+splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
+ StringRef CPU, StringRef Features, const TargetOptions &Options,
+ Reloc::Model RM = Reloc::Default,
+ CodeModel::Model CM = CodeModel::Default,
+ CodeGenOpt::Level OL = CodeGenOpt::Default,
+ TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile);
+
+} // namespace llvm
+
+#endif
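
[Note: a hedged usage sketch for the new header; M, OS0 and OS1 are assumed to be set up by the caller, and the defaulted parameters cover the relocation model, code model, opt level and file type:

    raw_pwrite_stream *OSs[] = {&OS0, &OS1};
    std::unique_ptr<Module> Ret =
        splitCodeGen(std::move(M), OSs, /*CPU=*/"", /*Features=*/"",
                     TargetOptions());
    // With OSs.size() > 1, Ret is the null unique_ptr per the contract above.
]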
diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h
index 5d82921..f45f0ed 100644
--- a/contrib/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm/include/llvm/CodeGen/Passes.h
@@ -120,9 +120,6 @@ protected:
/// Default setting for -enable-tail-merge on this target.
bool EnableTailMerge;
- /// Default setting for -enable-shrink-wrap on this target.
- bool EnableShrinkWrap;
-
public:
TargetPassConfig(TargetMachine *tm, PassManagerBase &pm);
// Dummy constructor.
@@ -173,7 +170,8 @@ public:
void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID);
/// Insert InsertedPassID pass after TargetPassID pass.
- void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID);
+ void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter = true, bool PrintAfter = true);
/// Allow the target to enable a specific standard pass by default.
void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
@@ -228,7 +226,7 @@ public:
///
/// This can also be used to plug a new MachineSchedStrategy into an instance
/// of the standard ScheduleDAGMI:
- /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /* IsPostRA= */false)
+ /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
///
/// Return NULL to select the default (generic) machine scheduler.
virtual ScheduleDAGInstrs *
@@ -585,6 +583,9 @@ namespace llvm {
/// StackSlotColoring - This pass performs stack slot coloring.
extern char &StackSlotColoringID;
+ /// \brief This pass lays out funclets contiguously.
+ extern char &FuncletLayoutID;
+
/// createStackProtectorPass - This pass adds stack protectors to functions.
///
FunctionPass *createStackProtectorPass(const TargetMachine *TM);
@@ -639,6 +640,9 @@ namespace llvm {
/// the intrinsic for later emission to the StackMap.
extern char &StackMapLivenessID;
+ /// LiveDebugValues pass
+ extern char &LiveDebugValuesID;
+
/// createJumpInstrTables - This pass creates jump-instruction tables.
ModulePass *createJumpInstrTablesPass();
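
[Note: insertPass() now lets a target suppress the verifier and IR printer after the inserted pass. A sketch of the call from a target's pass config; MyTargetPassID is hypothetical, MachineSchedulerID is the existing pass ID:

    insertPass(&MachineSchedulerID, &MyTargetPassID,
               /*VerifyAfter=*/false, /*PrintAfter=*/false);
]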
diff --git a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index a518b62..f675520 100644
--- a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -14,97 +14,170 @@
#ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
#define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueMap.h"
+#include <map>
namespace llvm {
- class MachineFrameInfo;
- class MachineMemOperand;
- class raw_ostream;
-
- raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
-
- /// PseudoSourceValue - Special value supplied for machine level alias
- /// analysis. It indicates that a memory access references the functions
- /// stack frame (e.g., a spill slot), below the stack frame (e.g., argument
- /// space), or constant pool.
- class PseudoSourceValue {
- private:
- friend class MachineMemOperand; // For printCustom().
-
- /// printCustom - Implement printing for PseudoSourceValue. This is called
- /// from Value::print or Value's operator<<.
- ///
- virtual void printCustom(raw_ostream &O) const;
-
- public:
- /// isFixed - Whether this is a FixedStackPseudoSourceValue.
- bool isFixed;
-
- explicit PseudoSourceValue(bool isFixed = false);
-
- virtual ~PseudoSourceValue();
-
- /// isConstant - Test whether the memory pointed to by this
- /// PseudoSourceValue has a constant value.
- ///
- virtual bool isConstant(const MachineFrameInfo *) const;
-
- /// isAliased - Test whether the memory pointed to by this
- /// PseudoSourceValue may also be pointed to by an LLVM IR Value.
- virtual bool isAliased(const MachineFrameInfo *) const;
-
- /// mayAlias - Return true if the memory pointed to by this
- /// PseudoSourceValue can ever alias an LLVM IR Value.
- virtual bool mayAlias(const MachineFrameInfo *) const;
-
- /// A pseudo source value referencing a fixed stack frame entry,
- /// e.g., a spill slot.
- static const PseudoSourceValue *getFixedStack(int FI);
-
- /// A pseudo source value referencing the area below the stack frame of
- /// a function, e.g., the argument space.
- static const PseudoSourceValue *getStack();
-
- /// A pseudo source value referencing the global offset table
- /// (or something the like).
- static const PseudoSourceValue *getGOT();
-
- /// A pseudo source value referencing the constant pool. Since constant
- /// pools are constant, this doesn't need to identify a specific constant
- /// pool entry.
- static const PseudoSourceValue *getConstantPool();
-
- /// A pseudo source value referencing a jump table. Since jump tables are
- /// constant, this doesn't need to identify a specific jump table.
- static const PseudoSourceValue *getJumpTable();
+
+class MachineFrameInfo;
+class MachineMemOperand;
+class raw_ostream;
+
+raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
+
+/// Special value supplied for machine level alias analysis. It indicates that
+/// a memory access references the function's stack frame (e.g., a spill slot),
+/// below the stack frame (e.g., argument space), or constant pool.
+class PseudoSourceValue {
+public:
+ enum PSVKind {
+ Stack,
+ GOT,
+ JumpTable,
+ ConstantPool,
+ FixedStack,
+ GlobalValueCallEntry,
+ ExternalSymbolCallEntry
};
- /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
- /// for holding FixedStack values, which must include a frame
- /// index.
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
- const int FI;
- public:
- explicit FixedStackPseudoSourceValue(int fi) :
- PseudoSourceValue(true), FI(fi) {}
+private:
+ PSVKind Kind;
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const PseudoSourceValue *V) {
- return V->isFixed == true;
- }
+ friend class MachineMemOperand; // For printCustom().
- bool isConstant(const MachineFrameInfo *MFI) const override;
+ /// Implement printing for PseudoSourceValue. This is called from
+ /// Value::print or Value's operator<<.
+ virtual void printCustom(raw_ostream &O) const;
- bool isAliased(const MachineFrameInfo *MFI) const override;
+public:
+ explicit PseudoSourceValue(PSVKind Kind);
- bool mayAlias(const MachineFrameInfo *) const override;
+ virtual ~PseudoSourceValue();
- void printCustom(raw_ostream &OS) const override;
+ PSVKind kind() const { return Kind; }
- int getFrameIndex() const { return FI; }
- };
-} // End llvm namespace
+ bool isStack() const { return Kind == Stack; }
+ bool isGOT() const { return Kind == GOT; }
+ bool isConstantPool() const { return Kind == ConstantPool; }
+ bool isJumpTable() const { return Kind == JumpTable; }
+
+ /// Test whether the memory pointed to by this PseudoSourceValue has a
+ /// constant value.
+ virtual bool isConstant(const MachineFrameInfo *) const;
+
+ /// Test whether the memory pointed to by this PseudoSourceValue may also be
+ /// pointed to by an LLVM IR Value.
+ virtual bool isAliased(const MachineFrameInfo *) const;
+
+ /// Return true if the memory pointed to by this PseudoSourceValue can ever
+ /// alias an LLVM IR Value.
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+};
+
+/// A specialized PseudoSourceValue for holding FixedStack values, which must
+/// include a frame index.
+class FixedStackPseudoSourceValue : public PseudoSourceValue {
+ const int FI;
+
+public:
+ explicit FixedStackPseudoSourceValue(int FI)
+ : PseudoSourceValue(FixedStack), FI(FI) {}
+
+ static inline bool classof(const PseudoSourceValue *V) {
+ return V->kind() == FixedStack;
+ }
+
+ bool isConstant(const MachineFrameInfo *MFI) const override;
+
+ bool isAliased(const MachineFrameInfo *MFI) const override;
+
+ bool mayAlias(const MachineFrameInfo *) const override;
+
+ void printCustom(raw_ostream &OS) const override;
+
+ int getFrameIndex() const { return FI; }
+};
+
+class CallEntryPseudoSourceValue : public PseudoSourceValue {
+protected:
+ CallEntryPseudoSourceValue(PSVKind Kind);
+
+public:
+ bool isConstant(const MachineFrameInfo *) const override;
+ bool isAliased(const MachineFrameInfo *) const override;
+ bool mayAlias(const MachineFrameInfo *) const override;
+};
+
+/// A specialized pseudo source value for holding GlobalValue values.
+class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
+ const GlobalValue *GV;
+
+public:
+ GlobalValuePseudoSourceValue(const GlobalValue *GV);
+
+ static inline bool classof(const PseudoSourceValue *V) {
+ return V->kind() == GlobalValueCallEntry;
+ }
+
+ const GlobalValue *getValue() const { return GV; }
+};
+
+/// A specialized pseudo source value for holding external symbol values.
+class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue {
+ const char *ES;
+
+public:
+ ExternalSymbolPseudoSourceValue(const char *ES);
+
+ static inline bool classof(const PseudoSourceValue *V) {
+ return V->kind() == ExternalSymbolCallEntry;
+ }
+
+ const char *getSymbol() const { return ES; }
+};
+
+/// Manages creation of pseudo source values.
+class PseudoSourceValueManager {
+ const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV;
+ std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues;
+ StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>>
+ ExternalCallEntries;
+ ValueMap<const GlobalValue *,
+ std::unique_ptr<const GlobalValuePseudoSourceValue>>
+ GlobalCallEntries;
+
+public:
+ PseudoSourceValueManager();
+
+ /// Return a pseudo source value referencing the area below the stack frame of
+ /// a function, e.g., the argument space.
+ const PseudoSourceValue *getStack();
+
+ /// Return a pseudo source value referencing the global offset table
+/// (or something similar).
+ const PseudoSourceValue *getGOT();
+
+ /// Return a pseudo source value referencing the constant pool. Since constant
+ /// pools are constant, this doesn't need to identify a specific constant
+ /// pool entry.
+ const PseudoSourceValue *getConstantPool();
+
+ /// Return a pseudo source value referencing a jump table. Since jump tables
+ /// are constant, this doesn't need to identify a specific jump table.
+ const PseudoSourceValue *getJumpTable();
+
+ /// Return a pseudo source value referencing a fixed stack frame entry,
+ /// e.g., a spill slot.
+ const PseudoSourceValue *getFixedStack(int FI);
+
+ const PseudoSourceValue *getGlobalValueCallEntry(const GlobalValue *GV);
+
+ const PseudoSourceValue *getExternalSymbolCallEntry(const char *ES);
+};
+
+} // end namespace llvm
#endif
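
The kind() tag plus static classof() in the new PseudoSourceValue hierarchy is LLVM's usual custom-RTTI hook: isa<> and dyn_cast<> reduce to a kind comparison, with no vtable lookup. A standalone compilable sketch of the idiom — the class names mirror the header above but are illustrative, not LLVM's:

#include <cassert>

struct PSV {
  enum Kind { Stack, FixedStack } K;
  explicit PSV(Kind K) : K(K) {}
  Kind kind() const { return K; }
};

struct FixedStackPSV : PSV {
  int FI; // frame index, as in FixedStackPseudoSourceValue
  explicit FixedStackPSV(int FI) : PSV(FixedStack), FI(FI) {}
  // Mirrors classof(): isa<>/dyn_cast<> bottom out in this kind check.
  static bool classof(const PSV *V) { return V->kind() == FixedStack; }
};

int main() {
  FixedStackPSV F(42);
  const PSV *P = &F;
  if (FixedStackPSV::classof(P)) // what dyn_cast<> does internally
    assert(static_cast<const FixedStackPSV *>(P)->FI == 42);
}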
diff --git a/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
index 6046e46..4122811 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
@@ -134,7 +134,7 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) {
hash_combine_range(OStart, OEnd));
}
-/// \brief Holds graph-level metadata relevent to PBQP RA problems.
+/// \brief Holds graph-level metadata relevant to PBQP RA problems.
class GraphMetadata {
private:
typedef ValuePool<AllowedRegVector> AllowedRegVecPool;
diff --git a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
index ca49577..5c7e999 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
@@ -33,12 +33,10 @@ public:
static MachinePassRegistry Registry;
RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C)
- {
- Registry.Add(this);
+ : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ Registry.Add(this);
}
~RegisterRegAlloc() { Registry.Remove(this); }
-
// Accessors.
//
@@ -57,7 +55,6 @@ public:
static void setListener(MachinePassRegistryListener *L) {
Registry.setListener(L);
}
-
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
index 9d8843d..987634f 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -125,11 +125,13 @@ class PressureDiff {
enum { MaxPSets = 16 };
PressureChange PressureChanges[MaxPSets];
-public:
+
typedef PressureChange* iterator;
+ iterator nonconst_begin() { return &PressureChanges[0]; }
+ iterator nonconst_end() { return &PressureChanges[MaxPSets]; }
+
+public:
typedef const PressureChange* const_iterator;
- iterator begin() { return &PressureChanges[0]; }
- iterator end() { return &PressureChanges[MaxPSets]; }
const_iterator begin() const { return &PressureChanges[0]; }
const_iterator end() const { return &PressureChanges[MaxPSets]; }
@@ -191,30 +193,56 @@ struct RegPressureDelta {
}
};
-/// \brief A set of live virtual registers and physical register units.
+/// A set of live virtual registers and physical register units.
///
-/// Virtual and physical register numbers require separate sparse sets, but most
-/// of the RegisterPressureTracker handles them uniformly.
-struct LiveRegSet {
- SparseSet<unsigned> PhysRegs;
- SparseSet<unsigned, VirtReg2IndexFunctor> VirtRegs;
+/// This is a wrapper around a SparseSet which deals with mapping register unit
+/// and virtual register indexes to an index usable by the sparse set.
+class LiveRegSet {
+private:
+ SparseSet<unsigned> Regs;
+ unsigned NumRegUnits;
+
+ unsigned getSparseIndexFromReg(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits;
+ assert(Reg < NumRegUnits);
+ return Reg;
+ }
+ unsigned getRegFromSparseIndex(unsigned SparseIndex) const {
+ if (SparseIndex >= NumRegUnits)
+ return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits);
+ return SparseIndex;
+ }
+
+public:
+ void clear();
+ void init(const MachineRegisterInfo &MRI);
bool contains(unsigned Reg) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return VirtRegs.count(Reg);
- return PhysRegs.count(Reg);
+ unsigned SparseIndex = getSparseIndexFromReg(Reg);
+ return Regs.count(SparseIndex);
}
bool insert(unsigned Reg) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return VirtRegs.insert(Reg).second;
- return PhysRegs.insert(Reg).second;
+ unsigned SparseIndex = getSparseIndexFromReg(Reg);
+ return Regs.insert(SparseIndex).second;
}
bool erase(unsigned Reg) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return VirtRegs.erase(Reg);
- return PhysRegs.erase(Reg);
+ unsigned SparseIndex = getSparseIndexFromReg(Reg);
+ return Regs.erase(SparseIndex);
+ }
+
+ size_t size() const {
+ return Regs.size();
+ }
+
+ template<typename ContainerT>
+ void appendTo(ContainerT &To) const {
+ for (unsigned I : Regs) {
+ unsigned Reg = getRegFromSparseIndex(I);
+ To.push_back(Reg);
+ }
}
};
@@ -300,16 +328,12 @@ public:
// position changes while pressure does not.
void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; }
- /// \brief Get the SlotIndex for the first nondebug instruction including or
- /// after the current position.
- SlotIndex getCurrSlot() const;
-
/// Recede across the previous instruction.
- bool recede(SmallVectorImpl<unsigned> *LiveUses = nullptr,
+ void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr,
PressureDiff *PDiff = nullptr);
/// Advance across the current instruction.
- bool advance();
+ void advance();
/// Finalize the region boundaries and record live-ins and live-outs.
void closeRegion();
@@ -326,17 +350,15 @@ public:
ArrayRef<unsigned> getLiveThru() const { return LiveThruPressure; }
/// Get the resulting register pressure over the traversed region.
- /// This result is complete if either advance() or recede() has returned true,
- /// or if closeRegion() was explicitly invoked.
+ /// This result is complete if closeRegion() was explicitly invoked.
RegisterPressure &getPressure() { return P; }
const RegisterPressure &getPressure() const { return P; }
/// Get the register set pressure at the current position, which may be less
/// than the pressure across the traversed region.
- std::vector<unsigned> &getRegSetPressureAtPos() { return CurrSetPressure; }
-
- void discoverLiveOut(unsigned Reg);
- void discoverLiveIn(unsigned Reg);
+ const std::vector<unsigned> &getRegSetPressureAtPos() const {
+ return CurrSetPressure;
+ }
bool isTopClosed() const;
bool isBottomClosed() const;
@@ -412,7 +434,12 @@ public:
void dump() const;
protected:
- const LiveRange *getLiveRange(unsigned Reg) const;
+ void discoverLiveOut(unsigned Reg);
+ void discoverLiveIn(unsigned Reg);
+
+ /// \brief Get the SlotIndex for the first nondebug instruction including or
+ /// after the current position.
+ SlotIndex getCurrSlot() const;
void increaseRegPressure(ArrayRef<unsigned> Regs);
void decreaseRegPressure(ArrayRef<unsigned> Regs);
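
The LiveRegSet rework above folds physical register units and virtual registers into one sparse-set key space: units keep their own indexes in [0, NumRegUnits), and virtual registers are offset past them. A compilable sketch of that mapping — the constants are made up, but the top-bit tag is what LLVM's isVirtualRegister/virtReg2Index actually check:

#include <cassert>

constexpr unsigned VirtBit = 1u << 31;  // virtual registers carry this tag
constexpr unsigned NumRegUnits = 128;   // target-dependent in reality

unsigned toSparse(unsigned Reg) {
  if (Reg & VirtBit)                    // virtual: strip the tag, then offset
    return (Reg & ~VirtBit) + NumRegUnits;
  assert(Reg < NumRegUnits);            // physical register unit
  return Reg;
}

unsigned fromSparse(unsigned Idx) {
  return Idx >= NumRegUnits ? ((Idx - NumRegUnits) | VirtBit) : Idx;
}

int main() {
  unsigned VReg = VirtBit | 7;
  assert(fromSparse(toSparse(VReg)) == VReg); // round-trips both kinds
  assert(fromSparse(toSparse(5)) == 5);
}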
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
index df3fd34..122c785 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -74,10 +74,6 @@ public:
/// Start tracking liveness from the begin of the specific basic block.
void enterBasicBlock(MachineBasicBlock *mbb);
- /// Allow resetting register state info for multiple
- /// passes over/within the same function.
- void initRegState();
-
/// Move the internal MBB iterator and update register states.
void forward();
@@ -104,10 +100,8 @@ public:
MBBI = I;
}
- MachineBasicBlock::iterator getCurrentPosition() const {
- return MBBI;
- }
-
+ MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; }
+
/// Return if a specific register is currently used.
bool isRegUsed(unsigned Reg, bool includeReserved = true) const;
@@ -152,7 +146,7 @@ public:
}
/// Tell the scavenger a register is used.
- void setRegUsed(unsigned Reg);
+ void setRegUsed(unsigned Reg, LaneBitmask LaneMask = ~0u);
private:
/// Returns true if a register is reserved. It is never "unused".
bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
@@ -169,10 +163,10 @@ private:
/// Processes the current instruction and fill the KillRegUnits and
/// DefRegUnits bit vectors.
void determineKillsAndDefs();
-
+
/// Add all Reg Units that Reg contains to BV.
void addRegUnits(BitVector &BV, unsigned Reg);
-
+
/// Return the candidate register that is unused for the longest after
/// StartMI. UseMI is set to the instruction where the search stopped.
///
@@ -182,6 +176,9 @@ private:
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI);
+ /// Allow resetting register state info for multiple
+ /// passes over/within the same function.
+ void initRegState();
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index 2be5de6..7db0345 100644
--- a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -231,13 +231,9 @@ namespace RTLIB {
FPROUND_F80_F64,
FPROUND_F128_F64,
FPROUND_PPCF128_F64,
- FPTOSINT_F32_I8,
- FPTOSINT_F32_I16,
FPTOSINT_F32_I32,
FPTOSINT_F32_I64,
FPTOSINT_F32_I128,
- FPTOSINT_F64_I8,
- FPTOSINT_F64_I16,
FPTOSINT_F64_I32,
FPTOSINT_F64_I64,
FPTOSINT_F64_I128,
@@ -250,13 +246,9 @@ namespace RTLIB {
FPTOSINT_PPCF128_I32,
FPTOSINT_PPCF128_I64,
FPTOSINT_PPCF128_I128,
- FPTOUINT_F32_I8,
- FPTOUINT_F32_I16,
FPTOUINT_F32_I32,
FPTOUINT_F32_I64,
FPTOUINT_F32_I128,
- FPTOUINT_F64_I8,
- FPTOUINT_F64_I16,
FPTOUINT_F64_I32,
FPTOUINT_F64_I64,
FPTOUINT_F64_I128,
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 8391314..bda9dbd 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -20,11 +20,11 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
- class AliasAnalysis;
class SUnit;
class MachineConstantPool;
class MachineFunction;
@@ -122,18 +122,7 @@ namespace llvm {
}
/// Return true if the specified SDep is equivalent except for latency.
- bool overlaps(const SDep &Other) const {
- if (Dep != Other.Dep) return false;
- switch (Dep.getInt()) {
- case Data:
- case Anti:
- case Output:
- return Contents.Reg == Other.Contents.Reg;
- case Order:
- return Contents.OrdKind == Other.Contents.OrdKind;
- }
- llvm_unreachable("Invalid dependency kind!");
- }
+ bool overlaps(const SDep &Other) const;
bool operator==(const SDep &Other) const {
return overlaps(Other) && Latency == Other.Latency;
@@ -157,19 +146,13 @@ namespace llvm {
}
//// getSUnit - Return the SUnit to which this edge points.
- SUnit *getSUnit() const {
- return Dep.getPointer();
- }
+ SUnit *getSUnit() const;
//// setSUnit - Assign the SUnit to which this edge points.
- void setSUnit(SUnit *SU) {
- Dep.setPointer(SU);
- }
+ void setSUnit(SUnit *SU);
/// getKind - Return an enum value representing the kind of the dependence.
- Kind getKind() const {
- return Dep.getInt();
- }
+ Kind getKind() const;
/// isCtrl - Shorthand for getKind() != SDep::Data.
bool isCtrl() const {
@@ -374,7 +357,7 @@ namespace llvm {
/// correspond to schedulable entities (e.g. instructions) and do not have a
/// valid ID. Consequently, always check for boundary nodes before accessing
/// an associative data structure keyed on node ID.
- bool isBoundaryNode() const { return NodeNum == BoundaryID; };
+ bool isBoundaryNode() const { return NodeNum == BoundaryID; }
/// setNode - Assign the representative SDNode for this SUnit.
/// This may be used during pre-regalloc scheduling.
@@ -490,6 +473,30 @@ namespace llvm {
void ComputeHeight();
};
+ /// Return true if the specified SDep is equivalent except for latency.
+ inline bool SDep::overlaps(const SDep &Other) const {
+ if (Dep != Other.Dep)
+ return false;
+ switch (Dep.getInt()) {
+ case Data:
+ case Anti:
+ case Output:
+ return Contents.Reg == Other.Contents.Reg;
+ case Order:
+ return Contents.OrdKind == Other.Contents.OrdKind;
+ }
+ llvm_unreachable("Invalid dependency kind!");
+ }
+
+ /// getSUnit - Return the SUnit to which this edge points.
+ inline SUnit *SDep::getSUnit() const { return Dep.getPointer(); }
+
+ /// setSUnit - Assign the SUnit to which this edge points.
+ inline void SDep::setSUnit(SUnit *SU) { Dep.setPointer(SU); }
+
+ /// getKind - Return an enum value representing the kind of the dependence.
+ inline SDep::Kind SDep::getKind() const { return Dep.getInt(); }
+
//===--------------------------------------------------------------------===//
/// SchedulingPriorityQueue - This interface is used to plug different
/// priorities computation algorithms into the list scheduler. It implements
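
SDep stores its target SUnit and its 2-bit dependence kind in a single PointerIntPair, which is why getSUnit/setSUnit/getKind are the trivial accessors defined inline above. A standalone sketch of the packing trick PointerIntPair relies on — the low alignment bits of the pointer carry the kind (stub names are illustrative):

#include <cassert>
#include <cstdint>

enum Kind { Data, Anti, Output, Order };

struct alignas(4) SUnitStub {}; // 4-byte alignment frees the low 2 bits

uintptr_t pack(SUnitStub *P, Kind K) {
  return reinterpret_cast<uintptr_t>(P) | uintptr_t(K);
}
SUnitStub *getPtr(uintptr_t V) {
  return reinterpret_cast<SUnitStub *>(V & ~uintptr_t(3));
}
Kind getKind(uintptr_t V) { return Kind(V & 3); }

int main() {
  SUnitStub SU;
  uintptr_t D = pack(&SU, Anti);
  assert(getPtr(D) == &SU && getKind(D) == Anti);
}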
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index b56d5ec..c574df0 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -26,22 +26,32 @@ namespace llvm {
class MachineFrameInfo;
class MachineLoopInfo;
class MachineDominatorTree;
- class LiveIntervals;
class RegPressureTracker;
class PressureDiffs;
/// An individual mapping from virtual register number to SUnit.
struct VReg2SUnit {
unsigned VirtReg;
+ LaneBitmask LaneMask;
SUnit *SU;
- VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
+ VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
+ : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
+ /// Mapping from virtual register to SUnit including an operand index.
+ struct VReg2SUnitOperIdx : public VReg2SUnit {
+ unsigned OperandIndex;
+
+ VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
+ unsigned OperandIndex, SUnit *SU)
+ : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
+ };
+
/// Record a physical register access.
/// For non-data-dependent uses, OpIdx == -1.
struct PhysRegSUOper {
@@ -69,7 +79,10 @@ namespace llvm {
/// Track local uses of virtual registers. These uses are gathered by the DAG
/// builder and may be consulted by the scheduler to avoid iterating an entire
/// vreg use list.
- typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap;
+ typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
+
+ typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
+ VReg2SUnitOperIdxMultiMap;
/// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
/// MachineInstrs.
@@ -78,15 +91,9 @@ namespace llvm {
const MachineLoopInfo *MLI;
const MachineFrameInfo *MFI;
- /// Live Intervals provides reaching defs in preRA scheduling.
- LiveIntervals *LIS;
-
/// TargetSchedModel provides an interface to the machine model.
TargetSchedModel SchedModel;
- /// isPostRA flag indicates vregs cannot be present.
- bool IsPostRA;
-
/// True if the DAG builder should remove kill flags (in preparation for
/// rescheduling).
bool RemoveKillFlags;
@@ -98,6 +105,9 @@ namespace llvm {
/// it has taken responsibility for scheduling the terminator correctly.
bool CanHandleTerminators;
+ /// Whether lane masks should get tracked.
+ bool TrackLaneMasks;
+
/// State specific to the current scheduling region.
/// ------------------------------------------------
@@ -120,7 +130,7 @@ namespace llvm {
/// After calling BuildSchedGraph, each vreg used in the scheduling region
/// is mapped to a set of SUnits. These include all local vreg uses, not
/// just the uses for a singly defined vreg.
- VReg2UseMap VRegUses;
+ VReg2SUnitMultiMap VRegUses;
/// State internal to DAG building.
/// -------------------------------
@@ -132,8 +142,12 @@ namespace llvm {
Reg2SUnitsMap Defs;
Reg2SUnitsMap Uses;
- /// Track the last instruction in this region defining each virtual register.
- VReg2SUnitMap VRegDefs;
+ /// Tracks the last instruction(s) in this region defining each virtual
+ /// register. There may be multiple current definitions for a register with
+ /// disjoint lane masks.
+ VReg2SUnitMultiMap CurrentVRegDefs;
+ /// Tracks the last instructions in this region using each virtual register.
+ VReg2SUnitOperIdxMultiMap CurrentVRegUses;
/// PendingLoads - Remember where unknown loads are after the most recent
/// unknown store, as we iterate. As with Defs and Uses, this is here
@@ -154,17 +168,10 @@ namespace llvm {
public:
explicit ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- bool IsPostRAFlag,
- bool RemoveKillFlags = false,
- LiveIntervals *LIS = nullptr);
+ bool RemoveKillFlags = false);
~ScheduleDAGInstrs() override {}
- bool isPostRA() const { return IsPostRA; }
-
- /// \brief Expose LiveIntervals for use in DAG mutators and such.
- LiveIntervals *getLIS() const { return LIS; }
-
/// \brief Get the machine model for instruction scheduling.
const TargetSchedModel *getSchedModel() const { return &SchedModel; }
@@ -206,7 +213,8 @@ namespace llvm {
/// input.
void buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker = nullptr,
- PressureDiffs *PDiffs = nullptr);
+ PressureDiffs *PDiffs = nullptr,
+ bool TrackLaneMasks = false);
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to a scheduling barrier. We want to
@@ -253,6 +261,12 @@ namespace llvm {
/// Other adjustments may be made to the instruction if necessary. Return
/// true if the operand has been deleted, false if not.
bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ /// Returns a mask for which lanes get read/written by the given (register)
+ /// machine operand.
+ LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
+
+ void collectVRegUses(SUnit *SU);
};
/// newSUnit - Creates a new SUnit and returns a ptr to it.
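
The new LaneBitmask field in VReg2SUnit is what lets CurrentVRegDefs hold several simultaneous definitions of one virtual register: subregister defs coexist as long as their lane masks are disjoint, and a wider def kills every entry it overlaps. A minimal sketch of the invariant, assuming LaneBitmask is a plain unsigned bit set with one bit per lane (as it was at this point in LLVM's history):

#include <cassert>
#include <cstdint>

typedef uint32_t LaneBitmask; // one bit per subregister lane (assumption)

int main() {
  LaneBitmask SubLo = 0x1, SubHi = 0x2; // e.g. low/high halves of a vreg
  assert((SubLo & SubHi) == 0);         // disjoint: both defs stay current
  LaneBitmask Full = SubLo | SubHi;     // a full def overlaps both...
  assert((Full & SubLo) != 0 && (Full & SubHi) != 0); // ...and kills them
}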
diff --git a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
index 51ac7f2..a7a6227 100644
--- a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
@@ -52,12 +52,6 @@ public:
static RegisterScheduler *getList() {
return (RegisterScheduler *)Registry.getList();
}
- static FunctionPassCtor getDefault() {
- return (FunctionPassCtor)Registry.getDefault();
- }
- static void setDefault(FunctionPassCtor C) {
- Registry.setDefault((MachinePassCtor)C);
- }
static void setListener(MachinePassRegistryListener *L) {
Registry.setListener(L);
}
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
index 1ee9238..a21e9ae 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/ilist.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -31,7 +32,6 @@
namespace llvm {
-class AliasAnalysis;
class MachineConstantPoolValue;
class MachineFunction;
class MDNode;
@@ -215,6 +215,8 @@ class SelectionDAG {
/// Tracks dbg_value information through SDISel.
SDDbgInfo *DbgInfo;
+ uint16_t NextPersistentId = 0;
+
public:
/// Clients of various APIs that cause global effects on
/// the DAG can optionally implement this interface. This allows the clients
@@ -324,11 +326,10 @@ public:
}
iterator_range<allnodes_iterator> allnodes() {
- return iterator_range<allnodes_iterator>(allnodes_begin(), allnodes_end());
+ return make_range(allnodes_begin(), allnodes_end());
}
iterator_range<allnodes_const_iterator> allnodes() const {
- return iterator_range<allnodes_const_iterator>(allnodes_begin(),
- allnodes_end());
+ return make_range(allnodes_begin(), allnodes_end());
}
/// Return the root tag of the SelectionDAG.
@@ -532,7 +533,7 @@ public:
SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue };
return getNode(ISD::CopyToReg, dl, VTs,
- ArrayRef<SDValue>(Ops, Glue.getNode() ? 4 : 3));
+ makeArrayRef(Ops, Glue.getNode() ? 4 : 3));
}
// Similar to last getCopyToReg() except parameter Reg is a SDValue
@@ -541,7 +542,7 @@ public:
SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Reg, N, Glue };
return getNode(ISD::CopyToReg, dl, VTs,
- ArrayRef<SDValue>(Ops, Glue.getNode() ? 4 : 3));
+ makeArrayRef(Ops, Glue.getNode() ? 4 : 3));
}
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) {
@@ -558,7 +559,7 @@ public:
SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue };
return getNode(ISD::CopyFromReg, dl, VTs,
- ArrayRef<SDValue>(Ops, Glue.getNode() ? 3 : 2));
+ makeArrayRef(Ops, Glue.getNode() ? 3 : 2));
}
SDValue getCondCode(ISD::CondCode Cond);
@@ -670,7 +671,7 @@ public:
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT,
ArrayRef<SDUse> Ops);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT,
- ArrayRef<SDValue> Ops);
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr);
SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops);
SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
@@ -687,7 +688,7 @@ public:
SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
SDValue N3, SDValue N4, SDValue N5);
-
+
// Specialize again based on number of operands for nodes with a VTList
// rather than a single VT.
SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs);
@@ -901,6 +902,12 @@ public:
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
+ /// Expand the specified \c ISD::VAARG node as the Legalize pass would.
+ SDValue expandVAArg(SDNode *Node);
+
+ /// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
+ SDValue expandVACopy(SDNode *Node);
+
/// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
@@ -1072,6 +1079,10 @@ public:
// target info.
switch (Opcode) {
case ISD::ADD:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::MUL:
case ISD::MULHU:
case ISD::MULHS:
@@ -1088,6 +1099,8 @@ public:
case ISD::ADDE:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
return true;
default: return false;
}
@@ -1150,6 +1163,10 @@ public:
const ConstantSDNode *Cst1,
const ConstantSDNode *Cst2);
+ SDValue FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
+ EVT VT, ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags = nullptr);
+
/// Constant fold a setcc to true or false.
SDValue FoldSetCC(EVT VT, SDValue N1,
SDValue N2, ISD::CondCode Cond, SDLoc dl);
@@ -1199,6 +1216,10 @@ public:
/// other positive zero.
bool isEqualTo(SDValue A, SDValue B) const;
+ /// Return true if A and B have no common bits set. As an example, this can
+ /// allow an 'add' to be transformed into an 'or'.
+ bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
+
/// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
/// on each element individually. If the ResNE is 0, fully unroll the vector
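
haveNoCommonBitsSet() is the guard for the add-to-or rewrite its comment mentions: when no bit position is set in both operands, addition generates no carries, so ADD and OR compute the same value. A two-line demonstration:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0, B = 0x0F;
  assert((A & B) == 0);       // the haveNoCommonBitsSet precondition
  assert((A + B) == (A | B)); // so a combiner may turn ADD into OR
}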
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 4821d1a..23816bd 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -44,6 +44,7 @@ class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
class SDNode;
+class BinaryWithFlagsSDNode;
class Value;
class MCSymbol;
template <typename T> struct DenseMapInfo;
@@ -81,11 +82,6 @@ namespace ISD {
/// all ConstantFPSDNode or undef.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
- /// Return true if the specified node is a
- /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
- /// element is not an undef.
- bool isScalarToVector(const SDNode *N);
-
/// Return true if the node has at least one operand
/// and all operands of the specified node are ISD::UNDEF.
bool allOperandsUndef(const SDNode *N);
@@ -139,7 +135,7 @@ public:
return SDValue(Node, R);
}
- // Return true if this node is an operand of N.
+ /// Return true if this node is an operand of N.
bool isOperandOf(const SDNode *N) const;
/// Return the ValueType of the referenced return value.
@@ -167,6 +163,7 @@ public:
inline bool isTargetMemoryOpcode() const;
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
+ inline bool isUndef() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
@@ -318,6 +315,61 @@ template<> struct simplify_type<SDUse> {
}
};
+/// These are IR-level optimization flags that may be propagated to SDNodes.
+/// TODO: This data structure should be shared by the IR optimizer and the
+/// backend.
+struct SDNodeFlags {
+private:
+ bool NoUnsignedWrap : 1;
+ bool NoSignedWrap : 1;
+ bool Exact : 1;
+ bool UnsafeAlgebra : 1;
+ bool NoNaNs : 1;
+ bool NoInfs : 1;
+ bool NoSignedZeros : 1;
+ bool AllowReciprocal : 1;
+
+public:
+ /// Default constructor turns off all optimization flags.
+ SDNodeFlags() {
+ NoUnsignedWrap = false;
+ NoSignedWrap = false;
+ Exact = false;
+ UnsafeAlgebra = false;
+ NoNaNs = false;
+ NoInfs = false;
+ NoSignedZeros = false;
+ AllowReciprocal = false;
+ }
+
+ // These are mutators for each flag.
+ void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+ void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+ void setExact(bool b) { Exact = b; }
+ void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
+ void setNoNaNs(bool b) { NoNaNs = b; }
+ void setNoInfs(bool b) { NoInfs = b; }
+ void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+ void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+
+ // These are accessors for each flag.
+ bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
+ bool hasNoSignedWrap() const { return NoSignedWrap; }
+ bool hasExact() const { return Exact; }
+ bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
+ bool hasNoNaNs() const { return NoNaNs; }
+ bool hasNoInfs() const { return NoInfs; }
+ bool hasNoSignedZeros() const { return NoSignedZeros; }
+ bool hasAllowReciprocal() const { return AllowReciprocal; }
+
+ /// Return a raw encoding of the flags.
+ /// This function should only be used to add data to the NodeID value.
+ unsigned getRawFlags() const {
+ return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
+ (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
+ (NoSignedZeros << 6) | (AllowReciprocal << 7);
+ }
+};
/// Represents one node in the SelectionDAG.
///
@@ -374,6 +426,10 @@ private:
friend struct ilist_traits<SDNode>;
public:
+ /// Unique and persistent id per SDNode in the DAG.
+ /// Used for debug printing.
+ uint16_t PersistentId;
+
//===--------------------------------------------------------------------===//
// Accessors
//
@@ -395,6 +451,9 @@ public:
return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
}
+ /// Return true if this node is an ISD::UNDEF node.
+ bool isUndef() const { return NodeType == ISD::UNDEF; }
+
/// Test if this node is a memory intrinsic (with valid pointer information).
/// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
/// non-memory intrinsics (with chains) that are not really instances of
@@ -517,10 +576,10 @@ public:
static use_iterator use_end() { return use_iterator(nullptr); }
inline iterator_range<use_iterator> uses() {
- return iterator_range<use_iterator>(use_begin(), use_end());
+ return make_range(use_begin(), use_end());
}
inline iterator_range<use_iterator> uses() const {
- return iterator_range<use_iterator>(use_begin(), use_end());
+ return make_range(use_begin(), use_end());
}
/// Return true if there are exactly NUSES uses of the indicated value.
@@ -592,8 +651,8 @@ public:
};
iterator_range<value_op_iterator> op_values() const {
- return iterator_range<value_op_iterator>(value_op_iterator(op_begin()),
- value_op_iterator(op_end()));
+ return make_range(value_op_iterator(op_begin()),
+ value_op_iterator(op_end()));
}
SDVTList getVTList() const {
@@ -605,27 +664,11 @@ public:
/// to which the glue operand points. Otherwise return NULL.
SDNode *getGluedNode() const {
if (getNumOperands() != 0 &&
- getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
+ getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
return getOperand(getNumOperands()-1).getNode();
return nullptr;
}
- // If this is a pseudo op, like copyfromreg, look to see if there is a
- // real target node glued to it. If so, return the target node.
- const SDNode *getGluedMachineNode() const {
- const SDNode *FoundNode = this;
-
- // Climb up glue edges until a machine-opcode node is found, or the
- // end of the chain is reached.
- while (!FoundNode->isMachineOpcode()) {
- const SDNode *N = FoundNode->getGluedNode();
- if (!N) break;
- FoundNode = N;
- }
-
- return FoundNode;
- }
-
/// If this node has a glue value with a user, return
/// the user (there is at most one). Otherwise return NULL.
SDNode *getGluedUser() const {
@@ -635,6 +678,10 @@ public:
return nullptr;
}
+ /// This could be defined as a virtual function and implemented more simply
+ /// and directly, but it is not, to avoid creating a vtable for this class.
+ const SDNodeFlags *getFlags() const;
+
/// Return the number of values defined/returned by this operator.
unsigned getNumValues() const { return NumValues; }
@@ -909,6 +956,9 @@ inline bool SDValue::isMachineOpcode() const {
inline unsigned SDValue::getMachineOpcode() const {
return Node->getMachineOpcode();
}
+inline bool SDValue::isUndef() const {
+ return Node->isUndef();
+}
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
@@ -943,62 +993,6 @@ inline void SDUse::setNode(SDNode *N) {
if (N) N->addUse(*this);
}
-/// These are IR-level optimization flags that may be propagated to SDNodes.
-/// TODO: This data structure should be shared by the IR optimizer and the
-/// the backend.
-struct SDNodeFlags {
-private:
- bool NoUnsignedWrap : 1;
- bool NoSignedWrap : 1;
- bool Exact : 1;
- bool UnsafeAlgebra : 1;
- bool NoNaNs : 1;
- bool NoInfs : 1;
- bool NoSignedZeros : 1;
- bool AllowReciprocal : 1;
-
-public:
- /// Default constructor turns off all optimization flags.
- SDNodeFlags() {
- NoUnsignedWrap = false;
- NoSignedWrap = false;
- Exact = false;
- UnsafeAlgebra = false;
- NoNaNs = false;
- NoInfs = false;
- NoSignedZeros = false;
- AllowReciprocal = false;
- }
-
- // These are mutators for each flag.
- void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
- void setNoSignedWrap(bool b) { NoSignedWrap = b; }
- void setExact(bool b) { Exact = b; }
- void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
- void setNoNaNs(bool b) { NoNaNs = b; }
- void setNoInfs(bool b) { NoInfs = b; }
- void setNoSignedZeros(bool b) { NoSignedZeros = b; }
- void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-
- // These are accessors for each flag.
- bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
- bool hasNoSignedWrap() const { return NoSignedWrap; }
- bool hasExact() const { return Exact; }
- bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
- bool hasNoNaNs() const { return NoNaNs; }
- bool hasNoInfs() const { return NoInfs; }
- bool hasNoSignedZeros() const { return NoSignedZeros; }
- bool hasAllowReciprocal() const { return AllowReciprocal; }
-
- /// Return a raw encoding of the flags.
- /// This function should only be used to add data to the NodeID value.
- unsigned getRawFlags() const {
- return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
- (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
- (NoSignedZeros << 6) | (AllowReciprocal << 7);
- }
-};
-
/// This class is used for single-operand SDNodes. This is solely
/// to allow co-allocation of node operands with the node itself.
class UnarySDNode : public SDNode {
@@ -1080,6 +1074,9 @@ class HandleSDNode : public SDNode {
public:
explicit HandleSDNode(SDValue X)
: SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
+ // HandleSDNodes are never inserted into the DAG, so they won't be
+ // auto-numbered. Use ID 65535 as a sentinel.
+ PersistentId = 0xffff;
InitOperands(&Op, X);
}
~HandleSDNode();
@@ -1497,6 +1494,15 @@ public:
}
};
+/// Returns true if \p V is a constant integer zero.
+bool isNullConstant(SDValue V);
+/// Returns true if \p V is an FP constant with a value of positive zero.
+bool isNullFPConstant(SDValue V);
+/// Returns true if \p V is an integer constant with all bits set.
+bool isAllOnesConstant(SDValue V);
+/// Returns true if \p V is a constant integer one.
+bool isOneConstant(SDValue V);
+
class GlobalAddressSDNode : public SDNode {
const GlobalValue *TheGlobal;
int64_t Offset;
@@ -1697,6 +1703,14 @@ public:
ConstantFPSDNode *
getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
+ /// \brief If this is a constant FP splat and the splatted constant FP is an
+ /// exact power of 2, return the log base 2 integer value. Otherwise,
+ /// return -1.
+ ///
+ /// The BitWidth specifies the necessary bit precision.
+ int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const;
+
bool isConstant() const;
static inline bool classof(const SDNode *N) {
@@ -2003,9 +2017,9 @@ class MaskedLoadStoreSDNode : public MemSDNode {
public:
friend class SelectionDAG;
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
- SDValue *Operands, unsigned numOperands,
- SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ SDValue *Operands, unsigned numOperands, SDVTList VTs,
+ EVT MemVT, MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
InitOperands(Ops, Operands, numOperands);
}
@@ -2036,7 +2050,7 @@ public:
ISD::LoadExtType getExtensionType() const {
return ISD::LoadExtType(SubclassData & 3);
- }
+ }
const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
@@ -2103,17 +2117,18 @@ public:
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
- MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef<SDValue> Operands,
+ MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef<SDValue> Operands,
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, Operands, VTs, MemVT,
MMO) {
assert(getValue().getValueType() == getValueType(0) &&
- "Incompatible type of the PathThru value in MaskedGatherSDNode");
- assert(getMask().getValueType().getVectorNumElements() ==
- getValueType(0).getVectorNumElements() &&
- "Vector width mismatch between mask and data");
- assert(getMask().getValueType().getScalarType() == MVT::i1 &&
+ "Incompatible type of the PassThru value in MaskedGatherSDNode");
+ assert(getMask().getValueType().getVectorNumElements() ==
+ getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
+ assert(getIndex().getValueType().getVectorNumElements() ==
+ getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between index and data");
}
static bool classof(const SDNode *N) {
@@ -2131,11 +2146,12 @@ public:
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs, MemVT,
MMO) {
- assert(getMask().getValueType().getVectorNumElements() ==
- getValue().getValueType().getVectorNumElements() &&
- "Vector width mismatch between mask and data");
- assert(getMask().getValueType().getScalarType() == MVT::i1 &&
+ assert(getMask().getValueType().getVectorNumElements() ==
+ getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
+ assert(getIndex().getValueType().getVectorNumElements() ==
+ getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between index and data");
}
static bool classof(const SDNode *N) {
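
getRawFlags() packs each SDNodeFlags bit into a fixed position so the whole flag set can be mixed into a FoldingSet NodeID for node uniquing. A reduced sketch of the encoding, showing three of the eight flags:

#include <cassert>

unsigned rawFlags(bool NUW, bool NSW, bool Exact) {
  // Same scheme as SDNodeFlags::getRawFlags(): one bit per flag.
  return (unsigned(NUW) << 0) | (unsigned(NSW) << 1) | (unsigned(Exact) << 2);
}

int main() {
  assert(rawFlags(true, false, true) == 5); // bits 0 and 2 set
}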
diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
index 9d6d6f5..7b621be 100644
--- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -155,7 +155,7 @@ namespace llvm {
"Attempt to construct index with 0 pointer.");
}
- /// Returns true if this is a valid index. Invalid indicies do
+ /// Returns true if this is a valid index. Invalid indices do
/// not point into an index table, and cannot be compared.
bool isValid() const {
return lie.getPointer();
@@ -272,7 +272,7 @@ namespace llvm {
SlotIndex getNextSlot() const {
Slot s = getSlot();
if (s == Slot_Dead) {
- return SlotIndex(listEntry()->getNextNode(), Slot_Block);
+ return SlotIndex(&*++listEntry()->getIterator(), Slot_Block);
}
return SlotIndex(listEntry(), s + 1);
}
@@ -280,7 +280,7 @@ namespace llvm {
/// Returns the next index. This is the index corresponding to this
/// index's slot, but for the next instruction.
SlotIndex getNextIndex() const {
- return SlotIndex(listEntry()->getNextNode(), getSlot());
+ return SlotIndex(&*++listEntry()->getIterator(), getSlot());
}
/// Returns the previous slot in the index list. This could be either the
@@ -292,7 +292,7 @@ namespace llvm {
SlotIndex getPrevSlot() const {
Slot s = getSlot();
if (s == Slot_Block) {
- return SlotIndex(listEntry()->getPrevNode(), Slot_Dead);
+ return SlotIndex(&*--listEntry()->getIterator(), Slot_Dead);
}
return SlotIndex(listEntry(), s - 1);
}
@@ -300,7 +300,7 @@ namespace llvm {
/// Returns the previous index. This is the index corresponding to this
/// index's slot, but for the previous instruction.
SlotIndex getPrevIndex() const {
- return SlotIndex(listEntry()->getPrevNode(), getSlot());
+ return SlotIndex(&*--listEntry()->getIterator(), getSlot());
}
};
@@ -333,6 +333,8 @@ namespace llvm {
/// This pass assigns indexes to each instruction.
class SlotIndexes : public MachineFunctionPass {
private:
+ // IndexListEntry allocator.
+ BumpPtrAllocator ileAllocator;
typedef ilist<IndexListEntry> IndexList;
IndexList indexList;
@@ -353,9 +355,6 @@ namespace llvm {
/// and MBB id.
SmallVector<IdxMBBPair, 8> idx2MBBMap;
- // IndexListEntry allocator.
- BumpPtrAllocator ileAllocator;
-
IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
IndexListEntry *entry =
static_cast<IndexListEntry*>(
@@ -377,6 +376,11 @@ namespace llvm {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
}
+ ~SlotIndexes() {
+ // The indexList's nodes are all allocated in the BumpPtrAllocator.
+ indexList.clearAndLeakNodesUnsafely();
+ }
+
void getAnalysisUsage(AnalysisUsage &au) const override;
void releaseMemory() override;
@@ -427,11 +431,11 @@ namespace llvm {
/// Returns the next non-null index, if one exists.
/// Otherwise returns getLastIndex().
SlotIndex getNextNonNullIndex(SlotIndex Index) {
- IndexList::iterator I = Index.listEntry();
+ IndexList::iterator I = Index.listEntry()->getIterator();
IndexList::iterator E = indexList.end();
while (++I != E)
if (I->getInstr())
- return SlotIndex(I, Index.getSlot());
+ return SlotIndex(&*I, Index.getSlot());
// We reached the end of the function.
return getLastIndex();
}
@@ -502,49 +506,52 @@ namespace llvm {
return getMBBRange(mbb).second;
}
+ /// Iterator over the idx2MBBMap (sorted pairs of basic block starting slot
+ /// index and basic block).
+ typedef SmallVectorImpl<IdxMBBPair>::const_iterator MBBIndexIterator;
+ /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
+ /// equal to \p To.
+ MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
+ return std::lower_bound(I, idx2MBBMap.end(), To);
+ }
+ /// Get an iterator pointing to the first IdxMBBPair whose SlotIndex is
+ /// greater than or equal to \p Idx.
+ MBBIndexIterator findMBBIndex(SlotIndex Idx) const {
+ return advanceMBBIndex(idx2MBBMap.begin(), Idx);
+ }
+ /// Returns an iterator for the beginning of the idx2MBBMap.
+ MBBIndexIterator MBBIndexBegin() const {
+ return idx2MBBMap.begin();
+ }
+ /// Return an iterator for the end of the idx2MBBMap.
+ MBBIndexIterator MBBIndexEnd() const {
+ return idx2MBBMap.end();
+ }
+
/// Returns the basic block which the given index falls in.
MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
if (MachineInstr *MI = getInstructionFromIndex(index))
return MI->getParent();
- SmallVectorImpl<IdxMBBPair>::const_iterator I =
- std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
+
+ MBBIndexIterator I = findMBBIndex(index);
// Take the pair containing the index
- SmallVectorImpl<IdxMBBPair>::const_iterator J =
- ((I != idx2MBBMap.end() && I->first > index) ||
- (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I;
+ MBBIndexIterator J =
+ ((I != MBBIndexEnd() && I->first > index) ||
+ (I == MBBIndexEnd() && !idx2MBBMap.empty())) ? std::prev(I) : I;
- assert(J != idx2MBBMap.end() && J->first <= index &&
+ assert(J != MBBIndexEnd() && J->first <= index &&
index < getMBBEndIdx(J->second) &&
"index does not correspond to an MBB");
return J->second;
}
- bool findLiveInMBBs(SlotIndex start, SlotIndex end,
- SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
- SmallVectorImpl<IdxMBBPair>::const_iterator itr =
- std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
- bool resVal = false;
-
- while (itr != idx2MBBMap.end()) {
- if (itr->first >= end)
- break;
- mbbs.push_back(itr->second);
- resVal = true;
- ++itr;
- }
- return resVal;
- }
-
/// Returns the MBB covering the given range, or null if the range covers
/// more than one basic block.
MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
assert(start < end && "Backwards ranges not allowed.");
-
- SmallVectorImpl<IdxMBBPair>::const_iterator itr =
- std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-
- if (itr == idx2MBBMap.end()) {
+ MBBIndexIterator itr = findMBBIndex(start);
+ if (itr == MBBIndexEnd()) {
itr = std::prev(itr);
return itr->second;
}
@@ -580,11 +587,11 @@ namespace llvm {
IndexList::iterator prevItr, nextItr;
if (Late) {
// Insert mi's index immediately before the following instruction.
- nextItr = getIndexAfter(mi).listEntry();
+ nextItr = getIndexAfter(mi).listEntry()->getIterator();
prevItr = std::prev(nextItr);
} else {
// Insert mi's index immediately after the preceding instruction.
- prevItr = getIndexBefore(mi).listEntry();
+ prevItr = getIndexBefore(mi).listEntry()->getIterator();
nextItr = std::next(prevItr);
}
@@ -646,11 +653,11 @@ namespace llvm {
if (nextMBB == mbb->getParent()->end()) {
startEntry = &indexList.back();
endEntry = createEntry(nullptr, 0);
- newItr = indexList.insertAfter(startEntry, endEntry);
+ newItr = indexList.insertAfter(startEntry->getIterator(), endEntry);
} else {
startEntry = createEntry(nullptr, 0);
- endEntry = getMBBStartIdx(nextMBB).listEntry();
- newItr = indexList.insert(endEntry, startEntry);
+ endEntry = getMBBStartIdx(&*nextMBB).listEntry();
+ newItr = indexList.insert(endEntry->getIterator(), startEntry);
}
SlotIndex startIdx(startEntry, SlotIndex::Slot_Block);
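
The new MBBIndexIterator helpers all lean on one property: idx2MBBMap is kept sorted by the blocks' starting slot indexes, so a binary search yields the first block at or after a query index. A standalone sketch of what advanceMBBIndex/findMBBIndex compute, with plain pairs standing in for IdxMBBPair:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // (starting slot index, block name), sorted by index as in idx2MBBMap.
  std::vector<std::pair<unsigned, const char *>> Idx2MBB = {
      {0, "bb0"}, {16, "bb1"}, {48, "bb2"}};
  auto I = std::lower_bound(
      Idx2MBB.begin(), Idx2MBB.end(), 20u,
      [](const std::pair<unsigned, const char *> &P, unsigned V) {
        return P.first < V;
      });
  assert(I->first == 48); // first entry whose start index is >= 20
}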
diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
index fdc1a91..972a616 100644
--- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include <map>
#include <vector>
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10c099d..2f13791 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -41,12 +41,12 @@ public:
~TargetLoweringObjectFileELF() override {}
- void emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM,
+ void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM,
const MCSymbol *Sym) const override;
/// Given a constant with the SectionKind, return a section that it should be
/// placed in.
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
@@ -103,7 +103,7 @@ public:
Mangler &Mang,
const TargetMachine &TM) const override;
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
/// The mach-o version of this method defaults to returning a stub reference.
@@ -123,6 +123,9 @@ public:
const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
+
+ void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ Mangler &Mang, const TargetMachine &TM) const override;
};
@@ -140,8 +143,7 @@ public:
const TargetMachine &TM) const override;
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang,
- const TargetMachine &TM) const override;
+ Mangler &Mang, const TargetMachine &TM) const override;
MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
const TargetMachine &TM) const override;
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetSchedule.h b/contrib/llvm/include/llvm/CodeGen/TargetSchedule.h
index 751fac4..81054ab 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -81,6 +81,12 @@ public:
return nullptr;
}
+ /// \brief Return true if this machine model includes an instruction-level
+ /// scheduling model or cycle-to-cycle itinerary data.
+ bool hasInstrSchedModelOrItineraries() const {
+ return hasInstrSchedModel() || hasInstrItineraries();
+ }
+
/// \brief Identify the processor corresponding to the current subtarget.
unsigned getProcessorID() const { return SchedModel.getProcessorID(); }
diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
index e1a9fd3..929eb88 100644
--- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -89,6 +89,19 @@ namespace llvm {
return VecTy;
}
+ /// Return the type converted to an equivalently sized integer or vector
+ /// with integer element type. Similar to changeVectorElementTypeToInteger,
+ /// but also handles scalars.
+ EVT changeTypeToInteger() {
+ if (isVector())
+ return changeVectorElementTypeToInteger();
+
+ if (isSimple())
+ return MVT::getIntegerVT(getSizeInBits());
+
+ return changeExtendedTypeToInteger();
+ }
+
/// isSimple - Test if the given EVT is simple (as opposed to being
/// extended).
bool isSimple() const {
@@ -151,6 +164,11 @@ namespace llvm {
return isSimple() ? V.is1024BitVector() : isExtended1024BitVector();
}
+ /// is2048BitVector - Return true if this is a 2048-bit vector type.
+ bool is2048BitVector() const {
+ return isSimple() ? V.is2048BitVector() : isExtended2048BitVector();
+ }
+
/// isOverloaded - Return true if this is an overloaded type for TableGen.
bool isOverloaded() const {
return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny);
@@ -342,6 +360,7 @@ namespace llvm {
// Methods for handling the Extended-type case in functions above.
// These are all out-of-line to prevent users of this header file
// from having a dependency on Type.h.
+ EVT changeExtendedTypeToInteger() const;
EVT changeExtendedVectorElementTypeToInteger() const;
static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
@@ -356,6 +375,7 @@ namespace llvm {
bool isExtended256BitVector() const LLVM_READONLY;
bool isExtended512BitVector() const LLVM_READONLY;
bool isExtended1024BitVector() const LLVM_READONLY;
+ bool isExtended2048BitVector() const LLVM_READONLY;
EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const LLVM_READONLY;
unsigned getExtendedSizeInBits() const;
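
changeTypeToInteger() preserves total width: a scalar f32 becomes i32, a v4f32 becomes v4i32, and so on. That same width guarantee is what makes a bit-for-bit integer view of a float well defined; a small illustration:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // f32 -> i32 is width-preserving, so every float has an exact integer image.
  float F = 1.0f;
  uint32_t Bits;
  static_assert(sizeof(F) == sizeof(Bits), "f32 and i32 are both 32 bits");
  std::memcpy(&Bits, &F, sizeof(Bits));
  assert(Bits == 0x3f800000u); // IEEE-754 single-precision encoding of 1.0
}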
diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.td b/contrib/llvm/include/llvm/CodeGen/ValueTypes.td
index 2b30f14..f29ec42 100644
--- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -33,55 +33,70 @@ def f80 : ValueType<80 , 10>; // 80-bit floating point value
def f128 : ValueType<128, 11>; // 128-bit floating point value
def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value
-def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
-def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
-def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
-def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
-def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
-def v1i8 : ValueType<16, 19>; // 1 x i8 vector value
-def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 23>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 24>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 25>; // 64 x i8 vector value
-def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
-def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,41>; // 16 x i64 vector value
-def v1i128 : ValueType<128, 42>; // 1 x i128 vector value
-
-def v2f16 : ValueType<32 , 43>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 44>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 45>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 46>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 47>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 48>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 49>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 50>; // 16 x f32 vector value
-def v1f64 : ValueType<64, 51>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 52>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 53>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 54>; // 8 x f64 vector value
-
-
-def x86mmx : ValueType<64 , 55>; // X86 MMX value
-def FlagVT : ValueType<0 , 56>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 57>; // Produces no value
-def untyped: ValueType<8 , 58>; // Produces an untyped value
+def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
+def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
+def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
+def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
+def v512i1 : ValueType<512, 19>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,20>; // 1024 x i1 vector value
+
+def v1i8 : ValueType<16, 21>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 25>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 26>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 27>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,28>; // 128 x i8 vector value
+def v256i8 : ValueType<2048,29>; // 256 x i8 vector value
+
+def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 33>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 34>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 35>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,36>; // 64 x i16 vector value
+def v128i16: ValueType<2048,37>; // 128 x i16 vector value
+
+def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 40>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 41>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 42>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,43>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,44>; // 64 x i32 vector value
+
+def v1i64 : ValueType<64 , 45>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 46>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 47>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 48>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,49>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,50>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 51>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value
+def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 54>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 57>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 58>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 59>; // 16 x f32 vector value
+def v1f64 : ValueType<64, 60>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 61>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 62>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 63>; // 8 x f64 vector value
+
+
+def x86mmx : ValueType<64 , 64>; // X86 MMX value
+def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 66>; // Produces no value
+def untyped: ValueType<8 , 67>; // Produces an untyped value
+def token : ValueType<0 , 249>; // TokenTy
def MetadataVT: ValueType<0, 250>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
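Each ValueType<Size, Value> def above surfaces in C++ as an llvm::MVT enumerator carrying the same size and element-count information; a minimal sanity-check sketch, assuming the standard MVT accessors:

    // v4i32 was defined above as ValueType<128, 40>: a 128-bit, 4 x i32 vector.
    #include "llvm/CodeGen/ValueTypes.h"
    #include <cassert>

    void checkV4i32() {
      llvm::MVT VT = llvm::MVT::v4i32;
      assert(VT.getSizeInBits() == 128);
      assert(VT.getVectorNumElements() == 4);
      assert(VT.getVectorElementType() == llvm::MVT::i32);
    }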
diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
index 75638a0..70d558f 100644
--- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -14,145 +14,103 @@
#ifndef LLVM_CODEGEN_WINEHFUNCINFO_H
#define LLVM_CODEGEN_WINEHFUNCINFO_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/DenseMap.h"
namespace llvm {
+class AllocaInst;
class BasicBlock;
+class CatchReturnInst;
class Constant;
class Function;
class GlobalVariable;
class InvokeInst;
class IntrinsicInst;
class LandingPadInst;
+class MCExpr;
class MCSymbol;
+class MachineBasicBlock;
class Value;
-enum ActionType { Catch, Cleanup };
-
-class ActionHandler {
-public:
- ActionHandler(BasicBlock *BB, ActionType Type)
- : StartBB(BB), Type(Type), EHState(-1), HandlerBlockOrFunc(nullptr) {}
-
- ActionType getType() const { return Type; }
- BasicBlock *getStartBlock() const { return StartBB; }
-
- bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
+// The following structs represent the .xdata tables for various
+// Windows-related EH personalities.
- void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
- Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
+typedef PointerUnion<const BasicBlock *, MachineBasicBlock *> MBBOrBasicBlock;
- void setEHState(int State) { EHState = State; }
- int getEHState() const { return EHState; }
-
-private:
- BasicBlock *StartBB;
- ActionType Type;
- int EHState;
-
- // Can be either a BlockAddress or a Function depending on the EH personality.
- Constant *HandlerBlockOrFunc;
-};
-
-class CatchHandler : public ActionHandler {
-public:
- CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
- : ActionHandler(BB, ActionType::Catch), Selector(Selector),
- NextBB(NextBB), ExceptionObjectVar(nullptr),
- ExceptionObjectIndex(-1) {}
-
- // Method for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ActionHandler *H) {
- return H->getType() == ActionType::Catch;
- }
-
- Constant *getSelector() const { return Selector; }
- BasicBlock *getNextBB() const { return NextBB; }
-
- const Value *getExceptionVar() { return ExceptionObjectVar; }
- TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
- void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
- void setExceptionVarIndex(int Index) { ExceptionObjectIndex = Index; }
- int getExceptionVarIndex() const { return ExceptionObjectIndex; }
- void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
- ReturnTargets = Targets;
- }
-
-private:
- Constant *Selector;
- BasicBlock *NextBB;
- // While catch handlers are being outlined the ExceptionObjectVar field will
- // be populated with the instruction in the parent frame that corresponds
- // to the exception object (or nullptr if the catch does not use an
- // exception object) and the ExceptionObjectIndex field will be -1.
- // When the parseEHActions function is called to populate a vector of
- // instances of this class, the ExceptionObjectVar field will be nullptr
- // and the ExceptionObjectIndex will be the index of the exception object in
- // the parent function's localescape block.
- const Value *ExceptionObjectVar;
- int ExceptionObjectIndex;
- TinyPtrVector<BasicBlock *> ReturnTargets;
+struct CxxUnwindMapEntry {
+ int ToState;
+ MBBOrBasicBlock Cleanup;
};
-class CleanupHandler : public ActionHandler {
-public:
- CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
+/// Similar to CxxUnwindMapEntry, but supports SEH filters.
+struct SEHUnwindMapEntry {
+ /// If unwinding continues through this handler, transition to the handler at
+ /// this state. This indexes into SEHUnwindMap.
+ int ToState = -1;
- // Method for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ActionHandler *H) {
- return H->getType() == ActionType::Cleanup;
- }
-};
-
-void parseEHActions(const IntrinsicInst *II,
- SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions);
+ bool IsFinally = false;
-// The following structs respresent the .xdata for functions using C++
-// exceptions on Windows.
+ /// Holds the filter expression function.
+ const Function *Filter = nullptr;
-struct WinEHUnwindMapEntry {
- int ToState;
- Function *Cleanup;
+ /// Holds the __except or __finally basic block.
+ MBBOrBasicBlock Handler;
};
struct WinEHHandlerType {
int Adjectives;
+ /// The CatchObj starts out life as an LLVM alloca and is eventually turned
+ /// into a frame index.
+ union {
+ const AllocaInst *Alloca;
+ int FrameIndex;
+ } CatchObj = {};
GlobalVariable *TypeDescriptor;
- int CatchObjRecoverIdx;
- Function *Handler;
+ MBBOrBasicBlock Handler;
};
struct WinEHTryBlockMapEntry {
- int TryLow;
- int TryHigh;
+ int TryLow = -1;
+ int TryHigh = -1;
+ int CatchHigh = -1;
SmallVector<WinEHHandlerType, 1> HandlerArray;
};
+enum class ClrHandlerType { Catch, Finally, Fault, Filter };
+
+struct ClrEHUnwindMapEntry {
+ MBBOrBasicBlock Handler;
+ uint32_t TypeToken;
+ int Parent;
+ ClrHandlerType HandlerType;
+};
+
struct WinEHFuncInfo {
- DenseMap<const Function *, const LandingPadInst *> RootLPad;
- DenseMap<const Function *, const InvokeInst *> LastInvoke;
- DenseMap<const Function *, int> HandlerEnclosedState;
- DenseMap<const Function *, bool> LastInvokeVisited;
- DenseMap<const LandingPadInst *, int> LandingPadStateMap;
- DenseMap<const Function *, int> CatchHandlerParentFrameObjIdx;
- DenseMap<const Function *, int> CatchHandlerParentFrameObjOffset;
- DenseMap<const Function *, int> CatchHandlerMaxState;
- DenseMap<const Function *, int> HandlerBaseState;
- SmallVector<WinEHUnwindMapEntry, 4> UnwindMap;
+ DenseMap<const Instruction *, int> EHPadStateMap;
+ DenseMap<const FuncletPadInst *, int> FuncletBaseStateMap;
+ DenseMap<const InvokeInst *, int> InvokeStateMap;
+ DenseMap<const CatchReturnInst *, const BasicBlock *>
+ CatchRetSuccessorColorMap;
+ DenseMap<MCSymbol *, std::pair<int, MCSymbol *>> LabelToStateMap;
+ SmallVector<CxxUnwindMapEntry, 4> CxxUnwindMap;
SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
- SmallVector<std::pair<MCSymbol *, int>, 4> IPToStateList;
+ SmallVector<SEHUnwindMapEntry, 4> SEHUnwindMap;
+ SmallVector<ClrEHUnwindMapEntry, 4> ClrEHUnwindMap;
int UnwindHelpFrameIdx = INT_MAX;
- int UnwindHelpFrameOffset = -1;
- unsigned NumIPToStateFuncsVisited = 0;
+ int PSPSymFrameIdx = INT_MAX;
+
+ int getLastStateNumber() const { return CxxUnwindMap.size() - 1; }
+
+ void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd);
- /// localescape index of the 32-bit EH registration node. Set by
- /// WinEHStatePass and used indirectly by SEH filter functions of the parent.
- int EHRegNodeEscapeIndex = INT_MAX;
+ int EHRegNodeFrameIndex = INT_MAX;
+ int EHRegNodeEndOffset = INT_MAX;
+ int SEHSetFrameOffset = INT_MAX;
- WinEHFuncInfo() {}
+ WinEHFuncInfo();
};
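For illustration, a minimal sketch of how a state-numbering pass might append one C++ unwind entry; CleanupBB is a hypothetical cleanup block, and the real population happens in the calculate* functions declared below:

    void addCleanupState(WinEHFuncInfo &FuncInfo, const BasicBlock *CleanupBB) {
      CxxUnwindMapEntry Entry;
      Entry.ToState = -1;        // -1: unwinding continues out of the function
      Entry.Cleanup = CleanupBB; // MBBOrBasicBlock holds an IR or machine block
      FuncInfo.CxxUnwindMap.push_back(Entry);
      // the new entry's state number is now FuncInfo.getLastStateNumber()
    }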
/// Analyze the IR in ParentFn and its handlers to build WinEHFuncInfo, which
@@ -161,5 +119,12 @@ struct WinEHFuncInfo {
void calculateWinCXXEHStateNumbers(const Function *ParentFn,
WinEHFuncInfo &FuncInfo);
+void calculateSEHStateNumbers(const Function *ParentFn,
+ WinEHFuncInfo &FuncInfo);
+
+void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo);
+
+void calculateCatchReturnSuccessorColors(const Function *Fn,
+ WinEHFuncInfo &FuncInfo);
}
#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
new file mode 100644
index 0000000..7728120
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -0,0 +1,367 @@
+//===- CodeView.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H
+#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H
+
+#include <cinttypes>
+
+namespace llvm {
+namespace codeview {
+
+enum class CallingConvention : uint8_t {
+ NearC = 0x00, // near right to left push, caller pops stack
+ FarC = 0x01, // far right to left push, caller pops stack
+ NearPascal = 0x02, // near left to right push, callee pops stack
+ FarPascal = 0x03, // far left to right push, callee pops stack
+ NearFast = 0x04, // near left to right push with regs, callee pops stack
+ FarFast = 0x05, // far left to right push with regs, callee pops stack
+ NearStdCall = 0x07, // near standard call
+ FarStdCall = 0x08, // far standard call
+ NearSysCall = 0x09, // near sys call
+ FarSysCall = 0x0a, // far sys call
+ ThisCall = 0x0b, // this call (this passed in register)
+ MipsCall = 0x0c, // Mips call
+ Generic = 0x0d, // Generic call sequence
+ AlphaCall = 0x0e, // Alpha call
+ PpcCall = 0x0f, // PPC call
+ SHCall = 0x10, // Hitachi SuperH call
+ ArmCall = 0x11, // ARM call
+ AM33Call = 0x12, // AM33 call
+ TriCall = 0x13, // TriCore Call
+ SH5Call = 0x14, // Hitachi SuperH-5 call
+ M32RCall = 0x15, // M32R Call
+ ClrCall = 0x16, // clr call
+ Inline =
+ 0x17, // Marker for routines always inlined and thus lacking a convention
+ NearVector = 0x18 // near left to right push with regs, callee pops stack
+};
+
+enum class ClassOptions : uint16_t {
+ None = 0x0000,
+ Packed = 0x0001,
+ HasConstructorOrDestructor = 0x0002,
+ HasOverloadedOperator = 0x0004,
+ Nested = 0x0008,
+ ContainsNestedClass = 0x0010,
+ HasOverloadedAssignmentOperator = 0x0020,
+ HasConversionOperator = 0x0040,
+ ForwardReference = 0x0080,
+ Scoped = 0x0100,
+ HasUniqueName = 0x0200,
+ Sealed = 0x0400,
+ Intrinsic = 0x2000
+};
+
+inline ClassOptions operator|(ClassOptions a, ClassOptions b) {
+ return static_cast<ClassOptions>(static_cast<uint16_t>(a) |
+ static_cast<uint16_t>(b));
+}
+
+inline ClassOptions operator&(ClassOptions a, ClassOptions b) {
+ return static_cast<ClassOptions>(static_cast<uint16_t>(a) &
+ static_cast<uint16_t>(b));
+}
+
+inline ClassOptions operator~(ClassOptions a) {
+ return static_cast<ClassOptions>(~static_cast<uint16_t>(a));
+}
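These scoped enums do not convert to integers implicitly, so the overloads above are what make flag composition work; a usage sketch:

    ClassOptions Opts = ClassOptions::Packed | ClassOptions::HasUniqueName;
    bool IsPacked = (Opts & ClassOptions::Packed) != ClassOptions::None;
    Opts = Opts & ~ClassOptions::Packed; // clear a single flag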
+
+enum class FrameProcedureOptions : uint32_t {
+ None = 0x00000000,
+ HasAlloca = 0x00000001,
+ HasSetJmp = 0x00000002,
+ HasLongJmp = 0x00000004,
+ HasInlineAssembly = 0x00000008,
+ HasExceptionHandling = 0x00000010,
+ MarkedInline = 0x00000020,
+ HasStructuredExceptionHandling = 0x00000040,
+ Naked = 0x00000080,
+ SecurityChecks = 0x00000100,
+ AsynchronousExceptionHandling = 0x00000200,
+ NoStackOrderingForSecurityChecks = 0x00000400,
+ Inlined = 0x00000800,
+ StrictSecurityChecks = 0x00001000,
+ SafeBuffers = 0x00002000,
+ ProfileGuidedOptimization = 0x00040000,
+ ValidProfileCounts = 0x00080000,
+ OptimizedForSpeed = 0x00100000,
+ GuardCfg = 0x00200000,
+ GuardCfw = 0x00400000
+};
+
+inline FrameProcedureOptions operator|(FrameProcedureOptions a,
+ FrameProcedureOptions b) {
+ return static_cast<FrameProcedureOptions>(static_cast<uint32_t>(a) |
+ static_cast<uint32_t>(b));
+}
+
+inline FrameProcedureOptions operator&(FrameProcedureOptions a,
+ FrameProcedureOptions b) {
+ return static_cast<FrameProcedureOptions>(static_cast<uint32_t>(a) &
+ static_cast<uint32_t>(b));
+}
+
+inline FrameProcedureOptions operator~(FrameProcedureOptions a) {
+ return static_cast<FrameProcedureOptions>(~static_cast<uint32_t>(a));
+}
+
+enum class FunctionOptions : uint8_t {
+ None = 0x00,
+ CxxReturnUdt = 0x01,
+ Constructor = 0x02,
+ ConstructorWithVirtualBases = 0x04
+};
+
+inline FunctionOptions operator|(FunctionOptions a, FunctionOptions b) {
+ return static_cast<FunctionOptions>(static_cast<uint8_t>(a) |
+ static_cast<uint8_t>(b));
+}
+
+inline FunctionOptions operator&(FunctionOptions a, FunctionOptions b) {
+ return static_cast<FunctionOptions>(static_cast<uint8_t>(a) &
+ static_cast<uint8_t>(b));
+}
+
+inline FunctionOptions operator~(FunctionOptions a) {
+ return static_cast<FunctionOptions>(~static_cast<uint8_t>(a));
+}
+
+enum class HfaKind : uint8_t {
+ None = 0x00,
+ Float = 0x01,
+ Double = 0x02,
+ Other = 0x03
+};
+
+enum class MemberAccess : uint8_t {
+ None = 0,
+ Private = 1,
+ Protected = 2,
+ Public = 3
+};
+
+enum class MethodKind : uint8_t {
+ Vanilla = 0x00,
+ Virtual = 0x01,
+ Static = 0x02,
+ Friend = 0x03,
+ IntroducingVirtual = 0x04,
+ PureVirtual = 0x05,
+ PureIntroducingVirtual = 0x06
+};
+
+enum class MethodOptions : uint16_t {
+ None = 0x0000,
+ Pseudo = 0x0020,
+ CompilerGenerated = 0x0100,
+ Sealed = 0x0200
+};
+
+inline MethodOptions operator|(MethodOptions a, MethodOptions b) {
+ return static_cast<MethodOptions>(static_cast<uint16_t>(a) |
+ static_cast<uint16_t>(b));
+}
+
+inline MethodOptions operator&(MethodOptions a, MethodOptions b) {
+ return static_cast<MethodOptions>(static_cast<uint16_t>(a) &
+ static_cast<uint16_t>(b));
+}
+
+inline MethodOptions operator~(MethodOptions a) {
+ return static_cast<MethodOptions>(~static_cast<uint16_t>(a));
+}
+
+enum class ModifierOptions : uint16_t {
+ None = 0x0000,
+ Const = 0x0001,
+ Volatile = 0x0002,
+ Unaligned = 0x0004
+};
+
+inline ModifierOptions operator|(ModifierOptions a, ModifierOptions b) {
+ return static_cast<ModifierOptions>(static_cast<uint16_t>(a) |
+ static_cast<uint16_t>(b));
+}
+
+inline ModifierOptions operator&(ModifierOptions a, ModifierOptions b) {
+ return static_cast<ModifierOptions>(static_cast<uint16_t>(a) &
+ static_cast<uint16_t>(b));
+}
+
+inline ModifierOptions operator~(ModifierOptions a) {
+ return static_cast<ModifierOptions>(~static_cast<uint16_t>(a));
+}
+
+enum class ModuleSubstreamKind : uint32_t {
+ Symbols = 0xf1,
+ Lines = 0xf2,
+ StringTable = 0xf3,
+ FileChecksums = 0xf4,
+ FrameData = 0xf5,
+ InlineeLines = 0xf6,
+ CrossScopeImports = 0xf7,
+ CrossScopeExports = 0xf8
+};
+
+enum class PointerKind : uint8_t {
+ Near16 = 0x00, // 16 bit pointer
+ Far16 = 0x01, // 16:16 far pointer
+ Huge16 = 0x02, // 16:16 huge pointer
+ BasedOnSegment = 0x03, // based on segment
+ BasedOnValue = 0x04, // based on value of base
+ BasedOnSegmentValue = 0x05, // based on segment value of base
+ BasedOnAddress = 0x06, // based on address of base
+ BasedOnSegmentAddress = 0x07, // based on segment address of base
+ BasedOnType = 0x08, // based on type
+ BasedOnSelf = 0x09, // based on self
+ Near32 = 0x0a, // 32 bit pointer
+ Far32 = 0x0b, // 16:32 pointer
+ Near64 = 0x0c // 64 bit pointer
+};
+
+enum class PointerMode : uint8_t {
+ Pointer = 0x00, // "normal" pointer
+ LValueReference = 0x01, // "old" reference
+ PointerToDataMember = 0x02, // pointer to data member
+ PointerToMemberFunction = 0x03, // pointer to member function
+ RValueReference = 0x04 // r-value reference
+};
+
+enum class PointerOptions : uint32_t {
+ None = 0x00000000,
+ Flat32 = 0x00000100,
+ Volatile = 0x00000200,
+ Const = 0x00000400,
+ Unaligned = 0x00000800,
+ Restrict = 0x00001000,
+ WinRTSmartPointer = 0x00080000
+};
+
+inline PointerOptions operator|(PointerOptions a, PointerOptions b) {
+  return static_cast<PointerOptions>(static_cast<uint32_t>(a) |
+                                     static_cast<uint32_t>(b));
+}
+
+inline PointerOptions operator&(PointerOptions a, PointerOptions b) {
+  return static_cast<PointerOptions>(static_cast<uint32_t>(a) &
+                                     static_cast<uint32_t>(b));
+}
+
+inline PointerOptions operator~(PointerOptions a) {
+  return static_cast<PointerOptions>(~static_cast<uint32_t>(a));
+}
+
+enum class PointerToMemberRepresentation : uint16_t {
+ Unknown = 0x00, // not specified (pre VC8)
+ SingleInheritanceData = 0x01, // member data, single inheritance
+ MultipleInheritanceData = 0x02, // member data, multiple inheritance
+ VirtualInheritanceData = 0x03, // member data, virtual inheritance
+ GeneralData = 0x04, // member data, most general
+ SingleInheritanceFunction = 0x05, // member function, single inheritance
+ MultipleInheritanceFunction = 0x06, // member function, multiple inheritance
+ VirtualInheritanceFunction = 0x07, // member function, virtual inheritance
+ GeneralFunction = 0x08 // member function, most general
+};
+
+enum class TypeRecordKind : uint16_t {
+ None = 0,
+
+ VirtualTableShape = 0x000a,
+ Label = 0x000e,
+ EndPrecompiledHeader = 0x0014,
+
+ Modifier = 0x1001,
+ Pointer = 0x1002,
+ Procedure = 0x1008,
+ MemberFunction = 0x1009,
+
+ Oem = 0x100f,
+ Oem2 = 0x1011,
+
+ ArgumentList = 0x1201,
+ FieldList = 0x1203,
+ BitField = 0x1205,
+ MethodList = 0x1206,
+
+ BaseClass = 0x1400,
+ VirtualBaseClass = 0x1401,
+ IndirectVirtualBaseClass = 0x1402,
+ Index = 0x1404,
+ VirtualFunctionTablePointer = 0x1409,
+
+ Enumerate = 0x1502,
+ Array = 0x1503,
+ Class = 0x1504,
+ Structure = 0x1505,
+ Union = 0x1506,
+ Enum = 0x1507,
+ Alias = 0x150a,
+ Member = 0x150d,
+ StaticMember = 0x150e,
+ Method = 0x150f,
+ NestedType = 0x1510,
+ OneMethod = 0x1511,
+ VirtualFunctionTable = 0x151d,
+
+ FunctionId = 0x1601,
+ MemberFunctionId = 0x1602,
+ BuildInfo = 0x1603,
+ SubstringList = 0x1604,
+ StringId = 0x1605,
+ UdtSourceLine = 0x1606,
+
+ SByte = 0x8000,
+ Int16 = 0x8001,
+ UInt16 = 0x8002,
+ Int32 = 0x8003,
+ UInt32 = 0x8004,
+ Single = 0x8005,
+ Double = 0x8006,
+ Float80 = 0x8007,
+ Float128 = 0x8008,
+ Int64 = 0x8009,
+ UInt64 = 0x800a,
+ Float48 = 0x800b,
+ Complex32 = 0x800c,
+ Complex64 = 0x800d,
+ Complex80 = 0x800e,
+ Complex128 = 0x800f,
+ VarString = 0x8010,
+
+ Int128 = 0x8017,
+ UInt128 = 0x8018,
+
+ Decimal = 0x8019,
+ Date = 0x801a,
+ Utf8String = 0x801b,
+
+ Float16 = 0x801c
+};
+
+enum class VirtualTableSlotKind : uint8_t {
+ Near16 = 0x00,
+ Far16 = 0x01,
+ This = 0x02,
+ Outer = 0x03,
+ Meta = 0x04,
+ Near = 0x05,
+ Far = 0x06
+};
+
+enum class WindowsRTClassKind : uint8_t {
+ None = 0x00,
+ RefClass = 0x01,
+ ValueClass = 0x02,
+ Interface = 0x03
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h
new file mode 100644
index 0000000..14d057a
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h
@@ -0,0 +1,39 @@
+//===- CodeViewOStream.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H
+#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+namespace llvm {
+namespace codeview {
+
+template <typename Writer> class CodeViewOStream {
+private:
+ CodeViewOStream(const CodeViewOStream &) = delete;
+ CodeViewOStream &operator=(const CodeViewOStream &) = delete;
+
+public:
+ typedef typename Writer::LabelType LabelType;
+
+public:
+ explicit CodeViewOStream(Writer &W);
+
+private:
+ uint64_t size() const { return W.tell(); }
+
+private:
+ Writer &W;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h
new file mode 100644
index 0000000..1ed6248
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h
@@ -0,0 +1,78 @@
+//===- FieldListRecordBuilder.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H
+
+#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
+
+namespace llvm {
+namespace codeview {
+
+class MethodInfo {
+public:
+ MethodInfo() : Access(), Kind(), Options(), Type(), VTableSlotOffset(-1) {}
+
+ MethodInfo(MemberAccess Access, MethodKind Kind, MethodOptions Options,
+ TypeIndex Type, int32_t VTableSlotOffset)
+ : Access(Access), Kind(Kind), Options(Options), Type(Type),
+ VTableSlotOffset(VTableSlotOffset) {}
+
+ MemberAccess getAccess() const { return Access; }
+ MethodKind getKind() const { return Kind; }
+ MethodOptions getOptions() const { return Options; }
+ TypeIndex getType() const { return Type; }
+ int32_t getVTableSlotOffset() const { return VTableSlotOffset; }
+
+private:
+ MemberAccess Access;
+ MethodKind Kind;
+ MethodOptions Options;
+ TypeIndex Type;
+ int32_t VTableSlotOffset;
+};
+
+class FieldListRecordBuilder : public ListRecordBuilder {
+private:
+ FieldListRecordBuilder(const FieldListRecordBuilder &) = delete;
+ void operator=(const FieldListRecordBuilder &) = delete;
+
+public:
+ FieldListRecordBuilder();
+
+ void writeBaseClass(MemberAccess Access, TypeIndex Type, uint64_t Offset);
+ void writeEnumerate(MemberAccess Access, uint64_t Value, StringRef Name);
+ void writeIndirectVirtualBaseClass(MemberAccess Access, TypeIndex Type,
+ TypeIndex VirtualBasePointerType,
+ int64_t VirtualBasePointerOffset,
+ uint64_t SlotIndex);
+ void writeMember(MemberAccess Access, TypeIndex Type, uint64_t Offset,
+ StringRef Name);
+ void writeOneMethod(MemberAccess Access, MethodKind Kind,
+ MethodOptions Options, TypeIndex Type,
+ int32_t VTableSlotOffset, StringRef Name);
+ void writeOneMethod(const MethodInfo &Method, StringRef Name);
+ void writeMethod(uint16_t OverloadCount, TypeIndex MethodList,
+ StringRef Name);
+ void writeNestedType(TypeIndex Type, StringRef Name);
+ void writeStaticMember(MemberAccess Access, TypeIndex Type, StringRef Name);
+ void writeVirtualBaseClass(MemberAccess Access, TypeIndex Type,
+ TypeIndex VirtualBasePointerType,
+ int64_t VirtualBasePointerOffset,
+ uint64_t SlotIndex);
+ void writeVirtualBaseClass(TypeRecordKind Kind, MemberAccess Access,
+ TypeIndex Type, TypeIndex VirtualBasePointerType,
+ int64_t VirtualBasePointerOffset,
+ uint64_t SlotIndex);
+ void writeVirtualFunctionTablePointer(TypeIndex Type);
+};
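A sketch of building the field list for struct Point { int X, Y; }; here Table stands in for any concrete TypeTableBuilder that can consume the finished list:

    FieldListRecordBuilder Fields;
    Fields.writeMember(MemberAccess::Public, TypeIndex::Int32(), /*Offset=*/0, "X");
    Fields.writeMember(MemberAccess::Public, TypeIndex::Int32(), /*Offset=*/4, "Y");
    TypeIndex FieldList = Table.writeFieldList(Fields);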
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/FunctionId.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/FunctionId.h
new file mode 100644
index 0000000..1af3da8
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/FunctionId.h
@@ -0,0 +1,56 @@
+//===- FunctionId.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H
+#define LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H
+
+#include <cinttypes>
+
+namespace llvm {
+namespace codeview {
+
+class FunctionId {
+public:
+ FunctionId() : Index(0) {}
+
+ explicit FunctionId(uint32_t Index) : Index(Index) {}
+
+ uint32_t getIndex() const { return Index; }
+
+private:
+ uint32_t Index;
+};
+
+inline bool operator==(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() == B.getIndex();
+}
+
+inline bool operator!=(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() != B.getIndex();
+}
+
+inline bool operator<(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() < B.getIndex();
+}
+
+inline bool operator<=(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() <= B.getIndex();
+}
+
+inline bool operator>(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() > B.getIndex();
+}
+
+inline bool operator>=(const FunctionId &A, const FunctionId &B) {
+ return A.getIndex() >= B.getIndex();
+}
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/Line.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/Line.h
new file mode 100644
index 0000000..a7cdbda
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/Line.h
@@ -0,0 +1,124 @@
+//===- Line.h ---------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_LINE_H
+#define LLVM_DEBUGINFO_CODEVIEW_LINE_H
+
+#include <cinttypes>
+
+namespace llvm {
+namespace codeview {
+
+class LineInfo {
+public:
+ static const uint32_t AlwaysStepIntoLineNumber = 0xfeefee;
+ static const uint32_t NeverStepIntoLineNumber = 0xf00f00;
+
+private:
+ static const uint32_t StartLineMask = 0x00ffffff;
+ static const uint32_t EndLineDeltaMask = 0x7f000000;
+ static const int EndLineDeltaShift = 24;
+ static const uint32_t StatementFlag = 0x80000000u;
+
+public:
+ LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement);
+
+ uint32_t getStartLine() const { return LineData & StartLineMask; }
+
+ uint32_t getLineDelta() const {
+ return (LineData & EndLineDeltaMask) >> EndLineDeltaShift;
+ }
+
+ uint32_t getEndLine() const { return getStartLine() + getLineDelta(); }
+
+ bool isStatement() const { return (LineData & StatementFlag) != 0; }
+
+ uint32_t getRawData() const { return LineData; }
+
+ bool isAlwaysStepInto() const {
+ return getStartLine() == AlwaysStepIntoLineNumber;
+ }
+
+ bool isNeverStepInto() const {
+ return getStartLine() == NeverStepIntoLineNumber;
+ }
+
+private:
+ uint32_t LineData;
+};
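A worked example of the packing, assuming the out-of-line constructor applies the masks above: start line 10, end line 12, statement; the delta of 2 lands in bits 24..30 and the statement flag in bit 31.

    LineInfo LI(/*StartLine=*/10, /*EndLine=*/12, /*IsStatement=*/true);
    assert(LI.getStartLine() == 10);
    assert(LI.getLineDelta() == 2);
    assert(LI.getEndLine() == 12);
    assert(LI.isStatement());
    assert(LI.getRawData() == (10u | (2u << 24) | 0x80000000u));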
+
+class ColumnInfo {
+private:
+ static const uint32_t StartColumnMask = 0x0000ffffu;
+ static const uint32_t EndColumnMask = 0xffff0000u;
+ static const int EndColumnShift = 16;
+
+public:
+ ColumnInfo(uint16_t StartColumn, uint16_t EndColumn) {
+ ColumnData =
+ (static_cast<uint32_t>(StartColumn) & StartColumnMask) |
+ ((static_cast<uint32_t>(EndColumn) << EndColumnShift) & EndColumnMask);
+ }
+
+ uint16_t getStartColumn() const {
+ return static_cast<uint16_t>(ColumnData & StartColumnMask);
+ }
+
+ uint16_t getEndColumn() const {
+ return static_cast<uint16_t>((ColumnData & EndColumnMask) >>
+ EndColumnShift);
+ }
+
+ uint32_t getRawData() const { return ColumnData; }
+
+private:
+ uint32_t ColumnData;
+};
+
+class Line {
+private:
+ int32_t CodeOffset;
+ LineInfo LineInf;
+ ColumnInfo ColumnInf;
+
+public:
+ Line(int32_t CodeOffset, uint32_t StartLine, uint32_t EndLine,
+ uint16_t StartColumn, uint16_t EndColumn, bool IsStatement)
+ : CodeOffset(CodeOffset), LineInf(StartLine, EndLine, IsStatement),
+ ColumnInf(StartColumn, EndColumn) {}
+
+ Line(int32_t CodeOffset, LineInfo LineInf, ColumnInfo ColumnInf)
+ : CodeOffset(CodeOffset), LineInf(LineInf), ColumnInf(ColumnInf) {}
+
+ LineInfo getLineInfo() const { return LineInf; }
+
+ ColumnInfo getColumnInfo() const { return ColumnInf; }
+
+ int32_t getCodeOffset() const { return CodeOffset; }
+
+ uint32_t getStartLine() const { return LineInf.getStartLine(); }
+
+ uint32_t getLineDelta() const { return LineInf.getLineDelta(); }
+
+ uint32_t getEndLine() const { return LineInf.getEndLine(); }
+
+ uint16_t getStartColumn() const { return ColumnInf.getStartColumn(); }
+
+ uint16_t getEndColumn() const { return ColumnInf.getEndColumn(); }
+
+ bool isStatement() const { return LineInf.isStatement(); }
+
+ bool isAlwaysStepInto() const { return LineInf.isAlwaysStepInto(); }
+
+ bool isNeverStepInto() const { return LineInf.isNeverStepInto(); }
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h
new file mode 100644
index 0000000..df0a2e0
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h
@@ -0,0 +1,43 @@
+//===- ListRecordBuilder.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H
+
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+
+namespace llvm {
+namespace codeview {
+
+class ListRecordBuilder {
+private:
+ ListRecordBuilder(const ListRecordBuilder &) = delete;
+ ListRecordBuilder &operator=(const ListRecordBuilder &) = delete;
+
+protected:
+ const int MethodKindShift = 2;
+
+ explicit ListRecordBuilder(TypeRecordKind Kind);
+
+public:
+ llvm::StringRef str() { return Builder.str(); }
+
+protected:
+ void finishSubRecord();
+
+ TypeRecordBuilder &getBuilder() { return Builder; }
+
+private:
+ TypeRecordBuilder Builder;
+ SmallVector<size_t, 4> ContinuationOffsets;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h
new file mode 100644
index 0000000..5bfe2a0
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h
@@ -0,0 +1,68 @@
+//===- MemoryTypeTableBuilder.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H
+
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+namespace codeview {
+
+class MemoryTypeTableBuilder : public TypeTableBuilder {
+public:
+ class Record {
+ public:
+ explicit Record(llvm::StringRef RData);
+
+ const char *data() const { return Data.get(); }
+ uint16_t size() const { return Size; }
+
+ private:
+ uint16_t Size;
+ std::unique_ptr<char[]> Data;
+ };
+
+private:
+ class RecordHash : std::unary_function<llvm::StringRef, size_t> {
+ public:
+ size_t operator()(llvm::StringRef Val) const {
+ return static_cast<size_t>(llvm::hash_value(Val));
+ }
+ };
+
+public:
+ MemoryTypeTableBuilder() {}
+
+ template <typename TFunc> void ForEachRecord(TFunc Func) {
+ uint32_t Index = TypeIndex::FirstNonSimpleIndex;
+
+ for (const std::unique_ptr<Record> &R : Records) {
+ Func(TypeIndex(Index), R.get());
+ ++Index;
+ }
+ }
+
+private:
+ virtual TypeIndex writeRecord(llvm::StringRef Data) override;
+
+private:
+ std::vector<std::unique_ptr<Record>> Records;
+ std::unordered_map<llvm::StringRef, TypeIndex, RecordHash> HashedRecords;
+};
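A usage sketch: once records have been written through the TypeTableBuilder interface, ForEachRecord visits them in type-index order; emitBytes is a hypothetical stand-in for whatever sink consumes them:

    MemoryTypeTableBuilder Table;
    // ... records written via the inherited write* methods ...
    Table.ForEachRecord([](TypeIndex TI, MemoryTypeTableBuilder::Record *R) {
      emitBytes(TI, R->data(), R->size());
    });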
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h
new file mode 100644
index 0000000..faa404d
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h
@@ -0,0 +1,35 @@
+//===- MethodListRecordBuilder.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H
+
+#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
+
+namespace llvm {
+namespace codeview {
+
+class MethodInfo;
+
+class MethodListRecordBuilder : public ListRecordBuilder {
+private:
+ MethodListRecordBuilder(const MethodListRecordBuilder &) = delete;
+ MethodListRecordBuilder &operator=(const MethodListRecordBuilder &) = delete;
+
+public:
+ MethodListRecordBuilder();
+
+ void writeMethod(MemberAccess Access, MethodKind Kind, MethodOptions Options,
+ TypeIndex Type, int32_t VTableSlotOffset);
+ void writeMethod(const MethodInfo &Method);
+};
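Usage mirrors FieldListRecordBuilder above; MethodType here is a hypothetical member-function type index:

    MethodListRecordBuilder Methods;
    Methods.writeMethod(MemberAccess::Public, MethodKind::Virtual,
                        MethodOptions::None, MethodType, /*VTableSlotOffset=*/0);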
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
new file mode 100644
index 0000000..d3a541b
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -0,0 +1,176 @@
+//===- TypeIndex.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H
+
+#include <cassert>
+#include <cinttypes>
+
+namespace llvm {
+namespace codeview {
+
+enum class SimpleTypeKind : uint32_t {
+ None = 0x0000, // uncharacterized type (no type)
+ Void = 0x0003, // void
+ NotTranslated = 0x0007, // type not translated by cvpack
+ HResult = 0x0008, // OLE/COM HRESULT
+
+ SignedCharacter = 0x0010, // 8 bit signed
+ UnsignedCharacter = 0x0020, // 8 bit unsigned
+ NarrowCharacter = 0x0070, // really a char
+ WideCharacter = 0x0071, // wide char
+
+ SByte = 0x0068, // 8 bit signed int
+ Byte = 0x0069, // 8 bit unsigned int
+ Int16Short = 0x0011, // 16 bit signed
+ UInt16Short = 0x0021, // 16 bit unsigned
+ Int16 = 0x0072, // 16 bit signed int
+ UInt16 = 0x0073, // 16 bit unsigned int
+ Int32Long = 0x0012, // 32 bit signed
+ UInt32Long = 0x0022, // 32 bit unsigned
+ Int32 = 0x0074, // 32 bit signed int
+ UInt32 = 0x0075, // 32 bit unsigned int
+ Int64Quad = 0x0013, // 64 bit signed
+ UInt64Quad = 0x0023, // 64 bit unsigned
+ Int64 = 0x0076, // 64 bit signed int
+ UInt64 = 0x0077, // 64 bit unsigned int
+ Int128 = 0x0078, // 128 bit signed int
+ UInt128 = 0x0079, // 128 bit unsigned int
+
+ Float16 = 0x0046, // 16 bit real
+ Float32 = 0x0040, // 32 bit real
+ Float32PartialPrecision = 0x0045, // 32 bit PP real
+ Float48 = 0x0044, // 48 bit real
+ Float64 = 0x0041, // 64 bit real
+ Float80 = 0x0042, // 80 bit real
+ Float128 = 0x0043, // 128 bit real
+
+ Complex32 = 0x0050, // 32 bit complex
+ Complex64 = 0x0051, // 64 bit complex
+ Complex80 = 0x0052, // 80 bit complex
+ Complex128 = 0x0053, // 128 bit complex
+
+ Boolean8 = 0x0030, // 8 bit boolean
+ Boolean16 = 0x0031, // 16 bit boolean
+ Boolean32 = 0x0032, // 32 bit boolean
+ Boolean64 = 0x0033 // 64 bit boolean
+};
+
+enum class SimpleTypeMode : uint32_t {
+ Direct = 0x00000000, // Not a pointer
+ NearPointer = 0x00000100, // Near pointer
+ FarPointer = 0x00000200, // Far pointer
+ HugePointer = 0x00000300, // Huge pointer
+ NearPointer32 = 0x00000400, // 32 bit near pointer
+ FarPointer32 = 0x00000500, // 32 bit far pointer
+ NearPointer64 = 0x00000600, // 64 bit near pointer
+ NearPointer128 = 0x00000700 // 128 bit near pointer
+};
+
+class TypeIndex {
+public:
+ static const uint32_t FirstNonSimpleIndex = 0x1000;
+ static const uint32_t SimpleKindMask = 0x000000ff;
+ static const uint32_t SimpleModeMask = 0x00000700;
+
+public:
+ TypeIndex() : Index(0) {}
+ explicit TypeIndex(uint32_t Index) : Index(Index) {}
+ explicit TypeIndex(SimpleTypeKind Kind)
+ : Index(static_cast<uint32_t>(Kind)) {}
+ TypeIndex(SimpleTypeKind Kind, SimpleTypeMode Mode)
+ : Index(static_cast<uint32_t>(Kind) | static_cast<uint32_t>(Mode)) {}
+
+ uint32_t getIndex() const { return Index; }
+ bool isSimple() const { return Index < FirstNonSimpleIndex; }
+
+ SimpleTypeKind getSimpleKind() const {
+ assert(isSimple());
+ return static_cast<SimpleTypeKind>(Index & SimpleKindMask);
+ }
+
+ SimpleTypeMode getSimpleMode() const {
+ assert(isSimple());
+ return static_cast<SimpleTypeMode>(Index & SimpleModeMask);
+ }
+
+ static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); }
+ static TypeIndex VoidPointer32() {
+ return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer32);
+ }
+ static TypeIndex VoidPointer64() {
+ return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64);
+ }
+
+ static TypeIndex SignedCharacter() {
+ return TypeIndex(SimpleTypeKind::SignedCharacter);
+ }
+ static TypeIndex UnsignedCharacter() {
+ return TypeIndex(SimpleTypeKind::UnsignedCharacter);
+ }
+ static TypeIndex NarrowCharacter() {
+ return TypeIndex(SimpleTypeKind::NarrowCharacter);
+ }
+ static TypeIndex WideCharacter() {
+ return TypeIndex(SimpleTypeKind::WideCharacter);
+ }
+ static TypeIndex Int16Short() {
+ return TypeIndex(SimpleTypeKind::Int16Short);
+ }
+ static TypeIndex UInt16Short() {
+ return TypeIndex(SimpleTypeKind::UInt16Short);
+ }
+ static TypeIndex Int32() { return TypeIndex(SimpleTypeKind::Int32); }
+ static TypeIndex UInt32() { return TypeIndex(SimpleTypeKind::UInt32); }
+ static TypeIndex Int32Long() { return TypeIndex(SimpleTypeKind::Int32Long); }
+ static TypeIndex UInt32Long() {
+ return TypeIndex(SimpleTypeKind::UInt32Long);
+ }
+ static TypeIndex Int64() { return TypeIndex(SimpleTypeKind::Int64); }
+ static TypeIndex UInt64() { return TypeIndex(SimpleTypeKind::UInt64); }
+ static TypeIndex Int64Quad() { return TypeIndex(SimpleTypeKind::Int64Quad); }
+ static TypeIndex UInt64Quad() {
+ return TypeIndex(SimpleTypeKind::UInt64Quad);
+ }
+
+ static TypeIndex Float32() { return TypeIndex(SimpleTypeKind::Float32); }
+ static TypeIndex Float64() { return TypeIndex(SimpleTypeKind::Float64); }
+
+private:
+ uint32_t Index;
+};
+
+inline bool operator==(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() == B.getIndex();
+}
+
+inline bool operator!=(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() != B.getIndex();
+}
+
+inline bool operator<(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() < B.getIndex();
+}
+
+inline bool operator<=(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() <= B.getIndex();
+}
+
+inline bool operator>(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() > B.getIndex();
+}
+
+inline bool operator>=(const TypeIndex &A, const TypeIndex &B) {
+ return A.getIndex() >= B.getIndex();
+}
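The simple-index encoding composes kind and mode bits below FirstNonSimpleIndex; a short sketch:

    TypeIndex TI(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64);
    assert(TI.isSimple()); // 0x0003 | 0x0600 = 0x0603 < FirstNonSimpleIndex
    assert(TI.getSimpleKind() == SimpleTypeKind::Void);
    assert(TI.getSimpleMode() == SimpleTypeMode::NearPointer64);
    assert(TI == TypeIndex::VoidPointer64());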
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
new file mode 100644
index 0000000..21755f5
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -0,0 +1,270 @@
+//===- TypeRecord.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include <cinttypes>
+
+namespace llvm {
+namespace codeview {
+
+class TypeRecord {
+protected:
+ explicit TypeRecord(TypeRecordKind Kind) : Kind(Kind) {}
+
+public:
+ TypeRecordKind getKind() const { return Kind; }
+
+private:
+ TypeRecordKind Kind;
+};
+
+class ModifierRecord : public TypeRecord {
+public:
+ ModifierRecord(TypeIndex ModifiedType, ModifierOptions Options)
+ : TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType),
+ Options(Options) {}
+
+ TypeIndex getModifiedType() const { return ModifiedType; }
+ ModifierOptions getOptions() const { return Options; }
+
+private:
+ TypeIndex ModifiedType;
+ ModifierOptions Options;
+};
+
+class ProcedureRecord : public TypeRecord {
+public:
+ ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv,
+ FunctionOptions Options, uint16_t ParameterCount,
+ TypeIndex ArgumentList)
+ : TypeRecord(TypeRecordKind::Procedure), ReturnType(ReturnType),
+ CallConv(CallConv), Options(Options), ParameterCount(ParameterCount),
+ ArgumentList(ArgumentList) {}
+
+ TypeIndex getReturnType() const { return ReturnType; }
+ CallingConvention getCallConv() const { return CallConv; }
+ FunctionOptions getOptions() const { return Options; }
+ uint16_t getParameterCount() const { return ParameterCount; }
+ TypeIndex getArgumentList() const { return ArgumentList; }
+
+private:
+ TypeIndex ReturnType;
+ CallingConvention CallConv;
+ FunctionOptions Options;
+ uint16_t ParameterCount;
+ TypeIndex ArgumentList;
+};
+
+class MemberFunctionRecord : public TypeRecord {
+public:
+ MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType,
+ TypeIndex ThisType, CallingConvention CallConv,
+ FunctionOptions Options, uint16_t ParameterCount,
+ TypeIndex ArgumentList, int32_t ThisPointerAdjustment)
+ : TypeRecord(TypeRecordKind::MemberFunction), ReturnType(ReturnType),
+ ClassType(ClassType), ThisType(ThisType), CallConv(CallConv),
+ Options(Options), ParameterCount(ParameterCount),
+ ArgumentList(ArgumentList),
+ ThisPointerAdjustment(ThisPointerAdjustment) {}
+
+ TypeIndex getReturnType() const { return ReturnType; }
+ TypeIndex getClassType() const { return ClassType; }
+ TypeIndex getThisType() const { return ThisType; }
+ CallingConvention getCallConv() const { return CallConv; }
+ FunctionOptions getOptions() const { return Options; }
+ uint16_t getParameterCount() const { return ParameterCount; }
+ TypeIndex getArgumentList() const { return ArgumentList; }
+ int32_t getThisPointerAdjustment() const { return ThisPointerAdjustment; }
+
+private:
+ TypeIndex ReturnType;
+ TypeIndex ClassType;
+ TypeIndex ThisType;
+ CallingConvention CallConv;
+ FunctionOptions Options;
+ uint16_t ParameterCount;
+ TypeIndex ArgumentList;
+ int32_t ThisPointerAdjustment;
+};
+
+class ArgumentListRecord : public TypeRecord {
+public:
+ explicit ArgumentListRecord(llvm::ArrayRef<TypeIndex> ArgumentTypes)
+ : TypeRecord(TypeRecordKind::ArgumentList), ArgumentTypes(ArgumentTypes) {
+ }
+
+ llvm::ArrayRef<TypeIndex> getArgumentTypes() const { return ArgumentTypes; }
+
+private:
+ llvm::ArrayRef<TypeIndex> ArgumentTypes;
+};
+
+class PointerRecordBase : public TypeRecord {
+public:
+ PointerRecordBase(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode,
+ PointerOptions Options, uint8_t Size)
+ : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType),
+ PtrKind(Kind), Mode(Mode), Options(Options), Size(Size) {}
+
+ TypeIndex getReferentType() const { return ReferentType; }
+ PointerKind getPointerKind() const { return PtrKind; }
+ PointerMode getMode() const { return Mode; }
+ PointerOptions getOptions() const { return Options; }
+ uint8_t getSize() const { return Size; }
+
+private:
+ TypeIndex ReferentType;
+ PointerKind PtrKind;
+ PointerMode Mode;
+ PointerOptions Options;
+ uint8_t Size;
+};
+
+class PointerRecord : public PointerRecordBase {
+public:
+ PointerRecord(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode,
+ PointerOptions Options, uint8_t Size)
+ : PointerRecordBase(ReferentType, Kind, Mode, Options, Size) {}
+};
+
+class PointerToMemberRecord : public PointerRecordBase {
+public:
+ PointerToMemberRecord(TypeIndex ReferentType, PointerKind Kind,
+ PointerMode Mode, PointerOptions Options, uint8_t Size,
+ TypeIndex ContainingType,
+ PointerToMemberRepresentation Representation)
+ : PointerRecordBase(ReferentType, Kind, Mode, Options, Size),
+ ContainingType(ContainingType), Representation(Representation) {}
+
+ TypeIndex getContainingType() const { return ContainingType; }
+ PointerToMemberRepresentation getRepresentation() const {
+ return Representation;
+ }
+
+private:
+ TypeIndex ContainingType;
+ PointerToMemberRepresentation Representation;
+};
+
+class ArrayRecord : public TypeRecord {
+public:
+ ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size,
+ llvm::StringRef Name)
+ : TypeRecord(TypeRecordKind::Array), ElementType(ElementType),
+ IndexType(IndexType), Size(Size), Name(Name) {}
+
+ TypeIndex getElementType() const { return ElementType; }
+ TypeIndex getIndexType() const { return IndexType; }
+ uint64_t getSize() const { return Size; }
+ llvm::StringRef getName() const { return Name; }
+
+private:
+ TypeIndex ElementType;
+ TypeIndex IndexType;
+ uint64_t Size;
+ llvm::StringRef Name;
+};
+
+class TagRecord : public TypeRecord {
+protected:
+ TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options,
+ TypeIndex FieldList, StringRef Name, StringRef UniqueName)
+ : TypeRecord(Kind), MemberCount(MemberCount), Options(Options),
+ FieldList(FieldList), Name(Name), UniqueName(UniqueName) {}
+
+public:
+ uint16_t getMemberCount() const { return MemberCount; }
+ ClassOptions getOptions() const { return Options; }
+ TypeIndex getFieldList() const { return FieldList; }
+ StringRef getName() const { return Name; }
+ StringRef getUniqueName() const { return UniqueName; }
+
+private:
+ uint16_t MemberCount;
+ ClassOptions Options;
+ TypeIndex FieldList;
+ StringRef Name;
+ StringRef UniqueName;
+};
+
+class AggregateRecord : public TagRecord {
+public:
+ AggregateRecord(TypeRecordKind Kind, uint16_t MemberCount,
+ ClassOptions Options, HfaKind Hfa,
+ WindowsRTClassKind WinRTKind, TypeIndex FieldList,
+ TypeIndex DerivationList, TypeIndex VTableShape,
+ uint64_t Size, StringRef Name, StringRef UniqueName)
+ : TagRecord(Kind, MemberCount, Options, FieldList, Name, UniqueName),
+ Hfa(Hfa), WinRTKind(WinRTKind), DerivationList(DerivationList),
+ VTableShape(VTableShape), Size(Size) {}
+
+ HfaKind getHfa() const { return Hfa; }
+ WindowsRTClassKind getWinRTKind() const { return WinRTKind; }
+ TypeIndex getDerivationList() const { return DerivationList; }
+ TypeIndex getVTableShape() const { return VTableShape; }
+ uint64_t getSize() const { return Size; }
+
+private:
+ HfaKind Hfa;
+ WindowsRTClassKind WinRTKind;
+ TypeIndex DerivationList;
+ TypeIndex VTableShape;
+ uint64_t Size;
+};
+
+class EnumRecord : public TagRecord {
+public:
+ EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList,
+ StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType)
+ : TagRecord(TypeRecordKind::Enum, MemberCount, Options, FieldList, Name,
+ UniqueName),
+ UnderlyingType(UnderlyingType) {}
+
+ TypeIndex getUnderlyingType() const { return UnderlyingType; }
+
+private:
+ TypeIndex UnderlyingType;
+};
+
+class BitFieldRecord : public TypeRecord {
+public:
+ BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset)
+ : TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize),
+ BitOffset(BitOffset) {}
+
+ TypeIndex getType() const { return Type; }
+ uint8_t getBitOffset() const { return BitOffset; }
+ uint8_t getBitSize() const { return BitSize; }
+
+private:
+ TypeIndex Type;
+ uint8_t BitSize;
+ uint8_t BitOffset;
+};
+
+class VirtualTableShapeRecord : public TypeRecord {
+public:
+ explicit VirtualTableShapeRecord(ArrayRef<VirtualTableSlotKind> Slots)
+ : TypeRecord(TypeRecordKind::VirtualTableShape), Slots(Slots) {}
+
+ ArrayRef<VirtualTableSlotKind> getSlots() const { return Slots; }
+
+private:
+ ArrayRef<VirtualTableSlotKind> Slots;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h
new file mode 100644
index 0000000..1f48cf7
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h
@@ -0,0 +1,57 @@
+//===- TypeRecordBuilder.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace codeview {
+
+class TypeRecordBuilder {
+private:
+ TypeRecordBuilder(const TypeRecordBuilder &) = delete;
+ TypeRecordBuilder &operator=(const TypeRecordBuilder &) = delete;
+
+public:
+ explicit TypeRecordBuilder(TypeRecordKind Kind);
+
+ void writeUInt8(uint8_t Value);
+ void writeInt16(int16_t Value);
+ void writeUInt16(uint16_t Value);
+ void writeInt32(int32_t Value);
+ void writeUInt32(uint32_t Value);
+ void writeInt64(int64_t Value);
+ void writeUInt64(uint64_t Value);
+ void writeTypeIndex(TypeIndex TypeInd);
+ void writeTypeRecordKind(TypeRecordKind Kind);
+ void writeEncodedInteger(int64_t Value);
+ void writeEncodedSignedInteger(int64_t Value);
+ void writeEncodedUnsignedInteger(uint64_t Value);
+ void writeNullTerminatedString(const char *Value);
+ void writeNullTerminatedString(StringRef Value);
+
+ llvm::StringRef str();
+
+ uint64_t size() const { return Stream.tell(); }
+
+private:
+ llvm::SmallVector<char, 256> Buffer;
+ llvm::raw_svector_ostream Stream;
+ llvm::support::endian::Writer<llvm::support::endianness::little> Writer;
+};
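For instance, serializing a modifier record by hand would look like this (a sketch; real callers go through TypeTableBuilder):

    TypeRecordBuilder Builder(TypeRecordKind::Modifier);
    Builder.writeTypeIndex(TypeIndex::Int32()); // the type being modified
    Builder.writeUInt16(0x0001);                // ModifierOptions::Const
    llvm::StringRef Bytes = Builder.str();      // kind prefix plus payload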
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
new file mode 100644
index 0000000..9de110e
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
@@ -0,0 +1,37 @@
+//===- TypeSymbolEmitter.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+namespace llvm {
+namespace codeview {
+
+class TypeSymbolEmitter {
+private:
+ TypeSymbolEmitter(const TypeSymbolEmitter &) = delete;
+ TypeSymbolEmitter &operator=(const TypeSymbolEmitter &) = delete;
+
+protected:
+ TypeSymbolEmitter() {}
+
+public:
+ virtual ~TypeSymbolEmitter() {}
+
+public:
+ virtual void writeUserDefinedType(TypeIndex TI, StringRef Name) = 0;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
new file mode 100644
index 0000000..2c950e8
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
@@ -0,0 +1,60 @@
+//===- TypeTableBuilder.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+namespace codeview {
+
+class FieldListRecordBuilder;
+class MethodListRecordBuilder;
+class TypeRecordBuilder;
+
+class TypeTableBuilder {
+private:
+ TypeTableBuilder(const TypeTableBuilder &) = delete;
+ TypeTableBuilder &operator=(const TypeTableBuilder &) = delete;
+
+protected:
+ TypeTableBuilder();
+
+public:
+ virtual ~TypeTableBuilder();
+
+public:
+ TypeIndex writeModifier(const ModifierRecord &Record);
+ TypeIndex writeProcedure(const ProcedureRecord &Record);
+ TypeIndex writeMemberFunction(const MemberFunctionRecord &Record);
+ TypeIndex writeArgumentList(const ArgumentListRecord &Record);
+ TypeIndex writeRecord(TypeRecordBuilder &builder);
+ TypeIndex writePointer(const PointerRecord &Record);
+ TypeIndex writePointerToMember(const PointerToMemberRecord &Record);
+ TypeIndex writeArray(const ArrayRecord &Record);
+ TypeIndex writeAggregate(const AggregateRecord &Record);
+ TypeIndex writeEnum(const EnumRecord &Record);
+ TypeIndex writeBitField(const BitFieldRecord &Record);
+ TypeIndex writeVirtualTableShape(const VirtualTableShapeRecord &Record);
+
+ TypeIndex writeFieldList(FieldListRecordBuilder &FieldList);
+ TypeIndex writeMethodList(MethodListRecordBuilder &MethodList);
+
+private:
+ virtual TypeIndex writeRecord(llvm::StringRef record) = 0;
+};
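Putting the record classes together, a sketch of describing int f(int, int) through a concrete builder:

    MemoryTypeTableBuilder Table;
    TypeIndex ArgTypes[] = {TypeIndex::Int32(), TypeIndex::Int32()};
    TypeIndex Args = Table.writeArgumentList(ArgumentListRecord(ArgTypes));
    TypeIndex Proc = Table.writeProcedure(
        ProcedureRecord(TypeIndex::Int32(), CallingConvention::NearC,
                        FunctionOptions::None, /*ParameterCount=*/2, Args));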
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
index 871e60c..6659a97 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
@@ -57,6 +57,10 @@ class DIInliningInfo {
assert(Index < Frames.size());
return Frames[Index];
}
+ DILineInfo *getMutableFrame(unsigned Index) {
+ assert(Index < Frames.size());
+ return &Frames[Index];
+ }
uint32_t getNumberOfFrames() const {
return Frames.size();
}
@@ -65,6 +69,15 @@ class DIInliningInfo {
}
};
+/// DIGlobal - container for description of a global variable.
+struct DIGlobal {
+ std::string Name;
+ uint64_t Start;
+ uint64_t Size;
+
+ DIGlobal() : Name("<invalid>"), Start(0), Size(0) {}
+};
+
/// A DINameKind is passed to name search methods to specify a
/// preference regarding the type of name resolution the caller wants.
enum class DINameKind { None, ShortName, LinkageName };
@@ -99,6 +112,7 @@ enum DIDumpType {
DIDT_LineDwo,
DIDT_Loc,
DIDT_LocDwo,
+ DIDT_Macro,
DIDT_Ranges,
DIDT_Pubnames,
DIDT_Pubtypes,
@@ -110,7 +124,9 @@ enum DIDumpType {
DIDT_AppleNames,
DIDT_AppleTypes,
DIDT_AppleNamespaces,
- DIDT_AppleObjC
+ DIDT_AppleObjC,
+ DIDT_CUIndex,
+ DIDT_TUIndex,
};
class DIContext {
@@ -140,17 +156,21 @@ private:
/// to be used by the DIContext implementations when applying relocations
/// on the fly.
class LoadedObjectInfo {
+protected:
+ LoadedObjectInfo(const LoadedObjectInfo &) = default;
+ LoadedObjectInfo() = default;
+
public:
virtual ~LoadedObjectInfo() = default;
- /// Obtain the Load Address of a section by Name.
+ /// Obtain the Load Address of a section by SectionRef.
///
- /// Calculate the address of the section identified by the passed in Name.
+ /// Calculate the address of the given section.
/// The section need not be present in the local address space. The addresses
/// need to be consistent with the addresses used to query the DIContext and
/// the output of this function should be deterministic, i.e. repeated calls with
- /// the same Name should give the same address.
- virtual uint64_t getSectionLoadAddress(StringRef Name) const = 0;
+ /// the same Sec should give the same address.
+ virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const = 0;
/// If conveniently available, return the content of the given Section.
///
@@ -162,7 +182,8 @@ public:
/// local (unrelocated) object file and applied on the fly. Note that this method
/// is used purely for optimization purposes in the common case of JITting in the
/// local address space, so returning false should always be correct.
- virtual bool getLoadedSectionContents(StringRef Name, StringRef &Data) const {
+ virtual bool getLoadedSectionContents(const object::SectionRef &Sec,
+ StringRef &Data) const {
return false;
}
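Under the new interface an implementation overrides the SectionRef flavor; a minimal sketch assuming an identity mapping and that no other pure virtuals need overriding:

    struct IdentityObjectInfo : llvm::LoadedObjectInfo {
      uint64_t
      getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override {
        return Sec.getAddress(); // sections "load" at their link-time address
      }
    };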
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 743f9c6..bae3154 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -18,10 +18,13 @@ class DWARFCompileUnit : public DWARFUnit {
public:
DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool LE,
- const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {}
+ StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+ const DWARFUnitSectionBase &UnitSection,
+ const DWARFUnitIndex::Entry *Entry)
+ : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection,
+ Entry) {}
void dump(raw_ostream &OS);
+ static const DWARFSectionKind Section = DW_SECT_INFO;
// VTable anchor.
~DWARFCompileUnit() override;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 423c0d3..c91012b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -18,6 +18,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
@@ -40,11 +41,14 @@ class DWARFContext : public DIContext {
DWARFUnitSection<DWARFCompileUnit> CUs;
std::vector<DWARFUnitSection<DWARFTypeUnit>> TUs;
+ std::unique_ptr<DWARFUnitIndex> CUIndex;
+ std::unique_ptr<DWARFUnitIndex> TUIndex;
std::unique_ptr<DWARFDebugAbbrev> Abbrev;
std::unique_ptr<DWARFDebugLoc> Loc;
std::unique_ptr<DWARFDebugAranges> Aranges;
std::unique_ptr<DWARFDebugLine> Line;
std::unique_ptr<DWARFDebugFrame> DebugFrame;
+ std::unique_ptr<DWARFDebugMacro> Macro;
DWARFUnitSection<DWARFCompileUnit> DWOCUs;
std::vector<DWARFUnitSection<DWARFTypeUnit>> DWOTUs;
@@ -143,6 +147,9 @@ public:
return DWOCUs[index].get();
}
+ const DWARFUnitIndex &getCUIndex();
+ const DWARFUnitIndex &getTUIndex();
+
/// Get a pointer to the parsed DebugAbbrev object.
const DWARFDebugAbbrev *getDebugAbbrev();
@@ -161,6 +168,9 @@ public:
/// Get a pointer to the parsed frame information object.
const DWARFDebugFrame *getDebugFrame();
+ /// Get a pointer to the parsed DebugMacro object.
+ const DWARFDebugMacro *getDebugMacro();
+
/// Get a pointer to a parsed line table corresponding to a compile unit.
const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu);
@@ -184,6 +194,7 @@ public:
virtual const DWARFSection &getLineSection() = 0;
virtual StringRef getStringSection() = 0;
virtual StringRef getRangeSection() = 0;
+ virtual StringRef getMacinfoSection() = 0;
virtual StringRef getPubNamesSection() = 0;
virtual StringRef getPubTypesSection() = 0;
virtual StringRef getGnuPubNamesSection() = 0;
@@ -203,9 +214,11 @@ public:
virtual const DWARFSection& getAppleTypesSection() = 0;
virtual const DWARFSection& getAppleNamespacesSection() = 0;
virtual const DWARFSection& getAppleObjCSection() = 0;
+ virtual StringRef getCUIndexSection() = 0;
+ virtual StringRef getTUIndexSection() = 0;
static bool isSupportedVersion(unsigned version) {
- return version == 2 || version == 3 || version == 4;
+ return version == 2 || version == 3 || version == 4 || version == 5;
}
private:
/// Return the compile unit that includes an offset (relative to .debug_info).
@@ -232,6 +245,7 @@ class DWARFContextInMemory : public DWARFContext {
DWARFSection LineSection;
StringRef StringSection;
StringRef RangeSection;
+ StringRef MacinfoSection;
StringRef PubNamesSection;
StringRef PubTypesSection;
StringRef GnuPubNamesSection;
@@ -251,6 +265,8 @@ class DWARFContextInMemory : public DWARFContext {
DWARFSection AppleTypesSection;
DWARFSection AppleNamespacesSection;
DWARFSection AppleObjCSection;
+ StringRef CUIndexSection;
+ StringRef TUIndexSection;
SmallVector<SmallString<32>, 4> UncompressedSections;
@@ -268,6 +284,7 @@ public:
const DWARFSection &getLineSection() override { return LineSection; }
StringRef getStringSection() override { return StringSection; }
StringRef getRangeSection() override { return RangeSection; }
+ StringRef getMacinfoSection() override { return MacinfoSection; }
StringRef getPubNamesSection() override { return PubNamesSection; }
StringRef getPubTypesSection() override { return PubTypesSection; }
StringRef getGnuPubNamesSection() override { return GnuPubNamesSection; }
@@ -293,6 +310,8 @@ public:
StringRef getAddrSection() override {
return AddrSection;
}
+ StringRef getCUIndexSection() override { return CUIndexSection; }
+ StringRef getTUIndexSection() override { return TUIndexSection; }
};
}
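
With the new DIDT_Macro, DIDT_CUIndex, and DIDT_TUIndex enumerators wired into DWARFContextInMemory, a tool can request dumps of the new sections. A hedged sketch, assuming Obj is an object::ObjectFile parsed elsewhere:

    // Hedged sketch: dump the newly wired-up sections from a parsed object file.
    void dumpNewDebugSections(const llvm::object::ObjectFile &Obj) {
      llvm::DWARFContextInMemory DICtx(Obj);
      DICtx.dump(llvm::outs(), llvm::DIDT_Macro);   // .debug_macinfo
      DICtx.dump(llvm::outs(), llvm::DIDT_CUIndex); // .debug_cu_index (split DWARF)
      DICtx.dump(llvm::outs(), llvm::DIDT_TUIndex); // .debug_tu_index (split DWARF)
    }
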
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 93e7c79..760950b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -196,7 +196,7 @@ public:
// Fills the Result argument with the file and line information
// corresponding to Address. Returns true on success.
- bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
+ bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
DILineInfoSpecifier::FileLineInfoKind Kind,
DILineInfo &Result) const;
@@ -247,7 +247,6 @@ private:
const RelocAddrMap *RelocMap;
LineTableMapTy LineTableMap;
};
-
}
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
new file mode 100644
index 0000000..f791096
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -0,0 +1,59 @@
+//===-- DWARFDebugMacro.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H
+#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Dwarf.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugMacro {
+ /// A single macro entry within a macro list.
+ struct Entry {
+ /// The type of the macro entry.
+ uint32_t Type;
+ union {
+ /// The source line where the macro is defined.
+ uint64_t Line;
+ /// Vendor extension constant value.
+ uint64_t ExtConstant;
+ };
+
+ union {
+ /// The string (name, value) of the macro entry.
+ const char *MacroStr;
+ /// An unsigned integer indicating the identity of the source file.
+ uint64_t File;
+ /// Vendor extension string.
+ const char *ExtStr;
+ };
+ };
+
+ typedef SmallVector<Entry, 4> MacroList;
+
+ /// A list of all the macro entries in the debug_macinfo section.
+ MacroList Macros;
+
+public:
+ DWARFDebugMacro() {}
+ /// Print the macro list found within the debug_macinfo section.
+ void dump(raw_ostream &OS) const;
+ /// Parse the debug_macinfo section accessible via the 'data' parameter.
+ void parse(DataExtractor data);
+};
+
+}
+
+#endif
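
The new class exposes only parse and dump, so usage is a two-step affair. A hedged sketch, assuming MacinfoData holds the raw .debug_macinfo bytes and IsLittleEndian matches the object file:

    // Hedged usage sketch: parse and print a raw .debug_macinfo blob.
    void dumpMacinfo(llvm::StringRef MacinfoData, bool IsLittleEndian) {
      llvm::DWARFDebugMacro Macro;
      llvm::DataExtractor Data(MacinfoData, IsLittleEndian, /*AddressSize=*/0);
      Macro.parse(Data);
      Macro.dump(llvm::outs());
    }
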
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 7ddcc0d..3c32a3e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -84,6 +84,9 @@ public:
const DWARFUnit *u) const;
static bool skipValue(uint16_t form, DataExtractor debug_info_data,
uint32_t *offset_ptr, const DWARFUnit *u);
+ static bool skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, uint16_t Version,
+ uint8_t AddrSize);
static ArrayRef<uint8_t> getFixedFormSizes(uint8_t AddrSize,
uint16_t Version);
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index f24e278..894a88d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -21,13 +21,17 @@ private:
public:
DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool LE,
- const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {}
+ StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+ const DWARFUnitSectionBase &UnitSection,
+ const DWARFUnitIndex::Entry *Entry)
+ : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection,
+ Entry) {}
uint32_t getHeaderSize() const override {
return DWARFUnit::getHeaderSize() + 12;
}
void dump(raw_ostream &OS);
+ static const DWARFSectionKind Section = DW_SECT_TYPES;
+
protected:
bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 5604b93..681b2aa 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -16,6 +16,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include <vector>
namespace llvm {
@@ -39,28 +40,25 @@ public:
virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0;
void parse(DWARFContext &C, const DWARFSection &Section);
- void parseDWO(DWARFContext &C, const DWARFSection &DWOSection);
+ void parseDWO(DWARFContext &C, const DWARFSection &DWOSection,
+ DWARFUnitIndex *Index = nullptr);
protected:
virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool isLittleEndian) = 0;
+ StringRef SOS, StringRef AOS, StringRef LS,
+ bool isLittleEndian) = 0;
~DWARFUnitSectionBase() = default;
};
+const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
+ DWARFSectionKind Kind);
+
/// Concrete instance of DWARFUnitSection, specialized for one Unit type.
template<typename UnitType>
class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>,
public DWARFUnitSectionBase {
-
- struct UnitOffsetComparator {
- bool operator()(uint32_t LHS,
- const std::unique_ptr<UnitType> &RHS) const {
- return LHS < RHS->getNextUnitOffset();
- }
- };
-
bool Parsed;
public:
@@ -73,8 +71,11 @@ public:
typedef llvm::iterator_range<typename UnitVector::iterator> iterator_range;
UnitType *getUnitForOffset(uint32_t Offset) const override {
- auto *CU = std::upper_bound(this->begin(), this->end(), Offset,
- UnitOffsetComparator());
+ auto *CU = std::upper_bound(
+ this->begin(), this->end(), Offset,
+ [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) {
+ return LHS < RHS->getNextUnitOffset();
+ });
if (CU != this->end())
return CU->get();
return nullptr;
@@ -83,14 +84,16 @@ public:
private:
void parseImpl(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool LE) override {
+ StringRef SOS, StringRef AOS, StringRef LS, bool LE) override {
if (Parsed)
return;
+ const auto &Index = getDWARFUnitIndex(Context, UnitType::Section);
DataExtractor Data(Section.Data, LE, 0);
uint32_t Offset = 0;
while (Data.isValidOffset(Offset)) {
auto U = llvm::make_unique<UnitType>(Context, Section, DA, RS, SS, SOS,
- AOS, LE, *this);
+ AOS, LS, LE, *this,
+ Index.getFromOffset(Offset));
if (!U->extract(Data, &Offset))
break;
this->push_back(std::move(U));
@@ -108,6 +111,7 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
StringRef RangeSection;
uint32_t RangeSectionBase;
+ StringRef LineSection;
StringRef StringSection;
StringRef StringOffsetSection;
StringRef AddrOffsetSection;
@@ -134,6 +138,8 @@ class DWARFUnit {
};
std::unique_ptr<DWOHolder> DWO;
+ const DWARFUnitIndex::Entry *IndexEntry;
+
protected:
virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr);
/// Size in bytes of the unit header.
@@ -142,13 +148,15 @@ protected:
public:
DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool LE,
- const DWARFUnitSectionBase &UnitSection);
+ StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+ const DWARFUnitSectionBase &UnitSection,
+ const DWARFUnitIndex::Entry *IndexEntry = nullptr);
virtual ~DWARFUnit();
DWARFContext& getContext() const { return Context; }
+ StringRef getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
StringRef getStringOffsetSection() const { return StringOffsetSection; }
void setAddrOffsetSection(StringRef AOS, uint32_t Base) {
@@ -246,12 +254,19 @@ public:
assert(!DieArray.empty());
auto it = std::lower_bound(
DieArray.begin(), DieArray.end(), Offset,
- [=](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) {
+ [](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) {
return LHS.getOffset() < Offset;
});
return it == DieArray.end() ? nullptr : &*it;
}
+ uint32_t getLineTableOffset() const {
+ if (IndexEntry)
+ if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE))
+ return Contrib->Offset;
+ return 0;
+ }
+
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
new file mode 100644
index 0000000..a85c2f9
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -0,0 +1,81 @@
+//===-- DWARFUnitIndex.h --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H
+#define LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+
+enum DWARFSectionKind {
+ DW_SECT_INFO = 1,
+ DW_SECT_TYPES,
+ DW_SECT_ABBREV,
+ DW_SECT_LINE,
+ DW_SECT_LOC,
+ DW_SECT_STR_OFFSETS,
+ DW_SECT_MACINFO,
+ DW_SECT_MACRO,
+};
+
+class DWARFUnitIndex {
+ struct Header {
+ uint32_t Version;
+ uint32_t NumColumns;
+ uint32_t NumUnits;
+ uint32_t NumBuckets = 0;
+
+ bool parse(DataExtractor IndexData, uint32_t *OffsetPtr);
+ void dump(raw_ostream &OS) const;
+ };
+
+public:
+ class Entry {
+ public:
+ struct SectionContribution {
+ uint32_t Offset;
+ uint32_t Length;
+ };
+
+ private:
+ const DWARFUnitIndex *Index;
+ uint64_t Signature;
+ std::unique_ptr<SectionContribution[]> Contributions;
+ friend class DWARFUnitIndex;
+
+ public:
+ const SectionContribution *getOffset(DWARFSectionKind Sec) const;
+ const SectionContribution *getOffset() const;
+ };
+
+private:
+ struct Header Header;
+
+ DWARFSectionKind InfoColumnKind;
+ int InfoColumn = -1;
+ std::unique_ptr<DWARFSectionKind[]> ColumnKinds;
+ std::unique_ptr<Entry[]> Rows;
+
+ static StringRef getColumnHeader(DWARFSectionKind DS);
+ bool parseImpl(DataExtractor IndexData);
+
+public:
+ bool parse(DataExtractor IndexData);
+ DWARFUnitIndex(DWARFSectionKind InfoColumnKind)
+ : InfoColumnKind(InfoColumnKind) {}
+ void dump(raw_ostream &OS) const;
+ const Entry *getFromOffset(uint32_t Offset) const;
+};
+}
+
+#endif
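
A unit index maps a unit's contribution in one section (the info column) to its contributions in the others. A hedged sketch of a lookup against this API, assuming IndexData holds the raw .debug_cu_index bytes:

    // Hedged sketch: find the line-table contribution for the unit whose
    // .debug_info.dwo contribution covers InfoOffset.
    void findLineContribution(llvm::StringRef IndexData, bool IsLittleEndian,
                              uint32_t InfoOffset) {
      llvm::DWARFUnitIndex Index(llvm::DW_SECT_INFO);
      llvm::DataExtractor D(IndexData, IsLittleEndian, /*AddressSize=*/0);
      if (!Index.parse(D))
        return; // malformed or empty index
      if (const auto *E = Index.getFromOffset(InfoOffset))
        if (const auto *C = E->getOffset(llvm::DW_SECT_LINE))
          llvm::outs() << "line contribution: offset " << C->Offset
                       << ", length " << C->Length << "\n";
    }
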
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
index 2bb9746..9404a59 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -32,8 +32,7 @@ class PDBContext : public DIContext {
public:
PDBContext(const object::COFFObjectFile &Object,
- std::unique_ptr<IPDBSession> PDBSession,
- bool RelativeAddress);
+ std::unique_ptr<IPDBSession> PDBSession);
static bool classof(const DIContext *DICtx) {
return DICtx->getKind() == CK_PDB;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index 2d19e79..a932a56 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_PDB_PDBTYPES_H
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Endian.h"
#include <functional>
#include <stdint.h>
@@ -500,6 +501,35 @@ struct Variant {
bool operator!=(const Variant &Other) const { return !(*this == Other); }
};
+namespace PDB {
+static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f',
+ 't', ' ', 'C', '/', 'C', '+', '+', ' ',
+ 'M', 'S', 'F', ' ', '7', '.', '0', '0',
+ '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'};
+
+// The superblock is overlaid at the beginning of the file (offset 0).
+// It starts with a magic header and is followed by information which describes
+// the layout of the file system.
+struct SuperBlock {
+ char MagicBytes[sizeof(Magic)];
+ // The file system is split into a variable number of fixed size elements.
+ // These elements are referred to as blocks. The size of a block may vary
+ // from system to system.
+ support::ulittle32_t BlockSize;
+ // This field's purpose is not yet known.
+ support::ulittle32_t Unknown0;
+ // This contains the number of blocks resident in the file system. In
+ // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file.
+ support::ulittle32_t NumBlocks;
+ // This contains the number of bytes which make up the directory.
+ support::ulittle32_t NumDirectoryBytes;
+ // This field's purpose is not yet known.
+ support::ulittle32_t Unknown1;
+ // This contains the block # of the block map.
+ support::ulittle32_t BlockMapAddr;
+};
+}
+
} // namespace llvm
namespace std {
@@ -513,4 +543,5 @@ template <> struct hash<llvm::PDB_SymType> {
};
}
+
#endif
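
The SuperBlock comments above imply a simple validity check. A hedged sketch (Buffer is the raw file contents; needs <cstring> for std::memcmp):

    // Hedged sketch: sanity-check the MSF superblock at the start of a PDB file.
    bool looksLikeMSF7(llvm::StringRef Buffer) {
      using llvm::PDB::Magic;
      using llvm::PDB::SuperBlock;
      if (Buffer.size() < sizeof(SuperBlock))
        return false;
      const auto *SB = reinterpret_cast<const SuperBlock *>(Buffer.data());
      if (std::memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0)
        return false;
      // Per the comments above, NumBlocks * BlockSize should match the file size.
      return uint64_t(SB->NumBlocks) * SB->BlockSize == Buffer.size();
    }
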
diff --git a/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
new file mode 100644
index 0000000..0703fb1
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -0,0 +1,47 @@
+//===- llvm/DebugInfo/Symbolize/DIPrinter.h ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DIPrinter class, which is responsible for printing
+// structures defined in DebugInfo/DIContext.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+struct DILineInfo;
+class DIInliningInfo;
+struct DIGlobal;
+
+namespace symbolize {
+
+class DIPrinter {
+ raw_ostream &OS;
+ bool PrintFunctionNames;
+ bool PrintPretty;
+ void printName(const DILineInfo &Info, bool Inlined);
+
+public:
+ DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true,
+ bool PrintPretty = false)
+ : OS(OS), PrintFunctionNames(PrintFunctionNames),
+ PrintPretty(PrintPretty) {}
+
+ DIPrinter &operator<<(const DILineInfo &Info);
+ DIPrinter &operator<<(const DIInliningInfo &Info);
+ DIPrinter &operator<<(const DIGlobal &Global);
+};
+}
+}
+
+#endif
+
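A hedged usage sketch of the streaming interface declared above:

    // Print a symbolized frame the way llvm-symbolizer does.
    void printFrame(llvm::raw_ostream &OS, const llvm::DILineInfo &Info) {
      llvm::symbolize::DIPrinter Printer(OS, /*PrintFunctionNames=*/true,
                                         /*PrintPretty=*/false);
      Printer << Info;
    }
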
diff --git a/contrib/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/contrib/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
new file mode 100644
index 0000000..ff9cc80
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
@@ -0,0 +1,53 @@
+//===-- SymbolizableModule.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolizableModule interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H
+
+#include "llvm/DebugInfo/DIContext.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+namespace object {
+class ObjectFile;
+}
+}
+
+namespace llvm {
+namespace symbolize {
+
+using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
+
+class SymbolizableModule {
+public:
+ virtual ~SymbolizableModule() {}
+ virtual DILineInfo symbolizeCode(uint64_t ModuleOffset,
+ FunctionNameKind FNKind,
+ bool UseSymbolTable) const = 0;
+ virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+ FunctionNameKind FNKind,
+ bool UseSymbolTable) const = 0;
+ virtual DIGlobal symbolizeData(uint64_t ModuleOffset) const = 0;
+
+ // Return true if this is a 32-bit x86 PE COFF module.
+ virtual bool isWin32Module() const = 0;
+
+ // Returns the preferred base of the module, i.e. where the loader would place
+ // it in memory assuming there were no conflicts.
+ virtual uint64_t getModulePreferredBase() const = 0;
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H
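
For reference, the smallest conforming implementation of the interface just returns empty ("<invalid>") results; real implementations are expected to be backed by a DIContext over the module's debug info. A hedged sketch:

    class NullSymbolizableModule : public llvm::symbolize::SymbolizableModule {
    public:
      llvm::DILineInfo
      symbolizeCode(uint64_t, llvm::symbolize::FunctionNameKind,
                    bool) const override {
        return llvm::DILineInfo();
      }
      llvm::DIInliningInfo
      symbolizeInlinedCode(uint64_t, llvm::symbolize::FunctionNameKind,
                           bool) const override {
        return llvm::DIInliningInfo();
      }
      llvm::DIGlobal symbolizeData(uint64_t) const override {
        return llvm::DIGlobal();
      }
      bool isWin32Module() const override { return false; }
      uint64_t getModulePreferredBase() const override { return 0; }
    };
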
diff --git a/contrib/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/contrib/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
new file mode 100644
index 0000000..ec3ae00
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -0,0 +1,105 @@
+//===-- Symbolize.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Header for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
+
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ErrorOr.h"
+#include <map>
+#include <memory>
+#include <string>
+
+namespace llvm {
+namespace symbolize {
+
+using namespace object;
+using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
+
+class LLVMSymbolizer {
+public:
+ struct Options {
+ FunctionNameKind PrintFunctions;
+ bool UseSymbolTable : 1;
+ bool Demangle : 1;
+ bool RelativeAddresses : 1;
+ std::string DefaultArch;
+ std::vector<std::string> DsymHints;
+ Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName,
+ bool UseSymbolTable = true, bool Demangle = true,
+ bool RelativeAddresses = false, std::string DefaultArch = "")
+ : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable),
+ Demangle(Demangle), RelativeAddresses(RelativeAddresses),
+ DefaultArch(DefaultArch) {}
+ };
+
+ LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
+ ~LLVMSymbolizer() {
+ flush();
+ }
+
+ ErrorOr<DILineInfo> symbolizeCode(const std::string &ModuleName,
+ uint64_t ModuleOffset);
+ ErrorOr<DIInliningInfo> symbolizeInlinedCode(const std::string &ModuleName,
+ uint64_t ModuleOffset);
+ ErrorOr<DIGlobal> symbolizeData(const std::string &ModuleName,
+ uint64_t ModuleOffset);
+ void flush();
+ static std::string DemangleName(const std::string &Name,
+ const SymbolizableModule *ModInfo);
+
+private:
+ // Bundles together the object file holding code/data and the object file
+ // holding the corresponding debug info. These objects can be the same.
+ typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair;
+
+ ErrorOr<SymbolizableModule *>
+ getOrCreateModuleInfo(const std::string &ModuleName);
+ ObjectFile *lookUpDsymFile(const std::string &Path,
+ const MachOObjectFile *ExeObj,
+ const std::string &ArchName);
+ ObjectFile *lookUpDebuglinkObject(const std::string &Path,
+ const ObjectFile *Obj,
+ const std::string &ArchName);
+
+ /// \brief Returns a pair of pointers to the object and the debug object.
+ ErrorOr<ObjectPair> getOrCreateObjectPair(const std::string &Path,
+ const std::string &ArchName);
+
+ /// \brief Return a pointer to the object file at the specified path, for the
+ /// specified architecture (e.g. if the path refers to a Mach-O universal
+ /// binary, only one object file from it will be returned).
+ ErrorOr<ObjectFile *> getOrCreateObject(const std::string &Path,
+ const std::string &ArchName);
+
+ std::map<std::string, ErrorOr<std::unique_ptr<SymbolizableModule>>> Modules;
+
+ /// \brief Contains cached results of getOrCreateObjectPair().
+ std::map<std::pair<std::string, std::string>, ErrorOr<ObjectPair>>
+ ObjectPairForPathArch;
+
+ /// \brief Contains parsed binary for each path, or parsing error.
+ std::map<std::string, ErrorOr<OwningBinary<Binary>>> BinaryForPath;
+
+ /// \brief Parsed object file for path/architecture pair, where "path" refers
+ /// to Mach-O universal binary.
+ std::map<std::pair<std::string, std::string>, ErrorOr<std::unique_ptr<ObjectFile>>>
+ ObjectForUBPathAndArch;
+
+ Options Opts;
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif
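
A hedged end-to-end usage sketch of the class declared above; "a.out" and the offset are placeholder inputs standing in for a real module path and a module-relative (or absolute, depending on Options) offset:

    void symbolizeOne() {
      using namespace llvm;
      using namespace llvm::symbolize;
      LLVMSymbolizer::Options Opts(FunctionNameKind::LinkageName,
                                   /*UseSymbolTable=*/true, /*Demangle=*/true,
                                   /*RelativeAddresses=*/false,
                                   /*DefaultArch=*/"");
      LLVMSymbolizer Symbolizer(Opts);
      if (auto InfoOrErr = Symbolizer.symbolizeCode("a.out", 0x401000))
        outs() << InfoOrErr->FunctionName << " at " << InfoOrErr->FileName
               << ":" << InfoOrErr->Line << "\n";
    }
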
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 821c018..a730260 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -104,7 +104,12 @@ class ExecutionEngine {
ExecutionEngineState EEState;
/// The target data for the platform for which execution is being performed.
- const DataLayout *DL;
+ ///
+ /// Note: the DataLayout is LLVMContext-specific because it has an
+ /// internal cache based on type pointers. This makes it unsafe to reuse the
+ /// ExecutionEngine across contexts; we don't enforce this rule, but undefined
+ /// behavior can occur if the user tries to do it.
+ const DataLayout DL;
/// Whether lazy JIT compilation is enabled.
bool CompilingLazily;
@@ -126,8 +131,6 @@ protected:
/// optimize for the case where there is only one module.
SmallVector<std::unique_ptr<Module>, 1> Modules;
- void setDataLayout(const DataLayout *Val) { DL = Val; }
-
/// getMemoryforGV - Allocate memory for a global variable.
virtual char *getMemoryForGV(const GlobalVariable *GV);
@@ -194,7 +197,7 @@ public:
//===--------------------------------------------------------------------===//
- const DataLayout *getDataLayout() const { return DL; }
+ const DataLayout &getDataLayout() const { return DL; }
/// removeModule - Remove a Module from the list of modules. Returns true if
/// M is found.
@@ -478,7 +481,8 @@ public:
}
protected:
- ExecutionEngine() {}
+ ExecutionEngine(DataLayout DL) : DL(std::move(DL)) {}
+ explicit ExecutionEngine(DataLayout DL, std::unique_ptr<Module> M);
explicit ExecutionEngine(std::unique_ptr<Module> M);
void emitGlobals();
@@ -488,6 +492,9 @@ protected:
GenericValue getConstantValue(const Constant *C);
void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr,
Type *Ty);
+
+private:
+ void Init(std::unique_ptr<Module> M);
};
namespace EngineKind {
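
Since getDataLayout() now returns a reference rather than a pointer, callers need a one-line migration. A minimal sketch:

    void useDataLayout(llvm::ExecutionEngine &EE, llvm::Type *Ty) {
      const llvm::DataLayout &DL = EE.getDataLayout(); // was: const DataLayout *
      uint64_t Size = DL.getTypeAllocSize(Ty);         // was: DL->getTypeAllocSize
      (void)Size;
    }
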
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Interpreter.h b/contrib/llvm/include/llvm/ExecutionEngine/Interpreter.h
index f49d0c4..a147078 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Interpreter.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Interpreter.h
@@ -16,22 +16,12 @@
#define LLVM_EXECUTIONENGINE_INTERPRETER_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include <cstdlib>
extern "C" void LLVMLinkInInterpreter();
namespace {
struct ForceInterpreterLinking {
- ForceInterpreterLinking() {
- // We must reference the interpreter in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- if (std::getenv("bar") != (char*) -1)
- return;
-
- LLVMLinkInInterpreter();
- }
+ ForceInterpreterLinking() { LLVMLinkInInterpreter(); }
} ForceInterpreterLinking;
}
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 9694b80..7dab5d1 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -22,6 +22,7 @@
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <list>
+#include <memory>
#include <set>
#include "llvm/Support/Debug.h"
@@ -36,56 +37,89 @@ namespace orc {
/// added to the layer below. When a stub is called it triggers the extraction
/// of the function body from the original module. The extracted body is then
/// compiled and executed.
-template <typename BaseLayerT, typename CompileCallbackMgrT,
- typename PartitioningFtor =
- std::function<std::set<Function*>(Function&)>>
+template <typename BaseLayerT,
+ typename CompileCallbackMgrT = JITCompileCallbackManager,
+ typename IndirectStubsMgrT = IndirectStubsManager>
class CompileOnDemandLayer {
private:
- // Utility class for MapValue. Only materializes declarations for global
- // variables.
- class GlobalDeclMaterializer : public ValueMaterializer {
+ template <typename MaterializerFtor>
+ class LambdaMaterializer final : public ValueMaterializer {
public:
- typedef std::set<const Function*> StubSet;
+ LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {}
+ Value *materializeDeclFor(Value *V) final { return M(V); }
- GlobalDeclMaterializer(Module &Dst, const StubSet *StubsToClone = nullptr)
- : Dst(Dst), StubsToClone(StubsToClone) {}
-
- Value* materializeValueFor(Value *V) final {
- if (auto *GV = dyn_cast<GlobalVariable>(V))
- return cloneGlobalVariableDecl(Dst, *GV);
- else if (auto *F = dyn_cast<Function>(V)) {
- auto *ClonedF = cloneFunctionDecl(Dst, *F);
- if (StubsToClone && StubsToClone->count(F)) {
- GlobalVariable *FnBodyPtr =
- createImplPointer(*ClonedF->getType(), *ClonedF->getParent(),
- ClonedF->getName() + "$orc_addr", nullptr);
- makeStub(*ClonedF, *FnBodyPtr);
- ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage);
- ClonedF->addFnAttr(Attribute::AlwaysInline);
- }
- return ClonedF;
- }
- // Else.
- return nullptr;
- }
private:
- Module &Dst;
- const StubSet *StubsToClone;
+ MaterializerFtor M;
};
+ template <typename MaterializerFtor>
+ LambdaMaterializer<MaterializerFtor>
+ createLambdaMaterializer(MaterializerFtor M) {
+ return LambdaMaterializer<MaterializerFtor>(std::move(M));
+ }
+
typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
+ class ModuleOwner {
+ public:
+ ModuleOwner() = default;
+ ModuleOwner(const ModuleOwner&) = delete;
+ ModuleOwner& operator=(const ModuleOwner&) = delete;
+ virtual ~ModuleOwner() { }
+ virtual Module& getModule() const = 0;
+ };
+
+ template <typename ModulePtrT>
+ class ModuleOwnerImpl : public ModuleOwner {
+ public:
+ ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {}
+ Module& getModule() const override { return *ModulePtr; }
+ private:
+ ModulePtrT ModulePtr;
+ };
+
+ template <typename ModulePtrT>
+ std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) {
+ return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr));
+ }
+
struct LogicalModuleResources {
- std::shared_ptr<Module> SourceModule;
+ std::unique_ptr<ModuleOwner> SourceModuleOwner;
std::set<const Function*> StubsToClone;
+ std::unique_ptr<IndirectStubsMgrT> StubsMgr;
+
+ LogicalModuleResources() = default;
+
+ // Explicit move constructor to make MSVC happy.
+ LogicalModuleResources(LogicalModuleResources &&Other)
+ : SourceModuleOwner(std::move(Other.SourceModuleOwner)),
+ StubsToClone(std::move(Other.StubsToClone)),
+ StubsMgr(std::move(Other.StubsMgr)) {}
+
+ // Explicit move assignment to make MSVC happy.
+ LogicalModuleResources& operator=(LogicalModuleResources &&Other) {
+ SourceModuleOwner = std::move(Other.SourceModuleOwner);
+ StubsToClone = std::move(Other.StubsToClone);
+ StubsMgr = std::move(Other.StubsMgr);
+ return *this;
+ }
+
+ JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
+ if (Name.endswith("$stub_ptr") && !ExportedSymbolsOnly) {
+ assert(!ExportedSymbolsOnly && "Stubs are never exported");
+ return StubsMgr->findPointer(Name.drop_back(9));
+ }
+ return StubsMgr->findStub(Name, ExportedSymbolsOnly);
+ }
+
};
+
+
struct LogicalDylibResources {
typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)>
SymbolResolverFtor;
SymbolResolverFtor ExternalSymbolResolver;
- PartitioningFtor Partitioner;
};
typedef LogicalDylib<BaseLayerT, LogicalModuleResources,
@@ -95,13 +129,25 @@ private:
typedef std::list<CODLogicalDylib> LogicalDylibList;
public:
+
/// @brief Handle to a set of loaded modules.
typedef typename LogicalDylibList::iterator ModuleSetHandleT;
+ /// @brief Module partitioning functor.
+ typedef std::function<std::set<Function*>(Function&)> PartitioningFtor;
+
+ /// @brief Builder for IndirectStubsManagers.
+ typedef std::function<std::unique_ptr<IndirectStubsMgrT>()>
+ IndirectStubsManagerBuilderT;
+
/// @brief Construct a compile-on-demand layer instance.
- CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr,
- bool CloneStubsIntoPartitions)
- : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr),
+ CompileOnDemandLayer(BaseLayerT &BaseLayer, PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions = true)
+ : BaseLayer(BaseLayer), Partition(Partition),
+ CompileCallbackMgr(CallbackMgr),
+ CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
/// @brief Add a module to the compile-on-demand layer.
@@ -122,17 +168,9 @@ public:
return Resolver->findSymbol(Name);
};
- LDResources.Partitioner =
- [](Function &F) {
- std::set<Function*> Partition;
- Partition.insert(&F);
- return Partition;
- };
-
// Process each of the modules in this module set.
for (auto &M : Ms)
- addLogicalModule(LogicalDylibs.back(),
- std::shared_ptr<Module>(std::move(M)));
+ addLogicalModule(LogicalDylibs.back(), std::move(M));
return std::prev(LogicalDylibs.end());
}
@@ -150,6 +188,10 @@ public:
/// @param ExportedSymbolsOnly If true, search only for exported symbols.
/// @return A handle for the given named symbol, if it exists.
JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
+ for (auto LDI = LogicalDylibs.begin(), LDE = LogicalDylibs.end();
+ LDI != LDE; ++LDI)
+ if (auto Symbol = findSymbolIn(LDI, Name, ExportedSymbolsOnly))
+ return Symbol;
return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
}
@@ -162,85 +204,138 @@ public:
private:
- void addLogicalModule(CODLogicalDylib &LD, std::shared_ptr<Module> SrcM) {
+ template <typename ModulePtrT>
+ void addLogicalModule(CODLogicalDylib &LD, ModulePtrT SrcMPtr) {
// Bump the linkage and rename any anonymous/private members in SrcM to
// ensure that everything will resolve properly after we partition SrcM.
- makeAllSymbolsExternallyAccessible(*SrcM);
+ makeAllSymbolsExternallyAccessible(*SrcMPtr);
// Create a logical module handle for SrcM within the logical dylib.
auto LMH = LD.createLogicalModule();
auto &LMResources = LD.getLogicalModuleResources(LMH);
- LMResources.SourceModule = SrcM;
- // Create the GVs-and-stubs module.
- auto GVsAndStubsM = llvm::make_unique<Module>(
- (SrcM->getName() + ".globals_and_stubs").str(),
- SrcM->getContext());
- GVsAndStubsM->setDataLayout(SrcM->getDataLayout());
- ValueToValueMapTy VMap;
+ LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr));
- // Process module and create stubs.
- // We create the stubs before copying the global variables as we know the
- // stubs won't refer to any globals (they only refer to their implementation
- // pointer) so there's no ordering/value-mapping issues.
- for (auto &F : *SrcM) {
-
- // Skip declarations.
- if (F.isDeclaration())
- continue;
-
- // Record all functions defined by this module.
- if (CloneStubsIntoPartitions)
- LMResources.StubsToClone.insert(&F);
-
- // For each definition: create a callback, a stub, and a function body
- // pointer. Initialize the function body pointer to point at the callback,
- // and set the callback to compile the function body.
- auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext());
- Function *StubF = cloneFunctionDecl(*GVsAndStubsM, F, &VMap);
- GlobalVariable *FnBodyPtr =
- createImplPointer(*StubF->getType(), *StubF->getParent(),
- StubF->getName() + "$orc_addr",
- createIRTypedAddress(*StubF->getFunctionType(),
- CCInfo.getAddress()));
- makeStub(*StubF, *FnBodyPtr);
- CCInfo.setCompileAction(
- [this, &LD, LMH, &F]() {
+ Module &SrcM = LMResources.SourceModuleOwner->getModule();
+
+ // Create the GlobalValues module.
+ const DataLayout &DL = SrcM.getDataLayout();
+ auto GVsM = llvm::make_unique<Module>((SrcM.getName() + ".globals").str(),
+ SrcM.getContext());
+ GVsM->setDataLayout(DL);
+
+ // Create function stubs.
+ ValueToValueMapTy VMap;
+ {
+ typename IndirectStubsMgrT::StubInitsMap StubInits;
+ for (auto &F : SrcM) {
+ // Skip declarations.
+ if (F.isDeclaration())
+ continue;
+
+ // Record all functions defined by this module.
+ if (CloneStubsIntoPartitions)
+ LMResources.StubsToClone.insert(&F);
+
+ // Create a callback, associate it with the stub for the function,
+ // and set the compile action to compile the partition containing the
+ // function.
+ auto CCInfo = CompileCallbackMgr.getCompileCallback();
+ StubInits[mangle(F.getName(), DL)] =
+ std::make_pair(CCInfo.getAddress(),
+ JITSymbolBase::flagsFromGlobalValue(F));
+ CCInfo.setCompileAction([this, &LD, LMH, &F]() {
return this->extractAndCompile(LD, LMH, F);
});
+ }
+
+ LMResources.StubsMgr = CreateIndirectStubsManager();
+ auto EC = LMResources.StubsMgr->createStubs(StubInits);
+ (void)EC;
+ // FIXME: This should be propagated back to the user. Stub creation may
+ // fail for remote JITs.
+ assert(!EC && "Error generating stubs");
}
- // Now clone the global variable declarations.
- GlobalDeclMaterializer GDMat(*GVsAndStubsM);
- for (auto &GV : SrcM->globals())
- if (!GV.isDeclaration())
- cloneGlobalVariableDecl(*GVsAndStubsM, GV, &VMap);
+ // Clone global variable decls.
+ for (auto &GV : SrcM.globals())
+ if (!GV.isDeclaration() && !VMap.count(&GV))
+ cloneGlobalVariableDecl(*GVsM, GV, &VMap);
+
+ // And the aliases.
+ for (auto &A : SrcM.aliases())
+ if (!VMap.count(&A))
+ cloneGlobalAliasDecl(*GVsM, A, VMap);
+
+ // Now we need to clone the GV and alias initializers.
+
+ // Initializers may refer to functions declared (but not defined) in this
+ // module. Build a materializer to clone decls on demand.
+ auto Materializer = createLambdaMaterializer(
+ [this, &GVsM, &LMResources](Value *V) -> Value* {
+ if (auto *F = dyn_cast<Function>(V)) {
+ // Decls in the original module just get cloned.
+ if (F->isDeclaration())
+ return cloneFunctionDecl(*GVsM, *F);
+
+ // Definitions in the original module (which we have emitted stubs
+ // for at this point) get turned into a constant alias to the stub
+ // instead.
+ const DataLayout &DL = GVsM->getDataLayout();
+ std::string FName = mangle(F->getName(), DL);
+ auto StubSym = LMResources.StubsMgr->findStub(FName, false);
+ unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType());
+ ConstantInt *StubAddr =
+ ConstantInt::get(GVsM->getContext(),
+ APInt(PtrBitWidth, StubSym.getAddress()));
+ Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr,
+ StubAddr, F->getType());
+ return GlobalAlias::create(F->getFunctionType(),
+ F->getType()->getAddressSpace(),
+ F->getLinkage(), F->getName(),
+ Init, GVsM.get());
+ }
+ // else....
+ return nullptr;
+ });
- // Then clone the initializers.
- for (auto &GV : SrcM->globals())
+ // Clone the global variable initializers.
+ for (auto &GV : SrcM.globals())
if (!GV.isDeclaration())
- moveGlobalVariableInitializer(GV, VMap, &GDMat);
+ moveGlobalVariableInitializer(GV, VMap, &Materializer);
+
+ // Clone the global alias initializers.
+ for (auto &A : SrcM.aliases()) {
+ auto *NewA = cast<GlobalAlias>(VMap[&A]);
+ assert(NewA && "Alias not cloned?");
+ Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr,
+ &Materializer);
+ NewA->setAliasee(cast<Constant>(Init));
+ }
- // Build a resolver for the stubs module and add it to the base layer.
- auto GVsAndStubsResolver = createLambdaResolver(
- [&LD](const std::string &Name) {
+ // Build a resolver for the globals module and add it to the base layer.
+ auto GVsResolver = createLambdaResolver(
+ [&LD, LMH](const std::string &Name) {
+ auto &LMResources = LD.getLogicalModuleResources(LMH);
+ if (auto Sym = LMResources.StubsMgr->findStub(Name, false))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
return LD.getDylibResources().ExternalSymbolResolver(Name);
},
[](const std::string &Name) {
return RuntimeDyld::SymbolInfo(nullptr);
});
- std::vector<std::unique_ptr<Module>> GVsAndStubsMSet;
- GVsAndStubsMSet.push_back(std::move(GVsAndStubsM));
- auto GVsAndStubsH =
- BaseLayer.addModuleSet(std::move(GVsAndStubsMSet),
+ std::vector<std::unique_ptr<Module>> GVsMSet;
+ GVsMSet.push_back(std::move(GVsM));
+ auto GVsH =
+ BaseLayer.addModuleSet(std::move(GVsMSet),
llvm::make_unique<SectionMemoryManager>(),
- std::move(GVsAndStubsResolver));
- LD.addToLogicalModule(LMH, GVsAndStubsH);
+ std::move(GVsResolver));
+ LD.addToLogicalModule(LMH, GVsH);
}
- static std::string Mangle(StringRef Name, const DataLayout &DL) {
+ static std::string mangle(StringRef Name, const DataLayout &DL) {
std::string MangledName;
{
raw_string_ostream MangledNameStream(MangledName);
@@ -252,42 +347,35 @@ private:
TargetAddress extractAndCompile(CODLogicalDylib &LD,
LogicalModuleHandle LMH,
Function &F) {
- Module &SrcM = *LD.getLogicalModuleResources(LMH).SourceModule;
+ auto &LMResources = LD.getLogicalModuleResources(LMH);
+ Module &SrcM = LMResources.SourceModuleOwner->getModule();
// If F is a declaration we must already have compiled it.
if (F.isDeclaration())
return 0;
// Grab the name of the function being called here.
- std::string CalledFnName = Mangle(F.getName(), SrcM.getDataLayout());
+ std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout());
- auto Partition = LD.getDylibResources().Partitioner(F);
- auto PartitionH = emitPartition(LD, LMH, Partition);
+ auto Part = Partition(F);
+ auto PartH = emitPartition(LD, LMH, Part);
TargetAddress CalledAddr = 0;
- for (auto *SubF : Partition) {
- std::string FName = SubF->getName();
- auto FnBodySym =
- BaseLayer.findSymbolIn(PartitionH, Mangle(FName, SrcM.getDataLayout()),
- false);
- auto FnPtrSym =
- BaseLayer.findSymbolIn(*LD.moduleHandlesBegin(LMH),
- Mangle(FName + "$orc_addr",
- SrcM.getDataLayout()),
- false);
+ for (auto *SubF : Part) {
+ std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout());
+ auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false);
assert(FnBodySym && "Couldn't find function body.");
- assert(FnPtrSym && "Couldn't find function body pointer.");
TargetAddress FnBodyAddr = FnBodySym.getAddress();
- void *FnPtrAddr = reinterpret_cast<void*>(
- static_cast<uintptr_t>(FnPtrSym.getAddress()));
// If this is the function we're calling record the address so we can
// return it from this function.
if (SubF == &F)
CalledAddr = FnBodyAddr;
- memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t));
+ // Update the function body pointer for the stub.
+ if (auto EC = LMResources.StubsMgr->updatePointer(FnName, FnBodyAddr))
+ return 0;
}
return CalledAddr;
@@ -296,13 +384,13 @@ private:
template <typename PartitionT>
BaseLayerModuleSetHandleT emitPartition(CODLogicalDylib &LD,
LogicalModuleHandle LMH,
- const PartitionT &Partition) {
+ const PartitionT &Part) {
auto &LMResources = LD.getLogicalModuleResources(LMH);
- Module &SrcM = *LMResources.SourceModule;
+ Module &SrcM = LMResources.SourceModuleOwner->getModule();
// Create the module.
std::string NewName = SrcM.getName();
- for (auto *F : Partition) {
+ for (auto *F : Part) {
NewName += ".";
NewName += F->getName();
}
@@ -310,15 +398,51 @@ private:
auto M = llvm::make_unique<Module>(NewName, SrcM.getContext());
M->setDataLayout(SrcM.getDataLayout());
ValueToValueMapTy VMap;
- GlobalDeclMaterializer GDM(*M, &LMResources.StubsToClone);
+
+ auto Materializer = createLambdaMaterializer([this, &LMResources, &M,
+ &VMap](Value *V) -> Value * {
+ if (auto *GV = dyn_cast<GlobalVariable>(V))
+ return cloneGlobalVariableDecl(*M, *GV);
+
+ if (auto *F = dyn_cast<Function>(V)) {
+ // Check whether we want to clone an available_externally definition.
+ if (!LMResources.StubsToClone.count(F))
+ return cloneFunctionDecl(*M, *F);
+
+ // Ok - we want an inlinable stub. For that to work we need a decl
+ // for the stub pointer.
+ auto *StubPtr = createImplPointer(*F->getType(), *M,
+ F->getName() + "$stub_ptr", nullptr);
+ auto *ClonedF = cloneFunctionDecl(*M, *F);
+ makeStub(*ClonedF, *StubPtr);
+ ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ ClonedF->addFnAttr(Attribute::AlwaysInline);
+ return ClonedF;
+ }
+
+ if (auto *A = dyn_cast<GlobalAlias>(V)) {
+ auto *Ty = A->getValueType();
+ if (Ty->isFunctionTy())
+ return Function::Create(cast<FunctionType>(Ty),
+ GlobalValue::ExternalLinkage, A->getName(),
+ M.get());
+
+ return new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, A->getName(), nullptr,
+ GlobalValue::NotThreadLocal,
+ A->getType()->getAddressSpace());
+ }
+
+ return nullptr;
+ });
// Create decls in the new module.
- for (auto *F : Partition)
+ for (auto *F : Part)
cloneFunctionDecl(*M, *F, &VMap);
// Move the function bodies.
- for (auto *F : Partition)
- moveFunctionBody(*F, VMap, &GDM);
+ for (auto *F : Part)
+ moveFunctionBody(*F, VMap, &Materializer);
// Create memory manager and symbol resolver.
auto MemMgr = llvm::make_unique<SectionMemoryManager>();
@@ -342,7 +466,10 @@ private:
}
BaseLayerT &BaseLayer;
+ PartitioningFtor Partition;
CompileCallbackMgrT &CompileCallbackMgr;
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager;
+
LogicalDylibList LogicalDylibs;
bool CloneStubsIntoPartitions;
};
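
Putting the new constructor together: the partitioning functor and the indirect-stubs-manager builder are now injected by the client. A hedged construction sketch, where CompileLayerT, CompileLayer, CCMgr, and the OrcX86_64 target class are assumptions standing in for the host JIT's pieces:

    typedef llvm::orc::CompileOnDemandLayer<CompileLayerT> CODLayerT;
    CODLayerT CODLayer(
        CompileLayer,
        [](llvm::Function &F) {            // one function per partition
          std::set<llvm::Function *> S;
          S.insert(&F);
          return S;
        },
        CCMgr,                             // a JITCompileCallbackManager
        [] {                               // builds a stubs manager per module
          return llvm::make_unique<
              llvm::orc::LocalIndirectStubsManager<llvm::orc::OrcX86_64>>();
        });

This replaces the old scheme in which the per-dylib Partitioner was hard-coded at addModuleSet time.
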
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index 49a1fba..1e7d211 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -40,7 +40,6 @@ public:
if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
llvm_unreachable("Target does not support MC emission.");
PM.run(M);
- ObjStream.flush();
std::unique_ptr<MemoryBuffer> ObjBuffer(
new ObjectMemoryBuffer(std::move(ObjBufferSV)));
ErrorOr<std::unique_ptr<object::ObjectFile>> Obj =
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
new file mode 100644
index 0000000..9fa222c
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
@@ -0,0 +1,108 @@
+//===---- GlobalMappingLayer.h - Run all IR through a functor ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Convenience layer for injecting symbols that will appear in calls to
+// findSymbol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
+#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
+
+#include "JITSymbol.h"
+#include <map>
+
+namespace llvm {
+namespace orc {
+
+/// @brief Global mapping layer.
+///
+/// This layer overrides the findSymbol method to first search a local symbol
+/// table that the client can define. It can be used to inject new symbol
+/// mappings into the JIT. Beware, however: symbols within a single IR module or
+/// object file will still resolve locally (via RuntimeDyld's symbol table) -
+/// such internal references cannot be overridden via this layer.
+template <typename BaseLayerT>
+class GlobalMappingLayer {
+public:
+ /// @brief Handle to a set of added modules.
+ typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT;
+
+ /// @brief Construct a GlobalMappingLayer with the given BaseLayer.
+ GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+
+ /// @brief Add the given module set to the JIT.
+ /// @return A handle for the added modules.
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
+ ModuleSetHandleT addModuleSet(ModuleSetT Ms,
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
+ return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr),
+ std::move(Resolver));
+ }
+
+ /// @brief Remove the module set associated with the handle H.
+ void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); }
+
+ /// @brief Manually set the address to return for the given symbol.
+ void setGlobalMapping(const std::string &Name, TargetAddress Addr) {
+ SymbolTable[Name] = Addr;
+ }
+
+ /// @brief Remove the given symbol from the global mapping.
+ void eraseGlobalMapping(const std::string &Name) {
+ SymbolTable.erase(Name);
+ }
+
+ /// @brief Search for the given named symbol.
+ ///
+ /// This method will first search the local symbol table, returning
+ /// any symbol found there. If the symbol is not found in the local
+ /// table then this call will be passed through to the base layer.
+ ///
+ /// @param Name The name of the symbol to search for.
+ /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+ /// @return A handle for the given named symbol, if it exists.
+ JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ auto I = SymbolTable.find(Name);
+ if (I != SymbolTable.end())
+ return JITSymbol(I->second, JITSymbolFlags::Exported);
+ return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
+ }
+
+ /// @brief Get the address of the given symbol in the context of the set of
+ /// modules represented by the handle H. This call is forwarded to the
+ /// base layer's implementation.
+ /// @param H The handle for the module set to search in.
+ /// @param Name The name of the symbol to search for.
+ /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+ /// @return A handle for the given named symbol, if it is found in the
+ /// given module set.
+ JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name,
+ bool ExportedSymbolsOnly) {
+ return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly);
+ }
+
+ /// @brief Immediately emit and finalize the module set represented by the
+ /// given handle.
+ /// @param H Handle for module set to emit/finalize.
+ void emitAndFinalize(ModuleSetHandleT H) {
+ BaseLayer.emitAndFinalize(H);
+ }
+
+private:
+ BaseLayerT &BaseLayer;
+ std::map<std::string, TargetAddress> SymbolTable;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
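
A hedged usage sketch of the layer declared above; BaseLayerT, BaseLayer, and MyHostFunction stand in for client code:

    llvm::orc::GlobalMappingLayer<BaseLayerT> MappingLayer(BaseLayer);
    MappingLayer.setGlobalMapping(
        "my_host_function",
        static_cast<llvm::orc::TargetAddress>(
            reinterpret_cast<uintptr_t>(&MyHostFunction)));
    // findSymbol("my_host_function", true) now resolves to the host address.
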
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 6379022..e4bed95 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -85,8 +85,6 @@ public:
ModuleSetHandleT H =
BaseLayer.addObjectSet(Objects, std::move(MemMgr), std::move(Resolver));
- BaseLayer.takeOwnershipOfBuffers(H, std::move(Buffers));
-
return H;
}
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 4b7fc5e..d6ee3a8 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -27,9 +27,8 @@
namespace llvm {
namespace orc {
-/// @brief Base class for JITLayer independent aspects of
-/// JITCompileCallbackManager.
-class JITCompileCallbackManagerBase {
+/// @brief Target-independent base class for compile callback management.
+class JITCompileCallbackManager {
public:
typedef std::function<TargetAddress()> CompileFtor;
@@ -51,18 +50,13 @@ public:
CompileFtor &Compile;
};
- /// @brief Construct a JITCompileCallbackManagerBase.
+ /// @brief Construct a JITCompileCallbackManager.
/// @param ErrorHandlerAddress The address of an error handler in the target
/// process to be used if a compile callback fails.
- /// @param NumTrampolinesPerBlock Number of trampolines to emit if there is no
- /// available trampoline when getCompileCallback is
- /// called.
- JITCompileCallbackManagerBase(TargetAddress ErrorHandlerAddress,
- unsigned NumTrampolinesPerBlock)
- : ErrorHandlerAddress(ErrorHandlerAddress),
- NumTrampolinesPerBlock(NumTrampolinesPerBlock) {}
+ JITCompileCallbackManager(TargetAddress ErrorHandlerAddress)
+ : ErrorHandlerAddress(ErrorHandlerAddress) {}
- virtual ~JITCompileCallbackManagerBase() {}
+ virtual ~JITCompileCallbackManager() {}
/// @brief Execute the callback for the given trampoline id. Called by the JIT
/// to compile functions on demand.
@@ -90,7 +84,11 @@ public:
}
/// @brief Reserve a compile callback.
- virtual CompileCallbackInfo getCompileCallback(LLVMContext &Context) = 0;
+ CompileCallbackInfo getCompileCallback() {
+ TargetAddress TrampolineAddr = getAvailableTrampolineAddr();
+ auto &Compile = this->ActiveTrampolines[TrampolineAddr];
+ return CompileCallbackInfo(TrampolineAddr, Compile);
+ }
/// @brief Get a CompileCallbackInfo for an existing callback.
CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) {
@@ -113,113 +111,229 @@ public:
protected:
TargetAddress ErrorHandlerAddress;
- unsigned NumTrampolinesPerBlock;
typedef std::map<TargetAddress, CompileFtor> TrampolineMapT;
TrampolineMapT ActiveTrampolines;
std::vector<TargetAddress> AvailableTrampolines;
+
+private:
+
+ TargetAddress getAvailableTrampolineAddr() {
+ if (this->AvailableTrampolines.empty())
+ grow();
+ assert(!this->AvailableTrampolines.empty() &&
+ "Failed to grow available trampolines.");
+ TargetAddress TrampolineAddr = this->AvailableTrampolines.back();
+ this->AvailableTrampolines.pop_back();
+ return TrampolineAddr;
+ }
+
+ // Create new trampolines - to be implemented in subclasses.
+ virtual void grow() = 0;
+
+ virtual void anchor();
};
-/// @brief Manage compile callbacks.
-template <typename JITLayerT, typename TargetT>
-class JITCompileCallbackManager : public JITCompileCallbackManagerBase {
+/// @brief Manage compile callbacks for in-process JITs.
+template <typename TargetT>
+class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
public:
- /// @brief Construct a JITCompileCallbackManager.
- /// @param JIT JIT layer to emit callback trampolines, etc. into.
- /// @param Context LLVMContext to use for trampoline & resolve block modules.
+ /// @brief Construct a LocalJITCompileCallbackManager.
/// @param ErrorHandlerAddress The address of an error handler in the target
/// process to be used if a compile callback fails.
- /// @param NumTrampolinesPerBlock Number of trampolines to allocate whenever
- /// there is no existing callback trampoline.
- /// (Trampolines are allocated in blocks for
- /// efficiency.)
- JITCompileCallbackManager(JITLayerT &JIT, RuntimeDyld::MemoryManager &MemMgr,
- LLVMContext &Context,
- TargetAddress ErrorHandlerAddress,
- unsigned NumTrampolinesPerBlock)
- : JITCompileCallbackManagerBase(ErrorHandlerAddress,
- NumTrampolinesPerBlock),
- JIT(JIT), MemMgr(MemMgr) {
- emitResolverBlock(Context);
+ LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress)
+ : JITCompileCallbackManager(ErrorHandlerAddress) {
+
+ /// Set up the resolver block.
+ std::error_code EC;
+ ResolverBlock =
+ sys::OwningMemoryBlock(
+ sys::Memory::allocateMappedMemory(TargetT::ResolverCodeSize, nullptr,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE, EC));
+ assert(!EC && "Failed to allocate resolver block");
+
+ TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
+ &reenter, this);
+
+ EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(),
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ assert(!EC && "Failed to mprotect resolver block");
}
- /// @brief Get/create a compile callback with the given signature.
- CompileCallbackInfo getCompileCallback(LLVMContext &Context) final {
- TargetAddress TrampolineAddr = getAvailableTrampolineAddr(Context);
- auto &Compile = this->ActiveTrampolines[TrampolineAddr];
- return CompileCallbackInfo(TrampolineAddr, Compile);
+private:
+
+ static TargetAddress reenter(void *CCMgr, void *TrampolineId) {
+ JITCompileCallbackManager *Mgr =
+ static_cast<JITCompileCallbackManager*>(CCMgr);
+ return Mgr->executeCompileCallback(
+ static_cast<TargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineId)));
+ }
+
+ void grow() override {
+ assert(this->AvailableTrampolines.empty() && "Growing prematurely?");
+
+ std::error_code EC;
+ auto TrampolineBlock =
+ sys::OwningMemoryBlock(
+ sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE, EC));
+ assert(!EC && "Failed to allocate trampoline block");
+
+ unsigned NumTrampolines =
+ (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize;
+
+ uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base());
+ TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(),
+ NumTrampolines);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I)
+ this->AvailableTrampolines.push_back(
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(
+ TrampolineMem + (I * TargetT::TrampolineSize))));
+
+ EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(),
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ assert(!EC && "Failed to mprotect trampoline block");
+
+ TrampolineBlocks.push_back(std::move(TrampolineBlock));
}
+ sys::OwningMemoryBlock ResolverBlock;
+ std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
+};
+
+/// @brief Base class for managing collections of named indirect stubs.
+class IndirectStubsManager {
+public:
+
+ /// @brief Map type for initializing the manager. See init.
+ typedef StringMap<std::pair<TargetAddress, JITSymbolFlags>> StubInitsMap;
+
+ virtual ~IndirectStubsManager() {}
+
+ /// @brief Create a single stub with the given name, target address and flags.
+ virtual std::error_code createStub(StringRef StubName, TargetAddress StubAddr,
+ JITSymbolFlags StubFlags) = 0;
+
+ /// @brief Create StubInits.size() stubs with the given names, target
+ /// addresses, and flags.
+ virtual std::error_code createStubs(const StubInitsMap &StubInits) = 0;
+
+ /// @brief Find the stub with the given name. If ExportedStubsOnly is true,
+ /// this will only return a result if the stub's flags indicate that it
+ /// is exported.
+ virtual JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) = 0;
+
+ /// @brief Find the implementation-pointer for the stub.
+ virtual JITSymbol findPointer(StringRef Name) = 0;
+
+ /// @brief Change the value of the implementation pointer for the stub.
+ virtual std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) = 0;
private:
+ virtual void anchor();
+};
+
+/// @brief IndirectStubsManager implementation for a concrete target, e.g.
+/// OrcX86_64. (See OrcTargetSupport.h).
+template <typename TargetT>
+class LocalIndirectStubsManager : public IndirectStubsManager {
+public:
+
+ std::error_code createStub(StringRef StubName, TargetAddress StubAddr,
+ JITSymbolFlags StubFlags) override {
+ if (auto EC = reserveStubs(1))
+ return EC;
- std::vector<std::unique_ptr<Module>>
- SingletonSet(std::unique_ptr<Module> M) {
- std::vector<std::unique_ptr<Module>> Ms;
- Ms.push_back(std::move(M));
- return Ms;
+ createStubInternal(StubName, StubAddr, StubFlags);
+
+ return std::error_code();
}
- void emitResolverBlock(LLVMContext &Context) {
- std::unique_ptr<Module> M(new Module("resolver_block_module",
- Context));
- TargetT::insertResolverBlock(*M, *this);
- auto NonResolver =
- createLambdaResolver(
- [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
- llvm_unreachable("External symbols in resolver block?");
- },
- [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
- llvm_unreachable("Dylib symbols in resolver block?");
- });
- auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr,
- std::move(NonResolver));
- JIT.emitAndFinalize(H);
- auto ResolverBlockSymbol =
- JIT.findSymbolIn(H, TargetT::ResolverBlockName, false);
- assert(ResolverBlockSymbol && "Failed to insert resolver block");
- ResolverBlockAddr = ResolverBlockSymbol.getAddress();
+ std::error_code createStubs(const StubInitsMap &StubInits) override {
+ if (auto EC = reserveStubs(StubInits.size()))
+ return EC;
+
+ for (auto &Entry : StubInits)
+ createStubInternal(Entry.first(), Entry.second.first,
+ Entry.second.second);
+
+ return std::error_code();
}
- TargetAddress getAvailableTrampolineAddr(LLVMContext &Context) {
- if (this->AvailableTrampolines.empty())
- grow(Context);
- assert(!this->AvailableTrampolines.empty() &&
- "Failed to grow available trampolines.");
- TargetAddress TrampolineAddr = this->AvailableTrampolines.back();
- this->AvailableTrampolines.pop_back();
- return TrampolineAddr;
+ JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
+ auto I = StubIndexes.find(Name);
+ if (I == StubIndexes.end())
+ return nullptr;
+ auto Key = I->second.first;
+ void *StubAddr = IndirectStubsInfos[Key.first].getStub(Key.second);
+ assert(StubAddr && "Missing stub address");
+ auto StubTargetAddr =
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(StubAddr));
+ auto StubSymbol = JITSymbol(StubTargetAddr, I->second.second);
+ if (ExportedStubsOnly && !StubSymbol.isExported())
+ return nullptr;
+ return StubSymbol;
}
- void grow(LLVMContext &Context) {
- assert(this->AvailableTrampolines.empty() && "Growing prematurely?");
- std::unique_ptr<Module> M(new Module("trampoline_block", Context));
- auto GetLabelName =
- TargetT::insertCompileCallbackTrampolines(*M, ResolverBlockAddr,
- this->NumTrampolinesPerBlock,
- this->ActiveTrampolines.size());
- auto NonResolver =
- createLambdaResolver(
- [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
- llvm_unreachable("External symbols in trampoline block?");
- },
- [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
- llvm_unreachable("Dylib symbols in trampoline block?");
- });
- auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr,
- std::move(NonResolver));
- JIT.emitAndFinalize(H);
- for (unsigned I = 0; I < this->NumTrampolinesPerBlock; ++I) {
- std::string Name = GetLabelName(I);
- auto TrampolineSymbol = JIT.findSymbolIn(H, Name, false);
- assert(TrampolineSymbol && "Failed to emit trampoline.");
- this->AvailableTrampolines.push_back(TrampolineSymbol.getAddress());
- }
+ JITSymbol findPointer(StringRef Name) override {
+ auto I = StubIndexes.find(Name);
+ if (I == StubIndexes.end())
+ return nullptr;
+ auto Key = I->second.first;
+ void *PtrAddr = IndirectStubsInfos[Key.first].getPtr(Key.second);
+ assert(PtrAddr && "Missing pointer address");
+ auto PtrTargetAddr =
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(PtrAddr));
+ return JITSymbol(PtrTargetAddr, I->second.second);
+ }
+
+ std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) override {
+ auto I = StubIndexes.find(Name);
+ assert(I != StubIndexes.end() && "No stub pointer for symbol");
+ auto Key = I->second.first;
+ *IndirectStubsInfos[Key.first].getPtr(Key.second) =
+ reinterpret_cast<void*>(static_cast<uintptr_t>(NewAddr));
+ return std::error_code();
+ }
+
+private:
+
+ std::error_code reserveStubs(unsigned NumStubs) {
+ if (NumStubs <= FreeStubs.size())
+ return std::error_code();
+
+ unsigned NewStubsRequired = NumStubs - FreeStubs.size();
+ unsigned NewBlockId = IndirectStubsInfos.size();
+ typename TargetT::IndirectStubsInfo ISI;
+ if (auto EC = TargetT::emitIndirectStubsBlock(ISI, NewStubsRequired,
+ nullptr))
+ return EC;
+ for (unsigned I = 0; I < ISI.getNumStubs(); ++I)
+ FreeStubs.push_back(std::make_pair(NewBlockId, I));
+ IndirectStubsInfos.push_back(std::move(ISI));
+ return std::error_code();
+ }
+
+ void createStubInternal(StringRef StubName, TargetAddress InitAddr,
+ JITSymbolFlags StubFlags) {
+ auto Key = FreeStubs.back();
+ FreeStubs.pop_back();
+ *IndirectStubsInfos[Key.first].getPtr(Key.second) =
+ reinterpret_cast<void*>(static_cast<uintptr_t>(InitAddr));
+ StubIndexes[StubName] = std::make_pair(Key, StubFlags);
}
- JITLayerT &JIT;
- RuntimeDyld::MemoryManager &MemMgr;
- TargetAddress ResolverBlockAddr;
+ std::vector<typename TargetT::IndirectStubsInfo> IndirectStubsInfos;
+ typedef std::pair<uint16_t, uint16_t> StubKey;
+ std::vector<StubKey> FreeStubs;
+ StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
};
/// @brief Build a function pointer of FunctionType with the given constant
@@ -236,7 +350,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M,
/// @brief Turn a function declaration into a stub function that makes an
/// indirect call using the given function pointer.
-void makeStub(Function &F, GlobalVariable &ImplPointer);
+void makeStub(Function &F, Value &ImplPointer);
/// @brief Raise linkage types and rename as necessary to ensure that all
/// symbols are accessible for other modules.
@@ -289,6 +403,10 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
ValueMaterializer *Materializer = nullptr,
GlobalVariable *NewGV = nullptr);
+/// @brief Clone a global alias declaration into a new module.
+GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
+ ValueToValueMapTy &VMap);
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index 93ba02b..a5286ff 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -67,10 +67,10 @@ private:
} else
return nullptr;
case Emitting:
- // Calling "emit" can trigger external symbol lookup (e.g. to check for
- // pre-existing definitions of common-symbol), but it will never find in
- // this module that it would not have found already, so return null from
- // here.
+ // Calling "emit" can trigger a recursive call to 'find' (e.g. to check
+ // for pre-existing definitions of common-symbol), but any symbol in
+ // this module would already have been found internally (in the
+ // RuntimeDyld that did the lookup), so just return a nullptr here.
return nullptr;
case Emitted:
return B.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
index 28700ef..883fa9e 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
@@ -14,6 +14,10 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
#define LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
+#include "llvm/ExecutionEngine/Orc/JITSymbol.h"
+#include <string>
+#include <vector>
+
namespace llvm {
namespace orc {
@@ -28,6 +32,12 @@ private:
typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerHandleList;
struct LogicalModule {
+ // Make this move-only to ensure logical modules don't get duplicated across
+ // moves of LogicalDylib or anything like that.
+ LogicalModule(LogicalModule &&RHS)
+ : Resources(std::move(RHS.Resources)),
+ BaseLayerHandles(std::move(RHS.BaseLayerHandles)) {}
+ LogicalModule() = default;
LogicalModuleResources Resources;
BaseLayerHandleList BaseLayerHandles;
};
@@ -46,6 +56,13 @@ public:
BaseLayer.removeModuleSet(BLH);
}
+ // If possible, remove this and ~LogicalDylib once the work in the dtor is
+ // moved to members (e.g. self-unregistering base layer handles).
+ LogicalDylib(LogicalDylib &&RHS)
+ : BaseLayer(std::move(RHS.BaseLayer)),
+ LogicalModules(std::move(RHS.LogicalModules)),
+ DylibResources(std::move(RHS.DylibResources)) {}
+
LogicalModuleHandle createLogicalModule() {
LogicalModules.push_back(LogicalModule());
return std::prev(LogicalModules.end());
@@ -69,22 +86,27 @@ public:
}
JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH,
- const std::string &Name) {
+ const std::string &Name,
+ bool ExportedSymbolsOnly) {
+
+ if (auto StubSym = LMH->Resources.findSymbol(Name, ExportedSymbolsOnly))
+ return StubSym;
+
for (auto BLH : LMH->BaseLayerHandles)
- if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, false))
+ if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly))
return Symbol;
return nullptr;
}
JITSymbol findSymbolInternally(LogicalModuleHandle LMH,
const std::string &Name) {
- if (auto Symbol = findSymbolInLogicalModule(LMH, Name))
+ if (auto Symbol = findSymbolInLogicalModule(LMH, Name, false))
return Symbol;
for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end();
LMI != LME; ++LMI) {
if (LMI != LMH)
- if (auto Symbol = findSymbolInLogicalModule(LMI, Name))
+ if (auto Symbol = findSymbolInLogicalModule(LMI, Name, false))
return Symbol;
}
@@ -92,11 +114,10 @@ public:
}
JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- for (auto &LM : LogicalModules)
- for (auto BLH : LM.BaseLayerHandles)
- if (auto Symbol =
- BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly))
- return Symbol;
+ for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end();
+ LMI != LME; ++LMI)
+ if (auto Sym = findSymbolInLogicalModule(LMI, Name, ExportedSymbolsOnly))
+ return Sym;
return nullptr;
}
@@ -106,7 +127,6 @@ protected:
BaseLayerT BaseLayer;
LogicalModuleList LogicalModules;
LogicalDylibResources DylibResources;
-
};
} // End namespace orc.
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index f3094da..2acfecf 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -39,9 +39,12 @@ protected:
void operator=(const LinkedObjectSet&) = delete;
public:
LinkedObjectSet(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections)
: RTDyld(llvm::make_unique<RuntimeDyld>(MemMgr, Resolver)),
- State(Raw) {}
+ State(Raw) {
+ RTDyld->setProcessAllSections(ProcessAllSections);
+ }
virtual ~LinkedObjectSet() {}
@@ -64,18 +67,9 @@ protected:
RTDyld->mapSectionAddress(LocalAddress, TargetAddr);
}
- void takeOwnershipOfBuffer(std::unique_ptr<MemoryBuffer> B) {
- OwnedBuffers.push_back(std::move(B));
- }
-
protected:
std::unique_ptr<RuntimeDyld> RTDyld;
enum { Raw, Finalizing, Finalized } State;
-
- // FIXME: This ownership hack only exists because RuntimeDyldELF still
- // wants to be able to inspect the original object when resolving
- // relocations. As soon as that can be fixed this should be removed.
- std::vector<std::unique_ptr<MemoryBuffer>> OwnedBuffers;
};
typedef std::list<std::unique_ptr<LinkedObjectSet>> LinkedObjectSetListT;
@@ -83,16 +77,6 @@ protected:
public:
/// @brief Handle to a set of loaded objects.
typedef LinkedObjectSetListT::iterator ObjSetHandleT;
-
- // Ownership hack.
- // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without
- // referencing the original object.
- template <typename OwningMBSet>
- void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) {
- for (auto &MB : MBs)
- (*H)->takeOwnershipOfBuffer(std::move(MB));
- }
-
};
/// @brief Default (no-op) action to perform when loading objects.
@@ -117,16 +101,16 @@ private:
class ConcreteLinkedObjectSet : public LinkedObjectSet {
public:
ConcreteLinkedObjectSet(MemoryManagerPtrT MemMgr,
- SymbolResolverPtrT Resolver)
- : LinkedObjectSet(*MemMgr, *Resolver), MemMgr(std::move(MemMgr)),
- Resolver(std::move(Resolver)) { }
+ SymbolResolverPtrT Resolver,
+ bool ProcessAllSections)
+ : LinkedObjectSet(*MemMgr, *Resolver, ProcessAllSections),
+ MemMgr(std::move(MemMgr)), Resolver(std::move(Resolver)) { }
void Finalize() override {
State = Finalizing;
RTDyld->resolveRelocations();
RTDyld->registerEHFrames();
MemMgr->finalizeMemory();
- OwnedBuffers.clear();
State = Finalized;
}
@@ -137,9 +121,11 @@ private:
template <typename MemoryManagerPtrT, typename SymbolResolverPtrT>
std::unique_ptr<LinkedObjectSet>
- createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) {
+ createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver,
+ bool ProcessAllSections) {
typedef ConcreteLinkedObjectSet<MemoryManagerPtrT, SymbolResolverPtrT> LOS;
- return llvm::make_unique<LOS>(std::move(MemMgr), std::move(Resolver));
+ return llvm::make_unique<LOS>(std::move(MemMgr), std::move(Resolver),
+ ProcessAllSections);
}
public:
@@ -158,7 +144,18 @@ public:
NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor())
: NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)) {}
+ NotifyFinalized(std::move(NotifyFinalized)),
+ ProcessAllSections(false) {}
+
+ /// @brief Set the 'ProcessAllSections' flag.
+ ///
+ /// If set to true, all sections in each object file will be allocated using
+ /// the memory manager, rather than just the sections required for execution.
+ ///
+ /// This is kludgy, and may be removed in the future.
+ void setProcessAllSections(bool ProcessAllSections) {
+ this->ProcessAllSections = ProcessAllSections;
+ }
/// @brief Add a set of objects (or archives) that will be treated as a unit
/// for the purposes of symbol lookup and memory management.
@@ -180,7 +177,8 @@ public:
ObjSetHandleT Handle =
LinkedObjSetList.insert(
LinkedObjSetList.end(),
- createLinkedObjectSet(std::move(MemMgr), std::move(Resolver)));
+ createLinkedObjectSet(std::move(MemMgr), std::move(Resolver),
+ ProcessAllSections));
LinkedObjectSet &LOS = **Handle;
LoadedObjInfoList LoadedObjInfos;
@@ -276,6 +274,7 @@ private:
LinkedObjectSetListT LinkedObjSetList;
NotifyLoadedFtor NotifyLoaded;
NotifyFinalizedFtor NotifyFinalized;
+ bool ProcessAllSections;
};
} // End namespace orc.
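
A short usage sketch for the new flag; the layer construction and template arguments are elided or assumed:

    ObjectLinkingLayer<> ObjLayer;
    // Load every allocatable section, not just those needed to execute, so
    // clients (e.g. debug-info consumers) can inspect the extra sections.
    ObjLayer.setProcessAllSections(true);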
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index 7af6620..f96e83e 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -87,14 +87,6 @@ public:
BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr);
}
- // Ownership hack.
- // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without
- // referencing the original object.
- template <typename OwningMBSet>
- void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) {
- BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs));
- }
-
/// @brief Access the transform functor directly.
TransformFtor &getTransform() { return Transform; }
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h
index 309f5a9..246d3e0 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h
@@ -9,42 +9,92 @@
//
// Target specific code for Orc, e.g. callback assembly.
//
+// Target classes should be part of the JIT *target* process, not the host
+// process (except where you're doing hosted JITing and the two are one and the
+// same).
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H
#define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H
#include "IndirectionUtils.h"
+#include "llvm/Support/Memory.h"
namespace llvm {
namespace orc {
class OrcX86_64 {
public:
- static const char *ResolverBlockName;
-
- /// @brief Insert module-level inline callback asm into module M for the
- /// symbols managed by JITResolveCallbackHandler J.
- static void insertResolverBlock(Module &M,
- JITCompileCallbackManagerBase &JCBM);
-
- /// @brief Get a label name from the given index.
- typedef std::function<std::string(unsigned)> LabelNameFtor;
-
- /// @brief Insert the requested number of trampolines into the given module.
- /// @param M Module to insert the call block into.
- /// @param NumCalls Number of calls to create in the call block.
- /// @param StartIndex Optional argument specifying the index suffix to start
- /// with.
- /// @return A functor that provides the symbol name for each entry in the call
- /// block.
- ///
- static LabelNameFtor insertCompileCallbackTrampolines(
- Module &M,
- TargetAddress TrampolineAddr,
- unsigned NumCalls,
- unsigned StartIndex = 0);
+ static const unsigned PageSize = 4096;
+ static const unsigned PointerSize = 8;
+ static const unsigned TrampolineSize = 8;
+ static const unsigned ResolverCodeSize = 0x78;
+
+ typedef TargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
+
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+ void *CallbackMgr);
+
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines);
+
+ /// @brief Provide information about stub blocks generated by the
+ /// makeIndirectStubsBlock function.
+ class IndirectStubsInfo {
+ friend class OrcX86_64;
+ public:
+ const static unsigned StubSize = 8;
+ const static unsigned PtrSize = 8;
+ IndirectStubsInfo() : NumStubs(0) {}
+ IndirectStubsInfo(IndirectStubsInfo &&Other)
+ : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) {
+ Other.NumStubs = 0;
+ }
+ IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) {
+ NumStubs = Other.NumStubs;
+ Other.NumStubs = 0;
+ StubsMem = std::move(Other.StubsMem);
+ return *this;
+ }
+
+ /// @brief Number of stubs in this block.
+ unsigned getNumStubs() const { return NumStubs; }
+
+ /// @brief Get a pointer to the stub at the given index, which must be in
+ /// the range 0 .. getNumStubs() - 1.
+ void* getStub(unsigned Idx) const {
+ return static_cast<uint64_t*>(StubsMem.base()) + Idx;
+ }
+
+ /// @brief Get a pointer to the implementation-pointer at the given index,
+ /// which must be in the range 0 .. getNumStubs() - 1.
+ void** getPtr(unsigned Idx) const {
+ char *PtrsBase =
+ static_cast<char*>(StubsMem.base()) + NumStubs * StubSize;
+ return reinterpret_cast<void**>(PtrsBase) + Idx;
+ }
+ private:
+ unsigned NumStubs;
+ sys::OwningMemoryBlock StubsMem;
+ };
+
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on x86-64, where stubs are 8 bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal);
};
} // End namespace orc.
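
The rounding described for emitIndirectStubsBlock can be sanity-checked against the constants above; a sketch that mirrors (rather than reproduces) the real math:

    constexpr unsigned StubSize = 8, PageSize = 4096; // OrcX86_64 values
    constexpr unsigned stubsFor(unsigned MinStubs) {
      // Round the request up to whole pages, then count stubs per page.
      return ((MinStubs * StubSize + PageSize - 1) / PageSize) *
             (PageSize / StubSize);
    }
    static_assert(stubsFor(4) == 512, "one page of stubs");
    static_assert(stubsFor(513) == 1024, "two pages of stubs");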
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index a808d92..385b8d0 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -17,8 +17,10 @@
#include "JITSymbolFlags.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Memory.h"
#include "llvm/DebugInfo/DIContext.h"
+#include <map>
#include <memory>
namespace llvm {
@@ -59,26 +61,33 @@ public:
class LoadedObjectInfo : public llvm::LoadedObjectInfo {
friend class RuntimeDyldImpl;
public:
- LoadedObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
- unsigned EndIdx)
- : RTDyld(RTDyld), BeginIdx(BeginIdx), EndIdx(EndIdx) { }
+ typedef std::map<object::SectionRef, unsigned> ObjSectionToIDMap;
+
+ LoadedObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
+ : RTDyld(RTDyld), ObjSecToIDMap(ObjSecToIDMap) { }
virtual object::OwningBinary<object::ObjectFile>
getObjectForDebug(const object::ObjectFile &Obj) const = 0;
- uint64_t getSectionLoadAddress(StringRef Name) const;
+ uint64_t
+ getSectionLoadAddress(const object::SectionRef &Sec) const override;
protected:
virtual void anchor();
RuntimeDyldImpl &RTDyld;
- unsigned BeginIdx, EndIdx;
+ ObjSectionToIDMap ObjSecToIDMap;
};
template <typename Derived> struct LoadedObjectInfoHelper : LoadedObjectInfo {
- LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
- unsigned EndIdx)
- : LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {}
+ protected:
+ LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default;
+ LoadedObjectInfoHelper() = default;
+
+ public:
+ LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld,
+ LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap)
+ : LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {}
std::unique_ptr<llvm::LoadedObjectInfo> clone() const override {
return llvm::make_unique<Derived>(static_cast<const Derived &>(*this));
}
@@ -87,7 +96,7 @@ public:
/// \brief Memory Management.
class MemoryManager {
public:
- virtual ~MemoryManager() {};
+ virtual ~MemoryManager() {}
/// Allocate a memory block of (at least) the given size suitable for
/// executable code. The SectionID is a unique identifier assigned by the
@@ -149,7 +158,7 @@ public:
/// \brief Symbol resolution.
class SymbolResolver {
public:
- virtual ~SymbolResolver() {};
+ virtual ~SymbolResolver() {}
/// This method returns the address of the specified function or variable.
/// It is used to resolve symbols during module linking.
@@ -244,4 +253,4 @@ private:
} // end namespace llvm
-#endif
+#endif // LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
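
With LoadedObjectInfo now keyed on SectionRef rather than a section-ID range, load addresses are queried per section. A sketch; Dyld and Obj are assumed context:

    std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info = Dyld.loadObject(Obj);
    for (const object::SectionRef &Sec : Obj.sections()) {
      // Target load address recorded for this section while loading.
      uint64_t Addr = Info->getSectionLoadAddress(Sec);
      (void)Addr;
    }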
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 0b0dcb0..7bb96eb 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -83,10 +83,28 @@ public:
virtual void invalidateInstructionCache();
private:
+ struct FreeMemBlock {
+ // The actual block of free memory
+ sys::MemoryBlock Free;
+ // If there is a pending allocation from the same reservation right before
+ // this block, store its index in PendingMem, to be able to update the
+ // pending region if part of this block is allocated, rather than having to
+ // create a new one.
+ unsigned PendingPrefixIndex;
+ };
+
struct MemoryGroup {
- SmallVector<sys::MemoryBlock, 16> AllocatedMem;
- SmallVector<sys::MemoryBlock, 16> FreeMem;
- sys::MemoryBlock Near;
+ // PendingMem contains all blocks of memory (subblocks of AllocatedMem)
+ // which have not yet had their permissions applied, but have been given
+ // out to the user. FreeMem contains all blocks of memory which have
+ // neither had their permissions applied, nor been given out to the user.
+ SmallVector<sys::MemoryBlock, 16> PendingMem;
+ SmallVector<FreeMemBlock, 16> FreeMem;
+
+ // All memory blocks that have been requested from the system
+ SmallVector<sys::MemoryBlock, 16> AllocatedMem;
+
+ sys::MemoryBlock Near;
};
uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size,
@@ -103,4 +121,3 @@ private:
}
#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
-
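
Client usage is unchanged by this bookkeeping rework; for context, the lifecycle the new PendingMem tracking supports (sizes and error handling are illustrative):

    SectionMemoryManager MemMgr;
    uint8_t *Code = MemMgr.allocateCodeSection(/*Size=*/128, /*Alignment=*/16,
                                               /*SectionID=*/1, ".text");
    // ... emit machine code into Code; the block stays writable (pending)
    // until permissions are applied ...
    std::string Err;
    if (MemMgr.finalizeMemory(&Err)) // flips pending code pages to read+exec
      report_fatal_error(Err);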
diff --git a/contrib/llvm/include/llvm/IR/Argument.h b/contrib/llvm/include/llvm/IR/Argument.h
index fc04fe7..0092f49 100644
--- a/contrib/llvm/include/llvm/IR/Argument.h
+++ b/contrib/llvm/include/llvm/IR/Argument.h
@@ -21,8 +21,7 @@
namespace llvm {
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
+template <typename NodeTy> class SymbolTableListTraits;
/// \brief LLVM Argument representation
///
@@ -36,7 +35,7 @@ class Argument : public Value, public ilist_node<Argument> {
virtual void anchor();
Function *Parent;
- friend class SymbolTableListTraits<Argument, Function>;
+ friend class SymbolTableListTraits<Argument>;
void setParent(Function *parent);
public:
@@ -64,8 +63,8 @@ public:
/// containing function, return the number of bytes known to be
/// dereferenceable. Otherwise, zero is returned.
uint64_t getDereferenceableBytes() const;
-
- /// \brief If this argument has the dereferenceable_or_null attribute on
+
+ /// \brief If this argument has the dereferenceable_or_null attribute on
/// it in its containing function, return the number of bytes known to be
/// dereferenceable. Otherwise, zero is returned.
uint64_t getDereferenceableOrNullBytes() const;
diff --git a/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h b/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
index 19e32a2..6e1f5c4 100644
--- a/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
+++ b/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
@@ -27,7 +27,6 @@ class formatted_raw_ostream;
class AssemblyAnnotationWriter {
public:
-
virtual ~AssemblyAnnotationWriter();
/// emitFunctionAnnot - This may be implemented to emit a string right before
@@ -50,7 +49,7 @@ public:
/// emitInstructionAnnot - This may be implemented to emit a string right
/// before an instruction is emitted.
- virtual void emitInstructionAnnot(const Instruction *,
+ virtual void emitInstructionAnnot(const Instruction *,
formatted_raw_ostream &) {}
/// printInfoComment - This may be implemented to emit a comment to the
diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h
index 4d6d7da..0e33731 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm/include/llvm/IR/Attributes.h
@@ -33,6 +33,7 @@ class AttributeSetImpl;
class AttributeSetNode;
class Constant;
template<typename T> struct DenseMapInfo;
+class Function;
class LLVMContext;
class Type;
@@ -64,65 +65,15 @@ public:
enum AttrKind {
// IR-Level Attributes
None, ///< No attributes have been set
- Alignment, ///< Alignment of parameter (5 bits)
- ///< stored as log2 of alignment with +1 bias
- ///< 0 means unaligned (different from align(1))
- AlwaysInline, ///< inline=always
- Builtin, ///< Callee is recognized as a builtin, despite
- ///< nobuiltin attribute on its declaration.
- ByVal, ///< Pass structure by value
- InAlloca, ///< Pass structure in an alloca
- Cold, ///< Marks function as being in a cold path.
- Convergent, ///< Can only be moved to control-equivalent blocks
- InlineHint, ///< Source said inlining was desirable
- InReg, ///< Force argument to be passed in register
- JumpTable, ///< Build jump-instruction tables and replace refs.
- MinSize, ///< Function must be optimized for size first
- Naked, ///< Naked function
- Nest, ///< Nested function static chain
- NoAlias, ///< Considered to not alias after call
- NoBuiltin, ///< Callee isn't recognized as a builtin
- NoCapture, ///< Function creates no aliases of pointer
- NoDuplicate, ///< Call cannot be duplicated
- NoImplicitFloat, ///< Disable implicit floating point insts
- NoInline, ///< inline=never
- NonLazyBind, ///< Function is called early and/or
- ///< often, so lazy binding isn't worthwhile
- NonNull, ///< Pointer is known to be not null
- Dereferenceable, ///< Pointer is known to be dereferenceable
- DereferenceableOrNull, ///< Pointer is either null or dereferenceable
- NoRedZone, ///< Disable redzone
- NoReturn, ///< Mark the function as not returning
- NoUnwind, ///< Function doesn't unwind stack
- OptimizeForSize, ///< opt_size
- OptimizeNone, ///< Function must not be optimized.
- ReadNone, ///< Function does not access memory
- ReadOnly, ///< Function only reads from memory
- ArgMemOnly, ///< Funciton can access memory only using pointers
- ///< based on its arguments.
- Returned, ///< Return value is always equal to this argument
- ReturnsTwice, ///< Function can return twice
- SExt, ///< Sign extended before/after call
- StackAlignment, ///< Alignment of stack for function (3 bits)
- ///< stored as log2 of alignment with +1 bias 0
- ///< means unaligned (different from
- ///< alignstack=(1))
- StackProtect, ///< Stack protection.
- StackProtectReq, ///< Stack protection required.
- StackProtectStrong, ///< Strong Stack protection.
- SafeStack, ///< Safe Stack protection.
- StructRet, ///< Hidden pointer to structure to return
- SanitizeAddress, ///< AddressSanitizer is on.
- SanitizeThread, ///< ThreadSanitizer is on.
- SanitizeMemory, ///< MemorySanitizer is on.
- UWTable, ///< Function must be in a unwind table
- ZExt, ///< Zero extended before/after call
-
+ #define GET_ATTR_ENUM
+ #include "llvm/IR/Attributes.inc"
EndAttrKinds ///< Sentinal value useful for loops
};
+
private:
AttributeImpl *pImpl;
Attribute(AttributeImpl *A) : pImpl(A) {}
+
public:
Attribute() : pImpl(nullptr) {}
@@ -189,11 +140,11 @@ public:
unsigned getStackAlignment() const;
/// \brief Returns the number of dereferenceable bytes from the
- /// dereferenceable attribute (or zero if unknown).
+ /// dereferenceable attribute.
uint64_t getDereferenceableBytes() const;
/// \brief Returns the number of dereferenceable_or_null bytes from the
- /// dereferenceable_or_null attribute (or zero if unknown).
+ /// dereferenceable_or_null attribute.
uint64_t getDereferenceableOrNullBytes() const;
/// \brief The Attribute is converted to a string of equivalent mnemonic. This
@@ -226,6 +177,7 @@ public:
ReturnIndex = 0U,
FunctionIndex = ~0U
};
+
private:
friend class AttrBuilder;
friend class AttributeSetImpl;
@@ -249,8 +201,8 @@ private:
ArrayRef<std::pair<unsigned,
AttributeSetNode*> > Attrs);
-
explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {}
+
public:
AttributeSet() : pImpl(nullptr) {}
@@ -276,6 +228,11 @@ public:
AttributeSet addAttribute(LLVMContext &C, unsigned Index,
StringRef Kind, StringRef Value) const;
+ /// Add an attribute to the attribute set at the given indices. Because
+ /// attribute sets are immutable, this returns a new set.
+ AttributeSet addAttribute(LLVMContext &C, ArrayRef<unsigned> Indices,
+ Attribute A) const;
+
/// \brief Add attributes to the attribute set at the given index. Because
/// attribute sets are immutable, this returns a new set.
AttributeSet addAttributes(LLVMContext &C, unsigned Index,
@@ -284,13 +241,13 @@ public:
/// \brief Remove the specified attribute at the specified index from this
/// attribute list. Because attribute lists are immutable, this returns the
/// new list.
- AttributeSet removeAttribute(LLVMContext &C, unsigned Index,
+ AttributeSet removeAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Attr) const;
/// \brief Remove the specified attributes at the specified index from this
/// attribute list. Because attribute lists are immutable, this returns the
/// new list.
- AttributeSet removeAttributes(LLVMContext &C, unsigned Index,
+ AttributeSet removeAttributes(LLVMContext &C, unsigned Index,
AttributeSet Attrs) const;
/// \brief Remove the specified attributes at the specified index from this
@@ -439,6 +396,7 @@ class AttrBuilder {
uint64_t StackAlignment;
uint64_t DerefBytes;
uint64_t DerefOrNullBytes;
+
public:
AttrBuilder()
: Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
@@ -511,8 +469,8 @@ public:
/// \brief Retrieve the stack alignment attribute, if it exists.
uint64_t getStackAlignment() const { return StackAlignment; }
- /// \brief Retrieve the number of dereferenceable bytes, if the dereferenceable
- /// attribute exists (zero is returned otherwise).
+ /// \brief Retrieve the number of dereferenceable bytes, if the
+ /// dereferenceable attribute exists (zero is returned otherwise).
uint64_t getDereferenceableBytes() const { return DerefBytes; }
/// \brief Retrieve the number of dereferenceable_or_null bytes, if the
@@ -573,7 +531,14 @@ public:
namespace AttributeFuncs {
/// \brief Which attributes cannot be applied to a type.
-AttrBuilder typeIncompatible(const Type *Ty);
+AttrBuilder typeIncompatible(Type *Ty);
+
+/// \returns Return true if the two functions have compatible target-independent
+/// attributes for inlining purposes.
+bool areInlineCompatible(const Function &Caller, const Function &Callee);
+
+/// \brief Merge caller's and callee's attributes.
+void mergeAttributesForInlining(Function &Caller, const Function &Callee);
} // end AttributeFuncs namespace
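
A sketch of how an inliner client would consult the two new hooks; Caller and Callee are assumed Function references:

    if (!AttributeFuncs::areInlineCompatible(Caller, Callee))
      return false; // e.g. mismatched sanitize_* attributes block inlining
    // Otherwise fold the callee's attributes into the caller.
    AttributeFuncs::mergeAttributesForInlining(Caller, Callee);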
diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td
new file mode 100644
index 0000000..797cd55
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/Attributes.td
@@ -0,0 +1,192 @@
+/// Attribute base class.
+class Attr<string S> {
+ // String representation of this attribute in the IR.
+ string AttrString = S;
+}
+
+/// Enum attribute.
+class EnumAttr<string S> : Attr<S>;
+
+/// StringBool attribute.
+class StrBoolAttr<string S> : Attr<S>;
+
+/// Target-independent enum attributes.
+
+/// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias.
+/// 0 means unaligned (different from align(1)).
+def Alignment : EnumAttr<"align">;
+
+/// inline=always.
+def AlwaysInline : EnumAttr<"alwaysinline">;
+
+/// Function can access memory only using pointers based on its arguments.
+def ArgMemOnly : EnumAttr<"argmemonly">;
+
+/// Callee is recognized as a builtin, despite nobuiltin attribute on its
+/// declaration.
+def Builtin : EnumAttr<"builtin">;
+
+/// Pass structure by value.
+def ByVal : EnumAttr<"byval">;
+
+/// Marks function as being in a cold path.
+def Cold : EnumAttr<"cold">;
+
+/// Can only be moved to control-equivalent blocks.
+def Convergent : EnumAttr<"convergent">;
+
+/// Pointer is known to be dereferenceable.
+def Dereferenceable : EnumAttr<"dereferenceable">;
+
+/// Pointer is either null or dereferenceable.
+def DereferenceableOrNull : EnumAttr<"dereferenceable_or_null">;
+
+/// Function may only access memory that is inaccessible from IR.
+def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly">;
+
+/// Function may only access memory that is either inaccessible from the IR,
+/// or pointed to by its pointer arguments.
+def InaccessibleMemOrArgMemOnly : EnumAttr<"inaccessiblemem_or_argmemonly">;
+
+/// Pass structure in an alloca.
+def InAlloca : EnumAttr<"inalloca">;
+
+/// Source said inlining was desirable.
+def InlineHint : EnumAttr<"inlinehint">;
+
+/// Force argument to be passed in register.
+def InReg : EnumAttr<"inreg">;
+
+/// Build jump-instruction tables and replace refs.
+def JumpTable : EnumAttr<"jumptable">;
+
+/// Function must be optimized for size first.
+def MinSize : EnumAttr<"minsize">;
+
+/// Naked function.
+def Naked : EnumAttr<"naked">;
+
+/// Nested function static chain.
+def Nest : EnumAttr<"nest">;
+
+/// Considered to not alias after call.
+def NoAlias : EnumAttr<"noalias">;
+
+/// Callee isn't recognized as a builtin.
+def NoBuiltin : EnumAttr<"nobuiltin">;
+
+/// Function creates no aliases of pointer.
+def NoCapture : EnumAttr<"nocapture">;
+
+/// Call cannot be duplicated.
+def NoDuplicate : EnumAttr<"noduplicate">;
+
+/// Disable implicit floating point insts.
+def NoImplicitFloat : EnumAttr<"noimplicitfloat">;
+
+/// inline=never.
+def NoInline : EnumAttr<"noinline">;
+
+/// Function is called early and/or often, so lazy binding isn't worthwhile.
+def NonLazyBind : EnumAttr<"nonlazybind">;
+
+/// Pointer is known to be not null.
+def NonNull : EnumAttr<"nonnull">;
+
+/// The function does not recurse.
+def NoRecurse : EnumAttr<"norecurse">;
+
+/// Disable redzone.
+def NoRedZone : EnumAttr<"noredzone">;
+
+/// Mark the function as not returning.
+def NoReturn : EnumAttr<"noreturn">;
+
+/// Function doesn't unwind stack.
+def NoUnwind : EnumAttr<"nounwind">;
+
+/// opt_size.
+def OptimizeForSize : EnumAttr<"optsize">;
+
+/// Function must not be optimized.
+def OptimizeNone : EnumAttr<"optnone">;
+
+/// Function does not access memory.
+def ReadNone : EnumAttr<"readnone">;
+
+/// Function only reads from memory.
+def ReadOnly : EnumAttr<"readonly">;
+
+/// Return value is always equal to this argument.
+def Returned : EnumAttr<"returned">;
+
+/// Function can return twice.
+def ReturnsTwice : EnumAttr<"returns_twice">;
+
+/// Safe Stack protection.
+def SafeStack : EnumAttr<"safestack">;
+
+/// Sign extended before/after call.
+def SExt : EnumAttr<"signext">;
+
+/// Alignment of stack for function (3 bits) stored as log2 of alignment with
+/// +1 bias; 0 means unaligned (different from alignstack=(1)).
+def StackAlignment : EnumAttr<"alignstack">;
+
+/// Stack protection.
+def StackProtect : EnumAttr<"ssp">;
+
+/// Stack protection required.
+def StackProtectReq : EnumAttr<"sspreq">;
+
+/// Strong Stack protection.
+def StackProtectStrong : EnumAttr<"sspstrong">;
+
+/// Hidden pointer to structure to return.
+def StructRet : EnumAttr<"sret">;
+
+/// AddressSanitizer is on.
+def SanitizeAddress : EnumAttr<"sanitize_address">;
+
+/// ThreadSanitizer is on.
+def SanitizeThread : EnumAttr<"sanitize_thread">;
+
+/// MemorySanitizer is on.
+def SanitizeMemory : EnumAttr<"sanitize_memory">;
+
+/// Function must be in an unwind table.
+def UWTable : EnumAttr<"uwtable">;
+
+/// Zero extended before/after call.
+def ZExt : EnumAttr<"zeroext">;
+
+/// Target-independent string attributes.
+def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">;
+def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
+def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">;
+def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">;
+
+class CompatRule<string F> {
+ // The name of the function called to check the attribute of the caller and
+ // callee and decide whether inlining should be allowed. The function's
+ // signature must match "bool(const Function&, const Function &)", where the
+ // first parameter is the reference to the caller and the second parameter is
+ // the reference to the callee. It must return false if the attributes of the
+ // caller and callee are incompatible, and true otherwise.
+ string CompatFunc = F;
+}
+
+def : CompatRule<"isEqual<SanitizeAddressAttr>">;
+def : CompatRule<"isEqual<SanitizeThreadAttr>">;
+def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
+
+class MergeRule<string F> {
+ // The name of the function called to merge the attributes of the caller and
+ // callee. The function's signature must match
+ // "void(Function&, const Function &)", where the first parameter is the
+ // reference to the caller and the second parameter is the reference to the
+ // callee.
+ string MergeFunc = F;
+}
+
+def : MergeRule<"adjustCallerSSPLevel">;
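
For reference, the GET_ATTR_ENUM include in Attributes.h above expects TableGen to expand these defs roughly as follows; this is a hand-written approximation, not the actual generated Attributes.inc:

    #ifdef GET_ATTR_ENUM
    Alignment,
    AlwaysInline,
    ArgMemOnly,
    // ... one enumerator per EnumAttr def above ...
    ZExt,
    #endif // GET_ATTR_ENUM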
diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h
index 66581bf..c6b54d3 100644
--- a/contrib/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm/include/llvm/IR/BasicBlock.h
@@ -30,22 +30,9 @@ class LLVMContext;
class BlockAddress;
class Function;
-// Traits for intrusive list of basic blocks...
-template<> struct ilist_traits<BasicBlock>
- : public SymbolTableListTraits<BasicBlock, Function> {
-
- BasicBlock *createSentinel() const;
- static void destroySentinel(BasicBlock*) {}
-
- BasicBlock *provideInitialHead() const { return createSentinel(); }
- BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
- static void noteHead(BasicBlock*, BasicBlock*) {}
-
- static ValueSymbolTable *getSymTab(Function *ItemParent);
-private:
- mutable ilist_half_node<BasicBlock> Sentinel;
-};
-
+template <>
+struct SymbolTableListSentinelTraits<BasicBlock>
+ : public ilist_half_embedded_sentinel_traits<BasicBlock> {};
/// \brief LLVM Basic Block Representation
///
@@ -63,16 +50,17 @@ private:
/// modifying a program. However, the verifier will ensure that basic blocks
/// are "well formed".
class BasicBlock : public Value, // Basic blocks are data objects also
- public ilist_node<BasicBlock> {
+ public ilist_node_with_parent<BasicBlock, Function> {
friend class BlockAddress;
public:
- typedef iplist<Instruction> InstListType;
+ typedef SymbolTableList<Instruction> InstListType;
+
private:
InstListType InstList;
Function *Parent;
void setParent(Function *parent);
- friend class SymbolTableListTraits<BasicBlock, Function>;
+ friend class SymbolTableListTraits<BasicBlock>;
BasicBlock(const BasicBlock &) = delete;
void operator=(const BasicBlock &) = delete;
@@ -171,7 +159,7 @@ public:
/// \brief Unlink 'this' from the containing function and delete it.
///
// \returns an iterator pointing to the element after the erased one.
- iplist<BasicBlock>::iterator eraseFromParent();
+ SymbolTableList<BasicBlock>::iterator eraseFromParent();
/// \brief Unlink this basic block from its current function and insert it
/// into the function that \p MovePos lives in, right before \p MovePos.
@@ -253,7 +241,7 @@ public:
InstListType &getInstList() { return InstList; }
/// \brief Returns a pointer to a member of the instruction list.
- static iplist<Instruction> BasicBlock::*getSublistAccess(Instruction*) {
+ static InstListType BasicBlock::*getSublistAccess(Instruction*) {
return &BasicBlock::InstList;
}
@@ -283,6 +271,8 @@ public:
/// should be called while the predecessor still refers to this block.
void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
+ bool canSplitPredecessors() const;
+
/// \brief Split the basic block into two basic blocks at the specified
/// instruction.
///
@@ -300,6 +290,9 @@ public:
/// Also note that this doesn't preserve any passes. To split blocks while
/// keeping loop information consistent, use the SplitBlock utility function.
BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
+ BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "") {
+ return splitBasicBlock(I->getIterator(), BBName);
+ }
/// \brief Returns true if there are any uses of this basic block other than
/// direct branches, switches, etc. to it.
@@ -309,6 +302,9 @@ public:
/// basic block \p New instead of to it.
void replaceSuccessorsPhiUsesWith(BasicBlock *New);
+ /// \brief Return true if this basic block is an exception handling block.
+ bool isEHPad() const { return getFirstNonPHI()->isEHPad(); }
+
/// \brief Return true if this basic block is a landing pad.
///
/// Being a ``landing pad'' means that the basic block is the destination of
@@ -337,12 +333,6 @@ private:
}
};
-// createSentinel is used to get hold of the node that marks the end of the
-// list... (same trick used here as in ilist_traits<Instruction>)
-inline BasicBlock *ilist_traits<BasicBlock>::createSentinel() const {
- return static_cast<BasicBlock*>(&Sentinel);
-}
-
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
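
The new overload lets callers pass the split point as an instruction; a sketch, with BB and SplitPt assumed:

    // Equivalent to BB->splitBasicBlock(SplitPt->getIterator(), "tail").
    BasicBlock *Tail = BB->splitBasicBlock(SplitPt, "tail");
    // BB now ends in an unconditional branch to Tail, which starts at SplitPt.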
diff --git a/contrib/llvm/include/llvm/IR/CFG.h b/contrib/llvm/include/llvm/IR/CFG.h
index f78220a..e9bf093 100644
--- a/contrib/llvm/include/llvm/IR/CFG.h
+++ b/contrib/llvm/include/llvm/IR/CFG.h
@@ -107,149 +107,13 @@ inline pred_const_range predecessors(const BasicBlock *BB) {
}
//===----------------------------------------------------------------------===//
-// BasicBlock succ_iterator definition
+// BasicBlock succ_iterator helpers
//===----------------------------------------------------------------------===//
-template <class Term_, class BB_> // Successor Iterator
-class SuccIterator : public std::iterator<std::random_access_iterator_tag, BB_,
- int, BB_ *, BB_ *> {
- typedef std::iterator<std::random_access_iterator_tag, BB_, int, BB_ *, BB_ *>
- super;
-
-public:
- typedef typename super::pointer pointer;
- typedef typename super::reference reference;
-
-private:
- Term_ Term;
- unsigned idx;
- typedef SuccIterator<Term_, BB_> Self;
-
- inline bool index_is_valid(int idx) {
- return idx >= 0 && (unsigned) idx < Term->getNumSuccessors();
- }
-
- /// \brief Proxy object to allow write access in operator[]
- class SuccessorProxy {
- Self it;
-
- public:
- explicit SuccessorProxy(const Self &it) : it(it) {}
-
- SuccessorProxy(const SuccessorProxy&) = default;
-
- SuccessorProxy &operator=(SuccessorProxy r) {
- *this = reference(r);
- return *this;
- }
-
- SuccessorProxy &operator=(reference r) {
- it.Term->setSuccessor(it.idx, r);
- return *this;
- }
-
- operator reference() const { return *it; }
- };
-
-public:
- explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator
- }
- inline SuccIterator(Term_ T, bool) // end iterator
- : Term(T) {
- if (Term)
- idx = Term->getNumSuccessors();
- else
- // Term == NULL happens, if a basic block is not fully constructed and
- // consequently getTerminator() returns NULL. In this case we construct a
- // SuccIterator which describes a basic block that has zero successors.
- // Defining SuccIterator for incomplete and malformed CFGs is especially
- // useful for debugging.
- idx = 0;
- }
-
- /// getSuccessorIndex - This is used to interface between code that wants to
- /// operate on terminator instructions directly.
- unsigned getSuccessorIndex() const { return idx; }
-
- inline bool operator==(const Self& x) const { return idx == x.idx; }
- inline bool operator!=(const Self& x) const { return !operator==(x); }
-
- inline reference operator*() const { return Term->getSuccessor(idx); }
- inline pointer operator->() const { return operator*(); }
-
- inline Self& operator++() { ++idx; return *this; } // Preincrement
-
- inline Self operator++(int) { // Postincrement
- Self tmp = *this; ++*this; return tmp;
- }
-
- inline Self& operator--() { --idx; return *this; } // Predecrement
- inline Self operator--(int) { // Postdecrement
- Self tmp = *this; --*this; return tmp;
- }
-
- inline bool operator<(const Self& x) const {
- assert(Term == x.Term && "Cannot compare iterators of different blocks!");
- return idx < x.idx;
- }
-
- inline bool operator<=(const Self& x) const {
- assert(Term == x.Term && "Cannot compare iterators of different blocks!");
- return idx <= x.idx;
- }
- inline bool operator>=(const Self& x) const {
- assert(Term == x.Term && "Cannot compare iterators of different blocks!");
- return idx >= x.idx;
- }
-
- inline bool operator>(const Self& x) const {
- assert(Term == x.Term && "Cannot compare iterators of different blocks!");
- return idx > x.idx;
- }
-
- inline Self& operator+=(int Right) {
- unsigned new_idx = idx + Right;
- assert(index_is_valid(new_idx) && "Iterator index out of bound");
- idx = new_idx;
- return *this;
- }
-
- inline Self operator+(int Right) const {
- Self tmp = *this;
- tmp += Right;
- return tmp;
- }
-
- inline Self& operator-=(int Right) {
- return operator+=(-Right);
- }
-
- inline Self operator-(int Right) const {
- return operator+(-Right);
- }
-
- inline int operator-(const Self& x) const {
- assert(Term == x.Term && "Cannot work on iterators of different blocks!");
- int distance = idx - x.idx;
- return distance;
- }
-
- inline SuccessorProxy operator[](int offset) {
- Self tmp = *this;
- tmp += offset;
- return SuccessorProxy(tmp);
- }
-
- /// Get the source BB of this iterator.
- inline BB_ *getSource() {
- assert(Term && "Source not available, if basic block was malformed");
- return Term->getParent();
- }
-};
-
-typedef SuccIterator<TerminatorInst*, BasicBlock> succ_iterator;
-typedef SuccIterator<const TerminatorInst*,
- const BasicBlock> succ_const_iterator;
+typedef TerminatorInst::SuccIterator<TerminatorInst *, BasicBlock>
+ succ_iterator;
+typedef TerminatorInst::SuccIterator<const TerminatorInst *, const BasicBlock>
+ succ_const_iterator;
typedef llvm::iterator_range<succ_iterator> succ_range;
typedef llvm::iterator_range<succ_const_iterator> succ_const_range;
@@ -275,8 +139,8 @@ inline succ_const_range successors(const BasicBlock *BB) {
return succ_const_range(succ_begin(BB), succ_end(BB));
}
-
-template <typename T, typename U> struct isPodLike<SuccIterator<T, U> > {
+template <typename T, typename U>
+struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
static const bool value = isPodLike<T>::value;
};
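
Client code is unaffected by hoisting SuccIterator into TerminatorInst; the usual traversal still reads the same (BB assumed):

    for (BasicBlock *Succ : successors(BB)) {
      // Visit each successor of BB's terminator.
      (void)Succ;
    }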
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index 2841781..f4b8a8a 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -41,6 +41,7 @@ template <typename FunTy = const Function,
typename BBTy = const BasicBlock,
typename ValTy = const Value,
typename UserTy = const User,
+ typename UseTy = const Use,
typename InstrTy = const Instruction,
typename CallTy = const CallInst,
typename InvokeTy = const InvokeInst,
@@ -69,6 +70,7 @@ private:
}
return CallSiteBase();
}
+
public:
/// isCall - true if a CallInst is enclosed.
/// Note that !isCall() does not mean it is an InvokeInst enclosed,
@@ -116,6 +118,43 @@ public:
/// Determine whether this Use is the callee operand's Use.
bool isCallee(const Use *U) const { return getCallee() == U; }
+ /// \brief Determine whether the passed iterator points to an argument
+ /// operand.
+ bool isArgOperand(Value::const_user_iterator UI) const {
+ return isArgOperand(&UI.getUse());
+ }
+
+ /// \brief Determine whether the passed use points to an argument operand.
+ bool isArgOperand(const Use *U) const {
+ assert(getInstruction() == U->getUser());
+ return arg_begin() <= U && U < arg_end();
+ }
+
+ /// \brief Determine whether the passed iterator points to a bundle operand.
+ bool isBundleOperand(Value::const_user_iterator UI) const {
+ return isBundleOperand(&UI.getUse());
+ }
+
+ /// \brief Determine whether the passed use points to a bundle operand.
+ bool isBundleOperand(const Use *U) const {
+ assert(getInstruction() == U->getUser());
+ if (!hasOperandBundles())
+ return false;
+ unsigned OperandNo = U - (*this)->op_begin();
+ return getBundleOperandsStartIndex() <= OperandNo &&
+ OperandNo < getBundleOperandsEndIndex();
+ }
+
+ /// \brief Determine whether the passed iterator points to a data operand.
+ bool isDataOperand(Value::const_user_iterator UI) const {
+ return isDataOperand(&UI.getUse());
+ }
+
+ /// \brief Determine whether the passed use points to a data operand.
+ bool isDataOperand(const Use *U) const {
+ return data_operands_begin() <= U && U < data_operands_end();
+ }
+
ValTy *getArgument(unsigned ArgNo) const {
assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
return *(arg_begin() + ArgNo);
@@ -137,8 +176,7 @@ public:
/// it.
unsigned getArgumentNo(const Use *U) const {
assert(getInstruction() && "Not a call or invoke instruction!");
- assert(arg_begin() <= U && U < arg_end()
- && "Argument # out of range!");
+ assert(isArgOperand(U) && "Argument # out of range!");
return U - arg_begin();
}
@@ -146,21 +184,55 @@ public:
/// arguments at this call site.
typedef IterTy arg_iterator;
- /// arg_begin/arg_end - Return iterators corresponding to the actual argument
- /// list for a call site.
- IterTy arg_begin() const {
- assert(getInstruction() && "Not a call or invoke instruction!");
- // Skip non-arguments
- return (*this)->op_begin();
- }
-
- IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); }
iterator_range<IterTy> args() const {
- return iterator_range<IterTy>(arg_begin(), arg_end());
+ return make_range(arg_begin(), arg_end());
}
bool arg_empty() const { return arg_end() == arg_begin(); }
unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
+ /// Given a value use iterator, returns the data operand that corresponds to
+ /// it.
+ /// Iterator must actually correspond to a data operand.
+ unsigned getDataOperandNo(Value::const_user_iterator UI) const {
+ return getDataOperandNo(&UI.getUse());
+ }
+
+ /// Given a use for a data operand, get the data operand number that
+ /// corresponds to it.
+ unsigned getDataOperandNo(const Use *U) const {
+ assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(isDataOperand(U) && "Data operand # out of range!");
+ return U - data_operands_begin();
+ }
+
+ /// Type of iterator to use when looping over data operands at this call site
+ /// (see below).
+ typedef IterTy data_operand_iterator;
+
+ /// data_operands_begin/data_operands_end - Return iterators iterating over
+ /// the call / invoke argument list and bundle operands. For invokes, this is
+ /// the set of instruction operands except the invoke target and the two
+ /// successor blocks; and for calls this is the set of instruction operands
+ /// except the call target.
+
+ IterTy data_operands_begin() const {
+ assert(getInstruction() && "Not a call or invoke instruction!");
+ return (*this)->op_begin();
+ }
+ IterTy data_operands_end() const {
+ assert(getInstruction() && "Not a call or invoke instruction!");
+ return (*this)->op_end() - (isCall() ? 1 : 3);
+ }
+ iterator_range<IterTy> data_ops() const {
+ return make_range(data_operands_begin(), data_operands_end());
+ }
+ bool data_operands_empty() const {
+ return data_operands_end() == data_operands_begin();
+ }
+ unsigned data_operands_size() const {
+ return std::distance(data_operands_begin(), data_operands_end());
+ }
+
/// getType - Return the type of the instruction that generated this call site
///
Type *getType() const { return (*this)->getType(); }
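
A sketch of the data-operand view described above. For call f(a, b) the data operands are {a, b} (op_end() - 1 drops the callee); for invoke f(a, b) they are also {a, b} (op_end() - 3 additionally drops the two successor blocks). I and visit are assumed context:

    CallSite CS(I); // I is a CallInst or InvokeInst pointer
    for (const Use &U : CS.data_ops())
      visit(U.get()); // arguments first, then any bundle operands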
@@ -197,11 +269,11 @@ public:
CALLSITE_DELEGATE_GETTER(getNumArgOperands());
}
- ValTy *getArgOperand(unsigned i) const {
+ ValTy *getArgOperand(unsigned i) const {
CALLSITE_DELEGATE_GETTER(getArgOperand(i));
}
- bool isInlineAsm() const {
+ bool isInlineAsm() const {
if (isCall())
return cast<CallInst>(getInstruction())->isInlineAsm();
return false;
@@ -243,6 +315,17 @@ public:
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
}
+ /// \brief Return true if the data operand at index \p i directly or
+ /// indirectly has the attribute \p A.
+ ///
+ /// Normal call or invoke arguments have per operand attributes, as specified
+ /// in the attribute set attached to this instruction, while operand bundle
+ /// operands may have some attributes implied by the type of its containing
+ /// operand bundle.
+ bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const {
+ CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, A));
+ }
+
/// @brief Extract the alignment for a call or parameter (0=unknown).
uint16_t getParamAlignment(uint16_t i) const {
CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
@@ -253,13 +336,20 @@ public:
uint64_t getDereferenceableBytes(uint16_t i) const {
CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i));
}
-
+
/// @brief Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableOrNullBytes(uint16_t i) const {
CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
}
-
+
+ /// @brief Determine if the parameter or return value is marked with NoAlias
+ /// attribute.
+ /// @param n The parameter to check. 1 is the first parameter, 0 is the return
+ /// value.
+ bool doesNotAlias(unsigned n) const {
+ CALLSITE_DELEGATE_GETTER(doesNotAlias(n));
+ }
+
/// \brief Return true if the call should not be treated as a call to a
/// builtin.
bool isNoBuiltin() const {
@@ -315,12 +405,62 @@ public:
CALLSITE_DELEGATE_SETTER(setDoesNotThrow());
}
+ unsigned getNumOperandBundles() const {
+ CALLSITE_DELEGATE_GETTER(getNumOperandBundles());
+ }
+
+ bool hasOperandBundles() const {
+ CALLSITE_DELEGATE_GETTER(hasOperandBundles());
+ }
+
+ unsigned getBundleOperandsStartIndex() const {
+ CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex());
+ }
+
+ unsigned getBundleOperandsEndIndex() const {
+ CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex());
+ }
+
+ unsigned getNumTotalBundleOperands() const {
+ CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands());
+ }
+
+ OperandBundleUse getOperandBundleAt(unsigned Index) const {
+ CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index));
+ }
+
+ Optional<OperandBundleUse> getOperandBundle(StringRef Name) const {
+ CALLSITE_DELEGATE_GETTER(getOperandBundle(Name));
+ }
+
+ Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const {
+ CALLSITE_DELEGATE_GETTER(getOperandBundle(ID));
+ }
+
+ IterTy arg_begin() const {
+ CALLSITE_DELEGATE_GETTER(arg_begin());
+ }
+
+ IterTy arg_end() const {
+ CALLSITE_DELEGATE_GETTER(arg_end());
+ }
+
#undef CALLSITE_DELEGATE_GETTER
#undef CALLSITE_DELEGATE_SETTER
- /// @brief Determine whether this argument is not captured.
- bool doesNotCapture(unsigned ArgNo) const {
- return paramHasAttr(ArgNo + 1, Attribute::NoCapture);
+ void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
+ const Instruction *II = getInstruction();
+ // Since this is actually a getter that "looks like" a setter, don't use the
+ // above macros to avoid confusion.
+ if (isCall())
+ cast<CallInst>(II)->getOperandBundlesAsDefs(Defs);
+ else
+ cast<InvokeInst>(II)->getOperandBundlesAsDefs(Defs);
+ }
+
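getOperandBundlesAsDefs is typically used to clone or rewrite a call without dropping its bundles; a sketch under that assumption (the helper is hypothetical):

```c++
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Emit a copy of CI before the original, preserving its callee, argument
// list, and operand bundles.
static CallInst *cloneCallWithBundles(CallInst *CI) {
  CallSite CS(CI);
  SmallVector<OperandBundleDef, 1> Bundles;
  CS.getOperandBundlesAsDefs(Bundles); // snapshot the bundles as defs
  SmallVector<Value *, 8> Args(CS.arg_begin(), CS.arg_end());
  IRBuilder<> Builder(CI);
  return Builder.CreateCall(CS.getCalledValue(), Args, Bundles);
}
```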
+ /// @brief Determine whether this data operand is not captured.
+ bool doesNotCapture(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
}
/// @brief Determine whether this argument is passed by value.
@@ -345,13 +485,13 @@ public:
return paramHasAttr(arg_size(), Attribute::InAlloca);
}
- bool doesNotAccessMemory(unsigned ArgNo) const {
- return paramHasAttr(ArgNo + 1, Attribute::ReadNone);
+ bool doesNotAccessMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
}
- bool onlyReadsMemory(unsigned ArgNo) const {
- return paramHasAttr(ArgNo + 1, Attribute::ReadOnly) ||
- paramHasAttr(ArgNo + 1, Attribute::ReadNone);
+ bool onlyReadsMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
+ dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
}
/// @brief Return true if the return value is known to be not null.
@@ -378,13 +518,6 @@ public:
}
private:
- unsigned getArgumentEndOffset() const {
- if (isCall())
- return 1; // Skip Callee
- else
- return 3; // Skip BB, BB, Callee
- }
-
IterTy getCallee() const {
if (isCall()) // Skip Callee
return cast<CallInst>(getInstruction())->op_end() - 1;
@@ -393,7 +526,7 @@ private:
}
};
-class CallSite : public CallSiteBase<Function, BasicBlock, Value, User,
+class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
Instruction, CallInst, InvokeInst,
User::op_iterator> {
public:
diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h
index 9872e6e..bc05092 100644
--- a/contrib/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm/include/llvm/IR/CallingConv.h
@@ -69,6 +69,12 @@ namespace CallingConv {
// (almost) all registers.
PreserveAll = 15,
+ // Swift - Calling convention for Swift.
+ Swift = 16,
+
+ // CXX_FAST_TLS - Calling convention used for C++ TLS access functions.
+ CXX_FAST_TLS = 17,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
@@ -144,7 +150,26 @@ namespace CallingConv {
/// \brief MSVC calling convention that passes vectors and vector aggregates
/// in SSE registers.
- X86_VectorCall = 80
+ X86_VectorCall = 80,
+
+ /// \brief Calling convention used by HipHop Virtual Machine (HHVM) to
+ /// perform calls to and from translation cache, and for calling PHP
+ /// functions.
+ /// HHVM calling convention supports tail/sibling call elimination.
+ HHVM = 81,
+
+ /// \brief HHVM calling convention for invoking C/C++ helpers.
+ HHVM_C = 82,
+
+ /// X86_INTR - x86 hardware interrupt context. Callee may take one or two
+ /// parameters, where the 1st represents a pointer to hardware context frame
+ /// and the 2nd represents the hardware error code; the presence of the latter
+ /// depends on the interrupt vector taken. Valid for both 32- and 64-bit
+ /// subtargets.
+ X86_INTR = 83,
+
+ /// The highest possible calling convention ID. Must be some 2^k - 1.
+ MaxID = 1023
};
} // End CallingConv namespace
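The new entries are ordinary CallingConv::ID values; a sketch of tagging a function and its direct calls with one of them (helper name hypothetical):

```c++
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Give F the Swift calling convention and keep its direct callers in sync;
// a call whose convention disagrees with its callee's is UB in LLVM IR.
static void applySwiftCC(Function &F) {
  F.setCallingConv(CallingConv::Swift);
  for (User *U : F.users())
    if (auto *CI = dyn_cast<CallInst>(U))
      if (CI->getCalledFunction() == &F)
        CI->setCallingConv(CallingConv::Swift);
}
```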
diff --git a/contrib/llvm/include/llvm/IR/Comdat.h b/contrib/llvm/include/llvm/IR/Comdat.h
index 4d4c15f..fb79e13 100644
--- a/contrib/llvm/include/llvm/IR/Comdat.h
+++ b/contrib/llvm/include/llvm/IR/Comdat.h
@@ -42,7 +42,7 @@ public:
SelectionKind getSelectionKind() const { return SK; }
void setSelectionKind(SelectionKind Val) { SK = Val; }
StringRef getName() const;
- void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS, bool IsForDebug = false) const;
void dump() const;
private:
diff --git a/contrib/llvm/include/llvm/IR/Constant.h b/contrib/llvm/include/llvm/IR/Constant.h
index 019b434..bb88905 100644
--- a/contrib/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm/include/llvm/IR/Constant.h
@@ -24,18 +24,18 @@ namespace llvm {
/// This is an important base class in LLVM. It provides the common facilities
/// of all constant values in an LLVM program. A constant is a value that is
/// immutable at runtime. Functions are constants because their address is
-/// immutable. Same with global variables.
-///
+/// immutable. Same with global variables.
+///
/// All constants share the capabilities provided in this class. All constants
/// can have a null value. They can have an operand list. Constants can be
/// simple (integer and floating point values), complex (arrays and structures),
-/// or expression based (computations yielding a constant value composed of
+/// or expression based (computations yielding a constant value composed of
/// only certain operators and other constant values).
-///
-/// Note that Constants are immutable (once created they never change)
-/// and are fully shared by structural equivalence. This means that two
-/// structurally equivalent constants will always have the same address.
-/// Constants are created on demand as needed and never deleted: thus clients
+///
+/// Note that Constants are immutable (once created they never change)
+/// and are fully shared by structural equivalence. This means that two
+/// structurally equivalent constants will always have the same address.
+/// Constants are created on demand as needed and never deleted: thus clients
/// don't have to worry about the lifetime of the objects.
/// @brief LLVM Constant Representation
class Constant : public User {
@@ -59,7 +59,7 @@ public:
/// getAllOnesValue.
bool isAllOnesValue() const;
- /// isNegativeZeroValue - Return true if the value is what would be returned
+ /// isNegativeZeroValue - Return true if the value is what would be returned
/// by getZeroValueForNegation.
bool isNegativeZeroValue() const;
@@ -85,29 +85,14 @@ public:
/// isConstantUsed - Return true if the constant has users other than constant
/// exprs and other dangling things.
bool isConstantUsed() const;
-
- enum PossibleRelocationsTy {
- NoRelocation = 0,
- LocalRelocation = 1,
- GlobalRelocations = 2
- };
-
- /// getRelocationInfo - This method classifies the entry according to
- /// whether or not it may generate a relocation entry. This must be
- /// conservative, so if it might codegen to a relocatable entry, it should say
- /// so. The return values are:
- ///
- /// NoRelocation: This constant pool entry is guaranteed to never have a
- /// relocation applied to it (because it holds a simple constant like
- /// '4').
- /// LocalRelocation: This entry has relocations, but the entries are
- /// guaranteed to be resolvable by the static linker, so the dynamic
- /// linker will never see them.
- /// GlobalRelocations: This entry may have arbitrary relocations.
+
+ /// This method classifies the entry according to whether or not it may
+ /// generate a relocation entry. This must be conservative, so if it might
+ /// codegen to a relocatable entry, it should say so.
///
- /// FIXME: This really should not be in VMCore.
- PossibleRelocationsTy getRelocationInfo() const;
-
+ /// FIXME: This really should not be in IR.
+ bool needsRelocation() const;
+
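Callers that previously switched over the removed enum now just branch on the boolean; e.g. (hypothetical caller):

```c++
#include "llvm/IR/Constant.h"

using namespace llvm;

// A constant may go in a read-only, relocation-free section only if it is
// guaranteed never to need a relocation. The query is conservative, so a
// "true" answer may still be resolvable by the static linker.
static bool fitsInMergeableConstSection(const Constant *C) {
  return !C->needsRelocation();
}
```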
/// getAggregateElement - For aggregates (struct/array/vector) return the
/// constant that corresponds to the specified element if possible, or null if
/// not. This can return null if the element index is a ConstantExpr, or if
@@ -159,8 +144,8 @@ public:
/// getIntegerValue - Return the value for an integer or pointer constant,
/// or a vector thereof, with the given scalar value.
- static Constant *getIntegerValue(Type* Ty, const APInt &V);
-
+ static Constant *getIntegerValue(Type *Ty, const APInt &V);
+
/// removeDeadConstantUsers - If there are any dead constant users dangling
/// off of this constant, remove them. This method is useful for clients
/// that want to check to see if a global is unused, but don't want to deal
diff --git a/contrib/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm/include/llvm/IR/ConstantRange.h
index 9ded3ca..fb596a3 100644
--- a/contrib/llvm/include/llvm/IR/ConstantRange.h
+++ b/contrib/llvm/include/llvm/IR/ConstantRange.h
@@ -82,6 +82,17 @@ public:
static ConstantRange makeSatisfyingICmpRegion(CmpInst::Predicate Pred,
const ConstantRange &Other);
+ /// Return the largest range containing all X such that "X BinOpC C" does not
+ /// wrap (overflow).
+ ///
+ /// Example:
+ /// typedef OverflowingBinaryOperator OBO;
+ /// makeNoWrapRegion(Add, i8 1, OBO::NoSignedWrap) == [-128, 127)
+ /// makeNoWrapRegion(Add, i8 1, OBO::NoUnsignedWrap) == [0, -1)
+ /// makeNoWrapRegion(Add, i8 0, OBO::NoUnsignedWrap) == Full Set
+ static ConstantRange makeNoWrapRegion(Instruction::BinaryOps BinOp,
+ const APInt &C, unsigned NoWrapKind);
+
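To make the doc comment's examples concrete, a sketch that uses the region to prove an add never sign-wraps over a whole input range:

```c++
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// True if "X + C" never sign-wraps for any X in Domain: the add is NSW
// exactly when Domain is contained in the no-wrap region computed for C.
static bool addIsAlwaysNSW(const ConstantRange &Domain, const APInt &C) {
  ConstantRange NoWrap = ConstantRange::makeNoWrapRegion(
      Instruction::Add, C, OverflowingBinaryOperator::NoSignedWrap);
  return NoWrap.contains(Domain);
}
```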
/// Return the lower value for this range.
///
const APInt &getLower() const { return Lower; }
@@ -207,7 +218,7 @@ public:
/// Make this range have the bit width given by \p BitWidth. The
/// value is zero extended, truncated, or left alone to make it that width.
ConstantRange zextOrTrunc(uint32_t BitWidth) const;
-
+
/// Make this range have the bit width given by \p BitWidth. The
/// value is sign extended, truncated, or left alone to make it that width.
ConstantRange sextOrTrunc(uint32_t BitWidth) const;
@@ -258,7 +269,7 @@ public:
/// Return a new range that is the logical not of the current set.
///
ConstantRange inverse() const;
-
+
/// Print out the bounds to a stream.
///
void print(raw_ostream &OS) const;
diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h
index 0c7a84f..a5a20c9 100644
--- a/contrib/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm/include/llvm/IR/Constants.h
@@ -590,7 +590,7 @@ public:
/// formed with a vector or array of the specified element type.
/// ConstantDataArray only works with normal float and int types that are
/// stored densely in memory, not with things like i42 or x86_f80.
- static bool isElementTypeCompatible(const Type *Ty);
+ static bool isElementTypeCompatible(Type *Ty);
/// getElementAsInteger - If this is a sequential container of integers (of
/// any size), return the specified element in the low bits of a uint64_t.
@@ -795,7 +795,32 @@ public:
}
};
+//===----------------------------------------------------------------------===//
+/// ConstantTokenNone - an empty constant token (the IR value 'token none').
+///
+class ConstantTokenNone : public Constant {
+ void *operator new(size_t, unsigned) = delete;
+ ConstantTokenNone(const ConstantTokenNone &) = delete;
+
+ friend class Constant;
+ void destroyConstantImpl();
+ Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
+protected:
+ explicit ConstantTokenNone(LLVMContext &Context)
+ : Constant(Type::getTokenTy(Context), ConstantTokenNoneVal, nullptr, 0) {}
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) { return User::operator new(s, 0); }
+
+public:
+ /// Return the ConstantTokenNone.
+ static ConstantTokenNone *get(LLVMContext &Context);
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantTokenNoneVal;
+ }
+};
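As with other singleton constants, the only way to obtain it is the uniquing factory; a one-line sketch:

```c++
#include "llvm/IR/Constants.h"

using namespace llvm;

// Materialize the IR value "token none" for Ctx; the instance is created
// on demand, uniqued per context, and never deleted.
static Constant *tokenNone(LLVMContext &Ctx) {
  return ConstantTokenNone::get(Ctx);
}
```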
/// BlockAddress - The address of a basic block.
///
@@ -1175,7 +1200,8 @@ public:
/// gets constant-folded, the type changes, or the expression is otherwise
/// canonicalized. This parameter should almost always be \c false.
Constant *getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
- bool OnlyIfReduced = false) const;
+ bool OnlyIfReduced = false,
+ Type *SrcTy = nullptr) const;
/// getAsInstruction - Returns an Instruction which implements the same
/// operation as this ConstantExpr. The instruction is not linked to any basic
diff --git a/contrib/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm/include/llvm/IR/DIBuilder.h
index aa43c02..aeec395 100644
--- a/contrib/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm/include/llvm/IR/DIBuilder.h
@@ -158,7 +158,9 @@ namespace llvm {
/// Create debugging information entry for a c++
/// style reference or rvalue reference type.
- DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy);
+ DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy,
+ uint64_t SizeInBits = 0,
+ uint64_t AlignInBits = 0);
/// Create debugging information entry for a typedef.
/// \param Ty Original type.
@@ -375,15 +377,20 @@ namespace llvm {
DIType *UnderlyingType, StringRef UniqueIdentifier = "");
/// Create subroutine type.
- /// \param File File in which this subroutine is defined.
/// \param ParameterTypes An array of subroutine parameter types. This
/// includes return type at 0th index.
/// \param Flags E.g.: LValueReference.
/// These flags are used to emit dwarf attributes.
- DISubroutineType *createSubroutineType(DIFile *File,
- DITypeRefArray ParameterTypes,
+ DISubroutineType *createSubroutineType(DITypeRefArray ParameterTypes,
unsigned Flags = 0);
+ /// Create an external type reference.
+ /// \param Tag Dwarf TAG.
+ /// \param File File in which the type is defined.
+ /// \param UniqueIdentifier A unique identifier for the type.
+ DICompositeType *createExternalTypeRef(unsigned Tag, DIFile *File,
+ StringRef UniqueIdentifier);
+
/// Create a new DIType* with "artificial" flag set.
DIType *createArtificialType(DIType *Ty);
@@ -450,26 +457,36 @@ namespace llvm {
unsigned LineNo, DIType *Ty, bool isLocalToUnit, llvm::Constant *Val,
MDNode *Decl = nullptr);
- /// Create a new descriptor for the specified
- /// local variable.
- /// \param Tag Dwarf TAG. Usually DW_TAG_auto_variable or
- /// DW_TAG_arg_variable.
- /// \param Scope Variable scope.
- /// \param Name Variable name.
- /// \param File File where this variable is defined.
- /// \param LineNo Line number.
- /// \param Ty Variable Type
- /// \param AlwaysPreserve Boolean. Set to true if debug info for this
- /// variable should be preserved in optimized build.
- /// \param Flags Flags, e.g. artificial variable.
- /// \param ArgNo If this variable is an argument then this argument's
- /// number. 1 indicates 1st argument.
- DILocalVariable *createLocalVariable(unsigned Tag, DIScope *Scope,
- StringRef Name, DIFile *File,
- unsigned LineNo, DIType *Ty,
+ /// Create a new descriptor for an auto variable. This is a local variable
+ /// that is not a subprogram parameter.
+ ///
+ /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually
+ /// leads to a \a DISubprogram.
+ ///
+ /// If \c AlwaysPreserve, this variable will be referenced from its
+ /// containing subprogram, and will survive some optimizations.
+ DILocalVariable *createAutoVariable(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ DIType *Ty,
bool AlwaysPreserve = false,
- unsigned Flags = 0,
- unsigned ArgNo = 0);
+ unsigned Flags = 0);
+
+ /// Create a new descriptor for a parameter variable.
+ ///
+ /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually
+ /// leads to a \a DISubprogram.
+ ///
+ /// \c ArgNo is the index (starting from \c 1) of this variable in the
+ /// subprogram parameters. \c ArgNo should not conflict with other
+ /// parameters of the same subprogram.
+ ///
+ /// If \c AlwaysPreserve, this variable will be referenced from its
+ /// containing subprogram, and will survive some optimizations.
+ DILocalVariable *createParameterVariable(DIScope *Scope, StringRef Name,
+ unsigned ArgNo, DIFile *File,
+ unsigned LineNo, DIType *Ty,
+ bool AlwaysPreserve = false,
+ unsigned Flags = 0);
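Existing createLocalVariable callers split mechanically along the old Tag argument; a sketch of both new forms (the surrounding values are hypothetical):

```c++
#include "llvm/IR/DIBuilder.h"

using namespace llvm;

static void describeVariables(DIBuilder &DIB, DIScope *Scope, DIFile *File,
                              DIType *Ty) {
  // Was: createLocalVariable(dwarf::DW_TAG_auto_variable, ...).
  DIB.createAutoVariable(Scope, "local", File, /*LineNo=*/12, Ty);
  // Was: createLocalVariable(dwarf::DW_TAG_arg_variable, ..., /*ArgNo=*/1).
  // ArgNo is 1-based and now comes directly after the name.
  DIB.createParameterVariable(Scope, "param", /*ArgNo=*/1, File,
                              /*LineNo=*/11, Ty);
}
```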
/// Create a new descriptor for the specified
/// variable which has a complex address expression for its address.
@@ -499,15 +516,15 @@ namespace llvm {
/// \param Flags e.g. is this function prototyped or not.
/// These flags are used to emit dwarf attributes.
/// \param isOptimized True if optimization is ON.
- /// \param Fn llvm::Function pointer.
- /// \param TParam Function template parameters.
- DISubprogram *
- createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition, unsigned ScopeLine,
- unsigned Flags = 0, bool isOptimized = false,
- Function *Fn = nullptr, MDNode *TParam = nullptr,
- MDNode *Decl = nullptr);
+ /// \param TParams Function template parameters.
+ DISubprogram *createFunction(DIScope *Scope, StringRef Name,
+ StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags = 0,
+ bool isOptimized = false,
+ DITemplateParameterArray TParams = nullptr,
+ DISubprogram *Decl = nullptr);
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
@@ -515,18 +532,19 @@ namespace llvm {
DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
bool isDefinition, unsigned ScopeLine, unsigned Flags = 0,
- bool isOptimized = false, Function *Fn = nullptr,
- MDNode *TParam = nullptr, MDNode *Decl = nullptr);
+ bool isOptimized = false, DITemplateParameterArray TParams = nullptr,
+ DISubprogram *Decl = nullptr);
/// FIXME: this is added for dragonegg. Once we update dragonegg
/// to call resolve function, this will be removed.
- DISubprogram *
- createFunction(DIScopeRef Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition, unsigned ScopeLine,
- unsigned Flags = 0, bool isOptimized = false,
- Function *Fn = nullptr, MDNode *TParam = nullptr,
- MDNode *Decl = nullptr);
+ DISubprogram *createFunction(DIScopeRef Scope, StringRef Name,
+ StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags = 0,
+ bool isOptimized = false,
+ DITemplateParameterArray TParams = nullptr,
+ DISubprogram *Decl = nullptr);
/// Create a new descriptor for the specified C++ method.
/// See comments in \a DISubprogram* for descriptions of these fields.
@@ -545,15 +563,14 @@ namespace llvm {
/// \param Flags e.g. is this function prototyped or not.
/// This flags are used to emit dwarf attributes.
/// \param isOptimized True if optimization is ON.
- /// \param Fn llvm::Function pointer.
- /// \param TParam Function template parameters.
+ /// \param TParams Function template parameters.
DISubprogram *
createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName,
DIFile *File, unsigned LineNo, DISubroutineType *Ty,
bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0,
unsigned VTableIndex = 0, DIType *VTableHolder = nullptr,
unsigned Flags = 0, bool isOptimized = false,
- Function *Fn = nullptr, MDNode *TParam = nullptr);
+ DITemplateParameterArray TParams = nullptr);
/// This creates new descriptor for a namespace with the specified
/// parent scope.
@@ -685,7 +702,7 @@ namespace llvm {
/// has a self-reference -- \a DIBuilder needs to track the array to
/// resolve cycles.
void replaceArrays(DICompositeType *&T, DINodeArray Elements,
- DINodeArray TParems = DINodeArray());
+ DINodeArray TParams = DINodeArray());
/// Replace a temporary node.
///
diff --git a/contrib/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm/include/llvm/IR/DataLayout.h
index 892d6c9..19a3a66 100644
--- a/contrib/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm/include/llvm/IR/DataLayout.h
@@ -475,7 +475,8 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
class StructLayout {
uint64_t StructSize;
unsigned StructAlignment;
- unsigned NumElements;
+ bool IsPadded : 1;
+ unsigned NumElements : 31;
uint64_t MemberOffsets[1]; // variable sized array!
public:
uint64_t getSizeInBytes() const { return StructSize; }
@@ -484,6 +485,10 @@ public:
unsigned getAlignment() const { return StructAlignment; }
+ /// Returns whether the struct has padding between its fields.
+ /// NB: Padding in nested elements is not taken into account.
+ bool hasPadding() const { return IsPadded; }
+
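A sketch of consulting the new bit; note that it reports only the struct's own inter-field padding:

```c++
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

// True if STy's layout inserts padding bytes between (or after) its own
// fields. Padding inside nested aggregate members is not reported.
static bool hasDirectPadding(const DataLayout &DL, StructType *STy) {
  return DL.getStructLayout(STy)->hasPadding();
}
```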
/// \brief Given a valid byte offset into the structure, returns the structure
/// index that contains it.
unsigned getElementContainingOffset(uint64_t Offset) const;
diff --git a/contrib/llvm/include/llvm/IR/DebugInfo.h b/contrib/llvm/include/llvm/IR/DebugInfo.h
index 5429648..4caceac 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfo.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfo.h
@@ -44,9 +44,6 @@ DISubprogram *getDISubprogram(const MDNode *Scope);
/// \returns a valid subprogram, if found. Otherwise, return \c nullptr.
DISubprogram *getDISubprogram(const Function *F);
-/// \brief Find underlying composite type.
-DICompositeTypeBase *getDICompositeType(DIType *T);
-
/// \brief Generate map by visiting all retained types.
DITypeIdentifierMap generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes);
@@ -108,23 +105,23 @@ public:
typedef SmallVectorImpl<DIScope *>::const_iterator scope_iterator;
iterator_range<compile_unit_iterator> compile_units() const {
- return iterator_range<compile_unit_iterator>(CUs.begin(), CUs.end());
+ return make_range(CUs.begin(), CUs.end());
}
iterator_range<subprogram_iterator> subprograms() const {
- return iterator_range<subprogram_iterator>(SPs.begin(), SPs.end());
+ return make_range(SPs.begin(), SPs.end());
}
iterator_range<global_variable_iterator> global_variables() const {
- return iterator_range<global_variable_iterator>(GVs.begin(), GVs.end());
+ return make_range(GVs.begin(), GVs.end());
}
iterator_range<type_iterator> types() const {
- return iterator_range<type_iterator>(TYs.begin(), TYs.end());
+ return make_range(TYs.begin(), TYs.end());
}
iterator_range<scope_iterator> scopes() const {
- return iterator_range<scope_iterator>(Scopes.begin(), Scopes.end());
+ return make_range(Scopes.begin(), Scopes.end());
}
unsigned compile_unit_count() const { return CUs.size(); }
@@ -146,8 +143,6 @@ private:
bool TypeMapInitialized;
};
-DenseMap<const Function *, DISubprogram *> makeSubprogramMap(const Module &M);
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
index d5de868..9756c12 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
+++ b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
@@ -32,5 +32,6 @@ HANDLE_DI_FLAG((1 << 11), Vector)
HANDLE_DI_FLAG((1 << 12), StaticMember)
HANDLE_DI_FLAG((1 << 13), LValueReference)
HANDLE_DI_FLAG((1 << 14), RValueReference)
+HANDLE_DI_FLAG((1 << 15), ExternalTypeRef)
#undef HANDLE_DI_FLAG
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
index 9c5a957..456313a 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -20,15 +20,7 @@
// Helper macros for defining get() overrides.
#define DEFINE_MDNODE_GET_UNPACK_IMPL(...) __VA_ARGS__
#define DEFINE_MDNODE_GET_UNPACK(ARGS) DEFINE_MDNODE_GET_UNPACK_IMPL ARGS
-#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \
- static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \
- return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \
- } \
- static CLASS *getIfExists(LLVMContext &Context, \
- DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \
- return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \
- /* ShouldCreate */ false); \
- } \
+#define DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) \
static CLASS *getDistinct(LLVMContext &Context, \
DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \
return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Distinct); \
@@ -38,6 +30,16 @@
return Temp##CLASS( \
getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Temporary)); \
}
+#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \
+ static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \
+ return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \
+ } \
+ static CLASS *getIfExists(LLVMContext &Context, \
+ DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \
+ return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \
+ /* ShouldCreate */ false); \
+ } \
+ DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS)
namespace llvm {
@@ -67,8 +69,8 @@ public:
operator Metadata *() const { return const_cast<Metadata *>(MD); }
- bool operator==(const TypedDINodeRef<T> &X) const { return MD == X.MD; };
- bool operator!=(const TypedDINodeRef<T> &X) const { return MD != X.MD; };
+ bool operator==(const TypedDINodeRef<T> &X) const { return MD == X.MD; }
+ bool operator!=(const TypedDINodeRef<T> &X) const { return MD != X.MD; }
/// \brief Create a reference.
///
@@ -97,6 +99,7 @@ class DITypeRefArray {
const MDTuple *N = nullptr;
public:
+ DITypeRefArray() = default;
DITypeRefArray(const MDTuple *N) : N(N) {}
explicit operator bool() const { return get(); }
@@ -574,6 +577,7 @@ public:
bool isStaticMember() const { return getFlags() & FlagStaticMember; }
bool isLValueReference() const { return getFlags() & FlagLValueReference; }
bool isRValueReference() const { return getFlags() & FlagRValueReference; }
+ bool isExternalTypeRef() const { return getFlags() & FlagExternalTypeRef; }
DITypeRef getRef() const { return DITypeRef::get(this); }
@@ -646,45 +650,21 @@ public:
}
};
-/// \brief Base class for DIDerivedType and DICompositeType.
-///
-/// TODO: Delete; they're not really related.
-class DIDerivedTypeBase : public DIType {
-protected:
- DIDerivedTypeBase(LLVMContext &C, unsigned ID, StorageType Storage,
- unsigned Tag, unsigned Line, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
- ArrayRef<Metadata *> Ops)
- : DIType(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
- Flags, Ops) {}
- ~DIDerivedTypeBase() = default;
-
-public:
- DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
- Metadata *getRawBaseType() const { return getOperand(3); }
-
- static bool classof(const Metadata *MD) {
- return MD->getMetadataID() == DIDerivedTypeKind ||
- MD->getMetadataID() == DICompositeTypeKind ||
- MD->getMetadataID() == DISubroutineTypeKind;
- }
-};
-
/// \brief Derived types.
///
/// This includes qualified types, pointers, references, friends, typedefs, and
/// class members.
///
/// TODO: Split out members (inheritance, fields, methods, etc.).
-class DIDerivedType : public DIDerivedTypeBase {
+class DIDerivedType : public DIType {
friend class LLVMContextImpl;
friend class MDNode;
DIDerivedType(LLVMContext &C, StorageType Storage, unsigned Tag,
unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags, ArrayRef<Metadata *> Ops)
- : DIDerivedTypeBase(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits,
- AlignInBits, OffsetInBits, Flags, Ops) {}
+ : DIType(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Ops) {}
~DIDerivedType() = default;
static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
@@ -732,6 +712,10 @@ public:
TempDIDerivedType clone() const { return cloneImpl(); }
+ /// Get the base type this is derived from.
+ DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+ Metadata *getRawBaseType() const { return getOperand(3); }
+
/// \brief Get extra data associated with this derived type.
///
/// Class type for pointer-to-members, objective-c property node for ivars,
@@ -764,88 +748,23 @@ public:
}
};
-/// \brief Base class for DICompositeType and DISubroutineType.
-///
-/// TODO: Delete; they're not really related.
-class DICompositeTypeBase : public DIDerivedTypeBase {
- unsigned RuntimeLang;
-
-protected:
- DICompositeTypeBase(LLVMContext &C, unsigned ID, StorageType Storage,
- unsigned Tag, unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- ArrayRef<Metadata *> Ops)
- : DIDerivedTypeBase(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Ops),
- RuntimeLang(RuntimeLang) {}
- ~DICompositeTypeBase() = default;
-
-public:
- /// \brief Get the elements of the composite type.
- ///
- /// \note Calling this is only valid for \a DICompositeType. This assertion
- /// can be removed once \a DISubroutineType has been separated from
- /// "composite types".
- DINodeArray getElements() const {
- assert(!isa<DISubroutineType>(this) && "no elements for DISubroutineType");
- return cast_or_null<MDTuple>(getRawElements());
- }
- DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); }
- DITemplateParameterArray getTemplateParams() const {
- return cast_or_null<MDTuple>(getRawTemplateParams());
- }
- StringRef getIdentifier() const { return getStringOperand(7); }
- unsigned getRuntimeLang() const { return RuntimeLang; }
-
- Metadata *getRawElements() const { return getOperand(4); }
- Metadata *getRawVTableHolder() const { return getOperand(5); }
- Metadata *getRawTemplateParams() const { return getOperand(6); }
- MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
-
- /// \brief Replace operands.
- ///
- /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision
- /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track
- /// of its movement if necessary.
- /// @{
- void replaceElements(DINodeArray Elements) {
-#ifndef NDEBUG
- for (DINode *Op : getElements())
- assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
- "Lost a member during member list replacement");
-#endif
- replaceOperandWith(4, Elements.get());
- }
- void replaceVTableHolder(DITypeRef VTableHolder) {
- replaceOperandWith(5, VTableHolder);
- }
- void replaceTemplateParams(DITemplateParameterArray TemplateParams) {
- replaceOperandWith(6, TemplateParams.get());
- }
- /// @}
-
- static bool classof(const Metadata *MD) {
- return MD->getMetadataID() == DICompositeTypeKind ||
- MD->getMetadataID() == DISubroutineTypeKind;
- }
-};
-
/// \brief Composite types.
///
/// TODO: Detach from DerivedTypeBase (split out MDEnumType?).
/// TODO: Create a custom, unrelated node for DW_TAG_array_type.
-class DICompositeType : public DICompositeTypeBase {
+class DICompositeType : public DIType {
friend class LLVMContextImpl;
friend class MDNode;
+ unsigned RuntimeLang;
+
DICompositeType(LLVMContext &C, StorageType Storage, unsigned Tag,
unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits,
uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
ArrayRef<Metadata *> Ops)
- : DICompositeTypeBase(C, DICompositeTypeKind, Storage, Tag, Line,
- RuntimeLang, SizeInBits, AlignInBits, OffsetInBits,
- Flags, Ops) {}
+ : DIType(C, DICompositeTypeKind, Storage, Tag, Line, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Ops),
+ RuntimeLang(RuntimeLang) {}
~DICompositeType() = default;
static DICompositeType *
@@ -903,6 +822,45 @@ public:
TempDICompositeType clone() const { return cloneImpl(); }
+ DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+ DINodeArray getElements() const {
+ return cast_or_null<MDTuple>(getRawElements());
+ }
+ DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); }
+ DITemplateParameterArray getTemplateParams() const {
+ return cast_or_null<MDTuple>(getRawTemplateParams());
+ }
+ StringRef getIdentifier() const { return getStringOperand(7); }
+ unsigned getRuntimeLang() const { return RuntimeLang; }
+
+ Metadata *getRawBaseType() const { return getOperand(3); }
+ Metadata *getRawElements() const { return getOperand(4); }
+ Metadata *getRawVTableHolder() const { return getOperand(5); }
+ Metadata *getRawTemplateParams() const { return getOperand(6); }
+ MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
+
+ /// \brief Replace operands.
+ ///
+ /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision
+ /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track
+ /// of its movement if necessary.
+ /// @{
+ void replaceElements(DINodeArray Elements) {
+#ifndef NDEBUG
+ for (DINode *Op : getElements())
+ assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
+ "Lost a member during member list replacement");
+#endif
+ replaceOperandWith(4, Elements.get());
+ }
+ void replaceVTableHolder(DITypeRef VTableHolder) {
+ replaceOperandWith(5, VTableHolder);
+ }
+ void replaceTemplateParams(DITemplateParameterArray TemplateParams) {
+ replaceOperandWith(6, TemplateParams.get());
+ }
+ /// @}
+
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DICompositeTypeKind;
}
@@ -918,17 +876,15 @@ template <class T> TypedDINodeRef<T> TypedDINodeRef<T>::get(const T *N) {
/// \brief Type array for a subprogram.
///
-/// TODO: Detach from CompositeType, and fold the array of types in directly
-/// as operands.
-class DISubroutineType : public DICompositeTypeBase {
+/// TODO: Fold the array of types in directly as operands.
+class DISubroutineType : public DIType {
friend class LLVMContextImpl;
friend class MDNode;
DISubroutineType(LLVMContext &C, StorageType Storage, unsigned Flags,
ArrayRef<Metadata *> Ops)
- : DICompositeTypeBase(C, DISubroutineTypeKind, Storage,
- dwarf::DW_TAG_subroutine_type, 0, 0, 0, 0, 0, Flags,
- Ops) {}
+ : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type,
+ 0, 0, 0, 0, Flags, Ops) {}
~DISubroutineType() = default;
static DISubroutineType *getImpl(LLVMContext &Context, unsigned Flags,
@@ -957,7 +913,7 @@ public:
DITypeRefArray getTypeArray() const {
return cast_or_null<MDTuple>(getRawTypeArray());
}
- Metadata *getRawTypeArray() const { return getRawElements(); }
+ Metadata *getRawTypeArray() const { return getOperand(3); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DISubroutineTypeKind;
@@ -981,7 +937,9 @@ class DICompileUnit : public DIScope {
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind),
- DWOId(DWOId) {}
+ DWOId(DWOId) {
+ assert(Storage != Uniqued);
+ }
~DICompileUnit() = default;
static DICompileUnit *
@@ -991,15 +949,16 @@ class DICompileUnit : public DIScope {
unsigned EmissionKind, DICompositeTypeArray EnumTypes,
DITypeArray RetainedTypes, DISubprogramArray Subprograms,
DIGlobalVariableArray GlobalVariables,
- DIImportedEntityArray ImportedEntities, uint64_t DWOId,
- StorageType Storage, bool ShouldCreate = true) {
+ DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
+ uint64_t DWOId, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, SourceLanguage, File,
getCanonicalMDString(Context, Producer), IsOptimized,
getCanonicalMDString(Context, Flags), RuntimeVersion,
getCanonicalMDString(Context, SplitDebugFilename),
EmissionKind, EnumTypes.get(), RetainedTypes.get(),
Subprograms.get(), GlobalVariables.get(),
- ImportedEntities.get(), DWOId, Storage, ShouldCreate);
+ ImportedEntities.get(), Macros.get(), DWOId, Storage,
+ ShouldCreate);
}
static DICompileUnit *
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -1007,40 +966,44 @@ class DICompileUnit : public DIScope {
unsigned RuntimeVersion, MDString *SplitDebugFilename,
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *Subprograms, Metadata *GlobalVariables,
- Metadata *ImportedEntities, uint64_t DWOId, StorageType Storage,
- bool ShouldCreate = true);
+ Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId,
+ StorageType Storage, bool ShouldCreate = true);
TempDICompileUnit cloneImpl() const {
return getTemporary(
getContext(), getSourceLanguage(), getFile(), getProducer(),
isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(),
getEmissionKind(), getEnumTypes(), getRetainedTypes(), getSubprograms(),
- getGlobalVariables(), getImportedEntities(), DWOId);
+ getGlobalVariables(), getImportedEntities(), getMacros(), DWOId);
}
+ static void get() = delete;
+ static void getIfExists() = delete;
+
public:
- DEFINE_MDNODE_GET(DICompileUnit,
- (unsigned SourceLanguage, DIFile *File, StringRef Producer,
- bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
- StringRef SplitDebugFilename, unsigned EmissionKind,
- DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes,
- DISubprogramArray Subprograms,
- DIGlobalVariableArray GlobalVariables,
- DIImportedEntityArray ImportedEntities, uint64_t DWOId),
- (SourceLanguage, File, Producer, IsOptimized, Flags,
- RuntimeVersion, SplitDebugFilename, EmissionKind,
- EnumTypes, RetainedTypes, Subprograms, GlobalVariables,
- ImportedEntities, DWOId))
- DEFINE_MDNODE_GET(
+ DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
+ DICompileUnit,
+ (unsigned SourceLanguage, DIFile *File, StringRef Producer,
+ bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
+ StringRef SplitDebugFilename, unsigned EmissionKind,
+ DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes,
+ DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables,
+ DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
+ uint64_t DWOId),
+ (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
+ SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms,
+ GlobalVariables, ImportedEntities, Macros, DWOId))
+ DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
DICompileUnit,
(unsigned SourceLanguage, Metadata *File, MDString *Producer,
bool IsOptimized, MDString *Flags, unsigned RuntimeVersion,
MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes,
Metadata *RetainedTypes, Metadata *Subprograms,
- Metadata *GlobalVariables, Metadata *ImportedEntities, uint64_t DWOId),
+ Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros,
+ uint64_t DWOId),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms,
- GlobalVariables, ImportedEntities, DWOId))
+ GlobalVariables, ImportedEntities, Macros, DWOId))
TempDICompileUnit clone() const { return cloneImpl(); }
@@ -1066,7 +1029,11 @@ public:
DIImportedEntityArray getImportedEntities() const {
return cast_or_null<MDTuple>(getRawImportedEntities());
}
- unsigned getDWOId() const { return DWOId; }
+ DIMacroNodeArray getMacros() const {
+ return cast_or_null<MDTuple>(getRawMacros());
+ }
+ uint64_t getDWOId() const { return DWOId; }
+ void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
MDString *getRawProducer() const { return getOperandAs<MDString>(1); }
MDString *getRawFlags() const { return getOperandAs<MDString>(2); }
@@ -1078,6 +1045,7 @@ public:
Metadata *getRawSubprograms() const { return getOperand(6); }
Metadata *getRawGlobalVariables() const { return getOperand(7); }
Metadata *getRawImportedEntities() const { return getOperand(8); }
+ Metadata *getRawMacros() const { return getOperand(9); }
/// \brief Replace arrays.
///
@@ -1100,6 +1068,7 @@ public:
void replaceImportedEntities(DIImportedEntityArray N) {
replaceOperandWith(8, N.get());
}
+ void replaceMacros(DIMacroNodeArray N) { replaceOperandWith(9, N.get()); }
/// @}
static bool classof(const Metadata *MD) {
@@ -1157,8 +1126,10 @@ class DILocation : public MDNode {
}
TempDILocation cloneImpl() const {
- return getTemporary(getContext(), getLine(), getColumn(), getScope(),
- getInlinedAt());
+ // Get the raw scope/inlinedAt since it is possible to invoke this on
+ // a DILocation containing temporary metadata.
+ return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
+ getRawInlinedAt());
}
// Disallow replacing operands.
@@ -1276,14 +1247,13 @@ class DISubprogram : public DILocalScope {
DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition,
unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality,
unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
- Constant *Function, DITemplateParameterArray TemplateParams,
- DISubprogram *Declaration, DILocalVariableArray Variables,
- StorageType Storage, bool ShouldCreate = true) {
+ DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
+ DILocalVariableArray Variables, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
Virtuality, VirtualIndex, Flags, IsOptimized,
- Function ? ConstantAsMetadata::get(Function) : nullptr,
TemplateParams.get(), Declaration, Variables.get(), Storage,
ShouldCreate);
}
@@ -1292,17 +1262,16 @@ class DISubprogram : public DILocalScope {
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- unsigned Flags, bool IsOptimized, Metadata *Function,
- Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
- StorageType Storage, bool ShouldCreate = true);
+ unsigned Flags, bool IsOptimized, Metadata *TemplateParams,
+ Metadata *Declaration, Metadata *Variables, StorageType Storage,
+ bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
- return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
- getFile(), getLine(), getType(), isLocalToUnit(),
- isDefinition(), getScopeLine(), getContainingType(),
- getVirtuality(), getVirtualIndex(), getFlags(),
- isOptimized(), getFunctionConstant(),
- getTemplateParams(), getDeclaration(), getVariables());
+ return getTemporary(
+ getContext(), getScope(), getName(), getLinkageName(), getFile(),
+ getLine(), getType(), isLocalToUnit(), isDefinition(), getScopeLine(),
+ getContainingType(), getVirtuality(), getVirtualIndex(), getFlags(),
+ isOptimized(), getTemplateParams(), getDeclaration(), getVariables());
}
public:
@@ -1312,13 +1281,12 @@ public:
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
DITypeRef ContainingType, unsigned Virtuality,
unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
- Constant *Function = nullptr,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr,
DILocalVariableArray Variables = nullptr),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
IsDefinition, ScopeLine, ContainingType, Virtuality,
- VirtualIndex, Flags, IsOptimized, Function, TemplateParams,
+ VirtualIndex, Flags, IsOptimized, TemplateParams,
Declaration, Variables))
DEFINE_MDNODE_GET(
DISubprogram,
@@ -1326,11 +1294,11 @@ public:
unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
- Metadata *Function = nullptr, Metadata *TemplateParams = nullptr,
- Metadata *Declaration = nullptr, Metadata *Variables = nullptr),
+ Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
+ Metadata *Variables = nullptr),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized,
- Function, TemplateParams, Declaration, Variables))
+ TemplateParams, Declaration, Variables))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -1389,11 +1357,6 @@ public:
return DITypeRef(getRawContainingType());
}
- Constant *getFunctionConstant() const {
- if (auto *C = cast_or_null<ConstantAsMetadata>(getRawFunction()))
- return C->getValue();
- return nullptr;
- }
DITemplateParameterArray getTemplateParams() const {
return cast_or_null<MDTuple>(getRawTemplateParams());
}
@@ -1407,30 +1370,11 @@ public:
Metadata *getRawScope() const { return getOperand(1); }
Metadata *getRawType() const { return getOperand(5); }
Metadata *getRawContainingType() const { return getOperand(6); }
- Metadata *getRawFunction() const { return getOperand(7); }
- Metadata *getRawTemplateParams() const { return getOperand(8); }
- Metadata *getRawDeclaration() const { return getOperand(9); }
- Metadata *getRawVariables() const { return getOperand(10); }
-
- /// \brief Get a pointer to the function this subprogram describes.
- ///
- /// This dyn_casts \a getFunctionConstant() to \a Function.
- ///
- /// FIXME: Should this be looking through bitcasts?
- Function *getFunction() const;
-
- /// \brief Replace the function.
- ///
- /// If \a isUniqued() and not \a isResolved(), this could node will be
- /// RAUW'ed and deleted out from under the caller. Use a \a TrackingMDRef if
- /// that's a problem.
- /// @{
- void replaceFunction(Function *F);
- void replaceFunction(ConstantAsMetadata *MD) { replaceOperandWith(7, MD); }
- void replaceFunction(std::nullptr_t) { replaceOperandWith(7, nullptr); }
- /// @}
+ Metadata *getRawTemplateParams() const { return getOperand(7); }
+ Metadata *getRawDeclaration() const { return getOperand(8); }
+ Metadata *getRawVariables() const { return getOperand(9); }
- /// \brief Check if this subprogram decribes the given function.
+ /// \brief Check if this subprogram describes the given function.
///
/// FIXME: Should this be looking through bitcasts?
bool describes(const Function *F) const;
@@ -1452,13 +1396,6 @@ public:
Metadata *getRawScope() const { return getOperand(1); }
- /// \brief Forwarding accessors to LexicalBlock.
- ///
- /// TODO: Remove these and update code to use \a DILexicalBlock directly.
- /// @{
- inline unsigned getLine() const;
- inline unsigned getColumn() const;
- /// @}
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DILexicalBlockKind ||
MD->getMetadataID() == DILexicalBlockFileKind;
@@ -1470,12 +1407,14 @@ class DILexicalBlock : public DILexicalBlockBase {
friend class MDNode;
unsigned Line;
- unsigned Column;
+ uint16_t Column;
DILexicalBlock(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, ArrayRef<Metadata *> Ops)
: DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Line(Line),
- Column(Column) {}
+ Column(Column) {
+ assert(Column < (1u << 16) && "Expected 16-bit column");
+ }
~DILexicalBlock() = default;
static DILexicalBlock *getImpl(LLVMContext &Context, DILocalScope *Scope,
@@ -1514,18 +1453,6 @@ public:
}
};
-unsigned DILexicalBlockBase::getLine() const {
- if (auto *N = dyn_cast<DILexicalBlock>(this))
- return N->getLine();
- return 0;
-}
-
-unsigned DILexicalBlockBase::getColumn() const {
- if (auto *N = dyn_cast<DILexicalBlock>(this))
- return N->getColumn();
- return 0;
-}
-
class DILexicalBlockFile : public DILexicalBlockBase {
friend class LLVMContextImpl;
friend class MDNode;
@@ -1797,15 +1724,13 @@ public:
};
/// \brief Base class for variables.
-///
-/// TODO: Hardcode to DW_TAG_variable.
class DIVariable : public DINode {
unsigned Line;
protected:
- DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
- unsigned Line, ArrayRef<Metadata *> Ops)
- : DINode(C, ID, Storage, Tag, Ops), Line(Line) {}
+ DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {}
~DIVariable() = default;
public:
@@ -1850,8 +1775,7 @@ class DIGlobalVariable : public DIVariable {
DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
bool IsLocalToUnit, bool IsDefinition,
ArrayRef<Metadata *> Ops)
- : DIVariable(C, DIGlobalVariableKind, Storage, dwarf::DW_TAG_variable,
- Line, Ops),
+ : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops),
IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
~DIGlobalVariable() = default;
@@ -1923,8 +1847,6 @@ public:
/// \brief Local variable.
///
-/// TODO: Split between arguments and otherwise.
-/// TODO: Use \c DW_TAG_variable instead of fake tags.
/// TODO: Split up flags.
class DILocalVariable : public DIVariable {
friend class LLVMContextImpl;
@@ -1933,42 +1855,42 @@ class DILocalVariable : public DIVariable {
unsigned Arg;
unsigned Flags;
- DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Tag,
- unsigned Line, unsigned Arg, unsigned Flags,
- ArrayRef<Metadata *> Ops)
- : DIVariable(C, DILocalVariableKind, Storage, Tag, Line, Ops), Arg(Arg),
+ DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
+ unsigned Arg, unsigned Flags, ArrayRef<Metadata *> Ops)
+ : DIVariable(C, DILocalVariableKind, Storage, Line, Ops), Arg(Arg),
Flags(Flags) {}
~DILocalVariable() = default;
- static DILocalVariable *getImpl(LLVMContext &Context, unsigned Tag,
- DIScope *Scope, StringRef Name, DIFile *File,
- unsigned Line, DITypeRef Type, unsigned Arg,
- unsigned Flags, StorageType Storage,
+ static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
+ StringRef Name, DIFile *File, unsigned Line,
+ DITypeRef Type, unsigned Arg, unsigned Flags,
+ StorageType Storage,
bool ShouldCreate = true) {
- return getImpl(Context, Tag, Scope, getCanonicalMDString(Context, Name),
- File, Line, Type, Arg, Flags, Storage, ShouldCreate);
+ return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
+ Line, Type, Arg, Flags, Storage, ShouldCreate);
}
- static DILocalVariable *
- getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name,
- Metadata *File, unsigned Line, Metadata *Type, unsigned Arg,
- unsigned Flags, StorageType Storage, bool ShouldCreate = true);
+ static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope,
+ MDString *Name, Metadata *File, unsigned Line,
+ Metadata *Type, unsigned Arg, unsigned Flags,
+ StorageType Storage,
+ bool ShouldCreate = true);
TempDILocalVariable cloneImpl() const {
- return getTemporary(getContext(), getTag(), getScope(), getName(),
- getFile(), getLine(), getType(), getArg(), getFlags());
+ return getTemporary(getContext(), getScope(), getName(), getFile(),
+ getLine(), getType(), getArg(), getFlags());
}
public:
DEFINE_MDNODE_GET(DILocalVariable,
- (unsigned Tag, DILocalScope *Scope, StringRef Name,
- DIFile *File, unsigned Line, DITypeRef Type, unsigned Arg,
+ (DILocalScope * Scope, StringRef Name, DIFile *File,
+ unsigned Line, DITypeRef Type, unsigned Arg,
unsigned Flags),
- (Tag, Scope, Name, File, Line, Type, Arg, Flags))
+ (Scope, Name, File, Line, Type, Arg, Flags))
DEFINE_MDNODE_GET(DILocalVariable,
- (unsigned Tag, Metadata *Scope, MDString *Name,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags),
- (Tag, Scope, Name, File, Line, Type, Arg, Flags))
+ (Metadata * Scope, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Type, unsigned Arg,
+ unsigned Flags),
+ (Scope, Name, File, Line, Type, Arg, Flags))
TempDILocalVariable clone() const { return cloneImpl(); }
@@ -1979,6 +1901,7 @@ public:
return cast<DILocalScope>(DIVariable::getScope());
}
+ bool isParameter() const { return Arg; }
unsigned getArg() const { return Arg; }
unsigned getFlags() const { return Flags; }
@@ -1988,7 +1911,7 @@ public:
/// \brief Check that a location is valid for this variable.
///
/// Check that \c DL exists, is in the same subprogram, and has the same
- /// inlined-at location as \c this. (Otherwise, it's not a valid attachemnt
+ /// inlined-at location as \c this. (Otherwise, it's not a valid attachment
/// to a \a DbgInfoIntrinsic.)
bool isValidLocationForIntrinsic(const DILocation *DL) const {
return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram();
@@ -2284,6 +2207,165 @@ public:
}
};
+/// \brief Macro Info DWARF-like metadata node.
+///
+/// A metadata node with DWARF macro info (i.e., a constant named
+/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode
+/// because it's potentially used for non-DWARF output.
+class DIMacroNode : public MDNode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+protected:
+ DIMacroNode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned MIType,
+ ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None)
+ : MDNode(C, ID, Storage, Ops1, Ops2) {
+ assert(MIType < 1u << 16);
+ SubclassData16 = MIType;
+ }
+ ~DIMacroNode() = default;
+
+ template <class Ty> Ty *getOperandAs(unsigned I) const {
+ return cast_or_null<Ty>(getOperand(I));
+ }
+
+ StringRef getStringOperand(unsigned I) const {
+ if (auto *S = getOperandAs<MDString>(I))
+ return S->getString();
+ return StringRef();
+ }
+
+ static MDString *getCanonicalMDString(LLVMContext &Context, StringRef S) {
+ if (S.empty())
+ return nullptr;
+ return MDString::get(Context, S);
+ }
+
+public:
+ unsigned getMacinfoType() const { return SubclassData16; }
+
+ static bool classof(const Metadata *MD) {
+ switch (MD->getMetadataID()) {
+ default:
+ return false;
+ case DIMacroKind:
+ case DIMacroFileKind:
+ return true;
+ }
+ }
+};
+
+class DIMacro : public DIMacroNode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ unsigned Line;
+
+ DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line,
+ ArrayRef<Metadata *> Ops)
+ : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {}
+ ~DIMacro() = default;
+
+ static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line,
+ StringRef Name, StringRef Value, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, MIType, Line, getCanonicalMDString(Context, Name),
+ getCanonicalMDString(Context, Value), Storage, ShouldCreate);
+ }
+ static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line,
+ MDString *Name, MDString *Value, StorageType Storage,
+ bool ShouldCreate = true);
+
+ TempDIMacro cloneImpl() const {
+ return getTemporary(getContext(), getMacinfoType(), getLine(), getName(),
+ getValue());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name,
+ StringRef Value = ""),
+ (MIType, Line, Name, Value))
+ DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name,
+ MDString *Value),
+ (MIType, Line, Name, Value))
+
+ TempDIMacro clone() const { return cloneImpl(); }
+
+ unsigned getLine() const { return Line; }
+
+ StringRef getName() const { return getStringOperand(0); }
+ StringRef getValue() const { return getStringOperand(1); }
+
+ MDString *getRawName() const { return getOperandAs<MDString>(0); }
+ MDString *getRawValue() const { return getOperandAs<MDString>(1); }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIMacroKind;
+ }
+};
+
+class DIMacroFile : public DIMacroNode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ unsigned Line;
+
+ DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType,
+ unsigned Line, ArrayRef<Metadata *> Ops)
+ : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {}
+ ~DIMacroFile() = default;
+
+ static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType,
+ unsigned Line, DIFile *File,
+ DIMacroNodeArray Elements, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, MIType, Line, static_cast<Metadata *>(File),
+ Elements.get(), Storage, ShouldCreate);
+ }
+
+ static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType,
+ unsigned Line, Metadata *File, Metadata *Elements,
+ StorageType Storage, bool ShouldCreate = true);
+
+ TempDIMacroFile cloneImpl() const {
+ return getTemporary(getContext(), getMacinfoType(), getLine(), getFile(),
+ getElements());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File,
+ DIMacroNodeArray Elements),
+ (MIType, Line, File, Elements))
+ DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line,
+ Metadata *File, Metadata *Elements),
+ (MIType, Line, File, Elements))
+
+ TempDIMacroFile clone() const { return cloneImpl(); }
+
+ void replaceElements(DIMacroNodeArray Elements) {
+#ifndef NDEBUG
+ for (DIMacroNode *Op : getElements())
+ assert(std::find(Elements->op_begin(), Elements->op_end(), Op) !=
+ Elements->op_end() &&
+ "Lost a macro node during macro node list replacement");
+#endif
+ replaceOperandWith(1, Elements.get());
+ }
+
+ unsigned getLine() const { return Line; }
+ DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+
+ DIMacroNodeArray getElements() const {
+ return cast_or_null<MDTuple>(getRawElements());
+ }
+
+ Metadata *getRawFile() const { return getOperand(0); }
+ Metadata *getRawElements() const { return getOperand(1); }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIMacroFileKind;
+ }
+};
+
} // end namespace llvm
#undef DEFINE_MDNODE_GET_UNPACK_IMPL
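As a reading aid, here is a minimal sketch (not part of the patch; Ctx and File are assumed to exist elsewhere) of how a frontend might build the new macro nodes:

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

// Record "#define VERSION 3" at line 7 and group it under its file.
DIMacroFile *buildMacros(LLVMContext &Ctx, DIFile *File) {
  DIMacro *Def =
      DIMacro::get(Ctx, dwarf::DW_MACINFO_define, /*Line=*/7, "VERSION", "3");
  Metadata *Ops[] = {Def};
  return DIMacroFile::get(Ctx, dwarf::DW_MACINFO_start_file, /*Line=*/0,
                          static_cast<Metadata *>(File),
                          MDTuple::get(Ctx, Ops));
}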
diff --git a/contrib/llvm/include/llvm/IR/DerivedTypes.h b/contrib/llvm/include/llvm/IR/DerivedTypes.h
index 4a94499..071e69b 100644
--- a/contrib/llvm/include/llvm/IR/DerivedTypes.h
+++ b/contrib/llvm/include/llvm/IR/DerivedTypes.h
@@ -36,11 +36,12 @@ class StringRef;
/// @brief Integer representation type
class IntegerType : public Type {
friend class LLVMContextImpl;
-
+
protected:
explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){
setSubclassData(NumBits);
}
+
public:
/// This enum is just used to hold constants we need for IntegerType.
enum {
@@ -90,6 +91,9 @@ public:
}
};
+unsigned Type::getIntegerBitWidth() const {
+ return cast<IntegerType>(this)->getBitWidth();
+}
/// FunctionType - Class to represent function types
///
@@ -108,7 +112,7 @@ public:
/// FunctionType::get - Create a FunctionType taking no parameters.
///
static FunctionType *get(Type *Result, bool isVarArg);
-
+
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
static bool isValidReturnType(Type *RetTy);
@@ -143,18 +147,30 @@ public:
static_assert(AlignOf<FunctionType>::Alignment >= AlignOf<Type *>::Alignment,
"Alignment sufficient for objects appended to FunctionType");
+bool Type::isFunctionVarArg() const {
+ return cast<FunctionType>(this)->isVarArg();
+}
+
+Type *Type::getFunctionParamType(unsigned i) const {
+ return cast<FunctionType>(this)->getParamType(i);
+}
+
+unsigned Type::getFunctionNumParams() const {
+ return cast<FunctionType>(this)->getNumParams();
+}
+
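The forwarders above let callers interrogate a function type while staying on Type *. A small sketch (countIntParamBits is a hypothetical helper, not from the patch):

#include "llvm/IR/DerivedTypes.h"
#include <cassert>

// Sum the bit widths of a function type's integer parameters.
unsigned countIntParamBits(llvm::Type *FnTy) {
  assert(FnTy->isFunctionTy() && "expected a function type");
  unsigned Bits = 0;
  for (unsigned I = 0, E = FnTy->getFunctionNumParams(); I != E; ++I) {
    llvm::Type *P = FnTy->getFunctionParamType(I);
    if (P->isIntegerTy())
      Bits += P->getIntegerBitWidth(); // forwards to IntegerType::getBitWidth
  }
  return Bits;
}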
/// CompositeType - Common super class of ArrayType, StructType, PointerType
/// and VectorType.
class CompositeType : public Type {
protected:
- explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { }
-public:
+ explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) {}
+public:
/// getTypeAtIndex - Given an index value into the type, return the type of
/// the element.
///
- Type *getTypeAtIndex(const Value *V);
- Type *getTypeAtIndex(unsigned Idx);
+ Type *getTypeAtIndex(const Value *V) const;
+ Type *getTypeAtIndex(unsigned Idx) const;
bool indexValid(const Value *V) const;
bool indexValid(unsigned Idx) const;
@@ -167,14 +183,13 @@ public:
}
};
-
/// StructType - Class to represent struct types. There are two different kinds
/// of struct types: Literal structs and Identified structs.
///
/// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must
/// always have a body when created. You can get one of these by using one of
/// the StructType::get() forms.
-///
+///
/// Identified structs (e.g. %foo or %42) may optionally have a name and are not
/// uniqued. The names for identified structs are managed at the LLVMContext
/// level, so there can only be a single identified struct with a given name in
@@ -205,23 +220,20 @@ class StructType : public CompositeType {
/// pointer to the symbol table entry (maintained by LLVMContext) for the
/// struct. This is null if the type is a literal struct or if it is
/// an identified type that has an empty name.
- ///
+ ///
void *SymbolTableEntry;
-public:
+public:
/// StructType::create - This creates an identified struct.
static StructType *create(LLVMContext &Context, StringRef Name);
static StructType *create(LLVMContext &Context);
-
- static StructType *create(ArrayRef<Type*> Elements,
- StringRef Name,
- bool isPacked = false);
- static StructType *create(ArrayRef<Type*> Elements);
- static StructType *create(LLVMContext &Context,
- ArrayRef<Type*> Elements,
- StringRef Name,
+
+ static StructType *create(ArrayRef<Type *> Elements, StringRef Name,
bool isPacked = false);
- static StructType *create(LLVMContext &Context, ArrayRef<Type*> Elements);
+ static StructType *create(ArrayRef<Type *> Elements);
+ static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements,
+ StringRef Name, bool isPacked = false);
+ static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements);
static StructType *create(StringRef Name, Type *elt1, ...) LLVM_END_WITH_NULL;
/// StructType::get - This static method is the primary way to create a
@@ -232,7 +244,7 @@ public:
/// StructType::get - Create an empty structure type.
///
static StructType *get(LLVMContext &Context, bool isPacked = false);
-
+
/// StructType::get - This static method is a convenience method for creating
/// structure types by specifying the elements as arguments. Note that this
/// method always returns a non-packed struct, and requires at least one
@@ -240,26 +252,26 @@ public:
static StructType *get(Type *elt1, ...) LLVM_END_WITH_NULL;
bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
-
+
/// isLiteral - Return true if this type is uniqued by structural
/// equivalence, false if it is a struct definition.
bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; }
-
+
/// isOpaque - Return true if this is a type with an identity that has no body
/// specified yet. These print as 'opaque' in .ll files.
bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
/// isSized - Return true if this is a sized type.
- bool isSized(SmallPtrSetImpl<const Type*> *Visited = nullptr) const;
-
+ bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
+
/// hasName - Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != nullptr; }
-
+
/// getName - Return the name for this struct type if it has an identity.
/// This may return an empty string for an unnamed struct type. Do not call
/// this on a literal type.
StringRef getName() const;
-
+
/// setName - Change the name of this type to the specified name, or to a name
/// with a suffix if there is a collision. Do not call this on a literal
/// type.
@@ -268,11 +280,10 @@ public:
/// setBody - Specify a body for an opaque identified type.
void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
void setBody(Type *elt1, ...) LLVM_END_WITH_NULL;
-
+
/// isValidElementType - Return true if the specified type is valid as an
/// element type.
static bool isValidElementType(Type *ElemTy);
-
// Iterator access to the elements.
typedef Type::subtype_iterator element_iterator;
@@ -284,8 +295,8 @@ public:
/// isLayoutIdentical - Return true if this is layout identical to the
/// specified struct.
- bool isLayoutIdentical(StructType *Other) const;
-
+ bool isLayoutIdentical(StructType *Other) const;
+
/// Random access to the elements
unsigned getNumElements() const { return NumContainedTys; }
Type *getElementType(unsigned N) const {
@@ -299,6 +310,18 @@ public:
}
};
+StringRef Type::getStructName() const {
+ return cast<StructType>(this)->getName();
+}
+
+unsigned Type::getStructNumElements() const {
+ return cast<StructType>(this)->getNumElements();
+}
+
+Type *Type::getStructElementType(unsigned N) const {
+ return cast<StructType>(this)->getElementType(N);
+}
+
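Similarly for structs, a sketch (dumpStruct is illustrative; note that getStructName is only valid for non-literal struct types):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

void dumpStruct(llvm::Type *T, llvm::raw_ostream &OS) {
  assert(T->isStructTy() && !llvm::cast<llvm::StructType>(T)->isLiteral());
  OS << T->getStructName() << ": " << T->getStructNumElements() << " fields\n";
}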
/// SequentialType - This is the superclass of the array, pointer and vector
/// type classes. All of these represent "arrays" in memory. The array type
/// represents a specifically sized array, pointer types are unsized/unknown
@@ -330,6 +353,9 @@ public:
}
};
+Type *Type::getSequentialElementType() const {
+ return cast<SequentialType>(this)->getElementType();
+}
/// ArrayType - Class to represent array types.
///
@@ -339,6 +365,7 @@ class ArrayType : public SequentialType {
ArrayType(const ArrayType &) = delete;
const ArrayType &operator=(const ArrayType &) = delete;
ArrayType(Type *ElType, uint64_t NumEl);
+
public:
/// ArrayType::get - This static method is the primary way to construct an
/// ArrayType
@@ -357,6 +384,10 @@ public:
}
};
+uint64_t Type::getArrayNumElements() const {
+ return cast<ArrayType>(this)->getNumElements();
+}
+
/// VectorType - Class to represent vector types.
///
class VectorType : public SequentialType {
@@ -365,6 +396,7 @@ class VectorType : public SequentialType {
VectorType(const VectorType &) = delete;
const VectorType &operator=(const VectorType &) = delete;
VectorType(Type *ElType, unsigned NumEl);
+
public:
/// VectorType::get - This static method is the primary way to construct a
/// VectorType.
@@ -443,6 +475,9 @@ public:
}
};
+unsigned Type::getVectorNumElements() const {
+ return cast<VectorType>(this)->getNumElements();
+}
/// PointerType - Class to represent pointers.
///
@@ -450,6 +485,7 @@ class PointerType : public SequentialType {
PointerType(const PointerType &) = delete;
const PointerType &operator=(const PointerType &) = delete;
explicit PointerType(Type *ElType, unsigned AddrSpace);
+
public:
/// PointerType::get - This constructs a pointer to an object of the specified
/// type in a numbered address space.
@@ -477,6 +513,10 @@ public:
}
};
+unsigned Type::getPointerAddressSpace() const {
+ return cast<PointerType>(getScalarType())->getAddressSpace();
+}
+
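And the accessors compose for vectors of pointers; a sketch (the predicate is illustrative):

#include "llvm/IR/DerivedTypes.h"

// True for a 4-element vector of pointers into address space 1.
// getPointerAddressSpace looks through the vector via getScalarType().
bool isV4PtrAS1(llvm::Type *T) {
  return T->isVectorTy() && T->getVectorNumElements() == 4 &&
         T->getScalarType()->isPointerTy() &&
         T->getPointerAddressSpace() == 1;
}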
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
index f38313f..f69955e 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -15,7 +15,6 @@
#ifndef LLVM_IR_DIAGNOSTICINFO_H
#define LLVM_IR_DIAGNOSTICINFO_H
-#include "llvm-c/Core.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Module.h"
@@ -56,8 +55,11 @@ enum DiagnosticKind {
DK_OptimizationRemark,
DK_OptimizationRemarkMissed,
DK_OptimizationRemarkAnalysis,
+ DK_OptimizationRemarkAnalysisFPCommute,
+ DK_OptimizationRemarkAnalysisAliasing,
DK_OptimizationFailure,
DK_MIRParser,
+ DK_PGOProfile,
DK_FirstPluginKind
};
@@ -99,6 +101,8 @@ public:
/// The printed message must not end with '.' nor start with a severity
/// keyword.
virtual void print(DiagnosticPrinter &DP) const = 0;
+
+ static const char *AlwaysPrint;
};
typedef std::function<void(const DiagnosticInfo &)> DiagnosticHandlerFunction;
@@ -210,19 +214,18 @@ public:
/// Diagnostic information for the sample profiler.
class DiagnosticInfoSampleProfile : public DiagnosticInfo {
public:
- DiagnosticInfoSampleProfile(const char *FileName, unsigned LineNum,
+ DiagnosticInfoSampleProfile(StringRef FileName, unsigned LineNum,
const Twine &Msg,
DiagnosticSeverity Severity = DS_Error)
: DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName),
LineNum(LineNum), Msg(Msg) {}
- DiagnosticInfoSampleProfile(const char *FileName, const Twine &Msg,
+ DiagnosticInfoSampleProfile(StringRef FileName, const Twine &Msg,
DiagnosticSeverity Severity = DS_Error)
: DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName),
LineNum(0), Msg(Msg) {}
DiagnosticInfoSampleProfile(const Twine &Msg,
DiagnosticSeverity Severity = DS_Error)
- : DiagnosticInfo(DK_SampleProfile, Severity), FileName(nullptr),
- LineNum(0), Msg(Msg) {}
+ : DiagnosticInfo(DK_SampleProfile, Severity), LineNum(0), Msg(Msg) {}
/// \see DiagnosticInfo::print.
void print(DiagnosticPrinter &DP) const override;
@@ -231,13 +234,13 @@ public:
return DI->getKind() == DK_SampleProfile;
}
- const char *getFileName() const { return FileName; }
+ StringRef getFileName() const { return FileName; }
unsigned getLineNum() const { return LineNum; }
const Twine &getMsg() const { return Msg; }
private:
/// Name of the input file associated with this diagnostic.
- const char *FileName;
+ StringRef FileName;
/// Line number where the diagnostic occurred. If 0, no line number will
/// be emitted in the message.
@@ -247,6 +250,31 @@ private:
const Twine &Msg;
};
+/// Diagnostic information for the PGO profiler.
+class DiagnosticInfoPGOProfile : public DiagnosticInfo {
+public:
+ DiagnosticInfoPGOProfile(const char *FileName, const Twine &Msg,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(DK_PGOProfile, Severity), FileName(FileName), Msg(Msg) {}
+
+ /// \see DiagnosticInfo::print.
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_PGOProfile;
+ }
+
+ const char *getFileName() const { return FileName; }
+ const Twine &getMsg() const { return Msg; }
+
+private:
+ /// Name of the input file associated with this diagnostic.
+ const char *FileName;
+
+ /// Message to report.
+ const Twine &Msg;
+};
+
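Usage is symmetric with the sample-profiler diagnostic above; a minimal sketch (file name and message are illustrative):

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"

void reportBadProfile(llvm::LLVMContext &Ctx) {
  // Routed to the registered diagnostic handler (or the default printer).
  Ctx.diagnose(llvm::DiagnosticInfoPGOProfile(
      "default.profraw", "unable to parse profile data"));
}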
/// Common features for diagnostics dealing with optimization remarks.
class DiagnosticInfoOptimizationBase : public DiagnosticInfo {
public:
@@ -267,10 +295,6 @@ public:
/// \see DiagnosticInfo::print.
void print(DiagnosticPrinter &DP) const override;
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == DK_OptimizationRemark;
- }
-
/// Return true if this optimization remark is enabled by one of
/// of the LLVM command line flags (-pass-remarks, -pass-remarks-missed,
/// or -pass-remarks-analysis). Note that this only handles the LLVM
@@ -386,6 +410,69 @@ public:
/// \see DiagnosticInfoOptimizationBase::isEnabled.
bool isEnabled() const override;
+
+protected:
+ DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg)
+ : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc,
+ Msg) {}
+};
+
+/// Diagnostic information for optimization analysis remarks related to
+/// floating-point non-commutativity.
+class DiagnosticInfoOptimizationRemarkAnalysisFPCommute
+ : public DiagnosticInfoOptimizationRemarkAnalysis {
+public:
+ /// \p PassName is the name of the pass emitting this diagnostic. If
+ /// this name matches the regular expression given in -Rpass-analysis=, then
+ /// the diagnostic will be emitted. \p Fn is the function where the diagnostic
+ /// is being emitted. \p DLoc is the location information to use in the
+ /// diagnostic. If line table information is available, the diagnostic will
+ /// include the source code location. \p Msg is the message to show. The
+ /// front-end will append its own message related to options that address
+ /// floating-point non-commutativity. Note that this class does not copy this
+ /// message, so this reference must be valid for the whole lifetime of the
+ /// diagnostic.
+ DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg)
+ : DiagnosticInfoOptimizationRemarkAnalysis(
+ DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {}
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute;
+ }
+};
+
+/// Diagnostic information for optimization analysis remarks related to
+/// pointer aliasing.
+class DiagnosticInfoOptimizationRemarkAnalysisAliasing
+ : public DiagnosticInfoOptimizationRemarkAnalysis {
+public:
+ /// \p PassName is the name of the pass emitting this diagnostic. If
+ /// this name matches the regular expression given in -Rpass-analysis=, then
+ /// the diagnostic will be emitted. \p Fn is the function where the diagnostic
+ /// is being emitted. \p DLoc is the location information to use in the
+ /// diagnostic. If line table information is available, the diagnostic will
+ /// include the source code location. \p Msg is the message to show. The
+ /// front-end will append its own message related to options that address
+ /// pointer aliasing legality. Note that this class does not copy this
+ /// message, so this reference must be valid for the whole lifetime of the
+ /// diagnostic.
+ DiagnosticInfoOptimizationRemarkAnalysisAliasing(const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg)
+ : DiagnosticInfoOptimizationRemarkAnalysis(
+ DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg) {}
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_OptimizationRemarkAnalysisAliasing;
+ }
};
/// Diagnostic information for machine IR parser.
@@ -438,6 +525,30 @@ void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName,
const Function &Fn, const DebugLoc &DLoc,
const Twine &Msg);
+/// Emit an optimization analysis remark related to messages about
+/// floating-point non-commutativity. \p PassName is the name of the pass
+/// emitting the message. If -Rpass-analysis= is given and \p PassName matches
+/// the regular expression in -Rpass, then the remark will be emitted. \p Fn is
+/// the function triggering the remark, \p DLoc is the debug location where the
+/// diagnostic is generated. \p Msg is the message string to use.
+void emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg);
+
+/// Emit an optimization analysis remark related to messages about
+/// pointer aliasing. \p PassName is the name of the pass emitting the message.
+/// If -Rpass-analysis= is given and \p PassName matches the regular expression
+/// in -Rpass, then the remark will be emitted. \p Fn is the function triggering
+/// the remark, \p DLoc is the debug location where the diagnostic is generated.
+/// \p Msg is the message string to use.
+void emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg);
+
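For example (a sketch; pass name and message are illustrative), a vectorizer-style pass could report an aliasing obstruction as follows:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"

void noteAliasingProblem(const llvm::Function &F, const llvm::DebugLoc &DLoc) {
  llvm::emitOptimizationRemarkAnalysisAliasing(
      F.getContext(), "loop-vectorize", F, DLoc,
      "cannot prove the loop's memory accesses are independent");
}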
/// Diagnostic information for optimization failures.
class DiagnosticInfoOptimizationFailure
: public DiagnosticInfoOptimizationBase {
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
index 735e3ad..1bcd737 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
@@ -63,7 +63,7 @@ protected:
raw_ostream &Stream;
public:
- DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {};
+ DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {}
// Simple types.
DiagnosticPrinter &operator<<(char C) override;
diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h
index 27d989b..37447c3 100644
--- a/contrib/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm/include/llvm/IR/Dominators.h
@@ -64,11 +64,30 @@ public:
/// \brief Concrete subclass of DominatorTreeBase that is used to compute a
/// normal dominator tree.
+///
+/// Definition: A block is said to be forward statically reachable if there is
+/// a path from the entry of the function to the block. A statically reachable
+/// block may become statically unreachable during optimization.
+///
+/// A forward unreachable block may appear in the dominator tree, or it may
+/// not. If it does, dominance queries will return results as if all reachable
+/// blocks dominate it. When asking for a Node corresponding to a potentially
+/// unreachable block, calling code must handle the case where the block was
+/// unreachable and the result of getNode() is nullptr.
+///
+/// Generally, a block known to be unreachable when the dominator tree is
+/// constructed will not be in the tree. One which becomes unreachable after
+/// the dominator tree is initially constructed may still exist in the tree,
+/// even if the tree is properly updated. Calling code should not rely on the
+/// preceding statements; this is stated only to assist human understanding.
class DominatorTree : public DominatorTreeBase<BasicBlock> {
public:
typedef DominatorTreeBase<BasicBlock> Base;
DominatorTree() : DominatorTreeBase<BasicBlock>(false) {}
+ explicit DominatorTree(Function &F) : DominatorTreeBase<BasicBlock>(false) {
+ recalculate(F);
+ }
DominatorTree(DominatorTree &&Arg)
: Base(std::move(static_cast<Base &>(Arg))) {}
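A short sketch of the new construction path, together with the nullptr handling the comment above asks of callers:

#include "llvm/IR/Dominators.h"

// Illustrative query; unreachable blocks yield a null DomTreeNode.
bool reachableAndDominates(llvm::Function &F, llvm::BasicBlock &A,
                           llvm::BasicBlock &B) {
  llvm::DominatorTree DT(F); // recalculates eagerly via the new constructor
  if (!DT.getNode(&A) || !DT.getNode(&B))
    return false; // at least one block is not in the tree
  return DT.dominates(&A, &B);
}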
@@ -122,31 +141,35 @@ public:
// DominatorTree GraphTraits specializations so the DominatorTree can be
// iterable by generic graph iterators.
-template <> struct GraphTraits<DomTreeNode*> {
- typedef DomTreeNode NodeType;
- typedef NodeType::iterator ChildIteratorType;
+template <class Node, class ChildIterator> struct DomTreeGraphTraitsBase {
+ typedef Node NodeType;
+ typedef ChildIterator ChildIteratorType;
+ typedef df_iterator<Node *, SmallPtrSet<NodeType *, 8>> nodes_iterator;
- static NodeType *getEntryNode(NodeType *N) {
- return N;
- }
+ static NodeType *getEntryNode(NodeType *N) { return N; }
static inline ChildIteratorType child_begin(NodeType *N) {
return N->begin();
}
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->end();
- }
+ static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
- typedef df_iterator<DomTreeNode*> nodes_iterator;
-
- static nodes_iterator nodes_begin(DomTreeNode *N) {
+ static nodes_iterator nodes_begin(NodeType *N) {
return df_begin(getEntryNode(N));
}
- static nodes_iterator nodes_end(DomTreeNode *N) {
+ static nodes_iterator nodes_end(NodeType *N) {
return df_end(getEntryNode(N));
}
};
+template <>
+struct GraphTraits<DomTreeNode *>
+ : public DomTreeGraphTraitsBase<DomTreeNode, DomTreeNode::iterator> {};
+
+template <>
+struct GraphTraits<const DomTreeNode *>
+ : public DomTreeGraphTraitsBase<const DomTreeNode,
+ DomTreeNode::const_iterator> {};
+
template <> struct GraphTraits<DominatorTree*>
: public GraphTraits<DomTreeNode*> {
static NodeType *getEntryNode(DominatorTree *DT) {
diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h
index ec9f4cad..2a98393 100644
--- a/contrib/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm/include/llvm/IR/Function.h
@@ -32,28 +32,16 @@ namespace llvm {
class FunctionType;
class LLVMContext;
+class DISubprogram;
-template<> struct ilist_traits<Argument>
- : public SymbolTableListTraits<Argument, Function> {
-
- Argument *createSentinel() const {
- return static_cast<Argument*>(&Sentinel);
- }
- static void destroySentinel(Argument*) {}
-
- Argument *provideInitialHead() const { return createSentinel(); }
- Argument *ensureHead(Argument*) const { return createSentinel(); }
- static void noteHead(Argument*, Argument*) {}
-
- static ValueSymbolTable *getSymTab(Function *ItemParent);
-private:
- mutable ilist_half_node<Argument> Sentinel;
-};
+template <>
+struct SymbolTableListSentinelTraits<Argument>
+ : public ilist_half_embedded_sentinel_traits<Argument> {};
class Function : public GlobalObject, public ilist_node<Function> {
public:
- typedef iplist<Argument> ArgumentListType;
- typedef iplist<BasicBlock> BasicBlockListType;
+ typedef SymbolTableList<Argument> ArgumentListType;
+ typedef SymbolTableList<BasicBlock> BasicBlockListType;
// BasicBlock iterators...
typedef BasicBlockListType::iterator iterator;
@@ -73,10 +61,12 @@ private:
/*
* Value::SubclassData
*
- * bit 0 : HasLazyArguments
- * bit 1 : HasPrefixData
- * bit 2 : HasPrologueData
- * bit 3-6: CallingConvention
+ * bit 0 : HasLazyArguments
+ * bit 1 : HasPrefixData
+ * bit 2 : HasPrologueData
+ * bit 3 : HasPersonalityFn
+ * bits 4-13 : CallingConvention
+ * bits 14-15 : [reserved]
*/
/// Bits from GlobalObject::GlobalObjectSubclassData.
@@ -90,7 +80,7 @@ private:
(Value ? Mask : 0u));
}
- friend class SymbolTableListTraits<Function, Module>;
+ friend class SymbolTableListTraits<Function>;
void setParent(Module *parent);
@@ -120,7 +110,7 @@ private:
public:
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
- return new(1) Function(Ty, Linkage, N, M);
+ return new Function(Ty, Linkage, N, M);
}
~Function() override;
@@ -128,14 +118,6 @@ public:
/// \brief Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// \brief Get the personality function associated with this function.
- bool hasPersonalityFn() const { return getNumOperands() != 0; }
- Constant *getPersonalityFn() const {
- assert(hasPersonalityFn());
- return cast<Constant>(Op<0>());
- }
- void setPersonalityFn(Constant *C);
-
Type *getReturnType() const; // Return the type of the ret val
FunctionType *getFunctionType() const; // Return the FunctionType for me
@@ -170,11 +152,13 @@ public:
/// calling convention of this function. The enum values for the known
/// calling conventions are defined in CallingConv.h.
CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 3);
+ return static_cast<CallingConv::ID>((getSubclassDataFromValue() >> 4) &
+ CallingConv::MaxID);
}
void setCallingConv(CallingConv::ID CC) {
- setValueSubclassData((getSubclassDataFromValue() & 7) |
- (static_cast<unsigned>(CC) << 3));
+ auto ID = static_cast<unsigned>(CC);
+ assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+ setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
}
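To spell out the mask arithmetic (a reading aid, not part of the patch):

// 0xc00f keeps flag bits 0-3 and reserved bits 14-15 while clearing
// bits 4-13, into which the calling convention (at most
// CallingConv::MaxID == 1023, a 10-bit value) is then shifted.
static_assert((0xc00fu & (1023u << 4)) == 0,
              "calling-convention field must not overlap the kept bits");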
/// @brief Return the attribute list for this Function.
@@ -267,13 +251,13 @@ public:
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeSets.getDereferenceableBytes(i);
}
-
+
/// @brief Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeSets.getDereferenceableOrNullBytes(i);
}
-
+
/// @brief Determine if the function does not access memory.
bool doesNotAccessMemory() const {
return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
@@ -299,10 +283,28 @@ public:
return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
Attribute::ArgMemOnly);
}
- void setOnlyAccessesArgMemory() {
- addFnAttr(Attribute::ArgMemOnly);
+ void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
+
+ /// @brief Determine if the function may only access memory that is
+ /// inaccessible from the IR.
+ bool onlyAccessesInaccessibleMemory() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InaccessibleMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemory() {
+ addFnAttr(Attribute::InaccessibleMemOnly);
}
-
+
+ /// @brief Determine if the function may only access memory that is
+ /// either inaccessible from the IR or pointed to by its arguments.
+ bool onlyAccessesInaccessibleMemOrArgMem() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemOrArgMem() {
+ addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
+
/// @brief Determine if the function cannot return.
bool doesNotReturn() const {
return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
@@ -339,6 +341,15 @@ public:
addFnAttr(Attribute::Convergent);
}
+ /// Determine if the function is known not to recurse, directly or
+ /// indirectly.
+ bool doesNotRecurse() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoRecurse);
+ }
+ void setDoesNotRecurse() {
+ addFnAttr(Attribute::NoRecurse);
+ }
/// @brief True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
@@ -362,7 +373,8 @@ public:
AttributeSets.hasAttribute(2, Attribute::StructRet);
}
- /// @brief Determine if the parameter does not alias other parameters.
+ /// @brief Determine if the parameter or return value is marked with NoAlias
+ /// attribute.
/// @param n The parameter to check. 1 is the first parameter, 0 is the return
bool doesNotAlias(unsigned n) const {
return AttributeSets.hasAttribute(n, Attribute::NoAlias);
@@ -395,6 +407,14 @@ public:
addAttribute(n, Attribute::ReadOnly);
}
+ /// Optimize this function for minimum size (-Oz).
+ bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }
+
+ /// Optimize this function for size (-Os) or minimum size (-Oz).
+ bool optForSize() const {
+ return hasFnAttribute(Attribute::OptimizeForSize) || optForMinSize();
+ }
+
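A sketch of the intended use (pickUnrollFactor is hypothetical): passes consult these helpers instead of testing the attributes directly:

#include "llvm/IR/Function.h"

unsigned pickUnrollFactor(const llvm::Function &F) {
  if (F.optForMinSize()) return 1; // -Oz: avoid any code growth
  if (F.optForSize())    return 2; // -Os: allow only modest growth
  return 8;                        // default: optimize for speed
}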
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a Function) from the Function Src to this one.
void copyAttributesFrom(const GlobalValue *Src) override;
@@ -417,7 +437,6 @@ public:
///
void eraseFromParent() override;
-
/// Get the underlying elements of the Function... the basic block list is
/// empty for external functions.
///
@@ -429,13 +448,13 @@ public:
CheckLazyArguments();
return ArgumentList;
}
- static iplist<Argument> Function::*getSublistAccess(Argument*) {
+ static ArgumentListType Function::*getSublistAccess(Argument*) {
return &Function::ArgumentList;
}
const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
- static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) {
+ static BasicBlockListType Function::*getSublistAccess(BasicBlock*) {
return &Function::BasicBlocks;
}
@@ -450,7 +469,6 @@ public:
inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; }
inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; }
-
//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
//
@@ -487,11 +505,11 @@ public:
}
iterator_range<arg_iterator> args() {
- return iterator_range<arg_iterator>(arg_begin(), arg_end());
+ return make_range(arg_begin(), arg_end());
}
iterator_range<const_arg_iterator> args() const {
- return iterator_range<const_arg_iterator>(arg_begin(), arg_end());
+ return make_range(arg_begin(), arg_end());
}
/// @}
@@ -499,24 +517,33 @@ public:
size_t arg_size() const;
bool arg_empty() const;
+ /// \brief Check whether this function has a personality function.
+ bool hasPersonalityFn() const {
+ return getSubclassDataFromValue() & (1<<3);
+ }
+
+ /// \brief Get the personality function associated with this function.
+ Constant *getPersonalityFn() const;
+ void setPersonalityFn(Constant *Fn);
+
+ /// \brief Check whether this function has prefix data.
bool hasPrefixData() const {
return getSubclassDataFromValue() & (1<<1);
}
+ /// \brief Get the prefix data associated with this function.
Constant *getPrefixData() const;
void setPrefixData(Constant *PrefixData);
+ /// \brief Check whether this function has prologue data.
bool hasPrologueData() const {
return getSubclassDataFromValue() & (1<<2);
}
+ /// \brief Get the prologue data associated with this function.
Constant *getPrologueData() const;
void setPrologueData(Constant *PrologueData);
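A small sketch of the guarded-accessor pattern these helpers enable (inspect is illustrative):

#include "llvm/IR/Function.h"

// The has* queries test Value::SubclassData bits cheaply; call them
// before the corresponding get*, which reads the hung-off operands.
void inspect(llvm::Function &F) {
  if (F.hasPersonalityFn())
    (void)F.getPersonalityFn();
  if (F.hasPrefixData())
    (void)F.getPrefixData();
  if (F.hasPrologueData())
    (void)F.getPrologueData();
}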
- /// Print the function to an output stream with an optional
- /// AssemblyAnnotationWriter.
- void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr) const;
-
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
@@ -596,12 +623,27 @@ public:
/// Drop all metadata from \c this not included in \c KnownIDs.
void dropUnknownMetadata(ArrayRef<unsigned> KnownIDs);
+ /// \brief Set the attached subprogram.
+ ///
+ /// Calls \a setMetadata() with \a LLVMContext::MD_dbg.
+ void setSubprogram(DISubprogram *SP);
+
+ /// \brief Get the attached subprogram.
+ ///
+ /// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result
+ /// to \a DISubprogram.
+ DISubprogram *getSubprogram() const;
+
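A sketch of attaching and reading function-level debug info (SP is assumed to come from a DIBuilder elsewhere):

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"

void attachDebugInfo(llvm::Function &F, llvm::DISubprogram *SP) {
  F.setSubprogram(SP); // stored as the function's MD_dbg attachment
  if (llvm::DISubprogram *Got = F.getSubprogram())
    (void)Got;         // round-trips through the same metadata slot
}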
private:
+ void allocHungoffUselist();
+ template<int Idx> void setHungoffOperand(Constant *C);
+
// Shadow Value::setValueSubclassData with a private forwarding method so that
// subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
Value::setValueSubclassData(D);
}
+ void setValueSubclassDataBit(unsigned Bit, bool On);
bool hasMetadataHashEntry() const {
return getGlobalObjectSubClassData() & HasMetadataHashEntryBit;
@@ -613,18 +655,8 @@ private:
void clearMetadata();
};
-inline ValueSymbolTable *
-ilist_traits<BasicBlock>::getSymTab(Function *F) {
- return F ? &F->getValueSymbolTable() : nullptr;
-}
-
-inline ValueSymbolTable *
-ilist_traits<Argument>::getSymTab(Function *F) {
- return F ? &F->getValueSymbolTable() : nullptr;
-}
-
template <>
-struct OperandTraits<Function> : public OptionalOperandTraits<Function> {};
+struct OperandTraits<Function> : public HungoffOperandTraits<3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
diff --git a/contrib/llvm/include/llvm/IR/FunctionInfo.h b/contrib/llvm/include/llvm/IR/FunctionInfo.h
new file mode 100644
index 0000000..eba088a
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/FunctionInfo.h
@@ -0,0 +1,241 @@
+//===-- llvm/FunctionInfo.h - Function Info Index ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// FunctionInfo.h This file contains the declarations of the classes that hold
+/// the function info index and summary.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FUNCTIONINFO_H
+#define LLVM_IR_FUNCTIONINFO_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// \brief Function summary information to aid decisions and implementation of
+/// importing.
+///
+/// This is a separate class from FunctionInfo to enable lazy reading of this
+/// function summary information from the combined index file during importing.
+class FunctionSummary {
+private:
+ /// \brief Path of module containing function IR, used to locate module when
+ /// importing this function.
+ ///
+ /// This is only used during parsing of the combined function index, or when
+ /// parsing the per-module index for creation of the combined function index,
+ /// not during writing of the per-module index which doesn't contain a
+ /// module path string table.
+ StringRef ModulePath;
+
+ /// \brief Used to flag functions that have local linkage types and need to
+ /// have module identifier appended before placing into the combined
+ /// index, to disambiguate from other functions with the same name.
+ ///
+ /// This is only used in the per-module function index, as it is consumed
+ /// while creating the combined index.
+ bool IsLocalFunction;
+
+ // The rest of the information is used to help decide whether importing
+ // is likely to be profitable.
+ // Other information will be added as the importing is tuned, such
+ // as hotness (when profile available), and other function characteristics.
+
+ /// Number of instructions (ignoring debug instructions, e.g.) computed
+ /// during the initial compile step when the function index is first built.
+ unsigned InstCount;
+
+public:
+ /// Construct a summary object from summary data expected for all
+ /// summary records.
+ FunctionSummary(unsigned NumInsts) : InstCount(NumInsts) {}
+
+ /// Set the path to the module containing this function, for use in
+ /// the combined index.
+ void setModulePath(StringRef ModPath) { ModulePath = ModPath; }
+
+ /// Get the path to the module containing this function.
+ StringRef modulePath() const { return ModulePath; }
+
+ /// Record whether this is a local function in the per-module index.
+ void setLocalFunction(bool IsLocal) { IsLocalFunction = IsLocal; }
+
+ /// Check whether this was a local function, for use in creating
+ /// the combined index.
+ bool isLocalFunction() const { return IsLocalFunction; }
+
+ /// Get the instruction count recorded for this function.
+ unsigned instCount() const { return InstCount; }
+};
+
+/// \brief Class to hold pointer to function summary and information required
+/// for parsing it.
+///
+/// For the per-module index, this holds the bitcode offset
+/// of the corresponding function block. For the combined index,
+/// after parsing of the \a ValueSymbolTable, this initially
+/// holds the offset of the corresponding function summary bitcode
+/// record. After parsing the associated summary information from the summary
+/// block the \a FunctionSummary is populated and stored here.
+class FunctionInfo {
+private:
+ /// Function summary information used to help make ThinLTO importing
+ /// decisions.
+ std::unique_ptr<FunctionSummary> Summary;
+
+ /// \brief The bitcode offset corresponding to either the associated
+ /// function's function body record, or its function summary record,
+ /// depending on whether this is a per-module or combined index.
+ ///
+ /// This bitcode offset is written to or read from the associated
+ /// \a ValueSymbolTable entry for the function.
+ /// For the per-module index this holds the bitcode offset of the
+ /// function's body record within bitcode module block in its module,
+ /// which is used during lazy function parsing or ThinLTO importing.
+ /// For the combined index this holds the offset of the corresponding
+ /// function summary record, to enable associating the combined index
+ /// VST records with the summary records.
+ uint64_t BitcodeIndex;
+
+public:
+ /// Constructor used during parsing of VST entries.
+ FunctionInfo(uint64_t FuncOffset)
+ : Summary(nullptr), BitcodeIndex(FuncOffset) {}
+
+ /// Constructor used for per-module index bitcode writing.
+ FunctionInfo(uint64_t FuncOffset,
+ std::unique_ptr<FunctionSummary> FuncSummary)
+ : Summary(std::move(FuncSummary)), BitcodeIndex(FuncOffset) {}
+
+ /// Record the function summary information parsed out of the function
+ /// summary block during parsing or combined index creation.
+ void setFunctionSummary(std::unique_ptr<FunctionSummary> FuncSummary) {
+ Summary = std::move(FuncSummary);
+ }
+
+ /// Get the function summary recorded for this function.
+ FunctionSummary *functionSummary() const { return Summary.get(); }
+
+ /// Get the bitcode index recorded for this function, depending on
+ /// the index type.
+ uint64_t bitcodeIndex() const { return BitcodeIndex; }
+
+ /// Record the bitcode index for this function, depending on
+ /// the index type.
+ void setBitcodeIndex(uint64_t FuncOffset) { BitcodeIndex = FuncOffset; }
+};
+
+/// List of function info structures for a particular function name held
+/// in the FunctionMap. Requires a vector in the case of multiple
+/// COMDAT functions of the same name.
+typedef std::vector<std::unique_ptr<FunctionInfo>> FunctionInfoList;
+
+/// Map from function name to corresponding function info structures.
+typedef StringMap<FunctionInfoList> FunctionInfoMapTy;
+
+/// Type used for iterating through the function info map.
+typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator;
+typedef FunctionInfoMapTy::iterator funcinfo_iterator;
+
+/// String table to hold/own module path strings, which additionally holds the
+/// module ID assigned to each module during the plugin step. The StringMap
+/// makes a copy of and owns inserted strings.
+typedef StringMap<uint64_t> ModulePathStringTableTy;
+
+/// Class to hold module path string table and function map,
+/// and encapsulate methods for operating on them.
+class FunctionInfoIndex {
+private:
+ /// Map from function name to list of function information instances
+ /// for functions of that name (may be duplicates in the COMDAT case, e.g.).
+ FunctionInfoMapTy FunctionMap;
+
+ /// Holds strings for combined index, mapping to the corresponding module ID.
+ ModulePathStringTableTy ModulePathStringTable;
+
+public:
+ FunctionInfoIndex() = default;
+
+ // Disable the copy constructor and assignment operators, so
+ // no unexpected copying/moving occurs.
+ FunctionInfoIndex(const FunctionInfoIndex &) = delete;
+ void operator=(const FunctionInfoIndex &) = delete;
+
+ funcinfo_iterator begin() { return FunctionMap.begin(); }
+ const_funcinfo_iterator begin() const { return FunctionMap.begin(); }
+ funcinfo_iterator end() { return FunctionMap.end(); }
+ const_funcinfo_iterator end() const { return FunctionMap.end(); }
+
+ /// Get the list of function info objects for a given function.
+ const FunctionInfoList &getFunctionInfoList(StringRef FuncName) {
+ return FunctionMap[FuncName];
+ }
+
+ /// Find the function info list for a given function, returning an iterator
+ /// into the map, or end() if the function is not present.
+ const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const {
+ return FunctionMap.find(FuncName);
+ }
+
+ /// Add a function info for a function of the given name.
+ void addFunctionInfo(StringRef FuncName, std::unique_ptr<FunctionInfo> Info) {
+ FunctionMap[FuncName].push_back(std::move(Info));
+ }
+
+ /// Iterator to allow writer to walk through table during emission.
+ iterator_range<StringMap<uint64_t>::const_iterator>
+ modPathStringEntries() const {
+ return llvm::make_range(ModulePathStringTable.begin(),
+ ModulePathStringTable.end());
+ }
+
+ /// Get the module ID recorded for the given module path.
+ uint64_t getModuleId(const StringRef ModPath) const {
+ return ModulePathStringTable.lookup(ModPath);
+ }
+
+ /// Add the given per-module index into this function index/summary,
+ /// assigning it the given module ID. Each module merged in should have
+ /// a unique ID, necessary for consistent renaming of promoted
+ /// static (local) variables.
+ void mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
+ uint64_t NextModuleId);
+
+ /// Convenience method for creating a promoted global name
+ /// for the given value name of a local, and its original module's ID.
+ static std::string getGlobalNameForLocal(StringRef Name, uint64_t ModId) {
+ SmallString<256> NewName(Name);
+ NewName += ".llvm.";
+ raw_svector_ostream(NewName) << ModId;
+ return NewName.str();
+ }
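For instance, getGlobalNameForLocal("counter", 42) produces "counter.llvm.42" (illustrative values).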
+
+ /// Add a new module path, mapped to the given module Id, and return StringRef
+ /// owned by string table map.
+ StringRef addModulePath(StringRef ModPath, uint64_t ModId) {
+ return ModulePathStringTable.insert(std::make_pair(ModPath, ModId))
+ .first->first();
+ }
+
+ /// Check if the given Module has any functions available for exporting
+ /// in the index. We consider any module present in the ModulePathStringTable
+ /// to have exported functions.
+ bool hasExportedFunctions(const Module &M) const {
+ return ModulePathStringTable.count(M.getModuleIdentifier());
+ }
+};
+
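Putting the pieces together, a minimal sketch of populating a per-module index (names, offsets, and counts are illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/FunctionInfo.h"
#include <memory>

std::unique_ptr<llvm::FunctionInfoIndex> buildIndex() {
  auto Index = llvm::make_unique<llvm::FunctionInfoIndex>();
  auto Summary = llvm::make_unique<llvm::FunctionSummary>(/*NumInsts=*/42);
  Summary->setLocalFunction(false);
  // Per-module entry: bitcode offset of the function body plus its summary.
  Index->addFunctionInfo("foo", llvm::make_unique<llvm::FunctionInfo>(
                                    /*FuncOffset=*/128, std::move(Summary)));
  return Index;
}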
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/include/llvm/IR/GVMaterializer.h b/contrib/llvm/include/llvm/IR/GVMaterializer.h
index 1d6c915..6cb593c 100644
--- a/contrib/llvm/include/llvm/IR/GVMaterializer.h
+++ b/contrib/llvm/include/llvm/IR/GVMaterializer.h
@@ -18,12 +18,14 @@
#ifndef LLVM_IR_GVMATERIALIZER_H
#define LLVM_IR_GVMATERIALIZER_H
+#include "llvm/ADT/DenseMap.h"
#include <system_error>
#include <vector>
namespace llvm {
class Function;
class GlobalValue;
+class Metadata;
class Module;
class StructType;
@@ -34,28 +36,25 @@ protected:
public:
virtual ~GVMaterializer();
- /// True if GV has been materialized and can be dematerialized back to
- /// whatever backing store this GVMaterializer uses.
- virtual bool isDematerializable(const GlobalValue *GV) const = 0;
-
/// Make sure the given GlobalValue is fully read.
///
virtual std::error_code materialize(GlobalValue *GV) = 0;
- /// If the given GlobalValue is read in, and if the GVMaterializer supports
- /// it, release the memory for the GV, and set it up to be materialized
- /// lazily. If the Materializer doesn't support this capability, this method
- /// is a noop.
- ///
- virtual void dematerialize(GlobalValue *) {}
-
/// Make sure the entire Module has been completely read.
///
- virtual std::error_code materializeModule(Module *M) = 0;
+ virtual std::error_code materializeModule() = 0;
virtual std::error_code materializeMetadata() = 0;
virtual void setStripDebugInfo() = 0;
+ /// Client should define this interface if the mapping between metadata
+ /// values and value ids needs to be preserved, e.g. across materializer
+ /// instantiations. If OnlyTempMD is true, only those that have remained
+ /// temporary metadata are recorded in the map.
+ virtual void
+ saveMetadataList(DenseMap<const Metadata *, unsigned> &MetadataToIDs,
+ bool OnlyTempMD) {}
+
virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0;
};
diff --git a/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index 6bba0ae..7cb13fa 100644
--- a/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -78,7 +78,7 @@ namespace llvm {
// current type directly.
Type *operator->() const { return operator*(); }
- Value *getOperand() const { return *OpIt; }
+ Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
generic_gep_type_iterator& operator++() { // Preincrement
if (CurTy.getInt()) {
diff --git a/contrib/llvm/include/llvm/IR/GlobalAlias.h b/contrib/llvm/include/llvm/IR/GlobalAlias.h
index ce73b7a..b077214 100644
--- a/contrib/llvm/include/llvm/IR/GlobalAlias.h
+++ b/contrib/llvm/include/llvm/IR/GlobalAlias.h
@@ -23,18 +23,17 @@
namespace llvm {
class Module;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
+template <typename ValueSubClass> class SymbolTableListTraits;
class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
- friend class SymbolTableListTraits<GlobalAlias, Module>;
+ friend class SymbolTableListTraits<GlobalAlias>;
void operator=(const GlobalAlias &) = delete;
GlobalAlias(const GlobalAlias &) = delete;
void setParent(Module *parent);
- GlobalAlias(PointerType *Ty, LinkageTypes Linkage, const Twine &Name,
- Constant *Aliasee, Module *Parent);
+ GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
+ const Twine &Name, Constant *Aliasee, Module *Parent);
public:
// allocate space for exactly one operand
@@ -44,17 +43,19 @@ public:
/// If a parent module is specified, the alias is automatically inserted into
/// the end of the specified module's alias list.
- static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage,
- const Twine &Name, Constant *Aliasee,
- Module *Parent);
+ static GlobalAlias *create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ Constant *Aliasee, Module *Parent);
// Without the Aliasee.
- static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage,
- const Twine &Name, Module *Parent);
+ static GlobalAlias *create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ Module *Parent);
// The module is taken from the Aliasee.
- static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage,
- const Twine &Name, GlobalValue *Aliasee);
+ static GlobalAlias *create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ GlobalValue *Aliasee);
// Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(LinkageTypes Linkage, const Twine &Name,
diff --git a/contrib/llvm/include/llvm/IR/GlobalObject.h b/contrib/llvm/include/llvm/IR/GlobalObject.h
index f055241..ee111a0 100644
--- a/contrib/llvm/include/llvm/IR/GlobalObject.h
+++ b/contrib/llvm/include/llvm/IR/GlobalObject.h
@@ -27,9 +27,11 @@ class GlobalObject : public GlobalValue {
GlobalObject(const GlobalObject &) = delete;
protected:
- GlobalObject(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
- LinkageTypes Linkage, const Twine &Name)
- : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name), ObjComdat(nullptr) {
+ GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
+ LinkageTypes Linkage, const Twine &Name,
+ unsigned AddressSpace = 0)
+ : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace),
+ ObjComdat(nullptr) {
setGlobalValueSubClassData(0);
}
diff --git a/contrib/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm/include/llvm/IR/GlobalValue.h
index 2961369..4fa4e7d 100644
--- a/contrib/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm/include/llvm/IR/GlobalValue.h
@@ -65,15 +65,16 @@ public:
};
protected:
- GlobalValue(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
- LinkageTypes Linkage, const Twine &Name)
- : Constant(Ty, VTy, Ops, NumOps), Linkage(Linkage),
- Visibility(DefaultVisibility), UnnamedAddr(0),
- DllStorageClass(DefaultStorageClass),
+ GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
+ LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace)
+ : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps),
+ ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility),
+ UnnamedAddr(0), DllStorageClass(DefaultStorageClass),
ThreadLocal(NotThreadLocal), IntID((Intrinsic::ID)0U), Parent(nullptr) {
setName(Name);
}
+ Type *ValueType;
// Note: VC++ treats enums as signed, so an extra bit is required to prevent
// Linkage and Visibility from turning into negative values.
LinkageTypes Linkage : 5; // The linkage of this global
@@ -184,7 +185,7 @@ public:
/// Global values are always pointers.
PointerType *getType() const { return cast<PointerType>(User::getType()); }
- Type *getValueType() const { return getType()->getElementType(); }
+ Type *getValueType() const { return ValueType; }
static LinkageTypes getLinkOnceLinkage(bool ODR) {
return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
@@ -236,7 +237,8 @@ public:
/// Whether the definition of this global may be discarded if it is not used
/// in its compilation unit.
static bool isDiscardableIfUnused(LinkageTypes Linkage) {
- return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage);
+ return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage) ||
+ isAvailableExternallyLinkage(Linkage);
}
/// Whether the definition of this global may be replaced by something
@@ -320,21 +322,11 @@ public:
/// function has been read in yet or not.
bool isMaterializable() const;
- /// Returns true if this function was loaded from a GVMaterializer that's
- /// still attached to its Module and that knows how to dematerialize the
- /// function.
- bool isDematerializable() const;
-
/// Make sure this GlobalValue is fully read. If the module is corrupt, this
/// returns true and fills in the optional string with information about the
/// problem. If successful, this returns false.
std::error_code materialize();
- /// If this GlobalValue is read in, and if the GVMaterializer supports it,
- /// release the memory for the function, and set it up to be materialized
- /// lazily. If !isDematerializable(), this method is a noop.
- void dematerialize();
-
/// @}
/// Return true if the primary definition of this global value is outside of
diff --git a/contrib/llvm/include/llvm/IR/GlobalVariable.h b/contrib/llvm/include/llvm/IR/GlobalVariable.h
index a015983..342bdc0 100644
--- a/contrib/llvm/include/llvm/IR/GlobalVariable.h
+++ b/contrib/llvm/include/llvm/IR/GlobalVariable.h
@@ -29,11 +29,10 @@ namespace llvm {
class Module;
class Constant;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
+template <typename ValueSubClass> class SymbolTableListTraits;
class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> {
- friend class SymbolTableListTraits<GlobalVariable, Module>;
+ friend class SymbolTableListTraits<GlobalVariable>;
void *operator new(size_t, unsigned) = delete;
void operator=(const GlobalVariable &) = delete;
GlobalVariable(const GlobalVariable &) = delete;
@@ -106,18 +105,13 @@ public:
/// hasUniqueInitializer - Whether the global variable has an initializer, and
/// any changes made to the initializer will turn up in the final executable.
inline bool hasUniqueInitializer() const {
- return hasInitializer() &&
- // It's not safe to modify initializers of global variables with weak
- // linkage, because the linker might choose to discard the initializer and
- // use the initializer from another instance of the global variable
- // instead. It is wrong to modify the initializer of a global variable
- // with *_odr linkage because then different instances of the global may
- // have different initializers, breaking the One Definition Rule.
- !isWeakForLinker() &&
- // It is not safe to modify initializers of global variables with the
- // external_initializer marker since the value may be changed at runtime
- // before C++ initializers are evaluated.
- !isExternallyInitialized();
+ return
+ // We need to be sure this is the definition that will actually be used
+ isStrongDefinitionForLinker() &&
+ // It is not safe to modify initializers of global variables with the
+ // external_initializer marker since the value may be changed at runtime
+ // before C++ initializers are evaluated.
+ !isExternallyInitialized();
}
/// getInitializer - Return the initializer for this global variable. It is
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index 6c67c79..7fe04f2 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -24,6 +24,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
@@ -51,6 +52,7 @@ protected:
/// \brief Common base class shared among various IRBuilders.
class IRBuilderBase {
DebugLoc CurDbgLocation;
+
protected:
BasicBlock *BB;
BasicBlock::iterator InsertPt;
@@ -58,8 +60,8 @@ protected:
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
-public:
+public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr)
: Context(context), DefaultFPMathTag(FPMathTag), FMF() {
ClearInsertionPoint();
@@ -73,7 +75,7 @@ public:
/// inserted into a block.
void ClearInsertionPoint() {
BB = nullptr;
- InsertPt = nullptr;
+ InsertPt.reset(nullptr);
}
BasicBlock *GetInsertBlock() const { return BB; }
@@ -91,8 +93,8 @@ public:
/// the specified instruction.
void SetInsertPoint(Instruction *I) {
BB = I->getParent();
- InsertPt = I;
- assert(I != BB->end() && "Can't read debug loc from end()");
+ InsertPt = I->getIterator();
+ assert(InsertPt != BB->end() && "Can't read debug loc from end()");
SetCurrentDebugLocation(I->getDebugLoc());
}
@@ -313,10 +315,8 @@ public:
}
/// \brief Fetch the type representing a 128-bit integer.
- IntegerType *getInt128Ty() {
- return Type::getInt128Ty(Context);
- }
-
+ IntegerType *getInt128Ty() { return Type::getInt128Ty(Context); }
+
/// \brief Fetch the type representing an N-bit integer.
IntegerType *getIntNTy(unsigned N) {
return Type::getIntNTy(Context, N);
@@ -426,7 +426,7 @@ public:
/// \brief Create a call to Masked Load intrinsic
CallInst *CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask,
- Value *PassThru = 0, const Twine &Name = "");
+ Value *PassThru = nullptr, const Twine &Name = "");
/// \brief Create a call to Masked Store intrinsic
CallInst *CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align,
@@ -445,6 +445,16 @@ public:
ArrayRef<Value *> GCArgs,
const Twine &Name = "");
+ /// \brief Create a call to the experimental.gc.statepoint intrinsic to
+ /// start a new statepoint sequence.
+ CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
+ Value *ActualCallee, uint32_t Flags,
+ ArrayRef<Use> CallArgs,
+ ArrayRef<Use> TransitionArgs,
+ ArrayRef<Use> DeoptArgs,
+ ArrayRef<Value *> GCArgs,
+ const Twine &Name = "");
+
// \brief Convenience function for the common case when CallArgs are filled
// in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be
// .get()'ed to get the Value pointer.
@@ -463,6 +473,15 @@ public:
ArrayRef<Value *> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name = "");
+  /// \brief Create an invoke to the experimental.gc.statepoint intrinsic to
+ /// start a new statepoint sequence.
+ InvokeInst *CreateGCStatepointInvoke(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
+ ArrayRef<Use> InvokeArgs, ArrayRef<Use> TransitionArgs,
+ ArrayRef<Use> DeoptArgs, ArrayRef<Value *> GCArgs,
+ const Twine &Name = "");
+
// Convenience function for the common case when CallArgs are filled in using
// makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be .get()'ed to
// get the Value *.
@@ -516,11 +535,11 @@ template<bool preserveNames = true, typename T = ConstantFolder,
typename Inserter = IRBuilderDefaultInserter<preserveNames> >
class IRBuilder : public IRBuilderBase, public Inserter {
T Folder;
+
public:
- IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
+ IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(),
MDNode *FPMathTag = nullptr)
- : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) {
- }
+ : IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {}
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr)
: IRBuilderBase(C, FPMathTag), Folder() {
@@ -578,12 +597,15 @@ public:
//===--------------------------------------------------------------------===//
private:
- /// \brief Helper to add branch weight metadata onto an instruction.
+ /// \brief Helper to add branch weight and unpredictable metadata onto an
+ /// instruction.
/// \returns The annotated instruction.
template <typename InstTy>
- InstTy *addBranchWeights(InstTy *I, MDNode *Weights) {
+ InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) {
if (Weights)
I->setMetadata(LLVMContext::MD_prof, Weights);
+ if (Unpredictable)
+ I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable);
return I;
}
@@ -620,18 +642,20 @@ public:
/// \brief Create a conditional 'br Cond, TrueDest, FalseDest'
/// instruction.
BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
- MDNode *BranchWeights = nullptr) {
- return Insert(addBranchWeights(BranchInst::Create(True, False, Cond),
- BranchWeights));
+ MDNode *BranchWeights = nullptr,
+ MDNode *Unpredictable = nullptr) {
+ return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond),
+ BranchWeights, Unpredictable));
}
/// \brief Create a switch instruction with the specified value, default dest,
/// and with a hint for the number of cases that will be added (for efficient
/// allocation).
SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10,
- MDNode *BranchWeights = nullptr) {
- return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases),
- BranchWeights));
+ MDNode *BranchWeights = nullptr,
+ MDNode *Unpredictable = nullptr) {
+ return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases),
+ BranchWeights, Unpredictable));
}
/// \brief Create an indirect branch instruction with the specified address
@@ -667,11 +691,45 @@ public:
return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
Name);
}
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+ OpBundles), Name);
+ }
ResumeInst *CreateResume(Value *Exn) {
return Insert(ResumeInst::Create(Exn));
}
+ CleanupReturnInst *CreateCleanupRet(CleanupPadInst *CleanupPad,
+ BasicBlock *UnwindBB = nullptr) {
+ return Insert(CleanupReturnInst::Create(CleanupPad, UnwindBB));
+ }
+
+ CatchSwitchInst *CreateCatchSwitch(Value *ParentPad, BasicBlock *UnwindBB,
+ unsigned NumHandlers,
+ const Twine &Name = "") {
+ return Insert(CatchSwitchInst::Create(ParentPad, UnwindBB, NumHandlers),
+ Name);
+ }
+
+ CatchPadInst *CreateCatchPad(Value *ParentPad, ArrayRef<Value *> Args,
+ const Twine &Name = "") {
+ return Insert(CatchPadInst::Create(ParentPad, Args), Name);
+ }
+
+ CleanupPadInst *CreateCleanupPad(Value *ParentPad,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return Insert(CleanupPadInst::Create(ParentPad, Args), Name);
+ }
+
+ CatchReturnInst *CreateCatchRet(CatchPadInst *CatchPad, BasicBlock *BB) {
+ return Insert(CatchReturnInst::Create(CatchPad, BB));
+ }
+
UnreachableInst *CreateUnreachable() {
return Insert(new UnreachableInst(Context));
}
@@ -700,6 +758,7 @@ private:
I->setFastMathFlags(FMF);
return I;
}
+
public:
Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
@@ -1326,18 +1385,22 @@ public:
const Twine &Name = "") {
if (V->getType() == DestTy)
return V;
- if (V->getType()->isPointerTy() && DestTy->isIntegerTy())
+ if (V->getType()->getScalarType()->isPointerTy() &&
+ DestTy->getScalarType()->isIntegerTy())
return CreatePtrToInt(V, DestTy, Name);
- if (V->getType()->isIntegerTy() && DestTy->isPointerTy())
+ if (V->getType()->getScalarType()->isIntegerTy() &&
+ DestTy->getScalarType()->isPointerTy())
return CreateIntToPtr(V, DestTy, Name);
return CreateBitCast(V, DestTy, Name);
}
+
private:
// \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
// compile-time error instead of converting the string to bool for the
// isSigned parameter.
Value *CreateIntCast(Value *, Type *, const char *) = delete;
+
public:
Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
@@ -1465,18 +1528,30 @@ public:
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
+ ArrayRef<OperandBundleDef> OpBundles = None,
const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, Args), Name);
+ return Insert(CallInst::Create(Callee, Args, OpBundles), Name);
+ }
+
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ const Twine &Name, MDNode *FPMathTag = nullptr) {
+ PointerType *PTy = cast<PointerType>(Callee->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ return CreateCall(FTy, Callee, Args, Name, FPMathTag);
}
CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
- ArrayRef<Value *> Args, const Twine &Name = "") {
- return Insert(CallInst::Create(FTy, Callee, Args), Name);
+ ArrayRef<Value *> Args, const Twine &Name = "",
+ MDNode *FPMathTag = nullptr) {
+ CallInst *CI = CallInst::Create(FTy, Callee, Args);
+ if (isa<FPMathOperator>(CI))
+ CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
+ return Insert(CI, Name);
}
CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
- const Twine &Name = "") {
- return CreateCall(Callee->getFunctionType(), Callee, Args, Name);
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
}
Value *CreateSelect(Value *C, Value *True, Value *False,
@@ -1594,6 +1669,32 @@ public:
Name);
}
+  /// \brief Create an invariant.group.barrier intrinsic call that stops the
+  /// optimizer from propagating equality using invariant.group metadata.
+  /// If Ptr's type is different from i8*, it is cast to i8* before the call
+  /// and the result is cast back to Ptr's type after the call.
+ Value *CreateInvariantGroupBarrier(Value *Ptr) {
+ Module *M = BB->getParent()->getParent();
+ Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M,
+ Intrinsic::invariant_group_barrier);
+
+ Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType();
+ assert(ArgumentAndReturnType ==
+ FnInvariantGroupBarrier->getFunctionType()->getParamType(0) &&
+ "InvariantGroupBarrier should take and return the same type");
+ Type *PtrType = Ptr->getType();
+
+ bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType;
+ if (PtrTypeConversionNeeded)
+ Ptr = CreateBitCast(Ptr, ArgumentAndReturnType);
+
+ CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr});
+
+ if (PtrTypeConversionNeeded)
+ return CreateBitCast(Fn, PtrType);
+ return Fn;
+ }
+
/// \brief Return a vector value that contains \arg V broadcasted to \p
/// NumElts elements.
Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
@@ -1676,6 +1777,6 @@ public:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef)
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_IRBUILDER_H
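
To illustrate the new optional metadata arguments threaded through
addBranchMetadata(), a minimal sketch (the function and value names are
hypothetical, and it assumes MDBuilder grew a matching createUnpredictable()
helper in this import):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/MDBuilder.h"

    // Emit a conditional branch carrying both !prof branch weights and the
    // new !unpredictable hint.
    static llvm::BranchInst *emitHintedBranch(llvm::IRBuilder<> &B,
                                              llvm::Value *Cond,
                                              llvm::BasicBlock *T,
                                              llvm::BasicBlock *F) {
      llvm::MDBuilder MDB(B.getContext());
      return B.CreateCondBr(Cond, T, F,
                            MDB.createBranchWeights(95, 5), // !prof
                            MDB.createUnpredictable());     // !unpredictable
    }
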
diff --git a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
index 5f1d56f..88b18e8 100644
--- a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -47,6 +47,12 @@ FunctionPass *createPrintFunctionPass(raw_ostream &OS,
BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS,
const std::string &Banner = "");
+/// Print out the name of an LLVM value without any prefixes.
+///
+/// The name is surrounded with quotes and escaped if it has any special or
+/// non-printable characters in it.
+void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name);
+
/// \brief Pass for printing a Module as LLVM's text IR assembly.
///
/// Note: This pass is for use with the new pass manager. Use the create...Pass
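
A sketch of the new helper in use (the wrapper function is hypothetical):

    #include "llvm/IR/IRPrintingPasses.h"
    #include "llvm/IR/Value.h"
    #include "llvm/Support/raw_ostream.h"

    // Print a value's name the way the IR printer would, but without the
    // leading '%' or '@' sigil.
    static void dumpBareName(const llvm::Value &V) {
      llvm::printLLVMNameWithoutPrefix(llvm::errs(), V.getName());
      llvm::errs() << '\n';
    }
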
diff --git a/contrib/llvm/include/llvm/IR/InlineAsm.h b/contrib/llvm/include/llvm/IR/InlineAsm.h
index 08b5102..d2e9e48 100644
--- a/contrib/llvm/include/llvm/IR/InlineAsm.h
+++ b/contrib/llvm/include/llvm/IR/InlineAsm.h
@@ -44,11 +44,12 @@ private:
void operator=(const InlineAsm&) = delete;
std::string AsmString, Constraints;
+ FunctionType *FTy;
bool HasSideEffects;
bool IsAlignStack;
AsmDialect Dialect;
- InlineAsm(PointerType *Ty, const std::string &AsmString,
+ InlineAsm(FunctionType *Ty, const std::string &AsmString,
const std::string &Constraints, bool hasSideEffects,
bool isAlignStack, AsmDialect asmDialect);
~InlineAsm() override;
@@ -56,15 +57,15 @@ private:
/// When the ConstantUniqueMap merges two types and makes two InlineAsms
/// identical, it destroys one of them with this method.
void destroyConstant();
-public:
+public:
/// InlineAsm::get - Return the specified uniqued inline asm string.
///
static InlineAsm *get(FunctionType *Ty, StringRef AsmString,
StringRef Constraints, bool hasSideEffects,
bool isAlignStack = false,
AsmDialect asmDialect = AD_ATT);
-
+
bool hasSideEffects() const { return HasSideEffects; }
bool isAlignStack() const { return IsAlignStack; }
AsmDialect getDialect() const { return Dialect; }
@@ -74,11 +75,11 @@ public:
PointerType *getType() const {
return reinterpret_cast<PointerType*>(Value::getType());
}
-
+
/// getFunctionType - InlineAsms are always pointers to functions.
///
FunctionType *getFunctionType() const;
-
+
const std::string &getAsmString() const { return AsmString; }
const std::string &getConstraintString() const { return Constraints; }
@@ -88,15 +89,15 @@ public:
///
static bool Verify(FunctionType *Ty, StringRef Constraints);
- // Constraint String Parsing
+ // Constraint String Parsing
enum ConstraintPrefix {
isInput, // 'x'
isOutput, // '=x'
isClobber // '~x'
};
-
+
typedef std::vector<std::string> ConstraintCodeVector;
-
+
struct SubConstraintInfo {
/// MatchingInput - If this is not -1, this is an output constraint where an
/// input constraint is required to match it (e.g. "0"). The value is the
@@ -113,80 +114,79 @@ public:
typedef std::vector<SubConstraintInfo> SubConstraintInfoVector;
struct ConstraintInfo;
typedef std::vector<ConstraintInfo> ConstraintInfoVector;
-
+
struct ConstraintInfo {
/// Type - The basic type of the constraint: input/output/clobber
///
ConstraintPrefix Type;
-
+
/// isEarlyClobber - "&": output operand writes result before inputs are all
/// read. This is only ever set for an output operand.
- bool isEarlyClobber;
-
+ bool isEarlyClobber;
+
/// MatchingInput - If this is not -1, this is an output constraint where an
/// input constraint is required to match it (e.g. "0"). The value is the
/// constraint number that matches this one (for example, if this is
/// constraint #0 and constraint #4 has the value "0", this will be 4).
signed char MatchingInput;
-
+
/// hasMatchingInput - Return true if this is an output constraint that has
/// a matching input constraint.
bool hasMatchingInput() const { return MatchingInput != -1; }
-
+
/// isCommutative - This is set to true for a constraint that is commutative
/// with the next operand.
bool isCommutative;
-
+
/// isIndirect - True if this operand is an indirect operand. This means
/// that the address of the source or destination is present in the call
/// instruction, instead of it being returned or passed in explicitly. This
/// is represented with a '*' in the asm string.
bool isIndirect;
-
+
/// Code - The constraint code, either the register name (in braces) or the
/// constraint letter/number.
ConstraintCodeVector Codes;
-
+
/// isMultipleAlternative - '|': has multiple-alternative constraints.
bool isMultipleAlternative;
-
+
/// multipleAlternatives - If there are multiple alternative constraints,
/// this array will contain them. Otherwise it will be empty.
SubConstraintInfoVector multipleAlternatives;
-
+
/// The currently selected alternative constraint index.
unsigned currentAlternativeIndex;
-
- ///Default constructor.
+
+ /// Default constructor.
ConstraintInfo();
-
+
/// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar);
-
+
/// selectAlternative - Point this constraint to the alternative constraint
/// indicated by the index.
void selectAlternative(unsigned index);
};
-
+
/// ParseConstraints - Split up the constraint string into the specific
/// constraints and their prefixes. If this returns an empty vector, and if
/// the constraint string itself isn't empty, there was an error parsing.
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString);
-
- /// ParseConstraints - Parse the constraints of this inlineasm object,
+
+ /// ParseConstraints - Parse the constraints of this inlineasm object,
/// returning them the same way that ParseConstraints(str) does.
ConstraintInfoVector ParseConstraints() const {
return ParseConstraints(Constraints);
}
-
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Value *V) {
return V->getValueID() == Value::InlineAsmVal;
}
-
// These are helper methods for dealing with flags in the INLINEASM SDNode
// in the backend.
//
@@ -203,7 +203,7 @@ public:
// code.
// Else:
// Bit 30-16 - The register class ID to use for the operand.
-
+
enum : uint32_t {
// Fixed operands on an INLINEASM SDNode.
Op_InputChain = 0,
@@ -264,15 +264,15 @@ public:
Flag_MatchingOperand = 0x80000000
};
-
+
static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!");
assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind");
return Kind | (NumOps << 3);
}
-
+
/// getFlagWordForMatchingOp - Augment an existing flag word returned by
- /// getFlagWord with information indicating that this input operand is tied
+ /// getFlagWord with information indicating that this input operand is tied
/// to a previous output operand.
static unsigned getFlagWordForMatchingOp(unsigned InputFlag,
unsigned MatchedOperandNo) {
@@ -355,7 +355,6 @@ public:
RC = High - 1;
return true;
}
-
};
} // End llvm namespace
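
Most of this hunk is whitespace cleanup, but the constraint-parsing entry
points it touches are worth a sketch (the constraint string is arbitrary):

    #include "llvm/IR/InlineAsm.h"

    // Count output operands in an asm constraint string such as
    // "=r,r,~{memory}".  An empty vector for a non-empty string means the
    // string failed to parse.
    static unsigned countAsmOutputs(llvm::StringRef Constraints) {
      llvm::InlineAsm::ConstraintInfoVector CIV =
          llvm::InlineAsm::ParseConstraints(Constraints);
      unsigned NumOutputs = 0;
      for (const llvm::InlineAsm::ConstraintInfo &CI : CIV)
        if (CI.Type == llvm::InlineAsm::isOutput)
          ++NumOutputs;
      return NumOutputs;
    }
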
diff --git a/contrib/llvm/include/llvm/IR/InstIterator.h b/contrib/llvm/include/llvm/IR/InstIterator.h
index f3ce649..1baca21 100644
--- a/contrib/llvm/include/llvm/IR/InstIterator.h
+++ b/contrib/llvm/include/llvm/IR/InstIterator.h
@@ -115,19 +115,18 @@ private:
}
};
-
-typedef InstIterator<iplist<BasicBlock>,
- Function::iterator, BasicBlock::iterator,
- Instruction> inst_iterator;
-typedef InstIterator<const iplist<BasicBlock>,
- Function::const_iterator,
- BasicBlock::const_iterator,
+typedef InstIterator<SymbolTableList<BasicBlock>, Function::iterator,
+ BasicBlock::iterator, Instruction> inst_iterator;
+typedef InstIterator<const SymbolTableList<BasicBlock>,
+ Function::const_iterator, BasicBlock::const_iterator,
const Instruction> const_inst_iterator;
+typedef iterator_range<inst_iterator> inst_range;
+typedef iterator_range<const_inst_iterator> const_inst_range;
inline inst_iterator inst_begin(Function *F) { return inst_iterator(*F); }
inline inst_iterator inst_end(Function *F) { return inst_iterator(*F, true); }
-inline iterator_range<inst_iterator> inst_range(Function *F) {
- return iterator_range<inst_iterator>(inst_begin(F), inst_end(F));
+inline inst_range instructions(Function *F) {
+ return inst_range(inst_begin(F), inst_end(F));
}
inline const_inst_iterator inst_begin(const Function *F) {
return const_inst_iterator(*F);
@@ -135,13 +134,13 @@ inline const_inst_iterator inst_begin(const Function *F) {
inline const_inst_iterator inst_end(const Function *F) {
return const_inst_iterator(*F, true);
}
-inline iterator_range<const_inst_iterator> inst_range(const Function *F) {
- return iterator_range<const_inst_iterator>(inst_begin(F), inst_end(F));
+inline const_inst_range instructions(const Function *F) {
+ return const_inst_range(inst_begin(F), inst_end(F));
}
inline inst_iterator inst_begin(Function &F) { return inst_iterator(F); }
inline inst_iterator inst_end(Function &F) { return inst_iterator(F, true); }
-inline iterator_range<inst_iterator> inst_range(Function &F) {
- return iterator_range<inst_iterator>(inst_begin(F), inst_end(F));
+inline inst_range instructions(Function &F) {
+ return inst_range(inst_begin(F), inst_end(F));
}
inline const_inst_iterator inst_begin(const Function &F) {
return const_inst_iterator(F);
@@ -149,8 +148,8 @@ inline const_inst_iterator inst_begin(const Function &F) {
inline const_inst_iterator inst_end(const Function &F) {
return const_inst_iterator(F, true);
}
-inline iterator_range<const_inst_iterator> inst_range(const Function &F) {
- return iterator_range<const_inst_iterator>(inst_begin(F), inst_end(F));
+inline const_inst_range instructions(const Function &F) {
+ return const_inst_range(inst_begin(F), inst_end(F));
}
} // End llvm namespace
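
Callers migrate from the old inst_range(F) spelling to instructions(F); a
minimal sketch of the renamed range helper:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"

    // Walk every instruction in a function, block by block.
    static unsigned countInstructions(llvm::Function &F) {
      unsigned N = 0;
      for (llvm::Instruction &I : llvm::instructions(F)) {
        (void)I; // only counting
        ++N;
      }
      return N;
    }
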
diff --git a/contrib/llvm/include/llvm/IR/InstVisitor.h b/contrib/llvm/include/llvm/IR/InstVisitor.h
index 581e860..088d3e0 100644
--- a/contrib/llvm/include/llvm/IR/InstVisitor.h
+++ b/contrib/llvm/include/llvm/IR/InstVisitor.h
@@ -169,6 +169,9 @@ public:
RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitCatchReturnInst(CatchReturnInst &I) { DELEGATE(TerminatorInst); }
+ RetTy visitCatchSwitchInst(CatchSwitchInst &I) { DELEGATE(TerminatorInst);}
RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
@@ -200,6 +203,9 @@ public:
RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);}
RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); }
+ RetTy visitFuncletPadInst(FuncletPadInst &I) { DELEGATE(Instruction); }
+ RetTy visitCleanupPadInst(CleanupPadInst &I) { DELEGATE(FuncletPadInst); }
+ RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); }
// Handle the special intrinsic instruction classes.
RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);}
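
A sketch of a visitor picking up the new EH-pad callbacks (the counter type
is hypothetical); driving it is the usual 'EHPadCounter C; C.visit(F);'
pattern:

    #include "llvm/IR/InstVisitor.h"

    // Unhandled opcodes fall through to the default no-op visitInstruction().
    struct EHPadCounter : llvm::InstVisitor<EHPadCounter> {
      unsigned Pads = 0;
      void visitCatchPadInst(llvm::CatchPadInst &) { ++Pads; }
      void visitCleanupPadInst(llvm::CleanupPadInst &) { ++Pads; }
      void visitCatchSwitchInst(llvm::CatchSwitchInst &) { ++Pads; }
    };
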
diff --git a/contrib/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm/include/llvm/IR/InstrTypes.h
index b791ded..5091bb4 100644
--- a/contrib/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm/include/llvm/IR/InstrTypes.h
@@ -16,9 +16,12 @@
#ifndef LLVM_IR_INSTRTYPES_H
#define LLVM_IR_INSTRTYPES_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OperandTraits.h"
namespace llvm {
@@ -51,8 +54,8 @@ protected:
virtual BasicBlock *getSuccessorV(unsigned idx) const = 0;
virtual unsigned getNumSuccessorsV() const = 0;
virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
-public:
+public:
/// Return the number of successors that this terminator has.
unsigned getNumSuccessors() const {
return getNumSuccessorsV();
@@ -75,8 +78,198 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
-};
+ // \brief Returns true if this terminator relates to exception handling.
+ bool isExceptional() const {
+ switch (getOpcode()) {
+ case Instruction::CatchSwitch:
+ case Instruction::CatchRet:
+ case Instruction::CleanupRet:
+ case Instruction::Invoke:
+ case Instruction::Resume:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ //===--------------------------------------------------------------------===//
+ // succ_iterator definition
+ //===--------------------------------------------------------------------===//
+
+ template <class Term, class BB> // Successor Iterator
+ class SuccIterator : public std::iterator<std::random_access_iterator_tag, BB,
+ int, BB *, BB *> {
+ typedef std::iterator<std::random_access_iterator_tag, BB, int, BB *, BB *>
+ super;
+
+ public:
+ typedef typename super::pointer pointer;
+ typedef typename super::reference reference;
+
+ private:
+ Term TermInst;
+ unsigned idx;
+ typedef SuccIterator<Term, BB> Self;
+
+ inline bool index_is_valid(unsigned idx) {
+ return idx < TermInst->getNumSuccessors();
+ }
+
+ /// \brief Proxy object to allow write access in operator[]
+ class SuccessorProxy {
+ Self it;
+
+ public:
+ explicit SuccessorProxy(const Self &it) : it(it) {}
+
+ SuccessorProxy(const SuccessorProxy &) = default;
+
+ SuccessorProxy &operator=(SuccessorProxy r) {
+ *this = reference(r);
+ return *this;
+ }
+
+ SuccessorProxy &operator=(reference r) {
+ it.TermInst->setSuccessor(it.idx, r);
+ return *this;
+ }
+
+ operator reference() const { return *it; }
+ };
+
+ public:
+ // begin iterator
+ explicit inline SuccIterator(Term T) : TermInst(T), idx(0) {}
+ // end iterator
+ inline SuccIterator(Term T, bool) : TermInst(T) {
+ if (TermInst)
+ idx = TermInst->getNumSuccessors();
+ else
+      // Term == NULL happens if a basic block is not fully constructed and
+      // getTerminator() consequently returns NULL. In this case we construct
+ // a SuccIterator which describes a basic block that has zero
+ // successors.
+ // Defining SuccIterator for incomplete and malformed CFGs is especially
+ // useful for debugging.
+ idx = 0;
+ }
+
+    /// This is used to interface with code that wants to operate on
+    /// terminator instructions directly.
+ unsigned getSuccessorIndex() const { return idx; }
+
+ inline bool operator==(const Self &x) const { return idx == x.idx; }
+ inline bool operator!=(const Self &x) const { return !operator==(x); }
+
+ inline reference operator*() const { return TermInst->getSuccessor(idx); }
+ inline pointer operator->() const { return operator*(); }
+
+ inline Self &operator++() {
+ ++idx;
+ return *this;
+ } // Preincrement
+
+ inline Self operator++(int) { // Postincrement
+ Self tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ inline Self &operator--() {
+ --idx;
+ return *this;
+ } // Predecrement
+ inline Self operator--(int) { // Postdecrement
+ Self tmp = *this;
+ --*this;
+ return tmp;
+ }
+
+ inline bool operator<(const Self &x) const {
+ assert(TermInst == x.TermInst &&
+ "Cannot compare iterators of different blocks!");
+ return idx < x.idx;
+ }
+
+ inline bool operator<=(const Self &x) const {
+ assert(TermInst == x.TermInst &&
+ "Cannot compare iterators of different blocks!");
+ return idx <= x.idx;
+ }
+ inline bool operator>=(const Self &x) const {
+ assert(TermInst == x.TermInst &&
+ "Cannot compare iterators of different blocks!");
+ return idx >= x.idx;
+ }
+
+ inline bool operator>(const Self &x) const {
+ assert(TermInst == x.TermInst &&
+ "Cannot compare iterators of different blocks!");
+ return idx > x.idx;
+ }
+
+ inline Self &operator+=(int Right) {
+ unsigned new_idx = idx + Right;
+ assert(index_is_valid(new_idx) && "Iterator index out of bound");
+ idx = new_idx;
+ return *this;
+ }
+
+ inline Self operator+(int Right) const {
+ Self tmp = *this;
+ tmp += Right;
+ return tmp;
+ }
+
+ inline Self &operator-=(int Right) { return operator+=(-Right); }
+
+ inline Self operator-(int Right) const { return operator+(-Right); }
+
+ inline int operator-(const Self &x) const {
+ assert(TermInst == x.TermInst &&
+ "Cannot work on iterators of different blocks!");
+ int distance = idx - x.idx;
+ return distance;
+ }
+
+ inline SuccessorProxy operator[](int offset) {
+ Self tmp = *this;
+ tmp += offset;
+ return SuccessorProxy(tmp);
+ }
+
+ /// Get the source BB of this iterator.
+ inline BB *getSource() {
+      assert(TermInst && "Source not available if basic block was malformed");
+ return TermInst->getParent();
+ }
+ };
+
+ typedef SuccIterator<TerminatorInst *, BasicBlock> succ_iterator;
+ typedef SuccIterator<const TerminatorInst *, const BasicBlock>
+ succ_const_iterator;
+ typedef llvm::iterator_range<succ_iterator> succ_range;
+ typedef llvm::iterator_range<succ_const_iterator> succ_const_range;
+
+private:
+ inline succ_iterator succ_begin() { return succ_iterator(this); }
+ inline succ_const_iterator succ_begin() const {
+ return succ_const_iterator(this);
+ }
+ inline succ_iterator succ_end() { return succ_iterator(this, true); }
+ inline succ_const_iterator succ_end() const {
+ return succ_const_iterator(this, true);
+ }
+
+public:
+ inline succ_range successors() {
+ return succ_range(succ_begin(), succ_end());
+ }
+ inline succ_const_range successors() const {
+ return succ_const_range(succ_begin(), succ_end());
+ }
+};
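
A sketch of the new member API (it assumes a fully constructed block, since
SuccIterator tolerates but does not require a terminator):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/InstrTypes.h"

    // Detect exceptional control flow either at the terminator itself or
    // one edge away, using the new isExceptional() and successors().
    static bool nearExceptionalFlow(llvm::BasicBlock &BB) {
      llvm::TerminatorInst *TI = BB.getTerminator();
      if (TI->isExceptional())
        return true;
      for (llvm::BasicBlock *Succ : TI->successors())
        if (Succ->isEHPad())
          return true;
      return false;
    }
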
//===----------------------------------------------------------------------===//
// UnaryInstruction Class
@@ -95,6 +288,7 @@ protected:
: Instruction(Ty, iType, &Op<0>(), 1, IAE) {
Op<0>() = V;
}
+
public:
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -133,6 +327,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
class BinaryOperator : public Instruction {
void *operator new(size_t, unsigned) = delete;
+
protected:
void init(BinaryOps iType);
BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
@@ -209,7 +404,7 @@ public:
BO->setHasNoSignedWrap(true);
return BO;
}
-
+
static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
const Twine &Name = "") {
BinaryOperator *BO = Create(Opc, V1, V2, Name);
@@ -228,7 +423,7 @@ public:
BO->setHasNoUnsignedWrap(true);
return BO;
}
-
+
static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
const Twine &Name = "") {
BinaryOperator *BO = Create(Opc, V1, V2, Name);
@@ -247,29 +442,29 @@ public:
BO->setIsExact(true);
return BO;
}
-
-#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name = "") { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \
- } \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \
- } \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \
- }
-
- DEFINE_HELPERS(Add, NSW) // CreateNSWAdd
- DEFINE_HELPERS(Add, NUW) // CreateNUWAdd
- DEFINE_HELPERS(Sub, NSW) // CreateNSWSub
- DEFINE_HELPERS(Sub, NUW) // CreateNUWSub
- DEFINE_HELPERS(Mul, NSW) // CreateNSWMul
- DEFINE_HELPERS(Mul, NUW) // CreateNUWMul
- DEFINE_HELPERS(Shl, NSW) // CreateNSWShl
- DEFINE_HELPERS(Shl, NUW) // CreateNUWShl
+
+#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \
+ static BinaryOperator *Create##NUWNSWEXACT##OPC(Value *V1, Value *V2, \
+ const Twine &Name = "") { \
+ return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \
+ } \
+ static BinaryOperator *Create##NUWNSWEXACT##OPC( \
+ Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \
+ return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \
+ } \
+ static BinaryOperator *Create##NUWNSWEXACT##OPC( \
+ Value *V1, Value *V2, const Twine &Name, Instruction *I) { \
+ return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \
+ }
+
+ DEFINE_HELPERS(Add, NSW) // CreateNSWAdd
+ DEFINE_HELPERS(Add, NUW) // CreateNUWAdd
+ DEFINE_HELPERS(Sub, NSW) // CreateNSWSub
+ DEFINE_HELPERS(Sub, NUW) // CreateNUWSub
+ DEFINE_HELPERS(Mul, NSW) // CreateNSWMul
+ DEFINE_HELPERS(Mul, NUW) // CreateNUWMul
+ DEFINE_HELPERS(Shl, NSW) // CreateNSWShl
+ DEFINE_HELPERS(Shl, NUW) // CreateNUWShl
DEFINE_HELPERS(SDiv, Exact) // CreateExactSDiv
DEFINE_HELPERS(UDiv, Exact) // CreateExactUDiv
@@ -277,7 +472,7 @@ public:
DEFINE_HELPERS(LShr, Exact) // CreateExactLShr
#undef DEFINE_HELPERS
-
+
/// Helper functions to construct and inspect unary operations (NEG and NOT)
/// via binary operators SUB and XOR:
///
@@ -355,7 +550,7 @@ public:
/// Convenience method to copy supported wrapping, exact, and fast-math flags
/// from V to this instruction.
void copyIRFlags(const Value *V);
-
+
/// Logical 'and' of any supported wrapping, exact, and fast-math flags of
/// V and this instruction.
void andIRFlags(const Value *V);
@@ -388,6 +583,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
/// @brief Base class of casting instructions.
class CastInst : public UnaryInstruction {
void anchor() override;
+
protected:
/// @brief Constructor with insert-before-instruction semantics for subclasses
CastInst(Type *Ty, unsigned iType, Value *S,
@@ -401,6 +597,7 @@ protected:
: UnaryInstruction(Ty, iType, S, InsertAtEnd) {
setName(NameStr);
}
+
public:
/// Provides a way to construct any of the CastInst subclasses using an
/// opcode instead of the subclass's constructor. The opcode must be in the
@@ -490,7 +687,7 @@ public:
Value *S, ///< The pointer value to be casted (operand 0)
Type *Ty, ///< The type to which cast should be made
const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ Instruction *InsertBefore = nullptr ///< Place to insert the instruction
);
/// @brief Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
@@ -503,7 +700,7 @@ public:
Value *S, ///< The pointer value to be casted (operand 0)
Type *Ty, ///< The type to which cast should be made
const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ Instruction *InsertBefore = nullptr ///< Place to insert the instruction
);
/// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
@@ -677,18 +874,6 @@ public:
/// This class is the base class for the comparison instructions.
/// @brief Abstract base class of comparison instructions.
class CmpInst : public Instruction {
- void *operator new(size_t, unsigned) = delete;
- CmpInst() = delete;
-protected:
- CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const Twine &Name = "",
- Instruction *InsertBefore = nullptr);
-
- CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const Twine &Name,
- BasicBlock *InsertAtEnd);
-
- void anchor() override; // Out of line virtual method.
public:
/// This enumeration lists the possible predicates for CmpInst subclasses.
/// Values in the range 0-31 are reserved for FCmpInst, while values in the
@@ -730,6 +915,22 @@ public:
BAD_ICMP_PREDICATE = ICMP_SLE + 1
};
+private:
+ void *operator new(size_t, unsigned) = delete;
+ CmpInst() = delete;
+
+protected:
+ CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
+ Value *LHS, Value *RHS, const Twine &Name = "",
+ Instruction *InsertBefore = nullptr);
+
+ CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+
+ void anchor() override; // Out of line virtual method.
+
+public:
// allocate space for exactly two operands
void *operator new(size_t s) {
return User::operator new(s, 2);
@@ -740,7 +941,7 @@ public:
/// The specified Instruction is allowed to be a dereferenced end iterator.
/// @brief Create a CmpInst
static CmpInst *Create(OtherOps Op,
- unsigned short predicate, Value *S1,
+ Predicate predicate, Value *S1,
Value *S2, const Twine &Name = "",
Instruction *InsertBefore = nullptr);
@@ -748,7 +949,7 @@ public:
/// two operands. Also automatically insert this instruction to the end of
/// the BasicBlock specified.
/// @brief Create a CmpInst
- static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
+ static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1,
Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
/// @brief Get the opcode casted to the right type
@@ -775,7 +976,6 @@ public:
bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
-
/// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
/// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
/// @returns the inverse predicate for the instruction's current predicate.
@@ -833,6 +1033,19 @@ public:
return isUnsigned(getPredicate());
}
+ /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert
+ /// @returns the signed version of the unsigned predicate pred.
+ /// @brief return the signed version of a predicate
+ static Predicate getSignedPredicate(Predicate pred);
+
+ /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert
+ /// @returns the signed version of the predicate for this instruction (which
+ /// has to be an unsigned predicate).
+ /// @brief return the signed version of a predicate
+ Predicate getSignedPredicate() {
+ return getSignedPredicate(getPredicate());
+ }
+
/// This is just a convenience.
/// @brief Determine if this is true when both operands are the same.
bool isTrueWhenEqual() const {
@@ -847,23 +1060,23 @@ public:
/// @returns true if the predicate is unsigned, false otherwise.
/// @brief Determine if the predicate is an unsigned operation.
- static bool isUnsigned(unsigned short predicate);
+ static bool isUnsigned(Predicate predicate);
/// @returns true if the predicate is signed, false otherwise.
/// @brief Determine if the predicate is an signed operation.
- static bool isSigned(unsigned short predicate);
+ static bool isSigned(Predicate predicate);
/// @brief Determine if the predicate is an ordered operation.
- static bool isOrdered(unsigned short predicate);
+ static bool isOrdered(Predicate predicate);
/// @brief Determine if the predicate is an unordered operation.
- static bool isUnordered(unsigned short predicate);
+ static bool isUnordered(Predicate predicate);
/// Determine if the predicate is true when comparing a value with itself.
- static bool isTrueWhenEqual(unsigned short predicate);
+ static bool isTrueWhenEqual(Predicate predicate);
/// Determine if the predicate is false when comparing a value with itself.
- static bool isFalseWhenEqual(unsigned short predicate);
+ static bool isFalseWhenEqual(Predicate predicate);
/// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
@@ -882,6 +1095,7 @@ public:
}
return Type::getInt1Ty(opnd_type->getContext());
}
+
private:
// Shadow Value::setValueSubclassData with a private forwarding method so that
// subclasses cannot accidentally use it.
@@ -890,7 +1104,6 @@ private:
}
};
-
// FIXME: these are redundant if CmpInst < BinaryOperator
template <>
struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
@@ -898,6 +1111,523 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
-} // End llvm namespace
+//===----------------------------------------------------------------------===//
+// FuncletPadInst Class
+//===----------------------------------------------------------------------===//
+class FuncletPadInst : public Instruction {
+private:
+ void init(Value *ParentPad, ArrayRef<Value *> Args, const Twine &NameStr);
+
+ FuncletPadInst(const FuncletPadInst &CPI);
+
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore);
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ friend class CatchPadInst;
+ friend class CleanupPadInst;
+ FuncletPadInst *cloneImpl() const;
+
+public:
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getNumArgOperands - Return the number of funcletpad arguments.
+ ///
+ unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+
+ /// Convenience accessors
+
+ /// \brief Return the outer EH-pad this funclet is nested within.
+ ///
+ /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
+ /// is a CatchPadInst.
+ Value *getParentPad() const { return Op<-1>(); }
+ void setParentPad(Value *ParentPad) {
+ assert(ParentPad);
+ Op<-1>() = ParentPad;
+ }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ op_range arg_operands() { return op_range(op_begin(), op_end() - 1); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ const_op_range arg_operands() const {
+ return const_op_range(op_begin(), op_end() - 1);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) { return I->isFuncletPad(); }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<FuncletPadInst>
+ : public VariadicOperandTraits<FuncletPadInst, /*MINARITY=*/1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
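
A sketch of generic funclet-pad inspection through the new base class:

    #include "llvm/IR/InstrTypes.h"

    // Both catchpad and cleanuppad expose their parent pad and constraint
    // arguments through FuncletPadInst; for a catchpad the parent is the
    // associated CatchSwitchInst.
    static unsigned numPadArgs(const llvm::FuncletPadInst &Pad) {
      unsigned N = 0;
      for (const llvm::Use &U : Pad.arg_operands()) {
        (void)U;
        ++N;
      }
      return N; // equivalently, Pad.getNumArgOperands()
    }
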
+
+/// \brief A lightweight accessor for an operand bundle meant to be passed
+/// around by value.
+struct OperandBundleUse {
+ ArrayRef<Use> Inputs;
+
+ OperandBundleUse() {}
+ explicit OperandBundleUse(StringMapEntry<uint32_t> *Tag, ArrayRef<Use> Inputs)
+ : Inputs(Inputs), Tag(Tag) {}
+
+ /// \brief Return true if the operand at index \p Idx in this operand bundle
+ /// has the attribute A.
+ bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const {
+ if (isDeoptOperandBundle())
+ if (A == Attribute::ReadOnly || A == Attribute::NoCapture)
+ return Inputs[Idx]->getType()->isPointerTy();
+
+ // Conservative answer: no operands have any attributes.
+ return false;
+ };
+
+ /// \brief Return the tag of this operand bundle as a string.
+ StringRef getTagName() const {
+ return Tag->getKey();
+ }
+
+ /// \brief Return the tag of this operand bundle as an integer.
+ ///
+ /// Operand bundle tags are interned by LLVMContextImpl::getOrInsertBundleTag,
+  /// and this function returns the unique integer that getOrInsertBundleTag
+  /// associated with the tag of this operand bundle.
+ uint32_t getTagID() const {
+ return Tag->getValue();
+ }
+
+ /// \brief Return true if this is a "deopt" operand bundle.
+ bool isDeoptOperandBundle() const {
+ return getTagID() == LLVMContext::OB_deopt;
+ }
+
+ /// \brief Return true if this is a "funclet" operand bundle.
+ bool isFuncletOperandBundle() const {
+ return getTagID() == LLVMContext::OB_funclet;
+ }
+
+private:
+ /// \brief Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag.
+ StringMapEntry<uint32_t> *Tag;
+};
+
+/// \brief A container for an operand bundle being viewed as a set of values
+/// rather than a set of uses.
+///
+/// Unlike OperandBundleUse, OperandBundleDefT owns the memory it carries, and
+/// so it is possible to create and pass around "self-contained" instances of
+/// OperandBundleDef and ConstOperandBundleDef.
+template <typename InputTy> class OperandBundleDefT {
+ std::string Tag;
+ std::vector<InputTy> Inputs;
+
+public:
+ explicit OperandBundleDefT(std::string Tag, std::vector<InputTy> Inputs)
+ : Tag(std::move(Tag)), Inputs(std::move(Inputs)) {}
+ explicit OperandBundleDefT(std::string Tag, ArrayRef<InputTy> Inputs)
+ : Tag(std::move(Tag)), Inputs(Inputs) {}
+
+ explicit OperandBundleDefT(const OperandBundleUse &OBU) {
+ Tag = OBU.getTagName();
+ Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end());
+ }
+
+ ArrayRef<InputTy> inputs() const { return Inputs; }
+
+ typedef typename std::vector<InputTy>::const_iterator input_iterator;
+ size_t input_size() const { return Inputs.size(); }
+ input_iterator input_begin() const { return Inputs.begin(); }
+ input_iterator input_end() const { return Inputs.end(); }
+
+ StringRef getTag() const { return Tag; }
+};
+
+typedef OperandBundleDefT<Value *> OperandBundleDef;
+typedef OperandBundleDefT<const Value *> ConstOperandBundleDef;
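
A sketch of building a call with a "deopt" bundle through the widened
IRBuilder overload from earlier in this patch (the function and parameter
names are hypothetical):

    #include "llvm/IR/IRBuilder.h"
    #include <vector>

    // Attach deoptimization state to a new call as a "deopt" operand bundle.
    static llvm::CallInst *emitCallWithDeoptState(
        llvm::IRBuilder<> &B, llvm::Value *Callee,
        llvm::ArrayRef<llvm::Value *> Args,
        llvm::ArrayRef<llvm::Value *> State) {
      llvm::OperandBundleDef Deopt(
          "deopt", std::vector<llvm::Value *>(State.begin(), State.end()));
      return B.CreateCall(Callee, Args, {Deopt});
    }
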
+
+/// \brief A mixin to add operand bundle functionality to llvm instruction
+/// classes.
+///
+/// OperandBundleUser uses the descriptor area co-allocated with the host User
+/// to store some meta information about which operands are "normal" operands,
+/// and which ones belong to some operand bundle.
+///
+/// The layout of an operand bundle user is
+///
+/// +-----------uint32_t End-------------------------------------+
+/// | |
+/// | +--------uint32_t Begin--------------------+ |
+/// | | | |
+/// ^ ^ v v
+/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+/// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. | Un
+/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+/// v v ^ ^
+/// | | | |
+/// | +--------uint32_t Begin------------+ |
+/// | |
+/// +-----------uint32_t End-----------------------------+
+///
+///
+/// BOI0, BOI1 ... are descriptions of operand bundles in this User's use list.
+/// These descriptions are installed and managed by this class, and they're all
+/// instances of OperandBundleUser<T>::BundleOpInfo.
+///
+/// DU is an additional descriptor installed by User's 'operator new' to keep
+/// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not
+/// access or modify DU in any way; it's an implementation detail private to
+/// User.
+///
+/// The regular Use& vector for the User starts at U0. The operand bundle uses
+/// are part of the Use& vector, just like normal uses. In the diagram above,
+/// the operand bundle uses start at BOI0_U0. Each instance of BundleOpInfo has
+/// information about a contiguous set of uses constituting an operand bundle,
+/// and the total set of operand bundle uses themselves form a contiguous set of
+/// uses (i.e. there are no gaps between uses corresponding to individual
+/// operand bundles).
+///
+/// This class does not know the location of the set of operand bundle uses
+/// within the use list -- that is decided by the User using this class via the
+/// BeginIdx argument in populateBundleOperandInfos.
+///
+/// Currently operand bundle users with hung-off operands are not supported.
+template <typename InstrTy, typename OpIteratorTy> class OperandBundleUser {
+public:
+ /// \brief Return the number of operand bundles associated with this User.
+ unsigned getNumOperandBundles() const {
+ return std::distance(bundle_op_info_begin(), bundle_op_info_end());
+ }
+
+ /// \brief Return true if this User has any operand bundles.
+ bool hasOperandBundles() const { return getNumOperandBundles() != 0; }
+
+ /// \brief Return the index of the first bundle operand in the Use array.
+ unsigned getBundleOperandsStartIndex() const {
+ assert(hasOperandBundles() && "Don't call otherwise!");
+ return bundle_op_info_begin()->Begin;
+ }
+
+ /// \brief Return the index of the last bundle operand in the Use array.
+ unsigned getBundleOperandsEndIndex() const {
+ assert(hasOperandBundles() && "Don't call otherwise!");
+ return bundle_op_info_end()[-1].End;
+ }
+
+  /// \brief Return the total number of operands (not operand bundles) used by
+ /// every operand bundle in this OperandBundleUser.
+ unsigned getNumTotalBundleOperands() const {
+ if (!hasOperandBundles())
+ return 0;
+
+ unsigned Begin = getBundleOperandsStartIndex();
+ unsigned End = getBundleOperandsEndIndex();
+
+ assert(Begin <= End && "Should be!");
+ return End - Begin;
+ }
+
+ /// \brief Return the operand bundle at a specific index.
+ OperandBundleUse getOperandBundleAt(unsigned Index) const {
+ assert(Index < getNumOperandBundles() && "Index out of bounds!");
+ return operandBundleFromBundleOpInfo(*(bundle_op_info_begin() + Index));
+ }
+
+ /// \brief Return the number of operand bundles with the tag Name attached to
+ /// this instruction.
+ unsigned countOperandBundlesOfType(StringRef Name) const {
+ unsigned Count = 0;
+ for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i)
+ if (getOperandBundleAt(i).getTagName() == Name)
+ Count++;
+
+ return Count;
+ }
+
+ /// \brief Return the number of operand bundles with the tag ID attached to
+ /// this instruction.
+ unsigned countOperandBundlesOfType(uint32_t ID) const {
+ unsigned Count = 0;
+ for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i)
+ if (getOperandBundleAt(i).getTagID() == ID)
+ Count++;
+
+ return Count;
+ }
+
+ /// \brief Return an operand bundle by name, if present.
+ ///
+ /// It is an error to call this for operand bundle types that may have
+ /// multiple instances of them on the same instruction.
+ Optional<OperandBundleUse> getOperandBundle(StringRef Name) const {
+ assert(countOperandBundlesOfType(Name) < 2 && "Precondition violated!");
+
+ for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) {
+ OperandBundleUse U = getOperandBundleAt(i);
+ if (U.getTagName() == Name)
+ return U;
+ }
+
+ return None;
+ }
+
+ /// \brief Return an operand bundle by tag ID, if present.
+ ///
+ /// It is an error to call this for operand bundle types that may have
+ /// multiple instances of them on the same instruction.
+ Optional<OperandBundleUse> getOperandBundle(uint32_t ID) const {
+ assert(countOperandBundlesOfType(ID) < 2 && "Precondition violated!");
+
+ for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) {
+ OperandBundleUse U = getOperandBundleAt(i);
+ if (U.getTagID() == ID)
+ return U;
+ }
+
+ return None;
+ }
+
+ /// \brief Return the list of operand bundles attached to this instruction as
+ /// a vector of OperandBundleDefs.
+ ///
+  /// This function copies the OperandBundleUse instances associated with this
+  /// OperandBundleUser to a vector of OperandBundleDefs. Note:
+  /// OperandBundleUses and OperandBundleDefs are non-trivially *different*
+ /// representations of operand bundles (see documentation above).
+ void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
+ for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i)
+ Defs.emplace_back(getOperandBundleAt(i));
+ }
+
+ /// \brief Return the operand bundle for the operand at index OpIdx.
+ ///
+  /// It is an error to call this with an OpIdx that does not correspond to a
+  /// bundle operand.
+ OperandBundleUse getOperandBundleForOperand(unsigned OpIdx) const {
+ return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx));
+ }
+
+ /// \brief Return true if this operand bundle user has operand bundles that
+ /// may read from the heap.
+ bool hasReadingOperandBundles() const {
+ // Implementation note: this is a conservative implementation of operand
+ // bundle semantics, where *any* operand bundle forces a callsite to be at
+ // least readonly.
+ return hasOperandBundles();
+ }
+
+ /// \brief Return true if this operand bundle user has operand bundles that
+ /// may write to the heap.
+ bool hasClobberingOperandBundles() const {
+ for (auto &BOI : bundle_op_infos()) {
+ if (BOI.Tag->second == LLVMContext::OB_deopt ||
+ BOI.Tag->second == LLVMContext::OB_funclet)
+ continue;
+
+ // This instruction has an operand bundle that is not known to us.
+ // Assume the worst.
+ return true;
+ }
+
+ return false;
+ }
+
+ /// \brief Return true if the bundle operand at index \p OpIdx has the
+ /// attribute \p A.
+ bool bundleOperandHasAttr(unsigned OpIdx, Attribute::AttrKind A) const {
+ auto &BOI = getBundleOpInfoForOperand(OpIdx);
+ auto OBU = operandBundleFromBundleOpInfo(BOI);
+ return OBU.operandHasAttr(OpIdx - BOI.Begin, A);
+ }
+
+ /// \brief Return true if \p Other has the same sequence of operand bundle
+ /// tags with the same number of operands on each one of them as this
+ /// OperandBundleUser.
+ bool hasIdenticalOperandBundleSchema(
+ const OperandBundleUser<InstrTy, OpIteratorTy> &Other) const {
+ if (getNumOperandBundles() != Other.getNumOperandBundles())
+ return false;
+
+ return std::equal(bundle_op_info_begin(), bundle_op_info_end(),
+ Other.bundle_op_info_begin());
+ };
+
+protected:
+ /// \brief Is the function attribute S disallowed by some operand bundle on
+ /// this operand bundle user?
+ bool isFnAttrDisallowedByOpBundle(StringRef S) const {
+    // Operand bundles only possibly disallow readnone, readonly and argmemonly
+    // attributes. All string attributes are fine.
+ return false;
+ }
+
+ /// \brief Is the function attribute A disallowed by some operand bundle on
+ /// this operand bundle user?
+ bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const {
+ switch (A) {
+ default:
+ return false;
+
+ case Attribute::ArgMemOnly:
+ return hasReadingOperandBundles();
+
+ case Attribute::ReadNone:
+ return hasReadingOperandBundles();
+
+ case Attribute::ReadOnly:
+ return hasClobberingOperandBundles();
+ }
+
+ llvm_unreachable("switch has a default case!");
+ }
+
+ /// \brief Used to keep track of an operand bundle. See the main comment on
+ /// OperandBundleUser above.
+ struct BundleOpInfo {
+ /// \brief The operand bundle tag, interned by
+ /// LLVMContextImpl::getOrInsertBundleTag.
+ StringMapEntry<uint32_t> *Tag;
+
+ /// \brief The index in the Use& vector where operands for this operand
+ /// bundle starts.
+ uint32_t Begin;
+
+ /// \brief The index in the Use& vector where operands for this operand
+ /// bundle ends.
+ uint32_t End;
+
+ bool operator==(const BundleOpInfo &Other) const {
+ return Tag == Other.Tag && Begin == Other.Begin && End == Other.End;
+ }
+ };
+
+ /// \brief Simple helper function to map a BundleOpInfo to an
+ /// OperandBundleUse.
+ OperandBundleUse
+ operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const {
+ auto op_begin = static_cast<const InstrTy *>(this)->op_begin();
+ ArrayRef<Use> Inputs(op_begin + BOI.Begin, op_begin + BOI.End);
+ return OperandBundleUse(BOI.Tag, Inputs);
+ }
+
+ typedef BundleOpInfo *bundle_op_iterator;
+ typedef const BundleOpInfo *const_bundle_op_iterator;
+
+ /// \brief Return the start of the list of BundleOpInfo instances associated
+ /// with this OperandBundleUser.
+ bundle_op_iterator bundle_op_info_begin() {
+ if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ return nullptr;
+
+ uint8_t *BytesBegin = static_cast<InstrTy *>(this)->getDescriptor().begin();
+ return reinterpret_cast<bundle_op_iterator>(BytesBegin);
+ }
+
+ /// \brief Return the start of the list of BundleOpInfo instances associated
+ /// with this OperandBundleUser.
+ const_bundle_op_iterator bundle_op_info_begin() const {
+ auto *NonConstThis =
+ const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ return NonConstThis->bundle_op_info_begin();
+ }
+
+ /// \brief Return the end of the list of BundleOpInfo instances associated
+ /// with this OperandBundleUser.
+ bundle_op_iterator bundle_op_info_end() {
+ if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ return nullptr;
+
+ uint8_t *BytesEnd = static_cast<InstrTy *>(this)->getDescriptor().end();
+ return reinterpret_cast<bundle_op_iterator>(BytesEnd);
+ }
+
+ /// \brief Return the end of the list of BundleOpInfo instances associated
+ /// with this OperandBundleUser.
+ const_bundle_op_iterator bundle_op_info_end() const {
+ auto *NonConstThis =
+ const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ return NonConstThis->bundle_op_info_end();
+ }
+
+ /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
+ iterator_range<bundle_op_iterator> bundle_op_infos() {
+ return make_range(bundle_op_info_begin(), bundle_op_info_end());
+ }
+
+ /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end).
+ iterator_range<const_bundle_op_iterator> bundle_op_infos() const {
+ return make_range(bundle_op_info_begin(), bundle_op_info_end());
+ }
+
+ /// \brief Populate the BundleOpInfo instances and the Use& vector from \p
+  /// Bundles. Return the op_iterator pointing to the Use& one past the
+  /// last bundle operand use.
+ ///
+  /// Each \p OperandBundleDef instance is tracked by a BundleOpInfo
+  /// instance allocated in this User's descriptor.
+ OpIteratorTy populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
+ const unsigned BeginIndex) {
+ auto It = static_cast<InstrTy *>(this)->op_begin() + BeginIndex;
+ for (auto &B : Bundles)
+ It = std::copy(B.input_begin(), B.input_end(), It);
+
+ auto *ContextImpl = static_cast<InstrTy *>(this)->getContext().pImpl;
+ auto BI = Bundles.begin();
+ unsigned CurrentIndex = BeginIndex;
+
+ for (auto &BOI : bundle_op_infos()) {
+ assert(BI != Bundles.end() && "Incorrect allocation?");
+
+ BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag());
+ BOI.Begin = CurrentIndex;
+ BOI.End = CurrentIndex + BI->input_size();
+ CurrentIndex = BOI.End;
+ BI++;
+ }
+
+ assert(BI == Bundles.end() && "Incorrect allocation?");
+
+ return It;
+ }
+
+ /// \brief Return the BundleOpInfo for the operand at index OpIdx.
+ ///
+  /// It is an error to call this with an OpIdx that does not correspond to a
+  /// bundle operand.
+ const BundleOpInfo &getBundleOpInfoForOperand(unsigned OpIdx) const {
+ for (auto &BOI : bundle_op_infos())
+ if (BOI.Begin <= OpIdx && OpIdx < BOI.End)
+ return BOI;
+
+ llvm_unreachable("Did not find operand bundle for operand!");
+ }
+
+ /// \brief Return the total number of values used in \p Bundles.
+ static unsigned CountBundleInputs(ArrayRef<OperandBundleDef> Bundles) {
+ unsigned Total = 0;
+ for (auto &B : Bundles)
+ Total += B.input_size();
+ return Total;
+ }
+};
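
Since CallInst and InvokeInst mix this class in, the query side looks like
the following sketch:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    // Return how many deopt state values a call site records, or 0 if it
    // carries no "deopt" bundle.
    static unsigned numDeoptInputs(const llvm::CallInst &CI) {
      if (llvm::Optional<llvm::OperandBundleUse> OB =
              CI.getOperandBundle(llvm::LLVMContext::OB_deopt))
        return static_cast<unsigned>(OB->Inputs.size());
      return 0;
    }
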
+
+} // end llvm namespace
-#endif
+#endif // LLVM_IR_INSTRTYPES_H
diff --git a/contrib/llvm/include/llvm/IR/Instruction.def b/contrib/llvm/include/llvm/IR/Instruction.def
index d46314c..18711ab 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.def
+++ b/contrib/llvm/include/llvm/IR/Instruction.def
@@ -1,21 +1,21 @@
//===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file contains descriptions of the various LLVM instructions. This is
-// used as a central place for enumerating the different instructions and
+// used as a central place for enumerating the different instructions and
// should eventually be the place to put comments about the instructions.
//
//===----------------------------------------------------------------------===//
// NOTE: NO INCLUDE GUARD DESIRED!
-// Provide definitions of macros so that users of this file do not have to
+// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
//
#ifndef FIRST_TERM_INST
@@ -74,6 +74,20 @@
#define LAST_CAST_INST(num)
#endif
+#ifndef FIRST_FUNCLETPAD_INST
+#define FIRST_FUNCLETPAD_INST(num)
+#endif
+#ifndef HANDLE_FUNCLETPAD_INST
+#ifndef HANDLE_INST
+#define HANDLE_FUNCLETPAD_INST(num, opcode, Class)
+#else
+#define HANDLE_FUNCLETPAD_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class)
+#endif
+#endif
+#ifndef LAST_FUNCLETPAD_INST
+#define LAST_FUNCLETPAD_INST(num)
+#endif
+
#ifndef FIRST_OTHER_INST
#define FIRST_OTHER_INST(num)
#endif
@@ -88,92 +102,99 @@
#define LAST_OTHER_INST(num)
#endif
-
// Terminator Instructions - These instructions are used to terminate a basic
// block of the program. Every basic block must end with one of these
// instructions for it to be a well formed basic block.
//
FIRST_TERM_INST ( 1)
-HANDLE_TERM_INST ( 1, Ret , ReturnInst)
-HANDLE_TERM_INST ( 2, Br , BranchInst)
-HANDLE_TERM_INST ( 3, Switch , SwitchInst)
-HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst)
-HANDLE_TERM_INST ( 5, Invoke , InvokeInst)
-HANDLE_TERM_INST ( 6, Resume , ResumeInst)
-HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst)
- LAST_TERM_INST ( 7)
+HANDLE_TERM_INST ( 1, Ret , ReturnInst)
+HANDLE_TERM_INST ( 2, Br , BranchInst)
+HANDLE_TERM_INST ( 3, Switch , SwitchInst)
+HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst)
+HANDLE_TERM_INST ( 5, Invoke , InvokeInst)
+HANDLE_TERM_INST ( 6, Resume , ResumeInst)
+HANDLE_TERM_INST ( 7, Unreachable , UnreachableInst)
+HANDLE_TERM_INST ( 8, CleanupRet , CleanupReturnInst)
+HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst)
+HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst)
+ LAST_TERM_INST (10)
// Standard binary operators...
- FIRST_BINARY_INST( 8)
-HANDLE_BINARY_INST( 8, Add , BinaryOperator)
-HANDLE_BINARY_INST( 9, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(10, Sub , BinaryOperator)
-HANDLE_BINARY_INST(11, FSub , BinaryOperator)
-HANDLE_BINARY_INST(12, Mul , BinaryOperator)
-HANDLE_BINARY_INST(13, FMul , BinaryOperator)
-HANDLE_BINARY_INST(14, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(15, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(16, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(17, URem , BinaryOperator)
-HANDLE_BINARY_INST(18, SRem , BinaryOperator)
-HANDLE_BINARY_INST(19, FRem , BinaryOperator)
+ FIRST_BINARY_INST(11)
+HANDLE_BINARY_INST(11, Add , BinaryOperator)
+HANDLE_BINARY_INST(12, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(13, Sub , BinaryOperator)
+HANDLE_BINARY_INST(14, FSub , BinaryOperator)
+HANDLE_BINARY_INST(15, Mul , BinaryOperator)
+HANDLE_BINARY_INST(16, FMul , BinaryOperator)
+HANDLE_BINARY_INST(17, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(18, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(19, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(20, URem , BinaryOperator)
+HANDLE_BINARY_INST(21, SRem , BinaryOperator)
+HANDLE_BINARY_INST(22, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(23, And , BinaryOperator)
-HANDLE_BINARY_INST(24, Or , BinaryOperator)
-HANDLE_BINARY_INST(25, Xor , BinaryOperator)
- LAST_BINARY_INST(25)
+HANDLE_BINARY_INST(23, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(26, And , BinaryOperator)
+HANDLE_BINARY_INST(27, Or , BinaryOperator)
+HANDLE_BINARY_INST(28, Xor , BinaryOperator)
+ LAST_BINARY_INST(28)
// Memory operators...
- FIRST_MEMORY_INST(26)
-HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(28, Store , StoreInst )
-HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(30, Fence , FenceInst )
-HANDLE_MEMORY_INST(31, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(32, AtomicRMW , AtomicRMWInst )
- LAST_MEMORY_INST(32)
+ FIRST_MEMORY_INST(29)
+HANDLE_MEMORY_INST(29, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(30, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(31, Store , StoreInst )
+HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(33, Fence , FenceInst )
+HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst )
+ LAST_MEMORY_INST(35)
// Cast operators ...
-// NOTE: The order matters here because CastInst::isEliminableCastPair
+// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(33)
-HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast
-HANDLE_CAST_INST(45, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
- LAST_CAST_INST(45)
+ FIRST_CAST_INST(36)
+HANDLE_CAST_INST(36, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(37, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(38, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(39, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(40, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(41, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(42, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(43, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(44, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(47, BitCast , BitCastInst ) // Type cast
+HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
+ LAST_CAST_INST(48)
+
+ FIRST_FUNCLETPAD_INST(49)
+HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(50, CatchPad , CatchPadInst)
+ LAST_FUNCLETPAD_INST(50)
// Other operators...
- FIRST_OTHER_INST(46)
-HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction
-HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(59)
+ FIRST_OTHER_INST(51)
+HANDLE_OTHER_INST(51, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction
+HANDLE_OTHER_INST(56, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_OTHER_INST(57, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(63, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction.
+ LAST_OTHER_INST(64)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
@@ -191,6 +212,10 @@ HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction.
#undef HANDLE_CAST_INST
#undef LAST_CAST_INST
+#undef FIRST_FUNCLETPAD_INST
+#undef HANDLE_FUNCLETPAD_INST
+#undef LAST_FUNCLETPAD_INST
+
#undef FIRST_OTHER_INST
#undef HANDLE_OTHER_INST
#undef LAST_OTHER_INST
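
Instruction.def is an X-macro file: it deliberately has no include guard, and the preamble above makes every per-category HANDLE_*_INST macro fall back to HANDLE_INST when the consumer defines only the latter. A hedged sketch of the usual consumption pattern (the consumer function here is illustrative; LLVM derives its own opcode tables the same way):

    // Expand every instruction into one switch case mapping opcode -> name.
    const char *opcodeName(unsigned Opcode) {
      switch (Opcode) {
    #define HANDLE_INST(num, opcode, Class)                                        \
      case num:                                                                    \
        return #opcode;
    #include "llvm/IR/Instruction.def"
      default:
        return "<unknown opcode>";
      }
    }
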
diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h
index 31f363f..03c4549 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm/include/llvm/IR/Instruction.h
@@ -30,25 +30,11 @@ class BasicBlock;
struct AAMDNodes;
template <>
-struct ilist_traits<Instruction>
- : public SymbolTableListTraits<Instruction, BasicBlock> {
+struct SymbolTableListSentinelTraits<Instruction>
+ : public ilist_half_embedded_sentinel_traits<Instruction> {};
- /// \brief Return a node that marks the end of a list.
- ///
- /// The sentinel is relative to this instance, so we use a non-static
- /// method.
- Instruction *createSentinel() const;
- static void destroySentinel(Instruction *) {}
-
- Instruction *provideInitialHead() const { return createSentinel(); }
- Instruction *ensureHead(Instruction *) const { return createSentinel(); }
- static void noteHead(Instruction *, Instruction *) {}
-
-private:
- mutable ilist_half_node<Instruction> Sentinel;
-};
-
-class Instruction : public User, public ilist_node<Instruction> {
+class Instruction : public User,
+ public ilist_node_with_parent<Instruction, BasicBlock> {
void operator=(const Instruction &) = delete;
Instruction(const Instruction &) = delete;
@@ -80,6 +66,13 @@ public:
const Module *getModule() const;
Module *getModule();
+ /// \brief Return the function this instruction belongs to.
+ ///
+ /// Note: it is undefined behavior to call this on an instruction not
+ /// currently inserted into a function.
+ const Function *getFunction() const;
+ Function *getFunction();
+
/// removeFromParent - This method unlinks 'this' from the containing basic
/// block, but does not delete it.
///
@@ -89,7 +82,7 @@ public:
/// block and deletes it.
///
/// \returns an iterator pointing to the element after the erased one
- iplist<Instruction>::iterator eraseFromParent();
+ SymbolTableList<Instruction>::iterator eraseFromParent();
/// Insert an unlinked instruction into a basic block immediately before
/// the specified instruction.
@@ -116,6 +109,7 @@ public:
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
bool isShift() { return isShift(getOpcode()); }
bool isCast() const { return isCast(getOpcode()); }
+ bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
static const char* getOpcodeName(unsigned OpCode);
@@ -148,6 +142,11 @@ public:
return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
}
+ /// @brief Determine if the OpCode is one of the FuncletPadInst instructions.
+ static inline bool isFuncletPad(unsigned OpCode) {
+ return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd;
+ }
+
//===--------------------------------------------------------------------===//
// Metadata manipulation.
//===--------------------------------------------------------------------===//
@@ -204,20 +203,22 @@ public:
void setMetadata(unsigned KindID, MDNode *Node);
void setMetadata(StringRef Kind, MDNode *Node);
- /// \brief Drop unknown metadata.
+ /// Drop all unknown metadata except for debug locations.
+ /// @{
/// Passes are required to drop metadata they don't understand. This is a
/// convenience method for passes to do so.
- void dropUnknownMetadata(ArrayRef<unsigned> KnownIDs);
- void dropUnknownMetadata() {
- return dropUnknownMetadata(None);
+ void dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs);
+ void dropUnknownNonDebugMetadata() {
+ return dropUnknownNonDebugMetadata(None);
}
- void dropUnknownMetadata(unsigned ID1) {
- return dropUnknownMetadata(makeArrayRef(ID1));
+ void dropUnknownNonDebugMetadata(unsigned ID1) {
+ return dropUnknownNonDebugMetadata(makeArrayRef(ID1));
}
- void dropUnknownMetadata(unsigned ID1, unsigned ID2) {
+ void dropUnknownNonDebugMetadata(unsigned ID1, unsigned ID2) {
unsigned IDs[] = {ID1, ID2};
- return dropUnknownMetadata(IDs);
+ return dropUnknownNonDebugMetadata(IDs);
}
+ /// @}
/// setAAMetadata - Sets the metadata on this instruction from the
/// AAMDNodes structure.
@@ -388,6 +389,19 @@ public:
return mayWriteToMemory() || mayThrow() || !mayReturn();
}
+ /// \brief Return true if the instruction is one of the EH-pad instructions.
+ bool isEHPad() const {
+ switch (getOpcode()) {
+ case Instruction::CatchSwitch:
+ case Instruction::CatchPad:
+ case Instruction::CleanupPad:
+ case Instruction::LandingPad:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// clone() - Create a copy of 'this' instruction that is identical in all
/// ways except the following:
/// * The instruction has no parent
@@ -468,6 +482,13 @@ public:
#include "llvm/IR/Instruction.def"
};
+ enum FuncletPadOps {
+#define FIRST_FUNCLETPAD_INST(N) FuncletPadOpsBegin = N,
+#define HANDLE_FUNCLETPAD_INST(N, OPC, CLASS) OPC = N,
+#define LAST_FUNCLETPAD_INST(N) FuncletPadOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
enum OtherOps {
#define FIRST_OTHER_INST(N) OtherOpsBegin = N,
#define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N,
@@ -489,7 +510,7 @@ private:
(V ? HasMetadataBit : 0));
}
- friend class SymbolTableListTraits<Instruction, BasicBlock>;
+ friend class SymbolTableListTraits<Instruction>;
void setParent(BasicBlock *P);
protected:
// Instruction subclasses can stick up to 15 bits of stuff into the
@@ -515,17 +536,6 @@ private:
Instruction *cloneImpl() const;
};
-inline Instruction *ilist_traits<Instruction>::createSentinel() const {
- // Since i(p)lists always publicly derive from their corresponding traits,
- // placing a data member in this class will augment the i(p)list. But since
- // the NodeTy is expected to be publicly derive from ilist_node<NodeTy>,
- // there is a legal viable downcast from it to NodeTy. We use this trick to
- // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
- // sentinel. Dereferencing the sentinel is forbidden (save the
- // ilist_node<NodeTy>), so no one will ever notice the superposition.
- return static_cast<Instruction *>(&Sentinel);
-}
-
// Instruction* is only 4-byte aligned.
template<>
class PointerLikeTypeTraits<Instruction*> {
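
The new isFuncletPad test, like the existing isTerminator/isBinaryOp/isCast family, depends on the FIRST_*/LAST_* macros expanding to FooOpsBegin = N and FooOpsEnd = N + 1, so each category forms a half-open [Begin, End) range and membership costs two comparisons. Spelled out with the funclet-pad numbers assigned in Instruction.def above:

    // Generated (in effect) by including Instruction.def with the enum macros:
    enum FuncletPadOps {
      FuncletPadOpsBegin = 49, // FIRST_FUNCLETPAD_INST(49)
      CleanupPad = 49,
      CatchPad = 50,
      FuncletPadOpsEnd = 51    // LAST_FUNCLETPAD_INST(50) -> 50 + 1
    };

    static inline bool isFuncletPad(unsigned OpCode) {
      return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd;
    }
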
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index 07d5f11..d781c7a 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
@@ -158,6 +159,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -166,7 +168,6 @@ private:
}
};
-
//===----------------------------------------------------------------------===//
// LoadInst Class
//===----------------------------------------------------------------------===//
@@ -176,6 +177,7 @@ private:
///
class LoadInst : public UnaryInstruction {
void AssertOK();
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -290,7 +292,6 @@ public:
return getPointerOperand()->getType()->getPointerAddressSpace();
}
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Load;
@@ -298,6 +299,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -306,7 +308,6 @@ private:
}
};
-
//===----------------------------------------------------------------------===//
// StoreInst Class
//===----------------------------------------------------------------------===//
@@ -316,6 +317,7 @@ private:
class StoreInst : public Instruction {
void *operator new(size_t, unsigned) = delete;
void AssertOK();
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -344,7 +346,6 @@ public:
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
-
/// isVolatile - Return true if this is a store to a volatile memory
/// location.
///
@@ -422,6 +423,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -445,6 +447,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
class FenceInst : public Instruction {
void *operator new(size_t, unsigned) = delete;
void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -496,6 +499,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -517,6 +521,7 @@ class AtomicCmpXchgInst : public Instruction {
void Init(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -648,6 +653,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -673,6 +679,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
///
class AtomicRMWInst : public Instruction {
void *operator new(size_t, unsigned) = delete;
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -795,6 +802,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
void Init(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering, SynchronizationScope SynchScope);
@@ -831,6 +839,8 @@ class GetElementPtrInst : public Instruction {
Type *SourceElementType;
Type *ResultElementType;
+ void anchor() override;
+
GetElementPtrInst(const GetElementPtrInst &GEPI);
void init(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr);
@@ -1078,10 +1088,8 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
init(Ptr, IdxList, NameStr);
}
-
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
-
//===----------------------------------------------------------------------===//
// ICmpInst Class
//===----------------------------------------------------------------------===//
@@ -1091,6 +1099,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
/// must be identical types.
/// \brief Represent an integer comparison operator.
class ICmpInst: public CmpInst {
+ void anchor() override;
+
void AssertOK() {
assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
getPredicate() <= CmpInst::LAST_ICMP_PREDICATE &&
@@ -1226,7 +1236,6 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
-
};
//===----------------------------------------------------------------------===//
@@ -1350,62 +1359,102 @@ public:
/// field to indicate whether or not this is a tail call. The rest of the bits
/// hold the calling convention of the call.
///
-class CallInst : public Instruction {
+class CallInst : public Instruction,
+ public OperandBundleUser<CallInst, User::op_iterator> {
AttributeSet AttributeList; ///< parameter attributes for call
FunctionType *FTy;
CallInst(const CallInst &CI);
- void init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ void init(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
init(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, NameStr);
+ Func, Args, Bundles, NameStr);
}
void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr);
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
void init(Value *Func, const Twine &NameStr);
/// Construct a CallInst given a range of arguments.
/// \brief Construct a CallInst from a range of arguments
inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, Instruction *InsertBefore);
- inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+ Instruction *InsertBefore);
+ inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore)
: CallInst(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, NameStr, InsertBefore) {}
+ Func, Args, Bundles, NameStr, InsertBefore) {}
+
+ inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : CallInst(Func, Args, None, NameStr, InsertBefore) {}
/// Construct a CallInst given a range of arguments.
/// \brief Construct a CallInst from a range of arguments
inline CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
explicit CallInst(Value *F, const Twine &NameStr,
Instruction *InsertBefore);
CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ friend class OperandBundleUser<CallInst, User::op_iterator>;
+ bool hasDescriptor() const { return HasDescriptor; }
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
CallInst *cloneImpl() const;
public:
- static CallInst *Create(Value *Func,
- ArrayRef<Value *> Args,
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
return Create(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, NameStr, InsertBefore);
+ Func, Args, Bundles, NameStr, InsertBefore);
+ }
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, None, NameStr, InsertBefore);
}
static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr = "",
+ const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
return new (unsigned(Args.size() + 1))
- CallInst(Ty, Func, Args, NameStr, InsertBefore);
+ CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
}
- static CallInst *Create(Value *Func,
- ArrayRef<Value *> Args,
+ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ const unsigned TotalOps =
+ unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (TotalOps, DescriptorBytes)
+ CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
+ }
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new(unsigned(Args.size() + 1))
- CallInst(Func, Args, NameStr, InsertAtEnd);
+ const unsigned TotalOps =
+ unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (TotalOps, DescriptorBytes)
+ CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+ }
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return new (unsigned(Args.size() + 1))
+ CallInst(Func, Args, None, NameStr, InsertAtEnd);
}
static CallInst *Create(Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
@@ -1415,6 +1464,16 @@ public:
BasicBlock *InsertAtEnd) {
return new(1) CallInst(F, NameStr, InsertAtEnd);
}
+
+ /// \brief Create a clone of \p CI with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned call instruction is identical to \p CI in every way except that
+ /// the operand bundles for the new instruction are set to the operand bundles
+ /// in \p Bundles.
+ static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+ Instruction *InsertPt = nullptr);
+
/// CreateMalloc - Generate the IR for a call to malloc:
/// 1. Compute the malloc call's argument as the specified type's size,
/// possibly multiplied by the array size if the array size is not
@@ -1445,16 +1504,21 @@ public:
}
// Note that 'musttail' implies 'tail'.
- enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 };
+ enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2,
+ TCK_NoTail = 3 };
TailCallKind getTailCallKind() const {
return TailCallKind(getSubclassDataFromInstruction() & 3);
}
bool isTailCall() const {
- return (getSubclassDataFromInstruction() & 3) != TCK_None;
+ unsigned Kind = getSubclassDataFromInstruction() & 3;
+ return Kind == TCK_Tail || Kind == TCK_MustTail;
}
bool isMustTailCall() const {
return (getSubclassDataFromInstruction() & 3) == TCK_MustTail;
}
+ bool isNoTailCall() const {
+ return (getSubclassDataFromInstruction() & 3) == TCK_NoTail;
+ }
void setTailCall(bool isTC = true) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
unsigned(isTC ? TCK_Tail : TCK_None));
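
TCK_NoTail is the reason isTailCall changed: the kind lives in the low two bits of the instruction's subclass data, and with a fourth nonzero encoding, "kind != TCK_None" no longer means "is a tail call". A standalone sketch of the packing (simplified; the real subclass data also stores the calling convention in the bits above these, per the setCallingConv hunk further down):

    #include <cstdint>

    enum TailCallKind : uint8_t { TCK_None = 0, TCK_Tail = 1,
                                  TCK_MustTail = 2, TCK_NoTail = 3 };

    struct CallBits {
      uint16_t SubclassData = 0; // low 2 bits hold the TailCallKind
      TailCallKind getTailCallKind() const {
        return TailCallKind(SubclassData & 3);
      }
      void setTailCallKind(TailCallKind K) {
        SubclassData = uint16_t((SubclassData & ~3u) | K);
      }
      // With TCK_NoTail == 3, "nonzero kind" is the wrong test:
      bool isTailCall() const {
        unsigned Kind = SubclassData & 3;
        return Kind == TCK_Tail || Kind == TCK_MustTail;
      }
    };
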
@@ -1469,28 +1533,58 @@ public:
/// getNumArgOperands - Return the number of call arguments.
///
- unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+ unsigned getNumArgOperands() const {
+ return getNumOperands() - getNumTotalBundleOperands() - 1;
+ }
/// getArgOperand/setArgOperand - Return/set the i-th call argument.
///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+ Value *getArgOperand(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperand(i);
+ }
+ void setArgOperand(unsigned i, Value *v) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ setOperand(i, v);
+ }
+
+ /// \brief Return the iterator pointing to the beginning of the argument list.
+ op_iterator arg_begin() { return op_begin(); }
- /// arg_operands - iteration adapter for range-for loops.
+ /// \brief Return the iterator pointing to the end of the argument list.
+ op_iterator arg_end() {
+ // [ call args ], [ operand bundles ], callee
+ return op_end() - getNumTotalBundleOperands() - 1;
+ }
+
+ /// \brief Iteration adapter for range-for loops.
iterator_range<op_iterator> arg_operands() {
- // The last operand in the op list is the callee - it's not one of the args
- // so we don't want to iterate over it.
- return iterator_range<op_iterator>(op_begin(), op_end() - 1);
+ return make_range(arg_begin(), arg_end());
}
- /// arg_operands - iteration adapter for range-for loops.
+ /// \brief Return the iterator pointing to the beginning of the argument list.
+ const_op_iterator arg_begin() const { return op_begin(); }
+
+ /// \brief Return the iterator pointing to the end of the argument list.
+ const_op_iterator arg_end() const {
+ // [ call args ], [ operand bundles ], callee
+ return op_end() - getNumTotalBundleOperands() - 1;
+ }
+
+ /// \brief Iteration adapter for range-for loops.
iterator_range<const_op_iterator> arg_operands() const {
- return iterator_range<const_op_iterator>(op_begin(), op_end() - 1);
+ return make_range(arg_begin(), arg_end());
}
/// \brief Wrappers for getting the \c Use of a call argument.
- const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); }
- Use &getArgOperandUse(unsigned i) { return getOperandUse(i); }
+ const Use &getArgOperandUse(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperandUse(i);
+ }
+ Use &getArgOperandUse(unsigned i) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperandUse(i);
+ }
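
The comments in arg_end() above document the operand order for a CallInst carrying bundles: [call args], [operand bundle operands], callee last. All of the bundle-aware accessors reduce to index arithmetic over that layout; a tiny standalone restatement (struct and field names illustrative):

    // CallInst operands: [args...][bundle operands...][callee].
    struct CallLayout {
      unsigned NumOperands;       // total, including the callee
      unsigned NumBundleOperands; // getNumTotalBundleOperands()
      unsigned numArgOperands() const { return NumOperands - NumBundleOperands - 1; }
      unsigned calleeIndex() const { return NumOperands - 1; }
      bool isBundleOperandIndex(unsigned i) const {
        return i >= numArgOperands() && i < calleeIndex();
      }
    };
    // Example: 2 args plus one bundle carrying 3 values gives NumOperands = 6;
    // args occupy [0, 2), bundle operands [2, 5), and the callee sits at 5.
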
/// getCallingConv/setCallingConv - Get or set the calling convention of this
/// function call.
@@ -1498,8 +1592,10 @@ public:
return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
}
void setCallingConv(CallingConv::ID CC) {
+ auto ID = static_cast<unsigned>(CC);
+ assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
- (static_cast<unsigned>(CC) << 2));
+ (ID << 2));
}
/// getAttributes - Return the parameter attributes for this call.
@@ -1541,6 +1637,21 @@ public:
/// \brief Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
+ /// \brief Return true if the data operand at index \p i has the attribute \p
+ /// A.
+ ///
+ /// Data operands include call arguments and values used in operand bundles,
+ /// but do not include the callee operand. This routine dispatches to the
+ /// underlying AttributeList or the OperandBundleUser as appropriate.
+ ///
+ /// The index \p i is interpreted as
+ ///
+ /// \p i == Attribute::ReturnIndex -> the return value
+ /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+ /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+ /// (\p i - 1) in the operand list.
+ bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
/// \brief Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned i) const {
return AttributeList.getParamAlignment(i);
@@ -1557,7 +1668,14 @@ public:
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeList.getDereferenceableOrNullBytes(i);
}
-
+
+ /// @brief Determine if the parameter or return value is marked with NoAlias
+ /// attribute.
+ /// @param n The parameter to check. 1 is the first parameter, 0 is the return value.
+ bool doesNotAlias(unsigned n) const {
+ return AttributeList.hasAttribute(n, Attribute::NoAlias);
+ }
+
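
The index convention for dataOperandHasImpliedAttr above takes a moment to internalize, so a worked example helps: with two call arguments and a single bundle carrying three values, the data operand count is five. A sketch of the mapping (function and names illustrative, not LLVM API):

    #include <cassert>

    // i == 0 -> return value; i in [1, NumArgs + 1) -> argument i - 1;
    // i in [NumArgs + 1, NumDataOps + 1) -> bundle operand i - 1.
    const char *describeAttrIndex(unsigned i, unsigned NumArgs, unsigned NumDataOps) {
      assert(i <= NumDataOps && "index out of range");
      if (i == 0)            // Attribute::ReturnIndex
        return "return value";
      if (i - 1 < NumArgs)
        return "call argument";
      return "operand bundle operand";
    }
    // NumArgs = 2, NumDataOps = 5: i = 0 -> return value,
    // i = 1..2 -> arguments 0..1, i = 3..5 -> bundle operands 2..4.
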
/// \brief Return true if the call should not be treated as a call to a
/// builtin.
bool isNoBuiltin() const {
@@ -1622,9 +1740,18 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
}
+ /// \brief Determine if the call is convergent
+ bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
+ void setConvergent() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
+ }
+
/// \brief Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
+ if (getNumArgOperands() == 0)
+ return false;
+
// Be friendly and also check the callee.
return paramHasAttr(1, Attribute::StructRet);
}
@@ -1671,12 +1798,17 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
-private:
- template<typename AttrKind>
- bool hasFnAttrImpl(AttrKind A) const {
+private:
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(A))
+ return false;
+
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
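
The reordering in hasFnAttrImpl encodes a precedence: an attribute present on the call site itself always wins; otherwise an operand bundle may forbid inheriting the attribute from the callee; only then is the callee's own attribute list consulted. Distilled into a toy helper (purely illustrative, not LLVM API):

    // Precedence used by hasFnAttrImpl above, restated:
    bool hasFnAttr(bool OnCallSite, bool DisallowedByBundle, bool OnCallee) {
      if (OnCallSite)
        return true;          // call-site attributes always apply
      if (DisallowedByBundle)
        return false;         // bundles veto attributes inherited from the callee
      return OnCallee;
    }
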
@@ -1694,24 +1826,28 @@ struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
};
CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, BasicBlock *InsertAtEnd)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
- unsigned(Args.size() + 1), InsertAtEnd) {
- init(Func, Args, NameStr);
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(
+ cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call, OperandTraits<CallInst>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) {
+ init(Func, Args, Bundles, NameStr);
}
CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, Instruction *InsertBefore)
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
+ Instruction *InsertBefore)
: Instruction(Ty->getReturnType(), Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
- unsigned(Args.size() + 1), InsertBefore) {
- init(Ty, Func, Args, NameStr);
+ OperandTraits<CallInst>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+ InsertBefore) {
+ init(Ty, Func, Args, Bundles, NameStr);
}
-
// Note: if you get compile errors about private methods then
// please update your code to use the high-level operand
// interfaces. See line 943 above.
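
Both constructors above, and the Create overloads earlier, size the allocation the same way: one operand slot per argument, per bundle input, and for the callee, plus Bundles.size() * sizeof(BundleOpInfo) descriptor bytes co-allocated with the User. A standalone sketch of the arithmetic (the stand-in struct's layout is illustrative):

    #include <cstddef>
    #include <vector>

    struct BundleOpInfoStandIn { void *Tag; unsigned Begin, End; }; // illustrative

    struct AllocPlan { unsigned TotalOps; std::size_t DescriptorBytes; };

    AllocPlan planCallAlloc(std::size_t NumArgs,
                            const std::vector<std::size_t> &BundleInputCounts) {
      unsigned Inputs = 0;
      for (std::size_t N : BundleInputCounts)
        Inputs += unsigned(N);                    // CountBundleInputs
      return {unsigned(NumArgs) + Inputs + 1,     // args + bundle inputs + callee
              BundleInputCounts.size() * sizeof(BundleOpInfoStandIn)};
    }
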
@@ -1745,6 +1881,7 @@ class SelectInst : public Instruction {
init(C, S1, S2);
setName(NameStr);
}
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -1845,6 +1982,7 @@ class ExtractElementInst : public Instruction {
Instruction *InsertBefore = nullptr);
ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -1875,7 +2013,6 @@ public:
return cast<VectorType>(getVectorOperand()->getType());
}
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -1906,8 +2043,9 @@ class InsertElementInst : public Instruction {
InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr);
- InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
+ InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -2020,7 +2158,6 @@ public:
return Mask;
}
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;
@@ -2063,9 +2200,8 @@ class ExtractValueInst : public UnaryInstruction {
const Twine &NameStr, BasicBlock *InsertAtEnd);
// allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -2096,7 +2232,7 @@ public:
inline idx_iterator idx_begin() const { return Indices.begin(); }
inline idx_iterator idx_end() const { return Indices.end(); }
inline iterator_range<idx_iterator> indices() const {
- return iterator_range<idx_iterator>(idx_begin(), idx_end());
+ return make_range(idx_begin(), idx_end());
}
Value *getAggregateOperand() {
@@ -2147,7 +2283,6 @@ ExtractValueInst::ExtractValueInst(Value *Agg,
init(Idxs, NameStr);
}
-
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -2177,11 +2312,12 @@ class InsertValueInst : public Instruction {
/// Constructors - These two constructors are convenience methods because one
/// and two index insertvalue instructions are so common.
- InsertValueInst(Value *Agg, Value *Val,
- unsigned Idx, const Twine &NameStr = "",
- Instruction *InsertBefore = nullptr);
InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ InsertValueInst(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -2213,7 +2349,7 @@ public:
inline idx_iterator idx_begin() const { return Indices.begin(); }
inline idx_iterator idx_end() const { return Indices.end(); }
inline iterator_range<idx_iterator> indices() const {
- return iterator_range<idx_iterator>(idx_begin(), idx_end());
+ return make_range(idx_begin(), idx_end());
}
Value *getAggregateOperand() {
@@ -2294,6 +2430,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
// scientist's overactive imagination.
//
class PHINode : public Instruction {
+ void anchor() override;
+
void *operator new(size_t, unsigned) = delete;
/// ReservedSpace - The number of operands actually allocated. NumOperands is
/// the number actually in use.
@@ -2319,6 +2457,7 @@ class PHINode : public Instruction {
setName(NameStr);
allocHungoffUses(ReservedSpace);
}
+
protected:
// allocHungoffUses - this is more complicated than the generic
// User::allocHungoffUses, because we have to allocate Uses for the incoming
@@ -2387,6 +2526,9 @@ public:
return getOperand(i);
}
void setIncomingValue(unsigned i, Value *V) {
+ assert(V && "PHI node got a null value!");
+ assert(getType() == V->getType() &&
+ "All operands to PHI node must be the same type as the PHI node!");
setOperand(i, V);
}
static unsigned getOperandNumForIncomingValue(unsigned i) {
@@ -2418,16 +2560,13 @@ public:
}
void setIncomingBlock(unsigned i, BasicBlock *BB) {
+ assert(BB && "PHI node got a null basic block!");
block_begin()[i] = BB;
}
/// addIncoming - Add an incoming value to the end of the PHI list
///
void addIncoming(Value *V, BasicBlock *BB) {
- assert(V && "PHI node got a null value!");
- assert(BB && "PHI node got a null basic block!");
- assert(getType() == V->getType() &&
- "All operands to PHI node must be the same type as the PHI node!");
if (getNumOperands() == ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
@@ -2479,7 +2618,8 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
- private:
+
+private:
void growOperands();
};
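
Note where the PHI assertions moved: the null-value and type checks now fire in setIncomingValue and the null-block check in setIncomingBlock, so every write path is validated rather than only addIncoming. A hedged usage sketch against the C++ API of this vintage (the surrounding values and blocks are assumed to exist):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Two-way merge; both incoming values must already have the PHI's type,
    // which setIncomingValue asserts for each addIncoming below.
    Value *mergeValues(BasicBlock *MergeBB, Value *VThen, BasicBlock *ThenBB,
                       Value *VElse, BasicBlock *ElseBB) {
      IRBuilder<> B(MergeBB, MergeBB->begin());
      PHINode *PN = B.CreatePHI(VThen->getType(), /*NumReservedValues=*/2, "merge");
      PN->addIncoming(VThen, ThenBB);
      PN->addIncoming(VElse, ElseBB);
      return PN;
    }
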
@@ -2506,8 +2646,10 @@ class LandingPadInst : public Instruction {
/// the number actually in use.
unsigned ReservedSpace;
LandingPadInst(const LandingPadInst &LP);
+
public:
enum ClauseType { Catch, Filter };
+
private:
void *operator new(size_t, unsigned) = delete;
// Allocate space for exactly zero operands.
@@ -2618,6 +2760,7 @@ private:
Instruction *InsertBefore = nullptr);
ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -2654,7 +2797,8 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
- private:
+
+private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
void setSuccessorV(unsigned idx, BasicBlock *B) override;
@@ -2693,6 +2837,7 @@ class BranchInst : public TerminatorInst {
BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd);
BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -2740,7 +2885,7 @@ public:
void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
assert(idx < getNumSuccessors() && "Successor # out of range for Branch!");
- *(&Op<-1>() - idx) = (Value*)NewSucc;
+ *(&Op<-1>() - idx) = NewSucc;
}
/// \brief Swap the successors of this branch instruction.
@@ -2757,6 +2902,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
@@ -2803,25 +2949,23 @@ class SwitchInst : public TerminatorInst {
/// constructor also autoinserts at the end of the specified BasicBlock.
SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
SwitchInst *cloneImpl() const;
public:
-
// -2
static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
template <class SwitchInstTy, class ConstantIntTy, class BasicBlockTy>
class CaseIteratorT {
protected:
-
SwitchInstTy *SI;
unsigned Index;
public:
-
typedef CaseIteratorT<SwitchInstTy, ConstantIntTy, BasicBlockTy> Self;
/// Initializes case iterator for given SwitchInst and for given
@@ -2912,8 +3056,7 @@ public:
typedef CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> ParentTy;
public:
-
- CaseIt(const ParentTy& Src) : ParentTy(Src) {}
+ CaseIt(const ParentTy &Src) : ParentTy(Src) {}
CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {}
/// Sets the new value for current case.
@@ -2983,12 +3126,12 @@ public:
/// cases - iteration adapter for range-for loops.
iterator_range<CaseIt> cases() {
- return iterator_range<CaseIt>(case_begin(), case_end());
+ return make_range(case_begin(), case_end());
}
/// cases - iteration adapter for range-for loops.
iterator_range<ConstCaseIt> cases() const {
- return iterator_range<ConstCaseIt>(case_begin(), case_end());
+ return make_range(case_begin(), case_end());
}
/// Returns an iterator that points to the default case.
@@ -3056,7 +3199,7 @@ public:
}
void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
assert(idx < getNumSuccessors() && "Successor # out of range for switch!");
- setOperand(idx*2+1, (Value*)NewSucc);
+ setOperand(idx * 2 + 1, NewSucc);
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -3066,6 +3209,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
@@ -3078,7 +3222,6 @@ struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
-
//===----------------------------------------------------------------------===//
// IndirectBrInst Class
//===----------------------------------------------------------------------===//
@@ -3111,6 +3254,7 @@ class IndirectBrInst : public TerminatorInst {
/// here to make memory allocation more efficient. This constructor also
/// autoinserts at the end of the specified BasicBlock.
IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -3134,7 +3278,6 @@ public:
const Value *getAddress() const { return getOperand(0); }
void setAddress(Value *V) { setOperand(0, V); }
-
/// getNumDestinations - return the number of possible destinations in this
/// indirectbr instruction.
unsigned getNumDestinations() const { return getNumOperands()-1; }
@@ -3156,7 +3299,7 @@ public:
return cast<BasicBlock>(getOperand(i+1));
}
void setSuccessor(unsigned i, BasicBlock *NewSucc) {
- setOperand(i+1, (Value*)NewSucc);
+ setOperand(i + 1, NewSucc);
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -3166,6 +3309,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
@@ -3178,7 +3322,6 @@ struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
-
//===----------------------------------------------------------------------===//
// InvokeInst Class
//===----------------------------------------------------------------------===//
@@ -3186,72 +3329,123 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
/// InvokeInst - Invoke instruction. The SubclassData field is used to hold the
/// calling convention of the call.
///
-class InvokeInst : public TerminatorInst {
+class InvokeInst : public TerminatorInst,
+ public OperandBundleUser<InvokeInst, User::op_iterator> {
AttributeSet AttributeList;
FunctionType *FTy;
InvokeInst(const InvokeInst &BI);
void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr) {
+ ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr) {
init(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, NameStr);
+ Func, IfNormal, IfException, Args, Bundles, NameStr);
}
void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- const Twine &NameStr);
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
/// Construct an InvokeInst given a range of arguments.
///
/// \brief Construct an InvokeInst from a range of arguments
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, Instruction *InsertBefore)
+ ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
+ unsigned Values, const Twine &NameStr,
+ Instruction *InsertBefore)
: InvokeInst(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Values, NameStr,
+ Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
InsertBefore) {}
inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- unsigned Values, const Twine &NameStr,
- Instruction *InsertBefore);
+ ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore);
/// Construct an InvokeInst given a range of arguments.
///
/// \brief Construct an InvokeInst from a range of arguments
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
+ ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
+ unsigned Values, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+
+ friend class OperandBundleUser<InvokeInst, User::op_iterator>;
+ bool hasDescriptor() const { return HasDescriptor; }
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
InvokeInst *cloneImpl() const;
public:
- static InvokeInst *Create(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr = "",
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
return Create(cast<FunctionType>(
cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, NameStr, InsertBefore);
+ Func, IfNormal, IfException, Args, None, NameStr,
+ InsertBefore);
}
- static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr,
+ InsertBefore);
+ }
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
unsigned Values = unsigned(Args.size()) + 3;
- return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args,
+ return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None,
Values, NameStr, InsertBefore);
}
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (Values, DescriptorBytes)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values,
+ NameStr, InsertBefore);
+ }
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
ArrayRef<Value *> Args, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
unsigned Values = unsigned(Args.size()) + 3;
- return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
- Values, NameStr, InsertAtEnd);
+ return new (Values) InvokeInst(Func, IfNormal, IfException, Args, None,
+ Values, NameStr, InsertAtEnd);
+ }
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (Values, DescriptorBytes)
+ InvokeInst(Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
+ InsertAtEnd);
}
+ /// \brief Create a clone of \p II with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned invoke instruction is identical to \p II in every way except
+ /// that the operand bundles for the new instruction are set to the operand
+ /// bundles in \p Bundles.
+ static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
+ Instruction *InsertPt = nullptr);
+
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3264,26 +3458,58 @@ public:
/// getNumArgOperands - Return the number of invoke arguments.
///
- unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+ unsigned getNumArgOperands() const {
+ return getNumOperands() - getNumTotalBundleOperands() - 3;
+ }
/// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+ Value *getArgOperand(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperand(i);
+ }
+ void setArgOperand(unsigned i, Value *v) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ setOperand(i, v);
+ }
+
+ /// \brief Return the iterator pointing to the beginning of the argument list.
+ op_iterator arg_begin() { return op_begin(); }
- /// arg_operands - iteration adapter for range-for loops.
+ /// \brief Return the iterator pointing to the end of the argument list.
+ op_iterator arg_end() {
+ // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+ return op_end() - getNumTotalBundleOperands() - 3;
+ }
+
+ /// \brief Iteration adapter for range-for loops.
iterator_range<op_iterator> arg_operands() {
- return iterator_range<op_iterator>(op_begin(), op_end() - 3);
+ return make_range(arg_begin(), arg_end());
}
- /// arg_operands - iteration adapter for range-for loops.
+ /// \brief Return the iterator pointing to the beginning of the argument list.
+ const_op_iterator arg_begin() const { return op_begin(); }
+
+ /// \brief Return the iterator pointing to the end of the argument list.
+ const_op_iterator arg_end() const {
+ // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+ return op_end() - getNumTotalBundleOperands() - 3;
+ }
+
+ /// \brief Iteration adapter for range-for loops.
iterator_range<const_op_iterator> arg_operands() const {
- return iterator_range<const_op_iterator>(op_begin(), op_end() - 3);
+ return make_range(arg_begin(), arg_end());
}
/// \brief Wrappers for getting the \c Use of a invoke argument.
- const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); }
- Use &getArgOperandUse(unsigned i) { return getOperandUse(i); }
+ const Use &getArgOperandUse(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperandUse(i);
+ }
+ Use &getArgOperandUse(unsigned i) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperandUse(i);
+ }
/// getCallingConv/setCallingConv - Get or set the calling convention of this
/// function call.
@@ -3291,7 +3517,9 @@ public:
return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
}
void setCallingConv(CallingConv::ID CC) {
- setInstructionSubclassData(static_cast<unsigned>(CC));
+ auto ID = static_cast<unsigned>(CC);
+ assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+ setInstructionSubclassData(ID);
}
/// getAttributes - Return the parameter attributes for this invoke.
@@ -3325,6 +3553,22 @@ public:
/// \brief Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
+ /// \brief Return true if the data operand at index \p i has the attribute \p
+ /// A.
+ ///
+ /// Data operands include invoke arguments and values used in operand bundles,
+ /// but do not include the invokee operand or the two successor blocks.
+ /// This routine dispatches to the underlying AttributeList or the
+ /// OperandBundleUser as appropriate.
+ ///
+ /// The index \p i is interpreted as
+ ///
+ /// \p i == Attribute::ReturnIndex -> the return value
+ /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+ /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+ /// (\p i - 1) in the operand list.
+ bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
/// \brief Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned i) const {
return AttributeList.getParamAlignment(i);
@@ -3335,13 +3579,20 @@ public:
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeList.getDereferenceableBytes(i);
}
-
+
/// \brief Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeList.getDereferenceableOrNullBytes(i);
}
+ /// @brief Determine if the parameter or return value is marked with NoAlias
+ /// attribute.
+ /// @param n The parameter to check. 1 is the first parameter, 0 is the return value.
+ bool doesNotAlias(unsigned n) const {
+ return AttributeList.hasAttribute(n, Attribute::NoAlias);
+ }
+
/// \brief Return true if the call should not be treated as a call to a
/// builtin.
bool isNoBuiltin() const {
@@ -3403,6 +3654,9 @@ public:
/// \brief Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
+ if (getNumArgOperands() == 0)
+ return false;
+
// Be friendly and also check the callee.
return paramHasAttr(1, Attribute::StructRet);
}
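A short usage sketch of these attribute helpers, again assuming a valid `InvokeInst *II`; note the 1-based parameter indexing they all share:

    bool RetNoAlias = II->doesNotAlias(0);     // 0 queries the return value
    bool ArgNoAlias = II->doesNotAlias(1);     // 1 queries the first parameter
    unsigned Align = II->getParamAlignment(1); // 0 means unknown alignment
    bool SRet = II->hasStructRetAttr();        // sret on the first argument?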
@@ -3495,23 +3749,23 @@ struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- unsigned Values, const Twine &NameStr,
- Instruction *InsertBefore)
+ ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore)
: TerminatorInst(Ty->getReturnType(), Instruction::Invoke,
OperandTraits<InvokeInst>::op_end(this) - Values, Values,
InsertBefore) {
- init(Ty, Func, IfNormal, IfException, Args, NameStr);
+ init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
-InvokeInst::InvokeInst(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
+InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, unsigned Values,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Invoke,
- OperandTraits<InvokeInst>::op_end(this) - Values,
- Values, InsertAtEnd) {
- init(Func, IfNormal, IfException, Args, NameStr);
+ : TerminatorInst(
+ cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Invoke, OperandTraits<InvokeInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
+ init(Func, IfNormal, IfException, Args, Bundles, NameStr);
}
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
@@ -3528,6 +3782,7 @@ class ResumeInst : public TerminatorInst {
explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr);
ResumeInst(Value *Exn, BasicBlock *InsertAtEnd);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -3556,6 +3811,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
@@ -3570,6 +3826,430 @@ struct OperandTraits<ResumeInst> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
//===----------------------------------------------------------------------===//
+// CatchSwitchInst Class
+//===----------------------------------------------------------------------===//
+class CatchSwitchInst : public TerminatorInst {
+ void *operator new(size_t, unsigned) = delete;
+ /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// the number actually in use.
+ unsigned ReservedSpace;
+ // Operand[0] = Outer scope
+ // Operand[1] = Unwind block destination
+ // Operand[n] = BasicBlock to go to on match
+ CatchSwitchInst(const CatchSwitchInst &CSI);
+ void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved);
+ void growOperands(unsigned Size);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) { return User::operator new(s); }
+ /// CatchSwitchInst ctor - Create a new catchswitch instruction, specifying an
+ /// optional unwind destination. The number of handlers can be specified here
+ /// to make memory allocation more efficient.
+ /// This constructor can also autoinsert before another instruction.
+ CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumHandlers, const Twine &NameStr,
+ Instruction *InsertBefore);
+
+ /// CatchSwitchInst ctor - Create a new catchswitch instruction, specifying an
+ /// optional unwind destination. The number of handlers can be specified here
+ /// to make memory allocation more efficient.
+ /// This constructor also autoinserts at the end of the specified BasicBlock.
+ CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumHandlers, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ CatchSwitchInst *cloneImpl() const;
+
+public:
+ static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumHandlers,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr,
+ InsertBefore);
+ }
+ static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumHandlers, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr,
+ InsertAtEnd);
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Accessor Methods for CatchSwitch stmt
+ Value *getParentPad() const { return getOperand(0); }
+ void setParentPad(Value *ParentPad) { setOperand(0, ParentPad); }
+
+ // Accessor Methods for the unwind destination
+ bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; }
+ bool unwindsToCaller() const { return !hasUnwindDest(); }
+ BasicBlock *getUnwindDest() const {
+ if (hasUnwindDest())
+ return cast<BasicBlock>(getOperand(1));
+ return nullptr;
+ }
+ void setUnwindDest(BasicBlock *UnwindDest) {
+ assert(UnwindDest);
+ assert(hasUnwindDest());
+ setOperand(1, UnwindDest);
+ }
+
+ /// getNumHandlers - return the number of 'handlers' in this catchswitch
+ /// instruction; the unwind destination is not counted as a handler
+ unsigned getNumHandlers() const {
+ if (hasUnwindDest())
+ return getNumOperands() - 2;
+ return getNumOperands() - 1;
+ }
+
+private:
+ static BasicBlock *handler_helper(Value *V) { return cast<BasicBlock>(V); }
+ static const BasicBlock *handler_helper(const Value *V) {
+ return cast<BasicBlock>(V);
+ }
+
+public:
+ typedef std::pointer_to_unary_function<Value *, BasicBlock *> DerefFnTy;
+ typedef mapped_iterator<op_iterator, DerefFnTy> handler_iterator;
+ typedef iterator_range<handler_iterator> handler_range;
+
+ typedef std::pointer_to_unary_function<const Value *, const BasicBlock *>
+ ConstDerefFnTy;
+ typedef mapped_iterator<const_op_iterator, ConstDerefFnTy>
+ const_handler_iterator;
+ typedef iterator_range<const_handler_iterator> const_handler_range;
+
+ /// Returns an iterator that points to the first handler in the
+ /// CatchSwitchInst.
+ handler_iterator handler_begin() {
+ op_iterator It = op_begin() + 1;
+ if (hasUnwindDest())
+ ++It;
+ return handler_iterator(It, DerefFnTy(handler_helper));
+ }
+ /// Returns an iterator that points to the first handler in the
+ /// CatchSwitchInst.
+ const_handler_iterator handler_begin() const {
+ const_op_iterator It = op_begin() + 1;
+ if (hasUnwindDest())
+ ++It;
+ return const_handler_iterator(It, ConstDerefFnTy(handler_helper));
+ }
+
+ /// Returns an iterator that points one past the last handler in the
+ /// CatchSwitchInst.
+ handler_iterator handler_end() {
+ return handler_iterator(op_end(), DerefFnTy(handler_helper));
+ }
+ /// Returns an iterator that points one past the last handler in the
+ /// CatchSwitchInst.
+ const_handler_iterator handler_end() const {
+ return const_handler_iterator(op_end(), ConstDerefFnTy(handler_helper));
+ }
+
+ /// handlers - iteration adapter for range-for loops.
+ handler_range handlers() {
+ return make_range(handler_begin(), handler_end());
+ }
+
+ /// handlers - iteration adapter for range-for loops.
+ const_handler_range handlers() const {
+ return make_range(handler_begin(), handler_end());
+ }
+
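A hedged sketch of walking a catchswitch with the adapters above (`CSI` is an assumed valid `CatchSwitchInst *`); the unwind destination is deliberately not part of the handler range:

    for (BasicBlock *Handler : CSI->handlers())
      errs() << "handler: " << Handler->getName() << "\n";
    if (BasicBlock *UnwindBB = CSI->getUnwindDest())
      errs() << "unwinds to: " << UnwindBB->getName() << "\n";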
+ /// addHandler - Add an entry to the catchswitch instruction.
+ /// Note:
+ /// This action invalidates handler_end(). The old handler_end() iterator
+ /// will point to the added handler.
+ void addHandler(BasicBlock *Dest);
+
+ unsigned getNumSuccessors() const { return getNumOperands() - 1; }
+ BasicBlock *getSuccessor(unsigned Idx) const {
+ assert(Idx < getNumSuccessors() &&
+ "Successor # out of range for catchswitch!");
+ return cast<BasicBlock>(getOperand(Idx + 1));
+ }
+ void setSuccessor(unsigned Idx, BasicBlock *NewSucc) {
+ assert(Idx < getNumSuccessors() &&
+ "Successor # out of range for catchswitch!");
+ setOperand(Idx + 1, NewSucc);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::CatchSwitch;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+private:
+ BasicBlock *getSuccessorV(unsigned Idx) const override;
+ unsigned getNumSuccessorsV() const override;
+ void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchSwitchInst> : public HungoffOperandTraits<2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchSwitchInst, Value)
+
+//===----------------------------------------------------------------------===//
+// CleanupPadInst Class
+//===----------------------------------------------------------------------===//
+class CleanupPadInst : public FuncletPadInst {
+private:
+ explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+ unsigned Values, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+ NameStr, InsertBefore) {}
+ explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+ unsigned Values, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+ NameStr, InsertAtEnd) {}
+
+public:
+ static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ unsigned Values = 1 + Args.size();
+ return new (Values)
+ CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore);
+ }
+ static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ unsigned Values = 1 + Args.size();
+ return new (Values)
+ CleanupPadInst(ParentPad, Args, Values, NameStr, InsertAtEnd);
+ }
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::CleanupPad;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// CatchPadInst Class
+//===----------------------------------------------------------------------===//
+class CatchPadInst : public FuncletPadInst {
+private:
+ explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+ unsigned Values, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+ NameStr, InsertBefore) {}
+ explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+ unsigned Values, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+ NameStr, InsertAtEnd) {}
+
+public:
+ static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ unsigned Values = 1 + Args.size();
+ return new (Values)
+ CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore);
+ }
+ static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ unsigned Values = 1 + Args.size();
+ return new (Values)
+ CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertAtEnd);
+ }
+
+ /// Convenience accessors
+ CatchSwitchInst *getCatchSwitch() const {
+ return cast<CatchSwitchInst>(Op<-1>());
+ }
+ void setCatchSwitch(Value *CatchSwitch) {
+ assert(CatchSwitch);
+ Op<-1>() = CatchSwitch;
+ }
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::CatchPad;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// CatchReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CatchReturnInst : public TerminatorInst {
+ CatchReturnInst(const CatchReturnInst &RI);
+
+ void init(Value *CatchPad, BasicBlock *BB);
+ CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
+ CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ CatchReturnInst *cloneImpl() const;
+
+public:
+ static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+ Instruction *InsertBefore = nullptr) {
+ assert(CatchPad);
+ assert(BB);
+ return new (2) CatchReturnInst(CatchPad, BB, InsertBefore);
+ }
+ static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+ BasicBlock *InsertAtEnd) {
+ assert(CatchPad);
+ assert(BB);
+ return new (2) CatchReturnInst(CatchPad, BB, InsertAtEnd);
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Convenience accessors.
+ CatchPadInst *getCatchPad() const { return cast<CatchPadInst>(Op<0>()); }
+ void setCatchPad(CatchPadInst *CatchPad) {
+ assert(CatchPad);
+ Op<0>() = CatchPad;
+ }
+
+ BasicBlock *getSuccessor() const { return cast<BasicBlock>(Op<1>()); }
+ void setSuccessor(BasicBlock *NewSucc) {
+ assert(NewSucc);
+ Op<1>() = NewSucc;
+ }
+ unsigned getNumSuccessors() const { return 1; }
+
+ Value *getParentPad() const {
+ return getCatchPad()->getCatchSwitch()->getParentPad();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::CatchRet);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+private:
+ BasicBlock *getSuccessorV(unsigned Idx) const override;
+ unsigned getNumSuccessorsV() const override;
+ void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchReturnInst>
+ : public FixedNumOperandTraits<CatchReturnInst, 2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
+
+//===----------------------------------------------------------------------===//
+// CleanupReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CleanupReturnInst : public TerminatorInst {
+private:
+ CleanupReturnInst(const CleanupReturnInst &RI);
+
+ void init(Value *CleanupPad, BasicBlock *UnwindBB);
+ CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+ Instruction *InsertBefore = nullptr);
+ CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+ BasicBlock *InsertAtEnd);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ CleanupReturnInst *cloneImpl() const;
+
+public:
+ static CleanupReturnInst *Create(Value *CleanupPad,
+ BasicBlock *UnwindBB = nullptr,
+ Instruction *InsertBefore = nullptr) {
+ assert(CleanupPad);
+ unsigned Values = 1;
+ if (UnwindBB)
+ ++Values;
+ return new (Values)
+ CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore);
+ }
+ static CleanupReturnInst *Create(Value *CleanupPad, BasicBlock *UnwindBB,
+ BasicBlock *InsertAtEnd) {
+ assert(CleanupPad);
+ unsigned Values = 1;
+ if (UnwindBB)
+ ++Values;
+ return new (Values)
+ CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertAtEnd);
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; }
+ bool unwindsToCaller() const { return !hasUnwindDest(); }
+
+ /// Convenience accessor.
+ CleanupPadInst *getCleanupPad() const {
+ return cast<CleanupPadInst>(Op<0>());
+ }
+ void setCleanupPad(CleanupPadInst *CleanupPad) {
+ assert(CleanupPad);
+ Op<0>() = CleanupPad;
+ }
+
+ unsigned getNumSuccessors() const { return hasUnwindDest() ? 1 : 0; }
+
+ BasicBlock *getUnwindDest() const {
+ return hasUnwindDest() ? cast<BasicBlock>(Op<1>()) : nullptr;
+ }
+ void setUnwindDest(BasicBlock *NewDest) {
+ assert(NewDest);
+ assert(hasUnwindDest());
+ Op<1>() = NewDest;
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::CleanupRet);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+private:
+ BasicBlock *getSuccessorV(unsigned Idx) const override;
+ unsigned getNumSuccessorsV() const override;
+ void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<CleanupReturnInst>
+ : public VariadicOperandTraits<CleanupReturnInst, /*MINARITY=*/1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
+
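To tie the five new classes together, here is a minimal sketch of how their Create methods compose into a funclet-style EH skeleton. It is illustrative only, not verifier-complete: Ctx, Dispatch, Catch, Cleanup and Cont are assumed to exist elsewhere, and ConstantTokenNone (from Constants.h) serves as the parent pad of a top-level funclet.

    Value *ParentPad = ConstantTokenNone::get(Ctx);

    // catchswitch terminating the Dispatch block, unwinding to the caller,
    // with room reserved for one handler.
    auto *CS = CatchSwitchInst::Create(ParentPad, /*UnwindDest=*/nullptr,
                                       /*NumHandlers=*/1, "cs", Dispatch);
    CS->addHandler(Catch);

    // catchpad/catchret pair inside the handler block.
    auto *CP = CatchPadInst::Create(CS, /*Args=*/None, "cp", Catch);
    CatchReturnInst::Create(CP, /*BB=*/Cont, Catch);

    // cleanuppad/cleanupret pair that unwinds to the caller.
    auto *CLP = CleanupPadInst::Create(ParentPad, None, "clp", Cleanup);
    CleanupReturnInst::Create(CLP, /*UnwindBB=*/nullptr, Cleanup);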
+//===----------------------------------------------------------------------===//
// UnreachableInst Class
//===----------------------------------------------------------------------===//
@@ -3580,6 +4260,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
///
class UnreachableInst : public TerminatorInst {
void *operator new(size_t, unsigned) = delete;
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -3602,6 +4283,7 @@ public:
static inline bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+
private:
BasicBlock *getSuccessorV(unsigned idx) const override;
unsigned getNumSuccessorsV() const override;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
index 2c8b6eb..169bcc0 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
@@ -372,6 +372,39 @@ namespace llvm {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
};
-}
+
+ /// This represents the llvm.instrprof_value_profile intrinsic.
+ class InstrProfValueProfileInst : public IntrinsicInst {
+ public:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::instrprof_value_profile;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ GlobalVariable *getName() const {
+ return cast<GlobalVariable>(
+ const_cast<Value *>(getArgOperand(0))->stripPointerCasts());
+ }
+
+ ConstantInt *getHash() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ }
+
+ Value *getTargetValue() const {
+ return cast<Value>(const_cast<Value *>(getArgOperand(2)));
+ }
+
+ ConstantInt *getValueKind() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
+ }
+
+ // Returns the value site index.
+ ConstantInt *getIndex() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
+ }
+ };
+} // namespace llvm
#endif
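A hedged sketch of consuming the new wrapper when visiting instructions (the surrounding iteration and the `Instruction &I` reference are assumed):

    if (auto *VP = dyn_cast<InstrProfValueProfileInst>(&I)) {
      GlobalVariable *NameVar = VP->getName(); // name of the profiled symbol
      uint64_t Hash = VP->getHash()->getZExtValue();
      Value *Target = VP->getTargetValue();    // the expression being profiled
      uint64_t Kind = VP->getValueKind()->getZExtValue();
      uint64_t Site = VP->getIndex()->getZExtValue();
    }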
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.h b/contrib/llvm/include/llvm/IR/Intrinsics.h
index 43b8325..314e2aa 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.h
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.h
@@ -79,7 +79,7 @@ namespace Intrinsic {
/// intrinsic. This is returned by getIntrinsicInfoTableEntries.
struct IITDescriptor {
enum IITDescriptorKind {
- Void, VarArg, MMX, Metadata, Half, Float, Double,
+ Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
SameVecWidthArgument, PtrToArgument, VecOfPtrsToElt
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td
index bbae720..5a95ddc 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.td
@@ -73,8 +73,8 @@ def IntrNoReturn : IntrinsicProperty;
// Parallels the noduplicate attribute on LLVM IR functions.
def IntrNoDuplicate : IntrinsicProperty;
-// IntrConvergent - Calls to this intrinsic are convergent and may only be
-// moved to control equivalent blocks.
+// IntrConvergent - Calls to this intrinsic are convergent and may not be made
+// control-dependent on any additional values.
// Parallels the convergent attribute on LLVM IR functions.
def IntrConvergent : IntrinsicProperty;
@@ -150,16 +150,20 @@ def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
def llvm_empty_ty : LLVMType<OtherVT>; // { }
def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
+def llvm_token_ty : LLVMType<token>; // token
def llvm_x86mmx_ty : LLVMType<x86mmx>;
def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
-def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
-def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
-def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
-def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
-def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
-def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
+def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
+def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
+def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
+def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
+def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
+def llvm_v1024i1_ty : LLVMType<v1024i1>; // 1024 x i1
+
def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
@@ -167,6 +171,8 @@ def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
def llvm_v64i8_ty : LLVMType<v64i8>; // 64 x i8
+def llvm_v128i8_ty : LLVMType<v128i8>; // 128 x i8
+def llvm_v256i8_ty : LLVMType<v256i8>; // 256 x i8
def llvm_v1i16_ty : LLVMType<v1i16>; // 1 x i16
def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
@@ -174,17 +180,23 @@ def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
def llvm_v32i16_ty : LLVMType<v32i16>; // 32 x i16
+def llvm_v64i16_ty : LLVMType<v64i16>; // 64 x i16
+def llvm_v128i16_ty : LLVMType<v128i16>; // 128 x i16
def llvm_v1i32_ty : LLVMType<v1i32>; // 1 x i32
def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
+def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
+def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
+
def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
+def llvm_v32i64_ty : LLVMType<v32i64>; // 32 x i64
def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128
@@ -292,6 +304,8 @@ def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
GCCBuiltin<"__builtin_stack_restore">;
+def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>;
+
// IntrReadWriteArgMem is more pessimistic than strictly necessary for prefetch,
// however it does conveniently prevent the prefetch from being reordered
// with respect to nearby accesses to the same memory.
@@ -319,6 +333,14 @@ def int_instrprof_increment : Intrinsic<[],
llvm_i32_ty, llvm_i32_ty],
[]>;
+// A call to profile runtime for value profiling of target expressions
+// through instrumentation based profiling.
+def int_instrprof_value_profile : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i64_ty,
+ llvm_i64_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ []>;
+
//===------------------- Standard C Library Intrinsics --------------------===//
//
@@ -399,6 +421,7 @@ let Properties = [IntrNoMem] in {
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
}
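Since the new intrinsic is overloaded on its integer type, a caller names the concrete type when materializing a declaration. A minimal emission sketch, assuming an IRBuilder<> Builder, a Module *M, and an i32 Value *X:

    // Materialize the i32 overload of llvm.bitreverse and call it.
    Function *BitRev = Intrinsic::getDeclaration(
        M, Intrinsic::bitreverse, {Builder.getInt32Ty()});
    Value *Rev = Builder.CreateCall(BitRev, {X}, "rev");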
//===------------------------ Debugger Intrinsics -------------------------===//
@@ -428,17 +451,13 @@ def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
-// eh.begincatch takes a pointer returned by a landingpad instruction and
-// copies the exception object into the memory pointed to by the second
-// parameter. If the second parameter is null, no copy occurs.
-def int_eh_begincatch : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
- [NoCapture<0>, NoCapture<1>]>;
-def int_eh_endcatch : Intrinsic<[], []>;
+// eh.exceptionpointer returns the pointer to the exception caught by
+// the given `catchpad`.
+def int_eh_exceptionpointer : Intrinsic<[llvm_anyptr_ty], [llvm_token_ty],
+ [IntrNoMem]>;
-// Represents the list of actions to take when an exception is thrown.
-def int_eh_actions : Intrinsic<[llvm_ptr_ty], [llvm_vararg_ty], []>;
-
-def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+// Gets the exception code from a catchpad token. Only used on some platforms.
+def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrNoMem]>;
// __builtin_unwind_init is an undocumented GCC intrinsic that causes all
// callee-saved registers to be saved and restored (regardless of whether they
@@ -455,6 +474,7 @@ let Properties = [IntrNoMem] in {
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
+def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
//===---------------- Generic Variable Attribute Intrinsics----------------===//
//
@@ -523,6 +543,10 @@ def int_invariant_end : Intrinsic<[],
llvm_ptr_ty],
[IntrReadWriteArgMem, NoCapture<2>]>;
+def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty],
+ [IntrNoMem]>;
+
//===------------------------ Stackmap Intrinsics -------------------------===//
//
def int_experimental_stackmap : Intrinsic<[],
@@ -543,21 +567,17 @@ def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty],
//===------------------------ Garbage Collection Intrinsics ---------------===//
// These are documented in docs/Statepoint.rst
-def int_experimental_gc_statepoint : Intrinsic<[llvm_i32_ty],
+def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
[llvm_i64_ty, llvm_i32_ty,
llvm_anyptr_ty, llvm_i32_ty,
llvm_i32_ty, llvm_vararg_ty],
[Throws]>;
-def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_i32_ty]>;
+def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
+ [IntrReadMem]>;
def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
-
-// Deprecated: will be removed in a couple of weeks
-def int_experimental_gc_result_int : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty]>;
-def int_experimental_gc_result_float : Intrinsic<[llvm_anyfloat_ty],
- [llvm_i32_ty]>;
-def int_experimental_gc_result_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty]>;
+ [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrReadMem]>;
//===-------------------------- Other Intrinsics --------------------------===//
//
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7d69ed5..578f259 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -13,6 +13,9 @@
let TargetPrefix = "aarch64" in {
+def int_aarch64_thread_pointer : GCCBuiltin<"__builtin_thread_pointer">,
+ Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+
def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 510e5ad..84582e8 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -33,6 +33,14 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
+
+def int_r600_rat_store_typed :
+ // 1st parameter: Data
+ // 2nd parameter: Index
+ // 3rd parameter: Constant RAT ID
+ Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
+ GCCBuiltin<"__builtin_r600_rat_store_typed">;
+
} // End TargetPrefix = "r600"
let TargetPrefix = "AMDGPU" in {
@@ -83,3 +91,67 @@ def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
"__builtin_amdgpu_read_workdim">;
} // End TargetPrefix = "AMDGPU"
+
+let TargetPrefix = "amdgcn" in {
+
+// SI only
+def int_amdgcn_buffer_wbinvl1_sc :
+ GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
+ Intrinsic<[], [], []>;
+
+// On CI+
+def int_amdgcn_buffer_wbinvl1_vol :
+ GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
+ Intrinsic<[], [], []>;
+
+def int_amdgcn_buffer_wbinvl1 :
+ GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
+ Intrinsic<[], [], []>;
+
+def int_amdgcn_s_dcache_inv :
+ GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
+ Intrinsic<[], [], []>;
+
+// CI+
+def int_amdgcn_s_dcache_inv_vol :
+ GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
+ Intrinsic<[], [], []>;
+
+// VI
+def int_amdgcn_s_dcache_wb :
+ GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
+ Intrinsic<[], [], []>;
+
+// VI
+def int_amdgcn_s_dcache_wb_vol :
+ GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
+ Intrinsic<[], [], []>;
+
+def int_amdgcn_dispatch_ptr :
+ GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
+// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
+def int_amdgcn_interp_p1 :
+ GCCBuiltin<"__builtin_amdgcn_interp_p1">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>; // This intrinsic reads from LDS, but the memory
+ // values are constant, so it behaves like IntrNoMem.
+
+// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
+def int_amdgcn_interp_p2 :
+ GCCBuiltin<"__builtin_amdgcn_interp_p2">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>; // See int_amdgcn_interp_p1 for why this is
+ // IntrNoMem.
+
+def int_amdgcn_mbcnt_lo :
+ GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_mbcnt_hi :
+ GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+}
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsARM.td b/contrib/llvm/include/llvm/IR/IntrinsicsARM.td
index 1dff808..c1d911c 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -405,36 +405,36 @@ def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
// De-interleaving vector loads from N-element structures.
// Source operands are the address and alignment.
def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
- [llvm_ptr_ty, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [llvm_ptr_ty, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>],
- [llvm_ptr_ty, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
- [llvm_ptr_ty, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadArgMem]>;
// Vector load N-element structure to one lane.
// Source operands are: the address, the N input vectors (since only one
// lane is assigned), the lane number, and the alignment.
def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [llvm_ptr_ty, LLVMMatchType<0>,
+ [llvm_anyptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_i32_ty,
llvm_i32_ty], [IntrReadArgMem]>;
def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>],
- [llvm_ptr_ty, LLVMMatchType<0>,
+ [llvm_anyptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i32_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
- [llvm_ptr_ty, LLVMMatchType<0>,
+ [llvm_anyptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_i32_ty,
llvm_i32_ty], [IntrReadArgMem]>;
@@ -442,38 +442,38 @@ def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
// Interleaving vector stores from N-element structures.
// Source operands are: the address, the N vectors, and the alignment.
def int_arm_neon_vst1 : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
+ [llvm_anyptr_ty, llvm_anyvector_ty,
llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst2 : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_arm_neon_vst3 : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst4 : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, LLVMMatchType<1>,
+ LLVMMatchType<1>, llvm_i32_ty],
[IntrReadWriteArgMem]>;
// Vector store N-element structure from one lane.
// Source operands are: the address, the N vectors, the lane number, and
// the alignment.
def int_arm_neon_vst2lane : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, llvm_i32_ty,
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, llvm_i32_ty,
llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst3lane : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_arm_neon_vst4lane : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>, llvm_i32_ty,
+ [llvm_anyptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, LLVMMatchType<1>,
+ LLVMMatchType<1>, llvm_i32_ty,
llvm_i32_ty], [IntrReadWriteArgMem]>;
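With the address operand changed to llvm_anyptr_ty, the pointer type becomes part of the overload signature, and for the store intrinsics the repeated vector operands must therefore match overload slot 1 (the vector) rather than slot 0 (now the pointer). A hedged C++ sketch of naming both overloaded types when declaring one of these intrinsics (the concrete types are illustrative):

    Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4); // <4 x i32>
    Type *PtrTy = Type::getInt8PtrTy(Ctx);                   // i8*
    // vld1: overload slot 0 is the result vector, slot 1 the pointer.
    Function *Vld1 = Intrinsic::getDeclaration(
        M, Intrinsic::arm_neon_vld1, {VecTy, PtrTy});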
// Vector bitwise select.
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 78ee651..ca6fcbd 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -32,14 +32,16 @@ class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem]>;
+
//
// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) ->
// Hexagon_void_si_Intrinsic<string GCCIntSuffix>
//
class Hexagon_void_si_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_void_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty],
+ []>;
+
//
// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
@@ -458,6 +460,11 @@ class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
llvm_i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
+class Hexagon_v256_v256v256_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrReadWriteArgMem]>;
+
//
// Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
//
@@ -756,6 +763,12 @@ def int_hexagon_circ_stb :
Hexagon_mem_memsisisi_Intrinsic<"circ_stb">;
+def int_hexagon_mm256i_vaddw :
+Hexagon_v256_v256v256_Intrinsic<"_mm256i_vaddw">;
+
+// The def above will not be auto-generated,
+// so make sure you don't overwrite it.
//
// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
//
@@ -4946,6 +4959,11 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">;
//
def int_hexagon_S2_deinterleave :
Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">;
+//
+// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1)
+//
+def int_hexagon_prefetch :
+Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">;
def llvm_ptr32_ty : LLVMPointerType<llvm_i32_ty>;
def llvm_ptr64_ty : LLVMPointerType<llvm_i64_ty>;
@@ -4964,3 +4982,4392 @@ Hexagon_Intrinsic<"HEXAGON_S2_storew_locked", [llvm_i32_ty],
def int_hexagon_S4_stored_locked :
Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty],
[llvm_ptr64_ty, llvm_i64_ty], [IntrReadWriteArgMem, NoCapture<0>]>;
+
+// V60
+
+class Hexagon_v2048v2048_Intrinsic_T<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_hi_W
+// tag : V6_lo_W
+class Hexagon_v512v1024_Intrinsic_T<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_hi_W_128B
+// tag : V6_lo_W_128B
+class Hexagon_v1024v2048_Intrinsic_T<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+class Hexagon_v1024v1024_Intrinsic_T<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
+// tag : V6_hi
+def int_hexagon_V6_hi :
+Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_hi">;
+
+// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
+// tag : V6_lo
+def int_hexagon_V6_lo :
+Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_lo">;
+
+// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
+// tag : V6_hi_128B
+def int_hexagon_V6_hi_128B :
+Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_hi_128B">;
+
+// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
+// tag : V6_lo_128B
+def int_hexagon_V6_lo_128B :
+Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_lo_128B">;
+
+// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
+// tag : V6_vassignp
+def int_hexagon_V6_vassignp :
+Hexagon_v1024v1024_Intrinsic_T<"HEXAGON_V6_vassignp">;
+
+// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
+// tag : V6_vassignp_128B
+def int_hexagon_V6_vassignp_128B :
+Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">;
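For readers decoding the class names that follow: v512/v1024/v2048 denote HVX vector values of that many bits, modeled as v16i32/v32i32/v64i32, while v64i/v128i denote the v512i1/v1024i1 predicate types. A hedged sketch of what the "v512 from v1024" shape of V6_lo means in C++ terms (M, Builder, and the v32i32 value Pair are assumed; the generated enum name is an assumption):

    // Extract the low 512-bit half of a 1024-bit HVX register pair.
    Function *Lo = Intrinsic::getDeclaration(M, Intrinsic::hexagon_V6_lo);
    Value *LoHalf = Builder.CreateCall(Lo, {Pair}, "lo"); // v32i32 -> v16i32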
+
+//
+// Hexagon_iii_Intrinsic<string GCCIntSuffix>
+// tag : S6_rol_i_r
+class Hexagon_iii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
+// tag : S6_rol_i_p
+class Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_iiii_Intrinsic<string GCCIntSuffix>
+// tag : S6_rol_i_r_acc
+class Hexagon_iiii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
+// tag : S6_rol_i_p_acc
+class Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_valignb
+class Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_valignb_128B
+class Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vror
+class Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vror_128B
+class Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vunpackub
+class Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vunpackub_128B
+class Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vunpackob
+class Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vunpackob_128B
+class Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vpackeb
+class Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vpackeb_128B
+class Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpybus_dv_128B
+class Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpybus_dv_acc_128B
+class Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhvsat_acc
+class Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhvsat_acc_128B
+class Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhisat
+class Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhisat_128B
+class Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhisat_acc
+class Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vdmpyhisat_acc_128B
+class Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vrmpyubi
+class Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vrmpyubi_128B
+class Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vrmpyubi_acc
+class Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vrmpyubi_acc_128B
+class Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
+// tag : V6_vaddb_dv_128B
+class Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vaddubh
+class Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vaddubh_128B
+class Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vd0
+class Hexagon_v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vd0_128B
+class Hexagon_v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vaddbq
+class Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vaddbq_128B
+class Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vabsh
+class Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vabsh_128B
+class Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpybv_acc
+class Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpybv_acc_128B
+class Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpyub
+class Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpyub_128B
+class Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpyub_acc
+class Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vmpyub_acc_128B
+class Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandqrt
+class Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandqrt_128B
+class Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandqrt_acc
+class Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandqrt_acc_128B
+class Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandvrt
+class Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandvrt_128B
+class Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandvrt_acc
+class Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vandvrt_acc_128B
+class Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vgtw
+class Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vgtw_128B
+class Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vgtw_and
+class Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vgtw_and_128B
+class Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_or
+class Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_or_128B
+class Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_not
+class Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_not_128B
+class Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_scalar2
+class Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
+// tag : V6_pred_scalar2_128B
+class Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
+// tag : V6_vswap
+class Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+// tag : V6_vswap_128B
+class Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vshuffvdd
+class Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vshuffvdd_128B
+class Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+
+//
+// Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_extractw
+class Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_extractw_128B
+class Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_lvsplatw
+class Hexagon_v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_lvsplatw_128B
+class Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb
+class Hexagon_v512v512LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb_128B
+class Hexagon_v1024v1024LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v512LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb_acc
+class Hexagon_v512v512v512LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb_acc_128B
+class Hexagon_v1024v1024v1024LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb_dv_128B
+class Hexagon_v2048v2048LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v2048LLii_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutb_dv_acc_128B
+class Hexagon_v2048v2048v2048LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutvvb_oracc
+class Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutvvb_oracc_128B
+class Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutvwh_oracc
+class Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
+// tag : V6_vlutvwh_oracc_128B
+class Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
+// tag : M6_vabsdiffb
+class Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+//
+// Hexagon_LLii_Intrinsic<string GCCIntSuffix>
+// tag : S6_vsplatrbp
+class Hexagon_LLii_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
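+//
+// Note on the wrapper-class naming scheme above: each class name encodes the
+// return type followed by the operand types. Within this file, "v512" denotes
+// llvm_v16i32_ty (one 512-bit HVX vector), "v1024" llvm_v32i32_ty, "v2048"
+// llvm_v64i32_ty, "v64i" and "v128i" the llvm_v512i1_ty and llvm_v1024i1_ty
+// predicate types, "i" llvm_i32_ty, and "LLi" llvm_i64_ty. As an illustrative
+// sketch (not part of the generated catalog itself), a definition such as
+//
+//   def int_hexagon_V6_vaddb :
+//   Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+//
+// yields an intrinsic that is invoked from IR roughly as
+//
+//   %r = call <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32> %a, <16 x i32> %b)
+//
+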
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r,SI_ftype_SISI,2)
+// tag : S6_rol_i_r
+def int_hexagon_S6_rol_i_r :
+Hexagon_iii_Intrinsic<"HEXAGON_S6_rol_i_r">;
+
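+//
+// The BUILTIN_INFO comment on each definition below records the GCC builtin
+// signature it was generated from: "SI" and "DI" are 32-bit and 64-bit
+// integers, "VI" a single HVX vector, "VD" an HVX vector pair, "QV" a
+// predicate vector, and the trailing number is the operand count. So
+// SI_ftype_SISI above reads as "i32 function of (i32, i32)", matching
+// Hexagon_iii_Intrinsic.
+//
+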
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p,DI_ftype_DISI,2)
+// tag : S6_rol_i_p
+def int_hexagon_S6_rol_i_p :
+Hexagon_LLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r_acc,SI_ftype_SISISI,3)
+// tag : S6_rol_i_r_acc
+def int_hexagon_S6_rol_i_r_acc :
+Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+
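+//
+// The _acc/_nac/_xacc/_and/_or variants take one extra leading operand of the
+// result type (presumably the accumulator value being updated), which is why
+// S6_rol_i_r_acc uses Hexagon_iiii_Intrinsic where S6_rol_i_r above uses
+// Hexagon_iii_Intrinsic.
+//
+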
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p_acc,DI_ftype_DIDISI,3)
+// tag : S6_rol_i_p_acc
+def int_hexagon_S6_rol_i_p_acc :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r_nac,SI_ftype_SISISI,3)
+// tag : S6_rol_i_r_nac
+def int_hexagon_S6_rol_i_r_nac :
+Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p_nac,DI_ftype_DIDISI,3)
+// tag : S6_rol_i_p_nac
+def int_hexagon_S6_rol_i_p_nac :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r_xacc,SI_ftype_SISISI,3)
+// tag : S6_rol_i_r_xacc
+def int_hexagon_S6_rol_i_r_xacc :
+Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p_xacc,DI_ftype_DIDISI,3)
+// tag : S6_rol_i_p_xacc
+def int_hexagon_S6_rol_i_p_xacc :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r_and,SI_ftype_SISISI,3)
+// tag : S6_rol_i_r_and
+def int_hexagon_S6_rol_i_r_and :
+Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_r_or,SI_ftype_SISISI,3)
+// tag : S6_rol_i_r_or
+def int_hexagon_S6_rol_i_r_or :
+Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p_and,DI_ftype_DIDISI,3)
+// tag : S6_rol_i_p_and
+def int_hexagon_S6_rol_i_p_and :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_rol_i_p_or,DI_ftype_DIDISI,3)
+// tag : S6_rol_i_p_or
+def int_hexagon_S6_rol_i_p_or :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.S2_cabacencbin,DI_ftype_DIDIQI,3)
+// tag : S2_cabacencbin
+def int_hexagon_S2_cabacencbin :
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S2_cabacencbin">;
+
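+//
+// Note that the QI (byte) operand in the BUILTIN_INFO line above is widened
+// to llvm_i32_ty in the intrinsic signature, so S2_cabacencbin can reuse
+// Hexagon_LLiLLiLLii_Intrinsic.
+//
+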
+//
+// BUILTIN_INFO(HEXAGON.V6_valignb,VI_ftype_VIVISI,3)
+// tag : V6_valignb
+def int_hexagon_V6_valignb :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_valignb_128B,VI_ftype_VIVISI,3)
+// tag : V6_valignb_128B
+def int_hexagon_V6_valignb_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignb_128B">;
+
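+//
+// Each V6 intrinsic below appears in two flavors: the base form for 64-byte
+// HVX vectors and a _128B form for 128-byte vectors, with every vector type
+// doubled accordingly (here, v512v512v512i becomes v1024v1024v1024i for
+// V6_valignb_128B).
+//
+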
+//
+// BUILTIN_INFO(HEXAGON.V6_vlalignb,VI_ftype_VIVISI,3)
+// tag : V6_vlalignb
+def int_hexagon_V6_vlalignb :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlalignb_128B,VI_ftype_VIVISI,3)
+// tag : V6_vlalignb_128B
+def int_hexagon_V6_vlalignb_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_valignbi,VI_ftype_VIVISI,3)
+// tag : V6_valignbi
+def int_hexagon_V6_valignbi :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignbi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_valignbi_128B,VI_ftype_VIVISI,3)
+// tag : V6_valignbi_128B
+def int_hexagon_V6_valignbi_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignbi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlalignbi,VI_ftype_VIVISI,3)
+// tag : V6_vlalignbi
+def int_hexagon_V6_vlalignbi :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignbi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlalignbi_128B,VI_ftype_VIVISI,3)
+// tag : V6_vlalignbi_128B
+def int_hexagon_V6_vlalignbi_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vror,VI_ftype_VISI,2)
+// tag : V6_vror
+def int_hexagon_V6_vror :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vror">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vror_128B,VI_ftype_VISI,2)
+// tag : V6_vror_128B
+def int_hexagon_V6_vror_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vror_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackub,VD_ftype_VI,1)
+// tag : V6_vunpackub
+def int_hexagon_V6_vunpackub :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackub_128B,VD_ftype_VI,1)
+// tag : V6_vunpackub_128B
+def int_hexagon_V6_vunpackub_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackb,VD_ftype_VI,1)
+// tag : V6_vunpackb
+def int_hexagon_V6_vunpackb :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackb_128B,VD_ftype_VI,1)
+// tag : V6_vunpackb_128B
+def int_hexagon_V6_vunpackb_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackuh,VD_ftype_VI,1)
+// tag : V6_vunpackuh
+def int_hexagon_V6_vunpackuh :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackuh_128B,VD_ftype_VI,1)
+// tag : V6_vunpackuh_128B
+def int_hexagon_V6_vunpackuh_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackh,VD_ftype_VI,1)
+// tag : V6_vunpackh
+def int_hexagon_V6_vunpackh :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackh_128B,VD_ftype_VI,1)
+// tag : V6_vunpackh_128B
+def int_hexagon_V6_vunpackh_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackob,VD_ftype_VDVI,2)
+// tag : V6_vunpackob
+def int_hexagon_V6_vunpackob :
+Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackob">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackob_128B,VD_ftype_VDVI,2)
+// tag : V6_vunpackob_128B
+def int_hexagon_V6_vunpackob_128B :
+Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackob_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackoh,VD_ftype_VDVI,2)
+// tag : V6_vunpackoh
+def int_hexagon_V6_vunpackoh :
+Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vunpackoh_128B,VD_ftype_VDVI,2)
+// tag : V6_vunpackoh_128B
+def int_hexagon_V6_vunpackoh_128B :
+Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackeb,VI_ftype_VIVI,2)
+// tag : V6_vpackeb
+def int_hexagon_V6_vpackeb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackeb_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackeb_128B
+def int_hexagon_V6_vpackeb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackeh,VI_ftype_VIVI,2)
+// tag : V6_vpackeh
+def int_hexagon_V6_vpackeh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackeh_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackeh_128B
+def int_hexagon_V6_vpackeh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackob,VI_ftype_VIVI,2)
+// tag : V6_vpackob
+def int_hexagon_V6_vpackob :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackob">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackob_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackob_128B
+def int_hexagon_V6_vpackob_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackob_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackoh,VI_ftype_VIVI,2)
+// tag : V6_vpackoh
+def int_hexagon_V6_vpackoh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackoh_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackoh_128B
+def int_hexagon_V6_vpackoh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat,VI_ftype_VIVI,2)
+// tag : V6_vpackhub_sat
+def int_hexagon_V6_vpackhub_sat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhub_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackhub_sat_128B
+def int_hexagon_V6_vpackhub_sat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat,VI_ftype_VIVI,2)
+// tag : V6_vpackhb_sat
+def int_hexagon_V6_vpackhb_sat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhb_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackhb_sat_128B
+def int_hexagon_V6_vpackhb_sat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat,VI_ftype_VIVI,2)
+// tag : V6_vpackwuh_sat
+def int_hexagon_V6_vpackwuh_sat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwuh_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackwuh_sat_128B
+def int_hexagon_V6_vpackwuh_sat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat,VI_ftype_VIVI,2)
+// tag : V6_vpackwh_sat
+def int_hexagon_V6_vpackwh_sat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwh_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat_128B,VI_ftype_VIVI,2)
+// tag : V6_vpackwh_sat_128B
+def int_hexagon_V6_vpackwh_sat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vzb,VD_ftype_VI,1)
+// tag : V6_vzb
+def int_hexagon_V6_vzb :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vzb_128B,VD_ftype_VI,1)
+// tag : V6_vzb_128B
+def int_hexagon_V6_vzb_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsb,VD_ftype_VI,1)
+// tag : V6_vsb
+def int_hexagon_V6_vsb :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsb_128B,VD_ftype_VI,1)
+// tag : V6_vsb_128B
+def int_hexagon_V6_vsb_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vzh,VD_ftype_VI,1)
+// tag : V6_vzh
+def int_hexagon_V6_vzh :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vzh_128B,VD_ftype_VI,1)
+// tag : V6_vzh_128B
+def int_hexagon_V6_vzh_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsh,VD_ftype_VI,1)
+// tag : V6_vsh
+def int_hexagon_V6_vsh :
+Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsh_128B,VD_ftype_VI,1)
+// tag : V6_vsh_128B
+def int_hexagon_V6_vsh_128B :
+Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus,VI_ftype_VISI,2)
+// tag : V6_vdmpybus
+def int_hexagon_V6_vdmpybus :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_128B,VI_ftype_VISI,2)
+// tag : V6_vdmpybus_128B
+def int_hexagon_V6_vdmpybus_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc,VI_ftype_VIVISI,3)
+// tag : V6_vdmpybus_acc
+def int_hexagon_V6_vdmpybus_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vdmpybus_acc_128B
+def int_hexagon_V6_vdmpybus_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv,VD_ftype_VDSI,2)
+// tag : V6_vdmpybus_dv
+def int_hexagon_V6_vdmpybus_dv :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_128B,VD_ftype_VDSI,2)
+// tag : V6_vdmpybus_dv_128B
+def int_hexagon_V6_vdmpybus_dv_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vdmpybus_dv_acc
+def int_hexagon_V6_vdmpybus_dv_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vdmpybus_dv_acc_128B
+def int_hexagon_V6_vdmpybus_dv_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb,VI_ftype_VISI,2)
+// tag : V6_vdmpyhb
+def int_hexagon_V6_vdmpyhb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_128B,VI_ftype_VISI,2)
+// tag : V6_vdmpyhb_128B
+def int_hexagon_V6_vdmpyhb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhb_acc
+def int_hexagon_V6_vdmpyhb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhb_acc_128B
+def int_hexagon_V6_vdmpyhb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv,VD_ftype_VDSI,2)
+// tag : V6_vdmpyhb_dv
+def int_hexagon_V6_vdmpyhb_dv :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_128B,VD_ftype_VDSI,2)
+// tag : V6_vdmpyhb_dv_128B
+def int_hexagon_V6_vdmpyhb_dv_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vdmpyhb_dv_acc
+def int_hexagon_V6_vdmpyhb_dv_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vdmpyhb_dv_acc_128B
+def int_hexagon_V6_vdmpyhb_dv_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat,VI_ftype_VIVI,2)
+// tag : V6_vdmpyhvsat
+def int_hexagon_V6_vdmpyhvsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vdmpyhvsat_128B
+def int_hexagon_V6_vdmpyhvsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vdmpyhvsat_acc
+def int_hexagon_V6_vdmpyhvsat_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vdmpyhvsat_acc_128B
+def int_hexagon_V6_vdmpyhvsat_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat,VI_ftype_VISI,2)
+// tag : V6_vdmpyhsat
+def int_hexagon_V6_vdmpyhsat :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_128B,VI_ftype_VISI,2)
+// tag : V6_vdmpyhsat_128B
+def int_hexagon_V6_vdmpyhsat_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhsat_acc
+def int_hexagon_V6_vdmpyhsat_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhsat_acc_128B
+def int_hexagon_V6_vdmpyhsat_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat,VI_ftype_VDSI,2)
+// tag : V6_vdmpyhisat
+def int_hexagon_V6_vdmpyhisat :
+Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_128B,VI_ftype_VDSI,2)
+// tag : V6_vdmpyhisat_128B
+def int_hexagon_V6_vdmpyhisat_128B :
+Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc,VI_ftype_VIVDSI,3)
+// tag : V6_vdmpyhisat_acc
+def int_hexagon_V6_vdmpyhisat_acc :
+Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc_128B,VI_ftype_VIVDSI,3)
+// tag : V6_vdmpyhisat_acc_128B
+def int_hexagon_V6_vdmpyhisat_acc_128B :
+Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat,VI_ftype_VISI,2)
+// tag : V6_vdmpyhsusat
+def int_hexagon_V6_vdmpyhsusat :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_128B,VI_ftype_VISI,2)
+// tag : V6_vdmpyhsusat_128B
+def int_hexagon_V6_vdmpyhsusat_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhsusat_acc
+def int_hexagon_V6_vdmpyhsusat_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vdmpyhsusat_acc_128B
+def int_hexagon_V6_vdmpyhsusat_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat,VI_ftype_VDSI,2)
+// tag : V6_vdmpyhsuisat
+def int_hexagon_V6_vdmpyhsuisat :
+Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_128B,VI_ftype_VDSI,2)
+// tag : V6_vdmpyhsuisat_128B
+def int_hexagon_V6_vdmpyhsuisat_128B :
+Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc,VI_ftype_VIVDSI,3)
+// tag : V6_vdmpyhsuisat_acc
+def int_hexagon_V6_vdmpyhsuisat_acc :
+Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc_128B,VI_ftype_VIVDSI,3)
+// tag : V6_vdmpyhsuisat_acc_128B
+def int_hexagon_V6_vdmpyhsuisat_acc_128B :
+Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyb,VD_ftype_VDSI,2)
+// tag : V6_vtmpyb
+def int_hexagon_V6_vtmpyb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyb_128B,VD_ftype_VDSI,2)
+// tag : V6_vtmpyb_128B
+def int_hexagon_V6_vtmpyb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyb_acc
+def int_hexagon_V6_vtmpyb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyb_acc_128B
+def int_hexagon_V6_vtmpyb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus,VD_ftype_VDSI,2)
+// tag : V6_vtmpybus
+def int_hexagon_V6_vtmpybus :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus_128B,VD_ftype_VDSI,2)
+// tag : V6_vtmpybus_128B
+def int_hexagon_V6_vtmpybus_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpybus_acc
+def int_hexagon_V6_vtmpybus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpybus_acc_128B
+def int_hexagon_V6_vtmpybus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb
+def int_hexagon_V6_vtmpyhb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_128B,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb_128B
+def int_hexagon_V6_vtmpyhb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc
+def int_hexagon_V6_vtmpyhb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc_128B
+def int_hexagon_V6_vtmpyhb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub,VI_ftype_VISI,2)
+// tag : V6_vrmpyub
+def int_hexagon_V6_vrmpyub :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpyub_128B
+def int_hexagon_V6_vrmpyub_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc
+def int_hexagon_V6_vrmpyub_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc_128B
+def int_hexagon_V6_vrmpyub_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv
+def int_hexagon_V6_vrmpyubv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv_128B
+def int_hexagon_V6_vrmpyubv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc
+def int_hexagon_V6_vrmpyubv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc_128B
+def int_hexagon_V6_vrmpyubv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv
+def int_hexagon_V6_vrmpybv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv_128B
+def int_hexagon_V6_vrmpybv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc
+def int_hexagon_V6_vrmpybv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc_128B
+def int_hexagon_V6_vrmpybv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi
+def int_hexagon_V6_vrmpyubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi_128B
+def int_hexagon_V6_vrmpyubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc
+def int_hexagon_V6_vrmpyubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc_128B
+def int_hexagon_V6_vrmpyubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus,VI_ftype_VISI,2)
+// tag : V6_vrmpybus
+def int_hexagon_V6_vrmpybus :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpybus_128B
+def int_hexagon_V6_vrmpybus_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc
+def int_hexagon_V6_vrmpybus_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc_128B
+def int_hexagon_V6_vrmpybus_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi
+def int_hexagon_V6_vrmpybusi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi_128B
+def int_hexagon_V6_vrmpybusi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc
+def int_hexagon_V6_vrmpybusi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc_128B
+def int_hexagon_V6_vrmpybusi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv
+def int_hexagon_V6_vrmpybusv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv_128B
+def int_hexagon_V6_vrmpybusv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc
+def int_hexagon_V6_vrmpybusv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc_128B
+def int_hexagon_V6_vrmpybusv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh
+def int_hexagon_V6_vdsaduh :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_128B,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh_128B
+def int_hexagon_V6_vdsaduh_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc
+def int_hexagon_V6_vdsaduh_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc_128B
+def int_hexagon_V6_vdsaduh_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi
+def int_hexagon_V6_vrsadubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi_128B
+def int_hexagon_V6_vrsadubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc
+def int_hexagon_V6_vrsadubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc_128B
+def int_hexagon_V6_vrsadubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw,VI_ftype_VISI,2)
+// tag : V6_vasrw
+def int_hexagon_V6_vasrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_128B,VI_ftype_VISI,2)
+// tag : V6_vasrw_128B
+def int_hexagon_V6_vasrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw,VI_ftype_VISI,2)
+// tag : V6_vaslw
+def int_hexagon_V6_vaslw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_128B,VI_ftype_VISI,2)
+// tag : V6_vaslw_128B
+def int_hexagon_V6_vaslw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw,VI_ftype_VISI,2)
+// tag : V6_vlsrw
+def int_hexagon_V6_vlsrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrw_128B
+def int_hexagon_V6_vlsrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv,VI_ftype_VIVI,2)
+// tag : V6_vasrwv
+def int_hexagon_V6_vasrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrwv_128B
+def int_hexagon_V6_vasrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv,VI_ftype_VIVI,2)
+// tag : V6_vaslwv
+def int_hexagon_V6_vaslwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslwv_128B
+def int_hexagon_V6_vaslwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv
+def int_hexagon_V6_vlsrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv_128B
+def int_hexagon_V6_vlsrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh,VI_ftype_VISI,2)
+// tag : V6_vasrh
+def int_hexagon_V6_vasrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh_128B,VI_ftype_VISI,2)
+// tag : V6_vasrh_128B
+def int_hexagon_V6_vasrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh,VI_ftype_VISI,2)
+// tag : V6_vaslh
+def int_hexagon_V6_vaslh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh_128B,VI_ftype_VISI,2)
+// tag : V6_vaslh_128B
+def int_hexagon_V6_vaslh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh,VI_ftype_VISI,2)
+// tag : V6_vlsrh
+def int_hexagon_V6_vlsrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrh_128B
+def int_hexagon_V6_vlsrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv,VI_ftype_VIVI,2)
+// tag : V6_vasrhv
+def int_hexagon_V6_vasrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrhv_128B
+def int_hexagon_V6_vasrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv,VI_ftype_VIVI,2)
+// tag : V6_vaslhv
+def int_hexagon_V6_vaslhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslhv_128B
+def int_hexagon_V6_vaslhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv
+def int_hexagon_V6_vlsrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv_128B
+def int_hexagon_V6_vlsrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh
+def int_hexagon_V6_vasrwh :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh_128B
+def int_hexagon_V6_vasrwh_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat
+def int_hexagon_V6_vasrwhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat_128B
+def int_hexagon_V6_vasrwhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat
+def int_hexagon_V6_vasrwhrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat_128B
+def int_hexagon_V6_vasrwhrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat
+def int_hexagon_V6_vasrwuhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat_128B
+def int_hexagon_V6_vasrwuhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh,VI_ftype_VIVI,2)
+// tag : V6_vroundwh
+def int_hexagon_V6_vroundwh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwh_128B
+def int_hexagon_V6_vroundwh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh
+def int_hexagon_V6_vroundwuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh_128B
+def int_hexagon_V6_vroundwuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat
+def int_hexagon_V6_vasrhubsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat_128B
+def int_hexagon_V6_vasrhubsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat
+def int_hexagon_V6_vasrhubrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat_128B
+def int_hexagon_V6_vasrhubrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat
+def int_hexagon_V6_vasrhbrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat_128B
+def int_hexagon_V6_vasrhbrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb,VI_ftype_VIVI,2)
+// tag : V6_vroundhb
+def int_hexagon_V6_vroundhb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhb_128B
+def int_hexagon_V6_vroundhb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub,VI_ftype_VIVI,2)
+// tag : V6_vroundhub
+def int_hexagon_V6_vroundhub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhub_128B
+def int_hexagon_V6_vroundhub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc
+def int_hexagon_V6_vaslw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc_128B
+def int_hexagon_V6_vaslw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc
+def int_hexagon_V6_vasrw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc_128B
+def int_hexagon_V6_vasrw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb,VI_ftype_VIVI,2)
+// tag : V6_vaddb
+def int_hexagon_V6_vaddb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddb_128B
+def int_hexagon_V6_vaddb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb,VI_ftype_VIVI,2)
+// tag : V6_vsubb
+def int_hexagon_V6_vsubb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubb_128B
+def int_hexagon_V6_vsubb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv
+def int_hexagon_V6_vaddb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv_128B
+def int_hexagon_V6_vaddb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv
+def int_hexagon_V6_vsubb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv_128B
+def int_hexagon_V6_vsubb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh,VI_ftype_VIVI,2)
+// tag : V6_vaddh
+def int_hexagon_V6_vaddh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddh_128B
+def int_hexagon_V6_vaddh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh,VI_ftype_VIVI,2)
+// tag : V6_vsubh
+def int_hexagon_V6_vsubh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubh_128B
+def int_hexagon_V6_vsubh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv
+def int_hexagon_V6_vaddh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv_128B
+def int_hexagon_V6_vaddh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv
+def int_hexagon_V6_vsubh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv_128B
+def int_hexagon_V6_vsubh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw,VI_ftype_VIVI,2)
+// tag : V6_vaddw
+def int_hexagon_V6_vaddw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddw_128B
+def int_hexagon_V6_vaddw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw,VI_ftype_VIVI,2)
+// tag : V6_vsubw
+def int_hexagon_V6_vsubw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubw_128B
+def int_hexagon_V6_vsubw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv
+def int_hexagon_V6_vaddw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv_128B
+def int_hexagon_V6_vaddw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv
+def int_hexagon_V6_vsubw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv_128B
+def int_hexagon_V6_vsubw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat
+def int_hexagon_V6_vaddubsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat_128B
+def int_hexagon_V6_vaddubsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv
+def int_hexagon_V6_vaddubsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv_128B
+def int_hexagon_V6_vaddubsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat,VI_ftype_VIVI,2)
+// tag : V6_vsububsat
+def int_hexagon_V6_vsububsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsububsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsububsat_128B
+def int_hexagon_V6_vsububsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv
+def int_hexagon_V6_vsububsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv_128B
+def int_hexagon_V6_vsububsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat
+def int_hexagon_V6_vadduhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vadduhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat_128B
+def int_hexagon_V6_vadduhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv
+def int_hexagon_V6_vadduhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv_128B
+def int_hexagon_V6_vadduhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat
+def int_hexagon_V6_vsubuhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat_128B
+def int_hexagon_V6_vsubuhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv
+def int_hexagon_V6_vsubuhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv_128B
+def int_hexagon_V6_vsubuhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat
+def int_hexagon_V6_vaddhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat_128B
+def int_hexagon_V6_vaddhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv
+def int_hexagon_V6_vaddhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv_128B
+def int_hexagon_V6_vaddhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat
+def int_hexagon_V6_vsubhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat_128B
+def int_hexagon_V6_vsubhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv
+def int_hexagon_V6_vsubhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv_128B
+def int_hexagon_V6_vsubhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat
+def int_hexagon_V6_vaddwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat_128B
+def int_hexagon_V6_vaddwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv
+def int_hexagon_V6_vaddwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv_128B
+def int_hexagon_V6_vaddwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat
+def int_hexagon_V6_vsubwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat_128B
+def int_hexagon_V6_vsubwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv
+def int_hexagon_V6_vsubwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv_128B
+def int_hexagon_V6_vsubwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub,VI_ftype_VIVI,2)
+// tag : V6_vavgub
+def int_hexagon_V6_vavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgub_128B
+def int_hexagon_V6_vavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd
+def int_hexagon_V6_vavgubrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd_128B
+def int_hexagon_V6_vavgubrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh,VI_ftype_VIVI,2)
+// tag : V6_vavguh
+def int_hexagon_V6_vavguh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguh_128B
+def int_hexagon_V6_vavguh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd
+def int_hexagon_V6_vavguhrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd_128B
+def int_hexagon_V6_vavguhrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh,VI_ftype_VIVI,2)
+// tag : V6_vavgh
+def int_hexagon_V6_vavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgh_128B
+def int_hexagon_V6_vavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd
+def int_hexagon_V6_vavghrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavghrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd_128B
+def int_hexagon_V6_vavghrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh,VI_ftype_VIVI,2)
+// tag : V6_vnavgh
+def int_hexagon_V6_vnavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgh_128B
+def int_hexagon_V6_vnavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw,VI_ftype_VIVI,2)
+// tag : V6_vavgw
+def int_hexagon_V6_vavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgw_128B
+def int_hexagon_V6_vavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd
+def int_hexagon_V6_vavgwrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgwrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd_128B
+def int_hexagon_V6_vavgwrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw,VI_ftype_VIVI,2)
+// tag : V6_vnavgw
+def int_hexagon_V6_vnavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgw_128B
+def int_hexagon_V6_vnavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub
+def int_hexagon_V6_vabsdiffub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub_128B
+def int_hexagon_V6_vabsdiffub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh
+def int_hexagon_V6_vabsdiffuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh_128B
+def int_hexagon_V6_vabsdiffuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh
+def int_hexagon_V6_vabsdiffh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh_128B
+def int_hexagon_V6_vabsdiffh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw
+def int_hexagon_V6_vabsdiffw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw_128B
+def int_hexagon_V6_vabsdiffw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub,VI_ftype_VIVI,2)
+// tag : V6_vnavgub
+def int_hexagon_V6_vnavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgub_128B
+def int_hexagon_V6_vnavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh,VD_ftype_VIVI,2)
+// tag : V6_vaddubh
+def int_hexagon_V6_vaddubh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddubh_128B
+def int_hexagon_V6_vaddubh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh,VD_ftype_VIVI,2)
+// tag : V6_vsububh
+def int_hexagon_V6_vsububh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsububh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh_128B,VD_ftype_VIVI,2)
+// tag : V6_vsububh_128B
+def int_hexagon_V6_vsububh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsububh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw,VD_ftype_VIVI,2)
+// tag : V6_vaddhw
+def int_hexagon_V6_vaddhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddhw_128B
+def int_hexagon_V6_vaddhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw,VD_ftype_VIVI,2)
+// tag : V6_vsubhw
+def int_hexagon_V6_vsubhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubhw_128B
+def int_hexagon_V6_vsubhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw,VD_ftype_VIVI,2)
+// tag : V6_vadduhw
+def int_hexagon_V6_vadduhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vadduhw_128B
+def int_hexagon_V6_vadduhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw
+def int_hexagon_V6_vsubuhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubuhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw_128B
+def int_hexagon_V6_vsubuhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0,VI_ftype_,0)
+// tag : V6_vd0
+def int_hexagon_V6_vd0 :
+Hexagon_v512_Intrinsic<"HEXAGON_V6_vd0">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0_128B,VI_ftype_,0)
+// tag : V6_vd0_128B
+def int_hexagon_V6_vd0_128B :
+Hexagon_v1024_Intrinsic<"HEXAGON_V6_vd0_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq
+def int_hexagon_V6_vaddbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq_128B
+def int_hexagon_V6_vaddbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq
+def int_hexagon_V6_vsubbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq_128B
+def int_hexagon_V6_vsubbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq
+def int_hexagon_V6_vaddbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq_128B
+def int_hexagon_V6_vaddbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq
+def int_hexagon_V6_vsubbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq_128B
+def int_hexagon_V6_vsubbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq
+def int_hexagon_V6_vaddhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq_128B
+def int_hexagon_V6_vaddhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq
+def int_hexagon_V6_vsubhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq_128B
+def int_hexagon_V6_vsubhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq
+def int_hexagon_V6_vaddhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq_128B
+def int_hexagon_V6_vaddhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq
+def int_hexagon_V6_vsubhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq_128B
+def int_hexagon_V6_vsubhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq
+def int_hexagon_V6_vaddwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq_128B
+def int_hexagon_V6_vaddwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq
+def int_hexagon_V6_vsubwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq_128B
+def int_hexagon_V6_vsubwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq
+def int_hexagon_V6_vaddwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq_128B
+def int_hexagon_V6_vaddwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq
+def int_hexagon_V6_vsubwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq_128B
+def int_hexagon_V6_vsubwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh,VI_ftype_VI,1)
+// tag : V6_vabsh
+def int_hexagon_V6_vabsh :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_128B
+def int_hexagon_V6_vabsh_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat,VI_ftype_VI,1)
+// tag : V6_vabsh_sat
+def int_hexagon_V6_vabsh_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_sat_128B
+def int_hexagon_V6_vabsh_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw,VI_ftype_VI,1)
+// tag : V6_vabsw
+def int_hexagon_V6_vabsw :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_128B
+def int_hexagon_V6_vabsw_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat,VI_ftype_VI,1)
+// tag : V6_vabsw_sat
+def int_hexagon_V6_vabsw_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_sat_128B
+def int_hexagon_V6_vabsw_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv,VD_ftype_VIVI,2)
+// tag : V6_vmpybv
+def int_hexagon_V6_vmpybv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybv_128B
+def int_hexagon_V6_vmpybv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc
+def int_hexagon_V6_vmpybv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc_128B
+def int_hexagon_V6_vmpybv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv
+def int_hexagon_V6_vmpyubv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv_128B
+def int_hexagon_V6_vmpyubv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc
+def int_hexagon_V6_vmpyubv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc_128B
+def int_hexagon_V6_vmpyubv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv
+def int_hexagon_V6_vmpybusv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv_128B
+def int_hexagon_V6_vmpybusv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc
+def int_hexagon_V6_vmpybusv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc_128B
+def int_hexagon_V6_vmpybusv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv
+def int_hexagon_V6_vmpabusv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv_128B
+def int_hexagon_V6_vmpabusv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv
+def int_hexagon_V6_vmpabuuv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv_128B
+def int_hexagon_V6_vmpabuuv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv
+def int_hexagon_V6_vmpyhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv_128B
+def int_hexagon_V6_vmpyhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc
+def int_hexagon_V6_vmpyhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc_128B
+def int_hexagon_V6_vmpyhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv
+def int_hexagon_V6_vmpyuhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv_128B
+def int_hexagon_V6_vmpyuhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc
+def int_hexagon_V6_vmpyuhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc_128B
+def int_hexagon_V6_vmpyuhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs
+def int_hexagon_V6_vmpyhvsrs :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs_128B
+def int_hexagon_V6_vmpyhvsrs_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus
+def int_hexagon_V6_vmpyhus :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus_128B
+def int_hexagon_V6_vmpyhus_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc
+def int_hexagon_V6_vmpyhus_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc_128B
+def int_hexagon_V6_vmpyhus_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih,VI_ftype_VIVI,2)
+// tag : V6_vmpyih
+def int_hexagon_V6_vmpyih :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyih_128B
+def int_hexagon_V6_vmpyih_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc
+def int_hexagon_V6_vmpyih_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc_128B
+def int_hexagon_V6_vmpyih_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh
+def int_hexagon_V6_vmpyewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh_128B
+def int_hexagon_V6_vmpyewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh
+def int_hexagon_V6_vmpyowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_128B
+def int_hexagon_V6_vmpyowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd
+def int_hexagon_V6_vmpyowh_rnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd_128B
+def int_hexagon_V6_vmpyowh_rnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc
+def int_hexagon_V6_vmpyowh_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc_128B
+def int_hexagon_V6_vmpyowh_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc
+def int_hexagon_V6_vmpyowh_rnd_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc_128B
+def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh
+def int_hexagon_V6_vmpyieoh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh_128B
+def int_hexagon_V6_vmpyieoh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh
+def int_hexagon_V6_vmpyiewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh_128B
+def int_hexagon_V6_vmpyiewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh
+def int_hexagon_V6_vmpyiowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh_128B
+def int_hexagon_V6_vmpyiowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc
+def int_hexagon_V6_vmpyiewh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc_128B
+def int_hexagon_V6_vmpyiewh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc
+def int_hexagon_V6_vmpyiewuh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc_128B
+def int_hexagon_V6_vmpyiewuh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub,VD_ftype_VISI,2)
+// tag : V6_vmpyub
+def int_hexagon_V6_vmpyub :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyub_128B
+def int_hexagon_V6_vmpyub_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc
+def int_hexagon_V6_vmpyub_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc_128B
+def int_hexagon_V6_vmpyub_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus,VD_ftype_VISI,2)
+// tag : V6_vmpybus
+def int_hexagon_V6_vmpybus :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_128B,VD_ftype_VISI,2)
+// tag : V6_vmpybus_128B
+def int_hexagon_V6_vmpybus_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc
+def int_hexagon_V6_vmpybus_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc_128B
+def int_hexagon_V6_vmpybus_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus,VD_ftype_VDSI,2)
+// tag : V6_vmpabus
+def int_hexagon_V6_vmpabus :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpabus_128B
+def int_hexagon_V6_vmpabus_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc
+def int_hexagon_V6_vmpabus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc_128B
+def int_hexagon_V6_vmpabus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb,VD_ftype_VDSI,2)
+// tag : V6_vmpahb
+def int_hexagon_V6_vmpahb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpahb_128B
+def int_hexagon_V6_vmpahb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc
+def int_hexagon_V6_vmpahb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc_128B
+def int_hexagon_V6_vmpahb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh,VD_ftype_VISI,2)
+// tag : V6_vmpyh
+def int_hexagon_V6_vmpyh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyh_128B
+def int_hexagon_V6_vmpyh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc
+def int_hexagon_V6_vmpyhsat_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc_128B
+def int_hexagon_V6_vmpyhsat_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss,VI_ftype_VISI,2)
+// tag : V6_vmpyhss
+def int_hexagon_V6_vmpyhss :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhss">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhss_128B
+def int_hexagon_V6_vmpyhss_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs
+def int_hexagon_V6_vmpyhsrs :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs_128B
+def int_hexagon_V6_vmpyhsrs_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh,VD_ftype_VISI,2)
+// tag : V6_vmpyuh
+def int_hexagon_V6_vmpyuh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyuh_128B
+def int_hexagon_V6_vmpyuh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc
+def int_hexagon_V6_vmpyuh_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc_128B
+def int_hexagon_V6_vmpyuh_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb,VI_ftype_VISI,2)
+// tag : V6_vmpyihb
+def int_hexagon_V6_vmpyihb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyihb_128B
+def int_hexagon_V6_vmpyihb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc
+def int_hexagon_V6_vmpyihb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc_128B
+def int_hexagon_V6_vmpyihb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb
+def int_hexagon_V6_vmpyiwb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb_128B
+def int_hexagon_V6_vmpyiwb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc
+def int_hexagon_V6_vmpyiwb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc_128B
+def int_hexagon_V6_vmpyiwb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh
+def int_hexagon_V6_vmpyiwh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh_128B
+def int_hexagon_V6_vmpyiwh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc
+def int_hexagon_V6_vmpyiwh_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc_128B
+def int_hexagon_V6_vmpyiwh_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand,VI_ftype_VIVI,2)
+// tag : V6_vand
+def int_hexagon_V6_vand :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vand">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand_128B,VI_ftype_VIVI,2)
+// tag : V6_vand_128B
+def int_hexagon_V6_vand_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vand_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor,VI_ftype_VIVI,2)
+// tag : V6_vor
+def int_hexagon_V6_vor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor_128B,VI_ftype_VIVI,2)
+// tag : V6_vor_128B
+def int_hexagon_V6_vor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor,VI_ftype_VIVI,2)
+// tag : V6_vxor
+def int_hexagon_V6_vxor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vxor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor_128B,VI_ftype_VIVI,2)
+// tag : V6_vxor_128B
+def int_hexagon_V6_vxor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vxor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot,VI_ftype_VI,1)
+// tag : V6_vnot
+def int_hexagon_V6_vnot :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnot">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot_128B,VI_ftype_VI,1)
+// tag : V6_vnot_128B
+def int_hexagon_V6_vnot_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnot_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt,VI_ftype_QVSI,2)
+// tag : V6_vandqrt
+def int_hexagon_V6_vandqrt :
+Hexagon_v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_128B,VI_ftype_QVSI,2)
+// tag : V6_vandqrt_128B
+def int_hexagon_V6_vandqrt_128B :
+Hexagon_v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc
+def int_hexagon_V6_vandqrt_acc :
+Hexagon_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc_128B,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc_128B
+def int_hexagon_V6_vandqrt_acc_128B :
+Hexagon_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandvrt,QV_ftype_VISI,2)
+// tag : V6_vandvrt
+def int_hexagon_V6_vandvrt :
+Hexagon_v64iv512i_Intrinsic<"HEXAGON_V6_vandvrt">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandvrt_128B,QV_ftype_VISI,2)
+// tag : V6_vandvrt_128B
+def int_hexagon_V6_vandvrt_128B :
+Hexagon_v128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc,QV_ftype_QVVISI,3)
+// tag : V6_vandvrt_acc
+def int_hexagon_V6_vandvrt_acc :
+Hexagon_v64iv64iv512i_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc_128B,QV_ftype_QVVISI,3)
+// tag : V6_vandvrt_acc_128B
+def int_hexagon_V6_vandvrt_acc_128B :
+Hexagon_v128iv128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw,QV_ftype_VIVI,2)
+// tag : V6_vgtw
+def int_hexagon_V6_vgtw :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_128B,QV_ftype_VIVI,2)
+// tag : V6_vgtw_128B
+def int_hexagon_V6_vgtw_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_and
+def int_hexagon_V6_vgtw_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_and_128B
+def int_hexagon_V6_vgtw_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_or
+def int_hexagon_V6_vgtw_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_or_128B
+def int_hexagon_V6_vgtw_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_xor
+def int_hexagon_V6_vgtw_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtw_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtw_xor_128B
+def int_hexagon_V6_vgtw_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw,QV_ftype_VIVI,2)
+// tag : V6_veqw
+def int_hexagon_V6_veqw :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_128B,QV_ftype_VIVI,2)
+// tag : V6_veqw_128B
+def int_hexagon_V6_veqw_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_and,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_and
+def int_hexagon_V6_veqw_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_and_128B
+def int_hexagon_V6_veqw_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_or,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_or
+def int_hexagon_V6_veqw_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_or_128B
+def int_hexagon_V6_veqw_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_xor,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_xor
+def int_hexagon_V6_veqw_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqw_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqw_xor_128B
+def int_hexagon_V6_veqw_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth,QV_ftype_VIVI,2)
+// tag : V6_vgth
+def int_hexagon_V6_vgth :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgth">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_128B,QV_ftype_VIVI,2)
+// tag : V6_vgth_128B
+def int_hexagon_V6_vgth_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_and
+def int_hexagon_V6_vgth_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_and_128B
+def int_hexagon_V6_vgth_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_or
+def int_hexagon_V6_vgth_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_or_128B
+def int_hexagon_V6_vgth_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_xor
+def int_hexagon_V6_vgth_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgth_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgth_xor_128B
+def int_hexagon_V6_vgth_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh,QV_ftype_VIVI,2)
+// tag : V6_veqh
+def int_hexagon_V6_veqh :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_128B,QV_ftype_VIVI,2)
+// tag : V6_veqh_128B
+def int_hexagon_V6_veqh_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_and,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_and
+def int_hexagon_V6_veqh_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_and_128B
+def int_hexagon_V6_veqh_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_or,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_or
+def int_hexagon_V6_veqh_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_or_128B
+def int_hexagon_V6_veqh_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_xor,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_xor
+def int_hexagon_V6_veqh_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqh_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqh_xor_128B
+def int_hexagon_V6_veqh_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb,QV_ftype_VIVI,2)
+// tag : V6_vgtb
+def int_hexagon_V6_vgtb :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_128B,QV_ftype_VIVI,2)
+// tag : V6_vgtb_128B
+def int_hexagon_V6_vgtb_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_and
+def int_hexagon_V6_vgtb_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_and_128B
+def int_hexagon_V6_vgtb_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_or
+def int_hexagon_V6_vgtb_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_or_128B
+def int_hexagon_V6_vgtb_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_xor
+def int_hexagon_V6_vgtb_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtb_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtb_xor_128B
+def int_hexagon_V6_vgtb_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb,QV_ftype_VIVI,2)
+// tag : V6_veqb
+def int_hexagon_V6_veqb :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_128B,QV_ftype_VIVI,2)
+// tag : V6_veqb_128B
+def int_hexagon_V6_veqb_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_and,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_and
+def int_hexagon_V6_veqb_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_and_128B
+def int_hexagon_V6_veqb_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_or,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_or
+def int_hexagon_V6_veqb_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_or_128B
+def int_hexagon_V6_veqb_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_xor,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_xor
+def int_hexagon_V6_veqb_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_veqb_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_veqb_xor_128B
+def int_hexagon_V6_veqb_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw,QV_ftype_VIVI,2)
+// tag : V6_vgtuw
+def int_hexagon_V6_vgtuw :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_128B,QV_ftype_VIVI,2)
+// tag : V6_vgtuw_128B
+def int_hexagon_V6_vgtuw_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_and
+def int_hexagon_V6_vgtuw_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_and_128B
+def int_hexagon_V6_vgtuw_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_or
+def int_hexagon_V6_vgtuw_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_or_128B
+def int_hexagon_V6_vgtuw_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_xor
+def int_hexagon_V6_vgtuw_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuw_xor_128B
+def int_hexagon_V6_vgtuw_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh,QV_ftype_VIVI,2)
+// tag : V6_vgtuh
+def int_hexagon_V6_vgtuh :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_128B,QV_ftype_VIVI,2)
+// tag : V6_vgtuh_128B
+def int_hexagon_V6_vgtuh_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_and
+def int_hexagon_V6_vgtuh_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_and_128B
+def int_hexagon_V6_vgtuh_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_or
+def int_hexagon_V6_vgtuh_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_or_128B
+def int_hexagon_V6_vgtuh_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_xor
+def int_hexagon_V6_vgtuh_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtuh_xor_128B
+def int_hexagon_V6_vgtuh_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub,QV_ftype_VIVI,2)
+// tag : V6_vgtub
+def int_hexagon_V6_vgtub :
+Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_128B,QV_ftype_VIVI,2)
+// tag : V6_vgtub_128B
+def int_hexagon_V6_vgtub_128B :
+Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_and,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_and
+def int_hexagon_V6_vgtub_and :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_and_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_and_128B
+def int_hexagon_V6_vgtub_and_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_or,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_or
+def int_hexagon_V6_vgtub_or :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_or_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_or_128B
+def int_hexagon_V6_vgtub_or_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_xor,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_xor
+def int_hexagon_V6_vgtub_xor :
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vgtub_xor_128B,QV_ftype_QVVIVI,3)
+// tag : V6_vgtub_xor_128B
+def int_hexagon_V6_vgtub_xor_128B :
+Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+
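
The vgt*/veq* comparisons above return a predicate (QV) register, and each comes with _and/_or/_xor variants that fold the fresh comparison into an existing predicate. A minimal sketch, assuming clang's same-named HVX builtins; the typedefs are illustrative stand-ins (real code would take HVX_Vector/HVX_VectorPred from hexagon_types.h):

/* Sketch: range test q = (v > lo) & (hi > v) using the _and variant. */
typedef int HVX_Vector     __attribute__((__vector_size__(64)));  /* stand-in */
typedef int HVX_VectorPred __attribute__((__vector_size__(64)));  /* stand-in */

HVX_VectorPred in_band(HVX_Vector v, HVX_Vector lo, HVX_Vector hi) {
  HVX_VectorPred q = __builtin_HEXAGON_V6_vgtw(v, lo);   /* q  =  v > lo */
  return __builtin_HEXAGON_V6_vgtw_and(q, hi, v);        /* q &= hi > v  */
}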
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_or,QV_ftype_QVQV,2)
+// tag : V6_pred_or
+def int_hexagon_V6_pred_or :
+Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_or_128B,QV_ftype_QVQV,2)
+// tag : V6_pred_or_128B
+def int_hexagon_V6_pred_or_128B :
+Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_and,QV_ftype_QVQV,2)
+// tag : V6_pred_and
+def int_hexagon_V6_pred_and :
+Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_and_128B,QV_ftype_QVQV,2)
+// tag : V6_pred_and_128B
+def int_hexagon_V6_pred_and_128B :
+Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_not,QV_ftype_QV,1)
+// tag : V6_pred_not
+def int_hexagon_V6_pred_not :
+Hexagon_v64iv64i_Intrinsic<"HEXAGON_V6_pred_not">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_not_128B,QV_ftype_QV,1)
+// tag : V6_pred_not_128B
+def int_hexagon_V6_pred_not_128B :
+Hexagon_v128iv128i_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_xor,QV_ftype_QVQV,2)
+// tag : V6_pred_xor
+def int_hexagon_V6_pred_xor :
+Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_xor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_xor_128B,QV_ftype_QVQV,2)
+// tag : V6_pred_xor_128B
+def int_hexagon_V6_pred_xor_128B :
+Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_and_n,QV_ftype_QVQV,2)
+// tag : V6_pred_and_n
+def int_hexagon_V6_pred_and_n :
+Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and_n">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_and_n_128B,QV_ftype_QVQV,2)
+// tag : V6_pred_and_n_128B
+def int_hexagon_V6_pred_and_n_128B :
+Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_or_n,QV_ftype_QVQV,2)
+// tag : V6_pred_or_n
+def int_hexagon_V6_pred_or_n :
+Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or_n">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_or_n_128B,QV_ftype_QVQV,2)
+// tag : V6_pred_or_n_128B
+def int_hexagon_V6_pred_or_n_128B :
+Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_scalar2,QV_ftype_SI,1)
+// tag : V6_pred_scalar2
+def int_hexagon_V6_pred_scalar2 :
+Hexagon_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_pred_scalar2_128B,QV_ftype_SI,1)
+// tag : V6_pred_scalar2_128B
+def int_hexagon_V6_pred_scalar2_128B :
+Hexagon_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmux,VI_ftype_QVVIVI,3)
+// tag : V6_vmux
+def int_hexagon_V6_vmux :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vmux">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmux_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vmux_128B
+def int_hexagon_V6_vmux_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vmux_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vswap,VD_ftype_QVVIVI,3)
+// tag : V6_vswap
+def int_hexagon_V6_vswap :
+Hexagon_v1024v64iv512v512_Intrinsic<"HEXAGON_V6_vswap">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vswap_128B,VD_ftype_QVVIVI,3)
+// tag : V6_vswap_128B
+def int_hexagon_V6_vswap_128B :
+Hexagon_v2048v128iv1024v1024_Intrinsic<"HEXAGON_V6_vswap_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxub,VI_ftype_VIVI,2)
+// tag : V6_vmaxub
+def int_hexagon_V6_vmaxub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxub_128B,VI_ftype_VIVI,2)
+// tag : V6_vmaxub_128B
+def int_hexagon_V6_vmaxub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminub,VI_ftype_VIVI,2)
+// tag : V6_vminub
+def int_hexagon_V6_vminub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminub_128B,VI_ftype_VIVI,2)
+// tag : V6_vminub_128B
+def int_hexagon_V6_vminub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxuh,VI_ftype_VIVI,2)
+// tag : V6_vmaxuh
+def int_hexagon_V6_vmaxuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmaxuh_128B
+def int_hexagon_V6_vmaxuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminuh,VI_ftype_VIVI,2)
+// tag : V6_vminuh
+def int_hexagon_V6_vminuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vminuh_128B
+def int_hexagon_V6_vminuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxh,VI_ftype_VIVI,2)
+// tag : V6_vmaxh
+def int_hexagon_V6_vmaxh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmaxh_128B
+def int_hexagon_V6_vmaxh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminh,VI_ftype_VIVI,2)
+// tag : V6_vminh
+def int_hexagon_V6_vminh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminh_128B,VI_ftype_VIVI,2)
+// tag : V6_vminh_128B
+def int_hexagon_V6_vminh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxw,VI_ftype_VIVI,2)
+// tag : V6_vmaxw
+def int_hexagon_V6_vmaxw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmaxw_128B,VI_ftype_VIVI,2)
+// tag : V6_vmaxw_128B
+def int_hexagon_V6_vmaxw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminw,VI_ftype_VIVI,2)
+// tag : V6_vminw
+def int_hexagon_V6_vminw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vminw_128B,VI_ftype_VIVI,2)
+// tag : V6_vminw_128B
+def int_hexagon_V6_vminw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsathub,VI_ftype_VIVI,2)
+// tag : V6_vsathub
+def int_hexagon_V6_vsathub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsathub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsathub_128B,VI_ftype_VIVI,2)
+// tag : V6_vsathub_128B
+def int_hexagon_V6_vsathub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsathub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsatwh,VI_ftype_VIVI,2)
+// tag : V6_vsatwh
+def int_hexagon_V6_vsatwh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsatwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsatwh_128B,VI_ftype_VIVI,2)
+// tag : V6_vsatwh_128B
+def int_hexagon_V6_vsatwh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffeb,VI_ftype_VIVI,2)
+// tag : V6_vshuffeb
+def int_hexagon_V6_vshuffeb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffeb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffeb_128B,VI_ftype_VIVI,2)
+// tag : V6_vshuffeb_128B
+def int_hexagon_V6_vshuffeb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffeb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffob,VI_ftype_VIVI,2)
+// tag : V6_vshuffob
+def int_hexagon_V6_vshuffob :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffob">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffob_128B,VI_ftype_VIVI,2)
+// tag : V6_vshuffob_128B
+def int_hexagon_V6_vshuffob_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffob_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufeh,VI_ftype_VIVI,2)
+// tag : V6_vshufeh
+def int_hexagon_V6_vshufeh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufeh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufeh_128B,VI_ftype_VIVI,2)
+// tag : V6_vshufeh_128B
+def int_hexagon_V6_vshufeh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufeh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoh,VI_ftype_VIVI,2)
+// tag : V6_vshufoh
+def int_hexagon_V6_vshufoh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoh_128B,VI_ftype_VIVI,2)
+// tag : V6_vshufoh_128B
+def int_hexagon_V6_vshufoh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffvdd,VD_ftype_VIVISI,3)
+// tag : V6_vshuffvdd
+def int_hexagon_V6_vshuffvdd :
+Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vshuffvdd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffvdd_128B,VD_ftype_VIVISI,3)
+// tag : V6_vshuffvdd_128B
+def int_hexagon_V6_vshuffvdd_128B :
+Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealvdd,VD_ftype_VIVISI,3)
+// tag : V6_vdealvdd
+def int_hexagon_V6_vdealvdd :
+Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vdealvdd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealvdd_128B,VD_ftype_VIVISI,3)
+// tag : V6_vdealvdd_128B
+def int_hexagon_V6_vdealvdd_128B :
+Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vdealvdd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoeh,VD_ftype_VIVI,2)
+// tag : V6_vshufoeh
+def int_hexagon_V6_vshufoeh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoeh_128B,VD_ftype_VIVI,2)
+// tag : V6_vshufoeh_128B
+def int_hexagon_V6_vshufoeh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoeb,VD_ftype_VIVI,2)
+// tag : V6_vshufoeb
+def int_hexagon_V6_vshufoeb :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshufoeb_128B,VD_ftype_VIVI,2)
+// tag : V6_vshufoeb_128B
+def int_hexagon_V6_vshufoeb_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealh,VI_ftype_VI,1)
+// tag : V6_vdealh
+def int_hexagon_V6_vdealh :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealh_128B,VI_ftype_VI,1)
+// tag : V6_vdealh_128B
+def int_hexagon_V6_vdealh_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealb,VI_ftype_VI,1)
+// tag : V6_vdealb
+def int_hexagon_V6_vdealb :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealb_128B,VI_ftype_VI,1)
+// tag : V6_vdealb_128B
+def int_hexagon_V6_vdealb_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealb4w,VI_ftype_VIVI,2)
+// tag : V6_vdealb4w
+def int_hexagon_V6_vdealb4w :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdealb4w">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdealb4w_128B,VI_ftype_VIVI,2)
+// tag : V6_vdealb4w_128B
+def int_hexagon_V6_vdealb4w_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdealb4w_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffh,VI_ftype_VI,1)
+// tag : V6_vshuffh
+def int_hexagon_V6_vshuffh :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffh_128B,VI_ftype_VI,1)
+// tag : V6_vshuffh_128B
+def int_hexagon_V6_vshuffh_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffb,VI_ftype_VI,1)
+// tag : V6_vshuffb
+def int_hexagon_V6_vshuffb :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vshuffb_128B,VI_ftype_VI,1)
+// tag : V6_vshuffb_128B
+def int_hexagon_V6_vshuffb_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_extractw,SI_ftype_VISI,2)
+// tag : V6_extractw
+def int_hexagon_V6_extractw :
+Hexagon_iv512i_Intrinsic<"HEXAGON_V6_extractw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_extractw_128B,SI_ftype_VISI,2)
+// tag : V6_extractw_128B
+def int_hexagon_V6_extractw_128B :
+Hexagon_iv1024i_Intrinsic<"HEXAGON_V6_extractw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vinsertwr,VI_ftype_VISI,2)
+// tag : V6_vinsertwr
+def int_hexagon_V6_vinsertwr :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vinsertwr">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vinsertwr_128B,VI_ftype_VISI,2)
+// tag : V6_vinsertwr_128B
+def int_hexagon_V6_vinsertwr_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vinsertwr_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_lvsplatw,VI_ftype_SI,1)
+// tag : V6_lvsplatw
+def int_hexagon_V6_lvsplatw :
+Hexagon_v512i_Intrinsic<"HEXAGON_V6_lvsplatw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_lvsplatw_128B,VI_ftype_SI,1)
+// tag : V6_lvsplatw_128B
+def int_hexagon_V6_lvsplatw_128B :
+Hexagon_v1024i_Intrinsic<"HEXAGON_V6_lvsplatw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vassign,VI_ftype_VI,1)
+// tag : V6_vassign
+def int_hexagon_V6_vassign :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vassign">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vassign_128B,VI_ftype_VI,1)
+// tag : V6_vassign_128B
+def int_hexagon_V6_vassign_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vassign_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcombine,VD_ftype_VIVI,2)
+// tag : V6_vcombine
+def int_hexagon_V6_vcombine :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcombine_128B,VD_ftype_VIVI,2)
+// tag : V6_vcombine_128B
+def int_hexagon_V6_vcombine_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb,VI_ftype_VIDISI,3)
+// tag : V6_vlutb
+def int_hexagon_V6_vlutb :
+Hexagon_v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_128B,VI_ftype_VIDISI,3)
+// tag : V6_vlutb_128B
+def int_hexagon_V6_vlutb_128B :
+Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_acc,VI_ftype_VIVIDISI,4)
+// tag : V6_vlutb_acc
+def int_hexagon_V6_vlutb_acc :
+Hexagon_v512v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_acc_128B,VI_ftype_VIVIDISI,4)
+// tag : V6_vlutb_acc_128B
+def int_hexagon_V6_vlutb_acc_128B :
+Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_dv,VD_ftype_VDDISI,3)
+// tag : V6_vlutb_dv
+def int_hexagon_V6_vlutb_dv :
+Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_128B,VD_ftype_VDDISI,3)
+// tag : V6_vlutb_dv_128B
+def int_hexagon_V6_vlutb_dv_128B :
+Hexagon_v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc,VD_ftype_VDVDDISI,4)
+// tag : V6_vlutb_dv_acc
+def int_hexagon_V6_vlutb_dv_acc :
+Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc_128B,VD_ftype_VDVDDISI,4)
+// tag : V6_vlutb_dv_acc_128B
+def int_hexagon_V6_vlutb_dv_acc_128B :
+Hexagon_v2048v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2)
+// tag : V6_vdelta
+def int_hexagon_V6_vdelta :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdelta">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdelta_128B,VI_ftype_VIVI,2)
+// tag : V6_vdelta_128B
+def int_hexagon_V6_vdelta_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdelta_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrdelta,VI_ftype_VIVI,2)
+// tag : V6_vrdelta
+def int_hexagon_V6_vrdelta :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrdelta">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrdelta_128B,VI_ftype_VIVI,2)
+// tag : V6_vrdelta_128B
+def int_hexagon_V6_vrdelta_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrdelta_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcl0w,VI_ftype_VI,1)
+// tag : V6_vcl0w
+def int_hexagon_V6_vcl0w :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0w">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcl0w_128B,VI_ftype_VI,1)
+// tag : V6_vcl0w_128B
+def int_hexagon_V6_vcl0w_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0w_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcl0h,VI_ftype_VI,1)
+// tag : V6_vcl0h
+def int_hexagon_V6_vcl0h :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0h">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vcl0h_128B,VI_ftype_VI,1)
+// tag : V6_vcl0h_128B
+def int_hexagon_V6_vcl0h_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0h_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnormamtw,VI_ftype_VI,1)
+// tag : V6_vnormamtw
+def int_hexagon_V6_vnormamtw :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamtw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnormamtw_128B,VI_ftype_VI,1)
+// tag : V6_vnormamtw_128B
+def int_hexagon_V6_vnormamtw_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamtw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnormamth,VI_ftype_VI,1)
+// tag : V6_vnormamth
+def int_hexagon_V6_vnormamth :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamth">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnormamth_128B,VI_ftype_VI,1)
+// tag : V6_vnormamth_128B
+def int_hexagon_V6_vnormamth_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamth_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpopcounth,VI_ftype_VI,1)
+// tag : V6_vpopcounth
+def int_hexagon_V6_vpopcounth :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vpopcounth">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vpopcounth_128B,VI_ftype_VI,1)
+// tag : V6_vpopcounth_128B
+def int_hexagon_V6_vpopcounth_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vpopcounth_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvvb,VI_ftype_VIVISI,3)
+// tag : V6_vlutvvb
+def int_hexagon_V6_vlutvvb :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvvb_128B,VI_ftype_VIVISI,3)
+// tag : V6_vlutvvb_128B
+def int_hexagon_V6_vlutvvb_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc,VI_ftype_VIVIVISI,4)
+// tag : V6_vlutvvb_oracc
+def int_hexagon_V6_vlutvvb_oracc :
+Hexagon_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc_128B,VI_ftype_VIVIVISI,4)
+// tag : V6_vlutvvb_oracc_128B
+def int_hexagon_V6_vlutvvb_oracc_128B :
+Hexagon_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvwh,VD_ftype_VIVISI,3)
+// tag : V6_vlutvwh
+def int_hexagon_V6_vlutvwh :
+Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvwh_128B,VD_ftype_VIVISI,3)
+// tag : V6_vlutvwh_128B
+def int_hexagon_V6_vlutvwh_128B :
+Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc,VD_ftype_VDVIVISI,4)
+// tag : V6_vlutvwh_oracc
+def int_hexagon_V6_vlutvwh_oracc :
+Hexagon_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc_128B,VD_ftype_VDVIVISI,4)
+// tag : V6_vlutvwh_oracc_128B
+def int_hexagon_V6_vlutvwh_oracc_128B :
+Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2)
+// tag : M6_vabsdiffb
+def int_hexagon_M6_vabsdiffb :
+Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffb">;
+
+//
+// BUILTIN_INFO(HEXAGON.M6_vabsdiffub,DI_ftype_DIDI,2)
+// tag : M6_vabsdiffub
+def int_hexagon_M6_vabsdiffub :
+Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffub">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_vsplatrbp,DI_ftype_SI,1)
+// tag : S6_vsplatrbp
+def int_hexagon_S6_vsplatrbp :
+Hexagon_LLii_Intrinsic<"HEXAGON_S6_vsplatrbp">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_vtrunehb_ppp,DI_ftype_DIDI,2)
+// tag : S6_vtrunehb_ppp
+def int_hexagon_S6_vtrunehb_ppp :
+Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">;
+
+//
+// BUILTIN_INFO(HEXAGON.S6_vtrunohb_ppp,DI_ftype_DIDI,2)
+// tag : S6_vtrunohb_ppp
+def int_hexagon_S6_vtrunohb_ppp :
+Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">;
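The Hexagon_* class names above encode each intrinsic's signature: for HVX, v512 and v1024 denote 512-bit and 1024-bit vector operands, which these definitions model as <16 x i32> and <32 x i32>. A minimal IR sketch of one definition above, assuming the usual underscore-to-dot intrinsic name mapping (the lane-semantics comment is illustrative, not from the patch):

    ; per-halfword population count on a 512-bit HVX vector
    declare <16 x i32> @llvm.hexagon.V6.vpopcounth(<16 x i32>)
    %r = call <16 x i32> @llvm.hexagon.V6.vpopcounth(<16 x i32> %v)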
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index eb8f1e6..06dfc32 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -710,21 +710,39 @@ def int_ppc_vsx_xvrsqrtedp : GCCBuiltin<"__builtin_vsx_xvrsqrtedp">,
def int_ppc_vsx_xvcmpeqdp :
PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpeqdp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqdp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xvcmpeqsp :
PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpeqsp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqsp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xvcmpgedp :
PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgedp_p : GCCBuiltin<"__builtin_vsx_xvcmpgedp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xvcmpgesp :
PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgesp_p : GCCBuiltin<"__builtin_vsx_xvcmpgesp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xvcmpgtdp :
PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgtdp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtdp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xvcmpgtsp :
PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
def int_ppc_vsx_xxleqv :
PowerPC_VSX_Intrinsic<"xxleqv", [llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
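The added *_p records are predicate forms of the VSX compares: instead of producing a vector mask they return an i32 derived from CR6, with the extra leading i32 operand selecting which CR6 condition to test, mirroring the existing AltiVec *_p intrinsics. A minimal sketch under the usual underscore-to-dot name mapping (the selector value and its all-lanes-equal meaning are assumptions borrowed from the AltiVec convention):

    ; nonzero iff every lane of %a equals the corresponding lane of %b
    declare i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32, <2 x double>, <2 x double>)
    %all_eq = call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> %a, <2 x double> %b)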
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 3ccde47..3953aef 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -13,4 +13,10 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.".
+
+// Note that memory_size is not IntrNoMem because it must be sequenced with
+// respect to grow_memory calls.
+def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>;
+def int_wasm_grow_memory : Intrinsic<[], [llvm_anyint_ty], []>;
+
}
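The IntrReadMem choice above is what blocks the reordering the comment warns about: an IntrNoMem call could be hoisted or CSE'd across a grow_memory and return a stale size, while a memory read cannot move past the (side-effecting) grow. A minimal sketch, assuming the i32 instantiation of the llvm_anyint_ty overload and the usual name mapping:

    ; grow by one page, then re-query; the second call must not fold to %before
    declare i32 @llvm.wasm.memory.size.i32()
    declare void @llvm.wasm.grow.memory.i32(i32)
    %before = call i32 @llvm.wasm.memory.size.i32()
    call void @llvm.wasm.grow.memory.i32(i32 1)
    %after = call i32 @llvm.wasm.memory.size.i32()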
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index a3bc4af..18390f8 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -22,10 +22,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in {
def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>;
- // Restores the frame, base, and stack pointers as necessary after recovering
- // from an exception. Any block resuming control flow in the parent function
- // should call this before accessing any stack memory.
- def int_x86_seh_restoreframe : Intrinsic<[], [], []>;
+ // Marks the EH registration node created in LLVM IR prior to code generation.
+ def int_x86_seh_ehregnode : Intrinsic<[], [llvm_ptr_ty], []>;
// Given a pointer to the end of an EH registration object, returns the true
// parent frame address that can be used with llvm.localrecover.
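For contrast with the removed restoreframe (which took no operands), the new ehregnode takes a pointer to the registration object it marks. A hypothetical sketch only; the allocation size and pointer shape are assumptions for illustration, not part of the patch:

    ; mark a frame-allocated EH registration node (layout assumed)
    declare void @llvm.x86.seh.ehregnode(i8*)
    %reg = alloca [24 x i8]
    %p = getelementptr inbounds [24 x i8], [24 x i8]* %reg, i32 0, i32 0
    call void @llvm.x86.seh.ehregnode(i8* %p)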
@@ -1406,6 +1404,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
+ def int_x86_avx512_mask_vpermil_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vpermilpd_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermil_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermil_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermil_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vpermilps_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermil_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermil_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermilvar_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
def int_x86_avx512_mask_pshuf_b_128 :
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
Intrinsic<[llvm_v16i8_ty],
@@ -1423,8 +1493,145 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
-}
+ def int_x86_avx512_mask_shuf_f32x4_256 :
+ GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_f32x4 :
+ GCCBuiltin<"__builtin_ia32_shuf_f32x4_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_f64x2_256 :
+ GCCBuiltin<"__builtin_ia32_shuf_f64x2_256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_f64x2 :
+ GCCBuiltin<"__builtin_ia32_shuf_f64x2_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_i32x4_256 :
+ GCCBuiltin<"__builtin_ia32_shuf_i32x4_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_i32x4 :
+ GCCBuiltin<"__builtin_ia32_shuf_i32x4_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_i64x2_256 :
+ GCCBuiltin<"__builtin_ia32_shuf_i64x2_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_i64x2 :
+ GCCBuiltin<"__builtin_ia32_shuf_i64x2_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_pd_128 :
+ GCCBuiltin<"__builtin_ia32_shufpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_pd_256 :
+ GCCBuiltin<"__builtin_ia32_shufpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_pd_512 :
+ GCCBuiltin<"__builtin_ia32_shufpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_ps_128 :
+ GCCBuiltin<"__builtin_ia32_shufps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_ps_256 :
+ GCCBuiltin<"__builtin_ia32_shufps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_shuf_ps_512 :
+ GCCBuiltin<"__builtin_ia32_shufps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movshdup_128 :
+ GCCBuiltin<"__builtin_ia32_movshdup128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movshdup_256 :
+ GCCBuiltin<"__builtin_ia32_movshdup256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movshdup_512 :
+ GCCBuiltin<"__builtin_ia32_movshdup512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movsldup_128 :
+ GCCBuiltin<"__builtin_ia32_movsldup128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movsldup_256 :
+ GCCBuiltin<"__builtin_ia32_movsldup256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movsldup_512 :
+ GCCBuiltin<"__builtin_ia32_movsldup512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movddup_128 :
+ GCCBuiltin<"__builtin_ia32_movddup128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movddup_256 :
+ GCCBuiltin<"__builtin_ia32_movddup256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_movddup_512 :
+ GCCBuiltin<"__builtin_ia32_movddup512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
@@ -1526,6 +1733,38 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">,
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_pd_128 :
+ GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_pd_256 :
+ GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_pd_512 :
+ GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_ps_128 :
+ GCCBuiltin<"__builtin_ia32_fpclassps128_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_ps_256 :
+ GCCBuiltin<"__builtin_ia32_fpclassps256_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_ps_512 :
+ GCCBuiltin<"__builtin_ia32_fpclassps512_mask">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_sd :
+ GCCBuiltin<"__builtin_ia32_fpclasssd">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_fpclass_ss :
+ GCCBuiltin<"__builtin_ia32_fpclassss">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Vector extract sign mask
@@ -1573,16 +1812,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty],
+ Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty],
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
[IntrReadArgMem]>;
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
@@ -1596,24 +1835,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadArgMem]>;
+
+ def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
+ llvm_v2i64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+ llvm_v4i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_pd_256 :
GCCBuiltin<"__builtin_ia32_maskstorepd256">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
+ llvm_v4i64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
def int_x86_avx_maskstore_ps_256 :
GCCBuiltin<"__builtin_ia32_maskstoreps256">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
+ llvm_v8i32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
def int_x86_avx512_mask_storeu_ps_512 :
GCCBuiltin<"__builtin_ia32_storeups512_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
@@ -1946,6 +2192,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_w_128 : GCCBuiltin<"__builtin_ia32_psrlw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_w_256 : GCCBuiltin<"__builtin_ia32_psrlw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_wi_128 : GCCBuiltin<"__builtin_ia32_psrlwi128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_wi_256 : GCCBuiltin<"__builtin_ia32_psrlwi256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_wi_512 : GCCBuiltin<"__builtin_ia32_psrlwi512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
@@ -2167,39 +2432,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_vbroadcast_ss_ps :
- GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcast_sd_pd_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcast_ss_ps_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastb_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastb_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastw_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastw_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastd_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastd_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastq_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastq_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_d_gpr_512 :
GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty,
@@ -2220,7 +2452,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
@@ -2231,20 +2463,124 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_512 :
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextractf32x4_256 :
+ GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextracti32x4_256 :
+ GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextractf64x2_256 :
+ GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextracti64x2_256 :
+ GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextractf64x2_512 :
+ GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextracti64x2_512 :
+ GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextractf32x8_512 :
+ GCCBuiltin<"__builtin_ia32_extractf32x8_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vextracti32x8_512 :
+ GCCBuiltin<"__builtin_ia32_extracti32x8_mask">,
+ Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x4_512 :
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x4_512 :
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x4_256 :
+ GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x4_512 :
+ GCCBuiltin<"__builtin_ia32_insertf32x4_512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x8_512 :
+ GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x2_256 :
+ GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x2_512 :
+ GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x4_512 :
+ GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x4_256 :
+ GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x4_512 :
+ GCCBuiltin<"__builtin_ia32_inserti32x4_512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x8_512 :
+ GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x2_256 :
+ GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x2_512 :
+ GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x4_512 :
+ GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Conditional load ops
@@ -2354,6 +2690,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
+ def int_x86_avx512_psll_dq_512 : GCCBuiltin<"__builtin_ia32_pslldq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
// Gather ops
@@ -3545,6 +3887,43 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// XSAVE
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_xsave :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsave64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xrstor :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xrstor64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsaveopt :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsaveopt64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xrstors :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xrstors64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsavec :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsavec64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsaves :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xsaves64 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Support for memory protection keys (rdpkru/wrpkru)
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
+ Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+}
+//===----------------------------------------------------------------------===//
// Half float conversion
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
@@ -3561,9 +3940,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -3657,6 +4048,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
+ def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
@@ -3671,10 +4068,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -3686,10 +4087,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -3698,17 +4103,74 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">,
+ Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">,
+ Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">,
+ Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">,
+ Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtmask2b_128 : GCCBuiltin<"__builtin_ia32_cvtmask2b128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2b_256 : GCCBuiltin<"__builtin_ia32_cvtmask2b256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2b_512 : GCCBuiltin<"__builtin_ia32_cvtmask2b512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtmask2w_128 : GCCBuiltin<"__builtin_ia32_cvtmask2w128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2w_256 : GCCBuiltin<"__builtin_ia32_cvtmask2w256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2w_512 : GCCBuiltin<"__builtin_ia32_cvtmask2w512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtmask2d_128 : GCCBuiltin<"__builtin_ia32_cvtmask2d128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2d_256 : GCCBuiltin<"__builtin_ia32_cvtmask2d256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2d_512 : GCCBuiltin<"__builtin_ia32_cvtmask2d512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtmask2q_128 : GCCBuiltin<"__builtin_ia32_cvtmask2q128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2q_256 : GCCBuiltin<"__builtin_ia32_cvtmask2q256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtmask2q_512 : GCCBuiltin<"__builtin_ia32_cvtmask2q512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+
}
// Pack ops.
@@ -3751,53 +4213,761 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
}
+// Unpack ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx512_mask_unpckh_pd_128 :
+ GCCBuiltin<"__builtin_ia32_unpckhpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckh_pd_256 :
+ GCCBuiltin<"__builtin_ia32_unpckhpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckh_pd_512 :
+ GCCBuiltin<"__builtin_ia32_unpckhpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckh_ps_128 :
+ GCCBuiltin<"__builtin_ia32_unpckhps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckh_ps_256 :
+ GCCBuiltin<"__builtin_ia32_unpckhps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckh_ps_512 :
+ GCCBuiltin<"__builtin_ia32_unpckhps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_pd_128 :
+ GCCBuiltin<"__builtin_ia32_unpcklpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_pd_256 :
+ GCCBuiltin<"__builtin_ia32_unpcklpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_pd_512 :
+ GCCBuiltin<"__builtin_ia32_unpcklpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_ps_128 :
+ GCCBuiltin<"__builtin_ia32_unpcklps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_ps_256 :
+ GCCBuiltin<"__builtin_ia32_unpcklps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_unpckl_ps_512 :
+ GCCBuiltin<"__builtin_ia32_unpcklps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhb_w_128 :
+ GCCBuiltin<"__builtin_ia32_punpckhbw128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhb_w_256 :
+ GCCBuiltin<"__builtin_ia32_punpckhbw256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhb_w_512 :
+ GCCBuiltin<"__builtin_ia32_punpckhbw512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhd_q_128 :
+ GCCBuiltin<"__builtin_ia32_punpckhdq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhd_q_256 :
+ GCCBuiltin<"__builtin_ia32_punpckhdq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhd_q_512 :
+ GCCBuiltin<"__builtin_ia32_punpckhdq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhqd_q_128 :
+ GCCBuiltin<"__builtin_ia32_punpckhqdq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhqd_q_256 :
+ GCCBuiltin<"__builtin_ia32_punpckhqdq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhqd_q_512 :
+ GCCBuiltin<"__builtin_ia32_punpckhqdq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhw_d_128 :
+ GCCBuiltin<"__builtin_ia32_punpckhwd128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhw_d_256 :
+ GCCBuiltin<"__builtin_ia32_punpckhwd256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckhw_d_512 :
+ GCCBuiltin<"__builtin_ia32_punpckhwd512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklb_w_128 :
+ GCCBuiltin<"__builtin_ia32_punpcklbw128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklb_w_256 :
+ GCCBuiltin<"__builtin_ia32_punpcklbw256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklb_w_512 :
+ GCCBuiltin<"__builtin_ia32_punpcklbw512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckld_q_128 :
+ GCCBuiltin<"__builtin_ia32_punpckldq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckld_q_256 :
+ GCCBuiltin<"__builtin_ia32_punpckldq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpckld_q_512 :
+ GCCBuiltin<"__builtin_ia32_punpckldq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklqd_q_128 :
+ GCCBuiltin<"__builtin_ia32_punpcklqdq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklqd_q_256 :
+ GCCBuiltin<"__builtin_ia32_punpcklqdq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklqd_q_512 :
+ GCCBuiltin<"__builtin_ia32_punpcklqdq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklw_d_128 :
+ GCCBuiltin<"__builtin_ia32_punpcklwd128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklw_d_256 :
+ GCCBuiltin<"__builtin_ia32_punpcklwd256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_punpcklw_d_512 :
+ GCCBuiltin<"__builtin_ia32_punpcklwd512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+}
+
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvttps2udq_512: GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvttpd2dq_512: GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvttpd2udq_512: GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
+ def int_x86_avx512_mask_cvtdq2pd_128 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtdq2pd_256 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtdq2pd_512 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtdq2ps_128 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtdq2ps_256 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtdq2ps_512 :
+ GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2dq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2dq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2dq256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2dq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2ps_256 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2ps256_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2ps_512 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtsd2ss_round :
+ GCCBuiltin<"__builtin_ia32_cvtsd2ss_round">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtss2sd_round :
+ GCCBuiltin<"__builtin_ia32_cvtss2sd_round">,
+ Intrinsic<[llvm_v2f64_ty],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2ps :
+ GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2qq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2qq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2qq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2udq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2udq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2udq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2uqq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2uqq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtpd2uqq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2dq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2dq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2dq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2pd_128 :
+ GCCBuiltin<"__builtin_ia32_cvtps2pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2pd_256 :
+ GCCBuiltin<"__builtin_ia32_cvtps2pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2pd_512 :
+ GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2qq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2qq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2qq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2udq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2udq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2udq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2uqq_128 :
+ GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2uqq_256 :
+ GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtps2uqq_512 :
+ GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2pd_128 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2pd_256 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2pd_512 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2ps_128 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2ps_256 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtqq2ps_512 :
+ GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2dq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2dq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2dq256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2dq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2qq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2qq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2qq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2udq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2udq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2udq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2uqq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2uqq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttpd2uqq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2dq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttps2dq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2dq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttps2dq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2dq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2qq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2qq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2qq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2udq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2udq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2udq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2uqq_128 :
+ GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2uqq_256 :
+ GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvttps2uqq_512 :
+ GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2pd_128 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2pd_256 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2pd_512 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2ps_128 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2ps_256 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtudq2ps_512 :
+ GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2pd_128 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2pd_256 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2pd_512 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2ps_128 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2ps_256 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_cvtuqq2ps_512 :
+ GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
+ def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtps2dq_512: GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtpd2dq_512: GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtps2udq_512: GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtpd2udq_512: GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty,
+ def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtudq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
}
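The masked conversion, rndscale, reduce, and range intrinsics above all share one operand convention: the vector source(s) and any immediate come first, then a passthrough vector that supplies the lanes whose mask bit is clear, then the per-lane mask (i8 or i16, matching the result lane count), and, on the 512-bit forms, a trailing i32 rounding/SAE immediate. A minimal IR sketch, assuming the usual TableGen-to-IR name mapping (int_x86_avx512_mask_cvttps2dq_512 becomes llvm.x86.avx512.mask.cvttps2dq.512), that the constant 4 selects the current rounding mode, and with an illustrative function name @demo:

  ; Truncating float->int conversion under mask: lanes whose bit in %k is
  ; clear take their value from %passthru rather than from the conversion.
  declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)

  define <16 x i32> @demo(<16 x float> %a, <16 x i32> %passthru, i16 %k) {
    %r = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(
             <16 x float> %a, <16 x i32> %passthru, i16 %k, i32 4)
    ret <16 x i32> %r
  }
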
// Vector load with broadcast
@@ -3805,28 +4975,183 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_vbroadcast_ss_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastss512">,
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx512_vbroadcast_ss_ps_512 :
- GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_broadcast_ss_ps_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastss512">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_broadcast_ss_ps_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastss256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_broadcast_ss_ps_128 :
+ GCCBuiltin<"__builtin_ia32_broadcastss128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_vbroadcast_sd_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx512_vbroadcast_sd_pd_512 :
- GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-
+ def int_x86_avx512_mask_broadcast_sd_pd_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastsd512">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_broadcast_sd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastsd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pbroadcastb_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastb_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastb_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastd_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastd_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastd_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf32x2_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf32x2_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x2_128 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x2_256 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x2_512 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf32x4_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastf32x4_256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf32x4_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastf32x4_512">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf32x8_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastf32x8_512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v8f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf64x2_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastf64x2_256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf64x2_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastf64x2_512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcastf64x4_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastf64x4_512">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v4f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x4_256 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x4_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x4_512 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x4_512">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti32x8_512 :
+ GCCBuiltin<"__builtin_ia32_broadcasti32x8_512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v8i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti64x2_256 :
+ GCCBuiltin<"__builtin_ia32_broadcasti64x2_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti64x2_512 :
+ GCCBuiltin<"__builtin_ia32_broadcasti64x2_512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_broadcasti64x4_512 :
+ GCCBuiltin<"__builtin_ia32_broadcasti64x4_512">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v4i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
def int_x86_avx512_pbroadcastd_i32_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pbroadcastq_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastq_i64_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmw_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastmw512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmw_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastmw256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmw_128 :
+ GCCBuiltin<"__builtin_ia32_broadcastmw128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmb_512 :
+ GCCBuiltin<"__builtin_ia32_broadcastmb512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmb_256 :
+ GCCBuiltin<"__builtin_ia32_broadcastmb256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_broadcastmb_128 :
+ GCCBuiltin<"__builtin_ia32_broadcastmb128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
}
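The masked broadcast family replicates the low element or low subvector of the source across every destination lane, again merging with a passthrough under the mask, while the broadcastmw/broadcastmb forms expand a mask register itself into every lane. A sketch of the 512-bit dword broadcast, under the same name-mapping assumption as above (@bcast is illustrative):

  ; Broadcast the low i32 of %a to all 16 lanes; masked-off lanes
  ; are taken from %passthru.
  declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

  define <16 x i32> @bcast(<4 x i32> %a, <16 x i32> %passthru, i16 %k) {
    %r = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(
             <4 x i32> %a, <16 x i32> %passthru, i16 %k)
    ret <16 x i32> %r
  }
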
// Vector sign and zero extend
@@ -4071,15 +5396,36 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">,
+ def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">,
+ def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-
+ def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -4099,12 +5445,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_avx512_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
@@ -4143,29 +5489,108 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
+ def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_pd_128 :
+ GCCBuiltin<"__builtin_ia32_getmantpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_pd_256 :
+ GCCBuiltin<"__builtin_ia32_getmantpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_pd_512 :
+ GCCBuiltin<"__builtin_ia32_getmantpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_ps_128 :
+ GCCBuiltin<"__builtin_ia32_getmantps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_ps_256 :
+ GCCBuiltin<"__builtin_ia32_getmantps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_ps_512 :
+ GCCBuiltin<"__builtin_ia32_getmantps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_ss :
+ GCCBuiltin<"__builtin_ia32_getmantss_round">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_getmant_sd :
+ GCCBuiltin<"__builtin_ia32_getmantsd_round">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">,
+ def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rsqrt14_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">,
+ def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">,
+ def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
@@ -4183,11 +5608,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">,
+ def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">,
+ def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
@@ -4199,14 +5624,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">,
+ def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">,
+ def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
+ def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
+ [IntrNoMem]>;
}
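psadbw is one of the few additions here with no mask or passthrough operand: it sums absolute byte differences over each group of eight bytes and widens the result into i64 lanes. A sketch under the usual name-mapping assumption (@sad is illustrative):

  ; Each of the 8 i64 result lanes holds the sum of absolute differences
  ; of the corresponding 8-byte groups of %a and %b.
  declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

  define <8 x i64> @sad(<64 x i8> %a, <64 x i8> %b) {
    %r = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %a, <64 x i8> %b)
    ret <8 x i64> %r
  }
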
// FP logical ops
let TargetPrefix = "x86" in {
@@ -4511,6 +5939,54 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_128 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_256 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_512 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_128 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_256 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_512 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_dbpsadbw_128 :
+ GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_dbpsadbw_256 :
+ GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty,
+ llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_dbpsadbw_512 :
+ GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
}
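For the widening multiplies the mask covers output lanes, not input lanes: pmaddw.d.512 consumes two <32 x i16> vectors but produces <16 x i32>, so its mask is an i16. A sketch under the same naming assumption (@madd is illustrative):

  ; Multiply adjacent i16 pairs and sum each pair into one i32 lane;
  ; the mask has one bit per output lane, hence i16 for 16 results.
  declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)

  define <16 x i32> @madd(<32 x i16> %a, <32 x i16> %b, <16 x i32> %src, i16 %k) {
    %r = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(
             <32 x i16> %a, <32 x i16> %b, <16 x i32> %src, i16 %k)
    ret <16 x i32> %r
  }
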
// Gather and Scatter ops
@@ -4807,27 +6283,71 @@ let TargetPrefix = "x86" in {
llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
}
-// AVX-512 conflict detection
+// AVX-512 conflict detection instructions,
+// plus instructions that count the number of leading zero bits
let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_conflict_d_128 :
+ GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_conflict_d_256 :
+ GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
def int_x86_avx512_mask_conflict_d_512 :
GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty],
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_conflict_q_128 :
+ GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_conflict_q_256 :
+ GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_conflict_q_512 :
GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty],
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_lzcnt_d_128 :
+ GCCBuiltin<"__builtin_ia32_vplzcntd_128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_lzcnt_d_256 :
+ GCCBuiltin<"__builtin_ia32_vplzcntd_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_lzcnt_d_512 :
GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty],
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_lzcnt_q_128 :
+ GCCBuiltin<"__builtin_ia32_vplzcntq_128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_lzcnt_q_256 :
+ GCCBuiltin<"__builtin_ia32_vplzcntq_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_lzcnt_q_512 :
GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty],
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
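Both families are unary per-lane operations with the usual merge semantics: conflict compares each lane against every lower-indexed lane and records the matches as a bit set, while lzcnt counts leading zero bits per lane. A sketch of the 512-bit dword lzcnt (names per the usual mapping, @lz illustrative):

  ; Per-lane leading-zero count, merged with %src under mask %k.
  declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16)

  define <16 x i32> @lz(<16 x i32> %a, <16 x i32> %src, i16 %k) {
    %r = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(
             <16 x i32> %a, <16 x i32> %src, i16 %k)
    ret <16 x i32> %r
  }
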
@@ -4911,20 +6431,70 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">,
+ def int_x86_avx512_mask_valign_q_512 :
+ GCCBuiltin<"__builtin_ia32_alignq512_mask">,
Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">,
+ def int_x86_avx512_mask_valign_d_512 :
+ GCCBuiltin<"__builtin_ia32_alignd512_mask">,
Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+ llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_valign_q_256 :
+ GCCBuiltin<"__builtin_ia32_alignq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_valign_d_256 :
+ GCCBuiltin<"__builtin_ia32_alignd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_valign_q_128 :
+ GCCBuiltin<"__builtin_ia32_alignq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_valign_d_128 :
+ GCCBuiltin<"__builtin_ia32_alignd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_palignr_128 :
+ GCCBuiltin<"__builtin_ia32_palignr128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty,
+ llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_palignr_256 :
+ GCCBuiltin<"__builtin_ia32_palignr256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v32i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_palignr_512 :
+ GCCBuiltin<"__builtin_ia32_palignr512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v64i8_ty,
+ llvm_i64_ty], [IntrNoMem]>;
}
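valign concatenates its two sources and extracts an element-aligned window, so its shift count is a whole-element immediate (carried as i32 here), whereas palignr shifts by bytes within each 128-bit lane. A sketch shifting right by two i64 elements (@align is illustrative):

  ; Concatenate %b:%a, shift right by 2 whole i64 elements, keep 8 results,
  ; merging with %src under mask %k.
  declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

  define <8 x i64> @align(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %k) {
    %r = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(
             <8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %k)
    ret <8 x i64> %r
  }
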
// Compares
let TargetPrefix = "x86" in {
// 512-bit
+ def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">,
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
@@ -5288,6 +6858,626 @@ let TargetPrefix = "x86" in {
llvm_i8_ty], [IntrReadArgMem]>;
}
+
+// Vector truncate
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_pmov_qb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqw512_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qd_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qd_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qd_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qd_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qd_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qd_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qd_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqd256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qd_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qd_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qd_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qd_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qd_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_qd_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqd512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_qd_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_qd_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_qd_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_qd_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_qd_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_db_128 :
+ GCCBuiltin<"__builtin_ia32_pmovdb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_db_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_db_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_db_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_db_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_db_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_db_256 :
+ GCCBuiltin<"__builtin_ia32_pmovdb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_db_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_db_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_db_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_db_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_db_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_db_512 :
+ GCCBuiltin<"__builtin_ia32_pmovdb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_db_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_db_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_db_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_db_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_db_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_dw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovdw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_dw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_dw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_dw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_dw_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_dw_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_dw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovdw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_dw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_dw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_dw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_dw_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_dw_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_dw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovdw512_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_dw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_dw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_dw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_dw_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_dw_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_wb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovwb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_wb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_wb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovswb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_wb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_wb_128 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_wb_mem_128 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_wb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovwb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_wb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_wb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovswb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_wb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_wb_256 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_wb_mem_256 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmov_wb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovwb512_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmov_wb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovs_wb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovswb512_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovs_wb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx512_mask_pmovus_wb_512 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovus_wb_mem_512 :
+ GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+}
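Each truncation comes in three flavors — plain truncate (pmov), truncate with signed saturation (pmovs), and with unsigned saturation (pmovus) — and each has a _mem_ twin that returns nothing and instead stores only the active lanes through a pointer, which is why the mem forms carry IntrReadWriteArgMem rather than IntrNoMem. A sketch of the qword-to-word pair, assuming the usual name mapping and this era's typed pointers (@trunc is illustrative):

  ; Register form: returns the truncated vector, merged with %src under %k.
  declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
  ; Memory form: no result; active lanes are truncated and stored to %p.
  declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8*, <8 x i64>, i8)

  define <8 x i16> @trunc(i8* %p, <8 x i64> %a, <8 x i16> %src, i8 %k) {
    call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %p, <8 x i64> %a, i8 %k)
    %r = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(
             <8 x i64> %a, <8 x i16> %src, i8 %k)
    ret <8 x i16> %r
  }
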
+
+// Bitwise ternary logic
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_pternlog_d_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_d_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pternlog_d_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_d_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pternlog_d_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
+ llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_d_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
+ llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pternlog_q_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_q_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pternlog_q_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_q_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pternlog_q_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_pternlog_q_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+}
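
The i32 immediate of the pternlog intrinsics is a per-bit truth table: the three
source bits at each position index into the eight bits of the immediate. A scalar
sketch of one 32-bit lane (illustrative, not part of the diff):

    #include <cstdint>

    // Bit I of the result is Imm8[(a_i << 2) | (b_i << 1) | c_i], where x_i
    // is bit I of operand x.
    uint32_t pternlog32(uint32_t A, uint32_t B, uint32_t C, uint8_t Imm8) {
      uint32_t R = 0;
      for (int I = 0; I < 32; ++I) {
        unsigned Idx =
            (((A >> I) & 1) << 2) | (((B >> I) & 1) << 1) | ((C >> I) & 1);
        R |= static_cast<uint32_t>((Imm8 >> Idx) & 1) << I;
      }
      return R; // e.g. Imm8 = 0x96 computes A ^ B ^ C
    }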
+
// Misc.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_ps_512 :
@@ -5314,6 +7504,14 @@ let TargetPrefix = "x86" in {
GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_ss :
+ GCCBuiltin<"__builtin_ia32_cmpss_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_cmp_sd :
+ GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
+ Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_movntdqa :
GCCBuiltin<"__builtin_ia32_movntdqa512">,
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h
index e6c2209..c546fc3 100644
--- a/contrib/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm/include/llvm/IR/LLVMContext.h
@@ -15,7 +15,6 @@
#ifndef LLVM_IR_LLVMCONTEXT_H
#define LLVM_IR_LLVMCONTEXT_H
-#include "llvm-c/Core.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Options.h"
@@ -60,7 +59,20 @@ public:
MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access"
MD_nonnull = 11, // "nonnull"
MD_dereferenceable = 12, // "dereferenceable"
- MD_dereferenceable_or_null = 13 // "dereferenceable_or_null"
+ MD_dereferenceable_or_null = 13, // "dereferenceable_or_null"
+ MD_make_implicit = 14, // "make.implicit"
+ MD_unpredictable = 15, // "unpredictable"
+ MD_invariant_group = 16, // "invariant.group"
+ MD_align = 17 // "align"
+ };
+
+ /// Known operand bundle tag IDs, which always have the same value. All
+ /// operand bundle tags that LLVM has special knowledge of are listed here.
+ /// Additionally, this scheme allows LLVM to efficiently check for specific
+ /// operand bundle tags without comparing strings.
+ enum {
+ OB_deopt = 0, // "deopt"
+ OB_funclet = 1, // "funclet"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -71,6 +83,15 @@ public:
/// custom metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
+ /// getOperandBundleTags - Populate client supplied SmallVector with the
+ /// bundle tags registered in this LLVMContext. The bundle tags are ordered
+ /// by increasing bundle IDs.
+ /// \see LLVMContext::getOperandBundleTagID
+ void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const;
+
+ /// getOperandBundleTagID - Maps a bundle tag to an integer ID. Every bundle
+ /// tag registered with an LLVMContext has a unique ID.
+ uint32_t getOperandBundleTagID(StringRef Tag) const;
typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context,
unsigned LocCookie);
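
A minimal sketch of how the two new operand bundle queries fit together
(function and variable names are illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    void listBundleTags(LLVMContext &Ctx) {
      // Pre-registered tags map to fixed IDs, so hot paths can compare
      // integers instead of strings.
      assert(Ctx.getOperandBundleTagID("deopt") == LLVMContext::OB_deopt);

      SmallVector<StringRef, 8> Tags;
      Ctx.getOperandBundleTags(Tags); // ordered by increasing bundle ID
    }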
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
index 7f7889a..b8e3347 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Pass.h"
@@ -118,6 +119,7 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry {
Pass *P;
Value *V;
Module *M;
+
public:
explicit PassManagerPrettyStackEntry(Pass *p)
: P(p), V(nullptr), M(nullptr) {} // When P is releaseMemory'd.
@@ -130,7 +132,6 @@ public:
void print(raw_ostream &OS) const override;
};
-
//===----------------------------------------------------------------------===//
// PMStack
//
@@ -158,7 +159,6 @@ private:
std::vector<PMDataManager *> S;
};
-
//===----------------------------------------------------------------------===//
// PMTopLevelManager
//
@@ -204,10 +204,7 @@ public:
virtual ~PMTopLevelManager();
/// Add immutable pass and initialize it.
- inline void addImmutablePass(ImmutablePass *P) {
- P->initializePass();
- ImmutablePasses.push_back(P);
- }
+ void addImmutablePass(ImmutablePass *P);
inline SmallVectorImpl<ImmutablePass *>& getImmutablePasses() {
return ImmutablePasses;
@@ -231,12 +228,10 @@ public:
PMStack activeStack;
protected:
-
/// Collection of pass managers
SmallVector<PMDataManager *, 8> PassManagers;
private:
-
/// Collection of pass managers that are not directly maintained
/// by this pass manager
SmallVector<PMDataManager *, 8> IndirectPassManagers;
@@ -253,7 +248,46 @@ private:
/// Immutable passes are managed by top level manager.
SmallVector<ImmutablePass *, 16> ImmutablePasses;
- DenseMap<Pass *, AnalysisUsage *> AnUsageMap;
+ /// Map from ID to immutable passes.
+ SmallDenseMap<AnalysisID, ImmutablePass *, 8> ImmutablePassMap;
+
+
+ /// A wrapper around AnalysisUsage for the purpose of uniquing. The wrapper
+ /// is used to avoid needing to make AnalysisUsage itself a folding set node.
+ struct AUFoldingSetNode : public FoldingSetNode {
+ AnalysisUsage AU;
+ AUFoldingSetNode(const AnalysisUsage &AU) : AU(AU) {}
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, AU);
+ }
+ static void Profile(FoldingSetNodeID &ID, const AnalysisUsage &AU) {
+ // TODO: We could consider sorting the dependency arrays within the
+ // AnalysisUsage (since they are conceptually unordered).
+ ID.AddBoolean(AU.getPreservesAll());
+ auto ProfileVec = [&](const SmallVectorImpl<AnalysisID>& Vec) {
+ ID.AddInteger(Vec.size());
+ for(AnalysisID AID : Vec)
+ ID.AddPointer(AID);
+ };
+ ProfileVec(AU.getRequiredSet());
+ ProfileVec(AU.getRequiredTransitiveSet());
+ ProfileVec(AU.getPreservedSet());
+ ProfileVec(AU.getUsedSet());
+ }
+ };
+
+ // Contains all of the unique combinations of AnalysisUsage. This is helpful
+ // when we have multiple instances of the same pass since they'll usually
+ // have the same analysis usage and can share storage.
+ FoldingSet<AUFoldingSetNode> UniqueAnalysisUsages;
+
+ // Allocator used for allocating AUFoldingSetNodes. This handles deletion of
+ // all allocated nodes in one fell swoop.
+ SpecificBumpPtrAllocator<AUFoldingSetNode> AUFoldingSetNodeAllocator;
+
+ // Maps from a pass to its associated entry in UniqueAnalysisUsages. Does
+ // not own the storage associated with either key or value.
+ DenseMap<Pass *, AnalysisUsage*> AnUsageMap;
/// Collection of PassInfo objects found via analysis IDs and in this top
/// level manager. This is used to memoize queries to the pass registry.
@@ -262,8 +296,6 @@ private:
mutable DenseMap<AnalysisID, const PassInfo *> AnalysisPassInfos;
};
-
-
//===----------------------------------------------------------------------===//
// PMDataManager
@@ -271,7 +303,6 @@ private:
/// used by pass managers.
class PMDataManager {
public:
-
explicit PMDataManager() : TPM(nullptr), Depth(0) {
initializeAnalysisInfo();
}
@@ -319,13 +350,12 @@ public:
// passes that are managed by this manager.
bool preserveHigherLevelAnalysis(Pass *P);
-
- /// Populate RequiredPasses with analysis pass that are required by
- /// pass P and are available. Populate ReqPassNotAvailable with analysis
- /// pass that are required by pass P but are not available.
- void collectRequiredAnalysis(SmallVectorImpl<Pass *> &RequiredPasses,
- SmallVectorImpl<AnalysisID> &ReqPassNotAvailable,
- Pass *P);
+ /// Populate UsedPasses with analysis passes that are used or required by
+ /// pass P and are available. Populate ReqPassNotAvailable with analysis
+ /// passes that are required by pass P but are not available.
+ void collectRequiredAndUsedAnalyses(
+ SmallVectorImpl<Pass *> &UsedPasses,
+ SmallVectorImpl<AnalysisID> &ReqPassNotAvailable, Pass *P);
/// All Required analyses should be available to the pass as it runs! Here
/// we fill in the AnalysisImpls member of the pass so that it can
@@ -351,6 +381,7 @@ public:
enum PassDebuggingString S2, StringRef Msg);
void dumpRequiredSet(const Pass *P) const;
void dumpPreservedSet(const Pass *P) const;
+ void dumpUsedSet(const Pass *P) const;
unsigned getNumContainedPasses() const {
return (unsigned)PassVector.size();
@@ -374,7 +405,6 @@ public:
}
protected:
-
// Top level manager.
PMTopLevelManager *TPM;
@@ -439,9 +469,9 @@ public:
/// doFinalization - Overrides ModulePass doFinalization for global
/// finalization tasks
- ///
+ ///
using ModulePass::doFinalization;
-
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M) override;
@@ -473,7 +503,6 @@ public:
};
Timer *getPassTimer(Pass *);
-
}
#endif
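
The uniquing that UniqueAnalysisUsages enables follows the usual FoldingSet
find-or-insert idiom; a sketch, treating the private AUFoldingSetNode as
accessible for illustration (the helper itself is hypothetical):

    AnalysisUsage *
    uniqueAU(FoldingSet<AUFoldingSetNode> &Set,
             SpecificBumpPtrAllocator<AUFoldingSetNode> &Alloc,
             const AnalysisUsage &AU) {
      FoldingSetNodeID ID;
      AUFoldingSetNode::Profile(ID, AU);
      void *InsertPos = nullptr;
      if (AUFoldingSetNode *N = Set.FindNodeOrInsertPos(ID, InsertPos))
        return &N->AU;                     // identical usage already uniqued
      AUFoldingSetNode *N = new (Alloc.Allocate()) AUFoldingSetNode(AU);
      Set.InsertNode(N, InsertPos);        // bump allocator owns the node
      return &N->AU;
    }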
diff --git a/contrib/llvm/include/llvm/IR/MDBuilder.h b/contrib/llvm/include/llvm/IR/MDBuilder.h
index ceb1c73..35341e3 100644
--- a/contrib/llvm/include/llvm/IR/MDBuilder.h
+++ b/contrib/llvm/include/llvm/IR/MDBuilder.h
@@ -60,6 +60,9 @@ public:
/// \brief Return metadata containing a number of branch weights.
MDNode *createBranchWeights(ArrayRef<uint32_t> Weights);
+ /// Return metadata specifying that a branch or switch is unpredictable.
+ MDNode *createUnpredictable();
+
/// Return metadata containing the entry count for a function.
MDNode *createFunctionEntryCount(uint64_t Count);
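
Attaching the new metadata kind to a branch would look like this (sketch; BI
is some BranchInst*):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    void markUnpredictable(BranchInst *BI) {
      MDBuilder MDB(BI->getContext());
      BI->setMetadata(LLVMContext::MD_unpredictable,
                      MDB.createUnpredictable());
    }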
diff --git a/contrib/llvm/include/llvm/IR/Mangler.h b/contrib/llvm/include/llvm/IR/Mangler.h
index b72b259..ea2f0c3 100644
--- a/contrib/llvm/include/llvm/IR/Mangler.h
+++ b/contrib/llvm/include/llvm/IR/Mangler.h
@@ -15,12 +15,12 @@
#define LLVM_IR_MANGLER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class DataLayout;
-class GlobalValue;
template <typename T> class SmallVectorImpl;
class Twine;
diff --git a/contrib/llvm/include/llvm/IR/Metadata.def b/contrib/llvm/include/llvm/IR/Metadata.def
index 857e4637d..b1d2217 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.def
+++ b/contrib/llvm/include/llvm/IR/Metadata.def
@@ -13,7 +13,8 @@
#if !(defined HANDLE_METADATA || defined HANDLE_METADATA_LEAF || \
defined HANDLE_METADATA_BRANCH || defined HANDLE_MDNODE_LEAF || \
- defined HANDLE_MDNODE_BRANCH || \
+ defined HANDLE_MDNODE_LEAF_UNIQUABLE || defined HANDLE_MDNODE_BRANCH || \
+ defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE || \
defined HANDLE_SPECIALIZED_MDNODE_LEAF || \
defined HANDLE_SPECIALIZED_MDNODE_BRANCH)
#error "Missing macro definition of HANDLE_METADATA*"
@@ -34,6 +35,24 @@
#define HANDLE_METADATA_BRANCH(CLASS) HANDLE_METADATA(CLASS)
#endif
+// Handler for specialized and uniquable leaf nodes under MDNode. Defers to
+// HANDLE_MDNODE_LEAF_UNIQUABLE if it's defined, otherwise to
+// HANDLE_SPECIALIZED_MDNODE_LEAF.
+#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE
+#ifdef HANDLE_MDNODE_LEAF_UNIQUABLE
+#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \
+ HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS)
+#else
+#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \
+ HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS)
+#endif
+#endif
+
+// Handler for uniquable leaf nodes under MDNode.
+#ifndef HANDLE_MDNODE_LEAF_UNIQUABLE
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) HANDLE_MDNODE_LEAF(CLASS)
+#endif
+
// Handler for leaf nodes under MDNode.
#ifndef HANDLE_MDNODE_LEAF
#define HANDLE_MDNODE_LEAF(CLASS) HANDLE_METADATA_LEAF(CLASS)
@@ -59,43 +78,46 @@ HANDLE_METADATA_BRANCH(ValueAsMetadata)
HANDLE_METADATA_LEAF(ConstantAsMetadata)
HANDLE_METADATA_LEAF(LocalAsMetadata)
HANDLE_MDNODE_BRANCH(MDNode)
-HANDLE_MDNODE_LEAF(MDTuple)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DILocation)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIExpression)
+HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode)
-HANDLE_SPECIALIZED_MDNODE_LEAF(GenericDINode)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DISubrange)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIEnumerator)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubrange)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIEnumerator)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DIScope)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DIType)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIBasicType)
-HANDLE_SPECIALIZED_MDNODE_BRANCH(DIDerivedTypeBase)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIDerivedType)
-HANDLE_SPECIALIZED_MDNODE_BRANCH(DICompositeTypeBase)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DICompositeType)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DISubroutineType)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIFile)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIBasicType)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIDerivedType)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICompositeType)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubroutineType)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFile)
HANDLE_SPECIALIZED_MDNODE_LEAF(DICompileUnit)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DILocalScope)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DISubprogram)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubprogram)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlock)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlockFile)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DINamespace)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIModule)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlock)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlockFile)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DINamespace)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIModule)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DITemplateParameter)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateTypeParameter)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateValueParameter)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateTypeParameter)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateValueParameter)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DIVariable)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIGlobalVariable)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DILocalVariable)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIObjCProperty)
-HANDLE_SPECIALIZED_MDNODE_LEAF(DIImportedEntity)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariable)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity)
+HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile)
#undef HANDLE_METADATA
#undef HANDLE_METADATA_LEAF
#undef HANDLE_METADATA_BRANCH
#undef HANDLE_MDNODE_LEAF
+#undef HANDLE_MDNODE_LEAF_UNIQUABLE
#undef HANDLE_MDNODE_BRANCH
#undef HANDLE_SPECIALIZED_MDNODE_LEAF
+#undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE
#undef HANDLE_SPECIALIZED_MDNODE_BRANCH
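
Consumers select which handlers to expand before including the .def; for
example, to act only on the uniquable leaf kinds (sketch):

    #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
      void visit##CLASS(CLASS &N);        // declare one visitor per kind
    #include "llvm/IR/Metadata.def"       // the .def #undefs the macros itself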
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index c639625..2ea5913 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -18,10 +18,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/MetadataTracking.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include <type_traits>
@@ -32,9 +33,6 @@ class LLVMContext;
class Module;
class ModuleSlotTracker;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
enum LLVMConstants : uint32_t {
DEBUG_METADATA_VERSION = 3 // Current debug info version number.
};
@@ -86,7 +84,9 @@ public:
DIImportedEntityKind,
ConstantAsMetadataKind,
LocalAsMetadataKind,
- MDStringKind
+ MDStringKind,
+ DIMacroKind,
+ DIMacroFileKind
};
protected:
@@ -126,9 +126,10 @@ public:
/// If \c M is provided, metadata nodes will be numbered canonically;
/// otherwise, pointer addresses are substituted.
/// @{
- void print(raw_ostream &OS, const Module *M = nullptr) const;
- void print(raw_ostream &OS, ModuleSlotTracker &MST,
- const Module *M = nullptr) const;
+ void print(raw_ostream &OS, const Module *M = nullptr,
+ bool IsForDebug = false) const;
+ void print(raw_ostream &OS, ModuleSlotTracker &MST, const Module *M = nullptr,
+ bool IsForDebug = false) const;
/// @}
/// \brief Print as operand.
@@ -196,6 +197,77 @@ private:
void untrack();
};
+/// \brief API for tracking metadata references through RAUW and deletion.
+///
+/// Shared API for updating \a Metadata pointers in subclasses that support
+/// RAUW.
+///
+/// This API is not meant to be used directly. See \a TrackingMDRef for a
+/// user-friendly tracking reference.
+class MetadataTracking {
+public:
+ /// \brief Track the reference to metadata.
+ ///
+ /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD
+ /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets
+ /// deleted, \c MD will be set to \c nullptr.
+ ///
+ /// If tracking isn't supported, \c *MD will not change.
+ ///
+ /// \return true iff tracking is supported by \c MD.
+ static bool track(Metadata *&MD) {
+ return track(&MD, *MD, static_cast<Metadata *>(nullptr));
+ }
+
+ /// \brief Track the reference to metadata for \a Metadata.
+ ///
+ /// As \a track(Metadata*&), but with support for calling back to \c Owner to
+ /// tell it that its operand changed. This could trigger \c Owner being
+ /// re-uniqued.
+ static bool track(void *Ref, Metadata &MD, Metadata &Owner) {
+ return track(Ref, MD, &Owner);
+ }
+
+ /// \brief Track the reference to metadata for \a MetadataAsValue.
+ ///
+ /// As \a track(Metadata*&), but with support for calling back to \c Owner to
+ /// tell it that its operand changed. This could trigger \c Owner being
+ /// re-uniqued.
+ static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) {
+ return track(Ref, MD, &Owner);
+ }
+
+ /// \brief Stop tracking a reference to metadata.
+ ///
+ /// Stops \c *MD from tracking \c MD.
+ static void untrack(Metadata *&MD) { untrack(&MD, *MD); }
+ static void untrack(void *Ref, Metadata &MD);
+
+ /// \brief Move tracking from one reference to another.
+ ///
+ /// Semantically equivalent to \c untrack(MD) followed by \c track(New),
+ /// except that ownership callbacks are maintained.
+ ///
+ /// Note: it is an error if \c *MD does not equal \c New.
+ ///
+ /// \return true iff tracking is supported by \c MD.
+ static bool retrack(Metadata *&MD, Metadata *&New) {
+ return retrack(&MD, *MD, &New);
+ }
+ static bool retrack(void *Ref, Metadata &MD, void *New);
+
+ /// \brief Check whether metadata is replaceable.
+ static bool isReplaceable(const Metadata &MD);
+
+ typedef PointerUnion<MetadataAsValue *, Metadata *> OwnerTy;
+
+private:
+ /// \brief Track a reference to metadata for an owner.
+ ///
+ /// Generalized version of tracking.
+ static bool track(void *Ref, Metadata &MD, OwnerTy Owner);
+};
+
/// \brief Shared implementation of use-lists for replaceable metadata.
///
/// Most metadata cannot be RAUW'ed. This is a shared implementation of
@@ -572,10 +644,12 @@ struct AAMDNodes {
template<>
struct DenseMapInfo<AAMDNodes> {
static inline AAMDNodes getEmptyKey() {
- return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(), 0, 0);
+ return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(),
+ nullptr, nullptr);
}
static inline AAMDNodes getTombstoneKey() {
- return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(), 0, 0);
+ return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(),
+ nullptr, nullptr);
}
static unsigned getHashValue(const AAMDNodes &Val) {
return DenseMapInfo<MDNode *>::getHashValue(Val.TBAA) ^
@@ -830,10 +904,11 @@ public:
/// \brief Resolve cycles.
///
/// Once all forward declarations have been resolved, force cycles to be
- /// resolved.
+ /// resolved. If \p MDMaterialized is true, then any temporary metadata
+ /// is ignored, otherwise it asserts when encountering temporary metadata.
///
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
- void resolveCycles();
+ void resolveCycles(bool MDMaterialized = true);
/// \brief Replace a temporary node with a permanent one.
///
@@ -881,6 +956,7 @@ protected:
void storeDistinctInContext();
template <class T, class StoreT>
static T *storeImpl(T *N, StorageType Storage, StoreT &Store);
+ template <class T> static T *storeImpl(T *N, StorageType Storage);
private:
void handleChangedOperand(void *Ref, Metadata *New);
@@ -913,13 +989,13 @@ private:
N->recalculateHash();
}
template <class NodeTy>
- static void dispatchRecalculateHash(NodeTy *N, std::false_type) {}
+ static void dispatchRecalculateHash(NodeTy *, std::false_type) {}
template <class NodeTy>
static void dispatchResetHash(NodeTy *N, std::true_type) {
N->setHash(0);
}
template <class NodeTy>
- static void dispatchResetHash(NodeTy *N, std::false_type) {}
+ static void dispatchResetHash(NodeTy *, std::false_type) {}
public:
typedef const MDOperand *op_iterator;
@@ -963,6 +1039,8 @@ public:
static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);
static MDNode *getMostGenericRange(MDNode *A, MDNode *B);
static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B);
+ static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B);
+
};
/// \brief Tuple of metadata.
@@ -1125,7 +1203,6 @@ public:
///
/// TODO: Inherit from Metadata.
class NamedMDNode : public ilist_node<NamedMDNode> {
- friend class SymbolTableListTraits<NamedMDNode, Module>;
friend struct ilist_traits<NamedMDNode>;
friend class LLVMContextImpl;
friend class Module;
@@ -1193,7 +1270,7 @@ public:
void addOperand(MDNode *M);
void setOperand(unsigned I, MDNode *New);
StringRef getName() const;
- void print(raw_ostream &ROS) const;
+ void print(raw_ostream &ROS, bool IsForDebug = false) const;
void dump() const;
// ---------------------------------------------------------------------------
@@ -1208,13 +1285,13 @@ public:
const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); }
inline iterator_range<op_iterator> operands() {
- return iterator_range<op_iterator>(op_begin(), op_end());
+ return make_range(op_begin(), op_end());
}
inline iterator_range<const_op_iterator> operands() const {
- return iterator_range<const_op_iterator>(op_begin(), op_end());
+ return make_range(op_begin(), op_end());
}
};
} // end llvm namespace
-#endif
+#endif // LLVM_IR_METADATA_H
diff --git a/contrib/llvm/include/llvm/IR/MetadataTracking.h b/contrib/llvm/include/llvm/IR/MetadataTracking.h
deleted file mode 100644
index 541d9b3..0000000
--- a/contrib/llvm/include/llvm/IR/MetadataTracking.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- llvm/IR/MetadataTracking.h - Metadata tracking ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Low-level functions to enable tracking of metadata that could RAUW.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_METADATATRACKING_H
-#define LLVM_IR_METADATATRACKING_H
-
-#include "llvm/ADT/PointerUnion.h"
-#include "llvm/Support/Casting.h"
-#include <type_traits>
-
-namespace llvm {
-
-class Metadata;
-class MetadataAsValue;
-
-/// \brief API for tracking metadata references through RAUW and deletion.
-///
-/// Shared API for updating \a Metadata pointers in subclasses that support
-/// RAUW.
-///
-/// This API is not meant to be used directly. See \a TrackingMDRef for a
-/// user-friendly tracking reference.
-class MetadataTracking {
-public:
- /// \brief Track the reference to metadata.
- ///
- /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD
- /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets
- /// deleted, \c MD will be set to \c nullptr.
- ///
- /// If tracking isn't supported, \c *MD will not change.
- ///
- /// \return true iff tracking is supported by \c MD.
- static bool track(Metadata *&MD) {
- return track(&MD, *MD, static_cast<Metadata *>(nullptr));
- }
-
- /// \brief Track the reference to metadata for \a Metadata.
- ///
- /// As \a track(Metadata*&), but with support for calling back to \c Owner to
- /// tell it that its operand changed. This could trigger \c Owner being
- /// re-uniqued.
- static bool track(void *Ref, Metadata &MD, Metadata &Owner) {
- return track(Ref, MD, &Owner);
- }
-
- /// \brief Track the reference to metadata for \a MetadataAsValue.
- ///
- /// As \a track(Metadata*&), but with support for calling back to \c Owner to
- /// tell it that its operand changed. This could trigger \c Owner being
- /// re-uniqued.
- static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) {
- return track(Ref, MD, &Owner);
- }
-
- /// \brief Stop tracking a reference to metadata.
- ///
- /// Stops \c *MD from tracking \c MD.
- static void untrack(Metadata *&MD) { untrack(&MD, *MD); }
- static void untrack(void *Ref, Metadata &MD);
-
- /// \brief Move tracking from one reference to another.
- ///
- /// Semantically equivalent to \c untrack(MD) followed by \c track(New),
- /// except that ownership callbacks are maintained.
- ///
- /// Note: it is an error if \c *MD does not equal \c New.
- ///
- /// \return true iff tracking is supported by \c MD.
- static bool retrack(Metadata *&MD, Metadata *&New) {
- return retrack(&MD, *MD, &New);
- }
- static bool retrack(void *Ref, Metadata &MD, void *New);
-
- /// \brief Check whether metadata is replaceable.
- static bool isReplaceable(const Metadata &MD);
-
- typedef PointerUnion<MetadataAsValue *, Metadata *> OwnerTy;
-
-private:
- /// \brief Track a reference to metadata for an owner.
- ///
- /// Generalized version of tracking.
- static bool track(void *Ref, Metadata &MD, OwnerTy Owner);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h
index 1668b95..942f685 100644
--- a/contrib/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm/include/llvm/IR/Module.h
@@ -15,6 +15,7 @@
#ifndef LLVM_IR_MODULE_H
#define LLVM_IR_MODULE_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
@@ -34,54 +35,6 @@ class LLVMContext;
class RandomNumberGenerator;
class StructType;
-template<> struct ilist_traits<Function>
- : public SymbolTableListTraits<Function, Module> {
-
- // createSentinel is used to get hold of the node that marks the end of the
- // list... (same trick used here as in ilist_traits<Instruction>)
- Function *createSentinel() const {
- return static_cast<Function*>(&Sentinel);
- }
- static void destroySentinel(Function*) {}
-
- Function *provideInitialHead() const { return createSentinel(); }
- Function *ensureHead(Function*) const { return createSentinel(); }
- static void noteHead(Function*, Function*) {}
-
-private:
- mutable ilist_node<Function> Sentinel;
-};
-
-template<> struct ilist_traits<GlobalVariable>
- : public SymbolTableListTraits<GlobalVariable, Module> {
- // createSentinel is used to create a node that marks the end of the list.
- GlobalVariable *createSentinel() const {
- return static_cast<GlobalVariable*>(&Sentinel);
- }
- static void destroySentinel(GlobalVariable*) {}
-
- GlobalVariable *provideInitialHead() const { return createSentinel(); }
- GlobalVariable *ensureHead(GlobalVariable*) const { return createSentinel(); }
- static void noteHead(GlobalVariable*, GlobalVariable*) {}
-private:
- mutable ilist_node<GlobalVariable> Sentinel;
-};
-
-template<> struct ilist_traits<GlobalAlias>
- : public SymbolTableListTraits<GlobalAlias, Module> {
- // createSentinel is used to create a node that marks the end of the list.
- GlobalAlias *createSentinel() const {
- return static_cast<GlobalAlias*>(&Sentinel);
- }
- static void destroySentinel(GlobalAlias*) {}
-
- GlobalAlias *provideInitialHead() const { return createSentinel(); }
- GlobalAlias *ensureHead(GlobalAlias*) const { return createSentinel(); }
- static void noteHead(GlobalAlias*, GlobalAlias*) {}
-private:
- mutable ilist_node<GlobalAlias> Sentinel;
-};
-
template<> struct ilist_traits<NamedMDNode>
: public ilist_default_traits<NamedMDNode> {
// createSentinel is used to get hold of a node that marks the end of
@@ -96,6 +49,7 @@ template<> struct ilist_traits<NamedMDNode>
static void noteHead(NamedMDNode*, NamedMDNode*) {}
void addNodeToList(NamedMDNode *) {}
void removeNodeFromList(NamedMDNode *) {}
+
private:
mutable ilist_node<NamedMDNode> Sentinel;
};
@@ -116,11 +70,11 @@ class Module {
/// @{
public:
/// The type for the list of global variables.
- typedef iplist<GlobalVariable> GlobalListType;
+ typedef SymbolTableList<GlobalVariable> GlobalListType;
/// The type for the list of functions.
- typedef iplist<Function> FunctionListType;
+ typedef SymbolTableList<Function> FunctionListType;
/// The type for the list of aliases.
- typedef iplist<GlobalAlias> AliasListType;
+ typedef SymbolTableList<GlobalAlias> AliasListType;
/// The type for the list of named metadata.
typedef ilist<NamedMDNode> NamedMDListType;
/// The type of the comdat "symbol" table.
@@ -328,6 +282,11 @@ public:
/// registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
+ /// Populate client supplied SmallVector with the bundle tags registered in
+ /// this LLVMContext. The bundle tags are ordered by increasing bundle IDs.
+ /// \see LLVMContext::getOperandBundleTagID
+ void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const;
+
/// Return the type with the specified name, or null if there is none by that
/// name.
StructType *getTypeByName(StringRef Name) const;
@@ -472,7 +431,7 @@ public:
/// Sets the GVMaterializer to GVM. This module must not yet have a
/// Materializer. To reset the materializer for a module that already has one,
- /// call MaterializeAllPermanently first. Destroying this module will destroy
+ /// call materializeAll first. Destroying this module will destroy
/// its materializer without materializing any more GlobalValues. Without
/// destroying the Module, there is no way to detach or destroy a materializer
/// without materializing all the GVs it controls, to avoid leaving orphan
@@ -480,27 +439,16 @@ public:
void setMaterializer(GVMaterializer *GVM);
/// Retrieves the GVMaterializer, if any, for this Module.
GVMaterializer *getMaterializer() const { return Materializer.get(); }
-
- /// Returns true if this GV was loaded from this Module's GVMaterializer and
- /// the GVMaterializer knows how to dematerialize the GV.
- bool isDematerializable(const GlobalValue *GV) const;
+ bool isMaterialized() const { return !getMaterializer(); }
/// Make sure the GlobalValue is fully read. If the module is corrupt, this
/// returns true and fills in the optional string with information about the
/// problem. If successful, this returns false.
std::error_code materialize(GlobalValue *GV);
- /// If the GlobalValue is read in, and if the GVMaterializer supports it,
- /// release the memory for the function, and set it up to be materialized
- /// lazily. If !isDematerializable(), this method is a no-op.
- void dematerialize(GlobalValue *GV);
-
- /// Make sure all GlobalValues in this Module are fully read.
- std::error_code materializeAll();
/// Make sure all GlobalValues in this Module are fully read and clear the
- /// Materializer. If the module is corrupt, this DOES NOT clear the old
/// Materializer.
- std::error_code materializeAllPermanently();
+ std::error_code materializeAll();
std::error_code materializeMetadata();
@@ -556,10 +504,10 @@ public:
bool global_empty() const { return GlobalList.empty(); }
iterator_range<global_iterator> globals() {
- return iterator_range<global_iterator>(global_begin(), global_end());
+ return make_range(global_begin(), global_end());
}
iterator_range<const_global_iterator> globals() const {
- return iterator_range<const_global_iterator>(global_begin(), global_end());
+ return make_range(global_begin(), global_end());
}
/// @}
@@ -578,10 +526,10 @@ public:
bool empty() const { return FunctionList.empty(); }
iterator_range<iterator> functions() {
- return iterator_range<iterator>(begin(), end());
+ return make_range(begin(), end());
}
iterator_range<const_iterator> functions() const {
- return iterator_range<const_iterator>(begin(), end());
+ return make_range(begin(), end());
}
/// @}
@@ -596,10 +544,10 @@ public:
bool alias_empty() const { return AliasList.empty(); }
iterator_range<alias_iterator> aliases() {
- return iterator_range<alias_iterator>(alias_begin(), alias_end());
+ return make_range(alias_begin(), alias_end());
}
iterator_range<const_alias_iterator> aliases() const {
- return iterator_range<const_alias_iterator>(alias_begin(), alias_end());
+ return make_range(alias_begin(), alias_end());
}
/// @}
@@ -620,12 +568,10 @@ public:
bool named_metadata_empty() const { return NamedMDList.empty(); }
iterator_range<named_metadata_iterator> named_metadata() {
- return iterator_range<named_metadata_iterator>(named_metadata_begin(),
- named_metadata_end());
+ return make_range(named_metadata_begin(), named_metadata_end());
}
iterator_range<const_named_metadata_iterator> named_metadata() const {
- return iterator_range<const_named_metadata_iterator>(named_metadata_begin(),
- named_metadata_end());
+ return make_range(named_metadata_begin(), named_metadata_end());
}
/// Destroy ConstantArrays in LLVMContext if they are not used.
@@ -646,11 +592,12 @@ public:
/// uselistorder directives so that use-lists can be recreated when reading
/// the assembly.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder = false) const;
+ bool ShouldPreserveUseListOrder = false,
+ bool IsForDebug = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
-
+
/// This function causes all the subinstructions to "let go" of all references
/// that they are maintaining. This allows one to 'delete' a whole class at
/// a time, even though there may be circular references... first all
@@ -666,6 +613,10 @@ public:
/// \brief Returns the Dwarf Version by checking module flags.
unsigned getDwarfVersion() const;
+ /// \brief Returns the CodeView Version by checking module flags.
+ /// Returns zero if not present in module.
+ unsigned getCodeViewFlag() const;
+
/// @}
/// @name Utility functions for querying and setting PIC level
/// @{
@@ -676,6 +627,16 @@ public:
/// \brief Set the PIC level (small or large model)
void setPICLevel(PICLevel::Level PL);
/// @}
+
+ /// @name Utility functions for querying and setting PGO counts
+ /// @{
+
+ /// \brief Set maximum function count in PGO mode
+ void setMaximumFunctionCount(uint64_t);
+
+ /// \brief Returns maximum function count in PGO mode
+ Optional<uint64_t> getMaximumFunctionCount();
+ /// @}
};
/// A raw_ostream inserter for modules.
@@ -693,7 +654,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef)
inline Module *unwrap(LLVMModuleProviderRef MP) {
return reinterpret_cast<Module*>(MP);
}
-
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/IR/ModuleSlotTracker.h b/contrib/llvm/include/llvm/IR/ModuleSlotTracker.h
index c37dcec..49730a6 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSlotTracker.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSlotTracker.h
@@ -17,6 +17,7 @@ namespace llvm {
class Module;
class Function;
class SlotTracker;
+class Value;
/// Manage lifetime of a slot tracker for printing IR.
///
@@ -61,6 +62,13 @@ public:
/// Purge the currently incorporated function and incorporate \c F. If \c F
/// is currently incorporated, this is a no-op.
void incorporateFunction(const Function &F);
+
+ /// Return the slot number of the specified local value.
+ ///
+ /// A function that defines this value should be incorporated prior to calling
+ /// this method.
+ /// Return -1 if the value is not in the function's SlotTracker.
+ int getLocalSlot(const Value *V);
};
} // end namespace llvm
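
Typical use of the new query (sketch; assumes V lives inside F):

    #include "llvm/IR/ModuleSlotTracker.h"
    using namespace llvm;

    int localSlotOf(Module &M, const Function &F, const Value &V) {
      ModuleSlotTracker MST(&M);
      MST.incorporateFunction(F);  // must precede local-value queries
      return MST.getLocalSlot(&V); // -1 if V is not in F's SlotTracker
    }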
diff --git a/contrib/llvm/include/llvm/IR/PassManager.h b/contrib/llvm/include/llvm/IR/PassManager.h
index 4166bab..2ceb53d 100644
--- a/contrib/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm/include/llvm/IR/PassManager.h
@@ -203,7 +203,8 @@ public:
for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
if (DebugLogging)
- dbgs() << "Running pass: " << Passes[Idx]->name() << "\n";
+ dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
+ << IR.getName() << "\n";
PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM);
diff --git a/contrib/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm/include/llvm/IR/PatternMatch.h
index 41154e6..f4d7d8c 100644
--- a/contrib/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm/include/llvm/IR/PatternMatch.h
@@ -1272,6 +1272,46 @@ inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
}
+template <typename Opnd_t> struct Signum_match {
+ Opnd_t Val;
+ Signum_match(const Opnd_t &V) : Val(V) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ unsigned TypeSize = V->getType()->getScalarSizeInBits();
+ if (TypeSize == 0)
+ return false;
+
+ unsigned ShiftWidth = TypeSize - 1;
+ Value *OpL = nullptr, *OpR = nullptr;
+
+ // This is the representation of signum we match:
+ //
+ // signum(x) == (x >> 63) | (-x >>u 63)
+ //
+ // An i1 value is its own signum, so it's correct to match
+ //
+ // signum(x) == (x >> 0) | (-x >>u 0)
+ //
+ // for i1 values.
+
+ auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
+ auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
+ auto Signum = m_Or(LHS, RHS);
+
+ return Signum.match(V) && OpL == OpR && Val.match(OpL);
+ }
+};
+
+/// \brief Matches a signum pattern.
+///
+/// signum(x) =
+/// x > 0 -> 1
+/// x == 0 -> 0
+/// x < 0 -> -1
+template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
+ return Signum_match<Val_t>(V);
+}
+
} // end namespace PatternMatch
} // end namespace llvm
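
A matcher client would use the new combinator like any other PatternMatch
helper (sketch; V is a Value* of integer type):

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    bool isSignum(Value *V) {
      Value *X;
      // Matches (X >> BW-1) | (-X >>u BW-1), the representation above.
      return match(V, m_Signum(m_Value(X)));
    }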
diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h
index 4ab1f84..7310c56 100644
--- a/contrib/llvm/include/llvm/IR/Statepoint.h
+++ b/contrib/llvm/include/llvm/IR/Statepoint.h
@@ -173,7 +173,7 @@ public:
/// range adapter for call arguments
iterator_range<arg_iterator> call_args() const {
- return iterator_range<arg_iterator>(arg_begin(), arg_end());
+ return make_range(arg_begin(), arg_end());
}
/// \brief Return true if the call or the callee has the given attribute.
@@ -201,8 +201,7 @@ public:
/// range adapter for GC transition arguments
iterator_range<arg_iterator> gc_transition_args() const {
- return iterator_range<arg_iterator>(gc_transition_args_begin(),
- gc_transition_args_end());
+ return make_range(gc_transition_args_begin(), gc_transition_args_end());
}
/// Number of additional arguments excluding those intended
@@ -225,7 +224,7 @@ public:
/// range adapter for vm state arguments
iterator_range<arg_iterator> vm_state_args() const {
- return iterator_range<arg_iterator>(vm_state_begin(), vm_state_end());
+ return make_range(vm_state_begin(), vm_state_end());
}
typename CallSiteTy::arg_iterator gc_args_begin() const {
@@ -235,9 +234,13 @@ public:
return getCallSite().arg_end();
}
+ unsigned gcArgsStartIdx() const {
+ return gc_args_begin() - getInstruction()->op_begin();
+ }
+
/// range adapter for gc arguments
iterator_range<arg_iterator> gc_args() const {
- return iterator_range<arg_iterator>(gc_args_begin(), gc_args_end());
+ return make_range(gc_args_begin(), gc_args_end());
}
/// Get list of all gc relocates linked to this statepoint
@@ -320,7 +323,7 @@ public:
bool isTiedToInvoke() const {
const Value *Token = RelocateCS.getArgument(0);
- return isa<ExtractValueInst>(Token) || isa<InvokeInst>(Token);
+ return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token);
}
/// Get enclosed relocate intrinsic
@@ -332,7 +335,7 @@ public:
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
- if (!isa<ExtractValueInst>(Token)) {
+ if (!isa<LandingPadInst>(Token)) {
return cast<Instruction>(Token);
}
@@ -396,16 +399,10 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
LandingPadInst *LandingPad =
cast<InvokeInst>(getInstruction())->getLandingPadInst();
- // Search for extract value from landingpad instruction to which
- // gc relocates will be attached
+ // Search for gc relocates that are attached to this landingpad.
for (const User *LandingPadUser : LandingPad->users()) {
- if (!isa<ExtractValueInst>(LandingPadUser))
- continue;
-
- // gc relocates should be attached to this extract value
- for (const User *U : LandingPadUser->users())
- if (isGCRelocate(U))
- Result.push_back(GCRelocateOperands(U));
+ if (isGCRelocate(LandingPadUser))
+ Result.push_back(GCRelocateOperands(LandingPadUser));
}
return Result;
}
diff --git a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
index 0a5149c..5fc48d1 100644
--- a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
+++ b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
@@ -29,31 +29,66 @@
namespace llvm {
class ValueSymbolTable;
-
-template<typename NodeTy> class ilist_iterator;
-template<typename NodeTy, typename Traits> class iplist;
-template<typename Ty> struct ilist_traits;
+
+template <typename NodeTy> class ilist_iterator;
+template <typename NodeTy, typename Traits> class iplist;
+template <typename Ty> struct ilist_traits;
+
+template <typename NodeTy>
+struct SymbolTableListSentinelTraits
+ : public ilist_embedded_sentinel_traits<NodeTy> {};
+
+/// Template metafunction to get the parent type for a symbol table list.
+///
+/// Implementations create a typedef called \c type so that we only need a
+/// single template parameter for the list and traits.
+template <typename NodeTy> struct SymbolTableListParentType {};
+class Argument;
+class BasicBlock;
+class Function;
+class Instruction;
+class GlobalVariable;
+class GlobalAlias;
+class Module;
+#define DEFINE_SYMBOL_TABLE_PARENT_TYPE(NODE, PARENT) \
+ template <> struct SymbolTableListParentType<NODE> { typedef PARENT type; };
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(Instruction, BasicBlock)
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(BasicBlock, Function)
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(Argument, Function)
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(Function, Module)
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalVariable, Module)
+DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalAlias, Module)
+#undef DEFINE_SYMBOL_TABLE_PARENT_TYPE
+
+template <typename NodeTy> class SymbolTableList;
// ValueSubClass - The type of objects that I hold, e.g. Instruction.
// ItemParentClass - The type of object that owns the list, e.g. BasicBlock.
//
-template<typename ValueSubClass, typename ItemParentClass>
-class SymbolTableListTraits : public ilist_default_traits<ValueSubClass> {
- typedef ilist_traits<ValueSubClass> TraitsClass;
+template <typename ValueSubClass>
+class SymbolTableListTraits
+ : public ilist_nextprev_traits<ValueSubClass>,
+ public SymbolTableListSentinelTraits<ValueSubClass>,
+ public ilist_node_traits<ValueSubClass> {
+ typedef SymbolTableList<ValueSubClass> ListTy;
+ typedef
+ typename SymbolTableListParentType<ValueSubClass>::type ItemParentClass;
+
public:
SymbolTableListTraits() {}
+private:
/// getListOwner - Return the object that owns this list. If this is a list
/// of instructions, it returns the BasicBlock that owns them.
ItemParentClass *getListOwner() {
size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass::
getSublistAccess(static_cast<ValueSubClass*>(nullptr)))));
- iplist<ValueSubClass>* Anchor(static_cast<iplist<ValueSubClass>*>(this));
+ ListTy *Anchor(static_cast<ListTy *>(this));
return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)-
Offset);
}
- static iplist<ValueSubClass> &getList(ItemParentClass *Par) {
+ static ListTy &getList(ItemParentClass *Par) {
return Par->*(Par->getSublistAccess((ValueSubClass*)nullptr));
}
@@ -61,9 +96,10 @@ public:
return Par ? toPtr(Par->getValueSymbolTable()) : nullptr;
}
+public:
void addNodeToList(ValueSubClass *V);
void removeNodeFromList(ValueSubClass *V);
- void transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ void transferNodesFromList(SymbolTableListTraits &L2,
ilist_iterator<ValueSubClass> first,
ilist_iterator<ValueSubClass> last);
//private:
@@ -73,6 +109,14 @@ public:
static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
};
+/// List that automatically updates parent links and symbol tables.
+///
+/// When nodes are inserted into and removed from this list, the associated
+/// symbol table will be automatically updated. Similarly, parent links get
+/// updated automatically.
+template <typename NodeTy>
+class SymbolTableList : public iplist<NodeTy, SymbolTableListTraits<NodeTy>> {};
+
} // End llvm namespace
#endif
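
getListOwner recovers the owning object from the address of its embedded list
by subtracting the member's offset; a standalone model of the same pointer
arithmetic, with illustrative types:

    #include <cstddef>
    #include <cstdio>

    struct Owner {
      int Header; // members preceding the embedded list
      int List;   // stands in for the embedded sublist anchor
    };

    // Subtract the member's offset from the member's address to reach the
    // start of the containing object.
    Owner *ownerOf(int *ListAnchor) {
      return reinterpret_cast<Owner *>(
          reinterpret_cast<char *>(ListAnchor) - offsetof(Owner, List));
    }

    int main() {
      Owner O{42, 0};
      std::printf("%d\n", ownerOf(&O.List)->Header); // prints 42
    }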
diff --git a/contrib/llvm/include/llvm/IR/TrackingMDRef.h b/contrib/llvm/include/llvm/IR/TrackingMDRef.h
index e241121..97efaff 100644
--- a/contrib/llvm/include/llvm/IR/TrackingMDRef.h
+++ b/contrib/llvm/include/llvm/IR/TrackingMDRef.h
@@ -14,15 +14,11 @@
#ifndef LLVM_IR_TRACKINGMDREF_H
#define LLVM_IR_TRACKINGMDREF_H
-#include "llvm/IR/MetadataTracking.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
namespace llvm {
-class Metadata;
-class MDNode;
-class ValueAsMetadata;
-
/// \brief Tracking metadata reference.
///
/// This class behaves like \a TrackingVH, but for metadata.
diff --git a/contrib/llvm/include/llvm/IR/Type.h b/contrib/llvm/include/llvm/IR/Type.h
index 6ab0bd0..b2920dd 100644
--- a/contrib/llvm/include/llvm/IR/Type.h
+++ b/contrib/llvm/include/llvm/IR/Type.h
@@ -15,7 +15,6 @@
#ifndef LLVM_IR_TYPE_H
#define LLVM_IR_TYPE_H
-#include "llvm-c/Core.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -38,10 +37,10 @@ template<class GraphType> struct GraphTraits;
/// they are never changed. Also note that only one instance of a particular
/// type is ever created. Thus seeing if two types are equal is a matter of
/// doing a trivial pointer comparison. To enforce that no two equal instances
-/// are created, Type instances can only be created via static factory methods
+/// are created, Type instances can only be created via static factory methods
/// in class Type and in derived classes. Once allocated, Types are never
/// free'd.
-///
+///
class Type {
public:
//===--------------------------------------------------------------------===//
@@ -63,45 +62,36 @@ public:
LabelTyID, ///< 7: Labels
MetadataTyID, ///< 8: Metadata
X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
+ TokenTyID, ///< 10: Tokens
// Derived types... see DerivedTypes.h file.
// Make sure FirstDerivedTyID stays up to date!
- IntegerTyID, ///< 10: Arbitrary bit width integers
- FunctionTyID, ///< 11: Functions
- StructTyID, ///< 12: Structures
- ArrayTyID, ///< 13: Arrays
- PointerTyID, ///< 14: Pointers
- VectorTyID ///< 15: SIMD 'packed' format, or other vector type
+ IntegerTyID, ///< 11: Arbitrary bit width integers
+ FunctionTyID, ///< 12: Functions
+ StructTyID, ///< 13: Structures
+ ArrayTyID, ///< 14: Arrays
+ PointerTyID, ///< 15: Pointers
+ VectorTyID ///< 16: SIMD 'packed' format, or other vector type
};
private:
/// Context - This refers to the LLVMContext in which this type was uniqued.
LLVMContext &Context;
- // Due to Ubuntu GCC bug 910363:
- // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363
- // Bitpack ID and SubclassData manually.
- // Note: TypeID : low 8 bit; SubclassData : high 24 bit.
- uint32_t IDAndSubclassData;
+ TypeID ID : 8; // The current base type of this type.
+ unsigned SubclassData : 24; // Space for subclasses to store data.
protected:
friend class LLVMContextImpl;
explicit Type(LLVMContext &C, TypeID tid)
- : Context(C), IDAndSubclassData(0),
- NumContainedTys(0), ContainedTys(nullptr) {
- setTypeID(tid);
- }
+ : Context(C), ID(tid), SubclassData(0),
+ NumContainedTys(0), ContainedTys(nullptr) {}
~Type() = default;
- void setTypeID(TypeID ID) {
- IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00);
- assert(getTypeID() == ID && "TypeID data too large for field");
- }
-
- unsigned getSubclassData() const { return IDAndSubclassData >> 8; }
-
+ unsigned getSubclassData() const { return SubclassData; }
+
void setSubclassData(unsigned val) {
- IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8);
+ SubclassData = val;
// Ensure we don't have any accidental truncation.
assert(getSubclassData() == val && "Subclass data too large for field");
}
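
The bitfields replace the manual shift-and-mask packing with the same 8/24
split; a quick check that the layout still occupies one 32-bit word (sketch):

    struct PackedTypeWord {
      unsigned ID : 8;            // TypeID, low 8 bits
      unsigned SubclassData : 24; // high 24 bits
    };
    static_assert(sizeof(PackedTypeWord) == 4,
                  "ID and SubclassData share one word");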
@@ -118,7 +108,7 @@ protected:
Type * const *ContainedTys;
public:
- void print(raw_ostream &O) const;
+ void print(raw_ostream &O, bool IsForDebug = false) const;
void dump() const;
/// getContext - Return the LLVMContext in which this type was uniqued.
@@ -131,7 +121,7 @@ public:
/// getTypeID - Return the type id for the type. This will return one
/// of the TypeID enum elements defined above.
///
- TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); }
+ TypeID getTypeID() const { return ID; }
/// isVoidTy - Return true if this is 'void'.
bool isVoidTy() const { return getTypeID() == VoidTyID; }
@@ -141,7 +131,7 @@ public:
/// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
bool isFloatTy() const { return getTypeID() == FloatTyID; }
-
+
/// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
@@ -181,16 +171,19 @@ public:
/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
///
bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
-
+
/// isLabelTy - Return true if this is 'label'.
bool isLabelTy() const { return getTypeID() == LabelTyID; }
/// isMetadataTy - Return true if this is 'metadata'.
bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
+ /// isTokenTy - Return true if this is 'token'.
+ bool isTokenTy() const { return getTypeID() == TokenTyID; }
+
/// isIntegerTy - True if this is an instance of IntegerType.
///
- bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
+ bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
/// isIntegerTy - Return true if this is an IntegerType of the given width.
bool isIntegerTy(unsigned Bitwidth) const;
@@ -199,7 +192,7 @@ public:
/// integer types.
///
bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
-
+
/// isFunctionTy - True if this is an instance of FunctionType.
///
bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
@@ -220,14 +213,14 @@ public:
/// pointer types.
///
bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
-
+
/// isVectorTy - True if this is an instance of VectorType.
///
bool isVectorTy() const { return getTypeID() == VectorTyID; }
- /// canLosslesslyBitCastTo - Return true if this type could be converted
- /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
- /// are valid for types of the same size only where no re-interpretation of
+ /// canLosslesslyBitCastTo - Return true if this type could be converted
+ /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
+ /// are valid for types of the same size only where no re-interpretation of
/// the bits is done.
/// @brief Determine if this type could be losslessly bitcast to Ty
bool canLosslesslyBitCastTo(Type *Ty) const;
@@ -265,7 +258,7 @@ public:
/// get the actual size for a particular target, it is reasonable to use the
/// DataLayout subsystem to do this.
///
- bool isSized(SmallPtrSetImpl<const Type*> *Visited = nullptr) const {
+ bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
// If it's a primitive, it is always sized.
if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
getTypeID() == PointerTyID ||
@@ -304,8 +297,7 @@ public:
/// getScalarType - If this is a vector type, return the element type,
/// otherwise return 'this'.
- const Type *getScalarType() const LLVM_READONLY;
- Type *getScalarType() LLVM_READONLY;
+ Type *getScalarType() const LLVM_READONLY;
//===--------------------------------------------------------------------===//
// Type Iteration support.
@@ -344,30 +336,30 @@ public:
// example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
// only intended to cover the core methods that are frequently used, helper
// methods should not be added here.
-
- unsigned getIntegerBitWidth() const;
-
- Type *getFunctionParamType(unsigned i) const;
- unsigned getFunctionNumParams() const;
- bool isFunctionVarArg() const;
-
- StringRef getStructName() const;
- unsigned getStructNumElements() const;
- Type *getStructElementType(unsigned N) const;
-
- Type *getSequentialElementType() const;
-
- uint64_t getArrayNumElements() const;
+
+ inline unsigned getIntegerBitWidth() const;
+
+ inline Type *getFunctionParamType(unsigned i) const;
+ inline unsigned getFunctionNumParams() const;
+ inline bool isFunctionVarArg() const;
+
+ inline StringRef getStructName() const;
+ inline unsigned getStructNumElements() const;
+ inline Type *getStructElementType(unsigned N) const;
+
+ inline Type *getSequentialElementType() const;
+
+ inline uint64_t getArrayNumElements() const;
Type *getArrayElementType() const { return getSequentialElementType(); }
- unsigned getVectorNumElements() const;
+ inline unsigned getVectorNumElements() const;
Type *getVectorElementType() const { return getSequentialElementType(); }
Type *getPointerElementType() const { return getSequentialElementType(); }
/// \brief Get the address space of this pointer or pointer vector type.
- unsigned getPointerAddressSpace() const;
-
+ inline unsigned getPointerAddressSpace() const;
+
//===--------------------------------------------------------------------===//
// Static members exported by the Type class itself. Useful for getting
// instances of Type.
@@ -389,6 +381,7 @@ public:
static Type *getFP128Ty(LLVMContext &C);
static Type *getPPC_FP128Ty(LLVMContext &C);
static Type *getX86_MMXTy(LLVMContext &C);
+ static Type *getTokenTy(LLVMContext &C);
static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
static IntegerType *getInt1Ty(LLVMContext &C);
static IntegerType *getInt8Ty(LLVMContext &C);
@@ -396,7 +389,7 @@ public:
static IntegerType *getInt32Ty(LLVMContext &C);
static IntegerType *getInt64Ty(LLVMContext &C);
static IntegerType *getInt128Ty(LLVMContext &C);
-
+
//===--------------------------------------------------------------------===//
// Convenience methods for getting pointer types with one of the above builtin
// types as pointee.
@@ -417,13 +410,13 @@ public:
/// getPointerTo - Return a pointer to the current type. This is equivalent
/// to PointerType::get(Foo, AddrSpace).
- PointerType *getPointerTo(unsigned AddrSpace = 0);
+ PointerType *getPointerTo(unsigned AddrSpace = 0) const;
private:
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
- bool isSizedDerivedType(SmallPtrSetImpl<const Type*> *Visited = nullptr) const;
+ bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
};
// Printing of types.
@@ -439,13 +432,11 @@ template <> struct isa_impl<PointerType, Type> {
}
};
-
//===----------------------------------------------------------------------===//
// Provide specializations of GraphTraits to be able to treat a type as a
// graph of sub types.
-
-template <> struct GraphTraits<Type*> {
+template <> struct GraphTraits<Type *> {
typedef Type NodeType;
typedef Type::subtype_iterator ChildIteratorType;
@@ -483,7 +474,7 @@ inline Type **unwrap(LLVMTypeRef* Tys) {
inline LLVMTypeRef *wrap(Type **Tys) {
return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
}
-
+
} // End llvm namespace
#endif
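
A side note on the isSized() change above: the Visited set now takes mutable Type pointers, and reusing one set across queries keeps recursive struct types from being re-walked. A minimal sketch, assuming only the Type.h and SmallPtrSet.h APIs shown above; allSized is an illustrative helper, not part of the header:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Type.h"

    // Illustrative: query several types, sharing one Visited set so each
    // struct body is examined at most once.
    static bool allSized(llvm::ArrayRef<llvm::Type *> Tys) {
      llvm::SmallPtrSet<llvm::Type *, 16> Visited;
      for (llvm::Type *T : Tys)
        if (!T->isSized(&Visited)) // matches the new non-const element type
          return false;
      return true;
    }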
diff --git a/contrib/llvm/include/llvm/IR/TypeFinder.h b/contrib/llvm/include/llvm/IR/TypeFinder.h
index 73a63ad..5f38543 100644
--- a/contrib/llvm/include/llvm/IR/TypeFinder.h
+++ b/contrib/llvm/include/llvm/IR/TypeFinder.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the TypeFinder class.
+// This file contains the declaration of the TypeFinder class.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/IR/Use.h b/contrib/llvm/include/llvm/IR/Use.h
index 160d71b..a738677 100644
--- a/contrib/llvm/include/llvm/IR/Use.h
+++ b/contrib/llvm/include/llvm/IR/Use.h
@@ -25,7 +25,6 @@
#ifndef LLVM_IR_USE_H
#define LLVM_IR_USE_H
-#include "llvm-c/Core.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm/include/llvm/IR/UseListOrder.h b/contrib/llvm/include/llvm/IR/UseListOrder.h
index b7c2418..1cabf03 100644
--- a/contrib/llvm/include/llvm/IR/UseListOrder.h
+++ b/contrib/llvm/include/llvm/IR/UseListOrder.h
@@ -34,7 +34,7 @@ struct UseListOrder {
UseListOrder(const Value *V, const Function *F, size_t ShuffleSize)
: V(V), F(F), Shuffle(ShuffleSize) {}
- UseListOrder() : V(0), F(0) {}
+ UseListOrder() : V(nullptr), F(nullptr) {}
UseListOrder(UseListOrder &&X)
: V(X.V), F(X.F), Shuffle(std::move(X.Shuffle)) {}
UseListOrder &operator=(UseListOrder &&X) {
@@ -53,4 +53,4 @@ typedef std::vector<UseListOrder> UseListOrderStack;
} // end namespace llvm
-#endif
+#endif // LLVM_IR_USELISTORDER_H
diff --git a/contrib/llvm/include/llvm/IR/User.h b/contrib/llvm/include/llvm/IR/User.h
index 93614fa..885ae19 100644
--- a/contrib/llvm/include/llvm/IR/User.h
+++ b/contrib/llvm/include/llvm/IR/User.h
@@ -19,6 +19,7 @@
#ifndef LLVM_IR_USER_H
#define LLVM_IR_USER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Value.h"
@@ -39,6 +40,9 @@ class User : public Value {
friend struct HungoffOperandTraits;
virtual void anchor();
+ LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void *
+ allocateFixedOperandUser(size_t, unsigned, unsigned);
+
protected:
/// Allocate a User with an operand pointer co-allocated.
///
@@ -51,7 +55,17 @@ protected:
/// This is used for subclasses which have a fixed number of operands.
void *operator new(size_t Size, unsigned Us);
- User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
+ /// Allocate a User with the operands co-allocated. If DescBytes is non-zero
+ /// then allocate an additional DescBytes bytes before the operands. These
+ /// bytes can be accessed by calling getDescriptor.
+ ///
+ /// DescBytes needs to be divisible by sizeof(void *). The allocated
+ /// descriptor, if any, is aligned to sizeof(void *) bytes.
+ ///
+ /// This is used for subclasses which have a fixed number of operands.
+ void *operator new(size_t Size, unsigned Us, unsigned DescBytes);
+
+ User(Type *ty, unsigned vty, Use *, unsigned NumOps)
: Value(ty, vty) {
assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
NumUserOperands = NumOps;
@@ -137,6 +151,12 @@ public:
unsigned getNumOperands() const { return NumUserOperands; }
+ /// Returns the descriptor co-allocated with this User instance.
+ ArrayRef<const uint8_t> getDescriptor() const;
+
+ /// Returns the descriptor co-allocated with this User instance.
+ MutableArrayRef<uint8_t> getDescriptor();
+
/// Set the number of operands on a GlobalVariable.
///
/// GlobalVariable always allocates space for a single operand, but
@@ -150,19 +170,6 @@ public:
NumUserOperands = NumOps;
}
- /// Set the number of operands on a Function.
- ///
- /// Function always allocates space for a single operands, but
- /// doesn't always use it.
- ///
- /// FIXME: As that the number of operands is used to find the start of
- /// the allocated memory in operator delete, we need to always think we have
- /// 1 operand before delete.
- void setFunctionNumOperands(unsigned NumOps) {
- assert(NumOps <= 1 && "Function can only have 0 or 1 operands");
- NumUserOperands = NumOps;
- }
-
/// \brief Subclasses with hung off uses need to manage the operand count
/// themselves. In these instances, the operand count isn't used to find the
/// OperandList, so there's no issue in having the operand count change.
@@ -213,7 +220,7 @@ public:
return value_op_iterator(op_end());
}
iterator_range<value_op_iterator> operand_values() {
- return iterator_range<value_op_iterator>(value_op_begin(), value_op_end());
+ return make_range(value_op_begin(), value_op_end());
}
/// \brief Drop all references to operands.
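
The descriptor-aware operator new above is easiest to see from a subclass. A hedged sketch, with MyUser and its 16-byte descriptor purely hypothetical; the constraint that DescBytes be a multiple of sizeof(void *) comes from the comment in the diff:

    // Hypothetical User subclass: two operands plus 16 descriptor bytes.
    class MyUser : public llvm::User {
      MyUser(llvm::Type *Ty)
          : User(Ty, /*vty=*/0, /*OpList=*/nullptr, /*NumOps=*/2) {}

    public:
      static MyUser *create(llvm::Type *Ty) {
        // 16 is divisible by sizeof(void *) on 32- and 64-bit hosts.
        return new (/*Us=*/2, /*DescBytes=*/16) MyUser(Ty);
      }
      void tag(uint8_t B) {
        llvm::MutableArrayRef<uint8_t> D = getDescriptor();
        if (!D.empty())
          D[0] = B; // descriptor bytes sit just before the operand list
      }
    };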
diff --git a/contrib/llvm/include/llvm/IR/Value.def b/contrib/llvm/include/llvm/IR/Value.def
index c2a0639..4c5d452 100644
--- a/contrib/llvm/include/llvm/IR/Value.def
+++ b/contrib/llvm/include/llvm/IR/Value.def
@@ -70,6 +70,7 @@ HANDLE_CONSTANT(ConstantArray)
HANDLE_CONSTANT(ConstantStruct)
HANDLE_CONSTANT(ConstantVector)
HANDLE_CONSTANT(ConstantPointerNull)
+HANDLE_CONSTANT(ConstantTokenNone)
HANDLE_METADATA_VALUE(MetadataAsValue)
HANDLE_INLINE_ASM_VALUE(InlineAsm)
@@ -79,7 +80,7 @@ HANDLE_INSTRUCTION(Instruction)
// don't add new values here!
HANDLE_CONSTANT_MARKER(ConstantFirstVal, Function)
-HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantPointerNull)
+HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantTokenNone)
#undef HANDLE_GLOBAL_VALUE
#undef HANDLE_CONSTANT
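
Value.def is consumed in the usual X-macro style: define the HANDLE_* macros of interest, include the file, and the new ConstantTokenNone entry expands along with the rest. A minimal sketch (note that, with the .def's default-macro chain, global values also expand through HANDLE_CONSTANT):

    // Build a name table for constant-like Value kinds.
    #define HANDLE_CONSTANT(Name) #Name,
    static const char *const ConstantKindNames[] = {
    #include "llvm/IR/Value.def"
    };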
diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h
index 17a80c8..bb7ff27 100644
--- a/contrib/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm/include/llvm/IR/Value.h
@@ -14,7 +14,6 @@
#ifndef LLVM_IR_VALUE_H
#define LLVM_IR_VALUE_H
-#include "llvm-c/Core.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Use.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -104,12 +103,13 @@ protected:
///
/// Note, this should *NOT* be used directly by any class other than User.
/// User uses this value to find the Use list.
- enum : unsigned { NumUserOperandsBits = 29 };
+ enum : unsigned { NumUserOperandsBits = 28 };
unsigned NumUserOperands : NumUserOperandsBits;
bool IsUsedByMD : 1;
bool HasName : 1;
bool HasHungOffUses : 1;
+ bool HasDescriptor : 1;
private:
template <typename UseT> // UseT == 'Use' or 'const Use'
@@ -201,8 +201,9 @@ public:
/// \brief Implement operator<< on Value.
/// @{
- void print(raw_ostream &O) const;
- void print(raw_ostream &O, ModuleSlotTracker &MST) const;
+ void print(raw_ostream &O, bool IsForDebug = false) const;
+ void print(raw_ostream &O, ModuleSlotTracker &MST,
+ bool IsForDebug = false) const;
/// @}
/// \brief Print the name of this Value out to the specified raw_ostream.
@@ -272,36 +273,91 @@ public:
//----------------------------------------------------------------------
// Methods for handling the chain of uses of this Value.
//
- bool use_empty() const { return UseList == nullptr; }
+ // Materializing a function can introduce new uses, so these methods come in
+ // two variants:
+ // The methods that start with materialized_ check the uses that are
+ // currently known given which functions are materialized. Be very careful
+ // when using them since you might not get all uses.
+ // The methods that don't start with materialized_ assert that the module is
+ // fully materialized.
+#ifdef NDEBUG
+ void assertModuleIsMaterialized() const {}
+#else
+ void assertModuleIsMaterialized() const;
+#endif
+
+ bool use_empty() const {
+ assertModuleIsMaterialized();
+ return UseList == nullptr;
+ }
- typedef use_iterator_impl<Use> use_iterator;
+ typedef use_iterator_impl<Use> use_iterator;
typedef use_iterator_impl<const Use> const_use_iterator;
- use_iterator use_begin() { return use_iterator(UseList); }
- const_use_iterator use_begin() const { return const_use_iterator(UseList); }
- use_iterator use_end() { return use_iterator(); }
- const_use_iterator use_end() const { return const_use_iterator(); }
+ use_iterator materialized_use_begin() { return use_iterator(UseList); }
+ const_use_iterator materialized_use_begin() const {
+ return const_use_iterator(UseList);
+ }
+ use_iterator use_begin() {
+ assertModuleIsMaterialized();
+ return materialized_use_begin();
+ }
+ const_use_iterator use_begin() const {
+ assertModuleIsMaterialized();
+ return materialized_use_begin();
+ }
+ use_iterator use_end() { return use_iterator(); }
+ const_use_iterator use_end() const { return const_use_iterator(); }
+ iterator_range<use_iterator> materialized_uses() {
+ return make_range(materialized_use_begin(), use_end());
+ }
+ iterator_range<const_use_iterator> materialized_uses() const {
+ return make_range(materialized_use_begin(), use_end());
+ }
iterator_range<use_iterator> uses() {
- return iterator_range<use_iterator>(use_begin(), use_end());
+ assertModuleIsMaterialized();
+ return materialized_uses();
}
iterator_range<const_use_iterator> uses() const {
- return iterator_range<const_use_iterator>(use_begin(), use_end());
+ assertModuleIsMaterialized();
+ return materialized_uses();
}
- bool user_empty() const { return UseList == nullptr; }
+ bool user_empty() const {
+ assertModuleIsMaterialized();
+ return UseList == nullptr;
+ }
- typedef user_iterator_impl<User> user_iterator;
+ typedef user_iterator_impl<User> user_iterator;
typedef user_iterator_impl<const User> const_user_iterator;
- user_iterator user_begin() { return user_iterator(UseList); }
- const_user_iterator user_begin() const { return const_user_iterator(UseList); }
- user_iterator user_end() { return user_iterator(); }
- const_user_iterator user_end() const { return const_user_iterator(); }
- User *user_back() { return *user_begin(); }
- const User *user_back() const { return *user_begin(); }
+ user_iterator materialized_user_begin() { return user_iterator(UseList); }
+ const_user_iterator materialized_user_begin() const {
+ return const_user_iterator(UseList);
+ }
+ user_iterator user_begin() {
+ assertModuleIsMaterialized();
+ return materialized_user_begin();
+ }
+ const_user_iterator user_begin() const {
+ assertModuleIsMaterialized();
+ return materialized_user_begin();
+ }
+ user_iterator user_end() { return user_iterator(); }
+ const_user_iterator user_end() const { return const_user_iterator(); }
+ User *user_back() {
+ assertModuleIsMaterialized();
+ return *materialized_user_begin();
+ }
+ const User *user_back() const {
+ assertModuleIsMaterialized();
+ return *materialized_user_begin();
+ }
iterator_range<user_iterator> users() {
- return iterator_range<user_iterator>(user_begin(), user_end());
+ assertModuleIsMaterialized();
+ return make_range(materialized_user_begin(), user_end());
}
iterator_range<const_user_iterator> users() const {
- return iterator_range<const_user_iterator>(user_begin(), user_end());
+ assertModuleIsMaterialized();
+ return make_range(materialized_user_begin(), user_end());
}
/// \brief Return true if there is exactly one user of this value.
@@ -493,7 +549,28 @@ private:
template <class Compare>
static Use *mergeUseLists(Use *L, Use *R, Compare Cmp) {
Use *Merged;
- mergeUseListsImpl(L, R, &Merged, Cmp);
+ Use **Next = &Merged;
+
+ for (;;) {
+ if (!L) {
+ *Next = R;
+ break;
+ }
+ if (!R) {
+ *Next = L;
+ break;
+ }
+ if (Cmp(*R, *L)) {
+ *Next = R;
+ Next = &R->Next;
+ R = R->Next;
+ } else {
+ *Next = L;
+ Next = &L->Next;
+ L = L->Next;
+ }
+ }
+
return Merged;
}
@@ -586,25 +663,6 @@ template <class Compare> void Value::sortUseList(Compare Cmp) {
}
}
-template <class Compare>
-void Value::mergeUseListsImpl(Use *L, Use *R, Use **Next, Compare Cmp) {
- if (!L) {
- *Next = R;
- return;
- }
- if (!R) {
- *Next = L;
- return;
- }
- if (Cmp(*R, *L)) {
- *Next = R;
- mergeUseListsImpl(L, R->Next, &R->Next, Cmp);
- return;
- }
- *Next = L;
- mergeUseListsImpl(L->Next, R, &L->Next, Cmp);
-}
-
// isa - Provide some specializations of isa so that we don't have to include
// the subtype header files to test to see if the value is a subclass...
//
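
The practical upshot of the materialized_ split above: code that runs while a module is only partially materialized (for example during lazy bitcode loading) must use the materialized_ variants, since the plain accessors now assert full materialization. A minimal sketch:

    // Count only the uses known so far; safe on a lazily-loaded module.
    static unsigned countKnownUses(llvm::Value &V) {
      unsigned N = 0;
      for (llvm::Use &U : V.materialized_uses()) {
        (void)U;
        ++N;
      }
      return N;
    }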
diff --git a/contrib/llvm/include/llvm/IR/ValueHandle.h b/contrib/llvm/include/llvm/IR/ValueHandle.h
index 53fa80a..3c28059 100644
--- a/contrib/llvm/include/llvm/IR/ValueHandle.h
+++ b/contrib/llvm/include/llvm/IR/ValueHandle.h
@@ -52,13 +52,21 @@ protected:
Weak
};
+ ValueHandleBase(const ValueHandleBase &RHS)
+ : ValueHandleBase(RHS.PrevPair.getInt(), RHS) {}
+
+ ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS)
+ : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) {
+ if (isValid(V))
+ AddToExistingUseList(RHS.getPrevPtr());
+ }
+
private:
PointerIntPair<ValueHandleBase**, 2, HandleBaseKind> PrevPair;
ValueHandleBase *Next;
Value* V;
- ValueHandleBase(const ValueHandleBase&) = delete;
public:
explicit ValueHandleBase(HandleBaseKind Kind)
: PrevPair(nullptr, Kind), Next(nullptr), V(nullptr) {}
@@ -67,11 +75,7 @@ public:
if (isValid(V))
AddToUseList();
}
- ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS)
- : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) {
- if (isValid(V))
- AddToExistingUseList(RHS.getPrevPtr());
- }
+
~ValueHandleBase() {
if (isValid(V))
RemoveFromUseList();
@@ -145,6 +149,8 @@ public:
WeakVH(const WeakVH &RHS)
: ValueHandleBase(Weak, RHS) {}
+ WeakVH &operator=(const WeakVH &RHS) = default;
+
Value *operator=(Value *RHS) {
return ValueHandleBase::operator=(RHS);
}
@@ -314,7 +320,6 @@ class TrackingVH : public ValueHandleBase {
public:
TrackingVH() : ValueHandleBase(Tracking) {}
TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {}
- TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {}
operator ValueTy*() const {
return getValPtr();
@@ -324,10 +329,6 @@ public:
setValPtr(RHS);
return getValPtr();
}
- ValueTy *operator=(const TrackingVH<ValueTy> &RHS) {
- setValPtr(RHS.getValPtr());
- return getValPtr();
- }
ValueTy *operator->() const { return getValPtr(); }
ValueTy &operator*() const { return *getValPtr(); }
@@ -339,15 +340,13 @@ public:
/// when the underlying Value has RAUW called on it or is destroyed. This
/// class can be used as the key of a map, as long as the user takes it out of
/// the map before calling setValPtr() (since the map has to rearrange itself
-/// when the pointer changes). Unlike ValueHandleBase, this class has a vtable
-/// and a virtual destructor.
+/// when the pointer changes). Unlike ValueHandleBase, this class has a vtable.
class CallbackVH : public ValueHandleBase {
virtual void anchor();
protected:
- CallbackVH(const CallbackVH &RHS)
- : ValueHandleBase(Callback, RHS) {}
-
- virtual ~CallbackVH() {}
+ ~CallbackVH() = default;
+ CallbackVH(const CallbackVH &) = default;
+ CallbackVH &operator=(const CallbackVH &) = default;
void setValPtr(Value *P) {
ValueHandleBase::operator=(P);
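
With the destructor and copy operations protected and defaulted, CallbackVH is now usable only through a derived class, which matches its intent. A hedged sketch of such a handle; the invalidation callback is hypothetical:

    // Drops a cache entry when the tracked Value dies or is RAUW'd.
    class CacheVH final : public llvm::CallbackVH {
      void (*Invalidate)(llvm::Value *); // hypothetical cache hook

    public:
      CacheVH(llvm::Value *V, void (*Inv)(llvm::Value *))
          : CallbackVH(V), Invalidate(Inv) {}
      void deleted() override {
        Invalidate(getValPtr());
        setValPtr(nullptr);
      }
      void allUsesReplacedWith(llvm::Value * /*New*/) override {
        Invalidate(getValPtr());
      }
    };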
diff --git a/contrib/llvm/include/llvm/IR/ValueMap.h b/contrib/llvm/include/llvm/IR/ValueMap.h
index 4d00b63..ad518ac 100644
--- a/contrib/llvm/include/llvm/IR/ValueMap.h
+++ b/contrib/llvm/include/llvm/IR/ValueMap.h
@@ -214,8 +214,8 @@ private:
// This CallbackVH updates its ValueMap when the contained Value changes,
// according to the user's preferences expressed through the Config object.
-template<typename KeyT, typename ValueT, typename Config>
-class ValueMapCallbackVH : public CallbackVH {
+template <typename KeyT, typename ValueT, typename Config>
+class ValueMapCallbackVH final : public CallbackVH {
friend class ValueMap<KeyT, ValueT, Config>;
friend struct DenseMapInfo<ValueMapCallbackVH>;
typedef ValueMap<KeyT, ValueT, Config> ValueMapT;
diff --git a/contrib/llvm/include/llvm/IR/ValueSymbolTable.h b/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
index bf1fade..65bd7fc 100644
--- a/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
+++ b/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
@@ -14,13 +14,13 @@
#ifndef LLVM_IR_VALUESYMBOLTABLE_H
#define LLVM_IR_VALUESYMBOLTABLE_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
- template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
+ template <typename ValueSubClass> class SymbolTableListTraits;
class BasicBlock;
class Function;
class NamedMDNode;
@@ -33,12 +33,12 @@ namespace llvm {
///
class ValueSymbolTable {
friend class Value;
- friend class SymbolTableListTraits<Argument, Function>;
- friend class SymbolTableListTraits<BasicBlock, Function>;
- friend class SymbolTableListTraits<Instruction, BasicBlock>;
- friend class SymbolTableListTraits<Function, Module>;
- friend class SymbolTableListTraits<GlobalVariable, Module>;
- friend class SymbolTableListTraits<GlobalAlias, Module>;
+ friend class SymbolTableListTraits<Argument>;
+ friend class SymbolTableListTraits<BasicBlock>;
+ friend class SymbolTableListTraits<Instruction>;
+ friend class SymbolTableListTraits<Function>;
+ friend class SymbolTableListTraits<GlobalVariable>;
+ friend class SymbolTableListTraits<GlobalAlias>;
/// @name Types
/// @{
public:
@@ -55,7 +55,6 @@ public:
/// @name Constructors
/// @{
public:
-
ValueSymbolTable() : vmap(0), LastUnique(0) {}
~ValueSymbolTable();
@@ -63,9 +62,8 @@ public:
/// @name Accessors
/// @{
public:
-
/// This method finds the value with the given \p Name in the
- /// the symbol table.
+ /// symbol table.
/// @returns the value associated with the \p Name
/// @brief Lookup a named Value.
Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
@@ -97,30 +95,32 @@ public:
/// @brief Get a const_iterator to the end of the symbol table.
inline const_iterator end() const { return vmap.end(); }
-
-/// @}
-/// @name Mutators
-/// @{
+
+ /// @}
+ /// @name Mutators
+ /// @{
private:
+ ValueName *makeUniqueName(Value *V, SmallString<256> &UniqueName);
+
/// This method adds the provided value \p N to the symbol table. The Value
- /// must have a name which is used to place the value in the symbol table.
+ /// must have a name which is used to place the value in the symbol table.
/// If the inserted name conflicts, this renames the value.
/// @brief Add a named value to the symbol table
void reinsertValue(Value *V);
-
+
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
ValueName *createValueName(StringRef Name, Value *V);
-
+
/// This method removes a value from the symbol table. It leaves the
/// ValueName attached to the value, but it is no longer inserted in the
/// symtab.
void removeValueName(ValueName *V);
-
-/// @}
-/// @name Internal Data
-/// @{
+
+ /// @}
+ /// @name Internal Data
+ /// @{
private:
ValueMap vmap; ///< The map that holds the symbol table.
mutable uint32_t LastUnique; ///< Counter for tracking unique names
diff --git a/contrib/llvm/include/llvm/IRReader/IRReader.h b/contrib/llvm/include/llvm/IRReader/IRReader.h
index 2d9ace0..523cd3d 100644
--- a/contrib/llvm/include/llvm/IRReader/IRReader.h
+++ b/contrib/llvm/include/llvm/IRReader/IRReader.h
@@ -27,10 +27,11 @@ class LLVMContext;
/// If the given file holds a bitcode image, return a Module
/// for it which does lazy deserialization of function bodies. Otherwise,
/// attempt to parse it as LLVM Assembly and return a fully populated
-/// Module.
-std::unique_ptr<Module> getLazyIRFileModule(StringRef Filename,
- SMDiagnostic &Err,
- LLVMContext &Context);
+/// Module. The ShouldLazyLoadMetadata flag is passed down to the bitcode
+/// reader to optionally enable lazy metadata loading.
+std::unique_ptr<Module>
+getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata = false);
/// If the given MemoryBuffer holds a bitcode image, return a Module
/// for it. Otherwise, attempt to parse it as LLVM Assembly and return
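
A rough sketch of how the new flag is used: pass true when the caller only needs to scan symbols up front and can materialize function bodies and metadata later.

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"

    static std::unique_ptr<llvm::Module> openLazily(llvm::StringRef Path,
                                                    llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      // Defer metadata until materialization; cheaper for symbol scans.
      return llvm::getLazyIRFileModule(Path, Err, Ctx,
                                       /*ShouldLazyLoadMetadata=*/true);
    }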
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h
index e3b9a95..cb2b139 100644
--- a/contrib/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm/include/llvm/InitializePasses.h
@@ -53,9 +53,6 @@ void initializeInstrumentation(PassRegistry&);
/// initializeAnalysis - Initialize all passes linked into the Analysis library.
void initializeAnalysis(PassRegistry&);
-/// initializeIPA - Initialize all passes linked into the IPA library.
-void initializeIPA(PassRegistry&);
-
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void initializeCodeGen(PassRegistry&);
@@ -64,11 +61,8 @@ void initializeTarget(PassRegistry&);
void initializeAAEvalPass(PassRegistry&);
void initializeAddDiscriminatorsPass(PassRegistry&);
-void initializeADCEPass(PassRegistry&);
+void initializeADCELegacyPassPass(PassRegistry&);
void initializeBDCEPass(PassRegistry&);
-void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
-void initializeAliasAnalysisCounterPass(PassRegistry&);
-void initializeAliasDebuggerPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlwaysInlinerPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
@@ -76,13 +70,13 @@ void initializeAtomicExpandPass(PassRegistry&);
void initializeSampleProfileLoaderPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
-void initializeBasicAliasAnalysisPass(PassRegistry&);
+void initializeBasicAAWrapperPassPass(PassRegistry&);
void initializeCallGraphWrapperPassPass(PassRegistry &);
void initializeBlockExtractorPassPass(PassRegistry&);
-void initializeBlockFrequencyInfoPass(PassRegistry&);
+void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
void initializeBoundsCheckingPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
-void initializeBranchProbabilityInfoPass(PassRegistry&);
+void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
void initializeCallGraphPrinterPass(PassRegistry&);
void initializeCallGraphViewerPass(PassRegistry&);
@@ -90,7 +84,8 @@ void initializeCFGOnlyPrinterPass(PassRegistry&);
void initializeCFGOnlyViewerPass(PassRegistry&);
void initializeCFGPrinterPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&);
-void initializeCFLAliasAnalysisPass(PassRegistry&);
+void initializeCFLAAWrapperPassPass(PassRegistry&);
+void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeForwardControlFlowIntegrityPass(PassRegistry&);
void initializeFlattenCFGPassPass(PassRegistry&);
void initializeStructurizeCFGPass(PassRegistry&);
@@ -102,6 +97,7 @@ void initializeConstantPropagationPass(PassRegistry&);
void initializeMachineCopyPropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
+void initializeCrossDSOCFIPass(PassRegistry&);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCEPass(PassRegistry&);
@@ -120,7 +116,10 @@ void initializeDominatorTreeWrapperPassPass(PassRegistry&);
void initializeEarlyIfConverterPass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
+void initializeAAResultsWrapperPassPass(PassRegistry &);
void initializeGCOVProfilerPass(PassRegistry&);
+void initializePGOInstrumentationGenPass(PassRegistry&);
+void initializePGOInstrumentationUsePass(PassRegistry&);
void initializeInstrProfilingPass(PassRegistry&);
void initializeAddressSanitizerPass(PassRegistry&);
void initializeAddressSanitizerModulePass(PassRegistry&);
@@ -132,19 +131,21 @@ void initializeScalarizerPass(PassRegistry&);
void initializeEarlyCSELegacyPassPass(PassRegistry &);
void initializeEliminateAvailableExternallyPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
+void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeFunctionAttrsPass(PassRegistry&);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGVNPass(PassRegistry&);
void initializeGlobalDCEPass(PassRegistry&);
void initializeGlobalOptPass(PassRegistry&);
-void initializeGlobalsModRefPass(PassRegistry&);
+void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPPass(PassRegistry&);
void initializeIVUsersPass(PassRegistry&);
void initializeIfConverterPass(PassRegistry&);
void initializeInductiveRangeCheckEliminationPass(PassRegistry&);
void initializeIndVarSimplifyPass(PassRegistry&);
+void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeInlineCostAnalysisPass(PassRegistry&);
void initializeInstructionCombiningPassPass(PassRegistry&);
void initializeInstCountPass(PassRegistry&);
@@ -155,7 +156,6 @@ void initializeJumpThreadingPass(PassRegistry&);
void initializeLCSSAPass(PassRegistry&);
void initializeLICMPass(PassRegistry&);
void initializeLazyValueInfoPass(PassRegistry&);
-void initializeLibCallAliasAnalysisPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
void initializeLiveDebugVariablesPass(PassRegistry&);
void initializeLiveIntervalsPass(PassRegistry&);
@@ -210,7 +210,7 @@ void initializeMergeFunctionsPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
void initializeNaryReassociatePass(PassRegistry&);
void initializeNoAAPass(PassRegistry&);
-void initializeObjCARCAliasAnalysisPass(PassRegistry&);
+void initializeObjCARCAAWrapperPassPass(PassRegistry&);
void initializeObjCARCAPElimPass(PassRegistry&);
void initializeObjCARCExpandPass(PassRegistry&);
void initializeObjCARCContractPass(PassRegistry&);
@@ -245,14 +245,14 @@ void initializeRegionViewerPass(PassRegistry&);
void initializeRewriteStatepointsForGCPass(PassRegistry&);
void initializeSafeStackPass(PassRegistry&);
void initializeSCCPPass(PassRegistry&);
-void initializeSROAPass(PassRegistry&);
+void initializeSROALegacyPassPass(PassRegistry&);
void initializeSROA_DTPass(PassRegistry&);
void initializeSROA_SSAUpPass(PassRegistry&);
-void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
-void initializeScalarEvolutionPass(PassRegistry&);
+void initializeSCEVAAWrapperPassPass(PassRegistry&);
+void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
void initializeShrinkWrapPass(PassRegistry &);
void initializeSimpleInlinerPass(PassRegistry&);
-void initializeShadowStackGCLoweringPass(PassRegistry&);
+void initializeShadowStackGCLoweringPass(PassRegistry&);
void initializeRegisterCoalescerPass(PassRegistry&);
void initializeSingleLoopExtractorPass(PassRegistry&);
void initializeSinkingPass(PassRegistry&);
@@ -265,7 +265,7 @@ void initializeStackColoringPass(PassRegistry&);
void initializeStackSlotColoringPass(PassRegistry&);
void initializeStraightLineStrengthReducePass(PassRegistry &);
void initializeStripDeadDebugInfoPass(PassRegistry&);
-void initializeStripDeadPrototypesPassPass(PassRegistry&);
+void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&);
void initializeStripDebugDeclarePass(PassRegistry&);
void initializeStripNonDebugSymbolsPass(PassRegistry&);
void initializeStripSymbolsPass(PassRegistry&);
@@ -276,8 +276,8 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &);
void initializeAssumptionCacheTrackerPass(PassRegistry &);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
-void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
-void initializeScopedNoAliasAAPass(PassRegistry&);
+void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
+void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
void initializeUnifyFunctionExitNodesPass(PassRegistry&);
void initializeUnreachableBlockElimPass(PassRegistry&);
void initializeUnreachableMachineBlockElimPass(PassRegistry&);
@@ -294,6 +294,7 @@ void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMIRPrintingPassPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
+void initializeLiveDebugValuesPass(PassRegistry&);
void initializeMachineCombinerPass(PassRegistry &);
void initializeLoadCombinePass(PassRegistry&);
void initializeRewriteSymbolsPass(PassRegistry&);
@@ -304,6 +305,10 @@ void initializeDwarfEHPreparePass(PassRegistry&);
void initializeFloat2IntPass(PassRegistry&);
void initializeLoopDistributePass(PassRegistry&);
void initializeSjLjEHPreparePass(PassRegistry&);
+void initializeDemandedBitsPass(PassRegistry&);
+void initializeFuncletLayoutPass(PassRegistry &);
+void initializeLoopLoadEliminationPass(PassRegistry&);
+void initializeFunctionImportPassPass(PassRegistry &);
}
#endif
diff --git a/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
index 0c46fc0..3820b21 100644
--- a/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
@@ -39,7 +39,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Linker/Linker.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
#include <vector>
@@ -48,6 +48,7 @@ namespace llvm {
class LLVMContext;
class DiagnosticInfo;
class GlobalValue;
+ class Linker;
class Mangler;
class MemoryBuffer;
class TargetLibraryInfo;
@@ -61,121 +62,135 @@ namespace llvm {
struct LTOCodeGenerator {
static const char *getVersionString();
- LTOCodeGenerator();
- LTOCodeGenerator(std::unique_ptr<LLVMContext> Context);
+ LTOCodeGenerator(LLVMContext &Context);
~LTOCodeGenerator();
- // Merge given module, return true on success.
+ /// Merge given module. Return true on success.
bool addModule(struct LTOModule *);
- // Set the destination module.
- void setModule(struct LTOModule *);
+ /// Set the destination module.
+ void setModule(std::unique_ptr<LTOModule> M);
- void setTargetOptions(TargetOptions options);
+ void setTargetOptions(TargetOptions Options);
void setDebugInfo(lto_debug_model);
- void setCodePICModel(lto_codegen_model);
+ void setCodePICModel(Reloc::Model Model) { RelocModel = Model; }
+
+ /// Set the file type to be emitted (assembly or object code).
+ /// The default is TargetMachine::CGFT_ObjectFile.
+ void setFileType(TargetMachine::CodeGenFileType FT) { FileType = FT; }
- void setCpu(const char *mCpu) { MCpu = mCpu; }
- void setAttr(const char *mAttr) { MAttr = mAttr; }
- void setOptLevel(unsigned optLevel) { OptLevel = optLevel; }
+ void setCpu(const char *MCpu) { this->MCpu = MCpu; }
+ void setAttr(const char *MAttr) { this->MAttr = MAttr; }
+ void setOptLevel(unsigned OptLevel);
void setShouldInternalize(bool Value) { ShouldInternalize = Value; }
void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; }
- void addMustPreserveSymbol(StringRef sym) { MustPreserveSymbols[sym] = 1; }
-
- // To pass options to the driver and optimization passes. These options are
- // not necessarily for debugging purpose (The function name is misleading).
- // This function should be called before LTOCodeGenerator::compilexxx(),
- // and LTOCodeGenerator::writeMergedModules().
- void setCodeGenDebugOptions(const char *opts);
-
- // Parse the options set in setCodeGenDebugOptions. Like
- // setCodeGenDebugOptions, this must be called before
- // LTOCodeGenerator::compilexxx() and LTOCodeGenerator::writeMergedModules()
+ void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; }
+
+ /// Pass options to the driver and optimization passes.
+ ///
+ /// These options are not necessarily for debugging purpose (the function
+ /// name is misleading). This function should be called before
+ /// LTOCodeGenerator::compilexxx(), and
+ /// LTOCodeGenerator::writeMergedModules().
+ void setCodeGenDebugOptions(const char *Opts);
+
+ /// Parse the options set in setCodeGenDebugOptions.
+ ///
+ /// Like \a setCodeGenDebugOptions(), this must be called before
+ /// LTOCodeGenerator::compilexxx() and
+ /// LTOCodeGenerator::writeMergedModules().
void parseCodeGenDebugOptions();
- // Write the merged module to the file specified by the given path.
- // Return true on success.
- bool writeMergedModules(const char *path, std::string &errMsg);
-
- // Compile the merged module into a *single* object file; the path to object
- // file is returned to the caller via argument "name". Return true on
- // success.
- //
- // NOTE that it is up to the linker to remove the intermediate object file.
- // Do not try to remove the object file in LTOCodeGenerator's destructor
- // as we don't who (LTOCodeGenerator or the obj file) will last longer.
- bool compile_to_file(const char **name,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg);
-
- // As with compile_to_file(), this function compiles the merged module into
- // single object file. Instead of returning the object-file-path to the caller
- // (linker), it brings the object to a buffer, and return the buffer to the
- // caller. This function should delete intermediate object file once its content
- // is brought to memory. Return NULL if the compilation was not successful.
- std::unique_ptr<MemoryBuffer> compile(bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg);
-
- // Optimizes the merged module. Returns true on success.
- bool optimize(bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg);
-
- // Compiles the merged optimized module into a single object file. It brings
- // the object to a buffer, and returns the buffer to the caller. Return NULL
- // if the compilation was not successful.
- std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
+ /// Write the merged module to the file specified by the given path. Return
+ /// true on success.
+ bool writeMergedModules(const char *Path);
+
+ /// Compile the merged module into a *single* output file; the path to the
+ /// output file is returned to the caller via argument "name". Return true on
+ /// success.
+ ///
+ /// \note It is up to the linker to remove the intermediate output file. Do
+ /// not try to remove the object file in LTOCodeGenerator's destructor as we
+ /// don't know which (LTOCodeGenerator or the output file) will last longer.
+ bool compile_to_file(const char **Name, bool DisableVerify,
+ bool DisableInline, bool DisableGVNLoadPRE,
+ bool DisableVectorization);
+
+ /// As with compile_to_file(), this function compiles the merged module into
+ /// single output file. Instead of returning the output file path to the
+ /// caller (linker), it brings the output to a buffer, and returns the buffer
+ /// to the caller. This function should delete the intermediate file once
+ /// its content is brought to memory. Return NULL if the compilation was not
+ /// successful.
+ std::unique_ptr<MemoryBuffer> compile(bool DisableVerify, bool DisableInline,
+ bool DisableGVNLoadPRE,
+ bool DisableVectorization);
+
+ /// Optimizes the merged module. Returns true on success.
+ bool optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE,
+ bool DisableVectorization);
+
+ /// Compiles the merged optimized module into a single output file. It brings
+ /// the output to a buffer, and returns the buffer to the caller. Return NULL
+ /// if the compilation was not successful.
+ std::unique_ptr<MemoryBuffer> compileOptimized();
+
+ /// Compile the merged optimized module into out.size() output files each
+ /// representing a linkable partition of the module. If out contains more
+ /// than one element, code generation is done in parallel with out.size()
+ /// threads. Output files will be written to members of out. Returns true on
+ /// success.
+ bool compileOptimized(ArrayRef<raw_pwrite_stream *> Out);
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
LLVMContext &getContext() { return Context; }
+ void resetMergedModule() { MergedModule.reset(); }
+
private:
void initializeLTOPasses();
- bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
- bool compileOptimizedToFile(const char **name, std::string &errMsg);
+ bool compileOptimizedToFile(const char **Name);
void applyScopeRestrictions();
void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
std::vector<const char *> &MustPreserveList,
SmallPtrSetImpl<GlobalValue *> &AsmUsed,
Mangler &Mangler);
- bool determineTarget(std::string &errMsg);
+ bool determineTarget();
static void DiagnosticHandler(const DiagnosticInfo &DI, void *Context);
void DiagnosticHandler2(const DiagnosticInfo &DI);
+ void emitError(const std::string &ErrMsg);
+
typedef StringMap<uint8_t> StringSet;
- void destroyMergedModule();
- std::unique_ptr<LLVMContext> OwnedContext;
LLVMContext &Context;
- Linker IRLinker;
- TargetMachine *TargetMach = nullptr;
+ std::unique_ptr<Module> MergedModule;
+ std::unique_ptr<Linker> TheLinker;
+ std::unique_ptr<TargetMachine> TargetMach;
bool EmitDwarfDebugInfo = false;
bool ScopeRestrictionsDone = false;
- lto_codegen_model CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
+ Reloc::Model RelocModel = Reloc::Default;
StringSet MustPreserveSymbols;
StringSet AsmUndefinedRefs;
- std::vector<char *> CodegenOptions;
+ std::vector<std::string> CodegenOptions;
+ std::string FeatureStr;
std::string MCpu;
std::string MAttr;
std::string NativeObjectPath;
TargetOptions Options;
+ CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
unsigned OptLevel = 2;
lto_diagnostic_handler_t DiagHandler = nullptr;
void *DiagContext = nullptr;
- LTOModule *OwnedModule = nullptr;
bool ShouldInternalize = true;
bool ShouldEmbedUselists = false;
+ TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
};
}
#endif
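
Pulling the reworked interface together, a rough usage sketch; error reporting goes through the registered diagnostic handler, and all names outside the LTOCodeGenerator API are illustrative:

    llvm::LLVMContext Ctx;
    llvm::LTOCodeGenerator CG(Ctx); // the context is now passed in
    CG.setCodePICModel(llvm::Reloc::PIC_);
    CG.setFileType(llvm::TargetMachine::CGFT_ObjectFile);
    CG.setOptLevel(2);
    // ... CG.addModule(...) for each LTOModule ...
    std::unique_ptr<llvm::MemoryBuffer> Obj =
        CG.compile(/*DisableVerify=*/false, /*DisableInline=*/false,
                   /*DisableGVNLoadPRE=*/false,
                   /*DisableVectorization=*/false);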
diff --git a/contrib/llvm/include/llvm/LTO/LTOModule.h b/contrib/llvm/include/llvm/LTO/LTOModule.h
index c4e2be6..97b5865 100644
--- a/contrib/llvm/include/llvm/LTO/LTOModule.h
+++ b/contrib/llvm/include/llvm/LTO/LTOModule.h
@@ -74,6 +74,11 @@ public:
static bool isBitcodeForTarget(MemoryBuffer *memBuffer,
StringRef triplePrefix);
+ /// Returns a string representing the producer identification stored in the
+ /// bitcode, or "" if the bitcode does not contain any.
+ ///
+ static std::string getProducerString(MemoryBuffer *Buffer);
+
/// Create a MemoryBuffer from a memory range with an optional name.
static std::unique_ptr<MemoryBuffer>
makeBuffer(const void *mem, size_t length, StringRef name = "");
@@ -86,25 +91,24 @@ public:
/// InitializeAllTargetMCs();
/// InitializeAllAsmPrinters();
/// InitializeAllAsmParsers();
- static LTOModule *createFromFile(const char *path, TargetOptions options,
- std::string &errMsg);
- static LTOModule *createFromOpenFile(int fd, const char *path, size_t size,
- TargetOptions options,
- std::string &errMsg);
- static LTOModule *createFromOpenFileSlice(int fd, const char *path,
- size_t map_size, off_t offset,
- TargetOptions options,
- std::string &errMsg);
- static LTOModule *createFromBuffer(const void *mem, size_t length,
- TargetOptions options, std::string &errMsg,
- StringRef path = "");
-
- static LTOModule *createInLocalContext(const void *mem, size_t length,
- TargetOptions options,
- std::string &errMsg, StringRef path);
- static LTOModule *createInContext(const void *mem, size_t length,
- TargetOptions options, std::string &errMsg,
- StringRef path, LLVMContext *Context);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createFromFile(LLVMContext &Context, const char *path, TargetOptions options);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createFromOpenFile(LLVMContext &Context, int fd, const char *path,
+ size_t size, TargetOptions options);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path,
+ size_t map_size, off_t offset, TargetOptions options);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createFromBuffer(LLVMContext &Context, const void *mem, size_t length,
+ TargetOptions options, StringRef path = "");
+
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createInLocalContext(const void *mem, size_t length, TargetOptions options,
+ StringRef path);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ createInContext(const void *mem, size_t length, TargetOptions options,
+ StringRef path, LLVMContext *Context);
const Module &getModule() const {
return const_cast<LTOModule*>(this)->getModule();
@@ -113,6 +117,8 @@ public:
return IRFile->getModule();
}
+ std::unique_ptr<Module> takeModule() { return IRFile->takeModule(); }
+
/// Return the Module's target triple.
const std::string &getTargetTriple() {
return getModule().getTargetTriple();
@@ -163,7 +169,7 @@ private:
/// Parse the symbols from the module and model-level ASM and add them to
/// either the defined or undefined lists.
- bool parseSymbols(std::string &errMsg);
+ void parseSymbols();
/// Add a symbol which isn't defined just yet to a list to be resolved later.
void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
@@ -200,8 +206,9 @@ private:
bool objcClassNameFromExpression(const Constant *c, std::string &name);
/// Create an LTOModule (private version).
- static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
- std::string &errMsg, LLVMContext *Context);
+ static ErrorOr<std::unique_ptr<LTOModule>>
+ makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
+ LLVMContext *Context);
};
}
#endif
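
The factories now report failure through ErrorOr rather than an out-parameter string, so callers check the error code before taking ownership. A minimal sketch, with Ctx an existing LLVMContext:

    llvm::ErrorOr<std::unique_ptr<llvm::LTOModule>> MOrErr =
        llvm::LTOModule::createFromFile(Ctx, "input.o",
                                        llvm::TargetOptions());
    if (std::error_code EC = MOrErr.getError()) {
      llvm::errs() << "LTO: " << EC.message() << "\n";
      return;
    }
    std::unique_ptr<llvm::LTOModule> M = std::move(*MOrErr);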
diff --git a/contrib/llvm/include/llvm/LibDriver/LibDriver.h b/contrib/llvm/include/llvm/LibDriver/LibDriver.h
index aaaa7b7..0949565 100644
--- a/contrib/llvm/include/llvm/LibDriver/LibDriver.h
+++ b/contrib/llvm/include/llvm/LibDriver/LibDriver.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// Defines an interface to a lib.exe-compatible driver that also understands
-// bitcode files. Used by llvm-lib and lld-link2 /lib.
+// bitcode files. Used by llvm-lib and lld-link /lib.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h
index cea5530..29fcd93 100644
--- a/contrib/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm/include/llvm/LinkAllPasses.h
@@ -17,8 +17,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "llvm/Analysis/CallPrinter.h"
#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IntervalPartition.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/Passes.h"
@@ -26,6 +29,9 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
@@ -52,21 +58,18 @@ namespace {
(void) llvm::createAAEvalPass();
(void) llvm::createAggressiveDCEPass();
(void) llvm::createBitTrackingDCEPass();
- (void) llvm::createAliasAnalysisCounterPass();
- (void) llvm::createAliasDebugger();
(void) llvm::createArgumentPromotionPass();
(void) llvm::createAlignmentFromAssumptionsPass();
- (void) llvm::createBasicAliasAnalysisPass();
- (void) llvm::createLibCallAliasAnalysisPass(nullptr);
- (void) llvm::createScalarEvolutionAliasAnalysisPass();
- (void) llvm::createTypeBasedAliasAnalysisPass();
- (void) llvm::createScopedNoAliasAAPass();
+ (void) llvm::createBasicAAWrapperPass();
+ (void) llvm::createSCEVAAWrapperPass();
+ (void) llvm::createTypeBasedAAWrapperPass();
+ (void) llvm::createScopedNoAliasAAWrapperPass();
(void) llvm::createBoundsCheckingPass();
(void) llvm::createBreakCriticalEdgesPass();
(void) llvm::createCallGraphPrinterPass();
(void) llvm::createCallGraphViewerPass();
(void) llvm::createCFGSimplificationPass();
- (void) llvm::createCFLAliasAnalysisPass();
+ (void) llvm::createCFLAAWrapperPass();
(void) llvm::createStructurizeCFGPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
@@ -82,12 +85,15 @@ namespace {
(void) llvm::createDomOnlyViewerPass();
(void) llvm::createDomViewerPass();
(void) llvm::createGCOVProfilerPass();
+ (void) llvm::createPGOInstrumentationGenPass();
+ (void) llvm::createPGOInstrumentationUsePass();
(void) llvm::createInstrProfilingPass();
+ (void) llvm::createFunctionImportPass();
(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerPass();
(void) llvm::createGlobalDCEPass();
(void) llvm::createGlobalOptimizerPass();
- (void) llvm::createGlobalsModRefPass();
+ (void) llvm::createGlobalsAAWrapperPass();
(void) llvm::createIPConstantPropagationPass();
(void) llvm::createIPSCCPPass();
(void) llvm::createInductiveRangeCheckEliminationPass();
@@ -110,8 +116,7 @@ namespace {
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
(void) llvm::createNaryReassociatePass();
- (void) llvm::createNoAAPass();
- (void) llvm::createObjCARCAliasAnalysisPass();
+ (void) llvm::createObjCARCAAWrapperPass();
(void) llvm::createObjCARCAPElimPass();
(void) llvm::createObjCARCExpandPass();
(void) llvm::createObjCARCContractPass();
@@ -179,7 +184,7 @@ namespace {
(void) llvm::createEliminateAvailableExternallyPass();
(void)new llvm::IntervalPartition();
- (void)new llvm::ScalarEvolution();
+ (void)new llvm::ScalarEvolutionWrapperPass();
((llvm::Function*)nullptr)->viewCFGOnly();
llvm::RGPassManager RGM;
((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM);
diff --git a/contrib/llvm/include/llvm/Linker/IRMover.h b/contrib/llvm/include/llvm/Linker/IRMover.h
new file mode 100644
index 0000000..a964cc4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Linker/IRMover.h
@@ -0,0 +1,76 @@
+//===- IRMover.h ------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LINKER_IRMOVER_H
+#define LLVM_LINKER_IRMOVER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include <functional>
+
+namespace llvm {
+class GlobalValue;
+class MDNode;
+class Module;
+class StructType;
+class Type;
+
+class IRMover {
+ struct StructTypeKeyInfo {
+ struct KeyTy {
+ ArrayRef<Type *> ETypes;
+ bool IsPacked;
+ KeyTy(ArrayRef<Type *> E, bool P);
+ KeyTy(const StructType *ST);
+ bool operator==(const KeyTy &that) const;
+ bool operator!=(const KeyTy &that) const;
+ };
+ static StructType *getEmptyKey();
+ static StructType *getTombstoneKey();
+ static unsigned getHashValue(const KeyTy &Key);
+ static unsigned getHashValue(const StructType *ST);
+ static bool isEqual(const KeyTy &LHS, const StructType *RHS);
+ static bool isEqual(const StructType *LHS, const StructType *RHS);
+ };
+
+public:
+ class IdentifiedStructTypeSet {
+ // The set of opaque types in the composite module.
+ DenseSet<StructType *> OpaqueStructTypes;
+
+ // The set of identified but non opaque structures in the composite module.
+ DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypes;
+
+ public:
+ void addNonOpaque(StructType *Ty);
+ void switchToNonOpaque(StructType *Ty);
+ void addOpaque(StructType *Ty);
+ StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
+ bool hasType(StructType *Ty);
+ };
+
+ IRMover(Module &M);
+
+ typedef std::function<void(GlobalValue &)> ValueAdder;
+ /// Move in the provided values. The source is destroyed.
+ /// Returns true on error.
+ bool move(Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+ std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+ bool IsMetadataLinkingPostpass = false);
+ Module &getModule() { return Composite; }
+
+private:
+ Module &Composite;
+ IdentifiedStructTypeSet IdentifiedStructTypes;
+};
+
+} // End llvm namespace
+
+#endif
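
A rough sketch of driving IRMover directly: choose the globals to link and let the AddLazyFor callback decide which lazily-referenced values to pull in. Note that the source module is destroyed by move(); everything outside the IRMover API here is illustrative:

    llvm::IRMover Mover(DstModule);
    std::vector<llvm::GlobalValue *> ToLink = {SrcModule.getFunction("f")};
    bool Failed = Mover.move(
        SrcModule, ToLink,
        [](llvm::GlobalValue &GV, llvm::IRMover::ValueAdder Add) {
          // Invoked for values referenced from ToLink that may be linked
          // lazily; call Add(GV) to copy one into the composite.
        });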
diff --git a/contrib/llvm/include/llvm/Linker/Linker.h b/contrib/llvm/include/llvm/Linker/Linker.h
index c43b90e..dde3f73 100644
--- a/contrib/llvm/include/llvm/Linker/Linker.h
+++ b/contrib/llvm/include/llvm/Linker/Linker.h
@@ -10,10 +10,8 @@
#ifndef LLVM_LINKER_LINKER_H
#define LLVM_LINKER_LINKER_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/Linker/IRMover.h"
namespace llvm {
class Module;
@@ -25,71 +23,55 @@ class Type;
/// module since it is assumed that the user of this class will want to do
/// something with it after the linking.
class Linker {
-public:
- struct StructTypeKeyInfo {
- struct KeyTy {
- ArrayRef<Type *> ETypes;
- bool IsPacked;
- KeyTy(ArrayRef<Type *> E, bool P);
- KeyTy(const StructType *ST);
- bool operator==(const KeyTy &that) const;
- bool operator!=(const KeyTy &that) const;
- };
- static StructType *getEmptyKey();
- static StructType *getTombstoneKey();
- static unsigned getHashValue(const KeyTy &Key);
- static unsigned getHashValue(const StructType *ST);
- static bool isEqual(const KeyTy &LHS, const StructType *RHS);
- static bool isEqual(const StructType *LHS, const StructType *RHS);
- };
-
- typedef DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypeSet;
- typedef DenseSet<StructType *> OpaqueStructTypeSet;
-
- struct IdentifiedStructTypeSet {
- // The set of opaque types is the composite module.
- OpaqueStructTypeSet OpaqueStructTypes;
+ IRMover Mover;
- // The set of identified but non opaque structures in the composite module.
- NonOpaqueStructTypeSet NonOpaqueStructTypes;
-
- void addNonOpaque(StructType *Ty);
- void switchToNonOpaque(StructType *Ty);
- void addOpaque(StructType *Ty);
- StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
- bool hasType(StructType *Ty);
+public:
+ enum Flags {
+ None = 0,
+ OverrideFromSrc = (1 << 0),
+ LinkOnlyNeeded = (1 << 1),
+ InternalizeLinkedSymbols = (1 << 2)
};
- Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
- Linker(Module *M);
- ~Linker();
-
- Module *getModule() const { return Composite; }
- void deleteModule();
+ Linker(Module &M);
- /// \brief Link \p Src into the composite. The source is destroyed.
+ /// \brief Link \p Src into the composite.
+ ///
/// Passing the OverrideFromSrc flag will have symbols from Src
/// shadow those in the Dest.
+ /// For ThinLTO function importing/exporting the \p FunctionInfoIndex
+ /// is passed. If \p FunctionsToImport is provided, only the functions that
+ /// are part of the set will be imported from the source module.
+ /// The \p ValIDToTempMDMap is populated by the linker when function
+ /// importing is performed.
+ ///
/// Returns true on error.
- bool linkInModule(Module *Src, bool OverrideSymbols = false);
-
- /// \brief Set the composite to the passed-in module.
- void setModule(Module *Dst);
-
- static bool LinkModules(Module *Dest, Module *Src,
- DiagnosticHandlerFunction DiagnosticHandler);
-
- static bool LinkModules(Module *Dest, Module *Src);
-
-private:
- void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
- Module *Composite;
-
- IdentifiedStructTypeSet IdentifiedStructTypes;
-
- DiagnosticHandlerFunction DiagnosticHandler;
+ bool linkInModule(std::unique_ptr<Module> Src, unsigned Flags = Flags::None,
+ const FunctionInfoIndex *Index = nullptr,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr);
+
+ /// This exists to implement the deprecated LLVMLinkModules C api. Don't use
+ /// for anything else.
+ bool linkInModuleForCAPI(Module &Src);
+
+ static bool linkModules(Module &Dest, std::unique_ptr<Module> Src,
+ unsigned Flags = Flags::None);
+
+ /// \brief Link metadata from \p Src into the composite. The source is
+ /// destroyed.
+ ///
+ /// The \p ValIDToTempMDMap should have been populated earlier during function
+ /// importing from \p Src.
+ bool linkInMetadata(Module &Src,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap);
};
+/// Create a new module with exported local functions renamed and promoted
+/// for ThinLTO.
+std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M,
+ const FunctionInfoIndex *Index);
+
} // End llvm namespace
#endif
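
Correspondingly, the Linker front end now takes ownership of the source module plus a flags bitmask. A minimal sketch, linking only the symbols the destination actually needs:

    llvm::Linker L(Dst);
    bool Failed = L.linkInModule(std::move(Src),
                                 llvm::Linker::Flags::LinkOnlyNeeded);
    // One-shot alternative, without keeping a Linker around:
    //   llvm::Linker::linkModules(Dst, std::move(OtherSrc));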
diff --git a/contrib/llvm/include/llvm/MC/ConstantPools.h b/contrib/llvm/include/llvm/MC/ConstantPools.h
index 9aa4663b..552e144 100644
--- a/contrib/llvm/include/llvm/MC/ConstantPools.h
+++ b/contrib/llvm/include/llvm/MC/ConstantPools.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/SMLoc.h"
namespace llvm {
class MCContext;
@@ -26,11 +27,12 @@ class MCStreamer;
class MCSymbol;
struct ConstantPoolEntry {
- ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz)
- : Label(L), Value(Val), Size(Sz) {}
+ ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz, SMLoc Loc_)
+ : Label(L), Value(Val), Size(Sz), Loc(Loc_) {}
MCSymbol *Label;
const MCExpr *Value;
unsigned Size;
+ SMLoc Loc;
};
// A class to keep track of assembler-generated constant pools that are used to
@@ -49,7 +51,7 @@ public:
//
// \returns a MCExpr that references the newly inserted value
const MCExpr *addEntry(const MCExpr *Value, MCContext &Context,
- unsigned Size);
+ unsigned Size, SMLoc Loc);
// Emit the contents of the constant pool using the provided streamer.
void emitEntries(MCStreamer &Streamer);
@@ -80,7 +82,7 @@ public:
void emitAll(MCStreamer &Streamer);
void emitForCurrentSection(MCStreamer &Streamer);
const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr,
- unsigned Size);
+ unsigned Size, SMLoc Loc);
private:
ConstantPool *getConstantPool(MCSection *Section);
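
The SMLoc threaded through addEntry lets later diagnostics point back at the originating operand. A hedged call-site sketch from an assembler-parser context; Pools, Streamer, Value and Operand are assumed to exist there:

    // Record a 4-byte literal and get back an MCExpr naming its label.
    const llvm::MCExpr *Ref =
        Pools.addEntry(Streamer, Value, /*Size=*/4, Operand.getStartLoc());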
diff --git a/contrib/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
index 2bfad2d..51312ff 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
@@ -67,6 +67,11 @@ public:
/// Get the number of target specific fixup kinds.
virtual unsigned getNumFixupKinds() const = 0;
+ /// Map a relocation name used in .reloc to a fixup kind.
+ /// Returns true and sets MappedKind if Name is successfully mapped.
+ /// Otherwise returns false and leaves MappedKind unchanged.
+ virtual bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const;
+
/// Get information on a fixup kind.
virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
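
A backend that wants .reloc support overrides the new hook. A hedged sketch for a hypothetical target with a single named relocation; only the hook's signature comes from the header:

    bool MyAsmBackend::getFixupKind(llvm::StringRef Name,
                                    llvm::MCFixupKind &MappedKind) const {
      if (Name == "R_MYTGT_32") { // hypothetical relocation name
        MappedKind =
            static_cast<llvm::MCFixupKind>(llvm::FirstTargetFixupKind);
        return true;
      }
      return false; // MappedKind left unchanged, as documented above
    }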
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
index 9bb0fa6..384584e 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
@@ -414,6 +414,15 @@ public:
/// syntactically correct.
virtual bool isValidUnquotedName(StringRef Name) const;
+ /// Return true if the .section directive should be omitted when
+ /// emitting \p SectionName. For example:
+ ///
+ /// shouldOmitSectionDirective(".text")
+ ///
+ /// returns false => .section .text,#alloc,#execinstr
+ /// returns true => .text
+ virtual bool shouldOmitSectionDirective(StringRef SectionName) const;
+
bool usesSunStyleELFSectionSwitchSyntax() const {
return SunStyleELFSectionSwitchSyntax;
}
diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h
index 0642af8..c0bd128 100644
--- a/contrib/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm/include/llvm/MC/MCAssembler.h
@@ -10,23 +10,18 @@
#ifndef LLVM_MC_MCASSEMBLER_H
#define LLVM_MC_MCASSEMBLER_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
-#include <algorithm>
-#include <vector> // FIXME: Shouldn't be needed.
+#include "llvm/MC/MCSymbol.h"
namespace llvm {
class raw_ostream;
@@ -42,476 +37,6 @@ class MCSubtargetInfo;
class MCValue;
class MCAsmBackend;
-class MCFragment : public ilist_node<MCFragment> {
- friend class MCAsmLayout;
-
- MCFragment(const MCFragment &) = delete;
- void operator=(const MCFragment &) = delete;
-
-public:
- enum FragmentType : uint8_t {
- FT_Align,
- FT_Data,
- FT_CompactEncodedInst,
- FT_Fill,
- FT_Relaxable,
- FT_Org,
- FT_Dwarf,
- FT_DwarfFrame,
- FT_LEB,
- FT_SafeSEH
- };
-
-private:
- FragmentType Kind;
-
-protected:
- bool HasInstructions;
-
-private:
- /// \brief Should this fragment be aligned to the end of a bundle?
- bool AlignToBundleEnd;
-
- uint8_t BundlePadding;
-
- /// LayoutOrder - The layout order of this fragment.
- unsigned LayoutOrder;
-
- /// The data for the section this fragment is in.
- MCSection *Parent;
-
- /// Atom - The atom this fragment is in, as represented by it's defining
- /// symbol.
- const MCSymbol *Atom;
-
- /// \name Assembler Backend Data
- /// @{
- //
- // FIXME: This could all be kept private to the assembler implementation.
-
- /// Offset - The offset of this fragment in its section. This is ~0 until
- /// initialized.
- uint64_t Offset;
-
- /// @}
-
-protected:
- MCFragment(FragmentType Kind, bool HasInstructions,
- uint8_t BundlePadding, MCSection *Parent = nullptr);
-
- ~MCFragment();
-private:
-
- // This is a friend so that the sentinal can be created.
- friend struct ilist_sentinel_traits<MCFragment>;
- MCFragment();
-
-public:
- /// Destroys the current fragment.
- ///
- /// This must be used instead of delete as MCFragment is non-virtual.
- /// This method will dispatch to the appropriate subclass.
- void destroy();
-
- FragmentType getKind() const { return Kind; }
-
- MCSection *getParent() const { return Parent; }
- void setParent(MCSection *Value) { Parent = Value; }
-
- const MCSymbol *getAtom() const { return Atom; }
- void setAtom(const MCSymbol *Value) { Atom = Value; }
-
- unsigned getLayoutOrder() const { return LayoutOrder; }
- void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
-
- /// \brief Does this fragment have instructions emitted into it? By default
- /// this is false, but specific fragment types may set it to true.
- bool hasInstructions() const { return HasInstructions; }
-
- /// \brief Should this fragment be placed at the end of an aligned bundle?
- bool alignToBundleEnd() const { return AlignToBundleEnd; }
- void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
-
- /// \brief Get the padding size that must be inserted before this fragment.
- /// Used for bundling. By default, no padding is inserted.
- /// Note that padding size is restricted to 8 bits. This is an optimization
- /// to reduce the amount of space used for each fragment. In practice, larger
- /// padding should never be required.
- uint8_t getBundlePadding() const { return BundlePadding; }
-
- /// \brief Set the padding size for this fragment. By default it's a no-op,
- /// and only some fragments have a meaningful implementation.
- void setBundlePadding(uint8_t N) { BundlePadding = N; }
-
- void dump();
-};
-
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data.
-///
-class MCEncodedFragment : public MCFragment {
-protected:
- MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions,
- MCSection *Sec)
- : MCFragment(FType, HasInstructions, 0, Sec) {}
-
-public:
- static bool classof(const MCFragment *F) {
- MCFragment::FragmentType Kind = F->getKind();
- switch (Kind) {
- default:
- return false;
- case MCFragment::FT_Relaxable:
- case MCFragment::FT_CompactEncodedInst:
- case MCFragment::FT_Data:
- return true;
- }
- }
-};
-
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data.
-///
-template<unsigned ContentsSize>
-class MCEncodedFragmentWithContents : public MCEncodedFragment {
- SmallVector<char, ContentsSize> Contents;
-
-protected:
- MCEncodedFragmentWithContents(MCFragment::FragmentType FType,
- bool HasInstructions,
- MCSection *Sec)
- : MCEncodedFragment(FType, HasInstructions, Sec) {}
-
-public:
- SmallVectorImpl<char> &getContents() { return Contents; }
- const SmallVectorImpl<char> &getContents() const { return Contents; }
-};
-
-/// Interface implemented by fragments that contain encoded instructions and/or
-/// data and also have fixups registered.
-///
-template<unsigned ContentsSize, unsigned FixupsSize>
-class MCEncodedFragmentWithFixups :
- public MCEncodedFragmentWithContents<ContentsSize> {
-
- /// Fixups - The list of fixups in this fragment.
- SmallVector<MCFixup, FixupsSize> Fixups;
-
-protected:
- MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
- bool HasInstructions,
- MCSection *Sec)
- : MCEncodedFragmentWithContents<ContentsSize>(FType, HasInstructions,
- Sec) {}
-
-public:
- typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
- typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
-
- SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
- const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
-
- fixup_iterator fixup_begin() { return Fixups.begin(); }
- const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
-
- fixup_iterator fixup_end() { return Fixups.end(); }
- const_fixup_iterator fixup_end() const { return Fixups.end(); }
-
- static bool classof(const MCFragment *F) {
- MCFragment::FragmentType Kind = F->getKind();
- return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
- }
-};
-
-/// Fragment for data and encoded instructions.
-///
-class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
-public:
- MCDataFragment(MCSection *Sec = nullptr)
- : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {}
-
- void setHasInstructions(bool V) { HasInstructions = V; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Data;
- }
-};
-
-/// This is a compact (memory-size-wise) fragment for holding an encoded
-/// instruction (non-relaxable) that has no fixups registered. When applicable,
-/// it can be used instead of MCDataFragment and lead to lower memory
-/// consumption.
-///
-class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> {
-public:
- MCCompactEncodedInstFragment(MCSection *Sec = nullptr)
- : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) {
- }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_CompactEncodedInst;
- }
-};
-
-/// A relaxable fragment holds on to its MCInst, since it may need to be
-/// relaxed during the assembler layout and relaxation stage.
-///
-class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
-
- /// Inst - The instruction this is a fragment for.
- MCInst Inst;
-
- /// STI - The MCSubtargetInfo in effect when the instruction was encoded.
- /// Keep a copy instead of a reference to make sure that updates to STI
- /// in the assembler are not seen here.
- const MCSubtargetInfo STI;
-
-public:
- MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCSection *Sec = nullptr)
- : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec),
- Inst(Inst), STI(STI) {}
-
- const MCInst &getInst() const { return Inst; }
- void setInst(const MCInst &Value) { Inst = Value; }
-
- const MCSubtargetInfo &getSubtargetInfo() { return STI; }
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Relaxable;
- }
-};
-
-class MCAlignFragment : public MCFragment {
-
- /// Alignment - The alignment to ensure, in bytes.
- unsigned Alignment;
-
- /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
- /// of using the provided value. The exact interpretation of this flag is
- /// target dependent.
- bool EmitNops : 1;
-
- /// Value - Value to use for filling padding bytes.
- int64_t Value;
-
- /// ValueSize - The size of the integer (in bytes) of \p Value.
- unsigned ValueSize;
-
- /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
- /// cannot be satisfied in this width then this fragment is ignored.
- unsigned MaxBytesToEmit;
-
-public:
- MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
- unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
- : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment),
- EmitNops(false), Value(Value),
- ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {}
-
- /// \name Accessors
- /// @{
-
- unsigned getAlignment() const { return Alignment; }
-
- int64_t getValue() const { return Value; }
-
- unsigned getValueSize() const { return ValueSize; }
-
- unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
-
- bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value) { EmitNops = Value; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Align;
- }
-};
-
-class MCFillFragment : public MCFragment {
-
- /// Value - Value to use for filling bytes.
- int64_t Value;
-
- /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if
- /// this is a virtual fill fragment.
- unsigned ValueSize;
-
- /// Size - The number of bytes to insert.
- uint64_t Size;
-
-public:
- MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size,
- MCSection *Sec = nullptr)
- : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize),
- Size(Size) {
- assert((!ValueSize || (Size % ValueSize) == 0) &&
- "Fill size must be a multiple of the value size!");
- }
-
- /// \name Accessors
- /// @{
-
- int64_t getValue() const { return Value; }
-
- unsigned getValueSize() const { return ValueSize; }
-
- uint64_t getSize() const { return Size; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Fill;
- }
-};
-
-class MCOrgFragment : public MCFragment {
-
- /// Offset - The offset this fragment should start at.
- const MCExpr *Offset;
-
- /// Value - Value to use for filling bytes.
- int8_t Value;
-
-public:
- MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr)
- : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {}
-
- /// \name Accessors
- /// @{
-
- const MCExpr &getOffset() const { return *Offset; }
-
- uint8_t getValue() const { return Value; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Org;
- }
-};
-
-class MCLEBFragment : public MCFragment {
-
- /// Value - The value this fragment should contain.
- const MCExpr *Value;
-
- /// IsSigned - True if this is a sleb128, false if uleb128.
- bool IsSigned;
-
- SmallString<8> Contents;
-
-public:
- MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr)
- : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) {
- Contents.push_back(0);
- }
-
- /// \name Accessors
- /// @{
-
- const MCExpr &getValue() const { return *Value; }
-
- bool isSigned() const { return IsSigned; }
-
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_LEB;
- }
-};
-
-class MCDwarfLineAddrFragment : public MCFragment {
-
- /// LineDelta - the value of the difference between the two line numbers
- /// between two .loc dwarf directives.
- int64_t LineDelta;
-
- /// AddrDelta - The expression for the difference of the two symbols that
- /// make up the address delta between two .loc dwarf directives.
- const MCExpr *AddrDelta;
-
- SmallString<8> Contents;
-
-public:
- MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta,
- MCSection *Sec = nullptr)
- : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta),
- AddrDelta(&AddrDelta) {
- Contents.push_back(0);
- }
-
- /// \name Accessors
- /// @{
-
- int64_t getLineDelta() const { return LineDelta; }
-
- const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Dwarf;
- }
-};
-
-class MCDwarfCallFrameFragment : public MCFragment {
-
- /// AddrDelta - The expression for the difference of the two symbols that
- /// make up the address delta between two .cfi_* dwarf directives.
- const MCExpr *AddrDelta;
-
- SmallString<8> Contents;
-
-public:
- MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr)
- : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) {
- Contents.push_back(0);
- }
-
- /// \name Accessors
- /// @{
-
- const MCExpr &getAddrDelta() const { return *AddrDelta; }
-
- SmallString<8> &getContents() { return Contents; }
- const SmallString<8> &getContents() const { return Contents; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_DwarfFrame;
- }
-};
-
-class MCSafeSEHFragment : public MCFragment {
- const MCSymbol *Sym;
-
-public:
- MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr)
- : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {}
-
- /// \name Accessors
- /// @{
-
- const MCSymbol *getSymbol() { return Sym; }
- const MCSymbol *getSymbol() const { return Sym; }
-
- /// @}
-
- static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_SafeSEH;
- }
-};
-
// FIXME: This really doesn't belong here. See comments below.
struct IndirectSymbolData {
MCSymbol *Symbol;
@@ -575,8 +100,6 @@ private:
MCObjectWriter &Writer;
- raw_ostream &OS;
-
SectionListType Sections;
SymbolDataListType Symbols;
@@ -591,6 +114,8 @@ private:
/// List of declared file names
std::vector<std::string> FileNames;
+ MCDwarfLineTableParams LTParams;
+
/// The set of function symbols for which a .thumb_func directive has
/// been seen.
//
@@ -607,6 +132,7 @@ private:
unsigned RelaxAll : 1;
unsigned SubsectionsViaSymbols : 1;
+ unsigned IncrementalLinkerCompatible : 1;
/// ELF specific e_header flags
// It would be good if there were an MCELFAssembler class to hold this.
@@ -712,16 +238,13 @@ public:
public:
/// Construct a new assembler instance.
- ///
- /// \param OS The stream to output to.
//
// FIXME: How are we going to parameterize this? Two obvious options are stay
// concrete and require clients to pass in a target like object. The other
// option is to make this abstract, and have targets provide concrete
// implementations as we do with AsmParser.
MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
- MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
- raw_ostream &OS);
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_);
~MCAssembler();
/// Reuse an assembler instance
@@ -736,15 +259,28 @@ public:
MCObjectWriter &getWriter() const { return Writer; }
+ MCDwarfLineTableParams getDWARFLinetableParams() const { return LTParams; }
+ void setDWARFLinetableParams(MCDwarfLineTableParams P) { LTParams = P; }
+
/// Finish - Do final processing and write the object to the output stream.
/// \p Writer is used for custom object writer (as the MCJIT does),
/// if not specified it is automatically created from backend.
void Finish();
+ // Lay out all sections and prepare them for emission.
+ void layout(MCAsmLayout &Layout);
+
// FIXME: This does not belong here.
bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; }
void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; }
+ bool isIncrementalLinkerCompatible() const {
+ return IncrementalLinkerCompatible;
+ }
+ void setIncrementalLinkerCompatible(bool Value) {
+ IncrementalLinkerCompatible = Value;
+ }
+
bool getRelaxAll() const { return RelaxAll; }
void setRelaxAll(bool Value) { RelaxAll = Value; }
@@ -856,13 +392,7 @@ public:
/// \name Backend Data Access
/// @{
- bool registerSection(MCSection &Section) {
- if (Section.isRegistered())
- return false;
- Sections.push_back(&Section);
- Section.setIsRegistered(true);
- return true;
- }
+ bool registerSection(MCSection &Section);
void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr);
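
A sketch of construction under the revised signature, together with the knobs added in this hunk; `Ctx`, `Backend`, `Emitter`, and `Writer` are assumed to exist in the caller:

    // The object writer now owns the output stream, so the assembler is
    // built from four pieces instead of five.
    MCAssembler Asm(Ctx, Backend, Emitter, Writer);
    Asm.setDWARFLinetableParams(MCDwarfLineTableParams()); // defaults
    Asm.setIncrementalLinkerCompatible(true); // opt in to the new flag
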
diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h
index 41169e9..e5a9afd 100644
--- a/contrib/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm/include/llvm/MC/MCContext.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -71,6 +72,10 @@ namespace llvm {
/// objects.
BumpPtrAllocator Allocator;
+ SpecificBumpPtrAllocator<MCSectionCOFF> COFFAllocator;
+ SpecificBumpPtrAllocator<MCSectionELF> ELFAllocator;
+ SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
+
/// Bindings of names to symbols.
SymbolTable Symbols;
@@ -108,7 +113,7 @@ namespace llvm {
/// directive is used or it is an error.
char *SecureLogFile;
/// The stream that gets written to for the .secure_log_unique directive.
- raw_ostream *SecureLog;
+ std::unique_ptr<raw_fd_ostream> SecureLog;
/// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to
/// catch errors if .secure_log_unique appears twice without
/// .secure_log_reset appearing between them.
@@ -203,9 +208,13 @@ namespace llvm {
std::map<COFFSectionKey, MCSectionCOFF *> COFFUniquingMap;
StringMap<bool> ELFRelSecNames;
+ SpecificBumpPtrAllocator<MCSubtargetInfo> MCSubtargetAllocator;
+
/// Do automatic reset in destructor
bool AutoReset;
+ bool HadError;
+
MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
bool CanBeUnnamed);
MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix,
@@ -376,6 +385,9 @@ namespace llvm {
MCSectionCOFF *getAssociativeCOFFSection(MCSectionCOFF *Sec,
const MCSymbol *KeySym);
+ // Create and save a copy of STI and return a reference to the copy.
+ MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI);
+
/// @}
/// \name Dwarf Management
@@ -494,9 +506,11 @@ namespace llvm {
/// @}
char *getSecureLogFile() { return SecureLogFile; }
- raw_ostream *getSecureLog() { return SecureLog; }
+ raw_fd_ostream *getSecureLog() { return SecureLog.get(); }
bool getSecureLogUsed() { return SecureLogUsed; }
- void setSecureLog(raw_ostream *Value) { SecureLog = Value; }
+ void setSecureLog(std::unique_ptr<raw_fd_ostream> Value) {
+ SecureLog = std::move(Value);
+ }
void setSecureLogUsed(bool Value) { SecureLogUsed = Value; }
void *allocate(unsigned Size, unsigned Align = 8) {
@@ -504,11 +518,13 @@ namespace llvm {
}
void deallocate(void *Ptr) {}
+ bool hadError() { return HadError; }
+ void reportError(SMLoc L, const Twine &Msg);
// Unrecoverable error has occurred. Display the best diagnostic we can
// and bail via exit(1). For now, most MC backend errors are unrecoverable.
// FIXME: We should really do something about that.
LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L,
- const Twine &Msg) const;
+ const Twine &Msg);
};
} // end namespace llvm
@@ -538,7 +554,7 @@ namespace llvm {
/// allocator supports it).
/// \return The allocated memory. Could be NULL.
inline void *operator new(size_t Bytes, llvm::MCContext &C,
- size_t Alignment = 8) throw() {
+ size_t Alignment = 8) LLVM_NOEXCEPT {
return C.allocate(Bytes, Alignment);
}
/// \brief Placement delete companion to the new above.
@@ -547,8 +563,8 @@ inline void *operator new(size_t Bytes, llvm::MCContext &C,
/// invoking it directly; see the new operator for more details. This operator
/// is called implicitly by the compiler if a placement new expression using
/// the MCContext throws in the object constructor.
-inline void operator delete(void *Ptr, llvm::MCContext &C, size_t)
- throw () {
+inline void operator delete(void *Ptr, llvm::MCContext &C,
+ size_t) LLVM_NOEXCEPT {
C.deallocate(Ptr);
}
@@ -571,8 +587,8 @@ inline void operator delete(void *Ptr, llvm::MCContext &C, size_t)
/// \param Alignment The alignment of the allocated memory (if the underlying
/// allocator supports it).
/// \return The allocated memory. Could be NULL.
-inline void *operator new[](size_t Bytes, llvm::MCContext& C,
- size_t Alignment = 8) throw() {
+inline void *operator new[](size_t Bytes, llvm::MCContext &C,
+ size_t Alignment = 8) LLVM_NOEXCEPT {
return C.allocate(Bytes, Alignment);
}
@@ -582,7 +598,7 @@ inline void *operator new[](size_t Bytes, llvm::MCContext& C,
/// invoking it directly; see the new[] operator for more details. This operator
/// is called implicitly by the compiler if a placement new[] expression using
/// the MCContext throws in the object constructor.
-inline void operator delete[](void *Ptr, llvm::MCContext &C) throw () {
+inline void operator delete[](void *Ptr, llvm::MCContext &C) LLVM_NOEXCEPT {
C.deallocate(Ptr);
}
diff --git a/contrib/llvm/include/llvm/MC/MCDirectives.h b/contrib/llvm/include/llvm/MC/MCDirectives.h
index f9d66e0..326b2a1 100644
--- a/contrib/llvm/include/llvm/MC/MCDirectives.h
+++ b/contrib/llvm/include/llvm/MC/MCDirectives.h
@@ -62,7 +62,9 @@ enum MCDataRegionType {
enum MCVersionMinType {
MCVM_IOSVersionMin, ///< .ios_version_min
- MCVM_OSXVersionMin ///< .macosx_version_min
+ MCVM_OSXVersionMin, ///< .macosx_version_min
+ MCVM_TvOSVersionMin, ///< .tvos_version_min
+ MCVM_WatchOSVersionMin, ///< .watchos_version_min
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm/include/llvm/MC/MCDwarf.h
index 1e72dfe..8a50863 100644
--- a/contrib/llvm/include/llvm/MC/MCDwarf.h
+++ b/contrib/llvm/include/llvm/MC/MCDwarf.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,7 +32,6 @@ namespace llvm {
class MCAsmBackend;
class MCContext;
class MCObjectStreamer;
-class MCSection;
class MCStreamer;
class MCSymbol;
class SourceMgr;
@@ -182,6 +182,19 @@ public:
}
};
+struct MCDwarfLineTableParams {
+ /// First special line opcode - leave room for the standard opcodes.
+ /// Note: If you want to change this, you'll have to update the
+ /// "StandardOpcodeLengths" table that is emitted in
+ /// \c Emit().
+ uint8_t DWARF2LineOpcodeBase = 13;
+ /// Minimum line offset in a special line info opcode. The value
+ /// -5 was chosen to give a reasonable range of values.
+ int8_t DWARF2LineBase = -5;
+ /// Range of line offsets in a special line info opcode.
+ uint8_t DWARF2LineRange = 14;
+};
+
struct MCDwarfLineTableHeader {
MCSymbol *Label;
SmallVector<std::string, 3> MCDwarfDirs;
@@ -192,9 +205,11 @@ struct MCDwarfLineTableHeader {
MCDwarfLineTableHeader() : Label(nullptr) {}
unsigned getFile(StringRef &Directory, StringRef &FileName,
unsigned FileNumber = 0);
- std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS) const;
+ std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS,
+ MCDwarfLineTableParams Params) const;
std::pair<MCSymbol *, MCSymbol *>
- Emit(MCStreamer *MCOS, ArrayRef<char> SpecialOpcodeLengths) const;
+ Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
+ ArrayRef<char> SpecialOpcodeLengths) const;
};
class MCDwarfDwoLineTable {
@@ -206,7 +221,7 @@ public:
unsigned getFile(StringRef Directory, StringRef FileName) {
return Header.getFile(Directory, FileName);
}
- void Emit(MCStreamer &MCOS) const;
+ void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params) const;
};
class MCDwarfLineTable {
@@ -215,10 +230,10 @@ class MCDwarfLineTable {
public:
// This emits the Dwarf file and the line tables for all Compile Units.
- static void Emit(MCObjectStreamer *MCOS);
+ static void Emit(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params);
// This emits the Dwarf file and the line tables for a given Compile Unit.
- void EmitCU(MCObjectStreamer *MCOS) const;
+ void EmitCU(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params) const;
unsigned getFile(StringRef &Directory, StringRef &FileName,
unsigned FileNumber = 0);
@@ -262,11 +277,12 @@ public:
class MCDwarfLineAddr {
public:
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
- static void Encode(MCContext &Context, int64_t LineDelta, uint64_t AddrDelta,
- raw_ostream &OS);
+ static void Encode(MCContext &Context, MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);
/// Utility function to emit the encoding to a streamer.
- static void Emit(MCStreamer *MCOS, int64_t LineDelta, uint64_t AddrDelta);
+ static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta);
};
class MCGenDwarfInfo {
@@ -324,7 +340,8 @@ public:
OpRestore,
OpUndefined,
OpRegister,
- OpWindowSave
+ OpWindowSave,
+ OpGnuArgsSize
};
private:
@@ -438,6 +455,11 @@ public:
return MCCFIInstruction(OpEscape, L, 0, 0, Vals);
}
+ /// \brief A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE
+ static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size) {
+ return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, "");
+ }
+
OpType getOperation() const { return Operation; }
MCSymbol *getLabel() const { return Label; }
@@ -457,7 +479,7 @@ public:
int getOffset() const {
assert(Operation == OpDefCfa || Operation == OpOffset ||
Operation == OpRelOffset || Operation == OpDefCfaOffset ||
- Operation == OpAdjustCfaOffset);
+ Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize);
return Offset;
}
diff --git a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
index 01f694d..193dac0 100644
--- a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -57,8 +57,6 @@ public:
case Triple::PS4:
case Triple::FreeBSD:
return ELF::ELFOSABI_FREEBSD;
- case Triple::Linux:
- return ELF::ELFOSABI_LINUX;
default:
return ELF::ELFOSABI_NONE;
}
diff --git a/contrib/llvm/include/llvm/MC/MCELFStreamer.h b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
index a5b257f..6eb2c2c 100644
--- a/contrib/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
@@ -36,7 +36,6 @@ public:
/// state management
void reset() override {
SeenIdent = false;
- LocalCommons.clear();
BundleGroups.clear();
MCObjectStreamer::reset();
}
@@ -69,7 +68,7 @@ public:
void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment = 0) override;
void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc()) override;
+ SMLoc Loc = SMLoc()) override;
void EmitFileDirective(StringRef Filename) override;
@@ -77,8 +76,6 @@ public:
void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override;
- void Flush() override;
-
void FinishImpl() override;
void EmitBundleAlignMode(unsigned AlignPow2) override;
@@ -97,14 +94,6 @@ private:
bool SeenIdent;
- struct LocalCommon {
- const MCSymbol *Symbol;
- uint64_t Size;
- unsigned ByteAlignment;
- };
-
- std::vector<LocalCommon> LocalCommons;
-
/// BundleGroups - The stack of fragments holding the bundle-locked
/// instructions.
llvm::SmallVector<MCDataFragment *, 4> BundleGroups;
diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h
index b3a6073..1d6bdef 100644
--- a/contrib/llvm/include/llvm/MC/MCExpr.h
+++ b/contrib/llvm/include/llvm/MC/MCExpr.h
@@ -20,6 +20,7 @@ class MCAsmLayout;
class MCAssembler;
class MCContext;
class MCFixup;
+class MCFragment;
class MCSection;
class MCStreamer;
class MCSymbol;
@@ -115,7 +116,7 @@ public:
/// currently defined as the absolute section for constants, or
/// otherwise the section associated with the first defined symbol in the
/// expression.
- MCSection *findAssociatedSection() const;
+ MCFragment *findAssociatedFragment() const;
/// @}
};
@@ -187,6 +188,7 @@ public:
VK_WEAKREF, // The link between the symbols in .weakref foo, bar
VK_ARM_NONE,
+ VK_ARM_GOT_PREL,
VK_ARM_TARGET1,
VK_ARM_TARGET2,
VK_ARM_PREL31,
@@ -556,7 +558,7 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const = 0;
virtual void visitUsedExpr(MCStreamer& Streamer) const = 0;
- virtual MCSection *findAssociatedSection() const = 0;
+ virtual MCFragment *findAssociatedFragment() const = 0;
virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
diff --git a/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h b/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
index ad99943..ad34d94 100644
--- a/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
@@ -22,6 +22,8 @@ enum DecoderOps {
// uleb128 Val, uint16_t NumToSkip)
OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+ OPC_TryDecode, // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx,
+ // uint16_t NumToSkip)
OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
OPC_Fail // OPC_Fail()
};
diff --git a/contrib/llvm/include/llvm/MC/MCFragment.h b/contrib/llvm/include/llvm/MC/MCFragment.h
new file mode 100644
index 0000000..7d6db52
--- /dev/null
+++ b/contrib/llvm/include/llvm/MC/MCFragment.h
@@ -0,0 +1,506 @@
+//===- MCFragment.h - Fragment type hierarchy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFRAGMENT_H
+#define LLVM_MC_MCFRAGMENT_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+class MCSection;
+class MCSymbol;
+class MCSubtargetInfo;
+
+class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
+ friend class MCAsmLayout;
+
+ MCFragment(const MCFragment &) = delete;
+ void operator=(const MCFragment &) = delete;
+
+public:
+ enum FragmentType : uint8_t {
+ FT_Align,
+ FT_Data,
+ FT_CompactEncodedInst,
+ FT_Fill,
+ FT_Relaxable,
+ FT_Org,
+ FT_Dwarf,
+ FT_DwarfFrame,
+ FT_LEB,
+ FT_SafeSEH,
+ FT_Dummy
+ };
+
+private:
+ FragmentType Kind;
+
+protected:
+ bool HasInstructions;
+
+private:
+ /// \brief Should this fragment be aligned to the end of a bundle?
+ bool AlignToBundleEnd;
+
+ uint8_t BundlePadding;
+
+ /// LayoutOrder - The layout order of this fragment.
+ unsigned LayoutOrder;
+
+ /// The data for the section this fragment is in.
+ MCSection *Parent;
+
+ /// Atom - The atom this fragment is in, as represented by its defining
+ /// symbol.
+ const MCSymbol *Atom;
+
+ /// \name Assembler Backend Data
+ /// @{
+ //
+ // FIXME: This could all be kept private to the assembler implementation.
+
+ /// Offset - The offset of this fragment in its section. This is ~0 until
+ /// initialized.
+ uint64_t Offset;
+
+ /// @}
+
+protected:
+ MCFragment(FragmentType Kind, bool HasInstructions,
+ uint8_t BundlePadding, MCSection *Parent = nullptr);
+
+ ~MCFragment();
+private:
+
+ // This is a friend so that the sentinel can be created.
+ friend struct ilist_sentinel_traits<MCFragment>;
+ MCFragment();
+
+public:
+ /// Destroys the current fragment.
+ ///
+ /// This must be used instead of delete as MCFragment is non-virtual.
+ /// This method will dispatch to the appropriate subclass.
+ void destroy();
+
+ FragmentType getKind() const { return Kind; }
+
+ MCSection *getParent() const { return Parent; }
+ void setParent(MCSection *Value) { Parent = Value; }
+
+ const MCSymbol *getAtom() const { return Atom; }
+ void setAtom(const MCSymbol *Value) { Atom = Value; }
+
+ unsigned getLayoutOrder() const { return LayoutOrder; }
+ void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+
+ /// \brief Does this fragment have instructions emitted into it? By default
+ /// this is false, but specific fragment types may set it to true.
+ bool hasInstructions() const { return HasInstructions; }
+
+ /// \brief Should this fragment be placed at the end of an aligned bundle?
+ bool alignToBundleEnd() const { return AlignToBundleEnd; }
+ void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
+
+ /// \brief Get the padding size that must be inserted before this fragment.
+ /// Used for bundling. By default, no padding is inserted.
+ /// Note that padding size is restricted to 8 bits. This is an optimization
+ /// to reduce the amount of space used for each fragment. In practice, larger
+ /// padding should never be required.
+ uint8_t getBundlePadding() const { return BundlePadding; }
+
+ /// \brief Set the padding size for this fragment. By default it's a no-op,
+ /// and only some fragments have a meaningful implementation.
+ void setBundlePadding(uint8_t N) { BundlePadding = N; }
+
+ /// \brief Return true if the given fragment has FT_Dummy type.
+ bool isDummy() const { return Kind == FT_Dummy; }
+
+ void dump();
+};
+
+class MCDummyFragment : public MCFragment {
+public:
+ explicit MCDummyFragment(MCSection *Sec)
+ : MCFragment(FT_Dummy, false, 0, Sec) {}
+ static bool classof(const MCFragment *F) { return F->getKind() == FT_Dummy; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+class MCEncodedFragment : public MCFragment {
+protected:
+ MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions,
+ MCSection *Sec)
+ : MCFragment(FType, HasInstructions, 0, Sec) {}
+
+public:
+ static bool classof(const MCFragment *F) {
+ MCFragment::FragmentType Kind = F->getKind();
+ switch (Kind) {
+ default:
+ return false;
+ case MCFragment::FT_Relaxable:
+ case MCFragment::FT_CompactEncodedInst:
+ case MCFragment::FT_Data:
+ return true;
+ }
+ }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+template<unsigned ContentsSize>
+class MCEncodedFragmentWithContents : public MCEncodedFragment {
+ SmallVector<char, ContentsSize> Contents;
+
+protected:
+ MCEncodedFragmentWithContents(MCFragment::FragmentType FType,
+ bool HasInstructions,
+ MCSection *Sec)
+ : MCEncodedFragment(FType, HasInstructions, Sec) {}
+
+public:
+ SmallVectorImpl<char> &getContents() { return Contents; }
+ const SmallVectorImpl<char> &getContents() const { return Contents; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data and also have fixups registered.
+///
+template<unsigned ContentsSize, unsigned FixupsSize>
+class MCEncodedFragmentWithFixups :
+ public MCEncodedFragmentWithContents<ContentsSize> {
+
+ /// Fixups - The list of fixups in this fragment.
+ SmallVector<MCFixup, FixupsSize> Fixups;
+
+protected:
+ MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
+ bool HasInstructions,
+ MCSection *Sec)
+ : MCEncodedFragmentWithContents<ContentsSize>(FType, HasInstructions,
+ Sec) {}
+
+public:
+ typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+ typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
+
+ SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+ const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
+
+ fixup_iterator fixup_begin() { return Fixups.begin(); }
+ const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+ fixup_iterator fixup_end() { return Fixups.end(); }
+ const_fixup_iterator fixup_end() const { return Fixups.end(); }
+
+ static bool classof(const MCFragment *F) {
+ MCFragment::FragmentType Kind = F->getKind();
+ return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
+ }
+};
+
+/// Fragment for data and encoded instructions.
+///
+class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
+public:
+ MCDataFragment(MCSection *Sec = nullptr)
+ : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {}
+
+ void setHasInstructions(bool V) { HasInstructions = V; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Data;
+ }
+};
+
+/// This is a compact (memory-size-wise) fragment for holding an encoded
+/// instruction (non-relaxable) that has no fixups registered. When applicable,
+/// it can be used instead of MCDataFragment and lead to lower memory
+/// consumption.
+///
+class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> {
+public:
+ MCCompactEncodedInstFragment(MCSection *Sec = nullptr)
+ : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) {
+ }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_CompactEncodedInst;
+ }
+};
+
+/// A relaxable fragment holds on to its MCInst, since it may need to be
+/// relaxed during the assembler layout and relaxation stage.
+///
+class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
+
+ /// Inst - The instruction this is a fragment for.
+ MCInst Inst;
+
+ /// STI - The MCSubtargetInfo in effect when the instruction was encoded.
+ const MCSubtargetInfo &STI;
+
+public:
+ MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCSection *Sec = nullptr)
+ : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec),
+ Inst(Inst), STI(STI) {}
+
+ const MCInst &getInst() const { return Inst; }
+ void setInst(const MCInst &Value) { Inst = Value; }
+
+ const MCSubtargetInfo &getSubtargetInfo() { return STI; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Relaxable;
+ }
+};
+
+class MCAlignFragment : public MCFragment {
+
+ /// Alignment - The alignment to ensure, in bytes.
+ unsigned Alignment;
+
+ /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+ /// of using the provided value. The exact interpretation of this flag is
+ /// target dependent.
+ bool EmitNops : 1;
+
+ /// Value - Value to use for filling padding bytes.
+ int64_t Value;
+
+ /// ValueSize - The size of the integer (in bytes) of \p Value.
+ unsigned ValueSize;
+
+ /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+ /// cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit;
+
+public:
+ MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
+ : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment),
+ EmitNops(false), Value(Value),
+ ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {}
+
+ /// \name Accessors
+ /// @{
+
+ unsigned getAlignment() const { return Alignment; }
+
+ int64_t getValue() const { return Value; }
+
+ unsigned getValueSize() const { return ValueSize; }
+
+ unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
+
+ bool hasEmitNops() const { return EmitNops; }
+ void setEmitNops(bool Value) { EmitNops = Value; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Align;
+ }
+};
+
+class MCFillFragment : public MCFragment {
+
+ /// Value - Value to use for filling bytes.
+ int64_t Value;
+
+ /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if
+ /// this is a virtual fill fragment.
+ unsigned ValueSize;
+
+ /// Size - The number of bytes to insert.
+ uint64_t Size;
+
+public:
+ MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize),
+ Size(Size) {
+ assert((!ValueSize || (Size % ValueSize) == 0) &&
+ "Fill size must be a multiple of the value size!");
+ }
+
+ /// \name Accessors
+ /// @{
+
+ int64_t getValue() const { return Value; }
+
+ unsigned getValueSize() const { return ValueSize; }
+
+ uint64_t getSize() const { return Size; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Fill;
+ }
+};
+
+class MCOrgFragment : public MCFragment {
+
+ /// Offset - The offset this fragment should start at.
+ const MCExpr *Offset;
+
+ /// Value - Value to use for filling bytes.
+ int8_t Value;
+
+public:
+ MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr)
+ : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {}
+
+ /// \name Accessors
+ /// @{
+
+ const MCExpr &getOffset() const { return *Offset; }
+
+ uint8_t getValue() const { return Value; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Org;
+ }
+};
+
+class MCLEBFragment : public MCFragment {
+
+ /// Value - The value this fragment should contain.
+ const MCExpr *Value;
+
+ /// IsSigned - True if this is a sleb128, false if uleb128.
+ bool IsSigned;
+
+ SmallString<8> Contents;
+
+public:
+ MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr)
+ : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) {
+ Contents.push_back(0);
+ }
+
+ /// \name Accessors
+ /// @{
+
+ const MCExpr &getValue() const { return *Value; }
+
+ bool isSigned() const { return IsSigned; }
+
+ SmallString<8> &getContents() { return Contents; }
+ const SmallString<8> &getContents() const { return Contents; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_LEB;
+ }
+};
+
+class MCDwarfLineAddrFragment : public MCFragment {
+
+ /// LineDelta - the value of the difference between the two line numbers
+ /// between two .loc dwarf directives.
+ int64_t LineDelta;
+
+ /// AddrDelta - The expression for the difference of the two symbols that
+ /// make up the address delta between two .loc dwarf directives.
+ const MCExpr *AddrDelta;
+
+ SmallString<8> Contents;
+
+public:
+ MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta),
+ AddrDelta(&AddrDelta) {
+ Contents.push_back(0);
+ }
+
+ /// \name Accessors
+ /// @{
+
+ int64_t getLineDelta() const { return LineDelta; }
+
+ const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+ SmallString<8> &getContents() { return Contents; }
+ const SmallString<8> &getContents() const { return Contents; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Dwarf;
+ }
+};
+
+class MCDwarfCallFrameFragment : public MCFragment {
+
+ /// AddrDelta - The expression for the difference of the two symbols that
+ /// make up the address delta between two .cfi_* dwarf directives.
+ const MCExpr *AddrDelta;
+
+ SmallString<8> Contents;
+
+public:
+ MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr)
+ : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) {
+ Contents.push_back(0);
+ }
+
+ /// \name Accessors
+ /// @{
+
+ const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+ SmallString<8> &getContents() { return Contents; }
+ const SmallString<8> &getContents() const { return Contents; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_DwarfFrame;
+ }
+};
+
+class MCSafeSEHFragment : public MCFragment {
+ const MCSymbol *Sym;
+
+public:
+ MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr)
+ : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {}
+
+ /// \name Accessors
+ /// @{
+
+ const MCSymbol *getSymbol() { return Sym; }
+ const MCSymbol *getSymbol() const { return Sym; }
+
+ /// @}
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_SafeSEH;
+ }
+};
+
+} // end namespace llvm
+
+#endif
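
A sketch of the one genuinely new piece in this otherwise-moved file, the dummy fragment kind; `Sec` is an MCSection assumed from context:

    // FT_Dummy gives layout code a recognizable placeholder fragment.
    MCDummyFragment *Placeholder = new MCDummyFragment(Sec);
    assert(Placeholder->isDummy() && "expected placeholder kind");
    Placeholder->destroy(); // MCFragment is non-virtual; never plain-delete
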
diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
index 6a582e8..88aab73 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
@@ -15,12 +15,12 @@
#ifndef LLVM_MC_MCINSTRDESC_H
#define LLVM_MC_MCINSTRDESC_H
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
class MCInst;
- class MCRegisterInfo;
class MCSubtargetInfo;
class FeatureBitset;
@@ -137,16 +137,16 @@ enum Flag {
/// directly to describe itself.
class MCInstrDesc {
public:
- unsigned short Opcode; // The opcode number
- unsigned short NumOperands; // Num of args (may be more if variable_ops)
- unsigned char NumDefs; // Num of args that are definitions
- unsigned char Size; // Number of bytes in encoding.
- unsigned short SchedClass; // enum identifying instr sched class
- uint64_t Flags; // Flags identifying machine instr class
- uint64_t TSFlags; // Target Specific Flag values
- const uint16_t *ImplicitUses; // Registers implicitly read by this instr
- const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr
- const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
+ unsigned short Opcode; // The opcode number
+ unsigned short NumOperands; // Num of args (may be more if variable_ops)
+ unsigned char NumDefs; // Num of args that are definitions
+ unsigned char Size; // Number of bytes in encoding.
+ unsigned short SchedClass; // enum identifying instr sched class
+ uint64_t Flags; // Flags identifying machine instr class
+ uint64_t TSFlags; // Target Specific Flag values
+ const MCPhysReg *ImplicitUses; // Registers implicitly read by this instr
+ const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr
+ const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
// Subtarget feature that this is deprecated on, if any
// -1 implies this is not deprecated by any single feature. It may still be
// deprecated due to a "complex" reason, below.
@@ -336,8 +336,8 @@ public:
/// \brief Return true if this instruction is convergent.
///
- /// Convergent instructions may only be moved to locations that are
- /// control-equivalent to their original positions.
+ /// Convergent instructions may not be made control-dependent on any
+ /// additional values.
bool isConvergent() const { return Flags & (1 << MCID::Convergent); }
//===--------------------------------------------------------------------===//
@@ -472,7 +472,7 @@ public:
/// marked as implicitly reading the 'CL' register, which it always does.
///
/// This method returns null if the instruction has no implicit uses.
- const uint16_t *getImplicitUses() const { return ImplicitUses; }
+ const MCPhysReg *getImplicitUses() const { return ImplicitUses; }
/// \brief Return the number of implicit uses this instruction has.
unsigned getNumImplicitUses() const {
@@ -494,7 +494,7 @@ public:
/// EAX/EDX/EFLAGS registers.
///
/// This method returns null if the instruction has no implicit defs.
- const uint16_t *getImplicitDefs() const { return ImplicitDefs; }
+ const MCPhysReg *getImplicitDefs() const { return ImplicitDefs; }
/// \brief Return the number of implicit defs this instruction has.
unsigned getNumImplicitDefs() const {
@@ -509,7 +509,7 @@ public:
/// \brief Return true if this instruction implicitly
/// uses the specified physical register.
bool hasImplicitUseOfPhysReg(unsigned Reg) const {
- if (const uint16_t *ImpUses = ImplicitUses)
+ if (const MCPhysReg *ImpUses = ImplicitUses)
for (; *ImpUses; ++ImpUses)
if (*ImpUses == Reg)
return true;
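
A sketch of walking the retyped implicit-register lists; `Desc` is an MCInstrDesc assumed from context:

    // Both arrays are null-terminated and now typed MCPhysReg.
    if (const MCPhysReg *ImpDefs = Desc.getImplicitDefs())
      for (; *ImpDefs; ++ImpDefs)
        errs() << "implicit def: " << *ImpDefs << "\n";
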
diff --git a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
index 161705d..b2871a9 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
@@ -38,7 +38,7 @@ namespace llvm {
///
/// { 2, x|y, 1 }
/// indicates that the stage occupies either FU x or FU y for 2
-/// consecuative cycles and that the next stage starts one cycle
+/// consecutive cycles and that the next stage starts one cycle
/// after this stage starts. That is, the stage requirements
/// overlap in time.
///
diff --git a/contrib/llvm/include/llvm/MC/MCLinkerOptimizationHint.h b/contrib/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
index 4b6f7ec..a519c4b 100644
--- a/contrib/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/contrib/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -160,7 +160,7 @@ class MCLOHContainer {
public:
typedef SmallVectorImpl<MCLOHDirective> LOHDirectives;
- MCLOHContainer() : EmitSize(0) {};
+ MCLOHContainer() : EmitSize(0) {}
/// Const accessor to the directives.
const LOHDirectives &getDirectives() const {
diff --git a/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h b/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
index 175d73e..cd3db95 100644
--- a/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/DataTypes.h"
@@ -104,7 +105,7 @@ class MachObjectWriter : public MCObjectWriter {
/// \name Symbol Table Data
/// @{
- StringTableBuilder StringTable;
+ StringTableBuilder StringTable{StringTableBuilder::MachO};
std::vector<MachSymbolData> LocalSymbolData;
std::vector<MachSymbolData> ExternalSymbolData;
std::vector<MachSymbolData> UndefinedSymbolData;
@@ -159,19 +160,21 @@ public:
/// @}
- void writeHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
- bool SubsectionsViaSymbols);
+ void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands,
+ unsigned LoadCommandsSize, bool SubsectionsViaSymbols);
/// Write a segment load command.
///
/// \param NumSections The number of sections in this segment.
/// \param SectionDataSize The total size of the sections.
- void writeSegmentLoadCommand(unsigned NumSections, uint64_t VMSize,
+ void writeSegmentLoadCommand(StringRef Name, unsigned NumSections,
+ uint64_t VMAddr, uint64_t VMSize,
uint64_t SectionDataStartOffset,
- uint64_t SectionDataSize);
+ uint64_t SectionDataSize, uint32_t MaxProt,
+ uint32_t InitProt);
- void writeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCSection &Sec, uint64_t FileOffset,
+ void writeSection(const MCAsmLayout &Layout, const MCSection &Sec,
+ uint64_t VMAddr, uint64_t FileOffset, unsigned Flags,
uint64_t RelocationsStart, unsigned NumRelocations);
void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
@@ -246,6 +249,11 @@ public:
const MCAsmLayout &Layout) override;
bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbol &A,
+ const MCSymbol &B,
+ bool InSet) const override;
+
+ bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
index 99e3f92..cf2c3f1 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -35,16 +35,18 @@ protected:
/// without an associated EH frame section.
bool SupportsCompactUnwindWithoutEHFrame;
- /// Some encoding values for EH.
+ /// OmitDwarfIfHaveCompactUnwind - True if the target object file
+ /// supports having some functions with compact unwind and others with
+ /// dwarf unwind.
+ bool OmitDwarfIfHaveCompactUnwind;
+
+ /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
+ /// for EH.
unsigned PersonalityEncoding;
unsigned LSDAEncoding;
unsigned FDECFIEncoding;
unsigned TTypeEncoding;
- /// Section flags for eh_frame
- unsigned EHSectionType;
- unsigned EHSectionFlags;
-
/// Compact unwind encoding indicating that we should emit only an EH frame.
unsigned CompactUnwindDwarfEHFrameOnly;
@@ -114,6 +116,10 @@ protected:
MCSection *DwarfStrOffDWOSection;
MCSection *DwarfAddrSection;
+ // These are for Fission DWP files.
+ MCSection *DwarfCUIndexSection;
+ MCSection *DwarfTUIndexSection;
+
/// Section for newer gnu pubnames.
MCSection *DwarfGnuPubNamesSection;
/// Section for newer gnu pubtypes.
@@ -147,10 +153,7 @@ protected:
MCSection *EHFrameSection;
// ELF specific sections.
- MCSection *DataRelSection;
- const MCSection *DataRelLocalSection;
MCSection *DataRelROSection;
- MCSection *DataRelROLocalSection;
MCSection *MergeableConst4Section;
MCSection *MergeableConst8Section;
MCSection *MergeableConst16Section;
@@ -200,6 +203,10 @@ public:
bool getSupportsCompactUnwindWithoutEHFrame() const {
return SupportsCompactUnwindWithoutEHFrame;
}
+ bool getOmitDwarfIfHaveCompactUnwind() const {
+ return OmitDwarfIfHaveCompactUnwind;
+ }
+
bool getCommDirectiveSupportsAlignment() const {
return CommDirectiveSupportsAlignment;
}
@@ -216,6 +223,7 @@ public:
MCSection *getTextSection() const { return TextSection; }
MCSection *getDataSection() const { return DataSection; }
MCSection *getBSSSection() const { return BSSSection; }
+ MCSection *getReadOnlySection() const { return ReadOnlySection; }
MCSection *getLSDASection() const { return LSDASection; }
MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
@@ -258,6 +266,8 @@ public:
MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; }
MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; }
MCSection *getDwarfAddrSection() const { return DwarfAddrSection; }
+ MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; }
+ MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; }
MCSection *getCOFFDebugSymbolsSection() const {
return COFFDebugSymbolsSection;
@@ -271,12 +281,7 @@ public:
MCSection *getFaultMapSection() const { return FaultMapSection; }
// ELF specific sections.
- MCSection *getDataRelSection() const { return DataRelSection; }
- const MCSection *getDataRelLocalSection() const {
- return DataRelLocalSection;
- }
MCSection *getDataRelROSection() const { return DataRelROSection; }
- MCSection *getDataRelROLocalSection() const { return DataRelROLocalSection; }
const MCSection *getMergeableConst4Section() const {
return MergeableConst4Section;
}
@@ -325,8 +330,6 @@ public:
MCSection *getSXDataSection() const { return SXDataSection; }
MCSection *getEHFrameSection() {
- if (!EHFrameSection)
- InitEHFrameSection();
return EHFrameSection;
}
@@ -346,9 +349,6 @@ private:
void initELFMCObjectFileInfo(Triple T);
void initCOFFMCObjectFileInfo(Triple T);
- /// Initialize EHFrameSection on demand.
- void InitEHFrameSection();
-
public:
const Triple &getTargetTriple() const { return TT; }
};
diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
index ce1fc80..9fe2fda 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -92,7 +92,7 @@ public:
void EmitLabel(MCSymbol *Symbol) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc()) override;
+ SMLoc Loc = SMLoc()) override;
void EmitULEB128Value(const MCExpr *Value) override;
void EmitSLEB128Value(const MCExpr *Value) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
@@ -112,7 +112,7 @@ public:
unsigned MaxBytesToEmit = 0) override;
void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0) override;
- bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value) override;
+ void emitValueToOffset(const MCExpr *Offset, unsigned char Value) override;
void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
@@ -124,8 +124,9 @@ public:
const MCSymbol *Label);
void EmitGPRel32Value(const MCExpr *Value) override;
void EmitGPRel64Value(const MCExpr *Value) override;
+ bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc) override;
void EmitFill(uint64_t NumBytes, uint8_t FillValue) override;
- void EmitZeros(uint64_t NumBytes) override;
void FinishImpl() override;
/// Emit the absolute difference between two symbols if possible.
diff --git a/contrib/llvm/include/llvm/MC/MCObjectWriter.h b/contrib/llvm/include/llvm/MC/MCObjectWriter.h
index 2211673..63c833a 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectWriter.h
@@ -40,14 +40,18 @@ class MCObjectWriter {
MCObjectWriter(const MCObjectWriter &) = delete;
void operator=(const MCObjectWriter &) = delete;
-protected:
- raw_pwrite_stream &OS;
+ raw_pwrite_stream *OS;
+protected:
unsigned IsLittleEndian : 1;
protected: // Can only create subclasses.
MCObjectWriter(raw_pwrite_stream &OS, bool IsLittleEndian)
- : OS(OS), IsLittleEndian(IsLittleEndian) {}
+ : OS(&OS), IsLittleEndian(IsLittleEndian) {}
+
+ unsigned getInitialOffset() {
+ return OS->tell();
+ }
public:
virtual ~MCObjectWriter();
@@ -57,7 +61,8 @@ public:
bool isLittleEndian() const { return IsLittleEndian; }
- raw_ostream &getStream() { return OS; }
+ raw_pwrite_stream &getStream() { return *OS; }
+ void setStream(raw_pwrite_stream &NewOS) { OS = &NewOS; }
/// \name High-Level API
/// @{
@@ -92,6 +97,11 @@ public:
bool InSet) const;
virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbol &A,
+ const MCSymbol &B,
+ bool InSet) const;
+
+ virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB,
bool InSet,
@@ -113,30 +123,30 @@ public:
/// \name Binary Output
/// @{
- void write8(uint8_t Value) { OS << char(Value); }
+ void write8(uint8_t Value) { *OS << char(Value); }
void writeLE16(uint16_t Value) {
- support::endian::Writer<support::little>(OS).write(Value);
+ support::endian::Writer<support::little>(*OS).write(Value);
}
void writeLE32(uint32_t Value) {
- support::endian::Writer<support::little>(OS).write(Value);
+ support::endian::Writer<support::little>(*OS).write(Value);
}
void writeLE64(uint64_t Value) {
- support::endian::Writer<support::little>(OS).write(Value);
+ support::endian::Writer<support::little>(*OS).write(Value);
}
void writeBE16(uint16_t Value) {
- support::endian::Writer<support::big>(OS).write(Value);
+ support::endian::Writer<support::big>(*OS).write(Value);
}
void writeBE32(uint32_t Value) {
- support::endian::Writer<support::big>(OS).write(Value);
+ support::endian::Writer<support::big>(*OS).write(Value);
}
void writeBE64(uint64_t Value) {
- support::endian::Writer<support::big>(OS).write(Value);
+ support::endian::Writer<support::big>(*OS).write(Value);
}
void write16(uint16_t Value) {
@@ -164,9 +174,9 @@ public:
const char Zeros[16] = {0};
for (unsigned i = 0, e = N / 16; i != e; ++i)
- OS << StringRef(Zeros, 16);
+ *OS << StringRef(Zeros, 16);
- OS << StringRef(Zeros, N % 16);
+ *OS << StringRef(Zeros, N % 16);
}
void writeBytes(const SmallVectorImpl<char> &ByteVec,
@@ -180,7 +190,7 @@ public:
assert(
(ZeroFillSize == 0 || Str.size() <= ZeroFillSize) &&
"data size greater than fill size, unexpected large write will occur");
- OS << Str;
+ *OS << Str;
if (ZeroFillSize)
WriteZeros(ZeroFillSize - Str.size());
}
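
The switch above from a stream reference to a pointer exists so the output can be retargeted after construction. A hedged sketch (the helper name is hypothetical; the setStream/getStream/write8 calls match the header above):

#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Redirect an existing writer to a new stream; subsequent writes land there.
void retarget(MCObjectWriter &Writer, raw_pwrite_stream &NewOS) {
  raw_pwrite_stream &Old = Writer.getStream(); // now typed raw_pwrite_stream&
  (void)Old;                                   // e.g. flush or record Old here
  Writer.setStream(NewOS);                     // new in this revision
  Writer.write8(0);                            // this byte goes to NewOS
}
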
diff --git a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 62d39b2..1bb6d21 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -47,7 +47,8 @@ public:
StringRef LexUntilEndOfStatement() override;
StringRef LexUntilEndOfLine();
- const AsmToken peekTok(bool ShouldSkipSpace = true) override;
+ size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+ bool ShouldSkipSpace = true) override;
bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 71f15b3..55279f4 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -118,7 +118,7 @@ public:
/// lexers.
class MCAsmLexer {
/// The current token, stored in the base class for faster access.
- AsmToken CurTok;
+ SmallVector<AsmToken, 1> CurTok;
/// The location and description of the current error
SMLoc ErrLoc;
@@ -135,7 +135,7 @@ protected: // Can only create subclasses.
virtual AsmToken LexToken() = 0;
- void SetError(const SMLoc &errLoc, const std::string &err) {
+ void SetError(SMLoc errLoc, const std::string &err) {
ErrLoc = errLoc;
Err = err;
}
@@ -148,7 +148,15 @@ public:
/// The lexer will continuously return the end-of-file token once the end of
/// the main input file has been reached.
const AsmToken &Lex() {
- return CurTok = LexToken();
+ assert(!CurTok.empty());
+ CurTok.erase(CurTok.begin());
+ if (CurTok.empty())
+ CurTok.emplace_back(LexToken());
+ return CurTok.front();
+ }
+
+ void UnLex(AsmToken const &Token) {
+ CurTok.insert(CurTok.begin(), Token);
}
virtual StringRef LexUntilEndOfStatement() = 0;
@@ -158,14 +166,28 @@ public:
/// Get the current (last) lexed token.
const AsmToken &getTok() const {
- return CurTok;
+ return CurTok[0];
}
/// Look ahead at the next token to be lexed.
- virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
+ const AsmToken peekTok(bool ShouldSkipSpace = true) {
+ AsmToken Tok;
+
+ MutableArrayRef<AsmToken> Buf(Tok);
+ size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
+
+ assert(ReadCount == 1);
+ (void)ReadCount;
+
+ return Tok;
+ }
+
+ /// Look ahead an arbitrary number of tokens.
+ virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+ bool ShouldSkipSpace = true) = 0;
/// Get the current error location
- const SMLoc &getErrLoc() {
+ SMLoc getErrLoc() {
return ErrLoc;
}
@@ -175,13 +197,13 @@ public:
}
/// Get the kind of current token.
- AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+ AsmToken::TokenKind getKind() const { return getTok().getKind(); }
/// Check if the current token has kind \p K.
- bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+ bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
/// Check if the current token has kind \p K.
- bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+ bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
/// Set whether spaces should be ignored by the lexer
void setSkipSpace(bool val) { SkipSpace = val; }
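
A sketch (not from this change; the predicate it checks is purely illustrative) of the multi-token lookahead the new peekTokens() interface enables, with UnLex() available for pushing a token back:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
using namespace llvm;

// Peek two tokens ahead without consuming them. peekTokens() fills at most
// Buf.size() entries and returns how many it actually read.
static bool looksLikeCall(MCAsmLexer &Lexer) {
  AsmToken Toks[2];
  MutableArrayRef<AsmToken> Buf(Toks, 2); // pointer+length ctor, as in peekTok()
  size_t Read = Lexer.peekTokens(Buf, /*ShouldSkipSpace=*/true);
  return Read == 2 && Toks[0].is(AsmToken::Identifier) &&
         Toks[1].is(AsmToken::LParen);
}
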
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 077fd21..30b25dc 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -71,6 +71,9 @@ public:
bool Error(SMLoc L, const Twine &Msg) {
return getParser().Error(L, Msg);
}
+ void Note(SMLoc L, const Twine &Msg) {
+ getParser().Note(L, Msg);
+ }
bool TokError(const Twine &Msg) {
return getParser().TokError(Msg);
}
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/contrib/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index a25108a..a90d280 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -30,8 +30,16 @@ class MCParsedAsmOperand {
/// MS-style inline assembly.
std::string Constraint;
+protected:
+ // This only seems to need to be movable (by ARMOperand) but ARMOperand has
+ // lots of members and MSVC doesn't support defaulted move ops, so to avoid
+ // that verbosity, just rely on defaulted copy ops. It's only the Constraint
+ // string member that would benefit from movement anyway.
+ MCParsedAsmOperand(const MCParsedAsmOperand &RHS) = default;
+ MCParsedAsmOperand &operator=(const MCParsedAsmOperand &) = default;
+ MCParsedAsmOperand() = default;
+
public:
- MCParsedAsmOperand() {}
virtual ~MCParsedAsmOperand() {}
void setConstraint(StringRef C) { Constraint = C.str(); }
diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
index 8e25ee1..a4d5e08 100644
--- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -632,7 +632,7 @@ private:
unsigned Reg;
const MCRegisterInfo *MCRI;
bool IncludeSelf;
-
+
MCRegUnitIterator RI;
MCRegUnitRootIterator RRI;
MCSuperRegIterator SI;
@@ -652,10 +652,8 @@ public:
}
}
- bool isValid() const {
- return RI.isValid();
- }
-
+ bool isValid() const { return RI.isValid(); }
+
unsigned operator*() const {
assert (SI.isValid() && "Cannot dereference an invalid iterator.");
return *SI;
diff --git a/contrib/llvm/include/llvm/MC/MCSchedule.h b/contrib/llvm/include/llvm/MC/MCSchedule.h
index c097916..d7f9b69 100644
--- a/contrib/llvm/include/llvm/MC/MCSchedule.h
+++ b/contrib/llvm/include/llvm/MC/MCSchedule.h
@@ -183,7 +183,7 @@ struct MCSchedModel {
// takes to recover from a branch misprediction.
unsigned MispredictPenalty;
static const unsigned DefaultMispredictPenalty = 10;
-
+
bool PostRAScheduler; // default value is false
bool CompleteModel;
@@ -206,6 +206,9 @@ struct MCSchedModel {
/// scheduling class (itinerary class or SchedRW list).
bool isComplete() const { return CompleteModel; }
+ /// Return true if machine supports out of order execution.
+ bool isOutOfOrder() const { return MicroOpBufferSize > 1; }
+
unsigned getNumProcResourceKinds() const {
return NumProcResourceKinds;
}
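
A small sketch (illustrative only; the helper and its constants are placeholders, not tuned values) of the kind of consumer the new isOutOfOrder() predicate serves:

#include "llvm/MC/MCSchedule.h"
using namespace llvm;

// MicroOpBufferSize > 1 means the core buffers micro-ops and can reorder
// them, so a scheduler might search a wider instruction window.
unsigned schedulingWindow(const MCSchedModel &SM) {
  return SM.isOutOfOrder() ? 8 : 1;
}
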
diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h
index 2d0d4df..09a9892 100644
--- a/contrib/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm/include/llvm/MC/MCSection.h
@@ -18,12 +18,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
-class MCAssembler;
class MCAsmInfo;
+class MCAssembler;
class MCContext;
class MCExpr;
class MCFragment;
@@ -92,6 +93,8 @@ private:
unsigned IsRegistered : 1;
+ MCDummyFragment DummyFragment;
+
FragmentListType Fragments;
/// Mapping from subsection number to insertion point for subsection numbers
@@ -102,10 +105,9 @@ protected:
MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin);
SectionVariant Variant;
SectionKind Kind;
+ ~MCSection();
public:
- virtual ~MCSection();
-
SectionKind getKind() const { return Kind; }
SectionVariant getVariant() const { return Variant; }
@@ -152,6 +154,14 @@ public:
return const_cast<MCSection *>(this)->getFragmentList();
}
+ /// Support for MCFragment::getNextNode().
+ static FragmentListType MCSection::*getSublistAccess(MCFragment *) {
+ return &MCSection::Fragments;
+ }
+
+ const MCDummyFragment &getDummyFragment() const { return DummyFragment; }
+ MCDummyFragment &getDummyFragment() { return DummyFragment; }
+
MCSection::iterator begin();
MCSection::const_iterator begin() const {
return const_cast<MCSection *>(this)->begin();
diff --git a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
index 237f6d3..d94682c 100644
--- a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -16,66 +16,63 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/Support/COFF.h"
namespace llvm {
class MCSymbol;
-/// MCSectionCOFF - This represents a section on Windows
- class MCSectionCOFF : public MCSection {
- // The memory for this string is stored in the same MCContext as *this.
- StringRef SectionName;
-
- // FIXME: The following fields should not be mutable, but are for now so
- // the asm parser can honor the .linkonce directive.
-
- /// Characteristics - This is the Characteristics field of a section,
- /// drawn from the enums below.
- mutable unsigned Characteristics;
-
- /// The COMDAT symbol of this section. Only valid if this is a COMDAT
- /// section. Two COMDAT sections are merged if they have the same
- /// COMDAT symbol.
- MCSymbol *COMDATSymbol;
-
- /// Selection - This is the Selection field for the section symbol, if
- /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0
- mutable int Selection;
-
- private:
- friend class MCContext;
- MCSectionCOFF(StringRef Section, unsigned Characteristics,
- MCSymbol *COMDATSymbol, int Selection, SectionKind K,
- MCSymbol *Begin)
- : MCSection(SV_COFF, K, Begin), SectionName(Section),
- Characteristics(Characteristics), COMDATSymbol(COMDATSymbol),
- Selection(Selection) {
- assert ((Characteristics & 0x00F00000) == 0 &&
- "alignment must not be set upon section creation");
- }
- ~MCSectionCOFF() override;
-
- public:
- /// ShouldOmitSectionDirective - Decides whether a '.section' directive
- /// should be printed before the section name
- bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
-
- StringRef getSectionName() const { return SectionName; }
- unsigned getCharacteristics() const { return Characteristics; }
- MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; }
- int getSelection() const { return Selection; }
-
- void setSelection(int Selection) const;
-
- void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS,
- const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
- bool isVirtualSection() const override;
-
- static bool classof(const MCSection *S) {
- return S->getVariant() == SV_COFF;
- }
- };
+/// This represents a section on Windows
+class MCSectionCOFF final : public MCSection {
+ // The memory for this string is stored in the same MCContext as *this.
+ StringRef SectionName;
+
+ // FIXME: The following fields should not be mutable, but are for now so the
+ // asm parser can honor the .linkonce directive.
+
+ /// This is the Characteristics field of a section, drawn from the enums
+ /// below.
+ mutable unsigned Characteristics;
+
+ /// The COMDAT symbol of this section. Only valid if this is a COMDAT section.
+ /// Two COMDAT sections are merged if they have the same COMDAT symbol.
+ MCSymbol *COMDATSymbol;
+
+ /// This is the Selection field for the section symbol, if it is a COMDAT
+ /// section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0
+ mutable int Selection;
+
+private:
+ friend class MCContext;
+ MCSectionCOFF(StringRef Section, unsigned Characteristics,
+ MCSymbol *COMDATSymbol, int Selection, SectionKind K,
+ MCSymbol *Begin)
+ : MCSection(SV_COFF, K, Begin), SectionName(Section),
+ Characteristics(Characteristics), COMDATSymbol(COMDATSymbol),
+ Selection(Selection) {
+ assert((Characteristics & 0x00F00000) == 0 &&
+ "alignment must not be set upon section creation");
+ }
+
+public:
+ ~MCSectionCOFF();
+
+ /// Decides whether a '.section' directive should be printed before the
+ /// section name
+ bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+
+ StringRef getSectionName() const { return SectionName; }
+ unsigned getCharacteristics() const { return Characteristics; }
+ MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; }
+ int getSelection() const { return Selection; }
+
+ void setSelection(int Selection) const;
+
+ void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS,
+ const MCExpr *Subsection) const override;
+ bool UseCodeAlign() const override;
+ bool isVirtualSection() const override;
+
+ static bool classof(const MCSection *S) { return S->getVariant() == SV_COFF; }
+};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCSectionELF.h b/contrib/llvm/include/llvm/MC/MCSectionELF.h
index f673037..b3bb3ad 100644
--- a/contrib/llvm/include/llvm/MC/MCSectionELF.h
+++ b/contrib/llvm/include/llvm/MC/MCSectionELF.h
@@ -25,25 +25,24 @@ namespace llvm {
class MCSymbol;
-/// MCSectionELF - This represents a section on linux, lots of unix variants
-/// and some bare metal systems.
-class MCSectionELF : public MCSection {
- /// SectionName - This is the name of the section. The referenced memory is
- /// owned by TargetLoweringObjectFileELF's ELFUniqueMap.
+/// This represents a section on linux, lots of unix variants and some bare
+/// metal systems.
+class MCSectionELF final : public MCSection {
+ /// This is the name of the section. The referenced memory is owned by
+ /// TargetLoweringObjectFileELF's ELFUniqueMap.
StringRef SectionName;
- /// Type - This is the sh_type field of a section, drawn from the enums below.
+ /// This is the sh_type field of a section, drawn from the enums below.
unsigned Type;
- /// Flags - This is the sh_flags field of a section, drawn from the enums.
- /// below.
+ /// This is the sh_flags field of a section, drawn from the enums below.
unsigned Flags;
unsigned UniqueID;
- /// EntrySize - The size of each entry in this section. This size only
- /// makes sense for sections that contain fixed-sized entries. If a
- /// section does not contain fixed-sized entries 'EntrySize' will be 0.
+ /// The size of each entry in this section. This size only makes sense for
+ /// sections that contain fixed-sized entries. If a section does not contain
+ /// fixed-sized entries 'EntrySize' will be 0.
unsigned EntrySize;
const MCSymbolELF *Group;
@@ -62,14 +61,14 @@ private:
if (Group)
Group->setIsSignature();
}
- ~MCSectionELF() override;
void setSectionName(StringRef Name) { SectionName = Name; }
public:
+ ~MCSectionELF();
- /// ShouldOmitSectionDirective - Decides whether a '.section' directive
- /// should be printed before the section name
+ /// Decides whether a '.section' directive should be printed before the
+ /// section name
bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
StringRef getSectionName() const { return SectionName; }
diff --git a/contrib/llvm/include/llvm/MC/MCSectionMachO.h b/contrib/llvm/include/llvm/MC/MCSectionMachO.h
index 9722751..658dfcd 100644
--- a/contrib/llvm/include/llvm/MC/MCSectionMachO.h
+++ b/contrib/llvm/include/llvm/MC/MCSectionMachO.h
@@ -20,19 +20,18 @@
namespace llvm {
-/// MCSectionMachO - This represents a section on a Mach-O system (used by
-/// Mac OS X). On a Mac system, these are also described in
-/// /usr/include/mach-o/loader.h.
-class MCSectionMachO : public MCSection {
+/// This represents a section on a Mach-O system (used by Mac OS X). On a Mac
+/// system, these are also described in /usr/include/mach-o/loader.h.
+class MCSectionMachO final : public MCSection {
char SegmentName[16]; // Not necessarily null terminated!
char SectionName[16]; // Not necessarily null terminated!
- /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES
- /// field of a section, drawn from the enums below.
+ /// This is the SECTION_TYPE and SECTION_ATTRIBUTES field of a section, drawn
+ /// from the enums below.
unsigned TypeAndAttributes;
- /// Reserved2 - The 'reserved2' field of a section, used to represent the
- /// size of stubs, for example.
+ /// The 'reserved2' field of a section, used to represent the size of stubs,
+ /// for example.
unsigned Reserved2;
MCSectionMachO(StringRef Segment, StringRef Section, unsigned TAA,
@@ -64,12 +63,12 @@ public:
return (TypeAndAttributes & Value) != 0;
}
- /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
- /// This is a string that can appear after a .section directive in a mach-o
- /// flavored .s file. If successful, this fills in the specified Out
- /// parameters and returns an empty string. When an invalid section
- /// specifier is present, this returns a string indicating the problem.
- /// If no TAA was parsed, TAA is not altered, and TAAWasSet becomes false.
+ /// Parse the section specifier indicated by "Spec". This is a string that can
+ /// appear after a .section directive in a mach-o flavored .s file. If
+ /// successful, this fills in the specified Out parameters and returns an
+ /// empty string. When an invalid section specifier is present, this returns
+ /// a string indicating the problem. If no TAA was parsed, TAA is not altered,
+ /// and TAAWasSet becomes false.
static std::string ParseSectionSpecifier(StringRef Spec, // In.
StringRef &Segment, // Out.
StringRef &Section, // Out.
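
The hunk above is cut off mid-signature; the trailing out-parameters used below (TAA, TAAParsed, StubSize) are assumed from the full header, which this hunk truncates. A usage sketch under that assumption:

#include "llvm/MC/MCSectionMachO.h"
#include <string>
using namespace llvm;

// Returns true on success; an empty result string signals success, and a
// non-empty one describes the problem, per the doc comment above.
bool parseMachOSectionSpec(StringRef Spec) {
  StringRef Segment, Section;
  unsigned TAA = 0, StubSize = 0;
  bool TAAParsed = false;
  std::string Err = MCSectionMachO::ParseSectionSpecifier(
      Spec, Segment, Section, TAA, TAAParsed, StubSize);
  return Err.empty();
}
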
diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h
index 6b9b8a1..494f02d 100644
--- a/contrib/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCStreamer.h
@@ -19,6 +19,7 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
@@ -33,7 +34,6 @@ class MCInst;
class MCInstPrinter;
class MCSection;
class MCStreamer;
-class MCSymbol;
class MCSymbolELF;
class MCSymbolRefExpr;
class MCSubtargetInfo;
@@ -134,7 +134,7 @@ public:
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
/// MCExpr that can be used to refer to the constant pool location.
- const MCExpr *addConstantPoolEntry(const MCExpr *);
+ const MCExpr *addConstantPoolEntry(const MCExpr *, SMLoc Loc);
/// Callback used to implement the .ltorg directive.
/// Emit contents of constant pool for the current section.
@@ -358,7 +358,7 @@ public:
///
/// Each emitted symbol will be tracked in the ordering table,
/// so we can sort on them later.
- void AssignSection(MCSymbol *Symbol, MCSection *Section);
+ void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);
/// \brief Emit a label for \p Symbol into the current section.
///
@@ -522,10 +522,9 @@ public:
/// match a native machine width.
/// \param Loc - The location of the expression for error reporting.
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc());
+ SMLoc Loc = SMLoc());
- void EmitValue(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc());
+ void EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc = SMLoc());
/// \brief Special case of EmitValue that avoids the client having
/// to pass in a MCExpr for constant integers.
@@ -568,7 +567,7 @@ public:
/// \brief Emit NumBytes worth of zeros.
/// This function properly handles data in virtual sections.
- virtual void EmitZeros(uint64_t NumBytes);
+ void EmitZeros(uint64_t NumBytes);
/// \brief Emit some number of copies of \p Value until the byte alignment \p
/// ByteAlignment is reached.
@@ -612,9 +611,7 @@ public:
/// \param Offset - The offset to reach. This may be an expression, but the
/// expression must be associated with the current section.
/// \param Value - The value to use when filling bytes.
- /// \return false on success, true if the offset was invalid.
- virtual bool EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0);
+ virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0);
/// @}
@@ -662,6 +659,7 @@ public:
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
virtual void EmitCFIEscape(StringRef Values);
+ virtual void EmitCFIGnuArgsSize(int64_t Size);
virtual void EmitCFISignalFrame();
virtual void EmitCFIUndefined(int64_t Register);
virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
@@ -682,6 +680,16 @@ public:
virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except);
virtual void EmitWinEHHandlerData();
+ virtual void EmitSyntaxDirective();
+
+ /// \brief Emit a .reloc directive.
+ /// Returns true if the relocation could not be emitted because Name is not
+ /// known.
+ virtual bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc) {
+ return true;
+ }
+
/// \brief Emit the given \p Instruction into the current section.
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
@@ -704,9 +712,6 @@ public:
/// the hasRawTextSupport() predicate. By default this aborts.
void EmitRawText(const Twine &String);
- /// \brief Causes any cached state to be written out.
- virtual void Flush() {}
-
/// \brief Streamer specific finalization.
virtual void FinishImpl();
/// \brief Finish emission of machine code.
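
A sketch of the calling convention for the new EmitRelocDirective hook (the wrapper is hypothetical; the return-value contract is the one stated in the header above):

#include "llvm/MC/MCStreamer.h"
using namespace llvm;

// The base implementation returns true, meaning "Name not known"; object
// streamers that understand .reloc override it and return false on success.
bool tryEmitReloc(MCStreamer &S, const MCExpr &Offset, StringRef Name,
                  const MCExpr *Expr, SMLoc Loc) {
  if (S.EmitRelocDirective(Offset, Name, Expr, Loc))
    return true; // caller should diagnose an unknown relocation name at Loc
  return false;
}
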
diff --git a/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h b/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
index d5ad4ee..446feef 100644
--- a/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -86,8 +86,9 @@ protected:
void InitMCProcessorInfo(StringRef CPU, StringRef FS);
public:
- /// Set the features to the default for the given CPU.
- void setDefaultFeatures(StringRef CPU);
+ /// Set the features to the default for the given CPU with an appended feature
+ /// string.
+ void setDefaultFeatures(StringRef CPU, StringRef FS);
/// ToggleFeature - Toggle a feature and returns the re-computed feature
/// bits. This version does not change the implied bits.
@@ -159,11 +160,8 @@ public:
/// Check whether the CPU string is valid.
bool isCPUStringValid(StringRef CPU) const {
- auto Found = std::find_if(ProcDesc.begin(), ProcDesc.end(),
- [=](const SubtargetFeatureKV &KV) {
- return CPU == KV.Key;
- });
- return Found != ProcDesc.end();
+ auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
+ return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
};
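
The rewrite of isCPUStringValid() above replaces a linear scan with std::lower_bound, which is only correct because ProcDesc is sorted by Key. A standalone sketch of the same pattern, with a plain struct standing in for SubtargetFeatureKV and a heterogeneous operator< assumed to mirror the one in the full header:

#include <algorithm>
#include <string>
#include <vector>

struct KV { std::string Key; };
// Comparison used by lower_bound: orders entries against a bare CPU string.
static bool operator<(const KV &LHS, const std::string &RHS) {
  return LHS.Key < RHS;
}

bool isCPUValid(const std::vector<KV> &SortedProcDesc, const std::string &CPU) {
  auto It = std::lower_bound(SortedProcDesc.begin(), SortedProcDesc.end(), CPU);
  return It != SortedProcDesc.end() && It->Key == CPU;
}
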
diff --git a/contrib/llvm/include/llvm/MC/MCSymbol.h b/contrib/llvm/include/llvm/MC/MCSymbol.h
index b2910df..c51ecfc 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbol.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbol.h
@@ -56,19 +56,17 @@ protected:
SymContentsCommon,
};
- // Special sentinal value for the absolute pseudo section.
- //
- // FIXME: Use a PointerInt wrapper for this?
- static MCSection *AbsolutePseudoSection;
+ // Special sentinel value for the absolute pseudo fragment.
+ static MCFragment *AbsolutePseudoFragment;
/// If a symbol has a Fragment, the section is implied, so we only need
/// one pointer.
+ /// The special AbsolutePseudoFragment value is for absolute symbols.
+ /// If this is a variable symbol, this caches the variable value's fragment.
/// FIXME: We might be able to simplify this by having the asm streamer create
/// dummy fragments.
/// If this is a section, then it gives the section the symbol is defined in. This is null
- /// for undefined symbols, and the special AbsolutePseudoSection value for
- /// absolute symbols. If this is a variable symbol, this caches the variable
- /// value's section.
+ /// for undefined symbols.
///
/// If this is a fragment, then it gives the fragment this symbol's value is
/// relative to, if any.
@@ -76,8 +74,7 @@ protected:
/// For the 'HasName' integer, this is true if this symbol is named.
/// A named symbol will have a pointer to the name allocated in the bytes
/// immediately prior to the MCSymbol.
- mutable PointerIntPair<PointerUnion<MCSection *, MCFragment *>, 1>
- SectionOrFragmentAndHasName;
+ mutable PointerIntPair<MCFragment *, 1> FragmentAndHasName;
/// IsTemporary - True if this is an assembler temporary label, which
/// typically does not survive in the .o file's symbol table. Usually
@@ -155,7 +152,7 @@ protected: // MCContext creates and uniques these.
Kind(Kind), IsUsedInReloc(false), SymbolContents(SymContentsUnset),
CommonAlignLog2(0), Flags(0) {
Offset = 0;
- SectionOrFragmentAndHasName.setInt(!!Name);
+ FragmentAndHasName.setInt(!!Name);
if (Name)
getNameEntryPtr() = Name;
}
@@ -179,20 +176,17 @@ private:
MCSymbol(const MCSymbol &) = delete;
void operator=(const MCSymbol &) = delete;
- MCSection *getSectionPtr() const {
- if (MCFragment *F = getFragment())
+ MCSection *getSectionPtr(bool SetUsed = true) const {
+ if (MCFragment *F = getFragment(SetUsed)) {
+ assert(F != AbsolutePseudoFragment);
return F->getParent();
- const auto &SectionOrFragment = SectionOrFragmentAndHasName.getPointer();
- assert(!SectionOrFragment.is<MCFragment *>() && "Section or null expected");
- MCSection *Section = SectionOrFragment.dyn_cast<MCSection *>();
- if (Section || !isVariable())
- return Section;
- return Section = getVariableValue()->findAssociatedSection();
+ }
+ return nullptr;
}
/// \brief Get a reference to the name field. Requires that we have a name
const StringMapEntry<bool> *&getNameEntryPtr() {
- assert(SectionOrFragmentAndHasName.getInt() && "Name is required");
+ assert(FragmentAndHasName.getInt() && "Name is required");
NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
return (*(Name - 1)).NameEntry;
}
@@ -203,7 +197,7 @@ private:
public:
/// getName - Get the symbol name.
StringRef getName() const {
- if (!SectionOrFragmentAndHasName.getInt())
+ if (!FragmentAndHasName.getInt())
return StringRef();
return getNameEntryPtr()->first();
@@ -223,7 +217,7 @@ public:
/// isUsed - Check if this is used.
bool isUsed() const { return IsUsed; }
- void setUsed(bool Value) const { IsUsed = Value; }
+ void setUsed(bool Value) const { IsUsed |= Value; }
/// \brief Check if this symbol is redefinable.
bool isRedefinable() const { return IsRedefinable; }
@@ -248,37 +242,38 @@ public:
/// isDefined - Check if this symbol is defined (i.e., it has an address).
///
/// Defined symbols are either absolute or in some section.
- bool isDefined() const { return getSectionPtr() != nullptr; }
+ bool isDefined(bool SetUsed = true) const {
+ return getFragment(SetUsed) != nullptr;
+ }
/// isInSection - Check if this symbol is defined in some section (i.e., it
/// is defined but not absolute).
- bool isInSection() const { return isDefined() && !isAbsolute(); }
+ bool isInSection(bool SetUsed = true) const {
+ return isDefined(SetUsed) && !isAbsolute(SetUsed);
+ }
/// isUndefined - Check if this symbol undefined (i.e., implicitly defined).
- bool isUndefined() const { return !isDefined(); }
+ bool isUndefined(bool SetUsed = true) const { return !isDefined(SetUsed); }
/// isAbsolute - Check if this is an absolute symbol.
- bool isAbsolute() const { return getSectionPtr() == AbsolutePseudoSection; }
+ bool isAbsolute(bool SetUsed = true) const {
+ return getFragment(SetUsed) == AbsolutePseudoFragment;
+ }
/// Get the section associated with a defined, non-absolute symbol.
- MCSection &getSection() const {
- assert(isInSection() && "Invalid accessor!");
- return *getSectionPtr();
+ MCSection &getSection(bool SetUsed = true) const {
+ assert(isInSection(SetUsed) && "Invalid accessor!");
+ return *getSectionPtr(SetUsed);
}
- /// Mark the symbol as defined in the section \p S.
- void setSection(MCSection &S) {
- assert(!isVariable() && "Cannot set section of variable");
- assert(!SectionOrFragmentAndHasName.getPointer().is<MCFragment *>() &&
- "Section or null expected");
- SectionOrFragmentAndHasName.setPointer(&S);
+ /// Mark the symbol as defined in the fragment \p F.
+ void setFragment(MCFragment *F) const {
+ assert(!isVariable() && "Cannot set fragment of variable");
+ FragmentAndHasName.setPointer(F);
}
/// Mark the symbol as undefined.
- void setUndefined() {
- SectionOrFragmentAndHasName.setPointer(
- PointerUnion<MCSection *, MCFragment *>());
- }
+ void setUndefined() { FragmentAndHasName.setPointer(nullptr); }
bool isELF() const { return Kind == SymbolKindELF; }
@@ -295,10 +290,10 @@ public:
return SymbolContents == SymContentsVariable;
}
- /// getVariableValue() - Get the value for variable symbols.
- const MCExpr *getVariableValue() const {
+ /// getVariableValue - Get the value for variable symbols.
+ const MCExpr *getVariableValue(bool SetUsed = true) const {
assert(isVariable() && "Invalid accessor!");
- IsUsed = true;
+ IsUsed |= SetUsed;
return Value;
}
@@ -379,11 +374,13 @@ public:
return SymbolContents == SymContentsCommon;
}
- MCFragment *getFragment() const {
- return SectionOrFragmentAndHasName.getPointer().dyn_cast<MCFragment *>();
- }
- void setFragment(MCFragment *Value) const {
- SectionOrFragmentAndHasName.setPointer(Value);
+ MCFragment *getFragment(bool SetUsed = true) const {
+ MCFragment *Fragment = FragmentAndHasName.getPointer();
+ if (Fragment || !isVariable())
+ return Fragment;
+ Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
+ FragmentAndHasName.setPointer(Fragment);
+ return Fragment;
}
bool isExternal() const { return IsExternal; }
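
A sketch (not part of this change; the helper is hypothetical) showing the effect of the new representation: getFragment() now resolves a variable symbol's fragment lazily and caches it, and SetUsed=false queries avoid marking the symbol used:

#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

// Mirrors the private getSectionPtr() above: the section is now always
// derived from the fragment, never stored separately.
MCSection *sectionOf(const MCSymbol &Sym) {
  if (Sym.isAbsolute(/*SetUsed=*/false))
    return nullptr; // absolute symbols carry the pseudo fragment, no section
  if (MCFragment *F = Sym.getFragment(/*SetUsed=*/false))
    return F->getParent();
  return nullptr; // undefined symbol
}
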
diff --git a/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h b/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
index 36db391..03b2dc9 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
@@ -20,6 +20,7 @@ class AsmToken;
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
+class MCSubtargetInfo;
class SMLoc;
class StringRef;
template <typename T> class SmallVectorImpl;
@@ -29,6 +30,7 @@ typedef SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> OperandVector;
enum AsmRewriteKind {
AOK_Delete = 0, // Rewrite should be ignored.
AOK_Align, // Rewrite align as .align.
+ AOK_EVEN, // Rewrite even as .even.
AOK_DotOperator, // Rewrite a dot operator expression as an immediate.
// E.g., [eax].foo.bar -> [eax].8
AOK_Emit, // Rewrite _emit as .byte.
@@ -44,6 +46,7 @@ enum AsmRewriteKind {
const char AsmRewritePrecedence [] = {
0, // AOK_Delete
2, // AOK_Align
+ 2, // AOK_EVEN
2, // AOK_DotOperator
2, // AOK_Emit
4, // AOK_Imm
@@ -92,7 +95,10 @@ private:
MCTargetAsmParser(const MCTargetAsmParser &) = delete;
void operator=(const MCTargetAsmParser &) = delete;
protected: // Can only create subclasses.
- MCTargetAsmParser();
+ MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI);
+
+ /// Create a copy of STI and return a non-const reference to it.
+ MCSubtargetInfo &copySTI();
/// AvailableFeatures - The current set of available features.
uint64_t AvailableFeatures;
@@ -107,9 +113,14 @@ protected: // Can only create subclasses.
/// Set of options which affects instrumentation of inline assembly.
MCTargetOptions MCOptions;
+ /// Current STI.
+ const MCSubtargetInfo *STI;
+
public:
~MCTargetAsmParser() override;
+ const MCSubtargetInfo &getSTI() const;
+
uint64_t getAvailableFeatures() const { return AvailableFeatures; }
void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
@@ -143,6 +154,10 @@ public:
/// \return True on failure.
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) = 0;
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ AsmToken Token, OperandVector &Operands) {
+ return ParseInstruction(Info, Name, Token.getLoc(), Operands);
+ }
/// ParseDirective - Parse a target specific assembler directive
///
@@ -156,10 +171,6 @@ public:
/// \param DirectiveID - the identifier token of the directive.
virtual bool ParseDirective(AsmToken DirectiveID) = 0;
- /// mnemonicIsValid - This returns true if this is a valid mnemonic and false
- /// otherwise.
- virtual bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) = 0;
-
/// MatchAndEmitInstruction - Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
/// This returns false on success and returns true on failure to match.
@@ -192,13 +203,18 @@ public:
virtual void convertToMapAndConstraints(unsigned Kind,
const OperandVector &Operands) = 0;
+ // Return whether this parser uses assignment statements with equals tokens
+ virtual bool equalIsAsmAssignment() { return true; };
+ // Return whether this start of statement identifier is a label
+ virtual bool isLabel(AsmToken &Token) { return true; };
+
virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind,
MCContext &Ctx) {
return nullptr;
}
- virtual void onLabelParsed(MCSymbol *Symbol) { };
+ virtual void onLabelParsed(MCSymbol *Symbol) { }
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm/include/llvm/MC/MCTargetOptions.h
index 7f4f23e..4b66a75 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetOptions.h
@@ -29,8 +29,10 @@ public:
bool MCRelaxAll : 1;
bool MCNoExecStack : 1;
bool MCFatalWarnings : 1;
+ bool MCNoWarn : 1;
bool MCSaveTempLabels : 1;
bool MCUseDwarfDirectory : 1;
+ bool MCIncrementalLinkerCompatible : 1;
bool ShowMCEncoding : 1;
bool ShowMCInst : 1;
bool AsmVerbose : 1;
@@ -49,8 +51,10 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) {
ARE_EQUAL(MCRelaxAll) &&
ARE_EQUAL(MCNoExecStack) &&
ARE_EQUAL(MCFatalWarnings) &&
+ ARE_EQUAL(MCNoWarn) &&
ARE_EQUAL(MCSaveTempLabels) &&
ARE_EQUAL(MCUseDwarfDirectory) &&
+ ARE_EQUAL(MCIncrementalLinkerCompatible) &&
ARE_EQUAL(ShowMCEncoding) &&
ARE_EQUAL(ShowMCInst) &&
ARE_EQUAL(AsmVerbose) &&
diff --git a/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index af23a92..5180208 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -33,6 +33,12 @@ cl::opt<bool> RelaxAll("mc-relax-all",
cl::desc("When used with filetype=obj, "
"relax all fixups in the emitted object file"));
+cl::opt<bool> IncrementalLinkerCompatible(
+ "incremental-linker-compatible",
+ cl::desc(
+ "When used with filetype=obj, "
+ "emit an object file which can be used with an incremental linker"));
+
cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"),
cl::init(0));
@@ -40,6 +46,12 @@ cl::opt<bool> ShowMCInst("asm-show-inst",
cl::desc("Emit internal instruction representation to "
"assembly file"));
+cl::opt<bool> FatalWarnings("fatal-warnings",
+ cl::desc("Treat warnings as errors"));
+
+cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings"));
+cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn));
+
cl::opt<std::string>
ABIName("target-abi", cl::Hidden,
cl::desc("The name of the ABI to be targeted from the backend."),
@@ -50,9 +62,12 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() {
Options.SanitizeAddress =
(AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress);
Options.MCRelaxAll = RelaxAll;
+ Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
Options.DwarfVersion = DwarfVersion;
Options.ShowMCInst = ShowMCInst;
Options.ABIName = ABIName;
+ Options.MCFatalWarnings = FatalWarnings;
+ Options.MCNoWarn = NoWarn;
return Options;
}
diff --git a/contrib/llvm/include/llvm/MC/MCValue.h b/contrib/llvm/include/llvm/MC/MCValue.h
index 6bdf436..ead08fd 100644
--- a/contrib/llvm/include/llvm/MC/MCValue.h
+++ b/contrib/llvm/include/llvm/MC/MCValue.h
@@ -35,10 +35,6 @@ class raw_ostream;
/// relocation modifiers apply to the closest symbol or the whole
/// expression.
///
-/// In the general form, SymbolB can only be defined if SymbolA is, and both
-/// must be in the same (non-external) section. The latter constraint is not
-/// enforced, since a symbol's section may not be known at construction.
-///
/// Note that this class must remain a simple POD value class, because we need
/// it to live in unions etc.
class MCValue {
@@ -67,7 +63,6 @@ public:
const MCSymbolRefExpr *SymB = nullptr,
int64_t Val = 0, uint32_t RefKind = 0) {
MCValue R;
- assert((!SymB || SymA) && "Invalid relocatable MCValue!");
R.Cst = Val;
R.SymA = SymA;
R.SymB = SymB;
diff --git a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 6fbc754..fe1ada9 100644
--- a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -73,7 +73,7 @@ protected:
void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override;
private:
- LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const;
+ void Error(const Twine &Msg) const;
};
}
diff --git a/contrib/llvm/include/llvm/MC/MachineLocation.h b/contrib/llvm/include/llvm/MC/MachineLocation.h
index 2a18615..4b5cf43 100644
--- a/contrib/llvm/include/llvm/MC/MachineLocation.h
+++ b/contrib/llvm/include/llvm/MC/MachineLocation.h
@@ -68,10 +68,6 @@ public:
Register = R;
Offset = O;
}
-
-#ifndef NDEBUG
- void dump();
-#endif
};
inline bool operator!=(const MachineLocation &LHS, const MachineLocation &RHS) {
diff --git a/contrib/llvm/include/llvm/MC/SectionKind.h b/contrib/llvm/include/llvm/MC/SectionKind.h
index 9e8b68f..b09b93c 100644
--- a/contrib/llvm/include/llvm/MC/SectionKind.h
+++ b/contrib/llvm/include/llvm/MC/SectionKind.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===//
+//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,11 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements classes used to handle lowerings specific to common
-// object file formats.
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_SECTIONKIND_H
#define LLVM_MC_SECTIONKIND_H
@@ -99,21 +94,8 @@ class SectionKind {
/// marked 'constant'.
Common,
- /// DataRel - This is the most general form of data that is written
- /// to by the program, it can have random relocations to arbitrary
- /// globals.
- DataRel,
-
- /// DataRelLocal - This is writeable data that has a non-zero
- /// initializer and has relocations in it, but all of the
- /// relocations are known to be within the final linked image
- /// the global is linked into.
- DataRelLocal,
-
- /// DataNoRel - This is writeable data that has a non-zero
- /// initializer, but whose initializer is known to have no
- /// relocations.
- DataNoRel,
+ /// This is writeable data that has a non-zero initializer.
+ Data,
/// ReadOnlyWithRel - These are global variables that are never
/// written to by the program, but that have relocations, so they
@@ -121,15 +103,7 @@ class SectionKind {
/// can write to them. If it chooses to, the dynamic linker can
/// mark the pages these globals end up on as read-only after it is
/// done with its relocation phase.
- ReadOnlyWithRel,
-
- /// ReadOnlyWithRelLocal - This is data that is readonly by the
- /// program, but must be writeable so that the dynamic linker
- /// can perform relocations in it. This is used when we know
- /// that all the relocations are to globals in this final
- /// linked image.
- ReadOnlyWithRelLocal
-
+ ReadOnlyWithRel
} K : 8;
public:
@@ -169,7 +143,7 @@ public:
bool isThreadData() const { return K == ThreadData; }
bool isGlobalWriteableData() const {
- return isBSS() || isCommon() || isDataRel() || isReadOnlyWithRel();
+ return isBSS() || isCommon() || isData() || isReadOnlyWithRel();
}
bool isBSS() const { return K == BSS || K == BSSLocal || K == BSSExtern; }
@@ -178,22 +152,10 @@ public:
bool isCommon() const { return K == Common; }
- bool isDataRel() const {
- return K == DataRel || K == DataRelLocal || K == DataNoRel;
- }
-
- bool isDataRelLocal() const {
- return K == DataRelLocal || K == DataNoRel;
- }
-
- bool isDataNoRel() const { return K == DataNoRel; }
+ bool isData() const { return K == Data; }
bool isReadOnlyWithRel() const {
- return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal;
- }
-
- bool isReadOnlyWithRelLocal() const {
- return K == ReadOnlyWithRelLocal;
+ return K == ReadOnlyWithRel;
}
private:
static SectionKind get(Kind K) {
@@ -224,13 +186,8 @@ public:
static SectionKind getBSSLocal() { return get(BSSLocal); }
static SectionKind getBSSExtern() { return get(BSSExtern); }
static SectionKind getCommon() { return get(Common); }
- static SectionKind getDataRel() { return get(DataRel); }
- static SectionKind getDataRelLocal() { return get(DataRelLocal); }
- static SectionKind getDataNoRel() { return get(DataNoRel); }
+ static SectionKind getData() { return get(Data); }
static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); }
- static SectionKind getReadOnlyWithRelLocal(){
- return get(ReadOnlyWithRelLocal);
- }
};
} // end namespace llvm
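
A sketch of how a lowering might classify a global after this simplification; the helper and its flags are hypothetical:

#include "llvm/MC/SectionKind.h"
using namespace llvm;

// Previously a writeable initialized global picked one of DataRel,
// DataRelLocal, or DataNoRel depending on its relocations; after this
// change they all collapse to plain Data.
SectionKind classifyGlobal(bool Writeable, bool HasRelocations) {
  if (!Writeable)
    return HasRelocations ? SectionKind::getReadOnlyWithRel()
                          : SectionKind::getReadOnly();
  return SectionKind::getData();
}
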
diff --git a/contrib/llvm/include/llvm/MC/StringTableBuilder.h b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
index 897d449..adde86b 100644
--- a/contrib/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
@@ -11,53 +11,51 @@
#define LLVM_MC_STRINGTABLEBUILDER_H
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/DenseMap.h"
#include <cassert>
namespace llvm {
/// \brief Utility for building string tables with deduplicated suffixes.
class StringTableBuilder {
+public:
+ enum Kind { ELF, WinCOFF, MachO, RAW };
+
+private:
SmallString<256> StringTable;
- StringMap<size_t> StringIndexMap;
+ DenseMap<StringRef, size_t> StringIndexMap;
+ size_t Size = 0;
+ Kind K;
public:
- /// \brief Add a string to the builder. Returns a StringRef to the internal
- /// copy of s. Can only be used before the table is finalized.
- StringRef add(StringRef s) {
- assert(!isFinalized());
- return StringIndexMap.insert(std::make_pair(s, 0)).first->first();
- }
+ StringTableBuilder(Kind K);
- enum Kind {
- ELF,
- WinCOFF,
- MachO
- };
+ /// \brief Add a string to the builder. Returns the position of S in the
+ /// table. The position will be changed if finalize is used.
+ /// Can only be used before the table is finalized.
+ size_t add(StringRef S);
/// \brief Analyze the strings and build the final table. No more strings can
/// be added after this point.
- void finalize(Kind kind);
+ void finalize();
/// \brief Retrieve the string table data. Can only be used after the table
/// is finalized.
- StringRef data() {
+ StringRef data() const {
assert(isFinalized());
return StringTable;
}
/// \brief Get the offset of a string in the string table. Can only be used
/// after the table is finalized.
- size_t getOffset(StringRef s) {
- assert(isFinalized());
- assert(StringIndexMap.count(s) && "String is not in table!");
- return StringIndexMap[s];
- }
+ size_t getOffset(StringRef S) const;
+ const DenseMap<StringRef, size_t> &getMap() const { return StringIndexMap; }
+ size_t getSize() const { return Size; }
void clear();
private:
- bool isFinalized() {
+ bool isFinalized() const {
return !StringTable.empty();
}
};
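
A usage sketch for the reworked builder interface (the strings are placeholders); note that add() now returns a provisional position that may change until finalize() runs:

#include "llvm/MC/StringTableBuilder.h"
using namespace llvm;

void buildTable() {
  StringTableBuilder Builder(StringTableBuilder::ELF);
  Builder.add("foo");     // provisional position; may move during layout
  Builder.add("barfoo");  // "foo" can be folded into this entry's suffix
  Builder.finalize();     // layout fixed; no more add() calls
  size_t Off = Builder.getOffset("foo"); // offset is stable from here on
  (void)Off;
  StringRef Blob = Builder.data();       // the finished table bytes
  (void)Blob;
}
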
diff --git a/contrib/llvm/include/llvm/MC/SubtargetFeature.h b/contrib/llvm/include/llvm/MC/SubtargetFeature.h
index 2fb9b4a..0d97b22 100644
--- a/contrib/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/contrib/llvm/include/llvm/MC/SubtargetFeature.h
@@ -30,7 +30,7 @@ namespace llvm {
// A container class for subtarget features.
// This is convenient because std::bitset does not have a constructor
// with an initializer list of set bits.
-const unsigned MAX_SUBTARGET_FEATURES = 64;
+const unsigned MAX_SUBTARGET_FEATURES = 128;
class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
public:
// Cannot inherit constructors because it's not supported by VC++..
diff --git a/contrib/llvm/include/llvm/Object/Archive.h b/contrib/llvm/include/llvm/Object/Archive.h
index 597f0d4..8dd042a 100644
--- a/contrib/llvm/include/llvm/Object/Archive.h
+++ b/contrib/llvm/include/llvm/Object/Archive.h
@@ -37,7 +37,7 @@ struct ArchiveMemberHeader {
llvm::StringRef getName() const;
/// Members are not larger than 4GB.
- uint32_t getSize() const;
+ ErrorOr<uint32_t> getSize() const;
sys::fs::perms getAccessMode() const;
sys::TimeValue getLastModified() const;
@@ -52,6 +52,7 @@ class Archive : public Binary {
virtual void anchor();
public:
class Child {
+ friend Archive;
const Archive *Parent;
/// \brief Includes header but not padding byte.
StringRef Data;
@@ -62,19 +63,19 @@ public:
return reinterpret_cast<const ArchiveMemberHeader *>(Data.data());
}
+ bool isThinMember() const;
+
public:
- Child(const Archive *Parent, const char *Start);
+ Child(const Archive *Parent, const char *Start, std::error_code *EC);
+ Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
bool operator ==(const Child &other) const {
assert(Parent == other.Parent);
return Data.begin() == other.Data.begin();
}
- bool operator <(const Child &other) const {
- return Data.begin() < other.Data.begin();
- }
-
- Child getNext() const;
+ const Archive *getParent() const { return Parent; }
+ ErrorOr<Child> getNext() const;
ErrorOr<StringRef> getName() const;
StringRef getRawName() const { return getHeader()->getName(); }
@@ -90,9 +91,9 @@ public:
return getHeader()->getAccessMode();
}
/// \return the size of the archive member without the header or padding.
- uint64_t getSize() const;
+ ErrorOr<uint64_t> getSize() const;
/// \return the size in the archive header for this member.
- uint64_t getRawSize() const;
+ ErrorOr<uint64_t> getRawSize() const;
ErrorOr<StringRef> getBuffer() const;
uint64_t getChildOffset() const;
@@ -104,28 +105,32 @@ public:
};
class child_iterator {
- Child child;
+ ErrorOr<Child> child;
public:
- child_iterator() : child(Child(nullptr, nullptr)) {}
+ child_iterator() : child(Child(nullptr, nullptr, nullptr)) {}
child_iterator(const Child &c) : child(c) {}
- const Child *operator->() const { return &child; }
- const Child &operator*() const { return child; }
+ child_iterator(std::error_code EC) : child(EC) {}
+ const ErrorOr<Child> *operator->() const { return &child; }
+ const ErrorOr<Child> &operator*() const { return child; }
bool operator==(const child_iterator &other) const {
- return child == other.child;
+ // We ignore error states so that comparisons with end() work, which
+ // allows range loops.
+ if (child.getError() || other.child.getError())
+ return false;
+ return *child == *other.child;
}
bool operator!=(const child_iterator &other) const {
return !(*this == other);
}
- bool operator<(const child_iterator &other) const {
- return child < other.child;
- }
-
+ // Code in loops with child_iterators must check for errors on each loop
+ // iteration. And if there is an error break out of the loop.
child_iterator &operator++() { // Preincrement
- child = child.getNext();
+ assert(child && "Can't increment iterator with error");
+ child = child->getNext();
return *this;
}
};
@@ -145,7 +150,7 @@ public:
, SymbolIndex(symi)
, StringIndex(stri) {}
StringRef getName() const;
- ErrorOr<child_iterator> getMember() const;
+ ErrorOr<Child> getMember() const;
Symbol getNext() const;
};
@@ -186,14 +191,13 @@ public:
child_iterator child_begin(bool SkipInternal = true) const;
child_iterator child_end() const;
iterator_range<child_iterator> children(bool SkipInternal = true) const {
- return iterator_range<child_iterator>(child_begin(SkipInternal),
- child_end());
+ return make_range(child_begin(SkipInternal), child_end());
}
symbol_iterator symbol_begin() const;
symbol_iterator symbol_end() const;
iterator_range<symbol_iterator> symbols() const {
- return iterator_range<symbol_iterator>(symbol_begin(), symbol_end());
+ return make_range(symbol_begin(), symbol_end());
}
// Cast methods.
@@ -205,18 +209,17 @@ public:
child_iterator findSym(StringRef name) const;
bool hasSymbolTable() const;
- child_iterator getSymbolTableChild() const { return SymbolTable; }
- StringRef getSymbolTable() const {
- // We know that the symbol table is not an external file,
- // so we just assert there is no error.
- return *SymbolTable->getBuffer();
- }
+ StringRef getSymbolTable() const { return SymbolTable; }
uint32_t getNumberOfSymbols() const;
private:
- child_iterator SymbolTable;
- child_iterator StringTable;
- child_iterator FirstRegular;
+ StringRef SymbolTable;
+ StringRef StringTable;
+
+ StringRef FirstRegularData;
+ uint16_t FirstRegularStartOfFile = -1;
+ void setFirstRegular(const Child &C);
+
unsigned Format : 2;
unsigned IsThin : 1;
mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
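
A sketch of the new error-aware iteration contract (the helper is hypothetical; "A" is an already-loaded Archive). Each element is now an ErrorOr<Child>, and the loop must stop on the first error because an errored iterator never compares equal to end():

#include "llvm/Object/Archive.h"
#include "llvm/Support/ErrorOr.h"
using namespace llvm;
using namespace llvm::object;

std::error_code sumMemberSizes(const Archive &A, uint64_t &Total) {
  Total = 0;
  for (const ErrorOr<Archive::Child> &ChildOrErr : A.children()) {
    if (std::error_code EC = ChildOrErr.getError())
      return EC; // break out of the loop on error, per the header comment
    ErrorOr<uint64_t> Size = ChildOrErr->getSize();
    if (std::error_code EC = Size.getError())
      return EC;
    Total += *Size;
  }
  return std::error_code();
}
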
diff --git a/contrib/llvm/include/llvm/Object/ArchiveWriter.h b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
index 3648d0c..b5d2ba3 100644
--- a/contrib/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
@@ -24,17 +24,15 @@ class NewArchiveIterator {
bool IsNewMember;
StringRef Name;
- object::Archive::child_iterator OldI;
-
- StringRef NewFilename;
+ object::Archive::Child OldMember;
public:
- NewArchiveIterator(object::Archive::child_iterator I, StringRef Name);
- NewArchiveIterator(StringRef I, StringRef Name);
+ NewArchiveIterator(const object::Archive::Child &OldMember, StringRef Name);
+ NewArchiveIterator(StringRef FileName);
bool isNewMember() const;
StringRef getName() const;
- object::Archive::child_iterator getOld() const;
+ const object::Archive::Child &getOld() const;
StringRef getNew() const;
llvm::ErrorOr<int> getFD(sys::fs::file_status &NewStatus) const;
@@ -43,7 +41,8 @@ public:
std::pair<StringRef, std::error_code>
writeArchive(StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic);
+ bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic,
+ bool Thin);
}
#endif
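
A call sketch for the extended writeArchive signature above; the archive name and kind are placeholders, and only the error-code half of the result is inspected here:

#include "llvm/Object/ArchiveWriter.h"
#include <vector>
using namespace llvm;

std::error_code writeThinArchive(std::vector<NewArchiveIterator> &Members) {
  auto Result = writeArchive("libdemo.a", Members, /*WriteSymtab=*/true,
                             object::Archive::K_GNU, /*Deterministic=*/true,
                             /*Thin=*/true); // Thin is the new parameter
  return Result.second;
}
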
diff --git a/contrib/llvm/include/llvm/Object/Binary.h b/contrib/llvm/include/llvm/Object/Binary.h
index a3d6d0d..a0d1127 100644
--- a/contrib/llvm/include/llvm/Object/Binary.h
+++ b/contrib/llvm/include/llvm/Object/Binary.h
@@ -41,7 +41,9 @@ protected:
enum {
ID_Archive,
ID_MachOUniversalBinary,
- ID_IR, // LLVM IR
+ ID_COFFImportFile,
+ ID_IR, // LLVM IR
+ ID_FunctionIndex, // Function summary index
// Object and children.
ID_StartObjects,
@@ -113,10 +115,16 @@ public:
return TypeID == ID_COFF;
}
+ bool isCOFFImportFile() const {
+ return TypeID == ID_COFFImportFile;
+ }
+
bool isIR() const {
return TypeID == ID_IR;
}
+ bool isFunctionIndex() const { return TypeID == ID_FunctionIndex; }
+
bool isLittleEndian() const {
return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
TypeID == ID_MachO32B || TypeID == ID_MachO64B);
diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h
index 025a9db..1b0e2e3 100644
--- a/contrib/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm/include/llvm/Object/COFF.h
@@ -653,8 +653,7 @@ protected:
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
- std::error_code getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const override;
+ ErrorOr<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
void moveSectionNext(DataRefImpl &Sec) const override;
std::error_code getSectionName(DataRefImpl Sec,
StringRef &Res) const override;
@@ -774,6 +773,7 @@ public:
std::error_code getSectionContents(const coff_section *Sec,
ArrayRef<uint8_t> &Res) const;
+ uint64_t getImageBase() const;
std::error_code getVaPtr(uint64_t VA, uintptr_t &Res) const;
std::error_code getRvaPtr(uint32_t Rva, uintptr_t &Res) const;
std::error_code getHintName(uint32_t Rva, uint16_t &Hint,
diff --git a/contrib/llvm/include/llvm/Object/COFFImportFile.h b/contrib/llvm/include/llvm/Object/COFFImportFile.h
new file mode 100644
index 0000000..b04a44e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Object/COFFImportFile.h
@@ -0,0 +1,74 @@
+//===- COFFImportFile.h - COFF short import file implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF short import file is a special kind of file which contains
+// only symbol names for DLL-exported symbols. This class implements
+// SymbolicFile interface for the file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_COFF_IMPORT_FILE_H
+#define LLVM_OBJECT_COFF_IMPORT_FILE_H
+
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace object {
+
+class COFFImportFile : public SymbolicFile {
+public:
+ COFFImportFile(MemoryBufferRef Source)
+ : SymbolicFile(ID_COFFImportFile, Source) {}
+
+ static inline bool classof(Binary const *V) { return V->isCOFFImportFile(); }
+
+ void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; }
+
+ std::error_code printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const override {
+ if (Symb.p == 0)
+ OS << "__imp_";
+ OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header));
+ return std::error_code();
+ }
+
+ uint32_t getSymbolFlags(DataRefImpl Symb) const override {
+ return SymbolRef::SF_Global;
+ }
+
+ basic_symbol_iterator symbol_begin_impl() const override {
+ return BasicSymbolRef(DataRefImpl(), this);
+ }
+
+ basic_symbol_iterator symbol_end_impl() const override {
+ DataRefImpl Symb;
+ Symb.p = isCode() ? 2 : 1;
+ return BasicSymbolRef(Symb, this);
+ }
+
+ const coff_import_header *getCOFFImportHeader() const {
+ return reinterpret_cast<const object::coff_import_header *>(
+ Data.getBufferStart());
+ }
+
+private:
+ bool isCode() const {
+ return getCOFFImportHeader()->getType() == COFF::IMPORT_CODE;
+ }
+};
+
+} // namespace object
+} // namespace llvm
+
+#endif
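
A sketch of enumerating the symbols the new COFFImportFile exposes (the helper is hypothetical; the Import object is assumed to be already constructed from a loaded buffer):

#include "llvm/Object/COFFImportFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

// A code import yields both "__imp_<name>" and "<name>"; a data import
// yields only the __imp_ form, matching symbol_end_impl() above.
void dumpImportSymbols(const COFFImportFile &Import) {
  for (const BasicSymbolRef &Sym : Import.symbols()) {
    Sym.printName(outs());
    outs() << "\n";
  }
}
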
diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h
index cc27185..b0eaa3f 100644
--- a/contrib/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm/include/llvm/Object/ELF.h
@@ -14,25 +14,9 @@
#ifndef LLVM_OBJECT_ELF_H
#define LLVM_OBJECT_ELF_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/ELFTypes.h"
-#include "llvm/Object/Error.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <limits>
-#include <utility>
namespace llvm {
namespace object {
@@ -56,78 +40,6 @@ public:
typedef typename std::conditional<ELFT::Is64Bits,
uint64_t, uint32_t>::type uintX_t;
- /// \brief Iterate over constant sized entities.
- template <class EntT>
- class ELFEntityIterator {
- public:
- typedef ptrdiff_t difference_type;
- typedef EntT value_type;
- typedef std::forward_iterator_tag iterator_category;
- typedef value_type &reference;
- typedef value_type *pointer;
-
- /// \brief Default construct iterator.
- ELFEntityIterator() : EntitySize(0), Current(nullptr) {}
- ELFEntityIterator(uintX_t EntSize, const char *Start)
- : EntitySize(EntSize), Current(Start) {}
-
- reference operator *() {
- assert(Current && "Attempted to dereference an invalid iterator!");
- return *reinterpret_cast<pointer>(Current);
- }
-
- pointer operator ->() {
- assert(Current && "Attempted to dereference an invalid iterator!");
- return reinterpret_cast<pointer>(Current);
- }
-
- bool operator ==(const ELFEntityIterator &Other) {
- return Current == Other.Current;
- }
-
- bool operator !=(const ELFEntityIterator &Other) {
- return !(*this == Other);
- }
-
- ELFEntityIterator &operator ++() {
- assert(Current && "Attempted to increment an invalid iterator!");
- Current += EntitySize;
- return *this;
- }
-
- ELFEntityIterator &operator+(difference_type n) {
- assert(Current && "Attempted to increment an invalid iterator!");
- Current += (n * EntitySize);
- return *this;
- }
-
- ELFEntityIterator &operator-(difference_type n) {
- assert(Current && "Attempted to subtract an invalid iterator!");
- Current -= (n * EntitySize);
- return *this;
- }
-
- ELFEntityIterator operator ++(int) {
- ELFEntityIterator Tmp = *this;
- ++*this;
- return Tmp;
- }
-
- difference_type operator -(const ELFEntityIterator &Other) const {
- assert(EntitySize == Other.EntitySize &&
- "Subtracting iterators of different EntitySize!");
- return (Current - Other.Current) / EntitySize;
- }
-
- const char *get() const { return Current; }
-
- uintX_t getEntSize() const { return EntitySize; }
-
- private:
- uintX_t EntitySize;
- const char *Current;
- };
-
typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
typedef Elf_Sym_Impl<ELFT> Elf_Sym;
@@ -141,98 +53,22 @@ public:
typedef Elf_Vernaux_Impl<ELFT> Elf_Vernaux;
typedef Elf_Versym_Impl<ELFT> Elf_Versym;
typedef Elf_Hash_Impl<ELFT> Elf_Hash;
- typedef ELFEntityIterator<const Elf_Dyn> Elf_Dyn_Iter;
- typedef iterator_range<Elf_Dyn_Iter> Elf_Dyn_Range;
- typedef ELFEntityIterator<const Elf_Rela> Elf_Rela_Iter;
- typedef ELFEntityIterator<const Elf_Rel> Elf_Rel_Iter;
+ typedef Elf_GnuHash_Impl<ELFT> Elf_GnuHash;
+ typedef iterator_range<const Elf_Dyn *> Elf_Dyn_Range;
typedef iterator_range<const Elf_Shdr *> Elf_Shdr_Range;
-
- /// \brief Archive files are 2 byte aligned, so we need this for
- /// PointerIntPair to work.
- template <typename T>
- class ArchivePointerTypeTraits {
- public:
- static inline const void *getAsVoidPointer(T *P) { return P; }
- static inline T *getFromVoidPointer(const void *P) {
- return static_cast<T *>(P);
- }
- enum { NumLowBitsAvailable = 1 };
- };
-
typedef iterator_range<const Elf_Sym *> Elf_Sym_Range;
-private:
- typedef SmallVector<const Elf_Shdr *, 2> Sections_t;
- typedef DenseMap<unsigned, unsigned> IndexMap_t;
-
- StringRef Buf;
-
const uint8_t *base() const {
return reinterpret_cast<const uint8_t *>(Buf.data());
}
+private:
+
+ StringRef Buf;
+
const Elf_Ehdr *Header;
const Elf_Shdr *SectionHeaderTable = nullptr;
StringRef DotShstrtab; // Section header string table.
- StringRef DotStrtab; // Symbol header string table.
- const Elf_Shdr *dot_symtab_sec = nullptr; // Symbol table section.
- const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section.
- const Elf_Hash *HashTable = nullptr;
-
- const Elf_Shdr *SymbolTableSectionHeaderIndex = nullptr;
- DenseMap<const Elf_Sym *, ELF::Elf64_Word> ExtendedSymbolTable;
-
- const Elf_Shdr *dot_gnu_version_sec = nullptr; // .gnu.version
- const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r
- const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d
-
- /// \brief Represents a region described by entries in the .dynamic table.
- struct DynRegionInfo {
- DynRegionInfo() : Addr(nullptr), Size(0), EntSize(0) {}
- /// \brief Address in current address space.
- const void *Addr;
- /// \brief Size in bytes of the region.
- uintX_t Size;
- /// \brief Size of each entity in the region.
- uintX_t EntSize;
- };
-
- DynRegionInfo DynamicRegion;
- DynRegionInfo DynHashRegion;
- DynRegionInfo DynStrRegion;
- DynRegionInfo DynRelaRegion;
-
- // Pointer to SONAME entry in dynamic string table
- // This is set the first time getLoadName is called.
- mutable const char *dt_soname = nullptr;
-
- // Records for each version index the corresponding Verdef or Vernaux entry.
- // This is filled the first time LoadVersionMap() is called.
- class VersionMapEntry : public PointerIntPair<const void*, 1> {
- public:
- // If the integer is 0, this is an Elf_Verdef*.
- // If the integer is 1, this is an Elf_Vernaux*.
- VersionMapEntry() : PointerIntPair<const void*, 1>(nullptr, 0) { }
- VersionMapEntry(const Elf_Verdef *verdef)
- : PointerIntPair<const void*, 1>(verdef, 0) { }
- VersionMapEntry(const Elf_Vernaux *vernaux)
- : PointerIntPair<const void*, 1>(vernaux, 1) { }
- bool isNull() const { return getPointer() == nullptr; }
- bool isVerdef() const { return !isNull() && getInt() == 0; }
- bool isVernaux() const { return !isNull() && getInt() == 1; }
- const Elf_Verdef *getVerdef() const {
- return isVerdef() ? (const Elf_Verdef*)getPointer() : nullptr;
- }
- const Elf_Vernaux *getVernaux() const {
- return isVernaux() ? (const Elf_Vernaux*)getPointer() : nullptr;
- }
- };
- mutable SmallVector<VersionMapEntry, 16> VersionMap;
- void LoadVersionDefs(const Elf_Shdr *sec) const;
- void LoadVersionNeeds(const Elf_Shdr *ec) const;
- void LoadVersionMap() const;
-
- void scanDynamicTable();
public:
template<typename T>
@@ -240,25 +76,20 @@ public:
template <typename T>
const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
- const Elf_Shdr *getDotSymtabSec() const { return dot_symtab_sec; }
- const Elf_Shdr *getDotDynSymSec() const { return DotDynSymSec; }
- const Elf_Hash *getHashTable() const { return HashTable; }
-
ErrorOr<StringRef> getStringTable(const Elf_Shdr *Section) const;
- const char *getDynamicString(uintX_t Offset) const;
- ErrorOr<StringRef> getSymbolVersion(const Elf_Shdr *section,
- const Elf_Sym *Symb,
- bool &IsDefault) const;
+ ErrorOr<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const;
+
+ ErrorOr<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section) const;
+
void VerifyStrTab(const Elf_Shdr *sh) const;
StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
- /// \brief Get the symbol table section and symbol for a given relocation.
- template <class RelT>
- std::pair<const Elf_Shdr *, const Elf_Sym *>
- getRelocationSymbol(const Elf_Shdr *RelSec, const RelT *Rel) const;
+ /// \brief Get the symbol for a given relocation.
+ const Elf_Sym *getRelocationSymbol(const Elf_Rel *Rel,
+ const Elf_Shdr *SymTab) const;
ELFFile(StringRef Object, std::error_code &EC);
@@ -273,111 +104,116 @@ public:
Header->getDataEncoding() == ELF::ELFDATA2LSB;
}
+ ErrorOr<const Elf_Dyn *> dynamic_table_begin(const Elf_Phdr *Phdr) const;
+ ErrorOr<const Elf_Dyn *> dynamic_table_end(const Elf_Phdr *Phdr) const;
+ ErrorOr<Elf_Dyn_Range> dynamic_table(const Elf_Phdr *Phdr) const {
+ ErrorOr<const Elf_Dyn *> Begin = dynamic_table_begin(Phdr);
+ if (std::error_code EC = Begin.getError())
+ return EC;
+ ErrorOr<const Elf_Dyn *> End = dynamic_table_end(Phdr);
+ if (std::error_code EC = End.getError())
+ return EC;
+ return make_range(*Begin, *End);
+ }
+
const Elf_Shdr *section_begin() const;
const Elf_Shdr *section_end() const;
Elf_Shdr_Range sections() const {
return make_range(section_begin(), section_end());
}
- const Elf_Sym *symbol_begin() const;
- const Elf_Sym *symbol_end() const;
- Elf_Sym_Range symbols() const {
- return make_range(symbol_begin(), symbol_end());
- }
-
- Elf_Dyn_Iter dynamic_table_begin() const;
- /// \param NULLEnd use one past the first DT_NULL entry as the end instead of
- /// the section size.
- Elf_Dyn_Iter dynamic_table_end(bool NULLEnd = false) const;
- Elf_Dyn_Range dynamic_table(bool NULLEnd = false) const {
- return make_range(dynamic_table_begin(), dynamic_table_end(NULLEnd));
- }
-
- const Elf_Sym *dynamic_symbol_begin() const {
- if (!DotDynSymSec)
+ const Elf_Sym *symbol_begin(const Elf_Shdr *Sec) const {
+ if (!Sec)
return nullptr;
- if (DotDynSymSec->sh_entsize != sizeof(Elf_Sym))
+ if (Sec->sh_entsize != sizeof(Elf_Sym))
report_fatal_error("Invalid symbol size");
- return reinterpret_cast<const Elf_Sym *>(base() + DotDynSymSec->sh_offset);
+ return reinterpret_cast<const Elf_Sym *>(base() + Sec->sh_offset);
}
-
- const Elf_Sym *dynamic_symbol_end() const {
- if (!DotDynSymSec)
+ const Elf_Sym *symbol_end(const Elf_Shdr *Sec) const {
+ if (!Sec)
return nullptr;
- return reinterpret_cast<const Elf_Sym *>(base() + DotDynSymSec->sh_offset +
- DotDynSymSec->sh_size);
+ uint64_t Size = Sec->sh_size;
+ if (Size % sizeof(Elf_Sym))
+ report_fatal_error("Invalid symbol table size");
+ return symbol_begin(Sec) + Size / sizeof(Elf_Sym);
}
-
- Elf_Sym_Range dynamic_symbols() const {
- return make_range(dynamic_symbol_begin(), dynamic_symbol_end());
+ Elf_Sym_Range symbols(const Elf_Shdr *Sec) const {
+ return make_range(symbol_begin(Sec), symbol_end(Sec));
}
- Elf_Rela_Iter dyn_rela_begin() const {
- if (DynRelaRegion.Addr)
- return Elf_Rela_Iter(DynRelaRegion.EntSize,
- (const char *)DynRelaRegion.Addr);
- return Elf_Rela_Iter(0, nullptr);
+ typedef iterator_range<const Elf_Rela *> Elf_Rela_Range;
+
+ const Elf_Rela *rela_begin(const Elf_Shdr *sec) const {
+ if (sec->sh_entsize != sizeof(Elf_Rela))
+ report_fatal_error("Invalid relocation entry size");
+ return reinterpret_cast<const Elf_Rela *>(base() + sec->sh_offset);
}
- Elf_Rela_Iter dyn_rela_end() const {
- if (DynRelaRegion.Addr)
- return Elf_Rela_Iter(
- DynRelaRegion.EntSize,
- (const char *)DynRelaRegion.Addr + DynRelaRegion.Size);
- return Elf_Rela_Iter(0, nullptr);
+ const Elf_Rela *rela_end(const Elf_Shdr *sec) const {
+ uint64_t Size = sec->sh_size;
+ if (Size % sizeof(Elf_Rela))
+ report_fatal_error("Invalid relocation table size");
+ return rela_begin(sec) + Size / sizeof(Elf_Rela);
}
- Elf_Rela_Iter rela_begin(const Elf_Shdr *sec) const {
- return Elf_Rela_Iter(sec->sh_entsize,
- (const char *)(base() + sec->sh_offset));
+ Elf_Rela_Range relas(const Elf_Shdr *Sec) const {
+ return make_range(rela_begin(Sec), rela_end(Sec));
}
- Elf_Rela_Iter rela_end(const Elf_Shdr *sec) const {
- return Elf_Rela_Iter(
- sec->sh_entsize,
- (const char *)(base() + sec->sh_offset + sec->sh_size));
+ const Elf_Rel *rel_begin(const Elf_Shdr *sec) const {
+ if (sec->sh_entsize != sizeof(Elf_Rel))
+ report_fatal_error("Invalid relocation entry size");
+ return reinterpret_cast<const Elf_Rel *>(base() + sec->sh_offset);
}
- Elf_Rel_Iter rel_begin(const Elf_Shdr *sec) const {
- return Elf_Rel_Iter(sec->sh_entsize,
- (const char *)(base() + sec->sh_offset));
+ const Elf_Rel *rel_end(const Elf_Shdr *sec) const {
+ uint64_t Size = sec->sh_size;
+ if (Size % sizeof(Elf_Rel))
+ report_fatal_error("Invalid relocation table size");
+ return rel_begin(sec) + Size / sizeof(Elf_Rel);
}
- Elf_Rel_Iter rel_end(const Elf_Shdr *sec) const {
- return Elf_Rel_Iter(sec->sh_entsize,
- (const char *)(base() + sec->sh_offset + sec->sh_size));
+ typedef iterator_range<const Elf_Rel *> Elf_Rel_Range;
+ Elf_Rel_Range rels(const Elf_Shdr *Sec) const {
+ return make_range(rel_begin(Sec), rel_end(Sec));
}
/// \brief Iterate over program header table.
- typedef ELFEntityIterator<const Elf_Phdr> Elf_Phdr_Iter;
+ const Elf_Phdr *program_header_begin() const {
+ if (Header->e_phnum && Header->e_phentsize != sizeof(Elf_Phdr))
+ report_fatal_error("Invalid program header size");
+ return reinterpret_cast<const Elf_Phdr *>(base() + Header->e_phoff);
+ }
- Elf_Phdr_Iter program_header_begin() const {
- return Elf_Phdr_Iter(Header->e_phentsize,
- (const char*)base() + Header->e_phoff);
+ const Elf_Phdr *program_header_end() const {
+ return program_header_begin() + Header->e_phnum;
}
- Elf_Phdr_Iter program_header_end() const {
- return Elf_Phdr_Iter(Header->e_phentsize,
- (const char*)base() +
- Header->e_phoff +
- (Header->e_phnum * Header->e_phentsize));
+ typedef iterator_range<const Elf_Phdr *> Elf_Phdr_Range;
+
+ const Elf_Phdr_Range program_headers() const {
+ return make_range(program_header_begin(), program_header_end());
}
uint64_t getNumSections() const;
uintX_t getStringTableIndex() const;
- ELF::Elf64_Word getExtendedSymbolTableIndex(const Elf_Sym *symb) const;
+ uint32_t getExtendedSymbolTableIndex(const Elf_Sym *Sym,
+ const Elf_Shdr *SymTab,
+ ArrayRef<Elf_Word> ShndxTable) const;
const Elf_Ehdr *getHeader() const { return Header; }
- ErrorOr<const Elf_Shdr *> getSection(const Elf_Sym *symb) const;
+ ErrorOr<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
+ const Elf_Shdr *SymTab,
+ ArrayRef<Elf_Word> ShndxTable) const;
ErrorOr<const Elf_Shdr *> getSection(uint32_t Index) const;
- const Elf_Sym *getSymbol(uint32_t index) const;
- ErrorOr<StringRef> getStaticSymbolName(const Elf_Sym *Symb) const;
- ErrorOr<StringRef> getDynamicSymbolName(const Elf_Sym *Symb) const;
- ErrorOr<StringRef> getSymbolName(const Elf_Sym *Symb, bool IsDynamic) const;
+ const Elf_Sym *getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
+ return &*(symbol_begin(Sec) + Index);
+ }
ErrorOr<StringRef> getSectionName(const Elf_Shdr *Section) const;
+ template <typename T>
+ ErrorOr<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr *Sec) const;
ErrorOr<ArrayRef<uint8_t> > getSectionContents(const Elf_Shdr *Sec) const;
- StringRef getLoadName() const;
};
typedef ELFFile<ELFType<support::little, false>> ELF32LEFile;
@@ -385,118 +221,50 @@ typedef ELFFile<ELFType<support::little, true>> ELF64LEFile;
typedef ELFFile<ELFType<support::big, false>> ELF32BEFile;
typedef ELFFile<ELFType<support::big, true>> ELF64BEFile;
-// Iterate through the version definitions, and place each Elf_Verdef
-// in the VersionMap according to its index.
template <class ELFT>
-void ELFFile<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
- unsigned vd_size = sec->sh_size; // Size of section in bytes
- unsigned vd_count = sec->sh_info; // Number of Verdef entries
- const char *sec_start = (const char*)base() + sec->sh_offset;
- const char *sec_end = sec_start + vd_size;
- // The first Verdef entry is at the start of the section.
- const char *p = sec_start;
- for (unsigned i = 0; i < vd_count; i++) {
- if (p + sizeof(Elf_Verdef) > sec_end)
- report_fatal_error("Section ended unexpectedly while scanning "
- "version definitions.");
- const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
- if (vd->vd_version != ELF::VER_DEF_CURRENT)
- report_fatal_error("Unexpected verdef version");
- size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
- if (index >= VersionMap.size())
- VersionMap.resize(index + 1);
- VersionMap[index] = VersionMapEntry(vd);
- p += vd->vd_next;
- }
-}
+uint32_t ELFFile<ELFT>::getExtendedSymbolTableIndex(
+ const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+ ArrayRef<Elf_Word> ShndxTable) const {
+ assert(Sym->st_shndx == ELF::SHN_XINDEX);
+ unsigned Index = Sym - symbol_begin(SymTab);
-// Iterate through the versions needed section, and place each Elf_Vernaux
-// in the VersionMap according to its index.
-template <class ELFT>
-void ELFFile<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
- unsigned vn_size = sec->sh_size; // Size of section in bytes
- unsigned vn_count = sec->sh_info; // Number of Verneed entries
- const char *sec_start = (const char *)base() + sec->sh_offset;
- const char *sec_end = sec_start + vn_size;
- // The first Verneed entry is at the start of the section.
- const char *p = sec_start;
- for (unsigned i = 0; i < vn_count; i++) {
- if (p + sizeof(Elf_Verneed) > sec_end)
- report_fatal_error("Section ended unexpectedly while scanning "
- "version needed records.");
- const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
- if (vn->vn_version != ELF::VER_NEED_CURRENT)
- report_fatal_error("Unexpected verneed version");
- // Iterate through the Vernaux entries
- const char *paux = p + vn->vn_aux;
- for (unsigned j = 0; j < vn->vn_cnt; j++) {
- if (paux + sizeof(Elf_Vernaux) > sec_end)
- report_fatal_error("Section ended unexpected while scanning auxiliary "
- "version needed records.");
- const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
- size_t index = vna->vna_other & ELF::VERSYM_VERSION;
- if (index >= VersionMap.size())
- VersionMap.resize(index + 1);
- VersionMap[index] = VersionMapEntry(vna);
- paux += vna->vna_next;
- }
- p += vn->vn_next;
- }
-}
-
-template <class ELFT>
-void ELFFile<ELFT>::LoadVersionMap() const {
- // If there is no dynamic symtab or version table, there is nothing to do.
- if (!DotDynSymSec || !dot_gnu_version_sec)
- return;
-
- // Has the VersionMap already been loaded?
- if (VersionMap.size() > 0)
- return;
-
- // The first two version indexes are reserved.
- // Index 0 is LOCAL, index 1 is GLOBAL.
- VersionMap.push_back(VersionMapEntry());
- VersionMap.push_back(VersionMapEntry());
-
- if (dot_gnu_version_d_sec)
- LoadVersionDefs(dot_gnu_version_d_sec);
-
- if (dot_gnu_version_r_sec)
- LoadVersionNeeds(dot_gnu_version_r_sec);
-}
-
-template <class ELFT>
-ELF::Elf64_Word
-ELFFile<ELFT>::getExtendedSymbolTableIndex(const Elf_Sym *symb) const {
- assert(symb->st_shndx == ELF::SHN_XINDEX);
- return ExtendedSymbolTable.lookup(symb);
+ // The size of the table was checked in getSHNDXTable.
+ return ShndxTable[Index];
}
template <class ELFT>
ErrorOr<const typename ELFFile<ELFT>::Elf_Shdr *>
-ELFFile<ELFT>::getSection(const Elf_Sym *symb) const {
- uint32_t Index = symb->st_shndx;
+ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+ ArrayRef<Elf_Word> ShndxTable) const {
+ uint32_t Index = Sym->st_shndx;
if (Index == ELF::SHN_XINDEX)
- return getSection(ExtendedSymbolTable.lookup(symb));
+ return getSection(getExtendedSymbolTableIndex(Sym, SymTab, ShndxTable));
+
if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE)
return nullptr;
- return getSection(symb->st_shndx);
+ return getSection(Sym->st_shndx);
}
template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Sym *
-ELFFile<ELFT>::getSymbol(uint32_t Index) const {
- return &*(symbol_begin() + Index);
+template <typename T>
+ErrorOr<ArrayRef<T>>
+ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
+ uintX_t Offset = Sec->sh_offset;
+ uintX_t Size = Sec->sh_size;
+
+ if (Size % sizeof(T))
+ return object_error::parse_failed;
+ if (Offset + Size > Buf.size())
+ return object_error::parse_failed;
+
+ const T *Start = reinterpret_cast<const T *>(base() + Offset);
+ return makeArrayRef(Start, Size / sizeof(T));
}
template <class ELFT>
-ErrorOr<ArrayRef<uint8_t> >
+ErrorOr<ArrayRef<uint8_t>>
ELFFile<ELFT>::getSectionContents(const Elf_Shdr *Sec) const {
- if (Sec->sh_offset + Sec->sh_size > Buf.size())
- return object_error::parse_failed;
- const uint8_t *Start = base() + Sec->sh_offset;
- return makeArrayRef(Start, Sec->sh_size);
+ return getSectionContentsAsArray<uint8_t>(Sec);
}
template <class ELFT>
@@ -536,18 +304,13 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type,
}
template <class ELFT>
-template <class RelT>
-std::pair<const typename ELFFile<ELFT>::Elf_Shdr *,
- const typename ELFFile<ELFT>::Elf_Sym *>
-ELFFile<ELFT>::getRelocationSymbol(const Elf_Shdr *Sec, const RelT *Rel) const {
- if (!Sec->sh_link)
- return std::make_pair(nullptr, nullptr);
- ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Sec->sh_link);
- if (std::error_code EC = SymTableOrErr.getError())
- report_fatal_error(EC.message());
- const Elf_Shdr *SymTable = *SymTableOrErr;
- return std::make_pair(
- SymTable, getEntry<Elf_Sym>(SymTable, Rel->getSymbol(isMips64EL())));
+const typename ELFFile<ELFT>::Elf_Sym *
+ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel,
+ const Elf_Shdr *SymTab) const {
+ uint32_t Index = Rel->getSymbol(isMips64EL());
+ if (Index == 0)
+ return nullptr;
+ return getEntry<Elf_Sym>(SymTab, Index);
}
template <class ELFT>
@@ -584,10 +347,8 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
Header = reinterpret_cast<const Elf_Ehdr *>(base());
- if (Header->e_shoff == 0) {
- scanDynamicTable();
+ if (Header->e_shoff == 0)
return;
- }
const uint64_t SectionTableOffset = Header->e_shoff;
@@ -608,185 +369,25 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
return;
}
- // Scan sections for special sections.
-
- for (const Elf_Shdr &Sec : sections()) {
- switch (Sec.sh_type) {
- case ELF::SHT_HASH:
- if (HashTable) {
- EC = object_error::parse_failed;
- return;
- }
- HashTable = reinterpret_cast<const Elf_Hash *>(base() + Sec.sh_offset);
- break;
- case ELF::SHT_SYMTAB_SHNDX:
- if (SymbolTableSectionHeaderIndex) {
- // More than one .symtab_shndx!
- EC = object_error::parse_failed;
- return;
- }
- SymbolTableSectionHeaderIndex = &Sec;
- break;
- case ELF::SHT_SYMTAB: {
- if (dot_symtab_sec) {
- // More than one .symtab!
- EC = object_error::parse_failed;
- return;
- }
- dot_symtab_sec = &Sec;
- ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
- if ((EC = SectionOrErr.getError()))
- return;
- ErrorOr<StringRef> SymtabOrErr = getStringTable(*SectionOrErr);
- if ((EC = SymtabOrErr.getError()))
- return;
- DotStrtab = *SymtabOrErr;
- } break;
- case ELF::SHT_DYNSYM: {
- if (DotDynSymSec) {
- // More than one .dynsym!
- EC = object_error::parse_failed;
- return;
- }
- DotDynSymSec = &Sec;
- ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
- if ((EC = SectionOrErr.getError()))
- return;
- ErrorOr<StringRef> SymtabOrErr = getStringTable(*SectionOrErr);
- if ((EC = SymtabOrErr.getError()))
- return;
- DynStrRegion.Addr = SymtabOrErr->data();
- DynStrRegion.Size = SymtabOrErr->size();
- DynStrRegion.EntSize = 1;
- break;
- }
- case ELF::SHT_DYNAMIC:
- if (DynamicRegion.Addr) {
- // More than one .dynamic!
- EC = object_error::parse_failed;
- return;
- }
- DynamicRegion.Addr = base() + Sec.sh_offset;
- DynamicRegion.Size = Sec.sh_size;
- DynamicRegion.EntSize = Sec.sh_entsize;
- break;
- case ELF::SHT_GNU_versym:
- if (dot_gnu_version_sec != nullptr) {
- // More than one .gnu.version section!
- EC = object_error::parse_failed;
- return;
- }
- dot_gnu_version_sec = &Sec;
- break;
- case ELF::SHT_GNU_verdef:
- if (dot_gnu_version_d_sec != nullptr) {
- // More than one .gnu.version_d section!
- EC = object_error::parse_failed;
- return;
- }
- dot_gnu_version_d_sec = &Sec;
- break;
- case ELF::SHT_GNU_verneed:
- if (dot_gnu_version_r_sec != nullptr) {
- // More than one .gnu.version_r section!
- EC = object_error::parse_failed;
- return;
- }
- dot_gnu_version_r_sec = &Sec;
- break;
- }
- }
-
// Get string table sections.
- ErrorOr<const Elf_Shdr *> StrTabSecOrErr = getSection(getStringTableIndex());
- if ((EC = StrTabSecOrErr.getError()))
- return;
+ uintX_t StringTableIndex = getStringTableIndex();
+ if (StringTableIndex) {
+ ErrorOr<const Elf_Shdr *> StrTabSecOrErr = getSection(StringTableIndex);
+ if ((EC = StrTabSecOrErr.getError()))
+ return;
- ErrorOr<StringRef> SymtabOrErr = getStringTable(*StrTabSecOrErr);
- if ((EC = SymtabOrErr.getError()))
- return;
- DotShstrtab = *SymtabOrErr;
-
- // Build symbol name side-mapping if there is one.
- if (SymbolTableSectionHeaderIndex) {
- const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
- SymbolTableSectionHeaderIndex->sh_offset);
- for (const Elf_Sym &S : symbols()) {
- if (*ShndxTable != ELF::SHN_UNDEF)
- ExtendedSymbolTable[&S] = *ShndxTable;
- ++ShndxTable;
- }
+ ErrorOr<StringRef> StringTableOrErr = getStringTable(*StrTabSecOrErr);
+ if ((EC = StringTableOrErr.getError()))
+ return;
+ DotShstrtab = *StringTableOrErr;
}
- scanDynamicTable();
-
EC = std::error_code();
}
template <class ELFT>
-void ELFFile<ELFT>::scanDynamicTable() {
- // Build load-address to file-offset map.
- typedef IntervalMap<
- uintX_t, uintptr_t,
- IntervalMapImpl::NodeSizer<uintX_t, uintptr_t>::LeafSize,
- IntervalMapHalfOpenInfo<uintX_t>> LoadMapT;
- typename LoadMapT::Allocator Alloc;
- // Allocate the IntervalMap on the heap to work around MSVC bug where the
- // stack doesn't get realigned despite LoadMap having alignment 8 (PR24113).
- std::unique_ptr<LoadMapT> LoadMap(new LoadMapT(Alloc));
-
- for (Elf_Phdr_Iter PhdrI = program_header_begin(),
- PhdrE = program_header_end();
- PhdrI != PhdrE; ++PhdrI) {
- if (PhdrI->p_type == ELF::PT_DYNAMIC) {
- DynamicRegion.Addr = base() + PhdrI->p_offset;
- DynamicRegion.Size = PhdrI->p_filesz;
- DynamicRegion.EntSize = sizeof(Elf_Dyn);
- continue;
- }
- if (PhdrI->p_type != ELF::PT_LOAD)
- continue;
- if (PhdrI->p_filesz == 0)
- continue;
- LoadMap->insert(PhdrI->p_vaddr, PhdrI->p_vaddr + PhdrI->p_filesz,
- PhdrI->p_offset);
- }
-
- auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
- auto I = LoadMap->find(VAddr);
- if (I == LoadMap->end())
- return nullptr;
- return this->base() + I.value() + (VAddr - I.start());
- };
-
- for (Elf_Dyn_Iter DynI = dynamic_table_begin(), DynE = dynamic_table_end();
- DynI != DynE; ++DynI) {
- switch (DynI->d_tag) {
- case ELF::DT_HASH:
- if (HashTable)
- continue;
- HashTable =
- reinterpret_cast<const Elf_Hash *>(toMappedAddr(DynI->getPtr()));
- break;
- case ELF::DT_STRTAB:
- if (!DynStrRegion.Addr)
- DynStrRegion.Addr = toMappedAddr(DynI->getPtr());
- break;
- case ELF::DT_STRSZ:
- if (!DynStrRegion.Size)
- DynStrRegion.Size = DynI->getVal();
- break;
- case ELF::DT_RELA:
- if (!DynRelaRegion.Addr)
- DynRelaRegion.Addr = toMappedAddr(DynI->getPtr());
- break;
- case ELF::DT_RELASZ:
- DynRelaRegion.Size = DynI->getVal();
- break;
- case ELF::DT_RELAENT:
- DynRelaRegion.EntSize = DynI->getVal();
- }
- }
+static bool compareAddr(uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
+ return VAddr < Phdr->p_vaddr;
}
template <class ELFT>
@@ -803,64 +404,31 @@ const typename ELFFile<ELFT>::Elf_Shdr *ELFFile<ELFT>::section_end() const {
}
template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Sym *ELFFile<ELFT>::symbol_begin() const {
- if (!dot_symtab_sec)
+ErrorOr<const typename ELFFile<ELFT>::Elf_Dyn *>
+ELFFile<ELFT>::dynamic_table_begin(const Elf_Phdr *Phdr) const {
+ if (!Phdr)
return nullptr;
- if (dot_symtab_sec->sh_entsize != sizeof(Elf_Sym))
- report_fatal_error("Invalid symbol size");
- return reinterpret_cast<const Elf_Sym *>(base() + dot_symtab_sec->sh_offset);
+ assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header");
+ uintX_t Offset = Phdr->p_offset;
+ if (Offset > Buf.size())
+ return object_error::parse_failed;
+ return reinterpret_cast<const Elf_Dyn *>(base() + Offset);
}
template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Sym *ELFFile<ELFT>::symbol_end() const {
- if (!dot_symtab_sec)
+ErrorOr<const typename ELFFile<ELFT>::Elf_Dyn *>
+ELFFile<ELFT>::dynamic_table_end(const Elf_Phdr *Phdr) const {
+ if (!Phdr)
return nullptr;
- return reinterpret_cast<const Elf_Sym *>(base() + dot_symtab_sec->sh_offset +
- dot_symtab_sec->sh_size);
-}
-
-template <class ELFT>
-typename ELFFile<ELFT>::Elf_Dyn_Iter
-ELFFile<ELFT>::dynamic_table_begin() const {
- if (DynamicRegion.Addr)
- return Elf_Dyn_Iter(DynamicRegion.EntSize,
- (const char *)DynamicRegion.Addr);
- return Elf_Dyn_Iter(0, nullptr);
-}
-
-template <class ELFT>
-typename ELFFile<ELFT>::Elf_Dyn_Iter
-ELFFile<ELFT>::dynamic_table_end(bool NULLEnd) const {
- if (!DynamicRegion.Addr)
- return Elf_Dyn_Iter(0, nullptr);
- Elf_Dyn_Iter Ret(DynamicRegion.EntSize,
- (const char *)DynamicRegion.Addr + DynamicRegion.Size);
-
- if (NULLEnd) {
- Elf_Dyn_Iter Start = dynamic_table_begin();
- while (Start != Ret && Start->getTag() != ELF::DT_NULL)
- ++Start;
-
- // Include the DT_NULL.
- if (Start != Ret)
- ++Start;
- Ret = Start;
- }
- return Ret;
-}
-
-template <class ELFT>
-StringRef ELFFile<ELFT>::getLoadName() const {
- if (!dt_soname) {
- dt_soname = "";
- // Find the DT_SONAME entry
- for (const auto &Entry : dynamic_table())
- if (Entry.getTag() == ELF::DT_SONAME) {
- dt_soname = getDynamicString(Entry.getVal());
- break;
- }
- }
- return dt_soname;
+ assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header");
+ uintX_t Size = Phdr->p_filesz;
+ if (Size % sizeof(Elf_Dyn))
+ return object_error::elf_invalid_dynamic_table_size;
+ // FIXME: Check for overflow?
+ uintX_t End = Phdr->p_offset + Size;
+ if (End > Buf.size())
+ return object_error::parse_failed;
+ return reinterpret_cast<const Elf_Dyn *>(base() + End);
}
template <class ELFT>
@@ -908,127 +476,52 @@ ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
}
template <class ELFT>
-const char *ELFFile<ELFT>::getDynamicString(uintX_t Offset) const {
- if (Offset >= DynStrRegion.Size)
- return nullptr;
- return (const char *)DynStrRegion.Addr + Offset;
-}
-
-template <class ELFT>
-ErrorOr<StringRef>
-ELFFile<ELFT>::getStaticSymbolName(const Elf_Sym *Symb) const {
- return Symb->getName(DotStrtab);
+ErrorOr<ArrayRef<typename ELFFile<ELFT>::Elf_Word>>
+ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section) const {
+ assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX);
+ const Elf_Word *ShndxTableBegin =
+ reinterpret_cast<const Elf_Word *>(base() + Section.sh_offset);
+ uintX_t Size = Section.sh_size;
+ if (Size % sizeof(uint32_t))
+ return object_error::parse_failed;
+ uintX_t NumSymbols = Size / sizeof(uint32_t);
+ const Elf_Word *ShndxTableEnd = ShndxTableBegin + NumSymbols;
+ if (reinterpret_cast<const char *>(ShndxTableEnd) > Buf.end())
+ return object_error::parse_failed;
+ ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Section.sh_link);
+ if (std::error_code EC = SymTableOrErr.getError())
+ return EC;
+ const Elf_Shdr &SymTable = **SymTableOrErr;
+ if (SymTable.sh_type != ELF::SHT_SYMTAB &&
+ SymTable.sh_type != ELF::SHT_DYNSYM)
+ return object_error::parse_failed;
+ if (NumSymbols != (SymTable.sh_size / sizeof(Elf_Sym)))
+ return object_error::parse_failed;
+ return makeArrayRef(ShndxTableBegin, ShndxTableEnd);
}
template <class ELFT>
ErrorOr<StringRef>
-ELFFile<ELFT>::getDynamicSymbolName(const Elf_Sym *Symb) const {
- return StringRef(getDynamicString(Symb->st_name));
-}
-
-template <class ELFT>
-ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Sym *Symb,
- bool IsDynamic) const {
- if (IsDynamic)
- return getDynamicSymbolName(Symb);
- return getStaticSymbolName(Symb);
+ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec) const {
+ if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
+ return object_error::parse_failed;
+ ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
+ if (std::error_code EC = SectionOrErr.getError())
+ return EC;
+ return getStringTable(*SectionOrErr);
}
template <class ELFT>
ErrorOr<StringRef>
ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const {
uint32_t Offset = Section->sh_name;
+ if (Offset == 0)
+ return StringRef();
if (Offset >= DotShstrtab.size())
return object_error::parse_failed;
return StringRef(DotShstrtab.data() + Offset);
}
-template <class ELFT>
-ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section,
- const Elf_Sym *symb,
- bool &IsDefault) const {
- StringRef StrTab;
- if (section) {
- ErrorOr<StringRef> StrTabOrErr = getStringTable(section);
- if (std::error_code EC = StrTabOrErr.getError())
- return EC;
- StrTab = *StrTabOrErr;
- }
- // Handle non-dynamic symbols.
- if (section != DotDynSymSec && section != nullptr) {
- // Non-dynamic symbols can have versions in their names
- // A name of the form 'foo@V1' indicates version 'V1', non-default.
- // A name of the form 'foo@@V2' indicates version 'V2', default version.
- ErrorOr<StringRef> SymName = symb->getName(StrTab);
- if (!SymName)
- return SymName;
- StringRef Name = *SymName;
- size_t atpos = Name.find('@');
- if (atpos == StringRef::npos) {
- IsDefault = false;
- return StringRef("");
- }
- ++atpos;
- if (atpos < Name.size() && Name[atpos] == '@') {
- IsDefault = true;
- ++atpos;
- } else {
- IsDefault = false;
- }
- return Name.substr(atpos);
- }
-
- // This is a dynamic symbol. Look in the GNU symbol version table.
- if (!dot_gnu_version_sec) {
- // No version table.
- IsDefault = false;
- return StringRef("");
- }
-
- // Determine the position in the symbol table of this entry.
- size_t entry_index =
- (reinterpret_cast<uintptr_t>(symb) - DotDynSymSec->sh_offset -
- reinterpret_cast<uintptr_t>(base())) /
- sizeof(Elf_Sym);
-
- // Get the corresponding version index entry
- const Elf_Versym *vs = getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
- size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
-
- // Special markers for unversioned symbols.
- if (version_index == ELF::VER_NDX_LOCAL ||
- version_index == ELF::VER_NDX_GLOBAL) {
- IsDefault = false;
- return StringRef("");
- }
-
- // Lookup this symbol in the version table
- LoadVersionMap();
- if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
- return object_error::parse_failed;
- const VersionMapEntry &entry = VersionMap[version_index];
-
- // Get the version name string
- size_t name_offset;
- if (entry.isVerdef()) {
- // The first Verdaux entry holds the name.
- name_offset = entry.getVerdef()->getAux()->vda_name;
- } else {
- name_offset = entry.getVernaux()->vna_name;
- }
-
- // Set IsDefault
- if (entry.isVerdef()) {
- IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
- } else {
- IsDefault = false;
- }
-
- if (name_offset >= DynStrRegion.Size)
- return object_error::parse_failed;
- return StringRef(getDynamicString(name_offset));
-}
-
/// This function returns the hash value for a symbol in the .dynsym section.
/// The name of the API remains consistent with that specified in libelf.
/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
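
The rework above strips all cached section state out of ELFFile: callers now name the symbol table they want to walk, and the matching string table comes from getStringTableForSymtab(). A minimal sketch of enumerating static symbols under the new API (helper name hypothetical; assumes an LLVM build matching this revision):

#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

template <class ELFT>
static void dumpStaticSymbols(const ELFFile<ELFT> &EF) {
  typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
  typedef typename ELFFile<ELFT>::Elf_Sym Elf_Sym;
  for (const Elf_Shdr &Sec : EF.sections()) {
    if (Sec.sh_type != ELF::SHT_SYMTAB)
      continue;
    // The caller, not ELFFile, resolves the associated string table now.
    ErrorOr<StringRef> StrTabOrErr = EF.getStringTableForSymtab(Sec);
    if (std::error_code EC = StrTabOrErr.getError())
      report_fatal_error(EC.message());
    for (const Elf_Sym &Sym : EF.symbols(&Sec)) {
      ErrorOr<StringRef> NameOrErr = Sym.getName(*StrTabOrErr);
      if (NameOrErr)
        outs() << *NameOrErr << "\n";
    }
  }
}
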
diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
index 6e8ace4..5823848 100644
--- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
@@ -189,11 +189,13 @@ public:
typedef typename ELFFile<ELFT>::Elf_Rela Elf_Rela;
typedef typename ELFFile<ELFT>::Elf_Dyn Elf_Dyn;
- typedef typename ELFFile<ELFT>::Elf_Dyn_Iter Elf_Dyn_Iter;
-
protected:
ELFFile<ELFT> EF;
+ const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section.
+ const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section.
+ ArrayRef<Elf_Word> ShndxTable;
+
void moveSymbolNext(DataRefImpl &Symb) const override;
ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const override;
ErrorOr<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
@@ -204,9 +206,9 @@ protected:
uint8_t getSymbolOther(DataRefImpl Symb) const override;
uint8_t getSymbolELFType(DataRefImpl Symb) const override;
SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
- section_iterator getSymbolSection(const Elf_Sym *Symb) const;
- std::error_code getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const override;
+ ErrorOr<section_iterator> getSymbolSection(const Elf_Sym *Symb,
+ const Elf_Shdr *SymTab) const;
+ ErrorOr<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
void moveSectionNext(DataRefImpl &Sec) const override;
std::error_code getSectionName(DataRefImpl Sec,
@@ -240,10 +242,6 @@ protected:
return *EF.getSection(Rel.d.a);
}
- const Elf_Sym *toELFSymIter(DataRefImpl Sym) const {
- return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
- }
-
DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const {
DataRefImpl DRI;
if (!SymTable) {
@@ -273,9 +271,9 @@ protected:
return DRI;
}
- DataRefImpl toDRI(Elf_Dyn_Iter Dyn) const {
+ DataRefImpl toDRI(const Elf_Dyn *Dyn) const {
DataRefImpl DRI;
- DRI.p = reinterpret_cast<uintptr_t>(Dyn.get());
+ DRI.p = reinterpret_cast<uintptr_t>(Dyn);
return DRI;
}
@@ -304,7 +302,13 @@ public:
const Elf_Rel *getRel(DataRefImpl Rel) const;
const Elf_Rela *getRela(DataRefImpl Rela) const;
- const Elf_Sym *getSymbol(DataRefImpl Symb) const;
+ const Elf_Sym *getSymbol(DataRefImpl Sym) const {
+ return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
+ }
+
+ const Elf_Shdr *getSection(DataRefImpl Sec) const {
+ return reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ }
basic_symbol_iterator symbol_begin_impl() const override;
basic_symbol_iterator symbol_end_impl() const override;
@@ -320,7 +324,6 @@ public:
uint8_t getBytesInAddress() const override;
StringRef getFileFormatName() const override;
unsigned getArch() const override;
- StringRef getLoadName() const;
std::error_code getPlatformFlags(unsigned &Result) const override {
Result = EF.getHeader()->e_flags;
@@ -352,7 +355,7 @@ void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Sym) const {
template <class ELFT>
ErrorOr<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
- const Elf_Sym *ESym = toELFSymIter(Sym);
+ const Elf_Sym *ESym = getSymbol(Sym);
const Elf_Shdr *SymTableSec = *EF.getSection(Sym.d.a);
const Elf_Shdr *StringTableSec = *EF.getSection(SymTableSec->sh_link);
StringRef SymTable = *EF.getStringTable(StringTableSec);
@@ -361,12 +364,12 @@ ErrorOr<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSectionFlags(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_flags;
+ return getSection(Sec)->sh_flags;
}
template <class ELFT>
uint32_t ELFObjectFile<ELFT>::getSectionType(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_type;
+ return getSection(Sec)->sh_type;
}
template <class ELFT>
@@ -398,9 +401,11 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const {
}
const Elf_Ehdr *Header = EF.getHeader();
+ const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a);
if (Header->e_type == ELF::ET_REL) {
- ErrorOr<const Elf_Shdr *> SectionOrErr = EF.getSection(ESym);
+ ErrorOr<const Elf_Shdr *> SectionOrErr =
+ EF.getSection(ESym, SymTab, ShndxTable);
if (std::error_code EC = SectionOrErr.getError())
return EC;
const Elf_Shdr *Section = *SectionOrErr;
@@ -413,7 +418,7 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const {
template <class ELFT>
uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const {
- const Elf_Sym *Sym = toELFSymIter(Symb);
+ const Elf_Sym *Sym = getSymbol(Symb);
if (Sym->st_shndx == ELF::SHN_COMMON)
return Sym->st_value;
return 0;
@@ -421,22 +426,22 @@ uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const {
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Sym) const {
- return toELFSymIter(Sym)->st_size;
+ return getSymbol(Sym)->st_size;
}
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
- return toELFSymIter(Symb)->st_size;
+ return getSymbol(Symb)->st_size;
}
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
- return toELFSymIter(Symb)->st_other;
+ return getSymbol(Symb)->st_other;
}
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolELFType(DataRefImpl Symb) const {
- return toELFSymIter(Symb)->getType();
+ return getSymbol(Symb)->getType();
}
template <class ELFT>
@@ -463,7 +468,7 @@ SymbolRef::Type ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const {
template <class ELFT>
uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
- const Elf_Sym *ESym = toELFSymIter(Sym);
+ const Elf_Sym *ESym = getSymbol(Sym);
uint32_t Result = SymbolRef::SF_None;
@@ -477,7 +482,8 @@ uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
Result |= SymbolRef::SF_Absolute;
if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION ||
- ESym == EF.symbol_begin() || ESym == EF.dynamic_symbol_begin())
+ ESym == EF.symbol_begin(DotSymtabSec) ||
+ ESym == EF.symbol_begin(DotDynSymSec))
Result |= SymbolRef::SF_FormatSpecific;
if (EF.getHeader()->e_machine == ELF::EM_ARM) {
@@ -505,11 +511,12 @@ uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
}
template <class ELFT>
-section_iterator
-ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym) const {
- ErrorOr<const Elf_Shdr *> ESecOrErr = EF.getSection(ESym);
+ErrorOr<section_iterator>
+ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym,
+ const Elf_Shdr *SymTab) const {
+ ErrorOr<const Elf_Shdr *> ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable);
if (std::error_code EC = ESecOrErr.getError())
- report_fatal_error(EC.message());
+ return EC;
const Elf_Shdr *ESec = *ESecOrErr;
if (!ESec)
@@ -521,23 +528,23 @@ ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym) const {
}
template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const {
- Res = getSymbolSection(getSymbol(Symb));
- return std::error_code();
+ErrorOr<section_iterator>
+ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb) const {
+ const Elf_Sym *Sym = getSymbol(Symb);
+ const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a);
+ return getSymbolSection(Sym, SymTab);
}
template <class ELFT>
void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
- const Elf_Shdr *ESec = toELFShdrIter(Sec);
+ const Elf_Shdr *ESec = getSection(Sec);
Sec = toDRI(++ESec);
}
template <class ELFT>
std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
- ErrorOr<StringRef> Name = EF.getSectionName(&*toELFShdrIter(Sec));
+ ErrorOr<StringRef> Name = EF.getSectionName(&*getSection(Sec));
if (!Name)
return Name.getError();
Result = *Name;
@@ -546,50 +553,50 @@ std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSectionAddress(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_addr;
+ return getSection(Sec)->sh_addr;
}
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSectionSize(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_size;
+ return getSection(Sec)->sh_size;
}
template <class ELFT>
std::error_code
ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
StringRef &Result) const {
- const Elf_Shdr *EShdr = toELFShdrIter(Sec);
+ const Elf_Shdr *EShdr = getSection(Sec);
Result = StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size);
return std::error_code();
}
template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSectionAlignment(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_addralign;
+ return getSection(Sec)->sh_addralign;
}
template <class ELFT>
bool ELFObjectFile<ELFT>::isSectionText(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_flags & ELF::SHF_EXECINSTR;
+ return getSection(Sec)->sh_flags & ELF::SHF_EXECINSTR;
}
template <class ELFT>
bool ELFObjectFile<ELFT>::isSectionData(DataRefImpl Sec) const {
- const Elf_Shdr *EShdr = toELFShdrIter(Sec);
+ const Elf_Shdr *EShdr = getSection(Sec);
return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) &&
EShdr->sh_type == ELF::SHT_PROGBITS;
}
template <class ELFT>
bool ELFObjectFile<ELFT>::isSectionBSS(DataRefImpl Sec) const {
- const Elf_Shdr *EShdr = toELFShdrIter(Sec);
+ const Elf_Shdr *EShdr = getSection(Sec);
return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) &&
EShdr->sh_type == ELF::SHT_NOBITS;
}
template <class ELFT>
bool ELFObjectFile<ELFT>::isSectionVirtual(DataRefImpl Sec) const {
- return toELFShdrIter(Sec)->sh_type == ELF::SHT_NOBITS;
+ return getSection(Sec)->sh_type == ELF::SHT_NOBITS;
}
template <class ELFT>
@@ -636,7 +643,7 @@ ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
if (EF.getHeader()->e_type != ELF::ET_REL)
return section_end();
- const Elf_Shdr *EShdr = toELFShdrIter(Sec);
+ const Elf_Shdr *EShdr = getSection(Sec);
uintX_t Type = EShdr->sh_type;
if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA)
return section_end();
@@ -668,9 +675,9 @@ ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel) const {
bool IsDyn = Rel.d.b & 1;
DataRefImpl SymbolData;
if (IsDyn)
- SymbolData = toDRI(EF.getDotDynSymSec(), symbolIdx);
+ SymbolData = toDRI(DotDynSymSec, symbolIdx);
else
- SymbolData = toDRI(EF.getDotSymtabSec(), symbolIdx);
+ SymbolData = toDRI(DotSymtabSec, symbolIdx);
return symbol_iterator(SymbolRef(SymbolData, this));
}
@@ -715,12 +722,6 @@ ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel) const {
}
template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Sym *
-ELFObjectFile<ELFT>::getSymbol(DataRefImpl Symb) const {
- return &*toELFSymIter(Symb);
-}
-
-template <class ELFT>
const typename ELFObjectFile<ELFT>::Elf_Rel *
ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
assert(getRelSection(Rel)->sh_type == ELF::SHT_REL);
@@ -737,21 +738,51 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
template <class ELFT>
ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC)
: ELFObjectFileBase(
- getELFType(static_cast<endianness>(ELFT::TargetEndianness) ==
- support::little,
- ELFT::Is64Bits),
+ getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits),
Object),
- EF(Data.getBuffer(), EC) {}
+ EF(Data.getBuffer(), EC) {
+ if (EC)
+ return;
+ for (const Elf_Shdr &Sec : EF.sections()) {
+ switch (Sec.sh_type) {
+ case ELF::SHT_DYNSYM: {
+ if (DotDynSymSec) {
+ // More than one .dynsym!
+ EC = object_error::parse_failed;
+ return;
+ }
+ DotDynSymSec = &Sec;
+ break;
+ }
+ case ELF::SHT_SYMTAB: {
+ if (DotSymtabSec) {
+ // More than one .symtab!
+ EC = object_error::parse_failed;
+ return;
+ }
+ DotSymtabSec = &Sec;
+ break;
+ }
+ case ELF::SHT_SYMTAB_SHNDX: {
+ ErrorOr<ArrayRef<Elf_Word>> TableOrErr = EF.getSHNDXTable(Sec);
+ if ((EC = TableOrErr.getError()))
+ return;
+ ShndxTable = *TableOrErr;
+ break;
+ }
+ }
+ }
+}
template <class ELFT>
basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin_impl() const {
- DataRefImpl Sym = toDRI(EF.getDotSymtabSec(), 0);
+ DataRefImpl Sym = toDRI(DotSymtabSec, 0);
return basic_symbol_iterator(SymbolRef(Sym, this));
}
template <class ELFT>
basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end_impl() const {
- const Elf_Shdr *SymTab = EF.getDotSymtabSec();
+ const Elf_Shdr *SymTab = DotSymtabSec;
if (!SymTab)
return symbol_begin_impl();
DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
@@ -760,13 +791,13 @@ basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end_impl() const {
template <class ELFT>
elf_symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_begin() const {
- DataRefImpl Sym = toDRI(EF.getDotDynSymSec(), 0);
+ DataRefImpl Sym = toDRI(DotDynSymSec, 0);
return symbol_iterator(SymbolRef(Sym, this));
}
template <class ELFT>
elf_symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_end() const {
- const Elf_Shdr *SymTab = EF.getDotDynSymSec();
+ const Elf_Shdr *SymTab = DotDynSymSec;
DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
return basic_symbol_iterator(SymbolRef(Sym, this));
}
@@ -782,19 +813,6 @@ section_iterator ELFObjectFile<ELFT>::section_end() const {
}
template <class ELFT>
-StringRef ELFObjectFile<ELFT>::getLoadName() const {
- Elf_Dyn_Iter DI = EF.dynamic_table_begin();
- Elf_Dyn_Iter DE = EF.dynamic_table_end();
-
- while (DI != DE && DI->getTag() != ELF::DT_SONAME)
- ++DI;
-
- if (DI != DE)
- return EF.getDynamicString(DI->getVal());
- return "";
-}
-
-template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getBytesInAddress() const {
return ELFT::Is64Bits ? 8 : 4;
}
@@ -807,10 +825,14 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
switch (EF.getHeader()->e_machine) {
case ELF::EM_386:
return "ELF32-i386";
+ case ELF::EM_IAMCU:
+ return "ELF32-iamcu";
case ELF::EM_X86_64:
return "ELF32-x86-64";
case ELF::EM_ARM:
return (IsLittleEndian ? "ELF32-arm-little" : "ELF32-arm-big");
+ case ELF::EM_AVR:
+ return "ELF32-avr";
case ELF::EM_HEXAGON:
return "ELF32-hexagon";
case ELF::EM_MIPS:
@@ -853,6 +875,7 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
bool IsLittleEndian = ELFT::TargetEndianness == support::little;
switch (EF.getHeader()->e_machine) {
case ELF::EM_386:
+ case ELF::EM_IAMCU:
return Triple::x86;
case ELF::EM_X86_64:
return Triple::x86_64;
@@ -860,6 +883,8 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
return Triple::aarch64;
case ELF::EM_ARM:
return Triple::arm;
+ case ELF::EM_AVR:
+ return Triple::avr;
case ELF::EM_HEXAGON:
return Triple::hexagon;
case ELF::EM_MIPS:
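
With .dynsym, .symtab, and the SHT_SYMTAB_SHNDX table now located in the ELFObjectFile constructor, dynamic symbols are still reached through the iterator pair shown above. A minimal sketch (hypothetical helper; assumes the object actually has a .dynsym and an LLVM build matching this revision):

#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpDynamicSymbols(const ELF64LEObjectFile &Obj) {
  for (elf_symbol_iterator I = Obj.dynamic_symbol_begin(),
                           E = Obj.dynamic_symbol_end();
       I != E; ++I) {
    ErrorOr<StringRef> NameOrErr = I->getName();
    if (!NameOrErr)
      continue;
    outs() << *NameOrErr << " size=" << I->getSize() << "\n";
  }
}
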
diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h
index 27e987b..07b312a 100644
--- a/contrib/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm/include/llvm/Object/ELFTypes.h
@@ -12,7 +12,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/Error.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"
@@ -307,14 +306,18 @@ struct Elf_Dyn_Base<ELFType<TargetEndianness, true>> {
} d_un;
};
-/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters.
+/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters.
template <class ELFT>
struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
using Elf_Dyn_Base<ELFT>::d_tag;
using Elf_Dyn_Base<ELFT>::d_un;
- int64_t getTag() const { return d_tag; }
- uint64_t getVal() const { return d_un.d_val; }
- uint64_t getPtr() const { return d_un.d_ptr; }
+ typedef typename std::conditional<ELFT::Is64Bits,
+ int64_t, int32_t>::type intX_t;
+ typedef typename std::conditional<ELFT::Is64Bits,
+ uint64_t, uint32_t>::type uintX_t;
+ intX_t getTag() const { return d_tag; }
+ uintX_t getVal() const { return d_un.d_val; }
+ uintX_t getPtr() const { return d_un.d_ptr; }
};
// Elf_Rel: Elf Relocation
@@ -481,6 +484,30 @@ struct Elf_Hash_Impl {
}
};
+// .gnu.hash section
+template <class ELFT>
+struct Elf_GnuHash_Impl {
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
+ Elf_Word nbuckets;
+ Elf_Word symndx;
+ Elf_Word maskwords;
+ Elf_Word shift2;
+
+ ArrayRef<Elf_Off> filter() const {
+ return ArrayRef<Elf_Off>(reinterpret_cast<const Elf_Off *>(&shift2 + 1),
+ maskwords);
+ }
+
+ ArrayRef<Elf_Word> buckets() const {
+ return ArrayRef<Elf_Word>(
+ reinterpret_cast<const Elf_Word *>(filter().end()), nbuckets);
+ }
+
+ ArrayRef<Elf_Word> values(unsigned DynamicSymCount) const {
+ return ArrayRef<Elf_Word>(buckets().end(), DynamicSymCount - symndx);
+ }
+};
+
// MIPS .reginfo section
template <class ELFT>
struct Elf_Mips_RegInfo;
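
Elf_GnuHash_Impl above mirrors the on-disk .gnu.hash layout: a four-word header, then maskwords Bloom-filter words (filter()), nbuckets bucket heads (buckets()), and one stored hash per dynamic symbol from symndx onward (values()). The section is keyed by the well-known GNU hash function; a dependency-free sketch:

#include <cstdint>

// h = h*33 + c over the symbol name bytes, seeded with 5381 (the djb2
// recurrence used by .gnu.hash).
static uint32_t gnuHash(const char *Name) {
  uint32_t H = 5381;
  for (const unsigned char *P = (const unsigned char *)Name; *P; ++P)
    H = H * 33 + *P;
  return H;
}

// Lookup outline: Bucket = buckets()[gnuHash(Name) % nbuckets]; candidate
// chain entries then live in values(DynSymCount) starting at Bucket - symndx,
// with the low bit of each stored hash marking the end of a chain.
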
diff --git a/contrib/llvm/include/llvm/Object/Error.h b/contrib/llvm/include/llvm/Object/Error.h
index aa320bb..0f79a6e 100644
--- a/contrib/llvm/include/llvm/Object/Error.h
+++ b/contrib/llvm/include/llvm/Object/Error.h
@@ -30,6 +30,7 @@ enum class object_error {
string_table_non_null_end,
invalid_section_index,
bitcode_section_not_found,
+ elf_invalid_dynamic_table_size,
macho_small_load_command,
macho_load_segment_too_many_sections,
macho_load_segment_too_small,
diff --git a/contrib/llvm/include/llvm/Object/FunctionIndexObjectFile.h b/contrib/llvm/include/llvm/Object/FunctionIndexObjectFile.h
new file mode 100644
index 0000000..74b461d
--- /dev/null
+++ b/contrib/llvm/include/llvm/Object/FunctionIndexObjectFile.h
@@ -0,0 +1,110 @@
+//===- FunctionIndexObjectFile.h - Function index file implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the FunctionIndexObjectFile template class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H
+#define LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Object/SymbolicFile.h"
+
+namespace llvm {
+class FunctionInfoIndex;
+class Module;
+
+namespace object {
+class ObjectFile;
+
+/// This class is used to read just the sections related to the function
+/// summary index out of the given object (which may contain a single
+/// module's bitcode or be a combined index bitcode file). It builds a
+/// FunctionInfoIndex object.
+class FunctionIndexObjectFile : public SymbolicFile {
+ std::unique_ptr<FunctionInfoIndex> Index;
+
+public:
+ FunctionIndexObjectFile(MemoryBufferRef Object,
+ std::unique_ptr<FunctionInfoIndex> I);
+ ~FunctionIndexObjectFile() override;
+
+ // TODO: Walk through FunctionMap entries for function symbols.
+ // However, currently these interfaces are not used by any consumers.
+ void moveSymbolNext(DataRefImpl &Symb) const override {
+ llvm_unreachable("not implemented");
+ }
+ std::error_code printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const override {
+ llvm_unreachable("not implemented");
+ return std::error_code();
+ }
+ uint32_t getSymbolFlags(DataRefImpl Symb) const override {
+ llvm_unreachable("not implemented");
+ return 0;
+ }
+ basic_symbol_iterator symbol_begin_impl() const override {
+ llvm_unreachable("not implemented");
+ return basic_symbol_iterator(BasicSymbolRef());
+ }
+ basic_symbol_iterator symbol_end_impl() const override {
+ llvm_unreachable("not implemented");
+ return basic_symbol_iterator(BasicSymbolRef());
+ }
+
+ const FunctionInfoIndex &getIndex() const {
+ return const_cast<FunctionIndexObjectFile *>(this)->getIndex();
+ }
+ FunctionInfoIndex &getIndex() { return *Index; }
+ std::unique_ptr<FunctionInfoIndex> takeIndex();
+
+ static inline bool classof(const Binary *v) { return v->isFunctionIndex(); }
+
+ /// \brief Finds and returns bitcode embedded in the given object file, or an
+ /// error code if not found.
+ static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj);
+
+ /// \brief Finds and returns bitcode in the given memory buffer (which may
+ /// be either a bitcode file or a native object file with embedded bitcode),
+ /// or an error code if not found.
+ static ErrorOr<MemoryBufferRef>
+ findBitcodeInMemBuffer(MemoryBufferRef Object);
+
+ /// \brief Looks for a function summary in the given memory buffer;
+ /// returns true if found, else false.
+ static bool
+ hasFunctionSummaryInMemBuffer(MemoryBufferRef Object,
+ DiagnosticHandlerFunction DiagnosticHandler);
+
+ /// \brief Parse the function index in the given memory buffer and return
+ /// a new FunctionIndexObjectFile instance containing the parsed function
+ /// summary/index.
+ static ErrorOr<std::unique_ptr<FunctionIndexObjectFile>>
+ create(MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy = false);
+
+ /// \brief Parse the function summary information for the function with
+ /// the given name out of the given buffer. Parsed information is
+ /// stored on the index object saved in this object.
+ std::error_code
+ findFunctionSummaryInMemBuffer(MemoryBufferRef Object,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ StringRef FunctionName);
+};
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+getFunctionIndexForFile(StringRef Path,
+ DiagnosticHandlerFunction DiagnosticHandler);
+}
+
+#endif
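
The free function declared just above is the usual entry point. A minimal sketch of loading an index from an IR file (the handler and helper names are hypothetical; FunctionInfoIndex is assumed to live in llvm/IR/FunctionInfo.h at this revision):

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/FunctionInfo.h"
#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

// Forward bitcode-reader diagnostics to stderr.
static void handleDiag(const DiagnosticInfo &DI) {
  DiagnosticPrinterRawOStream DP(errs());
  DI.print(DP);
  errs() << "\n";
}

static std::unique_ptr<FunctionInfoIndex> loadIndex(StringRef Path) {
  ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
      getFunctionIndexForFile(Path, handleDiag);
  if (std::error_code EC = IndexOrErr.getError()) {
    errs() << Path << ": " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*IndexOrErr);
}
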
diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h
index 489ecef..e02ce3b 100644
--- a/contrib/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm/include/llvm/Object/MachO.h
@@ -100,7 +100,7 @@ private:
};
typedef content_iterator<ExportEntry> export_iterator;
-/// MachORebaseEntry encapsulates the current state in the decompression of
+/// MachORebaseEntry encapsulates the current state in the decompression of
/// rebasing opcodes. This allows you to iterate through the compressed table of
/// rebasing using:
/// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) {
@@ -116,7 +116,7 @@ public:
bool operator==(const MachORebaseEntry &) const;
void moveNext();
-
+
private:
friend class MachOObjectFile;
void moveToFirst();
@@ -210,8 +210,7 @@ public:
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
- std::error_code getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const override;
+ ErrorOr<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
unsigned getSymbolSectionID(SymbolRef Symb) const;
unsigned getSectionID(SectionRef Sec) const;
@@ -423,6 +422,24 @@ public:
return v->isMachO();
}
+ static uint32_t
+ getVersionMinMajor(MachO::version_min_command &C, bool SDK) {
+ uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
+ return (VersionOrSDK >> 16) & 0xffff;
+ }
+
+ static uint32_t
+ getVersionMinMinor(MachO::version_min_command &C, bool SDK) {
+ uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
+ return (VersionOrSDK >> 8) & 0xff;
+ }
+
+ static uint32_t
+ getVersionMinUpdate(MachO::version_min_command &C, bool SDK) {
+ uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
+ return VersionOrSDK & 0xff;
+ }
+
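// Editorial worked example (not part of the diff): the version_min encoding
// packs xxxx.yy.zz into 32 bits, so a raw value of 0x000A0B02 decodes as
// 10.11.2 with the helpers above:
//   getVersionMinMajor  -> (0x000A0B02 >> 16) & 0xffff == 10
//   getVersionMinMinor  -> (0x000A0B02 >>  8) & 0xff   == 11
//   getVersionMinUpdate ->  0x000A0B02        & 0xff   ==  2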
private:
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
@@ -504,4 +521,3 @@ inline const ObjectFile *DiceRef::getObjectFile() const {
}
#endif
-
diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h
index 8dd5256..ce0c891 100644
--- a/contrib/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ObjectFile.h
@@ -100,8 +100,7 @@ public:
relocation_iterator relocation_begin() const;
relocation_iterator relocation_end() const;
iterator_range<relocation_iterator> relocations() const {
- return iterator_range<relocation_iterator>(relocation_begin(),
- relocation_end());
+ return make_range(relocation_begin(), relocation_end());
}
section_iterator getRelocatedSection() const;
@@ -147,7 +146,7 @@ public:
/// @brief Get section this symbol is defined in reference to. Result is
/// end_sections() if it is undefined or is an absolute symbol.
- std::error_code getSection(section_iterator &Result) const;
+ ErrorOr<section_iterator> getSection() const;
const ObjectFile *getObject() const;
};
@@ -202,8 +201,8 @@ protected:
virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
virtual SymbolRef::Type getSymbolType(DataRefImpl Symb) const = 0;
- virtual std::error_code getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const = 0;
+ virtual ErrorOr<section_iterator>
+ getSymbolSection(DataRefImpl Symb) const = 0;
// Same as above for SectionRef.
friend class SectionRef;
@@ -323,8 +322,8 @@ inline uint64_t SymbolRef::getCommonSize() const {
return getObject()->getCommonSymbolSize(getRawDataRefImpl());
}
-inline std::error_code SymbolRef::getSection(section_iterator &Result) const {
- return getObject()->getSymbolSection(getRawDataRefImpl(), Result);
+inline ErrorOr<section_iterator> SymbolRef::getSection() const {
+ return getObject()->getSymbolSection(getRawDataRefImpl());
}
inline SymbolRef::Type SymbolRef::getType() const {
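// Editorial sketch (not part of the diff) of what this hunk changes for
// callers; Sym is an illustrative SymbolRef. The out-parameter style
//   section_iterator Sec = Obj.section_end();
//   if (std::error_code EC = Sym.getSection(Sec)) return EC;
// becomes the ErrorOr style, which carries result and error in one object:
//   ErrorOr<section_iterator> SecOrErr = Sym.getSection();
//   if (std::error_code EC = SecOrErr.getError())
//     return EC;
//   section_iterator Sec = *SecOrErr;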
diff --git a/contrib/llvm/include/llvm/Object/SymbolicFile.h b/contrib/llvm/include/llvm/Object/SymbolicFile.h
index 537997a..0c5b381 100644
--- a/contrib/llvm/include/llvm/Object/SymbolicFile.h
+++ b/contrib/llvm/include/llvm/Object/SymbolicFile.h
@@ -15,6 +15,7 @@
#define LLVM_OBJECT_SYMBOLICFILE_H
#include "llvm/Object/Binary.h"
+#include "llvm/Support/Format.h"
+#include <cinttypes>
namespace llvm {
namespace object {
@@ -29,6 +30,12 @@ union DataRefImpl {
DataRefImpl() { std::memset(this, 0, sizeof(DataRefImpl)); }
};
+template <typename OStream>
+OStream& operator<<(OStream &OS, const DataRefImpl &D) {
+ OS << "(" << format("0x%08" PRIxPTR, D.p) << " (" << format("0x%08x", D.d.a) << ", " << format("0x%08x", D.d.b) << "))";
+ return OS;
+}
+
inline bool operator==(const DataRefImpl &a, const DataRefImpl &b) {
// Check bitwise identical. This is the only legal way to compare a union w/o
// knowing which member is in use.
@@ -94,6 +101,7 @@ public:
// (e.g. section symbols)
SF_Thumb = 1U << 8, // Thumb symbol in a 32-bit ARM binary
SF_Hidden = 1U << 9, // Symbol has hidden visibility
+ SF_Const = 1U << 10, // Symbol value is constant
};
BasicSymbolRef() : OwningObject(nullptr) { }
diff --git a/contrib/llvm/include/llvm/Option/Arg.h b/contrib/llvm/include/llvm/Option/Arg.h
index e1b72b6..99d3296 100644
--- a/contrib/llvm/include/llvm/Option/Arg.h
+++ b/contrib/llvm/include/llvm/Option/Arg.h
@@ -113,6 +113,7 @@ public:
/// when rendered as an input (e.g., Xlinker).
void renderAsInput(const ArgList &Args, ArgStringList &Output) const;
+ void print(raw_ostream &O) const;
void dump() const;
/// \brief Return a formatted version of the argument and
diff --git a/contrib/llvm/include/llvm/Option/ArgList.h b/contrib/llvm/include/llvm/Option/ArgList.h
index ef40057..89771b5 100644
--- a/contrib/llvm/include/llvm/Option/ArgList.h
+++ b/contrib/llvm/include/llvm/Option/ArgList.h
@@ -259,6 +259,9 @@ public:
void AddLastArg(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1) const;
+ /// AddAllArgs - Render all arguments matching any of the given ids.
+ void AddAllArgs(ArgStringList &Output, ArrayRef<OptSpecifier> Ids) const;
+
/// AddAllArgs - Render all arguments matching the given ids.
void AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const;
@@ -303,6 +306,9 @@ public:
const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS,
StringRef RHS) const;
+ void print(raw_ostream &O) const;
+ void dump() const;
+
/// @}
};
diff --git a/contrib/llvm/include/llvm/Option/OptTable.h b/contrib/llvm/include/llvm/Option/OptTable.h
index 96f51cf..390e527 100644
--- a/contrib/llvm/include/llvm/Option/OptTable.h
+++ b/contrib/llvm/include/llvm/Option/OptTable.h
@@ -50,8 +50,7 @@ public:
private:
/// \brief The static option information table.
- const Info *OptionInfos;
- unsigned NumOptionInfos;
+ ArrayRef<Info> OptionInfos;
bool IgnoreCase;
unsigned TheInputOptionID;
@@ -74,14 +73,13 @@ private:
}
protected:
- OptTable(const Info *OptionInfos, unsigned NumOptionInfos,
- bool IgnoreCase = false);
+ OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false);
public:
~OptTable();
/// \brief Return the total number of option classes.
- unsigned getNumOptions() const { return NumOptionInfos; }
+ unsigned getNumOptions() const { return OptionInfos.size(); }
/// \brief Get the given Opt's Option instance, lazily creating it
/// if necessary.
diff --git a/contrib/llvm/include/llvm/Option/Option.h b/contrib/llvm/include/llvm/Option/Option.h
index 09be26c..494987a 100644
--- a/contrib/llvm/include/llvm/Option/Option.h
+++ b/contrib/llvm/include/llvm/Option/Option.h
@@ -195,6 +195,7 @@ public:
/// start.
Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+ void print(raw_ostream &O) const;
void dump() const;
};
diff --git a/contrib/llvm/include/llvm/PassAnalysisSupport.h b/contrib/llvm/include/llvm/PassAnalysisSupport.h
index 0b318fc..492a4ef 100644
--- a/contrib/llvm/include/llvm/PassAnalysisSupport.h
+++ b/contrib/llvm/include/llvm/PassAnalysisSupport.h
@@ -36,11 +36,17 @@ namespace llvm {
///
class AnalysisUsage {
public:
- typedef SmallVector<AnalysisID, 32> VectorType;
+ typedef SmallVectorImpl<AnalysisID> VectorType;
private:
/// Sets of analyses required and preserved by a pass
- VectorType Required, RequiredTransitive, Preserved;
+ // TODO: It's not clear that SmallVector is an appropriate data structure for
+ // this usecase. The sizes were picked to minimize wasted space, but are
+ // otherwise fairly meaningless.
+ SmallVector<AnalysisID, 8> Required;
+ SmallVector<AnalysisID, 2> RequiredTransitive;
+ SmallVector<AnalysisID, 2> Preserved;
+ SmallVector<AnalysisID, 0> Used;
bool PreservesAll;
public:
@@ -72,14 +78,32 @@ public:
Preserved.push_back(&ID);
return *this;
}
- ///@}
-
/// Add the specified Pass class to the set of analyses preserved by this pass.
template<class PassClass>
AnalysisUsage &addPreserved() {
Preserved.push_back(&PassClass::ID);
return *this;
}
+ ///@}
+
+ ///@{
+ /// Add the specified ID to the set of analyses used by this pass if it is
+ /// available.
+ AnalysisUsage &addUsedIfAvailableID(const void *ID) {
+ Used.push_back(ID);
+ return *this;
+ }
+ AnalysisUsage &addUsedIfAvailableID(char &ID) {
+ Used.push_back(&ID);
+ return *this;
+ }
+ /// Add the specified Pass class to the set of analyses used by this pass.
+ template<class PassClass>
+ AnalysisUsage &addUsedIfAvailable() {
+ Used.push_back(&PassClass::ID);
+ return *this;
+ }
+ ///@}
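// Editorial sketch (not part of the diff): how a pass might combine the new
// used-if-available set with the existing sets; MyPass and the analysis pass
// names are illustrative.
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTreeWrapperPass>();    // must be computed
//     AU.addUsedIfAvailable<AAResultsWrapperPass>(); // used only if cached
//     AU.addPreserved<DominatorTreeWrapperPass>();   // still valid after
//   }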
/// Add the Pass with the specified argument string to the set of analyses
/// preserved by this pass. If no such Pass exists, do nothing. This can be
@@ -108,6 +132,7 @@ public:
return RequiredTransitive;
}
const VectorType &getPreservedSet() const { return Preserved; }
+ const VectorType &getUsedSet() const { return Used; }
};
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/PassInfo.h b/contrib/llvm/include/llvm/PassInfo.h
index d107618..cee4ade 100644
--- a/contrib/llvm/include/llvm/PassInfo.h
+++ b/contrib/llvm/include/llvm/PassInfo.h
@@ -33,13 +33,13 @@ public:
typedef Pass *(*TargetMachineCtor_t)(TargetMachine *);
private:
- const char *const PassName; // Nice name for Pass
- const char *const PassArgument; // Command Line argument to run this pass
- const void *PassID;
- const bool IsCFGOnlyPass; // Pass only looks at the CFG.
- const bool IsAnalysis; // True if an analysis pass.
- const bool IsAnalysisGroup; // True if an analysis group.
- std::vector<const PassInfo*> ItfImpl;// Interfaces implemented by this pass
+ const char *const PassName; // Nice name for Pass
+ const char *const PassArgument; // Command Line argument to run this pass
+ const void *PassID;
+ const bool IsCFGOnlyPass; // Pass only looks at the CFG.
+ const bool IsAnalysis; // True if an analysis pass.
+ const bool IsAnalysisGroup; // True if an analysis group.
+ std::vector<const PassInfo *> ItfImpl; // Interfaces implemented by this pass
NormalCtor_t NormalCtor;
TargetMachineCtor_t TargetMachineCtor;
@@ -50,18 +50,16 @@ public:
PassInfo(const char *name, const char *arg, const void *pi,
NormalCtor_t normal, bool isCFGOnly, bool is_analysis,
TargetMachineCtor_t machine = nullptr)
- : PassName(name), PassArgument(arg), PassID(pi),
- IsCFGOnlyPass(isCFGOnly),
- IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal),
- TargetMachineCtor(machine) {}
+ : PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly),
+ IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal),
+ TargetMachineCtor(machine) {}
/// PassInfo ctor - Do not call this directly, this should only be invoked
/// through RegisterPass. This version is for use by analysis groups; it
/// does not auto-register the pass.
PassInfo(const char *name, const void *pi)
- : PassName(name), PassArgument(""), PassID(pi),
- IsCFGOnlyPass(false),
- IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr),
- TargetMachineCtor(nullptr) {}
+ : PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false),
+ IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr),
+ TargetMachineCtor(nullptr) {}
/// getPassName - Return the friendly name for the pass, never returns null
///
@@ -78,10 +76,8 @@ public:
const void *getTypeInfo() const { return PassID; }
/// Return true if this PassID implements the specified ID pointer.
- bool isPassID(const void *IDPtr) const {
- return PassID == IDPtr;
- }
-
+ bool isPassID(const void *IDPtr) const { return PassID == IDPtr; }
+
/// isAnalysisGroup - Return true if this is an analysis group, not a normal
/// pass.
///
@@ -91,7 +87,7 @@ public:
/// isCFGOnlyPass - return true if this pass only looks at the CFG for the
/// function.
bool isCFGOnlyPass() const { return IsCFGOnlyPass; }
-
+
/// getNormalCtor - Return a pointer to a function, that when called, creates
/// an instance of the pass and returns it. This pointer may be null if there
/// is no default constructor for the pass.
diff --git a/contrib/llvm/include/llvm/PassRegistry.h b/contrib/llvm/include/llvm/PassRegistry.h
index 8c28ef5..e7fe1f5 100644
--- a/contrib/llvm/include/llvm/PassRegistry.h
+++ b/contrib/llvm/include/llvm/PassRegistry.h
@@ -17,7 +17,6 @@
#ifndef LLVM_PASSREGISTRY_H
#define LLVM_PASSREGISTRY_H
-#include "llvm-c/Core.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
diff --git a/contrib/llvm/include/llvm/PassSupport.h b/contrib/llvm/include/llvm/PassSupport.h
index 6cb6516..7c3d49f 100644
--- a/contrib/llvm/include/llvm/PassSupport.h
+++ b/contrib/llvm/include/llvm/PassSupport.h
@@ -26,7 +26,7 @@
#include "llvm/PassInfo.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Atomic.h"
-#include "llvm/Support/Valgrind.h"
+#include "llvm/Support/Compiler.h"
#include <vector>
namespace llvm {
diff --git a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h
index 3488e79..3790e13 100644
--- a/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h
+++ b/contrib/llvm/include/llvm/ProfileData/CoverageMapping.h
@@ -104,7 +104,7 @@ struct CounterExpression {
};
/// \brief A Counter expression builder is used to construct the
-/// counter expressions. It avoids unecessary duplication
+/// counter expressions. It avoids unnecessary duplication
/// and simplifies algebraic expressions.
class CounterExpressionBuilder {
/// \brief A list of all the counter expressions
@@ -236,7 +236,7 @@ class CounterMappingContext {
public:
CounterMappingContext(ArrayRef<CounterExpression> Expressions,
- ArrayRef<uint64_t> CounterValues = ArrayRef<uint64_t>())
+ ArrayRef<uint64_t> CounterValues = None)
: Expressions(Expressions), CounterValues(CounterValues) {}
void setCounts(ArrayRef<uint64_t> Counts) { CounterValues = Counts; }
@@ -443,7 +443,7 @@ public:
/// \brief Get the list of function instantiations in the file.
///
- /// Fucntions that are instantiated more than once, such as C++ template
+ /// Functions that are instantiated more than once, such as C++ template
/// specializations, have distinct coverage records for each instantiation.
std::vector<const FunctionRecord *> getInstantiations(StringRef Filename);
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
index 77055ba..4688759 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
@@ -16,34 +16,310 @@
#ifndef LLVM_PROFILEDATA_INSTRPROF_H_
#define LLVM_PROFILEDATA_INSTRPROF_H_
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/ProfileData/InstrProfData.inc"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MD5.h"
#include <cstdint>
+#include <list>
#include <system_error>
#include <vector>
+#define INSTR_PROF_INDEX_VERSION 3
namespace llvm {
+
+class Function;
+class GlobalVariable;
+class Module;
+
+/// Return the name of the data section containing profile counter variables.
+inline StringRef getInstrProfCountersSectionName(bool AddSegment) {
+ return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR
+ : INSTR_PROF_CNTS_SECT_NAME_STR;
+}
+
+/// Return the name of the data section containing the names of instrumented
+/// functions.
+inline StringRef getInstrProfNameSectionName(bool AddSegment) {
+ return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR
+ : INSTR_PROF_NAME_SECT_NAME_STR;
+}
+
+/// Return the name of the data section containing per-function control
+/// data.
+inline StringRef getInstrProfDataSectionName(bool AddSegment) {
+ return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR
+ : INSTR_PROF_DATA_SECT_NAME_STR;
+}
+
+/// Return the name of the profile runtime entry point used to do value
+/// profiling for a given site.
+inline StringRef getInstrProfValueProfFuncName() {
+ return INSTR_PROF_VALUE_PROF_FUNC_STR;
+}
+
+/// Return the name of the section containing function coverage mapping
+/// data.
+inline StringRef getInstrProfCoverageSectionName(bool AddSegment) {
+ return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap";
+}
+
+/// Return the name prefix of variables containing instrumented function names.
+inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
+
+/// Return the name prefix of variables containing per-function control data.
+inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
+
+/// Return the name prefix of profile counter variables.
+inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }
+
+/// Return the name prefix of the COMDAT group for instrumentation variables
+/// associated with a COMDAT function.
+inline StringRef getInstrProfComdatPrefix() { return "__profv_"; }
+
+/// Return the name of a coverage mapping variable (internal linkage)
+/// for each instrumented source module. Such variables are allocated
+/// in the __llvm_covmap section.
+inline StringRef getCoverageMappingVarName() {
+ return "__llvm_coverage_mapping";
+}
+
+/// Return the name of the function that registers all the per-function
+/// control data at program startup time by calling __llvm_register_function.
+/// It has internal linkage and is called by the __llvm_profile_init runtime
+/// method. This function is not generated for these platforms:
+/// Darwin, Linux, and FreeBSD.
+inline StringRef getInstrProfRegFuncsName() {
+ return "__llvm_profile_register_functions";
+}
+
+/// Return the name of the runtime interface that registers per-function control
+/// data for one instrumented function.
+inline StringRef getInstrProfRegFuncName() {
+ return "__llvm_profile_register_function";
+}
+
+/// Return the name of the runtime initialization method that is generated by
+/// the compiler. The function calls __llvm_profile_register_functions and
+/// __llvm_profile_override_default_filename functions if needed. This function
+/// has internal linkage and is invoked at startup time via init_array.
+inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
+
+/// Return the name of the hook variable defined in profile runtime library.
+/// A reference to the variable causes the linker to link in the runtime
+/// initialization module (which defines the hook variable).
+inline StringRef getInstrProfRuntimeHookVarName() {
+ return "__llvm_profile_runtime";
+}
+
+/// Return the name of the compiler generated function that references the
+/// runtime hook variable. The function is a weak global.
+inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
+ return "__llvm_profile_runtime_user";
+}
+
+/// Return the name of the profile runtime interface that overrides the default
+/// profile data file name.
+inline StringRef getInstrProfFileOverriderFuncName() {
+ return "__llvm_profile_override_default_filename";
+}
+
+/// Return the modified name for function \c F suitable to be
+/// used as the key for profile lookup.
+std::string getPGOFuncName(const Function &F,
+ uint64_t Version = INSTR_PROF_INDEX_VERSION);
+
+/// Return the modified name for a function suitable to be
+/// used as the key for profile lookup. The function's original
+/// name is \c RawFuncName and has linkage of type \c Linkage.
+/// The function is defined in module \c FileName.
+std::string getPGOFuncName(StringRef RawFuncName,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FileName,
+ uint64_t Version = INSTR_PROF_INDEX_VERSION);
+
+/// Create and return the global variable for function name used in PGO
+/// instrumentation. \c FuncName is the name of the function returned
+/// by a \c getPGOFuncName call.
+GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName);
+
+/// Create and return the global variable for function name used in PGO
+/// instrumentation. \c FuncName is the name of the function
+/// returned by a \c getPGOFuncName call, \c M is the owning module,
+/// and \c Linkage is the linkage of the instrumented function.
+GlobalVariable *createPGOFuncNameVar(Module &M,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FuncName);
+
+/// Given a PGO function name, remove the filename prefix and return
+/// the original (static) function name.
+StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
+
const std::error_category &instrprof_category();
enum class instrprof_error {
- success = 0,
- eof,
- bad_magic,
- bad_header,
- unsupported_version,
- unsupported_hash_type,
- too_large,
- truncated,
- malformed,
- unknown_function,
- hash_mismatch,
- count_mismatch,
- counter_overflow
+ success = 0,
+ eof,
+ unrecognized_format,
+ bad_magic,
+ bad_header,
+ unsupported_version,
+ unsupported_hash_type,
+ too_large,
+ truncated,
+ malformed,
+ unknown_function,
+ hash_mismatch,
+ count_mismatch,
+ counter_overflow,
+ value_site_count_mismatch
};
inline std::error_code make_error_code(instrprof_error E) {
return std::error_code(static_cast<int>(E), instrprof_category());
}
+inline instrprof_error MergeResult(instrprof_error &Accumulator,
+ instrprof_error Result) {
+ // Prefer first error encountered as later errors may be secondary effects of
+ // the initial problem.
+ if (Accumulator == instrprof_error::success &&
+ Result != instrprof_error::success)
+ Accumulator = Result;
+ return Accumulator;
+}
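// Editorial example (not part of the diff): because MergeResult keeps the
// first failure, an accumulation loop reports the root cause rather than a
// secondary effect.
//   instrprof_error Acc = instrprof_error::success;
//   MergeResult(Acc, instrprof_error::counter_overflow); // Acc <- overflow
//   MergeResult(Acc, instrprof_error::hash_mismatch);    // Acc unchanged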
+
+enum InstrProfValueKind : uint32_t {
+#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+namespace object {
+class SectionRef;
+}
+
+namespace IndexedInstrProf {
+uint64_t ComputeHash(StringRef K);
+}
+
+/// A symbol table used for function PGO name look-up with keys
+/// (such as pointers, md5hash values) to the function. A function's
+/// PGO name or name's md5hash are used in retrieving the profile
+/// data of the function. See \c getPGOFuncName() method for details
+/// on how PGO name is formed.
+class InstrProfSymtab {
+public:
+ typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap;
+
+private:
+ StringRef Data;
+ uint64_t Address;
+ // A map from MD5 hash keys to function name strings.
+ std::vector<std::pair<uint64_t, std::string>> HashNameMap;
+ // A map from function runtime address to function name MD5 hash.
+ // This map is only populated and used by raw instr profile reader.
+ AddrHashMap AddrToMD5Map;
+
+public:
+ InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {}
+
+ /// Create InstrProfSymtab from an object file section which
+ /// contains function PGO names that are uncompressed.
+ /// This interface is used by CoverageMappingReader.
+ std::error_code create(object::SectionRef &Section);
+ /// This interface is used by reader of CoverageMapping test
+ /// format.
+ inline std::error_code create(StringRef D, uint64_t BaseAddr);
+ /// Create InstrProfSymtab from a set of names iterable from
+ /// \p IterRange. This interface is used by IndexedProfReader.
+ template <typename NameIterRange> void create(const NameIterRange &IterRange);
+ // If the symtab is created by a series of calls to \c addFuncName, \c
+ // finalizeSymtab needs to be called before looking up function names.
+ // This is required because the underlying map is a vector (for space
+ // efficiency) which needs to be sorted.
+ inline void finalizeSymtab();
+ /// Update the symtab by adding \p FuncName to the table. This interface
+ /// is used by the raw and text profile readers.
+ void addFuncName(StringRef FuncName) {
+ HashNameMap.push_back(std::make_pair(
+ IndexedInstrProf::ComputeHash(FuncName), FuncName.str()));
+ }
+ /// Map a function address to its name's MD5 hash. This interface
+ /// is only used by the raw profiler reader.
+ void mapAddress(uint64_t Addr, uint64_t MD5Val) {
+ AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
+ }
+ AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
+ /// Return the function's PGO name given the function name's symbol
+ /// address in the object file. If an error occurs, return
+ /// an empty string.
+ StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
+ /// Return the function's PGO name from the name's MD5 hash value.
+ /// If not found, return an empty string.
+ inline StringRef getFuncName(uint64_t FuncMD5Hash);
+};
+
+std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
+ Data = D;
+ Address = BaseAddr;
+ return std::error_code();
+}
+
+template <typename NameIterRange>
+void InstrProfSymtab::create(const NameIterRange &IterRange) {
+ for (auto Name : IterRange)
+ HashNameMap.push_back(
+ std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str()));
+ finalizeSymtab();
+}
+
+void InstrProfSymtab::finalizeSymtab() {
+ std::sort(HashNameMap.begin(), HashNameMap.end(), less_first());
+ HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()),
+ HashNameMap.end());
+ std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first());
+ AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
+ AddrToMD5Map.end());
+}
+
+StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
+ auto Result =
+ std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash,
+ [](const std::pair<uint64_t, std::string> &LHS,
+ uint64_t RHS) { return LHS.first < RHS; });
+ if (Result != HashNameMap.end())
+ return Result->second;
+ return StringRef();
+}
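// Editorial sketch (not part of the diff) of the intended call sequence for
// InstrProfSymtab; the function names are illustrative.
//   InstrProfSymtab Symtab;
//   Symtab.addFuncName("main");
//   Symtab.addFuncName("file.c:static_helper");
//   Symtab.finalizeSymtab(); // must precede any lookup
//   StringRef N = Symtab.getFuncName(IndexedInstrProf::ComputeHash("main"));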
+
+struct InstrProfValueSiteRecord {
+ /// Value profiling data pairs at a given value site.
+ std::list<InstrProfValueData> ValueData;
+
+ InstrProfValueSiteRecord() { ValueData.clear(); }
+ template <class InputIterator>
+ InstrProfValueSiteRecord(InputIterator F, InputIterator L)
+ : ValueData(F, L) {}
+
+ /// Sort ValueData ascending by Value
+ void sortByTargetValues() {
+ ValueData.sort(
+ [](const InstrProfValueData &left, const InstrProfValueData &right) {
+ return left.Value < right.Value;
+ });
+ }
+
+ /// Merge data from another InstrProfValueSiteRecord
+ /// Optionally scale merged counts by \p Weight.
+ instrprof_error mergeValueData(InstrProfValueSiteRecord &Input,
+ uint64_t Weight = 1);
+};
+
/// Profiling information for a single function.
struct InstrProfRecord {
InstrProfRecord() {}
@@ -52,8 +328,258 @@ struct InstrProfRecord {
StringRef Name;
uint64_t Hash;
std::vector<uint64_t> Counts;
+
+ typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType;
+
+ /// Return the number of value profile kinds with non-zero number
+ /// of profile sites.
+ inline uint32_t getNumValueKinds() const;
+ /// Return the number of instrumented sites for ValueKind.
+ inline uint32_t getNumValueSites(uint32_t ValueKind) const;
+ /// Return the total number of ValueData for ValueKind.
+ inline uint32_t getNumValueData(uint32_t ValueKind) const;
+ /// Return the number of value data collected for ValueKind at profiling
+ /// site: Site.
+ inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
+ uint32_t Site) const;
+ /// Return the array of profiled values at \p Site.
+ inline std::unique_ptr<InstrProfValueData[]>
+ getValueForSite(uint32_t ValueKind, uint32_t Site,
+ uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const;
+ inline void
+ getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, uint32_t Site,
+ uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const;
+ /// Reserve space for NumValueSites sites.
+ inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
+ /// Add ValueData for ValueKind at value Site.
+ void addValueData(uint32_t ValueKind, uint32_t Site,
+ InstrProfValueData *VData, uint32_t N,
+ ValueMapType *ValueMap);
+
+ /// Merge the counts in \p Other into this one.
+ /// Optionally scale merged counts by \p Weight.
+ instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1);
+
+ /// Clear value data entries
+ void clearValueData() {
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ getValueSitesForKind(Kind).clear();
+ }
+
+private:
+ std::vector<InstrProfValueSiteRecord> IndirectCallSites;
+ const std::vector<InstrProfValueSiteRecord> &
+ getValueSitesForKind(uint32_t ValueKind) const {
+ switch (ValueKind) {
+ case IPVK_IndirectCallTarget:
+ return IndirectCallSites;
+ default:
+ llvm_unreachable("Unknown value kind!");
+ }
+ return IndirectCallSites;
+ }
+
+ std::vector<InstrProfValueSiteRecord> &
+ getValueSitesForKind(uint32_t ValueKind) {
+ return const_cast<std::vector<InstrProfValueSiteRecord> &>(
+ const_cast<const InstrProfRecord *>(this)
+ ->getValueSitesForKind(ValueKind));
+ }
+
+ // Map indirect call target name hash to name string.
+ uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
+ ValueMapType *HashKeys);
+
+ // Merge Value Profile data from Src record to this record for ValueKind.
+ // Scale merged value counts by \p Weight.
+ instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+ uint64_t Weight);
+};
+
+uint32_t InstrProfRecord::getNumValueKinds() const {
+ uint32_t NumValueKinds = 0;
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ NumValueKinds += !(getValueSitesForKind(Kind).empty());
+ return NumValueKinds;
+}
+
+uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
+ uint32_t N = 0;
+ const std::vector<InstrProfValueSiteRecord> &SiteRecords =
+ getValueSitesForKind(ValueKind);
+ for (auto &SR : SiteRecords) {
+ N += SR.ValueData.size();
+ }
+ return N;
+}
+
+uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
+ return getValueSitesForKind(ValueKind).size();
+}
+
+uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
+ uint32_t Site) const {
+ return getValueSitesForKind(ValueKind)[Site].ValueData.size();
+}
+
+std::unique_ptr<InstrProfValueData[]> InstrProfRecord::getValueForSite(
+ uint32_t ValueKind, uint32_t Site,
+ uint64_t (*ValueMapper)(uint32_t, uint64_t)) const {
+ uint32_t N = getNumValueDataForSite(ValueKind, Site);
+ if (N == 0)
+ return std::unique_ptr<InstrProfValueData[]>(nullptr);
+
+ auto VD = llvm::make_unique<InstrProfValueData[]>(N);
+ getValueForSite(VD.get(), ValueKind, Site, ValueMapper);
+
+ return VD;
+}
+
+void InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
+ uint32_t ValueKind, uint32_t Site,
+ uint64_t (*ValueMapper)(uint32_t,
+ uint64_t)) const {
+ uint32_t I = 0;
+ for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
+ Dest[I].Value = ValueMapper ? ValueMapper(ValueKind, V.Value) : V.Value;
+ Dest[I].Count = V.Count;
+ I++;
+ }
+}
+
+void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
+ std::vector<InstrProfValueSiteRecord> &ValueSites =
+ getValueSitesForKind(ValueKind);
+ ValueSites.reserve(NumValueSites);
+}
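// Editorial sketch (not part of the diff): walking the indirect-call value
// data of a record with the accessors above; Record is an illustrative
// InstrProfRecord.
//   uint32_t K = IPVK_IndirectCallTarget;
//   for (uint32_t S = 0; S < Record.getNumValueSites(K); ++S) {
//     uint32_t N = Record.getNumValueDataForSite(K, S);
//     std::unique_ptr<InstrProfValueData[]> VD = Record.getValueForSite(K, S);
//     for (uint32_t I = 0; I < N; ++I)
//       ; // VD[I].Value is the target hash, VD[I].Count its count
//   }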
+
+inline support::endianness getHostEndianness() {
+ return sys::IsLittleEndianHost ? support::little : support::big;
+}
+
+// Include definitions for value profile data
+#define INSTR_PROF_VALUE_PROF_DATA
+#include "llvm/ProfileData/InstrProfData.inc"
+
+ /*
+ * Initialize the record for runtime value profile data.
+ * Return 0 if the initialization is successful, otherwise
+ * return 1.
+ */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+ const uint16_t *NumValueSites,
+ ValueProfNode **Nodes);
+
+/* Release memory allocated for the runtime record. */
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord);
+
+/* Return the size of the ValueProfData structure that can be used to store
+ the value profile data collected at runtime. */
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record);
+
+/* Return a ValueProfData instance that stores the data collected at runtime. */
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+ ValueProfData *Dst);
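// Editorial sketch (not part of the diff) of the runtime-side call sequence;
// the array contents are illustrative, and in a real profiler the per-site
// linked lists in Nodes are populated before serialization.
//   uint16_t NumSites[IPVK_Last + 1] = {2};
//   ValueProfNode *Nodes[2] = {nullptr, nullptr};
//   ValueProfRuntimeRecord Rec;
//   if (!initializeValueProfRuntimeRecord(&Rec, NumSites, Nodes)) {
//     ValueProfData *VPD = serializeValueProfDataFromRT(&Rec, nullptr);
//     // ...write VPD->TotalSize bytes to the raw profile, then free(VPD)...
//     finalizeValueProfRuntimeRecord(&Rec);
//   }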
+
+namespace IndexedInstrProf {
+
+enum class HashT : uint32_t {
+ MD5,
+
+ Last = MD5
+};
+
+static inline uint64_t MD5Hash(StringRef Str) {
+ MD5 Hash;
+ Hash.update(Str);
+ llvm::MD5::MD5Result Result;
+ Hash.final(Result);
+ // Return the least significant 8 bytes. Our MD5 implementation returns the
+ // result in little endian, so we may need to swap bytes.
+ using namespace llvm::support;
+ return endian::read<uint64_t, little, unaligned>(Result);
+}
+
+inline uint64_t ComputeHash(HashT Type, StringRef K) {
+ switch (Type) {
+ case HashT::MD5:
+ return IndexedInstrProf::MD5Hash(K);
+ }
+ llvm_unreachable("Unhandled hash type");
+}
+
+const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
+const uint64_t Version = INSTR_PROF_INDEX_VERSION;
+const HashT HashType = HashT::MD5;
+
+inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
+
+// This structure defines the file header of the LLVM profile
+// data file in indexed-format.
+struct Header {
+ uint64_t Magic;
+ uint64_t Version;
+ uint64_t MaxFunctionCount;
+ uint64_t HashType;
+ uint64_t HashOffset;
+};
+
+} // end namespace IndexedInstrProf
+
+namespace RawInstrProf {
+
+const uint64_t Version = INSTR_PROF_RAW_VERSION;
+
+template <class IntPtrT> inline uint64_t getMagic();
+template <> inline uint64_t getMagic<uint64_t>() {
+ return INSTR_PROF_RAW_MAGIC_64;
+}
+
+template <> inline uint64_t getMagic<uint32_t>() {
+ return INSTR_PROF_RAW_MAGIC_32;
+}
+
+// Per-function profile data header/control structure.
+// The definition should match the structure defined in
+// compiler-rt/lib/profile/InstrProfiling.h.
+// It should also match the synthesized type in
+// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
+template <class IntPtrT> struct LLVM_ALIGNAS(8) ProfileData {
+ #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
+ #include "llvm/ProfileData/InstrProfData.inc"
};
+// File header structure of the LLVM profile data in raw format.
+// The definition should match the header referenced in
+// compiler-rt/lib/profile/InstrProfilingFile.c and
+// InstrProfilingBuffer.c.
+struct Header {
+#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+} // end namespace RawInstrProf
+
+namespace coverage {
+
+// Profile coverage map has the following layout:
+// [CoverageMapFileHeader]
+// [ArrayStart]
+// [CovMapFunctionRecord]
+// [CovMapFunctionRecord]
+// ...
+// [ArrayEnd]
+// [Encoded Region Mapping Data]
+LLVM_PACKED_START
+template <class IntPtrT> struct CovMapFunctionRecord {
+ #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
+ #include "llvm/ProfileData/InstrProfData.inc"
+};
+LLVM_PACKED_END
+
+}
+
} // end namespace llvm
namespace std {
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
new file mode 100644
index 0000000..48dae50
--- /dev/null
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -0,0 +1,735 @@
+/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\
+|*
+|* The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This is the master file that defines all the data structures, signatures,
+ * and constant literals that are shared across the profiling runtime library,
+ * compiler (instrumentation), and host tools (reader/writer). The entities
+ * defined in this file affect the profile runtime ABI, the raw profile format,
+ * or both.
+ *
+ * The file has two identical copies. The master copy lives in LLVM and
+ * the other one sits in compiler-rt/lib/profile directory. To make changes
+ * in this file, first modify the master copy and copy it over to compiler-rt.
+ * Testing of any change in this file can start only after the two copies are
+ * synced up.
+ *
+ * The first part of the file includes macros that define the types, names,
+ * and initializers for the member fields of the core data structures. The
+ * field declarations for one structure are enabled by defining the field
+ * activation macro associated with that structure. Only one field activation
+ * record can be defined at a time; the remaining definitions are filtered
+ * out by the preprocessor.
+ *
+ * Examples of how the template is used to instantiate structure definition:
+ * 1. To declare a structure:
+ *
+ * struct ProfData {
+ * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ * Type Name;
+ * #include "llvm/ProfileData/InstrProfData.inc"
+ * };
+ *
+ * 2. To construct LLVM type arrays for the struct type:
+ *
+ * Type *DataTypes[] = {
+ * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ * LLVMType,
+ * #include "llvm/ProfileData/InstrProfData.inc"
+ * };
+ *
+ * 3. To construct a constant array for the initializers:
+ * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+ * Initializer,
+ * Constant *ConstantVals[] = {
+ * #include "llvm/ProfileData/InstrProfData.inc"
+ * };
+ *
+ *
+ * The second part of the file includes definitions of all other entities that
+ * are related to runtime ABI and format. When no field activation macro is
+ * defined, this file can be included to introduce the definitions.
+ *
+\*===----------------------------------------------------------------------===*/
+
+/* INSTR_PROF_DATA start. */
+/* Definition of member fields of the per-function control structure. */
+#ifndef INSTR_PROF_DATA
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+
+INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \
+ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \
+ NamePtr->getType()->getPointerElementType()->getArrayNumElements()))
+INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \
+ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters))
+INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
+ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
+ Inc->getHash()->getZExtValue()))
+INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), NamePtr, \
+ ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)))
+INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
+ ConstantExpr::getBitCast(CounterPtr, \
+ llvm::Type::getInt64PtrTy(Ctx)))
+INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \
+ FunctionAddr)
+INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \
+ ConstantPointerNull::get(Int8PtrTy))
+INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
+ ConstantArray::get(Int16ArrayTy, Int16ArrayVals))
+#undef INSTR_PROF_DATA
+/* INSTR_PROF_DATA end. */
+
+/* INSTR_PROF_RAW_HEADER start */
+/* Definition of member fields of the raw profile header data structure. */
+#ifndef INSTR_PROF_RAW_HEADER
+#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
+INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
+INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
+INSTR_PROF_RAW_HEADER(uint64_t, ValueDataSize, ValueDataSize)
+INSTR_PROF_RAW_HEADER(uint64_t, ValueDataDelta, (uintptr_t)ValueDataBegin)
+#undef INSTR_PROF_RAW_HEADER
+/* INSTR_PROF_RAW_HEADER end */
+
+/* VALUE_PROF_FUNC_PARAM start */
+/* Definition of parameter types of the runtime API used to do value profiling
+ * for a given value site.
+ */
+#ifndef VALUE_PROF_FUNC_PARAM
+#define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType)
+#define INSTR_PROF_COMMA
+#else
+#define INSTR_PROF_DATA_DEFINED
+#define INSTR_PROF_COMMA ,
+#endif
+VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \
+ INSTR_PROF_COMMA
+VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA
+VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
+#undef VALUE_PROF_FUNC_PARAM
+#undef INSTR_PROF_COMMA
+/* VALUE_PROF_FUNC_PARAM end */
+
+/* VALUE_PROF_KIND start */
+#ifndef VALUE_PROF_KIND
+#define VALUE_PROF_KIND(Enumerator, Value)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0)
+/* These two kinds must be the last to be
+ * declared. This is to make sure the string
+ * array created with the template can be
+ * indexed with the kind value.
+ */
+VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget)
+VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget)
+
+#undef VALUE_PROF_KIND
+/* VALUE_PROF_KIND end */
+
+/* COVMAP_FUNC_RECORD start */
+/* Definition of member fields of the function record structure in coverage
+ * map.
+ */
+#ifndef COVMAP_FUNC_RECORD
+#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \
+ NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \
+ llvm::Type::getInt8PtrTy(Ctx)))
+COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\
+ NameValue.size()))
+COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\
+ CoverageMapping.size()))
+COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
+ llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), FuncHash))
+#undef COVMAP_FUNC_RECORD
+/* COVMAP_FUNC_RECORD end. */
+
+
+#ifdef INSTR_PROF_VALUE_PROF_DATA
+#define INSTR_PROF_DATA_DEFINED
+
+/*!
+ * This is the header of the data structure that defines the on-disk
+ * layout of the value profile data of a particular kind for one function.
+ */
+typedef struct ValueProfRecord {
+ /* The kind of the value profile record. */
+ uint32_t Kind;
+ /*
+ * The number of value profile sites. It is guaranteed to be non-zero;
+ * otherwise the record for this kind won't be emitted.
+ */
+ uint32_t NumValueSites;
+ /*
+ * The first element of the array that stores the number of profiled
+ * values for each value site. The size of the array is NumValueSites.
+ * Since NumValueSites is greater than zero, there is at least one
+ * element in the array.
+ */
+ uint8_t SiteCountArray[1];
+
+ /*
+ * The fake declaration is for documentation purposes only.
+ * It aligns the start of the next field to an 8 byte boundary.
+ uint8_t Padding[X];
+ */
+
+ /* The array of value profile data. The size of the array is the sum
+ * of all elements in SiteCountArray[].
+ InstrProfValueData ValueData[];
+ */
+
+#ifdef __cplusplus
+ /*!
+ * \brief Return the number of value sites.
+ */
+ uint32_t getNumValueSites() const { return NumValueSites; }
+ /*!
+ * \brief Read data from this record and save it to Record.
+ */
+ void deserializeTo(InstrProfRecord &Record,
+ InstrProfRecord::ValueMapType *VMap);
+ /*
+ * In-place byte swap:
+ * Do byte swap for this instance. \c Old is the original order before
+ * the swap, and \c New is the new byte order.
+ */
+ void swapBytes(support::endianness Old, support::endianness New);
+#endif
+} ValueProfRecord;
+
+/*!
+ * Per-function header/control data structure for value profiling
+ * data in indexed format.
+ */
+typedef struct ValueProfData {
+ /*
+ * Total size in bytes including this field. It must be a multiple
+ * of sizeof(uint64_t).
+ */
+ uint32_t TotalSize;
+ /*
+ * The number of value profile kinds that have value profile data.
+ * In this implementation, a value profile kind is considered to
+ * have profile data if the number of value profile sites for the
+ * kind is not zero. More aggressively, the implementation can
+ * choose to check the actual data value: if none of the value sites
+ * has any profiled values, the kind can be skipped.
+ */
+ uint32_t NumValueKinds;
+
+ /*
+ * Following are a sequence of variable length records. The prefix/header
+ * of each record is defined by ValueProfRecord type. The number of
+ * records is NumValueKinds.
+ * ValueProfRecord Record_1;
+ * ValueProfRecord Record_N;
+ */
+
+#if __cplusplus
+ /*!
+ * Return the total size in bytes of the on-disk value profile data
+ * given the data stored in Record.
+ */
+ static uint32_t getSize(const InstrProfRecord &Record);
+ /*!
+ * Return a pointer to \c ValueProfData instance ready to be streamed.
+ */
+ static std::unique_ptr<ValueProfData>
+ serializeFrom(const InstrProfRecord &Record);
+ /*!
+ * Check the integrity of the record. Return the error code when
+ * an error is detected, otherwise return instrprof_error::success.
+ */
+ instrprof_error checkIntegrity();
+ /*!
+ * Return a pointer to \c ValueProfileData instance ready to be read.
+ * All data in the instance are properly byte swapped. The input
+ * data is assumed to be in little endian order.
+ */
+ static ErrorOr<std::unique_ptr<ValueProfData>>
+ getValueProfData(const unsigned char *SrcBuffer,
+ const unsigned char *const SrcBufferEnd,
+ support::endianness SrcDataEndianness);
+ /*!
+ * Swap byte order from \c Endianness order to host byte order.
+ */
+ void swapBytesToHost(support::endianness Endianness);
+ /*!
+ * Swap byte order from host byte order to \c Endianness order.
+ */
+ void swapBytesFromHost(support::endianness Endianness);
+ /*!
+ * Return the total size of \c ValueProfileData.
+ */
+ uint32_t getSize() const { return TotalSize; }
+ /*!
+ * Read data from this data and save it to \c Record.
+ */
+ void deserializeTo(InstrProfRecord &Record,
+ InstrProfRecord::ValueMapType *VMap);
+ void operator delete(void *ptr) { ::operator delete(ptr); }
+#endif
+} ValueProfData;
+
+/*
+ * The closure is designed to abstract away two types of value profile data:
+ * - InstrProfRecord which is the primary data structure used to
+ * represent profile data in host tools (reader, writer, and profile-use)
+ * - value profile runtime data structure suitable to be used by C
+ * runtime library.
+ *
+ * Both sources of data need to serialize to disk/memory-buffer in common
+ * format: ValueProfData. The abstraction allows compiler-rt's raw profiler
+ * writer to share the same format and code with indexed profile writer.
+ *
+ * For documentation of the member methods below, refer to the corresponding methods
+ * in class InstrProfRecord.
+ */
+typedef struct ValueProfRecordClosure {
+ const void *Record;
+ uint32_t (*GetNumValueKinds)(const void *Record);
+ uint32_t (*GetNumValueSites)(const void *Record, uint32_t VKind);
+ uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind);
+ uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S);
+
+ /*
+ * After extracting the value profile data from the value profile record,
+ * this method is used to map the in-memory value to on-disk value. If
+ * the method is null, the value will be written out untranslated.
+ */
+ uint64_t (*RemapValueData)(uint32_t, uint64_t Value);
+ void (*GetValueForSite)(const void *R, InstrProfValueData *Dst, uint32_t K,
+ uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t));
+ ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes);
+} ValueProfRecordClosure;
+
+/*
+ * A wrapper struct that represents value profile runtime data.
+ * Like InstrProfRecord class which is used by profiling host tools,
+ * ValueProfRuntimeRecord also implements the abstract interfaces defined in
+ * ValueProfRecordClosure so that the runtime data can be serialized using
+ * shared C implementation. In this structure, NumValueSites and Nodes
+ * members are the primary fields while other fields hold the derived
+ * information for fast implementation of closure interfaces.
+ */
+typedef struct ValueProfRuntimeRecord {
+ /* Number of sites for each value profile kind. */
+ const uint16_t *NumValueSites;
+ /* An array of linked-list headers. The size of the array is the
+ * total number of value profile sites: sum(NumValueSites[*]). Each
+ * linked-list stores the values profiled for a value profile site. */
+ ValueProfNode **Nodes;
+
+ /* Total number of value profile kinds which have at least one
+ * value profile site. */
+ uint32_t NumValueKinds;
+ /* An array recording the number of values tracked at each site.
+ * The size of the array is TotalNumValueSites. */
+ uint8_t *SiteCountArray[IPVK_Last + 1];
+ ValueProfNode **NodesKind[IPVK_Last + 1];
+} ValueProfRuntimeRecord;
+
+/* Forward declarations of C interfaces. */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+ const uint16_t *NumValueSites,
+ ValueProfNode **Nodes);
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord);
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record);
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+ ValueProfData *Dst);
+uint32_t getNumValueKindsRT(const void *R);
+
+#undef INSTR_PROF_VALUE_PROF_DATA
+#endif /* INSTR_PROF_VALUE_PROF_DATA */
+
+
+#ifdef INSTR_PROF_COMMON_API_IMPL
+#define INSTR_PROF_DATA_DEFINED
+#ifdef __cplusplus
+#define INSTR_PROF_INLINE inline
+#else
+#define INSTR_PROF_INLINE
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/*!
+ * \brief Return the \c ValueProfRecord header size including the
+ * padding bytes.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) {
+ uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) +
+ sizeof(uint8_t) * NumValueSites;
+ /* Round the size to multiple of 8 bytes. */
+ Size = (Size + 7) & ~7;
+ return Size;
+}
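/* Editorial worked example (not part of the diff): Kind and NumValueSites
 * are each 4 bytes, so offsetof(ValueProfRecord, SiteCountArray) is 8. For
 * NumValueSites == 3 the unpadded size is 8 + 3 = 11, and
 * (11 + 7) & ~7 rounds it up to 16. */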
+
+/*!
+ * \brief Return the total size of the value profile record including the
+ * header and the value data.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordSize(uint32_t NumValueSites,
+ uint32_t NumValueData) {
+ return getValueProfRecordHeaderSize(NumValueSites) +
+ sizeof(InstrProfValueData) * NumValueData;
+}
+
+/*!
+ * \brief Return the pointer to the start of value data array.
+ */
+INSTR_PROF_INLINE
+InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) {
+ return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize(
+ This->NumValueSites));
+}
+
+/*!
+ * \brief Return the total number of value data for \c This record.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) {
+ uint32_t NumValueData = 0;
+ uint32_t I;
+ for (I = 0; I < This->NumValueSites; I++)
+ NumValueData += This->SiteCountArray[I];
+ return NumValueData;
+}
+
+/*!
+ * \brief Return the \c ValueProfRecord that immediately follows \c This one.
+ */
+INSTR_PROF_INLINE
+ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) {
+ uint32_t NumValueData = getValueProfRecordNumValueData(This);
+ return (ValueProfRecord *)((char *)This +
+ getValueProfRecordSize(This->NumValueSites,
+ NumValueData));
+}
+
+/*!
+ * \brief Return the first \c ValueProfRecord instance.
+ */
+INSTR_PROF_INLINE
+ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) {
+ return (ValueProfRecord *)((char *)This + sizeof(ValueProfData));
+}
+
+/* Closure based interfaces. */
+
+/*!
+ * Return the total size in bytes of the on-disk value profile data
+ * given the data stored in Record.
+ */
+uint32_t getValueProfDataSize(ValueProfRecordClosure *Closure) {
+ uint32_t Kind;
+ uint32_t TotalSize = sizeof(ValueProfData);
+ const void *Record = Closure->Record;
+ uint32_t NumValueKinds = Closure->GetNumValueKinds(Record);
+ if (NumValueKinds == 0)
+ return TotalSize;
+
+ for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) {
+ uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind);
+ if (!NumValueSites)
+ continue;
+ TotalSize += getValueProfRecordSize(NumValueSites,
+ Closure->GetNumValueData(Record, Kind));
+ }
+ return TotalSize;
+}
+
+/*!
+ * Extract value profile data of a function for the profile kind \c ValueKind
+ * from the \c Closure and serialize the data into \c This record instance.
+ */
+void serializeValueProfRecordFrom(ValueProfRecord *This,
+ ValueProfRecordClosure *Closure,
+ uint32_t ValueKind, uint32_t NumValueSites) {
+ uint32_t S;
+ const void *Record = Closure->Record;
+ This->Kind = ValueKind;
+ This->NumValueSites = NumValueSites;
+ InstrProfValueData *DstVD = getValueProfRecordValueData(This);
+
+ for (S = 0; S < NumValueSites; S++) {
+ uint32_t ND = Closure->GetNumValueDataForSite(Record, ValueKind, S);
+ This->SiteCountArray[S] = ND;
+ Closure->GetValueForSite(Record, DstVD, ValueKind, S,
+ Closure->RemapValueData);
+ DstVD += ND;
+ }
+}
+
+/*!
+ * Extract value profile data of a function from the \c Closure
+ * and serialize the data into \c DstData if it is not NULL, or into heap
+ * memory allocated by the \c Closure's allocator method.
+ */
+ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
+ ValueProfData *DstData) {
+ uint32_t Kind;
+ uint32_t TotalSize = getValueProfDataSize(Closure);
+
+ ValueProfData *VPD =
+ DstData ? DstData : Closure->AllocValueProfData(TotalSize);
+
+ VPD->TotalSize = TotalSize;
+ VPD->NumValueKinds = Closure->GetNumValueKinds(Closure->Record);
+ ValueProfRecord *VR = getFirstValueProfRecord(VPD);
+ for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) {
+ uint32_t NumValueSites = Closure->GetNumValueSites(Closure->Record, Kind);
+ if (!NumValueSites)
+ continue;
+ serializeValueProfRecordFrom(VR, Closure, Kind, NumValueSites);
+ VR = getValueProfRecordNext(VR);
+ }
+ return VPD;
+}
+
+/*
+ * The value profiler runtime library stores the value profile data
+ * for a given function in \c NumValueSites and \c Nodes structures.
+ * \c ValueProfRuntimeRecord class is used to encapsulate the runtime
+ * profile data and provides fast interfaces to retrieve the profile
+ * information. This interface is used to initialize the runtime record
+ * and pre-compute the information needed for efficient implementation
+ * of callbacks required by the ValueProfRecordClosure class.
+ */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+ const uint16_t *NumValueSites,
+ ValueProfNode **Nodes) {
+ unsigned I, J, S = 0, NumValueKinds = 0;
+ RuntimeRecord->NumValueSites = NumValueSites;
+ RuntimeRecord->Nodes = Nodes;
+ for (I = 0; I <= IPVK_Last; I++) {
+ uint16_t N = NumValueSites[I];
+ if (!N) {
+ RuntimeRecord->SiteCountArray[I] = 0;
+ continue;
+ }
+ NumValueKinds++;
+ RuntimeRecord->SiteCountArray[I] = (uint8_t *)calloc(N, 1);
+ if (!RuntimeRecord->SiteCountArray[I])
+ return 1;
+ RuntimeRecord->NodesKind[I] = Nodes ? &Nodes[S] : NULL;
+ for (J = 0; J < N; J++) {
+ /* Compute value count for each site. */
+ uint32_t C = 0;
+ ValueProfNode *Site = Nodes ? RuntimeRecord->NodesKind[I][J] : NULL;
+ while (Site) {
+ C++;
+ Site = Site->Next;
+ }
+ if (C > UCHAR_MAX)
+ C = UCHAR_MAX;
+ RuntimeRecord->SiteCountArray[I][J] = C;
+ }
+ S += N;
+ }
+ RuntimeRecord->NumValueKinds = NumValueKinds;
+ return 0;
+}
+
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord) {
+ unsigned I;
+ for (I = 0; I <= IPVK_Last; I++) {
+ if (RuntimeRecord->SiteCountArray[I])
+ free(RuntimeRecord->SiteCountArray[I]);
+ }
+}
+
+/* ValueProfRecordClosure Interface implementation for
+ * ValueProfDataRuntimeRecord. */
+uint32_t getNumValueKindsRT(const void *R) {
+ return ((const ValueProfRuntimeRecord *)R)->NumValueKinds;
+}
+
+uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
+ return ((const ValueProfRuntimeRecord *)R)->NumValueSites[VK];
+}
+
+uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, uint32_t S) {
+ const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+ return Record->SiteCountArray[VK][S];
+}
+
+uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
+ unsigned I, S = 0;
+ const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+ if (Record->SiteCountArray[VK] == 0)
+ return 0;
+ for (I = 0; I < Record->NumValueSites[VK]; I++)
+ S += Record->SiteCountArray[VK][I];
+ return S;
+}
+
+void getValueForSiteRT(const void *R, InstrProfValueData *Dst, uint32_t VK,
+ uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)) {
+ unsigned I, N = 0;
+ const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+ N = getNumValueDataForSiteRT(R, VK, S);
+ if (N == 0)
+ return;
+ ValueProfNode *VNode = Record->NodesKind[VK][S];
+ for (I = 0; I < N; I++) {
+ Dst[I] = VNode->VData;
+ VNode = VNode->Next;
+ }
+}
+
+ValueProfData *allocValueProfDataRT(size_t TotalSizeInBytes) {
+ return (ValueProfData *)calloc(TotalSizeInBytes, 1);
+}
+
+static ValueProfRecordClosure RTRecordClosure = {0,
+ getNumValueKindsRT,
+ getNumValueSitesRT,
+ getNumValueDataRT,
+ getNumValueDataForSiteRT,
+ 0,
+ getValueForSiteRT,
+ allocValueProfDataRT};
+
+/*
+ * Return the size of the ValueProfData structure needed to store the
+ * data recorded in the runtime record.
+ */
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) {
+ RTRecordClosure.Record = Record;
+ return getValueProfDataSize(&RTRecordClosure);
+}
+
+/*
+ * Return a ValueProfData instance that stores the data collected at
+ * runtime. If \c DstData is provided by the caller, the value profile
+ * data will be stored in *DstData and DstData is returned; otherwise
+ * the function allocates space for the value data and returns a
+ * pointer to the newly allocated space.
+ */
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+ ValueProfData *DstData) {
+ RTRecordClosure.Record = Record;
+ return serializeValueProfDataFrom(&RTRecordClosure, DstData);
+}
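+
+/*
+ * A minimal usage sketch of the runtime-record API above (assuming the
+ * ValueProfRuntimeRecord type declared earlier in this file and the
+ * NumValueSites/Nodes arrays maintained by the instrumentation runtime):
+ *
+ *   ValueProfRuntimeRecord RTRecord;
+ *   if (!initializeValueProfRuntimeRecord(&RTRecord, NumValueSites, Nodes)) {
+ *     ValueProfData *VPD = serializeValueProfDataFromRT(&RTRecord, NULL);
+ *     if (VPD) {
+ *       // ...emit VPD->TotalSize bytes starting at VPD...
+ *       free(VPD);
+ *     }
+ *     finalizeValueProfRuntimeRecord(&RTRecord);
+ *   }
+ */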
+
+
+#undef INSTR_PROF_COMMON_API_IMPL
+#endif /* INSTR_PROF_COMMON_API_IMPL */
+
+/*============================================================================*/
+
+
+#ifndef INSTR_PROF_DATA_DEFINED
+
+#ifndef INSTR_PROF_DATA_INC_
+#define INSTR_PROF_DATA_INC_
+
+/* Helper macros. */
+#define INSTR_PROF_SIMPLE_QUOTE(x) #x
+#define INSTR_PROF_QUOTE(x) INSTR_PROF_SIMPLE_QUOTE(x)
+#define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y
+#define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y)
+
+/* Magic number to detect file format and endianness.
+ * Use 255 at one end, since no UTF-8 file can use that character. Avoid 0,
+ * so that utilities, like strings, don't grab it as a string. 129 is also
+ * invalid UTF-8, and high enough to be interesting.
+ * Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
+ * for 32-bit platforms.
+ */
+#define INSTR_PROF_RAW_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+ (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+ (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129
+#define INSTR_PROF_RAW_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+ (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+ (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
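+
+/* For example, a reader can detect a 64-bit raw profile in either
+ * endianness like this (a sketch; __builtin_bswap64 is the GCC/Clang
+ * byte-swap builtin, used here purely for illustration):
+ *
+ *   uint64_t Magic = ...first 8 bytes of the file...;
+ *   int IsRaw64 = Magic == (INSTR_PROF_RAW_MAGIC_64) ||
+ *                 Magic == __builtin_bswap64(INSTR_PROF_RAW_MAGIC_64);
+ */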
+
+/* Raw profile format version. */
+#define INSTR_PROF_RAW_VERSION 2
+
+/* Runtime section names and name strings. */
+#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
+#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
+#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+
+#define INSTR_PROF_DATA_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
+#define INSTR_PROF_NAME_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
+#define INSTR_PROF_CNTS_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
+
+/* Macros to define the start/stop section symbols for a given
+ * section on Linux. For instance,
+ * INSTR_PROF_SECT_START(INSTR_PROF_DATA_SECT_NAME) will
+ * expand to __start___llvm_prf_data.
+ */
+#define INSTR_PROF_SECT_START(Sect) \
+ INSTR_PROF_CONCAT(__start_,Sect)
+#define INSTR_PROF_SECT_STOP(Sect) \
+ INSTR_PROF_CONCAT(__stop_,Sect)
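+
+/* A sketch of how a runtime can measure the profile data section on
+ * Linux with the linker-provided symbols (extern char is used here so
+ * the sketch does not depend on the record type):
+ *
+ *   extern char INSTR_PROF_SECT_START(INSTR_PROF_DATA_SECT_NAME);
+ *   extern char INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_SECT_NAME);
+ *   size_t DataSectionSize =
+ *       &INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_SECT_NAME) -
+ *       &INSTR_PROF_SECT_START(INSTR_PROF_DATA_SECT_NAME);
+ */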
+
+/* Value Profiling API linkage name. */
+#define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
+#define INSTR_PROF_VALUE_PROF_FUNC_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
+
+/* InstrProfile per-function control data alignment. */
+#define INSTR_PROF_DATA_ALIGNMENT 8
+
+/* The data structure that represents a value tracked by the
+ * value profiler.
+ */
+typedef struct InstrProfValueData {
+ /* Profiled value. */
+ uint64_t Value;
+ /* Number of times the value appears in the training run. */
+ uint64_t Count;
+} InstrProfValueData;
+
+/* This is an internal data structure used by the value profiler. It
+ * is defined here so that the serialization code can be shared with
+ * LLVM and exercised in unit tests.
+ */
+typedef struct ValueProfNode {
+ InstrProfValueData VData;
+ struct ValueProfNode *Next;
+} ValueProfNode;
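+
+/* The values tracked at one site form a singly linked list; for
+ * instance, the number of distinct values recorded at a site is the
+ * length of its list (the same walk the runtime-record initialization
+ * above performs):
+ *
+ *   uint32_t CountSiteValues(ValueProfNode *Site) {
+ *     uint32_t N = 0;
+ *     for (; Site; Site = Site->Next)
+ *       N++;
+ *     return N;
+ *   }
+ */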
+
+#endif /* INSTR_PROF_DATA_INC_ */
+
+#else
+#undef INSTR_PROF_DATA_DEFINED
+#endif
+
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
index f937e7d..fed3e69 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -23,6 +23,7 @@
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
+#include "llvm/Support/raw_ostream.h"
#include <iterator>
namespace llvm {
@@ -53,7 +54,7 @@ class InstrProfReader {
std::error_code LastError;
public:
- InstrProfReader() : LastError(instrprof_error::success) {}
+ InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
virtual ~InstrProfReader() {}
/// Read the header. Required before reading first record.
@@ -64,7 +65,20 @@ public:
InstrProfIterator begin() { return InstrProfIterator(this); }
InstrProfIterator end() { return InstrProfIterator(); }
+ /// Return the PGO symtab. There are three different readers:
+ /// Raw, Text, and Indexed profile readers. The first two are used
+ /// only by the llvm-profdata tool, while the indexed profile reader
+ /// is also used by the llvm-cov tool and the compiler (backend or
+ /// frontend). Since creating the PGO symtab can incur significant
+ /// runtime and memory overhead (it touches data for the whole
+ /// program), the InstrProfSymtab for the indexed profile reader
+ /// should be created on demand, and it is recommended only for
+ /// dumping purposes with llvm-profdata, not for use in the compiler.
+ virtual InstrProfSymtab &getSymtab() = 0;
+
protected:
+ std::unique_ptr<InstrProfSymtab> Symtab;
/// Set the current std::error_code and return same.
std::error_code error(std::error_code EC) {
LastError = EC;
@@ -107,14 +121,24 @@ private:
TextInstrProfReader(const TextInstrProfReader &) = delete;
TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
+ std::error_code readValueProfileData(InstrProfRecord &Record);
+
public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
: DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
+ /// Return true if the given buffer is in text instrprof format.
+ static bool hasFormat(const MemoryBuffer &Buffer);
+
/// Read the header.
- std::error_code readHeader() override { return success(); }
+ std::error_code readHeader() override;
/// Read a single record.
std::error_code readNextRecord(InstrProfRecord &Record) override;
+
+ InstrProfSymtab &getSymtab() override {
+ assert(Symtab.get());
+ return *Symtab.get();
+ }
};
/// Reader for the raw instrprof binary format from runtime.
@@ -129,31 +153,19 @@ class RawInstrProfReader : public InstrProfReader {
private:
/// The profile data file contents.
std::unique_ptr<MemoryBuffer> DataBuffer;
- struct ProfileData {
- const uint32_t NameSize;
- const uint32_t NumCounters;
- const uint64_t FuncHash;
- const IntPtrT NamePtr;
- const IntPtrT CounterPtr;
- };
- struct RawHeader {
- const uint64_t Magic;
- const uint64_t Version;
- const uint64_t DataSize;
- const uint64_t CountersSize;
- const uint64_t NamesSize;
- const uint64_t CountersDelta;
- const uint64_t NamesDelta;
- };
-
bool ShouldSwapBytes;
uint64_t CountersDelta;
uint64_t NamesDelta;
- const ProfileData *Data;
- const ProfileData *DataEnd;
+ const RawInstrProf::ProfileData<IntPtrT> *Data;
+ const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
const uint64_t *CountersStart;
const char *NamesStart;
+ const uint8_t *ValueDataStart;
const char *ProfileEnd;
+ uint32_t ValueKindLast;
+ uint32_t CurValueDataSize;
+
+ InstrProfRecord::ValueMapType FunctionPtrToNameMap;
RawInstrProfReader(const RawInstrProfReader &) = delete;
RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
@@ -165,13 +177,41 @@ public:
std::error_code readHeader() override;
std::error_code readNextRecord(InstrProfRecord &Record) override;
+ InstrProfSymtab &getSymtab() override {
+ assert(Symtab.get());
+ return *Symtab.get();
+ }
+
private:
+ void createSymtab(InstrProfSymtab &Symtab);
std::error_code readNextHeader(const char *CurrentPos);
- std::error_code readHeader(const RawHeader &Header);
- template <class IntT>
- IntT swap(IntT Int) const {
+ std::error_code readHeader(const RawInstrProf::Header &Header);
+ template <class IntT> IntT swap(IntT Int) const {
return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
}
+ support::endianness getDataEndianness() const {
+ support::endianness HostEndian = getHostEndianness();
+ if (!ShouldSwapBytes)
+ return HostEndian;
+ if (HostEndian == support::little)
+ return support::big;
+ else
+ return support::little;
+ }
+
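+ // Number of bytes needed to pad SizeInBytes up to a multiple of 8;
+ // e.g. a 13-byte payload needs 3 bytes of padding: 7 & (8 - 13 % 8) == 3.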
+ inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
+ return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
+ }
+ std::error_code readName(InstrProfRecord &Record);
+ std::error_code readFuncHash(InstrProfRecord &Record);
+ std::error_code readRawCounts(InstrProfRecord &Record);
+ std::error_code readValueProfilingData(InstrProfRecord &Record);
+ bool atEnd() const { return Data == DataEnd; }
+ void advanceData() {
+ Data++;
+ ValueDataStart += CurValueDataSize;
+ }
+
const uint64_t *getCounter(IntPtrT CounterPtr) const {
ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
return CountersStart + Offset;
@@ -195,10 +235,15 @@ class InstrProfLookupTrait {
std::vector<InstrProfRecord> DataBuffer;
IndexedInstrProf::HashT HashType;
unsigned FormatVersion;
+ // Endianness of the input value profile data.
+ // It is little-endian by default, but can be changed
+ // for testing purposes.
+ support::endianness ValueProfDataEndianness;
public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
- : HashType(HashType), FormatVersion(FormatVersion) {}
+ : HashType(HashType), FormatVersion(FormatVersion),
+ ValueProfDataEndianness(support::little) {}
typedef ArrayRef<InstrProfRecord> data_type;
@@ -209,6 +254,7 @@ public:
static bool EqualKey(StringRef A, StringRef B) { return A == B; }
static StringRef GetInternalKey(StringRef K) { return K; }
+ static StringRef GetExternalKey(StringRef K) { return K; }
hash_value_type ComputeHash(StringRef K);
@@ -224,11 +270,64 @@ public:
return StringRef((const char *)D, N);
}
+ bool readValueProfilingData(const unsigned char *&D,
+ const unsigned char *const End);
data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
+
+ // Used for testing purposes only.
+ void setValueProfDataEndianness(support::endianness Endianness) {
+ ValueProfDataEndianness = Endianness;
+ }
+};
+
+struct InstrProfReaderIndexBase {
+ // Read all the profile records with the same key pointed to by the
+ // current iterator.
+ virtual std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
+ // Read all the profile records with the key equal to FuncName
+ virtual std::error_code getRecords(StringRef FuncName,
+ ArrayRef<InstrProfRecord> &Data) = 0;
+ virtual void advanceToNextKey() = 0;
+ virtual bool atEnd() const = 0;
+ virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
+ virtual ~InstrProfReaderIndexBase() {}
+ virtual uint64_t getVersion() const = 0;
+ virtual void populateSymtab(InstrProfSymtab &) = 0;
};
typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
- InstrProfReaderIndex;
+ OnDiskHashTableImplV3;
+
+template <typename HashTableImpl>
+class InstrProfReaderIndex : public InstrProfReaderIndexBase {
+
+private:
+ std::unique_ptr<HashTableImpl> HashTable;
+ typename HashTableImpl::data_iterator RecordIterator;
+ uint64_t FormatVersion;
+
+public:
+ InstrProfReaderIndex(const unsigned char *Buckets,
+ const unsigned char *const Payload,
+ const unsigned char *const Base,
+ IndexedInstrProf::HashT HashType, uint64_t Version);
+
+ std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) override;
+ std::error_code getRecords(StringRef FuncName,
+ ArrayRef<InstrProfRecord> &Data) override;
+ void advanceToNextKey() override { RecordIterator++; }
+ bool atEnd() const override {
+ return RecordIterator == HashTable->data_end();
+ }
+ void setValueProfDataEndianness(support::endianness Endianness) override {
+ HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
+ }
+ ~InstrProfReaderIndex() override {}
+ uint64_t getVersion() const override { return FormatVersion; }
+ void populateSymtab(InstrProfSymtab &Symtab) override {
+ Symtab.create(HashTable->keys());
+ }
+};
/// Reader for the indexed binary instrprof format.
class IndexedInstrProfReader : public InstrProfReader {
@@ -236,17 +335,15 @@ private:
/// The profile data file contents.
std::unique_ptr<MemoryBuffer> DataBuffer;
/// The index into the profile data.
- std::unique_ptr<InstrProfReaderIndex> Index;
- /// Iterator over the profile data.
- InstrProfReaderIndex::data_iterator RecordIterator;
- /// The file format version of the profile data.
- uint64_t FormatVersion;
+ std::unique_ptr<InstrProfReaderIndexBase> Index;
/// The maximal execution count among all functions.
uint64_t MaxFunctionCount;
IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
+
public:
+ uint64_t getVersion() const { return Index->getVersion(); }
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
: DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
@@ -258,9 +355,15 @@ public:
/// Read a single record.
std::error_code readNextRecord(InstrProfRecord &Record) override;
+ /// Return the pointer to InstrProfRecord associated with FuncName
+ /// and FuncHash
+ ErrorOr<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
+ uint64_t FuncHash);
+
/// Fill Counts with the profile data for the given function name.
std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
std::vector<uint64_t> &Counts);
+
/// Return the maximum of all known function counts.
uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
@@ -270,6 +373,16 @@ public:
static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
create(std::unique_ptr<MemoryBuffer> Buffer);
+
+ // Used for testing purposes only.
+ void setValueProfDataEndianness(support::endianness Endianness) {
+ Index->setValueProfDataEndianness(Endianness);
+ }
+
+ // See description in the base class. This interface is designed
+ // to be used by llvm-profdata (for dumping). Avoid using this when
+ // the client is the compiler.
+ InstrProfSymtab &getSymtab() override;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h b/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
index ce0bb52..e7f53de 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -15,38 +15,43 @@
#ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H
#define LLVM_PROFILEDATA_INSTRPROFWRITER_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
namespace llvm {
/// Writer for instrumentation based profile data.
class InstrProfWriter {
public:
- typedef SmallDenseMap<uint64_t, std::vector<uint64_t>, 1> CounterData;
+ typedef SmallDenseMap<uint64_t, InstrProfRecord, 1> ProfilingData;
+
private:
- StringMap<CounterData> FunctionData;
+ StringMap<ProfilingData> FunctionData;
uint64_t MaxFunctionCount;
+
public:
InstrProfWriter() : MaxFunctionCount(0) {}
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
- /// summed.
- std::error_code addFunctionCounts(StringRef FunctionName,
- uint64_t FunctionHash,
- ArrayRef<uint64_t> Counters);
+ /// summed. Optionally scale counts by \p Weight.
+ std::error_code addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
/// Write the profile to \c OS
void write(raw_fd_ostream &OS);
+ /// Write the profile in text format to \c OS
+ void writeText(raw_fd_ostream &OS);
+ /// Write \c Record in text format to \c OS
+ static void writeRecordInText(const InstrProfRecord &Record,
+ InstrProfSymtab &Symtab, raw_fd_ostream &OS);
/// Write the profile, returning the raw data. For testing.
std::unique_ptr<MemoryBuffer> writeBuffer();
+ // Internal interface for testing purposes only.
+ void setValueProfDataEndianness(support::endianness Endianness);
+
private:
std::pair<uint64_t, uint64_t> writeImpl(raw_ostream &OS);
};
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
index 1b82e55..8df3fe8 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
@@ -11,14 +11,17 @@
// sample profile data.
//
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_
#define LLVM_PROFILEDATA_SAMPLEPROF_H_
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
+
+#include <map>
#include <system_error>
namespace llvm {
@@ -32,13 +35,27 @@ enum class sampleprof_error {
too_large,
truncated,
malformed,
- unrecognized_format
+ unrecognized_format,
+ unsupported_writing_format,
+ truncated_name_table,
+ not_implemented,
+ counter_overflow
};
inline std::error_code make_error_code(sampleprof_error E) {
return std::error_code(static_cast<int>(E), sampleprof_category());
}
+inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
+ sampleprof_error Result) {
+ // Prefer the first error encountered, as later errors may be secondary
+ // effects of the initial problem.
+ if (Accumulator == sampleprof_error::success &&
+ Result != sampleprof_error::success)
+ Accumulator = Result;
+ return Accumulator;
+}
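+
+// For example, merging two records accumulates only the first failure
+// (a sketch mirroring SampleRecord::merge below):
+//
+//   sampleprof_error Result = addSamples(Other.getSamples(), Weight);
+//   MergeResult(Result, addCalledTarget(F, S, Weight));
+//   return Result;  // success only if every step succeeded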
+
} // end namespace llvm
namespace std {
@@ -57,7 +74,7 @@ static inline uint64_t SPMagic() {
uint64_t('2') << (64 - 56) | uint64_t(0xff);
}
-static inline uint64_t SPVersion() { return 100; }
+static inline uint64_t SPVersion() { return 102; }
/// Represents the relative location of an instruction.
///
@@ -69,36 +86,36 @@ static inline uint64_t SPVersion() { return 100; }
/// that are on the same line but belong to different basic blocks
/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
struct LineLocation {
- LineLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {}
- int LineOffset;
- unsigned Discriminator;
+ LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
+ void print(raw_ostream &OS) const;
+ void dump() const;
+ bool operator<(const LineLocation &O) const {
+ return LineOffset < O.LineOffset ||
+ (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
+ }
+
+ uint32_t LineOffset;
+ uint32_t Discriminator;
};
-} // End namespace sampleprof
+raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
-template <> struct DenseMapInfo<sampleprof::LineLocation> {
- typedef DenseMapInfo<int> OffsetInfo;
- typedef DenseMapInfo<unsigned> DiscriminatorInfo;
- static inline sampleprof::LineLocation getEmptyKey() {
- return sampleprof::LineLocation(OffsetInfo::getEmptyKey(),
- DiscriminatorInfo::getEmptyKey());
- }
- static inline sampleprof::LineLocation getTombstoneKey() {
- return sampleprof::LineLocation(OffsetInfo::getTombstoneKey(),
- DiscriminatorInfo::getTombstoneKey());
- }
- static inline unsigned getHashValue(sampleprof::LineLocation Val) {
- return DenseMapInfo<std::pair<int, unsigned>>::getHashValue(
- std::pair<int, unsigned>(Val.LineOffset, Val.Discriminator));
- }
- static inline bool isEqual(sampleprof::LineLocation LHS,
- sampleprof::LineLocation RHS) {
- return LHS.LineOffset == RHS.LineOffset &&
- LHS.Discriminator == RHS.Discriminator;
- }
+/// Represents the relative location of a callsite.
+///
+/// Callsite locations are specified by the line offset from the
+/// beginning of the function (marked by the line where the function
+/// head is), the discriminator value within that line, and the callee
+/// function name.
+struct CallsiteLocation : public LineLocation {
+ CallsiteLocation(uint32_t L, uint32_t D, StringRef N)
+ : LineLocation(L, D), CalleeName(N) {}
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+ StringRef CalleeName;
};
-namespace sampleprof {
+raw_ostream &operator<<(raw_ostream &OS, const CallsiteLocation &Loc);
/// Representation of a single sample record.
///
@@ -112,52 +129,79 @@ namespace sampleprof {
/// will be a list of one or more functions.
class SampleRecord {
public:
- typedef StringMap<unsigned> CallTargetMap;
+ typedef StringMap<uint64_t> CallTargetMap;
SampleRecord() : NumSamples(0), CallTargets() {}
/// Increment the number of samples for this record by \p S.
+ /// Optionally scale sample count \p S by \p Weight.
///
/// Sample counts accumulate using saturating arithmetic, to avoid wrapping
/// around unsigned integers.
- void addSamples(unsigned S) {
- if (NumSamples <= std::numeric_limits<unsigned>::max() - S)
- NumSamples += S;
- else
- NumSamples = std::numeric_limits<unsigned>::max();
+ sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
+ bool Overflowed;
+ if (Weight > 1) {
+ S = SaturatingMultiply(S, Weight, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+ }
+ NumSamples = SaturatingAdd(NumSamples, S, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+
+ return sampleprof_error::success;
}
/// Add called function \p F with samples \p S.
+ /// Optionally scale sample count \p S by \p Weight.
///
/// Sample counts accumulate using saturating arithmetic, to avoid wrapping
/// around unsigned integers.
- void addCalledTarget(StringRef F, unsigned S) {
- unsigned &TargetSamples = CallTargets[F];
- if (TargetSamples <= std::numeric_limits<unsigned>::max() - S)
- TargetSamples += S;
- else
- TargetSamples = std::numeric_limits<unsigned>::max();
+ sampleprof_error addCalledTarget(StringRef F, uint64_t S,
+ uint64_t Weight = 1) {
+ uint64_t &TargetSamples = CallTargets[F];
+ bool Overflowed;
+ if (Weight > 1) {
+ S = SaturatingMultiply(S, Weight, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+ }
+ TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+
+ return sampleprof_error::success;
}
/// Return true if this sample record contains function calls.
bool hasCalls() const { return CallTargets.size() > 0; }
- unsigned getSamples() const { return NumSamples; }
+ uint64_t getSamples() const { return NumSamples; }
const CallTargetMap &getCallTargets() const { return CallTargets; }
/// Merge the samples in \p Other into this record.
- void merge(const SampleRecord &Other) {
- addSamples(Other.getSamples());
- for (const auto &I : Other.getCallTargets())
- addCalledTarget(I.first(), I.second);
+ /// Optionally scale sample counts by \p Weight.
+ sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
+ sampleprof_error Result = addSamples(Other.getSamples(), Weight);
+ for (const auto &I : Other.getCallTargets()) {
+ MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
+ }
+ return Result;
}
+ void print(raw_ostream &OS, unsigned Indent) const;
+ void dump() const;
+
private:
- unsigned NumSamples;
+ uint64_t NumSamples;
CallTargetMap CallTargets;
};
-typedef DenseMap<LineLocation, SampleRecord> BodySampleMap;
+raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
+
+typedef std::map<LineLocation, SampleRecord> BodySampleMap;
+class FunctionSamples;
+typedef std::map<CallsiteLocation, FunctionSamples> CallsiteSampleMap;
/// Representation of the samples collected for a function.
///
@@ -167,59 +211,109 @@ typedef DenseMap<LineLocation, SampleRecord> BodySampleMap;
class FunctionSamples {
public:
FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {}
- void print(raw_ostream &OS = dbgs());
- void addTotalSamples(unsigned Num) { TotalSamples += Num; }
- void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
- void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) {
- assert(LineOffset >= 0);
- // When dealing with instruction weights, we use the value
- // zero to indicate the absence of a sample. If we read an
- // actual zero from the profile file, use the value 1 to
- // avoid the confusion later on.
- if (Num == 0)
- Num = 1;
- BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num);
- }
- void addCalledTargetSamples(int LineOffset, unsigned Discriminator,
- std::string FName, unsigned Num) {
- assert(LineOffset >= 0);
- BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(FName,
- Num);
+ void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
+ void dump() const;
+ sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
+ bool Overflowed;
+ if (Weight > 1) {
+ Num = SaturatingMultiply(Num, Weight, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+ }
+ TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+
+ return sampleprof_error::success;
}
+ sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
+ bool Overflowed;
+ if (Weight > 1) {
+ Num = SaturatingMultiply(Num, Weight, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
+ }
+ TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed);
+ if (Overflowed)
+ return sampleprof_error::counter_overflow;
- /// Return the sample record at the given location.
- /// Each location is specified by \p LineOffset and \p Discriminator.
- SampleRecord &sampleRecordAt(const LineLocation &Loc) {
- return BodySamples[Loc];
+ return sampleprof_error::success;
+ }
+ sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
+ uint64_t Num, uint64_t Weight = 1) {
+ return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
+ Num, Weight);
+ }
+ sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
+ uint32_t Discriminator,
+ std::string FName, uint64_t Num,
+ uint64_t Weight = 1) {
+ return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
+ FName, Num, Weight);
}
/// Return the number of samples collected at the given location.
/// Each location is specified by \p LineOffset and \p Discriminator.
- unsigned samplesAt(int LineOffset, unsigned Discriminator) {
- return sampleRecordAt(LineLocation(LineOffset, Discriminator)).getSamples();
+ /// If the location is not found in profile, return error.
+ ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
+ uint32_t Discriminator) const {
+ const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
+ if (ret == BodySamples.end())
+ return std::error_code();
+ else
+ return ret->second.getSamples();
}
- bool empty() const { return BodySamples.empty(); }
+ /// Return the function samples at the given callsite location.
+ FunctionSamples &functionSamplesAt(const CallsiteLocation &Loc) {
+ return CallsiteSamples[Loc];
+ }
+
+ /// Return a pointer to function samples at the given callsite location.
+ const FunctionSamples *
+ findFunctionSamplesAt(const CallsiteLocation &Loc) const {
+ auto iter = CallsiteSamples.find(Loc);
+ if (iter == CallsiteSamples.end()) {
+ return nullptr;
+ } else {
+ return &iter->second;
+ }
+ }
+
+ bool empty() const { return TotalSamples == 0; }
/// Return the total number of samples collected inside the function.
- unsigned getTotalSamples() const { return TotalSamples; }
+ uint64_t getTotalSamples() const { return TotalSamples; }
/// Return the total number of samples collected at the head of the
/// function.
- unsigned getHeadSamples() const { return TotalHeadSamples; }
+ uint64_t getHeadSamples() const { return TotalHeadSamples; }
/// Return all the samples collected in the body of the function.
const BodySampleMap &getBodySamples() const { return BodySamples; }
+ /// Return all the callsite samples collected in the body of the function.
+ const CallsiteSampleMap &getCallsiteSamples() const {
+ return CallsiteSamples;
+ }
+
/// Merge the samples in \p Other into this one.
- void merge(const FunctionSamples &Other) {
- addTotalSamples(Other.getTotalSamples());
- addHeadSamples(Other.getHeadSamples());
+ /// Optionally scale samples by \p Weight.
+ sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
+ sampleprof_error Result = sampleprof_error::success;
+ MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
+ MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
for (const auto &I : Other.getBodySamples()) {
const LineLocation &Loc = I.first;
const SampleRecord &Rec = I.second;
- sampleRecordAt(Loc).merge(Rec);
+ MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
+ }
+ for (const auto &I : Other.getCallsiteSamples()) {
+ const CallsiteLocation &Loc = I.first;
+ const FunctionSamples &Rec = I.second;
+ MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight));
}
+ return Result;
}
private:
@@ -227,12 +321,12 @@ private:
///
/// Samples are cumulative, they include all the samples collected
/// inside this function and all its inlined callees.
- unsigned TotalSamples;
+ uint64_t TotalSamples;
/// Total number of samples collected at the head of the function.
/// This is an approximation of the number of calls made to this function
/// at runtime.
- unsigned TotalHeadSamples;
+ uint64_t TotalHeadSamples;
/// Map instruction locations to collected samples.
///
@@ -240,10 +334,53 @@ private:
/// collected at the corresponding line offset. All line locations
/// are an offset from the start of the function.
BodySampleMap BodySamples;
+
+ /// Map call sites to collected samples for the called function.
+ ///
+ /// Each entry in this map corresponds to all the samples
+ /// collected for the inlined function call at the given
+ /// location. For example, given:
+ ///
+ /// void foo() {
+ /// 1 bar();
+ /// ...
+ /// 8 baz();
+ /// }
+ ///
+ /// If the bar() and baz() calls were inlined inside foo(), this
+ /// map will contain two entries. One for all the samples collected
+ /// in the call to bar() at line offset 1, the other for all the samples
+ /// collected in the call to baz() at line offset 8.
+ CallsiteSampleMap CallsiteSamples;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
+
+/// Sort a LocationT->SampleT map by LocationT.
+///
+/// It produces a sorted list of <LocationT, SampleT> records by ascending
+/// order of LocationT.
+template <class LocationT, class SampleT> class SampleSorter {
+public:
+ typedef std::pair<const LocationT, SampleT> SamplesWithLoc;
+ typedef SmallVector<const SamplesWithLoc *, 20> SamplesWithLocList;
+
+ SampleSorter(const std::map<LocationT, SampleT> &Samples) {
+ for (const auto &I : Samples)
+ V.push_back(&I);
+ std::stable_sort(V.begin(), V.end(),
+ [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
+ return A->first < B->first;
+ });
+ }
+ const SamplesWithLocList &get() const { return V; }
+
+private:
+ SamplesWithLocList V;
};
-} // End namespace sampleprof
+} // end namespace sampleprof
-} // End namespace llvm
+} // end namespace llvm
#endif // LLVM_PROFILEDATA_SAMPLEPROF_H_
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
index c082a1a..6db0fbb 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -9,11 +9,181 @@
//
// This file contains definitions needed for reading sample profiles.
//
+// NOTE: If you are making changes to this file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// Text format
+// -----------
+//
+// Sample profiles are written as ASCII text. The file is divided into
+// sections, which correspond to each of the functions executed at runtime.
+// Each section has the following format
+//
+// function1:total_samples:total_head_samples
+// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+// ...
+// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+// offsetA[.discriminator]: fnA:num_of_total_samples
+// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
+// ...
+//
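+// For example (an illustrative profile; "inlined1" is a callee inlined
+// at offset 6 of "main"):
+//
+// main:184019:0
+//  4: 534
+//  5: 1075
+//  6: inlined1:1000
+//   1: 1000
+//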
+// This is a nested tree in which the indentation represents the nesting
+// level of the inline stack. There are no blank lines in the file, and
+// the spacing within a single line is fixed. Additional spaces will
+// result in an error while reading the file.
+//
+// Any line starting with the '#' character is completely ignored.
+//
+// Inlined calls are represented with indentation. The inline stack is a
+// stack of source locations in which the top of the stack represents the
+// leaf function, and the bottom of the stack represents the actual
+// symbol to which the instruction belongs.
+//
+// Function names must be mangled in order for the profile loader to
+// match them in the current translation unit. The two numbers in the
+// function header specify how many total samples were accumulated in the
+// function (first number), and the total number of samples accumulated
+// in the prologue of the function (second number). This head sample
+// count provides an indicator of how frequently the function is invoked.
+//
+// There are two types of lines in the function body.
+//
+// * Sampled line represents the profile information of a source location.
+// * Callsite line represents the profile information of a callsite.
+//
+// Each sampled line may contain several items. Some are optional (marked
+// below):
+//
+// a. Source line offset. This number represents the line number
+// in the function where the sample was collected. The line number is
+// always relative to the line where symbol of the function is
+// defined. So, if the function has its header at line 280, the offset
+// 13 is at line 293 in the file.
+//
+// Note that this offset should never be a negative number. This could
+// happen in cases like macros. The debug machinery will register the
+// line number at the point of macro expansion. So, if the macro was
+// expanded in a line before the start of the function, the profile
+// converter should emit a 0 as the offset (this means that the optimizers
+// will not be able to associate a meaningful weight to the instructions
+// in the macro).
+//
+// b. [OPTIONAL] Discriminator. This is used if the sampled program
+// was compiled with DWARF discriminator support
+// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
+// DWARF discriminators are unsigned integer values that allow the
+// compiler to distinguish between multiple execution paths on the
+// same source line location.
+//
+// For example, consider the line of code ``if (cond) foo(); else bar();``.
+// If the predicate ``cond`` is true 80% of the time, then the edge
+// into function ``foo`` should be considered to be taken most of the
+// time. But both calls to ``foo`` and ``bar`` are at the same source
+// line, so a sample count at that line is not sufficient. The
+// compiler needs to know which part of that line is taken more
+// frequently.
+//
+// This is what discriminators provide. In this case, the calls to
+// ``foo`` and ``bar`` will be at the same line, but will have
+// different discriminator values. This allows the compiler to correctly
+// set edge weights into ``foo`` and ``bar``.
+//
+// c. Number of samples. This is an integer quantity representing the
+// number of samples collected by the profiler at this source
+// location.
+//
+// d. [OPTIONAL] Potential call targets and samples. If present, this
+// line contains a call instruction. This models both direct and
+// indirect calls. Each called target is listed together with the
+// number of samples. For example,
+//
+// 130: 7 foo:3 bar:2 baz:7
+//
+// The above means that at relative line offset 130 there is a call
+// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
+// with ``baz()`` being the relatively more frequently called target.
+//
+// Each callsite line may contain several items. Some are optional.
+//
+// a. Source line offset. This number represents the line number of the
+// callsite that is inlined in the profiled binary.
+//
+// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
+//
+// c. Number of samples. This is an integer quantity representing the
+// total number of samples collected for the inlined instance at this
+// callsite.
+//
+//
+// Binary format
+// -------------
+//
+// This is a more compact encoding. Numbers are encoded as ULEB128 values
+// and all strings are encoded in a name table. The file is organized in
+// the following sections:
+//
+// MAGIC (uint64_t)
+// File identifier computed by function SPMagic() (0x5350524f463432ff)
+//
+// VERSION (uint32_t)
+// File format version number computed by SPVersion()
+//
+// NAME TABLE
+// SIZE (uint32_t)
+// Number of entries in the name table.
+// NAMES
+// A NUL-separated list of SIZE strings.
+//
+// FUNCTION BODY (one for each uninlined function body present in the profile)
+// HEAD_SAMPLES (uint64_t) [only for top-level functions]
+// Total number of samples collected at the head (prologue) of the
+// function.
+// NOTE: This field should only be present for top-level functions
+// (i.e., not inlined into any caller). Inlined function calls
+// have no prologue, so they don't need this.
+// NAME_IDX (uint32_t)
+// Index into the name table indicating the function name.
+// SAMPLES (uint64_t)
+// Total number of samples collected in this function.
+// NRECS (uint32_t)
+// Total number of sampling records in this function's profile.
+// BODY RECORDS
+// A list of NRECS entries. Each entry contains:
+// OFFSET (uint32_t)
+// Line offset from the start of the function.
+// DISCRIMINATOR (uint32_t)
+// Discriminator value (see description of discriminators
+// in the text format documentation above).
+// SAMPLES (uint64_t)
+// Number of samples collected at this location.
+// NUM_CALLS (uint32_t)
+// Number of non-inlined function calls made at this location. In the
+// case of direct calls, this number will always be 1. For indirect
+// calls (virtual functions and function pointers), this will
+// represent all the actual functions called at runtime.
+// CALL_TARGETS
+// A list of NUM_CALLS entries for each called function:
+// NAME_IDX (uint32_t)
+// Index into the name table with the callee name.
+// SAMPLES (uint64_t)
+// Number of samples collected at the call site.
+// NUM_INLINED_FUNCTIONS (uint32_t)
+// Number of callees inlined into this function.
+// INLINED FUNCTION RECORDS
+// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
+// callees.
+// OFFSET (uint32_t)
+// Line offset from the start of the function.
+// DISCRIMINATOR (uint32_t)
+// Discriminator value (see description of discriminators
+// in the text format documentation above).
+// FUNCTION BODY
+// A FUNCTION BODY entry describing the inlined function.
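+//
+// The numbers above are ULEB128-encoded (the C type annotation gives the
+// logical width); for example, the value 624485 is emitted as the three
+// bytes 0xE5 0x8E 0x26.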
//===----------------------------------------------------------------------===//
#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -24,6 +194,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/GCOV.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,7 +228,7 @@ namespace sampleprof {
///
/// The reader supports two file formats: text and binary. The text format
/// is useful for debugging and testing, while the binary format is more
-/// compact. They can both be used interchangeably.
+/// compact and I/O efficient. They can both be used interchangeably.
class SampleProfileReader {
public:
SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
@@ -86,7 +257,7 @@ public:
StringMap<FunctionSamples> &getProfiles() { return Profiles; }
/// \brief Report a parse error message.
- void reportParseError(int64_t LineNumber, Twine Msg) const {
+ void reportError(int64_t LineNumber, Twine Msg) const {
Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
LineNumber, Msg));
}
@@ -95,6 +266,10 @@ public:
static ErrorOr<std::unique_ptr<SampleProfileReader>>
create(StringRef Filename, LLVMContext &C);
+ /// \brief Create a sample profile reader from the supplied memory buffer.
+ static ErrorOr<std::unique_ptr<SampleProfileReader>>
+ create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C);
+
protected:
/// \brief Map every function to its associated profile.
///
@@ -120,6 +295,9 @@ public:
/// \brief Read sample profiles from the associated file.
std::error_code read() override;
+
+ /// \brief Return true if \p Buffer is in the format supported by this class.
+ static bool hasFormat(const MemoryBuffer &Buffer);
};
class SampleProfileReaderBinary : public SampleProfileReader {
@@ -153,14 +331,75 @@ protected:
/// \returns the read value.
ErrorOr<StringRef> readString();
+ /// Read a string indirectly via the name table.
+ ErrorOr<StringRef> readStringFromTable();
+
/// \brief Return true if we've reached the end of file.
bool at_eof() const { return Data >= End; }
+ /// Read the contents of the given profile instance.
+ std::error_code readProfile(FunctionSamples &FProfile);
+
/// \brief Points to the current location in the buffer.
const uint8_t *Data;
/// \brief Points to the end of the buffer.
const uint8_t *End;
+
+ /// Function name table.
+ std::vector<StringRef> NameTable;
+};
+
+typedef SmallVector<FunctionSamples *, 10> InlineCallStack;
+
+// Supported histogram types in GCC. Currently, we only need support for
+// call target histograms.
+enum HistType {
+ HIST_TYPE_INTERVAL,
+ HIST_TYPE_POW2,
+ HIST_TYPE_SINGLE_VALUE,
+ HIST_TYPE_CONST_DELTA,
+ HIST_TYPE_INDIR_CALL,
+ HIST_TYPE_AVERAGE,
+ HIST_TYPE_IOR,
+ HIST_TYPE_INDIR_CALL_TOPN
+};
+
+class SampleProfileReaderGCC : public SampleProfileReader {
+public:
+ SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
+ : SampleProfileReader(std::move(B), C), GcovBuffer(Buffer.get()) {}
+
+ /// \brief Read and validate the file header.
+ std::error_code readHeader() override;
+
+ /// \brief Read sample profiles from the associated file.
+ std::error_code read() override;
+
+ /// \brief Return true if \p Buffer is in the format supported by this class.
+ static bool hasFormat(const MemoryBuffer &Buffer);
+
+protected:
+ std::error_code readNameTable();
+ std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
+ bool Update, uint32_t Offset);
+ std::error_code readFunctionProfiles();
+ std::error_code skipNextWord();
+ template <typename T> ErrorOr<T> readNumber();
+ ErrorOr<StringRef> readString();
+
+ /// \brief Read the section tag and check that it's the same as \p Expected.
+ std::error_code readSectionTag(uint32_t Expected);
+
+ /// GCOV buffer containing the profile.
+ GCOVBuffer GcovBuffer;
+
+ /// Function names in this profile.
+ std::vector<std::string> Names;
+
+ /// GCOV tags used to separate sections in the profile file.
+ static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
+ static const uint32_t GCOVTagAFDOFunction = 0xac000000;
};
} // End namespace sampleprof
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 302a82d..029dd2e 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -13,9 +13,8 @@
#ifndef LLVM_PROFILEDATA_SAMPLEPROFWRITER_H
#define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
@@ -30,77 +29,102 @@ enum SampleProfileFormat { SPF_None = 0, SPF_Text, SPF_Binary, SPF_GCC };
/// \brief Sample-based profile writer. Base class.
class SampleProfileWriter {
public:
- SampleProfileWriter(StringRef Filename, std::error_code &EC,
- sys::fs::OpenFlags Flags)
- : OS(Filename, EC, Flags) {}
virtual ~SampleProfileWriter() {}
- /// \brief Write sample profiles in \p S for function \p FName.
+ /// Write sample profiles in \p S for function \p FName.
///
- /// \returns true if the file was updated successfully. False, otherwise.
- virtual bool write(StringRef FName, const FunctionSamples &S) = 0;
+ /// \returns status code of the file update operation.
+ virtual std::error_code write(StringRef FName, const FunctionSamples &S) = 0;
- /// \brief Write sample profiles in \p S for function \p F.
- bool write(const Function &F, const FunctionSamples &S) {
- return write(F.getName(), S);
- }
-
- /// \brief Write all the sample profiles for all the functions in \p M.
+ /// Write all the sample profiles in the given map of samples.
///
- /// \returns true if the file was updated successfully. False, otherwise.
- bool write(const Module &M, StringMap<FunctionSamples> &P) {
- for (const auto &F : M) {
- StringRef Name = F.getName();
- if (!write(Name, P[Name]))
- return false;
- }
- return true;
- }
+ /// \returns status code of the file update operation.
+ std::error_code write(const StringMap<FunctionSamples> &ProfileMap) {
+ if (std::error_code EC = writeHeader(ProfileMap))
+ return EC;
- /// \brief Write all the sample profiles in the given map of samples.
- ///
- /// \returns true if the file was updated successfully. False, otherwise.
- bool write(StringMap<FunctionSamples> &ProfileMap) {
- for (auto &I : ProfileMap) {
+ for (const auto &I : ProfileMap) {
StringRef FName = I.first();
- FunctionSamples &Profile = I.second;
- if (!write(FName, Profile))
- return false;
+ const FunctionSamples &Profile = I.second;
+ if (std::error_code EC = write(FName, Profile))
+ return EC;
}
- return true;
+ return sampleprof_error::success;
}
- /// \brief Profile writer factory. Create a new writer based on the value of
- /// \p Format.
+ raw_ostream &getOutputStream() { return *OutputStream; }
+
+ /// Profile writer factory.
+ ///
+ /// Create a new file writer based on the value of \p Format.
static ErrorOr<std::unique_ptr<SampleProfileWriter>>
create(StringRef Filename, SampleProfileFormat Format);
+ /// Create a new stream writer based on the value of \p Format.
+ /// For testing.
+ static ErrorOr<std::unique_ptr<SampleProfileWriter>>
+ create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format);
+
protected:
+ SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
+ : OutputStream(std::move(OS)) {}
+
+ /// \brief Write a file header for the profile file.
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0;
+
/// \brief Output stream where to emit the profile to.
- raw_fd_ostream OS;
+ std::unique_ptr<raw_ostream> OutputStream;
};
/// \brief Sample-based profile writer (text format).
class SampleProfileWriterText : public SampleProfileWriter {
public:
- SampleProfileWriterText(StringRef F, std::error_code &EC)
- : SampleProfileWriter(F, EC, sys::fs::F_Text) {}
+ std::error_code write(StringRef FName, const FunctionSamples &S) override;
- bool write(StringRef FName, const FunctionSamples &S) override;
- bool write(const Module &M, StringMap<FunctionSamples> &P) {
- return SampleProfileWriter::write(M, P);
+protected:
+ SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS)
+ : SampleProfileWriter(OS), Indent(0) {}
+
+ std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override {
+ return sampleprof_error::success;
}
+
+private:
+ /// Indent level to use when writing.
+ ///
+ /// This is used when printing inlined callees.
+ unsigned Indent;
+
+ friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
+ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
+ SampleProfileFormat Format);
};
/// \brief Sample-based profile writer (binary format).
class SampleProfileWriterBinary : public SampleProfileWriter {
public:
- SampleProfileWriterBinary(StringRef F, std::error_code &EC);
+ std::error_code write(StringRef F, const FunctionSamples &S) override;
- bool write(StringRef F, const FunctionSamples &S) override;
- bool write(const Module &M, StringMap<FunctionSamples> &P) {
- return SampleProfileWriter::write(M, P);
- }
+protected:
+ SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS)
+ : SampleProfileWriter(OS), NameTable() {}
+
+ std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ std::error_code writeNameIdx(StringRef FName);
+ std::error_code writeBody(StringRef FName, const FunctionSamples &S);
+
+private:
+ void addName(StringRef FName);
+ void addNames(const FunctionSamples &S);
+
+ MapVector<StringRef, uint32_t> NameTable;
+
+ friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
+ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
+ SampleProfileFormat Format);
};
} // End namespace sampleprof
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
new file mode 100644
index 0000000..2f99b07
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
@@ -0,0 +1,223 @@
+//===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides defines to build up the ARM target parser's logic.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_FPU
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION)
+#endif
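+// A client defines the macros it needs before including this file; for
+// example (a sketch), to build a table of FPU name strings:
+//
+//   #define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) NAME,
+//   static const char *FPUNames[] = {
+//   #include "llvm/Support/ARMTargetParser.def"
+//   };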
+ARM_FPU("invalid", FK_INVALID, FV_NONE, NS_None, FR_None)
+ARM_FPU("none", FK_NONE, FV_NONE, NS_None, FR_None)
+ARM_FPU("vfp", FK_VFP, FV_VFPV2, NS_None, FR_None)
+ARM_FPU("vfpv2", FK_VFPV2, FV_VFPV2, NS_None, FR_None)
+ARM_FPU("vfpv3", FK_VFPV3, FV_VFPV3, NS_None, FR_None)
+ARM_FPU("vfpv3-fp16", FK_VFPV3_FP16, FV_VFPV3_FP16, NS_None, FR_None)
+ARM_FPU("vfpv3-d16", FK_VFPV3_D16, FV_VFPV3, NS_None, FR_D16)
+ARM_FPU("vfpv3-d16-fp16", FK_VFPV3_D16_FP16, FV_VFPV3_FP16, NS_None, FR_D16)
+ARM_FPU("vfpv3xd", FK_VFPV3XD, FV_VFPV3, NS_None, FR_SP_D16)
+ARM_FPU("vfpv3xd-fp16", FK_VFPV3XD_FP16, FV_VFPV3_FP16, NS_None, FR_SP_D16)
+ARM_FPU("vfpv4", FK_VFPV4, FV_VFPV4, NS_None, FR_None)
+ARM_FPU("vfpv4-d16", FK_VFPV4_D16, FV_VFPV4, NS_None, FR_D16)
+ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FV_VFPV4, NS_None, FR_SP_D16)
+ARM_FPU("fpv5-d16", FK_FPV5_D16, FV_VFPV5, NS_None, FR_D16)
+ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FV_VFPV5, NS_None, FR_SP_D16)
+ARM_FPU("fp-armv8", FK_FP_ARMV8, FV_VFPV5, NS_None, FR_None)
+ARM_FPU("neon", FK_NEON, FV_VFPV3, NS_Neon, FR_None)
+ARM_FPU("neon-fp16", FK_NEON_FP16, FV_VFPV3_FP16, NS_Neon, FR_None)
+ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FV_VFPV4, NS_Neon, FR_None)
+ARM_FPU("neon-fp-armv8", FK_NEON_FP_ARMV8, FV_VFPV5, NS_Neon, FR_None)
+ARM_FPU("crypto-neon-fp-armv8", FK_CRYPTO_NEON_FP_ARMV8, FV_VFPV5, NS_Crypto,
+ FR_None)
+ARM_FPU("softvfp", FK_SOFTVFP, FV_NONE, NS_None, FR_None)
+#undef ARM_FPU
+
+#ifndef ARM_ARCH
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT)
+#endif
+ARM_ARCH("invalid", AK_INVALID, nullptr, nullptr,
+ ARMBuildAttrs::CPUArch::Pre_v4, FK_NONE, AEK_NONE)
+ARM_ARCH("armv2", AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv2a", AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv3", AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv3m", AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv4", AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv4t", AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv5t", AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv5te", AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE,
+ FK_NONE, AEK_DSP)
+ARM_ARCH("armv5tej", AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ,
+ FK_NONE, AEK_DSP)
+ARM_ARCH("armv6", AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6,
+ FK_VFPV2, AEK_DSP)
+ARM_ARCH("armv6k", AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K,
+ FK_VFPV2, AEK_DSP)
+ARM_ARCH("armv6t2", AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2,
+ FK_NONE, AEK_DSP)
+ARM_ARCH("armv6kz", AK_ARMV6KZ, "6KZ", "v6kz", ARMBuildAttrs::CPUArch::v6KZ,
+ FK_VFPV2, (AEK_SEC | AEK_DSP))
+ARM_ARCH("armv6-m", AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv7-a", AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7,
+ FK_NEON, AEK_DSP)
+ARM_ARCH("armv7-r", AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7,
+ FK_NONE, (AEK_HWDIV | AEK_DSP))
+ARM_ARCH("armv7-m", AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7,
+ FK_NONE, AEK_HWDIV)
+ARM_ARCH("armv7e-m", AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M,
+ FK_NONE, (AEK_HWDIV | AEK_DSP))
+ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8,
+ FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM |
+ AEK_HWDIV | AEK_DSP | AEK_CRC))
+ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8,
+ FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM |
+ AEK_HWDIV | AEK_DSP | AEK_CRC))
+ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", ARMBuildAttrs::CPUArch::v8,
+ FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM |
+ AEK_HWDIV | AEK_DSP | AEK_CRC))
+// Non-standard Arch names.
+ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("iwmmxt2", AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("xscale", AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE,
+ FK_NONE, AEK_NONE)
+ARM_ARCH("armv7s", AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7,
+ FK_NEON_VFPV4, AEK_DSP)
+ARM_ARCH("armv7k", AK_ARMV7K, "7-K", "v7k", ARMBuildAttrs::CPUArch::v7,
+ FK_NONE, AEK_DSP)
+#undef ARM_ARCH
+
+#ifndef ARM_ARCH_EXT_NAME
+#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
+#endif
+// FIXME: This would be nicer were it tablegen
+ARM_ARCH_EXT_NAME("invalid", AEK_INVALID, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("none", AEK_NONE, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("crc", AEK_CRC, "+crc", "-crc")
+ARM_ARCH_EXT_NAME("crypto", AEK_CRYPTO, "+crypto","-crypto")
+ARM_ARCH_EXT_NAME("fp", AEK_FP, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("idiv", (AEK_HWDIVARM | AEK_HWDIV), nullptr, nullptr)
+ARM_ARCH_EXT_NAME("mp", AEK_MP, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("simd", AEK_SIMD, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("sec", AEK_SEC, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("virt", AEK_VIRT, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("fp16", AEK_FP16, "+fullfp16", "-fullfp16")
+ARM_ARCH_EXT_NAME("os", AEK_OS, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("iwmmxt", AEK_IWMMXT, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("iwmmxt2", AEK_IWMMXT2, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("maverick", AEK_MAVERICK, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("xscale", AEK_XSCALE, nullptr, nullptr)
+#undef ARM_ARCH_EXT_NAME
+
+#ifndef ARM_HW_DIV_NAME
+#define ARM_HW_DIV_NAME(NAME, ID)
+#endif
+ARM_HW_DIV_NAME("invalid", AEK_INVALID)
+ARM_HW_DIV_NAME("none", AEK_NONE)
+ARM_HW_DIV_NAME("thumb", AEK_HWDIV)
+ARM_HW_DIV_NAME("arm", AEK_HWDIVARM)
+ARM_HW_DIV_NAME("arm,thumb", (AEK_HWDIVARM | AEK_HWDIV))
+#undef ARM_HW_DIV_NAME
+
+#ifndef ARM_CPU_NAME
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)
+#endif
+ARM_CPU_NAME("arm2", AK_ARMV2, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm3", AK_ARMV2A, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm6", AK_ARMV3, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm7m", AK_ARMV3M, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm8", AK_ARMV4, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm810", AK_ARMV4, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("strongarm", AK_ARMV4, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("strongarm110", AK_ARMV4, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("strongarm1100", AK_ARMV4, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("strongarm1110", AK_ARMV4, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm7tdmi", AK_ARMV4T, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm7tdmi-s", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm710t", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm720t", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm9", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm9tdmi", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm920", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm920t", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm922t", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm9312", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm940t", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("ep9312", AK_ARMV4T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm10tdmi", AK_ARMV5T, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm1020t", AK_ARMV5T, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm9e", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm946e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm966e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm968e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm10e", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm1020e", AK_ARMV5TE, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm1022e", AK_ARMV5TE, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm926ej-s", AK_ARMV5TEJ, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm1136j-s", AK_ARMV6, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm1136jf-s", AK_ARMV6, FK_VFPV2, true, AEK_NONE)
+ARM_CPU_NAME("arm1136jz-s", AK_ARMV6, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm1176j-s", AK_ARMV6K, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm1176jz-s", AK_ARMV6KZ, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("mpcore", AK_ARMV6K, FK_VFPV2, false, AEK_NONE)
+ARM_CPU_NAME("mpcorenovfp", AK_ARMV6K, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("arm1176jzf-s", AK_ARMV6KZ, FK_VFPV2, true, AEK_NONE)
+ARM_CPU_NAME("arm1156t2-s", AK_ARMV6T2, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("arm1156t2f-s", AK_ARMV6T2, FK_VFPV2, false, AEK_NONE)
+ARM_CPU_NAME("cortex-m0", AK_ARMV6M, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("cortex-m0plus", AK_ARMV6M, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("cortex-m1", AK_ARMV6M, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("sc000", AK_ARMV6M, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false, (AEK_SEC | AEK_MP))
+ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false,
+ (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV))
+ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, AEK_SEC)
+ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (AEK_SEC | AEK_MP))
+ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false,
+ (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV))
+ARM_CPU_NAME("cortex-a15", AK_ARMV7A, FK_NEON_VFPV4, false,
+ (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV))
+ARM_CPU_NAME("cortex-a17", AK_ARMV7A, FK_NEON_VFPV4, false,
+ (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV))
+ARM_CPU_NAME("krait", AK_ARMV7A, FK_NEON_VFPV4, false,
+ (AEK_HWDIVARM | AEK_HWDIV))
+ARM_CPU_NAME("cortex-r4", AK_ARMV7R, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("cortex-r4f", AK_ARMV7R, FK_VFPV3_D16, false, AEK_NONE)
+ARM_CPU_NAME("cortex-r5", AK_ARMV7R, FK_VFPV3_D16, false,
+ (AEK_MP | AEK_HWDIVARM))
+ARM_CPU_NAME("cortex-r7", AK_ARMV7R, FK_VFPV3_D16_FP16, false,
+ (AEK_MP | AEK_HWDIVARM))
+ARM_CPU_NAME("sc300", AK_ARMV7M, FK_NONE, false, AEK_NONE)
+ARM_CPU_NAME("cortex-m3", AK_ARMV7M, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("cortex-m4", AK_ARMV7EM, FK_FPV4_SP_D16, true, AEK_NONE)
+ARM_CPU_NAME("cortex-m7", AK_ARMV7EM, FK_FPV5_D16, false, AEK_NONE)
+ARM_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
+ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC)
+ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
+ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
+ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
+// Non-standard Arch names.
+ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE)
+ARM_CPU_NAME("swift", AK_ARMV7S, FK_NEON_VFPV4, true,
+ (AEK_HWDIVARM | AEK_HWDIV))
+// Invalid CPU
+ARM_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AEK_INVALID)
+#undef ARM_CPU_NAME
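
Each ARM_* macro above is an X-macro hook: the #ifndef blocks supply empty
defaults, so a client defines only the macro it cares about and then includes
the .def file to expand the table. A minimal sketch of consuming the CPU table
(the include path is an assumption here; adjust it to wherever this .def file
actually lives in the tree):

#include <cstdio>

#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) NAME,
static const char *const CPUNames[] = {
#include "llvm/Support/ARMTargetParser.def" // assumed path
};

int main() {
  // ARM_ARCH, ARM_ARCH_EXT_NAME and ARM_HW_DIV_NAME expand to nothing here,
  // thanks to their empty #ifndef defaults, so only CPU rows are emitted.
  for (const char *N : CPUNames)
    std::printf("%s\n", N);
}
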
diff --git a/contrib/llvm/include/llvm/Support/AlignOf.h b/contrib/llvm/include/llvm/Support/AlignOf.h
index 07da02d..5268c8d 100644
--- a/contrib/llvm/include/llvm/Support/AlignOf.h
+++ b/contrib/llvm/include/llvm/Support/AlignOf.h
@@ -17,9 +17,15 @@
#include "llvm/Support/Compiler.h"
#include <cstddef>
+#include <type_traits>
namespace llvm {
-template <typename T>
+
+namespace detail {
+
+// For everything other than an abstract class we can calculate alignment by
+// building a class with a single character and a member of the given type.
+template <typename T, bool = std::is_abstract<T>::value>
struct AlignmentCalcImpl {
char x;
#if defined(_MSC_VER)
@@ -35,6 +41,25 @@ private:
AlignmentCalcImpl() {} // Never instantiate.
};
+// Abstract base class helper, this will have the minimal alignment and size
+// for any abstract class. We don't even define its destructor because this
+// type should never be used in a way that requires it.
+struct AlignmentCalcImplBase {
+ virtual ~AlignmentCalcImplBase() = 0;
+};
+
+// When we have an abstract class type, specialize the alignment computation
+// engine to create another abstract class that derives from both an empty
+// abstract base class and the provided type. This has the same effect as the
+// above except that it handles the fact that we can't actually create a member
+// of type T.
+template <typename T>
+struct AlignmentCalcImpl<T, true> : AlignmentCalcImplBase, T {
+ virtual ~AlignmentCalcImpl() = 0;
+};
+
+} // End detail namespace.
+
/// AlignOf - A templated class that contains an enum value representing
/// the alignment of the template argument. For example,
/// AlignOf<int>::Alignment represents the alignment of type "int". The
@@ -50,11 +75,13 @@ struct AlignOf {
// llvm::AlignOf<Y>::<anonymous>' [-Wenum-compare]
// by using constexpr instead of enum.
// (except on MSVC, since it doesn't support constexpr yet).
- static constexpr unsigned Alignment =
- static_cast<unsigned int>(sizeof(AlignmentCalcImpl<T>) - sizeof(T));
+ static constexpr unsigned Alignment = static_cast<unsigned int>(
+ sizeof(detail::AlignmentCalcImpl<T>) - sizeof(T));
#else
- enum { Alignment =
- static_cast<unsigned int>(sizeof(AlignmentCalcImpl<T>) - sizeof(T)) };
+ enum {
+ Alignment = static_cast<unsigned int>(sizeof(detail::AlignmentCalcImpl<T>) -
+ sizeof(T))
+ };
#endif
enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 };
enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 };
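
The upshot of this change: AlignOf<T>::Alignment is now well-formed even when
T is abstract, because the specialization derives from T instead of embedding
a member of type T. A small usage sketch, assuming only this header:

#include "llvm/Support/AlignOf.h"

struct Shape {                      // abstract: previously ill-formed as a
  virtual ~Shape();                 // member inside AlignmentCalcImpl
  virtual double area() const = 0;
};

// Compiles now that the abstract case is specialized.
static const unsigned ShapeAlign = llvm::AlignOf<Shape>::Alignment;

// For concrete types the char-plus-member trick agrees with C++11 alignof.
static_assert(llvm::AlignOf<int>::Alignment == alignof(int),
              "alignment computed via padding matches alignof");
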
diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h
index f9b5cf2..c608736 100644
--- a/contrib/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm/include/llvm/Support/Allocator.h
@@ -222,6 +222,8 @@ public:
// Without this, MemorySanitizer messages for values originated from here
// will point to the allocation of the entire slab.
__msan_allocated_memory(AlignedPtr, Size);
+ // Similarly, tell ASan about this space.
+ __asan_unpoison_memory_region(AlignedPtr, Size);
return AlignedPtr;
}
@@ -229,12 +231,16 @@ public:
size_t PaddedSize = Size + Alignment - 1;
if (PaddedSize > SizeThreshold) {
void *NewSlab = Allocator.Allocate(PaddedSize, 0);
+ // We own the new slab and don't want anyone reading anything other than
+ // pieces returned from this method. So poison the whole slab.
+ __asan_poison_memory_region(NewSlab, PaddedSize);
CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize));
uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment);
assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize);
char *AlignedPtr = (char*)AlignedAddr;
__msan_allocated_memory(AlignedPtr, Size);
+ __asan_unpoison_memory_region(AlignedPtr, Size);
return AlignedPtr;
}
@@ -246,13 +252,16 @@ public:
char *AlignedPtr = (char*)AlignedAddr;
CurPtr = AlignedPtr + Size;
__msan_allocated_memory(AlignedPtr, Size);
+ __asan_unpoison_memory_region(AlignedPtr, Size);
return AlignedPtr;
}
// Pull in base class overloads.
using AllocatorBase<BumpPtrAllocatorImpl>::Allocate;
- void Deallocate(const void * /*Ptr*/, size_t /*Size*/) {}
+ void Deallocate(const void *Ptr, size_t Size) {
+ __asan_poison_memory_region(Ptr, Size);
+ }
// Pull in base class overloads.
using AllocatorBase<BumpPtrAllocatorImpl>::Deallocate;
@@ -310,6 +319,10 @@ private:
size_t AllocatedSlabSize = computeSlabSize(Slabs.size());
void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0);
+ // We own the new slab and don't want anyone reading anything other than
+ // pieces returned from this method. So poison the whole slab.
+ __asan_poison_memory_region(NewSlab, AllocatedSlabSize);
+
Slabs.push_back(NewSlab);
CurPtr = (char *)(NewSlab);
End = ((char *)NewSlab) + AllocatedSlabSize;
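
With these annotations, the bump allocator's slabs start out poisoned and only
the exact chunks handed back from Allocate() become readable; the new
Deallocate() body re-poisons, so a stale pointer into the allocator trips ASan
instead of silently reading recycled memory. A sketch of the contract (only
meaningful in an ASan build; elsewhere the macros expand to nothing):

#include "llvm/Support/Allocator.h"

void demo() {
  llvm::BumpPtrAllocator Alloc;
  int *P = static_cast<int *>(Alloc.Allocate(sizeof(int), alignof(int)));
  *P = 42;                        // OK: Allocate() unpoisoned this region
  Alloc.Deallocate(P, sizeof(int));
  // *P = 7;                      // would now be reported by ASan, because
                                  // Deallocate() poisons the region again
}
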
diff --git a/contrib/llvm/include/llvm/Support/BlockFrequency.h b/contrib/llvm/include/llvm/Support/BlockFrequency.h
index 4304a25..1b45cc5 100644
--- a/contrib/llvm/include/llvm/Support/BlockFrequency.h
+++ b/contrib/llvm/include/llvm/Support/BlockFrequency.h
@@ -14,12 +14,12 @@
#ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H
#define LLVM_SUPPORT_BLOCKFREQUENCY_H
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
class raw_ostream;
-class BranchProbability;
// This class represents Block Frequency as a 64-bit value.
class BlockFrequency {
@@ -37,34 +37,38 @@ public:
/// \brief Multiplies with a branch probability. The computation will never
/// overflow.
- BlockFrequency &operator*=(const BranchProbability &Prob);
- const BlockFrequency operator*(const BranchProbability &Prob) const;
+ BlockFrequency &operator*=(BranchProbability Prob);
+ BlockFrequency operator*(BranchProbability Prob) const;
/// \brief Divide by a non-zero branch probability using saturating
/// arithmetic.
- BlockFrequency &operator/=(const BranchProbability &Prob);
- BlockFrequency operator/(const BranchProbability &Prob) const;
+ BlockFrequency &operator/=(BranchProbability Prob);
+ BlockFrequency operator/(BranchProbability Prob) const;
/// \brief Adds another block frequency using saturating arithmetic.
- BlockFrequency &operator+=(const BlockFrequency &Freq);
- const BlockFrequency operator+(const BlockFrequency &Freq) const;
+ BlockFrequency &operator+=(BlockFrequency Freq);
+ BlockFrequency operator+(BlockFrequency Freq) const;
+
+ /// \brief Subtracts another block frequency using saturating arithmetic.
+ BlockFrequency &operator-=(BlockFrequency Freq);
+ BlockFrequency operator-(BlockFrequency Freq) const;
/// \brief Shift block frequency to the right by count bits saturating to 1.
BlockFrequency &operator>>=(const unsigned count);
- bool operator<(const BlockFrequency &RHS) const {
+ bool operator<(BlockFrequency RHS) const {
return Frequency < RHS.Frequency;
}
- bool operator<=(const BlockFrequency &RHS) const {
+ bool operator<=(BlockFrequency RHS) const {
return Frequency <= RHS.Frequency;
}
- bool operator>(const BlockFrequency &RHS) const {
+ bool operator>(BlockFrequency RHS) const {
return Frequency > RHS.Frequency;
}
- bool operator>=(const BlockFrequency &RHS) const {
+ bool operator>=(BlockFrequency RHS) const {
return Frequency >= RHS.Frequency;
}
};
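
Since BlockFrequency wraps a single uint64_t, passing it (and
BranchProbability) by value is cheaper than by const reference, and the new
saturating operator- stops at zero rather than wrapping. For instance, a
sketch using only the operators declared above:

#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"

// Frequency of the cold path, given the whole block's frequency and the
// probability of taking the hot path.
llvm::BlockFrequency coldPart(llvm::BlockFrequency Total,
                              llvm::BranchProbability HotProb) {
  llvm::BlockFrequency Hot = Total * HotProb; // multiply never overflows
  return Total - Hot;                         // subtract saturates at zero
}
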
diff --git a/contrib/llvm/include/llvm/Support/BranchProbability.h b/contrib/llvm/include/llvm/Support/BranchProbability.h
index a6429dd..26bc888 100644
--- a/contrib/llvm/include/llvm/Support/BranchProbability.h
+++ b/contrib/llvm/include/llvm/Support/BranchProbability.h
@@ -15,36 +15,59 @@
#define LLVM_SUPPORT_BRANCHPROBABILITY_H
#include "llvm/Support/DataTypes.h"
+#include <algorithm>
#include <cassert>
+#include <climits>
+#include <numeric>
namespace llvm {
class raw_ostream;
-// This class represents Branch Probability as a non-negative fraction.
+// This class represents Branch Probability as a non-negative fraction that is
+// no greater than 1. It uses a fixed-point-like implementation, in which the
+// denominator is always a constant value (here we use 1<<31 for maximum
+// precision).
class BranchProbability {
// Numerator
uint32_t N;
- // Denominator
- uint32_t D;
+ // Denominator, which is a constant value.
+ static const uint32_t D = 1u << 31;
+ static const uint32_t UnknownN = UINT32_MAX;
-public:
- BranchProbability(uint32_t n, uint32_t d) : N(n), D(d) {
- assert(d > 0 && "Denominator cannot be 0!");
- assert(n <= d && "Probability cannot be bigger than 1!");
- }
+ // Construct a BranchProbability with only numerator assuming the denominator
+ // is 1<<31. For internal use only.
+ explicit BranchProbability(uint32_t n) : N(n) {}
- static BranchProbability getZero() { return BranchProbability(0, 1); }
- static BranchProbability getOne() { return BranchProbability(1, 1); }
+public:
+ BranchProbability() : N(UnknownN) {}
+ BranchProbability(uint32_t Numerator, uint32_t Denominator);
+
+ bool isZero() const { return N == 0; }
+ bool isUnknown() const { return N == UnknownN; }
+
+ static BranchProbability getZero() { return BranchProbability(0); }
+ static BranchProbability getOne() { return BranchProbability(D); }
+ static BranchProbability getUnknown() { return BranchProbability(UnknownN); }
+ // Create a BranchProbability object with the given numerator and 1<<31
+ // as denominator.
+ static BranchProbability getRaw(uint32_t N) { return BranchProbability(N); }
+ // Create a BranchProbability object from 64-bit integers.
+ static BranchProbability getBranchProbability(uint64_t Numerator,
+ uint64_t Denominator);
+
+ // Normalize the given probabilities so that their sum becomes approximately
+ // one.
+ template <class ProbabilityIter>
+ static void normalizeProbabilities(ProbabilityIter Begin,
+ ProbabilityIter End);
uint32_t getNumerator() const { return N; }
- uint32_t getDenominator() const { return D; }
+ static uint32_t getDenominator() { return D; }
// Return (1 - Probability).
- BranchProbability getCompl() const {
- return BranchProbability(D - N, D);
- }
+ BranchProbability getCompl() const { return BranchProbability(D - N); }
raw_ostream &print(raw_ostream &OS) const;
@@ -66,24 +89,131 @@ public:
/// \return \c Num divided by \c this.
uint64_t scaleByInverse(uint64_t Num) const;
- bool operator==(BranchProbability RHS) const {
- return (uint64_t)N * RHS.D == (uint64_t)D * RHS.N;
+ BranchProbability &operator+=(BranchProbability RHS) {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ // Saturate the result in case of overflow.
+ N = (uint64_t(N) + RHS.N > D) ? D : N + RHS.N;
+ return *this;
+ }
+
+ BranchProbability &operator-=(BranchProbability RHS) {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ // Saturate the result in case of underflow.
+ N = N < RHS.N ? 0 : N - RHS.N;
+ return *this;
+ }
+
+ BranchProbability &operator*=(BranchProbability RHS) {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ N = (static_cast<uint64_t>(N) * RHS.N + D / 2) / D;
+ return *this;
+ }
+
+ BranchProbability &operator/=(uint32_t RHS) {
+ assert(N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ assert(RHS > 0 && "The divider cannot be zero.");
+ N /= RHS;
+ return *this;
+ }
+
+ BranchProbability operator+(BranchProbability RHS) const {
+ BranchProbability Prob(*this);
+ return Prob += RHS;
+ }
+
+ BranchProbability operator-(BranchProbability RHS) const {
+ BranchProbability Prob(*this);
+ return Prob -= RHS;
+ }
+
+ BranchProbability operator*(BranchProbability RHS) const {
+ BranchProbability Prob(*this);
+ return Prob *= RHS;
}
- bool operator!=(BranchProbability RHS) const {
- return !(*this == RHS);
+
+ BranchProbability operator/(uint32_t RHS) const {
+ BranchProbability Prob(*this);
+ return Prob /= RHS;
}
+
+ bool operator==(BranchProbability RHS) const { return N == RHS.N; }
+ bool operator!=(BranchProbability RHS) const { return !(*this == RHS); }
+
bool operator<(BranchProbability RHS) const {
- return (uint64_t)N * RHS.D < (uint64_t)D * RHS.N;
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in comparisons.");
+ return N < RHS.N;
+ }
+
+ bool operator>(BranchProbability RHS) const {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in comparisons.");
+ return RHS < *this;
+ }
+
+ bool operator<=(BranchProbability RHS) const {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in comparisons.");
+ return !(RHS < *this);
+ }
+
+ bool operator>=(BranchProbability RHS) const {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in comparisons.");
+ return !(*this < RHS);
}
- bool operator>(BranchProbability RHS) const { return RHS < *this; }
- bool operator<=(BranchProbability RHS) const { return !(RHS < *this); }
- bool operator>=(BranchProbability RHS) const { return !(*this < RHS); }
};
-inline raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
+inline raw_ostream &operator<<(raw_ostream &OS, BranchProbability Prob) {
return Prob.print(OS);
}
+template <class ProbabilityIter>
+void BranchProbability::normalizeProbabilities(ProbabilityIter Begin,
+ ProbabilityIter End) {
+ if (Begin == End)
+ return;
+
+ unsigned UnknownProbCount = 0;
+ uint64_t Sum = std::accumulate(Begin, End, uint64_t(0),
+ [&](uint64_t S, const BranchProbability &BP) {
+ if (!BP.isUnknown())
+ return S + BP.N;
+ UnknownProbCount++;
+ return S;
+ });
+
+ if (UnknownProbCount > 0) {
+ BranchProbability ProbForUnknown = BranchProbability::getZero();
+ // If the sum of all known probabilities is less than one, evenly distribute
+ // the complement of sum to unknown probabilities. Otherwise, set unknown
+ // probabilities to zeros and continue to normalize known probabilities.
+ if (Sum < BranchProbability::getDenominator())
+ ProbForUnknown = BranchProbability::getRaw(
+ (BranchProbability::getDenominator() - Sum) / UnknownProbCount);
+
+ std::replace_if(Begin, End,
+ [](const BranchProbability &BP) { return BP.isUnknown(); },
+ ProbForUnknown);
+
+ if (Sum <= BranchProbability::getDenominator())
+ return;
+ }
+
+ if (Sum == 0) {
+ BranchProbability BP(1, std::distance(Begin, End));
+ std::fill(Begin, End, BP);
+ return;
+ }
+
+ for (auto I = Begin; I != End; ++I)
+ I->N = (I->N * uint64_t(D) + Sum / 2) / Sum;
+}
+
}
#endif
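
To make the fixed-point scheme concrete: BranchProbability(1, 2) now stores a
numerator of 2^30 over the implicit denominator 2^31, the operators saturate
instead of overflowing, and normalizeProbabilities() rescales a whole range in
place, handing any leftover mass to getUnknown() entries first. A short
sketch:

#include "llvm/Support/BranchProbability.h"
#include <vector>

void demo() {
  using llvm::BranchProbability;

  BranchProbability Half(1, 2);        // numerator (1u << 31) / 2, D is fixed
  BranchProbability One = Half + Half; // exactly getOne(); += saturates at D
  (void)One;

  // Known entries summing past one plus an unknown: the unknown collapses to
  // zero and the knowns are rescaled so the range sums to roughly one.
  std::vector<BranchProbability> Probs = {BranchProbability(3, 4),
                                          BranchProbability(3, 4),
                                          BranchProbability::getUnknown()};
  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
}
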
diff --git a/contrib/llvm/include/llvm/Support/CBindingWrapping.h b/contrib/llvm/include/llvm/Support/CBindingWrapping.h
index 786ba18..d4633aa 100644
--- a/contrib/llvm/include/llvm/Support/CBindingWrapping.h
+++ b/contrib/llvm/include/llvm/Support/CBindingWrapping.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_CBINDINGWRAPPING_H
#include "llvm/Support/Casting.h"
+#include "llvm-c/Types.h"
#define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \
inline ty *unwrap(ref P) { \
diff --git a/contrib/llvm/include/llvm/Support/COFF.h b/contrib/llvm/include/llvm/Support/COFF.h
index 3c5ee06..0162175 100644
--- a/contrib/llvm/include/llvm/Support/COFF.h
+++ b/contrib/llvm/include/llvm/Support/COFF.h
@@ -88,6 +88,7 @@ namespace COFF {
IMAGE_FILE_MACHINE_AMD64 = 0x8664,
IMAGE_FILE_MACHINE_ARM = 0x1C0,
IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
+ IMAGE_FILE_MACHINE_ARM64 = 0xAA64,
IMAGE_FILE_MACHINE_EBC = 0xEBC,
IMAGE_FILE_MACHINE_I386 = 0x14C,
IMAGE_FILE_MACHINE_IA64 = 0x200,
@@ -247,6 +248,7 @@ namespace COFF {
enum SectionCharacteristics : uint32_t {
SC_Invalid = 0xffffffff,
+ IMAGE_SCN_TYPE_NOLOAD = 0x00000002,
IMAGE_SCN_TYPE_NO_PAD = 0x00000008,
IMAGE_SCN_CNT_CODE = 0x00000020,
IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040,
diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h
index 379d06a..943d2df 100644
--- a/contrib/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm/include/llvm/Support/CommandLine.h
@@ -33,7 +33,6 @@
namespace llvm {
-class BumpPtrStringSaver;
class StringSaver;
/// cl Namespace - This namespace contains all of the command line option
@@ -206,9 +205,9 @@ class Option {
unsigned AdditionalVals; // Greater than 0 for multi-valued option.
public:
- const char *ArgStr; // The argument string itself (ex: "help", "o")
- const char *HelpStr; // The descriptive text message for -help
- const char *ValueStr; // String describing what the value of this option is
+ StringRef ArgStr; // The argument string itself (ex: "help", "o")
+ StringRef HelpStr; // The descriptive text message for -help
+ StringRef ValueStr; // String describing what the value of this option is
OptionCategory *Category; // The Category this option belongs to
bool FullyInitialized; // Has addArgument been called?
@@ -229,14 +228,14 @@ public:
inline unsigned getNumAdditionalVals() const { return AdditionalVals; }
// hasArgStr - Return true if the argstr != ""
- bool hasArgStr() const { return ArgStr[0] != 0; }
+ bool hasArgStr() const { return !ArgStr.empty(); }
//-------------------------------------------------------------------------===
// Accessor functions set by OptionModifiers
//
- void setArgStr(const char *S);
- void setDescription(const char *S) { HelpStr = S; }
- void setValueStr(const char *S) { ValueStr = S; }
+ void setArgStr(StringRef S);
+ void setDescription(StringRef S) { HelpStr = S; }
+ void setValueStr(StringRef S) { ValueStr = S; }
void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { Occurrences = Val; }
void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; }
void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; }
@@ -276,7 +275,7 @@ public:
virtual void printOptionValue(size_t GlobalWidth, bool Force) const = 0;
- virtual void getExtraOptionNames(SmallVectorImpl<const char *> &) {}
+ virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
// addOccurrence - Wrapper around handleOccurrence that enforces Flags.
//
@@ -606,7 +605,7 @@ public:
void initialize() {}
- void getExtraOptionNames(SmallVectorImpl<const char *> &OptionNames) {
+ void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) {
// If there has been no argstr specified, that means that we need to add an
// argument for every possible option. This ensures that our options are
// vectored to us.
@@ -715,14 +714,14 @@ public:
//
class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
- basic_parser_impl(Option &O) {}
+ basic_parser_impl(Option &) {}
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueRequired;
}
- void getExtraOptionNames(SmallVectorImpl<const char *> &) {}
+ void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
void initialize() {}
@@ -1206,8 +1205,7 @@ class opt : public Option,
enum ValueExpected getValueExpectedFlagDefault() const override {
return Parser.getValueExpectedFlagDefault();
}
- void
- getExtraOptionNames(SmallVectorImpl<const char *> &OptionNames) override {
+ void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
return Parser.getExtraOptionNames(OptionNames);
}
@@ -1368,8 +1366,7 @@ class list : public Option, public list_storage<DataType, StorageClass> {
enum ValueExpected getValueExpectedFlagDefault() const override {
return Parser.getValueExpectedFlagDefault();
}
- void
- getExtraOptionNames(SmallVectorImpl<const char *> &OptionNames) override {
+ void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
return Parser.getExtraOptionNames(OptionNames);
}
@@ -1508,8 +1505,7 @@ class bits : public Option, public bits_storage<DataType, Storage> {
enum ValueExpected getValueExpectedFlagDefault() const override {
return Parser.getValueExpectedFlagDefault();
}
- void
- getExtraOptionNames(SmallVectorImpl<const char *> &OptionNames) override {
+ void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
return Parser.getExtraOptionNames(OptionNames);
}
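
Downstream parsers that implement getExtraOptionNames() must follow the
signature change from SmallVectorImpl<const char *> to
SmallVectorImpl<StringRef>. A hypothetical parser adapting to it (only the
hook relevant to this change is shown; a usable parser also needs parse()):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"

struct MyParser : llvm::cl::basic_parser<unsigned> {
  MyParser(llvm::cl::Option &O) : basic_parser(O) {}

  // Previously took SmallVectorImpl<const char *>.
  void getExtraOptionNames(llvm::SmallVectorImpl<llvm::StringRef> &Names) {
    Names.push_back("my-alias"); // extra spelling this parser responds to
  }
};
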
diff --git a/contrib/llvm/include/llvm/Support/Compiler.h b/contrib/llvm/include/llvm/Support/Compiler.h
index 1416398..b3416bb 100644
--- a/contrib/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm/include/llvm/Support/Compiler.h
@@ -69,7 +69,7 @@
#if !defined(_MSC_VER) || defined(__clang__) || LLVM_MSC_PREREQ(1900)
#define LLVM_NOEXCEPT noexcept
#else
-#define LLVM_NOEXCEPT
+#define LLVM_NOEXCEPT throw()
#endif
/// \brief Does the compiler support ref-qualifiers for *this?
@@ -189,7 +189,7 @@
/// 3.4 supported this but is buggy in various cases and produces unimplemented
/// errors; just use it in GCC 4.0 and later.
#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0)
-#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline
#else
@@ -293,6 +293,34 @@
# define LLVM_ALIGNAS(x) alignas(x)
#endif
+/// \macro LLVM_PACKED
+/// \brief Used to specify a packed structure.
+/// LLVM_PACKED(
+/// struct A {
+/// int i;
+/// int j;
+/// int k;
+/// long long l;
+/// });
+///
+/// LLVM_PACKED_START
+/// struct B {
+/// int i;
+/// int j;
+/// int k;
+/// long long l;
+/// };
+/// LLVM_PACKED_END
+#ifdef _MSC_VER
+# define LLVM_PACKED(d) __pragma(pack(push, 1)) d __pragma(pack(pop))
+# define LLVM_PACKED_START __pragma(pack(push, 1))
+# define LLVM_PACKED_END __pragma(pack(pop))
+#else
+# define LLVM_PACKED(d) d __attribute__((packed))
+# define LLVM_PACKED_START _Pragma("pack(push, 1)")
+# define LLVM_PACKED_END _Pragma("pack(pop)")
+#endif
+
/// \macro LLVM_PTR_SIZE
/// \brief A constant integer equivalent to the value of sizeof(void*).
/// Generally used in combination with LLVM_ALIGNAS or when doing computation in
@@ -333,8 +361,50 @@
/// \brief Whether LLVM itself is built with AddressSanitizer instrumentation.
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
# define LLVM_ADDRESS_SANITIZER_BUILD 1
+# include <sanitizer/asan_interface.h>
#else
# define LLVM_ADDRESS_SANITIZER_BUILD 0
+# define __asan_poison_memory_region(p, size)
+# define __asan_unpoison_memory_region(p, size)
+#endif
+
+/// \macro LLVM_THREAD_SANITIZER_BUILD
+/// \brief Whether LLVM itself is built with ThreadSanitizer instrumentation.
+#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__)
+# define LLVM_THREAD_SANITIZER_BUILD 1
+#else
+# define LLVM_THREAD_SANITIZER_BUILD 0
+#endif
+
+#if LLVM_THREAD_SANITIZER_BUILD
+// Thread Sanitizer is a tool that finds races in code.
+// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
+// tsan detects these exact functions by name.
+extern "C" {
+void AnnotateHappensAfter(const char *file, int line, const volatile void *cv);
+void AnnotateHappensBefore(const char *file, int line, const volatile void *cv);
+void AnnotateIgnoreWritesBegin(const char *file, int line);
+void AnnotateIgnoreWritesEnd(const char *file, int line);
+}
+
+// This marker is used to define a happens-before arc. The race detector will
+// infer an arc from the begin to the end when they share the same pointer
+// argument.
+# define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
+
+// This marker defines the destination of a happens-before arc.
+# define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
+
+// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
+# define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+// Resume checking for racy writes.
+# define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+#else
+# define TsanHappensBefore(cv)
+# define TsanHappensAfter(cv)
+# define TsanIgnoreWritesBegin()
+# define TsanIgnoreWritesEnd()
#endif
/// \brief Mark debug helper function definitions like dump() that should not be
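
The Tsan* macros compile to nothing outside a -fsanitize=thread build; inside
one, they let code describe synchronization that the runtime cannot observe on
its own. An illustrative (not prescriptive) sketch:

#include "llvm/Support/Compiler.h"
#include <atomic>

std::atomic<int> Ready{0};
int Payload; // handed off via a protocol TSan may not recognize by itself

void producer() {
  Payload = 42;
  TsanHappensBefore(&Payload); // source of the happens-before arc
  Ready.store(1, std::memory_order_release);
}

void consumer() {
  while (!Ready.load(std::memory_order_acquire)) {
  }
  TsanHappensAfter(&Payload); // sink: pairs with the matching Before
  int V = Payload;            // ordered by the annotated arc, not a race
  (void)V;
}
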
diff --git a/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h b/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
index c08c3c1..1a1c743 100644
--- a/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -39,8 +39,6 @@ class CrashRecoveryContextCleanup;
///
/// ... no crash was detected ...
/// }
-///
-/// Crash recovery contexts may not be nested.
class CrashRecoveryContext {
void *Impl;
CrashRecoveryContextCleanup *head;
@@ -109,10 +107,11 @@ class CrashRecoveryContextCleanup {
protected:
CrashRecoveryContext *context;
CrashRecoveryContextCleanup(CrashRecoveryContext *context)
- : context(context), cleanupFired(false) {}
+ : context(context), cleanupFired(false) {}
+
public:
bool cleanupFired;
-
+
virtual ~CrashRecoveryContextCleanup();
virtual void recoverResources() = 0;
@@ -129,15 +128,16 @@ template<typename DERIVED, typename T>
class CrashRecoveryContextCleanupBase : public CrashRecoveryContextCleanup {
protected:
T *resource;
- CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T* resource)
- : CrashRecoveryContextCleanup(context), resource(resource) {}
+ CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T *resource)
+ : CrashRecoveryContextCleanup(context), resource(resource) {}
+
public:
static DERIVED *create(T *x) {
if (x) {
if (CrashRecoveryContext *context = CrashRecoveryContext::GetCurrent())
return new DERIVED(context, x);
}
- return 0;
+ return nullptr;
}
};
@@ -146,9 +146,9 @@ class CrashRecoveryContextDestructorCleanup : public
CrashRecoveryContextCleanupBase<CrashRecoveryContextDestructorCleanup<T>, T> {
public:
CrashRecoveryContextDestructorCleanup(CrashRecoveryContext *context,
- T *resource)
- : CrashRecoveryContextCleanupBase<
- CrashRecoveryContextDestructorCleanup<T>, T>(context, resource) {}
+ T *resource)
+ : CrashRecoveryContextCleanupBase<
+ CrashRecoveryContextDestructorCleanup<T>, T>(context, resource) {}
virtual void recoverResources() {
this->resource->~T();
@@ -171,7 +171,7 @@ class CrashRecoveryContextReleaseRefCleanup : public
CrashRecoveryContextCleanupBase<CrashRecoveryContextReleaseRefCleanup<T>, T>
{
public:
- CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context,
+ CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context,
T *resource)
: CrashRecoveryContextCleanupBase<CrashRecoveryContextReleaseRefCleanup<T>,
T>(context, resource) {}
@@ -182,6 +182,7 @@ public:
template <typename T, typename Cleanup = CrashRecoveryContextDeleteCleanup<T> >
class CrashRecoveryContextCleanupRegistrar {
CrashRecoveryContextCleanup *cleanup;
+
public:
CrashRecoveryContextCleanupRegistrar(T *x)
: cleanup(Cleanup::create(x)) {
@@ -189,16 +190,14 @@ public:
cleanup->getContext()->registerCleanup(cleanup);
}
- ~CrashRecoveryContextCleanupRegistrar() {
- unregister();
- }
-
+ ~CrashRecoveryContextCleanupRegistrar() { unregister(); }
+
void unregister() {
if (cleanup && !cleanup->cleanupFired)
cleanup->getContext()->unregisterCleanup(cleanup);
- cleanup = 0;
+ cleanup = nullptr;
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
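
Note the deleted sentence above: the "may not be nested" restriction is gone,
so guarded code may itself open another recovery context. A sketch, assuming
the function_ref overload of RunSafely present in this version of the header
and that Enable() installs the actual crash interception:

#include "llvm/Support/CrashRecoveryContext.h"

bool runGuarded() {
  llvm::CrashRecoveryContext::Enable(); // install crash interception
  llvm::CrashRecoveryContext Outer;
  return Outer.RunSafely([] {
    llvm::CrashRecoveryContext Inner;   // nesting is now permitted
    Inner.RunSafely([] { /* risky work */ });
  });
}
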
diff --git a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
index 95e37c0..4381b5b 100644
--- a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -72,11 +72,12 @@ public:
return "";
}
- /// hasNodeAddressLabel - If this method returns true, the address of the node
- /// is added to the label of the node.
- template<typename GraphType>
- static bool hasNodeAddressLabel(const void *, const GraphType &) {
- return false;
+ // getNodeIdentifierLabel - Returns a string representing the
+ // address or other unique identifier of the node. (Only used if
+ // non-empty.)
+ template <typename GraphType>
+ static std::string getNodeIdentifierLabel(const void *, const GraphType &) {
+ return "";
}
template<typename GraphType>
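
Clients that used hasNodeAddressLabel() migrate by returning the identifier
themselves; an empty string keeps the old "no extra label" behavior. A sketch
against a hypothetical graph type:

#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

struct MyGraph; // hypothetical graph, for illustration only

namespace llvm {
template <> struct DOTGraphTraits<MyGraph *> : DefaultDOTGraphTraits {
  DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}

  // Replaces hasNodeAddressLabel(): any non-empty result is appended to the
  // node label by the graph writer.
  template <typename GraphType>
  static std::string getNodeIdentifierLabel(const void *Node,
                                            const GraphType &) {
    std::string S;
    raw_string_ostream OS(S);
    OS << Node; // print the address as a unique identifier
    return OS.str();
  }
};
} // namespace llvm
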
diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h
index fff4f98..6e21347 100644
--- a/contrib/llvm/include/llvm/Support/Debug.h
+++ b/contrib/llvm/include/llvm/Support/Debug.h
@@ -13,7 +13,7 @@
//
// In particular, just wrap your code with the DEBUG() macro, and it will be
// enabled automatically if you specify '-debug' on the command-line.
-// Alternatively, you can also define the DEBUG_TYPE macro to "foo" specify
+// DEBUG() requires the DEBUG_TYPE macro to be defined. Set it to "foo" to specify
// that your debug code belongs to class "foo". Be careful that you only do
// this after including Debug.h and not around any #include of headers. Headers
should define and undef the macro around the code that needs to use the
diff --git a/contrib/llvm/include/llvm/Support/Dwarf.def b/contrib/llvm/include/llvm/Support/Dwarf.def
index 4b923b8..b15070b 100644
--- a/contrib/llvm/include/llvm/Support/Dwarf.def
+++ b/contrib/llvm/include/llvm/Support/Dwarf.def
@@ -99,10 +99,6 @@ HANDLE_DW_TAG(0x0041, type_unit)
HANDLE_DW_TAG(0x0042, rvalue_reference_type)
HANDLE_DW_TAG(0x0043, template_alias)
-// Mock tags we use as discriminators.
-HANDLE_DW_TAG(0x0100, auto_variable) // Tag for local (auto) variables.
-HANDLE_DW_TAG(0x0101, arg_variable) // Tag for argument variables.
-
// New in DWARF v5.
HANDLE_DW_TAG(0x0044, coarray_type)
HANDLE_DW_TAG(0x0045, generic_subrange)
@@ -117,6 +113,11 @@ HANDLE_DW_TAG(0x4106, GNU_template_template_param)
HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack)
HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack)
HANDLE_DW_TAG(0x4200, APPLE_property)
+HANDLE_DW_TAG(0xb000, BORLAND_property)
+HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string)
+HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array)
+HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set)
+HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant)
HANDLE_DW_OP(0x03, addr)
HANDLE_DW_OP(0x06, deref)
@@ -319,6 +320,7 @@ HANDLE_DW_LANG(0x0021, C_plus_plus_14)
HANDLE_DW_LANG(0x0022, Fortran03)
HANDLE_DW_LANG(0x0023, Fortran08)
HANDLE_DW_LANG(0x8001, Mips_Assembler)
+HANDLE_DW_LANG(0xb000, BORLAND_Delphi)
// DWARF attribute type encodings.
HANDLE_DW_ATE(0x01, address)
diff --git a/contrib/llvm/include/llvm/Support/Dwarf.h b/contrib/llvm/include/llvm/Support/Dwarf.h
index 17e9c15..cea61bd 100644
--- a/contrib/llvm/include/llvm/Support/Dwarf.h
+++ b/contrib/llvm/include/llvm/Support/Dwarf.h
@@ -40,6 +40,7 @@ enum LLVMConstants : uint32_t {
// LLVM mock tags (see also llvm/Support/Dwarf.def).
DW_TAG_invalid = ~0U, // Tag for invalid results.
DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results.
+ DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results.
// Other constants.
DWARF_VERSION = 4, // Default dwarf version we output.
@@ -195,6 +196,7 @@ enum Attribute : uint16_t {
DW_AT_dwo_name = 0x76,
DW_AT_reference = 0x77,
DW_AT_rvalue_reference = 0x78,
+ DW_AT_macros = 0x79,
DW_AT_lo_user = 0x2000,
DW_AT_hi_user = 0x3fff,
@@ -230,6 +232,7 @@ enum Attribute : uint16_t {
DW_AT_GNU_template_name = 0x2110,
DW_AT_GNU_odr_signature = 0x210f,
+ DW_AT_GNU_macros = 0x2119,
// Extensions for Fission proposal.
DW_AT_GNU_dwo_name = 0x2130,
@@ -238,6 +241,26 @@ enum Attribute : uint16_t {
DW_AT_GNU_addr_base = 0x2133,
DW_AT_GNU_pubnames = 0x2134,
DW_AT_GNU_pubtypes = 0x2135,
+ DW_AT_GNU_discriminator = 0x2136,
+
+ // Borland extensions.
+ DW_AT_BORLAND_property_read = 0x3b11,
+ DW_AT_BORLAND_property_write = 0x3b12,
+ DW_AT_BORLAND_property_implements = 0x3b13,
+ DW_AT_BORLAND_property_index = 0x3b14,
+ DW_AT_BORLAND_property_default = 0x3b15,
+ DW_AT_BORLAND_Delphi_unit = 0x3b20,
+ DW_AT_BORLAND_Delphi_class = 0x3b21,
+ DW_AT_BORLAND_Delphi_record = 0x3b22,
+ DW_AT_BORLAND_Delphi_metaclass = 0x3b23,
+ DW_AT_BORLAND_Delphi_constructor = 0x3b24,
+ DW_AT_BORLAND_Delphi_destructor = 0x3b25,
+ DW_AT_BORLAND_Delphi_anonymous_method = 0x3b26,
+ DW_AT_BORLAND_Delphi_interface = 0x3b27,
+ DW_AT_BORLAND_Delphi_ABI = 0x3b28,
+ DW_AT_BORLAND_Delphi_return = 0x3b29,
+ DW_AT_BORLAND_Delphi_frameptr = 0x3b30,
+ DW_AT_BORLAND_closure = 0x3b31,
// LLVM project extensions.
DW_AT_LLVM_include_path = 0x3e00,
@@ -370,6 +393,14 @@ enum CallingConvention {
DW_CC_program = 0x02,
DW_CC_nocall = 0x03,
DW_CC_lo_user = 0x40,
+ DW_CC_GNU_borland_fastcall_i386 = 0x41,
+ DW_CC_BORLAND_safecall = 0xb0,
+ DW_CC_BORLAND_stdcall = 0xb1,
+ DW_CC_BORLAND_pascal = 0xb2,
+ DW_CC_BORLAND_msfastcall = 0xb3,
+ DW_CC_BORLAND_msreturn = 0xb4,
+ DW_CC_BORLAND_thiscall = 0xb5,
+ DW_CC_BORLAND_fastcall = 0xb6,
DW_CC_hi_user = 0xff
};
@@ -429,6 +460,24 @@ enum MacinfoRecordType {
DW_MACINFO_vendor_ext = 0xff
};
+enum MacroEntryType {
+ // Macro Information Entry Type Encodings
+ DW_MACRO_define = 0x01,
+ DW_MACRO_undef = 0x02,
+ DW_MACRO_start_file = 0x03,
+ DW_MACRO_end_file = 0x04,
+ DW_MACRO_define_indirect = 0x05,
+ DW_MACRO_undef_indirect = 0x06,
+ DW_MACRO_transparent_include = 0x07,
+ DW_MACRO_define_indirect_sup = 0x08,
+ DW_MACRO_undef_indirect_sup = 0x09,
+ DW_MACRO_transparent_include_sup = 0x0a,
+ DW_MACRO_define_indirectx = 0x0b,
+ DW_MACRO_undef_indirectx = 0x0c,
+ DW_MACRO_lo_user = 0xe0,
+ DW_MACRO_hi_user = 0xff
+};
+
enum CallFrameInfo {
// Call frame instruction encodings
DW_CFA_extended = 0x00,
@@ -596,6 +645,7 @@ const char *GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage);
///
/// \li \a getTag() returns \a DW_TAG_invalid on invalid input.
/// \li \a getVirtuality() returns \a DW_VIRTUALITY_invalid on invalid input.
+/// \li \a getMacinfo() returns \a DW_MACINFO_invalid on invalid input.
///
/// @{
unsigned getTag(StringRef TagString);
@@ -603,6 +653,7 @@ unsigned getOperationEncoding(StringRef OperationEncodingString);
unsigned getVirtuality(StringRef VirtualityString);
unsigned getLanguage(StringRef LanguageString);
unsigned getAttributeEncoding(StringRef EncodingString);
+unsigned getMacinfo(StringRef MacinfoString);
/// @}
/// \brief Returns the symbolic string representing Val when used as a value
@@ -610,7 +661,7 @@ unsigned getAttributeEncoding(StringRef EncodingString);
const char *AttributeValueString(uint16_t Attr, unsigned Val);
/// \brief Describes an entry of the various gnu_pub* debug sections.
-///
+///
/// The gnu_pub* kind looks like:
///
/// 0-3 reserved
@@ -642,7 +693,6 @@ private:
};
};
-
} // End of namespace dwarf
} // End of namespace llvm
diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h
index 94a4bfb..97708a7 100644
--- a/contrib/llvm/include/llvm/Support/ELF.h
+++ b/contrib/llvm/include/llvm/Support/ELF.h
@@ -429,6 +429,33 @@ enum {
#include "ELFRelocs/ARM.def"
};
+// AVR specific e_flags
+enum : unsigned {
+ EF_AVR_ARCH_AVR1 = 1,
+ EF_AVR_ARCH_AVR2 = 2,
+ EF_AVR_ARCH_AVR25 = 25,
+ EF_AVR_ARCH_AVR3 = 3,
+ EF_AVR_ARCH_AVR31 = 31,
+ EF_AVR_ARCH_AVR35 = 35,
+ EF_AVR_ARCH_AVR4 = 4,
+ EF_AVR_ARCH_AVR5 = 5,
+ EF_AVR_ARCH_AVR51 = 51,
+ EF_AVR_ARCH_AVR6 = 6,
+ EF_AVR_ARCH_AVRTINY = 100,
+ EF_AVR_ARCH_XMEGA1 = 101,
+ EF_AVR_ARCH_XMEGA2 = 102,
+ EF_AVR_ARCH_XMEGA3 = 103,
+ EF_AVR_ARCH_XMEGA4 = 104,
+ EF_AVR_ARCH_XMEGA5 = 105,
+ EF_AVR_ARCH_XMEGA6 = 106,
+ EF_AVR_ARCH_XMEGA7 = 107
+};
+
+// ELF Relocation types for AVR
+enum {
+#include "ELFRelocs/AVR.def"
+};
+
// Mips Specific e_flags
enum : unsigned {
EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
@@ -522,26 +549,28 @@ enum {
ODK_PAGESIZE = 11 // Page size information
};
-// Hexagon Specific e_flags
-// Release 5 ABI
+// Hexagon-specific e_flags
enum {
- // Object processor version flags, bits[3:0]
+ // Object processor version flags, bits[11:0]
EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2
EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3
EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4
EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5
+ EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55
+ EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60
// Highest ISA version flags
- EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[3:0]
+ EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0]
// of e_flags
EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA
EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA
EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA
- EF_HEXAGON_ISA_V5 = 0x00000040 // Hexagon V5 ISA
+ EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA
+ EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA
+ EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA
};
-// Hexagon specific Section indexes for common small data
-// Release 5 ABI
+// Hexagon-specific section indexes for common small data
enum {
SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes
SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access
@@ -747,7 +776,12 @@ enum : unsigned {
SHF_MIPS_ADDR = 0x40000000,
// Section data is string data by default.
- SHF_MIPS_STRING = 0x80000000
+ SHF_MIPS_STRING = 0x80000000,
+
+ SHF_AMDGPU_HSA_GLOBAL = 0x00100000,
+ SHF_AMDGPU_HSA_READONLY = 0x00200000,
+ SHF_AMDGPU_HSA_CODE = 0x00400000,
+ SHF_AMDGPU_HSA_AGENT = 0x00800000
};
// Section Group Flags
@@ -828,7 +862,12 @@ enum {
STT_LOOS = 10, // Lowest operating system-specific symbol type
STT_HIOS = 12, // Highest operating system-specific symbol type
STT_LOPROC = 13, // Lowest processor-specific symbol type
- STT_HIPROC = 15 // Highest processor-specific symbol type
+ STT_HIPROC = 15, // Highest processor-specific symbol type
+
+ // AMDGPU symbol types
+ STT_AMDGPU_HSA_KERNEL = 10,
+ STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11,
+ STT_AMDGPU_HSA_METADATA = 12
};
enum {
@@ -979,7 +1018,13 @@ enum {
PT_MIPS_REGINFO = 0x70000000, // Register usage information.
PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table.
PT_MIPS_OPTIONS = 0x70000002, // Options segment.
- PT_MIPS_ABIFLAGS = 0x70000003 // Abiflags segment.
+ PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment.
+
+ // AMDGPU program header types.
+ PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000,
+ PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001,
+ PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002,
+ PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003
};
// Segment flag bits.
@@ -1139,8 +1184,10 @@ enum {
DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs.
DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic.
DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT.
- DT_MIPS_RWPLT = 0x70000034 // Points to the base
+ DT_MIPS_RWPLT = 0x70000034, // Points to the base
// of a writable PLT.
+ DT_MIPS_RLD_MAP_REL = 0x70000035 // Relative offset of run time loader
+ // map, used for debugging.
};
// DT_FLAGS values.
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/AVR.def b/contrib/llvm/include/llvm/Support/ELFRelocs/AVR.def
new file mode 100644
index 0000000..5692d6c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/AVR.def
@@ -0,0 +1,40 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_AVR_NONE, 0)
+ELF_RELOC(R_AVR_32, 1)
+ELF_RELOC(R_AVR_7_PCREL, 2)
+ELF_RELOC(R_AVR_13_PCREL, 3)
+ELF_RELOC(R_AVR_16, 4)
+ELF_RELOC(R_AVR_16_PM, 5)
+ELF_RELOC(R_AVR_LO8_LDI, 6)
+ELF_RELOC(R_AVR_HI8_LDI, 7)
+ELF_RELOC(R_AVR_HH8_LDI, 8)
+ELF_RELOC(R_AVR_LO8_LDI_NEG, 9)
+ELF_RELOC(R_AVR_HI8_LDI_NEG, 10)
+ELF_RELOC(R_AVR_HH8_LDI_NEG, 11)
+ELF_RELOC(R_AVR_LO8_LDI_PM, 12)
+ELF_RELOC(R_AVR_HI8_LDI_PM, 13)
+ELF_RELOC(R_AVR_HH8_LDI_PM, 14)
+ELF_RELOC(R_AVR_LO8_LDI_PM_NEG, 15)
+ELF_RELOC(R_AVR_HI8_LDI_PM_NEG, 16)
+ELF_RELOC(R_AVR_HH8_LDI_PM_NEG, 17)
+ELF_RELOC(R_AVR_CALL, 18)
+ELF_RELOC(R_AVR_LDI, 19)
+ELF_RELOC(R_AVR_6, 20)
+ELF_RELOC(R_AVR_6_ADIW, 21)
+ELF_RELOC(R_AVR_MS8_LDI, 22)
+ELF_RELOC(R_AVR_MS8_LDI_NEG, 23)
+ELF_RELOC(R_AVR_LO8_LDI_GS, 24)
+ELF_RELOC(R_AVR_HI8_LDI_GS, 25)
+ELF_RELOC(R_AVR_8, 26)
+ELF_RELOC(R_AVR_8_LO8, 27)
+ELF_RELOC(R_AVR_8_HI8, 28)
+ELF_RELOC(R_AVR_8_HLO8, 29)
+ELF_RELOC(R_AVR_SYM_DIFF, 30)
+ELF_RELOC(R_AVR_16_LDST, 31)
+ELF_RELOC(R_AVR_LDS_STS_16, 33)
+ELF_RELOC(R_AVR_PORT6, 34)
+ELF_RELOC(R_AVR_PORT5, 35)
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC.def b/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC.def
index b6c3941..e4f8ee0 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC.def
@@ -3,6 +3,68 @@
#error "ELF_RELOC must be defined"
#endif
+// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the
+// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc.
+// to their corresponding integer values. As a result, we need to undef them
+// here before continuing.
+
+#undef R_PPC_NONE
+#undef R_PPC_ADDR32
+#undef R_PPC_ADDR24
+#undef R_PPC_ADDR16
+#undef R_PPC_ADDR16_LO
+#undef R_PPC_ADDR16_HI
+#undef R_PPC_ADDR16_HA
+#undef R_PPC_ADDR14
+#undef R_PPC_ADDR14_BRTAKEN
+#undef R_PPC_ADDR14_BRNTAKEN
+#undef R_PPC_REL24
+#undef R_PPC_REL14
+#undef R_PPC_REL14_BRTAKEN
+#undef R_PPC_REL14_BRNTAKEN
+#undef R_PPC_GOT16
+#undef R_PPC_GOT16_LO
+#undef R_PPC_GOT16_HI
+#undef R_PPC_GOT16_HA
+#undef R_PPC_PLTREL24
+#undef R_PPC_JMP_SLOT
+#undef R_PPC_LOCAL24PC
+#undef R_PPC_REL32
+#undef R_PPC_TLS
+#undef R_PPC_DTPMOD32
+#undef R_PPC_TPREL16
+#undef R_PPC_TPREL16_LO
+#undef R_PPC_TPREL16_HI
+#undef R_PPC_TPREL16_HA
+#undef R_PPC_TPREL32
+#undef R_PPC_DTPREL16
+#undef R_PPC_DTPREL16_LO
+#undef R_PPC_DTPREL16_HI
+#undef R_PPC_DTPREL16_HA
+#undef R_PPC_DTPREL32
+#undef R_PPC_GOT_TLSGD16
+#undef R_PPC_GOT_TLSGD16_LO
+#undef R_PPC_GOT_TLSGD16_HI
+#undef R_PPC_GOT_TLSGD16_HA
+#undef R_PPC_GOT_TLSLD16
+#undef R_PPC_GOT_TLSLD16_LO
+#undef R_PPC_GOT_TLSLD16_HI
+#undef R_PPC_GOT_TLSLD16_HA
+#undef R_PPC_GOT_TPREL16
+#undef R_PPC_GOT_TPREL16_LO
+#undef R_PPC_GOT_TPREL16_HI
+#undef R_PPC_GOT_TPREL16_HA
+#undef R_PPC_GOT_DTPREL16
+#undef R_PPC_GOT_DTPREL16_LO
+#undef R_PPC_GOT_DTPREL16_HI
+#undef R_PPC_GOT_DTPREL16_HA
+#undef R_PPC_TLSGD
+#undef R_PPC_TLSLD
+#undef R_PPC_REL16
+#undef R_PPC_REL16_LO
+#undef R_PPC_REL16_HI
+#undef R_PPC_REL16_HA
+
ELF_RELOC(R_PPC_NONE, 0) /* No relocation. */
ELF_RELOC(R_PPC_ADDR32, 1)
ELF_RELOC(R_PPC_ADDR24, 2)
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC64.def b/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC64.def
index 7b2a3cb..3a47c5a 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC64.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/PowerPC64.def
@@ -3,6 +3,97 @@
#error "ELF_RELOC must be defined"
#endif
+// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the
+// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc.
+// to their corresponding integer values. As a result, we need to undef them
+// here before continuing.
+
+#undef R_PPC64_NONE
+#undef R_PPC64_ADDR32
+#undef R_PPC64_ADDR24
+#undef R_PPC64_ADDR16
+#undef R_PPC64_ADDR16_LO
+#undef R_PPC64_ADDR16_HI
+#undef R_PPC64_ADDR16_HA
+#undef R_PPC64_ADDR14
+#undef R_PPC64_ADDR14_BRTAKEN
+#undef R_PPC64_ADDR14_BRNTAKEN
+#undef R_PPC64_REL24
+#undef R_PPC64_REL14
+#undef R_PPC64_REL14_BRTAKEN
+#undef R_PPC64_REL14_BRNTAKEN
+#undef R_PPC64_GOT16
+#undef R_PPC64_GOT16_LO
+#undef R_PPC64_GOT16_HI
+#undef R_PPC64_GOT16_HA
+#undef R_PPC64_GLOB_DAT
+#undef R_PPC64_JMP_SLOT
+#undef R_PPC64_RELATIVE
+#undef R_PPC64_REL32
+#undef R_PPC64_ADDR64
+#undef R_PPC64_ADDR16_HIGHER
+#undef R_PPC64_ADDR16_HIGHERA
+#undef R_PPC64_ADDR16_HIGHEST
+#undef R_PPC64_ADDR16_HIGHESTA
+#undef R_PPC64_REL64
+#undef R_PPC64_TOC16
+#undef R_PPC64_TOC16_LO
+#undef R_PPC64_TOC16_HI
+#undef R_PPC64_TOC16_HA
+#undef R_PPC64_TOC
+#undef R_PPC64_ADDR16_DS
+#undef R_PPC64_ADDR16_LO_DS
+#undef R_PPC64_GOT16_DS
+#undef R_PPC64_GOT16_LO_DS
+#undef R_PPC64_TOC16_DS
+#undef R_PPC64_TOC16_LO_DS
+#undef R_PPC64_TLS
+#undef R_PPC64_DTPMOD64
+#undef R_PPC64_TPREL16
+#undef R_PPC64_TPREL16_LO
+#undef R_PPC64_TPREL16_HI
+#undef R_PPC64_TPREL16_HA
+#undef R_PPC64_TPREL64
+#undef R_PPC64_DTPREL16
+#undef R_PPC64_DTPREL16_LO
+#undef R_PPC64_DTPREL16_HI
+#undef R_PPC64_DTPREL16_HA
+#undef R_PPC64_DTPREL64
+#undef R_PPC64_GOT_TLSGD16
+#undef R_PPC64_GOT_TLSGD16_LO
+#undef R_PPC64_GOT_TLSGD16_HI
+#undef R_PPC64_GOT_TLSGD16_HA
+#undef R_PPC64_GOT_TLSLD16
+#undef R_PPC64_GOT_TLSLD16_LO
+#undef R_PPC64_GOT_TLSLD16_HI
+#undef R_PPC64_GOT_TLSLD16_HA
+#undef R_PPC64_GOT_TPREL16_DS
+#undef R_PPC64_GOT_TPREL16_LO_DS
+#undef R_PPC64_GOT_TPREL16_HI
+#undef R_PPC64_GOT_TPREL16_HA
+#undef R_PPC64_GOT_DTPREL16_DS
+#undef R_PPC64_GOT_DTPREL16_LO_DS
+#undef R_PPC64_GOT_DTPREL16_HI
+#undef R_PPC64_GOT_DTPREL16_HA
+#undef R_PPC64_TPREL16_DS
+#undef R_PPC64_TPREL16_LO_DS
+#undef R_PPC64_TPREL16_HIGHER
+#undef R_PPC64_TPREL16_HIGHERA
+#undef R_PPC64_TPREL16_HIGHEST
+#undef R_PPC64_TPREL16_HIGHESTA
+#undef R_PPC64_DTPREL16_DS
+#undef R_PPC64_DTPREL16_LO_DS
+#undef R_PPC64_DTPREL16_HIGHER
+#undef R_PPC64_DTPREL16_HIGHERA
+#undef R_PPC64_DTPREL16_HIGHEST
+#undef R_PPC64_DTPREL16_HIGHESTA
+#undef R_PPC64_TLSGD
+#undef R_PPC64_TLSLD
+#undef R_PPC64_REL16
+#undef R_PPC64_REL16_LO
+#undef R_PPC64_REL16_HI
+#undef R_PPC64_REL16_HA
+
ELF_RELOC(R_PPC64_NONE, 0)
ELF_RELOC(R_PPC64_ADDR32, 1)
ELF_RELOC(R_PPC64_ADDR24, 2)
@@ -21,7 +112,9 @@ ELF_RELOC(R_PPC64_GOT16, 14)
ELF_RELOC(R_PPC64_GOT16_LO, 15)
ELF_RELOC(R_PPC64_GOT16_HI, 16)
ELF_RELOC(R_PPC64_GOT16_HA, 17)
+ELF_RELOC(R_PPC64_GLOB_DAT, 20)
ELF_RELOC(R_PPC64_JMP_SLOT, 21)
+ELF_RELOC(R_PPC64_RELATIVE, 22)
ELF_RELOC(R_PPC64_REL32, 26)
ELF_RELOC(R_PPC64_ADDR64, 38)
ELF_RELOC(R_PPC64_ADDR16_HIGHER, 39)
diff --git a/contrib/llvm/include/llvm/Support/Endian.h b/contrib/llvm/include/llvm/Support/Endian.h
index fd59009..bc93c9a 100644
--- a/contrib/llvm/include/llvm/Support/Endian.h
+++ b/contrib/llvm/include/llvm/Support/Endian.h
@@ -77,6 +77,95 @@ inline void write(void *memory, value_type value) {
&value,
sizeof(value_type));
}
+
+template <typename value_type>
+using make_unsigned_t = typename std::make_unsigned<value_type>::type;
+
+/// Read a value of a particular endianness from memory, for a location
+/// that starts at the given bit offset within the first byte.
+template <typename value_type, endianness endian, std::size_t alignment>
+inline value_type readAtBitAlignment(const void *memory, uint64_t startBit) {
+ assert(startBit < 8);
+ if (startBit == 0)
+ return read<value_type, endian, alignment>(memory);
+ else {
+ // Read two values and compose the result from them.
+ value_type val[2];
+ memcpy(&val[0],
+ LLVM_ASSUME_ALIGNED(
+ memory, (detail::PickAlignment<value_type, alignment>::value)),
+ sizeof(value_type) * 2);
+ val[0] = byte_swap<value_type, endian>(val[0]);
+ val[1] = byte_swap<value_type, endian>(val[1]);
+
+ // Shift bits from the lower value into place.
+ make_unsigned_t<value_type> lowerVal = val[0] >> startBit;
+ // Mask off upper bits after right shift in case of signed type.
+ make_unsigned_t<value_type> numBitsFirstVal =
+ (sizeof(value_type) * 8) - startBit;
+ lowerVal &= ((make_unsigned_t<value_type>)1 << numBitsFirstVal) - 1;
+
+ // Get the bits from the upper value.
+ make_unsigned_t<value_type> upperVal =
+ val[1] & (((make_unsigned_t<value_type>)1 << startBit) - 1);
+ // Shift them in to place.
+ upperVal <<= numBitsFirstVal;
+
+ return lowerVal | upperVal;
+ }
+}
+
+/// Write a value to memory with a particular endianness, for a location
+/// that starts at the given bit offset within the first byte.
+template <typename value_type, endianness endian, std::size_t alignment>
+inline void writeAtBitAlignment(void *memory, value_type value,
+ uint64_t startBit) {
+ assert(startBit < 8);
+ if (startBit == 0)
+ write<value_type, endian, alignment>(memory, value);
+ else {
+ // Read two values and shift the result into them.
+ value_type val[2];
+ memcpy(&val[0],
+ LLVM_ASSUME_ALIGNED(
+ memory, (detail::PickAlignment<value_type, alignment>::value)),
+ sizeof(value_type) * 2);
+ val[0] = byte_swap<value_type, endian>(val[0]);
+ val[1] = byte_swap<value_type, endian>(val[1]);
+
+ // Mask off any existing bits in the upper part of the lower value that
+ // we want to replace.
+ val[0] &= ((make_unsigned_t<value_type>)1 << startBit) - 1;
+ make_unsigned_t<value_type> numBitsFirstVal =
+ (sizeof(value_type) * 8) - startBit;
+ make_unsigned_t<value_type> lowerVal = value;
+ if (startBit > 0) {
+ // Mask off the upper bits in the new value that are not going to go into
+ // the lower value. This avoids a left shift of a negative value, which
+ // is undefined behavior.
+ lowerVal &= (((make_unsigned_t<value_type>)1 << numBitsFirstVal) - 1);
+ // Now shift the new bits into place
+ lowerVal <<= startBit;
+ }
+ val[0] |= lowerVal;
+
+ // Mask off any existing bits in the lower part of the upper value that
+ // we want to replace.
+ val[1] &= ~(((make_unsigned_t<value_type>)1 << startBit) - 1);
+ // Next shift the bits that go into the upper value into position.
+ make_unsigned_t<value_type> upperVal = value >> numBitsFirstVal;
+ // Mask off upper bits after right shift in case of signed type.
+ upperVal &= ((make_unsigned_t<value_type>)1 << startBit) - 1;
+ val[1] |= upperVal;
+
+ // Finally, rewrite values.
+ val[0] = byte_swap<value_type, endian>(val[0]);
+ val[1] = byte_swap<value_type, endian>(val[1]);
+ memcpy(LLVM_ASSUME_ALIGNED(
+ memory, (detail::PickAlignment<value_type, alignment>::value)),
+ &val[0], sizeof(value_type) * 2);
+ }
+}
} // end namespace endian
namespace detail {
@@ -208,19 +297,47 @@ typedef detail::packed_endian_specific_integral
<int64_t, native, unaligned> unaligned_int64_t;
namespace endian {
-inline uint16_t read16le(const void *p) { return *(const ulittle16_t *)p; }
-inline uint32_t read32le(const void *p) { return *(const ulittle32_t *)p; }
-inline uint64_t read64le(const void *p) { return *(const ulittle64_t *)p; }
-inline uint16_t read16be(const void *p) { return *(const ubig16_t *)p; }
-inline uint32_t read32be(const void *p) { return *(const ubig32_t *)p; }
-inline uint64_t read64be(const void *p) { return *(const ubig64_t *)p; }
-
-inline void write16le(void *p, uint16_t v) { *(ulittle16_t *)p = v; }
-inline void write32le(void *p, uint32_t v) { *(ulittle32_t *)p = v; }
-inline void write64le(void *p, uint64_t v) { *(ulittle64_t *)p = v; }
-inline void write16be(void *p, uint16_t v) { *(ubig16_t *)p = v; }
-inline void write32be(void *p, uint32_t v) { *(ubig32_t *)p = v; }
-inline void write64be(void *p, uint64_t v) { *(ubig64_t *)p = v; }
+template <typename T, endianness E> inline T read(const void *P) {
+ return *(const detail::packed_endian_specific_integral<T, E, unaligned> *)P;
+}
+
+template <endianness E> inline uint16_t read16(const void *P) {
+ return read<uint16_t, E>(P);
+}
+template <endianness E> inline uint32_t read32(const void *P) {
+ return read<uint32_t, E>(P);
+}
+template <endianness E> inline uint64_t read64(const void *P) {
+ return read<uint64_t, E>(P);
+}
+
+inline uint16_t read16le(const void *P) { return read16<little>(P); }
+inline uint32_t read32le(const void *P) { return read32<little>(P); }
+inline uint64_t read64le(const void *P) { return read64<little>(P); }
+inline uint16_t read16be(const void *P) { return read16<big>(P); }
+inline uint32_t read32be(const void *P) { return read32<big>(P); }
+inline uint64_t read64be(const void *P) { return read64<big>(P); }
+
+template <typename T, endianness E> inline void write(void *P, T V) {
+ *(detail::packed_endian_specific_integral<T, E, unaligned> *)P = V;
+}
+
+template <endianness E> inline void write16(void *P, uint16_t V) {
+ write<uint16_t, E>(P, V);
+}
+template <endianness E> inline void write32(void *P, uint32_t V) {
+ write<uint32_t, E>(P, V);
+}
+template <endianness E> inline void write64(void *P, uint64_t V) {
+ write<uint64_t, E>(P, V);
+}
+
+inline void write16le(void *P, uint16_t V) { write16<little>(P, V); }
+inline void write32le(void *P, uint32_t V) { write32<little>(P, V); }
+inline void write64le(void *P, uint64_t V) { write64<little>(P, V); }
+inline void write16be(void *P, uint16_t V) { write16<big>(P, V); }
+inline void write32be(void *P, uint32_t V) { write32<big>(P, V); }
+inline void write64be(void *P, uint64_t V) { write64<big>(P, V); }
} // end namespace endian
} // end namespace support
} // end namespace llvm
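
The per-width helpers are now thin wrappers over a single read<T, E> /
write<T, E> template, and the new *AtBitAlignment variants handle values that
begin partway into a byte (note they touch two value_type-sized words, so the
buffer must be large enough). A sketch:

#include "llvm/Support/Endian.h"
#include <cassert>
#include <cstdint>

void demo() {
  using namespace llvm::support;

  unsigned char Buf[4] = {};
  endian::write32le(Buf, 0xdeadbeef); // same as write<uint32_t, little>
  assert(endian::read32<little>(Buf) == 0xdeadbeefu);

  // Value starting 3 bits into the first byte; touches 2 * sizeof(uint32_t)
  // bytes, hence the 8-byte buffer.
  unsigned char Wide[8] = {};
  endian::writeAtBitAlignment<uint32_t, little, unaligned>(Wide, 0x123456, 3);
  assert((endian::readAtBitAlignment<uint32_t, little, unaligned>(Wide, 3)) ==
         0x123456u);
}
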
diff --git a/contrib/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm/include/llvm/Support/ErrorHandling.h
index 9afd52d..32f05e0 100644
--- a/contrib/llvm/include/llvm/Support/ErrorHandling.h
+++ b/contrib/llvm/include/llvm/Support/ErrorHandling.h
@@ -61,22 +61,22 @@ namespace llvm {
~ScopedFatalErrorHandler() { remove_fatal_error_handler(); }
};
- /// Reports a serious error, calling any installed error handler. These
- /// functions are intended to be used for error conditions which are outside
- /// the control of the compiler (I/O errors, invalid user input, etc.)
- ///
- /// If no error handler is installed the default is to print the message to
- /// standard error, followed by a newline.
- /// After the error handler is called this function will call exit(1), it
- /// does not return.
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
- bool gen_crash_diag = true);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
- bool gen_crash_diag = true);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
- bool gen_crash_diag = true);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
- bool gen_crash_diag = true);
+/// Reports a serious error, calling any installed error handler. These
+/// functions are intended to be used for error conditions which are outside
+/// the control of the compiler (I/O errors, invalid user input, etc.)
+///
+/// If no error handler is installed the default is to print the message to
+/// standard error, followed by a newline.
+/// After the error handler is called, this function will call exit(1); it
+/// does not return.
+LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
+ bool gen_crash_diag = true);
+LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
+ bool gen_crash_diag = true);
+LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
+ bool gen_crash_diag = true);
+LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
+ bool gen_crash_diag = true);
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
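For context, these declarations pair with the handler hook in the same header; a hedged sketch of the intended flow (install_fatal_error_handler and fatal_error_handler_t are the existing ErrorHandling.h API):

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdio>
    #include <string>

    // Runs instead of the default print-to-stderr behavior; after the
    // handler returns, report_fatal_error still calls exit(1).
    static void MyHandler(void *UserData, const std::string &Reason,
                          bool GenCrashDiag) {
      std::fprintf(stderr, "frontend error: %s\n", Reason.c_str());
    }

    void Fail() {
      llvm::install_fatal_error_handler(MyHandler);
      llvm::report_fatal_error("invalid user input"); // does not return
    }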
diff --git a/contrib/llvm/include/llvm/Support/ErrorOr.h b/contrib/llvm/include/llvm/Support/ErrorOr.h
index 589404f..ca6ede7 100644
--- a/contrib/llvm/include/llvm/Support/ErrorOr.h
+++ b/contrib/llvm/include/llvm/Support/ErrorOr.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/ErrorOr.h - Error Smart Pointer -----------------------===//
+//===- llvm/Support/ErrorOr.h - Error Smart Pointer -------------*- C++ -*-===//
//
// The LLVM Linker
//
@@ -91,6 +91,7 @@ private:
typedef typename std::remove_reference<T>::type &reference;
typedef const typename std::remove_reference<T>::type &const_reference;
typedef typename std::remove_reference<T>::type *pointer;
+ typedef const typename std::remove_reference<T>::type *const_pointer;
public:
template <class E>
@@ -183,10 +184,14 @@ public:
return toPointer(getStorage());
}
+ const_pointer operator->() const { return toPointer(getStorage()); }
+
reference operator *() {
return *getStorage();
}
+ const_reference operator*() const { return *getStorage(); }
+
private:
template <class OtherT>
void copyConstruct(const ErrorOr<OtherT> &Other) {
@@ -246,10 +251,14 @@ private:
return Val;
}
+ const_pointer toPointer(const_pointer Val) const { return Val; }
+
pointer toPointer(wrap *Val) {
return &Val->get();
}
+ const_pointer toPointer(const wrap *Val) const { return &Val->get(); }
+
storage_type *getStorage() {
assert(!HasError && "Cannot get value when an error exists!");
return reinterpret_cast<storage_type*>(TStorage.buffer);
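The added const overloads let an ErrorOr<T> be read through a const reference, which previously required a copy or a const_cast. A small sketch (Parse and SizeOrZero are hypothetical):

    #include "llvm/Support/ErrorOr.h"
    #include <string>
    #include <system_error>

    llvm::ErrorOr<std::string> Parse(bool Ok) {
      if (!Ok)
        return std::make_error_code(std::errc::invalid_argument);
      return std::string("parsed");
    }

    size_t SizeOrZero(const llvm::ErrorOr<std::string> &E) {
      if (!E)
        return 0;
      return E->size(); // new const operator-> (and operator*) in action
    }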
diff --git a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
index fd8879c..3bcf64a 100644
--- a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
namespace llvm {
@@ -37,9 +38,8 @@ public:
/// Factory method to create an OutputBuffer object which manages a read/write
/// buffer of the specified size. When committed, the buffer will be written
/// to the file at the specified path.
- static std::error_code create(StringRef FilePath, size_t Size,
- std::unique_ptr<FileOutputBuffer> &Result,
- unsigned Flags = 0);
+ static ErrorOr<std::unique_ptr<FileOutputBuffer>>
+ create(StringRef FilePath, size_t Size, unsigned Flags = 0);
/// Returns a pointer to the start of the buffer.
uint8_t *getBufferStart() {
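Callers now receive the buffer through the return value instead of an out-parameter. A hedged sketch of the new pattern (path and size are placeholders; commit() is the existing method that flushes the buffer to disk):

    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>

    using namespace llvm;

    std::error_code WriteMagic(StringRef Path) {
      ErrorOr<std::unique_ptr<FileOutputBuffer>> BufOrErr =
          FileOutputBuffer::create(Path, /*Size=*/4);
      if (std::error_code EC = BufOrErr.getError())
        return EC; // creation failed; nothing was written
      std::unique_ptr<FileOutputBuffer> &Buf = *BufOrErr;
      std::memcpy(Buf->getBufferStart(), "\x7f""ELF", 4);
      return Buf->commit();
    }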
diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h
index a736c32..4733ddb 100644
--- a/contrib/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm/include/llvm/Support/FileSystem.h
@@ -95,21 +95,21 @@ enum perms {
};
// Helper functions so that you can use & and | to manipulate perms bits:
-inline perms operator|(perms l , perms r) {
- return static_cast<perms>(
- static_cast<unsigned short>(l) | static_cast<unsigned short>(r));
+inline perms operator|(perms l, perms r) {
+ return static_cast<perms>(static_cast<unsigned short>(l) |
+ static_cast<unsigned short>(r));
}
-inline perms operator&(perms l , perms r) {
- return static_cast<perms>(
- static_cast<unsigned short>(l) & static_cast<unsigned short>(r));
+inline perms operator&(perms l, perms r) {
+ return static_cast<perms>(static_cast<unsigned short>(l) &
+ static_cast<unsigned short>(r));
}
inline perms &operator|=(perms &l, perms r) {
- l = l | r;
- return l;
+ l = l | r;
+ return l;
}
inline perms &operator&=(perms &l, perms r) {
- l = l & r;
- return l;
+ l = l & r;
+ return l;
}
inline perms operator~(perms x) {
return static_cast<perms>(~static_cast<unsigned short>(x));
@@ -156,6 +156,7 @@ class file_status
friend bool equivalent(file_status A, file_status B);
file_type Type;
perms Perms;
+
public:
#if defined(LLVM_ON_UNIX)
file_status() : fs_st_dev(0), fs_st_ino(0), fs_st_mtime(0),
@@ -267,6 +268,20 @@ private:
/// @brief Make \a path an absolute path.
///
+/// Makes \a path absolute using the \a current_directory if it is not already.
+/// An empty \a path will result in the \a current_directory.
+///
+/// /absolute/path => /absolute/path
+/// relative/../path => <current-directory>/relative/../path
+///
+/// @param path A path that is modified to be an absolute path.
+/// @returns errc::success if \a path has been made absolute, otherwise a
+/// platform-specific error_code.
+std::error_code make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path);
+
+/// @brief Make \a path an absolute path.
+///
/// Makes \a path absolute using the current directory if it is not already. An
/// empty \a path will result in the current directory.
///
@@ -285,7 +300,8 @@ std::error_code make_absolute(SmallVectorImpl<char> &path);
/// specific error_code. If IgnoreExisting is false, also returns
/// error if the directory already existed.
std::error_code create_directories(const Twine &path,
- bool IgnoreExisting = true);
+ bool IgnoreExisting = true,
+ perms Perms = owner_all | group_all);
/// @brief Create the directory in path.
///
@@ -293,7 +309,8 @@ std::error_code create_directories(const Twine &path,
/// @returns errc::success if is_directory(path), otherwise a platform
/// specific error_code. If IgnoreExisting is false, also returns
/// error if the directory already existed.
-std::error_code create_directory(const Twine &path, bool IgnoreExisting = true);
+std::error_code create_directory(const Twine &path, bool IgnoreExisting = true,
+ perms Perms = owner_all | group_all);
/// @brief Create a link from \a from to \a to.
///
@@ -375,9 +392,7 @@ inline bool exists(const Twine &Path) {
///
/// @param Path Input path.
/// @returns True if we can execute it, false otherwise.
-inline bool can_execute(const Twine &Path) {
- return !access(Path, AccessMode::Execute);
-}
+bool can_execute(const Twine &Path);
/// @brief Can we write this file?
///
@@ -531,15 +546,15 @@ std::error_code status_known(const Twine &path, bool &result);
///
/// Generates a unique path suitable for a temporary file and then opens it as a
/// file. The name is based on \a model with '%' replaced by a random char in
-/// [0-9a-f]. If \a model is not an absolute path, a suitable temporary
-/// directory will be prepended.
+/// [0-9a-f]. If \a model is not an absolute path, the temporary file will be
+/// created in the current directory.
///
/// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s
///
/// This is an atomic operation. Either the file is created and opened, or the
/// file system is left untouched.
///
-/// The intendend use is for files that are to be kept, possibly after
+/// The intended use is for files that are to be kept, possibly after
/// renaming them. For example, when running 'clang -c foo.o', the file can
/// be first created as foo-abc123.o and then renamed.
///
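Two of the additions above compose naturally: the explicit-directory make_absolute overload and the permission-aware create_directories. A sketch, assuming both land as declared:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"

    using namespace llvm;

    std::error_code MakeCacheDir(const Twine &Base) {
      SmallString<128> P("cache/objects");
      // Resolve against an explicit directory, not the process CWD.
      if (std::error_code EC = sys::fs::make_absolute(Base, P))
        return EC;
      // Owner-only permissions instead of the owner_all | group_all default.
      return sys::fs::create_directories(P, /*IgnoreExisting=*/true,
                                         sys::fs::owner_all);
    }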
diff --git a/contrib/llvm/include/llvm/Support/Format.h b/contrib/llvm/include/llvm/Support/Format.h
index 4319a3b..f0b437a 100644
--- a/contrib/llvm/include/llvm/Support/Format.h
+++ b/contrib/llvm/include/llvm/Support/Format.h
@@ -118,6 +118,7 @@ class FormattedString {
unsigned Width;
bool RightJustify;
friend class raw_ostream;
+
public:
FormattedString(StringRef S, unsigned W, bool R)
: Str(S), Width(W), RightJustify(R) { }
@@ -146,6 +147,7 @@ class FormattedNumber {
bool Upper;
bool HexPrefix;
friend class raw_ostream;
+
public:
FormattedNumber(uint64_t HV, int64_t DV, unsigned W, bool H, bool U,
bool Prefix)
@@ -178,7 +180,7 @@ inline FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width,
return FormattedNumber(N, 0, Width, true, Upper, false);
}
-/// format_decimal - Output \p N as a right justified, fixed-width decimal. If
+/// format_decimal - Output \p N as a right justified, fixed-width decimal. If
/// number will not fit in width, full number is still printed. Examples:
/// OS << format_decimal(0, 5) => " 0"
/// OS << format_decimal(255, 5) => " 255"
@@ -188,7 +190,6 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) {
return FormattedNumber(0, N, Width, false, false, false);
}
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/GCOV.h b/contrib/llvm/include/llvm/Support/GCOV.h
index c2e34bd..544434f 100644
--- a/contrib/llvm/include/llvm/Support/GCOV.h
+++ b/contrib/llvm/include/llvm/Support/GCOV.h
@@ -30,12 +30,11 @@ class GCOVBlock;
class FileInfo;
namespace GCOV {
-enum GCOVVersion { V402, V404 };
-} // end GCOV namespace
+enum GCOVVersion { V402, V404, V704 };
-/// GCOVOptions - A struct for passing gcov options between functions.
-struct GCOVOptions {
- GCOVOptions(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N)
+/// \brief A struct for passing gcov options between functions.
+struct Options {
+ Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N)
: AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F),
PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {}
@@ -48,6 +47,7 @@ struct GCOVOptions {
bool LongFileNames;
bool NoOutput;
};
+} // end GCOV namespace
/// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific
/// read operations.
@@ -90,6 +90,11 @@ public:
Version = GCOV::V404;
return true;
}
+ if (VersionStr == "*704") {
+ Cursor += 4;
+ Version = GCOV::V704;
+ return true;
+ }
errs() << "Unexpected version: " << VersionStr << ".\n";
return false;
}
@@ -390,7 +395,7 @@ class FileInfo {
};
public:
- FileInfo(const GCOVOptions &Options)
+ FileInfo(const GCOV::Options &Options)
: Options(Options), LineInfo(), RunCount(0), ProgramCount(0) {}
void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) {
@@ -424,7 +429,7 @@ private:
void printFuncCoverage(raw_ostream &OS) const;
void printFileCoverage(raw_ostream &OS) const;
- const GCOVOptions &Options;
+ const GCOV::Options &Options;
StringMap<LineData> LineInfo;
uint32_t RunCount;
uint32_t ProgramCount;
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h
index 63678bb..8751f27 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h
@@ -371,8 +371,9 @@ public:
void releaseMemory() { reset(); }
/// getNode - return the (Post)DominatorTree node for the specified basic
- /// block. This is the same as using operator[] on this class.
- ///
+ /// block. This is the same as using operator[] on this class. The result
+ /// may (but is not required to) be null for a forward (backwards)
+ /// statically unreachable block.
DomTreeNodeBase<NodeT> *getNode(NodeT *BB) const {
auto I = DomTreeNodes.find(BB);
if (I != DomTreeNodes.end())
@@ -380,6 +381,7 @@ public:
return nullptr;
}
+ /// See getNode.
DomTreeNodeBase<NodeT> *operator[](NodeT *BB) const { return getNode(BB); }
/// getRootNode - This returns the entry node for the CFG of the function. If
@@ -732,13 +734,13 @@ public:
for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
E = TraitsTy::nodes_end(&F);
I != E; ++I) {
- if (TraitsTy::child_begin(I) == TraitsTy::child_end(I))
- addRoot(I);
+ if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I))
+ addRoot(&*I);
// Prepopulate maps so that we don't get iterator invalidation issues
// later.
- this->IDoms[I] = nullptr;
- this->DomTreeNodes[I] = nullptr;
+ this->IDoms[&*I] = nullptr;
+ this->DomTreeNodes[&*I] = nullptr;
}
Calculate<FT, Inverse<NodeT *>>(*this, F);
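The sharpened getNode() contract matters in practice: callers must tolerate a null result for statically unreachable blocks. A defensive-use sketch over the IR-level instantiation:

    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // Depth of BB in the dominator tree; 0 when BB is statically
    // unreachable and getNode() therefore returns null.
    unsigned DomDepth(const DominatorTree &DT, BasicBlock *BB) {
      unsigned Depth = 0;
      for (DomTreeNode *N = DT.getNode(BB); N; N = N->getIDom())
        ++Depth;
      return Depth;
    }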
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 7c065f9..3e867dc 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -21,7 +21,6 @@
///
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
#define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
@@ -88,7 +87,7 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
// Increment the successor number for the next time we get to it.
++Worklist.back().second;
-
+
// Visit the successor next, if it isn't already visited.
typename GraphT::NodeType* Succ = *NextSucc;
@@ -103,9 +102,9 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
return N;
}
-template<class GraphT>
-typename GraphT::NodeType*
-Eval(DominatorTreeBase<typename GraphT::NodeType>& DT,
+template <class GraphT>
+typename GraphT::NodeType *
+Eval(DominatorTreeBase<typename GraphT::NodeType> &DT,
typename GraphT::NodeType *VIn, unsigned LastLinked) {
typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInInfo =
DT.Info[VIn];
@@ -117,7 +116,7 @@ Eval(DominatorTreeBase<typename GraphT::NodeType>& DT,
if (VInInfo.Parent >= LastLinked)
Work.push_back(VIn);
-
+
while (!Work.empty()) {
typename GraphT::NodeType* V = Work.back();
typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInfo =
@@ -128,8 +127,8 @@ Eval(DominatorTreeBase<typename GraphT::NodeType>& DT,
if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
Work.push_back(VAncestor);
continue;
- }
- Work.pop_back();
+ }
+ Work.pop_back();
// Update VInfo based on Ancestor info
if (VInfo.Parent < LastLinked)
@@ -169,7 +168,7 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
i != e; ++i)
N = DFSPass<GraphT>(DT, DT.Roots[i], N);
- // it might be that some blocks did not get a DFS number (e.g., blocks of
+ // it might be that some blocks did not get a DFS number (e.g., blocks of
// infinite loops). In these cases an artificial exit node is required.
MultipleRoots |= (DT.isPostDominator() && N != GraphTraits<FuncT*>::size(&F));
@@ -287,7 +286,6 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
DT.updateDFSNumbers();
}
-
}
#endif
diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h
index b1af3d7..86985c5 100644
--- a/contrib/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm/include/llvm/Support/GraphWriter.h
@@ -175,8 +175,9 @@ public:
O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
- if (DTraits.hasNodeAddressLabel(Node, G))
- O << "|" << static_cast<const void*>(Node);
+ std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
+ if (!Id.empty())
+ O << "|" << DOT::EscapeString(Id);
std::string NodeDesc = DTraits.getNodeDescription(Node, G);
if (!NodeDesc.empty())
@@ -199,8 +200,9 @@ public:
O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
- if (DTraits.hasNodeAddressLabel(Node, G))
- O << "|" << static_cast<const void*>(Node);
+ std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
+ if (!Id.empty())
+ O << "|" << DOT::EscapeString(Id);
std::string NodeDesc = DTraits.getNodeDescription(Node, G);
if (!NodeDesc.empty())
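These two hunks swap the fixed address-in-label behavior for a trait hook; graphs that want the old output can produce it themselves. A hedged sketch of a DOTGraphTraits specialization (MyGraph and MyNode are hypothetical):

    #include "llvm/Support/GraphWriter.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    struct MyNode;
    struct MyGraph;

    namespace llvm {
    template <> struct DOTGraphTraits<const MyGraph *> : DefaultDOTGraphTraits {
      DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}

      // Replaces the removed hasNodeAddressLabel() hook: a non-empty result
      // is appended (escaped) after a '|' in the node label.
      static std::string getNodeIdentifierLabel(const MyNode *Node,
                                                const MyGraph *) {
        std::string S;
        raw_string_ostream OS(S);
        OS << static_cast<const void *>(Node); // old address-label output
        return OS.str();
      }
    };
    } // end namespace llvm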
diff --git a/contrib/llvm/include/llvm/Support/JamCRC.h b/contrib/llvm/include/llvm/Support/JamCRC.h
new file mode 100644
index 0000000..20c28a5
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/JamCRC.h
@@ -0,0 +1,48 @@
+//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of JamCRC.
+//
+// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties
+// of this CRC:
+// Width : 32
+// Poly : 04C11DB7
+// Init : FFFFFFFF
+// RefIn : True
+// RefOut : True
+// XorOut : 00000000
+// Check : 340BC6D9 (result of CRC for "123456789")
+//
+// N.B. We permit flexibility of the "Init" value. Some consumers of this need
+// it to be zero.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_JAMCRC_H
+#define LLVM_SUPPORT_JAMCRC_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class JamCRC {
+public:
+ JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {}
+
+ // \brief Update the CRC calculation with Data.
+ void update(ArrayRef<char> Data);
+
+ uint32_t getCRC() const { return CRC; }
+
+private:
+ uint32_t CRC;
+};
+} // End of namespace llvm
+
+#endif
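The header's Rocksoft parameters give a ready-made smoke test: the CRC of the nine ASCII digits must be 340BC6D9. A sketch (update() is out of line, so linking LLVMSupport is assumed):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/JamCRC.h"
    #include <cassert>

    int main() {
      const char Digits[] = {'1','2','3','4','5','6','7','8','9'};
      llvm::JamCRC CRC; // Init defaults to 0xFFFFFFFF per the Rocksoft model
      CRC.update(llvm::makeArrayRef(Digits, 9));
      assert(CRC.getCRC() == 0x340BC6D9u);
      return 0;
    }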
diff --git a/contrib/llvm/include/llvm/Support/MachO.h b/contrib/llvm/include/llvm/Support/MachO.h
index 7751275..54b8745 100644
--- a/contrib/llvm/include/llvm/Support/MachO.h
+++ b/contrib/llvm/include/llvm/Support/MachO.h
@@ -132,7 +132,9 @@ namespace llvm {
LC_DYLIB_CODE_SIGN_DRS = 0x0000002Bu,
LC_ENCRYPTION_INFO_64 = 0x0000002Cu,
LC_LINKER_OPTION = 0x0000002Du,
- LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu
+ LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu,
+ LC_VERSION_MIN_TVOS = 0x0000002Fu,
+ LC_VERSION_MIN_WATCHOS = 0x00000030u,
};
enum : uint32_t {
@@ -142,7 +144,6 @@ namespace llvm {
SG_NORELOC = 0x4u,
SG_PROTECTED_VERSION_1 = 0x8u,
-
// Constant masks for the "flags" field in llvm::MachO::section and
// llvm::MachO::section_64
SECTION_TYPE = 0x000000ffu, // SECTION_TYPE
@@ -334,7 +335,6 @@ namespace llvm {
EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u
};
-
enum {
// Constant masks for the "n_type" field in llvm::MachO::nlist and
// llvm::MachO::nlist_64
@@ -385,7 +385,7 @@ namespace llvm {
SELF_LIBRARY_ORDINAL = 0x0,
MAX_LIBRARY_ORDINAL = 0xfd,
DYNAMIC_LOOKUP_ORDINAL = 0xfe,
- EXECUTABLE_ORDINAL = 0xff
+ EXECUTABLE_ORDINAL = 0xff
};
enum StabType {
@@ -506,7 +506,6 @@ namespace llvm {
// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
ARM64_RELOC_ADDEND = 10,
-
// Constant values for the r_type field in an x86_64 architecture
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
// structure
@@ -530,7 +529,6 @@ namespace llvm {
VM_PROT_EXECUTE = 0x4
};
-
// Structs from <mach-o/loader.h>
struct mach_header {
@@ -784,7 +782,6 @@ namespace llvm {
flags:8;
};
-
struct twolevel_hints_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -924,7 +921,6 @@ namespace llvm {
uint64_t stacksize;
};
-
// Structs from <mach-o/fat.h>
struct fat_header {
uint32_t magic;
@@ -995,7 +991,6 @@ namespace llvm {
uint64_t n_value;
};
-
// Byte order swapping functions for MachO structs
inline void swapStruct(mach_header &mh) {
diff --git a/contrib/llvm/include/llvm/Support/ManagedStatic.h b/contrib/llvm/include/llvm/Support/ManagedStatic.h
index addd34e..2e131e4 100644
--- a/contrib/llvm/include/llvm/Support/ManagedStatic.h
+++ b/contrib/llvm/include/llvm/Support/ManagedStatic.h
@@ -15,8 +15,8 @@
#define LLVM_SUPPORT_MANAGEDSTATIC_H
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Threading.h"
-#include "llvm/Support/Valgrind.h"
namespace llvm {
diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h
index 2cf7e0e..8111aee 100644
--- a/contrib/llvm/include/llvm/Support/MathExtras.h
+++ b/contrib/llvm/include/llvm/Support/MathExtras.h
@@ -63,7 +63,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
}
};
-#if __GNUC__ >= 4 || _MSC_VER
+#if __GNUC__ >= 4 || defined(_MSC_VER)
template <typename T> struct TrailingZerosCounter<T, 4> {
static std::size_t count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
@@ -71,7 +71,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
return __builtin_ctz(Val);
-#elif _MSC_VER
+#elif defined(_MSC_VER)
unsigned long Index;
_BitScanForward(&Index, Val);
return Index;
@@ -87,7 +87,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
return __builtin_ctzll(Val);
-#elif _MSC_VER
+#elif defined(_MSC_VER)
unsigned long Index;
_BitScanForward64(&Index, Val);
return Index;
@@ -132,7 +132,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
}
};
-#if __GNUC__ >= 4 || _MSC_VER
+#if __GNUC__ >= 4 || defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 4> {
static std::size_t count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
@@ -140,7 +140,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
return __builtin_clz(Val);
-#elif _MSC_VER
+#elif defined(_MSC_VER)
unsigned long Index;
_BitScanReverse(&Index, Val);
return Index ^ 31;
@@ -156,7 +156,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
return __builtin_clzll(Val);
-#elif _MSC_VER
+#elif defined(_MSC_VER)
unsigned long Index;
_BitScanReverse64(&Index, Val);
return Index ^ 63;
@@ -313,7 +313,7 @@ inline bool isShiftedUInt(uint64_t x) {
/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
/// bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
- return x == (x & (~0ULL >> (64 - N)));
+ return N >= 64 || x < (UINT64_C(1)<<(N));
}
/// isIntN - Checks if an signed integer fits into the given (dynamic)
@@ -552,7 +552,7 @@ inline uint32_t FloatToBits(float Float) {
inline uint64_t MinAlign(uint64_t A, uint64_t B) {
// The largest power of 2 that divides both A and B.
//
- // Replace "-Value" by "1+~Value" in the following commented code to avoid
+ // Replace "-Value" by "1+~Value" in the following commented code to avoid
// MSVC warning C4146
// return (A | B) & -(A | B);
return (A | B) & (1 + ~(A | B));
@@ -599,15 +599,27 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
+/// If non-zero \p Skew is specified, the return value will be a minimal
+/// integer that is greater than or equal to \p Value and equal to
+/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
+/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
+///
/// Examples:
/// \code
/// RoundUpToAlignment(5, 8) = 8
/// RoundUpToAlignment(17, 8) = 24
/// RoundUpToAlignment(~0LL, 8) = 0
/// RoundUpToAlignment(321, 255) = 510
+///
+/// RoundUpToAlignment(5, 8, 7) = 7
+/// RoundUpToAlignment(17, 8, 1) = 17
+/// RoundUpToAlignment(~0LL, 8, 3) = 3
+/// RoundUpToAlignment(321, 255, 42) = 552
/// \endcode
-inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
- return (Value + Align - 1) / Align * Align;
+inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align,
+ uint64_t Skew = 0) {
+ Skew %= Align;
+ return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
/// Returns the offset to the next integer (mod 2**64) that is greater than
@@ -641,6 +653,70 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) {
return int64_t(X << (64 - B)) >> (64 - B);
}
+/// \brief Add two unsigned integers, X and Y, of type T.
+/// Clamp the result to the maximum representable value of T on overflow.
+/// ResultOverflowed indicates if the result is larger than the maximum
+/// representable value of type T.
+template <typename T>
+typename std::enable_if<std::is_unsigned<T>::value, T>::type
+SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
+ bool Dummy;
+ bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
+ // Hacker's Delight, p. 29
+ T Z = X + Y;
+ Overflowed = (Z < X || Z < Y);
+ if (Overflowed)
+ return std::numeric_limits<T>::max();
+ else
+ return Z;
+}
+
+/// \brief Multiply two unsigned integers, X and Y, of type T.
+/// Clamp the result to the maximum representable value of T on overflow.
+/// ResultOverflowed indicates if the result is larger than the maximum
+/// representable value of type T.
+template <typename T>
+typename std::enable_if<std::is_unsigned<T>::value, T>::type
+SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
+ bool Dummy;
+ bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
+
+ // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
+ // because it fails for uint16_t (where multiplication can have undefined
+ // behavior due to promotion to int), and requires a division in addition
+ // to the multiplication.
+
+ Overflowed = false;
+
+ // Log2(Z) would be either Log2Z or Log2Z + 1.
+ // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
+ // will necessarily be less than Log2Max as desired.
+ int Log2Z = Log2_64(X) + Log2_64(Y);
+ const T Max = std::numeric_limits<T>::max();
+ int Log2Max = Log2_64(Max);
+ if (Log2Z < Log2Max) {
+ return X * Y;
+ }
+ if (Log2Z > Log2Max) {
+ Overflowed = true;
+ return Max;
+ }
+
+ // We're going to use the top bit, and maybe overflow one
+ // bit past it. Multiply all but the bottom bit then add
+ // that on at the end.
+ T Z = (X >> 1) * Y;
+ if (Z & ~(Max >> 1)) {
+ Overflowed = true;
+ return Max;
+ }
+ Z <<= 1;
+ if (X & 1)
+ return SaturatingAdd(Z, Y, ResultOverflowed);
+
+ return Z;
+}
+
extern const float huge_valf;
} // End llvm namespace
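The saturating helpers clamp instead of wrapping and can report whether they did. A usage sketch exercising the uint16_t corner the comment above calls out:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      bool Ov = false;
      // 300 * 300 = 90000 does not fit in uint16_t: clamp and set the flag.
      uint16_t P = llvm::SaturatingMultiply<uint16_t>(300, 300, &Ov);
      assert(P == UINT16_MAX && Ov);

      // In-range: passes through unchanged.
      assert(llvm::SaturatingMultiply<uint16_t>(250, 250) == 62500);
      // Overflow without a flag pointer: still clamps, silently.
      assert(llvm::SaturatingAdd<uint16_t>(40000, 30000) == UINT16_MAX);
      return 0;
    }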
diff --git a/contrib/llvm/include/llvm/Support/Memory.h b/contrib/llvm/include/llvm/Support/Memory.h
index b4305cb..8103aea 100644
--- a/contrib/llvm/include/llvm/Support/Memory.h
+++ b/contrib/llvm/include/llvm/Support/Memory.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/Memory.h - Memory Support --------------------*- C++ -*-===//
+//===- llvm/Support/Memory.h - Memory Support -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,6 +32,7 @@ namespace sys {
MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
void *base() const { return Address; }
size_t size() const { return Size; }
+
private:
void *Address; ///< Address of first byte of memory area
size_t Size; ///< Size, in bytes of the memory area
@@ -70,7 +71,7 @@ namespace sys {
/// If the address following \p NearBlock is not so aligned, it will be
/// rounded up to the next allocation granularity boundary.
///
- /// \r a non-null MemoryBlock if the function was successful,
+ /// \r a non-null MemoryBlock if the function was successful,
/// otherwise a null MemoryBlock is with \p EC describing the error.
///
/// @brief Allocate mapped memory.
@@ -86,7 +87,7 @@ namespace sys {
///
/// \r error_success if the function was successful, or an error_code
/// describing the failure if an error occurred.
- ///
+ ///
/// @brief Release mapped memory.
static std::error_code releaseMappedMemory(MemoryBlock &Block);
@@ -131,7 +132,6 @@ namespace sys {
/// @brief Release Read/Write/Execute memory.
static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = nullptr);
-
/// InvalidateInstructionCache - Before the JIT can run a block of code
/// that has been emitted it must invalidate the instruction cache on some
/// platforms.
@@ -155,6 +155,31 @@ namespace sys {
/// as writable.
static bool setRangeWritable(const void *Addr, size_t Size);
};
+
+ /// Owning version of MemoryBlock.
+ class OwningMemoryBlock {
+ public:
+ OwningMemoryBlock() = default;
+ explicit OwningMemoryBlock(MemoryBlock M) : M(M) {}
+ OwningMemoryBlock(OwningMemoryBlock &&Other) {
+ M = Other.M;
+ Other.M = MemoryBlock();
+ }
+ OwningMemoryBlock& operator=(OwningMemoryBlock &&Other) {
+ M = Other.M;
+ Other.M = MemoryBlock();
+ return *this;
+ }
+ ~OwningMemoryBlock() {
+ Memory::releaseMappedMemory(M);
+ }
+ void *base() const { return M.base(); }
+ size_t size() const { return M.size(); }
+ MemoryBlock getMemoryBlock() const { return M; }
+ private:
+ MemoryBlock M;
+ };
+
}
}
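OwningMemoryBlock gives the mapped-memory API an RAII owner: the mapping is released when the object is destroyed, and moves transfer it. A sketch (size and flags are arbitrary):

    #include "llvm/Support/Memory.h"
    #include <cstring>
    #include <system_error>

    using namespace llvm::sys;

    std::error_code UseScratch() {
      std::error_code EC;
      MemoryBlock MB = Memory::allocateMappedMemory(
          4096, /*NearBlock=*/nullptr, Memory::MF_READ | Memory::MF_WRITE, EC);
      if (EC)
        return EC;
      OwningMemoryBlock Scratch(MB); // released in ~OwningMemoryBlock
      std::memset(Scratch.base(), 0, Scratch.size());
      return std::error_code();
    }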
diff --git a/contrib/llvm/include/llvm/Support/MemoryBuffer.h b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
index 81616d8..73d6435 100644
--- a/contrib/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SUPPORT_MEMORYBUFFER_H
#define LLVM_SUPPORT_MEMORYBUFFER_H
-#include "llvm-c/Support.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/DataTypes.h"
@@ -122,7 +121,8 @@ public:
/// Open the specified file as a MemoryBuffer, or open stdin if the Filename
/// is "-".
static ErrorOr<std::unique_ptr<MemoryBuffer>>
- getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1);
+ getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1,
+ bool RequiresNullTerminator = true);
/// Map a subrange of the specified file as a MemoryBuffer.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
@@ -151,6 +151,8 @@ class MemoryBufferRef {
public:
MemoryBufferRef() {}
+ MemoryBufferRef(MemoryBuffer& Buffer)
+ : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
MemoryBufferRef(StringRef Buffer, StringRef Identifier)
: Buffer(Buffer), Identifier(Identifier) {}
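Two quality-of-life tweaks: getFileOrSTDIN can skip null-termination (useful for binary inputs that may be mmap'ed), and a MemoryBufferRef can now be built directly from a MemoryBuffer. A sketch:

    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;

    // Open Path (or stdin for "-") without forcing a NUL terminator.
    ErrorOr<std::unique_ptr<MemoryBuffer>> OpenBinary(const Twine &Path) {
      return MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
                                          /*RequiresNullTerminator=*/false);
    }

    // The new constructor lets MemoryBufferRef-taking APIs accept a
    // MemoryBuffer without spelling out both pieces by hand.
    size_t SizeOf(MemoryBuffer &MB) {
      MemoryBufferRef Ref(MB);
      return Ref.getBufferSize();
    }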
diff --git a/contrib/llvm/include/llvm/Support/OnDiskHashTable.h b/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
index 08e277a..ac978d4 100644
--- a/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
+++ b/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
@@ -53,6 +53,8 @@ namespace llvm {
/// /// Write Data to Out. DataLen is the length from EmitKeyDataLength.
/// static void EmitData(raw_ostream &Out, key_type_ref Key,
/// data_type_ref Data, offset_type DataLen);
+/// /// Determine if two keys are equal. Optional, only needed by contains.
+/// static bool EqualKey(key_type_ref Key1, key_type_ref Key2);
/// };
/// \endcode
template <typename Info> class OnDiskChainedHashTableGenerator {
@@ -122,13 +124,21 @@ public:
/// Uses the provided Info instead of a stack allocated one.
void insert(typename Info::key_type_ref Key,
typename Info::data_type_ref Data, Info &InfoObj) {
-
++NumEntries;
if (4 * NumEntries >= 3 * NumBuckets)
resize(NumBuckets * 2);
insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj));
}
+ /// \brief Determine whether an entry has been inserted.
+ bool contains(typename Info::key_type_ref Key, Info &InfoObj) {
+ unsigned Hash = InfoObj.ComputeHash(Key);
+ for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next)
+ if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key))
+ return true;
+ return false;
+ }
+
/// \brief Emit the table to Out, which must not be at offset 0.
offset_type Emit(raw_ostream &Out) {
Info InfoObj;
@@ -161,8 +171,22 @@ public:
LE.write<typename Info::hash_value_type>(I->Hash);
const std::pair<offset_type, offset_type> &Len =
InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
+#ifdef NDEBUG
InfoObj.EmitKey(Out, I->Key, Len.first);
InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
+#else
+ // In asserts mode, check that the users length matches the data they
+ // wrote.
+ uint64_t KeyStart = Out.tell();
+ InfoObj.EmitKey(Out, I->Key, Len.first);
+ uint64_t DataStart = Out.tell();
+ InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
+ uint64_t End = Out.tell();
+ assert(offset_type(DataStart - KeyStart) == Len.first &&
+ "key length does not match bytes written");
+ assert(offset_type(End - DataStart) == Len.second &&
+ "data length does not match bytes written");
+#endif
}
}
@@ -239,11 +263,12 @@ template <typename Info> class OnDiskChainedHashTable {
Info InfoObj;
public:
+ typedef Info InfoType;
typedef typename Info::internal_key_type internal_key_type;
typedef typename Info::external_key_type external_key_type;
- typedef typename Info::data_type data_type;
- typedef typename Info::hash_value_type hash_value_type;
- typedef typename Info::offset_type offset_type;
+ typedef typename Info::data_type data_type;
+ typedef typename Info::hash_value_type hash_value_type;
+ typedef typename Info::offset_type offset_type;
OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
const unsigned char *Buckets,
@@ -255,6 +280,21 @@ public:
"'buckets' must have a 4-byte alignment");
}
+ /// Read the number of buckets and the number of entries from a hash table
+ /// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets
+ /// pointer past them.
+ static std::pair<offset_type, offset_type>
+ readNumBucketsAndEntries(const unsigned char *&Buckets) {
+ assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
+ "buckets should be 4-byte aligned.");
+ using namespace llvm::support;
+ offset_type NumBuckets =
+ endian::readNext<offset_type, little, aligned>(Buckets);
+ offset_type NumEntries =
+ endian::readNext<offset_type, little, aligned>(Buckets);
+ return std::make_pair(NumBuckets, NumEntries);
+ }
+
offset_type getNumBuckets() const { return NumBuckets; }
offset_type getNumEntries() const { return NumEntries; }
const unsigned char *getBase() const { return Base; }
@@ -275,6 +315,10 @@ public:
: Key(K), Data(D), Len(L), InfoObj(InfoObj) {}
data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); }
+
+ const unsigned char *getDataPtr() const { return Data; }
+ offset_type getDataLen() const { return Len; }
+
bool operator==(const iterator &X) const { return X.Data == Data; }
bool operator!=(const iterator &X) const { return X.Data != Data; }
};
@@ -356,17 +400,11 @@ public:
static OnDiskChainedHashTable *Create(const unsigned char *Buckets,
const unsigned char *const Base,
const Info &InfoObj = Info()) {
- using namespace llvm::support;
assert(Buckets > Base);
- assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
- "buckets should be 4-byte aligned.");
-
- offset_type NumBuckets =
- endian::readNext<offset_type, little, aligned>(Buckets);
- offset_type NumEntries =
- endian::readNext<offset_type, little, aligned>(Buckets);
- return new OnDiskChainedHashTable<Info>(NumBuckets, NumEntries, Buckets,
- Base, InfoObj);
+ auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets);
+ return new OnDiskChainedHashTable<Info>(NumBucketsAndEntries.first,
+ NumBucketsAndEntries.second,
+ Buckets, Base, InfoObj);
}
};
@@ -385,40 +423,30 @@ public:
typedef typename base_type::hash_value_type hash_value_type;
typedef typename base_type::offset_type offset_type;
- OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
- const unsigned char *Buckets,
- const unsigned char *Payload,
- const unsigned char *Base,
- const Info &InfoObj = Info())
- : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
- Payload(Payload) {}
-
+private:
/// \brief Iterates over all of the keys in the table.
- class key_iterator {
+ class iterator_base {
const unsigned char *Ptr;
offset_type NumItemsInBucketLeft;
offset_type NumEntriesLeft;
- Info *InfoObj;
public:
typedef external_key_type value_type;
- key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
- Info *InfoObj)
- : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
- InfoObj(InfoObj) {}
- key_iterator()
- : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0),
- InfoObj(0) {}
+ iterator_base(const unsigned char *const Ptr, offset_type NumEntries)
+ : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {}
+ iterator_base()
+ : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {}
- friend bool operator==(const key_iterator &X, const key_iterator &Y) {
+ friend bool operator==(const iterator_base &X, const iterator_base &Y) {
return X.NumEntriesLeft == Y.NumEntriesLeft;
}
- friend bool operator!=(const key_iterator &X, const key_iterator &Y) {
+ friend bool operator!=(const iterator_base &X, const iterator_base &Y) {
return X.NumEntriesLeft != Y.NumEntriesLeft;
}
- key_iterator &operator++() { // Preincrement
+ /// Move to the next item.
+ void advance() {
using namespace llvm::support;
if (!NumItemsInBucketLeft) {
// 'Items' starts with a 16-bit unsigned integer representing the
@@ -435,25 +463,58 @@ public:
--NumItemsInBucketLeft;
assert(NumEntriesLeft);
--NumEntriesLeft;
+ }
+
+ /// Get the start of the item as written by the trait (after the hash and
+ /// immediately before the key and value length).
+ const unsigned char *getItem() const {
+ return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type);
+ }
+ };
+
+public:
+ OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
+ const unsigned char *Buckets,
+ const unsigned char *Payload,
+ const unsigned char *Base,
+ const Info &InfoObj = Info())
+ : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
+ Payload(Payload) {}
+
+ /// \brief Iterates over all of the keys in the table.
+ class key_iterator : public iterator_base {
+ Info *InfoObj;
+
+ public:
+ typedef external_key_type value_type;
+
+ key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
+ Info *InfoObj)
+ : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
+ key_iterator() : iterator_base(), InfoObj() {}
+
+ key_iterator &operator++() {
+ this->advance();
return *this;
}
key_iterator operator++(int) { // Postincrement
- key_iterator tmp = *this; ++*this; return tmp;
+ key_iterator tmp = *this;
+ ++*this;
+ return tmp;
}
- value_type operator*() const {
- const unsigned char *LocalPtr = Ptr;
- if (!NumItemsInBucketLeft)
- LocalPtr += 2; // number of items in bucket
- LocalPtr += sizeof(hash_value_type); // Skip the hash.
+ internal_key_type getInternalKey() const {
+ auto *LocalPtr = this->getItem();
// Determine the length of the key and the data.
- const std::pair<offset_type, offset_type> &L =
- Info::ReadKeyDataLength(LocalPtr);
+ auto L = Info::ReadKeyDataLength(LocalPtr);
// Read the key.
- const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
- return InfoObj->GetExternalKey(Key);
+ return InfoObj->ReadKey(LocalPtr, L.first);
+ }
+
+ value_type operator*() const {
+ return InfoObj->GetExternalKey(getInternalKey());
}
};
@@ -467,10 +528,7 @@ public:
}
/// \brief Iterates over all the entries in the table, returning the data.
- class data_iterator {
- const unsigned char *Ptr;
- offset_type NumItemsInBucketLeft;
- offset_type NumEntriesLeft;
+ class data_iterator : public iterator_base {
Info *InfoObj;
public:
@@ -478,51 +536,24 @@ public:
data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
Info *InfoObj)
- : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
- InfoObj(InfoObj) {}
- data_iterator()
- : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0),
- InfoObj(nullptr) {}
-
- bool operator==(const data_iterator &X) const {
- return X.NumEntriesLeft == NumEntriesLeft;
- }
- bool operator!=(const data_iterator &X) const {
- return X.NumEntriesLeft != NumEntriesLeft;
- }
+ : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
+ data_iterator() : iterator_base(), InfoObj() {}
data_iterator &operator++() { // Preincrement
- using namespace llvm::support;
- if (!NumItemsInBucketLeft) {
- // 'Items' starts with a 16-bit unsigned integer representing the
- // number of items in this bucket.
- NumItemsInBucketLeft =
- endian::readNext<uint16_t, little, unaligned>(Ptr);
- }
- Ptr += sizeof(hash_value_type); // Skip the hash.
- // Determine the length of the key and the data.
- const std::pair<offset_type, offset_type> &L =
- Info::ReadKeyDataLength(Ptr);
- Ptr += L.first + L.second;
- assert(NumItemsInBucketLeft);
- --NumItemsInBucketLeft;
- assert(NumEntriesLeft);
- --NumEntriesLeft;
+ this->advance();
return *this;
}
data_iterator operator++(int) { // Postincrement
- data_iterator tmp = *this; ++*this; return tmp;
+ data_iterator tmp = *this;
+ ++*this;
+ return tmp;
}
value_type operator*() const {
- const unsigned char *LocalPtr = Ptr;
- if (!NumItemsInBucketLeft)
- LocalPtr += 2; // number of items in bucket
- LocalPtr += sizeof(hash_value_type); // Skip the hash.
+ auto *LocalPtr = this->getItem();
// Determine the length of the key and the data.
- const std::pair<offset_type, offset_type> &L =
- Info::ReadKeyDataLength(LocalPtr);
+ auto L = Info::ReadKeyDataLength(LocalPtr);
// Read the key.
const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
@@ -555,17 +586,12 @@ public:
static OnDiskIterableChainedHashTable *
Create(const unsigned char *Buckets, const unsigned char *const Payload,
const unsigned char *const Base, const Info &InfoObj = Info()) {
- using namespace llvm::support;
assert(Buckets > Base);
- assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
- "buckets should be 4-byte aligned.");
-
- offset_type NumBuckets =
- endian::readNext<offset_type, little, aligned>(Buckets);
- offset_type NumEntries =
- endian::readNext<offset_type, little, aligned>(Buckets);
+ auto NumBucketsAndEntries =
+ OnDiskIterableChainedHashTable<Info>::readNumBucketsAndEntries(Buckets);
return new OnDiskIterableChainedHashTable<Info>(
- NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj);
+ NumBucketsAndEntries.first, NumBucketsAndEntries.second,
+ Buckets, Payload, Base, InfoObj);
}
};
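The generator's new contains() needs one trait hook beyond what Emit() already required: EqualKey. A hedged sketch of a minimal string-keyed Info trait wired up for it (the hash and record layout here are illustrative, not what any in-tree user does):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/EndianStream.h"
    #include "llvm/Support/OnDiskHashTable.h"
    #include <utility>

    struct StringIntInfo {
      typedef llvm::StringRef key_type;
      typedef key_type key_type_ref;
      typedef uint32_t data_type;
      typedef data_type data_type_ref;
      typedef uint32_t hash_value_type;
      typedef uint32_t offset_type;

      static hash_value_type ComputeHash(key_type_ref K) {
        return llvm::HashString(K);
      }
      // Needed only by contains().
      static bool EqualKey(key_type_ref A, key_type_ref B) { return A == B; }

      static std::pair<offset_type, offset_type>
      EmitKeyDataLength(llvm::raw_ostream &Out, key_type_ref K, data_type_ref) {
        llvm::support::endian::Writer<llvm::support::little>(Out)
            .write<uint16_t>(K.size());
        return std::make_pair(static_cast<offset_type>(K.size()), 4u);
      }
      static void EmitKey(llvm::raw_ostream &Out, key_type_ref K, offset_type) {
        Out << K;
      }
      static void EmitData(llvm::raw_ostream &Out, key_type_ref,
                           data_type_ref D, offset_type) {
        llvm::support::endian::Writer<llvm::support::little>(Out)
            .write<uint32_t>(D);
      }
    };

    void Build() {
      StringIntInfo Info;
      llvm::OnDiskChainedHashTableGenerator<StringIntInfo> Gen;
      Gen.insert("alpha", 1);
      bool Has = Gen.contains("alpha", Info); // ComputeHash + EqualKey
      (void)Has;
    }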
diff --git a/contrib/llvm/include/llvm/Support/Options.h b/contrib/llvm/include/llvm/Support/Options.h
index 2742d39..7b61b23 100644
--- a/contrib/llvm/include/llvm/Support/Options.h
+++ b/contrib/llvm/include/llvm/Support/Options.h
@@ -71,7 +71,7 @@ private:
/// \param Key unique key for option
/// \param O option to map to \p Key
///
- /// Allocated cl::Options are owened by the OptionRegistry and are deallocated
+ /// Allocated cl::Options are owned by the OptionRegistry and are deallocated
/// on destruction or removal
void addOption(void *Key, cl::Option *O);
@@ -91,7 +91,7 @@ public:
/// Options are keyed off the template parameters to generate unique static
/// characters. The template parameters are (1) the type of the data the
/// option stores (\p ValT), the class that will read the option (\p Base),
- /// and the memeber that the class will store the data into (\p Mem).
+ /// and the member that the class will store the data into (\p Mem).
template <typename ValT, typename Base, ValT(Base::*Mem)>
static void registerOption(const char *ArgStr, const char *Desc,
const ValT &InitValue) {
diff --git a/contrib/llvm/include/llvm/Support/OutputBuffer.h b/contrib/llvm/include/llvm/Support/OutputBuffer.h
deleted file mode 100644
index 6b98e99..0000000
--- a/contrib/llvm/include/llvm/Support/OutputBuffer.h
+++ /dev/null
@@ -1,166 +0,0 @@
-//=== OutputBuffer.h - Output Buffer ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Methods to output values to a data buffer.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_OUTPUTBUFFER_H
-#define LLVM_SUPPORT_OUTPUTBUFFER_H
-
-#include <cassert>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class OutputBuffer {
- /// Output buffer.
- std::vector<unsigned char> &Output;
-
- /// is64Bit/isLittleEndian - This information is inferred from the target
- /// machine directly, indicating what header values and flags to set.
- bool is64Bit, isLittleEndian;
- public:
- OutputBuffer(std::vector<unsigned char> &Out,
- bool is64bit, bool le)
- : Output(Out), is64Bit(is64bit), isLittleEndian(le) {}
-
- // align - Emit padding into the file until the current output position is
- // aligned to the specified power of two boundary.
- void align(unsigned Boundary) {
- assert(Boundary && (Boundary & (Boundary - 1)) == 0 &&
- "Must align to 2^k boundary");
- size_t Size = Output.size();
-
- if (Size & (Boundary - 1)) {
- // Add padding to get alignment to the correct place.
- size_t Pad = Boundary - (Size & (Boundary - 1));
- Output.resize(Size + Pad);
- }
- }
-
- //===------------------------------------------------------------------===//
- // Out Functions - Output the specified value to the data buffer.
-
- void outbyte(unsigned char X) {
- Output.push_back(X);
- }
- void outhalf(unsigned short X) {
- if (isLittleEndian) {
- Output.push_back(X & 255);
- Output.push_back(X >> 8);
- } else {
- Output.push_back(X >> 8);
- Output.push_back(X & 255);
- }
- }
- void outword(unsigned X) {
- if (isLittleEndian) {
- Output.push_back((X >> 0) & 255);
- Output.push_back((X >> 8) & 255);
- Output.push_back((X >> 16) & 255);
- Output.push_back((X >> 24) & 255);
- } else {
- Output.push_back((X >> 24) & 255);
- Output.push_back((X >> 16) & 255);
- Output.push_back((X >> 8) & 255);
- Output.push_back((X >> 0) & 255);
- }
- }
- void outxword(uint64_t X) {
- if (isLittleEndian) {
- Output.push_back(unsigned(X >> 0) & 255);
- Output.push_back(unsigned(X >> 8) & 255);
- Output.push_back(unsigned(X >> 16) & 255);
- Output.push_back(unsigned(X >> 24) & 255);
- Output.push_back(unsigned(X >> 32) & 255);
- Output.push_back(unsigned(X >> 40) & 255);
- Output.push_back(unsigned(X >> 48) & 255);
- Output.push_back(unsigned(X >> 56) & 255);
- } else {
- Output.push_back(unsigned(X >> 56) & 255);
- Output.push_back(unsigned(X >> 48) & 255);
- Output.push_back(unsigned(X >> 40) & 255);
- Output.push_back(unsigned(X >> 32) & 255);
- Output.push_back(unsigned(X >> 24) & 255);
- Output.push_back(unsigned(X >> 16) & 255);
- Output.push_back(unsigned(X >> 8) & 255);
- Output.push_back(unsigned(X >> 0) & 255);
- }
- }
- void outaddr32(unsigned X) {
- outword(X);
- }
- void outaddr64(uint64_t X) {
- outxword(X);
- }
- void outaddr(uint64_t X) {
- if (!is64Bit)
- outword((unsigned)X);
- else
- outxword(X);
- }
- void outstring(const std::string &S, unsigned Length) {
- unsigned len_to_copy = static_cast<unsigned>(S.length()) < Length
- ? static_cast<unsigned>(S.length()) : Length;
- unsigned len_to_fill = static_cast<unsigned>(S.length()) < Length
- ? Length - static_cast<unsigned>(S.length()) : 0;
-
- for (unsigned i = 0; i < len_to_copy; ++i)
- outbyte(S[i]);
-
- for (unsigned i = 0; i < len_to_fill; ++i)
- outbyte(0);
- }
-
- //===------------------------------------------------------------------===//
- // Fix Functions - Replace an existing entry at an offset.
-
- void fixhalf(unsigned short X, unsigned Offset) {
- unsigned char *P = &Output[Offset];
- P[0] = (X >> (isLittleEndian ? 0 : 8)) & 255;
- P[1] = (X >> (isLittleEndian ? 8 : 0)) & 255;
- }
- void fixword(unsigned X, unsigned Offset) {
- unsigned char *P = &Output[Offset];
- P[0] = (X >> (isLittleEndian ? 0 : 24)) & 255;
- P[1] = (X >> (isLittleEndian ? 8 : 16)) & 255;
- P[2] = (X >> (isLittleEndian ? 16 : 8)) & 255;
- P[3] = (X >> (isLittleEndian ? 24 : 0)) & 255;
- }
- void fixxword(uint64_t X, unsigned Offset) {
- unsigned char *P = &Output[Offset];
- P[0] = (X >> (isLittleEndian ? 0 : 56)) & 255;
- P[1] = (X >> (isLittleEndian ? 8 : 48)) & 255;
- P[2] = (X >> (isLittleEndian ? 16 : 40)) & 255;
- P[3] = (X >> (isLittleEndian ? 24 : 32)) & 255;
- P[4] = (X >> (isLittleEndian ? 32 : 24)) & 255;
- P[5] = (X >> (isLittleEndian ? 40 : 16)) & 255;
- P[6] = (X >> (isLittleEndian ? 48 : 8)) & 255;
- P[7] = (X >> (isLittleEndian ? 56 : 0)) & 255;
- }
- void fixaddr(uint64_t X, unsigned Offset) {
- if (!is64Bit)
- fixword((unsigned)X, Offset);
- else
- fixxword(X, Offset);
- }
-
- unsigned char &operator[](unsigned Index) {
- return Output[Index];
- }
- const unsigned char &operator[](unsigned Index) const {
- return Output[Index];
- }
- };
-
-} // end llvm namespace
-
-#endif // LLVM_SUPPORT_OUTPUTBUFFER_H
diff --git a/contrib/llvm/include/llvm/Support/Path.h b/contrib/llvm/include/llvm/Support/Path.h
index 8fae853..955cc99 100644
--- a/contrib/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm/include/llvm/Support/Path.h
@@ -61,7 +61,6 @@ public:
reference operator*() const { return Component; }
pointer operator->() const { return &Component; }
const_iterator &operator++(); // preincrement
- const_iterator &operator++(int); // postincrement
bool operator==(const const_iterator &RHS) const;
bool operator!=(const const_iterator &RHS) const { return !(*this == RHS); }
@@ -87,7 +86,6 @@ public:
reference operator*() const { return Component; }
pointer operator->() const { return &Component; }
reverse_iterator &operator++(); // preincrement
- reverse_iterator &operator++(int); // postincrement
bool operator==(const reverse_iterator &RHS) const;
bool operator!=(const reverse_iterator &RHS) const { return !(*this == RHS); }
};
@@ -218,7 +216,7 @@ StringRef root_name(StringRef path);
/// @result The root directory of \a path if it has one, otherwise
/// "".
StringRef root_directory(StringRef path);
-
+
/// @brief Get root path.
///
/// Equivalent to root_name + root_directory.
@@ -310,7 +308,7 @@ bool is_separator(char value);
/// @result StringRef of the preferred separator, null-terminated.
StringRef get_separator();
-/// @brief Get the typical temporary directory for the system, e.g.,
+/// @brief Get the typical temporary directory for the system, e.g.,
/// "/var/tmp" or "C:/TEMP"
///
/// @param erasedOnReboot Whether to favor a path that is erased on reboot
@@ -327,6 +325,22 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result);
/// @result True if a home directory is set, false otherwise.
bool home_directory(SmallVectorImpl<char> &result);
+/// @brief Get the user's cache directory.
+///
+/// Expect the resulting path to be a directory shared with other
+/// applications/services used by the user. Params \p Path1 to \p Path3 can be
+/// used to append additional directory names to the resulting path. Recommended
+/// pattern is <user_cache_directory>/<vendor>/<application>.
+///
+/// @param Result Holds the resulting path.
+/// @param Path1 Additional path to be appended to the user's cache directory
+/// path. "" can be used to append nothing.
+/// @param Path2 Second additional path to be appended.
+/// @param Path3 Third additional path to be appended.
+/// @result True if a cache directory path is set, false otherwise.
+bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
+ const Twine &Path2 = "", const Twine &Path3 = "");
+
/// @brief Has root name?
///
/// root_name != ""
@@ -403,6 +417,19 @@ bool is_absolute(const Twine &path);
/// @result True if the path is relative, false if it is not.
bool is_relative(const Twine &path);
+/// @brief Remove redundant leading "./" pieces and consecutive separators.
+///
+/// @param path Input path.
+/// @result The cleaned-up \a path.
+StringRef remove_leading_dotslash(StringRef path);
+
+/// @brief In-place remove any './' and optionally '../' components from a path.
+///
+/// @param path processed path
+/// @param remove_dot_dot specify if '../' should be removed
+/// @result True if path was changed
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false);
+
} // end namespace path
} // end namespace sys
} // end namespace llvm
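Of the new helpers, remove_dots has the behavioral knob; a quick sketch of it and user_cache_directory (results shown for a POSIX host):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"

    using namespace llvm;

    void Demo() {
      SmallString<64> P("a/./b/../c");
      sys::path::remove_dots(P);                          // "a/b/../c"
      sys::path::remove_dots(P, /*remove_dot_dot=*/true); // "a/c"

      // Recommended layout: <user_cache_directory>/<vendor>/<application>.
      SmallString<128> Cache;
      if (sys::path::user_cache_directory(Cache, "MyVendor", "MyApp")) {
        // Cache now holds e.g. $HOME/.cache/MyVendor/MyApp on Linux.
      }
    }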
diff --git a/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h b/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
index 8370821..c12d237 100644
--- a/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
@@ -15,59 +15,70 @@
#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
+#include "llvm/Support/AlignOf.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
-
-/// PointerLikeTypeTraits - This is a traits object that is used to handle
-/// pointer types and things that are just wrappers for pointers as a uniform
-/// entity.
-template <typename T>
-class PointerLikeTypeTraits {
+
+/// A traits type that is used to handle pointer types and things that are just
+/// wrappers for pointers as a uniform entity.
+template <typename T> class PointerLikeTypeTraits {
// getAsVoidPointer
// getFromVoidPointer
// getNumLowBitsAvailable
};
+namespace detail {
+/// A tiny meta function to compute the log2 of a compile time constant.
+template <size_t N>
+struct ConstantLog2
+ : std::integral_constant<size_t, ConstantLog2<N / 2>::value + 1> {};
+template <> struct ConstantLog2<1> : std::integral_constant<size_t, 0> {};
+}
+
// Provide PointerLikeTypeTraits for non-cvr pointers.
-template<typename T>
-class PointerLikeTypeTraits<T*> {
-public:
- static inline void *getAsVoidPointer(T* P) { return P; }
- static inline T *getFromVoidPointer(void *P) {
- return static_cast<T*>(P);
- }
-
- /// Note, we assume here that malloc returns objects at least 4-byte aligned.
- /// However, this may be wrong, or pointers may be from something other than
- /// malloc. In this case, you should specialize this template to reduce this.
+template <typename T> struct PointerLikeTypeTraits<T *> {
+ static inline void *getAsVoidPointer(T *P) { return P; }
+ static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }
+
+ enum {
+ NumLowBitsAvailable = detail::ConstantLog2<AlignOf<T>::Alignment>::value
+ };
+};
+
+template <> struct PointerLikeTypeTraits<void *> {
+ static inline void *getAsVoidPointer(void *P) { return P; }
+ static inline void *getFromVoidPointer(void *P) { return P; }
+
+ /// Note, we assume here that void* is related to raw malloc'ed memory and
+ /// that malloc returns objects at least 4-byte aligned. However, this may be
+ /// wrong, or pointers may be from something other than malloc. In this case,
+ /// you should specify a real typed pointer or avoid this template.
///
/// All clients should use assertions to do a run-time check to ensure that
/// this is actually true.
enum { NumLowBitsAvailable = 2 };
};
-
+
// Provide PointerLikeTypeTraits for const pointers.
-template<typename T>
-class PointerLikeTypeTraits<const T*> {
- typedef PointerLikeTypeTraits<T*> NonConst;
+template <typename T> class PointerLikeTypeTraits<const T *> {
+ typedef PointerLikeTypeTraits<T *> NonConst;
public:
- static inline const void *getAsVoidPointer(const T* P) {
- return NonConst::getAsVoidPointer(const_cast<T*>(P));
+ static inline const void *getAsVoidPointer(const T *P) {
+ return NonConst::getAsVoidPointer(const_cast<T *>(P));
}
static inline const T *getFromVoidPointer(const void *P) {
- return NonConst::getFromVoidPointer(const_cast<void*>(P));
+ return NonConst::getFromVoidPointer(const_cast<void *>(P));
}
enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable };
};
// Provide PointerLikeTypeTraits for uintptr_t.
-template<>
-class PointerLikeTypeTraits<uintptr_t> {
+template <> class PointerLikeTypeTraits<uintptr_t> {
public:
static inline void *getAsVoidPointer(uintptr_t P) {
- return reinterpret_cast<void*>(P);
+ return reinterpret_cast<void *>(P);
}
static inline uintptr_t getFromVoidPointer(void *P) {
return reinterpret_cast<uintptr_t>(P);
@@ -75,7 +86,7 @@ public:
// No bits are available!
enum { NumLowBitsAvailable = 0 };
};
-
+
} // end namespace llvm
#endif
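The payoff of ConstantLog2 is that typed pointers now advertise free low bits from the pointee's alignment instead of a blanket guess. Compile-time checks, assuming typical alignments on a common 64-bit target:

    #include "llvm/Support/PointerLikeTypeTraits.h"
    #include <cstdint>

    static_assert(llvm::PointerLikeTypeTraits<uint64_t *>::NumLowBitsAvailable == 3,
                  "8-byte alignment yields 3 tag bits");
    static_assert(llvm::PointerLikeTypeTraits<char *>::NumLowBitsAvailable == 0,
                  "1-byte alignment yields no tag bits");
    static_assert(llvm::PointerLikeTypeTraits<void *>::NumLowBitsAvailable == 2,
                  "void* keeps the documented malloc-based assumption");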
diff --git a/contrib/llvm/include/llvm/Support/PrettyStackTrace.h b/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
index 96afb60..027f943 100644
--- a/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
+++ b/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
@@ -66,6 +66,18 @@ namespace llvm {
void print(raw_ostream &OS) const override;
};
+ /// Returns the topmost element of the "pretty" stack state.
+ const void* SavePrettyStackState();
+
+ /// Restores the topmost element of the "pretty" stack state to State, which
+ /// should come from a previous call to SavePrettyStackState(). This is
+  /// useful when combining a CrashRecoveryContext with
+  /// PrettyStackTraceEntries: it ensures that the trace printed for a crash
+  /// occurring after a recovered crash does not include frames added by code
+  /// that the CrashRecoveryContext has already unwound.
+ void RestorePrettyStackState(const void* State);
+
} // end namespace llvm
#endif
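
A hedged sketch of how the new save/restore pair is meant to bracket a CrashRecoveryContext; runSomePass is a hypothetical callee that may crash:

    #include "llvm/Support/CrashRecoveryContext.h"
    #include "llvm/Support/PrettyStackTrace.h"

    void runSomePass(); // hypothetical work that may crash

    void runRecoverably() {
      const void *State = llvm::SavePrettyStackState();
      llvm::CrashRecoveryContext CRC;
      if (!CRC.RunSafely([] { runSomePass(); })) {
        // The recovered code left pretty-stack entries that were never
        // unwound normally; drop them so a later crash prints an
        // accurate trace.
        llvm::RestorePrettyStackState(State);
      }
    }
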
diff --git a/contrib/llvm/include/llvm/Support/Printable.h b/contrib/llvm/include/llvm/Support/Printable.h
new file mode 100644
index 0000000..5c1b8d5
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/Printable.h
@@ -0,0 +1,52 @@
+//===--- Printable.h - Print function helpers -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Printable struct.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PRINTABLE_H
+#define LLVM_SUPPORT_PRINTABLE_H
+
+#include <functional>
+
+namespace llvm {
+
+class raw_ostream;
+
+/// Simple wrapper around std::function<void(raw_ostream&)>.
+/// This class is useful for constructing print helpers for raw_ostream.
+///
+/// Example:
+/// Printable PrintRegister(unsigned Register) {
+/// return Printable([Register](raw_ostream &OS) {
+/// OS << getRegisterName(Register);
+/// });
+/// }
+/// ... OS << PrintRegister(Register); ...
+///
+/// Implementation note: Ideally this would just be a typedef, but doing so
+/// leads to operator << being ambiguous, as std::function has matching
+/// constructors in some STL versions. The problem has been observed on
+/// gcc 4.6's libstdc++ and the Microsoft STL.
+class Printable {
+public:
+ std::function<void(raw_ostream &OS)> Print;
+ Printable(const std::function<void(raw_ostream &OS)> Print)
+ : Print(Print) {}
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) {
+ P.Print(OS);
+ return OS;
+}
+
+}
+
+#endif
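
A self-contained sketch of the wrapper in use; printRegName and its name table are illustrative stand-ins, not part of the header:

    #include "llvm/Support/Printable.h"
    #include "llvm/Support/raw_ostream.h"

    static llvm::Printable printRegName(unsigned Reg) {
      return llvm::Printable([Reg](llvm::raw_ostream &OS) {
        static const char *const Names[] = {"r0", "r1", "r2"};
        OS << (Reg < 3 ? Names[Reg] : "<invalid>");
      });
    }

    int main() {
      // All formatting is deferred until the helper meets a stream.
      llvm::outs() << "result lives in " << printRegName(1) << "\n";
    }
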
diff --git a/contrib/llvm/include/llvm/Support/Program.h b/contrib/llvm/include/llvm/Support/Program.h
index b89a0f7..4330210 100644
--- a/contrib/llvm/include/llvm/Support/Program.h
+++ b/contrib/llvm/include/llvm/Support/Program.h
@@ -67,8 +67,7 @@ struct ProcessInfo {
/// \returns The fully qualified path to the first \p Name in \p Paths if it
/// exists. \p Name if \p Name has slashes in it. Otherwise an error.
ErrorOr<std::string>
- findProgramByName(StringRef Name,
- ArrayRef<StringRef> Paths = ArrayRef<StringRef>());
+ findProgramByName(StringRef Name, ArrayRef<StringRef> Paths = None);
// These functions change the specified standard stream (stdin or stdout) to
// binary mode. They return errc::success if the specified stream
diff --git a/contrib/llvm/include/llvm/Support/Recycler.h b/contrib/llvm/include/llvm/Support/Recycler.h
index e97f36a..a38050d 100644
--- a/contrib/llvm/include/llvm/Support/Recycler.h
+++ b/contrib/llvm/include/llvm/Support/Recycler.h
@@ -28,53 +28,36 @@ namespace llvm {
///
void PrintRecyclerStats(size_t Size, size_t Align, size_t FreeListSize);
-/// RecyclerStruct - Implementation detail for Recycler. This is a
-/// class that the recycler imposes on free'd memory to carve out
-/// next/prev pointers.
-struct RecyclerStruct {
- RecyclerStruct *Prev, *Next;
-};
-
-template<>
-struct ilist_traits<RecyclerStruct> :
- public ilist_default_traits<RecyclerStruct> {
- static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; }
- static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; }
- static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; }
- static void setNext(RecyclerStruct *t, RecyclerStruct *n) { t->Next = n; }
-
- mutable RecyclerStruct Sentinel;
- RecyclerStruct *createSentinel() const {
- return &Sentinel;
- }
- static void destroySentinel(RecyclerStruct *) {}
-
- RecyclerStruct *provideInitialHead() const { return createSentinel(); }
- RecyclerStruct *ensureHead(RecyclerStruct*) const { return createSentinel(); }
- static void noteHead(RecyclerStruct*, RecyclerStruct*) {}
-
- static void deleteNode(RecyclerStruct *) {
- llvm_unreachable("Recycler's ilist_traits shouldn't see a deleteNode call!");
- }
-};
-
/// Recycler - This class manages a linked-list of deallocated nodes
/// and facilitates reusing deallocated memory in place of allocating
/// new memory.
///
template<class T, size_t Size = sizeof(T), size_t Align = AlignOf<T>::Alignment>
class Recycler {
- /// FreeList - Doubly-linked list of nodes that have deleted contents and
- /// are not in active use.
- ///
- iplist<RecyclerStruct> FreeList;
+ struct FreeNode {
+ FreeNode *Next;
+ };
+
+ /// List of nodes that have deleted contents and are not in active use.
+ FreeNode *FreeList = nullptr;
+
+ FreeNode *pop_val() {
+ auto *Val = FreeList;
+ FreeList = FreeList->Next;
+ return Val;
+ }
+
+ void push(FreeNode *N) {
+ N->Next = FreeList;
+ FreeList = N;
+ }
public:
~Recycler() {
// If this fails, either the callee has lost track of some allocation,
// or the callee isn't tracking allocations and should just call
// clear() before deleting the Recycler.
- assert(FreeList.empty() && "Non-empty recycler deleted!");
+ assert(!FreeList && "Non-empty recycler deleted!");
}
/// clear - Release all the tracked allocations to the allocator. The
@@ -82,8 +65,8 @@ public:
/// deleted; calling clear is one way to ensure this.
template<class AllocatorType>
void clear(AllocatorType &Allocator) {
- while (!FreeList.empty()) {
- T *t = reinterpret_cast<T *>(FreeList.remove(FreeList.begin()));
+ while (FreeList) {
+ T *t = reinterpret_cast<T *>(pop_val());
Allocator.Deallocate(t);
}
}
@@ -93,9 +76,7 @@ public:
///
/// There is no need to traverse the free list, pulling all the objects into
/// cache.
- void clear(BumpPtrAllocator&) {
- FreeList.clearAndLeakNodesUnsafely();
- }
+ void clear(BumpPtrAllocator &) { FreeList = nullptr; }
template<class SubClass, class AllocatorType>
SubClass *Allocate(AllocatorType &Allocator) {
@@ -103,9 +84,8 @@ public:
"Recycler allocation alignment is less than object align!");
static_assert(sizeof(SubClass) <= Size,
"Recycler allocation size is less than object size!");
- return !FreeList.empty() ?
- reinterpret_cast<SubClass *>(FreeList.remove(FreeList.begin())) :
- static_cast<SubClass *>(Allocator.Allocate(Size, Align));
+ return FreeList ? reinterpret_cast<SubClass *>(pop_val())
+ : static_cast<SubClass *>(Allocator.Allocate(Size, Align));
}
template<class AllocatorType>
@@ -115,14 +95,20 @@ public:
template<class SubClass, class AllocatorType>
void Deallocate(AllocatorType & /*Allocator*/, SubClass* Element) {
- FreeList.push_front(reinterpret_cast<RecyclerStruct *>(Element));
+ push(reinterpret_cast<FreeNode *>(Element));
}
- void PrintStats() {
- PrintRecyclerStats(Size, Align, FreeList.size());
- }
+ void PrintStats();
};
+template <class T, size_t Size, size_t Align>
+void Recycler<T, Size, Align>::PrintStats() {
+ size_t S = 0;
+ for (auto *I = FreeList; I; I = I->Next)
+ ++S;
+ PrintRecyclerStats(Size, Align, S);
+}
+
}
#endif
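
A sketch of the allocate/recycle round-trip on the new singly-linked free list; MyNode is a hypothetical payload (any type at least pointer-sized works, since freed storage is overlaid with a FreeNode):

    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/Recycler.h"

    struct MyNode { double Data[4]; };

    void demo() {
      llvm::BumpPtrAllocator Alloc;
      llvm::Recycler<MyNode> Pool;

      MyNode *A = Pool.Allocate<MyNode>(Alloc); // fresh memory from Alloc
      Pool.Deallocate(Alloc, A);                // pushed onto the free list
      MyNode *B = Pool.Allocate<MyNode>(Alloc); // pops A's storage back off
      Pool.Deallocate(Alloc, B);
      Pool.clear(Alloc);                        // must be empty before ~Recycler
    }
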
diff --git a/contrib/llvm/include/llvm/Support/Registry.h b/contrib/llvm/include/llvm/Support/Registry.h
index 95c4e96..bbea97b 100644
--- a/contrib/llvm/include/llvm/Support/Registry.h
+++ b/contrib/llvm/include/llvm/Support/Registry.h
@@ -37,7 +37,6 @@ namespace llvm {
std::unique_ptr<T> instantiate() const { return Ctor(); }
};
-
/// Traits for registry entries. If using other than SimpleRegistryEntry, it
/// is necessary to define an alternate traits class.
template <typename T>
@@ -53,7 +52,6 @@ namespace llvm {
static const char *descof(const entry &Entry) { return Entry.getDesc(); }
};
-
/// A global registry used in conjunction with static constructors to make
/// pluggable components (like targets or garbage collectors) "just work" when
/// linked with an executable.
@@ -102,7 +100,6 @@ namespace llvm {
}
};
-
/// Iterators for registry entries.
///
class iterator {
@@ -122,10 +119,9 @@ namespace llvm {
static iterator end() { return iterator(nullptr); }
static iterator_range<iterator> entries() {
- return iterator_range<iterator>(begin(), end());
+ return make_range(begin(), end());
}
-
/// Abstract base class for registry listeners, which are informed when new
/// entries are added to the registry. Simply subclass and instantiate:
///
@@ -160,7 +156,7 @@ namespace llvm {
}
public:
- listener() : Prev(ListenerTail), Next(0) {
+ listener() : Prev(ListenerTail), Next(nullptr) {
if (Prev)
Prev->Next = this;
else
@@ -180,7 +176,6 @@ namespace llvm {
}
};
-
/// A static registration template. Use like such:
///
/// Registry<Collector>::Add<FancyGC>
@@ -210,7 +205,6 @@ namespace llvm {
};
/// Registry::Parser now lives in llvm/Support/RegistryParser.h.
-
};
// Since these are defined in a header file, plugins must be sure to export
@@ -228,6 +222,6 @@ namespace llvm {
template <typename T, typename U>
typename Registry<T,U>::listener *Registry<T,U>::ListenerTail;
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_REGISTRY_H
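
The doc comment above shows Registry<Collector>::Add<FancyGC>; a compilable sketch of that pattern under the same illustrative names:

    #include "llvm/Support/Registry.h"

    class Collector { public: virtual ~Collector() {} };
    class FancyGC : public Collector {};

    typedef llvm::Registry<Collector> CollectorRegistry;

    // A static constructor registers the entry at load time.
    static CollectorRegistry::Add<FancyGC>
        X("fancy-gc", "An illustrative garbage collector");

    void listCollectors() {
      for (const auto &Entry : CollectorRegistry::entries())
        (void)Entry.getName(); // iterate the registered names
    }
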
diff --git a/contrib/llvm/include/llvm/Support/SMLoc.h b/contrib/llvm/include/llvm/Support/SMLoc.h
index d5b4c57..c6e9a14 100644
--- a/contrib/llvm/include/llvm/Support/SMLoc.h
+++ b/contrib/llvm/include/llvm/Support/SMLoc.h
@@ -22,6 +22,7 @@ namespace llvm {
/// Represents a location in source code.
class SMLoc {
const char *Ptr;
+
public:
SMLoc() : Ptr(nullptr) {}
@@ -53,11 +54,10 @@ public:
assert(Start.isValid() == End.isValid() &&
"Start and end should either both be valid or both be invalid!");
}
-
+
bool isValid() const { return Start.isValid(); }
};
-
+
} // end namespace llvm
#endif
-
diff --git a/contrib/llvm/include/llvm/Support/ScaledNumber.h b/contrib/llvm/include/llvm/Support/ScaledNumber.h
index 0a4262b..c6421ef 100644
--- a/contrib/llvm/include/llvm/Support/ScaledNumber.h
+++ b/contrib/llvm/include/llvm/Support/ScaledNumber.h
@@ -282,7 +282,7 @@ int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) {
///
/// As a convenience, returns the matching scale. If the output value of one
/// number is zero, returns the scale of the other. If both are zero, which
-/// scale is returned is unspecifed.
+/// scale is returned is unspecified.
template <class DigitsT>
int16_t matchScales(DigitsT &LDigits, int16_t &LScale, DigitsT &RDigits,
int16_t &RScale) {
@@ -334,7 +334,7 @@ std::pair<DigitsT, int16_t> getSum(DigitsT LDigits, int16_t LScale,
DigitsT RDigits, int16_t RScale) {
static_assert(!std::numeric_limits<DigitsT>::is_signed, "expected unsigned");
- // Check inputs up front. This is only relevent if addition overflows, but
+ // Check inputs up front. This is only relevant if addition overflows, but
// testing here should catch more bugs.
assert(LScale < INT16_MAX && "scale too large");
assert(RScale < INT16_MAX && "scale too large");
diff --git a/contrib/llvm/include/llvm/Support/Signals.h b/contrib/llvm/include/llvm/Support/Signals.h
index 7e165d7..2a4d84b 100644
--- a/contrib/llvm/include/llvm/Support/Signals.h
+++ b/contrib/llvm/include/llvm/Support/Signals.h
@@ -47,6 +47,9 @@ namespace sys {
/// \brief Print the stack trace using the given \c raw_ostream object.
void PrintStackTrace(raw_ostream &OS);
+ // Run all registered signal handlers.
+ void RunSignalHandlers();
+
/// AddSignalHandler - Add a function to be called when an abort/kill signal
/// is delivered to the process. The handler can have a cookie passed to it
/// to identify what instance of the handler it is.
diff --git a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h b/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
index 7cb6438..a5980c2 100644
--- a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
+++ b/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
@@ -50,8 +50,10 @@ public:
/// starts (although it can be called anytime).
void setKnownObjectSize(size_t size);
+ /// The number of bytes read at a time from the data streamer.
+ static const uint32_t kChunkSize = 4096 * 4;
+
private:
- const static uint32_t kChunkSize = 4096 * 4;
mutable std::vector<unsigned char> Bytes;
std::unique_ptr<DataStreamer> Streamer;
mutable size_t BytesRead; // Bytes read from stream
diff --git a/contrib/llvm/include/llvm/Support/StringSaver.h b/contrib/llvm/include/llvm/Support/StringSaver.h
index f3853ee..38fb7bb 100644
--- a/contrib/llvm/include/llvm/Support/StringSaver.h
+++ b/contrib/llvm/include/llvm/Support/StringSaver.h
@@ -18,25 +18,15 @@ namespace llvm {
/// \brief Saves strings in the inheritor's stable storage and returns a stable
/// raw character pointer.
-class StringSaver {
-protected:
- ~StringSaver() {}
- virtual const char *saveImpl(StringRef S);
+class StringSaver final {
+ BumpPtrAllocator &Alloc;
public:
StringSaver(BumpPtrAllocator &Alloc) : Alloc(Alloc) {}
const char *save(const char *S) { return save(StringRef(S)); }
- const char *save(StringRef S) { return saveImpl(S); }
+ const char *save(StringRef S);
const char *save(const Twine &S) { return save(StringRef(S.str())); }
const char *save(std::string &S) { return save(StringRef(S)); }
-
-private:
- BumpPtrAllocator &Alloc;
-};
-
-class BumpPtrStringSaver final : public StringSaver {
-public:
- BumpPtrStringSaver(BumpPtrAllocator &Alloc) : StringSaver(Alloc) {}
};
}
#endif
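
With the virtual saveImpl hook and the BumpPtrStringSaver subclass gone, StringSaver is a plain concrete type; a minimal usage sketch:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/StringSaver.h"

    const char *internArg(llvm::StringSaver &Saver, llvm::StringRef Arg) {
      // The returned pointer stays valid for the allocator's lifetime.
      return Saver.save(Arg);
    }

    void demo() {
      llvm::BumpPtrAllocator Alloc;
      llvm::StringSaver Saver(Alloc);
      const char *Stable = internArg(Saver, "argv-style string");
      (void)Stable;
    }
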
diff --git a/contrib/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm/include/llvm/Support/TargetParser.h
index dab7248..c21019d 100644
--- a/contrib/llvm/include/llvm/Support/TargetParser.h
+++ b/contrib/llvm/include/llvm/Support/TargetParser.h
@@ -20,7 +20,7 @@
#include <vector>
namespace llvm {
- class StringRef;
+class StringRef;
// Target specific information is grouped into its own namespaces. These should be
// generated from TableGen because the information is already there, and there
@@ -29,177 +29,117 @@ namespace llvm {
// even if the back-end is not compiled with LLVM, plus we need to create a new
// back-end to TableGen to create these clean tables.
namespace ARM {
- // FPU names.
- enum FPUKind {
- FK_INVALID = 0,
- FK_NONE,
- FK_VFP,
- FK_VFPV2,
- FK_VFPV3,
- FK_VFPV3_FP16,
- FK_VFPV3_D16,
- FK_VFPV3_D16_FP16,
- FK_VFPV3XD,
- FK_VFPV3XD_FP16,
- FK_VFPV4,
- FK_VFPV4_D16,
- FK_FPV4_SP_D16,
- FK_FPV5_D16,
- FK_FPV5_SP_D16,
- FK_FP_ARMV8,
- FK_NEON,
- FK_NEON_FP16,
- FK_NEON_VFPV4,
- FK_NEON_FP_ARMV8,
- FK_CRYPTO_NEON_FP_ARMV8,
- FK_SOFTVFP,
- FK_LAST
- };
-
- // FPU Version
- enum FPUVersion {
- FV_NONE = 0,
- FV_VFPV2,
- FV_VFPV3,
- FV_VFPV3_FP16,
- FV_VFPV4,
- FV_VFPV5
- };
-
- // An FPU name implies one of three levels of Neon support:
- enum NeonSupportLevel {
- NS_None = 0, ///< No Neon
- NS_Neon, ///< Neon
- NS_Crypto ///< Neon with Crypto
- };
-
- // An FPU name restricts the FPU in one of three ways:
- enum FPURestriction {
- FR_None = 0, ///< No restriction
- FR_D16, ///< Only 16 D registers
- FR_SP_D16 ///< Only single-precision instructions, with 16 D registers
- };
-
- // Arch names.
- enum ArchKind {
- AK_INVALID = 0,
- AK_ARMV2,
- AK_ARMV2A,
- AK_ARMV3,
- AK_ARMV3M,
- AK_ARMV4,
- AK_ARMV4T,
- AK_ARMV5T,
- AK_ARMV5TE,
- AK_ARMV5TEJ,
- AK_ARMV6,
- AK_ARMV6K,
- AK_ARMV6T2,
- AK_ARMV6Z,
- AK_ARMV6ZK,
- AK_ARMV6M,
- AK_ARMV6SM,
- AK_ARMV7A,
- AK_ARMV7R,
- AK_ARMV7M,
- AK_ARMV7EM,
- AK_ARMV8A,
- AK_ARMV8_1A,
- // Non-standard Arch names.
- AK_IWMMXT,
- AK_IWMMXT2,
- AK_XSCALE,
- AK_ARMV5,
- AK_ARMV5E,
- AK_ARMV6J,
- AK_ARMV6HL,
- AK_ARMV7,
- AK_ARMV7L,
- AK_ARMV7HL,
- AK_ARMV7S,
- AK_LAST
- };
-
- // Arch extension modifiers for CPUs.
- enum ArchExtKind {
- AEK_INVALID = 0,
- AEK_CRC,
- AEK_CRYPTO,
- AEK_FP,
- AEK_HWDIV,
- AEK_MP,
- AEK_SIMD,
- AEK_SEC,
- AEK_VIRT,
- // Unsupported extensions.
- AEK_OS,
- AEK_IWMMXT,
- AEK_IWMMXT2,
- AEK_MAVERICK,
- AEK_XSCALE,
- AEK_LAST
- };
-
- // ISA kinds.
- enum ISAKind {
- IK_INVALID = 0,
- IK_ARM,
- IK_THUMB,
- IK_AARCH64
- };
-
- // Endianness
- // FIXME: BE8 vs. BE32?
- enum EndianKind {
- EK_INVALID = 0,
- EK_LITTLE,
- EK_BIG
- };
-
- // v6/v7/v8 Profile
- enum ProfileKind {
- PK_INVALID = 0,
- PK_A,
- PK_R,
- PK_M
- };
-} // namespace ARM
-// Target Parsers, one per architecture.
-class ARMTargetParser {
- static StringRef getFPUSynonym(StringRef FPU);
- static StringRef getArchSynonym(StringRef Arch);
-
-public:
- static StringRef getCanonicalArchName(StringRef Arch);
-
- // Information by ID
- static const char * getFPUName(unsigned FPUKind);
- static unsigned getFPUVersion(unsigned FPUKind);
- static unsigned getFPUNeonSupportLevel(unsigned FPUKind);
- static unsigned getFPURestriction(unsigned FPUKind);
- // FIXME: This should be moved to TargetTuple once it exists
- static bool getFPUFeatures(unsigned FPUKind,
- std::vector<const char*> &Features);
- static const char * getArchName(unsigned ArchKind);
- static unsigned getArchAttr(unsigned ArchKind);
- static const char * getCPUAttr(unsigned ArchKind);
- static const char * getSubArch(unsigned ArchKind);
- static const char * getArchExtName(unsigned ArchExtKind);
- static const char * getDefaultCPU(StringRef Arch);
-
- // Parser
- static unsigned parseFPU(StringRef FPU);
- static unsigned parseArch(StringRef Arch);
- static unsigned parseArchExt(StringRef ArchExt);
- static unsigned parseCPUArch(StringRef CPU);
- static unsigned parseArchISA(StringRef Arch);
- static unsigned parseArchEndian(StringRef Arch);
- static unsigned parseArchProfile(StringRef Arch);
- static unsigned parseArchVersion(StringRef Arch);
+// FPU names.
+enum FPUKind {
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND,
+#include "ARMTargetParser.def"
+ FK_LAST
+};
+
+// FPU Version
+enum FPUVersion {
+ FV_NONE = 0,
+ FV_VFPV2,
+ FV_VFPV3,
+ FV_VFPV3_FP16,
+ FV_VFPV4,
+ FV_VFPV5
+};
+
+// An FPU name implies one of three levels of Neon support:
+enum NeonSupportLevel {
+ NS_None = 0, ///< No Neon
+ NS_Neon, ///< Neon
+ NS_Crypto ///< Neon with Crypto
+};
+
+// An FPU name restricts the FPU in one of three ways:
+enum FPURestriction {
+ FR_None = 0, ///< No restriction
+ FR_D16, ///< Only 16 D registers
+ FR_SP_D16 ///< Only single-precision instructions, with 16 D registers
+};
+
+// Arch names.
+enum ArchKind {
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "ARMTargetParser.def"
+ AK_LAST
+};
+// Arch extension modifiers for CPUs.
+enum ArchExtKind : unsigned {
+ AEK_INVALID = 0x0,
+ AEK_NONE = 0x1,
+ AEK_CRC = 0x2,
+ AEK_CRYPTO = 0x4,
+ AEK_FP = 0x8,
+ AEK_HWDIV = 0x10,
+ AEK_HWDIVARM = 0x20,
+ AEK_MP = 0x40,
+ AEK_SIMD = 0x80,
+ AEK_SEC = 0x100,
+ AEK_VIRT = 0x200,
+ AEK_DSP = 0x400,
+ AEK_FP16 = 0x800,
+ // Unsupported extensions.
+ AEK_OS = 0x8000000,
+ AEK_IWMMXT = 0x10000000,
+ AEK_IWMMXT2 = 0x20000000,
+ AEK_MAVERICK = 0x40000000,
+ AEK_XSCALE = 0x80000000,
};
+// ISA kinds.
+enum ISAKind { IK_INVALID = 0, IK_ARM, IK_THUMB, IK_AARCH64 };
+
+// Endianness
+// FIXME: BE8 vs. BE32?
+enum EndianKind { EK_INVALID = 0, EK_LITTLE, EK_BIG };
+
+// v6/v7/v8 Profile
+enum ProfileKind { PK_INVALID = 0, PK_A, PK_R, PK_M };
+
+StringRef getCanonicalArchName(StringRef Arch);
+
+// Information by ID
+StringRef getFPUName(unsigned FPUKind);
+unsigned getFPUVersion(unsigned FPUKind);
+unsigned getFPUNeonSupportLevel(unsigned FPUKind);
+unsigned getFPURestriction(unsigned FPUKind);
+
+// FIXME: These should be moved to TargetTuple once it exists
+bool getFPUFeatures(unsigned FPUKind, std::vector<const char *> &Features);
+bool getHWDivFeatures(unsigned HWDivKind, std::vector<const char *> &Features);
+bool getExtensionFeatures(unsigned Extensions,
+ std::vector<const char*> &Features);
+
+StringRef getArchName(unsigned ArchKind);
+unsigned getArchAttr(unsigned ArchKind);
+StringRef getCPUAttr(unsigned ArchKind);
+StringRef getSubArch(unsigned ArchKind);
+StringRef getArchExtName(unsigned ArchExtKind);
+const char *getArchExtFeature(StringRef ArchExt);
+StringRef getHWDivName(unsigned HWDivKind);
+
+// Information by Name
+unsigned getDefaultFPU(StringRef CPU, unsigned ArchKind);
+unsigned getDefaultExtensions(StringRef CPU, unsigned ArchKind);
+StringRef getDefaultCPU(StringRef Arch);
+
+// Parser
+unsigned parseHWDiv(StringRef HWDiv);
+unsigned parseFPU(StringRef FPU);
+unsigned parseArch(StringRef Arch);
+unsigned parseArchExt(StringRef ArchExt);
+unsigned parseCPUArch(StringRef CPU);
+unsigned parseArchISA(StringRef Arch);
+unsigned parseArchEndian(StringRef Arch);
+unsigned parseArchProfile(StringRef Arch);
+unsigned parseArchVersion(StringRef Arch);
+
+} // namespace ARM
} // namespace llvm
#endif
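
Two effects of the rework are visible above: the FPU and arch enums are now generated from ARMTargetParser.def, and ArchExtKind is a bitmask, so extensions combine with OR. A sketch of feature queries under those assumptions ("neon-vfpv4" is one of the standard ARM FPU names):

    #include "llvm/Support/TargetParser.h"
    #include <vector>

    void collectFeatures(std::vector<const char *> &Features) {
      // Extensions now combine as bit flags rather than enum indices.
      unsigned Ext = llvm::ARM::AEK_CRC | llvm::ARM::AEK_HWDIV;
      llvm::ARM::getExtensionFeatures(Ext, Features);

      unsigned FPU = llvm::ARM::parseFPU("neon-vfpv4");
      llvm::ARM::getFPUFeatures(FPU, Features);
    }
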
diff --git a/contrib/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm/include/llvm/Support/TargetRegistry.h
index 40bf6fb..aec181b 100644
--- a/contrib/llvm/include/llvm/Support/TargetRegistry.h
+++ b/contrib/llvm/include/llvm/Support/TargetRegistry.h
@@ -115,7 +115,7 @@ public:
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
typedef MCTargetAsmParser *(*MCAsmParserCtorTy)(
- MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
+ const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
const MCTargetOptions &Options);
typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T,
const MCSubtargetInfo &STI,
@@ -141,7 +141,8 @@ public:
typedef MCStreamer *(*COFFStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB,
raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
- bool RelaxAll);
+ bool RelaxAll,
+ bool IncrementalLinkerCompatible);
typedef MCTargetStreamer *(*NullTargetStreamerCtorTy)(MCStreamer &S);
typedef MCTargetStreamer *(*AsmTargetStreamerCtorTy)(
MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint,
@@ -382,7 +383,7 @@ public:
///
/// \param Parser The target independent parser implementation to use for
/// parsing and lexing.
- MCTargetAsmParser *createMCAsmParser(MCSubtargetInfo &STI,
+ MCTargetAsmParser *createMCAsmParser(const MCSubtargetInfo &STI,
MCAsmParser &Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options) const {
@@ -437,6 +438,7 @@ public:
MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI, bool RelaxAll,
+ bool IncrementalLinkerCompatible,
bool DWARFMustBeAtTheEnd) const {
MCStreamer *S;
switch (T.getObjectFormat()) {
@@ -444,7 +446,8 @@ public:
llvm_unreachable("Unknown object format");
case Triple::COFF:
assert(T.isOSWindows() && "only Windows COFF is supported");
- S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll);
+ S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll,
+ IncrementalLinkerCompatible);
break;
case Triple::MachO:
if (MachOStreamerCtorFn)
@@ -1133,8 +1136,8 @@ template <class MCAsmParserImpl> struct RegisterMCAsmParser {
}
private:
- static MCTargetAsmParser *Allocator(MCSubtargetInfo &STI, MCAsmParser &P,
- const MCInstrInfo &MII,
+ static MCTargetAsmParser *Allocator(const MCSubtargetInfo &STI,
+ MCAsmParser &P, const MCInstrInfo &MII,
const MCTargetOptions &Options) {
return new MCAsmParserImpl(STI, P, MII, Options);
}
diff --git a/contrib/llvm/include/llvm/Support/TargetSelect.h b/contrib/llvm/include/llvm/Support/TargetSelect.h
index a86e953..582785c 100644
--- a/contrib/llvm/include/llvm/Support/TargetSelect.h
+++ b/contrib/llvm/include/llvm/Support/TargetSelect.h
@@ -25,11 +25,11 @@ extern "C" {
#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
-
+
// Declare all of the target-MC-initialization functions that are available.
#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/Targets.def"
-
+
// Declare all of the available assembly printer initialization functions.
#define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter();
#include "llvm/Config/AsmPrinters.def"
@@ -54,7 +54,7 @@ namespace llvm {
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo();
#include "llvm/Config/Targets.def"
}
-
+
/// InitializeAllTargets - The main program should call this function if it
/// wants access to all available target machines that LLVM is configured to
/// support, to make them available via the TargetRegistry.
@@ -67,7 +67,7 @@ namespace llvm {
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
}
-
+
/// InitializeAllTargetMCs - The main program should call this function if it
/// wants access to all available target MC that LLVM is configured to
/// support, to make them available via the TargetRegistry.
@@ -77,7 +77,7 @@ namespace llvm {
#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/Targets.def"
}
-
+
/// InitializeAllAsmPrinters - The main program should call this function if
/// it wants all asm printers that LLVM is configured to support, to make them
/// available via the TargetRegistry.
@@ -87,7 +87,7 @@ namespace llvm {
#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter();
#include "llvm/Config/AsmPrinters.def"
}
-
+
/// InitializeAllAsmParsers - The main program should call this function if it
/// wants all asm parsers that LLVM is configured to support, to make them
/// available via the TargetRegistry.
@@ -97,7 +97,7 @@ namespace llvm {
#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/AsmParsers.def"
}
-
+
/// InitializeAllDisassemblers - The main program should call this function if
/// it wants all disassemblers that LLVM is configured to support, to make
/// them available via the TargetRegistry.
@@ -107,9 +107,9 @@ namespace llvm {
#define LLVM_DISASSEMBLER(TargetName) LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/Disassemblers.def"
}
-
+
/// InitializeNativeTarget - The main program should call this function to
- /// initialize the native target corresponding to the host. This is useful
+ /// initialize the native target corresponding to the host. This is useful
/// for JIT applications to ensure that the target gets linked in correctly.
///
/// It is legal for a client to make multiple calls to this function.
@@ -123,7 +123,7 @@ namespace llvm {
#else
return true;
#endif
- }
+ }
/// InitializeNativeTargetAsmPrinter - The main program should call
/// this function to initialize the native target asm printer.
@@ -135,7 +135,7 @@ namespace llvm {
#else
return true;
#endif
- }
+ }
/// InitializeNativeTargetAsmParser - The main program should call
/// this function to initialize the native target asm parser.
@@ -147,7 +147,7 @@ namespace llvm {
#else
return true;
#endif
- }
+ }
/// InitializeNativeTargetDisassembler - The main program should call
/// this function to initialize the native target disassembler.
@@ -159,8 +159,7 @@ namespace llvm {
#else
return true;
#endif
- }
-
+ }
}
#endif
diff --git a/contrib/llvm/include/llvm/Support/ThreadPool.h b/contrib/llvm/include/llvm/Support/ThreadPool.h
new file mode 100644
index 0000000..745334d
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ThreadPool.h
@@ -0,0 +1,136 @@
+//===-- llvm/Support/ThreadPool.h - A ThreadPool implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_THREAD_POOL_H
+#define LLVM_SUPPORT_THREAD_POOL_H
+
+#include "llvm/Support/thread.h"
+
+#ifdef _MSC_VER
+// concrt.h depends on eh.h for __uncaught_exception declaration
+// even if we disable exceptions.
+#include <eh.h>
+
+// Disable warnings from ppltasks.h transitively included by <future>.
+#pragma warning(push)
+#pragma warning(disable:4530)
+#pragma warning(disable:4062)
+#endif
+
+#include <future>
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <utility>
+
+namespace llvm {
+
+/// A ThreadPool for asynchronous parallel execution on a defined number of
+/// threads.
+///
+/// The pool keeps a vector of threads alive, waiting on a condition variable
+/// for some work to become available.
+class ThreadPool {
+public:
+#ifndef _MSC_VER
+ using VoidTy = void;
+ using TaskTy = std::function<void()>;
+ using PackagedTaskTy = std::packaged_task<void()>;
+#else
+ // MSVC 2013 has a bug and can't use std::packaged_task<void()>;
+ // We force it to use bool(bool) instead.
+ using VoidTy = bool;
+ using TaskTy = std::function<bool(bool)>;
+ using PackagedTaskTy = std::packaged_task<bool(bool)>;
+#endif
+
+  /// Construct a pool with the number of cores available on the system (or
+  /// whatever std::thread::hardware_concurrency() returns).
+ ThreadPool();
+
+ /// Construct a pool of \p ThreadCount threads
+ ThreadPool(unsigned ThreadCount);
+
+ /// Blocking destructor: the pool will wait for all the threads to complete.
+ ~ThreadPool();
+
+ /// Asynchronous submission of a task to the pool. The returned future can be
+ /// used to wait for the task to finish and is *non-blocking* on destruction.
+ template <typename Function, typename... Args>
+ inline std::shared_future<VoidTy> async(Function &&F, Args &&... ArgList) {
+ auto Task =
+ std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
+#ifndef _MSC_VER
+ return asyncImpl(std::move(Task));
+#else
+ // This lambda has to be marked mutable because MSVC 2013's std::bind call
+ // operator isn't const qualified.
+ return asyncImpl([Task](VoidTy) mutable -> VoidTy {
+ Task();
+ return VoidTy();
+ });
+#endif
+ }
+
+ /// Asynchronous submission of a task to the pool. The returned future can be
+ /// used to wait for the task to finish and is *non-blocking* on destruction.
+ template <typename Function>
+ inline std::shared_future<VoidTy> async(Function &&F) {
+#ifndef _MSC_VER
+ return asyncImpl(std::forward<Function>(F));
+#else
+ return asyncImpl([F] (VoidTy) -> VoidTy { F(); return VoidTy(); });
+#endif
+ }
+
+ /// Blocking wait for all the threads to complete and the queue to be empty.
+ /// It is an error to try to add new tasks while blocking on this call.
+ void wait();
+
+private:
+ /// Asynchronous submission of a task to the pool. The returned future can be
+ /// used to wait for the task to finish and is *non-blocking* on destruction.
+ std::shared_future<VoidTy> asyncImpl(TaskTy F);
+
+ /// Threads in flight
+ std::vector<llvm::thread> Threads;
+
+ /// Tasks waiting for execution in the pool.
+ std::queue<PackagedTaskTy> Tasks;
+
+ /// Locking and signaling for accessing the Tasks queue.
+ std::mutex QueueLock;
+ std::condition_variable QueueCondition;
+
+ /// Locking and signaling for job completion
+ std::mutex CompletionLock;
+ std::condition_variable CompletionCondition;
+
+  /// Keep track of the number of threads actually busy
+ std::atomic<unsigned> ActiveThreads;
+
+#if LLVM_ENABLE_THREADS // avoids warning for unused variable
+  /// Signal for the destruction of the pool, asking threads to exit.
+ bool EnableFlag;
+#endif
+};
+}
+
+#endif // LLVM_SUPPORT_THREAD_POOL_H
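
A sketch of the pool in use, assuming LLVM_ENABLE_THREADS; the shared_future returned by async() can also be kept to wait on an individual task:

    #include "llvm/Support/ThreadPool.h"
    #include <atomic>

    void demo() {
      std::atomic<unsigned> Sum(0);
      llvm::ThreadPool Pool(4); // four worker threads

      for (unsigned I = 0; I != 8; ++I)
        Pool.async([&Sum, I] { Sum += I; });

      Pool.wait(); // blocks until the queue drains; Sum is 28 here
    }
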
diff --git a/contrib/llvm/include/llvm/Support/Threading.h b/contrib/llvm/include/llvm/Support/Threading.h
index 3cca1d6..9007c13 100644
--- a/contrib/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm/include/llvm/Support/Threading.h
@@ -21,7 +21,7 @@ namespace llvm {
bool llvm_is_multithreaded();
/// llvm_execute_on_thread - Execute the given \p UserFn on a separate
- /// thread, passing it the provided \p UserData and waits for thread
+ /// thread, passing it the provided \p UserData and waits for thread
/// completion.
///
/// This function does not guarantee that the code will actually be executed
diff --git a/contrib/llvm/include/llvm/Support/Timer.h b/contrib/llvm/include/llvm/Support/Timer.h
index 2cd30e2..499fe7b 100644
--- a/contrib/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm/include/llvm/Support/Timer.h
@@ -30,26 +30,25 @@ class TimeRecord {
ssize_t MemUsed; // Memory allocated (in bytes)
public:
TimeRecord() : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0) {}
-
+
/// getCurrentTime - Get the current time and memory usage. If Start is true
/// we get the memory usage before the time, otherwise we get time before
/// memory usage. This matters if the time to get the memory usage is
/// significant and shouldn't be counted as part of a duration.
static TimeRecord getCurrentTime(bool Start = true);
-
- double getProcessTime() const { return UserTime+SystemTime; }
+
+ double getProcessTime() const { return UserTime + SystemTime; }
double getUserTime() const { return UserTime; }
double getSystemTime() const { return SystemTime; }
double getWallTime() const { return WallTime; }
ssize_t getMemUsed() const { return MemUsed; }
-
-
+
// operator< - Allow sorting.
bool operator<(const TimeRecord &T) const {
// Sort by Wall Time elapsed, as it is the only thing really accurate
return WallTime < T.WallTime;
}
-
+
void operator+=(const TimeRecord &RHS) {
WallTime += RHS.WallTime;
UserTime += RHS.UserTime;
@@ -62,12 +61,12 @@ public:
SystemTime -= RHS.SystemTime;
MemUsed -= RHS.MemUsed;
}
-
- /// print - Print the current timer to standard error, and reset the "Started"
- /// flag.
+
+ /// Print the current time record to \p OS, with a breakdown showing
+ /// contributions to the \p Total time record.
void print(const TimeRecord &Total, raw_ostream &OS) const;
};
-
+
/// Timer - This class is used to track the amount of time spent between
/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS
/// support it can also keep track of the RSS of the program at various points.
@@ -77,11 +76,13 @@ public:
/// if they are never started.
///
class Timer {
- TimeRecord Time;
+ TimeRecord Time; // The total time captured
+ TimeRecord StartTime; // The time startTimer() was last called
std::string Name; // The name of this time variable.
- bool Started; // Has this time variable ever been started?
+ bool Running; // Is the timer currently running?
+ bool Triggered; // Has the timer ever been triggered?
TimerGroup *TG; // The TimerGroup this Timer is in.
-
+
Timer **Prev, *Next; // Doubly linked list of timers in the group.
public:
explicit Timer(StringRef N) : TG(nullptr) { init(N); }
@@ -99,25 +100,31 @@ public:
explicit Timer() : TG(nullptr) {}
void init(StringRef N);
void init(StringRef N, TimerGroup &tg);
-
+
const std::string &getName() const { return Name; }
bool isInitialized() const { return TG != nullptr; }
-
- /// startTimer - Start the timer running. Time between calls to
- /// startTimer/stopTimer is counted by the Timer class. Note that these calls
- /// must be correctly paired.
- ///
+
+ /// Check if startTimer() has ever been called on this timer.
+ bool hasTriggered() const { return Triggered; }
+
+ /// Start the timer running. Time between calls to startTimer/stopTimer is
+ /// counted by the Timer class. Note that these calls must be correctly
+ /// paired.
void startTimer();
- /// stopTimer - Stop the timer.
- ///
+ /// Stop the timer.
void stopTimer();
+ /// Clear the timer state.
+ void clear();
+
+ /// Return the duration for which this timer has been running.
+ TimeRecord getTotalTime() const { return Time; }
+
private:
friend class TimerGroup;
};
-
/// The TimeRegion class is used as a helper class to call the startTimer() and
/// stopTimer() methods of the Timer class. When the object is constructed, it
/// starts the timer specified as its argument. When it is destroyed, it stops
@@ -126,6 +133,7 @@ private:
class TimeRegion {
Timer *T;
TimeRegion(const TimeRegion &) = delete;
+
public:
explicit TimeRegion(Timer &t) : T(&t) {
T->startTimer();
@@ -138,7 +146,6 @@ public:
}
};
-
/// NamedRegionTimer - This class is basically a combination of TimeRegion and
/// Timer. It allows you to declare a new timer, AND specify the region to
/// time, all in one statement. All timers with the same name are merged. This
@@ -151,7 +158,6 @@ struct NamedRegionTimer : public TimeRegion {
bool Enabled = true);
};
-
/// The TimerGroup class is used to group together related timers into a single
/// report that is printed when the TimerGroup is destroyed. It is illegal to
/// destroy a TimerGroup object before all of the Timers in it are gone. A
@@ -160,11 +166,12 @@ struct NamedRegionTimer : public TimeRegion {
class TimerGroup {
std::string Name;
Timer *FirstTimer; // First timer in the group.
- std::vector<std::pair<TimeRecord, std::string> > TimersToPrint;
-
+ std::vector<std::pair<TimeRecord, std::string>> TimersToPrint;
+
TimerGroup **Prev, *Next; // Doubly linked list of TimerGroup's.
TimerGroup(const TimerGroup &TG) = delete;
void operator=(const TimerGroup &TG) = delete;
+
public:
explicit TimerGroup(StringRef name);
~TimerGroup();
@@ -173,10 +180,10 @@ public:
/// print - Print any started timers in this group and zero them.
void print(raw_ostream &OS);
-
+
/// printAll - This static method prints all timers and clears them all out.
static void printAll(raw_ostream &OS);
-
+
private:
friend class Timer;
void addTimer(Timer &T);
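
The TimeRegion/NamedRegionTimer helpers above make the startTimer/stopTimer pairing automatic; a short sketch with illustrative timer and group names:

    #include "llvm/Support/Timer.h"

    void timedPhase() {
      // Starts on construction, stops at scope exit; timers with the
      // same name in a group are merged into one report line.
      llvm::NamedRegionTimer T("phase1", "my-pass-timers");
      // ... work being measured ...
    }
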
diff --git a/contrib/llvm/include/llvm/Support/TrailingObjects.h b/contrib/llvm/include/llvm/Support/TrailingObjects.h
new file mode 100644
index 0000000..8529746
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/TrailingObjects.h
@@ -0,0 +1,349 @@
+//===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This header defines support for implementing classes that have
+/// some trailing object (or arrays of objects) appended to them. The
+/// main purpose is to make it obvious where this idiom is being used,
+/// and to make the usage more idiomatic and more difficult to get
+/// wrong.
+///
+/// The TrailingObject template abstracts away the reinterpret_cast,
+/// pointer arithmetic, and size calculations used for the allocation
+/// and access of appended arrays of objects, and takes care that they
+/// are all allocated at their required alignment. Additionally, it
+/// ensures that the base type is final -- deriving from a class that
+/// expects data appended immediately after it is typically not safe.
+///
+/// Users are expected to derive from this template, and provide
+/// numTrailingObjects implementations for each trailing type except
+/// the last, e.g. like this sample:
+///
+/// \code
+/// class VarLengthObj : private TrailingObjects<VarLengthObj, int, double> {
+/// friend TrailingObjects;
+///
+/// unsigned NumInts, NumDoubles;
+/// size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }
+/// };
+/// \endcode
+///
+/// You can access the appended arrays via 'getTrailingObjects', and
+/// determine the size needed for allocation via
+/// 'additionalSizeToAlloc' and 'totalSizeToAlloc'.
+///
+/// All the methods implemented by this class are intended for use
+/// by the implementation of the class, not as part of its interface
+/// (thus, private inheritance is suggested).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TRAILINGOBJECTS_H
+#define LLVM_SUPPORT_TRAILINGOBJECTS_H
+
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/type_traits.h"
+#include <new>
+#include <type_traits>
+
+namespace llvm {
+
+namespace trailing_objects_internal {
+/// Helper template to calculate the max alignment requirement for a set of
+/// objects.
+template <typename First, typename... Rest> class AlignmentCalcHelper {
+private:
+ enum {
+ FirstAlignment = AlignOf<First>::Alignment,
+ RestAlignment = AlignmentCalcHelper<Rest...>::Alignment,
+ };
+
+public:
+ enum {
+ Alignment = FirstAlignment > RestAlignment ? FirstAlignment : RestAlignment
+ };
+};
+
+template <typename First> class AlignmentCalcHelper<First> {
+public:
+ enum { Alignment = AlignOf<First>::Alignment };
+};
+
+/// The base class for TrailingObjects* classes.
+class TrailingObjectsBase {
+protected:
+ /// OverloadToken's purpose is to allow specifying function overloads
+ /// for different types, without actually taking the types as
+ /// parameters. (Necessary because member function templates cannot
+ /// be specialized, so overloads must be used instead of
+ /// specialization.)
+ template <typename T> struct OverloadToken {};
+};
+
+/// This helper template works around MSVC 2013's lack of useful
+/// alignas() support. The argument to LLVM_ALIGNAS(), in MSVC, is
+/// required to be a literal integer. But, you *can* use template
+/// specialization to select between a bunch of different LLVM_ALIGNAS
+/// expressions...
+template <int Align>
+class TrailingObjectsAligner : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(1) TrailingObjectsAligner<1> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(2) TrailingObjectsAligner<2> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(4) TrailingObjectsAligner<4> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(8) TrailingObjectsAligner<8> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(16) TrailingObjectsAligner<16> : public TrailingObjectsBase {
+};
+template <>
+class LLVM_ALIGNAS(32) TrailingObjectsAligner<32> : public TrailingObjectsBase {
+};
+
+// Just a little helper for transforming a type pack into the same
+// number of a different type. e.g.:
+// ExtractSecondType<Foo..., int>::type
+template <typename Ty1, typename Ty2> struct ExtractSecondType {
+ typedef Ty2 type;
+};
+
+// TrailingObjectsImpl is somewhat complicated, because it is a
+// recursively inheriting template, in order to handle the template
+// varargs. Each level of inheritance picks off a single trailing type
+// then recurses on the rest. The "Align", "BaseTy", and
+// "TopTrailingObj" arguments are passed through unchanged through the
+// recursion. "PrevTy" is, at each level, the type handled by the
+// level right above it.
+
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
+ typename... MoreTys>
+struct TrailingObjectsImpl {
+ // The main template definition is never used -- the two
+ // specializations cover all possibilities.
+};
+
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
+ typename NextTy, typename... MoreTys>
+struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
+ MoreTys...>
+ : public TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy,
+ MoreTys...> {
+
+ typedef TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy, MoreTys...>
+ ParentType;
+
+ // Ensure the methods we inherit are not hidden.
+ using ParentType::getTrailingObjectsImpl;
+ using ParentType::additionalSizeToAllocImpl;
+
+ static LLVM_CONSTEXPR bool requiresRealignment() {
+ return llvm::AlignOf<PrevTy>::Alignment < llvm::AlignOf<NextTy>::Alignment;
+ }
+
+ // These two functions are helper functions for
+ // TrailingObjects::getTrailingObjects. They recurse to the left --
+ // the result for each type in the list of trailing types depends on
+ // the result of calling the function on the type to the
+ // left. However, the function for the type to the left is
+ // implemented by a *subclass* of this class, so we invoke it via
+ // the TopTrailingObj, which is, via the
+ // curiously-recurring-template-pattern, the most-derived type in
+ // this recursion, and thus, contains all the overloads.
+ static const NextTy *
+ getTrailingObjectsImpl(const BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<NextTy>) {
+ auto *Ptr = TopTrailingObj::getTrailingObjectsImpl(
+ Obj, TrailingObjectsBase::OverloadToken<PrevTy>()) +
+ TopTrailingObj::callNumTrailingObjects(
+ Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
+
+ if (requiresRealignment())
+ return reinterpret_cast<const NextTy *>(
+ llvm::alignAddr(Ptr, llvm::alignOf<NextTy>()));
+ else
+ return reinterpret_cast<const NextTy *>(Ptr);
+ }
+
+ static NextTy *
+ getTrailingObjectsImpl(BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<NextTy>) {
+ auto *Ptr = TopTrailingObj::getTrailingObjectsImpl(
+ Obj, TrailingObjectsBase::OverloadToken<PrevTy>()) +
+ TopTrailingObj::callNumTrailingObjects(
+ Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
+
+ if (requiresRealignment())
+ return reinterpret_cast<NextTy *>(
+ llvm::alignAddr(Ptr, llvm::alignOf<NextTy>()));
+ else
+ return reinterpret_cast<NextTy *>(Ptr);
+ }
+
+ // Helper function for TrailingObjects::additionalSizeToAlloc: this
+ // function recurses to superclasses, each of which requires one
+ // fewer size_t argument, and adds its own size.
+ static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(
+ size_t SizeSoFar, size_t Count1,
+ typename ExtractSecondType<MoreTys, size_t>::type... MoreCounts) {
+ return additionalSizeToAllocImpl(
+ (requiresRealignment()
+ ? llvm::RoundUpToAlignment(SizeSoFar, llvm::alignOf<NextTy>())
+ : SizeSoFar) +
+ sizeof(NextTy) * Count1,
+ MoreCounts...);
+ }
+};
+
+// The base case of the TrailingObjectsImpl inheritance recursion,
+// when there's no more trailing types.
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy>
+struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy>
+ : public TrailingObjectsAligner<Align> {
+ // This is a dummy method, only here so the "using" doesn't fail --
+ // it will never be called, because this function recurses backwards
+ // up the inheritance chain to subclasses.
+ static void getTrailingObjectsImpl();
+
+ static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(size_t SizeSoFar) {
+ return SizeSoFar;
+ }
+
+ template <bool CheckAlignment> static void verifyTrailingObjectsAlignment() {}
+};
+
+} // end namespace trailing_objects_internal
+
+// Finally, the main type defined in this file, the one intended for users...
+
+/// See the file comment for details on the usage of the
+/// TrailingObjects type.
+template <typename BaseTy, typename... TrailingTys>
+class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl<
+ trailing_objects_internal::AlignmentCalcHelper<
+ TrailingTys...>::Alignment,
+ BaseTy, TrailingObjects<BaseTy, TrailingTys...>,
+ BaseTy, TrailingTys...> {
+
+ template <int A, typename B, typename T, typename P, typename... M>
+ friend struct trailing_objects_internal::TrailingObjectsImpl;
+
+ template <typename... Tys> class Foo {};
+
+ typedef trailing_objects_internal::TrailingObjectsImpl<
+ trailing_objects_internal::AlignmentCalcHelper<TrailingTys...>::Alignment,
+ BaseTy, TrailingObjects<BaseTy, TrailingTys...>, BaseTy, TrailingTys...>
+ ParentType;
+ using TrailingObjectsBase = trailing_objects_internal::TrailingObjectsBase;
+
+ using ParentType::getTrailingObjectsImpl;
+
+  // This function contains only a static_assert that BaseTy is final. The
+ // static_assert must be in a function, and not at class-level
+ // because BaseTy isn't complete at class instantiation time, but
+ // will be by the time this function is instantiated.
+ static void verifyTrailingObjectsAssertions() {
+#ifdef LLVM_IS_FINAL
+ static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final.");
+#endif
+ }
+
+ // These two methods are the base of the recursion for this method.
+ static const BaseTy *
+ getTrailingObjectsImpl(const BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<BaseTy>) {
+ return Obj;
+ }
+
+ static BaseTy *
+ getTrailingObjectsImpl(BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<BaseTy>) {
+ return Obj;
+ }
+
+ // callNumTrailingObjects simply calls numTrailingObjects on the
+ // provided Obj -- except when the type being queried is BaseTy
+ // itself. There is always only one of the base object, so that case
+ // is handled here. (An additional benefit of indirecting through
+ // this function is that consumers only say "friend
+ // TrailingObjects", and thus, only this class itself can call the
+ // numTrailingObjects function.)
+ static size_t
+ callNumTrailingObjects(const BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<BaseTy>) {
+ return 1;
+ }
+
+ template <typename T>
+ static size_t callNumTrailingObjects(const BaseTy *Obj,
+ TrailingObjectsBase::OverloadToken<T>) {
+ return Obj->numTrailingObjects(TrailingObjectsBase::OverloadToken<T>());
+ }
+
+public:
+ // make this (privately inherited) class public.
+ using ParentType::OverloadToken;
+
+ /// Returns a pointer to the trailing object array of the given type
+ /// (which must be one of those specified in the class template). The
+ /// array may have zero or more elements in it.
+ template <typename T> const T *getTrailingObjects() const {
+ verifyTrailingObjectsAssertions();
+ // Forwards to an impl function with overloads, since member
+ // function templates can't be specialized.
+ return this->getTrailingObjectsImpl(
+ static_cast<const BaseTy *>(this),
+ TrailingObjectsBase::OverloadToken<T>());
+ }
+
+ /// Returns a pointer to the trailing object array of the given type
+ /// (which must be one of those specified in the class template). The
+ /// array may have zero or more elements in it.
+ template <typename T> T *getTrailingObjects() {
+ verifyTrailingObjectsAssertions();
+ // Forwards to an impl function with overloads, since member
+ // function templates can't be specialized.
+ return this->getTrailingObjectsImpl(
+ static_cast<BaseTy *>(this), TrailingObjectsBase::OverloadToken<T>());
+ }
+
+ /// Returns the size of the trailing data, if an object were
+ /// allocated with the given counts (The counts are in the same order
+ /// as the template arguments). This does not include the size of the
+ /// base object. The template arguments must be the same as those
+ /// used in the class; they are supplied here redundantly only so
+ /// that it's clear what the counts are counting in callers.
+ template <typename... Tys>
+ static LLVM_CONSTEXPR typename std::enable_if<
+ std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
+ additionalSizeToAlloc(
+ typename trailing_objects_internal::ExtractSecondType<
+ TrailingTys, size_t>::type... Counts) {
+ return ParentType::additionalSizeToAllocImpl(0, Counts...);
+ }
+
+ /// Returns the total size of an object if it were allocated with the
+ /// given trailing object counts. This is the same as
+ /// additionalSizeToAlloc, except it *does* include the size of the base
+ /// object.
+ template <typename... Tys>
+ static LLVM_CONSTEXPR typename std::enable_if<
+ std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
+ totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
+ TrailingTys, size_t>::type... Counts) {
+ return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...);
+ }
+};
+
+} // end namespace llvm
+
+#endif
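
A compilable variant of the file comment's VarLengthObj sample, showing the size helpers during allocation; create() and the malloc-based storage are illustrative assumptions, not part of the header:

    #include "llvm/Support/TrailingObjects.h"
    #include <cstdlib>
    #include <new>

    class VarLengthObj final
        : private llvm::TrailingObjects<VarLengthObj, int, double> {
      friend TrailingObjects;

      unsigned NumInts, NumDoubles;
      size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }

      VarLengthObj(unsigned NI, unsigned ND) : NumInts(NI), NumDoubles(ND) {}

    public:
      static VarLengthObj *create(unsigned NI, unsigned ND) {
        // Room for the base object plus both trailing arrays, aligned.
        void *Mem = std::malloc(totalSizeToAlloc<int, double>(NI, ND));
        return new (Mem) VarLengthObj(NI, ND);
      }
      int *ints() { return getTrailingObjects<int>(); }
      double *doubles() { return getTrailingObjects<double>(); }
    };
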
diff --git a/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h b/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
index 9f738df..134698c 100644
--- a/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
+++ b/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
@@ -51,6 +51,11 @@ public:
/// the constructor, so it makes sense to create as few UnicodeCharSet
/// instances per each array of ranges, as possible.
#ifdef NDEBUG
+
+ // FIXME: This could use constexpr + static_assert. This way we
+  // may get rid of NDEBUG in this header. Unfortunately there are some
+  // problems getting this to work with MSVC 2013. Change this when
+  // support for MSVC 2013 is dropped.
LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
#else
UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
diff --git a/contrib/llvm/include/llvm/Support/Valgrind.h b/contrib/llvm/include/llvm/Support/Valgrind.h
index cebf75c..12b0dc9 100644
--- a/contrib/llvm/include/llvm/Support/Valgrind.h
+++ b/contrib/llvm/include/llvm/Support/Valgrind.h
@@ -20,17 +20,6 @@
#include "llvm/Support/Compiler.h"
#include <stddef.h>
-#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
-// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact
-// functions by name.
-extern "C" {
-void AnnotateHappensAfter(const char *file, int line, const volatile void *cv);
-void AnnotateHappensBefore(const char *file, int line, const volatile void *cv);
-void AnnotateIgnoreWritesBegin(const char *file, int line);
-void AnnotateIgnoreWritesEnd(const char *file, int line);
-}
-#endif
-
namespace llvm {
namespace sys {
// True if Valgrind is controlling this process.
@@ -39,34 +28,6 @@ namespace sys {
// Discard valgrind's translation of code in the range [Addr .. Addr + Len).
// Otherwise valgrind may continue to execute the old version of the code.
void ValgrindDiscardTranslations(const void *Addr, size_t Len);
-
-#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
- // Thread Sanitizer is a valgrind tool that finds races in code.
- // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
-
- // This marker is used to define a happens-before arc. The race detector will
- // infer an arc from the begin to the end when they share the same pointer
- // argument.
- #define TsanHappensBefore(cv) \
- AnnotateHappensBefore(__FILE__, __LINE__, cv)
-
- // This marker defines the destination of a happens-before arc.
- #define TsanHappensAfter(cv) \
- AnnotateHappensAfter(__FILE__, __LINE__, cv)
-
- // Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
- #define TsanIgnoreWritesBegin() \
- AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
-
- // Resume checking for racy writes.
- #define TsanIgnoreWritesEnd() \
- AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
-#else
- #define TsanHappensBefore(cv)
- #define TsanHappensAfter(cv)
- #define TsanIgnoreWritesBegin()
- #define TsanIgnoreWritesEnd()
-#endif
}
}
diff --git a/contrib/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm/include/llvm/Support/YAMLParser.h
index 0fbb7d2..b056ab6 100644
--- a/contrib/llvm/include/llvm/Support/YAMLParser.h
+++ b/contrib/llvm/include/llvm/Support/YAMLParser.h
@@ -145,11 +145,12 @@ public:
unsigned int getType() const { return TypeID; }
void *operator new(size_t Size, BumpPtrAllocator &Alloc,
- size_t Alignment = 16) throw() {
+ size_t Alignment = 16) LLVM_NOEXCEPT {
return Alloc.Allocate(Size, Alignment);
}
- void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() {
+ void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
+ size_t Size) LLVM_NOEXCEPT {
Alloc.Deallocate(Ptr, Size);
}
@@ -157,7 +158,7 @@ protected:
std::unique_ptr<Document> &Doc;
SMRange SourceRange;
- void operator delete(void *) throw() {}
+ void operator delete(void *) LLVM_NOEXCEPT = delete;
~Node() = default;
diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h
index c04294a..fb2badf 100644
--- a/contrib/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h
@@ -10,7 +10,6 @@
#ifndef LLVM_SUPPORT_YAMLTRAITS_H
#define LLVM_SUPPORT_YAMLTRAITS_H
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Optional.h"
@@ -29,7 +28,6 @@
namespace llvm {
namespace yaml {
-
/// This class should be specialized by any type that needs to be converted
/// to/from a YAML mapping. For example:
///
@@ -52,7 +50,6 @@ struct MappingTraits {
// static const bool flow = true;
};
-
/// This class should be specialized by any integral type that converts
/// to/from a YAML scalar where there is a one-to-one mapping between
/// in-memory values and a string in YAML. For example:
@@ -70,7 +67,6 @@ struct ScalarEnumerationTraits {
// static void enumeration(IO &io, T &value);
};
-
/// This class should be specialized by any integer type that is a union
/// of bit values and the YAML representation is a flow sequence of
/// strings. For example:
@@ -88,7 +84,6 @@ struct ScalarBitSetTraits {
// static void bitset(IO &io, T &value);
};
-
/// This class should be specialized by type that requires custom conversion
/// to/from a yaml scalar. For example:
///
@@ -149,7 +144,6 @@ struct BlockScalarTraits {
// static StringRef input(StringRef Scalar, void *ctxt, T &Value);
};
-
/// This class should be specialized by any type that needs to be converted
/// to/from a YAML sequence. For example:
///
@@ -175,7 +169,6 @@ struct SequenceTraits {
// static const bool flow = true;
};
-
/// This class should be specialized by any type that needs to be converted
/// to/from a list of YAML documents.
template<typename T>
@@ -185,7 +178,6 @@ struct DocumentListTraits {
// static T::value_type& element(IO &io, T &seq, size_t index);
};
-
// Only used by compiler if both template types are the same
template <typename T, T>
struct SameType;
@@ -194,8 +186,6 @@ struct SameType;
template <typename T>
struct MissingTrait;
-
-
// Test if ScalarEnumerationTraits<T> is defined on type T.
template <class T>
struct has_ScalarEnumerationTraits
@@ -213,7 +203,6 @@ public:
(sizeof(test<ScalarEnumerationTraits<T> >(nullptr)) == 1);
};
-
// Test if ScalarBitSetTraits<T> is defined on type T.
template <class T>
struct has_ScalarBitSetTraits
@@ -230,7 +219,6 @@ public:
static bool const value = (sizeof(test<ScalarBitSetTraits<T> >(nullptr)) == 1);
};
-
// Test if ScalarTraits<T> is defined on type T.
template <class T>
struct has_ScalarTraits
@@ -252,7 +240,6 @@ public:
(sizeof(test<ScalarTraits<T>>(nullptr, nullptr, nullptr)) == 1);
};
-
// Test if BlockScalarTraits<T> is defined on type T.
template <class T>
struct has_BlockScalarTraits
@@ -272,7 +259,6 @@ public:
(sizeof(test<BlockScalarTraits<T>>(nullptr, nullptr)) == 1);
};
-
// Test if MappingTraits<T> is defined on type T.
template <class T>
struct has_MappingTraits
@@ -305,8 +291,6 @@ public:
static bool const value = (sizeof(test<MappingTraits<T> >(nullptr)) == 1);
};
-
-
// Test if SequenceTraits<T> is defined on type T.
template <class T>
struct has_SequenceMethodTraits
@@ -323,7 +307,6 @@ public:
static bool const value = (sizeof(test<SequenceTraits<T> >(nullptr)) == 1);
};
-
// has_FlowTraits<int> will cause an error with some compilers because
// it subclasses int. Using this wrapper, the real has_FlowTraits is
// instantiated only if the template type is a class.
@@ -353,14 +336,11 @@ public:
static bool const value = sizeof(f<Derived>(nullptr)) == 2;
};
-
-
// Test if SequenceTraits<T> is defined on type T
template<typename T>
struct has_SequenceTraits : public std::integral_constant<bool,
has_SequenceMethodTraits<T>::value > { };
-
// Test if DocumentListTraits<T> is defined on type T
template <class T>
struct has_DocumentListTraits
@@ -453,7 +433,6 @@ inline bool needsQuotes(StringRef S) {
return false;
}
-
template<typename T>
struct missingTraits : public std::integral_constant<bool,
!has_ScalarEnumerationTraits<T>::value
@@ -654,8 +633,6 @@ private:
void *Ctxt;
};
-
-
template<typename T>
typename std::enable_if<has_ScalarEnumerationTraits<T>::value,void>::type
yamlize(IO &io, T &Val, bool) {
@@ -676,7 +653,6 @@ yamlize(IO &io, T &Val, bool) {
}
}
-
template<typename T>
typename std::enable_if<has_ScalarTraits<T>::value,void>::type
yamlize(IO &io, T &Val, bool) {
@@ -791,7 +767,6 @@ yamlize(IO &io, T &Seq, bool) {
}
}
-
template<>
struct ScalarTraits<bool> {
static void output(const bool &, void*, llvm::raw_ostream &);
@@ -883,8 +858,6 @@ struct ScalarTraits<double> {
static bool mustQuote(StringRef) { return false; }
};
-
-
// Utility for use within MappingTraits<>::mapping() method
// to [de]normalize an object for use with YAML conversion.
template <typename TNorm, typename TFinal>
@@ -917,14 +890,12 @@ private:
TFinal &Result;
};
-
-
// Utility for use within MappingTraits<>::mapping() method
// to [de]normalize an object for use with YAML conversion.
template <typename TNorm, typename TFinal>
struct MappingNormalizationHeap {
MappingNormalizationHeap(IO &i_o, TFinal &Obj)
- : io(i_o), BufPtr(NULL), Result(Obj) {
+ : io(i_o), BufPtr(nullptr), Result(Obj) {
if ( io.outputting() ) {
BufPtr = new (&Buffer) TNorm(io, Obj);
}
@@ -953,8 +924,6 @@ private:
TFinal &Result;
};
-
-
///
/// The Input class is used to parse a yaml document into in-memory structs
/// and vectors.
@@ -1083,7 +1052,6 @@ private:
void setError(HNode *hnode, const Twine &message);
void setError(Node *node, const Twine &message);
-
public:
// These are only used by operator>>. They could be private
// if those templated things could be made friends.
@@ -1105,9 +1073,6 @@ private:
bool ScalarMatchFound;
};
-
-
-
///
/// The Output class is used to generate a yaml document from in-memory structs
/// and vectors.
@@ -1181,9 +1146,6 @@ private:
bool NeedsNewLine;
};
-
-
-
/// YAML I/O does conversion based on types. But often native data types
/// are just a typedef of built-in integral types (e.g. int). But the C++
/// type matching system sees through the typedef and all the typedefed types
@@ -1206,8 +1168,6 @@ private:
_base value; \
};
-
-
///
/// Use these types instead of uintXX_t in any mapping to have
/// its yaml output formatted as hexadecimal.
@@ -1217,7 +1177,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint16_t, Hex16)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, Hex32)
LLVM_YAML_STRONG_TYPEDEF(uint64_t, Hex64)
-
template<>
struct ScalarTraits<Hex8> {
static void output(const Hex8 &, void*, llvm::raw_ostream &);
@@ -1246,7 +1205,6 @@ struct ScalarTraits<Hex64> {
static bool mustQuote(StringRef) { return false; }
};
-
// Define non-member operator>> so that Input can stream in a document list.
template <typename T>
inline
@@ -1303,7 +1261,6 @@ operator>>(Input &yin, T &docSeq) {
return yin;
}
-
// Define non-member operator<< so that Output can stream out document list.
template <typename T>
inline
@@ -1372,11 +1329,9 @@ operator<<(Output &yout, T &seq) {
return yout;
}
-
} // namespace yaml
} // namespace llvm
-
/// Utility for declaring that a std::vector of a particular type
/// should be considered a YAML sequence.
#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \
@@ -1436,6 +1391,4 @@ operator<<(Output &yout, T &seq) {
} \
}
-
-
#endif // LLVM_SUPPORT_YAMLTRAITS_H
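
The traits specializations this header describes are supplied by client code. A minimal sketch of the mapping idiom, following the header's own comments; Polar is a hypothetical client type:

    #include "llvm/Support/YAMLTraits.h"

    struct Polar {        // hypothetical native type
      double distance;
      double angle;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Polar> {
      static void mapping(IO &io, Polar &p) {
        io.mapRequired("distance", p.distance);
        io.mapRequired("angle", p.angle);
      }
    };
    } // namespace yaml
    } // namespace llvm

    // Reading:  llvm::yaml::Input yin("{ distance: 1.0, angle: 3.14 }"); yin >> p;
    // Writing:  llvm::yaml::Output yout(llvm::errs()); yout << p;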
diff --git a/contrib/llvm/include/llvm/Support/circular_raw_ostream.h b/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
index 19f9c2c..b46fd7f 100644
--- a/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
@@ -17,8 +17,7 @@
#include "llvm/Support/raw_ostream.h"
-namespace llvm
-{
+namespace llvm {
/// circular_raw_ostream - A raw_ostream which *can* save its data
/// to a circular buffer, or can pass it through directly to an
/// underlying stream if specified with a buffer of zero.
@@ -154,5 +153,4 @@ namespace llvm
};
} // end llvm namespace
-
#endif
diff --git a/contrib/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm/include/llvm/Support/raw_ostream.h
index 28e512c..d1e96f8 100644
--- a/contrib/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_ostream.h
@@ -218,14 +218,13 @@ public:
// Formatted output, see the leftJustify() function in Support/Format.h.
raw_ostream &operator<<(const FormattedString &);
-
+
// Formatted output, see the formatHex() function in Support/Format.h.
raw_ostream &operator<<(const FormattedNumber &);
-
+
/// indent - Insert 'NumSpaces' spaces.
raw_ostream &indent(unsigned NumSpaces);
-
/// Changes the foreground color of text that will be output from this point
/// forward.
/// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
@@ -246,7 +245,7 @@ public:
/// outputting colored text, or before program exit.
virtual raw_ostream &resetColor() { return *this; }
- /// Reverses the forground and background colors.
+ /// Reverses the foreground and background colors.
virtual raw_ostream &reverseColor() { return *this; }
/// This function determines if this stream is connected to a "tty" or
@@ -316,7 +315,7 @@ private:
};
/// An abstract base class for streams implementations that also support a
-/// pwrite operation. This is usefull for code that can mostly stream out data,
+/// pwrite operation. This is useful for code that can mostly stream out data,
/// but needs to patch in a header that needs to know the output size.
class raw_pwrite_stream : public raw_ostream {
virtual void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) = 0;
@@ -350,10 +349,6 @@ class raw_fd_ostream : public raw_pwrite_stream {
///
bool Error;
- /// Controls whether the stream should attempt to use atomic writes, when
- /// possible.
- bool UseAtomicWrites;
-
uint64_t pos;
bool SupportsSeeking;
@@ -403,16 +398,6 @@ public:
/// to the offset specified from the beginning of the file.
uint64_t seek(uint64_t off);
- /// Set the stream to attempt to use atomic writes for individual output
- /// routines where possible.
- ///
- /// Note that because raw_ostream's are typically buffered, this flag is only
- /// sensible when used on unbuffered streams which will flush their output
- /// immediately.
- void SetUseAtomicWrites(bool Value) {
- UseAtomicWrites = Value;
- }
-
raw_ostream &changeColor(enum Colors colors, bool bold=false,
bool bg=false) override;
raw_ostream &resetColor() override;
@@ -471,6 +456,7 @@ class raw_string_ostream : public raw_ostream {
/// Return the current position within the stream, not counting the bytes
/// currently in the buffer.
uint64_t current_pos() const override { return OS.size(); }
+
public:
explicit raw_string_ostream(std::string &O) : OS(O) {}
~raw_string_ostream() override;
@@ -485,6 +471,9 @@ public:
/// A raw_ostream that writes to a SmallVector or SmallString. This is a
/// simple adaptor class. This class does not encounter output errors.
+/// raw_svector_ostream operates without a buffer, delegating all memory
+/// management to the SmallString. Thus the SmallString is always up-to-date,
+/// may be used directly, and there is no need to call flush().
class raw_svector_ostream : public raw_pwrite_stream {
SmallVectorImpl<char> &OS;
@@ -493,32 +482,23 @@ class raw_svector_ostream : public raw_pwrite_stream {
void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) override;
- /// Return the current position within the stream, not counting the bytes
- /// currently in the buffer.
+ /// Return the current position within the stream.
uint64_t current_pos() const override;
-protected:
- // Like the regular constructor, but doesn't call init.
- explicit raw_svector_ostream(SmallVectorImpl<char> &O, unsigned);
- void init();
-
public:
/// Construct a new raw_svector_ostream.
///
/// \param O The vector to write to; this should generally have at least 128
/// bytes free to avoid any extraneous memory overhead.
- explicit raw_svector_ostream(SmallVectorImpl<char> &O);
- ~raw_svector_ostream() override;
-
+ explicit raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ SetUnbuffered();
+ }
+ ~raw_svector_ostream() override {}
- /// This is called when the SmallVector we're appending to is changed outside
- /// of the raw_svector_ostream's control. It is only safe to do this if the
- /// raw_svector_ostream has previously been flushed.
- void resync();
+ void flush() = delete;
- /// Flushes the stream contents to the target vector and return a StringRef
- /// for the vector contents.
- StringRef str();
+ /// Return a StringRef for the vector contents.
+ StringRef str() { return StringRef(OS.data(), OS.size()); }
};
/// A raw_ostream that discards all output.
@@ -541,12 +521,10 @@ class buffer_ostream : public raw_svector_ostream {
SmallVector<char, 0> Buffer;
public:
- buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer, 0), OS(OS) {
- init();
- }
- ~buffer_ostream() { OS << str(); }
+ buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer), OS(OS) {}
+ ~buffer_ostream() override { OS << str(); }
};
} // end llvm namespace
-#endif
+#endif // LLVM_SUPPORT_RAW_OSTREAM_H
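
With the rewrite above, raw_svector_ostream keeps no buffer of its own, so the underlying SmallString is always current and flush() is deleted outright. A minimal usage sketch:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    void demo() {
      llvm::SmallString<128> Buf;
      llvm::raw_svector_ostream OS(Buf);
      OS << "count=" << 42;
      // No flush() or resync() needed any more; Buf is already up to date.
      llvm::StringRef S = OS.str(); // "count=42"
      (void)S;
    }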
diff --git a/contrib/llvm/include/llvm/Support/thread.h b/contrib/llvm/include/llvm/Support/thread.h
new file mode 100644
index 0000000..2d13041
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/thread.h
@@ -0,0 +1,66 @@
+//===-- llvm/Support/thread.h - Wrapper for <thread> ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header is a wrapper for <thread> that works around problems with the
+// MSVC headers when exceptions are disabled. It also provides llvm::thread,
+// which is either a typedef of std::thread or a replacement that calls the
+// function synchronously depending on the value of LLVM_ENABLE_THREADS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_THREAD_H
+#define LLVM_SUPPORT_THREAD_H
+
+#include "llvm/Config/llvm-config.h"
+
+#if LLVM_ENABLE_THREADS
+
+#ifdef _MSC_VER
+// concrt.h depends on eh.h for __uncaught_exception declaration
+// even if we disable exceptions.
+#include <eh.h>
+
+// Suppress 'C++ exception handler used, but unwind semantics are not enabled.'
+#pragma warning(push)
+#pragma warning(disable:4530)
+#endif
+
+#include <thread>
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+namespace llvm {
+typedef std::thread thread;
+}
+
+#else // !LLVM_ENABLE_THREADS
+
+#include <utility>
+
+namespace llvm {
+
+struct thread {
+ thread() {}
+ thread(thread &&other) {}
+ template <class Function, class... Args>
+ explicit thread(Function &&f, Args &&... args) {
+ f(std::forward<Args>(args)...);
+ }
+ thread(const thread &) = delete;
+
+ void join() {}
+};
+
+}
+
+#endif // LLVM_ENABLE_THREADS
+
+#endif
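
Because the fallback constructor above invokes the callable immediately, code written against llvm::thread behaves the same with threading disabled, just synchronously. A minimal sketch:

    #include "llvm/Support/thread.h"

    static void work(int n) { (void)n; /* ... */ }

    void spawn() {
      // With LLVM_ENABLE_THREADS this is a std::thread; without it,
      // work(42) has already run synchronously by the time the
      // constructor returns.
      llvm::thread T(work, 42);
      T.join(); // no-op in the single-threaded fallback
    }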
diff --git a/contrib/llvm/include/llvm/Support/type_traits.h b/contrib/llvm/include/llvm/Support/type_traits.h
index 45465ae..88385c3 100644
--- a/contrib/llvm/include/llvm/Support/type_traits.h
+++ b/contrib/llvm/include/llvm/Support/type_traits.h
@@ -93,6 +93,15 @@ struct add_const_past_pointer<
}
+// If the compiler supports detecting whether a class is final, define
+// an LLVM_IS_FINAL macro. If it cannot be defined properly, this
+// macro will be left undefined.
+#if __cplusplus >= 201402L
+#define LLVM_IS_FINAL(Ty) std::is_final<Ty>()
+#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0)
+#define LLVM_IS_FINAL(Ty) __is_final(Ty)
+#endif
+
#ifdef LLVM_DEFINED_HAS_FEATURE
#undef __has_feature
#endif
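
Template code can then branch on finality only where the macro could be defined. A short sketch:

    #include "llvm/Support/type_traits.h"

    struct Base { };
    struct Derived final : Base { };

    #ifdef LLVM_IS_FINAL
    static_assert(LLVM_IS_FINAL(Derived), "detected as final");
    static_assert(!LLVM_IS_FINAL(Base), "not final");
    #endif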
diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h
index b4642c9..eb1c5c7 100644
--- a/contrib/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm/include/llvm/TableGen/Record.h
@@ -366,7 +366,7 @@ class TypedInit : public Init {
protected:
explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
- ~TypedInit() {
+ ~TypedInit() override {
// If this is a DefInit we need to delete the RecordRecTy.
if (getKind() == IK_DefInit)
delete Ty;
@@ -547,7 +547,7 @@ public:
class StringInit : public TypedInit {
std::string Value;
- explicit StringInit(const std::string &V)
+ explicit StringInit(StringRef V)
: TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {}
StringInit(const StringInit &Other) = delete;
@@ -836,8 +836,6 @@ public:
class VarInit : public TypedInit {
Init *VarName;
- explicit VarInit(const std::string &VN, RecTy *T)
- : TypedInit(IK_VarInit, T), VarName(StringInit::get(VN)) {}
explicit VarInit(Init *VN, RecTy *T)
: TypedInit(IK_VarInit, T), VarName(VN) {}
@@ -1589,6 +1587,6 @@ Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
const std::string &Name, const std::string &Scoper);
-} // End llvm namespace
+} // end llvm namespace
-#endif
+#endif // LLVM_TABLEGEN_RECORD_H
diff --git a/contrib/llvm/include/llvm/Target/CostTable.h b/contrib/llvm/include/llvm/Target/CostTable.h
index 34f6041..2499f5c 100644
--- a/contrib/llvm/include/llvm/Target/CostTable.h
+++ b/contrib/llvm/include/llvm/Target/CostTable.h
@@ -15,64 +15,54 @@
#ifndef LLVM_TARGET_COSTTABLE_H_
#define LLVM_TARGET_COSTTABLE_H_
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineValueType.h"
+
namespace llvm {
/// Cost Table Entry
-template <class TypeTy>
struct CostTblEntry {
int ISD;
- TypeTy Type;
+ MVT::SimpleValueType Type;
unsigned Cost;
};
/// Find in cost table; the type Ty must match the entry's Type by ==
-template <class TypeTy, class CompareTy>
-int CostTableLookup(const CostTblEntry<TypeTy> *Tbl, unsigned len, int ISD,
- CompareTy Ty) {
- for (unsigned int i = 0; i < len; ++i)
- if (ISD == Tbl[i].ISD && Ty == Tbl[i].Type)
- return i;
+inline const CostTblEntry *CostTableLookup(ArrayRef<CostTblEntry> Tbl,
+ int ISD, MVT Ty) {
+ auto I = std::find_if(Tbl.begin(), Tbl.end(),
+ [=](const CostTblEntry &Entry) {
+ return ISD == Entry.ISD && Ty == Entry.Type; });
+ if (I != Tbl.end())
+ return I;
// Could not find an entry.
- return -1;
-}
-
-/// Find in cost table, TypeTy must be comparable to CompareTy by ==
-template <class TypeTy, class CompareTy, unsigned N>
-int CostTableLookup(const CostTblEntry<TypeTy>(&Tbl)[N], int ISD,
- CompareTy Ty) {
- return CostTableLookup(Tbl, N, ISD, Ty);
+ return nullptr;
}
/// Type Conversion Cost Table
-template <class TypeTy>
struct TypeConversionCostTblEntry {
int ISD;
- TypeTy Dst;
- TypeTy Src;
+ MVT::SimpleValueType Dst;
+ MVT::SimpleValueType Src;
unsigned Cost;
};
/// Find in type conversion cost table; Dst and Src must match the entry's
/// types by ==
-template <class TypeTy, class CompareTy>
-int ConvertCostTableLookup(const TypeConversionCostTblEntry<TypeTy> *Tbl,
- unsigned len, int ISD, CompareTy Dst,
- CompareTy Src) {
- for (unsigned int i = 0; i < len; ++i)
- if (ISD == Tbl[i].ISD && Src == Tbl[i].Src && Dst == Tbl[i].Dst)
- return i;
+inline const TypeConversionCostTblEntry *
+ConvertCostTableLookup(ArrayRef<TypeConversionCostTblEntry> Tbl,
+ int ISD, MVT Dst, MVT Src) {
+ auto I = std::find_if(Tbl.begin(), Tbl.end(),
+ [=](const TypeConversionCostTblEntry &Entry) {
+ return ISD == Entry.ISD && Src == Entry.Src &&
+ Dst == Entry.Dst;
+ });
+ if (I != Tbl.end())
+ return I;
// Could not find an entry.
- return -1;
-}
-
-/// Find in type conversion cost table, TypeTy must be comparable to CompareTy
-/// by ==
-template <class TypeTy, class CompareTy, unsigned N>
-int ConvertCostTableLookup(const TypeConversionCostTblEntry<TypeTy>(&Tbl)[N],
- int ISD, CompareTy Dst, CompareTy Src) {
- return ConvertCostTableLookup(Tbl, N, ISD, Dst, Src);
+ return nullptr;
}
} // namespace llvm
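
Call sites migrate from index checks to a pointer test. A sketch of the new idiom under illustrative table contents:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/Target/CostTable.h"

    static const llvm::CostTblEntry MulTbl[] = {
      { llvm::ISD::MUL, llvm::MVT::v4i32, 2 }, // illustrative costs
      { llvm::ISD::MUL, llvm::MVT::v8i16, 1 },
    };

    int getMulCost(llvm::MVT VT) {
      // Previously: int Idx = CostTableLookup(MulTbl, N, ISD::MUL, VT);
      //             if (Idx != -1) return MulTbl[Idx].Cost;
      if (const auto *Entry = llvm::CostTableLookup(MulTbl, llvm::ISD::MUL, VT))
        return Entry->Cost;
      return -1; // caller falls back to a default cost
    }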
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index e0aea18..79046b2 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -441,6 +441,30 @@ class Instruction {
string PostEncoderMethod = "";
string DecoderMethod = "";
+ // Whether the instruction decoder method is able to completely determine
+ // if the given instruction is valid or not. If the TableGen definition of
+ // the instruction specifies bitpattern A??B where A and B are static bits,
+ // the hasCompleteDecoder flag says whether the decoder method fully handles
+ // the ?? space, i.e. whether it is a final arbiter of instruction validity.
+ // If not, then the decoder attempts to continue decoding when the decoder
+ // method fails.
+ //
+ // This allows handling situations where the encoding is not fully
+ // orthogonal. Example:
+ // * InstA with bitpattern 0b0000????,
+ // * InstB with bitpattern 0b000000?? but the associated decoder method
+ // DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
+ //
+ // The decoder tries to decode a bitpattern that matches both InstA and
+ // InstB bitpatterns first as InstB (because it is the most specific
+ // encoding). In the default case (hasCompleteDecoder = 1), when
+ // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
+ // hasCompleteDecoder = 0 in InstB, the decoder is informed that
+ // DecodeInstB() is not able to determine if all possible values of ?? are
+ // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
+ // decode the bitpattern as InstA too.
+ bit hasCompleteDecoder = 1;
+
/// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
bits<64> TSFlags = 0;
@@ -595,6 +619,8 @@ class Operand<ValueType ty> : DAGOperand {
string PrintMethod = "printOperand";
string EncoderMethod = "";
string DecoderMethod = "";
+ bit hasCompleteDecoder = 1;
+ string OperandNamespace = "MCOI";
string OperandType = "OPERAND_UNKNOWN";
dag MIOperandInfo = (ops);
@@ -910,9 +936,6 @@ class AsmParser {
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
// written register name matcher
bit ShouldEmitMatchRegisterName = 1;
-
- /// Does the instruction mnemonic allow '.'
- bit MnemonicContainsDot = 0;
}
def DefaultAsmParser : AsmParser;
@@ -940,6 +963,15 @@ class AsmParserVariant {
// register tokens as constrained registers, instead of tokens, for the
// purposes of matching.
string RegisterPrefix = "";
+
+ // TokenizingCharacters - Characters that are standalone tokens
+ string TokenizingCharacters = "[]*!";
+
+ // SeparatorCharacters - Characters that are not tokens
+ string SeparatorCharacters = " \t,";
+
+ // BreakCharacters - Characters that start new identifiers
+ string BreakCharacters = "";
}
def DefaultAsmParserVariant : AsmParserVariant;
diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.h b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
index 9d4e7a0..0c6c1f1 100644
--- a/contrib/llvm/include/llvm/Target/TargetCallingConv.h
+++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
@@ -46,6 +46,8 @@ namespace ISD {
static const uint64_t SplitOffs = 11;
static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca
static const uint64_t InAllocaOffs = 12;
+ static const uint64_t SplitEnd = 1ULL<<13; ///< Last part of a split
+ static const uint64_t SplitEndOffs = 13;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
@@ -103,6 +105,9 @@ namespace ISD {
bool isSplit() const { return Flags & Split; }
void setSplit() { Flags |= One << SplitOffs; }
+ bool isSplitEnd() const { return Flags & SplitEnd; }
+ void setSplitEnd() { Flags |= One << SplitEndOffs; }
+
unsigned getOrigAlign() const {
return (unsigned)
((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);
diff --git a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
index 3af2227..cadd07d 100644
--- a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
@@ -70,6 +70,18 @@ public:
///
unsigned getStackAlignment() const { return StackAlignment; }
+ /// alignSPAdjust - This method aligns the stack adjustment to the correct
+ /// alignment.
+ ///
+ int alignSPAdjust(int SPAdj) const {
+ if (SPAdj < 0) {
+ SPAdj = -RoundUpToAlignment(-SPAdj, StackAlignment);
+ } else {
+ SPAdj = RoundUpToAlignment(SPAdj, StackAlignment);
+ }
+ return SPAdj;
+ }
+
/// getTransientStackAlignment - This method returns the number of bytes to
/// which the stack pointer must be aligned at all times, even between
/// calls.
@@ -84,6 +96,11 @@ public:
return StackRealignable;
}
+ /// Return the skew that has to be applied to stack alignment under
+ /// certain conditions (e.g. stack was adjusted before function \p MF
+ /// was called).
+ virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
+
/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
@@ -129,6 +146,11 @@ public:
return false;
}
+ /// Returns true if the target will correctly handle shrink wrapping.
+ virtual bool enableShrinkWrapping(const MachineFunction &MF) const {
+ return false;
+ }
+
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
virtual void emitPrologue(MachineFunction &MF,
@@ -136,6 +158,10 @@ public:
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const = 0;
+ /// Replace a StackProbe stub (if any) with the actual probe code inline
+ virtual void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const {}
+
/// Adjust the prologue to have the function use segmented stacks. This works
/// by adding a check even before the "normal" function prologue.
virtual void adjustForSegmentedStacks(MachineFunction &MF,
@@ -207,10 +233,6 @@ public:
// has any stack objects. However, targets may want to override this.
virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
- /// getFrameIndexOffset - Returns the displacement from the frame register to
- /// the stack frame of the specified index.
- virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
/// getFrameIndexReference - This method should return the base register
/// and offset used to reference a frame index location. The offset is
/// returned directly, and the base register is returned via FrameReg.
@@ -218,10 +240,11 @@ public:
unsigned &FrameReg) const;
/// Same as above, except that the 'base register' will always be RSP, not
- /// RBP on x86. This is used exclusively for lowering STATEPOINT nodes.
+ /// RBP on x86. This is generally used for emitting statepoint or EH tables
+ /// that use offsets from RSP.
/// TODO: This should really be a parameterizable choice.
virtual int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+ unsigned &FrameReg) const {
// default to calling normal version, we override this on x86 only
llvm_unreachable("unimplemented for non-x86");
return 0;
@@ -246,6 +269,10 @@ public:
RegScavenger *RS = nullptr) const {
}
+ virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
+ report_fatal_error("WinEH not implemented for this target");
+ }
+
/// eliminateCallFramePseudoInstr - This method is called during prolog/epilog
/// code insertion to eliminate call frame setup and destroy pseudo
/// instructions (but only if the Target is using them). It is responsible
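
For a concrete feel of alignSPAdjust above: it rounds the magnitude of the adjustment up to the stack alignment while preserving its sign. A standalone mirror of the logic, with a 16-byte alignment assumed purely for illustration:

    #include "llvm/Support/MathExtras.h"

    // Mirrors TargetFrameLowering::alignSPAdjust for StackAlignment == 16.
    int alignSPAdjust16(int SPAdj) {
      if (SPAdj < 0)
        return -(int)llvm::RoundUpToAlignment(-SPAdj, 16);
      return (int)llvm::RoundUpToAlignment(SPAdj, 16);
    }
    // alignSPAdjust16(20) == 32, alignSPAdjust16(-20) == -32,
    // alignSPAdjust16(32) == 32.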
diff --git a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
index 8b314f4..0cebcf1 100644
--- a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
@@ -38,7 +39,6 @@ class SelectionDAG;
class ScheduleDAG;
class TargetRegisterClass;
class TargetRegisterInfo;
-class BranchProbability;
class TargetSubtargetInfo;
class TargetSchedModel;
class DFAPacketizer;
@@ -54,13 +54,18 @@ class TargetInstrInfo : public MCInstrInfo {
TargetInstrInfo(const TargetInstrInfo &) = delete;
void operator=(const TargetInstrInfo &) = delete;
public:
- TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u)
- : CallFrameSetupOpcode(CFSetupOpcode),
- CallFrameDestroyOpcode(CFDestroyOpcode) {
- }
+ TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u,
+ unsigned CatchRetOpcode = ~0u)
+ : CallFrameSetupOpcode(CFSetupOpcode),
+ CallFrameDestroyOpcode(CFDestroyOpcode),
+ CatchRetOpcode(CatchRetOpcode) {}
virtual ~TargetInstrInfo();
+ static bool isGenericOpcode(unsigned Opc) {
+ return Opc <= TargetOpcode::GENERIC_OP_END;
+ }
+
/// Given a machine instruction descriptor, returns the register
/// class constraint for OpNum, or NULL.
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
@@ -94,6 +99,41 @@ protected:
return false;
}
+ /// This method commutes the operands of the given machine instruction MI.
+ /// The operands to be commuted are specified by their indices OpIdx1 and
+ /// OpIdx2.
+ ///
+ /// If a target has any instructions that are commutable but require
+ /// converting to different instructions or making non-trivial changes
+ /// to commute them, this method can be overloaded to do that.
+ /// The default implementation simply swaps the commutable operands.
+ ///
+ /// If NewMI is false, MI is modified in place and returned; otherwise, a
+ /// new machine instruction is created and returned.
+ ///
+ /// Do not call this method for a non-commutable instruction.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ virtual MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const;
+
+ /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable
+ /// operand indices to (ResultIdx1, ResultIdx2).
+ /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be
+ /// predefined to some indices or be undefined (designated by the special
+ /// value 'CommuteAnyOperandIndex').
+ /// The predefined result indices cannot be re-defined.
+ /// The function returns true iff, after the result pair redefinition, the
+ /// fixed result pair is equal to or equivalent to the source pair of
+ /// indices (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that
+ /// the pairs (x,y) and (y,x) are equivalent.
+ static bool fixCommutedOpIndices(unsigned &ResultIdx1,
+ unsigned &ResultIdx2,
+ unsigned CommutableOpIdx1,
+ unsigned CommutableOpIdx2);
+
private:
/// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
/// set and the target hook isReallyTriviallyReMaterializable returns false,
@@ -111,6 +151,8 @@ public:
unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; }
unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; }
+ unsigned getCatchReturnOpcode() const { return CatchRetOpcode; }
+
/// Returns the actual stack pointer adjustment made by an instruction
/// as part of a call sequence. By default, only call frame setup/destroy
/// instructions adjust the stack, but targets may want to override this
@@ -250,20 +292,51 @@ public:
return nullptr;
}
- /// If a target has any instructions that are commutable but require
- /// converting to different instructions or making non-trivial changes to
- /// commute them, this method can overloaded to do that.
- /// The default implementation simply swaps the commutable operands.
+ // This constant can be used as an input operand-index value passed to
+ // the method findCommutedOpIndices() to tell the method that the
+ // corresponding operand index is not pre-defined and that the method
+ // can pick any commutable operand.
+ static const unsigned CommuteAnyOperandIndex = ~0U;
+
+ /// This method commutes the operands of the given machine instruction MI.
+ ///
+ /// The operands to be commuted are specified by their indices OpIdx1 and
+ /// OpIdx2. OpIdx1 and OpIdx2 arguments may be set to a special value
+ /// 'CommuteAnyOperandIndex', which means that the method is free to choose
+ /// any commutable operand. If both arguments are set to
+ /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable
+ /// operands and commutes them if such operands are found.
+ ///
/// If NewMI is false, MI is modified in place and returned; otherwise, a
- /// new machine instruction is created and returned. Do not call this
- /// method for a non-commutable instruction, but there may be some cases
- /// where this method fails and returns null.
- virtual MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const;
-
- /// If specified MI is commutable, return the two operand indices that would
- /// swap value. Return false if the instruction
- /// is not in a form which this routine understands.
+ /// new machine instruction is created and returned.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// for non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *
+ commuteInstruction(MachineInstr *MI,
+ bool NewMI = false,
+ unsigned OpIdx1 = CommuteAnyOperandIndex,
+ unsigned OpIdx2 = CommuteAnyOperandIndex) const;
+
+ /// Returns true iff the routine could find two commutable operands in the
+ /// given machine instruction.
+ /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments.
+ /// If any of the INPUT values is set to the special value
+ /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable
+ /// operand, then returns its index in the corresponding argument.
+ /// If both INPUT values are set to 'CommuteAnyOperandIndex' then the method
+ /// looks for 2 commutable operands.
+ /// If INPUT values refer to some operands of MI, then the method simply
+ /// returns true if the corresponding operands are commutable and returns
+ /// false otherwise.
+ ///
+ /// For example, calling this method this way:
+ /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+ /// findCommutedOpIndices(MI, Op1, Op2);
+ /// can be interpreted as a query asking to find an operand that would be
+ /// commutable with operand #1.
virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const;
@@ -511,7 +584,7 @@ public:
virtual
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return false;
}
@@ -526,7 +599,7 @@ public:
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return false;
}
@@ -538,7 +611,7 @@ public:
/// will be properly predicted.
virtual bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return false;
}
@@ -724,13 +797,30 @@ public:
/// order since the pattern evaluator stops checking as soon as it finds a
/// faster sequence.
/// \param Root - Instruction that could be combined with one of its operands
- /// \param Pattern - Vector of possible combination patterns
+ /// \param Patterns - Vector of possible combination patterns
virtual bool getMachineCombinerPatterns(
MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const;
+
+ /// Return true if the input \p Inst is part of a chain of dependent ops
+ /// that are suitable for reassociation, otherwise return false.
+ /// If the instruction's operands must be commuted to have a previous
+ /// instruction of the same type define the first source operand, \p Commuted
+ /// will be set to true.
+ bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const;
+
+ /// Return true when \p Inst is both associative and commutative.
+ virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const {
return false;
}
+ /// Return true when \p Inst has reassociable operands in the same \p MBB.
+ virtual bool hasReassociableOperands(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) const;
+
+ /// Return true when \p Inst has a reassociable sibling.
+ bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const;
+
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence. The client
/// has to decide whether the actual replacement is beneficial or not.
@@ -742,12 +832,26 @@ public:
/// \param InstrIdxForVirtReg - map of virtual register to instruction in
/// InsInstr that defines it
virtual void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+
+ /// Attempt to reassociate \p Root and \p Prev according to \p Pattern to
+ /// reduce critical path length.
+ void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+
+ /// This is an architecture-specific helper function of reassociateOps.
+ /// Set special operand attributes for new instructions after reassociation.
+ virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
return;
- }
+ }
/// Return true when a target supports MachineCombiner.
virtual bool useMachineCombiner() const { return false; }
@@ -819,10 +923,6 @@ protected:
}
public:
- /// Returns true for the specified load / store if folding is possible.
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const;
-
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instruction. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -1266,8 +1366,73 @@ public:
return 5;
}
+ /// Return an array that contains the ids of the target indices (used for the
+ /// TargetIndex machine operand) and their names.
+ ///
+ /// MIR Serialization is able to serialize only the target indices that are
+ /// defined by this method.
+ virtual ArrayRef<std::pair<int, const char *>>
+ getSerializableTargetIndices() const {
+ return None;
+ }
+
+ /// Decompose the machine operand's target flags into two values - the direct
+ /// target flag value and any bit flags that are applied.
+ virtual std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned /*TF*/) const {
+ return std::make_pair(0u, 0u);
+ }
+
+ /// Return an array that contains the direct target flag values and their
+ /// names.
+ ///
+ /// MIR Serialization is able to serialize only the target flags that are
+ /// defined by this method.
+ virtual ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const {
+ return None;
+ }
+
+ /// Return an array that contains the bitmask target flag values and their
+ /// names.
+ ///
+ /// MIR Serialization is able to serialize only the target flags that are
+ /// defined by this method.
+ virtual ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const {
+ return None;
+ }
+
private:
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
+ unsigned CatchRetOpcode;
+};
+
+/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair.
+template<>
+struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
+ typedef DenseMapInfo<unsigned> RegInfo;
+
+ static inline TargetInstrInfo::RegSubRegPair getEmptyKey() {
+ return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(),
+ RegInfo::getEmptyKey());
+ }
+ static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() {
+ return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(),
+ RegInfo::getTombstoneKey());
+ }
+ /// \brief Reuse getHashValue implementation from
+ /// std::pair<unsigned, unsigned>.
+ static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) {
+ std::pair<unsigned, unsigned> PairVal =
+ std::make_pair(Val.Reg, Val.SubReg);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS,
+ const TargetInstrInfo::RegSubRegPair &RHS) {
+ return RegInfo::isEqual(LHS.Reg, RHS.Reg) &&
+ RegInfo::isEqual(LHS.SubReg, RHS.SubReg);
+ }
};
} // End llvm namespace
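
A sketch of the reworked commutation interface above, pinning one operand and letting the target pick its partner; TII and MI are assumed to be in scope at a typical call site:

    #include "llvm/Target/TargetInstrInfo.h"

    void tryCommuteWithOp1(const llvm::TargetInstrInfo *TII,
                           llvm::MachineInstr *&MI) {
      unsigned Idx1 = 1; // pin operand #1
      unsigned Idx2 = llvm::TargetInstrInfo::CommuteAnyOperandIndex;
      if (!TII->findCommutedOpIndices(MI, Idx1, Idx2))
        return; // nothing commutable with operand #1
      // In-place commute; may still fail and return nullptr even for
      // commutable operands.
      if (llvm::MachineInstr *NewMI =
              TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2))
        MI = NewMI;
    }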
diff --git a/contrib/llvm/include/llvm/Target/TargetItinerary.td b/contrib/llvm/include/llvm/Target/TargetItinerary.td
index cc74006..a37bbf2 100644
--- a/contrib/llvm/include/llvm/Target/TargetItinerary.td
+++ b/contrib/llvm/include/llvm/Target/TargetItinerary.td
@@ -134,3 +134,19 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
// info. Subtargets using NoItineraries can bypass the scheduler's
// expensive HazardRecognizer because no reservation table is needed.
def NoItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// Combo Function Unit data - This is a map of combo function unit names to
+// the list of functional units that are included in the combination.
+//
+class ComboFuncData<FuncUnit ComboFunc, list<FuncUnit> funclist> {
+ FuncUnit TheComboFunc = ComboFunc;
+ list<FuncUnit> FuncList = funclist;
+}
+
+//===----------------------------------------------------------------------===//
+// Combo Function Units - This is a list of all combo function unit data.
+class ComboFuncUnits<list<ComboFuncData> cfd> {
+ list<ComboFuncData> CFD = cfd;
+}
+
diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h
index 4412d9b..140c3659 100644
--- a/contrib/llvm/include/llvm/Target/TargetLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetLowering.h
@@ -83,20 +83,22 @@ class TargetLoweringBase {
public:
/// This enum indicates whether operations are valid for a target, and if not,
/// what action should be used to make them valid.
- enum LegalizeAction {
+ enum LegalizeAction : uint8_t {
Legal, // The target natively supports this operation.
Promote, // This operation should be executed in a larger type.
Expand, // Try to expand this to other ops, otherwise use a libcall.
+ LibCall, // Don't try to expand this to other ops, always use a libcall.
Custom // Use the LowerOperation hook to implement custom lowering.
};
/// This enum indicates whether a types are legal for a target, and if not,
/// what action should be used to make them valid.
- enum LegalizeTypeAction {
+ enum LegalizeTypeAction : uint8_t {
TypeLegal, // The target natively supports this type.
TypePromoteInteger, // Replace this integer with a larger one.
TypeExpandInteger, // Split this integer into two of half the size.
- TypeSoftenFloat, // Convert this float to a same size integer type.
+ TypeSoftenFloat, // Convert this float to a same-size integer type,
+ // if an operation is not supported in target HW.
TypeExpandFloat, // Split this float into two of half the size.
TypeScalarizeVector, // Replace this one-element vector with its element.
TypeSplitVector, // Split this vector into two of half the size.
@@ -124,16 +126,17 @@ public:
// mask (ex: x86 blends).
};
- /// Enum that specifies what a AtomicRMWInst is expanded to, if at all. Exists
- /// because different targets have different levels of support for these
- /// atomic RMW instructions, and also have different options w.r.t. what they
- /// should expand to.
- enum class AtomicRMWExpansionKind {
- None, // Don't expand the instruction.
- LLSC, // Expand the instruction into loadlinked/storeconditional; used
- // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional`
- // returns true.
- CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+ /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
+ /// to, if at all. Exists because different targets have different levels of
+ /// support for these atomic instructions, and also have different options
+ /// w.r.t. what they should expand to.
+ enum class AtomicExpansionKind {
+ None, // Don't expand the instruction.
+ LLSC, // Expand the instruction into loadlinked/storeconditional; used
+ // by ARM/AArch64.
+ LLOnly, // Expand the (load) instruction into just a load-linked, which has
+ // greater atomic guarantees than a normal load.
+ CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
};
static ISD::NodeType getExtendForContent(BooleanContent Content) {
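
A target consumes the merged enum above by overriding shouldExpandAtomicRMWInIR (shown further down in this diff), roughly as follows; MyTargetLowering and the 64-bit cutoff are hypothetical, loosely modeled on the LL/SC targets the comment mentions:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Target/TargetLowering.h"

    // MyTargetLowering is assumed to be declared elsewhere, deriving
    // from llvm::TargetLowering.
    llvm::TargetLoweringBase::AtomicExpansionKind
    MyTargetLowering::shouldExpandAtomicRMWInIR(llvm::AtomicRMWInst *AI) const {
      // Expand to a load-linked/store-conditional loop for sizes the
      // hardware can handle natively; leave wider ops alone.
      unsigned Size = AI->getType()->getPrimitiveSizeInBits();
      return Size <= 64 ? AtomicExpansionKind::LLSC
                        : AtomicExpansionKind::None;
    }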
@@ -226,7 +229,11 @@ public:
/// Return true if integer divide is usually cheaper than a sequence of
/// several shifts, adds, and multiplies for this target.
- bool isIntDivCheap() const { return IntDivIsCheap; }
+ /// The definition of "cheaper" may depend on whether we're optimizing
+ /// for speed or for size.
+ virtual bool isIntDivCheap(EVT VT, AttributeSet Attr) const {
+ return false;
+ }
/// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x)
bool isFsqrtCheap() const {
@@ -242,9 +249,6 @@ public:
return BypassSlowDivWidths;
}
- /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra.
- bool isPow2SDivCheap() const { return Pow2SDivIsCheap; }
-
/// Return true if Flow Control is an expensive operation that should be
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }
@@ -409,20 +413,20 @@ public:
class ValueTypeActionImpl {
/// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
/// that indicates how instruction selection should deal with the type.
- uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
+ LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
public:
ValueTypeActionImpl() {
- std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 0);
+ std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
+ TypeLegal);
}
LegalizeTypeAction getTypeAction(MVT VT) const {
- return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy];
+ return ValueTypeActions[VT.SimpleTy];
}
void setTypeAction(MVT VT, LegalizeTypeAction Action) {
- unsigned I = VT.SimpleTy;
- ValueTypeActions[I] = Action;
+ ValueTypeActions[VT.SimpleTy] = Action;
}
};
@@ -546,8 +550,7 @@ public:
// If a target-specific SDNode requires legalization, require the target
// to provide custom legalization for it.
if (Op > array_lengthof(OpActions[0])) return Custom;
- unsigned I = (unsigned) VT.getSimpleVT().SimpleTy;
- return (LegalizeAction)OpActions[I][Op];
+ return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
/// Return true if the specified operation is legal on this target or can be
@@ -591,7 +594,7 @@ public:
unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
- return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType];
+ return LoadExtActions[ValI][MemI][ExtType];
}
/// Return true if the specified load with extension is legal on this target.
@@ -617,7 +620,7 @@ public:
unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)TruncStoreActions[ValI][MemI];
+ return TruncStoreActions[ValI][MemI];
}
/// Return true if the specified store with truncation is legal on this
@@ -672,9 +675,9 @@ public:
((unsigned)VT.SimpleTy >> 4) < array_lengthof(CondCodeActions[0]) &&
"Table isn't big enough!");
// See setCondCodeAction for how this is encoded.
- uint32_t Shift = 2 * (VT.SimpleTy & 0xF);
- uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 4];
- LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0x3);
+ uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
+ uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
+ LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
assert(Action != Promote && "Can't promote condition code!");
return Action;
}
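
The widened encoding above stores one 4-bit action per value type, eight types per 32-bit word. A standalone sketch of the indexing arithmetic that getCondCodeAction performs (names illustrative):

    #include <cstdint>

    // Read the 4-bit LegalizeAction for value-type index Ty out of a
    // packed row of 32-bit words.
    unsigned getPackedAction(const uint32_t *Row, unsigned Ty) {
      uint32_t Shift = 4 * (Ty & 0x7); // nibble within the word
      return (Row[Ty >> 3] >> Shift) & 0xF;
    }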
@@ -832,6 +835,10 @@ public:
return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
}
+ unsigned getGatherAllAliasesMaxDepth() const {
+ return GatherAllAliasesMaxDepth;
+ }
+
/// \brief Get maximum # of store operations permitted for llvm.memset
///
/// This function returns the maximum number of store operations permitted
@@ -878,6 +885,14 @@ public:
return false;
}
+ /// Return true if the target supports a memory access of this type for the
+ /// given address space and alignment. If the access is allowed, the optional
+ /// final parameter returns if the access is also fast (as defined by the
+ /// target).
+ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace = 0, unsigned Alignment = 1,
+ bool *Fast = nullptr) const;
+
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
@@ -930,15 +945,19 @@ public:
}
/// If a physical register, this returns the register that receives the
- /// exception address on entry to a landing pad.
- unsigned getExceptionPointerRegister() const {
- return ExceptionPointerRegister;
+ /// exception address on entry to an EH pad.
+ virtual unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const {
+ // 0 is guaranteed to be the NoRegister value on all targets
+ return 0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned getExceptionSelectorRegister() const {
- return ExceptionSelectorRegister;
+ virtual unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const {
+ // 0 is guaranteed to be the NoRegister value on all targets
+ return 0;
}
/// Returns the target's jmp_buf size in bytes (if never set, the default is
@@ -987,6 +1006,10 @@ public:
return false;
}
+ /// If the target has a standard location for the unsafe stack pointer,
+ /// returns the address of that location. Otherwise, returns nullptr.
+ virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
+
/// Returns true if a cast between SrcAS and DestAS is a noop.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
return false;
@@ -1009,8 +1032,8 @@ public:
int InstructionOpcodeToISD(unsigned Opcode) const;
/// Estimate the cost of type-legalization and the legalized type.
- std::pair<unsigned, MVT> getTypeLegalizationCost(const DataLayout &DL,
- Type *Ty) const;
+ std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
+ Type *Ty) const;
/// @}
@@ -1018,10 +1041,6 @@ public:
/// \name Helpers for atomic expansion.
/// @{
- /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
- /// and expand AtomicCmpXchgInst.
- virtual bool hasLoadLinkedStoreConditional() const { return false; }
-
/// Perform a load-linked operation on Addr, returning a "Value *" with the
/// corresponding pointee type. This may entail some non-trivial operations to
/// truncate or reconstruct types that will be illegal in the backend. See
@@ -1093,6 +1112,14 @@ public:
}
/// @}
+ // Emits code that executes when the comparison result in the ll/sc
+ // expansion of a cmpxchg instruction is such that the store-conditional will
+ // not execute. This makes it possible to balance out the load-linked with
+ // a dedicated instruction, if desired.
+ // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
+ // be held unnecessarily unless clrex, inserted by this hook, is executed.
+ virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
+
/// Returns true if the given (atomic) store should be expanded by the
/// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
@@ -1102,18 +1129,25 @@ public:
/// Returns true if arguments should be sign-extended in lib calls.
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
return IsSigned;
- }
+ }
+
+ /// Returns how the given (atomic) load should be expanded by the
+ /// IR-level AtomicExpand pass.
+ virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ return AtomicExpansionKind::None;
+ }
- /// Returns true if the given (atomic) load should be expanded by the
- /// IR-level AtomicExpand pass into a load-linked instruction
- /// (through emitLoadLinked()).
- virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; }
+ /// Returns true if the given atomic cmpxchg should be expanded by the
+ /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
+ /// (through emitLoadLinked() and emitStoreConditional()).
+ virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+ return false;
+ }
/// Returns how the IR-level AtomicExpand pass should expand the given
/// AtomicRMW, if at all. Default is to never expand.
- virtual AtomicRMWExpansionKind
- shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
- return AtomicRMWExpansionKind::None;
+ virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
+ return AtomicExpansionKind::None;
}
/// On some platforms, an AtomicRMW that never actually modifies the value
@@ -1204,18 +1238,6 @@ protected:
StackPointerRegisterToSaveRestore = R;
}
- /// If set to a physical register, this sets the register that receives the
- /// exception address on entry to a landing pad.
- void setExceptionPointerRegister(unsigned R) {
- ExceptionPointerRegister = R;
- }
-
- /// If set to a physical register, this sets the register that receives the
- /// exception typeid on entry to a landing pad.
- void setExceptionSelectorRegister(unsigned R) {
- ExceptionSelectorRegister = R;
- }
-
/// Tells the code generator not to expand operations into sequences that use
/// the select operations if possible.
void setSelectIsExpensive(bool isExpensive = true) {
@@ -1244,11 +1266,6 @@ protected:
/// control.
void setJumpIsExpensive(bool isExpensive = true);
- /// Tells the code generator that integer divide is expensive, and if
- /// possible, should be replaced by an alternate sequence of instructions not
- /// containing an integer divide.
- void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
-
/// Tells the code generator that fsqrt is cheap, and should not be replaced
/// with an alternative sequence of instructions.
void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
@@ -1264,10 +1281,6 @@ protected:
BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
}
- /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
- /// signed divide by power of two; let the target handle it.
- void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; }
-
/// Add the specified register class as an available regclass for the
/// specified value type. This indicates the selector can handle values of
/// that class natively.
@@ -1279,7 +1292,7 @@ protected:
/// Remove all register classes.
void clearRegisterClasses() {
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*));
+ std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
AvailableRegClasses.clear();
}
@@ -1302,7 +1315,7 @@ protected:
void setOperationAction(unsigned Op, MVT VT,
LegalizeAction Action) {
assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
- OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action;
+ OpActions[(unsigned)VT.SimpleTy][Op] = Action;
}
/// Indicate that the specified load with extension does not work with the
@@ -1311,7 +1324,7 @@ protected:
LegalizeAction Action) {
assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
MemVT.isValid() && "Table isn't big enough!");
- LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action;
+ LoadExtActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = Action;
}
/// Indicate that the specified truncating store does not work with the
@@ -1319,7 +1332,7 @@ protected:
void setTruncStoreAction(MVT ValVT, MVT MemVT,
LegalizeAction Action) {
assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
- TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action;
+ TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
}
/// Indicate that the specified indexed load does or does not work with the
@@ -1356,12 +1369,13 @@ protected:
LegalizeAction Action) {
assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
"Table isn't big enough!");
- /// The lower 5 bits of the SimpleTy index into Nth 2bit set from the 32-bit
- /// value and the upper 27 bits index into the second dimension of the array
+ assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
+ /// The lower 3 bits of the SimpleTy index into the Nth 4-bit set of the 32-bit
+ /// value, and the upper 29 bits index into the second dimension of the array
/// to select what 32-bit value to use.
- uint32_t Shift = 2 * (VT.SimpleTy & 0xF);
- CondCodeActions[CC][VT.SimpleTy >> 4] &= ~((uint32_t)0x3 << Shift);
- CondCodeActions[CC][VT.SimpleTy >> 4] |= (uint32_t)Action << Shift;
+ uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
+ CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
+ CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
}
/// If Opc/OrigVT is specified as being promoted, the promotion code defaults
@@ -1504,23 +1518,24 @@ public:
return false;
}
- /// Return true if it's free to truncate a value of type Ty1 to type
- /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
+ /// Return true if it's free to truncate a value of type FromTy to type
+ /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
/// by referencing its sub-register AX.
- virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const {
+ /// Targets must return false when FromTy <= ToTy.
+ virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
return false;
}
- /// Return true if a truncation from Ty1 to Ty2 is permitted when deciding
+ /// Return true if a truncation from FromTy to ToTy is permitted when deciding
/// whether a call is in tail position. Typically this means that both results
/// would be assigned to the same register or stack slot, but it could mean
/// the target performs adequate checks of its own before proceeding with the
- /// tail call.
- virtual bool allowTruncateForTailCall(Type * /*Ty1*/, Type * /*Ty2*/) const {
+ /// tail call. Targets must return false when FromTy <= ToTy.
+ virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
return false;
}
- virtual bool isTruncateFree(EVT /*VT1*/, EVT /*VT2*/) const {
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
return false;
}
@@ -1553,19 +1568,21 @@ public:
return isExtFreeImpl(I);
}
- /// Return true if any actual instruction that defines a value of type Ty1
- /// implicitly zero-extends the value to Ty2 in the result register.
+ /// Return true if any actual instruction that defines a value of type FromTy
+ /// implicitly zero-extends the value to ToTy in the result register.
///
- /// This does not necessarily include registers defined in unknown ways, such
- /// as incoming arguments, or copies from unknown virtual registers. Also, if
- /// isTruncateFree(Ty2, Ty1) is true, this does not necessarily apply to
- /// truncate instructions. e.g. on x86-64, all instructions that define 32-bit
- /// values implicit zero-extend the result out to 64 bits.
- virtual bool isZExtFree(Type * /*Ty1*/, Type * /*Ty2*/) const {
+ /// The function should return true when it is likely that the truncate can
+ /// be freely folded with an instruction defining a value of FromTy. If
+ /// the defining instruction is unknown (because you're looking at a
+ /// function argument, PHI, etc.) then the target may require an
+ /// explicit truncate, which is not necessarily free, but this function
+ /// does not deal with those cases.
+ /// Targets must return false when FromTy >= ToTy.
+ virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
return false;
}
- virtual bool isZExtFree(EVT /*VT1*/, EVT /*VT2*/) const {
+ virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
return false;
}
@@ -1699,6 +1716,12 @@ public:
return false;
}
+ // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
+ // even if the vector itself has multiple uses.
+ virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -1755,12 +1778,6 @@ private:
/// combined with "shift" to BitExtract instructions.
bool HasExtractBitsInsn;
- /// Tells the code generator not to expand integer divides by constants into a
- /// sequence of muls, adds, and shifts. This is a hack until a real cost
- /// model is in place. If we ever optimize for size, this will be set to true
- /// unconditionally.
- bool IntDivIsCheap;
-
// Don't expand fsqrt with an approximation based on the inverse sqrt.
bool FsqrtIsCheap;
@@ -1770,10 +1787,6 @@ private:
/// div/rem when the operands are positive and less than 256.
DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
- /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a
- /// signed divide by power of two; let the target handle it.
- bool Pow2SDivIsCheap;
-
/// Tells the code generator that it shouldn't generate extra flow control
/// instructions and should attempt to combine flow control instructions via
/// predication.
@@ -1841,14 +1854,6 @@ private:
/// llvm.savestack/llvm.restorestack should save and restore.
unsigned StackPointerRegisterToSaveRestore;
- /// If set to a physical register, this specifies the register that receives
- /// the exception address on entry to a landing pad.
- unsigned ExceptionPointerRegister;
-
- /// If set to a physical register, this specifies the register that receives
- /// the exception typeid on entry to a landing pad.
- unsigned ExceptionSelectorRegister;
-
/// This indicates the default register class to use for each ValueType the
/// target supports natively.
const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
@@ -1880,17 +1885,17 @@ private:
/// operations are Legal (aka, supported natively by the target), but
/// operations that are not should be described. Note that operations on
/// non-legal value types are not described here.
- uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
+ LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
/// For each load extension type and each value type, keep a LegalizeAction
/// that indicates how instruction selection should deal with a load of a
/// specific value type and extension type.
- uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]
- [ISD::LAST_LOADEXT_TYPE];
+ LegalizeAction LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]
+ [ISD::LAST_LOADEXT_TYPE];
/// For each value type pair keep a LegalizeAction that indicates whether a
/// truncating store of a specific value type and truncating type is legal.
- uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+ LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
/// For each indexed mode and each value type, keep a pair of LegalizeAction
/// that indicates how instruction selection should deal with the load /
@@ -1903,11 +1908,12 @@ private:
/// For each condition code (ISD::CondCode) keep a LegalizeAction that
/// indicates how instruction selection should deal with the condition code.
///
- /// Because each CC action takes up 2 bits, we need to have the array size be
+ /// Because each CC action takes up 4 bits, we need to have the array size be
/// large enough to fit all of the value types. This can be done by rounding
- /// up the MVT::LAST_VALUETYPE value to the next multiple of 16.
- uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 15) / 16];
+ /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
+ uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
+protected:
ValueTypeActionImpl ValueTypeActions;
private:
@@ -1947,6 +1953,12 @@ protected:
/// is[Z|FP]ExtFree of the related types is not true.
virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
+  /// Depth that GatherAllAliases should continue looking for chain
+  /// dependencies when trying to find a more preferable chain. As an
+ /// approximation, this should be more than the number of consecutive stores
+ /// expected to be merged.
+ unsigned GatherAllAliasesMaxDepth;
+
/// \brief Specify maximum number of store instructions per memset call.
///
/// When lowering \@llvm.memset this field specifies the maximum number of
@@ -1993,7 +2005,7 @@ protected:
unsigned MaxStoresPerMemmove;
/// Maximum number of store instructions that may be substituted for a call to
- /// memmove, used for functions with OpSize attribute.
+ /// memmove, used for functions with OptSize attribute.
unsigned MaxStoresPerMemmoveOptSize;
/// Tells the code generator that select is more expensive than a branch if
@@ -2087,9 +2099,9 @@ public:
/// Returns a pair of (return value, chain).
/// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
- EVT RetVT, const SDValue *Ops,
- unsigned NumOps, bool isSigned,
- SDLoc dl, bool doesNotReturn = false,
+ EVT RetVT, ArrayRef<SDValue> Ops,
+ bool isSigned, SDLoc dl,
+ bool doesNotReturn = false,
bool isReturnValueUsed = true) const;
//===--------------------------------------------------------------------===//
@@ -2251,6 +2263,29 @@ public:
return false;
}
+  /// Return true if the target supports handling a subset of CSRs for the
+  /// given machine function explicitly via copies.
+ virtual bool supportSplitCSR(MachineFunction *MF) const {
+ return false;
+ }
+
+ /// Perform necessary initialization to handle a subset of CSRs explicitly
+ /// via copies. This function is called at the beginning of instruction
+ /// selection.
+ virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// Insert explicit copies in entry and exit blocks. We copy a subset of
+ /// CSRs to virtual registers in the entry block, and copy them back to
+ /// physical registers in the exit blocks. This function is called at the end
+ /// of instruction selection.
+ virtual void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ llvm_unreachable("Not Implemented");
+ }
+
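
A sketch of an override enabling the hooks above, modeled on the kind of calling convention this mechanism was designed for; hypothetical as written:

bool MyTargetLowering::supportSplitCSR(MachineFunction *MF) const {
  // Only profitable when calls along the common path rarely clobber CSRs.
  return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
         MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
}
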
//===--------------------------------------------------------------------===//
// Lowering methods - These methods must be implemented by targets so that
// the SelectionDAGBuilder code knows how to lower these.
@@ -2726,16 +2761,21 @@ public:
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
bool IsAfterLegalization,
std::vector<SDNode *> *Created) const;
+
+ /// Targets may override this function to provide custom SDIV lowering for
+ /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
+ /// assumes SDIV is expensive and replaces it with a series of other integer
+ /// operations.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
- std::vector<SDNode *> *Created) const {
- return SDValue();
- }
+ std::vector<SDNode *> *Created) const;
- /// Indicate whether this target prefers to combine the given number of FDIVs
- /// with the same divisor.
- virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
- return false;
+ /// Indicate whether this target prefers to combine FDIVs with the same
+ /// divisor. If the transform should never be done, return zero. If the
+ /// transform should be done, return the minimum number of divisor uses
+ /// that must exist.
+ virtual unsigned combineRepeatedFPDivisors() const {
+ return 0;
}
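
For illustration, a hypothetical target where fdiv costs far more than fmul could require at least two uses of the same divisor before the combine fires:

unsigned MyTargetLowering::combineRepeatedFPDivisors() const {
  return 2; // x/d and y/d become r = 1.0/d; x*r; y*r
}
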
/// Hooks for building estimates in place of slower divisions and square
@@ -2821,6 +2861,10 @@ public:
virtual bool useLoadStackGuardNode() const {
return false;
}
+
+ /// Lower TLS global address SDNode for target independent emulated TLS model.
+ virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 5b626c2..cb52698 100644
--- a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -42,16 +42,15 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
void operator=(const TargetLoweringObjectFile&) = delete;
protected:
- const DataLayout *DL;
bool SupportIndirectSymViaGOTPCRel;
bool SupportGOTPCRelWithOffset;
public:
MCContext &getContext() const { return *Ctx; }
- TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr),
- SupportIndirectSymViaGOTPCRel(false),
- SupportGOTPCRelWithOffset(true) {}
+ TargetLoweringObjectFile()
+ : MCObjectFileInfo(), Ctx(nullptr), SupportIndirectSymViaGOTPCRel(false),
+ SupportGOTPCRelWithOffset(true) {}
virtual ~TargetLoweringObjectFile();
@@ -60,8 +59,7 @@ public:
/// implementations a chance to set up their default sections.
virtual void Initialize(MCContext &ctx, const TargetMachine &TM);
- virtual void emitPersonalityValue(MCStreamer &Streamer,
- const TargetMachine &TM,
+  virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL,
const MCSymbol *Sym) const;
/// Emit the module flags that the platform cares about.
@@ -71,7 +69,8 @@ public:
/// Given a constant with the SectionKind, return a section that it should be
/// placed in.
- virtual MCSection *getSectionForConstant(SectionKind Kind,
+ virtual MCSection *getSectionForConstant(const DataLayout &DL,
+ SectionKind Kind,
const Constant *C) const;
/// Classify the specified global variable into a set of target independent
@@ -94,8 +93,7 @@ public:
}
virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName,
- const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang,
+ const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM) const;
virtual MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h
index f1e9d17..74e91b5 100644
--- a/contrib/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm/include/llvm/Target/TargetMachine.h
@@ -76,7 +76,12 @@ protected: // Can only create subclasses.
/// The Target that this machine was created for.
const Target &TheTarget;
- /// For ABI type size and alignment.
+ /// DataLayout for the target: keep ABI type size and alignment.
+ ///
+ /// The DataLayout is created based on the string representation provided
+ /// during construction. It is kept here only to avoid reparsing the string
+ /// but should not really be used during compilation, because it has an
+ /// internal cache that is context specific.
const DataLayout DL;
/// Triple string, CPU name, and target feature strings the TargetMachine
@@ -97,6 +102,12 @@ protected: // Can only create subclasses.
const MCSubtargetInfo *STI;
unsigned RequireStructuredCFG : 1;
+ unsigned O0WantsFastISel : 1;
+
+  /// This API is here to support the C API, deprecated in the 3.7 release.
+  /// It should never be used outside of legacy existing clients.
+ const DataLayout &getDataLayout() const { return DL; }
+ friend struct C_API_PRIVATE_ACCESS;
public:
mutable TargetOptions Options;
@@ -125,15 +136,23 @@ public:
return *static_cast<const STC*>(getSubtargetImpl(F));
}
- /// Deprecated in 3.7, will be removed in 3.8. Use createDataLayout() instead.
- ///
- /// This method returns a pointer to the DataLayout for the target. It should
- /// be unchanging for every subtarget.
- const DataLayout *getDataLayout() const { return &DL; }
-
/// Create a DataLayout.
const DataLayout createDataLayout() const { return DL; }
+  /// Test if a DataLayout is compatible with the CodeGen for this target.
+ ///
+ /// The LLVM Module owns a DataLayout that is used for the target independent
+ /// optimizations and code generation. This hook provides a target specific
+ /// check on the validity of this DataLayout.
+ bool isCompatibleDataLayout(const DataLayout &Candidate) const {
+ return DL == Candidate;
+ }
+
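
A hypothetical sanity check a client might perform with this hook (sketch only):

void verifyModuleLayout(const Module &M, const TargetMachine &TM) {
  if (!TM.isCompatibleDataLayout(M.getDataLayout()))
    report_fatal_error("module DataLayout is incompatible with this target");
}
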
+ /// Get the pointer size for this target.
+ ///
+ /// This is the only time the DataLayout in the TargetMachine is used.
+ unsigned getPointerSize() const { return DL.getPointerSize(); }
+
/// \brief Reset the target options based on the function's attributes.
// FIXME: Remove TargetOptions that affect per-function code generation
// from TargetMachine.
@@ -172,6 +191,8 @@ public:
void setOptLevel(CodeGenOpt::Level Level) const;
void setFastISel(bool Enable) { Options.EnableFastISel = Enable; }
+ bool getO0WantsFastISel() { return O0WantsFastISel; }
+ void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; }
bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }
@@ -234,6 +255,13 @@ public:
return true;
}
+ /// True if subtarget inserts the final scheduling pass on its own.
+ ///
+ /// Branch relaxation, which must happen after block placement, can
+ /// on some targets (e.g. SystemZ) expose additional post-RA
+ /// scheduling opportunities.
+  virtual bool targetSchedulesPostRAScheduling() const { return false; }
+
void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const;
diff --git a/contrib/llvm/include/llvm/Target/TargetOpcodes.h b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
index 5019719..db37bdb 100644
--- a/contrib/llvm/include/llvm/Target/TargetOpcodes.h
+++ b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
@@ -126,8 +126,12 @@ enum {
/// Loading instruction that may page fault, bundled with associated
/// information on how to handle such a page fault. It is intended to support
/// "zero cost" null checks in managed languages by allowing LLVM to fold
- /// comparisions into existing memory operations.
+ /// comparisons into existing memory operations.
FAULTING_LOAD_OP = 22,
+
+  /// GENERIC_OP_END - This must be the last enum value in this list.
+  /// The target-specific post-isel opcode values start here.
+ GENERIC_OP_END = FAULTING_LOAD_OP,
};
} // end namespace TargetOpcode
} // end namespace llvm
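
An illustrative helper (not part of the header) showing how the new sentinel would typically be used — opcodes above the generic range are target-specific post-isel opcodes:

static inline bool isTargetSpecificOpcode(unsigned Opcode) {
  return Opcode > TargetOpcode::GENERIC_OP_END;
}
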
diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h
index d52cb60..d98d0fa 100644
--- a/contrib/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm/include/llvm/Target/TargetOptions.h
@@ -58,24 +58,53 @@ namespace llvm {
};
}
+ enum class EABI {
+ Unknown,
+ Default, // Default means not specified
+ EABI4, // Target-specific (either 4, 5 or gnu depending on triple).
+ EABI5,
+ GNU
+ };
+
+ /// Identify a debugger for "tuning" the debug info.
+ ///
+ /// The "debugger tuning" concept allows us to present a more intuitive
+ /// interface that unpacks into different sets of defaults for the various
+ /// individual feature-flag settings, that suit the preferences of the
+ /// various debuggers. However, it's worth remembering that debuggers are
+ /// not the only consumers of debug info, and some variations in DWARF might
+ /// better be treated as target/platform issues. Fundamentally,
+ /// o if the feature is useful (or not) to a particular debugger, regardless
+ /// of the target, that's a tuning decision;
+ /// o if the feature is useful (or not) on a particular platform, regardless
+ /// of the debugger, that's a target decision.
+ /// It's not impossible to see both factors in some specific case.
+ ///
+ /// The "tuning" should be used to set defaults for individual feature flags
+ /// in DwarfDebug; if a given feature has a more specific command-line option,
+ /// that option should take precedence over the tuning.
+ enum class DebuggerKind {
+ Default, // No specific tuning requested.
+ GDB, // Tune debug info for gdb.
+ LLDB, // Tune debug info for lldb.
+ SCE // Tune debug info for SCE targets (e.g. PS4).
+ };
+
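
Illustrative only — the real feature-flag defaults live in DwarfDebug and may differ — but this shows the intended "tuning unpacks into defaults" shape:

bool defaultToTypeUnits(DebuggerKind Tuning) {
  switch (Tuning) {
  case DebuggerKind::GDB:     return true;  // assumption for the sketch
  case DebuggerKind::LLDB:    return false; // assumption for the sketch
  case DebuggerKind::SCE:     return false; // assumption for the sketch
  case DebuggerKind::Default: return false;
  }
  return false;
}
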
class TargetOptions {
public:
TargetOptions()
- : PrintMachineCode(false),
- LessPreciseFPMADOption(false), UnsafeFPMath(false),
- NoInfsFPMath(false), NoNaNsFPMath(false),
- HonorSignDependentRoundingFPMathOption(false),
- NoZerosInBSS(false),
- GuaranteedTailCallOpt(false),
- StackAlignmentOverride(0),
+ : PrintMachineCode(false), LessPreciseFPMADOption(false),
+ UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
+ HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
+ GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
EnableFastISel(false), PositionIndependentExecutable(false),
UseInitArray(false), DisableIntegratedAS(false),
CompressDebugSections(false), FunctionSections(false),
DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
- FloatABIType(FloatABI::Default),
+ EmulatedTLS(false), FloatABIType(FloatABI::Default),
AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
- JTType(JumpTable::Single),
- ThreadModel(ThreadModel::POSIX) {}
+ JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX),
+ EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// option is specified on the command line, and should enable debugging
@@ -172,6 +201,10 @@ namespace llvm {
/// Emit target-specific trap instruction for 'unreachable' IR instructions.
unsigned TrapUnreachable : 1;
+    /// EmulatedTLS - This flag enables the emulated TLS model, using the
+    /// emutls functions in the runtime library.
+ unsigned EmulatedTLS : 1;
+
/// FloatABIType - This setting is set when the -float-abi=xxx option is specified
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
@@ -200,7 +233,7 @@ namespace llvm {
/// This class encapsulates options for reciprocal-estimate code generation.
TargetRecip Reciprocals;
-
+
/// JTType - This flag specifies the type of jump-instruction table to
/// create for functions that have the jumptable attribute.
JumpTable::JumpTableType JTType;
@@ -209,6 +242,12 @@ namespace llvm {
/// for things like atomics
ThreadModel::Model ThreadModel;
+ /// EABIVersion - This flag specifies the EABI version
+ EABI EABIVersion;
+
+ /// Which debugger to tune for.
+ DebuggerKind DebuggerTuning;
+
/// Machine level options.
MCTargetOptions MCOptions;
};
@@ -231,11 +270,14 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(PositionIndependentExecutable) &&
ARE_EQUAL(UseInitArray) &&
ARE_EQUAL(TrapUnreachable) &&
+ ARE_EQUAL(EmulatedTLS) &&
ARE_EQUAL(FloatABIType) &&
ARE_EQUAL(AllowFPOpFusion) &&
ARE_EQUAL(Reciprocals) &&
ARE_EQUAL(JTType) &&
ARE_EQUAL(ThreadModel) &&
+ ARE_EQUAL(EABIVersion) &&
+ ARE_EQUAL(DebuggerTuning) &&
ARE_EQUAL(MCOptions);
#undef ARE_EQUAL
}
diff --git a/contrib/llvm/include/llvm/Target/TargetRecip.h b/contrib/llvm/include/llvm/Target/TargetRecip.h
index 4cc3672..210d493 100644
--- a/contrib/llvm/include/llvm/Target/TargetRecip.h
+++ b/contrib/llvm/include/llvm/Target/TargetRecip.h
@@ -31,20 +31,20 @@ public:
/// Initialize all or part of the operations from command-line options or
/// a front end.
TargetRecip(const std::vector<std::string> &Args);
-
+
/// Set whether a particular reciprocal operation is enabled and how many
/// refinement steps are needed when using it. Use "all" to set enablement
/// and refinement steps for all operations.
- void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps);
+ void setDefaults(StringRef Key, bool Enable, unsigned RefSteps);
/// Return true if the reciprocal operation has been enabled by default or
/// from the command-line. Return false if the operation has been disabled
/// by default or from the command-line.
- bool isEnabled(const StringRef &Key) const;
+ bool isEnabled(StringRef Key) const;
/// Return the number of iterations necessary to refine the
/// the result of a machine instruction for the given reciprocal operation.
- unsigned getRefinementSteps(const StringRef &Key) const;
+ unsigned getRefinementSteps(StringRef Key) const;
bool operator==(const TargetRecip &Other) const;
@@ -52,14 +52,14 @@ private:
enum {
Uninitialized = -1
};
-
+
struct RecipParams {
int8_t Enabled;
int8_t RefinementSteps;
-
+
RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
};
-
+
std::map<StringRef, RecipParams> RecipMap;
typedef std::map<StringRef, RecipParams>::iterator RecipIter;
typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
diff --git a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
index 0ee936a..fccaad4 100644
--- a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -21,6 +21,8 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Printable.h"
#include <cassert>
#include <functional>
@@ -32,6 +34,24 @@ class RegScavenger;
template<class T> class SmallVectorImpl;
class VirtRegMap;
class raw_ostream;
+class LiveRegMatrix;
+
+/// A bitmask representing the covering of a register with sub-registers.
+///
+/// This is typically used to track liveness at sub-register granularity.
+/// Lane masks for sub-register indices are similar to register units for
+/// physical registers. The individual bits in a lane mask can't be assigned
+/// any specific meaning. They can be used to check if two sub-register
+/// indices overlap.
+///
+/// If the target has a register such that:
+///
+/// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
+///
+/// then:
+///
+/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
+typedef unsigned LaneBitmask;
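
A sketch of the one query lane masks support — the overlap test (helper name is illustrative):

bool subRegIndicesMayOverlap(const TargetRegisterInfo &TRI,
                             unsigned SubA, unsigned SubB) {
  LaneBitmask A = TRI.getSubRegIndexLaneMask(SubA);
  LaneBitmask B = TRI.getSubRegIndexLaneMask(SubB);
  // A zero intersection proves the indices are disjoint; a nonzero one only
  // indicates possible overlap, since lane masks can be imprecise.
  return (A & B) != 0;
}
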
class TargetRegisterClass {
public:
@@ -45,7 +65,7 @@ public:
const vt_iterator VTs;
const uint32_t *SubClassMask;
const uint16_t *SuperRegIndices;
- const unsigned LaneMask;
+ const LaneBitmask LaneMask;
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
const uint8_t AllocationPriority;
@@ -54,8 +74,7 @@ public:
const sc_iterator SuperClasses;
ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&);
- /// getID() - Return the register class ID number.
- ///
+ /// Return the register class ID number.
unsigned getID() const { return MC->getID(); }
/// begin/end - Return all of the registers in this class.
@@ -63,46 +82,42 @@ public:
iterator begin() const { return MC->begin(); }
iterator end() const { return MC->end(); }
- /// getNumRegs - Return the number of registers in this class.
- ///
+ /// Return the number of registers in this class.
unsigned getNumRegs() const { return MC->getNumRegs(); }
- /// getRegister - Return the specified register in the class.
- ///
+ /// Return the specified register in the class.
unsigned getRegister(unsigned i) const {
return MC->getRegister(i);
}
- /// contains - Return true if the specified register is included in this
- /// register class. This does not include virtual registers.
+ /// Return true if the specified register is included in this register class.
+ /// This does not include virtual registers.
bool contains(unsigned Reg) const {
return MC->contains(Reg);
}
- /// contains - Return true if both registers are in this class.
+ /// Return true if both registers are in this class.
bool contains(unsigned Reg1, unsigned Reg2) const {
return MC->contains(Reg1, Reg2);
}
- /// getSize - Return the size of the register in bytes, which is also the size
+ /// Return the size of the register in bytes, which is also the size
/// of a stack slot allocated to hold a spilled copy of this register.
unsigned getSize() const { return MC->getSize(); }
- /// getAlignment - Return the minimum required alignment for a register of
- /// this class.
+ /// Return the minimum required alignment for a register of this class.
unsigned getAlignment() const { return MC->getAlignment(); }
- /// getCopyCost - Return the cost of copying a value between two registers in
- /// this class. A negative number means the register class is very expensive
+ /// Return the cost of copying a value between two registers in this class.
+ /// A negative number means the register class is very expensive
/// to copy e.g. status flag register classes.
int getCopyCost() const { return MC->getCopyCost(); }
- /// isAllocatable - Return true if this register class may be used to create
- /// virtual registers.
+ /// Return true if this register class may be used to create virtual
+ /// registers.
bool isAllocatable() const { return MC->isAllocatable(); }
- /// hasType - return true if this TargetRegisterClass has the ValueType vt.
- ///
+ /// Return true if this TargetRegisterClass has the ValueType vt.
bool hasType(MVT vt) const {
for(int i = 0; VTs[i] != MVT::Other; ++i)
if (MVT(VTs[i]) == vt)
@@ -122,41 +137,39 @@ public:
return I;
}
- /// hasSubClass - return true if the specified TargetRegisterClass
+ /// Return true if the specified TargetRegisterClass
/// is a proper sub-class of this TargetRegisterClass.
bool hasSubClass(const TargetRegisterClass *RC) const {
return RC != this && hasSubClassEq(RC);
}
- /// hasSubClassEq - Returns true if RC is a sub-class of or equal to this
- /// class.
+ /// Returns true if RC is a sub-class of or equal to this class.
bool hasSubClassEq(const TargetRegisterClass *RC) const {
unsigned ID = RC->getID();
return (SubClassMask[ID / 32] >> (ID % 32)) & 1;
}
- /// hasSuperClass - return true if the specified TargetRegisterClass is a
+ /// Return true if the specified TargetRegisterClass is a
/// proper super-class of this TargetRegisterClass.
bool hasSuperClass(const TargetRegisterClass *RC) const {
return RC->hasSubClass(this);
}
- /// hasSuperClassEq - Returns true if RC is a super-class of or equal to this
- /// class.
+ /// Returns true if RC is a super-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const {
return RC->hasSubClassEq(this);
}
- /// getSubClassMask - Returns a bit vector of subclasses, including this one.
+ /// Returns a bit vector of subclasses, including this one.
/// The vector is indexed by class IDs, see hasSubClassEq() above for how to
/// use it.
const uint32_t *getSubClassMask() const {
return SubClassMask;
}
- /// getSuperRegIndices - Returns a 0-terminated list of sub-register indices
- /// that project some super-register class into this register class. The list
- /// has an entry for each Idx such that:
+ /// Returns a 0-terminated list of sub-register indices that project some
+ /// super-register class into this register class. The list has an entry for
+ /// each Idx such that:
///
/// There exists SuperRC where:
/// For all Reg in SuperRC:
@@ -166,23 +179,23 @@ public:
return SuperRegIndices;
}
- /// getSuperClasses - Returns a NULL terminated list of super-classes. The
+ /// Returns a NULL-terminated list of super-classes. The
/// classes are ordered by ID which is also a topological ordering from large
/// to small classes. The list does NOT include the current class.
sc_iterator getSuperClasses() const {
return SuperClasses;
}
- /// isASubClass - return true if this TargetRegisterClass is a subset
+ /// Return true if this TargetRegisterClass is a subset
/// class of at least one other TargetRegisterClass.
bool isASubClass() const {
return SuperClasses[0] != nullptr;
}
- /// getRawAllocationOrder - Returns the preferred order for allocating
- /// registers from this register class in MF. The raw order comes directly
- /// from the .td file and may include reserved registers that are not
- /// allocatable. Register allocators should also make sure to allocate
+ /// Returns the preferred order for allocating registers from this register
+ /// class in MF. The raw order comes directly from the .td file and may
+ /// include reserved registers that are not allocatable.
+ /// Register allocators should also make sure to allocate
/// callee-saved registers only after all the volatiles are used. The
/// RegisterClassInfo class provides filtered allocation orders with
/// callee-saved registers moved to the end.
@@ -200,13 +213,13 @@ public:
/// Returns the combination of all lane masks of register in this class.
/// The lane masks of the registers are the combination of all lane masks
/// of their subregisters.
- unsigned getLaneMask() const {
+ LaneBitmask getLaneMask() const {
return LaneMask;
}
};
-/// TargetRegisterInfoDesc - Extra information, not in MCRegisterDesc, about
-/// registers. These are used by codegen, not by MC.
+/// Extra information, not in MCRegisterDesc, about registers.
+/// These are used by codegen, not by MC.
struct TargetRegisterInfoDesc {
unsigned CostPerUse; // Extra cost of instructions using register.
bool inAllocatableClass; // Register belongs to an allocatable regclass.
@@ -232,7 +245,7 @@ private:
const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen
const char *const *SubRegIndexNames; // Names of subreg indexes.
// Pointer to array of lane masks, one per sub-reg index.
- const unsigned *SubRegIndexLaneMasks;
+ const LaneBitmask *SubRegIndexLaneMasks;
regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses
unsigned CoveringLanes;
@@ -242,7 +255,7 @@ protected:
regclass_iterator RegClassBegin,
regclass_iterator RegClassEnd,
const char *const *SRINames,
- const unsigned *SRILaneMasks,
+ const LaneBitmask *SRILaneMasks,
unsigned CoveringLanes);
virtual ~TargetRegisterInfo();
public:
@@ -270,77 +283,74 @@ public:
return int(Reg) >= (1 << 30);
}
- /// stackSlot2Index - Compute the frame index from a register value
- /// representing a stack slot.
+ /// Compute the frame index from a register value representing a stack slot.
static int stackSlot2Index(unsigned Reg) {
assert(isStackSlot(Reg) && "Not a stack slot");
return int(Reg - (1u << 30));
}
- /// index2StackSlot - Convert a non-negative frame index to a stack slot
- /// register value.
+ /// Convert a non-negative frame index to a stack slot register value.
static unsigned index2StackSlot(int FI) {
assert(FI >= 0 && "Cannot hold a negative frame index.");
return FI + (1u << 30);
}
- /// isPhysicalRegister - Return true if the specified register number is in
+ /// Return true if the specified register number is in
/// the physical register namespace.
static bool isPhysicalRegister(unsigned Reg) {
assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
return int(Reg) > 0;
}
- /// isVirtualRegister - Return true if the specified register number is in
+ /// Return true if the specified register number is in
/// the virtual register namespace.
static bool isVirtualRegister(unsigned Reg) {
assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
return int(Reg) < 0;
}
- /// virtReg2Index - Convert a virtual register number to a 0-based index.
+ /// Convert a virtual register number to a 0-based index.
/// The first virtual register in a function will get the index 0.
static unsigned virtReg2Index(unsigned Reg) {
assert(isVirtualRegister(Reg) && "Not a virtual register");
return Reg & ~(1u << 31);
}
- /// index2VirtReg - Convert a 0-based index to a virtual register number.
+ /// Convert a 0-based index to a virtual register number.
/// This is the inverse operation of VirtReg2IndexFunctor below.
static unsigned index2VirtReg(unsigned Index) {
return Index | (1u << 31);
}
- /// getMinimalPhysRegClass - Returns the Register Class of a physical
- /// register of the given type, picking the most sub register class of
- /// the right type that contains this physreg.
+ /// Returns the Register Class of a physical register of the given type,
+  /// picking the most specific sub-register class of the right type that
+  /// contains this physreg.
const TargetRegisterClass *
getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const;
- /// getAllocatableClass - Return the maximal subclass of the given register
- /// class that is alloctable, or NULL.
+ /// Return the maximal subclass of the given register class that is
+  /// allocatable, or NULL if there is none.
const TargetRegisterClass *
getAllocatableClass(const TargetRegisterClass *RC) const;
- /// getAllocatableSet - Returns a bitset indexed by register number
- /// indicating if a register is allocatable or not. If a register class is
- /// specified, returns the subset for the class.
+ /// Returns a bitset indexed by register number indicating if a register is
+ /// allocatable or not. If a register class is specified, returns the subset
+ /// for the class.
BitVector getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC = nullptr) const;
- /// getCostPerUse - Return the additional cost of using this register instead
+ /// Return the additional cost of using this register instead
/// of other registers in its class.
unsigned getCostPerUse(unsigned RegNo) const {
return InfoDesc[RegNo].CostPerUse;
}
- /// isInAllocatableClass - Return true if the register is in the allocation
- /// of any register class.
+ /// Return true if the register is in the allocation of any register class.
bool isInAllocatableClass(unsigned RegNo) const {
return InfoDesc[RegNo].inAllocatableClass;
}
- /// getSubRegIndexName - Return the human-readable symbolic target-specific
+ /// Return the human-readable symbolic target-specific
/// name for the specified SubRegIndex.
const char *getSubRegIndexName(unsigned SubIdx) const {
assert(SubIdx && SubIdx < getNumSubRegIndices() &&
@@ -348,44 +358,15 @@ public:
return SubRegIndexNames[SubIdx-1];
}
- /// getSubRegIndexLaneMask - Return a bitmask representing the parts of a
- /// register that are covered by SubIdx.
+ /// Return a bitmask representing the parts of a register that are covered by
+ /// SubIdx \see LaneBitmask.
///
- /// Lane masks for sub-register indices are similar to register units for
- /// physical registers. The individual bits in a lane mask can't be assigned
- /// any specific meaning. They can be used to check if two sub-register
- /// indices overlap.
- ///
- /// If the target has a register such that:
- ///
- /// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
- ///
- /// then:
- ///
- /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
- ///
- /// The converse is not necessarily true. If two lane masks have a common
- /// bit, the corresponding sub-registers may not overlap, but it can be
- /// assumed that they usually will.
/// SubIdx == 0 is allowed, it has the lane mask ~0u.
- unsigned getSubRegIndexLaneMask(unsigned SubIdx) const {
+ LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const {
assert(SubIdx < getNumSubRegIndices() && "This is not a subregister index");
return SubRegIndexLaneMasks[SubIdx];
}
- /// Returns true if the given lane mask is imprecise.
- ///
- /// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of
- /// bits, so for targets with more than 31 disjunct subregister indices there
- /// may be cases where:
- /// getSubReg(Reg,A) does not overlap getSubReg(Reg,B)
- /// but we still have
- /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0.
- /// This function returns true in those cases.
- static bool isImpreciseLaneMask(unsigned LaneMask) {
- return LaneMask & 0x80000000u;
- }
-
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
/// used to determine if sub-registers overlap - they can't be used to
/// determine if a set of sub-registers completely cover another
@@ -409,10 +390,10 @@ public:
///
/// If (MaskA & ~(MaskB & Covering)) == 0, then SubA is completely covered by
/// SubB.
- unsigned getCoveringLanes() const { return CoveringLanes; }
+ LaneBitmask getCoveringLanes() const { return CoveringLanes; }
- /// regsOverlap - Returns true if the two registers are equal or alias each
- /// other. The registers may be virtual register.
+ /// Returns true if the two registers are equal or alias each other.
+ /// The registers may be virtual registers.
bool regsOverlap(unsigned regA, unsigned regB) const {
if (regA == regB) return true;
if (isVirtualRegister(regA) || isVirtualRegister(regB))
@@ -429,7 +410,7 @@ public:
return false;
}
- /// hasRegUnit - Returns true if Reg contains RegUnit.
+ /// Returns true if Reg contains RegUnit.
bool hasRegUnit(unsigned Reg, unsigned RegUnit) const {
for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units)
if (*Units == RegUnit)
@@ -437,18 +418,23 @@ public:
return false;
}
- /// getCalleeSavedRegs - Return a null-terminated list of all of the
- /// callee saved registers on this target. The register should be in the
- /// order of desired callee-save stack frame offset. The first register is
- /// closest to the incoming stack pointer if stack grows down, and vice versa.
+ /// Return a null-terminated list of all of the callee-saved registers on
+ /// this target. The register should be in the order of desired callee-save
+ /// stack frame offset. The first register is closest to the incoming stack
+ /// pointer if stack grows down, and vice versa.
///
virtual const MCPhysReg*
getCalleeSavedRegs(const MachineFunction *MF) const = 0;
- /// getCallPreservedMask - Return a mask of call-preserved registers for the
- /// given calling convention on the current function. The mask should
- /// include all call-preserved aliases. This is used by the register
- /// allocator to determine which registers can be live across a call.
+ virtual const MCPhysReg*
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+ return nullptr;
+ }
+
+ /// Return a mask of call-preserved registers for the given calling convention
+ /// on the current function. The mask should include all call-preserved
+ /// aliases. This is used by the register allocator to determine which
+ /// registers can be live across a call.
///
/// The mask is an array containing (TRI::getNumRegs()+31)/32 entries.
/// A set bit indicates that all bits of the corresponding register are
@@ -469,13 +455,18 @@ public:
return nullptr;
}
+ /// Return a register mask that clobbers everything.
+ virtual const uint32_t *getNoPreservedMask() const {
+ llvm_unreachable("target does not provide no presered mask");
+ }
+
/// Return all the call-preserved register masks defined for this target.
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
virtual ArrayRef<const char *> getRegMaskNames() const = 0;
- /// getReservedRegs - Returns a bitset indexed by physical register number
- /// indicating if a register is a special register that has particular uses
- /// and should be considered unavailable at all times, e.g. SP, RA. This is
+ /// Returns a bitset indexed by physical register number indicating if a
+ /// register is a special register that has particular uses and should be
+ /// considered unavailable at all times, e.g. SP, RA. This is
/// used by register scavenger to determine what registers are free.
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
@@ -484,14 +475,14 @@ public:
/// remove pseudo-registers that should be ignored).
virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const { }
- /// getMatchingSuperReg - Return a super-register of the specified register
+ /// Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
const TargetRegisterClass *RC) const {
return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC);
}
- /// getMatchingSuperRegClass - Return a subclass of the specified register
+ /// Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
/// specified sub-register index which is in the specified register class B.
///
@@ -500,7 +491,16 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
- /// getSubClassWithSubReg - Returns the largest legal sub-class of RC that
+ // For a copy-like instruction that defines a register of class DefRC with
+ // subreg index DefSubReg, reading from another source with class SrcRC and
+  // subregister SrcSubReg, return true if this copy instruction is preferable;
+  // otherwise an earlier use should be reused instead.
+ virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const;
+
+ /// Returns the largest legal sub-class of RC that
/// supports the sub-register index Idx.
/// If no such sub-class exists, return NULL.
/// If all registers in RC already have an Idx sub-register, return RC.
@@ -518,7 +518,7 @@ public:
return RC;
}
- /// composeSubRegIndices - Return the subregister index you get from composing
+ /// Return the subregister index you get from composing
/// two subregister indices.
///
/// The special null sub-register index composes as the identity.
@@ -541,10 +541,11 @@ public:
/// Transforms a LaneMask computed for one subregister to the lanemask that
/// would have been computed when composing the subsubregisters with IdxA
/// first. @sa composeSubRegIndices()
- unsigned composeSubRegIndexLaneMask(unsigned IdxA, unsigned LaneMask) const {
+ LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA,
+ LaneBitmask Mask) const {
if (!IdxA)
- return LaneMask;
- return composeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
+ return Mask;
+ return composeSubRegIndexLaneMaskImpl(IdxA, Mask);
}
/// Debugging helper: dump register in human readable form to dbgs() stream.
@@ -558,13 +559,13 @@ protected:
}
/// Overridden by TableGen in targets that have sub-registers.
- virtual unsigned
- composeSubRegIndexLaneMaskImpl(unsigned, unsigned) const {
+ virtual LaneBitmask
+ composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
llvm_unreachable("Target has no sub-registers");
}
public:
- /// getCommonSuperRegClass - Find a common super-register class if it exists.
+ /// Find a common super-register class if it exists.
///
/// Find a register class, SuperRC and two sub-register indices, PreA and
/// PreB, such that:
@@ -605,44 +606,47 @@ public:
return (unsigned)(regclass_end()-regclass_begin());
}
- /// getRegClass - Returns the register class associated with the enumeration
- /// value. See class MCOperandInfo.
+ /// Returns the register class associated with the enumeration value.
+ /// See class MCOperandInfo.
const TargetRegisterClass *getRegClass(unsigned i) const {
assert(i < getNumRegClasses() && "Register Class ID out of range");
return RegClassBegin[i];
}
- /// getRegClassName - Returns the name of the register class.
+ /// Returns the name of the register class.
const char *getRegClassName(const TargetRegisterClass *Class) const {
return MCRegisterInfo::getRegClassName(Class->MC);
}
- /// getCommonSubClass - find the largest common subclass of A and B. Return
- /// NULL if there is no common subclass.
+ /// Find the largest common subclass of A and B.
+ /// Return NULL if there is no common subclass.
+ /// The common subclass should contain
+ /// simple value type SVT if it is not the Any type.
const TargetRegisterClass *
getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) const;
+ const TargetRegisterClass *B,
+ const MVT::SimpleValueType SVT =
+ MVT::SimpleValueType::Any) const;
- /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
- /// values. If a target supports multiple different pointer register classes,
+ /// Returns a TargetRegisterClass used for pointer values.
+ /// If a target supports multiple different pointer register classes,
/// kind specifies which one is indicated.
virtual const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const {
llvm_unreachable("Target didn't implement getPointerRegClass!");
}
- /// getCrossCopyRegClass - Returns a legal register class to copy a register
- /// in the specified class to or from. If it is possible to copy the register
- /// directly without using a cross register class copy, return the specified
- /// RC. Returns NULL if it is not possible to copy between a two registers of
- /// the specified class.
+ /// Returns a legal register class to copy a register in the specified class
+ /// to or from. If it is possible to copy the register directly without using
+ /// a cross register class copy, return the specified RC. Returns NULL if it
+ /// is not possible to copy between two registers of the specified class.
virtual const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const {
return RC;
}
- /// getLargestLegalSuperClass - Returns the largest super class of RC that is
- /// legal to use in the current sub-target and has the same spill size.
+ /// Returns the largest super class of RC that is legal to use in the current
+ /// sub-target and has the same spill size.
/// The returned register class can be used to create virtual registers which
/// means that all its registers can be copied and spilled.
virtual const TargetRegisterClass *
@@ -653,9 +657,9 @@ public:
return RC;
}
- /// getRegPressureLimit - Return the register pressure "high water mark" for
- /// the specific register class. The scheduler is in high register pressure
- /// mode (for the specific register class) if it goes over the limit.
+ /// Return the register pressure "high water mark" for the specific register
+ /// class. The scheduler is in high register pressure mode (for the specific
+ /// register class) if it goes over the limit.
///
/// Note: this is the old register pressure model that relies on a manually
/// specified representative register class per value type.
@@ -664,6 +668,15 @@ public:
return 0;
}
+ /// Return a heuristic for the machine scheduler to compare the profitability
+ /// of increasing one register pressure set versus another. The scheduler
+ /// will prefer increasing the register pressure of the set which returns
+ /// the largest value for this function.
+ virtual unsigned getRegPressureSetScore(const MachineFunction &MF,
+ unsigned PSetID) const {
+ return PSetID;
+ }
+
/// Get the weight in units of pressure for this register class.
virtual const RegClassWeight &getRegClassWeight(
const TargetRegisterClass *RC) const = 0;
@@ -709,14 +722,15 @@ public:
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM = nullptr) const;
-
- /// updateRegAllocHint - A callback to allow target a chance to update
- /// register allocation hints when a register is "changed" (e.g. coalesced)
- /// to another register. e.g. On ARM, some virtual registers should target
- /// register pairs, if one of pair is coalesced to another register, the
- /// allocation hint of the other half of the pair should be changed to point
- /// to the new register.
+ const VirtRegMap *VRM = nullptr,
+                                     const LiveRegMatrix *Matrix = nullptr) const;
+
+  /// A callback to allow the target a chance to update register allocation
+  /// hints when a register is "changed" (e.g. coalesced) to another register.
+  /// e.g. On ARM, some virtual registers should target register pairs; if one
+  /// of the pair is coalesced to another register, the allocation hint of the
+  /// other half of the pair should be changed to point to the new register.
virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const {
// Do nothing.
@@ -738,73 +752,72 @@ public:
/// register if it is available.
virtual unsigned getCSRFirstUseCost() const { return 0; }
- /// requiresRegisterScavenging - returns true if the target requires (and can
- /// make use of) the register scavenger.
+ /// Returns true if the target requires (and can make use of) the register
+ /// scavenger.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
return false;
}
- /// useFPForScavengingIndex - returns true if the target wants to use
- /// frame pointer based accesses to spill to the scavenger emergency spill
- /// slot.
+ /// Returns true if the target wants to use frame pointer based accesses to
+ /// spill to the scavenger emergency spill slot.
virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
return true;
}
- /// requiresFrameIndexScavenging - returns true if the target requires post
- /// PEI scavenging of registers for materializing frame index constants.
+ /// Returns true if the target requires post PEI scavenging of registers for
+ /// materializing frame index constants.
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
return false;
}
- /// requiresVirtualBaseRegisters - Returns true if the target wants the
- /// LocalStackAllocation pass to be run and virtual base registers
- /// used for more efficient stack access.
+ /// Returns true if the target wants the LocalStackAllocation pass to be run
+ /// and virtual base registers used for more efficient stack access.
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
return false;
}
- /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
- /// the stack frame of the given function for the specified register. e.g. On
- /// x86, if the frame register is required, the first fixed stack object is
- /// reserved as its spill slot. This tells PEI not to create a new stack frame
+ /// Return true if target has reserved a spill slot in the stack frame of
+ /// the given function for the specified register. e.g. On x86, if the frame
+ /// register is required, the first fixed stack object is reserved as its
+ /// spill slot. This tells PEI not to create a new stack frame
/// object for the given register. It should be called only after
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const {
return false;
}
- /// trackLivenessAfterRegAlloc - returns true if the live-ins should be tracked
- /// after register allocation.
+ /// Returns true if the live-ins should be tracked after register allocation.
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
return false;
}
- /// needsStackRealignment - true if storage within the function requires the
- /// stack pointer to be aligned more than the normal calling convention calls
- /// for.
- virtual bool needsStackRealignment(const MachineFunction &MF) const {
- return false;
- }
+ /// True if the stack can be realigned for the target.
+ virtual bool canRealignStack(const MachineFunction &MF) const;
+
+ /// True if storage within the function requires the stack pointer to be
+ /// aligned more than the normal calling convention calls for.
+  /// This cannot be overridden by the target, but canRealignStack can be
+ /// overridden.
+ bool needsStackRealignment(const MachineFunction &MF) const;
- /// getFrameIndexInstrOffset - Get the offset from the referenced frame
- /// index in the instruction, if there is one.
+ /// Get the offset from the referenced frame index in the instruction,
+ /// if there is one.
virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const {
return 0;
}
- /// needsFrameBaseReg - Returns true if the instruction's frame index
- /// reference would be better served by a base register other than FP
- /// or SP. Used by LocalStackFrameAllocation to determine which frame index
+ /// Returns true if the instruction's frame index reference would be better
+ /// served by a base register other than FP or SP.
+ /// Used by LocalStackFrameAllocation to determine which frame index
/// references it should create new base registers for.
virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
return false;
}
- /// materializeFrameBaseRegister - Insert defining instruction(s) for
- /// BaseReg to be a pointer to FrameIdx before insertion point I.
+ /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+ /// before insertion point I.
virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
@@ -812,24 +825,23 @@ public:
"target");
}
- /// resolveFrameIndex - Resolve a frame index operand of an instruction
+ /// Resolve a frame index operand of an instruction
/// to reference the indicated base register plus offset instead.
virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
llvm_unreachable("resolveFrameIndex does not exist on this target");
}
- /// isFrameOffsetLegal - Determine whether a given base register plus offset
- /// immediate is encodable to resolve a frame index.
+ /// Determine whether a given base register plus offset immediate is
+ /// encodable to resolve a frame index.
virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const {
llvm_unreachable("isFrameOffsetLegal does not exist on this target");
}
-
- /// saveScavengerRegister - Spill the register so it can be used by the
- /// register scavenger. Return true if the register was spilled, false
- /// otherwise. If this function does not spill the register, the scavenger
+ /// Spill the register so it can be used by the register scavenger.
+ /// Return true if the register was spilled, false otherwise.
+ /// If this function does not spill the register, the scavenger
/// will instead spill it to the emergency spill slot.
///
virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
@@ -840,13 +852,13 @@ public:
return false;
}
- /// eliminateFrameIndex - This method must be overriden to eliminate abstract
- /// frame indices from instructions which may use them. The instruction
- /// referenced by the iterator contains an MO_FrameIndex operand which must be
- /// eliminated by this method. This method may modify or replace the
- /// specified instruction, as long as it keeps the iterator pointing at the
- /// finished product. SPAdj is the SP adjustment due to call frame setup
- /// instruction. FIOperandNum is the FI operand number.
+  /// This method must be overridden to eliminate abstract frame indices from
+ /// instructions which may use them. The instruction referenced by the
+ /// iterator contains an MO_FrameIndex operand which must be eliminated by
+ /// this method. This method may modify or replace the specified instruction,
+ /// as long as it keeps the iterator pointing at the finished product.
+ /// SPAdj is the SP adjustment due to call frame setup instruction.
+ /// FIOperandNum is the FI operand number.
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const = 0;
@@ -935,7 +947,6 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
}
};
-/// PrintReg - Helper class for printing registers on a raw_ostream.
/// Prints virtual and physical registers with or without a TRI instance.
///
/// The format is:
@@ -946,24 +957,10 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
/// %physreg17 - a physical register when no TRI instance given.
///
/// Usage: OS << PrintReg(Reg, TRI) << '\n';
-///
-class PrintReg {
- const TargetRegisterInfo *TRI;
- unsigned Reg;
- unsigned SubIdx;
-public:
- explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr,
- unsigned subidx = 0)
- : TRI(tri), Reg(reg), SubIdx(subidx) {}
- void print(raw_ostream&) const;
-};
-
-static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
- PR.print(OS);
- return OS;
-}
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr,
+ unsigned SubRegIdx = 0);
-/// PrintRegUnit - Helper class for printing register units on a raw_ostream.
+/// Create Printable object to print register units on a \ref raw_ostream.
///
/// Register units are named after their root registers:
///
@@ -971,36 +968,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
/// FP0~ST7 - Dual roots.
///
/// Usage: OS << PrintRegUnit(Unit, TRI) << '\n';
-///
-class PrintRegUnit {
-protected:
- const TargetRegisterInfo *TRI;
- unsigned Unit;
-public:
- PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri)
- : TRI(tri), Unit(unit) {}
- void print(raw_ostream&) const;
-};
-
-static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) {
- PR.print(OS);
- return OS;
-}
+Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI);
-/// PrintVRegOrUnit - It is often convenient to track virtual registers and
-/// physical register units in the same list.
-class PrintVRegOrUnit : protected PrintRegUnit {
-public:
- PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri)
- : PrintRegUnit(VRegOrUnit, tri) {}
- void print(raw_ostream&) const;
-};
+/// \brief Create Printable object to print virtual registers and physical
+/// registers on a \ref raw_ostream.
+Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI);
-static inline raw_ostream &operator<<(raw_ostream &OS,
- const PrintVRegOrUnit &PR) {
- PR.print(OS);
- return OS;
-}
+/// Create Printable object to print LaneBitmasks on a \ref raw_ostream.
+Printable PrintLaneMask(LaneBitmask LaneMask);
} // End llvm namespace
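The conversion above replaces three printer classes with Printable-returning factories. A minimal sketch of the pattern, assuming Printable wraps a std::function<void(raw_ostream &)> with a single operator<< overload (as llvm/Support/Printable.h does at this revision); the body is illustrative, not the in-tree implementation:

#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"

namespace llvm {
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
                   unsigned SubRegIdx) {
  return Printable([Reg, TRI, SubRegIdx](raw_ostream &OS) {
    // Illustrative body: mirror the documented %vregN/%physregN format.
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
    else if (TRI)
      OS << '%' << TRI->getName(Reg);
    else
      OS << "%physreg" << Reg;
    if (SubRegIdx && TRI)
      OS << ':' << TRI->getSubRegIndexName(SubRegIdx);
  });
}
} // namespace llvm

Because the closure owns its state, no helper class or per-printer operator<< is needed; OS << PrintReg(Reg, TRI) << '\n' works as before.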
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
index 4abbe37..5654736 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -80,6 +80,11 @@ class SDTCisSameNumEltsAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
int OtherOperandNum = OtherOp;
}
+// SDTCisSameSizeAs - The two specified operands have identical size.
+class SDTCisSameSizeAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
+ int OtherOperandNum = OtherOp;
+}
+
//===----------------------------------------------------------------------===//
// Selection DAG Type Profile definitions.
//
@@ -186,6 +191,10 @@ def SDTBrind : SDTypeProfile<0, 1, [ // brind
SDTCisPtrTy<0>
]>;
+def SDTCatchret : SDTypeProfile<0, 2, [ // catchret
+ SDTCisVT<0, OtherVT>, SDTCisVT<1, OtherVT>
+]>;
+
def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap
def SDTLoad : SDTypeProfile<1, 1, [ // load
@@ -201,11 +210,12 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
]>;
def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
- SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>
+ SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2>
]>;
def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
- SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<0, 2>
]>;
def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather
@@ -387,6 +397,7 @@ def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
+def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
@@ -412,6 +423,8 @@ def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp>;
def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp>;
+def fminnan : SDNode<"ISD::FMINNAN" , SDTFPBinOp>;
+def fmaxnan : SDNode<"ISD::FMAXNAN" , SDTFPBinOp>;
def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>;
def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
@@ -447,6 +460,12 @@ def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>;
def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>;
def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>;
def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
+def catchret : SDNode<"ISD::CATCHRET" , SDTCatchret,
+ [SDNPHasChain, SDNPSideEffect]>;
+def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTNone, [SDNPHasChain]>;
+def catchpad : SDNode<"ISD::CATCHPAD" , SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone,
@@ -513,6 +532,9 @@ def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
[]>;
+
+// vector_extract/vector_insert are deprecated. extractelt/insertelt
+// are preferred.
def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
@@ -523,7 +545,7 @@ def concat_vectors : SDNode<"ISD::CONCAT_VECTORS",
// This operator does not do subvector type checking. The ARM
// backend, at least, needs it.
def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
- SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>,
+ SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>,
[]>;
// This operator does subvector type checking.
@@ -815,6 +837,21 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr),
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64;
}]>;
+def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
// indexed store fragments.
def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
(ist node:$val, node:$base, node:$offset), [{
@@ -889,6 +926,24 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
}]>;
+// nontemporal store fragments.
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->isNonTemporal();
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (nontemporalstore node:$val, node:$ptr), [{
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ return St->getAlignment() >= St->getMemoryVT().getStoreSize();
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (nontemporalstore node:$val, node:$ptr), [{
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ return St->getAlignment() < St->getMemoryVT().getStoreSize();
+}]>;
+
// setcc convenience fragments.
def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETOEQ)>;
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h b/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
index 53db5aa..a7143ac 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -21,7 +21,7 @@
namespace llvm {
//===----------------------------------------------------------------------===//
-/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the
+/// Targets can subclass this to parameterize the
/// SelectionDAG lowering and instruction selection process.
///
class TargetSelectionDAGInfo {
@@ -32,8 +32,8 @@ public:
explicit TargetSelectionDAGInfo() = default;
virtual ~TargetSelectionDAGInfo();
- /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
- /// memcpy. This can be used by targets to provide code sequences for cases
+ /// Emit target-specific code that performs a memcpy.
+ /// This can be used by targets to provide code sequences for cases
/// that don't fit the target's parameters for simple loads/stores and can be
/// more efficient than using a library call. This function can return a null
/// SDValue if the target declines to use custom code and a different
@@ -56,8 +56,8 @@ public:
return SDValue();
}
- /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
- /// memmove. This can be used by targets to provide code sequences for cases
+ /// Emit target-specific code that performs a memmove.
+ /// This can be used by targets to provide code sequences for cases
/// that don't fit the target's parameters for simple loads/stores and can be
/// more efficient than using a library call. This function can return a null
/// SDValue if the target declines to use custom code and a different
@@ -72,8 +72,8 @@ public:
return SDValue();
}
- /// EmitTargetCodeForMemset - Emit target-specific code that performs a
- /// memset. This can be used by targets to provide code sequences for cases
+ /// Emit target-specific code that performs a memset.
+ /// This can be used by targets to provide code sequences for cases
/// that don't fit the target's parameters for simple stores and can be more
/// efficient than using a library call. This function can return a null
/// SDValue if the target declines to use custom code and a different
@@ -87,11 +87,10 @@ public:
return SDValue();
}
- /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a
- /// memcmp, in cases where that is faster than a libcall. The first
- /// returned SDValue is the result of the memcmp and the second is
- /// the chain. Both SDValues can be null if a normal libcall should
- /// be used.
+ /// Emit target-specific code that performs a memcmp, in cases where that is
+ /// faster than a libcall. The first returned SDValue is the result of the
+ /// memcmp and the second is the chain. Both SDValues can be null if a normal
+ /// libcall should be used.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
@@ -101,11 +100,10 @@ public:
return std::make_pair(SDValue(), SDValue());
}
- /// EmitTargetCodeForMemchr - Emit target-specific code that performs a
- /// memchr, in cases where that is faster than a libcall. The first
- /// returned SDValue is the result of the memchr and the second is
- /// the chain. Both SDValues can be null if a normal libcall should
- /// be used.
+ /// Emit target-specific code that performs a memchr, in cases where that is
+ /// faster than a libcall. The first returned SDValue is the result of the
+ /// memchr and the second is the chain. Both SDValues can be null if a normal
+ /// libcall should be used.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
SDValue Src, SDValue Char, SDValue Length,
@@ -113,8 +111,8 @@ public:
return std::make_pair(SDValue(), SDValue());
}
- /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a
- /// strcpy or stpcpy, in cases where that is faster than a libcall.
+ /// Emit target-specific code that performs a strcpy or stpcpy, in cases
+ /// where that is faster than a libcall.
/// The first returned SDValue is the result of the copy (the start
/// of the destination string for strcpy, a pointer to the null terminator
/// for stpcpy) and the second is the chain. Both SDValues can be null
@@ -128,11 +126,10 @@ public:
return std::make_pair(SDValue(), SDValue());
}
- /// EmitTargetCodeForStrcmp - Emit target-specific code that performs a
- /// strcmp, in cases where that is faster than a libcall. The first
- /// returned SDValue is the result of the strcmp and the second is
- /// the chain. Both SDValues can be null if a normal libcall should
- /// be used.
+ /// Emit target-specific code that performs a strcmp, in cases where that is
+ /// faster than a libcall.
+ /// The first returned SDValue is the result of the strcmp and the second is
+ /// the chain. Both SDValues can be null if a normal libcall should be used.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
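A target opts in to the hooks above by subclassing; the documented protocol is to return a null SDValue (or a null pair) to decline. A hypothetical sketch — MyTargetSelectionDAGInfo is illustrative, not an in-tree target:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"

using namespace llvm;

class MyTargetSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
                                  SDValue Dst, SDValue Src, SDValue Size,
                                  unsigned Align, bool isVolatile,
                                  bool AlwaysInline,
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo) const override {
    // Only consider small copies of known size; otherwise decline so the
    // generic lowering or a libcall is used instead.
    auto *C = dyn_cast<ConstantSDNode>(Size);
    if (!C || C->getZExtValue() > 16)
      return SDValue();
    // A real target would emit its inline copy sequence here and return the
    // resulting chain; declining keeps this sketch behavior-neutral.
    return SDValue();
  }
};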
diff --git a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
index 07c0c66..d50aa49 100644
--- a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_TARGETSUBTARGETINFO_H
#include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CodeGen.h"
@@ -81,6 +82,11 @@ public:
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return nullptr;
}
+ /// Target can subclass this hook to select a different DAG scheduler.
+ virtual RegisterScheduler::FunctionPassCtor
+ getDAGScheduler(CodeGenOpt::Level) const {
+ return nullptr;
+ }
/// getRegisterInfo - If register information is available, return it. If
/// not, return null. This is kept separate from RegInfo until RegInfo has
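The new getDAGScheduler hook lets a subtarget swap the SelectionDAG scheduler without a command-line flag. A hypothetical override — MySubtarget and its policy are illustrative; createSourceListDAGScheduler is one of the existing factories declared in SchedulerRegistry.h:

#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

// Hypothetical subtarget; constructor details elided for the sketch.
class MySubtarget final : public TargetSubtargetInfo {
public:
  RegisterScheduler::FunctionPassCtor
  getDAGScheduler(CodeGenOpt::Level OptLevel) const override {
    // Prefer source-order scheduling at -O0 for debuggability; returning
    // nullptr keeps the target-independent default elsewhere.
    if (OptLevel == CodeGenOpt::None)
      return createSourceListDAGScheduler;
    return nullptr;
  }
};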
diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h
index 2ea4730..0c374a0 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO.h
@@ -16,9 +16,11 @@
#define LLVM_TRANSFORMS_IPO_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
+class FunctionInfoIndex;
class ModulePass;
class Pass;
class Function;
@@ -85,6 +87,10 @@ ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool
deleteFn = false);
//===----------------------------------------------------------------------===//
+/// This pass performs iterative function importing from other modules.
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr);
+
+//===----------------------------------------------------------------------===//
/// createFunctionInliningPass - Return a new pass object that uses a heuristic
/// to inline direct function calls to small functions.
///
@@ -209,6 +215,15 @@ ModulePass *createBarrierNoopPass();
/// to bitsets.
ModulePass *createLowerBitSetsPass();
+/// \brief This pass exports CFI checks for use by external modules.
+ModulePass *createCrossDSOCFIPass();
+
+//===----------------------------------------------------------------------===//
+// SampleProfilePass - Loads sample profile data from disk and generates
+// IR metadata to reflect the profile.
+ModulePass *createSampleProfileLoaderPass();
+ModulePass *createSampleProfileLoaderPass(StringRef Name);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
new file mode 100644
index 0000000..0ff4afe
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -0,0 +1,35 @@
+//===-- ForceFunctionAttrs.h - Force function attrs for debugging ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Super simple passes to force specific function attrs from the command line
+/// into the IR for debugging purposes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
+#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Pass which forces specific function attributes into the IR, primarily as
+/// a debugging tool.
+class ForceFunctionAttrsPass {
+public:
+ static StringRef name() { return "ForceFunctionAttrsPass"; }
+ PreservedAnalyses run(Module &M);
+};
+
+/// Create a legacy pass manager instance of a pass to force function attrs.
+Pass *createForceFunctionAttrsLegacyPass();
+
+}
+
+#endif // LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
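Passes of this shape — a static name() plus a run method — plug straight into the concept-based pass manager. A minimal driver sketch, assuming the ModulePassManager at this revision accepts run(M) with the analysis-manager argument defaulted to null:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"

using namespace llvm;

void forceAttrs(Module &M) {
  ModulePassManager MPM;
  MPM.addPass(ForceFunctionAttrsPass());
  MPM.run(M); // AnalysisManager argument defaults to null at this revision
}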
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
new file mode 100644
index 0000000..d770779
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -0,0 +1,43 @@
+//===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUNCTIONIMPORT_H
+#define LLVM_FUNCTIONIMPORT_H
+
+#include "llvm/ADT/StringMap.h"
+#include <functional>
+
+namespace llvm {
+class LLVMContext;
+class Module;
+class FunctionInfoIndex;
+
+/// The function importer automatically imports functions from other modules
+/// based on the provided summary information.
+class FunctionImporter {
+
+ /// The summaries index used to trigger importing.
+ const FunctionInfoIndex &Index;
+
+ /// Factory function to load a Module for a given identifier
+ std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader;
+
+public:
+ /// Create a Function Importer.
+ FunctionImporter(
+ const FunctionInfoIndex &Index,
+ std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader)
+ : Index(Index), ModuleLoader(ModuleLoader) {}
+
+ /// Import functions in Module \p M based on the summary information.
+ bool importFunctions(Module &M);
+};
+}
+
+#endif // LLVM_FUNCTIONIMPORT_H
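The ModuleLoader is any callable that maps a module identifier to a loaded Module. A sketch of wiring one up from IR/bitcode files on disk — the parseIRFile-based loader is an assumption for illustration, not the in-tree driver:

#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/IPO/FunctionImport.h"

using namespace llvm;

static bool importInto(Module &M, const FunctionInfoIndex &Index,
                       LLVMContext &Ctx) {
  auto Loader = [&Ctx](StringRef Identifier) -> std::unique_ptr<Module> {
    SMDiagnostic Err;
    return parseIRFile(Identifier, Err, Ctx); // null on failure
  };
  FunctionImporter Importer(Index, Loader);
  return Importer.importFunctions(M);
}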
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
new file mode 100644
index 0000000..80afc02
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -0,0 +1,38 @@
+//===-- InferFunctionAttrs.h - Infer implicit function attributes ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Interfaces for passes which infer implicit function attributes from the
+/// name and signature of function declarations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
+#define LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// A pass which infers function attributes from the names and signatures of
+/// function declarations in a module.
+class InferFunctionAttrsPass {
+public:
+ static StringRef name() { return "InferFunctionAttrsPass"; }
+ PreservedAnalyses run(Module &M, AnalysisManager<Module> *AM);
+};
+
+/// Create a legacy pass manager instance of a pass to infer function
+/// attributes.
+Pass *createInferFunctionAttrsLegacyPass();
+
+}
+
+#endif // LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
index 6a644ad..58ef0cb 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
@@ -20,11 +20,11 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
namespace llvm {
- class CallSite;
- class DataLayout;
- class InlineCost;
- template<class PtrType, unsigned SmallSize>
- class SmallPtrSet;
+class AssumptionCacheTracker;
+class CallSite;
+class DataLayout;
+class InlineCost;
+template <class PtrType, unsigned SmallSize> class SmallPtrSet;
/// Inliner - This class contains all of the helper code which is used to
/// perform the inlining operations that do not depend on the policy.
@@ -84,6 +84,9 @@ private:
/// shouldInline - Return true if the inliner should attempt to
/// inline at the given CallSite.
bool shouldInline(CallSite CS);
+
+protected:
+ AssumptionCacheTracker *ACT;
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/LowerBitSets.h b/contrib/llvm/include/llvm/Transforms/IPO/LowerBitSets.h
index 55d7d84..e5fb7b9 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/LowerBitSets.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/LowerBitSets.h
@@ -26,8 +26,9 @@
namespace llvm {
class DataLayout;
-class GlobalVariable;
+class GlobalObject;
class Value;
+class raw_ostream;
struct BitSetInfo {
// The indices of the set bits in the bitset.
@@ -55,8 +56,10 @@ struct BitSetInfo {
bool containsGlobalOffset(uint64_t Offset) const;
bool containsValue(const DataLayout &DL,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout,
Value *V, uint64_t COffset = 0) const;
+
+ void print(raw_ostream &OS) const;
};
struct BitSetBuilder {
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 1334dd0..a4e7bce 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -15,9 +15,11 @@
#ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
+#include <memory>
#include <vector>
namespace llvm {
+class FunctionInfoIndex;
class Pass;
class TargetLibraryInfoImpl;
class TargetMachine;
@@ -81,6 +83,11 @@ public:
/// run after everything else.
EP_OptimizerLast,
+ /// EP_VectorizerStart - This extension point allows adding optimization
+ /// passes before the vectorizer and other highly target specific
+ /// optimization passes are executed.
+ EP_VectorizerStart,
+
/// EP_EnabledOnOptLevel0 - This extension point allows adding passes that
/// should not be disabled by O0 optimization level. The passes will be
/// inserted after the inlining pass.
@@ -109,6 +116,9 @@ public:
/// added to the per-module passes.
Pass *Inliner;
+ /// The function summary index to use for function importing.
+ const FunctionInfoIndex *FunctionIndex;
+
bool DisableTailCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/contrib/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h
new file mode 100644
index 0000000..9dddd12
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h
@@ -0,0 +1,34 @@
+//===-- StripDeadPrototypes.h - Remove unused function declarations -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H
+#define LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Pass to remove unused function declarations.
+class StripDeadPrototypesPass {
+public:
+ static StringRef name() { return "StripDeadPrototypesPass"; }
+ PreservedAnalyses run(Module &M);
+};
+
+}
+
+#endif // LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H
diff --git a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index a6bad34..5d2b2d0 100644
--- a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -60,13 +60,13 @@ public:
/// AddInitialGroup - Add the specified batch of stuff in reverse order.
/// which should only be done when the worklist is empty and when the group
/// has no duplicates.
- void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ void AddInitialGroup(ArrayRef<Instruction *> List) {
assert(Worklist.empty() && "Worklist must be empty to add initial group");
- Worklist.reserve(NumEntries+16);
- WorklistMap.resize(NumEntries);
- DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (unsigned Idx = 0; NumEntries; --NumEntries) {
- Instruction *I = List[NumEntries-1];
+ Worklist.reserve(List.size()+16);
+ WorklistMap.resize(List.size());
+ DEBUG(dbgs() << "IC: ADDING: " << List.size() << " instrs to worklist\n");
+ unsigned Idx = 0;
+ for (Instruction *I : reverse(List)) {
WorklistMap.insert(std::make_pair(I, Idx++));
Worklist.push_back(I);
}
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
index 250e389..38dfeb0 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
#include <vector>
#if defined(__GNUC__) && defined(__linux__) && !defined(ANDROID)
@@ -33,6 +34,16 @@ inline void *getDFSanRetValTLSPtrForJIT() {
namespace llvm {
+class TargetMachine;
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
+BasicBlock::iterator PrepareToSplitEntryBlock(BasicBlock &BB,
+ BasicBlock::iterator IP);
+
class ModulePass;
class FunctionPass;
@@ -68,6 +79,11 @@ struct GCOVOptions {
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
GCOVOptions::getDefault());
+// PGO Instrumentation
+ModulePass *createPGOInstrumentationGenPass();
+ModulePass *
+createPGOInstrumentationUsePass(StringRef Filename = StringRef(""));
+
/// Options for the frontend instrumentation based profiling pass.
struct InstrProfOptions {
InstrProfOptions() : NoRedZone(false) {}
@@ -84,8 +100,10 @@ ModulePass *createInstrProfilingPass(
const InstrProfOptions &Options = InstrProfOptions());
// Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false);
-ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false);
+FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
+ bool Recover = false);
+ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
+ bool Recover = false);
// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0);
@@ -134,7 +152,25 @@ FunctionPass *createBoundsCheckingPass();
/// \brief This pass splits the stack into a safe stack and an unsafe stack to
/// protect against stack-based overflow vulnerabilities.
-FunctionPass *createSafeStackPass();
+FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr);
+
+/// \brief Calculate what to divide by to scale counts.
+///
+/// Given the maximum count, calculate a divisor that will scale all the
+/// weights to strictly less than UINT32_MAX.
+static inline uint64_t calculateCountScale(uint64_t MaxCount) {
+ return MaxCount < UINT32_MAX ? 1 : MaxCount / UINT32_MAX + 1;
+}
+
+/// \brief Scale an individual branch count.
+///
+/// Scale a 64-bit weight down to 32-bits using \c Scale.
+///
+static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
+ uint64_t Scaled = Count / Scale;
+ assert(Scaled <= UINT32_MAX && "overflow 32-bits");
+ return Scaled;
+}
} // End llvm namespace
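A worked example of the two scaling helpers above: with a maximum count just past 32 bits, the computed scale is 2 and every scaled weight fits.

#include "llvm/Transforms/Instrumentation.h"
#include <cassert>
#include <cstdint>

void scaleExample() {
  uint64_t MaxCount = uint64_t(UINT32_MAX) + 10; // 4294967305
  uint64_t Scale = llvm::calculateCountScale(MaxCount); // 4294967305/4294967295 + 1 == 2
  uint32_t W = llvm::scaleBranchCount(MaxCount, Scale); // 2147483652, fits in 32 bits
  assert(W <= UINT32_MAX);
}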
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h
index 4676c95..9173de1 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar.h
@@ -93,7 +93,7 @@ FunctionPass *createBitTrackingDCEPass();
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
//
-FunctionPass *createSROAPass(bool RequiresDomTree = true);
+FunctionPass *createSROAPass();
//===----------------------------------------------------------------------===//
//
@@ -161,7 +161,8 @@ Pass *createLoopStrengthReducePass();
// It can also be configured to focus on size optimizations only.
//
Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
- bool OnlyOptimizeForSize = false);
+ bool OnlyOptimizeForSize = false,
+ bool MergeExternalByDefault = false);
//===----------------------------------------------------------------------===//
//
@@ -407,13 +408,6 @@ FunctionPass *createPartiallyInlineLibCallsPass();
//===----------------------------------------------------------------------===//
//
-// SampleProfilePass - Loads sample profile data from disk and generates
-// IR metadata to reflect the profile.
-FunctionPass *createSampleProfileLoaderPass();
-FunctionPass *createSampleProfileLoaderPass(StringRef Name);
-
-//===----------------------------------------------------------------------===//
-//
// ScalarizerPass - Converts vector operations into scalar operations
//
FunctionPass *createScalarizerPass();
@@ -486,6 +480,12 @@ FunctionPass *createNaryReassociatePass();
//
FunctionPass *createLoopDistributePass();
+//===----------------------------------------------------------------------===//
+//
+// LoopLoadElimination - Perform loop-aware load elimination.
+//
+FunctionPass *createLoopLoadEliminationPass();
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/ADCE.h b/contrib/llvm/include/llvm/Transforms/Scalar/ADCE.h
new file mode 100644
index 0000000..f9bc7b7
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/ADCE.h
@@ -0,0 +1,38 @@
+//===- ADCE.h - Aggressive dead code elimination --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the Aggressive Dead Code Elimination
+// pass. This pass optimistically assumes that all instructions are dead until
+// proven otherwise, allowing it to eliminate dead computations that other DCE
+// passes do not catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H
+#define LLVM_TRANSFORMS_SCALAR_ADCE_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// A DCE pass that assumes instructions are dead until proven otherwise.
+///
+/// This pass eliminates dead code by optimistically assuming that all
+/// instructions are dead until proven otherwise. This allows it to eliminate
+/// dead computations that other DCE passes do not catch, particularly involving
+/// loop computations.
+class ADCEPass {
+public:
+ static StringRef name() { return "ADCEPass"; }
+ PreservedAnalyses run(Function &F);
+};
+}
+
+#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h b/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h
new file mode 100644
index 0000000..f90cc7b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -0,0 +1,129 @@
+//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for LLVM's Scalar Replacement of
+/// Aggregates pass. This pass provides both aggregate splitting and the
+/// primary SSA formation used in the compiler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SROA_H
+#define LLVM_TRANSFORMS_SCALAR_SROA_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// A private "module" namespace for types and utilities used by SROA. These
+/// are implementation details and should not be used by clients.
+namespace sroa {
+class AllocaSliceRewriter;
+class AllocaSlices;
+class Partition;
+class SROALegacyPass;
+}
+
+/// \brief An optimization pass providing Scalar Replacement of Aggregates.
+///
+/// This pass takes allocations which can be completely analyzed (that is, they
+/// don't escape) and tries to turn them into scalar SSA values. There are
+/// a few steps to this process.
+///
+/// 1) It takes allocations of aggregates and analyzes the ways in which they
+/// are used to try to split them into smaller allocations, ideally of
+/// a single scalar data type. It will split up memcpy and memset accesses
+/// as necessary and try to isolate individual scalar accesses.
+/// 2) It will transform accesses into forms which are suitable for SSA value
+/// promotion. This can be replacing a memset with a scalar store of an
+/// integer value, or it can involve speculating operations on a PHI or
+/// select to be a PHI or select of the results.
+/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
+/// onto insert and extract operations on a vector value, and convert them to
+/// this form. By doing so, it will enable promotion of vector aggregates to
+/// SSA vector values.
+class SROA {
+ LLVMContext *C;
+ DominatorTree *DT;
+ AssumptionCache *AC;
+
+ /// \brief Worklist of alloca instructions to simplify.
+ ///
+ /// Each alloca in the function is added to this. Each new alloca formed gets
+ /// added to it as well to recursively simplify unless that alloca can be
+ /// directly promoted. Finally, each time we rewrite a use of an alloca
+ /// other than the one being actively rewritten, we add it back onto the
+ /// list if not already present to ensure it is re-visited.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
+
+ /// \brief A collection of instructions to delete.
+ /// We try to batch deletions to simplify code and make things a bit more
+ /// efficient.
+ SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
+
+ /// \brief Post-promotion worklist.
+ ///
+ /// Sometimes we discover an alloca which has a high probability of becoming
+ /// viable for SROA after a round of promotion takes place. In those cases,
+ /// the alloca is enqueued here for re-processing.
+ ///
+ /// Note that we have to be very careful to clear allocas out of this list in
+ /// the event they are deleted.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
+
+ /// \brief A collection of alloca instructions we can directly promote.
+ std::vector<AllocaInst *> PromotableAllocas;
+
+ /// \brief A worklist of PHIs to speculate prior to promoting allocas.
+ ///
+ /// All of these PHIs have been checked for the safety of speculation and by
+ /// being speculated will allow promoting allocas currently in the promotable
+ /// queue.
+ SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs;
+
+ /// \brief A worklist of select instructions to speculate prior to promoting
+ /// allocas.
+ ///
+ /// All of these select instructions have been checked for the safety of
+ /// speculation and by being speculated will allow promoting allocas
+ /// currently in the promotable queue.
+ SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
+
+public:
+ SROA() : C(nullptr), DT(nullptr), AC(nullptr) {}
+
+ static StringRef name() { return "SROA"; }
+
+ /// \brief Run the pass over the function.
+ PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
+
+private:
+ friend class sroa::AllocaSliceRewriter;
+ friend class sroa::SROALegacyPass;
+
+ /// Helper used by both the public run method and by the legacy pass.
+ PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC);
+
+ bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS);
+ AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS,
+ sroa::Partition &P);
+ bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS);
+ bool runOnAlloca(AllocaInst &AI);
+ void clobberUse(Use &U);
+ void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
+ bool promoteAllocas(Function &F);
+};
+
+}
+
+#endif
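The SetVector-based worklists above are the load-bearing detail: set-like dedup with vector-like order means re-adding a queued alloca is a no-op and processing stays deterministic. An illustrative snippet of that behavior:

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

void worklistDemo(AllocaInst *AI) {
  SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
  Worklist.insert(AI);
  Worklist.insert(AI); // duplicate insert is ignored
  while (!Worklist.empty()) {
    AllocaInst *Cur = Worklist.back();
    Worklist.pop_back();
    (void)Cur; // visit Cur; newly formed allocas may be inserted here
  }
}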
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 9b919b6..13c856d 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -22,7 +22,6 @@
namespace llvm {
-class AliasAnalysis;
class MemoryDependenceAnalysis;
class DominatorTree;
class LoopInfo;
@@ -40,7 +39,7 @@ void DeleteDeadBlock(BasicBlock *BB);
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA = nullptr,
+void FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceAnalysis *MemDep = nullptr);
/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
@@ -54,7 +53,6 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr);
/// if possible. The return value indicates success or failure.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
- AliasAnalysis *AA = nullptr,
MemoryDependenceAnalysis *MemDep = nullptr);
// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
@@ -82,27 +80,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
/// This provides a builder interface for overriding the default options used
/// during critical edge splitting.
struct CriticalEdgeSplittingOptions {
- AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
bool MergeIdenticalEdges;
bool DontDeleteUselessPHIs;
bool PreserveLCSSA;
- CriticalEdgeSplittingOptions()
- : AA(nullptr), DT(nullptr), LI(nullptr), MergeIdenticalEdges(false),
- DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
-
- /// \brief Basic case of setting up all the analysis.
- CriticalEdgeSplittingOptions(AliasAnalysis *AA, DominatorTree *DT = nullptr,
+ CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr)
- : AA(AA), DT(DT), LI(LI), MergeIdenticalEdges(false),
- DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
-
- /// \brief A common pattern is to preserve the dominator tree and loop
- /// info but not care about AA.
- CriticalEdgeSplittingOptions(DominatorTree *DT, LoopInfo *LI)
- : AA(nullptr), DT(DT), LI(LI), MergeIdenticalEdges(false),
+ : DT(DT), LI(LI), MergeIdenticalEdges(false),
DontDeleteUselessPHIs(false), PreserveLCSSA(false) {}
CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
@@ -214,15 +200,13 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
/// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more
/// details on this case.
///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's
-/// complicated to handle the case where one of the edges being split
-/// is an exit of a loop with other exits).
+/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but
+/// no other analyses. In particular, it does not preserve LoopSimplify
+/// (because it's complicated to handle the case where one of the edges being
+/// split is an exit of a loop with other exits).
///
BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix,
- AliasAnalysis *AA = nullptr,
DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
bool PreserveLCSSA = false);
@@ -234,17 +218,15 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
/// OrigBB is cloned into both of the new basic blocks. The new blocks are given
/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
-/// it does not preserve LoopSimplify (because it's complicated to handle the
-/// case where one of the edges being split is an exit of a loop with other
-/// exits).
+/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but
+/// no other analyses. In particular, it does not preserve LoopSimplify
+/// (because it's complicated to handle the case where one of the edges being
+/// split is an exit of a loop with other exits).
///
void SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
- AliasAnalysis *AA = nullptr,
DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
bool PreserveLCSSA = false);
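With AA gone from CriticalEdgeSplittingOptions, the common construction is the chained builder form. A small usage sketch against SplitCriticalEdge from this same header:

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

BasicBlock *splitEdge(TerminatorInst *TI, unsigned SuccNum, DominatorTree *DT,
                      LoopInfo *LI) {
  // Preserve DT/LI and merge duplicate edges; returns null if the edge was
  // not split.
  return SplitCriticalEdge(
      TI, SuccNum,
      CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
}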
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2caa9a2..92a1d52 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -20,9 +20,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <functional>
namespace llvm {
@@ -43,14 +45,21 @@ class DataLayout;
class Loop;
class LoopInfo;
class AllocaInst;
-class AliasAnalysis;
class AssumptionCacheTracker;
class DominatorTree;
-/// CloneModule - Return an exact copy of the specified module
+/// Return an exact copy of the specified module
///
-Module *CloneModule(const Module *M);
-Module *CloneModule(const Module *M, ValueToValueMapTy &VMap);
+std::unique_ptr<Module> CloneModule(const Module *M);
+std::unique_ptr<Module> CloneModule(const Module *M, ValueToValueMapTy &VMap);
+
+/// Return a copy of the specified module. The ShouldCloneDefinition function
+/// controls whether a specific GlobalValue's definition is cloned. If the
+/// function returns false, the module copy will contain an external reference
+/// in place of the global definition.
+std::unique_ptr<Module>
+CloneModule(const Module *M, ValueToValueMapTy &VMap,
+ std::function<bool(const GlobalValue *)> ShouldCloneDefinition);
/// ClonedCodeInfo - This struct can be used to capture information about code
/// being cloned, while it is being cloned.
@@ -65,6 +74,11 @@ struct ClonedCodeInfo {
/// size.
bool ContainsDynamicAllocas;
+ /// All cloned call sites that have operand bundles attached are appended to
+ /// this vector. This vector may contain nulls or undefs if some of the
+ /// originally inserted callsites were DCE'ed after they were cloned.
+ std::vector<WeakVH> OperandBundleCallSites;
+
ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
};
@@ -193,14 +207,12 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(CallGraph *cg = nullptr,
- AliasAnalysis *AA = nullptr,
AssumptionCacheTracker *ACT = nullptr)
- : CG(cg), AA(AA), ACT(ACT) {}
+ : CG(cg), ACT(ACT) {}
/// CG - If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
CallGraph *CG;
- AliasAnalysis *AA;
AssumptionCacheTracker *ACT;
/// StaticAllocas - InlineFunction fills this in with all static allocas that
@@ -228,11 +240,11 @@ public:
/// function by one level.
///
bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
- bool InsertLifetime = true);
+ AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- bool InsertLifetime = true);
+ AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- bool InsertLifetime = true);
+ AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
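The ShouldCloneDefinition overload of CloneModule above makes partial clones one lambda away. A sketch that clones only function definitions, leaving every other global as an external declaration:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

std::unique_ptr<Module> cloneFunctionsOnly(const Module *M) {
  ValueToValueMapTy VMap;
  return CloneModule(M, VMap, [](const GlobalValue *GV) {
    // Keep function bodies; globals and aliases become declarations.
    return isa<Function>(GV);
  });
}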
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
index a1bb367a..81b376f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
#define LLVM_TRANSFORMS_UTILS_LOCAL_H
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -40,7 +41,6 @@ class DataLayout;
class TargetLibraryInfo;
class TargetTransformInfo;
class DIBuilder;
-class AliasAnalysis;
class DominatorTree;
template<typename T> class SmallVectorImpl;
@@ -271,11 +271,34 @@ bool LowerDbgDeclare(Function &F);
/// an alloca, if any.
DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
-/// \brief Replaces llvm.dbg.declare instruction when an alloca is replaced with
-/// a new value. If Deref is true, tan additional DW_OP_deref is prepended to
-/// the expression.
+/// \brief Replaces llvm.dbg.declare instruction when the address it describes
+/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
+/// prepended to the expression. If Offset is non-zero, a constant displacement
+/// is added to the expression (after the optional Deref). Offset can be
+/// negative.
+bool replaceDbgDeclare(Value *Address, Value *NewAddress,
+ Instruction *InsertBefore, DIBuilder &Builder,
+ bool Deref, int Offset);
+
+/// \brief Replaces llvm.dbg.declare instruction when the alloca it describes
+/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
+/// prepended to the expression. If Offset is non-zero, a constant displacement
+/// is added to the expression (after the optional Deref). Offset can be
+/// negative. New llvm.dbg.declare is inserted immediately before AI.
bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool Deref);
+ DIBuilder &Builder, bool Deref, int Offset = 0);
+
+/// \brief Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+void changeToUnreachable(Instruction *I, bool UseLLVMTrap);
+
+/// Replace 'BB's terminator with one that does not have an unwind successor
+/// block. Rewrites `invoke` to `call`, etc. Updates any PHIs in unwind
+/// successor.
+///
+/// \param BB Block whose terminator will be replaced. Its terminator must
+/// have an unwind successor.
+void removeUnwindEdge(BasicBlock *BB);
/// \brief Remove all blocks that can not be reached from the function's entry.
///
@@ -291,6 +314,22 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> Kn
/// the given edge. Returns the number of replacements made.
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
const BasicBlockEdge &Edge);
+/// \brief Replace each use of 'From' with 'To' if that use is dominated by
+/// the given BasicBlock. Returns the number of replacements made.
+unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
+ const BasicBlock *BB);
+
+
+/// \brief Return true if the CallSite CS calls a gc leaf function.
+///
+/// A leaf function is a function that does not safepoint the thread during its
+/// execution. During a call or invoke to such a function, the caller's stack
+/// does not have to be made parseable.
+///
+/// Most passes can and should ignore this information, and it is only used
+/// during lowering by the GC infrastructure.
+bool callsGCLeafFunction(ImmutableCallSite CS);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 15747bc..17aaee0 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -15,11 +15,11 @@
#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
namespace llvm {
-class AliasAnalysis;
class AliasSet;
class AliasSetTracker;
class AssumptionCache;
@@ -85,24 +85,35 @@ public:
RecurrenceDescriptor()
: StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence),
- MinMaxKind(MRK_Invalid) {}
+ MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr),
+ RecurrenceType(nullptr), IsSigned(false) {}
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- MinMaxRecurrenceKind MK)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
+ MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
+ bool Signed, SmallPtrSetImpl<Instruction *> &CI)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
+ UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ CastInsts.insert(CI.begin(), CI.end());
+ }
/// This POD struct holds information about a potential recurrence operation.
class InstDesc {
public:
- InstDesc(bool IsRecur, Instruction *I)
- : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+ InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
+ : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
+ UnsafeAlgebraInst(UAI) {}
- InstDesc(Instruction *I, MinMaxRecurrenceKind K)
- : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K) {}
+ InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
+ : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
+ UnsafeAlgebraInst(UAI) {}
bool isRecurrence() { return IsRecurrence; }
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
Instruction *getPatternInst() { return PatternLastInst; }
@@ -115,6 +126,8 @@ public:
Instruction *PatternLastInst;
// If this is a min/max pattern the comparison predicate.
MinMaxRecurrenceKind MinMaxKind;
+ // Recurrence has unsafe algebra.
+ Instruction *UnsafeAlgebraInst;
};
/// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -125,7 +138,7 @@ public:
static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr);
- /// Returns true if instuction I has multiple uses in Insts
+ /// Returns true if instruction I has multiple uses in Insts
static bool hasMultipleUsesOf(Instruction *I,
SmallPtrSetImpl<Instruction *> &Insts);
@@ -167,6 +180,51 @@ public:
Instruction *getLoopExitInstr() { return LoopExitInstr; }
+ /// Returns true if the recurrence has unsafe algebra which requires a relaxed
+ /// floating-point model.
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ /// Returns first unsafe algebra instruction in the PHI node's use-chain.
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+ /// Returns true if the recurrence kind is an integer kind.
+ static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is a floating point kind.
+ static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is an arithmetic kind.
+ static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+
+ /// Determines if Phi may have been type-promoted. If Phi has a single user
+ /// that ANDs the Phi with a type mask, return the user. RT is updated to
+ /// account for the narrower bit width represented by the mask, and the AND
+ /// instruction is added to CI.
+ static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI);
+
+ /// Returns true if all the source operands of a recurrence are either
+ /// SExtInsts or ZExtInsts. This function is intended to be used with
+ /// lookThroughAnd to determine if the recurrence has been type-promoted. The
+ /// source operands are added to CI, and IsSigned is updated to indicate if
+ /// all source operands are SExtInsts.
+ static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
+ Type *RT, bool &IsSigned,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI);
+
+ /// Returns the type of the recurrence. This type can be narrower than the
+ /// actual type of the Phi if the recurrence has been type-promoted.
+ Type *getRecurrenceType() { return RecurrenceType; }
+
+ /// Returns a reference to the instructions used for type-promoting the
+ /// recurrence.
+ SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+
+ /// Returns true if all source operands of the recurrence are SExtInsts.
+ bool isSigned() { return IsSigned; }
+
private:
// The starting value of the recurrence.
// It does not have to be zero!
@@ -177,19 +235,74 @@ private:
RecurrenceKind Kind;
// If this a min/max recurrence the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind;
+ // First occurrence of unsafe algebra in the PHI's use-chain.
+ Instruction *UnsafeAlgebraInst;
+ // The type of the recurrence.
+ Type *RecurrenceType;
+ // True if all source operands of the recurrence are SExtInsts.
+ bool IsSigned;
+ // Instructions used for type-promoting the recurrence.
+ SmallPtrSet<Instruction *, 8> CastInsts;
+};
+
+/// A struct for saving information about induction variables.
+class InductionDescriptor {
+public:
+ /// This enum represents the kinds of inductions that we support.
+ enum InductionKind {
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = C.
+ IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
+ };
+
+public:
+ /// Default constructor - creates an invalid induction.
+ InductionDescriptor()
+ : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+
+ /// Get the consecutive direction. Returns:
+ /// 0 - unknown or non-consecutive.
+ /// 1 - consecutive and increasing.
+ /// -1 - consecutive and decreasing.
+ int getConsecutiveDirection() const;
+
+ /// Compute the transformed value of Index at offset StartValue using step
+ /// StepValue.
+ /// For integer induction, returns StartValue + Index * StepValue.
+ /// For pointer induction, returns StartValue[Index * StepValue].
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
+ /// flags, which can be found from the original scalar operations.
+ Value *transform(IRBuilder<> &B, Value *Index) const;
+
+ Value *getStartValue() const { return StartValue; }
+ InductionKind getKind() const { return IK; }
+ ConstantInt *getStepValue() const { return StepValue; }
+
+ static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ InductionDescriptor &D);
+
+private:
+ /// Private constructor - used by \c isInductionPHI.
+ InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step);
+
+ /// Start value.
+ TrackingVH<Value> StartValue;
+ /// Induction kind.
+ InductionKind IK;
+ /// Step value.
+ ConstantInt *StepValue;
};
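
[Editorial note, not part of the patch: the transform() contract above, spelled out with concrete, hypothetical descriptor values.]

    #include <cassert>
    int main() {
      const int StartValue = 100, StepValue = 4, Index = 3;
      // IK_IntInduction: StartValue + Index * StepValue.
      assert(StartValue + Index * StepValue == 112);
      // IK_PtrInduction: StartValue[Index * StepValue], i.e. a GEP off the
      // start pointer.
      int Buf[32] = {};
      int *Transformed = &Buf[Index * StepValue];
      assert(Transformed == Buf + 12);
      return 0;
    }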
-BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P);
+BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA);
/// \brief Simplify each loop in a loop nest recursively.
///
/// This takes a potentially un-simplified loop L (and its children) and turns
-/// it into a simplified loop nest with preheaders and single backedges. It
-/// will optionally update \c AliasAnalysis and \c ScalarEvolution analyses if
-/// passed into it.
-bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
- AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr,
- AssumptionCache *AC = nullptr);
+/// it into a simplified loop nest with preheaders and single backedges. It will
+/// update the \c ScalarEvolution analysis if it's non-null.
+bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+ AssumptionCache *AC, bool PreserveLCSSA);
/// \brief Put loop into LCSSA form.
///
@@ -203,7 +316,7 @@ bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
///
/// Returns true if any modifications are made to the loop.
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE = nullptr);
+ ScalarEvolution *SE);
/// \brief Put a loop nest into LCSSA form.
///
@@ -215,7 +328,7 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
///
/// Returns true if any modifications are made to the loop.
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE = nullptr);
+ ScalarEvolution *SE);
/// \brief Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
@@ -242,10 +355,10 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// \brief Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
-/// the stores in the loop, looking for stores to Must pointers which are
+/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks
/// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop,
-/// AliasSet information for all instructions of the loop and loop safety
+/// AliasSet information for all instructions of the loop and loop safety
/// information as arguments. It returns changed status.
bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
SmallVectorImpl<Instruction*> &,
@@ -254,15 +367,13 @@ bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
LICMSafetyInfo *);
/// \brief Computes safety information for a loop
-/// checks loop body & header for the possiblity of may throw
+/// checks loop body & header for the possibility of a may-throw
/// exception, it takes LICMSafetyInfo and loop as argument.
/// Updates safety information in LICMSafetyInfo argument.
void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *);
-/// \brief Checks if the given PHINode in a loop header is an induction
-/// variable. Returns true if this is an induction PHI along with the step
-/// value.
-bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&);
+/// \brief Returns the instructions that use values defined in the loop.
+SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
}
#endif
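
[Editorial note, not part of the patch: a C++ sketch of the kind of value findDefsUsedOutsideOfLoop reports -- in IR terms, an instruction defined inside the loop whose result is read after it, which is exactly what LCSSA and loop versioning must patch up with PHI nodes.]

    int sumPlusOne(const int *A, int N) {
      int Sum = 0;
      for (int I = 0; I < N; ++I)
        Sum += A[I];   // def inside the loop...
      return Sum + 1;  // ...used outside it
    }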
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
index 009fba4..3b70594 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -16,13 +16,17 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
#define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
namespace llvm {
class Loop;
class LoopAccessInfo;
class LoopInfo;
+class ScalarEvolution;
/// \brief This class emits a version of the loop where run-time checks ensure
/// that may-alias pointers can't overlap.
@@ -31,13 +35,13 @@ class LoopInfo;
/// already has a preheader.
class LoopVersioning {
public:
+ /// \brief Expects LoopAccessInfo, Loop, LoopInfo, DominatorTree as input.
+ /// It uses the runtime checks provided by the user. If \p UseLAIChecks is
+ /// true, we retain the default checks made by LAI; otherwise we construct
+ /// an object with no checks and expect the user to add them.
LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT,
- const SmallVector<int, 8> *PtrToPartition = nullptr);
-
- /// \brief Returns true if we need memchecks to disambiguate may-aliasing
- /// accesses.
- bool needsRuntimeChecks() const;
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool UseLAIChecks = true);
/// \brief Performs the CFG manipulation part of versioning the loop including
/// the DominatorTree and LoopInfo updates.
@@ -52,15 +56,11 @@ public:
/// analyze L
/// if versioning is necessary version L
/// transform L
- void versionLoop(Pass *P);
+ void versionLoop() { versionLoop(findDefsUsedOutsideOfLoop(VersionedLoop)); }
- /// \brief Adds the necessary PHI nodes for the versioned loops based on the
- /// loop-defined values used outside of the loop.
- ///
- /// This needs to be called after versionLoop if there are defs in the loop
- /// that are used outside the loop. FIXME: this should be invoked internally
- /// by versionLoop and made private.
- void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside);
+ /// \brief Same as above, but if the client has already precomputed the set
+ /// of values used outside the loop, this overload allows passing it in.
+ void versionLoop(const SmallVectorImpl<Instruction *> &DefsUsedOutside);
/// \brief Returns the versioned loop. Control flows here if pointers in the
/// loop don't alias (i.e. all memchecks passed). (This loop is actually the
@@ -71,7 +71,21 @@ public:
/// loop may alias (i.e. one of the memchecks failed).
Loop *getNonVersionedLoop() { return NonVersionedLoop; }
+ /// \brief Sets the runtime alias checks for versioning the loop.
+ void setAliasChecks(
+ const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks);
+
+ /// \brief Sets the runtime SCEV checks for versioning the loop.
+ void setSCEVChecks(SCEVUnionPredicate Check);
+
private:
+ /// \brief Adds the necessary PHI nodes for the versioned loops based on the
+ /// loop-defined values used outside of the loop.
+ ///
+ /// This needs to be called after versionLoop if there are defs in the loop
+ /// that are used outside the loop.
+ void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside);
+
/// \brief The original loop. This becomes the "versioned" one. I.e.,
/// control flows here if pointers in the loop don't alias.
Loop *VersionedLoop;
@@ -79,21 +93,21 @@ private:
/// loop may alias (memchecks failed).
Loop *NonVersionedLoop;
- /// \brief For each memory pointer it contains the partitionId it is used in.
- /// If nullptr, no partitioning is used.
- ///
- /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
- /// If the pointer is used in multiple partitions the entry is set to -1.
- const SmallVector<int, 8> *PtrToPartition;
-
/// \brief This maps the instructions from VersionedLoop to their counterpart
/// in NonVersionedLoop.
ValueToValueMapTy VMap;
+ /// \brief The set of alias checks that we are versioning for.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> AliasChecks;
+
+ /// \brief The set of SCEV checks that we are versioning for.
+ SCEVUnionPredicate Preds;
+
/// \brief Analyses used.
const LoopAccessInfo &LAI;
LoopInfo *LI;
DominatorTree *DT;
+ ScalarEvolution *SE;
};
}
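
[Editorial note, not part of the patch: a usage sketch of the revised API, assuming LAI, L, LI, DT and SE are already in scope inside a transform.]

    LoopVersioning LVer(LAI, L, LI, DT, SE);  // UseLAIChecks defaults to true
    LVer.versionLoop();                       // emit checks, clone the loop
    Loop *Fast = LVer.getVersionedLoop();     // taken when all checks pass
    Loop *Slow = LVer.getNonVersionedLoop();  // taken when a check fails

With UseLAIChecks = false, the client would instead call setAliasChecks() and setSCEVChecks() before versionLoop().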
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 622265b..0f23d34 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include <utility> // for std::pair
namespace llvm {
@@ -56,7 +57,8 @@ Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
/// respectively.
std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
- ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs);
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ StringRef VersionCheckName = StringRef());
} // End llvm namespace
#endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
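
[Editorial note, not part of the patch: existing callers are unaffected because VersionCheckName defaults to an empty StringRef; a sanitizer that wants a runtime version check passes a symbol name explicitly. The specific names below are illustrative only.]

    Function *Ctor, *InitFn;
    std::tie(Ctor, InitFn) = createSanitizerCtorAndInitFunctions(
        M, /*CtorName=*/"asan.module_ctor", /*InitName=*/"__asan_init",
        InitArgTypes, InitArgs,
        /*VersionCheckName=*/"__asan_version_mismatch_check");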
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index ed0841c..425ecd3 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -378,7 +378,7 @@ public:
void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) {
for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end();
BBI != BBE; ++BBI) {
- PhiT *SomePHI = Traits::InstrIsPHI(BBI);
+ PhiT *SomePHI = Traits::InstrIsPHI(&*BBI);
if (!SomePHI)
break;
if (CheckIfPHIMatches(SomePHI)) {
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index dcb1d67..3c55e64 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -25,7 +25,7 @@ class CastInst;
class DominatorTree;
class IVUsers;
class Loop;
-class LPPassManager;
+class LoopInfo;
class PHINode;
class ScalarEvolution;
@@ -57,13 +57,14 @@ public:
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVVisitor *V = nullptr);
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+ IVVisitor *V = nullptr);
/// SimplifyLoopIVs - Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead);
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead);
} // namespace llvm
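
[Editorial note, not part of the patch: a migration sketch -- callers now pass the analyses directly instead of an LPPassManager. L, SE, DT and LI are assumed in scope.]

    SmallVector<WeakVH, 16> DeadInsts;
    bool Changed = simplifyLoopIVs(L, SE, DT, LI, DeadInsts);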
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 4115960..410a075 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -131,8 +131,11 @@ private:
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeLog(CallInst *CI, IRBuilder<> &B);
Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B);
Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeTan(CallInst *CI, IRBuilder<> &B);
// Integer Library Call Optimizations
Value *optimizeFFS(CallInst *CI, IRBuilder<> &B);
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SplitModule.h b/contrib/llvm/include/llvm/Transforms/Utils/SplitModule.h
new file mode 100644
index 0000000..7d896d1
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SplitModule.h
@@ -0,0 +1,43 @@
+//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
+#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
+
+#include <functional>
+#include <memory>
+
+namespace llvm {
+
+class Module;
+class StringRef;
+
+/// Splits the module M into N linkable partitions. The function ModuleCallback
+/// is called N times passing each individual partition as the MPart argument.
+///
+/// FIXME: This function does not deal with the somewhat subtle symbol
+/// visibility issues around module splitting, including (but not limited to):
+///
+/// - Internal symbols should not collide with symbols defined outside the
+/// module.
+/// - Internal symbols defined in module-level inline asm should be visible to
+/// each partition.
+void SplitModule(
+ std::unique_ptr<Module> M, unsigned N,
+ std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback);
+
+} // End llvm namespace
+
+#endif
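
[Editorial note, not part of the patch: a usage sketch of the declared interface; Parts is a hypothetical caller-owned sink, e.g. one slot per codegen thread.]

    std::vector<std::unique_ptr<Module>> Parts;
    SplitModule(std::move(M), /*N=*/4,
                [&](std::unique_ptr<Module> MPart) {
                  Parts.push_back(std::move(MPart));
                });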
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 7f2cf8d7..710817c 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -21,20 +21,23 @@
namespace llvm {
class AssumptionCache;
+class DominatorTree;
class Loop;
class LoopInfo;
class LPPassManager;
class MDNode;
class Pass;
+class ScalarEvolution;
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
bool AllowExpensiveTripCount, unsigned TripMultiple,
- LoopInfo *LI, Pass *PP, LPPassManager *LPM,
- AssumptionCache *AC);
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA);
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
bool AllowExpensiveTripCount, LoopInfo *LI,
- LPPassManager *LPM);
+ ScalarEvolution *SE, DominatorTree *DT,
+ bool PreserveLCSSA);
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
}
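
[Editorial note, not part of the patch: an UnrollLoop call under the new signature, with illustrative values; all other names assumed in scope.]

    bool Unrolled =
        UnrollLoop(L, /*Count=*/4, TripCount, /*AllowRuntime=*/true,
                   /*AllowExpensiveTripCount=*/false, TripMultiple, LI, SE, DT,
                   AC, PreserveLCSSA);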
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 047ab81..469022f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -38,13 +38,34 @@ namespace llvm {
/// to materialize Values on demand.
class ValueMaterializer {
virtual void anchor(); // Out of line method.
- public:
- virtual ~ValueMaterializer() {}
- /// materializeValueFor - The client should implement this method if they
- /// want to generate a mapped Value on demand. For example, if linking
- /// lazily.
- virtual Value *materializeValueFor(Value *V) = 0;
+ protected:
+ ~ValueMaterializer() = default;
+ ValueMaterializer() = default;
+ ValueMaterializer(const ValueMaterializer&) = default;
+ ValueMaterializer &operator=(const ValueMaterializer&) = default;
+
+ public:
+ /// The client should implement this method if they want to generate a
+ /// mapped Value on demand. For example, if linking lazily.
+ virtual Value *materializeDeclFor(Value *V) = 0;
+
+ /// If the data being mapped is recursive, the above function can map
+ /// just the declaration and this is called to compute the initializer.
+ /// It is called after the mapping is recorded, so it doesn't need to worry
+ /// about recursion.
+ virtual void materializeInitFor(GlobalValue *New, GlobalValue *Old);
+
+ /// If the client needs to handle temporary metadata it must implement
+ /// these methods.
+ virtual Metadata *mapTemporaryMetadata(Metadata *MD) { return nullptr; }
+ virtual void replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) {}
+
+ /// The client should implement this method if some metadata need
+ /// not be mapped, for example DISubprogram metadata for functions not
+ /// linked into the destination module.
+ virtual bool isMetadataNeeded(Metadata *MD) { return true; }
};
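
[Editorial note, not part of the patch: a minimal subclass sketch of the reworked interface. Only materializeDeclFor() is pure; materializeInitFor() is the optional second phase for recursive data, called after the mapping has been recorded. The class name is hypothetical.]

    struct LazyMaterializer final : ValueMaterializer {
      Value *materializeDeclFor(Value *V) override {
        // Emit just a declaration for V in the destination module here;
        // returning nullptr means "nothing to materialize".
        return nullptr;
      }
      void materializeInitFor(GlobalValue *New, GlobalValue *Old) override {
        // Safe to copy Old's initializer over now; recursion is handled.
      }
    };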
/// RemapFlags - These are flags that the value mapping APIs allow.
@@ -59,7 +80,20 @@ namespace llvm {
/// RF_IgnoreMissingEntries - If this flag is set, the remapper ignores
/// entries that are not in the value map. If it is unset, it aborts if an
/// operand is asked to be remapped which doesn't exist in the mapping.
- RF_IgnoreMissingEntries = 2
+ RF_IgnoreMissingEntries = 2,
+
+ /// Instruct the remapper to move distinct metadata instead of duplicating
+ /// it when there are module-level changes.
+ RF_MoveDistinctMDs = 4,
+
+ /// Any global values not in value map are mapped to null instead of
+ /// mapping to self. Illegal if RF_IgnoreMissingEntries is also set.
+ RF_NullMapMissingGlobalValues = 8,
+
+ /// Set when there is still temporary metadata that must be handled,
+ /// such as when we are doing function importing and will materialize
+ /// and link metadata as a postpass.
+ RF_HaveUnmaterializedMetadata = 16,
};
static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
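
[Editorial note, not part of the patch: RemapFlags is a bitmask, which is what the operator| above supports, e.g.:]

    RemapFlags Flags = RF_MoveDistinctMDs | RF_NullMapMissingGlobalValues;

Per the documentation above, RF_NullMapMissingGlobalValues must not be combined with RF_IgnoreMissingEntries.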
diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap
index dcc5ce1..0adce0c 100644
--- a/contrib/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm/include/llvm/module.modulemap
@@ -190,17 +190,15 @@ module LLVM_Utils {
// Exclude this; it's fundamentally non-modular.
exclude header "Support/PluginLoader.h"
- // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts
- // with the Analysis module (which also defines an llvm::GCOVOptions).
- exclude header "Support/GCOV.h"
-
// FIXME: Mislayered?
exclude header "Support/TargetRegistry.h"
// These are intended for textual inclusion.
+ textual header "Support/ARMTargetParser.def"
textual header "Support/Dwarf.def"
textual header "Support/ELFRelocs/AArch64.def"
textual header "Support/ELFRelocs/ARM.def"
+ textual header "Support/ELFRelocs/AVR.def"
textual header "Support/ELFRelocs/Hexagon.def"
textual header "Support/ELFRelocs/i386.def"
textual header "Support/ELFRelocs/Mips.def"
@@ -210,6 +208,12 @@ module LLVM_Utils {
textual header "Support/ELFRelocs/SystemZ.def"
textual header "Support/ELFRelocs/x86_64.def"
}
+
+ // This part of the module is usable from both C and C++ code.
+ module ConvertUTF {
+ header "Support/ConvertUTF.h"
+ export *
+ }
}
module LLVM_CodeGen_MachineValueType {
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 44d137d..35f2e97 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -25,9 +25,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -40,44 +47,72 @@
#include "llvm/Pass.h"
using namespace llvm;
-// Register the AliasAnalysis interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
-char AliasAnalysis::ID = 0;
+/// Allow disabling BasicAA from the AA results. This is particularly useful
+/// when testing to isolate a single AA implementation.
+static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
+ cl::init(false));
+
+AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) {
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+}
+
+AAResults &AAResults::operator=(AAResults &&Arg) {
+ AAs = std::move(Arg.AAs);
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+ return *this;
+}
+
+AAResults::~AAResults() {
+// FIXME: It would be nice to at least clear out the pointers back to this
+// aggregation here, but we end up with non-nesting lifetimes in the legacy
+// pass manager that prevent this from working. In the legacy pass manager
+// we'll end up with dangling references here in some cases.
+#if 0
+ for (auto &AA : AAs)
+ AA->setAAResults(nullptr);
+#endif
+}
//===----------------------------------------------------------------------===//
// Default chaining methods
//===----------------------------------------------------------------------===//
-AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->alias(LocA, LocB);
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ for (const auto &AA : AAs) {
+ auto Result = AA->alias(LocA, LocB);
+ if (Result != MayAlias)
+ return Result;
+ }
+ return MayAlias;
}
-bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->pointsToConstantMemory(Loc, OrLocal);
-}
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ for (const auto &AA : AAs)
+ if (AA->pointsToConstantMemory(Loc, OrLocal))
+ return true;
-AliasAnalysis::ModRefResult
-AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getArgModRefInfo(CS, ArgIdx);
+ return false;
}
-void AliasAnalysis::deleteValue(Value *V) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->deleteValue(V);
-}
+ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
-void AliasAnalysis::addEscapingUse(Use &U) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->addEscapingUse(U);
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// We may have two calls
if (auto CS = ImmutableCallSite(I)) {
// Check if the two calls modify the same memory
@@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
- return AliasAnalysis::ModRef;
- }
- return AliasAnalysis::NoModRef;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-
- ModRefBehavior MRB = getModRefBehavior(CS);
- if (MRB == DoesNotAccessMemory)
- return NoModRef;
-
- ModRefResult Mask = ModRef;
- if (onlyReadsMemory(MRB))
- Mask = Ref;
-
- if (onlyAccessesArgPointees(MRB)) {
- bool doesAlias = false;
- ModRefResult AllArgsMask = NoModRef;
- if (doesAccessArgPointees(MRB)) {
- for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI) {
- const Value *Arg = *AI;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
- MemoryLocation ArgLoc =
- MemoryLocation::getForArgument(CS, ArgIdx, *TLI);
- if (!isNoAlias(ArgLoc, Loc)) {
- ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx);
- doesAlias = true;
- AllArgsMask = ModRefResult(AllArgsMask | ArgMask);
- }
- }
- }
- if (!doesAlias)
- return NoModRef;
- Mask = ModRefResult(Mask & AllArgsMask);
+ if (getModRefInfo(Call, DefLoc) != MRI_NoModRef)
+ return MRI_ModRef;
}
+ return MRI_NoModRef;
+}
- // If Loc is a constant memory location, the call definitely could not
- // modify the memory location.
- if ((Mask & Mod) && pointsToConstantMemory(Loc))
- Mask = ModRefResult(Mask & ~Mod);
-
- // If this is the end of the chain, don't forward.
- if (!AA) return Mask;
-
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any mask we've managed to compute.
- return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-
- // If CS1 or CS2 are readnone, they don't interact.
- ModRefBehavior CS1B = getModRefBehavior(CS1);
- if (CS1B == DoesNotAccessMemory) return NoModRef;
-
- ModRefBehavior CS2B = getModRefBehavior(CS2);
- if (CS2B == DoesNotAccessMemory) return NoModRef;
-
- // If they both only read from memory, there is no dependence.
- if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
- return NoModRef;
-
- AliasAnalysis::ModRefResult Mask = ModRef;
-
- // If CS1 only reads memory, the only dependence on CS2 can be
- // from CS1 reading memory written by CS2.
- if (onlyReadsMemory(CS1B))
- Mask = ModRefResult(Mask & Ref);
-
- // If CS2 only access memory through arguments, accumulate the mod/ref
- // information from CS1's references to the memory referenced by
- // CS2's arguments.
- if (onlyAccessesArgPointees(CS2B)) {
- AliasAnalysis::ModRefResult R = NoModRef;
- if (doesAccessArgPointees(CS2B)) {
- for (ImmutableCallSite::arg_iterator
- I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI);
-
- // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of
- // CS1 on that location is the inverse.
- ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx);
- if (ArgMask == Mod)
- ArgMask = ModRef;
- else if (ArgMask == Ref)
- ArgMask = Mod;
-
- R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask);
- if (R == Mask)
- break;
- }
- }
- return R;
- }
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ ModRefInfo Result = MRI_ModRef;
- // If CS1 only accesses memory through arguments, check if CS2 references
- // any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (onlyAccessesArgPointees(CS1B)) {
- AliasAnalysis::ModRefResult R = NoModRef;
- if (doesAccessArgPointees(CS1B)) {
- for (ImmutableCallSite::arg_iterator
- I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
- const Value *Arg = *I;
- if (!Arg->getType()->isPointerTy())
- continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI);
-
- // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod
- // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1
- // might Ref, then we care only about a Mod by CS2.
- ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc);
- if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
- ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
- R = ModRefResult((R | ArgMask) & Mask);
-
- if (R == Mask)
- break;
- }
- }
- return R;
- }
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc));
- // If this is the end of the chain, don't forward.
- if (!AA) return Mask;
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any mask we've managed to compute.
- return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+ return Result;
}
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
+}
- ModRefBehavior Min = UnknownModRefBehavior;
+FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
- // Call back into the alias analysis with the other form of getModRefBehavior
- // to see if it can give a better response.
- if (const Function *F = CS.getCalledFunction())
- Min = getModRefBehavior(F);
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
- // If this is the end of the chain, don't forward.
- if (!AA) return Min;
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
- // Otherwise, fall back to the next AA in the chain. But we can merge
- // in any result we've managed to compute.
- return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+ return Result;
}
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(const Function *F) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getModRefBehavior(F);
+FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
+
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
+
+ return Result;
}
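
[Editorial note, not part of the patch: every aggregation loop above is the same lattice intersection. A self-contained illustration, assuming the usual bit encoding of ModRefInfo (NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod):]

    #include <cassert>
    enum ModRef { NoModRef = 0, Ref = 1, Mod = 2, Both = Ref | Mod };
    int main() {
      int Result = Both; // top of the lattice: assume mod and ref
      Result &= Ref;     // one AA proves "only reads"
      Result &= Mod;     // another AA proves "only writes"
      assert(Result == NoModRef); // intersection: no access at all
      return 0;
    }

Since & can only clear bits, Result moves monotonically down the lattice, which is why each loop can early-exit as soon as it reaches MRI_NoModRef.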
//===----------------------------------------------------------------------===//
-// AliasAnalysis non-virtual helper method implementation
+// Helper method implementation
//===----------------------------------------------------------------------===//
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+ const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!L->isUnordered())
- return ModRef;
+ return MRI_ModRef;
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// Otherwise, a load just reads.
- return Ref;
+ return MRI_Ref;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!S->isUnordered())
- return ModRef;
+ return MRI_ModRef;
if (Loc.Ptr) {
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
if (!alias(MemoryLocation::get(S), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
if (pointsToConstantMemory(Loc))
- return NoModRef;
-
+ return MRI_NoModRef;
}
// Otherwise, a store just writes.
- return Mod;
+ return MRI_Mod;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+ const MemoryLocation &Loc) {
if (Loc.Ptr) {
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
if (!alias(MemoryLocation::get(V), Loc))
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
if (pointsToConstantMemory(Loc))
- return NoModRef;
+ return MRI_NoModRef;
}
// Otherwise, a va_arg reads and writes.
- return ModRef;
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchpad.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchpad reads and writes.
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchret.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchret reads and writes.
+ return MRI_ModRef;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX,
- const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (CX->getSuccessOrdering() > Monotonic)
- return ModRef;
+ return MRI_ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
- return NoModRef;
+ return MRI_NoModRef;
- return ModRef;
+ return MRI_ModRef;
}
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW,
- const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (RMW->getOrdering() > Monotonic)
- return ModRef;
+ return MRI_ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
- return NoModRef;
+ return MRI_NoModRef;
- return ModRef;
+ return MRI_ModRef;
}
-// FIXME: this is really just shoring-up a deficiency in alias analysis.
-// BasicAA isn't willing to spend linear time determining whether an alloca
-// was captured before or after this particular call, while we are. However,
-// with a smarter AA in place, this test is just wasting compile time.
-AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
- const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) {
+/// \brief Return information about whether a particular call site modifies
+/// or reads the specified memory location \p MemLoc before instruction \p I
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+/// instruction-ordering queries inside the BasicBlock containing \p I.
+/// FIXME: this is really just shoring-up a deficiency in alias analysis.
+/// BasicAA isn't willing to spend linear time determining whether an alloca
+/// was captured before or after this particular call, while we are. However,
+/// with a smarter AA in place, this test is just wasting compile time.
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc,
+ DominatorTree *DT,
+ OrderedBasicBlock *OBB) {
if (!DT)
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
- const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
+ const Value *Object =
+ GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
ImmutableCallSite CS(I);
if (!CS.getInstruction() || CS.getInstruction() == Object)
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
/* StoreCaptures */ true, I, DT,
- /* include Object */ true))
- return AliasAnalysis::ModRef;
+ /* include Object */ true,
+ /* OrderedBasicBlock */ OBB))
+ return MRI_ModRef;
unsigned ArgNo = 0;
- AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
+ ModRefInfo R = MRI_NoModRef;
for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
@@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
if (CS.doesNotAccessMemory(ArgNo))
continue;
if (CS.onlyReadsMemory(ArgNo)) {
- R = AliasAnalysis::Ref;
+ R = MRI_Ref;
continue;
}
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
return R;
}
-// AliasAnalysis destructor: DO NOT move this to the header file for
-// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
-// the AliasAnalysis.o file in the current .a file, causing alias analysis
-// support to not be included in the tool correctly!
-//
-AliasAnalysis::~AliasAnalysis() {}
-
-/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
-/// AliasAnalysis interface before any other methods are called.
-///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
- DL = NewDL;
- auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &P->getAnalysis<AliasAnalysis>();
-}
-
-// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)).
-void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>(); // All AA's chain
-}
-
-/// getTypeStoreSize - Return the DataLayout store size for the given type,
-/// if known, or a conservative value otherwise.
-///
-uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
- return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize;
-}
-
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the location Loc.
///
-bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
- const MemoryLocation &Loc) {
- return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
+bool AAResults::canBasicBlockModify(const BasicBlock &BB,
+ const MemoryLocation &Loc) {
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
}
/// canInstructionRangeModRef - Return true if it is possible for the
@@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
/// mode) the location Loc. The instructions to consider are all
/// of the instructions in the range of [I1,I2] INCLUSIVE.
/// I1 and I2 must be in the same basic block.
-bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
- const Instruction &I2,
- const MemoryLocation &Loc,
- const ModRefResult Mode) {
+bool AAResults::canInstructionRangeModRef(const Instruction &I1,
+ const Instruction &I2,
+ const MemoryLocation &Loc,
+ const ModRefInfo Mode) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
- BasicBlock::const_iterator I = &I1;
- BasicBlock::const_iterator E = &I2;
+ BasicBlock::const_iterator I = I1.getIterator();
+ BasicBlock::const_iterator E = I2.getIterator();
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, Loc) & Mode)
+ if (getModRefInfo(&*I, Loc) & Mode)
return true;
return false;
}
+// Provide a definition for the root virtual destructor.
+AAResults::Concept::~Concept() {}
+
+namespace {
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+ typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+
+ CallbackT CB;
+
+ static char ID;
+
+ ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+ explicit ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+}
+
+char ExternalAAWrapperPass::ID = 0;
+INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
+ false, true)
+
+ImmutablePass *
+llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) {
+ return new ExternalAAWrapperPass(std::move(Callback));
+}
+
+AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) {
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char AAResultsWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
+INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+
+FunctionPass *llvm::createAAResultsWrapperPass() {
+ return new AAResultsWrapperPass();
+}
+
+/// Run the wrapper pass to rebuild an aggregation over known AA passes.
+///
+/// This is the legacy pass manager's interface to the new-style AA results
+/// aggregation object. Because this is somewhat shoe-horned into the legacy
+/// pass manager, we hard code all the specific alias analyses available into
+/// it. While the particular set enabled is configured via commandline flags,
+/// adding a new alias analysis to LLVM will require adding support for it to
+/// this list.
+bool AAResultsWrapperPass::runOnFunction(Function &F) {
+ // NB! This *must* be reset before adding new AA results to the new
+ // AAResults object because in the legacy pass manager, each instance
+ // of these will refer to the *same* immutable analyses, registering and
+ // unregistering themselves with them. We need to carefully tear down the
+ // previous object first, in this case replacing it with an empty one, before
+ // registering new results.
+ AAR.reset(new AAResults());
+
+ // BasicAA is always available for function analyses. Also, we add it first
+ // so that it can trump TBAA results when it proves MustAlias.
+ // FIXME: TBAA should have an explicit mode to support this and then we
+ // should reconsider the ordering here.
+ if (!DisableBasicAA)
+ AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+
+ // Populate the results with the currently available AAs.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+
+ // If available, run an external AA providing callback over the results as
+ // well.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+ if (WrapperPass->CB)
+ WrapperPass->CB(*this, F, *AAR);
+
+ // Analyses don't mutate the IR, so return false.
+ return false;
+}
+
+void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicAAWrapperPass>();
+
+ // We also need to mark all the alias analysis passes we will potentially
+ // probe in runOnFunction as used here to ensure the legacy pass manager
+ // preserves them. This hard coding of lists of alias analyses is specific to
+ // the legacy pass manager.
+ AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
+ AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
+ AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>();
+ AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
+ AU.addUsedIfAvailable<SCEVAAWrapperPass>();
+ AU.addUsedIfAvailable<CFLAAWrapperPass>();
+}
+
+AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
+ BasicAAResult &BAR) {
+ AAResults AAR;
+
+ // Add in our explicitly constructed BasicAA results.
+ if (!DisableBasicAA)
+ AAR.addAAResult(BAR);
+
+ // Populate the results with the other currently available AAs.
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+
+ return AAR;
+}
+
/// isNoAliasCall - Return true if this pointer is returned by a noalias
/// function.
bool llvm::isNoAliasCall(const Value *V) {
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- return ImmutableCallSite(cast<Instruction>(V))
- .paramHasAttr(0, Attribute::NoAlias);
+ if (auto CS = ImmutableCallSite(V))
+ return CS.paramHasAttr(0, Attribute::NoAlias);
return false;
}
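
[Editorial note, not part of the patch: how a legacy-PM client now obtains the aggregated results, mirroring the AAEval changes below.]

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AAResultsWrapperPass>();
      AU.setPreservesAll();
    }
    bool runOnFunction(Function &F) override {
      AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
      // ...issue alias() / getModRefInfo() queries through AA...
      return false;
    }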
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
deleted file mode 100644
index 9b6a5a4..0000000
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass which can be used to count how many alias queries
-// are being made and how the alias analysis implementation being used responds.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<bool>
-PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
-static cl::opt<bool>
-PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
-
-namespace {
- class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
- unsigned No, May, Partial, Must;
- unsigned NoMR, JustRef, JustMod, MR;
- Module *M;
- public:
- static char ID; // Class identification, replacement for typeinfo
- AliasAnalysisCounter() : ModulePass(ID) {
- initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
- No = May = Partial = Must = 0;
- NoMR = JustRef = JustMod = MR = 0;
- }
-
- void printLine(const char *Desc, unsigned Val, unsigned Sum) {
- errs() << " " << Val << " " << Desc << " responses ("
- << Val*100/Sum << "%)\n";
- }
- ~AliasAnalysisCounter() override {
- unsigned AASum = No+May+Partial+Must;
- unsigned MRSum = NoMR+JustRef+JustMod+MR;
- if (AASum + MRSum) { // Print a report if any counted queries occurred...
- errs() << "\n===== Alias Analysis Counter Report =====\n"
- << " Analysis counted:\n"
- << " " << AASum << " Total Alias Queries Performed\n";
- if (AASum) {
- printLine("no alias", No, AASum);
- printLine("may alias", May, AASum);
- printLine("partial alias", Partial, AASum);
- printLine("must alias", Must, AASum);
- errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
- << May*100/AASum << "%/"
- << Partial*100/AASum << "%/"
- << Must*100/AASum<<"%\n\n";
- }
-
- errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
- if (MRSum) {
- printLine("no mod/ref", NoMR, MRSum);
- printLine("ref", JustRef, MRSum);
- printLine("mod", JustMod, MRSum);
- printLine("mod/ref", MR, MRSum);
- errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum
- << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
- << "%/" << MR*100/MRSum <<"%\n\n";
- }
- }
- }
-
- bool runOnModule(Module &M) override {
- this->M = &M;
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
- AU.setPreservesAll();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- // FIXME: We could count these too...
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
- }
-
- // Forwarding functions: just delegate to a real AA implementation, counting
- // the number of responses...
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
- }
- };
-}
-
-char AliasAnalysisCounter::ID = 0;
-INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
- "Count Alias Analysis Query Responses", false, true, false)
-
-ModulePass *llvm::createAliasAnalysisCounterPass() {
- return new AliasAnalysisCounter();
-}
-
-AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
-
- const char *AliasString = nullptr;
- switch (R) {
- case NoAlias: No++; AliasString = "No alias"; break;
- case MayAlias: May++; AliasString = "May alias"; break;
- case PartialAlias: Partial++; AliasString = "Partial alias"; break;
- case MustAlias: Must++; AliasString = "Must alias"; break;
- }
-
- if (PrintAll || (PrintAllFailures && R == MayAlias)) {
- errs() << AliasString << ":\t";
- errs() << "[" << LocA.Size << "B] ";
- LocA.Ptr->printAsOperand(errs(), true, M);
- errs() << ", ";
- errs() << "[" << LocB.Size << "B] ";
- LocB.Ptr->printAsOperand(errs(), true, M);
- errs() << "\n";
- }
-
- return R;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
-
- const char *MRString = nullptr;
- switch (R) {
- case NoModRef: NoMR++; MRString = "NoModRef"; break;
- case Ref: JustRef++; MRString = "JustRef"; break;
- case Mod: JustMod++; MRString = "JustMod"; break;
- case ModRef: MR++; MRString = "ModRef"; break;
- }
-
- if (PrintAll || (PrintAllFailures && R == ModRef)) {
- errs() << MRString << ": Ptr: ";
- errs() << "[" << Loc.Size << "B] ";
- Loc.Ptr->printAsOperand(errs(), true, M);
- errs() << "\t<->" << *CS.getInstruction() << '\n';
- }
- return R;
-}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 5d1b001..12917b6 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -21,8 +21,10 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
@@ -57,7 +59,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesAll();
}
@@ -81,7 +83,7 @@ namespace {
char AAEval::ID = 0;
INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
"Exhaustive Alias Analysis Precision Evaluator", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AAEval, "aa-eval",
"Exhaustive Alias Analysis Precision Evaluator", false, true)
@@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) {
}
bool AAEval::runOnFunction(Function &F) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
SetVector<Value *> Pointers;
- SetVector<CallSite> CallSites;
+ SmallSetVector<CallSite, 16> CallSites;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy()) // Add all pointer arguments.
- Pointers.insert(I);
+ for (auto &I : F.args())
+ if (I.getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(&I);
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
@@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) {
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
// Consider formals.
- for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI)
- if (isInterestingPointer(*AI))
- Pointers.insert(*AI);
+ for (Use &DataOp : CS.data_ops())
+ if (isInterestingPointer(DataOp))
+ Pointers.insert(DataOp);
CallSites.insert(CS);
} else {
// Consider all operands.
@@ -188,12 +190,12 @@ bool AAEval::runOnFunction(Function &F) {
I1 != E; ++I1) {
uint64_t I1Size = MemoryLocation::UnknownSize;
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
uint64_t I2Size = MemoryLocation::UnknownSize;
Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+ if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
case NoAlias:
@@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) {
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (SetVector<CallSite>::iterator C = CallSites.begin(),
- Ce = CallSites.end(); C != Ce; ++C) {
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
Instruction *I = C->getInstruction();
for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
uint64_t Size = MemoryLocation::UnknownSize;
Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
- if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+ if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
++NoModRefCount;
break;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
++ModCount;
break;
- case AliasAnalysis::Ref:
+ case MRI_Ref:
PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
++RefCount;
break;
- case AliasAnalysis::ModRef:
+ case MRI_ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRefCount;
break;
@@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) {
}
// Mod/ref alias analysis: compare all pairs of calls
- for (SetVector<CallSite>::iterator C = CallSites.begin(),
- Ce = CallSites.end(); C != Ce; ++C) {
- for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+ for (auto D = CallSites.begin(); D != Ce; ++D) {
if (D == C)
continue;
switch (AA.getModRefInfo(*C, *D)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
++NoModRefCount;
break;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
++ModCount;
break;
- case AliasAnalysis::Ref:
+ case MRI_Ref:
PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
++RefCount;
break;
- case AliasAnalysis::ModRef:
+ case MRI_ModRef:
PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
++ModRefCount;
break;
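For readers tracking the pass-manager migration in this hunk, here is a minimal sketch of the new query pattern, assuming LLVM headers of this vintage; "MyAAClientPass" is a hypothetical name, not part of the patch:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct MyAAClientPass : public FunctionPass {
      static char ID;
      MyAAClientPass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // was: AU.addRequired<AliasAnalysis>();
        AU.addRequired<AAResultsWrapperPass>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        // was: AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // queries such as AA.alias(...) go here
        return false;
      }
    };
    }
    char MyAAClientPass::ID = 0;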
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
deleted file mode 100644
index e5107b3..0000000
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass checks alias analysis users to ensure that if they
-// create a new value, they do not query AA without informing it of the value.
-// It acts as a shim over any other AA pass you want.
-//
-// Yes keeping track of every value in the program is expensive, but this is
-// a debugging pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include <set>
-using namespace llvm;
-
-namespace {
-
- class AliasDebugger : public ModulePass, public AliasAnalysis {
-
- //What we do is simple. Keep track of every value the AA could
- //know about, and verify that queries are one of those.
- //A query to a value that didn't exist when the AA was created
- //means someone forgot to update the AA when creating new values
-
- std::set<const Value*> Vals;
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- AliasDebugger() : ModulePass(ID) {
- initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
-
- for(Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- Vals.insert(&*I);
- for (User::const_op_iterator OI = I->op_begin(),
- OE = I->op_end(); OI != OE; ++OI)
- Vals.insert(*OI);
- }
-
- for(Module::iterator I = M.begin(),
- E = M.end(); I != E; ++I){
- Vals.insert(&*I);
- if(!I->isDeclaration()) {
- for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
- AI != AE; ++AI)
- Vals.insert(&*AI);
- for (Function::const_iterator FI = I->begin(), FE = I->end();
- FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- Vals.insert(&*BI);
- for (User::const_op_iterator OI = BI->op_begin(),
- OE = BI->op_end(); OI != OE; ++OI)
- Vals.insert(*OI);
- }
- }
-
- }
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- assert(Vals.find(LocA.Ptr) != Vals.end() &&
- "Never seen value in AA before");
- assert(Vals.find(LocB.Ptr) != Vals.end() &&
- "Never seen value in AA before");
- return AliasAnalysis::alias(LocA, LocB);
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override {
- assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::getModRefInfo(CS, Loc);
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
- }
-
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- }
-
- void deleteValue(Value *V) override {
- assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
- AliasAnalysis::deleteValue(V);
- }
-
- };
-}
-
-char AliasDebugger::ID = 0;
-INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
- "AA use debugger", false, true, false)
-
-Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
-
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 54d0f43..3094049 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (AA.getModRefInfo(UnknownInsts[i],
- MemoryLocation(Ptr, Size, AAInfo)) !=
- AliasAnalysis::NoModRef)
+ MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)
return true;
}
@@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
- if (!C1 || !C2 ||
- AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
- AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
+ if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
+ AA.getModRefInfo(C2, C1) != MRI_NoModRef)
return true;
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(
- Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(),
+ I.getAAInfo())) != MRI_NoModRef)
return true;
return false;
@@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = Cur; // Remember it.
+ FoundSet = &*Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
@@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = Cur; // Remember it.
+ FoundSet = &*Cur; // Remember it.
else if (!Cur->Forward) // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
@@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) {
AliasSet::AccessLattice Access = AliasSet::RefAccess;
bool NewPtr;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
AliasSet &AS = addPointer(LI->getOperand(0),
- AA.getTypeStoreSize(LI->getType()),
+ DL.getTypeStoreSize(LI->getType()),
AAInfo, Access, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) {
AliasSet::AccessLattice Access = AliasSet::ModAccess;
bool NewPtr;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
- AA.getTypeStoreSize(Val->getType()),
+ DL.getTypeStoreSize(Val->getType()),
AAInfo, Access, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) {
}
void AliasSetTracker::add(BasicBlock &BB) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- add(I);
+ for (auto &I : BB)
+ add(&I);
}
void AliasSetTracker::add(const AliasSetTracker &AST) {
@@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
}
bool AliasSetTracker::remove(LoadInst *LI) {
- uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI->getType());
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
@@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) {
}
bool AliasSetTracker::remove(StoreInst *SI) {
- uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
AAMDNodes AAInfo;
SI->getAAMetadata(AAInfo);
@@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) {
// dangling pointers to deleted instructions.
//
void AliasSetTracker::deleteValue(Value *PtrVal) {
- // Notify the alias analysis implementation that this value is gone.
- AA.deleteValue(PtrVal);
-
// If this is a call instruction, remove the callsite from the appropriate
// AliasSet (if present).
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
@@ -650,11 +649,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
bool runOnFunction(Function &F) override {
- Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+ auto &AAWP = getAnalysis<AAResultsWrapperPass>();
+ Tracker = new AliasSetTracker(AAWP.getAAResults());
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
@@ -668,6 +668,6 @@ namespace {
char AliasSetPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
"Alias Set Printer", false, true)
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 842ff0a..9c1ac00 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -20,23 +20,23 @@ using namespace llvm;
/// initializeAnalysis - Initialize all passes linked into the Analysis library.
void llvm::initializeAnalysis(PassRegistry &Registry) {
- initializeAliasAnalysisAnalysisGroup(Registry);
- initializeAliasAnalysisCounterPass(Registry);
initializeAAEvalPass(Registry);
- initializeAliasDebuggerPass(Registry);
initializeAliasSetPrinterPass(Registry);
- initializeNoAAPass(Registry);
- initializeBasicAliasAnalysisPass(Registry);
- initializeBlockFrequencyInfoPass(Registry);
- initializeBranchProbabilityInfoPass(Registry);
+ initializeBasicAAWrapperPassPass(Registry);
+ initializeBlockFrequencyInfoWrapperPassPass(Registry);
+ initializeBranchProbabilityInfoWrapperPassPass(Registry);
+ initializeCallGraphWrapperPassPass(Registry);
+ initializeCallGraphPrinterPass(Registry);
+ initializeCallGraphViewerPass(Registry);
initializeCostModelAnalysisPass(Registry);
initializeCFGViewerPass(Registry);
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
- initializeCFLAliasAnalysisPass(Registry);
+ initializeCFLAAWrapperPassPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDemandedBitsPass(Registry);
initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
@@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializePostDomPrinterPass(Registry);
initializePostDomOnlyViewerPass(Registry);
initializePostDomOnlyPrinterPass(Registry);
+ initializeAAResultsWrapperPassPass(Registry);
+ initializeGlobalsAAWrapperPassPass(Registry);
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
initializeLazyValueInfoPass(Registry);
- initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
+ initializeObjCARCAAWrapperPassPass(Registry);
initializePostDominatorTreePass(Registry);
initializeRegionInfoPassPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
initializeRegionOnlyViewerPass(Registry);
initializeRegionOnlyPrinterPass(Registry);
- initializeScalarEvolutionPass(Registry);
- initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeSCEVAAWrapperPassPass(Registry);
+ initializeScalarEvolutionWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
- initializeTypeBasedAliasAnalysisPass(Registry);
- initializeScopedNoAliasAAPass(Registry);
+ initializeTypeBasedAAWrapperPassPass(Registry);
+ initializeScopedNoAliasAAWrapperPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
initializeAnalysis(*unwrap(R));
}
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
char **OutMessages) {
raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
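As the hunk above shows, LLVMInitializeIPA now forwards to the same registration as LLVMInitializeAnalysis. A minimal client-side sketch through the C API, assuming LLVM-C headers of this vintage:

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"

    int main() {
      LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
      LLVMInitializeAnalysis(R); // registers aa-eval, the AA wrapper passes, etc.
      LLVMInitializeIPA(R);      // now equivalent to the call above
      return 0;
    }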
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3586354..00f346e 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,24 +13,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -42,6 +39,18 @@
#include <algorithm>
using namespace llvm;
+/// Enable analysis of recursive PHI nodes.
+static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
+ cl::init(false));
+
+/// SearchLimitReached / SearchTimes show how often the limit to
+/// decompose GEPs is reached. It will affect the precision
+/// of basic alias analysis.
+#define DEBUG_TYPE "basicaa"
+STATISTIC(SearchLimitReached, "Number of times the limit to "
+ "decompose GEPs is reached");
+STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
+
/// Cutoff after which to stop analysing a set of phi nodes potentially involved
/// in a cycle. Because we are analysing 'through' phi nodes we need to be
/// careful with value equivalence. We use reachability to make sure a value
@@ -57,8 +66,8 @@ static const unsigned MaxLookupSearchDepth = 6;
// Useful predicates
//===----------------------------------------------------------------------===//
-/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
-/// object that never escapes from the function.
+/// Returns true if the pointer is to a function-local object that never
+/// escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
// If this is a local allocation, check to see if it escapes.
if (isa<AllocaInst>(V) || isNoAliasCall(V))
@@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) {
return false;
}
-/// isEscapeSource - Return true if the pointer is one which would have
-/// been considered an escape by isNonEscapingLocalObject.
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
return true;
@@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) {
return false;
}
-/// getObjectSize - Return the size of the object specified by V, or
-/// UnknownSize if unknown.
+/// Returns the size of the object specified by V, or UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
@@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
return MemoryLocation::UnknownSize;
}
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
+/// Returns true if we can prove that the object specified by V is smaller than
+/// Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const DataLayout &DL,
const TargetLibraryInfo &TLI) {
@@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
- uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
-/// isObjectSize - Return true if we can prove that the object specified
-/// by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size,
- const DataLayout &DL, const TargetLibraryInfo &TLI) {
+/// Returns true if we can prove that the object specified by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
uint64_t ObjectSize = getObjectSize(V, DL, TLI);
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
}
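The RoundToAlign comment above is easiest to see with concrete numbers. A toy arithmetic check (plain C++, not the LLVM helper): a 10-byte object aligned to 8 may legally be read as 16 bytes, so only accesses larger than the aligned size prove "smaller than".

    #include <cassert>
    #include <cstdint>

    static uint64_t roundToAlign(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) / Align * Align;
    }

    int main() {
      uint64_t ObjectSize = roundToAlign(10, 8); // 16
      assert(ObjectSize == 16);
      assert(!(ObjectSize < 16)); // a 16-byte access is not provably out of range
      assert(ObjectSize < 24);    // but a 24-byte access is
    }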
@@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size,
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
-namespace {
- enum ExtensionKind {
- EK_NotExtended,
- EK_SignExt,
- EK_ZeroExt
- };
-
- struct VariableGEPIndex {
- const Value *V;
- ExtensionKind Extension;
- int64_t Scale;
-
- bool operator==(const VariableGEPIndex &Other) const {
- return V == Other.V && Extension == Other.Extension &&
- Scale == Other.Scale;
- }
-
- bool operator!=(const VariableGEPIndex &Other) const {
- return !operator==(Other);
- }
- };
-}
-
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*, and return whether we looked
-/// through any sign or zero extends. The incoming Value is known to have
-/// IntegerType and it may already be sign or zero extended.
+/// Analyzes the specified value as a linear expression: "A*V + B", where A and
+/// B are constant integers.
+///
+/// Returns the scale and offset values as APInts, returns V as a Value*, and
+/// reports whether we looked through any sign or zero extends. The incoming
+/// Value is known to have IntegerType and it may already be sign or zero
+/// extended.
///
/// Note that this looks through extends, so the high bits may not be
/// represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
- ExtensionKind &Extension,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, DominatorTree *DT) {
+/*static*/ const Value *BasicAAResult::GetLinearExpression(
+ const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits,
+ unsigned &SExtBits, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
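A worked toy example (plain C++, not the LLVM API) of the "A*V + B" form described above: decomposing (V + 8) * 4 yields Scale = 4 and Offset = 32, built up exactly like the Add/Mul cases in the next hunk.

    #include <cassert>
    #include <cstdint>

    struct Linear { int64_t Scale, Offset; };

    int main() {
      Linear L = {1, 0};            // innermost: V itself is 1*V + 0
      L.Offset += 8;                // Add case:  V + 8     -> 1*V + 8
      L.Offset *= 4; L.Scale *= 4;  // Mul case:  (V + 8)*4 -> 4*V + 32
      assert(L.Scale == 4 && L.Offset == 32);
    }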
@@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
+ // If it's a constant, just convert it to an offset and remove the variable.
+ // If we've been called recursively the Offset bit width will be greater
+ // than the constant's (the Offset is always as wide as the outermost call's),
+ // so we'll zext here and process any extension in the isa<SExtInst> &
+ // isa<ZExtInst> cases below.
+ Offset += Const->getValue().zextOrSelf(Offset.getBitWidth());
+ assert(Scale == 0 && "Constant values don't have a scale");
+ return V;
+ }
+
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+
+ // If we've been called recursively then Offset and Scale will be wider
+ // than the BOp operands. We'll always zext it here as we'll process sign
+ // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases).
+ APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth());
+
switch (BOp->getOpcode()) {
- default: break;
+ default:
+ // We don't understand this instruction, so we can't decompose it any
+ // further.
+ Scale = 1;
+ Offset = 0;
+ return V;
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
- BOp, DT))
- break;
- // FALL THROUGH.
+ BOp, DT)) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+ // FALL THROUGH.
case Instruction::Add:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset += RHSC->getValue();
- return V;
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset += RHS;
+ break;
+ case Instruction::Sub:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset -= RHS;
+ break;
case Instruction::Mul:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset *= RHSC->getValue();
- Scale *= RHSC->getValue();
- return V;
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset *= RHS;
+ Scale *= RHS;
+ break;
case Instruction::Shl:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth + 1, AC, DT);
- Offset <<= RHSC->getValue().getLimitedValue();
- Scale <<= RHSC->getValue().getLimitedValue();
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset <<= RHS.getLimitedValue();
+ Scale <<= RHS.getLimitedValue();
+ // The semantics of nsw and nuw for left shifts don't match those of
+ // multiplications, so we won't propagate them.
+ NSW = NUW = false;
return V;
}
+
+ if (isa<OverflowingBinaryOperator>(BOp)) {
+ NUW &= BOp->hasNoUnsignedWrap();
+ NSW &= BOp->hasNoSignedWrap();
+ }
+ return V;
}
}
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
- if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
- (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
Value *CastOp = cast<CastInst>(V)->getOperand(0);
- unsigned OldWidth = Scale.getBitWidth();
+ unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();
unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
- Scale = Scale.trunc(SmallWidth);
- Offset = Offset.trunc(SmallWidth);
- Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
-
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
- Depth + 1, AC, DT);
- Scale = Scale.zext(OldWidth);
- Offset = Offset.zext(OldWidth);
+ unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits;
+ const Value *Result =
+ GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
+ Depth + 1, AC, DT, NSW, NUW);
+
+ // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
+ // by just incrementing the number of bits we've extended by.
+ unsigned ExtendedBy = NewWidth - SmallWidth;
+
+ if (isa<SExtInst>(V) && ZExtBits == 0) {
+ // sext(sext(%x, a), b) == sext(%x, a + b)
+
+ if (NSW) {
+ // We haven't sign-wrapped, so it's valid to decompose sext(%x + c)
+ // into sext(%x) + sext(c). We'll sext the Offset ourselves:
+ unsigned OldWidth = Offset.getBitWidth();
+ Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth);
+ } else {
+ // We may have signed-wrapped, so don't decompose sext(%x + c) into
+ // sext(%x) + sext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ SExtBits += ExtendedBy;
+ } else {
+ // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b)
+
+ if (!NUW) {
+ // We may have unsigned-wrapped, so don't decompose zext(%x + c) into
+ // zext(%x) + zext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ ZExtBits += ExtendedBy;
+ }
return Result;
}
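A concrete check (plain C++, assuming the usual two's-complement narrowing) of why the NSW guard above is needed: with i8 %x = 127 and c = 1 the add wraps, so sext(%x + c) and sext(%x) + sext(c) differ.

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t X = 127, C = 1;
      int16_t SextOfSum = (int16_t)(int8_t)(X + C); // sext(x + c)       == -128
      int16_t SumOfSext = (int16_t)X + (int16_t)C;  // sext(x) + sext(c) ==  128
      assert(SextOfSum != SumOfSext); // decomposition is invalid without nsw
    }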
@@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
+/// If V is a symbolic pointer expression, decompose it into a base pointer
+/// with a constant offset and a number of scaled symbolic offsets.
///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale
+/// in the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As
+/// such, the gep cannot necessarily be reconstructed from its decomposed form.
///
/// When DataLayout is around, this function is capable of analyzing everything
/// that GetUnderlyingObject can look through. To be able to do that
/// GetUnderlyingObject and DecomposeGEPExpression must use the same search
-/// depth (MaxLookupSearchDepth).
-/// When DataLayout not is around, it just looks through pointer casts.
-///
-static const Value *
-DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
- SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout &DL,
- AssumptionCache *AC, DominatorTree *DT) {
+/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks
+/// through pointer casts.
+/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
+ const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
+ const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
MaxLookupReached = false;
+ SearchTimes++;
BaseOffs = 0;
do {
@@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// updated when GetUnderlyingObject is updated). TLI should be
// provided also.
if (const Value *Simplified =
- SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
+ SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
V = Simplified;
continue;
}
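A toy walk (plain integers, hypothetical but typical layout) of the accumulation the index loop in the following hunk performs for a GEP like getelementptr {i32, i64}, ptr %p, i64 2, i32 1, assuming the struct has alloc size 16 with field 1 at offset 8:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t BaseOffs = 0;
      BaseOffs += 2 * 16; // pointer index: 2 elements of alloc size 16
      BaseOffs += 8;      // struct index:  StructLayout offset of field 1
      assert(BaseOffs == 40);
    }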
@@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
- for (User::const_op_iterator I = GEPOp->op_begin()+1,
- E = GEPOp->op_end(); I != E; ++I) {
- Value *Index = *I;
+ for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
+ I != E; ++I) {
+ const Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- if (FieldNo == 0) continue;
+ if (FieldNo == 0)
+ continue;
BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
- if (CIdx->isZero()) continue;
+ if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero())
+ continue;
BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
continue;
}
uint64_t Scale = DL.getTypeAllocSize(*GTI);
- ExtensionKind Extension = EK_NotExtended;
+ unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- if (DL.getPointerSizeInBits(AS) > Width)
- Extension = EK_SignExt;
+ unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
- 0, AC, DT);
+ bool NSW = true, NUW = true;
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
+ SExtBits, DL, 0, AC, DT, NSW, NUW);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- BaseOffs += IndexOffset.getSExtValue()*Scale;
+ BaseOffs += IndexOffset.getSExtValue() * Scale;
Scale *= IndexScale.getSExtValue();
// If we already had an occurrence of this index variable, merge this
@@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
- if (VarIndices[i].V == Index &&
- VarIndices[i].Extension == Extension) {
+ if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
+ VarIndices[i].SExtBits == SExtBits) {
Scale += VarIndices[i].Scale;
- VarIndices.erase(VarIndices.begin()+i);
+ VarIndices.erase(VarIndices.begin() + i);
break;
}
}
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
+ if (unsigned ShiftBits = 64 - PointerSize) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
if (Scale) {
- VariableGEPIndex Entry = {Index, Extension,
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
@@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If the chain of expressions is too deep, just return early.
MaxLookupReached = true;
+ SearchLimitReached++;
return V;
}
-//===----------------------------------------------------------------------===//
-// BasicAliasAnalysis Pass
-//===----------------------------------------------------------------------===//
-
-#ifndef NDEBUG
-static const Function *getParent(const Value *V) {
- if (const Instruction *inst = dyn_cast<Instruction>(V))
- return inst->getParent()->getParent();
-
- if (const Argument *arg = dyn_cast<Argument>(V))
- return arg->getParent();
-
- return nullptr;
-}
-
-static bool notDifferentParent(const Value *O1, const Value *O2) {
-
- const Function *F1 = getParent(O1);
- const Function *F2 = getParent(O2);
-
- return !F1 || !F2 || F1 == F2;
-}
-#endif
-
-namespace {
- /// BasicAliasAnalysis - This is the primary alias analysis implementation.
- struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
- static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : ImmutablePass(ID) {
- initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- assert(AliasCache.empty() && "AliasCache must be cleared after use!");
- assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
- "BasicAliasAnalysis doesn't support interprocedural queries.");
- AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
- LocB.Ptr, LocB.Size, LocB.AATags);
- // AliasCache rarely has more than 1 or 2 elements, always use
- // shrink_and_clear so it quickly returns to the inline capacity of the
- // SmallDenseMap if it ever grows larger.
- // FIXME: This should really be shrink_to_inline_capacity_and_clear().
- AliasCache.shrink_and_clear();
- VisitedPhiBBs.clear();
- return Alias;
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
-
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
-
- /// pointsToConstantMemory - Chase pointers until we find a (constant
- /// global) or not.
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
-
- /// Get the location associated with a pointer argument of a callsite.
- ModRefResult getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) override;
-
- /// getModRefBehavior - Return the behavior when calling the given
- /// call site.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-
- /// getModRefBehavior - Return the behavior when calling the given function.
- /// For use when the call site is not known.
- ModRefBehavior getModRefBehavior(const Function *F) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- // AliasCache - Track alias queries to guard against recursion.
- typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
- typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
- AliasCacheTy AliasCache;
-
- /// \brief Track phi nodes we have visited. When interpret "Value" pointer
- /// equality as value equality we need to make sure that the "Value" is not
- /// part of a cycle. Otherwise, two uses could come from different
- /// "iterations" of a cycle and see different values for the same "Value"
- /// pointer.
- /// The following example shows the problem:
- /// %p = phi(%alloca1, %addr2)
- /// %l = load %ptr
- /// %addr1 = gep, %alloca2, 0, %l
- /// %addr2 = gep %alloca2, 0, (%l + 1)
- /// alias(%p, %addr1) -> MayAlias !
- /// store %l, ...
- SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
-
- // Visited - Track instructions visited by pointsToConstantMemory.
- SmallPtrSet<const Value*, 16> Visited;
-
- /// \brief Check whether two Values can be considered equivalent.
- ///
- /// In addition to pointer equivalence of \p V1 and \p V2 this checks
- /// whether they can not be part of a cycle in the value graph by looking at
- /// all visited phi nodes an making sure that the phis cannot reach the
- /// value. We have to do this because we are looking through phi nodes (That
- /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
- bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
-
- /// \brief Dest and Src are the variable indices from two decomposed
- /// GetElementPtr instructions GEP1 and GEP2 which have common base
- /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
- /// difference between the two pointers.
- void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src);
-
- // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
- // instruction against another.
- AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
- const AAMDNodes &V1AAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2);
-
- // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
- // instruction against another.
- AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
- const AAMDNodes &PNAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo);
-
- /// aliasSelect - Disambiguate a Select instruction against another value.
- AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo);
-
- AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
- AAMDNodes V1AATag,
- const Value *V2, uint64_t V2Size,
- AAMDNodes V2AATag);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
- "Basic Alias Analysis (stateless AA impl)",
- false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
- "Basic Alias Analysis (stateless AA impl)",
- false, true, false)
-
-
-ImmutablePass *llvm::createBasicAliasAnalysisPass() {
- return new BasicAliasAnalysis();
-}
-
-/// pointsToConstantMemory - Returns whether the given pointer value
-/// points to memory that is local to the function, with global constants being
-/// considered local to all functions.
-bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+/// Returns whether the given pointer value points to memory that is local to
+/// the function, with global constants being considered local to all
+/// functions.
+bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
if (!Visited.insert(V).second) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
// An alloca instruction defines local memory.
@@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant()) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
continue;
}
@@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup) {
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
for (Value *IncValue : PN->incoming_values())
Worklist.push_back(IncValue);
@@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
// Otherwise be conservative.
Visited.clear();
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
} while (!Worklist.empty() && --MaxLookup);
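The generic shape of the bounded scan pointsToConstantMemory performs above, as a hedged STL sketch (not the LLVM worklist types): pop a value, skip visited ones, push phi-like successors, and give up conservatively when the budget runs out.

    #include <set>
    #include <vector>

    template <typename T, typename Expand>
    bool allReachableSatisfy(T Root, Expand Successors, bool (*Pred)(T),
                             unsigned Budget) {
      std::vector<T> Worklist{Root};
      std::set<T> Visited;
      do {
        T V = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(V).second)
          continue;
        if (!Pred(V))
          return false;
        for (T S : Successors(V))
          Worklist.push_back(S);
      } while (!Worklist.empty() && --Budget);
      return Worklist.empty(); // budget exhausted => answer conservatively
    }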
@@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS,
return false;
}
-/// getModRefBehavior - Return the behavior when calling the given call site.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+/// Returns the behavior when calling the given call site.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (CS.doesNotAccessMemory())
// Can't do better than this.
- return DoesNotAccessMemory;
+ return FMRB_DoesNotAccessMemory;
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If the callsite knows it only reads memory, don't return worse
// than that.
if (CS.onlyReadsMemory())
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
if (CS.onlyAccessesArgMemory())
- Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- // The AliasAnalysis base class has some smarts, lets use them.
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ // The AAResultBase base class has some smarts, let's use them.
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
}
-/// getModRefBehavior - Return the behavior when calling the given function.
-/// For use when the call site is not known.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+/// Returns the behavior when calling the given function. For use when the call
+/// site is not known.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
// If the function declares it doesn't access memory, we can't do better.
if (F->doesNotAccessMemory())
- return DoesNotAccessMemory;
-
- // For intrinsics, we can check the table.
- if (Intrinsic::ID iid = F->getIntrinsicID()) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/IR/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
- }
+ return FMRB_DoesNotAccessMemory;
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If the function declares it only reads memory, go with that.
if (F->onlyReadsMemory())
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
if (F->onlyAccessesArgMemory())
- Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
- Min = OnlyAccessesArgumentPointees;
+ Min = FMRB_OnlyAccessesArgumentPointees;
// Otherwise be conservative.
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
switch (II->getIntrinsicID()) {
default:
@@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
- return ArgIdx ? Ref : Mod;
+ return ArgIdx ? MRI_Ref : MRI_Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -733,40 +609,82 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
if (CS.getCalledFunction() &&
- isMemsetPattern16(CS.getCalledFunction(), *TLI)) {
+ isMemsetPattern16(CS.getCalledFunction(), TLI)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
- return ArgIdx ? Ref : Mod;
+ return ArgIdx ? MRI_Ref : MRI_Mod;
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return AliasAnalysis::getArgModRefInfo(CS, ArgIdx);
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+ return MRI_Ref;
+
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+ return MRI_NoModRef;
+
+ return AAResultBase::getArgModRefInfo(CS, ArgIdx);
}
static bool isAssumeIntrinsic(ImmutableCallSite CS) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II && II->getIntrinsicID() == Intrinsic::assume)
- return true;
+ return II && II->getIntrinsicID() == Intrinsic::assume;
+}
- return false;
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return nullptr;
}
-bool BasicAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+
+ // If we have a directly cached entry for these locations, we have recursed
+ // through this once, so just return the cached results. Notably, when this
+ // happens, we don't clear the cache.
+ auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
+ if (CacheIt != AliasCache.end())
+ return CacheIt->second;
+
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
+ LocB.Size, LocB.AATags);
+ // AliasCache rarely has more than 1 or 2 elements, always use
+ // shrink_and_clear so it quickly returns to the inline capacity of the
+ // SmallDenseMap if it ever grows larger.
+ // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+ AliasCache.shrink_and_clear();
+ VisitedPhiBBs.clear();
+ return Alias;
}
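A toy model (plain std::map, hypothetical types) of the caching contract in the alias() body above: a re-entered query returns the cached answer without clearing, and only the outermost query resets the cache.

    #include <map>
    #include <utility>

    enum Result { No, May, Must };
    using Key = std::pair<int, int>; // stands in for LocPair

    Result query(Key K, std::map<Key, Result> &Cache, bool Outermost) {
      auto It = Cache.find(K);
      if (It != Cache.end())   // recursed into a query already in flight:
        return It->second;     // return the cached answer, keep the cache
      Cache[K] = May;          // seed a conservative answer to break cycles
      Result R = May;          // ... real analysis would refine R here ...
      Cache[K] = R;
      if (Outermost)
        Cache.clear();         // outermost query: reset for the next one
      return R;
    }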
-/// getModRefInfo - Check to see if the specified callsite can clobber the
-/// specified memory object. Since we only look at local properties of this
-/// function, we really can't say much about this query. We do, however, use
-/// simple "address taken" analysis on local objects.
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
+/// Checks to see if the specified callsite can clobber the specified memory
+/// object.
+///
+/// Since we only look at local properties of this function, we really can't
+/// say much about this query. We do, however, use simple "address taken"
+/// analysis on local objects.
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
@@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (isa<AllocaInst>(Object))
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
- return NoModRef;
+ return MRI_NoModRef;
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
@@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) {
+ AliasResult AR =
+ getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+ if (AR) {
PassedAsArg = true;
break;
}
}
if (!PassedAsArg)
- return NoModRef;
+ return MRI_NoModRef;
}
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
if (isAssumeIntrinsic(CS))
- return NoModRef;
+ return MRI_NoModRef;
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ // The AAResultBase base class has some smarts, let's use them.
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
- return NoModRef;
+ return MRI_NoModRef;
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ // The AAResultBase base class has some smarts, let's use them.
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
-/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
-/// operators, both having the exact same pointer operand.
+/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
+/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
uint64_t V1Size,
const GEPOperator *GEP2,
@@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
ConstantInt *C2 =
dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
- // If the last (struct) indices aren't constants, we can't say anything.
- // If they're identical, the other indices might be also be dynamically
- // equal, so the GEPs can alias.
- if (!C1 || !C2 || C1 == C2)
+ // If the last (struct) indices are constants and are equal, the other indices
+ // might also be dynamically equal, so the GEPs can alias.
+ if (C1 && C2 && C1 == C2)
return MayAlias;
// Find the last-indexed type of the GEP, i.e., the type you'd get if
@@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
IntermediateIndices.push_back(GEP1->getOperand(i + 1));
}
- StructType *LastIndexedStruct =
- dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
- GEP1->getSourceElementType(), IntermediateIndices));
+ auto *Ty = GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices);
+ StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
+
+ if (isa<SequentialType>(Ty)) {
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a sequential
+ // type (array or pointer);
+ // - both GEPs only index through arrays prior to that.
+ //
+ // Because array indices greater than the number of elements are valid in
+ // GEPs, unless we know the intermediate indices are identical between
+ // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
+ // partially overlap. We also need to check that the loaded size matches
+ // the element size, otherwise we could still have overlap.
+ const uint64_t ElementSize =
+ DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
+ if (V1Size != ElementSize || V2Size != ElementSize)
+ return MayAlias;
+
+ for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
+ if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
+ return MayAlias;
- if (!LastIndexedStruct)
+ // Now we know that the array/pointer that GEP1 indexes into and that
+ // GEP2 indexes into must either precisely overlap or be disjoint.
+ // Because they cannot partially overlap and because fields in an array
+ // cannot overlap, if we can prove the final indices are different between
+ // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
+
+ // If the last indices are constants, we've already checked they don't
+ // equal each other so we can exit early.
+ if (C1 && C2)
+ return NoAlias;
+ if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
+ GEP2->getOperand(GEP2->getNumOperands() - 1),
+ DL))
+ return NoAlias;
+ return MayAlias;
+ } else if (!LastIndexedStruct || !C1 || !C2) {
return MayAlias;
+ }
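A toy model (plain C++, not the LLVM types) of the sequential-type rule added above: equal-sized accesses at distinct constant indices of the same array cannot overlap, but only when each access covers exactly one element.

    #include <cassert>
    #include <cstdint>

    bool mayOverlap(uint64_t IdxA, uint64_t IdxB, uint64_t ElemSize,
                    uint64_t SizeA, uint64_t SizeB) {
      // Mirrors the V1Size/V2Size checks above: disjointness is only
      // provable when each access is exactly one element wide.
      if (SizeA != ElemSize || SizeB != ElemSize)
        return true;
      return IdxA == IdxB;
    }

    int main() {
      assert(!mayOverlap(2, 3, 4, 4, 4)); // a[2] vs a[3], i32: disjoint
      assert(mayOverlap(2, 3, 4, 8, 4));  // an 8-byte load spans a[2..3]
    }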
// We know that:
// - both GEPs begin indexing from the exact same pointer;
@@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
return MayAlias;
}
-/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
-/// against another pointer. We know that V1 is a GEP, but we don't know
-/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
-/// UnderlyingV2 is the same for V2.
+/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
+/// another pointer.
///
-AliasResult BasicAliasAnalysis::aliasGEP(
- const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo,
- const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2) {
+/// We know that V1 is a GEP, but we don't know anything about V2.
+/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// V2.
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const AAMDNodes &V1AAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
int64_t GEP1BaseOffset;
bool GEP1MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
- // We have to get two AssumptionCaches here because GEP1 and V2 may be from
- // different functions.
- // FIXME: This really doesn't make any sense. We get a dominator tree below
- // that can only refer to a single function. But this function (aliasGEP) is
- // a method on an immutable pass that can be called when there *isn't*
- // a single function. The old pass management layer makes this "work", but
- // this isn't really a clean solution.
- AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
- AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
- if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
- AC1 = &ACT.getAssumptionCache(
- const_cast<Function &>(*GEP1I->getParent()->getParent()));
- if (auto *I2 = dyn_cast<Instruction>(V2))
- AC2 = &ACT.getAssumptionCache(
- const_cast<Function &>(*I2->getParent()->getParent()));
-
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
@@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP(
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
- AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
- V1AAInfo, UnderlyingV2,
- V2Size, V2AAInfo);
+ AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
+ UnderlyingV2, V2Size, V2AAInfo);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP(
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, *DL, AC2, DT);
+ GEP2MaxLookupReached, DL, &AC, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an
+ // assert.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If the max search depth is reached the result is undefined
@@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP(
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
- if (BaseAlias != MustAlias) return BaseAlias;
+ if (BaseAlias != MustAlias)
+ return BaseAlias;
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, *DL, AC2, DT);
+ GEP2MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If we know the two GEPs are based off of the exact same pointer (and not
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
- if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
- AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+ if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
if (R != MayAlias)
return R;
@@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, *DL, AC1, DT);
+ GEP1MaxLookupReached, DL, &AC, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
if (GEP1BasePtr != UnderlyingV1) {
- assert(!DL &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If the max search depth is reached the result is undefined
@@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP(
}
}
- // Try to distinguish something like &A[i][1] against &A[42][0].
- // Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
- for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
- Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+ bool AllPositive = true;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales. We
+ // don't need std::abs here (even if the scale's negative) as we'll
+ // be ^'ing Modulo with itself later.
+ Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+
+ if (AllPositive) {
+ // If the Value could change between cycles, then any reasoning about
+ // the Value this cycle may not hold in the next cycle. We'll just
+ // give up if we can't determine conditions that hold for every cycle:
+ const Value *V = GEP1VariableIndices[i].V;
+
+ bool SignKnownZero, SignKnownOne;
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ 0, &AC, nullptr, DT);
+
+ // Zero-extension widens the variable, and so forces the sign
+ // bit to zero.
+ bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+ SignKnownZero |= IsZExt;
+ SignKnownOne &= !IsZExt;
+
+ // If the variable begins with a zero then we know it's
+ // positive, regardless of whether the value is signed or
+ // unsigned.
+ int64_t Scale = GEP1VariableIndices[i].Scale;
+ AllPositive =
+ (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ }
+ }
+
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
@@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP(
V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
V1Size <= Modulo - ModOffset)
return NoAlias;
+
+ // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+ // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+ // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+ if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ return NoAlias;
+
+ if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+ GEP1BaseOffset, &AC, DT))
+ return NoAlias;
}
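
The loop above folds two independent facts into one pass over the variable indices. A standalone sketch of the modulus half (simplified numbers, my own example; not part of the patch): OR-ing the scales and keeping the lowest set bit yields a power of two that divides every variable term, so the constant offset alone bounds how close the pointers can get.

#include <cassert>
#include <cstdint>

int main() {
  // OR all scales together; the lowest set bit of the result divides every
  // variable term (std::abs is unneeded, exactly as the comment above notes).
  uint64_t Modulo = 0;
  const int64_t Scales[] = {8, -24}; // e.g. ... + i*8 + j*(-24)
  for (int64_t S : Scales)
    Modulo |= (uint64_t)S;
  Modulo = Modulo ^ (Modulo & (Modulo - 1)); // keep only the lowest set bit
  assert(Modulo == 8);

  // With GEP1BaseOffset == 4 the two pointers stay congruent mod 8, so
  // 4-byte accesses on each side can never overlap.
  int64_t GEP1BaseOffset = 4;
  uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
  uint64_t V1Size = 4, V2Size = 4;
  assert(ModOffset >= V2Size && V1Size <= Modulo - ModOffset); // NoAlias
  return 0;
}
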
// Statically, we can see that the base objects are the same, but the
@@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
return MayAlias;
}
-/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
-/// instruction against another.
-AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI,
- uint64_t SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
+/// against another.
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const AAMDNodes &SIAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
- AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
- SI2->getTrueValue(), V2Size, V2AAInfo);
+ AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+ SI2->getTrueValue(), V2Size, V2AAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
- SI2->getFalseValue(), V2Size, V2AAInfo);
+ aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+ SI2->getFalseValue(), V2Size, V2AAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
  // If both arms of the Select node NoAlias or MustAlias V2, then it returns
  // NoAlias / MustAlias; otherwise, it returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
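
Both arms must agree for the select to keep a precise answer. A standalone sketch of the merge rule (the Partial/Must case follows upstream MergeAliasResults, whose body falls outside this hunk, so treat it as an assumption):

#include <cassert>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

static AliasResult mergeAliasResults(AliasResult A, AliasResult B) {
  if (A == B) // agreeing arms keep their precision
    return A;
  if ((A == PartialAlias && B == MustAlias) ||
      (B == PartialAlias && A == MustAlias))
    return PartialAlias; // Must on one arm, Partial on the other: overlap
  return MayAlias; // any other disagreement loses all precision
}

int main() {
  assert(mergeAliasResults(NoAlias, NoAlias) == NoAlias);
  assert(mergeAliasResults(MustAlias, PartialAlias) == PartialAlias);
  assert(mergeAliasResults(NoAlias, MustAlias) == MayAlias);
  return 0;
}
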
-// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
-// against another.
-AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
- const AAMDNodes &PNAAInfo,
- const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
+/// another.
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const AAMDNodes &PNAAInfo, const Value *V2,
+ uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
- PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2AAInfo);
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2AAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
return Alias;
}
- SmallPtrSet<Value*, 4> UniqueSrc;
- SmallVector<Value*, 4> V1Srcs;
+ SmallPtrSet<Value *, 4> UniqueSrc;
+ SmallVector<Value *, 4> V1Srcs;
+ bool isRecursive = false;
for (Value *PV1 : PN->incoming_values()) {
if (isa<PHINode>(PV1))
      // If any of the sources is itself a PHI, return MayAlias conservatively
@@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// sides are PHI nodes. In which case, this is O(m x n) time where 'm'
// and 'n' are the number of PHI sources.
return MayAlias;
+
+ if (EnableRecPhiAnalysis)
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
+ }
+
if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
- V1Srcs[0], PNSize, PNAAInfo);
+ // If this PHI node is recursive, set the size of the accessed memory to
+ // unknown to represent all the possible values the GEP could advance the
+ // pointer to.
+ if (isRecursive)
+ PNSize = MemoryLocation::UnknownSize;
+
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
- V, PNSize, PNAAInfo);
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
return Alias;
}
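
The new isRecursive path matches the classic pointer-induction shape p = phi(base, gep p, C). A standalone model of the test and its effect (the struct and names are illustrative stand-ins, not LLVM types):

#include <cassert>
#include <cstdint>
#include <limits>

constexpr uint64_t UnknownSize = std::numeric_limits<uint64_t>::max();

// Stands in for "incoming value is GEP(PN, <single ConstantInt index>)".
struct Incoming {
  bool IsGEP;
  const void *PointerOperand;
  unsigned NumIndices;
  bool IndexIsConstantInt;
};

static bool advancesOwnPhi(const Incoming &IV, const void *PN) {
  return IV.IsGEP && IV.PointerOperand == PN && IV.NumIndices == 1 &&
         IV.IndexIsConstantInt;
}

int main() {
  int PN; // identity of the PHI node itself
  Incoming Init{false, nullptr, 0, false}; // p = base on loop entry
  Incoming Step{true, &PN, 1, true};       // p = gep p, 4 on the backedge
  uint64_t PNSize = 8;
  // The stepping arm is skipped instead of recursed into, and the access
  // size is widened to cover every address the GEP could advance to.
  if (advancesOwnPhi(Step, &PN) || advancesOwnPhi(Init, &PN))
    PNSize = UnknownSize;
  assert(PNSize == UnknownSize);
  return 0;
}
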
-// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
-// such as array references.
-//
-AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
- AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size,
- AAMDNodes V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
+/// array references.
+AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+ AAMDNodes V1AAInfo, const Value *V2,
+ uint64_t V2Size, AAMDNodes V2AAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return MustAlias;
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
- return NoAlias; // Scalars cannot alias each other
+ return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
+ const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
- if (DL)
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
- (V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
- return NoAlias;
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O1, V2Size, DL, TLI)))
+ return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
- AliasCache.insert(std::make_pair(Locs, MayAlias));
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
if (!Pair.second)
return Pair.first->second;
@@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
- AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
- AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo);
- if (Result != MayAlias) return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
}
  // If both pointers are pointing into the same object and one of the
  // accesses covers the entire object, then the accesses must overlap in
  // some way.
- if (DL && O1 == O2)
+ if (O1 == O2)
if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSize(O1, V1Size, *DL, *TLI)) ||
+ isObjectSize(O1, V1Size, DL, TLI)) ||
(V2Size != MemoryLocation::UnknownSize &&
- isObjectSize(O2, V2Size, *DL, *TLI)))
+ isObjectSize(O2, V2Size, DL, TLI)))
return AliasCache[Locs] = PartialAlias;
- AliasResult Result =
- AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ // Recurse back into the best AA results we have, potentially with refined
+ // memory locations. We have already ensured that BasicAA has a MayAlias
+ // cache result for these, so any recursion back into BasicAA won't loop.
+ AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
return AliasCache[Locs] = Result;
}
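
The cache pre-seeding above is what makes the final getBestAAResults() recursion safe. A standalone sketch of the keying and seeding discipline (uintptr_t stands in for Value* identity; not part of the patch):

#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

int main() {
  std::map<std::pair<uintptr_t, uintptr_t>, AliasResult> Cache;
  uintptr_t V1 = 0x2000, V2 = 0x1000;

  // Canonicalize the pair so alias(A, B) and alias(B, A) share one entry.
  auto Key = std::make_pair(V1, V2);
  if (Key.first > Key.second)
    std::swap(Key.first, Key.second);

  // Seed with MayAlias: a recursive query for the same pair terminates
  // immediately with the conservative answer instead of looping.
  auto Ins = Cache.insert({Key, MayAlias});
  assert(Ins.second);
  Cache[Key] = NoAlias; // the refined result overwrites the seed

  auto Key2 = std::make_pair(V2, V1);
  if (Key2.first > Key2.second)
    std::swap(Key2.first, Key2.second);
  assert(Cache[Key2] == NoAlias); // the swapped query hits the same entry
  return 0;
}
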
-bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
- const Value *V2) {
+/// Check whether two Values can be considered equivalent.
+///
+/// In addition to pointer equivalence of \p V and \p V2 this checks whether
+/// they cannot be part of a cycle in the value graph by looking at all
+/// visited phi nodes and making sure that the phis cannot reach the value. We
+/// have to do this because we are looking through phi nodes (that is, we say
+/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
+ const Value *V2) {
if (V != V2)
return false;
@@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
return false;
- // Use dominance or loop info if available.
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
-
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
for (auto *P : VisitedPhiBBs)
- if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
+ if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
return false;
return true;
}
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-void BasicAliasAnalysis::GetIndexDifference(
+/// Computes the symbolic difference between two decomposed GEPs.
+///
+/// Dest and Src are the variable indices from two decomposed GetElementPtr
+/// instructions GEP1 and GEP2 which have common base pointers.
+void BasicAAResult::GetIndexDifference(
SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty())
@@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference(
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
- ExtensionKind Extension = Src[i].Extension;
+ unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
int64_t Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
- Dest[j].Extension != Extension)
+ Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
continue;
// If we found it, subtract off Scale V's from the entry in Dest. If it
@@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference(
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
- VariableGEPIndex Entry = { V, Extension, -Scale };
+ VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
Dest.push_back(Entry);
}
}
}
+
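GetIndexDifference cancels matching (value, extension-profile) terms scale by scale and appends the leftovers negated. A standalone model with plain structs (my own simplification of VariableGEPIndex; not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

struct VarIndex {
  int V; // stands in for the Value* identity
  unsigned ZExtBits, SExtBits;
  int64_t Scale;
};

static void getIndexDifference(std::vector<VarIndex> &Dest,
                               const std::vector<VarIndex> &Src) {
  for (const VarIndex &S : Src) {
    int64_t Scale = S.Scale;
    for (auto It = Dest.begin(); It != Dest.end(); ++It) {
      if (It->V != S.V || It->ZExtBits != S.ZExtBits ||
          It->SExtBits != S.SExtBits)
        continue;
      It->Scale -= Scale; // subtract off Scale V's from the Dest entry
      if (It->Scale == 0)
        Dest.erase(It);   // fully cancelled
      Scale = 0;
      break;
    }
    if (Scale) // not found in Dest: append negated
      Dest.push_back({S.V, S.ZExtBits, S.SExtBits, -Scale});
  }
}

int main() {
  std::vector<VarIndex> Dest = {{/*i*/ 1, 0, 0, 8}, {/*j*/ 2, 0, 0, 4}};
  std::vector<VarIndex> Src = {{/*i*/ 1, 0, 0, 8}, {/*k*/ 3, 0, 0, 2}};
  getIndexDifference(Dest, Src);
  // i cancels, j stays, k enters negated: Dest == { j*4, k*(-2) }.
  assert(Dest.size() == 2 && Dest[0].V == 2 && Dest[1].Scale == -2);
  return 0;
}
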
+bool BasicAAResult::constantOffsetHeuristic(
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
+ uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
+ V2Size == MemoryLocation::UnknownSize)
+ return false;
+
+ const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+
+ if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
+ Var0.Scale != -Var1.Scale)
+ return false;
+
+ unsigned Width = Var1.V->getType()->getIntegerBitWidth();
+
+  // We'll strip off the Extensions of Var0 and Var1 and do another round
+  // of GetLinearExpression decomposition. For example, if Var0 is
+  // zext(%x + 1), we should get V0 == %x and V0Offset == 1.
+
+ APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
+ V1Offset(Width, 0);
+ bool NSW = true, NUW = true;
+ unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
+ const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
+ V0SExtBits, DL, 0, AC, DT, NSW, NUW);
+ NSW = true, NUW = true;
+ const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
+ V1SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+ if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
+ V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
+ return false;
+
+ // We have a hit - Var0 and Var1 only differ by a constant offset!
+
+  // If we've been sext'ed then zext'd, the maximum difference between Var0 and
+ // Var1 is possible to calculate, but we're just interested in the absolute
+ // minimum difference between the two. The minimum distance may occur due to
+ // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so
+ // the minimum distance between %i and %i + 5 is 3.
+ APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
+ MinDiff = APIntOps::umin(MinDiff, Wrapped);
+ uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+
+ // We can't definitely say whether GEP1 is before or after V2 due to wrapping
+ // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
+ // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
+ // V2Size can fit in the MinDiffBytes gap.
+ return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
+ V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+}
+
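The wrapping arithmetic in the comment above can be checked directly. A standalone sketch using 3-bit modular arithmetic in place of APInt (an assumption for brevity; not part of the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Mask = 0b111;          // i3: values live mod 8
  uint64_t Diff = 5 & Mask;             // V0Offset - V1Offset == 5
  uint64_t Wrapped = (0 - Diff) & Mask; // distance the other way: 3
  uint64_t MinDiff = std::min(Diff, Wrapped);
  assert(MinDiff == 3); // %i and %i + 5 are always at least 3 apart

  // NoAlias requires both access sizes (plus |BaseOffset|) to fit the gap.
  uint64_t V1Size = 1, V2Size = 2, AbsBaseOffset = 0;
  assert(V1Size + AbsBaseOffset <= MinDiff &&
         V2Size + AbsBaseOffset <= MinDiff);
  return 0;
}
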
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+char BasicAA::PassID;
+
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return BasicAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getCachedResult<DominatorTreeAnalysis>(F),
+ AM->getCachedResult<LoopAnalysis>(F));
+}
+
+BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char BasicAAWrapperPass::ID = 0;
+void BasicAAWrapperPass::anchor() {}
+
+INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+
+FunctionPass *llvm::createBasicAAWrapperPass() {
+ return new BasicAAWrapperPass();
+}
+
+bool BasicAAWrapperPass::runOnFunction(Function &F) {
+ auto &ACT = getAnalysis<AssumptionCacheTracker>();
+ auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+ ACT.getAssumptionCache(F),
+ DTWP ? &DTWP->getDomTree() : nullptr,
+ LIWP ? &LIWP->getLoopInfo() : nullptr));
+
+ return false;
+}
+
+void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
+ return BasicAAResult(
+ F.getParent()->getDataLayout(),
+ P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+}
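
A hypothetical consumer of the helper above (illustrative only; "MyAAClientPass" is not part of this patch, and the sketch assumes the LLVM headers this file already includes). It requests exactly the analyses the helper pulls out of the pass via P.getAnalysis<>():

struct MyAAClientPass : public FunctionPass {
  static char ID;
  MyAAClientPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Mirror createLegacyPMBasicAAResult's requirements.
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    BasicAAResult BAR = createLegacyPMBasicAAResult(*this, F);
    (void)BAR; // query BAR.alias(MemoryLocation(...), MemoryLocation(...))
    return false;
  }
};
char MyAAClientPass::ID = 0;
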
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 3d819eb..90b7a33 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> {
typedef Function::const_iterator nodes_iterator;
static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return &G->getFunction()->front();
}
static ChildIteratorType child_begin(const NodeType *N) {
return succ_begin(N);
@@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
} // end namespace llvm
#endif
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
- "Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
- "Block Frequency Analysis", true, true)
-
-char BlockFrequencyInfo::ID = 0;
-
+BlockFrequencyInfo::BlockFrequencyInfo() {}
-BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
- initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
-}
-
-BlockFrequencyInfo::~BlockFrequencyInfo() {}
-
-void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfo>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.setPreservesAll();
+BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
+ calculate(F, BPI, LI);
}
-bool BlockFrequencyInfo::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+void BlockFrequencyInfo::calculate(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
if (!BFI)
BFI.reset(new ImplType);
- BFI->doFunction(&F, &BPI, &LI);
+ BFI->calculate(F, BPI, LI);
#ifndef NDEBUG
if (ViewBlockFreqPropagationDAG != GVDT_None)
view();
#endif
- return false;
-}
-
-void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
-
-void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
- if (BFI) BFI->print(O);
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI ? BFI->getBlockFreq(BB) : 0;
}
+void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB,
+ uint64_t Freq) {
+ assert(BFI && "Expected analysis to be available");
+ BFI->setBlockFreq(BB, Freq);
+}
+
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
void BlockFrequencyInfo::view() const {
@@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
uint64_t BlockFrequencyInfo::getEntryFreq() const {
return BFI ? BFI->getEntryFreq() : 0;
}
+
+void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
+
+void BlockFrequencyInfo::print(raw_ostream &OS) const {
+ if (BFI)
+ BFI->print(OS);
+}
+
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+
+char BlockFrequencyInfoWrapperPass::ID = 0;
+
+
+BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass()
+ : FunctionPass(ID) {
+ initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {}
+
+void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BFI.print(OS);
+}
+
+void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); }
+
+bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI.calculate(F, BPI, LI);
+ return false;
+}
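
This file now follows the same result/wrapper split as BasicAA above. A standalone sketch of the pattern (a simplified model, not LLVM code): the result object owns the data and the thin wrapper pass just drives it.

#include <cassert>
#include <cstdint>
#include <map>
#include <string>

struct Result {                       // stands in for BlockFrequencyInfo
  std::map<std::string, uint64_t> Freqs;
  void calculate() { Freqs["entry"] = 16; } // body of the old runOnFunction
  void releaseMemory() { Freqs.clear(); }
};

struct WrapperPass {                  // stands in for the new wrapper pass
  Result R;
  bool runOnFunction() { R.calculate(); return false; }
  void releaseMemory() { R.releaseMemory(); }
};

int main() {
  WrapperPass P;
  P.runOnFunction();
  assert(P.R.Freqs.count("entry") == 1);
  P.releaseMemory();
  assert(P.R.Freqs.empty());
  return 0;
}
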
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 6ceda06..48e23af 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
return Freqs[Node.Index].Scaled;
}
+void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
+ uint64_t Freq) {
+ assert(Node.isValid() && "Expected valid node");
+ assert(Node.Index < Freqs.size() && "Expected legal index");
+ Freqs[Node.Index].Integer = Freq;
+}
+
std::string
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
return std::string();
@@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
DEBUG(dbgs() << " - Add back edge mass for node "
<< getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
- Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ if (BackedgeMass.getMass() > 0)
+ Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ else
+ DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
}
DitheringDistributer D(Dist, LoopMass);
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 430b412..cf0cc8d 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -27,13 +27,13 @@ using namespace llvm;
#define DEBUG_TYPE "branch-prob"
-INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
+INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
-char BranchProbabilityInfo::ID = 0;
+char BranchProbabilityInfoWrapperPass::ID = 0;
// Weights are for internal use only. They are used by heuristics to help
// estimate edges' probability. Example:
@@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-// Standard weight value. Used when none of the heuristics set weight for
-// the edge.
-static const uint32_t NORMAL_WEIGHT = 16;
-
-// Minimum weight of an edge. Please note, that weight is NEVER 0.
-static const uint32_t MIN_WEIGHT = 1;
-
/// \brief Calculate edge weights for successors that lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
@@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
return false;
- uint32_t UnreachableWeight =
- std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(),
- E = UnreachableEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, UnreachableWeight);
+ // If the terminator is an InvokeInst, check only the normal destination block
+  // as the unwind edge of InvokeInst is also very unlikely to be taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ if (PostDominatedByUnreachable.count(II->getNormalDest())) {
+ PostDominatedByUnreachable.insert(BB);
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
- if (ReachableEdges.empty())
+ if (ReachableEdges.empty()) {
+ BranchProbability Prob(1, UnreachableEdges.size());
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
return true;
- uint32_t ReachableWeight =
- std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
- NORMAL_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(),
- E = ReachableEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, ReachableWeight);
+ }
+
+ BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ UnreachableEdges.size());
+ BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ ReachableEdges.size());
+
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, UnreachableProb);
+ for (unsigned SuccIdx : ReachableEdges)
+ setEdgeProbability(BB, SuccIdx, ReachableProb);
return true;
}
@@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
WeightSum = 0;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- uint32_t W = Weights[i] / ScalingFactor;
- WeightSum += W;
- setEdgeWeight(BB, i, W);
+ Weights[i] /= ScalingFactor;
+ WeightSum += Weights[i];
}
+
+ if (WeightSum == 0) {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {1, e});
+ } else {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
+ }
+
assert(WeightSum <= UINT32_MAX &&
"Expected weights to scale down to 32 bits");
@@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
- uint32_t ColdWeight =
- std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(),
- E = ColdEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, ColdWeight);
-
- if (NormalEdges.empty())
+ if (NormalEdges.empty()) {
+ BranchProbability Prob(1, ColdEdges.size());
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
return true;
- uint32_t NormalWeight = std::max(
- CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
- for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(),
- E = NormalEdges.end();
- I != E; ++I)
- setEdgeWeight(BB, *I, NormalWeight);
+ }
+
+ BranchProbability ColdProb(CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ ColdEdges.size());
+ BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ NormalEdges.size());
+
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, ColdProb);
+ for (unsigned SuccIdx : NormalEdges)
+ setEdgeProbability(BB, SuccIdx, NormalProb);
return true;
}
@@ -312,15 +328,18 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(PH_TAKEN_WEIGHT,
+ PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
- Loop *L = LI->getLoopFor(BB);
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB,
+ const LoopInfo &LI) {
+ Loop *L = LI.getLoopFor(BB);
if (!L)
return false;
@@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
if (BackEdges.empty() && ExitingEdges.empty())
return false;
- if (uint32_t numBackEdges = BackEdges.size()) {
- uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
- if (backWeight < NORMAL_WEIGHT)
- backWeight = NORMAL_WEIGHT;
+ // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
+ // normalize them so that they sum up to one.
+ SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero());
+ unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
+ if (!BackEdges.empty())
+ Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!InEdges.empty())
+ Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!ExitingEdges.empty())
+ Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);
- for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(),
- EE = BackEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, backWeight);
- }
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ auto Prob = Probs[0] / numBackEdges;
+ for (unsigned SuccIdx : BackEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numInEdges = InEdges.size()) {
- uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
- if (inWeight < NORMAL_WEIGHT)
- inWeight = NORMAL_WEIGHT;
-
- for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(),
- EE = InEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, inWeight);
- }
+ auto Prob = Probs[1] / numInEdges;
+ for (unsigned SuccIdx : InEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
if (uint32_t numExitingEdges = ExitingEdges.size()) {
- uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges;
- if (exitWeight < MIN_WEIGHT)
- exitWeight = MIN_WEIGHT;
-
- for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(),
- EE = ExitingEdges.end(); EI != EE; ++EI) {
- setEdgeWeight(BB, *EI, exitWeight);
- }
+ auto Prob = Probs[2] / numExitingEdges;
+ for (unsigned SuccIdx : ExitingEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
}
return true;
@@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT);
-
+ BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
+ ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
@@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT);
- setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT);
-
+ BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
+ FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
}
@@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
if (!II)
return false;
- setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT);
- setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(IH_TAKEN_WEIGHT,
+ IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb);
+ setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl());
return true;
}
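
Every two-way heuristic in this patch follows the same shape: build one probability N/D and assign its complement to the other edge. A standalone sketch of why the pair always sums to one (a simplified model of BranchProbability; not part of the patch):

#include <cassert>
#include <cstdint>

struct Prob {
  uint32_t N, D;
  Prob getCompl() const { return {D - N, D}; }
};

int main() {
  const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1, IH_NONTAKEN_WEIGHT = 1;
  Prob Taken{IH_TAKEN_WEIGHT, IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT};
  Prob Unwind = Taken.getCompl();
  // (D - N)/D + N/D == 1, so the normal and unwind edges exactly partition
  // the outgoing probability mass.
  assert(Unwind.N == IH_NONTAKEN_WEIGHT && Unwind.D == Taken.D);
  return 0;
}
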
-void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.setPreservesAll();
-}
-
-bool BranchProbabilityInfo::runOnFunction(Function &F) {
- DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
- << " ----\n\n");
- LastF = &F; // Store the last function we ran on for printing.
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- assert(PostDominatedByUnreachable.empty());
- assert(PostDominatedByColdCall.empty());
-
- // Walk the basic blocks in post-order so that we can build up state about
- // the successors of a block iteratively.
- for (auto BB : post_order(&F.getEntryBlock())) {
- DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
- if (calcUnreachableHeuristics(BB))
- continue;
- if (calcMetadataWeights(BB))
- continue;
- if (calcColdCallHeuristics(BB))
- continue;
- if (calcLoopBranchHeuristics(BB))
- continue;
- if (calcPointerHeuristics(BB))
- continue;
- if (calcZeroHeuristics(BB))
- continue;
- if (calcFloatingPointHeuristics(BB))
- continue;
- calcInvokeHeuristics(BB);
- }
-
- PostDominatedByUnreachable.clear();
- PostDominatedByColdCall.clear();
- return false;
-}
-
void BranchProbabilityInfo::releaseMemory() {
- Weights.clear();
+ Probs.clear();
}
-void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
+void BranchProbabilityInfo::print(raw_ostream &OS) const {
OS << "---- Branch Probabilities ----\n";
// We print the probabilities from the last function the analysis ran over,
// or the function it is currently running over.
assert(LastF && "Cannot print prior to running over a function");
- for (Function::const_iterator BI = LastF->begin(), BE = LastF->end();
- BI != BE; ++BI) {
- for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI);
- SI != SE; ++SI) {
- printEdgeProbability(OS << " ", BI, *SI);
+ for (const auto &BI : *LastF) {
+ for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE;
+ ++SI) {
+ printEdgeProbability(OS << " ", &BI, *SI);
}
}
}
-uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
- uint32_t Sum = 0;
-
- for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex());
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum >= PrevSum); (void) PrevSum;
- }
-
- return Sum;
-}
-
bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
}
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
- uint32_t Sum = 0;
- uint32_t MaxWeight = 0;
+ auto MaxProb = BranchProbability::getZero();
BasicBlock *MaxSucc = nullptr;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(BB, Succ);
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
-
- if (Weight > MaxWeight) {
- MaxWeight = Weight;
+ auto Prob = getEdgeProbability(BB, Succ);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
MaxSucc = Succ;
}
}
// Hot probability is at least 4/5 = 80%
- if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
+ if (MaxProb > BranchProbability(4, 5))
return MaxSucc;
return nullptr;
}
-/// Get the raw edge weight for the edge. If can't find it, return
-/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index
-/// to the successors.
-uint32_t BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
- DenseMap<Edge, uint32_t>::const_iterator I =
- Weights.find(std::make_pair(Src, IndexInSuccessors));
+/// Get the raw edge probability for the edge. If it can't be found, return a
+/// default probability 1/N where N is the number of successors. Here an edge
+/// is specified using PredBlock and an index to the successors.
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors) const {
+ auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
- if (I != Weights.end())
+ if (I != Probs.end())
return I->second;
- return DEFAULT_WEIGHT;
+ return {1,
+ static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};
}
-uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
- succ_const_iterator Dst) const {
- return getEdgeWeight(Src, Dst.getSuccessorIndex());
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ succ_const_iterator Dst) const {
+ return getEdgeProbability(Src, Dst.getSuccessorIndex());
}
-/// Get the raw edge weight calculated for the block pair. This returns the sum
-/// of all raw edge weights from Src to Dst.
-uint32_t BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
- uint32_t Weight = 0;
- bool FoundWeight = false;
- DenseMap<Edge, uint32_t>::const_iterator MapI;
+/// Get the raw edge probability calculated for the block pair. This returns
+/// the sum of all raw edge probabilities from Src to Dst.
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ const BasicBlock *Dst) const {
+ auto Prob = BranchProbability::getZero();
+ bool FoundProb = false;
for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
- MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
- if (MapI != Weights.end()) {
- FoundWeight = true;
- Weight += MapI->second;
+ auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
+ if (MapI != Probs.end()) {
+ FoundProb = true;
+ Prob += MapI->second;
}
}
- return (!FoundWeight) ? DEFAULT_WEIGHT : Weight;
+ uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
+ return FoundProb ? Prob : BranchProbability(1, succ_num);
}
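
A standalone sketch of the parallel-edge sum above (my own simplified model with doubles instead of BranchProbability): when several successor slots point at the same block, as in a switch, their probabilities add up, and with no recorded entries the fallback is a uniform 1/num_successors.

#include <cassert>
#include <map>
#include <vector>

int main() {
  // Successor blocks by index; slots 0 and 2 both branch to block 'B'.
  std::vector<char> Succs = {'B', 'A', 'B'};
  std::map<unsigned, double> Probs = {{0, 0.25}, {1, 0.5}, {2, 0.25}};

  double Prob = 0;
  bool FoundProb = false;
  for (unsigned i = 0; i != Succs.size(); ++i)
    if (Succs[i] == 'B') {
      auto It = Probs.find(i);
      if (It != Probs.end()) {
        FoundProb = true;
        Prob += It->second;
      }
    }
  double Result = FoundProb ? Prob : 1.0 / Succs.size();
  assert(Result == 0.5); // 0.25 + 0.25 from the two parallel edges
  return 0;
}
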
-/// Set the edge weight for a given edge specified by PredBlock and an index
-/// to the successors.
-void BranchProbabilityInfo::
-setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors,
- uint32_t Weight) {
- Weights[std::make_pair(Src, IndexInSuccessors)] = Weight;
- DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
- << IndexInSuccessors << " successor weight to "
- << Weight << "\n");
-}
-
-/// Get an edge's probability, relative to other out-edges from Src.
-BranchProbability BranchProbabilityInfo::
-getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const {
- uint32_t N = getEdgeWeight(Src, IndexInSuccessors);
- uint32_t D = getSumForBlock(Src);
-
- return BranchProbability(N, D);
-}
-
-/// Get the probability of going from Src to Dst. It returns the sum of all
-/// probabilities for edges from Src to Dst.
-BranchProbability BranchProbabilityInfo::
-getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
-
- uint32_t N = getEdgeWeight(Src, Dst);
- uint32_t D = getSumForBlock(Src);
-
- return BranchProbability(N, D);
+/// Set the edge probability for a given edge specified by PredBlock and an
+/// index to the successors.
+void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors,
+ BranchProbability Prob) {
+ Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors
+ << " successor probability to " << Prob << "\n");
}
raw_ostream &
@@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
return OS;
}
+
+void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) {
+ DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
+ << " ----\n\n");
+ LastF = &F; // Store the last function we ran on for printing.
+ assert(PostDominatedByUnreachable.empty());
+ assert(PostDominatedByColdCall.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
+ continue;
+ if (calcMetadataWeights(BB))
+ continue;
+ if (calcColdCallHeuristics(BB))
+ continue;
+ if (calcLoopBranchHeuristics(BB, LI))
+ continue;
+ if (calcPointerHeuristics(BB))
+ continue;
+ if (calcZeroHeuristics(BB))
+ continue;
+ if (calcFloatingPointHeuristics(BB))
+ continue;
+ calcInvokeHeuristics(BB);
+ }
+
+ PostDominatedByUnreachable.clear();
+ PostDominatedByColdCall.clear();
+}
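
The calculate() walk above is a first-match chain: each heuristic either claims the block (returns true) or defers to the next. A standalone sketch of that control flow (illustrative lambdas, not the real calculators):

#include <cassert>
#include <functional>
#include <vector>

int main() {
  int Chosen = -1;
  std::vector<std::function<bool()>> Heuristics = {
      [&] { return false; },            // e.g. no unreachable successors
      [&] { return false; },            // e.g. no !prof metadata
      [&] { Chosen = 2; return true; }, // e.g. loop heuristic fires
      [&] { Chosen = 3; return true; }, // never reached for this block
  };
  for (auto &H : Heuristics)
    if (H())
      break;
  assert(Chosen == 2); // later heuristics never override an earlier match
  return 0;
}
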
+
+void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
+ const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.calculate(F, LI);
+ return false;
+}
+
+void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
+
+void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BPI.print(OS);
+}
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
index e15109b..0dfd57d 100644
--- a/contrib/llvm/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F,
/// and return its position in the terminator instruction's list of
/// successors. It is an error to call this with a block that is not a
/// successor.
-unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
- TerminatorInst *Term = BB->getTerminator();
+unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
+ const BasicBlock *Succ) {
+ const TerminatorInst *Term = BB->getTerminator();
#ifndef NDEBUG
unsigned e = Term->getNumSuccessors();
#endif
@@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
return true;
// Linear scan, start at 'A', see whether we hit 'B' or the end first.
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E;
+ ++I) {
if (&*I == B)
return true;
}
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
index fe1c088..4843ed6 100644
--- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -27,18 +27,17 @@
// time.
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "StratifiedSets.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -47,7 +46,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <forward_list>
#include <memory>
#include <tuple>
@@ -55,6 +53,19 @@ using namespace llvm;
#define DEBUG_TYPE "cfl-aa"
+CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {}
+CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+// \brief Information we have about a function and would like to keep around
+struct CFLAAResult::FunctionInfo {
+ StratifiedSets<Value *> Sets;
+ // Lots of functions have < 4 returns. Adjust as necessary.
+ SmallVector<Value *, 4> ReturnedValues;
+
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+};
+
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
@@ -141,129 +152,13 @@ struct Edge {
: From(From), To(To), Weight(W), AdditionalAttrs(A) {}
};
-// \brief Information we have about a function and would like to keep around
-struct FunctionInfo {
- StratifiedSets<Value *> Sets;
- // Lots of functions have < 4 returns. Adjust as necessary.
- SmallVector<Value *, 4> ReturnedValues;
-
- FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
- : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
-};
-
-struct CFLAliasAnalysis;
-
-struct FunctionHandle : public CallbackVH {
- FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA)
- : CallbackVH(Fn), CFLAA(CFLAA) {
- assert(Fn != nullptr);
- assert(CFLAA != nullptr);
- }
-
- ~FunctionHandle() override {}
-
- void deleted() override { removeSelfFromCache(); }
- void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
-
-private:
- CFLAliasAnalysis *CFLAA;
-
- void removeSelfFromCache();
-};
-
-struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis {
-private:
- /// \brief Cached mapping of Functions to their StratifiedSets.
- /// If a function's sets are currently being built, it is marked
- /// in the cache as an Optional without a value. This way, if we
- /// have any kind of recursion, it is discernable from a function
- /// that simply has empty sets.
- DenseMap<Function *, Optional<FunctionInfo>> Cache;
- std::forward_list<FunctionHandle> Handles;
-
-public:
- static char ID;
-
- CFLAliasAnalysis() : ImmutablePass(ID) {
- initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- ~CFLAliasAnalysis() override {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- }
-
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis *)this;
- return this;
- }
-
- /// \brief Inserts the given Function into the cache.
- void scan(Function *Fn);
-
- void evict(Function *Fn) { Cache.erase(Fn); }
-
- /// \brief Ensures that the given function is available in the cache.
- /// Returns the appropriate entry from the cache.
- const Optional<FunctionInfo> &ensureCached(Function *Fn) {
- auto Iter = Cache.find(Fn);
- if (Iter == Cache.end()) {
- scan(Fn);
- Iter = Cache.find(Fn);
- assert(Iter != Cache.end());
- assert(Iter->second.hasValue());
- }
- return Iter->second;
- }
-
- AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- if (LocA.Ptr == LocB.Ptr) {
- if (LocA.Size == LocB.Size) {
- return MustAlias;
- } else {
- return PartialAlias;
- }
- }
-
- // Comparisons between global variables and other constants should be
- // handled by BasicAA.
- // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
- // a GlobalValue and ConstantExpr, but every query needs to have at least
- // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
- // are.
- if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
- return AliasAnalysis::alias(LocA, LocB);
- }
-
- AliasResult QueryResult = query(LocA, LocB);
- if (QueryResult == MayAlias)
- return AliasAnalysis::alias(LocA, LocB);
-
- return QueryResult;
- }
-
- bool doInitialization(Module &M) override;
-};
-
-void FunctionHandle::removeSelfFromCache() {
- assert(CFLAA != nullptr);
- auto *Val = getValPtr();
- CFLAA->evict(cast<Function>(Val));
- setValPtr(nullptr);
-}
-
// \brief Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
- CFLAliasAnalysis &AA;
+ CFLAAResult &AA;
SmallVectorImpl<Edge> &Output;
public:
- GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output)
+ GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output)
: AA(AA), Output(Output) {}
void visitInstruction(Instruction &) {
@@ -480,6 +375,8 @@ public:
}
template <typename InstT> void visitCallLikeInst(InstT &Inst) {
+ // TODO: Add support for noalias args/all the other fun function attributes
+ // that we can tack on.
SmallVector<Function *, 4> Targets;
if (getPossibleTargets(&Inst, Targets)) {
if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
@@ -488,8 +385,16 @@ public:
Output.clear();
}
+ // Because the function is opaque, we need to note that anything
+ // could have happened to the arguments, and that the result could alias
+ // just about anything, too.
+ // The goal of the loop is in part to unify many Values into one set, so we
+ // don't care if the function is void there.
for (Value *V : Inst.arg_operands())
Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll));
+ if (Inst.getNumArgOperands() == 0 &&
+ Inst.getType() != Type::getVoidTy(Inst.getContext()))
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
}
void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
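A concrete reading of the conservative case just added, shown for a hypothetical opaque call:

// %p = call i8* @opaque(i8* %a, i8* %b)
// Edges: (%p, %a, Assign, AttrAll) and (%p, %b, Assign, AttrAll), so %p,
// %a, and %b collapse into one set carrying the unknown attribute; the
// self-edge added above does the same for a call with no operands but a
// non-void result.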
@@ -624,7 +529,7 @@ public:
// ----- Various Edge iterators for the graph ----- //
// \brief Iterator for edges. Because this graph is bidirected, we don't
- // allow modificaiton of the edges using this iterator. Additionally, the
+ // allow modification of the edges using this iterator. Additionally, the
// iterator becomes invalid if you add edges to or from the node you're
// getting the edges of.
struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
@@ -727,16 +632,6 @@ typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
typedef DenseMap<Value *, GraphT::Node> NodeMapT;
}
-// -- Setting up/registering CFLAA pass -- //
-char CFLAliasAnalysis::ID = 0;
-
-INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa",
- "CFL-Based AA implementation", false, true, false)
-
-ImmutablePass *llvm::createCFLAliasAnalysisPass() {
- return new CFLAliasAnalysis();
-}
-
//===----------------------------------------------------------------------===//
// Function declarations that require types defined in the namespace above
//===----------------------------------------------------------------------===//
@@ -751,12 +646,10 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
static EdgeType flipWeight(EdgeType);
// Gets edges of the given Instruction*, writing them to the SmallVector*.
-static void argsToEdges(CFLAliasAnalysis &, Instruction *,
- SmallVectorImpl<Edge> &);
+static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &);
// Gets edges of the given ConstantExpr*, writing them to the SmallVector*.
-static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *,
- SmallVectorImpl<Edge> &);
+static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &);
// Gets the "Level" that one should travel in StratifiedSets
// given an EdgeType.
@@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType);
// Builds the graph needed for constructing the StratifiedSets for the
// given function
-static void buildGraphFrom(CFLAliasAnalysis &, Function *,
+static void buildGraphFrom(CFLAAResult &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
// Gets the edges of a ConstantExpr as if it was an Instruction. This
// function also acts on any nested ConstantExprs, adding the edges
// of those to the given SmallVector as well.
-static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+static void constexprToEdges(CFLAAResult &, ConstantExpr &,
SmallVectorImpl<Edge> &);
// Given an Instruction, this will add it to the graph, along with any
@@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
// addInstructionToGraph would add both the `load` and `getelementptr`
// instructions to the graph appropriately.
-static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+static void addInstructionToGraph(CFLAAResult &, Instruction &,
SmallVectorImpl<Value *> &, NodeMapT &,
GraphT &);
// Notes whether it would be pointless to add the given Value to our sets.
static bool canSkipAddingToSets(Value *Val);
-// Builds the graph + StratifiedSets for a function.
-static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
-
static Optional<Function *> parentFunctionOfValue(Value *Val) {
if (auto *Inst = dyn_cast<Instruction>(Val)) {
auto *Bb = Inst->getParent();
@@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) {
}
static bool hasUsefulEdges(ConstantExpr *CE) {
- // ConstantExpr doens't have terminators, invokes, or fences, so only needs
+ // ConstantExpr doesn't have terminators, invokes, or fences, so only needs
// to check for compares.
return CE->getOpcode() != Instruction::ICmp &&
CE->getOpcode() != Instruction::FCmp;
@@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) {
llvm_unreachable("Incomplete coverage of EdgeType enum");
}
-static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
+static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
assert(hasUsefulEdges(Inst) &&
"Expected instructions to have 'useful' edges");
@@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
v.visit(Inst);
}
-static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE,
+static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE,
SmallVectorImpl<Edge> &Output) {
assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges");
GetEdgesVisitor v(Analysis, Output);
@@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) {
llvm_unreachable("Incomplete switch coverage");
}
-static void constexprToEdges(CFLAliasAnalysis &Analysis,
+static void constexprToEdges(CFLAAResult &Analysis,
ConstantExpr &CExprToCollapse,
SmallVectorImpl<Edge> &Results) {
SmallVector<ConstantExpr *, 4> Worklist;
@@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis,
}
}
-static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst,
SmallVectorImpl<Value *> &ReturnedValues,
NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
@@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
// buy us much that we don't already have. I'd like to add interprocedural
// analysis prior to this, however, in case it somehow requires the graph
// produced here for efficient execution.
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn,
SmallVectorImpl<Value *> &ReturnedValues,
NodeMapT &Map, GraphT &Graph) {
for (auto &Bb : Fn->getBasicBlockList())
@@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) {
return false;
}
-static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
+// Builds the graph + StratifiedSets for a function.
+CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) {
NodeMapT Map;
GraphT Graph;
SmallVector<Value *, 4> ReturnedValues;
- buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph);
+ buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph);
DenseMap<GraphT::Node, Value *> NodeValueMap;
NodeValueMap.resize(Map.size());
@@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
return FunctionInfo(Builder.build(), std::move(ReturnedValues));
}
-void CFLAliasAnalysis::scan(Function *Fn) {
+void CFLAAResult::scan(Function *Fn) {
auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
(void)InsertPair;
assert(InsertPair.second &&
"Trying to scan a function that has already been cached");
- FunctionInfo Info(buildSetsFrom(*this, Fn));
+ FunctionInfo Info(buildSetsFrom(Fn));
Cache[Fn] = std::move(Info);
Handles.push_front(FunctionHandle(Fn, this));
}
-AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); }
+
+/// \brief Ensures that the given function is available in the cache.
+/// Returns the appropriate entry from the cache.
+const Optional<CFLAAResult::FunctionInfo> &
+CFLAAResult::ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+}
+
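scan(), evict(), and ensureCached() form a demand-driven cache keyed by Function: an entry is built on first use, and the FunctionHandle registered in scan() evicts it if the Function is deleted or RAUW'd, so the next query rebuilds transparently. A hedged usage sketch, with Result standing in for a CFLAAResult instance:

const Optional<CFLAAResult::FunctionInfo> &Info = Result.ensureCached(Fn);
assert(Info.hasValue() && "scan() always populates the entry it inserts");
// If Fn dies later, its handle fires removeSelfFromCache(), evict(Fn)
// drops the entry, and a subsequent ensureCached(Fn) re-runs scan().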
+AliasResult CFLAAResult::query(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
auto *ValA = const_cast<Value *>(LocA.Ptr);
auto *ValB = const_cast<Value *>(LocB.Ptr);
@@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
return NoAlias;
}
-bool CFLAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char CFLAA::PassID;
+
+char CFLAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); }
+
+CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) {
+ initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool CFLAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(
+ new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool CFLAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
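With the analysis split into CFLAA (new pass manager) and CFLAAWrapperPass (legacy), a legacy consumer reaches the shared result object through the wrapper. A sketch, assuming the getResult() accessor declared in the header and a hypothetical MyLegacyPass:

void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<CFLAAWrapperPass>();
}
bool MyLegacyPass::runOnFunction(Function &F) {
  CFLAAResult &CFL = getAnalysis<CFLAAWrapperPass>().getResult();
  // P1 and P2 are hypothetical pointer Values from F.
  AliasResult R = CFL.query(MemoryLocation(P1), MemoryLocation(P2));
  (void)R;
  return false;
}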
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index e2799d9..7cec962 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
CallGraph::CallGraph(Module &M)
: M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
- CallsExternalNode(new CallGraphNode(nullptr)) {
+ CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
@@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M)
Root = ExternalCallingNode;
}
+CallGraph::CallGraph(CallGraph &&Arg)
+ : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root),
+ ExternalCallingNode(Arg.ExternalCallingNode),
+ CallsExternalNode(std::move(Arg.CallsExternalNode)) {
+ Arg.FunctionMap.clear();
+ Arg.Root = nullptr;
+ Arg.ExternalCallingNode = nullptr;
+}
+
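The move constructor has to do more than member-wise moves: FunctionMap and CallsExternalNode transfer ownership on their own, but Root and ExternalCallingNode are raw pointers into that owned storage and must be cleared in the source so its destructor sees an empty graph. The idiom in miniature, with illustrative types:

#include <map>
#include <memory>
struct Graph {
  std::map<int, std::unique_ptr<int>> Owned; // moves cleanly by itself
  int *Root = nullptr;                       // raw view into Owned
  Graph() = default;
  Graph(Graph &&G) : Owned(std::move(G.Owned)), Root(G.Root) {
    G.Root = nullptr; // otherwise the moved-from object keeps a dangling view
  }
};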
CallGraph::~CallGraph() {
// CallsExternalNode is not in the function map, delete it explicitly.
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
+ if (CallsExternalNode)
+ CallsExternalNode->allReferencesDropped();
// Reset all nodes' use counts to zero before deleting them to prevent an
// assertion from firing.
@@ -43,8 +52,6 @@ CallGraph::~CallGraph() {
for (auto &I : FunctionMap)
I.second->allReferencesDropped();
#endif
- for (auto &I : FunctionMap)
- delete I.second;
}
void CallGraph::addToCallGraph(Function *F) {
@@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) {
// If this function is not defined in this translation unit, it could call
// anything.
if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
+ Node->addCalledFunction(CallSite(), CallsExternalNode.get());
// Look for calls by this function.
for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
@@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CallsExternalNode);
+ Node->addCalledFunction(CS, CallsExternalNode.get());
else if (!Callee->isIntrinsic())
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
@@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const {
Nodes.reserve(FunctionMap.size());
for (auto I = begin(), E = end(); I != E; ++I)
- Nodes.push_back(I->second);
+ Nodes.push_back(I->second.get());
std::sort(Nodes.begin(), Nodes.end(),
[](CallGraphNode *LHS, CallGraphNode *RHS) {
@@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const {
CN->print(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraph::dump() const { print(dbgs()); }
-#endif
// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
@@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
assert(CGN->empty() && "Cannot remove function from call "
"graph if it references other functions!");
Function *F = CGN->getFunction(); // Get the function for the call graph node
- delete CGN; // Delete the call graph node for this func
FunctionMap.erase(F); // Remove the call graph node from the map
M.getFunctionList().remove(F);
@@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
"Pointing CallGraphNode at a function that already exists");
FunctionMapTy::iterator I = FunctionMap.find(From);
I->second->F = const_cast<Function*>(To);
- FunctionMap[To] = I->second;
+ FunctionMap[To] = std::move(I->second);
FunctionMap.erase(I);
}
@@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
- CallGraphNode *&CGN = FunctionMap[F];
+ auto &CGN = FunctionMap[F];
if (CGN)
- return CGN;
+ return CGN.get();
assert((!F || F->getParent() == &M) && "Function not in current module!");
- return CGN = new CallGraphNode(const_cast<Function*>(F));
+ CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F));
+ return CGN.get();
}
//===----------------------------------------------------------------------===//
@@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << '\n';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraphNode::dump() const { print(dbgs()); }
-#endif
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
G->print(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
-#endif
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 07b389a..07b389a 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp
index 68dcd3c..68dcd3c 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 52ef807..1add2fa 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -52,63 +53,6 @@ namespace {
bool Captured;
};
- struct NumberedInstCache {
- SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts;
- BasicBlock::const_iterator LastInstFound;
- unsigned LastInstPos;
- const BasicBlock *BB;
-
- NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) {
- LastInstFound = BB->end();
- }
-
- /// \brief Find the first of the instructions 'A' or 'B' in 'BB',
- /// numbering each instruction encountered while walking 'BB'.
- const Instruction *find(const Instruction *A, const Instruction *B) {
- const Instruction *Inst = nullptr;
- assert(!(LastInstFound == BB->end() && LastInstPos != 0) &&
- "Instruction supposed to be in NumberedInsts");
-
- // Start the search with the instruction found in the last lookup round.
- auto II = BB->begin();
- auto IE = BB->end();
- if (LastInstFound != IE)
- II = std::next(LastInstFound);
-
- // Number all instructions up to the point where we find 'A' or 'B'.
- for (++LastInstPos; II != IE; ++II, ++LastInstPos) {
- Inst = cast<Instruction>(II);
- NumberedInsts[Inst] = LastInstPos;
- if (Inst == A || Inst == B)
- break;
- }
-
- assert(II != IE && "Instruction not found?");
- LastInstFound = II;
- return Inst;
- }
-
- /// \brief Find out whether 'A' dominates 'B', meaning whether 'A'
- /// comes before 'B' in 'BB'. This is a simplification that considers
- /// cached instruction positions and ignores other basic blocks; it is
- /// only relevant for comparing relative instruction positions inside 'BB'.
- bool dominates(const Instruction *A, const Instruction *B) {
- assert(A->getParent() == B->getParent() &&
- "Instructions must be in the same basic block!");
-
- unsigned NA = NumberedInsts.lookup(A);
- unsigned NB = NumberedInsts.lookup(B);
- if (NA && NB)
- return NA < NB;
- if (NA)
- return true;
- if (NB)
- return false;
-
- return A == find(A, B);
- }
- };
-
/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only support the case where the Value is defined in the same basic block
@@ -116,8 +60,8 @@ namespace {
struct CapturesBefore : public CaptureTracker {
CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
- bool IncludeI)
- : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT),
+ bool IncludeI, OrderedBasicBlock *IC)
+ : OrderedBB(IC), BeforeHere(I), DT(DT),
ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
void tooManyUses() override { Captured = true; }
@@ -131,18 +75,18 @@ namespace {
// Compute the case where both instructions are inside the same basic
// block. Since instructions in the same BB as BeforeHere are numbered in
- // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable'
+ // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable'
// which are very expensive for large basic blocks.
if (BB == BeforeHere->getParent()) {
// 'I' dominates 'BeforeHere' => not safe to prune.
//
- // The value defined by an invoke dominates an instruction only if it
- // dominates every instruction in UseBB. A PHI is dominated only if
- // the instruction dominates every possible use in the UseBB. Since
+ // The value defined by an invoke dominates an instruction only
+ // if it dominates every instruction in UseBB. A PHI is dominated only
+ // if the instruction dominates every possible use in the UseBB. Since
// UseBB == BB, avoid pruning.
if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)
return false;
- if (!LocalInstCache.dominates(BeforeHere, I))
+ if (!OrderedBB->dominates(BeforeHere, I))
return false;
// 'BeforeHere' comes before 'I', it's safe to prune if we also
@@ -157,10 +101,7 @@ namespace {
SmallVector<BasicBlock*, 32> Worklist;
Worklist.append(succ_begin(BB), succ_end(BB));
- if (!isPotentiallyReachableFromMany(Worklist, BB, DT))
- return true;
-
- return false;
+ return !isPotentiallyReachableFromMany(Worklist, BB, DT);
}
// If the value is defined in the same basic block as use and BeforeHere,
@@ -196,7 +137,7 @@ namespace {
return true;
}
- NumberedInstCache LocalInstCache;
+ OrderedBasicBlock *OrderedBB;
const Instruction *BeforeHere;
DominatorTree *DT;
@@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V,
/// returning the value (or part of it) from the function counts as capturing
/// it or not. The boolean StoreCaptures specified whether storing the value
/// (or part of it) into memory anywhere automatically counts as capturing it
-/// or not.
+/// or not. An ordered basic block \p OBB can be used to speed up
+/// queries about relative order among instructions in the same basic block.
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
- DominatorTree *DT, bool IncludeI) {
+ DominatorTree *DT, bool IncludeI,
+ OrderedBasicBlock *OBB) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
+ bool UseNewOBB = OBB == nullptr;
if (!DT)
return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ if (UseNewOBB)
+ OBB = new OrderedBasicBlock(I->getParent());
// TODO: See comment in PointerMayBeCaptured regarding what could be done
// with StoreCaptures.
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
PointerMayBeCaptured(V, &CB);
+
+ if (UseNewOBB)
+ delete OBB;
return CB.Captured;
}
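The new OrderedBasicBlock parameter lets a caller amortize intra-block instruction numbering across many capture queries; passing nullptr preserves the old behavior with a throwaway ordering. A hedged batching sketch (CandidatePointers and Captured are hypothetical):

OrderedBasicBlock OBB(I->getParent()); // numbering is built lazily, once
for (const Value *Ptr : CandidatePointers)
  if (PointerMayBeCapturedBefore(Ptr, /*ReturnCaptures=*/true,
                                 /*StoreCaptures=*/true, I, DT,
                                 /*IncludeI=*/false, &OBB))
    Captured.push_back(Ptr);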
@@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (CallSite::arg_iterator A = B; A != E; ++A)
+ CallSite::data_operand_iterator B =
+ CS.data_operands_begin(), E = CS.data_operands_end();
+ for (CallSite::data_operand_iterator A = B; A != E; ++A)
if (A->get() == V && !CS.doesNotCapture(A - B))
// The parameter is not marked 'nocapture' - captured.
if (Tracker->captured(U))
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index 46a2c43..4090b4c 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
continue;
// If all uses of this value are ephemeral, then so is this value.
- bool FoundNEUse = false;
- for (const User *I : V->users())
- if (!EphValues.count(I)) {
- FoundNEUse = true;
- break;
- }
-
- if (FoundNEUse)
+ if (!std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); }))
continue;
EphValues.insert(V);
@@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
// Skip ephemeral values.
- if (EphValues.count(II))
+ if (EphValues.count(&*II))
continue;
// Special handling for calls.
@@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
+ if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB))
+ notDuplicatable = true;
+
if (const CallInst *CI = dyn_cast<CallInst>(II))
if (CI->cannotDuplicate())
notDuplicatable = true;
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 02a5aef..ccb5663 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
- CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::AddrSpaceCast)
+ CE->getOpcode() == Instruction::BitCast)
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
@@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
if (GV->isConstant() && GV->hasDefinitiveInitializer())
return GV->getInitializer();
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (GA->getAliasee() && !GA->mayBeOverridden())
+ return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL);
+
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE)
@@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
@@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
// return true for a name like "cos\0blah" which strcmp would return equal to
// "cos", but has length 8.
switch (Name[0]) {
- default: return false;
+ default:
+ return false;
case 'a':
- return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2";
+ return Name == "acos" || Name == "asin" || Name == "atan" ||
+ Name == "atan2" || Name == "acosf" || Name == "asinf" ||
+ Name == "atanf" || Name == "atan2f";
case 'c':
- return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
+ return Name == "ceil" || Name == "cos" || Name == "cosh" ||
+ Name == "ceilf" || Name == "cosf" || Name == "coshf";
case 'e':
- return Name == "exp" || Name == "exp2";
+ return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";
case 'f':
- return Name == "fabs" || Name == "fmod" || Name == "floor";
+ return Name == "fabs" || Name == "floor" || Name == "fmod" ||
+ Name == "fabsf" || Name == "floorf" || Name == "fmodf";
case 'l':
- return Name == "log" || Name == "log10";
+ return Name == "log" || Name == "log10" || Name == "logf" ||
+ Name == "log10f";
case 'p':
- return Name == "pow";
+ return Name == "pow" || Name == "powf";
case 's':
return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
- Name == "sinf" || Name == "sqrtf";
+ Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
case 't':
- return Name == "tan" || Name == "tanh";
+ return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
}
}
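The practical effect of the widened name table: single-precision libm calls with constant operands are now recognized. A hypothetical before/after, assuming the usual folding pipeline:

//   %r = call float @powf(float 2.000000e+00, float 1.000000e+01)
// canConstantFoldCallTo() now accepts "powf", and the evaluator below
// computes it with the host double-precision pow:
//   %r is replaced by float 1.024000e+03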
@@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFP::get(Ty->getContext(), V);
}
+ if (IntrinsicID == Intrinsic::floor) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::ceil) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::trunc) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::rint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::nearbyint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
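These folds go through APFloat rather than the host libm, so they are exact, target-independent, and immune to the host's rounding mode; rint and nearbyint can share rmNearestTiesToEven because constant folding never models FP exceptions. A worked instance of the trunc case under the same assumptions:

APFloat V(2.7);                           // host double constant
V.roundToIntegral(APFloat::rmTowardZero); // @llvm.trunc semantics
// V now holds exactly 2.0; no libm call, no fenv dependence.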
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFoldFP(exp, V, Ty);
case Intrinsic::exp2:
return ConstantFoldFP(exp2, V, Ty);
- case Intrinsic::floor:
- return ConstantFoldFP(floor, V, Ty);
- case Intrinsic::ceil:
- return ConstantFoldFP(ceil, V, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
@@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
switch (Name[0]) {
case 'a':
- if (Name == "acos" && TLI->has(LibFunc::acos))
+ if ((Name == "acos" && TLI->has(LibFunc::acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc::acosf)))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin" && TLI->has(LibFunc::asin))
+ else if ((Name == "asin" && TLI->has(LibFunc::asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc::asinf)))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan" && TLI->has(LibFunc::atan))
+ else if ((Name == "atan" && TLI->has(LibFunc::atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc::atanf)))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil" && TLI->has(LibFunc::ceil))
+ if ((Name == "ceil" && TLI->has(LibFunc::ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc::ceilf)))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos" && TLI->has(LibFunc::cos))
+ else if ((Name == "cos" && TLI->has(LibFunc::cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc::cosf)))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh" && TLI->has(LibFunc::cosh))
+ else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc::coshf)))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf" && TLI->has(LibFunc::cosf))
- return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp" && TLI->has(LibFunc::exp))
+ if ((Name == "exp" && TLI->has(LibFunc::exp)) ||
+ (Name == "expf" && TLI->has(LibFunc::expf)))
return ConstantFoldFP(exp, V, Ty);
-
- if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
+ if ((Name == "exp2" && TLI->has(LibFunc::exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc::exp2f)))
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
- }
break;
case 'f':
- if (Name == "fabs" && TLI->has(LibFunc::fabs))
+ if ((Name == "fabs" && TLI->has(LibFunc::fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc::fabsf)))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor" && TLI->has(LibFunc::floor))
+ else if ((Name == "floor" && TLI->has(LibFunc::floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc::floorf)))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))
return ConstantFoldFP(log10, V, Ty);
else if (IntrinsicID == Intrinsic::sqrt &&
(Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
}
break;
case 's':
- if (Name == "sin" && TLI->has(LibFunc::sin))
+ if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc::sinf)))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh" && TLI->has(LibFunc::sinh))
+ else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc::sinhf)))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
- return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf" && TLI->has(LibFunc::sinf))
- return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan" && TLI->has(LibFunc::tan))
+ if ((Name == "tan" && TLI->has(LibFunc::tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc::tanf)))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh" && TLI->has(LibFunc::tanh))
+ else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc::tanhf)))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
if (!TLI)
return nullptr;
- if (Name == "pow" && TLI->has(LibFunc::pow))
+ if ((Name == "pow" && TLI->has(LibFunc::pow)) ||
+ (Name == "powf" && TLI->has(LibFunc::powf)))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod" && TLI->has(LibFunc::fmod))
+ if ((Name == "fmod" && TLI->has(LibFunc::fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc::fmodf)))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2" && TLI->has(LibFunc::atan2))
+ if ((Name == "atan2" && TLI->has(LibFunc::atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc::atan2f)))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index b529c1a..0383cbf 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
Mask[i] = val;
SmallVector<int, 16> ActualMask = SI->getShuffleMask();
- if (Mask != ActualMask)
- return false;
-
- return true;
+ return Mask == ActualMask;
}
static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
@@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
return -1;
switch (I->getOpcode()) {
- case Instruction::GetElementPtr:{
- Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
- return TTI->getAddressComputationCost(ValTy);
- }
+ case Instruction::GetElementPtr:
+ return TTI->getUserCost(I);
case Instruction::Ret:
case Instruction::PHI:
@@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Type*, 4> Tys;
+ SmallVector<Value *, 4> Args;
for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Tys.push_back(II->getArgOperand(J)->getType());
+ Args.push_back(II->getArgOperand(J));
return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
- Tys);
+ Args);
}
return -1;
default:
@@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
- Instruction *Inst = it;
+ Instruction *Inst = &*it;
unsigned Cost = getInstructionCost(Inst);
if (Cost != (unsigned)-1)
OS << "Cost Model: Found an estimated cost of " << Cost;
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
index 9d15786..baee8b3 100644
--- a/contrib/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -60,12 +60,12 @@ public:
void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
bool Delinearization::runOnFunction(Function &F) {
this->F = &F;
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
if (!BasePointer)
break;
AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
-
- // Do not try to delinearize memory accesses that are not AddRecs.
- if (!AR)
- break;
-
O << "\n";
O << "Inst:" << *Inst << "\n";
O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
- O << "AddRec: " << *AR << "\n";
+ O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
+ SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
if (Subscripts.size() == 0 || Sizes.size() == 0 ||
Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
new file mode 100644
index 0000000..912c5ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -0,0 +1,392 @@
+//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow. For example in this sequence:
+//
+// %1 = add i32 %x, %y
+// %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "demanded-bits"
+
+char DemandedBits::ID = 0;
+INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+
+DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) {
+ initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
+}
+
+void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesAll();
+}
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ I->isEHPad() || I->mayHaveSideEffects();
+}
+
+void DemandedBits::determineLiveOperandBits(
+ const Instruction *UserI, const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ case Instruction::ICmp:
+ // Count the number of leading zeroes in each operand.
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
+ break;
+ }
+}
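A worked instance of the Add/Sub/Mul rule above: carries ripple only toward the high bits, so when just the low 16 bits of an i32 add are demanded, each operand is demanded only in its low 16 bits.

APInt AOut(32, 0xFFFF);  // demanded bits of the add's result
APInt AB = APInt::getLowBitsSet(32, AOut.getActiveBits());
// AB == 0x0000FFFF: bits 16..31 of both addends are dead.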
+
+bool DemandedBits::runOnFunction(Function& Fn) {
+ F = &Fn;
+ Analyzed = false;
+ return false;
+}
+
+void DemandedBits::performAnalysis() {
+ if (Analyzed)
+ // Analysis already completed for this function.
+ return;
+ Analyzed = true;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ Visited.clear();
+ AliveBits.clear();
+
+ SmallVector<Instruction*, 128> Worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : instructions(*F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+ // For integer-valued instructions, set up an initial empty set of alive
+ // bits and add the instruction to the work list. For other instructions
+ // add their operands to the work list (for integer values operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (!AliveBits.count(&I)) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ }
+
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ // Compute the set of alive bits for each operand. These are anded into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntegerTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ AB = APInt(BitWidth, 0);
+ } else {
+ // If all bits of the output are dead, all bits of the input are dead
+ // as well (the branch above handles that). Otherwise, the bits of each
+ // operand that are used to compute alive bits of the output are alive,
+ // all others are dead.
+ determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+ KnownZero, KnownOne,
+ KnownZero2, KnownOne2);
+ }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+}
+
+APInt DemandedBits::getDemandedBits(Instruction *I) {
+ performAnalysis();
+
+ const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
+ if (AliveBits.count(I))
+ return AliveBits[I];
+ return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+}
+
+bool DemandedBits::isInstructionDead(Instruction *I) {
+ performAnalysis();
+
+ return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() &&
+ !isAlwaysLive(I);
+}
+
+void DemandedBits::print(raw_ostream &OS, const Module *M) const {
+ // This is gross. But the alternative is making all the state mutable
+ // just because of this one debugging method.
+ const_cast<DemandedBits*>(this)->performAnalysis();
+ for (auto &KV : AliveBits) {
+ OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
+ << *KV.first << "\n";
+ }
+}
+
+FunctionPass *llvm::createDemandedBitsPass() {
+ return new DemandedBits();
+}
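A hedged sketch of a legacy-PM client of this analysis; MyTransform is hypothetical, the accessors are the ones defined above:

bool MyTransform::runOnFunction(Function &F) {
  // Requires AU.addRequired<DemandedBits>() in getAnalysisUsage().
  DemandedBits &DB = getAnalysis<DemandedBits>();
  for (Instruction &I : instructions(F)) {
    if (!I.getType()->isIntegerTy())
      continue;
    APInt Demanded = DB.getDemandedBits(&I);
    (void)Demanded;
    if (DB.isInstructionDead(&I))
      ; // never visited and not always-live: a deletion candidate
  }
  return false;
}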
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 4826ac4..4040ad3 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
@@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() {
bool DependenceAnalysis::runOnFunction(Function &F) {
this->F = &F;
- AA = &getAnalysis<AliasAnalysis>();
- SE = &getAnalysis<ScalarEvolution>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() {
void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<AliasAnalysis>();
- AU.addRequiredTransitive<ScalarEvolution>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
@@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
: Dependence(Source, Destination), Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
- DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
+ if (CommonLevels)
+ DV = make_unique<DVEntry[]>(CommonLevels);
}
// The rest are simple getters that hide the implementation.
@@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
const Loop *CurLoop) {
Kind = Distance;
- A = SE->getConstant(D->getType(), 1);
+ A = SE->getOne(D->getType());
B = SE->getNegativeSCEV(A);
C = SE->getNegativeSCEV(D);
AssociatedLoop = CurLoop;
@@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,
if (!C1B2_C2B1 || !C1A2_C2A1 ||
!A1B2_A2B1 || !A2B1_A1B2)
return false;
- APInt Xtop = C1B2_C2B1->getValue()->getValue();
- APInt Xbot = A1B2_A2B1->getValue()->getValue();
- APInt Ytop = C1A2_C2A1->getValue()->getValue();
- APInt Ybot = A2B1_A1B2->getValue()->getValue();
+ APInt Xtop = C1B2_C2B1->getAPInt();
+ APInt Xbot = A1B2_A2B1->getAPInt();
+ APInt Ytop = C1A2_C2A1->getAPInt();
+ APInt Ybot = A2B1_A1B2->getAPInt();
DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
@@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,
}
if (const SCEVConstant *CUB =
collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
- APInt UpperBound = CUB->getValue()->getValue();
+ APInt UpperBound = CUB->getAPInt();
DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
X->setEmpty();
@@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
const Value *B) {
const Value *AObj = GetUnderlyingObject(A, DL);
const Value *BObj = GetUnderlyingObject(B, DL);
- return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
- BObj, AA->getTypeStoreSize(BObj->getType()));
+ return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()),
+ BObj, DL.getTypeStoreSize(BObj->getType()));
}
@@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
// Can we compute distance?
if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
- APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
- APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
APInt Distance = ConstDelta; // these need to be initialized
APInt Remainder = ConstDelta;
APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
@@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
// compute SplitIter for use by DependenceAnalysis::getSplitIteration()
- SplitIter =
- SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
- Delta),
- SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
- ConstCoeff));
+ SplitIter = SE->getUDivExpr(
+ SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
@@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
return true;
}
Result.DV[Level].Splitable = false;
- Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
+ Result.DV[Level].Distance = SE->getZero(Delta->getType());
return false;
}
}
// check that Coeff divides Delta
- APInt APDelta = ConstDelta->getValue()->getValue();
- APInt APCoeff = ConstCoeff->getValue()->getValue();
+ APInt APDelta = ConstDelta->getAPInt();
+ APInt APCoeff = ConstCoeff->getAPInt();
APInt Distance = APDelta; // these need to be initialized
APInt Remainder = APDelta;
APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
@@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
// find gcd
APInt G, X, Y;
- APInt AM = ConstSrcCoeff->getValue()->getValue();
- APInt BM = ConstDstCoeff->getValue()->getValue();
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactSIVindependence;
++ExactSIVsuccesses;
@@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
// UM is perhaps unavailable, let's check
if (const SCEVConstant *CUB =
collectConstantUpperBound(CurLoop, Delta->getType())) {
- UM = CUB->getValue()->getValue();
+ UM = CUB->getAPInt();
DEBUG(dbgs() << "\t UM = " << UM << "\n");
UMvalid = true;
}
@@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
static
bool isRemainderZero(const SCEVConstant *Dividend,
const SCEVConstant *Divisor) {
- APInt ConstDividend = Dividend->getValue()->getValue();
- APInt ConstDivisor = Divisor->getValue()->getValue();
+ APInt ConstDividend = Dividend->getAPInt();
+ APInt ConstDivisor = Divisor->getAPInt();
return ConstDividend.srem(ConstDivisor) == 0;
}
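The mechanical change repeated throughout this file, isolated: SCEVConstant grew a direct APInt accessor, removing the hop through the underlying ConstantInt.

const SCEVConstant *C = Dividend;      // any SCEVConstant at hand (illustrative)
APInt Old = C->getValue()->getValue(); // before: ConstantInt*, then its APInt
APInt New = C->getAPInt();             // after: same value, one call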
@@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
- NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
- DstCoeff, Delta, CurLoop);
+ NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
+ CurLoop);
DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
if (Level < CommonLevels) {
@@ -1775,8 +1774,8 @@ bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
- NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
- Delta, CurLoop);
+ NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
+ CurLoop);
DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
if (Level < CommonLevels) {
@@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// find gcd
APInt G, X, Y;
- APInt AM = ConstSrcCoeff->getValue()->getValue();
- APInt BM = ConstDstCoeff->getValue()->getValue();
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
unsigned Bits = AM.getBitWidth();
- if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactRDIVindependence;
return true;
@@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// SrcUM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(SrcLoop, Delta->getType())) {
- SrcUM = UpperBound->getValue()->getValue();
+ SrcUM = UpperBound->getAPInt();
DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
SrcUMvalid = true;
}
@@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
// UM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(DstLoop, Delta->getType())) {
- DstUM = UpperBound->getValue()->getValue();
+ DstUM = UpperBound->getAPInt();
DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
DstUMvalid = true;
}
@@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
if (!Constant)
return false;
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
@@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
if (!Constant)
return false;
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
@@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
const SCEVConstant *ConstOp = getConstantPart(Product);
if (!ConstOp)
return false;
- APInt ConstOpValue = ConstOp->getValue()->getValue();
+ APInt ConstOpValue = ConstOp->getAPInt();
ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
ConstOpValue.abs());
}
@@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
}
if (!Constant)
return false;
- APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
if (ConstDelta == 0)
return false;
@@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
else
Constant = cast<SCEVConstant>(Coeff);
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
@@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
Constant = getConstantPart(Product);
else
Constant = cast<SCEVConstant>(Coeff);
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
@@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
// or constant, in which case we give up on this direction.
continue;
}
- APInt ConstCoeff = Constant->getValue()->getValue();
+ APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
if (RunningGCD != 0) {
@@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
// If the difference is 0, we won't need to know the number of iterations.
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
Bound[K].Lower[Dependence::DVEntry::ALL] =
- SE->getConstant(A[K].Coeff->getType(), 0);
+ SE->getZero(A[K].Coeff->getType());
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
Bound[K].Upper[Dependence::DVEntry::ALL] =
- SE->getConstant(A[K].Coeff->getType(), 0);
+ SE->getZero(A[K].Coeff->getType());
}
}
@@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
- const SCEV *Iter_1 =
- SE->getMinusSCEV(Bound[K].Iterations,
- SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
Bound[K].Lower[Dependence::DVEntry::LT] =
@@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
- const SCEV *Iter_1 =
- SE->getMinusSCEV(Bound[K].Iterations,
- SE->getConstant(Bound[K].Iterations->getType(), 1));
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
Bound[K].Lower[Dependence::DVEntry::GT] =
@@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
// X^+ = max(X, 0)
const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
- return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+ return SE->getSMaxExpr(X, SE->getZero(X->getType()));
}
// X^- = min(X, 0)
const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
- return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+ return SE->getSMinExpr(X, SE->getZero(X->getType()));
}
@@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo *
DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
bool SrcFlag,
const SCEV *&Constant) const {
- const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+ const SCEV *Zero = SE->getZero(Subscript->getType());
CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
for (unsigned K = 1; K <= MaxLevels; ++K) {
CI[K].Coeff = Zero;
@@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
- return SE->getConstant(Expr->getType(), 0);
+ return SE->getZero(Expr->getType());
if (AddRec->getLoop() == TargetLoop)
return AddRec->getStepRecurrence(*SE);
return findCoefficient(AddRec->getStart(), TargetLoop);
@@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Bconst || !Cconst) return false;
- APInt Beta = Bconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Beta = Bconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivB = Charlie.sdiv(Beta);
assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
const SCEV *AP_K = findCoefficient(Dst, CurLoop);
@@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
- APInt Alpha = Aconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
@@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
- APInt Alpha = Aconst->getValue()->getValue();
- APInt Charlie = Cconst->getValue()->getValue();
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
@@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
/// source and destination array references are recurrences on a nested loop,
/// this function flattens the nested recurrences into separate recurrences
/// for each loop level.
-bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
- const SCEV *DstSCEV,
- SmallVectorImpl<Subscript> &Pair,
- const SCEV *ElementSize) {
+bool DependenceAnalysis::tryDelinearize(Instruction *Src,
+ Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair) {
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+
+ // The code below mimics the delinearization logic in Delinearization.cpp.
+ const SCEV *SrcAccessFn =
+ SE->getSCEVAtScope(SrcPtr, SrcLoop);
+ const SCEV *DstAccessFn =
+ SE->getSCEVAtScope(DstPtr, DstLoop);
+
const SCEVUnknown *SrcBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV));
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
if (!SrcBase || !DstBase || SrcBase != DstBase)
return false;
- SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase);
- DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase);
+ const SCEV *ElementSize = SE->getElementSize(Src);
+ if (ElementSize != SE->getElementSize(Dst))
+ return false;
+
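+ // Peel off the common base pointer so the remaining SCEVs express the
+ // subscripts as offsets from the shared array base.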
+ const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
+ const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
@@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
}
#endif
-
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
@@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
Pair[0].Dst = DstSCEV;
}
- if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
- DEBUG(dbgs() << " delinerized GEP\n");
- Pairs = Pair.size();
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
}
for (unsigned P = 0; P < Pairs; ++P) {
@@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
return nullptr;
}
- auto Final = make_unique<FullDependence>(Result);
- Result.DV = nullptr;
- return std::move(Final);
+ return make_unique<FullDependence>(std::move(Result));
}
@@ -3852,10 +3863,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
Pair[0].Dst = DstSCEV;
}
- if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
- DEBUG(dbgs() << " delinerized GEP\n");
- Pairs = Pair.size();
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
}
for (unsigned P = 0; P < Pairs; ++P) {
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index e5ee295..5ae6d74 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines divergence analysis which determines whether a branch in a
-// GPU program is divergent. It can help branch optimizations such as jump
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
@@ -61,75 +61,31 @@
// 2. memory as black box. It conservatively considers values loaded from
// generic or local address as divergent. This can be improved by leveraging
// pointer analysis.
+//
//===----------------------------------------------------------------------===//
-#include <vector>
-#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "divergence"
-
-namespace {
-class DivergenceAnalysis : public FunctionPass {
-public:
- static char ID;
-
- DivergenceAnalysis() : FunctionPass(ID) {
- initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
- AU.setPreservesAll();
- }
-
- bool runOnFunction(Function &F) override;
-
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const override;
-
- // Returns true if V is divergent.
- bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
- // Returns true if V is uniform/non-divergent.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
-
-private:
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-};
-} // End of anonymous namespace
-
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-
namespace {
class DivergencePropagator {
public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI,
- DominatorTree &DT, PostDominatorTree &PDT,
- DenseSet<const Value *> &DV)
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
: F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
void populateWithSourcesOfDivergence();
void propagate();
@@ -140,7 +96,7 @@ private:
// A helper function that explores sync dependents of TI.
void exploreSyncDependency(TerminatorInst *TI);
// Computes the influence region from Start to End. This region includes all
- // basic blocks on any path from Start to End.
+ // basic blocks on any simple path from Start to End.
void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
DenseSet<BasicBlock *> &InfluenceRegion);
// Finds all users of I that are outside the influence region, and add these
@@ -153,13 +109,13 @@ private:
DominatorTree &DT;
PostDominatorTree &PDT;
std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
};
void DivergencePropagator::populateWithSourcesOfDivergence() {
Worklist.clear();
DV.clear();
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
Worklist.push_back(&I);
DV.insert(&I);
@@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
// A PHINode is uniform if it returns the same value no matter which path is
// taken.
- if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
- Worklist.push_back(I);
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
}
// Propagation rule 2: if a value defined in a loop is used outside, the user
@@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion(
}
}
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
void DivergencePropagator::computeInfluenceRegion(
BasicBlock *Start, BasicBlock *End,
DenseSet<BasicBlock *> &InfluenceRegion) {
assert(PDT.properlyDominates(End, Start) &&
"End does not properly dominate Start");
+
+ // The influence region starts from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is in
+ // a loop that doesn't contain "End".
std::vector<BasicBlock *> InfluenceStack;
- InfluenceStack.push_back(Start);
- InfluenceRegion.insert(Start);
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
while (!InfluenceStack.empty()) {
BasicBlock *BB = InfluenceStack.back();
InfluenceStack.pop_back();
- for (BasicBlock *Succ : successors(BB)) {
- if (End != Succ && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
- }
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
}
}
@@ -286,10 +254,25 @@ void DivergencePropagator::propagate() {
} /// end namespace anonymous
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
FunctionPass *llvm::createDivergenceAnalysisPass() {
return new DivergenceAnalysis();
}
+void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+}
+
bool DivergenceAnalysis::runOnFunction(Function &F) {
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
if (TTIWP == nullptr)
@@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.count(&Arg))
OS << "DIVERGENT: " << Arg << "\n";
}
- // Iterate instructions using inst_range to ensure a deterministic order.
- for (auto &I : inst_range(F)) {
+ // Iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
if (DivergentValues.count(&I))
OS << "DIVERGENT:" << I << "\n";
}
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
new file mode 100644
index 0000000..01be8b3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -0,0 +1,106 @@
+//===- EHPersonalities.cpp - Compute EH-related information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F =
+ Pers ? dyn_cast<Function>(Pers->stripPointerCasts()) : nullptr;
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Case("ProcessCLRException", EHPersonality::CoreCLR)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
+ EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+ // function wants to catch asynch exceptions. The nounwind attribute only
+ // implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
+
+DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> Worklist;
+ BasicBlock *EntryBlock = &F.getEntryBlock();
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+
+ // Build up the color map, which maps each block to its set of 'colors'.
+ // For any block B the "colors" of B are the set of funclets F (possibly
+ // including a root "funclet" representing the main function) such that
+ // F will need to directly contain B or a copy of B (where the term "directly
+ // contain" is used to distinguish from being "transitively contained" in
+ // a nested funclet).
+ //
+ // Note: Despite not being a funclet in the truest sense, a catchswitch is
+ // considered to belong to its own funclet for the purposes of coloring.
+
+ DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for "
+ << F.getName() << "\n");
+
+ Worklist.push_back({EntryBlock, EntryBlock});
+
+ while (!Worklist.empty()) {
+ BasicBlock *Visiting;
+ BasicBlock *Color;
+ std::tie(Visiting, Color) = Worklist.pop_back_val();
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << "Visiting " << Visiting->getName() << ", "
+ << Color->getName() << "\n");
+ Instruction *VisitingHead = Visiting->getFirstNonPHI();
+ if (VisitingHead->isEHPad()) {
+ // Mark this funclet head as a member of itself.
+ Color = Visiting;
+ }
+ // Note that this is a member of the given color.
+ ColorVector &Colors = BlockColors[Visiting];
+ if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ Colors.push_back(Color);
+ else
+ continue;
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << Color->getName()
+ << "\' to block \'" << Visiting->getName()
+ << "\'.\n");
+
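+ // Successors normally inherit this block's color. A catchret is the
+ // exception: it returns to the parent funclet, so its successors take the
+ // color of the parent pad's block (or the entry block for a 'none' token).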
+ BasicBlock *SuccColor = Color;
+ TerminatorInst *Terminator = Visiting->getTerminator();
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
+ Value *ParentPad = CatchRet->getParentPad();
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccColor = EntryBlock;
+ else
+ SuccColor = cast<Instruction>(ParentPad)->getParent();
+ }
+
+ for (BasicBlock *Succ : successors(Visiting))
+ Worklist.push_back({Succ, SuccColor});
+ }
+ return BlockColors;
+}
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
new file mode 100644
index 0000000..ab2263a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -0,0 +1,1002 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "globalsmodref-aa"
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions, "Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+// An option to enable unsafe alias results from the GlobalsModRef analysis.
+// When enabled, GlobalsModRef will provide no-alias results which in extremely
+// rare cases may not be conservatively correct. In particular, in the face of
+// transforms which cause asymmetry between how effective GetUnderlyingObject
+// is for two pointers, it may produce incorrect results.
+//
+// These unsafe results have been returned by GMR for many years without
+// causing significant issues in the wild and so we provide a mechanism to
+// re-enable them for users of LLVM that have a particular performance
+// sensitivity and no known issues. The option also makes it easy to evaluate
+// the performance impact of these results.
+static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults(
+ "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden);
+
+/// The mod/ref information collected for a particular function.
+///
+/// We collect information about mod/ref behavior of a function here, both in
+/// general and as pertains to specific globals. We only have this detailed
+/// information when we know *something* useful about the behavior. If we
+/// saturate to fully general mod/ref, we remove the info for the function.
+class GlobalsAAResult::FunctionInfo {
+ typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType;
+
+ /// Build a wrapper struct that has 8-byte alignment. All heap allocations
+ /// should provide this much alignment at least, but this makes it clear we
+ /// specifically rely on this amount of alignment.
+ struct LLVM_ALIGNAS(8) AlignedMap {
+ AlignedMap() {}
+ AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
+ GlobalInfoMapType Map;
+ };
+
+ /// Pointer traits for our aligned map.
+ struct AlignedMapPointerTraits {
+ static inline void *getAsVoidPointer(AlignedMap *P) { return P; }
+ static inline AlignedMap *getFromVoidPointer(void *P) {
+ return (AlignedMap *)P;
+ }
+ enum { NumLowBitsAvailable = 3 };
+ static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ "AlignedMap insufficiently aligned to have enough low bits.");
+ };
+
+ /// The bit that flags that this function may read any global. This is
+ /// chosen to mix together with ModRefInfo bits.
+ enum { MayReadAnyGlobal = 4 };
+
+ /// Checks to document the invariants of the bit packing here.
+ static_assert((MayReadAnyGlobal & MRI_ModRef) == 0,
+ "ModRef and the MayReadAnyGlobal flag bits overlap.");
+ static_assert(((MayReadAnyGlobal | MRI_ModRef) >>
+ AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
+ "Insufficient low bits to store our flag and ModRef info.");
+
+public:
+ FunctionInfo() : Info() {}
+ ~FunctionInfo() {
+ delete Info.getPointer();
+ }
+ // Spell out the copy and move constructors and assignment operators to get
+ // deep copy semantics and correct move semantics in the face of the
+ // pointer-int pair.
+ FunctionInfo(const FunctionInfo &Arg)
+ : Info(nullptr, Arg.Info.getInt()) {
+ if (const auto *ArgPtr = Arg.Info.getPointer())
+ Info.setPointer(new AlignedMap(*ArgPtr));
+ }
+ FunctionInfo(FunctionInfo &&Arg)
+ : Info(Arg.Info.getPointer(), Arg.Info.getInt()) {
+ Arg.Info.setPointerAndInt(nullptr, 0);
+ }
+ FunctionInfo &operator=(const FunctionInfo &RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(nullptr, RHS.Info.getInt());
+ if (const auto *RHSPtr = RHS.Info.getPointer())
+ Info.setPointer(new AlignedMap(*RHSPtr));
+ return *this;
+ }
+ FunctionInfo &operator=(FunctionInfo &&RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt());
+ RHS.Info.setPointerAndInt(nullptr, 0);
+ return *this;
+ }
+
+ /// Returns the \c ModRefInfo info for this function.
+ ModRefInfo getModRefInfo() const {
+ return ModRefInfo(Info.getInt() & MRI_ModRef);
+ }
+
+ /// Adds new \c ModRefInfo for this function to its state.
+ void addModRefInfo(ModRefInfo NewMRI) {
+ Info.setInt(Info.getInt() | NewMRI);
+ }
+
+ /// Returns whether this function may read any global variable, and we don't
+ /// know which global.
+ bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; }
+
+ /// Sets this function as potentially reading from any global.
+ void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); }
+
+ /// Returns the \c ModRefInfo info for this function w.r.t. a particular
+ /// global, which may be more precise than the general information above.
+ ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const {
+ ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef;
+ if (AlignedMap *P = Info.getPointer()) {
+ auto I = P->Map.find(&GV);
+ if (I != P->Map.end())
+ GlobalMRI = ModRefInfo(GlobalMRI | I->second);
+ }
+ return GlobalMRI;
+ }
+
+ /// Add mod/ref info from another function into ours, saturating towards
+ /// MRI_ModRef.
+ void addFunctionInfo(const FunctionInfo &FI) {
+ addModRefInfo(FI.getModRefInfo());
+
+ if (FI.mayReadAnyGlobal())
+ setMayReadAnyGlobal();
+
+ if (AlignedMap *P = FI.Info.getPointer())
+ for (const auto &G : P->Map)
+ addModRefInfoForGlobal(*G.first, G.second);
+ }
+
+ void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) {
+ AlignedMap *P = Info.getPointer();
+ if (!P) {
+ P = new AlignedMap();
+ Info.setPointer(P);
+ }
+ auto &GlobalMRI = P->Map[&GV];
+ GlobalMRI = ModRefInfo(GlobalMRI | NewMRI);
+ }
+
+ /// Clear a global's ModRef info. Should be used when a global is being
+ /// deleted.
+ void eraseModRefInfoForGlobal(const GlobalValue &GV) {
+ if (AlignedMap *P = Info.getPointer())
+ P->Map.erase(&GV);
+ }
+
+private:
+ /// All of the information is encoded into a single pointer, with a three bit
+ /// integer in the low three bits. The high bit provides a flag for when this
+ /// function may read any global. The low two bits are the ModRefInfo. And
+ /// the pointer, when non-null, points to a map from GlobalValue to
+ /// ModRefInfo specific to that GlobalValue.
+ PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info;
+};
+
+void GlobalsAAResult::DeletionCallbackHandle::deleted() {
+ Value *V = getValPtr();
+ if (auto *F = dyn_cast<Function>(V))
+ GAR->FunctionInfos.erase(F);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (GAR->NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and
+ // remove any AllocRelatedValues for it.
+ if (GAR->IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (auto I = GAR->AllocsForIndirectGlobals.begin(),
+ E = GAR->AllocsForIndirectGlobals.end();
+ I != E; ++I)
+ if (I->second == GV)
+ GAR->AllocsForIndirectGlobals.erase(I);
+ }
+
+ // Scan the function info we have collected and remove this global
+ // from all of them.
+ for (auto &FIPair : GAR->FunctionInfos)
+ FIPair.second.eraseModRefInfoForGlobal(*GV);
+ }
+ }
+
+ // If this is an allocation related to an indirect global, remove it.
+ GAR->AllocsForIndirectGlobals.erase(V);
+
+ // And clear out the handle.
+ setValPtr(nullptr);
+ GAR->Handles.erase(I);
+ // This object is now destroyed!
+}
+
+FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
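+ // Refine the conservative default with our per-function summary, then
+ // intersect with anything the base implementation can prove.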
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+FunctionModRefBehavior
+GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ if (const Function *F = CS.getCalledFunction())
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the function info for the function, or null if we don't have
+/// anything useful to say about it.
+GlobalsAAResult::FunctionInfo *
+GlobalsAAResult::getFunctionInfo(const Function *F) {
+ auto I = FunctionInfos.find(F);
+ if (I != FunctionInfos.end())
+ return &I->second;
+ return nullptr;
+}
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsAAResult::AnalyzeGlobals(Module &M) {
+ SmallPtrSet<Function *, 64> TrackedFunctions;
+ for (Function &F : M)
+ if (F.hasLocalLinkage())
+ if (!AnalyzeUsesOfPointer(&F)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(&F);
+ TrackedFunctions.insert(&F);
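+ // Install a deletion callback handle; the stored list iterator lets the
+ // handle erase itself from Handles if this function is ever deleted.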
+ Handles.emplace_front(*this, &F);
+ Handles.front().I = Handles.begin();
+ ++NumNonAddrTakenFunctions;
+ }
+
+ SmallPtrSet<Function *, 64> Readers, Writers;
+ for (GlobalVariable &GV : M.globals())
+ if (GV.hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(&GV, &Readers,
+ GV.isConstant() ? nullptr : &Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(&GV);
+ Handles.emplace_front(*this, &GV);
+ Handles.front().I = Handles.begin();
+
+ for (Function *Reader : Readers) {
+ if (TrackedFunctions.insert(Reader).second) {
+ Handles.emplace_front(*this, Reader);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+ }
+
+ if (!GV.isConstant()) // No need to keep track of writers to constants
+ for (Function *Writer : Writers) {
+ if (TrackedFunctions.insert(Writer).second) {
+ Handles.emplace_front(*this, Writer);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+ }
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (GV.getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(&GV))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear();
+ Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
+ SmallPtrSetImpl<Function *> *Readers,
+ SmallPtrSetImpl<Function *> *Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy())
+ return true;
+
+ for (Use &U : V->uses()) {
+ User *I = U.getUser();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (Readers)
+ Readers->insert(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (V == SI->getOperand(1)) {
+ if (Writers)
+ Writers->insert(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers))
+ return true;
+ } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (auto CS = CallSite(I)) {
+ // Make sure that this is just the function being called, not that it is
+ // passing into the function.
+ if (CS.isDataOperand(&U)) {
+ // Detect calls to free.
+ if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Writers)
+ Writers->insert(CS->getParent()->getParent());
+ } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
+ Function *ParentF = CS->getParent()->getParent();
+ // A nocapture argument may be read from or written to, but does not
+ // escape unless the call can somehow recurse.
+ //
+ // nocapture "indicates that the callee does not make any copies of
+ // the pointer that outlive itself". Therefore if we directly or
+ // indirectly recurse, we must treat the pointer as escaping.
+ if (FunctionToSCCMap[ParentF] ==
+ FunctionToSCCMap[CS.getCalledFunction()])
+ return true;
+ if (Readers)
+ Readers->insert(ParentF);
+ if (Writers)
+ Writers->insert(ParentF);
+ } else {
+ return true; // Argument of an unknown call.
+ }
+ // If the callee is not ReadNone, it may read the global, and if it is
+ // not ReadOnly, it may also write to it. Note that getCalledFunction()
+ // returns null for an indirect call, so guard against dereferencing it.
+ Function *CalleeF = CS.getCalledFunction();
+ if (CalleeF && !CalleeF->doesNotAccessMemory()) {
+ if (Readers)
+ Readers->insert(CalleeF);
+ if (Writers && !CalleeF->onlyReadsMemory())
+ Writers->insert(CalleeF);
+ }
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+ // Keep track of values related to the allocation of the memory, e.g. the
+ // value produced by the malloc call and any casts.
+ std::vector<Value *> AllocRelatedValues;
+
+ // If the initializer is a valid pointer, bail.
+ if (Constant *C = GV->getInitializer())
+ if (!C->isNullValue())
+ return false;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (User *U : GV->users()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading and storing to it. We do *not* allow
+ // storing the loaded pointer somewhere else or passing to a function.
+ if (AnalyzeUsesOfPointer(LI))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV)
+ return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
+
+ if (!isAllocLikeFn(Ptr, &TLI))
+ return false; // Too hard to analyze.
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+ GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ Handles.emplace_front(*this, AllocRelatedValues.back());
+ Handles.front().I = Handles.begin();
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ Handles.emplace_front(*this, GV);
+ Handles.front().I = Handles.begin();
+ return true;
+}
+
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
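+ // Mutually recursive functions share an SCC, and hence an ID;
+ // AnalyzeUsesOfPointer relies on this to detect recursion through
+ // nocapture arguments.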
+ unsigned SCCID = 0;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ for (auto *CGN : SCC)
+ if (Function *F = CGN->getFunction())
+ FunctionToSCCMap[F] = SCCID;
+ ++SCCID;
+ }
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) {
+ // An external node or an overridable function - we can't say anything
+ // useful. Remove any existing function records (they may have been
+ // created when scanning globals).
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+ bool KnowNothing = false;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FI.addModRefInfo(MRI_Ref);
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FI.setMayReadAnyGlobal();
+ } else if (F->onlyAccessesArgMemory() ||
+ F->onlyAccessesInaccessibleMemOrArgMem()) {
+ // This function may only access (read/write) memory pointed to by its
+ // arguments. If this pointer is to a global, this escaping use of the
+ // pointer is captured in AnalyzeUsesOfPointer().
+ FI.addModRefInfo(MRI_ModRef);
+ } else {
+ FI.addModRefInfo(MRI_ModRef);
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FI.addFunctionInfo(*CalleeFI);
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfos map.
+ if (KnowNothing) {
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (auto *Node : SCC) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+ for (Instruction &I : instructions(Node->getFunction())) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+
+ // We handle calls specially because the graph-relevant aspects are
+ // handled above.
+ if (auto CS = CallSite(&I)) {
+ if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ // FIXME: It is completely unclear why this is necessary and not
+ // handled by the above graph code.
+ FI.addModRefInfo(MRI_ModRef);
+ } else if (Function *Callee = CS.getCalledFunction()) {
+ // The callgraph doesn't include intrinsic calls.
+ if (Callee->isIntrinsic()) {
+ FunctionModRefBehavior Behaviour =
+ AAResultBase::getModRefBehavior(Callee);
+ FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+ }
+ }
+ continue;
+ }
+
+ // For all non-call instructions we use the primary predicates to decide
+ // whether they read or write memory.
+ if (I.mayReadFromMemory())
+ FI.addModRefInfo(MRI_Ref);
+ if (I.mayWriteToMemory())
+ FI.addModRefInfo(MRI_Mod);
+ }
+ }
+
+ if ((FI.getModRefInfo() & MRI_Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FI.getModRefInfo() == MRI_NoModRef)
+ ++NumNoMemFunctions;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+ // get invalidated if DenseMap decides to re-hash.
+ FunctionInfo CachedFI = FI;
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+ }
+}
+
+// GV is a non-escaping global. V is a pointer address that has been loaded from.
+// If we can prove that V must escape, we can conclude that a load from V cannot
+// alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+ const Value *V,
+ int &Depth,
+ const DataLayout &DL) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
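+ // Walk the values feeding V depth-first; every root must be inherently
+ // escaping (an argument, a call/invoke result, or another global) for the
+ // no-alias conclusion to hold.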
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input))
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ //
+ // (Transitive) loads from a global are also safe - if this aliased
+ // another global, its address would escape, so no alias.
+ continue;
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4, lower numbers could be used to fix compile time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ return false;
+ } while (!Inputs.empty());
+
+ // All inputs were known to be no-alias.
+ return true;
+}
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would have
+// changed program behavior by introducing the observed escape, making it an
+// invalid transform.
+//
+// This property does require that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, cannot rely
+// on the results of GMR::alias. This seems a reasonable restriction, although
+// currently there is no way to enforce it. There is also no realistic
+// optimization pass that would make this mistake. The closest example is
+// a transformation pass which does reg2mem of SSA values but stores them into
+// global variables temporarily before restoring the global variable's value.
+// This could be useful to expose "benign" races for example. However, it seems
+// reasonable to require that a pass which introduces escapes of global
+// variables in this way to either not trust AA results while the escape is
+// active, or to be forced to operate as a module pass that cannot co-exist
+// with an alias analysis such as GMR.
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+ const Value *V) {
+ // In order to know that the underlying object cannot alias the
+ // non-addr-taken global, we must know that it would have to be an escape.
+ // Thus if the underlying object is a function argument, a load from
+ // a global, or the return of a function, it cannot alias. We can also
+ // recurse through PHI nodes and select nodes provided all of their inputs
+ // resolve to one of these known-escaping roots.
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
+ int Depth = 0;
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+ // If one input is the very global we're querying against, then we can't
+ // conclude no-alias.
+ if (InputGV == GV)
+ return false;
+
+ // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+ // FIXME: The condition can be refined, but be conservative for now.
+ auto *GVar = dyn_cast<GlobalVariable>(GV);
+ auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+ if (GVar && InputGVar &&
+ !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+ !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+ Type *GVType = GVar->getInitializer()->getType();
+ Type *InputGVType = InputGVar->getInitializer()->getType();
+ if (GVType->isSized() && InputGVType->isSized() &&
+ (DL.getTypeAllocSize(GVType) > 0) &&
+ (DL.getTypeAllocSize(InputGVType) > 0))
+ continue;
+ }
+
+ // Conservatively return false, even though we could be smarter
+ // (e.g. look through GlobalAliases).
+ return false;
+ }
+
+ if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input)) {
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ continue;
+ }
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4, lower numbers could be used to fix compile time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ // A pointer loaded from a global would have been captured, and we know
+ // that the global is non-escaping, so no alias.
+ const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+ if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+ // The load does not alias with GV.
+ continue;
+ // Otherwise, a load could come from anywhere, so bail.
+ return false;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ // FIXME: It would be good to handle other obvious no-alias cases here, but
+ // it isn't clear how to do so reasonably without building a small version
+ // of BasicAA into this code. We could recurse into AAResultBase::alias
+ // here but that seems likely to go poorly as we're inside the
+ // implementation of such a query. Until then, just conservatively return
+ // false.
+ return false;
+ } while (!Inputs.empty());
+
+ // If all the inputs to V were definitively no-alias, then V is no-alias.
+ return true;
+}
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1))
+ GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2))
+ GV2 = nullptr;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals we know these can't alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is and the other isn't, it isn't strictly safe but we can fake
+ // this result if necessary for performance. This does not appear to be
+ // a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Check for a special case where a non-escaping global can be used to
+ // conclude no-alias.
+ if ((GV1 || GV2) && GV1 != GV2) {
+ const GlobalValue *GV = GV1 ? GV1 : GV2;
+ const Value *UV = GV1 ? UV2 : UV1;
+ if (isNonEscapingGlobalNoAlias(GV, UV))
+ return NoAlias;
+ }
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = nullptr;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (!GV1)
+ GV1 = AllocsForIndirectGlobals.lookup(UV1);
+ if (!GV2)
+ GV2 = AllocsForIndirectGlobals.lookup(UV2);
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If the pointers are based on
+ // different indirect globals they cannot alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is based on an indirect global and the other isn't, it isn't
+ // strictly safe but we can fake this result if necessary for performance.
+ // This does not appear to be a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AAResultBase::alias(LocA, LocB);
+}
+
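To ground the first disambiguation, a hedged source-level illustration (not taken from the patch): two internal-linkage globals whose addresses never escape are exactly what this analysis tracks, so an access to one can never alias an access to the other.

    static int Counter; // internal linkage, address never taken
    static int Limit;   // likewise eligible for NonAddressTakenGlobals

    int bump() {
      Counter += 1; // writes only Counter
      return Limit; // provably unaffected by the store above: NoAlias
    }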
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ const GlobalValue *GV) {
+ if (CS.doesNotAccessMemory())
+ return MRI_NoModRef;
+ ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+ // Iterate through all the arguments to the called function. If any argument
+ // is based on GV, return the conservative result.
+ for (auto &A : CS.args()) {
+ SmallVector<Value*, 4> Objects;
+ GetUnderlyingObjects(A, Objects, DL);
+
+ // All objects must be identified.
+ if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject))
+ return ConservativeResult;
+
+ if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ return ConservativeResult;
+ }
+
+ // We identified all objects in the argument list, and none of them were GV.
+ return MRI_NoModRef;
+}
+
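The loop above falls back to the conservative answer in two cases: some underlying object of an argument cannot be identified, or GV itself shows up among them. A hedged standalone rendering of that predicate, with a hypothetical Object type in place of llvm::Value:

    #include <algorithm>
    #include <vector>

    struct Object { bool Identified; };

    // True when the conservative mod/ref result must be returned for one
    // argument, mirroring the two early returns in the loop above.
    static bool needsConservativeAnswer(const std::vector<const Object *> &Objs,
                                        const Object *GV) {
      bool AllIdentified =
          std::all_of(Objs.begin(), Objs.end(),
                      [](const Object *O) { return O->Identified; });
      bool MentionsGV = std::find(Objs.begin(), Objs.end(), GV) != Objs.end();
      return !AllIdentified || MentionsGV;
    }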
+ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ unsigned Known = MRI_ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionInfo *FI = getFunctionInfo(F))
+ Known = FI->getModRefInfoForGlobal(*GV) |
+ getModRefInfoForArgument(CS, GV);
+
+ if (Known == MRI_NoModRef)
+ return MRI_NoModRef; // No need to query other mod/ref analyses
+ return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+}
+
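The closing statement leans on the mod/ref encoding being a two-bit lattice, so intersecting this analysis's answer with the fallback's is a plain bitwise AND. A hedged standalone model of that arithmetic (the MRI_* names above map onto these bit values):

    #include <cassert>

    enum ModRefBits { NoModRef = 0, RefBit = 1, ModBit = 2, ModRefBoth = 3 };

    int main() {
      unsigned FromGlobalsAA = RefBit;    // this analysis: at most a read
      unsigned FromFallback = ModRefBoth; // next analysis: knows nothing
      // Intersection keeps only effects both analyses consider possible.
      assert((FromGlobalsAA & FromFallback) == RefBit);
    }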
+GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
+ const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI), DL(DL) {}
+
+GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL),
+ NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
+ IndirectGlobals(std::move(Arg.IndirectGlobals)),
+ AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
+ FunctionInfos(std::move(Arg.FunctionInfos)),
+ Handles(std::move(Arg.Handles)) {
+ // Update the parent for each DeletionCallbackHandle.
+ for (auto &H : Handles) {
+ assert(H.GAR == &Arg);
+ H.GAR = this;
+ }
+}
+
+/*static*/ GlobalsAAResult
+GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+ CallGraph &CG) {
+ GlobalsAAResult Result(M.getDataLayout(), TLI);
+
+ // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+ Result.CollectSCCMembership(CG);
+
+ // Find non-addr taken globals.
+ Result.AnalyzeGlobals(M);
+
+ // Propagate on CG.
+ Result.AnalyzeCallGraph(CG, M);
+
+ return Result;
+}
+
+GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) {
+ return GlobalsAAResult::analyzeModule(M,
+ AM->getResult<TargetLibraryAnalysis>(M),
+ AM->getResult<CallGraphAnalysis>(M));
+}
+
+char GlobalsAA::PassID;
+
+char GlobalsAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+
+ModulePass *llvm::createGlobalsAAWrapperPass() {
+ return new GlobalsAAWrapperPass();
+}
+
+GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
+ initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+ Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
+ M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+ return false;
+}
+
+bool GlobalsAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
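For context, a hedged sketch of how client code would reach this result through the new pass-manager entry point defined above. It is not part of the patch and assumes an AnalysisManager<Module> AM with GlobalsAA, CallGraphAnalysis, and TargetLibraryAnalysis already registered:

    // Hypothetical client code: query the analysis, then use its alias result.
    GlobalsAAResult &GAR = AM.getResult<GlobalsAA>(M);
    if (GAR.alias(LocA, LocB) == NoAlias) {
      // ... the two memory accesses may be reordered safely ...
    }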
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
deleted file mode 100644
index 28fb49c..0000000
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass provides alias and mod/ref information for global values
-// that do not have their address taken, and keeps track of whether functions
-// read or write memory (are "pure"). For this simple (but very common) case,
-// we can provide pretty accurate and useful information.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include <set>
-using namespace llvm;
-
-#define DEBUG_TYPE "globalsmodref-aa"
-
-STATISTIC(NumNonAddrTakenGlobalVars,
- "Number of global vars without address taken");
-STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
-STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
-STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
-STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
-
-namespace {
-/// FunctionRecord - One instance of this structure is stored for every
-/// function in the program. Later, the entries for these functions are
-/// removed if the function is found to call an external function (in which
-/// case we know nothing about it.
-struct FunctionRecord {
- /// GlobalInfo - Maintain mod/ref info for all of the globals without
- /// addresses taken that are read or written (transitively) by this
- /// function.
- std::map<const GlobalValue *, unsigned> GlobalInfo;
-
- /// MayReadAnyGlobal - May read global variables, but it is not known which.
- bool MayReadAnyGlobal;
-
- unsigned getInfoForGlobal(const GlobalValue *GV) const {
- unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<const GlobalValue *, unsigned>::const_iterator I =
- GlobalInfo.find(GV);
- if (I != GlobalInfo.end())
- Effect |= I->second;
- return Effect;
- }
-
- /// FunctionEffect - Capture whether or not this function reads or writes to
- /// ANY memory. If not, we can do a lot of aggressive analysis on it.
- unsigned FunctionEffect;
-
- FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
-};
-
-/// GlobalsModRef - The actual analysis pass.
-class GlobalsModRef : public ModulePass, public AliasAnalysis {
- /// NonAddressTakenGlobals - The globals that do not have their addresses
- /// taken.
- std::set<const GlobalValue *> NonAddressTakenGlobals;
-
- /// IndirectGlobals - The memory pointed to by this global is known to be
- /// 'owned' by the global.
- std::set<const GlobalValue *> IndirectGlobals;
-
- /// AllocsForIndirectGlobals - If an instruction allocates memory for an
- /// indirect global, this map indicates which one.
- std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
-
- /// FunctionInfo - For each function, keep track of what globals are
- /// modified or read.
- std::map<const Function *, FunctionRecord> FunctionInfo;
-
-public:
- static char ID;
- GlobalsModRef() : ModulePass(ID) {
- initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout());
-
- // Find non-addr taken globals.
- AnalyzeGlobals(M);
-
- // Propagate on CG.
- AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<CallGraphWrapperPass>();
- AU.setPreservesAll(); // Does not transform code
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1, CS2);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(const Function *F) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
-
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (const Function *F = CS.getCalledFunction())
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
-
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
- }
-
- void deleteValue(Value *V) override;
- void addEscapingUse(Use &U) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis *)this;
- return this;
- }
-
-private:
- /// getFunctionInfo - Return the function info for the function, or null if
- /// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(const Function *F) {
- std::map<const Function *, FunctionRecord>::iterator I =
- FunctionInfo.find(F);
- if (I != FunctionInfo.end())
- return &I->second;
- return nullptr;
- }
-
- void AnalyzeGlobals(Module &M);
- void AnalyzeCallGraph(CallGraph &CG, Module &M);
- bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers,
- std::vector<Function *> &Writers,
- GlobalValue *OkayStoreDest = nullptr);
- bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
-};
-}
-
-char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
- "Simple mod/ref analysis for globals", false, true,
- false)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
- "Simple mod/ref analysis for globals", false, true,
- false)
-
-Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
-
-/// AnalyzeGlobals - Scan through the users of all of the internal
-/// GlobalValue's in the program. If none of them have their "address taken"
-/// (really, their address passed to something nontrivial), record this fact,
-/// and record the functions that they are used directly in.
-void GlobalsModRef::AnalyzeGlobals(Module &M) {
- std::vector<Function *> Readers, Writers;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasLocalLinkage()) {
- if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
- // Remember that we are tracking this global.
- NonAddressTakenGlobals.insert(I);
- ++NumNonAddrTakenFunctions;
- }
- Readers.clear();
- Writers.clear();
- }
-
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
- ++I)
- if (I->hasLocalLinkage()) {
- if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
- // Remember that we are tracking this global, and the mod/ref fns
- NonAddressTakenGlobals.insert(I);
-
- for (unsigned i = 0, e = Readers.size(); i != e; ++i)
- FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
-
- if (!I->isConstant()) // No need to keep track of writers to constants
- for (unsigned i = 0, e = Writers.size(); i != e; ++i)
- FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
- ++NumNonAddrTakenGlobalVars;
-
- // If this global holds a pointer type, see if it is an indirect global.
- if (I->getType()->getElementType()->isPointerTy() &&
- AnalyzeIndirectGlobalMemory(I))
- ++NumIndirectGlobalVars;
- }
- Readers.clear();
- Writers.clear();
- }
-}
-
-/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
-/// If this is used by anything complex (i.e., the address escapes), return
-/// true. Also, while we are at it, keep track of those functions that read and
-/// write to the value.
-///
-/// If OkayStoreDest is non-null, stores into this global are allowed.
-bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
- std::vector<Function *> &Readers,
- std::vector<Function *> &Writers,
- GlobalValue *OkayStoreDest) {
- if (!V->getType()->isPointerTy())
- return true;
-
- for (Use &U : V->uses()) {
- User *I = U.getUser();
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Readers.push_back(LI->getParent()->getParent());
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (V == SI->getOperand(1)) {
- Writers.push_back(SI->getParent()->getParent());
- } else if (SI->getOperand(1) != OkayStoreDest) {
- return true; // Storing the pointer
- }
- } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
- if (AnalyzeUsesOfPointer(I, Readers, Writers))
- return true;
- } else if (Operator::getOpcode(I) == Instruction::BitCast) {
- if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
- return true;
- } else if (auto CS = CallSite(I)) {
- // Make sure that this is just the function being called, not that it is
- // passing into the function.
- if (!CS.isCallee(&U)) {
- // Detect calls to free.
- if (isFreeCall(I, TLI))
- Writers.push_back(CS->getParent()->getParent());
- else
- return true; // Argument of an unknown call.
- }
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
- } else {
- return true;
- }
- }
-
- return false;
-}
-
-/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable
-/// which holds a pointer type. See if the global always points to non-aliased
-/// heap memory: that is, all initializers of the globals are allocations, and
-/// those allocations have no use other than initialization of the global.
-/// Further, all loads out of GV must directly use the memory, not store the
-/// pointer somewhere. If this is true, we consider the memory pointed to by
-/// GV to be owned by GV and can disambiguate other pointers from it.
-bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
- // Keep track of values related to the allocation of the memory, f.e. the
- // value produced by the malloc call and any casts.
- std::vector<Value *> AllocRelatedValues;
-
- // Walk the user list of the global. If we find anything other than a direct
- // load or store, bail out.
- for (User *U : GV->users()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // The pointer loaded from the global can only be used in simple ways:
- // we allow addressing of it and loading storing to it. We do *not* allow
- // storing the loaded pointer somewhere else or passing to a function.
- std::vector<Function *> ReadersWriters;
- if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
- return false; // Loaded pointer escapes.
- // TODO: Could try some IP mod/ref of the loaded pointer.
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- // Storing the global itself.
- if (SI->getOperand(0) == GV)
- return false;
-
- // If storing the null pointer, ignore it.
- if (isa<ConstantPointerNull>(SI->getOperand(0)))
- continue;
-
- // Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
- GV->getParent()->getDataLayout());
-
- if (!isAllocLikeFn(Ptr, TLI))
- return false; // Too hard to analyze.
-
- // Analyze all uses of the allocation. If any of them are used in a
- // non-simple way (e.g. stored to another global) bail out.
- std::vector<Function *> ReadersWriters;
- if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
- return false; // Loaded pointer escapes.
-
- // Remember that this allocation is related to the indirect global.
- AllocRelatedValues.push_back(Ptr);
- } else {
- // Something complex, bail out.
- return false;
- }
- }
-
- // Okay, this is an indirect global. Remember all of the allocations for
- // this global in AllocsForIndirectGlobals.
- while (!AllocRelatedValues.empty()) {
- AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
- AllocRelatedValues.pop_back();
- }
- IndirectGlobals.insert(GV);
- return true;
-}
-
-/// AnalyzeCallGraph - At this point, we know the functions where globals are
-/// immediately stored to and read from. Propagate this information up the call
-/// graph to all callers and compute the mod/ref info for all memory for each
-/// function.
-void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
- // We do a bottom-up SCC traversal of the call graph. In other words, we
- // visit all callees before callers (leaf-first).
- for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
- const std::vector<CallGraphNode *> &SCC = *I;
- assert(!SCC.empty() && "SCC with no functions?");
-
- if (!SCC[0]->getFunction()) {
- // Calls externally - can't say anything useful. Remove any existing
- // function records (may have been created when scanning globals).
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- FunctionInfo.erase(SCC[i]->getFunction());
- continue;
- }
-
- FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()];
-
- bool KnowNothing = false;
- unsigned FunctionEffect = 0;
-
- // Collect the mod/ref properties due to called functions. We only compute
- // one mod-ref set.
- for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
- Function *F = SCC[i]->getFunction();
- if (!F) {
- KnowNothing = true;
- break;
- }
-
- if (F->isDeclaration()) {
- // Try to get mod/ref behaviour from function attributes.
- if (F->doesNotAccessMemory()) {
- // Can't do better than that!
- } else if (F->onlyReadsMemory()) {
- FunctionEffect |= Ref;
- if (!F->isIntrinsic())
- // This function might call back into the module and read a global -
- // consider every global as possibly being read by this function.
- FR.MayReadAnyGlobal = true;
- } else {
- FunctionEffect |= ModRef;
- // Can't say anything useful unless it's an intrinsic - they don't
- // read or write global variables of the kind considered here.
- KnowNothing = !F->isIntrinsic();
- }
- continue;
- }
-
- for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
- CI != E && !KnowNothing; ++CI)
- if (Function *Callee = CI->second->getFunction()) {
- if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) {
- // Propagate function effect up.
- FunctionEffect |= CalleeFR->FunctionEffect;
-
- // Incorporate callee's effects on globals into our info.
- for (const auto &G : CalleeFR->GlobalInfo)
- FR.GlobalInfo[G.first] |= G.second;
- FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
- } else {
- // Can't say anything about it. However, if it is inside our SCC,
- // then nothing needs to be done.
- CallGraphNode *CalleeNode = CG[Callee];
- if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
- KnowNothing = true;
- }
- } else {
- KnowNothing = true;
- }
- }
-
- // If we can't say anything useful about this SCC, remove all SCC functions
- // from the FunctionInfo map.
- if (KnowNothing) {
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- FunctionInfo.erase(SCC[i]->getFunction());
- continue;
- }
-
- // Scan the function bodies for explicit loads or stores.
- for (auto *Node : SCC) {
- if (FunctionEffect == ModRef)
- break; // The mod/ref lattice saturates here.
- for (Instruction &I : inst_range(Node->getFunction())) {
- if (FunctionEffect == ModRef)
- break; // The mod/ref lattice saturates here.
-
- // We handle calls specially because the graph-relevant aspects are
- // handled above.
- if (auto CS = CallSite(&I)) {
- if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) {
- // FIXME: It is completely unclear why this is necessary and not
- // handled by the above graph code.
- FunctionEffect |= ModRef;
- } else if (Function *Callee = CS.getCalledFunction()) {
- // The callgraph doesn't include intrinsic calls.
- if (Callee->isIntrinsic()) {
- ModRefBehavior Behaviour =
- AliasAnalysis::getModRefBehavior(Callee);
- FunctionEffect |= (Behaviour & ModRef);
- }
- }
- continue;
- }
-
- // All non-call instructions we use the primary predicates for whether
- // thay read or write memory.
- if (I.mayReadFromMemory())
- FunctionEffect |= Ref;
- if (I.mayWriteToMemory())
- FunctionEffect |= Mod;
- }
- }
-
- if ((FunctionEffect & Mod) == 0)
- ++NumReadMemFunctions;
- if (FunctionEffect == 0)
- ++NumNoMemFunctions;
- FR.FunctionEffect = FunctionEffect;
-
- // Finally, now that we know the full effect on this SCC, clone the
- // information to each function in the SCC.
- for (unsigned i = 1, e = SCC.size(); i != e; ++i)
- FunctionInfo[SCC[i]->getFunction()] = FR;
- }
-}
-
-/// alias - If one of the pointers is to a global that we are tracking, and the
-/// other is some random pointer, we know there cannot be an alias, because the
-/// address of the global isn't taken.
-AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- // Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
-
- // If either of the underlying values is a global, they may be non-addr-taken
- // globals, which we can answer queries about.
- const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
- const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
- if (GV1 || GV2) {
- // If the global's address is taken, pretend we don't know it's a pointer to
- // the global.
- if (GV1 && !NonAddressTakenGlobals.count(GV1))
- GV1 = nullptr;
- if (GV2 && !NonAddressTakenGlobals.count(GV2))
- GV2 = nullptr;
-
- // If the two pointers are derived from two different non-addr-taken
- // globals, or if one is and the other isn't, we know these can't alias.
- if ((GV1 || GV2) && GV1 != GV2)
- return NoAlias;
-
- // Otherwise if they are both derived from the same addr-taken global, we
- // can't know the two accesses don't overlap.
- }
-
- // These pointers may be based on the memory owned by an indirect global. If
- // so, we may be able to handle this. First check to see if the base pointer
- // is a direct load from an indirect global.
- GV1 = GV2 = nullptr;
- if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
- if (IndirectGlobals.count(GV))
- GV1 = GV;
- if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
- if (IndirectGlobals.count(GV))
- GV2 = GV;
-
- // These pointers may also be from an allocation for the indirect global. If
- // so, also handle them.
- if (AllocsForIndirectGlobals.count(UV1))
- GV1 = AllocsForIndirectGlobals[UV1];
- if (AllocsForIndirectGlobals.count(UV2))
- GV2 = AllocsForIndirectGlobals[UV2];
-
- // Now that we know whether the two pointers are related to indirect globals,
- // use this to disambiguate the pointers. If either pointer is based on an
- // indirect global and if they are not both based on the same indirect global,
- // they cannot alias.
- if ((GV1 || GV2) && GV1 != GV2)
- return NoAlias;
-
- return AliasAnalysis::alias(LocA, LocB);
-}
-
-AliasAnalysis::ModRefResult
-GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- unsigned Known = ModRef;
-
- // If we are asking for mod/ref info of a direct call with a pointer to a
- // global we are tracking, return information if we have it.
- const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
- if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
- if (GV->hasLocalLinkage())
- if (const Function *F = CS.getCalledFunction())
- if (NonAddressTakenGlobals.count(GV))
- if (const FunctionRecord *FR = getFunctionInfo(F))
- Known = FR->getInfoForGlobal(GV);
-
- if (Known == NoModRef)
- return NoModRef; // No need to query other mod/ref analyses
- return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
-}
-
-//===----------------------------------------------------------------------===//
-// Methods to update the analysis as a result of the client transformation.
-//
-void GlobalsModRef::deleteValue(Value *V) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- if (NonAddressTakenGlobals.erase(GV)) {
- // This global might be an indirect global. If so, remove it and remove
- // any AllocRelatedValues for it.
- if (IndirectGlobals.erase(GV)) {
- // Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<const Value *, const GlobalValue *>::iterator
- I = AllocsForIndirectGlobals.begin(),
- E = AllocsForIndirectGlobals.end();
- I != E;) {
- if (I->second == GV) {
- AllocsForIndirectGlobals.erase(I++);
- } else {
- ++I;
- }
- }
- }
- }
- }
-
- // Otherwise, if this is an allocation related to an indirect global, remove
- // it.
- AllocsForIndirectGlobals.erase(V);
-
- AliasAnalysis::deleteValue(V);
-}
-
-void GlobalsModRef::addEscapingUse(Use &U) {
- // For the purposes of this analysis, it is conservatively correct to treat
- // a newly escaping value equivalently to a deleted one. We could perhaps
- // be more precise by processing the new use and attempting to update our
- // saved analysis results to accommodate it.
- deleteValue(U);
-
- AliasAnalysis::addEscapingUse(U);
-}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
deleted file mode 100644
index 806bfb8..0000000
--- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- IPA.cpp -----------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the common initialization routines for the IPA library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm-c/Initialization.h"
-#include "llvm/PassRegistry.h"
-
-using namespace llvm;
-
-/// initializeIPA - Initialize all passes linked into the IPA library.
-void llvm::initializeIPA(PassRegistry &Registry) {
- initializeCallGraphWrapperPassPass(Registry);
- initializeCallGraphPrinterPass(Registry);
- initializeCallGraphViewerPass(Registry);
- initializeGlobalsModRefPass(Registry);
-}
-
-void LLVMInitializeIPA(LLVMPassRegistryRef R) {
- initializeIPA(*unwrap(R));
-}
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index 926787d..e0c5d8f 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(IVUsers, "iv-users",
"Induction Variable Users", false, true)
@@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.setPreservesAll();
}
@@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
*L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Collect ephemeral values so that AddUsersIfInteresting skips them.
EphValues.clear();
@@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- (void)AddUsersIfInteresting(I);
+ (void)AddUsersIfInteresting(&*I);
return false;
}
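This hunk's `(void)AddUsersIfInteresting(&*I);` change recurs across the commit: ilist iterators no longer convert implicitly to pointers, so the element must be dereferenced and its address taken explicitly. A minimal standalone illustration with std::list standing in for llvm::ilist:

    #include <list>

    static void take(int *) {}

    int main() {
      std::list<int> L{1, 2, 3};
      for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E; ++I)
        take(&*I); // dereference the iterator, then take the element's address
    }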
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index c0d2e37..a86a703 100644
--- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// inlining has the given attribute set either at the call site or the
/// function declaration. Primarily used to inspect call site specific
/// attributes since these can be more precise than the ones on the callee
- /// itself.
+ /// itself.
bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
/// Return true if the given value is known non null within the callee if
- /// inlined through this particular callsite.
+ /// inlined through this particular callsite.
bool isKnownNonNullInCallee(Value *V);
// Custom analysis routines.
@@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);
+ bool visitCleanupReturnInst(CleanupReturnInst &RI);
+ bool visitCatchReturnInst(CatchReturnInst &RI);
bool visitUnreachableInst(UnreachableInst &I);
public:
@@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
- // bonus we want to apply, but don't go below zero.
- Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ // threshold to get the bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, CA.getThreshold() - CA.getCost());
}
return Base::visitCallSite(CS);
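The revised bonus line credits only the headroom the analyzed indirect call actually left (its threshold minus its cost), clamped at zero. A hedged arithmetic check of that computation with made-up numbers:

    #include <algorithm>
    #include <cassert>

    int main() {
      int Threshold = 325, CalleeCost = 125; // hypothetical analyzer output
      int Bonus = std::max(0, Threshold - CalleeCost); // never negative
      assert(Bonus == 200);
      int CallerCost = 500;
      CallerCost -= Bonus; // apply the inlining bonus, as in the hunk above
      assert(CallerCost == 300);
    }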
@@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
return false;
}
+bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a cleanupret instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a catchret instruction.
+ return false;
+}
+
bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
// FIXME: It might be reasonable to discount the cost of instructions leading
// to unreachable as they have the lowest possible impact on both runtime and
@@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
continue;
// Skip ephemeral values.
- if (EphValues.count(I))
+ if (EphValues.count(&*I))
continue;
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
- // If the instruction is floating point, and the target says this operation is
- // expensive or the function has the "use-soft-float" attribute, this may
- // eventually become a library call. Treat the cost as such.
+ // If the instruction is floating point, and the target says this operation
+ // is expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
if (I->getType()->isFloatingPointTy()) {
bool hasSoftFloatAttr = false;
- // If the function has the "use-soft-float" attribute, mark it as expensive.
+ // If the function has the "use-soft-float" attribute, mark it as
+ // expensive.
if (F.hasFnAttribute("use-soft-float")) {
Attribute Attr = F.getFnAttribute("use-soft-float");
StringRef Val = Attr.getValueAsString();
@@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base
// cost should count against inlining.
- if (Base::visit(I))
+ if (Base::visit(&*I))
++NumInstructionsSimplified;
else
Cost += InlineConstants::InstrCost;
@@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
FAI != FAE; ++FAI, ++CAI) {
assert(CAI != CS.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
- SimplifiedValues[FAI] = C;
+ SimplifiedValues[&*FAI] = C;
Value *PtrArg = *CAI;
if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
- ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+ ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
// We can SROA any pointer arguments derived from alloca instructions.
if (isa<AllocaInst>(PtrArg)) {
- SROAArgValues[FAI] = PtrArg;
+ SROAArgValues[&*FAI] = PtrArg;
SROAArgCosts[PtrArg] = 0;
}
}
@@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
- return Cost < Threshold;
+ return Cost <= std::max(0, Threshold);
}
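The new return clamps the threshold at zero and accepts equality, so a call whose analyzed cost is zero still inlines even when the threshold is non-positive. A hedged check of the boundary case:

    #include <algorithm>
    #include <cassert>

    int main() {
      int Threshold = 0; // caller permits no growth at all
      int Cost = 0;      // the call folds away entirely
      // Old test: Cost < Threshold evaluates 0 < 0 and rejects a free call.
      // The new test below accepts it.
      assert(Cost <= std::max(0, Threshold));
    }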
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() {
}
#endif
-INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
- true, true)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
- true, true)
-
-char InlineCostAnalysis::ID = 0;
-
-InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
-
-InlineCostAnalysis::~InlineCostAnalysis() {}
-
-void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
-}
-
-bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- ACT = &getAnalysis<AssumptionCacheTracker>();
- return false;
-}
-
-InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
- return getInlineCost(CS, CS.getCalledFunction(), Threshold);
-}
-
/// \brief Test that two functions either have or have not the given attribute
/// at the same time.
template<typename AttrKind>
@@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee,
TargetTransformInfo &TTI) {
- return TTI.hasCompatibleFunctionAttributes(Caller, Callee) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
- attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+ return TTI.areInlineCompatible(Caller, Callee) &&
+ AttributeFuncs::areInlineCompatible(*Caller, *Callee);
+}
+
+InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
}
-InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
- int Threshold) {
+InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
// Cannot inline indirect calls.
if (!Callee)
return llvm::InlineCost::getNever();
@@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee,
- TTIWP->getTTI(*Callee)))
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
return llvm::InlineCost::getNever();
// Don't inline this call if the caller has the optnone attribute.
@@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS);
+ CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
-bool InlineCostAnalysis::isInlineViable(Function &F) {
+bool llvm::isInlineViable(Function &F) {
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches or
@@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
return false;
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- CallSite CS(II);
+ for (auto &II : *BI) {
+ CallSite CS(&II);
if (!CS)
continue;
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index a7f8f5c..b89ff26 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return DT->dominates(I, P);
}
- // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // Otherwise, if the instruction is in the entry block and is not an invoke,
// then it obviously dominates all phi nodes.
if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
!isa<InvokeInst>(I))
@@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL,
// Is the set of underlying objects all noalias calls?
auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(),
- [](Value *V){ return isNoAliasCall(V); });
+ return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
@@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// X >=u 1 -> X
if (match(RHS, m_One()))
return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ /// For signed comparison, the values for an i1 are 0 and -1
+ /// respectively. This maps into a truth table of:
+ /// LHS | RHS | LHS >=s RHS | LHS implies RHS
+ /// 0 | 0 | 1 (0 >= 0) | 1
+ /// 0 | 1 | 1 (0 >= -1) | 1
+ /// 1 | 0 | 0 (-1 >= 0) | 0
+ /// 1 | 1 | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
// X <s 0 -> X
@@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (match(RHS, m_One()))
return LHS;
break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
+ break;
}
}
@@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
} else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
// 'and x, CI2' produces [0, CI2].
Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
+ // 'add nuw x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
}
- if (Lower != Upper) {
- ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+
+ ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
+ : ConstantRange(Width, true);
+
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+
+ if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(RHS->getContext());
if (RHS_CR.inverse().contains(LHS_CR))
@@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
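A hedged standalone model of the new range-metadata fold: when every value the LHS can take satisfies the predicate against every value the RHS can take, the compare folds to a constant. Half-open intervals stand in for ConstantRange, and non-empty ranges are assumed:

    #include <cassert>

    struct Range { unsigned Lo, Hi; }; // models !range metadata as [Lo, Hi)

    // `icmp ult A, B` folds to true when all of A lies below all of B.
    static bool alwaysULT(Range A, Range B) { return A.Hi - 1 < B.Lo; }

    int main() {
      Range LHS{0, 8};   // e.g. a load annotated with a [0, 8) range
      Range RHS{16, 32}; // e.g. a call annotated with a [16, 32) range
      assert(alwaysULT(LHS, RHS)); // every LHS value < every RHS value
    }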
// Compare of cast, for example (zext X) != 0 -> X != 0
if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
Instruction *LI = cast<CastInst>(LHS);
@@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
// Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
@@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
- Instruction *CxtI) {
+ const Instruction *CxtI) {
return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
break;
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!Result && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
+ if ((KnownZero | KnownOne).isAllOnesValue())
+ Result = ConstantInt::get(I->getContext(), KnownOne);
+ }
+
/// If called on unreachable code, the above logic may report that the
/// instruction simplified to itself. Make life easier for users by
/// detecting that case here, returning a safe value instead.
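The computeKnownBits fallback added above catches values whose every bit is pinned even though no operand is a constant. A hedged bit-level illustration of such a value:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For any X, (X | 0xF0) & 0xF0 has its low four bits known zero and its
      // high four bits known one, so it is the constant 0xF0. That is exactly
      // what the new fallback folds even though X itself is unknown.
      for (uint32_t X = 0; X < 1024; ++X)
        assert(((X | 0xF0u) & 0xF0u) == 0xF0u);
    }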
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index c8d0410..0f0f31e 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {
assert(CalleeC.isDescendantOf(*this) &&
"Callee must be a descendant of the Caller.");
- // The only change required is to add this SCC to the parent set of the callee.
+ // The only change required is to add this SCC to the parent set of the
+ // callee.
CalleeC.ParentSCCs.insert(this);
}
@@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS(
}
SmallVector<LazyCallGraph::SCC *, 1>
-LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
- Node &CalleeN) {
+LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) {
// First remove it from the node.
CallerN.removeEdgeInternal(CalleeN.getFunction());
@@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
// the leaf SCC list.
if (!IsLeafSCC && !ResultSCCs.empty())
G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this),
- G->LeafSCCs.end());
+ G->LeafSCCs.end());
// Return the new list of SCCs.
return ResultSCCs;
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index a6ae7f2..0d1d34e 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
@@ -64,10 +65,10 @@ class LVILatticeVal {
enum LatticeValueTy {
/// This Value has no known value yet.
undefined,
-
+
/// This Value has a specific constant value.
constant,
-
+
/// This Value is known to not have the specified value.
notconstant,
@@ -77,13 +78,13 @@ class LVILatticeVal {
/// This value is not known to be constant, and we know that it has a value.
overdefined
};
-
+
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'notconstant' value.
LatticeValueTy Tag;
Constant *Val;
ConstantRange Range;
-
+
public:
LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
@@ -104,29 +105,34 @@ public:
Res.markConstantRange(CR);
return Res;
}
+ static LVILatticeVal getOverdefined() {
+ LVILatticeVal Res;
+ Res.markOverdefined();
+ return Res;
+ }
bool isUndefined() const { return Tag == undefined; }
bool isConstant() const { return Tag == constant; }
bool isNotConstant() const { return Tag == notconstant; }
bool isConstantRange() const { return Tag == constantrange; }
bool isOverdefined() const { return Tag == overdefined; }
-
+
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
return Val;
}
-
+
Constant *getNotConstant() const {
assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
return Val;
}
-
+
ConstantRange getConstantRange() const {
assert(isConstantRange() &&
"Cannot get the constant-range of a non-constant-range!");
return Range;
}
-
+
/// Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
@@ -150,7 +156,7 @@ public:
Val = V;
return true;
}
-
+
/// Return true if this is a change in status.
bool markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
@@ -168,27 +174,27 @@ public:
Val = V;
return true;
}
-
+
/// Return true if this is a change in status.
bool markConstantRange(const ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
return markOverdefined();
-
+
bool changed = Range != NewR;
Range = NewR;
return changed;
}
-
+
assert(isUndefined());
if (NewR.isEmptySet())
return markOverdefined();
-
+
Tag = constantrange;
Range = NewR;
return true;
}
-
+
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
@@ -267,7 +273,7 @@ public:
return markConstantRange(NewR);
}
};
-
+
} // end anonymous namespace.
namespace llvm {
@@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
namespace {
/// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
- struct LVIValueHandle : public CallbackVH {
+ struct LVIValueHandle final : public CallbackVH {
LazyValueInfoCache *Parent;
-
+
LVIValueHandle(Value *V, LazyValueInfoCache *P)
: CallbackVH(V), Parent(P) { }
@@ -308,24 +314,27 @@ namespace {
};
}
-namespace {
+namespace {
/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
/// This is all of the cached block information for exactly one Value*.
/// The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
- typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+ /// Over-defined lattice values are recorded in OverDefinedCache to reduce
+ /// memory overhead.
+ typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
+ ValueCacheEntryTy;
/// This is all of the cached information for all values,
/// mapped from Value* to key information.
std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
-
+
/// This tracks, on a per-block basis, the set of values that are
- /// over-defined at the end of that block. This is required
- /// for cache updating.
- typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
- DenseSet<OverDefinedPairTy> OverDefinedCache;
+ /// over-defined at the end of that block.
+ typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>>
+ OverDefinedCacheTy;
+ OverDefinedCacheTy OverDefinedCache;
/// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
@@ -357,9 +366,13 @@ namespace {
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
SeenBlocks.insert(BB);
- lookup(Val)[BB] = Result;
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
if (Result.isOverdefined())
- OverDefinedCache.insert(std::make_pair(BB, Val));
+ OverDefinedCache[BB].insert(Val);
+ else
+ lookup(Val)[BB] = Result;
}
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
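A hedged miniature of the cache split introduced in this hunk: overdefined values get only a per-block membership set (no lattice payload), while everything else keeps its value in the per-value map. Plain standard containers stand in for DenseMap and SmallPtrSet:

    #include <cassert>
    #include <map>
    #include <set>

    struct Val {};
    struct Block {};

    std::map<const Block *, std::set<const Val *>> OverDefined; // cheap sets
    std::map<const Val *, std::map<const Block *, int>> Lattice; // full values

    void insertResult(const Val *V, const Block *BB, int LV, bool Overdef) {
      if (Overdef)
        OverDefined[BB].insert(V); // no lattice value stored at all
      else
        Lattice[V][BB] = LV;
    }

    int main() {
      Val V; Block BB;
      insertResult(&V, &BB, /*LV=*/0, /*Overdef=*/true);
      assert(OverDefined[&BB].count(&V) == 1 && Lattice.empty());
    }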
@@ -382,11 +395,39 @@ namespace {
Instruction *BBI);
void solve();
-
+
ValueCacheEntryTy &lookup(Value *V) {
return ValueCache[LVIValueHandle(V, this)];
}
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return true;
+
+ LVIValueHandle ValHandle(V, this);
+ auto I = ValueCache.find(ValHandle);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ return lookup(V)[BB];
+ }
+
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
@@ -402,15 +443,15 @@ namespace {
/// value for the specified Value* that is true on the specified edge.
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
-
+
/// This is the update interface to inform the cache that an edge from
/// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
-
+
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);
-
+
/// clear - Empty the cache.
void clear() {
SeenBlocks.clear();
@@ -425,15 +466,17 @@ namespace {
} // end anonymous namespace
void LVIValueHandle::deleted() {
- typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
-
- SmallVector<OverDefinedPairTy, 4> ToErase;
- for (const OverDefinedPairTy &P : Parent->OverDefinedCache)
- if (P.second == getValPtr())
- ToErase.push_back(P);
- for (const OverDefinedPairTy &P : ToErase)
- Parent->OverDefinedCache.erase(P);
-
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : Parent->OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ if (ValueSet.count(getValPtr()))
+ ValueSet.erase(getValPtr());
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ Parent->OverDefinedCache.erase(BB);
+
// This erasure deallocates *this, so it MUST happen after we're done
// using any and all members of *this.
Parent->ValueCache.erase(*this);
@@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
return;
SeenBlocks.erase(I);
- SmallVector<OverDefinedPairTy, 4> ToErase;
- for (const OverDefinedPairTy& P : OverDefinedCache)
- if (P.first == BB)
- ToErase.push_back(P);
- for (const OverDefinedPairTy &P : ToErase)
- OverDefinedCache.erase(P);
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
- for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
- I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
I->second.erase(BB);
}
@@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() {
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
- assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+ assert(hasCachedValueInfo(e.second, e.first) &&
+ "Result should be in cache!");
BlockValueStack.pop();
BlockValueSet.erase(e);
@@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- LVIValueHandle ValHandle(Val, this);
- std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I =
- ValueCache.find(ValHandle);
- if (I == ValueCache.end()) return false;
- return I->second.count(BB);
+ return hasCachedValueInfo(Val, BB);
}
LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
@@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
return LVILatticeVal::get(VC);
SeenBlocks.insert(BB);
- return lookup(Val)[BB];
+ return getCachedValueInfo(Val, BB);
+}
+
+static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+ switch (BBI->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (isa<IntegerType>(BBI->getType())) {
+ ConstantRange Result = getConstantRangeFromMetadata(*Ranges);
+ return LVILatticeVal::getRange(Result);
+ }
+ break;
+ };
+ // Nothing known. Note that we do not want overdefined here. We may know
+ // something else about the value and not having range metadata shouldn't
+ // cause us to throw away those facts.
+ return LVILatticeVal();
}
bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- if (lookup(Val).count(BB)) {
+ if (hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << lookup(Val)[BB] << '\n');
+ << "' val=" << getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
LVILatticeVal Res;
-
+
Instruction *BBI = dyn_cast<Instruction>(Val);
if (!BBI || BBI->getParent() != BB) {
if (!solveBlockValueNonLocal(Res, Val, BB))
@@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
- Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ // If this value is a nonnull pointer, record its range and bail out.
+ PointerType *PT = dyn_cast<PointerType>(BBI->getType());
+ if (PT && isKnownNonNull(BBI)) {
+ Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
insertResult(Val, BB, Res);
return true;
}
+ // If this is an instruction which supports range metadata, return the
+ // implied range. TODO: This should be an intersection, not a union.
+ Res.mergeIn(getFromRangeMetadata(BBI), DL);
+
// We can only analyze the definitions of certain classes of instructions
// (integral binops and casts at the moment), so bail if this isn't one.
LVILatticeVal Result;
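
The TODO above flags a real precision gap: mergeIn() computes a union, which is the right operation for joining alternatives, but two facts known to hold simultaneously for the same value should be intersected. On plain intervals (a simplified sketch, not the LVILatticeVal API) the difference looks like this:

    #include <algorithm>
    #include <utility>

    using Range = std::pair<int, int>; // half-open [Lo, Hi)

    // Given the facts [0,10) and [5,20) about one value: the union [0,20)
    // is a correct but weaker summary; the intersection [5,10) is also
    // correct and strictly more precise.
    Range unionOf(Range A, Range B) {
      return {std::min(A.first, B.first), std::max(A.second, B.second)};
    }
    Range intersectionOf(Range A, Range B) {
      return {std::max(A.first, B.first), std::min(A.second, B.second)};
    }
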
@@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
}
-
+
BBLV = Result;
return true;
}
@@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
BBLV = Result;
return true;
}
-
+
bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
PHINode *PN, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred.\n");
-
+
BBLV = Result;
return true;
}
@@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
BBLV.markOverdefined();
return true;
}
-
+
ConstantRange LHSRange = LHSVal.getConstantRange();
ConstantRange RHSRange(1);
IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
@@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
case Instruction::Or:
Result.markConstantRange(LHSRange.binaryOr(RHSRange));
break;
-
+
// Unhandled instructions are overdefined.
default:
DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
Result.markOverdefined();
break;
}
-
+
BBLV = Result;
return true;
}
@@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
/// Val is not constrained on the edge.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result) {
- // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
// If this is a conditional branch and only one successor goes to BBTo, then
@@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
bool isTrueDest = BI->getSuccessor(0) == BBTo;
assert(BI->getSuccessor(!isTrueDest) == BBTo &&
"BBTo isn't a successor of BBFrom");
-
+
// If V is the condition of the branch itself, then we know exactly what
// it is.
if (BI->getCondition() == Val) {
@@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
Type::getInt1Ty(Val->getContext()), isTrueDest));
return true;
}
-
+
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
@@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
-
+
assert(BlockValueStack.empty() && BlockValueSet.empty());
pushBlockValue(std::make_pair(BB, V));
@@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
<< CxtI->getName() << "'\n");
LVILatticeVal Result;
+ if (auto *I = dyn_cast<Instruction>(V))
+ Result = getFromRangeMetadata(I);
mergeAssumeBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
@@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
-
+
LVILatticeVal Result;
if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
solve();
@@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- // When an edge in the graph has been threaded, values that we could not
- // determine a value for before (i.e. were marked overdefined) may be possible
- // to solve now. We do NOT try to proactively update these values. Instead,
- // we clear their entries from the cache, and allow lazy updating to recompute
- // them when needed.
-
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be
+ // possible to solve now. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
// The updating process is fairly simple: we need to drop cached info
// for all values that were marked overdefined in OldSucc, and for those same
// values in any successor of OldSucc (except NewSucc) in which they were
// also marked overdefined.
std::vector<BasicBlock*> worklist;
worklist.push_back(OldSucc);
-
- DenseSet<Value*> ClearSet;
- for (OverDefinedPairTy &P : OverDefinedCache)
- if (P.first == OldSucc)
- ClearSet.insert(P.second);
-
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
// Use a worklist to perform a depth-first search of OldSucc's successors.
// NOTE: We do not need a visited list since any blocks we have already
// visited will have had their overdefined markers cleared already, and we
@@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
while (!worklist.empty()) {
BasicBlock *ToUpdate = worklist.back();
worklist.pop_back();
-
+
// Skip blocks only accessible through NewSucc.
if (ToUpdate == NewSucc) continue;
-
+
bool changed = false;
- for (Value *V : ClearSet) {
+ for (Value *V : ValsToClear) {
// If a value was marked overdefined in OldSucc, and is here too...
- DenseSet<OverDefinedPairTy>::iterator OI =
- OverDefinedCache.find(std::make_pair(ToUpdate, V));
- if (OI == OverDefinedCache.end()) continue;
-
- // Remove it from the caches.
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)];
- ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
-
- assert(CI != Entry.end() && "Couldn't find entry to update?");
- Entry.erase(CI);
- OverDefinedCache.erase(OI);
-
- // If we removed anything, then we potentially need to update
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+ if (!ValueSet.count(V))
+ continue;
+
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
// blocks successors too.
changed = true;
}
if (!changed) continue;
-
+
worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
}
}
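
The rewritten threadEdge() above keeps the original invalidation strategy and only adapts it to the keyed cache: collect the values overdefined in OldSucc, then walk forward clearing those entries, using "no change in this block" as the stopping condition instead of a visited set. A compact model of the walk (simplified types standing in for the LLVM CFG):

    #include <map>
    #include <set>
    #include <vector>

    struct Value;
    struct BasicBlock { std::vector<BasicBlock *> Succs; };

    using OverDefinedCacheTy = std::map<BasicBlock *, std::set<Value *>>;

    void invalidateAfterThreading(OverDefinedCacheTy &Cache,
                                  BasicBlock *OldSucc, BasicBlock *NewSucc) {
      auto It = Cache.find(OldSucc);
      if (It == Cache.end())
        return; // nothing was overdefined in OldSucc
      std::vector<Value *> ValsToClear(It->second.begin(), It->second.end());

      std::vector<BasicBlock *> Worklist{OldSucc};
      while (!Worklist.empty()) {
        BasicBlock *BB = Worklist.back();
        Worklist.pop_back();
        if (BB == NewSucc) // skip blocks only reachable through NewSucc
          continue;

        bool Changed = false;
        auto OI = Cache.find(BB);
        if (OI != Cache.end()) {
          for (Value *V : ValsToClear)
            Changed |= OI->second.erase(V) != 0;
          if (OI->second.empty())
            Cache.erase(OI);
        }
        // A block whose entries were already cleared contributes nothing
        // new, so its successors need not be revisited.
        if (Changed)
          for (BasicBlock *S : BB->Succs)
            Worklist.push_back(S);
      }
    }
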
@@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
}
/// Determine whether the specified value is known to be a
-/// constant on the specified edge. Return null if not.
+/// constant on the specified edge. Return null if not.
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
@@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
return LazyValueInfo::Unknown;
}
-
+
if (Result.isConstantRange()) {
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return LazyValueInfo::Unknown;
-
+
ConstantRange CR = Result.getConstantRange();
if (Pred == ICmpInst::ICMP_EQ) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::False;
-
+
if (CR.isSingleElement() && CR.contains(CI->getValue()))
return LazyValueInfo::True;
} else if (Pred == ICmpInst::ICMP_NE) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::True;
-
+
if (CR.isSingleElement() && CR.contains(CI->getValue()))
return LazyValueInfo::False;
}
-
+
// Handle more complex predicates.
ConstantRange TrueValues =
ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
@@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
return LazyValueInfo::False;
return LazyValueInfo::Unknown;
}
-
+
if (Result.isNotConstant()) {
// If this is an equality comparison, we can try to fold it knowing that
// "V != C1".
@@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
}
return LazyValueInfo::Unknown;
}
-
+
return LazyValueInfo::Unknown;
}
@@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
if (Ret != Unknown)
return Ret;
- // TODO: Move this logic inside getValueAt so that it can be cached rather
- // than re-queried on each call. This would also allow us to merge the
- // underlying lattice values to get more information
+ // Note: The following bit of code is somewhat distinct from the rest of LVI;
+ // LVI as a whole tries to compute a lattice value which is conservatively
+ // correct at a given location. In this case, we have a predicate which we
+ // weren't able to prove about the merged result, and we're pushing that
+ // predicate back along each incoming edge to see if we can prove it
+ // separately for each input. As a motivating example, consider:
+ // bb1:
+ // %v1 = ... ; constantrange<1, 5>
+ // br label %merge
+ // bb2:
+ // %v2 = ... ; constantrange<10, 20>
+ // br label %merge
+ // merge:
+ // %phi = phi [%v1, %v2] ; constantrange<1,20>
+ // %pred = icmp eq i32 %phi, 8
+ // We can't tell from the lattice value for '%phi' that '%pred' is false
+ // along each path, but by checking the predicate over each input separately,
+ // we can.
+ // We limit the search to one step backwards from the current BB and value.
+ // We could consider extending this to search further backwards through the
+ // CFG and/or value graph, but there are non-obvious compile time vs quality
+ // tradeoffs.
if (CxtI) {
- // For a comparison where the V is outside this block, it's possible
- // that we've branched on it before. Look to see if the value is known
- // on all incoming edges.
BasicBlock *BB = CxtI->getParent();
+
+ // Function entry or an unreachable block. Bail to avoid confusing
+ // analysis below.
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (PI != PE &&
- (!isa<Instruction>(V) ||
- cast<Instruction>(V)->getParent() != BB)) {
+ if (PI == PE)
+ return Unknown;
+
+ // If V is a PHI node in the same block as the context, we need to ask
+ // questions about the predicate as applied to the incoming value along
+ // each edge. This is useful for eliminating cases where the predicate is
+ // known along all incoming edges.
+ if (auto *PHI = dyn_cast<PHINode>(V))
+ if (PHI->getParent() == BB) {
+ Tristate Baseline = Unknown;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
+ Value *Incoming = PHI->getIncomingValue(i);
+ BasicBlock *PredBB = PHI->getIncomingBlock(i);
+ // Note that PredBB may be BB itself.
+ Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB,
+ CxtI);
+
+ // Keep going as long as we've seen a consistent known result for
+ // all inputs.
+ Baseline = (i == 0) ? Result /* First iteration */
+ : (Baseline == Result ? Baseline : Unknown); /* All others */
+ if (Baseline == Unknown)
+ break;
+ }
+ if (Baseline != Unknown)
+ return Baseline;
+ }
+
+ // For a comparison where the V is outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ if (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() != BB) {
// For predecessor edge, determine if the comparison is true or false
- // on that edge. If they're all true or all false, we can conclude
+ // on that edge. If they're all true or all false, we can conclude
// the value of the comparison in this block.
Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
if (Baseline != Unknown) {
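
The long comment in getPredicateAt() above describes the new PHI handling; the control flow it implements reduces to a small fold: ask the predicate on each incoming edge, keep the answer only while every edge agrees, and bail to Unknown on the first disagreement. A sketch of that fold (queryOnEdge is a hypothetical stand-in for getPredicateOnEdge):

    enum Tristate { False = 0, True = 1, Unknown = -1 };

    template <typename EdgeQuery>
    Tristate predicateOverPhiInputs(unsigned NumIncoming,
                                    EdgeQuery queryOnEdge) {
      Tristate Baseline = Unknown;
      for (unsigned i = 0; i < NumIncoming; ++i) {
        Tristate Result = queryOnEdge(i);
        Baseline = (i == 0) ? Result
                            : (Baseline == Result ? Baseline : Unknown);
        if (Baseline == Unknown)
          break; // the edges disagree; give up early
      }
      return Baseline;
    }

In the motivating example from the comment, both edges answer False for %pred (neither [1,5) nor [10,20) contains 8), so the fold returns False even though the merged range [1,20) cannot decide the comparison.
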
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
deleted file mode 100644
index 991a0e3..0000000
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LibCallAliasAnalysis class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LibCallAliasAnalysis.h"
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-// Register this pass...
-char LibCallAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
- "LibCall Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
- return new LibCallAliasAnalysis(LCI);
-}
-
-LibCallAliasAnalysis::~LibCallAliasAnalysis() {
- delete LCI;
-}
-
-void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
-}
-
-bool LibCallAliasAnalysis::runOnFunction(Function &F) {
- // set up super class
- InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
- return false;
-}
-
-/// AnalyzeLibCallDetails - Given a call to a function with the specified
-/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
-/// vs the specified pointer/size.
-AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- // If we have a function, check to see what kind of mod/ref effects it
- // has. Start by including any info globally known about the function.
- AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
- if (MRInfo == NoModRef) return MRInfo;
-
- // If that didn't tell us that the function is 'readnone', check to see
- // if we have detailed info and if 'P' is any of the locations we know
- // about.
- const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
- if (Details == nullptr)
- return MRInfo;
-
- // If the details array is of the 'DoesNot' kind, we only know something if
- // the pointer is a match for one of the locations in 'Details'. If we find a
- // match, we can prove some interactions cannot happen.
- //
- if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
- // Find out if the pointer refers to a known location.
- for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &LocInfo =
- LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
- if (Res != LibCallLocationInfo::Yes) continue;
-
- // If we find a match against a location that we 'do not' interact with,
- // learn this info into MRInfo.
- return ModRefResult(MRInfo & ~Details[i].MRInfo);
- }
- return MRInfo;
- }
-
- // If the details are of the 'DoesOnly' sort, we know something if the pointer
- // is a match for one of the locations in 'Details'. Also, if we can prove
- // that the pointers is *not* one of the locations in 'Details', we know that
- // the call is NoModRef.
- assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
-
- // Find out if the pointer refers to a known location.
- bool NoneMatch = true;
- for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &LocInfo =
- LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
- if (Res == LibCallLocationInfo::No) continue;
-
- // If we don't know if this pointer points to the location, then we have to
- // assume it might alias in some case.
- if (Res == LibCallLocationInfo::Unknown) {
- NoneMatch = false;
- continue;
- }
-
- // If we know that this pointer definitely is pointing into the location,
- // merge in this information.
- return ModRefResult(MRInfo & Details[i].MRInfo);
- }
-
- // If we found that the pointer is guaranteed to not match any of the
- // locations in our 'DoesOnly' rule, then we know that the pointer must point
- // to some other location. Since the libcall doesn't mod/ref any other
- // locations, return NoModRef.
- if (NoneMatch)
- return NoModRef;
-
- // Otherwise, return any other info gained so far.
- return MRInfo;
-}
-
-// getModRefInfo - Check to see if the specified callsite can clobber the
-// specified memory object.
-//
-AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- ModRefResult MRInfo = ModRef;
-
- // If this is a direct call to a function that LCI knows about, get the
- // information about the runtime function.
- if (LCI) {
- if (const Function *F = CS.getCalledFunction()) {
- if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
- MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
- if (MRInfo == NoModRef) return NoModRef;
- }
- }
- }
-
- // The AliasAnalysis base class has some smarts, lets use them.
- return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
-}
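
For readers tracking what this deletion removes: the essence of the pass was narrowing a call's mod/ref mask using per-location details. A compressed sketch of the two detail kinds it distinguished (the bit values here are illustrative; the diff elsewhere uses the newer MRI_* enum):

    #include <cstdint>

    enum ModRefBits : uint8_t { MRI_NoModRef = 0, MRI_Ref = 1,
                                MRI_Mod = 2, MRI_ModRef = 3 };

    // 'DoesNot': matching a location the call does not interact with
    // strips those bits from the assumed behavior.
    ModRefBits refineDoesNot(ModRefBits Known, ModRefBits DetailBits) {
      return ModRefBits(Known & ~DetailBits);
    }

    // 'DoesOnly': if the pointer provably matches none of the listed
    // locations, the call cannot touch it at all; if it matches one,
    // intersect with that location's bits.
    ModRefBits refineDoesOnly(ModRefBits Known, bool MatchedAny,
                              ModRefBits MatchedBits) {
      return MatchedAny ? ModRefBits(Known & MatchedBits) : MRI_NoModRef;
    }
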
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
deleted file mode 100644
index 003c81e..0000000
--- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements interfaces that can be used to describe language
-// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
-// optimizers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Function.h"
-using namespace llvm;
-
-/// This impl pointer in ~LibCallInfo is actually a StringMap. This
-/// helper does the cast.
-static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
- return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
-}
-
-LibCallInfo::~LibCallInfo() {
- delete getMap(Impl);
-}
-
-const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
- // Get location info on the first call.
- if (NumLocations == 0)
- NumLocations = getLocationInfo(Locations);
-
- assert(LocID < NumLocations && "Invalid location ID!");
- return Locations[LocID];
-}
-
-
-/// Return the LibCallFunctionInfo object corresponding to
-/// the specified function if we have it. If not, return null.
-const LibCallFunctionInfo *
-LibCallInfo::getFunctionInfo(const Function *F) const {
- StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
-
- /// If this is the first time we are querying for this info, lazily construct
- /// the StringMap to index it.
- if (!Map) {
- Impl = Map = new StringMap<const LibCallFunctionInfo*>();
-
- const LibCallFunctionInfo *Array = getFunctionInfoArray();
- if (!Array) return nullptr;
-
- // We now have the array of entries. Populate the StringMap.
- for (unsigned i = 0; Array[i].Name; ++i)
- (*Map)[Array[i].Name] = Array+i;
- }
-
- // Look up this function in the string map.
- return Map->lookup(F->getName());
-}
-
-/// See if the given exception handling personality function is one that we
-/// understand. If so, return a description of it; otherwise return Unknown.
-EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
- const Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
- if (!F)
- return EHPersonality::Unknown;
- return StringSwitch<EHPersonality>(F->getName())
- .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
- .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
- .Case("__gcc_personality_v0", EHPersonality::GNU_C)
- .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
- .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
- .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
- .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
- .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
- .Default(EHPersonality::Unknown);
-}
-
-bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
- EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
- // We can't simplify any invokes to nounwind functions if the personality
- // function wants to catch asynch exceptions. The nounwind attribute only
- // implies that the function does not throw synchronous exceptions.
- return !isAsynchronousEHPersonality(Personality);
-}
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 0b9308a..2dfb09c 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -98,12 +99,13 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
- Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
- Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const;
public:
Module *Mod;
+ const DataLayout *DL;
AliasAnalysis *AA;
AssumptionCache *AC;
DominatorTree *DT;
@@ -121,7 +123,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
@@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
@@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
//
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
- AA = &getAnalysis<AliasAnalysis>();
+ DL = &F.getParent()->getDataLayout();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
- const DataLayout &DL = CS->getModule()->getDataLayout();
visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
MemRef::Callee);
- if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
+ if (Function *F = dyn_cast<Function>(findValue(Callee,
/*OffsetOk=*/false))) {
Assert(CS.getCallingConv() == F->getCallingConv(),
"Undefined behavior: Caller and callee calling convention differ",
@@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) {
for (; AI != AE; ++AI) {
Value *Actual = *AI;
if (PI != PE) {
- Argument *Formal = PI++;
+ Argument *Formal = &*PI++;
Assert(Formal->getType() == Actual->getType(),
"Undefined behavior: Call argument type mismatches "
"callee parameter type",
@@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
- visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
- DL.getABITypeAlignment(Ty), Ty,
+ visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
+ DL->getABITypeAlignment(Ty), Ty,
MemRef::Read | MemRef::Write);
}
}
@@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj),
"Undefined behavior: Call with \"tail\" keyword references "
"alloca",
@@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
uint64_t Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
@@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) {
visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
nullptr, MemRef::Read | MemRef::Write);
break;
-
- case Intrinsic::eh_begincatch:
- visitEHBeginCatch(II);
- break;
- case Intrinsic::eh_endcatch:
- visitEHEndCatch(II);
- break;
}
}
@@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) {
"Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
- Value *Obj =
- findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I,
if (Size == 0)
return;
- Value *UnderlyingObject =
- findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
Assert(!isa<ConstantPointerNull>(UnderlyingObject),
"Undefined behavior: Null pointer dereference", &I);
Assert(!isa<UndefValue>(UnderlyingObject),
@@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I,
// Check for buffer overflows and misalignment.
// Only handles memory references that read/write something simple like an
// alloca instruction or a global variable.
- auto &DL = I.getModule()->getDataLayout();
int64_t Offset = 0;
- if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
// something we can handle and if so extract the size of this base object
// along with its alignment.
@@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I,
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
Type *ATy = AI->getAllocatedType();
if (!AI->isArrayAllocation() && ATy->isSized())
- BaseSize = DL.getTypeAllocSize(ATy);
+ BaseSize = DL->getTypeAllocSize(ATy);
BaseAlign = AI->getAlignment();
if (BaseAlign == 0 && ATy->isSized())
- BaseAlign = DL.getABITypeAlignment(ATy);
+ BaseAlign = DL->getABITypeAlignment(ATy);
} else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// If the global may be defined differently in another compilation unit
// then don't warn about funky memory accesses.
if (GV->hasDefinitiveInitializer()) {
Type *GTy = GV->getType()->getElementType();
if (GTy->isSized())
- BaseSize = DL.getTypeAllocSize(GTy);
+ BaseSize = DL->getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
if (BaseAlign == 0 && GTy->isSized())
- BaseAlign = DL.getABITypeAlignment(GTy);
+ BaseAlign = DL->getABITypeAlignment(GTy);
}
}
@@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I,
// Accesses that say that the memory is more aligned than it is are not
// defined.
if (Align == 0 && Ty && Ty->isSized())
- Align = DL.getABITypeAlignment(Ty);
+ Align = DL->getABITypeAlignment(Ty);
Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
"Undefined behavior: Memory reference address is misaligned", &I);
}
@@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I,
void Lint::visitLoadInst(LoadInst &I) {
visitMemoryReference(I, I.getPointerOperand(),
- AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ DL->getTypeStoreSize(I.getType()), I.getAlignment(),
I.getType(), MemRef::Read);
}
void Lint::visitStoreInst(StoreInst &I) {
visitMemoryReference(I, I.getPointerOperand(),
- AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ DL->getTypeStoreSize(I.getOperand(0)->getType()),
I.getAlignment(),
I.getOperand(0)->getType(), MemRef::Write);
}
@@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) {
}
void Lint::visitLShr(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getOperand(1), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
- I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
- I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
-static bool
-allPredsCameFromLandingPad(BasicBlock *BB,
- SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- if (BB->isLandingPad())
- return true;
- // If we find a block with no predecessors, the search failed.
- if (pred_empty(BB))
- return false;
- for (BasicBlock *Pred : predecessors(BB)) {
- if (VisitedBlocks.count(Pred))
- continue;
- if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
- return false;
- }
- return true;
-}
-
-static bool
-allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
- IntrinsicInst **SecondBeginCatch,
- SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
- IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
- return true;
- // If we find another begincatch while looking for an endcatch,
- // that's also an error.
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
- *SecondBeginCatch = IC;
- return false;
- }
- }
-
- // If we reach a block with no successors while searching, the
- // search has failed.
- if (succ_empty(BB))
- return false;
- // Otherwise, search all of the successors.
- for (BasicBlock *Succ : successors(BB)) {
- if (VisitedBlocks.count(Succ))
- continue;
- if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
- VisitedBlocks))
- return false;
- }
- return true;
-}
-
-void Lint::visitEHBeginCatch(IntrinsicInst *II) {
- // The checks in this function make a potentially dubious assumption about
- // the CFG, namely that any block involved in a catch is only used for the
- // catch. This will very likely be true of IR generated by a front end,
- // but it may cease to be true, for example, if the IR is run through a
- // pass which combines similar blocks.
- //
- // In general, if we encounter a block the isn't dominated by the catch
- // block while we are searching the catch block's successors for a call
- // to end catch intrinsic, then it is possible that it will be legal for
- // a path through this block to never reach a call to llvm.eh.endcatch.
- // An analogous statement could be made about our search for a landing
- // pad among the catch block's predecessors.
- //
- // What is actually required is that no path is possible at runtime that
- // reaches a call to llvm.eh.begincatch without having previously visited
- // a landingpad instruction and that no path is possible at runtime that
- // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
- // (mentally adjusting for the fact that in reality these calls will be
- // removed before code generation).
- //
- // Because this is a lint check, we take a pessimistic approach and warn if
- // the control flow is potentially incorrect.
-
- SmallSet<BasicBlock *, 4> VisitedBlocks;
- BasicBlock *CatchBB = II->getParent();
-
- // The begin catch must occur in a landing pad block or all paths
- // to it must have come from a landing pad.
- Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
- "llvm.eh.begincatch may be reachable without passing a landingpad",
- II);
-
- // Reset the visited block list.
- VisitedBlocks.clear();
-
- IntrinsicInst *SecondBeginCatch = nullptr;
-
- // This has to be called before it is asserted. Otherwise, the first assert
- // below can never be hit.
- bool EndCatchFound = allSuccessorsReachEndCatch(
- CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
- &SecondBeginCatch, VisitedBlocks);
- Assert(
- SecondBeginCatch == nullptr,
- "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
- II, SecondBeginCatch);
- Assert(EndCatchFound,
- "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
- II);
-}
-
-static bool allPredCameFromBeginCatch(
- BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
- IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
- VisitedBlocks.insert(BB);
- // Look for a begincatch in this block.
- for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
- ++RI) {
- IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
- return true;
- // If we find another end catch before we find a begin catch, that's
- // an error.
- if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
- *SecondEndCatch = IC;
- return false;
- }
- // If we encounter a landingpad instruction, the search failed.
- if (isa<LandingPadInst>(*RI))
- return false;
- }
- // If while searching we find a block with no predeccesors,
- // the search failed.
- if (pred_empty(BB))
- return false;
- // Search any predecessors we haven't seen before.
- for (BasicBlock *Pred : predecessors(BB)) {
- if (VisitedBlocks.count(Pred))
- continue;
- if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
- VisitedBlocks))
- return false;
- }
- return true;
-}
-
-void Lint::visitEHEndCatch(IntrinsicInst *II) {
- // The check in this function makes a potentially dubious assumption about
- // the CFG, namely that any block involved in a catch is only used for the
- // catch. This will very likely be true of IR generated by a front end,
- // but it may cease to be true, for example, if the IR is run through a
- // pass which combines similar blocks.
- //
- // In general, if we encounter a block the isn't post-dominated by the
- // end catch block while we are searching the end catch block's predecessors
- // for a call to the begin catch intrinsic, then it is possible that it will
- // be legal for a path to reach the end catch block without ever having
- // called llvm.eh.begincatch.
- //
- // What is actually required is that no path is possible at runtime that
- // reaches a call to llvm.eh.endcatch without having previously visited
- // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
- // reality these calls will be removed before code generation).
- //
- // Because this is a lint check, we take a pessimistic approach and warn if
- // the control flow is potentially incorrect.
-
- BasicBlock *EndCatchBB = II->getParent();
-
- // Alls paths to the end catch call must pass through a begin catch call.
-
- // If llvm.eh.begincatch wasn't called in the current block, we'll use this
- // lambda to recursively look for it in predecessors.
- SmallSet<BasicBlock *, 4> VisitedBlocks;
- IntrinsicInst *SecondEndCatch = nullptr;
-
- // This has to be called before it is asserted. Otherwise, the first assert
- // below can never be hit.
- bool BeginCatchFound =
- allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
- &SecondEndCatch, VisitedBlocks);
- Assert(
- SecondEndCatch == nullptr,
- "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
- II, SecondEndCatch);
- Assert(BeginCatchFound,
- "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
- II);
-}
-
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
AssumptionCache *AC) {
// Assume undef could be zero.
@@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
"Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(
- findValue(I.getOperand(2), I.getModule()->getDataLayout(),
- /*OffsetOk=*/false)))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
Assert(CI->getValue().ult(I.getType()->getNumElements()),
"Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert(&I == I.getParent()->begin() ||
- std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ Assert(&I == &I.getParent()->front() ||
+ std::prev(I.getIterator())->mayHaveSideEffects(),
"Unusual: unreachable immediately preceded by instruction without "
"side effects",
&I);
@@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
SmallPtrSet<Value *, 4> Visited;
- return findValueImpl(V, DL, OffsetOk, Visited);
+ return findValueImpl(V, OffsetOk, Visited);
}
/// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
if (!Visited.insert(V).second)
@@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
// TODO: Look through eliminable cast pairs.
// TODO: Look through calls with unique return values.
// TODO: Look through vector insert/extract/shuffle.
- V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
+ V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
if (LoadInst *L = dyn_cast<LoadInst>(V)) {
- BasicBlock::iterator BBI = L;
+ BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
for (;;) {
if (!VisitedBlocks.insert(BB).second)
break;
- if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
- BB, BBI, 6, AA))
- return findValueImpl(U, DL, OffsetOk, Visited);
+ if (Value *U =
+ FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, DefMaxInstsToScan, AA))
+ return findValueImpl(U, OffsetOk, Visited);
if (BBI != BB->begin()) break;
BB = BB->getUniquePredecessor();
if (!BB) break;
@@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (Value *W = PN->hasConstantValue())
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
- if (CI->isNoopCast(DL))
- return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
+ if (CI->isNoopCast(*DL))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
Ex->getIndices()))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// Same as above, but for ConstantExpr instead of Instruction.
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
CE->getOperand(0)->getType(), CE->getType(),
- DL.getIntPtrType(V->getType())))
- return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
+ DL->getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
- return findValueImpl(W, DL, OffsetOk, Visited);
+ if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+ return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
+ if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
if (W != V)
- return findValueImpl(W, DL, OffsetOk, Visited);
+ return findValueImpl(W, OffsetOk, Visited);
}
return V;
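
Aside from moving the DataLayout into a member, the findValue/findValueImpl pair above is unchanged in spirit: peel one layer at a time (loads with a locally available value, PHIs with a constant value, no-op casts, extractvalue, instruction simplification) and recurse, with a visited set making self-referential values terminate. The recursion skeleton, on a toy one-edge value graph rather than the real peeling rules:

    #include <set>

    struct Value { Value *LooksThroughTo = nullptr; };

    Value *findValueImpl(Value *V, std::set<Value *> &Visited) {
      if (!Visited.insert(V).second)
        return V; // self-referential chain; stop here
      if (Value *W = V->LooksThroughTo)
        if (W != V)
          return findValueImpl(W, Visited);
      return V;
    }
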
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 624c5a1..4b2fa3c 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// from/to. If so, the previous load or store would have already trapped,
// so there is no harm doing an extra load (also, CSE will later eliminate
// the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ BasicBlock::iterator BBI = ScanFrom->getIterator(),
+ E = ScanFrom->getParent()->begin();
// We can at least always strip pointer casts even though we can't use the
// base here.
@@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
return false;
}
+/// DefMaxInstsToScan - the default number of maximum instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
+/// threading in part by eliminating partially redundant loads.
+/// At that point, the value of MaxInstsToScan was already set to '6'
+/// without documented explanation.
+cl::opt<unsigned>
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+ cl::desc("Use this to specify the default maximum number of instructions "
+ "to scan backward from a given instruction, when searching for "
+ "available loaded value"));
+
/// \brief Scan the ScanBB block backwards to see if we have the value at the
/// memory address *Ptr locally available within a small number of instructions.
///
@@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
// would affect codegen).
- Instruction *Inst = --ScanFrom;
+ Instruction *Inst = &*--ScanFrom;
if (isa<DbgInfoIntrinsic>(Inst))
continue;
@@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
- if (AA &&
- (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
- AliasAnalysis::Mod) == 0)
+ if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
continue;
// Otherwise the store that may or may not alias the pointer, bail out.
@@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
- AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
continue;
// May modify the pointer, bail out.
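
The Loads.cpp changes above replace the hardcoded scan budget of 6 with the DefMaxInstsToScan option and switch the mod/ref test to the new MRI_Mod bit; the scan itself keeps its shape. A reduced model of that backward walk (simplified instruction records, not the LLVM classes):

    #include <vector>

    struct Inst {
      bool IsDbgIntrinsic = false; // ignored by the scan budget
      bool ProvidesValue = false;  // load/store of the queried address
      bool MayWriteAddr = false;   // potential clobber per alias analysis
    };

    const Inst *scanBack(const std::vector<Inst> &Block, unsigned Limit) {
      for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
        if (It->IsDbgIntrinsic)
          continue; // debug info must not affect codegen
        if (Limit-- == 0)
          return nullptr; // scan budget exhausted
        if (It->ProvidesValue)
          return &*It;
        if (It->MayWriteAddr)
          return nullptr; // possibly clobbered; give up
      }
      return nullptr;
    }
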
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index becbae4..d7896ad 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
- "max-interesting-dependences", cl::Hidden,
- cl::desc("Maximum number of interesting dependences collected by "
- "loop-access analysis (default = 100)"),
- cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+ MaxDependences("max-dependences", cl::Hidden,
+ cl::desc("Maximum number of dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
@@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {
return V;
}
-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr, Value *OrigPtr) {
-
- const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+ const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
// symbolic stride replaced by one.
@@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
ValueToValueMap RewriteMap;
RewriteMap[StrideVal] = One;
- const SCEV *ByOne =
- SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
- DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ ScalarEvolution *SE = PSE.getSE();
+ const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+ const auto *CT =
+ static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+ PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+ auto *Expr = PSE.getSCEV(Ptr);
+
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
<< "\n");
- return ByOne;
+ return Expr;
}
// Otherwise, just return the SCEV of the original pointer.
- return SE->getSCEV(Ptr);
+ return OrigSCEV;
}
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides) {
+ const ValueToValueMap &Strides,
+ PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
+ ScalarEvolution *SE = PSE.getSE();
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+ const SCEV *ScStart = AR->getStart();
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
- Sc);
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // For expressions with negative step, the upper bound is ScStart and the
+ // lower bound is ScEnd.
+ if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+ if (CStep->getValue()->isNegative())
+ std::swap(ScStart, ScEnd);
+ } else {
+ // Fallback case: the step is not constant, but we can still
+ // get the upper and lower bounds of the interval by using min/max
+ // expressions.
+ ScStart = SE->getUMinExpr(ScStart, ScEnd);
+ ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+ }
+
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+}
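
The new bound computation in insert() above handles three step shapes when turning an affine access {Start,+,Step} over the trip count into a (Low, High) pair for the runtime check: a positive step keeps (Start, End), a negative step swaps them, and an unknown step falls back to min/max over both endpoints. In plain integers, a sketch under the assumption that End = Start + BTC*Step has already been computed:

    #include <algorithm>
    #include <cstdint>
    #include <optional>
    #include <utility>

    std::pair<int64_t, int64_t>
    accessBounds(int64_t Start, int64_t End, std::optional<int64_t> Step) {
      if (Step) {
        if (*Step < 0)
          return {End, Start}; // negative step: the end is the low bound
        return {Start, End};
      }
      // Step unknown at compile time: cover both orderings.
      return {std::min(Start, End), std::max(Start, End)};
    }
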
+
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+ SmallVector<PointerCheck, 4> Checks;
+
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+ if (needsChecking(CGI, CGJ))
+ Checks.push_back(std::make_pair(&CGI, &CGJ));
+ }
+ }
+ return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ assert(Checks.empty() && "Checks is not empty");
+ groupChecks(DepCands, UseDependencies);
+ Checks = generateChecks();
}
-bool RuntimePointerChecking::needsChecking(
- const CheckingPtrGroup &M, const CheckingPtrGroup &N,
- const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const {
for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
- if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ if (needsChecking(M.Members[I], N.Members[J]))
return true;
return false;
}
@@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(
CheckingGroups.clear();
+ // If we need to check two pointers to the same underlying object
+ // with a non-constant difference, we shouldn't perform any pointer
+ // grouping with those pointers. This is because we can easily get
+ // into cases where the resulting check would return false, even when
+ // the accesses are safe.
+ //
+ // The following example shows this:
+ // for (i = 0; i < 1000; ++i)
+ // a[5000 + i * m] = a[i] + a[i + 9000]
+ //
+ // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+ // (0, 10000) which is always false. However, if m is 1, there is no
+ // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+ // us to perform an accurate check in this case.
+ //
+ // The above case requires that we have an UnknownDependence between
+ // accesses to the same underlying object. This cannot happen unless
+ // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // is also false. In this case we will use the fallback path and create
+ // separate checking groups for all pointers.
+
// If we don't have the dependency partitions, construct a new
- // checking pointer group for each pointer.
+ // checking pointer group for each pointer. This is also required
+ // for correctness, because in this case we can have checking between
+ // pointers to the same underlying object.
if (!UseDependencies) {
for (unsigned I = 0; I < Pointers.size(); ++I)
CheckingGroups.push_back(CheckingPtrGroup(I, *this));
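
The grouping caveat in the comment above is worth making concrete. For the loop a[5000 + i*m] = a[i] + a[i + 9000] with m == 1 (a value chosen purely for illustration; the compiler only knows m is non-constant), the grouped read interval [0, 10000) always overlaps the write interval, while the two ungrouped read intervals do not:

    #include <cstdio>

    int main() {
      long m = 1;
      long wLo = 5000, wHi = 5000 + 1000 * m; // writes a[5000 + i*m]
      auto overlaps = [](long lo1, long hi1, long lo2, long hi2) {
        return lo1 < hi2 && lo2 < hi1;
      };
      std::printf("grouped reads: %d\n", overlaps(wLo, wHi, 0, 10000));    // 1
      std::printf("a[i]:          %d\n", overlaps(wLo, wHi, 0, 1000));     // 0
      std::printf("a[i + 9000]:   %d\n", overlaps(wLo, wHi, 9000, 10000)); // 0
      return 0;
    }

So the grouped check can never succeed, while separate checks for a[i] and a[i + 9000] both pass when m is in fact 1, which is exactly why grouping is skipped for same-object pointers with a non-constant difference.
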
@@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(
// don't process them twice.
SmallSet<unsigned, 2> Seen;
- // Go through all equivalence classes, get the the "pointer check groups"
+ // Go through all equivalence classes, get the "pointer check groups"
// and add them to the overall solution. We use the order in which accesses
// appear in 'Pointers' to enforce determinism.
for (unsigned I = 0; I < Pointers.size(); ++I) {
@@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks(
}
}
-bool RuntimePointerChecking::needsChecking(
- unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::arePointersInSamePartition(
+ const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
+ unsigned PtrIdx2) {
+ return (PtrToPartition[PtrIdx1] != -1 &&
+ PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
+}
+
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
const PointerInfo &PointerI = Pointers[I];
const PointerInfo &PointerJ = Pointers[J];
@@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking(
if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
- // If PtrPartition is set omit checks between pointers of the same partition.
- // Partition number -1 means that the pointer is used in multiple partitions.
- // In this case we can't omit the check.
- if (PtrPartition && (*PtrPartition)[I] != -1 &&
- (*PtrPartition)[I] == (*PtrPartition)[J])
- return false;
-
return true;
}
-void RuntimePointerChecking::print(
- raw_ostream &OS, unsigned Depth,
- const SmallVectorImpl<int> *PtrPartition) const {
-
- OS.indent(Depth) << "Run-time memory checks:\n";
-
+void RuntimePointerChecking::printChecks(
+ raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+ unsigned Depth) const {
unsigned N = 0;
- for (unsigned I = 0; I < CheckingGroups.size(); ++I)
- for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
- OS.indent(Depth) << "Check " << N++ << ":\n";
- OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
-
- for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
- OS.indent(Depth + 2)
- << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
- << "\n";
- }
+ for (const auto &Check : Checks) {
+ const auto &First = Check.first->Members, &Second = Check.second->Members;
- OS.indent(Depth + 2) << "Against group " << J << ":\n";
+ OS.indent(Depth) << "Check " << N++ << ":\n";
- for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
- OS.indent(Depth + 2)
- << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
- << "\n";
- }
- }
+ OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+ for (unsigned K = 0; K < First.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
- OS.indent(Depth) << "Grouped accesses:\n";
- for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
- OS.indent(Depth + 2) << "Group " << I << ":\n";
- OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
- << " High: " << *CheckingGroups[I].High << ")\n";
- for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
- OS.indent(Depth + 6) << "Member: "
- << *Pointers[CheckingGroups[I].Members[J]].Expr
- << "\n";
- }
+ OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+ for (unsigned K = 0; K < Second.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
}
}
-unsigned RuntimePointerChecking::getNumberOfChecks(
- const SmallVectorImpl<int> *PtrPartition) const {
-
- unsigned NumPartitions = CheckingGroups.size();
- unsigned CheckCount = 0;
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
- for (unsigned I = 0; I < NumPartitions; ++I)
- for (unsigned J = I + 1; J < NumPartitions; ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
- CheckCount++;
- return CheckCount;
-}
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ printChecks(OS, Checks, Depth);
-bool RuntimePointerChecking::needsAnyChecking(
- const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ const auto &CG = CheckingGroups[I];
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
- return true;
- return false;
+ OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+ << ")\n";
+ for (unsigned J = 0; J < CG.Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+ << "\n";
+ }
+ }
}
namespace {
@@ -390,9 +424,10 @@ public:
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
- MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
- IsRTCheckAnalysisNeeded(false) {}
+ MemoryDepChecker::DepCandidates &DA,
+ PredicatedScalarEvolution &PSE)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
+ PSE(PSE) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -435,7 +470,7 @@ public:
/// We decided that no dependence analysis would be used. Reset the state.
void resetDepChecks(MemoryDepChecker &DepChecker) {
CheckDeps.clear();
- DepChecker.clearInterestingDependences();
+ DepChecker.clearDependences();
}
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
@@ -477,14 +512,18 @@ private:
/// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
/// while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
+
+ /// The SCEV predicate containing all the SCEV-related assumptions.
+ PredicatedScalarEvolution &PSE;
};
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE,
- const ValueToValueMap &Strides, Value *Ptr) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
+ const ValueToValueMap &Strides, Value *Ptr,
+ Loop *L) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;
@@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
(!ShouldCheckStride ||
- isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
if (NeedRTCheck && CanDoRT)
- RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+ RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
- DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
<< " pointer comparisons.\n");
RtCheck.Need = NeedRTCheck;
@@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
for (Value *UnderlyingObj : TempObjects) {
+ // nullptr never aliases anything; don't join sets for pointers that have
+ // "null" in their UnderlyingObjects list.
+ if (isa<ConstantPointerNull>(UnderlyingObj))
+ continue;
+
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
if (Prev != ObjToLastAccess.end())
@@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
- const ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+ const Loop *Lp, const ValueToValueMap &StridesMap) {
+ Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
+ auto *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
}
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
@@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+ bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Check the step is constant.
- const SCEV *Step = AR->getStepRecurrence(*SE);
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
// Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
- const APInt &APStepVal = C->getValue()->getValue();
+ const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
llvm_unreachable("unexpected DepType!");
}
-bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+bool MemoryDepChecker::Dependence::isBackward() const {
switch (Type) {
case NoDep:
case Forward:
+ case ForwardButPreventsForwarding:
+ case Unknown:
return false;
case BackwardVectorizable:
- case Unknown:
- case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return true;
@@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ return isBackward() || Type == Unknown;
+}
+
+bool MemoryDepChecker::Dependence::isForward() const {
switch (Type) {
- case NoDep:
case Forward:
case ForwardButPreventsForwarding:
- return false;
+ return true;
+ case NoDep:
case Unknown:
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return true;
+ return false;
}
llvm_unreachable("unexpected DepType!");
}
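
A condensed sketch of how the three predicates above partition the DepType values (enumerators copied from the surrounding code; the switch bodies are collapsed into boolean expressions):

enum DepType { NoDep, Forward, ForwardButPreventsForwarding, Unknown,
               BackwardVectorizable, Backward,
               BackwardVectorizableButPreventsForwarding };

// Only the three definite backward kinds count as backward ...
static bool isBackward(DepType T) {
  return T == BackwardVectorizable || T == Backward ||
         T == BackwardVectorizableButPreventsForwarding;
}
// ... Unknown is possibly backward but never definitely forward ...
static bool isPossiblyBackward(DepType T) {
  return isBackward(T) || T == Unknown;
}
// ... and only the two forward kinds count as forward.
static bool isForward(DepType T) {
  return T == Forward || T == ForwardButPreventsForwarding;
}
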
@@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
- const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+ const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr);
- int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+ int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
@@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::swap(StrideAPtr, StrideBPtr);
}
- const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+ const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
+ << "(Induction step: " << StrideAPtr << ")\n");
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
// Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
@@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
// Negative distances are not plausible dependencies.
- const APInt &Val = C->getValue()->getValue();
+ const APInt &Val = C->getAPInt();
if (Val.isNegative()) {
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
if (IsTrueDataDependence &&
@@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
if (ATy == BTy)
- return Dependence::NoDep;
+ return Dependence::Forward;
DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
return Dependence::Unknown;
}
@@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
isDependent(*A.first, A.second, *B.first, B.second, Strides);
SafeForVectorization &= Dependence::isSafeForVectorization(Type);
- // Gather dependences unless we accumulated MaxInterestingDependence
+ // Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
// unsafe dependence. This puts a limit on this quadratic
// algorithm.
- if (RecordInterestingDependences) {
- if (Dependence::isInterestingDependence(Type))
- InterestingDependences.push_back(
- Dependence(A.second, B.second, Type));
-
- if (InterestingDependences.size() >= MaxInterestingDependence) {
- RecordInterestingDependences = false;
- InterestingDependences.clear();
+ if (RecordDependences) {
+ if (Type != Dependence::NoDep)
+ Dependences.push_back(Dependence(A.second, B.second, Type));
+
+ if (Dependences.size() >= MaxDependences) {
+ RecordDependences = false;
+ Dependences.clear();
DEBUG(dbgs() << "Too many dependences, stopped recording\n");
}
}
- if (!RecordInterestingDependences && !SafeForVectorization)
+ if (!RecordDependences && !SafeForVectorization)
return false;
}
++OI;
@@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
}
- DEBUG(dbgs() << "Total Interesting Dependences: "
- << InterestingDependences.size() << "\n");
+ DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return SafeForVectorization;
}
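
The MaxDependences logic above follows a general capped-recording pattern: keep results only while the count stays under a limit, and past it, clear the list and stop recording so the quadratic scan can bail out on the first unsafe result. A minimal sketch with a hypothetical CappedLog type (not from the source):

#include <vector>

template <typename T>
struct CappedLog {
  std::vector<T> Items;
  bool Recording = true;
  // Record Item unless the cap was already hit; once Items reaches Max,
  // discard everything and stop recording for good.
  void note(const T &Item, unsigned Max) {
    if (!Recording)
      return;
    Items.push_back(Item);
    if (Items.size() >= Max) {
      Recording = false;
      Items.clear();
    }
  }
};
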
@@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() {
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(LoopAccessReport() <<
- "could not determine number of loop iterations");
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport()
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) {
- emitAnalysis(LoopAccessReport(it) <<
+ emitAnalysis(LoopAccessReport(&*it) <<
"instruction cannot be vectorized");
CanVecMem = false;
return;
@@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
- AA, LI, DependentAccesses);
+ AA, LI, DependentAccesses, PSE);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRTIfNeeded =
- Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides);
if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
@@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
PtrRtChecking.reset();
PtrRtChecking.Need = true;
+ auto *SE = PSE.getSE();
CanDoRTIfNeeded =
Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
@@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
}
bool LoopAccessInfo::isUniform(Value *V) const {
- return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+ return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop));
}
// FIXME: this function is currently a duplicate of the one in
@@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
return nullptr;
}
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
- Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtChecking.Need)
- return std::make_pair(nullptr, nullptr);
+namespace {
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
+} // end anonymous namespace
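
A small sketch of why TrackingVH is used for the bounds, assuming the standard llvm::TrackingVH behavior of following a value through replaceAllUsesWith:

#include "llvm/IR/ValueHandle.h"

// If expanding a later pointer RAUWs a previously expanded bound, a raw
// Value* would dangle; a TrackingVH transparently follows the replacement.
static llvm::Value *keepBoundAlive(llvm::Value *Expanded) {
  llvm::TrackingVH<llvm::Value> Start(Expanded);
  // ... SCEV expansion of another pointer may replace Expanded here ...
  return Start; // still the live value after any RAUW
}
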
- SmallVector<TrackingVH<Value>, 2> Starts;
- SmallVector<TrackingVH<Value>, 2> Ends;
+/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds
+expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+ Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
+ const RuntimePointerChecking &PtrRtChecking) {
+ Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
+ return {Ptr, Ptr};
+ } else {
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
+
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
+ }
+}
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, DL, "induction");
- Instruction *FirstInst = nullptr;
+/// \brief Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
+ Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
+ const RuntimePointerChecking &PtrRtChecking) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ std::transform(
+ PointerChecks.begin(), PointerChecks.end(),
+ std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ PointerBounds
+ First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
+ Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
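
The std::transform call above is the standard copy-with-mapping idiom; reduced to a self-contained form over ints:

#include <algorithm>
#include <iterator>
#include <vector>

// Map each input element through a lambda, appending results via
// std::back_inserter so the output vector grows as needed.
static std::vector<int> doubled(const std::vector<int> &In) {
  std::vector<int> Out;
  std::transform(In.begin(), In.end(), std::back_inserter(Out),
                 [](int V) { return 2 * V; });
  return Out;
}
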
- for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
- const RuntimePointerChecking::CheckingPtrGroup &CG =
- PtrRtChecking.CheckingGroups[i];
- Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
- << "\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
- Value *Start = nullptr, *End = nullptr;
-
- DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
- DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
+ Instruction *Loc,
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
+ const {
+ auto *SE = PSE.getSE();
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *FirstInst = nullptr;
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
- for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
- const RuntimePointerChecking::CheckingPtrGroup &CGI =
- PtrRtChecking.CheckingGroups[i];
- const RuntimePointerChecking::CheckingPtrGroup &CGJ =
- PtrRtChecking.CheckingGroups[j];
-
- if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
- continue;
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
}
+ MemoryRuntimeCheck = IsConflict;
}
if (!MemoryRuntimeCheck)
@@ -1661,12 +1737,20 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
return std::make_pair(FirstInst, Check);
}
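
The emitted IR realizes the classic interval-overlap test; the same predicate over plain unsigned addresses, as a sketch:

#include <cstdint>

// Ranges [StartA, EndA] and [StartB, EndB] conflict exactly when each
// start is <= the other range's end (unsigned, matching the ICmpULE pair).
static bool rangesConflict(uint64_t StartA, uint64_t EndA,
                           uint64_t StartB, uint64_t EndB) {
  return StartA <= EndB && StartB <= EndA;
}
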
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
+ if (!PtrRtChecking.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
+}
+
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
@@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
- if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
- OS.indent(Depth) << "Interesting Dependences:\n";
- for (auto &Dep : *InterestingDependences) {
+ if (auto *Dependences = DepChecker.getDependences()) {
+ OS.indent(Depth) << "Dependences:\n";
+ for (auto &Dep : *Dependences) {
Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
OS << "\n";
}
} else
- OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+ OS.indent(Depth) << "Too many dependences, not recorded\n";
// List the pairs of accesses that need run-time checks to prove independence.
PtrRtChecking.print(OS, Depth);
@@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Store to invariant address was "
<< (StoreToLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
+
+ OS.indent(Depth) << "SCEV assumptions:\n";
+ PSE.getUnionPredicate().print(OS, Depth);
}
const LoopAccessInfo &
@@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
- Strides);
+ LAI =
+ llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
#endif
@@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
}
bool LoopAccessAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) {
}
void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"
INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 6b6faf8..9ab9eea 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
return false;
if (I->mayReadFromMemory())
return false;
- // The landingpad instruction is immobile.
- if (isa<LandingPadInst>(I))
+ // EH block instructions are immobile.
+ if (I->isEHPad())
return false;
// Determine the insertion point, unless one was given.
if (!InsertPt) {
@@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Hoist.
I->moveBefore(InsertPt);
+
+ // There is a possibility of hoisting this instruction above some arbitrary
+ // condition. Any metadata defined on it can be control-dependent on that
+ // condition. Conservatively strip it here so that we don't give any wrong
+ // information to the optimizer.
+ I->dropUnknownNonDebugMetadata();
+
Changed = true;
return true;
}
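
A sketch of the hazard the new dropUnknownNonDebugMetadata() call guards against (the guarded-load scenario in the comment is illustrative, not from the source):

#include "llvm/IR/Instruction.h"

// Before hoisting:              After hoisting above the guard:
//   if (p != null)                v = load p, !nonnull  ; may now be false
//     v = load p, !nonnull        if (p != null) ...
// Control-dependent metadata such as !nonnull or !range can become wrong
// once the instruction moves above its guard, so strip all non-debug
// metadata conservatively.
static void stripAfterHoist(llvm::Instruction *I) {
  I->dropUnknownNonDebugMetadata();
}
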
@@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
bool Loop::isLCSSAForm(DominatorTree &DT) const {
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+ // optimizations, so for the purposes of LCSSA form, we can ignore them.
+ if (I->getType()->isTokenTy())
+ continue;
+
for (Use &U : I->uses()) {
Instruction *UI = cast<Instruction>(U.getUser());
BasicBlock *UserBB = UI->getParent();
@@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
DT.isReachableFromEntry(UserBB))
return false;
}
+ }
}
return true;
}
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
+ if (!isLCSSAForm(DT))
+ return false;
+
+ return std::all_of(begin(), end(), [&](const Loop *L) {
+ return L->isRecursivelyLCSSAForm(DT);
+ });
+}
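
isRecursivelyLCSSAForm uses the self-recursive std::all_of pattern; the same shape over a hypothetical tree type:

#include <algorithm>
#include <vector>

// A node is recursively OK when it is OK itself and every child is
// recursively OK — mirroring the check-self-then-all_of-children call.
struct Node {
  bool Ok;
  std::vector<Node> Kids;
  bool allOk() const {
    return Ok && std::all_of(Kids.begin(), Kids.end(),
                             [](const Node &K) { return K.allOk(); });
  }
};
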
+
/// isLoopSimplifyForm - Return true if the Loop is in the form that
/// the LoopSimplify form transforms loops to, which is sometimes called
/// normal form.
@@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const {
if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
if (II->cannotDuplicate())
return false;
+ // Return false if any loop blocks contain invokes to EH-pads other than
+ // landingpads; we don't know how to split those edges yet.
+ auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI))
+ return false;
+ }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
if (CI->cannotDuplicate())
return false;
}
+ if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I))
+ return false;
}
}
return true;
@@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-/// updateUnloop - The last backedge has been removed from a loop--now the
-/// "unloop". Find a new parent for the blocks contained within unloop and
-/// update the loop tree. We don't necessarily have valid dominators at this
-/// point, but LoopInfo is still valid except for the removal of this loop.
-///
-/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
-/// checking first is illegal.
+LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+ analyze(DomTree);
+}
+
void LoopInfo::updateUnloop(Loop *Unloop) {
+ Unloop->markUnlooped();
// First handle the special case of no parent loop to simplify the algorithm.
if (!Unloop->getParentLoop()) {
@@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
// objects. I don't want to add that kind of complexity until the scope of
// the problem is better understood.
LoopInfo LI;
- LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));
return LI;
}
@@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
+PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
+PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner)
+ : OS(OS), Banner(Banner) {}
+
+PreservedAnalyses PrintLoopPass::run(Loop &L) {
+ OS << Banner;
+ for (auto *Block : L.blocks())
+ if (Block)
+ Block->print(OS);
+ else
+ OS << "Printing <null> block";
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
bool LoopInfoWrapperPass::runOnFunction(Function &) {
releaseMemory();
- LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
return false;
}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index e9fcf02..dc42473 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -27,35 +28,26 @@ namespace {
/// PrintLoopPass - Print a Function corresponding to a Loop.
///
-class PrintLoopPass : public LoopPass {
-private:
- std::string Banner;
- raw_ostream &Out; // raw_ostream to print on.
+class PrintLoopPassWrapper : public LoopPass {
+ PrintLoopPass P;
public:
static char ID;
- PrintLoopPass(const std::string &B, raw_ostream &o)
- : LoopPass(ID), Banner(B), Out(o) {}
+ PrintLoopPassWrapper() : LoopPass(ID) {}
+ PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner)
+ : LoopPass(ID), P(OS, Banner) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
bool runOnLoop(Loop *L, LPPassManager &) override {
- Out << Banner;
- for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
- b != be;
- ++b) {
- if (*b)
- (*b)->print(Out);
- else
- Out << "Printing <null> block";
- }
+ P.run(*L);
return false;
}
};
-char PrintLoopPass::ID = 0;
+char PrintLoopPassWrapper::ID = 0;
}
//===----------------------------------------------------------------------===//
@@ -66,81 +58,34 @@ char LPPassManager::ID = 0;
LPPassManager::LPPassManager()
: FunctionPass(ID), PMDataManager() {
- skipThisLoop = false;
- redoThisLoop = false;
LI = nullptr;
CurrentLoop = nullptr;
}
-/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
-void LPPassManager::deleteLoopFromQueue(Loop *L) {
-
- LI->updateUnloop(L);
-
- // Notify passes that the loop is being deleted.
- deleteSimpleAnalysisLoop(L);
-
- // If L is current loop then skip rest of the passes and let
- // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
- // and continue applying other passes on CurrentLoop.
- if (CurrentLoop == L)
- skipThisLoop = true;
-
- delete L;
-
- if (skipThisLoop)
- return;
-
- for (std::deque<Loop *>::iterator I = LQ.begin(),
- E = LQ.end(); I != E; ++I) {
- if (*I == L) {
- LQ.erase(I);
- break;
- }
- }
-}
-
// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
-void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
-
- assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+Loop &LPPassManager::addLoop(Loop *ParentLoop) {
+ // Create a new loop. LI will take ownership.
+ Loop *L = new Loop();
- // Insert into loop nest
- if (ParentLoop)
- ParentLoop->addChildLoop(L);
- else
+ // Insert into the loop nest and the loop queue.
+ if (!ParentLoop) {
+ // This is the top level loop.
LI->addTopLevelLoop(L);
-
- insertLoopIntoQueue(L);
-}
-
-void LPPassManager::insertLoopIntoQueue(Loop *L) {
- // Insert L into loop queue
- if (L == CurrentLoop)
- redoLoop(L);
- else if (!L->getParentLoop())
- // This is top level loop.
LQ.push_front(L);
- else {
- // Insert L after the parent loop.
- for (std::deque<Loop *>::iterator I = LQ.begin(),
- E = LQ.end(); I != E; ++I) {
- if (*I == L->getParentLoop()) {
- // deque does not support insert after.
- ++I;
- LQ.insert(I, 1, L);
- break;
- }
- }
+ return *L;
}
-}
-// Reoptimize this loop. LPPassManager will re-insert this loop into the
-// queue. This allows LoopPass to change loop nest for the loop. This
-// utility may send LPPassManager into infinite loops so use caution.
-void LPPassManager::redoLoop(Loop *L) {
- assert (CurrentLoop == L && "Can redo only CurrentLoop");
- redoThisLoop = true;
+ ParentLoop->addChildLoop(L);
+ // Insert L into the loop queue after the parent loop.
+ for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ return *L;
}
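
The queue manipulation above works around std::deque's lack of an insert-after operation; in isolation, over ints:

#include <deque>

// Find the anchor element, step past it, and insert before the next
// position — the only way to "insert after" with std::deque.
static void insertAfter(std::deque<int> &Q, int Anchor, int New) {
  for (auto I = Q.begin(), E = Q.end(); I != E; ++I)
    if (*I == Anchor) {
      ++I;
      Q.insert(I, New);
      break;
    }
}
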
/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
@@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
- CurrentLoop = LQ.back();
- skipThisLoop = false;
- redoThisLoop = false;
-
+ CurrentLoop = LQ.back();
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
@@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) {
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
- skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getName());
+ CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName());
dumpPreservedSet(P);
- if (!skipThisLoop) {
+ if (CurrentLoop->isUnloop()) {
+ // Notify passes that the loop is being deleted.
+ deleteSimpleAnalysisLoop(CurrentLoop);
+ } else {
// Manually check that this loop is still healthy. This is done
// instead of relying on LoopInfo::verifyLoop since LoopInfo
// is a function pass and it's really expensive to verify every
@@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P,
- skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getName(),
+ removeDeadPasses(P, CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
- if (skipThisLoop)
+ if (CurrentLoop->isUnloop())
// Do not run other passes on this loop.
break;
}
@@ -289,17 +235,16 @@ bool LPPassManager::runOnFunction(Function &F) {
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
- if (skipThisLoop)
+ if (CurrentLoop->isUnloop()) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
}
+ delete CurrentLoop;
+ }
// Pop the loop from queue after running all passes.
LQ.pop_back();
-
- if (redoThisLoop)
- LQ.push_back(CurrentLoop);
}
// Finalization
@@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {
Pass *LoopPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
- return new PrintLoopPass(Banner, O);
+ return new PrintLoopPassWrapper(O, Banner);
}
// Check if this pass is suitable for the current LPPassManager, if
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index da3b829..078cefe 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -49,7 +49,7 @@ namespace {
void print(raw_ostream &OS, const Module * = nullptr) const override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<MemoryDependenceAnalysis>();
AU.setPreservesAll();
}
@@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
// All this code uses non-const interfaces because MemDep is not
// const-friendly, though nothing is actually modified.
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
Instruction *Inst = &I;
if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
@@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const auto &I : inst_range(*F)) {
+ for (const auto &I : instructions(*F)) {
const Instruction *Inst = &I;
DepSetMap::const_iterator DI = Deps.find(Inst);
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
index fa292a2..36f1424 100644
--- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -22,7 +22,8 @@ using namespace llvm;
namespace {
struct MemDerefPrinter : public FunctionPass {
- SmallVector<Value *, 4> Vec;
+ SmallVector<Value *, 4> Deref;
+ SmallPtrSet<Value *, 4> DerefAndAligned;
static char ID; // Pass identification, replacement for typeid
MemDerefPrinter() : FunctionPass(ID) {
@@ -34,7 +35,8 @@ namespace {
bool runOnFunction(Function &F) override;
void print(raw_ostream &OS, const Module * = nullptr) const override;
void releaseMemory() override {
- Vec.clear();
+ Deref.clear();
+ DerefAndAligned.clear();
}
};
}
@@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() {
bool MemDerefPrinter::runOnFunction(Function &F) {
const DataLayout &DL = F.getParent()->getDataLayout();
- for (auto &I: inst_range(F)) {
+ for (auto &I: instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, DL))
- Vec.push_back(PO);
+ Deref.push_back(PO);
+ if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+ DerefAndAligned.insert(PO);
}
}
return false;
@@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
OS << "The following are dereferenceable:\n";
- for (auto &V: Vec) {
+ for (Value *V: Deref) {
V->print(OS);
+ if (DerefAndAligned.count(V))
+ OS << "\t(aligned)";
+ else
+ OS << "\t(unaligned)";
OS << "\n\n";
}
}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 8ddac8f..b19ecad 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
#define DEBUG_TYPE "memory-builtins"
-enum AllocType {
+enum AllocType : uint8_t {
OpNewLike = 1<<0, // allocates; never returns null
MallocLike = 1<<1 | OpNewLike, // allocates; may return null
CallocLike = 1<<2, // allocates + bzero
@@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = {
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
{LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
+ {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
+ {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long)
+ {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow)
+ {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
+ {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long)
+ {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow)
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
@@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
- unsigned i = 0;
- bool found = false;
- for ( ; i < array_lengthof(AllocationFnData); ++i) {
- if (AllocationFnData[i].Func == TLIFn) {
- found = true;
- break;
- }
- }
- if (!found)
+ const AllocFnsTy *FnData =
+ std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
+ [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; });
+
+ if (FnData == std::end(AllocationFnData))
return nullptr;
- const AllocFnsTy *FnData = &AllocationFnData[i];
if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return nullptr;
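
The rewrite above swaps a hand-rolled search loop for std::find_if over the static table; the same idiom with a hypothetical two-field table:

#include <algorithm>
#include <iterator>

struct Entry { int Key, Val; };
static const Entry Table[] = {{1, 10}, {2, 20}};

// std::end(Table) doubles as the "not found" sentinel, so no bool flag
// or index bookkeeping is needed.
static const Entry *lookup(int Key) {
  const Entry *E =
      std::find_if(std::begin(Table), std::end(Table),
                   [Key](const Entry &X) { return X.Key == Key; });
  return E == std::end(Table) ? nullptr : E;
}
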
@@ -185,13 +188,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
}
/// \brief Tests if a value is a call or invoke to a library function that
-/// reallocates memory (such as realloc).
-bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
- bool LookThroughBitCast) {
- return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
-}
-
-/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
@@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
unsigned ExpectedNumParams;
if (TLIFn == LibFunc::free ||
TLIFn == LibFunc::ZdlPv || // operator delete(void*)
- TLIFn == LibFunc::ZdaPv) // operator delete[](void*)
+ TLIFn == LibFunc::ZdaPv || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*)
ExpectedNumParams = 1;
else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
- TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
+ TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint)
+ TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
return nullptr;
@@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
// always generate code immediately before the instruction being
// processed, so that the generated code dominates the same BBs
- Instruction *PrevInsertPoint = Builder.GetInsertPoint();
+ BuilderTy::InsertPointGuard Guard(Builder);
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
@@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
Result = unknown();
}
- if (PrevInsertPoint)
- Builder.SetInsertPoint(PrevInsertPoint);
-
// Don't reuse CacheIt since it may be invalid at this point.
CacheMap[V] = Result;
return Result;
@@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
// compute offset/size for each PHI incoming pointer
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
- Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());
SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
if (!bothKnown(EdgeData)) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 782a67b..3e80bfe 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -22,7 +22,9 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const unsigned int BlockScanLimit = 100;
+
+static cl::opt<unsigned> BlockScanLimit(
+ "memdep-block-scan-limit", cl::Hidden, cl::init(100),
+ cl::desc("The number of instructions to scan in a block in memory "
+ "dependency analysis (default = 100)"));
// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;
@@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0;
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() {
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return false;
}
@@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
-static AliasAnalysis::ModRefResult
-GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
+static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
+ const TargetLibraryInfo &TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isUnordered()) {
Loc = MemoryLocation::get(LI);
- return AliasAnalysis::Ref;
+ return MRI_Ref;
}
if (LI->getOrdering() == Monotonic) {
Loc = MemoryLocation::get(LI);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
Loc = MemoryLocation();
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isUnordered()) {
Loc = MemoryLocation::get(SI);
- return AliasAnalysis::Mod;
+ return MRI_Mod;
}
if (SI->getOrdering() == Monotonic) {
Loc = MemoryLocation::get(SI);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
Loc = MemoryLocation();
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Loc = MemoryLocation::get(V);
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
}
- if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+ if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
// calls to free() deallocate the entire structure
Loc = MemoryLocation(CI->getArgOperand(0));
- return AliasAnalysis::Mod;
+ return MRI_Mod;
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return AliasAnalysis::Mod;
+ return MRI_Mod;
case Intrinsic::invariant_end:
II->getAAMetadata(AAInfo);
Loc = MemoryLocation(
@@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
- return AliasAnalysis::Mod;
+ return MRI_Mod;
default:
break;
}
@@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
- return AliasAnalysis::ModRef;
+ return MRI_ModRef;
if (Inst->mayReadFromMemory())
- return AliasAnalysis::Ref;
- return AliasAnalysis::NoModRef;
+ return MRI_Ref;
+ return MRI_NoModRef;
}
/// getCallSiteDependencyFrom - Private helper for finding the local
@@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
+ Instruction *Inst = &*--ScanIt;
// If this inst is a memory op, get the pointer it accessed
MemoryLocation Loc;
- AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ ModRefInfo MR = GetLocation(Inst, Loc, *TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef)
return MemDepResult::getClobber(Inst);
continue;
}
@@ -224,10 +233,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
switch (AA->getModRefInfo(CS, InstCS)) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
// If the two calls are the same, return InstCS as a Def, so that
// CS can be found redundant and eliminated.
- if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+ if (isReadOnlyCall && !(MR & MRI_Mod) &&
CS.getInstruction()->isIdenticalToWhenDefined(Inst))
return MemDepResult::getDef(Inst);
@@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// If we could not obtain a pointer for the instruction and the instruction
// touches memory then assume that this is a dependency.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return MemDepResult::getClobber(Inst);
}
@@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst) {
+ if (QueryInst != nullptr) {
+ if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
+ MemDepResult invariantGroupDependency =
+ getInvariantGroupPointerDependency(LI, BB);
+
+ if (invariantGroupDependency.isDef())
+ return invariantGroupDependency;
+ }
+ }
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+}
+
+MemDepResult
+MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI,
+ BasicBlock *BB) {
+ Value *LoadOperand = LI->getPointerOperand();
+ // It is not safe to walk the use list of a global value, because function
+ // passes aren't allowed to look outside their functions.
+ if (isa<GlobalValue>(LoadOperand))
+ return MemDepResult::getUnknown();
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
+ MemDepResult Result = MemDepResult::getUnknown();
+ llvm::SmallSet<Value *, 14> Seen;
+ // Queue to process all pointers that are equivalent to load operand.
+ llvm::SmallVector<Value *, 8> LoadOperandsQueue;
+ LoadOperandsQueue.push_back(LoadOperand);
+ while (!LoadOperandsQueue.empty()) {
+ Value *Ptr = LoadOperandsQueue.pop_back_val();
+ if (isa<GlobalValue>(Ptr))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
+ if (!Seen.count(BCI->getOperand(0))) {
+ LoadOperandsQueue.push_back(BCI->getOperand(0));
+ Seen.insert(BCI->getOperand(0));
+ }
+ }
+
+ for (Use &Us : Ptr->uses()) {
+ auto *U = dyn_cast<Instruction>(Us.getUser());
+ if (!U || U == LI || !DT->dominates(U, LI))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ if (!Seen.count(BCI)) {
+ LoadOperandsQueue.push_back(BCI);
+ Seen.insert(BCI);
+ }
+ continue;
+ }
+ // If we hit a load or store with the same invariant.group metadata (and
+ // the same pointer operand), we can assume that the value pointed to by
+ // the pointer operand didn't change.
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
+ return MemDepResult::getDef(U);
+ }
+ }
+ return Result;
+}
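
getInvariantGroupPointerDependency walks equivalent pointers with the standard worklist-plus-visited-set pattern; stripped to its skeleton over ints, with a hypothetical successor table:

#include <set>
#include <vector>

// Pop from the worklist, visit, and push each neighbor at most once —
// insert().second is true only on first insertion, mirroring the
// Seen.count()/insert() pairing in the code above.
static void walk(int Start, const std::vector<std::vector<int>> &Succs) {
  std::set<int> Seen{Start};
  std::vector<int> Work{Start};
  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    for (int S : Succs[N])
      if (Seen.insert(S).second)
        Work.push_back(S);
  }
}
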
+
+MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
+ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
+ BasicBlock *BB, Instruction *QueryInst) {
+
const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
@@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
const DataLayout &DL = BB->getModule()->getDataLayout();
+ // Create a numbered basic block to lazily compute and cache instruction
+ // positions inside a BB. This is used to provide fast queries for relative
+ // position between two instructions in a BB and can be used by
+ // AliasAnalysis::callCapturesBefore.
+ OrderedBasicBlock OBB(BB);
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
- Instruction *Inst = --ScanIt;
+ Instruction *Inst = &*--ScanIt;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
// Debug intrinsics don't (and can't) cause dependencies.
@@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
- if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+ if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef)
continue;
// Ok, this store might clobber the query pointer. Check to see if it is
@@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
// a subsequent bitcast of the malloc call result. There can be stores to
// the malloced memory between the malloc call and its bitcast uses, and we
// need to continue scanning until the malloc call.
- const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
@@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
continue;
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
- if (MR == AliasAnalysis::ModRef)
- MR = AA->callCapturesBefore(Inst, MemLoc, DT);
+ if (MR == MRI_ModRef)
+ MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB);
switch (MR) {
- case AliasAnalysis::NoModRef:
+ case MRI_NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
- case AliasAnalysis::Mod:
+ case MRI_Mod:
return MemDepResult::getClobber(Inst);
- case AliasAnalysis::Ref:
+ case MRI_Ref:
// If the call is known to never store to the pointer, and if this is a
// load query, we can safely ignore it (scan past it).
if (isLoad)
@@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getNonFuncLocal();
} else {
MemoryLocation MemLoc;
- AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI);
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
- bool isLoad = !(MR & AliasAnalysis::Mod);
+ bool isLoad = !(MR & MRI_Mod);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
- LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
- QueryParent, QueryInst);
+ LocalCache = getPointerDependencyFrom(
+ MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
- QueryParent);
+ LocalCache = getCallSiteDependencyFrom(
+ QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
} else
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
@@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
BasicBlock::iterator ScanPos = DirtyBB->end();
if (ExistingResult) {
if (Instruction *Inst = ExistingResult->getResult().getInst()) {
- ScanPos = Inst;
+ ScanPos = Inst->getIterator();
// We're removing QueryInst's use of Inst.
RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
QueryCS.getInstruction());
@@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
assert(ExistingResult->getResult().getInst()->getParent() == BB &&
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
- ScanPos = ExistingResult->getResult().getInst();
+ ScanPos = ExistingResult->getResult().getInst()->getIterator();
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
@@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// the entire block to get to this point.
MemDepResult NewDirtyVal;
if (!RemInst->isTerminator())
- NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
+ NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
@@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
- AA->deleteValue(RemInst);
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
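
The invariant.group walk added above matches loads and stores that carry the
same !invariant.group metadata node on the same pointer operand. As a minimal
sketch (not part of this diff; assumes the 3.8-era IRBuilder and metadata
APIs, and emitInvariantGroupLoad is an illustrative name), a frontend could
tag a load like so:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Emit a load that participates in an invariant group. Two accesses
    // match when they carry the same metadata node; an empty node is the
    // usual choice.
    static LoadInst *emitInvariantGroupLoad(IRBuilder<> &B, Value *Ptr) {
      LoadInst *LI = B.CreateLoad(Ptr);
      LI->setMetadata(LLVMContext::MD_invariant_group,
                      MDNode::get(B.getContext(), {}));
      return LI;
    }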
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
deleted file mode 100644
index 322a9a8..0000000
--- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the default implementation of the Alias Analysis interface
-// that simply returns "I don't know" for all queries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- /// NoAA - This class implements the -no-aa pass, which always returns "I
- /// don't know" for alias queries. NoAA is unlike other alias analysis
- /// implementations, in that it does not chain to a previous analysis. As
- /// such it doesn't follow many of the rules that other alias analyses must.
- ///
- struct NoAA : public ImmutablePass, public AliasAnalysis {
- static char ID; // Class identification, replacement for typeinfo
- NoAA() : ImmutablePass(ID) {
- initializeNoAAPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {}
-
- bool doInitialization(Module &M) override {
- // Note: NoAA does not call InitializeAliasAnalysis because it's
- // special and does not support chaining.
- DL = &M.getDataLayout();
- return true;
- }
-
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- return MayAlias;
- }
-
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- return UnknownModRefBehavior;
- }
- ModRefBehavior getModRefBehavior(const Function *F) override {
- return UnknownModRefBehavior;
- }
-
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override {
- return false;
- }
- ModRefResult getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) override {
- return ModRef;
- }
-
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override {
- return ModRef;
- }
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return ModRef;
- }
-
- void deleteValue(Value *V) override {}
- void addEscapingUse(Use &U) override {}
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char NoAA::ID = 0;
-INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
- "No Alias Analysis (always returns 'may' alias)",
- true, true, true)
-
-ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 3893aab..25f660f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -18,66 +18,46 @@
/// used. Naive LLVM IR transformations which would otherwise be
/// behavior-preserving may break these assumptions.
///
+/// TODO: Theoretically we could check for dependencies between objc_* calls
+/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls.
+///
//===----------------------------------------------------------------------===//
-#include "ObjCARC.h"
-#include "ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/PassSupport.h"
#define DEBUG_TYPE "objc-arc-aa"
-namespace llvm {
- class Function;
- class Value;
-}
-
using namespace llvm;
using namespace llvm::objcarc;
-// Register this pass...
-char ObjCARCAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
- "ObjC-ARC-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
- return new ObjCARCAliasAnalysis();
-}
-
-bool ObjCARCAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
-}
-
-void
-ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
-}
-
-AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableARCOpts)
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
// First, strip off no-ops, including ObjC-specific no-ops, and try making a
// precise alias query.
const Value *SA = GetRCIdentityRoot(LocA.Ptr);
const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
- AliasAnalysis::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
- MemoryLocation(SB, LocB.Size, LocB.AATags));
+ AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
+ MemoryLocation(SB, LocB.Size, LocB.AATags));
if (Result != MayAlias)
return Result;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
- const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
+ const Value *UA = GetUnderlyingObjCPtr(SA, DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, DL);
if (UA != SA || UB != SB) {
- Result = AliasAnalysis::alias(MemoryLocation(UA), MemoryLocation(UB));
+ Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
// We can't use MustAlias or PartialAlias results here because
// GetUnderlyingObjCPtr may return an offsetted pointer value.
if (Result == NoAlias)
@@ -89,55 +69,47 @@ AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
return MayAlias;
}
-bool ObjCARCAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
if (!EnableARCOpts)
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
const Value *S = GetRCIdentityRoot(Loc.Ptr);
- if (AliasAnalysis::pointsToConstantMemory(
+ if (AAResultBase::pointsToConstantMemory(
MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S, *DL);
+ const Value *U = GetUnderlyingObjCPtr(S, DL);
if (U != S)
- return AliasAnalysis::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+ return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
// If that failed, fail. We don't need to chain here, since that's covered
// by the earlier precise query.
return false;
}
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- // We have nothing to do. Just chain to the next AliasAnalysis.
- return AliasAnalysis::getModRefBehavior(CS);
-}
-
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
if (!EnableARCOpts)
- return AliasAnalysis::getModRefBehavior(F);
+ return AAResultBase::getModRefBehavior(F);
switch (GetFunctionClass(F)) {
case ARCInstKind::NoopCast:
- return DoesNotAccessMemory;
+ return FMRB_DoesNotAccessMemory;
default:
break;
}
- return AliasAnalysis::getModRefBehavior(F);
+ return AAResultBase::getModRefBehavior(F);
}
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
+ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
if (!EnableARCOpts)
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
switch (GetBasicARCInstKind(CS.getInstruction())) {
case ARCInstKind::Retain:
@@ -151,18 +123,48 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// These functions don't access any memory visible to the compiler.
// Note that this doesn't include objc_retainBlock, because it updates
// pointers when it copies block data.
- return NoModRef;
+ return MRI_NoModRef;
default:
break;
}
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return ObjCARCAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char ObjCARCAA::PassID;
+
+char ObjCARCAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createObjCARCAAWrapperPass() {
+ return new ObjCARCAAWrapperPass();
+}
+
+ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) {
+ initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
- // TODO: Theoretically we could check for dependencies between objc_* calls
- // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+bool ObjCARCAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ObjCARCAAResult(
+ M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool ObjCARCAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
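
The rewritten ObjCARCAAResult follows the new AAResultBase pattern: refine
the query with ARC-specific pointer stripping where possible, otherwise
delegate to the base class, which handles chaining. A schematic sketch of
that shape (MyAAResult and stripNoOps are illustrative; the constructor
signature assumes the 3.8-era AAResultBase, which stores a TargetLibraryInfo):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    using namespace llvm;

    // Stand-in for stripping logic such as GetRCIdentityRoot.
    static const Value *stripNoOps(const Value *V) { return V; }

    class MyAAResult : public AAResultBase<MyAAResult> {
      friend AAResultBase<MyAAResult>;

    public:
      explicit MyAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {}

      AliasResult alias(const MemoryLocation &LocA,
                        const MemoryLocation &LocB) {
        // Retry the base query on the stripped pointers; the base class
        // takes care of chaining to the rest of the AA stack.
        return AAResultBase::alias(
            MemoryLocation(stripNoOps(LocA.Ptr), LocA.Size),
            MemoryLocation(stripNoOps(LocB.Ptr), LocB.Size));
      }
    };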
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
new file mode 100644
index 0000000..e3e74aa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -0,0 +1,28 @@
+//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
+ cl::location(EnableARCOpts),
+ cl::init(true));
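
The option above uses the external-storage form of cl::opt: the second
template parameter (true) routes the parsed value through cl::location into a
plain global, so other translation units can test the bool directly. A
standalone sketch of the same idiom (all names illustrative):

    #include "llvm/Support/CommandLine.h"

    bool MyFeatureEnabled; // external storage, visible to the rest of the code
    static llvm::cl::opt<bool, true>
        MyFeatureFlag("enable-my-feature",
                      llvm::cl::desc("enable/disable my feature"),
                      llvm::cl::location(MyFeatureEnabled),
                      llvm::cl::init(true));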
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index afb873a..133b635 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -19,7 +19,9 @@
///
//===----------------------------------------------------------------------===//
-#include "ObjCARC.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;
@@ -91,7 +93,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
.Default(ARCInstKind::CallOrUser);
// One argument.
- const Argument *A0 = AI++;
+ const Argument *A0 = &*AI++;
if (AI == AE)
// Argument is a pointer.
if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
@@ -129,7 +131,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
}
// Two arguments, first is i8**.
- const Argument *A1 = AI++;
+ const Argument *A1 = &*AI++;
if (AI == AE)
if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
new file mode 100644
index 0000000..0f0016f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -0,0 +1,85 @@
+//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the OrderedBasicBlock class. OrderedBasicBlock
+// maintains an interface where clients can query if one instruction comes
+// before another in a BasicBlock. Since BasicBlock currently lacks a reliable
+// way to query the relative position between instructions, one can use
+// OrderedBasicBlock to do such queries. OrderedBasicBlock is lazily built on a
+// source BasicBlock and maintains an internal Instruction -> Position map. An
+// OrderedBasicBlock instance should be discarded whenever the source
+// BasicBlock changes.
+//
+// It's currently used by the CaptureTracker in order to find relative
+// positions of a pair of instructions inside a BasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Instruction.h"
+using namespace llvm;
+
+OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB)
+ : NextInstPos(0), BB(BasicB) {
+ LastInstFound = BB->end();
+}
+
+/// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+/// Cache and number instructions while walking \p BB.
+bool OrderedBasicBlock::comesBefore(const Instruction *A,
+ const Instruction *B) {
+ const Instruction *Inst = nullptr;
+ assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
+ "Instruction supposed to be in NumberedInsts");
+
+ // Start the search with the instruction found in the last lookup round.
+ auto II = BB->begin();
+ auto IE = BB->end();
+ if (LastInstFound != IE)
+ II = std::next(LastInstFound);
+
+ // Number all instructions up to the point where we find 'A' or 'B'.
+ for (; II != IE; ++II) {
+ Inst = cast<Instruction>(II);
+ NumberedInsts[Inst] = NextInstPos++;
+ if (Inst == A || Inst == B)
+ break;
+ }
+
+ assert(II != IE && "Instruction not found?");
+ assert((Inst == A || Inst == B) && "Should find A or B");
+ LastInstFound = II;
+ return Inst == A;
+}
+
+/// \brief Find out whether \p A dominates \p B, meaning whether \p A
+/// comes before \p B in \p BB. This is a simplification that considers
+/// cached instruction positions and ignores other basic blocks; it is only
+/// meaningful for comparing relative instruction positions inside \p BB.
+bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
+ assert(A->getParent() == B->getParent() &&
+ "Instructions must be in the same basic block!");
+
+  // First we look up the instructions. If an instruction has not been
+  // numbered yet, the lookup gives us back ::end(). If both are numbered, we
+  // compare the numbers. Otherwise, if A is numbered and B is not, A must
+  // come before B, because we would have numbered B as well if it came
+  // first. Symmetrically, if B is numbered but A is not, A must come after
+  // B. If neither is numbered, we need to number the block and cache the
+  // results (by calling comesBefore).
+ auto NAI = NumberedInsts.find(A);
+ auto NBI = NumberedInsts.find(B);
+ if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end())
+ return NAI->second < NBI->second;
+ if (NAI != NumberedInsts.end())
+ return true;
+ if (NBI != NumberedInsts.end())
+ return false;
+
+ return comesBefore(A, B);
+}
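
A short usage sketch for the class above (header path as added by this
import): the first query walks and numbers the block up to the later of the
two instructions, and subsequent queries are plain map lookups:

    #include "llvm/Analysis/OrderedBasicBlock.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static bool comesBeforeInBlock(const BasicBlock *BB, const Instruction *A,
                                   const Instruction *B) {
      OrderedBasicBlock OBB(BB);  // numbers instructions lazily, on demand
      bool Before = OBB.dominates(A, B); // true iff A precedes B inside BB
      // NOTE: discard OBB once BB is mutated; cached positions go stale.
      return Before;
    }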
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 8cd8534..f59d267 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -21,6 +21,9 @@
#include <algorithm>
#include <iterator>
#include <set>
+#ifndef NDEBUG
+#include "llvm/Analysis/RegionPrinter.h"
+#endif
using namespace llvm;
@@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_,
calculate(F);
}
+#ifndef NDEBUG
+void RegionInfo::view() { viewRegion(this); }
+
+void RegionInfo::viewOnly() { viewRegionOnly(this); }
+#endif
+
//===----------------------------------------------------------------------===//
// RegionInfoPass implementation
//
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index d7f5109..acb218d 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -20,6 +20,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include "llvm/IR/LegacyPassManager.h"
+#endif
using namespace llvm;
@@ -55,25 +58,22 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
}
};
-template<>
-struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
+template <>
+struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
DOTGraphTraits (bool isSimple = false)
: DOTGraphTraits<RegionNode*>(isSimple) {}
- static std::string getGraphName(RegionInfoPass *DT) {
- return "Region Graph";
- }
+ static std::string getGraphName(const RegionInfo *) { return "Region Graph"; }
- std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) {
- RegionInfo &RI = G->getRegionInfo();
- return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
- reinterpret_cast<RegionNode*>(RI.getTopLevelRegion()));
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode *>::getNodeLabel(
+ Node, reinterpret_cast<RegionNode *>(G->getTopLevelRegion()));
}
std::string getEdgeAttributes(RegionNode *srcNode,
- GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) {
- RegionInfo &RI = G->getRegionInfo();
+ GraphTraits<RegionInfo *>::ChildIteratorType CI,
+ RegionInfo *G) {
RegionNode *destNode = *CI;
if (srcNode->isSubRegion() || destNode->isSubRegion())
@@ -83,7 +83,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
- Region *R = RI.getRegionFor(destBB);
+ Region *R = G->getRegionFor(destBB);
while (R && R->getParent())
if (R->getParent()->getEntry() == destBB)
@@ -91,7 +91,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
else
break;
- if (R->getEntry() == destBB && R->contains(srcBB))
+ if (R && R->getEntry() == destBB && R->contains(srcBB))
return "constraint=false";
return "";
@@ -99,8 +99,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
// Print the cluster of the subregions. This groups the single basic blocks
// and adds a different background color for each group.
- static void printRegionCluster(const Region &R,
- GraphWriter<RegionInfoPass*> &GW,
+ static void printRegionCluster(const Region &R, GraphWriter<RegionInfo *> &GW,
unsigned depth = 0) {
raw_ostream &O = GW.getOStream();
O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
@@ -132,50 +131,81 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
O.indent(2 * depth) << "}\n";
}
- static void addCustomGraphFeatures(const RegionInfoPass* RIP,
- GraphWriter<RegionInfoPass*> &GW) {
- const RegionInfo &RI = RIP->getRegionInfo();
+ static void addCustomGraphFeatures(const RegionInfo *G,
+ GraphWriter<RegionInfo *> &GW) {
raw_ostream &O = GW.getOStream();
O << "\tcolorscheme = \"paired12\"\n";
- printRegionCluster(*RI.getTopLevelRegion(), GW, 4);
+ printRegionCluster(*G->getTopLevelRegion(), GW, 4);
}
};
} //end namespace llvm
namespace {
+struct RegionInfoPassGraphTraits {
+ static RegionInfo *getGraph(RegionInfoPass *RIP) {
+ return &RIP->getRegionInfo();
+ }
+};
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionOnlyPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyPrinter::ID = 0;
+
struct RegionViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, false> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
static char ID;
- RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){
+ RegionViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
initializeRegionViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionViewer::ID = 0;
struct RegionOnlyViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, true> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
static char ID;
- RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) {
+ RegionOnlyViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("regonly", ID) {
initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionOnlyViewer::ID = 0;
-struct RegionPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, false> {
- static char ID;
- RegionPrinter() :
- DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) {
- initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
- }
-};
-char RegionPrinter::ID = 0;
} //end anonymous namespace
INITIALIZE_PASS(RegionPrinter, "dot-regions",
"Print regions of function to 'dot' file", true, true)
+INITIALIZE_PASS(
+ RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file (with no function bodies)", true,
+ true)
+
INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
true, true)
@@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
"View regions of function (with no function bodies)",
true, true)
-namespace {
-
-struct RegionOnlyPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, true> {
- static char ID;
- RegionOnlyPrinter() :
- DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) {
- initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
- }
-};
+FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); }
+FunctionPass *llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
}
-char RegionOnlyPrinter::ID = 0;
-INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
- "Print regions of function to 'dot' file "
- "(with no function bodies)",
- true, true)
-
FunctionPass* llvm::createRegionViewerPass() {
return new RegionViewer();
}
@@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() {
return new RegionOnlyViewer();
}
-FunctionPass* llvm::createRegionPrinterPass() {
- return new RegionPrinter();
+#ifndef NDEBUG
+static void viewRegionInfo(RegionInfo *RI, bool ShortNames) {
+ assert(RI && "Argument must be non-null");
+
+ llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent();
+ std::string GraphName = DOTGraphTraits<RegionInfo *>::getGraphName(RI);
+
+ llvm::ViewGraph(RI, "reg", ShortNames,
+ Twine(GraphName) + " for '" + F->getName() + "' function");
}
-FunctionPass* llvm::createRegionOnlyPrinterPass() {
- return new RegionOnlyPrinter();
+static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) {
+ assert(F && "Argument must be non-null");
+ assert(!F->isDeclaration() && "Function must have an implementation");
+
+  // The viewer and analysis passes do not modify anything, so we can safely
+  // remove the const qualifier.
+ auto NonConstF = const_cast<Function *>(F);
+
+ llvm::legacy::FunctionPassManager FPM(NonConstF->getParent());
+ FPM.add(ViewerPass);
+ FPM.doInitialization();
+ FPM.run(*NonConstF);
+ FPM.doFinalization();
}
+void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); }
+
+void llvm::viewRegion(const Function *F) {
+ invokeFunctionPass(F, createRegionViewerPass());
+}
+
+void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); }
+
+void llvm::viewRegionOnly(const Function *F) {
+ invokeFunctionPass(F, createRegionOnlyViewerPass());
+}
+#endif
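
With the #ifndef NDEBUG block above, asserts builds can render a function's
region graph without wiring up a pass pipeline by hand. A sketch of
programmatic use, assuming the matching declarations in
llvm/Analysis/RegionPrinter.h (the same calls also work from a debugger
prompt):

    #include "llvm/Analysis/RegionPrinter.h"
    #include "llvm/IR/Function.h"

    // Pops up the region graph viewers for F (asserts builds only).
    void debugViewRegions(const llvm::Function *F) {
      llvm::viewRegion(F);     // full graph, via the RegionViewer pass
      llvm::viewRegionOnly(F); // same, with function bodies elided
    }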
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 9c7c175..34074ef 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -83,11 +83,13 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
using namespace llvm;
@@ -114,16 +116,6 @@ static cl::opt<bool>
VerifySCEV("verify-scev",
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
-INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
- "Scalar Evolution Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
- "Scalar Evolution Analysis", false, true)
-char ScalarEvolution::ID = 0;
-
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0;
// Implementation of the SCEV class.
//
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
-#endif
void SCEV::print(raw_ostream &OS) const {
switch (static_cast<SCEVTypes>(getSCEVType())) {
@@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const {
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
- return SC->getValue()->getValue().isNegative();
+ return SC->getAPInt().isNegative();
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
@@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
//===----------------------------------------------------------------------===//
namespace {
- /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
- /// than the complexity of the RHS. This comparator is used to canonicalize
- /// expressions.
- class SCEVComplexityCompare {
- const LoopInfo *const LI;
- public:
- explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
-
- // Return true or false if LHS is less than, or at least RHS, respectively.
- bool operator()(const SCEV *LHS, const SCEV *RHS) const {
- return compare(LHS, RHS) < 0;
- }
-
- // Return negative, zero, or positive, if LHS is less than, equal to, or
- // greater than RHS, respectively. A three-way result allows recursive
- // comparisons to be more efficient.
- int compare(const SCEV *LHS, const SCEV *RHS) const {
- // Fast-path: SCEVs are uniqued so we can do a quick equality check.
- if (LHS == RHS)
- return 0;
-
- // Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
- if (LType != RType)
- return (int)LType - (int)RType;
-
- // Aside from the getSCEVType() ordering, the particular ordering
- // isn't very important except that it's beneficial to be consistent,
- // so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
- case scUnknown: {
- const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
- const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- const Value *LV = LU->getValue(), *RV = RU->getValue();
-
- // Order pointer values after integer values. This helps SCEVExpander
- // form GEPs.
- bool LIsPointer = LV->getType()->isPointerTy(),
- RIsPointer = RV->getType()->isPointerTy();
- if (LIsPointer != RIsPointer)
- return (int)LIsPointer - (int)RIsPointer;
-
- // Compare getValueID values.
- unsigned LID = LV->getValueID(),
- RID = RV->getValueID();
- if (LID != RID)
- return (int)LID - (int)RID;
-
- // Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LV)) {
- const Argument *RA = cast<Argument>(RV);
- unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
- return (int)LArgNo - (int)RArgNo;
- }
-
- // For instructions, compare their loop depth, and their operand
- // count. This is pretty loose.
- if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
- const Instruction *RInst = cast<Instruction>(RV);
-
- // Compare loop depths.
- const BasicBlock *LParent = LInst->getParent(),
- *RParent = RInst->getParent();
- if (LParent != RParent) {
- unsigned LDepth = LI->getLoopDepth(LParent),
- RDepth = LI->getLoopDepth(RParent);
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
-
- // Compare the number of operands.
- unsigned LNumOps = LInst->getNumOperands(),
- RNumOps = RInst->getNumOperands();
- return (int)LNumOps - (int)RNumOps;
- }
+/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+/// than the complexity of the RHS. This comparator is used to canonicalize
+/// expressions.
+class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
- return 0;
- }
+ // Return true or false if LHS is less than, or at least RHS, respectively.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
- case scConstant: {
- const SCEVConstant *LC = cast<SCEVConstant>(LHS);
- const SCEVConstant *RC = cast<SCEVConstant>(RHS);
-
- // Compare constant values.
- const APInt &LA = LC->getValue()->getValue();
- const APInt &RA = RC->getValue()->getValue();
- unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
- if (LBitWidth != RBitWidth)
- return (int)LBitWidth - (int)RBitWidth;
- return LA.ult(RA) ? -1 : 1;
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- case scAddRecExpr: {
- const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
-
- // Compare addrec loop depths.
- const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
- if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(),
- RDepth = RLoop->getLoopDepth();
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
- // Addrec complexity grows with operand count.
- unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
- // Lexicographically compare.
- for (unsigned i = 0; i != LNumOps; ++i) {
- long X = compare(LA->getOperand(i), RA->getOperand(i));
- if (X != 0)
- return X;
- }
+ return 0;
+ }
- return 0;
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scAddExpr:
- case scMulExpr:
- case scSMaxExpr:
- case scUMaxExpr: {
- const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
- const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
-
- // Lexicographically compare n-ary expressions.
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
-
- for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- long X = compare(LC->getOperand(i), RC->getOperand(i));
- if (X != 0)
- return X;
- }
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
}
- case scUDivExpr: {
- const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
- const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- // Lexicographically compare udiv expressions.
- long X = compare(LC->getLHS(), RC->getLHS());
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
if (X != 0)
return X;
- return compare(LC->getRHS(), RC->getRHS());
}
+ return (int)LNumOps - (int)RNumOps;
+ }
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
- const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- // Compare cast expressions by operand.
- return compare(LC->getOperand(), RC->getOperand());
- }
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- llvm_unreachable("Unknown SCEV kind!");
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
}
- };
-}
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+};
+} // end anonymous namespace
/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
@@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
}
-namespace {
-struct FindSCEVSize {
- int Size;
- FindSCEVSize() : Size(0) {}
-
- bool follow(const SCEV *S) {
- ++Size;
- // Keep looking at all operands of S.
- return true;
- }
- bool isDone() const {
- return false;
- }
-};
-}
-
// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
+ struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+ };
+
FindSCEVSize F;
SCEVTraversal<FindSCEVSize> ST(F);
ST.visitAll(S);
@@ -771,8 +760,8 @@ public:
void visitConstant(const SCEVConstant *Numerator) {
if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- APInt NumeratorVal = Numerator->getValue()->getValue();
- APInt DenominatorVal = D->getValue()->getValue();
+ APInt NumeratorVal = Numerator->getAPInt();
+ APInt DenominatorVal = D->getAPInt();
uint32_t NumeratorBW = NumeratorVal.getBitWidth();
uint32_t DenominatorBW = DenominatorVal.getBitWidth();
@@ -792,17 +781,15 @@ public:
void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
const SCEV *StartQ, *StartR, *StepQ, *StepR;
- assert(Numerator->isAffine() && "Numerator should be affine");
+ if (!Numerator->isAffine())
+ return cannotDivide(Numerator);
divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
// Bail out if the types do not match.
Type *Ty = Denominator->getType();
if (Ty != StartQ->getType() || Ty != StartR->getType() ||
- Ty != StepQ->getType() || Ty != StepR->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ Ty != StepQ->getType() || Ty != StepR->getType())
+ return cannotDivide(Numerator);
Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
Numerator->getNoWrapFlags());
Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
@@ -818,11 +805,8 @@ public:
divide(SE, Op, Denominator, &Q, &R);
// Bail out if types do not match.
- if (Ty != Q->getType() || Ty != R->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Q->getType() || Ty != R->getType())
+ return cannotDivide(Numerator);
Qs.push_back(Q);
Rs.push_back(R);
@@ -845,11 +829,8 @@ public:
bool FoundDenominatorTerm = false;
for (const SCEV *Op : Numerator->operands()) {
// Bail out if types do not match.
- if (Ty != Op->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Op->getType())
+ return cannotDivide(Numerator);
if (FoundDenominatorTerm) {
Qs.push_back(Op);
@@ -865,11 +846,8 @@ public:
}
// Bail out if types do not match.
- if (Ty != Q->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (Ty != Q->getType())
+ return cannotDivide(Numerator);
FoundDenominatorTerm = true;
Qs.push_back(Q);
@@ -884,11 +862,8 @@ public:
return;
}
- if (!isa<SCEVUnknown>(Denominator)) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ if (!isa<SCEVUnknown>(Denominator))
+ return cannotDivide(Numerator);
// The Remainder is obtained by replacing Denominator by 0 in Numerator.
ValueToValueMap RewriteMap;
@@ -908,15 +883,12 @@ public:
// Quotient is (Numerator - Remainder) divided by Denominator.
const SCEV *Q, *R;
const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
- if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
- // This SCEV does not seem to simplify: fail the division here.
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
+ // This SCEV does not seem to simplify: fail the division here.
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
+ return cannotDivide(Numerator);
divide(SE, Diff, Denominator, &Q, &R);
- assert(R == Zero &&
- "(Numerator - Remainder) should evenly divide Denominator");
+ if (R != Zero)
+ return cannotDivide(Numerator);
Quotient = Q;
}
@@ -924,11 +896,18 @@ private:
SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
const SCEV *Denominator)
: SE(S), Denominator(Denominator) {
- Zero = SE.getConstant(Denominator->getType(), 0);
- One = SE.getConstant(Denominator->getType(), 1);
+ Zero = SE.getZero(Denominator->getType());
+ One = SE.getOne(Denominator->getType());
+
+ // We generally do not know how to divide Expr by Denominator. We
+ // initialize the division to a "cannot divide" state to simplify the rest
+ // of the code.
+ cannotDivide(Numerator);
+ }
- // By default, we don't know how to divide Expr by Denominator.
- // Providing the default here simplifies the rest of the code.
+ // Convenience function for giving up on the division. We set the quotient to
+ // be equal to zero and the remainder to be equal to the numerator.
+ void cannotDivide(const SCEV *Numerator) {
Quotient = Zero;
Remainder = Numerator;
}
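
One detail in the bail-outs above: "return cannotDivide(Numerator);" returns
a void expression from a void member function, which is well-formed C++ and
keeps each bail-out to a single statement. A minimal standalone illustration:

    #include <cstdio>

    static void giveUp() { std::puts("cannot divide"); }

    static void tryDivide(bool TypesMatch) {
      if (!TypesMatch)
        return giveUp(); // legal: a void call returned from a void function
      std::puts("dividing");
    }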
@@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
- Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
+ for (const SCEV *Op : AddRec->operands())
+ Operands.push_back(getTruncateExpr(Op, Ty));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
@@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// `Step`:
// 1. NSW/NUW flags on the step increment.
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ auto PreStartFlags =
+ ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
@@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
if (OverflowLimit &&
- SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
return PreStart;
- }
+
return nullptr;
}
@@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
if (!StartC)
return false;
- APInt StartAI = StartC->getValue()->getValue();
+ APInt StartAI = StartC->getAPInt();
for (unsigned Delta : {-2, -1, 1, 2}) {
const SCEV *PreStart = getConstant(StartAI - Delta);
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ const auto *PreAR =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+
// Give up if we don't already have the add recurrence we need because
// actually constructing an add recurrence is relatively expensive.
- const SCEVAddRecExpr *PreAR = [&]() {
- FoldingSetNodeID ID;
- ID.AddInteger(scAddRecExpr);
- ID.AddPointer(PreStart);
- ID.AddPointer(Step);
- ID.AddPointer(L);
- void *IP = nullptr;
- return static_cast<SCEVAddRecExpr *>(
- this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- }();
-
if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
@@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
}
}
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
+ if (SA->getNoWrapFlags(SCEV::FlagNUW)) {
+      // If the addition does not wrap in the unsigned sense, we can, by
+      // definition, commute the zero extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getZeroExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNUW);
+ }
+ }
+
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
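
The zext rule above can be sanity-checked with plain integers: when an 8-bit
addition does not wrap unsigned, widening before or after the add produces
the same value. A self-contained check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t A = 100, B = 27; // A + B == 127, no unsigned wrap in 8 bits
      uint16_t Folded = static_cast<uint16_t>(static_cast<uint8_t>(A + B));
      uint16_t Distributed =
          static_cast<uint16_t>(A) + static_cast<uint16_t>(B);
      assert(Folded == Distributed); // zext(A + B) == zext(A) + zext(B)
      return 0;
    }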
@@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
}
// sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
- if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
if (SA->getNumOperands() == 2) {
- auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
- auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+ auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+ auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
if (SMul && SC1) {
- if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
- const APInt &C1 = SC1->getValue()->getValue();
- const APInt &C2 = SC2->getValue()->getValue();
+ if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
return getAddExpr(getSignExtendExpr(SC1, Ty),
@@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
}
}
}
+
+ // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
+ if (SA->getNoWrapFlags(SCEV::FlagNSW)) {
+    // If the addition does not wrap in the signed sense, we can, by
+    // definition, commute the sign extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getSignExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNSW);
+ }
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
@@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If Start and Step are constants, check if we can apply this
// transformation:
// sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
- auto SC1 = dyn_cast<SCEVConstant>(Start);
- auto SC2 = dyn_cast<SCEVConstant>(Step);
+ auto *SC1 = dyn_cast<SCEVConstant>(Start);
+ auto *SC2 = dyn_cast<SCEVConstant>(Step);
if (SC1 && SC2) {
- const APInt &C1 = SC1->getValue()->getValue();
- const APInt &C2 = SC2->getValue()->getValue();
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
Start = getSignExtendExpr(Start, Ty);
- const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
- L, AR->getNoWrapFlags());
+ const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
+ AR->getNoWrapFlags());
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
@@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
- if (SC->getValue()->getValue().isNegative())
+ if (SC->getAPInt().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
@@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// Pull a buried constant out to the outside.
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
- AccumulatedConstant += Scale * C->getValue()->getValue();
+ AccumulatedConstant += Scale * C->getAPInt();
}
// Next comes everything else. We're especially interested in multiplies
@@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
- Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
// A multiplication of a constant with another add; recurse.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
@@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// the map.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
const SCEV *Key = SE.getMulExpr(MulOps);
- std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
- M.insert(std::make_pair(Key, NewScale));
+ auto Pair = M.insert(std::make_pair(Key, NewScale));
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
@@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
return Interesting;
}
-namespace {
- struct APIntCompare {
- bool operator()(const APInt &LHS, const APInt &RHS) const {
- return LHS.ult(RHS);
- }
- };
-}
-
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
const SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags OldFlags) {
+ SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
+ typedef OverflowingBinaryOperator OBO;
bool CanAnalyze =
Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
@@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap =
- ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
+ ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- auto IsKnownNonNegative =
- std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
+ auto IsKnownNonNegative = [&](const SCEV *S) {
+ return SE->isKnownNonNegative(S);
+ };
+
+ if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
+ Flags =
+ ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- if (SignOrUnsignWrap == SCEV::FlagNSW &&
- std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
- return ScalarEvolution::setFlags(OldFlags,
- (SCEV::NoWrapFlags)SignOrUnsignMask);
+ SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
+
+ if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
+ Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
+
+    // (A + C) --> (A + C)<nsw> if the addition does not sign-overflow
+    // (A + C) --> (A + C)<nuw> if the addition does not unsigned-overflow
+
+ const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
+ if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
+ auto NSWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap);
+ if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+ }
+ if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
+ auto NUWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C,
+ OBO::NoUnsignedWrap);
+ if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ }
+ }
- return OldFlags;
+ return Flags;
}
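
The range reasoning behind the new (A + C) inference can be pictured with plain 8-bit arithmetic. In this hedged sketch the constant 100 stands in for C, and inNSWRegion mirrors what ConstantRange::makeNoWrapRegion computes for an add with no signed wrap (assumes two's-complement narrowing):

    #include <cassert>
    #include <cstdint>

    // For X + C with C > 0, signed 8-bit overflow occurs exactly when
    // X > INT8_MAX - C, so the no-signed-wrap region is [INT8_MIN, INT8_MAX - C].
    static bool inNSWRegion(int8_t X, int8_t C) { return X <= INT8_MAX - C; }

    int main() {
      const int8_t C = 100;
      for (int X = INT8_MIN; X <= INT8_MAX; ++X) {
        int Wide = X + C;              // exact result
        int8_t Narrow = (int8_t)Wide;  // wrapped 8-bit result
        assert((Wide != Narrow) == !inNSWRegion((int8_t)X, C));
      }
      return 0;
    }

If getSignedRange(Ops[1]) is contained in that region, FlagNSW is safe to set; the FlagNUW case is the same argument with unsigned bounds.
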
/// getAddExpr - Get a canonical add expression, or something simpler if
@@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVAddExpr operand types don't match!");
#endif
- Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
-
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- Ops[0] = getConstant(LHSC->getValue()->getValue() +
- RHSC->getValue()->getValue());
+ Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
@@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
break;
}
LargeMulOps.push_back(T->getOperand());
- } else if (const SCEVConstant *C =
- dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
@@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops.data(), Ops.size(),
APInt(BitWidth, 1), *this)) {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+
    // Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
- E = NewOps.end(); I != E; ++I)
- MulOpLists[M.find(*I)->second].push_back(*I);
+ for (const SCEV *NewOp : NewOps)
+ MulOpLists[M.find(NewOp)->second].push_back(NewOp);
// Re-generate the operands list.
Ops.clear();
if (AccumulatedConstant != 0)
Ops.push_back(getConstant(AccumulatedConstant));
- for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
- I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
- if (I->first != 0)
- Ops.push_back(getMulExpr(getConstant(I->first),
- getAddExpr(I->second)));
+ for (auto &MulOp : MulOpLists)
+ if (MulOp.first != 0)
+ Ops.push_back(getMulExpr(getConstant(MulOp.first),
+ getAddExpr(MulOp.second)));
if (Ops.empty())
- return getConstant(Ty, 0);
+ return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops);
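
On a toy example, the regrouping does the following: 2*x + 4*y + 2*z becomes 2*(x + z) + 4*y, so each constant scale is multiplied in only once. A minimal sketch of the bucketing step (the names x, y, z are illustrative):

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Bucket each operand under its constant scale, mirroring MulOpLists.
      std::map<int, std::vector<std::string>> MulOpLists;
      const std::pair<std::string, int> Scaled[] = {{"x", 2}, {"y", 4}, {"z", 2}};
      for (const auto &P : Scaled)
        MulOpLists[P.second].push_back(P.first);
      assert(MulOpLists[2].size() == 2);  // x and z share the scale 2
      assert(MulOpLists[4].size() == 1);  // y alone has scale 4
      return 0;
    }
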
@@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
- const SCEV *One = getConstant(Ty, 1);
+ const SCEV *One = getOne(Ty);
const SCEV *AddOne = getAddExpr(One, InnerMul);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
@@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->op_end());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx)
- if (const SCEVAddRecExpr *OtherAddRec =
- dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
if (OtherAddRec->getLoop() == AddRecLoop) {
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
@@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVMulExpr operand types don't match!");
#endif
- Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
-
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- LHSC->getValue()->getValue() *
- RHSC->getValue()->getValue());
+ ConstantInt *Fold =
+ ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
- for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
- E = Add->op_end(); I != E; ++I) {
- const SCEV *Mul = getMulExpr(Ops[0], *I);
+ for (const SCEV *AddOp : Add->operands()) {
+ const SCEV *Mul = getMulExpr(Ops[0], AddOp);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
return getAddExpr(NewOps);
- }
- else if (const SCEVAddRecExpr *
- AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
- for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
- E = AddRec->op_end(); I != E; ++I) {
- Operands.push_back(getMulExpr(Ops[0], *I));
- }
+ for (const SCEV *AddRecOp : AddRec->operands())
+ Operands.push_back(getMulExpr(Ops[0], AddRecOp));
+
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
}
@@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getConstant(Ty, 0);
+ const SCEV *Term = getZero(Ty);
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
@@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// its operands.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
- unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
+ unsigned LZ = RHSC->getAPInt().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
- if (!RHSC->getValue()->getValue().isPowerOf2())
+ if (!RHSC->getAPInt().isPowerOf2())
++MaxShiftAmt;
IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
@@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
if (const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
- const APInt &StepInt = Step->getValue()->getValue();
- const APInt &DivInt = RHSC->getValue()->getValue();
+ const APInt &StepInt = Step->getAPInt();
+ const APInt &DivInt = RHSC->getAPInt();
if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
- Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
- return getAddRecExpr(Operands, AR->getLoop(),
- SCEV::FlagNW);
+ for (const SCEV *Op : AR->operands())
+ Operands.push_back(getUDivExpr(Op, RHS));
+ return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
}
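
A standalone numeric check of the {X,+,N}/C --> {X/C,+,N/C} fold under the stated conditions, N % C == 0 and no unsigned wrap (the constants are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t X = 8, N = 4, C = 2;  // N % C == 0
      uint8_t AR = X;                     // models {8,+,4} in an 8-bit type
      uint8_t Folded = X / C;             // models {4,+,2}
      for (int It = 0; It < 60; ++It) {   // 8 + 60*4 = 248 < 256: no wrap
        assert(AR / C == Folded);
        AR = (uint8_t)(AR + N);
        Folded = (uint8_t)(Folded + N / C);
      }
      return 0;
    }
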
      // Get a canonical UDivExpr for a recurrence.
      // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
@@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
- const APInt &StartInt = StartC->getValue()->getValue();
+ const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0)
LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
@@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
+ for (const SCEV *Op : M->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
@@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
- for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
- Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
+ for (const SCEV *Op : A->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
@@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue().abs();
- APInt B = C2->getValue()->getValue().abs();
+ APInt A = C1->getAPInt().abs();
+ APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
@@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
// If the mulexpr multiplies by a constant, then that constant must be the
// first element of the mulexpr.
- if (const SCEVConstant *LHSCst =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
if (LHSCst == RHSCst) {
SmallVector<const SCEV *, 2> Operands;
Operands.append(Mul->op_begin() + 1, Mul->op_end());
@@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
// check.
APInt Factor = gcd(LHSCst, RHSCst);
if (!Factor.isIntN(1)) {
- LHSCst = cast<SCEVConstant>(
- getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
- RHSCst = cast<SCEVConstant>(
- getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+ LHSCst =
+ cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
+ RHSCst =
+ cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
SmallVector<const SCEV *, 2> Operands;
Operands.push_back(LHSCst);
Operands.append(Mul->op_begin() + 1, Mul->op_end());
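
A sketch of the gcd cancellation in getUDivExactExpr, valid because the division is assumed exact; the constants 6 and 4 are illustrative (requires C++17 for std::gcd):

    #include <cassert>
    #include <cstdint>
    #include <numeric>  // std::gcd

    int main() {
      // (C1*A) /u C2  -->  ((C1/g)*A) /u (C2/g) with g = gcd(C1, C2),
      // valid whenever the division is known to be exact.
      const uint32_t C1 = 6, C2 = 4, G = std::gcd(C1, C2);  // G == 2
      for (uint32_t A = 0; A < 1000; ++A) {
        if ((C1 * A) % C2 != 0)
          continue;  // only the exact case is folded
        assert((C1 * A) / C2 == ((C1 / G) * A) / (C2 / G));
      }
      return 0;
    }
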
@@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
  // Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
- if (L->contains(NestedLoop) ?
- (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
- (!NestedLoop->contains(L) &&
- DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ if (L->contains(NestedLoop)
+ ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
+ : (!NestedLoop->contains(L) &&
+ DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
// requirement.
- bool AllInvariant = true;
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- if (!isLoopInvariant(Operands[i], L)) {
- AllInvariant = false;
- break;
- }
+ bool AllInvariant = all_of(
+ Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
+
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
@@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
- AllInvariant = true;
- for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
- if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
- AllInvariant = false;
- break;
- }
+ AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
+ return isLoopInvariant(Op, NestedLoop);
+ });
+
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
@@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
- // context.
+ // context. This can be fixed similarly to how these flags are handled for
+ // adds.
SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ const SCEV *TotalOffset = getZero(IntPtrTy);
  // The address space is unimportant. The first thing we do with CurTy is to
  // get its element type.
Type *CurTy = PointerType::getUnqual(PointeeType);
@@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- APIntOps::smax(LHSC->getValue()->getValue(),
- RHSC->getValue()->getValue()));
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, LI);
+ GroupByComplexity(Ops, &LI);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(getContext(),
- APIntOps::umax(LHSC->getValue()->getValue(),
- RHSC->getValue()->getValue()));
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
@@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- return getConstant(IntTy,
- F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
@@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
return getConstant(
- IntTy,
- F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
- FieldNo));
+ IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
+ return getDataLayout().getTypeSizeInBits(Ty);
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy()) {
+ if (Ty->isIntegerTy())
return Ty;
- }
  // The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- return F->getParent()->getDataLayout().getIntPtrType(Ty);
+ return getDataLayout().getIntPtrType(Ty);
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
- return &CouldNotCompute;
+ return CouldNotCompute.get();
}
-namespace {
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
// Helper class working with SCEVTraversal to figure out if a SCEV contains
// a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
  // is set iff we find such a SCEVUnknown.
@@ -3300,9 +3302,7 @@ namespace {
}
bool isDone() const { return FindOne; }
};
-}
-bool ScalarEvolution::checkValidity(const SCEV *S) const {
FindInvalidSCEVUnknown F;
SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
ST.visitAll(S);
@@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const {
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+ const SCEV *S = getExistingSCEV(V);
+ if (S == nullptr) {
+ S = createSCEV(V);
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ }
+ return S;
+}
+
+const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
- else
- ValueExprMap.erase(I);
+ ValueExprMap.erase(I);
}
- const SCEV *S = createSCEV(V);
-
- // The process of creating a SCEV for V may have caused other SCEVs
- // to have been created, so it's necessary to insert the new entry
- // from scratch, rather than trying to remember the insert position
- // above.
- ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
- return S;
+ return nullptr;
}
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V,
- getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+ return getMulExpr(
+ V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
@@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
- assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
// Fast path: X - X --> 0.
if (LHS == RHS)
- return getConstant(LHS->getType(), 0);
+ return getZero(LHS->getType());
+
+ // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+ // makes it so that we cannot make much use of NUW.
+ auto AddFlags = SCEV::FlagAnyWrap;
+ const bool RHSIsNotMinSigned =
+ !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ // Let M be the minimum representable signed value. Then (-1)*RHS
+ // signed-wraps if and only if RHS is M. That can happen even for
+ // a NSW subtraction because e.g. (-1)*M signed-wraps even though
+ // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+ // (-1)*RHS, we need to prove that RHS != M.
+ //
+ // If LHS is non-negative and we know that LHS - RHS does not
+ // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+ // either by proving that RHS > M or that LHS >= 0.
+ if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+ AddFlags = SCEV::FlagNSW;
+ }
+ }
+
+ // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+ // RHS is NSW and LHS >= 0.
+ //
+ // The difficulty here is that the NSW flag may have been proven
+ // relative to a loop that is to be found in a recurrence in LHS and
+ // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+ // larger scope than intended.
+ auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- // X - Y --> X + -Y.
- // X -(nsw || nuw) Y --> X + -Y.
- return getAddExpr(LHS, getNegativeSCEV(RHS));
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
}
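
The corner case the comment describes is easy to reproduce in 8-bit arithmetic (a standalone illustration; assumes two's-complement narrowing):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t M = INT8_MIN;            // the minimum signed value, -128
      int8_t Negated = (int8_t)(-M);  // (-1)*M: 128 wraps back to -128
      assert(Negated == INT8_MIN);
      int8_t Sub = (int8_t)(-1 - M);  // -1 - M = 127: no wrap at all
      assert(Sub == INT8_MAX);
      return 0;
    }
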
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
return getPointerBase(Cast->getOperand());
- }
- else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
const SCEV *PtrOp = nullptr;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- if ((*I)->getType()->isPointerTy()) {
+ for (const SCEV *NAryOp : NAry->operands()) {
+ if (NAryOp->getType()->isPointerTy()) {
// Cannot find the base of an expression with multiple pointer operands.
if (PtrOp)
return V;
- PtrOp = *I;
+ PtrOp = NAryOp;
}
}
if (!PtrOp)
@@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
if (!Visited.insert(I).second)
continue;
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
+ auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
@@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
}
}
-/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
-/// a loop header, making it a potential recurrence, or it doesn't.
-///
-const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
- if (const Loop *L = LI->getLoopFor(PN->getParent()))
- if (L->getHeader() == PN->getParent()) {
- // The loop may have multiple entrances or multiple exits; we can analyze
- // this phi as an addrec if it has a unique entry value and a unique
- // backedge value.
- Value *BEValueV = nullptr, *StartValueV = nullptr;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (L->contains(PN->getIncomingBlock(i))) {
- if (!BEValueV) {
- BEValueV = V;
- } else if (BEValueV != V) {
- BEValueV = nullptr;
- break;
- }
- } else if (!StartValueV) {
- StartValueV = V;
- } else if (StartValueV != V) {
- StartValueV = nullptr;
- break;
- }
- }
- if (BEValueV && StartValueV) {
- // While we are analyzing this PHI node, handle its value symbolically.
- const SCEV *SymbolicName = getUnknown(PN);
- assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
- "PHI node already processed?");
- ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
-
- // Using this symbolic name for the PHI, analyze the value coming around
- // the back-edge.
- const SCEV *BEValue = getSCEV(BEValueV);
-
- // NOTE: If BEValue is loop invariant, we know that the PHI node just
- // has a special value for the first iteration of the loop.
-
- // If the value coming around the backedge is an add with the symbolic
- // value we just inserted, then we found a simple induction variable!
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
- // If there is a single occurrence of the symbolic value, replace it
- // with a recurrence.
- unsigned FoundIndex = Add->getNumOperands();
- for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (Add->getOperand(i) == SymbolicName)
- if (FoundIndex == e) {
- FoundIndex = i;
- break;
- }
+namespace {
+class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVInitRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
- if (FoundIndex != Add->getNumOperands()) {
- // Create an add with everything but the specified operand.
- SmallVector<const SCEV *, 8> Ops;
- for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (i != FoundIndex)
- Ops.push_back(Add->getOperand(i));
- const SCEV *Accum = getAddExpr(Ops);
-
- // This is not a valid addrec if the step amount is varying each
- // loop iteration, but is not itself an addrec in this loop.
- if (isLoopInvariant(Accum, L) ||
- (isa<SCEVAddRecExpr>(Accum) &&
- cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
-
- // If the increment doesn't overflow, then neither the addrec nor
- // the post-increment will overflow.
- if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
- if (OBO->getOperand(0) == PN) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
- }
- } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
- // If the increment is an inbounds GEP, then we know the address
- // space cannot be wrapped around. We cannot make any guarantee
- // about signed or unsigned overflow because pointers are
- // unsigned but we may have a negative index from the base
- // pointer. We can guarantee that no unsigned wrap occurs if the
- // indices form a positive value.
- if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
- Flags = setFlags(Flags, SCEV::FlagNW);
-
- const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
- if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
- Flags = setFlags(Flags, SCEV::FlagNUW);
- }
+ SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
- // We cannot transfer nuw and nsw flags from subtraction
- // operations -- sub nuw X, Y is not the same as add nuw X, -Y
- // for instance.
- }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
- const SCEV *StartVal = getSCEV(StartValueV);
- const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
- // Since the no-wrap flags are on the increment, they apply to the
- // post-incremented value as well.
- if (isLoopInvariant(Accum, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum),
- Accum, L, Flags);
-
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- ForgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
- return PHISCEV;
- }
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ // Only allow AddRecExprs for this loop.
+ if (Expr->getLoop() == L)
+ return Expr->getStart();
+ Valid = false;
+ return Expr;
+ }
+
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+
+class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVShiftRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
+
+ SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ // Only allow AddRecExprs for this loop.
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (Expr->getLoop() == L && Expr->isAffine())
+ return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
+ Valid = false;
+ return Expr;
+ }
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent())
+ return nullptr;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = nullptr;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = nullptr;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
}
- } else if (const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(BEValue)) {
- // Otherwise, this could be a loop like this:
- // i = 0; for (j = 1; ..; ++j) { .... i = j; }
- // In this case, j = {1,+,1} and BEValue is j.
- // Because the other in-value of i (0) fits the evolution of BEValue
- // i really is an addrec evolution.
- if (AddRec->getLoop() == L && AddRec->isAffine()) {
- const SCEV *StartVal = getSCEV(StartValueV);
-
- // If StartVal = j.start - j.stride, we can use StartVal as the
- // initial step of the addrec evolution.
- if (StartVal == getMinusSCEV(AddRec->getOperand(0),
- AddRec->getOperand(1))) {
- // FIXME: For constant StartVal, we should be able to infer
- // no-wrap flags.
- const SCEV *PHISCEV =
- getAddRecExpr(StartVal, AddRec->getOperand(1), L,
- SCEV::FlagAnyWrap);
-
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- ForgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
- return PHISCEV;
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
}
+
+ // We cannot transfer nuw and nsw flags from subtraction
+ // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+ // for instance.
}
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue
+ // i really is an addrec evolution.
+ //
+    // We can generalize this by saying that i is the shifted value of BEValue
+ // by one iteration:
+ // PHI(f(0), f({1,+,1})) --> f({0,+,1})
+ const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+ const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+ if (Shifted != getCouldNotCompute() &&
+ Start != getCouldNotCompute()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+ if (Start == StartVal) {
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ return Shifted;
}
}
}
+ }
+
+ return nullptr;
+}
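
The shift generalization can be replayed as a runnable loop: j = {1,+,1}, and the merged value i is the same recurrence shifted back one iteration, i.e. {0,+,1} (a standalone sketch):

    #include <cassert>

    int main() {
      // i = 0; for (j = 1; ..; ++j) { ... i = j; }
      int i = 0;
      for (int j = 1, k = 0; j <= 10; ++j, ++k) {
        assert(i == k);  // at the top of iteration k, i == {0,+,1}(k)
        i = j;
      }
      return 0;
    }
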
+
+// Checks if the SCEV S is available at BB. S is considered available at BB
+// if S can be materialized at BB without introducing a fault.
+static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
+ BasicBlock *BB) {
+ struct CheckAvailable {
+ bool TraversalDone = false;
+ bool Available = true;
+
+ const Loop *L = nullptr; // The loop BB is in (can be nullptr)
+ BasicBlock *BB = nullptr;
+ DominatorTree &DT;
+
+ CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
+ : L(L), BB(BB), DT(DT) {}
+
+ bool setUnavailable() {
+ TraversalDone = true;
+ Available = false;
+ return false;
+ }
+
+ bool follow(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
+ case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+      // These expressions are available if their operands are.
+ return true;
+
+ case scAddRecExpr: {
+      // We allow add recurrences on the loop that BB is in, or on some
+ // outer loop. This guarantees availability because the value of the
+ // add recurrence at BB is simply the "current" value of the induction
+ // variable. We can relax this in the future; for instance an add
+ // recurrence on a sibling dominating loop is also available at BB.
+ const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
+ if (L && (ARLoop == L || ARLoop->contains(L)))
+ return true;
+
+ return setUnavailable();
+ }
+
+ case scUnknown: {
+ // For SCEVUnknown, we check for simple dominance.
+ const auto *SU = cast<SCEVUnknown>(S);
+ Value *V = SU->getValue();
+
+ if (isa<Argument>(V))
+ return false;
+
+ if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
+ return false;
+
+ return setUnavailable();
+ }
+
+ case scUDivExpr:
+ case scCouldNotCompute:
+      // We do not try to be smart about these at all.
+ return setUnavailable();
+ }
+ llvm_unreachable("switch should be fully covered!");
+ }
+
+ bool isDone() { return TraversalDone; }
+ };
+
+ CheckAvailable CA(L, BB, DT);
+ SCEVTraversal<CheckAvailable> ST(CA);
+
+ ST.visitAll(S);
+ return CA.Available;
+}
+
+// Try to match a control flow sequence that branches out at BI and merges back
+// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
+// match.
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
+ Value *&C, Value *&LHS, Value *&RHS) {
+ C = BI->getCondition();
+
+ BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
+ BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
+
+ if (!LeftEdge.isSingleEdge())
+ return false;
+
+ assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
+
+ Use &LeftUse = Merge->getOperandUse(0);
+ Use &RightUse = Merge->getOperandUse(1);
+
+ if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
+ LHS = LeftUse;
+ RHS = RightUse;
+ return true;
+ }
+
+ if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
+ LHS = RightUse;
+ RHS = LeftUse;
+ return true;
+ }
+
+ return false;
+}
+
+const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
+ if (PN->getNumIncomingValues() == 2) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+
+ // We don't want to break LCSSA, even in a SCEV expression tree.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
+ return nullptr;
+
+ // Try to match
+ //
+ // br %cond, label %left, label %right
+ // left:
+ // br label %merge
+ // right:
+ // br label %merge
+ // merge:
+ // V = phi [ %x, %left ], [ %y, %right ]
+ //
+ // as "select %cond, %x, %y"
+
+ BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
+ assert(IDom && "At least the entry block should dominate PN");
+
+ auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
+
+ if (BI && BI->isConditional() &&
+ BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
+ IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
+ IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
+ return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
+ }
+
+ return nullptr;
+}
+
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const SCEV *S = createAddRecFromPHI(PN))
+ return S;
+
+ if (const SCEV *S = createNodeFromSelectLikePHI(PN))
+ return S;
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V =
- SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
- if (LI->replacementPreservesLCSSAForm(PN, V))
+ if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
+ if (LI.replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
+const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
+ Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal) {
+ // Handle "constant" branch or select. This can occur for instance when a
+ // loop pass transforms an inner loop and moves on to process the outer loop.
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+
+ // Try to match some simple smax or umax patterns.
+ auto *ICI = dyn_cast<ICmpInst>(Cond);
+ if (!ICI)
+ return getUnknown(I);
+
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return getUnknown(I);
+}
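
A self-contained check of the first pattern the function matches, a >s b ? a+x : b+x --> smax(a, b) + x (the ranges and the value of x are arbitrary):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      const int x = 7;
      for (int8_t a = -5; a <= 5; ++a)
        for (int8_t b = -5; b <= 5; ++b) {
          // LDiff = (a+x) - a and RDiff = (b+x) - b both equal x, so the
          // fold applies and the select is smax(a, b) + x.
          int Sel = (a > b) ? (a + x) : (b + x);
          assert(Sel == std::max<int>(a, b) + x);
        }
      return 0;
    }
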
+
/// createNodeForGEP - Expand GEP instructions into add and multiply
/// operations. This allows them to be analyzed by regular SCEV code.
///
@@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
uint32_t
ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return C->getValue()->getValue().countTrailingZeros();
+ return C->getAPInt().countTrailingZeros();
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
@@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones,
- F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
+ nullptr, &DT);
return Zeros.countTrailingOnes();
}
@@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
/// GetRangeFromMetadata - Helper method to assign a range to V from
/// metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
- ConstantRange TotalRange(
- cast<IntegerType>(I->getType())->getBitWidth(), false);
-
- unsigned NumRanges = MD->getNumOperands() / 2;
- assert(NumRanges >= 1);
-
- for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower =
- mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
- ConstantInt *Upper =
- mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
- ConstantRange Range(Lower->getValue(), Upper->getValue());
- TotalRange = TotalRange.unionWith(Range);
- }
-
- return TotalRange;
- }
- }
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
+ return getConstantRangeFromMetadata(*MD);
return None;
}
@@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S,
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
+ return setRange(C, SignHint, ConstantRange(C->getAPInt()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S,
if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
- ConservativeResult =
- ConservativeResult.intersectWith(
- ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
@@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S,
// Split here to avoid paying the compile-time cost of calling both
// computeKnownBits and ComputeNumSignBits. This restriction can be lifted
// if needed.
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
if (Ones != ~Zeros + 1)
ConservativeResult =
ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S,
return setRange(S, SignHint, ConservativeResult);
}
-/// createSCEV - We know that there is no SCEV for the specified value.
-/// Analyze the expression.
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
+ const BinaryOperator *BinOp = cast<BinaryOperator>(V);
+
+ // Return early if there are no flags to propagate to the SCEV.
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ if (BinOp->hasNoUnsignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (BinOp->hasNoSignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+  if (Flags == SCEV::FlagAnyWrap)
+    return SCEV::FlagAnyWrap;
+
+ // Here we check that BinOp is in the header of the innermost loop
+ // containing BinOp, since we only deal with instructions in the loop
+ // header. The actual loop we need to check later will come from an add
+ // recurrence, but getting that requires computing the SCEV of the operands,
+ // which can be expensive. This check we can do cheaply to rule out some
+ // cases early.
+ Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
+ if (innermostContainingLoop == nullptr ||
+ innermostContainingLoop->getHeader() != BinOp->getParent())
+ return SCEV::FlagAnyWrap;
+
+ // Only proceed if we can prove that BinOp does not yield poison.
+ if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
+
+ // At this point we know that if V is executed, then it does not wrap
+ // according to at least one of NSW or NUW. If V is not executed, then we do
+ // not know if the calculation that V represents would wrap. Multiple
+ // instructions can map to the same SCEV. If we apply NSW or NUW from V to
+ // the SCEV, we must guarantee no wrapping for that SCEV also when it is
+ // derived from other instructions that map to the same SCEV. We cannot make
+ // that guarantee for cases where V is not executed. So we need to find the
+ // loop that V is considered in relation to and prove that V is executed for
+ // every iteration of that loop. That implies that the value that V
+ // calculates does not wrap anywhere in the loop, so then we can apply the
+ // flags to the SCEV.
+ //
+ // We check isLoopInvariant to disambiguate in case we are adding two
+ // recurrences from different loops, so that we know which loop to prove
+ // that V is executed in.
+ for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
+ const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ const int OtherOpIndex = 1 - OpIndex;
+ const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
+ if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
+ isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
+ return Flags;
+ }
+ }
+ return SCEV::FlagAnyWrap;
+}
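
A runnable illustration of the hazard described above: two syntactically identical additions that would map to the same SCEV, where only the control-flow-guarded one stays wrap-free (the constants are made up; assumes two's-complement narrowing):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int8_t n = 100;
      bool GuardedWrapped = false, UnguardedWrapped = false;
      for (int8_t i = 0; i < 60; ++i) {
        if (i < 20) {
          int8_t t1 = (int8_t)(i + n);    // guarded: i + n <= 119, never wraps
          GuardedWrapped |= (t1 != i + n);
        }
        int8_t t2 = (int8_t)(i + n);      // unguarded: wraps once i + n > 127
        UnguardedWrapped |= (t2 != i + n);
      }
      // Flags proven for the guarded addition must not be applied to the
      // shared SCEV, or they would wrongly claim t2 cannot wrap either.
      assert(!GuardedWrapped && UnguardedWrapped);
      return 0;
    }
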
+
+/// createSCEV - We know that there is no SCEV for the specified value. Analyze
+/// the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
@@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// reachable. Such instructions don't matter, and they aren't required
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
- if (!DT->isReachableFromEntry(I->getParent()))
+ if (!DT.isReachableFromEntry(I->getParent()))
return getUnknown(V);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
Opcode = CE->getOpcode();
else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
- return getConstant(V->getType(), 0);
+ return getZero(V->getType());
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
else
@@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
- //
- // Don't apply this instruction's NSW or NUW flags to the new
- // expression. The instruction may be guarded by control flow that the
- // no-wrap behavior depends on. Non-control-equivalent instructions can be
- // mapped to the same SCEV expression, and it would be incorrect to transfer
- // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
- AddOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
- unsigned Opcode = Op->getValueID() - Value::InstructionVal;
- if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ unsigned Opcode = U ? U->getOpcode() : 0;
+ if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
+ assert(Op != V && "V should be an add");
+ AddOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ AddOps.push_back(OpSCEV);
+ break;
+ }
+
+ // If a NUW or NSW flag can be applied to the SCEV for this
+ // addition, then compute the SCEV for this addition by itself
+ // with a separate call to getAddExpr. We need to do that
+ // instead of pushing the operands of the addition onto AddOps,
+ // since the flags are only known to apply to this particular
+ // addition - they may not apply to other additions that can be
+ // formed with operands from AddOps.
+ const SCEV *RHS = getSCEV(U->getOperand(1));
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+ else
+ AddOps.push_back(getAddExpr(LHS, RHS, Flags));
break;
- U = cast<Operator>(Op);
- const SCEV *Op1 = getSCEV(U->getOperand(1));
+ }
+
if (Opcode == Instruction::Sub)
- AddOps.push_back(getNegativeSCEV(Op1));
+ AddOps.push_back(getNegativeSCEV(RHS));
else
- AddOps.push_back(Op1);
+ AddOps.push_back(RHS);
}
- AddOps.push_back(getSCEV(U->getOperand(0)));
return getAddExpr(AddOps);
}
+
case Instruction::Mul: {
- // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
- MulOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0);
- Op->getValueID() == Instruction::Mul + Value::InstructionVal;
- Op = U->getOperand(0)) {
- U = cast<Operator>(Op);
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ if (!U || U->getOpcode() != Instruction::Mul) {
+ assert(Op != V && "V should be a mul");
+ MulOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ MulOps.push_back(OpSCEV);
+ break;
+ }
+
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+
MulOps.push_back(getSCEV(U->getOperand(1)));
}
- MulOps.push_back(getSCEV(U->getOperand(0)));
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Sub:
- return getMinusSCEV(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+ getNoWrapFlagsFromUB(U));
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
@@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
- F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(),
+ 0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (SA->getValue().uge(BitWidth))
break;
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
}
break;
@@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
- // This could be a smax or umax that was lowered earlier.
- // Try to recover it.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
- switch (ICI->getPredicate()) {
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- std::swap(LHS, RHS);
- // fall through
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- // a >s b ? a+x : b+x -> smax(a, b)+x
- // a >s b ? b+x : a+x -> smin(a, b)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType())) {
- const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
- const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, RS);
- if (LDiff == RDiff)
- return getAddExpr(getSMaxExpr(LS, RS), LDiff);
- LDiff = getMinusSCEV(LA, RS);
- RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getSMinExpr(LS, RS), LDiff);
- }
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- std::swap(LHS, RHS);
- // fall through
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- // a >u b ? a+x : b+x -> umax(a, b)+x
- // a >u b ? b+x : a+x -> umin(a, b)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType())) {
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, RS);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, RS), LDiff);
- LDiff = getMinusSCEV(LA, RS);
- RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getUMinExpr(LS, RS), LDiff);
- }
- break;
- case ICmpInst::ICMP_NE:
- // n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType()) &&
- isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(U->getType(), 1);
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, One);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
- }
- break;
- case ICmpInst::ICMP_EQ:
- // n == 0 ? 1+x : n+x -> umax(n, 1)+x
- if (getTypeSizeInBits(LHS->getType()) <=
- getTypeSizeInBits(U->getType()) &&
- isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(U->getType(), 1);
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
- const SCEV *LA = getSCEV(U->getOperand(1));
- const SCEV *RA = getSCEV(U->getOperand(2));
- const SCEV *LDiff = getMinusSCEV(LA, One);
- const SCEV *RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
- }
- break;
- default:
- break;
- }
- }
+    // U can also be a select constant expr, which we let fall through. Since
+    // createNodeForSelect only works for a condition that is an `ICmpInst`, and
+    // constant expressions cannot have instructions as operands, we'd have
+    // returned getUnknown for a select constant expression anyway.
+ if (isa<Instruction>(U))
+ return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
+ U->getOperand(1), U->getOperand(2));
default: // We cannot analyze this expression.
break;
@@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
return 1;
// Get the trip count from the BE count by adding 1.
- const SCEV *TCMul = getAddExpr(ExitCount,
- getConstant(ExitCount->getType(), 1));
+ const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
// FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
// to factor simple cases.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
@@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
if (!Pair.second)
return Pair.first->second;
- // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // computeBackedgeTakenCount may allocate memory for its result. Inserting it
// into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
// must be cleared in this scope.
- BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+ BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
if (Result.getExact(this) != getCouldNotCompute()) {
assert(isLoopInvariant(Result.getExact(this), L) &&
@@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
}
// Re-lookup the insert position, since the call to
- // ComputeBackedgeTakenCount above could result in a
+ // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
@@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) {
}
/// getExact - Get the exact loop backedge taken count considering all loop
-/// exits. A computable result can only be return for loops with a single exit.
-/// Returning the minimum taken count among all exits is incorrect because one
-/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
-/// the limit of each loop test is never skipped. This is a valid assumption as
-/// long as the loop exits via that test. For precise results, it is the
-/// caller's responsibility to specify the relevant loop exit using
+/// exits. A computable result can only be returned for loops with a single
+/// exit. Returning the minimum taken count among all exits is incorrect
+/// because one of the loop's exit limits may have been skipped. HowFarToZero
+/// assumes that the limit of each loop test is never skipped. This is a valid
+/// assumption as long as the loop exits via that test. For precise results, it
+/// is the caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
@@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() {
delete[] ExitNotTaken.getNextExit();
}
-/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// computeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// and compute maxBECount.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
- ExitLimit EL = ComputeExitLimit(L, ExitBB);
+ ExitLimit EL = computeExitLimit(L, ExitBB);
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
@@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
// considered greater than any computable EL.Max.
if (EL.Max != getCouldNotCompute() && Latch &&
- DT->dominates(ExitBB, Latch)) {
+ DT.dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount)
MustExitMaxBECount = EL.Max;
else {
@@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
-/// ComputeExitLimit - Compute the number of times the backedge of the specified
-/// loop will execute if it exits via the specified block.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
- // Okay, we've chosen an exiting block. See what condition causes us to
- // exit at this block and remember the exit block and whether all other targets
+ // Okay, we've chosen an exiting block. See what condition causes us to exit
+ // at this block and remember the exit block and whether all other targets
// lead to the loop header.
bool MustExecuteLoopHeader = true;
BasicBlock *Exit = nullptr;
@@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
if (!Pred)
return getCouldNotCompute();
TerminatorInst *PredTerm = Pred->getTerminator();
- for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
- BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ for (const BasicBlock *PredSucc : PredTerm->successors()) {
if (PredSucc == BB)
continue;
// If the predecessor has a successor that isn't BB and isn't
@@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
// Proceed to the next level to examine the exit condition expression.
- return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+ return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
BI->getSuccessor(1),
/*ControlsExit=*/IsOnlyExit);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
- return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+ return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
/*ControlsExit=*/IsOnlyExit);
return getCouldNotCompute();
}
-/// ComputeExitLimitFromCond - Compute the number of times the
+/// computeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
///
@@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
/// condition is true and can infer that failing to meet the condition prior to
/// integer wraparound results in undefined behavior.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+ScalarEvolution::computeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
@@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
ControlsExit && !EitherMayExit);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
@@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
ControlsExit && !EitherMayExit);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
@@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+ return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
return getCouldNotCompute();
else
// The backedge is never taken.
- return getConstant(CI->getType(), 0);
+ return getZero(CI->getType());
}
// If it's not an integer or pointer comparison then compute it the hard way.
- return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
-/// ComputeExitLimitFromICmp - Compute the number of times the
-/// backedge of the specified loop will execute if its exit condition
-/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
@@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
- ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
+ computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
+ ExitLimit ShiftEL = computeShiftCompareExitLimit(
+ ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond);
+ if (ShiftEL.hasAnyInfo())
+ return ShiftEL;
+
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
@@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange(
- ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+ ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
@@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
break;
}
default:
-#if 0
- dbgs() << "ComputeBackedgeTakenCount ";
- if (ExitCond->getOperand(0)->getType()->isUnsigned())
- dbgs() << "[unsigned] ";
- dbgs() << *LHS << " "
- << Instruction::getOpcodeName(Instruction::ICmp)
- << " " << *RHS << "\n";
-#endif
break;
}
- return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
bool ControlsExit) {
@@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
+/// computeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
ScalarEvolution::ExitLimit
-ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+ScalarEvolution::computeLoadConstantCompareExitLimit(
LoadInst *LI,
Constant *RHS,
const Loop *L,
@@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
Result = ConstantExpr::getICmp(predicate, Result, RHS);
if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
-#if 0
- dbgs() << "\n***\n*** Computed loop count " << *ItCst
- << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
- << "***\n";
-#endif
++NumArrayLenItCounts;
return getConstant(ItCst); // Found terminating iteration!
}
@@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
return getCouldNotCompute();
}
+ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
+ Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
+ ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
+ if (!RHS)
+ return getCouldNotCompute();
+
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return getCouldNotCompute();
+
+ const BasicBlock *Predecessor = L->getLoopPredecessor();
+ if (!Predecessor)
+ return getCouldNotCompute();
+
+ // Return true if V is of the form "LHS `shift_op` <positive constant>".
+  // Return LHS in OutLHS and shift_op in OutOpCode.
+ auto MatchPositiveShift =
+ [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
+
+ using namespace PatternMatch;
+
+ ConstantInt *ShiftAmt;
+ if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::LShr;
+ else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::AShr;
+ else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::Shl;
+ else
+ return false;
+
+ return ShiftAmt->getValue().isStrictlyPositive();
+ };
+
+ // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
+ //
+ // loop:
+ // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
+ // %iv.shifted = lshr i32 %iv, <positive constant>
+ //
+  // Return true on a successful match. Return the corresponding PHI node (%iv
+ // above) in PNOut and the opcode of the shift operation in OpCodeOut.
+ auto MatchShiftRecurrence =
+ [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
+ Optional<Instruction::BinaryOps> PostShiftOpCode;
+
+ {
+ Instruction::BinaryOps OpC;
+ Value *V;
+
+ // If we encounter a shift instruction, "peel off" the shift operation,
+ // and remember that we did so. Later when we inspect %iv's backedge
+ // value, we will make sure that the backedge value uses the same
+ // operation.
+ //
+ // Note: the peeled shift operation does not have to be the same
+ // instruction as the one feeding into the PHI's backedge value. We only
+ // really care about it being the same *kind* of shift instruction --
+ // that's all that is required for our later inferences to hold.
+ if (MatchPositiveShift(LHS, V, OpC)) {
+ PostShiftOpCode = OpC;
+ LHS = V;
+ }
+ }
+
+ PNOut = dyn_cast<PHINode>(LHS);
+ if (!PNOut || PNOut->getParent() != L->getHeader())
+ return false;
+
+ Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
+ Value *OpLHS;
+
+ return
+ // The backedge value for the PHI node must be a shift by a positive
+ // amount
+ MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
+
+ // of the PHI node itself
+ OpLHS == PNOut &&
+
+      // and the kind of shift should match the kind of shift we peeled
+ // off, if any.
+ (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
+ };
+
+ PHINode *PN;
+ Instruction::BinaryOps OpCode;
+ if (!MatchShiftRecurrence(LHS, PN, OpCode))
+ return getCouldNotCompute();
+
+ const DataLayout &DL = getDataLayout();
+
+ // The key rationale for this optimization is that for some kinds of shift
+ // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
+ // within a finite number of iterations. If the condition guarding the
+ // backedge (in the sense that the backedge is taken if the condition is true)
+ // is false for the value the shift recurrence stabilizes to, then we know
+ // that the backedge is taken only a finite number of times.
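+  //
+  // For instance, for illustrative IR such as
+  //
+  //   loop:
+  //     %iv = phi i32 [ %start, %preheader ], [ %iv.shifted, %loop ]
+  //     %iv.shifted = lshr i32 %iv, 1
+  //     %cmp = icmp ne i32 %iv.shifted, 0
+  //     br i1 %cmp, label %loop, label %exit
+  //
+  // {%start,lshr,1} stabilizes to 0 within 32 iterations, and the backedge
+  // condition is false for 0, so the backedge can be taken at most 32 times.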
+
+ ConstantInt *StableValue = nullptr;
+ switch (OpCode) {
+ default:
+ llvm_unreachable("Impossible case!");
+
+ case Instruction::AShr: {
+ // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
+ // bitwidth(K) iterations.
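+    // E.g. in i8, {-16,ashr,1} evolves as -16, -8, -4, -2, -1, -1, ... and
+    // stabilizes to -1 (signum(-16)) after at most 8 iterations.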
+ Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
+ bool KnownZero, KnownOne;
+ ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
+ Predecessor->getTerminator(), &DT);
+ auto *Ty = cast<IntegerType>(RHS->getType());
+ if (KnownZero)
+ StableValue = ConstantInt::get(Ty, 0);
+ else if (KnownOne)
+ StableValue = ConstantInt::get(Ty, -1, true);
+ else
+ return getCouldNotCompute();
+
+ break;
+ }
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
+ // stabilize to 0 in at most bitwidth(K) iterations.
+ StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
+ break;
+ }
+
+ auto *Result =
+ ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
+ assert(Result->getType()->isIntegerTy(1) &&
+ "Otherwise cannot be an operand to a branch instruction");
+
+ if (Result->isZeroValue()) {
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *UpperBound =
+ getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
+ return ExitLimit(getCouldNotCompute(), UpperBound);
+ }
+
+ return getCouldNotCompute();
+}
/// CanConstantFold - Return true if we can constant fold an instruction of the
/// specified type, assuming that all operands were constants.
@@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = nullptr;
- for (Instruction::op_iterator OpI = UseInst->op_begin(),
- OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
-
- if (isa<Constant>(*OpI)) continue;
+ for (Value *Op : UseInst->operands()) {
+ if (isa<Constant>(Op)) continue;
- Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
PHINode *P = dyn_cast<PHINode>(OpInst);
@@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !canConstantEvolve(I, L)) return nullptr;
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (PHINode *PN = dyn_cast<PHINode>(I))
return PN;
- }
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
@@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
TLI);
}
+
+// If every incoming value to PN except the one for BB is a specific Constant,
+// return that, else return nullptr.
+static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
+ Constant *IncomingVal = nullptr;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ continue;
+
+ auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (!CurrentVal)
+ return nullptr;
+
+ if (IncomingVal != CurrentVal) {
+ if (IncomingVal)
+ return nullptr;
+ IncomingVal = CurrentVal;
+ }
+ }
+
+ return IncomingVal;
+}
+
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
@@ -5462,8 +5946,7 @@ Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- DenseMap<PHINode*, Constant*>::const_iterator I =
- ConstantEvolutionLoopExitValue.find(PN);
+ auto I = ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
@@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
- // Since the loop is canonicalized, the PHI node must have two entries. One
- // entry must be a constant (coming in from outside of the loop), and the
- // second must be derived from the same PHI.
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = nullptr;
- for (BasicBlock::iterator I = Header->begin();
- (PHI = dyn_cast<PHINode>(I)); ++I) {
- Constant *StartCST =
- dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return nullptr;
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI) break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
- Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+ Value *BEValue = PN->getIncomingValueForBlock(Latch);
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
@@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
@@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI =
- EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// cease to be able to evaluate one of them or if they stop evolving,
// because that doesn't necessarily prevent us from computing PN.
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
- for (DenseMap<Instruction *, Constant *>::const_iterator
- I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
- PHINode *PHI = dyn_cast<PHINode>(I->first);
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
- PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ PHIsToCompute.emplace_back(PHI, I.second);
}
// We use two distinct loops because EvaluateExpression may invalidate any
// iterators into CurrentIterVals.
- for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
- I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
- PHINode *PHI = I->first;
+ for (const auto &I : PHIsToCompute) {
+ PHINode *PHI = I.first;
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
- Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
- if (NextPHI != I->second)
+ if (NextPHI != I.second)
StoppedEvolving = false;
}
@@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
}
}
-/// ComputeExitCountExhaustively - If the loop is known to execute a
-/// constant number of times (the condition evolves only from constants),
-/// try to evaluate a few iterations of the loop until we get the exit
-/// condition gets a value of ExitWhen (true or false). If we cannot
-/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
@@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
- // One entry must be a constant (coming in from outside of the loop), and the
- // second must be derived from the same PHI.
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = nullptr;
- for (BasicBlock::iterator I = Header->begin();
- (PHI = dyn_cast<PHINode>(I)); ++I) {
- Constant *StartCST =
- dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Should follow from NumIncomingValues == 2!");
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
@@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
- ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
- EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
+ auto *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// calling EvaluateExpression on them because that may invalidate iterators
// into CurrentIterVals.
SmallVector<PHINode *, 8> PHIsToCompute;
- for (DenseMap<Instruction *, Constant *>::const_iterator
- I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
- PHINode *PHI = dyn_cast<PHINode>(I->first);
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(PHI);
}
- for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
- E = PHIsToCompute.end(); I != E; ++I) {
- PHINode *PHI = *I;
+ for (PHINode *PHI : PHIsToCompute) {
Constant *&NextPHI = NextIterVals[PHI];
if (NextPHI) continue; // Already computed!
- Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
CurrentIterVals.swap(NextIterVals);
}
@@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// In the case that a relevant loop exit value cannot be computed, the
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+ ValuesAtScopes[V];
// Check to see if we've folded this expression at this loop before.
- SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == L)
- return Values[u].second ? Values[u].second : V;
- }
- Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
+ for (auto &LS : Values)
+ if (LS.first == L)
+ return LS.second ? LS.second : V;
+
+ Values.emplace_back(L, nullptr);
+
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == L) {
- Values2[u - 1].second = C;
+ for (auto &LS : reverse(ValuesAtScopes[V]))
+ if (LS.first == L) {
+ LS.second = C;
break;
}
- }
return C;
}
@@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
- const Loop *LI = (*this->LI)[I->getParent()];
+ const Loop *LI = this->LI[I->getParent()];
if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
if (PHINode *PN = dyn_cast<PHINode>(I))
if (PN->getParent() == LI->getHeader()) {
@@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
- Constant *RV = getConstantEvolutionLoopExitValue(PN,
- BTCC->getValue()->getValue(),
- LI);
+ Constant *RV =
+ getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
if (RV) return getSCEV(RV);
}
}
@@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (CanConstantFold(I)) {
SmallVector<Constant *, 4> Operands;
bool MadeImprovement = false;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *Op = I->getOperand(i);
+ for (Value *Op : I->operands()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
continue;
@@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], DL, TLI);
+ Operands[1], DL, &TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
} else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
- DL, TLI);
+ DL, &TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
return std::make_pair(CNC, CNC);
}
- uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
- const APInt &L = LC->getValue()->getValue();
- const APInt &M = MC->getValue()->getValue();
- const APInt &N = NC->getValue()->getValue();
+ uint32_t BitWidth = LC->getAPInt().getBitWidth();
+ const APInt &L = LC->getAPInt();
+ const APInt &M = MC->getAPInt();
+ const APInt &N = NC->getAPInt();
APInt Two(BitWidth, 2);
APInt Four(BitWidth, 4);
@@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1 && R2) {
-#if 0
- dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
- << " sol#2: " << *R2 << "\n";
-#endif
// Pick the smallest positive root value.
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
@@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
- bool CountDown = StepC->getValue()->getValue().isNegative();
+ bool CountDown = StepC->getAPInt().isNegative();
const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wraparound.
@@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// done by counting and comparing the number of trailing zeros of Step and
// Distance.
if (!CountDown) {
- const APInt &StepV = StepC->getValue()->getValue();
+ const APInt &StepV = StepC->getAPInt();
    // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
// also returns true if StepV is maximally negative (eg, INT_MIN), but that
// case is not handled as this code is guarded by !CountDown.
if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
- return getUDivExactExpr(Distance, Step);
+ GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
+ // Here we've constrained the equation to be of the form
+ //
+ // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
+ //
+ // where we're operating on a W bit wide integer domain and k is
+ // non-negative. The smallest unsigned solution for X is the trip count.
+ //
+ // (0) is equivalent to:
+ //
+ // 2^(N + k) * Distance' - 2^N * X = L * 2^W
+ // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
+ // <=> 2^k * Distance' - X = L * 2^(W - N)
+ // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
+ //
+ // The smallest X satisfying (1) is unsigned remainder of dividing the LHS
+ // by 2^(W - N).
+ //
+ // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
+ //
+ // E.g. say we're solving
+ //
+ // 2 * Val = 2 * X (in i8) ... (3)
+ //
+ // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
+ //
+ // Note: It is tempting to solve (3) by setting X = Val, but Val is not
+ // necessarily the smallest unsigned value of X that satisfies (3).
+ // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
+      // is i8 1, not i8 -127.
+
+ const auto *ModuloResult = getUDivExactExpr(Distance, Step);
+
+ // Since SCEV does not have a URem node, we construct one using a truncate
+ // and a zero extend.
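+      // E.g. with W = 8 and StepV = 2 (so N = 1 and NarrowWidth = 7), the
+      // "URem 2^7" from (2) becomes a zext-to-i8 of a trunc-to-i7 of
+      // ModuloResult.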
+
+ unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
+ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
+ auto *WideTy = Distance->getType();
+
+ return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+ }
}
// If the condition controls loop exit (the loop exits only if the expression
@@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
- return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
- -StartC->getValue()->getValue(),
+ return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
*this);
return getCouldNotCompute();
}
@@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isNullValue())
- return getConstant(C->getType(), 0);
+ return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
@@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
- if (Loop *L = LI->getLoopFor(BB))
+ if (Loop *L = LI.getLoopFor(BB))
return std::make_pair(L->getLoopPredecessor(), L->getHeader());
return std::pair<BasicBlock *, BasicBlock *>();
@@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
+ auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
+ // Not all instructions that are "identical" compute the same value. For
+ // instance, two distinct alloca instructions allocating the same type are
+    // identical and do not read memory, but compute distinct values.
+    return A->isIdenticalTo(B) &&
+           (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
+ };
+
// Otherwise, if they're both SCEVUnknown, it's possible that they hold
// two different instructions with the same value. Check for this case.
if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
- if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+ if (ComputesEqualValues(AI, BI))
return true;
// Otherwise assume they may have a different value.
@@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// If there's a constant operand, canonicalize comparisons with boundary
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &RA = RC->getAPInt();
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
Pred = ICmpInst::ICMP_ULT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
- LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
- SCEV::FlagNUW);
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
- RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
- SCEV::FlagNUW);
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
} else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
@@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
if (LeftGuarded && RightGuarded)
return true;
+ if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
+ return true;
+
// Otherwise see what can be done with known constant ranges.
return isKnownPredicateWithRanges(Pred, LHS, RHS);
}
+bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+ bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
+
+#ifndef NDEBUG
+ // Verify an invariant: inverting the predicate should turn a monotonically
+ // increasing change to a monotonically decreasing one, and vice versa.
+ bool IncreasingSwapped;
+ bool ResultSwapped = isMonotonicPredicateImpl(
+ LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
+
+ assert(Result == ResultSwapped && "should be able to analyze both!");
+ if (ResultSwapped)
+ assert(Increasing == !IncreasingSwapped &&
+ "monotonicity should flip as we flip the predicate");
+#endif
+
+ return Result;
+}
+
+bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+
+ // A zero step value for LHS means the induction variable is essentially a
+ // loop invariant value. We don't really depend on the predicate actually
+ // flipping from false to true (for increasing predicates, and the other way
+ // around for decreasing predicates), all we care about is that *if* the
+ // predicate changes then it only changes from false to true.
+ //
+ // A zero step value in itself is not very useful, but there may be places
+ // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
+ // as general as possible.
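+  //
+  // E.g. for an illustrative LHS = {0,+,1}<nuw> and Pred = ICMP_ULT,
+  // "{0,+,1} u< N" can only change from true to false as the loop iterates,
+  // so the predicate is monotonically decreasing (Increasing == false).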
+
+ switch (Pred) {
+ default:
+ return false; // Conservative answer
+
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (!LHS->getNoWrapFlags(SCEV::FlagNUW))
+ return false;
+
+ Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
+ return true;
+
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!LHS->getNoWrapFlags(SCEV::FlagNSW))
+ return false;
+
+ const SCEV *Step = LHS->getStepRecurrence(*this);
+
+ if (isKnownNonNegative(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
+ return true;
+ }
+
+ if (isKnownNonPositive(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
+ return true;
+ }
+
+ return false;
+ }
+
+ }
+
+ llvm_unreachable("switch has default clause!");
+}
+
+bool ScalarEvolution::isLoopInvariantPredicate(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+ const SCEV *&InvariantRHS) {
+
+ // If there is a loop-invariant, force it into the RHS, otherwise bail out.
+ if (!isLoopInvariant(RHS, L)) {
+ if (!isLoopInvariant(LHS, L))
+ return false;
+
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!ArLHS || ArLHS->getLoop() != L)
+ return false;
+
+ bool Increasing;
+ if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
+ return false;
+
+ // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
+ // true as the loop iterates, and the backedge is control dependent on
+ // "ArLHS `Pred` RHS" == true then we can reason as follows:
+ //
+ // * if the predicate was false in the first iteration then the predicate
+ // is never evaluated again, since the loop exits without taking the
+ // backedge.
+ // * if the predicate was true in the first iteration then it will
+ // continue to be true for all future iterations since it is
+ // monotonically increasing.
+ //
+ // For both the above possibilities, we can replace the loop varying
+ // predicate with its value on the first iteration of the loop (which is
+ // loop invariant).
+ //
+ // A similar reasoning applies for a monotonically decreasing predicate, by
+ // replacing true with false and false with true in the above two bullets.
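+  //
+  // E.g. for an illustrative "%iv s> %n" with %iv = {%start,+,1}<nsw> and %n
+  // loop invariant, if the backedge is taken only when %iv s> %n holds, the
+  // predicate is monotonically increasing and can be replaced by its
+  // first-iteration value "%start s> %n".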
+
+ auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
+
+ if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
+ return false;
+
+ InvariantPred = Pred;
+ InvariantLHS = ArLHS->getStart();
+ InvariantRHS = RHS;
+ return true;
+}
+
bool
ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
@@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
return false;
}
+bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+
+ // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
+ // Return Y via OutY.
+ auto MatchBinaryAddToConst =
+ [this](const SCEV *Result, const SCEV *X, APInt &OutY,
+ SCEV::NoWrapFlags ExpectedFlags) {
+ const SCEV *NonConstOp, *ConstOp;
+ SCEV::NoWrapFlags FlagsPresent;
+
+ if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
+ !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
+ return false;
+
+ OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
+ return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
+ };
+
+ APInt C;
+
+ switch (Pred) {
+ default:
+ break;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLE:
+ // X s<= (X + C)<nsw> if C >= 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
+ return true;
+
+ // (X + C)<nsw> s<= X if C <= 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
+ !C.isStrictlyPositive())
+ return true;
+ break;
+
+ case ICmpInst::ICMP_SGT:
+ std::swap(LHS, RHS);
+ case ICmpInst::ICMP_SLT:
+ // X s< (X + C)<nsw> if C > 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
+ C.isStrictlyPositive())
+ return true;
+
+ // (X + C)<nsw> s< X if C < 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+ if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
+ return false;
+
+  // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting
+ // the stack can result in exponential time complexity.
+ SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
+
+ // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
+ //
+ // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
+ // isKnownPredicate. isKnownPredicate is more powerful, but also more
+ // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
+ // interesting cases seen in practice. We can consider "upgrading" L >= 0 to
+ // use isKnownPredicate later if needed.
+ return isKnownNonNegative(RHS) &&
+ isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
+ isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
+}
+
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
/// eliminate casts.
@@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
return true;
+ // We don't want more than one activation of the following loops on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
+
+ // See if we can exploit a trip count to prove the predicate.
+ const auto &BETakenInfo = getBackedgeTakenInfo(L);
+ const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
+ if (LatchBECount != getCouldNotCompute()) {
+ // We know that Latch branches back to the loop header exactly
+    // LatchBECount times. This means the backedge condition at Latch is
+ // equivalent to "{0,+,1} u< LatchBECount".
+ Type *Ty = LatchBECount->getType();
+ auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
+ const SCEV *LoopCounter =
+ getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
+ if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
+ LatchBECount))
+ return true;
+ }
+
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &AssumeVH : AC->assumptions()) {
+ for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
- if (!DT->dominates(CI, Latch->getTerminator()))
+ if (!DT.dominates(CI, Latch->getTerminator()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
return true;
}
- struct ClearWalkingBEDominatingCondsOnExit {
- ScalarEvolution &SE;
-
- explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
- : SE(SE){};
-
- ~ClearWalkingBEDominatingCondsOnExit() {
- SE.WalkingBEDominatingConds = false;
- }
- };
-
- // We don't want more than one activation of the following loop on the stack
- // -- that can lead to O(n!) time complexity.
- if (WalkingBEDominatingConds)
- return false;
-
- WalkingBEDominatingConds = true;
- ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
-
// If the loop is not reachable from the entry block, we risk running into an
// infinite loop as we walk up into the dom tree. These loops do not matter
// anyway, so we just return a conservative answer when we see them.
- if (!DT->isReachableFromEntry(L->getHeader()))
+ if (!DT.isReachableFromEntry(L->getHeader()))
return false;
- for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
- DTN != HeaderDTN;
- DTN = DTN->getIDom()) {
+ for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+ DTN != HeaderDTN; DTN = DTN->getIDom()) {
assert(DTN && "should reach the loop header before reaching the root!");
@@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// We're constructively (and conservatively) enumerating edges within the
// loop body that dominate the latch. The dominator tree better agree
// with us on this:
- assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+ assert(DT.dominates(DominatingEdge, Latch) && "should be!");
if (isImpliedCond(Pred, LHS, RHS, Condition,
BB != ContinuePredicate->getSuccessor(0)))
@@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
}
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &AssumeVH : AC->assumptions()) {
+ for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
- if (!DT->dominates(CI, L->getHeader()))
+ if (!DT.dominates(CI, L->getHeader()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
@@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
+namespace {
/// RAII wrapper to prevent recursive application of isImpliedCond.
/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
/// currently evaluating isImpliedCond.
@@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate {
LoopPreds.erase(Cond);
}
};
+} // end anonymous namespace
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
@@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ ICmpInst::Predicate FoundPred,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
@@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
RHS, LHS, FoundLHS, FoundRHS);
}
+  // Unsigned comparison is the same as signed comparison when both operands
+  // are non-negative.
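+  // E.g. in i8, 5 u< 100 and 5 s< 100 agree; the unsigned and signed orders
+  // only diverge once a sign bit is involved.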
+ if (CmpInst::isUnsigned(FoundPred) &&
+ CmpInst::getSignedPredicate(FoundPred) == Pred &&
+ isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
(isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
@@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
APInt Min = ICmpInst::isSigned(Pred) ?
getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
- if (Min == C->getValue()->getValue()) {
+ if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
// This is true even if (Min + 1) wraps around -- in case of
// wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
@@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
return false;
}
+bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
+ const SCEV *&L, const SCEV *&R,
+ SCEV::NoWrapFlags &Flags) {
+ const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
+ if (!AE || AE->getNumOperands() != 2)
+ return false;
+
+ L = AE->getOperand(0);
+ R = AE->getOperand(1);
+ Flags = AE->getNoWrapFlags();
+ return true;
+}
+
+bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
+ const SCEV *More,
+ APInt &C) {
+ // We avoid subtracting expressions here because this function is usually
+ // fairly deep in the call stack (i.e. is called many times).
+
+ if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
+ const auto *LAR = cast<SCEVAddRecExpr>(Less);
+ const auto *MAR = cast<SCEVAddRecExpr>(More);
+
+ if (LAR->getLoop() != MAR->getLoop())
+ return false;
+
+  // We look at affine expressions only, not for correctness but to keep
+ // getStepRecurrence cheap.
+ if (!LAR->isAffine() || !MAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
+ return false;
+
+ Less = LAR->getStart();
+ More = MAR->getStart();
+
+ // fall through
+ }
+
+ if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
+ const auto &M = cast<SCEVConstant>(More)->getAPInt();
+ const auto &L = cast<SCEVConstant>(Less)->getAPInt();
+ C = M - L;
+ return true;
+ }
+
+ const SCEV *L, *R;
+ SCEV::NoWrapFlags Flags;
+ if (splitBinaryAdd(Less, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == More) {
+ C = -(LC->getAPInt());
+ return true;
+ }
+
+ if (splitBinaryAdd(More, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == Less) {
+ C = LC->getAPInt();
+ return true;
+ }
+
+ return false;
+}
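+
+// Editorial sketch (illustrative, not part of this change): for Less = %n
+// and More = (5 + %n), the splitBinaryAdd path above matches the constant
+// addend and yields C = 5; for Less = {2,+,4} and More = {10,+,4} over the
+// same loop, the addrec path strips the common step and compares the
+// constant starts, yielding C = 8.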
+
+bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS) {
+ if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
+ return false;
+
+ const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRecLHS)
+ return false;
+
+ const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
+ if (!AddRecFoundLHS)
+ return false;
+
+  // We'd like to let SCEV reason about control dependencies, so we constrain
+  // both inequalities to be about add recurrences on the same loop. This way
+  // we can use isLoopEntryGuardedByCond later.
+
+ const Loop *L = AddRecFoundLHS->getLoop();
+ if (L != AddRecLHS->getLoop())
+ return false;
+
+ // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
+ // ... (2)
+ //
+ // Informal proof for (2), assuming (1) [*]:
+ //
+ // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
+ //
+ // Then
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C
+ // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
+ // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
+ // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
+ // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
+ // <=> FoundLHS + C s< FoundRHS + C
+ //
+ // [*]: (1) can be proved by ruling out overflow.
+ //
+ // [**]: This can be proved by analyzing all the four possibilities:
+ // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
+ // (A s>= 0, B s>= 0).
+ //
+ // Note:
+ // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
+ // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
+ // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
+ // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
+ // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
+ // C)".
+
+ APInt LDiff, RDiff;
+ if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
+ !computeConstantDifference(FoundRHS, RHS, RDiff) ||
+ LDiff != RDiff)
+ return false;
+
+ if (LDiff == 0)
+ return true;
+
+ APInt FoundRHSLimit;
+
+ if (Pred == CmpInst::ICMP_ULT) {
+ FoundRHSLimit = -RDiff;
+ } else {
+ assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
+    FoundRHSLimit =
+        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ }
+
+ // Try to prove (1) or (2), as needed.
+ return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+ getConstant(FoundRHSLimit));
+}
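+
+// Editorial sketch (illustrative, not part of this change): with i8 operands,
+// take FoundLHS = {0,+,1} and LHS = {3,+,1} over the same loop, FoundRHS = %n
+// and RHS = (%n + 3). Both differences are 3, so for ICMP_ULT the limit is
+// -3, i.e. (i8)253. If loop entry guarantees %n u< 253, then by fact (1)
+// adding 3 to both sides of the antecedent "FoundLHS u< %n" cannot wrap,
+// proving the consequent "LHS u< RHS".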
+
/// isImpliedCondOperands - Test whether the condition described by Pred,
/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
@@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
+ if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2) return nullptr;
-
- const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
- if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+ if (!Add || Add->getNumOperands() != 2 ||
+ !Add->getOperand(0)->isAllOnesValue())
return nullptr;
const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
-
- const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
- if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+ if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+ !AddRHS->getOperand(0)->isAllOnesValue())
return nullptr;
return AddRHS->getOperand(1);
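
// Editorial note (illustrative, not part of this change): MatchNotExpr relies
// on the canonical SCEV form of a bitwise not, ~A == (-1) + (-1 * A), which
// follows from the two's complement identity ~A == -A - 1.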
@@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
if (!MaxExpr) return false;
- auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
- return It != MaxExpr->op_end();
+ return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
}
@@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
}
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+
+ // If both sides are affine addrecs for the same loop, with equal
+ // steps, and we know the recurrences don't wrap, then we only
+ // need to check the predicate on the starting values.
+
+ if (!ICmpInst::isRelational(Pred))
+ return false;
+
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!LAR)
+ return false;
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ if (!RAR)
+ return false;
+ if (LAR->getLoop() != RAR->getLoop())
+ return false;
+ if (!LAR->isAffine() || !RAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+ return false;
+
+ SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+ SCEV::FlagNSW : SCEV::FlagNUW;
+ if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+ return false;
+
+ return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
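+
+// Editorial sketch (illustrative, not part of this change): for two nuw
+// addrecs over the same loop, {2,+,4}<nuw> u< {10,+,4}<nuw> reduces to the
+// start comparison 2 u< 10: both sides advance by the same step on every
+// iteration and neither can wrap, so the initial ordering is preserved.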
/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
/// expression?
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
auto IsKnownPredicateFull =
[this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
- IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+ IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+ isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
};
switch (Pred) {
@@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
!isa<SCEVConstant>(AddLHS->getOperand(0)))
return false;
- APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
// antecedent "`FoundLHS` `Pred` `FoundRHS`".
@@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
// for `LHS`:
- APInt Addend =
- cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+ APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
- APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
ConstantRange SatisfyingLHSRange =
ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
@@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
- const SCEV *One = getConstant(Stride->getType(), 1);
+ const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MaxRHS = getSignedRange(RHS).getSignedMax();
@@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
- const SCEV *One = getConstant(Stride->getType(), 1);
+ const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MinRHS = getSignedRange(RHS).getSignedMin();
@@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
// stride and presence of the equality in the comparison.
const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
- const SCEV *One = getConstant(Step->getType(), 1);
+ const SCEV *One = getOne(Step->getType());
Delta = Equality ? getAddExpr(Delta, Step)
: getAddExpr(Delta, getMinusSCEV(Step, One));
return getUDivExpr(Delta, Step);
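
// Editorial note (worked instance, not part of this change): for Delta = 8
// and Step = 2, the non-equality form computes (8 + (2 - 1)) / 2 = 4, a
// ceiling division of Delta by Step, while the equality form computes
// (8 + 2) / 2 = 5, admitting the extra iteration in which the IV equals the
// exit bound.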
@@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// overflow, in which case if RHS - Start is a constant, we don't need to
// do a max operation since we can just figure it out statically
if (NoWrap && isa<SCEVConstant>(Diff)) {
- APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();
if (D.isNegative())
End = Start;
} else
@@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// overflow, in which case if RHS - Start is a constant, we don't need to
// do a max operation since we can just figure it out statically
if (NoWrap && isa<SCEVConstant>(Diff)) {
- APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();
if (!D.isNegative())
End = Start;
} else
@@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
- Operands[0] = SE.getConstant(SC->getType(), 0);
+ Operands[0] = SE.getZero(SC->getType());
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
- if (const SCEVAddRecExpr *ShiftedAddRec =
- dyn_cast<SCEVAddRecExpr>(Shifted))
+ if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
- Range.subtract(SC->getValue()->getValue()), SE);
+ Range.subtract(SC->getAPInt()), SE);
// This is strange and shouldn't happen.
return SE.getCouldNotCompute();
}
// The only time we can solve this is when we have all constant indices.
// Otherwise, we cannot determine the overflow conditions.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!isa<SCEVConstant>(getOperand(i)))
- return SE.getCouldNotCompute();
-
+ if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
+ return SE.getCouldNotCompute();
// Okay at this point we know that all elements of the chrec are constants and
// that the start element is zero.
@@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(getType(), 0);
+ return SE.getZero(getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
@@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
APInt One(BitWidth,1);
- APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
// The exit value should be (End+A)/A.
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
FlagAnyWrap);
// Next, solve the constructed addrec
- std::pair<const SCEV *,const SCEV *> Roots =
- SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
if (R1) {
// Pick the smallest positive root value.
- if (ConstantInt *CB =
- dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
- R1->getValue(), R2->getValue()))) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
+ ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
@@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (Range.contains(R1Val->getValue())) {
// The next iteration must be out of the range...
ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+ ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (!Range.contains(R1Val->getValue()))
@@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If R1 was not in the range, then it is a good return value. Make
// sure that R1-1 WAS in the range though, just in case.
ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+ ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
if (Range.contains(R1Val->getValue()))
return R1;
@@ -7644,9 +8551,84 @@ struct SCEVCollectTerms {
}
bool isDone() const { return false; }
};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+      // Stop recursion: we found an AddRec, no need to walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+                              ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ if (isa<SCEVUnknown>(Op)) {
+ Operands.push_back(Op);
+ } else {
+ bool ContainsAddRec;
+        SCEVHasAddRec HasAddRecVisitor(ContainsAddRec);
+        visitAll(Op, HasAddRecVisitor);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+      if (Operands.empty())
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
}
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Terms) {
SmallVector<const SCEV *, 4> Strides;
@@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+ visitAll(Expr, MulCollector);
}
static bool findArrayDimensionsRec(ScalarEvolution &SE,
@@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-namespace {
-struct FindParameter {
- bool FoundParameter;
- FindParameter() : FoundParameter(false) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S)) {
- FoundParameter = true;
- // Stop recursion: we found a parameter.
- return false;
- }
- // Keep looking.
- return true;
- }
- bool isDone() const {
- // Stop recursion if we have found a parameter.
- return FoundParameter;
- }
-};
-}
-
// Returns true when S contains at least one SCEVUnknown parameter.
static inline bool
containsParameters(const SCEV *S) {
+ struct FindParameter {
+ bool FoundParameter;
+ FindParameter() : FoundParameter(false) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S)) {
+ FoundParameter = true;
+ // Stop recursion: we found a parameter.
+ return false;
+ }
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found a parameter.
+ return FoundParameter;
+ }
+ };
+
FindParameter F;
SCEVTraversal<FindParameter> ST(F);
ST.visitAll(S);
@@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- // Divide all terms by the element size.
+  // Try to divide all terms by the element size. If a term is not divisible
+  // by the element size, keep the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
- Term = Q;
+ if (!Q->isZero())
+ Term = Q;
}
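
// Editorial note (illustrative, not part of this change): with ElementSize
// = 4, a term such as (8 * %n) divides evenly and is replaced by its
// quotient (2 * %n), while a term that is not divisible yields a zero
// quotient, in which case the original term is kept rather than lost.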
SmallVector<const SCEV *, 4> NewTerms;
@@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions(
if (Sizes.empty())
return;
- if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
if (!AR->isAffine())
return;
@@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//
-ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
- LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
- initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
-}
-
-bool ScalarEvolution::runOnFunction(Function &F) {
- this->F = &F;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return false;
-}
-
-void ScalarEvolution::releaseMemory() {
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+ AssumptionCache &AC, DominatorTree &DT,
+ LoopInfo &LI)
+ : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+ CouldNotCompute(new SCEVCouldNotCompute()),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
+ FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+ : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+ CouldNotCompute(std::move(Arg.CouldNotCompute)),
+ ValueExprMap(std::move(Arg.ValueExprMap)),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+ ConstantEvolutionLoopExitValue(
+ std::move(Arg.ConstantEvolutionLoopExitValue)),
+ ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+ LoopDispositions(std::move(Arg.LoopDispositions)),
+ BlockDispositions(std::move(Arg.BlockDispositions)),
+ UnsignedRanges(std::move(Arg.UnsignedRanges)),
+ SignedRanges(std::move(Arg.SignedRanges)),
+ UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+ UniquePreds(std::move(Arg.UniquePreds)),
+ SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ FirstUnknown(Arg.FirstUnknown) {
+ Arg.FirstUnknown = nullptr;
+}
+
+ScalarEvolution::~ScalarEvolution() {
// Iterate through all the SCEVUnknown instances and call their
// destructors, so that they release their references to their values.
- for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
- U->~SCEVUnknown();
+ for (SCEVUnknown *U = FirstUnknown; U;) {
+ SCEVUnknown *Tmp = U;
+ U = U->Next;
+ Tmp->~SCEVUnknown();
+ }
FirstUnknown = nullptr;
ValueExprMap.clear();
// Free any extra memory created for ExitNotTakenInfo in the unlikely event
// that a loop had multiple computable exits.
- for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
- BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
- I != E; ++I) {
- I->second.clear();
- }
+ for (auto &BTCI : BackedgeTakenCounts)
+ BTCI.second.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
-
- BackedgeTakenCounts.clear();
- ConstantEvolutionLoopExitValue.clear();
- ValuesAtScopes.clear();
- LoopDispositions.clear();
- BlockDispositions.clear();
- UnsignedRanges.clear();
- SignedRanges.clear();
- UniqueSCEVs.clear();
- SCEVAllocator.Reset();
-}
-
-void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << "\n";
}
-void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+void ScalarEvolution::print(raw_ostream &OS) const {
// ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
@@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
OS << "Classifying expressions for: ";
- F->printAsOperand(OS, /*PrintType=*/false);
+ F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
- OS << *I << '\n';
+ for (Instruction &I : instructions(F))
+ if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+ OS << I << '\n';
OS << " --> ";
- const SCEV *SV = SE.getSCEV(&*I);
+ const SCEV *SV = SE.getSCEV(&I);
SV->print(OS);
if (!isa<SCEVCouldNotCompute>(SV)) {
OS << " U: ";
@@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
SE.getSignedRange(SV).print(OS);
}
- const Loop *L = LI->getLoopFor((*I).getParent());
+ const Loop *L = LI.getLoopFor(I.getParent());
const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
@@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
}
OS << "Determining loop execution counts for: ";
- F->printAsOperand(OS, /*PrintType=*/false);
+ F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
PrintLoopInfo(OS, &SE, *I);
}
@@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
- for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I)
- if (!isLoopInvariant(*I, L))
+ for (auto *Op : AR->operands())
+ if (!isLoopInvariant(Op, L))
return LoopVariant;
// Otherwise it's loop-invariant.
@@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr: {
- const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool HasVarying = false;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- LoopDisposition D = getLoopDisposition(*I, L);
+ for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+ LoopDisposition D = getLoopDisposition(Op, L);
if (D == LoopVariant)
return LoopVariant;
if (D == LoopComputable)
@@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
- if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
return LoopInvariant;
case scCouldNotCompute:
@@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
// produces the addrec's value is a PHI, and a PHI effectively properly
// dominates its entire containing block.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
- if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
}
// FALL THROUGH into SCEVNAryExpr handling.
@@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- BlockDisposition D = getBlockDisposition(*I, BB);
+ for (const SCEV *NAryOp : NAry->operands()) {
+ BlockDisposition D = getBlockDisposition(NAryOp, BB);
if (D == DoesNotDominateBlock)
return DoesNotDominateBlock;
if (D == DominatesBlock)
@@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
if (I->getParent() == BB)
return DominatesBlock;
- if (DT->properlyDominates(I->getParent(), BB))
+ if (DT.properlyDominates(I->getParent(), BB))
return ProperlyDominatesBlock;
return DoesNotDominateBlock;
}
@@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
-namespace {
-// Search for a SCEV expression node within an expression tree.
-// Implements SCEVTraversal::Visitor.
-struct SCEVSearch {
- const SCEV *Node;
- bool IsFound;
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ // Search for a SCEV expression node within an expression tree.
+ // Implements SCEVTraversal::Visitor.
+ struct SCEVSearch {
+ const SCEV *Node;
+ bool IsFound;
- SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+ SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
- bool follow(const SCEV *S) {
- IsFound |= (S == Node);
- return !IsFound;
- }
- bool isDone() const { return IsFound; }
-};
-}
+ bool follow(const SCEV *S) {
+ IsFound |= (S == Node);
+ return !IsFound;
+ }
+ bool isDone() const { return IsFound; }
+ };
-bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
SCEVSearch Search(Op);
visitAll(S, Search);
return Search.IsFound;
@@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
static void
getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
- for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
- getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
-
- std::string &S = Map[L];
- if (S.empty()) {
- raw_string_ostream OS(S);
- SE.getBackedgeTakenCount(L)->print(OS);
+ std::string &S = Map[L];
+ if (S.empty()) {
+ raw_string_ostream OS(S);
+ SE.getBackedgeTakenCount(L)->print(OS);
- // false and 0 are semantically equivalent. This can happen in dead loops.
- replaceSubString(OS.str(), "false", "0");
- // Remove wrap flags, their use in SCEV is highly fragile.
- // FIXME: Remove this when SCEV gets smarter about them.
- replaceSubString(OS.str(), "<nw>", "");
- replaceSubString(OS.str(), "<nsw>", "");
- replaceSubString(OS.str(), "<nuw>", "");
- }
+ // false and 0 are semantically equivalent. This can happen in dead loops.
+ replaceSubString(OS.str(), "false", "0");
+ // Remove wrap flags, their use in SCEV is highly fragile.
+ // FIXME: Remove this when SCEV gets smarter about them.
+ replaceSubString(OS.str(), "<nw>", "");
+ replaceSubString(OS.str(), "<nsw>", "");
+ replaceSubString(OS.str(), "<nuw>", "");
}
-}
-void ScalarEvolution::verifyAnalysis() const {
- if (!VerifySCEV)
- return;
+ for (auto *R : reverse(*L))
+ getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
+}
+void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
// Gather stringified backedge taken counts for all loops using SCEV's caches.
// FIXME: It would be much better to store actual values instead of strings,
// but SCEV pointers will change if we drop the caches.
VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
- // Gather stringified backedge taken counts for all loops without using
- // SCEV's caches.
- SE.releaseMemory();
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+ // Gather stringified backedge taken counts for all loops using a fresh
+ // ScalarEvolution object.
+ ScalarEvolution SE2(F, TLI, AC, DT, LI);
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
// Now compare whether they're the same with and without caches. This allows
// verifying that no pass changed the cache.
@@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const {
// TODO: Verify more things.
}
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+ AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+ initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+ SE.reset(new ScalarEvolution(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+ return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+ SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+ if (!VerifySCEV)
+ return;
+
+ SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+const SCEVPredicate *
+ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
+ const SCEVConstant *RHS) {
+ FoldingSetNodeID ID;
+  // Unique this node based on the arguments.
+ ID.AddInteger(SCEVPredicate::P_Equal);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = nullptr;
+ if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
+ return S;
+ SCEVEqualPredicate *Eq = new (SCEVAllocator)
+ SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+ UniquePreds.InsertNode(Eq, IP);
+ return Eq;
+}
+
+namespace {
+class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ SCEVUnionPredicate &A) {
+ SCEVPredicateRewriter Rewriter(SE, A);
+ return Rewriter.visit(Scev);
+ }
+
+ SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P)
+ : SCEVRewriteVisitor(SE), P(P) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ auto ExprPreds = P.getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+
+ return Expr;
+ }
+
+private:
+ SCEVUnionPredicate &P;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev,
+ SCEVUnionPredicate &Preds) {
+ return SCEVPredicateRewriter::rewrite(Scev, *this, Preds);
+}
+
+/// SCEV predicates
+SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
+ SCEVPredicateKind Kind)
+ : FastID(ID), Kind(Kind) {}
+
+SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
+ const SCEVUnknown *LHS,
+ const SCEVConstant *RHS)
+ : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+
+bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
+ const auto *Op = dyn_cast<const SCEVEqualPredicate>(N);
+
+ if (!Op)
+ return false;
+
+ return Op->LHS == LHS && Op->RHS == RHS;
+}
+
+bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+
+const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+
+void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+}
+
+/// Union predicates don't get cached, so create a dummy set ID for them.
+SCEVUnionPredicate::SCEVUnionPredicate()
+ : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+
+bool SCEVUnionPredicate::isAlwaysTrue() const {
+ return all_of(Preds,
+ [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
+}
+
+ArrayRef<const SCEVPredicate *>
+SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
+ auto I = SCEVToPreds.find(Expr);
+ if (I == SCEVToPreds.end())
+ return ArrayRef<const SCEVPredicate *>();
+ return I->second;
+}
+
+bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N))
+ return all_of(Set->Preds,
+ [this](const SCEVPredicate *I) { return this->implies(I); });
+
+ auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
+ if (ScevPredsIt == SCEVToPreds.end())
+ return false;
+ auto &SCEVPreds = ScevPredsIt->second;
+
+ return any_of(SCEVPreds,
+ [N](const SCEVPredicate *I) { return I->implies(N); });
+}
+
+const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
+
+void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ for (auto Pred : Preds)
+ Pred->print(OS, Depth);
+}
+
+void SCEVUnionPredicate::add(const SCEVPredicate *N) {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) {
+ for (auto Pred : Set->Preds)
+ add(Pred);
+ return;
+ }
+
+ if (implies(N))
+ return;
+
+ const SCEV *Key = N->getExpr();
+  assert(Key && "Only SCEVUnionPredicate doesn't have an "
+                "associated expression!");
+
+ SCEVToPreds[Key].push_back(N);
+ Preds.push_back(N);
+}
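+
+// Editorial sketch (hypothetical usage, not part of this change):
+//
+//   SCEVUnionPredicate Union;
+//   Union.add(P1);   // recorded under P1->getExpr()
+//   Union.add(P1);   // no-op: the second add is already implied
+//
+// A SCEVUnionPredicate argument is flattened one element at a time by the
+// recursive call above.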
+
+PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE)
+ : SE(SE), Generation(0) {}
+
+const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
+ const SCEV *Expr = SE.getSCEV(V);
+ RewriteEntry &Entry = RewriteMap[Expr];
+
+ // If we already have an entry and the version matches, return it.
+ if (Entry.second && Generation == Entry.first)
+ return Entry.second;
+
+ // We found an entry but it's stale. Rewrite the stale entry
+  // according to the current predicate.
+ if (Entry.second)
+ Expr = Entry.second;
+
+ const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+ Entry = {Generation, NewSCEV};
+
+ return NewSCEV;
+}
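+
+// Editorial sketch (hypothetical usage, not part of this change):
+//
+//   PredicatedScalarEvolution PSE(SE);
+//   const SCEV *S0 = PSE.getSCEV(V); // cached under the current generation
+//   PSE.addPredicate(*Pred);         // bumps the generation (see below)
+//   const SCEV *S1 = PSE.getSCEV(V); // stale entry rewritten and re-cached
+//
+// S0 and S1 may differ if Pred enabled further simplification of V's SCEV.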
+
+void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
+ if (Preds.implies(&Pred))
+ return;
+ Preds.add(&Pred);
+ updateGeneration();
+}
+
+const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
+ return Preds;
+}
+
+void PredicatedScalarEvolution::updateGeneration() {
+  // If the generation number wrapped around, recompute everything.
+ if (++Generation == 0) {
+ for (auto &II : RewriteMap) {
+ const SCEV *Rewritten = II.second.second;
+ II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6bc0d85..2e50c80 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,125 +19,42 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
-namespace {
- /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
- /// implementation that uses ScalarEvolution to answer queries.
- class ScalarEvolutionAliasAnalysis : public FunctionPass,
- public AliasAnalysis {
- ScalarEvolution *SE;
-
- public:
- static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
- initializeScalarEvolutionAliasAnalysisPass(
- *PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
-
- Value *GetBaseValue(const SCEV *S);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
- "ScalarEvolution-based Alias Analysis", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
- "ScalarEvolution-based Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
- return new ScalarEvolutionAliasAnalysis();
-}
-
-void
-ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<ScalarEvolution>();
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
-}
-
-bool
-ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
- InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
- SE = &getAnalysis<ScalarEvolution>();
- return false;
-}
-
-/// GetBaseValue - Given an expression, try to find a
-/// base value. Return null is none was found.
-Value *
-ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // In an addrec, assume that the base will be in the start, rather
- // than the step.
- return GetBaseValue(AR->getStart());
- } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
- // If there's a pointer operand, it'll be sorted at the end of the list.
- const SCEV *Last = A->getOperand(A->getNumOperands()-1);
- if (Last->getType()->isPointerTy())
- return GetBaseValue(Last);
- } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // This is a leaf node.
- return U->getValue();
- }
- // No Identified object found.
- return nullptr;
-}
-
-AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
if (LocA.Size == 0 || LocB.Size == 0)
return NoAlias;
- // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
- const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
- const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+ // This is SCEVAAResult. Get the SCEVs!
+ const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));
// If they evaluate to the same expression, it's a MustAlias.
- if (AS == BS) return MustAlias;
+ if (AS == BS)
+ return MustAlias;
// If something is known about the difference between the two addresses,
// see if it's enough to prove a NoAlias.
- if (SE->getEffectiveSCEVType(AS->getType()) ==
- SE->getEffectiveSCEVType(BS->getType())) {
- unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ if (SE.getEffectiveSCEVType(AS->getType()) ==
+ SE.getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
APInt ASizeInt(BitWidth, LocA.Size);
APInt BSizeInt(BitWidth, LocB.Size);
// Compute the difference between the two pointers.
- const SCEV *BA = SE->getMinusSCEV(BS, AS);
+ const SCEV *BA = SE.getMinusSCEV(BS, AS);
// Test whether the difference is known to be great enough that memory of
// the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
// are non-zero, which is special-cased above.
- if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
- (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
return NoAlias;
// Folding the subtraction while preserving range information can be tricky
@@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
// and try again to see if things fold better that way.
// Compute the difference between the two pointers.
- const SCEV *AB = SE->getMinusSCEV(AS, BS);
+ const SCEV *AB = SE.getMinusSCEV(AS, BS);
// Test whether the difference is known to be great enough that memory of
// the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
// are non-zero, which is special-cased above.
- if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
- (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))
return NoAlias;
}
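
// Editorial note (worked instance, not part of this change): if AS = %p and
// BS = (%p + 16) with LocA.Size = LocB.Size = 8, then BA = 16; ASizeInt (8)
// u<= umin(BA) and -BSizeInt (a large unsigned value) u>= umax(BA), so the
// two 8-byte accesses cannot overlap and NoAlias is returned.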
@@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
return NoAlias;
// Forward the query to the next analysis.
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
+}
+
+/// Given an expression, try to find a base value.
+///
+/// Returns null if none was found.
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands() - 1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+  // No identified object found.
+ return nullptr;
+}
+
+SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<ScalarEvolutionAnalysis>(F));
+}
+
+char SCEVAA::PassID;
+
+char SCEVAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+
+FunctionPass *llvm::createSCEVAAWrapperPass() {
+ return new SCEVAAWrapperPass();
+}
+
+SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) {
+ initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SCEVAAWrapperPass::runOnFunction(Function &F) {
+ Result.reset(
+ new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<ScalarEvolutionWrapperPass>().getSE()));
+ return false;
+}
+
+void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index fee2a2d..921403d 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// Create a new cast, and leave the old cast in place in case
// it is being used as an insert point. Clear its operand
// so that it doesn't hold anything live.
- Ret = CastInst::Create(Op, V, Ty, "", IP);
+ Ret = CastInst::Create(Op, V, Ty, "", &*IP);
Ret->takeName(CI);
CI->replaceAllUsesWith(Ret);
CI->setOperand(0, UndefValue::get(V->getType()));
@@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// Create a new cast.
if (!Ret)
- Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
// We assert at the end of the function since IP might point to an
// instruction with different dominance properties than a cast
// (an invoke for example) and not dominate BIP (but the cast does).
- assert(SE.DT->dominates(Ret, BIP));
+ assert(SE.DT.dominates(Ret, &*BIP));
rememberInstruction(Ret);
return Ret;
}
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+ BasicBlock *MustDominate) {
+ BasicBlock::iterator IP = ++I->getIterator();
+ if (auto *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+
+ while (isa<PHINode>(IP))
+ ++IP;
+
+ while (IP->isEHPad()) {
+ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+ ++IP;
+ } else if (isa<CatchSwitchInst>(IP)) {
+ IP = MustDominate->getFirstInsertionPt();
+ } else {
+ llvm_unreachable("unexpected eh pad!");
+ }
+ }
+
+ return IP;
+}
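+
+// Editorial note (illustrative, not part of this change): for a plain
+// instruction the returned iterator is simply the next instruction; for an
+// invoke it is the start of the normal destination. PHIs and EH pads are
+// then skipped so the caller can insert real code, falling back to
+// MustDominate's first insertion point at a catchswitch, which has no
+// usable insertion slot of its own.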
+
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
@@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
while ((isa<BitCastInst>(IP) &&
isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
cast<BitCastInst>(IP)->getOperand(0) != A) ||
- isa<DbgInfoIntrinsic>(IP) ||
- isa<LandingPadInst>(IP))
+ isa<DbgInfoIntrinsic>(IP))
++IP;
return ReuseOrCreateCast(A, Ty, Op, IP);
}
// Cast the instruction immediately after the instruction.
Instruction *I = cast<Instruction>(V);
- BasicBlock::iterator IP = I; ++IP;
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
- IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
- ++IP;
+ BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
ScanLimit++;
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
IP->getOperand(1) == RHS)
- return IP;
+ return &*IP;
if (IP == BlockBegin) break;
}
}
@@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// If we haven't found this binop, insert it.
@@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
// Check for divisibility.
if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
ConstantInt *CI =
- ConstantInt::get(SE.getContext(),
- C->getValue()->getValue().sdiv(
- FC->getValue()->getValue()));
+ ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
// If the quotient is zero and the remainder is non-zero, reject
// the value at this scale. It will be considered for subsequent
// smaller scales.
if (!CI->isZero()) {
const SCEV *Div = SE.getConstant(CI);
S = Div;
- Remainder =
- SE.getAddExpr(Remainder,
- SE.getConstant(C->getValue()->getValue().srem(
- FC->getValue()->getValue())));
+ Remainder = SE.getAddExpr(
+ Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
return true;
}
}
@@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
// of the given factor. If so, we can factor it.
const SCEVConstant *FC = cast<SCEVConstant>(Factor);
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ if (!C->getAPInt().srem(FC->getAPInt())) {
SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[0] = SE.getConstant(
- C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+ NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
S = SE.getMulExpr(NewMulOps);
return true;
}
@@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- const SCEV *Op = Ops[i];
+ for (const SCEV *Op : Ops) {
const SCEV *Remainder = SE.getConstant(Ty, 0);
if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
// Op now has ElSize factored out.
@@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
} else {
// The operand was not divisible, so add it to the list of operands
// we'll scan next iteration.
- NewOps.push_back(Ops[i]);
+ NewOps.push_back(Op);
}
}
// If we made any changes, update Ops.
@@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
assert(!isa<Instruction>(V) ||
- SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
// Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
ScanLimit++;
if (IP->getOpcode() == Instruction::GetElementPtr &&
IP->getOperand(0) == V && IP->getOperand(1) == Idx)
- return IP;
+ return &*IP;
if (IP == BlockBegin) break;
}
}
@@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// Emit a GEP.
@@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
- bool AnyIndexNotLoopInvariant = false;
- for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
- E = GepIndices.end(); I != E; ++I)
- if (!L->isLoopInvariant(*I)) {
- AnyIndexNotLoopInvariant = true;
- break;
- }
+ bool AnyIndexNotLoopInvariant =
+ std::any_of(GepIndices.begin(), GepIndices.end(),
+ [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
if (AnyIndexNotLoopInvariant)
break;
@@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
if (!Preheader) break;
// Ok, move up a level.
- Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
// Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
@@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
- GepIndices,
- "scevgep");
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
@@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
// Test whether we've already computed the most relevant loop for this SCEV.
- std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
- RelevantLoops.insert(std::make_pair(S, nullptr));
+ auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
if (!Pair.second)
return Pair.first->second;
@@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return nullptr;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
- return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ return Pair.first->second = SE.LI.getLoopFor(I->getParent());
// A non-instruction has no relevant loops.
return nullptr;
}
@@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
const Loop *L = nullptr;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
L = AR->getLoop();
- for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
- I != E; ++I)
- L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ for (const SCEV *Op : N->operands())
+ L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
return RelevantLoops[N] = L;
}
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
@@ -621,10 +625,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return RelevantLoops[C] = Result;
}
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
- const Loop *Result =
- PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
- getRelevantLoop(D->getRHS()),
- *SE.DT);
+ const Loop *Result = PickMostRelevantLoop(
+ getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
return RelevantLoops[D] = Result;
}
llvm_unreachable("Unexpected SCEV type!");
@@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
Value *Sum = nullptr;
- for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
- I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
const Loop *CurLoop = I->first;
const SCEV *Op = I->second;
if (!Sum) {
@@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
Value *Prod = nullptr;
- for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
- I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) {
- const SCEV *Op = I->second;
+ for (const auto &I : OpsAndLoops) {
+ const SCEV *Op = I.second;
if (!Prod) {
// This is the first operand. Just expand it.
Prod = expand(Op);
@@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
Value *LHS = expandCodeFor(S->getLHS(), Ty);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
- const APInt &RHS = SC->getValue()->getValue();
+ const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
ConstantInt::get(Ty, RHS.logBase2()));
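For concreteness, a hedged illustration of what this strength reduction emits (value names are made up):

    // Expanding (udiv i64 %x, 8): RHS.isPowerOf2() holds and
    // RHS.logBase2() == 3, so InsertBinop produces
    //   %t = lshr i64 %x, 3
    // instead of an integer division.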
@@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
for (User::op_iterator OI = IncV->op_begin()+1,
OE = IncV->op_end(); OI != OE; ++OI)
if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos))
+ if (!SE.DT.dominates(OInst, IVIncInsertPos))
return false;
}
// Advance to the next instruction.
@@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
case Instruction::Add:
case Instruction::Sub: {
Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
- if (!OInst || SE.DT->dominates(OInst, InsertPos))
+ if (!OInst || SE.DT.dominates(OInst, InsertPos))
return dyn_cast<Instruction>(IncV->getOperand(0));
return nullptr;
}
case Instruction::BitCast:
return dyn_cast<Instruction>(IncV->getOperand(0));
case Instruction::GetElementPtr:
- for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
- I != E; ++I) {
+ for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
if (isa<Constant>(*I))
continue;
if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
- if (!SE.DT->dominates(OInst, InsertPos))
+ if (!SE.DT.dominates(OInst, InsertPos))
return nullptr;
}
if (allowScale) {
@@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
- if (SE.DT->dominates(IncV, InsertPos))
+ if (SE.DT.dominates(IncV, InsertPos))
return true;
// InsertPos must itself dominate IncV so that IncV's new position satisfies
// its existing users.
- if (isa<PHINode>(InsertPos)
- || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
+ if (isa<PHINode>(InsertPos) ||
+ !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
return false;
// Check that the chain of IV operands leading back to Phi can be hoisted.
@@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
// IncV is safe to hoist.
IVIncs.push_back(IncV);
IncV = Oper;
- if (SE.DT->dominates(IncV, InsertPos))
+ if (SE.DT.dominates(IncV, InsertPos))
break;
}
- for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
- E = IVIncs.rend(); I != E; ++I) {
+ for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
(*I)->moveBefore(InsertPos);
}
return true;
@@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
}
/// \brief Check whether we can cheaply express the requested SCEV in terms of
-/// the available PHI SCEV by truncation and/or invertion of the step.
+/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Phi,
const SCEVAddRecExpr *Requested,
@@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Only try partially matching scevs that need truncation and/or
// step-inversion if we know this loop is outside the current loop.
- bool TryNonMatchingSCEV = IVIncInsertLoop &&
- SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+ bool TryNonMatchingSCEV =
+ IVIncInsertLoop &&
+ SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (!SE.isSCEVable(PN->getType()))
+ for (auto &I : *L->getHeader()) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN || !SE.isSCEVable(PN->getType()))
continue;
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
@@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Potentially, move the increment. We have made sure in
// isExpandedAddRecExprPHI or hoistIVInc that this is possible.
if (L == IVIncInsertLoop)
- hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+ hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
@@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PostIncLoops.clear();
// Expand code for the start value.
- Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
- L->getHeader()->begin());
+ Value *StartV =
+ expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
// StartV must be hoisted into L's preheader to dominate the new phi.
assert(!isa<Instruction>(StartV) ||
- SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
- L->getHeader()));
+ SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
// Expand code for the step value. Do this before creating the PHI so that PHI
// reuse code doesn't see an incomplete PHI.
@@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PostIncLoops.count(L)) {
PostIncLoopSet Loops;
Loops.insert(L);
- Normalized =
- cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
- nullptr, Loops, SE, *SE.DT));
+ Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse(
+ Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));
}
// Strip off any non-loop-dominating component from the addrec start.
@@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- if (isa<Instruction>(Result)
- && !SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) {
+ if (isa<Instruction>(Result) &&
+ !SE.DT.dominates(cast<Instruction>(Result),
+ &*Builder.GetInsertPoint())) {
// The induction variable's postinc expansion does not dominate this use.
// IVUsers tries to prevent this case, so it is rare. However, it can
// happen when an IVUser outside the loop is not dominated by the latch
@@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
{
// Expand the step somewhere that dominates the loop header.
BuilderType::InsertPointGuard Guard(Builder);
- StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
}
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
@@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
- std::next(BasicBlock::iterator(cast<Instruction>(V)));
- BuilderType::InsertPointGuard Guard(Builder);
- while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
- isa<LandingPadInst>(NewInsertPt))
- ++NewInsertPt;
+ findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- NewInsertPt);
+ &*NewInsertPt);
return V;
}
@@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
BasicBlock *Header = L->getHeader();
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
- Header->begin());
+ &Header->front());
rememberInstruction(CanonicalIV);
SmallSet<BasicBlock *, 4> PredSeen;
@@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
- Builder.SetInsertPoint(IP->getParent(), IP);
+ assert(IP);
+ Builder.SetInsertPoint(IP);
return expandCodeFor(SH, Ty);
}
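A minimal call-site sketch for the overload above; the names (Exp, BECount, CountTy, Preheader) are placeholders, not from this patch:

    // Expand a backedge-taken count right before the preheader terminator.
    Value *TripCount =
        Exp.expandCodeFor(BECount, CountTy, Preheader->getTerminator());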
@@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
- Instruction *InsertPt = Builder.GetInsertPoint();
- for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
L = L->getParentLoop())
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
@@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) {
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
- InsertPt = L->getHeader()->getFirstInsertionPt();
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
}
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
- InsertPt = L->getHeader()->getFirstInsertionPt();
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
while (InsertPt != Builder.GetInsertPoint()
&& (isInsertedInstruction(InsertPt)
|| isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = std::next(BasicBlock::iterator(InsertPt));
+ InsertPt = &*std::next(InsertPt->getIterator());
}
break;
}
// Check to see if we already expanded this here.
- std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
- I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
if (I != InsertedExpressions.end())
return I->second;
BuilderType::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+ Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
Value *V = visit(S);
@@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
// Emit code for it.
BuilderType::InsertPointGuard Guard(Builder);
- PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
- L->getHeader()->begin()));
+ PHINode *V =
+ cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
return V;
}
@@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
- Phis.push_back(Phi);
+ for (auto &I : *L->getHeader()) {
+ if (auto *PN = dyn_cast<PHINode>(&I))
+ Phis.push_back(PN);
+ else
+ break;
}
+
if (TTI)
std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
@@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
// Process phis from wide to narrow. Map wide phis to their truncation
// so narrow phis can reuse them.
- for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
- PEnd = Phis.end(); PIter != PEnd; ++PIter) {
- PHINode *Phi = *PIter;
+ for (PHINode *Phi : Phis) {
+ auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
+ if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC))
+ return V;
+ if (!SE.isSCEVable(PN->getType()))
+ return nullptr;
+ auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+ if (!Const)
+ return nullptr;
+ return Const->getValue();
+ };
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
+ if (Value *V = SimplifyPHINode(Phi)) {
+ if (V->getType() != Phi->getType())
+ continue;
Phi->replaceAllUsesWith(V);
DeadInsts.emplace_back(Phi);
++NumElim;
@@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
if (OrigInc->getType() != IsomorphicInc->getType()) {
Instruction *IP = nullptr;
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
- IP = PN->getParent()->getFirstInsertionPt();
+ IP = &*PN->getParent()->getFirstInsertionPt();
else
IP = OrigInc->getNextNode();
@@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
- IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+ IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
}
@@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
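A hedged sketch of how an indvars-style caller might drive replaceCongruentIVs; the surrounding objects (Exp, L, DT, TTI) are assumed:

    SmallVector<WeakVH, 16> DeadInsts;
    unsigned NumElim = Exp.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    // Congruent phis were RAUW'd above; clean up whatever became dead.
    for (WeakVH &V : DeadInsts)
      if (Instruction *Inst =
              dyn_cast_or_null<Instruction>(static_cast<Value *>(V)))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);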
+Value *SCEVExpander::findExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ using namespace llvm::PatternMatch;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+  // Look for a suitable value in simple conditions at the loop exits.
+ for (BasicBlock *BB : ExitingBlocks) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+ BasicBlock *TrueBB, *FalseBB;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ TrueBB, FalseBB)))
+ continue;
+
+ if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+ return LHS;
+
+ if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+ return RHS;
+ }
+
+ // There is potential to make this significantly smarter, but this simple
+ // heuristic already gets some interesting cases.
+
+  // Could not find a suitable value.
+ return nullptr;
+}
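Sketch of the intended use, with Exp, BTC, and At assumed for illustration:

    // Prefer a value the loop's exit tests already compute over fresh IR.
    if (Value *Existing = Exp.findExistingExpansion(BTC, At, L))
      return Existing;
    // Otherwise fall back to a full (potentially expensive) expansion.
    return Exp.expandCodeFor(BTC, BTC->getType(), At);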
+
bool SCEVExpander::isHighCostExpansionHelper(
- const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ const SCEV *S, Loop *L, const Instruction *At,
+ SmallPtrSetImpl<const SCEV *> &Processed) {
+
+  // If we can find an existing value for this SCEV available at the point
+  // "At", then consider the expression cheap.
+ if (At && findExistingExpansion(S, At, L) != nullptr)
+ return false;
// Zero/One operand expressions
switch (S->getSCEVType()) {
@@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
case scConstant:
return false;
case scTruncate:
- return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
- Processed);
+ return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
+ L, At, Processed);
case scZeroExtend:
return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
- L, Processed);
+ L, At, Processed);
case scSignExtend:
return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
- L, Processed);
+ L, At, Processed);
}
if (!Processed.insert(S).second)
@@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
// If the divisor is a power of two and the SCEV type fits in a native
- // integer, consider the divison cheap irrespective of whether it occurs in
+ // integer, consider the division cheap irrespective of whether it occurs in
// the user code since it can be lowered into a right shift.
if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
- if (SC->getValue()->getValue().isPowerOf2()) {
+ if (SC->getAPInt().isPowerOf2()) {
const DataLayout &DL =
L->getHeader()->getParent()->getParent()->getDataLayout();
unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (!ExitingBB)
return true;
- BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
- if (!ExitingBI || !ExitingBI->isConditional())
+  // At the beginning of this function we already tried to find an existing
+  // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+  // pattern involving division. This is just a simple search heuristic.
+ if (!At)
+ At = &ExitingBB->back();
+ if (!findExistingExpansion(
+ SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
-
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
- if (!OrigCond)
- return true;
-
- const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
- RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
- if (RHS != S) {
- const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
- LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
- if (LHS != S)
- return true;
- }
}
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
@@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive to rematerialize.
if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- if (isHighCostExpansionHelper(*I, L, Processed))
+ for (auto *Op : NAry->operands())
+ if (isHighCostExpansionHelper(Op, L, At, Processed))
return true;
- }
}
// If we haven't recognized an expensive SCEV pattern, assume it's an
@@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper(
return false;
}
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+ Instruction *IP) {
+ assert(IP);
+ switch (Pred->getKind()) {
+ case SCEVPredicate::P_Union:
+ return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+ case SCEVPredicate::P_Equal:
+ return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+ }
+ llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+ Instruction *IP) {
+ Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+ Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+ Builder.SetInsertPoint(IP);
+ auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+ return I;
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+ Instruction *IP) {
+ auto *BoolType = IntegerType::get(IP->getContext(), 1);
+ Value *Check = ConstantInt::getNullValue(BoolType);
+
+ // Loop over all checks in this set.
+ for (auto Pred : Union->getPredicates()) {
+ auto *NextCheck = expandCodeForPredicate(Pred, IP);
+ Builder.SetInsertPoint(IP);
+ Check = Builder.CreateOr(Check, NextCheck);
+ }
+
+ return Check;
+}
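A small driver sketch for the predicate expanders above; the block names and wiring are assumptions:

    Instruction *OldTerm = Preheader->getTerminator();
    // True iff at least one predicate in the union fails at run time.
    Value *AnyFailed = Exp.expandCodeForPredicate(UnionPred, OldTerm);
    IRBuilder<> B(OldTerm);
    // Fall back to the unspeculated loop copy when a check fails.
    B.CreateCondBr(AnyFailed, ScalarLoopBB, CheckedLoopBB);
    OldTerm->eraseFromParent();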
+
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index b238fe4..b7fd5d5 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
SmallVector<const SCEV *, 8> Operands;
const Loop *L = AR->getLoop();
// The addrec conceptually uses its operands at loop entry.
- Instruction *LUser = L->getHeader()->begin();
+ Instruction *LUser = &L->getHeader()->front();
// Transform each operand.
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I) {
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index a5fca3e..029997a 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -32,22 +32,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
// A handy option for disabling scoped no-alias functionality. The same effect
// can also be achieved by stripping the associated metadata tags from IR, but
// this option is sometimes more convenient.
-static cl::opt<bool>
-EnableScopedNoAlias("enable-scoped-noalias", cl::init(true));
+static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
+ cl::init(true));
namespace {
/// AliasScopeNode - This is a simple wrapper around an MDNode which provides
@@ -57,7 +58,7 @@ class AliasScopeNode {
const MDNode *Node;
public:
- AliasScopeNode() : Node(0) {}
+ AliasScopeNode() : Node(nullptr) {}
explicit AliasScopeNode(const MDNode *N) : Node(N) {}
/// getNode - Get the MDNode for this AliasScopeNode.
@@ -70,79 +71,74 @@ public:
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
};
+} // end of anonymous namespace
-/// ScopedNoAliasAA - This is a simple alias analysis
-/// implementation that uses scoped-noalias metadata to answer queries.
-class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis {
-public:
- static char ID; // Class identification, replacement for typeinfo
- ScopedNoAliasAA() : ImmutablePass(ID) {
- initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
- }
+AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::alias(LocA, LocB);
- bool doInitialization(Module &M) override;
+ // Get the attached MDNodes.
+ const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
+ const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias;
-protected:
- bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
- void collectMDInDomain(const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const;
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
-};
-} // End of anonymous namespace
+ if (!mayAliasInScopes(AScopes, BNoAlias))
+ return NoAlias;
-// Register this pass...
-char ScopedNoAliasAA::ID = 0;
-INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias",
- "Scoped NoAlias Alias Analysis", false, true, false)
+ if (!mayAliasInScopes(BScopes, ANoAlias))
+ return NoAlias;
-ImmutablePass *llvm::createScopedNoAliasAAPass() {
- return new ScopedNoAliasAA();
+ // If they may alias, chain to the next AliasAnalysis.
+ return AAResultBase::alias(LocA, LocB);
}
-bool ScopedNoAliasAA::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
+ LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-void
-ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS1, CS2);
+
+ if (!mayAliasInScopes(
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
-void
-ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const {
+void ScopedNoAliasAAResult::collectMDInDomain(
+ const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const {
for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i)
if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))
if (AliasScopeNode(MD).getDomain() == Domain)
Nodes.insert(MD);
}
-bool
-ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
- const MDNode *NoAlias) const {
+bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias) const {
if (!Scopes || !NoAlias)
return true;
@@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::alias(LocA, LocB);
-
- // Get the attached MDNodes.
- const MDNode *AScopes = LocA.AATags.Scope,
- *BScopes = LocB.AATags.Scope;
+ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScopedNoAliasAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
- const MDNode *ANoAlias = LocA.AATags.NoAlias,
- *BNoAlias = LocB.AATags.NoAlias;
+char ScopedNoAliasAA::PassID;
- if (!mayAliasInScopes(AScopes, BNoAlias))
- return NoAlias;
-
- if (!mayAliasInScopes(BScopes, ANoAlias))
- return NoAlias;
+char ScopedNoAliasAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
- // If they may alias, chain to the next AliasAnalysis.
- return AliasAnalysis::alias(LocA, LocB);
+ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() {
+ return new ScopedNoAliasAAWrapperPass();
}
-bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) {
+ initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
-AliasAnalysis::ModRefBehavior
-ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) {
- return AliasAnalysis::getModRefBehavior(CS);
+bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ScopedNoAliasAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
}
-AliasAnalysis::ModRefBehavior
-ScopedNoAliasAA::getModRefBehavior(const Function *F) {
- return AliasAnalysis::getModRefBehavior(F);
+bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
}
-AliasAnalysis::ModRefResult
-ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::getModRefInfo(CS, Loc);
-
- if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
- LLVMContext::MD_noalias)))
- return NoModRef;
-
- if (!mayAliasInScopes(
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- Loc.AATags.NoAlias))
- return NoModRef;
-
- return AliasAnalysis::getModRefInfo(CS, Loc);
-}
-
-AliasAnalysis::ModRefResult
-ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- if (!EnableScopedNoAlias)
- return AliasAnalysis::getModRefInfo(CS1, CS2);
-
- if (!mayAliasInScopes(
- CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return NoModRef;
-
- if (!mayAliasInScopes(
- CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
- return NoModRef;
-
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-
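For legacy pass-manager clients the way to reach the new result object changes shape; a hedged sketch (assuming the wrapper exposes getResult(), as the other AA wrappers in this change do):

    auto &SNA = getAnalysis<ScopedNoAliasAAWrapperPass>().getResult();
    // The scope/noalias metadata attached to the two locations drives the
    // mayAliasInScopes checks shown above.
    if (SNA.alias(MemoryLocation(PtrA, SizeA, TagsA),
                  MemoryLocation(PtrB, SizeB, TagsB)) == NoAlias) {
      // Proven disjoint; safe to reorder the accesses.
    }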
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
index edd82f5..f5a927b 100644
--- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) {
void SparseSolver::Print(Function &F, raw_ostream &OS) const {
OS << "\nFUNCTION: " << F.getName() << "\n";
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(BB))
+ for (auto &BB : F) {
+ if (!BBExecutable.count(&BB))
OS << "INFEASIBLE: ";
OS << "\t";
- if (BB->hasName())
- OS << BB->getName() << ":\n";
+ if (BB.hasName())
+ OS << BB.getName() << ":\n";
else
OS << "; anon bb\n";
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- LatticeFunc->PrintValue(getLatticeState(I), OS);
- OS << *I << "\n";
+ for (auto &I : BB) {
+ LatticeFunc->PrintValue(getLatticeState(&I), OS);
+ OS << I << "\n";
}
OS << "\n";
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 635c50c..e00f4ae 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -61,10 +61,19 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
#endif // !NDEBUG
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
+ TLI.setUnavailable(LibFunc::ldexp);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ }
+
// There are no library implementations of memcpy and memset for AMD GPUs and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
- T.getArch() == Triple::amdgcn) {
+ T.getArch() == Triple::amdgcn ||
+ T.getArch() == Triple::wasm32 ||
+ T.getArch() == Triple::wasm64) {
TLI.setUnavailable(LibFunc::memcpy);
TLI.setUnavailable(LibFunc::memset);
TLI.setUnavailable(LibFunc::memset_pattern16);
@@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
// memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ // All versions of watchOS support it.
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
TLI.setUnavailable(LibFunc::memset_pattern16);
} else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
TLI.setUnavailable(LibFunc::memset_pattern16);
- } else {
+ } else if (!T.isWatchOS()) {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
@@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
break;
case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
TLI.setUnavailable(LibFunc::exp10l);
- if (T.isOSVersionLT(7, 0)) {
+ if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
+ (T.isOSVersionLT(9, 0) &&
+ (T.getArch() == Triple::x86 ||
+ T.getArch() == Triple::x86_64)))) {
TLI.setUnavailable(LibFunc::exp10);
TLI.setUnavailable(LibFunc::exp10f);
} else {
@@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
// Linux (GLIBC):
// http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
- // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c
// http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
switch (T.getOS()) {
case Triple::Darwin:
case Triple::MacOSX:
case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
case Triple::FreeBSD:
case Triple::Linux:
break;
@@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
}
// ffsll is available on at least FreeBSD and Linux (GLIBC):
- // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c
// http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
case Triple::FreeBSD:
case Triple::Linux:
break;
@@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::ffsll);
}
+ // The following functions are available on at least FreeBSD:
+ // http://svn.freebsd.org/base/head/lib/libc/string/fls.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c
+ if (!T.isOSFreeBSD()) {
+ TLI.setUnavailable(LibFunc::fls);
+ TLI.setUnavailable(LibFunc::flsl);
+ TLI.setUnavailable(LibFunc::flsll);
+ }
+
// The following functions are available on at least Linux:
if (!T.isOSLinux()) {
TLI.setUnavailable(LibFunc::dunder_strdup);
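Consumer-side, these availability bits gate libcall formation; a brief sketch (pass boilerplate assumed):

    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    // After this change fls() is only reported available on FreeBSD, so a
    // transform must check before synthesizing a call to it.
    if (TLI.has(LibFunc::fls)) {
      // OK to emit a call to fls().
    }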
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7d1c3fb..9c1d3fd 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
return *this;
}
-unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy) const {
- return TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy) const {
+ int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
- int NumArgs) const {
- return TTIImpl->getCallCost(FTy, NumArgs);
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
+ int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const {
- return TTIImpl->getCallCost(F, Arguments);
+int TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getCallCost(F, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const {
- return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+int TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getUserCost(const User *U) const {
- return TTIImpl->getUserCost(U);
+int TargetTransformInfo::getUserCost(const User *U) const {
+ int Cost = TTIImpl->getUserCost(U);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
bool TargetTransformInfo::hasBranchDivergence() const {
@@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
Scale, AddrSpace);
}
-bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
- int Consecutive) const {
- return TTIImpl->isLegalMaskedStore(DataType, Consecutive);
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
+ return TTIImpl->isLegalMaskedStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
+ return TTIImpl->isLegalMaskedLoad(DataType);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
- int Consecutive) const {
- return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
+bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
+ return TTIImpl->isLegalMaskedGather(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
+  return TTIImpl->isLegalMaskedScatter(DataType);
}
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
@@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) const {
- return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
+bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
+ return TTIImpl->enableInterleavedAccessVectorization();
+}
+
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
@@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
-unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
- return TTIImpl->getFPOpCost(Ty);
+int TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ int Cost = TTIImpl->getFPOpCost(Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(Imm, Ty);
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
- return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
@@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
-unsigned TargetTransformInfo::getArithmeticInstrCost(
+int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) const {
- return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index, Type *SubTp) const {
- return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
+ Type *SubTp) const {
+ int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- return TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
- return TTIImpl->getCFInstrCost(Opcode);
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+ int Cost = TTIImpl->getCFInstrCost(Opcode);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- return TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost =
+ TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) const {
+ int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getInterleavedMemoryOpCost(
+int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace) const {
- return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned
-TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- return TTIImpl->getCallInstrCost(F, RetTy, Tys);
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
-unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
- bool IsComplex) const {
- return TTIImpl->getAddressComputationCost(Tp, IsComplex);
+int TargetTransformInfo::getAddressComputationCost(Type *Tp,
+ bool IsComplex) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
-unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const {
- return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const {
+ int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
}
unsigned
@@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
-bool TargetTransformInfo::hasCompatibleFunctionAttributes(
- const Function *Caller, const Function *Callee) const {
- return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee);
+bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ return TTIImpl->areInlineCompatible(Caller, Callee);
}
TargetTransformInfo::Concept::~Concept() {}
@@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
TargetIRAnalysis::TargetIRAnalysis(
- std::function<Result(Function &)> TTICallback)
+ std::function<Result(const Function &)> TTICallback)
: TTICallback(TTICallback) {}
-TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
+TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) {
return TTICallback(F);
}
char TargetIRAnalysis::PassID;
-TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
}
@@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
*PassRegistry::getPassRegistry());
}
-TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) {
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
TTI = TIRA.run(F);
return *TTI;
}
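One practical effect of migrating these interfaces from unsigned to int, sketched with assumed types:

    // With unsigned costs, 'ScalarCost * VF - VectorCost' silently wrapped
    // whenever the vector form was cheaper; with signed costs (and the new
    // non-negativity asserts) the difference is meaningful.
    int ScalarCost = TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy);
    int VectorCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
    int Benefit = ScalarCost * VF - VectorCost; // may legitimately be negative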
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 4e9c6f6..805f3ef 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -121,15 +121,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SetVector.h"
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
@@ -138,199 +136,138 @@ using namespace llvm;
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
namespace {
- /// TBAANode - This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAANode {
- const MDNode *Node;
-
- public:
- TBAANode() : Node(nullptr) {}
- explicit TBAANode(const MDNode *N) : Node(N) {}
-
- /// getNode - Get the MDNode for this TBAANode.
- const MDNode *getNode() const { return Node; }
-
- /// getParent - Get this TBAANode's Alias tree parent.
- TBAANode getParent() const {
- if (Node->getNumOperands() < 2)
- return TBAANode();
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
- if (!P)
- return TBAANode();
- // Ok, this node has a valid parent. Return it.
- return TBAANode(P);
- }
-
- /// TypeIsImmutable - Test if this TBAANode represents a type for objects
- /// which are not modified (by any means) in the context where this
- /// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
- if (Node->getNumOperands() < 3)
- return false;
- ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
- if (!CI)
- return false;
- return CI->getValue()[0];
- }
- };
-
- /// This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAAStructTagNode {
- /// This node should be created with createTBAAStructTagNode.
- const MDNode *Node;
+/// TBAANode - This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAANode {
+ const MDNode *Node;
+
+public:
+ TBAANode() : Node(nullptr) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
- public:
- explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
- /// Get the MDNode for this TBAAStructTagNode.
- const MDNode *getNode() const { return Node; }
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTagNode {
+ /// This node should be created with createTBAAStructTagNode.
+ const MDNode *Node;
- const MDNode *getBaseType() const {
- return dyn_cast_or_null<MDNode>(Node->getOperand(0));
- }
- const MDNode *getAccessType() const {
- return dyn_cast_or_null<MDNode>(Node->getOperand(1));
- }
- uint64_t getOffset() const {
- return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
- }
- /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
- /// objects which are not modified (by any means) in the context where this
- /// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
- if (Node->getNumOperands() < 4)
- return false;
- ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
- if (!CI)
- return false;
- return CI->getValue()[0];
- }
- };
-
- /// This is a simple wrapper around an MDNode which provides a
- /// higher-level interface by hiding the details of how alias analysis
- /// information is encoded in its operands.
- class TBAAStructTypeNode {
- /// This node should be created with createTBAAStructTypeNode.
- const MDNode *Node;
-
- public:
- TBAAStructTypeNode() : Node(nullptr) {}
- explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
-
- /// Get the MDNode for this TBAAStructTypeNode.
- const MDNode *getNode() const { return Node; }
-
- /// Get this TBAAStructTypeNode's field in the type DAG with
- /// given offset. Update the offset to be relative to the field type.
- TBAAStructTypeNode getParent(uint64_t &Offset) const {
- // Parent can be omitted for the root node.
- if (Node->getNumOperands() < 2)
- return TBAAStructTypeNode();
+public:
+ explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
- // Fast path for a scalar type node and a struct type node with a single
- // field.
- if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2
- ? 0
- : mdconst::extract<ConstantInt>(Node->getOperand(2))
- ->getZExtValue();
- Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
- if (!P)
- return TBAAStructTypeNode();
- return TBAAStructTypeNode(P);
- }
+ /// Get the MDNode for this TBAAStructTagNode.
+ const MDNode *getNode() const { return Node; }
- // Assume the offsets are in order. We return the previous field if
- // the current offset is bigger than the given offset.
- unsigned TheIdx = 0;
- for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
- ->getZExtValue();
- if (Cur > Offset) {
- assert(Idx >= 3 &&
- "TBAAStructTypeNode::getParent should have an offset match!");
- TheIdx = Idx - 2;
- break;
- }
- }
- // Move along the last field.
- if (TheIdx == 0)
- TheIdx = Node->getNumOperands() - 2;
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
- ->getZExtValue();
+ const MDNode *getBaseType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+ }
+ const MDNode *getAccessType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ uint64_t getOffset() const {
+ return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ }
+ /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+ /// objects which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
+
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTypeNode {
+ /// This node should be created with createTBAAStructTypeNode.
+ const MDNode *Node;
+
+public:
+ TBAAStructTypeNode() : Node(nullptr) {}
+ explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTypeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// given offset. Update the offset to be relative to the field type.
+ TBAAStructTypeNode getParent(uint64_t &Offset) const {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
+ return TBAAStructTypeNode();
+
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
+ if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
return TBAAStructTypeNode(P);
}
- };
-}
-
-namespace {
- /// TypeBasedAliasAnalysis - This is a simple alias analysis
- /// implementation that uses TypeBased to answer queries.
- class TypeBasedAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- TypeBasedAliasAnalysis() : ImmutablePass(ID) {
- initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
- bool doInitialization(Module &M) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
+ // Assume the offsets are in order. We return the previous field if
+ // the current offset is bigger than the given offset.
+ unsigned TheIdx = 0;
+ for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
+ ->getZExtValue();
+ if (Cur > Offset) {
+ assert(Idx >= 3 &&
+ "TBAAStructTypeNode::getParent should have an offset match!");
+ TheIdx = Idx - 2;
+ break;
+ }
}
-
- bool Aliases(const MDNode *A, const MDNode *B) const;
- bool PathAliases(const MDNode *A, const MDNode *B) const;
-
- private:
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char TypeBasedAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
- "Type-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
- return new TypeBasedAliasAnalysis();
-}
-
-bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
- InitializeAliasAnalysis(this, &M.getDataLayout());
- return true;
-}
-
-void
-TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
+ // Move along the last field.
+ if (TheIdx == 0)
+ TheIdx = Node->getNumOperands() - 2;
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
+ ->getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+};
}
/// Check the first operand of the tbaa tag node; if it is an MDNode, we treat
@@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) {
return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
}
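// A minimal sketch of building a tag that satisfies this predicate,
// assuming the MDBuilder helpers present in this LLVM version; the
// function name is illustrative and not part of the patch.
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

static llvm::MDNode *buildStructPathTag(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Root = MDB.createTBAARoot("example TBAA root");
  llvm::MDNode *IntTy = MDB.createTBAAScalarTypeNode("int", Root);
  // Operand 0 of the returned tag is an MDNode (the base type), so
  // isStructPathTBAA treats it as the struct-path format.
  return MDB.createTBAAStructTagNode(/*BaseType=*/IntTy,
                                     /*AccessType=*/IntTy, /*Offset=*/0);
}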
-/// Aliases - Test whether the type represented by A may alias the
-/// type represented by B.
-bool
-TypeBasedAliasAnalysis::Aliases(const MDNode *A,
- const MDNode *B) const {
- // Make sure that both MDNodes are struct-path aware.
- if (isStructPathTBAA(A) && isStructPathTBAA(B))
- return PathAliases(A, B);
-
- // Keep track of the root node for A and B.
- TBAANode RootA, RootB;
-
- // Climb the tree from A to see if we reach B.
- for (TBAANode T(A); ; ) {
- if (T.getNode() == B)
- // B is an ancestor of A.
- return true;
-
- RootA = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Climb the tree from B to see if we reach A.
- for (TBAANode T(B); ; ) {
- if (T.getNode() == A)
- // A is an ancestor of B.
- return true;
-
- RootB = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-/// Test whether the struct-path tag represented by A may alias the
-/// struct-path tag represented by B.
-bool
-TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
- const MDNode *B) const {
- // Verify that both input nodes are struct-path aware.
- assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
- assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
-
- // Keep track of the root node for A and B.
- TBAAStructTypeNode RootA, RootB;
- TBAAStructTagNode TagA(A), TagB(B);
-
- // TODO: We need to check if AccessType of TagA encloses AccessType of
- // TagB to support aggregate AccessType. If yes, return true.
-
- // Start from the base type of A, follow the edge with the correct offset in
- // the type DAG and adjust the offset until we reach the base type of B or
- // until we reach the Root node.
- // Compare the adjusted offset once we have the same base.
-
- // Climb the type DAG from base type of A to see if we reach base type of B.
- const MDNode *BaseA = TagA.getBaseType();
- const MDNode *BaseB = TagB.getBaseType();
- uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
- for (TBAAStructTypeNode T(BaseA); ; ) {
- if (T.getNode() == BaseB)
- // Base type of A encloses base type of B, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootA = T;
- // Follow the edge with the correct offset, OffsetA will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetA);
- if (!T.getNode())
- break;
- }
-
- // Reset OffsetA and climb the type DAG from base type of B to see if we reach
- // base type of A.
- OffsetA = TagA.getOffset();
- for (TBAAStructTypeNode T(BaseB); ; ) {
- if (T.getNode() == BaseA)
- // Base type of B encloses base type of A, check if the offsets match.
- return OffsetA == OffsetB;
-
- RootB = T;
- // Follow the edge with the correct offset, OffsetB will be adjusted to
- // be relative to the field type.
- T = T.getParent(OffsetB);
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableTBAA)
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
// Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
// be conservative.
const MDNode *AM = LocA.AATags.TBAA;
- if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ if (!AM)
+ return AAResultBase::alias(LocA, LocB);
const MDNode *BM = LocB.AATags.TBAA;
- if (!BM) return AliasAnalysis::alias(LocA, LocB);
+ if (!BM)
+ return AAResultBase::alias(LocA, LocB);
// If they may alias, chain to the next AliasAnalysis.
if (Aliases(AM, BM))
- return AliasAnalysis::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB);
// Otherwise return a definitive result.
return NoAlias;
}
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
if (!EnableTBAA)
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
const MDNode *M = Loc.AATags.TBAA;
- if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ if (!M)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
@@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
(isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
}
-AliasAnalysis::ModRefBehavior
-TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+FunctionModRefBehavior
+TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefBehavior(CS);
+ return AAResultBase::getModRefBehavior(CS);
- ModRefBehavior Min = UnknownModRefBehavior;
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
(isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
- Min = OnlyReadsMemory;
+ Min = FMRB_OnlyReadsMemory;
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
}
-AliasAnalysis::ModRefBehavior
-TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
// Functions don't have metadata. Just chain to the next implementation.
- return AliasAnalysis::getModRefBehavior(F);
+ return AAResultBase::getModRefBehavior(F);
}
-AliasAnalysis::ModRefResult
-TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
- return NoModRef;
+ return MRI_NoModRef;
- return AliasAnalysis::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(CS, Loc);
}
-AliasAnalysis::ModRefResult
-TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
if (!EnableTBAA)
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(CS1, CS2);
if (const MDNode *M1 =
CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 =
CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
- return NoModRef;
+ return MRI_NoModRef;
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(CS1, CS2);
}
bool MDNode::isTBAAVtableAccess() const {
if (!isStructPathTBAA(this)) {
- if (getNumOperands() < 1) return false;
+ if (getNumOperands() < 1)
+ return false;
if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
+ if (Tag1->getString() == "vtable pointer")
+ return true;
}
return false;
}
// For struct-path aware TBAA, we use the access type of the tag.
- if (getNumOperands() < 2) return false;
+ if (getNumOperands() < 2)
+ return false;
MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
- if (!Tag) return false;
+ if (!Tag)
+ return false;
if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
+ if (Tag1->getString() == "vtable pointer")
+ return true;
}
- return false;
+ return false;
}
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
@@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A) return nullptr;
+ if (!A)
+ return nullptr;
B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B) return nullptr;
+ if (!B)
+ return nullptr;
}
SmallSetVector<MDNode *, 4> PathA;
@@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
int IB = PathB.size() - 1;
MDNode *Ret = nullptr;
- while (IA >= 0 && IB >=0) {
+ while (IA >= 0 && IB >= 0) {
if (PathA[IA] == PathB[IB])
Ret = PathA[IA];
else
@@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.NoAlias = getMetadata(LLVMContext::MD_noalias);
}
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
+ // Make sure that both MDNodes are struct-path aware.
+ if (isStructPathTBAA(A) && isStructPathTBAA(B))
+ return PathAliases(A, B);
+
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A);;) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B);;) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
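// The two loops above are ordinary ancestor tests in a tree; a
// standalone analogue in plain C++ (illustrative only, not LLVM API):
struct TypeTreeNode { const TypeTreeNode *Parent = nullptr; };

static bool mayAliasByTree(const TypeTreeNode *A, const TypeTreeNode *B) {
  const TypeTreeNode *RootA = A, *RootB = B;
  for (const TypeTreeNode *T = A; T; T = T->Parent) {
    if (T == B) return true;  // B is an ancestor of A.
    RootA = T;
  }
  for (const TypeTreeNode *T = B; T; T = T->Parent) {
    if (T == A) return true;  // A is an ancestor of B.
    RootB = T;
  }
  // Different roots mean potentially unrelated type systems, so stay
  // conservative; a shared root proves no alias.
  return RootA != RootB;
}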
+
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
+ // Verify that both input nodes are struct-path aware.
+ assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
+ assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+
+ // Keep track of the root node for A and B.
+ TBAAStructTypeNode RootA, RootB;
+ TBAAStructTagNode TagA(A), TagB(B);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Start from the base type of A, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the base type of B or
+ // until we reach the Root node.
+ // Compare the adjusted offset once we have the same base.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ const MDNode *BaseA = TagA.getBaseType();
+ const MDNode *BaseB = TagB.getBaseType();
+ uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+ for (TBAAStructTypeNode T(BaseA);;) {
+ if (T.getNode() == BaseB)
+ // Base type of A encloses base type of B, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootA = T;
+ // Follow the edge with the correct offset, OffsetA will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetA);
+ if (!T.getNode())
+ break;
+ }
+
+ // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+ // base type of A.
+ OffsetA = TagA.getOffset();
+ for (TBAAStructTypeNode T(BaseB);;) {
+ if (T.getNode() == BaseA)
+ // Base type of B encloses base type of A, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootB = T;
+ // Follow the edge with the correct offset, OffsetB will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetB);
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
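// The offset rebasing above can be read in isolation; a sketch of the
// same walk over a hypothetical field table (plain C++, illustrative):
#include <cstdint>
#include <utility>
#include <vector>

struct PathNode {
  // (field type, field offset) pairs, offsets in increasing order.
  std::vector<std::pair<const PathNode *, uint64_t>> Fields;

  // Analogue of TBAAStructTypeNode::getParent: follow the field that
  // covers Offset and rebase Offset relative to that field's type.
  const PathNode *step(uint64_t &Offset) const {
    if (Fields.empty())
      return nullptr;                 // scalar leaf or root
    size_t Idx = Fields.size() - 1;   // default: the last field
    for (size_t I = 1; I < Fields.size(); ++I)
      if (Fields[I].second > Offset) {
        Idx = I - 1;                  // previous field covers Offset
        break;
      }
    Offset -= Fields[Idx].second;
    return Fields[Idx].first;
  }
};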
+
+TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char TypeBasedAA::PassID;
+
+char TypeBasedAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa",
+ "Type-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
+ return new TypeBasedAAWrapperPass();
+}
+
+TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
+ initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new TypeBasedAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
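// A short usage sketch, assuming this version's public headers: the
// wrapper registered above plugs into a legacy pipeline so downstream
// passes observe TBAA through TypeBasedAAResult.
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/LegacyPassManager.h"

void addTBAAToLegacyPipeline(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createTypeBasedAAWrapperPass()); // the "tbaa" pass
}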
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index fa0d779..314ec9c 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -43,7 +44,7 @@ const unsigned MaxDepth = 6;
/// Enable an experimental feature to leverage information about dominating
/// conditions to compute known bits. The individual options below control how
-/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
/// run into compile time problems when testing, scale them back and report
/// your findings.
static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
/// conditions?
static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
cl::Hidden,
- cl::init(20000));
+ cl::init(20));
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
- cl::Hidden, cl::init(2000));
+ cl::Hidden, cl::init(20));
// If true, don't consider only compares whose only use is a branch.
static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
@@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool NonNegative, Negative;
+ ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+ return NonNegative;
+}
+
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q);
+
+bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonEqual(V1, V2, DL, Query(AC,
+ safeCxtI(V1, safeCxtI(V2, CxtI)),
+ DT));
+}
+
static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
unsigned Depth, const Query &Q);
@@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
}
// If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
KnownOne.clearAllBits();
@@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
}
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
- APInt &KnownZero) {
+ APInt &KnownZero,
+ APInt &KnownOne) {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1);
- // Use the high end of the ranges to find leading zeros.
- unsigned MinLeadingZeros = BitWidth;
+ KnownZero.setAllBits();
+ KnownOne.setAllBits();
+
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Lower =
mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
ConstantInt *Upper =
mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
- if (Range.isWrappedSet())
- MinLeadingZeros = 0; // -1 has no zeros
- unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
- MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
- }
- KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+ // The first CommonPrefixBits of all values in Range are equal.
+ unsigned CommonPrefixBits =
+ (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+
+ APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
+ KnownOne &= Range.getUnsignedMax() & Mask;
+ KnownZero &= ~Range.getUnsignedMax() & Mask;
+ }
}
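// A worked instance of the common-prefix computation, assuming an i8
// load annotated with the single range [8, 12) (illustrative values):
#include "llvm/ADT/APInt.h"

void rangeKnownBitsExample() {
  llvm::APInt Min(8, 8), Max(8, 11);   // values 0b00001000..0b00001011
  unsigned CommonPrefixBits =
      (Max ^ Min).countLeadingZeros(); // == 6: the top six bits agree
  llvm::APInt Mask = llvm::APInt::getHighBitsSet(8, CommonPrefixBits);
  llvm::APInt KnownOne = Max & Mask;   // 0b00001000: bit 3 always set
  llvm::APInt KnownZero = ~Max & Mask; // 0b11110100: always clear
  // Only bits 1 and 0 stay unknown, matching the pattern 000010xx.
}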
static bool isEphemeralValueOf(Instruction *I, const Value *E) {
@@ -374,20 +398,20 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
SmallPtrSet<const Value *, 32> Visited;
SmallPtrSet<const Value *, 16> EphValues;
+ // The instruction defining an assumption's condition itself is always
+ // considered ephemeral to that assumption (even if it has other
+ // non-ephemeral users). See r246696's test case for an example.
+ if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ return true;
+
while (!WorkSet.empty()) {
const Value *V = WorkSet.pop_back_val();
if (!Visited.insert(V).second)
continue;
// If all uses of this value are ephemeral, then so is this value.
- bool FoundNEUse = false;
- for (const User *I : V->users())
- if (!EphValues.count(I)) {
- FoundNEUse = true;
- break;
- }
-
- if (!FoundNEUse) {
+ if (std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); })) {
if (V == E)
return true;
@@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
// of the block); the common case is that the assume will come first.
for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
IE = Inv->getParent()->end(); I != IE; ++I)
- if (I == Q.CxtI)
+ if (&*I == Q.CxtI)
return true;
// The context must come first...
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
if (!Q.DT || !Q.CxtI)
return;
Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+ // The context instruction might be in a statically unreachable block. If
+ // so, asking dominator queries may yield surprising results (e.g. the block
+ // may not have a dom tree node).
+ if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+ return;
// Avoid useless work
if (auto VI = dyn_cast<Instruction>(V))
@@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
// instruction. Finding a condition where one path dominates the context
// isn't enough because both the true and false cases could merge before
// the context instruction we're actually interested in. Instead, we need
- // to ensure that the taken *edge* dominates the context instruction.
+ // to ensure that the taken *edge* dominates the context instruction. We
+ // know that the edge must be reachable since we started from a reachable
+ // block.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
@@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
}
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively and a
+// shift amount, compute the known-zero or known-one bits implied for the
+// shift operator's result at that shift amount. The results from calling
+// KZF and KOF are conservatively combined over all permitted shift amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout &DL, unsigned Depth, const Query &Q,
+ KZFunctor KZF, KOFunctor KOF) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ KnownZero = KZF(KnownZero, ShiftAmt);
+ KnownOne = KOF(KnownOne, ShiftAmt);
+ return;
+ }
+
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+ // Note: We cannot use KnownZero.getLimitedValue() here, because if
+ // BitWidth > 64 and any upper bits are known, we'll end up returning the
+ // limit value (which implies all bits are known).
+ uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+ // It would be more clearly correct to use the two temporaries for this
+ // calculation, but we reuse the APInts here to avoid unnecessary allocations.
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+ // If we know the shifter operand is nonzero, we can sometimes infer more
+ // known bits. However this is expensive to compute, so be lazy about it and
+ // only compute it when absolutely necessary.
+ Optional<bool> ShifterOperandIsNonZero;
+
+ // Early exit if we can't constrain any well-defined shift amount.
+ if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (!*ShifterOperandIsNonZero)
+ return;
+ }
+
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+ // Combine the shifted known input bits only for those shift amounts
+ // compatible with its known constraints.
+ if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+ continue;
+ if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+ continue;
+ // If we know the shifter is nonzero, we may be able to infer more known
+ // bits. This check is sunk down as far as possible to avoid the expensive
+ // call to isKnownNonZero if the cheaper checks above fail.
+ if (ShiftAmt == 0) {
+ if (!ShifterOperandIsNonZero.hasValue())
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (*ShifterOperandIsNonZero)
+ continue;
+ }
+
+ KnownZero &= KZF(KnownZero2, ShiftAmt);
+ KnownOne &= KOF(KnownOne2, ShiftAmt);
+ }
+
+ // If there are no compatible shift amounts, then we've proven that the shift
+ // amount must be >= the BitWidth, and the result is undefined. We could
+ // return anything we'd like, but we need to make sure the sets of known bits
+ // stay disjoint (it should be better for some other code to actually
+ // propagate the undef than to pick a value here using known bits).
+ if ((KnownZero & KnownOne) != 0)
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+}
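// A standalone sketch of the combining loop for shl over 8-bit masks
// (plain C++, illustrative only; the nonzero-shifter refinement above
// is omitted for brevity): intersect the bits implied by every shift
// amount compatible with the amount's own known bits.
#include <cstdint>

static void combineShlKnownBits(uint8_t InKZ, uint8_t InKO, uint8_t AmtKZ,
                                uint8_t AmtKO, uint8_t &OutKZ,
                                uint8_t &OutKO) {
  OutKZ = OutKO = 0xFF;                    // start "all known", then intersect
  for (unsigned Amt = 0; Amt < 8; ++Amt) {
    if (Amt & AmtKZ)
      continue;                            // uses a bit known to be zero
    if ((Amt & AmtKO) != AmtKO)
      continue;                            // misses a bit known to be one
    OutKZ &= uint8_t((InKZ << Amt) | ((1u << Amt) - 1)); // low bits become 0
    OutKO &= uint8_t(InKO << Amt);
  }
  if (OutKZ & OutKO)                       // no feasible amount: undefined
    OutKZ = OutKO = 0;
}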
+
static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
@@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
@@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
KnownZero |= KnownZero2;
+
+ // and(x, add (x, -1)) is a common idiom that always clears the low bit;
+ // here we handle the more general case of adding any odd number by
+ // matching the form add(x, add(x, y)) where y is odd.
+ // TODO: This could be generalized to clearing any bit set in y where the
+ // following bit is known to be unset in y.
+ Value *Y = nullptr;
+ if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
+ m_Value(Y))) ||
+ match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
+ m_Value(Y)))) {
+ APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0);
+ computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q);
+ if (KnownOne3.countTrailingOnes() > 0)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, 1);
+ }
break;
}
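// Worked example of the idiom (illustrative): with y odd, bit 0 of
// (x + y) is the complement of bit 0 of x, so x & (x + y) always has
// bit 0 clear:
//   x = 0b0110, y = 1:  x + y = 0b0111,  x & (x + y) = 0b0110
//   x = 0b0111, y = 1:  x + y = 0b1000,  x & (x + y) = 0b0000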
case Instruction::Or: {
@@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::BitCast: {
Type *SrcTy = I->getOperand(0)->getType();
- if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
+ SrcTy->isFloatingPointTy()) &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
@@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
break;
}
- case Instruction::Shl:
+ case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- }
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne << ShiftAmt;
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
- case Instruction::LShr:
+ }
+ case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- // high bits known zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownZero, ShiftAmt) |
+ // High bits known zero.
+ APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownOne, ShiftAmt);
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
- case Instruction::AShr:
+ }
+ case Instruction::AShr: {
// (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownZero, ShiftAmt);
+ };
- // Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownOne, ShiftAmt);
+ };
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
- KnownZero |= HighBits;
- else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
- KnownOne |= HighBits;
- }
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
break;
+ }
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
@@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
case Instruction::Call:
case Instruction::Invoke:
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
// If a range metadata is attached to this IntrinsicInst, intersect the
// explicit range specified by the metadata and the implicit range of
// the intrinsic.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::bswap:
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ KnownZero |= KnownZero2.byteSwap();
+ KnownOne |= KnownOne2.byteSwap();
+ break;
case Intrinsic::ctlz:
case Intrinsic::cttz: {
unsigned LowBits = Log2_32(BitWidth)+1;
@@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break;
}
case Intrinsic::ctpop: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ // We can bound the number of bits needed to represent the count. Also,
+ // bits known to be zero can't contribute to the population.
+ unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
+ unsigned LeadingZeros =
+ APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
+ assert(LeadingZeros <= BitWidth);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
+ KnownOne &= ~KnownZero;
+ // TODO: we could bound KnownOne using the lower bound on the number
+ // of bits which might be set provided by popcnt KnownOne2.
+ break;
+ }
+ case Intrinsic::fabs: {
+ Type *Ty = II->getType();
+ APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
+ KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
@@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
}
+static unsigned getAlignment(const Value *V, const DataLayout &DL) {
+ unsigned Align = 0;
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ Align = GO->getAlignment();
+ if (Align == 0) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (GVar->isStrongDefinitionForLinker())
+ Align = DL.getPreferredAlignment(GVar);
+ else
+ Align = DL.getABITypeAlignment(ObjectType);
+ }
+ }
+ }
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
+
+ if (!Align && A->hasStructRetAttr()) {
+ // An sret parameter has at least the ABI alignment of the return type.
+ Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ Align = DL.getABITypeAlignment(EltTy);
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ Align = AI->getAlignment();
+ else if (auto CS = ImmutableCallSite(V))
+ Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ Align = CI->getLimitedValue();
+ }
+
+ return Align;
+}
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
@@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned BitWidth = KnownZero.getBitWidth();
assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->isFPOrFPVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
- "Not integer or pointer type!");
+ "Not integer, floating point, or pointer type!");
assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
@@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- // The address of an aligned GlobalValue has trailing zeros.
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- unsigned Align = GO->getAlignment();
- if (Align == 0) {
- if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
- Type *ObjectType = GVar->getType()->getElementType();
- if (ObjectType->isSized()) {
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (GVar->isStrongDefinitionForLinker())
- Align = DL.getPreferredAlignment(GVar);
- else
- Align = DL.getABITypeAlignment(ObjectType);
- }
- }
- }
- if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth,
- countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
- return;
- }
-
- if (Argument *A = dyn_cast<Argument>(V)) {
- unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
-
- if (!Align && A->hasStructRetAttr()) {
- // An sret parameter has at least the ABI alignment of the return type.
- Type *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- Align = DL.getABITypeAlignment(EltTy);
- }
-
- if (Align)
- KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
-
- // Don't give up yet... there might be an assumption that provides more
- // information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
-
- // Or a dominating condition for that matter
- if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
- computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
- Depth, Q);
- return;
- }
-
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
@@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Aligned pointers have trailing zeros - refine KnownZero set
+ if (V->getType()->isPointerTy()) {
+ unsigned Align = getAlignment(V, DL);
+ if (Align)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ }
+
// computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
// strictly refines KnownZero and KnownOne. Therefore, we run them after
// computeKnownBitsFromOperator.
@@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
if (XKnownNegative)
return true;
+
+ // If the shifter operand is a constant, and all of the bits shifted
+ // out are known to be zero, and X is known non-zero, then at least one
+ // non-zero bit must remain.
+ if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+
+ auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
+ // Is there a known one in the portion not shifted out?
+ if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+ return true;
+ // Are all the bits to be shifted out known zero?
+ if (KnownZero.countTrailingOnes() >= ShiftVal)
+ return isKnownNonZero(X, DL, Depth, Q);
+ }
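// Worked example (illustrative, i8 "lshr %x, 3"): if bit 5 of %x is a
// known one, KnownOne.countLeadingZeros() == 2 < 8 - 3 == 5, so a set
// bit survives the shift and the result is provably non-zero. If
// instead the low three bits of %x are known zero, only zeros are
// shifted out, and the result is non-zero whenever %x is.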
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
@@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
return true;
}
+ // PHI
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Try and detect a recurrence that monotonically increases from a
+ // starting value, as these are common as induction variables.
+ if (PN->getNumIncomingValues() == 2) {
+ Value *Start = PN->getIncomingValue(0);
+ Value *Induction = PN->getIncomingValue(1);
+ if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
+ std::swap(Start, Induction);
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
+ if (!C->isZero() && !C->isNegative()) {
+ ConstantInt *X;
+ if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
+ !X->isNegative())
+ return true;
+ }
+ }
+ }
+ }
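// Illustrative IR (hypothetical) matched by this recurrence check:
//   %i      = phi i64 [ 1, %entry ], [ %i.next, %loop ]
//   %i.next = add nuw i64 %i, 1
// The start value (1) is positive and the nuw step (+1) is
// non-negative, so the induction variable can never become zero.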
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
@@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
return KnownOne != 0;
}
+/// Return true if V2 == V1 + X, where X is known non-zero.
+static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return false;
+ Value *Op = nullptr;
+ if (V2 == BO->getOperand(0))
+ Op = BO->getOperand(1);
+ else if (V2 == BO->getOperand(1))
+ Op = BO->getOperand(0);
+ else
+ return false;
+ return isKnownNonZero(Op, DL, 0, Q);
+}
+
+/// Return true if it is known that V1 != V2.
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ if (V1->getType()->isVectorTy() || V1 == V2)
+ return false;
+ if (V1->getType() != V2->getType())
+ // We can't look through casts yet.
+ return false;
+ if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q))
+ return true;
+
+ if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+ // Are any known bits in V1 contradictory to known bits in V2? If V1
+ // has a known zero where V2 has a known one, they must not be equal.
+ auto BitWidth = Ty->getBitWidth();
+ APInt KnownZero1(BitWidth, 0);
+ APInt KnownOne1(BitWidth, 0);
+ computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q);
+ APInt KnownZero2(BitWidth, 0);
+ APInt KnownOne2(BitWidth, 0);
+ computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q);
+
+ auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+ if (OppositeBits.getBoolValue())
+ return true;
+ }
+ return false;
+}
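// A worked instance of the contradictory-bits check, assuming i8
// values (illustrative): an even V1 can never equal an odd V2.
#include "llvm/ADT/APInt.h"

bool oppositeBitsExample() {
  llvm::APInt KnownZero1(8, 1), KnownOne1(8, 0); // V1: bit 0 known zero
  llvm::APInt KnownZero2(8, 0), KnownOne2(8, 1); // V2: bit 0 known one
  llvm::APInt Opposite =
      (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); // == 1
  return Opposite.getBoolValue(); // true: V1 and V2 are provably unequal
}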
+
/// Return true if 'V & Mask' is known to be zero. We use this predicate to
/// simplify operations downstream. Mask is known to be zero for bits that V
/// cannot have.
@@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
}
// This insert value inserts something else than what we are looking for.
- // See if the (aggregrate) value inserted into has the value we are
+ // See if the (aggregate) value inserted into has the value we are
// looking for, then.
if (*req_idx != *i)
return FindInsertedValue(I->getAggregateOperand(), idx_range,
@@ -2560,7 +2798,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
}
if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
- // If we're extracting a value from an aggregrate that was extracted from
+ // If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
// However, we will need to chain I's indices with the requested indices.
@@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
}
-/// Return true if Value is always a dereferenceable pointer.
-///
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+ const DataLayout &DL) {
+ APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+ if (!BaseAlign) {
+ Type *Ty = Base->getType()->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+ BaseAlign = DL.getABITypeAlignment(Ty);
+ }
+
+ APInt Alignment(Offset.getBitWidth(), Align);
+
+ assert(Alignment.isPowerOf2() && "must be a power of 2!");
+ return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+ Type *Ty = Base->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(Base, Offset, Align, DL);
+}
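// The two overloads above reduce to simple integer arithmetic; a
// compact analogue (illustrative only, Align a power of two):
#include <cstdint>

static bool alignedAt(uint64_t BaseAlign, uint64_t Offset, uint64_t Align) {
  // The base must be at least Align-aligned and the accumulated
  // constant offset a multiple of Align.
  return BaseAlign >= Align && (Offset & (Align - 1)) == 0;
}
// e.g. alignedAt(16, 8, 8) == true, but alignedAt(16, 4, 8) == false.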
+
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
- const Instruction *CtxI,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- SmallPtrSetImpl<const Value *> &Visited) {
+static bool isDereferenceableAndAlignedPointer(
+ const Value *V, unsigned Align, const DataLayout &DL,
+ const Instruction *CtxI, const DominatorTree *DT,
+ const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- // These are obviously ok.
- if (isa<AllocaInst>(V)) return true;
+ // These are obviously ok if aligned.
+ if (isa<AllocaInst>(V))
+ return isAligned(V, Align, DL);
// It's not always safe to follow a bitcast, for example:
// bitcast i8* (alloca i8) to i32*
@@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
if (STy->isSized() && DTy->isSized() &&
(DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
(DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
- return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
}
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->hasExternalWeakLinkage();
+ if (!GV->hasExternalWeakLinkage())
+ return isAligned(V, Align, DL);
// byval arguments are okay.
if (const Argument *A = dyn_cast<Argument>(V))
if (A->hasByValAttr())
- return true;
-
+ return isAligned(V, Align, DL);
+
if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
- return true;
+ return isAligned(V, Align, DL);
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
Type *Ty = VTy->getPointerElementType();
const Value *Base = GEP->getPointerOperand();
- // Conservatively require that the base pointer be fully dereferenceable.
+ // Conservatively require that the base pointer be fully dereferenceable
+ // and aligned.
if (!Visited.insert(Base).second)
return false;
- if (!isDereferenceablePointer(Base, DL, CtxI,
- DT, TLI, Visited))
+ if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI,
+ Visited))
return false;
-
+
APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);
if (!GEP->accumulateConstantOffset(DL, Offset))
return false;
-
- // Check if the load is within the bounds of the underlying object.
+
+ // Check if the load is within the bounds of the underlying object
+ // and offset is aligned.
uint64_t LoadSize = DL.getTypeStoreSize(Ty);
Type *BaseType = Base->getType()->getPointerElementType();
- return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType));
+ assert(isPowerOf2_32(Align) && "must be a power of 2!");
+ return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) &&
+ !(Offset & APInt(Offset.getBitWidth(), Align-1));
}
// For gc.relocate, look through relocations
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
GCRelocateOperands RelocateInst(I);
- return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(
+ RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
}
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
- return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
- DT, TLI, Visited);
+ return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
// If we don't know, assume the worst.
return false;
}
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
- const Instruction *CtxI,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI) {
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
Type *VTy = V->getType();
Type *Ty = VTy->getPointerElementType();
+
+ // Require ABI alignment for loads without alignment specification
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(Ty);
+
if (Ty->isSized()) {
APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
+
if (Offset.isNonNegative())
- if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
- CtxI, DT, TLI))
+ if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) &&
+ isAligned(BV, Offset, Align, DL))
return true;
}
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+ return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI,
+ Visited);
+}
+
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI);
}
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
@@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const LoadInst *LI = cast<LoadInst>(Inst);
if (!LI->isUnordered() ||
// Speculative load may create a race that did not exist in the source.
- LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
- return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
+ return isDereferenceableAndAlignedPointer(
+ LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::Switch:
case Instruction::Unreachable:
case Instruction::Fence:
- case Instruction::LandingPad:
case Instruction::AtomicRMW:
case Instruction::AtomicCmpXchg:
+ case Instruction::LandingPad:
case Instruction::Resume:
+ case Instruction::CatchSwitch:
+ case Instruction::CatchPad:
+ case Instruction::CatchRet:
+ case Instruction::CleanupPad:
+ case Instruction::CleanupRet:
return false; // Misc instructions which have effects
}
}
+bool llvm::mayBeMemoryDependent(const Instruction &I) {
+ return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+}
+
/// Return true if we know that the specified value is never null.
bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
- // Global values are not null unless extern weak.
+ // A global variable in address space 0 is non null unless extern weak.
+ // Other address spaces may have null as a valid address for a global,
+ // so we can't assume anything.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage();
+ return !GV->hasExternalWeakLinkage() &&
+ GV->getType()->getAddressSpace() == 0;
// A Load tagged w/nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
@@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
unsigned NumUsesExplored = 0;
for (auto U : V->users()) {
// Avoid massive lists
@@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+static OverflowResult computeOverflowForSignedAdd(
+ Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+ if (Add && Add->hasNoSignedWrap()) {
+ return OverflowResult::NeverOverflows;
+ }
+
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if ((LHSKnownNonNegative && RHSKnownNegative) ||
+ (LHSKnownNegative && RHSKnownNonNegative)) {
+ // The sign bits are opposite: this CANNOT overflow.
+ return OverflowResult::NeverOverflows;
+ }
+
+ // The remaining code needs Add to be available. Return early if it is not.
+ if (!Add)
+ return OverflowResult::MayOverflow;
+
+ // If the sign of Add is the same as at least one of the operands, this add
+ // CANNOT overflow. This is particularly useful when the sum is
+ // @llvm.assume'ed non-negative rather than proved so from analyzing its
+ // operands.
+ bool LHSOrRHSKnownNonNegative =
+ (LHSKnownNonNegative || RHSKnownNonNegative);
+ bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+ if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
+ bool AddKnownNonNegative, AddKnownNegative;
+ ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
+ /*Depth=*/0, AC, CxtI, DT);
+ if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
+ (AddKnownNegative && LHSOrRHSKnownNegative)) {
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
+ Add, DL, AC, CxtI, DT);
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+}
+
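A hedged usage sketch (assumed helper, not from the patch): an instruction-combining pass could use the new overflow query to attach an nsw flag it has just proven, for example from a dominating @llvm.assume:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

static void addNSWIfProvable(BinaryOperator *BO, const DataLayout &DL,
                             AssumptionCache *AC, const DominatorTree *DT) {
  auto *Add = dyn_cast<AddOperator>(BO);
  if (Add && computeOverflowForSignedAdd(Add, DL, AC, BO, DT) ==
                 OverflowResult::NeverOverflows)
    BO->setHasNoSignedWrap(true); // proven: this signed add cannot wrap
}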
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+ // FIXME: This conservative implementation can be relaxed. E.g. most
+ // atomic operations are guaranteed to terminate on most platforms
+ // and most functions terminate.
+
+ return !I->isAtomic() && // atomics may never succeed on some platforms
+ !isa<CallInst>(I) && // could throw and might not terminate
+ !isa<InvokeInst>(I) && // might not terminate and could throw to
+ // non-successor (see bug 24185 for details).
+ !isa<ResumeInst>(I) && // has no successors
+ !isa<ReturnInst>(I); // has no successors
+}
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L) {
+ // The loop header is guaranteed to be executed for every iteration.
+ //
+ // FIXME: Relax this constraint to cover all basic blocks that are
+ // guaranteed to be executed at every iteration.
+ if (I->getParent() != L->getHeader()) return false;
+
+ for (const Instruction &LI : *L->getHeader()) {
+ if (&LI == I) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+ }
+ llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
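To make the new guarantee concrete, here is a hedged sketch (the helper name firstMayNotTransfer is invented) that scans a block for the first instruction that might not reach its successor, which is exactly the per-instruction test the loop above relies on:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

static const Instruction *firstMayNotTransfer(const BasicBlock &BB) {
  for (const Instruction &I : BB)
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return &I; // e.g. a call, invoke, atomic operation, resume or return
  return nullptr; // every instruction falls through to the next
}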
+bool llvm::propagatesFullPoison(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
+ return true;
+
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
+
+ case Instruction::Shl: {
+ // Left shift *by* a poison value is poison. The number of
+ // positions to shift is unsigned, so no negative values are
+ // possible there. Left shift by zero places preserves poison. So
+ // it only remains to consider left shift of poison by a positive
+ // number of places.
+ //
+ // A left shift by a positive number of places leaves the lowest order bit
+ // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+ // make the poison operand violate that flag, yielding a fresh full-poison
+ // value.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+
+ case Instruction::Mul: {
+ // A multiplication by zero yields a non-poison zero result, so we need to
+ // rule out zero as an operand. Conservatively, multiplication by a
+ // non-zero constant is not multiplication by zero.
+ //
+ // Multiplication by a non-zero constant can leave some bits
+ // non-poisoned. For example, a multiplication by 2 leaves the lowest
+ // order bit unpoisoned. So we need to consider that.
+ //
+ // Multiplication by 1 preserves poison. If the multiplication has a
+ // no-wrap flag, then we can make the poison operand violate that flag
+ // when multiplied by any integer other than 0 and 1.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+ for (Value *V : OBO->operands()) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ // A ConstantInt cannot yield poison, so we can assume that it is
+ // the other operand that is poison.
+ return !CI->isZero();
+ }
+ }
+ }
+ return false;
+ }
+
+ case Instruction::GetElementPtr:
+ // A GEP implicitly represents a sequence of additions, subtractions,
+ // truncations, sign extensions and multiplications. The multiplications
+ // are by the non-zero sizes of some set of types, so we do not have to be
+ // concerned with multiplication by zero. If the GEP is in-bounds, then
+ // these operations are implicitly no-signed-wrap so poison is propagated
+ // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+ return cast<GEPOperator>(I)->isInBounds();
+
+ default:
+ return false;
+ }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ return cast<StoreInst>(I)->getPointerOperand();
+
+ case Instruction::Load:
+ return cast<LoadInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return I->getOperand(1);
+
+ default:
+ return nullptr;
+ }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+ // We currently only look for uses of poison values within the same basic
+ // block, as that makes it easier to guarantee that the uses will be
+ // executed given that PoisonI is executed.
+ //
+ // FIXME: Expand this to consider uses beyond the same basic block. To do
+ // this, look out for the distinction between post-dominance and strong
+ // post-dominance.
+ const BasicBlock *BB = PoisonI->getParent();
+
+ // Set of instructions that we have proved will yield poison if PoisonI
+ // does.
+ SmallSet<const Value *, 16> YieldsPoison;
+ YieldsPoison.insert(PoisonI);
+
+ for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+ I != E; ++I) {
+ if (&*I != PoisonI) {
+ const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+ if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+ }
+
+ // Mark poison that propagates from I through uses of I.
+ if (YieldsPoison.count(&*I)) {
+ for (const User *User : I->users()) {
+ const Instruction *UserI = cast<Instruction>(User);
+ if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+ YieldsPoison.insert(User);
+ }
+ }
+ }
+ return false;
+}
+
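A hedged sketch of the intended use (helper name invented): if a full-poison result from I would provably trigger undefined behavior, then in a well-defined program I does not produce poison, so its no-wrap flag can be trusted:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

static bool mayAssumeNoSignedOverflow(const Instruction *I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(I);
  // isKnownNotFullPoison(I) proves that a full-poison I would reach a
  // use that triggers UB, so overflow cannot happen in a valid execution.
  return OBO && OBO->hasNoSignedWrap() && isKnownNotFullPoison(I);
}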
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+ if (FMF.noNaNs())
+ return true;
+
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isNaN();
+ return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isZero();
+ return false;
+}
+
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+ FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS) {
LHS = CmpLHS;
RHS = CmpRHS;
- // (icmp X, Y) ? X : Y
- if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ // If the predicate is an "or-equal" (FP) predicate, then signed zeroes can
+ // make the result inconsistent between implementations.
+ // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+ // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+ // Therefore we behave conservatively and only proceed if at least one of the
+ // operands is known to not be zero, or if we don't care about signed zeroes.
+ switch (Pred) {
+ default: break;
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+ if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+ !isKnownNonZero(CmpRHS))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ }
+
+ SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+ bool Ordered = false;
+
+ // When given one NaN and one non-NaN input:
+ // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+ // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+ // ordered comparison fails), which could be NaN or non-NaN.
+ // So here we discover exactly what NaN behavior is required/accepted.
+ if (CmpInst::isFPPredicate(Pred)) {
+ bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+ bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+ if (LHSSafe && RHSSafe) {
+ // Both operands are known non-NaN.
+ NaNBehavior = SPNB_RETURNS_ANY;
+ } else if (CmpInst::isOrdered(Pred)) {
+ // An ordered comparison will return false when given a NaN, so it
+ // returns the RHS.
+ Ordered = true;
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ } else {
+ Ordered = false;
+ // An unordered comparison will return true when given a NaN, so it
+ // returns the LHS.
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
}
}
- // (icmp X, Y) ? Y : X
if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ if (NaNBehavior == SPNB_RETURNS_NAN)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (NaNBehavior == SPNB_RETURNS_OTHER)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ Ordered = !Ordered;
+ }
+
+ // ([if]cmp X, Y) ? X : Y
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
+ default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
}
}
@@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
- return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+ return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
- return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+ return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
@@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
LHS = TrueVal;
RHS = FalseVal;
- return SPF_SMIN;
+ return {SPF_SMIN, SPNB_NA, false};
}
}
}
// TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
- return SPF_UNKNOWN;
+ return {SPF_UNKNOWN, SPNB_NA, false};
}
-static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
- Instruction::CastOps *CastOp) {
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+ Instruction::CastOps *CastOp) {
CastInst *CI = dyn_cast<CastInst>(V1);
Constant *C = dyn_cast<Constant>(V2);
- if (!CI || !C)
+ CastInst *CI2 = dyn_cast<CastInst>(V2);
+ if (!CI)
return nullptr;
*CastOp = CI->getOpcode();
+ if (CI2) {
+ // If V1 and V2 are both the same cast from the same type, we can look
+ // through V1.
+ if (CI2->getOpcode() == CI->getOpcode() &&
+ CI2->getSrcTy() == CI->getSrcTy())
+ return CI2->getOperand(0);
+ return nullptr;
+ } else if (!C) {
+ return nullptr;
+ }
+
if (isa<SExtInst>(CI) && CmpI->isSigned()) {
Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
// This is only valid if the truncated value can be sign-extended
@@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
if (isa<TruncInst>(CI))
return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+ if (isa<FPToUIInst>(CI))
+ return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<FPToSIInst>(CI))
+ return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<UIToFPInst>(CI))
+ return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+ if (isa<SIToFPInst>(CI))
+ return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+ if (isa<FPTruncInst>(CI))
+ return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+ if (isa<FPExtInst>(CI))
+ return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
return nullptr;
}
-SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+SelectPatternResult llvm::matchSelectPattern(Value *V,
Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp) {
SelectInst *SI = dyn_cast<SelectInst>(V);
- if (!SI) return SPF_UNKNOWN;
+ if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
- ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
- if (!CmpI) return SPF_UNKNOWN;
+ CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+ if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
- ICmpInst::Predicate Pred = CmpI->getPredicate();
+ CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(CmpI))
+ FMF = CmpI->getFastMathFlags();
// Bail out early.
if (CmpI->isEquality())
- return SPF_UNKNOWN;
+ return {SPF_UNKNOWN, SPNB_NA, false};
// Deal with type mismatches.
if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
- if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
cast<CastInst>(TrueVal)->getOperand(0), C,
LHS, RHS);
- if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
C, cast<CastInst>(FalseVal)->getOperand(0),
LHS, RHS);
}
- return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
LHS, RHS);
}
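An illustrative sketch (hypothetical helper) of consuming the richer result: a lowering pass can accept a float-min select only when the reported NaN behavior matches the target operation:

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

static bool isIEEEMinNumIdiom(Value *Sel, Value *&LHS, Value *&RHS) {
  SelectPatternResult SPR = matchSelectPattern(Sel, LHS, RHS);
  // SPNB_RETURNS_ANY means neither operand can be NaN, so the naive
  // compare+select and C99 fmin agree on every input.
  return SPR.Flavor == SPF_FMINNUM && SPR.NaNBehavior == SPNB_RETURNS_ANY;
}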
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+ const unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1 && "Must have at least one range!");
+ assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+ auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+ auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+ ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+ for (unsigned i = 1; i < NumRanges; ++i) {
+ auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+ // Note: unionWith will potentially create a range that contains values not
+ // contained in any of the original N ranges.
+ CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+ }
+
+ return CR;
+}
+
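As a hedged example of a consumer (invented helper; MD_range is the fixed metadata kind for !range annotations), the merged range can fold a null check on a range-annotated load:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static bool loadIsKnownNonZero(const LoadInst *LI) {
  MDNode *Ranges = LI->getMetadata(LLVMContext::MD_range);
  if (!Ranges)
    return false;
  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  return !CR.contains(APInt(CR.getBitWidth(), 0)); // 0 excluded from range
}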
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
+ if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+ return true;
+
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLE: {
+ const APInt *C;
+
+ // LHS s<= LHS +_{nsw} C if C >= 0
+ if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
+ return !C->isNegative();
+ return false;
+ }
+
+ case CmpInst::ICMP_ULE: {
+ const APInt *C;
+
+ // LHS u<= LHS +_{nuw} C for any C
+ if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+ return true;
+
+ // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
+ auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ const APInt *&CA, const APInt *&CB) {
+ if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
+ match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
+ return true;
+
+ // If X & C == 0 then (X | C) == X +_{nuw} C
+ if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
+ match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
+ unsigned BitWidth = CA->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+
+ if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *X;
+ const APInt *CLHS, *CRHS;
+ if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
+ return CLHS->ule(*CRHS);
+
+ return false;
+ }
+ }
+}
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+ Value *ARHS, Value *BLHS, Value *BRHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+ }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() && "mismatched type");
+ Type *OpTy = LHS->getType();
+ assert(OpTy->getScalarType()->isIntegerTy(1));
+
+ // LHS ==> RHS by definition
+ if (LHS == RHS) return true;
+
+ if (OpTy->isVectorTy())
+ // TODO: extending the code below to handle vectors
+ return false;
+ assert(OpTy->isIntegerTy(1) && "implied by above");
+
+ ICmpInst::Predicate APred, BPred;
+ Value *ALHS, *ARHS;
+ Value *BLHS, *BRHS;
+
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+ !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ return false;
+
+ if (APred == BPred)
+ return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+ CxtI, DT);
+
+ return false;
+}
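A hedged sketch of how a simplifier might consume this (helper and setting invented): when one conditional branch dominates another and the first condition implies the second, the second branch always takes its true edge:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool secondBranchIsAlwaysTrue(BranchInst *DomBI, BranchInst *BI,
                                     const DataLayout &DL) {
  // Assumes DomBI's taken edge dominates BI (checked by the caller).
  return DomBI->isConditional() && BI->isConditional() &&
         isImpliedCondition(DomBI->getCondition(), BI->getCondition(), DL);
}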
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 8c671ef..4b244ec 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -11,13 +11,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
@@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
/// d) call should only read memory.
/// If all these conditions are met then return ValidIntrinsicID,
/// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
llvm::checkUnaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 1 ||
@@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I,
/// d) call should only read memory.
/// If all these conditions are met then return ValidIntrinsicID,
/// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
llvm::checkBinaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 2 ||
@@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,
/// \brief Returns intrinsic ID for call.
/// For the input call instruction it finds the matching intrinsic and returns
/// its ID; if it does not find one, it returns not_intrinsic.
-llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
- const TargetLibraryInfo *TLI) {
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+ const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
Intrinsic::ID ID = II->getIntrinsicID();
@@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
- while (LastOperand > 1 &&
- match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
std::advance(GEPTI, LastOperand - 1);
@@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
/// \brief If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
-llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
- Loop *Lp) {
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
@@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
}
/// \brief If a value has only one user that is a CastInst, return it.
-llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
- llvm::Value *UniqueCast = nullptr;
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+ Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
@@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
-llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
- Loop *Lp) {
- const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
// Try to remove a gep instruction to make the pointer (actually an index at
// this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
// the pointer; otherwise, we are analyzing the index.
- llvm::Value *OrigPtr = Ptr;
+ Value *OrigPtr = Ptr;
// The size of the pointer access.
int64_t PtrAccessSize = 1;
@@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
- const APInt &APStepVal =
- cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+ const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
if (!U)
return nullptr;
- llvm::Value *Stride = U->getValue();
+ Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
return nullptr;
@@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
-llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
unsigned Width = VTy->getNumElements();
@@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
// Extract a value from a vector add operation with a constant zero.
Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V,
- llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
- llvm::PatternMatch::m_Constant(Con)))) {
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
if (Constant *Elt = Con->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
- }
// Otherwise, we don't know.
return nullptr;
}
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only 2 cases:
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcast a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+ if (auto *C = dyn_cast<Constant>(V))
+ if (isa<VectorType>(V->getType()))
+ return C->getSplatValue();
+
+ auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+ if (!ShuffleInst)
+ return nullptr;
+ // All-zero (or undef) shuffle mask elements.
+ for (int MaskElt : ShuffleInst->getShuffleMask())
+ if (MaskElt != 0 && MaskElt != -1)
+ return nullptr;
+ // The first shuffle source is 'insertelement' with index 0.
+ auto *InsertEltInst =
+ dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+ if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+ !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+ return nullptr;
+
+ return InsertEltInst->getOperand(1);
+}
+
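For illustration (not from the patch), the canonical splat idiom this function recognizes is exactly what IRBuilder::CreateVectorSplat emits, so a round trip recovers the scalar; the helper name is invented:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static const Value *splatRoundTrip(IRBuilder<> &B, Value *Scalar) {
  // insertelement at index 0 followed by an all-zero shufflevector mask.
  Value *V = B.CreateVectorSplat(/*NumElts=*/4, Scalar);
  return getSplatValue(V); // yields Scalar for this idiom
}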
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+ const TargetTransformInfo *TTI) {
+
+ // DemandedBits will give us every value's live-out bits. But we want
+ // to ensure no extra casts would need to be inserted, so every DAG
+ // of connected values must have the same minimum bitwidth.
+ EquivalenceClasses<Value *> ECs;
+ SmallVector<Value *, 16> Worklist;
+ SmallPtrSet<Value *, 4> Roots;
+ SmallPtrSet<Value *, 16> Visited;
+ DenseMap<Value *, uint64_t> DBits;
+ SmallPtrSet<Instruction *, 4> InstructionSet;
+ MapVector<Instruction *, uint64_t> MinBWs;
+
+ // Determine the roots. We work bottom-up, from truncs or icmps.
+ bool SeenExtFromIllegalType = false;
+ for (auto *BB : Blocks)
+ for (auto &I : *BB) {
+ InstructionSet.insert(&I);
+
+ if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+ !TTI->isTypeLegal(I.getOperand(0)->getType()))
+ SeenExtFromIllegalType = true;
+
+ // Only deal with non-vector integers up to 64 bits wide.
+ if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+ !I.getType()->isVectorTy() &&
+ I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+ // Don't make work for ourselves. If we know the loaded type is legal,
+ // don't add it to the worklist.
+ if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+ continue;
+
+ Worklist.push_back(&I);
+ Roots.insert(&I);
+ }
+ }
+ // Early exit.
+ if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+ return MinBWs;
+
+ // Now proceed breadth-first, unioning values together.
+ while (!Worklist.empty()) {
+ Value *Val = Worklist.pop_back_val();
+ Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+ if (Visited.count(Val))
+ continue;
+ Visited.insert(Val);
+
+ // Non-instructions terminate a chain successfully.
+ if (!isa<Instruction>(Val))
+ continue;
+ Instruction *I = cast<Instruction>(Val);
+
+ // If we encounter a type that is larger than 64 bits, we can't represent
+ // it so bail out.
+ if (DB.getDemandedBits(I).getBitWidth() > 64)
+ return MapVector<Instruction *, uint64_t>();
+
+ uint64_t V = DB.getDemandedBits(I).getZExtValue();
+ DBits[Leader] |= V;
+
+ // Casts, loads and instructions outside of our range terminate a chain
+ // successfully.
+ if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+ !InstructionSet.count(I))
+ continue;
+
+ // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+ // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+ // transform anything that relies on them.
+ if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+ !I->getType()->isIntegerTy()) {
+ DBits[Leader] |= ~0ULL;
+ continue;
+ }
+
+ // We don't modify the types of PHIs. Reductions will already have been
+ // truncated if possible, and inductions' sizes will have been chosen by
+ // indvars.
+ if (isa<PHINode>(I))
+ continue;
+
+ if (DBits[Leader] == ~0ULL)
+ // All bits demanded, no point continuing.
+ continue;
+
+ for (Value *O : cast<User>(I)->operands()) {
+ ECs.unionSets(Leader, O);
+ Worklist.push_back(O);
+ }
+ }
+
+ // Now we've discovered all values, walk them to see if there are
+ // any users we didn't see. If there are, we can't optimize that
+ // chain.
+ for (auto &I : DBits)
+ for (auto *U : I.first->users())
+ if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+ DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+ for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+ uint64_t LeaderDemandedBits = 0;
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+ LeaderDemandedBits |= DBits[*MI];
+
+ uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+ llvm::countLeadingZeros(LeaderDemandedBits);
+ // Round up to a power of 2
+ if (!isPowerOf2_64((uint64_t)MinBW))
+ MinBW = NextPowerOf2(MinBW);
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+ if (!isa<Instruction>(*MI))
+ continue;
+ Type *Ty = (*MI)->getType();
+ if (Roots.count(*MI))
+ Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+ if (MinBW < Ty->getScalarSizeInBits())
+ MinBWs[cast<Instruction>(*MI)] = MinBW;
+ }
+ }
+
+ return MinBWs;
+}
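A hedged sketch of vectorizer-style use (names invented; DB and TTI are assumed to come from the pass's analysis results): query the minimum widths for a loop's blocks and treat each entry as a safe narrowing target:

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void planTypeShrinking(Loop *L, DemandedBits &DB,
                              const TargetTransformInfo &TTI) {
  MapVector<Instruction *, uint64_t> MinBWs =
      computeMinimumValueSizes(L->getBlocks(), DB, &TTI);
  for (const auto &KV : MinBWs) {
    // KV.first can be computed in KV.second bits (a power of two)
    // without changing any live-out bit.
    (void)KV;
  }
}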
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index 5c4bab7..26eca23 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -105,7 +105,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
Pair[1] += hexDigitValue(*Buffer);
}
Pair[0] = 0;
- for (int i=0; i<16; i++, Buffer++) {
+ for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
Pair[0] *= 16;
Pair[0] += hexDigitValue(*Buffer);
}
@@ -523,9 +523,14 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(zeroinitializer);
KEYWORD(undef);
KEYWORD(null);
+ KEYWORD(none);
KEYWORD(to);
+ KEYWORD(caller);
+ KEYWORD(within);
+ KEYWORD(from);
KEYWORD(tail);
KEYWORD(musttail);
+ KEYWORD(notail);
KEYWORD(target);
KEYWORD(triple);
KEYWORD(unwind);
@@ -586,6 +591,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(preserve_mostcc);
KEYWORD(preserve_allcc);
KEYWORD(ghccc);
+ KEYWORD(x86_intrcc);
+ KEYWORD(hhvmcc);
+ KEYWORD(hhvm_ccc);
+ KEYWORD(cxx_fast_tlscc);
KEYWORD(cc);
KEYWORD(c);
@@ -601,6 +610,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(convergent);
KEYWORD(dereferenceable);
KEYWORD(dereferenceable_or_null);
+ KEYWORD(inaccessiblememonly);
+ KEYWORD(inaccessiblemem_or_argmemonly);
KEYWORD(inlinehint);
KEYWORD(inreg);
KEYWORD(jumptable);
@@ -613,6 +624,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noduplicate);
KEYWORD(noimplicitfloat);
KEYWORD(noinline);
+ KEYWORD(norecurse);
KEYWORD(nonlazybind);
KEYWORD(nonnull);
KEYWORD(noredzone);
@@ -690,6 +702,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("label", Type::getLabelTy(Context));
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+ TYPEKEYWORD("token", Type::getTokenTy(Context));
#undef TYPEKEYWORD
// Keywords for instructions.
@@ -749,6 +762,11 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(extractvalue, ExtractValue);
INSTKEYWORD(insertvalue, InsertValue);
INSTKEYWORD(landingpad, LandingPad);
+ INSTKEYWORD(cleanupret, CleanupRet);
+ INSTKEYWORD(catchret, CatchRet);
+ INSTKEYWORD(catchswitch, CatchSwitch);
+ INSTKEYWORD(catchpad, CatchPad);
+ INSTKEYWORD(cleanuppad, CleanupPad);
#undef INSTKEYWORD
#define DWKEYWORD(TYPE, TOKEN) \
@@ -763,6 +781,7 @@ lltok::Kind LLLexer::LexIdentifier() {
DWKEYWORD(VIRTUALITY, DwarfVirtuality);
DWKEYWORD(LANG, DwarfLang);
DWKEYWORD(OP, DwarfOp);
+ DWKEYWORD(MACINFO, DwarfMacinfo);
#undef DWKEYWORD
if (Keyword.startswith("DIFlag")) {
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index 1c6e7bd..3471a2d 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -13,6 +13,7 @@
#include "LLParser.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/CallingConv.h"
@@ -48,6 +49,32 @@ bool LLParser::Run() {
ValidateEndOfModule();
}
+bool LLParser::parseStandaloneConstantValue(Constant *&C,
+ const SlotMapping *Slots) {
+ restoreParsingState(Slots);
+ Lex.Lex();
+
+ Type *Ty = nullptr;
+ if (ParseType(Ty) || parseConstantValue(Ty, C))
+ return true;
+ if (Lex.getKind() != lltok::Eof)
+ return Error(Lex.getLoc(), "expected end of string");
+ return false;
+}
+
+void LLParser::restoreParsingState(const SlotMapping *Slots) {
+ if (!Slots)
+ return;
+ NumberedVals = Slots->GlobalValues;
+ NumberedMetadata = Slots->MetadataNodes;
+ for (const auto &I : Slots->NamedTypes)
+ NamedTypes.insert(
+ std::make_pair(I.getKey(), std::make_pair(I.second, LocTy())));
+ for (const auto &I : Slots->Types)
+ NumberedTypes.insert(
+ std::make_pair(I.first, std::make_pair(I.second, LocTy())));
+}
+
/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
/// module.
bool LLParser::ValidateEndOfModule() {
@@ -158,7 +185,7 @@ bool LLParser::ValidateEndOfModule() {
// Look for intrinsic functions and CallInst that need to be upgraded
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
- UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
+ UpgradeCallsToIntrinsic(&*FI++); // must be post-increment, as we remove
UpgradeDebugInfo(*M);
@@ -169,6 +196,10 @@ bool LLParser::ValidateEndOfModule() {
// the mapping from LLParser as it doesn't need it anymore.
Slots->GlobalValues = std::move(NumberedVals);
Slots->MetadataNodes = std::move(NumberedMetadata);
+ for (const auto &I : NamedTypes)
+ Slots->NamedTypes.insert(std::make_pair(I.getKey(), I.second.first));
+ for (const auto &I : NumberedTypes)
+ Slots->Types.insert(std::make_pair(I.first, I.second.first));
return false;
}
@@ -647,6 +678,12 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
return Error(NameLoc,
"symbol with local linkage must have default visibility");
+ Type *Ty;
+ LocTy ExplicitTypeLoc = Lex.getLoc();
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after alias's type"))
+ return true;
+
Constant *Aliasee;
LocTy AliaseeLoc = Lex.getLoc();
if (Lex.getKind() != lltok::kw_bitcast &&
@@ -669,11 +706,35 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
auto *PTy = dyn_cast<PointerType>(AliaseeType);
if (!PTy)
return Error(AliaseeLoc, "An alias must have pointer type");
+ unsigned AddrSpace = PTy->getAddressSpace();
+
+ if (Ty != PTy->getElementType())
+ return Error(
+ ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
+
+ GlobalValue *GVal = nullptr;
+
+ // See if the alias was forward referenced, if so, prepare to replace the
+ // forward reference.
+ if (!Name.empty()) {
+ GVal = M->getNamedValue(Name);
+ if (GVal) {
+ if (!ForwardRefVals.erase(Name))
+ return Error(NameLoc, "redefinition of global '@" + Name + "'");
+ }
+ } else {
+ auto I = ForwardRefValIDs.find(NumberedVals.size());
+ if (I != ForwardRefValIDs.end()) {
+ GVal = I->second.first;
+ ForwardRefValIDs.erase(I);
+ }
+ }
// Okay, create the alias but do not insert it into the module yet.
std::unique_ptr<GlobalAlias> GA(
- GlobalAlias::create(PTy, (GlobalValue::LinkageTypes)Linkage, Name,
- Aliasee, /*Parent*/ nullptr));
+ GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage,
+ Name, Aliasee, /*Parent*/ nullptr));
GA->setThreadLocalMode(TLM);
GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
@@ -682,27 +743,17 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
if (Name.empty())
NumberedVals.push_back(GA.get());
- // See if this value already exists in the symbol table. If so, it is either
- // a redefinition or a definition of a forward reference.
- if (GlobalValue *Val = M->getNamedValue(Name)) {
- // See if this was a redefinition. If so, there is no entry in
- // ForwardRefVals.
- std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
- I = ForwardRefVals.find(Name);
- if (I == ForwardRefVals.end())
- return Error(NameLoc, "redefinition of global named '@" + Name + "'");
-
- // Otherwise, this was a definition of forward ref. Verify that types
- // agree.
- if (Val->getType() != GA->getType())
- return Error(NameLoc,
- "forward reference and definition of alias have different types");
+ if (GVal) {
+ // Verify that types agree.
+ if (GVal->getType() != GA->getType())
+ return Error(
+ ExplicitTypeLoc,
+ "forward reference and definition of alias have different types");
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
- Val->replaceAllUsesWith(GA.get());
- Val->eraseFromParent();
- ForwardRefVals.erase(I);
+ GVal->replaceAllUsesWith(GA.get());
+ GVal->eraseFromParent();
}
// Insert into the module, we know its name won't collide now.
@@ -767,12 +818,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (!Name.empty()) {
GVal = M->getNamedValue(Name);
if (GVal) {
- if (!ForwardRefVals.erase(Name) || !isa<GlobalValue>(GVal))
+ if (!ForwardRefVals.erase(Name))
return Error(NameLoc, "redefinition of global '@" + Name + "'");
}
} else {
- std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
- I = ForwardRefValIDs.find(NumberedVals.size());
+ auto I = ForwardRefValIDs.find(NumberedVals.size());
if (I != ForwardRefValIDs.end()) {
GVal = I->second.first;
ForwardRefValIDs.erase(I);
@@ -903,14 +953,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
}
// Target-dependent attributes:
case lltok::StringConstant: {
- std::string Attr = Lex.getStrVal();
- Lex.Lex();
- std::string Val;
- if (EatIfPresent(lltok::equal) &&
- ParseStringConstant(Val))
+ if (ParseStringAttribute(B))
return true;
-
- B.addAttribute(Attr, Val);
continue;
}
@@ -951,6 +995,10 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break;
+ case lltok::kw_inaccessiblememonly:
+ B.addAttribute(Attribute::InaccessibleMemOnly); break;
+ case lltok::kw_inaccessiblemem_or_argmemonly:
+ B.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); break;
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
@@ -963,6 +1011,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+ case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break;
case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break;
case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
@@ -1015,6 +1064,17 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
// GlobalValue Reference/Resolution Routines.
//===----------------------------------------------------------------------===//
+static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy,
+ const std::string &Name) {
+ if (auto *FT = dyn_cast<FunctionType>(PTy->getElementType()))
+ return Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ else
+ return new GlobalVariable(*M, PTy->getElementType(), false,
+ GlobalValue::ExternalWeakLinkage, nullptr, Name,
+ nullptr, GlobalVariable::NotThreadLocal,
+ PTy->getAddressSpace());
+}
+
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
@@ -1033,8 +1093,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (!Val) {
- std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
- I = ForwardRefVals.find(Name);
+ auto I = ForwardRefVals.find(Name);
if (I != ForwardRefVals.end())
Val = I->second.first;
}
@@ -1048,15 +1107,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
}
// Otherwise, create a new forward reference for this value and remember it.
- GlobalValue *FwdVal;
- if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
- FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
- else
- FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, nullptr, Name,
- nullptr, GlobalVariable::NotThreadLocal,
- PTy->getAddressSpace());
-
+ GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name);
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -1073,8 +1124,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (!Val) {
- std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
- I = ForwardRefValIDs.find(ID);
+ auto I = ForwardRefValIDs.find(ID);
if (I != ForwardRefValIDs.end())
Val = I->second.first;
}
@@ -1088,13 +1138,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
}
// Otherwise, create a new forward reference for this value and remember it.
- GlobalValue *FwdVal;
- if (FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
- FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
- else
- FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, nullptr, "");
-
+ GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, "");
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
@@ -1217,6 +1261,19 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
ParseToken(lltok::rparen, "expected ')' in address space");
}
+/// ParseStringAttribute
+/// := StringConstant
+/// := StringConstant '=' StringConstant
+bool LLParser::ParseStringAttribute(AttrBuilder &B) {
+ std::string Attr = Lex.getStrVal();
+ Lex.Lex();
+ std::string Val;
+ if (EatIfPresent(lltok::equal) && ParseStringConstant(Val))
+ return true;
+ B.addAttribute(Attr, Val);
+ return false;
+}
+
/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes.
bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
bool HaveError = false;
@@ -1228,6 +1285,11 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
switch (Token) {
default: // End of attributes.
return HaveError;
+ case lltok::StringConstant: {
+ if (ParseStringAttribute(B))
+ return true;
+ continue;
+ }
case lltok::kw_align: {
unsigned Alignment;
if (ParseOptionalAlignment(Alignment))
@@ -1309,6 +1371,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
switch (Token) {
default: // End of attributes.
return HaveError;
+ case lltok::StringConstant: {
+ if (ParseStringAttribute(B))
+ return true;
+ continue;
+ }
case lltok::kw_dereferenceable: {
uint64_t Bytes;
if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -1323,6 +1390,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
B.addDereferenceableOrNullAttr(Bytes);
continue;
}
+ case lltok::kw_align: {
+ unsigned Alignment;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ B.addAlignmentAttr(Alignment);
+ continue;
+ }
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
@@ -1330,7 +1404,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
// Error handling.
- case lltok::kw_align:
case lltok::kw_byval:
case lltok::kw_inalloca:
case lltok::kw_nest:
@@ -1473,6 +1546,10 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'preserve_mostcc'
/// ::= 'preserve_allcc'
/// ::= 'ghccc'
+/// ::= 'x86_intrcc'
+/// ::= 'hhvmcc'
+/// ::= 'hhvm_ccc'
+/// ::= 'cxx_fast_tlscc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -1501,6 +1578,10 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break;
case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break;
case lltok::kw_ghccc: CC = CallingConv::GHC; break;
+ case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break;
+ case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
+ case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
+ case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);
@@ -1883,7 +1964,59 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
return false;
}
+/// ParseOptionalOperandBundles
+/// ::= /*empty*/
+/// ::= '[' OperandBundle [, OperandBundle ]* ']'
+///
+/// OperandBundle
+/// ::= bundle-tag '(' ')'
+/// ::= bundle-tag '(' Type Value [, Type Value ]* ')'
+///
+/// bundle-tag ::= String Constant
+bool LLParser::ParseOptionalOperandBundles(
+ SmallVectorImpl<OperandBundleDef> &BundleList, PerFunctionState &PFS) {
+ LocTy BeginLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lsquare))
+ return false;
+
+ while (Lex.getKind() != lltok::rsquare) {
+ // If this isn't the first operand bundle, we need a comma.
+ if (!BundleList.empty() &&
+ ParseToken(lltok::comma, "expected ',' in input list"))
+ return true;
+ std::string Tag;
+ if (ParseStringConstant(Tag))
+ return true;
+
+ if (ParseToken(lltok::lparen, "expected '(' in operand bundle"))
+ return true;
+
+ std::vector<Value *> Inputs;
+ while (Lex.getKind() != lltok::rparen) {
+ // If this isn't the first input, we need a comma.
+ if (!Inputs.empty() &&
+ ParseToken(lltok::comma, "expected ',' in input list"))
+ return true;
+
+ Type *Ty = nullptr;
+ Value *Input = nullptr;
+ if (ParseType(Ty) || ParseValue(Ty, Input, PFS))
+ return true;
+ Inputs.push_back(Input);
+ }
+
+ BundleList.emplace_back(std::move(Tag), std::move(Inputs));
+
+ Lex.Lex(); // Lex the ')'.
+ }
+
+ if (BundleList.empty())
+ return Error(BeginLoc, "operand bundle set must not be empty");
+
+ Lex.Lex(); // Lex the ']'.
+ return false;
+}
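For context, a hedged C++ sketch of what the parsed syntax corresponds to (helper invented; "deopt" is just an example tag): each parsed bundle becomes an OperandBundleDef attached at call creation, mirroring the emplace_back above:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include <vector>
using namespace llvm;

static CallInst *callWithDeoptState(Value *Callee, ArrayRef<Value *> Args,
                                    ArrayRef<Value *> DeoptState) {
  SmallVector<OperandBundleDef, 1> Bundles;
  Bundles.emplace_back("deopt", std::vector<Value *>(DeoptState.begin(),
                                                     DeoptState.end()));
  return CallInst::Create(Callee, Args, Bundles);
}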
/// ParseArgumentList - Parse the argument list for a function type or function
/// prototype.
@@ -2146,31 +2279,29 @@ LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
: P(p), F(f), FunctionNumber(functionNumber) {
// Insert unnamed arguments into the NumberedVals list.
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
- AI != E; ++AI)
- if (!AI->hasName())
- NumberedVals.push_back(AI);
+ for (Argument &A : F.args())
+ if (!A.hasName())
+ NumberedVals.push_back(&A);
}
LLParser::PerFunctionState::~PerFunctionState() {
// If there were any forward referenced non-basicblock values, delete them.
- for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
- I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
- if (!isa<BasicBlock>(I->second.first)) {
- I->second.first->replaceAllUsesWith(
- UndefValue::get(I->second.first->getType()));
- delete I->second.first;
- I->second.first = nullptr;
- }
- for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
- I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
- if (!isa<BasicBlock>(I->second.first)) {
- I->second.first->replaceAllUsesWith(
- UndefValue::get(I->second.first->getType()));
- delete I->second.first;
- I->second.first = nullptr;
- }
+ for (const auto &P : ForwardRefVals) {
+ if (isa<BasicBlock>(P.second.first))
+ continue;
+ P.second.first->replaceAllUsesWith(
+ UndefValue::get(P.second.first->getType()));
+ delete P.second.first;
+ }
+
+ for (const auto &P : ForwardRefValIDs) {
+ if (isa<BasicBlock>(P.second.first))
+ continue;
+ P.second.first->replaceAllUsesWith(
+ UndefValue::get(P.second.first->getType()));
+ delete P.second.first;
+ }
}
bool LLParser::PerFunctionState::FinishFunction() {
@@ -2189,16 +2320,15 @@ bool LLParser::PerFunctionState::FinishFunction() {
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
-Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
- Type *Ty, LocTy Loc) {
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
+ LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable().lookup(Name);
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (!Val) {
- std::map<std::string, std::pair<Value*, LocTy> >::iterator
- I = ForwardRefVals.find(Name);
+ auto I = ForwardRefVals.find(Name);
if (I != ForwardRefVals.end())
Val = I->second.first;
}
@@ -2222,25 +2352,24 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
// Otherwise, create a new forward reference for this value and remember it.
Value *FwdVal;
- if (Ty->isLabelTy())
+ if (Ty->isLabelTy()) {
FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
- else
+ } else {
FwdVal = new Argument(Ty, Name);
+ }
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
}
-Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
- LocTy Loc) {
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
// If this is a forward reference for the value, see if we already created a
// forward ref record.
if (!Val) {
- std::map<unsigned, std::pair<Value*, LocTy> >::iterator
- I = ForwardRefValIDs.find(ID);
+ auto I = ForwardRefValIDs.find(ID);
if (I != ForwardRefValIDs.end())
Val = I->second.first;
}
@@ -2263,10 +2392,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
// Otherwise, create a new forward reference for this value and remember it.
Value *FwdVal;
- if (Ty->isLabelTy())
+ if (Ty->isLabelTy()) {
FwdVal = BasicBlock::Create(F.getContext(), "", &F);
- else
+ } else {
FwdVal = new Argument(Ty);
+ }
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -2295,14 +2425,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
return P.Error(NameLoc, "instruction expected to be numbered '%" +
Twine(NumberedVals.size()) + "'");
- std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
- ForwardRefValIDs.find(NameID);
+ auto FI = ForwardRefValIDs.find(NameID);
if (FI != ForwardRefValIDs.end()) {
- if (FI->second.first->getType() != Inst->getType())
+ Value *Sentinel = FI->second.first;
+ if (Sentinel->getType() != Inst->getType())
return P.Error(NameLoc, "instruction forward referenced with type '" +
getTypeString(FI->second.first->getType()) + "'");
- FI->second.first->replaceAllUsesWith(Inst);
- delete FI->second.first;
+
+ Sentinel->replaceAllUsesWith(Inst);
+ delete Sentinel;
ForwardRefValIDs.erase(FI);
}
@@ -2311,14 +2442,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
}
// Otherwise, the instruction had a name. Resolve forward refs and set it.
- std::map<std::string, std::pair<Value*, LocTy> >::iterator
- FI = ForwardRefVals.find(NameStr);
+ auto FI = ForwardRefVals.find(NameStr);
if (FI != ForwardRefVals.end()) {
- if (FI->second.first->getType() != Inst->getType())
+ Value *Sentinel = FI->second.first;
+ if (Sentinel->getType() != Inst->getType())
return P.Error(NameLoc, "instruction forward referenced with type '" +
getTypeString(FI->second.first->getType()) + "'");
- FI->second.first->replaceAllUsesWith(Inst);
- delete FI->second.first;
+
+ Sentinel->replaceAllUsesWith(Inst);
+ delete Sentinel;
ForwardRefVals.erase(FI);
}
@@ -2421,6 +2553,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case lltok::kw_null: ID.Kind = ValID::t_Null; break;
case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+ case lltok::kw_none: ID.Kind = ValID::t_None; break;
case lltok::lbrace: {
// ValID ::= '{' ConstVector '}'
@@ -2430,9 +2563,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ParseToken(lltok::rbrace, "expected end of struct constant"))
return true;
- ID.ConstantStructElts = new Constant*[Elts.size()];
+ ID.ConstantStructElts = make_unique<Constant *[]>(Elts.size());
ID.UIntVal = Elts.size();
- memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ memcpy(ID.ConstantStructElts.get(), Elts.data(),
+ Elts.size() * sizeof(Elts[0]));
ID.Kind = ValID::t_ConstantStruct;
return false;
}
@@ -2451,8 +2585,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (isPackedStruct) {
- ID.ConstantStructElts = new Constant*[Elts.size()];
- memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.ConstantStructElts = make_unique<Constant *[]>(Elts.size());
+ memcpy(ID.ConstantStructElts.get(), Elts.data(),
+ Elts.size() * sizeof(Elts[0]));
ID.UIntVal = Elts.size();
ID.Kind = ValID::t_PackedConstantStruct;
return false;
@@ -2891,7 +3026,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
}
- SmallPtrSet<const Type*, 4> Visited;
+ SmallPtrSet<Type*, 4> Visited;
if (!Indices.empty() && !Ty->isSized(&Visited))
return Error(ID.Loc, "base element of getelementptr must be sized");
@@ -3066,6 +3201,11 @@ struct DwarfTagField : public MDUnsignedField {
DwarfTagField(dwarf::Tag DefaultTag)
: MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {}
};
+struct DwarfMacinfoTypeField : public MDUnsignedField {
+ DwarfMacinfoTypeField() : MDUnsignedField(0, dwarf::DW_MACINFO_vendor_ext) {}
+ DwarfMacinfoTypeField(dwarf::MacinfoRecordType DefaultType)
+ : MDUnsignedField(DefaultType, dwarf::DW_MACINFO_vendor_ext) {}
+};
struct DwarfAttEncodingField : public MDUnsignedField {
DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {}
};
@@ -3159,6 +3299,26 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) {
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ DwarfMacinfoTypeField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfMacinfo)
+ return TokError("expected DWARF macinfo type");
+
+ unsigned Macinfo = dwarf::getMacinfo(Lex.getStrVal());
+ if (Macinfo == dwarf::DW_MACINFO_invalid)
+ return TokError(
+ "invalid DWARF macinfo type" + Twine(" '") + Lex.getStrVal() + "'");
+ assert(Macinfo <= Result.Max && "Expected valid DWARF macinfo type");
+
+ Result.assign(Macinfo);
+ Lex.Lex();
+ return false;
+}
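Loosely, this specialization lets the new macinfo field be spelled either symbolically or as a bare integer; a hypothetical pair of equivalent records (metadata numbers invented):

  !0 = !DIMacro(type: DW_MACINFO_define, line: 3, name: "X", value: "1")
  !0 = !DIMacro(type: 1, line: 3, name: "X", value: "1")  ; same, numeric form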
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
DwarfVirtualityField &Result) {
if (Lex.getKind() == lltok::APSInt)
return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
@@ -3569,8 +3729,11 @@ bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
/// isOptimized: true, flags: "-O2", runtimeVersion: 1,
/// splitDebugFilename: "abc.debug", emissionKind: 1,
/// enums: !1, retainedTypes: !2, subprograms: !3,
-/// globals: !4, imports: !5, dwoId: 0x0abcd)
+/// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd)
bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
+ if (!IsDistinct)
+ return Lex.Error("missing 'distinct', required for !DICompileUnit");
+
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(language, DwarfLangField, ); \
REQUIRED(file, MDField, (/* AllowNull */ false)); \
@@ -3585,16 +3748,16 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
OPTIONAL(subprograms, MDField, ); \
OPTIONAL(globals, MDField, ); \
OPTIONAL(imports, MDField, ); \
+ OPTIONAL(macros, MDField, ); \
OPTIONAL(dwoId, MDUnsignedField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(DICompileUnit,
- (Context, language.Val, file.Val, producer.Val,
- isOptimized.Val, flags.Val, runtimeVersion.Val,
- splitDebugFilename.Val, emissionKind.Val, enums.Val,
- retainedTypes.Val, subprograms.Val, globals.Val,
- imports.Val, dwoId.Val));
+ Result = DICompileUnit::getDistinct(
+ Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
+ runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
+ retainedTypes.Val, subprograms.Val, globals.Val, imports.Val, macros.Val,
+ dwoId.Val);
return false;
}
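With this hunk the leading 'distinct' becomes mandatory and a 'macros:' operand is accepted; a minimal sketch (metadata numbers invented):

  !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1,
                               producer: "clang", isOptimized: false,
                               runtimeVersion: 0, emissionKind: 1,
                               enums: !2, macros: !3)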
@@ -3604,9 +3767,10 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// isDefinition: true, scopeLine: 8, containingType: !3,
/// virtuality: DW_VIRTUALITY_pure_virtual,
/// virtualIndex: 10, flags: 11,
-/// isOptimized: false, function: void ()* @_Z3foov,
-/// templateParams: !4, declaration: !5, variables: !6)
+/// isOptimized: false, templateParams: !4, declaration: !5,
+/// variables: !6)
bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
+ auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(scope, MDField, ); \
OPTIONAL(name, MDStringField, ); \
@@ -3622,19 +3786,23 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
- OPTIONAL(function, MDConstant, ); \
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
OPTIONAL(variables, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
+ if (isDefinition.Val && !IsDistinct)
+ return Lex.Error(
+ Loc,
+ "missing 'distinct', required for !DISubprogram when 'isDefinition'");
+
Result = GET_OR_DISTINCT(
- DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val,
- line.Val, type.Val, isLocal.Val, isDefinition.Val,
- scopeLine.Val, containingType.Val, virtuality.Val,
- virtualIndex.Val, flags.Val, isOptimized.Val, function.Val,
- templateParams.Val, declaration.Val, variables.Val));
+ DISubprogram,
+ (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
+ type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val,
+ containingType.Val, virtuality.Val, virtualIndex.Val, flags.Val,
+ isOptimized.Val, templateParams.Val, declaration.Val, variables.Val));
return false;
}
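After this hunk the 'function:' operand is gone and definitions must be 'distinct'; a sketch of the accepted form (metadata numbers invented):

  !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2,
                              type: !5, isDefinition: true, variables: !6)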
@@ -3685,6 +3853,39 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
return false;
}
+/// ParseDIMacro:
+/// ::= !DIMacro(type: DW_MACINFO_define, line: 9, name: "SomeMacro", value: "SomeValue")
+bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(type, DwarfMacinfoTypeField, ); \
+ REQUIRED(line, LineField, ); \
+ REQUIRED(name, MDStringField, ); \
+ OPTIONAL(value, MDStringField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIMacro,
+ (Context, type.Val, line.Val, name.Val, value.Val));
+ return false;
+}
+
+/// ParseDIMacroFile:
+/// ::= !DIMacroFile(line: 9, file: !2, nodes: !3)
+bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \
+ REQUIRED(line, LineField, ); \
+ REQUIRED(file, MDField, ); \
+ OPTIONAL(nodes, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIMacroFile,
+ (Context, type.Val, line.Val, file.Val, nodes.Val));
+ return false;
+}
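A hedged sketch of how the two new nodes nest (metadata numbers invented; 'type:' on !DIMacroFile defaults to DW_MACINFO_start_file):

  !0 = !DIMacro(type: DW_MACINFO_undef, line: 9, name: "NDEBUG")
  !1 = !DIMacroFile(line: 0, file: !2, nodes: !3)
  !3 = !{!0}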
+
/// ParseDIModule:
/// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG",
/// includePath: "/usr/include", isysroot: "/")
@@ -3762,24 +3963,25 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
}
/// ParseDILocalVariable:
-/// ::= !DILocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo",
+/// ::= !DILocalVariable(arg: 7, scope: !0, name: "foo",
+/// file: !1, line: 7, type: !2, flags: 7)
+/// ::= !DILocalVariable(scope: !0, name: "foo",
/// file: !1, line: 7, type: !2, arg: 2, flags: 7)
bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(tag, DwarfTagField, ); \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
- OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \
OPTIONAL(flags, DIFlagField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DILocalVariable,
- (Context, tag.Val, scope.Val, name.Val, file.Val,
- line.Val, type.Val, arg.Val, flags.Val));
+ (Context, scope.Val, name.Val, file.Val, line.Val,
+ type.Val, arg.Val, flags.Val));
return false;
}
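The 'tag:' operand is dropped; arguments are now distinguished purely by a non-zero 'arg:'. A before/after sketch (metadata numbers invented):

  ; old: !DILocalVariable(tag: DW_TAG_arg_variable, arg: 1, ...)
  !7 = !DILocalVariable(name: "n", arg: 1, scope: !4, file: !1, line: 3, type: !6)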
@@ -3969,13 +4171,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_InlineAsm: {
- PointerType *PTy = dyn_cast<PointerType>(Ty);
- FunctionType *FTy =
- PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : nullptr;
- if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
- V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1,
- (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2)));
+ V = InlineAsm::get(ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1,
+ (ID.UIntVal >> 1) & 1,
+ (InlineAsm::AsmDialect(ID.UIntVal >> 2)));
return false;
}
case ValID::t_GlobalName:
@@ -4035,6 +4235,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "invalid type for null constant");
V = Constant::getNullValue(Ty);
return false;
+ case ValID::t_None:
+ if (!Ty->isTokenTy())
+ return Error(ID.Loc, "invalid type for none constant");
+ V = Constant::getNullValue(Ty);
+ return false;
case ValID::t_Constant:
if (ID.ConstantVal->getType() != Ty)
return Error(ID.Loc, "constant expression type mismatch");
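The new 'none' keyword converts only when the expected type is the new token type; a one-line sketch (value name invented):

  %pad = cleanuppad within none []   ; 'none' is the empty token-typed value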
@@ -4056,8 +4261,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "element " + Twine(i) +
" of struct initializer doesn't match struct element type");
- V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts,
- ID.UIntVal));
+ V = ConstantStruct::get(
+ ST, makeArrayRef(ID.ConstantStructElts.get(), ID.UIntVal));
} else
return Error(ID.Loc, "constant expression type mismatch");
return false;
@@ -4065,11 +4270,35 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
llvm_unreachable("Invalid ValID");
}
+bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
+ C = nullptr;
+ ValID ID;
+ auto Loc = Lex.getLoc();
+ if (ParseValID(ID, /*PFS=*/nullptr))
+ return true;
+ switch (ID.Kind) {
+ case ValID::t_APSInt:
+ case ValID::t_APFloat:
+ case ValID::t_Undef:
+ case ValID::t_Constant:
+ case ValID::t_ConstantStruct:
+ case ValID::t_PackedConstantStruct: {
+ Value *V;
+ if (ConvertValIDToValue(Ty, ID, V, /*PFS=*/nullptr))
+ return true;
+ assert(isa<Constant>(V) && "Expected a constant value");
+ C = cast<Constant>(V);
+ return false;
+ }
+ default:
+ return Error(Loc, "expected a constant value");
+ }
+}
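Loosely, the ValID kinds this filter admits correspond to operand spellings like the following (a sketch, not exhaustive):

  7                ; t_APSInt
  4.5              ; t_APFloat
  undef            ; t_Undef
  { i32 1, i8 2 }  ; t_ConstantStruct
  <{ i8 3 }>       ; t_PackedConstantStruct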
+
bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
V = nullptr;
ValID ID;
- return ParseValID(ID, PFS) ||
- ConvertValIDToValue(Ty, ID, V, PFS);
+ return ParseValID(ID, PFS) || ConvertValIDToValue(Ty, ID, V, PFS);
}
bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
@@ -4242,8 +4471,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (!FunctionName.empty()) {
// If this was a definition of a forward reference, remove the definition
// from the forward reference table and fill in the forward ref.
- std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
- ForwardRefVals.find(FunctionName);
+ auto FRVI = ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
Fn = M->getFunction(FunctionName);
if (!Fn)
@@ -4265,8 +4493,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
} else {
// If this is a definition of a forward referenced function, make sure the
// types agree.
- std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
- = ForwardRefValIDs.find(NumberedVals.size());
+ auto I = ForwardRefValIDs.find(NumberedVals.size());
if (I != ForwardRefValIDs.end()) {
Fn = cast<Function>(I->second.first);
if (Fn->getType() != PFT)
@@ -4498,6 +4725,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
case lltok::kw_invoke: return ParseInvoke(Inst, PFS);
case lltok::kw_resume: return ParseResume(Inst, PFS);
+ case lltok::kw_cleanupret: return ParseCleanupRet(Inst, PFS);
+ case lltok::kw_catchret: return ParseCatchRet(Inst, PFS);
+ case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
+ case lltok::kw_catchpad: return ParseCatchPad(Inst, PFS);
+ case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
@@ -4580,6 +4812,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None);
case lltok::kw_tail: return ParseCall(Inst, PFS, CallInst::TCK_Tail);
case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail);
+ case lltok::kw_notail: return ParseCall(Inst, PFS, CallInst::TCK_NoTail);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
case lltok::kw_load: return ParseLoad(Inst, PFS);
@@ -4798,15 +5031,15 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
+ SmallVector<OperandBundleDef, 2> BundleList;
BasicBlock *NormalBB, *UnwindBB;
- if (ParseOptionalCallingConv(CC) ||
- ParseOptionalReturnAttrs(RetAttrs) ||
+ if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
- ParseValID(CalleeID) ||
- ParseParameterList(ArgList, PFS) ||
+ ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
NoBuiltinLoc) ||
+ ParseOptionalOperandBundles(BundleList, PFS) ||
ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
ParseTypeAndBasicBlock(NormalBB, PFS) ||
ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
@@ -4829,6 +5062,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
Ty = FunctionType::get(RetType, ParamTypes, false);
}
+ CalleeID.FTy = Ty;
+
// Look up the callee.
Value *Callee;
if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -4880,7 +5115,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Finish off the Attribute and check them
AttributeSet PAL = AttributeSet::get(Context, Attrs);
- InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args);
+ InvokeInst *II =
+ InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args, BundleList);
II->setCallingConv(CC);
II->setAttributes(PAL);
ForwardRefAttrGroups[II] = FwdRefAttrGrps;
@@ -4900,6 +5136,183 @@ bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
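With the ParseOptionalOperandBundles call added above, an invoke may now carry bundles between its attribute list and the 'to' clause; a sketch with invented names:

  invoke void @g() [ "deopt"(i32 10, i8* %p) ]
          to label %cont unwind label %lpad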
+bool LLParser::ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+ PerFunctionState &PFS) {
+ if (ParseToken(lltok::lsquare, "expected '[' in catchpad/cleanuppad"))
+ return true;
+
+ while (Lex.getKind() != lltok::rsquare) {
+ // If this isn't the first argument, we need a comma.
+ if (!Args.empty() &&
+ ParseToken(lltok::comma, "expected ',' in argument list"))
+ return true;
+
+ // Parse the argument.
+ LocTy ArgLoc;
+ Type *ArgTy = nullptr;
+ if (ParseType(ArgTy, ArgLoc))
+ return true;
+
+ Value *V;
+ if (ArgTy->isMetadataTy()) {
+ if (ParseMetadataAsValue(V, PFS))
+ return true;
+ } else {
+ if (ParseValue(ArgTy, V, PFS))
+ return true;
+ }
+ Args.push_back(V);
+ }
+
+ Lex.Lex(); // Lex the ']'.
+ return false;
+}
+
+/// ParseCleanupRet
+/// ::= 'cleanupret' from Value unwind ('to' 'caller' | TypeAndValue)
+bool LLParser::ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *CleanupPad = nullptr;
+
+ if (ParseToken(lltok::kw_from, "expected 'from' after cleanupret"))
+ return true;
+
+ if (ParseValue(Type::getTokenTy(Context), CleanupPad, PFS))
+ return true;
+
+ if (ParseToken(lltok::kw_unwind, "expected 'unwind' in cleanupret"))
+ return true;
+
+ BasicBlock *UnwindBB = nullptr;
+ if (Lex.getKind() == lltok::kw_to) {
+ Lex.Lex();
+ if (ParseToken(lltok::kw_caller, "expected 'caller' in cleanupret"))
+ return true;
+ } else {
+ if (ParseTypeAndBasicBlock(UnwindBB, PFS))
+ return true;
+ }
+
+ Inst = CleanupReturnInst::Create(CleanupPad, UnwindBB);
+ return false;
+}
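Both unwind forms this routine accepts, sketched with invented names:

  cleanupret from %pad unwind to caller
  cleanupret from %pad unwind label %outer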
+
+/// ParseCatchRet
+/// ::= 'catchret' from Parent Value 'to' TypeAndValue
+bool LLParser::ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *CatchPad = nullptr;
+
+ if (ParseToken(lltok::kw_from, "expected 'from' after catchret"))
+ return true;
+
+ if (ParseValue(Type::getTokenTy(Context), CatchPad, PFS))
+ return true;
+
+ BasicBlock *BB;
+ if (ParseToken(lltok::kw_to, "expected 'to' in catchret") ||
+ ParseTypeAndBasicBlock(BB, PFS))
+ return true;
+
+ Inst = CatchReturnInst::Create(CatchPad, BB);
+ return false;
+}
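A one-line sketch of the accepted form (names invented):

  catchret from %cp to label %cont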
+
+/// ParseCatchSwitch
+/// ::= 'catchswitch' within Parent '[' TypeAndValue (',' TypeAndValue)* ']'
+///     'unwind' ('to' 'caller' | TypeAndValue)
+bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *ParentPad;
+ LocTy BBLoc;
+
+ if (ParseToken(lltok::kw_within, "expected 'within' after catchswitch"))
+ return true;
+
+ if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+ Lex.getKind() != lltok::LocalVarID)
+ return TokError("expected scope value for catchswitch");
+
+ if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+ return true;
+
+ if (ParseToken(lltok::lsquare, "expected '[' with catchswitch labels"))
+ return true;
+
+ SmallVector<BasicBlock *, 32> Table;
+ do {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ Table.push_back(DestBB);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rsquare, "expected ']' after catchswitch labels"))
+ return true;
+
+ if (ParseToken(lltok::kw_unwind,
+ "expected 'unwind' after catchswitch scope"))
+ return true;
+
+ BasicBlock *UnwindBB = nullptr;
+ if (EatIfPresent(lltok::kw_to)) {
+ if (ParseToken(lltok::kw_caller, "expected 'caller' in catchswitch"))
+ return true;
+ } else {
+ if (ParseTypeAndBasicBlock(UnwindBB, PFS))
+ return true;
+ }
+
+ auto *CatchSwitch =
+ CatchSwitchInst::Create(ParentPad, UnwindBB, Table.size());
+ for (BasicBlock *DestBB : Table)
+ CatchSwitch->addHandler(DestBB);
+ Inst = CatchSwitch;
+ return false;
+}
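A sketch of the full syntax this routine parses (names invented):

  %cs = catchswitch within none [label %h1, label %h2] unwind to caller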
+
+/// ParseCatchPad
+/// ::= 'catchpad' within Parent ParamList
+bool LLParser::ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *CatchSwitch = nullptr;
+
+ if (ParseToken(lltok::kw_within, "expected 'within' after catchpad"))
+ return true;
+
+ if (Lex.getKind() != lltok::LocalVar && Lex.getKind() != lltok::LocalVarID)
+ return TokError("expected scope value for catchpad");
+
+ if (ParseValue(Type::getTokenTy(Context), CatchSwitch, PFS))
+ return true;
+
+ SmallVector<Value *, 8> Args;
+ if (ParseExceptionArgs(Args, PFS))
+ return true;
+
+ Inst = CatchPadInst::Create(CatchSwitch, Args);
+ return false;
+}
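A sketch with invented names; the bracketed arguments are free-form and personality-specific:

  %cp = catchpad within %cs [i8* null, i32 64, i8* null]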
+
+/// ParseCleanupPad
+/// ::= 'cleanuppad' within Parent ParamList
+bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
+ Value *ParentPad = nullptr;
+
+ if (ParseToken(lltok::kw_within, "expected 'within' after cleanuppad"))
+ return true;
+
+ if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+ Lex.getKind() != lltok::LocalVarID)
+ return TokError("expected scope value for cleanuppad");
+
+ if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+ return true;
+
+ SmallVector<Value *, 8> Args;
+ if (ParseExceptionArgs(Args, PFS))
+ return true;
+
+ Inst = CleanupPadInst::Create(ParentPad, Args);
+ return false;
+}
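A sketch of the accepted forms (names invented; 'within' takes either 'none' or a parent pad token):

  %outer = cleanuppad within none []
  %inner = cleanuppad within %outer []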
+
//===----------------------------------------------------------------------===//
// Binary Operators.
//===----------------------------------------------------------------------===//
@@ -5196,12 +5609,14 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseCall
-/// ::= 'call' OptionalCallingConv OptionalAttrs Type Value
-/// ParameterList OptionalAttrs
-/// ::= 'tail' 'call' OptionalCallingConv OptionalAttrs Type Value
-/// ParameterList OptionalAttrs
-/// ::= 'musttail' 'call' OptionalCallingConv OptionalAttrs Type Value
-/// ParameterList OptionalAttrs
+/// ::= 'call' OptionalFastMathFlags OptionalCallingConv
+/// OptionalAttrs Type Value ParameterList OptionalAttrs
+/// ::= 'tail' 'call' OptionalFastMathFlags OptionalCallingConv
+/// OptionalAttrs Type Value ParameterList OptionalAttrs
+/// ::= 'musttail' 'call' OptionalFastMathFlags OptionalCallingConv
+/// OptionalAttrs Type Value ParameterList OptionalAttrs
+/// ::= 'notail' 'call' OptionalFastMathFlags OptionalCallingConv
+/// OptionalAttrs Type Value ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
CallInst::TailCallKind TCK) {
AttrBuilder RetAttrs, FnAttrs;
@@ -5212,20 +5627,29 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
+ SmallVector<OperandBundleDef, 2> BundleList;
LocTy CallLoc = Lex.getLoc();
- if ((TCK != CallInst::TCK_None &&
- ParseToken(lltok::kw_call, "expected 'tail call'")) ||
- ParseOptionalCallingConv(CC) ||
- ParseOptionalReturnAttrs(RetAttrs) ||
+ if (TCK != CallInst::TCK_None &&
+ ParseToken(lltok::kw_call,
+ "expected 'tail call', 'musttail call', or 'notail call'"))
+ return true;
+
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+
+ if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
PFS.getFunction().isVarArg()) ||
- ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
- BuiltinLoc))
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) ||
+ ParseOptionalOperandBundles(BundleList, PFS))
return true;
+ if (FMF.any() && !RetType->isFPOrFPVectorTy())
+ return Error(CallLoc, "fast-math-flags specified for call without "
+ "floating-point scalar or vector return type");
+
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
@@ -5242,6 +5666,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
Ty = FunctionType::get(RetType, ParamTypes, false);
}
+ CalleeID.FTy = Ty;
+
// Look up the callee.
Value *Callee;
if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -5293,9 +5719,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Finish off the Attribute and check them
AttributeSet PAL = AttributeSet::get(Context, Attrs);
- CallInst *CI = CallInst::Create(Ty, Callee, Args);
+ CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList);
CI->setTailCallKind(TCK);
CI->setCallingConv(CC);
+ if (FMF.any())
+ CI->setFastMathFlags(FMF);
CI->setAttributes(PAL);
ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
Inst = CI;
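Taken together, the hunks above let a call carry a 'notail' marker, fast-math flags (only with a floating-point scalar or vector return type), and operand bundles; sketches with invented names:

  %y = notail call fast float @llvm.sqrt.f32(float %x)
  call void @g() [ "deopt"(i32 7) ]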
@@ -5614,7 +6042,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- SmallPtrSet<const Type*, 4> Visited;
+ SmallPtrSet<Type*, 4> Visited;
if (!Indices.empty() && !Ty->isSized(&Visited))
return Error(Loc, "base element of getelementptr must be sized");
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 6e57b3e..f61a5e5 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -46,29 +46,32 @@ namespace llvm {
/// or a symbolic (%var) reference. This is just a discriminated union.
struct ValID {
enum {
- t_LocalID, t_GlobalID, // ID in UIntVal.
- t_LocalName, t_GlobalName, // Name in StrVal.
- t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
- t_Null, t_Undef, t_Zero, // No value.
- t_EmptyArray, // No value: []
- t_Constant, // Value in ConstantVal.
- t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
- t_ConstantStruct, // Value in ConstantStructElts.
- t_PackedConstantStruct // Value in ConstantStructElts.
- } Kind;
+ t_LocalID, t_GlobalID, // ID in UIntVal.
+ t_LocalName, t_GlobalName, // Name in StrVal.
+ t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal.
+ t_Null, t_Undef, t_Zero, t_None, // No value.
+ t_EmptyArray, // No value: []
+ t_Constant, // Value in ConstantVal.
+ t_InlineAsm, // Value in FTy/StrVal/StrVal2/UIntVal.
+ t_ConstantStruct, // Value in ConstantStructElts.
+ t_PackedConstantStruct // Value in ConstantStructElts.
+ } Kind = t_LocalID;
LLLexer::LocTy Loc;
unsigned UIntVal;
+ FunctionType *FTy = nullptr;
std::string StrVal, StrVal2;
APSInt APSIntVal;
- APFloat APFloatVal;
+ APFloat APFloatVal{0.0};
Constant *ConstantVal;
- Constant **ConstantStructElts;
-
- ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
- ~ValID() {
- if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
- delete [] ConstantStructElts;
+ std::unique_ptr<Constant *[]> ConstantStructElts;
+
+ ValID() = default;
+ ValID(const ValID &RHS)
+ : Kind(RHS.Kind), Loc(RHS.Loc), UIntVal(RHS.UIntVal), FTy(RHS.FTy),
+ StrVal(RHS.StrVal), StrVal2(RHS.StrVal2), APSIntVal(RHS.APSIntVal),
+ APFloatVal(RHS.APFloatVal), ConstantVal(RHS.ConstantVal) {
+ assert(!RHS.ConstantStructElts);
}
bool operator<(const ValID &RHS) const {
@@ -143,6 +146,8 @@ namespace llvm {
Slots(Slots), BlockAddressPFS(nullptr) {}
bool Run();
+ bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
+
LLVMContext &getContext() { return Context; }
private:
@@ -154,6 +159,10 @@ namespace llvm {
return Error(Lex.getLoc(), Msg);
}
+ /// Restore the internal name and slot mappings using the mappings that
+ /// were created at an earlier parsing stage.
+ void restoreParsingState(const SlotMapping *Slots);
+
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
@@ -210,6 +219,8 @@ namespace llvm {
return ParseUInt64(Val);
}
+ bool ParseStringAttribute(AttrBuilder &B);
+
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
@@ -343,10 +354,12 @@ namespace llvm {
bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS);
+ bool parseConstantValue(Type *Ty, Constant *&C);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
return ParseValue(Ty, V, &PFS);
}
+
bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
PerFunctionState &PFS) {
Loc = Lex.getLoc();
@@ -381,6 +394,13 @@ namespace llvm {
bool IsMustTailCall = false,
bool InVarArgsFunc = false);
+ bool
+ ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
+ PerFunctionState &PFS);
+
+ bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+ PerFunctionState &PFS);
+
// Constant Parsing.
bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
bool ParseGlobalValue(Type *Ty, Constant *&V);
@@ -441,6 +461,11 @@ namespace llvm {
bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
unsigned OperandType);
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 691f085..29a7f16 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -49,10 +49,14 @@ namespace lltok {
kw_external, kw_thread_local,
kw_localdynamic, kw_initialexec, kw_localexec,
kw_zeroinitializer,
- kw_undef, kw_null,
+ kw_undef, kw_null, kw_none,
kw_to,
+ kw_caller,
+ kw_within,
+ kw_from,
kw_tail,
kw_musttail,
+ kw_notail,
kw_target,
kw_triple,
kw_unwind,
@@ -96,6 +100,9 @@ namespace lltok {
kw_webkit_jscc, kw_anyregcc,
kw_preserve_mostcc, kw_preserve_allcc,
kw_ghccc,
+ kw_x86_intrcc,
+ kw_hhvmcc, kw_hhvm_ccc,
+ kw_cxx_fast_tlscc,
// Attributes:
kw_attributes,
@@ -109,6 +116,8 @@ namespace lltok {
kw_convergent,
kw_dereferenceable,
kw_dereferenceable_or_null,
+ kw_inaccessiblememonly,
+ kw_inaccessiblemem_or_argmemonly,
kw_inlinehint,
kw_inreg,
kw_jumptable,
@@ -121,6 +130,7 @@ namespace lltok {
kw_noduplicate,
kw_noimplicitfloat,
kw_noinline,
+ kw_norecurse,
kw_nonlazybind,
kw_nonnull,
kw_noredzone,
@@ -177,7 +187,8 @@ namespace lltok {
kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
- kw_unreachable,
+ kw_unreachable, kw_cleanupret, kw_catchswitch, kw_catchret, kw_catchpad,
+ kw_cleanuppad,
kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
kw_getelementptr,
@@ -209,6 +220,7 @@ namespace lltok {
DwarfLang, // DW_LANG_foo
DwarfOp, // DW_OP_foo
DIFlag, // DIFlagFoo
+ DwarfMacinfo, // DW_MACINFO_foo
// Type valued tokens (TyVal).
Type,
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
index 9145a54..4e55e62 100644
--- a/contrib/llvm/lib/AsmParser/Parser.cpp
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -66,3 +66,15 @@ std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
MemoryBufferRef F(AsmString, "<string>");
return parseAssembly(F, Err, Context, Slots);
}
+
+Constant *llvm::parseConstantValue(StringRef Asm, SMDiagnostic &Err,
+ const Module &M, const SlotMapping *Slots) {
+ SourceMgr SM;
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Asm);
+ SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
+ Constant *C;
+ if (LLParser(Asm, SM, Err, const_cast<Module *>(&M))
+ .parseStandaloneConstantValue(C, Slots))
+ return nullptr;
+ return C;
+}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
index 289c76e..385c18a 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/BitReader.h"
+#include "llvm-c/Core.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
@@ -22,12 +23,25 @@ using namespace llvm;
/* Builds a module from the bitcode in the specified memory buffer, returning a
reference to the module via the OutModule parameter. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage) {
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule,
+ char **OutMessage) {
return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
OutMessage);
}
+LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule) {
+ return LLVMParseBitcodeInContext2(wrap(&getGlobalContext()), MemBuf,
+ OutModule);
+}
+
+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+ auto *Message = reinterpret_cast<std::string *>(C);
+ raw_string_ostream Stream(*Message);
+ DiagnosticPrinterRawOStream DP(Stream);
+ DI.print(DP);
+}
+
LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule,
@@ -35,18 +49,36 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
LLVMContext &Ctx = *unwrap(ContextRef);
+ LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+ Ctx.getDiagnosticHandler();
+ void *OldDiagnosticContext = Ctx.getDiagnosticContext();
std::string Message;
- raw_string_ostream Stream(Message);
- DiagnosticPrinterRawOStream DP(Stream);
+ Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
+
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+
+ Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(
- Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); });
if (ModuleOrErr.getError()) {
- if (OutMessage) {
- Stream.flush();
+ if (OutMessage)
*OutMessage = strdup(Message.c_str());
- }
- *OutModule = wrap((Module*)nullptr);
+ *OutModule = wrap((Module *)nullptr);
+ return 1;
+ }
+
+ *OutModule = wrap(ModuleOrErr.get().release());
+ return 0;
+}
+
+LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule) {
+ MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
+ LLVMContext &Ctx = *unwrap(ContextRef);
+
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+ if (ModuleOrErr.getError()) {
+ *OutModule = wrap((Module *)nullptr);
return 1;
}
@@ -59,26 +91,50 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
Optionally returns a human-readable error message via OutMessage. */
LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutM,
- char **OutMessage) {
+ LLVMModuleRef *OutM, char **OutMessage) {
+ LLVMContext &Ctx = *unwrap(ContextRef);
+ LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+ Ctx.getDiagnosticHandler();
+ void *OldDiagnosticContext = Ctx.getDiagnosticContext();
+
std::string Message;
+ Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
- getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef));
+ getLazyBitcodeModule(std::move(Owner), Ctx);
Owner.release();
+ Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
- if (std::error_code EC = ModuleOrErr.getError()) {
+ if (ModuleOrErr.getError()) {
*OutM = wrap((Module *)nullptr);
if (OutMessage)
- *OutMessage = strdup(EC.message().c_str());
+ *OutMessage = strdup(Message.c_str());
return 1;
}
*OutM = wrap(ModuleOrErr.get().release());
return 0;
+}
+
+LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM) {
+ LLVMContext &Ctx = *unwrap(ContextRef);
+ std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
+
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
+ getLazyBitcodeModule(std::move(Owner), Ctx);
+ Owner.release();
+ if (ModuleOrErr.getError()) {
+ *OutM = wrap((Module *)nullptr);
+ return 1;
+ }
+
+ *OutM = wrap(ModuleOrErr.get().release());
+ return 0;
}
LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
@@ -87,20 +143,7 @@ LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
OutMessage);
}
-/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
-LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
- return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
- reinterpret_cast<LLVMModuleRef*>(OutMP),
- OutMessage);
-}
-
-/* Deprecated: Use LLVMGetBitcodeModule instead. */
-LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
- return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
- OutMP, OutMessage);
+LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM) {
+ return LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), MemBuf, OutM);
}
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c04e8b9..2e670d5 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/ManagedStatic.h"
@@ -93,35 +94,35 @@ public:
void resolveConstantForwardRefs();
};
-class BitcodeReaderMDValueList {
+class BitcodeReaderMetadataList {
unsigned NumFwdRefs;
bool AnyFwdRefs;
unsigned MinFwdRef;
unsigned MaxFwdRef;
- std::vector<TrackingMDRef> MDValuePtrs;
+ std::vector<TrackingMDRef> MetadataPtrs;
LLVMContext &Context;
public:
- BitcodeReaderMDValueList(LLVMContext &C)
+ BitcodeReaderMetadataList(LLVMContext &C)
: NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
// vector compatibility methods
- unsigned size() const { return MDValuePtrs.size(); }
- void resize(unsigned N) { MDValuePtrs.resize(N); }
- void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); }
- void clear() { MDValuePtrs.clear(); }
- Metadata *back() const { return MDValuePtrs.back(); }
- void pop_back() { MDValuePtrs.pop_back(); }
- bool empty() const { return MDValuePtrs.empty(); }
+ unsigned size() const { return MetadataPtrs.size(); }
+ void resize(unsigned N) { MetadataPtrs.resize(N); }
+ void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); }
+ void clear() { MetadataPtrs.clear(); }
+ Metadata *back() const { return MetadataPtrs.back(); }
+ void pop_back() { MetadataPtrs.pop_back(); }
+ bool empty() const { return MetadataPtrs.empty(); }
Metadata *operator[](unsigned i) const {
- assert(i < MDValuePtrs.size());
- return MDValuePtrs[i];
+ assert(i < MetadataPtrs.size());
+ return MetadataPtrs[i];
}
void shrinkTo(unsigned N) {
assert(N <= size() && "Invalid shrinkTo request!");
- MDValuePtrs.resize(N);
+ MetadataPtrs.resize(N);
}
Metadata *getValueFwdRef(unsigned Idx);
@@ -131,17 +132,27 @@ public:
class BitcodeReader : public GVMaterializer {
LLVMContext &Context;
- DiagnosticHandlerFunction DiagnosticHandler;
Module *TheModule = nullptr;
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
+ // Next offset to start scanning for lazy parsing of function bodies.
uint64_t NextUnreadBit = 0;
+ // Last function offset found in the VST.
+ uint64_t LastFunctionBlockBit = 0;
bool SeenValueSymbolTable = false;
+ uint64_t VSTOffset = 0;
+ // Contains an arbitrary and optional string identifying the bitcode producer.
+ std::string ProducerIdentification;
+ // Number of module level metadata records specified by the
+ // MODULE_CODE_METADATA_VALUES record.
+ unsigned NumModuleMDs = 0;
+ // Support older bitcode without the MODULE_CODE_METADATA_VALUES record.
+ bool SeenModuleValuesRecord = false;
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
- BitcodeReaderMDValueList MDValueList;
+ BitcodeReaderMetadataList MetadataList;
std::vector<Comdat *> ComdatList;
SmallVector<Instruction *, 64> InstructionList;
@@ -157,7 +168,7 @@ class BitcodeReader : public GVMaterializer {
/// is thus not represented here. As such all indices are off by one.
std::vector<AttributeSet> MAttributes;
- /// \brief The set of attribute groups.
+ /// The set of attribute groups.
std::map<unsigned, AttributeSet> MAttributeGroups;
/// While parsing a function body, this is a list of the basic blocks for the
@@ -208,23 +219,24 @@ class BitcodeReader : public GVMaterializer {
/// (e.g.) blockaddress forward references.
bool WillMaterializeAllForwardRefs = false;
- /// Functions that have block addresses taken. This is usually empty.
- SmallPtrSet<const Function *, 4> BlockAddressesTaken;
-
/// True if any Metadata block has been materialized.
bool IsMetadataMaterialized = false;
bool StripDebugInfo = false;
+ /// Functions that need to be matched with subprograms when upgrading old
+ /// metadata.
+ SmallDenseMap<Function *, DISubprogram *, 16> FunctionsWithSPs;
+
+ std::vector<std::string> BundleTags;
+
public:
std::error_code error(BitcodeError E, const Twine &Message);
std::error_code error(BitcodeError E);
std::error_code error(const Twine &Message);
- BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler);
- BitcodeReader(LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler);
+ BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context);
+ BitcodeReader(LLVMContext &Context);
~BitcodeReader() override { freeState(); }
std::error_code materializeForwardReferencedFunctions();
@@ -233,11 +245,9 @@ public:
void releaseBuffer();
- bool isDematerializable(const GlobalValue *GV) const override;
std::error_code materialize(GlobalValue *GV) override;
- std::error_code materializeModule(Module *M) override;
+ std::error_code materializeModule() override;
std::vector<StructType *> getIdentifiedStructTypes() const override;
- void dematerialize(GlobalValue *GV) override;
/// \brief Main interface to parsing a bitcode buffer.
/// \returns true if an error occurred.
@@ -249,6 +259,9 @@ public:
/// \returns true if an error occurred.
ErrorOr<std::string> parseTriple();
+ /// Cheap mechanism to just extract the identification block out of bitcode.
+ ErrorOr<std::string> parseIdentificationBlock();
+
static uint64_t decodeSignRotatedValue(uint64_t V);
/// Materialize any deferred Metadata block.
@@ -256,7 +269,20 @@ public:
void setStripDebugInfo() override;
+ /// Save the mapping between the metadata values and the corresponding
+ /// value id that were recorded in the MetadataList during parsing. If
+ /// OnlyTempMD is true, then only record those entries that are still
+ /// temporary metadata. This interface is used when metadata linking is
+ /// performed as a postpass, such as during function importing.
+ void saveMetadataList(DenseMap<const Metadata *, unsigned> &MetadataToIDs,
+ bool OnlyTempMD) override;
+
private:
+ /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
+ /// ProducerIdentification data member, and do some basic enforcement on the
+ /// "epoch" encoded in the bitcode.
+ std::error_code parseBitcodeVersion();
+
std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
@@ -268,7 +294,7 @@ private:
return ValueList.getValueFwdRef(ID, Ty);
}
Metadata *getFnMetadataByID(unsigned ID) {
- return MDValueList.getValueFwdRef(ID);
+ return MetadataList.getValueFwdRef(ID);
}
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
@@ -351,21 +377,28 @@ private:
/// a corresponding error code.
std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
+ std::error_code parseModule(uint64_t ResumeBit,
+ bool ShouldLazyLoadMetadata = false);
std::error_code parseAttributeBlock();
std::error_code parseAttributeGroupBlock();
std::error_code parseTypeTable();
std::error_code parseTypeTableBody();
+ std::error_code parseOperandBundleTags();
- std::error_code parseValueSymbolTable();
+ ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT);
+ std::error_code parseValueSymbolTable(uint64_t Offset = 0);
std::error_code parseConstants();
+ std::error_code rememberAndSkipFunctionBodies();
std::error_code rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
std::error_code rememberAndSkipMetadata();
std::error_code parseFunctionBody(Function *F);
std::error_code globalCleanup();
std::error_code resolveGlobalAndAliasInits();
- std::error_code parseMetadata();
+ std::error_code parseMetadata(bool ModuleLevel = false);
+ std::error_code parseMetadataKinds();
+ std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
std::error_code parseMetadataAttachment(Function &F);
ErrorOr<std::string> parseModuleTriple();
std::error_code parseUseLists();
@@ -376,6 +409,94 @@ private:
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
};
+
+/// Class to manage reading and parsing function summary index bitcode
+/// files/sections.
+class FunctionIndexBitcodeReader {
+ DiagnosticHandlerFunction DiagnosticHandler;
+
+ /// Eventually points to the function index built during parsing.
+ FunctionInfoIndex *TheIndex = nullptr;
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ std::unique_ptr<BitstreamReader> StreamFile;
+ BitstreamCursor Stream;
+
+ /// \brief Used to indicate whether we are doing lazy parsing of summary data.
+ ///
+ /// If false, the summary section is fully parsed into the index during
+ /// the initial parse. Otherwise, if true, the caller is expected to
+ /// invoke \a readFunctionSummary for each summary needed, and the summary
+ /// section is thus parsed lazily.
+ bool IsLazy = false;
+
+ /// Used to indicate whether caller only wants to check for the presence
+ /// of the function summary bitcode section. All blocks are skipped,
+ /// but the SeenFuncSummary boolean is set.
+ bool CheckFuncSummaryPresenceOnly = false;
+
+ /// Indicates whether we have encountered a function summary section
+ /// yet during parsing, used when checking if file contains function
+ /// summary section.
+ bool SeenFuncSummary = false;
+
+ /// \brief Map populated during function summary section parsing, and
+ /// consumed during ValueSymbolTable parsing.
+ ///
+ /// Used to correlate summary records with VST entries. For the per-module
+ /// index this maps the ValueID to the parsed function summary, and
+ /// for the combined index this maps the summary record's bitcode
+ /// offset to the function summary (since in the combined index the
+ /// VST records do not hold value IDs but rather hold the function
+ /// summary record offset).
+ DenseMap<uint64_t, std::unique_ptr<FunctionSummary>> SummaryMap;
+
+ /// Map populated during module path string table parsing, from the
+ /// module ID to a string reference owned by the index's module
+ /// path string table, used to correlate with combined index function
+ /// summary records.
+ DenseMap<uint64_t, StringRef> ModuleIdMap;
+
+public:
+ std::error_code error(BitcodeError E, const Twine &Message);
+ std::error_code error(BitcodeError E);
+ std::error_code error(const Twine &Message);
+
+ FunctionIndexBitcodeReader(MemoryBuffer *Buffer,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy = false,
+ bool CheckFuncSummaryPresenceOnly = false);
+ FunctionIndexBitcodeReader(DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy = false,
+ bool CheckFuncSummaryPresenceOnly = false);
+ ~FunctionIndexBitcodeReader() { freeState(); }
+
+ void freeState();
+
+ void releaseBuffer();
+
+ /// Check if the parser has encountered a function summary section.
+ bool foundFuncSummary() { return SeenFuncSummary; }
+
+ /// \brief Main interface to parsing a bitcode buffer.
+ /// \returns true if an error occurred.
+ std::error_code parseSummaryIndexInto(std::unique_ptr<DataStreamer> Streamer,
+ FunctionInfoIndex *I);
+
+ /// \brief Interface for parsing a function summary lazily.
+ std::error_code parseFunctionSummary(std::unique_ptr<DataStreamer> Streamer,
+ FunctionInfoIndex *I,
+ size_t FunctionSummaryOffset);
+
+private:
+ std::error_code parseModule();
+ std::error_code parseValueSymbolTable();
+ std::error_code parseEntireSummary();
+ std::error_code parseModuleStringTable();
+ std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
+ std::error_code initStreamFromBuffer();
+ std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
+};
} // namespace
BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
@@ -397,43 +518,51 @@ static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
return error(DiagnosticHandler, EC, EC.message());
}
-static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
+static std::error_code error(LLVMContext &Context, std::error_code EC,
const Twine &Message) {
- return error(DiagnosticHandler,
- make_error_code(BitcodeError::CorruptedBitcode), Message);
+ return error([&](const DiagnosticInfo &DI) { Context.diagnose(DI); }, EC,
+ Message);
+}
+
+static std::error_code error(LLVMContext &Context, std::error_code EC) {
+ return error(Context, EC, EC.message());
+}
+
+static std::error_code error(LLVMContext &Context, const Twine &Message) {
+ return error(Context, make_error_code(BitcodeError::CorruptedBitcode),
+ Message);
}
std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
- return ::error(DiagnosticHandler, make_error_code(E), Message);
+ if (!ProducerIdentification.empty()) {
+ return ::error(Context, make_error_code(E),
+ Message + " (Producer: '" + ProducerIdentification +
+ "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
+ }
+ return ::error(Context, make_error_code(E), Message);
}
std::error_code BitcodeReader::error(const Twine &Message) {
- return ::error(DiagnosticHandler,
- make_error_code(BitcodeError::CorruptedBitcode), Message);
+ if (!ProducerIdentification.empty()) {
+ return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode),
+ Message + " (Producer: '" + ProducerIdentification +
+ "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
+ }
+ return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode),
+ Message);
}
std::error_code BitcodeReader::error(BitcodeError E) {
- return ::error(DiagnosticHandler, make_error_code(E));
+ return ::error(Context, make_error_code(E));
}
-static DiagnosticHandlerFunction getDiagHandler(DiagnosticHandlerFunction F,
- LLVMContext &C) {
- if (F)
- return F;
- return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); };
-}
-
-BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler)
- : Context(Context),
- DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
- Buffer(Buffer), ValueList(Context), MDValueList(Context) {}
+BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context)
+ : Context(Context), Buffer(Buffer), ValueList(Context),
+ MetadataList(Context) {}
-BitcodeReader::BitcodeReader(LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler)
- : Context(Context),
- DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
- Buffer(nullptr), ValueList(Context), MDValueList(Context) {}
+BitcodeReader::BitcodeReader(LLVMContext &Context)
+ : Context(Context), Buffer(nullptr), ValueList(Context),
+ MetadataList(Context) {}
std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
@@ -472,7 +601,7 @@ void BitcodeReader::freeState() {
Buffer = nullptr;
std::vector<Type*>().swap(TypeList);
ValueList.clear();
- MDValueList.clear();
+ MetadataList.clear();
std::vector<Comdat *>().swap(ComdatList);
std::vector<AttributeSet>().swap(MAttributes);
@@ -779,6 +908,8 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
OldV->replaceAllUsesWith(V);
delete PrevVal;
}
+
+ return;
}
@@ -904,7 +1035,7 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() {
}
}
-void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) {
+void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
if (Idx == size()) {
push_back(MD);
return;
@@ -913,7 +1044,7 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) {
if (Idx >= size())
resize(Idx+1);
- TrackingMDRef &OldMD = MDValuePtrs[Idx];
+ TrackingMDRef &OldMD = MetadataPtrs[Idx];
if (!OldMD) {
OldMD.reset(MD);
return;
@@ -925,11 +1056,11 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) {
--NumFwdRefs;
}
-Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
+Metadata *BitcodeReaderMetadataList::getValueFwdRef(unsigned Idx) {
if (Idx >= size())
resize(Idx + 1);
- if (Metadata *MD = MDValuePtrs[Idx])
+ if (Metadata *MD = MetadataPtrs[Idx])
return MD;
// Track forward refs to be resolved later.
@@ -944,11 +1075,11 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
// Create and return a placeholder, which will later be RAUW'd.
Metadata *MD = MDNode::getTemporary(Context, None).release();
- MDValuePtrs[Idx].reset(MD);
+ MetadataPtrs[Idx].reset(MD);
return MD;
}
-void BitcodeReaderMDValueList::tryToResolveCycles() {
+void BitcodeReaderMetadataList::tryToResolveCycles() {
if (!AnyFwdRefs)
// Nothing to do.
return;
@@ -959,7 +1090,7 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
// Resolve any cycles.
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
- auto &MD = MDValuePtrs[I];
+ auto &MD = MetadataPtrs[I];
auto *N = dyn_cast_or_null<MDNode>(MD);
if (!N)
continue;
@@ -1102,6 +1233,10 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Cold;
case bitc::ATTR_KIND_CONVERGENT:
return Attribute::Convergent;
+ case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY:
+ return Attribute::InaccessibleMemOnly;
+ case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY:
+ return Attribute::InaccessibleMemOrArgMemOnly;
case bitc::ATTR_KIND_INLINE_HINT:
return Attribute::InlineHint;
case bitc::ATTR_KIND_IN_REG:
@@ -1126,6 +1261,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoImplicitFloat;
case bitc::ATTR_KIND_NO_INLINE:
return Attribute::NoInline;
+ case bitc::ATTR_KIND_NO_RECURSE:
+ return Attribute::NoRecurse;
case bitc::ATTR_KIND_NON_LAZY_BIND:
return Attribute::NonLazyBind;
case bitc::ATTR_KIND_NON_NULL:
@@ -1360,6 +1497,9 @@ std::error_code BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_X86_MMX: // X86_MMX
ResultTy = Type::getX86_MMXTy(Context);
break;
+ case bitc::TYPE_CODE_TOKEN: // TOKEN
+ ResultTy = Type::getTokenTy(Context);
+ break;
case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width]
if (Record.size() < 1)
return error("Invalid record");
@@ -1524,7 +1664,107 @@ std::error_code BitcodeReader::parseTypeTableBody() {
}
}
-std::error_code BitcodeReader::parseValueSymbolTable() {
+std::error_code BitcodeReader::parseOperandBundleTags() {
+ if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
+ return error("Invalid record");
+
+ if (!BundleTags.empty())
+ return error("Invalid multiple blocks");
+
+ SmallVector<uint64_t, 64> Record;
+
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Tags are implicitly mapped to integers by their order.
+
+ if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG)
+ return error("Invalid record");
+
+ // OPERAND_BUNDLE_TAG: [strchr x N]
+ BundleTags.emplace_back();
+ if (convertToString(Record, 0, BundleTags.back()))
+ return error("Invalid record");
+ Record.clear();
+ }
+}
+
+/// Associate a value with its name from the given index in the provided record.
+ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT) {
+ SmallString<128> ValueName;
+ if (convertToString(Record, NameIndex, ValueName))
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size() || !ValueList[ValueID])
+ return error("Invalid record");
+ Value *V = ValueList[ValueID];
+
+ StringRef NameStr(ValueName.data(), ValueName.size());
+ if (NameStr.find_first_of(0) != StringRef::npos)
+ return error("Invalid value name");
+ V->setName(NameStr);
+ auto *GO = dyn_cast<GlobalObject>(V);
+ if (GO) {
+ if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
+ if (TT.isOSBinFormatMachO())
+ GO->setComdat(nullptr);
+ else
+ GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
+ }
+ }
+ return V;
+}
+
+/// Parse the value symbol table at either the current parsing location or
+/// at the given bit offset if provided.
+std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
+ uint64_t CurrentBit;
+ // Pass in the Offset to distinguish between calling for the module-level
+ // VST (where we want to jump to the VST offset) and the function-level
+ // VST (where we don't).
+ if (Offset > 0) {
+ // Save the current parsing location so we can jump back at the end
+ // of the VST read.
+ CurrentBit = Stream.GetCurrentBitNo();
+ Stream.JumpToBit(Offset * 32);
+#ifndef NDEBUG
+ // Do some checking if we are in debug mode.
+ BitstreamEntry Entry = Stream.advance();
+ assert(Entry.Kind == BitstreamEntry::SubBlock);
+ assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
+#else
+ // In NDEBUG mode ignore the output so we don't get an unused variable
+ // warning.
+ Stream.advance();
+#endif
+ }
+
+ // Compute the delta between the bitcode indices in the VST (the word offset
+ // to the word-aligned ENTER_SUBBLOCK for the function block) and that
+ // expected by the lazy reader. The reader's EnterSubBlock expects to have
+ // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID
+ // (size BlockIDWidth). Note that we access the stream's AbbrevID width here
+ // just before entering the VST subblock because: 1) the EnterSubBlock
+ // changes the AbbrevID width; 2) the VST block is nested within the same
+ // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore has the same
+ // AbbrevID width before calling EnterSubBlock; and 3) when we want to
+ // jump to the FUNCTION_BLOCK using this offset later, we don't want
+ // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK.
+ unsigned FuncBitcodeOffsetDelta =
+ Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
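+ // Worked example of this delta, under illustrative widths: with an abbrev
+ // ID width of 4 and bitc::BlockIDWidth == 8, a VST entry that records word
+ // offset W for a function body yields
+ //
+ //   FuncBitOffset           = W * 32
+ //   DeferredFunctionInfo[F] = W * 32 + 4 + 8
+ //
+ // i.e. a position just past the ENTER_SUBBLOCK code and BlockID, exactly
+ // where EnterSubBlock expects to resume (see the VST_CODE_FNENTRY handling
+ // below).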
+
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return error("Invalid record");
@@ -1542,6 +1782,8 @@ std::error_code BitcodeReader::parseValueSymbolTable() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
+ if (Offset > 0)
+ Stream.JumpToBit(CurrentBit);
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
@@ -1554,23 +1796,39 @@ std::error_code BitcodeReader::parseValueSymbolTable() {
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
- if (convertToString(Record, 1, ValueName))
- return error("Invalid record");
- unsigned ValueID = Record[0];
- if (ValueID >= ValueList.size() || !ValueList[ValueID])
- return error("Invalid record");
- Value *V = ValueList[ValueID];
-
- V->setName(StringRef(ValueName.data(), ValueName.size()));
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
- if (TT.isOSBinFormatMachO())
- GO->setComdat(nullptr);
- else
- GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
- }
+ ErrorOr<Value *> ValOrErr = recordValue(Record, 1, TT);
+ if (std::error_code EC = ValOrErr.getError())
+ return EC;
+ ValOrErr.get();
+ break;
+ }
+ case bitc::VST_CODE_FNENTRY: {
+ // VST_FNENTRY: [valueid, offset, namechar x N]
+ ErrorOr<Value *> ValOrErr = recordValue(Record, 2, TT);
+ if (std::error_code EC = ValOrErr.getError())
+ return EC;
+ Value *V = ValOrErr.get();
+
+ auto *GO = dyn_cast<GlobalObject>(V);
+ if (!GO) {
+ // If this is an alias, we need to get the actual Function object
+ // it aliases in order to set up the DeferredFunctionInfo entry below.
+ auto *GA = dyn_cast<GlobalAlias>(V);
+ if (GA)
+ GO = GA->getBaseObject();
+ assert(GO);
}
- ValueName.clear();
+
+ uint64_t FuncWordOffset = Record[1];
+ Function *F = dyn_cast<Function>(GO);
+ assert(F);
+ uint64_t FuncBitOffset = FuncWordOffset * 32;
+ DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
+ // Set the LastFunctionBlockBit to point to the last function block.
+ // Later when parsing is resumed after function materialization,
+ // we can simply skip that last function block.
+ if (FuncBitOffset > LastFunctionBlockBit)
+ LastFunctionBlockBit = FuncBitOffset;
break;
}
case bitc::VST_CODE_BBENTRY: {
@@ -1588,19 +1846,51 @@ std::error_code BitcodeReader::parseValueSymbolTable() {
}
}
+/// Parse a single METADATA_KIND record, inserting result in MDKindMap.
+std::error_code
+BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
+ if (Record.size() < 2)
+ return error("Invalid record");
+
+ unsigned Kind = Record[0];
+ SmallString<8> Name(Record.begin() + 1, Record.end());
+
+ unsigned NewKind = TheModule->getMDKindID(Name.str());
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return error("Conflicting METADATA_KIND records");
+ return std::error_code();
+}
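+
+// Sketch of how the mapping built above is consumed: attachment records
+// carry the kind ID assigned by the writer of the bitcode, which must be
+// translated to this module's kind ID before attaching (cf.
+// parseMetadataAttachment below):
+//
+//   auto K = MDKindMap.find(WriterKind);
+//   if (K == MDKindMap.end())
+//     return error("Invalid ID");
+//   F.setMetadata(K->second, cast<MDNode>(MD));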
+
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
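// For reference, the inverse transform (a hypothetical helper, not present in
// this file) stores the sign bit in the LSB so that small negative values
// stay small under VBR encoding:
//
//   static uint64_t rotateSign(int64_t V) {
//     return V < 0 ? (uint64_t(~V) << 1) | 1 : uint64_t(V) << 1;
//   }
//
// e.g. rotateSign(-3) == 5 and unrotateSign(5) == -3.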
-std::error_code BitcodeReader::parseMetadata() {
+/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
+/// module level metadata.
+std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
IsMetadataMaterialized = true;
- unsigned NextMDValueNo = MDValueList.size();
+ unsigned NextMetadataNo = MetadataList.size();
+ if (ModuleLevel && SeenModuleValuesRecord) {
+ // Now that we are parsing the module-level metadata, we want to restart
+ // the numbering of the MD values, and replace temp MD created earlier
+ // with their real values. If we saw a METADATA_VALUES record then we
+ // would have set the MetadataList size to the number specified in that
+ // record, to support parsing function-level metadata first; we now need
+ // to reset back to 0 so the MetadataList is filled in with the parsed
+ // module-level values. The function-level metadata parsing should have
+ // reset the MetadataList size back to the value reported by the
+ // METADATA_VALUES record, saved in NumModuleMDs.
+ assert(NumModuleMDs == MetadataList.size() &&
+ "Expected MetadataList to only contain module level values");
+ NextMetadataNo = 0;
+ }
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
- auto getMD =
- [&](unsigned ID) -> Metadata *{ return MDValueList.getValueFwdRef(ID); };
+ auto getMD = [&](unsigned ID) -> Metadata * {
+ return MetadataList.getValueFwdRef(ID);
+ };
auto getMDOrNull = [&](unsigned ID) -> Metadata *{
if (ID)
return getMD(ID - 1);
@@ -1624,7 +1914,10 @@ std::error_code BitcodeReader::parseMetadata() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- MDValueList.tryToResolveCycles();
+ MetadataList.tryToResolveCycles();
+ assert((!(ModuleLevel && SeenModuleValuesRecord) ||
+ NumModuleMDs == MetadataList.size()) &&
+ "Inconsistent bitcode: METADATA_VALUES mismatch");
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
@@ -1652,7 +1945,8 @@ std::error_code BitcodeReader::parseMetadata() {
unsigned Size = Record.size();
NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
for (unsigned i = 0; i != Size; ++i) {
- MDNode *MD = dyn_cast_or_null<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+ MDNode *MD =
+ dyn_cast_or_null<MDNode>(MetadataList.getValueFwdRef(Record[i]));
if (!MD)
return error("Invalid record");
NMD->addOperand(MD);
@@ -1669,7 +1963,7 @@ std::error_code BitcodeReader::parseMetadata() {
// If this isn't a LocalAsMetadata record, we're dropping it. This used
// to be legal, but there's no upgrade path.
auto dropRecord = [&] {
- MDValueList.assignValue(MDNode::get(Context, None), NextMDValueNo++);
+ MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
};
if (Record.size() != 2) {
dropRecord();
@@ -1682,9 +1976,9 @@ std::error_code BitcodeReader::parseMetadata() {
break;
}
- MDValueList.assignValue(
+ MetadataList.assignValue(
LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_OLD_NODE: {
@@ -1699,7 +1993,7 @@ std::error_code BitcodeReader::parseMetadata() {
if (!Ty)
return error("Invalid record");
if (Ty->isMetadataTy())
- Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
+ Elts.push_back(MetadataList.getValueFwdRef(Record[i + 1]));
else if (!Ty->isVoidTy()) {
auto *MD =
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty));
@@ -1709,7 +2003,7 @@ std::error_code BitcodeReader::parseMetadata() {
} else
Elts.push_back(nullptr);
}
- MDValueList.assignValue(MDNode::get(Context, Elts), NextMDValueNo++);
+ MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
break;
}
case bitc::METADATA_VALUE: {
@@ -1720,9 +2014,9 @@ std::error_code BitcodeReader::parseMetadata() {
if (Ty->isMetadataTy() || Ty->isVoidTy())
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_DISTINCT_NODE:
@@ -1732,10 +2026,10 @@ std::error_code BitcodeReader::parseMetadata() {
SmallVector<Metadata *, 8> Elts;
Elts.reserve(Record.size());
for (unsigned ID : Record)
- Elts.push_back(ID ? MDValueList.getValueFwdRef(ID - 1) : nullptr);
- MDValueList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
- : MDNode::get(Context, Elts),
- NextMDValueNo++);
+ Elts.push_back(ID ? MetadataList.getValueFwdRef(ID - 1) : nullptr);
+ MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
+ : MDNode::get(Context, Elts),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_LOCATION: {
@@ -1744,13 +2038,13 @@ std::error_code BitcodeReader::parseMetadata() {
unsigned Line = Record[1];
unsigned Column = Record[2];
- MDNode *Scope = cast<MDNode>(MDValueList.getValueFwdRef(Record[3]));
+ MDNode *Scope = cast<MDNode>(MetadataList.getValueFwdRef(Record[3]));
Metadata *InlinedAt =
- Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr;
- MDValueList.assignValue(
+ Record[4] ? MetadataList.getValueFwdRef(Record[4] - 1) : nullptr;
+ MetadataList.assignValue(
GET_OR_DISTINCT(DILocation, Record[0],
(Context, Line, Column, Scope, InlinedAt)),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_GENERIC_DEBUG: {
@@ -1766,63 +2060,65 @@ std::error_code BitcodeReader::parseMetadata() {
auto *Header = getMDString(Record[3]);
SmallVector<Metadata *, 8> DwarfOps;
for (unsigned I = 4, E = Record.size(); I != E; ++I)
- DwarfOps.push_back(Record[I] ? MDValueList.getValueFwdRef(Record[I] - 1)
- : nullptr);
- MDValueList.assignValue(GET_OR_DISTINCT(GenericDINode, Record[0],
- (Context, Tag, Header, DwarfOps)),
- NextMDValueNo++);
+ DwarfOps.push_back(
+ Record[I] ? MetadataList.getValueFwdRef(Record[I] - 1) : nullptr);
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(GenericDINode, Record[0],
+ (Context, Tag, Header, DwarfOps)),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_SUBRANGE: {
if (Record.size() != 3)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DISubrange, Record[0],
(Context, Record[1], unrotateSign(Record[2]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_ENUMERATOR: {
if (Record.size() != 3)
return error("Invalid record");
- MDValueList.assignValue(GET_OR_DISTINCT(DIEnumerator, Record[0],
- (Context, unrotateSign(Record[1]),
- getMDString(Record[2]))),
- NextMDValueNo++);
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(
+ DIEnumerator, Record[0],
+ (Context, unrotateSign(Record[1]), getMDString(Record[2]))),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_BASIC_TYPE: {
if (Record.size() != 6)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIBasicType, Record[0],
(Context, Record[1], getMDString(Record[2]),
Record[3], Record[4], Record[5])),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_DERIVED_TYPE: {
if (Record.size() != 12)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIDerivedType, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), Record[4],
getMDOrNull(Record[5]), getMDOrNull(Record[6]),
Record[7], Record[8], Record[9], Record[10],
getMDOrNull(Record[11]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
if (Record.size() != 16)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DICompositeType, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), Record[4],
@@ -1831,17 +2127,17 @@ std::error_code BitcodeReader::parseMetadata() {
getMDOrNull(Record[11]), Record[12],
getMDOrNull(Record[13]), getMDOrNull(Record[14]),
getMDString(Record[15]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_SUBROUTINE_TYPE: {
if (Record.size() != 3)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DISubroutineType, Record[0],
(Context, Record[1], getMDOrNull(Record[2]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
@@ -1849,12 +2145,12 @@ std::error_code BitcodeReader::parseMetadata() {
if (Record.size() != 6)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIModule, Record[0],
(Context, getMDOrNull(Record[1]),
- getMDString(Record[2]), getMDString(Record[3]),
- getMDString(Record[4]), getMDString(Record[5]))),
- NextMDValueNo++);
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDString(Record[4]), getMDString(Record[5]))),
+ NextMetadataNo++);
break;
}
@@ -1862,185 +2158,260 @@ std::error_code BitcodeReader::parseMetadata() {
if (Record.size() != 3)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]),
getMDString(Record[2]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_COMPILE_UNIT: {
- if (Record.size() < 14 || Record.size() > 15)
+ if (Record.size() < 14 || Record.size() > 16)
return error("Invalid record");
- MDValueList.assignValue(
- GET_OR_DISTINCT(
- DICompileUnit, Record[0],
- (Context, Record[1], getMDOrNull(Record[2]),
- getMDString(Record[3]), Record[4], getMDString(Record[5]),
- Record[6], getMDString(Record[7]), Record[8],
- getMDOrNull(Record[9]), getMDOrNull(Record[10]),
- getMDOrNull(Record[11]), getMDOrNull(Record[12]),
- getMDOrNull(Record[13]), Record.size() == 14 ? 0 : Record[14])),
- NextMDValueNo++);
+ // Ignore Record[0], which indicates whether this compile unit is
+ // distinct. It's always distinct.
+ MetadataList.assignValue(
+ DICompileUnit::getDistinct(
+ Context, Record[1], getMDOrNull(Record[2]),
+ getMDString(Record[3]), Record[4], getMDString(Record[5]),
+ Record[6], getMDString(Record[7]), Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[11]), getMDOrNull(Record[12]),
+ getMDOrNull(Record[13]),
+ Record.size() <= 15 ? 0 : getMDOrNull(Record[15]),
+ Record.size() <= 14 ? 0 : Record[14]),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_SUBPROGRAM: {
- if (Record.size() != 19)
- return error("Invalid record");
-
- MDValueList.assignValue(
- GET_OR_DISTINCT(
- DISubprogram, Record[0],
- (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
- getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
- getMDOrNull(Record[6]), Record[7], Record[8], Record[9],
- getMDOrNull(Record[10]), Record[11], Record[12], Record[13],
- Record[14], getMDOrNull(Record[15]), getMDOrNull(Record[16]),
- getMDOrNull(Record[17]), getMDOrNull(Record[18]))),
- NextMDValueNo++);
+ if (Record.size() != 18 && Record.size() != 19)
+ return error("Invalid record");
+
+ bool HasFn = Record.size() == 19;
+ DISubprogram *SP = GET_OR_DISTINCT(
+ DISubprogram,
+ Record[0] || Record[8], // All definitions should be distinct.
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getMDOrNull(Record[6]), Record[7], Record[8], Record[9],
+ getMDOrNull(Record[10]), Record[11], Record[12], Record[13],
+ Record[14], getMDOrNull(Record[15 + HasFn]),
+ getMDOrNull(Record[16 + HasFn]), getMDOrNull(Record[17 + HasFn])));
+ MetadataList.assignValue(SP, NextMetadataNo++);
+
+ // Upgrade sp->function mapping to function->sp mapping.
+ if (HasFn && Record[15]) {
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(getMDOrNull(Record[15])))
+ if (auto *F = dyn_cast<Function>(CMD->getValue())) {
+ if (F->isMaterializable())
+ // Defer until materialized; unmaterialized functions may not have
+ // metadata.
+ FunctionsWithSPs[F] = SP;
+ else if (!F->empty())
+ F->setSubprogram(SP);
+ }
+ }
break;
}
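// Sketch of the upgrade performed above: old bitcode stored a
// subprogram->function operand (the Record[15] slot when HasFn); newer IR
// hangs the subprogram off the function instead, conceptually:
//
//   F->setSubprogram(SP); // replaces the retired 'function:' operand
//
// Materializable functions are deferred through FunctionsWithSPs because
// their bodies may not exist yet when this metadata block is parsed.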
case bitc::METADATA_LEXICAL_BLOCK: {
if (Record.size() != 5)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DILexicalBlock, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3], Record[4])),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_LEXICAL_BLOCK_FILE: {
if (Record.size() != 4)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DILexicalBlockFile, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3])),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_NAMESPACE: {
if (Record.size() != 5)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DINamespace, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]),
Record[4])),
- NextMDValueNo++);
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_MACRO: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIMacro, Record[0],
+ (Context, Record[1], Record[2],
+ getMDString(Record[3]), getMDString(Record[4]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_MACRO_FILE: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIMacroFile, Record[0],
+ (Context, Record[1], Record[2],
+ getMDOrNull(Record[3]), getMDOrNull(Record[4]))),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_TEMPLATE_TYPE: {
if (Record.size() != 3)
return error("Invalid record");
- MDValueList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
- Record[0],
- (Context, getMDString(Record[1]),
- getMDOrNull(Record[2]))),
- NextMDValueNo++);
+ MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
+ Record[0],
+ (Context, getMDString(Record[1]),
+ getMDOrNull(Record[2]))),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_TEMPLATE_VALUE: {
if (Record.size() != 5)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DITemplateValueParameter, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), getMDOrNull(Record[4]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_GLOBAL_VAR: {
if (Record.size() != 11)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIGlobalVariable, Record[0],
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
getMDOrNull(Record[4]), Record[5],
getMDOrNull(Record[6]), Record[7], Record[8],
getMDOrNull(Record[9]), getMDOrNull(Record[10]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_LOCAL_VAR: {
// 10th field is for the obsolete 'inlinedAt:' field.
- if (Record.size() != 9 && Record.size() != 10)
+ if (Record.size() < 8 || Record.size() > 10)
return error("Invalid record");
- MDValueList.assignValue(
+ // 2nd field used to be an artificial tag, either DW_TAG_auto_variable or
+ // DW_TAG_arg_variable.
+ bool HasTag = Record.size() > 8;
+ MetadataList.assignValue(
GET_OR_DISTINCT(DILocalVariable, Record[0],
- (Context, Record[1], getMDOrNull(Record[2]),
- getMDString(Record[3]), getMDOrNull(Record[4]),
- Record[5], getMDOrNull(Record[6]), Record[7],
- Record[8])),
- NextMDValueNo++);
+ (Context, getMDOrNull(Record[1 + HasTag]),
+ getMDString(Record[2 + HasTag]),
+ getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
+ getMDOrNull(Record[5 + HasTag]), Record[6 + HasTag],
+ Record[7 + HasTag])),
+ NextMetadataNo++);
break;
}
case bitc::METADATA_EXPRESSION: {
if (Record.size() < 1)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIExpression, Record[0],
(Context, makeArrayRef(Record).slice(1))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_OBJC_PROPERTY: {
if (Record.size() != 8)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIObjCProperty, Record[0],
(Context, getMDString(Record[1]),
getMDOrNull(Record[2]), Record[3],
getMDString(Record[4]), getMDString(Record[5]),
Record[6], getMDOrNull(Record[7]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
if (Record.size() != 6)
return error("Invalid record");
- MDValueList.assignValue(
+ MetadataList.assignValue(
GET_OR_DISTINCT(DIImportedEntity, Record[0],
(Context, Record[1], getMDOrNull(Record[2]),
getMDOrNull(Record[3]), Record[4],
getMDString(Record[5]))),
- NextMDValueNo++);
+ NextMetadataNo++);
break;
}
case bitc::METADATA_STRING: {
std::string String(Record.begin(), Record.end());
llvm::UpgradeMDStringConstant(String);
Metadata *MD = MDString::get(Context, String);
- MDValueList.assignValue(MD, NextMDValueNo++);
+ MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
case bitc::METADATA_KIND: {
- if (Record.size() < 2)
- return error("Invalid record");
+ // Support older bitcode files that had METADATA_KIND records in a
+ // block with METADATA_BLOCK_ID.
+ if (std::error_code EC = parseMetadataKindRecord(Record))
+ return EC;
+ break;
+ }
+ }
+ }
+#undef GET_OR_DISTINCT
+}
- unsigned Kind = Record[0];
- SmallString<8> Name(Record.begin()+1, Record.end());
+/// Parse the metadata kinds out of the METADATA_KIND_BLOCK.
+std::error_code BitcodeReader::parseMetadataKinds() {
+ if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
- unsigned NewKind = TheModule->getMDKindID(Name.str());
- if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
- return error("Conflicting METADATA_KIND records");
+ // Read a record.
+ Record.clear();
+ unsigned Code = Stream.readRecord(Entry.ID, Record);
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_KIND: {
+ if (std::error_code EC = parseMetadataKindRecord(Record))
+ return EC;
break;
}
}
}
-#undef GET_OR_DISTINCT
}
/// Decode a signed value stored with the sign bit in the LSB for dense VBR
@@ -2410,11 +2781,12 @@ std::error_code BitcodeReader::parseConstants() {
Type *SelectorTy = Type::getInt1Ty(Context);
- // If CurTy is a vector of length n, then Record[0] must be a <n x i1>
- // vector. Otherwise, it must be a single bit.
+ // The selector might be an i1 or an <n x i1>.
+ // Get the type from the ValueList before getting a forward ref.
if (VectorType *VTy = dyn_cast<VectorType>(CurTy))
- SelectorTy = VectorType::get(Type::getInt1Ty(Context),
- VTy->getNumElements());
+ if (Value *V = ValueList[Record[0]])
+ if (SelectorTy != V->getType())
+ SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements());
V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
SelectorTy),
@@ -2567,9 +2939,6 @@ std::error_code BitcodeReader::parseConstants() {
if (!Fn)
return error("Invalid record");
- // Don't let Fn get dematerialized.
- BlockAddressesTaken.insert(Fn);
-
// If the function is already parsed we can insert the block address right
// away.
BasicBlock *BB;
@@ -2584,7 +2953,7 @@ std::error_code BitcodeReader::parseConstants() {
return error("Invalid ID");
++BBI;
}
- BB = BBI;
+ BB = &*BBI;
} else {
// Otherwise insert a placeholder and remember it so it can be inserted
// when the function is parsed.
@@ -2652,7 +3021,7 @@ std::error_code BitcodeReader::parseUseLists() {
V = ValueList[ID];
unsigned NumUses = 0;
SmallDenseMap<const Use *, unsigned, 16> Order;
- for (const Use &U : V->uses()) {
+ for (const Use &U : V->materialized_uses()) {
if (++NumUses > Record.size())
break;
Order[&U] = Record[NumUses - 1];
@@ -2688,7 +3057,7 @@ std::error_code BitcodeReader::materializeMetadata() {
for (uint64_t BitPos : DeferredMetadataInfo) {
// Move the bit stream to the saved position.
Stream.JumpToBit(BitPos);
- if (std::error_code EC = parseMetadata())
+ if (std::error_code EC = parseMetadata(true))
return EC;
}
DeferredMetadataInfo.clear();
@@ -2697,6 +3066,25 @@ std::error_code BitcodeReader::materializeMetadata() {
void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; }
+void BitcodeReader::saveMetadataList(
+ DenseMap<const Metadata *, unsigned> &MetadataToIDs, bool OnlyTempMD) {
+ for (unsigned ID = 0; ID < MetadataList.size(); ++ID) {
+ Metadata *MD = MetadataList[ID];
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ // Save all values if !OnlyTempMD, otherwise just the temporary metadata.
+ if (!OnlyTempMD || (N && N->isTemporary())) {
+ // The caller re-invokes this after materializing each function, in order
+ // to handle remapping of that function's instructions/metadata.
+ // In that case, check whether we already have an entry for this ID.
+ if (OnlyTempMD && MetadataToIDs.count(MD)) {
+ assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id");
+ continue;
+ }
+ MetadataToIDs[MD] = ID;
+ }
+ }
+}
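+
+// A sketch of the intended call pattern, with an illustrative caller that
+// drives function importing (caller name and sequence are assumed here):
+//
+//   DenseMap<const Metadata *, unsigned> MetadataToIDs;
+//   R.saveMetadataList(MetadataToIDs, /*OnlyTempMD=*/false); // full map
+//   // ... materialize an imported function, then refresh only the
+//   // temporary nodes so their IDs can be reused for remapping:
+//   R.saveMetadataList(MetadataToIDs, /*OnlyTempMD=*/true);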
+
/// When we see the block for a function body, remember where it is and then
/// skip it. This lets us lazily deserialize the functions.
std::error_code BitcodeReader::rememberAndSkipFunctionBody() {
@@ -2709,6 +3097,9 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBody() {
// Save the current stream state.
uint64_t CurBit = Stream.GetCurrentBitNo();
+ assert(
+ (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) &&
+ "Mismatch between VST and scanned function offsets");
DeferredFunctionInfo[Fn] = CurBit;
// Skip over the function block for now.
@@ -2741,10 +3132,91 @@ std::error_code BitcodeReader::globalCleanup() {
return std::error_code();
}
-std::error_code BitcodeReader::parseModule(bool Resume,
+/// Support for lazy parsing of function bodies. This is required if we
+/// either have an old bitcode file without a VST forward declaration record,
+/// or if we have an anonymous function being materialized, since anonymous
+/// functions do not have a name and are therefore not in the VST.
+std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
+ Stream.JumpToBit(NextUnreadBit);
+
+ if (Stream.AtEndOfStream())
+ return error("Could not find function in stream");
+
+ if (!SeenFirstFunctionBody)
+ return error("Trying to materialize functions before seeing function blocks");
+
+ // An old bitcode file with the symbol table at the end would have
+ // finished the parse greedily.
+ assert(SeenValueSymbolTable);
+
+ SmallVector<uint64_t, 64> Record;
+
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ default:
+ return error("Expect SubBlock");
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
+ default:
+ return error("Expect function block");
+ case bitc::FUNCTION_BLOCK_ID:
+ if (std::error_code EC = rememberAndSkipFunctionBody())
+ return EC;
+ NextUnreadBit = Stream.GetCurrentBitNo();
+ return std::error_code();
+ }
+ }
+ }
+}
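+
+// Sketch of the fallback flow this enables, for a hypothetical anonymous
+// function F with no VST entry:
+//
+//   while (DeferredFunctionInfo[F] == 0)   // offset still unknown
+//     rememberAndSkipFunctionBodies();     // lazily scan one more body
+//   Stream.JumpToBit(DeferredFunctionInfo[F]);
+//   parseFunctionBody(F);
+//
+// cf. findFunctionInStream and materialize below, which drive this loop.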
+
+std::error_code BitcodeReader::parseBitcodeVersion() {
+ if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+ return error("Invalid record");
+
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ default:
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: reject
+ return error("Invalid value");
+ case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x N]
+ convertToString(Record, 0, ProducerIdentification);
+ break;
+ }
+ case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
+ unsigned epoch = (unsigned)Record[0];
+ if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
+ return error(
+ Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
+ "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'");
+ }
+ }
+ }
+ }
+}
+
+std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata) {
- if (Resume)
- Stream.JumpToBit(NextUnreadBit);
+ if (ResumeBit)
+ Stream.JumpToBit(ResumeBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
@@ -2785,9 +3257,23 @@ std::error_code BitcodeReader::parseModule(bool Resume,
return EC;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (std::error_code EC = parseValueSymbolTable())
- return EC;
- SeenValueSymbolTable = true;
+ if (!SeenValueSymbolTable) {
+ // Either this is an old form VST without function index and an
+ // associated VST forward declaration record (which would have caused
+ // the VST to be jumped to and parsed before it was encountered
+ // normally in the stream), or there were no function blocks to
+ // trigger an earlier parsing of the VST.
+ assert(VSTOffset == 0 || FunctionsWithBodies.empty());
+ if (std::error_code EC = parseValueSymbolTable())
+ return EC;
+ SeenValueSymbolTable = true;
+ } else {
+ // We must have had a VST forward declaration record, which caused
+ // the parser to jump to and parse the VST earlier.
+ assert(VSTOffset > 0);
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ }
break;
case bitc::CONSTANTS_BLOCK_ID:
if (std::error_code EC = parseConstants())
@@ -2802,7 +3288,11 @@ std::error_code BitcodeReader::parseModule(bool Resume,
break;
}
assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata");
- if (std::error_code EC = parseMetadata())
+ if (std::error_code EC = parseMetadata(true))
+ return EC;
+ break;
+ case bitc::METADATA_KIND_BLOCK_ID:
+ if (std::error_code EC = parseMetadataKinds())
return EC;
break;
case bitc::FUNCTION_BLOCK_ID:
@@ -2815,8 +3305,39 @@ std::error_code BitcodeReader::parseModule(bool Resume,
SeenFirstFunctionBody = true;
}
+ if (VSTOffset > 0) {
+ // If we have a VST forward declaration record, make sure we
+ // parse the VST now if we haven't already. It is needed to
+ // set up the DeferredFunctionInfo vector for lazy reading.
+ if (!SeenValueSymbolTable) {
+ if (std::error_code EC =
+ BitcodeReader::parseValueSymbolTable(VSTOffset))
+ return EC;
+ SeenValueSymbolTable = true;
+ // Fall through so that we record the NextUnreadBit below.
+ // This is necessary in case we have an anonymous function that
+ // is later materialized. Since it will not have a VST entry we
+ // need to fall back to the lazy parse to find its offset.
+ } else {
+ // If we have a VST forward declaration record, but have already
+ // parsed the VST (just above, when the first function body was
+ // encountered here), then we are resuming the parse after
+ // materializing functions. The ResumeBit points to the
+ // start of the last function block recorded in the
+ // DeferredFunctionInfo map. Skip it.
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ continue;
+ }
+ }
+
+ // Support older bitcode files that had neither the function
+ // index in the VST nor a VST forward declaration record, as
+ // well as anonymous functions that do not have VST entries.
+ // Build the DeferredFunctionInfo vector on the fly.
if (std::error_code EC = rememberAndSkipFunctionBody())
return EC;
+
// Suspend parsing when we reach the function bodies. Subsequent
// materialization calls will resume it when necessary. If the bitcode
// file is old, the symbol table will be at the end instead and will not
@@ -2830,6 +3351,10 @@ std::error_code BitcodeReader::parseModule(bool Resume,
if (std::error_code EC = parseUseLists())
return EC;
break;
+ case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+ if (std::error_code EC = parseOperandBundleTags())
+ return EC;
+ break;
}
continue;
@@ -2840,7 +3365,8 @@ std::error_code BitcodeReader::parseModule(bool Resume,
// Read a record.
- switch (Stream.readRecord(Entry.ID, Record)) {
+ auto BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
@@ -3012,11 +3538,14 @@ std::error_code BitcodeReader::parseModule(bool Resume,
auto *FTy = dyn_cast<FunctionType>(Ty);
if (!FTy)
return error("Invalid type for value");
+ auto CC = static_cast<CallingConv::ID>(Record[1]);
+ if (CC & ~CallingConv::MaxID)
+ return error("Invalid calling convention ID");
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
"", TheModule);
- Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
+ Func->setCallingConv(CC);
bool isProto = Record[2];
uint64_t RawLinkage = Record[3];
Func->setLinkage(getDecodedLinkage(RawLinkage));
@@ -3079,35 +3608,51 @@ std::error_code BitcodeReader::parseModule(bool Resume,
}
break;
}
- // ALIAS: [alias type, aliasee val#, linkage]
- // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass]
- case bitc::MODULE_CODE_ALIAS: {
- if (Record.size() < 3)
+ // ALIAS: [alias type, addrspace, aliasee val#, linkage]
+ // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass]
+ case bitc::MODULE_CODE_ALIAS:
+ case bitc::MODULE_CODE_ALIAS_OLD: {
+ bool NewRecord = BitCode == bitc::MODULE_CODE_ALIAS;
+ if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
+ unsigned OpNum = 0;
+ Type *Ty = getTypeByID(Record[OpNum++]);
if (!Ty)
return error("Invalid record");
- auto *PTy = dyn_cast<PointerType>(Ty);
- if (!PTy)
- return error("Invalid type for value");
- auto *NewGA =
- GlobalAlias::create(PTy, getDecodedLinkage(Record[2]), "", TheModule);
+ unsigned AddrSpace;
+ if (!NewRecord) {
+ auto *PTy = dyn_cast<PointerType>(Ty);
+ if (!PTy)
+ return error("Invalid type for value");
+ Ty = PTy->getElementType();
+ AddrSpace = PTy->getAddressSpace();
+ } else {
+ AddrSpace = Record[OpNum++];
+ }
+
+ auto Val = Record[OpNum++];
+ auto Linkage = Record[OpNum++];
+ auto *NewGA = GlobalAlias::create(
+ Ty, AddrSpace, getDecodedLinkage(Linkage), "", TheModule);
// Old bitcode files didn't have visibility field.
// Local linkage must have default visibility.
- if (Record.size() > 3 && !NewGA->hasLocalLinkage())
- // FIXME: Change to an error if non-default in 4.0.
- NewGA->setVisibility(getDecodedVisibility(Record[3]));
- if (Record.size() > 4)
- NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[4]));
+ if (OpNum != Record.size()) {
+ auto VisInd = OpNum++;
+ if (!NewGA->hasLocalLinkage())
+ // FIXME: Change to an error if non-default in 4.0.
+ NewGA->setVisibility(getDecodedVisibility(Record[VisInd]));
+ }
+ if (OpNum != Record.size())
+ NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++]));
else
- upgradeDLLImportExportLinkage(NewGA, Record[2]);
- if (Record.size() > 5)
- NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[5]));
- if (Record.size() > 6)
- NewGA->setUnnamedAddr(Record[6]);
+ upgradeDLLImportExportLinkage(NewGA, Linkage);
+ if (OpNum != Record.size())
+ NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++]));
+ if (OpNum != Record.size())
+ NewGA->setUnnamedAddr(Record[OpNum++]);
ValueList.push_back(NewGA);
- AliasInits.push_back(std::make_pair(NewGA, Record[1]));
+ AliasInits.push_back(std::make_pair(NewGA, Val));
break;
}
/// MODULE_CODE_PURGEVALS: [numvals]
@@ -3117,11 +3662,52 @@ std::error_code BitcodeReader::parseModule(bool Resume,
return error("Invalid record");
ValueList.shrinkTo(Record[0]);
break;
+ /// MODULE_CODE_VSTOFFSET: [offset]
+ case bitc::MODULE_CODE_VSTOFFSET:
+ if (Record.size() < 1)
+ return error("Invalid record");
+ VSTOffset = Record[0];
+ break;
+ /// MODULE_CODE_METADATA_VALUES: [numvals]
+ case bitc::MODULE_CODE_METADATA_VALUES:
+ if (Record.size() < 1)
+ return error("Invalid record");
+ assert(!IsMetadataMaterialized);
+ // This record contains the number of metadata values in the module-level
+ // METADATA_BLOCK. It is used to support lazy parsing of metadata as
+ // a postpass, where we will parse function-level metadata first.
+ // This is needed because the ids of metadata are assigned implicitly
+ // based on their ordering in the bitcode, with the function-level
+ // metadata ids starting after the module-level metadata ids. Otherwise,
+ // we would have to parse the module-level metadata block to prime the
+ // MetadataList when we are lazy loading metadata during function
+ // importing. Initialize the MetadataList size here based on the
+ // record value, regardless of whether we are doing lazy metadata
+ // loading, so that we have consistent handling and assertion
+ // checking in parseMetadata for module-level metadata.
+ NumModuleMDs = Record[0];
+ SeenModuleValuesRecord = true;
+ assert(MetadataList.size() == 0);
+ MetadataList.resize(NumModuleMDs);
+ break;
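+ // Worked example of the numbering scheme, assuming this record reports
+ // 10 module-level values: MetadataList is pre-sized to 10, function-level
+ // metadata parsed first occupies IDs 10, 11, ..., and the module-level
+ // METADATA_BLOCK, parsed as a postpass, fills IDs 0..9 in place.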
}
Record.clear();
}
}
+/// Helper to read the header common to all bitcode files.
+static bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return false;
+ return true;
+}
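+
+// As raw bytes the magic is 'B', 'C', 0xC0, 0xDE: the four 4-bit reads
+// above consume low nibbles first, so 0x0,0xC form the byte 0xC0 and
+// 0xE,0xD form the byte 0xDE. A byte-level sketch of the same check, for a
+// hypothetical caller holding a raw buffer:
+//
+//   static bool looksLikeBitcode(const unsigned char *Buf, size_t Size) {
+//     return Size >= 4 && Buf[0] == 'B' && Buf[1] == 'C' &&
+//            Buf[2] == 0xC0 && Buf[3] == 0xDE;
+//   }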
+
std::error_code
BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
Module *M, bool ShouldLazyLoadMetadata) {
@@ -3131,12 +3717,7 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
return EC;
// Sniff for the signature.
- if (Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
+ if (!hasValidBitcodeHeader(Stream))
return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
@@ -3153,8 +3734,13 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
if (Entry.Kind != BitstreamEntry::SubBlock)
return error("Malformed block");
+ if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+ parseBitcodeVersion();
+ continue;
+ }
+
if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return parseModule(false, ShouldLazyLoadMetadata);
+ return parseModule(0, ShouldLazyLoadMetadata);
if (Stream.SkipBlock())
return error("Invalid record");
@@ -3204,12 +3790,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
return EC;
// Sniff for the signature.
- if (Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
+ if (!hasValidBitcodeHeader(Stream))
return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
@@ -3239,6 +3820,41 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
}
}
+ErrorOr<std::string> BitcodeReader::parseIdentificationBlock() {
+ if (std::error_code EC = initStream(nullptr))
+ return EC;
+
+ // Sniff for the signature.
+ if (!hasValidBitcodeHeader(Stream))
+ return error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+ if (std::error_code EC = parseBitcodeVersion())
+ return EC;
+ return ProducerIdentification;
+ }
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
+ }
+}
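+
+// Sketch of a typical use, with an illustrative reader R: probe the stream
+// for the producer string before reporting version-mismatch errors.
+//
+//   ErrorOr<std::string> Producer = R.parseIdentificationBlock();
+//   if (Producer && !Producer->empty())
+//     errs() << "bitcode produced by: " << *Producer << '\n';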
+
/// Parse metadata attachments.
std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
@@ -3274,7 +3890,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
auto K = MDKindMap.find(Record[I]);
if (K == MDKindMap.end())
return error("Invalid ID");
- Metadata *MD = MDValueList.getValueFwdRef(Record[I + 1]);
+ Metadata *MD = MetadataList.getValueFwdRef(Record[I + 1]);
F.setMetadata(K->second, cast<MDNode>(MD));
}
continue;
@@ -3288,7 +3904,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
MDKindMap.find(Kind);
if (I == MDKindMap.end())
return error("Invalid ID");
- Metadata *Node = MDValueList.getValueFwdRef(Record[i + 1]);
+ Metadata *Node = MetadataList.getValueFwdRef(Record[i + 1]);
if (isa<LocalAsMetadata>(Node))
// Drop the attachment. This used to be legal, but there's no
// upgrade path.
@@ -3303,17 +3919,17 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
}
}
-static std::error_code typeCheckLoadStoreInst(DiagnosticHandlerFunction DH,
- Type *ValType, Type *PtrType) {
+static std::error_code typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
+ LLVMContext &Context = PtrType->getContext();
if (!isa<PointerType>(PtrType))
- return error(DH, "Load/Store operand is not a pointer type");
+ return error(Context, "Load/Store operand is not a pointer type");
Type *ElemType = cast<PointerType>(PtrType)->getElementType();
if (ValType && ValType != ElemType)
- return error(DH, "Explicit load/store type does not match pointee type of "
- "pointer operand");
+ return error(Context, "Explicit load/store type does not match pointee "
+ "type of pointer operand");
if (!PointerType::isLoadableOrStorableType(ElemType))
- return error(DH, "Cannot load/store from pointer");
+ return error(Context, "Cannot load/store from pointer");
return std::error_code();
}
@@ -3324,11 +3940,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
- unsigned ModuleMDValueListSize = MDValueList.size();
+ unsigned ModuleMetadataListSize = MetadataList.size();
// Add all the function arguments to the value table.
- for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ValueList.push_back(I);
+ for (Argument &I : F->args())
+ ValueList.push_back(&I);
unsigned NextValueNo = ValueList.size();
BasicBlock *CurBB = nullptr;
@@ -3344,6 +3960,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return nullptr;
};
+ std::vector<OperandBundleDef> OperandBundles;
+
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (1) {
@@ -3452,8 +4070,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
unsigned ScopeID = Record[2], IAID = Record[3];
MDNode *Scope = nullptr, *IA = nullptr;
- if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
- if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
+ if (ScopeID)
+ Scope = cast<MDNode>(MetadataList.getValueFwdRef(ScopeID - 1));
+ if (IAID)
+ IA = cast<MDNode>(MetadataList.getValueFwdRef(IAID - 1));
LastLoc = DebugLoc::get(Line, Col, Scope, IA);
I->setDebugLoc(LastLoc);
I = nullptr;
@@ -3515,7 +4135,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
CurBB->getInstList().push_back(Temp);
}
} else {
- I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ auto CastOp = (Instruction::CastOps)Opc;
+ if (!CastInst::castIsValid(CastOp, Op, ResTy))
+ return error("Invalid cast");
+ I = CastInst::Create(CastOp, Op, ResTy);
}
InstructionList.push_back(I);
break;
@@ -3811,6 +4434,110 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
}
break;
}
+ case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#]
+ if (Record.size() != 1 && Record.size() != 2)
+ return error("Invalid record");
+ unsigned Idx = 0;
+ Value *CleanupPad =
+ getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ if (!CleanupPad)
+ return error("Invalid record");
+ BasicBlock *UnwindDest = nullptr;
+ if (Record.size() == 2) {
+ UnwindDest = getBasicBlock(Record[Idx++]);
+ if (!UnwindDest)
+ return error("Invalid record");
+ }
+
+ I = CleanupReturnInst::Create(CleanupPad, UnwindDest);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#]
+ if (Record.size() != 2)
+ return error("Invalid record");
+ unsigned Idx = 0;
+ Value *CatchPad =
+ getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ if (!CatchPad)
+ return error("Invalid record");
+ BasicBlock *BB = getBasicBlock(Record[Idx++]);
+ if (!BB)
+ return error("Invalid record");
+
+ I = CatchReturnInst::Create(CatchPad, BB);
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?]
+ // We must have, at minimum, the outer scope and the number of arguments.
+ if (Record.size() < 2)
+ return error("Invalid record");
+
+ unsigned Idx = 0;
+
+ Value *ParentPad =
+ getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+
+ unsigned NumHandlers = Record[Idx++];
+
+ SmallVector<BasicBlock *, 2> Handlers;
+ for (unsigned Op = 0; Op != NumHandlers; ++Op) {
+ BasicBlock *BB = getBasicBlock(Record[Idx++]);
+ if (!BB)
+ return error("Invalid record");
+ Handlers.push_back(BB);
+ }
+
+ BasicBlock *UnwindDest = nullptr;
+ if (Idx + 1 == Record.size()) {
+ UnwindDest = getBasicBlock(Record[Idx++]);
+ if (!UnwindDest)
+ return error("Invalid record");
+ }
+
+ if (Record.size() != Idx)
+ return error("Invalid record");
+
+ auto *CatchSwitch =
+ CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers);
+ for (BasicBlock *Handler : Handlers)
+ CatchSwitch->addHandler(Handler);
+ I = CatchSwitch;
+ InstructionList.push_back(I);
+ break;
+ }
+ case bitc::FUNC_CODE_INST_CATCHPAD:
+ case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*]
+ // We must have, at minimum, the outer scope and the number of arguments.
+ if (Record.size() < 2)
+ return error("Invalid record");
+
+ unsigned Idx = 0;
+
+ Value *ParentPad =
+ getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+
+ unsigned NumArgOperands = Record[Idx++];
+
+ SmallVector<Value *, 2> Args;
+ for (unsigned Op = 0; Op != NumArgOperands; ++Op) {
+ Value *Val;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val))
+ return error("Invalid record");
+ Args.push_back(Val);
+ }
+
+ if (Record.size() != Idx)
+ return error("Invalid record");
+
+ if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD)
+ I = CleanupPadInst::Create(ParentPad, Args);
+ else
+ I = CatchPadInst::Create(ParentPad, Args);
+ InstructionList.push_back(I);
+ break;
+ }
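+ // A minimal sketch of what the two records deserialize to via the C++
+ // API, given the token-typed ParentPad and collected Args above:
+ //
+ //   I = CleanupPadInst::Create(ParentPad, Args); // INST_CLEANUPPAD
+ //   I = CatchPadInst::Create(ParentPad, Args);   // INST_CATCHPAD
+ //
+ // each yielding a token value that the matching cleanupret/catchret
+ // (parsed above) consumes.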
case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
// Check magic
if ((Record[0] >> 16) == SWITCH_INST_MAGIC) {
@@ -3973,10 +4700,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
}
}
- I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles);
+ OperandBundles.clear();
InstructionList.push_back(I);
- cast<InvokeInst>(I)
- ->setCallingConv(static_cast<CallingConv::ID>(~(1U << 13) & CCInfo));
+ cast<InvokeInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
break;
}
@@ -4081,6 +4809,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
uint64_t AlignRecord = Record[3];
const uint64_t InAllocaMask = uint64_t(1) << 5;
const uint64_t ExplicitTypeMask = uint64_t(1) << 6;
+ // Reserve bit 7 for SwiftError flag.
+ // const uint64_t SwiftErrorMask = uint64_t(1) << 7;
const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask;
bool InAlloca = AlignRecord & InAllocaMask;
Type *Ty = getTypeByID(Record[0]);
@@ -4115,8 +4845,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 3 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
- if (std::error_code EC =
- typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType()))
return EC;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
@@ -4140,8 +4869,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 5 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
- if (std::error_code EC =
- typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType()))
return EC;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
@@ -4175,8 +4903,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
OpNum + 2 != Record.size())
return error("Invalid record");
- if (std::error_code EC = typeCheckLoadStoreInst(
- DiagnosticHandler, Val->getType(), Ptr->getType()))
+ if (std::error_code EC =
+ typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
return EC;
unsigned Align;
if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
@@ -4199,8 +4927,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
OpNum + 4 != Record.size())
return error("Invalid record");
- if (std::error_code EC = typeCheckLoadStoreInst(
- DiagnosticHandler, Val->getType(), Ptr->getType()))
+ if (std::error_code EC =
+ typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
return EC;
AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == NotAtomic || Ordering == Acquire ||
@@ -4237,8 +4965,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]);
- if (std::error_code EC = typeCheckLoadStoreInst(
- DiagnosticHandler, Cmp->getType(), Ptr->getType()))
+ if (std::error_code EC =
+ typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
return EC;
AtomicOrdering FailureOrdering;
if (Record.size() < 7)
@@ -4299,7 +5027,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
break;
}
case bitc::FUNC_CODE_INST_CALL: {
- // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
+ // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
return error("Invalid record");
@@ -4307,8 +5035,15 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
AttributeSet PAL = getAttributes(Record[OpNum++]);
unsigned CCInfo = Record[OpNum++];
+ FastMathFlags FMF;
+ if ((CCInfo >> bitc::CALL_FMF) & 1) {
+ FMF = getDecodedFastMathFlags(Record[OpNum++]);
+ if (!FMF.any())
+ return error("Fast math flags indicator set for call with no FMF");
+ }
+
FunctionType *FTy = nullptr;
- if (CCInfo >> 15 & 1 &&
+ if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 &&
!(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
return error("Explicit call type is not a function type");
@@ -4354,17 +5089,26 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
}
}
- I = CallInst::Create(FTy, Callee, Args);
+ I = CallInst::Create(FTy, Callee, Args, OperandBundles);
+ OperandBundles.clear();
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
- static_cast<CallingConv::ID>((~(1U << 14) & CCInfo) >> 1));
+ static_cast<CallingConv::ID>((0x7ff & CCInfo) >> bitc::CALL_CCONV));
CallInst::TailCallKind TCK = CallInst::TCK_None;
- if (CCInfo & 1)
+ if (CCInfo & 1 << bitc::CALL_TAIL)
TCK = CallInst::TCK_Tail;
- if (CCInfo & (1 << 14))
+ if (CCInfo & (1 << bitc::CALL_MUSTTAIL))
TCK = CallInst::TCK_MustTail;
+ if (CCInfo & (1 << bitc::CALL_NOTAIL))
+ TCK = CallInst::TCK_NoTail;
cast<CallInst>(I)->setTailCallKind(TCK);
cast<CallInst>(I)->setAttributes(PAL);
+ if (FMF.any()) {
+ if (!isa<FPMathOperator>(I))
+ return error("Fast-math-flags specified for call without "
+ "floating-point scalar or vector return type");
+ I->setFastMathFlags(FMF);
+ }
break;
}
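// The CCInfo word decoded above packs flags around the calling convention;
// using the bitc::CallMarkersFlags positions referenced here (CALL_TAIL = 0,
// CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15,
// CALL_NOTAIL = 16, CALL_FMF = 17), a musttail fastcc call with an explicit
// function type would be encoded, for example, as
//
//   unsigned CCInfo = (CallingConv::Fast << bitc::CALL_CCONV) |
//                     (1U << bitc::CALL_MUSTTAIL) |
//                     (1U << bitc::CALL_EXPLICIT_TYPE);
//
// and (0x7ff & CCInfo) >> bitc::CALL_CCONV recovers CallingConv::Fast.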
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
@@ -4379,6 +5123,28 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
+
+ case bitc::FUNC_CODE_OPERAND_BUNDLE: {
+ // A call or an invoke can be optionally prefixed with some variable
+ // number of operand bundle blocks. These blocks are read into
+ // OperandBundles and consumed at the next call or invoke instruction.
+
+ if (Record.size() < 1 || Record[0] >= BundleTags.size())
+ return error("Invalid record");
+
+ std::vector<Value *> Inputs;
+
+ unsigned OpNum = 1;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return error("Invalid record");
+ Inputs.push_back(Op);
+ }
+
+ OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs));
+ continue;
+ }
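+ // Sketch of the record-level protocol implemented here: bundle records
+ // immediately precede the call or invoke that owns them, e.g.
+ //
+ //   FUNC_CODE_OPERAND_BUNDLE [tag#, vals...]  // pushed above
+ //   FUNC_CODE_OPERAND_BUNDLE [tag#, vals...]  // pushed above
+ //   FUNC_CODE_INST_CALL      [...]            // consumes and clears
+ //
+ // A bundle left unconsumed at the end of the body is rejected by the
+ // "Operand bundles found with no consumer" checks below.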
}
// Add instruction to end of current BB. If there is no current BB, reject
@@ -4387,6 +5153,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
delete I;
return error("Invalid instruction with no BB");
}
+ if (!OperandBundles.empty()) {
+ delete I;
+ return error("Operand bundles found with no consumer");
+ }
CurBB->getInstList().push_back(I);
// If this was a terminator instruction, move to the next block.
@@ -4402,6 +5172,9 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
OutOfRecordLoop:
+ if (!OperandBundles.empty())
+ return error("Operand bundles found with no consumer");
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (!A->getParent()) {
@@ -4421,7 +5194,7 @@ OutOfRecordLoop:
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
- MDValueList.shrinkTo(ModuleMDValueListSize);
+ MetadataList.shrinkTo(ModuleMetadataListSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
return std::error_code();
}
@@ -4431,11 +5204,14 @@ std::error_code BitcodeReader::findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
- if (Stream.AtEndOfStream())
- return error("Could not find function in stream");
- // ParseModule will parse the next body in the stream and set its
- // position in the DeferredFunctionInfo map.
- if (std::error_code EC = parseModule(true))
+ // This is the fallback handling for the old format bitcode that
+ // didn't contain the function index in the VST, or when we have
+ // an anonymous function which would not have a VST entry.
+ // Assert that we have one of those two cases.
+ assert(VSTOffset == 0 || !F->hasName());
+ // Parse the next body in the stream and set its position in the
+ // DeferredFunctionInfo map.
+ if (std::error_code EC = rememberAndSkipFunctionBodies())
return EC;
}
return std::error_code();
@@ -4448,8 +5224,12 @@ std::error_code BitcodeReader::findFunctionInStream(
void BitcodeReader::releaseBuffer() { Buffer.release(); }
std::error_code BitcodeReader::materialize(GlobalValue *GV) {
- if (std::error_code EC = materializeMetadata())
- return EC;
+ // In older bitcode we must materialize the metadata before parsing
+ // any functions, in order to set up the MetadataList properly.
+ if (!SeenModuleValuesRecord) {
+ if (std::error_code EC = materializeMetadata())
+ return EC;
+ }
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
@@ -4476,7 +5256,8 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
// Upgrade any old intrinsic calls in the function.
for (auto &I : UpgradedIntrinsics) {
- for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) {
+ for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
+ UI != UE;) {
User *U = *UI;
++UI;
if (CallInst *CI = dyn_cast<CallInst>(U))
@@ -4484,41 +5265,16 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
}
}
+ // Finish fn->subprogram upgrade for materialized functions.
+ if (DISubprogram *SP = FunctionsWithSPs.lookup(F))
+ F->setSubprogram(SP);
+
// Bring in any functions that this function forward-referenced via
// blockaddresses.
return materializeForwardReferencedFunctions();
}
-bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
- const Function *F = dyn_cast<Function>(GV);
- if (!F || F->isDeclaration())
- return false;
-
- // Dematerializing F would leave dangling references that wouldn't be
- // reconnected on re-materialization.
- if (BlockAddressesTaken.count(F))
- return false;
-
- return DeferredFunctionInfo.count(const_cast<Function*>(F));
-}
-
-void BitcodeReader::dematerialize(GlobalValue *GV) {
- Function *F = dyn_cast<Function>(GV);
- // If this function isn't dematerializable, this is a noop.
- if (!F || !isDematerializable(F))
- return;
-
- assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
-
- // Just forget the function body, we can remat it later.
- F->dropAllReferences();
- F->setIsMaterializable(true);
-}
-
-std::error_code BitcodeReader::materializeModule(Module *M) {
- assert(M == TheModule &&
- "Can only Materialize the Module this BitcodeReader is attached to.");
-
+std::error_code BitcodeReader::materializeModule() {
if (std::error_code EC = materializeMetadata())
return EC;
@@ -4527,16 +5283,16 @@ std::error_code BitcodeReader::materializeModule(Module *M) {
// Iterate over the module, deserializing any functions that are still on
// disk.
- for (Module::iterator F = TheModule->begin(), E = TheModule->end();
- F != E; ++F) {
- if (std::error_code EC = materialize(F))
+ for (Function &F : *TheModule) {
+ if (std::error_code EC = materialize(&F))
return EC;
}
- // At this point, if there are any function bodies, the current bit is
- // pointing to the END_BLOCK record after them. Now make sure the rest
- // of the bits in the module have been read.
- if (NextUnreadBit)
- parseModule(true);
+ // At this point, if there are any function bodies, parse the rest of
+ // the bits in the module past the last function block we have recorded
+ // through either lazy scanning or the VST.
+ if (LastFunctionBlockBit || NextUnreadBit)
+ parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit
+ : NextUnreadBit);
// Check that all block address forward references got resolved (as we
// promised above).
@@ -4561,7 +5317,7 @@ std::error_code BitcodeReader::materializeModule(Module *M) {
for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
- UpgradeDebugInfo(*M);
+ UpgradeDebugInfo(*TheModule);
return std::error_code();
}
@@ -4622,6 +5378,416 @@ BitcodeReader::initLazyStream(std::unique_ptr<DataStreamer> Streamer) {
return std::error_code();
}
+std::error_code FunctionIndexBitcodeReader::error(BitcodeError E,
+ const Twine &Message) {
+ return ::error(DiagnosticHandler, make_error_code(E), Message);
+}
+
+std::error_code FunctionIndexBitcodeReader::error(const Twine &Message) {
+ return ::error(DiagnosticHandler,
+ make_error_code(BitcodeError::CorruptedBitcode), Message);
+}
+
+std::error_code FunctionIndexBitcodeReader::error(BitcodeError E) {
+ return ::error(DiagnosticHandler, make_error_code(E));
+}
+
+FunctionIndexBitcodeReader::FunctionIndexBitcodeReader(
+ MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy, bool CheckFuncSummaryPresenceOnly)
+ : DiagnosticHandler(DiagnosticHandler), Buffer(Buffer), IsLazy(IsLazy),
+ CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {}
+
+FunctionIndexBitcodeReader::FunctionIndexBitcodeReader(
+ DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy,
+ bool CheckFuncSummaryPresenceOnly)
+ : DiagnosticHandler(DiagnosticHandler), Buffer(nullptr), IsLazy(IsLazy),
+ CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {}
+
+void FunctionIndexBitcodeReader::freeState() { Buffer = nullptr; }
+
+void FunctionIndexBitcodeReader::releaseBuffer() { Buffer.release(); }
+
+// Specialized value symbol table parser used when reading function index
+// blocks where we don't actually create global values.
+// At the end of this routine the function index is populated with a map
+// from function name to FunctionInfo. The function info contains
+// the function block's bitcode offset as well as the offset into the
+// function summary section.
+std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this value table.
+ SmallString<128> ValueName;
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records).
+ break;
+ case bitc::VST_CODE_FNENTRY: {
+ // VST_FNENTRY: [valueid, offset, namechar x N]
+ if (convertToString(Record, 2, ValueName))
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ uint64_t FuncOffset = Record[1];
+ std::unique_ptr<FunctionInfo> FuncInfo =
+ llvm::make_unique<FunctionInfo>(FuncOffset);
+ if (foundFuncSummary() && !IsLazy) {
+ DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
+ SummaryMap.find(ValueID);
+ assert(SMI != SummaryMap.end() && "Summary info not found");
+ FuncInfo->setFunctionSummary(std::move(SMI->second));
+ }
+ TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+
+ ValueName.clear();
+ break;
+ }
+ case bitc::VST_CODE_COMBINED_FNENTRY: {
+      // VST_COMBINED_FNENTRY: [offset, namechar x N]
+ if (convertToString(Record, 1, ValueName))
+ return error("Invalid record");
+ uint64_t FuncSummaryOffset = Record[0];
+ std::unique_ptr<FunctionInfo> FuncInfo =
+ llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
+ if (foundFuncSummary() && !IsLazy) {
+ DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
+ SummaryMap.find(FuncSummaryOffset);
+ assert(SMI != SummaryMap.end() && "Summary info not found");
+ FuncInfo->setFunctionSummary(std::move(SMI->second));
+ }
+ TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+
+ ValueName.clear();
+ break;
+ }
+ }
+ }
+}
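
The net result of this parse is a name-keyed table whose entries carry a bitcode offset and, in the eager case, an attached summary. A rough standalone model of that shape (the struct and alias names here are illustrative; the real classes are FunctionInfo and FunctionInfoIndex):

    #include <cstdint>
    #include <map>
    #include <memory>
    #include <string>

    struct SummaryModel {
      unsigned InstCount;
      bool IsLocal;
    };

    struct InfoModel {
      uint64_t BitcodeOffset;          // function block / summary offset
      std::unique_ptr<SummaryModel> S; // present only when parsing eagerly
    };

    // Name-keyed; a multimap because COMDATs can yield several entries
    // per function name.
    using IndexModel = std::multimap<std::string, InfoModel>;
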
+
+// Parse just the blocks needed for function index building out of the module.
+// At the end of this routine the function Index is populated with a map
+// from function name to FunctionInfo. The function info contains
+// either the parsed function summary information (when parsing summaries
+// eagerly), or just the function summary record's offset
+// if parsing lazily (IsLazy).
+std::error_code FunctionIndexBitcodeReader::parseModule() {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return error("Invalid record");
+
+ // Read the function index for this module.
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+
+ case BitstreamEntry::SubBlock:
+ if (CheckFuncSummaryPresenceOnly) {
+ if (Entry.ID == bitc::FUNCTION_SUMMARY_BLOCK_ID) {
+ SeenFuncSummary = true;
+ // No need to parse the rest since we found the summary.
+ return std::error_code();
+ }
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ continue;
+ }
+ switch (Entry.ID) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ break;
+ case bitc::BLOCKINFO_BLOCK_ID:
+ // Need to parse these to get abbrev ids (e.g. for VST)
+ if (Stream.ReadBlockInfoBlock())
+ return error("Malformed block");
+ break;
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ if (std::error_code EC = parseValueSymbolTable())
+ return EC;
+ break;
+ case bitc::FUNCTION_SUMMARY_BLOCK_ID:
+ SeenFuncSummary = true;
+ if (IsLazy) {
+ // Lazy parsing of summary info, skip it.
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ } else if (std::error_code EC = parseEntireSummary())
+ return EC;
+ break;
+ case bitc::MODULE_STRTAB_BLOCK_ID:
+ if (std::error_code EC = parseModuleStringTable())
+ return EC;
+ break;
+ }
+ continue;
+
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
+ }
+}
+
+// Eagerly parse the entire function summary block (i.e. for all functions
+// in the index). This populates the FunctionSummary objects in
+// the index.
+std::error_code FunctionIndexBitcodeReader::parseEntireSummary() {
+ if (Stream.EnterSubBlock(bitc::FUNCTION_SUMMARY_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record. The record format depends on whether this
+ // is a per-module index or a combined index file. In the per-module
+ // case the records contain the associated value's ID for correlation
+ // with VST entries. In the combined index the correlation is done
+ // via the bitcode offset of the summary records (which were saved
+ // in the combined index VST entries). The records also contain
+ // information used for ThinLTO renaming and importing.
+ Record.clear();
+ uint64_t CurRecordBit = Stream.GetCurrentBitNo();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ // FS_PERMODULE_ENTRY: [valueid, islocal, instcount]
+ case bitc::FS_CODE_PERMODULE_ENTRY: {
+ unsigned ValueID = Record[0];
+ bool IsLocal = Record[1];
+ unsigned InstCount = Record[2];
+ std::unique_ptr<FunctionSummary> FS =
+ llvm::make_unique<FunctionSummary>(InstCount);
+ FS->setLocalFunction(IsLocal);
+ // The module path string ref set in the summary must be owned by the
+ // index's module string table. Since we don't have a module path
+ // string table section in the per-module index, we create a single
+ // module path string table entry with an empty (0) ID to take
+ // ownership.
+ FS->setModulePath(
+ TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+      SummaryMap[ValueID] = std::move(FS);
+      break;
+    }
+ // FS_COMBINED_ENTRY: [modid, instcount]
+ case bitc::FS_CODE_COMBINED_ENTRY: {
+ uint64_t ModuleId = Record[0];
+ unsigned InstCount = Record[1];
+ std::unique_ptr<FunctionSummary> FS =
+ llvm::make_unique<FunctionSummary>(InstCount);
+ FS->setModulePath(ModuleIdMap[ModuleId]);
+      SummaryMap[CurRecordBit] = std::move(FS);
+      break;
+    }
+ }
+ }
+ llvm_unreachable("Exit infinite loop");
+}
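
The two record kinds differ mainly in the correlation key, as the comment above explains. A compact sketch of the two keying schemes (SummaryStub and the map names are illustrative only):

    #include <cstdint>
    #include <map>
    #include <memory>

    struct SummaryStub { unsigned InstCount; };

    // Per-module index: keyed by the function's value id, which the later
    // VST_CODE_FNENTRY records also carry.
    std::map<uint64_t, std::unique_ptr<SummaryStub>> ByValueID;

    // Combined index: no value ids exist, so the key is the bit offset of
    // the summary record itself, which VST_CODE_COMBINED_FNENTRY repeats.
    std::map<uint64_t, std::unique_ptr<SummaryStub>> ByRecordBit;
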
+
+// Parse the module string table block into the Index.
+// This populates the ModulePathStringTable map in the index.
+std::error_code FunctionIndexBitcodeReader::parseModuleStringTable() {
+ if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ SmallString<128> ModulePath;
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return std::error_code();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::MST_CODE_ENTRY: {
+ // MST_ENTRY: [modid, namechar x N]
+ if (convertToString(Record, 1, ModulePath))
+ return error("Invalid record");
+ uint64_t ModuleId = Record[0];
+ StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId);
+ ModuleIdMap[ModuleId] = ModulePathInMap;
+ ModulePath.clear();
+ break;
+ }
+ }
+ }
+ llvm_unreachable("Exit infinite loop");
+}
+
+// Parse the function info index from the bitcode streamer into the given index.
+std::error_code FunctionIndexBitcodeReader::parseSummaryIndexInto(
+ std::unique_ptr<DataStreamer> Streamer, FunctionInfoIndex *I) {
+ TheIndex = I;
+
+ if (std::error_code EC = initStream(std::move(Streamer)))
+ return EC;
+
+ // Sniff for the signature.
+ if (!hasValidBitcodeHeader(Stream))
+ return error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (1) {
+ if (Stream.AtEndOfStream()) {
+ // We didn't really read a proper Module block.
+ return error("Malformed block");
+ }
+
+ BitstreamEntry Entry =
+ Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+
+ if (Entry.Kind != BitstreamEntry::SubBlock)
+ return error("Malformed block");
+
+ // If we see a MODULE_BLOCK, parse it to find the blocks needed for
+ // building the function summary index.
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return parseModule();
+
+ if (Stream.SkipBlock())
+ return error("Invalid record");
+ }
+}
+
+// Parse the function information at the given offset in the buffer into
+// the index. Used to support lazy parsing of function summaries from the
+// combined index during importing.
+// TODO: This function is not yet complete as it won't have a consumer
+// until ThinLTO function importing is added.
+std::error_code FunctionIndexBitcodeReader::parseFunctionSummary(
+ std::unique_ptr<DataStreamer> Streamer, FunctionInfoIndex *I,
+ size_t FunctionSummaryOffset) {
+ TheIndex = I;
+
+ if (std::error_code EC = initStream(std::move(Streamer)))
+ return EC;
+
+ // Sniff for the signature.
+ if (!hasValidBitcodeHeader(Stream))
+ return error("Invalid bitcode signature");
+
+ Stream.JumpToBit(FunctionSummaryOffset);
+
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ default:
+ return error("Malformed block");
+ case BitstreamEntry::Record:
+ // The expected case.
+ break;
+ }
+
+ // TODO: Read a record. This interface will be completed when ThinLTO
+ // importing is added so that it can be tested.
+ SmallVector<uint64_t, 64> Record;
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ case bitc::FS_CODE_COMBINED_ENTRY:
+ default:
+ return error("Invalid record");
+ }
+
+ return std::error_code();
+}
+
+std::error_code
+FunctionIndexBitcodeReader::initStream(std::unique_ptr<DataStreamer> Streamer) {
+ if (Streamer)
+ return initLazyStream(std::move(Streamer));
+ return initStreamFromBuffer();
+}
+
+std::error_code FunctionIndexBitcodeReader::initStreamFromBuffer() {
+ const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart();
+ const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3)
+ return error("Invalid bitcode signature");
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
+ return error("Invalid bitcode wrapper header");
+
+ StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
+ Stream.init(&*StreamFile);
+
+ return std::error_code();
+}
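
For reference, the wrapper test reduces to comparing the first four bytes against 0x0B17C0DE in little-endian byte order, consistent with the comment above. A standalone sketch:

    #include <cstddef>

    // 0x0B17C0DE stored little-endian is the byte sequence DE C0 17 0B.
    static bool looksLikeBitcodeWrapper(const unsigned char *Buf,
                                        size_t Size) {
      return Size >= 4 && Buf[0] == 0xDE && Buf[1] == 0xC0 &&
             Buf[2] == 0x17 && Buf[3] == 0x0B;
    }

    int main() {
      const unsigned char Wrapped[] = {0xDE, 0xC0, 0x17, 0x0B};
      const unsigned char Plain[] = {'B', 'C', 0xC0, 0xDE};
      return looksLikeBitcodeWrapper(Wrapped, 4) &&
                     !looksLikeBitcodeWrapper(Plain, 4)
                 ? 0
                 : 1;
    }
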
+
+std::error_code FunctionIndexBitcodeReader::initLazyStream(
+ std::unique_ptr<DataStreamer> Streamer) {
+ // Check and strip off the bitcode wrapper; BitstreamReader expects never to
+ // see it.
+ auto OwnedBytes =
+ llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
+ StreamingMemoryObject &Bytes = *OwnedBytes;
+ StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes));
+ Stream.init(&*StreamFile);
+
+ unsigned char buf[16];
+ if (Bytes.readBytes(buf, 16, 0) != 16)
+ return error("Invalid bitcode signature");
+
+ if (!isBitcode(buf, buf + 16))
+ return error("Invalid bitcode signature");
+
+ if (isBitcodeWrapper(buf, buf + 4)) {
+ const unsigned char *bitcodeStart = buf;
+ const unsigned char *bitcodeEnd = buf + 16;
+ SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
+ Bytes.dropLeadingBytes(bitcodeStart - buf);
+ Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart);
+ }
+ return std::error_code();
+}
+
namespace {
class BitcodeErrorCategoryType : public std::error_category {
const char *name() const LLVM_NOEXCEPT override {
@@ -4669,7 +5835,7 @@ getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
if (MaterializeAll) {
// Read in the entire module, and destroy the BitcodeReader.
- if (std::error_code EC = M->materializeAllPermanently())
+ if (std::error_code EC = M->materializeAll())
return cleanupOnError(EC);
} else {
// Resolve forward references from blockaddresses.
@@ -4690,10 +5856,8 @@ getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
static ErrorOr<std::unique_ptr<Module>>
getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context, bool MaterializeAll,
- DiagnosticHandlerFunction DiagnosticHandler,
bool ShouldLazyLoadMetadata = false) {
- BitcodeReader *R =
- new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
+ BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
ErrorOr<std::unique_ptr<Module>> Ret =
getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
@@ -4705,41 +5869,124 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
return Ret;
}
-ErrorOr<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(
- std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler, bool ShouldLazyLoadMetadata) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
+ LLVMContext &Context, bool ShouldLazyLoadMetadata) {
return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
- DiagnosticHandler, ShouldLazyLoadMetadata);
+ ShouldLazyLoadMetadata);
}
-ErrorOr<std::unique_ptr<Module>> llvm::getStreamedBitcodeModule(
- StringRef Name, std::unique_ptr<DataStreamer> Streamer,
- LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getStreamedBitcodeModule(StringRef Name,
+ std::unique_ptr<DataStreamer> Streamer,
+ LLVMContext &Context) {
std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
- BitcodeReader *R = new BitcodeReader(Context, DiagnosticHandler);
+ BitcodeReader *R = new BitcodeReader(Context);
return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
false);
}
-ErrorOr<std::unique_ptr<Module>>
-llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+ LLVMContext &Context) {
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- return getLazyBitcodeModuleImpl(std::move(Buf), Context, true,
- DiagnosticHandler);
+ return getLazyBitcodeModuleImpl(std::move(Buf), Context, true);
// TODO: Restore the use-lists to the in-memory state when the bitcode was
// written. We must defer until the Module has been fully materialized.
}
-std::string
-llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler) {
+std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer,
+ LLVMContext &Context) {
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context,
- DiagnosticHandler);
+ auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
ErrorOr<std::string> Triple = R->parseTriple();
if (Triple.getError())
return "";
return Triple.get();
}
+
+std::string llvm::getBitcodeProducerString(MemoryBufferRef Buffer,
+ LLVMContext &Context) {
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ BitcodeReader R(Buf.release(), Context);
+ ErrorOr<std::string> ProducerString = R.parseIdentificationBlock();
+ if (ProducerString.getError())
+ return "";
+ return ProducerString.get();
+}
+
+// Parse the specified bitcode buffer, returning the function info index.
+// If IsLazy is false, parse the entire function summary into
+// the index. Otherwise skip the function summary section, and only create
+// an index object with a map from function name to function summary offset.
+// The index is used to perform lazy function summary reading later.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+llvm::getFunctionInfoIndex(MemoryBufferRef Buffer,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy) {
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, IsLazy);
+
+ auto Index = llvm::make_unique<FunctionInfoIndex>();
+
+ auto cleanupOnError = [&](std::error_code EC) {
+ R.releaseBuffer(); // Never take ownership on error.
+ return EC;
+ };
+
+ if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get()))
+ return cleanupOnError(EC);
+
+ Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+ return std::move(Index);
+}
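
A hedged usage sketch of this entry point; the include paths and the FunctionInfoIndex spelling match this LLVM vintage but should be treated as assumptions:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/FunctionInfo.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>
    #include <utility>

    // Eagerly build the index for one buffer (IsLazy=false); returns null
    // on error, leaving reporting to the diagnostic handler.
    static std::unique_ptr<llvm::FunctionInfoIndex>
    loadIndex(llvm::MemoryBufferRef BufRef) {
      auto IndexOrErr = llvm::getFunctionInfoIndex(
          BufRef, [](const llvm::DiagnosticInfo &) { /* report */ },
          /*IsLazy=*/false);
      if (IndexOrErr.getError())
        return nullptr;
      return std::move(IndexOrErr.get());
    }
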
+
+// Check if the given bitcode buffer contains a function summary block.
+bool llvm::hasFunctionSummary(MemoryBufferRef Buffer,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, false, true);
+
+ auto cleanupOnError = [&](std::error_code EC) {
+ R.releaseBuffer(); // Never take ownership on error.
+ return false;
+ };
+
+ if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr))
+ return cleanupOnError(EC);
+
+ Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+ return R.foundFuncSummary();
+}
+
+// This method supports lazy reading of function summary data from the combined
+// index during ThinLTO function importing. When reading the combined index
+// file, getFunctionInfoIndex is first invoked with IsLazy=true.
+// Then this method is called for each function considered for importing,
+// to parse the summary information for the given function name into
+// the index.
+std::error_code llvm::readFunctionSummary(
+ MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler,
+ StringRef FunctionName, std::unique_ptr<FunctionInfoIndex> Index) {
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler);
+
+ auto cleanupOnError = [&](std::error_code EC) {
+ R.releaseBuffer(); // Never take ownership on error.
+ return EC;
+ };
+
+  // Look up the given function name in the FunctionMap, which may
+ // contain a list of function infos in the case of a COMDAT. Walk through
+ // and parse each function summary info at the function summary offset
+ // recorded when parsing the value symbol table.
+ for (const auto &FI : Index->getFunctionInfoList(FunctionName)) {
+ size_t FunctionSummaryOffset = FI->bitcodeIndex();
+ if (std::error_code EC =
+ R.parseFunctionSummary(nullptr, Index.get(), FunctionSummaryOffset))
+ return cleanupOnError(EC);
+ }
+
+ Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+ return std::error_code();
+}
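
Putting the two entry points together, the intended lazy flow looks roughly like the sketch below; "candidate_fn" is a placeholder import candidate, and the by-value unique_ptr parameter means the index is handed over, matching the provisional API above:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/FunctionInfo.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <utility>

    static void importOneSummary(llvm::MemoryBufferRef BufRef,
                                 llvm::DiagnosticHandlerFunction DiagHandler) {
      auto IndexOrErr =
          llvm::getFunctionInfoIndex(BufRef, DiagHandler, /*IsLazy=*/true);
      if (IndexOrErr.getError())
        return;
      // readFunctionSummary consumes the index (taken by unique_ptr).
      llvm::readFunctionSummary(BufRef, DiagHandler, "candidate_fn",
                                std::move(IndexOrErr.get()));
    }
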
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1a70ba5..a1f8786 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -13,14 +13,18 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "ValueEnumerator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/UseListOrder.h"
@@ -174,6 +178,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_IN_ALLOCA;
case Attribute::Cold:
return bitc::ATTR_KIND_COLD;
+ case Attribute::InaccessibleMemOnly:
+ return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY;
+ case Attribute::InaccessibleMemOrArgMemOnly:
+ return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY;
case Attribute::InlineHint:
return bitc::ATTR_KIND_INLINE_HINT;
case Attribute::InReg:
@@ -198,6 +206,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT;
case Attribute::NoInline:
return bitc::ATTR_KIND_NO_INLINE;
+ case Attribute::NoRecurse:
+ return bitc::ATTR_KIND_NO_RECURSE;
case Attribute::NonLazyBind:
return bitc::ATTR_KIND_NON_LAZY_BIND;
case Attribute::NonNull:
@@ -405,6 +415,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break;
case Type::IntegerTyID:
// INTEGER: [width]
Code = bitc::TYPE_CODE_INTEGER;
@@ -573,10 +584,41 @@ static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) {
}
}
-// Emit top-level description of module, including target triple, inline asm,
-// descriptors for global variables, and function prototype info.
-static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
+/// Write a record that will eventually hold the word offset of the
+/// module-level VST. For now the offset is 0, which will be backpatched
+/// after the real VST is written. Returns the bit offset to backpatch.
+static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST,
+ BitstreamWriter &Stream) {
+ if (VST.empty())
+ return 0;
+
+ // Write a placeholder value in for the offset of the real VST,
+ // which is written after the function blocks so that it can include
+ // the offset of each function. The placeholder offset will be
+ // updated when the real VST is written.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET));
+ // Blocks are 32-bit aligned, so we can use a 32-bit word offset to
+ // hold the real VST offset. Must use fixed instead of VBR as we don't
+ // know how many VBR chunks to reserve ahead of time.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit the placeholder
+ uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0};
+ Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals);
+
+ // Compute and return the bit offset to the placeholder, which will be
+ // patched when the real VST is written. We can simply subtract the 32-bit
+ // fixed size from the current bit number to get the location to backpatch.
+ return Stream.GetCurrentBitNo() - 32;
+}
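
The arithmetic here is small but easy to misread: the placeholder is the last 32 bits emitted, and the value patched in later is a 32-bit word offset, not a bit offset. A standalone restatement:

    #include <cassert>
    #include <cstdint>

    // The placeholder is the last 32 bits written, so its bit address is
    // the current bit number minus 32.
    static uint64_t placeholderBit(uint64_t CurrentBitNo) {
      return CurrentBitNo - 32;
    }

    // The value later patched in is a 32-bit word offset of the real VST,
    // relative to the start of the bitcode proper; blocks are 32-bit
    // aligned, so the division is exact.
    static uint32_t backpatchWord(uint64_t VSTBitOffset) {
      assert((VSTBitOffset & 31) == 0 && "VST block not 32-bit aligned");
      return static_cast<uint32_t>(VSTBitOffset / 32);
    }

    int main() {
      // E.g. a VST starting at bit 4096 is recorded as word 128.
      assert(backpatchWord(4096) == 128);
      assert(placeholderBit(640) == 608);
      return 0;
    }
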
+
+/// Emit top-level description of module, including target triple, inline asm,
+/// descriptors for global variables, and function prototype info.
+/// Returns the bit offset to backpatch with the location of the real VST.
+static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
// Emit various pieces of data attached to a module.
if (!M->getTargetTriple().empty())
WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
@@ -725,7 +767,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the alias information.
for (const GlobalAlias &A : M->aliases()) {
// ALIAS: [alias type, aliasee val#, linkage, visibility]
- Vals.push_back(VE.getTypeID(A.getType()));
+ Vals.push_back(VE.getTypeID(A.getValueType()));
+ Vals.push_back(A.getType()->getAddressSpace());
Vals.push_back(VE.getValueID(A.getAliasee()));
Vals.push_back(getEncodedLinkage(A));
Vals.push_back(getEncodedVisibility(A));
@@ -736,6 +779,25 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
Vals.clear();
}
+
+  // Write a record indicating the number of module-level metadata IDs.
+ // This is needed because the ids of metadata are assigned implicitly
+ // based on their ordering in the bitcode, with the function-level
+ // metadata ids starting after the module-level metadata ids. For
+ // function importing where we lazy load the metadata as a postpass,
+ // we want to avoid parsing the module-level metadata before parsing
+ // the imported functions.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
+ Vals.push_back(VE.numMDs());
+ Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
+ Vals.clear();
+
+ uint64_t VSTOffsetPlaceholder =
+ WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream);
+ return VSTOffsetPlaceholder;
}
static uint64_t GetOptimizationFlags(const Value *V) {
@@ -943,7 +1005,8 @@ static void WriteDICompileUnit(const DICompileUnit *N,
BitstreamWriter &Stream,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- Record.push_back(N->isDistinct());
+ assert(N->isDistinct() && "Expected distinct compile units");
+ Record.push_back(/* IsDistinct */ true);
Record.push_back(N->getSourceLanguage());
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
@@ -958,6 +1021,7 @@ static void WriteDICompileUnit(const DICompileUnit *N,
Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get()));
Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
Record.push_back(N->getDWOId());
+ Record.push_back(VE.getMetadataOrNullID(N->getMacros().get()));
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -982,7 +1046,6 @@ static void WriteDISubprogram(const DISubprogram *N, const ValueEnumerator &VE,
Record.push_back(N->getVirtualIndex());
Record.push_back(N->getFlags());
Record.push_back(N->isOptimized());
- Record.push_back(VE.getMetadataOrNullID(N->getRawFunction()));
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
Record.push_back(VE.getMetadataOrNullID(N->getVariables().get()));
@@ -1034,6 +1097,33 @@ static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE,
Record.clear();
}
+static void WriteDIMacro(const DIMacro *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getMacinfoType());
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawValue()));
+
+ Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIMacroFile(const DIMacroFile *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getMacinfoType());
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
+
+ Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev);
+ Record.clear();
+}
+
static void WriteDIModule(const DIModule *N, const ValueEnumerator &VE,
BitstreamWriter &Stream,
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
@@ -1100,7 +1190,6 @@ static void WriteDILocalVariable(const DILocalVariable *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
Record.push_back(N->isDistinct());
- Record.push_back(N->getTag());
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
@@ -1310,16 +1399,15 @@ static void WriteMetadataAttachment(const Function &F,
Record.clear();
}
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
MDs.clear();
- I->getAllMetadataOtherThanDebugLoc(MDs);
+ I.getAllMetadataOtherThanDebugLoc(MDs);
// If no metadata, ignore instruction.
if (MDs.empty()) continue;
- Record.push_back(VE.getInstructionID(I));
+ Record.push_back(VE.getInstructionID(&I));
for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
Record.push_back(MDs[i].first);
@@ -1342,7 +1430,7 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
if (Names.empty()) return;
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3);
for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
Record.push_back(MDKindID);
@@ -1356,6 +1444,33 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+static void WriteOperandBundleTags(const Module *M, BitstreamWriter &Stream) {
+  // Write operand bundle tags.
+ //
+ // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG
+ //
+ // OPERAND_BUNDLE_TAG - [strchr x N]
+
+ SmallVector<StringRef, 8> Tags;
+ M->getOperandBundleTags(Tags);
+
+ if (Tags.empty())
+ return;
+
+ Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+
+ for (auto Tag : Tags) {
+ Record.append(Tag.begin(), Tag.end());
+
+ Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
if ((int64_t)V >= 0)
Vals.push_back(V << 1);
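
The hunk cuts off the negative branch of emitSignedInt64; the full mapping is the usual sign-in-low-bit scheme, which the following standalone sketch spells out with a few sample values:

    #include <cassert>
    #include <cstdint>

    // Sign-in-low-bit mapping for signed VBR operands: n >= 0 encodes as
    // n<<1, n < 0 as (-n<<1)|1 (the else branch lies outside this hunk).
    // INT64_MIN is ignored here for brevity.
    static uint64_t encodeSigned(int64_t N) {
      return N >= 0 ? static_cast<uint64_t>(N) << 1
                    : (static_cast<uint64_t>(-N) << 1) | 1;
    }

    int main() {
      assert(encodeSigned(0) == 0);
      assert(encodeSigned(1) == 2);
      assert(encodeSigned(-1) == 3);
      assert(encodeSigned(-2) == 5);
      return 0;
    }
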
@@ -1664,6 +1779,23 @@ static bool PushValueAndType(const Value *V, unsigned InstID,
return false;
}
+static void WriteOperandBundles(BitstreamWriter &Stream, ImmutableCallSite CS,
+ unsigned InstID, ValueEnumerator &VE) {
+ SmallVector<unsigned, 64> Record;
+ LLVMContext &C = CS.getInstruction()->getContext();
+
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+ const auto &Bundle = CS.getOperandBundleAt(i);
+ Record.push_back(C.getOperandBundleTagID(Bundle.getTagName()));
+
+ for (auto &Input : Bundle.Inputs)
+ PushValueAndType(Input, InstID, Record, VE);
+
+ Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record);
+ Record.clear();
+ }
+}
+
/// pushValue - Like PushValueAndType, but where the type of the value is
/// omitted (perhaps it was already encoded in an earlier operand).
static void pushValue(const Value *V, unsigned InstID,
@@ -1806,10 +1938,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
pushValue(SI.getCondition(), InstID, Vals, VE);
Vals.push_back(VE.getValueID(SI.getDefaultDest()));
- for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
- i != e; ++i) {
- Vals.push_back(VE.getValueID(i.getCaseValue()));
- Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
+ for (SwitchInst::ConstCaseIt Case : SI.cases()) {
+ Vals.push_back(VE.getValueID(Case.getCaseValue()));
+ Vals.push_back(VE.getValueID(Case.getCaseSuccessor()));
}
}
break;
@@ -1826,6 +1957,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const InvokeInst *II = cast<InvokeInst>(&I);
const Value *Callee = II->getCalledValue();
FunctionType *FTy = II->getFunctionType();
+
+ if (II->hasOperandBundles())
+ WriteOperandBundles(Stream, II, InstID, VE);
+
Code = bitc::FUNC_CODE_INST_INVOKE;
Vals.push_back(VE.getAttributeID(II->getAttributes()));
@@ -1851,6 +1986,49 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_RESUME;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
+ case Instruction::CleanupRet: {
+ Code = bitc::FUNC_CODE_INST_CLEANUPRET;
+ const auto &CRI = cast<CleanupReturnInst>(I);
+ pushValue(CRI.getCleanupPad(), InstID, Vals, VE);
+ if (CRI.hasUnwindDest())
+ Vals.push_back(VE.getValueID(CRI.getUnwindDest()));
+ break;
+ }
+ case Instruction::CatchRet: {
+ Code = bitc::FUNC_CODE_INST_CATCHRET;
+ const auto &CRI = cast<CatchReturnInst>(I);
+ pushValue(CRI.getCatchPad(), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(CRI.getSuccessor()));
+ break;
+ }
+ case Instruction::CleanupPad:
+ case Instruction::CatchPad: {
+ const auto &FuncletPad = cast<FuncletPadInst>(I);
+ Code = isa<CatchPadInst>(FuncletPad) ? bitc::FUNC_CODE_INST_CATCHPAD
+ : bitc::FUNC_CODE_INST_CLEANUPPAD;
+ pushValue(FuncletPad.getParentPad(), InstID, Vals, VE);
+
+ unsigned NumArgOperands = FuncletPad.getNumArgOperands();
+ Vals.push_back(NumArgOperands);
+ for (unsigned Op = 0; Op != NumArgOperands; ++Op)
+ PushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals, VE);
+ break;
+ }
+ case Instruction::CatchSwitch: {
+ Code = bitc::FUNC_CODE_INST_CATCHSWITCH;
+ const auto &CatchSwitch = cast<CatchSwitchInst>(I);
+
+ pushValue(CatchSwitch.getParentPad(), InstID, Vals, VE);
+
+ unsigned NumHandlers = CatchSwitch.getNumHandlers();
+ Vals.push_back(NumHandlers);
+ for (const BasicBlock *CatchPadBB : CatchSwitch.handlers())
+ Vals.push_back(VE.getValueID(CatchPadBB));
+
+ if (CatchSwitch.hasUnwindDest())
+ Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest()));
+ break;
+ }
case Instruction::Unreachable:
Code = bitc::FUNC_CODE_INST_UNREACHABLE;
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
@@ -1902,6 +2080,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
    assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 30");
AlignRecord |= AI.isUsedWithInAlloca() << 5;
AlignRecord |= 1 << 6;
+ // Reserve bit 7 for SwiftError flag.
+ // AlignRecord |= AI.isSwiftError() << 7;
Vals.push_back(AlignRecord);
break;
}
@@ -1971,11 +2151,23 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const CallInst &CI = cast<CallInst>(I);
FunctionType *FTy = CI.getFunctionType();
+ if (CI.hasOperandBundles())
+ WriteOperandBundles(Stream, &CI, InstID, VE);
+
Code = bitc::FUNC_CODE_INST_CALL;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
- Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) |
- unsigned(CI.isMustTailCall()) << 14 | 1 << 15);
+
+ unsigned Flags = GetOptimizationFlags(&I);
+ Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV |
+ unsigned(CI.isTailCall()) << bitc::CALL_TAIL |
+ unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL |
+ 1 << bitc::CALL_EXPLICIT_TYPE |
+ unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL |
+ unsigned(Flags != 0) << bitc::CALL_FMF);
+ if (Flags != 0)
+ Vals.push_back(Flags);
+
Vals.push_back(VE.getTypeID(FTy));
PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
@@ -2008,56 +2200,149 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.clear();
}
-// Emit names for globals/functions etc.
-static void WriteValueSymbolTable(const ValueSymbolTable &VST,
- const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
- if (VST.empty()) return;
+enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
+
+/// Determine the encoding to use for the given string name and length.
+static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
+ bool isChar6 = true;
+ for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128)
+ // don't bother scanning the rest.
+ return SE_Fixed8;
+ }
+ if (isChar6)
+ return SE_Char6;
+ else
+ return SE_Fixed7;
+}
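
A quick standalone check of the three buckets, using the Char6 alphabet [a-zA-Z0-9._] from the bitstream format (isChar6 is restated here to keep the example self-contained):

    #include <cassert>

    // Char6 covers [a-zA-Z0-9._]; any byte with the high bit set forces
    // Fixed8, and otherwise plain 7-bit ASCII yields Fixed7. This mirrors
    // getStringEncoding above.
    enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };

    static bool isChar6(char C) {
      return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
             (C >= '0' && C <= '9') || C == '.' || C == '_';
    }

    static StringEncoding classify(const char *S, unsigned Len) {
      bool AllChar6 = true;
      for (unsigned I = 0; I != Len; ++I) {
        if ((unsigned char)S[I] & 128)
          return SE_Fixed8;
        AllChar6 = AllChar6 && isChar6(S[I]);
      }
      return AllChar6 ? SE_Char6 : SE_Fixed7;
    }

    int main() {
      assert(classify("main", 4) == SE_Char6);
      assert(classify("operator new", 12) == SE_Fixed7); // space not char6
      assert(classify("\xC3\xA9", 2) == SE_Fixed8);      // non-ASCII bytes
      return 0;
    }
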
+
+/// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
+/// BitcodeStartBit and FunctionIndex are only passed for the module-level
+/// VST, where we are including a function bitcode index and need to
+/// backpatch the VST forward declaration record.
+static void WriteValueSymbolTable(
+ const ValueSymbolTable &VST, const ValueEnumerator &VE,
+ BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0,
+ uint64_t BitcodeStartBit = 0,
+ DenseMap<const Function *, std::unique_ptr<FunctionInfo>> *FunctionIndex =
+ nullptr) {
+ if (VST.empty()) {
+ // WriteValueSymbolTableForwardDecl should have returned early as
+ // well. Ensure this handling remains in sync by asserting that
+ // the placeholder offset is not set.
+ assert(VSTOffsetPlaceholder == 0);
+ return;
+ }
+
+ if (VSTOffsetPlaceholder > 0) {
+ // Get the offset of the VST we are writing, and backpatch it into
+ // the VST forward declaration record.
+ uint64_t VSTOffset = Stream.GetCurrentBitNo();
+ // The BitcodeStartBit was the stream offset of the actual bitcode
+ // (e.g. excluding any initial darwin header).
+ VSTOffset -= BitcodeStartBit;
+ assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
+ Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
+ }
+
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+ // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY
+ // records, which are not used in the per-function VSTs.
+ unsigned FnEntry8BitAbbrev;
+ unsigned FnEntry7BitAbbrev;
+ unsigned FnEntry6BitAbbrev;
+ if (VSTOffsetPlaceholder > 0) {
+ // 8-bit fixed-width VST_FNENTRY function strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 7-bit fixed width VST_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 6-bit char6 VST_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<unsigned, 64> NameVals;
- for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
- SI != SE; ++SI) {
-
- const ValueName &Name = *SI;
-
+ for (const ValueName &Name : VST) {
// Figure out the encoding to use for the name.
- bool is7Bit = true;
- bool isChar6 = true;
- for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
- C != E; ++C) {
- if (isChar6)
- isChar6 = BitCodeAbbrevOp::isChar6(*C);
- if ((unsigned char)*C & 128) {
- is7Bit = false;
- break; // don't bother scanning the rest.
- }
- }
+ StringEncoding Bits =
+ getStringEncoding(Name.getKeyData(), Name.getKeyLength());
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+ NameVals.push_back(VE.getValueID(Name.getValue()));
+
+ Function *F = dyn_cast<Function>(Name.getValue());
+ if (!F) {
+ // If value is an alias, need to get the aliased base object to
+ // see if it is a function.
+ auto *GA = dyn_cast<GlobalAlias>(Name.getValue());
+ if (GA && GA->getBaseObject())
+ F = dyn_cast<Function>(GA->getBaseObject());
+ }
// VST_ENTRY: [valueid, namechar x N]
+ // VST_FNENTRY: [valueid, funcoffset, namechar x N]
// VST_BBENTRY: [bbid, namechar x N]
unsigned Code;
- if (isa<BasicBlock>(SI->getValue())) {
+ if (isa<BasicBlock>(Name.getValue())) {
Code = bitc::VST_CODE_BBENTRY;
- if (isChar6)
+ if (Bits == SE_Char6)
AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else if (F && !F->isDeclaration()) {
+ // Must be the module-level VST, where we pass in the Index and
+ // have a VSTOffsetPlaceholder. The function-level VST should not
+ // contain any Function symbols.
+ assert(FunctionIndex);
+ assert(VSTOffsetPlaceholder > 0);
+
+ // Save the word offset of the function (from the start of the
+ // actual bitcode written to the stream).
+ assert(FunctionIndex->count(F) == 1);
+ uint64_t BitcodeIndex =
+ (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit;
+ assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
+ NameVals.push_back(BitcodeIndex / 32);
+
+ Code = bitc::VST_CODE_FNENTRY;
+ AbbrevToUse = FnEntry8BitAbbrev;
+ if (Bits == SE_Char6)
+ AbbrevToUse = FnEntry6BitAbbrev;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = FnEntry7BitAbbrev;
} else {
Code = bitc::VST_CODE_ENTRY;
- if (isChar6)
+ if (Bits == SE_Char6)
AbbrevToUse = VST_ENTRY_6_ABBREV;
- else if (is7Bit)
+ else if (Bits == SE_Fixed7)
AbbrevToUse = VST_ENTRY_7_ABBREV;
}
- NameVals.push_back(VE.getValueID(SI->getValue()));
- for (const char *P = Name.getKeyData(),
- *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
- NameVals.push_back((unsigned char)*P);
+ for (const auto P : Name.getKey())
+ NameVals.push_back((unsigned char)P);
// Emit the finished record.
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
@@ -2066,6 +2351,66 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
Stream.ExitBlock();
}
+/// Emit function names and summary offsets for the combined index
+/// used by ThinLTO.
+static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ // 8-bit fixed-width VST_COMBINED_FNENTRY function strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 7-bit fixed width VST_COMBINED_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // 6-bit char6 VST_COMBINED_FNENTRY function strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // FIXME: We know if the type names can use 7-bit ascii.
+ SmallVector<unsigned, 64> NameVals;
+
+ for (const auto &FII : Index) {
+ for (const auto &FI : FII.getValue()) {
+ NameVals.push_back(FI->bitcodeIndex());
+
+ StringRef FuncName = FII.first();
+
+ // Figure out the encoding to use for the name.
+ StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size());
+
+ // VST_COMBINED_FNENTRY: [funcsumoffset, namechar x N]
+ unsigned AbbrevToUse = FnEntry8BitAbbrev;
+ if (Bits == SE_Char6)
+ AbbrevToUse = FnEntry6BitAbbrev;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = FnEntry7BitAbbrev;
+
+ for (const auto P : FuncName)
+ NameVals.push_back((unsigned char)P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ }
+ Stream.ExitBlock();
+}
+
static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
BitstreamWriter &Stream) {
assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
@@ -2100,9 +2445,34 @@ static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
Stream.ExitBlock();
}
-/// WriteFunction - Emit a function body to the module stream.
-static void WriteFunction(const Function &F, ValueEnumerator &VE,
- BitstreamWriter &Stream) {
+/// \brief Save information for the given function into the function index.
+///
+/// At a minimum this saves the bitcode index of the function record that
+/// was just written. However, if we are emitting function summary information,
+/// for example for ThinLTO, then a \a FunctionSummary object is created
+/// to hold the provided summary information.
+static void SaveFunctionInfo(
+ const Function &F,
+ DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
+ unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) {
+ std::unique_ptr<FunctionSummary> FuncSummary;
+ if (EmitFunctionSummary) {
+ FuncSummary = llvm::make_unique<FunctionSummary>(NumInsts);
+ FuncSummary->setLocalFunction(F.hasLocalLinkage());
+ }
+ FunctionIndex[&F] =
+ llvm::make_unique<FunctionInfo>(BitcodeIndex, std::move(FuncSummary));
+}
+
+/// Emit a function body to the module stream.
+static void WriteFunction(
+ const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream,
+ DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
+ bool EmitFunctionSummary) {
+ // Save the bitcode index of the start of this function block for recording
+ // in the VST.
+ uint64_t BitcodeIndex = Stream.GetCurrentBitNo();
+
Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
VE.incorporateFunction(F);
@@ -2128,6 +2498,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
bool NeedsMetadataAttachment = F.hasMetadata();
DILocation *LastDL = nullptr;
+ unsigned NumInsts = 0;
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -2135,6 +2506,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
I != E; ++I) {
WriteInstruction(*I, InstID, VE, Stream, Vals);
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++NumInsts;
+
if (!I->getType()->isVoidTy())
++InstID;
@@ -2171,6 +2545,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
WriteUseListBlock(&F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
+
+ SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex,
+ EmitFunctionSummary);
}
// Emit blockinfo, which defines the standard abbreviations etc.
@@ -2348,9 +2725,183 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+/// Write the module path strings, currently only used when generating
+/// a combined index file.
+static void WriteModStrings(const FunctionInfoIndex &I,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3);
+
+ // TODO: See which abbrev sizes we actually need to emit
+
+ // 8-bit fixed-width MST_ENTRY strings.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv);
+
+ // 7-bit fixed width MST_ENTRY strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv);
+
+ // 6-bit char6 MST_ENTRY strings.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
+
+ SmallVector<unsigned, 64> NameVals;
+ for (const StringMapEntry<uint64_t> &MPSE : I.modPathStringEntries()) {
+ StringEncoding Bits =
+ getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
+ unsigned AbbrevToUse = Abbrev8Bit;
+ if (Bits == SE_Char6)
+ AbbrevToUse = Abbrev6Bit;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = Abbrev7Bit;
+
+ NameVals.push_back(MPSE.getValue());
+
+ for (const auto P : MPSE.getKey())
+ NameVals.push_back((unsigned char)P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+// Helper to emit a single function summary record.
+static void WritePerModuleFunctionSummaryRecord(
+ SmallVector<unsigned, 64> &NameVals, FunctionSummary *FS, unsigned ValueID,
+ unsigned FSAbbrev, BitstreamWriter &Stream) {
+ assert(FS);
+ NameVals.push_back(ValueID);
+ NameVals.push_back(FS->isLocalFunction());
+ NameVals.push_back(FS->instCount());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev);
+ NameVals.clear();
+}
+
+/// Emit the per-module function summary section alongside the rest of
+/// the module's bitcode.
+static void WritePerModuleFunctionSummary(
+ DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
+ const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3);
+
+ // Abbrev for FS_CODE_PERMODULE_ENTRY.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // islocal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
+ unsigned FSAbbrev = Stream.EmitAbbrev(Abbv);
+
+ SmallVector<unsigned, 64> NameVals;
+ for (auto &I : FunctionIndex) {
+ // Skip anonymous functions. We will emit a function summary for
+ // any aliases below.
+ if (!I.first->hasName())
+ continue;
+
+ WritePerModuleFunctionSummaryRecord(
+ NameVals, I.second->functionSummary(),
+ VE.getValueID(M->getValueSymbolTable().lookup(I.first->getName())),
+ FSAbbrev, Stream);
+ }
+
+ for (const GlobalAlias &A : M->aliases()) {
+ if (!A.getBaseObject())
+ continue;
+ const Function *F = dyn_cast<Function>(A.getBaseObject());
+ if (!F || F->isDeclaration())
+ continue;
+
+ assert(FunctionIndex.count(F) == 1);
+ WritePerModuleFunctionSummaryRecord(
+ NameVals, FunctionIndex[F]->functionSummary(),
+ VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev,
+ Stream);
+ }
+
+ Stream.ExitBlock();
+}
+
+/// Emit the combined function summary section into the combined index
+/// file.
+static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3);
+
+ // Abbrev for FS_CODE_COMBINED_ENTRY.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
+ unsigned FSAbbrev = Stream.EmitAbbrev(Abbv);
+
+ SmallVector<unsigned, 64> NameVals;
+ for (const auto &FII : I) {
+ for (auto &FI : FII.getValue()) {
+ FunctionSummary *FS = FI->functionSummary();
+ assert(FS);
+
+ NameVals.push_back(I.getModuleId(FS->modulePath()));
+ NameVals.push_back(FS->instCount());
+
+ // Record the starting offset of this summary entry for use
+ // in the VST entry. Add the current code size since the
+ // reader will invoke readRecord after the abbrev id read.
+ FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev);
+ NameVals.clear();
+ }
+ }
+
+ Stream.ExitBlock();
+}
+
+// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
+// current llvm version, and a record for the epoch number.
+static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
+
+ // Write the "user readable" string identifying the bitcode producer
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ auto StringAbbrev = Stream.EmitAbbrev(Abbv);
+ WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING,
+ "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream);
+
+ // Write the epoch version
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ auto EpochAbbrev = Stream.EmitAbbrev(Abbv);
+ SmallVector<unsigned, 1> Vals = {bitc::BITCODE_CURRENT_EPOCH};
+ Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev);
+ Stream.ExitBlock();
+}
+
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream,
- bool ShouldPreserveUseListOrder) {
+ bool ShouldPreserveUseListOrder,
+ uint64_t BitcodeStartBit, bool EmitFunctionSummary) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -2377,7 +2928,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream,
// Emit top-level description of module, including target triple, inline asm,
// descriptors for global variables, and function prototype info.
- WriteModuleInfo(M, VE, Stream);
+ uint64_t VSTOffsetPlaceholder = WriteModuleInfo(M, VE, Stream);
// Emit constants.
WriteModuleConstants(VE, Stream);
@@ -2388,17 +2939,25 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream,
// Emit metadata.
WriteModuleMetadataStore(M, Stream);
- // Emit names for globals/functions etc.
- WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
-
// Emit module-level use-lists.
if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(nullptr, VE, Stream);
+ WriteOperandBundleTags(M, Stream);
+
// Emit function bodies.
+ DenseMap<const Function *, std::unique_ptr<FunctionInfo>> FunctionIndex;
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
if (!F->isDeclaration())
- WriteFunction(*F, VE, Stream);
+ WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary);
+
+ // This must be written after the calls to WriteFunction above, which
+ // populate the summary information in the index.
+ if (EmitFunctionSummary)
+ WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream);
+
+ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
+ VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
Stream.ExitBlock();
}
@@ -2473,10 +3032,22 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
Buffer.push_back(0);
}
+/// Helper to write the header common to all bitcode files.
+static void WriteBitcodeHeader(BitstreamWriter &Stream) {
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+}
+
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
- bool ShouldPreserveUseListOrder) {
+ bool ShouldPreserveUseListOrder,
+ bool EmitFunctionSummary) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@@ -2489,17 +3060,20 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Emit the module into the buffer.
{
BitstreamWriter Stream(Buffer);
+ // Save the start bit of the actual bitcode, in case space was reserved
+ // at the start for the Darwin header above. The reader stream will
+ // start at the bitcode, so the offset of the VST must line up with it.
+ uint64_t BitcodeStartBit = Stream.GetCurrentBitNo();
// Emit the file header.
- Stream.Emit((unsigned)'B', 8);
- Stream.Emit((unsigned)'C', 8);
- Stream.Emit(0x0, 4);
- Stream.Emit(0xC, 4);
- Stream.Emit(0xE, 4);
- Stream.Emit(0xD, 4);
+ WriteBitcodeHeader(Stream);
+
+ WriteIdentificationBlock(M, Stream);
// Emit the module.
- WriteModule(M, Stream, ShouldPreserveUseListOrder);
+ WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
+ EmitFunctionSummary);
}
if (TT.isOSDarwin())
@@ -2508,3 +3082,38 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Write the generated bitstream to "Out".
Out.write((char*)&Buffer.front(), Buffer.size());
}
+
+// Write the specified function summary index to the given raw output stream
+// as a new bitcode block. This is used when writing the combined index
+// file for ThinLTO.
+void llvm::WriteFunctionSummaryToFile(const FunctionInfoIndex &Index,
+ raw_ostream &Out) {
+ SmallVector<char, 0> Buffer;
+ Buffer.reserve(256 * 1024);
+
+ BitstreamWriter Stream(Buffer);
+
+ // Emit the bitcode header.
+ WriteBitcodeHeader(Stream);
+
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ SmallVector<unsigned, 1> Vals;
+ unsigned CurVersion = 1;
+ Vals.push_back(CurVersion);
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
+
+ // Write the module paths in the combined index.
+ WriteModStrings(Index, Stream);
+
+ // Write the function summary combined index records.
+ WriteCombinedFunctionSummary(Index, Stream);
+
+ // We need a special VST writer for the combined index (there is no
+ // real VST and no real values when this is invoked).
+ WriteCombinedValueSymbolTable(Index, Stream);
+
+ Stream.ExitBlock();
+
+ Out.write((char *)&Buffer.front(), Buffer.size());
+}
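For reference, a minimal caller-side sketch of the extended WriteBitcodeToFile entry point above — not part of the patch; "out.bc" is a placeholder path and the includes are the headers this LLVM era uses:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    // Write a module together with its per-module function summary block,
    // as ThinLTO consumers expect. Error handling is reduced to a bail-out.
    static void writeModuleWithSummary(const llvm::Module &M) {
      std::error_code EC;
      llvm::raw_fd_ostream OS("out.bc", EC, llvm::sys::fs::F_None);
      if (EC)
        return;
      llvm::WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false,
                               /*EmitFunctionSummary=*/true);
    }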
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 3165743..24de99a 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -19,7 +19,7 @@
using namespace llvm;
PreservedAnalyses BitcodeWriterPass::run(Module &M) {
- WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, EmitFunctionSummary);
return PreservedAnalyses::all();
}
@@ -27,17 +27,21 @@ namespace {
class WriteBitcodePass : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
bool ShouldPreserveUseListOrder;
+ bool EmitFunctionSummary;
public:
static char ID; // Pass identification, replacement for typeid
- explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder)
+ explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder,
+ bool EmitFunctionSummary)
: ModulePass(ID), OS(o),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder),
+ EmitFunctionSummary(EmitFunctionSummary) {}
const char *getPassName() const override { return "Bitcode Writer"; }
bool runOnModule(Module &M) override {
- WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder,
+ EmitFunctionSummary);
return false;
}
};
@@ -46,6 +50,8 @@ namespace {
char WriteBitcodePass::ID = 0;
ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str,
- bool ShouldPreserveUseListOrder) {
- return new WriteBitcodePass(Str, ShouldPreserveUseListOrder);
+ bool ShouldPreserveUseListOrder,
+ bool EmitFunctionSummary) {
+ return new WriteBitcodePass(Str, ShouldPreserveUseListOrder,
+ EmitFunctionSummary);
}
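A hedged usage sketch for the extended factory above (assumes the legacy pass manager; the helper name is illustrative):

    #include "llvm/Bitcode/BitcodeWriterPass.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Support/raw_ostream.h"

    // Schedule bitcode emission with the per-module function summary enabled.
    static void addBitcodeWriter(llvm::legacy::PassManager &PM,
                                 llvm::raw_ostream &OS) {
      PM.add(llvm::createBitcodeWriterPass(
          OS, /*ShouldPreserveUseListOrder=*/false,
          /*EmitFunctionSummary=*/true));
    }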
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 44dd604..e07563b 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -87,15 +87,9 @@ static OrderMap orderModule(const Module &M) {
if (!isa<GlobalValue>(A.getAliasee()))
orderValue(A.getAliasee(), OM);
for (const Function &F : M) {
- if (F.hasPrefixData())
- if (!isa<GlobalValue>(F.getPrefixData()))
- orderValue(F.getPrefixData(), OM);
- if (F.hasPrologueData())
- if (!isa<GlobalValue>(F.getPrologueData()))
- orderValue(F.getPrologueData(), OM);
- if (F.hasPersonalityFn())
- if (!isa<GlobalValue>(F.getPersonalityFn()))
- orderValue(F.getPersonalityFn(), OM);
+ for (const Use &U : F.operands())
+ if (!isa<GlobalValue>(U.get()))
+ orderValue(U.get(), OM);
}
OM.LastGlobalConstantID = OM.size();
@@ -273,12 +267,8 @@ static UseListOrderStack predictUseListOrder(const Module &M) {
for (const GlobalAlias &A : M.aliases())
predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
for (const Function &F : M) {
- if (F.hasPrefixData())
- predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
- if (F.hasPrologueData())
- predictValueUseListOrder(F.getPrologueData(), nullptr, OM, Stack);
- if (F.hasPersonalityFn())
- predictValueUseListOrder(F.getPersonalityFn(), nullptr, OM, Stack);
+ for (const Use &U : F.operands())
+ predictValueUseListOrder(U.get(), nullptr, OM, Stack);
}
return Stack;
@@ -321,20 +311,10 @@ ValueEnumerator::ValueEnumerator(const Module &M,
for (const GlobalAlias &GA : M.aliases())
EnumerateValue(GA.getAliasee());
- // Enumerate the prefix data constants.
+ // Enumerate any optional Function data.
for (const Function &F : M)
- if (F.hasPrefixData())
- EnumerateValue(F.getPrefixData());
-
- // Enumerate the prologue data constants.
- for (const Function &F : M)
- if (F.hasPrologueData())
- EnumerateValue(F.getPrologueData());
-
- // Enumerate the personality functions.
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasPersonalityFn())
- EnumerateValue(I->getPersonalityFn());
+ for (const Use &U : F.operands())
+ EnumerateValue(U.get());
// Enumerate the metadata type.
//
@@ -425,7 +405,7 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
void ValueEnumerator::dump() const {
print(dbgs(), ValueMap, "Default");
dbgs() << '\n';
- print(dbgs(), MDValueMap, "MetaData");
+ print(dbgs(), MetadataMap, "MetaData");
dbgs() << '\n';
}
@@ -512,10 +492,8 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
/// Insert all of the values referenced by named metadata in the specified
/// module.
void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
- for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; ++I)
- EnumerateNamedMDNode(I);
+ for (const auto &I : M.named_metadata())
+ EnumerateNamedMDNode(&I);
}
void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
@@ -544,7 +522,7 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
// EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph.
//
// Return early if there's already an ID.
- if (!MDValueMap.insert(std::make_pair(MD, 0)).second)
+ if (!MetadataMap.insert(std::make_pair(MD, 0)).second)
return;
// Visit operands first to minimize RAUW.
@@ -557,10 +535,10 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
HasDILocation |= isa<DILocation>(MD);
HasGenericDINode |= isa<GenericDINode>(MD);
- // Replace the dummy ID inserted above with the correct one. MDValueMap may
+ // Replace the dummy ID inserted above with the correct one. MetadataMap may
// have changed by inserting operands, so we need a fresh lookup here.
MDs.push_back(MD);
- MDValueMap[MD] = MDs.size();
+ MetadataMap[MD] = MDs.size();
}
/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
@@ -568,12 +546,12 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
void ValueEnumerator::EnumerateFunctionLocalMetadata(
const LocalAsMetadata *Local) {
// Check to see if it's already in!
- unsigned &MDValueID = MDValueMap[Local];
- if (MDValueID)
+ unsigned &MetadataID = MetadataMap[Local];
+ if (MetadataID)
return;
MDs.push_back(Local);
- MDValueID = MDs.size();
+ MetadataID = MDs.size();
EnumerateValue(Local->getValue());
@@ -729,23 +707,20 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
NumModuleMDs = MDs.size();
// Adding function arguments to the value table.
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I)
- EnumerateValue(I);
+ for (const auto &I : F.args())
+ EnumerateValue(&I);
FirstFuncConstantID = Values.size();
// Add all function-level constants to the value table.
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
- for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
- OI != E; ++OI) {
- if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
- isa<InlineAsm>(*OI))
- EnumerateValue(*OI);
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB)
+ for (const Use &OI : I.operands()) {
+ if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
+ EnumerateValue(OI);
}
- BasicBlocks.push_back(BB);
- ValueMap[BB] = BasicBlocks.size();
+ BasicBlocks.push_back(&BB);
+ ValueMap[&BB] = BasicBlocks.size();
}
// Optimize the constant layout.
@@ -759,18 +734,17 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
// Add all of the instructions.
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
- OI != E; ++OI) {
- if (auto *MD = dyn_cast<MetadataAsValue>(&*OI))
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ for (const Use &OI : I.operands()) {
+ if (auto *MD = dyn_cast<MetadataAsValue>(&OI))
if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata()))
// Enumerate metadata after the instructions they might refer to.
FnLocalMDVector.push_back(Local);
}
- if (!I->getType()->isVoidTy())
- EnumerateValue(I);
+ if (!I.getType()->isVoidTy())
+ EnumerateValue(&I);
}
}
@@ -784,7 +758,7 @@ void ValueEnumerator::purgeFunction() {
for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
ValueMap.erase(Values[i].first);
for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
- MDValueMap.erase(MDs[i]);
+ MetadataMap.erase(MDs[i]);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
@@ -797,8 +771,8 @@ void ValueEnumerator::purgeFunction() {
static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
DenseMap<const BasicBlock*, unsigned> &IDMap) {
unsigned Counter = 0;
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- IDMap[BB] = ++Counter;
+ for (const BasicBlock &BB : *F)
+ IDMap[&BB] = ++Counter;
}
/// getGlobalBasicBlockID - This returns the function-specific ID for the
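The ValueEnumerator rewrites above fold three hand-written loops (prefix data, prologue data, personality function) into one pass over Function::operands(), since all three are ordinary operands of the Function. A standalone sketch of the same pattern (the helper name is illustrative):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalValue.h"

    // Count the non-GlobalValue operands of F; prefix data, prologue data
    // and the personality function all show up here when present.
    static unsigned countOptionalFunctionData(const llvm::Function &F) {
      unsigned N = 0;
      for (const llvm::Use &U : F.operands())
        if (!llvm::isa<llvm::GlobalValue>(U.get()))
          ++N;
      return N;
    }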
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
index 92d166e..9fb8325 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -63,7 +63,7 @@ private:
std::vector<const Metadata *> MDs;
SmallVector<const LocalAsMetadata *, 8> FunctionLocalMDs;
typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
- MetadataMapType MDValueMap;
+ MetadataMapType MetadataMap;
bool HasMDString;
bool HasDILocation;
bool HasGenericDINode;
@@ -93,7 +93,7 @@ private:
/// before incorporation.
unsigned NumModuleValues;
- /// When a function is incorporated, this is the size of the MDValues list
+ /// When a function is incorporated, this is the size of the MDs list
/// before incorporation.
unsigned NumModuleMDs;
@@ -117,8 +117,9 @@ public:
return ID - 1;
}
unsigned getMetadataOrNullID(const Metadata *MD) const {
- return MDValueMap.lookup(MD);
+ return MetadataMap.lookup(MD);
}
+ unsigned numMDs() const { return MDs.size(); }
bool hasMDString() const { return HasMDString; }
bool hasDILocation() const { return HasDILocation; }
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5fe4c4b..4060db7 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -142,16 +142,15 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(!State);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+ bool IsReturnBlock = BB->isReturnBlock();
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -365,9 +364,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
- // defined in a call must not be changed (ABI).
+ // defined in a call must not be changed (ABI). Inline assembly may
+ // reference registers either via system calls or directly, so skip it
+ // until we can tell user-specified registers from compiler-specified ones.
if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
- TII->isPredicated(MI)) {
+ TII->isPredicated(MI) || MI->isInlineAsm()) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -429,6 +430,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// If MI's uses have special allocation requirement, don't allow
// any use registers to be changed. Also assume all registers
// used in a call must not be changed (ABI).
+ // Inline assembly register uses also cannot be changed safely.
// FIXME: The issue with predicated instructions is more complex. We are being
// conservative here because the kill markers cannot be trusted after
// if-conversion:
@@ -444,7 +446,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// changed.
bool Special = MI->isCall() ||
MI->hasExtraSrcRegAllocReq() ||
- TII->isPredicated(MI);
+ TII->isPredicated(MI) || MI->isInlineAsm();
// Scan the register uses for this instruction and update
// live-ranges, groups and RegRefs.
@@ -509,15 +511,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
// Check all references that need rewriting for Reg. For each, use
// the corresponding register class to narrow the set of registers
// that are appropriate for renaming.
- std::pair<std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator,
- std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator>
- Range = State->GetRegRefs().equal_range(Reg);
- for (std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
- QE = Range.second; Q != QE; ++Q) {
- const TargetRegisterClass *RC = Q->second.RC;
+ for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) {
+ const TargetRegisterClass *RC = Q.second.RC;
if (!RC) continue;
BitVector RCBV = TRI->getAllocatableSet(MF, RC);
@@ -685,9 +680,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
// defines 'NewReg' via an early-clobber operand.
- auto Range = RegRefs.equal_range(Reg);
- for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) {
- auto UseMI = Q->second.Operand->getParent();
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ MachineInstr *UseMI = Q.second.Operand->getParent();
int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
if (Idx == -1)
continue;
@@ -698,6 +692,20 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
}
+ // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining
+ // 'Reg' is an early-clobber define and that instruction also uses
+ // 'NewReg'.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber())
+ continue;
+
+ MachineInstr *DefMI = Q.second.Operand->getParent();
+ if (DefMI->readsRegister(NewReg, TRI)) {
+ DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
// Record that 'Reg' can be renamed to 'NewReg'.
RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
}
@@ -920,23 +928,16 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Update the references to the old register CurrReg to
// refer to the new register NewReg.
- std::pair<std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator,
- std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator>
- Range = RegRefs.equal_range(CurrReg);
- for (std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q) {
- Q->second.Operand->setReg(NewReg);
+ for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) {
+ Q.second.Operand->setReg(NewReg);
// If the SU for the instruction being updated has debug
// information related to the anti-dependency register, make
// sure to update that as well.
- const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
if (!SU) continue;
for (DbgValueVector::iterator DVI = DbgValues.begin(),
DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q->second.Operand->getParent())
+ if (DVI->second == Q.second.Operand->getParent())
UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
}
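The AggressiveAntiDepBreaker hunks above replace verbose multimap iterator pairs with range-based loops over llvm::make_range(equal_range(...)). A self-contained sketch of the idiom, assuming nothing beyond the standard library and ADT:

    #include "llvm/ADT/iterator_range.h"
    #include <map>

    // Sum every value stored under key K; each Entry is a
    // std::pair<const unsigned, int>, exactly as the iterator pair yields.
    static int sumValuesForKey(const std::multimap<unsigned, int> &M,
                               unsigned K) {
      int Sum = 0;
      for (const auto &Entry : llvm::make_range(M.equal_range(K)))
        Sum += Entry.second;
      return Sum;
    }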
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index dc9bcff..40451c0 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -29,12 +29,13 @@ using namespace llvm;
// Compare VirtRegMap::getRegAllocPref().
AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo)
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix)
: Pos(0) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
- TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
rewind();
DEBUG({
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
index 02b2d92..2aee3a6 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -24,6 +24,7 @@ namespace llvm {
class RegisterClassInfo;
class VirtRegMap;
+class LiveRegMatrix;
class LLVM_LIBRARY_VISIBILITY AllocationOrder {
SmallVector<MCPhysReg, 16> Hints;
@@ -37,7 +38,8 @@ public:
/// @param RegClassInfo Information about reserved and allocatable registers.
AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo);
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
/// Get the allocation order without reordered hints.
ArrayRef<MCPhysReg> getOrder() const { return Order; }
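Call sites gain one extra argument; a hedged sketch of the updated construction (the helper is illustrative, and Matrix may be null for allocators without a LiveRegMatrix):

    #include "AllocationOrder.h" // CodeGen-internal header, as in this file
    #include "llvm/CodeGen/LiveRegMatrix.h"
    #include "llvm/CodeGen/RegisterClassInfo.h"
    #include "llvm/CodeGen/VirtRegMap.h"

    // Return the first candidate register in allocation order (hints first),
    // or 0 if the order is exhausted.
    static unsigned firstCandidate(unsigned VirtReg,
                                   const llvm::VirtRegMap &VRM,
                                   const llvm::RegisterClassInfo &RCI,
                                   const llvm::LiveRegMatrix *Matrix) {
      llvm::AllocationOrder Order(VirtReg, VRM, RCI, Matrix);
      return Order.next();
    }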
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 98d4c8a..75579a2 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -25,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -515,7 +517,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !isSafeToSpeculativelyExecute(BBI))
+ !isSafeToSpeculativelyExecute(&*BBI))
return false;
}
@@ -643,3 +645,97 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
return !GS.IsCompared;
}
+
+static void collectFuncletMembers(
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
+ const MachineBasicBlock *MBB) {
+ // Add this MBB to our funclet.
+ auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet));
+
+ // Don't revisit blocks.
+ if (!P.second) {
+ assert(P.first->second == Funclet && "MBB is part of two funclets!");
+ return;
+ }
+
+ bool IsReturn = false;
+ int NumTerminators = 0;
+ for (const MachineInstr &MI : MBB->terminators()) {
+ IsReturn |= MI.isReturn();
+ ++NumTerminators;
+ }
+ assert((!IsReturn || NumTerminators == 1) &&
+ "Expected only one terminator when a return is present!");
+
+ // Returns are boundaries where funclet transfer can occur, so don't
+ // follow successors.
+ if (IsReturn)
+ return;
+
+ for (const MachineBasicBlock *SMBB : MBB->successors())
+ if (!SMBB->isEHPad())
+ collectFuncletMembers(FuncletMembership, Funclet, SMBB);
+}
+
+DenseMap<const MachineBasicBlock *, int>
+llvm::getFuncletMembership(const MachineFunction &MF) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+
+ // We don't have anything to do if there aren't any EH funclets.
+ if (!MF.getMMI().hasEHFunclets())
+ return FuncletMembership;
+
+ int EntryBBNumber = MF.front().getNumber();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
+ SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
+ SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
+ SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry()) {
+ FuncletBlocks.push_back(&MBB);
+ } else if (IsSEH && MBB.isEHPad()) {
+ SEHCatchPads.push_back(&MBB);
+ } else if (MBB.pred_empty()) {
+ UnreachableBlocks.push_back(&MBB);
+ }
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ // CatchPads are not funclets for SEH, so do not consider a CatchRet to
+ // transfer control to another funclet.
+ if (MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ continue;
+
+ // FIXME: SEH CatchPads are not necessarily in the parent function:
+ // they could be inside a finally block.
+ const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
+ const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
+ CatchRetSuccessors.push_back(
+ {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
+ }
+
+ // We don't have anything to do if there aren't any funclet entry blocks.
+ if (FuncletBlocks.empty())
+ return FuncletMembership;
+
+ // Identify all the basic blocks reachable from the function entry.
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
+ // All blocks not part of a funclet are in the parent function.
+ for (const MachineBasicBlock *MBB : UnreachableBlocks)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Next, identify all the blocks inside the funclets.
+ for (const MachineBasicBlock *MBB : FuncletBlocks)
+ collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
+ // SEH CatchPads aren't really funclets, handle them separately.
+ for (const MachineBasicBlock *MBB : SEHCatchPads)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
+ CatchRetSuccessors)
+ collectFuncletMembers(FuncletMembership, CatchRetPair.second,
+ CatchRetPair.first);
+ return FuncletMembership;
+}
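A minimal usage sketch for the new getFuncletMembership helper (printing only; dumpFuncletColors is an illustrative name):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/CodeGen/Analysis.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the funclet that each reachable block was assigned to.
    static void dumpFuncletColors(const llvm::MachineFunction &MF) {
      llvm::DenseMap<const llvm::MachineBasicBlock *, int> Membership =
          llvm::getFuncletMembership(MF);
      for (const llvm::MachineBasicBlock &MBB : MF)
        if (Membership.count(&MBB))
          llvm::errs() << "BB#" << MBB.getNumber() << " -> funclet "
                       << Membership.lookup(&MBB) << '\n';
    }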
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 0bad795..ade2d71 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -73,7 +73,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
const Function *Per = nullptr;
if (F->hasPersonalityFn())
Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- assert(!MMI->getPersonality() || Per == MMI->getPersonality());
bool forceEmitPersonality =
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
@@ -115,9 +114,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalValue *>::const_reverse_iterator
- I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalValue *GV = *I;
+ for (const GlobalValue *GV : reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
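The loop above switches to llvm::reverse from ADT/STLExtras.h; a standalone sketch of the adaptor:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    // Walk any container backwards with a range-based for loop.
    static void printBackwards(const std::vector<int> &V) {
      for (int X : llvm::reverse(V))
        llvm::errs() << X << ' ';
      llvm::errs() << '\n';
    }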
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 125047e..be7eafb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -135,11 +135,14 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return *TM.getObjFileLowering();
}
-/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getDataLayout();
+ return MMI->getModule()->getDataLayout();
}
+// Do not use the cached DataLayout because some clients use it without a
+// Module (llvm-dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
+
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
return MF->getSubtarget<MCSubtargetInfo>();
@@ -193,10 +196,18 @@ bool AsmPrinter::doInitialization(Module &M) {
unsigned Major, Minor, Update;
TT.getOSVersion(Major, Minor, Update);
// If there is a version specified, Major will be non-zero.
- if (Major)
- OutStreamer->EmitVersionMin((TT.isMacOSX() ?
- MCVM_OSXVersionMin : MCVM_IOSVersionMin),
- Major, Minor, Update);
+ if (Major) {
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS())
+ VersionType = MCVM_WatchOSVersionMin;
+ else if (TT.isTvOS())
+ VersionType = MCVM_TvOSVersionMin;
+ else if (TT.isMacOSX())
+ VersionType = MCVM_OSXVersionMin;
+ else
+ VersionType = MCVM_IOSVersionMin;
+ OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
+ }
}
// Allow the target to emit any magic that it wants at the start of the file.
@@ -224,28 +235,20 @@ bool AsmPrinter::doInitialization(Module &M) {
TM.getTargetFeatureString()));
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions);
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n",
+ OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
- bool skip_dwarf = false;
- if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
+ if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
- // FIXME: Don't emit DWARF debug info if there's at least one function
- // with AddressSanitizer instrumentation.
- // This is a band-aid fix for PR22032.
- for (auto &F : M.functions()) {
- if (F.hasFnAttribute(Attribute::SanitizeAddress)) {
- skip_dwarf = true;
- break;
- }
- }
}
- if (!skip_dwarf) {
+ if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
}
@@ -340,8 +343,51 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
return TM.getSymbol(GV, *Mang);
}
+static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) {
+ return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName());
+}
+
+static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) {
+ return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName());
+}
+
+/// EmitEmulatedTLSControlVariable - Emit the control variable for an
+/// emulated TLS variable.
+void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
+ MCSymbol *EmittedSym,
+ bool AllZeroInitValue) {
+ MCSection *TLSVarSection = getObjFileLowering().getDataSection();
+ OutStreamer->SwitchSection(TLSVarSection);
+ MCSymbol *GVSym = getSymbol(GV);
+ EmitLinkage(GV, EmittedSym); // same linkage as GV
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+ unsigned WordSize = DL.getPointerSize();
+ unsigned Alignment = DL.getPointerABIAlignment();
+ EmitAlignment(Log2_32(Alignment));
+ OutStreamer->EmitLabel(EmittedSym);
+ OutStreamer->EmitIntValue(Size, WordSize);
+ OutStreamer->EmitIntValue((1 << AlignLog), WordSize);
+ OutStreamer->EmitIntValue(0, WordSize);
+ if (GV->hasInitializer() && !AllZeroInitValue) {
+ OutStreamer->EmitSymbolValue(
+ getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize);
+ } else
+ OutStreamer->EmitIntValue(0, WordSize);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym),
+ MCConstantExpr::create(4 * WordSize, OutContext));
+ OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable.
+}
+
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ bool IsEmuTLSVar =
+ GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal &&
+ TM.Options.EmulatedTLS;
+ assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
+ "No emulated TLS variables in the common section");
+
if (GV->hasInitializer()) {
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(GV))
@@ -352,7 +398,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GlobalGOTEquivs.count(getSymbol(GV)))
return;
- if (isVerbose()) {
+ if (isVerbose() && !IsEmuTLSVar) {
+ // When printing the control variable __emutls_v.*,
+ // we don't need to print the original TLS variable name.
GV->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, GV->getParent());
OutStreamer->GetCommentOS() << '\n';
@@ -360,7 +408,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
MCSymbol *GVSym = getSymbol(GV);
- EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+ MCSymbol *EmittedSym = IsEmuTLSVar ?
+ getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym;
+ // getOrCreateEmuTLSControlSym only creates the symbol with its name and
+ // default attributes; GV's or GVSym's attributes will be used for
+ // EmittedSym.
+
+ EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
return;
@@ -371,17 +424,29 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
"' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getDataLayout();
- uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
+ unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+
+ bool AllZeroInitValue = false;
+ const Constant *InitValue = GV->getInitializer();
+ if (isa<ConstantAggregateZero>(InitValue))
+ AllZeroInitValue = true;
+ else {
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ if (InitIntValue && InitIntValue->isZero())
+ AllZeroInitValue = true;
+ }
+ if (IsEmuTLSVar)
+ EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue);
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -390,6 +455,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ assert(!(IsEmuTLSVar && GVKind.isCommon()) &&
+ "No emulated TLS variables in the common section");
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
unsigned Align = 1 << AlignLog;
@@ -434,12 +501,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- MCSection *TheSection =
+ if (IsEmuTLSVar && AllZeroInitValue)
+ return; // No initialization values are needed.
+
+ MCSymbol *EmittedInitSym = IsEmuTLSVar ?
+ getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym;
+ // getOrCreateEmuTLSInitSym only creates the symbol with its name and
+ // default attributes; GV's or GVSym's attributes will be used for
+ // EmittedInitSym.
+
+ MCSection *TheSection = IsEmuTLSVar ?
+ getObjFileLowering().getReadOnlySection() :
getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
// Handle the zerofill directive on darwin, which is a special form of BSS
// emission.
- if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) {
if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
// .globl _foo
@@ -459,7 +535,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// TLOF class. This will also make it more obvious that stuff like
// MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
// specific code.
- if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) {
// Emit the .tbss symbol
MCSymbol *MangSym =
OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
@@ -473,7 +549,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitAlignment(AlignLog, GV);
OutStreamer->EmitLabel(MangSym);
- EmitGlobalConstant(GV->getInitializer());
+ EmitGlobalConstant(GV->getParent()->getDataLayout(),
+ GV->getInitializer());
}
OutStreamer->AddBlankLine();
@@ -490,7 +567,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
+ unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
OutStreamer->EmitIntValue(0, PtrSize);
@@ -502,16 +579,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TheSection);
- EmitLinkage(GV, GVSym);
+ // __emutls_t.* symbols are only used within the current compilation unit.
+ if (!IsEmuTLSVar)
+ EmitLinkage(GV, EmittedInitSym);
EmitAlignment(AlignLog, GV);
- OutStreamer->EmitLabel(GVSym);
+ OutStreamer->EmitLabel(EmittedInitSym);
- EmitGlobalConstant(GV->getInitializer());
+ EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym),
MCConstantExpr::create(Size, OutContext));
OutStreamer->AddBlankLine();
@@ -545,7 +624,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit the prefix data.
if (F->hasPrefixData())
- EmitGlobalConstant(F->getPrefixData());
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
@@ -580,7 +659,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit the prologue data.
if (F->hasPrologueData())
- EmitGlobalConstant(F->getPrologueData());
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -640,19 +719,27 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer->AddComment(Twine("implicit-def: ") +
- MMI->getContext().getRegisterInfo()->getName(RegNo));
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ OutStreamer->AddComment(OS.str());
OutStreamer->AddBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
- std::string Str = "kill:";
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "kill:";
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
- Str += ' ';
- Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg());
- Str += (Op.isDef() ? "<def>" : "<kill>");
+ OS << ' '
+ << PrintReg(Op.getReg(),
+ AP.MF->getSubtarget().getRegisterInfo())
+ << (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer->AddComment(Str);
AP.OutStreamer->AddBlankLine();
@@ -688,6 +775,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
+ for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
+ if (Deref) {
+ // We currently don't support extra offsets or derefs after the first
+ // one. Bail out early instead of emitting an incorrect comment.
+ OS << " [complex expression]";
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+ }
+ uint64_t Op = Expr->getElement(i);
+ if (Op == dwarf::DW_OP_deref) {
+ Deref = true;
+ continue;
+ } else if (Op == dwarf::DW_OP_bit_piece) {
+ // There can't be any operands after this in a valid expression.
+ break;
+ }
+ uint64_t ExtraOffset = Expr->getElement(i++);
+ if (Op == dwarf::DW_OP_plus)
+ Offset += ExtraOffset;
+ else {
+ assert(Op == dwarf::DW_OP_minus);
+ Offset -= ExtraOffset;
+ }
+ }
+
// Register or immediate value. Register 0 means undef.
if (MI->getOperand(0).isFPImm()) {
APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
@@ -727,7 +839,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (Deref)
OS << '[';
- OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg);
+ OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
if (Deref)
@@ -888,7 +1000,7 @@ void AsmPrinter::EmitFunctionBody() {
EmitFunctionBodyEnd();
if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MAI->hasDotTypeDotSizeDirective()) {
+ MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -1047,20 +1159,17 @@ bool AsmPrinter::doFinalization(Module &M) {
// Output stubs for external and common global variables.
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
- OutStreamer->SwitchSection(TLOF.getDataRelSection());
- const DataLayout *DL = TM.getDataLayout();
+ OutStreamer->SwitchSection(TLOF.getDataSection());
+ const DataLayout &DL = M.getDataLayout();
for (const auto &Stub : Stubs) {
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
- DL->getPointerSize());
+ DL.getPointerSize());
}
}
}
- // Make sure we wrote out everything we need.
- OutStreamer->Flush();
-
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
@@ -1103,10 +1212,29 @@ bool AsmPrinter::doFinalization(Module &M) {
else
assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
+ // Set the symbol type to function if the alias has a function type.
+ // This affects codegen when the aliasee is not a function.
+ if (Alias.getType()->getPointerElementType()->isFunctionTy())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+
EmitVisibility(Name, Alias.getVisibility());
// Emit the directives as assignments aka .set:
OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
+
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = Alias.getBaseObject();
+ if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(Alias.getValueType());
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
+ MCConstantExpr::create(Size, OutContext));
+ }
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -1120,16 +1248,16 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
- MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
- /*C=*/nullptr);
+ MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), SectionKind::getReadOnly(),
+ /*C=*/nullptr);
OutStreamer->SwitchSection(ReadOnlySection);
MCSymbol *AddrSymbol =
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
OutStreamer->EmitLabel(AddrSymbol);
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ unsigned PtrSize = M.getDataLayout().getPointerSize(0);
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@@ -1169,7 +1297,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- NeedsLocalForSize) {
+ MMI->hasEHFunclets() || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1206,14 +1334,14 @@ void AsmPrinter::EmitConstantPool() {
const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
- SectionKind Kind =
- CPE.getSectionKind(TM.getDataLayout());
+ SectionKind Kind = CPE.getSectionKind(&getDataLayout());
const Constant *C = nullptr;
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
+ MCSection *S =
+ getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C);
// The number of sections are small, just do a linear search from the
// last section to the first.
@@ -1260,14 +1388,13 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer->EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
- Offset = NewOffset +
- TM.getDataLayout()->getTypeAllocSize(Ty);
+ Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
OutStreamer->EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
- EmitGlobalConstant(CPE.Val.ConstVal);
+ EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
}
}
}
@@ -1276,7 +1403,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getTarget().getDataLayout();
+ const DataLayout &DL = MF->getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1296,8 +1423,7 @@ void AsmPrinter::EmitJumpTableInfo() {
OutStreamer->SwitchSection(ReadOnlySection);
}
- EmitAlignment(Log2_32(
- MJTI->getEntryAlignment(*TM.getDataLayout())));
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -1335,7 +1461,7 @@ void AsmPrinter::EmitJumpTableInfo() {
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
- if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix())
+ if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
// FIXME: This doesn't have to have any specific name, just any randomly
// named and numbered 'l' label would work. Simplify GetJTISymbol.
OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
@@ -1409,8 +1535,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
- unsigned EntrySize =
- MJTI->getEntrySize(*TM.getDataLayout());
+ unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
OutStreamer->EmitValue(Value, EntrySize);
}
@@ -1435,7 +1560,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
if (GV->getName() == "llvm.global_ctors") {
- EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1447,7 +1573,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
if (GV->getName() == "llvm.global_dtors") {
- EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1485,7 +1612,8 @@ struct Structor {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
+void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1520,8 +1648,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getDataLayout();
- unsigned Align = Log2_32(DL->getPointerPrefAlignment());
+ unsigned Align = Log2_32(DL.getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
const Structor &R) { return L.Priority < R.Priority; });
@@ -1542,7 +1669,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
OutStreamer->SwitchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
EmitAlignment(Align);
- EmitXXStructor(S.Func);
+ EmitXXStructor(DL, S.Func);
}
}
@@ -1621,8 +1748,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(),
- NumBits);
+ NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1668,7 +1794,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -1682,11 +1808,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getDataLayout();
-
// Generate a symbolic expression for the byte address
- APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
+ APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0));
if (!OffsetAI)
@@ -1707,7 +1831,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return lowerConstant(CE->getOperand(0));
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
@@ -1718,7 +1842,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
@@ -1769,10 +1893,13 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
}
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
+ AsmPrinter &AP,
const Constant *BaseCV = nullptr,
uint64_t Offset = 0);
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
+
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
@@ -1789,9 +1916,9 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
-static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType());
+ uint64_t Size = DL.getTypeAllocSizeInBits(V->getType());
assert(Size % 8 == 0);
// Extend the element to take zero padding into account.
@@ -1806,7 +1933,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
// byte.
assert(CA->getNumOperands() != 0 && "Should be a CAZ");
Constant *Op0 = CA->getOperand(0);
- int Byte = isRepeatedByteSequence(Op0, TM);
+ int Byte = isRepeatedByteSequence(Op0, DL);
if (Byte == -1)
return -1;
@@ -1823,15 +1950,14 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
return -1;
}
-static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
- AsmPrinter &AP){
+static void emitGlobalConstantDataSequential(const DataLayout &DL,
+ const ConstantDataSequential *CDS,
+ AsmPrinter &AP) {
// See if we can aggregate this into a .fill, if so, emit it as such.
- int Value = isRepeatedByteSequence(CDS, AP.TM);
+ int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
- uint64_t Bytes =
- AP.TM.getDataLayout()->getTypeAllocSize(
- CDS->getType());
+ uint64_t Bytes = DL.getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer->EmitFill(Bytes, Value);
@@ -1851,37 +1977,11 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
- } else if (ElementByteSize == 4) {
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- assert(CDS->getElementType()->isFloatTy());
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union {
- float F;
- uint32_t I;
- };
-
- F = CDS->getElementAsFloat(i);
- if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << "float " << F << '\n';
- AP.OutStreamer->EmitIntValue(I, 4);
- }
} else {
- assert(CDS->getElementType()->isDoubleTy());
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union {
- double F;
- uint64_t I;
- };
-
- F = CDS->getElementAsDouble(i);
- if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << "double " << F << '\n';
- AP.OutStreamer->EmitIntValue(I, 8);
- }
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP);
}
- const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
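The Size/EmittedSize computation that survives this hunk exists because an aggregate's alloc size can exceed the bytes its elements cover (alignment rounding), and the difference must be emitted as zeros. The arithmetic, isolated into a sketch with illustrative values rather than DataLayout queries:

#include <cstdint>

// Tail padding the streamer owes after the elements of a sequential
// constant: aggregate alloc size minus the bytes the elements cover.
// For example, a 12-byte payload in a 16-byte-aligned aggregate owes 4.
static uint64_t tailPadding(uint64_t AggregateAllocSize,
                            uint64_t ElementAllocSize,
                            uint64_t NumElements) {
  return AggregateAllocSize - ElementAllocSize * NumElements;
}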
@@ -1890,12 +1990,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+static void emitGlobalConstantArray(const DataLayout &DL,
+ const ConstantArray *CA, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
- int Value = isRepeatedByteSequence(CA, AP.TM);
- const DataLayout &DL = *AP.TM.getDataLayout();
+ int Value = isRepeatedByteSequence(CA, DL);
if (Value != -1) {
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
@@ -1903,17 +2003,17 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset);
+ emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
}
}
}
-static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
+static void emitGlobalConstantVector(const DataLayout &DL,
+ const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- emitGlobalConstantImpl(CV->getOperand(i), AP);
+ emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1921,21 +2021,21 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
AP.OutStreamer->EmitZeros(Padding);
}
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+static void emitGlobalConstantStruct(const DataLayout &DL,
+ const ConstantStruct *CS, AsmPrinter &AP,
const Constant *BaseCV, uint64_t Offset) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getDataLayout();
- unsigned Size = DL->getTypeAllocSize(CS->getType());
- const StructLayout *Layout = DL->getStructLayout(CS->getType());
+ unsigned Size = DL.getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = DL.getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
// Print the actual field value.
- emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar);
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
// Check if padding is needed and insert one or more 0s.
- uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
+ uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
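The per-field padding formula this hunk converts from pointer to reference syntax is worth spelling out: the gap after field i is the distance to the next field's offset (or to the struct's total size for the last field) minus the field's own alloc size. A self-contained sketch, with offsets supplied directly instead of via StructLayout:

#include <cstdint>
#include <vector>

// Padding to emit after field I of a struct whose field offsets and total
// size are known; mirrors the PadSize expression in the hunk above.
static uint64_t padAfterField(const std::vector<uint64_t> &Offsets,
                              uint64_t StructSize, unsigned I,
                              uint64_t FieldSize) {
  uint64_t Next = (I + 1 == Offsets.size()) ? StructSize : Offsets[I + 1];
  return (Next - Offsets[I]) - FieldSize;
}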
@@ -1974,8 +2074,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() &&
- !CFP->getType()->isPPC_FP128Ty()) {
+ if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
@@ -1993,13 +2092,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -2016,7 +2115,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Big endian:
// * Record the extra bits to emit.
// * Realign the raw data to emit the chunks of 64-bits.
- if (DL->isBigEndian()) {
+ if (DL.isBigEndian()) {
// Basically the structure of the raw data is a sequence of 64-bit cells:
// 0 1 BitWidth / 64
// [chunk1][chunk2] ... [chunkN].
@@ -2037,7 +2136,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// quantities at a time.
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i];
AP.OutStreamer->EmitIntValue(Val, 8);
}
@@ -2045,8 +2144,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Emit the extra bits after the 64-bit chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(
- CI->getType());
+ uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
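The chunk loop these hunks touch emits a wide integer 64 bits at a time, walking the APInt's raw words front to back on little-endian targets and back to front on big-endian ones. A sketch of that traversal, with printf standing in for OutStreamer->EmitIntValue:

#include <cstdint>
#include <cstdio>

// Emit NumWords 64-bit words in the order the target expects; the index
// flip matches RawData[e - i - 1] in the patched loop.
static void emitWords(const uint64_t *Words, unsigned NumWords,
                      bool IsBigEndian) {
  for (unsigned I = 0; I != NumWords; ++I) {
    uint64_t Val = IsBigEndian ? Words[NumWords - I - 1] : Words[I];
    std::printf("\t.quad 0x%016llx\n", (unsigned long long)Val);
  }
}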
@@ -2094,7 +2192,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
return;
- const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst);
+ const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst);
if (!BaseGV)
return;
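The dyn_cast to dyn_cast_or_null switch above is a null-safety fix: dyn_cast requires a non-null operand, while dyn_cast_or_null maps null to null, letting the early return cover both "no base constant" and "base is not a GlobalValue". The pattern in isolation (real LLVM casting API, hypothetical helper name):

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Casting.h"

// Returns null when BaseCst is null or is not a GlobalValue; a plain
// dyn_cast would assert on the null case.
static const llvm::GlobalValue *baseGlobal(const llvm::Constant *BaseCst) {
  return llvm::dyn_cast_or_null<llvm::GlobalValue>(BaseCst);
}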
@@ -2149,10 +2247,10 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
}
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
- const DataLayout *DL = AP.TM.getDataLayout();
- uint64_t Size = DL->getTypeAllocSize(CV->getType());
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
+ AsmPrinter &AP, const Constant *BaseCV,
+ uint64_t Offset) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
// Globals with sub-elements such as combinations of arrays and structs
// are handled recursively by emitGlobalConstantImpl. Keep track of the
@@ -2189,32 +2287,32 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
- return emitGlobalConstantDataSequential(CDS, AP);
+ return emitGlobalConstantDataSequential(DL, CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(CVA, AP, BaseCV, Offset);
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset);
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
// vectors).
if (CE->getOpcode() == Instruction::BitCast)
- return emitGlobalConstantImpl(CE->getOperand(0), AP);
+ return emitGlobalConstantImpl(DL, CE->getOperand(0), AP);
if (Size > 8) {
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, *DL);
+ Constant *New = ConstantFoldConstantExpression(CE, DL);
if (New && New != CE)
- return emitGlobalConstantImpl(New, AP);
+ return emitGlobalConstantImpl(DL, New, AP);
}
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return emitGlobalConstantVector(V, AP);
+ return emitGlobalConstantVector(DL, V, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
@@ -2230,11 +2328,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
- uint64_t Size =
- TM.getDataLayout()->getTypeAllocSize(CV->getType());
+void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
if (Size)
- emitGlobalConstantImpl(CV, *this);
+ emitGlobalConstantImpl(DL, CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
@@ -2272,10 +2369,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.getOrCreateSymbol
- (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
- + "_" + Twine(CPID));
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "CPI" + Twine(getFunctionNumber()) + "_" +
+ Twine(CPID));
}
/// GetJTISymbol - Return the symbol for the specified jump table entry.
@@ -2286,10 +2383,10 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.getOrCreateSymbol
- (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
- Twine(UID) + "_set_" + Twine(MBBID));
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
}
MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
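Both symbol builders above change only where the private-label prefix comes from (the DataLayout rather than the TargetMachine); the name shape is untouched. Purely illustrative, assuming an ELF-style ".L" prefix:

#include <string>

// Constant-pool entry label, e.g. ".LCPI3_7" for function 3, entry 7.
static std::string cpiSymbolName(const std::string &PrivatePrefix,
                                 unsigned FnNumber, unsigned CPID) {
  return PrivatePrefix + "CPI" + std::to_string(FnNumber) + "_" +
         std::to_string(CPID);
}

// Jump-table .set label, e.g. ".L3_2_set_5".
static std::string jtSetSymbolName(const std::string &PrivatePrefix,
                                   unsigned FnNumber, unsigned UID,
                                   unsigned MBBID) {
  return PrivatePrefix + std::to_string(FnNumber) + "_" +
         std::to_string(UID) + "_set_" + std::to_string(MBBID);
}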
@@ -2301,7 +2398,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
/// Return the MCSymbol for the specified ExternalSymbol.
MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
SmallString<60> NameStr;
- Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout());
+ Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout());
return OutContext.getOrCreateSymbol(NameStr);
}
@@ -2376,6 +2473,14 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ // End the previous funclet and start a new one.
+ if (MBB.isEHFuncletEntry()) {
+ for (const HandlerInfo &HI : Handlers) {
+ HI.Handler->endFunclet();
+ HI.Handler->beginFunclet(MBB);
+ }
+ }
+
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB.getAlignment())
EmitAlignment(Align);
@@ -2389,20 +2494,28 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
if (isVerbose())
OutStreamer->AddComment("Block address taken");
- for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
- OutStreamer->EmitLabel(Sym);
+ // MBBs can have their address taken as part of CodeGen without having
+ // their corresponding BB's address taken in IR
+ if (BB->hasAddressTaken())
+ for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
+ OutStreamer->EmitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB.getBasicBlock())
- if (BB->hasName())
- OutStreamer->AddComment("%" + BB->getName());
+ if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ BB->printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, BB->getModule());
+ OutStreamer->GetCommentOS() << '\n';
+ }
+ }
emitBasicBlockLoopComments(MBB, LI, *this);
}
// Print the main label for the block.
- if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) {
+ if (MBB.pred_empty() ||
+ (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
@@ -2440,7 +2553,7 @@ bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
- if (MBB->isLandingPad() || MBB->pred_empty())
+ if (MBB->isEHPad() || MBB->pred_empty())
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ad180b6..504c5d2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -47,7 +47,7 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
OutStreamer->EmitSLEB128IntValue(Value);
}
-/// EmitULEB128 - emit the specified signed leb128 value.
+/// EmitULEB128 - emit the specified unsigned leb128 value.
void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
unsigned PadTo) const {
if (isVerbose() && Desc)
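The comment fix above is the whole change here, but the encoding it documents is easy to state: ULEB128 stores 7 payload bits per byte, low-order group first, with the top bit flagging continuation. A reference encoder:

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 value bits per byte, continuation bit 0x80 on every
// byte except the last. Zero encodes as the single byte 0x00.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}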
@@ -56,18 +56,6 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
OutStreamer->EmitULEB128IntValue(Value, PadTo);
}
-/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
-void AsmPrinter::EmitCFAByte(unsigned Val) const {
- if (isVerbose()) {
- if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64)
- OutStreamer->AddComment("DW_CFA_offset + Reg (" +
- Twine(Val - dwarf::DW_CFA_offset) + ")");
- else
- OutStreamer->AddComment(dwarf::CallFrameString(Val));
- }
- OutStreamer->EmitIntValue(Val, 1);
-}
-
static const char *DecodeDWARFEncoding(unsigned Encoding) {
switch (Encoding) {
case dwarf::DW_EH_PE_absptr:
@@ -134,7 +122,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getDataLayout()->getPointerSize();
+ return MF->getDataLayout().getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -228,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpDefCfaOffset:
OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+ break;
case MCCFIInstruction::OpDefCfa:
OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
@@ -246,6 +237,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpSameValue:
OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
+ case MCCFIInstruction::OpGnuArgsSize:
+ OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpEscape:
+ OutStreamer->EmitCFIEscape(Inst.getValues());
+ break;
}
}
@@ -284,17 +281,10 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
}
}
-void
-AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbrevs) {
- // Emit the abbrevations code (base 1 index.)
- EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(this);
- }
+void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const {
+ // Emit the abbreviations code (base 1 index).
+ EmitULEB128(Abbrev.getNumber(), "Abbreviation Code");
- // Mark end of abbreviations.
- EmitULEB128(0, "EOM(3)");
+ // Emit the abbreviations data.
+ Abbrev.Emit(this);
}
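emitDwarfAbbrevs becomes a per-abbreviation primitive; judging from the removed body, the loop over the abbreviation vector and the terminating zero ULEB128 now live with the caller. A caller-side sketch with stand-in types and emitters:

#include <vector>

struct AbbrevStub { unsigned Number; };

// What the removed loop did, relocated: emit each abbreviation, then a
// single 0 as the end-of-abbreviations marker ("EOM(3)" in the old code).
template <typename EmitAbbrev, typename EmitULEB>
static void emitAbbrevSection(const std::vector<AbbrevStub> &Abbrevs,
                              EmitAbbrev emitOne, EmitULEB emitULEB128) {
  for (const AbbrevStub &A : Abbrevs)
    emitOne(A);
  emitULEB128(0u);
}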
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index f1efe9d..e59961f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -19,6 +19,7 @@
namespace llvm {
+class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MCSymbol;
@@ -50,6 +51,11 @@ public:
/// beginFunction at all.
virtual void endFunction(const MachineFunction *MF) = 0;
+ /// \brief Emit target-specific EH funclet machinery.
+ virtual void beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym = nullptr) {}
+ virtual void endFunclet() {}
+
/// \brief Process beginning of an instruction.
virtual void beginInstruction(const MachineInstr *MI) = 0;
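The new funclet hooks get empty default bodies, so only handlers that model funclets need to override them; the AsmPrinter.cpp hunk earlier calls endFunclet() and then beginFunclet(MBB) at every EH funclet entry block. The shape of that contract, with stand-in types rather than LLVM's:

struct MachineBasicBlockStub {};

struct HandlerStub {
  virtual ~HandlerStub() = default;
  // Empty defaults: existing handlers compile unchanged and simply
  // ignore funclet boundaries.
  virtual void beginFunclet(const MachineBasicBlockStub &) {}
  virtual void endFunclet() {}
};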
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 793e629..4171657 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -127,19 +127,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
- // Create a temporary copy of the original STI because the parser may modify
- // it. For example, when switching between arm and thumb mode. If the target
- // needs to emit code to return to the original state it can do so in
- // emitInlineAsmEnd().
- MCSubtargetInfo TmpSTI = STI;
-
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
// we only need MCInstrInfo for asm parsing. We create one unconditionally
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
- TmpSTI, *Parser, *MII, MCOptions));
+ STI, *Parser, *MII, MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -154,7 +148,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
- emitInlineAsmEnd(STI, &TmpSTI);
+ emitInlineAsmEnd(STI, &TAP->getSTI());
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
@@ -512,9 +506,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getDataLayout();
if (!strcmp(Code, "private")) {
- OS << DL->getPrivateGlobalPrefix();
+ const DataLayout &DL = MF->getDataLayout();
+ OS << DL.getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
OS << MAI->getCommentString();
} else if (!strcmp(Code, "uid")) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 0cc829f..df1997b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -24,16 +24,19 @@
namespace llvm {
class ByteStreamer {
- public:
- virtual ~ByteStreamer() {}
+ protected:
+ ~ByteStreamer() = default;
+ ByteStreamer(const ByteStreamer&) = default;
+ ByteStreamer() = default;
+ public:
// For now we're just handling the calls we need for dwarf emission/hashing.
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
};
-class APByteStreamer : public ByteStreamer {
+class APByteStreamer final : public ByteStreamer {
private:
AsmPrinter &AP;
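The ByteStreamer change is the classic non-polymorphic-deletion idiom: a protected non-virtual destructor in the base forbids delete through a base pointer, the defaulted special members stay available to implementations, and marking each streamer final documents that the hierarchy is closed. A compilable sketch of the idiom:

struct Base {
protected:
  ~Base() = default; // non-virtual: deleting through Base* is ill-formed
public:
  virtual void emit(unsigned char Byte) = 0;
};

struct Impl final : Base {
  void emit(unsigned char) override {}
};

int main() {
  Impl I;
  Base &B = I; // polymorphic use by reference is fine
  B.emit(0x2a);
  // delete &B; // would not compile: ~Base() is protected
}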
@@ -53,7 +56,7 @@ public:
}
};
-class HashingByteStreamer : public ByteStreamer {
+class HashingByteStreamer final : public ByteStreamer {
private:
DIEHash &Hash;
public:
@@ -69,7 +72,7 @@ class HashingByteStreamer : public ByteStreamer {
}
};
-class BufferByteStreamer : public ByteStreamer {
+class BufferByteStreamer final : public ByteStreamer {
private:
SmallVectorImpl<char> &Buffer;
SmallVectorImpl<std::string> &Comments;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 46dbc76..bf794f7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -86,7 +86,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
AP->EmitULEB128(0, "EOM(2)");
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEAbbrev::print(raw_ostream &O) {
O << "Abbreviation @"
<< format("0x%lx", (long)(intptr_t)this)
@@ -104,12 +104,13 @@ void DIEAbbrev::print(raw_ostream &O) {
<< '\n';
}
}
+
+LLVM_DUMP_METHOD
void DIEAbbrev::dump() { print(dbgs()); }
-#endif
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
- for (const DIEValue &V : Values)
+ for (const DIEValue &V : values())
Abbrev.AddAttribute(V.getAttribute(), V.getForm());
return Abbrev;
}
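Swapping the #ifndef NDEBUG guards for LLVM_DUMP_METHOD keeps these print/dump entry points in release builds, where they remain callable from a debugger even though nothing references them. At the time the macro amounted to noinline plus used on GCC-compatible compilers; a hedged reconstruction, not LLVM's exact definition:

#if defined(__GNUC__) || defined(__clang__)
#define MY_DUMP_METHOD __attribute__((noinline, used))
#else
#define MY_DUMP_METHOD
#endif

struct Thing {
  // Survives dead-stripping and stays out-of-line, so "call T.dump()"
  // works from a debugger in optimized builds.
  MY_DUMP_METHOD void dump() const {}
};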
@@ -144,36 +145,35 @@ DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
return DIEValue();
}
-#ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IndentCount) const {
- const std::string Indent(IndentCount, ' ');
- bool isBlock = getTag() == 0;
-
- if (!isBlock) {
- O << Indent
- << "Die: "
- << format("0x%lx", (long)(intptr_t)this)
- << ", Offset: " << Offset
- << ", Size: " << Size << "\n";
-
- O << Indent
- << dwarf::TagString(getTag())
- << " "
- << dwarf::ChildrenString(hasChildren()) << "\n";
- } else {
- O << "Size: " << Size << "\n";
- }
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+ StringRef Type, unsigned Size, unsigned IndentCount) {
+ O << Type << ": Size: " << Size << "\n";
- IndentCount += 2;
unsigned I = 0;
- for (const auto &V : Values) {
+ const std::string Indent(IndentCount, ' ');
+ for (const auto &V : Values.values()) {
O << Indent;
+ O << "Blk[" << I++ << "]";
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+}
- if (!isBlock)
- O << dwarf::AttributeString(V.getAttribute());
- else
- O << "Blk[" << I++ << "]";
+LLVM_DUMP_METHOD
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
+ const std::string Indent(IndentCount, ' ');
+ O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+ << ", Offset: " << Offset << ", Size: " << Size << "\n";
+ O << Indent << dwarf::TagString(getTag()) << " "
+ << dwarf::ChildrenString(hasChildren()) << "\n";
+
+ IndentCount += 2;
+ for (const auto &V : values()) {
+ O << Indent;
+ O << dwarf::AttributeString(V.getAttribute());
O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
V.print(O);
O << "\n";
@@ -183,13 +183,13 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
for (const auto &Child : children())
Child.print(O, IndentCount + 4);
- if (!isBlock) O << "\n";
+ O << "\n";
}
+LLVM_DUMP_METHOD
void DIE::dump() {
print(dbgs());
}
-#endif
void DIEValue::EmitValue(const AsmPrinter *AP) const {
switch (Ty) {
@@ -215,7 +215,7 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
llvm_unreachable("Unknown DIE kind");
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEValue::print(raw_ostream &O) const {
switch (Ty) {
case isNone:
@@ -228,10 +228,10 @@ void DIEValue::print(raw_ostream &O) const {
}
}
+LLVM_DUMP_METHOD
void DIEValue::dump() const {
print(dbgs());
}
-#endif
//===----------------------------------------------------------------------===//
// DIEInteger Implementation
@@ -264,7 +264,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
- Size = Asm->getDataLayout().getPointerSize(); break;
+ Size = Asm->getPointerSize();
+ break;
case dwarf::DW_FORM_ref_addr:
Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
break;
@@ -294,21 +295,21 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_addr:
+ return AP->getPointerSize();
case dwarf::DW_FORM_ref_addr:
if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
return sizeof(int32_t);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEInteger::print(raw_ostream &O) const {
O << "Int: " << (int64_t)Integer << " 0x";
O.write_hex(Integer);
}
-#endif
//===----------------------------------------------------------------------===//
// DIEExpr Implementation
@@ -326,12 +327,11 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
-#endif
//===----------------------------------------------------------------------===//
// DIELabel Implementation
@@ -352,12 +352,11 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
-#endif
//===----------------------------------------------------------------------===//
// DIEDelta Implementation
@@ -375,14 +374,13 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEDelta::print(raw_ostream &O) const {
O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
}
-#endif
//===----------------------------------------------------------------------===//
// DIEString Implementation
@@ -431,11 +429,10 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return DIEInteger(S.getOffset()).SizeOf(AP, Form);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEString::print(raw_ostream &O) const {
O << "String: " << S.getString();
}
-#endif
//===----------------------------------------------------------------------===//
// DIEEntry Implementation
@@ -472,15 +469,14 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
const DwarfDebug *DD = AP->getDwarfDebug();
assert(DD && "Expected Dwarf Debug info to be available");
if (DD->getDwarfVersion() == 2)
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
return sizeof(int32_t);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEEntry::print(raw_ostream &O) const {
O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
}
-#endif
//===----------------------------------------------------------------------===//
// DIETypeSignature Implementation
@@ -491,11 +487,10 @@ void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIETypeSignature::print(raw_ostream &O) const {
O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
}
-#endif
//===----------------------------------------------------------------------===//
// DIELoc Implementation
@@ -505,7 +500,7 @@ void DIETypeSignature::print(raw_ostream &O) const {
///
unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- for (const auto &V : Values)
+ for (const auto &V : values())
Size += V.SizeOf(AP);
}
@@ -525,7 +520,7 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
Asm->EmitULEB128(Size); break;
}
- for (const auto &V : Values)
+ for (const auto &V : values())
V.EmitValue(Asm);
}
@@ -543,12 +538,10 @@ unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELoc::print(raw_ostream &O) const {
- O << "ExprLoc: ";
- DIE::print(O, 5);
+ printValues(O, *this, "ExprLoc", Size, 5);
}
-#endif
//===----------------------------------------------------------------------===//
// DIEBlock Implementation
@@ -558,7 +551,7 @@ void DIELoc::print(raw_ostream &O) const {
///
unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
- for (const auto &V : Values)
+ for (const auto &V : values())
Size += V.SizeOf(AP);
}
@@ -576,7 +569,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
}
- for (const auto &V : Values)
+ for (const auto &V : values())
V.EmitValue(Asm);
}
@@ -592,12 +585,10 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIEBlock::print(raw_ostream &O) const {
- O << "Blk: ";
- DIE::print(O, 5);
+ printValues(O, *this, "Blk", Size, 5);
}
-#endif
//===----------------------------------------------------------------------===//
// DIELocList Implementation
@@ -608,7 +599,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
return 4;
- return AP->getDataLayout().getPointerSize();
+ return AP->getPointerSize();
}
/// EmitValue - Emit label value.
@@ -619,6 +610,5 @@ void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
}
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
-#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5e60156..0201065 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -470,38 +470,6 @@ void DIEHash::computeHash(const DIE &Die) {
}
/// This is based on the type signature computation given in section 7.27 of the
-/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
-/// with the exception that we are hashing only the context and the name of the
-/// type.
-uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
-
- // Add the contexts to the hash. We won't be computing the ODR hash for
- // function local types so it's safe to use the generic context hashing
- // algorithm here.
- // FIXME: If we figure out how to account for linkage in some way we could
- // actually do this with a slight modification to the parent hash algorithm.
- if (const DIE *Parent = Die.getParent())
- addParentContext(*Parent);
-
- // Add the current DIE information.
-
- // Add the DWARF tag of the DIE.
- addULEB128(Die.getTag());
-
- // Add the name of the type to the hash.
- addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
-
- // Now get the result.
- MD5::MD5Result Result;
- Hash.final(Result);
-
- // ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
-}
-
-/// This is based on the type signature computation given in section 7.27 of the
/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
/// with the inclusion of the full CU and all top level CU entities.
// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 833ca02..44f0ce8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -84,9 +84,6 @@ class DIEHash {
public:
DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
- /// \brief Computes the ODR signature.
- uint64_t computeDIEODRSignature(const DIE &Die);
-
/// \brief Computes the CU signature.
uint64_t computeCUSignature(const DIE &Die);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index afffa83..bbe5324 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,6 +9,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -17,7 +19,6 @@
namespace llvm {
class AsmPrinter;
-class DebugLocStream;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index f8cdde2..4ad3e18 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -41,7 +41,7 @@ void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
}
-void DwarfAccelTable::ComputeBucketCount(void) {
+void DwarfAccelTable::ComputeBucketCount() {
// First get the number of unique hashes.
std::vector<uint32_t> uniques(Data.size());
for (size_t i = 0, e = Data.size(); i < e; ++i)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2c212c7..6665c16 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -78,12 +78,11 @@ void DwarfCFIException::endModule() {
return;
// Emit references to all used personality functions
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
- for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
- if (!Personalities[i])
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (!Personality)
continue;
- MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
- TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym);
+ MCSymbol *Sym = Asm->getSymbol(Personality);
+ TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
}
}
@@ -108,7 +107,6 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const Function *Per = nullptr;
if (F->hasPersonalityFn())
Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- assert(!MMI->getPersonality() || Per == MMI->getPersonality());
// Emit a personality function even when there are no landing pads
bool forceEmitPersonality =
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index fc54a29..725063a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -151,28 +151,33 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
} else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+ ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
- // 3) followed by an OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
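Outside the new EmulatedTLS early-out, the non-split branch still assembles the same location expression: a pointer-sized unsigned constant holding the variable's offset within the TLS block, then a TLS-lookup opcode (GNU or standard, chosen by the debugger-tuning logic elsewhere in this patch). A byte-level sketch for a 64-bit target, with opcode values from the DWARF spec and the offset bytes standing in for the relocated value the real code emits via addExpr:

#include <cstdint>
#include <vector>

static std::vector<uint8_t> tlsLocationExpr(uint64_t Offset, bool GNUOpcode) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x0e); // DW_OP_const8u
  for (int I = 0; I != 8; ++I)
    Expr.push_back((Offset >> (8 * I)) & 0xff);
  Expr.push_back(GNUOpcode ? 0xe0   // DW_OP_GNU_push_tls_address
                           : 0x9b); // DW_OP_form_tls_address
  return Expr;
}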
@@ -338,9 +343,9 @@ void DwarfCompileUnit::constructScopeDIE(
// Skip imported directives in gmlt-like data.
if (!includeMinimalInlineScopes()) {
// There is no need to emit empty lexical block DIE.
- for (const auto &E : DD->findImportedEntitiesForScope(DS))
+ for (const auto *IE : ImportedEntities[DS])
Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(E.second)));
+ constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
}
// If there are only other scopes as children, put them directly in the
@@ -435,6 +440,9 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ if (IA->getDiscriminator())
+ addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+ IA->getDiscriminator());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -517,8 +525,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- assert(Expr != DV.getExpression().end() &&
- "Wrong number of expressions");
+ assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
++Expr;
@@ -597,8 +604,8 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
return ObjectPointer;
}
-void
-DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+ LexicalScope *Scope) {
DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 509c943..2e28467 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -39,6 +39,12 @@ class DwarfCompileUnit : public DwarfUnit {
/// The start of the unit within its section.
MCSymbol *LabelBegin;
+ typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
+ typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
+ ImportedEntityMap;
+
+ ImportedEntityMap ImportedEntities;
+
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
@@ -98,6 +104,10 @@ public:
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+ void addImportedEntity(const DIImportedEntity* IE) {
+ ImportedEntities[IE->getScope()].push_back(IE);
+ }
+
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
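This map replaces the module-wide sorted vector that DwarfDebug used to binary-search (removed further down): each CU now records its own scope-to-imported-entities association, filled from the CU node during beginModule and read back when the scope's DIE is constructed. The data-structure shape, using std containers and a stand-in node type instead of DenseMap/SmallVector/MDNode:

#include <map>
#include <vector>

struct NodeStub {};

// Scope node -> imported entities declared directly in that scope.
std::map<const NodeStub *, std::vector<const NodeStub *>> ImportedEntities;

void addImportedEntity(const NodeStub *Scope, const NodeStub *IE) {
  ImportedEntities[Scope].push_back(IE);
}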
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7d03a39..3466f34 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -104,6 +105,14 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
+static cl::opt<DefaultOnOff>
+DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+ cl::desc("Emit DWARF linkage-name attributes."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
+
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
@@ -176,9 +185,9 @@ const DIType *DbgVariable::getType() const {
if (tag == dwarf::DW_TAG_pointer_type)
subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
- auto Elements = cast<DICompositeTypeBase>(subType)->getElements();
+ auto Elements = cast<DICompositeType>(subType)->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedTypeBase>(Elements[i]);
+ auto *DT = cast<DIDerivedType>(Elements[i]);
if (getName() == DT->getName())
return resolve(DT->getBaseType());
}
@@ -194,45 +203,67 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
- UsedNonDefaultText(false),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
- IsPS4(Triple(A->getTargetTriple()).isPS4()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
- AccelTypes(TypeAtoms) {
+ AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
CurFn = nullptr;
CurMI = nullptr;
+ Triple TT(Asm->getTargetTriple());
+
+ // Make sure we know our "debugger tuning." The target option takes
+ // precedence; fall back to triple-based defaults.
+ if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+ DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+ else if (IsDarwin || TT.isOSFreeBSD())
+ DebuggerTuning = DebuggerKind::LLDB;
+ else if (TT.isPS4CPU())
+ DebuggerTuning = DebuggerKind::SCE;
+ else
+ DebuggerTuning = DebuggerKind::GDB;
- // Turn on accelerator tables for Darwin by default, pubnames by
- // default for non-Darwin/PS4, and handle split dwarf.
+ // Turn on accelerator tables for LLDB by default.
if (DwarfAccelTables == Default)
- HasDwarfAccelTables = IsDarwin;
+ HasDwarfAccelTables = tuneForLLDB();
else
HasDwarfAccelTables = DwarfAccelTables == Enable;
+ // Handle split DWARF. Off by default for now.
if (SplitDwarf == Default)
HasSplitDwarf = false;
else
HasSplitDwarf = SplitDwarf == Enable;
+ // Pubnames/pubtypes on by default for GDB.
if (DwarfPubSections == Default)
- HasDwarfPubSections = !IsDarwin && !IsPS4;
+ HasDwarfPubSections = tuneForGDB();
else
HasDwarfPubSections = DwarfPubSections == Enable;
+ // SCE does not use linkage names.
+ if (DwarfLinkageNames == Default)
+ UseLinkageNames = !tuneForSCE();
+ else
+ UseLinkageNames = DwarfLinkageNames == Enable;
+
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ // Use dwarf 4 by default if nothing is requested.
+ DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
- // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
- // Everybody else uses GNU's.
- UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+ // Work around a GDB bug. GDB doesn't support the standard opcode;
+ // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+ // is defined as of DWARF 3.
+ // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+ UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
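The tuning logic above has one precedence rule: an explicit DebuggerTuning target option wins; otherwise the triple picks a default (LLDB on Darwin and FreeBSD, SCE on PS4, GDB elsewhere), and the individual feature flags key off the result unless their own cl::opt overrides them. The decision in isolation, with a stand-in enum for DebuggerKind:

enum class Tune { Default, GDB, LLDB, SCE };

static Tune pickTuning(Tune Requested, bool IsDarwin, bool IsFreeBSD,
                       bool IsPS4) {
  if (Requested != Tune::Default)
    return Requested;  // target option takes precedence
  if (IsDarwin || IsFreeBSD)
    return Tune::LLDB; // accelerator tables on by default
  if (IsPS4)
    return Tune::SCE;  // linkage names off by default
  return Tune::GDB;    // pubnames/pubtypes on by default
}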
@@ -300,18 +331,6 @@ void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
}
}
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
- if (!Context)
- return false;
- if (isa<DISubprogram>(Context))
- return true;
- if (auto *T = dyn_cast<DIType>(Context))
- return isSubprogramContext(resolve(T->getScope()));
- return false;
-}
-
/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -416,6 +435,16 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
else
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ if (DIUnit->getDWOId()) {
+ // This CU is either a clang module DWO or a skeleton CU.
+ NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+ DIUnit->getDWOId());
+ if (!DIUnit->getSplitDebugFilename().empty())
+ // This is a prefabricated skeleton CU.
+ NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit->getSplitDebugFilename());
+ }
+
CUMap.insert(std::make_pair(DIUnit, &NewCU));
CUDieMap.insert(std::make_pair(&Die, &NewCU));
return NewCU;
@@ -436,8 +465,6 @@ void DwarfDebug::beginModule() {
const Module *M = MMI->getModule();
- FunctionDIs = makeSubprogramMap(*M);
-
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return;
@@ -449,12 +476,7 @@ void DwarfDebug::beginModule() {
auto *CUNode = cast<DICompileUnit>(N);
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
- ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
- // Stable sort to preserve the order of appearance of imported entities.
- // This is to avoid out-of-order processing of interdependent declarations
- // within the same scope, e.g. { namespace A = base; namespace B = A; }
- std::stable_sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), less_first());
+ CU.addImportedEntity(IE);
for (auto *GV : CUNode->getGlobalVariables())
CU.getOrCreateGlobalVariableDIE(GV);
for (auto *SP : CUNode->getSubprograms())
@@ -467,7 +489,10 @@ void DwarfDebug::beginModule() {
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
+ DIType *RT = cast<DIType>(resolve(Ty->getRef()));
+ if (!RT->isExternalTypeRef())
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
@@ -1061,12 +1086,8 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc()) {
- // Did the target forget to set the FrameSetup flag for CFI insns?
- assert(!MI.isCFIInstruction() &&
- "First non-frame-setup instruction is a CFI instruction.");
+ MI.getDebugLoc())
return MI.getDebugLoc();
- }
return DebugLoc();
}
@@ -1079,8 +1100,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return;
- auto DI = FunctionDIs.find(MF->getFunction());
- if (DI == FunctionDIs.end())
+ auto DI = MF->getFunction()->getSubprogram();
+ if (!DI)
return;
// Grab the lexical scopes for the function, if we don't have any of those
@@ -1127,7 +1148,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
- if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+ if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
@@ -1171,7 +1192,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
"endFunction should be called with the same function as beginFunction");
if (!MMI->hasDebugInfo() || LScopes.empty() ||
- !FunctionDIs.count(MF->getFunction())) {
+ !MF->getFunction()->getSubprogram()) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
@@ -1863,7 +1884,7 @@ void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLineDWOSection());
- SplitTypeUnitFileTable.Emit(*Asm->OutStreamer);
+ SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -1884,7 +1905,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
return &SplitTypeUnitFileTable;
}
-static uint64_t makeTypeSignature(StringRef Identifier) {
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
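makeTypeSignature is promoted from a file-static helper to a public static member so other emitters can hash type identifiers the same way: MD5 the string and keep the low 64 bits, read little-endian from the second half of the digest. Reconstructed from the surrounding code, assuming this era's array-typed MD5::MD5Result:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"

static uint64_t typeSignature(llvm::StringRef Identifier) {
  llvm::MD5 Hash;
  Hash.update(Identifier);
  llvm::MD5::MD5Result Result;
  Hash.final(Result);
  // The digest is a byte array; take its last 8 bytes as a little-endian
  // 64-bit value, matching the DIEHash code removed earlier in this diff.
  return llvm::support::endian::read64le(Result + 8);
}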
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 01f34c6..4c613a9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -33,6 +33,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
#include <memory>
namespace llvm {
@@ -49,24 +50,6 @@ class DwarfUnit;
class MachineModuleInfo;
//===----------------------------------------------------------------------===//
-/// This class is used to record source line correspondence.
-class SrcLineInfo {
- unsigned Line; // Source line number.
- unsigned Column; // Source column.
- unsigned SourceID; // Source ID number.
- MCSymbol *Label; // Label in code ID number.
-public:
- SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
- : Line(L), Column(C), SourceID(S), Label(label) {}
-
- // Accessors
- unsigned getLine() const { return Line; }
- unsigned getColumn() const { return Column; }
- unsigned getSourceID() const { return SourceID; }
- MCSymbol *getLabel() const { return Label; }
-};
-
-//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
@@ -127,14 +110,14 @@ public:
// Accessors.
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
- const ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+ ArrayRef<const DIExpression *> getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+ ArrayRef<int> getFrameIndex() const { return FrameIndex; }
void addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
@@ -156,7 +139,8 @@ public:
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
- if (Var->getTag() == dwarf::DW_TAG_arg_variable)
+ // FIXME: Why don't we just infer this tag and store it all along?
+ if (Var->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
@@ -282,11 +266,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Holders for the various debug information flags that we might need to
/// have exposed. See accessor functions below for description.
- /// Holder for imported entities.
- typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
- ImportedEntityMap;
- ImportedEntityMap ScopesWithImportedEntities;
-
/// Map from MDNodes for user-defined types to the type units that
/// describe them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
@@ -298,16 +277,12 @@ class DwarfDebug : public AsmPrinterHandler {
/// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
- /// Whether or not to use AT_ranges for compilation units.
- bool HasCURanges;
-
- /// Whether we emitted a function into a section other than the
- /// default text.
- bool UsedNonDefaultText;
-
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
+ /// Whether to emit DW_AT_[MIPS_]linkage_name.
+ bool UseLinkageNames;
+
/// Version of dwarf we're emitting.
unsigned DwarfVersion;
@@ -338,7 +313,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
- bool IsPS4;
AddressPool AddrPool;
@@ -347,7 +321,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfAccelTable AccelNamespace;
DwarfAccelTable AccelTypes;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
+ // Identify a debugger for "tuning" the debug info.
+ DebuggerKind DebuggerTuning;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
@@ -372,12 +347,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// Compute the size and offset of a DIE given an incoming Offset.
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
-
- /// Compute the size and offset of all the DIEs.
- void computeSizeAndOffsets();
-
/// Collect info for variables that were optimized out.
void collectDeadVariables();
@@ -443,9 +412,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Emit visible names into a debug ranges section.
void emitDebugRanges();
- /// Emit inline info using custom format.
- void emitDebugInlineInfo();
-
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
@@ -456,10 +422,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// section.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
- /// Construct the split debug info compile unit for the debug info
- /// section.
- DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
-
/// Emit the debug info dwo section.
void emitDebugInfoDWO();
@@ -544,6 +506,9 @@ public:
/// Process end of an instruction.
void endInstruction() override;
+ /// Compute the MD5 checksum of \p Identifier and return its lower 64 bits.
+ static uint64_t makeTypeSignature(StringRef Identifier);
+
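A minimal sketch of how the makeTypeSignature helper declared above can be implemented with llvm::MD5 (assuming the helpers in llvm/Support/MD5.h and llvm/Support/Endian.h; the exact digest-byte selection shown is an assumption):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Endian.h"
    #include "llvm/Support/MD5.h"

    uint64_t makeTypeSignatureSketch(llvm::StringRef Identifier) {
      llvm::MD5 Hash;
      Hash.update(Identifier);
      llvm::MD5::MD5Result Result;
      Hash.final(Result);
      // MD5 results are stored little-endian, so the last eight bytes of the
      // digest hold the checksum's low 64 bits, which become the
      // DW_FORM_ref_sig8 value. (Assumed folding; any stable 64-bit slice
      // of the digest would serve.)
      return llvm::support::endian::read64le(Result + 8);
    }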
/// Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
@@ -558,10 +523,22 @@ public:
SymSize[Sym] = Size;
}
+ /// Returns whether to emit DW_AT_[MIPS_]linkage_name.
+ bool useLinkageNames() const { return UseLinkageNames; }
+
/// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
/// standard DW_OP_form_tls_address opcode
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ /// @}
+
// Experimental DWARF5 features.
/// Returns whether or not to emit tables that dwarf consumers can
@@ -604,9 +581,6 @@ public:
DwarfCompileUnit *lookupUnit(const DIE *CU) const {
return CUDieMap.lookup(CU);
}
- /// isSubprogramContext - Return true if Context is either a subprogram
- /// or another context nested inside a subprogram.
- bool isSubprogramContext(const MDNode *Context);
void addSubprogramNames(const DISubprogram *SP, DIE &Die);
@@ -622,14 +596,6 @@ public:
const MachineFunction *getCurrentFunction() const { return CurFn; }
- iterator_range<ImportedEntityMap::const_iterator>
- findImportedEntitiesForScope(const MDNode *Scope) const {
- return make_range(std::equal_range(
- ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
- std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
- less_first()));
- }
-
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a2799b8..7b5b831 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -211,12 +211,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
return AddMachineRegPiece(MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
- case dwarf::DW_OP_plus: {
- // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+ case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus: {
+ // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
+ // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
auto N = I.getNext();
if (N != E && N->getOp() == dwarf::DW_OP_deref) {
unsigned Offset = I->getArg(0);
- ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+ ValidReg = AddMachineRegIndirect(
+ MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
std::advance(I, 2);
break;
} else
@@ -255,6 +258,12 @@ void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
EmitOp(dwarf::DW_OP_plus_uconst);
EmitUnsigned(I->getArg(0));
break;
+ case dwarf::DW_OP_minus:
+ // There is no OP_minus_uconst.
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(I->getArg(0));
+ EmitOp(dwarf::DW_OP_minus);
+ break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
break;
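To make the plus/minus rewrite above concrete (hypothetical 8-byte offset): when a DW_OP_minus with argument 8 is followed by DW_OP_deref, the pair folds into the register-relative form DW_OP_breg<reg> -8; without the trailing deref, the same operation is emitted as

    DW_OP_constu 8
    DW_OP_minus

since DWARF defines DW_OP_plus_uconst but no unsigned-constant form of minus.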
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 3555822..d75fea5 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -192,18 +192,19 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
DIEInteger(1));
}
-void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
}
-void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+ uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
-void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
@@ -222,9 +223,10 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
DIEString(DU->getStringPool().getEntry(*Asm, String)));
}
-DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute,
- dwarf::Form Form,
- const MCSymbol *Label) {
+DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label) {
return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
}
@@ -277,6 +279,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
}
+void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier) {
+ uint64_t Signature = DD->makeTypeSignature(Identifier);
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
+ DIEInteger(Signature));
+}
+
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIE *DieCU = Die.getUnitOrNull();
@@ -292,8 +301,6 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
}
DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
- assert(Tag != dwarf::DW_TAG_auto_variable &&
- Tag != dwarf::DW_TAG_arg_variable);
DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
if (N)
insertDIE(N, &Die);
@@ -445,7 +452,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements();
+ DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
const DIDerivedType *varField = nullptr;
const DIDerivedType *forwardingField = nullptr;
@@ -506,34 +513,35 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
- if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+ return false;
+
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
dwarf::Tag T = (dwarf::Tag)Ty->getTag();
// Encode pointer constants as unsigned bytes. This is used at least for
// null pointer constant emission.
- // (Pieces of) aggregate types that get hacked apart by SROA may also be
- // represented by a constant. Encode them as unsigned bytes.
// FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
// here, but accept them for now due to a bug in SROA producing bogus
// dbg.values.
- if (T == dwarf::DW_TAG_array_type ||
- T == dwarf::DW_TAG_class_type ||
- T == dwarf::DW_TAG_pointer_type ||
+ if (T == dwarf::DW_TAG_pointer_type ||
T == dwarf::DW_TAG_ptr_to_member_type ||
T == dwarf::DW_TAG_reference_type ||
- T == dwarf::DW_TAG_rvalue_reference_type ||
- T == dwarf::DW_TAG_structure_type ||
- T == dwarf::DW_TAG_union_type)
+ T == dwarf::DW_TAG_rvalue_reference_type)
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type ||
- T == dwarf::DW_TAG_enumeration_type);
- if (DITypeRef Deriv = DTy->getBaseType())
- return isUnsignedDIType(DD, DD->resolve(Deriv));
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
- return false;
+ T == dwarf::DW_TAG_restrict_type);
+ DITypeRef Deriv = DTy->getBaseType();
+ assert(Deriv && "Expected valid base type");
+ return isUnsignedDIType(DD, DD->resolve(Deriv));
}
auto *BTy = cast<DIBasicType>(Ty);
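As a worked example of the restructured check (hypothetical type chain): a constant of type typedef const unsigned x_t recurses DW_TAG_typedef -> DW_TAG_const_type -> the underlying DIBasicType, whose unsigned encoding yields true; an enumeration composite now conservatively returns false up front (per the FIXME), and any other composite, such as a struct piece produced by SROA, is treated as unsigned bytes.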
@@ -659,7 +667,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
}
void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
- if (!LinkageName.empty())
+ if (!LinkageName.empty() && DD->useLinkageNames())
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
@@ -685,6 +693,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getOrCreateNameSpace(NS);
if (auto *SP = dyn_cast<DISubprogram>(Context))
return getOrCreateSubprogramDIE(SP);
+ if (auto *M = dyn_cast<DIModule>(Context))
+ return getOrCreateModule(M);
return getDIE(Context);
}
@@ -700,7 +710,8 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
- updateAcceleratorTables(Context, Ty, TyDIE);
+ if (!Ty->isExternalTypeRef())
+ updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
@@ -753,7 +764,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = 0;
- if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) {
+ if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
@@ -795,8 +806,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
- const DIScope *Ctx = *I;
+ for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
@@ -843,7 +853,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
- && Tag != dwarf::DW_TAG_ptr_to_member_type)
+ && Tag != dwarf::DW_TAG_ptr_to_member_type
+ && Tag != dwarf::DW_TAG_reference_type
+ && Tag != dwarf::DW_TAG_rvalue_reference_type)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
@@ -899,6 +911,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isExternalTypeRef()) {
+ StringRef Identifier = CTy->getIdentifier();
+ assert(!Identifier.empty() && "external type ref without identifier");
+ addFlag(Buffer, dwarf::DW_AT_declaration);
+ return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
+ }
+
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -1134,6 +1153,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
"definition DIE was created in "
"getOrCreateSubprogramDIE");
DeclLinkageName = SPDecl->getLinkageName();
+ unsigned DeclID =
+ getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
+ unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+ if (DeclID != DefID)
+ addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+ if (SP->getLine() != SPDecl->getLine())
+ addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
}
// Add function template parameters.
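For instance (hypothetical files), a function declared in foo.h at line 10 but defined in foo.cpp at line 42 now gets DW_AT_decl_file pointing at foo.cpp and DW_AT_decl_line 42 on its definition DIE, instead of inheriting the declaration's coordinates.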
@@ -1180,11 +1207,10 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- const DISubroutineType *SPTy = SP->getType();
- assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
- "the type of a subprogram should be a subroutine");
+ DITypeRefArray Args;
+ if (const DISubroutineType *SPTy = SP->getType())
+ Args = SPTy->getTypeArray();
- auto Args = SPTy->getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 44d9d22..82760bf 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -113,13 +113,6 @@ protected:
DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- /// Add a string attribute data and value.
- ///
- /// This is guaranteed to be in the local string pool instead of indirected.
- void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
-
- void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
-
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
public:
@@ -162,9 +155,6 @@ public:
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {}
- /// Add a new name to the namespace accelerator table.
- void addAccelNamespace(StringRef Name, const DIE &Die);
-
/// Returns the DIE map slot for the specified debug variable.
///
/// We delegate the request to DwarfDebug when the MDNode can be part of the
@@ -186,14 +176,14 @@ public:
void addFlag(DIE &Die, dwarf::Attribute Attribute);
/// Add an unsigned integer attribute data and value.
- void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
- uint64_t Integer);
+ void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, uint64_t Integer);
- void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
+ void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
/// Add an signed integer attribute data and value.
- void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
- int64_t Integer);
+ void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, int64_t Integer);
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
@@ -206,8 +196,10 @@ public:
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
/// Add a Dwarf label attribute data and value.
- DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
- dwarf::Form Form, const MCSymbol *Label);
+ DIEValueList::value_iterator addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label);
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
@@ -228,7 +220,11 @@ public:
/// Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
+ /// Add a type's DW_AT_signature and set the declaration flag.
void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
+ /// Add an attribute containing the type signature for a unique identifier.
+ void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 49ef8d3..e24dcb1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -662,9 +662,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalValue *>::const_reverse_iterator
- I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalValue *GV = *I;
+ for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
+ TypeInfos.rend())) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index e42e082..c6a0e9d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -76,10 +76,6 @@ protected:
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions);
- /// Return `true' if this is a call to a function marked `nounwind'. Return
- /// `false' otherwise.
- bool callToNoUnwindFunction(const MachineInstr *MI);
-
void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
RangeMapType &PadMap);
@@ -131,6 +127,10 @@ public:
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
void beginInstruction(const MachineInstr *MI) override {}
void endInstruction() override {}
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ static bool callToNoUnwindFunction(const MachineInstr *MI);
};
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index eb9e4c1..6a023b9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -48,7 +48,7 @@ void llvm::linkErlangGCPrinter() {}
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
// Put this in a custom .note section.
OS.SwitchSection(
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 2ceec61..c09ef6a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -93,7 +93,7 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
///
void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_end");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6610ac7..c2c0f84 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -27,15 +27,15 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
auto *Scope = cast<DIScope>(S);
StringRef Dir = Scope->getDirectory(),
Filename = Scope->getFilename();
- char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
- if (Result)
- return Result;
+ std::string &Filepath =
+ DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
+ if (!Filepath.empty())
+ return Filepath;
// Clang emits directory and relative filename info into the IR, but CodeView
// operates on full paths. We could change Clang to emit full paths too, but
// that would increase the IR size and is probably not needed by other users.
// For now, just concatenate and canonicalize the path here.
- std::string Filepath;
if (Filename.find(':') == 1)
Filepath = Filename;
else
@@ -74,8 +74,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
Filepath.erase(Cursor, 1);
- Result = strdup(Filepath.c_str());
- return StringRef(Result);
+ return Filepath;
}
void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
@@ -253,7 +252,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
}
FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
- // Emit a line table subsection, requred to do PC-to-file:line lookup.
+ // Emit a line table subsection, required to do PC-to-file:line lookup.
Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 43d1a43..78068e0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
}
} FileNameRegistry;
- typedef std::map<std::pair<StringRef, StringRef>, char *>
+ typedef std::map<std::pair<StringRef, StringRef>, std::string>
DirAndFilenameToFilepathMapTy;
DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap;
StringRef getFullFilepath(const MDNode *S);
@@ -116,14 +116,6 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
public:
WinCodeViewLineTables(AsmPrinter *Asm);
- ~WinCodeViewLineTables() override {
- for (DirAndFilenameToFilepathMapTy::iterator
- I = DirAndFilenameToFilepathMap.begin(),
- E = DirAndFilenameToFilepathMap.end();
- I != E; ++I)
- free(I->second);
- }
-
void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
/// \brief Emit the COFF section that holds the line table information.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index a2b9316..48b7104 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -37,6 +38,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -62,9 +64,9 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MMI->getLandingPads().empty();
+ bool hasEHFunclets = MMI->hasEHFunclets();
const Function *F = MF->getFunction();
- const Function *ParentF = MMI->getWinEHParent(F);
shouldEmitMoves = Asm->needsSEHMoves();
@@ -78,49 +80,23 @@ void WinException::beginFunction(const MachineFunction *MF) {
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
- shouldEmitPersonality = forceEmitPersonality || (hasLandingPads &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per);
+ shouldEmitPersonality =
+ forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per);
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- // If we're not using CFI, we don't want the CFI or the personality. If
- // WinEHPrepare outlined something, we should emit the LSDA.
+ // If we're not using CFI, we don't want the CFI or the personality, but we
+ // might want EH tables if we had EH pads.
if (!Asm->MAI->usesWindowsCFI()) {
- bool HasOutlinedChildren =
- F->hasFnAttribute("wineh-parent") && F == ParentF;
- shouldEmitLSDA = HasOutlinedChildren;
+ shouldEmitLSDA = hasEHFunclets;
shouldEmitPersonality = false;
return;
}
- // If this was an outlined handler, we need to define the label corresponding
- // to the offset of the parent frame relative to the stack pointer after the
- // prologue.
- if (F != ParentF) {
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
- auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F);
- if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) {
- MCSymbol *HandlerTypeParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(F->getName()));
-
- // Emit a symbol assignment.
- Asm->OutStreamer->EmitAssignment(
- HandlerTypeParentFrameOffset,
- MCConstantExpr::create(I->second, Asm->OutContext));
- }
- }
-
- if (shouldEmitMoves || shouldEmitPersonality)
- Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym);
-
- if (shouldEmitPersonality) {
- const MCSymbol *PersHandlerSym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
- Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
- }
+ beginFunclet(MF->front(), Asm->CurrentFnSym);
}
/// endFunction - Gather and emit post-function exception information.
@@ -134,43 +110,158 @@ void WinException::endFunction(const MachineFunction *MF) {
if (F->hasPersonalityFn())
Per = classifyEHPersonality(F->getPersonalityFn());
- // Get rid of any dead landing pads if we're not using a Windows EH scheme. In
- // Windows EH schemes, the landing pad is not actually reachable. It only
- // exists so that we can emit the right table data.
- if (!isMSVCEHPersonality(Per))
+ // Get rid of any dead landing pads if we're not using funclets. In funclet
+ // schemes, the landing pad is not actually reachable. It only exists so
+ // that we can emit the right table data.
+ if (!isFuncletEHPersonality(Per))
MMI->TidyLandingPads();
+ endFunclet();
+
+ // endFunclet will emit the necessary .xdata tables for x64 SEH.
+ if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+ return;
+
if (shouldEmitPersonality || shouldEmitLSDA) {
Asm->OutStreamer->PushSection();
- if (shouldEmitMoves || shouldEmitPersonality) {
- // Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
- } else {
- // Just switch sections to the right xdata section. This use of
- // CurrentFnSym assumes that we only emit the LSDA when ending the parent
- // function.
- MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
- Asm->CurrentFnSym, Asm->OutContext);
- Asm->OutStreamer->SwitchSection(XData);
- }
+ // Just switch sections to the right xdata section. This use of CurrentFnSym
+ // assumes that we only emit the LSDA when ending the parent function.
+ MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym,
+ Asm->OutContext);
+ Asm->OutStreamer->SwitchSection(XData);
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
if (Per == EHPersonality::MSVC_Win64SEH)
- emitCSpecificHandlerTable();
+ emitCSpecificHandlerTable(MF);
else if (Per == EHPersonality::MSVC_X86SEH)
emitExceptHandlerTable(MF);
else if (Per == EHPersonality::MSVC_CXX)
emitCXXFrameHandler3Table(MF);
+ else if (Per == EHPersonality::CoreCLR)
+ emitCLRExceptionTable(MF);
else
emitExceptionTable();
Asm->OutStreamer->PopSection();
}
+}
+
+/// Retrieve the MCSymbol for a MachineBasicBlock funclet entry.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ if (!MBB)
+ return nullptr;
+ assert(MBB->isEHFuncletEntry());
+
+ // Give catches and cleanups a name based on their parent function and
+ // their funclet entry block's number.
+ const MachineFunction *MF = MBB->getParent();
+ const Function *F = MF->getFunction();
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCContext &Ctx = MF->getContext();
+ StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+ return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+ Twine(MBB->getNumber()) + "@?0?" +
+ FuncLinkageName + "@4HA");
+}
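For illustration (hypothetical values): a catch funclet whose entry block is number 3, inside a function with linkage name main, is given the symbol

    ?catch$3@?0?main@4HA

following the pattern string built above; cleanup funclets use the dtor prefix instead.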
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym) {
+ CurrentFuncletEntry = &MBB;
+
+ const Function *F = Asm->MF->getFunction();
+ // If a symbol was not provided for the funclet, invent one.
+ if (!Sym) {
+ Sym = getMCSymbolForMBB(Asm, &MBB);
+
+ // Describe our funclet symbol as a function with internal linkage.
+ Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ Asm->OutStreamer->EndCOFFSymbolDef();
+
+ // We want our funclet's entry point to be aligned such that no nops will be
+ // present after the label.
+ Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+ F);
+
+ // Now that we've emitted the alignment directive, point at our funclet.
+ Asm->OutStreamer->EmitLabel(Sym);
+ }
+
+ // Mark 'Sym' as starting our funclet.
if (shouldEmitMoves || shouldEmitPersonality)
+ Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *PerFn = nullptr;
+
+ // Determine which personality routine we are using for this funclet.
+ if (F->hasPersonalityFn())
+ PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
+
+ // Classify the personality routine so that we may reason about it.
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
+ if (Per != EHPersonality::MSVC_CXX ||
+ !CurrentFuncletEntry->isCleanupFuncletEntry())
+ Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ }
+}
+
+void WinException::endFunclet() {
+ // No funclet to process? Great, we have nothing to do.
+ if (!CurrentFuncletEntry)
+ return;
+
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ const Function *F = Asm->MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // The .seh_handlerdata directive implicitly switches sections; push the
+ // current section so that we can return to it.
+ Asm->OutStreamer->PushSection();
+
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
+ if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
+ !CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // If this is a C++ catch funclet (or the parent function),
+ // emit a reference to the LSDA for the parent function.
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$cppxdata$", FuncLinkageName));
+ Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
+ } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() &&
+ !CurrentFuncletEntry->isEHFuncletEntry()) {
+ // If this is the parent function in Win64 SEH, emit the LSDA immediately
+ // following .seh_handlerdata.
+ emitCSpecificHandlerTable(Asm->MF);
+ }
+
+ // Switch back to the previous section now that we are done writing to
+ // .xdata.
+ Asm->OutStreamer->PopSection();
+
+ // Emit a .seh_endproc directive to mark the end of the function.
Asm->OutStreamer->EmitWinCFIEndProc();
+ }
+
+ // Let's make sure we don't try to end the same funclet twice.
+ CurrentFuncletEntry = nullptr;
}
const MCExpr *WinException::create32bitRef(const MCSymbol *Value) {
@@ -188,6 +279,202 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
return create32bitRef(Asm->getSymbol(GV));
}
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(create32bitRef(Label),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OffsetOf, Asm->OutContext),
+ MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
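The +1 in these helpers carries over the old inline logic removed further down: SEH label ranges are half-open, so adding one extends the range to include the return address of the last invoke it covers.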
+
+int WinException::getFrameIndexOffset(int FrameIndex,
+ const WinEHFuncInfo &FuncInfo) {
+ const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
+ unsigned UnusedReg;
+ if (Asm->MAI->usesWindowsCFI())
+ return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg);
+ // For 32-bit, offsets should be relative to the end of the EH registration
+ // node. For 64-bit, it's relative to SP at the end of the prologue.
+ assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
+ int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += FuncInfo.EHRegNodeEndOffset;
+ return Offset;
+}
+
+namespace {
+
+/// Top-level state used to represent unwind to caller
+const int NullState = -1;
+
+struct InvokeStateChange {
+ /// EH Label immediately after the last invoke in the previous state, or
+ /// nullptr if the previous state was the null state.
+ const MCSymbol *PreviousEndLabel;
+
+ /// EH label immediately before the first invoke in the new state, or nullptr
+ /// if the new state is the null state.
+ const MCSymbol *NewStartLabel;
+
+ /// State of the invoke following NewStartLabel, or NullState to indicate
+ /// the presence of calls which may unwind to caller.
+ int NewState;
+};
+
+/// Iterator that reports all the invoke state changes in a range of machine
+/// basic blocks. Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+ InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+ MachineFunction::const_iterator MFI,
+ MachineFunction::const_iterator MFE,
+ MachineBasicBlock::const_iterator MBBI,
+ int BaseState)
+ : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+ LastStateChange.PreviousEndLabel = nullptr;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ scan();
+ }
+
+public:
+ static iterator_range<InvokeStateChangeIterator>
+ range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+ MachineFunction::const_iterator End, int BaseState = NullState) {
+ // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+ // the end of the last block.
+ assert(Begin != End);
+ auto BlockBegin = Begin->begin();
+ auto BlockEnd = std::prev(End)->end();
+ return make_range(
+ InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+ InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+ }
+
+ // Iterator methods.
+ bool operator==(const InvokeStateChangeIterator &O) const {
+ assert(BaseState == O.BaseState);
+ // Must be visiting same block.
+ if (MFI != O.MFI)
+ return false;
+ // Must be visiting same instr.
+ if (MBBI != O.MBBI)
+ return false;
+ // At end of block/instr iteration, we can still have two distinct states:
+ // one to report the final EndLabel, and another indicating the end of the
+ // state change iteration. Check for CurrentEndLabel equality to
+ // distinguish these.
+ return CurrentEndLabel == O.CurrentEndLabel;
+ }
+
+ bool operator!=(const InvokeStateChangeIterator &O) const {
+ return !operator==(O);
+ }
+ InvokeStateChange &operator*() { return LastStateChange; }
+ InvokeStateChange *operator->() { return &LastStateChange; }
+ InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+ InvokeStateChangeIterator &scan();
+
+ const WinEHFuncInfo &EHInfo;
+ const MCSymbol *CurrentEndLabel = nullptr;
+ MachineFunction::const_iterator MFI;
+ MachineFunction::const_iterator MFE;
+ MachineBasicBlock::const_iterator MBBI;
+ InvokeStateChange LastStateChange;
+ bool VisitingInvoke = false;
+ int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+ bool IsNewBlock = false;
+ for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+ if (IsNewBlock)
+ MBBI = MFI->begin();
+ for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+ MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+ // Indicate a change of state to the null state. We don't have
+ // start/end EH labels handy but the caller won't expect them for
+ // null state regions.
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ CurrentEndLabel = nullptr;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+
+ // All other state changes are at EH labels before/after invokes.
+ if (!MI.isEHLabel())
+ continue;
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+ if (Label == CurrentEndLabel) {
+ VisitingInvoke = false;
+ continue;
+ }
+ auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+ // Ignore EH labels that aren't the ones inserted before an invoke
+ if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+ continue;
+ auto &StateAndEnd = InvokeMapIter->second;
+ int NewState = StateAndEnd.first;
+ // Keep track of the fact that we're between EH start/end labels so
+ // we know not to treat the invoke we'll see as unwinding to caller.
+ VisitingInvoke = true;
+ if (NewState == LastStateChange.NewState) {
+ // The state isn't actually changing here. Record the new end and
+ // keep going.
+ CurrentEndLabel = StateAndEnd.second;
+ continue;
+ }
+ // Found a state change to report
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = Label;
+ LastStateChange.NewState = NewState;
+ // Start keeping track of the new current end
+ CurrentEndLabel = StateAndEnd.second;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+ }
+ // Iteration hit the end of the block range.
+ if (LastStateChange.NewState != BaseState) {
+ // Report the end of the last new state
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ // Leave CurrentEndLabel non-null to distinguish this state from end.
+ assert(CurrentEndLabel != nullptr);
+ return *this;
+ }
+ // We've reported all state changes and hit the end state.
+ CurrentEndLabel = nullptr;
+ return *this;
+}
+
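To see what the iterator reports, consider a funclet whose body is, in order (hypothetical states):

    EH_LABEL A ; invoke @f ; EH_LABEL Aend    ; state 1
    EH_LABEL B ; invoke @g ; EH_LABEL Bend    ; state 1
    call @h                                   ; may unwind to caller
    EH_LABEL C ; invoke @i ; EH_LABEL Cend    ; state 2

Iteration yields null->1 starting at A (the second invoke keeps state 1, so only the current end label advances to Bend), 1->null at the unannotated call @h, null->2 starting at C, and a final 2->null change when the range ends.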
/// Emit the language-specific data that __C_specific_handler expects. This
/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
/// up after faults with __try, __except, and __finally. The typeinfo values
@@ -216,135 +503,156 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
/// };
-void WinException::emitCSpecificHandlerTable() {
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
-
- // Simplifying assumptions for first implementation:
- // - Cleanups are not implemented.
- // - Filters are not implemented.
-
- // The Itanium LSDA table sorts similar landing pads together to simplify the
- // actions table, but we don't need that.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- LandingPads.reserve(PadInfos.size());
- for (const auto &LP : PadInfos)
- LandingPads.push_back(&LP);
-
- // Compute label ranges for call sites as we would for the Itanium LSDA, but
- // use an all zero action table because we aren't using these actions.
- SmallVector<unsigned, 64> FirstActions;
- FirstActions.resize(LandingPads.size());
- SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, LandingPads, FirstActions);
-
- MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
- MCSymbol *EHFuncEndSym = Asm->getFunctionEnd();
-
- // Emit the number of table entries.
- unsigned NumEntries = 0;
- for (const CallSiteEntry &CSE : CallSites) {
- if (!CSE.LPad)
- continue; // Ignore gaps.
- NumEntries += CSE.LPad->SEHHandlers.size();
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.x86.seh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+
+ // Use the assembler to compute the number of table entries through label
+ // difference and division.
+ MCSymbol *TableBegin =
+ Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true);
+ MCSymbol *TableEnd =
+ Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true);
+ const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin);
+ const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx);
+ const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx);
+ AddComment("Number of call sites");
+ OS.EmitValue(EntryCount, 4);
+
+ OS.EmitLabel(TableBegin);
+
+ // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only
+ // models exceptions from invokes. LLVM also allows arbitrary reordering of
+ // the code, so our tables end up looking a bit different. Rather than
+ // trying to match MSVC's tables exactly, we emit a denormalized table. For
+ // each range of invokes in the same state, we emit table entries for all
+ // the actions that would be taken in that state. This means our tables are
+ // slightly bigger, which is OK.
+ const MCSymbol *LastStartLabel = nullptr;
+ int LastEHState = -1;
+ // Break out before we enter a finally funclet.
+ // FIXME: We need to emit separate EH tables for cleanups.
+ MachineFunction::const_iterator End = MF->end();
+ MachineFunction::const_iterator Stop = std::next(MF->begin());
+ while (Stop != End && !Stop->isEHFuncletEntry())
+ ++Stop;
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) {
+ // Emit all the actions for the state we just transitioned out of
+ // if it was not the null state
+ if (LastEHState != -1)
+ emitSEHActionsForRange(FuncInfo, LastStartLabel,
+ StateChange.PreviousEndLabel, LastEHState);
+ LastStartLabel = StateChange.NewStartLabel;
+ LastEHState = StateChange.NewState;
}
- Asm->OutStreamer->EmitIntValue(NumEntries, 4);
- // If there are no actions, we don't need to iterate again.
- if (NumEntries == 0)
- return;
+ OS.EmitLabel(TableEnd);
+}
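Each table row emitted by emitSEHActionsForRange is four imagerel32 values, hence the 16-byte EntrySize: with, say, three rows (hypothetical count), lsda_end - lsda_begin is 48 bytes and the assembler folds the division to 3 at layout time, so no rows need to be counted during emission.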
- // Emit the four-label records for each call site entry. The table has to be
- // sorted in layout order, and the call sites should already be sorted.
- for (const CallSiteEntry &CSE : CallSites) {
- // Ignore gaps. Unlike the Itanium model, unwinding through a frame without
- // an EH table entry will propagate the exception rather than terminating
- // the program.
- if (!CSE.LPad)
- continue;
- const LandingPadInfo *LPad = CSE.LPad;
-
- // Compute the label range. We may reuse the function begin and end labels
- // rather than forming new ones.
- const MCExpr *Begin =
- create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
- const MCExpr *End;
- if (CSE.EndLabel) {
- // The interval is half-open, so we have to add one to include the return
- // address of the last invoke in the range.
- End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel),
- MCConstantExpr::create(1, Asm->OutContext),
- Asm->OutContext);
+void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ assert(BeginLabel && EndLabel);
+ while (State != -1) {
+ const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
+ const MCExpr *FilterOrFinally;
+ const MCExpr *ExceptOrNull;
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ if (UME.IsFinally) {
+ FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
+ ExceptOrNull = MCConstantExpr::create(0, Ctx);
} else {
- End = create32bitRef(EHFuncEndSym);
+ // For an except, the filter can be 1 (catch-all) or a function
+ // label.
+ FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter)
+ : MCConstantExpr::create(1, Ctx);
+ ExceptOrNull = create32bitRef(Handler->getSymbol());
}
- // Emit an entry for each action.
- for (SEHHandler Handler : LPad->SEHHandlers) {
- Asm->OutStreamer->EmitValue(Begin, 4);
- Asm->OutStreamer->EmitValue(End, 4);
-
- // Emit the filter or finally function pointer, if present. Otherwise,
- // emit '1' to indicate a catch-all.
- const Function *F = Handler.FilterOrFinally;
- if (F)
- Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4);
- else
- Asm->OutStreamer->EmitIntValue(1, 4);
-
- // Emit the recovery address, if present. Otherwise, this must be a
- // finally.
- const BlockAddress *BA = Handler.RecoverBA;
- if (BA)
- Asm->OutStreamer->EmitValue(
- create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4);
- else
- Asm->OutStreamer->EmitIntValue(0, 4);
- }
+ AddComment("LabelStart");
+ OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ AddComment("LabelEnd");
+ OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
+ : "CatchAll");
+ OS.EmitValue(FilterOrFinally, 4);
+ AddComment(UME.IsFinally ? "Null" : "ExceptionHandler");
+ OS.EmitValue(ExceptOrNull, 4);
+
+ assert(UME.ToState < State && "states should decrease");
+ State = UME.ToState;
}
}
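Walking the unwind chain this way, a range in state 2 whose SEHUnwindMap entries point 2 -> 0 and 0 -> -1 (hypothetical numbering) emits two rows, one per enclosing __try, before ToState reaches -1 and the loop stops; the assert guarantees the chain strictly descends and therefore terminates.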
void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
const Function *F = MF->getFunction();
- const Function *ParentF = MMI->getWinEHParent(F);
auto &OS = *Asm->OutStreamer;
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
- StringRef ParentLinkageName =
- GlobalValue::getRealLinkageName(ParentF->getName());
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
MCSymbol *FuncInfoXData = nullptr;
if (shouldEmitPersonality) {
- FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$cppxdata$", ParentLinkageName));
- OS.EmitValue(create32bitRef(FuncInfoXData), 4);
-
- extendIP2StateTable(MF, ParentF, FuncInfo);
-
- // Defer emission until we've visited the parent function and all the catch
- // handlers. Cleanups don't contribute to the ip2state table, so don't count
- // them.
- if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
- return;
- ++FuncInfo.NumIPToStateFuncsVisited;
- if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
- return;
+ // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from
+ // IPs to state numbers.
+ FuncInfoXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName));
+ computeIP2StateTable(MF, FuncInfo, IPToStateTable);
} else {
- FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName);
- emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName);
+ FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName);
}
+ int UnwindHelpOffset = 0;
+ if (Asm->MAI->usesWindowsCFI())
+ UnwindHelpOffset =
+ getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
+
MCSymbol *UnwindMapXData = nullptr;
MCSymbol *TryBlockMapXData = nullptr;
MCSymbol *IPToStateXData = nullptr;
- if (!FuncInfo.UnwindMap.empty())
+ if (!FuncInfo.CxxUnwindMap.empty())
UnwindMapXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$stateUnwindMap$", ParentLinkageName));
+ Twine("$stateUnwindMap$", FuncLinkageName));
if (!FuncInfo.TryBlockMap.empty())
- TryBlockMapXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$tryMap$", ParentLinkageName));
- if (!FuncInfo.IPToStateList.empty())
- IPToStateXData = Asm->OutContext.getOrCreateSymbol(
- Twine("$ip2state$", ParentLinkageName));
+ TryBlockMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName));
+ if (!IPToStateTable.empty())
+ IPToStateXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName));
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
// FuncInfo {
// uint32_t MagicNumber
@@ -363,17 +671,38 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
OS.EmitValueToAlignment(4);
OS.EmitLabel(FuncInfoXData);
- OS.EmitIntValue(0x19930522, 4); // MagicNumber
- OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
- OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap
- OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks
- OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap
- OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries
- OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap
- if (Asm->MAI->usesWindowsCFI())
- OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
- OS.EmitIntValue(0, 4); // ESTypeList
- OS.EmitIntValue(1, 4); // EHFlags
+
+ AddComment("MagicNumber");
+ OS.EmitIntValue(0x19930522, 4);
+
+ AddComment("MaxState");
+ OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4);
+
+ AddComment("UnwindMap");
+ OS.EmitValue(create32bitRef(UnwindMapXData), 4);
+
+ AddComment("NumTryBlocks");
+ OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4);
+
+ AddComment("TryBlockMap");
+ OS.EmitValue(create32bitRef(TryBlockMapXData), 4);
+
+ AddComment("IPMapEntries");
+ OS.EmitIntValue(IPToStateTable.size(), 4);
+
+ AddComment("IPToStateXData");
+ OS.EmitValue(create32bitRef(IPToStateXData), 4);
+
+ if (Asm->MAI->usesWindowsCFI()) {
+ AddComment("UnwindHelp");
+ OS.EmitIntValue(UnwindHelpOffset, 4);
+ }
+
+ AddComment("ESTypeList");
+ OS.EmitIntValue(0, 4);
+
+ AddComment("EHFlags");
+ OS.EmitIntValue(1, 4);
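Read back to back, the values just emitted match this on-disk record (a sketch inferred from the emission order; the field names are illustrative, not LLVM declarations):

    struct FuncInfo {         // _CxxFrameHandler3 LSDA header
      uint32_t MagicNumber;   // 0x19930522
      int32_t  MaxState;      // CxxUnwindMap.size()
      int32_t  UnwindMap;     // imagerel32 of UnwindMapEntry[]
      uint32_t NumTryBlocks;  // TryBlockMap.size()
      int32_t  TryBlockMap;   // imagerel32 of TryBlockMapEntry[]
      uint32_t IPMapEntries;  // IPToStateTable.size()
      int32_t  IPToStateMap;  // imagerel32 of the ip2state entries
      int32_t  UnwindHelp;    // frame offset; only present under Windows CFI
      int32_t  ESTypeList;    // 0
      int32_t  EHFlags;       // 1
    };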
// UnwindMapEntry {
// int32_t ToState;
@@ -381,9 +710,14 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// };
if (UnwindMapXData) {
OS.EmitLabel(UnwindMapXData);
- for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) {
- OS.EmitIntValue(UME.ToState, 4); // ToState
- OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action
+ for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
+ MCSymbol *CleanupSym =
+ getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
+ AddComment("ToState");
+ OS.EmitIntValue(UME.ToState, 4);
+
+ AddComment("Action");
+ OS.EmitValue(create32bitRef(CleanupSym), 4);
}
}
@@ -398,33 +732,49 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitLabel(TryBlockMapXData);
SmallVector<MCSymbol *, 1> HandlerMaps;
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
- WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
- MCSymbol *HandlerMapXData = nullptr;
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = nullptr;
if (!TBME.HandlerArray.empty())
HandlerMapXData =
Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$")
.concat(Twine(I))
.concat("$")
- .concat(ParentLinkageName));
-
+ .concat(FuncLinkageName));
HandlerMaps.push_back(HandlerMapXData);
- int CatchHigh = -1;
- for (WinEHHandlerType &HT : TBME.HandlerArray)
- CatchHigh =
- std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]);
-
- assert(TBME.TryLow <= TBME.TryHigh);
- OS.EmitIntValue(TBME.TryLow, 4); // TryLow
- OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh
- OS.EmitIntValue(CatchHigh, 4); // CatchHigh
- OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches
- OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray
+ // TBMEs should form intervals.
+ assert(0 <= TBME.TryLow && "bad trymap interval");
+ assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval");
+ assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval");
+ assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) &&
+ "bad trymap interval");
+
+ AddComment("TryLow");
+ OS.EmitIntValue(TBME.TryLow, 4);
+
+ AddComment("TryHigh");
+ OS.EmitIntValue(TBME.TryHigh, 4);
+
+ AddComment("CatchHigh");
+ OS.EmitIntValue(TBME.CatchHigh, 4);
+
+ AddComment("NumCatches");
+ OS.EmitIntValue(TBME.HandlerArray.size(), 4);
+
+ AddComment("HandlerArray");
+ OS.EmitValue(create32bitRef(HandlerMapXData), 4);
+ }
+
+ // All funclets use the same parent frame offset currently.
+ unsigned ParentFrameOffset = 0;
+ if (shouldEmitPersonality) {
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF);
}
for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
- WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
MCSymbol *HandlerMapXData = HandlerMaps[I];
if (!HandlerMapXData)
continue;
@@ -438,32 +788,34 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.EmitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
// Get the frame escape label with the offset of the catch object. If
- // the index is -1, then there is no catch object, and we should emit an
- // offset of zero, indicating that no copy will occur.
+ // the index is INT_MAX, then there is no catch object, and we should
+ // emit an offset of zero, indicating that no copy will occur.
const MCExpr *FrameAllocOffsetRef = nullptr;
- if (HT.CatchObjRecoverIdx >= 0) {
- MCSymbol *FrameAllocOffset =
- Asm->OutContext.getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(ParentF->getName()),
- HT.CatchObjRecoverIdx);
- FrameAllocOffsetRef = MCSymbolRefExpr::create(
- FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ if (HT.CatchObj.FrameIndex != INT_MAX) {
+ int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
+ FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
} else {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
}
- OS.EmitIntValue(HT.Adjectives, 4); // Adjectives
- OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type
- OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset
- OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler
+ MCSymbol *HandlerSym =
+ getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
+
+ AddComment("Adjectives");
+ OS.EmitIntValue(HT.Adjectives, 4);
+
+ AddComment("Type");
+ OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4);
+
+ AddComment("CatchObjOffset");
+ OS.EmitValue(FrameAllocOffsetRef, 4);
+
+ AddComment("Handler");
+ OS.EmitValue(create32bitRef(HandlerSym), 4);
if (shouldEmitPersonality) {
- MCSymbol *ParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(HT.Handler->getName()));
- const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create(
- ParentFrameOffset, Asm->OutContext);
- OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ AddComment("ParentFrameOffset");
+ OS.EmitIntValue(ParentFrameOffset, 4);
}
}
}
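Similarly, each iteration of the handler loop above lays down one catch-handler record; a sketch of that shape, not from the patch, with names following the comments:

#include <cstdint>

// Hypothetical mirror of the HandlerType record emitted above. The
// ParentFrameOffset field is appended only when shouldEmitPersonality is set.
struct HandlerTypeEntry {
  int32_t Adjectives;         // catch qualifiers
  uint32_t Type;              // 32-bit ref to the type descriptor
  int32_t CatchObjOffset;     // frame offset of the catch object, 0 if none
  uint32_t Handler;           // 32-bit ref to the catch handler/funclet
  uint32_t ParentFrameOffset; // conditional, see above
};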
@@ -475,87 +827,65 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// };
if (IPToStateXData) {
OS.EmitLabel(IPToStateXData);
- for (auto &IPStatePair : FuncInfo.IPToStateList) {
- OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP
- OS.EmitIntValue(IPStatePair.second, 4); // State
+ for (auto &IPStatePair : IPToStateTable) {
+ AddComment("IP");
+ OS.EmitValue(IPStatePair.first, 4);
+ AddComment("ToState");
+ OS.EmitIntValue(IPStatePair.second, 4);
}
}
}
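The IP-to-state pairs emitted here are consumed by a forward scan at runtime. A self-contained sketch, under the assumption (stated in the emission comment below) that an entry covers all PCs from its IP up to the next entry's IP; lookupState is an illustrative name only:

#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical consumer of the table emitted above: the state for a PC is
// the state of the last entry whose IP is <= the PC (entries are emitted
// in increasing IP order).
int lookupState(const std::vector<std::pair<uint32_t, int>> &IpToState,
                uint32_t PC) {
  int State = -1; // null state if PC precedes every entry
  for (const auto &Entry : IpToState) {
    if (Entry.first > PC)
      break;
    State = Entry.second;
  }
  return State;
}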
-void WinException::extendIP2StateTable(const MachineFunction *MF,
- const Function *ParentF,
- WinEHFuncInfo &FuncInfo) {
- const Function *F = MF->getFunction();
-
- // The Itanium LSDA table sorts similar landing pads together to simplify the
- // actions table, but we don't need that.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- LandingPads.reserve(PadInfos.size());
- for (const auto &LP : PadInfos)
- LandingPads.push_back(&LP);
-
- RangeMapType PadMap;
- computePadMap(LandingPads, PadMap);
-
- // The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = Asm->getFunctionBegin();
-
- // Whether there is a potentially throwing instruction (currently this means
- // an ordinary call) between the end of the previous try-range and now.
- bool SawPotentiallyThrowing = false;
-
- int LastEHState = -2;
-
- // The parent function and the catch handlers contribute to the 'ip2state'
- // table.
-
- // Include ip2state entries for the beginning of the main function and
- // for catch handler functions.
- if (F == ParentF) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- LastEHState = -1;
- } else if (FuncInfo.HandlerBaseState.count(F)) {
- FuncInfo.IPToStateList.push_back(
- std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F]));
- LastEHState = FuncInfo.HandlerBaseState[F];
- }
- for (const auto &MBB : *MF) {
- for (const auto &MI : MBB) {
- if (!MI.isEHLabel()) {
- if (MI.isCall())
- SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
- continue;
+void WinException::computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) {
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ // Find the end of the funclet
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ break;
}
+ }
- // End of the previous try-range?
- MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
- if (BeginLabel == LastLabel)
- SawPotentiallyThrowing = false;
-
- // Beginning of a new try-range?
- RangeMapType::const_iterator L = PadMap.find(BeginLabel);
- if (L == PadMap.end())
- // Nope, it was just some random label.
- continue;
-
- const PadRange &P = L->second;
- const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
- assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
- "Inconsistent landing pad map!");
-
- // FIXME: Should this be using FuncInfo.HandlerBaseState?
- if (SawPotentiallyThrowing && LastEHState != -1) {
- FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
- SawPotentiallyThrowing = false;
- LastEHState = -1;
- }
+ // Don't emit ip2state entries for cleanup funclets. Any interesting
+ // exceptional actions in cleanups must be handled in a separate IR
+ // function.
+ if (FuncletStart->isCleanupFuncletEntry())
+ continue;
- if (LandingPad->WinEHState != LastEHState)
- FuncInfo.IPToStateList.push_back(
- std::make_pair(BeginLabel, LandingPad->WinEHState));
- LastEHState = LandingPad->WinEHState;
- LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ MCSymbol *StartLabel;
+ int BaseState;
+ if (FuncletStart == MF->begin()) {
+ BaseState = NullState;
+ StartLabel = Asm->getFunctionBegin();
+ } else {
+ auto *FuncletPad =
+ cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI());
+ assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0);
+ BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second;
+ StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart);
+ }
+ assert(StartLabel && "need local function start label");
+ IPToStateTable.push_back(
+ std::make_pair(create32bitRef(StartLabel), BaseState));
+
+ for (const auto &StateChange : InvokeStateChangeIterator::range(
+ FuncInfo, FuncletStart, FuncletEnd, BaseState)) {
+ // Compute the label to report as the start of this entry; use the EH
+ // start label for the invoke if we have one, otherwise (this is a call
+ // which may unwind to our caller and does not have an EH start label, so)
+ // use the previous end label.
+ const MCSymbol *ChangeLabel = StateChange.NewStartLabel;
+ if (!ChangeLabel)
+ ChangeLabel = StateChange.PreviousEndLabel;
+ // Emit an entry indicating that PCs after 'Label' have this EH state.
+ IPToStateTable.push_back(
+ std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ // FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
}
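The funclet walk in computeIP2StateTable, two iterators sweeping half-open ranges delimited by funclet entries, is easy to model on a plain vector. A minimal sketch, with Block and forEachFunclet being illustrative names only:

#include <vector>

struct Block { bool IsFuncletEntry; };

// FuncletStart advances to FuncletEnd, and FuncletEnd stops at the next
// funclet entry (or the end), so each iteration visits exactly one
// funclet's half-open range [FuncletStart, FuncletEnd).
template <typename Fn>
void forEachFunclet(const std::vector<Block> &Blocks, Fn Visit) {
  auto FuncletStart = Blocks.begin(), FuncletEnd = Blocks.begin();
  for (auto End = Blocks.end(); FuncletStart != End;
       FuncletStart = FuncletEnd) {
    while (++FuncletEnd != End && !FuncletEnd->IsFuncletEntry) {
      // scan forward to the next funclet entry
    }
    Visit(FuncletStart, FuncletEnd);
  }
}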
@@ -566,15 +896,15 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
// registration in order to recover the parent frame pointer. Now that we know
 // we've code-generated the parent, we can emit the label assignment that
// those helpers use to get the offset of the registration node.
- assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
- "no EH reg node localescape index");
+ MCContext &Ctx = Asm->OutContext;
MCSymbol *ParentFrameOffset =
- Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
- FLinkageName, FuncInfo.EHRegNodeEscapeIndex);
- const MCExpr *RegistrationOffsetSymRef =
- MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef);
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ int64_t Offset = TFI->getFrameIndexReference(
+ *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg);
+ const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
}
/// Emit the language-specific data that _except_handler3 and 4 expect. This is
@@ -585,7 +915,13 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
const Function *F = MF->getFunction();
StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
- WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
// Emit the __ehtable label that we use for llvm.x86.seh.lsda.
@@ -611,58 +947,290 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
//
// Only the EHCookieOffset field appears to vary, and it appears to be the
// offset from the final saved SP value to the retaddr.
+ AddComment("GSCookieOffset");
OS.EmitIntValue(-2, 4);
+ AddComment("GSCookieXOROffset");
OS.EmitIntValue(0, 4);
// FIXME: Calculate.
+ AddComment("EHCookieOffset");
OS.EmitIntValue(9999, 4);
+ AddComment("EHCookieXOROffset");
OS.EmitIntValue(0, 4);
BaseState = -2;
}
- // Build a list of pointers to LandingPadInfos and then sort by WinEHState.
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- SmallVector<const LandingPadInfo *, 4> LPads;
- LPads.reserve((PadInfos.size()));
- for (const LandingPadInfo &LPInfo : PadInfos)
- LPads.push_back(&LPInfo);
- std::sort(LPads.begin(), LPads.end(),
- [](const LandingPadInfo *L, const LandingPadInfo *R) {
- return L->WinEHState < R->WinEHState;
- });
-
- // For each action in each lpad, emit one of these:
- // struct ScopeTableEntry {
- // int32_t EnclosingLevel;
- // int32_t (__cdecl *Filter)();
- // void *HandlerOrFinally;
- // };
- //
- // The "outermost" action will use BaseState as its enclosing level. Each
- // other action will refer to the previous state as its enclosing level.
- int CurState = 0;
- for (const LandingPadInfo *LPInfo : LPads) {
- int EnclosingLevel = BaseState;
- assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 ==
- LPInfo->WinEHState &&
- "gaps in the SEH scope table");
- for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend();
- I != E; ++I) {
- const SEHHandler &Handler = *I;
- const BlockAddress *BA = Handler.RecoverBA;
- const Function *F = Handler.FilterOrFinally;
- assert(F && "cannot catch all in 32-bit SEH without filter function");
- const MCExpr *FilterOrNull =
- create32bitRef(BA ? Asm->getSymbol(F) : nullptr);
- const MCExpr *ExceptOrFinally = create32bitRef(
- BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F));
-
- OS.EmitIntValue(EnclosingLevel, 4);
- OS.EmitValue(FilterOrNull, 4);
- OS.EmitValue(ExceptOrFinally, 4);
-
- // The next state unwinds to this state.
- EnclosingLevel = CurState;
- CurState++;
+ assert(!FuncInfo.SEHUnwindMap.empty());
+ for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ const MCSymbol *ExceptOrFinally =
+ UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
+ // -1 is usually the base state for "unwind to caller", but for
+ // _except_handler4 it's -2. Do that replacement here if necessary.
+ int ToState = UME.ToState == -1 ? BaseState : UME.ToState;
+ AddComment("ToState");
+ OS.EmitIntValue(ToState, 4);
+ AddComment(UME.IsFinally ? "Null" : "FilterFunction");
+ OS.EmitValue(create32bitRef(UME.Filter), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler");
+ OS.EmitValue(create32bitRef(ExceptOrFinally), 4);
+ }
+}
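Each iteration of the SEHUnwindMap loop emits one _except_handler3/4 scope-table record. A sketch of that record, not from the patch, with names following the AddComment strings:

#include <cstdint>

// Hypothetical mirror of one scope-table entry emitted above.
struct SEHScopeTableEntry {
  int32_t ToState;         // enclosing state; base is -2 for _except_handler4
  uint32_t FilterFunction; // 32-bit ref to the filter, or 0 for a __finally
  uint32_t HandlerAddress; // __except handler or __finally funclet
};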
+
+static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
+ int Rank = 0;
+ while (State != -1) {
+ ++Rank;
+ State = FuncInfo.ClrEHUnwindMap[State].Parent;
+ }
+ return Rank;
+}
+
+static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
+ int LeftRank = getRank(FuncInfo, Left);
+ int RightRank = getRank(FuncInfo, Right);
+
+ while (LeftRank < RightRank) {
+ Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ --RightRank;
+ }
+
+ while (RightRank < LeftRank) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
+ --LeftRank;
+ }
+
+ while (Left != Right) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
+ Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
+ }
+
+ return Left;
+}
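getAncestor is a textbook lowest-common-ancestor walk over parent pointers: equalize depths using the ranks, then climb in lockstep. A self-contained model with a small worked example (state 3 nests in 1; 1 and 2 nest under 0):

#include <cassert>
#include <vector>

// States form a forest via Parent links; -1 is the root "null" state.
static int rank(const std::vector<int> &Parent, int State) {
  int R = 0;
  for (; State != -1; State = Parent[State])
    ++R;
  return R;
}

static int ancestor(const std::vector<int> &Parent, int Left, int Right) {
  int LR = rank(Parent, Left), RR = rank(Parent, Right);
  while (LR < RR) { Right = Parent[Right]; --RR; } // equalize depths
  while (RR < LR) { Left = Parent[Left]; --LR; }
  while (Left != Right) { // climb in lockstep until the paths meet
    Left = Parent[Left];
    Right = Parent[Right];
  }
  return Left;
}

int main() {
  std::vector<int> Parent = {-1, 0, 0, 1};
  assert(ancestor(Parent, 3, 2) == 0);
  assert(ancestor(Parent, 3, 1) == 1);
  assert(ancestor(Parent, 2, 2) == 2);
}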
+
+void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
+ // CLR EH "states" are really just IDs that identify handlers/funclets;
+ // states, handlers, and funclets all have 1:1 mappings between them, and a
+ // handler/funclet's "state" is its index in the ClrEHUnwindMap.
+ MCStreamer &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ MCSymbol *FuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *FuncEndSym = Asm->getFunctionEnd();
+
+ // A ClrClause describes a protected region.
+ struct ClrClause {
+ const MCSymbol *StartLabel; // Start of protected region
+ const MCSymbol *EndLabel; // End of protected region
+ int State; // Index of handler protecting the protected region
+ int EnclosingState; // Index of funclet enclosing the protected region
+ };
+ SmallVector<ClrClause, 8> Clauses;
+
+ // Build a map from handler MBBs to their corresponding states (i.e. their
+ // indices in the ClrEHUnwindMap).
+ int NumStates = FuncInfo.ClrEHUnwindMap.size();
+ assert(NumStates > 0 && "Don't need exception table!");
+ DenseMap<const MachineBasicBlock *, int> HandlerStates;
+ for (int State = 0; State < NumStates; ++State) {
+ MachineBasicBlock *HandlerBlock =
+ FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
+ HandlerStates[HandlerBlock] = State;
+ // Use this loop through all handlers to verify our assumption (used in
+ // the MinEnclosingState computation) that ancestors have lower state
+ // numbers than their descendants.
+ assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
+ "ill-formed state numbering");
+ }
+ // Map the main function to the NullState.
+ HandlerStates[&MF->front()] = NullState;
+
+ // Write out a sentinel indicating the end of the standard (Windows) xdata
+ // and the start of the additional (CLR) info.
+ OS.EmitIntValue(0xffffffff, 4);
+ // Write out the number of funclets
+ OS.EmitIntValue(NumStates, 4);
+
+ // Walk the machine blocks/instrs, computing and emitting a few things:
+ // 1. Emit a list of the offsets to each handler entry, in lexical order.
+ // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end.
+ // 3. Compute the list of ClrClauses, in the required order (inner before
+ // outer, earlier before later; the order by which a forward scan with
+ // early termination will find the innermost enclosing clause covering
+ // a given address).
+ // 4. A map (MinClauseMap) from each handler index to the index of the
+ // outermost funclet/function which contains a try clause targeting the
+ // key handler. This will be used to determine IsDuplicate-ness when
+ // emitting ClrClauses. The NullState value is used to indicate that the
+ // top-level function contains a try clause targeting the key handler.
+ // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for
+ // try regions we entered before entering the PendingState try but which
+ // we haven't yet exited.
+ SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack;
+ // EndSymbolMap and MinClauseMap are maps described above.
+ std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]);
+ SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
+
+ // Visit the root function and each funclet.
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ int FuncletState = HandlerStates[&*FuncletStart];
+ // Find the end of the funclet
+ MCSymbol *EndSymbol = FuncEndSym;
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd);
+ break;
+ }
}
+ // Emit the function/funclet end and, if this is a funclet (and not the
+ // root function), record it in the EndSymbolMap.
+ OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4);
+ if (FuncletState != NullState) {
+ // Record the end of the handler.
+ EndSymbolMap[FuncletState] = EndSymbol;
+ }
+
+ // Walk the state changes in this function/funclet and compute its clauses.
+ // Funclets always start in the null state.
+ const MCSymbol *CurrentStartLabel = nullptr;
+ int CurrentState = NullState;
+ assert(HandlerStack.empty());
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
+ // Close any try regions we're not still under
+ int AncestorState =
+ getAncestor(FuncInfo, CurrentState, StateChange.NewState);
+ while (CurrentState != AncestorState) {
+ assert(CurrentState != NullState && "Failed to find ancestor!");
+ // Close the pending clause
+ Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
+ CurrentState, FuncletState});
+ // Now the parent handler is current
+ CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
+ // Pop the new start label from the handler stack if we've exited all
+ // descendants of the corresponding handler.
+ if (HandlerStack.back().second == CurrentState)
+ CurrentStartLabel = HandlerStack.pop_back_val().first;
+ }
+
+ if (StateChange.NewState != CurrentState) {
+ // For each clause we're starting, update the MinClauseMap so we can
+ // know which is the topmost funclet containing a clause targeting
+ // it.
+ for (int EnteredState = StateChange.NewState;
+ EnteredState != CurrentState;
+ EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
+ int &MinEnclosingState = MinClauseMap[EnteredState];
+ if (FuncletState < MinEnclosingState)
+ MinEnclosingState = FuncletState;
+ }
+ // Save the previous current start/label on the stack and update to
+ // the newly-current start/state.
+ HandlerStack.emplace_back(CurrentStartLabel, CurrentState);
+ CurrentStartLabel = StateChange.NewStartLabel;
+ CurrentState = StateChange.NewState;
+ }
+ }
+ assert(HandlerStack.empty());
+ }
+
+ // Now emit the clause info, starting with the number of clauses.
+ OS.EmitIntValue(Clauses.size(), 4);
+ for (ClrClause &Clause : Clauses) {
+ // Emit a CORINFO_EH_CLAUSE :
+ /*
+ struct CORINFO_EH_CLAUSE
+ {
+ CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag
+ DWORD TryOffset;
+ DWORD TryLength; // actually TryEndOffset
+ DWORD HandlerOffset;
+ DWORD HandlerLength; // actually HandlerEndOffset
+ union
+ {
+ DWORD ClassToken; // use for catch clauses
+ DWORD FilterOffset; // use for filter clauses
+ };
+ };
+
+ enum CORINFO_EH_CLAUSE_FLAGS
+ {
+ CORINFO_EH_CLAUSE_NONE = 0,
+ CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter
+ CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause
+ CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause
+ };
+ typedef enum CorExceptionFlag
+ {
+ COR_ILEXCEPTION_CLAUSE_NONE,
+ COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause
+ COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause
+ COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause
+ COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This
+ // clause was duplicated
+ // to a funclet which was
+ // pulled out of line
+ } CorExceptionFlag;
+ */
+ // Add 1 to the start/end of the EH clause; the IP associated with a
+ // call when the runtime does its scan is the IP of the next instruction
+ // (the one to which control will return after the call), so we need
+ // to add 1 to the end of the clause to cover that offset. We also add
+ // 1 to the start of the clause to make sure that the ranges reported
+ // for all clauses are disjoint. Note that we'll need some additional
+ // logic when machine traps are supported, since in that case the IP
+ // that the runtime uses is the offset of the faulting instruction
+ // itself; if such an instruction immediately follows a call but the
+ // two belong to different clauses, we'll need to insert a nop between
+ // them so the runtime can distinguish the point to which the call will
+ // return from the point at which the fault occurs.
+
+ const MCExpr *ClauseBegin =
+ getOffsetPlusOne(Clause.StartLabel, FuncBeginSym);
+ const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
+
+ const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
+ MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>();
+ MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
+ const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
+ MCSymbol *EndSym = EndSymbolMap[Clause.State];
+ const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym);
+
+ uint32_t Flags = 0;
+ switch (Entry.HandlerType) {
+ case ClrHandlerType::Catch:
+ // Leaving bits 0-2 clear indicates catch.
+ break;
+ case ClrHandlerType::Filter:
+ Flags |= 1;
+ break;
+ case ClrHandlerType::Finally:
+ Flags |= 2;
+ break;
+ case ClrHandlerType::Fault:
+ Flags |= 4;
+ break;
+ }
+ if (Clause.EnclosingState != MinClauseMap[Clause.State]) {
+ // This is a "duplicate" clause; the handler needs to be entered from a
+ // frame above the one holding the invoke.
+ assert(Clause.EnclosingState > MinClauseMap[Clause.State]);
+ Flags |= 8;
+ }
+ OS.EmitIntValue(Flags, 4);
+
+ // Write the clause start/end
+ OS.EmitValue(ClauseBegin, 4);
+ OS.EmitValue(ClauseEnd, 4);
+
+ // Write out the handler start/end
+ OS.EmitValue(HandlerBegin, 4);
+ OS.EmitValue(HandlerEnd, 4);
+
+ // Write out the type token or filter offset
+ assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters");
+ OS.EmitIntValue(Entry.TypeToken, 4);
}
}
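The flag word written per clause packs the handler kind into bits 0-2 and the duplicate marker into bit 3, matching the CorExceptionFlag values quoted in the comment above. A small standalone model of that encoding:

#include <cstdint>

enum class ClrHandlerType { Catch, Filter, Finally, Fault };

// Catch leaves bits 0-2 clear; the duplicate bit is set when the clause
// must be entered from a frame above the one holding the invoke.
uint32_t clauseFlags(ClrHandlerType Kind, bool Duplicate) {
  uint32_t Flags = 0;
  switch (Kind) {
  case ClrHandlerType::Catch:   break;             // 0
  case ClrHandlerType::Filter:  Flags |= 1; break; // ..._CLAUSE_FILTER
  case ClrHandlerType::Finally: Flags |= 2; break; // ..._CLAUSE_FINALLY
  case ClrHandlerType::Fault:   Flags |= 4; break; // ..._CLAUSE_FAULT
  }
  if (Duplicate)
    Flags |= 8; // ..._CLAUSE_DUPLICATED
  return Flags;
}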
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index 669c9cc..acb3010 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -21,6 +21,7 @@ class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
+class Value;
struct WinEHFuncInfo;
class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
@@ -36,7 +37,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if this is a 64-bit target and we should use image relative offsets.
bool useImageRel32 = false;
- void emitCSpecificHandlerTable();
+ /// Pointer to the current funclet entry BB.
+ const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+
+ void emitCSpecificHandlerTable(const MachineFunction *MF);
+
+ void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State);
/// Emit the EH table data for 32-bit and 64-bit functions using
/// the __CxxFrameHandler3 personality.
@@ -47,8 +55,11 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// tables.
void emitExceptHandlerTable(const MachineFunction *MF);
- void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF,
- WinEHFuncInfo &FuncInfo);
+ void emitCLRExceptionTable(const MachineFunction *MF);
+
+ void computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
/// Emits the label used with llvm.x86.seh.recoverfp, which is used by
/// outlined funclets.
@@ -57,6 +68,16 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+ const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom);
+
+ /// Gets the offset that we should use in a table for a stack object with the
+ /// given index. For targets using CFI (Win64, etc), this is relative to the
+ /// established SP at the end of the prologue. For targets without CFI (Win32
+ /// only), it is relative to the frame pointer.
+ int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
public:
//===--------------------------------------------------------------------===//
@@ -74,6 +95,10 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
+
+ /// \brief Emit target-specific EH funclet machinery.
+ void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+ void endFunclet() override;
};
}
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 530ab46..d12fdb2 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,14 @@
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// target-specific instructions which implement the same semantics in a way
+// which better fits the target backend. This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +24,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -44,13 +49,17 @@ namespace {
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
- bool expandAtomicLoad(LoadInst *LI);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
- bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+ bool expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -108,7 +117,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
(isAtLeastRelease(CASI->getSuccessOrdering()) ||
isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -126,10 +135,28 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
}
- if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
- MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
- MadeChange |= expandAtomicStore(SI);
+ if (LI) {
+ if (LI->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ LI = convertAtomicLoadToIntegerType(LI);
+ assert(LI->getType()->isIntegerTy() && "invariant broken");
+ MadeChange = true;
+ }
+
+ MadeChange |= tryExpandAtomicLoad(LI);
+ } else if (SI) {
+ if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ SI = convertAtomicStoreToIntegerType(SI);
+ assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
+
+ if (TLI->shouldExpandAtomicStoreInIR(SI))
+ MadeChange |= expandAtomicStore(SI);
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
@@ -141,7 +168,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
} else {
MadeChange |= tryExpandAtomicRMW(RMWI);
}
- } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+ } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
@@ -169,11 +196,56 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
return (LeadingFence || TrailingFence);
}
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- if (TLI->hasLoadLinkedStoreConditional())
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+ const DataLayout &DL) {
+ EVT VT = TLI->getValueType(DL, T);
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+ return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+ auto *M = LI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+ M->getDataLayout());
+
+ IRBuilder<> Builder(LI);
+
+ Value *Addr = LI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ auto *NewLI = Builder.CreateLoad(NewAddr);
+ NewLI->setAlignment(LI->getAlignment());
+ NewLI->setVolatile(LI->isVolatile());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+ Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+ LI->replaceAllUsesWith(NewVal);
+ LI->eraseFromParent();
+ return NewLI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(
+ LI, LI->getPointerOperand(), LI->getOrdering(),
+ [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+ case TargetLoweringBase::AtomicExpansionKind::LLOnly:
return expandAtomicLoadToLL(LI);
- else
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -184,6 +256,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
Value *Val =
TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
LI->replaceAllUsesWith(Val);
LI->eraseFromParent();
@@ -209,6 +282,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
return true;
}
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all. The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store. The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+ IRBuilder<> Builder(SI);
+ auto *M = SI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+ M->getDataLayout());
+ Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+ Value *Addr = SI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+ NewSI->setAlignment(SI->getAlignment());
+ NewSI->setVolatile(SI->isVolatile());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+ SI->eraseFromParent();
+ return NewSI;
+}
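The "bitcast idiom" these two helpers produce has a direct analogue in ordinary C++: represent the atomic slot with the same-width integer type and convert values at the boundary. A sketch assuming C++20 for std::bit_cast; the function names are illustrative:

#include <atomic>
#include <bit>
#include <cstdint>

// A 32-bit float atomic expressed as an i32 atomic plus bitcasts, the
// same shape as the converted load/store the pass emits.
void atomicStoreFloat(std::atomic<uint32_t> &Slot, float V) {
  Slot.store(std::bit_cast<uint32_t>(V), std::memory_order_seq_cst);
}

float atomicLoadFloat(const std::atomic<uint32_t> &Slot) {
  return std::bit_cast<float>(Slot.load(std::memory_order_seq_cst));
}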
+
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
@@ -226,23 +328,15 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
- case TargetLoweringBase::AtomicRMWExpansionKind::None:
- return false;
- case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
- assert(TLI->hasLoadLinkedStoreConditional() &&
- "TargetLowering requested we expand AtomicRMW instruction into "
- "load-linked/store-conditional combos, but such instructions aren't "
- "supported");
-
- return expandAtomicRMWToLLSC(AI);
- }
- case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
- return expandAtomicRMWToCmpXchg(AI);
- }
- }
- llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ Value* Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -282,10 +376,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
}
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+ [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(),
+ Builder, Loaded,
+ AI->getValOperand());
+ });
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+ return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+ }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ BasicBlock *BB = I->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
@@ -303,11 +415,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
// atomicrmw.end:
// fence?
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
+ // This grabs the DebugLoc from I.
+ IRBuilder<> Builder(I);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
@@ -320,8 +432,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
Builder.SetInsertPoint(LoopBB);
Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+ Value *NewVal = PerformOp(Builder, Loaded);
Value *StoreSuccess =
TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
@@ -331,72 +442,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // %init_loaded = load atomic iN* %addr
- // br label %loop
- // loop:
- // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
- // %new = some_op iN %loaded, %incr
- // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
- // %new_loaded = extractvalue { iN, i1 } %pair, 0
- // %success = extractvalue { iN, i1 } %pair, 1
- // br i1 %success, label %atomicrmw.end, label %loop
- // atomicrmw.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we want a load. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
- // Atomics require at least natural alignment.
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
- Loaded->addIncoming(InitLoaded, BB);
-
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
- Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, MemOpOrder,
- AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
- Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
- Loaded->addIncoming(NewLoaded, LoopBB);
-
- Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
- Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-
- AI->replaceAllUsesWith(NewLoaded);
- AI->eraseFromParent();
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
return true;
}
@@ -424,7 +471,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
+ // label %cmpxchg.nostore
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
@@ -432,6 +479,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
@@ -440,9 +490,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
@@ -466,7 +517,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
@@ -482,6 +533,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
/*IsLoad=*/true);
Builder.CreateBr(ExitBB);
+ Builder.SetInsertPoint(NoStoreBB);
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
Builder.SetInsertPoint(FailureBB);
TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
/*IsLoad=*/true);
@@ -556,9 +614,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
- if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
- expandAtomicLoad(ResultingLoad);
+ tryExpandAtomicLoad(ResultingLoad);
return true;
}
return false;
}
+
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ assert(AI);
+
+ AtomicOrdering MemOpOrder =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ // Atomics require at least natural alignment.
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *NewLoaded = nullptr;
+ Value *Success = nullptr;
+
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+ Success, NewLoaded);
+ assert(Success && NewLoaded);
+
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}
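The loop this function builds is the classic compare-exchange retry loop. In plain C++, std::atomic's compare_exchange_weak plays the role of the emitted cmpxchg and re-feeds the loaded value on failure just as the phi does above. A sketch, using max as the sample operation:

#include <atomic>

// Returns the value observed by the RMW, mirroring atomicrmw semantics.
int atomicFetchMax(std::atomic<int> &A, int Incr) {
  int Loaded = A.load(std::memory_order_relaxed); // %init_loaded
  int New;
  do {
    New = Loaded > Incr ? Loaded : Incr; // some_op
    // On failure, compare_exchange_weak reloads Loaded, like the phi.
  } while (!A.compare_exchange_weak(Loaded, New));
  return Loaded;
}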
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index db00910..a67e194 100644
--- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -33,6 +33,6 @@ cl::opt<unsigned>
cl::desc("Threshold for partial unrolling"),
cl::Hidden);
-BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F)
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 6182667..604feed 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -12,7 +12,8 @@
// it then removes.
//
// Note that this pass must be run after register allocation, it cannot handle
-// SSA form.
+// SSA form. It also must handle virtual registers for targets that emit a
+// virtual ISA (e.g. NVPTX).
//
//===----------------------------------------------------------------------===//
@@ -20,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -95,7 +97,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
- PassConfig->getEnableTailMerge();
+ PassConfig->getEnableTailMerge();
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineBranchProbabilityInfo>());
@@ -132,6 +134,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
+ FuncletMembership.erase(MBB);
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
@@ -150,9 +153,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
if (!I->isImplicitDef())
break;
unsigned Reg = I->getOperand(0).getReg();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ImpDefRegs.insert(*SubRegs);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ } else {
+ ImpDefRegs.insert(Reg);
+ }
++I;
}
if (ImpDefRegs.empty())
@@ -163,8 +170,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
if (!TII->isUnpredicatedTerminator(I))
return false;
// See if it uses any of the implicitly defined registers.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
+ for (const MachineOperand &MO : I->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -208,14 +214,17 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
- MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr;
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
- MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- MadeChange |= OptimizeImpDefsBlock(MBB);
+ if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(&MBB);
}
+ // Recalculate funclet membership.
+ FuncletMembership = getFuncletMembership(MF);
+
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
@@ -235,12 +244,9 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
- BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I)
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- MachineOperand &Op = I->getOperand(op);
+ for (const MachineBasicBlock &BB : MF) {
+ for (const MachineInstr &I : BB)
+ for (const MachineOperand &Op : I.operands()) {
if (!Op.isJTI()) continue;
// Remember that this JT is live.
@@ -365,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
// Back past possible debugging pseudos at beginning of block. This matters
// when one block differs from the other only by whether debugging pseudos
- // are present at the beginning. (This way, the various checks later for
+ // are present at the beginning. (This way, the various checks later for
// I1==MBB1->begin() work as expected.)
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
@@ -426,7 +432,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineFunction &MF = *CurMBB.getParent();
// Create the fall-through block.
- MachineFunction::iterator MBBI = &CurMBB;
+ MachineFunction::iterator MBBI = CurMBB.getIterator();
MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB);
CurMBB.getParent()->insert(++MBBI, NewMBB);
@@ -445,6 +451,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// For targets that use the register scavenger, we must maintain LiveIns.
MaintainLiveIns(&CurMBB, NewMBB);
+ // Add the new block to the funclet.
+ const auto &FuncletI = FuncletMembership.find(&CurMBB);
+ if (FuncletI != FuncletMembership.end())
+ FuncletMembership[NewMBB] = FuncletI->second;
+
return NewMBB;
}
@@ -479,7 +490,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
DebugLoc dl; // FIXME: this is nowhere
if (I != MF->end() &&
!TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
- MachineBasicBlock *NextBB = I;
+ MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
@@ -549,14 +560,23 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
-static bool ProfitableToMerge(MachineBasicBlock *MBB1,
- MachineBasicBlock *MBB2,
- unsigned minCommonTailLength,
- unsigned &CommonTailLen,
- MachineBasicBlock::iterator &I1,
- MachineBasicBlock::iterator &I2,
- MachineBasicBlock *SuccBB,
- MachineBasicBlock *PredBB) {
+static bool
+ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength, unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ // It is never profitable to tail-merge blocks from two different funclets.
+ if (!FuncletMembership.empty()) {
+ auto Funclet1 = FuncletMembership.find(MBB1);
+ assert(Funclet1 != FuncletMembership.end());
+ auto Funclet2 = FuncletMembership.find(MBB2);
+ assert(Funclet2 != FuncletMembership.end());
+ if (Funclet1->second != Funclet2->second)
+ return false;
+ }
+
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
if (CommonTailLen == 0)
return false;
@@ -600,12 +620,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- (I1 == MBB1->begin() || I2 == MBB2->begin()))
- return true;
-
- return false;
+ return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin());
}
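The new funclet check at the top of ProfitableToMerge reduces to one rule: never merge tails across funclet boundaries. A standalone sketch of that guard (MBB and sameFunclet are illustrative names):

#include <unordered_map>

struct MBB; // stand-in for MachineBasicBlock

// Blocks mapped to different funclet IDs must never be tail-merged; an
// empty map means the function has no funclets, so merging is allowed.
bool sameFunclet(const std::unordered_map<const MBB *, int> &Membership,
                 const MBB *A, const MBB *B) {
  if (Membership.empty())
    return true;
  return Membership.at(A) == Membership.at(B);
}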
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
@@ -634,7 +650,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
minCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
- SuccBB, PredBB)) {
+ SuccBB, PredBB,
+ FuncletMembership)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -776,7 +793,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
- MBBICommon->clearMemRefs();
+ MBBICommon->dropMemRefs();
++MBBI;
++MBBICommon;
@@ -840,8 +857,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
- getParent()->begin();
+ MachineBasicBlock *EntryBB =
+ &MergePotentials.front().getBlock()->getParent()->front();
unsigned commonTailIndex = SameTails.size();
// If there are two blocks, check to see if one can be made to fall through
// into the other.
@@ -917,12 +934,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// First find blocks with no successors.
MergePotentials.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
- if (TriedMerging.count(I))
- continue;
- if (I->succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB));
}
// If this is a large problem, avoid visiting the same basic blocks
@@ -958,13 +974,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
I != E; ++I) {
if (I->pred_size() < 2) continue;
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
- MachineBasicBlock *IBB = I;
- MachineBasicBlock *PredBB = std::prev(I);
+ MachineBasicBlock *IBB = &*I;
+ MachineBasicBlock *PredBB = &*std::prev(I);
MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
- E2 = I->pred_end();
- P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
- MachineBasicBlock *PBB = *P;
+ for (MachineBasicBlock *PBB : I->predecessors()) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+
if (TriedMerging.count(PBB))
continue;
@@ -977,7 +993,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// Skip blocks which may jump to a landing pad. Can't tail merge these.
- if (PBB->getLandingPadSuccessor())
+ if (PBB->hasEHPadSuccessor())
continue;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -990,18 +1006,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
- if (!FBB)
- FBB = std::next(MachineFunction::iterator(PBB));
+ if (!FBB) {
+ auto Next = ++PBB->getIterator();
+ if (Next != MF.end())
+ FBB = &*Next;
+ }
}
// Failing case: the only way IBB can be reached from PBB is via
// exception handling. Happens for landing pads. Would be nice to have
// a bit in the edge so we didn't have to do all this.
- if (IBB->isLandingPad()) {
- MachineFunction::iterator IP = PBB; IP++;
+ if (IBB->isEHPad()) {
+ MachineFunction::iterator IP = ++PBB->getIterator();
MachineBasicBlock *PredNextBB = nullptr;
if (IP != MF.end())
- PredNextBB = IP;
+ PredNextBB = &*IP;
if (!TBB) {
if (IBB != PredNextBB) // fallthrough
continue;
@@ -1027,7 +1046,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB));
}
}
@@ -1042,7 +1061,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Reinsert an unconditional branch if needed. The 1 below can occur as a
// result of removing blocks in TryTailMergeBlocks.
- PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks
+ PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -1080,13 +1099,19 @@ void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
if (TailMBB.succ_size() <= 1)
return;
- auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end());
- uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1;
+ auto SumEdgeFreq =
+ std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))
+ .getFrequency();
auto EdgeFreq = EdgeFreqLs.begin();
- for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
- SuccI != SuccE; ++SuccI, ++EdgeFreq)
- TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale);
+ if (SumEdgeFreq > 0) {
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq) {
+ auto Prob = BranchProbability::getBranchProbability(
+ EdgeFreq->getFrequency(), SumEdgeFreq);
+ TailMBB.setSuccProbability(SuccI, Prob);
+ }
+ }
}
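The rewritten weight code normalizes each edge frequency by the sum rather than scaling by the maximum, so successor probabilities now sum to (approximately) one. A sketch of the computation, using double in place of LLVM's fixed-point BranchProbability:

#include <cstdint>
#include <vector>

// Each successor edge gets freq / sum; a zero sum leaves all
// probabilities at zero, matching the SumEdgeFreq > 0 guard above.
std::vector<double> edgeProbabilities(const std::vector<uint64_t> &Freqs) {
  uint64_t Sum = 0;
  for (uint64_t F : Freqs)
    Sum += F;
  std::vector<double> Probs(Freqs.size(), 0.0);
  if (Sum == 0)
    return Probs;
  for (size_t I = 0; I < Freqs.size(); ++I)
    Probs[I] = double(Freqs[I]) / double(Sum);
  return Probs;
}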
//===----------------------------------------------------------------------===//
@@ -1098,10 +1123,12 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Make sure blocks are numbered in order
MF.RenumberBlocks();
+  // Renumbering blocks alters funclet membership; recalculate it.
+ FuncletMembership = getFuncletMembership(MF);
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
MadeChange |= OptimizeBlock(MBB);
// If it is dead, remove it.
@@ -1111,6 +1138,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
++NumDeadBlocks;
}
}
+
return MadeChange;
}
@@ -1167,20 +1195,31 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineFunction &MF = *MBB->getParent();
ReoptimizeBlock:
- MachineFunction::iterator FallThrough = MBB;
+ MachineFunction::iterator FallThrough = MBB->getIterator();
++FallThrough;
+ // Make sure MBB and FallThrough belong to the same funclet.
+ bool SameFunclet = true;
+ if (!FuncletMembership.empty() && FallThrough != MF.end()) {
+ auto MBBFunclet = FuncletMembership.find(MBB);
+ assert(MBBFunclet != FuncletMembership.end());
+ auto FallThroughFunclet = FuncletMembership.find(&*FallThrough);
+ assert(FallThroughFunclet != FuncletMembership.end());
+ SameFunclet = MBBFunclet->second == FallThroughFunclet->second;
+ }
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
// optimized away.
- if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
+ if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
+ SameFunclet) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
- } else if (FallThrough->isLandingPad()) {
+ } else if (FallThrough->isEHPad()) {
// Don't rewrite to a landing pad fallthrough. That could lead to the case
// where a BB jumps to more than one landing pad.
// TODO: Is it ever worth rewriting predecessors which don't already
@@ -1190,12 +1229,12 @@ ReoptimizeBlock:
// instead.
while (!MBB->pred_empty()) {
MachineBasicBlock *Pred = *(MBB->pred_end()-1);
- Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
}
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
- MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
+ MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);
MadeChange = true;
}
return MadeChange;
@@ -1237,7 +1276,7 @@ ReoptimizeBlock:
// AnalyzeBranch.
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
PrevBB.succ_size() == 1 &&
- !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
DEBUG(dbgs() << "\nMerging into block: " << PrevBB
<< "From MBB: " << *MBB);
// Remove redundant DBG_VALUEs first.
@@ -1333,7 +1372,7 @@ ReoptimizeBlock:
TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
// Move this block to the end of the function.
- MBB->moveAfter(--MF.end());
+ MBB->moveAfter(&MF.back());
MadeChange = true;
++NumBranchOpts;
return MadeChange;
@@ -1371,7 +1410,7 @@ ReoptimizeBlock:
// other blocks across it.
if (CurTBB && CurCond.empty() && !CurFBB &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
- !MBB->hasAddressTaken()) {
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
@@ -1468,14 +1507,11 @@ ReoptimizeBlock:
// see if it has a fall-through into its successor.
bool CurFallsThru = MBB->canFallThrough();
- if (!MBB->isLandingPad()) {
+ if (!MBB->isEHPad()) {
// Check all the predecessors of this block. If one of them has no fall
// throughs, move this block right after it.
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- E = MBB->pred_end(); PI != E; ++PI) {
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) {
// Analyze the branch at the end of the pred.
- MachineBasicBlock *PredBB = *PI;
- MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (PredBB != MBB && !PredBB->canFallThrough() &&
@@ -1493,8 +1529,7 @@ ReoptimizeBlock:
// B elsewhere
// next:
if (CurFallsThru) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
CurCond.clear();
TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
}
@@ -1507,11 +1542,9 @@ ReoptimizeBlock:
if (!CurFallsThru) {
// Check all successors to see if we can move this block before it.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock *SuccBB : MBB->successors()) {
// Analyze the branch at the end of the block before the succ.
- MachineBasicBlock *SuccBB = *SI;
- MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+ MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
// If this block doesn't already fall-through to that successor, and if
// the succ doesn't already have a block that can fall through into it,
@@ -1519,7 +1552,7 @@ ReoptimizeBlock:
// fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB &&
!SuccPrev->canFallThrough() && !CurUnAnalyzable &&
- !SuccBB->isLandingPad()) {
+ !SuccBB->isEHPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
goto ReoptimizeBlock;
@@ -1531,10 +1564,18 @@ ReoptimizeBlock:
// removed, move this block to the end of the function.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
+ // We're looking for cases where PrevBB could possibly fall through to
+ // FallThrough, but if FallThrough is an EH pad that wouldn't be useful,
+ // so we skip over any EH pads to give ourselves a chance of finding
+ // a branch target from PrevBB.
+ while (FallThrough != MF.end() && FallThrough->isEHPad())
+ ++FallThrough;
+ // Now check to see if the current block is sitting between PrevBB and
+ // a block to which it could fall through.
if (FallThrough != MF.end() &&
!TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
- PrevBB.isSuccessor(FallThrough)) {
- MBB->moveAfter(--MF.end());
+ PrevBB.isSuccessor(&*FallThrough)) {
+ MBB->moveAfter(&MF.back());
MadeChange = true;
return MadeChange;
}
@@ -1553,7 +1594,7 @@ ReoptimizeBlock:
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
MadeChange |= HoistCommonCodeInSuccs(MBB);
}
@@ -1564,15 +1605,23 @@ bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
/// its 'true' successor.
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
MachineBasicBlock *TrueBB) {
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- E = BB->succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock *SuccBB : BB->successors())
if (SuccBB != TrueBB)
return SuccBB;
- }
return nullptr;
}
+template <class Container>
+static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
+ Container &Set) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Set.insert(*AI);
+ } else {
+ Set.insert(Reg);
+ }
+}
+
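
addRegAndItsAliases exists because a physical register overlaps its aliases, so recording only the register itself would under-report interference; virtual registers have no aliases and are inserted directly. A toy illustration of the aliasing concern, using an invented alias table in place of MCRegAliasIterator:

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Invented alias table: on x86, EAX overlaps AX, AH and AL.
  std::map<std::string, std::vector<std::string>> Aliases = {
      {"EAX", {"EAX", "AX", "AH", "AL"}}};
  std::set<std::string> Uses;
  // Like MCRegAliasIterator with IncludeSelf=true: insert the register
  // together with everything that overlaps it.
  for (const std::string &A : Aliases["EAX"])
    Uses.insert(A);
  // A later definition of AL is now correctly seen as clobbering the use.
  assert(Uses.count("AL"));
  return 0;
}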
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
/// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous
@@ -1590,16 +1639,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!TII->isUnpredicatedTerminator(Loc))
return MBB->end();
- for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = Loc->getOperand(i);
+ for (const MachineOperand &MO : Loc->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1608,8 +1655,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// If the terminator defines a register, make sure we don't hoist
// the instruction whose def might be clobbered by the terminator.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1626,8 +1672,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
--PI;
bool IsDef = false;
- for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
- const MachineOperand &MO = PI->getOperand(i);
+ for (const MachineOperand &MO : PI->operands()) {
// If PI has a regmask operand, it is probably a call. Separate away.
if (MO.isRegMask())
return Loc;
@@ -1636,8 +1681,10 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (Uses.count(Reg))
+ if (Uses.count(Reg)) {
IsDef = true;
+ break;
+ }
}
if (!IsDef)
// The condition setting instruction is not just before the conditional
@@ -1657,23 +1704,22 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// Find out what registers are live. Note this routine is ignoring other live
// registers which are only used by instructions in successor blocks.
- for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = PI->getOperand(i);
+ for (const MachineOperand &MO : PI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
}
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1737,8 +1783,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
bool IsSafe = true;
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (MachineOperand &MO : TIB->operands()) {
// Don't attempt to hoist instructions with register masks.
if (MO.isRegMask()) {
IsSafe = false;
@@ -1793,28 +1838,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
// Remove kills from LocalDefsSet, these registers had short live ranges.
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.erase(*AI);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ } else {
+ LocalDefsSet.erase(Reg);
+ }
}
// Track local defs so we can update liveins.
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = TIB->getOperand(i);
+ for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, LocalDefsSet);
}
HasDups = true;
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 46c05dc..d759d53 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -54,6 +54,7 @@ namespace llvm {
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
class SameTailElt {
MPIterator MPIter;
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index d08fae0..abc655a 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -24,6 +25,7 @@ using namespace llvm;
void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
MachineFunction &MF,
+ VirtRegMap *VRM,
const MachineLoopInfo &MLI,
const MachineBlockFrequencyInfo &MBFI,
VirtRegAuxInfo::NormalizingFn norm) {
@@ -31,7 +33,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
<< "********** Function: " << MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
- VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm);
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
@@ -74,7 +76,10 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
// Check if all values in LI are rematerializable
static bool isRematerializable(const LiveInterval &LI,
const LiveIntervals &LIS,
+ VirtRegMap *VRM,
const TargetInstrInfo &TII) {
+ unsigned Reg = LI.reg;
+ unsigned Original = VRM ? VRM->getOriginal(Reg) : 0;
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -86,6 +91,36 @@ static bool isRematerializable(const LiveInterval &LI,
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
assert(MI && "Dead valno in interval");
+ // Trace copies introduced by live range splitting. The inline
+ // spiller can rematerialize through these copies, so the spill
+ // weight must reflect this.
+ if (VRM) {
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+ // If the original (pre-splitting) registers match this
+ // copy came from a split.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ VRM->getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+ }
+ }
+
if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
return false;
}
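
The new copy-chasing loop relies on VirtRegMap::getOriginal, which maps every register created by live range splitting back to the pre-splitting register; a full copy is only followed when both ends share the same original. A sketch of that invariant with invented register numbers:

#include <cassert>
#include <map>

int main() {
  // getOriginal(): each split register maps back to its pre-split parent.
  std::map<unsigned, unsigned> Original = {{100, 100}, {101, 100},
                                           {102, 100}, {200, 200}};
  unsigned Reg = 102;                 // interval being weighed
  unsigned Orig = Original[Reg];
  unsigned SplitCopySrc = 101;        // %102 = COPY %101 (from splitting)
  unsigned UnrelatedSrc = 200;        // %102 = COPY %200 (real data flow)
  // isRematerializable only walks through the first kind of copy.
  assert(Original[SplitCopySrc] == Orig);
  assert(Original[UnrelatedSrc] != Orig);
  return 0;
}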
@@ -188,7 +223,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo()))
+ if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
li.weight = normalize(totalWeight, li.getSize(), numInstr);
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index fb29b1d..23c0d54 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
+ MaxStackArgAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
+ unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
StackOffset = SavedStackOffset;
+ MaxStackArgAlign = SavedMaxStackArgAlign;
Locs.resize(NumLocs);
}
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 155c5ec..dc13b5b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeExpandISelPseudosPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
+ initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
@@ -66,6 +67,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6ab6acc..5844124 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -63,6 +64,9 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumAndsAdded,
+ "Number of and mask instructions added to form ext loads");
+STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
@@ -109,25 +113,18 @@ static cl::opt<bool> StressExtLdPromotion(
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-struct TypeIsSExt {
- Type *Ty;
- bool IsSExt;
- TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
-};
+typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
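
Replacing the two-field struct with PointerIntPair<Type *, 1, bool> packs the IsSExt flag into the low bit of the Type pointer, which alignment guarantees is free, so each DenseMap value shrinks to a single word. The underlying trick, sketched in plain C++ without LLVM's alignment machinery:

#include <cassert>
#include <cstdint>

struct TypeStub { int Payload; }; // stand-in for llvm::Type

int main() {
  alignas(4) static TypeStub T;   // aligned, so bit 0 of &T is always zero
  bool IsSExt = true;
  // Pack: pointer and flag share one word.
  uintptr_t Packed = reinterpret_cast<uintptr_t>(&T) | uintptr_t(IsSExt);
  // Unpack: mask the flag back out before dereferencing.
  TypeStub *Ty = reinterpret_cast<TypeStub *>(Packed & ~uintptr_t(1));
  bool Flag = Packed & 1;
  assert(Ty == &T && Flag);
  return 0;
}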
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
const TargetMachine *TM;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- /// CurInstIterator - As we scan instructions optimizing them, this is the
- /// next instruction to optimize. Xforms that can invalidate this should
- /// update it.
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of non-local addresses that have been sunk into a block.
@@ -141,10 +138,10 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway.
+ /// True if CFG is modified in any way.
bool ModifiedDT;
- /// OptSize - True if optimizing for size.
+ /// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
@@ -167,30 +164,33 @@ class TypePromotionTransaction;
}
private:
- bool EliminateFallThrough(Function &F);
- bool EliminateMostlyEmptyBlocks(Function &F);
- bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
- bool OptimizeInst(Instruction *I, bool& ModifiedDT);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr,
+ bool eliminateFallThrough(Function &F);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool optimizeInst(Instruction *I, bool& ModifiedDT);
+ bool optimizeMemoryInst(Instruction *I, Value *Addr,
Type *AccessTy, unsigned AS);
- bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool MoveExtToFormExtLoad(Instruction *&I);
- bool OptimizeExtUses(Instruction *I);
- bool OptimizeSelectInst(SelectInst *SI);
- bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
- bool OptimizeExtractElementInst(Instruction *Inst);
- bool DupRetToEnableTailCallOpts(BasicBlock *BB);
- bool PlaceDbgValues(Function &F);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeLoadExt(LoadInst *I);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool optimizeSwitchInst(SwitchInst *CI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool placeDbgValues(Function &F);
bool sinkAndCmp(Function &F);
- bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ void stripInvariantGroupMetadata(Instruction &I);
};
}
@@ -218,7 +218,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
+ OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -231,12 +231,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
- EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ EverMadeChange |= eliminateMostlyEmptyBlocks(F);
// If llvm.dbg.value is far away from the value then iSel may not be able to
// handle it properly. iSel will drop llvm.dbg.value if it cannot
// find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F);
+ EverMadeChange |= placeDbgValues(F);
// If there is a mask, compare against zero, and branch that can be combined
// into a single target instruction, push the mask and compare into branch
@@ -251,9 +251,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
- MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -296,7 +296,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
if (EverMadeChange || MadeChange)
- MadeChange |= EliminateFallThrough(F);
+ MadeChange |= eliminateFallThrough(F);
EverMadeChange |= MadeChange;
}
@@ -314,14 +314,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
-/// EliminateFallThrough - Merge basic blocks which are connected
-/// by a single edge, where one of the basic blocks has a single successor
-/// pointing to the other basic block, which has a single predecessor.
-bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+/// Merge basic blocks which are connected by a single edge, where one of the
+/// basic blocks has a single successor pointing to the other basic block,
+/// which has a single predecessor.
+bool CodeGenPrepare::eliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
@@ -342,22 +342,21 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
// We have erased a block. Update the iterator.
- I = BB;
+ I = BB->getIterator();
}
}
return Changed;
}
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
-/// debug info directives, and an unconditional branch. Passes before isel
-/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
-/// isel. Start by eliminating these blocks so we can split them the way we
-/// want them.
-bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
+/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
+/// edges in ways that are non-optimal for isel. Start by eliminating these
+/// blocks so we can split them the way we want them.
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
@@ -366,7 +365,7 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI;
+ BasicBlock::iterator BBI = BI->getIterator();
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
@@ -383,19 +382,19 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
if (DestBB == BB)
continue;
- if (!CanMergeBlocks(BB, DestBB))
+ if (!canMergeBlocks(BB, DestBB))
continue;
- EliminateMostlyEmptyBlock(BB);
+ eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}
-/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
-/// single uncond branch between them, and BB contains no other non-phi
+/// Return true if we can merge BB into DestBB if there is a single
+/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
-bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
const BasicBlock *DestBB) const {
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there are more complex conditions (e.g. preheaders),
@@ -461,9 +460,9 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
}
-/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
-/// an unconditional branch in it.
-void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+/// Eliminate a basic block that has only phi's and an unconditional branch in
+/// it.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
@@ -594,6 +593,14 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
continue;
}
+ if (RelocatedBase->getParent() != ToReplace->getParent()) {
+ // Base and derived relocates are in different basic blocks.
+ // In this case transform is only valid when base dominates derived
+ // relocate. However it would be too expensive to check dominance
+ // for each such relocate, so we skip the whole transformation.
+ continue;
+ }
+
Value *Base = ThisRelocate.getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
@@ -631,21 +638,20 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// In this case, we cannot find the bitcast any more. So we insert a new bitcast
// whether there is already one or not. In this way, we can handle all cases, and
// the extra bitcast should be optimized away in later passes.
- Instruction *ActualRelocatedBase = RelocatedBase;
+ Value *ActualRelocatedBase = RelocatedBase;
if (RelocatedBase->getType() != Base->getType()) {
ActualRelocatedBase =
- cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ Builder.CreateBitCast(RelocatedBase, Base->getType());
}
Value *Replacement = Builder.CreateGEP(
Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
- Instruction *ReplacementInst = cast<Instruction>(Replacement);
Replacement->takeName(ToReplace);
// If the newly generated derived pointer's type does not match the original derived
// pointer's type, cast the new derived pointer to match it. Same reasoning as above.
- Instruction *ActualReplacement = ReplacementInst;
- if (ReplacementInst->getType() != ToReplace->getType()) {
+ Value *ActualReplacement = Replacement;
+ if (Replacement->getType() != ToReplace->getType()) {
ActualReplacement =
- cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ Builder.CreateBitCast(Replacement, ToReplace->getType());
}
ToReplace->replaceAllUsesWith(ActualReplacement);
ToReplace->eraseFromParent();
@@ -723,6 +729,12 @@ static bool SinkCast(CastInst *CI) {
// Preincrement use iterator so we don't invalidate it.
++UI;
+ // If the block selected to receive the cast is an EH pad that does not
+ // allow non-PHI instructions before the terminator, we can't sink the
+ // cast.
+ if (UserBB->getTerminator()->isEHPad())
+ continue;
+
// If this user is in the same block as the cast, don't change the cast.
if (UserBB == DefBB) continue;
@@ -731,9 +743,9 @@ static bool SinkCast(CastInst *CI) {
if (!InsertedCast) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCast =
- CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
- InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
+ CI->getType(), "", &*InsertPt);
}
// Replace a use of the cast with a use of the new cast.
@@ -751,10 +763,9 @@ static bool SinkCast(CastInst *CI) {
return MadeChange;
}
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
+/// If the specified cast instruction is a noop copy (e.g. it's casting from
+/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
+/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
@@ -789,8 +800,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
-/// CombineUAddWithOverflow - try to combine CI into a call to the
-/// llvm.uadd.with.overflow intrinsic if possible.
+/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
+/// possible.
///
/// Return true if any changes were made.
static bool CombineUAddWithOverflow(CmpInst *CI) {
@@ -818,7 +829,7 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
assert(*AddI->user_begin() == CI && "expected!");
#endif
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
@@ -836,16 +847,16 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
return true;
}
-/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
-/// the number of virtual registers that must be created and coalesced. This is
-/// a clear win except on targets with multiple condition code registers
-/// (PowerPC), where it might lose; some adjustment may be wanted there.
+/// Sink the given CmpInst into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced. This is a clear win except on
+/// targets with multiple condition code registers (PowerPC), where it might
+/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
- /// InsertedCmp - Only insert a cmp in each block once.
+ /// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
@@ -872,10 +883,10 @@ static bool SinkCmpExpression(CmpInst *CI) {
if (!InsertedCmp) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
InsertedCmp =
- CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(), CI->getOperand(0),
- CI->getOperand(1), "", InsertPt);
+ CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
+ CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
}
// Replace a use of the cmp with a use of the new cmp.
@@ -903,8 +914,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
return false;
}
-/// isExtractBitsCandidateUse - Check if the candidates could
-/// be combined with shift instruction, which includes:
+/// Check if the candidates could be combined with a shift instruction, which
+/// includes:
/// 1. Truncate instruction
/// 2. And instruction and the imm is a mask of the low bits:
/// imm & (imm+1) == 0
@@ -922,8 +933,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) {
return true;
}
-/// SinkShiftAndTruncate - sink both shift and truncate instruction
-/// to the use of truncate's BB.
+/// Sink both shift and truncate instruction to the use of truncate's BB.
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
@@ -970,20 +980,22 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
if (!InsertedShift && !InsertedTrunc) {
BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ assert(InsertPt != TruncUserBB->end());
// Sink the shift
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift =
- BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
else
- InsertedShift =
- BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
TruncInsertPt++;
+ assert(TruncInsertPt != TruncUserBB->end());
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
- TruncI->getType(), "", TruncInsertPt);
+ TruncI->getType(), "", &*TruncInsertPt);
MadeChange = true;
@@ -993,10 +1005,10 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
return MadeChange;
}
-/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
-/// the uses could potentially be combined with this shift instruction and
-/// generate BitExtract instruction. It will only be applied if the architecture
-/// supports BitExtract instruction. Here is an example:
+/// Sink the shift *right* instruction into user blocks if the uses could
+/// potentially be combined with this shift instruction and generate BitExtract
+/// instruction. It will only be applied if the architecture supports BitExtract
+/// instruction. Here is an example:
/// BB1:
/// %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
@@ -1067,13 +1079,14 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
if (!InsertedShift) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
if (ShiftI->getOpcode() == Instruction::AShr)
- InsertedShift =
- BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
else
- InsertedShift =
- BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
MadeChange = true;
}
@@ -1089,10 +1102,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-// ScalarizeMaskedLoad() translates masked load intrinsic, like
+// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, whith loading element one-by-one if
+// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %1 = bitcast i8* %addr to i32*
@@ -1126,35 +1139,68 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
//
static void ScalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- Type *EltTy = VecType->getElementType();
+ Value *Src0 = CI->getArgOperand(3);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
assert(VecType && "Unexpected return type of masked load intrinsic");
+ Type *EltTy = CI->getType()->getVectorElementType();
+
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
BasicBlock *CondBlock = nullptr;
BasicBlock *PrevIfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
+ Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
Value *UndefVal = UndefValue::get(VecType);
// The result vector
Value *VResult = UndefVal;
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
PHINode *Phi = nullptr;
Value *PrevPhi = UndefVal;
- unsigned VectorWidth = VecType->getNumElements();
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
@@ -1182,16 +1228,17 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst* Load = Builder.CreateLoad(Gep, false);
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -1208,7 +1255,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
CI->eraseFromParent();
}
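
The semantics the scalarization must preserve are those of llvm.masked.load: set lanes read memory, unset lanes take the pass-through value, and an all-ones mask degenerates to an ordinary load. Restated as plain C++ over arrays, with an invented width of 4:

#include <array>

std::array<int, 4> maskedLoad(const int *Addr, std::array<bool, 4> Mask,
                              std::array<int, 4> PassThru) {
  std::array<int, 4> Result = PassThru; // unset lanes keep the pass-through
  for (int i = 0; i < 4; ++i)
    if (Mask[i])                        // only set lanes touch memory
      Result[i] = Addr[i];
  return Result;
}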
-// ScalarizeMaskedStore() translates masked store intrinsic, like
+// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
// <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
@@ -1237,34 +1284,61 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// br label %else2
// . . .
static void ScalarizeMaskedStore(CallInst *CI) {
- Value *Ptr = CI->getArgOperand(1);
Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
Value *Mask = CI->getArgOperand(3);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- Type *EltTy = VecType->getElementType();
-
assert(VecType && "Unexpected data type in masked store intrinsic");
+ Type *EltTy = VecType->getElementType();
+
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
-
unsigned VectorWidth = VecType->getNumElements();
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.store, label %else
+ // br i1 %to_store, label %cond.store, label %else
//
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
@@ -1276,13 +1350,146 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// %EltAddr = getelementptr i32* %1, i32 0
// %store i32 %OneElt, i32* %EltAddr
//
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
Builder.SetInsertPoint(InsertPt);
-
+
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateStore(OneElt, Gep);
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+// Translate a masked gather intrinsic like
+// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, with loading element one-by-one if
+// the appropriate mask bit is set
+//
+// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> %Mask, i32 0
+// % ToLoad0 = icmp eq i1 % Mask0, true
+// br i1 % ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// % Load0 = load i32, i32* % Ptr0, align 4
+// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
+// % Mask1 = extractelement <16 x i1> %Mask, i32 1
+// % ToLoad1 = icmp eq i1 % Mask1, true
+// br i1 % ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// % Load1 = load i32, i32* % Ptr1, align 4
+// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// br label %else2
+// . . .
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void ScalarizeMaskedGather(CallInst *CI) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Take a shortcut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %ToLoad1 = icmp eq i1 %Mask1, true
+ // br i1 %ToLoad1, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToLoad" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %Ptr = extractelement <16 x i32*> %Ptrs, i32 Idx
+ // %Load = load i32, i32* %Ptr
+ // VResult = insertelement <16 x i32> VResult, i32 %Load, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -1290,12 +1497,204 @@ static void ScalarizeMaskedStore(CallInst *CI) {
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
}
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
}
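
For gathers the per-lane contract is the same, except each lane carries its own pointer and an unset lane must never be dereferenced, which is why the expansion above guards every load behind a branch. In plain C++, with an invented width of 4:

#include <array>

std::array<int, 4> maskedGather(std::array<const int *, 4> Ptrs,
                                std::array<bool, 4> Mask,
                                std::array<int, 4> Src) {
  std::array<int, 4> Result = Src;
  for (int i = 0; i < 4; ++i)
    if (Mask[i])            // unset lanes never dereference their pointer
      Result[i] = *Ptrs[i];
  return Result;
}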
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks, that stores element one-by-one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+// . . .
+static void ScalarizeMaskedScatter(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ assert(isa<VectorType>(Src->getType()) &&
+ "Unexpected data type in masked scatter intrinsic");
+ assert(isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+ // Take a shortcut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
+ // % ToStore = icmp eq i1 % Mask1, true
+ // br i1 % ToStore, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp =
+ Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToStore" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // % Elt1 = extractelement <16 x i32> %Src, i32 1
+ // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 % Elt1, i32* % Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
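
ScalarizeMaskedScatter's contract per lane: set lanes store through their own pointer, unset lanes write nothing. A plain C++ restatement with an invented width of 4:

#include <array>

void maskedScatter(std::array<int, 4> Src, std::array<int *, 4> Ptrs,
                   std::array<bool, 4> Mask) {
  for (int i = 0; i < 4; ++i)
    if (Mask[i])            // only set lanes write through their pointer
      *Ptrs[i] = Src[i];
}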
+/// If counting leading or trailing zeros is an expensive operation and a zero
+/// input is defined, add a check for zero to avoid calling the intrinsic.
+///
+/// We want to transform:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+///
+/// into:
+/// entry:
+/// %cmpz = icmp eq i64 %A, 0
+/// br i1 %cmpz, label %cond.end, label %cond.false
+/// cond.false:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+/// br label %cond.end
+/// cond.end:
+/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+///
+/// If the transform is performed, return true and set ModifiedDT to true.
+static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ const TargetLowering *TLI,
+ const DataLayout *DL,
+ bool &ModifiedDT) {
+ if (!TLI || !DL)
+ return false;
+
+ // If a zero input is undefined, it doesn't make sense to despeculate that.
+ if (match(CountZeros->getOperand(1), m_One()))
+ return false;
+
+ // If it's cheap to speculate, there's nothing to do.
+ auto IntrinsicID = CountZeros->getIntrinsicID();
+ if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
+ (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
+ return false;
+
+ // Only handle legal scalar cases. Anything else requires too much work.
+ Type *Ty = CountZeros->getType();
+ unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize())
+ return false;
+
+ // The intrinsic will be sunk behind a compare against zero and branch.
+ BasicBlock *StartBlock = CountZeros->getParent();
+ BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+
+ // Create another block after the count zero intrinsic. A PHI will be added
+ // in this block to select the result of the intrinsic or the bit-width
+ // constant if the input to the intrinsic is zero.
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+ BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+
+ // Set up a builder to create a compare, conditional branch, and PHI.
+ IRBuilder<> Builder(CountZeros->getContext());
+ Builder.SetInsertPoint(StartBlock->getTerminator());
+ Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
+
+ // Replace the unconditional branch that was created by the first split with
+ // a compare against zero and a conditional branch.
+ Value *Zero = Constant::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Create a PHI in the end block to select either the output of the intrinsic
+ // or the bit width of the operand.
+ Builder.SetInsertPoint(&EndBlock->front());
+ PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
+ CountZeros->replaceAllUsesWith(PN);
+ Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
+ PN->addIncoming(BitWidth, StartBlock);
+ PN->addIncoming(CountZeros, CallBlock);
+
+ // We are explicitly handling the zero case, so we can set the intrinsic's
+ // undefined zero argument to 'true'. This will also prevent reprocessing the
+ // intrinsic; we only despeculate when a zero input is defined.
+ CountZeros->setArgOperand(1, Builder.getTrue());
+ ModifiedDT = true;
+ return true;
+}
+
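At the source level, the transform amounts to wrapping the count-zeros intrinsic in an explicit zero check so the intrinsic itself may assume a non-zero input. A sketch in C for a 64-bit cttz (the function name is hypothetical):

    uint64_t cttz64_defined(uint64_t A) {
      // The zero case is decided by the branch, so the builtin is only
      // reached with A != 0 (matching is_zero_undef == true above).
      return A == 0 ? 64 : (uint64_t)__builtin_ctzll(A);
    }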
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -1311,7 +1710,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
// Sink address computing for memory operands into the block.
- if (OptimizeInlineAsmInst(CI))
+ if (optimizeInlineAsmInst(CI))
return true;
}
@@ -1372,14 +1771,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
- WeakVH IterHandle(CurInstIterator);
+ WeakVH IterHandle(&*CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
- if (IterHandle != CurInstIterator) {
+ if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
CurInstIterator = BB->begin();
SunkAddrs.clear();
}
@@ -1387,7 +1786,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
+ if (!TTI->isLegalMaskedLoad(CI->getType())) {
ScalarizeMaskedLoad(CI);
ModifiedDT = true;
return true;
@@ -1395,13 +1794,29 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return false;
}
case Intrinsic::masked_store: {
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
ScalarizeMaskedStore(CI);
ModifiedDT = true;
return true;
}
return false;
}
+ case Intrinsic::masked_gather: {
+ if (!TTI->isLegalMaskedGather(CI->getType())) {
+ ScalarizeMaskedGather(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_scatter: {
+ if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
+ ScalarizeMaskedScatter(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
@@ -1415,6 +1830,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
+ case Intrinsic::invariant_group_barrier:
+ II->replaceAllUsesWith(II->getArgOperand(0));
+ II->eraseFromParent();
+ return true;
+
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ // If counting zeros is expensive, try to avoid it.
+ return despeculateCountZeros(II, TLI, DL, ModifiedDT);
}
if (TLI) {
@@ -1426,7 +1850,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
Type *AccessTy;
if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
return true;
}
}
@@ -1447,9 +1871,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return false;
}
-/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
-/// instructions to the predecessor to enable tail call optimizations. The
-/// case it is currently looking for is:
+/// Look for opportunities to duplicate return instructions to the predecessor
+/// to enable tail call optimizations. The case it is currently looking for is:
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
@@ -1478,7 +1901,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
@@ -1597,7 +2020,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
namespace {
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg;
@@ -1709,10 +2132,10 @@ class TypePromotionTransaction {
public:
/// \brief Record the position of \p Inst.
InsertionHandler(Instruction *Inst) {
- BasicBlock::iterator It = Inst;
+ BasicBlock::iterator It = Inst->getIterator();
HasPrevInstruction = (It != (Inst->getParent()->begin()));
if (HasPrevInstruction)
- Point.PrevInst = --It;
+ Point.PrevInst = &*--It;
else
Point.BB = Inst->getParent();
}
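This hunk is one instance of a change that recurs throughout this import: BasicBlock::iterator no longer converts implicitly to Instruction*, so code must round-trip explicitly between the two views. A minimal sketch of the new idiom (variable names are illustrative):

    // Old code relied on the implicit iterator -> Instruction* conversion:
    //   BasicBlock::iterator It = Inst;   // now ill-formed
    //   Instruction *Prev = --It;         // now ill-formed
    // New idiom: getIterator() to enter iterator land, &* to leave it.
    BasicBlock::iterator It = Inst->getIterator();
    Instruction *Prev = &*--It; // predecessor instruction, as a pointer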
@@ -1724,7 +2147,7 @@ class TypePromotionTransaction {
Inst->removeFromParent();
Inst->insertAfter(Point.PrevInst);
} else {
- Instruction *Position = Point.BB->getFirstInsertionPt();
+ Instruction *Position = &*Point.BB->getFirstInsertionPt();
if (Inst->getParent())
Inst->moveBefore(Position);
else
@@ -1797,7 +2220,7 @@ class TypePromotionTransaction {
Value *Val = Inst->getOperand(It);
OriginalValues.push_back(Val);
// Set a dummy one.
- // We could use OperandSetter here, but that would implied an overhead
+ // We could use OperandSetter here, but that would imply an overhead
// that we are not willing to pay.
Inst->setOperand(It, UndefValue::get(Val->getType()));
}
@@ -2111,7 +2534,7 @@ class AddressingModeMatcher {
unsigned AddrSpace;
Instruction *MemoryInst;
- /// AddrMode - This is the addressing mode that we're building up. This is
+ /// This is the addressing mode that we're building up. This is
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
@@ -2122,9 +2545,8 @@ class AddressingModeMatcher {
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
+ /// This is set to true when we should not do profitability checks.
+ /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
@@ -2143,7 +2565,7 @@ class AddressingModeMatcher {
}
public:
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// Find the maximal addressing mode that a load/store of V can fold,
/// given an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
/// \p InsertedInsts The instructions inserted by other CodeGenPrepare
@@ -2161,32 +2583,32 @@ public:
bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT).MatchAddr(V, 0);
+ PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool matchAddr(Value *V, unsigned Depth);
+ bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
bool *MovedAway = nullptr);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ bool isProfitableToFoldIntoAddressingMode(Instruction *I,
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
+ bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
unsigned Depth) {
// If Scale is 1, then this is the same as adding ScaleReg to the addressing
// mode. Just process that directly.
if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
+ return matchAddr(ScaleReg, Depth);
// If the scale is 0, it takes nothing to add this.
if (Scale == 0)
@@ -2233,9 +2655,9 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
return true;
}
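Concretely, matchScaledValue is what lets a small constant multiply fold into a scaled-index address. A hedged IR-level illustration (whether Scale == 4 is legal depends on the target):

    //   %off  = mul i64 %idx, 4
    //   %addr = getelementptr i8, i8* %base, i64 %off
    // matchScaledValue(%idx, /*Scale=*/4, Depth) tentatively sets
    //   AddrMode.Scale = 4; AddrMode.ScaledReg = %idx;
    // and keeps the change only if TLI.isLegalAddressingMode accepts
    // the form [BaseReg + 4*ScaledReg + BaseOffs] for this access type.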
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// This is a little filter, which returns true if an addressing computation
+/// involving I might be folded into a load/store accessing it.
+/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
switch (I->getOpcode()) {
@@ -2301,9 +2723,7 @@ class TypePromotionHelper {
/// \brief Utility function to determine if \p OpIdx should be promoted when
/// promoting \p Inst.
static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
- if (isa<SelectInst>(Inst) && OpIdx == 0)
- return false;
- return true;
+ return !(isa<SelectInst>(Inst) && OpIdx == 0);
}
/// \brief Utility function to promote the operand of \p Ext when this
@@ -2413,8 +2833,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
Value *OpndVal = Inst->getOperand(0);
// Check if we can use this operand in the extension.
- // If the type is larger than the result type of the extension,
- // we cannot.
+ // If the type is larger than the result type of the extension, we cannot.
if (!OpndVal->getType()->isIntegerTy() ||
OpndVal->getType()->getIntegerBitWidth() >
ConsideredExtType->getIntegerBitWidth())
@@ -2433,18 +2852,16 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// #1 get the type of the operand and check the kind of the extended bits.
const Type *OpndType;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
- OpndType = It->second.Ty;
+ if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
+ OpndType = It->second.getPointer();
else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
OpndType = Opnd->getOperand(0)->getType();
else
return false;
- // #2 check that the truncate just drop extended bits.
- if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
- return true;
-
- return false;
+ // #2 check that the truncate just drops extended bits.
+ return Inst->getType()->getIntegerBitWidth() >=
+ OpndType->getIntegerBitWidth();
}
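The getInt()/getPointer() accessors above indicate that PromotedInsts now packs its per-instruction record (original type plus extension kind) into a PointerIntPair instead of a two-field struct. A sketch of the assumed mapping (the typedef names are guesses; this hunk does not show them):

    // One pointer-sized word per entry: the original Type* with a 1-bit
    // "was sign-extended" flag stored in the pointer's low bit.
    typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
    typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;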
TypePromotionHelper::Action TypePromotionHelper::getAction(
@@ -2553,7 +2970,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
}
TPT.replaceAllUsesWith(ExtOpnd, Trunc);
- // Restore the operand of Ext (which has been replace by the previous call
+ // Restore the operand of Ext (which has been replaced by the previous call
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
TPT.setOperand(Ext, 0, ExtOpnd);
}
@@ -2631,8 +3048,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
return ExtOpnd;
}
-/// IsPromotionProfitable - Check whether or not promoting an instruction
-/// to a wider type was profitable.
+/// Check whether or not promoting an instruction to a wider type is profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
@@ -2640,7 +3056,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// matched in the addressing mode during the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool AddressingModeMatcher::IsPromotionProfitable(
+bool AddressingModeMatcher::isPromotionProfitable(
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
// The cost of the new extensions is greater than the cost of the
@@ -2656,9 +3072,9 @@ bool AddressingModeMatcher::IsPromotionProfitable(
return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
+/// Given an instruction or constant expr, see if we can fold the operation
+/// into the addressing mode. If so, update the addressing mode and return
+/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
@@ -2667,7 +3083,7 @@ bool AddressingModeMatcher::IsPromotionProfitable(
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned Depth,
bool *MovedAway) {
// Avoid exponential behavior on extremely deep expression trees.
@@ -2680,13 +3096,13 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
switch (Opcode) {
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
case Instruction::IntToPtr: {
auto AS = AddrInst->getType()->getPointerAddressSpace();
auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
case Instruction::BitCast:
@@ -2698,14 +3114,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// and we don't want to mess around with them. Assume it knows what it
// is doing.
AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::AddrSpaceCast: {
unsigned SrcAS
= AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
case Instruction::Add: {
@@ -2719,8 +3135,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
+ matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
// Restore the old addr mode info.
@@ -2729,8 +3145,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TPT.rollback(LastKnownGood);
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
+ matchAddr(AddrInst->getOperand(1), Depth+1))
return true;
// Otherwise we definitely can't merge the ADD in.
@@ -2752,7 +3168,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
case Instruction::GetElementPtr: {
// Scan the GEP. We check it if it contains constant offsets and at most
@@ -2791,7 +3207,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
}
AddrMode.BaseOffs -= ConstantOffset;
@@ -2806,7 +3222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrMode.BaseOffs += ConstantOffset;
// Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
// If it couldn't be matched, just stuff the value in a register.
if (AddrMode.HasBaseReg) {
AddrMode = BackupAddrMode;
@@ -2818,7 +3234,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
}
// Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
Depth)) {
// If it couldn't be matched, try stuffing the base into a register
// instead of matching it, and retrying the match of the scale.
@@ -2829,7 +3245,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = AddrInst->getOperand(0);
AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
VariableScale, Depth)) {
// If even that didn't work, bail.
AddrMode = BackupAddrMode;
@@ -2879,12 +3295,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
- if (!MatchAddr(PromotedOperand, Depth) ||
- // The total of the new cost is equals to the cost of the created
+ if (!matchAddr(PromotedOperand, Depth) ||
+ // The total of the new cost is equal to the cost of the created
// instructions.
- // The total of the old cost is equals to the cost of the extension plus
+ // The total of the old cost is equal to the cost of the extension plus
// what we have saved in the addressing mode.
- !IsPromotionProfitable(CreatedInstsCost,
+ !isPromotionProfitable(CreatedInstsCost,
ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
@@ -2899,12 +3315,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return false;
}
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
+/// If we can, try to add the value of 'Addr' into the current addressing mode.
+/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
+/// unmodified. This assumes that Addr is either a pointer type or intptr_t
+/// for the target.
///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
// Start a transaction at this point that we will rollback if the matching
// fails.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
@@ -2929,8 +3345,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
// Check to see if it is possible to fold this operation.
bool MovedAway = false;
- if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
- // This instruction may have been move away. If so, there is nothing
+ if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ // This instruction may have been moved away. If so, there is nothing
// to check here.
if (MovedAway)
return true;
@@ -2938,7 +3354,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
// *profitable* to do so. We use a simple cost model to avoid increasing
// register pressure too much.
if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
AddrModeInsts.push_back(I);
return true;
}
@@ -2950,7 +3366,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
TPT.rollback(LastKnownGood);
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ if (matchOperationAddr(CE, CE->getOpcode(), Depth))
return true;
TPT.rollback(LastKnownGood);
} else if (isa<ConstantPointerNull>(Addr)) {
@@ -2983,9 +3399,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
return false;
}
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
+/// Check to see if all uses of OpVal by the specified inline asm call are due
+/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetMachine &TM) {
const Function *F = CI->getParent()->getParent();
@@ -3011,8 +3426,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Recursively walk all the uses of I until we find a memory use.
+/// If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(
Instruction *I,
@@ -3059,11 +3474,11 @@ static bool FindAllMemoryUses(
return false;
}
-/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+/// Return true if Val is already known to be live at the use site that we're
+/// folding it into. If so, there is no cost to include it in the addressing
+/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
+/// instruction already.
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
@@ -3085,11 +3500,11 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
+/// It is possible for the addressing mode of the machine to fold the specified
+/// instruction into a load or store that ultimately uses it.
+/// However, the specified instruction has multiple uses.
+/// Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
@@ -3107,7 +3522,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
@@ -3124,9 +3539,9 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
BaseReg = nullptr;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
ScaledReg = nullptr;
// If folding this instruction (and its subexprs) didn't extend any live
@@ -3171,7 +3586,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
+ bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
// The match was to check the profitability, the changes made are not
@@ -3192,7 +3607,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
} // end anonymous namespace
-/// IsNonLocalValue - Return true if the specified values are defined in a
+/// Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -3200,16 +3615,15 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
-/// OptimizeMemoryInst - Load and Store Instructions often have
-/// addressing modes that can do significant amounts of computation. As such,
-/// instruction selection will try to get the load or store to do as much
-/// computation as possible for the program. The problem is that isel can only
-/// see within a single block. As such, we sink as much legal addressing mode
-/// stuff into the block as possible.
+/// Load and Store Instructions often have addressing modes that can do
+/// significant amounts of computation. As such, instruction selection will try
+/// to get the load or store to do as much computation as possible for the
+/// program. The problem is that isel can only see within a single block. As
+/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
-bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
@@ -3530,12 +3944,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Repl->use_empty()) {
// This can cause recursive deletion, which can invalidate our iterator.
// Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(CurInstIterator);
+ WeakVH IterHandle(&*CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
- if (IterHandle != CurInstIterator) {
+ if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
CurInstIterator = BB->begin();
@@ -3546,10 +3960,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return true;
}
-/// OptimizeInlineAsmInst - If there are any memory operands, use
-/// OptimizeMemoryInst to sink their address computing into the block when
-/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+/// If there are any memory operands, use OptimizeMemoryInst to sink their
+/// address computing into the block when possible / profitable.
+bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
const TargetRegisterInfo *TRI =
@@ -3566,7 +3979,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+ MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
@@ -3646,7 +4059,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstsCost = 0) {
@@ -3696,7 +4109,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
@@ -3713,13 +4126,13 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
return false;
}
-/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
-/// basic block as the load, unless conditions are unfavorable. This allows
-/// SelectionDAG to fold the extend into the load.
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
/// \p I[in/out] the extension may be modified during the process if some
/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// Try to promote a chain of computation if it allows us to form
// an extended load.
TypePromotionTransaction TPT;
@@ -3730,7 +4143,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
// Look for a load being extended.
LoadInst *LI = nullptr;
Instruction *OldExt = I;
- bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
if (!LI || !I) {
assert(!HasPromoted && !LI && "If we did not match any load instruction "
"the code must remain the same");
@@ -3780,7 +4193,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
return true;
}
-bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If the result of a {s|z}ext and its source are both live out, rewrite all
@@ -3838,7 +4251,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
InsertedInsts.insert(InsertedTrunc);
}
@@ -3851,9 +4265,202 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
return MadeChange;
}
-/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
-/// turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+// Find loads whose uses only use some of the loaded value's bits. Add an "and"
+// just after the load if the target can fold this into one extload instruction,
+// with the hope of eliminating some of the other later "and" instructions using
+// the loaded value. "and"s that are made trivially redundant by the insertion
+// of the new "and" are removed by this function, while others (e.g. those whose
+// path from the load goes through a phi) are left for isel to potentially
+// remove.
+//
+// For example:
+//
+// b0:
+// x = load i32
+// ...
+// b1:
+// y = and x, 0xff
+// z = use y
+//
+// becomes:
+//
+// b0:
+// x = load i32
+// x' = and x, 0xff
+// ...
+// b1:
+// z = use x'
+//
+// whereas:
+//
+// b0:
+// x1 = load i32
+// ...
+// b1:
+// x2 = load i32
+// ...
+// b2:
+// x = phi x1, x2
+// y = and x, 0xff
+//
+// becomes (after a call to optimizeLoadExt for each load):
+//
+// b0:
+// x1 = load i32
+// x1' = and x1, 0xff
+// ...
+// b1:
+// x2 = load i32
+// x2' = and x2, 0xff
+// ...
+// b2:
+// x = phi x1', x2'
+// y = and x, 0xff
+//
+
+bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
+
+ if (!Load->isSimple() ||
+ !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
+ return false;
+
+ // Skip loads we've already transformed or have no reason to transform.
+ if (Load->hasOneUse()) {
+ User *LoadUser = *Load->user_begin();
+ if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
+ !dyn_cast<PHINode>(LoadUser))
+ return false;
+ }
+
+ // Look at all uses of Load, looking through phis, to determine how many bits
+ // of the loaded value are needed.
+ SmallVector<Instruction *, 8> WorkList;
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 8> AndsToMaybeRemove;
+ for (auto *U : Load->users())
+ WorkList.push_back(cast<Instruction>(U));
+
+ EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
+ unsigned BitWidth = LoadResultVT.getSizeInBits();
+ APInt DemandBits(BitWidth, 0);
+ APInt WidestAndBits(BitWidth, 0);
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(I).second)
+ continue;
+
+ // For a PHI node, push all of its users.
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ for (auto *U : Phi->users())
+ WorkList.push_back(cast<Instruction>(U));
+ continue;
+ }
+
+ switch (I->getOpcode()) {
+ case llvm::Instruction::And: {
+ auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!AndC)
+ return false;
+ APInt AndBits = AndC->getValue();
+ DemandBits |= AndBits;
+ // Keep track of the widest and mask we see.
+ if (AndBits.ugt(WidestAndBits))
+ WidestAndBits = AndBits;
+ if (AndBits == WidestAndBits && I->getOperand(0) == Load)
+ AndsToMaybeRemove.push_back(I);
+ break;
+ }
+
+ case llvm::Instruction::Shl: {
+ auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!ShlC)
+ return false;
+ uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
+ auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
+ DemandBits |= ShlDemandBits;
+ break;
+ }
+
+ case llvm::Instruction::Trunc: {
+ EVT TruncVT = TLI->getValueType(*DL, I->getType());
+ unsigned TruncBitWidth = TruncVT.getSizeInBits();
+ auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
+ DemandBits |= TruncBits;
+ break;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ uint32_t ActiveBits = DemandBits.getActiveBits();
+ // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
+ // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
+ // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
+ // (and (load x) 1) is not matched as a single instruction, rather as a LDR
+ // followed by an AND.
+ // TODO: Look into removing this restriction by fixing backends to either
+ // return false for isLoadExtLegal for i1 or have them select this pattern to
+ // a single instruction.
+ //
+ // Also avoid hoisting if we didn't see any ands with the exact DemandBits
+ // mask, since these are the only ands that will be removed by isel.
+ if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ WidestAndBits != DemandBits)
+ return false;
+
+ LLVMContext &Ctx = Load->getType()->getContext();
+ Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
+ EVT TruncVT = TLI->getValueType(*DL, TruncTy);
+
+ // Reject cases that won't be matched as extloads.
+ if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
+ !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
+ return false;
+
+ IRBuilder<> Builder(Load->getNextNode());
+ auto *NewAnd = dyn_cast<Instruction>(
+ Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+
+ // Replace all uses of load with new and (except for the use of load in the
+ // new and itself).
+ Load->replaceAllUsesWith(NewAnd);
+ NewAnd->setOperand(0, Load);
+
+ // Remove any and instructions that are now redundant.
+ for (auto *And : AndsToMaybeRemove)
+ // Check that the and mask is the same as the one we decided to put on the
+ // new and.
+ if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
+ And->replaceAllUsesWith(NewAnd);
+ if (&*CurInstIterator == And)
+ CurInstIterator = std::next(And->getIterator());
+ And->eraseFromParent();
+ ++NumAndUses;
+ }
+
+ ++NumAndsAdded;
+ return true;
+}
+
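A worked instance of the demanded-bits bookkeeping above, assuming a 32-bit load whose only transitive users are "and x, 0xff" and "shl x, 24":

    APInt DemandBits(32, 0);
    DemandBits |= APInt(32, 0xff);                     // And case
    DemandBits |= APInt::getAllOnesValue(32).lshr(24); // Shl case: only the low 8 bits feed the shift
    // DemandBits == 0x000000ff, ActiveBits == 8, the isMask and
    // WidestAndBits checks pass, and the load is rewritten to feed
    // "and x, 255", which the target can match as an i8 zextload.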
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // If it's safe to speculatively execute, then it should not have side
+ // effects; therefore, it's safe to sink and possibly *not* execute.
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
+ TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
+}
+
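For a concrete example of a sinkable operand (hedged; the cost verdict comes from the target's TTI, though integer division is TCC_Expensive in the default implementation):

    //   %div = udiv i32 %a, 17        ; one use, safe to speculate, expensive
    //   %sel = select i1 %cond, i32 %div, i32 0
    // sinkSelectOperand(TTI, %div) should return true here, so
    // optimizeSelectInst below will move %div into a select.true.sink
    // block that executes only when %cond is true.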
+/// Returns true if a SelectInst should be turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ SelectInst *SI) {
// FIXME: This should use the same heuristics as IfConversion to determine
// whether a select is better represented as a branch. This requires that
// branch probability metadata is preserved for the select, which is not the
@@ -3861,28 +4468,36 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
- // If the branch is predicted right, an out of order CPU can avoid blocking on
- // the compare. Emit cmovs on compares with a memory operand as branches to
- // avoid stalls on the load from memory. If the compare has more than one use
- // there's probably another cmov or setcc around so it's not worth emitting a
- // branch.
- if (!Cmp)
+ // If a branch is predictable, an out-of-order CPU can avoid blocking on its
+ // comparison condition. If the compare has more than one use, there's
+ // probably another cmov or setcc around, so it's not worth emitting a branch.
+ if (!Cmp || !Cmp->hasOneUse())
return false;
Value *CmpOp0 = Cmp->getOperand(0);
Value *CmpOp1 = Cmp->getOperand(1);
- // We check that the memory operand has one use to avoid uses of the loaded
- // value directly after the compare, making branches unprofitable.
- return Cmp->hasOneUse() &&
- ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+ // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
+ // on a load from memory. But if the load is used more than once, do not
+ // change the select to a branch because the load is probably needed
+ // regardless of whether the branch is taken or not.
+ if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
+ return true;
+
+ // If either operand of the select is expensive and only needed on one side
+ // of the select, we should form a branch.
+ if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+ sinkSelectOperand(TTI, SI->getFalseValue()))
+ return true;
+
+ return false;
}
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
-bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF?
@@ -3902,34 +4517,97 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
// We have efficient codegen support for the select instruction.
// Check if it is profitable to keep this 'select'.
if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(SI))
+ !isFormingBranchFromSelectProfitable(TTI, SI))
return false;
}
ModifiedDT = true;
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // br i1 %cmp, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // In addition, we may sink instructions that produce %c or %d from
+ // the entry block into the destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
// First, we split the block containing the select into 2 blocks.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
- BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- // Create a new block serving as the landing pad for the branch.
- BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
- NextBlock->getParent(), NextBlock);
-
- // Move the unconditional branch from the block with the select in it into our
- // landing pad block.
+ // Delete the unconditional branch that was just created by the split.
StartBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(NextBlock, SmallBlock);
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr;
+ BasicBlock *FalseBlock = nullptr;
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
+
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ BranchInst::Create(EndBlock, FalseBlock);
+ }
// Insert the real conditional branch based on the original condition.
- BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ if (TrueBlock == nullptr) {
+ BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ FalseBlock = StartBlock;
+ } else {
+ BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ }
// The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), StartBlock);
- PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ PN->addIncoming(SI->getTrueValue(), TrueBlock);
+ PN->addIncoming(SI->getFalseValue(), FalseBlock);
+
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
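When only one operand was worth sinking, the "missing" side of the diamond collapses as the comments above describe. A hedged sketch of the result when just the false value is sunk:

    // start:
    //   %cmp = icmp uge i32 %a, %b
    //   br i1 %cmp, label %select.end, label %select.false.sink
    // select.false.sink:
    //   %d = udiv i32 %x, 17          ; the sunk false operand
    //   br label %select.end
    // select.end:
    //   %sel = phi i32 [ %c, %start ], [ %d, %select.false.sink ]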
@@ -3955,7 +4633,7 @@ static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
/// it's often worth sinking a shufflevector splat down to its use so that
/// codegen can spot all lanes are identical.
-bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
BasicBlock *DefBB = SVI->getParent();
// Only do this xform if variable vector shifts are particularly expensive.
@@ -3987,9 +4665,10 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
if (!InsertedShuffle) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
- SVI->getOperand(1),
- SVI->getOperand(2), "", InsertPt);
+ assert(InsertPt != UserBB->end());
+ InsertedShuffle =
+ new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
+ SVI->getOperand(2), "", &*InsertPt);
}
UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -4005,6 +4684,49 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ if (!TLI || !DL)
+ return false;
+
+ Value *Cond = SI->getCondition();
+ Type *OldType = Cond->getType();
+ LLVMContext &Context = Cond->getContext();
+ MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+ unsigned RegWidth = RegType.getSizeInBits();
+
+ if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+ return false;
+
+ // If the register width is greater than the type width, expand the condition
+ // of the switch instruction and each case constant to the width of the
+ // register. By widening the type of the switch condition, subsequent
+ // comparisons (for case comparisons) will not need to be extended to the
+ // preferred register width, so we will potentially eliminate N-1 extends,
+ // where N is the number of cases in the switch.
+ auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+ // Zero-extend the switch condition and case constants unless the switch
+ // condition is a function argument that is already being sign-extended.
+ // In that case, we can avoid an unnecessary mask/extension by sign-extending
+ // everything instead.
+ Instruction::CastOps ExtType = Instruction::ZExt;
+ if (auto *Arg = dyn_cast<Argument>(Cond))
+ if (Arg->hasSExtAttr())
+ ExtType = Instruction::SExt;
+
+ auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+ ExtInst->insertBefore(SI);
+ SI->setCondition(ExtInst);
+ for (SwitchInst::CaseIt Case : SI->cases()) {
+ APInt NarrowConst = Case.getCaseValue()->getValue();
+ APInt WideConst = (ExtType == Instruction::ZExt) ?
+ NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+ Case.setValue(ConstantInt::get(Context, WideConst));
+ }
+
+ return true;
+}
+
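As a concrete instance of the widening (RegWidth is target-dependent; i32 is assumed here):

    //   switch i8 %x, label %def [ i8 1, label %a
    //                              i8 2, label %b ]
    // becomes
    //   %x.wide = zext i8 %x to i32
    //   switch i32 %x.wide, label %def [ i32 1, label %a
    //                                    i32 2, label %b ]
    // isel then compares %x.wide directly, instead of emitting an
    // extend for each case comparison.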
namespace {
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
@@ -4138,7 +4860,7 @@ class VectorPromoteHelper {
/// \brief Generate a constant vector with \p Val with the same
/// number of elements as the transition.
/// \p UseSplat defines whether or not \p Val should be replicated
- /// accross the whole vector.
+ /// across the whole vector.
/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
/// otherwise we generate a vector with as many undef as possible:
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
@@ -4320,7 +5042,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
-bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
unsigned CombineCost = UINT_MAX;
if (DisableStoreExtract || !TLI ||
(!StressStoreExtract &&
@@ -4372,7 +5094,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
return false;
}
-bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
@@ -4413,8 +5135,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = MoveExtToFormExtLoad(I);
- return MadeChange | OptimizeExtUses(I);
+ bool MadeChange = moveExtToFormExtLoad(I);
+ return MadeChange | optimizeExtUses(I);
}
}
return false;
@@ -4425,17 +5147,21 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ stripInvariantGroupMetadata(*LI);
if (TLI) {
+ bool Modified = optimizeLoadExt(LI);
unsigned AS = LI->getPointerAddressSpace();
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ return Modified;
}
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ stripInvariantGroupMetadata(*SI);
if (TLI) {
unsigned AS = SI->getPointerAddressSpace();
- return OptimizeMemoryInst(I, SI->getOperand(1),
+ return optimizeMemoryInst(I, SI->getOperand(1),
SI->getOperand(0)->getType(), AS);
}
return false;
@@ -4460,23 +5186,26 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
- OptimizeInst(NC, ModifiedDT);
+ optimizeInst(NC, ModifiedDT);
return true;
}
return false;
}
if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI, ModifiedDT);
+ return optimizeCallInst(CI, ModifiedDT);
if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return OptimizeSelectInst(SI);
+ return optimizeSelectInst(SI);
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return OptimizeShuffleVectorInst(SVI);
+ return optimizeShuffleVectorInst(SVI);
+
+ if (auto *Switch = dyn_cast<SwitchInst>(I))
+ return optimizeSwitchInst(Switch);
if (isa<ExtractElementInst>(I))
- return OptimizeExtractElementInst(I);
+ return optimizeExtractElementInst(I);
return false;
}
@@ -4484,17 +5213,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
CurInstIterator = BB.begin();
while (CurInstIterator != BB.end()) {
- MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
if (ModifiedDT)
return true;
}
- MadeChange |= DupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB);
return MadeChange;
}
@@ -4502,12 +5231,12 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
// llvm.dbg.value is far away from the value, then iSel may not be able
// to handle it properly. iSel will drop llvm.dbg.value if it cannot
// find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
Instruction *PrevNonDbgInst = nullptr;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- Instruction *Insn = BI++;
+ Instruction *Insn = &*BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
// Leave dbg.values that refer to an alloca alone. These
// intrinsics describe the address of a variable (= the alloca)
@@ -4521,10 +5250,14 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) {
Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
DVI->removeFromParent();
if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
else
DVI->insertAfter(VI);
MadeChange = true;
@@ -4548,7 +5281,7 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
return false;
bool MadeChange = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
// Does this BB end with the following?
// %andVal = and %val, #single-bit-set
@@ -4671,6 +5404,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
continue;
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+ continue;
+
unsigned Opc;
Value *Cond1, *Cond2;
if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
@@ -4697,7 +5434,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
- auto *Br1 = cast<BranchInst>(BB.getTerminator());
Br1->setCondition(Cond1);
LogicOp->eraseFromParent();
@@ -4828,3 +5564,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
return MadeChange;
}
+
+void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
+ if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
+ I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
+}
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
index 28c97ba..ff7c0d5 100644
--- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
+++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
@@ -38,9 +38,9 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
- Optional<bool> isGCManagedPointer(const Value *V) const override {
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
- PointerType *PT = cast<PointerType>(V->getType());
+ const PointerType *PT = cast<PointerType>(Ty);
// We pick addrspace(1) as our GC managed heap.
return (1 == PT->getAddressSpace());
}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index dba280f..c924ba3 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -52,14 +52,13 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Clear "do not change" set.
KeepRegs.reset();
- bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
+ bool IsReturnBlock = BB->isReturnBlock();
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BBSize;
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 0a188c0..af6b6a3 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -31,10 +31,39 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+// --------------------------------------------------------------------
+// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
+
+namespace {
+ DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+ return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+ }
+
+ /// Return the DFAInput for an instruction class input vector.
+ /// This function is used in both DFAPacketizer.cpp and in
+ /// DFAPacketizerEmitter.cpp.
+ DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+ DFAInput InsnInput = 0;
+ assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
+ for (auto U : InsnClass)
+ InsnInput = addDFAFuncUnits(InsnInput, U);
+ return InsnInput;
+ }
+}
+// --------------------------------------------------------------------
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
+ const DFAStateInput (*SIT)[2],
const unsigned *SET):
InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
- DFAStateEntryTable(SET) {}
+ DFAStateEntryTable(SET) {
+ // Make sure DFA types are large enough for the number of terms & resources.
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput))
+ && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput))
+ && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
+}
//
@@ -60,26 +89,42 @@ void DFAPacketizer::ReadTable(unsigned int state) {
DFAStateInputTable[i][1];
}
+//
+// getInsnInput - Return the DFAInput for an instruction class.
+//
+DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
+ // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
+ DFAInput InsnInput = 0;
+ unsigned i = 0;
+ for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
+ *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) {
+ InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
+ assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
+ }
+ return InsnInput;
+}
+
+// getInsnInput - Return the DFAInput for an instruction class input vector.
+DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
+ return getDFAInsnInput(InsnClass);
+}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
- const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
- unsigned FuncUnits = IS->getUnits();
- UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
return (CachedTable.count(StateTrans) != 0);
}
-
// reserveResources - Reserve the resources occupied by a MCInstrDesc and
// change the current state to reflect that change.
void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
- const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
- unsigned FuncUnits = IS->getUnits();
- UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
assert(CachedTable.count(StateTrans) != 0);
CurrentState = CachedTable[StateTrans];
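Editor's note: to make the packing scheme used by addDFAFuncUnits/getDFAInsnInput concrete, here is a minimal standalone sketch of the same arithmetic. The widths are assumptions for illustration (4 bits per term, two terms); the real values come from the generated DFAPacketizerDefs.h.

#include <cassert>
#include <cstdint>
#include <vector>

typedef uint64_t DFAInput;
const unsigned DFA_MAX_RESOURCES = 4; // assumed bits per func-unit term
const unsigned DFA_MAX_RESTERMS  = 2; // assumed maximum number of terms

DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
  // Each term occupies DFA_MAX_RESOURCES bits; earlier terms shift left.
  return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}

DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
  assert(InsnClass.size() <= DFA_MAX_RESTERMS && "too many DFA terms");
  DFAInput InsnInput = 0;
  for (unsigned U : InsnClass)
    InsnInput = addDFAFuncUnits(InsnInput, U);
  return InsnInput;
}

int main() {
  // Two stages with func-unit masks 0b0011 and 0b0100 pack into 0x34.
  std::vector<unsigned> InsnClass = {0x3, 0x4};
  assert(getDFAInsnInput(InsnClass) == 0x34);
}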
@@ -104,32 +149,35 @@ namespace llvm {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// Schedule method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+private:
+ AliasAnalysis *AA;
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- bool IsPostRA);
+ AliasAnalysis *AA);
// Schedule - Actual scheduling work.
void schedule() override;
};
}
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
- MachineLoopInfo &MLI, bool IsPostRA)
- : ScheduleDAGInstrs(MF, &MLI, IsPostRA) {
+ MachineLoopInfo &MLI,
+ AliasAnalysis *AA)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
CanHandleTerminators = true;
}
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
- buildSchedGraph(nullptr);
+ buildSchedGraph(AA);
}
// VLIWPacketizerList Ctor
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
- MachineLoopInfo &MLI, bool IsPostRA)
- : MF(MF) {
+ MachineLoopInfo &MLI, AliasAnalysis *AA)
+ : MF(MF), AA(AA) {
TII = MF.getSubtarget().getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA);
}
// VLIWPacketizerList Dtor
@@ -147,7 +195,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineInstr *MI) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst, MI);
+ finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
@@ -191,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
// Ask DFA if machine resource is available for MI.
bool ResourceAvail = ResourceTracker->canReserveResources(MI);
- if (ResourceAvail) {
+ if (ResourceAvail && shouldAddToPacket(MI)) {
// Dependency check for MI with instructions in CurrentPacketMIs.
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
@@ -210,7 +258,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
} // !isLegalToPacketizeTogether.
} // For all instructions in CurrentPacketMIs.
} else {
- // End the packet if resource is not available.
+ // End the packet if resource is not available, or if the instruction
+ // should not be added to the current packet.
endPacket(MBB, MI);
}
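Editor's note: the new shouldAddToPacket() query gives targets a veto that is independent of DFA resource availability. A hypothetical override might look like the sketch below, assuming VLIWPacketizerList declares the hook as a virtual that defaults to returning true; mustIssueSolo is an invented target-specific predicate, not an LLVM API.

class MyTargetPacketizer : public VLIWPacketizerList {
public:
  // Refuse to packetize instructions that must issue alone, even when
  // the resource tracker says a slot is still free.
  bool shouldAddToPacket(const MachineInstr *MI) override {
    return !mustIssueSolo(MI); // hypothetical helper for illustration
  }
};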
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 941129b..b11b497 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -101,26 +101,22 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
- for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
- I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
-
+ for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
- for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
- E = MBB->succ_end(); S != E; S++)
- for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
- LI != (*S)->livein_end(); LI++)
- LivePhysRegs.set(*LI);
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end(); S != E; S++)
+ for (const auto &LI : (*S)->liveins())
+ LivePhysRegs.set(LI.PhysReg);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
- MIE = MBB->rend(); MII != MIE; ) {
+ for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
+ MIE = MBB.rend(); MII != MIE; ) {
MachineInstr *MI = &*MII;
// If the instruction is dead, delete it!
@@ -132,7 +128,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
- MIE = MBB->rend();
+ MIE = MBB.rend();
// MII is now pointing to the next instruction to process,
// so don't increment it.
continue;
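Editor's note: the rewrite above relies on llvm::make_range to turn an iterator pair into something a range-based for loop can consume. A minimal self-contained sketch of the idiom, using a std::vector and a hand-rolled stand-in for make_range:

#include <iostream>
#include <vector>

// Stand-in for llvm::make_range: wrap an iterator pair so that
// range-based for loops can iterate over it.
template <typename It> struct SimpleRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};
template <typename It> SimpleRange<It> make_range(It B, It E) {
  return {B, E};
}

int main() {
  std::vector<int> Blocks = {1, 2, 3};
  // Visit elements bottom-to-top, as the pass does with MF.rbegin()/rend().
  for (int &B : make_range(Blocks.rbegin(), Blocks.rend()))
    std::cout << B << '\n'; // prints 3 2 1
}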
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index e019dfb..eae78a9 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -192,9 +192,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
if (Resumes.empty())
return false;
- // Check the personality, don't do anything if it's for MSVC.
+ // Check the personality, don't do anything if it's funclet-based.
EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
- if (isMSVCEHPersonality(Pers))
+ if (isFuncletEHPersonality(Pers))
return false;
LLVMContext &Ctx = Fn.getContext();
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index fbc4d97..f3536d7 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -538,11 +538,11 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up the CFG, temporarily leave Head without any successors.
Head->removeSuccessor(TBB);
- Head->removeSuccessor(FBB);
+ Head->removeSuccessor(FBB, true);
if (TBB != Tail)
- TBB->removeSuccessor(Tail);
+ TBB->removeSuccessor(Tail, true);
if (FBB != Tail)
- FBB->removeSuccessor(Tail);
+ FBB->removeSuccessor(Tail, true);
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 5b09cf1..c550008 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -375,9 +375,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// This is the entry block.
if (MBB->pred_empty()) {
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
- e = MBB->livein_end(); i != e; ++i) {
- for (int rx : regIndices(*i)) {
+ for (const auto &LI : MBB->liveins()) {
+ for (int rx : regIndices(LI.PhysReg)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
@@ -559,12 +558,11 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
- for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
- I != E; ++I) {
+ for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
// Update liveness, including the current instruction's defs.
- LiveRegSet.stepBackward(*I);
+ LiveRegSet.stepBackward(I);
- if (UndefMI == &*I) {
+ if (UndefMI == &I) {
if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
@@ -733,12 +731,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
- for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
- I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
+ for (unsigned Reg : *RC) {
+ if (MRI.isPhysRegUsed(Reg)) {
anyregs = true;
break;
}
+ }
if (!anyregs) return false;
// Initialize the AliasMap on the first use.
@@ -752,7 +751,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
AliasMap[*AI].push_back(i);
}
- MachineBasicBlock *Entry = MF->begin();
+ MachineBasicBlock *Entry = &*MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
SmallVector<MachineBasicBlock*, 16> Loops;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
@@ -761,22 +760,19 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
enterBasicBlock(MBB);
if (SeenUnknownBackEdge)
Loops.push_back(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I)
- visitInstr(I);
+ for (MachineInstr &MI : *MBB)
+ visitInstr(&MI);
processUndefReads(MBB);
leaveBasicBlock(MBB);
}
// Visit all the loop blocks again in order to merge DomainValues from
// back-edges.
- for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Loops[i];
+ for (MachineBasicBlock *MBB : Loops) {
enterBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I)
- if (!I->isDebugValue())
- processDefs(I, false);
+ for (MachineInstr &MI : *MBB)
+ if (!MI.isDebugValue())
+ processDefs(&MI, false);
processUndefReads(MBB);
leaveBasicBlock(MBB);
}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
index 55e809e..90ddac9 100644
--- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -50,7 +50,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
// Iterate through each instruction in the function, looking for pseudos.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE; ) {
MachineInstr *MI = MBBI++;
@@ -63,7 +63,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
// The expansion may involve new basic blocks.
if (NewMBB != MBB) {
MBB = NewMBB;
- I = NewMBB;
+ I = NewMBB->getIterator();
MBBI = NewMBB->begin();
MBBE = NewMBB->end();
}
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
new file mode 100644
index 0000000..8b2f505
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -0,0 +1,55 @@
+//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations which result in
+// funclets being contiguous.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "funclet-layout"
+
+namespace {
+class FuncletLayout : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ FuncletLayout() : MachineFunctionPass(ID) {
+ initializeFuncletLayoutPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+char FuncletLayout::ID = 0;
+char &llvm::FuncletLayoutID = FuncletLayout::ID;
+INITIALIZE_PASS(FuncletLayout, "funclet-layout",
+ "Contiguously Lay Out Funclets", false, false)
+
+bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership =
+ getFuncletMembership(F);
+ if (FuncletMembership.empty())
+ return false;
+
+ F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto FuncletX = FuncletMembership.find(&X);
+ auto FuncletY = FuncletMembership.find(&Y);
+ assert(FuncletX != FuncletMembership.end());
+ assert(FuncletY != FuncletMembership.end());
+ return FuncletX->second < FuncletY->second;
+ });
+
+ // Conservatively assume we changed something.
+ return true;
+}
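Editor's note: the pass makes funclets contiguous by sorting blocks on their funclet number. A minimal sketch of the same idea over plain data, assuming the membership map is total (which the asserts above enforce); std::stable_sort stands in for MachineFunction::sort and, like it, preserves the original order within each funclet.

#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  // Block names with a funclet number for each block.
  std::vector<std::string> Blocks = {"entry", "cleanup.a", "cont", "cleanup.b"};
  std::map<std::string, int> Membership = {
      {"entry", 0}, {"cont", 0}, {"cleanup.a", 1}, {"cleanup.b", 1}};

  std::stable_sort(Blocks.begin(), Blocks.end(),
                   [&](const std::string &X, const std::string &Y) {
                     return Membership.at(X) < Membership.at(Y);
                   });
  // Result: entry, cont, cleanup.a, cleanup.b -- each funclet contiguous.
  assert(Blocks[1] == "cont");
}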
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index d8edd7e..484d317 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -158,7 +158,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
// Search for initializers in the initial BB.
SmallPtrSet<AllocaInst *, 16> InitedRoots;
- for (; !CouldBecomeSafePoint(IP); ++IP)
+ for (; !CouldBecomeSafePoint(&*IP); ++IP)
if (StoreInst *SI = dyn_cast<StoreInst>(IP))
if (AllocaInst *AI =
dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
@@ -320,7 +320,9 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
RI = FI->removeStackRoot(RI);
} else {
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ unsigned FrameReg; // FIXME: surely GCRoot ought to store the
+ // register that the offset is from?
+ RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
++RI;
}
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 6f9e839..dd9a840 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -108,10 +108,9 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
// FIXME: this could be a transitional option, and we probably need to remove
// it if only we are sure this optimization could always benefit all targets.
-static cl::opt<bool>
+static cl::opt<cl::boolOrDefault>
EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
- cl::desc("Enable global merge pass on external linkage"),
- cl::init(false));
+ cl::desc("Enable global merge pass on external linkage"));
STATISTIC(NumMerged, "Number of globals merged");
namespace {
@@ -129,11 +128,14 @@ namespace {
/// FIXME: This could learn about optsize, and be used in the cost model.
bool OnlyOptimizeForSize;
+ /// Whether we should merge global variables that have external linkage.
+ bool MergeExternalGlobals;
+
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
/// \brief Merge everything in \p Globals for which the corresponding bit
/// in \p GlobalSet is set.
- bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+ bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const BitVector &GlobalSet, Module &M, bool isConst,
unsigned AddrSpace) const;
@@ -158,9 +160,11 @@ namespace {
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetMachine *TM = nullptr,
unsigned MaximalOffset = 0,
- bool OnlyOptimizeForSize = false)
+ bool OnlyOptimizeForSize = false,
+ bool MergeExternalGlobals = false)
: FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
- OnlyOptimizeForSize(OnlyOptimizeForSize) {
+ OnlyOptimizeForSize(OnlyOptimizeForSize),
+ MergeExternalGlobals(MergeExternalGlobals) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -189,14 +193,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
auto &DL = M.getDataLayout();
// FIXME: Find better heuristics
- std::stable_sort(
- Globals.begin(), Globals.end(),
- [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2));
- });
+ std::stable_sort(Globals.begin(), Globals.end(),
+ [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ return DL.getTypeAllocSize(GV1->getValueType()) <
+ DL.getTypeAllocSize(GV2->getValueType());
+ });
// If we want to just blindly group all globals together, do so.
if (!GlobalMergeGroupByUse) {
@@ -207,7 +208,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// If we want to be smarter, look at all uses of each global, to try to
// discover all sets of globals used together, and how many times each of
- // these sets occured.
+ // these sets occurred.
//
// Keep this reasonably efficient, by having an append-only list of all sets
// discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
@@ -302,8 +303,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Function *ParentFn = I->getParent()->getParent();
// If we're only optimizing for size, ignore non-minsize functions.
- if (OnlyOptimizeForSize &&
- !ParentFn->hasFnAttribute(Attribute::MinSize))
+ if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
continue;
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
@@ -406,15 +406,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return Changed;
}
-bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const BitVector &GlobalSet, Module &M, bool isConst,
unsigned AddrSpace) const {
+ assert(Globals.size() > 1);
Type *Int32Ty = Type::getInt32Ty(M.getContext());
auto &DL = M.getDataLayout();
- assert(Globals.size() > 1);
-
DEBUG(dbgs() << " Trying to merge set, starts with #"
<< GlobalSet.find_first() << "\n");
@@ -425,58 +424,44 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
- bool HasExternal = false;
- GlobalVariable *TheFirstExternal = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
- Type *Ty = Globals[j]->getType()->getElementType();
+ Type *Ty = Globals[j]->getValueType();
MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
-
- if (Globals[j]->hasExternalLinkage() && !HasExternal) {
- HasExternal = true;
- TheFirstExternal = Globals[j];
- }
}
- // If merged variables doesn't have external linkage, we needn't to expose
- // the symbol after merging.
- GlobalValue::LinkageTypes Linkage = HasExternal
- ? GlobalValue::ExternalLinkage
- : GlobalValue::InternalLinkage;
-
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- // If merged variables have external linkage, we use symbol name of the
- // first variable merged as the suffix of global symbol name. This would
- // be able to avoid the link-time naming conflict for globalm symbols.
GlobalVariable *MergedGV = new GlobalVariable(
- M, MergedTy, isConst, Linkage, MergedInit,
- HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName()
- : "_MergedGlobals",
- nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+ M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit,
+ "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
- for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) {
+ for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, idx++)
+ ConstantInt::get(Int32Ty, idx),
};
Constant *GEP =
ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
- if (Linkage != GlobalValue::InternalLinkage) {
- // Generate a new alias...
- auto *PTy = cast<PointerType>(GEP->getType());
- GlobalAlias::create(PTy, Linkage, Name, GEP, &M);
+ // When the linkage is not internal we must emit an alias for the original
+ // variable name as it may be accessed from another object. On non-Mach-O
+ // we can also emit an alias for internal linkage as it's safe to do so.
+ // It's not safe on Mach-O as the alias (and thus the portion of the
+ // MergedGlobals variable) may be dead stripped at link time.
+ if (Linkage != GlobalValue::InternalLinkage ||
+ !TM->getTargetTriple().isOSBinFormatMachO()) {
+ GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M);
}
NumMerged++;
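Editor's note: the effect of the replacement loop above, expressed in plain C++ for two merged int globals (an illustrative sketch of the layout, not the pass's actual output):

// Before merging: two independent globals, int a; int b;
// After merging they live in one private struct, and every use of an old
// symbol is rewritten to an inbounds GEP with indices {0, idx}:
struct MergedGlobals { int a; int b; };
static MergedGlobals _MergedGlobals;

int *addr_of_a() { return &_MergedGlobals.a; } // GEP {0, 0}
int *addr_of_b() { return &_MergedGlobals.b; } // GEP {0, 1}
// For an original global with non-internal linkage, an alias carrying the
// old name is emitted at the GEP address so other objects can still link.

int main() { return addr_of_a() == &_MergedGlobals.a ? 0 : 1; }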
@@ -535,61 +520,57 @@ bool GlobalMerge::doInitialization(Module &M) {
setMustKeepGlobalVariables(M);
// Grab all non-const globals.
- for (Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
+ for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
continue;
- if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
- !I->hasInternalLinkage())
+ if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
+ !GV.hasInternalLinkage())
continue;
- PointerType *PT = dyn_cast<PointerType>(I->getType());
+ PointerType *PT = dyn_cast<PointerType>(GV.getType());
assert(PT && "Global variable is not a pointer!");
unsigned AddressSpace = PT->getAddressSpace();
// Ignore fancy-aligned globals for now.
- unsigned Alignment = DL.getPreferredAlignment(I);
- Type *Ty = I->getType()->getElementType();
+ unsigned Alignment = DL.getPreferredAlignment(&GV);
+ Type *Ty = GV.getValueType();
if (Alignment > DL.getABITypeAlignment(Ty))
continue;
// Ignore all 'special' globals.
- if (I->getName().startswith("llvm.") ||
- I->getName().startswith(".llvm."))
+ if (GV.getName().startswith("llvm.") ||
+ GV.getName().startswith(".llvm."))
continue;
// Ignore all "required" globals:
- if (isMustKeepGlobalVariable(I))
+ if (isMustKeepGlobalVariable(&GV))
continue;
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
- if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
- BSSGlobals[AddressSpace].push_back(I);
- else if (I->isConstant())
- ConstGlobals[AddressSpace].push_back(I);
+ if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
+ BSSGlobals[AddressSpace].push_back(&GV);
+ else if (GV.isConstant())
+ ConstGlobals[AddressSpace].push_back(&GV);
else
- Globals[AddressSpace].push_back(I);
+ Globals[AddressSpace].push_back(&GV);
}
}
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = Globals.begin(), E = Globals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
+ for (auto &P : Globals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, false, I->first);
+ for (auto &P : BSSGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
if (EnableGlobalMergeOnConst)
- for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
- I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
- if (I->second.size() > 1)
- Changed |= doMerge(I->second, M, true, I->first);
+ for (auto &P : ConstGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, true, P.first);
return Changed;
}
@@ -604,6 +585,9 @@ bool GlobalMerge::doFinalization(Module &M) {
}
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
- bool OnlyOptimizeForSize) {
- return new GlobalMerge(TM, Offset, OnlyOptimizeForSize);
+ bool OnlyOptimizeForSize,
+ bool MergeExternalByDefault) {
+ bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
+ return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
}
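Editor's note: switching from cl::opt<bool> to cl::opt<cl::boolOrDefault> lets the command line distinguish "explicitly off" from "not specified", so a target-chosen default can fill the gap. A minimal sketch of the same resolution logic, using a three-state enum modeled on LLVM's cl::BOU_* values:

#include <cassert>

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

// Resolve a tri-state flag: an explicit command-line value wins, otherwise
// fall back to the caller-provided (e.g. per-target) default.
bool resolve(BoolOrDefault Flag, bool Default) {
  return Flag == BOU_UNSET ? Default : Flag == BOU_TRUE;
}

int main() {
  assert(resolve(BOU_UNSET, true));  // no flag given: target default wins
  assert(!resolve(BOU_FALSE, true)); // explicit off overrides the default
  assert(resolve(BOU_TRUE, false));  // explicit on overrides the default
}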
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index ee0532b..c38c9d2 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
using namespace llvm;
@@ -190,10 +191,10 @@ namespace {
private:
bool ReverseBranchCondition(BBInfo &BBI);
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- const BranchProbability &Prediction) const;
+ BranchProbability Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- const BranchProbability &Prediction) const;
+ BranchProbability Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
@@ -218,7 +219,7 @@ namespace {
bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
unsigned Cycle, unsigned Extra,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
Prediction);
}
@@ -227,7 +228,7 @@ namespace {
unsigned TCycle, unsigned TExtra,
MachineBasicBlock &FBB,
unsigned FCycle, unsigned FExtra,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
return TCycle > 0 && FCycle > 0 &&
TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
Prediction);
@@ -462,11 +463,11 @@ bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
/// getNextBlock - Returns the next block in the function blocks ordering. If
/// it is the end, returns NULL.
static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
- MachineFunction::iterator I = BB;
+ MachineFunction::iterator I = BB->getIterator();
MachineFunction::iterator E = BB->getParent()->end();
if (++I == E)
return nullptr;
- return I;
+ return &*I;
}
/// ValidSimple - Returns true if the 'true' block (along with its
@@ -474,7 +475,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -501,7 +502,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- const BranchProbability &Prediction) const {
+ BranchProbability Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -530,10 +531,10 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
- MachineFunction::iterator I = TrueBBI.BB;
+ MachineFunction::iterator I = TrueBBI.BB->getIterator();
if (++I == TrueBBI.BB->getParent()->end())
return false;
- TExit = I;
+ TExit = &*I;
}
return TExit && TExit == FalseBBI.BB;
}
@@ -948,10 +949,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
/// candidates.
void IfConverter::AnalyzeBlocks(MachineFunction &MF,
std::vector<IfcvtToken*> &Tokens) {
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
- AnalyzeBlock(BB, Tokens);
- }
+ for (auto &BB : MF)
+ AnalyzeBlock(&BB, Tokens);
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
@@ -961,14 +960,14 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF,
/// that all the intervening blocks are empty (given BB can fall through to its
/// next block).
static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator PI = BB;
+ MachineFunction::iterator PI = BB->getIterator();
MachineFunction::iterator I = std::next(PI);
- MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator TI = ToBB->getIterator();
MachineFunction::iterator E = BB->getParent()->end();
while (I != TI) {
// Check isSuccessor to avoid case where the next block is empty, but
// it's not a successor.
- if (I == E || !I->empty() || !PI->isSuccessor(I))
+ if (I == E || !I->empty() || !PI->isSuccessor(&*I))
return false;
PI = I++;
}
@@ -1114,7 +1113,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB);
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
} else {
RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
@@ -1153,28 +1152,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
return true;
}
-/// Scale down weights to fit into uint32_t. NewTrue is the new weight
-/// for successor TrueBB, and NewFalse is the new weight for successor
-/// FalseBB.
-static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse,
- MachineBasicBlock *MBB,
- const MachineBasicBlock *TrueBB,
- const MachineBasicBlock *FalseBB,
- const MachineBranchProbabilityInfo *MBPI) {
- uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI) {
- if (*SI == TrueBB)
- MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale));
- else if (*SI == FalseBB)
- MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale));
- else
- MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale);
- }
-}
-
/// IfConvertTriangle - If convert a triangle sub-CFG.
///
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
@@ -1231,16 +1208,14 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
DontKill.clear();
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
- uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
- uint32_t WeightScale = 0;
+ BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
if (HasEarlyExit) {
- // Get weights before modifying CvtBBI->BB and BBI.BB.
- CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
- CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB);
- BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB);
- BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
- SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
+ // Get probabilities before modifying CvtBBI->BB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
+ CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
}
if (CvtBBI->BB->pred_size() > 1) {
@@ -1251,7 +1226,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB);
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
@@ -1268,22 +1243,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
+
+ // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
+ // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
+ // Prob(BBI.BB, NextBBI->BB) +
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
+ // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
+ auto NewTrueBB = getNextBlock(BBI.BB);
+ auto NewNext = BBNext + BBCvt * CvtNext;
+ auto NewTrueBBIter =
+ std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
+ if (NewTrueBBIter != BBI.BB->succ_end())
+ BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
+
+ auto NewFalse = BBCvt * CvtFalse;
TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
- BBI.BB->addSuccessor(CvtBBI->FalseBB);
- // Update the edge weight for both CvtBBI->FalseBB and NextBBI.
- // New_Weight(BBI.BB, NextBBI->BB) =
- // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) +
- // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB)
- // New_Weight(BBI.BB, CvtBBI->FalseBB) =
- // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB)
-
- uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale;
- uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale;
- // We need to scale down all weights of BBI.BB to fit uint32_t.
- // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to
- // the next block.
- ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB),
- CvtBBI->FalseBB, MBPI);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
}
// Merge in the 'false' block if the 'false' block has no other
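Editor's note: a quick worked example of the probability update above, with made-up numbers. Suppose Prob(BBI.BB, NextBBI) = 0.2, Prob(BBI.BB, CvtBBI) = 0.8, and within CvtBBI, Prob(NextBBI) = 0.75 and Prob(FalseBB) = 0.25. Plain doubles stand in for LLVM's fixed-point BranchProbability, which performs the same arithmetic:

#include <cassert>
#include <cmath>

int main() {
  double BBNext = 0.2, BBCvt = 0.8;       // out-edges of BBI.BB
  double CvtNext = 0.75, CvtFalse = 0.25; // out-edges of CvtBBI->BB

  // Once CvtBBI is predicated into BBI.BB, its out-edges are reached
  // through BBI.BB, so their probabilities are scaled by BBCvt.
  double NewNext  = BBNext + BBCvt * CvtNext; // 0.2 + 0.6  = 0.8
  double NewFalse = BBCvt * CvtFalse;         // 0.8 * 0.25 = 0.2

  assert(std::fabs(NewNext + NewFalse - 1.0) < 1e-9); // still sums to one
}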
@@ -1526,7 +1502,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
MergeBlocks(BBI, TailBBI);
TailBBI.IsDone = true;
} else {
- BBI.BB->addSuccessor(TailBB);
+ BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
InsertUncondBranch(BBI.BB, TailBB, TII);
BBI.HasFallThrough = false;
}
@@ -1536,7 +1512,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.
BBI.BB->removeSuccessor(BBI1->BB);
- BBI.BB->removeSuccessor(BBI2->BB);
+ BBI.BB->removeSuccessor(BBI2->BB, true);
RemoveExtraEdges(BBI);
// Update block info.
@@ -1686,25 +1662,94 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
+ // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+ // unknown probabilities into known ones.
+ // FIXME: This usage is too tricky and in the future we would like to
+ // eliminate all unknown probabilities in MBB.
+ ToBBI.BB->normalizeSuccProbs();
+
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+ // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
+ // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+ auto To2FromProb = BranchProbability::getZero();
+ if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
+ // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+ // edge probability being merged to other edges when this edge is removed
+ // later.
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
+ BranchProbability::getZero());
+ }
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
+ for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FromSuccs[i];
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
+
+ auto NewProb = BranchProbability::getZero();
+ if (AddEdges) {
+ // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+ // which is a portion of the edge probability from FromBBI.BB to Succ. The
+ // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+ // FromBBI is a successor of ToBBI.BB. See comment below for exception).
+ NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+
+ // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromBBI.BB is the
+ // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromBBI.BB's out-edges when adding
+ // new successors.
+ if (!To2FromProb.isZero())
+ NewProb *= To2FromProb;
+ }
+
FromBBI.BB->removeSuccessor(Succ);
- if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
- ToBBI.BB->addSuccessor(Succ);
+
+ if (AddEdges) {
+ // If the edge from ToBBI.BB to Succ already exists, update the
+ // probability of this edge by adding NewProb to it. An example is shown
+ // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+ // don't have to set C as A's successor as it already is. We only need to
+ // update the edge probability on A->C. Note that B will not be
+ // immediately removed from A's successors. It is possible that B->D is
+ // not removed either if D is a fallthrough of B. Later the edge A->D
+ // (generated here) and B->D will be combined into one edge. To maintain
+ // correct edge probability of this combined edge, we need to set the edge
+ // probability of A->B to zero, which is already done above. The edge
+ // probability on A->D is calculated by scaling the original probability
+ // on A->B by the probability of B->D.
+ //
+ // Before ifcvt: After ifcvt (assume B->D is kept):
+ //
+ // A A
+ // /| /|\
+ // / B / B|
+ // | /| | ||
+ // |/ | | |/
+ // C D C D
+ //
+ if (ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
+ MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
+ else
+ ToBBI.BB->addSuccessor(Succ, NewProb);
+ }
}
// Now FromBBI always falls through to the next block!
if (NBB && !FromBBI.BB->isSuccessor(NBB))
FromBBI.BB->addSuccessor(NBB);
+ // Normalize the probabilities of ToBBI.BB's successors with all adjustment
+ // we've done above.
+ ToBBI.BB->normalizeSuccProbs();
+
ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
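Editor's note: a worked example of the edge transfer in MergeBlocks, matching the A/B/C/D diagram in the comment above, with made-up probabilities:

#include <cassert>
#include <cmath>

int main() {
  double AtoB = 0.5, AtoC = 0.5; // A's original out-edges
  double BtoC = 0.4, BtoD = 0.6; // B's out-edges

  // B's successors are transferred to A, scaled by the old A->B edge.
  double NewAtoC = AtoC + AtoB * BtoC; // existing edge: add the share, 0.7
  double NewAtoD = AtoB * BtoD;        // new edge A->D,                0.3
  double NewAtoB = 0.0;                // zeroed so normalization is safe

  assert(std::fabs(NewAtoC + NewAtoD + NewAtoB - 1.0) < 1e-9);
}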
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 93e0487..39c1b9f 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -107,6 +108,98 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
+/// \brief Detect re-ordering hazards and dependencies.
+///
+/// This class keeps track of defs and uses, and can be queried if a given
+/// machine instruction can be re-ordered from after the machine instructions
+/// seen so far to before them.
+class HazardDetector {
+ DenseSet<unsigned> RegDefs;
+ DenseSet<unsigned> RegUses;
+ const TargetRegisterInfo &TRI;
+ bool hasSeenClobber;
+
+public:
+ explicit HazardDetector(const TargetRegisterInfo &TRI) :
+ TRI(TRI), hasSeenClobber(false) {}
+
+ /// \brief Make a note of \p MI for later queries to isSafeToHoist.
+ ///
+ /// May clobber this HazardDetector instance. \see isClobbered.
+ void rememberInstruction(MachineInstr *MI);
+
+ /// \brief Return true if it is safe to hoist \p MI from after all the
+ /// instructions seen so far (via rememberInstruction) to before it.
+ bool isSafeToHoist(MachineInstr *MI);
+
+ /// \brief Return true if this instance of HazardDetector has been clobbered
+ /// (i.e. has no more useful information).
+ ///
+ /// A HazardDetector is clobbered when it sees a construct it cannot
+ /// understand, and it would have to return a conservative answer for all
+ /// future queries. Having a separate clobbered state lets the client code
+ /// bail early, without making queries about all of the future instructions
+ /// (which would have returned the most conservative answer anyway).
+ ///
+ /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
+ /// is an error.
+ bool isClobbered() { return hasSeenClobber; }
+};
+}
+
+
+void HazardDetector::rememberInstruction(MachineInstr *MI) {
+ assert(!isClobbered() &&
+ "Don't add instructions to a clobbered hazard detector");
+
+ if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
+ hasSeenClobber = true;
+ return;
+ }
+
+ for (auto *MMO : MI->memoperands()) {
+ // Right now we don't want to worry about LLVM's memory model.
+ if (!MMO->isUnordered()) {
+ hasSeenClobber = true;
+ return;
+ }
+ }
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(MO.getReg());
+ else
+ RegUses.insert(MO.getReg());
+ }
+}
+
+bool HazardDetector::isSafeToHoist(MachineInstr *MI) {
+ assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
+
+ // Right now we don't want to worry about LLVM's memory model. This can be
+ // made more precise later.
+ for (auto *MMO : MI->memoperands())
+ if (!MMO->isUnordered())
+ return false;
+
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg()) {
+ for (unsigned Reg : RegDefs)
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-write or read-after-write
+
+ if (MO.isDef())
+ for (unsigned Reg : RegUses)
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-read
+ }
+ }
+
+ return true;
}
bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
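Editor's note: with HazardDetector factored out, the scanning loop in analyzeBlockForNullChecks (below) reduces to the pattern sketched here. LLVM's types are assumed; isSuitableMemoryOp and foundCandidate are invented placeholders for the suitability test and the NullCheckList bookkeeping the real loop performs.

// Sketch of the intended usage pattern:
HazardDetector HD(*TRI);
for (MachineInstr &MI : *NotNullSucc) {
  // Ask about hoisting MI past everything remembered so far...
  if (isSuitableMemoryOp(&MI) && HD.isSafeToHoist(&MI))
    return foundCandidate(&MI);
  // ...then record MI's defs/uses for later queries.
  HD.rememberInstruction(&MI);
  if (HD.isClobbered())
    return false; // no point scanning any further
}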
@@ -132,10 +225,10 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
- MDNode *BranchMD =
- MBB.getBasicBlock()
- ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit")
- : nullptr;
+ MDNode *BranchMD = nullptr;
+ if (auto *BB = MBB.getBasicBlock())
+ BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
+
if (!BranchMD)
return false;
@@ -188,7 +281,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
// we want to end up with
//
- // Def = TrappingLoad (%RAX + <offset>), LblNull
+ // Def = FaultingLoad (%RAX + <offset>), LblNull
// jmp LblNotNull ;; explicit or fallthrough
//
// LblNotNull:
@@ -199,38 +292,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// LblNull:
// callq throw_NullPointerException
//
+ //
+ // To see why this is legal, consider the two possibilities:
+ //
+ // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the
+ // load instruction dereferences the null page, causing a segmentation
+ // fault.
+ //
+ // 2. %RAX is not null: in this case we know that the load cannot fault, as
+ // otherwise the load would've faulted in the original program too and the
+ // original program would've been undefined.
+ //
+ // This reasoning cannot be extended to justify hoisting through arbitrary
+ // control flow. For instance, in the example below (in pseudo-C)
+ //
+ // if (ptr == null) { throw_npe(); unreachable; }
+ // if (some_cond) { return 42; }
+ // v = ptr->field; // LD
+ // ...
+ //
+ // we cannot (without code duplication) use the load marked "LD" to null check
+ // ptr -- clause (2) above does not apply in this case. In the above program
+ // the safety of ptr->field can be dependent on some_cond; and, for instance,
+ // ptr could be some non-null invalid reference that never gets loaded from
+ // because some_cond is always true.
unsigned PointerReg = MBP.LHS.getReg();
- // As we scan NotNullSucc for a suitable load instruction, we keep track of
- // the registers defined and used by the instructions we scan past. This bit
- // of information lets us decide if it is legal to hoist the load instruction
- // we find (if we do find such an instruction) to before NotNullSucc.
- DenseSet<unsigned> RegDefs, RegUses;
-
- // Returns true if it is safe to reorder MI to before NotNullSucc.
- auto IsSafeToHoist = [&](MachineInstr *MI) {
- // Right now we don't want to worry about LLVM's memory model. This can be
- // made more precise later.
- for (auto *MMO : MI->memoperands())
- if (!MMO->isUnordered())
- return false;
-
- for (auto &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg()) {
- for (unsigned Reg : RegDefs)
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-write or read-after-write
-
- if (MO.isDef())
- for (unsigned Reg : RegUses)
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-read
- }
- }
-
- return true;
- };
+ HazardDetector HD(*TRI);
for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
++MII) {
@@ -238,37 +327,16 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
unsigned BaseReg, Offset;
if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI->getDesc().getNumDefs() == 1 &&
- IsSafeToHoist(MI)) {
+ Offset < PageSize && MI->getDesc().getNumDefs() <= 1 &&
+ HD.isSafeToHoist(MI)) {
NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc);
return true;
}
- // MI did not match our criteria for conversion to a trapping load. Check
- // if we can continue looking.
-
- if (MI->mayStore() || MI->hasUnmodeledSideEffects())
+ HD.rememberInstruction(MI);
+ if (HD.isClobbered())
return false;
-
- for (auto *MMO : MI->memoperands())
- // Right now we don't want to worry about LLVM's memory model.
- if (!MMO->isUnordered())
- return false;
-
- // It _may_ be okay to reorder a later load instruction across MI. Make a
- // note of its operands so that we can make the legality check if we find a
- // suitable load instruction:
-
- for (auto &MO : MI->operands()) {
- if (!MO.isReg() || !MO.getReg())
- continue;
-
- if (MO.isDef())
- RegDefs.insert(MO.getReg());
- else
- RegUses.insert(MO.getReg());
- }
}
return false;
@@ -281,14 +349,19 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
MachineBasicBlock *MBB,
MCSymbol *HandlerLabel) {
+ const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
+ // all targets.
+
DebugLoc DL;
unsigned NumDefs = LoadMI->getDesc().getNumDefs();
- assert(NumDefs == 1 && "other cases unhandled!");
- (void)NumDefs;
+ assert(NumDefs <= 1 && "other cases unhandled!");
- unsigned DefReg = LoadMI->defs().begin()->getReg();
- assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
- "expected exactly one def!");
+ unsigned DefReg = NoRegister;
+ if (NumDefs != 0) {
+ DefReg = LoadMI->defs().begin()->getReg();
+ assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ "expected exactly one def!");
+ }
auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
.addSym(HandlerLabel)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 9989f23..e310132 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -141,7 +141,7 @@ public:
InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AliasAnalysis>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
@@ -329,8 +329,8 @@ static raw_ostream &operator<<(raw_ostream &OS,
if (SVI.KillsSource)
OS << " kill";
OS << " deps[";
- for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
- OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ for (VNInfo *Dep : SVI.Deps)
+ OS << ' ' << Dep->id << '@' << Dep->def;
OS << " ]";
if (SVI.DefMI)
OS << " def: " << *SVI.DefMI;
@@ -383,9 +383,8 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
unsigned SpillDepth = ~0u;
- for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
- DepE = Deps->end(); DepI != DepE; ++DepI) {
- SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ for (VNInfo *Dep : *Deps) {
+ SibValueMap::iterator DepSVI = SibValues.find(Dep);
assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
SibValueInfo &DepSV = DepSVI->second;
if (!DepSV.SpillMBB)
@@ -566,12 +565,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Create entries for all the PHIs. Don't add them to the worklist, we
// are processing all of them in one go here.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+ for (VNInfo *PHI : PHIs)
+ SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
// Add every PHI as a dependent of all the non-PHIs.
- for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
- VNInfo *NonPHI = NonPHIs[i];
+ for (VNInfo *NonPHI : NonPHIs) {
// Known value? Try an insertion.
std::tie(SVI, Inserted) =
SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
@@ -654,8 +652,7 @@ void InlineSpiller::analyzeSiblingValues() {
return;
LiveInterval &OrigLI = LIS.getInterval(Original);
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
VE = LI.vni_end(); VI != VE; ++VI) {
@@ -831,9 +828,8 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
if (VNI->isPHIDef()) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
+ for (MachineBasicBlock *P : MBB->predecessors()) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
if (PVNI)
WorkList.push_back(std::make_pair(LI, PVNI));
}
@@ -920,8 +916,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
<< *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewVReg);
MO.setIsKill();
@@ -944,8 +940,7 @@ void InlineSpiller::reMaterializeAll() {
// Try to remat before all uses of snippets.
bool anyRemat = false;
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::reg_bundle_iterator
RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
@@ -963,8 +958,7 @@ void InlineSpiller::reMaterializeAll() {
return;
// Remove any values that were completely rematted.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
@@ -989,8 +983,7 @@ void InlineSpiller::reMaterializeAll() {
// Get rid of deleted and empty intervals.
unsigned ResultPos = 0;
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
- unsigned Reg = RegsToSpill[i];
+ for (unsigned Reg : RegsToSpill) {
if (!LIS.hasInterval(Reg))
continue;
@@ -1098,9 +1091,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned Idx = Ops[i].second;
- assert(MI == Ops[i].first && "Instruction conflict during operand folding");
+ for (const auto &OpPair : Ops) {
+ unsigned Idx = OpPair.second;
+ assert(MI == OpPair.first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
if (MO.isImplicit()) {
ImpReg = MO.getReg();
@@ -1139,7 +1132,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
continue;
MIBundleOperands::PhysRegInfo RI =
MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
- if (RI.Defines)
+ if (RI.FullyDefined)
continue;
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
@@ -1152,10 +1145,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
// Insert any new instructions other than FoldMI into the LIS maps.
assert(!MIS.empty() && "Unexpected empty span of instructions!");
- for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
- MII != End; ++MII)
- if (&*MII != FoldMI)
- LIS.InsertMachineInstrInMaps(&*MII);
+ for (MachineInstr &MI : MIS)
+ if (&MI != FoldMI)
+ LIS.InsertMachineInstrInMaps(&MI);
// TII.foldMemoryOperand may have left some implicit operands on the
// instruction. Strip them.
@@ -1301,11 +1293,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Rewrite instruction operands.
bool hasLiveDef = false;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
MO.setReg(NewVReg);
if (MO.isUse()) {
- if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
+ if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
MO.setIsKill();
} else {
if (!MO.isDead())
@@ -1335,14 +1327,14 @@ void InlineSpiller::spillAll() {
VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]),
+ for (unsigned Reg : RegsToSpill)
+ StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
StackInt->getValNumInfo(0));
DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
// Spill around uses of all RegsToSpill.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- spillAroundUses(RegsToSpill[i]);
+ for (unsigned Reg : RegsToSpill)
+ spillAroundUses(Reg);
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
@@ -1351,9 +1343,9 @@ void InlineSpiller::spillAll() {
}
// Finally delete the SnippetCopies.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (unsigned Reg : RegsToSpill) {
for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end();
+ RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
RI != E; ) {
MachineInstr *MI = &*(RI++);
assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
@@ -1364,8 +1356,8 @@ void InlineSpiller::spillAll() {
}
// Delete all spilled registers.
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- Edit->eraseVirtReg(RegsToSpill[i]);
+ for (unsigned Reg : RegsToSpill)
+ Edit->eraseVirtReg(Reg);
}
void InlineSpiller::spill(LiveRangeEdit &edit) {
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
index fd5749b..f8cc247 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -144,7 +144,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
PrevPos = Start;
}
- MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ MachineFunction::const_iterator MFI =
+ MF->getBlockNumbered(MBBNum)->getIterator();
BlockInterference *BI = &Blocks[MBBNum];
ArrayRef<SlotIndex> RegMaskSlots;
ArrayRef<const uint32_t*> RegMaskBits;
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 53c8adc..724f1d6 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -52,7 +52,7 @@ using namespace llvm;
static cl::opt<bool> LowerInterleavedAccesses(
"lower-interleaved-accesses",
cl::desc("Enable lowering interleaved accesses to intrinsics"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static unsigned MaxFactor; // The maximum supported interleave factor.
@@ -271,7 +271,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
SmallVector<Instruction *, 32> DeadInsts;
bool Changed = false;
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I))
Changed |= lowerInterleavedLoad(LI, DeadInsts);
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 2c95e9e..2962f87 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -35,24 +35,24 @@ static void EnsureFunctionExists(Module &M, const char *Name,
M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
}
-static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
const char *FName,
const char *DName, const char *LDName) {
// Insert definitions for all the floating point types.
- switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ switch((int)Fn.arg_begin()->getType()->getTypeID()) {
case Type::FloatTyID:
- EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
Type::getFloatTy(M.getContext()));
break;
case Type::DoubleTyID:
- EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
Type::getDoubleTy(M.getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
- Fn->arg_begin()->getType());
+ EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
+ Fn.arg_begin()->getType());
break;
}
}
@@ -67,7 +67,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
Type *RetTy) {
// If we haven't already looked up this function, check to see if the
// program already contains a function with this name.
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
// Get or insert the definition now.
std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
@@ -75,7 +75,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
Constant* FCache = M->getOrInsertFunction(NewFn,
FunctionType::get(RetTy, ParamTys, false));
- IRBuilder<> Builder(CI->getParent(), CI);
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
@@ -94,20 +94,20 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
void IntrinsicLowering::AddPrototypes(Module &M) {
LLVMContext &Context = M.getContext();
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->isDeclaration() && !I->use_empty())
- switch (I->getIntrinsicID()) {
+ for (auto &F : M)
+ if (F.isDeclaration() && !F.use_empty())
+ switch (F.getIntrinsicID()) {
default: break;
case Intrinsic::setjmp:
- EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
Type::getInt32Ty(M.getContext()));
break;
case Intrinsic::longjmp:
- EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
Type::getVoidTy(M.getContext()));
break;
case Intrinsic::siglongjmp:
- EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
Type::getVoidTy(M.getContext()));
break;
case Intrinsic::memcpy:
@@ -132,31 +132,31 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
DL.getIntPtrType(Context), nullptr);
break;
case Intrinsic::sqrt:
- EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
break;
case Intrinsic::sin:
- EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
break;
case Intrinsic::cos:
- EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
break;
case Intrinsic::pow:
- EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
break;
case Intrinsic::log:
- EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
break;
case Intrinsic::log2:
- EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
break;
case Intrinsic::log10:
- EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
break;
case Intrinsic::exp:
- EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
break;
case Intrinsic::exp2:
- EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
break;
}
}
@@ -167,8 +167,8 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
-
- IRBuilder<> Builder(IP->getParent(), IP);
+
+ IRBuilder<> Builder(IP);
switch(BitSize) {
default: llvm_unreachable("Unhandled type size of value to byteswap!");
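[Editorial aside, not part of the patch: LowerBSWAP expands the bswap intrinsic into plain shift/mask/or IR for targets without a native instruction. As a self-contained illustration of the transform it emits, here is the 32-bit case written directly in C++; the function name is ours.]

    #include <cstdint>

    // What LowerBSWAP's 32-bit expansion computes: move each byte of V to
    // its mirrored position using shifts, masks, and ors.
    uint32_t bswap32(uint32_t V) {
      uint32_t B3 = V << 24;                // byte 0 -> byte 3
      uint32_t B2 = (V << 8) & 0x00FF0000u; // byte 1 -> byte 2
      uint32_t B1 = (V >> 8) & 0x0000FF00u; // byte 2 -> byte 1
      uint32_t B0 = V >> 24;                // byte 3 -> byte 0
      return B3 | B2 | B1 | B0;
    }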
@@ -268,7 +268,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
};
- IRBuilder<> Builder(IP->getParent(), IP);
+ IRBuilder<> Builder(IP);
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
unsigned WordSize = (BitSize + 63) / 64;
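[Editorial aside, not part of the patch: the mask table above ends with 0x0000FFFF0000FFFF and 0x00000000FFFFFFFF; together with the usual 0x5555..., 0x3333..., 0x0F0F..., 0x00FF... entries it drives the classic SWAR tree reduction that LowerCTPOP emits as IR. The same technique for one 64-bit word, as a runnable sketch:]

    #include <cstdint>

    // After step i, every 2^(i+1)-bit field of V holds the number of set
    // bits that the corresponding field of the input contained.
    unsigned popcount64(uint64_t V) {
      V = (V & 0x5555555555555555ULL) + ((V >> 1)  & 0x5555555555555555ULL);
      V = (V & 0x3333333333333333ULL) + ((V >> 2)  & 0x3333333333333333ULL);
      V = (V & 0x0F0F0F0F0F0F0F0FULL) + ((V >> 4)  & 0x0F0F0F0F0F0F0F0FULL);
      V = (V & 0x00FF00FF00FF00FFULL) + ((V >> 8)  & 0x00FF00FF00FF00FFULL);
      V = (V & 0x0000FFFF0000FFFFULL) + ((V >> 16) & 0x0000FFFF0000FFFFULL);
      V = (V & 0x00000000FFFFFFFFULL) + ((V >> 32) & 0x00000000FFFFFFFFULL);
      return static_cast<unsigned>(V);
    }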
@@ -301,7 +301,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
/// instruction IP.
static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
- IRBuilder<> Builder(IP->getParent(), IP);
+ IRBuilder<> Builder(IP);
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
for (unsigned i = 1; i < BitSize; i <<= 1) {
@@ -338,7 +338,7 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
}
void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
- IRBuilder<> Builder(CI->getParent(), CI);
+ IRBuilder<> Builder(CI);
LLVMContext &Context = CI->getContext();
const Function *Callee = CI->getCalledFunction();
@@ -424,6 +424,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
+ case Intrinsic::get_dynamic_area_offset:
+ errs() << "WARNING: this target does not support the custom llvm.get."
+ "dynamic.area.offset. It is being lowered to a constant 0\n";
+ // Just lower it to a constant 0 because for most targets
+ // @llvm.get.dynamic.area.offset is lowered to zero.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
+ break;
case Intrinsic::returnaddress:
case Intrinsic::frameaddress:
errs() << "WARNING: this target does not support the llvm."
@@ -589,7 +596,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
return false;
// Okay, we can do this xform, do so now.
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37299eb..1c27377 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -82,7 +82,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
}
TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(BasicTTIImpl(this, F));
});
}
@@ -125,9 +125,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PM.add(new MachineFunctionAnalysis(*TM, MFInitializer));
// Enable FastISel with -fast, but allow that to be overridden.
+ TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
if (EnableFastISelOption == cl::BOU_TRUE ||
(TM->getOptLevel() == CodeGenOpt::None &&
- EnableFastISelOption != cl::BOU_FALSE))
+ TM->getO0WantsFastISel()))
TM->setFastISel(true);
// Ask the target for an isel.
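[Editorial aside, not part of the patch: EnableFastISelOption is a three-state cl::boolOrDefault, and the change above first records "not explicitly disabled" on the TargetMachine. The resulting decision, written out as a standalone sketch with our own names:]

    // Mirrors cl::boolOrDefault from llvm/Support/CommandLine.h.
    enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

    // FastISel is enabled when explicitly requested, or at -O0 unless
    // explicitly disabled (the value stored by setO0WantsFastISel above).
    bool enableFastISel(BoolOrDefault Opt, bool OptLevelIsNone) {
      bool O0WantsFastISel = (Opt != BOU_FALSE);
      return Opt == BOU_TRUE || (OptLevelIsNone && O0WantsFastISel);
    }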
@@ -202,6 +203,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
break;
}
@@ -254,6 +256,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
new file mode 100644
index 0000000..98d30b9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -0,0 +1,405 @@
+//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass implements a data flow analysis that propagates debug location
+/// information by inserting additional DBG_VALUE instructions into the machine
+/// instruction stream. The pass internally builds debug location liveness
+/// ranges to determine the points where additional DBG_VALUEs need to be
+/// inserted.
+///
+/// This is a separate pass from DbgValueHistoryCalculator to facilitate
+/// testing and improve modularity.
+///
+//===----------------------------------------------------------------------===//
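[Editorial aside, not part of the patch: the description above is a standard forward data-flow fixed point: join the predecessors' outgoing locations, push them through a transfer function, and revisit successors until nothing changes. A minimal, self-contained sketch of that shape with toy STL types (Block, LocMap, and Gen are inventions for illustration; the real pass keeps VarLoc lists keyed by MachineBasicBlock):]

    #include <deque>
    #include <map>
    #include <string>
    #include <vector>

    using LocMap = std::map<std::string, unsigned>; // variable -> location

    struct Block {
      std::vector<Block *> preds, succs;
      LocMap Gen;     // locations this block itself establishes
      LocMap In, Out; // data-flow state
    };

    // Join: keep only (variable, location) pairs all predecessors agree on.
    static LocMap join(const Block &B) {
      LocMap In;
      bool First = true;
      for (Block *P : B.preds) {
        if (First) { In = P->Out; First = false; continue; }
        for (auto It = In.begin(); It != In.end();) {
          auto PIt = P->Out.find(It->first);
          if (PIt == P->Out.end() || PIt->second != It->second)
            It = In.erase(It);
          else
            ++It;
        }
      }
      return In;
    }

    static void solve(std::vector<Block *> &Blocks) {
      std::deque<Block *> Work(Blocks.begin(), Blocks.end());
      while (!Work.empty()) {
        Block *B = Work.front();
        Work.pop_front();
        B->In = join(*B);
        LocMap Out = B->In;
        for (auto &G : B->Gen) // transfer: local definitions win
          Out[G.first] = G.second;
        if (Out != B->Out) {   // changed: successors must be revisited
          B->Out = std::move(Out);
          for (Block *S : B->succs)
            Work.push_back(S);
        }
      }
    }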
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <deque>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "live-debug-values"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+
+namespace {
+
+class LiveDebugValues : public MachineFunctionPass {
+
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ typedef std::pair<const DILocalVariable *, const DILocation *>
+ InlinedVariable;
+
+ /// A potentially inlined instance of a variable.
+ struct DebugVariable {
+ const DILocalVariable *Var;
+ const DILocation *InlinedAt;
+
+ DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt)
+ : Var(_var), InlinedAt(_inlinedAt) {}
+
+ bool operator==(const DebugVariable &DV) const {
+ return (Var == DV.Var) && (InlinedAt == DV.InlinedAt);
+ }
+ };
+
+ /// Member variables and functions for Range Extension across basic blocks.
+ struct VarLoc {
+ DebugVariable Var;
+ const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr.
+
+ VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {}
+
+ bool operator==(const VarLoc &V) const;
+ };
+
+ typedef std::list<VarLoc> VarLocList;
+ typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB;
+
+ bool OLChanged; // OutgoingLocs got changed for this bb.
+ bool MBBJoined; // The MBB was joined.
+
+ void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
+ void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
+ void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs);
+ void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+
+ void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+
+ bool ExtendRanges(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ LiveDebugValues();
+
+ /// Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Print to ostream with a message.
+ void printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+ raw_ostream &Out) const;
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char LiveDebugValues::ID = 0;
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+ false, false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// \brief If @MI is a DBG_VALUE whose debug value is described by a defined
+// register, returns the number of that register; otherwise returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ assert(MI.getNumOperands() == 4);
+ // If the location of the variable is described using a register (directly
+ // or indirectly), that register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+// \brief This function takes two DBG_VALUE instructions and returns true
+// if their offsets are equal; otherwise returns false.
+static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) {
+ assert(MI1.isDebugValue());
+ assert(MI1.getNumOperands() == 4);
+
+ assert(MI2.isDebugValue());
+ assert(MI2.getNumOperands() == 4);
+
+ if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue())
+ return true;
+
+ // Check if both MIs are indirect and they are equal.
+ if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue())
+ return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm();
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+ raw_ostream &Out) const {
+ Out << "Printing " << msg << ":\n";
+ for (const auto &L : V) {
+ Out << "MBB: " << L.first->getName() << ":\n";
+ for (const auto &VLL : L.second) {
+ Out << " Var: " << VLL.Var.Var->getName();
+ Out << " MI: ";
+ (*VLL.MI).dump();
+ Out << "\n";
+ }
+ }
+ Out << "\n";
+}
+
+bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const {
+ return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) &&
+ (areOffsetsEqual(*MI, *V.MI));
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+void LiveDebugValues::transferDebugValue(MachineInstr &MI,
+ VarLocList &OpenRanges) {
+ if (!MI.isDebugValue())
+ return;
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(
+ std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](const VarLoc &V) { return (Var == V.Var); }),
+ OpenRanges.end());
+
+ // Add Var to OpenRanges from this DBG_VALUE.
+ // TODO: Currently this handles only DBG_VALUEs whose location is a register.
+ if (isDescribedByReg(MI)) {
+ VarLoc V(Var, &MI);
+ OpenRanges.push_back(std::move(V));
+ }
+}
+
+/// A definition of a register may mark the end of a range.
+void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
+ VarLocList &OpenRanges) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!(MO.isReg() && MO.isDef() && MO.getReg() &&
+ TRI->isPhysicalRegister(MO.getReg())))
+ continue;
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](const VarLoc &V) {
+ return (*RAI ==
+ isDescribedByReg(*V.MI));
+ }),
+ OpenRanges.end());
+ }
+}
+
+/// Terminate all open ranges at the end of the current basic block.
+void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
+ VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs) {
+ const MachineBasicBlock *CurMBB = MI.getParent();
+ if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
+ return;
+
+ if (OpenRanges.empty())
+ return;
+
+ if (OutLocs.find(CurMBB) == OutLocs.end()) {
+ // Create space for new Outgoing locs entries.
+ VarLocList VLL;
+ OutLocs.insert(std::make_pair(CurMBB, std::move(VLL)));
+ }
+ auto OL = OutLocs.find(CurMBB);
+ assert(OL != OutLocs.end());
+ VarLocList &VLL = OL->second;
+
+ for (auto OR : OpenRanges) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ assert(OR.MI->isDebugValue());
+ DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump(););
+ if (std::find_if(VLL.begin(), VLL.end(),
+ [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) {
+ VLL.push_back(std::move(OR));
+ OLChanged = true;
+ }
+ }
+ OpenRanges.clear();
+}
+
+/// This routine creates OpenRanges and OutLocs.
+void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
+ VarLocInMBB &OutLocs) {
+ transferDebugValue(MI, OpenRanges);
+ transferRegisterDef(MI, OpenRanges);
+ transferTerminatorInst(MI, OpenRanges, OutLocs);
+}
+
+/// This routine joins the analysis results of all incoming edges in @MBB by
+/// inserting a new DBG_VALUE instruction at the start of @MBB, provided the
+/// same source variable resides in the same location in all of @MBB's
+/// predecessors.
+void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
+ VarLocInMBB &InLocs) {
+ DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+
+ MBBJoined = false;
+
+ VarLocList InLocsT; // Temporary incoming locations.
+
+ // For all predecessors of this MBB, find the set of VarLocs that can be
+ // joined.
+ for (auto p : MBB.predecessors()) {
+ auto OL = OutLocs.find(p);
+ // The join is empty if any predecessor has no OutLocs entry.
+ if (OL == OutLocs.end())
+ return;
+
+ // Just copy over the Out locs to incoming locs for the first predecessor.
+ if (p == *MBB.pred_begin()) {
+ InLocsT = OL->second;
+ continue;
+ }
+
+ // Join with this predecessor.
+ VarLocList &VLL = OL->second;
+ InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(),
+ [&](VarLoc &ILT) {
+ return (std::find_if(VLL.begin(), VLL.end(),
+ [&](const VarLoc &V) {
+ return (ILT == V);
+ }) == VLL.end());
+ }),
+ InLocsT.end());
+ }
+
+ if (InLocsT.empty())
+ return;
+
+ if (InLocs.find(&MBB) == InLocs.end()) {
+ // Create space for new Incoming locs entries.
+ VarLocList VLL;
+ InLocs.insert(std::make_pair(&MBB, std::move(VLL)));
+ }
+ auto IL = InLocs.find(&MBB);
+ assert(IL != InLocs.end());
+ VarLocList &ILL = IL->second;
+
+ // Insert DBG_VALUE instructions, if not already inserted.
+ for (auto ILT : InLocsT) {
+ if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) {
+ return (ILT == I);
+ }) == ILL.end()) {
+ // This VarLoc is not found in InLocs, i.e. it has not been inserted yet.
+ // So a new range is started for the variable from the beginning of the MBB
+ // by inserting a new DBG_VALUE. transfer() will end this range where
+ // appropriate.
+ const MachineInstr *DMI = ILT.MI;
+ MachineInstr *MI =
+ BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
+ DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ if (DMI->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ ++NumInserted;
+ MBBJoined = true; // rerun transfer().
+
+ VarLoc V(ILT.Var, MI);
+ ILL.push_back(std::move(V));
+ }
+ }
+}
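[Editorial aside, not part of the patch: both join() above and the transfer functions filter a VarLocList with the erase(remove_if(...)) idiom. The per-predecessor intersection step, reduced to a self-contained template over toy element types:]

    #include <algorithm>
    #include <list>

    // Keep only elements of Incoming that also appear (by ==) in PredOut;
    // this is the set-intersection step join() performs per predecessor.
    template <typename T>
    void intersectInPlace(std::list<T> &Incoming, const std::list<T> &PredOut) {
      Incoming.erase(
          std::remove_if(Incoming.begin(), Incoming.end(),
                         [&](const T &V) {
                           return std::find(PredOut.begin(), PredOut.end(), V) ==
                                  PredOut.end();
                         }),
          Incoming.end());
    }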
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "\nDebug Range Extension\n");
+
+ bool Changed = false;
+ OLChanged = MBBJoined = false;
+
+ VarLocList OpenRanges; // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+
+ std::deque<MachineBasicBlock *> BBWorklist;
+
+ // Initialize OutLocs by running the transfer function once over every MBB.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs()));
+
+ // Construct a worklist of MBBs.
+ for (auto &MBB : MF)
+ BBWorklist.push_back(&MBB);
+
+ // Perform join() and transfer() using the worklist until the ranges
+ // converge. Ranges have converged when the worklist is empty.
+ while (!BBWorklist.empty()) {
+ MachineBasicBlock *MBB = BBWorklist.front();
+ BBWorklist.pop_front();
+
+ join(*MBB, OutLocs, InLocs);
+
+ if (MBBJoined) {
+ Changed = true;
+ for (auto &MI : *MBB)
+ transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto s : MBB->successors())
+ if (std::find(BBWorklist.begin(), BBWorklist.end(), s) ==
+ BBWorklist.end()) // add if not already present.
+ BBWorklist.push_back(s);
+ }
+ }
+ }
+ DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs()));
+ return Changed;
+}
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ Changed |= ExtendRanges(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 1571551..6dac7db 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -91,9 +91,7 @@ public:
bool dominates(MachineBasicBlock *MBB) {
if (LBlocks.empty())
LS.getMachineBasicBlocks(DL, LBlocks);
- if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
- return true;
- return false;
+ return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
}
};
} // end anonymous namespace
@@ -512,7 +510,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
bool Changed = false;
for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE;) {
if (!MBBI->isDebugValue()) {
@@ -536,65 +534,49 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
return Changed;
}
-void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
- LiveRange *LR, const VNInfo *VNI,
- SmallVectorImpl<SlotIndex> *Kills,
+/// We only propagate DBG_VALUEs locally here; LiveDebugValues performs a
+/// data-flow analysis to propagate them beyond basic block boundaries.
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+ const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS) {
- SmallVector<SlotIndex, 16> Todo;
- Todo.push_back(Idx);
- do {
- SlotIndex Start = Todo.pop_back_val();
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
- SlotIndex Stop = LIS.getMBBEndIdx(MBB);
- LocMap::iterator I = locInts.find(Start);
-
- // Limit to VNI's live range.
- bool ToEnd = true;
- if (LR && VNI) {
- LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
- if (!Segment || Segment->valno != VNI) {
- if (Kills)
- Kills->push_back(Start);
- continue;
- }
- if (Segment->end < Stop)
- Stop = Segment->end, ToEnd = false;
- }
-
- // There could already be a short def at Start.
- if (I.valid() && I.start() <= Start) {
- // Stop when meeting a different location or an already extended interval.
- Start = Start.getNextSlot();
- if (I.value() != LocNo || I.stop() != Start)
- continue;
- // This is a one-slot placeholder. Just skip it.
- ++I;
+ SlotIndex Start = Idx;
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LR && VNI) {
+ LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
+ if (!Segment || Segment->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ return;
}
+ if (Segment->end < Stop)
+ Stop = Segment->end, ToEnd = false;
+ }
- // Limited by the next def.
- if (I.valid() && I.start() < Stop)
- Stop = I.start(), ToEnd = false;
- // Limited by VNI's live range.
- else if (!ToEnd && Kills)
- Kills->push_back(Stop);
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ return;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
- if (Start >= Stop)
- continue;
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+ if (Start < Stop)
I.insert(Start, Stop, LocNo);
-
- // If we extended to the MBB end, propagate down the dominator tree.
- if (!ToEnd)
- continue;
- const std::vector<MachineDomTreeNode*> &Children =
- MDT.getNode(MBB)->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Children[i]->getBlock();
- if (UVS.dominates(MBB))
- Todo.push_back(LIS.getMBBStartIdx(MBB));
- }
- } while (!Todo.empty());
}
void
@@ -763,7 +745,7 @@ static void removeDebugValues(MachineFunction &mf) {
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
- if (!FunctionDIs.count(mf.getFunction())) {
+ if (!mf.getFunction()->getSubprogram()) {
removeDebugValues(mf);
return false;
}
@@ -1004,11 +986,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
SlotIndex Stop = I.stop();
unsigned LocNo = I.value();
DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
- MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
- SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while(Stop > MBBEnd) {
@@ -1016,9 +998,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
Start = MBBEnd;
if (++MBB == MFEnd)
break;
- MBBEnd = LIS.getMBBEndIdx(MBB);
+ MBBEnd = LIS.getMBBEndIdx(&*MBB);
DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
}
DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1047,7 +1029,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
}
bool LiveDebugVariables::doInitialization(Module &M) {
- FunctionDIs = makeSubprogramMap(M);
return Pass::doInitialization(M);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index 694aa17..3d36f4d 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -33,7 +33,6 @@ class VirtRegMap;
class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
void *pImpl;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index d75e441..efad36f 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
@@ -865,7 +864,7 @@ void LiveInterval::constructMainRangeFromSubranges(
// - If any of the subranges is live at a point the main liverange has to be
 // live too; conversely, if no subrange is live the main range mustn't be
// live either.
- // We do this by scannig through all the subranges simultaneously creating new
+ // We do this by scanning through all the subranges simultaneously creating new
// segments in the main range as segments start/ends come up in the subranges.
assert(hasSubRanges() && "expected subranges to be present");
assert(segments.empty() && valnos.empty() && "expected empty main range");
@@ -889,7 +888,7 @@ void LiveInterval::constructMainRangeFromSubranges(
Segment CurrentSegment;
bool ConstructingSegment = false;
bool NeedVNIFixup = false;
- unsigned ActiveMask = 0;
+ LaneBitmask ActiveMask = 0;
SlotIndex Pos = First;
while (true) {
SlotIndex NextPos = Last;
@@ -899,7 +898,7 @@ void LiveInterval::constructMainRangeFromSubranges(
END_SEGMENT,
} Event = NOTHING;
// Which subregister lanes are affected by the current event.
- unsigned EventMask = 0;
+ LaneBitmask EventMask = 0;
// Whether a BEGIN_SEGMENT is also a valno definition point.
bool IsDef = false;
// Find the next begin or end of a subrange segment. Combine masks if we
@@ -1066,7 +1065,7 @@ void LiveInterval::print(raw_ostream &OS) const {
super::print(OS);
// Print subranges
for (const SubRange &SR : subranges()) {
- OS << format(" L%04X ", SR.LaneMask) << SR;
+ OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR;
}
}
@@ -1101,8 +1100,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
super::verify();
// Make sure SubRanges are fine and LaneMasks are disjunct.
- unsigned Mask = 0;
- unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
for (const SubRange &SR : subranges()) {
// Subrange lanemask should be disjunct to any previous subrange masks.
assert((Mask & SR.LaneMask) == 0);
@@ -1110,6 +1109,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
 // The subrange mask should be contained in the maximum lane mask for the vreg.
assert((Mask & ~MaxMask) == 0);
+ // Empty subranges must be removed.
+ assert(!SR.empty());
SR.verify();
// Main liverange should cover subrange.
@@ -1370,11 +1371,42 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
return EqClass.getNumClasses();
}
-void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
- MachineRegisterInfo &MRI) {
- assert(LIV[0] && "LIV[0] must be set");
- LiveInterval &LI = *LIV[0];
+template<typename LiveRangeT, typename EqClassesT>
+static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
+ EqClassesT VNIClasses) {
+ // Move segments to new intervals.
+ LiveRange::iterator J = LR.begin(), E = LR.end();
+ while (J != E && VNIClasses[J->valno->id] == 0)
+ ++J;
+ for (LiveRange::iterator I = J; I != E; ++I) {
+ if (unsigned eq = VNIClasses[I->valno->id]) {
+ assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ SplitLRs[eq-1]->segments.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LR.segments.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LR.getNumValNums();
+ while (j != e && VNIClasses[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (unsigned eq = VNIClasses[i]) {
+ VNI->id = SplitLRs[eq-1]->getNumValNums();
+ SplitLRs[eq-1]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LR.valnos[j++] = VNI;
+ }
+ }
+ LR.valnos.resize(j);
+}
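[Editorial aside, not part of the patch: the numbering convention in DistributeRange is that equivalence class 0 stays in the original range, compacted in place, while class k > 0 moves into SplitLRs[k-1]. The same stable partition on plain vectors, with the prefix-skipping optimization omitted:]

    #include <cassert>
    #include <vector>

    // Class 0 elements stay in Values (compacted); class k > 0 elements are
    // appended to Split[k-1], preserving their relative order.
    void distribute(std::vector<int> &Values, const std::vector<unsigned> &Class,
                    std::vector<std::vector<int>> &Split) {
      assert(Values.size() == Class.size());
      size_t J = 0;
      for (size_t I = 0; I != Values.size(); ++I) {
        if (unsigned Eq = Class[I])
          Split[Eq - 1].push_back(Values[I]);
        else
          Values[J++] = Values[I];
      }
      Values.resize(J);
    }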
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
// Rewrite instructions.
for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
RE = MRI.reg_end(); RI != RE;) {
@@ -1396,38 +1428,41 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
// NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
continue;
- MO.setReg(LIV[getEqClass(VNI)]->reg);
- }
-
- // Move runs to new intervals.
- LiveInterval::iterator J = LI.begin(), E = LI.end();
- while (J != E && EqClass[J->valno->id] == 0)
- ++J;
- for (LiveInterval::iterator I = J; I != E; ++I) {
- if (unsigned eq = EqClass[I->valno->id]) {
- assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
- "New intervals should be empty");
- LIV[eq]->segments.push_back(*I);
- } else
- *J++ = *I;
+ if (unsigned EqClass = getEqClass(VNI))
+ MO.setReg(LIV[EqClass-1]->reg);
}
- // TODO: do not cheat anymore by simply cleaning all subranges
- LI.clearSubRanges();
- LI.segments.erase(J, E);
- // Transfer VNInfos to their new owners and renumber them.
- unsigned j = 0, e = LI.getNumValNums();
- while (j != e && EqClass[j] == 0)
- ++j;
- for (unsigned i = j; i != e; ++i) {
- VNInfo *VNI = LI.getValNumInfo(i);
- if (unsigned eq = EqClass[i]) {
- VNI->id = LIV[eq]->getNumValNums();
- LIV[eq]->valnos.push_back(VNI);
- } else {
- VNI->id = j;
- LI.valnos[j++] = VNI;
+ // Distribute subregister liveranges.
+ if (LI.hasSubRanges()) {
+ unsigned NumComponents = EqClass.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ // Create new subranges in the split intervals and construct a mapping
+ // for the VNInfos in the subrange.
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumComponents-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
+ assert(MainRangeVNI != nullptr
+ && "SubRange def must have corresponding main range def");
+ unsigned ComponentNum = getEqClass(MainRangeVNI);
+ VNIMapping.push_back(ComponentNum);
+ if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
+ SubRanges[ComponentNum-1]
+ = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ }
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
}
+ LI.removeEmptySubRanges();
}
- LI.valnos.resize(j);
+
+ // Distribute main liverange.
+ DistributeRange(LI, LIV, EqClass);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index c00b010..9451d92 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -48,7 +47,7 @@ char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
@@ -76,8 +75,8 @@ cl::opt<bool> UseSegmentSetForPhysRegs(
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
// LiveVariables isn't really required by this analysis, it is only required
// here to make sure it is live during TwoAddressInstructionPass and
// PHIElimination. This is temporary.
@@ -124,7 +123,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -198,9 +197,16 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
+ bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg);
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
- computeDeadValues(LI, nullptr);
+ LRCalc->calculate(LI, ShouldTrackSubRegLiveness);
+ bool SeparatedComponents = computeDeadValues(LI, nullptr);
+ if (SeparatedComponents) {
+ assert(ShouldTrackSubRegLiveness
+ && "Separated components should only occur for unused subreg defs");
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ splitSeparateComponents(LI, SplitLIs);
+ }
}
void LiveIntervals::computeVirtRegs() {
@@ -216,19 +222,31 @@ void LiveIntervals::computeRegMasks() {
RegMaskBlocks.resize(MF->getNumBlockIDs());
// Find all instructions with regmask operands.
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ for (MachineBasicBlock &MBB : *MF) {
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
RMB.first = RegMaskSlots.size();
- for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
- MI != ME; ++MI)
- for (const MachineOperand &MO : MI->operands()) {
+
+ // Some block starts, such as EH funclets, create masks.
+ if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
+ for (MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
- RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
- RegMaskBits.push_back(MO.getRegMask());
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
}
+ }
+
+ // Some block ends, such as funclet returns, create masks.
+ if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
// Compute the number of register mask instructions in this block.
RMB.second = RegMaskSlots.size() - RMB.first;
}
@@ -296,18 +314,17 @@ void LiveIntervals::computeLiveInRegUnits() {
// Check all basic blocks for live-ins.
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
- const MachineBasicBlock *MBB = MFI;
+ const MachineBasicBlock *MBB = &*MFI;
// We only care about ABI blocks: Entry + landing pads.
- if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
continue;
// Create phi-defs at Begin for all live-in registers.
SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
- for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
- LIE = MBB->livein_end(); LII != LIE; ++LII) {
- for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ for (const auto &LI : MBB->liveins()) {
+ for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
@@ -396,9 +413,6 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
}
}
-/// shrinkToUses - After removing some uses of a register, shrink its live
-/// range to just the remaining uses. This method does not compute reaching
-/// defs for new uses, and it doesn't remove dead defs.
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
DEBUG(dbgs() << "Shrink: " << *li << '\n');
@@ -406,9 +420,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
&& "Can only shrink virtual registers");
// Shrink subregister live ranges.
+ bool NeedsCleanup = false;
for (LiveInterval::SubRange &S : li->subranges()) {
shrinkToUses(S, li->reg);
+ if (S.empty())
+ NeedsCleanup = true;
}
+ if (NeedsCleanup)
+ li->removeEmptySubRanges();
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
@@ -456,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
- bool PHIRemoved = false;
+ bool MayHaveSplitComponents = false;
for (auto VNI : LI.valnos) {
if (VNI->isUnused())
continue;
@@ -466,10 +485,13 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
+ bool DeadBeforeDef = false;
+ unsigned VReg = LI.reg;
+ if (MRI->shouldTrackSubRegLiveness(VReg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
- MI->addRegisterDefReadUndef(LI.reg);
+ MI->setRegisterDefReadUndef(VReg);
+ DeadBeforeDef = true;
}
}
@@ -480,19 +502,27 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
VNI->markUnused();
LI.removeSegment(I);
DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
- PHIRemoved = true;
+ MayHaveSplitComponents = true;
} else {
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(LI.reg, TRI);
+ MI->addRegisterDead(VReg, TRI);
+
+ // If we have a dead def that is completely separate from the rest of
+ // the liverange then we rewrite it to use a different VReg to not violate
+ // the rule that the liveness of a virtual register forms a connected
+ // component. This should only happen if subregister liveness is tracked.
+ if (DeadBeforeDef)
+ MayHaveSplitComponents = true;
+
if (dead && MI->allDefsAreDead()) {
DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
}
}
}
- return PHIRemoved;
+ return MayHaveSplitComponents;
}
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
@@ -512,8 +542,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
// Maybe the operand is for a subregister we don't care about.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((SubRegMask & SR.LaneMask) == 0)
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((LaneMask & SR.LaneMask) == 0)
continue;
}
// We only need to visit each instruction once.
@@ -712,7 +742,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0
// are actually never written by %vreg2. After assignment the <kill>
// flag at the read instruction is invalid.
- unsigned DefinedLanesMask;
+ LaneBitmask DefinedLanesMask;
if (!SRs.empty()) {
// Compute a mask of lanes that are defined.
DefinedLanesMask = 0;
@@ -736,7 +766,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
continue;
if (MO.isUse()) {
// Reading any undefined lanes?
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
if ((UseMask & ~DefinedLanesMask) != 0)
goto CancelKill;
} else if (MO.getSubReg() == 0) {
@@ -944,7 +974,7 @@ public:
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
- unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & LaneMask) == 0)
continue;
@@ -968,7 +998,7 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
+ void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
if (!Updated.insert(&LR).second)
return;
DEBUG({
@@ -976,7 +1006,7 @@ private:
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << PrintReg(Reg);
if (LaneMask != 0)
- dbgs() << format(" L%04X", LaneMask);
+ dbgs() << " L" << PrintLaneMask(LaneMask);
} else {
dbgs() << PrintRegUnit(Reg, &TRI);
}
@@ -1098,7 +1128,7 @@ private:
/// Hoist kill to NewIdx, then scan for last kill between NewIdx and
/// OldIdx.
///
- void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
+ void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
// First look for a kill at OldIdx.
LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
LiveRange::iterator E = LR.end();
@@ -1175,7 +1205,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) {
+ SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
SlotIndex LastUse = NewIdx;
@@ -1255,7 +1285,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
const MachineBasicBlock::iterator End,
const SlotIndex endIdx,
LiveRange &LR, const unsigned Reg,
- const unsigned LaneMask) {
+ LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(endIdx);
SlotIndex lastUseIdx;
if (LII != LR.end() && LII->start < endIdx)
@@ -1282,7 +1312,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
continue;
unsigned SubReg = MO.getSubReg();
- unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
if ((Mask & LaneMask) == 0)
continue;
@@ -1412,3 +1442,20 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
}
LI.removeEmptySubRanges();
}
+
+void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
+ SmallVectorImpl<LiveInterval*> &SplitLIs) {
+ ConnectedVNInfoEqClasses ConEQ(*this);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp <= 1)
+ return;
+ DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
+ unsigned Reg = LI.reg;
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ for (unsigned I = 1; I < NumComp; ++I) {
+ unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = createEmptyInterval(NewVReg);
+ SplitLIs.push_back(&NewLI);
+ }
+ ConEQ.Distribute(LI, SplitLIs.data(), *MRI);
+}
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index cbd98e3..efbbcbe 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -68,7 +68,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// Simulates liveness when stepping forward over an instruction (bundle):
/// Remove killed uses, add defs. This is not the recommended way, because it depends
-/// on accurate kill flags. If possible use stepBackwards() instead of this
+/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
@@ -128,8 +128,8 @@ void LivePhysRegs::dump() const {
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
- for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end()))
- LiveRegs.addReg(Reg);
+ for (const auto &LI : MBB.liveins())
+ LiveRegs.addReg(LI.PhysReg);
}
/// Add pristine registers to the given \p LiveRegs. This function removes
@@ -147,11 +147,19 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB,
- bool AddPristines) {
- if (AddPristines) {
+ bool AddPristinesAndCSRs) {
+ if (AddPristinesAndCSRs) {
const MachineFunction &MF = *MBB->getParent();
addPristines(*this, MF, *TRI);
+ if (!MBB->isReturnBlock()) {
+ // The return block has no successors whose live-ins we could merge
+ // below. So instead we add the callee saved registers manually.
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ }
}
+
+ // To get the live-outs we simply merge the live-ins of all successors.
for (const MachineBasicBlock *Succ : MBB->successors())
::addLiveIns(*this, *Succ);
}
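[Editorial aside, not part of the patch: in contrast to the agreement-style join in LiveDebugValues, physical-register live-outs are simply the union of the successors' live-ins, which is what the loop above computes. As a toy sketch:]

    #include <set>
    #include <vector>

    struct ToyBlock {
      std::vector<const ToyBlock *> succs;
      std::set<unsigned> LiveIns; // physical register numbers
    };

    // Live-outs of B = union of the live-ins of all successors of B.
    std::set<unsigned> liveOuts(const ToyBlock &B) {
      std::set<unsigned> Out;
      for (const ToyBlock *S : B.succs)
        Out.insert(S->LiveIns.begin(), S->LiveIns.end());
      return Out;
    }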
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index bb2877a..c408615 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -64,23 +64,23 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
unsigned SubReg = MO.getSubReg();
if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
- unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
- : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
// If this is the first time we see a subregister def, initialize
// subranges by creating a copy of the main range.
if (!LI.hasSubRanges() && !LI.empty()) {
- unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
for (LiveInterval::SubRange &S : LI.subranges()) {
// A Mask for subregs common to the existing subrange and current def.
- unsigned Common = S.LaneMask & Mask;
+ LaneBitmask Common = S.LaneMask & Mask;
if (Common == 0)
continue;
// A Mask for subregs covered by the subrange but not the current def.
- unsigned LRest = S.LaneMask & ~Mask;
+ LaneBitmask LRest = S.LaneMask & ~Mask;
LiveInterval::SubRange *CommonRange;
if (LRest != 0) {
// Split current subrange into Common and LRest ranges.
@@ -138,7 +138,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
}
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
+ LaneBitmask Mask) {
// Visit all operands that read Reg. This may include partial defs.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
@@ -157,7 +158,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
continue;
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
// Ignore uses not covering the current subrange.
if ((SubRegMask & Mask) == 0)
continue;
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 34d9953..ff38c68 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -129,7 +129,7 @@ class LiveRangeCalc {
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask);
+ void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask);
/// Reset Map and Seen fields.
void resetLiveOutMap();
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 08bbe0c..5ce364a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -226,7 +226,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
return true;
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
unsigned SubReg = MO.getSubReg();
- unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (const LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
return true;
@@ -349,8 +349,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
+ unsigned VReg = LI->reg;
if (TheDelegate)
- TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
+ TheDelegate->LRE_WillShrinkVirtReg(VReg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
@@ -360,7 +361,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// them results in incorrect code.
bool BeingSpilled = false;
for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
- if (LI->reg == RegsBeingSpilled[i]) {
+ if (VReg == RegsBeingSpilled[i]) {
BeingSpilled = true;
break;
}
@@ -370,29 +371,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// LI may have been separated, create new intervals.
LI->RenumberValues();
- ConnectedVNInfoEqClasses ConEQ(LIS);
- unsigned NumComp = ConEQ.Classify(LI);
- if (NumComp <= 1)
- continue;
- ++NumFracRanges;
- bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
- DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
- SmallVector<LiveInterval*, 8> Dups(1, LI);
- for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createEmptyIntervalFrom(LI->reg));
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(*LI, SplitLIs);
+ if (!SplitLIs.empty())
+ ++NumFracRanges;
+
+ unsigned Original = VRM ? VRM->getOriginal(VReg) : 0;
+ for (const LiveInterval *SplitLI : SplitLIs) {
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
- if (IsOriginal)
- VRM->setIsSplitFromReg(Dups.back()->reg, 0);
+ if (Original != VReg && Original != 0)
+ VRM->setIsSplitFromReg(SplitLI->reg, Original);
if (TheDelegate)
- TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg);
}
- ConEQ.Distribute(&Dups[0], MRI);
- DEBUG({
- for (unsigned i = 0; i != NumComp; ++i)
- dbgs() << '\t' << *Dups[i] << '\n';
- });
}
}
@@ -411,7 +404,7 @@ void
LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI) {
- VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg))
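
Note: the eliminateDeadDefs() hunk folds the open-coded
ConnectedVNInfoEqClasses Classify()/Distribute() sequence into a single
LiveIntervals helper, and caches LI->reg in VReg up front because the
interval may be invalidated while it is shrunk and split. In outline (a
summary of the hunk above, not new behavior):

    SmallVector<LiveInterval *, 8> SplitLIs;
    LIS.splitSeparateComponents(*LI, SplitLIs); // one fresh interval per extra
                                                // connected component of *LI
    for (const LiveInterval *SplitLI : SplitLIs) {
      // Each product is recorded as split from the original vreg (the VRM
      // bookkeeping and delegate callback shown above).
    }
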
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 9ea031d..7ee87c1 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -15,12 +15,11 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -49,7 +48,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
@@ -78,7 +76,7 @@ bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
- unsigned Mask = (*Units).second;
+ LaneBitmask Mask = (*Units).second;
for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
if (S.LaneMask & Mask) {
if (Func(Unit, S))
@@ -101,7 +99,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
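
Note: two independent changes land in LiveRegMatrix: the MachineRegisterInfo
dependency goes away together with the setPhysRegUsed() call (physical
register use appears to be derived elsewhere at this revision; an assumption
based on the removal here), and foreachUnit() types its per-regunit masks as
LaneBitmask. A hedged sketch of the lane-overlap filter it applies:

    // Only regunits whose lane mask intersects a live subrange can interfere
    // with it (mirrors the foreachUnit() loop above; MCRegUnitMaskIterator
    // yields (unit, mask) pairs).
    static bool unitOverlapsSubRange(LaneBitmask UnitMask,
                                     const LiveInterval::SubRange &S) {
      return (S.LaneMask & UnitMask) != 0;
    }
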
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index b355393..06b86d8 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -522,11 +522,15 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
continue;
unsigned MOReg = MO.getReg();
if (MO.isUse()) {
- MO.setIsKill(false);
+ if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ MRI->isReserved(MOReg)))
+ MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
} else /*MO.isDef()*/ {
- MO.setIsDead(false);
+ if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ MRI->isReserved(MOReg)))
+ MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
}
@@ -559,11 +563,10 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
- for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
- EE = MBB->livein_end(); II != EE; ++II) {
- assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ for (const auto &LI : MBB->liveins()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
"Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, nullptr, Defs);
+ HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
}
// Loop over all of the instructions, processing them.
@@ -599,14 +602,12 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->isLandingPad())
+ if (SuccMBB->isEHPad())
continue;
- for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
- LE = SuccMBB->livein_end(); LI != LE; ++LI) {
- unsigned LReg = *LI;
- if (!TRI->isInAllocatableClass(LReg))
+ for (const auto &LI : SuccMBB->liveins()) {
+ if (!TRI->isInAllocatableClass(LI.PhysReg))
// Ignore other live-ins, e.g. those that are live into landing pads.
- LiveOuts.insert(LReg);
+ LiveOuts.insert(LI.PhysReg);
}
}
@@ -640,7 +641,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// function. This guarantees that we will see the definition of a virtual
// register before its uses due to dominance properties of SSA (except for PHI
// nodes, which are treated as a special case).
- MachineBasicBlock *Entry = MF->begin();
+ MachineBasicBlock *Entry = &MF->front();
SmallPtrSet<MachineBasicBlock*,16> Visited;
for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
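
Note: the rewrites above track two API changes: basic-block live-in lists now
hold entries with a PhysReg field rather than bare register numbers, and
ilist iterators no longer convert implicitly to pointers, hence
`&MF->front()`. A minimal usage sketch (the LaneMask field name is an
assumption based on the mask-typed hunks elsewhere in this patch):

    for (const auto &LI : MBB->liveins()) {
      unsigned Reg = LI.PhysReg; // the live-in physical register
      // LI.LaneMask (assumed name) would narrow liveness to specific lanes.
    }
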
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 8378429..eb60005 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -325,7 +325,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset
array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
- MachineBasicBlock *Entry = Fn.begin();
+ MachineBasicBlock *Entry = &Fn.front();
unsigned BaseReg = 0;
int64_t BaseOffset = 0;
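
Note: the same iterator-to-pointer fix as in LiveVariables above; Fn.begin()
yields an iterator that no longer converts to MachineBasicBlock*, so the
entry block pointer is taken explicitly:

    MachineBasicBlock *Entry = &Fn.front(); // was: Fn.begin()
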
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 482c33a..28f9d4e 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -54,15 +55,132 @@ public:
} // end anonymous namespace
+MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
+ this->Kind = Kind;
+ this->Range = Range;
+ return *this;
+}
+
+MIToken &MIToken::setStringValue(StringRef StrVal) {
+ StringValue = StrVal;
+ return *this;
+}
+
+MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
+ StringValueStorage = std::move(StrVal);
+ StringValue = StringValueStorage;
+ return *this;
+}
+
+MIToken &MIToken::setIntegerValue(APSInt IntVal) {
+ this->IntVal = std::move(IntVal);
+ return *this;
+}
+
/// Skip the leading whitespace characters and return the updated cursor.
static Cursor skipWhitespace(Cursor C) {
- while (isspace(C.peek()))
+ while (isblank(C.peek()))
+ C.advance();
+ return C;
+}
+
+static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; }
+
+/// Skip a line comment and return the updated cursor.
+static Cursor skipComment(Cursor C) {
+ if (C.peek() != ';')
+ return C;
+ while (!isNewlineChar(C.peek()) && !C.isEOF())
C.advance();
return C;
}
+/// Return true if the given character satisfies the following regular
+/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
- return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.';
+ return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' ||
+ C == '$';
+}
+
+/// Unescapes the given string value.
+///
+/// Expects the string value to be quoted.
+static std::string unescapeQuotedString(StringRef Value) {
+ assert(Value.front() == '"' && Value.back() == '"');
+ Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+ std::string Str;
+ Str.reserve(C.remaining().size());
+ while (!C.isEOF()) {
+ char Char = C.peek();
+ if (Char == '\\') {
+ if (C.peek(1) == '\\') {
+ // Two '\' become one
+ Str += '\\';
+ C.advance(2);
+ continue;
+ }
+ if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
+ Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
+ C.advance(3);
+ continue;
+ }
+ }
+ Str += Char;
+ C.advance();
+ }
+ return Str;
+}
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+static Cursor lexStringConstant(
+ Cursor C,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ assert(C.peek() == '"');
+ for (C.advance(); C.peek() != '"'; C.advance()) {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '\"'");
+ return None;
+ }
+ }
+ C.advance();
+ return C;
+}
+
+static Cursor lexName(
+ Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ auto Range = C;
+ C.advance(PrefixLength);
+ if (C.peek() == '"') {
+ if (Cursor R = lexStringConstant(C, ErrorCallback)) {
+ StringRef String = Range.upto(R);
+ Token.reset(Type, String)
+ .setOwnedStringValue(
+ unescapeQuotedString(String.drop_front(PrefixLength)));
+ return R;
+ }
+ Token.reset(MIToken::Error, Range.remaining());
+ return Range;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token.reset(Type, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(PrefixLength));
+ return C;
+}
+
+static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) {
+ if (C.peek() != 'i' || !isdigit(C.peek(1)))
+ return None;
+ auto Range = C;
+ C.advance(); // Skip 'i'
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::IntegerType, Range.upto(C));
+ return C;
}
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
@@ -70,32 +188,70 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("_", MIToken::underscore)
.Case("implicit", MIToken::kw_implicit)
.Case("implicit-def", MIToken::kw_implicit_define)
+ .Case("def", MIToken::kw_def)
.Case("dead", MIToken::kw_dead)
.Case("killed", MIToken::kw_killed)
.Case("undef", MIToken::kw_undef)
+ .Case("internal", MIToken::kw_internal)
+ .Case("early-clobber", MIToken::kw_early_clobber)
+ .Case("debug-use", MIToken::kw_debug_use)
+ .Case("tied-def", MIToken::kw_tied_def)
+ .Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("debug-location", MIToken::kw_debug_location)
+ .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
+ .Case(".cfi_offset", MIToken::kw_cfi_offset)
+ .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("target-index", MIToken::kw_target_index)
+ .Case("half", MIToken::kw_half)
+ .Case("float", MIToken::kw_float)
+ .Case("double", MIToken::kw_double)
+ .Case("x86_fp80", MIToken::kw_x86_fp80)
+ .Case("fp128", MIToken::kw_fp128)
+ .Case("ppc_fp128", MIToken::kw_ppc_fp128)
+ .Case("target-flags", MIToken::kw_target_flags)
+ .Case("volatile", MIToken::kw_volatile)
+ .Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("invariant", MIToken::kw_invariant)
+ .Case("align", MIToken::kw_align)
+ .Case("stack", MIToken::kw_stack)
+ .Case("got", MIToken::kw_got)
+ .Case("jump-table", MIToken::kw_jump_table)
+ .Case("constant-pool", MIToken::kw_constant_pool)
+ .Case("call-entry", MIToken::kw_call_entry)
+ .Case("liveout", MIToken::kw_liveout)
+ .Case("address-taken", MIToken::kw_address_taken)
+ .Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("liveins", MIToken::kw_liveins)
+ .Case("successors", MIToken::kw_successors)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
- if (!isalpha(C.peek()) && C.peek() != '_')
+ if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
return None;
auto Range = C;
while (isIdentifierChar(C.peek()))
C.advance();
auto Identifier = Range.upto(C);
- Token = MIToken(getIdentifierKind(Identifier), Identifier);
+ Token.reset(getIdentifierKind(Identifier), Identifier)
+ .setStringValue(Identifier);
return C;
}
static Cursor maybeLexMachineBasicBlock(
Cursor C, MIToken &Token,
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
- if (!C.remaining().startswith("%bb."))
+ bool IsReference = C.remaining().startswith("%bb.");
+ if (!IsReference && !C.remaining().startswith("bb."))
return None;
auto Range = C;
- C.advance(4); // Skip '%bb.'
+ unsigned PrefixLength = IsReference ? 4 : 3;
+ C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
if (!isdigit(C.peek())) {
- Token = MIToken(MIToken::Error, C.remaining());
+ Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(), "expected a number after '%bb.'");
return C;
}
@@ -103,26 +259,103 @@ static Cursor maybeLexMachineBasicBlock(
while (isdigit(C.peek()))
C.advance();
StringRef Number = NumberRange.upto(C);
- unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>'
+ unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' or 'bb.<id>'
if (C.peek() == '.') {
C.advance(); // Skip '.'
++StringOffset;
while (isIdentifierChar(C.peek()))
C.advance();
}
- Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number),
- StringOffset);
+ Token.reset(IsReference ? MIToken::MachineBasicBlock
+ : MIToken::MachineBasicBlockLabel,
+ Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
+
+static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = Rule.size() + Number.size();
+ if (C.peek() == '.') {
+ C.advance();
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(Kind, Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
return C;
}
+static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
+}
+
+static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
+}
+
+static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
+}
+
+static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
+}
+
+static Cursor maybeLexIRBlock(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ const StringRef Rule = "%ir-block.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
+ return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
+}
+
+static Cursor maybeLexIRValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ const StringRef Rule = "%ir.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
+ return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
+}
+
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
auto Range = C;
C.advance(); // Skip '%'
auto NumberRange = C;
while (isdigit(C.peek()))
C.advance();
- Token = MIToken(MIToken::VirtualRegister, Range.upto(C),
- APSInt(NumberRange.upto(C)));
+ Token.reset(MIToken::VirtualRegister, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
return C;
}
@@ -135,41 +368,112 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
C.advance(); // Skip '%'
while (isIdentifierChar(C.peek()))
C.advance();
- Token = MIToken(MIToken::NamedRegister, Range.upto(C),
- /*StringOffset=*/1); // Drop the '%'
+ Token.reset(MIToken::NamedRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
return C;
}
-static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
+static Cursor maybeLexGlobalValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
if (C.peek() != '@')
return None;
+ if (!isdigit(C.peek(1)))
+ return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
+ ErrorCallback);
auto Range = C;
- C.advance(); // Skip the '@'
- // TODO: add support for quoted names.
- if (!isdigit(C.peek())) {
- while (isIdentifierChar(C.peek()))
- C.advance();
- Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C),
- /*StringOffset=*/1); // Drop the '@'
- return C;
- }
+ C.advance(1); // Skip the '@'
auto NumberRange = C;
while (isdigit(C.peek()))
C.advance();
- Token =
- MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C)));
+ Token.reset(MIToken::GlobalValue, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
return C;
}
-static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) {
+static Cursor maybeLexExternalSymbol(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '$')
+ return None;
+ return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
+ ErrorCallback);
+}
+
+static bool isValidHexFloatingPointPrefix(char C) {
+ return C == 'H' || C == 'K' || C == 'L' || C == 'M';
+}
+
+static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || C.peek(1) != 'x')
+ return None;
+ Cursor Range = C;
+ C.advance(2); // Skip '0x'
+ if (isValidHexFloatingPointPrefix(C.peek()))
+ C.advance();
+ while (isxdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
+ C.advance();
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(C.peek()))
+ C.advance();
+ if ((C.peek() == 'e' || C.peek() == 'E') &&
+ (isdigit(C.peek(1)) ||
+ ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
+ C.advance(2);
+ while (isdigit(C.peek()))
+ C.advance();
+ }
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
return None;
auto Range = C;
C.advance();
while (isdigit(C.peek()))
C.advance();
+ if (C.peek() == '.')
+ return lexFloatingPointLiteral(Range, C, Token);
StringRef StrVal = Range.upto(C);
- Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal));
+ Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
+ return C;
+}
+
+static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("!tbaa", MIToken::md_tbaa)
+ .Case("!alias.scope", MIToken::md_alias_scope)
+ .Case("!noalias", MIToken::md_noalias)
+ .Case("!range", MIToken::md_range)
+ .Default(MIToken::Error);
+}
+
+static Cursor maybeLexExclaim(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '!')
+ return None;
+ auto Range = C;
+ C.advance(1);
+ if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
+ Token.reset(MIToken::exclaim, Range.upto(C));
+ return C;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ Token.reset(getMetadataKeywordKind(StrVal), StrVal);
+ if (Token.isError())
+ ErrorCallback(Token.location(),
+ "use of unknown metadata keyword '" + StrVal + "'");
return C;
}
@@ -181,44 +485,119 @@ static MIToken::TokenKind symbolToken(char C) {
return MIToken::equal;
case ':':
return MIToken::colon;
+ case '(':
+ return MIToken::lparen;
+ case ')':
+ return MIToken::rparen;
+ case '{':
+ return MIToken::lbrace;
+ case '}':
+ return MIToken::rbrace;
+ case '+':
+ return MIToken::plus;
+ case '-':
+ return MIToken::minus;
default:
return MIToken::Error;
}
}
static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
- auto Kind = symbolToken(C.peek());
+ MIToken::TokenKind Kind;
+ unsigned Length = 1;
+ if (C.peek() == ':' && C.peek(1) == ':') {
+ Kind = MIToken::coloncolon;
+ Length = 2;
+ } else
+ Kind = symbolToken(C.peek());
if (Kind == MIToken::Error)
return None;
auto Range = C;
+ C.advance(Length);
+ Token.reset(Kind, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
+ if (!isNewlineChar(C.peek()))
+ return None;
+ auto Range = C;
+ C.advance();
+ Token.reset(MIToken::Newline, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexEscapedIRValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+ if (C.peek() != '`')
+ return None;
+ auto Range = C;
+ C.advance();
+ auto StrRange = C;
+ while (C.peek() != '`') {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '`'");
+ Token.reset(MIToken::Error, Range.remaining());
+ return C;
+ }
+ C.advance();
+ }
+ StringRef Value = StrRange.upto(C);
C.advance();
- Token = MIToken(Kind, Range.upto(C));
+ Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
return C;
}
StringRef llvm::lexMIToken(
StringRef Source, MIToken &Token,
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
- auto C = skipWhitespace(Cursor(Source));
+ auto C = skipComment(skipWhitespace(Cursor(Source)));
if (C.isEOF()) {
- Token = MIToken(MIToken::Eof, C.remaining());
+ Token.reset(MIToken::Eof, C.remaining());
return C.remaining();
}
- if (Cursor R = maybeLexIdentifier(C, Token))
+ if (Cursor R = maybeLexIntegerType(C, Token))
return R.remaining();
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexIdentifier(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexJumpTableIndex(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexFixedStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexConstantPoolItem(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexRegister(C, Token))
return R.remaining();
- if (Cursor R = maybeLexGlobalValue(C, Token))
+ if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
- if (Cursor R = maybeLexIntegerLiteral(C, Token))
+ if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNumericalLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexSymbol(C, Token))
return R.remaining();
+ if (Cursor R = maybeLexNewline(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
+ return R.remaining();
- Token = MIToken(MIToken::Error, C.remaining());
+ Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(),
Twine("unexpected character '") + Twine(C.peek()) + "'");
return C.remaining();
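
Note: lexMIToken() above is the lexer's sole entry point: it skips blanks and
';' comments, tries each maybeLex* rule in order, stores the result in the
caller-owned token, and returns the unconsumed remainder. A hedged usage
sketch built only from the signature declared here (the callback body is a
placeholder):

    MIToken Token;
    StringRef Rest = Source;
    do {
      Rest = lexMIToken(Rest, Token,
                        [](StringRef::iterator Loc, const Twine &Msg) {
                          // report Msg at the offset of Loc in the source
                        });
      // ... consume Token here ...
    } while (!Token.isErrorOrEOF());
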
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index 55460b5..ff54aa3 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -30,50 +30,119 @@ struct MIToken {
// Markers
Eof,
Error,
+ Newline,
// Tokens with no info.
comma,
equal,
underscore,
colon,
+ coloncolon,
+ exclaim,
+ lparen,
+ rparen,
+ lbrace,
+ rbrace,
+ plus,
+ minus,
// Keywords
kw_implicit,
kw_implicit_define,
+ kw_def,
kw_dead,
kw_killed,
kw_undef,
+ kw_internal,
+ kw_early_clobber,
+ kw_debug_use,
+ kw_tied_def,
+ kw_frame_setup,
+ kw_debug_location,
+ kw_cfi_same_value,
+ kw_cfi_offset,
+ kw_cfi_def_cfa_register,
+ kw_cfi_def_cfa_offset,
+ kw_cfi_def_cfa,
+ kw_blockaddress,
+ kw_target_index,
+ kw_half,
+ kw_float,
+ kw_double,
+ kw_x86_fp80,
+ kw_fp128,
+ kw_ppc_fp128,
+ kw_target_flags,
+ kw_volatile,
+ kw_non_temporal,
+ kw_invariant,
+ kw_align,
+ kw_stack,
+ kw_got,
+ kw_jump_table,
+ kw_constant_pool,
+ kw_call_entry,
+ kw_liveout,
+ kw_address_taken,
+ kw_landing_pad,
+ kw_liveins,
+ kw_successors,
+
+ // Named metadata keywords
+ md_tbaa,
+ md_alias_scope,
+ md_noalias,
+ md_range,
// Identifier tokens
Identifier,
+ IntegerType,
NamedRegister,
+ MachineBasicBlockLabel,
MachineBasicBlock,
+ StackObject,
+ FixedStackObject,
NamedGlobalValue,
GlobalValue,
+ ExternalSymbol,
// Other tokens
IntegerLiteral,
- VirtualRegister
+ FloatingPointLiteral,
+ VirtualRegister,
+ ConstantPoolItem,
+ JumpTableIndex,
+ NamedIRBlock,
+ IRBlock,
+ NamedIRValue,
+ IRValue,
+ QuotedIRValue // `<constant value>`
};
private:
TokenKind Kind;
- unsigned StringOffset;
StringRef Range;
+ StringRef StringValue;
+ std::string StringValueStorage;
APSInt IntVal;
public:
- MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
- : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
+ MIToken() : Kind(Error) {}
- MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
- unsigned StringOffset = 0)
- : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {}
+ MIToken &reset(TokenKind Kind, StringRef Range);
+
+ MIToken &setStringValue(StringRef StrVal);
+ MIToken &setOwnedStringValue(std::string StrVal);
+ MIToken &setIntegerValue(APSInt IntVal);
TokenKind kind() const { return Kind; }
bool isError() const { return Kind == Error; }
+ bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
+
+ bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
+
bool isRegister() const {
return Kind == NamedRegister || Kind == underscore ||
Kind == VirtualRegister;
@@ -81,7 +150,14 @@ public:
bool isRegisterFlag() const {
return Kind == kw_implicit || Kind == kw_implicit_define ||
- Kind == kw_dead || Kind == kw_killed || Kind == kw_undef;
+ Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
+ Kind == kw_undef || Kind == kw_internal ||
+ Kind == kw_early_clobber || Kind == kw_debug_use;
+ }
+
+ bool isMemoryOperandFlag() const {
+ return Kind == kw_volatile || Kind == kw_non_temporal ||
+ Kind == kw_invariant;
}
bool is(TokenKind K) const { return Kind == K; }
@@ -90,13 +166,19 @@ public:
StringRef::iterator location() const { return Range.begin(); }
- StringRef stringValue() const { return Range.drop_front(StringOffset); }
+ StringRef range() const { return Range; }
+
+ /// Return the token's string value.
+ StringRef stringValue() const { return StringValue; }
const APSInt &integerValue() const { return IntVal; }
bool hasIntegerValue() const {
return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
- Kind == GlobalValue || Kind == VirtualRegister;
+ Kind == MachineBasicBlockLabel || Kind == StackObject ||
+ Kind == FixedStackObject || Kind == GlobalValue ||
+ Kind == VirtualRegister || Kind == ConstantPoolItem ||
+ Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
}
};
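
Note: MIToken above moves from construct-per-token to an in-place reset()
with chained setters, and gains two string storage modes: borrowed
(setStringValue, a slice of the lexed source) and owned (setOwnedStringValue,
e.g. the unescaped copy of a quoted name, kept alive inside the token). A
minimal sketch of the two modes (Range stands for some lexed source slice):

    StringRef Range = "foo";
    MIToken Tok;
    Tok.reset(MIToken::Identifier, Range).setStringValue(Range); // borrowed
    Tok.reset(MIToken::NamedGlobalValue, Range)
        .setOwnedStringValue(Range.str());                       // owned copy
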
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index c000112..f2f6584 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -14,12 +14,20 @@
#include "MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -30,15 +38,20 @@ using namespace llvm;
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
-/// range.
-struct MachineOperandWithLocation {
+/// range and other attributes.
+struct ParsedMachineOperand {
MachineOperand Operand;
StringRef::iterator Begin;
StringRef::iterator End;
-
- MachineOperandWithLocation(const MachineOperand &Operand,
- StringRef::iterator Begin, StringRef::iterator End)
- : Operand(Operand), Begin(Begin), End(End) {}
+ Optional<unsigned> TiedDefIdx;
+
+ ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
+ StringRef::iterator End, Optional<unsigned> &TiedDefIdx)
+ : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
+ if (TiedDefIdx)
+ assert(Operand.isReg() && Operand.isUse() &&
+ "Only used register operands can be tied");
+ }
};
class MIParser {
@@ -58,6 +71,16 @@ class MIParser {
StringMap<const uint32_t *> Names2RegMasks;
/// Maps from subregister names to subregister indices.
StringMap<unsigned> Names2SubRegIndices;
+ /// Maps from slot numbers to function's unnamed basic blocks.
+ DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
+ /// Maps from slot numbers to function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values;
+ /// Maps from target index names to target indices.
+ StringMap<int> Names2TargetIndices;
+ /// Maps from direct target flag names to the direct target flag values.
+ StringMap<unsigned> Names2DirectTargetFlags;
+ /// Maps from direct target flag names to the bitmask target flag values.
+ StringMap<unsigned> Names2BitmaskTargetFlags;
public:
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
@@ -76,19 +99,66 @@ public:
/// This function always return true.
bool error(StringRef::iterator Loc, const Twine &Msg);
+ bool
+ parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlocks();
bool parse(MachineInstr *&MI);
- bool parseMBB(MachineBasicBlock *&MBB);
- bool parseNamedRegister(unsigned &Reg);
+ bool parseStandaloneMBB(MachineBasicBlock *&MBB);
+ bool parseStandaloneNamedRegister(unsigned &Reg);
+ bool parseStandaloneVirtualRegister(unsigned &Reg);
+ bool parseStandaloneStackObject(int &FI);
+ bool parseStandaloneMDNode(MDNode *&Node);
+
+ bool
+ parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlock(MachineBasicBlock &MBB);
+ bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
+ bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
bool parseRegister(unsigned &Reg);
bool parseRegisterFlag(unsigned &Flags);
bool parseSubRegisterIndex(unsigned &SubReg);
- bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
+ bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
+ bool parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
+ bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
+ const Constant *&C);
+ bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
+ bool parseTypedImmediateOperand(MachineOperand &Dest);
+ bool parseFPImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
bool parseMBBOperand(MachineOperand &Dest);
+ bool parseStackFrameIndex(int &FI);
+ bool parseStackObjectOperand(MachineOperand &Dest);
+ bool parseFixedStackFrameIndex(int &FI);
+ bool parseFixedStackObjectOperand(MachineOperand &Dest);
+ bool parseGlobalValue(GlobalValue *&GV);
bool parseGlobalAddressOperand(MachineOperand &Dest);
- bool parseMachineOperand(MachineOperand &Dest);
+ bool parseConstantPoolIndexOperand(MachineOperand &Dest);
+ bool parseJumpTableIndexOperand(MachineOperand &Dest);
+ bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMDNode(MDNode *&Node);
+ bool parseMetadataOperand(MachineOperand &Dest);
+ bool parseCFIOffset(int &Offset);
+ bool parseCFIRegister(unsigned &Reg);
+ bool parseCFIOperand(MachineOperand &Dest);
+ bool parseIRBlock(BasicBlock *&BB, const Function &F);
+ bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
+ bool parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseOffset(int64_t &Offset);
+ bool parseAlignment(unsigned &Alignment);
+ bool parseOperandsOffset(MachineOperand &Op);
+ bool parseIRValue(const Value *&V);
+ bool parseMemoryOperandFlag(unsigned &Flags);
+ bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
+ bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -96,15 +166,31 @@ private:
/// Return true if an error occurred.
bool getUnsigned(unsigned &Result);
+ /// Convert the integer literal in the current token into a uint64.
+ ///
+ /// Return true if an error occurred.
+ bool getUint64(uint64_t &Result);
+
+ /// If the current token is of the given kind, consume it and return false.
+ /// Otherwise report an error and return true.
+ bool expectAndConsume(MIToken::TokenKind TokenKind);
+
+ /// If the current token is of the given kind, consume it and return true.
+ /// Otherwise return false.
+ bool consumeIfPresent(MIToken::TokenKind TokenKind);
+
void initNames2InstrOpCodes();
/// Try to convert an instruction name to an opcode. Return true if the
/// instruction name is invalid.
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
- bool parseInstruction(unsigned &OpCode);
+ bool parseInstruction(unsigned &OpCode, unsigned &Flags);
+
+ bool assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands);
- bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands,
+ bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
const MCInstrDesc &MCID);
void initNames2Regs();
@@ -126,6 +212,34 @@ private:
///
/// Return 0 if the name isn't a subregister index class.
unsigned getSubRegIndex(StringRef Name);
+
+ const BasicBlock *getIRBlock(unsigned Slot);
+ const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
+
+ const Value *getIRValue(unsigned Slot);
+
+ void initNames2TargetIndices();
+
+ /// Try to convert a name of a target index to the corresponding target index.
+ ///
+ /// Return true if the name isn't a name of a target index.
+ bool getTargetIndex(StringRef Name, int &Index);
+
+ void initNames2DirectTargetFlags();
+
+ /// Try to convert a name of a direct target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a direct flag.
+ bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
+
+ void initNames2BitmaskTargetFlags();
+
+ /// Try to convert a name of a bitmask target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a bitmask target flag.
+ bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
};
} // end anonymous namespace
@@ -134,7 +248,7 @@ MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
StringRef Source, const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots)
: SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
- Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {}
+ PFS(PFS), IRSlots(IRSlots) {}
void MIParser::lex() {
CurrentSource = lexMIToken(
@@ -146,49 +260,378 @@ bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
- Error = SMDiagnostic(
- SM, SMLoc(),
- SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
- Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
+ const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID());
+ if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) {
+ // Create an ordinary diagnostic when the source manager's buffer is the
+ // source string.
+ Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
+ return true;
+ }
+ // Create a diagnostic for a YAML string literal.
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+ Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
+ Source, None, None);
return true;
}
-bool MIParser::parse(MachineInstr *&MI) {
+static const char *toString(MIToken::TokenKind TokenKind) {
+ switch (TokenKind) {
+ case MIToken::comma:
+ return "','";
+ case MIToken::equal:
+ return "'='";
+ case MIToken::colon:
+ return "':'";
+ case MIToken::lparen:
+ return "'('";
+ case MIToken::rparen:
+ return "')'";
+ default:
+ return "<unknown token>";
+ }
+}
+
+bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return error(Twine("expected ") + toString(TokenKind));
+ lex();
+ return false;
+}
+
+bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return false;
+ lex();
+ return true;
+}
+
+bool MIParser::parseBasicBlockDefinition(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ auto Loc = Token.location();
+ auto Name = Token.stringValue();
+ lex();
+ bool HasAddressTaken = false;
+ bool IsLandingPad = false;
+ unsigned Alignment = 0;
+ BasicBlock *BB = nullptr;
+ if (consumeIfPresent(MIToken::lparen)) {
+ do {
+ // TODO: Report an error when the same attribute is specified multiple times.
+ switch (Token.kind()) {
+ case MIToken::kw_address_taken:
+ HasAddressTaken = true;
+ lex();
+ break;
+ case MIToken::kw_landing_pad:
+ IsLandingPad = true;
+ lex();
+ break;
+ case MIToken::kw_align:
+ if (parseAlignment(Alignment))
+ return true;
+ break;
+ case MIToken::IRBlock:
+ // TODO: Report an error when both a name and an IR block are specified.
+ if (parseIRBlock(BB, *MF.getFunction()))
+ return true;
+ lex();
+ break;
+ default:
+ break;
+ }
+ } while (consumeIfPresent(MIToken::comma));
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ if (expectAndConsume(MIToken::colon))
+ return true;
+
+ if (!Name.empty()) {
+ BB = dyn_cast_or_null<BasicBlock>(
+ MF.getFunction()->getValueSymbolTable().lookup(Name));
+ if (!BB)
+ return error(Loc, Twine("basic block '") + Name +
+ "' is not defined in the function '" +
+ MF.getName() + "'");
+ }
+ auto *MBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(MF.end(), MBB);
+ bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second;
+ if (!WasInserted)
+ return error(Loc, Twine("redefinition of machine basic block with id #") +
+ Twine(ID));
+ if (Alignment)
+ MBB->setAlignment(Alignment);
+ if (HasAddressTaken)
+ MBB->setHasAddressTaken();
+ MBB->setIsEHPad(IsLandingPad);
+ return false;
+}
+
+bool MIParser::parseBasicBlockDefinitions(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ if (Token.isNot(MIToken::MachineBasicBlockLabel))
+ return error("expected a basic block definition before instructions");
+ unsigned BraceDepth = 0;
+ do {
+ if (parseBasicBlockDefinition(MBBSlots))
+ return true;
+ bool IsAfterNewline = false;
+ // Skip until the next machine basic block.
+ while (true) {
+ if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+ Token.isErrorOrEOF())
+ break;
+ else if (Token.is(MIToken::MachineBasicBlockLabel))
+ return error("basic block definition should be located at the start of "
+ "the line");
+ else if (consumeIfPresent(MIToken::Newline)) {
+ IsAfterNewline = true;
+ continue;
+ }
+ IsAfterNewline = false;
+ if (Token.is(MIToken::lbrace))
+ ++BraceDepth;
+ if (Token.is(MIToken::rbrace)) {
+ if (!BraceDepth)
+ return error("extraneous closing brace ('}')");
+ --BraceDepth;
+ }
+ lex();
+ }
+ // Verify that we closed all of the '{' at the end of a file or a block.
+ if (!Token.isError() && BraceDepth)
+ return error("expected '}'"); // FIXME: Report a note that shows '{'.
+ } while (!Token.isErrorOrEOF());
+ return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_liveins));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+ return false;
+ do {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ MBB.addLiveIn(Reg);
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
+
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_successors));
lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+ return false;
+ do {
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ MachineBasicBlock *SuccMBB = nullptr;
+ if (parseMBBReference(SuccMBB))
+ return true;
+ lex();
+ unsigned Weight = 0;
+ if (consumeIfPresent(MIToken::lparen)) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '('");
+ if (getUnsigned(Weight))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+ } while (consumeIfPresent(MIToken::comma));
+ MBB.normalizeSuccProbs();
+ return false;
+}
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+ // Skip the definition.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ lex();
+ if (consumeIfPresent(MIToken::lparen)) {
+ while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+ lex();
+ consumeIfPresent(MIToken::rparen);
+ }
+ consumeIfPresent(MIToken::colon);
+
+ // Parse the liveins and successors.
+ // N.B.: Multiple lists of successors and liveins are allowed and they're
+ // merged into one.
+ // Example:
+ // liveins: %edi
+ // liveins: %esi
+ //
+ // is equivalent to
+ // liveins: %edi, %esi
+ while (true) {
+ if (Token.is(MIToken::kw_successors)) {
+ if (parseBasicBlockSuccessors(MBB))
+ return true;
+ } else if (Token.is(MIToken::kw_liveins)) {
+ if (parseBasicBlockLiveins(MBB))
+ return true;
+ } else if (consumeIfPresent(MIToken::Newline)) {
+ continue;
+ } else
+ break;
+ if (!Token.isNewlineOrEOF())
+ return error("expected line break at the end of a list");
+ lex();
+ }
+
+ // Parse the instructions.
+ bool IsInBundle = false;
+ MachineInstr *PrevMI = nullptr;
+ while (true) {
+ if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
+ return false;
+ else if (consumeIfPresent(MIToken::Newline))
+ continue;
+ if (consumeIfPresent(MIToken::rbrace)) {
+ // The first parsing pass should verify that all closing '}' have an
+ // opening '{'.
+ assert(IsInBundle);
+ IsInBundle = false;
+ continue;
+ }
+ MachineInstr *MI = nullptr;
+ if (parse(MI))
+ return true;
+ MBB.insert(MBB.end(), MI);
+ if (IsInBundle) {
+ PrevMI->setFlag(MachineInstr::BundledSucc);
+ MI->setFlag(MachineInstr::BundledPred);
+ }
+ PrevMI = MI;
+ if (Token.is(MIToken::lbrace)) {
+ if (IsInBundle)
+ return error("nested instruction bundles are not allowed");
+ lex();
+ // This instruction is the start of the bundle.
+ MI->setFlag(MachineInstr::BundledSucc);
+ IsInBundle = true;
+ if (!Token.is(MIToken::Newline))
+ // The next instruction can be on the same line.
+ continue;
+ }
+ assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+ lex();
+ }
+ return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ // The first parsing pass should have verified that this token is a MBB label
+ // in the 'parseBasicBlockDefinitions' method.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ do {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB))
+ return true;
+ if (parseBasicBlock(*MBB))
+ return true;
+ // The method 'parseBasicBlock' should parse the whole block until the next
+ // block or the end of file.
+ assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+ } while (Token.isNot(MIToken::Eof));
+ return false;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
// Parse any register operands before '='
- // TODO: Allow parsing of multiple operands before '='
MachineOperand MO = MachineOperand::CreateImm(0);
- SmallVector<MachineOperandWithLocation, 8> Operands;
- if (Token.isRegister() || Token.isRegisterFlag()) {
+ SmallVector<ParsedMachineOperand, 8> Operands;
+ while (Token.isRegister() || Token.isRegisterFlag()) {
auto Loc = Token.location();
- if (parseRegisterOperand(MO, /*IsDef=*/true))
+ Optional<unsigned> TiedDefIdx;
+ if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
return true;
- Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
- if (Token.isNot(MIToken::equal))
- return error("expected '='");
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNot(MIToken::comma))
+ break;
lex();
}
-
- unsigned OpCode;
- if (Token.isError() || parseInstruction(OpCode))
+ if (!Operands.empty() && expectAndConsume(MIToken::equal))
return true;
- // TODO: Parse the instruction flags and memory operands.
+ unsigned OpCode, Flags = 0;
+ if (Token.isError() || parseInstruction(OpCode, Flags))
+ return true;
// Parse the remaining machine operands.
- while (Token.isNot(MIToken::Eof)) {
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
- if (parseMachineOperand(MO))
+ Optional<unsigned> TiedDefIdx;
+ if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
return true;
- Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
- if (Token.is(MIToken::Eof))
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
break;
if (Token.isNot(MIToken::comma))
return error("expected ',' before the next machine operand");
lex();
}
+ DebugLoc DebugLocation;
+ if (Token.is(MIToken::kw_debug_location)) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node after 'debug-location'");
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ DebugLocation = DebugLoc(Node);
+ }
+
+ // Parse the machine memory operands.
+ SmallVector<MachineMemOperand *, 2> MemOperands;
+ if (Token.is(MIToken::coloncolon)) {
+ lex();
+ while (!Token.isNewlineOrEOF()) {
+ MachineMemOperand *MemOp = nullptr;
+ if (parseMachineMemoryOperand(MemOp))
+ return true;
+ MemOperands.push_back(MemOp);
+ if (Token.isNewlineOrEOF())
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine memory operand");
+ lex();
+ }
+ }
+
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
if (!MCID.isVariadic()) {
// FIXME: Move the implicit operand verification to the machine verifier.
@@ -197,13 +640,22 @@ bool MIParser::parse(MachineInstr *&MI) {
}
// TODO: Check for extraneous machine operands.
- MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+ MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+ MI->setFlags(Flags);
for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
+ if (assignRegisterTies(*MI, Operands))
+ return true;
+ if (MemOperands.empty())
+ return false;
+ MachineInstr::mmo_iterator MemRefs =
+ MF.allocateMemRefsArray(MemOperands.size());
+ std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
+ MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
return false;
}
-bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
lex();
if (Token.isNot(MIToken::MachineBasicBlock))
return error("expected a machine basic block reference");
@@ -216,18 +668,52 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
return false;
}
-bool MIParser::parseNamedRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
if (parseRegister(Reg))
- return 0;
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::VirtualRegister))
+ return error("expected a virtual register");
+ if (parseRegister(Reg))
+ return true;
lex();
if (Token.isNot(MIToken::Eof))
return error("expected end of string after the register reference");
return false;
}
+bool MIParser::parseStandaloneStackObject(int &FI) {
+ lex();
+ if (Token.isNot(MIToken::StackObject))
+ return error("expected a stack object");
+ if (parseStackFrameIndex(FI))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the stack object reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+ if (parseMDNode(Node))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the metadata node");
+ return false;
+}
+
static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
assert(MO.isImplicit());
return MO.isDef() ? "implicit-def" : "implicit";
@@ -239,8 +725,18 @@ static std::string getRegisterName(const TargetRegisterInfo *TRI,
return StringRef(TRI->getName(Reg)).lower();
}
-bool MIParser::verifyImplicitOperands(
- ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) {
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ for (const auto &I : Operands) {
+ if (ImplicitOperand.isIdenticalTo(I.Operand))
+ return true;
+ }
+ return false;
+}
+
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID) {
if (MCID.isCall())
// We can't verify call instructions as they can contain arbitrary implicit
// register and register mask operands.
@@ -249,48 +745,32 @@ bool MIParser::verifyImplicitOperands(
// Gather all the expected implicit operands.
SmallVector<MachineOperand, 4> ImplicitOperands;
if (MCID.ImplicitDefs)
- for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
ImplicitOperands.push_back(
MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID.ImplicitUses)
- for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
ImplicitOperands.push_back(
MachineOperand::CreateReg(*ImpUses, false, true));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
- size_t I = ImplicitOperands.size(), J = Operands.size();
- while (I) {
- --I;
- if (J) {
- --J;
- const auto &ImplicitOperand = ImplicitOperands[I];
- const auto &Operand = Operands[J].Operand;
- if (ImplicitOperand.isIdenticalTo(Operand))
- continue;
- if (Operand.isReg() && Operand.isImplicit()) {
- return error(Operands[J].Begin,
- Twine("expected an implicit register operand '") +
- printImplicitRegisterFlag(ImplicitOperand) + " %" +
- getRegisterName(TRI, ImplicitOperand.getReg()) + "'");
- }
- }
- // TODO: Fix source location when Operands[J].end is right before '=', i.e:
- // insead of reporting an error at this location:
- // %eax = MOV32r0
- // ^
- // report the error at the following location:
- // %eax = MOV32r0
- // ^
- return error(J < Operands.size() ? Operands[J].End : Token.location(),
+ for (const auto &I : ImplicitOperands) {
+ if (isImplicitOperandIn(I, Operands))
+ continue;
+ return error(Operands.empty() ? Token.location() : Operands.back().End,
Twine("missing implicit register operand '") +
- printImplicitRegisterFlag(ImplicitOperands[I]) + " %" +
- getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'");
+ printImplicitRegisterFlag(I) + " %" +
+ getRegisterName(TRI, I.getReg()) + "'");
}
return false;
}
-bool MIParser::parseInstruction(unsigned &OpCode) {
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+ if (Token.is(MIToken::kw_frame_setup)) {
+ Flags |= MachineInstr::FrameSetup;
+ lex();
+ }
if (Token.isNot(MIToken::Identifier))
return error("expected a machine instruction");
StringRef InstrName = Token.stringValue();
@@ -330,6 +810,7 @@ bool MIParser::parseRegister(unsigned &Reg) {
}
bool MIParser::parseRegisterFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
switch (Token.kind()) {
case MIToken::kw_implicit:
Flags |= RegState::Implicit;
@@ -337,6 +818,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
case MIToken::kw_implicit_define:
Flags |= RegState::ImplicitDefine;
break;
+ case MIToken::kw_def:
+ Flags |= RegState::Define;
+ break;
case MIToken::kw_dead:
Flags |= RegState::Dead;
break;
@@ -346,11 +830,22 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
case MIToken::kw_undef:
Flags |= RegState::Undef;
break;
- // TODO: report an error when we specify the same flag more than once.
- // TODO: parse the other register flags.
+ case MIToken::kw_internal:
+ Flags |= RegState::InternalRead;
+ break;
+ case MIToken::kw_early_clobber:
+ Flags |= RegState::EarlyClobber;
+ break;
+ case MIToken::kw_debug_use:
+ Flags |= RegState::Debug;
+ break;
default:
llvm_unreachable("The current token should be a register flag");
}
+ if (OldFlags == Flags)
+ // If the flags weren't modified, then the same flag was specified more
+ // than once.
+ return error("duplicate '" + Token.stringValue() + "' register flag");
lex();
return false;
}
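The duplicate check above works because every register flag keyword sets at least one bit that the earlier flags didn't; if OR-ing in the new flag leaves the word unchanged, the flag was already present. A minimal standalone sketch of the same idiom (the flag values are hypothetical, not LLVM's RegState constants):

    #include <cstdio>

    enum : unsigned { Implicit = 1u << 0, Dead = 1u << 1, Undef = 1u << 2 };

    // Mirrors parseRegisterFlag: OR in the new flag and compare against the
    // old word; an unchanged word means the flag was specified twice.
    static bool addFlag(unsigned &Flags, unsigned F) {
      const unsigned OldFlags = Flags;
      Flags |= F;
      return OldFlags == Flags; // true => duplicate
    }

    int main() {
      unsigned Flags = 0;
      std::printf("%d\n", addFlag(Flags, Dead)); // 0: first occurrence
      std::printf("%d\n", addFlag(Flags, Dead)); // 1: duplicate detected
    }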
@@ -368,7 +863,59 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
return false;
}
-bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+ if (!consumeIfPresent(MIToken::kw_tied_def))
+ return error("expected 'tied-def' after '('");
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'tied-def'");
+ if (getUnsigned(TiedDefIdx))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
+
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ if (!Operands[I].TiedDefIdx)
+ continue;
+ // The parser ensures that this operand is a register use, so we just have
+ // to check the tied-def operand.
+ unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+ if (DefIdx >= E)
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '" +
+ Twine(DefIdx) + "'; instruction has only ") +
+ Twine(E) + " operands");
+ const auto &DefOperand = Operands[DefIdx].Operand;
+ if (!DefOperand.isReg() || !DefOperand.isDef())
+ // FIXME: add note with the def operand.
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '") +
+ Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+ " isn't a defined register");
+ // Check that the tied-def operand wasn't tied elsewhere.
+ for (const auto &TiedPair : TiedRegisterPairs) {
+ if (TiedPair.first == DefIdx)
+ return error(Operands[I].Begin,
+ Twine("the tied-def operand #") + Twine(DefIdx) +
+ " is already tied with another register operand");
+ }
+ TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+ }
+ // FIXME: For non-INLINEASM instructions, verify that the tied def and use
+ // operand indices are less than the tied-max limit.
+ for (const auto &TiedPair : TiedRegisterPairs)
+ MI.tieOperands(TiedPair.first, TiedPair.second);
+ return false;
+}
+
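The (tied-def N) annotation parsed above ties a use operand back to the def at operand index N, and assignRegisterTies rejects a def index that is claimed twice. A rough standalone model of that bookkeeping (just the duplicate check, not LLVM's MachineInstr::tieOperands):

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Each def index may be tied to at most one use; returns true on a
    // duplicate, mirroring the error path in assignRegisterTies.
    static bool tieOperands(std::vector<std::pair<unsigned, unsigned>> &Ties,
                            unsigned DefIdx, unsigned UseIdx) {
      for (const auto &T : Ties)
        if (T.first == DefIdx)
          return true; // def already tied with another register operand
      Ties.push_back(std::make_pair(DefIdx, UseIdx));
      return false;
    }

    int main() {
      std::vector<std::pair<unsigned, unsigned>> Ties;
      std::printf("%d\n", tieOperands(Ties, 0, 2)); // 0: tie recorded
      std::printf("%d\n", tieOperands(Ties, 0, 3)); // 1: rejected
    }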
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx,
+ bool IsDef) {
unsigned Reg;
unsigned Flags = IsDef ? RegState::Define : 0;
while (Token.isRegisterFlag()) {
@@ -385,10 +932,17 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
if (parseSubRegisterIndex(SubReg))
return true;
}
+ if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) {
+ unsigned Idx;
+ if (parseRegisterTiedDefIndex(Idx))
+ return true;
+ TiedDefIdx = Idx;
+ }
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
- /*isEarlyClobber=*/false, SubReg);
+ Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+ Flags & RegState::InternalRead);
return false;
}
@@ -396,13 +950,55 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::IntegerLiteral));
const APSInt &Int = Token.integerValue();
if (Int.getMinSignedBits() > 64)
- // TODO: Replace this with an error when we can parse CIMM Machine Operands.
- llvm_unreachable("Can't parse large integer literals yet!");
+ return error("integer literal is too large to be an immediate operand");
Dest = MachineOperand::CreateImm(Int.getExtValue());
lex();
return false;
}
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ auto Source = StringValue.str(); // The source has to be null-terminated.
+ SMDiagnostic Err;
+ C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+ &IRSlots);
+ if (!C)
+ return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+ if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::IntegerType));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+ return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::FloatingPointLiteral))
+ return error("expected a floating point literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+ return false;
+}
+
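Both typed and floating-point immediates are delegated to the IR-level constant parser, so the operand text must read as a self-contained IR constant such as 'i32 42' or 'float 2.5'. A rough sketch of that round trip through the same entry point (minimal setup, assuming LLVM's AsmParser library is linked):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("m", Ctx);
      SMDiagnostic Err;
      // "i32 42" is what an IntegerType token followed by an IntegerLiteral
      // token spells out; parseConstantValue turns it into a ConstantInt.
      if (const Constant *C = parseConstantValue("i32 42", Err, M))
        C->print(outs()); // prints: i32 42
      return 0;
    }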
bool MIParser::getUnsigned(unsigned &Result) {
assert(Token.hasIntegerValue() && "Expected a token with an integer value");
const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
@@ -414,7 +1010,8 @@ bool MIParser::getUnsigned(unsigned &Result) {
}
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
- assert(Token.is(MIToken::MachineBasicBlock));
+ assert(Token.is(MIToken::MachineBasicBlock) ||
+ Token.is(MIToken::MachineBasicBlockLabel));
unsigned Number;
if (getUnsigned(Number))
return true;
@@ -438,16 +1035,66 @@ bool MIParser::parseMBBOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+bool MIParser::parseStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::StackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.StackObjectSlots.end())
+ return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+ "'");
+ StringRef Name;
+ if (const auto *Alloca =
+ MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+ Name = Alloca->getName();
+ if (!Token.stringValue().empty() && Token.stringValue() != Name)
+ return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+ "' isn't '" + Token.stringValue() + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::FixedStackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+ return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+ Twine(ID) + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
switch (Token.kind()) {
case MIToken::NamedGlobalValue: {
- auto Name = Token.stringValue();
const Module *M = MF.getFunction()->getParent();
- if (const auto *GV = M->getNamedValue(Name)) {
- Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
- break;
- }
- return error(Twine("use of undefined global value '@") + Name + "'");
+ GV = M->getNamedValue(Token.stringValue());
+ if (!GV)
+ return error(Twine("use of undefined global value '") + Token.range() +
+ "'");
+ break;
}
case MIToken::GlobalValue: {
unsigned GVIdx;
@@ -456,36 +1103,323 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
if (GVIdx >= IRSlots.GlobalValues.size())
return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
"'");
- Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx],
- /*Offset=*/0);
+ GV = IRSlots.GlobalValues[GVIdx];
break;
}
default:
llvm_unreachable("The current token should be a global value");
}
- // TODO: Parse offset and target flags.
+ return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ lex();
+ Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ConstantPoolItem));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+ if (ConstantInfo == PFS.ConstantPoolSlots.end())
+ return error("use of undefined constant '%const." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::JumpTableIndex));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+ if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+ return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+ return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ExternalSymbol));
+ const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateES(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+ assert(Token.is(MIToken::exclaim));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto NodeInfo = IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo == IRSlots.MetadataNodes.end())
+ return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ lex();
+ Node = NodeInfo->second.get();
+ return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ Dest = MachineOperand::CreateMetadata(Node);
+ return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi offset");
+ if (Token.integerValue().getMinSignedBits() > 32)
+ return error("expected a 32 bit integer (the cfi offset is too large)");
+ Offset = (int)Token.integerValue().getExtValue();
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIRegister(unsigned &Reg) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a cfi register");
+ unsigned LLVMReg;
+ if (parseRegister(LLVMReg))
+ return true;
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+ if (DwarfReg < 0)
+ return error("invalid DWARF register");
+ Reg = (unsigned)DwarfReg;
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+ auto Kind = Token.kind();
+ lex();
+ auto &MMI = MF.getMMI();
+ int Offset;
+ unsigned Reg;
+ unsigned CFIIndex;
+ switch (Kind) {
+ case MIToken::kw_cfi_same_value:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa_register:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_def_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfa negates the offset.
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ break;
+ default:
+ // TODO: Parse the other CFI operands.
+ llvm_unreachable("The current token should be a cfi operand");
+ }
+ Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+ return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRBlock: {
+ BB = dyn_cast_or_null<BasicBlock>(
+ F.getValueSymbolTable().lookup(Token.stringValue()));
+ if (!BB)
+ return error(Twine("use of undefined IR block '") + Token.range() + "'");
+ break;
+ }
+ case MIToken::IRBlock: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+ if (!BB)
+ return error(Twine("use of undefined IR block '%ir-block.") +
+ Twine(SlotNumber) + "'");
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ return false;
+}
+
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_blockaddress));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected a global value");
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ auto *F = dyn_cast<Function>(GV);
+ if (!F)
+ return error("expected an IR function reference");
+ lex();
+ if (expectAndConsume(MIToken::comma))
+ return true;
+ BasicBlock *BB = nullptr;
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected an IR block reference");
+ if (parseIRBlock(BB, *F))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_target_index));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target index");
+ int Index = 0;
+ if (getTargetIndex(Token.stringValue(), Index))
+ return error("use of undefined target index '" + Token.stringValue() + "'");
lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_liveout));
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegLiveOut(Mask);
return false;
}
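Register masks, including the liveout mask built above, pack one bit per physical register into 32-bit words; the Reg / 32 and Reg % 32 arithmetic selects the word and the bit within it. A tiny standalone illustration of the same indexing (the register number is hypothetical):

    #include <cstdio>

    int main() {
      unsigned Mask[4] = {0, 0, 0, 0};
      unsigned Reg = 37;                  // hypothetical register number
      Mask[Reg / 32] |= 1U << (Reg % 32); // set: word 1, bit 5
      bool Live = Mask[Reg / 32] & (1U << (Reg % 32));
      std::printf("reg %u liveout: %d\n", Reg, Live); // reg 37 liveout: 1
    }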
-bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+bool MIParser::parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx) {
switch (Token.kind()) {
case MIToken::kw_implicit:
case MIToken::kw_implicit_define:
+ case MIToken::kw_def:
case MIToken::kw_dead:
case MIToken::kw_killed:
case MIToken::kw_undef:
+ case MIToken::kw_internal:
+ case MIToken::kw_early_clobber:
+ case MIToken::kw_debug_use:
case MIToken::underscore:
case MIToken::NamedRegister:
case MIToken::VirtualRegister:
- return parseRegisterOperand(Dest);
+ return parseRegisterOperand(Dest, TiedDefIdx);
case MIToken::IntegerLiteral:
return parseImmediateOperand(Dest);
+ case MIToken::IntegerType:
+ return parseTypedImmediateOperand(Dest);
+ case MIToken::kw_half:
+ case MIToken::kw_float:
+ case MIToken::kw_double:
+ case MIToken::kw_x86_fp80:
+ case MIToken::kw_fp128:
+ case MIToken::kw_ppc_fp128:
+ return parseFPImmediateOperand(Dest);
case MIToken::MachineBasicBlock:
return parseMBBOperand(Dest);
+ case MIToken::StackObject:
+ return parseStackObjectOperand(Dest);
+ case MIToken::FixedStackObject:
+ return parseFixedStackObjectOperand(Dest);
case MIToken::GlobalValue:
case MIToken::NamedGlobalValue:
return parseGlobalAddressOperand(Dest);
+ case MIToken::ConstantPoolItem:
+ return parseConstantPoolIndexOperand(Dest);
+ case MIToken::JumpTableIndex:
+ return parseJumpTableIndexOperand(Dest);
+ case MIToken::ExternalSymbol:
+ return parseExternalSymbolOperand(Dest);
+ case MIToken::exclaim:
+ return parseMetadataOperand(Dest);
+ case MIToken::kw_cfi_same_value:
+ case MIToken::kw_cfi_offset:
+ case MIToken::kw_cfi_def_cfa_register:
+ case MIToken::kw_cfi_def_cfa_offset:
+ case MIToken::kw_cfi_def_cfa:
+ return parseCFIOperand(Dest);
+ case MIToken::kw_blockaddress:
+ return parseBlockAddressOperand(Dest);
+ case MIToken::kw_target_index:
+ return parseTargetIndexOperand(Dest);
+ case MIToken::kw_liveout:
+ return parseLiveoutRegisterMaskOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
@@ -496,12 +1430,314 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest) {
}
// fallthrough
default:
- // TODO: parse the other machine operands.
+ // FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
}
return false;
}
+bool MIParser::parseMachineOperandAndTargetFlags(
+ MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+ unsigned TF = 0;
+ bool HasTargetFlags = false;
+ if (Token.is(MIToken::kw_target_flags)) {
+ HasTargetFlags = true;
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ if (getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (getBitmaskTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ }
+ lex();
+ while (Token.is(MIToken::comma)) {
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ unsigned BitFlag = 0;
+ if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ // TODO: Report an error when using a duplicate bit target flag.
+ TF |= BitFlag;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ auto Loc = Token.location();
+ if (parseMachineOperand(Dest, TiedDefIdx))
+ return true;
+ if (!HasTargetFlags)
+ return false;
+ if (Dest.isReg())
+ return error(Loc, "register operands can't have target flags");
+ Dest.setTargetFlags(TF);
+ return false;
+}
+
+bool MIParser::parseOffset(int64_t &Offset) {
+ if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+ return false;
+ StringRef Sign = Token.range();
+ bool IsNegative = Token.is(MIToken::minus);
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '" + Sign + "'");
+ if (Token.integerValue().getMinSignedBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Offset = Token.integerValue().getExtValue();
+ if (IsNegative)
+ Offset = -Offset;
+ lex();
+ return false;
+}
+
+bool MIParser::parseAlignment(unsigned &Alignment) {
+ assert(Token.is(MIToken::kw_align));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'align'");
+ if (getUnsigned(Alignment))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Op.setOffset(Offset);
+ return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRValue: {
+ V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+ break;
+ }
+ case MIToken::IRValue: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ V = getIRValue(SlotNumber);
+ break;
+ }
+ case MIToken::NamedGlobalValue:
+ case MIToken::GlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ V = GV;
+ break;
+ }
+ case MIToken::QuotedIRValue: {
+ const Constant *C = nullptr;
+ if (parseIRConstant(Token.location(), Token.stringValue(), C))
+ return true;
+ V = C;
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR value reference");
+ }
+ if (!V)
+ return error(Twine("use of undefined IR value '") + Token.range() + "'");
+ return false;
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+ assert(Token.hasIntegerValue());
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+}
+
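Note the asymmetry with the signed helpers: getUint64 checks getActiveBits(), which bounds the unsigned magnitude, while parseOffset and the immediate parsers check getMinSignedBits(). The difference matters at the top of the range, as this small sketch using LLVM's APInt shows:

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    #include <cstdio>
    using namespace llvm;

    int main() {
      // 2^64 - 1 fits in a uint64_t (64 active bits) but needs 65 bits as a
      // signed value, so it passes getUint64 yet would fail a signed check.
      APInt Max(128, UINT64_MAX);
      std::printf("active=%u signed=%u\n", Max.getActiveBits(),
                  Max.getMinSignedBits()); // active=64 signed=65
    }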
+bool MIParser::parseMemoryOperandFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_volatile:
+ Flags |= MachineMemOperand::MOVolatile;
+ break;
+ case MIToken::kw_non_temporal:
+ Flags |= MachineMemOperand::MONonTemporal;
+ break;
+ case MIToken::kw_invariant:
+ Flags |= MachineMemOperand::MOInvariant;
+ break;
+ // TODO: parse the target specific memory operand flags.
+ default:
+ llvm_unreachable("The current token should be a memory operand flag");
+ }
+ if (OldFlags == Flags)
+ // If the flags weren't modified, then the same flag was specified more
+ // than once.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
+ switch (Token.kind()) {
+ case MIToken::kw_stack:
+ PSV = MF.getPSVManager().getStack();
+ break;
+ case MIToken::kw_got:
+ PSV = MF.getPSVManager().getGOT();
+ break;
+ case MIToken::kw_jump_table:
+ PSV = MF.getPSVManager().getJumpTable();
+ break;
+ case MIToken::kw_constant_pool:
+ PSV = MF.getPSVManager().getConstantPool();
+ break;
+ case MIToken::FixedStackObject: {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::kw_call_entry: {
+ lex();
+ switch (Token.kind()) {
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ PSV = MF.getPSVManager().getGlobalValueCallEntry(GV);
+ break;
+ }
+ case MIToken::ExternalSymbol:
+ PSV = MF.getPSVManager().getExternalSymbolCallEntry(
+ MF.createExternalSymbolName(Token.stringValue()));
+ break;
+ default:
+ return error(
+ "expected a global value or an external symbol after 'call-entry'");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be a pseudo source value");
+ }
+ lex();
+ return false;
+}
+
+bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
+ if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
+ Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
+ Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) {
+ const PseudoSourceValue *PSV = nullptr;
+ if (parseMemoryPseudoSourceValue(PSV))
+ return true;
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(PSV, Offset);
+ return false;
+ }
+ if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) &&
+ Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue) &&
+ Token.isNot(MIToken::QuotedIRValue))
+ return error("expected an IR value reference");
+ const Value *V = nullptr;
+ if (parseIRValue(V))
+ return true;
+ if (!V->getType()->isPointerTy())
+ return error("expected a pointer IR value");
+ lex();
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(V, Offset);
+ return false;
+}
+
+bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ unsigned Flags = 0;
+ while (Token.isMemoryOperandFlag()) {
+ if (parseMemoryOperandFlag(Flags))
+ return true;
+ }
+ if (Token.isNot(MIToken::Identifier) ||
+ (Token.stringValue() != "load" && Token.stringValue() != "store"))
+ return error("expected 'load' or 'store' memory operation");
+ if (Token.stringValue() == "load")
+ Flags |= MachineMemOperand::MOLoad;
+ else
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected the size integer literal after memory operation");
+ uint64_t Size;
+ if (getUint64(Size))
+ return true;
+ lex();
+
+ const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word)
+ return error(Twine("expected '") + Word + "'");
+ lex();
+
+ MachinePointerInfo Ptr = MachinePointerInfo();
+ if (parseMachinePointerInfo(Ptr))
+ return true;
+ unsigned BaseAlignment = Size;
+ AAMDNodes AAInfo;
+ MDNode *Range = nullptr;
+ while (consumeIfPresent(MIToken::comma)) {
+ switch (Token.kind()) {
+ case MIToken::kw_align:
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::md_tbaa:
+ lex();
+ if (parseMDNode(AAInfo.TBAA))
+ return true;
+ break;
+ case MIToken::md_alias_scope:
+ lex();
+ if (parseMDNode(AAInfo.Scope))
+ return true;
+ break;
+ case MIToken::md_noalias:
+ lex();
+ if (parseMDNode(AAInfo.NoAlias))
+ return true;
+ break;
+ case MIToken::md_range:
+ lex();
+ if (parseMDNode(Range))
+ return true;
+ break;
+ // TODO: Report an error on duplicate metadata nodes.
+ default:
+ return error("expected 'align' or '!tbaa' or '!alias.scope' or "
+ "'!noalias' or '!range'");
+ }
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest =
+ MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ return false;
+}
+
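Putting the pieces together, the operand grammar accepted here is roughly '(' flags ('load'|'store') size ('from'|'into') pointer attributes ')'. A small sketch that assembles a string of that shape (the %ir.p value name is hypothetical; it would have to name a pointer-typed IR value in the function):

    #include <cstdio>
    #include <string>

    // Builds text in the shape parseMachineMemoryOperand expects, e.g.
    // "(load 4 from %ir.p, align 4)" or "(store 8 into %ir.q, align 8)".
    static std::string memOperand(bool IsLoad, unsigned Size,
                                  const std::string &Ptr, unsigned Align) {
      return std::string("(") + (IsLoad ? "load " : "store ") +
             std::to_string(Size) + (IsLoad ? " from " : " into ") + Ptr +
             ", align " + std::to_string(Align) + ")";
    }

    int main() {
      std::printf("%s\n", memOperand(true, 4, "%ir.p", 4).c_str());
    }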
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
@@ -583,18 +1819,162 @@ unsigned MIParser::getSubRegIndex(StringRef Name) {
return SubRegInfo->getValue();
}
-bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI);
+static void initSlots2BasicBlocks(
+ const Function &F,
+ DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (auto &BB : F) {
+ if (BB.hasName())
+ continue;
+ int Slot = MST.getLocalSlot(&BB);
+ if (Slot == -1)
+ continue;
+ Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB));
+ }
+}
+
+static const BasicBlock *getIRBlockFromSlot(
+ unsigned Slot,
+ const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ auto BlockInfo = Slots2BasicBlocks.find(Slot);
+ if (BlockInfo == Slots2BasicBlocks.end())
+ return nullptr;
+ return BlockInfo->second;
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
+ if (Slots2BasicBlocks.empty())
+ initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
+ if (&F == MF.getFunction())
+ return getIRBlock(Slot);
+ DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
+ initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
+}
+
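The %ir-block.N numbers resolved here are the same local slot numbers the IR printer assigns to unnamed basic blocks. A rough sketch of querying one directly with ModuleSlotTracker (function construction elided; assumes F is an existing llvm::Function):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/ModuleSlotTracker.h"
    using namespace llvm;

    // Returns the printer's slot for an unnamed block, or -1 if the block
    // has a name (named blocks are looked up in the symbol table instead).
    static int getBlockSlot(const Function &F, const BasicBlock &BB) {
      ModuleSlotTracker MST(F.getParent(),
                            /*ShouldInitializeAllMetadata=*/false);
      MST.incorporateFunction(F);
      return BB.hasName() ? -1 : MST.getLocalSlot(&BB);
    }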
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to the function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value *MIParser::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(*MF.getFunction(), Slots2Values);
+ auto ValueInfo = Slots2Values.find(Slot);
+ if (ValueInfo == Slots2Values.end())
+ return nullptr;
+ return ValueInfo->second;
+}
+
+void MIParser::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void MIParser::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void MIParser::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+ PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ SourceMgr SM;
+ SM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks();
}
bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
MachineFunction &MF, StringRef Src,
const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB);
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB);
}
bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
@@ -602,5 +1982,30 @@ bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots,
SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg);
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneVirtualRegister(Reg);
+}
+
+bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+ .parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src, const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node);
}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index fca4c4e..8aef704 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -19,9 +19,11 @@
namespace llvm {
+class BasicBlock;
class MachineBasicBlock;
class MachineInstr;
class MachineFunction;
+class MDNode;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
@@ -29,11 +31,42 @@ class SourceMgr;
struct PerFunctionMIParsingState {
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+ DenseMap<unsigned, int> FixedStackObjectSlots;
+ DenseMap<unsigned, int> StackObjectSlots;
+ DenseMap<unsigned, unsigned> ConstantPoolSlots;
+ DenseMap<unsigned, unsigned> JumpTableSlots;
};
-bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
- StringRef Src, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet, which makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+ PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
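The split into two passes exists because machine basic block references may point forward, for example a branch in bb.0 that targets bb.2 before bb.2's definition has been seen. A generic two-pass sketch of the same resolution strategy (the block ids and successor references are made-up data, not the MIParser implementation):

    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      // Each entry: {block id, id of a referenced successor; -1 if none}.
      std::vector<std::pair<int, int>> Body = {{0, 2}, {1, -1}, {2, 0}};
      std::map<int, unsigned> Slots;
      for (unsigned I = 0; I != Body.size(); ++I) // pass 1: record defs only
        Slots[Body[I].first] = I;
      for (const auto &B : Body)                  // pass 2: resolve references
        if (B.second != -1 && !Slots.count(B.second))
          std::printf("use of undefined block %d\n", B.second);
      std::printf("all successor references resolved\n");
    }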
bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
MachineFunction &MF, StringRef Src,
@@ -46,6 +79,21 @@ bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
const SlotMapping &IRSlots,
SMDiagnostic &Error);
+bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error);
+
+bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+ StringRef Src, const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 16b0e16..422efbc 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -20,8 +20,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
@@ -95,30 +97,53 @@ public:
/// Return true if error occurred.
bool initializeMachineFunction(MachineFunction &MF);
- /// Initialize the machine basic block using its YAML representation.
- ///
- /// Return true if an error occurred.
- bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
- const yaml::MachineBasicBlock &YamlMBB,
- const PerFunctionMIParsingState &PFS);
+ bool initializeRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS);
+
+ void inferRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeFrameInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS);
+
+ bool parseCalleeSavedRegister(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource,
+ int FrameIdx);
+
+ bool parseStackObjectsDebugInfo(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object,
+ int FrameIdx);
- bool
- initializeRegisterInfo(const MachineFunction &MF,
- MachineRegisterInfo &RegInfo,
- const yaml::MachineFunction &YamlMF,
- DenseMap<unsigned, unsigned> &VirtualRegisterSlots);
+ bool initializeConstantPool(MachineConstantPool &ConstantPool,
+ const yaml::MachineFunction &YamlMF,
+ const MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots);
- bool initializeFrameInfo(MachineFrameInfo &MFI,
- const yaml::MachineFunction &YamlMF);
+ bool initializeJumpTableInfo(MachineFunction &MF,
+ const yaml::MachineJumpTable &YamlJTI,
+ PerFunctionMIParsingState &PFS);
private:
+ bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+ MachineFunction &MF, const PerFunctionMIParsingState &PFS);
+
+ bool parseMBBReference(MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source, MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS);
+
/// Return a MIR diagnostic converted from an MI string diagnostic.
SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
- /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
- SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
- SMRange SourceRange);
+ /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+ /// block scalar string.
+ SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
/// Create an empty function with the given name.
void createDummyFunction(StringRef Name, Module &M);
@@ -200,7 +225,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
Context, &IRSlots);
if (!M) {
- reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
+ reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
return M;
}
In.nextDocument();
@@ -261,88 +286,56 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasInlineAsm(YamlMF.HasInlineAsm);
PerFunctionMIParsingState PFS;
- if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF,
- PFS.VirtualRegisterSlots))
- return true;
- if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF))
+ if (initializeRegisterInfo(MF, YamlMF, PFS))
return true;
-
- const auto &F = *MF.getFunction();
- for (const auto &YamlMBB : YamlMF.BasicBlocks) {
- const BasicBlock *BB = nullptr;
- const yaml::StringValue &Name = YamlMBB.Name;
- if (!Name.Value.empty()) {
- BB = dyn_cast_or_null<BasicBlock>(
- F.getValueSymbolTable().lookup(Name.Value));
- if (!BB)
- return error(Name.SourceRange.Start,
- Twine("basic block '") + Name.Value +
- "' is not defined in the function '" + MF.getName() +
- "'");
- }
- auto *MBB = MF.CreateMachineBasicBlock(BB);
- MF.insert(MF.end(), MBB);
- bool WasInserted =
- PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
- if (!WasInserted)
- return error(Twine("redefinition of machine basic block with id #") +
- Twine(YamlMBB.ID));
- }
-
- if (YamlMF.BasicBlocks.empty())
- return error(Twine("machine function '") + Twine(MF.getName()) +
- "' requires at least one machine basic block in its body");
- // Initialize the machine basic blocks after creating them all so that the
- // machine instructions parser can resolve the MBB references.
- unsigned I = 0;
- for (const auto &YamlMBB : YamlMF.BasicBlocks) {
- if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
- PFS))
+ if (!YamlMF.Constants.empty()) {
+ auto *ConstantPool = MF.getConstantPool();
+ assert(ConstantPool && "Constant pool must be created");
+ if (initializeConstantPool(*ConstantPool, YamlMF, MF,
+ PFS.ConstantPoolSlots))
return true;
}
- return false;
-}
-bool MIRParserImpl::initializeMachineBasicBlock(
- MachineFunction &MF, MachineBasicBlock &MBB,
- const yaml::MachineBasicBlock &YamlMBB,
- const PerFunctionMIParsingState &PFS) {
- MBB.setAlignment(YamlMBB.Alignment);
- if (YamlMBB.AddressTaken)
- MBB.setHasAddressTaken();
- MBB.setIsLandingPad(YamlMBB.IsLandingPad);
SMDiagnostic Error;
- // Parse the successors.
- for (const auto &MBBSource : YamlMBB.Successors) {
- MachineBasicBlock *SuccMBB = nullptr;
- if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots,
- Error))
- return error(Error, MBBSource.SourceRange);
- // TODO: Report an error when adding the same successor more than once.
- MBB.addSuccessor(SuccMBB);
- }
- // Parse the liveins.
- for (const auto &LiveInSource : YamlMBB.LiveIns) {
- unsigned Reg = 0;
- if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS,
- IRSlots, Error))
- return error(Error, LiveInSource.SourceRange);
- MBB.addLiveIn(Reg);
+ if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS,
+ IRSlots, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
}
- // Parse the instructions.
- for (const auto &MISource : YamlMBB.Instructions) {
- MachineInstr *MI = nullptr;
- if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error))
- return error(Error, MISource.SourceRange);
- MBB.insert(MBB.end(), MI);
+
+ if (MF.empty())
+ return error(Twine("machine function '") + Twine(MF.getName()) +
+ "' requires at least one machine basic block in its body");
+ // Initialize the frame information after creating all the MBBs so that the
+ // MBB references in the frame information can be resolved.
+ if (initializeFrameInfo(MF, YamlMF, PFS))
+ return true;
+ // Initialize the jump table after creating all the MBBs so that the MBB
+ // references can be resolved.
+ if (!YamlMF.JumpTableInfo.Entries.empty() &&
+ initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS))
+ return true;
+ // Parse the machine instructions after creating all of the MBBs so that the
+ // parser can resolve the MBB references.
+ if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots,
+ Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
}
+ inferRegisterInfo(MF, YamlMF);
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MF.getRegInfo().freezeReservedRegs(MF);
+ MF.verify();
return false;
}
-bool MIRParserImpl::initializeRegisterInfo(
- const MachineFunction &MF, MachineRegisterInfo &RegInfo,
- const yaml::MachineFunction &YamlMF,
- DenseMap<unsigned, unsigned> &VirtualRegisterSlots) {
+bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
assert(RegInfo.isSSA());
if (!YamlMF.IsSSA)
RegInfo.leaveSSA();
@@ -351,6 +344,7 @@ bool MIRParserImpl::initializeRegisterInfo(
RegInfo.invalidateLiveness();
RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
+ SMDiagnostic Error;
// Parse the virtual register information.
for (const auto &VReg : YamlMF.VirtualRegisters) {
const auto *RC = getRegClass(MF, VReg.Class.Value);
@@ -359,15 +353,71 @@ bool MIRParserImpl::initializeRegisterInfo(
Twine("use of undefined register class '") +
VReg.Class.Value + "'");
unsigned Reg = RegInfo.createVirtualRegister(RC);
- // TODO: Report an error when the same virtual register with the same ID is
- // redefined.
- VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg));
+ if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
+ .second)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ if (!VReg.PreferredRegister.Value.empty()) {
+ unsigned PreferredReg = 0;
+ if (parseNamedRegisterReference(PreferredReg, SM, MF,
+ VReg.PreferredRegister.Value, PFS,
+ IRSlots, Error))
+ return error(Error, VReg.PreferredRegister.SourceRange);
+ RegInfo.setSimpleHint(Reg, PreferredReg);
+ }
}
+
+ // Parse the liveins.
+ for (const auto &LiveIn : YamlMF.LiveIns) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS,
+ IRSlots, Error))
+ return error(Error, LiveIn.Register.SourceRange);
+ unsigned VReg = 0;
+ if (!LiveIn.VirtualRegister.Value.empty()) {
+ if (parseVirtualRegisterReference(
+ VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error))
+ return error(Error, LiveIn.VirtualRegister.SourceRange);
+ }
+ RegInfo.addLiveIn(Reg, VReg);
+ }
+
+ // Parse the callee saved register mask.
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
+ if (!YamlMF.CalleeSavedRegisters)
+ return false;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots,
+ Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisterMask[Reg] = true;
+ }
+ RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
return false;
}
-bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
- const yaml::MachineFunction &YamlMF) {
+void MIRParserImpl::inferRegisterInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF) {
+ if (YamlMF.CalleeSavedRegisters)
+ return;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
+ }
+ }
+}
+
+bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
+ const yaml::MachineFunction &YamlMF,
+ PerFunctionMIParsingState &PFS) {
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const Function &F = *MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
@@ -383,7 +433,20 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ if (!YamlMFI.SavePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS))
+ return true;
+ MFI.setSavePoint(MBB);
+ }
+ if (!YamlMFI.RestorePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS))
+ return true;
+ MFI.setRestorePoint(MBB);
+ }
+ std::vector<CalleeSavedInfo> CSIInfo;
// Initialize the fixed frame objects.
for (const auto &Object : YamlMF.FixedStackObjects) {
int ObjectIdx;
@@ -393,27 +456,190 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
else
ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
- // TODO: Store the mapping between fixed object IDs and object indices to
- // parse fixed stack object references correctly.
+ if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
+ ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of fixed stack object '%fixed-stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
}
// Initialize the ordinary frame objects.
for (const auto &Object : YamlMF.StackObjects) {
int ObjectIdx;
+ const AllocaInst *Alloca = nullptr;
+ const yaml::StringValue &Name = Object.Name;
+ if (!Name.Value.empty()) {
+ Alloca = dyn_cast_or_null<AllocaInst>(
+ F.getValueSymbolTable().lookup(Name.Value));
+ if (!Alloca)
+ return error(Name.SourceRange.Start,
+ "alloca instruction named '" + Name.Value +
+ "' isn't defined in the function '" + F.getName() +
+ "'");
+ }
if (Object.Type == yaml::MachineStackObject::VariableSized)
- ObjectIdx =
- MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr);
+ ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
else
ObjectIdx = MFI.CreateStackObject(
Object.Size, Object.Alignment,
- Object.Type == yaml::MachineStackObject::SpillSlot);
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
- // TODO: Store the mapping between object IDs and object indices to parse
- // stack object references correctly.
+ if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of stack object '%stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
+ if (Object.LocalOffset)
+ MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+ if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx))
+ return true;
+ }
+ MFI.setCalleeSavedInfo(CSIInfo);
+ if (!CSIInfo.empty())
+ MFI.setCalleeSavedInfoValid(true);
+
+ // Initialize the various stack object references after initializing the
+ // stack objects.
+ if (!YamlMFI.StackProtector.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS,
+ IRSlots, Error))
+ return error(Error, YamlMFI.StackProtector.SourceRange);
+ MFI.setStackProtectorIndex(FI);
+ }
+ return false;
+}
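Redefinition detection throughout this function (and for the constants and jump tables below) uses the standard map-insert idiom: insert returns a pair whose .second member is false when the key already existed. A minimal sketch with std::map (the IDs and frame indices are hypothetical):

    #include <cstdio>
    #include <map>

    int main() {
      std::map<unsigned, int> Slots; // stack object ID -> frame index
      bool First = Slots.insert(std::make_pair(0u, -4)).second;  // true
      bool Second = Slots.insert(std::make_pair(0u, -8)).second; // false
      if (!Second)
        std::printf("redefinition of stack object '%%stack.0'\n");
      std::printf("first insert ok: %d\n", First);
    }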
+
+bool MIRParserImpl::parseCalleeSavedRegister(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource, int FrameIdx) {
+ if (RegisterSource.Value.empty())
+ return false;
+ unsigned Reg = 0;
+ SMDiagnostic Error;
+ if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS,
+ IRSlots, Error))
+ return error(Error, RegisterSource.SourceRange);
+ CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
+ return false;
+}
+
+/// Verify that the given node is of a certain type. Return true on error.
+template <typename T>
+static bool typecheckMDNode(T *&Result, MDNode *Node,
+ const yaml::StringValue &Source,
+ StringRef TypeString, MIRParserImpl &Parser) {
+ if (!Node)
+ return false;
+ Result = dyn_cast<T>(Node);
+ if (!Result)
+ return Parser.error(Source.SourceRange.Start,
+ "expected a reference to a '" + TypeString +
+ "' metadata node");
+ return false;
+}
+
+bool MIRParserImpl::parseStackObjectsDebugInfo(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object, int FrameIdx) {
+ // Debug information can only be attached to stack objects; fixed stack
+ // objects aren't supported.
+ assert(FrameIdx >= 0 && "Expected a stack object frame index");
+ MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
+ if (parseMDNode(Var, Object.DebugVar, MF, PFS) ||
+ parseMDNode(Expr, Object.DebugExpr, MF, PFS) ||
+ parseMDNode(Loc, Object.DebugLoc, MF, PFS))
+ return true;
+ if (!Var && !Expr && !Loc)
+ return false;
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
+ return true;
+ MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ return false;
+}
+
+bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+ MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS) {
+ if (Source.Value.empty())
+ return false;
+ SMDiagnostic Error;
+ if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::initializeConstantPool(
+ MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF,
+ const MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots) {
+ const auto &M = *MF.getFunction()->getParent();
+ SMDiagnostic Error;
+ for (const auto &YamlConstant : YamlMF.Constants) {
+ const Constant *Value = dyn_cast_or_null<Constant>(
+ parseConstantValue(YamlConstant.Value.Value, Error, M));
+ if (!Value)
+ return error(Error, YamlConstant.Value.SourceRange);
+ unsigned Alignment =
+ YamlConstant.Alignment
+ ? YamlConstant.Alignment
+ : M.getDataLayout().getPrefTypeAlignment(Value->getType());
+ unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
+ if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
+ .second)
+ return error(YamlConstant.ID.SourceRange.Start,
+ Twine("redefinition of constant pool item '%const.") +
+ Twine(YamlConstant.ID.Value) + "'");
}
return false;
}
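The alignment selection above follows a simple rule: an explicit non-zero alignment from the YAML wins, otherwise the module data layout's preferred alignment for the constant's type is used. A one-function sketch of that defaulting (both inputs hypothetical):

    #include <cstdio>

    // Mirrors the ternary in initializeConstantPool: a requested alignment
    // of 0 means "unspecified", so fall back to the preferred alignment.
    static unsigned pickAlignment(unsigned Requested, unsigned Preferred) {
      return Requested ? Requested : Preferred;
    }

    int main() {
      std::printf("%u\n", pickAlignment(0, 8));  // 8: data layout default
      std::printf("%u\n", pickAlignment(16, 8)); // 16: explicit wins
    }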
+bool MIRParserImpl::initializeJumpTableInfo(
+ MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI,
+ PerFunctionMIParsingState &PFS) {
+ MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
+ for (const auto &Entry : YamlJTI.Entries) {
+ std::vector<MachineBasicBlock *> Blocks;
+ for (const auto &MBBSource : Entry.Blocks) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB, MBBSource.Value, MF, PFS))
+ return true;
+ Blocks.push_back(MBB);
+ }
+ unsigned Index = JTI->createJumpTableIndex(Blocks);
+ if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index))
+ .second)
+ return error(Entry.ID.SourceRange.Start,
+ Twine("redefinition of jump table entry '%jump-table.") +
+ Twine(Entry.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source,
+ MachineFunction &MF,
+ const PerFunctionMIParsingState &PFS) {
+ SMDiagnostic Error;
+ if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
SMRange SourceRange) {
assert(SourceRange.isValid() && "Invalid source range");
@@ -430,8 +656,8 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
-SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
- SMRange SourceRange) {
+SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
assert(SourceRange.isValid());
// Translate the location of the error from the location in the llvm IR string
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index d5cf924..175cb0d 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -14,13 +14,20 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
@@ -31,11 +38,38 @@ using namespace llvm;
namespace {
+/// This structure describes how to print out stack object references.
+struct FrameIndexOperand {
+ std::string Name;
+ unsigned ID;
+ bool IsFixed;
+
+ FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed)
+ : Name(Name.str()), ID(ID), IsFixed(IsFixed) {}
+
+ /// Return an ordinary stack object reference.
+ static FrameIndexOperand create(StringRef Name, unsigned ID) {
+ return FrameIndexOperand(Name, ID, /*IsFixed=*/false);
+ }
+
+ /// Return a fixed stack object reference.
+ static FrameIndexOperand createFixed(unsigned ID) {
+ return FrameIndexOperand("", ID, /*IsFixed=*/true);
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
/// This class prints out the machine functions using the MIR serialization
/// format.
class MIRPrinter {
raw_ostream &OS;
DenseMap<const uint32_t *, unsigned> RegisterMaskIds;
+ /// Maps from stack object indices to operand indices which will be used when
+ /// printing frame index machine operands.
+ DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
public:
MIRPrinter(raw_ostream &OS) : OS(OS) {}
@@ -44,11 +78,16 @@ public:
void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI);
- void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI);
- void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB,
- const MachineBasicBlock &MBB);
+ void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI);
+ void convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool);
+ void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI);
+ const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -60,18 +99,32 @@ class MIPrinter {
raw_ostream &OS;
ModuleSlotTracker &MST;
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
public:
MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
- const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
- : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {}
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+ : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+ StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+ void print(const MachineBasicBlock &MBB);
void print(const MachineInstr &MI);
void printMBBReference(const MachineBasicBlock &MBB);
- void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
+ void printIRBlockReference(const BasicBlock &BB);
+ void printIRValueReference(const Value &V);
+ void printStackObjectReference(int FrameIndex);
+ void printOffset(int64_t Offset);
+ void printTargetFlags(const MachineOperand &Op);
+ void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false);
+ void print(const MachineMemOperand &Op);
+
+ void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
};
-} // end anonymous namespace
+} // end namespace llvm
namespace llvm {
namespace yaml {
@@ -103,6 +156,12 @@ static void printReg(unsigned Reg, raw_ostream &OS,
llvm_unreachable("Can't print this kind of register yet");
}
+static void printReg(unsigned Reg, yaml::StringValue &Dest,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ printReg(Reg, OS, TRI);
+}
+
void MIRPrinter::print(const MachineFunction &MF) {
initRegisterMaskIds(MF);
@@ -112,23 +171,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasInlineAsm = MF.hasInlineAsm();
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
- convert(YamlMF.FrameInfo, *MF.getFrameInfo());
- convertStackObjects(YamlMF, *MF.getFrameInfo());
-
- int I = 0;
ModuleSlotTracker MST(MF.getFunction()->getParent());
+ MST.incorporateFunction(*MF.getFunction());
+ convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
+ convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
+ MF.getSubtarget().getRegisterInfo());
+ if (const auto *ConstantPool = MF.getConstantPool())
+ convert(YamlMF, *ConstantPool);
+ if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+ convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+ raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+ bool IsNewlineNeeded = false;
for (const auto &MBB : MF) {
- // TODO: Allow printing of non sequentially numbered MBBs.
- // This is currently needed as the basic block references get their index
- // from MBB.getNumber(), thus it should be sequential so that the parser can
- // map back to the correct MBBs when parsing the output.
- assert(MBB.getNumber() == I++ &&
- "Can't print MBBs that aren't sequentially numbered");
- (void)I;
- yaml::MachineBasicBlock YamlMBB;
- convert(MST, YamlMBB, MBB);
- YamlMF.BasicBlocks.push_back(YamlMBB);
+ if (IsNewlineNeeded)
+ StrOS << "\n";
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .print(MBB);
+ IsNewlineNeeded = true;
}
+ StrOS.flush();
yaml::Output Out(OS);
Out << YamlMF;
}
@@ -147,11 +208,38 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
VReg.ID = I;
VReg.Class =
StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+ if (PreferredReg)
+ printReg(PreferredReg, VReg.PreferredRegister, TRI);
MF.VirtualRegisters.push_back(VReg);
}
+
+ // Print the live ins.
+ for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+ yaml::MachineFunctionLiveIn LiveIn;
+ printReg(I->first, LiveIn.Register, TRI);
+ if (I->second)
+ printReg(I->second, LiveIn.VirtualRegister, TRI);
+ MF.LiveIns.push_back(LiveIn);
+ }
+ // The used physical register mask is printed as an inverted callee-saved
+ // register mask.
+ const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
+ if (UsedPhysRegMask.none())
+ return;
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
+ if (!UsedPhysRegMask[I]) {
+ yaml::FlowStringValue Reg;
+ printReg(I, Reg, TRI);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ }
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
-void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineFrameInfo &YamlMFI,
const MachineFrameInfo &MFI) {
YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
@@ -166,10 +254,23 @@ void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ if (MFI.getSavePoint()) {
+ raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getSavePoint());
+ }
+ if (MFI.getRestorePoint()) {
+ raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getRestorePoint());
+ }
}
void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI) {
+ const MachineFrameInfo &MFI,
+ MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI) {
// Process fixed stack objects.
unsigned ID = 0;
for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -177,7 +278,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
continue;
yaml::FixedMachineStackObject YamlObject;
- YamlObject.ID = ID++;
+ YamlObject.ID = ID;
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::FixedMachineStackObject::SpillSlot
: yaml::FixedMachineStackObject::DefaultType;
@@ -187,8 +288,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
MF.FixedStackObjects.push_back(YamlObject);
- // TODO: Store the mapping between fixed object IDs and object indices to
- // print the fixed stack object references correctly.
+ StackObjectOperandMapping.insert(
+ std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
}
// Process ordinary stack objects.
@@ -198,7 +299,10 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
continue;
yaml::MachineStackObject YamlObject;
- YamlObject.ID = ID++;
+ YamlObject.ID = ID;
+ if (const auto *Alloca = MFI.getObjectAllocation(I))
+ YamlObject.Name.Value =
+ Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::MachineStackObject::SpillSlot
: MFI.isVariableSizedObjectIndex(I)
@@ -209,47 +313,100 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Alignment = MFI.getObjectAlignment(I);
MF.StackObjects.push_back(YamlObject);
- // TODO: Store the mapping between object IDs and object indices to print
- // the stack object references correctly.
+ StackObjectOperandMapping.insert(std::make_pair(
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+ }
+
+ for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+ yaml::StringValue Reg;
+ printReg(CSInfo.getReg(), Reg, TRI);
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed)
+ MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ else
+ MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ }
+ for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+ auto LocalObject = MFI.getLocalFrameObjectMap(I);
+ auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
+ MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ }
+
+ // Print the stack object references in the frame information class after
+ // converting the stack objects.
+ if (MFI.hasStackProtectorIndex()) {
+ raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getStackProtectorIndex());
+ }
+
+ // Print the debug variable information.
+ for (MachineModuleInfo::VariableDbgInfo &DebugVar :
+ MMI.getVariableDbgInfo()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
+ auto &Object = MF.StackObjects[StackObject.ID];
+ {
+ raw_string_ostream StrOS(Object.DebugVar.Value);
+ DebugVar.Var->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugExpr.Value);
+ DebugVar.Expr->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugLoc.Value);
+ DebugVar.Loc->printAsOperand(StrOS, MST);
+ }
}
}
-void MIRPrinter::convert(ModuleSlotTracker &MST,
- yaml::MachineBasicBlock &YamlMBB,
- const MachineBasicBlock &MBB) {
- assert(MBB.getNumber() >= 0 && "Invalid MBB number");
- YamlMBB.ID = (unsigned)MBB.getNumber();
- // TODO: Serialize unnamed BB references.
- if (const auto *BB = MBB.getBasicBlock())
- YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>";
- else
- YamlMBB.Name.Value = "";
- YamlMBB.Alignment = MBB.getAlignment();
- YamlMBB.AddressTaken = MBB.hasAddressTaken();
- YamlMBB.IsLandingPad = MBB.isLandingPad();
- for (const auto *SuccMBB : MBB.successors()) {
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool) {
+ unsigned ID = 0;
+ for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+ // TODO: Serialize target specific constant pool entries.
+ if (Constant.isMachineConstantPoolEntry())
+ llvm_unreachable("Can't print target specific constant pool entries yet");
+
+ yaml::MachineConstantPoolValue YamlConstant;
std::string Str;
raw_string_ostream StrOS(Str);
- MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB);
- YamlMBB.Successors.push_back(StrOS.str());
+ Constant.Val.ConstVal->printAsOperand(StrOS);
+ YamlConstant.ID = ID++;
+ YamlConstant.Value = StrOS.str();
+ YamlConstant.Alignment = Constant.getAlignment();
+ MF.Constants.push_back(YamlConstant);
}
- // Print the live in registers.
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) {
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI) {
+ YamlJTI.Kind = JTI.getEntryKind();
+ unsigned ID = 0;
+ for (const auto &Table : JTI.getJumpTables()) {
std::string Str;
- raw_string_ostream StrOS(Str);
- printReg(*I, StrOS, TRI);
- YamlMBB.LiveIns.push_back(StrOS.str());
- }
- // Print the machine instructions.
- YamlMBB.Instructions.reserve(MBB.size());
- std::string Str;
- for (const auto &MI : MBB) {
- raw_string_ostream StrOS(Str);
- MIPrinter(StrOS, MST, RegisterMaskIds).print(MI);
- YamlMBB.Instructions.push_back(StrOS.str());
- Str.clear();
+ yaml::MachineJumpTable::Entry Entry;
+ Entry.ID = ID++;
+ for (const auto *MBB : Table.MBBs) {
+ raw_string_ostream StrOS(Str);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MBB);
+ Entry.Blocks.push_back(StrOS.str());
+ Str.clear();
+ }
+ YamlJTI.Entries.push_back(Entry);
}
}
@@ -260,26 +417,137 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
RegisterMaskIds.insert(std::make_pair(Mask, I++));
}
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+ assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+ OS << "bb." << MBB.getNumber();
+ bool HasAttributes = false;
+ if (const auto *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ OS << "." << BB->getName();
+ } else {
+ HasAttributes = true;
+ OS << " (";
+ int Slot = MST.getLocalSlot(BB);
+ if (Slot == -1)
+ OS << "<ir-block badref>";
+ else
+ OS << (Twine("%ir-block.") + Twine(Slot)).str();
+ }
+ }
+ if (MBB.hasAddressTaken()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "address-taken";
+ HasAttributes = true;
+ }
+ if (MBB.isEHPad()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "landing-pad";
+ HasAttributes = true;
+ }
+ if (MBB.getAlignment()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "align " << MBB.getAlignment();
+ HasAttributes = true;
+ }
+ if (HasAttributes)
+ OS << ")";
+ OS << ":\n";
+
+ bool HasLineAttributes = false;
+ // Print the successors.
+ if (!MBB.succ_empty()) {
+ OS.indent(2) << "successors: ";
+ for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+ if (I != MBB.succ_begin())
+ OS << ", ";
+ printMBBReference(**I);
+ if (MBB.hasSuccessorProbabilities())
+ OS << '(' << MBB.getSuccProbability(I) << ')';
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ // Print the live in registers.
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ if (!MBB.livein_empty()) {
+ OS.indent(2) << "liveins: ";
+ bool First = true;
+ for (const auto &LI : MBB.liveins()) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ printReg(LI.PhysReg, OS, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << "\n";
+ bool IsInBundle = false;
+ for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+ OS.indent(IsInBundle ? 4 : 2);
+ print(MI);
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
+ }
+ OS << "\n";
+ }
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+}
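Put together, this emits machine basic blocks in the textual form sketched below; names, attributes, and instructions are hypothetical, and bundled instructions additionally appear between '{' and '}':

    // bb.1.if.then (address-taken, align 4):
    //   successors: %bb.2, %bb.3
    //   liveins: %edi
    //
    //   %eax = COPY %edi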
+
+/// Return true when an instruction has a tied register that can't be
+/// determined by the instruction's descriptor.
+static bool hasComplexRegisterTies(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const auto &Operand = MI.getOperand(I);
+ if (!Operand.isReg() || Operand.isDef())
+ // Ignore the defined registers as MCID marks only the uses as tied.
+ continue;
+ int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+ int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
+ if (ExpectedTiedIdx != TiedIdx)
+ return true;
+ }
+ return false;
+}
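Inline assembly is the usual source of such ties: its constraints aren't encoded in the fixed MCInstrDesc, so when this predicate fires the operand printer spells each tie out on the use operand, e.g. (operand index hypothetical):

    // ... = INLINEASM ..., %1(tied-def 3), ...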
+
void MIPrinter::print(const MachineInstr &MI) {
const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
const auto *TRI = SubTarget.getRegisterInfo();
assert(TRI && "Expected target register info");
const auto *TII = SubTarget.getInstrInfo();
assert(TII && "Expected target instruction info");
+ if (MI.isCFIInstruction())
+ assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+ bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
unsigned I = 0, E = MI.getNumOperands();
for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
!MI.getOperand(I).isImplicit();
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true);
}
if (I)
OS << " = ";
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
OS << TII->getName(MI.getOpcode());
- // TODO: Print the instruction flags, machine mem operands.
if (I < E)
OS << ' ';
@@ -287,9 +555,27 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI.getOperand(I), TRI);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
NeedComma = true;
}
+
+ if (MI.getDebugLoc()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-location ";
+ MI.getDebugLoc()->printAsOperand(OS, MST);
+ }
+
+ if (!MI.memoperands_empty()) {
+ OS << " :: ";
+ bool NeedComma = false;
+ for (const auto *Op : MI.memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ print(*Op);
+ NeedComma = true;
+ }
+ }
}
void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
@@ -300,32 +586,225 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
}
}
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+static void printIRSlotNumber(raw_ostream &OS, int Slot) {
+ if (Slot == -1)
+ OS << "<badref>";
+ else
+ OS << Slot;
+}
+
+void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
+ OS << "%ir-block.";
+ if (BB.hasName()) {
+ printLLVMNameWithoutPrefix(OS, BB.getName());
+ return;
+ }
+ const Function *F = BB.getParent();
+ int Slot;
+ if (F == MST.getCurrentFunction()) {
+ Slot = MST.getLocalSlot(&BB);
+ } else {
+ ModuleSlotTracker CustomMST(F->getParent(),
+ /*ShouldInitializeAllMetadata=*/false);
+ CustomMST.incorporateFunction(*F);
+ Slot = CustomMST.getLocalSlot(&BB);
+ }
+ printIRSlotNumber(OS, Slot);
+}
+
+void MIPrinter::printIRValueReference(const Value &V) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+ auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+ assert(ObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid frame index");
+ const FrameIndexOperand &Operand = ObjectInfo->second;
+ if (Operand.IsFixed) {
+ OS << "%fixed-stack." << Operand.ID;
+ return;
+ }
+ OS << "%stack." << Operand.ID;
+ if (!Operand.Name.empty())
+ OS << '.' << Operand.Name;
+}
+
+void MIPrinter::printOffset(int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
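Together with the operand switch below, these helpers produce spellings such as (names and offsets hypothetical):

    // %stack.0.retval     ordinary stack object 0, named after its alloca
    // %fixed-stack.1      fixed stack object 1
    // %const.0 + 8        constant pool index with a positive offset
    // %const.1 - 4        constant pool index with a negative offset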
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TF) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::printTargetFlags(const MachineOperand &Op) {
+ if (!Op.getTargetFlags())
+ return;
+ const auto *TII =
+ Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+ OS << "target-flags(";
+ const bool HasDirectFlags = Flags.first;
+ const bool HasBitmaskFlags = Flags.second;
+ if (!HasDirectFlags && !HasBitmaskFlags) {
+ OS << "<unknown>) ";
+ return;
+ }
+ if (HasDirectFlags) {
+ if (const auto *Name = getTargetFlagName(TII, Flags.first))
+ OS << Name;
+ else
+ OS << "<unknown target flag>";
+ }
+ if (!HasBitmaskFlags) {
+ OS << ") ";
+ return;
+ }
+ bool IsCommaNeeded = HasDirectFlags;
+ unsigned BitMask = Flags.second;
+ auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &Mask : BitMasks) {
+ // Check if the flag's bitmask has the bits of the current mask set.
+ if ((BitMask & Mask.first) == Mask.first) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ IsCommaNeeded = true;
+ OS << Mask.second;
+ // Clear the bits which were serialized from the flag's bitmask.
+ BitMask &= ~(Mask.first);
+ }
+ }
+ if (BitMask) {
+ // When the resulting flag's bitmask isn't zero, we know that we didn't
+ // serialize all of the bit flags.
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << "<unknown bitmask target flag>";
+ }
+ OS << ") ";
+}
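The printed prefix then reads, for example (flag names are target-defined and purely illustrative; unrecognized flags fall back to the '<unknown ...>' forms above):

    // target-flags(got) @symbol
    // target-flags(lo, flag-a, flag-b) %stack.0
    // target-flags(<unknown>) 0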
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices) {
+ if (I.first == Index) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies, bool IsDef) {
+ printTargetFlags(Op);
switch (Op.getType()) {
case MachineOperand::MO_Register:
- // TODO: Print the other register flags.
if (Op.isImplicit())
OS << (Op.isDef() ? "implicit-def " : "implicit ");
+ else if (!IsDef && Op.isDef())
+ // Print the 'def' flag only when the operand is defined after '='.
+ OS << "def ";
+ if (Op.isInternalRead())
+ OS << "internal ";
if (Op.isDead())
OS << "dead ";
if (Op.isKill())
OS << "killed ";
if (Op.isUndef())
OS << "undef ";
+ if (Op.isEarlyClobber())
+ OS << "early-clobber ";
+ if (Op.isDebug())
+ OS << "debug-use ";
printReg(Op.getReg(), OS, TRI);
// Print the sub register.
if (Op.getSubReg() != 0)
OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+ if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
+ OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
break;
+ case MachineOperand::MO_CImmediate:
+ Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
case MachineOperand::MO_MachineBasicBlock:
printMBBReference(*Op.getMBB());
break;
+ case MachineOperand::MO_FrameIndex:
+ printStackObjectReference(Op.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "%const." << Op.getIndex();
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_TargetIndex: {
+ OS << "target-index(";
+ if (const auto *Name = getTargetIndexName(
+ *Op.getParent()->getParent()->getParent(), Op.getIndex()))
+ OS << Name;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ printOffset(Op.getOffset());
+ break;
+ }
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "%jump-table." << Op.getIndex();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << '$';
+ printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+ printOffset(Op.getOffset());
+ break;
case MachineOperand::MO_GlobalAddress:
Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
- // TODO: Print offset and target flags.
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << "blockaddress(";
+ Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+ MST);
+ OS << ", ";
+ printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
+ OS << ')';
+ printOffset(Op.getOffset());
break;
case MachineOperand::MO_RegisterMask: {
auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
@@ -335,9 +814,157 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
llvm_unreachable("Can't print this machine register mask yet.");
break;
}
+ case MachineOperand::MO_RegisterLiveOut: {
+ const uint32_t *RegMask = Op.getRegLiveOut();
+ OS << "liveout(";
+ bool IsCommaNeeded = false;
+ for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+ if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ printReg(Reg, OS, TRI);
+ IsCommaNeeded = true;
+ }
+ }
+ OS << ")";
+ break;
+ }
+ case MachineOperand::MO_Metadata:
+ Op.getMetadata()->printAsOperand(OS, MST);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
+ break;
+ case MachineOperand::MO_CFIIndex: {
+ const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
+ print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ break;
+ }
+ }
+}
+
+void MIPrinter::print(const MachineMemOperand &Op) {
+ OS << '(';
+ // TODO: Print operand's target specific flags.
+ if (Op.isVolatile())
+ OS << "volatile ";
+ if (Op.isNonTemporal())
+ OS << "non-temporal ";
+ if (Op.isInvariant())
+ OS << "invariant ";
+ if (Op.isLoad())
+ OS << "load ";
+ else {
+ assert(Op.isStore() && "Non-load machine operand must be a store");
+ OS << "store ";
+ }
+ OS << Op.getSize() << (Op.isLoad() ? " from " : " into ");
+ if (const Value *Val = Op.getValue()) {
+ printIRValueReference(*Val);
+ } else {
+ const PseudoSourceValue *PVal = Op.getPseudoValue();
+ assert(PVal && "Expected a pseudo source value");
+ switch (PVal->kind()) {
+ case PseudoSourceValue::Stack:
+ OS << "stack";
+ break;
+ case PseudoSourceValue::GOT:
+ OS << "got";
+ break;
+ case PseudoSourceValue::JumpTable:
+ OS << "jump-table";
+ break;
+ case PseudoSourceValue::ConstantPool:
+ OS << "constant-pool";
+ break;
+ case PseudoSourceValue::FixedStack:
+ printStackObjectReference(
+ cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
+ break;
+ case PseudoSourceValue::GlobalValueCallEntry:
+ OS << "call-entry ";
+ cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+ OS, /*PrintType=*/false, MST);
+ break;
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ OS << "call-entry $";
+ printLLVMNameWithoutPrefix(
+ OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+ break;
+ }
+ }
+ printOffset(Op.getOffset());
+ if (Op.getBaseAlignment() != Op.getSize())
+ OS << ", align " << Op.getBaseAlignment();
+ auto AAInfo = Op.getAAInfo();
+ if (AAInfo.TBAA) {
+ OS << ", !tbaa ";
+ AAInfo.TBAA->printAsOperand(OS, MST);
+ }
+ if (AAInfo.Scope) {
+ OS << ", !alias.scope ";
+ AAInfo.Scope->printAsOperand(OS, MST);
+ }
+ if (AAInfo.NoAlias) {
+ OS << ", !noalias ";
+ AAInfo.NoAlias->printAsOperand(OS, MST);
+ }
+ if (Op.getRanges()) {
+ OS << ", !range ";
+ Op.getRanges()->printAsOperand(OS, MST);
+ }
+ OS << ')';
+}
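Sample memory operand renderings, as attached after '::' by the instruction printer above (IR names, sizes, and metadata slots hypothetical):

    // :: (volatile load 4 from %ir.p + 8, align 16, !tbaa !2)
    // :: (store 8 into %fixed-stack.0)
    // :: (load 4 from constant-pool)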
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+ if (Reg == -1) {
+ OS << "<badreg>";
+ return;
+ }
+ printReg(Reg, OS, TRI);
+}
+
+void MIPrinter::print(const MCCFIInstruction &CFI,
+ const TargetRegisterInfo *TRI) {
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpSameValue:
+ OS << ".cfi_same_value ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OS << ".cfi_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OS << ".cfi_def_cfa_register ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ OS << ".cfi_def_cfa_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OS << ".cfi_def_cfa ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
default:
- // TODO: Print the other machine operands.
- llvm_unreachable("Can't print this machine operand at the moment");
+ // TODO: Print the other CFI Operations.
+ OS << "<unserializable cfi operation>";
+ break;
}
}
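Concretely, serialized CFI instructions come out as shown below, opcode first (register and offsets hypothetical); when the MCCFIInstruction carries a label, a '<mcsymbol>' token precedes the operands:

    // CFI_INSTRUCTION .cfi_def_cfa_offset 16
    // CFI_INSTRUCTION .cfi_offset %rbp, -16
    // CFI_INSTRUCTION .cfi_def_cfa_register %rbp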
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 13d61e6..8e7566a 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -40,7 +40,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override {
+ bool runOnMachineFunction(MachineFunction &MF) override {
std::string Str;
raw_string_ostream StrOS(Str);
printMIR(StrOS, MF);
@@ -48,7 +48,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
return false;
}
- virtual bool doFinalization(Module &M) override {
+ bool doFinalization(Module &M) override {
printMIR(OS, M);
OS << MachineFunctions;
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5d3f7eb..76099f2 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -38,22 +39,21 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
-MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
- : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
- AddressTaken(false), CachedMCSymbol(nullptr) {
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+ : BB(B), Number(-1), xParent(&MF) {
Insts.Parent = this;
}
MachineBasicBlock::~MachineBasicBlock() {
}
-/// getSymbol - Return the MCSymbol for this basic block.
-///
+/// Return the MCSymbol for this basic block.
MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -68,9 +68,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
return OS;
}
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
-/// parent pointer of the MBB, the MBB numbering, and any instructions in the
-/// MBB to be on the right operand list for registers.
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
+/// operand list for registers.
///
/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
/// gets the next available unique MBB number. If it is removed from a
@@ -91,10 +91,8 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
N->Number = -1;
}
-
-/// addNodeToList (MI) - When we add an instruction to a basic block
-/// list, we update its parent pointer and add its operands from reg use/def
-/// lists if appropriate.
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
assert(!N->getParent() && "machine instruction already in a basic block");
N->setParent(Parent);
@@ -105,9 +103,8 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
N->AddRegOperandsToUseLists(MF->getRegInfo());
}
-/// removeNodeFromList (MI) - When we remove an instruction from a basic block
-/// list, we update its parent pointer and remove its operands from reg use/def
-/// lists if appropriate.
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
@@ -118,23 +115,22 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
N->setParent(nullptr);
}
-/// transferNodesFromList (MI) - When moving a range of instructions from one
-/// MBB list to another, we need to update the parent pointers and the use/def
-/// lists.
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- ilist_iterator<MachineInstr> first,
- ilist_iterator<MachineInstr> last) {
- assert(Parent->getParent() == fromList.Parent->getParent() &&
+transferNodesFromList(ilist_traits<MachineInstr> &FromList,
+ ilist_iterator<MachineInstr> First,
+ ilist_iterator<MachineInstr> Last) {
+ assert(Parent->getParent() == FromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
// Splice within the same MBB -> no change.
- if (Parent == fromList.Parent) return;
+ if (Parent == FromList.Parent) return;
// If splicing between two blocks within the same function, just update the
// parent pointers.
- for (; first != last; ++first)
- first->setParent(Parent);
+ for (; First != Last; ++First)
+ First->setParent(Parent);
}
void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
@@ -208,11 +204,18 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
if (succ_size() > 2)
return nullptr;
for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isLandingPad())
+ if ((*I)->isEHPad())
return *I;
return nullptr;
}
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isEHPad())
+ return true;
+ return false;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineBasicBlock::dump() const {
print(dbgs());
@@ -271,7 +274,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
LBB->printAsOperand(OS, /*PrintType=*/false, MST);
Comma = ", ";
}
- if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
if (Alignment)
OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
@@ -283,8 +286,11 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
- for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
- OS << ' ' << PrintReg(*I, TRI);
+ for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ OS << ' ' << PrintReg(LI.PhysReg, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
OS << '\n';
}
// Print the preds of this block according to the CFG.
@@ -298,8 +304,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
- if (Indexes->hasIndex(I))
- OS << Indexes->getInstructionIndex(I);
+ if (Indexes->hasIndex(&*I))
+ OS << Indexes->getInstructionIndex(&*I);
OS << '\t';
}
OS << '\t';
@@ -314,35 +320,63 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
- if (!Weights.empty())
- OS << '(' << *getWeightIterator(SI) << ')';
+ if (!Probs.empty())
+ OS << '(' << *getProbabilityIterator(SI) << ')';
}
OS << '\n';
}
}
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+ bool /*PrintType*/) const {
OS << "BB#" << getNumber();
}
-void MachineBasicBlock::removeLiveIn(unsigned Reg) {
- std::vector<unsigned>::iterator I =
- std::find(LiveIns.begin(), LiveIns.end(), Reg);
- if (I != LiveIns.end())
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+ LiveInVector::iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ if (I == LiveIns.end())
+ return;
+
+ I->LaneMask &= ~LaneMask;
+ if (I->LaneMask == 0)
LiveIns.erase(I);
}
-bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
- livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
- return I != livein_end();
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+ livein_iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+ std::sort(LiveIns.begin(), LiveIns.end(),
+ [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+ return LI0.PhysReg < LI1.PhysReg;
+ });
+ // Liveins are now sorted by physreg, so we can merge their lanemasks.
+ LiveInVector::const_iterator I = LiveIns.begin();
+ LiveInVector::const_iterator J;
+ LiveInVector::iterator Out = LiveIns.begin();
+ for (; I != LiveIns.end(); ++Out, I = J) {
+ unsigned PhysReg = I->PhysReg;
+ LaneBitmask LaneMask = I->LaneMask;
+ for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+ LaneMask |= J->LaneMask;
+ Out->PhysReg = PhysReg;
+ Out->LaneMask = LaneMask;
+ }
+ LiveIns.erase(Out, LiveIns.end());
}
unsigned
-MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
assert(RC && "Register class is required");
- assert((isLandingPad() || this == &getParent()->front()) &&
+ assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
bool LiveIn = isLiveIn(PhysReg);
@@ -370,12 +404,11 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
}
void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
- getParent()->splice(NewAfter, this);
+ getParent()->splice(NewAfter->getIterator(), getIterator());
}
void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
- MachineFunction::iterator BBI = NewBefore;
- getParent()->splice(++BBI, this);
+ getParent()->splice(++NewBefore->getIterator(), getIterator());
}
void MachineBasicBlock::updateTerminator() {
@@ -385,7 +418,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc DL; // FIXME: this is nowhere
bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -400,7 +433,7 @@ void MachineBasicBlock::updateTerminator() {
// its layout successor, insert a branch. First we have to locate the
// only non-landing-pad successor, as that is the fallthrough block.
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ if ((*SI)->isEHPad())
continue;
assert(!TBB && "Found more than one non-landing-pad successor!");
TBB = *SI;
@@ -414,7 +447,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
}
} else {
if (FBB) {
@@ -425,10 +458,10 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond))
return;
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
}
} else {
// Walk through the successors and find the successor which is not
@@ -436,7 +469,7 @@ void MachineBasicBlock::updateTerminator() {
// as the fallthrough successor.
MachineBasicBlock *FallthroughBB = nullptr;
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isLandingPad() || *SI == TBB)
+ if ((*SI)->isEHPad() || *SI == TBB)
continue;
assert(!FallthroughBB && "Found more than one fallthrough successor.");
FallthroughBB = *SI;
@@ -445,14 +478,14 @@ void MachineBasicBlock::updateTerminator() {
// We fallthrough to the same basic block as the conditional jump
// targets. Remove the conditional jump, leaving unconditional
// fallthrough.
- // FIXME: This does not seem like a reasonable pattern to support, but it
- // has been seen in the wild coming out of degenerate ARM test cases.
+ // FIXME: This does not seem like a reasonable pattern to support, but
+ // it has been seen in the wild coming out of degenerate ARM test cases.
TII->RemoveBranch(*this);
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
@@ -461,55 +494,69 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
} else if (!isLayoutSuccessor(FallthroughBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
}
}
}
}
-void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
-
- // If we see non-zero value for the first time it means we actually use Weight
- // list, so we fill all Weights with 0's.
- if (weight != 0 && Weights.empty())
- Weights.resize(Successors.size());
-
- if (weight != 0 || !Weights.empty())
- Weights.push_back(weight);
-
- Successors.push_back(succ);
- succ->addPredecessor(this);
- }
+void MachineBasicBlock::validateSuccProbs() const {
+#ifndef NDEBUG
+ int64_t Sum = 0;
+ for (auto Prob : Probs)
+ Sum += Prob.getNumerator();
+ // Due to precision issue, we assume that the sum of probabilities is one if
+ // the difference between the sum of their numerators and the denominator is
+ // no greater than the number of successors.
+ assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <=
+ Probs.size() &&
+ "The sum of successors's probabilities exceeds one.");
+#endif // NDEBUG
+}
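A quick check of the tolerance: the denominator is fixed at D = 1u << 31, so if three equal successors each normalize to the rounded-down numerator D / 3:

    // D = 0x80000000, D / 3 = 0x2AAAAAAA (rounded down)
    // Sum = 3 * 0x2AAAAAAA = 0x7FFFFFFE = D - 2
    // |Sum - D| = 2 <= Probs.size() = 3, so the assertion holds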
-void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
- succ->removePredecessor(this);
- succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- assert(I != Successors.end() && "Not a current successor!");
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ,
+ BranchProbability Prob) {
+ // The probability list is either empty (which, when the successor list isn't
+ // empty, means the optimization is disabled) or has the same size as the
+ // successor list.
+ if (!(Probs.empty() && !Successors.empty()))
+ Probs.push_back(Prob);
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(I);
- Weights.erase(WI);
- }
+void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
+ // We need to make sure the probability list is either empty or has the same
+ // size as the successor list. When this function is called, we can safely
+ // delete all probabilities in the list.
+ Probs.clear();
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
- Successors.erase(I);
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
+ bool NormalizeSuccProbs) {
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ);
+ removeSuccessor(I, NormalizeSuccProbs);
}
MachineBasicBlock::succ_iterator
-MachineBasicBlock::removeSuccessor(succ_iterator I) {
+MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) {
assert(I != Successors.end() && "Not a current successor!");
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(I);
- Weights.erase(WI);
+ // If the probability list is empty, it means we don't use it (disabled
+ // optimization).
+ if (!Probs.empty()) {
+ probability_iterator WI = getProbabilityIterator(I);
+ Probs.erase(WI);
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
}
(*I)->removePredecessor(this);
@@ -537,74 +584,77 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
}
}
assert(OldI != E && "Old is not a successor of this block");
- Old->removePredecessor(this);
// If New isn't already a successor, let it take Old's place.
if (NewI == E) {
+ Old->removePredecessor(this);
New->addPredecessor(this);
*OldI = New;
return;
}
// New is already a successor.
- // Update its weight instead of adding a duplicate edge.
- if (!Weights.empty()) {
- weight_iterator OldWI = getWeightIterator(OldI);
- *getWeightIterator(NewI) += *OldWI;
- Weights.erase(OldWI);
+ // Update its probability instead of adding a duplicate edge.
+ if (!Probs.empty()) {
+ auto ProbIter = getProbabilityIterator(NewI);
+ if (!ProbIter->isUnknown())
+ *ProbIter += *getProbabilityIterator(OldI);
}
- Successors.erase(OldI);
+ removeSuccessor(OldI);
}
-void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
- Predecessors.push_back(pred);
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
+ Predecessors.push_back(Pred);
}
-void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
- pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred);
assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
Predecessors.erase(I);
}
-void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
- if (this == fromMBB)
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
return;
- while (!fromMBB->succ_empty()) {
- MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t Weight = 0;
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!fromMBB->Weights.empty())
- Weight = *fromMBB->Weights.begin();
+ // If the probability list is empty, it means we don't use it (disabled optimization).
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
- addSuccessor(Succ, Weight);
- fromMBB->removeSuccessor(Succ);
+ FromMBB->removeSuccessor(Succ);
}
}
void
-MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
- if (this == fromMBB)
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
return;
- while (!fromMBB->succ_empty()) {
- MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t Weight = 0;
- if (!fromMBB->Weights.empty())
- Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, Weight);
- fromMBB->removeSuccessor(Succ);
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+ FromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.getMBB() == fromMBB)
+ if (MO.getMBB() == FromMBB)
MO.setMBB(this);
}
}
+ normalizeSuccProbs();
}
bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
@@ -621,14 +671,14 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
}
bool MachineBasicBlock::canFallThrough() {
- MachineFunction::iterator Fallthrough = this;
+ MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == getParent()->end())
return false;
// If FallthroughBlock isn't a successor, no fallthrough is possible.
- if (!isSuccessor(Fallthrough))
+ if (!isSuccessor(&*Fallthrough))
return false;
// Analyze the branches, if any, at the end of the block.
@@ -666,11 +716,11 @@ MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
- if (Succ->isLandingPad())
+ if (Succ->isEHPad())
return nullptr;
MachineFunction *MF = getParent();
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc DL; // FIXME: this is nowhere
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
@@ -719,7 +769,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LV)
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
- MachineInstr *MI = I;
+ MachineInstr *MI = &*I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
if (!OI->isReg() || OI->getReg() == 0 ||
@@ -739,7 +789,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LIS) {
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
- MachineInstr *MI = I;
+ MachineInstr *MI = &*I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
@@ -761,7 +811,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (Indexes) {
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I)
- Terminators.push_back(I);
+ Terminators.push_back(&*I);
}
updateTerminator();
@@ -770,7 +820,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
SmallVector<MachineInstr*, 4> NewTerminators;
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I)
- NewTerminators.push_back(I);
+ NewTerminators.push_back(&*I);
for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
E = Terminators.end(); I != E; ++I) {
@@ -784,17 +834,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
- MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond,
- dl);
+ TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
if (Indexes) {
for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
I != E; ++I) {
// Some instructions may have been moved to NMBB by updateTerminator(),
// so we first remove any instruction that already has an index.
- if (Indexes->hasIndex(I))
- Indexes->removeMachineInstrFromMaps(I);
- Indexes->insertMachineInstrInMaps(I);
+ if (Indexes->hasIndex(&*I))
+ Indexes->removeMachineInstrFromMaps(&*I);
+ Indexes->insertMachineInstrInMaps(&*I);
}
}
}
@@ -808,9 +857,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
i->getOperand(ni+1).setMBB(NMBB);
// Inherit live-ins from the successor
- for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
- E = Succ->livein_end(); I != E; ++I)
- NMBB->addLiveIn(*I);
+ for (const auto &LI : Succ->liveins())
+ NMBB->addLiveIn(LI);
// Update LiveVariables.
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -822,7 +870,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
- LV->getVarInfo(Reg).Kills.push_back(I);
+ LV->getVarInfo(Reg).Kills.push_back(&*I);
DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
@@ -834,10 +882,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (LIS) {
// After splitting the edge and updating SlotIndexes, live intervals may be
// in one of two situations, depending on whether this block was the last in
- // the function. If the original block was the last in the function, all live
- // intervals will end prior to the beginning of the new split block. If the
- // original block was not at the end of the function, all live intervals will
- // extend to the end of the new split block.
+ // the function. If the original block was the last in the function, all
+ // live intervals will end prior to the beginning of the new split block. If
+ // the original block was not at the end of the function, all live intervals
+ // will extend to the end of the new split block.
bool isLastMBB =
std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
@@ -861,7 +909,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LiveInterval &LI = LIS->getInterval(Reg);
VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
- assert(VNI && "PHI sources should be live out of their predecessors.");
+ assert(VNI &&
+ "PHI sources should be live out of their predecessors.");
LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
}
}
@@ -941,7 +990,7 @@ static void unbundleSingleMI(MachineInstr *MI) {
MachineBasicBlock::instr_iterator
MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
- unbundleSingleMI(I);
+ unbundleSingleMI(&*I);
return Insts.erase(I);
}
@@ -964,25 +1013,22 @@ MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
return Insts.insert(I, MI);
}
-/// removeFromParent - This method unlinks 'this' from the containing function,
-/// and returns it, but does not delete it.
+/// This method unlinks 'this' from the containing function, and returns it, but
+/// does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
assert(getParent() && "Not embedded in a function!");
getParent()->remove(this);
return this;
}
-
-/// eraseFromParent - This method unlinks 'this' from the containing function,
-/// and deletes it.
+/// This method unlinks 'this' from the containing function, and deletes it.
void MachineBasicBlock::eraseFromParent() {
assert(getParent() && "Not embedded in a function!");
getParent()->erase(this);
}
-
-/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
-/// 'Old', change the code and CFG so that it branches to 'New' instead.
+/// Given a machine basic block that branched to 'Old', change the code and CFG
+/// so that it branches to 'New' instead.
void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
@@ -1004,46 +1050,44 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
replaceSuccessor(Old, New);
}
-/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
-/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
-/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
-/// null.
+/// Various pieces of code can cause excess edges in the CFG to be inserted. If
+/// we have proven that MBB can only branch to DestA and DestB, remove any other
+/// MBB successors from the CFG. DestA and DestB can be null.
///
/// Besides DestA and DestB, retain other edges leading to LandingPads
/// (currently there can be only one; we don't check or require that here).
/// Note it is possible that DestA and/or DestB are LandingPads.
bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock *DestB,
- bool isCond) {
+ bool IsCond) {
// The values of DestA and DestB frequently come from a call to the
// 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
// values from there.
//
// 1. If both DestA and DestB are null, then the block ends with no branches
// (it falls through to its successor).
- // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+ // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends
// with only an unconditional branch.
- // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+ // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends
// with a conditional branch that falls through to a successor (DestB).
- // 4. If DestA and DestB is set and isCond is true, then the block ends with a
+ // 4. If DestA and DestB is set and IsCond is true, then the block ends with a
// conditional branch followed by an unconditional branch. DestA is the
// 'true' destination and DestB is the 'false' destination.
bool Changed = false;
- MachineFunction::iterator FallThru =
- std::next(MachineFunction::iterator(this));
+ MachineFunction::iterator FallThru = std::next(getIterator());
if (!DestA && !DestB) {
// Block falls through to successor.
- DestA = FallThru;
- DestB = FallThru;
+ DestA = &*FallThru;
+ DestB = &*FallThru;
} else if (DestA && !DestB) {
- if (isCond)
+ if (IsCond)
// Block ends in conditional jump that falls through to successor.
- DestB = FallThru;
+ DestB = &*FallThru;
} else {
- assert(DestA && DestB && isCond &&
+ assert(DestA && DestB && IsCond &&
"CFG in a bad state. Cannot correct CFG edges");
}
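
An editorial aside on the four AnalyzeBranch-style cases enumerated above: the sketch below restates the null-destination fixup in standalone C++ (Block is a stand-in type and every name is hypothetical; this is a model of the logic, not the LLVM implementation).

    #include <cassert>
    #include <cstdio>

    struct Block {}; // stand-in for MachineBasicBlock

    // Null destinations are resolved against the layout fall-through
    // block, mirroring cases 1-3 above; case 4 is only sanity-checked.
    static void resolveDests(Block *&DestA, Block *&DestB, bool IsCond,
                             Block *FallThru) {
      if (!DestA && !DestB) {
        DestA = FallThru; // case 1: no branch, pure fall-through
        DestB = FallThru;
      } else if (DestA && !DestB) {
        if (IsCond)
          DestB = FallThru; // case 3: conditional branch + fall-through
        // case 2 (unconditional branch) leaves DestB null
      } else {
        assert(DestA && DestB && IsCond && "bad CFG state"); // case 4
      }
    }

    int main() {
      Block FTBlock;
      Block *FT = &FTBlock, *A = nullptr, *B = nullptr;
      resolveDests(A, B, /*IsCond=*/false, FT);
      std::printf("case 1 resolves both dests to fall-through: %d\n",
                  A == FT && B == FT);
    }
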
@@ -1054,7 +1098,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
if (!SeenMBBs.insert(MBB).second ||
- (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ (MBB != DestA && MBB != DestB && !MBB->isEHPad())) {
// This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
Changed = true;
@@ -1063,11 +1107,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
}
}
+ if (Changed)
+ normalizeSuccProbs();
return Changed;
}
-/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
-/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
@@ -1083,40 +1129,55 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return DL;
}
-/// getSuccWeight - Return weight of the edge from this block to MBB.
-///
-uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
- if (Weights.empty())
- return 0;
-
- return *getWeightIterator(Succ);
+/// Return probability of the edge from this block to MBB.
+BranchProbability
+MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
+ if (Probs.empty())
+ return BranchProbability(1, succ_size());
+
+ const auto &Prob = *getProbabilityIterator(Succ);
+ if (Prob.isUnknown()) {
+ // For unknown probabilities, collect the sum of all known ones, and evenly
+ // distribute the complement of the sum to each unknown probability.
+ unsigned KnownProbNum = 0;
+ auto Sum = BranchProbability::getZero();
+ for (auto &P : Probs) {
+ if (!P.isUnknown()) {
+ Sum += P;
+ KnownProbNum++;
+ }
+ }
+ return Sum.getCompl() / (Probs.size() - KnownProbNum);
+ } else
+ return Prob;
}
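
To make the unknown-probability rule above concrete, here is a minimal sketch in plain C++ doubles (not LLVM's fixed-point BranchProbability; the successor probabilities are invented): known edges keep their values, and the complement of their sum is split evenly among the unknown ones.

    #include <cstdio>
    #include <vector>

    int main() {
      const double Unknown = -1.0; // marker for "unknown"
      std::vector<double> Probs = {0.5, 0.25, Unknown, Unknown};

      double Sum = 0;
      unsigned KnownProbNum = 0;
      for (double P : Probs)
        if (P != Unknown) {
          Sum += P;
          ++KnownProbNum;
        }

      // Complement of the known sum, shared evenly by the unknowns.
      double PerUnknown = (1.0 - Sum) / (Probs.size() - KnownProbNum);
      std::printf("each unknown successor gets %.3f\n", PerUnknown); // 0.125
    }
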
-/// Set successor weight of a given iterator.
-void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) {
- if (Weights.empty())
+/// Set successor probability of a given iterator.
+void MachineBasicBlock::setSuccProbability(succ_iterator I,
+ BranchProbability Prob) {
+ assert(!Prob.isUnknown());
+ if (Probs.empty())
return;
- *getWeightIterator(I) = weight;
+ *getProbabilityIterator(I) = Prob;
}
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::succ_iterator I) {
- assert(Weights.size() == Successors.size() && "Async weight list!");
- size_t index = std::distance(Successors.begin(), I);
- assert(index < Weights.size() && "Not a current successor!");
- return Weights.begin() + index;
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::const_probability_iterator
+MachineBasicBlock::getProbabilityIterator(
+ MachineBasicBlock::const_succ_iterator I) const {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
}
-/// getWeightIterator - Return wight iterator corresonding to the I successor
-/// iterator
-MachineBasicBlock::const_weight_iterator MachineBasicBlock::
-getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
- assert(Weights.size() == Successors.size() && "Async weight list!");
+/// Return probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::probability_iterator
+MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
const size_t index = std::distance(Successors.begin(), I);
- assert(index < Weights.size() && "Not a current successor!");
- return Weights.begin() + index;
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
}
/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
@@ -1138,33 +1199,33 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
do {
--I;
- MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.Defines)
- // Outputs happen after inputs so they take precedence if both are
- // present.
- return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+ // Defs happen after uses so they take precedence if both are present.
- if (Analysis.Kills || Analysis.Clobbers)
- // Register killed, so isn't live.
+ // Register is dead after a dead def of the full register.
+ if (Info.DeadDef)
return LQR_Dead;
-
- else if (Analysis.ReadsOverlap)
- // Defined or read without a previous kill - live.
- return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
-
+ // Register is (at least partially) live after a def.
+ if (Info.Defined)
+ return LQR_Live;
+ // Register is dead after a full kill or clobber and no def.
+ if (Info.Killed || Info.Clobbered)
+ return LQR_Dead;
+ // Register must be live if we read it.
+ if (Info.Read)
+ return LQR_Live;
} while (I != begin() && --N > 0);
}
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
- for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
- RAI.isValid(); ++RAI) {
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
+ ++RAI)
if (isLiveIn(*RAI))
- return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
- }
+ return LQR_Live;
return LQR_Dead;
}
@@ -1176,16 +1237,14 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// If this is the last insn in the block, don't search forwards.
if (I != end()) {
for (++I; I != end() && N > 0; ++I, --N) {
- MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.ReadsOverlap)
- // Used, therefore must have been live.
- return (Analysis.Reads) ?
- LQR_Live : LQR_OverlappingLive;
-
- else if (Analysis.Clobbers || Analysis.Defines)
- // Defined (but not read) therefore cannot have been live.
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
return LQR_Dead;
}
}
@@ -1193,3 +1252,17 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
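
The per-instruction precedence order that the rewritten backward scan applies can be isolated into a tiny check; a hedged standalone model follows (the field names are copied from the PhysRegInfo uses above, everything else is invented).

    #include <cstdio>

    enum LivenessQueryResult { LQR_Live, LQR_Dead, LQR_Unknown };

    struct PhysRegInfo { // per-instruction facts, as in the hunk above
      bool DeadDef, Defined, Killed, Clobbered, Read;
    };

    // Precedence used by the backward scan: defs beat uses, and a dead
    // def of the full register beats a live (possibly partial) def.
    static LivenessQueryResult classify(const PhysRegInfo &Info) {
      if (Info.DeadDef) return LQR_Dead;
      if (Info.Defined) return LQR_Live;
      if (Info.Killed || Info.Clobbered) return LQR_Dead;
      if (Info.Read) return LQR_Live;
      return LQR_Unknown;
    }

    int main() {
      PhysRegInfo DeadFullDef = {true, true, false, false, false};
      std::printf("dead full def => %s\n",
                  classify(DeadFullDef) == LQR_Dead ? "LQR_Dead" : "other");
    }
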
+
+const uint32_t *
+MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const {
+ // EH funclet entry does not preserve any registers.
+ return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr;
+}
+
+const uint32_t *
+MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
+ // If we see a return block with successors, this must be a funclet return,
+ // which does not preserve any registers. If there are no successors, we don't
+ // care what kind of return it is, putting a mask after it is a no-op.
+ return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
+}
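
For readers unfamiliar with register masks: by LLVM's convention a set bit means the register is preserved, so an all-zero mask (what getNoPreservedMask presumably returns) clobbers everything, while returning nullptr means "no clobber point here". A toy model under those assumptions, for a hypothetical 128-register target:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical mask preserving nothing, i.e. clobbering all regs.
    static const uint32_t NoPreservedMask[4] = {0, 0, 0, 0};

    static bool isPreserved(const uint32_t *Mask, unsigned PhysReg) {
      if (!Mask)
        return true; // no clobber point, liveness is untouched
      return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1;
    }

    int main() {
      std::printf("reg 5 preserved across funclet entry? %d\n",
                  isPreserved(NoPreservedMask, 5)); // 0
    }
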
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9151d99..9119e31 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -57,7 +57,7 @@ struct GraphTraits<MachineBlockFrequencyInfo *> {
static inline
const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return &G->getFunction()->front();
}
static ChildIteratorType child_begin(const NodeType *N) {
@@ -143,7 +143,7 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
if (!MBFI)
MBFI.reset(new ImplType);
- MBFI->doFunction(&F, &MBPI, &MLI);
+ MBFI->calculate(F, MBPI, MLI);
#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
view();
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2969bad..f5e3056 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -51,7 +51,7 @@ using namespace llvm;
#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
-STATISTIC(NumUncondBranches, "Number of uncondittional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
STATISTIC(CondBranchTakenFreq,
"Potential frequency of taking conditional branches");
STATISTIC(UncondBranchTakenFreq,
@@ -62,6 +62,11 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
"blocks in the function."),
cl::init(0), cl::Hidden);
+static cl::opt<unsigned>
+ AlignAllLoops("align-all-loops",
+ cl::desc("Force the alignment of all loops in the function."),
+ cl::init(0), cl::Hidden);
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
"block-placement-exit-block-bias",
@@ -81,6 +86,29 @@ static cl::opt<unsigned> OutlineOptionalThreshold(
"instruction count below this threshold"),
cl::init(4), cl::Hidden);
+static cl::opt<unsigned> LoopToColdBlockRatio(
+ "loop-to-cold-block-ratio",
+ cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
+ "(frequency of block) is greater than this ratio"),
+ cl::init(5), cl::Hidden);
+
+static cl::opt<bool>
+ PreciseRotationCost("precise-rotation-cost",
+ cl::desc("Model the cost of loop rotation more "
+ "precisely by using profile data."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> MisfetchCost(
+ "misfetch-cost",
+ cl::desc("Cost that models the probablistic risk of an instruction "
+ "misfetch due to a jump comparing to falling through, whose cost "
+ "is zero."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
+ cl::desc("Cost of jump instructions."),
+ cl::init(1), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -246,9 +274,12 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains(MachineFunction &F);
public:
@@ -354,31 +385,56 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
- // FIXME: Due to the performance of the probability and weight routines in
- // the MBPI analysis, we manually compute probabilities using the edge
- // weights. This is suboptimal as it means that the somewhat subtle
- // definition of edge weight semantics is encoded here as well. We should
- // improve the MBPI interface to efficiently support query patterns such as
- // this.
- uint32_t BestWeight = 0;
- uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ auto BestProb = BranchProbability::getZero();
+
+ // Adjust edge probabilities by excluding edges pointing to blocks that are
+ // either not in BlockFilter or are already in the current chain. Consider the
+ // following CFG:
+ //
+ // --->A
+ // | / \
+ // | B C
+ // | \ / \
+ // ----D E
+ //
+ // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+ // A->C is chosen as a fall-through, D won't be selected as a successor of C
+ // due to CFG constraint (the probability of C->D is not greater than
+ // HotProb). If we exclude E that is not in BlockFilter when calculating the
+ // probability of C->D, D will be selected and we will get A C D B as the
+ // layout of this loop.
+ auto AdjustedSumProb = BranchProbability::getOne();
+ SmallVector<MachineBasicBlock *, 4> Successors;
for (MachineBasicBlock *Succ : BB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
- continue;
- }
- if (Succ != *SuccChain.begin()) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
- continue;
+ bool SkipSucc = false;
+ if (BlockFilter && !BlockFilter->count(Succ)) {
+ SkipSucc = true;
+ } else {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(Succ)
+ << " -> Already merged!\n");
+ SkipSucc = true;
+ } else if (Succ != *SuccChain->begin()) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
+ continue;
+ }
}
+ if (SkipSucc)
+ AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
+ else
+ Successors.push_back(Succ);
+ }
- uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
- BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock *Succ : Successors) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
// If we outline optional branches, look whether Succ is unavoidable, i.e.
// dominates all terminators of the MachineFunction. If it does, other
@@ -406,6 +462,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
+ BlockChain &SuccChain = *BlockToChain[Succ];
if (SuccChain.LoopPredecessors != 0) {
if (SuccProb < HotProb) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
@@ -415,8 +472,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Make sure that a hot successor doesn't have a globally more
// important predecessor.
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BlockFrequency CandidateEdgeFreq =
- MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
@@ -440,10 +498,10 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
<< " (prob)"
<< (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestWeight >= SuccWeight)
+ if (BestSucc && BestProb >= SuccProb)
continue;
BestSucc = Succ;
- BestWeight = SuccWeight;
+ BestProb = SuccProb;
}
return BestSucc;
}
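
The renormalization that AdjustedSumProb performs in the hunk above can be checked with plain arithmetic; a small sketch with invented edge probabilities (doubles instead of the numerator/denominator computation on BranchProbability):

    #include <cstdio>

    // Probabilities of skipped successors are removed from the total,
    // and the surviving edges are rescaled against the adjusted sum.
    int main() {
      double Probs[] = {0.5, 0.3, 0.2};   // successors of BB
      bool Skip[] = {false, false, true}; // e.g. third block already placed

      double AdjustedSumProb = 1.0;
      for (int i = 0; i < 3; ++i)
        if (Skip[i])
          AdjustedSumProb -= Probs[i];

      for (int i = 0; i < 3; ++i)
        if (!Skip[i])
          std::printf("succ %d: raw %.2f -> adjusted %.3f\n", i, Probs[i],
                      Probs[i] / AdjustedSumProb); // 0.625 and 0.375
    }
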
@@ -505,14 +563,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
const BlockFilterSet *BlockFilter) {
for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
++I) {
- if (BlockFilter && !BlockFilter->count(I))
+ if (BlockFilter && !BlockFilter->count(&*I))
continue;
- if (BlockToChain[I] != &PlacedChain) {
+ if (BlockToChain[&*I] != &PlacedChain) {
PrevUnplacedBlockIt = I;
// Now select the head of the chain to which the unplaced block belongs
// as the block to place. This will force the entire chain to be placed,
// and satisfies the requirements of merging chains.
- return *BlockToChain[I]->begin();
+ return *BlockToChain[&*I]->begin();
}
}
return nullptr;
@@ -672,13 +730,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
MachineBasicBlock *OldExitingBB = ExitingBB;
BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
bool HasLoopingSucc = false;
- // FIXME: Due to the performance of the probability and weight routines in
- // the MBPI analysis, we use the internal weights and manually compute the
- // probabilities to avoid quadratic behavior.
- uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
for (MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ->isLandingPad())
+ if (Succ->isEHPad())
continue;
if (Succ == MBB)
continue;
@@ -690,10 +743,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
if (LoopBlockSet.count(Succ)) {
DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " (" << SuccWeight << ")\n");
+ << getBlockName(Succ) << " (" << SuccProb << ")\n");
HasLoopingSucc = true;
continue;
}
@@ -705,7 +758,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
BlocksExitingToOuterLoop.insert(MBB);
}
- BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
<< getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
@@ -791,6 +843,188 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
+/// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
+///
+/// With profile data, we can determine the cost in terms of missed fall through
+/// opportunities when rotating a loop chain and select the best rotation.
+/// Basically, there are three kinds of cost to consider for each rotation:
+/// 1. The possibly missed fall through edge (if it exists) from BB out of
+/// the loop to the loop header.
+/// 2. The possibly missed fall through edges (if they exist) from the loop
+/// exits to BB out of the loop.
+/// 3. The missed fall through edge (if it exists) from the last BB to the
+/// first BB in the loop chain.
+/// Therefore, the cost for a given rotation is the sum of costs listed above.
+/// We select the best rotation with the smallest cost.
+void MachineBlockPlacement::rotateLoopWithProfile(
+ BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ auto HeaderBB = L.getHeader();
+ auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
+ auto RotationPos = LoopChain.end();
+
+ BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
+
+ // A utility lambda that scales up a block frequency by dividing it by a
+ // branch probability which is the reciprocal of the scale.
+ auto ScaleBlockFrequency = [](BlockFrequency Freq,
+ unsigned Scale) -> BlockFrequency {
+ if (Scale == 0)
+ return 0;
+ // Use operator / between BlockFrequency and BranchProbability to implement
+ // saturating multiplication.
+ return Freq / BranchProbability(1, Scale);
+ };
+
+ // Compute the cost of the missed fall-through edge to the loop header if the
+ // chain head is not the loop header. As we only consider natural loops with
+ // single header, this computation can be done only once.
+ BlockFrequency HeaderFallThroughCost(0);
+ for (auto *Pred : HeaderBB->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ auto EdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
+ // If the predecessor has only an unconditional jump to the header, we
+ // need to consider the cost of this jump.
+ if (Pred->succ_size() == 1)
+ FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
+ HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
+ }
+ }
+
+ // Here we collect all exit blocks in the loop, and for each exit we find out
+ // its hottest exit edge. For each loop rotation, we define the loop exit cost
+ // as the sum of frequencies of exit edges we collect here, excluding the exit
+ // edge from the tail of the loop chain.
+ SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
+ for (auto BB : LoopChain) {
+ auto LargestExitEdgeProb = BranchProbability::getZero();
+ for (auto *Succ : BB->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
+ LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
+ }
+ }
+ if (LargestExitEdgeProb > BranchProbability::getZero()) {
+ auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
+ ExitsWithFreq.emplace_back(BB, ExitFreq);
+ }
+ }
+
+ // In this loop we iterate every block in the loop chain and calculate the
+ // cost assuming the block is the head of the loop chain. When the loop ends,
+ // we should have found the best candidate as the loop chain's head.
+ for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
+ EndIter = LoopChain.end();
+ Iter != EndIter; Iter++, TailIter++) {
+ // TailIter is used to track the tail of the loop chain if the block we are
+ // checking (pointed by Iter) is the head of the chain.
+ if (TailIter == LoopChain.end())
+ TailIter = LoopChain.begin();
+
+ auto TailBB = *TailIter;
+
+ // Calculate the cost by putting this BB to the top.
+ BlockFrequency Cost = 0;
+
+ // If the current BB is the loop header, we need to take into account the
+ // cost of the missed fall through edge from outside of the loop to the
+ // header.
+ if (Iter != HeaderIter)
+ Cost += HeaderFallThroughCost;
+
+ // Collect the loop exit cost by summing up frequencies of all exit edges
+ // except the one from the chain tail.
+ for (auto &ExitWithFreq : ExitsWithFreq)
+ if (TailBB != ExitWithFreq.first)
+ Cost += ExitWithFreq.second;
+
+ // The cost of breaking the once fall-through edge from the tail to the top
+ // of the loop chain. Here we need to consider three cases:
+ // 1. If the tail node has only one successor, then we will get an
+ // additional jmp instruction. So the cost here is (MisfetchCost +
+ // JumpInstCost) * tail node frequency.
+ // 2. If the tail node has two successors, then we may still get an
+ // additional jmp instruction if the layout successor after the loop
+ // chain is not its CFG successor. Note that the more frequently executed
+ // jmp instruction will be put ahead of the other one. Assume the
+ // frequency of those two branches are x and y, where x is the frequency
+ // of the edge to the chain head, then the cost will be
+ // (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency.
+ // 3. If the tail node has more than two successors (this rarely happens),
+ // we won't consider any additional cost.
+ if (TailBB->isSuccessor(*Iter)) {
+ auto TailBBFreq = MBFI->getBlockFreq(TailBB);
+ if (TailBB->succ_size() == 1)
+ Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
+ MisfetchCost + JumpInstCost);
+ else if (TailBB->succ_size() == 2) {
+ auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
+ auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
+ auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
+ ? TailBBFreq * TailToHeadProb.getCompl()
+ : TailToHeadFreq;
+ Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
+ ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
+ }
+ }
+
+ DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter)
+ << " to the top: " << Cost.getFrequency() << "\n");
+
+ if (Cost < SmallestRotationCost) {
+ SmallestRotationCost = Cost;
+ RotationPos = Iter;
+ }
+ }
+
+ if (RotationPos != LoopChain.end()) {
+ DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos)
+ << " to the top\n");
+ std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
+ }
+}
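
A worked toy instance of this cost model may help: three chain blocks, invented frequencies, the default MisfetchCost = JumpInstCost = 1, and the simplifying assumption that the tail always has a single successor of the head (case 1 above). This models the bookkeeping only, not the pass itself.

    #include <cstdio>

    int main() {
      const unsigned MisfetchCost = 1, JumpInstCost = 1;
      double HeaderFallThroughCost = 100;  // missed fall-through into header
      double ExitFreq[3] = {0, 40, 90};    // hottest exit edge per block
      double TailFreq[3] = {120, 60, 150}; // frequency of the would-be tail

      for (int Head = 0; Head < 3; ++Head) {
        int Tail = (Head + 2) % 3;
        // Cost 1: missed fall-through to the header (block 0).
        double Cost = (Head == 0 ? 0.0 : HeaderFallThroughCost);
        // Cost 2: exits anywhere but the chain tail cost a jump.
        for (int B = 0; B < 3; ++B)
          if (B != Tail)
            Cost += ExitFreq[B];
        // Cost 3: broken tail-to-head fall-through (single-successor case).
        Cost += TailFreq[Tail] * (MisfetchCost + JumpInstCost);
        std::printf("head=%d cost=%.0f\n", Head, Cost); // 340, 470, 310
      }
    }
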
+
+/// \brief Collect blocks in the given loop that are to be placed.
+///
+/// When profile data is available, exclude cold blocks from the returned set;
+/// otherwise, collect all blocks in the loop.
+MachineBlockPlacement::BlockFilterSet
+MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
+ BlockFilterSet LoopBlockSet;
+
+ // Filter cold blocks off from LoopBlockSet when profile data is available.
+ // Collect the sum of frequencies of incoming edges to the loop header from
+ // outside. If we treat the loop as a super block, this is the frequency of
+ // the loop. Then for each block in the loop, we calculate the ratio between
+ // its frequency and the frequency of the loop. When this ratio is too small,
+ // don't add it to the loop chain. If there are outer loops, then this block
+ // will be merged into the first outer loop chain for which this block is not
+ // cold anymore. This needs precise profile data and we only do this when
+ // profile data is available.
+ if (F.getFunction()->getEntryCount()) {
+ BlockFrequency LoopFreq(0);
+ for (auto LoopPred : L.getHeader()->predecessors())
+ if (!L.contains(LoopPred))
+ LoopFreq += MBFI->getBlockFreq(LoopPred) *
+ MBPI->getEdgeProbability(LoopPred, L.getHeader());
+
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
+ if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
+ continue;
+ LoopBlockSet.insert(LoopBB);
+ }
+ } else
+ LoopBlockSet.insert(L.block_begin(), L.block_end());
+
+ return LoopBlockSet;
+}
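
Numerically, the filter above with the default LoopToColdBlockRatio of 5 behaves as in this small sketch (all frequencies invented):

    #include <cstdio>

    int main() {
      const unsigned LoopToColdBlockRatio = 5;
      unsigned long LoopFreq = 1000; // sum of entry edge frequencies
      unsigned long BlockFreq[] = {900, 400, 150}; // blocks in the loop

      for (unsigned long Freq : BlockFreq) {
        bool Cold = Freq == 0 || LoopFreq / Freq > LoopToColdBlockRatio;
        std::printf("block freq %4lu: %s\n", Freq,
                    Cold ? "left out of the loop chain" : "kept");
      }
    }
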
+
/// \brief Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
@@ -805,19 +1039,27 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
- BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
+
+ // Check if we have profile data for this function. If yes, we will rotate
+ // this loop by modeling costs more precisely which requires the profile data
+ // for better layout.
+ bool RotateLoopWithProfile =
+ PreciseRotationCost && F.getFunction()->getEntryCount();
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+ // When we use profile data to rotate the loop, this is unnecessary.
+ MachineBasicBlock *LoopTop =
+ RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
MachineBasicBlock *ExitingBB = nullptr;
- if (LoopTop == L.getHeader())
+ if (!RotateLoopWithProfile && LoopTop == L.getHeader())
ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -828,7 +1070,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+
+ for (MachineBasicBlock *LoopBB : LoopBlockSet) {
BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
@@ -848,7 +1091,11 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
}
buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
- rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+
+ if (RotateLoopWithProfile)
+ rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
+ else
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -889,7 +1136,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// the assumptions of the remaining algorithm.
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = FI;
+ MachineBasicBlock *BB = &*FI;
BlockChain *Chain =
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
@@ -900,8 +1147,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
- MachineFunction::iterator NextFI(std::next(FI));
- MachineBasicBlock *NextBB = NextFI;
+ MachineFunction::iterator NextFI = std::next(FI);
+ MachineBasicBlock *NextBB = &*NextFI;
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility.
assert(NextFI != FE && "Can't fallthrough past the last block.");
@@ -1004,7 +1251,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Update the terminator of the previous block.
if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
+ MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -1035,14 +1282,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
// If PrevBB has a two-way branch, try to re-order the branches
- // such that we branch to the successor with higher weight first.
+ // such that we branch to the successor with higher probability first.
if (TBB && !Cond.empty() && FBB &&
- MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ MBPI->getEdgeProbability(PrevBB, FBB) >
+ MBPI->getEdgeProbability(PrevBB, TBB) &&
!TII->ReverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
<< getBlockName(PrevBB) << "\n");
- DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
- << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DEBUG(dbgs() << " Edge probability: "
+ << MBPI->getEdgeProbability(PrevBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(PrevBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
@@ -1064,13 +1313,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
+ // FIXME: Use Function::optForSize().
if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
- BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
for (MachineBasicBlock *ChainBB : FunctionChain) {
if (ChainBB == *FunctionChain.begin())
@@ -1084,6 +1334,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!L)
continue;
+ if (AlignAllLoops) {
+ ChainBB->setAlignment(AlignAllLoops);
+ continue;
+ }
+
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
continue; // Don't care about loop alignment.
@@ -1224,4 +1479,3 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
return false;
}
-
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 6fbc2be..cf6d401 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -28,91 +28,48 @@ char MachineBranchProbabilityInfo::ID = 0;
void MachineBranchProbabilityInfo::anchor() { }
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
- // First we compute the sum with 64-bits of precision, ensuring that cannot
- // overflow by bounding the number of weights considered. Hopefully no one
- // actually needs 2^32 successors.
- assert(MBB->succ_size() < UINT32_MAX);
- uint64_t Sum = 0;
- Scale = 1;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- Sum += Weight;
- }
-
- // If the computed sum fits in 32-bits, we're done.
- if (Sum <= UINT32_MAX)
- return Sum;
-
- // Otherwise, compute the scale necessary to cause the weights to fit, and
- // re-sum with that scale applied.
- assert((Sum / UINT32_MAX) < UINT32_MAX);
- Scale = (Sum / UINT32_MAX) + 1;
- Sum = 0;
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- Sum += Weight / Scale;
- }
- assert(Sum <= UINT32_MAX);
- return Sum;
-}
-
-uint32_t MachineBranchProbabilityInfo::
-getEdgeWeight(const MachineBasicBlock *Src,
- MachineBasicBlock::const_succ_iterator Dst) const {
- uint32_t Weight = Src->getSuccWeight(Dst);
- if (!Weight)
- return DEFAULT_WEIGHT;
- return Weight;
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ return Src->getSuccProbability(Dst);
}
-uint32_t MachineBranchProbabilityInfo::
-getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
// This is a linear search. Try to use the const_succ_iterator version when
// possible.
- return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+ return getEdgeProbability(Src,
+ std::find(Src->succ_begin(), Src->succ_end(), Dst));
}
bool
MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- // FIXME: Compare against a static "hot" BranchProbability.
- return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+ static BranchProbability HotProb(4, 5);
+ return getEdgeProbability(Src, Dst) > HotProb;
}
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- uint32_t MaxWeight = 0;
+ auto MaxProb = BranchProbability::getZero();
MachineBasicBlock *MaxSucc = nullptr;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, I);
- if (Weight > MaxWeight) {
- MaxWeight = Weight;
+ auto Prob = getEdgeProbability(MBB, I);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
MaxSucc = *I;
}
}
- if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
+ static BranchProbability HotProb(4, 5);
+ if (getEdgeProbability(MBB, MaxSucc) >= HotProb)
return MaxSucc;
return nullptr;
}
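
The new getHotSucc is the obvious argmax plus the static 4/5 threshold; in toy form (probabilities invented):

    #include <cstdio>

    int main() {
      double Probs[] = {0.15, 0.82, 0.03}; // successor probabilities
      int MaxSucc = -1;
      double MaxProb = 0;
      for (int i = 0; i < 3; ++i)
        if (Probs[i] > MaxProb) {
          MaxProb = Probs[i];
          MaxSucc = i;
        }
      if (MaxProb >= 0.8) // the static HotProb(4, 5) threshold
        std::printf("hot successor: %d (%.2f)\n", MaxSucc, MaxProb);
      else
        std::printf("no hot successor\n");
    }
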
-BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
- const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
- uint32_t Scale = 1;
- uint32_t D = getSumForBlock(Src, Scale);
- uint32_t N = getEdgeWeight(Src, Dst) / Scale;
-
- return BranchProbability(N, D);
-}
-
raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
raw_ostream &OS, const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 87aaaa0..021707b 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -57,7 +57,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -111,7 +111,7 @@ char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
@@ -714,7 +714,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
return PerformCSE(DT->getRootNode());
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index f33d0e6..fa43c4d 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -10,6 +10,7 @@
// The machine combiner pass uses machine trace metrics to ensure the combined
// instructions does not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "machine-combiner"
#include "llvm/ADT/Statistic.h"
@@ -68,10 +69,10 @@ private:
MachineTraceMetrics::Trace BlockTrace);
bool
improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
- MachineTraceMetrics::Trace BlockTrace,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- bool NewCodeHasLessInsts);
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -122,9 +123,9 @@ unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineTraceMetrics::Trace BlockTrace) {
-
SmallVector<unsigned, 16> InstrDepth;
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
@@ -180,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
/// \returns Latency of \p NewRoot
unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace) {
-
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
@@ -202,62 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
UseMO->findRegisterUseOperandIdx(MO.getReg()));
} else {
- LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
}
NewRootLatency = std::max(NewRootLatency, LatencyOp);
}
return NewRootLatency;
}
-/// True when the new instruction sequence does not lengthen the critical path
-/// and the new sequence has less instructions or the new sequence improves the
-/// critical path.
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+ MustReduceDepth, // The data dependency chain must be improved.
+ Default // The critical path must not be lengthened.
+};
+
+static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+ // TODO: If C++ ever gets a real enum class, make this part of the
+ // MachineCombinerPattern class.
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return CombinerObjective::Default;
+ }
+}
+
/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
-/// path. This is decided by the formula:
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
-/// If the new sequence has an equal length critical path but does not reduce
-/// the number of instructions (NewCodeHasLessInsts is false), then it is not
-/// considered an improvement. The slack is the number of cycles Root can be
-/// delayed before the critical patch becomes longer.
+/// path. The definition of "improve" may be restricted by specifying that the
+/// new path improves the data dependency chain (MustReduceDepth).
bool MachineCombiner::improvesCriticalPathLen(
MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- bool NewCodeHasLessInsts) {
-
- assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ MachineCombinerPattern Pattern) {
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
- // Get depth and latency of NewRoot.
unsigned NewRootIdx = InsInstrs.size() - 1;
MachineInstr *NewRoot = InsInstrs[NewRootIdx];
- unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
- unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- // Get depth, latency and slack of Root.
+ // Get depth and latency of NewRoot and Root.
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
+ dbgs() << " RootDepth: " << RootDepth << "\n");
+
+ // For a transform such as reassociation, the cost equation is
+ // conservatively calculated so that we must improve the depth (data
+ // dependency cycles) in the critical path to proceed with the transform.
+ // Being conservative also protects against inaccuracies in the underlying
+ // machine trace metrics and CPU models.
+ if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
+ return NewRootDepth < RootDepth;
+
+ // A more flexible cost calculation for the critical path includes the slack
+ // of the original code sequence. This may allow the transform to proceed
+ // even if the instruction depths (data dependency cycles) become worse.
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
unsigned RootSlack = BlockTrace.getInstrSlack(Root);
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
- dbgs() << " NewRootDepth: " << NewRootDepth
- << " NewRootLatency: " << NewRootLatency << "\n";
- dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
- << " RootSlack: " << RootSlack << "\n";
- dbgs() << " NewRootDepth + NewRootLatency "
+ DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootLatency: " << RootLatency << "\n";
+ dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency = "
<< NewRootDepth + NewRootLatency << "\n";
- dbgs() << " RootDepth + RootLatency + RootSlack "
+ dbgs() << " RootDepth + RootLatency + RootSlack = "
<< RootDepth + RootLatency + RootSlack << "\n";);
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
- if (NewCodeHasLessInsts)
- return NewCycleCount <= OldCycleCount;
- else
- return NewCycleCount < OldCycleCount;
+ return NewCycleCount <= OldCycleCount;
}
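
Both acceptance tests above reduce to small integer comparisons; a worked example with invented cycle counts:

    #include <cstdio>

    int main() {
      // Depth-only objective (MustReduceDepth): strict improvement needed.
      unsigned NewRootDepth = 3, RootDepth = 5;
      std::printf("MustReduceDepth: %s\n",
                  NewRootDepth < RootDepth ? "accept" : "reject");

      // Default objective: slack makes the comparison more permissive.
      unsigned NewRootLatency = 4, RootLatency = 2, RootSlack = 1;
      unsigned NewCycleCount = NewRootDepth + NewRootLatency;       // 7
      unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; // 8
      std::printf("Default: %s\n",
                  NewCycleCount <= OldCycleCount ? "accept" : "reject");
    }
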
/// helper routine to convert instructions into SC
@@ -271,11 +296,14 @@ void MachineCombiner::instr2instrSC(
InstrsSC.push_back(SC);
}
}
+
/// True when the new instructions do not increase resource length
bool MachineCombiner::preservesResourceLen(
MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs) {
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
// Compute current resource length
@@ -310,7 +338,7 @@ bool MachineCombiner::preservesResourceLen(
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
if (OptSize && (NewSize < OldSize))
return true;
- if (!TSchedModel.hasInstrSchedModel())
+ if (!TSchedModel.hasInstrSchedModelOrItineraries())
return true;
return false;
}
@@ -332,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
auto &MI = *BlockIter++;
DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
- SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Patterns;
+ SmallVector<MachineCombinerPattern, 16> Patterns;
// The motivating example is:
//
// MUL Other MUL_op1 MUL_op2 Other
@@ -358,54 +386,55 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// mostly one pattern, and getMachineCombinerPatterns() can order patterns
// based on an internal cost heuristic.
- if (TII->getMachineCombinerPatterns(MI, Patterns)) {
- for (auto P : Patterns) {
- SmallVector<MachineInstr *, 16> InsInstrs;
- SmallVector<MachineInstr *, 16> DelInstrs;
- DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ if (!TII->getMachineCombinerPatterns(MI, Patterns))
+ continue;
+
+ for (auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ unsigned NewInstCount = InsInstrs.size();
+ unsigned OldInstCount = DelInstrs.size();
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!NewInstCount)
+ continue;
+
+ // Substitute when we optimize for codesize and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lengthens the critical path nor increases
+ // resource pressure.
+ if (doSubstitute(NewInstCount, OldInstCount) ||
+ (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
Traces->verifyAnalysis();
- TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
- InstrIdxForVirtReg);
- unsigned NewInstCount = InsInstrs.size();
- unsigned OldInstCount = DelInstrs.size();
- // Found pattern, but did not generate alternative sequence.
- // This can happen e.g. when an immediate could not be materialized
- // in a single instruction.
- if (!NewInstCount)
- continue;
- // Substitute when we optimize for codesize and the new sequence has
- // fewer instructions OR
- // the new sequence neither lengthens the critical path nor increases
- // resource pressure.
- if (doSubstitute(NewInstCount, OldInstCount) ||
- (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- InstrIdxForVirtReg,
- NewInstCount < OldInstCount) &&
- preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
- for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-
- Changed = true;
- ++NumInstCombined;
-
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
- // Eagerly stop after the first pattern fires.
- break;
- } else {
- // Cleanup instructions of the alternative code sequence. There is no
- // use for them.
- MachineFunction *MF = MBB->getParent();
- for (auto *InstrPtr : InsInstrs)
- MF->DeleteMachineInstr(InstrPtr);
- }
- InstrIdxForVirtReg.clear();
+ // Eagerly stop after the first pattern fires.
+ break;
+ } else {
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ MachineFunction *MF = MBB->getParent();
+ for (auto *InstrPtr : InsInstrs)
+ MF->DeleteMachineInstr(InstrPtr);
}
+ InstrIdxForVirtReg.clear();
}
}
@@ -420,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
TSchedModel.init(SchedModel, &STI, TII);
MRI = &MF.getRegInfo();
Traces = &getAnalysis<MachineTraceMetrics>();
- MinInstr = 0;
-
- OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ MinInstr = nullptr;
+ OptSize = MF.getFunction()->optForSize();
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index 9856e70..ca4bb1c 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionInitializer.h"
@@ -26,6 +27,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -44,6 +47,11 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
+static cl::opt<unsigned>
+ AlignAllFunctions("align-all-functions",
+ cl::desc("Force the alignment of all functions."),
+ cl::init(0), cl::Hidden);
+
void MachineFunctionInitializer::anchor() {}
//===----------------------------------------------------------------------===//
@@ -79,12 +87,27 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ // FIXME: Use Function::optForSize().
if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
+ if (AlignAllFunctions)
+ Alignment = AlignAllFunctions;
+
FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
+
+ if (isFuncletEHPersonality(classifyEHPersonality(
+ F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) {
+ WinEHInfo = new (Allocator) WinEHFuncInfo();
+ }
+
+ assert(TM.isCompatibleDataLayout(getDataLayout()) &&
+ "Can't create a MachineFunction using a Module with a "
+ "Target-incompatible DataLayout attached\n");
+
+ PSVManager = llvm::make_unique<PseudoSourceValueManager>();
}
MachineFunction::~MachineFunction() {
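
The constructor hunk layers three alignment sources: the target minimum, the preferred alignment unless the function is size-optimized, and the new -align-all-functions override, where 0 leaves the flag inert. A self-contained sketch of that precedence (computeAlignment is a hypothetical stand-in, not the in-tree code):

#include <algorithm>
#include <cstdio>

unsigned computeAlignment(unsigned MinAlign, unsigned PrefAlign,
                          bool OptimizeForSize, unsigned ForcedAlign) {
  unsigned Align = MinAlign;
  if (!OptimizeForSize)
    Align = std::max(Align, PrefAlign);
  if (ForcedAlign)              // 0 means the flag was not passed
    Align = ForcedAlign;
  return Align;
}

int main() {
  std::printf("%u\n", computeAlignment(1, 4, false, 0));  // prints 4
  std::printf("%u\n", computeAlignment(1, 4, false, 16)); // prints 16
}
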
@@ -117,6 +140,11 @@ MachineFunction::~MachineFunction() {
JumpTableInfo->~MachineJumpTableInfo();
Allocator.Deallocate(JumpTableInfo);
}
+
+ if (WinEHInfo) {
+ WinEHInfo->~WinEHFuncInfo();
+ Allocator.Deallocate(WinEHInfo);
+ }
}
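
WinEHInfo is placement-new'ed into the function's allocator above, so the destructor hunk must run ~WinEHFuncInfo by hand and return the memory separately. A toy version of that pairing (Arena is illustrative, not llvm::BumpPtrAllocator):

#include <cstdlib>
#include <new>

// Toy arena: memory comes back via Deallocate, never via delete, so objects
// placed into it need an explicit destructor call first.
struct Arena {
  void *Allocate(std::size_t Size) { return std::malloc(Size); }
  void Deallocate(void *Ptr) { std::free(Ptr); }
};

struct FuncInfo { int UnwindState = -1; };

int main() {
  Arena A;
  auto *FI = new (A.Allocate(sizeof(FuncInfo))) FuncInfo();
  FI->~FuncInfo();   // mirror of WinEHInfo->~WinEHFuncInfo() above
  A.Deallocate(FI);
}
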
const DataLayout &MachineFunction::getDataLayout() const {
@@ -149,7 +177,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (MBB == nullptr)
MBBI = begin();
else
- MBBI = MBB;
+ MBBI = MBB->getIterator();
// Figure out the block number this should have.
unsigned BlockNo = 0;
@@ -169,7 +197,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (MBBNumbering[BlockNo])
MBBNumbering[BlockNo]->setNumber(-1);
- MBBNumbering[BlockNo] = MBBI;
+ MBBNumbering[BlockNo] = &*MBBI;
MBBI->setNumber(BlockNo);
}
}
@@ -322,6 +350,13 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
return std::make_pair(Result, Result + Num);
}
+const char *MachineFunction::createExternalSymbolName(StringRef Name) {
+ char *Dest = Allocator.Allocate<char>(Name.size() + 1);
+ std::copy(Name.begin(), Name.end(), Dest);
+ Dest[Name.size()] = 0;
+ return Dest;
+}
+
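
createExternalSymbolName copies a StringRef, which need not be NUL-terminated, into allocator-owned storage and appends the terminator itself. A stand-alone sketch of the same interning step, with std::string_view standing in for StringRef and a toy storage pool for the allocator:

#include <algorithm>
#include <cstdio>
#include <string_view>
#include <vector>

std::vector<std::vector<char>> Storage; // hands out stable buffers

const char *internName(std::string_view Name) {
  Storage.emplace_back(Name.size() + 1);
  char *Dest = Storage.back().data();
  std::copy(Name.begin(), Name.end(), Dest);
  Dest[Name.size()] = '\0'; // the view is not guaranteed NUL-terminated
  return Dest;
}

int main() {
  std::string_view Sym("__chkstk_and_more", 8);
  std::printf("%s\n", internName(Sym)); // prints "__chkstk"
}
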
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFunction::dump() const {
print(dbgs());
@@ -593,10 +628,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
BV.set(*CSR);
// Saved CSRs are not pristine.
- const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I)
- BV.reset(I->getReg());
+ for (auto &I : getCalleeSavedInfo())
+ for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
+ BV.reset(*S);
return BV;
}
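
The pristine-register fix above clears not just each saved CSR but every register it fully contains, via MCSubRegIterator with IncludeSelf=true. A toy equivalent over a plain bitset:

#include <bitset>
#include <vector>

// SubRegs[R] lists the registers fully contained in R; the in-tree loop gets
// the same set (plus R itself) from MCSubRegIterator with IncludeSelf=true.
void resetWithSubRegs(std::bitset<64> &Pristine, unsigned Reg,
                      const std::vector<std::vector<unsigned>> &SubRegs) {
  Pristine.reset(Reg);
  for (unsigned S : SubRegs[Reg])
    Pristine.reset(S);
}
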
@@ -801,42 +835,26 @@ Type *MachineConstantPoolEntry::getType() const {
return Val.ConstVal->getType();
}
-
-unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+bool MachineConstantPoolEntry::needsRelocation() const {
if (isMachineConstantPoolEntry())
- return Val.MachineCPVal->getRelocationInfo();
- return Val.ConstVal->getRelocationInfo();
+ return true;
+ return Val.ConstVal->needsRelocation();
}
SectionKind
MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
- SectionKind Kind;
- switch (getRelocationInfo()) {
+ if (needsRelocation())
+ return SectionKind::getReadOnlyWithRel();
+ switch (DL->getTypeAllocSize(getType())) {
+ case 4:
+ return SectionKind::getMergeableConst4();
+ case 8:
+ return SectionKind::getMergeableConst8();
+ case 16:
+ return SectionKind::getMergeableConst16();
default:
- llvm_unreachable("Unknown section kind");
- case Constant::GlobalRelocations:
- Kind = SectionKind::getReadOnlyWithRel();
- break;
- case Constant::LocalRelocation:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case Constant::NoRelocation:
- switch (DL->getTypeAllocSize(getType())) {
- case 4:
- Kind = SectionKind::getMergeableConst4();
- break;
- case 8:
- Kind = SectionKind::getMergeableConst8();
- break;
- case 16:
- Kind = SectionKind::getMergeableConst16();
- break;
- default:
- Kind = SectionKind::getReadOnly();
- break;
- }
+ return SectionKind::getReadOnly();
}
- return Kind;
}
MachineConstantPool::~MachineConstantPool() {
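
The rewritten getSectionKind reduces to a two-level decision: any relocation forces a read-only-with-relocations section, otherwise the alloc size picks a mergeable-constant section. Sketched as a small table (the enum names are illustrative stand-ins for SectionKind):

#include <cstdint>

enum class Kind { ReadOnlyWithRel, MergeableConst4, MergeableConst8,
                  MergeableConst16, ReadOnly };

Kind sectionKindFor(bool NeedsRelocation, std::uint64_t AllocSize) {
  if (NeedsRelocation)
    return Kind::ReadOnlyWithRel;   // any relocation forces the .rel section
  switch (AllocSize) {
  case 4:  return Kind::MergeableConst4;
  case 8:  return Kind::MergeableConst8;
  case 16: return Kind::MergeableConst16;
  default: return Kind::ReadOnly;   // odd sizes are not mergeable
  }
}
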
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index aaf06a7..05463fc 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -13,11 +13,14 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
@@ -49,13 +52,16 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
// passes explicitly. This does not include setPreservesCFG,
// because CodeGen overloads that to mean preserving the MachineBasicBlock
// CFG in addition to the LLVM IR CFG.
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<IVUsers>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
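
This hunk is the template for the alias-analysis migration repeated below in MachineLICM and MachineScheduler: the old AliasAnalysis analysis group becomes individual wrapper passes, and results are fetched through AAResultsWrapperPass. A hedged sketch of a pass written against the new scheme, assuming the 3.8-era headers named in the diff:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExamplePass : public MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>(); // was: addRequired<AliasAnalysis>()
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // alias queries go through AA from here on
    return false;
  }
};
char ExamplePass::ID = 0;
} // end anonymous namespace
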
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index fdc4226..1eb2edc 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -43,6 +44,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+static cl::opt<bool> PrintWholeRegMask(
+ "print-whole-regmask",
+ cl::desc("Print the full contents of regmask operands in IR dumps"),
+ cl::init(true), cl::Hidden);
+
//===----------------------------------------------------------------------===//
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
@@ -407,9 +413,26 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
- case MachineOperand::MO_RegisterMask:
- OS << "<regmask>";
+ case MachineOperand::MO_RegisterMask: {
+ unsigned NumRegsInMask = 0;
+ unsigned NumRegsEmitted = 0;
+ OS << "<regmask";
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ unsigned MaskWord = i / 32;
+ unsigned MaskBit = i % 32;
+ if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+ if (PrintWholeRegMask || NumRegsEmitted <= 10) {
+ OS << " " << PrintReg(i, TRI);
+ NumRegsEmitted++;
+ }
+ NumRegsInMask++;
+ }
+ }
+ if (NumRegsEmitted != NumRegsInMask)
+ OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+ OS << ">";
break;
+ }
case MachineOperand::MO_RegisterLiveOut:
OS << "<regliveout>";
break;
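
The new printing code walks the regmask as a packed bit array: word i/32, bit i%32, with a set bit meaning the register is preserved across the call. A minimal decoder for the same layout:

#include <cstdint>
#include <cstdio>

bool regIsPreserved(const std::uint32_t *Mask, unsigned Reg) {
  return Mask[Reg / 32] & (1u << (Reg % 32));
}

int main() {
  std::uint32_t Mask[2] = {0x00000005, 0x00000001}; // regs 0, 2 and 32
  for (unsigned Reg = 0; Reg < 64; ++Reg)
    if (regIsPreserved(Mask, Reg))
      std::printf("preserved: %u\n", Reg);
}
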
@@ -443,26 +466,28 @@ unsigned MachinePointerInfo::getAddrSpace() const {
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
-MachinePointerInfo MachinePointerInfo::getConstantPool() {
- return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getConstantPool());
}
/// getFixedStack - Return a MachinePointerInfo record that refers to the
/// the specified FrameIndex.
-MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
- return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+ int FI, int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
}
-MachinePointerInfo MachinePointerInfo::getJumpTable() {
- return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getJumpTable());
}
-MachinePointerInfo MachinePointerInfo::getGOT() {
- return MachinePointerInfo(PseudoSourceValue::getGOT());
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getGOT());
}
-MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
- return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+ int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
}
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
@@ -606,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
- for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
+ ++ImpDefs)
addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
+ for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses;
+ ++ImpUses)
addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -841,7 +868,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
- for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
+ for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
if (Type == AnyInBundle)
return true;
@@ -865,13 +892,13 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
if (isBundle()) {
// Both instructions are bundles, compare MIs inside the bundle.
- MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator I1 = getIterator();
MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
- MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator I2 = Other->getIterator();
MachineBasicBlock::const_instr_iterator E2 = Other->getParent()->instr_end();
while (++I1 != E1 && I1->isInsideBundle()) {
++I2;
- if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check))
return false;
}
}
@@ -976,7 +1003,7 @@ unsigned MachineInstr::getNumExplicitOperands() const {
void MachineInstr::bundleWithPred() {
assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
setFlag(BundledPred);
- MachineBasicBlock::instr_iterator Pred = this;
+ MachineBasicBlock::instr_iterator Pred = getIterator();
--Pred;
assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
Pred->setFlag(BundledSucc);
@@ -985,7 +1012,7 @@ void MachineInstr::bundleWithPred() {
void MachineInstr::bundleWithSucc() {
assert(!isBundledWithSucc() && "MI is already bundled with its successor");
setFlag(BundledSucc);
- MachineBasicBlock::instr_iterator Succ = this;
+ MachineBasicBlock::instr_iterator Succ = getIterator();
++Succ;
assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
Succ->setFlag(BundledPred);
@@ -994,7 +1021,7 @@ void MachineInstr::bundleWithSucc() {
void MachineInstr::unbundleFromPred() {
assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
clearFlag(BundledPred);
- MachineBasicBlock::instr_iterator Pred = this;
+ MachineBasicBlock::instr_iterator Pred = getIterator();
--Pred;
assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
Pred->clearFlag(BundledSucc);
@@ -1003,7 +1030,7 @@ void MachineInstr::unbundleFromPred() {
void MachineInstr::unbundleFromSucc() {
assert(isBundledWithSucc() && "MI isn't bundled with its successor");
clearFlag(BundledSucc);
- MachineBasicBlock::instr_iterator Succ = this;
+ MachineBasicBlock::instr_iterator Succ = getIterator();
++Succ;
assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
Succ->clearFlag(BundledPred);
@@ -1139,7 +1166,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
/// Return the number of instructions inside the MI bundle, not counting the
/// header instruction.
unsigned MachineInstr::getBundleSize() const {
- MachineBasicBlock::const_instr_iterator I = this;
+ MachineBasicBlock::const_instr_iterator I = getIterator();
unsigned Size = 0;
while (I->isBundledWithSucc())
++Size, ++I;
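
The getIterator() substitutions throughout this file track the removal of the implicit MachineInstr-to-iterator conversion: an intrusive-list node now hands out its iterator explicitly. A toy node showing the shape of that API:

struct Node {
  Node *Prev = nullptr, *Next = nullptr;

  struct iterator {
    Node *N;
    Node &operator*() const { return *N; }
    iterator &operator++() { N = N->Next; return *this; }
    iterator &operator--() { N = N->Prev; return *this; }
    bool operator!=(iterator O) const { return N != O.N; }
  };

  iterator getIterator() { return iterator{this}; } // explicit, not implicit
};
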
@@ -1501,6 +1528,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
return false;
}
+bool MachineInstr::isLoadFoldBarrier() const {
+ return mayStore() || isCall() || hasUnmodeledSideEffects();
+}
+
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
@@ -1615,7 +1646,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
FirstOp = false;
}
-
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -1706,13 +1736,16 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
bool HaveSemi = false;
- const unsigned PrintableFlags = FrameSetup;
+ const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " flags: ";
if (Flags & FrameSetup)
OS << "FrameSetup";
+
+ if (Flags & FrameDestroy)
+ OS << "FrameDestroy";
}
if (!memoperands_empty()) {
@@ -1755,7 +1788,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
DebugLoc InlinedAtDL(InlinedAt);
if (InlinedAtDL && MF) {
OS << " inlined @[ ";
- InlinedAtDL.print(OS);
+ InlinedAtDL.print(OS);
OS << " ]";
}
}
@@ -1902,11 +1935,11 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) {
}
}
-void MachineInstr::addRegisterDefReadUndef(unsigned Reg) {
+void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
continue;
- MO.setIsUndef();
+ MO.setIsUndef(IsUndef);
}
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index cd820ee..3eaf4c5 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -293,15 +293,17 @@ MachineOperandIteratorBase::PhysRegInfo
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
- PhysRegInfo PRI = {false, false, false, false, false, false};
+ PhysRegInfo PRI = {false, false, false, false, false, false, false};
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
for (; isValid(); ++*this) {
MachineOperand &MO = deref();
- if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
- PRI.Clobbers = true; // Regmask clobbers Reg.
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
+ PRI.Clobbered = true;
+ continue;
+ }
if (!MO.isReg())
continue;
@@ -310,33 +312,28 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
continue;
- bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
- bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
-
- if (IsRegOrSuperReg && MO.readsReg()) {
- // Reg or a super-reg is read, and perhaps killed also.
- PRI.Reads = true;
- PRI.Kills = MO.isKill();
- }
-
- if (IsRegOrOverlapping && MO.readsReg()) {
- PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
- }
-
- if (!MO.isDef())
+ if (!TRI->regsOverlap(MOReg, Reg))
continue;
- if (IsRegOrSuperReg) {
- PRI.Defines = true; // Reg or a super-register is defined.
+ bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
+ if (MO.readsReg()) {
+ PRI.Read = true;
+ if (Covered) {
+ PRI.FullyRead = true;
+ if (MO.isKill())
+ PRI.Killed = true;
+ }
+ } else if (MO.isDef()) {
+ PRI.Defined = true;
+ if (Covered)
+ PRI.FullyDefined = true;
if (!MO.isDead())
AllDefsDead = false;
}
- if (IsRegOrOverlapping)
- PRI.Clobbers = true; // Reg or an overlapping reg is defined.
}
- if (AllDefsDead && PRI.Defines)
- PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+ if (AllDefsDead && PRI.FullyDefined)
+ PRI.DeadDef = true;
return PRI;
}
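
analyzePhysReg now distinguishes operands that merely overlap Reg from those that cover it (isSuperRegisterEq), and only covering accesses set the Fully* flags. A toy model of the classification, with a register represented as the set of its register units:

#include <algorithm>
#include <set>

using RegUnits = std::set<int>; // a register, modeled as its register units

bool overlaps(const RegUnits &A, const RegUnits &B) {
  for (int U : A)
    if (B.count(U))
      return true;
  return false;
}

// MOReg "covers" Reg when it contains every unit of Reg, the analogue of
// TRI->isSuperRegisterEq(MOReg, Reg) above.
bool covers(const RegUnits &MOReg, const RegUnits &Reg) {
  return std::includes(MOReg.begin(), MOReg.end(), Reg.begin(), Reg.end());
}
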
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index e9ea5ed..a8368e9 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -138,7 +138,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -153,7 +153,7 @@ namespace {
}
private:
- /// CandidateInfo - Keep track of information about hoisting candidates.
+ /// Keep track of information about hoisting candidates.
struct CandidateInfo {
MachineInstr *MI;
unsigned Def;
@@ -162,149 +162,76 @@ namespace {
: MI(mi), Def(def), FI(fi) {}
};
- /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
- /// invariants out to the preheader.
void HoistRegionPostRA();
- /// HoistPostRA - When an instruction is found to only use loop invariant
- /// operands that is safe to hoist, this instruction is called to do the
- /// dirty work.
void HoistPostRA(MachineInstr *MI, unsigned Def);
- /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
- /// gather register def and frame object update information.
- void ProcessMI(MachineInstr *MI,
- BitVector &PhysRegDefs,
- BitVector &PhysRegClobbers,
- SmallSet<int, 32> &StoredFIs,
+ void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates);
- /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
- /// current loop.
void AddToLiveIns(unsigned Reg);
- /// IsLICMCandidate - Returns true if the instruction may be a suitable
- /// candidate for LICM. e.g. If the instruction is a call, then it's
- /// obviously not safe to hoist it.
bool IsLICMCandidate(MachineInstr &I);
- /// IsLoopInvariantInst - Returns true if the instruction is loop
- /// invariant. I.e., all virtual register operands are defined outside of
- /// the loop, physical registers aren't accessed (explicitly or implicitly),
- /// and the instruction is hoistable.
- ///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasLoopPHIUse - Return true if the specified instruction is used by any
- /// phi node in the current loop.
bool HasLoopPHIUse(const MachineInstr *MI) const;
- /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
- /// and an use in the current loop, return true if the target considered
- /// it 'high'.
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
unsigned Reg) const;
bool IsCheapInstruction(MachineInstr &MI) const;
- /// CanCauseHighRegPressure - Visit BBs from header to current BB,
- /// check if hoisting an instruction of the given cost matrix can cause high
- /// register pressure.
bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
bool Cheap);
- /// UpdateBackTraceRegPressure - Traverse the back trace from header to
- /// the current block and update their register pressures to reflect the
- /// effect of hoisting MI from the current block to the preheader.
void UpdateBackTraceRegPressure(const MachineInstr *MI);
- /// IsProfitableToHoist - Return true if it is potentially profitable to
- /// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
- /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
- /// If not then a load from this mbb may not be safe to hoist.
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
- /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
- /// dominator tree node if its a leaf or all of its children are done. Walk
- /// up the dominator tree to destroy ancestors which are now done.
- void ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
-
- /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
- /// blocks dominated by the specified header block, and that are in the
- /// current loop) in depth first order w.r.t the DominatorTree. This allows
- /// us to visit definitions before uses, allowing us to hoist a loop body in
- /// one pass without iteration.
- ///
+ void ExitScopeIfDone(
+ MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
- /// SinkIntoLoop - Sink instructions into loops if profitable. This
- /// especially tries to prevent register spills caused by register pressure
- /// if there is little to no overhead moving instructions into loops.
void SinkIntoLoop();
- /// InitRegPressure - Find all virtual register references that are liveout
- /// of the preheader to initialize the starting "register pressure". Note
- /// this does not count live through (livein but not used) registers.
void InitRegPressure(MachineBasicBlock *BB);
- /// calcRegisterCost - Calculate the additional register pressure that the
- /// registers used in MI cause.
- ///
- /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
- /// figure out which usages are live-ins.
- /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
bool ConsiderSeen,
bool ConsiderUnseenAsDef);
- /// UpdateRegPressure - Update estimate of register pressure after the
- /// specified instruction.
void UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef = false);
- /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
- /// the load itself could be hoisted. Return the unfolded and hoistable
- /// load, or null if the load couldn't be unfolded or if it wouldn't
- /// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
- /// LookForDuplicate - Find an instruction amount PrevMIs that is a
- /// duplicate of MI. Return this instruction if it's found.
- const MachineInstr *LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs);
+ const MachineInstr *
+ LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr *> &PrevMIs);
- /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
- /// the preheader that compute the same value. If it's found, do a RAU on
- /// with the definition of the existing instruction rather than hoisting
- /// the instruction to the preheader.
- bool EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+ bool EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
- /// MayCSE - Return true if the given instruction will be CSE'd if it's
- /// hoisted out of the loop.
bool MayCSE(MachineInstr *MI);
- /// Hoist - When an instruction is found to only use loop invariant operands
- /// that is safe to hoist, this instruction is called to do the dirty work.
- /// It returns true if the instruction is hoisted.
bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
- /// InitCSEMap - Initialize the CSE map with instructions that are in the
- /// current loop preheader that may become duplicates of instructions that
- /// are hoisted out of the loop.
void InitCSEMap(MachineBasicBlock *BB);
- /// getCurPreheader - Get the preheader for the current loop, splitting
- /// a critical edge if needed.
MachineBasicBlock *getCurPreheader();
};
} // end anonymous namespace
@@ -315,12 +242,11 @@ INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
-/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
-/// loop that has a unique predecessor.
+/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
// Check whether this loop even has a unique predecessor.
if (!CurLoop->getLoopPredecessor())
@@ -367,7 +293,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// Get our Loop information...
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
while (!Worklist.empty()) {
@@ -402,9 +328,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-/// InstructionStoresToFI - Return true if instruction stores to the
-/// specified frame.
+/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ // If we lost memory operands, conservatively assume that the instruction
+ // writes to all slots.
+ if (MI->memoperands_empty())
+ return true;
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end(); o != oe; ++o) {
if (!(*o)->isStore() || !(*o)->getPseudoValue())
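
Both this hunk and the mayLoadFromGOTOrConstantPool change further down adopt the same conservative default: an instruction whose memory operands were dropped must be assumed to touch anything. A sketch of the pattern with a hypothetical MemOp record:

#include <vector>

struct MemOp { bool IsStore; int FrameIndex; };

bool storesToFrameIndex(const std::vector<MemOp> &MemOps, int FI) {
  if (MemOps.empty())
    return true; // operands were lost somewhere upstream; assume the worst
  for (const MemOp &M : MemOps)
    if (M.IsStore && M.FrameIndex == FI)
      return true;
  return false;
}
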
@@ -418,7 +347,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
return false;
}
-/// ProcessMI - Examine the instruction for potentai LICM candidate. Also
+/// Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
BitVector &PhysRegDefs,
@@ -506,8 +435,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
}
-/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
-/// invariants out to the preheader.
+/// Walk the specified region of the CFG and hoist loop invariants out to the
+/// preheader.
void MachineLICM::HoistRegionPostRA() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
@@ -529,15 +458,13 @@ void MachineLICM::HoistRegionPostRA() {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad()) continue;
+ if (ML && ML->getHeader()->isEHPad()) continue;
// Conservatively treat live-in's as an external def.
  // FIXME: That means a reload that is reused in successor block(s) will not
// be LICM'ed.
- for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
- E = BB->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ for (const auto &LI : BB->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
PhysRegDefs.set(*AI);
}
@@ -601,8 +528,8 @@ void MachineLICM::HoistRegionPostRA() {
}
}
-/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
-/// loop, and make sure it is not killed by any instructions in the loop.
+/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
+/// sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -622,9 +549,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
}
}
-/// HoistPostRA - When an instruction is found to only use loop invariant
-/// operands that is safe to hoist, this instruction is called to do the
-/// dirty work.
+/// When an instruction is found to use only loop invariant operands that are
+/// safe to hoist, this instruction is called to do the dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
@@ -646,8 +572,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
Changed = true;
}
-// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
-// If not then a load from this mbb may not be safe to hoist.
+/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
+/// may not be safe to hoist.
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
@@ -679,9 +605,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
BackTrace.pop_back();
}
-/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
-/// dominator tree node if its a leaf or all of its children are done. Walk
-/// up the dominator tree to destroy ancestors which are now done.
+/// Destroy scope for the MBB that corresponds to the given dominator tree node
+/// if it's a leaf or all of its children are done. Walk up the dominator tree to
+/// destroy ancestors which are now done.
void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
@@ -701,11 +627,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
}
}
-/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
-/// blocks dominated by the specified header block, and that are in the
-/// current loop) in depth first order w.r.t the DominatorTree. This allows
-/// us to visit definitions before uses, allowing us to hoist a loop body in
-/// one pass without iteration.
+/// Walk the specified loop in the CFG (defined by all blocks dominated by the
+/// specified header block, and that are in the current loop) in depth first
+/// order w.r.t the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineBasicBlock *Preheader = getCurPreheader();
@@ -727,7 +652,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad())
+ if (ML && ML->getHeader()->isEHPad())
continue;
// If this subregion is not in the top level loop at all, exit.
@@ -786,6 +711,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
+/// Sink instructions into loops if profitable. This especially tries to prevent
+/// register spills caused by register pressure if there is little to no
+/// overhead moving instructions into loops.
void MachineLICM::SinkIntoLoop() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
@@ -796,8 +724,8 @@ void MachineLICM::SinkIntoLoop() {
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, e.g. such as a call has.
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
- Candidates.push_back(I);
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
+ Candidates.push_back(&*I);
}
for (MachineInstr *I : Candidates) {
@@ -837,9 +765,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
-/// InitRegPressure - Find all virtual register references that are liveout of
-/// the preheader to initialize the starting "register pressure". Note this
-/// does not count live through (livein but not used) registers.
+/// Find all virtual register references that are liveout of the preheader to
+/// initialize the starting "register pressure". Note this does not count live
+/// through (livein but not used) registers.
void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
std::fill(RegPressure.begin(), RegPressure.end(), 0);
@@ -858,8 +786,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}
-/// UpdateRegPressure - Update estimate of register pressure after the
-/// specified instruction.
+/// Update estimate of register pressure after the specified instruction.
void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef) {
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
@@ -872,6 +799,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
}
}
+/// Calculate the additional register pressure that the registers used in MI
+/// cause.
+///
+/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+/// figure out which usages are live-ins.
+/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
bool ConsiderUnseenAsDef) {
@@ -915,23 +848,28 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
return Cost;
}
-/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
-/// loads from global offset table or constant pool.
-static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+/// Return true if this machine instruction loads from global offset table or
+/// constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert (MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost memory operands, conservatively assume that the instruction
+  // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
E = MI.memoperands_end(); I != E; ++I) {
if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
- if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ if (PSV->isGOT() || PSV->isConstantPool())
return true;
}
}
return false;
}
-/// IsLICMCandidate - Returns true if the instruction may be a suitable
-/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
-/// not safe to hoist it.
+/// Returns true if the instruction may be a suitable candidate for LICM.
+/// e.g., if the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
@@ -944,16 +882,16 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// from constant memory are not safe to speculate all the time, for example
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
!IsGuaranteedToExecute(I.getParent()))
return false;
return true;
}
-/// IsLoopInvariantInst - Returns true if the instruction is loop
-/// invariant. I.e., all virtual register operands are defined outside of the
-/// loop, physical registers aren't accessed explicitly, and there are no side
+/// Returns true if the instruction is loop invariant.
+/// I.e., all virtual register operands are defined outside of the loop,
+/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
@@ -1007,8 +945,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
-/// HasLoopPHIUse - Return true if the specified instruction is used by a
-/// phi node and hoisting it could cause a copy to be inserted.
+/// Return true if the specified instruction is used by a phi node and hoisting
+/// it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
@@ -1042,9 +980,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
return false;
}
-/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
-/// and an use in the current loop, return true if the target considered
-/// it 'high'.
+/// Compute operand latency between a def of 'Reg' and a use in the current
+/// loop; return true if the target considers it high.
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
unsigned DefIdx, unsigned Reg) const {
if (MRI->use_nodbg_empty(Reg))
@@ -1074,8 +1011,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
return false;
}
-/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
-/// the operand latency between its def and a use is one or less.
+/// Return true if the instruction is marked "cheap" or the operand latency
+/// between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
return true;
@@ -1099,9 +1036,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
return isCheap;
}
-/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
-/// if hoisting an instruction of the given cost matrix can cause high
-/// register pressure.
+/// Visit BBs from header to current BB, check if hoisting an instruction of the
+/// given cost matrix can cause high register pressure.
bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
bool CheapInstr) {
for (const auto &RPIdAndCost : Cost) {
@@ -1124,9 +1060,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
return false;
}
-/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
-/// current block and update their register pressures to reflect the effect
-/// of hoisting MI from the current block to the preheader.
+/// Traverse the back trace from header to the current block and update their
+/// register pressures to reflect the effect of hoisting MI from the current
+/// block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
// First compute the 'cost' of the instruction, i.e. its contribution
// to register pressure.
@@ -1139,8 +1075,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
RP[RPIdAndCost.first] += RPIdAndCost.second;
}
-/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
-/// the given loop invariant.
+/// Return true if it is potentially profitable to hoist the given loop
+/// invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
@@ -1230,6 +1166,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
return true;
}
+/// Unfold a load from the given machineinstr if the load itself could be
+/// hoisted. Return the unfolded and hoistable load, or null if the load
+/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
@@ -1287,6 +1226,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
return NewMIs[0];
}
+/// Initialize the CSE map with instructions that are in the current loop
+/// preheader that may become duplicates of instructions that are hoisted
+/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
@@ -1295,6 +1237,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}
+/// Find an instruction among PrevMIs that is a duplicate of MI.
+/// Return this instruction if it's found.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
@@ -1306,6 +1250,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
return nullptr;
}
+/// Given a LICM'ed instruction, look for an instruction in the preheader that
+/// computes the same value. If one is found, do a RAUW with the definition of
+/// the existing instruction rather than hoisting the instruction to the
+/// preheader.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
@@ -1363,8 +1311,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
return false;
}
-/// MayCSE - Return true if the given instruction will be CSE'd if it's
-/// hoisted out of the loop.
+/// Return true if the given instruction will be CSE'd if it's hoisted out of
+/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
@@ -1377,9 +1325,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) {
return LookForDuplicate(MI, CI->second) != nullptr;
}
-/// Hoist - When an instruction is found to use only loop invariant operands
+/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
-///
+/// It returns true if the instruction is hoisted.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
@@ -1441,6 +1389,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
return true;
}
+/// Get the preheader for the current loop, splitting a critical edge if needed.
MachineBasicBlock *MachineLICM::getCurPreheader() {
// Determine the block to which to hoist instructions. If we can't find a
// suitable loop predecessor, we can't do any hoisting.
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index ce6abdd..2f5c9e0 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -37,7 +37,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
- LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
+ LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
return false;
}
@@ -51,11 +51,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() {
MachineBasicBlock *TopMBB = getHeader();
MachineFunction::iterator Begin = TopMBB->getParent()->begin();
if (TopMBB != Begin) {
- MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
+ MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator());
while (contains(PriorMBB)) {
TopMBB = PriorMBB;
if (TopMBB == Begin) break;
- PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
+ PriorMBB = &*std::prev(TopMBB->getIterator());
}
}
return TopMBB;
@@ -65,11 +65,12 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
MachineBasicBlock *BotMBB = getHeader();
MachineFunction::iterator End = BotMBB->getParent()->end();
if (BotMBB != std::prev(End)) {
- MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB));
+ MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator());
while (contains(NextMBB)) {
BotMBB = NextMBB;
- if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break;
- NextMBB = std::next(MachineFunction::iterator(BotMBB));
+ if (BotMBB == &*std::next(BotMBB->getIterator()))
+ break;
+ NextMBB = &*std::next(BotMBB->getIterator());
}
}
return BotMBB;
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 6a20624..1956a70 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,12 +9,12 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -35,7 +35,7 @@ char MachineModuleInfo::ID = 0;
MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
namespace llvm {
-class MMIAddrLabelMapCallbackPtr : CallbackVH {
+class MMIAddrLabelMapCallbackPtr final : CallbackVH {
MMIAddrLabelMap *Map;
public:
MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
@@ -209,9 +209,8 @@ bool MachineModuleInfo::doInitialization(Module &M) {
CurCallSite = 0;
CallsEHReturn = false;
CallsUnwindInit = false;
+ HasEHFunclets = false;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
- // Always emit some info, by default "no personality" info.
- Personalities.push_back(nullptr);
PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
TheModule = nullptr;
@@ -249,6 +248,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = false;
CallsUnwindInit = false;
+ HasEHFunclets = false;
VariableDbgInfos.clear();
}
@@ -314,32 +314,11 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
return LandingPadLabel;
}
-/// addPersonality - Provide the personality function for the exception
-/// information.
-void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
- const Function *Personality) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.Personality = Personality;
- addPersonality(Personality);
-}
-
void MachineModuleInfo::addPersonality(const Function *Personality) {
for (unsigned i = 0; i < Personalities.size(); ++i)
if (Personalities[i] == Personality)
return;
-
- // If this is the first personality we're adding go
- // ahead and add it at the beginning.
- if (!Personalities[0])
- Personalities[0] = Personality;
- else
- Personalities.push_back(Personality);
-}
-
-void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad,
- int State) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.WinEHState = State;
+ Personalities.push_back(Personality);
}
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
@@ -481,56 +460,3 @@ try_next:;
FilterIds.push_back(0); // terminator
return FilterID;
}
-
-/// getPersonality - Return the personality function for the current function.
-const Function *MachineModuleInfo::getPersonality() const {
- for (const LandingPadInfo &LPI : LandingPads)
- if (LPI.Personality)
- return LPI.Personality;
- return nullptr;
-}
-
-EHPersonality MachineModuleInfo::getPersonalityType() {
- if (PersonalityTypeCache == EHPersonality::Unknown) {
- if (const Function *F = getPersonality())
- PersonalityTypeCache = classifyEHPersonality(F);
- }
- return PersonalityTypeCache;
-}
-
-/// getPersonalityIndex - Return unique index for current personality
-/// function. NULL/first personality function should always get zero index.
-unsigned MachineModuleInfo::getPersonalityIndex() const {
- const Function* Personality = nullptr;
-
- // Scan landing pads. If there is at least one non-NULL personality - use it.
- for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
- if (LandingPads[i].Personality) {
- Personality = LandingPads[i].Personality;
- break;
- }
-
- for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
- if (Personalities[i] == Personality)
- return i;
- }
-
- // This will happen if the current personality function is
- // in the zero index.
- return 0;
-}
-
-const Function *MachineModuleInfo::getWinEHParent(const Function *F) const {
- StringRef WinEHParentName =
- F->getFnAttribute("wineh-parent").getValueAsString();
- if (WinEHParentName.empty() || WinEHParentName == F->getName())
- return F;
- return F->getParent()->getFunction(WinEHParentName);
-}
-
-WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) {
- auto &Ptr = FuncInfoMap[getWinEHParent(F)];
- if (!Ptr)
- Ptr.reset(new WinEHFuncInfo);
- return *Ptr;
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index e883ce5..03c82f4 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -27,13 +27,11 @@ void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
: MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true),
TracksSubRegLiveness(false) {
+ unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
- UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
-
- // Create the physreg use/def lists.
- PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr);
+ UsedPhysRegMask.resize(NumRegs);
+ PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -117,6 +115,8 @@ void MachineRegisterInfo::clearVirtRegs() {
}
#endif
VRegInfo.clear();
+ for (auto &I : LiveIns)
+ I.second = 0;
}
void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
@@ -394,8 +394,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const
-{
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
// Lane masks are only defined for vregs.
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const TargetRegisterClass &TRC = *getRegClass(Reg);
@@ -468,11 +467,8 @@ static bool isNoReturnDef(const MachineOperand &MO) {
if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
return false;
const Function *Called = getCalledFunction(MI);
- if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn)
- || !Called->hasFnAttribute(Attribute::NoUnwind))
- return false;
-
- return true;
+ return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) ||
+ !Called->hasFnAttribute(Attribute::NoUnwind));
}
bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
@@ -488,3 +484,15 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
}
return false;
}
+
+bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid();
+ ++AliasReg) {
+ if (!reg_nodbg_empty(*AliasReg))
+ return true;
+ }
+ return false;
+}
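
The new isPhysRegUsed combines two sources: the accumulated regmask clobbers and the use/def lists of the register and every alias. A toy model of that union (the alias expansion is passed in here, where the in-tree code walks MCRegAliasIterator with IncludeSelf=true):

#include <bitset>
#include <vector>

// UsedMask models UsedPhysRegMask; UseDefLists[R] is R's use/def chain.
bool physRegUsed(const std::bitset<64> &UsedMask,
                 const std::vector<std::vector<unsigned>> &UseDefLists,
                 const std::vector<unsigned> &AliasesIncludingSelf,
                 unsigned Reg) {
  if (UsedMask.test(Reg))                 // clobbered via a call's regmask
    return true;
  for (unsigned A : AliasesIncludingSelf) // includes Reg itself
    if (!UseDefLists[A].empty())
      return true;
  return false;
}
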
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index a48e54c..bcee15c 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden,
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
+/// In some situations a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+ cl::desc("Hide nodes with more predecessor/successor than cutoff"));
+
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
@@ -106,7 +111,7 @@ public:
void print(raw_ostream &O, const Module* = nullptr) const override;
protected:
- void scheduleRegions(ScheduleDAGInstrs &Scheduler);
+ void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};
/// MachineScheduler runs after coalescing and before register allocation.
@@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
@@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequiredID(MachineDominatorsID);
AU.addRequired<MachineLoopInfo>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
@@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
} else if (!mf.getSubtarget().enableMachineScheduler())
return false;
- DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+ DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
PassConfig = &getAnalysis<TargetPassConfig>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LIS = &getAnalysis<LiveIntervals>();
@@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
- scheduleRegions(*Scheduler);
+ scheduleRegions(*Scheduler, false);
DEBUG(LIS->dump());
if (VerifyScheduling)
@@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Instantiate the selected scheduler for this target, function, and
// optimization level.
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
- scheduleRegions(*Scheduler);
+ scheduleRegions(*Scheduler, true);
if (VerifyScheduling)
MF->verify(this, "After post machine scheduling.");
@@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB,
MachineFunction *MF,
- const TargetInstrInfo *TII,
- bool IsPostRA) {
+ const TargetInstrInfo *TII) {
return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
}
/// Main driver for both MachineScheduler and PostMachineScheduler.
-void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
+ bool FixKillFlags) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- bool IsPostRA = Scheduler.isPostRA();
// Visit all machine basic blocks.
//
@@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
- Scheduler.startBlock(MBB);
+ Scheduler.startBlock(&*MBB);
#ifndef NDEBUG
if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
@@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end() ||
- isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
// Count the boundary instruction.
--RemainingInstrs;
@@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingInstrs) {
- if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
+ if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
break;
if (!I->isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == std::prev(RegionEnd)) {
@@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
Scheduler.exitRegion();
continue;
}
- DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
- << "MI Scheduling **********\n");
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(dbgs() << MF->getName()
<< ":BB#" << MBB->getNumber() << " " << MBB->getName()
<< "\n From: " << *I << " To: ";
@@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
}
assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler.finishBlock();
- if (Scheduler.isPostRA()) {
- // FIXME: Ideally, no further passes should rely on kill flags. However,
- // thumb2 size reduction is currently an exception.
- Scheduler.fixupKills(MBB);
- }
+ // FIXME: Ideally, no further passes should rely on kill flags. However,
+ // thumb2 size reduction is currently an exception, so the PostMIScheduler
+ // needs to do this.
+ if (FixKillFlags)
+ Scheduler.fixupKills(&*MBB);
}
Scheduler.finalizeSchedule();
}
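
Taken together, the hunks above move the post-RA question out of the scheduler object: isPostRA() is gone, and each driver states up front whether kill flags must be repaired. In outline (condensed from the hunks, not the verbatim sources):

bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  // ... analyses initialized as above ...
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, /*FixKillFlags=*/false); // pre-RA
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  // Post-RA: thumb2 size reduction still reads kill flags, so have
  // scheduleRegions() repair them per block.
  scheduleRegions(*Scheduler, /*FixKillFlags=*/true);
  return true;
}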
@@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
LLVM_DUMP_METHOD
void ReadyQueue::dump() {
- dbgs() << Name << ": ";
+ dbgs() << "Queue " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
@@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() {
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
+
// Build the DAG.
buildSchedGraph(AA);
@@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() {
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
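
The while-condition rewrite here (and the identical one in ScheduleDAGMILive::schedule() further down) is a mechanical transformation: hoisting the pick out of the loop condition makes room for per-iteration work before the scheduling strategy is consulted. The pattern in isolation, as a runnable toy where pickNext() stands in for SchedImpl->pickNode():

#include <cstdio>

static int pickNext() {
  static int N = 3;
  return N > 0 ? N-- : 0; // 0 plays the role of the null SUnit pointer
}

int main() {
  // Before: while (int N = pickNext()) { ... } -- no room for a trace.
  // After: identical control flow, with a debug print per iteration.
  while (true) {
    std::printf("** picking next node\n");
    int N = pickNext();
    if (!N)
      break;
    std::printf("picked %d\n", N);
  }
  return 0;
}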
@@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() {
updatePressureDiffs(LiveUses);
}
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
// Cache the list of excess pressure sets in this region. This will also track
@@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
}
// RegisterPressureTracker guarantees that readsReg is true for LiveUses.
assert(VNI && "No live value at use.");
- for (VReg2UseMap::iterator
- UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
- SUnit *SU = UI->SU;
- DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr());
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
// If this use comes before the reaching def, it cannot be a last use, so
      // decrease its pressure change.
if (!SU->isScheduled && SU != &ExitSU) {
LiveQueryResult LRQ
= LI.Query(LIS->getInstructionIndex(SU->getInstr()));
- if (LRQ.valueIn() == VNI)
- getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
}
}
}
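
The rewritten loops walk VRegUses with make_range(VRegUses.find(Reg), VRegUses.end()); this is sound because the multiset's iterator, started at find(Reg), visits only entries for that register before reaching end(). A standalone sketch of the make_range idiom under that assumption, with std::multimap standing in for the scheduler's VReg2SUnit set:

#include <iostream>
#include <map>

// Minimal stand-in for llvm::iterator_range / llvm::make_range.
template <typename It> struct IterRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};
template <typename It> IterRange<It> make_range(It B, It E) { return {B, E}; }

int main() {
  std::multimap<unsigned, int> VRegUses = {{5, 10}, {5, 11}, {7, 12}};
  auto R = VRegUses.equal_range(5); // all SUnits using vreg 5
  for (const auto &V2SU : make_range(R.first, R.second))
    std::cout << "SU(" << V2SU.second << ") uses vreg " << V2SU.first << '\n';
  return 0;
}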
@@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
-/// so that it can be easilly extended by experimental schedulers. Generally,
+/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
Topo.InitDAGTopologicalSorting();
@@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << '\n';
+ }
+ );
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() {
}
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
@@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned LiveOutHeight = DefSU->getHeight();
unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
// Visit all local users of the vreg def.
- for (VReg2UseMap::iterator
- UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
- if (UI->SU == &ExitSU)
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ if (SU == &ExitSU)
continue;
// Only consider uses of the phi.
LiveQueryResult LRQ =
- LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
+ LI.Query(LIS->getInstructionIndex(SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
@@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// overestimate in strange cases. This allows cyclic latency to be
// estimated as the minimum slack of the vreg's depth or height.
unsigned CyclicLatency = 0;
- if (LiveOutDepth > UI->SU->getDepth())
- CyclicLatency = LiveOutDepth - UI->SU->getDepth();
+ if (LiveOutDepth > SU->getDepth())
+ CyclicLatency = LiveOutDepth - SU->getDepth();
- unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
+ unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
if (LiveInHeight > LiveOutHeight) {
if (LiveInHeight - LiveOutHeight < CyclicLatency)
CyclicLatency = LiveInHeight - LiveOutHeight;
@@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
CyclicLatency = 0;
DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
- << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");
if (CyclicLatency > MaxCyclicLatency)
MaxCyclicLatency = CyclicLatency;
}
@@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
// Update top scheduled pressure.
TopRPTracker.advance();
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
}
@@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
SmallVector<unsigned, 8> LiveUses;
BotRPTracker.recede(&LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ DEBUG(
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
updatePressureDiffs(LiveUses);
}
@@ -1349,25 +1397,49 @@ namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo *TII;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
public:
- MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+ MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+ : TII(TII), TRI(TRI) {}
void apply(ScheduleDAGMI *DAG) override;
};
} // anonymous
+/// Returns true if \p MI reads a register written by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+ const MachineInstr &Other) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Other.modifiesRegister(Reg, &TRI))
+ return true;
+ }
+ return false;
+}
+
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
// For now, assume targets can only fuse with the branch.
- MachineInstr *Branch = DAG->ExitSU.getInstr();
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
if (!Branch)
return;
- for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
- SUnit *SU = &DAG->SUnits[--Idx];
- if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ for (SUnit &SU : DAG->SUnits) {
+    // SUnits with successors can't be scheduled in front of the ExitSU.
+ if (!SU.Succs.empty())
+ continue;
+ // We only care if the node writes to a register that the branch reads.
+ MachineInstr *Pred = SU.getInstr();
+ if (!HasDataDep(TRI, *Branch, *Pred))
+ continue;
+
+ if (!TII.shouldScheduleAdjacent(Pred, Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
// scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
// of SU, we could create an artificial edge from the deepest root, but it
// hasn't been needed yet.
- bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
- DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
break;
}
}
@@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
Latency = Cand.SU->getDepth();
break;
}
- dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
<< ":" << P.getUnitInc() << " ";
@@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
}
+void GenericScheduler::dumpPolicy() {
+ dbgs() << "GenericScheduler RegionPolicy: "
+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
+ << "\n";
+}
+
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
/// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bounds on in-flight instructions as:
@@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP,
const PressureChange &CandP,
GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
- int TryRank = TryP.getPSetOrMax();
- int CandRank = CandP.getPSetOrMax();
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
// If both candidates affect the same set, go with the smallest increase.
- if (TryRank == CandRank) {
+ if (TryPSet == CandPSet) {
return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
Reason);
}
@@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP,
Reason)) {
return true;
}
+
+ int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
+ std::numeric_limits<int>::max();
+
+ int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
+ std::numeric_limits<int>::max();
+
// If the candidates are decreasing pressure, reverse priority.
if (TryP.getUnitInc() < 0)
std::swap(TryRank, CandRank);
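
The net change to tryPressure(): candidates touching the same pressure set are still compared by unit increase, but candidates touching different sets are now ranked by a target-provided score (TRI->getRegPressureSetScore), with invalid changes, meaning no pressure impact, ranked at the extreme. A runnable toy of that decision with a made-up score table; the real code hands the ranks to its tryLess/tryGreater helpers, simplified here to a plain comparison:

#include <cstdio>
#include <limits>
#include <utility>

struct Change { bool Valid; unsigned PSet; int UnitInc; };

// Made-up per-set scores standing in for getRegPressureSetScore().
static int setScore(unsigned PSet) { return PSet == 0 ? 1 : 10; }

static bool preferTry(Change Try, Change Cand) {
  if (Try.PSet == Cand.PSet)           // same set: smaller increase wins
    return Try.UnitInc < Cand.UnitInc;
  int TryRank = Try.Valid ? setScore(Try.PSet)
                          : std::numeric_limits<int>::max();
  int CandRank = Cand.Valid ? setScore(Cand.PSet)
                            : std::numeric_limits<int>::max();
  if (Try.UnitInc < 0)                 // both decreasing: reverse priority
    std::swap(TryRank, CandRank);
  return TryRank < CandRank;           // toy: lower rank preferred
}

int main() {
  std::printf("%d\n", preferTry({true, 0, 1}, {true, 1, 1})); // prints 1
  return 0;
}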
@@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
}
}
DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
+ dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
<< TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
<< ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
@@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
- TryCand, Cand, RegExcess))
+ TryCand, Cand, RegExcess, TRI,
+ DAG->MF))
return;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
- TryCand, Cand, RegCritical))
+ TryCand, Cand, RegCritical, TRI,
+ DAG->MF))
return;
// For loops that are acyclic path limited, aggressively schedule for latency.
@@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
Cand.RPDelta.CurrentMax,
- TryCand, Cand, RegMax))
+ TryCand, Cand, RegMax, TRI,
+ DAG->MF))
return;
// Avoid critical resource consumption and balance the schedule.
@@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
- if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
- && tryLatency(TryCand, Cand, Zone)) {
+ if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
return;
}
@@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Bot NOCAND\n");
+ DEBUG(dbgs() << "Pick Bot ONLY1\n");
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Top NOCAND\n");
+ DEBUG(dbgs() << "Pick Top ONLY1\n");
return SU;
}
CandPolicy NoPolicy;
@@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
if (EnableLoadCluster && DAG->TII->enableClusterLoads())
DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
- DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
+ DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
}
@@ -3254,12 +3346,10 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
}
static bool isNodeHidden(const SUnit *Node) {
- return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
- }
-
- static bool hasNodeAddressLabel(const SUnit *Node,
- const ScheduleDAG *Graph) {
- return false;
+ if (ViewMISchedCutoff == 0)
+ return false;
+ return (Node->Preds.size() > ViewMISchedCutoff
+ || Node->Succs.size() > ViewMISchedCutoff);
}
/// If you want to override the dot attributes printed for a particular
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 1b9be50..5e6d619 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -87,7 +87,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -150,7 +150,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
@@ -268,7 +268,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
@@ -667,7 +667,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isEHPad())
return nullptr;
return SuccToSinkTo;
@@ -686,7 +686,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
if (!MI->isSafeToMove(AA, SawStore))
return false;
- // Convergent operations may only be moved to control equivalent locations.
+ // Convergent operations may not be made control-dependent on additional
+ // values.
if (MI->isConvergent())
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index d9a6b684..f7edacd 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
- for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ for (unsigned Kill : Kills)
+ for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
RegUnits.erase(*Units);
// Second, live defs.
- for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
- unsigned DefOp = LiveDefOps[i];
+ for (unsigned DefOp : LiveDefOps) {
for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
@@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrDepths && "Missing depth info");
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
- for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
- const LiveInReg &LIR = TBI.LiveIns[i];
+ for (const LiveInReg &LIR : TBI.LiveIns) {
if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index ca35ec5..cdcd8eb 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -204,18 +204,19 @@ namespace {
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
+ template <typename T> void report(const char *msg, ilist_iterator<T> I) {
+ report(msg, &*I);
+ }
void report(const char *msg, const MachineFunction *MF);
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
- void report(const char *msg, const MachineFunction *MF,
- const LiveInterval &LI);
- void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveInterval &LI);
- void report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg, unsigned LaneMask);
- void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg, unsigned LaneMask);
+
+ void report_context(const LiveInterval &LI) const;
+ void report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const;
+ void report_context(const LiveRange::Segment &S) const;
+ void report_context(const VNInfo &VNI) const;
void verifyInlineAsm(const MachineInstr *MI);
@@ -233,9 +234,11 @@ namespace {
void verifyLiveRangeSegment(const LiveRange&,
const LiveRange::const_iterator I, unsigned,
unsigned);
- void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0);
+ void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0);
void verifyStackFrame();
+
+ void verifySlotIndexes() const;
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -273,6 +276,19 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
.runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
+void MachineVerifier::verifySlotIndexes() const {
+ if (Indexes == nullptr)
+ return;
+
+ // Ensure the IdxMBB list is sorted by slot indexes.
+ SlotIndex Last;
+ for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(),
+ E = Indexes->MBBIndexEnd(); I != E; ++I) {
+ assert(!Last.isValid() || I->first > Last);
+ Last = I->first;
+ }
+}
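
verifySlotIndexes() is a sortedness assertion over the (SlotIndex, MBB) list. The same invariant expressed with the standard library, as a runnable toy with unsigned slot indexes:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

typedef std::pair<unsigned, int> IdxPair; // (slot index, block number)

int main() {
  std::vector<IdxPair> IdxMBB = {{0, 0}, {16, 1}, {48, 2}};
  // Strictly increasing indexes: no adjacent pair may be out of order.
  assert(std::adjacent_find(IdxMBB.begin(), IdxMBB.end(),
                            [](const IdxPair &A, const IdxPair &B) {
                              return A.first >= B.first; // violation
                            }) == IdxMBB.end());
  return 0;
}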
+
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
foundErrors = 0;
@@ -295,10 +311,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
}
+ verifySlotIndexes();
+
visitMachineFunctionBefore();
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
- visitMachineBasicBlockBefore(MFI);
+ visitMachineBasicBlockBefore(&*MFI);
// Keep track of the current bundle header.
const MachineInstr *CurBundle = nullptr;
// Do we expect the next instruction to be part of the same bundle?
@@ -306,7 +324,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
- if (MBBI->getParent() != MFI) {
+ if (MBBI->getParent() != &*MFI) {
report("Bad instruction parent pointer", MFI);
errs() << "Instruction: " << *MBBI;
continue;
@@ -315,20 +333,22 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
// Check for consistent bundle flags.
if (InBundle && !MBBI->isBundledWithPred())
report("Missing BundledPred flag, "
- "BundledSucc was set on predecessor", MBBI);
+ "BundledSucc was set on predecessor",
+ &*MBBI);
if (!InBundle && MBBI->isBundledWithPred())
report("BundledPred flag is set, "
- "but BundledSucc not set on predecessor", MBBI);
+ "but BundledSucc not set on predecessor",
+ &*MBBI);
// Is this a bundle header?
if (!MBBI->isInsideBundle()) {
if (CurBundle)
visitMachineBundleAfter(CurBundle);
- CurBundle = MBBI;
+ CurBundle = &*MBBI;
visitMachineBundleBefore(CurBundle);
} else if (!CurBundle)
report("No bundle header", MBBI);
- visitMachineInstrBefore(MBBI);
+ visitMachineInstrBefore(&*MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
const MachineInstr &MI = *MBBI;
const MachineOperand &Op = MI.getOperand(I);
@@ -341,7 +361,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineOperand(&Op, I);
}
- visitMachineInstrAfter(MBBI);
+ visitMachineInstrAfter(&*MBBI);
// Was this the last bundled instruction?
InBundle = MBBI->isBundledWithSucc();
@@ -350,7 +370,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBundleAfter(CurBundle);
if (InBundle)
report("BundledSucc flag set on last instruction in block", &MFI->back());
- visitMachineBasicBlockAfter(MFI);
+ visitMachineBasicBlockAfter(&*MFI);
}
visitMachineFunctionAfter();
@@ -375,7 +395,10 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
if (!foundErrors++) {
if (Banner)
errs() << "# " << Banner << '\n';
- MF->print(errs(), Indexes);
+ if (LiveInts != nullptr)
+ LiveInts->print(errs());
+ else
+ MF->print(errs(), Indexes);
}
errs() << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getName() << "\n";
@@ -399,7 +422,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(MI))
errs() << Indexes->getInstructionIndex(MI) << '\t';
- MI->print(errs(), TM);
+ MI->print(errs(), /*SkipOpers=*/true);
+ errs() << '\n';
}
void MachineVerifier::report(const char *msg,
@@ -411,36 +435,24 @@ void MachineVerifier::report(const char *msg,
errs() << "\n";
}
-void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveInterval &LI) {
- report(msg, MF);
- errs() << "- interval: " << LI << '\n';
-}
-
-void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveInterval &LI) {
- report(msg, MBB);
+void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
- report(msg, MBB);
- errs() << "- liverange: " << LR << '\n';
+void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const {
errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
if (LaneMask != 0)
- errs() << "- lanemask: " << format("%04X\n", LaneMask);
+ errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
+ errs() << "- liverange: " << LR << '\n';
}
-void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
- report(msg, MF);
- errs() << "- liverange: " << LR << '\n';
- errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
- if (LaneMask != 0)
- errs() << "- lanemask: " << format("%04X\n", LaneMask);
+void MachineVerifier::report_context(const LiveRange::Segment &S) const {
+ errs() << "- segment: " << S << '\n';
+}
+
+void MachineVerifier::report_context(const VNInfo &VNI) const {
+ errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n";
}
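
The deleted overloads are replaced by a composable pattern: one report() call for the headline, then report_context() calls for whatever context applies, instead of one overload per (message, interval, range, lane-mask) combination. The shape as a runnable toy; names shortened, not the verifier's real API:

#include <cstdio>

static void report(const char *Msg) { std::printf("*** %s ***\n", Msg); }
static void reportContext(const char *Kind, unsigned Val) {
  std::printf("- %s: %u\n", Kind, Val);
}

int main() {
  // Headline first, then any number of context lines.
  report("Live segment at def has different VNInfo");
  reportContext("register", 5);
  reportContext("valno", 2);
  return 0;
}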
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -507,11 +519,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MRI->isSSA()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
- for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
- LE = MBB->livein_end();
- LI != LE; ++LI) {
- unsigned reg = *LI;
- if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ for (const auto &LI : MBB->liveins()) {
+ if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB != MBB->getParent()->begin()) {
report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
}
@@ -522,7 +531,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- if ((*I)->isLandingPad())
+ if ((*I)->isEHPad())
LandingPadSuccs.insert(*I);
if (!FunctionBlocks.count(*I))
report("MBB has successor that isn't part of the function.", MBB);
@@ -547,10 +556,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
const BasicBlock *BB = MBB->getBasicBlock();
+ const Function *Fn = MF->getFunction();
if (LandingPadSuccs.size() > 1 &&
!(AsmInfo &&
AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
- BB && isa<SwitchInst>(BB->getTerminator())))
+ BB && isa<SwitchInst>(BB->getTerminator())) &&
+ !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
report("MBB has more than one landing pad successor", MBB);
// Call AnalyzeBranch. If it succeeds, there several more conditions to check.
@@ -562,7 +573,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// check whether its answers match up with reality.
if (!TBB && !FBB) {
// Block falls through to its successor.
- MachineFunction::const_iterator MBBI = MBB;
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
++MBBI;
if (MBBI == MF->end()) {
// It's possible that the block legitimately ends with a noreturn
@@ -575,7 +586,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (!MBB->isSuccessor(MBBI)) {
+ } else if (!MBB->isSuccessor(&*MBBI)) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
@@ -613,7 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && !Cond.empty()) {
// Block conditionally branches somewhere, otherwise falls through.
- MachineFunction::const_iterator MBBI = MBB;
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
@@ -628,7 +639,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
- } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) {
report("MBB exits via conditional branch/fall-through but the CFG "
"successors don't match the actual successors!", MBB);
}
@@ -680,13 +691,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I) {
- if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
- for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true);
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
}
@@ -822,9 +832,12 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumDefs = MCID.getNumDefs();
+ if (MCID.getOpcode() == TargetOpcode::PATCHPOINT)
+ NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0;
// The first MCID.NumDefs operands must be explicit register defines
- if (MONum < MCID.getNumDefs()) {
+ if (MONum < NumDefs) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
@@ -972,13 +985,38 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
case MachineOperand::MO_FrameIndex:
if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
LiveInts && !LiveInts->isNotInMIMap(MI)) {
- LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ int FI = MO->getIndex();
+ LiveInterval &LI = LiveStks->getInterval(FI);
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+
+ bool stores = MI->mayStore();
+ bool loads = MI->mayLoad();
+ // For a memory-to-memory move, we need to check if the frame
+ // index is used for storing or loading, by inspecting the
+ // memory operands.
+ if (stores && loads) {
+ for (auto *MMO : MI->memoperands()) {
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+ if (PSV == nullptr) continue;
+ const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(PSV);
+ if (Value == nullptr) continue;
+ if (Value->getFrameIndex() != FI) continue;
+
+ if (MMO->isStore())
+ loads = false;
+ else
+ stores = false;
+ break;
+ }
+ if (loads == stores)
+ report("Missing fixed stack memoperand.", MI);
+ }
+ if (loads && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
errs() << "Live stack: " << LI << '\n';
}
- if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ if (stores && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
errs() << "Live stack: " << LI << '\n';
}
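
For an instruction that both loads and stores (a stack-to-stack move), mayLoad()/mayStore() alone cannot say which side the frame index is on, so the new code consults the memory operands. The classification step as a runnable toy, with MemOperand standing in for MachineMemOperand plus its FixedStackPseudoSourceValue:

#include <cstdio>
#include <vector>

struct MemOperand { bool IsStore; int FrameIndex; };

int main() {
  const int FI = 2;                 // the operand being verified
  bool stores = true, loads = true; // MI->mayStore() && MI->mayLoad()
  std::vector<MemOperand> MemOps = {{/*IsStore=*/true, /*FrameIndex=*/2}};
  for (const MemOperand &MMO : MemOps) {
    if (MMO.FrameIndex != FI)
      continue;
    if (MMO.IsStore)
      loads = false;                // FI is the destination slot
    else
      stores = false;               // FI is the source slot
    break;
  }
  if (loads == stores)              // neither side identified: flag it
    std::printf("Missing fixed stack memoperand.\n");
  else
    std::printf("FI %d is %s\n", FI, stores ? "stored to" : "loaded from");
  return 0;
}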
@@ -1387,40 +1425,39 @@ void MachineVerifier::verifyLiveIntervals() {
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const VNInfo *VNI, unsigned Reg,
- unsigned LaneMask) {
+ LaneBitmask LaneMask) {
if (VNI->isUnused())
return;
const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << '\n';
+ report("Value not live at VNInfo def and not marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
if (DefVNI != VNI) {
- report("Live segment at def has different valno", MF, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live\n";
+ report("Live segment at def has different VNInfo", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LR << '\n';
+ report("Invalid VNInfo definition index", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ report("PHIDef VNInfo is not defined at MBB start", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
return;
}
@@ -1428,8 +1465,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("No instruction at VNInfo def index", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
return;
}
@@ -1457,60 +1495,67 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!hasDef) {
report("Defining instruction does not modify register", MI);
- errs() << "Valno #" << VNI->id << " in " << LR << '\n';
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
// Early clobber defs begin at USE slots, but other defs must begin at
// DEF slots.
if (isEarlyClobber) {
if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MBB, LR,
- Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("Early clobber def must be at an early-clobber slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
} else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LR, Reg, LaneMask);
- errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ report("Non-PHI, non-early clobber def must be at a register slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
}
}
}
void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const LiveRange::const_iterator I,
- unsigned Reg, unsigned LaneMask) {
+ unsigned Reg, LaneBitmask LaneMask)
+{
const LiveRange::Segment &S = *I;
const VNInfo *VNI = S.valno;
assert(VNI && "Live segment has no valno");
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
- report("Foreign valno in live segment", MF, LR, Reg, LaneMask);
- errs() << S << " has a bad valno\n";
+ report("Foreign valno in live segment", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ report_context(*VNI);
}
if (VNI->isUnused()) {
- report("Live segment valno is marked unused", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Live segment valno is marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Bad start of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask);
- errs() << S << '\n';
+ report("Bad end of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
@@ -1527,26 +1572,26 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const MachineInstr *MI =
LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
return;
}
// The block slot must refer to a basic block boundary.
if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ report("Live segment ends at B slot of an instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
if (S.end.isDead()) {
// Segment ends on the dead slot.
// That means there must be a dead def.
if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB, LR,
- Reg, LaneMask);
- errs() << S << '\n';
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
@@ -1555,9 +1600,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LR, Reg,
- LaneMask);
- errs() << S << '\n';
+ "redefined by an EC def in the same instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
@@ -1587,14 +1632,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
!hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
- errs() << S << " in " << LR << '\n';
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
}
}
}
// Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
+ MachineFunction::const_iterator MFI = MBB->getIterator();
// Is this live segment the beginning of a non-PHIDef VN?
if (S.start == VNI->def && !VNI->isPHIDef()) {
// Not live-in to any blocks.
@@ -1604,10 +1650,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
++MFI;
}
for (;;) {
- assert(LiveInts->isLiveInToMBB(LR, MFI));
+ assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI->isLandingPad()) {
+ MFI->isEHPad()) {
if (&*MFI == EndMBB)
break;
++MFI;
@@ -1616,7 +1662,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Is VNI a PHI-def in the current block?
bool IsPHI = VNI->isPHIDef() &&
- VNI->def == LiveInts->getMBBStartIdx(MFI);
+ VNI->def == LiveInts->getMBBStartIdx(&*MFI);
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
@@ -1626,22 +1672,23 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LR, Reg,
- LaneMask);
- errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << '\n';
+ report("Register not marked live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ errs() << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+ << PEnd << '\n';
continue;
}
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LR, Reg,
- LaneMask);
+ report("Different value live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
errs() << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
+ << " live into BB#" << MFI->getNumber() << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << '\n';
}
}
if (&*MFI == EndMBB)
@@ -1651,7 +1698,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
- unsigned LaneMask) {
+ LaneBitmask LaneMask) {
for (const VNInfo *VNI : LR.valnos)
verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
@@ -1664,24 +1711,35 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
- unsigned Mask = 0;
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0)
- report("Lane masks of sub ranges overlap in live interval", MF, LI);
- if ((SR.LaneMask & ~MaxMask) != 0)
- report("Subrange lanemask is invalid", MF, LI);
+ if ((Mask & SR.LaneMask) != 0) {
+ report("Lane masks of sub ranges overlap in live interval", MF);
+ report_context(LI);
+ }
+ if ((SR.LaneMask & ~MaxMask) != 0) {
+ report("Subrange lanemask is invalid", MF);
+ report_context(LI);
+ }
+ if (SR.empty()) {
+ report("Subrange must not be empty", MF);
+ report_context(SR, LI.reg, SR.LaneMask);
+ }
Mask |= SR.LaneMask;
verifyLiveRange(SR, LI.reg, SR.LaneMask);
- if (!LI.covers(SR))
- report("A Subrange is not covered by the main range", MF, LI);
+ if (!LI.covers(SR)) {
+ report("A Subrange is not covered by the main range", MF);
+ report_context(LI);
+ }
}
// Check the LI only has one connected component.
ConnectedVNInfoEqClasses ConEQ(*LiveInts);
unsigned NumComp = ConEQ.Classify(&LI);
if (NumComp > 1) {
- report("Multiple connected components in live interval", MF, LI);
+ report("Multiple connected components in live interval", MF);
+ report_context(LI);
for (unsigned comp = 0; comp != NumComp; ++comp) {
errs() << comp << ": valnos";
for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index d343301..2c93792 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -548,7 +548,7 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineLoopInfo *MLI) {
- if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
return false; // Quick exit for basic blocks without PHIs.
const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 99bbad1..4cabc3a 100644
--- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -28,7 +28,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
// Usually, we just want to insert the copy before the first terminator
// instruction. However, for the edge going to a landing pad, we must insert
// the copy before the call/invoke instruction.
- if (!SuccMBB->isLandingPad())
+ if (!SuccMBB->isEHPad())
return MBB->getFirstTerminator();
// Discover any defs/uses in this basic block.
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
new file mode 100644
index 0000000..e73ba02
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,96 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ParallelCG.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/thread.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+
+static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
+ const Target *TheTarget, StringRef CPU, StringRef Features,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL,
+ TargetMachine::CodeGenFileType FileType) {
+ std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
+ M->getTargetTriple(), CPU, Features, Options, RM, CM, OL));
+
+ legacy::PassManager CodeGenPasses;
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
+ report_fatal_error("Failed to setup codegen");
+ CodeGenPasses.run(*M);
+}
+
+std::unique_ptr<Module>
+llvm::splitCodeGen(std::unique_ptr<Module> M,
+ ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU,
+ StringRef Features, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
+ TargetMachine::CodeGenFileType FileType) {
+ StringRef TripleStr = M->getTargetTriple();
+ std::string ErrMsg;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
+ if (!TheTarget)
+ report_fatal_error(Twine("Target not found: ") + ErrMsg);
+
+ if (OSs.size() == 1) {
+ codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM,
+ OL, FileType);
+ return M;
+ }
+
+ std::vector<thread> Threads;
+ SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the codegen.
+ // We do it by serializing partition modules to bitcode (while still on the
+ // main thread, in order to avoid data races) and spinning up new threads
+ // which deserialize the partitions into separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallVector<char, 0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
+ Threads.emplace_back(
+ [TheTarget, CPU, Features, Options, RM, CM, OL, FileType,
+ ThreadOS](const SmallVector<char, 0> &BC) {
+ LLVMContext Ctx;
+ ErrorOr<std::unique_ptr<Module>> MOrErr =
+ parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+ codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
+ Options, RM, CM, OL, FileType);
+ },
+        // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ });
+
+ for (thread &T : Threads)
+ T.join();
+
+ return {};
+}
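
A sketch of how a client might drive the new entry point, assuming LLVM 3.8-era APIs (llvm::make_unique, Reloc::Default, CGFT_ObjectFile) and a module whose triple names a registered target; the shard file names are made up:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
#include <vector>

using namespace llvm;

// Split M into NumShards partitions and emit one object file per shard.
static void emitInParallel(std::unique_ptr<Module> M, unsigned NumShards) {
  std::vector<std::unique_ptr<raw_fd_ostream>> Files;
  std::vector<raw_pwrite_stream *> Streams;
  for (unsigned I = 0; I != NumShards; ++I) {
    std::error_code EC;
    Files.push_back(make_unique<raw_fd_ostream>(
        "shard" + std::to_string(I) + ".o", EC, sys::fs::F_None));
    Streams.push_back(Files.back().get());
  }
  // With a single stream the module is handed back; otherwise it is
  // consumed by SplitModule and an empty pointer is returned.
  splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
               TargetOptions(), Reloc::Default, CodeModel::Default,
               CodeGenOpt::Default, TargetMachine::CGFT_ObjectFile);
}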
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index 024d166..873f712 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -13,7 +13,11 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
@@ -52,9 +56,6 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
cl::desc("Disable Machine Common Subexpression Elimination"));
-static cl::opt<cl::boolOrDefault>
- EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
- cl::desc("enable the shrink-wrapping pass"));
static cl::opt<cl::boolOrDefault> OptimizeRegAlloc(
"optimize-regalloc", cl::Hidden,
cl::desc("Enable optimized register allocation compilation path."));
@@ -95,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
-// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it
-// wouldn't be part of the standard pass pipeline, and the target would just add
-// a PostRA scheduling pass wherever it wants.
-static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
+// Targets can return true in targetSchedulesPostRAScheduling() and
+// insert a PostRA scheduling pass wherever it wants.
+cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
// Experimental option to run live interval analysis early.
@@ -188,6 +189,29 @@ char TargetPassConfig::ID = 0;
char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
+namespace {
+struct InsertedPass {
+ AnalysisID TargetPassID;
+ IdentifyingPassPtr InsertedPassID;
+ bool VerifyAfter;
+ bool PrintAfter;
+
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
+ VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {}
+
+ Pass *getInsertedPass() const {
+ assert(InsertedPassID.isValid() && "Illegal Pass ID!");
+ if (InsertedPassID.isInstance())
+ return InsertedPassID.getInstance();
+ Pass *NP = Pass::createPass(InsertedPassID.getID());
+ assert(NP && "Pass ID not registered");
+ return NP;
+ }
+};
+}
+
namespace llvm {
class PassConfigImpl {
public:
@@ -202,7 +226,7 @@ public:
/// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
/// is inserted after each instance of the first one.
- SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
+ SmallVector<InsertedPass, 4> InsertedPasses;
};
} // namespace llvm
@@ -217,7 +241,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
StopAfter(nullptr), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) {
+ DisableVerify(false), EnableTailMerge(true) {
Impl = new PassConfigImpl();
@@ -225,6 +249,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// including this pass itself.
initializeCodeGen(*PassRegistry::getPassRegistry());
+ // Also register alias analysis passes required by codegen passes.
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
@@ -232,14 +260,15 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- IdentifyingPassPtr InsertedPassID) {
+ IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
- Impl->InsertedPasses.push_back(P);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter,
+ PrintAfter);
}
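
With the two extra parameters a target can now control verification and printing for the pass it injects. A hypothetical subclass illustrating the call; MyCleanupPassID is made up, and real targets would use their own registered pass IDs:

#include "llvm/CodeGen/Passes.h"

using namespace llvm;

namespace {
char MyCleanupPassID = 0; // hypothetical ID, registered elsewhere

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Run the cleanup pass after every MachineScheduler instance,
    // verifying the result but skipping the -print-after dump.
    insertPass(&MachineSchedulerID, &MyCleanupPassID,
               /*VerifyAfter=*/true, /*PrintAfter=*/false);
  }
};
} // anonymous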
/// createPassConfig - Create a pass configuration object to be used by
@@ -304,21 +333,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
}
// Add the passes after the pass P if there is any.
- for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
- I = Impl->InsertedPasses.begin(),
- E = Impl->InsertedPasses.end();
- I != E; ++I) {
- if ((*I).first == PassID) {
- assert((*I).second.isValid() && "Illegal Pass ID!");
- Pass *NP;
- if ((*I).second.isInstance())
- NP = (*I).second.getInstance();
- else {
- NP = Pass::createPass((*I).second.getID());
- assert(NP && "Pass ID not registered");
- }
- addPass(NP, false, false);
- }
+ for (auto IP : Impl->InsertedPasses) {
+ if (IP.TargetPassID == PassID)
+ addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter);
}
} else {
delete P;
@@ -380,10 +397,10 @@ void TargetPassConfig::addIRPasses() {
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
if (UseCFLAA)
- addPass(createCFLAliasAnalysisPass());
- addPass(createTypeBasedAliasAnalysisPass());
- addPass(createScopedNoAliasAAPass());
- addPass(createBasicAliasAnalysisPass());
+ addPass(createCFLAAWrapperPass());
+ addPass(createTypeBasedAAWrapperPass());
+ addPass(createScopedNoAliasAAWrapperPass());
+ addPass(createBasicAAWrapperPass());
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
@@ -461,7 +478,7 @@ void TargetPassConfig::addISelPrepare() {
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
- addPass(createSafeStackPass());
+ addPass(createSafeStackPass(TM));
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
@@ -539,8 +556,9 @@ void TargetPassConfig::addMachinePasses() {
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getEnableShrinkWrap())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&ShrinkWrapID);
+
addPass(&PrologEpilogCodeInserterID);
/// Add passes that optimize machine instructions after register allocation.
@@ -557,7 +575,10 @@ void TargetPassConfig::addMachinePasses() {
addPass(&ImplicitNullChecksID);
// Second pass scheduler.
- if (getOptLevel() != CodeGenOpt::None) {
+ // Let Target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM->targetSchedulesPostRAScheduling()) {
if (MISchedPostRA)
addPass(&PostMachineSchedulerID);
else
@@ -576,7 +597,10 @@ void TargetPassConfig::addMachinePasses() {
addPreEmitPass();
+ addPass(&FuncletLayoutID, false);
+
addPass(&StackMapLivenessID, false);
+ addPass(&LiveDebugValuesID, false);
AddingMachinePasses = false;
}
@@ -613,27 +637,12 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineCSEID, false);
addPass(&MachineSinkingID);
- addPass(&PeepholeOptimizerID, false);
+ addPass(&PeepholeOptimizerID);
// Clean-up the dead code that may have been generated by peephole
// rewriting.
addPass(&DeadMachineInstructionElimID);
}
-bool TargetPassConfig::getEnableShrinkWrap() const {
- switch (EnableShrinkWrapOpt) {
- case cl::BOU_UNSET:
- return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None;
- // If EnableShrinkWrap is set, it takes precedence on whatever the
- // target sets. The rational is that we assume we want to test
- // something related to shrink-wrapping.
- case cl::BOU_TRUE:
- return true;
- case cl::BOU_FALSE:
- return false;
- }
- llvm_unreachable("Invalid shrink-wrapping state");
-}
-
//===---------------------------------------------------------------------===//
/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
@@ -717,7 +726,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- addPass(RegAllocPass);
+ if (RegAllocPass)
+ addPass(RegAllocPass);
}
/// Add standard target-independent passes that are tightly coupled with
@@ -748,25 +758,27 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- // Add the selected register allocation pass.
- addPass(RegAllocPass);
+ if (RegAllocPass) {
+ // Add the selected register allocation pass.
+ addPass(RegAllocPass);
- // Allow targets to change the register assignments before rewriting.
- addPreRewrite();
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
- // Finally rewrite virtual registers.
- addPass(&VirtRegRewriterID);
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(&PostRAMachineLICMID);
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&PostRAMachineLICMID);
+ }
}
//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index ebe05e3..52b42b6 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -43,7 +43,7 @@
// - Optimize Loads:
//
// Loads that can be folded into a later instruction. A load is foldable
-// if it loads to virtual registers and the virtual register defined has
+// if it loads to virtual registers and the virtual register defined has
// a single use.
//
// - Optimize Copies and Bitcast (more generally, target specific copies):
@@ -98,6 +98,16 @@ static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
cl::desc("Disable advanced copy optimization"));
+static cl::opt<bool> DisableNAPhysCopyOpt(
+ "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable non-allocatable physical register copy optimization"));
+
+// Limit the number of PHI instructions to process
+// in PeepholeOptimizer::getNextSource.
+static cl::opt<unsigned> RewritePHILimit(
+ "rewrite-phi-limit", cl::Hidden, cl::init(10),
+    cl::desc("Limit the length of PHI chains to look up"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -105,8 +115,11 @@ STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
+STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
+ class ValueTrackerResult;
+
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -130,6 +143,10 @@ namespace {
}
}
+ /// \brief Track Def -> Use info used for rewriting copies.
+ typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
+ RewriteMapTy;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -137,17 +154,38 @@ namespace {
bool optimizeSelect(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeCondBranch(MachineInstr *MI);
- bool optimizeCopyOrBitcast(MachineInstr *MI);
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
- bool findNextSource(unsigned &Reg, unsigned &SubReg);
+ bool findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
+    /// \brief If copy instruction \p MI is a virtual register copy, track it in
+    /// the \p CopySrcRegs set and \p CopyMIs map. If this source register was
+    /// previously seen as a copy source, replace the uses of this copy with the
+    /// previously seen copy's destination register.
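+    ///
+    /// e.g. (illustrative):
+    ///   %vreg1 = COPY %vreg0
+    ///   ...
+    ///   %vreg2 = COPY %vreg0
+    /// Here the second COPY is redundant and uses of %vreg2 can be replaced
+    /// with %vreg1, provided both copies read the same sub-register and the
+    /// destination register classes match.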
+ bool foldRedundantCopy(MachineInstr *MI,
+ SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs);
+
+ /// \brief Is the register \p Reg a non-allocatable physical register?
+ bool isNAPhysCopy(unsigned Reg);
+
+    /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
+    /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+    /// non-allocatable physical register was previously copied to a virtual
+    /// register and hasn't been clobbered, the virt->phys copy can be
+    /// deleted.
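+    ///
+    /// e.g. (illustrative, %PHYSREG being some non-allocatable physical
+    /// register):
+    ///   %vreg0 = COPY %PHYSREG
+    ///   ...                      <- no clobber of %PHYSREG
+    ///   %PHYSREG = COPY %vreg0   <- redundant, can be deleted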
+ bool foldRedundantNAPhysCopy(
+ MachineInstr *MI,
+ DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
+
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
@@ -171,6 +209,69 @@ namespace {
}
};
+  /// \brief Helper class to hold the result of ValueTracker queries. Contains
+  /// the returned sources for a given search and the instruction the sources
+  /// were tracked from.
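+  ///
+  /// e.g. (illustrative): a query over
+  ///   %v1 = COPY %v0
+  /// yields a result with the single source (%v0, 0) and getInst()
+  /// pointing at the COPY; a PHI definition yields one source per
+  /// incoming edge.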
+ class ValueTrackerResult {
+ private:
+ /// Track all sources found by one ValueTracker query.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
+
+ /// Instruction using the sources in 'RegSrcs'.
+ const MachineInstr *Inst;
+
+ public:
+ ValueTrackerResult() : Inst(nullptr) {}
+ ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
+ addSource(Reg, SubReg);
+ }
+
+ bool isValid() const { return getNumSources() > 0; }
+
+ void setInst(const MachineInstr *I) { Inst = I; }
+ const MachineInstr *getInst() const { return Inst; }
+
+ void clear() {
+ RegSrcs.clear();
+ Inst = nullptr;
+ }
+
+ void addSource(unsigned SrcReg, unsigned SrcSubReg) {
+ RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg));
+ }
+
+ void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
+ assert(Idx < getNumSources() && "Reg pair source out of index");
+ RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg);
+ }
+
+ int getNumSources() const { return RegSrcs.size(); }
+
+ unsigned getSrcReg(int Idx) const {
+ assert(Idx < getNumSources() && "Reg source out of index");
+ return RegSrcs[Idx].Reg;
+ }
+
+ unsigned getSrcSubReg(int Idx) const {
+ assert(Idx < getNumSources() && "SubReg source out of index");
+ return RegSrcs[Idx].SubReg;
+ }
+
+ bool operator==(const ValueTrackerResult &Other) {
+ if (Other.getInst() != getInst())
+ return false;
+
+ if (Other.getNumSources() != getNumSources())
+ return false;
+
+ for (int i = 0, e = Other.getNumSources(); i != e; ++i)
+ if (Other.getSrcReg(i) != getSrcReg(i) ||
+ Other.getSrcSubReg(i) != getSrcSubReg(i))
+ return false;
+ return true;
+ }
+ };
+
/// \brief Helper class to track the possible sources of a value defined by
/// a (chain of) copy related instructions.
/// Given a definition (instruction and definition index), this class
@@ -213,23 +314,25 @@ namespace {
/// \brief Dispatcher to the right underlying implementation of
/// getNextSource.
- bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceImpl();
/// \brief Specialized version of getNextSource for Copy instructions.
- bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromCopy();
/// \brief Specialized version of getNextSource for Bitcast instructions.
- bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromBitcast();
/// \brief Specialized version of getNextSource for RegSequence
/// instructions.
- bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromRegSequence();
/// \brief Specialized version of getNextSource for InsertSubreg
/// instructions.
- bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromInsertSubreg();
/// \brief Specialized version of getNextSource for ExtractSubreg
/// instructions.
- bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromExtractSubreg();
/// \brief Specialized version of getNextSource for SubregToReg
/// instructions.
- bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
+ ValueTrackerResult getNextSourceFromSubregToReg();
+ /// \brief Specialized version of getNextSource for PHI instructions.
+ ValueTrackerResult getNextSourceFromPHI();
public:
/// \brief Create a ValueTracker instance for the value defined by \p Reg.
@@ -276,16 +379,10 @@ namespace {
/// \brief Following the use-def chain, get the next available source
/// for the tracked value.
- /// When the returned value is not nullptr, \p SrcReg gives the register
- /// that contain the tracked value.
- /// \note The sub register index returned in \p SrcSubReg must be used
- /// on \p SrcReg to access the actual value.
- /// \return Unless the returned value is nullptr (i.e., no source found),
- /// \p SrcReg gives the register of the next source used in the returned
- /// instruction and \p SrcSubReg the sub-register index to be used on that
- /// source to get the tracked value. When nullptr is returned, no
- /// alternative source has been found.
- const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
+ /// \return A ValueTrackerResult containing a set of registers
+ /// and sub registers with tracked values. A ValueTrackerResult with
+ /// an empty set of registers means no source was found.
+ ValueTrackerResult getNextSource();
/// \brief Get the last register where the initial value can be found.
/// Initially this is the register of the definition.
@@ -303,11 +400,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
-/// a single register and writes a single register and it does not modify the
-/// source, and if the source value is preserved as a sub-register of the
-/// result, then replace all reachable uses of the source with the subreg of the
-/// result.
+/// If instruction is a copy-like instruction, i.e. it reads a single register
+/// and writes a single register and it does not modify the source, and if the
+/// source value is preserved as a sub-register of the result, then replace all
+/// reachable uses of the source with the subreg of the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
@@ -458,10 +554,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
-/// optimizeCmpInstr - If the instruction is a compare and the previous
-/// instruction it's comparing against all ready sets (or could be modified to
-/// set) the same flag as the compare, then we can remove the comparison and use
-/// the flag from the previous instruction.
+/// If the instruction is a compare and the previous instruction it's comparing
+/// against already sets (or could be modified to set) the same flag as the
+/// compare, then we can remove the comparison and use the flag from the
+/// previous instruction.
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
// If this instruction is a comparison against zero and isn't comparing a
@@ -506,88 +602,138 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
return TII->optimizeCondBranch(MI);
}
-/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
-/// share the same register file.
-static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
- const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) {
- // Same register class.
- if (DefRC == SrcRC)
- return true;
-
- // Both operands are sub registers. Check if they share a register class.
- unsigned SrcIdx, DefIdx;
- if (SrcSubReg && DefSubReg)
- return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
- SrcIdx, DefIdx) != nullptr;
- // At most one of the register is a sub register, make it Src to avoid
- // duplicating the test.
- if (!SrcSubReg) {
- std::swap(DefSubReg, SrcSubReg);
- std::swap(DefRC, SrcRC);
- }
-
- // One of the register is a sub register, check if we can get a superclass.
- if (SrcSubReg)
- return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
- // Plain copy.
- return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
-}
-
/// \brief Try to find the next source that shares the same register file
/// for the value defined by \p Reg and \p SubReg.
-/// When true is returned, \p Reg and \p SubReg are updated with the
-/// register number and sub-register index of the new source.
+/// When true is returned, the \p RewriteMap can be used by the client to
+/// retrieve all Def -> Use pairs along the way up to the next source. Any
+/// found Use that is not itself a key for another entry is the next source to
+/// use. During the search for the next source, multiple sources can be found
+/// given multiple incoming sources of a PHI instruction. In this case, we
+/// look in each PHI source for the next source; all found next sources must
+/// share the same register file as \p Reg and \p SubReg. The client should
+/// then be able to rewrite all intermediate PHIs to get the next source.
/// \return False if no alternative sources are available. True otherwise.
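+/// e.g. (illustrative), for the chain
+///   %v1 = COPY %v0
+///   %v2 = COPY %v1
+/// a search starting at %v2 records (%v2 -> %v1) and (%v1 -> %v0) in
+/// \p RewriteMap; %v0, which is not itself a key, is the next source.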
-bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
+bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap) {
// Do not try to find a new source for a physical register.
// So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
-
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
- unsigned DefSubReg = SubReg;
-
- unsigned Src;
- unsigned SrcSubReg;
- bool ShouldRewrite = false;
-
- // Follow the chain of copies until we reach the top of the use-def chain
- // or find a more suitable source.
- ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
- do {
- unsigned CopySrcReg, CopySrcSubReg;
- if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
- break;
- Src = CopySrcReg;
- SrcSubReg = CopySrcSubReg;
-
- // Do not extend the live-ranges of physical registers as they add
- // constraints to the register allocator.
- // Moreover, if we want to extend the live-range of a physical register,
- // unlike SSA virtual register, we will have to check that they are not
- // redefine before the related use.
- if (TargetRegisterInfo::isPhysicalRegister(Src))
- break;
- const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
+ TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
+ SrcToLook.push_back(CurSrcPair);
+
+ unsigned PHICount = 0;
+ while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
+ TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
+    // As explained above, do not handle physical registers.
+ if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
+ return false;
- // If this source does not incur a cross register bank copy, use it.
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
- SrcSubReg);
- } while (!ShouldRewrite);
+ CurSrcPair = Pair;
+ ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
+ !DisableAdvCopyOpt, TII);
+ ValueTrackerResult Res;
+ bool ShouldRewrite = false;
+
+ do {
+ // Follow the chain of copies until we reach the top of the use-def chain
+ // or find a more suitable source.
+ Res = ValTracker.getNextSource();
+ if (!Res.isValid())
+ break;
+
+ // Insert the Def -> Use entry for the recently found source.
+ ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
+ if (CurSrcRes.isValid()) {
+ assert(CurSrcRes == Res && "ValueTrackerResult found must match");
+        // An existing entry with multiple sources is a PHI cycle we must avoid.
+ // Otherwise it's an entry with a valid next source we already found.
+ if (CurSrcRes.getNumSources() > 1) {
+ DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
+ return false;
+ }
+ break;
+ }
+ RewriteMap.insert(std::make_pair(CurSrcPair, Res));
+
+      // A ValueTrackerResult usually has one source unless it's the result of
+      // a PHI instruction. Add the found PHI edges to be looked up further.
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs > 1) {
+ PHICount++;
+ for (unsigned i = 0; i < NumSrcs; ++i)
+ SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
+ Res.getSrcReg(i), Res.getSrcSubReg(i)));
+ break;
+ }
- // If we did not find a more suitable source, there is nothing to optimize.
- if (!ShouldRewrite || Src == Reg)
+ CurSrcPair.Reg = Res.getSrcReg(0);
+ CurSrcPair.SubReg = Res.getSrcSubReg(0);
+      // Do not extend the live-ranges of physical registers as they add
+      // constraints to the register allocator. Moreover, if we want to extend
+      // the live-range of a physical register, unlike an SSA virtual register,
+      // we would have to check that it isn't redefined before the related use.
+ if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ return false;
+
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
+ ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
+ CurSrcPair.SubReg);
+ } while (!ShouldRewrite);
+
+ // Continue looking for new sources...
+ if (Res.isValid())
+ continue;
+
+    // Do not continue searching for a new source if there's at least
+    // one use-def which cannot be rewritten.
+ if (!ShouldRewrite)
+ return false;
+ }
+
+ if (PHICount >= RewritePHILimit) {
+ DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
+ }
- Reg = Src;
- SubReg = SrcSubReg;
- return true;
+ // If we did not find a more suitable source, there is nothing to optimize.
+ return CurSrcPair.Reg != Reg;
+}
+
+/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// guaranteed to have the same register class. This is necessary whenever we
+/// successfully traverse a PHI instruction and find suitable sources coming
+/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
+/// suitable to be used in a new COPY instruction.
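+///
+/// e.g. (illustrative), given an original
+///   %v3 = PHI %v1, <BB#0>, %v2, <BB#1>
+/// and suitable sources %s1 and %s2 found for %v1 and %v2, this inserts
+///   %v4 = PHI %s1, <BB#0>, %s2, <BB#1>
+/// whose def %v4 can then feed a new COPY.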
+static MachineInstr *
+insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
+ MachineInstr *OrigPHI) {
+ assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
+
+ const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+ unsigned NewVR = MRI->createVirtualRegister(NewRC);
+ MachineBasicBlock *MBB = OrigPHI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
+ TII->get(TargetOpcode::PHI), NewVR);
+
+ unsigned MBBOpIdx = 2;
+ for (auto RegPair : SrcRegs) {
+ MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
+ MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
+    // Since we're extending the lifetime of RegPair.Reg, clear the
+    // kill flags to account for that and make sure RegPair.Reg reaches
+    // the new PHI.
+ MRI->clearKillFlags(RegPair.Reg);
+ MBBOpIdx += 2;
+ }
+
+ return MIB;
}
namespace {
@@ -624,7 +770,7 @@ public:
/// This source defines the whole definition, i.e.,
/// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
///
- /// The second and subsequent calls will return false, has there is only one
+ /// The second and subsequent calls will return false, as there is only one
/// rewritable source.
///
/// \return True if a rewritable source has been found, false otherwise.
@@ -632,9 +778,9 @@ public:
virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
unsigned &TrackReg,
unsigned &TrackSubReg) {
- // If CurrentSrcIdx == 1, this means this function has already been
- // called once. CopyLike has one defintiion and one argument, thus,
- // there is nothing else to rewrite.
+ // If CurrentSrcIdx == 1, this means this function has already been called
+ // once. CopyLike has one definition and one argument, thus, there is
+ // nothing else to rewrite.
if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
return false;
// This is the first call to getNextRewritableSource.
@@ -653,7 +799,7 @@ public:
/// \brief Rewrite the current source with \p NewReg and \p NewSubReg
/// if possible.
- /// \return True if the rewritting was possible, false otherwise.
+ /// \return True if the rewriting was possible, false otherwise.
virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
return false;
@@ -662,6 +808,157 @@ public:
MOSrc.setSubReg(NewSubReg);
return true;
}
+
+  /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
+  /// the new source to use for the rewrite. If \p HandleMultipleSources is true
+  /// and multiple sources for a given \p Def are found along the way, we found
+  /// a PHI instruction that needs to be rewritten.
+ /// TODO: HandleMultipleSources should be removed once we test PHI handling
+ /// with coalescable copies.
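+  ///
+  /// e.g. (illustrative): with \p RewriteMap entries (%v2 -> %v1) and
+  /// (%v1 -> %v0), getNewSource for %v2 walks the chain and returns %v0;
+  /// an entry with several sources (a PHI) has each source resolved
+  /// recursively and a rewritten PHI emitted via insertPHI.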
+ TargetInstrInfo::RegSubRegPair
+ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap,
+ bool HandleMultipleSources = true) {
+
+ TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
+ do {
+ ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
+      // If there are no entries in the map, LookupSrc is the new source.
+ if (!Res.isValid())
+ return LookupSrc;
+
+ // There's only one source for this definition, keep searching...
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs == 1) {
+ LookupSrc.Reg = Res.getSrcReg(0);
+ LookupSrc.SubReg = Res.getSrcSubReg(0);
+ continue;
+ }
+
+ // TODO: Remove once multiple srcs w/ coalescable copies are supported.
+ if (!HandleMultipleSources)
+ break;
+
+      // Multiple sources, recurse into each source to find a new source
+      // for it. Then, rewrite the PHI according to its new edges.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
+ for (unsigned i = 0; i < NumSrcs; ++i) {
+ TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
+ Res.getSrcSubReg(i));
+ NewPHISrcs.push_back(
+ getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
+ }
+
+ // Build the new PHI node and return its def register as the new source.
+ MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
+ MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
+ DEBUG(dbgs() << "-- getNewSource\n");
+ DEBUG(dbgs() << " Replacing: " << *OrigPHI);
+ DEBUG(dbgs() << " With: " << *NewPHI);
+ const MachineOperand &MODef = NewPHI->getOperand(0);
+ return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+
+ } while (1);
+
+ return TargetInstrInfo::RegSubRegPair(0, 0);
+ }
+
+  /// \brief Rewrite the source found through \p Def, using the \p RewriteMap,
+  /// and create a new COPY instruction. More info about RewriteMap in
+  /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+  /// uncoalescable copies, since they are copy-like instructions that aren't
+  /// recognized by the register allocator.
+ virtual MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) {
+ return nullptr;
+ }
+};
+
+/// \brief Helper class to rewrite uncoalescable copy like instructions
+/// into new COPY (coalescable friendly) instructions.
+class UncoalescableRewriter : public CopyRewriter {
+protected:
+ const TargetInstrInfo &TII;
+ MachineRegisterInfo &MRI;
+  /// The number of defs in the copy-like instruction
+ unsigned NumDefs;
+
+public:
+ UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI)
+ : CopyRewriter(MI), TII(TII), MRI(MRI) {
+ NumDefs = MI.getDesc().getNumDefs();
+ }
+
+  /// \brief Get the next rewritable def source (TrackReg, TrackSubReg).
+  /// All such sources need to be considered rewritable in order to
+  /// rewrite an uncoalescable copy-like instruction. This method returns
+  /// each definition that must be checked if rewritable.
+ ///
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // Find the next non-dead definition and continue from there.
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+
+ while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
+ ++CurrentSrcIdx;
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+ }
+
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+
+ CurrentSrcIdx++;
+ return true;
+ }
+
+  /// \brief Rewrite the source found through \p Def, using the \p RewriteMap,
+  /// and create a new COPY instruction. More info about RewriteMap in
+  /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+  /// uncoalescable copies, since they are copy-like instructions that aren't
+  /// recognized by the register allocator.
+ MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ "We do not rewrite physical registers");
+
+ // Find the new source to use in the COPY rewrite.
+ TargetInstrInfo::RegSubRegPair NewSrc =
+ getNewSource(&MRI, &TII, Def, RewriteMap);
+
+ // Insert the COPY.
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
+ unsigned NewVR = MRI.createVirtualRegister(DefRC);
+
+ MachineInstr *NewCopy =
+ BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewVR)
+ .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
+
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ if (Def.SubReg)
+ NewCopy->getOperand(0).setIsUndef();
+
+ DEBUG(dbgs() << "-- RewriteSource\n");
+ DEBUG(dbgs() << " Replacing: " << CopyLike);
+ DEBUG(dbgs() << " With: " << *NewCopy);
+ MRI.replaceRegWith(Def.Reg, NewVR);
+ MRI.clearKillFlags(NewVR);
+
+ // We extended the lifetime of NewSrc.Reg, clear the kill flags to
+ // account for that.
+ MRI.clearKillFlags(NewSrc.Reg);
+
+ return NewCopy;
+ }
};
/// \brief Specialized rewriter for INSERT_SUBREG instruction.
@@ -699,7 +996,7 @@ public:
// partial definition.
TrackReg = MODef.getReg();
if (MODef.getSubReg())
- // Bails if we have to compose sub-register indices.
+ // Bail if we have to compose sub-register indices.
return false;
TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
return true;
@@ -740,7 +1037,7 @@ public:
CurrentSrcIdx = 1;
const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
SrcReg = MOExtractedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if (MOExtractedReg.getSubReg())
return false;
@@ -818,7 +1115,7 @@ public:
}
const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
SrcReg = MOInsertedReg.getReg();
- // If we have to compose sub-register indices, bails out.
+ // If we have to compose sub-register indices, bail out.
if ((SrcSubReg = MOInsertedReg.getSubReg()))
return false;
@@ -828,7 +1125,7 @@ public:
const MachineOperand &MODef = CopyLike.getOperand(0);
TrackReg = MODef.getReg();
- // If we have to compose sub-registers, bails.
+ // If we have to compose sub-registers, bail.
return MODef.getSubReg() == 0;
}
@@ -850,7 +1147,13 @@ public:
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
/// if no rewriter works for \p MI.
static CopyRewriter *getCopyRewriter(MachineInstr &MI,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ // Handle uncoalescable copy-like instructions.
+ if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()))
+ return new UncoalescableRewriter(MI, TII, MRI);
+
switch (MI.getOpcode()) {
default:
return nullptr;
@@ -874,7 +1177,7 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI,
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
-/// overconstraint the register allocator, but replaces some operands
+/// overconstrain the register allocator, but replaces some operands
/// when possible.
/// \pre isCoalescableCopy(*MI) is true.
/// \return True, when \p MI has been rewritten. False otherwise.
@@ -889,25 +1192,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
bool Changed = false;
// Get the right rewriter for the current copy.
- std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
- // If none exists, bails out.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
if (!CpyRewriter)
return false;
// Rewrite each rewritable source.
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
TrackSubReg)) {
- unsigned NewSrc = TrackReg;
- unsigned NewSubReg = TrackSubReg;
- // Try to find a more suitable source.
- // If we failed to do so, or get the actual source,
- // move to the next source.
- if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
+    // Keep track of PHI nodes and their incoming edges when looking for sources.
+ RewriteMapTy RewriteMap;
+ // Try to find a more suitable source. If we failed to do so, or get the
+ // actual source, move to the next source.
+ if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
+ continue;
+
+ // Get the new source to rewrite. TODO: Only enable handling of multiple
+ // sources (PHIs) once we have a motivating example and testcases for it.
+ TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
+ TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
+ MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
+ if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
continue;
+
// Rewrite source.
- if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
+ if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
// We may have extended the live-range of NewSrc, account for that.
- MRI->clearKillFlags(NewSrc);
+ MRI->clearKillFlags(NewSrc.Reg);
Changed = true;
}
}
@@ -936,61 +1247,53 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
// Check if we can rewrite all the values defined by this instruction.
- SmallVector<
- std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
- 4> RewritePairs;
- for (const MachineOperand &MODef : MI->defs()) {
- if (MODef.isDead())
- // We can ignore those.
- continue;
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source by generating new COPYs. This works
+ // differently from optimizeCoalescableCopy since it first makes sure that all
+ // definitions can be rewritten.
+ RewriteMapTy RewriteMap;
+ unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
+ while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
+ CopyDefSubReg)) {
// If a physical register is here, this is probably for a good reason.
// Do not rewrite that.
- if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
return false;
// If we do not know how to rewrite this definition, there is no point
// in trying to kill this instruction.
- TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
- TargetInstrInfo::RegSubRegPair Src = Def;
- if (!findNextSource(Src.Reg, Src.SubReg))
+ TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
+ if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
return false;
- RewritePairs.push_back(std::make_pair(Def, Src));
+
+ RewritePairs.push_back(Def);
}
+
// The change is possible for all defs, do it.
- for (const auto &PairDefSrc : RewritePairs) {
- const auto &Def = PairDefSrc.first;
- const auto &Src = PairDefSrc.second;
+ for (const auto &Def : RewritePairs) {
// Rewrite the "copy" in a way the register coalescer understands.
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
- "We do not rewrite physical registers");
- const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
- unsigned NewVR = MRI->createVirtualRegister(DefRC);
- MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- NewVR).addReg(Src.Reg, 0, Src.SubReg);
- NewCopy->getOperand(0).setSubReg(Def.SubReg);
- if (Def.SubReg)
- NewCopy->getOperand(0).setIsUndef();
+ MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
+ assert(NewCopy && "Should be able to always generate a new copy");
LocalMIs.insert(NewCopy);
- MRI->replaceRegWith(Def.Reg, NewVR);
- MRI->clearKillFlags(NewVR);
- // We extended the lifetime of Src.
- // Clear the kill flags to account for that.
- MRI->clearKillFlags(Src.Reg);
}
+
// MI is now dead.
MI->eraseFromParent();
++NumUncoalescableCopies;
return true;
}
-/// isLoadFoldable - Check whether MI is a candidate for folding into a later
-/// instruction. We only fold loads to virtual registers and the virtual
-/// register defined has a single use.
+/// Check whether MI is a candidate for folding into a later instruction.
+/// We only fold loads to virtual registers and the virtual register defined
+/// has a single use.
bool PeepholeOptimizer::isLoadFoldable(
- MachineInstr *MI,
- SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
+ MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI->canFoldAsLoad() || !MI->mayLoad())
return false;
const MCInstrDesc &MCID = MI->getDesc();
@@ -1010,9 +1313,9 @@ bool PeepholeOptimizer::isLoadFoldable(
return false;
}
-bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+bool PeepholeOptimizer::isMoveImmediate(
+ MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
if (!MI->isMoveImmediate())
return false;
@@ -1028,23 +1331,26 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
return false;
}
-/// foldImmediate - Try folding register operands that are defined by move
-/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// Try folding register operands that are defined by move immediate
+/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
- assert(II != ImmDefMIs.end());
+ assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
++NumImmFold;
return true;
@@ -1053,6 +1359,117 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
return false;
}
+// FIXME: This is very simple and misses some cases which should be handled when
+// motivating examples are found.
+//
+// The copy rewriting logic should look at uses as well as defs and be able to
+// eliminate copies across blocks.
+//
+// Later copies that are subregister extracts will also not be eliminated since
+// only the first copy is considered.
+//
+// e.g.
+// %vreg1 = COPY %vreg0
+// %vreg2 = COPY %vreg0:sub1
+//
+// Should replace %vreg2 uses with %vreg1:sub1
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ if (CopySrcRegs.insert(SrcReg).second) {
+ // First copy of this reg seen.
+ CopyMIs.insert(std::make_pair(SrcReg, MI));
+ return false;
+ }
+
+ MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
+
+ // Can't replace different subregister extracts.
+ if (SrcSubReg != PrevSrcSubReg)
+ return false;
+
+ unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+
+ // Only replace if the copy register class is the same.
+ //
+ // TODO: If we have multiple copies to different register classes, we may want
+ // to track multiple copies of the same source register.
+ if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
+ return false;
+
+ MRI->replaceRegWith(DstReg, PrevDstReg);
+
+ // Lifetime of the previous copy has been extended.
+ MRI->clearKillFlags(PrevDstReg);
+ return true;
+}
+
+bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
+ return TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !MRI->isAllocatable(Reg);
+}
+
+bool PeepholeOptimizer::foldRedundantNAPhysCopy(
+ MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ if (DisableNAPhysCopyOpt)
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // %vreg = COPY %PHYSREG
+    // Avoid using a data structure which can track multiple live non-allocatable
+    // phys->virt copies since LLVM doesn't seem to do this.
+ NAPhysToVirtMIs.insert({SrcReg, MI});
+ return false;
+ }
+
+ if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ return false;
+
+ // %PHYSREG = COPY %vreg
+ auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
+ if (PrevCopy == NAPhysToVirtMIs.end()) {
+ // We can't remove the copy: there was an intervening clobber of the
+ // non-allocatable physical register after the copy to virtual.
+ DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI
+ << '\n');
+ return false;
+ }
+
+ unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ if (PrevDstReg == SrcReg) {
+ // Remove the virt->phys copy: we saw the virtual register definition, and
+ // the non-allocatable physical register's state hasn't changed since then.
+ DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n');
+ ++NumNAPhysCopies;
+ return true;
+ }
+
+ // Potential missed optimization opportunity: we saw a different virtual
+ // register get a copy of the non-allocatable physical register, and we only
+ // track one such copy. Avoid getting confused by this new non-allocatable
+ // physical register definition, and remove it from the tracked copies.
+ DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n');
+ NAPhysToVirtMIs.erase(PrevCopy);
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
@@ -1070,9 +1487,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
-
+ for (MachineBasicBlock &MBB : MF) {
bool SeenMoveImm = false;
// During this forward scan, at some point it needs to answer the question
@@ -1086,8 +1501,19 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
- for (MachineBasicBlock::iterator
- MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+ // Track when a non-allocatable physical register is copied to a virtual
+ // register so that useless moves can be removed.
+ //
+ // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
+ // without any intervening re-definition of %PHYSREG.
+ DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+
+ // Set of virtual registers that are copied from.
+ SmallSet<unsigned, 4> CopySrcRegs;
+ DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE; ) {
MachineInstr *MI = &*MII;
// We may be erasing MI below, increment MII now.
++MII;
@@ -1097,20 +1523,60 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- // If there exists an instruction which belongs to the following
- // categories, we will discard the load candidates.
- if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
- MI->isKill() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects()) {
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates.
+ if (MI->isLoadFoldBarrier())
FoldAsLoadDefCandidates.clear();
+
+ if (MI->isPosition() || MI->isPHI())
+ continue;
+
+ if (!MI->isCopy()) {
+ for (const auto &Op : MI->operands()) {
+ // Visit all operands: definitions can be implicit or explicit.
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ if (Op.isDef() && isNAPhysCopy(Reg)) {
+ const auto &Def = NAPhysToVirtMIs.find(Reg);
+ if (Def != NAPhysToVirtMIs.end()) {
+ // A new definition of the non-allocatable physical register
+ // invalidates previous copies.
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ } else if (Op.isRegMask()) {
+ const uint32_t *RegMask = Op.getRegMask();
+ for (auto &RegMI : NAPhysToVirtMIs) {
+ unsigned Def = RegMI.first;
+ if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ }
+ }
+ }
+
+ if (MI->isImplicitDef() || MI->isKill())
+ continue;
+
+ if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
+        // Blow away all non-allocatable physical register knowledge since we
+ // don't know what's correct anymore.
+ //
+ // FIXME: handle explicit asm clobbers.
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
+ << '\n');
+ NAPhysToVirtMIs.clear();
continue;
}
- if (MI->mayStore() || MI->isCall())
- FoldAsLoadDefCandidates.clear();
if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
- (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) ||
(MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
// MI is deleted.
LocalMIs.erase(MI);
@@ -1129,17 +1595,26 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI->isCopy() &&
+ (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) {
+ LocalMIs.erase(MI);
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
- Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ Changed |= optimizeExtInstr(MI, &MBB, LocalMIs);
// optimizeExtInstr might have created new instructions after MI
// and before the already incremented MII. Adjust MII so that the
// next iteration sees the new instructions.
MII = MI;
++MII;
if (SeenMoveImm)
- Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs);
}
// Check whether MI is a load candidate for folding into a later
@@ -1190,8 +1665,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
assert(Def->isCopy() && "Invalid definition");
// Copy instruction are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
@@ -1199,30 +1673,27 @@ bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
- // Bails as we do not support composing subreg yet.
- return false;
+    // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
// Otherwise, we want the whole source.
const MachineOperand &Src = Def->getOperand(1);
- SrcReg = Src.getReg();
- SrcSubReg = Src.getSubReg();
- return true;
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
-bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
if (Def->hasUnmodeledSideEffects())
- return false;
+ return ValueTrackerResult();
// Bitcasts with more than one def are not supported.
if (Def->getDesc().getNumDefs() != 1)
- return false;
+ return ValueTrackerResult();
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of the src.
- // Bails as we do not support composing subreg yet.
- return false;
+    // Bail as we do not support composing subregs yet.
+ return ValueTrackerResult();
unsigned SrcIdx = Def->getNumOperands();
for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
@@ -1230,25 +1701,25 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
const MachineOperand &MO = Def->getOperand(OpIdx);
if (!MO.isReg() || !MO.getReg())
continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
assert(!MO.isDef() && "We should have skipped all the definitions by now");
if (SrcIdx != EndOpIdx)
// Multiple sources?
- return false;
+ return ValueTrackerResult();
SrcIdx = OpIdx;
}
const MachineOperand &Src = Def->getOperand(SrcIdx);
- SrcReg = Src.getReg();
- SrcSubReg = Src.getSubReg();
- return true;
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
-bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subregs, bail out.
// The case we are checking is Def.<subreg> = REG_SEQUENCE.
// This should almost never happen as the SSA property is tracked at
// the register level (as opposed to the subreg level).
@@ -1262,16 +1733,16 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
// have this case.
// If we can ascertain (or force) that this never happens, we could
// turn that into an assertion.
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the REG_SEQUENCE here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
- return false;
+ return ValueTrackerResult();
// We are looking at:
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
@@ -1279,41 +1750,38 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
for (auto &RegSeqInput : RegSeqInputRegs) {
if (RegSeqInput.SubIdx == DefSubReg) {
if (RegSeqInput.SubReg)
- // Bails if we have to compose sub registers.
- return false;
+ // Bail if we have to compose sub registers.
+ return ValueTrackerResult();
- SrcReg = RegSeqInput.Reg;
- SrcSubReg = RegSeqInput.SubReg;
- return true;
+ return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
}
}
// If the subreg we are tracking is super-defined by another subreg,
// we could follow this value. However, this would require to compose
// the subreg and we do not do that for now.
- return false;
+ return ValueTrackerResult();
}
-bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
"Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
- // If we are composing subreg, bails out.
+ // If we are composing subreg, bail out.
// Same remark as getNextSourceFromRegSequence.
// I.e., this may be turned into an assert.
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the REG_SEQUENCE here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
TargetInstrInfo::RegSubRegPair BaseReg;
TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
- return false;
+ return ValueTrackerResult();
// We are looking at:
// Def = INSERT_SUBREG v0, v1, sub1
@@ -1323,9 +1791,7 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
// #1 Check if the inserted register matches the required sub index.
if (InsertedReg.SubIdx == DefSubReg) {
- SrcReg = InsertedReg.Reg;
- SrcSubReg = InsertedReg.SubReg;
- return true;
+ return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
}
// #2 Otherwise, if the sub register we are looking for is not partial
// defined by the inserted element, we can look through the main
@@ -1333,10 +1799,10 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
const MachineOperand &MODef = Def->getOperand(DefIdx);
// If the result register (Def) and the base register (v0) do not
// have the same register class or if we have to compose
- // subregisters, bails out.
+ // subregisters, bail out.
if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
BaseReg.SubReg)
- return false;
+ return ValueTrackerResult();
// Get the TRI and check if the inserted sub-register overlaps with the
// sub-register we are tracking.
@@ -1344,121 +1810,138 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
if (!TRI ||
(TRI->getSubRegIndexLaneMask(DefSubReg) &
TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
- return false;
+ return ValueTrackerResult();
// At this point, the value is available in v0 via the same subreg
// we used for Def.
- SrcReg = BaseReg.Reg;
- SrcSubReg = DefSubReg;
- return true;
+ return ValueTrackerResult(BaseReg.Reg, DefSubReg);
}
-bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
assert((Def->isExtractSubreg() ||
Def->isExtractSubregLike()) && "Invalid definition");
// We are looking at:
// Def = EXTRACT_SUBREG v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Indeed, if DefSubReg != 0, we would have to compose it with sub0.
if (DefSubReg)
- return false;
+ return ValueTrackerResult();
if (!TII)
// We could handle the EXTRACT_SUBREG here, but we do not want to
// duplicate the code from the generic TII.
- return false;
+ return ValueTrackerResult();
TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
- return false;
+ return ValueTrackerResult();
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
if (ExtractSubregInputReg.SubReg)
- return false;
+ return ValueTrackerResult();
// Otherwise, the value is available in the v0.sub0.
- SrcReg = ExtractSubregInputReg.Reg;
- SrcSubReg = ExtractSubregInputReg.SubIdx;
- return true;
+ return ValueTrackerResult(ExtractSubregInputReg.Reg,
+ ExtractSubregInputReg.SubIdx);
}
-bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
assert(Def->isSubregToReg() && "Invalid definition");
// We are looking at:
// Def = SUBREG_TO_REG Imm, v0, sub0
- // Bails if we have to compose sub registers.
+ // Bail if we have to compose sub registers.
// If DefSubReg != sub0, we would have to check that all the bits
// we track are included in sub0 and if yes, we would have to
// determine the right subreg in v0.
if (DefSubReg != Def->getOperand(3).getImm())
- return false;
- // Bails if we have to compose sub registers.
+ return ValueTrackerResult();
+ // Bail if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose it with sub0.
if (Def->getOperand(2).getSubReg())
- return false;
+ return ValueTrackerResult();
- SrcReg = Def->getOperand(2).getReg();
- SrcSubReg = Def->getOperand(3).getImm();
- return true;
+ return ValueTrackerResult(Def->getOperand(2).getReg(),
+ Def->getOperand(3).getImm());
+}
+
+/// \brief Explore each PHI incoming operand and return its sources.
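+///
+/// e.g. (illustrative), for
+///   %v2 = PHI %v0, <BB#0>, %v1, <BB#1>
+/// the result contains the two sources (%v0, 0) and (%v1, 0).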
+ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
+ assert(Def->isPHI() && "Invalid definition");
+ ValueTrackerResult Res;
+
+ // If we look for a different subreg, bail as we do not support composing
+ // subregs yet.
+ if (Def->getOperand(0).getSubReg() != DefSubReg)
+ return ValueTrackerResult();
+
+ // Return all register sources for PHI instructions.
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
+ auto &MO = Def->getOperand(i);
+ assert(MO.isReg() && "Invalid PHI instruction");
+ Res.addSource(MO.getReg(), MO.getSubReg());
+ }
+
+ return Res;
}
-bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSourceImpl() {
assert(Def && "This method needs a valid definition");
assert(
(DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
if (Def->isCopy())
- return getNextSourceFromCopy(SrcReg, SrcSubReg);
+ return getNextSourceFromCopy();
if (Def->isBitcast())
- return getNextSourceFromBitcast(SrcReg, SrcSubReg);
+ return getNextSourceFromBitcast();
// All the remaining cases involve "complex" instructions.
- // Bails if we did not ask for the advanced tracking.
+ // Bail if we did not ask for the advanced tracking.
if (!UseAdvancedTracking)
- return false;
+ return ValueTrackerResult();
if (Def->isRegSequence() || Def->isRegSequenceLike())
- return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
+ return getNextSourceFromRegSequence();
if (Def->isInsertSubreg() || Def->isInsertSubregLike())
- return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
+ return getNextSourceFromInsertSubreg();
if (Def->isExtractSubreg() || Def->isExtractSubregLike())
- return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
+ return getNextSourceFromExtractSubreg();
if (Def->isSubregToReg())
- return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
- return false;
+ return getNextSourceFromSubregToReg();
+ if (Def->isPHI())
+ return getNextSourceFromPHI();
+ return ValueTrackerResult();
}
-const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
- unsigned &SrcSubReg) {
+ValueTrackerResult ValueTracker::getNextSource() {
// If we reach a point where we cannot move up in the use-def chain,
// there is nothing we can get.
if (!Def)
- return nullptr;
+ return ValueTrackerResult();
- const MachineInstr *PrevDef = nullptr;
- // Try to find the next source.
- if (getNextSourceImpl(SrcReg, SrcSubReg)) {
+ ValueTrackerResult Res = getNextSourceImpl();
+ if (Res.isValid()) {
// Update definition, definition index, and subregister for the
// next call of getNextSource.
// Update the current register.
- Reg = SrcReg;
- // Update the return value before moving up in the use-def chain.
- PrevDef = Def;
+ bool OneRegSrc = Res.getNumSources() == 1;
+ if (OneRegSrc)
+ Reg = Res.getSrcReg(0);
+ // Update the result before moving up in the use-def chain
+ // with the instruction containing the last found sources.
+ Res.setInst(Def);
+
// If we can still move up in the use-def chain, move to the next
- // defintion.
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
- DefSubReg = SrcSubReg;
- return PrevDef;
+ DefSubReg = Res.getSrcSubReg(0);
+ return Res;
}
}
// If we end up here, this means we will not be able to find another source
- // for the next iteration.
- // Make sure any new call to getNextSource bails out early by cutting the
- // use-def chain.
+ // for the next iteration. Make sure any new call to getNextSource bails out
+ // early by cutting the use-def chain.
Def = nullptr;
- return PrevDef;
+ return Res;
}
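
The hunk above replaces the old out-parameter pair (SrcReg, SrcSubReg) with a ValueTrackerResult that can carry several sources, which is what makes the new PHI handling possible. The following is a minimal sketch of the shape such a result type needs; the types are hypothetical stand-ins, not the real LLVM classes:

    #include <utility>
    #include <vector>

    struct MachineInstr; // stand-in; only its address matters here

    // Sketch: zero sources = invalid, one source = COPY-like defs,
    // several sources = PHI defs.
    class ValueTrackerResult {
      std::vector<std::pair<unsigned, unsigned>> Sources; // (Reg, SubReg)
      const MachineInstr *Inst = nullptr;

    public:
      ValueTrackerResult() = default; // invalid result, isValid() == false
      ValueTrackerResult(unsigned Reg, unsigned SubReg) { addSource(Reg, SubReg); }

      bool isValid() const { return !Sources.empty(); }
      void addSource(unsigned Reg, unsigned SubReg) {
        Sources.emplace_back(Reg, SubReg);
      }
      unsigned getNumSources() const {
        return static_cast<unsigned>(Sources.size());
      }
      unsigned getSrcReg(unsigned Idx) const { return Sources[Idx].first; }
      unsigned getSrcSubReg(unsigned Idx) const { return Sources[Idx].second; }
      void setInst(const MachineInstr *MI) { Inst = MI; }
    };

Note how getNextSource() above only walks further up the use-def chain when getNumSources() == 1; with multiple PHI sources there is no single next definition to follow.
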
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 6f76116..b95dffd 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -87,7 +87,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -196,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList(
const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
const InstrItineraryData *InstrItins =
MF.getSubtarget().getInstrItineraryData();
@@ -267,7 +267,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
RegClassInfo.runOnMachineFunction(Fn);
@@ -302,8 +302,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
CriticalPathRCs);
// Loop over all of the basic blocks
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
+ for (auto &MBB : Fn) {
#ifndef NDEBUG
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
if (DebugDiv > 0) {
@@ -311,25 +310,25 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (bbcnt++ % DebugDiv != DebugMod)
continue;
dbgs() << "*** DEBUG scheduling " << Fn.getName()
- << ":BB#" << MBB->getNumber() << " ***\n";
+ << ":BB#" << MBB.getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.startBlock(MBB);
+ Scheduler.startBlock(&MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
- MachineBasicBlock::iterator Current = MBB->end();
- unsigned Count = MBB->size(), CurrentCount = Count;
- for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineBasicBlock::iterator Current = MBB.end();
+ unsigned Count = MBB.size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
MachineInstr *MI = std::prev(I);
--Count;
// Calls are not scheduling boundaries before register allocation, but
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
- if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+ Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
@@ -343,9 +342,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
- assert((MBB->begin() == Current || CurrentCount != 0) &&
+ assert((MBB.begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
@@ -355,7 +354,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.finishBlock();
// Update register kills
- Scheduler.fixupKills(MBB);
+ Scheduler.fixupKills(&MBB);
}
return true;
@@ -400,8 +399,12 @@ void SchedulePostRATDList::schedule() {
}
DEBUG(dbgs() << "********** List Scheduling **********\n");
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ dbgs() << '\n';
+ }
+ );
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
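
This file shows the mechanical part of the AliasAnalysis-to-AAResultsWrapperPass migration that recurs throughout this patch. Below is a skeleton of the new-style plumbing, reduced to the two calls that actually appear in these hunks; pass registration boilerplate is omitted, so this is a sketch rather than a complete pass:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : MachineFunctionPass {
      static char ID;
      ExamplePass() : MachineFunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.setPreservesCFG();
        // Depend on the wrapper pass instead of the old analysis group.
        AU.addRequired<AAResultsWrapperPass>();
        MachineFunctionPass::getAnalysisUsage(AU);
      }

      bool runOnMachineFunction(MachineFunction &MF) override {
        // The wrapper hands out the aggregated AAResults object.
        AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // ... memory queries go through AA ...
        return false;
      }
    };
    char ExamplePass::ID = 0;
    } // end anonymous namespace
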
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 5f81949..d27ea2f 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -58,7 +58,7 @@ INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -96,7 +96,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// This is a physreg implicit-def.
// Look for the first instruction to use or define an alias.
- MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
bool Found = false;
for (++UserMI; UserMI != UserE; ++UserMI) {
@@ -151,7 +151,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
if (MBBI->isImplicitDef())
- WorkList.insert(MBBI);
+ WorkList.insert(&*MBBI);
if (WorkList.empty())
continue;
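
The MI->getIterator() and &*MBBI changes in this file come from the ilist rework elsewhere in the import: instruction pointers and list iterators no longer convert implicitly, so both directions must be spelled out. A toy illustration with std::list (the real types are MachineInstr and MachineBasicBlock::instr_iterator):

    #include <list>

    struct Instr { int Opcode; };

    int main() {
      std::list<Instr> Block{{1}, {2}, {3}};
      std::list<Instr>::iterator It = Block.begin();
      // Iterator -> pointer: this is the &*MBBI spelling used above.
      Instr *Ptr = &*It;
      (void)Ptr;
      return 0;
    }
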
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 6ca69a1..939c500 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -71,8 +71,9 @@ private:
// stack frame indexes.
unsigned MinCSFrameIndex, MaxCSFrameIndex;
- // Save and Restore blocks of the current function.
- MachineBasicBlock *SaveBlock;
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ SmallVector<MachineBasicBlock *, 1> SaveBlocks;
SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
// Flag to control whether to use the register scavenger to resolve
@@ -91,9 +92,6 @@ private:
int &SPAdj);
void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
-
- // Convenience for recognizing return blocks.
- bool isReturnBlock(const MachineBasicBlock *MBB) const;
};
} // namespace
@@ -128,10 +126,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const {
- return (MBB && !MBB->empty() && MBB->back().isReturn());
-}
-
/// Compute the set of return blocks
void PEI::calculateSets(MachineFunction &Fn) {
const MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -142,25 +136,25 @@ void PEI::calculateSets(MachineFunction &Fn) {
// Use the points found by shrink-wrapping, if any.
if (MFI->getSavePoint()) {
- SaveBlock = MFI->getSavePoint();
+ SaveBlocks.push_back(MFI->getSavePoint());
assert(MFI->getRestorePoint() && "Both restore and save must be set");
MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
// If RestoreBlock does not have any successor and is not a return block
// then the end point is unreachable and we do not need to insert any
// epilogue.
- if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock))
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
RestoreBlocks.push_back(RestoreBlock);
return;
}
 // Save refs to entry, EH funclet entry, and return blocks.
- SaveBlock = Fn.begin();
- for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
- MBB != E; ++MBB)
- if (isReturnBlock(MBB))
- RestoreBlocks.push_back(MBB);
-
- return;
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
}
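
To make the new save-point logic easier to follow, here is a self-contained sketch of the loop calculateSets now runs when shrink-wrapping did not pick explicit points; Block is a hypothetical stand-in for MachineBasicBlock:

    #include <vector>

    struct Block {
      bool EHFuncletEntry = false;
      bool Return = false;
    };

    // Entry block always gets a prologue; each Windows EH funclet entry gets
    // its own; every return block gets an epilogue. Assumes Fn is non-empty.
    static void calculateSets(std::vector<Block> &Fn,
                              std::vector<Block *> &SaveBlocks,
                              std::vector<Block *> &RestoreBlocks) {
      SaveBlocks.push_back(&Fn.front());
      for (Block &MBB : Fn) {
        if (MBB.EHFuncletEntry)
          SaveBlocks.push_back(&MBB);
        if (MBB.Return)
          RestoreBlocks.push_back(&MBB);
      }
    }
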
/// StackObjSet - A set of stack object indexes
@@ -195,7 +189,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// place all spills in the entry block, all restores in return blocks.
calculateSets(Fn);
- // Add the code to save and restore the callee saved registers
+ // Add the code to save and restore the callee saved registers.
if (!F->hasFnAttribute(Attribute::Naked))
insertCSRSpillsAndRestores(Fn);
@@ -237,6 +231,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
}
delete RS;
+ SaveBlocks.clear();
RestoreBlocks.clear();
return true;
}
@@ -407,7 +402,7 @@ static void updateLiveness(MachineFunction &MF) {
const MachineBasicBlock *CurBB = WorkList.pop_back_val();
// By construction, the region that is after the save point is
// dominated by the Save and post-dominated by the Restore.
- if (CurBB == Save)
+ if (CurBB == Save && Save != Restore)
continue;
// Enqueue all the successors not already visited.
// Those are by construction either before Save or after Restore.
@@ -419,10 +414,13 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- for (MachineBasicBlock *MBB : Visited)
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = CSI[i].getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
- MBB->addLiveIn(CSI[i].getReg());
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
}
}
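
The new isLiveIn guard matters because updateLiveness can now run once per save block, so the same callee-saved register could otherwise be appended to a block's live-in list repeatedly. A minimal sketch of the idempotent helper the loop above effectively implements (stand-in types, not the LLVM ones):

    #include <algorithm>
    #include <vector>

    using MCPhysReg = unsigned;

    struct Block {
      std::vector<MCPhysReg> LiveIns;
      bool isLiveIn(MCPhysReg R) const {
        return std::find(LiveIns.begin(), LiveIns.end(), R) != LiveIns.end();
      }
      void addLiveIn(MCPhysReg R) { LiveIns.push_back(R); }
    };

    // Add Reg as live-in exactly once, no matter how often we revisit MBB.
    static void addCalleeSavedLiveIn(Block &MBB, MCPhysReg Reg) {
      if (!MBB.isLiveIn(Reg))
        MBB.addLiveIn(Reg);
    }
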
@@ -446,18 +444,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MachineBasicBlock::iterator I;
// Spill using target interface.
- I = SaveBlock->begin();
- if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Insert the spill to the stack frame.
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
- RC, TRI);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
}
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
}
- // Update the live-in information of all the blocks up to the save point.
- updateLiveness(Fn);
// Restore using target interface.
for (MachineBasicBlock *MBB : RestoreBlocks) {
@@ -500,7 +500,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
static inline void
AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign) {
+ unsigned &MaxAlign, unsigned Skew) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI->getObjectSize(FrameIdx);
@@ -512,7 +512,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
@@ -530,12 +530,12 @@ static void
AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo *MFI, bool StackGrowsDown,
- int64_t &Offset, unsigned &MaxAlign) {
+ int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
E = UnassignedObjs.end(); I != E; ++I) {
int i = *I;
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
ProtectedObjs.insert(i);
}
}
@@ -563,6 +563,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
&& "Local area offset should be in direction of stack growth");
int64_t Offset = LocalAreaOffset;
+ // Skew to be applied to alignment.
+ unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// We currently don't support filling in holes in between fixed sized
@@ -593,7 +596,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
@@ -602,7 +605,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
MFI->setObjectOffset(i, Offset);
Offset += MFI->getObjectSize(i);
@@ -624,7 +627,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
RS->getScavengingFrameIndices(SFIs);
for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -635,7 +638,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = RoundUpToAlignment(Offset, Align);
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -662,7 +665,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
StackObjSet AddrOfObjs;
AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
@@ -695,11 +698,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign);
+ Offset, MaxAlign, Skew);
}
// Then assign frame offsets to stack objects that are not used to spill
@@ -719,7 +722,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (ProtectedObjs.count(i))
continue;
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
}
// Make sure the special register scavenging spill slot is closest to the
@@ -729,7 +732,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
RS->getScavengingFrameIndices(SFIs);
for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
IE = SFIs.end(); I != IE; ++I)
- AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -754,7 +757,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- Offset = RoundUpToAlignment(Offset, StackAlign);
+ Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
}
// Update frame info to pretend that this is part of the stack...
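
Every RoundUpToAlignment call in this file gains a Skew argument. As a sketch of the intended arithmetic (the authoritative definition lives in MathExtras.h; this only models the behavior these hunks rely on): the result is the smallest value >= Value that is congruent to Skew modulo Align.

    #include <cassert>
    #include <cstdint>

    static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align,
                                       uint64_t Skew = 0) {
      Skew %= Align;
      return (Value + Align - 1 - Skew) / Align * Align + Skew;
    }

    int main() {
      assert(roundUpToAlignment(13, 8) == 16);    // plain alignment
      assert(roundUpToAlignment(13, 8, 4) == 20); // 20 == 4 (mod 8), 20 >= 13
      return 0;
    }
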
@@ -771,18 +774,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Add prologue to the function...
- TFI.emitPrologue(Fn, *SaveBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(Fn, *SaveBlock);
// Add epilogue to restore the callee-save registers in each exiting block.
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
TFI.emitEpilogue(Fn, *RestoreBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(Fn, *SaveBlock);
+
// Emit additional code that is required to support segmented stacks, if
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (Fn.shouldSplitStack())
- TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ if (Fn.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ }
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
@@ -790,7 +799,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// different conditional check and another BIF for allocating more stack
// space.
if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
- TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -800,25 +810,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
if (!TFI.needsFrameIndexResolution(Fn)) return;
- MachineModuleInfo &MMI = Fn.getMMI();
- const Function *F = Fn.getFunction();
- const Function *ParentF = MMI.getWinEHParent(F);
- unsigned FrameReg;
- if (F == ParentF) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
- // FIXME: This should be unconditional but we have bugs in the preparation
- // pass.
- if (FuncInfo.UnwindHelpFrameIdx != INT_MAX)
- FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP(
- Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg);
- } else if (MMI.hasWinEHFuncInfo(F)) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
- auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F);
- if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end())
- FuncInfo.CatchHandlerParentFrameObjOffset[F] =
- TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg);
- }
-
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
@@ -841,12 +832,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
}
// Handle the unreachable blocks.
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
+ for (auto &BB : Fn) {
+ if (Reachable.count(&BB))
// Already handled in DFS traversal.
continue;
int SPAdj = 0;
- replaceFrameIndices(BB, Fn, SPAdj);
+ replaceFrameIndices(&BB, Fn, SPAdj);
}
}
@@ -889,11 +880,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (!MI->getOperand(i).isFI())
continue;
- // Frame indicies in debug values are encoded in a target independent
+ // Frame indices in debug values are encoded in a target independent
// way with simply the frame index and offset rather than any
// target-specific addressing mode.
if (MI->isDebugValue()) {
- assert(i == 0 && "Frame indicies can only appear as the first "
+ assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
MachineOperand &Offset = MI->getOperand(1);
@@ -979,7 +970,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
- RS->enterBasicBlock(BB);
+ RS->enterBasicBlock(&*BB);
int SPAdj = 0;
@@ -1026,12 +1017,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
- // Make sure MRI now accounts this register as used.
- MRI.setPhysRegUsed(ScratchReg);
-
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
@@ -1044,7 +1031,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// problem because we need the spill code before I: Move I to just
// prior to J.
if (I != std::prev(J)) {
- BB->splice(J, BB, I);
+ BB->splice(J, &*BB, I);
// Before we move I, we need to prepare the RS to visit I again.
// Specifically, RS will assert if it sees uses of registers that
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index b1c341d..1f46417 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -22,87 +23,38 @@
#include <map>
using namespace llvm;
-namespace {
-struct PSVGlobalsTy {
- // PseudoSourceValues are immutable so don't need locking.
- const PseudoSourceValue PSVs[4];
- sys::Mutex Lock; // Guards FSValues, but not the values inside it.
- std::map<int, const PseudoSourceValue *> FSValues;
-
- PSVGlobalsTy() : PSVs() {}
- ~PSVGlobalsTy() {
- for (std::map<int, const PseudoSourceValue *>::iterator
- I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
- delete I->second;
- }
- }
-};
-
-static ManagedStatic<PSVGlobalsTy> PSVGlobals;
-
-} // anonymous namespace
-
-const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &PSVGlobals->PSVs[0]; }
-const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &PSVGlobals->PSVs[1]; }
-const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &PSVGlobals->PSVs[2]; }
-const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &PSVGlobals->PSVs[3]; }
-
static const char *const PSVNames[] = {
- "Stack",
- "GOT",
- "JumpTable",
- "ConstantPool"
-};
+ "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+ "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[this - PSVGlobals->PSVs];
-}
-
-const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
- PSVGlobalsTy &PG = *PSVGlobals;
- sys::ScopedLock locked(PG.Lock);
- const PseudoSourceValue *&V = PG.FSValues[FI];
- if (!V)
- V = new FixedStackPseudoSourceValue(FI);
- return V;
+ O << PSVNames[Kind];
}
bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
- if (this == getStack())
+ if (isStack())
return false;
- if (this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
+ if (isGOT() || isConstantPool() || isJumpTable())
return true;
llvm_unreachable("Unknown PseudoSourceValue!");
}
-bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
- if (this == getStack() ||
- this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ if (isStack() || isGOT() || isConstantPool() || isJumpTable())
return false;
llvm_unreachable("Unknown PseudoSourceValue!");
}
-bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
- if (this == getGOT() ||
- this == getConstantPool() ||
- this == getJumpTable())
- return false;
- return true;
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return !(isGOT() || isConstantPool() || isJumpTable());
}
-bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+bool FixedStackPseudoSourceValue::isConstant(
+ const MachineFrameInfo *MFI) const {
return MFI && MFI->isImmutableObjectIndex(FI);
}
@@ -122,3 +74,69 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
+ : PseudoSourceValue(Kind) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+ const GlobalValue *GV)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
+
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager()
+ : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT),
+ JumpTablePSV(PseudoSourceValue::JumpTable),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool) {}
+
+const PseudoSourceValue *PseudoSourceValueManager::getStack() {
+ return &StackPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; }
+
+const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() {
+ return &ConstantPoolPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
+ return &JumpTablePSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) {
+ std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+ if (!V)
+ V = llvm::make_unique<FixedStackPseudoSourceValue>(FI);
+ return V.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
+ std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
+ GlobalCallEntries[GV];
+ if (!E)
+ E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV);
+ return E.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
+ std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
+ ExternalCallEntries[ES];
+ if (!E)
+ E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES);
+ return E.get();
+}
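
The structural change in this file is ownership: the ManagedStatic singletons and the mutex-guarded FSValues map are replaced by a per-function manager holding the kinded values. A reduced sketch of that pattern with simplified stand-in types:

    #include <map>
    #include <memory>

    struct PseudoSourceValue {
      enum PSVKind { Stack, GOT, JumpTable, ConstantPool, FixedStack };
      PSVKind Kind;
      explicit PseudoSourceValue(PSVKind K) : Kind(K) {}
    };

    struct FixedStackPSV : PseudoSourceValue {
      int FI;
      explicit FixedStackPSV(int FI) : PseudoSourceValue(FixedStack), FI(FI) {}
    };

    class PSVManager {
      PseudoSourceValue StackPSV{PseudoSourceValue::Stack};
      // Owned here; clients only ever see stable const pointers.
      std::map<int, std::unique_ptr<FixedStackPSV>> FSValues;

    public:
      const PseudoSourceValue *getStack() const { return &StackPSV; }
      const PseudoSourceValue *getFixedStack(int FI) {
        std::unique_ptr<FixedStackPSV> &V = FSValues[FI];
        if (!V)
          V = std::make_unique<FixedStackPSV>(FI);
        return V.get();
      }
    };

Since the manager's lifetime is tied to one function, no locking is needed, which is what lets the sys::Mutex above disappear.
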
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 0090332..cfe367d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -133,8 +133,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
@@ -223,7 +223,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVector<unsigned, 8> PhysRegSpillCands;
// Check for an available register in this class.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
while (unsigned PhysReg = Order.next()) {
// Check for interference in PhysReg
switch (Matrix->checkInterference(VirtReg, PhysReg)) {
@@ -276,7 +276,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
- calculateSpillWeightsAndHints(*LIS, *MF,
+ calculateSpillWeightsAndHints(*LIS, *MF, VRM,
getAnalysis<MachineLoopInfo>(),
getAnalysis<MachineBlockFrequencyInfo>());
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fd3d4d7..f4c076f 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -799,10 +799,9 @@ void RAFast::AllocateBasicBlock() {
MachineBasicBlock::iterator MII = MBB->begin();
// Add live-in registers as live.
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- if (MRI->isAllocatable(*I))
- definePhysReg(MII, *I, regReserved);
+ for (const auto &LI : MBB->liveins())
+ if (MRI->isAllocatable(LI.PhysReg))
+ definePhysReg(MII, LI.PhysReg, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
@@ -986,10 +985,6 @@ void RAFast::AllocateBasicBlock() {
}
}
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@@ -1050,10 +1045,6 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
Coalesced.push_back(MI);
@@ -1103,12 +1094,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Add the clobber lists for all the instructions we skipped earlier.
- for (const MCInstrDesc *Desc : SkippedInstrs)
- if (const uint16_t *Defs = Desc->getImplicitDefs())
- while (*Defs)
- MRI->setPhysRegUsed(*Defs++);
-
// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 7ebcf7f..945cb9e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -86,6 +86,14 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+static cl::opt<bool> EnableDeferredSpilling(
+ "enable-deferred-spilling", cl::Hidden,
+ cl::desc("Instead of spilling a variable right away, defer the actual "
+ "code insertion to the end of the allocation. That way the "
+ "allocator might still find a suitable coloring for this "
+ "variable because of other evicted variables."),
+ cl::init(false));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass,
/// Live range will be spilled. No more splitting will be attempted.
RS_Spill,
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+ /// into a register in the end.
+ RS_Memory,
+
/// There is nothing more we can do to this live range. Abort compilation
/// if it can't be assigned.
RS_Done
@@ -414,6 +427,7 @@ const char *const RAGreedy::StageName[] = {
"RS_Split",
"RS_Split2",
"RS_Spill",
+ "RS_Memory",
"RS_Done"
};
#endif
@@ -447,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineBlockFrequencyInfo>();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
@@ -536,6 +550,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
Prio = Size;
+ } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+ // Memory operands should be considered last.
+ // Change the priority such that memory operands are assigned in
+ // the reverse order they came in.
+ // TODO: Make this a member variable and probably do something about hints.
+ static unsigned MemOp = 0;
+ Prio = MemOp++;
} else {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
@@ -637,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
//===----------------------------------------------------------------------===//
unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
unsigned PhysReg;
while ((PhysReg = Order.next())) {
if (PhysReg == PrevReg)
@@ -2450,7 +2471,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
unsigned Depth) {
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
@@ -2512,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
// Finally spill VirtReg itself.
- NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
- spiller().spill(LRE);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+ if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
+ // TODO: This is experimental and in particular, we do not model
+ // the live range splitting done by spilling correctly.
+ // We would need a deep integration with the spiller to do the
+ // right thing here. Even so, this is still useful for early testing.
+ setStage(VirtReg, RS_Memory);
+ DEBUG(dbgs() << "Do as if this register is in memory\n");
+ NewVRegs.push_back(VirtReg.reg);
+ } else {
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
- if (VerifyEnabled)
- MF->verify(this, "After spilling");
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+ }
// The live virtual register requesting allocation was spilled, so tell
// the caller not to allocate anything during this round.
@@ -2555,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
- calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
+ calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
DEBUG(LIS->dump());
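
The RS_Memory priority trick above is easy to miss: deferred ("in memory") ranges get ever-increasing priorities from a counter, so a max-priority queue pops them last-deferred-first once everything else has drained. A toy demonstration of just that property:

    #include <queue>
    #include <utility>

    int main() {
      // (Prio, VReg); larger Prio pops first, mirroring RAGreedy's PQueue.
      std::priority_queue<std::pair<unsigned, unsigned>> Queue;
      unsigned MemOp = 0;
      for (unsigned VReg : {10u, 11u, 12u})
        Queue.push({MemOp++, VReg}); // defer: last in, first out
      while (!Queue.empty()) {
        // Pops VReg 12, then 11, then 10.
        Queue.pop();
      }
      return 0;
    }
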
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index eeff73d..fd28b05 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -47,6 +47,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -497,8 +498,8 @@ void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesCFG();
- au.addRequired<AliasAnalysis>();
- au.addPreserved<AliasAnalysis>();
+ au.addRequired<AAResultsWrapperPass>();
+ au.addPreserved<AAResultsWrapperPass>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
@@ -724,11 +725,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
MachineBlockFrequencyInfo &MBFI =
getAnalysis<MachineBlockFrequencyInfo>();
- calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI,
- normalizePBQPSpillWeight);
-
VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+ calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
+ MBFI, normalizePBQPSpillWeight);
+
std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
MF.getRegInfo().freezeReservedRegs(MF);
@@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-namespace {
-// A helper class for printing node and register info in a consistent way
-class PrintNodeInfo {
-public:
- typedef PBQP::RegAlloc::PBQPRAGraph Graph;
- typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId;
-
- PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {}
-
- void print(raw_ostream &OS) const {
+/// Create Printable object for node and register info.
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
+ const PBQP::RegAlloc::PBQPRAGraph &G) {
+ return Printable([NId, &G](raw_ostream &OS) {
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
unsigned VReg = G.getNodeMetadata(NId).getVReg();
const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
- }
-
-private:
- const Graph &G;
- NodeId NId;
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) {
- PR.print(OS);
- return OS;
+ });
}
-} // anonymous namespace
void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
for (auto NId : nodeIds()) {
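
PrintNodeInfo is a nice example of the Printable idiom this patch adopts: instead of a helper class plus a hand-written operator<<, a factory returns an object wrapping a print closure. A self-contained analogue using std::ostream (llvm::Printable does the same over raw_ostream):

    #include <functional>
    #include <iostream>
    #include <utility>

    struct Printable {
      std::function<void(std::ostream &)> Print;
      explicit Printable(std::function<void(std::ostream &)> P)
          : Print(std::move(P)) {}
    };

    inline std::ostream &operator<<(std::ostream &OS, const Printable &P) {
      P.Print(OS);
      return OS;
    }

    static Printable printNodeInfo(unsigned NId, unsigned VReg) {
      return Printable([NId, VReg](std::ostream &OS) {
        OS << NId << " (%vreg" << VReg << ')';
      });
    }

    int main() {
      std::cout << printNodeInfo(3, 42) << '\n'; // prints "3 (%vreg42)"
      return 0;
    }
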
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c911b9b..e7b3217 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -93,7 +92,7 @@ namespace {
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
- unsigned ShrinkMask;
+ LaneBitmask ShrinkMask;
/// True if the main range of the currently coalesced intervals should be
/// checked for smaller live intervals.
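
The unsigned-to-LaneBitmask renames that dominate the rest of this file are a pure type migration: giving lane masks a dedicated name makes uses greppable and concentrates the width in one place. As a sketch, assuming that in this snapshot LaneBitmask is effectively an integer alias (it only later became a wrapper class):

    #include <cstdio>

    using LaneBitmask = unsigned; // one place to change the mask width

    // Analogue of PrintLaneMask, replacing the scattered format("%04X") calls.
    static void printLaneMask(LaneBitmask Mask) {
      std::printf("%04X", Mask);
    }

    int main() {
      LaneBitmask ShrinkMask = 0x0003;
      printLaneMask(ShrinkMask); // prints 0003
      std::printf("\n");
      return 0;
    }
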
@@ -164,15 +163,13 @@ namespace {
/// LaneMask are split as necessary. @p LaneMask are the lanes that
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
- /// @returns false if live range conflicts couldn't get resolved.
- bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
- unsigned LaneMask, CoalescerPair &CP);
+ void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ LaneBitmask LaneMask, CoalescerPair &CP);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
- /// @returns false if live range conflicts couldn't get resolved.
- bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
- unsigned LaneMask, const CoalescerPair &CP);
+ void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask, const CoalescerPair &CP);
/// We found a non-trivially-coalescable copy. If the source value number is
/// defined by a copy from the destination reg see if we can merge these two
@@ -224,30 +221,17 @@ namespace {
/// Dst, we can drop \p Copy.
bool applyTerminalRule(const MachineInstr &Copy) const;
- /// Check whether or not \p LI is composed by multiple connected
- /// components and if that is the case, fix that.
- void splitNewRanges(LiveInterval *LI) {
- ConnectedVNInfoEqClasses ConEQ(*LIS);
- unsigned NumComps = ConEQ.Classify(LI);
- if (NumComps <= 1)
- return;
- SmallVector<LiveInterval*, 8> NewComps(1, LI);
- for (unsigned i = 1; i != NumComps; ++i) {
- unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
- NewComps.push_back(&LIS->createEmptyInterval(VReg));
- }
-
- ConEQ.Distribute(&NewComps[0], *MRI);
- }
-
/// Wrapper method for \see LiveIntervals::shrinkToUses.
 /// This method does the proper fixing of the live-ranges when the
 /// aforementioned method returns true.
void shrinkToUses(LiveInterval *LI,
SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
- if (LIS->shrinkToUses(LI, Dead))
- // We may have created multiple connected components, split them.
- splitNewRanges(LI);
+ if (LIS->shrinkToUses(LI, Dead)) {
+ /// Check whether or not \p LI is composed of multiple connected
+ /// components and if that is the case, fix that.
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS->splitSeparateComponents(*LI, SplitLIs);
+ }
}
public:
@@ -275,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -453,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
@@ -679,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
return false;
- unsigned Op1, Op2, NewDstIdx;
- if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
- return false;
- if (Op1 == UseOpIdx)
- NewDstIdx = Op2;
- else if (Op2 == UseOpIdx)
- NewDstIdx = Op1;
- else
+
+ // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+ // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
+ // passed to the method. That _other_ operand is chosen by
+ // the findCommutedOpIndices() method.
+ //
+ // That is obviously an area for improvement in case of instructions having
+ // more than 2 operands. For example, if some instruction has 3 commutable
+ // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
+ // op#2<->op#3) of commute transformation should be considered/tried here.
+ unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx))
return false;
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
@@ -719,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
- MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+ MachineInstr *NewMI =
+ TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return false;
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
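
The commute rewrite above leans on a sentinel index: the caller pins UseOpIdx and passes CommuteAnyOperandIndex to let findCommutedOpIndices choose the partner operand itself. A toy model of just that control flow (the real hooks live in TargetInstrInfo and can consult per-instruction operand constraints):

    static const unsigned CommuteAnyOperandIndex = ~0U;

    // Toy: pretend operands 1 and 2 are the commutable pair of every
    // instruction; fill in whichever index was left as the sentinel.
    static bool findCommutedOpIndices(unsigned &OpIdx1, unsigned &OpIdx2) {
      if (OpIdx1 == CommuteAnyOperandIndex)
        OpIdx1 = 1;
      if (OpIdx2 == CommuteAnyOperandIndex)
        OpIdx2 = (OpIdx1 == 1) ? 2 : 1;
      return OpIdx1 != OpIdx2 && OpIdx1 <= 2 && OpIdx2 <= 2;
    }

    int main() {
      unsigned UseOpIdx = 1;
      unsigned NewDstIdx = CommuteAnyOperandIndex; // let the hook choose
      if (findCommutedOpIndices(UseOpIdx, NewDstIdx)) {
        // NewDstIdx == 2 here; the caller would now commute the operands.
      }
      return 0;
    }
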
@@ -804,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
if (IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
- unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
IntA.createSubRangeFrom(Allocator, Mask, IntA);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
@@ -812,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
- unsigned AMask = SA.LaneMask;
+ LaneBitmask AMask = SA.LaneMask;
for (LiveInterval::SubRange &SB : IntB.subranges()) {
- unsigned BMask = SB.LaneMask;
- unsigned Common = BMask & AMask;
+ LaneBitmask BMask = SB.LaneMask;
+ LaneBitmask Common = BMask & AMask;
if (Common == 0)
continue;
- DEBUG(
- dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common));
- unsigned BRest = BMask & ~AMask;
+ DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(BMask)
+ << " into " << PrintLaneMask(Common) << '\n');
+ LaneBitmask BRest = BMask & ~AMask;
LiveInterval::SubRange *CommonRange;
if (BRest != 0) {
SB.LaneMask = BRest;
- DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest));
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
+ << '\n');
// Duplicate SubRange for newly merged common stuff.
CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
} else {
@@ -842,7 +832,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
AMask &= ~BMask;
}
if (AMask != 0) {
- DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask));
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
@@ -1107,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
// CopyMI is undef iff SrcReg is not live before the instruction.
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
- unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+ LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
if ((SR.LaneMask & SrcMask) == 0)
continue;
@@ -1128,7 +1118,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
DstLI.MergeValueNumberInto(VNI, PrevVNI);
// The affected subregister segments can be removed.
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
for (LiveInterval::SubRange &SR : DstLI.subranges()) {
if ((SR.LaneMask & DstMask) == 0)
continue;
@@ -1147,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
continue;
const MachineInstr &MI = *MO.getParent();
SlotIndex UseIdx = LIS->getInstructionIndex(&MI);
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
bool isLive;
if (UseMask != ~0u && DstLI.hasSubRanges()) {
isLive = false;
@@ -1213,10 +1203,10 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
}
- unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx);
bool IsUndef = true;
SlotIndex MIIdx = UseMI->isDebugValue()
? LIS->getSlotIndexes()->getIndexBefore(UseMI)
@@ -1445,8 +1435,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & ShrinkMask) == 0)
continue;
- DEBUG(dbgs() << "Shrink LaneUses (Lane "
- << format("%04X", S.LaneMask) << ")\n");
+ DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
+ << ")\n");
LIS->shrinkToUses(S, LI.reg);
}
LI.removeEmptySubRanges();
@@ -1644,7 +1634,7 @@ class JoinVals {
const unsigned SubIdx;
 /// The LaneMask that this live range will occupy in the coalesced register. May
/// be smaller than the lanemask produced by SubIdx when merging subranges.
- const unsigned LaneMask;
+ const LaneBitmask LaneMask;
/// This is true when joining sub register ranges, false when joining main
/// ranges.
@@ -1699,11 +1689,11 @@ class JoinVals {
ConflictResolution Resolution;
/// Lanes written by this def, 0 for unanalyzed values.
- unsigned WriteLanes;
+ LaneBitmask WriteLanes;
/// Lanes with defined values in this register. Other lanes are undef and
/// safe to clobber.
- unsigned ValidLanes;
+ LaneBitmask ValidLanes;
/// Value in LI being redefined by this def.
VNInfo *RedefVNI;
@@ -1744,7 +1734,7 @@ class JoinVals {
/// Compute the bitmask of lanes actually written by DefMI.
/// Set Redef if there are any partial register definitions that depend on the
/// previous value of the register.
- unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
/// Find the ultimate value that VNI was copied from.
std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
@@ -1780,12 +1770,12 @@ class JoinVals {
/// entry to TaintedVals.
///
/// Returns false if the tainted lanes extend beyond the basic block.
- bool taintExtent(unsigned, unsigned, JoinVals&,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool taintExtent(unsigned, LaneBitmask, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&);
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+ bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const;
/// Determine if ValNo is a copy of a value number in LR or Other.LR that will
/// be pruned:
@@ -1796,7 +1786,7 @@ class JoinVals {
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
- JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+ JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
bool TrackSubRegLiveness)
@@ -1822,8 +1812,8 @@ public:
/// Removes subranges starting at copies that get removed. This sometimes
/// happens when undefined subranges are copied around. These ranges contain
- /// no usefull information and can be removed.
- void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
+ /// no useful information and can be removed.
+ void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
@@ -1840,9 +1830,9 @@ public:
};
} // end anonymous namespace
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
- unsigned L = 0;
+ LaneBitmask L = 0;
for (const MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
continue;
@@ -1879,7 +1869,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
ValueIn = nullptr;
for (const LiveInterval::SubRange &S : LI.subranges()) {
// Transform lanemask to a mask in the joined live interval.
- unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+ LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
if ((SMask & LaneMask) == 0)
continue;
LiveQueryResult LRQ = S.Query(Def);
@@ -1928,7 +1918,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
V.ValidLanes = V.WriteLanes = Lanes;
} else {
DefMI = Indexes->getInstructionFromIndex(VNI->def);
@@ -2190,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) {
}
bool JoinVals::
-taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) {
VNInfo *VNI = LR.getValNumInfo(ValNo);
MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
@@ -2230,7 +2220,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
}
bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
- unsigned Lanes) const {
+ LaneBitmask Lanes) const {
if (MI->isDebugValue())
return false;
for (const MachineOperand &MO : MI->operands()) {
@@ -2264,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
// VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
// join, those lanes will be tainted with a wrong value. Get the extent of
// the tainted lanes.
- unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
- SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
// Tainted lanes would extend beyond the basic block.
return false;
@@ -2384,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
{
// Look for values being erased.
bool DidPrune = false;
@@ -2401,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
// copied and we must remove that subrange value as well.
VNInfo *ValueOut = Q.valueOutOrDead();
if (ValueOut != nullptr && Q.valueIn() == nullptr) {
- DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask)
+ DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
<< " at " << Def << "\n");
LIS->pruneValue(S, Def, nullptr);
DidPrune = true;
@@ -2410,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
continue;
}
// If a subrange ends at the copy, then a value was copied but only
- // partially used later. Shrink the subregister range apropriately.
+ // partially used later. Shrink the subregister range appropriately.
if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
- DEBUG(dbgs() << "\t\tDead uses at sublane "
- << format("%04X", S.LaneMask) << " at " << Def << "\n");
+ DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
ShrinkMask |= S.LaneMask;
}
}
@@ -2477,8 +2467,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
-bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
- unsigned LaneMask,
+void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask,
const CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
@@ -2492,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
// ranges get mapped to the "overflow" lane mask bit which creates unexpected
// interferences.
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
- DEBUG(dbgs() << "*** Couldn't join subrange!\n");
- return false;
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
}
if (!LHSVals.resolveConflicts(RHSVals) ||
!RHSVals.resolveConflicts(LHSVals)) {
- DEBUG(dbgs() << "*** Couldn't join subrange!\n");
- return false;
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2521,36 +2513,37 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
if (EndPoints.empty())
- return true;
+ return;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LRange << '\n');
LIS->extendToIndices(LRange, EndPoints);
- return true;
}
-bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
- unsigned LaneMask, CoalescerPair &CP) {
+ LaneBitmask LaneMask,
+ CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &R : LI.subranges()) {
- unsigned RMask = R.LaneMask;
+ LaneBitmask RMask = R.LaneMask;
// LaneMask of subregisters common to subrange R and ToMerge.
- unsigned Common = RMask & LaneMask;
+ LaneBitmask Common = RMask & LaneMask;
// There is nothing to do without common subregs.
if (Common == 0)
continue;
- DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common));
+ DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
+ << PrintLaneMask(Common) << '\n');
// LaneMask of subregisters contained in the R range but not in ToMerge,
// they have to split into their own subrange.
- unsigned LRest = RMask & ~LaneMask;
+ LaneBitmask LRest = RMask & ~LaneMask;
LiveInterval::SubRange *CommonRange;
if (LRest != 0) {
R.LaneMask = LRest;
- DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest));
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
// Duplicate SubRange for newly merged common stuff.
CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
} else {
@@ -2559,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
CommonRange = &R;
}
LiveRange RangeCopy(ToMerge, Allocator);
- if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
- return false;
+ joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
LaneMask &= ~RMask;
}
if (LaneMask != 0) {
- DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
- return true;
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -2602,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// create initial subranges if necessary.
unsigned DstIdx = CP.getDstIdx();
if (!LHS.hasSubRanges()) {
- unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
- : TRI->getSubRegIndexLaneMask(DstIdx);
+ LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(DstIdx);
// LHS must support subregs or we wouldn't be in this codepath.
assert(Mask != 0);
LHS.createSubRangeFrom(Allocator, Mask, LHS);
} else if (DstIdx != 0) {
// Transform LHS lanemasks to new register class if necessary.
for (LiveInterval::SubRange &R : LHS.subranges()) {
- unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
R.LaneMask = Mask;
}
}
@@ -2619,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
- bool Abort = false;
if (!RHS.hasSubRanges()) {
- unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
- : TRI->getSubRegIndexLaneMask(SrcIdx);
- if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
- Abort = true;
+ LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(SrcIdx);
+ mergeSubRangeInto(LHS, RHS, Mask, CP);
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
- unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
- Abort = true;
- break;
- }
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
+ mergeSubRangeInto(LHS, R, Mask, CP);
}
}
- if (Abort) {
- // This shouldn't have happened :-(
- // However we are aware of at least one existing problem where we
- // can't merge subranges when multiple ranges end up in the
- // "overflow bit" 32. As a workaround we drop all subregister ranges
- // which means we loose some precision but are back to a well defined
- // state.
- assert(TargetRegisterInfo::isImpreciseLaneMask(
- CP.getNewRC()->getLaneMask())
- && "SubRange merge should only fail when merging into bit 32.");
- DEBUG(dbgs() << "\tSubrange join aborted!\n");
- LHS.clearSubRanges();
- RHS.clearSubRanges();
- } else {
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
- LHSVals.pruneSubRegValues(LHS, ShrinkMask);
- RHSVals.pruneSubRegValues(LHS, ShrinkMask);
- }
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2799,7 +2770,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
!isTerminalReg(DstReg, Copy, MRI))
return false;
- // DstReg is a terminal node. Check if it inteferes with any other
+ // DstReg is a terminal node. Check if it interferes with any other
// copy involving SrcReg.
const MachineBasicBlock *OrigBB = Copy.getParent();
const LiveInterval &DstLI = LIS->getInterval(DstReg);
@@ -2904,7 +2875,7 @@ void RegisterCoalescer::joinAllIntervals() {
std::vector<MBBPriorityInfo> MBBs;
MBBs.reserve(MF->size());
for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
JoinSplitEdges && isSplitEdge(MBB)));
}
@@ -2943,7 +2914,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
TRI = STI.getRegisterInfo();
TII = STI.getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Loops = &getAnalysis<MachineLoopInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
JoinGlobalCopies = STI.enableJoinGlobalCopies();
@@ -2981,22 +2952,25 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (MRI->recomputeRegClass(Reg)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ ++NumInflated;
+
LiveInterval &LI = LIS->getInterval(Reg);
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- if (MaxMask == 0) {
+ if (LI.hasSubRanges()) {
// If the inflated register class does not support subregisters anymore
// remove the subranges.
- LI.clearSubRanges();
- } else {
+ if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+ LI.clearSubRanges();
+ } else {
#ifndef NDEBUG
- // If subranges are still supported, then the same subregs should still
- // be supported.
- for (LiveInterval::SubRange &S : LI.subranges()) {
- assert ((S.LaneMask & ~MaxMask) == 0);
- }
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ // If subranges are still supported, then the same subregs
+ // should still be supported.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ assert((S.LaneMask & ~MaxMask) == 0);
+ }
#endif
+ }
}
- ++NumInflated;
}
}
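
The mergeSubRangeInto changes above all revolve around splitting one lane mask against another. As a minimal standalone sketch (plain uint32_t standing in for llvm::LaneBitmask, all names hypothetical, not the exact LLVM code), the partition of an existing subrange mask against an incoming mask looks like this:

#include <cstdint>
#include <cstdio>

using LaneBits = std::uint32_t; // stand-in for llvm::LaneBitmask

int main() {
  LaneBits RMask = 0x000F;    // lanes covered by an existing subrange
  LaneBits LaneMask = 0x0006; // lanes of the range being merged in

  // Lanes present in both masks are merged into a common subrange; the
  // remainder of the old subrange keeps its own (shrunken) lane mask.
  LaneBits Common = RMask & LaneMask;
  LaneBits LRest = RMask & ~LaneMask;
  if (Common != 0)
    std::printf("Copy+Merge lanes %04X\n", (unsigned)Common);
  if (LRest != 0)
    std::printf("Reduce old subrange to %04X\n", (unsigned)LRest);

  // Whatever the incoming mask still covers after all existing subranges
  // were visited becomes a brand-new subrange.
  LaneMask &= ~RMask;
  if (LaneMask != 0)
    std::printf("New subrange for lanes %04X\n", (unsigned)LaneMask);
  return 0;
}

This mirrors the Common/LRest computation in the hunk above; the real code additionally re-joins the live ranges for the common part via joinSubRegRanges.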
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index c3786e5..8382b09 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -59,12 +59,12 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Max Pressure: ";
dumpRegSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
- for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
- dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " ";
+ for (unsigned Reg : LiveInRegs)
+ dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
dbgs() << '\n';
dbgs() << "Live Out: ";
- for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
- dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " ";
+ for (unsigned Reg : LiveOutRegs)
+ dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
dbgs() << '\n';
}
@@ -78,11 +78,13 @@ void RegPressureTracker::dump() const {
}
void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+ const char *sep = "";
for (const PressureChange &Change : *this) {
- if (!Change.isValid() || Change.getUnitInc() == 0)
- continue;
- dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet())
+ if (!Change.isValid())
+ break;
+ dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
<< " " << Change.getUnitInc();
+ sep = " ";
}
dbgs() << '\n';
}
@@ -90,8 +92,8 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
- PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]);
+ for (unsigned RegUnit : RegUnits) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
@@ -104,8 +106,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
/// Simply decrease the current pressure as impacted by these registers.
void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned I = 0, E = RegUnits.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I]));
+ for (unsigned RegUnit : RegUnits)
+ decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit));
}
/// Clear the result so it can be used for another round of pressure tracking.
@@ -157,10 +159,22 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
LiveInRegs.clear();
}
-const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const {
+void LiveRegSet::init(const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned NumRegUnits = TRI.getNumRegs();
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ Regs.setUniverse(NumRegUnits + NumVirtRegs);
+ this->NumRegUnits = NumRegUnits;
+}
+
+void LiveRegSet::clear() {
+ Regs.clear();
+}
+
+static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
- return &LIS->getInterval(Reg);
- return LIS->getCachedRegUnit(Reg);
+ return &LIS.getInterval(Reg);
+ return LIS.getCachedRegUnit(Reg);
}
void RegPressureTracker::reset() {
@@ -176,8 +190,7 @@ void RegPressureTracker::reset() {
else
static_cast<RegionPressure&>(P).reset();
- LiveRegs.PhysRegs.clear();
- LiveRegs.VirtRegs.clear();
+ LiveRegs.clear();
UntiedDefs.clear();
}
@@ -210,8 +223,7 @@ void RegPressureTracker::init(const MachineFunction *mf,
P.MaxSetPressure = CurrSetPressure;
- LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
- LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
+ LiveRegs.init(*MRI);
if (TrackUntiedDefs)
UntiedDefs.setUniverse(MRI->getNumVirtRegs());
}
@@ -250,14 +262,8 @@ void RegPressureTracker::closeTop() {
static_cast<RegionPressure&>(P).TopPos = CurrPos;
assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
- P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
- P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
- for (SparseSet<unsigned>::const_iterator I =
- LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
- P.LiveInRegs.push_back(*I);
- std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
- P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
- P.LiveInRegs.end());
+ P.LiveInRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveInRegs);
}
/// Set the boundary for the bottom of the region and summarize live outs.
@@ -268,21 +274,14 @@ void RegPressureTracker::closeBottom() {
static_cast<RegionPressure&>(P).BottomPos = CurrPos;
assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
- P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
- P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
- for (SparseSet<unsigned>::const_iterator I =
- LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
- P.LiveOutRegs.push_back(*I);
- std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
- P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
- P.LiveOutRegs.end());
+ P.LiveOutRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveOutRegs);
}
/// Finalize the region boundaries and record live ins and live outs.
void RegPressureTracker::closeRegion() {
if (!isTopClosed() && !isBottomClosed()) {
- assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
- "no region boundary");
+ assert(LiveRegs.size() == 0 && "no region boundary");
return;
}
if (!isBottomClosed())
@@ -299,8 +298,7 @@ void RegPressureTracker::closeRegion() {
void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
assert(isBottomClosed() && "need bottom-up tracking to intialize.");
- for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) {
- unsigned Reg = P.LiveOutRegs[i];
+ for (unsigned Reg : P.LiveOutRegs) {
if (TargetRegisterInfo::isVirtualRegister(Reg)
&& !RPTracker.hasUntiedDef(Reg)) {
increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg));
@@ -315,71 +313,113 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
}
namespace {
-/// Collect this instruction's unique uses and defs into SmallVectors for
-/// processing defs and uses in order.
-///
-/// FIXME: always ignore tied opers
-class RegisterOperands {
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- bool IgnoreDead;
+/// List of registers defined and used by a machine instruction.
+class RegisterOperands {
public:
SmallVector<unsigned, 8> Uses;
SmallVector<unsigned, 8> Defs;
SmallVector<unsigned, 8> DeadDefs;
- RegisterOperands(const TargetRegisterInfo *tri,
- const MachineRegisterInfo *mri, bool ID = false):
- TRI(tri), MRI(mri), IgnoreDead(ID) {}
+ void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, bool IgnoreDead = false);
+
+ /// Use liveness information to find dead defs not marked with a dead flag
+ /// and move them to the DeadDefs vector.
+ void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS);
+};
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
+class RegisterOperandsCollector {
+ RegisterOperands &RegOpers;
+ const TargetRegisterInfo &TRI;
+ const MachineRegisterInfo &MRI;
+ bool IgnoreDead;
+
+ RegisterOperandsCollector(RegisterOperands &RegOpers,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool IgnoreDead)
+ : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {}
+
+ void collectInstr(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI)
+ collectOperand(*OperI);
+
+ // Remove redundant physreg dead defs.
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+ }
- /// Push this operand's register onto the correct vector.
- void collect(const MachineOperand &MO) {
+ /// Push this operand's register onto the correct vectors.
+ void collectOperand(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
+ unsigned Reg = MO.getReg();
if (MO.readsReg())
- pushRegUnits(MO.getReg(), Uses);
+ pushRegUnits(Reg, RegOpers.Uses);
if (MO.isDef()) {
if (MO.isDead()) {
if (!IgnoreDead)
- pushRegUnits(MO.getReg(), DeadDefs);
- }
- else
- pushRegUnits(MO.getReg(), Defs);
+ pushRegUnits(Reg, RegOpers.DeadDefs);
+ } else
+ pushRegUnits(Reg, RegOpers.Defs);
}
}
-protected:
- void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
if (containsReg(RegUnits, Reg))
return;
RegUnits.push_back(Reg);
- }
- else if (MRI->isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
if (containsReg(RegUnits, *Units))
continue;
RegUnits.push_back(*Units);
}
}
}
-};
-} // namespace
-/// Collect physical and virtual register operands.
-static void collectOperands(const MachineInstr *MI,
- RegisterOperands &RegOpers) {
- for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
- RegOpers.collect(*OperI);
+ friend class RegisterOperands;
+};
- // Remove redundant physreg dead defs.
- SmallVectorImpl<unsigned>::iterator I =
- std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
- std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
- RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+void RegisterOperands::collect(const MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool IgnoreDead) {
+ RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+ Collector.collectInstr(MI);
+}
+
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ SlotIndex SlotIdx = LIS.getInstructionIndex(&MI);
+ for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin();
+ RI != Defs.end(); /*empty*/) {
+ unsigned Reg = *RI;
+ const LiveRange *LR = getLiveRange(LIS, Reg);
+ if (LR != nullptr) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (LRQ.isDeadDef()) {
+ // LiveIntervals knows this is a dead def even though its MachineOperand
+ // is not flagged as such.
+ DeadDefs.push_back(Reg);
+ RI = Defs.erase(RI);
+ continue;
+ }
+ }
+ ++RI;
+ }
}
+} // namespace
+
/// Initialize an array of N PressureDiffs.
void PressureDiffs::init(unsigned N) {
Size = N;
@@ -399,7 +439,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
// Find an existing entry in the pressure diff for this PSet.
- PressureDiff::iterator I = begin(), E = end();
+ PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
for (; I != E && I->isValid(); ++I) {
if (I->getPSet() >= *PSetI)
break;
@@ -411,10 +451,20 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
if (!I->isValid() || I->getPSet() != *PSetI) {
PressureChange PTmp = PressureChange(*PSetI);
for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
- std::swap(*J,PTmp);
+ std::swap(*J, PTmp);
}
// Update the units for this pressure set.
- I->setUnitInc(I->getUnitInc() + Weight);
+ unsigned NewUnitInc = I->getUnitInc() + Weight;
+ if (NewUnitInc != 0) {
+ I->setUnitInc(NewUnitInc);
+ } else {
+ // Remove entry
+ PressureDiff::iterator J;
+ for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
+ *I = *J;
+ if (J != E)
+ *I = *J;
+ }
}
}
@@ -423,18 +473,18 @@ static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
const MachineRegisterInfo *MRI) {
assert(!PDiff.begin()->isValid() && "stale PDiff");
- for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i)
- PDiff.addPressureChange(RegOpers.Defs[i], true, MRI);
+ for (unsigned Reg : RegOpers.Defs)
+ PDiff.addPressureChange(Reg, true, MRI);
- for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i)
- PDiff.addPressureChange(RegOpers.Uses[i], false, MRI);
+ for (unsigned Reg : RegOpers.Uses)
+ PDiff.addPressureChange(Reg, false, MRI);
}
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- if (LiveRegs.insert(Regs[i]))
- increaseRegPressure(Regs[i]);
+ for (unsigned Reg : Regs) {
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
}
}
@@ -465,13 +515,9 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
/// registers that are both defined and used by the instruction. If a pressure
/// difference pointer is provided record the changes is pressure caused by this
/// instruction independent of liveness.
-bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
+void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
PressureDiff *PDiff) {
- // Check for the top of the analyzable region.
- if (CurrPos == MBB->begin()) {
- closeRegion();
- return false;
- }
+ assert(CurrPos != MBB->begin());
if (!isBottomClosed())
closeBottom();
@@ -483,11 +529,8 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
do
--CurrPos;
while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+ assert(!CurrPos->isDebugValue());
- if (CurrPos->isDebugValue()) {
- closeRegion();
- return false;
- }
SlotIndex SlotIdx;
if (RequireIntervals)
SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
@@ -496,8 +539,11 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
if (RequireIntervals && isTopClosed())
static_cast<IntervalPressure&>(P).openTop(SlotIdx);
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(CurrPos, RegOpers);
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(MI, *LIS);
if (PDiff)
collectPDiff(*PDiff, RegOpers, MRI);
@@ -508,37 +554,19 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
- bool DeadDef = false;
- if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
- if (LR) {
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- DeadDef = LRQ.isDeadDef();
- }
- }
- if (DeadDef) {
- // LiveIntervals knows this is a dead even though it's MachineOperand is
- // not flagged as such. Since this register will not be recorded as
- // live-out, increase its PDiff value to avoid underflowing pressure.
- if (PDiff)
- PDiff->addPressureChange(Reg, false, MRI);
- } else {
- if (LiveRegs.erase(Reg))
- decreaseRegPressure(Reg);
- else
- discoverLiveOut(Reg);
- }
+ for (unsigned Reg : RegOpers.Defs) {
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
+ else
+ discoverLiveOut(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg)) {
// Adjust liveouts if LiveIntervals are available.
if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
if (LR) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
if (!LRQ.isKill() && !LRQ.valueDefined())
@@ -552,24 +580,18 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
}
}
if (TrackUntiedDefs) {
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
+ for (unsigned Reg : RegOpers.Defs) {
if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg))
UntiedDefs.insert(Reg);
}
}
- return true;
}
/// Advance across the current instruction.
-bool RegPressureTracker::advance() {
+void RegPressureTracker::advance() {
assert(!TrackUntiedDefs && "unsupported mode");
- // Check for the bottom of the analyzable region.
- if (CurrPos == MBB->end()) {
- closeRegion();
- return false;
- }
+ assert(CurrPos != MBB->end());
if (!isTopClosed())
closeTop();
@@ -585,11 +607,10 @@ bool RegPressureTracker::advance() {
static_cast<RegionPressure&>(P).openBottom(CurrPos);
}
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(CurrPos, RegOpers);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*CurrPos, *TRI, *MRI);
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
// Discover live-ins.
bool isLive = LiveRegs.contains(Reg);
if (!isLive)
@@ -597,24 +618,21 @@ bool RegPressureTracker::advance() {
// Kill liveness at last uses.
bool lastUse = false;
if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
lastUse = LR && LR->Query(SlotIdx).isKill();
- }
- else {
+ } else {
// Allocatable physregs are always single-use before register rewriting.
lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
}
if (lastUse && isLive) {
LiveRegs.erase(Reg);
decreaseRegPressure(Reg);
- }
- else if (!lastUse && !isLive)
+ } else if (!lastUse && !isLive)
increaseRegPressure(Reg);
}
// Generate liveness for defs.
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
+ for (unsigned Reg : RegOpers.Defs) {
if (LiveRegs.insert(Reg))
increaseRegPressure(Reg);
}
@@ -627,7 +645,6 @@ bool RegPressureTracker::advance() {
do
++CurrPos;
while (CurrPos != MBB->end() && CurrPos->isDebugValue());
- return true;
}
/// Find the max change in excess pressure across all sets.
@@ -653,8 +670,7 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
PDiff = 0; // Under the limit
else
PDiff = PNew - Limit; // Just exceeded limit.
- }
- else if (Limit > PNew)
+ } else if (Limit > PNew)
PDiff = Limit - POld; // Just obeyed limit.
if (PDiff) {
@@ -719,34 +735,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
// Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true);
- collectOperands(MI, RegOpers);
-
- // Boost max pressure for all dead defs together.
- // Since CurrSetPressure and MaxSetPressure
- increaseRegPressure(RegOpers.DeadDefs);
- decreaseRegPressure(RegOpers.DeadDefs);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true);
+ assert(RegOpers.DeadDefs.size() == 0);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(*MI, *LIS);
// Kill liveness at live defs.
- for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Defs[i];
- bool DeadDef = false;
- if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(Reg);
- if (LR) {
- SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- DeadDef = LRQ.isDeadDef();
- }
- }
- if (!DeadDef) {
- if (!containsReg(RegOpers.Uses, Reg))
- decreaseRegPressure(Reg);
- }
+ for (unsigned Reg : RegOpers.Defs) {
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg))
increaseRegPressure(Reg);
}
@@ -853,7 +854,8 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
unsigned MNew = MOld;
// Ignore DeadDefs here because they aren't captured by PressureChange.
unsigned PNew = POld + PDiffI->getUnitInc();
- assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow");
+ assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld)
+ && "PSet overflow/underflow");
if (PNew > MOld)
MNew = PNew;
// Check if current pressure has exceeded the limit.
@@ -892,19 +894,13 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
}
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
-static bool findUseBetween(unsigned Reg,
- SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
- const MachineRegisterInfo *MRI,
+static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx,
+ SlotIndex NextUseIdx, const MachineRegisterInfo &MRI,
const LiveIntervals *LIS) {
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI->use_instr_nodbg_begin(Reg),
- UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) {
- const MachineInstr* MI = &*UI;
- if (MI->isDebugValue())
- continue;
- SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
- if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
- return true;
+ for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) {
+ SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
}
return false;
}
@@ -919,8 +915,8 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
// Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers(TRI, MRI);
- collectOperands(MI, RegOpers);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
@@ -928,21 +924,18 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
if (RequireIntervals)
SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = RegOpers.Uses[i];
+ for (unsigned Reg : RegOpers.Uses) {
if (RequireIntervals) {
// FIXME: allow the caller to pass in the list of vreg uses that remain
// to be bottom-scheduled to avoid searching uses at each query.
SlotIndex CurrIdx = getCurrSlot();
- const LiveRange *LR = getLiveRange(Reg);
+ const LiveRange *LR = getLiveRange(*LIS, Reg);
if (LR) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
- if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS))
decreaseRegPressure(Reg);
- }
}
- }
- else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Allocatable physregs are always single-use before register rewriting.
decreaseRegPressure(Reg);
}
@@ -966,7 +959,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
/// This is expensive for an on-the-fly query because it calls
/// bumpDownwardPressure to recompute the pressure sets based on current
/// liveness. We don't yet have a fast version of downward pressure tracking
-/// analagous to getUpwardPressureDelta.
+/// analogous to getUpwardPressureDelta.
void RegPressureTracker::
getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
ArrayRef<PressureChange> CriticalPSets,
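
The RegisterOperands::detectDeadDefs refactor above replaces an inline liveness check in recede() with a pass that migrates entries from Defs to DeadDefs. A minimal sketch of that erase-while-iterating pattern, with a hypothetical isDeadDef predicate standing in for the LiveIntervals query at the instruction's slot index:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for the LiveIntervals dead-def query.
static bool isDeadDef(unsigned Reg) { return Reg % 2 == 0; }

int main() {
  std::vector<unsigned> Defs = {1, 2, 3, 4};
  std::vector<unsigned> DeadDefs;

  // Move dead defs out of Defs. erase() returns the next valid iterator,
  // so the iterator is only advanced on the keep path.
  for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
    if (isDeadDef(*RI)) {
      DeadDefs.push_back(*RI);
      RI = Defs.erase(RI);
      continue;
    }
    ++RI;
  }

  for (unsigned Reg : Defs)
    std::printf("live def: %u\n", Reg);
  for (unsigned Reg : DeadDefs)
    std::printf("dead def: %u\n", Reg);
  return 0;
}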
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 4176686..8fa1bf7 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -31,9 +31,12 @@ using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
/// setUsed - Set the register units of this register as used.
-void RegScavenger::setRegUsed(unsigned Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- RegUnitsAvailable.reset(*RUI);
+void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
+ for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ LaneBitmask UnitMask = (*RUI).second;
+ if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
+ RegUnitsAvailable.reset((*RUI).first);
+ }
}
void RegScavenger::initRegState() {
@@ -50,9 +53,8 @@ void RegScavenger::initRegState() {
return;
// Live-in registers are in use.
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I)
- setRegUsed(*I);
+ for (const auto &LI : MBB->liveins())
+ setRegUsed(LI.PhysReg, LI.LaneMask);
// Pristine CSRs are also unavailable.
const MachineFunction &MF = *MBB->getParent();
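
The new setRegUsed above only reserves register units whose lane mask overlaps the requested lanes, treating a unit mask of 0 as "no lane information, always covered". A toy model of that filter (a plain vector of (unit, mask) pairs in place of MCRegUnitMaskIterator; names are illustrative only):

#include <cstdint>
#include <cstdio>
#include <vector>

struct UnitMask {
  unsigned Unit;          // register unit index
  std::uint32_t LaneMask; // lanes this unit corresponds to; 0 = unknown
};

int main() {
  std::vector<UnitMask> Units = {{0, 0x1}, {1, 0x2}, {2, 0x0}};
  std::uint32_t LaneMask = 0x2; // lanes the caller marks as used

  for (const UnitMask &UM : Units) {
    // A zero unit mask is conservatively treated as overlapping any
    // request; otherwise only overlapping units become unavailable.
    if (UM.LaneMask == 0 || (LaneMask & UM.LaneMask) != 0)
      std::printf("unit %u marked used\n", UM.Unit);
  }
  return 0;
}

With LaneMask = 0x2 this reserves units 1 and 2 but leaves unit 0 available, which is what lets a live-in covering only some lanes of a register leave the other units scavengable.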
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 76a7fef..efde61e 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -372,7 +372,6 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << "\n";
}
}
- dbgs() << "\n";
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 390b6d2..fb82ab7 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -51,15 +51,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- bool IsPostRAFlag, bool RemoveKillFlags,
- LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
- IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
- CanHandleTerminators(false), FirstDbgValue(nullptr) {
- assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+ bool RemoveKillFlags)
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+ RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
+ TrackLaneMasks(false), FirstDbgValue(nullptr) {
DbgValues.clear();
- assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
- "Virtual registers must be removed prior to PostRA scheduling");
const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -230,11 +226,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (TRI->isPhysicalRegister(Reg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- else {
- assert(!IsPostRA && "Virtual register encountered after regalloc.");
- if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(&ExitSU, i);
- }
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
@@ -242,11 +235,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- if (!Uses.contains(Reg))
- Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ for (const auto &LI : (*SI)->liveins()) {
+ if (!Uses.contains(LI.PhysReg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
}
}
}
@@ -371,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
}
+LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
+{
+ unsigned Reg = MO.getReg();
+ // No point in tracking lanemasks if we don't have interesting subregisters.
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+ if (!RC.HasDisjunctSubRegs)
+ return ~0u;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0)
+ return RC.getLaneMask();
+ return TRI->getSubRegIndexLaneMask(SubReg);
+}
+
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
@@ -378,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
- const MachineInstr *MI = SU->getInstr();
- unsigned Reg = MI->getOperand(OperIdx).getReg();
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ LaneBitmask DefLaneMask;
+ LaneBitmask KillLaneMask;
+ if (TrackLaneMasks) {
+ bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
+ DefLaneMask = getLaneMaskForMO(MO);
+ // If we have a <read-undef> flag, none of the lane values comes from an
+ // earlier instruction.
+ KillLaneMask = IsKill ? ~0u : DefLaneMask;
+
+ // Clear undef flag, we'll re-add it later once we know which subregister
+ // Def is first.
+ MO.setIsUndef(false);
+ } else {
+ DefLaneMask = ~0u;
+ KillLaneMask = ~0u;
+ }
+
+ if (MO.isDead()) {
+ assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
+ "Dead defs should have no uses");
+ } else {
+ // Add data dependence to all uses we found so far.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
+ E = CurrentVRegUses.end(); I != E; /*empty*/) {
+ LaneBitmask LaneMask = I->LaneMask;
+ // Ignore uses of other lanes.
+ if ((LaneMask & KillLaneMask) == 0) {
+ ++I;
+ continue;
+ }
+
+ if ((LaneMask & DefLaneMask) != 0) {
+ SUnit *UseSU = I->SU;
+ MachineInstr *Use = UseSU->getInstr();
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
+ I->OperandIndex));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+
+ LaneMask &= ~KillLaneMask;
+ // If we found a Def for all lanes of this use, remove it from the list.
+ if (LaneMask != 0) {
+ I->LaneMask = LaneMask;
+ ++I;
+ } else
+ I = CurrentVRegUses.erase(I);
+ }
+ }
- // Singly defined vregs do not have output/anti dependencies.
- // The current operand is a def, so we have at least one.
- // Check here if there are any others...
+ // Shortcut: Singly defined vregs do not have output/anti dependencies.
if (MRI.hasOneDef(Reg))
return;
- // Add output dependence to the next nearest def of this vreg.
+ // Add output dependence to the next nearest defs of this vreg.
//
// Unless this definition is dead, the output dependence should be
// transitively redundant with antidependencies from this definition's
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
- if (DefI == VRegDefs.end())
- VRegDefs.insert(VReg2SUnit(Reg, SU));
- else {
- SUnit *DefSU = DefI->SU;
- if (DefSU != SU && DefSU != &ExitSU) {
- SDep Dep(SU, SDep::Output, Reg);
- Dep.setLatency(
- SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
- DefSU->addPred(Dep);
- }
- DefI->SU = SU;
+ LaneBitmask LaneMask = DefLaneMask;
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for other lanes.
+ if ((V2SU.LaneMask & LaneMask) == 0)
+ continue;
+ // Add an output dependence.
+ SUnit *DefSU = V2SU.SU;
+ // Ignore additional defs of the same lanes in one instruction. This can
+ // happen because lanemasks are shared for targets with too many
+ // subregisters. We also use some representation tricks/hacks where we
+ // add super-register defs/uses, to imply that although we only access
+ // parts of the reg, we care about the full one.
+ if (DefSU == SU)
+ continue;
+ SDep Dep(SU, SDep::Output, Reg);
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ DefSU->addPred(Dep);
+
+ // Update current definition. This can get tricky if the def covered a
+ // bigger lanemask before. We then have to shrink it and create a new
+ // VReg2SUnit for the non-overlapping part.
+ LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
+ LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
+ if (NonOverlapMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
+ V2SU.SU = SU;
+ V2SU.LaneMask = OverlapMask;
}
+ // If there was no CurrentVRegDefs entry for some lanes yet, create one.
+ if (LaneMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
@@ -416,59 +492,34 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
- MachineInstr *MI = SU->getInstr();
- unsigned Reg = MI->getOperand(OperIdx).getReg();
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // Remember the use. Data dependencies will be added when we find the def.
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
+ CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
+
+ // Add antidependences to the following defs of the vreg.
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for unrelated lanes.
+ LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
+ if ((PrevDefLaneMask & LaneMask) == 0)
+ continue;
+ if (V2SU.SU == SU)
+ continue;
- // Record this local VReg use.
- VReg2UseMap::iterator UI = VRegUses.find(Reg);
- for (; UI != VRegUses.end(); ++UI) {
- if (UI->SU == SU)
- break;
+ V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
}
- if (UI == VRegUses.end())
- VRegUses.insert(VReg2SUnit(Reg, SU));
-
- // Lookup this operand's reaching definition.
- assert(LIS && "vreg dependencies requires LiveIntervals");
- LiveQueryResult LRQ
- = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
- VNInfo *VNI = LRQ.valueIn();
-
- // VNI will be valid because MachineOperand::readsReg() is checked by caller.
- assert(VNI && "No value to read by operand");
- MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
- // Phis and other noninstructions (after coalescing) have a NULL Def.
- if (Def) {
- SUnit *DefSU = getSUnit(Def);
- if (DefSU) {
- // The reaching Def lives within this scheduling region.
- // Create a data dependence.
- SDep dep(DefSU, SDep::Data, Reg);
- // Adjust the dependence latency using operand def/use information, then
- // allow the target to perform its own adjustments.
- int DefOp = Def->findRegisterDefOperandIdx(Reg);
- dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
-
- const TargetSubtargetInfo &ST = MF.getSubtarget();
- ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
- SU->addPred(dep);
- }
- }
-
- // Add antidependence to the following def of the vreg it uses.
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
- if (DefI != VRegDefs.end() && DefI->SU != SU)
- DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
- if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
- return true;
- return false;
+ return MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)));
}
// This MI might have either incomplete info, or known to be unsafe
@@ -508,7 +559,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
return false;
}
-/// This returns true if the two MIs need a chain edge betwee them.
+/// This returns true if the two MIs need a chain edge between them.
/// If these are not even memory operations, we still may need
/// chain deps between them. The question really is - could
/// these two MIs be reordered during scheduling from memory dependency
@@ -670,7 +721,7 @@ static inline void addChainDependency(AliasAnalysis *AA,
unsigned TrueMemOrderLatency = 0,
bool isNormalMemory = false) {
// If this is a false dependency,
- // do not add the edge, but rememeber the rejected node.
+ // do not add the edge, but remember the rejected node.
if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
@@ -685,7 +736,7 @@ static inline void addChainDependency(AliasAnalysis *AA,
}
}
-/// Create an SUnit for each real instruction, numbered in top-down toplological
+/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B, implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
@@ -743,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
+void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, 0, SU));
+ }
+}
+
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
- PressureDiffs *PDiffs) {
+ PressureDiffs *PDiffs,
+ bool TrackLaneMasks) {
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+ this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
ScheduleDAG::clearDAG();
@@ -766,7 +844,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// We build scheduling units by walking a block's instruction list from bottom
// to top.
- // Remember where a generic side-effecting instruction is as we procede.
+ // Remember where a generic side-effecting instruction is as we proceed.
SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
// Memory references to specific known memory locations are tracked
@@ -787,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.setUniverse(TRI->getNumRegs());
Uses.setUniverse(TRI->getNumRegs());
- assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
+ assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ CurrentVRegDefs.setUniverse(NumVirtRegs);
+ CurrentVRegUses.setUniverse(NumVirtRegs);
+
VRegUses.clear();
- VRegDefs.setUniverse(MRI.getNumVirtRegs());
- VRegUses.setUniverse(MRI.getNumVirtRegs());
+ VRegUses.setUniverse(NumVirtRegs);
// Model data dependencies between instructions being scheduled and the
// ExitSU.
@@ -818,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
assert(RPTracker->getPos() == std::prev(MII) &&
"RPTracker can't find MI");
+ collectVRegUses(SU);
}
assert(
@@ -835,7 +918,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (TRI->isPhysicalRegister(Reg))
addPhysRegDeps(SU, j);
else {
- assert(!IsPostRA && "Virtual register encountered!");
if (MO.isDef()) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
@@ -890,7 +972,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
@@ -903,27 +985,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes, ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
@@ -937,7 +1019,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPred(SDep(SU, SDep::Barrier));
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
if (Objs.empty()) {
// Treat all other stores conservatively.
@@ -961,7 +1043,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, 0, true);
// If we're not using AA, then we only need one store per object.
@@ -986,7 +1068,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
@@ -996,15 +1078,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
}
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
@@ -1012,7 +1094,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
if (Objs.empty()) {
// A load with no underlying object. Depend on all
@@ -1020,7 +1102,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes);
PendingLoads.push_back(SU);
@@ -1044,7 +1126,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
@@ -1052,11 +1134,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
@@ -1068,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Defs.clear();
Uses.clear();
- VRegDefs.clear();
+ CurrentVRegDefs.clear();
+ CurrentVRegUses.clear();
PendingLoads.clear();
}
@@ -1080,11 +1163,9 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
// Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI) {
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
+ for (const auto &LI : (*SI)->liveins()) {
// Repeat, for reg and all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
LiveRegs.set(*SubRegs);
}
@@ -1103,7 +1184,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// Once we set a kill flag on an instruction, we bail out, as otherwise we
// might set it on too many operands. We will clear as many flags as we
// can though.
- MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator Begin = MI->getIterator();
MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
while (Begin != End) {
for (MachineOperand &MO : (--End)->operands()) {
@@ -1237,7 +1318,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
toggleKillFlag(MI, MO);
DEBUG(MI->dump());
DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator Begin = MI->getIterator();
MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
while (++Begin != End)
DEBUG(Begin->dump());
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index b2e4617..1150d26 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -43,9 +43,12 @@ namespace llvm {
return (Node->NumPreds > 10 || Node->NumSuccs > 10);
}
- static bool hasNodeAddressLabel(const SUnit *Node,
- const ScheduleDAG *Graph) {
- return true;
+ static std::string getNodeIdentifierLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+ OS << static_cast<const void *>(Node);
+ return R;
}
/// If you want to override the dot attributes printed for a particular
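For context, a standalone C++ sketch (not part of the patch) of what the new getNodeIdentifierLabel above does: it streams the node's address into a string instead of returning a boolean flag. pointerLabel is a hypothetical stand-in using only the standard library in place of raw_string_ostream.

#include <cassert>
#include <sstream>
#include <string>

std::string pointerLabel(const void *P) {
  std::ostringstream OS;
  OS << P;                 // streams the address, as raw_string_ostream does above
  return OS.str();
}

int main() {
  int X = 0;
  assert(!pointerLabel(&X).empty());
  return 0;
}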
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b29306..0872d7a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -156,13 +156,16 @@ namespace {
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
@@ -233,18 +236,17 @@ namespace {
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
- SDValue visitSREM(SDNode *N);
- SDValue visitUREM(SDNode *N);
+ SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
- SDValue visitSDIVREM(SDNode *N);
- SDValue visitUDIVREM(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
@@ -265,6 +267,7 @@ namespace {
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCE(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
@@ -298,6 +301,10 @@ namespace {
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
@@ -312,9 +319,11 @@ namespace {
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -338,14 +347,17 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildReciprocalEstimate(SDValue Op);
- SDValue BuildRsqrtEstimate(SDValue Op);
- SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
- SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -374,6 +386,10 @@ namespace {
/// chain (aliasing node).
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Do FindBetterChain for a store and any possibly adjacent stores on
+ /// consecutive chains.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -388,19 +404,37 @@ namespace {
unsigned SequenceNum;
};
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
/// constant build_vector of the stored constant values in Stores.
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended. LoadedVT returns the type
+ /// of the original loaded value. NarrowLoad returns whether the load would
+ /// need to be narrowed in order to match.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumElem,
+ EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
@@ -409,7 +443,7 @@ namespace {
void getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -427,9 +461,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- auto *F = DAG.getMachineFunction().getFunction();
- ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
}
/// Runs the dag combiner on all nodes in the work list
@@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
@@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
- Op.getOperand(0));
+ Op.getOperand(0), Flags);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
@@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0));
+ Op.getOperand(1), Op.getOperand(0), Flags);
case ISD::FMUL:
case ISD::FDIV:
@@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, Depth+1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
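A minimal standalone sketch (plain C++, not from the patch) of the scalar identities the fneg folds above rely on; the rewrites are only valid when signed zeros and NaNs may be ignored, which is why the surrounding code asserts UnsafeFPMath.

#include <cassert>

// -(A+B) rewritten as (-B) - A, and -(A-B) as B - A.
double negOfAdd(double A, double B) { return -B - A; }
double negOfSub(double A, double B) { return B - A; }

int main() {
  assert(negOfAdd(3.0, 4.0) == -(3.0 + 4.0));
  assert(negOfSub(3.0, 4.0) == -(3.0 - 4.0));
  return 0;
}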
@@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I)
- AddToWorklist(I);
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
- case ISD::SREM: return visitSREM(N);
- case ISD::UREM: return visitUREM(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
- case ISD::SDIVREM: return visitSDIVREM(N);
- case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
@@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCE: return visitSETCCE(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
@@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
}
return SDValue();
}
@@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
- SDNode *CSENode;
- if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
- &BinNode->Flags);
- } else {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
- }
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static bool isNullConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool isNullFPConstant(SDValue V) {
- ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
- return Const != nullptr && Const->isZero() && !Const->isNegative();
-}
-
-static bool isAllOnesConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
-}
-
-static bool isOneConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isOne();
-}
-
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer, else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
@@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
- if (VT.isInteger() && !VT.isVector()) {
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
-
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
-
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
- if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
- }
- }
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
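A standalone check (not part of the patch) of the identity behind the haveNoCommonBitsSet fold above: when two values share no set bits, addition produces no carries, so it equals bitwise OR.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F0u, B = 0x0F0Fu; // disjoint bit masks
  assert((A & B) == 0);              // no common bits set
  assert(A + B == (A | B));          // so no carries: add equals or
  return 0;
}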
@@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1) {
- SDLoc DL(N);
+ if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::CARRY_FALSE, DL,
- MVT::Glue));
- }
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
- return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
- return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
@@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ if (isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
// reassociate mul
if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
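A standalone check (not part of the patch) of the distributive identity behind the (mul (add x, c1), c2) fold above, in wrapping unsigned arithmetic.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 12345u, C1 = 7u, C2 = 9u;
  // (x + c1) * c2 == x*c2 + c1*c2
  assert((X + C1) * C2 == X * C2 + C1 * C2);
  return 0;
}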
@@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return SDValue();
}
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ EVT VT = Node->getValueType(0);
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
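The useDivRem combine above exploits the fact that quotient and remainder usually come from a single operation or libcall. A standalone illustration (not part of the patch) using the standard library's analogous interface:

#include <cassert>
#include <cstdlib>

int main() {
  std::div_t QR = std::div(17, 5); // one divrem-style call yields both results
  assert(QR.quot == 17 / 5);
  assert(QR.rem == 17 % 5);
  return 0;
}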
@@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue()) {
- SDLoc DL(N);
+ if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
+
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
- N0, N1);
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
@@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
!cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
- // If dividing by powers of two is cheap, then don't perform the following
- // fold.
- if (TLI.isPow2SDivCheap())
- return SDValue();
-
// Target-specific implementation of sdiv x, pow2.
- SDValue Res = BuildSDIVPow2(N);
- if (Res.getNode())
+ if (SDValue Res = BuildSDIVPow2(N))
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
- SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
@@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// If integer divide is expensive and we satisfy the requirements, emit an
- // alternate sequence.
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildSDIV(N);
- if (Op.getNode()) return Op;
- }
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
+ // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, DL, VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDLoc DL(N);
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
@@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
}
}
+
// fold (udiv x, c) -> alternate
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildUDIV(N);
- if (Op.getNode()) return Op;
- }
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+ // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
+ // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
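A standalone check (not part of the patch) of the power-of-two strength reductions used above: unsigned division by 1<<c is a logical shift right, and the matching remainder fold in visitREM further below is a mask.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEFu;
  unsigned C = 4;
  assert(X / (1u << C) == X >> C);                // udiv x, pow2 -> srl
  assert(X % (1u << C) == (X & ((1u << C) - 1))); // urem x, pow2 -> and
  return 0;
}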
@@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSREM(SDNode *N) {
+// Handles ISD::SREM and ISD::UREM.
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
- // fold (srem c1, c2) -> c1%c2
+ // fold (rem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
- N0C, N1C))
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
- // If we know the sign bits of both operands are zero, strength reduce to a
- // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
- }
- // If X/C can be simplified by the division-by-constant logic, lower
- // X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- return Sub;
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
}
- }
-
- // undef % X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
- // X % undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
- return N1;
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitUREM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- // fold (urem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
- N0C, N1C))
- return Folded;
- // fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
- }
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1,
+ } else {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
}
}
}
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
+ // To avoid mangling nodes, this simplification requires that the combine()
+ // call for the speculative DIV not cause a DIVREM conversion. We guard
+ // against this by skipping the simplification if isIntDivCheap(). When
+ // div is not cheap, combine will not return a DIVREM. Regardless,
+ // checking cheapness here makes sense since the simplification results in
+ // fatter code.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
+ assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
+ (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
+ // srem/urem -> sdivrem/udivrem
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
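A standalone check (not part of the patch) of the lowering used above when X/C simplifies: the remainder is recovered as X - (X/C)*C.

#include <cassert>

int main() {
  int X = 1234, C = 17;
  assert(X % C == X - (X / C) * C); // X%C lowered via the optimized division
  return 0;
}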
@@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
- if (Res.getNode()) return Res;
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
- return SDValue();
-}
+ // fold constant operands: (minmax c1, c2) -> c
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
-SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
- if (Res.getNode()) return Res;
+ // canonicalize constant to RHS
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
@@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
return SDValue();
}
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad) {
+ uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
+
+ if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ return false;
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ NarrowLoad = false;
+ return true;
+ }
+
+ // Do not change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ NarrowLoad = true;
+ return true;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
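A standalone illustration (not part of the patch) of the pattern isAndLoadExtLoad classifies above: masking a wide load with a low-bit mask is equivalent to a narrower zero-extending load of the low bytes. This sketch assumes a little-endian layout.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Wide = 0xAABBCCDDu;
  uint8_t Narrow;
  std::memcpy(&Narrow, &Wide, 1);   // narrower (byte) load; little-endian assumed
  assert((Wide & 0xFFu) == Narrow); // same value as (and (load x), 0xFF)
  return 0;
}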
@@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
: cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT LoadedVT = LN0->getMemoryVT();
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
-
+ auto NarrowLoad = false;
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad)) {
+ if (!NarrowLoad) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
@@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
-
- // Do not change the width of a volatile load.
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
+ } else {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
@@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
- SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
- BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
@@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
- if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
@@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
}
// Return true if we can prove that, whenever Neg and Pos are both in the
-// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
-// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
- // If OpSize is a power of 2 then:
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+ // If EltSize is a power of 2 then:
//
- // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
- // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
- // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
- // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
- // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
- // Neg == OpSize - Pos [B]
+ // Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
- // behavior if Pos == 0 (and consequently Neg == OpSize).
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
//
- // We could actually use [A] whenever OpSize is a power of 2, but the
+ // We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
- // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
- // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND &&
- isPowerOf2_64(OpSize) &&
- Neg.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
- Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(OpSize);
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ if (NegC->getAPIntValue() == EltSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(EltSize);
+ }
+ }
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
- return 0;
- ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
- return 0;
+ return false;
SDValue NegOp1 = Neg.getOperand(1);
- // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits &&
- Pos.getOpcode() == ISD::AND &&
- Pos.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
- Pos = Pos.getOperand(0);
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND)
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ if (PosC->getAPIntValue() == EltSize - 1)
+ Pos = Pos.getOperand(0);
// The condition we need is now:
//
- // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
- // OpSize & Mask == NegC & Mask
+ // EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
APInt Width;
if (Pos == NegOp1)
Width = NegC->getAPIntValue();
+
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
- // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
- // NegC & Mask == (OpSize - PosC) & Mask
- // OpSize & Mask == (NegC + PosC) & Mask
- else if (Pos.getOpcode() == ISD::ADD &&
- Pos.getOperand(0) == NegOp1 &&
- Pos.getOperand(1).getOpcode() == ISD::Constant)
- Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
- NegC->getAPIntValue());
- else
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
return false;
- // Now we just need to check that OpSize & Mask == Width & Mask.
+ // Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
- // Opsize & Mask is 0 since Mask is Opsize - 1.
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
- return Width == OpSize;
+ return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
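For reference, the source-level idiom (plain C++, not from the patch) that MatchRotate and matchRotateSub recognize: an OR of opposing shifts whose amounts sum to the element width, using the masked-amount form [A] discussed above.

#include <cassert>
#include <cstdint>

uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;                                  // the (and Neg', EltSize-1) form [A]
  return (X << N) | (X >> ((32 - N) & 31));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 0) == 0x12345678u); // masking avoids UB at N == 0
  return 0;
}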
@@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
@@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
- std::swap(LHSMask , RHSMask );
+ std::swap(LHSMask, RHSMask);
}
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
@@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (LHSShiftAmt.getOpcode() == ISD::Constant &&
- RHSShiftAmt.getOpcode() == ISD::Constant) {
- uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != OpSizeInBits)
+ if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
+ uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != EltSizeInBits)
return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
@@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+ APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+ SDValue Mask = DAG.getConstant(AllBits, DL, VT);
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
- Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask,
+ DAG.getConstant(RHSBits, DL, VT)));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
- Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask,
+ DAG.getConstant(LHSBits, DL, VT)));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
return Rot.getNode();
@@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// Simplify the expression using non-local knowledge.
if (!VT.isVector() &&
@@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSHL = visitShiftByConstant(N, N1C);
- if (NewSHL.getNode())
- return NewSHL;
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
+ if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (SDValue Folded =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
+ }
}
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ return NewSHL;
+
return SDValue();
}
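A standalone check (not part of the patch) of the identity behind the new (shl (mul x, c1), c2) fold above, in wrapping unsigned arithmetic.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 321u, C1 = 5u, C2 = 3u;
  assert(((X * C1) << C2) == X * (C1 << C2)); // shl distributes into the mul
  return 0;
}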
@@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRA = visitShiftByConstant(N, N1C);
- if (NewSRA.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N, N1C))
return NewSRA;
- }
return SDValue();
}
@@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
@@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRL = visitShiftByConstant(N, N1C);
- if (NewSRL.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N, N1C))
return NewSRL;
- }
// Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
@@ -4973,70 +5087,47 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // fold selects based on a setcc into other things, such as min/max/abs
- if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath &&
- VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-
- SDValue FMinMax =
- combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
- N1, N2, CC, TLI, DAG);
- if (FMinMax)
- return FMinMax;
- }
-
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- N1, N2, N0.getOperand(2));
- return SimplifySelect(SDLoc(N), N0, N1, N2);
- }
-
if (VT0 == MVT::i1) {
- if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
- // select (and Cond0, Cond1), X, Y
- // -> select Cond0, (select Cond1, X, Y), Y
- if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ // The code in this block deals with the following two equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right-hand form if the inner select already exists in the DAG,
+ // and to the left-hand form if we know that we can further optimize the
+ // combination of the conditions.
+ bool normalizeToSequence
+ = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
InnerSelect, N2);
- }
- // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
- if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
InnerSelect);
- }
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
- if (N1->getOpcode() == ISD::SELECT) {
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
N0, N1_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
@@ -5049,13 +5140,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
- if (N2->getOpcode() == ISD::SELECT) {
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
N0, N2_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
@@ -5069,6 +5160,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // select x, y (fcmp lt x, y) -> fminnum x, y
+ // select x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, CC,
+ TLI, DAG))
+ return FMinMax;
+ }
+
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(SDLoc(N), N0, N1, N2);
+ }
+
return SDValue();
}
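A truth-table check (plain C++, not part of the patch) of the two select equivalences this block normalizes between:

#include <cassert>

int sel(bool C, int X, int Y) { return C ? X : Y; }

int main() {
  for (int C0 = 0; C0 < 2; ++C0)
    for (int C1 = 0; C1 < 2; ++C1) {
      // select(C0|C1, x, y) == select(C0, x, select(C1, x, y))
      assert(sel(C0 | C1, 1, 2) == sel(C0, 1, sel(C1, 1, 2)));
      // select(C0&C1, x, y) == select(C0, select(C1, x, y), y)
      assert(sel(C0 & C1, 1, 2) == sel(C0, sel(C1, 1, 2), 2));
    }
  return 0;
}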
@@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
N2.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SDValue CV = ConvertSelectToConcatVector(N, DAG);
- if (CV.getNode())
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
return CV;
}
@@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDLoc(N));
}
-/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+SDValue DAGCombiner::visitSETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (Carry.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
@@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (!VT.isVector()) {
EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
@@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (truncate x)) -> (and x, mask)
- if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
-
+ if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
- SDValue Op = N0.getOperand(0);
- if (Op.getValueType().bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (Op.getValueType().bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+    // Try to mask before the extension to avoid having to generate a larger
+    // mask, possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT)) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = N0.getOperand(0);
+ if (SrcVT.bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ } else if (SrcVT.bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
- return DAG.getZeroExtendInReg(Op, SDLoc(N),
- N0.getValueType().getScalarType());
}
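
Both branches above lean on one identity: zero-extending a truncate keeps only the low bits, so it equals an AND with the matching mask, and that AND may be done before or after the extension. A standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x12345678u;
  // (zext (trunc x)) == (and x, mask): truncating to i8 and zero-extending
  // back keeps exactly the bits the mask keeps.
  assert((uint32_t)(uint8_t)x == (x & 0xffu));
  // Masking in the narrower type first, then extending, gives the same
  // value, which is why the combine prefers the narrow AND when it is legal.
  uint16_t narrow = (uint16_t)x;
  assert((uint32_t)(uint8_t)narrow == (uint32_t)(narrow & 0xffu));
}
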
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
- SetCCs, TLI);
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ auto NarrowLoad = false;
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad))
+ DoXform = false;
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ }
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
@@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
// Watch out for shift count overflow though.
if (Amt >= Mask.getBitWidth()) break;
APInt NewMask = Mask << Amt;
- SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
- if (SimplifyLHS.getNode())
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
@@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (sext_in_reg c1) -> c1
- if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
@@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
BSwap, N1);
}
- // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
- // into a build_vector.
- if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
- unsigned ShAmt = VTBits - EVTBits;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->getOpcode() == ISD::UNDEF) {
- Elts.push_back(Op);
- continue;
- }
-
- ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SDLoc(Op), Op.getValueType()));
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
- }
-
return SDValue();
}
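
The removed block folded sext_in_reg of constant vectors element by element with a shl/ashr pair; that duty now falls to the generic constant folding enabled at the top of the function. The shift-pair identity itself, as a standalone sketch:

#include <cassert>
#include <cstdint>

// sign_extend_inreg of the low `bits` bits, written as shl + ashr, the same
// shift pair the removed build_vector fold applied to each element.
int32_t signExtendInReg(int32_t x, unsigned bits) {
  unsigned sh = 32 - bits;
  return (int32_t)((uint32_t)x << sh) >> sh;
}

int main() {
  assert(signExtendInReg(0x000000ff, 8) == -1);      // 0xff as i8 is -1
  assert(signExtendInReg(0x0000007f, 8) == 127);     // sign bit clear
  assert(signExtendInReg(0x00345678, 16) == 0x5678); // positive i16
}
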
@@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- SDValue Reduced = ReduceLoadWidth(N);
- if (Reduced.getNode())
+ if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
+
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
return SDValue();
}
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
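
The ppcf128 paths added below treat the value as a pair of doubles whose sum is the number: fneg must flip the sign of both halves, and fabs flips both exactly when the Hi half is negative. The sign-bit trick itself, sketched standalone:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double hi = 3.5, lo = 1.0e-20;  // a ppcf128-style pair: value is hi + lo
  uint64_t h, l;
  std::memcpy(&h, &hi, 8);
  std::memcpy(&l, &lo, 8);
  const uint64_t kSignBit = 0x8000000000000000ULL;
  h ^= kSignBit;  // xor each half with the sign bit, i.e.
  l ^= kSignBit;  // (xor (bitcast x) (build_pair flipbit, flipbit))
  double nh, nl;
  std::memcpy(&nh, &h, 8);
  std::memcpy(&nl, &l, 8);
  assert(nh + nl == -(hi + lo));  // the pair as a whole is negated
}
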
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
@@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
@@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
@@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(X.getNode());
}
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
+ N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
+ N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
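
For the ordinary scalar case the fcopysign bitcast becomes pure integer masking: magnitude bits from the constant, sign bit from x, ORed together. The same trick on a float, standalone:

#include <cassert>
#include <cstdint>
#include <cstring>

float copysignViaBits(float mag, float sgn) {
  uint32_t m, s;
  std::memcpy(&m, &mag, 4);
  std::memcpy(&s, &sgn, 4);
  // (or (and (bitcast cst), ~signbit), (and (bitcast x), signbit))
  uint32_t r = (m & 0x7fffffffu) | (s & 0x80000000u);
  float out;
  std::memcpy(&out, &r, 4);
  return out;
}

int main() {
  assert(copysignViaBits(2.5f, -1.0f) == -2.5f);
  assert(copysignViaBits(-2.5f, 1.0f) == 2.5f);
}
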
@@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
- if (N0.getOpcode() == ISD::BUILD_PAIR) {
- SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
- if (CombineLD.getNode())
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
- }
// Remove double bitcasts from shuffles - this is often a legacy of
// XformToShuffleWithZero being used to combine bitmaskings (of
@@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
// If operands are a bitcast, peek through if it casts the original VT.
- // If operands are a UNDEF or constant, just bitcast back to original VT.
+  // If operands are constants, just bitcast back to the original VT.
auto PeekThroughBitcast = [&](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST &&
- Op.getOperand(0)->getValueType(0) == VT)
+ Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
@@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && N0.getOpcode() == ISD::FMUL &&
+ N1.getOpcode() == ISD::FMUL) {
+ if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ std::swap(N0, N1);
+ }
+
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
@@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
@@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
@@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
@@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
@@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
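
Throughout these combines, HasFMAD versus HasFMA is about intermediate rounding: FMAD rounds the product before the add while a true FMA rounds once, so fusing can change results; hence the AllowFusion gate. A standalone demonstration (build with contraction disabled, e.g. -ffp-contract=off, so the compiler does not fuse the first expression on its own):

#include <cmath>
#include <cstdio>

int main() {
  double eps = std::ldexp(1.0, -27);
  double x = 1.0 + eps, y = 1.0 + eps, z = -1.0;
  double unfused = x * y + z;          // product rounded, then added (FMAD-like)
  double fused   = std::fma(x, y, z);  // a single rounding at the end (FMA-like)
  // The exact product 1 + 2^-26 + 2^-54 does not fit in a double, so the
  // two forms differ in the low bits.
  std::printf("unfused = %a\nfused   = %a\n", unfused, fused);
}
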
+/// Try to perform FMA combining on a given FMUL node.
+SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
+ // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
+ // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
+ // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
+ if (XC0 && XC0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y);
+ if (XC0 && XC0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0))
+ return FMA;
+
+ return SDValue();
+}
+
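
FuseFADD and FuseFSUB above are distributivity, e.g. (x + 1) * y = x*y + y with the trailing term folded into the FMA addend. A standalone check of the four identities on values where they are exact (in general the fused form rounds less, not identically):

#include <cassert>
#include <cmath>

int main() {
  double x = 3.0, y = 7.0;
  assert((x + 1.0) * y == std::fma(x, y, y));    // (fmul (fadd x, +1.0), y)
  assert((x - 1.0) * y == std::fma(x, y, -y));   // (fmul (fsub x, +1.0), y)
  assert((1.0 - x) * y == std::fma(-x, y, y));   // (fmul (fsub +1.0, x), y)
  assert((-1.0 - x) * y == std::fma(-x, y, -y)); // (fmul (fsub -1.0, x), y)
}
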
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations));
+ GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+ Flags),
+ Flags);
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT));
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT));
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
@@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
}
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- SDValue Fused = visitFADDForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- SDValue Fused = visitFSUBForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector()) {
@@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// the second operand of the outer multiply are constants.
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
}
@@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
// during an early run of DAGCombiner can prevent folding with fmuls
// inserted during lowering.
- if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::FADD &&
+ (N0.getOperand(0) == N0.getOperand(1)) &&
+ N0.hasOneUse()) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
return SDValue();
}
@@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
+ // TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FMUL &&
- N0 == N2.getOperand(0) &&
- N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
- }
+ // TODO: FMA nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+ if (Options.UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
+ }
- // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FMUL && N1CFP &&
- N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
- N2);
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
+ N2);
+ }
}
// (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
- // (fma x, c, x) -> (fmul x, (c+1))
- if (Options.UnsafeFPMath && N1CFP && N0 == N2)
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT)));
-
- // (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
+ if (Options.UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT)));
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ }
return SDValue();
}
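
The unsafe-math FMA rewrites are the same distributivity in another guise: x*c + x = x*(c + 1) and x*c - x = x*(c - 1). Standalone check, exact for these small values but a rounding change in general, hence the UnsafeFPMath gate:

#include <cassert>
#include <cmath>

int main() {
  double x = 5.0, c = 3.0;
  assert(std::fma(x, c, x) == x * (c + 1.0));   // (fma x, c, x)
  assert(std::fma(x, c, -x) == x * (c - 1.0));  // (fma x, c, (fneg x))
}
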
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path is increased from "one FDIV" to "one FDIV
+// + one FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags *Flags = N->getFlags();
+ if (!UnsafeMath && !Flags->hasAllowReciprocal())
+ return SDValue();
+
+ // Skip if current node is a reciprocal.
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ SDValue N1 = N->getOperand(1);
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+ if (!MinUses || N1->use_size() < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if (Users.size() < MinUses)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
+
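
The rewrite trades N divisions for one division plus N multiplies, at the price of double rounding per quotient; that is why it needs global unsafe math or a per-node allow-reciprocal flag. Standalone sketch of the shape and its caveat:

#include <cstdio>

int main() {
  double a = 7.0, b = 10.0, d = 3.0;
  // (a / D; b / D) -> (recip = 1.0 / D; a * recip; b * recip)
  double recip = 1.0 / d;
  // Each multiply result is now doubly rounded (once in recip, once in the
  // product), so it may differ from the direct quotient in the last bit.
  std::printf("%a vs %a\n", a / d, a * recip);
  std::printf("%a vs %a\n", b / d, b * recip);
}
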
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
+ DAG.getConstantFP(Recip, DL, VT), Flags);
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
@@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
- // Combine multiple FDIVs with the same divisor into multiple FMULs by the
- // reciprocal.
- // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
- // Notice that this is not always beneficial. One reason is different target
- // may have different costs for FDIV and FMUL, so sometimes the cost of two
- // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
- // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
- if (Options.UnsafeFPMath) {
- // Skip if current node is a reciprocal.
- if (N0CFP && N0CFP->isExactlyValue(1.0))
- return SDValue();
-
- // Find all FDIV users of the same divisor.
- // Use a set because duplicates may be present in the user list.
- SetVector<SDNode *> Users;
- for (auto *U : N1->uses())
- if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
- Users.insert(U);
-
- if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // FIXME: This optimization requires some level of fast-math, so the
- // created reciprocal node should at least have the 'allowReciprocal'
- // fast-math-flag set.
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
-
- // Dividend / Divisor -> Dividend * Reciprocal
- for (auto *U : Users) {
- SDValue Dividend = U->getOperand(0);
- if (Dividend != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
- Reciprocal);
- CombineTo(U, NewNode);
- } else if (U != Reciprocal.getNode()) {
- // In the absence of fast-math-flags, this user node is always the
- // same node as Reciprocal, but with FMF they may be different nodes.
- CombineTo(U, Reciprocal);
- }
- }
- return SDValue(N, 0); // N was replaced.
- }
- }
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+ return CombineRepeatedDivisors;
return SDValue();
}
@@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+ &cast<BinaryWithFlagsSDNode>(N)->Flags);
return SDValue();
}
@@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
return SDValue();
+ // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
if (!RV)
return SDValue();
-
+
EVT VT = RV.getValueType();
SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ EVT CCVT = getSetCCResultType(VT);
SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
@@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
ZeroCmp, Zero, RV);
}
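
The select against zero at the end of visitFSQRT exists because x * rsqrt(x) evaluates to 0 * inf, a NaN, at x == 0. Standalone illustration:

#include <cassert>
#include <cmath>

int main() {
  double x = 2.0;
  // sqrt(x) recomputed as x * (1 / sqrt(x)), the shape the estimate expands
  // to; it can be off by an ulp, so compare with a tolerance.
  double approx = x * (1.0 / std::sqrt(x));
  assert(std::fabs(approx - std::sqrt(x)) < 1e-14);
  // At x == 0 the rewritten form is 0 * inf, a NaN, so the combine selects
  // the answer back to 0.0 for that input.
  double z = 0.0;
  assert(std::isnan(z * (1.0 / std::sqrt(z))));
}
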
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ // Do not optimize out type conversion of f128 type yet.
+ // For some target like x86_64, configuration is changed
+ // to keep one f128 value in one SSE register, but
+ // instruction selection cannot handle FCOPYSIGN on
+ // SSE registers yet.
+ SDValue N1 = N->getOperand(1);
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND) &&
+ (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+}
+
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
- if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(0));
@@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
- TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ (TLI.isFPImmLegal(CVal, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N0.getOperand(1)),
+ &cast<BinaryWithFlagsSDNode>(N0)->Flags);
}
}
@@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Op1 = TheXor->getOperand(1);
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
- SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode()) {
+ if (SDValue Tmp = visitXOR(TheXor)) {
if (Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
@@ -9722,8 +10089,8 @@ struct LoadedSlice {
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
- if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
- LS.Inst->getOperand(0).getValueType()))
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
@@ -10625,30 +10992,109 @@ struct BaseIndexOffset {
};
} // namespace
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+      // to the "Use" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
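
The profitability test above looks for a product that becomes shared after distributing: (A + c1) * c3 = A*c3 + c1*c3, and a sibling (A + c2) * c3 reuses the same A*c3. In integers the identity is exact:

#include <cassert>

int main() {
  int A = 11, c1 = 3, c2 = 5, c3 = 7;
  // (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2): once both expressions
  // are rewritten, the "A * c3" product is computed once and shared.
  int shared = A * c3;
  assert((A + c1) * c3 == shared + c1 * c3);
  assert((A + c2) * c3 == shared + c2 * c3);
}
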
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
- BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+ Chains.push_back(St->getChain());
+ BuildVector.push_back(St->getValue());
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ unsigned NumStores, bool IsConstantSrc, bool UseVector) {
// Make sure we have something to merge.
- if (NumElem < 2)
+ if (NumStores < 2)
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned LatestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
+ for (unsigned i=0; i < NumStores; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
@@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
LatestNodeUsed = i;
}
+ SmallVector<SDValue, 8> Chains;
+
// The latest Node in the DAG.
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
if (UseVector) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ bool IsVec = MemVT.isVector();
+ unsigned Elts = NumStores;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ // Get the type for the merged vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
} else {
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = St->getValue();
- // All of the operands of a BUILD_VECTOR must have the same type.
+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
+ Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
- }
+      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+                              DL, Ty, Ops);
+    }
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = DAG.getDataLayout().isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ Chains.push_back(St->getChain());
+
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
@@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ assert(!Chains.empty());
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
@@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// Replace the last store with the new store
CombineTo(LatestOp, NewStore);
// Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return true;
}
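The endianness handling in the constant-packing loop above is subtle, so here
is a minimal standalone sketch of it (plain C++ with illustrative values; it
assumes a little-endian target and host): four consecutive one-byte constant
stores collapse into a single i32 whose in-memory bytes reproduce the
original narrow stores.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // One-byte constant stores, listed by increasing address.
      const uint8_t Stores[4] = {0x11, 0x22, 0x33, 0x44};
      const bool IsLE = true; // assumption: little-endian target and host
      const unsigned NumStores = 4, ElementSizeBytes = 1;
      uint32_t StoreInt = 0;
      for (unsigned i = 0; i < NumStores; ++i) {
        // Visit the stores in reverse order on little-endian targets.
        unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
        StoreInt <<= ElementSizeBytes * 8;
        StoreInt |= Stores[Idx];
      }
      // A single merged i32 store writes the same memory image.
      uint8_t Mem[4];
      std::memcpy(Mem, &StoreInt, sizeof(Mem));
      std::printf("merged = 0x%08x, memory = %02x %02x %02x %02x\n",
                  StoreInt, Mem[0], Mem[1], Mem[2], Mem[3]);
      return 0;
    }

On a little-endian host this prints merged = 0x44332211 with memory bytes
11 22 33 44, the same image the four narrow stores would have produced.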
-static bool allowableAlignment(const SelectionDAG &DAG,
- const TargetLowering &TLI, EVT EVTTy,
- unsigned AS, unsigned Align) {
- if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
- return true;
-
- Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
- return (Align >= ABIAlignment);
-}
-
void DAGCombiner::getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
@@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
EVT MemVT = St->getMemoryVT();
unsigned Seq = 0;
StoreSDNode *Index = St;
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+
+ if (UseAA) {
+ // Look at other users of the same chain. Stores on the same chain do not
+ // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
+ // to be on the same chain, so don't bother looking at adjacent chains.
+
+ SDValue Chain = St->getChain();
+ for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ if (I.getOperandNo() != 0)
+ continue;
+
+ if (OtherST->isVolatile() || OtherST->isIndexed())
+ continue;
+
+ if (OtherST->getMemoryVT() != MemVT)
+ continue;
+
+ BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr());
+
+ if (Ptr.equalBaseIndex(BasePtr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
+ }
+ }
+
+ return;
+ }
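A toy model of the candidate scan above (the struct and values are
illustrative stand-ins for BaseIndexOffset and the chain's use list, not the
LLVM types): only users whose base and index both match the original store
become merge candidates, keyed by their byte offset.

    #include <cstdio>
    #include <vector>

    // Stand-in for BaseIndexOffset: base, index, and byte offset.
    struct StoreInfo { int Base, Index; long Offset; };

    int main() {
      StoreInfo St = {1, 0, 8}; // the store being combined
      // Other stores hanging off the same chain (so they do not alias).
      std::vector<StoreInfo> ChainUsers = {{1, 0, 0}, {2, 0, 4}, {1, 0, 16}};
      std::vector<long> CandidateOffsets;
      for (const StoreInfo &Other : ChainUsers)
        if (Other.Base == St.Base && Other.Index == St.Index)
          CandidateOffsets.push_back(Other.Offset); // same base and index
      for (long Off : CandidateOffsets)
        std::printf("candidate at offset %ld\n", Off); // offsets 0 and 16
      return 0;
    }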
+
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (Index != St && !SDValue(Index, 0)->hasOneUse())
@@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (Index->getMemoryVT() != MemVT)
break;
+ // We do not allow under-aligned stores in order to prevent
+ // overwriting stores. NOTE: this is a bad hack. Alignment SHOULD
+ // be irrelevant here; what MATTERS is that we not move memory
+ // operations that potentially overlap past each other.
+ if (Index->getAlignment() < MemVT.getStoreSize())
+ break;
+
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
return false;
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
+ if (!MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
@@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ // Don't merge vectors into wider vectors if the source data comes from loads.
+ // TODO: This restriction can be lifted by using logic similar to the
+ // ExtractVecSrc case.
+ if (MemVT.isVector() && IsLoadSrc)
return false;
// Only look at ends of store sequences.
@@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// We need to make sure that these nodes do not interfere with
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
+
// Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
-
+
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // Sort the memory operands according to their distance from the base pointer.
+ // Sort the memory operands according to their distance from the
+ // base pointer. As a secondary criterion, make sure stores coming
+ // later in the code come first in the list. This is important for
+ // the non-UseAA case, because we're merging stores into the FINAL
+ // store along a chain which potentially contains aliasing stores.
+ // Thus, if there are multiple stores to the same address, the last
+ // one can be considered for merging but not the others.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase ||
(LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum > RHS.SequenceNum);
+ LHS.SequenceNum < RHS.SequenceNum);
});
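A standalone sketch of this comparator (hypothetical values; MemOp is an
illustrative stand-in for MemOpLink): SequenceNum is assigned while walking
backwards from the final store, so among stores to the same offset a smaller
number means later in program order, and ascending order puts the only
mergeable store first.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct MemOp { long OffsetFromBase; unsigned SequenceNum; };

    int main() {
      // Two stores to offset 0 (seq 2 and 0) and one to offset 4 (seq 1).
      std::vector<MemOp> Ops = {{0, 2}, {4, 1}, {0, 0}};
      std::sort(Ops.begin(), Ops.end(), [](MemOp LHS, MemOp RHS) {
        return LHS.OffsetFromBase < RHS.OffsetFromBase ||
               (LHS.OffsetFromBase == RHS.OffsetFromBase &&
                LHS.SequenceNum < RHS.SequenceNum);
      });
      for (const MemOp &Op : Ops) // (0,0) now sorts before (0,2)
        std::printf("offset %ld, seq %u\n", Op.OffsetFromBase, Op.SequenceNum);
      return 0;
    }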
// Scan the memory operations on the chain and find the first non-consecutive
@@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
}
- bool Alias = false;
// Check if this store interferes with any of the loads that we found.
- for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
- if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
- Alias = true;
- break;
- }
- // We found a load that alias with this store. Stop the sequence.
- if (Alias)
+ // If we find a load that aliases with this store, stop the sequence.
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
+ [&](LSBaseSDNode* Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
@@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the constant store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
LastLegalType = i + 1;
}
}
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
+ FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ LastLegalVectorType = i + 1;
}
}
-
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if (NoVectors) {
- LastLegalVectorType = 0;
- } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
- LastLegalVectorType,
- FirstStoreAS)) {
- LastLegalVectorType = 0;
- }
-
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
@@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecEltSrc) {
- unsigned NumElem = 0;
+ if (IsExtractVecSrc) {
+ unsigned NumStoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
+ unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a vector.
// It should be possible to handle mixed sources, but load sources need
// more careful handling (see the block of code below that handles
// consecutive loads).
- if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
return false;
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
- NumElem = i + 1;
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ NumStoresToMerge = i + 1;
}
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
false, true);
}
@@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads much share the same chain.
+ // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
@@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
-
// Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
+ bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd)
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
- FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ IsFastLd)
LastLegalIntegerType = i+1;
}
}
@@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
+ // Collect the chains from all merged stores.
+ SmallVector<SDValue, 8> MergeStoreChains;
+ MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+
// The latest Node in the DAG.
unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
@@ -11147,6 +11663,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// latest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
LatestNodeUsed = i;
+
+ MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
}
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
@@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
SDValue NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+ SDValue NewStoreChain =
+ DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+
SDValue NewStore = DAG.getStore(
- LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
- // Replace one of the loads with the new load.
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
-
- // Remove the rest of the load chains.
- for (unsigned i = 1; i < NumElem ; ++i) {
- // Replace all chain users of the old load nodes with the chain of the new
- // load node.
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
}
// Replace the last store with the new store.
@@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return true;
}
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment(), AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
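The i64 split at the end of this function can be modeled with a small
standalone sketch (plain C++; assumes a little-endian target, IEEE-754
doubles, and a little-endian host for the memcpy):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <utility>

    int main() {
      const double FP = 1.0;
      uint64_t Val;
      std::memcpy(&Val, &FP, sizeof(Val)); // bitcastToAPInt() equivalent
      uint32_t Lo = static_cast<uint32_t>(Val & 0xFFFFFFFF);
      uint32_t Hi = static_cast<uint32_t>(Val >> 32);
      const bool IsBigEndian = false; // assumption: little-endian target
      if (IsBigEndian)
        std::swap(Lo, Hi);
      // The two 32-bit stores that replace the one f64 store.
      std::printf("store i32 0x%08x at Ptr, 0x%08x at Ptr+4\n", Lo, Hi);
      return 0;
    }

For 1.0 this prints 0x00000000 at Ptr and 0x3ff00000 at Ptr+4, matching the
bit pattern of the original double.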
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
return Chain;
- // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
- // NOTE: If the original store is volatile, this transform must not increase
- // the number of stores. For example, on x86-32 an f64 can be stored in one
- // processor operation but an i64 (which is not legal) requires two. So the
- // transform should not be done in this case.
- if (Value.getOpcode() != ISD::TargetConstantFP) {
- SDValue Tmp;
- switch (CFP->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unknown FP type");
- case MVT::f16: // We don't do this for these yet.
- case MVT::f80:
- case MVT::f128:
- case MVT::ppcf128:
- break;
- case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- ;
- Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), SDLoc(CFP),
- MVT::i32);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
- break;
- case MVT::f64:
- if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
- ;
- Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), SDLoc(CFP), MVT::i64);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
-
- if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- // Many FP stores are not made apparent until after legalize, e.g. for
- // argument passing. Since this is so common, custom legalize the
- // 64-bit integer store into two 32-bit stores.
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Lo, Hi);
-
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
-
- SDLoc DL(N);
-
- SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
- Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal,
- ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
- Alignment = MinAlign(Alignment, 4U);
- SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
- Ptr, ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal,
- Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- St0, St1);
- }
-
- break;
- }
- }
- }
-
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
@@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try transforming a pair floating point load / store ops to integer
// load / store ops.
- SDValue NewST = TransformFPLoadStorePair(N);
- if (NewST.getNode())
+ if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
@@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
UseAA = false;
#endif
if (UseAA && ST->isUnindexed()) {
- // Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
-
- // If there is a better chain.
- if (Chain != BetterChain) {
- SDValue ReplStore;
-
- // Replace the chain to avoid dependency.
- if (ST->isTruncatingStore()) {
- ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemoryVT(), ST->getMemOperand());
- } else {
- ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemOperand());
- }
+ // FIXME: We should do this even without AA enabled. AA will just allow
+ // FindBetterChain to work in more situations. The problem with this is that
+ // any combine that expects memory operations to be on consecutive chains
+ // first needs to be updated to look for users of the same chain.
- // Create token to keep both nodes around.
- SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
- MVT::Other, Chain, ReplStore);
-
- // Make sure the new and old chains are cleaned up.
- AddToWorklist(Token.getNode());
-
- // Don't add users to work list.
- return CombineTo(N, Token, false);
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handles all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
}
}
@@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return SDValue(N, 0);
}
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(Value)) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
return ReduceLoadOpStoreWidth(N);
}
@@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue EltNo = N->getOperand(1);
- bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (ConstEltNo &&
+ InVec.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VT) &&
+ (InVec.hasOneUse() ||
+ TLI.aggressivelyPreferBuildVectorSources(VT))) {
+ SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (NVT == InEltVT)
+ return Elt;
+
+ // TODO: It may be useful to truncate, if free, when the build_vector
+ // implicitly converts.
+ }
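A toy model of this fold (illustrative values only): extracting a constant
lane of a build_vector simply forwards that operand, provided the operand's
scalar type already matches the result type.

    #include <cstdio>

    int main() {
      const int Ops[2] = {10, 20}; // build_vector x, y
      const unsigned EltNo = 1;    // constant extraction index
      std::printf("%d\n", Ops[EltNo]); // prints 20: y, no vector op needed
      return 0;
    }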
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(Elt);
+ int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
@@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
}
-SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
- // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
- // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
- // inputs come from at most two distinct vectors, turn this into a shuffle
- // node.
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ // Peek through any bitcast.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+
+ // Peek through any bitcast.
+ while (ExtVec.getOpcode() == ISD::BITCAST)
+ ExtVec = ExtVec.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask);
+}
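A sketch of the mask construction above (hypothetical shapes: a v4 result and
v2 subvectors extracted from two distinct v4 inputs): elements taken from the
second input are offset by NumElts, exactly as in the loop above.

    #include <cstdio>
    #include <vector>

    int main() {
      const int NumElts = 4, NumOpElts = 2;
      std::vector<int> Mask;
      int ExtIdx0 = 2; // concat operand 0: extract_subvector(SV0, 2)
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx0);           // refers to SV0
      int ExtIdx1 = 0; // concat operand 1: extract_subvector(SV1, 0)
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx1 + NumElts); // refers to SV1
      for (int M : Mask)
        std::printf("%d ", M); // prints: 2 3 4 5
      std::printf("\n");
      return 0;
    }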
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
@@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
SVN->getMask().end(), [](int i) { return i == -1; })) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
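A standalone model of why the AND is redundant (low16 is a hypothetical
stand-in for the way FP16_TO_FP consumes only the low 16 bits of its
operand):

    #include <cstdint>
    #include <cstdio>

    static uint16_t low16(uint32_t Op) { return static_cast<uint16_t>(Op); }

    int main() {
      uint32_t Op = 0xDEAD3C00; // high bits ignored; 0x3C00 is fp16 1.0
      std::printf("%04x == %04x\n", low16(Op & 0xffff), low16(Op));
      return 0;
    }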
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+ // Attempt to create a valid clear mask, splitting the mask into
+ // sub-elements and checking that each is either all zeros or all ones,
+ // making it suitable for shuffle masking.
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ Indices.push_back(-1);
+ continue;
+ }
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (isAllOnesConstant(Elt))
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian()) {
+ Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ } else {
+ Bits = Bits.lshr(SubIdx * NumSubBits);
+ }
+
+ if (Split > 1)
+ Bits = Bits.trunc(NumSubBits);
+
+ if (Bits.isAllOnesValue())
Indices.push_back(i);
- else if (isNullConstant(Elt))
- Indices.push_back(NumElts+i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
- EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, dl, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
- }
+ SDValue Zero = DAG.getConstant(0, dl, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, &Indices[0]));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
return SDValue();
}
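For the Split == 1 case, the clear-mask construction reduces to the following
standalone sketch (illustrative v4i32 AND mask): all-ones lanes select the
original element i, zero lanes select element i + NumSubElts from the zero
vector.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const uint32_t MaskConst[4] = {0xffffffff, 0, 0xffffffff, 0};
      const int NumSubElts = 4;
      std::vector<int> Indices;
      for (int i = 0; i != NumSubElts; ++i) {
        if (MaskConst[i] == 0xffffffffu)
          Indices.push_back(i);              // keep the LHS element
        else if (MaskConst[i] == 0)
          Indices.push_back(i + NumSubElts); // take a zero
        else
          return 1; // mixed bits: no clear mask exists
      }
      for (int Idx : Indices)
        std::printf("%d ", Idx); // prints: 0 5 2 7
      std::printf("\n");
      return 0;
    }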
@@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
- // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
- // this operation.
- if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
- RHS.getOpcode() == ISD::BUILD_VECTOR) {
- // Check if both vectors are constants. If not bail out.
- if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
- cast<BuildVectorSDNode>(RHS)->isConstant()))
- return SDValue();
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- SDValue LHSOp = LHS.getOperand(i);
- SDValue RHSOp = RHS.getOperand(i);
-
- // Can't fold divide by zero.
- if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
- N->getOpcode() == ISD::FDIV) {
- if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
- break;
- }
-
- EVT VT = LHSOp.getValueType();
- EVT RVT = RHSOp.getValueType();
- if (RVT != VT) {
- // Integer BUILD_VECTOR operands may have types larger than the element
- // size (e.g., when the element type is not legal). Prior to type
- // legalization, the types may not match between the two BUILD_VECTORS.
- // Truncate one of the operands to make them match.
- if (RVT.getSizeInBits() > VT.getSizeInBits()) {
- RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
- } else {
- LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
- VT = RVT;
- }
- }
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
- LHSOp, RHSOp);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::Constant &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() == LHS.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
- }
-
// Type legalization might introduce new shuffles in the DAG.
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
@@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0));
+ LHS.getOperand(0), RHS.getOperand(0),
+ N->getFlags());
AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
@@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false,
- false, false, Alignment);
+ return DAG.getLoad(
+ TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
}
}
@@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Get a SetCC of the condition
// NOTE: Don't create a SETCC if it's not legal on this target.
if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC,
- LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
SDValue Temp, SCC;
// cast from setcc result type to select result type
if (LegalTypes) {
@@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
}
- // Check to see if this is the equivalent of setcc
- // FIXME: Turn all of these into setcc if setcc if setcc is legal
- // otherwise, go ahead with the folds.
- if (0 && isNullConstant(N3) && isOneConstant(N2)) {
- EVT XType = N0.getValueType();
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
- SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
- if (Res.getValueType() != VT)
- Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
- return Res;
- }
-
- // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
- if (isNullConstant(N1) && CC == ISD::SETEQ &&
- (!LegalOperations ||
- TLI.isOperationLegal(ISD::CTLZ, XType))) {
- SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
- return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
- DAG.getConstant(Log2_32(XType.getSizeInBits()),
- SDLoc(Ctlz),
- getShiftAmountTy(Ctlz.getValueType())));
- }
- // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
- if (isNullConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
- XType, DAG.getConstant(0, DL, XType), N0);
- SDValue NotN0 = DAG.getNOT(DL, N0, XType);
- return DAG.getNode(ISD::SRL, DL, XType,
- DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(XType)));
- }
- // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
- if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
- XType));
- }
- }
-
// Check to see if this is an integer abs.
// select_cc setg[te] X, 0, X, -X ->
// select_cc setgt X, -1, X, -X ->
@@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
}
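Numerically, the iteration Est = Est + Est * (1 - Arg * Est) doubles the
number of correct digits each step. A minimal sketch (hypothetical starting
estimate, standing in for a hardware reciprocal approximation):

    #include <cstdio>

    int main() {
      const float Arg = 3.0f;
      float Est = 0.3f; // crude estimate of 1/3
      for (unsigned i = 0; i < 3; ++i)
        Est = Est + Est * (1.0f - Arg * Est);
      std::printf("1/%.1f ~= %.9f\n", Arg, Est); // converges toward 1/3
      return 0;
    }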
@@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
AddToWorklist(HalfArg.getNode());
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
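A numeric sketch of the one-constant refinement (hypothetical input and
starting estimate): HalfArg = 1.5 * A - A equals A / 2, so the loop needs
only the single floating-point constant 1.5.

    #include <cstdio>

    int main() {
      const float A = 4.0f;
      const float HalfArg = 1.5f * A - A; // A / 2 without a 0.5 constant
      float Est = 0.6f; // crude estimate of 1/sqrt(4)
      for (unsigned i = 0; i < 3; ++i)
        Est = Est * (1.5f - HalfArg * Est * Est);
      std::printf("1/sqrt(%.1f) ~= %.9f\n", A, Est); // converges toward 0.5
      return 0;
    }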
@@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
AddToWorklist(HalfEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR ?
- BuildRsqrtNROneConst(Op, Est, Iterations) :
- BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
}
return Est;
}
@@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SDValue Chain = Chains.pop_back_val();
// For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
- // find more and revert to original chain since the xform is unlikely to be
- // profitable.
+ // chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
- if (Depth > 6 || Aliases.size() == 2) {
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;
@@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ return false;
+
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ ChainedStores.push_back(St);
+
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction of a different kind or
+ // one which has a different base pointer.
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ break;
+
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (true) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ ChainedStores.push_back(STn);
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = nullptr;
+ break;
+ }
+ }
+ }
+
+ bool MadeChange = false;
+ SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+
+ for (StoreSDNode *ChainedStore : ChainedStores) {
+ SDValue Chain = ChainedStore->getChain();
+ SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+
+ if (Chain != BetterChain) {
+ MadeChange = true;
+ BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
+ }
+ }
+
+ // Do all replacements after finding the replacements to make to avoid making
+ // the chains more complicated by introducing new TokenFactors.
+ for (auto Replacement : BetterChains)
+ replaceStoreChain(Replacement.first, Replacement.second);
+
+ return MadeChange;
+}
+
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2b9ba2c..cfbb209 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -118,9 +118,9 @@ bool FastISel::lowerArguments() {
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
E = FuncInfo.Fn->arg_end();
I != E; ++I) {
- DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
- FuncInfo.ValueMap[I] = VI->second;
+ FuncInfo.ValueMap[&*I] = VI->second;
}
return true;
}
@@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
// have to worry about calling conventions and target-specific lowering code.
// Instead we perform the call lowering right here.
//
- // CALLSEQ_START(0)
+ // CALLSEQ_START(0...)
// STACKMAP(id, nbytes, ...)
// CALLSEQ_END(0, 0)
//
@@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(0);
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
// Issue STACKMAP.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
- case Intrinsic::eh_actions: {
- unsigned ResultReg = getRegForValue(UndefValue::get(II->getType()));
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
@@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) {
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
bool FastISel::selectInstruction(const Instruction *I) {
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I))
- if (!handlePHINodesInSuccessorBlocks(I->getParent()))
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
return false;
+ }
DbgLoc = I->getDebugLoc();
@@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
LibInfo->hasOptimizedCodeGen(Func))
return false;
- // Don't handle Intrinsic::trap if a trap funciton is specified.
+ // Don't handle Intrinsic::trap if a trap function is specified.
if (F && F->getIntrinsicID() == Intrinsic::trap &&
Call->hasFnAttr("trap-func-name"))
return false;
@@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I))
+ if (isa<TerminatorInst>(I)) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
return false;
}
@@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
- MSucc->getBasicBlock());
- FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as a successor unless it is equal to FalseMBB: this can
+ // happen in degenerate IR, and MachineIR forbids having a block appear twice
+ // in the successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, DbgLoc);
}
/// Emit an FNeg operation.
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, const ConstantFP *FPImm) {
+unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
@@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, uint64_t Imm1,
- uint64_t Imm2) {
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
- Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
@@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, uint64_t Imm1,
- uint64_t Imm2) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cc306cb..b62bd2b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
MachineModuleInfo &MMI = MF->getMMI();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- // Static allocas can be folded into the initial stack frame adjustment.
- if (AI->isStaticAlloca()) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
- Type *Ty = AI->getAllocatedType();
uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
- AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
-
} else {
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(
- AI->getAllocatedType()),
- AI->getAlignment());
- unsigned StackAlign =
- MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
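
A scalar restatement of the folding condition introduced above; the helper name is illustrative:

```cpp
// Illustrative helper: a static alloca may live in the fixed part of the
// frame only if the target can realign the stack, or if the object's
// required alignment already fits within the guaranteed stack alignment.
static bool canFoldIntoInitialFrame(bool StackRealignable,
                                    unsigned ObjectAlign,
                                    unsigned StackAlign) {
  return StackRealignable || ObjectAlign <= StackAlign;
}
```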
@@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for inline asm that clobbers the SP register.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(I);
+ ImmutableCallSite CS(&*I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo()->setHasVAStart(true);
}
- // If we have a musttail call in a variadic funciton, we need to ensure we
+ // If we have a musttail call in a variadic function, we need to ensure we
// forward implicit register parameters.
if (const auto *CI = dyn_cast<CallInst>(I)) {
if (CI->isMustTailCall() && Fn->isVarArg())
@@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
- if (isUsedOutsideOfDefiningBlock(I))
- if (!isa<AllocaInst>(I) ||
- !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(I);
+ if (isUsedOutsideOfDefiningBlock(&*I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(&*I);
// Collect llvm.dbg.declare information. This is done now instead of
// during the initial isel pass through the IR so that it is done
@@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Decide the preferred extend type for a value.
- PreferredExtendType[I] = getPreferredExtendForValue(I);
+ PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
for (BB = Fn->begin(); BB != EB; ++BB) {
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
- MBBMap[BB] = MBB;
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB->isEHPad()) {
+ const Instruction *I = BB->getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create funclets, so we could avoid setting
+ // this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(I)) {
+ MMI.setHasEHFunclets(true);
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(I)) {
+ assert(&*BB->begin() == I &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(I))
+ assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
+ MBBMap[&*BB] = MBB;
MF->push_back(MBB);
// Transfer the address-taken flag. This is necessary because there could
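
The EH-pad classification above can be summarized in a small predicate; this is a sketch against LLVM's EH instruction hierarchy, with an illustrative name:

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: landing pads keep the classic Itanium-style model; any other EH
// pad (catchswitch, catchpad, cleanuppad) implies Windows-style funclets
// and therefore opaque stack adjustments in the frame.
static bool ehPadUsesFunclets(const BasicBlock &BB) {
  if (!BB.isEHPad())
    return false;
  return !isa<LandingPadInst>(BB.getFirstNonPHI());
}
```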
@@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark landing pad blocks.
SmallVector<const LandingPadInst *, 4> LPads;
for (BB = Fn->begin(); BB != EB; ++BB) {
- if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
- MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
- if (BB->isLandingPad())
- LPads.push_back(BB->getLandingPadInst());
+ const Instruction *FNP = BB->getFirstNonPHI();
+ if (BB->isEHPad() && MBBMap.count(&*BB))
+ MBBMap[&*BB]->setIsEHPad();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
+ LPads.push_back(LPI);
}
- // If this is an MSVC EH personality, we need to do a bit more work.
- EHPersonality Personality = EHPersonality::Unknown;
- if (Fn->hasPersonalityFn())
- Personality = classifyEHPersonality(Fn->getPersonalityFn());
- if (!isMSVCEHPersonality(Personality))
+ // If this personality uses funclets, we need to do a bit more work.
+ if (!Fn->hasPersonalityFn())
+ return;
+ EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ if (!isFuncletEHPersonality(Personality))
return;
- if (Personality == EHPersonality::MSVC_Win64SEH ||
- Personality == EHPersonality::MSVC_X86SEH) {
- addSEHHandlersForLPads(LPads);
- }
-
- WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn);
- if (Personality == EHPersonality::MSVC_CXX) {
- const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
- calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo);
- }
-
- // Copy the state numbers to LandingPadInfo for the current function, which
- // could be a handler or the parent. This should happen for 32-bit SEH and
- // C++ EH.
- if (Personality == EHPersonality::MSVC_CXX ||
- Personality == EHPersonality::MSVC_X86SEH) {
- for (const LandingPadInst *LP : LPads) {
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]);
- }
- }
-}
-
-void FunctionLoweringInfo::addSEHHandlersForLPads(
- ArrayRef<const LandingPadInst *> LPads) {
- MachineModuleInfo &MMI = MF->getMMI();
-
- // Iterate over all landing pads with llvm.eh.actions calls.
- for (const LandingPadInst *LP : LPads) {
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LP->getNextNode());
- if (!ActionsCall ||
- ActionsCall->getIntrinsicID() != Intrinsic::eh_actions)
- continue;
-
- // Parse the llvm.eh.actions call we found.
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- SmallVector<std::unique_ptr<ActionHandler>, 4> Actions;
- parseEHActions(ActionsCall, Actions);
-
- // Iterate EH actions from most to least precedence, which means
- // iterating in reverse.
- for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) {
- ActionHandler *Action = I->get();
- if (auto *CH = dyn_cast<CatchHandler>(Action)) {
- const auto *Filter =
- dyn_cast<Function>(CH->getSelector()->stripPointerCasts());
- assert((Filter || CH->getSelector()->isNullValue()) &&
- "expected function or catch-all");
- const auto *RecoverBA =
- cast<BlockAddress>(CH->getHandlerBlockOrFunc());
- MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA);
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ calculateCatchReturnSuccessorColors(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.CatchObj.Alloca) {
+ assert(StaticAllocaMap.count(H.CatchObj.Alloca));
+ H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca];
} else {
- assert(isa<CleanupHandler>(Action));
- const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc());
- MMI.addSEHCleanupHandler(LPadMBB, Fini);
+ H.CatchObj.FrameIndex = INT_MAX;
}
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
}
}
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
+ }
}
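
The handler fields remapped above are PointerUnions that start out holding IR blocks and end up holding machine blocks. A self-contained sketch of that rewrite-in-place pattern, with int*/float* standing in for the two block pointer types:

```cpp
#include "llvm/ADT/PointerUnion.h"
using namespace llvm;

// Sketch of the remap pattern: before lowering the union holds one pointer
// type, afterwards the other; the union is simply reassigned in place.
static void remapHandler(PointerUnion<int *, float *> &Handler,
                         float *Lowered) {
  if (Handler && Handler.is<int *>())
    Handler = Lowered;
}
```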
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
void FunctionLoweringInfo::clear() {
- assert(CatchInfoFound.size() == CatchInfoLost.size() &&
- "Not all catch info was assigned to a landing pad!");
-
MBBMap.clear();
ValueMap.clear();
StaticAllocaMap.clear();
-#ifndef NDEBUG
- CatchInfoLost.clear();
- CatchInfoFound.clear();
-#endif
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
@@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
return 0;
}
+unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ unsigned &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
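
getCatchPadExceptionPointerVReg uses the pair returned by DenseMap::insert to create the register only on first query. A generic sketch of that idiom (names illustrative):

```cpp
#include "llvm/ADT/DenseMap.h"

// Sketch: insert a zero placeholder; DenseMap::insert reports whether the
// key was new, and the returned iterator lets us fill the slot in place,
// so the expensive creation runs exactly once per key.
static unsigned getOrCreate(llvm::DenseMap<const void *, unsigned> &Table,
                            const void *Key, unsigned (*Create)()) {
  auto It = Table.insert({Key, 0});
  unsigned &Slot = It.first->second;
  if (It.second)
    Slot = Create();
  return Slot;
}
```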
/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
/// being passed to this variadic function, and set the MachineModuleInfo's
/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
@@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
MachineBasicBlock *MBB) {
- MMI.addPersonality(
- MBB,
- cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()));
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MMI.addPersonality(PF);
if (I.isCleanup())
MMI.addCleanup(MBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 5ec1030..a1e2d41 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC);
+ TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fbc8f1e..f46767f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -39,6 +39,10 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag"
+namespace {
+
+struct FloatSignAsInt;
+
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
@@ -51,7 +55,6 @@ using namespace llvm;
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt merge setcc and brc instructions into brcc's.
///
-namespace {
class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
@@ -130,7 +133,11 @@ private:
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFCOPYSIGN(SDNode *Node);
+ void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
SDLoc dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
@@ -138,6 +145,7 @@ private:
SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
SDLoc dl);
+ SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl);
SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
@@ -146,10 +154,11 @@ private:
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
- std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
-
- void ExpandNode(SDNode *Node);
+ // If ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall.
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
void PromoteNode(SDNode *Node);
public:
@@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend) {
- SDValue Result =
- DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, false, Alignment);
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ false, false, false, Alignment);
return Result;
}
SDValue Result =
- DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false, false,
- Alignment);
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
return Result;
}
@@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Store the vector.
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, 0);
// Truncate or zero extend offset to target pointer type.
- unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
// Add the offset to the index.
unsigned EltSize = EltVT.getSizeInBits()/8;
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
@@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
- return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false,
- false, 0);
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI),
+ false, false, false, 0);
}
@@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
case TargetLowering::Legal: {
// If this is an unaligned store and the target doesn't support it,
// expand it.
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
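
The repeated pattern replaced above folds the ABI-alignment computation into TLI.allowsMemoryAccess. In scalar terms, the old check amounted to this (sketch, illustrative name):

```cpp
// Sketch of the check that allowsMemoryAccess centralizes: an access needs
// expansion only when it is under-aligned for the type *and* the target
// does not tolerate misaligned accesses of that type.
static bool needsUnalignedExpansion(bool TargetAllowsMisaligned,
                                    unsigned ABIAlign, unsigned Align) {
  return !TargetAllowsMisaligned && Align < ABIAlign;
}
```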
case TargetLowering::Custom: {
@@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
ReplaceNode(SDValue(Node, 0), Result);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
- StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DL.getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
case TargetLowering::Custom: {
@@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
- }
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
break;
}
case TargetLowering::Custom: {
@@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Res.getValue(1);
}
} else {
- // If this is an unaligned load and the target doesn't support
- // it, expand it.
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
- }
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Load.getValue(1);
break;
}
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined-upper-bits behavior that would allow using an in-reg
+ // extend with the illegal FP type, so load as an integer and do the
+ // from-integer conversion afterwards.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+ Chain, Ptr, ISrcVT,
+ LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
}
assert(!SrcVT.isVector() &&
@@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal &&
+ assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Node->getValueType(i))) &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
- assert((TLI.getTypeAction(*DAG.getContext(),
- Op.getValueType()) == TargetLowering::TypeLegal ||
- Op.getOpcode() == ISD::TargetConstant) &&
- "Unexpected illegal type!");
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Op.getValueType()) ||
+ Op.getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
#endif
// Figure out the correct action; the way to query this varies by opcode
@@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
case ISD::VAARG:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getValueType(0));
@@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ Node->getOpcode() == ISD::SETCC ? 2 :
+ Node->getOpcode() == ISD::SETCCE ? 3 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
@@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
case ISD::READ_REGISTER:
case ISD::WRITE_REGISTER:
// Named register is legal in the DAG, but blocked by register name
@@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Node);
+ if (ExpandNode(Node))
+ return;
+ // FALL THROUGH
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
return;
case TargetLowering::Promote:
PromoteNode(Node);
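
The new control flow in LegalizeOp can be modeled compactly; the names below are illustrative, not the class's real interface:

```cpp
#include <functional>

enum class LegalizeAction { Legal, Expand, LibCall };

// Sketch of the fall-through above: ExpandNode may decline by returning
// false, in which case the node is lowered through a runtime library call.
static void legalizeOne(LegalizeAction Action,
                        const std::function<bool()> &ExpandNode,
                        const std::function<void()> &ConvertToLibcall) {
  if (Action == LegalizeAction::Legal)
    return;
  if (Action == LegalizeAction::Expand && ExpandNode())
    return; // fully expanded in-line, nothing left to do
  // Either the target asked for LibCall directly, or expansion declined.
  ConvertToLibcall();
}
```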
@@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
// the vector. If all are expanded here, we don't want one store per vector
// element.
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
@@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
continue;
+ // If the index is dependent on the store, we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load).
+ if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist))
+ continue;
+
StackPtr = ST->getBasePtr();
Ch = SDValue(ST, 0);
break;
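
The new check guards against a chain/index cycle. A minimal model of the dependence test, assuming a toy node graph:

```cpp
#include <set>
#include <vector>

struct ToyNode { std::vector<const ToyNode *> Operands; };

// Sketch: reusing an earlier store's chain for the load is only safe when
// the index expression does not (transitively) reach that store; otherwise
// the load would use the index and the index would use the load.
static bool reaches(const ToyNode *From, const ToyNode *Target,
                    std::set<const ToyNode *> &Visited) {
  if (From == Target)
    return true;
  if (!Visited.insert(From).second)
    return false;
  for (const ToyNode *Op : From->Operands)
    if (reaches(Op, Target, Visited))
      return true;
  return false;
}
```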
@@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
@@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDLoc dl(Node);
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
false, false, false, 0);
}
-SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
- SDLoc dl(Node);
- SDValue Tmp1 = Node->getOperand(0);
- SDValue Tmp2 = Node->getOperand(1);
-
- // Get the sign bit of the RHS. First obtain a value that has the same
- // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
- SDValue SignBit;
- EVT FloatVT = Tmp2.getValueType();
- EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+namespace {
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+};
+}
+
+/// Bitcast a floating-point value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floating-point value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ SDLoc DL, SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) {
- // Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignBit(NumBits);
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo, false, false, 0);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
} else {
- auto &DL = DAG.getDataLayout();
- // Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getPointerTy(DL);
- // First create a temporary that is aligned for both the load and store.
- SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
- // Then store the float to it.
- SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
- false, false, 0);
- if (DL.isBigEndian()) {
- assert(FloatVT.isByteSized() && "Unsupported floating point type!");
- // Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
- } else { // Little endian
- SDValue LoadPtr = StackPtr;
- // The float may be wider than the integer we are going to load. Advance
- // the pointer so that the loaded integer will contain the sign bit.
- unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
- unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
- LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
- DAG.getConstant(ByteOffset, dl,
- LoadPtr.getValueType()));
- // Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, false, 0);
- // Move the sign bit to the top bit of the loaded integer.
- unsigned BitShift = LoadTy.getSizeInBits() -
- (FloatVT.getSizeInBits() - 8 * ByteOffset);
- assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
- if (BitShift)
- SignBit = DAG.getNode(
- ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift, dl,
- TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
- }
+ // Advance the pointer so that the loaded byte will contain the sign bit.
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
+ IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
}
- // Now get the sign bit proper, by seeing whether the value is negative.
- SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
- SignBit,
- DAG.getConstant(0, dl, SignBit.getValueType()),
- ISD::SETLT);
- // Get the absolute value of the result.
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
- // Select between the nabs and abs value based on the sign bit of
- // the input.
- return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
- DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
- AbsVal);
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain,
+ IntPtr, State.IntPointerInfo, MVT::i8,
+ false, false, false, 0);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ SDLoc DL, SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8, false, false,
+ 0);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo, false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS is legal, transform FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform values to integer, copy the sign bit and transform back.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ assert(SignAsInt.SignMask == MagAsInt.SignMask);
+ SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
+ ClearSignMask);
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
+
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
}
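
A host-side restatement of what the rewritten FCOPYSIGN expansion computes for f64, assuming a same-width integer type is legal:

```cpp
#include <cstdint>
#include <cstring>

// Sketch: clear the sign bit of the magnitude, OR in the sign operand's
// sign bit, and bitcast back -- exactly the AND/OR sequence built above.
static double copysignViaInt(double Mag, double Sign) {
  uint64_t MagBits, SignBits;
  std::memcpy(&MagBits, &Mag, sizeof(double));
  std::memcpy(&SignBits, &Sign, sizeof(double));
  const uint64_t SignMask = 1ULL << 63;
  uint64_t ResBits = (MagBits & ~SignMask) | (SignBits & SignMask);
  double Res;
  std::memcpy(&Res, &ResBits, sizeof(double));
  return Res;
}
```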
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
int SPFI = StackPtrFI->getIndex();
- SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
- StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- Node->getValueType(0).getVectorElementType(),
- false, false, 0);
- return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, false, 0);
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType(), false, false, 0);
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, false, 0);
}
static bool
@@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx =
DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
}
SmallSet<SDValue, 16> DefinedValues;
@@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// Return true if divmod libcall is available.
-static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
- const TargetLowering &TLI) {
- RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
- case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
- case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
- case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
- case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
- }
-
- return TLI.getLibcallName(LC) != nullptr;
-}
-
-/// Only issue divrem libcall if both quotient and remainder are needed.
-static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
- // The other use might have been replaced with a divrem already.
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- unsigned OtherOpcode = 0;
- if (isSigned)
- OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
- else
- OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
-
- SDValue Op0 = Node->getOperand(0);
- SDValue Op1 = Node->getOperand(1);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == Node)
- continue;
- if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
- User->getOperand(0) == Op0 &&
- User->getOperand(1) == Op1)
- return true;
- }
- return false;
-}
-
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
EVT DestVT,
SDLoc dl) {
+ // TODO: Should any fast-math-flags be set for the created nodes?
+
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Alignment = std::min(Alignment, 4u);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
- FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
else {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, false, Alignment);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
+/// Open code the operations for BITREVERSE.
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2;
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift = Shift.shl(J);
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
+
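
The loop above builds one shift/AND/OR triple per bit. The same computation on the host for a 32-bit value (sketch):

```cpp
#include <cstdint>

// Sketch of ExpandBITREVERSE for a 32-bit scalar: for each position, shift
// source bit I into its mirrored position J = 31 - I, mask it, and OR it
// into the accumulator.
static uint32_t bitreverse32(uint32_t V) {
  uint32_t R = 0;
  for (unsigned I = 0, J = 31; I < 32; ++I, --J) {
    uint32_t Moved = (I < J) ? (V << (J - I)) : (V >> (I - J));
    R |= Moved & (UINT32_C(1) << J);
  }
  return R;
}
```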
/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
@@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
}
-std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
- unsigned Opc = Node->getOpcode();
- MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
- return ExpandChainLibCall(LC, Node, false);
-}
-
-void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::BITREVERSE:
+ Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ break;
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
break;
@@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
case ISD::EH_SJLJ_SETJMP:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.push_back(DAG.getConstant(0, dl, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
- case ISD::ATOMIC_FENCE: {
- // If the target didn't lower this, lower it to '__sync_synchronize()' call
- // FIXME: handle "fence singlethread" more efficiently.
- TargetLowering::ArgListTy Args;
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
@@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Swap.getValue(1));
break;
}
- // By default, atomic intrinsics are marked Legal and lowered. Targets
- // which don't support them directly, however, may want libcalls, in which
- // case they mark them Expand, and we get here.
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_CMP_SWAP: {
- std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
- Results.push_back(Tmp.first);
- Results.push_back(Tmp.second);
- break;
- }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
// Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
// splits out the success value as a comparison. Expanding the resulting
@@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
- case ISD::TRAP: {
- // If this operation is not supported, lower it to 'abort()' call
- TargetLowering::ArgListTy Args;
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::FP_ROUND:
case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
@@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0),
Tmp1, ISD::SETLT);
True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ // TODO: Should any fast-math-flags be set for the FSUB?
False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
DAG.getNode(ISD::FSUB, dl, VT,
Node->getOperand(0), Tmp1));
@@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::VAARG: {
- const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
-
- SDValue VAListLoad =
- DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, false, 0);
- SDValue VAList = VAListLoad;
-
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
- VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1, dl,
- VAList.getValueType()));
-
- VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align, dl,
- VAList.getValueType()));
- }
-
- // Increment the pointer, VAList, to the next vaarg
- Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(
- VT.getTypeForEVT(*DAG.getContext())),
- dl, VAList.getValueType()));
- // Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
- MachinePointerInfo(V), false, false, 0);
- // Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, false, 0));
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
break;
- }
- case ISD::VACOPY: {
- // This defaults to loading a pointer from the input and storing it to the
- // output, returning the chain.
- const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
- const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
- Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl,
- Node->getOperand(0), Node->getOperand(2),
- MachinePointerInfo(VS), false, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
- MachinePointerInfo(VD), false, false, 0);
- Results.push_back(Tmp1);
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
break;
- }
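
DAG.expandVAArg emits the same align/load/bump sequence the deleted code built by hand. A host-side model (illustrative; real va_list layouts are ABI-specific):

```cpp
#include <cstdint>
#include <cstring>

// Sketch: round the va_list pointer up to the argument's alignment, load
// the value, then store back the pointer advanced past it. This models
// the simple grow-upward va_list that the generic expansion assumes.
template <typename T>
static T vaArgModel(char *&List, uintptr_t Align) {
  uintptr_t P = reinterpret_cast<uintptr_t>(List);
  if (Align > 1)
    P = (P + Align - 1) & ~(Align - 1);
  T Val;
  std::memcpy(&Val, reinterpret_cast<const void *>(P), sizeof(T));
  List = reinterpret_cast<char *>(P + sizeof(T));
  return Val;
}
```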
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
}
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
case ISD::FCOPYSIGN:
Results.push_back(ExpandFCOPYSIGN(Node));
break;
case ISD::FNEG:
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
Node->getOperand(0));
Results.push_back(Tmp1);
break;
- case ISD::FABS: {
- // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = DAG.getConstantFP(0.0, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, ISD::SETUGT);
- Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
- Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
- Results.push_back(Tmp1);
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
break;
- }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
- case ISD::FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
- break;
- case ISD::FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
- break;
- case ISD::FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
- break;
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
- bool isSIN = Node->getOpcode() == ISD::FSIN;
// Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
@@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
- if (!isSIN)
+ if (Node->getOpcode() == ISD::FCOS)
Tmp1 = Tmp1.getValue(1);
Results.push_back(Tmp1);
- } else if (isSIN) {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
}
break;
}
- case ISD::FSINCOS:
- // Expand into sincos libcall.
- ExpandSinCosLibCall(Node, Results);
- break;
- case ISD::FLOG:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
- break;
- case ISD::FLOG2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
- break;
- case ISD::FLOG10:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
- break;
- case ISD::FEXP:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
- break;
- case ISD::FEXP2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
- break;
- case ISD::FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
- break;
- case ISD::FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
- break;
- case ISD::FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
- break;
- case ISD::FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
- break;
- case ISD::FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
- break;
- case ISD::FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
- break;
- case ISD::FPOWI:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
- break;
- case ISD::FPOW:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
- break;
- case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
- break;
- case ISD::FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
- break;
- case ISD::FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
- break;
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
- case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
- break;
- case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
- break;
- case ISD::FP16_TO_FP: {
- if (Node->getValueType(0) == MVT::f32) {
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
- break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
-
- // We can extend to types bigger than f32 in two steps without changing the
- // result. Since "f16 -> f32" is much more commonly available, give CodeGen
- // the option of emitting that before resorting to a libcall.
- SDValue Res =
- DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
- Results.push_back(
- DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
- }
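
Why the two-step lowering above is exact: every f16 value is exactly representable in f32, so no double rounding can occur. A host-side sketch with a hypothetical halfToFloat:

```cpp
#include <cstdint>

// Sketch: f16 -> f32 is value-preserving (f32's mantissa width and exponent
// range strictly contain f16's), so widening the f32 result further to f64
// yields the same value as a direct f16 -> f64 extension would.
static double extendHalfInTwoSteps(uint16_t HalfBits,
                                   float (*halfToFloat)(uint16_t)) {
  float F = halfToFloat(HalfBits); // exact
  return static_cast<double>(F);   // exact again: no double rounding
}
```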
- case ISD::FP_TO_FP16: {
+ case ISD::FP_TO_FP16:
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl));
Results.push_back(
DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
- break;
}
}
-
- RTLIB::Libcall LC =
- RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
- Results.push_back(ExpandLibCall(LC, Node, false));
break;
- }
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
@@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
Results.push_back(Tmp1);
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
}
break;
}
@@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- // If div is legal, it's better to do the normal expansion
- !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
- useDivRem(Node, isSigned, false))) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ Results.push_back(Tmp1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
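  // The "X % Y -> X-X/Y*Y" rewrite above relies on truncating division. A
  // minimal sketch of the same identity in plain C++ (illustrative only,
  // not DAG code):
  //
  //   int rem_via_div(int x, int y) {
  //     int q = x / y;    // DivOpc (SDIV or UDIV)
  //     return x - q * y; // the ISD::MUL followed by ISD::SUB above
  //   }
  //
  // e.g. x = 7, y = 3: q = 2 and 7 - 2*3 = 1 == 7 % 3.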
case ISD::UDIV:
@@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool isSigned = Node->getOpcode() == ISD::SDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
- SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- useDivRem(Node, isSigned, true)))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
- else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
case ISD::MULHU:
@@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1.getValue(1));
break;
}
- case ISD::SDIVREM:
- case ISD::UDIVREM:
- // Expand into divrem libcall
- ExpandDivRemLibCall(Node, Results);
- break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
- break;
}
-
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128);
- Results.push_back(Tmp1);
break;
}
case ISD::SADDO:
@@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(), MemVT,
- false, false, false, 0);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT,
+ false, false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
@@ -4092,16 +3948,276 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
+ if (Results.empty())
+ return false;
+
+ ReplaceNode(Node, Results.data());
+ return true;
+}
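// Note the changed contract: ExpandNode now reports whether it produced a
// replacement, so the caller can fall back to a libcall. A sketch of the
// intended call-site pattern (names as used in this file):
//
//   if (Action == TargetLowering::Expand) {
//     if (ExpandNode(Node))
//       return;
//     ConvertNodeToLibcall(Node); // defined below
//   }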
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+    // If the target didn't lower this, lower it to a '__sync_synchronize()' call.
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+           "Unexpected atomic op or value type!");
+
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
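  // As an illustration of the RTLIB::getATOMIC mapping used above (the
  // exact libcall names come from the RTLIB tables): an i32
  // ISD::ATOMIC_LOAD_ADD is expected to map to SYNC_FETCH_AND_ADD_4, i.e. a
  // call to the GCC-style "__sync_fetch_and_add_4" runtime routine, with
  // the chain threaded through by ExpandChainLibCall.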
+ case ISD::TRAP: {
+    // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ }
+ break;
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
+ case ISD::FSUB:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+    // Expand into a divrem libcall.
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ }
+
+ // Replace the original node with the legalized result.
if (!Results.empty())
ReplaceNode(Node, Results.data());
}
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
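// A worked example of the computation above (hypothetical values, assuming
// the resulting vector type is legal on the target):
//   EltVT = i64, NewEltVT = i32
//   OldEltsPerNewElt = 64 / 32 = 2
//   MidVT = v2i32   // each original i64 element is rewritten as one v2i32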
+
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
- Node->getOpcode() == ISD::SETCC) {
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::BR_CC)
@@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FCOPYSIGN:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
- Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
@@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
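  // Sketched in IR terms for a promoted f16 fcopysign:
  //   %e = fpext half %a to float
  //   %c = fcopysign float %e, float %b  ; differs from %e only in the sign
  //   %r = fptrunc float %c to half      ; exact, hence the TRUNC flag
  // so the final FP_ROUND can never lose precision.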
case ISD::FFLOOR:
@@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1)))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+    SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+    // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
}
// Replace the original node with the legalized result.
@@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() {
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
- SDNode *N = NI;
+ SDNode *N = &*NI;
if (N->use_empty() && N != getRoot().getNode()) {
++NI;
DeleteNode(N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3c50a41..6c0193a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Result Float to Integer Conversion.
+// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
-void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::Register:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ assert(isLegalInHWReg(N->getValueType(ResNo)) &&
+ "Unsupported SoftenFloatRes opcode!");
+    // Only when isLegalInHWReg is true can we skip checking the operands.
+ R = SDValue(N, ResNo);
+ break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP:
- R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
- break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
@@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
}
// If R is null, the sub-method took care of registering the result.
- if (R.getNode())
+ if (R.getNode()) {
SetSoftenedFloat(SDValue(N, ResNo), R);
+ ReplaceSoftenFloatResult(N, ResNo, R);
+ }
+  // Return true only if the node is changed, assuming that the operands
+  // are also converted when necessary.
+  // Otherwise, return false to tell the caller to scan the operands.
+ return R.getNode() && R.getNode() != N;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
return BitConvertToInteger(N->getOperand(0));
}
@@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
- return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N),
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
+  // When LegalInHWReg, we can load the value more efficiently from the
+  // constant pool.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0)));
+ CN->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FABS can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
@@ -178,7 +199,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
@@ -191,7 +212,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -203,10 +224,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
+  // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise
+  // operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
@@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
@@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
@@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
@@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
@@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
@@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
@@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
@@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
@@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
@@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, dl).first;
+ NVT, Ops, false, dl).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SoftenFloatResult(Op.getNode(), 0);
}
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
false, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
@@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
@@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
@@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
@@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
@@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
@@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
@@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
+ bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ if (N != NewL.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
return NewL;
}
@@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
- return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ if (LegalInHWReg)
+ return ExtendNode;
+ return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
return NewVAARG;
}
@@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, Signed, dl).first;
+ Op, Signed, dl).first;
}
//===----------------------------------------------------------------------===//
-// Operand Float to Integer Conversion..
+// Convert Float Operand to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
+ if (CanSkipSoftenFloatOperand(N, OpNo))
+ return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::STORE:
+ Res = SoftenFloatOp_STORE(N, OpNo);
+    // Do not try to analyze or soften this node again if the value is,
+    // or can be, held in a register. In that case, Res.getNode() should
+    // be equal to N.
+ if (Res.getNode() == N &&
+ isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // Otherwise, we need to reanalyze and lower the new Res nodes.
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
- // core about this.
+ // core about this to re-analyze.
if (Res.getNode() == N)
return true;
@@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return false;
}
+bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // When the operand type can be kept in registers, SoftenFloatResult
+ // will call ReplaceValueWith to replace all references and we can
+ // skip softening this operand.
+ switch (N->getOperand(OpNo).getOpcode()) {
+ case ISD::BITCAST:
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ return true;
+ }
+  // For some opcodes, SoftenFloatResult itself handles all the softening and
+  // operand replacement, so there is no need to soften the operands again,
+  // although such a node may still be scanned for other illegal operands.
+ switch (N->getOpcode()) {
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ return true;
+ }
+ return false;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
@@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
@@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT SVT = N->getOperand(0).getValueType();
EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
-}
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+  // If the result is not legal, e.g. fp -> i1, then it needs to be
+  // promoted to a larger type, e.g. fp -> i32. Even if it is legal, no
+  // libcall may exactly match, e.g. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+    // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
- EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+
+ // Truncate the result if the libcall returns a larger type.
+ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
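// Worked example (assuming the usual soft-float libcalls): for an f32 -> i8
// conversion there is no fp-to-i8 libcall, so the loop above walks i8, i16,
// ... until getFPTOSINT/getFPTOUINT returns a known call -- typically
// __fixsfsi for NVT = i32 -- and the wide result is then truncated back to
// i8 by the ISD::TRUNCATE above.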
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, 3, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first;
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
break;
}
+ // TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
APInt(128, Parts)),
@@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
DAG.getNode(ISD::ADD, dl, MVT::i32,
DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
@@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
false, dl).first;
}
@@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
}
SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9f060a09..cd114d6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
- case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
- case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
@@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+ case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+ case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -180,7 +185,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
Op2, N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+}
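// Worked example: an i8 bitreverse promoted to i32. The BITREVERSE on the
// wide type leaves the reversed source bits in the top byte, and
// DiffBits = 32 - 8 = 24, so the SRL shifts them back into place:
//   x (i8)           = abcdefgh
//   promoted (i32)   = 00000000 00000000 00000000 abcdefgh
//   bitreverse (i32) = hgfedcba 00000000 00000000 00000000
//   srl 24           = 00000000 00000000 00000000 hgfedcba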
+
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
@@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getMemoryVT(), N->getMemOperand());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
- SDValue Mask = N->getMask();
- EVT NewMaskVT = getSetCCResultType(NVT);
- if (NewMaskVT != N->getMask().getValueType())
- Mask = PromoteTargetBoolean(Mask, NewMaskVT);
SDLoc dl(N);
-
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtSrc0, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
+ assert(NVT == ExtSrc0.getValueType() &&
+         "Gather result type and the passThru argument type should be the same");
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ N->getIndex()};
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
+
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
@@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
- // Sign extend the input.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
@@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
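// Why the extension kind matters (i8 promoted to i32, illustrative values):
//   x = (i8)-2 (0xFE), y = (i8)2
//   SDIV via sext: 0xFFFFFFFE / 2 = -1  (correct)
//   SDIV via zext: 0x000000FE / 2 = 127 (wrong)
//   UDIV via zext: 254 / 2 = 127        (correct)
// Hence SDIV/SREM/SMIN/SMAX use SExtPromotedInteger while
// UDIV/UREM/UMIN/UMAX use ZExtPromotedInteger.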
+
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
return Mul;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
- // Zero extend the input.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
@@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- EVT MaskVT = Mask.getValueType();
SDLoc dl(N);
bool TruncateStore = false;
- if (!TLI.isTypeLegal(DataVT)) {
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
- DataOp = GetPromotedInteger(DataOp);
- if (!TLI.isTypeLegal(MaskVT))
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
- TruncateStore = true;
- }
+ if (OpNo == 2) {
+ // Mask comes before the data operand. If the data operand is legal, we just
+ // promote the mask.
+ // When the data operand has an illegal type, we should legalize the data
+ // operand first. The mask will then be promoted/split/widened according to
+ // the data operand's type.
+ if (TLI.isTypeLegal(DataVT))
+ Mask = PromoteTargetBoolean(Mask, DataVT);
else {
- assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
- "Unexpected data legalization in MSTORE");
- DataOp = GetWidenedVector(DataOp);
-
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(DataOp.getValueType());
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
+ else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ else {
+ assert(getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
}
}
+ } else { // Data operand
+ assert(OpNo == 3 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
}
- else
- Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
+
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
TruncateStore);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
assert(OpNo == 2 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
@@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo)
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
@@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
@@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
-void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi) {
- SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
- SplitInteger(Res, Lo, Hi);
-}
-
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
}
}
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
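// Editorial sketch, not part of this patch: ExpandIntRes_BITREVERSE rests on
// the identity bitreverse(Hi:Lo) == bitreverse(Lo):bitreverse(Hi), which is
// why the Lo/Hi outputs of GetExpandedInteger are deliberately swapped.
#include <cassert>
#include <cstdint>

static uint32_t BitReverse32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

static uint64_t BitReverse64(uint64_t V) {
  uint32_t OpLo = uint32_t(V), OpHi = uint32_t(V >> 32);
  // The result's low half is the reversed high half of the input and vice
  // versa - the same swap the legalizer performs on the expanded halves.
  return (uint64_t(BitReverse32(OpLo)) << 32) | BitReverse32(OpHi);
}

int main() {
  assert(BitReverse64(1) == (1ULL << 63));
  assert(BitReverse64(1ULL << 63) == 1);
  return 0;
}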
void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
}
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
}
@@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
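// Editorial sketch, not part of this patch: when i64 is illegal, the expanded
// READCYCLECOUNTER produces two NVT results plus a chain, mirroring e.g.
// x86-32 RDTSC returning the counter in the EDX:EAX pair. Recombining the
// pair reconstructs the wide value:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Counter = 0x0123456789ABCDEFULL; // pretend cycle counter
  uint32_t Lo = uint32_t(Counter);          // result 0 of the expanded node
  uint32_t Hi = uint32_t(Counter >> 32);    // result 1 of the expanded node
  assert(((uint64_t(Hi) << 32) | Lo) == Counter);
  return 0;
}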
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo,
- Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
return;
}
@@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
@@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = Tmp1;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCE if the target supports it.
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (TLI.getOperationAction(
+ ISD::SETCCE,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
+ TargetLowering::Custom) {
+ // SETCCE can detect < and >= directly. For > and <=, flip operands and
+ // condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCE. The SETCCE operation is essentially looking at the high part of
+ // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
+ // positive iff LHS >= RHS.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
+ SDValue Res =
+ DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
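// Editorial sketch, not part of this patch: the SUBC/SETCCE lowering above
// inspects the sign of the wide difference LHS - RHS. A scalar model of a
// 64-bit signed '<' built from 32-bit halves with an explicit borrow
// (assuming two's-complement halves):
#include <cassert>
#include <cstdint>

static bool SLT64(int64_t L, int64_t R) {
  uint32_t LLo = uint32_t(L), RLo = uint32_t(R);
  int32_t LHi = int32_t(uint64_t(L) >> 32), RHi = int32_t(uint64_t(R) >> 32);
  bool Borrow = LLo < RLo;                      // carry out of the SUBC
  int64_t HiDiff = int64_t(LHi) - RHi - Borrow; // what SETCCE inspects
  return HiDiff < 0;                            // negative iff L < R
}

int main() {
  assert(SLT64(-1, 0) && !SLT64(0, -1) && SLT64(5, 9) && !SLT64(9, 9));
  return 0;
}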
NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, ISD::SETEQ, false,
DagCombineInfo, dl);
@@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
DAG.getCondCode(CCCode)), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a SUBE for the low part and a smaller SETCCE for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
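// Editorial sketch, not part of this patch: expanding SETCCE once more just
// extends the borrow chain by another word - SUBC for the lowest limb, SUBE
// for each middle limb, and a final sign test. A 128-bit signed '<' from
// four 32-bit limbs (little-endian limb order):
#include <cassert>
#include <cstdint>

static bool SLT128(const uint32_t L[4], const uint32_t R[4]) {
  bool Borrow = false;
  for (int I = 0; I < 3; ++I) {                 // SUBC, then SUBE per limb
    uint64_t D = uint64_t(L[I]) - R[I] - Borrow;
    Borrow = (D >> 63) != 0;                    // borrow out of this limb
  }
  int64_t HiDiff = int64_t(int32_t(L[3])) - int32_t(R[3]) - Borrow;
  return HiDiff < 0;                            // the final SETCCE
}

int main() {
  uint32_t A[4] = {0, 0, 0, 0xFFFFFFFFu};       // a negative 128-bit value
  uint32_t B[4] = {1, 0, 0, 0};                 // one
  assert(SLT128(A, B) && !SLT128(B, A));
  return 0;
}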
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
@@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
- FudgePtr,
- MachinePointerInfo::getConstantPool(),
- MVT::f32,
- false, false, false, Alignment);
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
@@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 54cfaf5..2a0b0aa 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// (for example because it was created but not used). In general, we cannot
// distinguish between new nodes and deleted nodes.
SmallVector<SDNode*, 16> NewNodes;
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
// Remember nodes marked NewNode - they are subject to extra checking below.
- if (I->getNodeId() == NewNode)
- NewNodes.push_back(I);
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
- for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
- SDValue Res(I, i);
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
bool Failed = false;
unsigned Mapped = 0;
if (ReplacedValues.find(Res) != ReplacedValues.end()) {
Mapped |= 1;
// Check that remapped values are only used by nodes marked NewNode.
- for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
UI != UE; ++UI)
if (UI.getUse().getResNo() == i)
assert(UI->getNodeId() == NewNode &&
@@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
if (WidenedVectors.find(Res) != WidenedVectors.end())
Mapped |= 128;
- if (I->getNodeId() != Processed) {
+ if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
// marked NewNode too, since a deleted node may have been reallocated as
// another node that has not been seen by the LegalizeTypes machinery.
- if ((I->getNodeId() == NewNode && Mapped > 1) ||
- (I->getNodeId() != NewNode && Mapped != 0)) {
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
@@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() {
// Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
// (and remembering them) if they are leaves and assigning 'Unanalyzed' if
// non-leaves.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
- if (I->getNumOperands() == 0) {
- I->setNodeId(ReadyToProcess);
- Worklist.push_back(I);
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
} else {
- I->setNodeId(Unanalyzed);
+ Node.setNodeId(Unanalyzed);
}
}
@@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- SoftenFloatResult(N, i);
- Changed = true;
- goto NodeDone;
+ Changed = SoftenFloatResult(N, i);
+ if (Changed)
+ goto NodeDone;
+ // If not changed, the result type should be legal in a register.
+ assert(isLegalInHWReg(ResultVT) &&
+ "Unchanged SoftenFloatResult should be legal in register!");
+ goto ScanOperands;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -409,40 +411,48 @@ NodeDone:
// In a debug build, scan all the nodes to make sure we found them all. This
// ensures that there are no cycles and that everything got processed.
#ifndef NDEBUG
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
bool Failed = false;
// Check that all result types are legal.
- if (!IgnoreNodeResults(I))
- for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(I->getValueType(i))) {
- dbgs() << "Result type " << i << " illegal!\n";
+ // A value type is illegal only if its TypeAction is not TypeLegal and
+ // TLI.RegClassForVT has no register class for the type. For example, on
+ // x86_64 f128 is not TypeLegal, so its operators are softened, but the
+ // target also provides the FR128 register class for passing and returning
+ // f128 values. Hence a legalized node can still have f128 type.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i)) &&
+ !TLI.isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump();
Failed = true;
}
// Check that all operand types are legal.
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
- if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
- !isTypeLegal(I->getOperand(i).getValueType())) {
- dbgs() << "Operand type " << i << " illegal!\n";
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType()) &&
+ !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump();
Failed = true;
}
- if (I->getNodeId() != Processed) {
- if (I->getNodeId() == NewNode)
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
dbgs() << "New node not analyzed?\n";
- else if (I->getNodeId() == Unanalyzed)
+ else if (Node.getNodeId() == Unanalyzed)
dbgs() << "Unanalyzed node not noticed?\n";
- else if (I->getNodeId() > 0)
+ else if (Node.getNodeId() > 0)
dbgs() << "Operand not processed?\n";
- else if (I->getNodeId() == ReadyToProcess)
+ else if (Node.getNodeId() == ReadyToProcess)
dbgs() << "Not added to worklist?\n";
Failed = true;
}
if (Failed) {
- I->dump(&DAG); dbgs() << "\n";
+ Node.dump(&DAG); dbgs() << "\n";
llvm_unreachable(nullptr);
}
}
@@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ // On x86_64, f128 can be kept in SSE registers,
+ // but it is sometimes softened to i128.
+ assert((Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
SDValue &OpEntry = SoftenedFloats[Op];
- assert(!OpEntry.getNode() && "Node is already converted to integer!");
+ // Allow repeated calls to save f128-typed nodes, or any node whose type
+ // transforms to itself; many operations on these types are not softened.
+ assert((!OpEntry.getNode() ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ "Node is already converted to integer!");
OpEntry = Result;
}
@@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0),
- &Ops[0], NumOps, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
@@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
+/// WidenTargetBoolean - Widen the given target boolean to match the target
+/// boolean type of ValVT. The boolean vector is widened and then promoted.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+ bool WithZeroes) {
+ SDLoc dl(Bool);
+ EVT BoolVT = Bool.getValueType();
+
+ assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+ TLI.isTypeLegal(ValVT) &&
+ "Unexpected types in WidenTargetBoolean");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+ ValVT.getVectorNumElements());
+ Bool = ModifyToType(Bool, WideVT, WithZeroes);
+ return PromoteTargetBoolean(Bool, ValVT);
+}
+
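// Editorial sketch, not part of this patch: widening a mask with zeroes
// (rather than undef) keeps the newly added lanes inactive, which is what a
// masked memory operation requires. A scalar model of the widening step:
#include <array>
#include <cassert>

int main() {
  std::array<bool, 4> Mask = {true, false, true, true};
  std::array<bool, 8> Wide = {};     // zero-initialized: new lanes start off
  for (size_t I = 0; I < Mask.size(); ++I)
    Wide[I] = Mask[I];
  for (size_t I = Mask.size(); I < Wide.size(); ++I)
    assert(!Wide[I]);                // widened lanes must stay false
  return 0;
}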
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
/// bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d1131a7..8ba19f7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -72,6 +72,20 @@ private:
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
+ /// isSimpleLegalType - Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ /// isLegalInHWReg - Return true if this type can be passed in registers.
+ /// For example, x86_64's f128 should be legal in registers, with only
+ /// some operations converted to library calls or integer bitwise
+ /// operations.
+ bool isLegalInHWReg(EVT VT) const {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return VT == NVT && isSimpleLegalType(VT);
+ }
+
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -173,6 +187,11 @@ private:
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ /// Modify the bit vector to match the SetCC result type of ValVT.
+ /// The bit vector is widened with zeroes when WithZeroes is true.
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
+
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -234,6 +253,7 @@ private:
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
@@ -246,21 +266,22 @@ private:
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_SDIV(SDNode *N);
SDValue PromoteIntRes_SELECT(SDNode *N);
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_UDIV(SDNode *N);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
@@ -276,7 +297,6 @@ private:
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
@@ -284,7 +304,6 @@ private:
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
@@ -294,6 +313,8 @@ private:
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -312,8 +333,6 @@ private:
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
- void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -322,6 +341,7 @@ private:
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -333,6 +353,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -354,12 +375,10 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
- SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
- SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -375,32 +394,48 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSoftenedFloat - Given a processed operand Op which was converted to an
- /// integer of the same size, this returns the integer. The integer contains
- /// exactly the same bits as Op - only the type changed. For example, if Op
- /// is an f32 which was softened to an i32, then this method returns an i32,
- /// the bits of which coincide with those of Op.
+ /// GetSoftenedFloat - Given a float-typed operand Op, return the integer it
+ /// was converted to if Op's type is not supported in target HW.
+ /// The integer contains exactly the same bits as Op - only the type changed.
+ /// For example, if Op is an f32 that was softened to an i32, this method
+ /// returns an i32 whose bits coincide with those of Op.
+ /// If Op's type is efficiently supported in target HW, or the operand must
+ /// stay in a register, Op is not converted to an integer; in that case the
+ /// given Op is returned unchanged.
SDValue GetSoftenedFloat(SDValue Op) {
SDValue &SoftenedOp = SoftenedFloats[Op];
+ if (!SoftenedOp.getNode() &&
+ isSimpleLegalType(Op.getValueType()))
+ return Op;
RemapValue(SoftenedOp);
assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Result Float to Integer Conversion.
- void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+ // When the result type can be kept in HW registers, the converted
+ // NewRes node can have the same type as N. By calling ReplaceValueWith
+ // here to update all users, we avoid cloning every user of N in
+ // SoftenFloatOperand and the other legalization functions.
+ if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+ ReplaceValueWith(SDValue(N, ResNo), NewRes);
+ }
+
+ // Convert Float Results to Integer for Non-HW-supported Operations.
+ bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
- SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -412,7 +447,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -425,21 +460,25 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N);
- SDValue SoftenFloatRes_SELECT(SDNode *N);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Operand Float to Integer Conversion.
+ // Return true if we can skip softening the given operand or SDNode because
+ // it was softened before by SoftenFloatResult and references to the operand
+ // were already replaced by ReplaceValueWith.
+ bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+ // Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -575,7 +614,6 @@ private:
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
- SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
@@ -617,20 +655,18 @@ private:
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
@@ -650,6 +686,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -680,8 +717,8 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
- SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
@@ -693,6 +730,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
@@ -707,9 +745,11 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
@@ -745,8 +785,10 @@ private:
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
- SDValue ModifyToType(SDValue InOp, EVT WidenVT);
-
+ /// When FillWithZeroes is true, the vector is widened with zeroes;
+ /// by default, it is widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 14d8f77..593c346 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
- case TargetLowering::TypeSoftenFloat:
- // Convert the integer operand instead.
- SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ case TargetLowering::TypeSoftenFloat: {
+ // Expand the floating point operand only if it was converted to integers.
+ // Otherwise, it is a legal type like f128 that can be kept in a register.
+ auto SoftenedOp = GetSoftenedFloat(InOp);
+ if (SoftenedOp == InOp)
+ break;
+ SplitInteger(SoftenedOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
NOutVT.getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
// Emit a store to the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 83d4ad5..f61f631 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,8 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -159,7 +161,7 @@ bool VectorLegalizer::Run() {
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
+ LegalizeOp(SDValue(&*I, 0));
// Finally, it's possible the root changed. Get the new root.
SDValue OldRoot = DAG.getRoot();
@@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
assert(Result.getValue(1).use_empty() &&
"There are still live users of the old chain!");
return LegalizeOp(Lowered);
- } else {
- return TranslateLegalizeResults(Op, Lowered);
}
+ return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
Changed = true;
@@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
- switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
@@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
- } else if (Op.getOpcode() == ISD::MSCATTER)
+ } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
HasVectorValue = true;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
@@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ROTL:
case ISD::ROTR:
case ISD::BSWAP:
+ case ISD::BITREVERSE:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::MSCATTER:
QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
break;
+ case ISD::MSTORE:
+ QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
Result = Promote(Op);
Changed = true;
@@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
Operands[j] = Op.getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
@@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
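// Editorial sketch, not part of this patch: the SHL/SRL/AND/OR expansion the
// code defers to is the classic mask-and-swap network, shown here on a
// 32-bit scalar (the vector form applies the same steps per lane):
#include <cassert>
#include <cstdint>

static uint32_t ReverseBits32(uint32_t V) {
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap bits
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap pairs
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 8) & 0x00FF00FFu) | ((V & 0x00FF00FFu) << 8); // swap bytes
  return (V >> 16) | (V << 16);                            // swap halves
}

int main() {
  assert(ReverseBits32(1) == 0x80000000u);
  assert(ReverseBits32(0x0000FFFFu) == 0xFFFF0000u);
  return 0;
}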
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
@@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+ // If the non-ZERO_UNDEF version is supported, we can let LegalizeDAG handle it.
+ unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
+ return Op;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
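// Editorial sketch, not part of this patch: CTLZ_ZERO_UNDEF is defined only
// for nonzero inputs, where it agrees with plain CTLZ - which is why
// lowering to the non-ZERO_UNDEF opcode is always safe. Checked against the
// GCC/Clang builtin (itself undefined for zero):
#include <cassert>
#include <cstdint>

static uint32_t Ctlz32(uint32_t V) {    // plain CTLZ, defined even for V == 0
  uint32_t N = 0;
  for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1)
    ++N;
  return N;                             // 32 when V == 0
}

int main() {
  for (uint32_t V : {1u, 2u, 0x80000000u, 0x12345678u})
    assert(Ctlz32(V) == uint32_t(__builtin_clz(V))); // nonzero: identical
  return 0;
}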
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 51cd661..d0187d3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::FPOW:
case ISD::FREM:
@@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
@@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->isInvariant(), N->getOriginalAlignment(),
N->getAAInfo());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
@@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
@@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
@@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+ const SDNodeFlags *Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
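// Editorial sketch, not part of this patch: FCOPYSIGN is elementwise, so
// copysign over the concatenation of two half-vectors equals the
// concatenation of copysign over the halves - the property the split above
// relies on:
#include <cassert>
#include <cmath>
#include <vector>

int main() {
  std::vector<double> Mag = {1.0, -2.0, 3.0, -4.0};
  std::vector<double> Sgn = {-1.0, 1.0, -1.0, 1.0};
  std::vector<double> Whole(4);
  for (int I = 0; I < 4; ++I)                  // whole-vector form
    Whole[I] = std::copysign(Mag[I], Sgn[I]);
  std::vector<double> Split;
  for (int I = 0; I < 2; ++I)                  // Lo half
    Split.push_back(std::copysign(Mag[I], Sgn[I]));
  for (int I = 2; I < 4; ++I)                  // Hi half
    Split.push_back(std::copysign(Mag[I], Sgn[I]));
  assert(Whole == Split);
  return 0;
}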
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(LD, 1), Ch);
}
@@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
+ SDValue Src0 = MLD->getSrc0();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0 = MLD->getSrc0();
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MLD, 1), Ch);
@@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
+ SDValue Index = MGT->getIndex();
unsigned Alignment = MGT->getOriginalAlignment();
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
}
@@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
@@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+
+ MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
+ MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
// If Alignment is equal to the vector size,
// take half of it for the second part
@@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
+ // Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
- SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
- SDValue PtrLo, PtrHi;
- if (Ptr.getValueType().isVector()) // gather form vector of pointers
- std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL);
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
else
- PtrLo = PtrHi = Ptr;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo};
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
@@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi};
+ SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
@@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
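
SplitVecOp_FCOPYSIGN above falls back to UnrollVectorOp, i.e. one scalar copysign per result element. A standalone C++ analogue of what the unrolled form computes (illustration only, not the DAG expansion itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      float Mag[4] = {1.0f, 2.0f, 3.0f, 4.0f};   // first (legal) operand
      float Sgn[4] = {-0.0f, 1.0f, -5.0f, 2.0f}; // second operand, needs splitting
      float Res[4];
      for (int i = 0; i != 4; ++i)
        Res[i] = copysignf(Mag[i], Sgn[i]);      // one scalar op per element
      for (float V : Res)
        printf("%g ", V);                        // prints: -1 2 -3 4
      return 0;
    }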
//===----------------------------------------------------------------------===//
@@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::ADD:
case ISD::AND:
@@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
@@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
@@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
+ EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
@@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
-
+ const SDNodeFlags *Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
- return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
}
@@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
- return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
@@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
- return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
@@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Src0 = GetWidenedVector(N->getValue());
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type, so unroll the convert
@@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
false);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only data operand of mscatter");
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(DataOp);
+ EVT WideVT = WideVal.getValueType();
+ unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen index
+ SDValue Index = MSC->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemOperand());
+}
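
In WidenVecOp_MSCATTER above, the mask is widened with WidenTargetBoolean(Mask, WideVT, true), i.e. padded with zeroes, so the extra lanes are inactive and the widened scatter stores exactly the original elements. A scalar emulation of that masked-store semantics (plain C++ sketch, not SelectionDAG code):

    #include <cstdio>

    int main() {
      int Mem[8] = {};
      int Data[4] = {5, 6, 7, 8};                // data widened from 2 lanes to 4
      int Index[4] = {1, 3, 0, 0};               // padded index lanes are don't-care...
      bool Mask[4] = {true, true, false, false}; // ...because the mask padding is 0
      for (int i = 0; i != 4; ++i)
        if (Mask[i])
          Mem[Index[i]] = Data[i];               // only active lanes store
      printf("%d %d\n", Mem[1], Mem[3]);         // prints: 5 6
      return 0;
    }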
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
-SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
// Note that InOp might have been widened so it might already have
// the right width or it might need to be narrowed.
EVT InVT = InOp.getValueType();
@@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
unsigned NumConcat = WidenNumElts / InNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
+ Ops[i] = FillVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
@@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
- Ops[Idx] = UndefVal;
+ Ops[Idx] = FillVal;
return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
}
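
The new FillWithZeroes flag only changes what the padded lanes contain: zero constants instead of undef, which matters when the widened vector feeds a masked memory operation such as the MGATHER/MSCATTER widening above. A plain C++ stand-in for the two fill policies, with -1 marking an undef lane:

    #include <cstdio>
    #include <vector>

    // Stand-in for ModifyToType's widening path; -1 plays the role of undef.
    std::vector<int> modifyToType(const std::vector<int> &In, size_t NVTElts,
                                  bool FillWithZeroes) {
      std::vector<int> Out(In);
      Out.resize(NVTElts, FillWithZeroes ? 0 : -1); // pad with zero or "undef"
      return Out;
    }

    int main() {
      for (int V : modifyToType({7, 9}, 4, /*FillWithZeroes=*/true))
        printf("%d ", V); // prints: 7 9 0 0
      return 0;
    }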
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 6303422..622e06f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TII = STI.getInstrInfo();
ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
- // do not let it procede.
+ // do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
unsigned NumRC = TRI->getNumRegClasses();
@@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
}
// Now see if there are no other dependencies
- // to instructions alredy in the packet.
+ // to instructions already in the packet.
for (unsigned i = 0, e = Packet.size(); i != e; ++i)
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
E = Packet[i]->Succs.end(); I != E; ++I) {
// Since we do not add pseudos to packets, might as well
- // ignor order deps.
+ // ignore order deps.
if (I->isCtrl())
continue;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 34e1a70..62e7733 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e9bd520..91024e6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -141,8 +141,8 @@ private:
/// that are "live". These nodes must be scheduled before any other nodes that
/// modify the registers can be scheduled.
unsigned NumLiveRegs;
- std::vector<SUnit*> LiveRegDefs;
- std::vector<SUnit*> LiveRegGens;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
// Collect interferences between physical register use/defs.
// Each interference is an SUnit and set of physical registers.
@@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() {
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
- LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
CallSeqEndForStart.clear();
assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
@@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- std::vector<SUnit*> &LiveRegDefs,
+ SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
const TargetRegisterInfo *TRI) {
@@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
- std::vector<SUnit*> &LiveRegDefs,
+ ArrayRef<SUnit*> LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs) {
// Look at all live registers. Skip Reg0 and the special CallResource.
@@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
- CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
RegAdded, LRegs, TRI);
}
@@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
i += NumVals;
@@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
}
if (const uint32_t *RegMask = getNodeRegMask(Node))
- CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+ CheckForLiveRegDefMasked(SU, RegMask,
+ makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
return !LRegs.empty();
@@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const uint16_t *ImpDefs
+ const MCPhysReg *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
if(!ImpDefs && !RegMask)
@@ -2737,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
return true;
if (ImpDefs)
- for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
// Return true if SU clobbers this physical register use and the
// definition of the register reaches from DepSU. IsReachable queries
// a topological forward sort of the DAG (following the successors).
@@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const uint16_t *SUImpDefs =
+ const MCPhysReg *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
if (!SUImpDefs && !SURegMask)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 159c28c..5cc8066 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -86,12 +86,6 @@ namespace llvm {
/// flagged together nodes with a single SUnit.
void BuildSchedGraph(AliasAnalysis *AA);
- /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
- /// CopyToReg and its only active data operands are CopyFromReg within a
- /// single block loop.
- ///
- void InitVRegCycleFlag(SUnit *SU);
-
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void InitNumRegDefsLeft(SUnit *SU);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 14f44cc..abbc48e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
-/// isScalarToVector - Return true if the specified node is a
-/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
-/// element is not an undef.
-bool ISD::isScalarToVector(const SDNode *N) {
- if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return true;
-
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
- if (N->getOperand(0).getOpcode() == ISD::UNDEF)
- return false;
- unsigned NumElems = N->getNumOperands();
- if (NumElems == 1)
- return false;
- for (unsigned i = 1; i < NumElems; ++i) {
- SDValue V = N->getOperand(i);
- if (V.getOpcode() != ISD::UNDEF)
- return false;
- }
- return true;
-}
-
/// allOperandsUndef - Return true if the node has at least one operand
/// and all operands of the specified node are ISD::UNDEF.
bool ISD::allOperandsUndef(const SDNode *N) {
@@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
ID.AddInteger(Op.getResNo());
}
}
+
/// Add logical or fast math flag values to FoldingSetNodeID value.
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
const SDNodeFlags *Flags) {
- if (!Flags || !isBinOpWithFlags(Opcode))
+ if (!isBinOpWithFlags(Opcode))
return;
- unsigned RawFlags = Flags->getRawFlags();
- // If no flags are set, do not alter the ID. We must match the ID of nodes
- // that were created without explicitly specifying flags. This also saves time
- // and allows a gradual increase in API usage of the optional optimization
- // flags.
- if (RawFlags != 0)
- ID.AddInteger(RawFlags);
+ unsigned RawFlags = 0;
+ if (Flags)
+ RawFlags = Flags->getRawFlags();
+ ID.AddInteger(RawFlags);
}
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
- if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
- AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
+ AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() {
SmallVector<SDNode*, 128> DeadNodes;
// Add all obviously-dead nodes to the DeadNodes worklist.
- for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
- if (I->use_empty())
- DeadNodes.push_back(I);
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
RemoveDeadNodes(DeadNodes);
@@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) {
void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
}
@@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
UpdateListeners(nullptr) {
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
while (!AllNodes.empty())
- DeallocateNode(AllNodes.begin());
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
}
BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
@@ -1023,7 +1003,7 @@ void SelectionDAG::clear() {
static_cast<SDNode*>(nullptr));
EntryNode.UseList = nullptr;
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
}
@@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
- TargetFlags);
+ SDNode *N =
+ new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
- ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- return getNode(Opcode, SDLoc(Op), ShTy, Op);
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(Align - 1, dl, VAList.getValueType()));
+
+ VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+}
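
The overalignment path in expandVAArg rounds the va_list pointer up with the classic add-then-mask trick: add Align - 1, then mask with -(int64_t)Align. A minimal standalone check of that arithmetic:

    #include <cassert>
    #include <cstdint>

    uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
      assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
      return (Ptr + Align - 1) & -(int64_t)Align; // same add-then-mask as above
    }

    int main() {
      assert(alignUp(13, 8) == 16); // rounds up to the next multiple of 8
      assert(alignUp(16, 8) == 16); // already aligned: unchanged
      return 0;
    }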
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl,
+ Node->getOperand(0), Node->getOperand(2),
+ MachinePointerInfo(VS), false, false, false, 0);
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
}
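
expandVACopy lowers va_copy to a load of the current va_list value followed by a store into the destination list. At the source level this corresponds to plain va_copy; a small runnable reference:

    #include <cstdarg>
    #include <cstdio>

    void demo(int N, ...) {
      va_list Ap, Aq;
      va_start(Ap, N);
      va_copy(Aq, Ap);                 // the load/store pair the expansion emits
      printf("%d\n", va_arg(Aq, int)); // prints: 42
      va_end(Aq);
      va_end(Ap);
    }

    int main() { demo(1, 42); return 0; }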
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
@@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
/// CreateStackTemporary - Create a stack temporary suitable for holding
/// either of the specified value types.
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
- VT2.getStoreSizeInBits())/8;
+ unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
@@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD)
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
}
break;
}
@@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ return std::min(Tmp, Tmp2);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
const int rIndex = Items - 1 -
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- // If the sign portion ends in our element the substraction gives correct
+ // If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
@@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ APInt AZero, AOne;
+ APInt BZero, BOne;
+ computeKnownBits(A, AZero, AOne);
+ computeKnownBits(B, BZero, BOne);
+ return (AZero | BZero).isAllOnesValue();
+}
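
haveNoCommonBitsSet reports true when every bit position is known zero in A or known zero in B; for fully known values this reduces to (A & B) == 0, the condition under which an ADD can be replaced by an OR because no carries can occur. Standalone check of that equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t A = 0x03, B = 0x0C;  // disjoint bit patterns
      assert((A & B) == 0);        // no common bits set
      assert((A + B) == (A | B));  // so add and or agree: no carries happen
      return 0;
    }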
+
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
@@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
- else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
@@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
- EVT SVT = VT.getScalarType();
- EVT InVT = BV->getValueType(0);
- EVT InSVT = InVT.getScalarType();
-
- // Find legal integer scalar type for constant promotion and
- // ensure that its scalar size is at least as large as source.
- EVT LegalSVT = SVT;
- if (SVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT);
- if (LegalSVT.bitsLT(SVT)) break;
- }
-
- // Let the above scalar folding handle the folding of each element.
- SmallVector<SDValue, 8> Ops;
- for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- SDValue OpN = BV->getOperand(i);
- EVT OpVT = OpN.getValueType();
-
- // Build vector (integer) scalar operands may need implicit
- // truncation - do this before constant folding.
- if (OpVT.isInteger() && OpVT.bitsGT(InSVT))
- OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN);
-
- OpN = getNode(Opcode, DL, SVT, OpN);
-
- // Legalize the (integer) scalar constant if necessary.
- if (LegalSVT != SVT)
- OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN);
-
- if (OpN.getOpcode() != ISD::UNDEF &&
- OpN.getOpcode() != ISD::Constant &&
- OpN.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(OpN);
- }
- if (Ops.size() == VT.getVectorNumElements())
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
- break;
+ SDValue Ops = { Operand };
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
}
}
}
@@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid fpext node, dst < src!");
if (Operand.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid sext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::UNDEF)
@@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid zext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
@@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid anyext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
@@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
- "Invalid truncate node, src < dst!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsGT(VT) &&
+ "Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
@@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0));
+ Operand.getNode()->getOperand(0),
+ &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getNode()->getOperand(0);
break;
@@ -3182,6 +3198,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
@@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
}
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
+ EVT VT,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ if (!VT.isVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorNumElements() == NumElts;
+ };
+
+ auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (Op.getOpcode() == ISD::UNDEF) ||
+ (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant());
+ };
+
+ // All operands must be vector types with the same number of elements as
+ // the result type and must be either UNDEF or a build vector of constant
+ // or UNDEF scalars.
+ if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
+ !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ return SDValue();
+
+ // If we are comparing vectors, then the result needs to be an i1 boolean
+ // that is then sign-extended back to the legal result type.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+
+ // Find legal integer scalar type for constant promotion and
+ // ensure that its scalar size is at least as large as source.
+ EVT LegalSVT = VT.getScalarType();
+ if (LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned i = 0; i != NumElts; i++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
+ if (!InBV) {
+ // We've checked that this is UNDEF or a constant of some kind.
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp = InBV->getOperand(i);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (ScalarResult.getOpcode() != ISD::UNDEF &&
+ ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ assert(ScalarResults.size() == NumElts &&
+ "Unexpected number of scalar results for BUILD_VECTOR");
+ return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults);
+}
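
FoldConstantVectorArithmetic folds lane-at-a-time: for lane i it gathers operand i of every build vector, folds the scalar operation (truncating over-wide integer scalars first), and gives up unless the result is a constant or UNDEF. A plain C++ sketch of the same per-lane shape:

    #include <array>
    #include <cstdio>

    int main() {
      std::array<int, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40}, R{};
      for (size_t i = 0; i != R.size(); ++i)
        R[i] = A[i] + B[i]; // scalar fold of lane i, e.g. ISD::ADD
      for (int V : R)
        printf("%d ", V);   // prints: 11 22 33 44, rebuilt as one vector
      return 0;
    }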
+
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags *Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ // Canonicalize constant to RHS if commutative.
+ if (isCommutativeBinOp(Opcode)) {
+ if (N1C && !N2C) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ } else if (N1CFP && !N2CFP) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+
switch (Opcode) {
default: break;
case ISD::TokenFactor:
@@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
- // 0+x --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
- if (CFP->getValueAPF().isZero())
- return N2;
// x+0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FMUL) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
- SDValue V = N2;
-
- // If the first operand isn't the constant, try the second
- if (!CFP) {
- CFP = dyn_cast<ConstantFPSDNode>(N2);
- V = N1;
- }
-
- if (CFP) {
- // 0*x --> 0
- if (CFP->isZero())
- return SDValue(CFP,0);
- // 1*x --> x
- if (CFP->isExactlyValue(1.0))
- return V;
- }
+ // x*0 --> 0
+ if (N2CFP && N2CFP->isZero())
+ return N2;
+ // x*1 --> x
+ if (N2CFP && N2CFP->isExactlyValue(1.0))
+ return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
@@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
- isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ N2C && "Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
@@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
- if (Op.getValueType() != VT.getScalarType()) break;
if (Op.getOpcode() == ISD::UNDEF) {
- Ops.push_back(Op);
+ Ops.push_back(getUNDEF(VT.getScalarType()));
continue;
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = C->getAPIntValue();
+ Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
Ops.push_back(SignExtendInReg(Val));
continue;
}
@@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
return N1.getOperand(N2C->getZExtValue());
// EXTRACT_ELEMENT of a constant int is also very common.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
- APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR: {
- SDValue Index = N2;
+ case ISD::EXTRACT_SUBVECTOR:
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
@@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
- if (isa<ConstantSDNode>(Index)) {
- assert((VT.getVectorNumElements() +
- cast<ConstantSDNode>(Index)->getZExtValue()
+ if (N2C) {
+ assert((VT.getVectorNumElements() + N2C->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
@@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
}
- }
// Perform trivial constant folding.
if (SDValue SV =
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
- // Canonicalize constant to RHS if commutative.
- if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
-
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N1CFP) {
- if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Canonicalize constant to RHS if commutative.
- std::swap(N1CFP, N2CFP);
- std::swap(N1, N2);
- } else if (N2CFP) {
+ if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
@@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
case ISD::FREM :
- s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ s = V1.mod(V2);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
@@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
break;
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
- SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
- if (Simp.getNode()) return Simp;
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return V;
break;
}
case ISD::SELECT:
- if (N1C) {
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
if (N1C->getZExtValue())
return N2; // select true, X, Y -> X
return N3; // select false, X, Y -> Y
@@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
return true;
}
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction()->optForMinSize();
+ return MF.getFunction()->optForSize();
+}
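
shouldLowerMemFuncForSize treats -Os (optForSize) and -Oz (optForMinSize) alike everywhere except Darwin, where only -Oz may trade speed for size. A tiny standalone sketch of the assumed mapping (not the LLVM API):

    #include <cassert>

    // Assumed policy: Darwin lowers for size only under minsize (-Oz);
    // other targets lower whenever the function is marked optsize.
    bool shouldLowerForSize(bool IsDarwin, bool OptSize, bool MinSize) {
      return IsDarwin ? MinSize : OptSize;
    }

    int main() {
      assert(!shouldLowerForSize(true, /*-Os*/ true, false)); // Darwin -Os: no
      assert( shouldLowerForSize(true, true, /*-Oz*/ true));  // Darwin -Oz: yes
      assert( shouldLowerForSize(false, true, false));        // elsewhere -Os: yes
      return 0;
    }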
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
@@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
@@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
true, DstPtrInfo, SrcPtrInfo);
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
@@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
@@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
- return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
// If this is (FI+Offset1)+Offset2, we can model it.
if (Ptr.getOpcode() != ISD::ADD ||
@@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
return MachinePointerInfo();
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- return MachinePointerInfo::getFixedStack(FI, Offset+
- cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
- return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.getOpcode() == ISD::UNDEF)
- return InferPointerInfo(Ptr);
+ return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
@@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr, Offset);
+ PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- MaskedGatherSDNode *N =
+ MaskedGatherSDNode *N =
new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
Ops, VTs, VT, MMO);
CSEMap.InsertNode(N, IP);
@@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0]);
- case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
@@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
"Update with wrong number of operands");
// If no operands changed just return the input node.
- if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
return N;
// See if the modified node already exists.
@@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = I++;
+ SDNode *N = &*I++;
checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
N->setNodeId(DAGSize++);
- allnodes_iterator Q = N;
+ allnodes_iterator Q(N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
@@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
- SDNode *N = I;
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
P->setNodeId(Degree);
}
}
- if (I == SortedPos) {
+ if (&Node == SortedPos) {
#ifndef NDEBUG
- SDNode *S = ++I;
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
dbgs() << "Overran sorted position:\n";
S->dumprFull(this); dbgs() << "\n";
dbgs() << "Checking if this is due to cycles\n";
@@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
// SDNode Class
//===----------------------------------------------------------------------===//
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
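
A short usage sketch (ours, not from the patch) of the new constant predicates, assuming some SDNode *N inside a combine:

    // Before: spell out the dyn_cast and the null check at every use site.
    //   if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    //     if (C->isNullValue()) ...
    // After: the intent reads directly.
    if (isNullConstant(N->getOperand(1)))
      return N->getOperand(0); // illustrative fold: (add x, 0) -> x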
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
+const SDNodeFlags *SDNode::getFlags() const {
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ return &FlagsNode->Flags;
+ return nullptr;
+}
+
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
switch (N->getOpcode()) {
- default:
- Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
break;
+ }
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
@@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ APFloat APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
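
A worked example (ours) for getConstantFPSplatPow2ToLog2Int:

    //   splat of 8.0 -> converts exactly to 8 -> exactLogBase2() == 3
    //   splat of 6.0 -> converts exactly to 6, not a power of two -> -1
    //   splat of 2.5 -> conversion is inexact (IsExact == false)  -> -1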
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2c3c0eb1..d2ea85ab 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -63,6 +64,7 @@
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision",
cl::init(0));
static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
// Limit the width of DAG chains. This is important in general to prevent
@@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
@@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartEVT);
- return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT, Val);
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
@@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
}
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT) {
- bool Smaller = ValueVT.bitsLE(PartEVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT.getScalarType(), Val);
- }
+ ValueVT.getVectorElementType() != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
@@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
@@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartEVT.bitsLE(ValueVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else{
// Vector -> scalar conversion.
assert(ValueVT.getVectorNumElements() == 1 &&
@@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- bool Smaller = ValueVT.bitsLE(PartVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
Parts[0] = Val;
@@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
+ for (EVT ValueVT : ValueVTs) {
unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
@@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
- if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ !isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
@@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
Val)) {
SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
- IsIndirect, Offset, dl, DbgSDNodeOrder);
+ false, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
llvm_unreachable("Can't get register for value!");
}
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I];
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of a funclet.
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
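
A hedged illustration (ours, with invented block names) of the walk findUnwindDestinations performs:

    //   invoke void @f() to label %cont unwind label %dispatch
    //   dispatch:                     ; imaginary block holding a catchswitch
    //     %cs = catchswitch within none [label %catch.a, label %catch.b]
    //              unwind label %cleanup
    // The walk skips %dispatch itself, records %catch.a and %catch.b (marked
    // as funclet entries under MSVC C++ / CoreCLR), then follows the unwind
    // edge, scaling Prob by the %dispatch -> %cleanup edge probability, and
    // finally stops at the cleanup pad.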
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
@@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
- SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs;
@@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- if (!BPI)
- return 0;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
- return BPI->getEdgeWeight(SrcBB, DstBB);
+ if (!BPI) {
+ // If BPI is not available, set the default probability to 1 / N, where N
+ // is the number of successors.
+ auto SuccSize = std::max<uint32_t>(
+ std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
}
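
Illustratively (ours): when BPI is unavailable and the source block has four IR successors, every queried edge gets BranchProbability(1, 4); the std::max clamp keeps a successor-less block from producing a zero denominator.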
-void SelectionDAGBuilder::
-addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight /* = 0 */) {
- if (!Weight)
- Weight = getEdgeWeight(Src, Dst);
- Src->addSuccessor(Dst, Weight);
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
}
-
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TWeight,
- uint32_t FWeight) {
+ BranchProbability TProb,
+ BranchProbability FProb) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
Condition = getFCmpCondCode(FC->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- } else {
- (void)Condition; // silence warning.
- llvm_unreachable("Unknown compare instruction");
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
- TBB, FBB, CurBB, TWeight, FWeight);
+ TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
return;
}
@@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, TBB, FBB, CurBB, TWeight, FWeight);
+ nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
-/// Scale down both weights to fit into uint32_t.
-static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
- uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
- NewTrue = NewTrue / Scale;
- NewFalse = NewFalse / Scale;
-}
-
/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
/// recursively emit branch trees for its subexpressions.
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc, uint32_t TWeight,
- uint32_t FWeight) {
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TWeight, FWeight);
+ TProb, FProb);
return;
}
// Create TmpBB after CurBB.
- MachineFunction::iterator BBI = CurBB;
+ MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
@@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
- // assumes that
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
- uint64_t NewTrueWeight = TWeight;
- uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = TWeight;
- NewFalseWeight = 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
- // assumes that
- // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
-
- uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
- uint64_t NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = 2 * (uint64_t)TWeight;
- NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
}
}
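
A worked check (ours) of the or-branch bookkeeping above, taking TProb = 3/4 and FProb = 1/4:

    //   BB1:   NewTrueProb = 3/8, NewFalseProb = 3/8 + 1/4 = 5/8
    //   TmpBB: normalize {3/8, 1/4} -> {3/5, 2/5}     // = A/(1+B), 2B/(1+B)
    // Identity: 3/8 + (5/8)*(3/5) = 3/8 + 3/8 = 3/4, the original TrueProb.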
@@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
- BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
- getEdgeWeight(BrMBB, Succ1MBB));
+ Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
GuardPtr, MachinePointerInfo(IRGuard, 0),
true, false, false, Align);
- SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
- StackSlotPtr,
- MachinePointerInfo::getFixedStack(FI),
- true, false, false, Align);
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+ false, false, Align);
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
@@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- nullptr, 0, false, getCurSDLoc(), false, false).second;
+ None, false, getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithWeight(SwitchBB, B.Default);
- addSuccessorWithWeight(SwitchBB, MBB);
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, RangeCmp,
@@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
@@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
- // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
- addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
- // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
- addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // B.ExtraProb and BranchProbToNext are not guaranteed to sum to one, as
+ // they are relative probabilities (and thus behave more like weights), so
+ // normalize them to make their sum one.
+ SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
@@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
- // Retrieve successors.
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
- MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
const Value *Callee(I.getCalledValue());
const Function *Fn = dyn_cast<Function>(Callee);
@@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, LandingPad);
+ visitPatchpoint(&I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
}
} else
- LowerCallTo(&I, getValue(Callee), false, LandingPad);
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
@@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
}
- // Update successor info
- addSuccessorWithWeight(InvokeMBB, Return);
- addSuccessorWithWeight(InvokeMBB, LandingPad);
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
@@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
- assert(FuncInfo.MBB->isLandingPad() &&
+ assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
@@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.getExceptionPointerRegister() == 0 &&
- TLI.getExceptionSelectorRegister() == 0)
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
@@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
// If this case has the same successor and is a neighbour, merge it into
// the previous cluster.
Clusters[DstIndex - 1].High = CaseVal;
- Clusters[DstIndex - 1].Weight += CC.Weight;
- assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!");
+ Clusters[DstIndex - 1].Prob += CC.Prob;
} else {
std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
sizeof(Clusters[SrcIndex]));
@@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
- addSuccessorWithWeight(IndirectBrMBB, Succ);
+ addSuccessorWithProb(IndirectBrMBB, Succ);
}
+ IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
@@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// Min/max matching is only viable if all output VTs are the same.
if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
- ISD::NodeType Opc = ISD::DELETED_NODE;
- switch (SPF) {
- case SPF_UMAX: Opc = ISD::UMAX; break;
- case SPF_UMIN: Opc = ISD::UMIN; break;
- case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- default: break;
- }
-
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
- while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
+ VT != TLI.getTypeToTransformTo(Ctx, VT))
VT = TLI.getTypeToTransformTo(Ctx, VT);
- if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
- // If the underlying comparison instruction is used by any other instruction,
- // the consumed instructions won't be destroyed, so it is not profitable
- // to convert to a min/max.
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY: {
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ Opc = ISD::FMINNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
+ Opc = ISD::FMINNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
+ ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ }
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ Opc = ISD::FMAXNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
+ Opc = ISD::FMAXNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
+ ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ }
+ break;
+ default: break;
+ }
+
+ if (Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
OpCode = Opc;
LHSVal = getValue(LHS);
@@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// throughout the function's lifetime.
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
+ isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(
- MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignemt, Mask)
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Value *PtrOperand = I.getArgOperand(1);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(I.getArgOperand(0));
@@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
setValue(&I, StoreNode);
}
-// Gather/scatter receive a vector of pointers.
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
-static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers.
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer, it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector, we
+// extract the splat value and use it as the uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
- assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (!Gep || Gep->getNumOperands() > 2)
+ SelectionDAG& DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getNumOperands() > 2)
return false;
- ShuffleVectorInst *ShuffleInst =
- dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
- if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
- cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
- Instruction::InsertElement)
+
+ const Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+ Value *IndexVal = GEP->getOperand(1);
- SelectionDAG& DAG = SDB->DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Check is the Ptr is inside current basic block
- // If not, look for the shuffle instruction
- if (SDB->findValue(Ptr))
- Base = SDB->getValue(Ptr);
- else if (SDB->findValue(ShuffleInst)) {
- SDValue ShuffleNode = SDB->getValue(ShuffleInst);
- SDLoc sdl = ShuffleNode;
- Base = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, sdl,
- ShuffleNode.getValueType().getScalarType(), ShuffleNode,
- DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDB->setValue(Ptr, Base);
- }
- else
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
- Value *IndexVal = Gep->getOperand(1);
- if (SDB->findValue(IndexVal)) {
- Index = SDB->getValue(IndexVal);
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ if (SDB->findValue(Sext->getOperand(0))) {
IndexVal = Sext->getOperand(0);
- if (SDB->findValue(IndexVal))
- Index = SDB->getValue(IndexVal);
+ Index = SDB->getValue(IndexVal);
}
- return true;
}
- return false;
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+ Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ }
+ return true;
}
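
A hedged sketch (ours; %p and %ind are invented names) of the splat-base case the rewrite newly handles:

    //   %splat = a splat of i32* %p built via insertelement + shufflevector
    //   %gep   = getelementptr i32, <8 x i32*> %splat, <8 x i32> %ind
    // getSplatValue(%splat) recovers %p, which becomes Base, and %ind becomes
    // Index; a scalar Index is widened by a BUILD_VECTOR of GEPWidth copies
    // so the gather/scatter node always sees a vector index.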
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
- Value *Ptr = I.getArgOperand(1);
+ const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
@@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
- Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
MachineMemOperand::MOStore, VT.getStoreSize(),
@@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDValue InChain = DAG.getRoot();
if (AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
InChain = DAG.getEntryNode();
}
@@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
- Value *Ptr = I.getArgOperand(0);
+ const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
@@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ AA->pointsToConstantMemory(MemoryLocation(
+ BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
// IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
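
A sketch (ours) of the limited-precision scheme this helper implements, with t0 = x * log2(e) already formed by the caller:

    //   2^t0 = 2^IntegerPartOfX * 2^FractionalPartOfX
    // The integer part is shifted into the exponent field of the result and
    // added in; the fractional part, in [0, 1), is approximated by a minimax
    // polynomial whose degree is chosen from LimitFloatPrecision (up to 6,
    // 12, or 18 bits).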
@@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// #define LOG2OFe 1.4426950f
// t0 = Op * LOG2OFe
+
+ // TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
@@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
}
}
+ // TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
- // If optimizing for size, don't insert too many multiplies. This
- // inserts up to 5 multiplies.
+ if (!F->optForSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
@@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
@@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
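
The "simple binary decomposition method" is square-and-multiply; a minimal scalar analogue (ours, not the DAG code):

    double PowiSketch(double LHS, unsigned Val) {
      double Res = 1.0;          // logically starts equal to 1.0
      double CurSquare = LHS;    // holds LHS^(2^k) at step k
      while (Val) {
        if (Val & 1)
          Res *= CurSquare;      // fold in this set bit's power
        CurSquare *= CurSquare;  // square for the next bit
        Val >>= 1;
      }
      return Res;
    }

    // Roughly Log2_32(Val) squarings plus countPopulation(Val) multiplies,
    // which is the quantity the optForSize guard above bounds.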
-// getTruncatedArgReg - Find underlying register used for an truncated
-// argument.
-static unsigned getTruncatedArgReg(const SDValue &N) {
- if (N.getOpcode() != ISD::TRUNCATE)
+// getUnderlyingArgReg - Find underlying register used for a truncated or
+// bitcasted argument.
+static unsigned getUnderlyingArgReg(const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg:
+ return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ return getUnderlyingArgReg(N.getOperand(0));
+ default:
return 0;
-
- const SDValue &Ext = N.getOperand(0);
- if (Ext.getOpcode() == ISD::AssertZext ||
- Ext.getOpcode() == ISD::AssertSext) {
- const SDValue &CFR = Ext.getOperand(0);
- if (CFR.getOpcode() == ISD::CopyFromReg)
- return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
}
- return 0;
}
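
An illustrative chain (ours) for the rewritten getUnderlyingArgReg:

    //   (TRUNCATE (AssertZext (CopyFromReg Chain, Register:%vreg7)))
    // The recursion peels TRUNCATE and AssertZext, reaches CopyFromReg, and
    // returns %vreg7; any other root opcode returns 0, meaning no underlying
    // argument register was found.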
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
@@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
- unsigned Reg;
- if (N.getOpcode() == ISD::CopyFromReg)
- Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- else
- Reg = getTruncatedArgReg(N);
+ unsigned Reg = getUnderlyingArgReg(N);
if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
@@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
- isa<Argument>(Address);
-
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-
- if (isParameter && !AI) {
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (FINode)
- // Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
- else {
- // Address is an argument, so try to emit its dbg value using
- // virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
- N);
- return nullptr;
- }
- } else if (AI)
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
+ FINode->getIndex(), 0, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
+ return nullptr;
+ } else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
- else {
- // Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
- DEBUG(Address->dump());
- return nullptr;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
@@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
- IsIndirect, N)) {
+ false, N)) {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- IsIndirect, Offset, dl, SDNodeOrder);
+ false, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::eh_sjlj_setup_dispatch: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return nullptr;
+ }
case Intrinsic::masked_gather:
visitMaskedGather(I);
@@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
} else {
+ // TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res.getValue(1));
return nullptr;
}
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return nullptr;
}
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
+ // target.
+ if (PtrTy != ResTy)
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return nullptr;
+ }
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
@@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
@@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
- case Intrinsic::eh_actions:
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
visitStatepoint(I);
return nullptr;
}
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result: {
visitGCResult(I);
return nullptr;
@@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
-
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::eh_begincatch:
- case Intrinsic::eh_endcatch:
- llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+
+ case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
- unsigned Reg = TLI.getExceptionPointerRegister();
- assert(Reg && "cannot get exception code on this platform");
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
- assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
- unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
return nullptr;
}
@@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
@@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
- LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
@@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(Result.second);
}
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ if (MMI.hasEHFunclets()) {
+ assert(CLI.CS);
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ BeginLabel, EndLabel);
+ } else {
+ MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
}
return Result;
@@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
@@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CS)
.setTailCall(isTailCall);
- std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
@@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
@@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(Chain, getCurSDLoc(),
- OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
- unsigned NumArgs, SDValue Callee,
- Type *ReturnTy,
- MachineBasicBlock *LandingPad,
- bool IsPatchPoint) {
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+ ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
.setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
- return lowerInvokable(CLI, LandingPad);
+ return lowerInvokable(CLI, EHPadBB);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
@@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
- std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- LandingPad, true);
+ std::pair<SDValue, SDValue> Result = lowerCallOperands(
+ CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
- else if (j != 0)
+ else if (j != 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
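+        // For example, an i64 argument passed as two i32 parts gets Split
+        // on part 0 and SplitEnd on part 1; middle parts of wider splits
+        // carry only OrigAlign(1).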
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
@@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
PtrVT));
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
- false, false, 1);
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
if (FastISel)
return A->use_empty();
- const BasicBlock *Entry = A->getParent()->begin();
+ const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
- if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
@@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (F.getCallingConv() == CallingConv::X86_INTR) {
+      // The x86 interrupt calling convention passes the interrupt frame
+      // (the first parameter) by value on the stack.
+ if (Idx == 1)
+ Flags.setByVal();
+ }
if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
- else if (i > 0)
+ else if (i > 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
@@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If this argument is unused then remember its value. It is used to generate
// debugging information.
if (I->use_empty() && NumValues) {
- SDB->setUnusedArgValue(I, InVals[i]);
+ SDB->setUnusedArgValue(&*I, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(I, Res);
+ SDB->setValue(&*I, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
// If this argument is live outside of the entry block, insert a copy from
@@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[I] = Reg;
+ FuncInfo->ValueMap[&*I] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(I);
- SDB->CopyToExportRegsIfNeeded(I);
+ if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&*I);
+ SDB->CopyToExportRegsIfNeeded(&*I);
}
}
@@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB,
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI = ParentMBB;
+ MachineFunction::iterator BBI(ParentMBB);
SuccMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
return SuccMBB;
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
- MachineFunction::iterator I = MBB;
+ MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
- return I;
+ return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
@@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
CaseCluster &JTCluster) {
assert(First <= Last);
- uint32_t Weight = 0;
+ auto Prob = BranchProbability::getZero();
unsigned NumCmps = 0;
std::vector<MachineBasicBlock*> Table;
- DenseMap<MachineBasicBlock*, uint32_t> JTWeights;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Weight += Clusters[I].Weight;
- assert(Weight >= Clusters[I].Weight && "Weight overflow!");
+ Prob += Clusters[I].Prob;
APInt Low = Clusters[I].Low->getValue();
APInt High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
@@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
for (uint64_t J = 0; J < ClusterSize; ++J)
Table.push_back(Clusters[I].MBB);
- JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTWeights.size();
+ unsigned NumDests = JTProbs.size();
if (isSuitableForBitTests(NumDests, NumCmps,
Clusters[First].Low->getValue(),
Clusters[Last].High->getValue())) {
@@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
for (MachineBasicBlock *Succ : Table) {
if (Done.count(Succ))
continue;
- addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
Done.insert(Succ);
}
+ JumpTableMBB->normalizeSuccProbs();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
@@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
JTCases.emplace_back(std::move(JTH), std::move(JT));
JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
- JTCases.size() - 1, Weight);
+ JTCases.size() - 1, Prob);
return true;
}
@@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
.getSizeInBits();
assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
- if (Low.isNonNegative() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a
- // word without having to subtract minValue. In this case,
- // we can optimize away the subtraction.
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
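+  // For example, clusters {1,2},{3},{4,5} cover [1,5] with no holes, so
+  // once the range check passes no value can reach the default
+  // destination.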
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
LowBound = APInt::getNullValue(Low.getBitWidth());
CmpRange = High;
+ ContiguousRange = false;
} else {
LowBound = Low;
CmpRange = High - Low;
}
CaseBitsVector CBV;
- uint32_t TotalWeight = 0;
+ auto TotalProb = BranchProbability::getZero();
for (unsigned i = First; i <= Last; ++i) {
// Find the CaseBits for this destination.
unsigned j;
@@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
if (CBV[j].BB == Clusters[i].MBB)
break;
if (j == CBV.size())
- CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
CaseBits *CB = &CBV[j];
- // Update Mask, Bits and ExtraWeight.
+ // Update Mask, Bits and ExtraProb.
uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
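+      // For example, a cluster spanning Lo=2..Hi=4 contributes
+      // (-1ULL >> 61) << 2 == 0b11100 to the mask.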
CB->Bits += Hi - Lo + 1;
- CB->ExtraWeight += Clusters[i].Weight;
- TotalWeight += Clusters[i].Weight;
- assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
}
BitTestInfo BTI;
std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
- // Sort by weight first, number of bits second.
- if (a.ExtraWeight != b.ExtraWeight)
- return a.ExtraWeight > b.ExtraWeight;
+ // Sort by probability first, number of bits second.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
return a.Bits > b.Bits;
});
for (auto &CB : CBV) {
MachineBasicBlock *BitTestBB =
FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
- BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
}
BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false, nullptr,
- nullptr, std::move(BTI));
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
- BitTestCases.size() - 1, TotalWeight);
+ BitTestCases.size() - 1, TotalProb);
return true;
}
@@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
- NextMBB = BBI;
+ NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
@@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
ISD::SETEQ);
// Update successor info.
- // Both Small and Big will jump to Small.BB, so we sum up the weights.
- addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
- addSuccessorWithWeight(
- SwitchMBB, DefaultMBB,
- // The default destination is the first successor in IR.
- BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
- : 0);
+ // Both Small and Big will jump to Small.BB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
@@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
if (TM.getOptLevel() != CodeGenOpt::None) {
- // Order cases by weight so the most likely case will be checked first.
+ // Order cases by probability so the most likely case will be checked first.
std::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
- return a.Weight > b.Weight;
+ return a.Prob > b.Prob;
});
// Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of weights.
+    // without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
- if (I->Weight > W.LastCluster->Weight)
+ if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
@@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- // Compute total weight.
- uint32_t UnhandledWeights = 0;
- for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
- UnhandledWeights += I->Weight;
- assert(UnhandledWeights >= I->Weight && "Weight overflow!");
- }
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
@@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
+ UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
@@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
- addSuccessorWithWeight(CurMBB, Fallthrough);
- addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
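+      // For example, with DefaultProb == 1/4: JumpProb grows by 1/8,
+      // FallthroughProb shrinks by 1/8, and the JumpMBB -> DefaultMBB
+      // edge is reset to 1/8.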
+
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
@@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
- // If we're in the right place, emit the bit test header header right now.
- if (CurMBB ==SwitchMBB) {
+ BTB->DefaultProb = UnhandledProbs;
+      // If the cases in the bit test don't form a contiguous range, split
+      // the probability of the edge to Fallthrough evenly between the two
+      // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
@@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
RHS = I->High;
}
- // The false weight is the sum of all unhandled cases.
- UnhandledWeights -= I->Weight;
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
- UnhandledWeights);
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
+ UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
- if (X.Weight != CC.Weight)
- return X.Weight > CC.Weight;
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
@@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
- // Balance the tree based on branch weights to create a near-optimal (in terms
- // of search time given key frequency) binary search tree. See e.g. Kurt
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
- uint32_t LeftWeight = LastLeft->Weight;
- uint32_t RightWeight = FirstRight->Weight;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
- // find a partitioning of the clusters which balances the weight on both
- // sides. If LeftWeight and RightWeight are equal, alternate which side is
- // taken to ensure 0-weight nodes are distributed evenly.
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
- if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
- LeftWeight += (++LastLeft)->Weight;
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
else
- RightWeight += (--FirstRight)->Weight;
+ RightProb += (--FirstRight)->Prob;
I++;
}
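+  // For example, cluster probabilities {4,1,1,4} (in tenths) partition
+  // two-and-two, leaving 5/10 plus DefaultProb/2 on each side.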
@@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
@@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
- WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
@@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
- WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
- LeftWeight, RightWeight);
+ LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
- uint32_t Weight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
- Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 7006754..49a3872 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -17,6 +17,7 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -30,7 +31,6 @@
namespace llvm {
class AddrSpaceCastInst;
-class AliasAnalysis;
class AllocaInst;
class BasicBlock;
class BitCastInst;
@@ -154,39 +154,39 @@ private:
unsigned JTCasesIndex;
unsigned BTCasesIndex;
};
- uint32_t Weight;
+ BranchProbability Prob;
static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
- MachineBasicBlock *MBB, uint32_t Weight) {
+ MachineBasicBlock *MBB, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_Range;
C.Low = Low;
C.High = High;
C.MBB = MBB;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster jumpTable(const ConstantInt *Low,
const ConstantInt *High, unsigned JTCasesIndex,
- uint32_t Weight) {
+ BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_JumpTable;
C.Low = Low;
C.High = High;
C.JTCasesIndex = JTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
- unsigned BTCasesIndex, uint32_t Weight) {
+ unsigned BTCasesIndex, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_BitTests;
C.Low = Low;
C.High = High;
C.BTCasesIndex = BTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
};
@@ -198,13 +198,13 @@ private:
uint64_t Mask;
MachineBasicBlock* BB;
unsigned Bits;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
- uint32_t Weight):
- Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
- CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
};
typedef std::vector<CaseBits> CaseBitsVector;
@@ -217,13 +217,13 @@ private:
/// blocks needed by multi-case switch statements.
struct CaseBlock {
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle,
- MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
- MachineBasicBlock *me,
- uint32_t trueweight = 0, uint32_t falseweight = 0)
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
- TrueWeight(trueweight), FalseWeight(falseweight) { }
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
+ FalseProb(falseprob) {}
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
@@ -239,8 +239,8 @@ private:
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
- // TrueWeight/FalseWeight - branch weights.
- uint32_t TrueWeight, FalseWeight;
+    // TrueProb/FalseProb - branch probabilities.
+ BranchProbability TrueProb, FalseProb;
};
struct JumpTable {
@@ -272,32 +272,35 @@ private:
struct BitTestCase {
BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- uint32_t Weight):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
};
typedef SmallVector<BitTestCase, 3> BitTestInfo;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, MVT RgVT, bool E,
- MachineBasicBlock* P, MachineBasicBlock* D,
- BitTestInfo C):
- First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- Parent(P), Default(D), Cases(std::move(C)) { }
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
+ Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
MVT RegVT;
bool Emitted;
+ bool ContiguousRange;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
BitTestInfo Cases;
+ BranchProbability Prob;
+ BranchProbability DefaultProb;
};
/// Minimum jump table density, in percent.
@@ -339,6 +342,7 @@ private:
CaseClusterIt LastCluster;
const ConstantInt *GE;
const ConstantInt *LT;
+ BranchProbability DefaultProb;
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
@@ -515,6 +519,7 @@ private:
void resetPerFunctionState() {
FailureMBB = nullptr;
Guard = nullptr;
+ GuardReg = 0;
}
MachineBasicBlock *getParentMBB() { return ParentMBB; }
@@ -592,10 +597,6 @@ public:
///
FunctionLoweringInfo &FuncInfo;
- /// OptLevel - What optimization level we're generating code for.
- ///
- CodeGenOpt::Level OptLevel;
-
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
@@ -613,7 +614,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ DAG(dag), FuncInfo(funcinfo),
HasTailCall(false) {
}
@@ -692,19 +693,20 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB, unsigned Opc,
- uint32_t TW, uint32_t FW);
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TW,
+ BranchProbability FW);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TW, uint32_t FW);
+ BranchProbability TW, BranchProbability FW);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
std::pair<SDValue, SDValue> lowerCallOperands(
ImmutableCallSite CS,
@@ -712,7 +714,7 @@ public:
unsigned NumArgs,
SDValue Callee,
Type *ReturnTy,
- MachineBasicBlock *LandingPad = nullptr,
+ const BasicBlock *EHPadBB = nullptr,
bool IsPatchPoint = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
@@ -722,11 +724,11 @@ public:
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
void LowerStatepoint(ImmutableStatepoint Statepoint,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
private:
- std::pair<SDValue, SDValue> lowerInvokable(
- TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad);
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -734,11 +736,18 @@ private:
void visitSwitch(const SwitchInst &I);
void visitIndirectBr(const IndirectBrInst &I);
void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
- uint32_t getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const;
- void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight = 0);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
@@ -748,7 +757,7 @@ public:
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
@@ -842,7 +851,7 @@ private:
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
void visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5b9b182..a1c6c4c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -30,6 +31,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
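+// Passing -dag-dump-verbose restores the [ORD=...]/[ID=...] tags and the
+// source file:line:column annotations that are suppressed by default (see
+// print_details below).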
+
std::string SDNode::getOperationName(const SelectionDAG *G) const {
switch (getOpcode()) {
default:
@@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
case ISD::ConstantPool: return "ConstantPool";
case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
@@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::FMINNAN: return "fminnan";
+ case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
@@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
+ case ISD::SETCCE: return "setcce";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CALLSEQ_START: return "callseq_start";
case ISD::CALLSEQ_END: return "callseq_end";
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
@@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
case ISD::CTTZ: return "cttz";
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
+
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
@@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETO: return "seto";
case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
+ case ISD::SETUEQ: return "setueq";
case ISD::SETUGT: return "setugt";
case ISD::SETUGE: return "setuge";
case ISD::SETULT: return "setult";
@@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
}
}
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
+
void SDNode::dump() const { dump(nullptr); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
@@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const {
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (const void*)this << ": ";
-
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ",";
if (getValueType(i) == MVT::Other)
@@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
else
OS << getValueType(i).getEVTString();
}
- OS << " = " << getOperationName(G);
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
@@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
<< ']';
}
- if (unsigned Order = getIROrder())
- OS << " [ORD=" << Order << ']';
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
- if (!G)
- return;
+ if (!G)
+ return;
- DILocation *L = getDebugLoc();
- if (!L)
- return;
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
+ }
+}
- if (auto *Scope = L->getScope())
- OS << Scope->getFilename();
- else
- OS << "<unknown>";
- OS << ':' << L->getLine();
- if (unsigned C = L->getColumn())
- OS << ':' << C;
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node) {
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (const SDValue &Op : N->op_values())
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode()))
+ continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)Op.getNode() << ": <multiple use>";
+ }
- dbgs() << '\n';
dbgs().indent(indent);
N->dump(G);
}
void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
+ const SDNode *N = &*I;
+ if (!N->hasOneUse() && N != getRoot().getNode() &&
+ (!shouldPrintInline(*N) || N->use_empty()))
DumpNodes(N, 2, this);
}
@@ -573,10 +606,30 @@ void SelectionDAG::dump() const {
}
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
print_types(OS, G);
+ OS << " = " << getOperationName(G);
print_details(OS, G);
}
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ } else if (shouldPrintInline(*Value.getNode())) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ } else {
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+ }
+}
+
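+// With these helpers an expression dumps roughly as
+//   t5: i32 = add t3, Constant:i32<7>
+// where t3 names another node's result and the zero-operand constant is
+// printed inline.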
typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
// Having printed this SDNode, walk the children:
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
if (i) OS << ",";
OS << " ";
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (const void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
}
OS << "\n";
@@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const {
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
+ printr(OS, G);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
+ printOperand(OS, G, getOperand(i));
}
- print_details(OS, G);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 97ece8b..853a21a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
@@ -263,13 +264,17 @@ namespace llvm {
return;
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
- SavedFastISel = IS.TM.Options.EnableFastISel;
- if (NewOptLevel == CodeGenOpt::None)
- IS.TM.setFastISel(true);
DEBUG(dbgs() << "\nChanging optimization level for Function "
<< IS.MF->getFunction()->getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
<< " ; After: -O" << NewOptLevel << "\n");
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ DEBUG(dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
}
~OptLevelChanger() {
@@ -293,6 +298,11 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
if (OptLevel == CodeGenOpt::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
@@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
- initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
- initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeTargetLibraryInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
@@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
+static void SplitCriticalSideEffectEdges(Function &Fn) {
// Loop for blocks with phi nodes.
- for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ for (BasicBlock &BB : Fn) {
+ PHINode *PN = dyn_cast<PHINode>(BB.begin());
if (!PN) continue;
ReprocessBlock:
@@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// are potentially trapping constant expressions. Constant expressions are
// the only potentially trapping value that can occur as the argument to a
// PHI.
- for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
if (!CE || !CE->canTrap()) continue;
@@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Okay, we have to split this edge.
SplitCriticalEdge(
- Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
- CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
+ Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
+ CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
- FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
@@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF->setHasInlineAsm(false);
+ FuncInfo->SplitCSR = false;
+ SmallVector<MachineBasicBlock*, 4> Returns;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const TerminatorInst *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term))
+ continue;
+ if (isa<ReturnInst>(Term)) {
+ Returns.push_back(FuncInfo->MBBMap[&BB]);
+ continue;
+ }
+
+ // Bail out if the exit block is not Return nor Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
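+  // With SplitCSR, callee-saved registers are not spilled in the
+  // prologue/epilogue; the target instead emits ordinary register copies
+  // in the entry block (initializeSplitCSR) and in each return block
+  // (insertCopiesSplitCSR below).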
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
- MachineBasicBlock *EntryMBB = MF->begin();
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR)
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
@@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() {
// graph) and preceding back toward the beginning (the entry
// node).
while (ISelPosition != CurDAG->allnodes_begin()) {
- SDNode *Node = --ISelPosition;
+ SDNode *Node = &*--ISelPosition;
// Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
// but there are currently some corner cases that it misses. Also, this
// makes it theoretically possible to disable the DAGCombiner.
@@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() {
PostprocessISelDAG();
}
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
-
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ return true;
+ }
+
+ if (!LLVMBB->isLandingPad())
+ return true;
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // If this is an MSVC-style personality function, we need to split the landing
- // pad into several BBs.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
- MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent()
- ->getParent()
- ->getPersonalityFn()
- ->stripPointerCasts()));
- EHPersonality Personality = MF->getMMI().getPersonalityType();
-
- if (isMSVCEHPersonality(Personality)) {
- SmallVector<MachineBasicBlock *, 4> ClauseBBs;
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt());
- // Get all invoke BBs that unwind to this landingpad.
- SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
- MBB->pred_end());
- if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) {
- // If this is a call to llvm.eh.actions followed by indirectbr, then we've
- // run WinEHPrepare, and we should remove this block from the machine CFG.
- // Mark the targets of the indirectbr as landingpads instead.
- for (const BasicBlock *LLVMSucc : successors(LLVMBB)) {
- MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc];
- // Add the edge from the invoke to the clause.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->addSuccessor(ClauseBB);
-
- // Mark the clause as a landing pad or MI passes will delete it.
- ClauseBB->setIsLandingPad();
- }
- }
-
- // Remove the edge from the invoke to the lpad.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->removeSuccessor(MBB);
-
- // Don't select instructions for the landingpad.
- return false;
- }
-
// Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister())
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
// Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister())
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
return true;
@@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
- !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
@@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
- if (LLVMBB->isLandingPad())
+ if (LLVMBB->isEHPad())
if (!PrepareEHLandingPad())
continue;
@@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
- const Instruction *Inst = std::prev(BI);
+ const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
@@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// then see if there is a load right before the selected instructions.
// Try to fold the load if so.
const Instruction *BeforeInst = Inst;
- while (BeforeInst != Begin) {
- BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst));
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
break;
}
@@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// For the purpose of debugging, just abort.
report_fatal_error("FastISel didn't select the entire block");
- if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst->getType());
@@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
- SelectBasicBlock(Inst, BI, HadTailCall);
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
// We also need to delete any previously emitted instructions.
@@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
}
- uint32_t UnhandledWeight = 0;
- for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
- UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
-
+ BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob;
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
- UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
+ UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb;
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Cases[j+1].ThisBB,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range.
+ MachineBasicBlock *NextMBB;
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB;
+ else if (j + 1 != ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB;
else
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Default,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+ NextMBB = SDB->BitTestCases[i].Default;
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ NextMBB,
+ UnhandledProb,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
+
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ break;
}
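The contiguous-range shortcut in miniature (an illustrative standalone function; the case bounds and masks are made up, not from this patch): once the bit-test header has range-checked the input, the last test can fall through to the remaining target instead of branching to the default block.

    #include <cstdint>
    // Models a switch over the contiguous cases 10..13 with two targets:
    // cases 10 and 12 go to A, cases 11 and 13 go to B.
    int classify(uint32_t X) {
      if (X - 10u > 3u)                  // header: range check
        return -1;                       // default block
      uint64_t MaskA = 0x5;              // bits 0 and 2: cases 10 and 12
      if ((1ull << (X - 10u)) & MaskA)
        return 0;                        // target A
      return 1;                          // target B; no jump to default needed
    }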
// Update PHI Nodes
@@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() {
/// one preferred by the target.
///
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
- RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
-
- if (!Ctor) {
- Ctor = ISHeuristic;
- RegisterScheduler::setDefault(Ctor);
- }
-
- return Ctor(this, OptLevel);
+ return ISHeuristic(this, OptLevel);
}
//===----------------------------------------------------------------------===//
@@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
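GetVBR only finishes a decode whose first byte was already consumed; for reference, a self-contained sketch of the whole encoding (decodeVBR is a hypothetical helper, not the LLVM routine):

    #include <cstdint>
    // Values are stored 7 bits per byte, low bits first; a set top bit
    // means another byte follows.
    static uint64_t decodeVBR(const unsigned char *Table, unsigned &Idx) {
      uint64_t Val = 0;
      unsigned Shift = 0;
      unsigned char B;
      do {
        B = Table[Idx++];
        Val |= uint64_t(B & 127) << Shift;
        Shift += 7;
      } while (B & 128);
      return Val;
    }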
@@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
/// CheckSame - Implements OP_CheckSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
@@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckChildSame - Implements OP_CheckChildXSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
@@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL,
unsigned ChildNo) {
@@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && C->getSExtValue() == Val;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
@@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 4df5ede..2764688 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -80,9 +80,16 @@ namespace llvm {
return true;
}
- static bool hasNodeAddressLabel(const SDNode *Node,
- const SelectionDAG *Graph) {
- return true;
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
}
/// If you want to override the dot attributes printed for a particular
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 34688df..050ec21 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ MFI->markAsStatepointSpillSlotObjectIndex(FI);
+
Builder.FuncInfo.StatepointStackSlots.push_back(FI);
AllocatedStackSlots.push_back(true);
return SpillSlot;
@@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
return Builder.DAG.getFrameIndex(FI, ValueType);
}
// Note: We deliberately choose to advance this only on the failing path.
- // Doing so on the suceeding path involes a bit of complexity that caused a
- // minor bug previously. Unless performance shows this matters, please
+ // Doing so on the succeeding path involves a bit of complexity that caused
+ // a minor bug previously. Unless performance shows this matters, please
// keep this code as simple as possible.
NextSlotToAllocate++;
}
@@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
static Optional<int> findPreviousSpillSlot(const Value *Val,
SelectionDAGBuilder &Builder,
int LookUpDepth) {
- // Can not look any futher - give up now
+ // Can not look any further - give up now
if (LookUpDepth <= 0)
return Optional<int>();
@@ -196,7 +200,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
-/// This helps to avoid series of loads and stores that only serve to resuffle
+/// This helps to avoid series of loads and stores that only serve to reshuffle
/// values on the stack between calls.
static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
@@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
SmallVectorImpl<const Value *> &Relocs,
SelectionDAGBuilder &Builder) {
- // This is horribly ineffecient, but I don't care right now
+ // This is horribly inefficient, but I don't care right now
SmallSet<SDValue, 64> Seen;
SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
@@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *
-lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
+lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
SelectionDAGBuilder &Builder,
SmallVectorImpl<SDValue> &PendingExports) {
ImmutableCallSite CS(ISP.getCallSite());
- SDValue ActualCallee = Builder.getValue(ISP.getCalledValue());
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = Builder.DAG.getTargetLoweringInfo();
+ const auto &DL = Builder.DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(),
+ TLI.getPointerTy(DL, AS));
+ } else
+ ActualCallee = Builder.getValue(ISP.getCalledValue());
assert(CS.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
@@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands(
ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad,
+ ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB,
false /* IsPatchPoint */);
SDNode *CallEnd = CallEndVal.getNode();
@@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
// ch, glue = callseq_end ch, glue
// get_return_value ch, glue
//
- // get_return_value can either be a CopyFromReg to grab the return value from
- // %RAX, or it can be a LOAD to load a value returned by reference via a stack
- // slot.
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
- if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg ||
- CallEnd->getOpcode() == ISD::LOAD))
- CallEnd = CallEnd->getOperand(0).getNode();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
- if (HasDef) {
- if (CS.isInvoke()) {
- // Result value will be used in different basic block for invokes
- // so we need to export it now. But statepoint call has a different type
- // than the actuall call. It means that standart exporting mechanism will
- // create register of the wrong type. So instead we need to create
- // register with correct type and save value into it manually.
+ // Export the result value if needed
+ const Instruction *GCResult = ISP.getGCResult();
+ if (HasDef && GCResult) {
+ if (GCResult->getParent() != CS.getParent()) {
+ // Result value will be used in a different basic block so we need to
+ // export it now.
+ // The default exporting mechanism will not work here because the
+ // statepoint call has a different type than the actual call, so by
+ // default llvm would create an export register of the wrong type
+ // (always i32 in our case). Instead we need to create an export
+ // register with the correct type manually.
// TODO: To eliminate this problem we can remove gc.result intrinsics
- // completelly and make statepoint call to return a tuple.
+ // completely and make statepoint call to return a tuple.
unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
RegsForValue RFV(
*Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
@@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
PendingExports.push_back(Chain);
Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
} else {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
+ // Result value will be used in the same basic block. Don't export it or
+ // perform any explicit register copies.
+ // We'll replace the actual call node shortly. gc_result will grab
// this value.
Builder.setValue(CS.getInstruction(), ReturnValue);
}
@@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// chaining stores one after another, this may allow
// a bit more optimal scheduling for them
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- MachinePointerInfo::getFixedStack(Index),
+ MachinePointerInfo::getFixedStack(
+ Builder.DAG.getMachineFunction(), Index),
false, false, 0);
Builder.StatepointLowering.setLocation(Incoming, Loc);
@@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// to the GCStrategy from there (yet).
GCStrategy &S = Builder.GFI->getStrategy();
for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
}
for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() && "non gc managed pointer relocated");
}
@@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record value as visited, but not spilled. This is case for allocas
- // and constants. For this values we can avoid emiting spill load while
+ // and constants. For these values we can avoid emitting a spill load while
// visiting corresponding gc_relocate.
// Actually we do not need to record them in this map at all.
- // We do this only to check that we are not relocating any unvisited value.
+ // We do this only to check that we are not relocating any unvisited
+ // value.
SpillMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks does not work for gc relocates.
// Note that it would be incorrect to teach llvm that all relocates are
- // uses of the corresponging values so that it would automatically
+ // uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values does not use original
// value.
- if (StatepointSite.getCallSite().isInvoke())
+ if (RelocateOpers.getUnderlyingCallSite().getParent() !=
+ StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
}
void SelectionDAGBuilder::LowerStatepoint(
- ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
+ ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint(
ImmutableCallSite CS(ISP.getCallSite());
#ifndef NDEBUG
- // Consistency check. Don't do this for invokes. It would be too
- // expensive to preserve this information across different basic blocks
- if (!CS.isInvoke()) {
- for (const User *U : CS->users()) {
- const CallInst *Call = cast<CallInst>(U);
- if (isGCRelocate(Call))
- StatepointLowering.scheduleRelocCall(*Call);
- }
+ // Consistency check. Check only relocates in the same basic block as their
+ // statepoint.
+ for (const User *U : CS->users()) {
+ const CallInst *Call = cast<CallInst>(U);
+ if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
+ StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Get call node, we will replace it later with statepoint
SDNode *CallNode =
- lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);
+ lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Replace original call
DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
- // Remove originall call node
+ // Remove original call node
DAG.DeleteNode(CallNode);
// DON'T set the root - under the assumption that it's already set past the
@@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
Instruction *I = cast<Instruction>(CI.getArgOperand(0));
assert(isStatepoint(I) && "first argument must be a statepoint token");
- if (isa<InvokeInst>(I)) {
- // For invokes we should have stored call result in a virtual register.
+ if (I->getParent() != CI.getParent()) {
+ // Statepoint is in a different basic block so we should have stored call
+ // result in a virtual register.
// We can not use default getValue() functionality to copy value from this
// register because statepoint and actual call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
@@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
#ifndef NDEBUG
// Consistency check
- // We skip this check for invoke statepoints. It would be too expensive to
- // preserve validation info through different basic blocks.
- if (!RelocateOpers.isTiedToInvoke()) {
+ // We skip this check for relocates not in the same basic block as their
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
StatepointLowering.relocCallVisited(CI);
}
#endif
@@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// Be conservative: flush all pending loads
// TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opprtunities
+ // it may allow more scheduling opportunities.
SDValue Chain = getRoot();
SDValue SpillLoad =
- DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(*DerivedPtrLocation),
- false, false, false, 0);
+ DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ *DerivedPtrLocation),
+ false, false, false, 0);
// Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fbf6512..c64d882 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG,
RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
+ ArrayRef<SDValue> Ops,
bool isSigned, SDLoc dl,
bool doesNotReturn,
bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
+ Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
+ for (SDValue Op : Ops) {
+ Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
+
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
@@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
return LowerCallTo(CLI);
}
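With the ArrayRef signature there is no operand count to keep in sync with the array; a usage sketch mirroring the updated call sites later in this patch (LC, RetVT, LHS, RHS, and dl stand for values from the surrounding context):

    SDValue Ops[2] = {LHS, RHS};
    SDValue Res = makeLibCall(DAG, LC, RetVT, Ops,
                              false /*sign irrelevant*/, dl).first;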
-
-/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
@@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
@@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
(VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
break;
- default:
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
switch (CCCode) {
- case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
- // Fallthrough
- case ISD::SETUGT:
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
- break;
- case ISD::SETUGE:
- LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
- (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
- break;
case ISD::SETULT:
- LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
break;
case ISD::SETULE:
- LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
break;
- case ISD::SETUEQ:
- LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
- (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
@@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
- SDValue Ops[2] = { NewLHS, NewRHS };
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
+
CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
@@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
}
}
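The inversion introduced above rests on a simple identity: an unordered comparison is the negation of the complementary ordered one, because ordered comparisons are false when either operand is NaN. A small standalone check (illustrative, not part of the patch):

    #include <cassert>
    #include <cmath>
    // SETULT (unordered or less) == !SETOGE (ordered greater-or-equal).
    int main() {
      double vals[] = {1.0, 2.0, std::nan("")};
      for (double x : vals)
        for (double y : vals) {
          bool ult = std::isunordered(x, y) || x < y;
          bool oge = x >= y; // false on NaN operands
          assert(ult == !oge);
        }
      return 0;
    }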
-/// getJumpTableEncoding - Return the entry encoding for a jump table in the
-/// current function. The returned value is a member of the
-/// MachineJumpTableInfo::JTEntryKind enum.
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
@@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
return Table;
}
-/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
-/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
-/// MCExpr.
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
@@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
-/// specified instruction is a constant integer. If so, check to see if there
-/// are any bits set in the constant that are not demanded. If so, shrink the
-/// constant and return true.
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
SDLoc dl(Op);
@@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
return false;
}
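A worked instance of the shrink (illustrative numbers): in (x | 0xF0F0) & 0xFF only the low 8 bits of the OR are demanded, so its constant can be shrunk to 0xF0 without changing the result.

    #include <cassert>
    #include <cstdint>
    int main() {
      for (uint32_t x = 0; x < 0x20000; ++x)
        assert(((x | 0xF0F0u) & 0xFFu) == ((x | 0x00F0u) & 0xFFu));
      return 0;
    }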
-/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
-/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
-/// cast, but it could be generalized for targets with other types of
-/// implicit widening casts.
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
unsigned BitWidth,
@@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
return false;
}
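Concretely (a sketch, with the uint64_t cast playing the role of ZERO_EXTEND): when only the low 32 bits of a 64-bit add are demanded, the add can be done at 32 bits and widened afterwards.

    #include <cassert>
    #include <cstdint>
    int main() {
      uint64_t x = 0x1234567890ABCDEFull, y = 0xFEDCBA0987654321ull;
      uint64_t wide   = (x + y) & 0xFFFFFFFFull;
      uint64_t narrow = (uint64_t)((uint32_t)x + (uint32_t)y);
      assert(wide == narrow);
      return 0;
    }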
-/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
-/// DemandedMask bits of the result of Op are ever used downstream. If we can
-/// use this information to simplify Op, create a new simplified DAG node and
-/// return true, returning the original and new nodes in Old and New. Otherwise,
-/// analyze the expression and return a mask of KnownOne and KnownZero bits for
-/// the expression (used to simplify the caller). The KnownZero/One bits may
-/// only be accurate for those bits in the DemandedMask.
+/// Look at Op. At this point, we know that only the DemandedMask bits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of KnownOne and KnownZero bits for the expression (used to
+/// simplify the caller). The KnownZero/One bits may only be accurate for those
+/// bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
const APInt &DemandedMask,
APInt &KnownZero,
@@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
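At the source level the guarded transform looks roughly like this (fgetsign below stands in for the FGETSIGN node, and the shift mirrors the SHL that repositions the bit):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    static uint32_t fgetsign(float f) {   // extract the IEEE sign bit
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      return bits >> 31;
    }
    int main() {
      assert((fgetsign(-2.5f) << 31) == 0x80000000u);
      assert((fgetsign(2.5f) << 31) == 0u);
      return 0;
    }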
@@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
-/// computeKnownBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
+/// Determine which of the bits specified in Mask are known to be either zero or
+/// one and return them in the KnownZero/KnownOne bitsets.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
@@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
-/// ComputeNumSignBitsForTargetNode - This method can be implemented by
-/// targets that want to expose additional information about sign bits to the
-/// DAG Combiner.
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const SelectionDAG &,
unsigned Depth) const {
@@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
-/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
-/// one bit set. This differs from computeKnownBits in that it doesn't need to
-/// determine which bit is set.
-///
+/// Test if the given value is known to have exactly one bit set. This differs
+/// from computeKnownBits in that it doesn't need to determine which bit is set.
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// A left-shift of a constant one will have exactly one bit set, because
// shifting the bit off the end is undefined.
@@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
return CN->isNullValue();
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
-/// and cc. If it is unable to simplify it, return a null SDValue.
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
@@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
@@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
PreExt = N0->getOperand(0);
@@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
Signed = true;
- } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
// ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
@@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize()
? getPointerTy(DL)
: getShiftAmountTy(N0.getValueType(), DL);
@@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
@@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Constant fold or commute setcc.
SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
if (O.getNode()) return O;
- } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
// If the RHS of an FP comparison is a constant, simplify it away in
// some cases.
if (CFP->getValueAPF().isNaN()) {
@@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// to be careful about increasing register pressure needlessly.
bool LegalRHSImm = false;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
return DAG.getSetCC(dl, VT, N0.getOperand(0),
@@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Turn (C1-X) == C2 --> X == C1-C2
- if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
return
DAG.getSetCC(dl, VT, N0.getOperand(1),
@@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
-/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
-/// node is a GlobalAddress + offset.
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
- if (isa<GlobalAddressSDNode>(N)) {
- GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
return true;
@@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
Offset += V->getSExtValue();
return true;
}
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
Offset += V->getSExtValue();
return true;
}
@@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
return false;
}
-
-SDValue TargetLowering::
-PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
return SDValue();
}
@@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
return C_Unknown;
}
-/// LowerXConstraint - try to replace an X constraint, which matches anything,
-/// with another that has more specific requirements based on the type of the
-/// corresponding operand.
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
if (ConstraintVT.isInteger())
return "r";
@@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
return nullptr;
}
-/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops.
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
//===----------------------------------------------------------------------===//
// Constraint Selection.
-/// isMatchingInputConstraint - Return true of this is an input operand that is
-/// a matching constraint like "4".
+/// Return true if this is an input operand that is a matching constraint like
+/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
-/// getMatchedOperand - If this is an input matching constraint, this method
-/// returns the output operand it matches.
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return atoi(ConstraintCode.c_str());
}
-
-/// ParseConstraints - Split up the constraint string from the inline
-/// assembly value into the specific constraints and their prefixes,
-/// and also tie in the associated operand values.
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
ImmutableCallSite CS) const {
- /// ConstraintOperands - Information about all of the constraints.
+ /// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
@@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
" incompatible type!");
}
}
-
}
}
return ConstraintOperands;
}
-
-/// getConstraintGenerality - Return an integer indicating how general CT
-/// is.
+/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Other:
@@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight
return weight;
}
-/// ChooseConstraint - If there are multiple different constraints that we
-/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
/// Other -> immediates and magic values
/// Register -> one specific register
@@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
OpInfo.ConstraintType = BestType;
}
-/// ComputeConstraintToUse - Determines the constraint code and constraint
-/// type to use for the specific AsmOperandInfo, setting
-/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
SelectionDAG *DAG) const {
@@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
return Mul;
}
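BuildExactSDIV works because an odd divisor has a multiplicative inverse mod 2^32: when x is known to be an exact multiple of d, x / d equals x * d^-1 (mod 2^32). A worked check for d = 3, whose inverse is 0xAAAAAAAB since 3 * 0xAAAAAAAB = 2^33 + 1 which is 1 (mod 2^32):

    #include <cassert>
    #include <cstdint>
    int main() {
      for (int32_t q = -1000; q <= 1000; ++q) {
        uint32_t x = (uint32_t)(q * 3);         // exact multiple of 3
        assert(x * 0xAAAAAAABu == (uint32_t)q); // x * 3^-1 mod 2^32
      }
      return 0;
    }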
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
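When the hook declines (integer division is not cheap), targets fall back to the standard shift-with-bias expansion. A sketch of that expansion, not this patch's code: signed division by 2^k truncates toward zero, so negative inputs need a 2^k - 1 bias before the arithmetic shift.

    #include <cassert>
    #include <cstdint>
    // Assumes arithmetic right shift on signed values (true on common targets).
    static int32_t sdiv_pow2(int32_t x, unsigned k) {
      int32_t bias = (x >> 31) & ((1 << k) - 1); // 2^k - 1 if x < 0, else 0
      return (x + bias) >> k;
    }
    int main() {
      assert(sdiv_pow2(-7, 1) == -7 / 2);   // both are -3
      assert(sdiv_pow2(7, 2) == 7 / 4);     // both are 1
      return 0;
    }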
/// \brief Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to address of TLS variable xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ if (!EmuTlsVar)
+ EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
+ VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+ // calls. At least for X86 targets; maybe good for other targets too?
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true); // Is this only for X86 target?
+ MFI->setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}
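At the source level the lowering behaves roughly as follows (a hedged sketch: the real control variable is named "__emutls_v.xyz", which is not spellable as a C++ identifier, so an underscore stands in; the declarations are illustrative and will only link against an emutls runtime):

    extern "C" void *__emutls_get_address(void *control);
    extern "C" char __emutls_v_xyz[];    // stands in for "__emutls_v.xyz"
    // For '__thread int xyz;', taking &xyz becomes a runtime call:
    int *addr_of_xyz() {
      return static_cast<int *>(__emutls_get_address(__emutls_v_xyz));
    }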
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index e7b2a8e..878eeee 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -112,7 +112,7 @@ public:
case 1:
// Find all 'return', 'resume', and 'unwind' instructions.
while (StateBB != StateE) {
- BasicBlock *CurBB = StateBB++;
+ BasicBlock *CurBB = &*StateBB++;
// Branches and invokes do not escape, only unwind, resume, and return
// do.
@@ -120,7 +120,7 @@ public:
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
- Builder.SetInsertPoint(TI->getParent(), TI);
+ Builder.SetInsertPoint(TI);
return &Builder;
}
@@ -163,8 +163,8 @@ public:
// Split the basic block containing the function call.
BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB =
- CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+ BasicBlock *NewBB = CallBB->splitBasicBlock(
+ CI->getIterator(), CallBB->getName() + ".cont");
// Remove the unconditional branch inserted at the end of CallBB.
CallBB->getInstList().pop_back();
@@ -184,7 +184,7 @@ public:
delete CI;
}
- Builder.SetInsertPoint(RI->getParent(), RI);
+ Builder.SetInsertPoint(RI);
return &Builder;
}
}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index 4463cc7..f8aa1e2 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -43,9 +43,11 @@
// points must be in the same loop.
// Property #3 is ensured via the MachineBlockFrequencyInfo.
//
-// If this pass found points matching all this properties, then
-// MachineFrameInfo is updated this that information.
+// If this pass found points matching all these properties, then
+// MachineFrameInfo is updated with this information.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
// To check for profitability.
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -61,11 +63,14 @@
#include "llvm/CodeGen/Passes.h"
// To know about callee-saved.
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
// To query the target about frame lowering.
#include "llvm/Target/TargetFrameLowering.h"
// To know about frame setup operation.
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
// To access TargetInstrInfo.
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -78,6 +83,10 @@ STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
STATISTIC(NumCandidatesDropped,
"Number of shrink-wrapping candidates dropped because of frequency");
+static cl::opt<cl::boolOrDefault>
+ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
+
namespace {
/// \brief Class to determine where the safe point to insert the
/// prologue and epilogue are.
@@ -113,18 +122,38 @@ class ShrinkWrap : public MachineFunctionPass {
unsigned FrameDestroyOpcode;
/// Entry block.
const MachineBasicBlock *Entry;
+ typedef SmallSetVector<unsigned, 16> SetOfRegs;
+ /// Registers that need to be saved for the current function.
+ mutable SetOfRegs CurrentCSRs;
+ /// Current MachineFunction.
+ MachineFunction *MachineFunc;
/// \brief Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
- bool useOrDefCSROrFI(const MachineInstr &MI) const;
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+
+ const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
+ if (CurrentCSRs.empty()) {
+ BitVector SavedRegs;
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+
+ TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS);
+
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ CurrentCSRs.insert((unsigned)Reg);
+ }
+ return CurrentCSRs;
+ }
/// \brief Update the Save and Restore points such that \p MBB is in
/// the region that is dominated by Save and post-dominated by Restore
/// and Save and Restore still match the safe point definition.
/// Such point may not exist and Save and/or Restore may be null after
/// this call.
- void updateSaveRestorePoints(MachineBasicBlock &MBB);
+ void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
/// \brief Initialize the pass for \p MF.
void init(MachineFunction &MF) {
@@ -140,6 +169,8 @@ class ShrinkWrap : public MachineFunctionPass {
FrameSetupOpcode = TII.getCallFrameSetupOpcode();
FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
Entry = &MF.front();
+ CurrentCSRs.clear();
+ MachineFunc = &MF;
++NumFunc;
}
@@ -148,6 +179,9 @@ class ShrinkWrap : public MachineFunctionPass {
/// shrink-wrapping.
bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+ /// \brief Check if shrink wrapping is enabled for this target and function.
+ static bool isShrinkWrapEnabled(const MachineFunction &MF);
+
public:
static char ID;
@@ -185,27 +219,34 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
-bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const {
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
+ RegScavenger *RS) const {
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;
}
for (const MachineOperand &MO : MI.operands()) {
- bool UseCSR = false;
+ bool UseOrDefCSR = false;
if (MO.isReg()) {
unsigned PhysReg = MO.getReg();
if (!PhysReg)
continue;
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Unallocated register?!");
- UseCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ } else if (MO.isRegMask()) {
+ // Check if this regmask clobbers any of the CSRs.
+ for (unsigned Reg : getCurrentCSRs(RS)) {
+ if (MO.clobbersPhysReg(Reg)) {
+ UseOrDefCSR = true;
+ break;
+ }
+ }
}
- // TODO: Handle regmask more accurately.
- // For now, be conservative about them.
- if (UseCSR || MO.isFI() || MO.isRegMask()) {
- DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI()
- << "): " << MI << '\n');
+ if (UseOrDefCSR || MO.isFI()) {
+ DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
+ << MO.isFI() << "): " << MI << '\n');
return true;
}
}
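A register-mask operand on a call encodes the set of registers the callee preserves, so MachineOperand::clobbersPhysReg(Reg) answers whether Reg survives the call. The new branch above replaces the old blanket "any regmask is a clobber" conservatism with a per-CSR query. A sketch of that check in isolation, assuming only that CSRs holds physical register numbers:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include <cassert>

    // True if the register mask fails to preserve any register in CSRs.
    static bool regMaskClobbersAny(const llvm::MachineOperand &MO,
                                   llvm::ArrayRef<unsigned> CSRs) {
      assert(MO.isRegMask() && "expected a register-mask operand");
      for (unsigned Reg : CSRs)
        if (MO.clobbersPhysReg(Reg))
          return true;
      return false;
    }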
@@ -225,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
return IDom;
}
-void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
+void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
+ RegScavenger *RS) {
// Get rid of the easy cases first.
if (!Save)
Save = &MBB;
@@ -246,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
- if (!useOrDefCSROrFI(Terminator))
+ if (!useOrDefCSROrFI(Terminator, RS))
continue;
// One of the terminators needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -277,7 +319,24 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
while (Save && Restore &&
(!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
!(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
- MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) {
+ // Post-dominance is not enough in loops to ensure that all uses/defs
+ // are after the prologue and before the epilogue at runtime.
+ // E.g.,
+ // while(1) {
+ // Save
+ // Restore
+ // if (...)
+ // break;
+ // use/def CSRs
+ // }
+ // All the uses/defs of CSRs are dominated by Save and post-dominated
+          // by Restore. However, the CSR uses are still reachable after
+          // Restore executes and before Save executes.
+ //
+ // For now, just push the restore/save points outside of loops.
+ // FIXME: Refine the criteria to still find interesting cases
+ // for loops.
+ MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
// Fix (A).
if (!SaveDominatesRestore) {
Save = MDT->findNearestCommonDominator(Save, Restore);
@@ -288,35 +347,72 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
Restore = MPDT->findNearestCommonDominator(Restore, Save);
// Fix (C).
- if (Save && Restore && Save != Restore &&
- MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) {
- if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore))
- // Push Save outside of this loop.
- Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
- else
+ if (Save && Restore &&
+ (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
+ // Push Save outside of this loop if immediate dominator is different
+ // from save block. If immediate dominator is not different, bail out.
+ MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (IDom != Save)
+ Save = IDom;
+ else {
+ Save = nullptr;
+ break;
+ }
+ } else {
+ // If the loop does not exit, there is no point in looking
+ // for a post-dominator outside the loop.
+ SmallVector<MachineBasicBlock*, 4> ExitBlocks;
+ MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks);
// Push Restore outside of this loop.
- Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ // Look for the immediate post-dominator of the loop exits.
+ MachineBasicBlock *IPdom = Restore;
+        for (MachineBasicBlock *LoopExitBB : ExitBlocks) {
+ IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT);
+ if (!IPdom)
+ break;
+ }
+ // If the immediate post-dominator is not in a less nested loop,
+ // then we are stuck in a program with an infinite loop.
+ // In that case, we will not find a safe point, hence, bail out.
+ if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore))
+ Restore = IPdom;
+ else {
+ Restore = nullptr;
+ break;
+ }
+ }
}
}
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (MF.empty())
+ if (MF.empty() || !isShrinkWrapEnabled(MF))
return false;
+
DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
init(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
for (MachineBasicBlock &MBB : MF) {
DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
<< '\n');
+ if (MBB.isEHFuncletEntry()) {
+ DEBUG(dbgs() << "EH Funclets are not supported yet.\n");
+ return false;
+ }
+
for (const MachineInstr &MI : MBB) {
- if (!useOrDefCSROrFI(MI))
+ if (!useOrDefCSROrFI(MI, RS.get()))
continue;
// Save (resp. restore) point must dominate (resp. post dominate)
// MI. Look for the proper basic block for those.
- updateSaveRestorePoints(MBB);
+ updateSaveRestorePoints(MBB, RS.get());
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
@@ -368,7 +464,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
break;
NewBB = Restore;
}
- updateSaveRestorePoints(*NewBB);
+ updateSaveRestorePoints(*NewBB, RS.get());
} while (Save && Restore);
if (!ArePointsInteresting()) {
@@ -386,3 +482,30 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
++NumCandidates;
return false;
}
+
+bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ switch (EnableShrinkWrapOpt) {
+ case cl::BOU_UNSET:
+ return TFI->enableShrinkWrapping(MF) &&
+ // Windows with CFI has some limitations that make it impossible
+ // to use shrink-wrapping.
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ // Sanitizers look at the value of the stack at the location
+ // of the crash. Since a crash can happen anywhere, the
+           // frame must be lowered before anything else happens for the
+ // sanitizers to be able to get a correct stack frame.
+ !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+  // If EnableShrinkWrap is set, it takes precedence over whatever the
+  // target sets. The rationale is that we assume we want to test
+  // something related to shrink-wrapping.
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Invalid shrink-wrapping state");
+}
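The new isShrinkWrapEnabled() relies on cl::boolOrDefault, the tri-state used for flags that should defer to a target heuristic unless the user forces a value on the command line. A minimal sketch of the same pattern for a hypothetical flag (the option name and predicate below are invented for illustration):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/ErrorHandling.h"

    static llvm::cl::opt<llvm::cl::boolOrDefault>
        EnableMyOpt("enable-my-opt", llvm::cl::Hidden,
                    llvm::cl::desc("Force my-opt on or off"));

    static bool isMyOptEnabled(bool TargetDefault) {
      switch (EnableMyOpt) {
      case llvm::cl::BOU_UNSET:
        return TargetDefault; // no flag given: defer to the target
      case llvm::cl::BOU_TRUE:
        return true;          // -enable-my-opt=true overrides the target
      case llvm::cl::BOU_FALSE:
        return false;         // -enable-my-opt=false overrides the target
      }
      llvm_unreachable("covered switch");
    }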
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index d236e1f..e1f242a 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -50,7 +50,7 @@ class SjLjEHPrepare : public FunctionPass {
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
- Constant *BuiltinSetjmpFn;
+ Constant *BuiltinSetupDispatchFn;
Constant *FrameAddrFn;
Constant *StackAddrFn;
Constant *StackRestoreFn;
@@ -112,7 +112,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
- BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
@@ -178,8 +179,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
// values and replace the LPI with that aggregate.
Type *LPadType = LPI->getType();
Value *LPadVal = UndefValue::get(LPadType);
- IRBuilder<> Builder(
- std::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ auto *SelI = cast<Instruction>(SelVal);
+ IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator()));
LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
@@ -190,7 +191,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
/// it with all of the data that we know at this point.
Value *SjLjEHPrepare::setupFunctionContext(Function &F,
ArrayRef<LandingPadInst *> LPads) {
- BasicBlock *EntryBB = F.begin();
+ BasicBlock *EntryBB = &F.front();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
@@ -198,12 +199,13 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
auto &DL = F.getParent()->getDataLayout();
unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
- EntryBB->begin());
+ &EntryBB->front());
// Fill in the function context structure.
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
- IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+ IRBuilder<> Builder(LPI->getParent(),
+ LPI->getParent()->getFirstInsertionPt());
// Reference the __data field.
Value *FCData =
@@ -250,21 +252,20 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
++AfterAllocaInsPt;
+ assert(AfterAllocaInsPt != F.front().end());
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
- ++AI) {
- Type *Ty = AI->getType();
+ for (auto &AI : F.args()) {
+ Type *Ty = AI.getType();
// Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction.
Value *TrueValue = ConstantInt::getTrue(F.getContext());
Value *UndefValue = UndefValue::get(Ty);
- Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue,
- AI->getName() + ".tmp",
- AfterAllocaInsPt);
- AI->replaceAllUsesWith(SI);
+ Instruction *SI = SelectInst::Create(
+ TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt);
+ AI.replaceAllUsesWith(SI);
// Reset the operand, because it was clobbered by the RAUW above.
- SI->setOperand(1, AI);
+ SI->setOperand(1, &AI);
}
}
@@ -279,7 +280,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
// Ignore obvious cases we don't have to handle. In particular, most
// instructions either have no uses or only have a single use inside the
// current block. Ignore them quickly.
- Instruction *Inst = II;
+ Instruction *Inst = &*II;
if (Inst->use_empty())
continue;
if (Inst->hasOneUse() &&
@@ -360,7 +361,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
DemotePHIToStack(PN);
// Move the landingpad instruction back to the top of the landing pad block.
- LPI->moveBefore(UnwindBlock->begin());
+ LPI->moveBefore(&UnwindBlock->front());
}
}
@@ -400,7 +401,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Value *FuncCtx =
setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
- BasicBlock *EntryBB = F.begin();
+ BasicBlock *EntryBB = &F.front();
IRBuilder<> Builder(EntryBB->getTerminator());
// Get a reference to the jump buffer.
@@ -421,9 +422,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Val = Builder.CreateCall(StackAddrFn, {}, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
- Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
+  // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
+ Builder.CreateCall(BuiltinSetupDispatchFn, {});
// Store a pointer to the function context so that the back-end will know
// where to look for it.
@@ -475,7 +475,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
continue;
}
Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(I);
+ StackAddr->insertAfter(&*I);
Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
StoreStackAddr->insertAfter(StackAddr);
}
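Most of the SjLjEHPrepare churn above is mechanical fallout from ilist iterators no longer converting implicitly to node pointers. Three conversions cover every case touched here; a small sketch, assuming nothing beyond llvm/IR/Function.h:

    #include "llvm/IR/Function.h"

    static void iteratorIdioms(llvm::Function &F) {
      // Pointer to the first block: replaces the old implicit F.begin().
      llvm::BasicBlock *Entry = &F.front();
      // Pointer -> iterator, e.g. for use as an insertion position.
      llvm::Function::iterator It = Entry->getIterator();
      // Iterator -> pointer: the &*It spelling seen throughout the diff.
      llvm::BasicBlock *Same = &*It;
      (void)Same;
    }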
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index 025ae70..c9d23f6 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -172,8 +172,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
// optionally includes an additional position prior to MBB->begin(), indicated
// by the includeStart flag. This is done so that we can iterate MIs in an MBB
// in parallel with SlotIndexes, but there should be a better way to do this.
- IndexList::iterator ListB = startIdx.listEntry();
- IndexList::iterator ListI = endIdx.listEntry();
+ IndexList::iterator ListB = startIdx.listEntry()->getIterator();
+ IndexList::iterator ListI = endIdx.listEntry()->getIterator();
MachineBasicBlock::iterator MBBI = End;
bool pastStart = false;
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
index 97a5424..d30cfc2 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -36,7 +36,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -188,9 +187,9 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
BlockFrequencies.resize(mf.getNumBlockIDs());
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
setThreshold(MBFI->getEntryFreq());
- for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
- unsigned Num = I->getNumber();
- BlockFrequencies[Num] = MBFI->getBlockFreq(I);
+ for (auto &I : mf) {
+ unsigned Num = I.getNumber();
+ BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
}
// We never change the function.
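The SpillPlacement hunk is the same modernization one level up: range-based iteration over a MachineFunction yields MachineBasicBlock references directly, so no iterator-to-pointer conversion is needed at all. A tiny sketch of the resulting shape:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineFunction.h"

    static unsigned countBlocks(llvm::MachineFunction &MF) {
      unsigned N = 0;
      // Each element is a block reference, not an iterator.
      for (llvm::MachineBasicBlock &MBB : MF) {
        (void)MBB;
        ++N;
      }
      return N;
    }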
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index dab1dfe..51dddab 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -56,6 +56,7 @@ void SplitAnalysis::clear() {
SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ // FIXME: Handle multiple EH pad successors.
const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
@@ -176,10 +177,11 @@ bool SplitAnalysis::calcLiveBlockInfo() {
UseE = UseSlots.end();
// Loop over basic blocks where CurLI is live.
- MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ MachineFunction::iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
for (;;) {
BlockInfo BI;
- BI.MBB = MFI;
+ BI.MBB = &*MFI;
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
@@ -259,7 +261,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
if (LVI->start < Stop)
++MFI;
else
- MFI = LIS.getMBBFromIndex(LVI->start);
+ MFI = LIS.getMBBFromIndex(LVI->start)->getIterator();
}
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
@@ -275,8 +277,9 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
unsigned Count = 0;
// Loop over basic blocks where li is live.
- MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
- SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ MachineFunction::const_iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
for (;;) {
++Count;
LVI = li->advanceTo(LVI, Stop);
@@ -284,7 +287,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
return Count;
do {
++MFI;
- Stop = LIS.getMBBEndIdx(MFI);
+ Stop = LIS.getMBBEndIdx(&*MFI);
} while (Stop <= LVI->start);
}
}
@@ -864,9 +867,9 @@ bool SplitEditor::transferValues() {
// This value has multiple defs in RegIdx, but it wasn't rematerialized,
// so the live range is accurate. Add live-in blocks in [Start;End) to the
// LiveInBlocks.
- MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex BlockStart, BlockEnd;
- std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+ std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB);
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
@@ -875,7 +878,7 @@ bool SplitEditor::transferValues() {
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
if (BlockEnd <= End)
- LRC.setLiveOutValue(MBB, VNI);
+ LRC.setLiveOutValue(&*MBB, VNI);
// Skip to the next block for live-in.
++MBB;
@@ -886,23 +889,23 @@ bool SplitEditor::transferValues() {
assert(Start <= BlockStart && "Expected live-in block");
while (BlockStart < End) {
DEBUG(dbgs() << ">BB#" << MBB->getNumber());
- BlockEnd = LIS.getMBBEndIdx(MBB);
+ BlockEnd = LIS.getMBBEndIdx(&*MBB);
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
- LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
+ LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
} else {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LR, MDT[MBB], End);
+ LRC.addLiveInBlock(LR, MDT[&*MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LR, MDT[MBB]);
- LRC.setLiveOutValue(MBB, nullptr);
+ LRC.addLiveInBlock(LR, MDT[&*MBB]);
+ LRC.setLiveOutValue(&*MBB, nullptr);
}
}
BlockStart = BlockEnd;
@@ -1081,16 +1084,14 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
ConnectedVNInfoEqClasses ConEQ(LIS);
for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
// Don't use iterators, they are invalidated by create() below.
- LiveInterval *li = &LIS.getInterval(Edit->get(i));
- unsigned NumComp = ConEQ.Classify(li);
- if (NumComp <= 1)
- continue;
- DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
- SmallVector<LiveInterval*, 8> dups;
- dups.push_back(li);
- for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->createEmptyInterval());
- ConEQ.Distribute(&dups[0], MRI);
+ unsigned VReg = Edit->get(i);
+ LiveInterval &LI = LIS.getInterval(VReg);
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ unsigned Original = VRM.getOriginal(VReg);
+ for (LiveInterval *SplitLI : SplitLIs)
+ VRM.setIsSplitFromReg(SplitLI->reg, Original);
+
// The new intervals all map back to i.
if (LRMap)
LRMap->resize(Edit->size(), i);
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index 116eef6..b3cd8b3 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -94,7 +94,9 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
default:
llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
+ auto &DL = AP.MF->getDataLayout();
+
+ unsigned Size = DL.getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index bcea37a..db3fef5 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -373,7 +373,7 @@ bool StackProtector::InsertStackProtectors() {
Value *StackGuardVar = nullptr; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
- BasicBlock *BB = I++;
+ BasicBlock *BB = &*I++;
ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RI)
continue;
@@ -433,7 +433,7 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+ BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
@@ -453,22 +453,20 @@ bool StackProtector::InsertStackProtectors() {
LoadInst *LI1 = B.CreateLoad(StackGuardVar);
LoadInst *LI2 = B.CreateLoad(AI);
Value *Cmp = B.CreateICmpEQ(LI1, LI2);
- unsigned SuccessWeight =
- BranchProbabilityInfo::getBranchWeightStackProtector(true);
- unsigned FailureWeight =
- BranchProbabilityInfo::getBranchWeightStackProtector(false);
+ auto SuccessProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(false);
MDNode *Weights = MDBuilder(F->getContext())
- .createBranchWeights(SuccessWeight, FailureWeight);
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
}
}
// Return if we didn't modify any basic blocks. i.e., there are no return
// statements in the function.
- if (!HasPrologue)
- return false;
-
- return true;
+ return HasPrologue;
}
/// CreateFailBB - Create a basic block to jump to when the stack protector
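The StackProtector hunk tracks BranchProbabilityInfo's move from raw branch weights to BranchProbability. Since createBranchWeights() still takes integers, the probabilities' numerators, which share one fixed denominator, are passed through as weights with the same ratio. A sketch of that conversion; the 2-in-3 probability is an arbitrary example, not a value from the patch:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/Support/BranchProbability.h"

    static llvm::MDNode *probsToWeights(llvm::LLVMContext &Ctx) {
      llvm::BranchProbability TrueProb(2, 3);            // example value only
      llvm::BranchProbability FalseProb = TrueProb.getCompl();
      // Both numerators are scaled against the same fixed denominator,
      // so their ratio survives the conversion to integer weights.
      return llvm::MDBuilder(Ctx).createBranchWeights(
          TrueProb.getNumerator(), FalseProb.getNumerator());
    }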
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index a5a175f..51f4d0e 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -318,7 +318,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
RefMMOs[i]->setValue(NewSV);
diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
index 95dfd75..3f60e18 100644
--- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
+++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
@@ -34,9 +34,9 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
- Optional<bool> isGCManagedPointer(const Value *V) const override {
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
- PointerType *PT = cast<PointerType>(V->getType());
+ const PointerType *PT = cast<PointerType>(Ty);
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does. Note that addrspace(1) is used
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 237460c..d2fbf53 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -59,7 +59,7 @@ TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
namespace {
- /// TailDuplicatePass - Perform tail duplication.
+ /// Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -69,11 +69,11 @@ namespace {
std::unique_ptr<RegScavenger> RS;
bool PreRegAlloc;
- // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ // A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
- // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
- // source virtual registers.
+ // For each virtual register in SSAUpdateVals keep a list of source virtual
+ // registers.
DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
public:
@@ -161,7 +161,7 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
MBB->pred_end());
MachineBasicBlock::iterator MI = MBB->begin();
@@ -207,7 +207,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
-/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+/// Tail duplicate the block and cleanup.
bool
TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
bool IsSimple,
@@ -310,9 +310,9 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
return true;
}
-/// TailDuplicateBlocks - Look for small blocks that are unconditionally
-/// branched to and do not fall through. Tail-duplicate their instructions
-/// into their predecessors to eliminate (dynamic) branches.
+/// Look for small blocks that are unconditionally branched to and do not fall
+/// through. Tail-duplicate their instructions into their predecessors to
+/// eliminate (dynamic) branches.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
bool MadeChange = false;
@@ -322,7 +322,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
}
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = I++;
+ MachineBasicBlock *MBB = &*I++;
if (NumTails == TailDupLimit)
break;
@@ -375,8 +375,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
}
}
-/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
-/// SSA update.
+/// Add a definition and source virtual registers pair for SSA update.
void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
MachineBasicBlock *BB) {
DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
@@ -390,9 +389,8 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
}
}
-/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
-/// Remember the source register that's contributed by PredBB and update SSA
-/// update map.
+/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
+/// source register that's contributed by PredBB and update SSA update map.
void TailDuplicatePass::ProcessPHI(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
@@ -422,7 +420,7 @@ void TailDuplicatePass::ProcessPHI(
MI->eraseFromParent();
}
-/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// Duplicate a TailBB instruction to PredBB and update
/// the source operands due to earlier PHI translation.
void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MachineBasicBlock *TailBB,
@@ -459,9 +457,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
PredBB->insert(PredBB->instr_end(), NewMI);
}
-/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
-/// blocks, the successors have gained new predecessors. Update the PHI
-/// instructions in them accordingly.
+/// After FromBB is tail duplicated into its predecessor blocks, the successors
+/// have gained new predecessors. Update the PHI instructions in them
+/// accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
@@ -545,7 +543,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
}
}
-/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+/// Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool IsSimple,
@@ -563,6 +561,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ // FIXME: Use Function::optForSize().
MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
@@ -584,30 +583,51 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
+ for (MachineInstr &MI : TailBB) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->isNotDuplicable())
+ if (MI.isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->isReturn())
+ if (PreRegAlloc && MI.isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls present a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->isCall())
+ if (PreRegAlloc && MI.isCall())
return false;
- if (!I->isPHI() && !I->isDebugValue())
+ if (!MI.isPHI() && !MI.isDebugValue())
InstrCount += 1;
if (InstrCount > MaxDuplicateCount)
return false;
}
+ // Check if any of the successors of TailBB has a PHI node in which the
+ // value corresponding to TailBB uses a subregister.
+ // If a phi node uses a register paired with a subregister, the actual
+ // "value type" of the phi may differ from the type of the register without
+ // any subregisters. Due to a bug, tail duplication may add a new operand
+ // without a necessary subregister, producing an invalid code. This is
+  // without a necessary subregister, producing invalid code. This is
+ // Disable tail duplication for this case for now, until the problem is
+ // fixed.
+ for (auto SB : TailBB.successors()) {
+ for (auto &I : *SB) {
+ if (!I.isPHI())
+ break;
+ unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
+ assert(Idx != 0);
+ MachineOperand &PU = I.getOperand(Idx);
+ if (PU.getSubReg() != 0)
+ return false;
+ }
+ }
+
if (HasIndirectbr && PreRegAlloc)
return true;
@@ -620,7 +640,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
return canCompletelyDuplicateBB(TailBB);
}
-/// isSimpleBB - True if this BB has only one unconditional jump.
+/// True if this BB has only one unconditional jump.
bool
TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
if (TailBB->succ_size() != 1)
@@ -636,22 +656,16 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
static bool
bothUsedInPHI(const MachineBasicBlock &A,
SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
- for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
- SE = A.succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *BB = *SI;
+ for (MachineBasicBlock *BB : A.successors())
if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
return true;
- }
return false;
}
bool
TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
- for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
- PE = BB.pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : BB.predecessors()) {
if (PredBB->succ_size() > 1)
return false;
@@ -680,7 +694,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
PE = Preds.end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
- if (PredBB->getLandingPadSuccessor())
+ if (PredBB->hasEHPadSuccessor())
continue;
if (bothUsedInPHI(*PredBB, Succs))
@@ -696,7 +710,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
<< "From simple Succ: " << *TailBB);
MachineBasicBlock *NewTarget = *TailBB->succ_begin();
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB));
+ MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
// Make PredFBB explicit.
if (PredCond.empty())
@@ -731,19 +745,19 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
if (PredTBB)
TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
- uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB);
- PredBB->removeSuccessor(TailBB);
- unsigned NumSuccessors = PredBB->succ_size();
- assert(NumSuccessors <= 1);
- if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
- PredBB->addSuccessor(NewTarget, Weight);
+ if (!PredBB->isSuccessor(NewTarget))
+ PredBB->replaceSuccessor(TailBB, NewTarget);
+ else {
+ PredBB->removeSuccessor(TailBB, true);
+ assert(PredBB->succ_size() <= 1);
+ }
TDBBs.push_back(PredBB);
}
return Changed;
}
-/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
@@ -798,13 +812,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
RS->enterBasicBlock(PredBB);
if (!PredBB->empty())
RS->forward(std::prev(PredBB->end()));
- for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
- E = TailBB->livein_end(); I != E; ++I) {
- if (!RS->isRegUsed(*I, false))
+ for (const auto &LI : TailBB->liveins()) {
+ if (!RS->isRegUsed(LI.PhysReg, false))
// If a register is previously livein to the tail but it's not live
// at the end of predecessor BB, then it should be added to its
// livein list.
- PredBB->addLiveIn(*I);
+ PredBB->addLiveIn(LI);
}
}
@@ -845,7 +858,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
"TailDuplicate called on block with multiple successors!");
for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
E = TailBB->succ_end(); I != E; ++I)
- PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I));
+ PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
Changed = true;
++NumTailDups;
@@ -854,7 +867,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
@@ -960,8 +973,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
return Changed;
}
-/// RemoveDeadBlock - Remove the specified dead machine basic block from the
-/// function, updating the CFG.
+/// Remove the specified dead machine basic block from the function, updating
+/// the CFG.
void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
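The live-in loop above reflects MachineBasicBlock::liveins() now yielding RegisterMaskPair entries (a physical register plus a lane mask) instead of bare register numbers, with addLiveIn() accepting the pair directly. A short sketch of iterating them, assuming the post-change API this diff targets:

    #include "llvm/CodeGen/MachineBasicBlock.h"

    // Propagate every live-in register (with its lane mask) from Src to Dst.
    static void copyLiveIns(const llvm::MachineBasicBlock &Src,
                            llvm::MachineBasicBlock &Dst) {
      for (const llvm::MachineBasicBlock::RegisterMaskPair &LI : Src.liveins())
        if (!Dst.isLiveIn(LI.PhysReg))
          Dst.addLiveIn(LI);
    }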
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f3cccd8..679ade1 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -32,25 +33,22 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
return Attr.getValueAsString() == "true";
}
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
+/// Returns the displacement from the frame register to the stack
+/// frame of the specified index, along with the frame register used
+/// (in output arg FrameReg). This is the default implementation which
+/// is overridden for some targets.
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
// getFrameRegister() says. The target can override this if it's doing
// something different.
FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
+
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
}
bool TargetFrameLowering::needsFrameIndexResolution(
@@ -84,3 +82,13 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
}
}
+
+unsigned TargetFrameLowering::getStackAlignmentSkew(
+ const MachineFunction &MF) const {
+  // When an HHVM function is called, the stack is skewed as the return address
+ // is removed from the stack before we enter the function.
+ if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM))
+ return MF.getTarget().getPointerSize();
+
+ return 0;
+}
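The default getFrameIndexReference() arithmetic folded in above is worth tracing once with concrete numbers. All values below are hypothetical, chosen only to show how the terms combine:

    // Displacement = ObjectOffset(FI) + StackSize
    //                - OffsetOfLocalArea + OffsetAdjustment
    //
    // Suppose:  ObjectOffset(FI)  = -8   (8 bytes below the local area)
    //           StackSize         = 64   (fixed frame size in bytes)
    //           OffsetOfLocalArea = 0    (target-defined constant)
    //           OffsetAdjustment  = 0
    //
    // Then the object lives at  -8 + 64 - 0 + 0 = 56  bytes from the
    // frame register returned in FrameReg.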
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 97ca025..6eaf991 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -118,23 +118,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MBB->addSuccessor(NewDest);
}
-// commuteInstruction - The default implementation of this method just exchanges
-// the two operands returned by findCommutedOpIndices.
-MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
+MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned Idx1,
+ unsigned Idx2) const {
const MCInstrDesc &MCID = MI->getDesc();
bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
// No idea how to commute this instruction. Target should implement its own.
return nullptr;
- unsigned Idx1, Idx2;
- if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
- assert(MI->isCommutable() && "Precondition violation: MI must be commutable.");
- return nullptr;
- }
+ unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
+ unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+ assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
+ CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
+         "TargetInstrInfo::commuteInstructionImpl(): not commutable operands.");
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
+
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
@@ -184,9 +185,53 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
-/// findCommutedOpIndices - If specified MI is commutable, return the two
-/// operand indices that would swap value. Return true if the instruction
-/// is not in a form which this routine understands.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
+ // any commutable operand, which is done in findCommutedOpIndices() method
+ // called below.
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
+ !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
+ assert(MI->isCommutable() &&
+ "Precondition violation: MI must be commutable.");
+ return nullptr;
+ }
+ return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
+bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
+ unsigned &ResultIdx2,
+ unsigned CommutableOpIdx1,
+ unsigned CommutableOpIdx2) {
+ if (ResultIdx1 == CommuteAnyOperandIndex &&
+ ResultIdx2 == CommuteAnyOperandIndex) {
+ ResultIdx1 = CommutableOpIdx1;
+ ResultIdx2 = CommutableOpIdx2;
+ } else if (ResultIdx1 == CommuteAnyOperandIndex) {
+ if (ResultIdx2 == CommutableOpIdx1)
+ ResultIdx1 = CommutableOpIdx2;
+ else if (ResultIdx2 == CommutableOpIdx2)
+ ResultIdx1 = CommutableOpIdx1;
+ else
+ return false;
+ } else if (ResultIdx2 == CommuteAnyOperandIndex) {
+ if (ResultIdx1 == CommutableOpIdx1)
+ ResultIdx2 = CommutableOpIdx2;
+ else if (ResultIdx1 == CommutableOpIdx2)
+ ResultIdx2 = CommutableOpIdx1;
+ else
+ return false;
+ } else
+ // Check that the result operand indices match the given commutable
+ // operand indices.
+ return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) ||
+ (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1);
+
+ return true;
+}
+
bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
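fixCommutedOpIndices() reconciles caller-requested operand indices with the pair the target reports as commutable, treating CommuteAnyOperandIndex as a wildcard. A few representative resolutions, assuming the target reports operands 1 and 2 as its commutable pair:

    // ResultIdx1              ResultIdx2              outcome
    // ----------------------  ----------------------  -------------------------
    // CommuteAnyOperandIndex  CommuteAnyOperandIndex  -> (1, 2): both wildcards
    // CommuteAnyOperandIndex  2                       -> (1, 2): wildcard fixed
    // 1                       2                       -> accepted as-is
    // 1                       3                       -> false: 3 not commutable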
@@ -196,10 +241,15 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
+
// This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
// is not true, then the target must implement this.
- SrcOpIdx1 = MCID.getNumDefs();
- SrcOpIdx2 = SrcOpIdx1 + 1;
+ unsigned CommutableOpIdx1 = MCID.getNumDefs();
+ unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+
if (!MI->getOperand(SrcOpIdx1).isReg() ||
!MI->getOperand(SrcOpIdx2).isReg())
// No idea.
@@ -207,7 +257,6 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
return true;
}
-
bool
TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
@@ -315,7 +364,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!MF.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!MF.getDataLayout().isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -384,11 +433,6 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
-}
-
static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
@@ -489,10 +533,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
return NewMI;
@@ -517,6 +560,217 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
return --Pos;
}
+bool TargetInstrInfo::hasReassociableOperands(
+ const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions for the operands that we will
+ // reassociate.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
+}
+
+bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
+ bool &Commuted) const {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
+ std::swap(MI1, MI2);
+
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ return MI1->getOpcode() == AssocOpcode &&
+ hasReassociableOperands(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
+}
+
+// 1. The operation must be associative and commutative.
+// 2. The instruction must have virtual register definitions for its
+// operands in the same basic block.
+// 3. The instruction must have a reassociable sibling.
+bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
+ bool &Commuted) const {
+ return isAssociativeAndCommutative(Inst) &&
+ hasReassociableOperands(Inst, Inst.getParent()) &&
+ hasReassociableSibling(Inst, Commuted);
+}
+
+// The concept of the reassociation pass is that these operations can benefit
+// from this kind of transformation:
+//
+// A = ? op ?
+// B = A op X (Prev)
+// C = B op Y (Root)
+// -->
+// A = ? op ?
+// B = X op Y
+// C = A op B
+//
+// breaking the dependency between A and B, allowing them to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
+
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
+bool TargetInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+
+ bool Commute;
+ if (isReassociationCandidate(Root, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Attempt the reassociation transformation to reduce critical path length.
+/// See the above comments before getMachineCombinerPatterns().
+void TargetInstrInfo::reassociateOps(
+ MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ int Row;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
+ case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
+ case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
+ case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
+ default: llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+
+ setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
+
+ // Record new instructions for insertion and old instructions for deletion.
+ InsInstrs.push_back(MIB1);
+ InsInstrs.push_back(MIB2);
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void TargetInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ break;
+ default:
+ break;
+ }
+
+ assert(Prev && "Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
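To make the REASSOC_* patterns concrete, here is one instance of the rewrite sketched in the comment above, with op = add and invented value names; the point is that the two adds feeding C no longer form a serial chain:

    // Before:                        After:
    //   a = p + q                      a = p + q
    //   b = a + x   (Prev)             t = x + y   (independent of a)
    //   c = b + y   (Root)             c = a + t
    //
    // The dependence chain a -> b -> c of depth three becomes two
    // independent adds plus one combining add: critical path depth two.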
@@ -661,6 +915,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
return 0;
int SPAdj = MI->getOperand(0).getImm();
+ SPAdj = TFI->alignSPAdjust(SPAdj);
if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) ||
(StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode))
@@ -686,10 +941,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// modification.
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
- return true;
-
- return false;
+ return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
}
// Provide a global flag for disabling the PreRA hazard recognizer that targets
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index ecfd659..36a31c9 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -247,13 +247,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
- Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
- Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
- Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
- Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
@@ -266,13 +262,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
- Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
- Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
- Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
@@ -501,10 +493,6 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -512,10 +500,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOSINT_F32_I128;
} else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F64_I32;
if (RetVT == MVT::i64)
@@ -551,10 +535,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -562,10 +542,6 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOUINT_F32_I128;
} else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F64_I32;
if (RetVT == MVT::i64)
@@ -758,17 +734,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
SelectIsExpensive = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
- IntDivIsCheap = false;
FsqrtIsCheap = false;
- Pow2SDivIsCheap = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
@@ -778,6 +750,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
+ GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
InsertFencesForAtomic = false;
MinimumJumpTableEntries = 4;
@@ -814,6 +787,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMINNAN, VT, Expand);
+ setOperationAction(ISD::FMAXNAN, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
@@ -828,6 +803,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
+ setOperationAction(ISD::BITREVERSE, VT, Expand);
+
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
@@ -838,11 +815,17 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
+
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+ // Most targets also ignore the @llvm.readcyclecounter intrinsic.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
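The hunks above grow the set of operations that initActions() marks Expand by default (FMINNAN/FMAXNAN, BITREVERSE, GET_DYNAMIC_AREA_OFFSET, READCYCLECOUNTER). A minimal sketch of how a backend opts back in, assuming a hypothetical MyTargetLowering subclass (not part of this patch):

// Sketch only; MyTargetLowering is hypothetical.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  // initActions() ran in the base constructor and marked these Expand; a
  // target with a native cycle counter and bit-reverse flips them to Legal.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
}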
@@ -1111,6 +1094,19 @@ MachineBasicBlock*
TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // We're handling multiple types of operands here:
+ // PATCHPOINT MetaArgs - live-in, read only, direct
+ // STATEPOINT Deopt Spill - live-through, read only, indirect
+ // STATEPOINT Deopt Alloca - live-through, read only, direct
+ // (We're currently conservative and mark the deopt slots read/write in
+ // practice.)
+ // STATEPOINT GC Spill - live-through, read/write, indirect
+ // STATEPOINT GC Alloca - live-through, read/write, direct
+ // The live-in vs. live-through distinction is handled already (the
+ // live-through ones are all stack slots), but we need to handle the
+ // different types of stackmap operands and memory effects here.
// MI changes inside this loop as we grow operands.
for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
@@ -1126,10 +1122,24 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Copy operands before the frame-index.
for (unsigned i = 0; i < OperIdx; ++i)
MIB.addOperand(MI->getOperand(i));
- // Add frame index operands: direct-mem-ref tag, #FI, offset.
- MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.addOperand(MI->getOperand(OperIdx));
- MIB.addImm(0);
+ // Add the frame index operands recognized by StackMaps.cpp.
+ if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
+ // indirect-mem-ref tag, size, #FI, offset.
+ // Used for spills inserted by StatepointLowering. This code path is not
+ // used for patchpoints/stackmaps at all; for those, spilling is done via
+ // the foldMemoryOperand callback only.
+ assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(MFI.getObjectSize(FI));
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ } else {
+ // direct-mem-ref tag, #FI, offset.
+ // Used by patchpoints, and by direct alloca arguments to statepoints.
+ MIB.addImm(StackMaps::DirectMemRefOp);
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ }
// Copy the operands after the frame index.
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
MIB.addOperand(MI->getOperand(i));
@@ -1139,7 +1149,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
unsigned Flags = MachineMemOperand::MOLoad;
@@ -1148,8 +1157,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
Flags |= MachineMemOperand::MOVolatile;
}
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), Flags,
- TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI));
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -1274,20 +1283,14 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
+ // Decide how to handle f16. If the target does not have native f16 support,
+ // promote it to f32, because there are no f16 library calls (except for
+ // conversions).
if (!isTypeLegal(MVT::f16)) {
- // If the target has native f32 support, promote f16 operations to f32. If
- // f32 is not supported, generate soft float library calls.
- if (isTypeLegal(MVT::f32)) {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
- TransformToType[MVT::f16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
- } else {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
- TransformToType[MVT::f16] = MVT::i16;
- ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
- }
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
// Loop over all of the vector value types to see which need transformations.
@@ -1528,6 +1531,29 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
return DL.getABITypeAlignment(Ty);
}
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace,
+ unsigned Alignment,
+ bool *Fast) const {
+ // Check if the specified alignment is sufficient based on the data layout.
+ // TODO: While using the data layout works in practice, a better solution
+ // would be to implement this check directly (make this a virtual function).
+ // For example, the ABI alignment may change based on software platform while
+ // this function should only be affected by hardware implementation.
+ Type *Ty = VT.getTypeForEVT(Context);
+ if (Alignment >= DL.getABITypeAlignment(Ty)) {
+ // Assume that an access that meets the ABI-specified alignment is fast.
+ if (Fast != nullptr)
+ *Fast = true;
+ return true;
+ }
+
+ // This is a misaligned access.
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+}
+
+
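A sketch of how the new allowsMemoryAccess() hook might be queried during lowering; Ctx, DL, TLI and the literal values are illustrative assumptions, not from this patch:

// Hypothetical caller: is an i32 load at 2-byte alignment OK, and is it fast?
bool Fast = false;
if (TLI.allowsMemoryAccess(Ctx, DL, MVT::i32, /*AddrSpace=*/0,
                           /*Alignment=*/2, &Fast)) {
  // Either the access meets the ABI alignment (Fast == true), or the target
  // accepted it via allowsMisalignedMemoryAccesses() and set Fast itself.
}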
//===----------------------------------------------------------------------===//
// TargetTransformInfo Helpers
//===----------------------------------------------------------------------===//
@@ -1546,6 +1572,11 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case Invoke: return 0;
case Resume: return 0;
case Unreachable: return 0;
+ case CleanupRet: return 0;
+ case CatchRet: return 0;
+ case CatchPad: return 0;
+ case CatchSwitch: return 0;
+ case CleanupPad: return 0;
case Add: return ISD::ADD;
case FAdd: return ISD::FADD;
case Sub: return ISD::SUB;
@@ -1603,13 +1634,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
-std::pair<unsigned, MVT>
+std::pair<int, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty->getContext();
EVT MTy = getValueType(DL, Ty);
- unsigned Cost = 1;
+ int Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.
@@ -1622,11 +1653,28 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
Cost *= 2;
+ // Stop once the type legalizes to itself (e.g. f128) to avoid looping forever.
+ if (MTy == LK.second)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
// Keep legalizing the type.
MTy = LK.second;
}
}
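The return type change above (unsigned to int) propagates to callers; a sketch of a hypothetical caller, with DL, TLI and Ty assumed to be in scope:

// E.g. inside a TTI cost hook:
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
int Cost = LT.first;     // doubles once per vector split / integer expansion
MVT LegalTy = LT.second; // the type at which legalization stopped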
+Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!TM.getTargetTriple().isAndroid())
+ return nullptr;
+
+ // Android provides a libc function to retrieve the address of the current
+ // thread's unsafe stack pointer.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+ Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0), nullptr);
+ return IRB.CreateCall(Fn);
+}
+
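A sketch of a SafeStack-style client of the new hook; F and TLI are assumed to be in scope and the fallback policy is hypothetical:

IRBuilder<> IRB(&F.getEntryBlock().front());
if (Value *UnsafeStackPtrAddr = TLI->getSafeStackPointerLocation(IRB)) {
  // On Android this is the result of calling __safestack_pointer_address();
  // load the current unsafe stack pointer through it.
  Value *UnsafeStackPtr = IRB.CreateLoad(UnsafeStackPtrAddr);
  (void)UnsafeStackPtr;
}
// A null return means the pass must pick its own storage (e.g. a TLS slot).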
//===----------------------------------------------------------------------===//
// Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2f78763..58ae9cc 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
@@ -32,6 +33,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -58,9 +60,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
report_fatal_error("We do not support this DWARF encoding yet!");
}
-void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
- const TargetMachine &TM,
- const MCSymbol *Sym) const {
+void TargetLoweringObjectFileELF::emitPersonalityValue(
+ MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
MCSymbolELF *Label =
@@ -72,9 +73,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS,
Flags, 0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -232,14 +233,8 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".tdata";
if (Kind.isThreadBSS())
return ".tbss";
- if (Kind.isDataNoRel())
+ if (Kind.isData())
return ".data";
- if (Kind.isDataRelLocal())
- return ".data.rel.local";
- if (Kind.isDataRel())
- return ".data.rel";
- if (Kind.isReadOnlyWithRelLocal())
- return ".data.rel.ro.local";
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
return ".data.rel.ro";
}
@@ -282,8 +277,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
- unsigned Align =
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV));
std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
Name = SizeSpec + utostr(Align);
@@ -350,9 +345,8 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
/// Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
-MCSection *
-TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
@@ -362,7 +356,6 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
if (Kind.isReadOnly())
return ReadOnlySection;
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
return DataRelROSection;
}
@@ -507,7 +500,7 @@ emitModuleFlags(MCStreamer &Streamer,
// Get the section.
MCSectionMachO *S = getContext().getMachOSection(
- Segment, Section, TAA, StubSize, SectionKind::getDataNoRel());
+ Segment, Section, TAA, StubSize, SectionKind::getData());
Streamer.SwitchSection(S);
Streamer.EmitLabel(getContext().
getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
@@ -589,14 +582,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label; this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
@@ -634,12 +629,11 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
return DataSection;
}
-MCSection *
-TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
- if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
return ConstDataSection;
if (Kind.isMergeableConst4())
@@ -706,7 +700,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
- // Although MachO 32-bit targets do not explictly have a GOTPCREL relocation
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
// as 64-bit do, we replace the GOT equivalent by accessing the final symbol
// through a non_lazy_ptr stub instead. One advantage is that it allows the
// computation of deltas to final external symbols. Example:
@@ -740,7 +734,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
// non_lazy_ptr stubs.
SmallString<128> Name;
StringRef Suffix = "$non_lazy_ptr";
- Name += DL->getPrivateGlobalPrefix();
+ Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix();
Name += Sym->getName();
Name += Suffix;
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
@@ -763,6 +757,29 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
return MCBinaryExpr::createSub(LHS, RHS, Ctx);
}
+static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
+ const MCSection &Section) {
+ if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ return true;
+
+ // If it is not dead stripped, it is safe to use private labels.
+ const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return true;
+
+ return false;
+}
+
+void TargetLoweringObjectFileMachO::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+ const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM);
+ bool CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -918,7 +935,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
COMDATSymName, Selection);
} else {
SmallString<256> TmpData;
- getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
Selection);
}
@@ -943,8 +960,9 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
}
void TargetLoweringObjectFileCOFF::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ bool CannotUsePrivateLabel = false;
if (GV->hasPrivateLinkage() &&
((isa<Function>(GV) && TM.getFunctionSections()) ||
(isa<GlobalVariable>(GV) && TM.getDataSections())))
@@ -1043,7 +1061,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
raw_string_ostream FlagOS(Flag);
Mang.getNameWithPrefix(FlagOS, GV, false);
FlagOS.flush();
- if (Flag[0] == DL->getGlobalPrefix())
+ if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
OS << Flag.substr(1);
else
OS << Flag;
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 61a66b6..0a7042a 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,13 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "target-reg-info"
using namespace llvm;
@@ -34,54 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {}
-void PrintReg::print(raw_ostream &OS) const {
- if (!Reg)
- OS << "%noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
- else if (TRI && Reg < TRI->getNumRegs())
- OS << '%' << TRI->getName(Reg);
- else
- OS << "%physreg" << Reg;
- if (SubIdx) {
- if (TRI)
- OS << ':' << TRI->getSubRegIndexName(SubIdx);
+namespace llvm {
+
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
+ unsigned SubIdx) {
+ return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
else
- OS << ":sub(" << SubIdx << ')';
- }
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+ });
}
-void PrintRegUnit::print(raw_ostream &OS) const {
- // Generic printout when TRI is missing.
- if (!TRI) {
- OS << "Unit~" << Unit;
- return;
- }
+Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
- // Check for invalid register units.
- if (Unit >= TRI->getNumRegUnits()) {
- OS << "BadUnit~" << Unit;
- return;
- }
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
- // Normal units have at least one root.
- MCRegUnitRootIterator Roots(Unit, TRI);
- assert(Roots.isValid() && "Unit has no roots.");
- OS << TRI->getName(*Roots);
- for (++Roots; Roots.isValid(); ++Roots)
- OS << '~' << TRI->getName(*Roots);
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+ });
}
-void PrintVRegOrUnit::print(raw_ostream &OS) const {
- if (TRI && TRI->isVirtualRegister(Unit)) {
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
- return;
- }
- PrintRegUnit::print(OS);
+Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ if (TRI && TRI->isVirtualRegister(Unit)) {
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+ } else {
+ OS << PrintRegUnit(Unit, TRI);
+ }
+ });
+}
+
+Printable PrintLaneMask(LaneBitmask LaneMask) {
+ return Printable([LaneMask](raw_ostream &OS) {
+ OS << format("%08X", LaneMask);
+ });
}
+} // End of llvm namespace
+
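The refactoring above turns the PrintReg/PrintRegUnit/PrintVRegOrUnit classes into factory functions returning Printable, which captures a lambda and formats only when streamed. A usage sketch; Reg, Unit, Mask and TRI are assumed to be in scope:

// Sketch only:
DEBUG(dbgs() << "assigned " << PrintReg(Reg, TRI)
             << " covering " << PrintRegUnit(Unit, TRI)
             << ", lanes " << PrintLaneMask(Mask) << '\n');
// Each helper just captures a lambda; no temporary std::string is built, and
// nothing is formatted unless the Printable is actually streamed.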
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is allocatable, or NULL.
const TargetRegisterClass *
@@ -161,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
static inline
const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const uint32_t *B,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const MVT::SimpleValueType SVT =
+ MVT::SimpleValueType::Any) {
+ const MVT VT(SVT);
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + countTrailingZeros(Common));
+ if (unsigned Common = *A++ & *B++) {
+ const TargetRegisterClass *RC =
+ TRI->getRegClass(I + countTrailingZeros(Common));
+ if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT))
+ return RC;
+ }
return nullptr;
}
const TargetRegisterClass *
TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) const {
+ const TargetRegisterClass *B,
+ const MVT::SimpleValueType SVT) const {
// First take care of the trivial cases.
if (A == B)
return A;
@@ -179,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
// Register classes are ordered topologically, so the largest common
// sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT);
}
const TargetRegisterClass *
@@ -260,13 +291,55 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
return BestRC;
}
+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) {
+ // Same register class.
+ if (DefRC == SrcRC)
+ return true;
+
+ // Both operands are subregisters. Check if they share a register class.
+ unsigned SrcIdx, DefIdx;
+ if (SrcSubReg && DefSubReg) {
+ return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+ SrcIdx, DefIdx) != nullptr;
+ }
+
+ // At most one of the registers is a subregister; make it Src to avoid
+ // duplicating the test.
+ if (!SrcSubReg) {
+ std::swap(DefSubReg, SrcSubReg);
+ std::swap(DefRC, SrcRC);
+ }
+
+ // One of the registers is a subregister; check if we can get a superclass.
+ if (SrcSubReg)
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
+
+ // Plain copy.
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
+bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // If this source does not incur a cross register bank copy, use it.
+ return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
+}
+
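A sketch of how the new shouldRewriteCopySrc() hook might be used by a peephole-style caller; TRI and the four operands are assumed to be in scope:

// Hypothetical caller:
if (TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC, SrcSubReg)) {
  // Def and Src live in the same register file, so rewriting the copy's
  // source cannot introduce a cross-register-bank copy.
}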
// Compute target-independent register allocator hints to help eliminate copies.
void
TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const {
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
@@ -295,6 +368,26 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(Phys);
}
+bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+}
+
+bool TargetRegisterInfo::needsStackRealignment(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TFI->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
+ if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
+ if (canRealignStack(MF))
+ return true;
+ DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n");
+ }
+ return false;
+}
+
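Targets can refine the default hooks above; a sketch of an override, where MyRegisterInfo and hasBasePointer() are hypothetical:

// Sketch only:
bool MyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
  // The base implementation already honors the "no-realign-stack" attribute;
  // additionally refuse when this target has no base pointer available.
  return TargetRegisterInfo::canRealignStack(MF) && hasBasePointer(MF);
}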
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 299380d..fc65639 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -211,11 +211,9 @@ unsigned TargetSchedModel::computeOperandLatency(
if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
&& SchedModel.isComplete()) {
- std::string Err;
- raw_string_ostream ss(Err);
- ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
- << *DefMI;
- report_fatal_error(ss.str());
+ errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ llvm_unreachable("incomplete machine model");
}
#endif
// FIXME: Automatically giving all implicit defs defaultDefLatency is
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1e30821..c6bae24 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -83,21 +83,20 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// The current basic block being processed.
MachineBasicBlock *MBB;
- // DistanceMap - Keep track the distance of a MI from the start of the
- // current basic block.
+ // Keep track of the distance of a MI from the start of the current basic block.
DenseMap<MachineInstr*, unsigned> DistanceMap;
// Set of already processed instructions in the current block.
SmallPtrSet<MachineInstr*, 8> Processed;
- // SrcRegMap - A map from virtual registers to physical registers which are
- // likely targets to be coalesced to due to copies from physical registers to
- // virtual registers. e.g. v1024 = move r0.
+ // A map from virtual registers to the physical registers they are likely to
+ // be coalesced with, due to copies from physical registers to virtual
+ // registers, e.g. v1024 = move r0.
DenseMap<unsigned, unsigned> SrcRegMap;
- // DstRegMap - A map from virtual registers to physical registers which are
- // likely targets to be coalesced to due to copies to physical registers from
- // virtual registers. e.g. r1 = move v1024.
+ // A map from virtual registers to the physical registers they are likely to
+ // be coalesced with, due to copies to physical registers from virtual
+ // registers, e.g. r1 = move v1024.
DenseMap<unsigned, unsigned> DstRegMap;
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
@@ -110,8 +109,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, unsigned Dist);
- bool commuteInstruction(MachineBasicBlock::iterator &mi,
- unsigned RegB, unsigned RegC, unsigned Dist);
+ bool commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
@@ -133,6 +132,11 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
unsigned SrcIdx, unsigned DstIdx,
unsigned Dist, bool shouldOnlyCommute);
+ bool tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist);
void scanUses(unsigned DstReg);
void processCopy(MachineInstr *MI);
@@ -151,7 +155,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -160,7 +164,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- /// runOnMachineFunction - Pass entry point.
+ /// Pass entry point.
bool runOnMachineFunction(MachineFunction&) override;
};
} // end anonymous namespace
@@ -168,7 +172,7 @@ public:
char TwoAddressInstructionPass::ID = 0;
INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
"Two-Address instruction pass", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
"Two-Address instruction pass", false, false)
@@ -176,10 +180,9 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
-/// sink3AddrInstruction - A two-address instruction has been converted to a
-/// three-address instruction to avoid clobbering a register. Try to sink it
-/// past the instruction that would kill the above mentioned register to reduce
-/// register pressure.
+/// A two-address instruction has been converted to a three-address instruction
+/// to avoid clobbering a register. Try to sink it past the instruction that
+/// would kill the above-mentioned register to reduce register pressure.
bool TwoAddressInstructionPass::
sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
MachineBasicBlock::iterator OldPos) {
@@ -195,8 +198,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
unsigned DefReg = 0;
SmallSet<unsigned, 4> UseRegs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -231,10 +233,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
KillMI = LIS->getInstructionFromIndex(I->end);
}
if (!KillMI) {
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SavedReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = *UI;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) {
if (!UseMO.isKill())
continue;
KillMI = UseMO.getParent();
@@ -312,8 +311,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
-/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg
-/// in current BB.
+/// Return the MachineInstr* if it is the single def of the Reg in current BB.
static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
MachineInstr *Ret = nullptr;
@@ -351,10 +349,10 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
return false;
}
-/// noUseAfterLastDef - Return true if there are no intervening uses between the
-/// last instruction in the MBB that defines the specified register and the
-/// two-address instruction which is being processed. It also returns the last
-/// def location by reference
+/// Return true if there are no intervening uses between the last instruction
+/// in the MBB that defines the specified register and the two-address
+/// instruction which is being processed. It also returns the last def location
+/// by reference.
bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
unsigned &LastDef) {
LastDef = 0;
@@ -375,9 +373,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
return !(LastUse > LastDef && LastUse < Dist);
}
-/// isCopyToReg - Return true if the specified MI is a copy instruction or
-/// a extract_subreg instruction. It also returns the source and destination
-/// registers and whether they are physical registers by reference.
+/// Return true if the specified MI is a copy instruction or an extract_subreg
+/// instruction. It also returns the source and destination registers and
+/// whether they are physical registers by reference.
static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
unsigned &SrcReg, unsigned &DstReg,
bool &IsSrcPhys, bool &IsDstPhys) {
@@ -397,8 +395,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
-/// isPLainlyKilled - Test if the given register value, which is used by the
-// given instruction, is killed by the given instruction.
+/// Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
LiveIntervals *LIS) {
if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
@@ -424,7 +422,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
return MI->killsRegister(Reg);
}
-/// isKilled - Test if the given register value, which is used by the given
+/// Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
///
@@ -472,8 +470,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
}
}
-/// isTwoAddrUse - Return true if the specified MI uses the specified register
-/// as a two-address use. If so, return the destination register by reference.
+/// Return true if the specified MI uses the specified register as a two-address
+/// use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -488,8 +486,8 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
-/// findOnlyInterestingUse - Given a register, if has a single in-basic block
-/// use, return the use instruction if it's a copy or a two-address use.
+/// Given a register, if it has a single in-basic-block use, return the use
+/// instruction if it's a copy or a two-address use.
static
MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
@@ -516,8 +514,8 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
return nullptr;
}
-/// getMappedReg - Return the physical register the specified virtual register
-/// might be mapped to.
+/// Return the physical register the specified virtual register might be mapped
+/// to.
static unsigned
getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
while (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -531,8 +529,7 @@ getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
return 0;
}
-/// regsAreCompatible - Return true if the two registers are equal or aliased.
-///
+/// Return true if the two registers are equal or aliased.
static bool
regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
if (RegA == RegB)
@@ -543,8 +540,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToCommute - Return true if it's potentially profitable to commute
-/// the two-address instruction that's being processed.
+/// Return true if it's potentially profitable to commute the two-address
+/// instruction that's being processed.
bool
TwoAddressInstructionPass::
isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -642,15 +639,15 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
return LastDefB && LastDefC && LastDefC > LastDefB;
}
-/// commuteInstruction - Commute a two-address instruction and update the basic
-/// block, distance map, and live variables if needed. Return true if it is
-/// successful.
-bool TwoAddressInstructionPass::
-commuteInstruction(MachineBasicBlock::iterator &mi,
- unsigned RegB, unsigned RegC, unsigned Dist) {
- MachineInstr *MI = mi;
+/// Commute a two-address instruction and update the basic block, distance map,
+/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx,
+ unsigned RegCIdx,
+ unsigned Dist) {
+ unsigned RegC = MI->getOperand(RegCIdx).getReg();
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
- MachineInstr *NewMI = TII->commuteInstruction(MI);
+ MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx);
if (NewMI == nullptr) {
DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
@@ -672,8 +669,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
return true;
}
-/// isProfitableToConv3Addr - Return true if it is profitable to convert the
-/// given 2-address instruction to a 3-address one.
+/// Return true if it is profitable to convert the given 2-address instruction
+/// to a 3-address one.
bool
TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
// Look for situations like this:
@@ -689,17 +686,18 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
-/// convertInstTo3Addr - Convert the specified two-address instruction into a
-/// three address one. Return true if this transformation was successful.
+/// Convert the specified two-address instruction into a three address one.
+/// Return true if this transformation was successful.
bool
TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned RegA, unsigned RegB,
unsigned Dist) {
// FIXME: Why does convertToThreeAddress() need an iterator reference?
- MachineFunction::iterator MFI = MBB;
+ MachineFunction::iterator MFI = MBB->getIterator();
MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
- assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ assert(MBB->getIterator() == MFI &&
+ "convertToThreeAddress changed iterator reference");
if (!NewMI)
return false;
@@ -730,8 +728,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
return true;
}
-/// scanUses - Scan forward recursively for only uses, update maps if the use
-/// is a copy or a two-address instruction.
+/// Scan forward recursively for only uses, update maps if the use is a copy or
+/// a two-address instruction.
void
TwoAddressInstructionPass::scanUses(unsigned DstReg) {
SmallVector<unsigned, 4> VirtRegPairs;
@@ -777,8 +775,8 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) {
}
}
-/// processCopy - If the specified instruction is not yet processed, process it
-/// if it's a copy. For a copy instruction, we find the physical registers the
+/// If the specified instruction is not yet processed, process it if it's a
+/// copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
/// point-to maps used to determine future optimizations. e.g.
/// v1024 = mov r0
@@ -813,9 +811,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
return;
}
-/// rescheduleMIBelowKill - If there is one more local instruction that reads
-/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
-/// instruction in order to eliminate the need for the copy.
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the instruction below the kill instruction in order to
+/// eliminate the need for the copy.
bool TwoAddressInstructionPass::
rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -871,8 +869,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
SmallSet<unsigned, 2> Uses;
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -914,8 +911,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
OtherMI->isBranch() || OtherMI->isTerminator())
// Don't move past calls, etc.
return false;
- for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OtherMI->getOperand(i);
+ for (const MachineOperand &MO : OtherMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -984,8 +980,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
return true;
}
-/// isDefTooClose - Return true if the re-scheduling will put the given
-/// instruction too close to the defs of its register dependencies.
+/// Return true if the re-scheduling will put the given instruction too close
+/// to the defs of its register dependencies.
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
@@ -1004,10 +1000,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return false;
}
-/// rescheduleKillAboveMI - If there is one more local instruction that reads
-/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the
-/// current two-address instruction in order to eliminate the need for the
-/// copy.
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the kill instruction above the current two-address
+/// instruction in order to eliminate the need for the copy.
bool TwoAddressInstructionPass::
rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -1055,8 +1050,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
SmallSet<unsigned, 2> LiveDefs;
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = KillMI->getOperand(i);
+ for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -1094,8 +1088,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Don't move past calls, etc.
return false;
SmallVector<unsigned, 2> OtherDefs;
- for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OtherMI->getOperand(i);
+ for (const MachineOperand &MO : OtherMI->operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -1155,13 +1148,68 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
return true;
}
-/// tryInstructionTransform - For the case where an instruction has a single
-/// pair of tied register operands, attempt some transformations that may
-/// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if no copy needs to be
-/// inserted to untie mi's operands (either because they were untied, or
-/// because mi was rescheduled, and will be visited again later). If the
-/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the
+/// given machine instruction to improve opportunities for coalescing and
+/// elimination of a register to register copy.
+///
+/// 'DstOpIdx' specifies the index of MI def operand.
+/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx'
+/// operand is killed by the given instruction.
+/// The 'Dist' argument provides the distance of MI from the start of the
+/// current basic block; it is used to determine whether it is profitable
+/// to commute operands in the instruction.
+///
+/// Returns true if the transformation happened. Otherwise, returns false.
+bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist) {
+ unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ unsigned OpsNum = MI->getDesc().getNumOperands();
+ unsigned OtherOpIdx = MI->getDesc().getNumDefs();
+ for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
+ // The call to findCommutedOpIndices below only checks if BaseOpIdx
+ // and OtherOpIdx are commutable; it does not actually search for
+ // other commutable operands and does not change the values of the
+ // passed variables.
+ if (OtherOpIdx == BaseOpIdx ||
+ !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx))
+ continue;
+
+ unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ bool AggressiveCommute = false;
+
+ // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
+ // operands. This makes the live ranges of DstOp and OtherOp joinable.
+ bool DoCommute =
+ !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+
+ if (!DoCommute &&
+ isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
+ DoCommute = true;
+ AggressiveCommute = true;
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// For the case where an instruction has a single pair of tied register
+/// operands, attempt some transformations that may either eliminate the tied
+/// operands or improve the opportunities for coalescing away the register copy.
+/// Returns true if no copy needs to be inserted to untie mi's operands
+/// (either because they were untied, or because mi was rescheduled, and will
+/// be visited again later). If the shouldOnlyCommute flag is true, only
+/// instruction commutation is attempted.
bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -1181,51 +1229,18 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (TargetRegisterInfo::isVirtualRegister(regA))
scanUses(regA);
- // Check if it is profitable to commute the operands.
- unsigned SrcOp1, SrcOp2;
- unsigned regC = 0;
- unsigned regCIdx = ~0U;
- bool TryCommute = false;
- bool AggressiveCommute = false;
- if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
- TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
- if (SrcIdx == SrcOp1)
- regCIdx = SrcOp2;
- else if (SrcIdx == SrcOp2)
- regCIdx = SrcOp1;
-
- if (regCIdx != ~0U) {
- regC = MI.getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
- // If C dies but B does not, swap the B and C operands.
- // This makes the live ranges of A and C joinable.
- TryCommute = true;
- else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
- TryCommute = true;
- AggressiveCommute = true;
- }
- }
- }
+ bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
// If the instruction is convertible to 3-addr form, instead of returning
// early, try the 3-addr transformation aggressively and use this variable
// to check later, because the result might be better.
// For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
// instead of the following code.
- // addl %esi, %edi
- // movl %edi, %eax
+ // addl %esi, %edi
+ // movl %edi, %eax
// ret
- bool Commuted = false;
-
- // If it's profitable to commute, try to do so.
- if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
- Commuted = true;
- ++NumCommuted;
- if (AggressiveCommute)
- ++NumAggrCommuted;
- if (!MI.isConvertibleTo3Addr())
- return false;
- }
+ if (Commuted && !MI.isConvertibleTo3Addr())
+ return false;
if (shouldOnlyCommute)
return false;
@@ -1237,6 +1252,13 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return true;
}
+ // If we commuted, regB may have changed so we should re-sample it to avoid
+ // confusing the three address conversion below.
+ if (Commuted) {
+ regB = MI.getOperand(SrcIdx).getReg();
+ regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ }
+
if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
@@ -1348,10 +1370,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
SmallVector<unsigned, 4> OrigRegs;
if (LIS) {
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg())
- OrigRegs.push_back(MOI->getReg());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg())
+ OrigRegs.push_back(MO.getReg());
}
}
@@ -1536,12 +1557,10 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
SrcRegMap[RegA] = RegB;
}
-
if (AllUsesCopied) {
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
MO.isUse()) {
if (MO.isKill()) {
@@ -1578,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// regB is still used in this instruction, but a kill flag was
// removed from a different tied use of regB, so now we need to add
// a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
MO.setIsKill(true);
break;
@@ -1588,8 +1606,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
}
-/// runOnMachineFunction - Reduce two-address instructions to two operands.
-///
+/// Reduce two-address instructions to two operands.
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
@@ -1599,7 +1616,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
OptLevel = TM.getOptLevel();
bool MadeChange = false;
@@ -1614,7 +1631,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
TiedOperandMap TiedOperands;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
- MBB = MBBI;
+ MBB = &*MBBI;
unsigned Dist = 0;
DistanceMap.clear();
SrcRegMap.clear();
@@ -1661,8 +1678,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
- // The tied operands have been eliminated or shifted further down the
- // block to ease elimination. Continue processing with 'nmi'.
+ // The tied operands have been eliminated or shifted further down
+ // the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
mi = nmi;
continue;
@@ -1671,9 +1688,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
// Now iterate over the information collected above.
- for (TiedOperandMap::iterator OI = TiedOperands.begin(),
- OE = TiedOperands.end(); OI != OE; ++OI) {
- processTiedPairs(mi, OI->second, Dist);
+ for (auto &TO : TiedOperands) {
+ processTiedPairs(mi, TO.second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index d393e10..8c9631e 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -71,8 +71,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
// in them.
std::vector<BasicBlock*> DeadBlocks;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(I)) {
- BasicBlock *BB = I;
+ if (!Reachable.count(&*I)) {
+ BasicBlock *BB = &*I;
DeadBlocks.push_back(BB);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
@@ -131,7 +131,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// in them.
std::vector<MachineBasicBlock*> DeadBlocks;
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
+ MachineBasicBlock *BB = &*I;
// Test for deadness.
if (!Reachable.count(BB)) {
@@ -167,7 +167,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// Cleanup PHI nodes.
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = I;
+ MachineBasicBlock *BB = &*I;
// Prune unneeded PHI entries.
SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
BB->pred_end());
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 2912bdd..bf1c0dc 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -163,11 +163,12 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
- SparseSet<unsigned> PhysRegs;
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+
public:
static char ID;
VirtRegRewriter() : MachineFunctionPass(ID) {}
@@ -237,10 +238,52 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
return true;
}
+void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
+ unsigned PhysReg) const {
+ assert(!LI.empty());
+ assert(LI.hasSubRanges());
+
+ typedef std::pair<const LiveInterval::SubRange *,
+ LiveInterval::const_iterator> SubRangeIteratorPair;
+ SmallVector<SubRangeIteratorPair, 4> SubRanges;
+ SlotIndex First;
+ SlotIndex Last;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRanges.push_back(std::make_pair(&SR, SR.begin()));
+ if (!First.isValid() || SR.segments.front().start < First)
+ First = SR.segments.front().start;
+ if (!Last.isValid() || SR.segments.back().end > Last)
+ Last = SR.segments.back().end;
+ }
+
+ // Check all mbb start positions between First and Last while
+ // simultaneously advancing an iterator for each subrange.

+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First);
+ MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) {
+ SlotIndex MBBBegin = MBBI->first;
+ // Advance all subrange iterators so that their end position is just
+ // behind MBBBegin (or the iterator is at the end).
+ LaneBitmask LaneMask = 0;
+ for (auto &RangeIterPair : SubRanges) {
+ const LiveInterval::SubRange *SR = RangeIterPair.first;
+ LiveInterval::const_iterator &SRI = RangeIterPair.second;
+ while (SRI != SR->end() && SRI->end <= MBBBegin)
+ ++SRI;
+ if (SRI == SR->end())
+ continue;
+ if (SRI->start <= MBBBegin)
+ LaneMask |= SR->LaneMask;
+ }
+ if (LaneMask == 0)
+ continue;
+ MachineBasicBlock *MBB = MBBI->second;
+ MBB->addLiveIn(PhysReg, LaneMask);
+ }
+}
+
// Compute MBB live-in lists from virtual register live ranges and their
// assignments.
void VirtRegRewriter::addMBBLiveIns() {
- SmallVector<MachineBasicBlock*, 16> LiveIn;
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
if (MRI->reg_nodbg_empty(VirtReg))
@@ -254,31 +297,18 @@ void VirtRegRewriter::addMBBLiveIns() {
assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
if (LI.hasSubRanges()) {
- for (LiveInterval::SubRange &S : LI.subranges()) {
- for (const auto &Seg : S.segments) {
- if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
- continue;
- for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) {
- unsigned SubReg = SR.getSubReg();
- unsigned SubRegIndex = SR.getSubRegIndex();
- unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
- if ((SubRegLaneMask & S.LaneMask) == 0)
- continue;
- for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
- LiveIn[i]->addLiveIn(SubReg);
- }
- }
- LiveIn.clear();
- }
- }
+ addLiveInsForSubRanges(LI, PhysReg);
} else {
- // Scan the segments of LI.
- for (const auto &Seg : LI.segments) {
- if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
- continue;
- for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
- LiveIn[i]->addLiveIn(PhysReg);
- LiveIn.clear();
+ // Go over MBB begin positions and see if we have segments covering them.
+ // The following works because segments and the MBBIndex list are both
+ // sorted by slot indexes.
+ SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin();
+ for (const auto &Seg : LI) {
+ I = Indexes->advanceMBBIndex(I, Seg.start);
+ for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) {
+ MachineBasicBlock *MBB = I->second;
+ MBB->addLiveIn(PhysReg);
+ }
}
}
}
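[Editor's note] The rewritten loop above performs a single forward sweep: because both the live-range segments and the basic-block index list are sorted by slot index, the iterator I never has to move backwards. A simplified, self-contained sketch of that two-sequence sweep, with plain integers standing in for slot indexes (invented types, not the real SlotIndexes API):

    #include <utility>
    #include <vector>

    // A segment covers the half-open interval [first, second).
    using Segment = std::pair<int, int>;
    // A block index pairs a block's begin position with a block id.
    using BlockIndex = std::pair<int, int>;

    // Return ids of blocks whose begin position lies inside some segment.
    // Both inputs are assumed sorted; segments are assumed disjoint.
    std::vector<int> liveInBlocks(const std::vector<Segment> &Segments,
                                  const std::vector<BlockIndex> &Blocks) {
      std::vector<int> Result;
      auto I = Blocks.begin();
      for (const Segment &Seg : Segments) {
        // Mirrors Indexes->advanceMBBIndex(I, Seg.start): skip blocks that
        // begin before this segment starts.
        while (I != Blocks.end() && I->first < Seg.first)
          ++I;
        // Every block beginning before the segment's end is live-in.
        for (; I != Blocks.end() && I->first < Seg.second; ++I)
          Result.push_back(I->second);
      }
      return Result;
    }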
@@ -305,7 +335,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
assert(LI.liveAt(BaseIndex) &&
"Reads of completely dead register should be marked undef already");
unsigned SubRegIdx = MO.getSubReg();
- unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
// See if any of the relevant subregister liveranges is defined at this point.
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
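[Editor's note] The hunk above only changes the type of UseMask from unsigned to LaneBitmask (at this point in LLVM's history a typedef for an unsigned integer type), making the intent explicit: each bit stands for one register lane, and a subregister liverange matters to the operand exactly when the two lane sets intersect. In sketch form (the typedef is an assumption about this era of the code, not the later LaneBitmask class):

    typedef unsigned LaneBitmask; // assumed pre-class typedef

    // A subrange is relevant to an operand iff their lane sets overlap.
    inline bool lanesOverlap(LaneBitmask SubRangeMask, LaneBitmask UseMask) {
      return (SubRangeMask & UseMask) != 0;
    }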
@@ -319,54 +349,15 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
- SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
-
- // Here we have a SparseSet to hold which PhysRegs are actually encountered
- // in the MF we are about to iterate over so that later when we call
- // setPhysRegUsed, we are only doing it for physRegs that were actually found
- // in the program and not for all of the possible physRegs for the given
- // target architecture. If the target has a lot of physRegs, then for a small
- // program there will be a significant compile time reduction here.
- PhysRegs.clear();
- PhysRegs.setUniverse(TRI->getNumRegs());
-
- // The function with uwtable should guarantee that the stack unwinder
- // can unwind the stack to the previous frame. Thus, we can't apply the
- // noreturn optimization if the caller function has uwtable attribute.
- bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
- MachineInstr *MI = MII;
+ MachineInstr *MI = &*MII;
++MII;
- // Check if this instruction is a call to a noreturn function. If this
- // is a call to noreturn function and we don't need the stack unwinding
- // functionality (i.e. this function does not have uwtable attribute and
- // the callee function has the nounwind attribute), then we can ignore
- // the definitions set by this instruction.
- if (!HasUWTable && IsExitBB && MI->isCall()) {
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
- if (!MO.isGlobal())
- continue;
- const Function *Func = dyn_cast<Function>(MO.getGlobal());
- if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
- // We need to keep correct unwind information
- // even if the function will not return, since the
- // runtime may need it.
- !Func->hasFnAttribute(Attribute::NoUnwind))
- continue;
- NoReturnInsts.insert(MI);
- break;
- }
- }
-
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@@ -375,15 +366,6 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- // If we encounter a VirtReg or PhysReg then get at the PhysReg and add
- // it to the physreg bitset. Later we use only the PhysRegs that were
- // actually encountered in the MF to populate the MRI's used physregs.
- if (MO.isReg() && MO.getReg())
- PhysRegs.insert(
- TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
- VRM->getPhys(MO.getReg()) :
- MO.getReg());
-
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
@@ -418,14 +400,6 @@ void VirtRegRewriter::rewrite() {
MO.setIsUndef(true);
} else if (!MO.isDead()) {
assert(MO.isDef());
- // Things get tricky when we ran out of lane mask bits and
- // merged multiple lanes into the overflow bit: In this case
- // our subregister liveness tracking isn't precise and we can't
- // know what subregister parts are undefined, fall back to the
- // implicit super-register def then.
- unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
- if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask))
- SuperDefs.push_back(PhysReg);
}
}
@@ -470,29 +444,5 @@ void VirtRegRewriter::rewrite() {
}
}
}
-
- // Tell MRI about physical registers in use.
- if (NoReturnInsts.empty()) {
- for (SparseSet<unsigned>::iterator
- RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
- if (!MRI->reg_nodbg_empty(*RegI))
- MRI->setPhysRegUsed(*RegI);
- } else {
- for (SparseSet<unsigned>::iterator
- I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
- unsigned Reg = *I;
- if (MRI->reg_nodbg_empty(Reg))
- continue;
- // Check if this register has a use that will impact the rest of the
- // code. Uses in debug and noreturn instructions do not impact the
- // generated code.
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
- if (!NoReturnInsts.count(&It)) {
- MRI->setPhysRegUsed(Reg);
- break;
- }
- }
- }
- }
}
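[Editor's note] The WinEHPrepare rewrite that follows replaces the handler-outlining machinery with funclet coloring: each basic block is mapped to the set of funclets (its "colors") that can reach it, and blocks with more than one color are cloned so every funclet owns a private copy (the BlockColors and FuncletBlocks members introduced below). A hypothetical, heavily simplified sketch of the multi-color test, with std::string standing in for block handles:

    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for DenseMap<BasicBlock *, ColorVector>.
    using ColorMap = std::map<std::string, std::vector<std::string>>;

    // A block reachable from more than one funclet entry must be cloned.
    bool needsCloning(const ColorMap &BlockColors, const std::string &Block) {
      auto It = BlockColors.find(Block);
      return It != BlockColors.end() && It->second.size() > 1;
    }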
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 0d26ed3..52fb922 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -18,66 +18,40 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include <memory>
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "winehprepare"
-namespace {
-
-// This map is used to model frame variable usage during outlining, to
-// construct a structure type to hold the frame variables in a frame
-// allocation block, and to remap the frame variable allocas (including
-// spill locations as needed) to GEPs that get the variable from the
-// frame allocation structure.
-typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-
-// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't
-// quite null.
-AllocaInst *getCatchObjectSentinel() {
- return static_cast<AllocaInst *>(nullptr) + 1;
-}
-
-typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+static cl::opt<bool> DisableDemotion(
+ "disable-demotion", cl::Hidden,
+ cl::desc(
+ "Clone multicolor basic blocks but do not demote cross funclet values"),
+ cl::init(false));
-class LandingPadActions;
-class LandingPadMap;
-
-typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
-typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+static cl::opt<bool> DisableCleanups(
+ "disable-cleanups", cl::Hidden,
+ cl::desc("Do not remove implausible terminators or other similar cleanups"),
+ cl::init(false));
+namespace {
+
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID) {
- if (TM)
- TheTriple = TM->getTargetTriple();
- }
+ WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -90,264 +64,27 @@ public:
}
private:
- bool prepareExceptionHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads);
- void promoteLandingPadValues(LandingPadInst *LPad);
- void demoteValuesLiveAcrossHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- void findSEHEHReturnPoints(Function &F,
- SetVector<BasicBlock *> &EHReturnBlocks);
- void findCXXEHReturnPoints(Function &F,
- SetVector<BasicBlock *> &EHReturnBlocks);
- void getPossibleReturnTargets(Function *ParentF, Function *HandlerF,
- SetVector<BasicBlock*> &Targets);
- void completeNestedLandingPad(Function *ParentFn,
- LandingPadInst *OutlinedLPad,
- const LandingPadInst *OriginalLPad,
- FrameVarInfoMap &VarInfo);
- Function *createHandlerFunc(Function *ParentFn, Type *RetTy,
- const Twine &Name, Module *M, Value *&ParentFP);
- bool outlineHandler(ActionHandler *Action, Function *SrcFn,
- LandingPadInst *LPad, BasicBlock *StartBB,
- FrameVarInfoMap &VarInfo);
- void addStubInvokeToHandlerIfNeeded(Function *Handler);
-
- void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
- CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
- VisitedBlockSet &VisitedBlocks);
- void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB,
- BasicBlock *EndBB);
-
- void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
-
- Triple TheTriple;
+ void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot);
+ void
+ insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist);
+ AllocaInst *insertPHILoads(PHINode *PN, Function &F);
+ void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads, Function &F);
+ bool prepareExplicitEH(Function &F);
+ void colorFunclets(Function &F);
+
+ void demotePHIsOnFunclets(Function &F);
+ void cloneCommonBlocks(Function &F);
+ void removeImplausibleInstructions(Function &F);
+ void cleanupPreparedFunclets(Function &F);
+ void verifyPreparedFunclets(Function &F);
// All fields are reset by runOnFunction.
- DominatorTree *DT = nullptr;
- const TargetLibraryInfo *LibInfo = nullptr;
EHPersonality Personality = EHPersonality::Unknown;
- CatchHandlerMapTy CatchHandlerMap;
- CleanupHandlerMapTy CleanupHandlerMap;
- DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
- SmallPtrSet<BasicBlock *, 4> NormalBlocks;
- SmallPtrSet<BasicBlock *, 4> EHBlocks;
- SetVector<BasicBlock *> EHReturnBlocks;
-
- // This maps landing pad instructions found in outlined handlers to
- // the landing pad instruction in the parent function from which they
- // were cloned. The cloned/nested landing pad is used as the key
- // because the landing pad may be cloned into multiple handlers.
- // This map will be used to add the llvm.eh.actions call to the nested
- // landing pads after all handlers have been outlined.
- DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP;
-
- // This maps blocks in the parent function which are destinations of
- // catch handlers to cloned blocks in (other) outlined handlers. This
- // handles the case where a nested landing pads has a catch handler that
- // returns to a handler function rather than the parent function.
- // The original block is used as the key here because there should only
- // ever be one handler function from which the cloned block is not pruned.
- // The original block will be pruned from the parent function after all
- // handlers have been outlined. This map will be used to adjust the
- // return instructions of handlers which return to the block that was
- // outlined into a handler. This is done after all handlers have been
- // outlined but before the outlined code is pruned from the parent function.
- DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks;
-
- // Map from outlined handler to call to parent local address. Only used for
- // 32-bit EH.
- DenseMap<Function *, Value *> HandlerToParentFP;
-
- AllocaInst *SEHExceptionCodeSlot = nullptr;
-};
-
-class WinEHFrameVariableMaterializer : public ValueMaterializer {
-public:
- WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP,
- FrameVarInfoMap &FrameVarInfo);
- ~WinEHFrameVariableMaterializer() override {}
-
- Value *materializeValueFor(Value *V) override;
-
- void escapeCatchObject(Value *V);
-
-private:
- FrameVarInfoMap &FrameVarInfo;
- IRBuilder<> Builder;
-};
-
-class LandingPadMap {
-public:
- LandingPadMap() : OriginLPad(nullptr) {}
- void mapLandingPad(const LandingPadInst *LPad);
-
- bool isInitialized() { return OriginLPad != nullptr; }
-
- bool isOriginLandingPadBlock(const BasicBlock *BB) const;
- bool isLandingPadSpecificInst(const Instruction *Inst) const;
-
- void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
- Value *SelectorValue) const;
-
-private:
- const LandingPadInst *OriginLPad;
- // We will normally only see one of each of these instructions, but
- // if more than one occurs for some reason we can handle that.
- TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
- TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
-};
-
-class WinEHCloningDirectorBase : public CloningDirector {
-public:
- WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
- : Materializer(HandlerFn, ParentFP, VarInfo),
- SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
- Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())),
- LPadMap(LPadMap), ParentFP(ParentFP) {}
-
- CloningAction handleInstruction(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
-
- virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleInvoke(ValueToValueMapTy &VMap,
- const InvokeInst *Invoke,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleResume(ValueToValueMapTy &VMap,
- const ResumeInst *Resume,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare,
- BasicBlock *NewBB) = 0;
- virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) = 0;
-
- ValueMaterializer *getValueMaterializer() override { return &Materializer; }
-
-protected:
- WinEHFrameVariableMaterializer Materializer;
- Type *SelectorIDType;
- Type *Int8PtrType;
- LandingPadMap &LPadMap;
-
- /// The value representing the parent frame pointer.
- Value *ParentFP;
-};
-
-class WinEHCatchDirector : public WinEHCloningDirectorBase {
-public:
- WinEHCatchDirector(
- Function *CatchFn, Value *ParentFP, Value *Selector,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap,
- DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads,
- DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks)
- : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap),
- CurrentSelector(Selector->stripPointerCasts()),
- ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads),
- DT(DT), EHBlocks(EHBlocks) {}
-
- CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) override;
- CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
- BasicBlock *NewBB) override;
- CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
- BasicBlock *NewBB) override;
- CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
- BasicBlock *NewBB) override;
- CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) override;
-
- Value *getExceptionVar() { return ExceptionObjectVar; }
- TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
-private:
- Value *CurrentSelector;
-
- Value *ExceptionObjectVar;
- TinyPtrVector<BasicBlock *> ReturnTargets;
- // This will be a reference to the field of the same name in the WinEHPrepare
- // object which instantiates this WinEHCatchDirector object.
- DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP;
- DominatorTree *DT;
- SmallPtrSetImpl<BasicBlock *> &EHBlocks;
-};
-
-class WinEHCleanupDirector : public WinEHCloningDirectorBase {
-public:
- WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
- : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo,
- LPadMap) {}
-
- CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) override;
- CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) override;
- CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
- BasicBlock *NewBB) override;
- CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
- BasicBlock *NewBB) override;
- CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
- BasicBlock *NewBB) override;
- CloningAction handleLandingPad(ValueToValueMapTy &VMap,
- const LandingPadInst *LPad,
- BasicBlock *NewBB) override;
-};
-
-class LandingPadActions {
-public:
- LandingPadActions() : HasCleanupHandlers(false) {}
-
- void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); }
- void insertCleanupHandler(CleanupHandler *Action) {
- Actions.push_back(Action);
- HasCleanupHandlers = true;
- }
-
- bool includesCleanup() const { return HasCleanupHandlers; }
-
- SmallVectorImpl<ActionHandler *> &actions() { return Actions; }
- SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
- SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
-
-private:
- // Note that this class does not own the ActionHandler objects in this vector.
- // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap
- // in the WinEHPrepare class.
- SmallVector<ActionHandler *, 4> Actions;
- bool HasCleanupHandlers;
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+ MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
} // end anonymous namespace
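[Editor's note] Two hidden command-line toggles (DisableDemotion, DisableCleanups) were added above via LLVM's cl::opt facility. Declaring another flag in the same style is a one-liner; the flag name here is invented purely for illustration:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hidden boolean flag, default off, in the same style as -disable-demotion.
    static cl::opt<bool> MyDebugToggle(
        "my-debug-toggle", cl::Hidden,
        cl::desc("Illustrative hidden debugging toggle"), cl::init(false));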
@@ -361,2536 +98,987 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
}
bool WinEHPrepare::runOnFunction(Function &Fn) {
- // No need to prepare outlined handlers.
- if (Fn.hasFnAttribute("wineh-parent"))
- return false;
-
- SmallVector<LandingPadInst *, 4> LPads;
- SmallVector<ResumeInst *, 4> Resumes;
- for (BasicBlock &BB : Fn) {
- if (auto *LP = BB.getLandingPadInst())
- LPads.push_back(LP);
- if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator()))
- Resumes.push_back(Resume);
- }
-
- // No need to prepare functions that lack landing pads.
- if (LPads.empty())
+ if (!Fn.hasPersonalityFn())
return false;
// Classify the personality to see what kind of preparation we need.
Personality = classifyEHPersonality(Fn.getPersonalityFn());
- // Do nothing if this is not an MSVC personality.
- if (!isMSVCEHPersonality(Personality))
+ // Do nothing if this is not a funclet-based personality.
+ if (!isFuncletEHPersonality(Personality))
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- // If there were any landing pads, prepareExceptionHandlers will make changes.
- prepareExceptionHandlers(Fn, LPads);
- return true;
+ return prepareExplicitEH(Fn);
}
bool WinEHPrepare::doFinalization(Module &M) { return false; }
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
- Constant *&Selector, BasicBlock *&NextBB);
-
-// Finds blocks reachable from the starting set Worklist. Does not follow unwind
-// edges or blocks listed in StopPoints.
-static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs,
- SetVector<BasicBlock *> &Worklist,
- const SetVector<BasicBlock *> *StopPoints) {
- while (!Worklist.empty()) {
- BasicBlock *BB = Worklist.pop_back_val();
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
- // Don't cross blocks that we should stop at.
- if (StopPoints && StopPoints->count(BB))
- continue;
-
- if (!ReachableBBs.insert(BB).second)
- continue; // Already visited.
-
- // Don't follow unwind edges of invokes.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Worklist.insert(II->getNormalDest());
- continue;
- }
-
- // Otherwise, follow all successors.
- Worklist.insert(succ_begin(BB), succ_end(BB));
- }
-}
-
-// Attempt to find an instruction where a block can be split before
-// a call to llvm.eh.begincatch and its operands. If the block
-// begins with the begincatch call or one of its adjacent operands
-// the block will not be split.
-static Instruction *findBeginCatchSplitPoint(BasicBlock *BB,
- IntrinsicInst *II) {
- // If the begincatch call is already the first instruction in the block,
- // don't split.
- Instruction *FirstNonPHI = BB->getFirstNonPHI();
- if (II == FirstNonPHI)
- return nullptr;
-
- // If either operand is in the same basic block as the instruction and
- // isn't used by another instruction before the begincatch call, include it
- // in the split block.
- auto *Op0 = dyn_cast<Instruction>(II->getOperand(0));
- auto *Op1 = dyn_cast<Instruction>(II->getOperand(1));
-
- Instruction *I = II->getPrevNode();
- Instruction *LastI = II;
-
- while (I == Op0 || I == Op1) {
- // If the block begins with one of the operands and there are no other
- // instructions between the operand and the begincatch call, don't split.
- if (I == FirstNonPHI)
- return nullptr;
-
- LastI = I;
- I = I->getPrevNode();
- }
-
- // If there is at least one instruction in the block before the begincatch
- // call and its operands, split the block at either the begincatch or
- // its operand.
- return LastI;
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+ const BasicBlock *BB) {
+ CxxUnwindMapEntry UME;
+ UME.ToState = ToState;
+ UME.Cleanup = BB;
+ FuncInfo.CxxUnwindMap.push_back(UME);
+ return FuncInfo.getLastStateNumber();
}
-/// Find all points where exceptional control rejoins normal control flow via
-/// llvm.eh.endcatch. Add them to the normal bb reachability worklist.
-void WinEHPrepare::findCXXEHReturnPoints(
- Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
- for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- for (Instruction &I : *BB) {
- if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) {
- Instruction *SplitPt =
- findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I));
- if (SplitPt) {
- // Split the block before the llvm.eh.begincatch call to allow
- // cleanup and catch code to be distinguished later.
- // Do not update BBI because we still need to process the
- // portion of the block that we are splitting off.
- SplitBlock(BB, SplitPt, DT);
- break;
- }
- }
- if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) {
- // Split the block after the call to llvm.eh.endcatch if there is
- // anything other than an unconditional branch, or if the successor
- // starts with a phi.
- auto *Br = dyn_cast<BranchInst>(I.getNextNode());
- if (!Br || !Br->isUnconditional() ||
- isa<PHINode>(Br->getSuccessor(0)->begin())) {
- DEBUG(dbgs() << "splitting block " << BB->getName()
- << " with llvm.eh.endcatch\n");
- BBI = SplitBlock(BB, I.getNextNode(), DT);
- }
- // The next BB is normal control flow.
- EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0));
- break;
- }
- }
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+ int TryHigh, int CatchHigh,
+ ArrayRef<const CatchPadInst *> Handlers) {
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ TBME.CatchHigh = CatchHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (const CatchPadInst *CPI : Handlers) {
+ WinEHHandlerType HT;
+ Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+ if (TypeInfo->isNullValue())
+ HT.TypeDescriptor = nullptr;
+ else
+ HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+ HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+ HT.Handler = CPI->getParent();
+ if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
+ HT.CatchObj.Alloca = nullptr;
+ else
+ HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
+ TBME.HandlerArray.push_back(HT);
}
+ FuncInfo.TryBlockMap.push_back(TBME);
}
-static bool isCatchAllLandingPad(const BasicBlock *BB) {
- const LandingPadInst *LP = BB->getLandingPadInst();
- if (!LP)
- return false;
- unsigned N = LP->getNumClauses();
- return (N > 0 && LP->isCatch(N - 1) &&
- isa<ConstantPointerNull>(LP->getClause(N - 1)));
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+ for (const User *U : CleanupPad->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ return CRI->getUnwindDest();
+ return nullptr;
}
-/// Find all points where exceptions control rejoins normal control flow via
-/// selector dispatch.
-void WinEHPrepare::findSEHEHReturnPoints(
- Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
- for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- // If the landingpad is a catch-all, treat the whole lpad as if it is
- // reachable from normal control flow.
- // FIXME: This is imprecise. We need a better way of identifying where a
- // catch-all starts and cleanups stop. As far as LLVM is concerned, there
- // is no difference.
- if (isCatchAllLandingPad(BB)) {
- EHReturnBlocks.insert(BB);
+static void calculateStateNumbersForInvokes(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ auto *F = const_cast<Function *>(Fn);
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+ for (BasicBlock &BB : *F) {
+ auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
continue;
- }
-
- BasicBlock *CatchHandler;
- BasicBlock *NextBB;
- Constant *Selector;
- if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) {
- // Split the edge if there are multiple predecessors. This creates a place
- // where we can insert EH recovery code.
- if (!CatchHandler->getSinglePredecessor()) {
- DEBUG(dbgs() << "splitting EH return edge from " << BB->getName()
- << " to " << CatchHandler->getName() << '\n');
- BBI = CatchHandler = SplitCriticalEdge(
- BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler));
- }
- EHReturnBlocks.insert(CatchHandler);
- }
- }
-}
-void WinEHPrepare::identifyEHBlocks(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads) {
- DEBUG(dbgs() << "Demoting values live across exception handlers in function "
- << F.getName() << '\n');
-
- // Build a set of all non-exceptional blocks and exceptional blocks.
- // - Non-exceptional blocks are blocks reachable from the entry block while
- // not following invoke unwind edges.
- // - Exceptional blocks are blocks reachable from landingpads. Analysis does
- // not follow llvm.eh.endcatch blocks, which mark a transition from
- // exceptional to normal control.
-
- if (Personality == EHPersonality::MSVC_CXX)
- findCXXEHReturnPoints(F, EHReturnBlocks);
- else
- findSEHEHReturnPoints(F, EHReturnBlocks);
-
- DEBUG({
- dbgs() << "identified the following blocks as EH return points:\n";
- for (BasicBlock *BB : EHReturnBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
-// Join points should not have phis at this point, unless they are a
-// landingpad, in which case we will demote their phis later.
-#ifndef NDEBUG
- for (BasicBlock *BB : EHReturnBlocks)
- assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) &&
- "non-lpad EH return block has phi");
-#endif
-
- // Normal blocks are the blocks reachable from the entry block and all EH
- // return points.
- SetVector<BasicBlock *> Worklist;
- Worklist = EHReturnBlocks;
- Worklist.insert(&F.getEntryBlock());
- findReachableBlocks(NormalBlocks, Worklist, nullptr);
- DEBUG({
- dbgs() << "marked the following blocks as normal:\n";
- for (BasicBlock *BB : NormalBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
- // Exceptional blocks are the blocks reachable from landingpads that don't
- // cross EH return points.
- Worklist.clear();
- for (auto *LPI : LPads)
- Worklist.insert(LPI->getParent());
- findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks);
- DEBUG({
- dbgs() << "marked the following blocks as exceptional:\n";
- for (BasicBlock *BB : EHBlocks)
- dbgs() << " " << BB->getName() << '\n';
- });
-
-}
-
-/// Ensure that all values live into and out of exception handlers are stored
-/// in memory.
-/// FIXME: This falls down when values are defined in one handler and live into
-/// another handler. For example, a cleanup defines a value used only by a
-/// catch handler.
-void WinEHPrepare::demoteValuesLiveAcrossHandlers(
- Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
- DEBUG(dbgs() << "Demoting values live across exception handlers in function "
- << F.getName() << '\n');
-
- // identifyEHBlocks() should have been called before this function.
- assert(!NormalBlocks.empty());
-
- // Try to avoid demoting EH pointer and selector values. They get in the way
- // of our pattern matching.
- SmallPtrSet<Instruction *, 10> EHVals;
- for (BasicBlock &BB : F) {
- LandingPadInst *LP = BB.getLandingPadInst();
- if (!LP)
- continue;
- EHVals.insert(LP);
- for (User *U : LP->users()) {
- auto *EI = dyn_cast<ExtractValueInst>(U);
- if (!EI)
- continue;
- EHVals.insert(EI);
- for (User *U2 : EI->users()) {
- if (auto *PN = dyn_cast<PHINode>(U2))
- EHVals.insert(PN);
- }
+ auto &BBColors = BlockColors[&BB];
+ assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+
+ BasicBlock *FuncletUnwindDest;
+ auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+ assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+ if (!FuncletPad)
+ FuncletUnwindDest = nullptr;
+ else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+ FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+ else
+ llvm_unreachable("unexpected funclet pad!");
+
+ BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+ int BaseState = -1;
+ if (FuncletUnwindDest == InvokeUnwindDest) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
}
- }
- SetVector<Argument *> ArgsToDemote;
- SetVector<Instruction *> InstrsToDemote;
- for (BasicBlock &BB : F) {
- bool IsNormalBB = NormalBlocks.count(&BB);
- bool IsEHBB = EHBlocks.count(&BB);
- if (!IsNormalBB && !IsEHBB)
- continue; // Blocks that are neither normal nor EH are unreachable.
- for (Instruction &I : BB) {
- for (Value *Op : I.operands()) {
- // Don't demote static allocas, constants, and labels.
- if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op))
- continue;
- auto *AI = dyn_cast<AllocaInst>(Op);
- if (AI && AI->isStaticAlloca())
- continue;
-
- if (auto *Arg = dyn_cast<Argument>(Op)) {
- if (IsEHBB) {
- DEBUG(dbgs() << "Demoting argument " << *Arg
- << " used by EH instr: " << I << "\n");
- ArgsToDemote.insert(Arg);
- }
- continue;
- }
-
- // Don't demote EH values.
- auto *OpI = cast<Instruction>(Op);
- if (EHVals.count(OpI))
- continue;
-
- BasicBlock *OpBB = OpI->getParent();
- // If a value is produced and consumed in the same BB, we don't need to
- // demote it.
- if (OpBB == &BB)
- continue;
- bool IsOpNormalBB = NormalBlocks.count(OpBB);
- bool IsOpEHBB = EHBlocks.count(OpBB);
- if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) {
- DEBUG({
- dbgs() << "Demoting instruction live in-out from EH:\n";
- dbgs() << "Instr: " << *OpI << '\n';
- dbgs() << "User: " << I << '\n';
- });
- InstrsToDemote.insert(OpI);
- }
- }
- }
- }
-
- // Demote values live into and out of handlers.
- // FIXME: This demotion is inefficient. We should insert spills at the point
- // of definition, insert one reload in each handler that uses the value, and
- // insert reloads in the BB used to rejoin normal control flow.
- Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt();
- for (Instruction *I : InstrsToDemote)
- DemoteRegToStack(*I, false, AllocaInsertPt);
-
- // Demote arguments separately, and only for uses in EH blocks.
- for (Argument *Arg : ArgsToDemote) {
- auto *Slot = new AllocaInst(Arg->getType(), nullptr,
- Arg->getName() + ".reg2mem", AllocaInsertPt);
- SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end());
- for (User *U : Users) {
- auto *I = dyn_cast<Instruction>(U);
- if (I && EHBlocks.count(I->getParent())) {
- auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I);
- U->replaceUsesOfWith(Arg, Reload);
- }
+ if (BaseState != -1) {
+ FuncInfo.InvokeStateMap[II] = BaseState;
+ } else {
+ Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+ assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+ FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
}
- new StoreInst(Arg, Slot, AllocaInsertPt);
- }
-
- // Demote landingpad phis, as the landingpad will be removed from the machine
- // CFG.
- for (LandingPadInst *LPI : LPads) {
- BasicBlock *BB = LPI->getParent();
- while (auto *Phi = dyn_cast<PHINode>(BB->begin()))
- DemotePHIToStack(Phi, AllocaInsertPt);
}
-
- DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and "
- << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n");
}
-bool WinEHPrepare::prepareExceptionHandlers(
- Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
- // Don't run on functions that are already prepared.
- for (LandingPadInst *LPad : LPads) {
- BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : *LPadBB)
- if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>()))
- return false;
- }
-
- identifyEHBlocks(F, LPads);
- demoteValuesLiveAcrossHandlers(F, LPads);
-
- // These containers are used to re-map frame variables that are used in
- // outlined catch and cleanup handlers. They will be populated as the
- // handlers are outlined.
- FrameVarInfoMap FrameVarInfo;
-
- bool HandlersOutlined = false;
-
- Module *M = F.getParent();
- LLVMContext &Context = M->getContext();
-
- // Create a new function to receive the handler contents.
- PointerType *Int8PtrType = Type::getInt8PtrTy(Context);
- Type *Int32Type = Type::getInt32Ty(Context);
- Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions);
-
- if (isAsynchronousEHPersonality(Personality)) {
- // FIXME: Switch the ehptr type to i32 and then switch this.
- SEHExceptionCodeSlot =
- new AllocaInst(Int8PtrType, nullptr, "seh_exception_code",
- F.getEntryBlock().getFirstInsertionPt());
+// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
+// to. If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+ Value *ParentPad) {
+ const TerminatorInst *TI = BB->getTerminator();
+ if (isa<InvokeInst>(TI))
+ return nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CatchSwitch->getParentPad() != ParentPad)
+ return nullptr;
+ return BB;
}
+ assert(!TI->isEHPad() && "unexpected EHPad!");
+ auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+ if (CleanupPad->getParentPad() != ParentPad)
+ return nullptr;
+ return CleanupPad->getParent();
+}
- // In order to handle the case where one outlined catch handler returns
- // to a block within another outlined catch handler that would otherwise
- // be unreachable, we need to outline the nested landing pad before we
- // outline the landing pad which encloses it.
- if (!isAsynchronousEHPersonality(Personality))
- std::sort(LPads.begin(), LPads.end(),
- [this](LandingPadInst *const &L, LandingPadInst *const &R) {
- return DT->properlyDominates(R->getParent(), L->getParent());
- });
-
- // This container stores the llvm.eh.recover and IndirectBr instructions
- // that make up the body of each landing pad after it has been outlined.
- // We need to defer the population of the target list for the indirectbr
- // until all landing pads have been outlined so that we can handle the
- // case of blocks in the target that are reached only from nested
- // landing pads.
- SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls;
-
- for (LandingPadInst *LPad : LPads) {
- // Look for evidence that this landingpad has already been processed.
- bool LPadHasActionList = false;
- BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : *LPadBB) {
- if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) {
- LPadHasActionList = true;
- break;
- }
- }
-
- // If we've already outlined the handlers for this landingpad,
- // there's nothing more to do here.
- if (LPadHasActionList)
- continue;
-
- // If either of the values in the aggregate returned by the landing pad is
- // extracted and stored to memory, promote the stored value to a register.
- promoteLandingPadValues(LPad);
-
- LandingPadActions Actions;
- mapLandingPadBlocks(LPad, Actions);
-
- HandlersOutlined |= !Actions.actions().empty();
- for (ActionHandler *Action : Actions) {
- if (Action->hasBeenProcessed())
- continue;
- BasicBlock *StartBB = Action->getStartBlock();
-
- // SEH doesn't do any outlining for catches. Instead, pass the handler
- // basic block addr to llvm.eh.actions and list the block as a return
- // target.
- if (isAsynchronousEHPersonality(Personality)) {
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- processSEHCatchHandler(CatchAction, StartBB);
- continue;
- }
- }
-
- outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo);
- }
-
- // Split the block after the landingpad instruction so that it is just a
- // call to llvm.eh.actions followed by indirectbr.
- assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed");
- SplitBlock(LPadBB, LPad->getNextNode(), DT);
- // Erase the branch inserted by the split so we can insert indirectbr.
- LPadBB->getTerminator()->eraseFromParent();
-
- // Replace all extracted values with undef and ultimately replace the
- // landingpad with undef.
- SmallVector<Instruction *, 4> SEHCodeUses;
- SmallVector<Instruction *, 4> EHUndefs;
- for (User *U : LPad->users()) {
- auto *E = dyn_cast<ExtractValueInst>(U);
- if (!E)
- continue;
- assert(E->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- unsigned Idx = *E->idx_begin();
- assert((Idx == 0 || Idx == 1) && "unexpected index");
- if (Idx == 0 && isAsynchronousEHPersonality(Personality))
- SEHCodeUses.push_back(E);
- else
- EHUndefs.push_back(E);
- }
- for (Instruction *E : EHUndefs) {
- E->replaceAllUsesWith(UndefValue::get(E->getType()));
- E->eraseFromParent();
- }
- LPad->replaceAllUsesWith(UndefValue::get(LPad->getType()));
-
- // Rewrite uses of the exception pointer to loads of an alloca.
- while (!SEHCodeUses.empty()) {
- Instruction *E = SEHCodeUses.pop_back_val();
- SmallVector<Use *, 4> Uses;
- for (Use &U : E->uses())
- Uses.push_back(&U);
- for (Use *U : Uses) {
- auto *I = cast<Instruction>(U->getUser());
- if (isa<ResumeInst>(I))
- continue;
- if (auto *Phi = dyn_cast<PHINode>(I))
- SEHCodeUses.push_back(Phi);
- else
- U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I));
- }
- E->replaceAllUsesWith(UndefValue::get(E->getType()));
- E->eraseFromParent();
- }
-
- // Add a call to describe the actions for this landing pad.
- std::vector<Value *> ActionArgs;
- for (ActionHandler *Action : Actions) {
- // Action codes from docs are: 0 cleanup, 1 catch.
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
- ActionArgs.push_back(CatchAction->getSelector());
- // Find the frame escape index of the exception object alloca in the
- // parent.
- int FrameEscapeIdx = -1;
- Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
- if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
- auto I = FrameVarInfo.find(EHObj);
- assert(I != FrameVarInfo.end() &&
- "failed to map llvm.eh.begincatch var");
- FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
- }
- ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
- } else {
- ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
- }
- ActionArgs.push_back(Action->getHandlerBlockOrFunc());
- }
- CallInst *Recover =
- CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB);
-
- SetVector<BasicBlock *> ReturnTargets;
- for (ActionHandler *Action : Actions) {
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- const auto &CatchTargets = CatchAction->getReturnTargets();
- ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end());
- }
- }
- IndirectBrInst *Branch =
- IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB);
- for (BasicBlock *Target : ReturnTargets)
- Branch->addDestination(Target);
-
- if (!isAsynchronousEHPersonality(Personality)) {
- // C++ EH must repopulate the targets later to handle the case of
- // targets that are reached indirectly through nested landing pads.
- LPadImpls.push_back(std::make_pair(Recover, Branch));
- }
-
- } // End for each landingpad
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
- // If nothing got outlined, there is no more processing to be done.
- if (!HandlersOutlined)
- return false;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revist catch funclets!");
- // Replace any nested landing pad stubs with the correct action handler.
- // This must be done before we remove unreachable blocks because it
- // cleans up references to outlined blocks that will be deleted.
- for (auto &LPadPair : NestedLPtoOriginalLP)
- completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
- NestedLPtoOriginalLP.clear();
-
- // Update the indirectbr instructions' target lists if necessary.
- SetVector<BasicBlock*> CheckedTargets;
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (auto &LPadImplPair : LPadImpls) {
- IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first);
- IndirectBrInst *Branch = LPadImplPair.second;
-
- // Get a list of handlers called by
- parseEHActions(Recover, ActionList);
-
- // Add an indirect branch listing possible successors of the catch handlers.
- SetVector<BasicBlock *> ReturnTargets;
- for (const auto &Action : ActionList) {
- if (auto *CA = dyn_cast<CatchHandler>(Action.get())) {
- Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc());
- getPossibleReturnTargets(&F, Handler, ReturnTargets);
- }
- }
- ActionList.clear();
- // Clear any targets we already knew about.
- for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) {
- BasicBlock *KnownTarget = Branch->getDestination(I);
- if (ReturnTargets.count(KnownTarget))
- ReturnTargets.remove(KnownTarget);
+ SmallVector<const CatchPadInst *, 2> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(CatchPad);
}
- for (BasicBlock *Target : ReturnTargets) {
- Branch->addDestination(Target);
- // The target may be a block that we excepted to get pruned.
- // If it is, it may contain a call to llvm.eh.endcatch.
- if (CheckedTargets.insert(Target)) {
- // Earlier preparations guarantee that all calls to llvm.eh.endcatch
- // will be followed by an unconditional branch.
- auto *Br = dyn_cast<BranchInst>(Target->getTerminator());
- if (Br && Br->isUnconditional() &&
- Br != Target->getFirstNonPHIOrDbgOrLifetime()) {
- Instruction *Prev = Br->getPrevNode();
- if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>()))
- Prev->eraseFromParent();
- }
+ int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryLow);
+ int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+ // catchpads are separate funclets in C++ EH due to the way rethrow works.
+ int TryHigh = CatchLow - 1;
+ for (const auto *CatchPad : Handlers) {
+ FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+ if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+ if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+ CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
}
}
- }
- LPadImpls.clear();
-
- F.addFnAttr("wineh-parent", F.getName());
-
- // Delete any blocks that were only used by handlers that were outlined above.
- removeUnreachableBlocks(F);
+ int CatchHigh = FuncInfo.getLastStateNumber();
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+ DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+ DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
+ DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+ << '\n');
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
- BasicBlock *Entry = &F.getEntryBlock();
- IRBuilder<> Builder(F.getParent()->getContext());
- Builder.SetInsertPoint(Entry->getFirstInsertionPt());
-
- Function *FrameEscapeFn =
- Intrinsic::getDeclaration(M, Intrinsic::localescape);
- Function *RecoverFrameFn =
- Intrinsic::getDeclaration(M, Intrinsic::localrecover);
- SmallVector<Value *, 8> AllocasToEscape;
-
- // Scan the entry block for an existing call to llvm.localescape. We need to
- // keep escaping those objects.
- for (Instruction &I : F.front()) {
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::localescape) {
- auto Args = II->arg_operands();
- AllocasToEscape.append(Args.begin(), Args.end());
- II->eraseFromParent();
- break;
- }
- }
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
- // Finally, replace all of the temporary allocas for frame variables used in
- // the outlined handlers with calls to llvm.localrecover.
- for (auto &VarInfoEntry : FrameVarInfo) {
- Value *ParentVal = VarInfoEntry.first;
- TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
- AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal);
-
- // FIXME: We should try to sink unescaped allocas from the parent frame into
- // the child frame. If the alloca is escaped, we have to use the lifetime
- // markers to ensure that the alloca is only live within the child frame.
-
- // Add this alloca to the list of things to escape.
- AllocasToEscape.push_back(ParentAlloca);
-
- // Next replace all outlined allocas that are mapped to it.
- for (AllocaInst *TempAlloca : Allocas) {
- if (TempAlloca == getCatchObjectSentinel())
- continue; // Skip catch parameter sentinels.
- Function *HandlerFn = TempAlloca->getParent()->getParent();
- llvm::Value *FP = HandlerToParentFP[HandlerFn];
- assert(FP);
-
- // FIXME: Sink this localrecover into the blocks where it is used.
- Builder.SetInsertPoint(TempAlloca);
- Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
- Value *RecoverArgs[] = {
- Builder.CreateBitCast(&F, Int8PtrType, ""), FP,
- llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)};
- Instruction *RecoveredAlloca =
- Builder.CreateCall(RecoverFrameFn, RecoverArgs);
-
- // Add a pointer bitcast if the alloca wasn't an i8.
- if (RecoveredAlloca->getType() != TempAlloca->getType()) {
- RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8");
- RecoveredAlloca = cast<Instruction>(
- Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType()));
+ int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB)) {
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CleanupPad->getParentPad()))) {
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
}
- TempAlloca->replaceAllUsesWith(RecoveredAlloca);
- TempAlloca->removeFromParent();
- RecoveredAlloca->takeName(TempAlloca);
- delete TempAlloca;
}
- } // End for each FrameVarInfo entry.
-
- // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry
- // block.
- Builder.SetInsertPoint(&F.getEntryBlock().back());
- Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
-
- if (SEHExceptionCodeSlot) {
- if (isAllocaPromotable(SEHExceptionCodeSlot)) {
- SmallPtrSet<BasicBlock *, 4> UserBlocks;
- for (User *U : SEHExceptionCodeSlot->users()) {
- if (auto *Inst = dyn_cast<Instruction>(U))
- UserBlocks.insert(Inst->getParent());
- }
- PromoteMemToReg(SEHExceptionCodeSlot, *DT);
- // After the promotion, kill off dead instructions.
- for (BasicBlock *BB : UserBlocks)
- SimplifyInstructionsInBlock(BB, LibInfo);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the MSVC++ personality cannot "
+ "contain exceptional actions");
}
}
+}
- // Clean up the handler action maps we created for this function
- DeleteContainerSeconds(CatchHandlerMap);
- CatchHandlerMap.clear();
- DeleteContainerSeconds(CleanupHandlerMap);
- CleanupHandlerMap.clear();
- HandlerToParentFP.clear();
- DT = nullptr;
- LibInfo = nullptr;
- SEHExceptionCodeSlot = nullptr;
- EHBlocks.clear();
- NormalBlocks.clear();
- EHReturnBlocks.clear();
-
- return HandlersOutlined;
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+ const Function *Filter, const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = false;
+ Entry.Filter = Filter;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
}
-void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) {
- // If the return values of the landing pad instruction are extracted and
- // stored to memory, we want to promote the store locations to reg values.
- SmallVector<AllocaInst *, 2> EHAllocas;
-
- // The landingpad instruction returns an aggregate value. Typically, its
- // value will be passed to a pair of extract value instructions and the
- // results of those extracts are often passed to store instructions.
- // In unoptimized code the stored value will often be loaded and then stored
- // again.
- for (auto *U : LPad->users()) {
- ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
- if (!Extract)
- continue;
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+ const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = true;
+ Entry.Filter = nullptr;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
- for (auto *EU : Extract->users()) {
- if (auto *Store = dyn_cast<StoreInst>(EU)) {
- auto *AV = cast<AllocaInst>(Store->getPointerOperand());
- EHAllocas.push_back(AV);
- }
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "no a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revist catch funclets!");
+
+ // Extract the filter function and the __except basic block and create a
+ // state for them.
+ assert(CatchSwitch->getNumHandlers() == 1 &&
+ "SEH doesn't have multiple handlers per __try");
+ const auto *CatchPad =
+ cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+ const BasicBlock *CatchPadBB = CatchPad->getParent();
+ const Constant *FilterOrNull =
+ cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ assert((Filter || FilterOrNull->isNullValue()) &&
+ "unexpected filter value");
+ int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+ // Everything in the __try block uses TryState as its parent state.
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+ << CatchPadBB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryState);
+
+ // Everything in the __except block unwinds to ParentState, just like code
+ // outside the __try.
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+ if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+ if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+ CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
}
- }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
- // We can't do this without a dominator tree.
- assert(DT);
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
- if (!EHAllocas.empty()) {
- PromoteMemToReg(EHAllocas, *DT);
- EHAllocas.clear();
+ int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock =
+ getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the SEH personality cannot "
+ "contain exceptional actions");
+ }
}
+}
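+
+// For example (one possible numbering), in
+//   __try {                            // body gets state 0
+//     __try { ... } __finally { ... }  // inner body gets state 1, parent 0
+//   } __except (1) { ... }             // handler runs at state -1 (the parent)
+// predecessors of each pad are walked recursively, so every nested pad is
+// numbered with its enclosing __try's state as ParentState.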
- // After promotion, some extracts may be trivially dead. Remove them.
- SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
- for (auto *U : Users)
- RecursivelyDeleteTriviallyDeadInstructions(U);
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+ return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+ CatchSwitch->unwindsToCaller();
+ if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+ return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+ getCleanupRetUnwindDest(CleanupPad) == nullptr;
+ if (isa<CatchPadInst>(EHPad))
+ return false;
+ llvm_unreachable("unexpected EHPad!");
}
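+
+// In IR, the pads accepted above look like
+//   %cs = catchswitch within none [label %handler] unwind to caller
+// or
+//   %cp = cleanuppad within none []
+// with every cleanupret from %cp unwinding to caller. A catchpad is never
+// top-level: it always belongs to a catchswitch, which is visited instead.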
-void WinEHPrepare::getPossibleReturnTargets(Function *ParentF,
- Function *HandlerF,
- SetVector<BasicBlock*> &Targets) {
- for (BasicBlock &BB : *HandlerF) {
- // If the handler contains landing pads, check for any
- // handlers that may return directly to a block in the
- // parent function.
- if (auto *LPI = BB.getLandingPadInst()) {
- IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode());
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(Recover, ActionList);
- for (const auto &Action : ActionList) {
- if (auto *CH = dyn_cast<CatchHandler>(Action.get())) {
- Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc());
- getPossibleReturnTargets(ParentF, NestedF, Targets);
- }
- }
- }
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Don't compute state numbers twice.
+ if (!FuncInfo.SEHUnwindMap.empty())
+ return;
- auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (!Ret)
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
continue;
-
- // Handler functions must always return a block address.
- BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
-
- // If this is the handler for a nested landing pad, the
- // return address may have been remapped to a block in the
- // parent handler. We're not interested in those.
- if (BA->getFunction() != ParentF)
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
continue;
-
- Targets.insert(BA->getBasicBlock());
+ ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
}
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
-void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
- LandingPadInst *OutlinedLPad,
- const LandingPadInst *OriginalLPad,
- FrameVarInfoMap &FrameVarInfo) {
- // Get the nested block and erase the unreachable instruction that was
- // temporarily inserted as its terminator.
- LLVMContext &Context = ParentFn->getContext();
- BasicBlock *OutlinedBB = OutlinedLPad->getParent();
- // If the nested landing pad was outlined before the landing pad that enclosed
- // it, it will already be in outlined form. In that case, we just need to see
- // if the returns and the enclosing branch instruction need to be updated.
- IndirectBrInst *Branch =
- dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator());
- if (!Branch) {
- // If the landing pad wasn't in outlined form, it should be a stub with
- // an unreachable terminator.
- assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
- OutlinedBB->getTerminator()->eraseFromParent();
- // That should leave OutlinedLPad as the last instruction in its block.
- assert(&OutlinedBB->back() == OutlinedLPad);
- }
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
- // The original landing pad will have already had its action intrinsic
- // built by the outlining loop. We need to clone that into the outlined
- // location. It may also be necessary to add references to the exception
- // variables to the outlined handler in which this landing pad is nested
- // and remap return instructions in the nested handlers that should return
- // to an address in the outlined handler.
- Function *OutlinedHandlerFn = OutlinedBB->getParent();
- BasicBlock::const_iterator II = OriginalLPad;
- ++II;
- // The instruction after the landing pad should now be a call to eh.actions.
- const Instruction *Recover = II;
- const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
-
- // Remap the return target in the nested handler.
- SmallVector<BlockAddress *, 4> ActionTargets;
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(EHActions, ActionList);
- for (const auto &Action : ActionList) {
- auto *Catch = dyn_cast<CatchHandler>(Action.get());
- if (!Catch)
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
continue;
- // The dyn_cast to function here selects C++ catch handlers and skips
- // SEH catch handlers.
- auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
- if (!Handler)
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
continue;
- // Visit all the return instructions, looking for places that return
- // to a location within OutlinedHandlerFn.
- for (BasicBlock &NestedHandlerBB : *Handler) {
- auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
- if (!Ret)
- continue;
-
- // Handler functions must always return a block address.
- BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
- // The original target will have been in the main parent function,
- // but if it is the address of a block that has been outlined, it
- // should be a block that was outlined into OutlinedHandlerFn.
- assert(BA->getFunction() == ParentFn);
-
- // Ignore targets that aren't part of an outlined handler function.
- if (!LPadTargetBlocks.count(BA->getBasicBlock()))
- continue;
-
- // If the return value is the address of a block that we
- // previously outlined into the parent handler function, replace
- // the return instruction and add the mapped target to the list
- // of possible return addresses.
- BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
- assert(MappedBB->getParent() == OutlinedHandlerFn);
- BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
- Ret->eraseFromParent();
- ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
- ActionTargets.push_back(NewBA);
- }
- }
- ActionList.clear();
-
- if (Branch) {
- // If the landing pad was already in outlined form, just update its targets.
- for (unsigned int I = Branch->getNumDestinations(); I > 0; --I)
- Branch->removeDestination(I);
- // Add the previously collected action targets.
- for (auto *Target : ActionTargets)
- Branch->addDestination(Target->getBasicBlock());
- } else {
- // If the landing pad was previously stubbed out, fill in its outlined form.
- IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone());
- OutlinedBB->getInstList().push_back(NewEHActions);
-
- // Insert an indirect branch into the outlined landing pad BB.
- IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB);
- // Add the previously collected action targets.
- for (auto *Target : ActionTargets)
- IBr->addDestination(Target->getBasicBlock());
- }
-}
-
-// This function examines a block to determine whether the block ends with a
-// conditional branch to a catch handler based on a selector comparison.
-// This function is used both by the WinEHPrepare::findSelectorComparison() and
-// WinEHCleanupDirector::handleTypeIdFor().
-static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
- Constant *&Selector, BasicBlock *&NextBB) {
- ICmpInst::Predicate Pred;
- BasicBlock *TBB, *FBB;
- Value *LHS, *RHS;
-
- if (!match(BB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
- return false;
-
- if (!match(LHS,
- m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
- !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
- return false;
-
- if (Pred == CmpInst::ICMP_EQ) {
- CatchHandler = TBB;
- NextBB = FBB;
- return true;
- }
-
- if (Pred == CmpInst::ICMP_NE) {
- CatchHandler = FBB;
- NextBB = TBB;
- return true;
+ calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
}
- return false;
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
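+
+// For example, a function containing
+//   invoke void @f() to label %cont unwind label %dispatch
+// dispatch:
+//   %cs = catchswitch within none [label %catch] unwind to caller
+// is seeded here through %cs (a top-level pad) with parent state -1, i.e.
+// "unwind to caller"; calculateStateNumbersForInvokes then tags the invoke's
+// unwind edge with the resulting state.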
-static bool isCatchBlock(BasicBlock *BB) {
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>()))
- return true;
- }
- return false;
-}
-
-static BasicBlock *createStubLandingPad(Function *Handler) {
- // FIXME: Finish this!
- LLVMContext &Context = Handler->getContext();
- BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
- Handler->getBasicBlockList().push_back(StubBB);
- IRBuilder<> Builder(StubBB);
- LandingPadInst *LPad = Builder.CreateLandingPad(
- llvm::StructType::get(Type::getInt8PtrTy(Context),
- Type::getInt32Ty(Context), nullptr),
- 0);
- // Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
- Function *ActionIntrin =
- Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
- Builder.CreateCall(ActionIntrin, {}, "recover");
- LPad->setCleanup(true);
- Builder.CreateUnreachable();
- return StubBB;
-}
-
-// Cycles through the blocks in an outlined handler function looking for an
-// invoke instruction and inserts an invoke of llvm.donothing with an empty
-// landing pad if none is found. The code that generates the .xdata tables for
-// the handler needs at least one landing pad to identify the parent function's
-// personality.
-void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) {
- ReturnInst *Ret = nullptr;
- UnreachableInst *Unreached = nullptr;
- for (BasicBlock &BB : *Handler) {
- TerminatorInst *Terminator = BB.getTerminator();
- // If we find an invoke, there is nothing to be done.
- auto *II = dyn_cast<InvokeInst>(Terminator);
- if (II)
- return;
- // If we've already recorded a return instruction, keep looking for invokes.
- if (!Ret)
- Ret = dyn_cast<ReturnInst>(Terminator);
- // If we haven't recorded an unreachable instruction, try this terminator.
- if (!Unreached)
- Unreached = dyn_cast<UnreachableInst>(Terminator);
- }
-
- // If we got this far, the handler contains no invokes. We should have seen
- // at least one return or unreachable instruction. We'll insert an invoke of
- // llvm.donothing ahead of that instruction.
- assert(Ret || Unreached);
- TerminatorInst *Term;
- if (Ret)
- Term = Ret;
- else
- Term = Unreached;
- BasicBlock *OldRetBB = Term->getParent();
- BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT);
- // SplitBlock adds an unconditional branch instruction at the end of the
- // parent block. We want to replace that with an invoke call, so we can
- // erase it now.
- OldRetBB->getTerminator()->eraseFromParent();
- BasicBlock *StubLandingPad = createStubLandingPad(Handler);
- Function *F =
- Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing);
- InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB);
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
+ ClrHandlerType HandlerType, uint32_t TypeToken,
+ const BasicBlock *Handler) {
+ ClrEHUnwindMapEntry Entry;
+ Entry.Parent = ParentState;
+ Entry.Handler = Handler;
+ Entry.HandlerType = HandlerType;
+ Entry.TypeToken = TypeToken;
+ FuncInfo.ClrEHUnwindMap.push_back(Entry);
+ return FuncInfo.ClrEHUnwindMap.size() - 1;
}
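+
+// For illustration, a CoreCLR catch clause for (say) type token 0x02000003
+// whose enclosing handler has state 0 is recorded roughly as
+//   { Parent: 0, Handler: %catch.bb, HandlerType: Catch, TypeToken: 0x02000003 }
+// and, as with the SEH map, the entry's index in ClrEHUnwindMap is the new
+// state number. (%catch.bb and the token value are placeholders.)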
-// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering
-// usually doesn't build LLVM IR, so that's probably the wrong place.
-Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy,
- const Twine &Name, Module *M,
- Value *&ParentFP) {
- // x64 uses a two-argument prototype where the parent FP is the second
- // argument. x86 uses no arguments, just the incoming EBP value.
- LLVMContext &Context = M->getContext();
- Type *Int8PtrType = Type::getInt8PtrTy(Context);
- FunctionType *FnType;
- if (TheTriple.getArch() == Triple::x86_64) {
- Type *ArgTys[2] = {Int8PtrType, Int8PtrType};
- FnType = FunctionType::get(RetTy, ArgTys, false);
- } else {
- FnType = FunctionType::get(RetTy, None, false);
- }
-
- Function *Handler =
- Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M);
- BasicBlock *Entry = BasicBlock::Create(Context, "entry");
- Handler->getBasicBlockList().push_front(Entry);
- if (TheTriple.getArch() == Triple::x86_64) {
- ParentFP = &(Handler->getArgumentList().back());
- } else {
- assert(M);
- Function *FrameAddressFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
- Function *RecoverFPFn =
- Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp);
- IRBuilder<> Builder(&Handler->getEntryBlock());
- Value *EBP =
- Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp");
- Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType);
- ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP});
- }
- return Handler;
-}
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
-bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
- LandingPadInst *LPad, BasicBlock *StartBB,
- FrameVarInfoMap &VarInfo) {
- Module *M = SrcFn->getParent();
- LLVMContext &Context = M->getContext();
- Type *Int8PtrType = Type::getInt8PtrTy(Context);
-
- // Create a new function to receive the handler contents.
- Value *ParentFP;
- Function *Handler;
- if (Action->getType() == Catch) {
- Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M,
- ParentFP);
- } else {
- Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context),
- SrcFn->getName() + ".cleanup", M, ParentFP);
- }
- Handler->setPersonalityFn(SrcFn->getPersonalityFn());
- HandlerToParentFP[Handler] = ParentFP;
- Handler->addFnAttr("wineh-parent", SrcFn->getName());
- BasicBlock *Entry = &Handler->getEntryBlock();
-
- // Generate a standard prolog to setup the frame recovery structure.
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Entry);
- Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
-
- std::unique_ptr<WinEHCloningDirectorBase> Director;
-
- ValueToValueMapTy VMap;
-
- LandingPadMap &LPadMap = LPadMaps[LPad];
- if (!LPadMap.isInitialized())
- LPadMap.mapLandingPad(LPad);
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- Constant *Sel = CatchAction->getSelector();
- Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo,
- LPadMap, NestedLPtoOriginalLP, DT,
- EHBlocks));
- LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
- ConstantInt::get(Type::getInt32Ty(Context), 1));
- } else {
- Director.reset(
- new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap));
- LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
- UndefValue::get(Type::getInt32Ty(Context)));
- }
+ SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
- SmallVector<ReturnInst *, 8> Returns;
- ClonedCodeInfo OutlinedFunctionInfo;
-
- // If the start block contains PHI nodes, we need to map them.
- BasicBlock::iterator II = StartBB->begin();
- while (auto *PN = dyn_cast<PHINode>(II)) {
- bool Mapped = false;
- // Look for PHI values that we have already mapped (such as the selector).
- for (Value *Val : PN->incoming_values()) {
- if (VMap.count(Val)) {
- VMap[PN] = VMap[Val];
- Mapped = true;
- }
- }
- // If we didn't find a match for this value, map it as an undef.
- if (!Mapped) {
- VMap[PN] = UndefValue::get(PN->getType());
- }
- ++II;
+ // Each pad needs to be able to refer to its parent, so scan the function
+ // looking for top-level handlers and seed the worklist with them.
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ if (BB.isLandingPad())
+ report_fatal_error("CoreCLR EH cannot use landingpads");
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ // Queue this with sentinel parent state -1 to mean unwind to caller.
+ Worklist.emplace_back(FirstNonPHI, -1);
}
- // The landing pad value may be used by PHI nodes. It will ultimately be
- // eliminated, but we need it in the map for intermediate handling.
- VMap[LPad] = UndefValue::get(LPad->getType());
-
- // Skip over PHIs and, if applicable, landingpad instructions.
- II = StartBB->getFirstInsertionPt();
-
- CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
- /*ModuleLevelChanges=*/false, Returns, "",
- &OutlinedFunctionInfo, Director.get());
-
- // Move all the instructions in the cloned "entry" block into our entry block.
- // Depending on how the parent function was laid out, the block that will
- // correspond to the outlined entry block may not be the first block in the
- // list. We can recognize it, however, as the cloned block which has no
- // predecessors. Any other block wouldn't have been cloned if it didn't
- // have a predecessor which was also cloned.
- Function::iterator ClonedIt = std::next(Function::iterator(Entry));
- while (!pred_empty(ClonedIt))
- ++ClonedIt;
- BasicBlock *ClonedEntryBB = ClonedIt;
- assert(ClonedEntryBB);
- Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList());
- ClonedEntryBB->eraseFromParent();
-
- // Make sure we can identify the handler's personality later.
- addStubInvokeToHandlerIfNeeded(Handler);
-
- if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
- WinEHCatchDirector *CatchDirector =
- reinterpret_cast<WinEHCatchDirector *>(Director.get());
- CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
- CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
-
- // Look for blocks that are not part of the landing pad that we just
- // outlined but terminate with a call to llvm.eh.endcatch and a
- // branch to a block that is in the handler we just outlined.
- // These blocks will be part of a nested landing pad that intends to
- // return to an address in this handler. This case is best handled
- // after both landing pads have been outlined, so for now we'll just
- // save the association of the blocks in LPadTargetBlocks. The
- // return instructions which are created from these branches will be
- // replaced after all landing pads have been outlined.
- for (const auto MapEntry : VMap) {
- // VMap maps all values and blocks that were just cloned, but dead
- // blocks which were pruned will map to nullptr.
- if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ while (!Worklist.empty()) {
+ const Instruction *Pad;
+ int ParentState;
+ std::tie(Pad, ParentState) = Worklist.pop_back_val();
+
+ Value *ParentPad;
+ int PredState;
+ if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+ // A cleanup can have multiple exits; don't re-process after the first.
+ if (FuncInfo.EHPadStateMap.count(Cleanup))
continue;
- const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
- for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
- auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
- continue;
- BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
- --II;
- if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
- // This would indicate that a nested landing pad wants to return
- // to a block that is outlined into two different handlers.
- assert(!LPadTargetBlocks.count(MappedBB));
- LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ // CoreCLR personality uses arity to distinguish faults from finallies.
+ const BasicBlock *PadBlock = Cleanup->getParent();
+ ClrHandlerType HandlerType =
+ (Cleanup->getNumOperands() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
+ int NewState =
+ addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
+ FuncInfo.EHPadStateMap[Cleanup] = NewState;
+ // Propagate the new state to all preds of the cleanup.
+ ParentPad = Cleanup->getParentPad();
+ PredState = NewState;
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ SmallVector<const CatchPadInst *, 1> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(Catch);
+ }
+ FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
+ int NewState = ParentState;
+ for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
+ HandlerI != HandlerE; ++HandlerI) {
+ const CatchPadInst *Catch = *HandlerI;
+ const BasicBlock *PadBlock = Catch->getParent();
+ uint32_t TypeToken = static_cast<uint32_t>(
+ cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+ NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
+ TypeToken, PadBlock);
+ FuncInfo.EHPadStateMap[Catch] = NewState;
+ }
+ for (const auto *CatchPad : Handlers) {
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ Worklist.emplace_back(UserI, ParentState);
}
}
+ PredState = NewState;
+ ParentPad = CatchSwitch->getParentPad();
+ } else {
+ llvm_unreachable("Unexpected EH pad");
}
- } // End if (CatchAction)
-
- Action->setHandlerBlockOrFunc(Handler);
-
- return true;
-}
-
-/// This BB must end in a selector dispatch. All we need to do is pass the
-/// handler block to llvm.eh.actions and list it as a possible indirectbr
-/// target.
-void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
- BasicBlock *StartBB) {
- BasicBlock *HandlerBB;
- BasicBlock *NextBB;
- Constant *Selector;
- bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
- if (Res) {
- // If this was EH dispatch, this must be a conditional branch to the handler
- // block.
- // FIXME: Handle instructions in the dispatch block. Currently we drop them,
- // leading to crashes if some optimization hoists stuff here.
- assert(CatchAction->getSelector() && HandlerBB &&
- "expected catch EH dispatch");
- } else {
- // This must be a catch-all. Split the block after the landingpad.
- assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
- HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT);
- }
- IRBuilder<> Builder(HandlerBB->getFirstInsertionPt());
- Function *EHCodeFn = Intrinsic::getDeclaration(
- StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode);
- Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode");
- Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType());
- Builder.CreateStore(Code, SEHExceptionCodeSlot);
- CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
- TinyPtrVector<BasicBlock *> Targets(HandlerBB);
- CatchAction->setReturnTargets(Targets);
-}
-void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
- // Each instance of this class should only ever be used to map a single
- // landing pad.
- assert(OriginLPad == nullptr || OriginLPad == LPad);
-
- // If the landing pad has already been mapped, there's nothing more to do.
- if (OriginLPad == LPad)
- return;
-
- OriginLPad = LPad;
-
- // The landingpad instruction returns an aggregate value. Typically, its
- // value will be passed to a pair of extract value instructions and the
- // results of those extracts will have been promoted to reg values before
- // this routine is called.
- for (auto *U : LPad->users()) {
- const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
- if (!Extract)
- continue;
- assert(Extract->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- unsigned int Idx = *(Extract->idx_begin());
- assert((Idx == 0 || Idx == 1) &&
- "Unexpected operation: extracting an unknown landing pad element");
- if (Idx == 0) {
- ExtractedEHPtrs.push_back(Extract);
- } else if (Idx == 1) {
- ExtractedSelectors.push_back(Extract);
+ // Queue all predecessors with the given state.
+ for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
+ if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
+ Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
}
}
-}
-bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const {
- return BB->getLandingPadInst() == OriginLPad;
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
}
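+
+// Note the reverse iteration over a catchswitch's handlers above: for
+// handlers [A, B], B is numbered first with the catchswitch's parent state
+// as its parent, and A is then numbered with B's state as its parent, so
+// earlier clauses end up nested inside later ones in the unwind map.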
-bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
- if (Inst == OriginLPad)
- return true;
- for (auto *Extract : ExtractedEHPtrs) {
- if (Inst == Extract)
- return true;
- }
- for (auto *Extract : ExtractedSelectors) {
- if (Inst == Extract)
- return true;
- }
- return false;
-}
-
-void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
- Value *SelectorValue) const {
- // Remap all landing pad extract instructions to the specified values.
- for (auto *Extract : ExtractedEHPtrs)
- VMap[Extract] = EHPtrValue;
- for (auto *Extract : ExtractedSelectors)
- VMap[Extract] = SelectorValue;
-}
-
-static bool isLocalAddressCall(const Value *V) {
- return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>());
-}
-
-CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // If this is one of the boilerplate landing pad instructions, skip it.
- // The instruction will have already been remapped in VMap.
- if (LPadMap.isLandingPadSpecificInst(Inst))
- return CloningDirector::SkipInstruction;
-
- // Nested landing pads that have not already been outlined will be cloned as
- // stubs, with just the landingpad instruction and an unreachable instruction.
- // When all landingpads have been outlined, we'll replace this with the
- // llvm.eh.actions call and indirect branch created when the landing pad was
- // outlined.
- if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) {
- return handleLandingPad(VMap, LPad, NewBB);
- }
+void WinEHPrepare::colorFunclets(Function &F) {
+ BlockColors = colorEHFunclets(F);
- // Nested landing pads that have already been outlined will be cloned in their
- // outlined form, but we need to intercept the ibr instruction to filter out
- // targets that do not return to the handler we are outlining.
- if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) {
- return handleIndirectBr(VMap, IBr, NewBB);
- }
-
- if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
- return handleInvoke(VMap, Invoke, NewBB);
-
- if (auto *Resume = dyn_cast<ResumeInst>(Inst))
- return handleResume(VMap, Resume, NewBB);
-
- if (auto *Cmp = dyn_cast<CmpInst>(Inst))
- return handleCompare(VMap, Cmp, NewBB);
-
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
- return handleBeginCatch(VMap, Inst, NewBB);
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
- return handleEndCatch(VMap, Inst, NewBB);
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
- return handleTypeIdFor(VMap, Inst, NewBB);
-
- // When outlining llvm.localaddress(), remap that to the second argument,
- // which is the FP of the parent.
- if (isLocalAddressCall(Inst)) {
- VMap[Inst] = ParentFP;
- return CloningDirector::SkipInstruction;
- }
-
- // Continue with the default cloning behavior.
- return CloningDirector::CloneInstruction;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad(
- ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
- // If the instruction after the landing pad is a call to llvm.eh.actions
- // the landing pad has already been outlined. In this case, we should
- // clone it because it may return to a block in the handler we are
- // outlining now that would otherwise be unreachable. The landing pads
- // are sorted before outlining begins to enable this case to work
- // properly.
- const Instruction *NextI = LPad->getNextNode();
- if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>()))
- return CloningDirector::CloneInstruction;
-
- // If the landing pad hasn't been outlined yet, the landing pad we are
- // outlining now does not dominate it and so it cannot return to a block
- // in this handler. In that case, we can just insert a stub landing
- // pad now and patch it up later.
- Instruction *NewInst = LPad->clone();
- if (LPad->hasName())
- NewInst->setName(LPad->getName());
- // Save this correlation for later processing.
- NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad;
- VMap[LPad] = NewInst;
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(NewInst);
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // The argument to the call is some form of the first element of the
- // landingpad aggregate value, but that doesn't matter. It isn't used
- // here.
- // The second argument is an outparameter where the exception object will be
- // stored. Typically the exception object is a scalar, but it can be an
- // aggregate when catching by value.
- // FIXME: Leave something behind to indicate where the exception object lives
- // for this handler. Should it be part of llvm.eh.actions?
- assert(ExceptionObjectVar == nullptr && "Multiple calls to "
- "llvm.eh.begincatch found while "
- "outlining catch handler.");
- ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
- if (isa<ConstantPointerNull>(ExceptionObjectVar))
- return CloningDirector::SkipInstruction;
- assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() &&
- "catch parameter is not static alloca");
- Materializer.escapeCatchObject(ExceptionObjectVar);
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction
-WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
- const Instruction *Inst, BasicBlock *NewBB) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- // It might be interesting to track whether or not we are inside a catch
- // function, but that might make the algorithm more brittle than it needs
- // to be.
-
- // The end catch call can occur in one of two places: either in a
- // landingpad block that is part of the catch handler's exception mechanism,
- // or at the end of the catch block. However, a catch-all handler may call
- // end catch from the original landing pad. If the call occurs in a nested
- // landing pad block, we must skip it and continue so that the landing pad
- // gets cloned.
- auto *ParentBB = IntrinCall->getParent();
- if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB))
- return CloningDirector::SkipInstruction;
-
- // If an end catch occurs anywhere else we want to terminate the handler
- // with a return to the code that follows the endcatch call. If the
- // next instruction is not an unconditional branch, we need to split the
- // block to provide a clear target for the return instruction.
- BasicBlock *ContinueBB;
- auto Next = std::next(BasicBlock::const_iterator(IntrinCall));
- const BranchInst *Branch = dyn_cast<BranchInst>(Next);
- if (!Branch || !Branch->isUnconditional()) {
- // We're interrupting the cloning process at this location, so the
- // const_cast we're doing here will not cause a problem.
- ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB),
- const_cast<Instruction *>(cast<Instruction>(Next)));
- } else {
- ContinueBB = Branch->getSuccessor(0);
+ // Invert the BB-to-colors map into a color-to-BBs map.
+ for (BasicBlock &BB : F) {
+ ColorVector &Colors = BlockColors[&BB];
+ for (BasicBlock *Color : Colors)
+ FuncletBlocks[Color].push_back(&BB);
}
-
- ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB);
- ReturnTargets.push_back(ContinueBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector)
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- else
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
}
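+
+// For example, if colorEHFunclets reports that block %shared is reachable
+// from both the entry funclet and a cleanup funclet, BlockColors[%shared]
+// holds both colors and %shared is registered under both entries of
+// FuncletBlocks; cloneCommonBlocks below uses exactly this information to
+// split such blocks, one clone per funclet.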
-CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr(
- ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) {
- // If this indirect branch is not part of a landing pad block, just clone it.
- const BasicBlock *ParentBB = IBr->getParent();
- if (!ParentBB->isLandingPad())
- return CloningDirector::CloneInstruction;
-
- // If it is part of a landing pad, we want to filter out target blocks
- // that are not part of the handler we are outlining.
- const LandingPadInst *LPad = ParentBB->getLandingPadInst();
-
- // Save this correlation for later processing.
- NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad;
-
- // We should only get here for landing pads that have already been outlined.
- assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>()));
-
- // Copy the indirectbr, but only include targets that were previously
- // identified as EH blocks and are dominated by the nested landing pad.
- SetVector<const BasicBlock *> ReturnTargets;
- for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) {
- auto *TargetBB = IBr->getDestination(I);
- if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) &&
- DT->dominates(ParentBB, TargetBB)) {
- DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n");
- ReturnTargets.insert(TargetBB);
- }
+void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ for (const BasicBlock &BB : *Fn) {
+ const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
+ if (!CatchRet)
+ continue;
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = CatchRet->getParentPad();
+ const BasicBlock *Color;
+ if (isa<ConstantTokenNone>(ParentPad))
+ Color = &Fn->getEntryBlock();
+ else
+ Color = cast<Instruction>(ParentPad)->getParent();
+ // Record the catchret successor's funclet membership.
+ FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
}
- IndirectBrInst *NewBranch =
- IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()),
- ReturnTargets.size(), NewBB);
- for (auto *Target : ReturnTargets)
- NewBranch->addDestination(const_cast<BasicBlock*>(Target));
-
- // The operands and targets of the branch instruction are remapped later
- // because it is a terminator. Tell the cloning code to clone the
- // blocks we just added to the target list.
- return CloningDirector::CloneSuccessors;
}
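+
+// For example, for
+//   catchret from %catch.token to label %cont
+// in a top-level catch (parent pad 'none'), %cont is attributed to the entry
+// block's color; if the catch is nested inside a cleanup, %cont is attributed
+// to that cleanuppad's block instead. (%catch.token and %cont are placeholder
+// names.)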
-CloningDirector::CloningAction
-WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
- const InvokeInst *Invoke, BasicBlock *NewBB) {
- return CloningDirector::CloneInstruction;
-}
+void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+ // Strip PHI nodes off of EH pads.
+ SmallVector<PHINode *, 16> PHINodes;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ if (!BB->isEHPad())
+ continue;
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = &*BI++;
+ auto *PN = dyn_cast<PHINode>(I);
+ // Stop at the first non-PHI.
+ if (!PN)
+ break;
-CloningDirector::CloningAction
-WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
- const ResumeInst *Resume, BasicBlock *NewBB) {
- // Resume instructions shouldn't be reachable from catch handlers.
- // We still need to handle them, but they will be pruned.
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
-}
+ AllocaInst *SpillSlot = insertPHILoads(PN, F);
+ if (SpillSlot)
+ insertPHIStores(PN, SpillSlot);
-CloningDirector::CloningAction
-WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare, BasicBlock *NewBB) {
- const IntrinsicInst *IntrinCall = nullptr;
- if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0));
- } else if (match(Compare->getOperand(1),
- m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1));
- }
- if (IntrinCall) {
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector->stripPointerCasts()) {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
- } else {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 0);
+ PHINodes.push_back(PN);
}
- return CloningDirector::SkipInstruction;
}
- return CloningDirector::CloneInstruction;
-}
-CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad(
- ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
- // The MS runtime will terminate the process if an exception occurs in a
- // cleanup handler, so we shouldn't encounter landing pads in the actual
- // cleanup code, but they may appear in catch blocks. Depending on where
- // we started cloning we may see one, but it will get dropped during dead
- // block pruning.
- Instruction *NewInst = new UnreachableInst(NewBB->getContext());
- VMap[LPad] = NewInst;
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(NewInst);
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Cleanup code may flow into catch blocks or the catch block may be part
- // of a branch that will be optimized away. We'll insert a return
- // instruction now, but it may be pruned before the cloning process is
- // complete.
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Cleanup handlers nested within catch handlers may begin with a call to
- // eh.endcatch. We can just ignore that instruction.
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // If we encounter a selector comparison while cloning a cleanup handler,
- // we want to stop cloning immediately. Anything after the dispatch
- // will be outlined into a different handler.
- BasicBlock *CatchHandler;
- Constant *Selector;
- BasicBlock *NextBB;
- if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
- CatchHandler, Selector, NextBB)) {
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
- return CloningDirector::StopCloningBB;
- }
- // If eh.typeid.for is called for any other reason, it can be ignored.
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- return CloningDirector::SkipInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr(
- ValueToValueMapTy &VMap,
- const IndirectBrInst *IBr,
- BasicBlock *NewBB) {
- // No special handling is required for cleanup cloning.
- return CloningDirector::CloneInstruction;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
- ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
- // All invokes in cleanup handlers can be replaced with calls.
- SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
- // Insert a normal call instruction...
- CallInst *NewCall =
- CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
- Invoke->getName(), NewBB);
- NewCall->setCallingConv(Invoke->getCallingConv());
- NewCall->setAttributes(Invoke->getAttributes());
- NewCall->setDebugLoc(Invoke->getDebugLoc());
- VMap[Invoke] = NewCall;
-
- // Remap the operands.
- llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
-
- // Insert an unconditional branch to the normal destination.
- BranchInst::Create(Invoke->getNormalDest(), NewBB);
-
- // The unwind destination won't be cloned into the new function, so
- // we don't need to clean up its phi nodes.
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block.
- return CloningDirector::CloneSuccessors;
-}
-
-CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
- ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
- ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
- return CloningDirector::StopCloningBB;
-}
-
-CloningDirector::CloningAction
-WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap,
- const CmpInst *Compare, BasicBlock *NewBB) {
- if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) ||
- match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
- return CloningDirector::SkipInstruction;
+ for (auto *PN : PHINodes) {
+ // There may be lingering uses on other EH PHIs being removed.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
}
- return CloningDirector::CloneInstruction;
-}
-
-WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
- Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo)
- : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
- BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
-
- // New allocas should be inserted in the entry block, but after the parent FP
- // is established if it is an instruction.
- Instruction *InsertPoint = EntryBB->getFirstInsertionPt();
- if (auto *FPInst = dyn_cast<Instruction>(ParentFP))
- InsertPoint = FPInst->getNextNode();
- Builder.SetInsertPoint(EntryBB, InsertPoint);
}
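+
+// For example, a PHI at the start of a catchswitch block such as
+//   %x = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+// is demoted to a stack slot: insertPHIStores writes %a and %b in the
+// respective predecessors and insertPHILoads reloads the value at %x's uses,
+// since funclets get separate frames and cannot carry such values in
+// registers across EH pad boundaries.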
-Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
- // If we're asked to materialize a static alloca, we temporarily create an
- // alloca in the outlined function and add this to the FrameVarInfo map. When
- // all the outlining is complete, we'll replace these temporary allocas with
- // calls to llvm.localrecover.
- if (auto *AV = dyn_cast<AllocaInst>(V)) {
- assert(AV->isStaticAlloca() &&
- "cannot materialize un-demoted dynamic alloca");
- AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
- Builder.Insert(NewAlloca, AV->getName());
- FrameVarInfo[AV].push_back(NewAlloca);
- return NewAlloca;
- }
-
- if (isa<Instruction>(V) || isa<Argument>(V)) {
- Function *Parent = isa<Instruction>(V)
- ? cast<Instruction>(V)->getParent()->getParent()
- : cast<Argument>(V)->getParent();
- errs()
- << "Failed to demote instruction used in exception handler of function "
- << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n";
- errs() << " " << *V << '\n';
- report_fatal_error("WinEHPrepare failed to demote instruction");
- }
-
- // Don't materialize other values.
- return nullptr;
-}
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+ // We need to clone all blocks which belong to multiple funclets. Values are
+ // remapped throughout the funclet to propagate both the new instructions
+ // *and* the new basic blocks themselves.
+ for (auto &Funclets : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclets.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+
+ std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+ ValueToValueMapTy VMap;
+ for (BasicBlock *BB : BlocksInFunclet) {
+ ColorVector &ColorsForBB = BlockColors[BB];
+ // We don't need to do anything if the block is monochromatic.
+ size_t NumColorsForBB = ColorsForBB.size();
+ if (NumColorsForBB == 1)
+ continue;
-void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
- // Catch parameter objects have to live in the parent frame. When we see a use
- // of a catch parameter, add a sentinel to the multimap to indicate that it's
- // used from another handler. This will prevent us from trying to sink the
- // alloca into the handler and ensure that the catch parameter is present in
- // the call to llvm.localescape.
- FrameVarInfo[V].push_back(getCatchObjectSentinel());
-}
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Cloning block \'" << BB->getName()
+ << "\' for funclet \'" << FuncletPadBB->getName()
+ << "\'.\n");
-// This function maps the catch and cleanup handlers that are reachable from the
-// specified landing pad. The landing pad sequence will have this basic shape:
-//
-// <cleanup handler>
-// <selector comparison>
-// <catch handler>
-// <cleanup handler>
-// <selector comparison>
-// <catch handler>
-// <cleanup handler>
-// ...
-//
-// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
-// any arbitrary control flow, but all paths through the cleanup code must
-// eventually reach the next selector comparison and no path can skip to a
- // different selector comparison, though some paths may terminate abnormally.
-// Therefore, we will use a depth first search from the start of any given
-// cleanup block and stop searching when we find the next selector comparison.
-//
-// If the landingpad instruction does not have a catch clause, we will assume
-// that any instructions other than selector comparisons and catch handlers can
-// be ignored. In practice, these will only be the boilerplate instructions.
-//
-// The catch handlers may also have any control structure, but we are only
-// interested in the start of the catch handlers, so we don't need to actually
-// follow the flow of the catch handlers. The start of the catch handlers can
-// be located from the compare instructions, but they can be skipped in the
-// flow by following the contrary branch.
-void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
- LandingPadActions &Actions) {
- unsigned int NumClauses = LPad->getNumClauses();
- unsigned int HandlersFound = 0;
- BasicBlock *BB = LPad->getParent();
-
- DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
-
- if (NumClauses == 0) {
- findCleanupHandlers(Actions, BB, nullptr);
- return;
- }
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB =
+ CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+ // Insert the clone immediately after the original to ensure determinism
+ // and to keep the same relative ordering of any funclet's blocks.
+ CBB->insertInto(&F, BB->getNextNode());
- VisitedBlockSet VisitedBlocks;
+ // Add basic block mapping.
+ VMap[BB] = CBB;
- while (HandlersFound != NumClauses) {
- BasicBlock *NextBB = nullptr;
+ // Record delta operations that we need to perform to our color mappings.
+ Orig2Clone.emplace_back(BB, CBB);
+ }
- // Skip over filter clauses.
- if (LPad->isFilter(HandlersFound)) {
- ++HandlersFound;
+ // If nothing was cloned, we're done cloning in this funclet.
+ if (Orig2Clone.empty())
continue;
+
+ // Update our color mappings to reflect that one block has lost a color and
+ // another has gained a color.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ BlocksInFunclet.push_back(NewBlock);
+ ColorVector &NewColors = BlockColors[NewBlock];
+ assert(NewColors.empty() && "A new block should only have one color!");
+ NewColors.push_back(FuncletPadBB);
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << FuncletPadBB->getName()
+ << "\' to block \'" << NewBlock->getName()
+ << "\'.\n");
+
+ BlocksInFunclet.erase(
+ std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
+ BlocksInFunclet.end());
+ ColorVector &OldColors = BlockColors[OldBlock];
+ OldColors.erase(
+ std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
+ OldColors.end());
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Removed color \'" << FuncletPadBB->getName()
+ << "\' from block \'" << OldBlock->getName()
+ << "\'.\n");
}
- // See if the clause we're looking for is a catch-all.
- // If so, the catch begins immediately.
- Constant *ExpectedSelector =
- LPad->getClause(HandlersFound)->stripPointerCasts();
- if (isa<ConstantPointerNull>(ExpectedSelector)) {
- // The catch all must occur last.
- assert(HandlersFound == NumClauses - 1);
-
- // There can be additional selector dispatches in the call chain that we
- // need to ignore.
- BasicBlock *CatchBlock = nullptr;
- Constant *Selector;
- while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
- DEBUG(dbgs() << " Found extra catch dispatch in block "
- << CatchBlock->getName() << "\n");
- BB = NextBB;
+ // Loop over all of the instructions in this funclet, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (BasicBlock *BB : BlocksInFunclet)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &I : *BB)
+ RemapInstruction(&I, VMap,
+ RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+
+ auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+ unsigned NumPreds = PN->getNumIncomingValues();
+ for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+ ++PredIdx) {
+ BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+ ColorVector &IncomingColors = BlockColors[IncomingBlock];
+ bool BlockInFunclet = IncomingColors.size() == 1 &&
+ IncomingColors.front() == FuncletPadBB;
+ if (IsForOldBlock != BlockInFunclet)
+ continue;
+ PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+ // Revisit the next entry.
+ --PredIdx;
+ --PredEnd;
}
-
- // Add the catch handler to the action list.
- CatchHandler *Action = nullptr;
- if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
- // If the CatchHandlerMap already has an entry for this BB, re-use it.
- Action = CatchHandlerMap[BB];
- assert(Action->getSelector() == ExpectedSelector);
- } else {
- // We don't expect a selector dispatch, but there may be a call to
- // llvm.eh.begincatch, which separates catch handling code from
- // cleanup code in the same control flow. This call looks for the
- // begincatch intrinsic.
- Action = findCatchHandler(BB, NextBB, VisitedBlocks);
- if (Action) {
- // For C++ EH, check if there is any interesting cleanup code before
- // we begin the catch. This is important because cleanups cannot
- // rethrow exceptions but code called from catches can. For SEH, it
- // isn't important if some finally code before a catch-all is executed
- // out of line or after recovering from the exception.
- if (Personality == EHPersonality::MSVC_CXX)
- findCleanupHandlers(Actions, BB, BB);
- } else {
- // If an action was not found, it means that the control flows
- // directly into the catch-all handler and there is no cleanup code.
- // That's an expected situation and we must create a catch action.
- // Since this is a catch-all handler, the selector won't actually
- // appear in the code anywhere. ExpectedSelector here is the constant
- // null ptr that we got from the landing pad instruction.
- Action = new CatchHandler(BB, ExpectedSelector, nullptr);
- CatchHandlerMap[BB] = Action;
- }
+ };
+
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (Instruction &OldI : *OldBlock) {
+ auto *OldPN = dyn_cast<PHINode>(&OldI);
+ if (!OldPN)
+ break;
+ UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
}
- Actions.insertCatchHandler(Action);
- DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
- ++HandlersFound;
-
- // Once we reach a catch-all, don't expect to hit a resume instruction.
- BB = nullptr;
- break;
- }
-
- CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
- assert(CatchAction);
-
- // See if there is any interesting code executed before the dispatch.
- findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
-
- // When the source program contains multiple nested try blocks the catch
- // handlers can get strung together in such a way that we can encounter
- // a dispatch for a selector that we've already had a handler for.
- if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) {
- ++HandlersFound;
-
- // Add the catch handler to the action list.
- DEBUG(dbgs() << " Found catch dispatch in block "
- << CatchAction->getStartBlock()->getName() << "\n");
- Actions.insertCatchHandler(CatchAction);
- } else {
- // Under some circumstances optimized IR will flow unconditionally into a
- // handler block without checking the selector. This can only happen if
- // the landing pad has a catch-all handler and the handler for the
- // preceding catch clause is identical to the catch-all handler
- // (typically an empty catch). In this case, the handler must be shared
- // by all remaining clauses.
- if (isa<ConstantPointerNull>(
- CatchAction->getSelector()->stripPointerCasts())) {
- DEBUG(dbgs() << " Applying early catch-all handler in block "
- << CatchAction->getStartBlock()->getName()
- << " to all remaining clauses.\n");
- Actions.insertCatchHandler(CatchAction);
- return;
+ for (Instruction &NewI : *NewBlock) {
+ auto *NewPN = dyn_cast<PHINode>(&NewI);
+ if (!NewPN)
+ break;
+ UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
}
-
- DEBUG(dbgs() << " Found extra catch dispatch in block "
- << CatchAction->getStartBlock()->getName() << "\n");
}
- // Move on to the block after the catch handler.
- BB = NextBB;
- }
-
- // If we didn't wind up in a catch-all, see if there is any interesting code
- // executed before the resume.
- findCleanupHandlers(Actions, BB, BB);
-
- // It's possible that some optimization moved code into a landingpad that
- // wasn't previously being used for cleanup. If that happens, we need to
- // execute that extra code from a cleanup handler.
- if (Actions.includesCleanup() && !LPad->isCleanup())
- LPad->setCleanup(true);
-}
-
-// This function searches starting with the input block for the next
-// block that terminates with a branch whose condition is based on a selector
-// comparison. This may be the input block. See the mapLandingPadBlocks
-// comments for a discussion of control flow assumptions.
-//
-CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
- BasicBlock *&NextBB,
- VisitedBlockSet &VisitedBlocks) {
- // See if we've already found a catch handler; if so, use it.
- // Call count() first to avoid creating a null entry for blocks
- // we haven't seen before.
- if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
- CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
- NextBB = Action->getNextBB();
- return Action;
- }
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
+ // the PHI nodes for NewBB now.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (BasicBlock *SuccBB : successors(NewBlock)) {
+ for (Instruction &SuccI : *SuccBB) {
+ auto *SuccPN = dyn_cast<PHINode>(&SuccI);
+ if (!SuccPN)
+ break;
+
+ // Ok, we have a PHI node. Figure out what the incoming value was for
+ // the OldBlock.
+ int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+ if (OldBlockIdx == -1)
+ break;
+ Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+
+ // Remap the value if necessary.
+ if (auto *Inst = dyn_cast<Instruction>(IV)) {
+ ValueToValueMapTy::iterator I = VMap.find(Inst);
+ if (I != VMap.end())
+ IV = I->second;
+ }
- // VisitedBlocks applies only to the current search. We still
- // need to consider blocks that we've visited while mapping other
- // landing pads.
- VisitedBlocks.insert(BB);
-
- BasicBlock *CatchBlock = nullptr;
- Constant *Selector = nullptr;
-
- // If this is the first time we've visited this block from any landing pad,
- // look to see if it is a selector dispatch block.
- if (!CatchHandlerMap.count(BB)) {
- if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
- CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
- CatchHandlerMap[BB] = Action;
- return Action;
- }
- // If we encounter a block containing an llvm.eh.begincatch before we
- // find a selector dispatch block, the handler is assumed to be
- // reached unconditionally. This happens for catch-all blocks, but
- // it can also happen for other catch handlers that have been combined
- // with the catch-all handler during optimization.
- if (isCatchBlock(BB)) {
- PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext());
- Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy);
- CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr);
- CatchHandlerMap[BB] = Action;
- return Action;
+ SuccPN->addIncoming(IV, NewBlock);
+ }
+ }
}
- }
- // Visit each successor, looking for the dispatch.
- // FIXME: We expect to find the dispatch quickly, so this will probably
- // work better as a breadth-first search.
- for (BasicBlock *Succ : successors(BB)) {
- if (VisitedBlocks.count(Succ))
- continue;
+ for (ValueToValueMapTy::value_type VT : VMap) {
+ // If there were values defined in BB that are used outside the funclet,
+ // then we now have to update all uses of the value to use either the
+ // original value, the cloned value, or some PHI derived value. This can
+ // require arbitrary PHI insertion, which the SSAUpdater below takes care
+ // of; clean these up now.
+ SmallVector<Use *, 16> UsesToRename;
- CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
- if (Action)
- return Action;
- }
- return nullptr;
-}
-
-// These are helper functions to combine repeated code from findCleanupHandlers.
-static void createCleanupHandler(LandingPadActions &Actions,
- CleanupHandlerMapTy &CleanupHandlerMap,
- BasicBlock *BB) {
- CleanupHandler *Action = new CleanupHandler(BB);
- CleanupHandlerMap[BB] = Action;
- Actions.insertCleanupHandler(Action);
- DEBUG(dbgs() << " Found cleanup code in block "
- << Action->getStartBlock()->getName() << "\n");
-}
-
-static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
- Instruction *MaybeCall) {
- // Look for finally blocks that Clang has already outlined for us.
- // %fp = call i8* @llvm.localaddress()
- // call void @"fin$parent"(iN 1, i8* %fp)
- if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
- MaybeCall = MaybeCall->getNextNode();
- CallSite FinallyCall(MaybeCall);
- if (!FinallyCall || FinallyCall.arg_size() != 2)
- return CallSite();
- if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
- return CallSite();
- if (!isLocalAddressCall(FinallyCall.getArgument(1)))
- return CallSite();
- return FinallyCall;
-}
-
-static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
- // Skip blocks that contain only a single unconditional branch.
- while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
- auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
- if (Br && Br->isUnconditional())
- BB = Br->getSuccessor(0);
- else
- return BB;
- }
- return BB;
-}
-
-// This function searches starting with the input block for the next block that
-// contains code that is not part of a catch handler and would not be eliminated
-// during handler outlining.
-//
-void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
- BasicBlock *StartBB, BasicBlock *EndBB) {
- // Here we will skip over the following:
- //
- // landing pad prolog:
- //
- // Unconditional branches
- //
- // Selector dispatch
- //
- // Resume pattern
- //
- // Anything else marks the start of an interesting block
-
- BasicBlock *BB = StartBB;
- // Anything other than an unconditional branch will kick us out of this loop
- // one way or another.
- while (BB) {
- BB = followSingleUnconditionalBranches(BB);
- // If we've already scanned this block, don't scan it again. If it is
- // a cleanup block, there will be an action in the CleanupHandlerMap.
- // If we've scanned it and it is not a cleanup block, there will be a
- // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
- // be no entry in the CleanupHandlerMap. We must call count() first to
- // avoid creating a null entry for blocks we haven't scanned.
- if (CleanupHandlerMap.count(BB)) {
- if (auto *Action = CleanupHandlerMap[BB]) {
- Actions.insertCleanupHandler(Action);
- DEBUG(dbgs() << " Found cleanup code in block "
- << Action->getStartBlock()->getName() << "\n");
- // FIXME: This cleanup might chain into another, and we need to discover
- // that.
- return;
- } else {
- // Here we handle the case where the cleanup handler map contains a
- // value for this block but the value is a nullptr. This means that
- // we have previously analyzed the block and determined that it did
- // not contain any cleanup code. Based on the earlier analysis, we
- // know the block must end in either an unconditional branch, a
- // resume or a conditional branch that is predicated on a comparison
- // with a selector. Either the resume or the selector dispatch
- // would terminate the search for cleanup code, so the unconditional
- // branch is the only case for which we might need to continue
- // searching.
- BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
- if (SuccBB == BB || SuccBB == EndBB)
- return;
- BB = SuccBB;
+ auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+ if (!OldI)
continue;
+ auto *NewI = cast<Instruction>(VT.second);
+ // Scan all uses of this instruction to see if it is used outside of its
+ // funclet, and if so, record them in UsesToRename.
+ for (Use &U : OldI->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = UserI->getParent();
+ ColorVector &ColorsForUserBB = BlockColors[UserBB];
+ assert(!ColorsForUserBB.empty());
+ if (ColorsForUserBB.size() > 1 ||
+ *ColorsForUserBB.begin() != FuncletPadBB)
+ UsesToRename.push_back(&U);
}
- }
- // Create an entry in the cleanup handler map for this block. Initially
- // we create an entry that says this isn't a cleanup block. If we find
- // cleanup code, the caller will replace this entry.
- CleanupHandlerMap[BB] = nullptr;
+ // If there are no uses outside the block, we're done with this
+ // instruction.
+ if (UsesToRename.empty())
+ continue;
- TerminatorInst *Terminator = BB->getTerminator();
+ // We found a use of OldI outside of the funclet. Rename all uses of OldI
+ // that are outside its funclet to be uses of the appropriate PHI node
+ // etc.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+ SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+ SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
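+
+ // With both the original and the cloned definition registered, the
+ // rewrite loop below lets SSAUpdater construct whatever PHI nodes are
+ // needed at join points so each renamed use sees the correct value.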
- // Landing pad blocks have extra instructions we need to accept.
- LandingPadMap *LPadMap = nullptr;
- if (BB->isLandingPad()) {
- LandingPadInst *LPad = BB->getLandingPadInst();
- LPadMap = &LPadMaps[LPad];
- if (!LPadMap->isInitialized())
- LPadMap->mapLandingPad(LPad);
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
}
+ }
+}
- // Look for the bare resume pattern:
- // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
- // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
- // resume { i8*, i32 } %lpad.val2
- if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
- InsertValueInst *Insert1 = nullptr;
- InsertValueInst *Insert2 = nullptr;
- Value *ResumeVal = Resume->getOperand(0);
- // If the resume value isn't a phi or landingpad value, it should be a
- // series of insertions. Identify them so we can avoid them when scanning
- // for cleanups.
- if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
- Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
- if (!Insert2)
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
- if (!Insert1)
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+ // Remove implausible terminators and replace them with UnreachableInst.
+ for (auto &Funclet : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclet.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+ Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+ auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+ auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+ auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+ for (BasicBlock *BB : BlocksInFunclet) {
+ for (Instruction &I : *BB) {
+ CallSite CS(&I);
+ if (!CS)
continue;
- if (!Inst->hasOneUse() ||
- (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- }
- return;
- }
- BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- if (Branch && Branch->isConditional()) {
- // Look for the selector dispatch.
- // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
- // %matches = icmp eq i32 %sel, %2
- // br i1 %matches, label %catch14, label %eh.resume
- CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
- if (!Compare || !Compare->isEquality())
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Compare || Inst == Branch)
+ Value *FuncletBundleOperand = nullptr;
+ if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+ FuncletBundleOperand = BU->Inputs.front();
+
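+ // A call site that legitimately belongs to this funclet names the pad
+ // token in a "funclet" operand bundle, e.g. (illustrative IR):
+ // call void @g() [ "funclet"(token %pad) ]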
+ if (FuncletBundleOperand == FuncletPad)
continue;
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+
+ // Skip call sites which are nounwind intrinsics.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
continue;
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
- // The selector dispatch block should always terminate our search.
- assert(BB == EndBB);
- return;
- }
- if (isAsynchronousEHPersonality(Personality)) {
- // If this is a landingpad block, split the block at the first non-landing
- // pad instruction.
- Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
- if (LPadMap) {
- while (MaybeCall != BB->getTerminator() &&
- LPadMap->isLandingPadSpecificInst(MaybeCall))
- MaybeCall = MaybeCall->getNextNode();
+ // This call site was not part of this funclet, remove it.
+ if (CS.isInvoke()) {
+ // Remove the unwind edge if it was an invoke.
+ removeUnwindEdge(BB);
+ // Get a pointer to the new call.
+ BasicBlock::iterator CallI =
+ std::prev(BB->getTerminator()->getIterator());
+ auto *CI = cast<CallInst>(&*CallI);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ } else {
+ changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+ }
+
+ // There are no more instructions in the block (except for unreachable),
+ // so we are done.
+ break;
}
- // Look for outlined finally calls on x64, since those happen to match the
- // prototype provided by the runtime.
- if (TheTriple.getArch() == Triple::x86_64) {
- if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
- Function *Fin = FinallyCall.getCalledFunction();
- assert(Fin && "outlined finally call should be direct");
- auto *Action = new CleanupHandler(BB);
- Action->setHandlerBlockOrFunc(Fin);
- Actions.insertCleanupHandler(Action);
- CleanupHandlerMap[BB] = Action;
- DEBUG(dbgs() << " Found frontend-outlined finally call to "
- << Fin->getName() << " in block "
- << Action->getStartBlock()->getName() << "\n");
-
- // Split the block if there were more interesting instructions and
- // look for finally calls in the normal successor block.
- BasicBlock *SuccBB = BB;
- if (FinallyCall.getInstruction() != BB->getTerminator() &&
- FinallyCall.getInstruction()->getNextNode() !=
- BB->getTerminator()) {
- SuccBB =
- SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
- } else {
- if (FinallyCall.isInvoke()) {
- SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())
- ->getNormalDest();
- } else {
- SuccBB = BB->getUniqueSuccessor();
- assert(SuccBB &&
- "splitOutlinedFinallyCalls didn't insert a branch");
- }
- }
- BB = SuccBB;
- if (BB == EndBB)
- return;
- continue;
+ TerminatorInst *TI = BB->getTerminator();
+ // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
+ bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+ // The token consumed by a CatchReturnInst must match the funclet token.
+ bool IsUnreachableCatchret = false;
+ if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+ IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+ // The token consumed by a CleanupReturnInst must match the funclet token.
+ bool IsUnreachableCleanupret = false;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+ IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+ if (IsUnreachableRet || IsUnreachableCatchret ||
+ IsUnreachableCleanupret) {
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+ } else if (isa<InvokeInst>(TI)) {
+ if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+ // Invokes within a cleanuppad for the MSVC++ personality never
+ // transfer control to their unwind edge: the personality will
+ // terminate the program.
+ removeUnwindEdge(BB);
}
}
}
+ }
+}
- // Anything else is either a catch block or interesting cleanup code.
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- // Unconditional branches fall through to this loop.
- if (Inst == Branch)
- continue;
- // If this is a catch block, there is no cleanup code to be found.
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
- return;
- // If this a nested landing pad, it may contain an endcatch call.
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
- return;
- // Anything else makes this interesting cleanup code.
- return createCleanupHandler(Actions, CleanupHandlerMap, BB);
- }
-
- // Only unconditional branches in empty blocks should get this far.
- assert(Branch && Branch->isUnconditional());
- if (BB == EndBB)
- return;
- BB = Branch->getSuccessor(0);
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+ // Clean up some of the mess we made by removing useless PHI nodes, trivial
+ // branches, etc.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ SimplifyInstructionsInBlock(BB);
+ ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
+ MergeBlockIntoPredecessor(BB);
}
+
+ // We might have some unreachable blocks after cleaning up some impossible
+ // control flow.
+ removeUnreachableBlocks(F);
}
-// This is a public function, declared in WinEHFuncInfo.h and is also
-// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
-void llvm::parseEHActions(
- const IntrinsicInst *II,
- SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
- assert(II->getIntrinsicID() == Intrinsic::eh_actions &&
- "attempted to parse non eh.actions intrinsic");
- for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
- uint64_t ActionKind =
- cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
- if (ActionKind == /*catch=*/1) {
- auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
- ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
- int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
- Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
- I += 4;
- auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
- /*NextBB=*/nullptr);
- CH->setHandlerBlockOrFunc(Handler);
- CH->setExceptionVarIndex(EHObjIndexVal);
- Actions.push_back(std::move(CH));
- } else if (ActionKind == 0) {
- Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
- I += 2;
- auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
- CH->setHandlerBlockOrFunc(Handler);
- Actions.push_back(std::move(CH));
- } else {
- llvm_unreachable("Expected either a catch or cleanup handler!");
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+ // Recolor the CFG to verify that all is well.
+ for (BasicBlock &BB : F) {
+ size_t NumColors = BlockColors[&BB].size();
+ assert(NumColors == 1 && "Expected monochromatic BB!");
+ if (NumColors == 0)
+ report_fatal_error("Uncolored BB!");
+ if (NumColors > 1)
+ report_fatal_error("Multicolor BB!");
+ if (!DisableDemotion) {
+ bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
+ assert(!EHPadHasPHI && "EH Pad still has a PHI!");
+ if (EHPadHasPHI)
+ report_fatal_error("EH Pad still has a PHI!");
}
}
- std::reverse(Actions.begin(), Actions.end());
}
-namespace {
-struct WinEHNumbering {
- WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo),
- CurrentBaseState(-1), NextState(0) {}
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+ // Remove unreachable blocks. It is not valuable to assign them a color and
+ // their existence can trick us into thinking values are alive when they are
+ // not.
+ removeUnreachableBlocks(F);
- WinEHFuncInfo &FuncInfo;
- int CurrentBaseState;
- int NextState;
+ // Determine which blocks are reachable from which funclet entries.
+ colorFunclets(F);
- SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
- SmallPtrSet<const Function *, 4> VisitedHandlers;
+ cloneCommonBlocks(F);
- int currentEHNumber() const {
- return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState();
- }
+ if (!DisableDemotion)
+ demotePHIsOnFunclets(F);
- void createUnwindMapEntry(int ToState, ActionHandler *AH);
- void createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers);
- void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS);
- void popUnmatchedActions(int FirstMismatch);
- void calculateStateNumbers(const Function &F);
- void findActionRootLPads(const Function &F);
-};
-}
+ if (!DisableCleanups) {
+ removeImplausibleInstructions(F);
-void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
- WinEHUnwindMapEntry UME;
- UME.ToState = ToState;
- if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
- UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
- else
- UME.Cleanup = nullptr;
- FuncInfo.UnwindMap.push_back(UME);
-}
-
-void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
- ArrayRef<CatchHandler *> Handlers) {
- // See if we already have an entry for this set of handlers.
- // This is using iterators rather than a range-based for loop because
- // if we find the entry we're looking for, we'll need the iterator to erase it.
- int NumHandlers = Handlers.size();
- auto I = FuncInfo.TryBlockMap.begin();
- auto E = FuncInfo.TryBlockMap.end();
- for ( ; I != E; ++I) {
- auto &Entry = *I;
- if (Entry.HandlerArray.size() != (size_t)NumHandlers)
- continue;
- int N;
- for (N = 0; N < NumHandlers; ++N) {
- if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc())
- break; // breaks out of inner loop
- }
- // If all the handlers match, this is what we were looking for.
- if (N == NumHandlers) {
- break;
- }
- }
-
- // If we found an existing entry for this set of handlers, extend the range
- // but move the entry to the end of the map vector. The order of entries
- // in the map is critical to the way that the runtime finds handlers.
- // FIXME: Depending on what has happened with block ordering, this may
- // incorrectly combine entries that should remain separate.
- if (I != E) {
- // Copy the existing entry.
- WinEHTryBlockMapEntry Entry = *I;
- Entry.TryLow = std::min(TryLow, Entry.TryLow);
- Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
- assert(Entry.TryLow <= Entry.TryHigh);
- // Erase the old entry and add this one to the back.
- FuncInfo.TryBlockMap.erase(I);
- FuncInfo.TryBlockMap.push_back(Entry);
- return;
+ cleanupPreparedFunclets(F);
}
- // If we didn't find an entry, create a new one.
- WinEHTryBlockMapEntry TBME;
- TBME.TryLow = TryLow;
- TBME.TryHigh = TryHigh;
- assert(TBME.TryLow <= TBME.TryHigh);
- for (CatchHandler *CH : Handlers) {
- WinEHHandlerType HT;
- if (CH->getSelector()->isNullValue()) {
- HT.Adjectives = 0x40;
- HT.TypeDescriptor = nullptr;
- } else {
- auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
- // Selectors are always pointers to GlobalVariables with 'struct' type.
- // The struct has two fields, adjectives and a type descriptor.
- auto *CS = cast<ConstantStruct>(GV->getInitializer());
- HT.Adjectives =
- cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
- HT.TypeDescriptor =
- cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
- }
- HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
- HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
- TBME.HandlerArray.push_back(HT);
- }
- FuncInfo.TryBlockMap.push_back(TBME);
-}
+ verifyPreparedFunclets(F);
-static void print_name(const Value *V) {
-#ifndef NDEBUG
- if (!V) {
- DEBUG(dbgs() << "null");
- return;
- }
+ BlockColors.clear();
+ FuncletBlocks.clear();
- if (const auto *F = dyn_cast<Function>(V))
- DEBUG(dbgs() << F->getName());
- else
- DEBUG(V->dump());
-#endif
+ return true;
}
-void WinEHNumbering::processCallSite(
- MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
- ImmutableCallSite CS) {
- DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
- << ") for: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
-
- DEBUG(dbgs() << "HandlerStack: \n");
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(HandlerStack[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- DEBUG(dbgs() << "Actions: \n");
- for (int I = 0, E = Actions.size(); I < E; ++I) {
- DEBUG(dbgs() << " ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << '\n');
- }
- int FirstMismatch = 0;
- for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
- ++FirstMismatch) {
- if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
- Actions[FirstMismatch]->getHandlerBlockOrFunc())
- break;
- }
-
- // Remove unmatched actions from the stack and process their EH states.
- popUnmatchedActions(FirstMismatch);
-
- DEBUG(dbgs() << "Pushing actions for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
-
- bool LastActionWasCatch = false;
- const LandingPadInst *LastRootLPad = nullptr;
- for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
- // We can reuse eh states when pushing two catches for the same invoke.
- bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
- auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
- // Various conditions can lead to a handler being popped from the
- // stack and re-pushed later. That shouldn't create a new state.
- // FIXME: Can code optimization lead to re-used handlers?
- if (FuncInfo.HandlerEnclosedState.count(Handler)) {
- // If we already assigned the state enclosed by this handler re-use it.
- Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+ BasicBlock *PHIBlock = PN->getParent();
+ AllocaInst *SpillSlot = nullptr;
+ Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+ if (!isa<TerminatorInst>(EHPad)) {
+ // If the EHPad isn't a terminator, then we can insert a load in this block
+ // that will dominate all uses.
+ SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ Twine(PN->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+ Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+ &*PHIBlock->getFirstInsertionPt());
+ PN->replaceAllUsesWith(V);
+ return SpillSlot;
+ }
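+
+ // For illustration only (names hypothetical), the non-terminator case
+ // produces IR of roughly this shape:
+ // entry:
+ // %p.wineh.spillslot = alloca i32
+ // ehpad:
+ // %pad = cleanuppad within none []
+ // %p.wineh.reload = load i32, i32* %p.wineh.spillslot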
+
+ // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+ // loads of the slot before every use.
+ DenseMap<BasicBlock *, Value *> Loads;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+ // Use is on an EH pad phi. Leave it alone; we'll insert loads and
+ // stores for it separately.
continue;
}
- const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler];
- if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
- DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n");
- Actions[I]->setEHState(currentEHNumber());
- } else {
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
- print_name(Actions[I]->getHandlerBlockOrFunc());
- DEBUG(dbgs() << ") with EH state " << NextState << "\n");
- createUnwindMapEntry(currentEHNumber(), Actions[I].get());
- DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
- Actions[I]->setEHState(NextState);
- NextState++;
- }
- HandlerStack.push_back(std::move(Actions[I]));
- LastActionWasCatch = CurrActionIsCatch;
- LastRootLPad = RootLPad;
+ replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
}
-
- // This is used to defer numbering states for a handler until after the
- // last time it appears in an invoke action list.
- if (CS.isInvoke()) {
- for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
- auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
- if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction()))
- continue;
- FuncInfo.LastInvokeVisited[Handler] = true;
- DEBUG(dbgs() << "Last invoke of ");
- print_name(Handler);
- DEBUG(dbgs() << " has been visited.\n");
- }
- }
-
- DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
- print_name(CS ? CS.getCalledValue() : nullptr);
- DEBUG(dbgs() << '\n');
+ return SpillSlot;
}
-void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
- // Don't recurse while we are looping over the handler stack. Instead, defer
- // the numbering of the catch handlers until we are done popping.
- SmallVector<CatchHandler *, 4> PoppedCatches;
- for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
- std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
- if (isa<CatchHandler>(Handler.get()))
- PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
- }
+// TODO: improve store placement. Inserting at def is probably good, but need
+// to be careful not to introduce interfering stores (needs liveness analysis).
+// TODO: identify related phi nodes that can share spill slots, and share them
+// (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+ AllocaInst *SpillSlot) {
+ // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+ // stored to the spill slot by the end of the given Block.
+ SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
- int TryHigh = NextState - 1;
- int LastTryLowIdx = 0;
- for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
- CatchHandler *CH = PoppedCatches[I];
- DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
- if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
- int TryLow = CH->getEHState();
- auto Handlers =
- makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
- DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
- for (size_t J = 0; J < Handlers.size(); ++J) {
- DEBUG(dbgs() << ", ");
- print_name(Handlers[J]->getHandlerBlockOrFunc());
- }
- DEBUG(dbgs() << ")\n");
- createTryBlockMapEntry(TryLow, TryHigh, Handlers);
- LastTryLowIdx = I + 1;
- }
- }
+ Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
- for (CatchHandler *CH : PoppedCatches) {
- if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
- if (FuncInfo.LastInvokeVisited[F]) {
- DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
- print_name(F);
- DEBUG(dbgs() << '\n');
- FuncInfo.HandlerBaseState[F] = NextState;
- DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
- << ", null)\n");
- createUnwindMapEntry(currentEHNumber(), nullptr);
- ++NextState;
- calculateStateNumbers(*F);
+ while (!Worklist.empty()) {
+ BasicBlock *EHBlock;
+ Value *InVal;
+ std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+ PHINode *PN = dyn_cast<PHINode>(InVal);
+ if (PN && PN->getParent() == EHBlock) {
+ // The value is defined by another PHI we need to remove, with no room to
+ // insert a store after the PHI, so each predecessor needs to store its
+ // incoming value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ Value *PredVal = PN->getIncomingValue(i);
+
+ // Undef can safely be skipped.
+ if (isa<UndefValue>(PredVal))
+ continue;
+
+ insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
}
- else {
- DEBUG(dbgs() << "Deferring handling of ");
- print_name(F);
- DEBUG(dbgs() << " until last invoke visited.\n");
+ } else {
+ // We need to store InVal, which dominates EHBlock, but can't put a store
+ // in EHBlock, so we need to put stores in each predecessor.
+ for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+ insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
}
}
- delete CH;
}
}
-void WinEHNumbering::calculateStateNumbers(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't renumber it.
+void WinEHPrepare::insertPHIStore(
+ BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
- int OldBaseState = CurrentBaseState;
- if (FuncInfo.HandlerBaseState.count(&F)) {
- CurrentBaseState = FuncInfo.HandlerBaseState[&F];
- }
-
- size_t SavedHandlerStackSize = HandlerStack.size();
-
- DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
- const auto *CI = dyn_cast<CallInst>(&I);
- if (!CI || CI->doesNotThrow())
- continue;
- processCallSite(None, CI);
- }
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- processCallSite(ActionList, II);
- ActionList.clear();
- FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
- DEBUG(dbgs() << "Assigning state " << currentEHNumber()
- << " to landing pad at " << LPI->getParent()->getName()
- << '\n');
+ if (PredBlock->isEHPad() &&
+ isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ // Pred is unsplittable, so we need to queue it on the worklist.
+ Worklist.push_back({PredBlock, PredVal});
+ return;
}
- // Pop any actions that were pushed on the stack for this function.
- popUnmatchedActions(SavedHandlerStackSize);
-
- DEBUG(dbgs() << "Assigning max state " << NextState - 1
- << " to " << F.getName() << '\n');
- FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
-
- CurrentBaseState = OldBaseState;
+ // Otherwise, insert the store at the end of the basic block.
+ new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
}
-// This function follows the same basic traversal as calculateStateNumbers
-// but it is necessary to identify the root landing pad associated
-// with each action before we start assigning state numbers.
-void WinEHNumbering::findActionRootLPads(const Function &F) {
- auto I = VisitedHandlers.insert(&F);
- if (!I.second)
- return; // We've already visited this handler, don't revisit it.
-
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- for (const BasicBlock &BB : F) {
- const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
- if (!II)
- continue;
- const LandingPadInst *LPI = II->getLandingPadInst();
- auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
- if (!ActionsCall)
- continue;
-
- assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
- parseEHActions(ActionsCall, ActionList);
- if (ActionList.empty())
- continue;
- for (int I = 0, E = ActionList.size(); I < E; ++I) {
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
- FuncInfo.LastInvoke[Handler] = II;
- // Don't replace the root landing pad if we previously saw this
- // handler in a different function.
- if (FuncInfo.RootLPad.count(Handler) &&
- FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
- continue;
- DEBUG(dbgs() << "Setting root lpad for ");
- print_name(Handler);
- DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
- FuncInfo.RootLPad[Handler] = LPI;
- }
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads,
+ Function &F) {
+ // Lazily create the spill slot.
+ if (!SpillSlot)
+ SpillSlot = new AllocaInst(V->getType(), nullptr,
+ Twine(V->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+ // If this is a PHI node, we can't insert a load of the value before
+ // the use. Instead insert the load in the predecessor block
+ // corresponding to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this
+ // PHI node, we cannot insert multiple loads: the resulting PHI node
+ // would have multiple values (from each load) coming in from the same
+ // block, which is illegal SSA form.
+ // For this reason, we keep track of and reuse loads we insert.
+ BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+ if (auto *CatchRet =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ // Putting a load above a catchret, with the use on the phi, would still leave
+ // a cross-funclet def/use. We need to split the edge, change the
+ // catchret to target the new block, and put the load there.
+ BasicBlock *PHIBlock = UsingInst->getParent();
+ BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+ // SplitEdge gives us:
+ // IncomingBlock:
+ // ...
+ // br label %NewBlock
+ // NewBlock:
+ // catchret label %PHIBlock
+ // But we need:
+ // IncomingBlock:
+ // ...
+ // catchret label %NewBlock
+ // NewBlock:
+ // br label %PHIBlock
+ // So move the terminators to each others' blocks and swap their
+ // successors.
+ BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+ Goto->removeFromParent();
+ CatchRet->removeFromParent();
+ IncomingBlock->getInstList().push_back(CatchRet);
+ NewBlock->getInstList().push_back(Goto);
+ Goto->setSuccessor(0, PHIBlock);
+ CatchRet->setSuccessor(NewBlock);
+ // Update the color mapping for the newly split edge.
+ ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+ BlockColors[NewBlock] = ColorsForPHIBlock;
+ for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+ FuncletBlocks[FuncletPad].push_back(NewBlock);
+ // Treat the new block as incoming for load insertion.
+ IncomingBlock = NewBlock;
}
- // Walk the actions again and look for nested handlers. This has to
- // happen after all of the actions have been processed in the current
- // function.
- for (int I = 0, E = ActionList.size(); I < E; ++I)
- if (auto *Handler
- = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
- findActionRootLPads(*Handler);
- ActionList.clear();
+ Value *&Load = Loads[IncomingBlock];
+ // Insert the load into the predecessor block.
+ if (!Load)
+ Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, IncomingBlock->getTerminator());
+
+ U.set(Load);
+ } else {
+ // Reload right before the old use.
+ auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, UsingInst);
+ U.set(Load);
}
}
-void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn,
- WinEHFuncInfo &FuncInfo) {
- // Return if it's already been done.
- if (!FuncInfo.LandingPadStateMap.empty())
- return;
-
- WinEHNumbering Num(FuncInfo);
- Num.findActionRootLPads(*ParentFn);
- // The VisitedHandlers list is used by both findActionRootLPads and
- // calculateStateNumbers, but both functions need to visit all handlers.
- Num.VisitedHandlers.clear();
- Num.calculateStateNumbers(*ParentFn);
- // Pop everything on the handler stack.
- // It may be necessary to call this more than once because a handler can
- // be pushed on the stack as a result of clearing the stack.
- while (!Num.HandlerStack.empty())
- Num.processCallSite(None, ImmutableCallSite());
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+ MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd) {
+ assert(InvokeStateMap.count(II) &&
+ "should get invoke with precomputed state");
+ LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
+
+WinEHFuncInfo::WinEHFuncInfo() {}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp
new file mode 100644
index 0000000..91b71cc
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp
@@ -0,0 +1,165 @@
+//===-- FieldListRecordBuilder.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+FieldListRecordBuilder::FieldListRecordBuilder()
+ : ListRecordBuilder(TypeRecordKind::FieldList) {}
+
+void FieldListRecordBuilder::writeBaseClass(MemberAccess Access, TypeIndex Type,
+ uint64_t Offset) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::BaseClass);
+ Builder.writeUInt16(static_cast<uint16_t>(Access));
+ Builder.writeTypeIndex(Type);
+ Builder.writeEncodedUnsignedInteger(Offset);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeEnumerate(MemberAccess Access, uint64_t Value,
+ StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::Enumerate);
+ Builder.writeUInt16(static_cast<uint16_t>(Access));
+ Builder.writeEncodedUnsignedInteger(Value);
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeMember(MemberAccess Access, TypeIndex Type,
+ uint64_t Offset, StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::Member);
+ Builder.writeUInt16(static_cast<uint16_t>(Access));
+ Builder.writeTypeIndex(Type);
+ Builder.writeEncodedUnsignedInteger(Offset);
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeMethod(uint16_t OverloadCount,
+ TypeIndex MethodList, StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::Method);
+ Builder.writeUInt16(OverloadCount);
+ Builder.writeTypeIndex(MethodList);
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(
+ MemberAccess Access, MethodKind Kind, MethodOptions Options, TypeIndex Type,
+ int32_t VTableSlotOffset, StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ uint16_t Flags = static_cast<uint16_t>(Access);
+ Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+ Flags |= static_cast<uint16_t>(Options);
+
+ Builder.writeTypeRecordKind(TypeRecordKind::OneMethod);
+ Builder.writeUInt16(Flags);
+ Builder.writeTypeIndex(Type);
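+ // Only method kinds that introduce a new vtable slot encode the slot
+ // offset; for every other kind the field is omitted entirely.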
+ switch (Kind) {
+ case MethodKind::IntroducingVirtual:
+ case MethodKind::PureIntroducingVirtual:
+ assert(VTableSlotOffset >= 0);
+ Builder.writeInt32(VTableSlotOffset);
+ break;
+
+ default:
+ assert(VTableSlotOffset == -1);
+ break;
+ }
+
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(const MethodInfo &Method,
+ StringRef Name) {
+ writeOneMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+ Method.getType(), Method.getVTableSlotOffset(), Name);
+}
+
+void FieldListRecordBuilder::writeNestedType(TypeIndex Type, StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::NestedType);
+ Builder.writeUInt16(0);
+ Builder.writeTypeIndex(Type);
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeStaticMember(MemberAccess Access,
+ TypeIndex Type, StringRef Name) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::StaticMember);
+ Builder.writeUInt16(static_cast<uint16_t>(Access));
+ Builder.writeTypeIndex(Type);
+ Builder.writeNullTerminatedString(Name);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeIndirectVirtualBaseClass(
+ MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+ int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+ writeVirtualBaseClass(TypeRecordKind::IndirectVirtualBaseClass, Access, Type,
+ VirtualBasePointerType, VirtualBasePointerOffset,
+ SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+ MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+ int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+ writeVirtualBaseClass(TypeRecordKind::VirtualBaseClass, Access, Type,
+ VirtualBasePointerType, VirtualBasePointerOffset,
+ SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+ TypeRecordKind Kind, MemberAccess Access, TypeIndex Type,
+ TypeIndex VirtualBasePointerType, int64_t VirtualBasePointerOffset,
+ uint64_t SlotIndex) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(Kind);
+ Builder.writeUInt16(static_cast<uint16_t>(Access));
+ Builder.writeTypeIndex(Type);
+ Builder.writeTypeIndex(VirtualBasePointerType);
+ Builder.writeEncodedInteger(VirtualBasePointerOffset);
+ Builder.writeEncodedUnsignedInteger(SlotIndex);
+
+ finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeVirtualFunctionTablePointer(TypeIndex Type) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ Builder.writeTypeRecordKind(TypeRecordKind::VirtualFunctionTablePointer);
+ Builder.writeUInt16(0);
+ Builder.writeTypeIndex(Type);
+
+ finishSubRecord();
+}
\ No newline at end of file
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/Line.cpp b/contrib/llvm/lib/DebugInfo/CodeView/Line.cpp
new file mode 100644
index 0000000..4cb766b
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/Line.cpp
@@ -0,0 +1,22 @@
+//===-- Line.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/Line.h"
+
+using namespace llvm;
+using namespace codeview;
+
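+// LineInfo packs the start line, the delta to the end line, and the
+// statement flag into a single 32-bit word, mirroring the on-disk
+// CodeView line-number encoding.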
+LineInfo::LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement) {
+ LineData = StartLine & StartLineMask;
+ uint32_t LineDelta = EndLine - StartLine;
+ LineData |= (LineDelta << EndLineDeltaShift) & EndLineDeltaMask;
+ if (IsStatement) {
+ LineData |= StatementFlag;
+ }
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
new file mode 100644
index 0000000..69c7e87
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
@@ -0,0 +1,31 @@
+//===-- ListRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+ListRecordBuilder::ListRecordBuilder(TypeRecordKind Kind) : Builder(Kind) {}
+
+void ListRecordBuilder::finishSubRecord() {
+ // The builder starts at offset 2 in the actual CodeView buffer, so add an
+ // additional offset of 2 before computing the alignment.
+ uint32_t Remainder = (Builder.size() + 2) % 4;
+ if (Remainder != 0) {
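+ // Pad to 4-byte alignment with CodeView LF_PAD bytes: each byte 0xf0+n
+ // says n bytes (itself included) remain before the next boundary.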
+ for (int32_t PaddingBytesLeft = 4 - Remainder; PaddingBytesLeft > 0;
+ --PaddingBytesLeft) {
+ Builder.writeUInt8(0xf0 + PaddingBytesLeft);
+ }
+ }
+
+ // TODO: Split the list into multiple records if it's longer than 64KB, using
+ // a subrecord of TypeRecordKind::Index to chain the records together.
+ assert(Builder.size() < 65536);
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
new file mode 100644
index 0000000..9afce92
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
@@ -0,0 +1,35 @@
+//===-- MemoryTypeTableBuilder.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MemoryTypeTableBuilder::Record::Record(StringRef RData)
+ : Size(RData.size()), Data(new char[RData.size()]) {
+ memcpy(Data.get(), RData.data(), RData.size());
+}
+
+TypeIndex MemoryTypeTableBuilder::writeRecord(StringRef Data) {
+ auto I = HashedRecords.find(Data);
+ if (I != HashedRecords.end()) {
+ return I->second;
+ }
+
+ std::unique_ptr<Record> R(new Record(Data));
+
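+ // Indices below TypeIndex::FirstNonSimpleIndex (0x1000) are reserved for
+ // CodeView's built-in simple types, so user records are numbered upward
+ // from there.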
+ TypeIndex TI(static_cast<uint32_t>(Records.size()) +
+ TypeIndex::FirstNonSimpleIndex);
+ HashedRecords.insert(std::make_pair(StringRef(R->data(), R->size()), TI));
+ Records.push_back(std::move(R));
+
+ return TI;
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
new file mode 100644
index 0000000..8893025
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
@@ -0,0 +1,49 @@
+//===-- MethodListRecordBuilder.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MethodListRecordBuilder::MethodListRecordBuilder()
+ : ListRecordBuilder(TypeRecordKind::MethodList) {}
+
+void MethodListRecordBuilder::writeMethod(MemberAccess Access, MethodKind Kind,
+ MethodOptions Options, TypeIndex Type,
+ int32_t VTableSlotOffset) {
+ TypeRecordBuilder &Builder = getBuilder();
+
+ uint16_t Flags = static_cast<uint16_t>(Access);
+ Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+ Flags |= static_cast<uint16_t>(Options);
+
+ Builder.writeUInt16(Flags);
+ Builder.writeUInt16(0);
+ Builder.writeTypeIndex(Type);
+ switch (Kind) {
+ case MethodKind::IntroducingVirtual:
+ case MethodKind::PureIntroducingVirtual:
+ assert(VTableSlotOffset >= 0);
+ Builder.writeInt32(VTableSlotOffset);
+ break;
+
+ default:
+ assert(VTableSlotOffset == -1);
+ break;
+ }
+
+ // TODO: Fail if too big?
+}
+
+void MethodListRecordBuilder::writeMethod(const MethodInfo &Method) {
+ writeMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+ Method.getType(), Method.getVTableSlotOffset());
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
new file mode 100644
index 0000000..cbf464f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
@@ -0,0 +1,113 @@
+//===-- TypeRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+TypeRecordBuilder::TypeRecordBuilder(TypeRecordKind Kind) : Stream(Buffer),
+ Writer(Stream) {
+ writeTypeRecordKind(Kind);
+}
+
+StringRef TypeRecordBuilder::str() {
+ return StringRef(Buffer.data(), Buffer.size());
+}
+
+void TypeRecordBuilder::writeUInt8(uint8_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt16(int16_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt16(uint16_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt32(int32_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt32(uint32_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt64(int64_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt64(uint64_t Value) {
+ Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeEncodedInteger(int64_t Value) {
+ if (Value >= 0) {
+ writeEncodedUnsignedInteger(static_cast<uint64_t>(Value));
+ } else {
+ writeEncodedSignedInteger(Value);
+ }
+}
+
+void TypeRecordBuilder::writeEncodedSignedInteger(int64_t Value) {
+ if (Value >= std::numeric_limits<int8_t>::min() &&
+ Value <= std::numeric_limits<int8_t>::max()) {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::SByte));
+ writeInt16(static_cast<int8_t>(Value));
+ } else if (Value >= std::numeric_limits<int16_t>::min() &&
+ Value <= std::numeric_limits<int16_t>::max()) {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int16));
+ writeInt16(static_cast<int16_t>(Value));
+ } else if (Value >= std::numeric_limits<int32_t>::min() &&
+ Value <= std::numeric_limits<int32_t>::max()) {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int32));
+ writeInt32(static_cast<int32_t>(Value));
+ } else {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int64));
+ writeInt64(Value);
+ }
+}
+
+void TypeRecordBuilder::writeEncodedUnsignedInteger(uint64_t Value) {
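+ // CodeView numeric leaf encoding: values below the first leaf kind
+ // (TypeRecordKind::SByte) are stored directly in the two-byte field;
+ // larger values are prefixed with a leaf kind selecting their width.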
+ if (Value < static_cast<uint16_t>(TypeRecordKind::SByte)) {
+ writeUInt16(static_cast<uint16_t>(Value));
+ } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt16));
+ writeUInt16(static_cast<uint16_t>(Value));
+ } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt32));
+ writeUInt32(static_cast<uint32_t>(Value));
+ } else {
+ writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt64));
+ writeUInt64(Value);
+ }
+}
+
+void TypeRecordBuilder::writeNullTerminatedString(const char *Value) {
+ assert(Value != nullptr);
+
+ size_t Length = strlen(Value);
+ Stream.write(Value, Length);
+ writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeNullTerminatedString(StringRef Value) {
+ Stream.write(Value.data(), Value.size());
+ writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeTypeIndex(TypeIndex TypeInd) {
+ writeUInt32(TypeInd.getIndex());
+}
+
+void TypeRecordBuilder::writeTypeRecordKind(TypeRecordKind Kind) {
+ writeUInt16(static_cast<uint16_t>(Kind));
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
new file mode 100644
index 0000000..4af5dca
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
@@ -0,0 +1,217 @@
+//===-- TypeTableBuilder.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace codeview;
+
+namespace {
+
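+// Bit positions of the fields packed into the attribute words of pointer
+// and class records; used by writePointerBase and writeAggregate below.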
+const int PointerKindShift = 0;
+const int PointerModeShift = 5;
+const int PointerSizeShift = 13;
+
+const int ClassHfaKindShift = 11;
+const int ClassWindowsRTClassKindShift = 14;
+
+void writePointerBase(TypeRecordBuilder &Builder,
+ const PointerRecordBase &Record) {
+ Builder.writeTypeIndex(Record.getReferentType());
+ uint32_t flags =
+ static_cast<uint32_t>(Record.getOptions()) |
+ (Record.getSize() << PointerSizeShift) |
+ (static_cast<uint32_t>(Record.getMode()) << PointerModeShift) |
+ (static_cast<uint32_t>(Record.getPointerKind()) << PointerKindShift);
+ Builder.writeUInt32(flags);
+}
+}
+
+TypeTableBuilder::TypeTableBuilder() {}
+
+TypeTableBuilder::~TypeTableBuilder() {}
+
+TypeIndex TypeTableBuilder::writeModifier(const ModifierRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Modifier);
+
+ Builder.writeTypeIndex(Record.getModifiedType());
+ Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeProcedure(const ProcedureRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Procedure);
+
+ Builder.writeTypeIndex(Record.getReturnType());
+ Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+ Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+ Builder.writeUInt16(Record.getParameterCount());
+ Builder.writeTypeIndex(Record.getArgumentList());
+
+ return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeMemberFunction(const MemberFunctionRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::MemberFunction);
+
+ Builder.writeTypeIndex(Record.getReturnType());
+ Builder.writeTypeIndex(Record.getClassType());
+ Builder.writeTypeIndex(Record.getThisType());
+ Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+ Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+ Builder.writeUInt16(Record.getParameterCount());
+ Builder.writeTypeIndex(Record.getArgumentList());
+ Builder.writeInt32(Record.getThisPointerAdjustment());
+
+ return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeArgumentList(const ArgumentListRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::ArgumentList);
+
+ Builder.writeUInt32(Record.getArgumentTypes().size());
+ for (TypeIndex TI : Record.getArgumentTypes()) {
+ Builder.writeTypeIndex(TI);
+ }
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writePointer(const PointerRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+ writePointerBase(Builder, Record);
+
+ return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writePointerToMember(const PointerToMemberRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+ writePointerBase(Builder, Record);
+
+ Builder.writeTypeIndex(Record.getContainingType());
+ Builder.writeUInt16(static_cast<uint16_t>(Record.getRepresentation()));
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeArray(const ArrayRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Array);
+
+ Builder.writeTypeIndex(Record.getElementType());
+ Builder.writeTypeIndex(Record.getIndexType());
+ Builder.writeEncodedUnsignedInteger(Record.getSize());
+ Builder.writeNullTerminatedString(Record.getName());
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeAggregate(const AggregateRecord &Record) {
+ assert((Record.getKind() == TypeRecordKind::Structure) ||
+ (Record.getKind() == TypeRecordKind::Class) ||
+ (Record.getKind() == TypeRecordKind::Union));
+
+ TypeRecordBuilder Builder(Record.getKind());
+
+ Builder.writeUInt16(Record.getMemberCount());
+ uint16_t Flags =
+ static_cast<uint16_t>(Record.getOptions()) |
+ (static_cast<uint16_t>(Record.getHfa()) << ClassHfaKindShift) |
+ (static_cast<uint16_t>(Record.getWinRTKind())
+ << ClassWindowsRTClassKindShift);
+ Builder.writeUInt16(Flags);
+ Builder.writeTypeIndex(Record.getFieldList());
+ if (Record.getKind() != TypeRecordKind::Union) {
+ Builder.writeTypeIndex(Record.getDerivationList());
+ Builder.writeTypeIndex(Record.getVTableShape());
+ } else {
+ assert(Record.getDerivationList() == TypeIndex());
+ assert(Record.getVTableShape() == TypeIndex());
+ }
+ Builder.writeEncodedUnsignedInteger(Record.getSize());
+ Builder.writeNullTerminatedString(Record.getName());
+ if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+ ClassOptions::None) {
+ Builder.writeNullTerminatedString(Record.getUniqueName());
+ }
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeEnum(const EnumRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::Enum);
+
+ Builder.writeUInt16(Record.getMemberCount());
+ Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+ Builder.writeTypeIndex(Record.getUnderlyingType());
+ Builder.writeTypeIndex(Record.getFieldList());
+ Builder.writeNullTerminatedString(Record.getName());
+ if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+ ClassOptions::None) {
+ Builder.writeNullTerminatedString(Record.getUniqueName());
+ }
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeBitField(const BitFieldRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::BitField);
+
+ Builder.writeTypeIndex(Record.getType());
+ Builder.writeUInt8(Record.getBitSize());
+ Builder.writeUInt8(Record.getBitOffset());
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeVirtualTableShape(
+ const VirtualTableShapeRecord &Record) {
+ TypeRecordBuilder Builder(TypeRecordKind::VirtualTableShape);
+
+ ArrayRef<VirtualTableSlotKind> Slots = Record.getSlots();
+
+ Builder.writeUInt16(Slots.size());
+ for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
+ uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
+ if ((SlotIndex + 1) < Slots.size()) {
+ Byte |= static_cast<uint8_t>(Slots[SlotIndex + 1]);
+ }
+ Builder.writeUInt8(Byte);
+ }
+
+ return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeRecord(TypeRecordBuilder &Builder) {
+ return writeRecord(Builder.str());
+}
+
+TypeIndex TypeTableBuilder::writeFieldList(FieldListRecordBuilder &FieldList) {
+ // TODO: Split the list into multiple records if it's longer than 64KB, using
+ // a subrecord of TypeRecordKind::Index to chain the records together.
+ return writeRecord(FieldList.str());
+}
+
+TypeIndex
+TypeTableBuilder::writeMethodList(MethodListRecordBuilder &MethodList) {
+ // TODO: Split the list into multiple records if it's longer than 64KB, using
+ // a subrecord of TypeRecordKind::Index to chain the records together.
+ return writeRecord(MethodList.str());
+}
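
writeVirtualTableShape above packs two 4-bit slot kinds per byte, high nibble first, with an odd trailing slot leaving the low nibble zero. A self-contained sketch of the same packing (the slot values are made up):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Mirror of the packing loop in TypeTableBuilder::writeVirtualTableShape:
    // two 4-bit slot descriptors per byte, high nibble first.
    static std::vector<uint8_t> packSlots(const std::vector<uint8_t> &Slots) {
      std::vector<uint8_t> Bytes;
      for (size_t I = 0; I < Slots.size(); I += 2) {
        uint8_t Byte = static_cast<uint8_t>(Slots[I] << 4);
        if (I + 1 < Slots.size())
          Byte |= Slots[I + 1] & 0xf;
        Bytes.push_back(Byte);
      }
      return Bytes;
    }

    int main() {
      // Slots 0x1, 0x2, 0x3 pack into the two bytes 0x12, 0x30.
      for (uint8_t B : packSlots({0x1, 0x2, 0x3}))
        printf("%02x ", B);
      printf("\n");
    }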
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 96bcf15..a4195b7 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
@@ -126,6 +127,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
getDebugFrame()->dump(OS);
}
+ if (DumpType == DIDT_All || DumpType == DIDT_Macro) {
+ OS << "\n.debug_macinfo contents:\n";
+ getDebugMacro()->dump(OS);
+ }
+
uint32_t offset = 0;
if (DumpType == DIDT_All || DumpType == DIDT_Aranges) {
OS << "\n.debug_aranges contents:\n";
@@ -155,6 +161,16 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
}
}
+ if (DumpType == DIDT_All || DumpType == DIDT_CUIndex) {
+ OS << "\n.debug_cu_index contents:\n";
+ getCUIndex().dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_TUIndex) {
+ OS << "\n.debug_tu_index contents:\n";
+ getTUIndex().dump(OS);
+ }
+
if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) {
OS << "\n.debug_line.dwo contents:\n";
unsigned stmtOffset = 0;
@@ -250,6 +266,28 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
getStringSection(), isLittleEndian());
}
+const DWARFUnitIndex &DWARFContext::getCUIndex() {
+ if (CUIndex)
+ return *CUIndex;
+
+ DataExtractor CUIndexData(getCUIndexSection(), isLittleEndian(), 0);
+
+ CUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
+ CUIndex->parse(CUIndexData);
+ return *CUIndex;
+}
+
+const DWARFUnitIndex &DWARFContext::getTUIndex() {
+ if (TUIndex)
+ return *TUIndex;
+
+ DataExtractor TUIndexData(getTUIndexSection(), isLittleEndian(), 0);
+
+ TUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_TYPES);
+ TUIndex->parse(TUIndexData);
+ return *TUIndex;
+}
+
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
if (Abbrev)
return Abbrev.get();
@@ -322,24 +360,37 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
return DebugFrame.get();
}
+const DWARFDebugMacro *DWARFContext::getDebugMacro() {
+ if (Macro)
+ return Macro.get();
+
+ DataExtractor MacinfoData(getMacinfoSection(), isLittleEndian(), 0);
+ Macro.reset(new DWARFDebugMacro());
+ Macro->parse(MacinfoData);
+ return Macro.get();
+}
+
const DWARFLineTable *
DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!Line)
Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
+
const auto *UnitDIE = U->getUnitDIE();
if (UnitDIE == nullptr)
return nullptr;
+
unsigned stmtOffset =
UnitDIE->getAttributeValueAsSectionOffset(U, DW_AT_stmt_list, -1U);
if (stmtOffset == -1U)
return nullptr; // No line table for this compile unit.
+ stmtOffset += U->getLineTableOffset();
// See if the line table is cached.
if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
return lt;
// We have to parse it first.
- DataExtractor lineData(getLineSection().Data, isLittleEndian(),
+ DataExtractor lineData(U->getLineSection(), isLittleEndian(),
U->getAddressByteSize());
return Line->getOrParseLineTable(lineData, stmtOffset);
}
@@ -556,10 +607,11 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
continue;
StringRef data;
+ section_iterator RelocatedSection = Section.getRelocatedSection();
// Try to obtain an already relocated version of this section.
// Else use the unrelocated section from the object file. We'll have to
// apply relocations ourselves later.
- if (!L || !L->getLoadedSectionContents(name,data))
+ if (!L || !L->getLoadedSectionContents(*RelocatedSection,data))
Section.getContents(data);
name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
@@ -591,6 +643,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
.Case("debug_frame", &DebugFrameSection)
.Case("debug_str", &StringSection)
.Case("debug_ranges", &RangeSection)
+ .Case("debug_macinfo", &MacinfoSection)
.Case("debug_pubnames", &PubNamesSection)
.Case("debug_pubtypes", &PubTypesSection)
.Case("debug_gnu_pubnames", &GnuPubNamesSection)
@@ -607,6 +660,8 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
.Case("apple_namespaces", &AppleNamespacesSection.Data)
.Case("apple_namespac", &AppleNamespacesSection.Data)
.Case("apple_objc", &AppleObjCSection.Data)
+ .Case("debug_cu_index", &CUIndexSection)
+ .Case("debug_tu_index", &TUIndexSection)
// Any more debug info sections go here.
.Default(nullptr);
if (SectionData) {
@@ -623,7 +678,6 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
TypesDWOSections[Section].Data = data;
}
- section_iterator RelocatedSection = Section.getRelocatedSection();
if (RelocatedSection == Obj.section_end())
continue;
@@ -634,7 +688,15 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// If the section we're relocating was relocated already by the JIT,
// then we used the relocated version above, so we do not need to process
// relocations for it now.
- if (L && L->getLoadedSectionContents(RelSecName,RelSecData))
+ if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData))
+ continue;
+
+  // In Mach-O files, the relocations do not need to be applied if
+  // there is no load offset to apply. The value read at the
+  // relocation point already factors in the section address
+  // (actually applying the relocations would produce wrong results,
+  // as the section address would be added twice).
+ if (!L && isa<MachOObjectFile>(&Obj))
continue;
RelSecName = RelSecName.substr(
@@ -685,13 +747,19 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
}
SymAddr = *SymAddrOrErr;
// Also remember what section this symbol is in for later
- Sym->getSection(RSec);
+ RSec = *Sym->getSection();
} else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
// MachO also has relocations that point to sections and
// scattered relocations.
- // FIXME: We are not handling scattered relocations, do we have to?
- RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
- SymAddr = RSec->getAddress();
+ auto RelocInfo = MObj->getRelocation(Reloc.getRawDataRefImpl());
+ if (MObj->isRelocationScattered(RelocInfo)) {
+ // FIXME: it's not clear how to correctly handle scattered
+ // relocations.
+ continue;
+ } else {
+ RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
+ SymAddr = RSec->getAddress();
+ }
}
// If we are given load addresses for the sections, we need to adjust:
@@ -699,12 +767,15 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// (Address of Section in File) +
// (Load Address of Section)
if (L != nullptr && RSec != Obj.section_end()) {
- // RSec is now either the section being targetted or the section
- // containing the symbol being targetted. In either case,
+ // RSec is now either the section being targeted or the section
+ // containing the symbol being targeted. In either case,
// we need to perform the same computation.
StringRef SecName;
RSec->getName(SecName);
- SectionLoadAddress = L->getSectionLoadAddress(SecName);
+ SectionLoadAddress = L->getSectionLoadAddress(*RSec);
if (SectionLoadAddress != 0)
SymAddr += SectionLoadAddress - RSec->getAddress();
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 5abbde4..62d5e66 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -139,7 +139,7 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
std::string File;
auto Color = syntax::Enumerator;
if (attr == DW_AT_decl_file || attr == DW_AT_call_file) {
- Color = syntax::String;
+ Color = syntax::String;
if (const auto *LT = u->getContext().getLineTableForUnit(u))
if (LT->getFileNameByIndex(
formValue.getAsUnsignedConstant().getValue(),
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
new file mode 100644
index 0000000..b6555fa
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -0,0 +1,103 @@
+//===-- DWARFDebugMacro.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SyntaxHighlighting.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace dwarf;
+using namespace syntax;
+
+void DWARFDebugMacro::dump(raw_ostream &OS) const {
+ unsigned IndLevel = 0;
+ for (const Entry &E : Macros) {
+    // There should be no DW_MACINFO_end_file when IndLevel is zero; this
+    // check also handles the case of a corrupted ".debug_macinfo" section.
+ if (IndLevel > 0)
+ IndLevel -= (E.Type == DW_MACINFO_end_file);
+ // Print indentation.
+ for (unsigned I = 0; I < IndLevel; I++)
+ OS << " ";
+ IndLevel += (E.Type == DW_MACINFO_start_file);
+
+ WithColor(OS, syntax::Macro).get() << MacinfoString(E.Type);
+ switch (E.Type) {
+ default:
+ // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+ break;
+ case DW_MACINFO_define:
+ case DW_MACINFO_undef:
+ OS << " - lineno: " << E.Line;
+ OS << " macro: " << E.MacroStr;
+ break;
+ case DW_MACINFO_start_file:
+ OS << " - lineno: " << E.Line;
+ OS << " filenum: " << E.File;
+ break;
+ case DW_MACINFO_end_file:
+ break;
+ case DW_MACINFO_vendor_ext:
+ OS << " - constant: " << E.ExtConstant;
+ OS << " string: " << E.ExtStr;
+ break;
+ }
+ OS << "\n";
+ }
+}
+
+void DWARFDebugMacro::parse(DataExtractor data) {
+ uint32_t Offset = 0;
+ while (data.isValidOffset(Offset)) {
+ // A macro list entry consists of:
+ Entry E;
+ // 1. Macinfo type
+ E.Type = data.getULEB128(&Offset);
+
+ if (E.Type == 0) {
+ // Reached end of ".debug_macinfo" section.
+ return;
+ }
+
+ switch (E.Type) {
+ default:
+ // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+ // Push the corrupted entry to the list and halt parsing.
+ E.Type = DW_MACINFO_invalid;
+ Macros.push_back(E);
+ return;
+ case DW_MACINFO_define:
+ case DW_MACINFO_undef:
+ // 2. Source line
+ E.Line = data.getULEB128(&Offset);
+ // 3. Macro string
+ E.MacroStr = data.getCStr(&Offset);
+ break;
+ case DW_MACINFO_start_file:
+ // 2. Source line
+ E.Line = data.getULEB128(&Offset);
+ // 3. Source file id
+ E.File = data.getULEB128(&Offset);
+ break;
+ case DW_MACINFO_end_file:
+ break;
+ case DW_MACINFO_vendor_ext:
+ // 2. Vendor extension constant
+ E.ExtConstant = data.getULEB128(&Offset);
+ // 3. Vendor extension string
+ E.ExtStr = data.getCStr(&Offset);
+ break;
+ }
+
+ Macros.push_back(E);
+ }
+}
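
Each entry parsed above is a ULEB128 type code followed by type-specific operands, and a zero type code terminates the list. A hand-assembled stream and its decoding (the bytes are illustrative; the type codes are the standard DWARF DW_MACINFO values):

    01 04 46 4f 4f 20 31 00   DW_MACINFO_define, line 4, macro "FOO 1"
    03 00 01                  DW_MACINFO_start_file, line 0, file 1
    04                        DW_MACINFO_end_file
    00                        end of list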
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 53a676e..3dc5842 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <climits>
+#include <limits>
using namespace llvm;
using namespace dwarf;
using namespace syntax;
@@ -110,7 +110,7 @@ static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
// First, check DWARF4 form classes.
- if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() &&
+ if (Form < makeArrayRef(DWARF4FormClasses).size() &&
DWARF4FormClasses[Form] == FC)
return true;
// Check more forms from DWARF4 and DWARF5 proposals.
@@ -261,6 +261,12 @@ DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
bool
DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
uint32_t *offset_ptr, const DWARFUnit *cu) {
+ return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(),
+ cu->getAddressByteSize());
+}
+bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, uint16_t Version,
+ uint8_t AddrSize) {
bool indirect = false;
do {
switch (form) {
@@ -295,10 +301,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
// Compile unit address sized values
case DW_FORM_addr:
- *offset_ptr += cu->getAddressByteSize();
+ *offset_ptr += AddrSize;
return true;
case DW_FORM_ref_addr:
- *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+ *offset_ptr += getRefAddrSize(AddrSize, Version);
return true;
// 0 byte values - implied from the form.
@@ -565,7 +571,7 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
- (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval))
+ (Form == DW_FORM_udata && uint64_t(std::numeric_limits<int64_t>::max()) < Value.uval))
return None;
switch (Form) {
case DW_FORM_data4:
@@ -584,6 +590,6 @@ Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
return None;
- return ArrayRef<uint8_t>(Value.data, Value.uval);
+ return makeArrayRef(Value.data, Value.uval);
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 348476d..92ca2d4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -14,29 +14,37 @@
#include "llvm/Support/Path.h"
#include <cstdio>
-using namespace llvm;
+namespace llvm {
using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(),
C.getStringSection(), StringRef(), C.getAddrSection(),
- C.isLittleEndian());
+ C.getLineSection().Data, C.isLittleEndian());
}
void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
- const DWARFSection &DWOSection) {
+ const DWARFSection &DWOSection,
+ DWARFUnitIndex *Index) {
parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(),
C.getStringDWOSection(), C.getStringOffsetDWOSection(),
- C.getAddrSection(), C.isLittleEndian());
+ C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian());
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, bool LE,
- const DWARFUnitSectionBase &UnitSection)
+ StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+ const DWARFUnitSectionBase &UnitSection,
+ const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
- StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
- isLittleEndian(LE), UnitSection(UnitSection) {
+ LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
+ if (IndexEntry)
+ if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+ return SOS.slice(C->Offset, C->Offset + C->Length);
+ return SOS;
+ }()),
+ AddrOffsetSection(AOS), isLittleEndian(LE), UnitSection(UnitSection),
+ IndexEntry(IndexEntry) {
clear();
}
@@ -69,6 +77,17 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
Version = debug_info.getU16(offset_ptr);
uint64_t AbbrOffset = debug_info.getU32(offset_ptr);
+ if (IndexEntry) {
+ if (AbbrOffset)
+ return false;
+ auto *UnitContrib = IndexEntry->getOffset();
+ if (!UnitContrib || UnitContrib->Length != (Length + 4))
+ return false;
+ auto *AbbrEntry = IndexEntry->getOffset(DW_SECT_ABBREV);
+ if (!AbbrEntry)
+ return false;
+ AbbrOffset = AbbrEntry->Offset;
+ }
AddrSize = debug_info.getU8(offset_ptr);
bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
@@ -375,3 +394,12 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address) {
return DWARFDebugInfoEntryInlinedChain();
return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address);
}
+
+const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
+ DWARFSectionKind Kind) {
+ if (Kind == DW_SECT_INFO)
+ return Context.getCUIndex();
+ assert(Kind == DW_SECT_TYPES);
+ return Context.getTUIndex();
+}
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
new file mode 100644
index 0000000..96b3169
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -0,0 +1,168 @@
+//===-- DWARFUnitIndex.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+bool DWARFUnitIndex::Header::parse(DataExtractor IndexData,
+ uint32_t *OffsetPtr) {
+ if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16))
+ return false;
+ Version = IndexData.getU32(OffsetPtr);
+ NumColumns = IndexData.getU32(OffsetPtr);
+ NumUnits = IndexData.getU32(OffsetPtr);
+ NumBuckets = IndexData.getU32(OffsetPtr);
+ return Version <= 2;
+}
+
+void DWARFUnitIndex::Header::dump(raw_ostream &OS) const {
+ OS << format("version = %u slots = %u\n\n", Version, NumBuckets);
+}
+
+bool DWARFUnitIndex::parse(DataExtractor IndexData) {
+  bool Success = parseImpl(IndexData);
+  if (!Success) {
+    // Make sure we don't try to dump anything
+    Header.NumBuckets = 0;
+    // Release any partially initialized data.
+    ColumnKinds.reset();
+    Rows.reset();
+  }
+  return Success;
+}
+
+bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
+ uint32_t Offset = 0;
+ if (!Header.parse(IndexData, &Offset))
+ return false;
+
+ if (!IndexData.isValidOffsetForDataOfSize(
+ Offset, Header.NumBuckets * (8 + 4) +
+ (2 * Header.NumUnits + 1) * 4 * Header.NumColumns))
+ return false;
+
+ Rows = llvm::make_unique<Entry[]>(Header.NumBuckets);
+ auto Contribs =
+ llvm::make_unique<Entry::SectionContribution *[]>(Header.NumUnits);
+ ColumnKinds = llvm::make_unique<DWARFSectionKind[]>(Header.NumColumns);
+
+ // Read Hash Table of Signatures
+ for (unsigned i = 0; i != Header.NumBuckets; ++i)
+ Rows[i].Signature = IndexData.getU64(&Offset);
+
+ // Read Parallel Table of Indexes
+ for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+ auto Index = IndexData.getU32(&Offset);
+ if (!Index)
+ continue;
+ Rows[i].Index = this;
+ Rows[i].Contributions =
+ llvm::make_unique<Entry::SectionContribution[]>(Header.NumColumns);
+ Contribs[Index - 1] = Rows[i].Contributions.get();
+ }
+
+ // Read the Column Headers
+ for (unsigned i = 0; i != Header.NumColumns; ++i) {
+ ColumnKinds[i] = static_cast<DWARFSectionKind>(IndexData.getU32(&Offset));
+ if (ColumnKinds[i] == InfoColumnKind) {
+ if (InfoColumn != -1)
+ return false;
+ InfoColumn = i;
+ }
+ }
+
+ if (InfoColumn == -1)
+ return false;
+
+  // Read Table of Section Offsets
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned j = 0; j != Header.NumColumns; ++j)
+      Contrib[j].Offset = IndexData.getU32(&Offset);
+  }
+
+  // Read Table of Section Sizes
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned j = 0; j != Header.NumColumns; ++j)
+      Contrib[j].Length = IndexData.getU32(&Offset);
+  }
+
+ return true;
+}
+
+StringRef DWARFUnitIndex::getColumnHeader(DWARFSectionKind DS) {
+#define CASE(DS) \
+ case DW_SECT_##DS: \
+ return #DS;
+ switch (DS) {
+ CASE(INFO);
+ CASE(TYPES);
+ CASE(ABBREV);
+ CASE(LINE);
+ CASE(LOC);
+ CASE(STR_OFFSETS);
+ CASE(MACINFO);
+ CASE(MACRO);
+ }
+ llvm_unreachable("unknown DWARFSectionKind");
+}
+
+void DWARFUnitIndex::dump(raw_ostream &OS) const {
+ if (!Header.NumBuckets)
+ return;
+
+ Header.dump(OS);
+ OS << "Index Signature ";
+ for (unsigned i = 0; i != Header.NumColumns; ++i)
+ OS << ' ' << left_justify(getColumnHeader(ColumnKinds[i]), 24);
+ OS << "\n----- ------------------";
+ for (unsigned i = 0; i != Header.NumColumns; ++i)
+ OS << " ------------------------";
+ OS << '\n';
+  for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+    auto &Row = Rows[i];
+    if (auto *Contribs = Row.Contributions.get()) {
+      OS << format("%5u 0x%016" PRIx64 " ", i + 1, Row.Signature);
+      for (unsigned j = 0; j != Header.NumColumns; ++j) {
+        auto &Contrib = Contribs[j];
+        OS << format("[0x%08x, 0x%08x) ", Contrib.Offset,
+                     Contrib.Offset + Contrib.Length);
+      }
+      OS << '\n';
+    }
+  }
+}
+
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const {
+ uint32_t i = 0;
+ for (; i != Index->Header.NumColumns; ++i)
+ if (Index->ColumnKinds[i] == Sec)
+ return &Contributions[i];
+ return nullptr;
+}
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset() const {
+ return &Contributions[Index->InfoColumn];
+}
+
+const DWARFUnitIndex::Entry *
+DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
+ for (uint32_t i = 0; i != Header.NumBuckets; ++i)
+ if (const auto &Contribs = Rows[i].Contributions)
+ if (Contribs[InfoColumn].Offset == Offset)
+ return &Rows[i];
+ return nullptr;
+}
+}
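
The size check at the top of parseImpl accounts for everything that follows the 16-byte header: the signature hash table (NumBuckets x 8 bytes), the parallel index table (NumBuckets x 4), one row of column headers (NumColumns x 4), and the offset and size tables (NumUnits x NumColumns x 4 each) — exactly NumBuckets*(8+4) + (2*NumUnits+1)*4*NumColumns. For example, with NumBuckets = 16, NumUnits = 4, and NumColumns = 3:

    hash table        16 * 8    = 128 bytes
    index table       16 * 4    =  64 bytes
    column headers     3 * 4    =  12 bytes
    offset table   4 * 3 * 4    =  48 bytes
    size table     4 * 3 * 4    =  48 bytes
    total after header          = 300 = 16*(8+4) + (2*4+1)*4*3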
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
index a6b4c65..4f561d0 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
@@ -27,6 +27,7 @@ WithColor::WithColor(llvm::raw_ostream &OS, enum HighlightColor Type) : OS(OS) {
case Tag: OS.changeColor(llvm::raw_ostream::BLUE); break;
case Attribute: OS.changeColor(llvm::raw_ostream::CYAN); break;
case Enumerator: OS.changeColor(llvm::raw_ostream::MAGENTA); break;
+ case Macro: OS.changeColor(llvm::raw_ostream::RED); break;
}
}
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 946a313..16e6835 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -17,7 +17,7 @@ namespace dwarf {
namespace syntax {
// Symbolic names for various syntax elements.
-enum HighlightColor { Address, String, Tag, Attribute, Enumerator };
+enum HighlightColor { Address, String, Tag, Attribute, Enumerator, Macro };
/// An RAII object that temporarily switches an output stream to a
/// specific color.
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
index 13201bb..613407e 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -31,7 +31,7 @@ PDB_ErrorCode llvm::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
PDB_ErrorCode llvm::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
std::unique_ptr<IPDBSession> &Session) {
-// Create the correct concrete instance type based on the value of Type.
+ // Create the correct concrete instance type based on the value of Type.
#if HAVE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#endif
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
index 83f27c7..ca2ae66 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
@@ -21,24 +21,11 @@ using namespace llvm;
using namespace llvm::object;
PDBContext::PDBContext(const COFFObjectFile &Object,
- std::unique_ptr<IPDBSession> PDBSession,
- bool RelativeAddress)
+ std::unique_ptr<IPDBSession> PDBSession)
: DIContext(CK_PDB), Session(std::move(PDBSession)) {
- if (!RelativeAddress) {
- uint64_t ImageBase = 0;
- if (Object.is64()) {
- const pe32plus_header *Header = nullptr;
- Object.getPE32PlusHeader(Header);
- if (Header)
- ImageBase = Header->ImageBase;
- } else {
- const pe32_header *Header = nullptr;
- Object.getPE32Header(Header);
- if (Header)
- ImageBase = static_cast<uint64_t>(Header->ImageBase);
- }
- Session->setLoadAddress(ImageBase);
- }
+ ErrorOr<uint64_t> ImageBase = Object.getImageBase();
+ if (ImageBase)
+ Session->setLoadAddress(ImageBase.get());
}
void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType) {}
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
new file mode 100644
index 0000000..c6bfbc0
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -0,0 +1,69 @@
+//===- lib/DebugInfo/Symbolize/DIPrinter.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DIPrinter class, which is responsible for printing
+// structures defined in DebugInfo/DIContext.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+
+#include "llvm/DebugInfo/DIContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+// By default, DILineInfo contains "<invalid>" for a function/filename it
+// cannot fetch. We replace it with "??" to bring our output closer to
+// addr2line's.
+static const char kDILineInfoBadString[] = "<invalid>";
+static const char kBadString[] = "??";
+
+void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
+ if (PrintFunctionNames) {
+ std::string FunctionName = Info.FunctionName;
+ if (FunctionName == kDILineInfoBadString)
+ FunctionName = kBadString;
+
+    StringRef Delimiter = PrintPretty ? " at " : "\n";
+ StringRef Prefix = (PrintPretty && Inlined) ? " (inlined by) " : "";
+ OS << Prefix << FunctionName << Delimiter;
+ }
+ std::string Filename = Info.FileName;
+ if (Filename == kDILineInfoBadString)
+ Filename = kBadString;
+ OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+}
+
+DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
+ printName(Info, false);
+ return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
+ uint32_t FramesNum = Info.getNumberOfFrames();
+ if (FramesNum == 0) {
+ printName(DILineInfo(), false);
+ return *this;
+ }
+ for (uint32_t i = 0; i < FramesNum; i++)
+ printName(Info.getFrame(i), i > 0);
+ return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
+ std::string Name = Global.Name;
+ if (Name == kDILineInfoBadString)
+ Name = kBadString;
+ OS << Name << "\n";
+ OS << Global.Start << " " << Global.Size << "\n";
+ return *this;
+}
+
+}
+}
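
Given printName above, each frame is printed as the function name followed by file:line:column, with unknown names replaced by "??"; in pretty mode the pieces are joined with " at " and inlined frames get an " (inlined by) " prefix. Illustrative output for a two-frame inlining chain (names and paths made up):

    default:
        inlined_fn
        /tmp/a.c:3:7
        main
        /tmp/a.c:9:1
    with PrintPretty:
        inlined_fn at /tmp/a.c:3:7
         (inlined by) main at /tmp/a.c:9:1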
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
new file mode 100644
index 0000000..e314624
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -0,0 +1,254 @@
+//===-- SymbolizableObjectFile.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolizableObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+using namespace object;
+
+static DILineInfoSpecifier
+getDILineInfoSpecifier(FunctionNameKind FNKind) {
+ return DILineInfoSpecifier(
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
+}
+
+ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+SymbolizableObjectFile::create(object::ObjectFile *Obj,
+ std::unique_ptr<DIContext> DICtx) {
+ std::unique_ptr<SymbolizableObjectFile> res(
+ new SymbolizableObjectFile(Obj, std::move(DICtx)));
+ std::unique_ptr<DataExtractor> OpdExtractor;
+ uint64_t OpdAddress = 0;
+ // Find the .opd (function descriptor) section if any, for big-endian
+ // PowerPC64 ELF.
+ if (Obj->getArch() == Triple::ppc64) {
+ for (section_iterator Section : Obj->sections()) {
+ StringRef Name;
+ StringRef Data;
+ if (auto EC = Section->getName(Name))
+ return EC;
+ if (Name == ".opd") {
+ if (auto EC = Section->getContents(Data))
+ return EC;
+ OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
+ Obj->getBytesInAddress()));
+ OpdAddress = Section->getAddress();
+ break;
+ }
+ }
+ }
+ std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+ computeSymbolSizes(*Obj);
+ for (auto &P : Symbols)
+ res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
+
+ // If this is a COFF object and we didn't find any symbols, try the export
+ // table.
+ if (Symbols.empty()) {
+ if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
+ if (auto EC = res->addCoffExportSymbols(CoffObj))
+ return EC;
+ }
+ return std::move(res);
+}
+
+SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
+ std::unique_ptr<DIContext> DICtx)
+ : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
+
+namespace {
+struct OffsetNamePair {
+ uint32_t Offset;
+ StringRef Name;
+ bool operator<(const OffsetNamePair &R) const {
+ return Offset < R.Offset;
+ }
+};
+}
+
+std::error_code SymbolizableObjectFile::addCoffExportSymbols(
+ const COFFObjectFile *CoffObj) {
+ // Get all export names and offsets.
+ std::vector<OffsetNamePair> ExportSyms;
+ for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
+ StringRef Name;
+ uint32_t Offset;
+ if (auto EC = Ref.getSymbolName(Name))
+ return EC;
+ if (auto EC = Ref.getExportRVA(Offset))
+ return EC;
+ ExportSyms.push_back(OffsetNamePair{Offset, Name});
+ }
+ if (ExportSyms.empty())
+ return std::error_code();
+
+ // Sort by ascending offset.
+ array_pod_sort(ExportSyms.begin(), ExportSyms.end());
+
+ // Approximate the symbol sizes by assuming they run to the next symbol.
+ // FIXME: This assumes all exports are functions.
+ uint64_t ImageBase = CoffObj->getImageBase();
+ for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
+ OffsetNamePair &Export = *I;
+ // FIXME: The last export has a one byte size now.
+    uint32_t NextOffset = I + 1 != E ? (I + 1)->Offset : Export.Offset + 1;
+ uint64_t SymbolStart = ImageBase + Export.Offset;
+ uint64_t SymbolSize = NextOffset - Export.Offset;
+ SymbolDesc SD = {SymbolStart, SymbolSize};
+ Functions.insert(std::make_pair(SD, Export.Name));
+ }
+ return std::error_code();
+}
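
Since the exports are sorted by RVA, each export's size is approximated as the gap to the next one, and the final export falls back to a one-byte placeholder. With an image base of 0x400000 and export RVAs 0x1000 ("foo") and 0x1040 ("bar") — made-up numbers:

    foo: SymbolStart = 0x401000, SymbolSize = 0x40
    bar: SymbolStart = 0x401040, SymbolSize = 1     (last export placeholder)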
+
+std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
+ uint64_t SymbolSize,
+ DataExtractor *OpdExtractor,
+ uint64_t OpdAddress) {
+ SymbolRef::Type SymbolType = Symbol.getType();
+ if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
+ return std::error_code();
+ ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
+ if (auto EC = SymbolAddressOrErr.getError())
+ return EC;
+ uint64_t SymbolAddress = *SymbolAddressOrErr;
+ if (OpdExtractor) {
+ // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
+ // function descriptors. The first word of the descriptor is a pointer to
+ // the function's code.
+ // For the purposes of symbolization, pretend the symbol's address is that
+ // of the function's code, not the descriptor.
+ uint64_t OpdOffset = SymbolAddress - OpdAddress;
+ uint32_t OpdOffset32 = OpdOffset;
+ if (OpdOffset == OpdOffset32 &&
+ OpdExtractor->isValidOffsetForAddress(OpdOffset32))
+ SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+ }
+ ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+ if (auto EC = SymbolNameOrErr.getError())
+ return EC;
+ StringRef SymbolName = *SymbolNameOrErr;
+ // Mach-O symbol table names have leading underscore, skip it.
+ if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
+ SymbolName = SymbolName.drop_front();
+  // FIXME: If a function has an alias, there are two entries in the symbol
+  // table with the same address and size. Make sure we choose the correct one.
+ auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
+ SymbolDesc SD = { SymbolAddress, SymbolSize };
+ M.insert(std::make_pair(SD, SymbolName));
+ return std::error_code();
+}
+
+// Return true if this is a 32-bit x86 PE COFF module.
+bool SymbolizableObjectFile::isWin32Module() const {
+ auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+ return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
+ if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
+ return CoffObject->getImageBase();
+ return 0;
+}
+
+bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
+ uint64_t Address,
+ std::string &Name,
+ uint64_t &Addr,
+ uint64_t &Size) const {
+ const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
+ if (SymbolMap.empty())
+ return false;
+ SymbolDesc SD = { Address, Address };
+ auto SymbolIterator = SymbolMap.upper_bound(SD);
+ if (SymbolIterator == SymbolMap.begin())
+ return false;
+ --SymbolIterator;
+ if (SymbolIterator->first.Size != 0 &&
+ SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
+ return false;
+ Name = SymbolIterator->second.str();
+ Addr = SymbolIterator->first.Addr;
+ Size = SymbolIterator->first.Size;
+ return true;
+}
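
The lookup above relies on SymbolDesc's operator< comparing only Addr (so the Size field passed to upper_bound is irrelevant): upper_bound finds the first symbol starting after the query address, stepping back one entry yields the candidate, and a nonzero size ending at or before the address rejects it. A minimal sketch of the same pattern on a plain std::map, with hypothetical addresses:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>

    struct Sym { uint64_t Addr, Size; };
    static bool operator<(const Sym &A, const Sym &B) { return A.Addr < B.Addr; }

    // Same pattern as getNameFromSymbolTable: the greatest symbol whose start
    // address is <= Address, rejected when Address lies past its known size.
    static const char *lookup(const std::map<Sym, std::string> &M,
                              uint64_t Address) {
      auto It = M.upper_bound(Sym{Address, 0});
      if (It == M.begin())
        return nullptr;
      --It;
      if (It->first.Size != 0 && It->first.Addr + It->first.Size <= Address)
        return nullptr;
      return It->second.c_str();
    }

    int main() {
      std::map<Sym, std::string> Funcs = {{{0x1000, 0x20}, "foo"},
                                          {{0x1040, 0x10}, "bar"}};
      const char *N1 = lookup(Funcs, 0x1010); // "foo": inside [0x1000, 0x1020)
      const char *N2 = lookup(Funcs, 0x1030); // gap: no symbol covers this
      printf("%s %s\n", N1 ? N1 : "??", N2 ? N2 : "??");
    }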
+
+bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
+ FunctionNameKind FNKind, bool UseSymbolTable) const {
+ // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
+ // better answers for linkage names than the DIContext. Otherwise, we are
+ // probably using PEs and PDBs, and we shouldn't do the override. PE files
+ // generally only contain the names of exported symbols.
+ return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
+ isa<DWARFContext>(DebugInfoContext.get());
+}
+
+DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
+ FunctionNameKind FNKind,
+ bool UseSymbolTable) const {
+ DILineInfo LineInfo;
+ if (DebugInfoContext) {
+ LineInfo = DebugInfoContext->getLineInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifier(FNKind));
+ }
+ // Override function name from symbol table if necessary.
+ if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+ std::string FunctionName;
+ uint64_t Start, Size;
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ FunctionName, Start, Size)) {
+ LineInfo.FunctionName = FunctionName;
+ }
+ }
+ return LineInfo;
+}
+
+DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
+ uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
+ DIInliningInfo InlinedContext;
+
+ if (DebugInfoContext)
+ InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifier(FNKind));
+ // Make sure there is at least one frame in context.
+ if (InlinedContext.getNumberOfFrames() == 0)
+ InlinedContext.addFrame(DILineInfo());
+
+ // Override the function name in lower frame with name from symbol table.
+ if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+ std::string FunctionName;
+ uint64_t Start, Size;
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ FunctionName, Start, Size)) {
+ InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
+ ->FunctionName = FunctionName;
+ }
+ }
+
+ return InlinedContext;
+}
+
+DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
+ DIGlobal Res;
+ getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
+ Res.Size);
+ return Res;
+}
+
+} // namespace symbolize
+} // namespace llvm
+
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
new file mode 100644
index 0000000..8583b6a
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -0,0 +1,82 @@
+//===-- SymbolizableObjectFile.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include <map>
+
+namespace llvm {
+class DataExtractor;
+}
+
+namespace llvm {
+namespace symbolize {
+
+class SymbolizableObjectFile : public SymbolizableModule {
+public:
+ static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+ create(object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+
+ DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind,
+ bool UseSymbolTable) const override;
+ DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+ FunctionNameKind FNKind,
+ bool UseSymbolTable) const override;
+ DIGlobal symbolizeData(uint64_t ModuleOffset) const override;
+
+ // Return true if this is a 32-bit x86 PE COFF module.
+ bool isWin32Module() const override;
+
+ // Returns the preferred base of the module, i.e. where the loader would place
+ // it in memory assuming there were no conflicts.
+ uint64_t getModulePreferredBase() const override;
+
+private:
+ bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind,
+ bool UseSymbolTable) const;
+
+ bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address,
+ std::string &Name, uint64_t &Addr,
+ uint64_t &Size) const;
+ // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
+ // (function descriptor) section and OpdExtractor refers to its contents.
+ std::error_code addSymbol(const object::SymbolRef &Symbol,
+ uint64_t SymbolSize,
+ DataExtractor *OpdExtractor = nullptr,
+ uint64_t OpdAddress = 0);
+ std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
+
+ object::ObjectFile *Module;
+ std::unique_ptr<DIContext> DebugInfoContext;
+
+ struct SymbolDesc {
+ uint64_t Addr;
+    // If size is 0, assume that the symbol occupies the whole memory range
+    // up to the following symbol.
+ uint64_t Size;
+ friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
+ return s1.Addr < s2.Addr;
+ }
+ };
+ std::map<SymbolDesc, StringRef> Functions;
+ std::map<SymbolDesc, StringRef> Objects;
+
+ SymbolizableObjectFile(object::ObjectFile *Obj,
+ std::unique_ptr<DIContext> DICtx);
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
new file mode 100644
index 0000000..3da1963
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -0,0 +1,456 @@
+//===-- Symbolize.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+
+#include "SymbolizableObjectFile.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/DebugInfo/PDB/PDBContext.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <stdlib.h>
+
+#if defined(_MSC_VER)
+#include <Windows.h>
+#include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
+#endif
+
+namespace llvm {
+namespace symbolize {
+
+ErrorOr<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+ uint64_t ModuleOffset) {
+ auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+ if (auto EC = InfoOrErr.getError())
+ return EC;
+ SymbolizableModule *Info = InfoOrErr.get();
+
+ // If the user is giving us relative addresses, add the preferred base of the
+ // object to the offset before we do the query. It's what DIContext expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset += Info->getModulePreferredBase();
+
+ DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
+ Opts.UseSymbolTable);
+ if (Opts.Demangle)
+ LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+ return LineInfo;
+}
+
+ErrorOr<DIInliningInfo>
+LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
+ uint64_t ModuleOffset) {
+ auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+ if (auto EC = InfoOrErr.getError())
+ return EC;
+ SymbolizableModule *Info = InfoOrErr.get();
+
+ // If the user is giving us relative addresses, add the preferred base of the
+ // object to the offset before we do the query. It's what DIContext expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset += Info->getModulePreferredBase();
+
+ DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
+ ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
+ if (Opts.Demangle) {
+ for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
+ auto *Frame = InlinedContext.getMutableFrame(i);
+ Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
+ }
+ }
+ return InlinedContext;
+}
+
+ErrorOr<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+ uint64_t ModuleOffset) {
+ auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+ if (auto EC = InfoOrErr.getError())
+ return EC;
+ SymbolizableModule *Info = InfoOrErr.get();
+
+ // If the user is giving us relative addresses, add the preferred base of
+ // the object to the offset before we do the query. It's what DIContext
+ // expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset += Info->getModulePreferredBase();
+
+ DIGlobal Global = Info->symbolizeData(ModuleOffset);
+ if (Opts.Demangle)
+ Global.Name = DemangleName(Global.Name, Info);
+ return Global;
+}
+
+void LLVMSymbolizer::flush() {
+ ObjectForUBPathAndArch.clear();
+ BinaryForPath.clear();
+ ObjectPairForPathArch.clear();
+ Modules.clear();
+}
+
+// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
+// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
+// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
+// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
+static
+std::string getDarwinDWARFResourceForPath(
+ const std::string &Path, const std::string &Basename) {
+ SmallString<16> ResourceName = StringRef(Path);
+ if (sys::path::extension(Path) != ".dSYM") {
+ ResourceName += ".dSYM";
+ }
+ sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
+ sys::path::append(ResourceName, Basename);
+ return ResourceName.str();
+}
+
+static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileOrSTDIN(Path);
+ if (!MB)
+ return false;
+ return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
+}
+
+static bool findDebugBinary(const std::string &OrigPath,
+ const std::string &DebuglinkName, uint32_t CRCHash,
+ std::string &Result) {
+ std::string OrigRealPath = OrigPath;
+#if defined(HAVE_REALPATH)
+ if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
+ OrigRealPath = RP;
+ free(RP);
+ }
+#endif
+ SmallString<16> OrigDir(OrigRealPath);
+ llvm::sys::path::remove_filename(OrigDir);
+ SmallString<16> DebugPath = OrigDir;
+ // Try /path/to/original_binary/debuglink_name
+ llvm::sys::path::append(DebugPath, DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = DebugPath.str();
+ return true;
+ }
+ // Try /path/to/original_binary/.debug/debuglink_name
+  DebugPath = OrigDir;
+ llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = DebugPath.str();
+ return true;
+ }
+ // Try /usr/lib/debug/path/to/original_binary/debuglink_name
+ DebugPath = "/usr/lib/debug";
+ llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
+ DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = DebugPath.str();
+ return true;
+ }
+ return false;
+}
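
For an original binary /usr/bin/ls whose .gnu_debuglink names ls.debug, the probes above are tried in order, each accepted only when its CRC32 matches (the paths are illustrative):

    /usr/bin/ls.debug
    /usr/bin/.debug/ls.debug
    /usr/lib/debug/usr/bin/ls.debug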
+
+static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
+ uint32_t &CRCHash) {
+ if (!Obj)
+ return false;
+ for (const SectionRef &Section : Obj->sections()) {
+ StringRef Name;
+ Section.getName(Name);
+ Name = Name.substr(Name.find_first_not_of("._"));
+ if (Name == "gnu_debuglink") {
+ StringRef Data;
+ Section.getContents(Data);
+ DataExtractor DE(Data, Obj->isLittleEndian(), 0);
+ uint32_t Offset = 0;
+ if (const char *DebugNameStr = DE.getCStr(&Offset)) {
+ // 4-byte align the offset.
+ Offset = (Offset + 3) & ~0x3;
+ if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
+ DebugName = DebugNameStr;
+ CRCHash = DE.getU32(&Offset);
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+}
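
The section payload read above is a NUL-terminated filename padded to a 4-byte boundary, then the CRC32 of the separate debug file; the (Offset + 3) & ~0x3 step skips the padding. For the name "ls.debug" — 8 characters plus NUL, padded from offset 9 to 12; the bytes are illustrative:

    6c 73 2e 64 65 62 75 67 00 00 00 00   "ls.debug\0" + 3 bytes of padding
    78 56 34 12                           CRC32 = 0x12345678 (little-endian)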
+
+static
+bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
+ const MachOObjectFile *Obj) {
+ ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
+ ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
+ if (dbg_uuid.empty() || bin_uuid.empty())
+ return false;
+ return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
+ const MachOObjectFile *MachExeObj, const std::string &ArchName) {
+  // On Darwin we may find DWARF in a separate object file in
+  // the resource directory.
+ std::vector<std::string> DsymPaths;
+ StringRef Filename = sys::path::filename(ExePath);
+ DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
+ for (const auto &Path : Opts.DsymHints) {
+ DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
+ }
+ for (const auto &Path : DsymPaths) {
+ auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
+ if (!DbgObjOrErr)
+ continue;
+ ObjectFile *DbgObj = DbgObjOrErr.get();
+ const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
+ if (!MachDbgObj)
+ continue;
+ if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
+ return DbgObj;
+ }
+ return nullptr;
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
+ const ObjectFile *Obj,
+ const std::string &ArchName) {
+ std::string DebuglinkName;
+ uint32_t CRCHash;
+ std::string DebugBinaryPath;
+ if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
+ return nullptr;
+ if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
+ return nullptr;
+ auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
+ if (!DbgObjOrErr)
+ return nullptr;
+ return DbgObjOrErr.get();
+}
+
+ErrorOr<LLVMSymbolizer::ObjectPair>
+LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
+ const std::string &ArchName) {
+ const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+ if (I != ObjectPairForPathArch.end())
+ return I->second;
+
+ auto ObjOrErr = getOrCreateObject(Path, ArchName);
+ if (auto EC = ObjOrErr.getError()) {
+ ObjectPairForPathArch.insert(
+ std::make_pair(std::make_pair(Path, ArchName), EC));
+ return EC;
+ }
+
+ ObjectFile *Obj = ObjOrErr.get();
+ assert(Obj != nullptr);
+ ObjectFile *DbgObj = nullptr;
+
+ if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
+ DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
+ if (!DbgObj)
+ DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
+ if (!DbgObj)
+ DbgObj = Obj;
+ ObjectPair Res = std::make_pair(Obj, DbgObj);
+ ObjectPairForPathArch.insert(
+ std::make_pair(std::make_pair(Path, ArchName), Res));
+ return Res;
+}
+
+ErrorOr<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+ const std::string &ArchName) {
+ const auto &I = BinaryForPath.find(Path);
+ Binary *Bin = nullptr;
+ if (I == BinaryForPath.end()) {
+ ErrorOr<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+ if (auto EC = BinOrErr.getError()) {
+ BinaryForPath.insert(std::make_pair(Path, EC));
+ return EC;
+ }
+ Bin = BinOrErr->getBinary();
+ BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
+ } else if (auto EC = I->second.getError()) {
+ return EC;
+ } else {
+ Bin = I->second->getBinary();
+ }
+
+ assert(Bin != nullptr);
+
+ if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
+ const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
+ if (I != ObjectForUBPathAndArch.end()) {
+ if (auto EC = I->second.getError())
+ return EC;
+ return I->second->get();
+ }
+ ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
+ UB->getObjectForArch(ArchName);
+ if (auto EC = ObjOrErr.getError()) {
+ ObjectForUBPathAndArch.insert(
+ std::make_pair(std::make_pair(Path, ArchName), EC));
+ return EC;
+ }
+ ObjectFile *Res = ObjOrErr->get();
+ ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
+ std::move(ObjOrErr.get())));
+ return Res;
+ }
+ if (Bin->isObject()) {
+ return cast<ObjectFile>(Bin);
+ }
+ return object_error::arch_not_found;
+}
+
+ErrorOr<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+ const auto &I = Modules.find(ModuleName);
+ if (I != Modules.end()) {
+ auto &InfoOrErr = I->second;
+ if (auto EC = InfoOrErr.getError())
+ return EC;
+ return InfoOrErr->get();
+ }
+ std::string BinaryName = ModuleName;
+ std::string ArchName = Opts.DefaultArch;
+ size_t ColonPos = ModuleName.find_last_of(':');
+  // Verify that the substring after the colon forms a valid arch name.
+ if (ColonPos != std::string::npos) {
+ std::string ArchStr = ModuleName.substr(ColonPos + 1);
+ if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
+ BinaryName = ModuleName.substr(0, ColonPos);
+ ArchName = ArchStr;
+ }
+ }
+ auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
+ if (auto EC = ObjectsOrErr.getError()) {
+ // Failed to find valid object file.
+ Modules.insert(std::make_pair(ModuleName, EC));
+ return EC;
+ }
+ ObjectPair Objects = ObjectsOrErr.get();
+
+ std::unique_ptr<DIContext> Context;
+ if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
+ // If this is a COFF object, assume it contains PDB debug information. If
+ // we don't find any, we fall back to the DWARF case.
+ std::unique_ptr<IPDBSession> Session;
+ PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
+ Objects.first->getFileName(), Session);
+ if (Error == PDB_ErrorCode::Success) {
+ Context.reset(new PDBContext(*CoffObject, std::move(Session)));
+ }
+ }
+ if (!Context)
+ Context.reset(new DWARFContextInMemory(*Objects.second));
+ assert(Context);
+ auto InfoOrErr =
+ SymbolizableObjectFile::create(Objects.first, std::move(Context));
+ auto InsertResult =
+ Modules.insert(std::make_pair(ModuleName, std::move(InfoOrErr)));
+ assert(InsertResult.second);
+ if (auto EC = InsertResult.first->second.getError())
+ return EC;
+ return InsertResult.first->second->get();
+}
+
+// Undo these various manglings for Win32 extern "C" functions:
+// cdecl - _foo
+// stdcall - _foo@12
+// fastcall - @foo@12
+// vectorcall - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+ // Remove any '_' or '@' prefix.
+ char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+ if (Front == '_' || Front == '@')
+ SymbolName = SymbolName.drop_front();
+
+ // Remove any '@[0-9]+' suffix.
+ if (Front != '?') {
+ size_t AtPos = SymbolName.rfind('@');
+ if (AtPos != StringRef::npos &&
+ std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+ [](char C) { return C >= '0' && C <= '9'; })) {
+ SymbolName = SymbolName.substr(0, AtPos);
+ }
+ }
+
+ // Remove any trailing '@' left by vectorcall.
+ if (SymbolName.endswith("@"))
+ SymbolName = SymbolName.drop_back();
+
+ return SymbolName;
+}
+
+#if !defined(_MSC_VER)
+// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
+extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
+ size_t *length, int *status);
+#endif
+
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+ const SymbolizableModule *ModInfo) {
+#if !defined(_MSC_VER)
+ // We can spoil names of symbols with C linkage, so use a heuristic
+ // approach to check if the name should be demangled.
+ if (Name.substr(0, 2) == "_Z") {
+ int status = 0;
+ char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+ if (status != 0)
+ return Name;
+ std::string Result = DemangledName;
+ free(DemangledName);
+ return Result;
+ }
+#else
+ if (!Name.empty() && Name.front() == '?') {
+ // Only do MSVC C++ demangling on symbols starting with '?'.
+ char DemangledName[1024] = {0};
+ DWORD result = ::UnDecorateSymbolName(
+ Name.c_str(), DemangledName, 1023,
+ UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
+ UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
+ UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
+ UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
+ UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
+ UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
+ return (result == 0) ? Name : std::string(DemangledName);
+ }
+#endif
+ if (ModInfo && ModInfo->isWin32Module())
+ return std::string(demanglePE32ExternCFunc(Name));
+ return Name;
+}
+
+} // namespace symbolize
+} // namespace llvm
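The demangling code above treats the four Win32 decorations as spellings of one C symbol. A standalone sketch of the same stripping rules (an illustration, not part of the patch; the helper name demanglePE32ExternC is hypothetical), written against plain std::string so it builds without LLVM headers:

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>

    // Mirrors demanglePE32ExternCFunc: drop a '_'/'@' prefix, drop a
    // '@<digits>' byte-count suffix (unless the name is an MSVC C++
    // mangling starting with '?'), then drop a trailing '@' left by
    // vectorcall.
    static std::string demanglePE32ExternC(std::string Name) {
      char Front = Name.empty() ? '\0' : Name[0];
      if (Front == '_' || Front == '@')
        Name.erase(0, 1);
      if (Front != '?') {
        size_t AtPos = Name.rfind('@');
        if (AtPos != std::string::npos &&
            std::all_of(Name.begin() + AtPos + 1, Name.end(),
                        [](unsigned char C) { return std::isdigit(C); }))
          Name.resize(AtPos);
      }
      if (!Name.empty() && Name.back() == '@')
        Name.pop_back();
      return Name;
    }

    int main() {
      // cdecl, stdcall, fastcall and vectorcall all collapse to "foo".
      const char *Tests[] = {"_foo", "_foo@12", "@foo@12", "foo@@12"};
      for (const char *S : Tests)
        std::cout << S << " -> " << demanglePE32ExternC(S) << '\n';
    }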
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 67a1ca6..41c8da4 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -61,8 +61,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
void JITEventListener::anchor() {}
-ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
- : LazyFunctionCreator(nullptr) {
+void ExecutionEngine::Init(std::unique_ptr<Module> M) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -79,6 +78,16 @@ ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
Modules.push_back(std::move(M));
}
+ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
+ : DL(M->getDataLayout()), LazyFunctionCreator(nullptr) {
+ Init(std::move(M));
+}
+
+ExecutionEngine::ExecutionEngine(DataLayout DL, std::unique_ptr<Module> M)
+ : DL(std::move(DL)), LazyFunctionCreator(nullptr) {
+ Init(std::move(M));
+}
+
ExecutionEngine::~ExecutionEngine() {
clearAllGlobalMappings();
}
@@ -86,7 +95,7 @@ ExecutionEngine::~ExecutionEngine() {
namespace {
/// \brief Helper class which uses a value handle to automatically delete the
/// memory block when the GlobalVariable is destroyed.
-class GVMemoryBlock : public CallbackVH {
+class GVMemoryBlock final : public CallbackVH {
GVMemoryBlock(const GlobalVariable *GV)
: CallbackVH(const_cast<GlobalVariable*>(GV)) {}
@@ -115,7 +124,7 @@ public:
} // anonymous namespace
char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
- return GVMemoryBlock::Create(GV, *getDataLayout());
+ return GVMemoryBlock::Create(GV, getDataLayout());
}
void ExecutionEngine::addObjectFile(std::unique_ptr<object::ObjectFile> O) {
@@ -187,7 +196,7 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
const DataLayout &DL =
GV->getParent()->getDataLayout().isDefault()
- ? *getDataLayout()
+ ? getDataLayout()
: GV->getParent()->getDataLayout();
Mangler::getNameWithPrefix(FullName, GV->getName(), DL);
@@ -228,11 +237,10 @@ void ExecutionEngine::clearAllGlobalMappings() {
void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
- EEState.RemoveMapping(getMangledName(FI));
- for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
- GI != GE; ++GI)
- EEState.RemoveMapping(getMangledName(GI));
+ for (Function &FI : *M)
+ EEState.RemoveMapping(getMangledName(&FI));
+ for (GlobalVariable &GI : M->globals())
+ EEState.RemoveMapping(getMangledName(&GI));
}
uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV,
@@ -333,7 +341,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
const std::vector<std::string> &InputArgv) {
Values.clear(); // Free the old contents.
Values.reserve(InputArgv.size());
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout().getPointerSize();
Array = make_unique<char[]>((InputArgv.size()+1)*PtrSize);
DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array.get() << "\n");
@@ -408,7 +416,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
#ifndef NDEBUG
/// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
- unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+ unsigned PtrSize = EE->getDataLayout().getPointerSize();
for (unsigned i = 0; i < PtrSize; ++i)
if (*(i + (uint8_t*)Loc))
return false;
@@ -621,8 +629,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
break;
case Type::VectorTyID:
// If the whole vector is 'undef', just reserve memory for the value.
- const VectorType* VTy = dyn_cast<VectorType>(C->getType());
- const Type *ElemTy = VTy->getElementType();
+ auto* VTy = dyn_cast<VectorType>(C->getType());
+ Type *ElemTy = VTy->getElementType();
unsigned int elemNum = VTy->getNumElements();
Result.AggregateVal.resize(elemNum);
if (ElemTy->isIntegerTy())
@@ -641,8 +649,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Instruction::GetElementPtr: {
// Compute the index
GenericValue Result = getConstantValue(Op0);
- APInt Offset(DL->getPointerSizeInBits(), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(*DL, Offset);
+ APInt Offset(DL.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(DL, Offset);
char* tmp = (char*) Result.PointerVal;
Result = PTOGV(tmp + Offset.getSExtValue());
@@ -729,16 +737,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
}
case Instruction::PtrToInt: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = DL->getTypeSizeInBits(Op0->getType());
+ uint32_t PtrWidth = DL.getTypeSizeInBits(Op0->getType());
assert(PtrWidth <= 64 && "Bad pointer width");
GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
- uint32_t IntWidth = DL->getTypeSizeInBits(CE->getType());
+ uint32_t IntWidth = DL.getTypeSizeInBits(CE->getType());
GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth);
return GV;
}
case Instruction::IntToPtr: {
GenericValue GV = getConstantValue(Op0);
- uint32_t PtrWidth = DL->getTypeSizeInBits(CE->getType());
+ uint32_t PtrWidth = DL.getTypeSizeInBits(CE->getType());
GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue()));
@@ -860,8 +868,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GV.IntVal = apfLHS.bitcastToAPInt();
break;
case Instruction::FRem:
- apfLHS.mod(APFloat(Sem, RHS.IntVal),
- APFloat::rmNearestTiesToEven);
+ apfLHS.mod(APFloat(Sem, RHS.IntVal));
GV.IntVal = apfLHS.bitcastToAPInt();
break;
}
@@ -1040,7 +1047,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
GenericValue *Ptr, Type *Ty) {
- const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
+ const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
default:
@@ -1080,7 +1087,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
break;
}
- if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian())
+ if (sys::IsLittleEndianHost != getDataLayout().isLittleEndian())
// Host and target have different endianness - reverse the stored bytes.
std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
}
@@ -1117,7 +1124,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
GenericValue *Ptr,
Type *Ty) {
- const unsigned LoadBytes = getDataLayout()->getTypeStoreSize(Ty);
+ const unsigned LoadBytes = getDataLayout().getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
@@ -1143,8 +1150,8 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
break;
}
case Type::VectorTyID: {
- const VectorType *VT = cast<VectorType>(Ty);
- const Type *ElemT = VT->getElementType();
+ auto *VT = cast<VectorType>(Ty);
+ Type *ElemT = VT->getElementType();
const unsigned numElems = VT->getNumElements();
if (ElemT->isFloatTy()) {
Result.AggregateVal.resize(numElems);
@@ -1183,20 +1190,20 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
unsigned ElementSize =
- getDataLayout()->getTypeAllocSize(CP->getType()->getElementType());
+ getDataLayout().getTypeAllocSize(CP->getType()->getElementType());
for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
return;
}
if (isa<ConstantAggregateZero>(Init)) {
- memset(Addr, 0, (size_t)getDataLayout()->getTypeAllocSize(Init->getType()));
+ memset(Addr, 0, (size_t)getDataLayout().getTypeAllocSize(Init->getType()));
return;
}
if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
unsigned ElementSize =
- getDataLayout()->getTypeAllocSize(CPA->getType()->getElementType());
+ getDataLayout().getTypeAllocSize(CPA->getType()->getElementType());
for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
return;
@@ -1204,7 +1211,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
const StructLayout *SL =
- getDataLayout()->getStructLayout(cast<StructType>(CPS->getType()));
+ getDataLayout().getStructLayout(cast<StructType>(CPS->getType()));
for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
return;
@@ -1349,7 +1356,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
InitializeMemory(GV->getInitializer(), GA);
Type *ElTy = GV->getType()->getElementType();
- size_t GVSize = (size_t)getDataLayout()->getTypeAllocSize(ElTy);
+ size_t GVSize = (size_t)getDataLayout().getTypeAllocSize(ElTy);
NumInitBytes += (unsigned)GVSize;
++NumGlobals;
}
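The hunks above are one half of a broader change: ExecutionEngine now owns a DataLayout value, getDataLayout() returns a const reference instead of a pointer, and subclasses that know their layout up front hand it to the new two-argument constructor. The caller-side effect, as a small sketch against the post-patch headers (pointerSize is an illustrative function, assuming the 3.8-era API shown in this diff):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"

    unsigned pointerSize(llvm::ExecutionEngine &EE) {
      // Before this patch: EE.getDataLayout()->getPointerSize()
      return EE.getDataLayout().getPointerSize();
    }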
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 55ab5af..ff7c4dc 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef)
-inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
+static LLVMTargetMachineRef wrap(const TargetMachine *P) {
return
reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P));
}
@@ -210,35 +210,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
return 1;
}
-LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError) {
- /* The module provider is now actually a module. */
- return LLVMCreateExecutionEngineForModule(OutEE,
- reinterpret_cast<LLVMModuleRef>(MP),
- OutError);
-}
-
-LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError) {
- /* The module provider is now actually a module. */
- return LLVMCreateInterpreterForModule(OutInterp,
- reinterpret_cast<LLVMModuleRef>(MP),
- OutError);
-}
-
-LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError) {
- /* The module provider is now actually a module. */
- return LLVMCreateJITCompilerForModule(OutJIT,
- reinterpret_cast<LLVMModuleRef>(MP),
- OptLevel, OutError);
-}
-
-
void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
delete unwrap(EE);
}
@@ -282,11 +253,6 @@ void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
unwrap(EE)->addModule(std::unique_ptr<Module>(unwrap(M)));
}
-void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
- /* The module provider is now actually a module. */
- LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
-}
-
LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
LLVMModuleRef *OutMod, char **OutError) {
Module *Mod = unwrap(M);
@@ -295,14 +261,6 @@ LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
return 0;
}
-LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
- LLVMModuleProviderRef MP,
- LLVMModuleRef *OutMod, char **OutError) {
- /* The module provider is now actually a module. */
- return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
- OutError);
-}
-
LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
LLVMValueRef *OutFn) {
if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
@@ -318,7 +276,7 @@ void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE,
}
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
- return wrap(unwrap(EE)->getDataLayout());
+ return wrap(&unwrap(EE)->getDataLayout());
}
LLVMTargetMachineRef
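The deleted entry points above had been thin casts onto the ...ForModule variants ever since module providers were folded into modules, so client migration is mechanical. A sketch under that assumption (makeEngine is a hypothetical wrapper; engine setup and error handling elided):

    #include "llvm-c/ExecutionEngine.h"

    LLVMBool makeEngine(LLVMModuleRef Mod, LLVMExecutionEngineRef *EE,
                        char **Err) {
      // Before: LLVMCreateExecutionEngine(EE, (LLVMModuleProviderRef)Mod, Err)
      return LLVMCreateExecutionEngineForModule(EE, Mod, Err);
    }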
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index dbfa37e..1eb4f7d 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -593,7 +593,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
}
static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2,
- const Type *Ty, const bool val) {
+ Type *Ty, const bool val) {
GenericValue Dest;
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
@@ -788,7 +788,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
}
static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
- GenericValue Src3, const Type *Ty) {
+ GenericValue Src3, Type *Ty) {
GenericValue Dest;
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
@@ -805,7 +805,7 @@ static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
void Interpreter::visitSelectInst(SelectInst &I) {
ExecutionContext &SF = ECStack.back();
- const Type * Ty = I.getOperand(0)->getType();
+ Type * Ty = I.getOperand(0)->getType();
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
@@ -968,7 +968,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
unsigned NumElements =
getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue();
- unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty);
+ unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty);
// Avoid malloc-ing zero bytes, use max()...
unsigned MemToAlloc = std::max(1U, NumElements * TypeSize);
@@ -1000,7 +1000,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
for (; I != E; ++I) {
if (StructType *STy = dyn_cast<StructType>(*I)) {
- const StructLayout *SLO = TD.getStructLayout(STy);
+ const StructLayout *SLO = getDataLayout().getStructLayout(STy);
const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
unsigned Index = unsigned(CPU->getZExtValue());
@@ -1020,7 +1020,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
assert(BitWidth == 64 && "Invalid index type for getelementptr");
Idx = (int64_t)IdxGV.IntVal.getZExtValue();
}
- Total += TD.getTypeAllocSize(ST->getElementType())*Idx;
+ Total += getDataLayout().getTypeAllocSize(ST->getElementType()) * Idx;
}
}
@@ -1139,7 +1139,7 @@ void Interpreter::visitShl(BinaryOperator &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Dest;
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (Ty->isVectorTy()) {
uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
@@ -1166,7 +1166,7 @@ void Interpreter::visitLShr(BinaryOperator &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Dest;
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (Ty->isVectorTy()) {
uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
@@ -1193,7 +1193,7 @@ void Interpreter::visitAShr(BinaryOperator &I) {
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Dest;
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
if (Ty->isVectorTy()) {
size_t src1Size = Src1.AggregateVal.size();
@@ -1237,10 +1237,10 @@ GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy,
GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
- const Type *SrcTy = SrcVal->getType();
+ Type *SrcTy = SrcVal->getType();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcTy->isVectorTy()) {
- const Type *DstVecTy = DstTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
unsigned size = Src.AggregateVal.size();
// the sizes of src and dst vectors must be equal.
@@ -1248,7 +1248,7 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
for (unsigned i = 0; i < size; i++)
Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth);
} else {
- const IntegerType *DITy = cast<IntegerType>(DstTy);
+ auto *DITy = cast<IntegerType>(DstTy);
unsigned DBitWidth = DITy->getBitWidth();
Dest.IntVal = Src.IntVal.sext(DBitWidth);
}
@@ -1257,10 +1257,10 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF) {
- const Type *SrcTy = SrcVal->getType();
+ Type *SrcTy = SrcVal->getType();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcTy->isVectorTy()) {
- const Type *DstVecTy = DstTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
unsigned size = Src.AggregateVal.size();
@@ -1269,7 +1269,7 @@ GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
for (unsigned i = 0; i < size; i++)
Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth);
} else {
- const IntegerType *DITy = cast<IntegerType>(DstTy);
+ auto *DITy = cast<IntegerType>(DstTy);
unsigned DBitWidth = DITy->getBitWidth();
Dest.IntVal = Src.IntVal.zext(DBitWidth);
}
@@ -1327,8 +1327,8 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcTy->getTypeID() == Type::VectorTyID) {
- const Type *DstVecTy = DstTy->getScalarType();
- const Type *SrcVecTy = SrcTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
+ Type *SrcVecTy = SrcTy->getScalarType();
uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
unsigned size = Src.AggregateVal.size();
// the sizes of src and dst vectors must be equal.
@@ -1365,8 +1365,8 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcTy->getTypeID() == Type::VectorTyID) {
- const Type *DstVecTy = DstTy->getScalarType();
- const Type *SrcVecTy = SrcTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
+ Type *SrcVecTy = SrcTy->getScalarType();
uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
unsigned size = Src.AggregateVal.size();
// the sizes of src and dst vectors must be equal
@@ -1401,7 +1401,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
- const Type *DstVecTy = DstTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
unsigned size = Src.AggregateVal.size();
// the sizes of src and dst vectors must be equal
Dest.AggregateVal.resize(size);
@@ -1433,7 +1433,7 @@ GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
- const Type *DstVecTy = DstTy->getScalarType();
+ Type *DstVecTy = DstTy->getScalarType();
unsigned size = Src.AggregateVal.size();
// the sizes of src and dst vectors must be equal
Dest.AggregateVal.resize(size);
@@ -1477,7 +1477,7 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
- uint32_t PtrSize = TD.getPointerSizeInBits();
+ uint32_t PtrSize = getDataLayout().getPointerSizeInBits();
if (PtrSize != Src.IntVal.getBitWidth())
Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
@@ -1497,10 +1497,10 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
(DstTy->getTypeID() == Type::VectorTyID)) {
// vector src bitcast to vector dst or vector src bitcast to scalar dst or
// scalar src bitcast to vector dst
- bool isLittleEndian = TD.isLittleEndian();
+ bool isLittleEndian = getDataLayout().isLittleEndian();
GenericValue TempDst, TempSrc, SrcVec;
- const Type *SrcElemTy;
- const Type *DstElemTy;
+ Type *SrcElemTy;
+ Type *DstElemTy;
unsigned SrcBitSize;
unsigned DstBitSize;
unsigned SrcNum;
@@ -2091,7 +2091,7 @@ void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) {
}
// Get pointers to first LLVM BB & Instruction in function.
- StackFrame.CurBB = F->begin();
+ StackFrame.CurBB = &F->front();
StackFrame.CurInst = StackFrame.CurBB->begin();
// Run through the function arguments and initialize their values...
@@ -2103,7 +2103,7 @@ void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) {
unsigned i = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI, ++i)
- SetValue(AI, ArgVals[i], StackFrame);
+ SetValue(&*AI, ArgVals[i], StackFrame);
// Handle varargs arguments...
StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end());
@@ -2121,27 +2121,5 @@ void Interpreter::run() {
DEBUG(dbgs() << "About to interpret: " << I);
visit(I); // Dispatch to one of the visit* methods...
-#if 0
- // This is not safe, as visiting the instruction could lower it and free I.
-DEBUG(
- if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
- I.getType() != Type::VoidTy) {
- dbgs() << " --> ";
- const GenericValue &Val = SF.Values[&I];
- switch (I.getType()->getTypeID()) {
- default: llvm_unreachable("Invalid GenericValue Type");
- case Type::VoidTyID: dbgs() << "void"; break;
- case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break;
- case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break;
- case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal);
- break;
- case Type::IntegerTyID:
- dbgs() << "i" << Val.IntVal.getBitWidth() << " "
- << Val.IntVal.toStringUnsigned(10)
- << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
- break;
- }
- });
-#endif
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 9b44042..441f0eb 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -178,7 +178,7 @@ static void *ffiValueFor(Type *Ty, const GenericValue &AV,
}
static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
- const DataLayout *TD, GenericValue &Result) {
+ const DataLayout &TD, GenericValue &Result) {
ffi_cif cif;
FunctionType *FTy = F->getFunctionType();
const unsigned NumArgs = F->arg_size();
@@ -198,7 +198,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
const unsigned ArgNo = A->getArgNo();
Type *ArgTy = FTy->getParamType(ArgNo);
args[ArgNo] = ffiTypeFor(ArgTy);
- ArgBytes += TD->getTypeStoreSize(ArgTy);
+ ArgBytes += TD.getTypeStoreSize(ArgTy);
}
SmallVector<uint8_t, 128> ArgData;
@@ -210,7 +210,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
const unsigned ArgNo = A->getArgNo();
Type *ArgTy = FTy->getParamType(ArgNo);
values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
- ArgDataPtr += TD->getTypeStoreSize(ArgTy);
+ ArgDataPtr += TD.getTypeStoreSize(ArgTy);
}
Type *RetTy = FTy->getReturnType();
@@ -219,7 +219,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
SmallVector<uint8_t, 128> ret;
if (RetTy->getTypeID() != Type::VoidTyID)
- ret.resize(TD->getTypeStoreSize(RetTy));
+ ret.resize(TD.getTypeStoreSize(RetTy));
ffi_call(&cif, Fn, ret.data(), values.data());
switch (RetTy->getTypeID()) {
case Type::IntegerTyID:
@@ -368,7 +368,7 @@ static GenericValue lle_X_sprintf(FunctionType *FT,
case 'x': case 'X':
if (HowLong >= 1) {
if (HowLong == 1 &&
- TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+ TheInterpreter->getDataLayout().getPointerSizeInBits() == 64 &&
sizeof(long) < sizeof(int64_t)) {
// Make sure we use %lld with a 64 bit argument because we might be
// compiling LLI on a 32 bit compiler.
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index f103c09..bc7da2e 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -35,7 +35,7 @@ extern "C" void LLVMLinkInInterpreter() { }
ExecutionEngine *Interpreter::create(std::unique_ptr<Module> M,
std::string *ErrStr) {
// Tell this Module to materialize everything and release the GVMaterializer.
- if (std::error_code EC = M->materializeAllPermanently()) {
+ if (std::error_code EC = M->materializeAll()) {
if (ErrStr)
*ErrStr = EC.message();
// We got an error, just return 0
@@ -49,16 +49,15 @@ ExecutionEngine *Interpreter::create(std::unique_ptr<Module> M,
// Interpreter ctor - Initialize stuff
//
Interpreter::Interpreter(std::unique_ptr<Module> M)
- : ExecutionEngine(std::move(M)), TD(Modules.back().get()) {
+ : ExecutionEngine(std::move(M)) {
memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
- setDataLayout(&TD);
// Initialize the "backend"
initializeExecutionEngine();
initializeExternalFunctions();
emitGlobals();
- IL = new IntrinsicLowering(TD);
+ IL = new IntrinsicLowering(getDataLayout());
}
Interpreter::~Interpreter() {
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index f976641..2e5a867 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -26,7 +26,6 @@
namespace llvm {
class IntrinsicLowering;
-struct FunctionInfo;
template<typename T> class generic_gep_type_iterator;
class ConstantExpr;
typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
@@ -95,7 +94,6 @@ struct ExecutionContext {
//
class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
GenericValue ExitValue; // The return value of the called function
- DataLayout TD;
IntrinsicLowering *IL;
// The runtime stack of executing code. The top of the stack is the current
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index f6944ee..6cbebe9 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -65,12 +65,13 @@ MCJIT::createJIT(std::unique_ptr<Module> M,
std::move(Resolver));
}
-MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
+MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> TM,
std::shared_ptr<MCJITMemoryManager> MemMgr,
std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
- : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr),
- MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)),
- Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) {
+ : ExecutionEngine(TM->createDataLayout(), std::move(M)), TM(std::move(TM)),
+ Ctx(nullptr), MemMgr(std::move(MemMgr)),
+ Resolver(*this, std::move(Resolver)), Dyld(*this->MemMgr, this->Resolver),
+ ObjCache(nullptr) {
// FIXME: We are managing our modules, so we do not want the base class
// ExecutionEngine to manage them as well. To avoid double destruction
// of the first (and only) module added in ExecutionEngine constructor
@@ -85,7 +86,6 @@ MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
Modules.clear();
OwnedModules.addModule(std::move(First));
- setDataLayout(TM->getDataLayout());
RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
}
@@ -159,7 +159,6 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
// Initialize passes.
PM.run(*M);
// Flush the output buffer to get the generated code into memory
- ObjStream.flush();
std::unique_ptr<MemoryBuffer> CompiledObjBuffer(
new ObjectMemoryBuffer(std::move(ObjBufferSV)));
@@ -193,7 +192,11 @@ void MCJIT::generateCodeForModule(Module *M) {
if (ObjCache)
ObjectToLoad = ObjCache->getObject(M);
- M->setDataLayout(*TM->getDataLayout());
+ if (M->getDataLayout().isDefault()) {
+ M->setDataLayout(getDataLayout());
+ } else {
+ assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
+ }
// If the cache did not contain a suitable object, compile the object
if (!ObjectToLoad) {
@@ -265,7 +268,7 @@ void MCJIT::finalizeModule(Module *M) {
RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
SmallString<128> FullName;
- Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout());
+ Mangler::getNameWithPrefix(FullName, Name, getDataLayout());
if (void *Addr = getPointerToGlobalIfAvailable(FullName))
return RuntimeDyld::SymbolInfo(static_cast<uint64_t>(
@@ -315,10 +318,12 @@ RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name,
object::Archive *A = OB.getBinary();
// Look for our symbols in each Archive
object::Archive::child_iterator ChildIt = A->findSym(Name);
+ if (std::error_code EC = ChildIt->getError())
+ report_fatal_error(EC.message());
if (ChildIt != A->child_end()) {
// FIXME: Support nested archives?
ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
- ChildIt->getAsBinary();
+ (*ChildIt)->getAsBinary();
if (ChildBinOrErr.getError())
continue;
std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
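generateCodeForModule now enforces a simple contract: a module either carries the default (empty) DataLayout and inherits the JIT's, or it must already agree with the JIT's layout. A client-side sketch of the same rule (prepareForJIT is a hypothetical helper mirroring the hunk above):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"

    void prepareForJIT(llvm::Module &M, llvm::ExecutionEngine &EE) {
      if (M.getDataLayout().isDefault())
        M.setDataLayout(EE.getDataLayout()); // inherit the JIT's layout
      // Otherwise MCJIT asserts M.getDataLayout() == EE.getDataLayout().
    }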
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index a45173c..3c9d2fd 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -86,7 +86,7 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator begin_added() { return AddedModules.begin(); }
ModulePtrSet::iterator end_added() { return AddedModules.end(); }
iterator_range<ModulePtrSet::iterator> added() {
- return iterator_range<ModulePtrSet::iterator>(begin_added(), end_added());
+ return make_range(begin_added(), end_added());
}
ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); }
@@ -223,12 +223,13 @@ public:
/// FindFunctionNamed - Search all of the active modules to find the function that
/// defines FnName. This is a very slow operation and shouldn't be used for
/// general code.
- virtual Function *FindFunctionNamed(const char *FnName) override;
+ Function *FindFunctionNamed(const char *FnName) override;
- /// FindGlobalVariableNamed - Search all of the active modules to find the global variable
- /// that defines Name. This is very slow operation and shouldn't be used for
- /// general code.
- virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false) override;
+ /// FindGlobalVariableNamed - Search all of the active modules to find the
+ /// global variable that defines Name. This is a very slow operation and
+ /// shouldn't be used for general code.
+ GlobalVariable *FindGlobalVariableNamed(const char *Name,
+ bool AllowInternal = false) override;
/// Sets the object manager that MCJIT should use to avoid compilation.
void setObjectCache(ObjectCache *manager) override;
@@ -335,6 +336,6 @@ protected:
bool CheckFunctionsOnly);
};
-} // End llvm namespace
+} // end llvm namespace
-#endif
+#endif // LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H
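The added() change earlier in this file swaps the spelled-out iterator_range construction for llvm::make_range, which deduces the iterator type. The same helper works for any iterator pair; a minimal sketch (sum is an illustrative function, not from the patch):

    #include "llvm/ADT/iterator_range.h"
    #include <vector>

    int sum(std::vector<int> &V) {
      int S = 0;
      for (int X : llvm::make_range(V.begin(), V.end())) // type deduced
        S += X;
      return S;
    }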
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index b439810..34564e4 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -19,6 +19,9 @@
namespace llvm {
namespace orc {
+void JITCompileCallbackManager::anchor() {}
+void IndirectStubsManager::anchor() {}
+
Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) {
Constant *AddrIntVal =
ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr);
@@ -37,7 +40,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M,
return IP;
}
-void makeStub(Function &F, GlobalVariable &ImplPointer) {
+void makeStub(Function &F, Value &ImplPointer) {
assert(F.isDeclaration() && "Can't turn a definition into a stub.");
assert(F.getParent() && "Function isn't in a module.");
Module &M = *F.getParent();
@@ -61,9 +64,7 @@ class GlobalRenamer {
public:
static bool needsRenaming(const Value &New) {
- if (!New.hasName() || New.getName().startswith("\01L"))
- return true;
- return false;
+ return !New.hasName() || New.getName().startswith("\01L");
}
const std::string& getRename(const Value &Orig) {
@@ -106,6 +107,9 @@ void makeAllSymbolsExternallyAccessible(Module &M) {
for (auto &GV : M.globals())
raiseVisibilityOnValue(GV, Renamer);
+
+ for (auto &A : M.aliases())
+ raiseVisibilityOnValue(A, Renamer);
}
Function* cloneFunctionDecl(Module &Dst, const Function &F,
@@ -121,7 +125,7 @@ Function* cloneFunctionDecl(Module &Dst, const Function &F,
auto NewArgI = NewF->arg_begin();
for (auto ArgI = F.arg_begin(), ArgE = F.arg_end(); ArgI != ArgE;
++ArgI, ++NewArgI)
- (*VMap)[ArgI] = NewArgI;
+ (*VMap)[&*ArgI] = &*NewArgI;
}
return NewF;
@@ -177,5 +181,16 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
nullptr, Materializer));
}
+GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
+ ValueToValueMapTy &VMap) {
+ assert(OrigA.getAliasee() && "Original alias doesn't have an aliasee?");
+ auto *NewA = GlobalAlias::create(OrigA.getValueType(),
+ OrigA.getType()->getPointerAddressSpace(),
+ OrigA.getLinkage(), OrigA.getName(), &Dst);
+ NewA->copyAttributesFrom(&OrigA);
+ VMap[&OrigA] = NewA;
+ return NewA;
+}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
new file mode 100644
index 0000000..d2379cd
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -0,0 +1,97 @@
+//===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OrcCBindingsStack.h"
+#include "llvm-c/OrcBindings.h"
+
+using namespace llvm;
+
+LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) {
+ TargetMachine *TM2(unwrap(TM));
+
+ Triple T(TM2->getTargetTriple());
+
+ auto CompileCallbackMgr = OrcCBindingsStack::createCompileCallbackMgr(T);
+ auto IndirectStubsMgrBuilder =
+ OrcCBindingsStack::createIndirectStubsMgrBuilder(T);
+
+ OrcCBindingsStack *JITStack =
+ new OrcCBindingsStack(*TM2, std::move(CompileCallbackMgr),
+ IndirectStubsMgrBuilder);
+
+ return wrap(JITStack);
+}
+
+void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName,
+ const char *SymbolName) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ std::string Mangled = J.mangle(SymbolName);
+ *MangledName = new char[Mangled.size() + 1];
+ strcpy(*MangledName, Mangled.c_str());
+}
+
+void LLVMOrcDisposeMangledSymbol(char *MangledName) {
+ delete[] MangledName;
+}
+
+LLVMOrcTargetAddress
+LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
+ LLVMOrcLazyCompileCallbackFn Callback,
+ void *CallbackCtx) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ return J.createLazyCompileCallback(Callback, CallbackCtx);
+}
+
+void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ J.createIndirectStub(StubName, InitAddr);
+}
+
+void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ J.setIndirectStubPointer(StubName, NewAddr);
+}
+
+LLVMOrcModuleHandle
+LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ Module *M(unwrap(Mod));
+ return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx);
+}
+
+LLVMOrcModuleHandle
+LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ Module *M(unwrap(Mod));
+ return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx);
+}
+
+void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ J.removeModule(H);
+}
+
+LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ const char *SymbolName) {
+ OrcCBindingsStack &J = *unwrap(JITStack);
+ auto Sym = J.findSymbol(SymbolName, true);
+ return Sym.getAddress();
+}
+
+void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
+ delete unwrap(JITStack);
+}
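Taken together, the new bindings form a small lifecycle: create a JIT stack from a target machine, add IR, resolve a symbol, tear down. A hedged end-to-end sketch (jitAndRunMain is hypothetical; the target machine and module are assumed to exist; passing a null resolver relies on the stack consulting it only when non-null, per OrcCBindingsStack below; error handling is elided):

    #include "llvm-c/OrcBindings.h"

    typedef int (*MainFn)(void);

    int jitAndRunMain(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
      LLVMOrcJITStackRef JIT = LLVMOrcCreateInstance(TM);
      LLVMOrcModuleHandle H = LLVMOrcAddEagerlyCompiledIR(
          JIT, Mod, /*SymbolResolver=*/nullptr, /*SymbolResolverCtx=*/nullptr);
      // findSymbol mangles internally, so the plain C name suffices here.
      MainFn Main = (MainFn)LLVMOrcGetSymbolAddress(JIT, "main");
      int Ret = Main ? Main() : -1;
      LLVMOrcRemoveModule(JIT, H);
      LLVMOrcDisposeInstance(JIT);
      return Ret;
    }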
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
new file mode 100644
index 0000000..e519c7f
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
@@ -0,0 +1,43 @@
+//===-------- OrcCBindingsStack.cpp - Orc JIT stack for C bindings --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OrcCBindingsStack.h"
+
+#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include <cstdio>
+#include <system_error>
+
+using namespace llvm;
+
+std::unique_ptr<OrcCBindingsStack::CompileCallbackMgr>
+OrcCBindingsStack::createCompileCallbackMgr(Triple T) {
+ switch (T.getArch()) {
+ default: return nullptr;
+
+ case Triple::x86_64: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
+ return llvm::make_unique<CCMgrT>(0);
+ }
+ }
+}
+
+OrcCBindingsStack::IndirectStubsManagerBuilder
+OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) {
+ switch (T.getArch()) {
+ default: return nullptr;
+
+ case Triple::x86_64:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
+ };
+ }
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
new file mode 100644
index 0000000..2e17624
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -0,0 +1,282 @@
+//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
+#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm-c/OrcBindings.h"
+
+namespace llvm {
+
+class OrcCBindingsStack;
+
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
+
+class OrcCBindingsStack {
+public:
+
+ typedef orc::JITCompileCallbackManager CompileCallbackMgr;
+ typedef orc::ObjectLinkingLayer<> ObjLayerT;
+ typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
+ typedef orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr> CODLayerT;
+
+ typedef std::function<std::unique_ptr<CompileCallbackMgr>()>
+ CallbackManagerBuilder;
+
+ typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder;
+
+private:
+
+ class GenericHandle {
+ public:
+ virtual ~GenericHandle() {}
+ virtual orc::JITSymbol findSymbolIn(const std::string &Name,
+ bool ExportedSymbolsOnly) = 0;
+ virtual void removeModule() = 0;
+ };
+
+ template <typename LayerT>
+ class GenericHandleImpl : public GenericHandle {
+ public:
+ GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle)
+ : Layer(Layer), Handle(std::move(Handle)) {}
+
+ orc::JITSymbol findSymbolIn(const std::string &Name,
+ bool ExportedSymbolsOnly) override {
+ return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
+ }
+
+ void removeModule() override {
+ return Layer.removeModuleSet(Handle);
+ }
+
+ private:
+ LayerT &Layer;
+ typename LayerT::ModuleSetHandleT Handle;
+ };
+
+ template <typename LayerT>
+ std::unique_ptr<GenericHandleImpl<LayerT>>
+ createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) {
+ return llvm::make_unique<GenericHandleImpl<LayerT>>(Layer,
+ std::move(Handle));
+ }
+
+public:
+
+ // We need a 'ModuleSetHandleT' to conform to the layer concept.
+ typedef unsigned ModuleSetHandleT;
+
+ typedef unsigned ModuleHandleT;
+
+ static std::unique_ptr<CompileCallbackMgr> createCompileCallbackMgr(Triple T);
+ static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T);
+
+ OrcCBindingsStack(TargetMachine &TM,
+ std::unique_ptr<CompileCallbackMgr> CCMgr,
+ IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
+ : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)),
+ ObjectLayer(),
+ CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+ CODLayer(CompileLayer,
+ [](Function &F) { std::set<Function*> S; S.insert(&F); return S; },
+ *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+ IndirectStubsMgr(IndirectStubsMgrBuilder()),
+ CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
+
+ ~OrcCBindingsStack() {
+ // Run any destructors registered with __cxa_atexit.
+ CXXRuntimeOverrides.runDestructors();
+ // Run any IR destructors.
+ for (auto &DtorRunner : IRStaticDestructorRunners)
+ DtorRunner.runViaLayer(*this);
+ }
+
+ std::string mangle(StringRef Name) {
+ std::string MangledName;
+ {
+ raw_string_ostream MangledNameStream(MangledName);
+ Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
+ }
+ return MangledName;
+ }
+
+ template <typename PtrTy>
+ static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+ return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
+ }
+
+ orc::TargetAddress
+ createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
+ void *CallbackCtx) {
+ auto CCInfo = CCMgr->getCompileCallback();
+ CCInfo.setCompileAction(
+ [=]() -> orc::TargetAddress {
+ return Callback(wrap(this), CallbackCtx);
+ });
+ return CCInfo.getAddress();
+ }
+
+ void createIndirectStub(StringRef StubName, orc::TargetAddress Addr) {
+ IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported);
+ }
+
+ void setIndirectStubPointer(StringRef Name, orc::TargetAddress Addr) {
+ IndirectStubsMgr->updatePointer(Name, Addr);
+ }
+
+ std::shared_ptr<RuntimeDyld::SymbolResolver>
+ createResolver(LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ auto Resolver = orc::createLambdaResolver(
+ [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) {
+ // Search order:
+ // 1. JIT'd symbols.
+ // 2. Runtime overrides.
+ // 3. External resolver (if present).
+
+ if (auto Sym = CODLayer.findSymbol(Name, true))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+ Sym.getFlags());
+ if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
+ return Sym;
+
+ if (ExternalResolver)
+ return RuntimeDyld::SymbolInfo(ExternalResolver(Name.c_str(),
+ ExternalResolverCtx),
+ llvm::JITSymbolFlags::Exported);
+
+ return RuntimeDyld::SymbolInfo(nullptr);
+ },
+ [](const std::string &Name) {
+ return RuntimeDyld::SymbolInfo(nullptr);
+ }
+ );
+
+ return std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(Resolver));
+ }
+
+ template <typename LayerT>
+ ModuleHandleT addIRModule(LayerT &Layer,
+ Module *M,
+ std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+
+ // Attach a data-layout if one isn't already present.
+ if (M->getDataLayout().isDefault())
+ M->setDataLayout(DL);
+
+ // Record the static constructors and destructors. We have to do this before
+ // we hand over ownership of the module to the JIT.
+ std::vector<std::string> CtorNames, DtorNames;
+ for (auto Ctor : orc::getConstructors(*M))
+ CtorNames.push_back(mangle(Ctor.Func->getName()));
+ for (auto Dtor : orc::getDestructors(*M))
+ DtorNames.push_back(mangle(Dtor.Func->getName()));
+
+ // Create the resolver.
+ auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
+
+ // Add the module to the JIT.
+ std::vector<Module*> S;
+ S.push_back(std::move(M));
+
+ auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr),
+ std::move(Resolver));
+ ModuleHandleT H = createHandle(Layer, LH);
+
+ // Run the static constructors, and save the static destructor runner for
+ // execution when the JIT is torn down.
+ orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), H);
+ CtorRunner.runViaLayer(*this);
+
+ IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H);
+
+ return H;
+ }
+
+ ModuleHandleT addIRModuleEager(Module* M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(CompileLayer, std::move(M),
+ llvm::make_unique<SectionMemoryManager>(),
+ std::move(ExternalResolver), ExternalResolverCtx);
+ }
+
+ ModuleHandleT addIRModuleLazy(Module* M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(CODLayer, std::move(M), nullptr,
+ std::move(ExternalResolver), ExternalResolverCtx);
+ }
+
+ void removeModule(ModuleHandleT H) {
+ GenericHandles[H]->removeModule();
+ GenericHandles[H] = nullptr;
+ FreeHandleIndexes.push_back(H);
+ }
+
+ orc::JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
+ return Sym;
+ return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
+ }
+
+ orc::JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
+ bool ExportedSymbolsOnly) {
+ return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly);
+ }
+
+private:
+
+ template <typename LayerT>
+ unsigned createHandle(LayerT &Layer,
+ typename LayerT::ModuleSetHandleT Handle) {
+ unsigned NewHandle;
+ if (!FreeHandleIndexes.empty()) {
+ NewHandle = FreeHandleIndexes.back();
+ FreeHandleIndexes.pop_back();
+ GenericHandles[NewHandle] = createGenericHandle(Layer, std::move(Handle));
+ return NewHandle;
+ } else {
+ NewHandle = GenericHandles.size();
+ GenericHandles.push_back(createGenericHandle(Layer, std::move(Handle)));
+ }
+ return NewHandle;
+ }
+
+ DataLayout DL;
+ SectionMemoryManager CCMgrMemMgr;
+
+ std::unique_ptr<CompileCallbackMgr> CCMgr;
+ ObjLayerT ObjectLayer;
+ CompileLayerT CompileLayer;
+ CODLayerT CODLayer;
+
+ std::unique_ptr<orc::IndirectStubsManager> IndirectStubsMgr;
+
+ std::vector<std::unique_ptr<GenericHandle>> GenericHandles;
+ std::vector<unsigned> FreeHandleIndexes;
+
+ orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ std::vector<orc::CtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
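createHandle and removeModule above recycle table slots through a free list, so handle values stay small and stable while modules come and go. The same pattern in isolation, as a standalone sketch detached from the LLVM types (HandleTable is a hypothetical rendering):

    #include <memory>
    #include <vector>

    template <typename T> class HandleTable {
      std::vector<std::unique_ptr<T>> Slots;
      std::vector<unsigned> FreeList;

    public:
      unsigned insert(std::unique_ptr<T> V) {
        if (!FreeList.empty()) {
          unsigned H = FreeList.back(); // reuse a vacated slot
          FreeList.pop_back();
          Slots[H] = std::move(V);
          return H;
        }
        Slots.push_back(std::move(V)); // fresh slot at the end
        return static_cast<unsigned>(Slots.size() - 1);
      }
      void remove(unsigned H) {
        Slots[H] = nullptr; // destroy the value, mark the slot free
        FreeList.push_back(H);
      }
      T *get(unsigned H) { return Slots[H].get(); }
    };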
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 7dc5164..38a27cf 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -137,25 +137,26 @@ public:
}
OrcMCJITReplacement(
- std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
- std::unique_ptr<TargetMachine> TM)
- : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
- Resolver(*this), ClientResolver(std::move(ClientResolver)),
- NotifyObjectLoaded(*this), NotifyFinalized(*this),
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+ std::unique_ptr<TargetMachine> TM)
+ : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
+ MemMgr(*this, std::move(MemMgr)), Resolver(*this),
+ ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
+ NotifyFinalized(*this),
ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
- LazyEmitLayer(CompileLayer) {
- setDataLayout(this->TM->getDataLayout());
- }
+ LazyEmitLayer(CompileLayer) {}
void addModule(std::unique_ptr<Module> M) override {
// If this module doesn't have a DataLayout attached then attach the
// default.
- if (M->getDataLayout().isDefault())
- M->setDataLayout(*getDataLayout());
-
+ if (M->getDataLayout().isDefault()) {
+ M->setDataLayout(getDataLayout());
+ } else {
+ assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
+ }
Modules.push_back(std::move(M));
std::vector<Module *> Ms;
Ms.push_back(&*Modules.back());
@@ -174,12 +175,7 @@ public:
std::tie(Obj, Buf) = O.takeBinary();
std::vector<std::unique_ptr<object::ObjectFile>> Objs;
Objs.push_back(std::move(Obj));
- auto H =
- ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
-
- std::vector<std::unique_ptr<MemoryBuffer>> Bufs;
- Bufs.push_back(std::move(Buf));
- ObjectLayer.takeOwnershipOfBuffers(H, std::move(Bufs));
+ ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
}
void addArchive(object::OwningBinary<object::Archive> A) override {
@@ -234,6 +230,10 @@ public:
CompileLayer.setObjectCache(NewCache);
}
+ void setProcessAllSections(bool ProcessAllSections) override {
+ ObjectLayer.setProcessAllSections(ProcessAllSections);
+ }
+
private:
RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
@@ -252,10 +252,12 @@ private:
object::Archive *A = OB.getBinary();
// Look for our symbols in each Archive
object::Archive::child_iterator ChildIt = A->findSym(Name);
+ if (std::error_code EC = ChildIt->getError())
+ report_fatal_error(EC.message());
if (ChildIt != A->child_end()) {
// FIXME: Support nested archives?
ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
- ChildIt->getAsBinary();
+ (*ChildIt)->getAsBinary();
if (ChildBinOrErr.getError())
continue;
std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
@@ -289,7 +291,7 @@ private:
"Incorrect number of Infos for Objects.");
for (unsigned I = 0; I < Objects.size(); ++I)
M.MemMgr.notifyObjectLoaded(&M, *Objects[I]);
- };
+ }
private:
OrcMCJITReplacement &M;
@@ -310,7 +312,7 @@ private:
std::string MangledName;
{
raw_string_ostream MangledNameStream(MangledName);
- Mang.getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
+ Mang.getNameWithPrefix(MangledNameStream, Name, getDataLayout());
}
return MangledName;
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index 258868a..b931f10 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -1,137 +1,170 @@
+//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/Support/Process.h"
#include <array>
-using namespace llvm::orc;
+namespace llvm {
+namespace orc {
-namespace {
+void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+ void *CallbackMgr) {
+
+ const uint8_t ResolverCode[] = {
+ // resolver_entry:
+ 0x55, // 0x00: pushq %rbp
+ 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
+ 0x50, // 0x04: pushq %rax
+ 0x53, // 0x05: pushq %rbx
+ 0x51, // 0x06: pushq %rcx
+ 0x52, // 0x07: pushq %rdx
+ 0x56, // 0x08: pushq %rsi
+ 0x57, // 0x09: pushq %rdi
+ 0x41, 0x50, // 0x0a: pushq %r8
+ 0x41, 0x51, // 0x0c: pushq %r9
+ 0x41, 0x52, // 0x0e: pushq %r10
+ 0x41, 0x53, // 0x10: pushq %r11
+ 0x41, 0x54, // 0x12: pushq %r12
+ 0x41, 0x55, // 0x14: pushq %r13
+ 0x41, 0x56, // 0x16: pushq %r14
+ 0x41, 0x57, // 0x18: pushq %r15
+ 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq $0x208, %rsp
+ 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
+ 0x48, 0x8d, 0x3d, 0x43, 0x00, 0x00, 0x00, // 0x26: leaq 67(%rip), %rdi
+ 0x48, 0x8b, 0x3f, // 0x2d: movq (%rdi), %rdi
+ 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi
+ 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi
+ 0x48, 0xb8, // 0x38: movabsq $0, %rax
+
+ // 0x3a: JIT re-entry fn addr:
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ 0xff, 0xd0, // 0x42: callq *%rax
+ 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp)
+ 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp)
+    0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq $520, %rsp
+ 0x41, 0x5f, // 0x54: popq %r15
+ 0x41, 0x5e, // 0x56: popq %r14
+ 0x41, 0x5d, // 0x58: popq %r13
+ 0x41, 0x5c, // 0x5a: popq %r12
+ 0x41, 0x5b, // 0x5c: popq %r11
+ 0x41, 0x5a, // 0x5e: popq %r10
+ 0x41, 0x59, // 0x60: popq %r9
+ 0x41, 0x58, // 0x62: popq %r8
+ 0x5f, // 0x64: popq %rdi
+ 0x5e, // 0x65: popq %rsi
+ 0x5a, // 0x66: popq %rdx
+ 0x59, // 0x67: popq %rcx
+ 0x5b, // 0x68: popq %rbx
+ 0x58, // 0x69: popq %rax
+ 0x5d, // 0x6a: popq %rbp
+ 0xc3, // 0x6b: retq
+ 0x00, 0x00, 0x00, 0x00, // 0x6c: <padding>
+
+ // 0x70: Callback mgr address.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
-uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
- TargetAddress CallbackID) {
- return JCBM->executeCompileCallback(CallbackID);
+ const unsigned ReentryFnAddrOffset = 0x3a;
+ const unsigned CallbackMgrAddrOffset = 0x70;
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+ sizeof(CallbackMgr));
}
-}
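The new writeResolverCode replaces the old module-inline-asm approach: it copies a pre-assembled machine-code blob and then patches two absolute 64-bit operands at fixed byte offsets. A small self-contained check of the layout arithmetic and the memcpy patching idiom (the buffer contents and addresses below are made-up stand-ins, not the real resolver):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // The leaq at blob offset 0x26 is seven bytes long, so %rip inside it
  // refers to the next instruction at 0x2d; a displacement of 67 (0x43)
  // must land exactly on the callback-manager slot at offset 0x70.
  const unsigned LeaEnd = 0x2d, Disp = 67, CallbackMgrAddrOffset = 0x70;
  assert(LeaEnd + Disp == CallbackMgrAddrOffset);

  // The patching idiom itself: memcpy a pointer-sized value into the blob
  // at a fixed offset; memcpy sidesteps alignment and aliasing concerns
  // when writing into byte buffers that will later be remapped executable.
  uint8_t Blob[0x78] = {};
  const unsigned ReentryFnAddrOffset = 0x3a;
  uint64_t ReentryAddr = 0x0000123456789abcULL; // hypothetical address
  std::memcpy(Blob + ReentryFnAddrOffset, &ReentryAddr, sizeof(ReentryAddr));
  assert(Blob[ReentryFnAddrOffset] == 0xbc); // little-endian low byte first
  return 0;
}
```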
+void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
-namespace llvm {
-namespace orc {
+ unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
-const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
-
-void OrcX86_64::insertResolverBlock(
- Module &M, JITCompileCallbackManagerBase &JCBM) {
-
- // Trampoline code-sequence length, used to get trampoline address from return
- // address.
- const unsigned X86_64_TrampolineLength = 6;
-
- // List of x86-64 GPRs to save. Note - RBP saved separately below.
- std::array<const char *, 14> GPRs = {{
- "rax", "rbx", "rcx", "rdx",
- "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13",
- "r14", "r15"
- }};
-
- // Address of the executeCompileCallback function.
- uint64_t CallbackAddr =
- static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(executeCompileCallback));
-
- std::ostringstream AsmStream;
- Triple TT(M.getTargetTriple());
-
- // Switch to text section.
- if (TT.getOS() == Triple::Darwin)
- AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
- << ".align 4, 0x90\n";
- else
- AsmStream << ".text\n"
- << ".align 16, 0x90\n";
-
- // Bake in a pointer to the callback manager immediately before the
- // start of the resolver function.
- AsmStream << "jit_callback_manager_addr:\n"
- << " .quad " << &JCBM << "\n";
-
- // Start the resolver function.
- AsmStream << ResolverBlockName << ":\n"
- << " pushq %rbp\n"
- << " movq %rsp, %rbp\n";
-
- // Store the GPRs.
- for (const auto &GPR : GPRs)
- AsmStream << " pushq %" << GPR << "\n";
-
- // Store floating-point state with FXSAVE.
- // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
- // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
- // an extra 64 bits of padding to the FXSave area.
- unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
- unsigned FXSaveSize = 512 + Padding;
- AsmStream << " subq $" << FXSaveSize << ", %rsp\n"
- << " fxsave64 (%rsp)\n"
-
- // Load callback manager address, compute trampoline address, call JIT.
- << " lea jit_callback_manager_addr(%rip), %rdi\n"
- << " movq (%rdi), %rdi\n"
- << " movq 0x8(%rbp), %rsi\n"
- << " subq $" << X86_64_TrampolineLength << ", %rsi\n"
- << " movabsq $" << CallbackAddr << ", %rax\n"
- << " callq *%rax\n"
-
- // Replace the return to the trampoline with the return address of the
- // compiled function body.
- << " movq %rax, 0x8(%rbp)\n"
-
- // Restore the floating point state.
- << " fxrstor64 (%rsp)\n"
- << " addq $" << FXSaveSize << ", %rsp\n";
-
- for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
- AsmStream << " popq %" << GPR << "\n";
-
- // Restore original RBP and return to compiled function body.
- AsmStream << " popq %rbp\n"
- << " retq\n";
-
- M.appendModuleInlineAsm(AsmStream.str());
-}
+ memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void*));
-OrcX86_64::LabelNameFtor
-OrcX86_64::insertCompileCallbackTrampolines(Module &M,
- TargetAddress ResolverBlockAddr,
- unsigned NumCalls,
- unsigned StartIndex) {
- const char *ResolverBlockPtrName = "Lorc_resolve_block_addr";
-
- std::ostringstream AsmStream;
- Triple TT(M.getTargetTriple());
-
- if (TT.getOS() == Triple::Darwin)
- AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
- << ".align 4, 0x90\n";
- else
- AsmStream << ".text\n"
- << ".align 16, 0x90\n";
-
- AsmStream << ResolverBlockPtrName << ":\n"
- << " .quad " << ResolverBlockAddr << "\n";
-
- auto GetLabelName =
- [=](unsigned I) {
- std::ostringstream LabelStream;
- LabelStream << "orc_jcc_" << (StartIndex + I);
- return LabelStream.str();
- };
+ uint64_t *Trampolines = reinterpret_cast<uint64_t*>(TrampolineMem);
+ uint64_t CallIndirPCRel = 0xf1c40000000015ff;
- for (unsigned I = 0; I < NumCalls; ++I)
- AsmStream << GetLabelName(I) << ":\n"
- << " callq *" << ResolverBlockPtrName << "(%rip)\n";
-
- M.appendModuleInlineAsm(AsmStream.str());
+ for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
+ Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
+}
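Each trampoline is a 6-byte `callq *disp32(%rip)` padded to 8 bytes with the invalid-opcode bytes 0xC4 0xF1, and the shared resolver pointer is stored immediately after the last trampoline; the `(OffsetToPtr - 6) << 16` term plants the displacement into the disp32 field of the little-endian word. A sketch verifying that arithmetic for a hypothetical trampoline count:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Stored little-endian, "ff 15 <disp32>" puts the opcode in bits 0..15
  // and the disp32 field in bits 16..47 of the 64-bit trampoline word.
  const unsigned TrampolineSize = 8; // x86-64 Orc trampoline stride
  const unsigned NumTrampolines = 4; // hypothetical count

  for (unsigned I = 0; I < NumTrampolines; ++I) {
    unsigned OffsetToPtr = (NumTrampolines - I) * TrampolineSize;
    uint64_t Word =
        0xf1c40000000015ffULL | (uint64_t(OffsetToPtr - 6) << 16);

    // Recover the displacement and confirm it reaches the resolver-pointer
    // slot placed immediately after the last trampoline.
    uint32_t Disp = uint32_t(Word >> 16);
    unsigned RipAfterCall = I * TrampolineSize + 6;
    assert(RipAfterCall + Disp == NumTrampolines * TrampolineSize);
  }
  return 0;
}
```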
- return GetLabelName;
+std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // jmpq *ptr1(%rip)
+ // .byte 0xC4 ; <- Invalid opcode padding.
+ // .byte 0xF1
+ // stub2:
+ // jmpq *ptr2(%rip)
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .quad 0x0
+ // ptr2:
+ // .quad 0x0
+ //
+ // ...
+
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem =
+ sys::OwningMemoryBlock(
+ sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE,
+ EC));
+
+ if (EC)
+ return EC;
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char*>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+  // Populate the stubs page with stubs and mark it executable.
+ uint64_t *Stub = reinterpret_cast<uint64_t*>(StubsBlock.base());
+ uint64_t PtrOffsetField =
+ static_cast<uint64_t>(NumPages * PageSize - 6) << 16;
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
+
+ if (auto EC = sys::Memory::protectMappedMemory(StubsBlock,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC))
+ return EC;
+
+ // Initialize all pointers to point at FailureAddress.
+ void **Ptr = reinterpret_cast<void**>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo.NumStubs = NumStubs;
+ StubsInfo.StubsMem = std::move(StubsMem);
+
+ return std::error_code();
}
} // End namespace orc.
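The page-rounding and stub-encoding arithmetic in emitIndirectStubsBlock can be checked in isolation. A minimal sketch, assuming a 4 KiB page size (the real code asks sys::Process::getPageSize()):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Round the request up to whole pages, then fill the allocation:
  // NumPages = ceil(MinStubs * StubSize / PageSize),
  // NumStubs = floor(NumPages * PageSize / StubSize).
  const unsigned StubSize = 8;    // 6-byte jmp plus 2 padding bytes
  const unsigned PageSize = 4096; // hypothetical page size
  const unsigned MinStubs = 1000;

  unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
  unsigned NumStubs = (NumPages * PageSize) / StubSize;
  assert(NumPages == 2 && NumStubs == 1024 && NumStubs >= MinStubs);

  // Every stub is "ff 25 <disp32>" (jmpq *disp32(%rip)).  Stub I sits at
  // byte I*8 of the stubs block and its pointer at byte I*8 of the pointers
  // block one stubs-block further on, so one displacement fits all stubs:
  // NumPages*PageSize - 6.
  uint64_t PtrOffsetField = uint64_t(NumPages * PageSize - 6) << 16;
  uint64_t Stub0 = 0xF1C40000000025ffULL | PtrOffsetField;
  uint32_t Disp = uint32_t(Stub0 >> 16);
  assert(6 + Disp == NumPages * PageSize);
  return 0;
}
```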
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 93287a3..a95f3bb 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -41,20 +41,21 @@ void RuntimeDyldImpl::deregisterEHFrames() {}
#ifndef NDEBUG
static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
- dbgs() << "----- Contents of section " << S.Name << " " << State << " -----";
+ dbgs() << "----- Contents of section " << S.getName() << " " << State
+ << " -----";
- if (S.Address == nullptr) {
+ if (S.getAddress() == nullptr) {
dbgs() << "\n <section not emitted>\n";
return;
}
const unsigned ColsPerRow = 16;
- uint8_t *DataAddr = S.Address;
- uint64_t LoadAddr = S.LoadAddress;
+ uint8_t *DataAddr = S.getAddress();
+ uint64_t LoadAddr = S.getLoadAddress();
unsigned StartPadding = LoadAddr & (ColsPerRow - 1);
- unsigned BytesRemaining = S.Size;
+ unsigned BytesRemaining = S.getSize();
if (StartPadding) {
dbgs() << "\n" << format("0x%016" PRIx64,
@@ -82,30 +83,41 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
void RuntimeDyldImpl::resolveRelocations() {
MutexGuard locked(lock);
+ // Print out the sections prior to relocation.
+ DEBUG(
+ for (int i = 0, e = Sections.size(); i != e; ++i)
+ dumpSectionMemory(Sections[i], "before relocations");
+ );
+
// First, resolve relocations associated with external symbols.
resolveExternalSymbols();
- // Just iterate over the sections we have and resolve all the relocations
- // in them. Gross overkill, but it gets the job done.
- for (int i = 0, e = Sections.size(); i != e; ++i) {
+ // Iterate over all outstanding relocations
+ for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) {
// The Section here (Sections[Idx]) refers to the section in which the
// symbol for the relocation is located. The SectionID in the relocation
// entry provides the section to which the relocation will be applied.
- uint64_t Addr = Sections[i].LoadAddress;
- DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t"
+ int Idx = it->first;
+ uint64_t Addr = Sections[Idx].getLoadAddress();
+ DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t"
<< format("%p", (uintptr_t)Addr) << "\n");
- DEBUG(dumpSectionMemory(Sections[i], "before relocations"));
- resolveRelocationList(Relocations[i], Addr);
- DEBUG(dumpSectionMemory(Sections[i], "after relocations"));
- Relocations.erase(i);
+ resolveRelocationList(it->second, Addr);
}
+ Relocations.clear();
+
+ // Print out sections after relocation.
+ DEBUG(
+ for (int i = 0, e = Sections.size(); i != e; ++i)
+ dumpSectionMemory(Sections[i], "after relocations");
+ );
+
}
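The reworked resolveRelocations walks a map keyed by the ID of the section that defines the symbol, instead of scanning every section. A toy model of that bookkeeping; the names and the flat 64-bit fixup are illustrative, not the real RuntimeDyld types:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

// Map key: ID of the section defining the symbol.  Each entry records the
// section/offset the fixup is written into.
struct Reloc { unsigned SectionID; uint64_t Offset; int64_t Addend; };
struct Section { std::vector<uint8_t> Mem; uint64_t LoadAddress; };

static void resolveAll(std::map<unsigned, std::vector<Reloc>> &Relocations,
                       std::vector<Section> &Sections) {
  for (auto &KV : Relocations) {
    // Address every entry in this bucket resolves against.
    uint64_t Value = Sections[KV.first].LoadAddress;
    for (const Reloc &R : KV.second) {
      uint64_t Patched = Value + R.Addend; // absolute 64-bit fixup
      std::memcpy(&Sections[R.SectionID].Mem[R.Offset], &Patched, 8);
    }
  }
  Relocations.clear(); // nothing outstanding once everything is applied
}

int main() {
  std::vector<Section> Sections(2);
  Sections[0] = {std::vector<uint8_t>(64), 0x1000};
  Sections[1] = {std::vector<uint8_t>(64), 0x2000};
  std::map<unsigned, std::vector<Reloc>> Relocations;
  Relocations[1].push_back({0, 8, 0}); // patch section 0 with addr of 1
  resolveAll(Relocations, Sections);
  uint64_t Patched;
  std::memcpy(&Patched, &Sections[0].Mem[8], 8);
  assert(Patched == 0x2000);
  return 0;
}
```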
void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
uint64_t TargetAddress) {
MutexGuard locked(lock);
for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
- if (Sections[i].Address == LocalAddress) {
+ if (Sections[i].getAddress() == LocalAddress) {
reassignSectionAddress(i, TargetAddress);
return;
}
@@ -122,14 +134,10 @@ static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec,
return std::error_code();
}
-std::pair<unsigned, unsigned>
+RuntimeDyldImpl::ObjSectionToIDMap
RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
MutexGuard locked(lock);
- // Grab the first Section ID. We'll use this later to construct the underlying
- // range for the returned LoadedObjectInfo.
- unsigned SectionsAddedBeginIdx = Sections.size();
-
// Save information about our target
Arch = (Triple::ArchType)Obj.getArch();
IsTargetLittleEndian = Obj.isLittleEndian();
@@ -155,39 +163,56 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
++I) {
uint32_t Flags = I->getFlags();
- bool IsCommon = Flags & SymbolRef::SF_Common;
- if (IsCommon)
+ if (Flags & SymbolRef::SF_Common)
CommonSymbols.push_back(*I);
else {
object::SymbolRef::Type SymType = I->getType();
- if (SymType == object::SymbolRef::ST_Function ||
- SymType == object::SymbolRef::ST_Data ||
- SymType == object::SymbolRef::ST_Unknown) {
-
- ErrorOr<StringRef> NameOrErr = I->getName();
- Check(NameOrErr.getError());
- StringRef Name = *NameOrErr;
- section_iterator SI = Obj.section_end();
- Check(I->getSection(SI));
+ // Get symbol name.
+ ErrorOr<StringRef> NameOrErr = I->getName();
+ Check(NameOrErr.getError());
+ StringRef Name = *NameOrErr;
+
+ // Compute JIT symbol flags.
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
+ if (Flags & SymbolRef::SF_Weak)
+ RTDyldSymFlags |= JITSymbolFlags::Weak;
+ if (Flags & SymbolRef::SF_Exported)
+ RTDyldSymFlags |= JITSymbolFlags::Exported;
+
+ if (Flags & SymbolRef::SF_Absolute &&
+ SymType != object::SymbolRef::ST_File) {
+ auto Addr = I->getAddress();
+ Check(Addr.getError());
+ uint64_t SectOffset = *Addr;
+ unsigned SectionID = AbsoluteSymbolSection;
+
+ DEBUG(dbgs() << "\tType: " << SymType << " (absolute) Name: " << Name
+ << " SID: " << SectionID << " Offset: "
+ << format("%p", (uintptr_t)SectOffset)
+ << " flags: " << Flags << "\n");
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags);
+ } else if (SymType == object::SymbolRef::ST_Function ||
+ SymType == object::SymbolRef::ST_Data ||
+ SymType == object::SymbolRef::ST_Unknown ||
+ SymType == object::SymbolRef::ST_Other) {
+
+ ErrorOr<section_iterator> SIOrErr = I->getSection();
+ Check(SIOrErr.getError());
+ section_iterator SI = *SIOrErr;
if (SI == Obj.section_end())
continue;
+ // Get symbol offset.
uint64_t SectOffset;
Check(getOffset(*I, *SI, SectOffset));
- StringRef SectionData;
- Check(SI->getContents(SectionData));
bool IsCode = SI->isText();
- unsigned SectionID =
- findOrEmitSection(Obj, *SI, IsCode, LocalSections);
+ unsigned SectionID = findOrEmitSection(Obj, *SI, IsCode, LocalSections);
+
DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name
<< " SID: " << SectionID << " Offset: "
<< format("%p", (uintptr_t)SectOffset)
<< " flags: " << Flags << "\n");
- JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
- if (Flags & SymbolRef::SF_Weak)
- RTDyldSymFlags |= JITSymbolFlags::Weak;
- if (Flags & SymbolRef::SF_Exported)
- RTDyldSymFlags |= JITSymbolFlags::Exported;
GlobalSymbolTable[Name] =
SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags);
}
@@ -231,9 +256,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Give the subclasses a chance to tie-up any loose ends.
finalizeLoad(Obj, LocalSections);
- unsigned SectionsAddedEndIdx = Sections.size();
+// for (auto E : LocalSections)
+// llvm::dbgs() << "Added: " << E.first.getRawDataRefImpl() << " -> " << E.second << "\n";
- return std::make_pair(SectionsAddedBeginIdx, SectionsAddedEndIdx);
+ return LocalSections;
}
// A helper method for computeTotalAllocSize.
@@ -406,10 +432,9 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj,
if (!(RelSecI == Section))
continue;
- for (const RelocationRef &Reloc : SI->relocations()) {
- (void)Reloc;
- StubBufSize += StubSize;
- }
+ for (const RelocationRef &Reloc : SI->relocations())
+ if (relocationNeedsStub(Reloc))
+ StubBufSize += StubSize;
}
// Get section data size and alignment
@@ -492,7 +517,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
- Sections.push_back(SectionEntry("<common symbols>", Addr, CommonSize, 0));
+ Sections.push_back(
+ SectionEntry("<common symbols>", Addr, CommonSize, CommonSize, 0));
memset(Addr, 0, CommonSize);
DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: "
@@ -524,6 +550,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Offset += Size;
Addr += Size;
}
+
+ if (Checker)
+ Checker->registerSection(Obj.getFileName(), SectionID);
}
unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
@@ -556,12 +585,20 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
uint8_t *Addr;
const char *pData = nullptr;
- // In either case, set the location of the unrelocated section in memory,
- // since we still process relocations for it even if we're not applying them.
- Check(Section.getContents(data));
- // Virtual sections have no data in the object image, so leave pData = 0
- if (!IsVirtual)
+ // If this section contains any bits (i.e. isn't a virtual or bss section),
+ // grab a reference to them.
+ if (!IsVirtual && !IsZeroInit) {
+ // In either case, set the location of the unrelocated section in memory,
+ // since we still process relocations for it even if we're not applying them.
+ Check(Section.getContents(data));
pData = data.data();
+ }
+
+ // Code section alignment needs to be at least as high as stub alignment or
+  // padding calculations may be incorrect when the section is remapped to a
+ // higher alignment.
+ if (IsCode)
+ Alignment = std::max(Alignment, getStubAlignment());
// Some sections, such as debug info, don't need to be loaded for execution.
// Leave those where they are.
@@ -606,7 +643,8 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
<< " Allocate: " << Allocate << "\n");
}
- Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData));
+ Sections.push_back(
+ SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData));
if (Checker)
Checker->registerSection(Obj.getFileName(), SectionID);
@@ -742,11 +780,11 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
// Addr is a uint64_t because we can't assume the pointer width
// of the target is the same as that of the host. Just use a generic
// "big enough" type.
- DEBUG(dbgs() << "Reassigning address for section "
- << SectionID << " (" << Sections[SectionID].Name << "): "
- << format("0x%016" PRIx64, Sections[SectionID].LoadAddress) << " -> "
- << format("0x%016" PRIx64, Addr) << "\n");
- Sections[SectionID].LoadAddress = Addr;
+ DEBUG(dbgs() << "Reassigning address for section " << SectionID << " ("
+ << Sections[SectionID].getName() << "): "
+ << format("0x%016" PRIx64, Sections[SectionID].getLoadAddress())
+ << " -> " << format("0x%016" PRIx64, Addr) << "\n");
+ Sections[SectionID].setLoadAddress(Addr);
}
void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
@@ -754,7 +792,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const RelocationEntry &RE = Relocs[i];
// Ignore relocations for sections that were not loaded
- if (Sections[RE.SectionID].Address == nullptr)
+ if (Sections[RE.SectionID].getAddress() == nullptr)
continue;
resolveRelocation(RE, Value);
}
@@ -818,10 +856,11 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
// RuntimeDyld class implementation
uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
- StringRef SectionName) const {
- for (unsigned I = BeginIdx; I != EndIdx; ++I)
- if (RTDyld.Sections[I].Name == SectionName)
- return RTDyld.Sections[I].LoadAddress;
+ const object::SectionRef &Sec) const {
+
+ auto I = ObjSecToIDMap.find(Sec);
+ if (I != ObjSecToIDMap.end())
+ return RTDyld.Sections[I->second].getLoadAddress();
return 0;
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1dacc13..e5fab92 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "RuntimeDyldCOFF.h"
+#include "Targets/RuntimeDyldCOFFI386.h"
#include "Targets/RuntimeDyldCOFFX86_64.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
@@ -24,12 +25,11 @@ using namespace llvm::object;
namespace {
-class LoadedCOFFObjectInfo
+class LoadedCOFFObjectInfo final
: public RuntimeDyld::LoadedObjectInfoHelper<LoadedCOFFObjectInfo> {
public:
- LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
- unsigned EndIdx)
- : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
+ LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
+ : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {}
OwningBinary<ObjectFile>
getObjectForDebug(const ObjectFile &Obj) const override {
@@ -48,6 +48,8 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
default:
llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
break;
+ case Triple::x86:
+ return make_unique<RuntimeDyldCOFFI386>(MemMgr, Resolver);
case Triple::x86_64:
return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
}
@@ -55,10 +57,7 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
- unsigned SectionStartIdx, SectionEndIdx;
- std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O);
- return llvm::make_unique<LoadedCOFFObjectInfo>(*this, SectionStartIdx,
- SectionEndIdx);
+ return llvm::make_unique<LoadedCOFFObjectInfo>(*this, loadObjectImpl(O));
}
uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index ae199b7..58ce88a 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -727,7 +727,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
}
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
- if (getRTDyld().getSymbolLocalAddress(Symbol))
+ if (getRTDyld().getSymbol(Symbol))
return true;
return !!getRTDyld().Resolver.findSymbol(Symbol);
}
@@ -799,11 +799,10 @@ std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getSectionAddr(
unsigned SectionID = SectionInfo->SectionID;
uint64_t Addr;
if (IsInsideLoad)
- Addr =
- static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(getRTDyld().Sections[SectionID].Address));
+ Addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(
+ getRTDyld().Sections[SectionID].getAddress()));
else
- Addr = getRTDyld().Sections[SectionID].LoadAddress;
+ Addr = getRTDyld().Sections[SectionID].getLoadAddress();
return std::make_pair(Addr, std::string(""));
}
@@ -835,11 +834,11 @@ std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubAddrFor(
uint64_t Addr;
if (IsInsideLoad) {
- uintptr_t SectionBase =
- reinterpret_cast<uintptr_t>(getRTDyld().Sections[SectionID].Address);
+ uintptr_t SectionBase = reinterpret_cast<uintptr_t>(
+ getRTDyld().Sections[SectionID].getAddress());
Addr = static_cast<uint64_t>(SectionBase) + StubOffset;
} else {
- uint64_t SectionBase = getRTDyld().Sections[SectionID].LoadAddress;
+ uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress();
Addr = SectionBase + StubOffset;
}
@@ -855,16 +854,16 @@ RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const {
const auto &SymInfo = pos->second;
uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID());
return StringRef(reinterpret_cast<const char *>(SectionAddr) +
- SymInfo.getOffset(),
- getRTDyld().Sections[SymInfo.getSectionID()].Size -
- SymInfo.getOffset());
+ SymInfo.getOffset(),
+ getRTDyld().Sections[SymInfo.getSectionID()].getSize() -
+ SymInfo.getOffset());
}
void RuntimeDyldCheckerImpl::registerSection(
StringRef FilePath, unsigned SectionID) {
StringRef FileName = sys::path::filename(FilePath);
const SectionEntry &Section = getRTDyld().Sections[SectionID];
- StringRef SectionName = Section.Name;
+ StringRef SectionName = Section.getName();
Stubs[FileName][SectionName].SectionID = SectionID;
}
@@ -874,7 +873,7 @@ void RuntimeDyldCheckerImpl::registerStubMap(
const RuntimeDyldImpl::StubMap &RTDyldStubs) {
StringRef FileName = sys::path::filename(FilePath);
const SectionEntry &Section = getRTDyld().Sections[SectionID];
- StringRef SectionName = Section.Name;
+ StringRef SectionName = Section.getName();
Stubs[FileName][SectionName].SectionID = SectionID;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 3787950..e09b71a 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -66,7 +66,6 @@ public:
static inline bool classof(const ELFObjectFile<ELFT> *v) {
return v->isDyldType();
}
-
};
@@ -104,12 +103,11 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
sym->st_value = static_cast<addr_type>(Addr);
}
-class LoadedELFObjectInfo
+class LoadedELFObjectInfo final
: public RuntimeDyld::LoadedObjectInfoHelper<LoadedELFObjectInfo> {
public:
- LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
- unsigned EndIdx)
- : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
+ LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap)
+ : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {}
OwningBinary<ObjectFile>
getObjectForDebug(const ObjectFile &Obj) const override;
@@ -118,6 +116,7 @@ public:
template <typename ELFT>
std::unique_ptr<DyldELFObject<ELFT>>
createRTDyldELFObject(MemoryBufferRef Buffer,
+ const ObjectFile &SourceObject,
const LoadedELFObjectInfo &L,
std::error_code &ec) {
typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
@@ -127,6 +126,7 @@ createRTDyldELFObject(MemoryBufferRef Buffer,
llvm::make_unique<DyldELFObject<ELFT>>(Buffer, ec);
// Iterate over all sections in the object.
+ auto SI = SourceObject.section_begin();
for (const auto &Sec : Obj->sections()) {
StringRef SectionName;
Sec.getName(SectionName);
@@ -135,12 +135,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer,
Elf_Shdr *shdr = const_cast<Elf_Shdr *>(
reinterpret_cast<const Elf_Shdr *>(ShdrRef.p));
- if (uint64_t SecLoadAddr = L.getSectionLoadAddress(SectionName)) {
+ if (uint64_t SecLoadAddr = L.getSectionLoadAddress(*SI)) {
// This assumes that the address passed in matches the target address
// bitness. The template-based type cast handles everything else.
shdr->sh_addr = static_cast<addr_type>(SecLoadAddr);
}
}
+ ++SI;
}
return Obj;
@@ -158,16 +159,20 @@ OwningBinary<ObjectFile> createELFDebugObject(const ObjectFile &Obj,
std::unique_ptr<ObjectFile> DebugObj;
if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) {
typedef ELFType<support::little, false> ELF32LE;
- DebugObj = createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), L, ec);
+ DebugObj = createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), Obj, L,
+ ec);
} else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) {
typedef ELFType<support::big, false> ELF32BE;
- DebugObj = createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), L, ec);
+ DebugObj = createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), Obj, L,
+ ec);
} else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) {
typedef ELFType<support::big, true> ELF64BE;
- DebugObj = createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), L, ec);
+ DebugObj = createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), Obj, L,
+ ec);
} else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) {
typedef ELFType<support::little, true> ELF64LE;
- DebugObj = createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), L, ec);
+ DebugObj = createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), Obj, L,
+ ec);
} else
llvm_unreachable("Unexpected ELF format");
@@ -181,7 +186,7 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const {
return createELFDebugObject(Obj, *this);
}
-} // namespace
+} // anonymous namespace
namespace llvm {
@@ -193,9 +198,9 @@ RuntimeDyldELF::~RuntimeDyldELF() {}
void RuntimeDyldELF::registerEHFrames() {
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = UnregisteredEHFrameSections[i];
- uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
- uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
- size_t EHFrameSize = Sections[EHFrameSID].Size;
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress();
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress();
+ size_t EHFrameSize = Sections[EHFrameSID].getSize();
MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
RegisteredEHFrameSections.push_back(EHFrameSID);
}
@@ -205,9 +210,9 @@ void RuntimeDyldELF::registerEHFrames() {
void RuntimeDyldELF::deregisterEHFrames() {
for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = RegisteredEHFrameSections[i];
- uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
- uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
- size_t EHFrameSize = Sections[EHFrameSID].Size;
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress();
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress();
+ size_t EHFrameSize = Sections[EHFrameSID].getSize();
MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
}
RegisteredEHFrameSections.clear();
@@ -215,10 +220,7 @@ void RuntimeDyldELF::deregisterEHFrames() {
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldELF::loadObject(const object::ObjectFile &O) {
- unsigned SectionStartIdx, SectionEndIdx;
- std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O);
- return llvm::make_unique<LoadedELFObjectInfo>(*this, SectionStartIdx,
- SectionEndIdx);
+ return llvm::make_unique<LoadedELFObjectInfo>(*this, loadObjectImpl(O));
}
void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
@@ -230,9 +232,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
llvm_unreachable("Relocation type not implemented yet!");
break;
case ELF::R_X86_64_64: {
- support::ulittle64_t::ref(Section.Address + Offset) = Value + Addend;
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) =
+ Value + Addend;
DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at "
- << format("%p\n", Section.Address + Offset));
+ << format("%p\n", Section.getAddressWithOffset(Offset)));
break;
}
case ELF::R_X86_64_32:
@@ -242,23 +245,34 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
(Type == ELF::R_X86_64_32S &&
((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN)));
uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
- support::ulittle32_t::ref(Section.Address + Offset) = TruncatedAddr;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ TruncatedAddr;
DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) << " at "
- << format("%p\n", Section.Address + Offset));
+ << format("%p\n", Section.getAddressWithOffset(Offset)));
+ break;
+ }
+ case ELF::R_X86_64_PC8: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ int64_t RealOffset = Value + Addend - FinalAddress;
+ assert(isInt<8>(RealOffset));
+ int8_t TruncOffset = (RealOffset & 0xFF);
+ Section.getAddress()[Offset] = TruncOffset;
break;
}
case ELF::R_X86_64_PC32: {
- uint64_t FinalAddress = Section.LoadAddress + Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
int64_t RealOffset = Value + Addend - FinalAddress;
assert(isInt<32>(RealOffset));
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
- support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ TruncOffset;
break;
}
case ELF::R_X86_64_PC64: {
- uint64_t FinalAddress = Section.LoadAddress + Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
int64_t RealOffset = Value + Addend - FinalAddress;
- support::ulittle64_t::ref(Section.Address + Offset) = RealOffset;
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) =
+ RealOffset;
break;
}
}
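All the PC-relative cases above reduce to target + addend - fixup_load_address, computed against the target process's load addresses rather than host addresses. A worked example with made-up numbers for the R_X86_64_PC32 path:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint64_t SectionLoadAddress = 0x7f0000001000ULL; // hypothetical
  uint64_t Offset = 0x20;             // fixup location within the section
  uint64_t Value = 0x7f0000003000ULL; // resolved symbol address
  int64_t Addend = -4;                // typical for call/jmp operands

  uint64_t FinalAddress = SectionLoadAddress + Offset;
  int64_t RealOffset = Value + Addend - FinalAddress;

  // R_X86_64_PC32 requires the delta to fit in a signed 32-bit field.
  assert(RealOffset >= INT32_MIN && RealOffset <= INT32_MAX);
  int32_t TruncOffset = int32_t(RealOffset & 0xFFFFFFFF);
  assert(TruncOffset == 0x1fdc); // 0x2ffc - 0x1020 within the load image
  return 0;
}
```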
@@ -269,13 +283,16 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
uint32_t Type, int32_t Addend) {
switch (Type) {
case ELF::R_386_32: {
- support::ulittle32_t::ref(Section.Address + Offset) = Value + Addend;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ Value + Addend;
break;
}
case ELF::R_386_PC32: {
- uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
+ uint32_t FinalAddress =
+ Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF;
uint32_t RealOffset = Value + Addend - FinalAddress;
- support::ulittle32_t::ref(Section.Address + Offset) = RealOffset;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ RealOffset;
break;
}
default:
@@ -289,11 +306,12 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
uint64_t Offset, uint64_t Value,
uint32_t Type, int64_t Addend) {
- uint32_t *TargetPtr = reinterpret_cast<uint32_t *>(Section.Address + Offset);
- uint64_t FinalAddress = Section.LoadAddress + Offset;
+ uint32_t *TargetPtr =
+ reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x"
- << format("%llx", Section.Address + Offset)
+ << format("%llx", Section.getAddressWithOffset(Offset))
<< " FinalAddress: 0x" << format("%llx", FinalAddress)
<< " Value: 0x" << format("%llx", Value) << " Type: 0x"
<< format("%x", Type) << " Addend: 0x" << format("%llx", Addend)
@@ -305,7 +323,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
break;
case ELF::R_AARCH64_ABS64: {
uint64_t *TargetPtr =
- reinterpret_cast<uint64_t *>(Section.Address + Offset);
+ reinterpret_cast<uint64_t *>(Section.getAddressWithOffset(Offset));
*TargetPtr = Value + Addend;
break;
}
@@ -428,12 +446,13 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
uint64_t Offset, uint32_t Value,
uint32_t Type, int32_t Addend) {
// TODO: Add Thumb relocations.
- uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset);
- uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
+ uint32_t *TargetPtr =
+ reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF;
Value += Addend;
DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: "
- << Section.Address + Offset
+ << Section.getAddressWithOffset(Offset)
<< " FinalAddress: " << format("%p", FinalAddress) << " Value: "
<< format("%x", Value) << " Type: " << format("%x", Type)
<< " Addend: " << format("%x", Addend) << "\n");
@@ -477,13 +496,14 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
uint64_t Offset, uint32_t Value,
uint32_t Type, int32_t Addend) {
- uint8_t *TargetPtr = Section.Address + Offset;
+ uint8_t *TargetPtr = Section.getAddressWithOffset(Offset);
Value += Addend;
DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: "
- << Section.Address + Offset << " FinalAddress: "
- << format("%p", Section.LoadAddress + Offset) << " Value: "
- << format("%x", Value) << " Type: " << format("%x", Type)
+ << Section.getAddressWithOffset(Offset) << " FinalAddress: "
+ << format("%p", Section.getLoadAddressWithOffset(Offset))
+ << " Value: " << format("%x", Value)
+ << " Type: " << format("%x", Type)
<< " Addend: " << format("%x", Addend) << "\n");
uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
@@ -512,47 +532,47 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
case ELF::R_MIPS_PC32: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4);
break;
}
case ELF::R_MIPS_PC16: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xffff0000;
Insn |= ((Value - FinalAddress) >> 2) & 0xffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
case ELF::R_MIPS_PC19_S2: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xfff80000;
Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
case ELF::R_MIPS_PC21_S2: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xffe00000;
Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
case ELF::R_MIPS_PC26_S2: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xfc000000;
Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
case ELF::R_MIPS_PCHI16: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xffff0000;
Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
case ELF::R_MIPS_PCLO16: {
- uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
Insn &= 0xffff0000;
Insn |= (Value - FinalAddress) & 0xffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
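The MIPS PC-relative cases each scale the delta by the field's shift amount before masking it into the instruction word. A worked R_MIPS_PC16-style example with hypothetical addresses and opcode:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t FinalAddress = 0x00400100; // hypothetical fixup address
  uint32_t Value = 0x00400180;        // hypothetical branch target
  uint32_t Insn = 0x10400000;         // branch opcode, offset field zeroed

  // Encode: word-scaled (>> 2) displacement in the low 16 bits.
  Insn &= 0xffff0000;
  Insn |= ((Value - FinalAddress) >> 2) & 0xffff;
  assert((Insn & 0xffff) == 0x20); // 0x80 bytes == 0x20 words

  // Decode inverts the scaling: field * 4 added back at the fixup PC.
  uint32_t Decoded = FinalAddress + ((Insn & 0xffff) << 2);
  assert(Decoded == Value);
  return 0;
}
```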
@@ -603,7 +623,8 @@ void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section,
CalculatedValue, SymOffset,
SectionID);
}
- applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType);
+ applyMIPS64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue,
+ RelType);
}
int64_t
@@ -613,13 +634,12 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section,
uint64_t SymOffset, SID SectionID) {
DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x"
- << format("%llx", Section.Address + Offset)
+ << format("%llx", Section.getAddressWithOffset(Offset))
<< " FinalAddress: 0x"
- << format("%llx", Section.LoadAddress + Offset)
+ << format("%llx", Section.getLoadAddressWithOffset(Offset))
<< " Value: 0x" << format("%llx", Value) << " Type: 0x"
<< format("%x", Type) << " Addend: 0x" << format("%llx", Addend)
- << " SymOffset: " << format("%x", SymOffset)
- << "\n");
+ << " SymOffset: " << format("%x", SymOffset) << "\n");
switch (Type) {
default:
@@ -672,35 +692,35 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section,
return Value + Addend - (GOTAddr + 0x7ff0);
}
case ELF::R_MIPS_PC16: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return ((Value + Addend - FinalAddress) >> 2) & 0xffff;
}
case ELF::R_MIPS_PC32: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return Value + Addend - FinalAddress;
}
case ELF::R_MIPS_PC18_S3: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
- return ((Value + Addend - ((FinalAddress | 7) ^ 7)) >> 3) & 0x3ffff;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff;
}
case ELF::R_MIPS_PC19_S2: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
- return ((Value + Addend - FinalAddress) >> 2) & 0x7ffff;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
}
case ELF::R_MIPS_PC21_S2: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff;
}
case ELF::R_MIPS_PC26_S2: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff;
}
case ELF::R_MIPS_PCHI16: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff;
}
case ELF::R_MIPS_PCLO16: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
return (Value + Addend - FinalAddress) & 0xffff;
}
}
@@ -769,7 +789,7 @@ void RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj,
// relocation) without a .toc directive. In this case just use the
// first section (which is usually the .odp) since the code won't
// reference the .toc base directly.
- Rel.SymbolName = NULL;
+ Rel.SymbolName = nullptr;
Rel.SectionID = 0;
// The TOC consists of sections .got, .toc, .tocbss, .plt in that
@@ -842,8 +862,9 @@ void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
if (Rel.Addend != (int64_t)TargetSymbolOffset)
continue;
- section_iterator tsi(Obj.section_end());
- check(TargetSymbol->getSection(tsi));
+ ErrorOr<section_iterator> TSIOrErr = TargetSymbol->getSection();
+ check(TSIOrErr.getError());
+ section_iterator tsi = *TSIOrErr;
bool IsCode = tsi->isText();
Rel.SectionID = findOrEmitSection(Obj, (*tsi), IsCode, LocalSections);
Rel.Addend = (intptr_t)Addend;
@@ -884,10 +905,30 @@ static inline uint16_t applyPPChighesta (uint64_t value) {
return ((value + 0x8000) >> 48) & 0xffff;
}
+void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type, int64_t Addend) {
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_PPC_ADDR16_LO:
+ writeInt16BE(LocalAddress, applyPPClo(Value + Addend));
+ break;
+ case ELF::R_PPC_ADDR16_HI:
+ writeInt16BE(LocalAddress, applyPPChi(Value + Addend));
+ break;
+ case ELF::R_PPC_ADDR16_HA:
+ writeInt16BE(LocalAddress, applyPPCha(Value + Addend));
+ break;
+ }
+}
+
void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
uint64_t Offset, uint64_t Value,
uint32_t Type, int64_t Addend) {
- uint8_t *LocalAddress = Section.Address + Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
switch (Type) {
default:
llvm_unreachable("Relocation type not implemented yet!");
@@ -929,17 +970,17 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc));
} break;
case ELF::R_PPC64_REL16_LO: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
uint64_t Delta = Value - FinalAddress + Addend;
writeInt16BE(LocalAddress, applyPPClo(Delta));
} break;
case ELF::R_PPC64_REL16_HI: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
uint64_t Delta = Value - FinalAddress + Addend;
writeInt16BE(LocalAddress, applyPPChi(Delta));
} break;
case ELF::R_PPC64_REL16_HA: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
uint64_t Delta = Value - FinalAddress + Addend;
writeInt16BE(LocalAddress, applyPPCha(Delta));
} break;
@@ -950,22 +991,22 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
writeInt32BE(LocalAddress, Result);
} break;
case ELF::R_PPC64_REL24: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
- if (SignExtend32<24>(delta) != delta)
+ if (SignExtend32<26>(delta) != delta)
llvm_unreachable("Relocation R_PPC64_REL24 overflow");
// Generates a 'bl <address>' instruction
writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
} break;
case ELF::R_PPC64_REL32: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
if (SignExtend32<32>(delta) != delta)
llvm_unreachable("Relocation R_PPC64_REL32 overflow");
writeInt32BE(LocalAddress, delta);
} break;
case ELF::R_PPC64_REL64: {
- uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
uint64_t Delta = Value - FinalAddress + Addend;
writeInt64BE(LocalAddress, Delta);
} break;
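The hunk above also corrects the R_PPC64_REL24 range check from SignExtend32<24> to SignExtend32<26>: the 24-bit LI field counts words, so the byte delta it can express spans 26 signed bits (roughly ±32 MiB). A standalone model of the range check and field insertion:

```cpp
#include <cassert>
#include <cstdint>

// Same trick as LLVM's SignExtend32<B>: shift the low B bits to the top,
// then arithmetic-shift back down to sign-extend.
static int32_t signExtend32(uint32_t X, unsigned B) {
  return int32_t(X << (32 - B)) >> (32 - B);
}

int main() {
  int32_t delta = 0x01FFFFFC; // +32 MiB - 4: representable
  assert(signExtend32(delta, 26) == delta);
  assert((delta & 0x3) == 0); // branch targets are word aligned

  int32_t tooFar = 0x02000000; // +32 MiB: out of range
  assert(signExtend32(tooFar, 26) != tooFar);

  // Field insertion used by the resolver: 'bl' opcode plus masked delta.
  uint32_t Insn = 0x48000001 | (uint32_t(delta) & 0x03FFFFFC);
  assert((Insn >> 26) == 0x12); // primary opcode 18 == bl
  return 0;
}
```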
@@ -978,27 +1019,27 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
uint64_t Offset, uint64_t Value,
uint32_t Type, int64_t Addend) {
- uint8_t *LocalAddress = Section.Address + Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
switch (Type) {
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
case ELF::R_390_PC16DBL:
case ELF::R_390_PLT16DBL: {
- int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow");
writeInt16BE(LocalAddress, Delta / 2);
break;
}
case ELF::R_390_PC32DBL:
case ELF::R_390_PLT32DBL: {
- int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow");
writeInt32BE(LocalAddress, Delta / 2);
break;
}
case ELF::R_390_PC32: {
- int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
assert(int32_t(Delta) == Delta && "R_390_PC32 overflow");
writeInt32BE(LocalAddress, Delta);
break;
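SystemZ PC-relative fields count 2-byte halfwords, which is why each case above stores Delta / 2 after asserting the delta is even and in range. A worked example with made-up addresses:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint64_t FixupAddr = 0x1000; // hypothetical section load addr + offset
  uint64_t Target = 0x1a0c;    // hypothetical Value + Addend
  int64_t Delta = int64_t(Target) - int64_t(FixupAddr);

  // Mirrors the R_390_PC32DBL assert: even, and Delta/2 fits in 32 bits.
  assert(int32_t(Delta / 2) * 2 == Delta);
  int32_t Field = int32_t(Delta / 2);
  assert(Field == 0x506);

  // The CPU reconstructs the byte displacement by doubling the field.
  assert(FixupAddr + 2 * int64_t(Field) == Target);
  return 0;
}
```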
@@ -1072,6 +1113,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
else
llvm_unreachable("Mips ABI not handled");
break;
+ case Triple::ppc:
+ resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
+ break;
case Triple::ppc64: // Fall through.
case Triple::ppc64le:
resolvePPC64Relocation(Section, Offset, Value, Type, Addend);
@@ -1085,7 +1129,7 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
}
void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const {
- return (void*)(Sections[SectionID].ObjAddress + Offset);
+ return (void *)(Sections[SectionID].getObjAddress() + Offset);
}
void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value) {
@@ -1096,6 +1140,29 @@ void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset
addRelocationForSection(RE, Value.SectionID);
}
+uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
+ bool IsLocal) const {
+ switch (RelType) {
+ case ELF::R_MICROMIPS_GOT16:
+ if (IsLocal)
+ return ELF::R_MICROMIPS_LO16;
+ break;
+ case ELF::R_MICROMIPS_HI16:
+ return ELF::R_MICROMIPS_LO16;
+ case ELF::R_MIPS_GOT16:
+ if (IsLocal)
+ return ELF::R_MIPS_LO16;
+ break;
+ case ELF::R_MIPS_HI16:
+ return ELF::R_MIPS_LO16;
+ case ELF::R_MIPS_PCHI16:
+ return ELF::R_MIPS_PCLO16;
+ default:
+ break;
+ }
+ return ELF::R_MIPS_NONE;
+}
+
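getMatchingLoRelocation supports the new pending-relocation scheme further down: a HI16 entry is parked until its partner LO16 arrives, and the two addends combine as high-bits plus sign-extended low half. A numeric check of that composition (the opcodes are hypothetical):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t HiOpcode = 0x3c040001; // lui   $a0, 0x0001
  uint32_t LoOpcode = 0x24848000; // addiu $a0, $a0, -0x8000

  int64_t HiAddend = int64_t(HiOpcode & 0x0000ffff) << 16; // 0x10000
  int64_t LoAddend = int16_t(LoOpcode & 0x0000ffff);       // -0x8000 (signed)

  // The pending HI16 entry absorbs the matching LO16 addend.
  int64_t Combined = HiAddend + LoAddend;
  assert(Combined == 0x8000);

  // Why the hi half was 1, not 0: encoders pick hi = (addend + 0x8000) >> 16
  // so the *signed* lo half reconstructs the value exactly.
  assert(((Combined + 0x8000) >> 16) == 1);
  assert(int16_t(Combined & 0xffff) == -0x8000);
  return 0;
}
```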
relocation_iterator RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) {
@@ -1136,8 +1203,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously
// and can be changed by another developers. Maybe best way is add
// a new symbol type ST_Section to SymbolRef and use it.
- section_iterator si(Obj.section_end());
- Symbol->getSection(si);
+ section_iterator si = *Symbol->getSection();
if (si == Obj.section_end())
llvm_unreachable("Symbol section not found, bad object file format!");
DEBUG(dbgs() << "\t\tThis is section symbol\n");
@@ -1178,24 +1244,28 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second,
+ resolveRelocation(Section, Offset,
+ (uint64_t)Section.getAddressWithOffset(i->second),
RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()));
- RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry REmovz_g3(SectionID,
+ StubTargetAddr - Section.getAddress(),
ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend);
- RelocationEntry REmovk_g2(SectionID, StubTargetAddr - Section.Address + 4,
+ RelocationEntry REmovk_g2(SectionID, StubTargetAddr -
+ Section.getAddress() + 4,
ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend);
- RelocationEntry REmovk_g1(SectionID, StubTargetAddr - Section.Address + 8,
+ RelocationEntry REmovk_g1(SectionID, StubTargetAddr -
+ Section.getAddress() + 8,
ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend);
- RelocationEntry REmovk_g0(SectionID,
- StubTargetAddr - Section.Address + 12,
+ RelocationEntry REmovk_g0(SectionID, StubTargetAddr -
+ Section.getAddress() + 12,
ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend);
if (Value.SymbolName) {
@@ -1210,9 +1280,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
addRelocationForSection(REmovk_g0, Value.SectionID);
}
resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset, RelType,
- 0);
- Section.StubOffset += getMaxStubSize();
+ reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
+ Section.getStubOffset())),
+ RelType, 0);
+ Section.advanceStubOffset(getMaxStubSize());
}
} else if (Arch == Triple::arm) {
if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL ||
@@ -1224,26 +1295,29 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second,
- RelType, 0);
+ resolveRelocation(
+ Section, Offset,
+ reinterpret_cast<uint64_t>(Section.getAddressWithOffset(i->second)),
+ RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
- RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
- ELF::R_ARM_ABS32, Value.Addend);
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()));
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(),
+ ELF::R_ARM_ABS32, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset, RelType,
- 0);
- Section.StubOffset += getMaxStubSize();
+ resolveRelocation(Section, Offset, reinterpret_cast<uint64_t>(
+ Section.getAddressWithOffset(
+ Section.getStubOffset())),
+ RelType, 0);
+ Section.advanceStubOffset(getMaxStubSize());
}
} else {
uint32_t *Placeholder =
@@ -1282,15 +1356,16 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
} else {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()));
// Creating Hi and Lo relocations for the filled stub instructions.
- RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address,
- ELF::R_MIPS_HI16, Value.Addend);
- RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4,
- ELF::R_MIPS_LO16, Value.Addend);
+ RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(),
+ ELF::R_MIPS_HI16, Value.Addend);
+ RelocationEntry RELo(SectionID,
+ StubTargetAddr - Section.getAddress() + 4,
+ ELF::R_MIPS_LO16, Value.Addend);
if (Value.SymbolName) {
addRelocationForSymbol(REHi, Value.SymbolName);
@@ -1301,21 +1376,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
addRelocationForSection(RELo, Value.SectionID);
}
- RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset);
+ RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset());
addRelocationForSection(RE, SectionID);
- Section.StubOffset += getMaxStubSize();
+ Section.advanceStubOffset(getMaxStubSize());
}
+ } else if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) {
+ int64_t Addend = (Opcode & 0x0000ffff) << 16;
+ RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ PendingRelocs.push_back(std::make_pair(Value, RE));
+ } else if (RelType == ELF::R_MIPS_LO16 || RelType == ELF::R_MIPS_PCLO16) {
+ int64_t Addend = Value.Addend + SignExtend32<16>(Opcode & 0x0000ffff);
+ for (auto I = PendingRelocs.begin(); I != PendingRelocs.end();) {
+ const RelocationValueRef &MatchingValue = I->first;
+ RelocationEntry &Reloc = I->second;
+ if (MatchingValue == Value &&
+ RelType == getMatchingLoRelocation(Reloc.RelType) &&
+ SectionID == Reloc.SectionID) {
+ Reloc.Addend += Addend;
+ if (Value.SymbolName)
+ addRelocationForSymbol(Reloc, Value.SymbolName);
+ else
+ addRelocationForSection(Reloc, Value.SectionID);
+ I = PendingRelocs.erase(I);
+ } else
+ ++I;
+ }
+ RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
} else {
- // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16,
- // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations.
- if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16)
- Value.Addend += (Opcode & 0x0000ffff) << 16;
- else if (RelType == ELF::R_MIPS_LO16)
- Value.Addend += (Opcode & 0x0000ffff);
- else if (RelType == ELF::R_MIPS_32)
+ if (RelType == ELF::R_MIPS_32)
Value.Addend += Opcode;
- else if (RelType == ELF::R_MIPS_PCLO16)
- Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff));
else if (RelType == ELF::R_MIPS_PC16)
Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2);
else if (RelType == ELF::R_MIPS_PC19_S2)
@@ -1353,7 +1446,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// an external symbol (Symbol::ST_Unknown) or if the target address
// is not within the signed 24-bits branch address.
SectionEntry &Section = Sections[SectionID];
- uint8_t *Target = Section.Address + Offset;
+ uint8_t *Target = Section.getAddressWithOffset(Offset);
bool RangeOverflow = false;
if (SymType != SymbolRef::ST_Unknown) {
if (AbiVariant != 2) {
@@ -1367,10 +1460,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
uint8_t SymOther = Symbol->getOther();
Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
}
- uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend;
+ uint8_t *RelocTarget =
+ Sections[Value.SectionID].getAddressWithOffset(Value.Addend);
int32_t delta = static_cast<int32_t>(Target - RelocTarget);
- // If it is within 24-bits branch range, just set the branch target
- if (SignExtend32<24>(delta) == delta) {
+      // If it is within the 26-bit branch range, just set the branch target
+ if (SignExtend32<26>(delta) == delta) {
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
@@ -1387,23 +1481,25 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
if (i != Stubs.end()) {
// Symbol function stub already created, just relocate to it
resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + i->second, RelType, 0);
+ reinterpret_cast<uint64_t>(
+ Section.getAddressWithOffset(i->second)),
+ RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset,
- AbiVariant);
- RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()),
+ AbiVariant);
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(),
ELF::R_PPC64_ADDR64, Value.Addend);
// Generates the 64-bits address loads as exemplified in section
// 4.5.1 in PPC64 ELF ABI. Note that the relocations need to
// apply to the low part of the instructions, so we have to update
// the offset according to the target endianness.
- uint64_t StubRelocOffset = StubTargetAddr - Section.Address;
+ uint64_t StubRelocOffset = StubTargetAddr - Section.getAddress();
if (!IsTargetLittleEndian)
StubRelocOffset += 2;
@@ -1428,10 +1524,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
addRelocationForSection(REl, Value.SectionID);
}
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset,
+ resolveRelocation(Section, Offset, reinterpret_cast<uint64_t>(
+ Section.getAddressWithOffset(
+ Section.getStubOffset())),
RelType, 0);
- Section.StubOffset += getMaxStubSize();
+ Section.advanceStubOffset(getMaxStubSize());
}
if (SymType == SymbolRef::ST_Unknown) {
// Restore the TOC for external calls
@@ -1450,11 +1547,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// These relocations are supposed to subtract the TOC address from
// the final value. This does not fit cleanly into the RuntimeDyld
// scheme, since there may be *two* sections involved in determining
- // the relocation value (the section of the symbol refered to by the
+ // the relocation value (the section of the symbol referred to by the
// relocation, and the TOC section associated with the current module).
//
// Fortunately, these relocations are currently only ever generated
- // refering to symbols that themselves reside in the TOC, which means
+ // referring to symbols that themselves reside in the TOC, which means
// that the two sections are actually the same. Thus they cancel out
// and we can immediately resolve the relocation right now.
switch (RelType) {
@@ -1511,16 +1608,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
StubMap::const_iterator i = Stubs.find(Value);
uintptr_t StubAddress;
if (i != Stubs.end()) {
- StubAddress = uintptr_t(Section.Address) + i->second;
+ StubAddress = uintptr_t(Section.getAddressWithOffset(i->second));
DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
- uintptr_t BaseAddress = uintptr_t(Section.Address);
+ uintptr_t BaseAddress = uintptr_t(Section.getAddress());
uintptr_t StubAlignment = getStubAlignment();
- StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) &
- -StubAlignment;
+ StubAddress =
+ (BaseAddress + Section.getStubOffset() + StubAlignment - 1) &
+ -StubAlignment;
unsigned StubOffset = StubAddress - BaseAddress;
Stubs[Value] = StubOffset;
@@ -1531,7 +1629,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- Section.StubOffset = StubOffset + getMaxStubSize();
+ Section.advanceStubOffset(getMaxStubSize());
}
if (RelType == ELF::R_390_GOTENT)
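The stub-address expression above is the usual power-of-two round-up idiom: for an alignment A, -A in unsigned arithmetic is the mask with the low log2(A) bits clear. Reduced to a runnable sketch:

    #include <cassert>
    #include <cstdint>

    // Round X up to the next multiple of the power-of-two alignment A.
    uintptr_t alignUp(uintptr_t X, uintptr_t A) {
      assert(A != 0 && (A & (A - 1)) == 0 && "A must be a power of two");
      return (X + A - 1) & -A;
    }
    // alignUp(0x1001, 0x10) == 0x1010; alignUp(0x1000, 0x10) == 0x1000.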
@@ -1564,37 +1662,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
StubMap::const_iterator i = Stubs.find(Value);
uintptr_t StubAddress;
if (i != Stubs.end()) {
- StubAddress = uintptr_t(Section.Address) + i->second;
- DEBUG(dbgs() << " Stub function found\n");
+ StubAddress = uintptr_t(Section.getAddress()) + i->second;
+ DEBUG(dbgs() << " Stub function found\n");
} else {
- // Create a new stub function (equivalent to a PLT entry).
- DEBUG(dbgs() << " Create a new stub function\n");
+ // Create a new stub function (equivalent to a PLT entry).
+ DEBUG(dbgs() << " Create a new stub function\n");
- uintptr_t BaseAddress = uintptr_t(Section.Address);
- uintptr_t StubAlignment = getStubAlignment();
- StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) &
- -StubAlignment;
- unsigned StubOffset = StubAddress - BaseAddress;
- Stubs[Value] = StubOffset;
- createStubFunction((uint8_t *)StubAddress);
+ uintptr_t BaseAddress = uintptr_t(Section.getAddress());
+ uintptr_t StubAlignment = getStubAlignment();
+ StubAddress =
+ (BaseAddress + Section.getStubOffset() + StubAlignment - 1) &
+ -StubAlignment;
+ unsigned StubOffset = StubAddress - BaseAddress;
+ Stubs[Value] = StubOffset;
+ createStubFunction((uint8_t *)StubAddress);
- // Bump our stub offset counter
- Section.StubOffset = StubOffset + getMaxStubSize();
+ // Bump our stub offset counter
+ Section.advanceStubOffset(getMaxStubSize());
- // Allocate a GOT Entry
- uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+ // Allocate a GOT Entry
+ uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
- // The load of the GOT address has an addend of -4
- resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4);
+ // The load of the GOT address has an addend of -4
+ resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4);
- // Fill in the value of the symbol we're targeting into the GOT
- addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64),
- Value.SymbolName);
+ // Fill in the value of the symbol we're targeting into the GOT
+ addRelocationForSymbol(
+ computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64),
+ Value.SymbolName);
}
// Make the target call a call into the stub table.
resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32,
- Addend);
+ Addend);
} else {
RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend,
Value.Offset);
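The `GOTOffset - 4` above compensates for x86-64 RIP-relative addressing: the 32-bit displacement is measured from the end of the instruction, four bytes past the displacement field at stub offset +2. A sketch of what a generic PC32-style resolver then computes (names hypothetical):

    #include <cstdint>

    // A PC32 fixup writes Value + Addend - FieldAddr. A RIP-relative load
    // needs Value - (FieldAddr + 4), since RIP points past the 4-byte
    // displacement field; an addend of -4 makes the two coincide.
    int32_t pc32Fixup(uint64_t Value, int64_t Addend, uint64_t FieldAddr) {
      return int32_t(Value + uint64_t(Addend) - FieldAddr);
    }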
@@ -1670,7 +1770,7 @@ uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no)
GOTSectionID = Sections.size();
// Reserve a section id. We'll allocate the section later
// once we know the total size
- Sections.push_back(SectionEntry(".got", 0, 0, 0));
+ Sections.push_back(SectionEntry(".got", nullptr, 0, 0, 0));
}
uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize();
CurrentGOTIndex += no;
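allocateGOTEntries is a bump allocator over a table that does not exist yet: until finalizeLoad materializes the .got section, a GOT "address" is just slot index times entry size. The bookkeeping, reduced to a sketch (names hypothetical):

    #include <cstdint>

    struct GOTAllocator {
      uint64_t CurrentIndex = 0;
      uint64_t EntrySize = 8; // pointer-sized slots on 64-bit targets

      // Reserve N consecutive slots; return the byte offset of the first.
      uint64_t allocate(unsigned N) {
        uint64_t StartOffset = CurrentIndex * EntrySize;
        CurrentIndex += N;
        return StartOffset;
      }
    };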
@@ -1693,6 +1793,10 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t
void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
+ if (IsMipsO32ABI)
+ if (!PendingRelocs.empty())
+ report_fatal_error("Can't find matching LO16 reloc");
+
// If necessary, allocate the global offset table
if (GOTSectionID != 0) {
// Allocate memory for the section
@@ -1702,7 +1806,8 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
if (!Addr)
report_fatal_error("Unable to allocate memory for GOT!");
- Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0);
+ Sections[GOTSectionID] =
+ SectionEntry(".got", Addr, TotalSize, TotalSize, 0);
if (Checker)
Checker->registerSection(Obj.getFileName(), GOTSectionID);
@@ -1746,4 +1851,23 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const {
return Obj.isELF();
}
+bool RuntimeDyldELF::relocationNeedsStub(const RelocationRef &R) const {
+ if (Arch != Triple::x86_64)
+ return true; // Conservative answer
+
+ switch (R.getType()) {
+ default:
+ return true; // Conservative answer
+
+ case ELF::R_X86_64_GOTPCREL:
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_PC64:
+ case ELF::R_X86_64_64:
+  // We know that these relocation types won't need a stub function. This list
+ // can be extended as needed.
+ return false;
+ }
+}
+
} // namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 1a2552d..041811d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -43,6 +43,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);
+ void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint64_t Value, uint32_t Type, int64_t Addend);
+
void resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
@@ -120,6 +123,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
// no particular advanced processing.
void processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value);
+ // Return matching *LO16 relocation (Mips specific)
+ uint32_t getMatchingLoRelocation(uint32_t RelType,
+ bool IsLocal = false) const;
+
// The tentative ID for the GOT section
unsigned GOTSectionID;
@@ -135,12 +142,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
// A map to avoid duplicate got entries (Mips64 specific)
StringMap<uint64_t> GOTSymbolOffsets;
+  // *HI16 relocations are saved here until we find the matching
+  // *LO16 part, at which point both can be resolved. (Mips specific)
+ SmallVector<std::pair<RelocationValueRef, RelocationEntry>, 8> PendingRelocs;
+
// When a module is loaded we save the SectionID of the EH frame section
// in a table until we receive a request to register all unregistered
// EH frame sections with the memory manager.
SmallVector<SID, 2> UnregisteredEHFrameSections;
SmallVector<SID, 2> RegisteredEHFrameSections;
+ bool relocationNeedsStub(const RelocationRef &R) const override;
+
public:
RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
RuntimeDyld::SymbolResolver &Resolver);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index e085a92..dafd3c8 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -30,6 +30,7 @@
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
+#include <unordered_map>
#include <system_error>
using namespace llvm;
@@ -50,7 +51,6 @@ class Twine;
/// SectionEntry - represents a section emitted into memory by the dynamic
/// linker.
class SectionEntry {
-public:
/// Name - section name.
std::string Name;
@@ -70,15 +70,54 @@ public:
/// relocations (like ARM).
uintptr_t StubOffset;
+ /// The total amount of space allocated for this section. This includes the
+ /// section size and the maximum amount of space that the stubs can occupy.
+ size_t AllocationSize;
+
/// ObjAddress - address of the section in the in-memory object file. Used
/// for calculating relocations in some object formats (like MachO).
uintptr_t ObjAddress;
+public:
SectionEntry(StringRef name, uint8_t *address, size_t size,
- uintptr_t objAddress)
+ size_t allocationSize, uintptr_t objAddress)
: Name(name), Address(address), Size(size),
LoadAddress(reinterpret_cast<uintptr_t>(address)), StubOffset(size),
- ObjAddress(objAddress) {}
+ AllocationSize(allocationSize), ObjAddress(objAddress) {
+    // AllocationSize is only used in asserts; this cast prevents an
+    // "unused private field" warning:
+ (void)AllocationSize;
+ }
+
+ StringRef getName() const { return Name; }
+
+ uint8_t *getAddress() const { return Address; }
+
+ /// \brief Return the address of this section with an offset.
+ uint8_t *getAddressWithOffset(unsigned OffsetBytes) const {
+ assert(OffsetBytes <= AllocationSize && "Offset out of bounds!");
+ return Address + OffsetBytes;
+ }
+
+ size_t getSize() const { return Size; }
+
+ uint64_t getLoadAddress() const { return LoadAddress; }
+ void setLoadAddress(uint64_t LA) { LoadAddress = LA; }
+
+ /// \brief Return the load address of this section with an offset.
+ uint64_t getLoadAddressWithOffset(unsigned OffsetBytes) const {
+ assert(OffsetBytes <= AllocationSize && "Offset out of bounds!");
+ return LoadAddress + OffsetBytes;
+ }
+
+ uintptr_t getStubOffset() const { return StubOffset; }
+
+ void advanceStubOffset(unsigned StubSize) {
+ StubOffset += StubSize;
+ assert(StubOffset <= AllocationSize && "Not enough space allocated!");
+ }
+
+ uintptr_t getObjAddress() const { return ObjAddress; }
};
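The new accessors replace raw pointer arithmetic on public fields with assert-guarded offsets into the section's full allocation (section bytes plus the reserved stub area). The pattern, reduced to a self-contained example:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct CheckedSection {
      uint8_t *Address;
      size_t AllocationSize; // section size + reserved stub space

      uint8_t *getAddressWithOffset(size_t OffsetBytes) const {
        assert(OffsetBytes <= AllocationSize && "Offset out of bounds!");
        return Address + OffsetBytes;
      }
    };

    int main() {
      uint8_t Buf[0x100] = {0};
      CheckedSection S{Buf, sizeof(Buf)};
      *S.getAddressWithOffset(0x90) = 0xC3; // within the allocation: fine
      // S.getAddressWithOffset(0x200);     // would trip the assert
      return 0;
    }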
/// RelocationEntry - used to represent relocations internally in the dynamic
@@ -188,6 +227,8 @@ class RuntimeDyldImpl {
friend class RuntimeDyld::LoadedObjectInfo;
friend class RuntimeDyldCheckerImpl;
protected:
+ static const unsigned AbsoluteSymbolSection = ~0U;
+
// The MemoryManager to load objects into.
RuntimeDyld::MemoryManager &MemMgr;
@@ -224,7 +265,7 @@ protected:
// Relocations to sections already loaded. Indexed by SectionID which is the
// source of the address. The target where the address will be written is
// SectionID/Offset in the relocation itself.
- DenseMap<unsigned, RelocationList> Relocations;
+ std::unordered_map<unsigned, RelocationList> Relocations;
// Relocations to external symbols that are not yet resolved. Symbols are
// external when they aren't found in the global symbol table of all loaded
@@ -269,11 +310,11 @@ protected:
}
uint64_t getSectionLoadAddress(unsigned SectionID) const {
- return Sections[SectionID].LoadAddress;
+ return Sections[SectionID].getLoadAddress();
}
uint8_t *getSectionAddress(unsigned SectionID) const {
- return (uint8_t *)Sections[SectionID].Address;
+ return Sections[SectionID].getAddress();
}
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
@@ -378,7 +419,12 @@ protected:
const SectionRef &Section);
// \brief Implementation of the generic part of the loadObject algorithm.
- std::pair<unsigned, unsigned> loadObjectImpl(const object::ObjectFile &Obj);
+ ObjSectionToIDMap loadObjectImpl(const object::ObjectFile &Obj);
+
+ // \brief Return true if the relocation R may require allocating a stub.
+ virtual bool relocationNeedsStub(const RelocationRef &R) const {
+ return true; // Conservative answer
+ }
public:
RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
@@ -407,6 +453,9 @@ public:
if (pos == GlobalSymbolTable.end())
return nullptr;
const auto &SymInfo = pos->second;
+ // Absolute symbols do not have a local address.
+ if (SymInfo.getSectionID() == AbsoluteSymbolSection)
+ return nullptr;
return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
}
@@ -417,8 +466,10 @@ public:
if (pos == GlobalSymbolTable.end())
return nullptr;
const auto &SymEntry = pos->second;
- uint64_t TargetAddr =
- getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset();
+ uint64_t SectionAddr = 0;
+ if (SymEntry.getSectionID() != AbsoluteSymbolSection)
+ SectionAddr = getSectionLoadAddress(SymEntry.getSectionID());
+ uint64_t TargetAddr = SectionAddr + SymEntry.getOffset();
return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags());
}
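Both lookup paths above special-case the ~0U sentinel: an absolute symbol contributes no section base, so its stored offset already is its final value. Reduced to a sketch (LoadAddr stands in for getSectionLoadAddress):

    #include <cstdint>

    static const unsigned AbsoluteSymbolSection = ~0U;

    uint64_t targetAddress(unsigned SectionID, uint64_t Offset,
                           uint64_t LoadAddr) {
      uint64_t SectionAddr = 0;
      if (SectionID != AbsoluteSymbolSection)
        SectionAddr = LoadAddr;
      return SectionAddr + Offset; // absolute: just the stored value
    }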
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index c074114..739e8d6 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -26,12 +26,12 @@ using namespace llvm::object;
namespace {
-class LoadedMachOObjectInfo
+class LoadedMachOObjectInfo final
: public RuntimeDyld::LoadedObjectInfoHelper<LoadedMachOObjectInfo> {
public:
- LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
- unsigned EndIdx)
- : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
+ LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld,
+ ObjSectionToIDMap ObjSecToIDMap)
+ : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {}
OwningBinary<ObjectFile>
getObjectForDebug(const ObjectFile &Obj) const override {
@@ -45,11 +45,47 @@ namespace llvm {
int64_t RuntimeDyldMachO::memcpyAddend(const RelocationEntry &RE) const {
unsigned NumBytes = 1 << RE.Size;
- uint8_t *Src = Sections[RE.SectionID].Address + RE.Offset;
+ uint8_t *Src = Sections[RE.SectionID].getAddress() + RE.Offset;
return static_cast<int64_t>(readBytesUnaligned(Src, NumBytes));
}
+relocation_iterator RuntimeDyldMachO::processScatteredVANILLA(
+ unsigned SectionID, relocation_iterator RelI,
+ const ObjectFile &BaseObjT,
+ RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile&>(BaseObjT);
+ MachO::any_relocation_info RE =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = Obj.getAnyRelocationType(RE);
+ bool IsPCRel = Obj.getAnyRelocationPCRel(RE);
+ unsigned Size = Obj.getAnyRelocationLength(RE);
+ uint64_t Offset = RelI->getOffset();
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
+ unsigned NumBytes = 1 << Size;
+ int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
+
+ unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE);
+ section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr);
+ assert(TargetSI != Obj.section_end() && "Can't find section for symbol");
+ uint64_t SectionBaseAddr = TargetSI->getAddress();
+ SectionRef TargetSection = *TargetSI;
+ bool IsCode = TargetSection.isText();
+ uint32_t TargetSectionID =
+ findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID);
+
+ Addend -= SectionBaseAddr;
+ RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size);
+
+ addRelocationForSection(R, TargetSectionID);
+
+ return ++RelI;
+}
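The `Addend -= SectionBaseAddr` step is the crux of the function above: a scattered VANILLA relocation stores the target's pre-linked absolute address in the fixup bytes, and subtracting the target section's pre-linked base leaves the offset within that section, which is what a (SectionID, Addend) entry must carry. A sketch of the decode, assuming a little-endian host:

    #include <cstdint>
    #include <cstring>

    int64_t scatteredAddend(const uint8_t *Fixup, unsigned SizeLog2,
                            uint64_t SectionBaseAddr) {
      uint64_t Stored = 0; // Mach-O records log2 of the fixup width
      std::memcpy(&Stored, Fixup, 1u << SizeLog2);
      return int64_t(Stored) - int64_t(SectionBaseAddr);
    }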
+
RelocationValueRef RuntimeDyldMachO::getRelocationValueRef(
const ObjectFile &BaseTObj, const relocation_iterator &RI,
const RelocationEntry &RE, ObjSectionToIDMap &ObjSectionToID) {
@@ -99,8 +135,8 @@ void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value,
void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE,
uint64_t Value) const {
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddress() + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddress() + RE.Offset;
dbgs() << "resolveRelocation Section: " << RE.SectionID
<< " LocalAddress: " << format("%p", LocalAddress)
@@ -147,10 +183,9 @@ void RuntimeDyldMachO::populateIndirectSymbolPointersSection(
"Pointers section does not contain a whole number of stubs?");
DEBUG(dbgs() << "Populating pointer table section "
- << Sections[PTSectionID].Name
- << ", Section ID " << PTSectionID << ", "
- << NumPTEntries << " entries, " << PTEntrySize
- << " bytes each:\n");
+ << Sections[PTSectionID].getName() << ", Section ID "
+ << PTSectionID << ", " << NumPTEntries << " entries, "
+ << PTEntrySize << " bytes each:\n");
for (unsigned i = 0; i < NumPTEntries; ++i) {
unsigned SymbolIndex =
@@ -204,7 +239,7 @@ void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
}
template <typename Impl>
-unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
+unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(uint8_t *P,
int64_t DeltaForText,
int64_t DeltaForEH) {
typedef typename Impl::TargetPtrT TargetPtrT;
@@ -213,7 +248,7 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
<< ", Delta for EH: " << DeltaForEH << "\n");
uint32_t Length = readBytesUnaligned(P, 4);
P += 4;
- unsigned char *Ret = P + Length;
+ uint8_t *Ret = P + Length;
uint32_t Offset = readBytesUnaligned(P, 4);
if (Offset == 0) // is a CIE
return Ret;
@@ -240,9 +275,9 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
}
static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
- int64_t ObjDistance =
- static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress);
- int64_t MemDistance = A->LoadAddress - B->LoadAddress;
+ int64_t ObjDistance = static_cast<int64_t>(A->getObjAddress()) -
+ static_cast<int64_t>(B->getObjAddress());
+ int64_t MemDistance = A->getLoadAddress() - B->getLoadAddress();
return ObjDistance - MemDistance;
}
@@ -265,14 +300,14 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
if (ExceptTab)
DeltaForEH = computeDelta(ExceptTab, EHFrame);
- unsigned char *P = EHFrame->Address;
- unsigned char *End = P + EHFrame->Size;
+ uint8_t *P = EHFrame->getAddress();
+ uint8_t *End = P + EHFrame->getSize();
do {
P = processFDE(P, DeltaForText, DeltaForEH);
} while (P != End);
- MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress,
- EHFrame->Size);
+ MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(),
+ EHFrame->getSize());
}
UnregisteredEHFrameSections.clear();
}
@@ -298,10 +333,7 @@ RuntimeDyldMachO::create(Triple::ArchType Arch,
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldMachO::loadObject(const object::ObjectFile &O) {
- unsigned SectionStartIdx, SectionEndIdx;
- std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O);
- return llvm::make_unique<LoadedMachOObjectInfo>(*this, SectionStartIdx,
- SectionEndIdx);
+ return llvm::make_unique<LoadedMachOObjectInfo>(*this, loadObjectImpl(O));
}
} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 0d7364f..c8ae47b 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -79,6 +79,12 @@ protected:
return RelocationEntry(SectionID, Offset, RelType, 0, IsPCRel, Size);
}
+ /// Process a scattered vanilla relocation.
+ relocation_iterator processScatteredVANILLA(
+ unsigned SectionID, relocation_iterator RelI,
+ const ObjectFile &BaseObjT,
+ RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID);
+
/// Construct a RelocationValueRef representing the relocation target.
/// For Symbols in known sections, this will return a RelocationValueRef
/// representing a (SectionID, Offset) pair.
@@ -140,7 +146,7 @@ private:
Impl &impl() { return static_cast<Impl &>(*this); }
const Impl &impl() const { return static_cast<const Impl &>(*this); }
- unsigned char *processFDE(unsigned char *P, int64_t DeltaForText,
+ unsigned char *processFDE(uint8_t *P, int64_t DeltaForText,
int64_t DeltaForEH);
public:
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
new file mode 100644
index 0000000..fbfbb32
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -0,0 +1,201 @@
+//===--- RuntimeDyldCOFFI386.h --- COFF/i386 specific code ----*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF x86 support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H
+
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "../RuntimeDyldCOFF.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF {
+public:
+ RuntimeDyldCOFFI386(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldCOFF(MM, Resolver) {}
+
+ unsigned getMaxStubSize() override {
+    return 8; // 2-byte jmp instruction + 32-bit relative address + 2-byte pad
+ }
+
+ unsigned getStubAlignment() override { return 1; }
+
+ relocation_iterator processRelocationRef(unsigned SectionID,
+ relocation_iterator RelI,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ StubMap &Stubs) override {
+ auto Symbol = RelI->getSymbol();
+ if (Symbol == Obj.symbol_end())
+ report_fatal_error("Unknown symbol in relocation");
+
+ ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
+ if (auto EC = TargetNameOrErr.getError())
+ report_fatal_error(EC.message());
+ StringRef TargetName = *TargetNameOrErr;
+
+ auto Section = *Symbol->getSection();
+
+ uint64_t RelType = RelI->getType();
+ uint64_t Offset = RelI->getOffset();
+
+#if !defined(NDEBUG)
+ SmallString<32> RelTypeName;
+ RelI->getTypeName(RelTypeName);
+#endif
+ DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
+ << " RelType: " << RelTypeName << " TargetName: " << TargetName
+ << "\n");
+
+ unsigned TargetSectionID = -1;
+ if (Section == Obj.section_end()) {
+ RelocationEntry RE(SectionID, Offset, RelType, 0, -1, 0, 0, 0, false, 0);
+ addRelocationForSymbol(RE, TargetName);
+ } else {
+ TargetSectionID =
+ findOrEmitSection(Obj, *Section, Section->isText(), ObjSectionToID);
+
+ switch (RelType) {
+ case COFF::IMAGE_REL_I386_ABSOLUTE:
+ // This relocation is ignored.
+ break;
+ case COFF::IMAGE_REL_I386_DIR32:
+ case COFF::IMAGE_REL_I386_DIR32NB:
+ case COFF::IMAGE_REL_I386_REL32: {
+ RelocationEntry RE =
+ RelocationEntry(SectionID, Offset, RelType, 0, TargetSectionID,
+ getSymbolOffset(*Symbol), 0, 0, false, 0);
+ addRelocationForSection(RE, TargetSectionID);
+ break;
+ }
+ case COFF::IMAGE_REL_I386_SECTION: {
+ RelocationEntry RE =
+ RelocationEntry(TargetSectionID, Offset, RelType, 0);
+ addRelocationForSection(RE, TargetSectionID);
+ break;
+ }
+ case COFF::IMAGE_REL_I386_SECREL: {
+ RelocationEntry RE = RelocationEntry(SectionID, Offset, RelType,
+ getSymbolOffset(*Symbol));
+ addRelocationForSection(RE, TargetSectionID);
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+
+ }
+
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ const auto Section = Sections[RE.SectionID];
+ uint8_t *Target = Section.getAddressWithOffset(RE.Offset);
+
+ switch (RE.RelType) {
+ case COFF::IMAGE_REL_I386_ABSOLUTE:
+ // This relocation is ignored.
+ break;
+ case COFF::IMAGE_REL_I386_DIR32: {
+ // The target's 32-bit VA.
+ uint64_t Result =
+ RE.Sections.SectionA == static_cast<uint32_t>(-1)
+ ? Value
+ : Sections[RE.Sections.SectionA].getLoadAddressWithOffset(
+ RE.Addend);
+ assert(static_cast<int32_t>(Result) <= INT32_MAX &&
+ "relocation overflow");
+ assert(static_cast<int32_t>(Result) >= INT32_MIN &&
+ "relocation underflow");
+ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset
+ << " RelType: IMAGE_REL_I386_DIR32"
+ << " TargetSection: " << RE.Sections.SectionA
+ << " Value: " << format("0x%08" PRIx32, Result) << '\n');
+ writeBytesUnaligned(Result, Target, 4);
+ break;
+ }
+ case COFF::IMAGE_REL_I386_DIR32NB: {
+ // The target's 32-bit RVA.
+      // NOTE: use Sections[0].getLoadAddress() as an approximation of ImageBase
+ uint64_t Result =
+ Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend) -
+ Sections[0].getLoadAddress();
+ assert(static_cast<int32_t>(Result) <= INT32_MAX &&
+ "relocation overflow");
+ assert(static_cast<int32_t>(Result) >= INT32_MIN &&
+ "relocation underflow");
+ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset
+ << " RelType: IMAGE_REL_I386_DIR32NB"
+ << " TargetSection: " << RE.Sections.SectionA
+ << " Value: " << format("0x%08" PRIx32, Result) << '\n');
+ writeBytesUnaligned(Result, Target, 4);
+ break;
+ }
+ case COFF::IMAGE_REL_I386_REL32: {
+ // 32-bit relative displacement to the target.
+ uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddress() -
+ Section.getLoadAddress() + RE.Addend - 4 - RE.Offset;
+ assert(static_cast<int32_t>(Result) <= INT32_MAX &&
+ "relocation overflow");
+ assert(static_cast<int32_t>(Result) >= INT32_MIN &&
+ "relocation underflow");
+ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset
+ << " RelType: IMAGE_REL_I386_REL32"
+ << " TargetSection: " << RE.Sections.SectionA
+ << " Value: " << format("0x%08" PRIx32, Result) << '\n');
+ writeBytesUnaligned(Result, Target, 4);
+ break;
+ }
+ case COFF::IMAGE_REL_I386_SECTION:
+ // 16-bit section index of the section that contains the target.
+ assert(static_cast<int32_t>(RE.SectionID) <= INT16_MAX &&
+ "relocation overflow");
+ assert(static_cast<int32_t>(RE.SectionID) >= INT16_MIN &&
+ "relocation underflow");
+ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset
+ << " RelType: IMAGE_REL_I386_SECTION Value: " << RE.SectionID
+ << '\n');
+ writeBytesUnaligned(RE.SectionID, Target, 2);
+ break;
+ case COFF::IMAGE_REL_I386_SECREL:
+ // 32-bit offset of the target from the beginning of its section.
+ assert(static_cast<int32_t>(RE.Addend) <= INT32_MAX &&
+ "relocation overflow");
+ assert(static_cast<int32_t>(RE.Addend) >= INT32_MIN &&
+ "relocation underflow");
+ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset
+ << " RelType: IMAGE_REL_I386_SECREL Value: " << RE.Addend
+ << '\n');
+      writeBytesUnaligned(RE.Addend, Target, 4);
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+ }
+
+ void registerEHFrames() override {}
+ void deregisterEHFrames() override {}
+
+ void finalizeLoad(const ObjectFile &Obj,
+ ObjSectionToIDMap &SectionMap) override {}
+};
+
+}
+
+#endif
+
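For reference, the IMAGE_REL_I386_REL32 case above is the standard end-of-field-relative displacement, here expressed with the same section-base decomposition the resolver uses (all quantities are load addresses in the target process):

    #include <cstdint>

    // Written value = Target - (FixupAddr + 4) + Addend, where
    // FixupAddr = SectionLoad + Offset and Target = TargetSectionLoad
    // (the symbol's offset within its section is folded into Addend).
    uint64_t rel32(uint64_t TargetSectionLoad, uint64_t SectionLoad,
                   uint64_t Offset, int64_t Addend) {
      return TargetSectionLoad - SectionLoad + uint64_t(Addend) - 4 - Offset;
    }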
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 408227e..25f538d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -62,7 +62,7 @@ public:
// symbol in the target address space.
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *Target = Section.Address + RE.Offset;
+ uint8_t *Target = Section.getAddressWithOffset(RE.Offset);
switch (RE.RelType) {
@@ -72,8 +72,7 @@ public:
case COFF::IMAGE_REL_AMD64_REL32_3:
case COFF::IMAGE_REL_AMD64_REL32_4:
case COFF::IMAGE_REL_AMD64_REL32_5: {
- uint32_t *TargetAddress = (uint32_t *)Target;
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
// Delta is the distance from the start of the reloc to the end of the
// instruction with the reloc.
uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32);
@@ -81,7 +80,7 @@ public:
uint64_t Result = Value + RE.Addend;
assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow");
assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow");
- *TargetAddress = Result;
+ writeBytesUnaligned(Result, Target, 4);
break;
}
@@ -92,14 +91,12 @@ public:
// within a 32 bit offset from the base.
//
// For now we just set these to zero.
- uint32_t *TargetAddress = (uint32_t *)Target;
- *TargetAddress = 0;
+ writeBytesUnaligned(0, Target, 4);
break;
}
case COFF::IMAGE_REL_AMD64_ADDR64: {
- uint64_t *TargetAddress = (uint64_t *)Target;
- *TargetAddress = Value + RE.Addend;
+ writeBytesUnaligned(Value + RE.Addend, Target, 8);
break;
}
@@ -119,8 +116,7 @@ public:
symbol_iterator Symbol = RelI->getSymbol();
if (Symbol == Obj.symbol_end())
report_fatal_error("Unknown symbol in relocation");
- section_iterator SecI(Obj.section_end());
- Symbol->getSection(SecI);
+ section_iterator SecI = *Symbol->getSection();
// If there is no section, this must be an external reference.
const bool IsExtern = SecI == Obj.section_end();
@@ -129,7 +125,7 @@ public:
uint64_t Offset = RelI->getOffset();
uint64_t Addend = 0;
SectionEntry &Section = Sections[SectionID];
- uintptr_t ObjTarget = Section.ObjAddress + Offset;
+ uintptr_t ObjTarget = Section.getObjAddress() + Offset;
switch (RelType) {
@@ -140,14 +136,14 @@ public:
case COFF::IMAGE_REL_AMD64_REL32_4:
case COFF::IMAGE_REL_AMD64_REL32_5:
case COFF::IMAGE_REL_AMD64_ADDR32NB: {
- uint32_t *Displacement = (uint32_t *)ObjTarget;
- Addend = *Displacement;
+ uint8_t *Displacement = (uint8_t *)ObjTarget;
+ Addend = readBytesUnaligned(Displacement, 4);
break;
}
case COFF::IMAGE_REL_AMD64_ADDR64: {
- uint64_t *Displacement = (uint64_t *)ObjTarget;
- Addend = *Displacement;
+ uint8_t *Displacement = (uint8_t *)ObjTarget;
+ Addend = readBytesUnaligned(Displacement, 8);
break;
}
@@ -182,9 +178,9 @@ public:
unsigned getStubAlignment() override { return 1; }
void registerEHFrames() override {
for (auto const &EHFrameSID : UnregisteredEHFrameSections) {
- uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
- uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
- size_t EHFrameSize = Sections[EHFrameSID].Size;
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress();
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress();
+ size_t EHFrameSize = Sections[EHFrameSID].getSize();
MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
RegisteredEHFrameSections.push_back(EHFrameSID);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 7bf7641..dbca377 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -34,7 +34,7 @@ public:
/// Extract the addend encoded in the instruction / memory location.
int64_t decodeAddend(const RelocationEntry &RE) const {
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
unsigned NumBytes = 1 << RE.Size;
int64_t Addend = 0;
// Verify that the relocation has the correct size and alignment.
@@ -272,15 +272,14 @@ public:
RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI));
RE.Addend = decodeAddend(RE);
- RelocationValueRef Value(
- getRelocationValueRef(Obj, RelI, RE, ObjSectionToID));
assert((ExplicitAddend == 0 || RE.Addend == 0) && "Relocation has "\
"ARM64_RELOC_ADDEND and embedded addend in the instruction.");
- if (ExplicitAddend) {
+ if (ExplicitAddend)
RE.Addend = ExplicitAddend;
- Value.Offset = ExplicitAddend;
- }
+
+ RelocationValueRef Value(
+ getRelocationValueRef(Obj, RelI, RE, ObjSectionToID));
bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
if (!IsExtern && RE.IsPCRel)
@@ -305,7 +304,7 @@ public:
DEBUG(dumpRelocationToResolve(RE, Value));
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
MachO::RelocationInfoType RelType =
static_cast<MachO::RelocationInfoType>(RE.RelType);
@@ -325,7 +324,7 @@ public:
case MachO::ARM64_RELOC_BRANCH26: {
assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_BRANCH26 not supported");
// Check if branch is in range.
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
int64_t PCRelVal = Value - FinalAddress + RE.Addend;
encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal);
break;
@@ -334,7 +333,7 @@ public:
case MachO::ARM64_RELOC_PAGE21: {
assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_PAGE21 not supported");
// Adjust for PC-relative relocation and offset.
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
int64_t PCRelVal =
((Value + RE.Addend) & (-4096)) - (FinalAddress & (-4096));
encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal);
@@ -376,10 +375,10 @@ private:
else {
      // FIXME: There must be a better way to do this than to check and fix the
// alignment every time!!!
- uintptr_t BaseAddress = uintptr_t(Section.Address);
+ uintptr_t BaseAddress = uintptr_t(Section.getAddress());
uintptr_t StubAlignment = getStubAlignment();
uintptr_t StubAddress =
- (BaseAddress + Section.StubOffset + StubAlignment - 1) &
+ (BaseAddress + Section.getStubOffset() + StubAlignment - 1) &
-StubAlignment;
unsigned StubOffset = StubAddress - BaseAddress;
Stubs[Value] = StubOffset;
@@ -392,7 +391,7 @@ private:
addRelocationForSymbol(GOTRE, Value.SymbolName);
else
addRelocationForSection(GOTRE, Value.SectionID);
- Section.StubOffset = StubOffset + getMaxStubSize();
+ Section.advanceStubOffset(getMaxStubSize());
Offset = static_cast<int64_t>(StubOffset);
}
RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, Offset,
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 0a24bb2..7731df0 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -35,7 +35,7 @@ public:
int64_t decodeAddend(const RelocationEntry &RE) const {
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
switch (RE.RelType) {
default:
@@ -64,8 +64,10 @@ public:
if (RelType == MachO::ARM_RELOC_HALF_SECTDIFF)
return processHALFSECTDIFFRelocation(SectionID, RelI, Obj,
ObjSectionToID);
+ else if (RelType == MachO::GENERIC_RELOC_VANILLA)
+ return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID);
else
- return ++++RelI;
+ return ++RelI;
}
RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI));
@@ -92,12 +94,12 @@ public:
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
DEBUG(dumpRelocationToResolve(RE, Value));
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (RE.IsPCRel) {
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
Value -= FinalAddress;
// ARM PCRel relocations have an effective-PC offset of two instructions
      // (4 bytes in Thumb mode, 8 bytes in ARM mode).
@@ -130,8 +132,8 @@ public:
break;
}
case MachO::ARM_RELOC_HALF_SECTDIFF: {
- uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
- uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress();
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress();
assert((Value == SectionABase || Value == SectionBBase) &&
"Unexpected HALFSECTDIFF relocation value.");
Value = SectionABase - SectionBBase + RE.Addend;
@@ -178,21 +180,21 @@ private:
RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value);
uint8_t *Addr;
if (i != Stubs.end()) {
- Addr = Section.Address + i->second;
+ Addr = Section.getAddressWithOffset(i->second);
} else {
// Create a new stub function.
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
- RelocationEntry StubRE(RE.SectionID, StubTargetAddr - Section.Address,
- MachO::GENERIC_RELOC_VANILLA, Value.Offset, false,
- 2);
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()));
+ RelocationEntry StubRE(
+ RE.SectionID, StubTargetAddr - Section.getAddress(),
+ MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, 2);
if (Value.SymbolName)
addRelocationForSymbol(StubRE, Value.SymbolName);
else
addRelocationForSection(StubRE, Value.SectionID);
- Addr = Section.Address + Section.StubOffset;
- Section.StubOffset += getMaxStubSize();
+ Addr = Section.getAddressWithOffset(Section.getStubOffset());
+ Section.advanceStubOffset(getMaxStubSize());
}
RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, 0,
RE.IsPCRel, RE.Size);
@@ -221,7 +223,7 @@ private:
uint32_t RelocType = MachO.getAnyRelocationType(RE);
bool IsPCRel = MachO.getAnyRelocationPCRel(RE);
uint64_t Offset = RelI->getOffset();
- uint8_t *LocalAddress = Section.Address + Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
int64_t Immediate = readBytesUnaligned(LocalAddress, 4); // Copy the whole instruction out.
Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 569a078..85059d7 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -47,8 +47,7 @@ public:
return processSECTDIFFRelocation(SectionID, RelI, Obj,
ObjSectionToID);
else if (RelType == MachO::GENERIC_RELOC_VANILLA)
- return processI386ScatteredVANILLA(SectionID, RelI, Obj,
- ObjSectionToID);
+ return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID);
llvm_unreachable("Unhandled scattered relocation.");
}
@@ -84,10 +83,10 @@ public:
DEBUG(dumpRelocationToResolve(RE, Value));
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
if (RE.IsPCRel) {
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
}
@@ -99,8 +98,8 @@ public:
break;
case MachO::GENERIC_RELOC_SECTDIFF:
case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
- uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
- uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress();
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress();
assert((Value == SectionABase || Value == SectionBBase) &&
"Unexpected SECTDIFF relocation value.");
Value = SectionABase - SectionBBase + RE.Addend;
@@ -139,7 +138,7 @@ private:
bool IsPCRel = Obj.getAnyRelocationPCRel(RE);
unsigned Size = Obj.getAnyRelocationLength(RE);
uint64_t Offset = RelI->getOffset();
- uint8_t *LocalAddress = Section.Address + Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
unsigned NumBytes = 1 << Size;
uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
@@ -183,41 +182,6 @@ private:
return ++RelI;
}
- relocation_iterator processI386ScatteredVANILLA(
- unsigned SectionID, relocation_iterator RelI,
- const ObjectFile &BaseObjT,
- RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) {
- const MachOObjectFile &Obj =
- static_cast<const MachOObjectFile&>(BaseObjT);
- MachO::any_relocation_info RE =
- Obj.getRelocation(RelI->getRawDataRefImpl());
-
- SectionEntry &Section = Sections[SectionID];
- uint32_t RelocType = Obj.getAnyRelocationType(RE);
- bool IsPCRel = Obj.getAnyRelocationPCRel(RE);
- unsigned Size = Obj.getAnyRelocationLength(RE);
- uint64_t Offset = RelI->getOffset();
- uint8_t *LocalAddress = Section.Address + Offset;
- unsigned NumBytes = 1 << Size;
- int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
-
- unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE);
- section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr);
- assert(TargetSI != Obj.section_end() && "Can't find section for symbol");
- uint64_t SectionBaseAddr = TargetSI->getAddress();
- SectionRef TargetSection = *TargetSI;
- bool IsCode = TargetSection.isText();
- uint32_t TargetSectionID =
- findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID);
-
- Addend -= SectionBaseAddr;
- RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size);
-
- addRelocationForSection(R, TargetSectionID);
-
- return ++RelI;
- }
-
// Populate stubs in __jump_table section.
void populateJumpTable(const MachOObjectFile &Obj, const SectionRef &JTSection,
unsigned JTSectionID) {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index dd56e72..2242295 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -39,6 +39,10 @@ public:
static_cast<const MachOObjectFile &>(BaseObjT);
MachO::any_relocation_info RelInfo =
Obj.getRelocation(RelI->getRawDataRefImpl());
+ uint32_t RelType = Obj.getAnyRelocationType(RelInfo);
+
+ if (RelType == MachO::X86_64_RELOC_SUBTRACTOR)
+ return processSubtractRelocation(SectionID, RelI, Obj, ObjSectionToID);
assert(!Obj.isRelocationScattered(RelInfo) &&
"Scattered relocations not supported on X86_64");
@@ -69,14 +73,14 @@ public:
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
DEBUG(dumpRelocationToResolve(RE, Value));
const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset);
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (RE.IsPCRel) {
// FIXME: It seems this value needs to be adjusted by 4 for an effective
// PC address. Is that expected? Only for branches, perhaps?
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
Value -= FinalAddress + 4;
}
@@ -91,9 +95,17 @@ public:
case MachO::X86_64_RELOC_BRANCH:
writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size);
break;
+ case MachO::X86_64_RELOC_SUBTRACTOR: {
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress();
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress();
+ assert((Value == SectionABase || Value == SectionBBase) &&
+ "Unexpected SUBTRACTOR relocation value.");
+ Value = SectionABase - SectionBBase + RE.Addend;
+ writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size);
+ break;
+ }
case MachO::X86_64_RELOC_GOT_LOAD:
case MachO::X86_64_RELOC_GOT:
- case MachO::X86_64_RELOC_SUBTRACTOR:
case MachO::X86_64_RELOC_TLV:
Error("Relocation type not implemented yet!");
}
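A SUBTRACTOR pair computes A - B + addend. By resolve time the two symbols' offsets within their sections appear to have been folded into RE.Addend (via the eight-argument RelocationEntry constructor used in processSubtractRelocation below), so only the two section load addresses are needed. Reduced to a sketch under that assumption:

    #include <cstdint>

    uint64_t resolveSubtractor(uint64_t SectionALoad, uint64_t SectionBLoad,
                               int64_t Addend) {
      return SectionALoad - SectionBLoad + uint64_t(Addend);
    }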
@@ -112,24 +124,65 @@ private:
RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value);
uint8_t *Addr;
if (i != Stubs.end()) {
- Addr = Section.Address + i->second;
+ Addr = Section.getAddressWithOffset(i->second);
} else {
- Stubs[Value] = Section.StubOffset;
- uint8_t *GOTEntry = Section.Address + Section.StubOffset;
- RelocationEntry GOTRE(RE.SectionID, Section.StubOffset,
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *GOTEntry = Section.getAddressWithOffset(Section.getStubOffset());
+ RelocationEntry GOTRE(RE.SectionID, Section.getStubOffset(),
MachO::X86_64_RELOC_UNSIGNED, Value.Offset, false,
3);
if (Value.SymbolName)
addRelocationForSymbol(GOTRE, Value.SymbolName);
else
addRelocationForSection(GOTRE, Value.SectionID);
- Section.StubOffset += 8;
+ Section.advanceStubOffset(8);
Addr = GOTEntry;
}
RelocationEntry TargetRE(RE.SectionID, RE.Offset,
MachO::X86_64_RELOC_UNSIGNED, RE.Addend, true, 2);
resolveRelocation(TargetRE, (uint64_t)Addr);
}
+
+ relocation_iterator
+ processSubtractRelocation(unsigned SectionID, relocation_iterator RelI,
+ const ObjectFile &BaseObjT,
+ ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile&>(BaseObjT);
+ MachO::any_relocation_info RE =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+
+ unsigned Size = Obj.getAnyRelocationLength(RE);
+ uint64_t Offset = RelI->getOffset();
+ uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset);
+ unsigned NumBytes = 1 << Size;
+
+ ErrorOr<StringRef> SubtrahendNameOrErr = RelI->getSymbol()->getName();
+ if (auto EC = SubtrahendNameOrErr.getError())
+ report_fatal_error(EC.message());
+ auto SubtrahendI = GlobalSymbolTable.find(*SubtrahendNameOrErr);
+ unsigned SectionBID = SubtrahendI->second.getSectionID();
+ uint64_t SectionBOffset = SubtrahendI->second.getOffset();
+ int64_t Addend =
+ SignExtend64(readBytesUnaligned(LocalAddress, NumBytes), NumBytes * 8);
+
+ ++RelI;
+ ErrorOr<StringRef> MinuendNameOrErr = RelI->getSymbol()->getName();
+ if (auto EC = MinuendNameOrErr.getError())
+ report_fatal_error(EC.message());
+ auto MinuendI = GlobalSymbolTable.find(*MinuendNameOrErr);
+ unsigned SectionAID = MinuendI->second.getSectionID();
+ uint64_t SectionAOffset = MinuendI->second.getOffset();
+
+ RelocationEntry R(SectionID, Offset, MachO::X86_64_RELOC_SUBTRACTOR, (uint64_t)Addend,
+ SectionAID, SectionAOffset, SectionBID, SectionBOffset,
+ false, Size);
+
+ addRelocationForSection(R, SectionAID);
+
+ return ++RelI;
+ }
+
};
}
diff --git a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index 5986084..e2f2208 100644
--- a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -15,6 +15,7 @@
#include "llvm/Config/config.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Process.h"
namespace llvm {
@@ -48,16 +49,27 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
// Look in the list of free memory regions and use a block there if one
// is available.
- for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) {
- sys::MemoryBlock &MB = MemGroup.FreeMem[i];
- if (MB.size() >= RequiredSize) {
- Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
+ for (FreeMemBlock &FreeMB : MemGroup.FreeMem) {
+ if (FreeMB.Free.size() >= RequiredSize) {
+ Addr = (uintptr_t)FreeMB.Free.base();
+ uintptr_t EndOfBlock = Addr + FreeMB.Free.size();
// Align the address.
Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
- // Store cutted free memory block.
- MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
- EndOfBlock - Addr - Size);
+
+ if (FreeMB.PendingPrefixIndex == (unsigned)-1) {
+ // The part of the block we're giving out to the user is now pending
+ MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size));
+
+ // Remember this pending block, such that future allocations can just
+ // modify it rather than creating a new one
+ FreeMB.PendingPrefixIndex = MemGroup.PendingMem.size() - 1;
+ } else {
+ sys::MemoryBlock &PendingMB = MemGroup.PendingMem[FreeMB.PendingPrefixIndex];
+ PendingMB = sys::MemoryBlock(PendingMB.base(), Addr + Size - (uintptr_t)PendingMB.base());
+ }
+
+ // Remember how much free space is now left in this block
+ FreeMB.Free = sys::MemoryBlock((void *)(Addr + Size), EndOfBlock - Addr - Size);
return (uint8_t*)Addr;
}
}
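The PendingPrefixIndex scheme above keeps at most one pending entry per free block: each allocation carved off the front of a free block grows that entry in place instead of appending a new one, so PendingMem stays proportional to the number of distinct free regions. The invariant, reduced to a standalone sketch (names hypothetical, alignment elided):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Block { uintptr_t Base; size_t Size; };

    struct Group {
      std::vector<Block> Pending;
      struct Free { Block B; unsigned PendingPrefixIndex = ~0u; };

      // Carve Size bytes off the front of the free block F.
      uintptr_t carve(Free &F, size_t Size) {
        uintptr_t Addr = F.B.Base;
        if (F.PendingPrefixIndex == ~0u) {
          Pending.push_back({Addr, Size});          // first carve: new entry
          F.PendingPrefixIndex = unsigned(Pending.size() - 1);
        } else {
          Block &P = Pending[F.PendingPrefixIndex]; // later carves: grow it
          P.Size = Addr + Size - P.Base;
        }
        F.B = {Addr + Size, F.B.Size - Size};       // shrink the remainder
        return Addr;
      }
    };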
@@ -85,6 +97,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
// Save this address as the basis for our next request
MemGroup.Near = MB;
+ // Remember that we allocated this memory
MemGroup.AllocatedMem.push_back(MB);
Addr = (uintptr_t)MB.base();
uintptr_t EndOfBlock = Addr + MB.size();
@@ -92,11 +105,18 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
// Align the address.
Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+ // The part of the block we're giving out to the user is now pending
+ MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size));
+
    // allocateMappedMemory may allocate much more memory than we need. In
// this case, we store the unused memory as a free memory block.
unsigned FreeSize = EndOfBlock-Addr-Size;
- if (FreeSize > 16)
- MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
+ if (FreeSize > 16) {
+ FreeMemBlock FreeMB;
+ FreeMB.Free = sys::MemoryBlock((void*)(Addr + Size), FreeSize);
+ FreeMB.PendingPrefixIndex = (unsigned)-1;
+ MemGroup.FreeMem.push_back(FreeMB);
+ }
// Return aligned address
return (uint8_t*)Addr;
@@ -107,9 +127,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
// FIXME: Should in-progress permissions be reverted if an error occurs?
std::error_code ec;
- // Don't allow free memory blocks to be used after setting protection flags.
- CodeMem.FreeMem.clear();
-
// Make code memory executable.
ec = applyMemoryGroupPermissions(CodeMem,
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
@@ -143,36 +160,62 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
return false;
}
+static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) {
+ static const size_t PageSize = sys::Process::getPageSize();
+
+ size_t StartOverlap =
+ (PageSize - ((uintptr_t)M.base() % PageSize)) % PageSize;
+
+ size_t TrimmedSize = M.size();
+ TrimmedSize -= StartOverlap;
+ TrimmedSize -= TrimmedSize % PageSize;
+
+ sys::MemoryBlock Trimmed((void *)((uintptr_t)M.base() + StartOverlap), TrimmedSize);
+
+ assert(((uintptr_t)Trimmed.base() % PageSize) == 0);
+ assert((Trimmed.size() % PageSize) == 0);
+ assert(M.base() <= Trimmed.base() && Trimmed.size() <= M.size());
+
+ return Trimmed;
+}
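A worked instance of the trimming math, with 4 KiB pages: a block spanning [0x1800, 0x4200) trims to [0x2000, 0x4000), dropping the partial pages at both ends.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t PageSize = 0x1000;
      const uintptr_t Base = 0x1800, Size = 0x2A00; // block [0x1800, 0x4200)
      uintptr_t StartOverlap = (PageSize - Base % PageSize) % PageSize; // 0x800
      uintptr_t TrimmedSize = Size - StartOverlap;  // 0x2200
      TrimmedSize -= TrimmedSize % PageSize;        // 0x2000
      assert(Base + StartOverlap == 0x2000);
      assert(TrimmedSize == 0x2000);
      return 0;
    }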
+
std::error_code
SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
unsigned Permissions) {
-
- for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) {
- std::error_code ec;
- ec =
- sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], Permissions);
- if (ec) {
- return ec;
- }
+ for (sys::MemoryBlock &MB : MemGroup.PendingMem)
+ if (std::error_code EC = sys::Memory::protectMappedMemory(MB, Permissions))
+ return EC;
+
+ MemGroup.PendingMem.clear();
+
+ // Now go through free blocks and trim any of them that don't span the entire
+ // page because one of the pending blocks may have overlapped it.
+ for (FreeMemBlock &FreeMB : MemGroup.FreeMem) {
+ FreeMB.Free = trimBlockToPageSize(FreeMB.Free);
+ // We cleared the PendingMem list, so all these pointers are now invalid
+ FreeMB.PendingPrefixIndex = (unsigned)-1;
}
+ // Remove all blocks which are now empty
+ MemGroup.FreeMem.erase(
+ std::remove_if(MemGroup.FreeMem.begin(), MemGroup.FreeMem.end(),
+ [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }),
+ MemGroup.FreeMem.end());
+
return std::error_code();
}
void SectionMemoryManager::invalidateInstructionCache() {
- for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
- sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(),
- CodeMem.AllocatedMem[i].size());
+ for (sys::MemoryBlock &Block : CodeMem.PendingMem)
+ sys::Memory::InvalidateInstructionCache(Block.base(), Block.size());
}
SectionMemoryManager::~SectionMemoryManager() {
- for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
- sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]);
- for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i)
- sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]);
- for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i)
- sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]);
+ for (MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) {
+ for (sys::MemoryBlock &Block : Group->AllocatedMem)
+ sys::Memory::releaseMappedMemory(Block);
+ }
}
} // namespace llvm
-
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index b553f11..185db47 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -102,17 +103,9 @@ static OrderMap orderModule(const Module *M) {
orderValue(&A, OM);
}
for (const Function &F : *M) {
- if (F.hasPrefixData())
- if (!isa<GlobalValue>(F.getPrefixData()))
- orderValue(F.getPrefixData(), OM);
-
- if (F.hasPrologueData())
- if (!isa<GlobalValue>(F.getPrologueData()))
- orderValue(F.getPrologueData(), OM);
-
- if (F.hasPersonalityFn())
- if (!isa<GlobalValue>(F.getPersonalityFn()))
- orderValue(F.getPersonalityFn(), OM);
+ for (const Use &U : F.operands())
+ if (!isa<GlobalValue>(U.get()))
+ orderValue(U.get(), OM);
orderValue(&F, OM);
@@ -232,8 +225,7 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
// We want to visit the functions backward now so we can list function-local
// constants in the last Function they're used in. Module-level constants
// have already been visited above.
- for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
- const Function &F = *I;
+ for (const Function &F : make_range(M->rbegin(), M->rend())) {
if (F.isDeclaration())
continue;
for (const BasicBlock &BB : F)
@@ -263,8 +255,8 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
for (const GlobalAlias &A : M->aliases())
predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
for (const Function &F : *M)
- if (F.hasPrefixData())
- predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+ for (const Use &U : F.operands())
+ predictValueUseListOrder(U.get(), nullptr, OM, Stack);
return Stack;
}
@@ -304,6 +296,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AnyReg: Out << "anyregcc"; break;
case CallingConv::PreserveMost: Out << "preserve_mostcc"; break;
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
+ case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
@@ -320,6 +313,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_64_Win64: Out << "x86_64_win64cc"; break;
case CallingConv::SPIR_FUNC: Out << "spir_func"; break;
case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break;
+ case CallingConv::X86_INTR: Out << "x86_intrcc"; break;
+ case CallingConv::HHVM: Out << "hhvmcc"; break;
+ case CallingConv::HHVM_C: Out << "hhvm_ccc"; break;
}
}
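
[Editor's note] The new cases cover calling conventions added in this release. From C++ they are reached through llvm::CallingConv; a small hedged sketch (the function name is illustrative):

    #include "llvm/IR/Function.h"

    // Mark F with one of the newly printable conventions.
    void useFastTLSConvention(llvm::Function *F) {
      F->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); // prints "cxx_fast_tlscc"
    }
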
@@ -343,18 +339,8 @@ enum PrefixType {
NoPrefix
};
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) {
assert(!Name.empty() && "Cannot get empty name!");
- switch (Prefix) {
- case NoPrefix: break;
- case GlobalPrefix: OS << '@'; break;
- case ComdatPrefix: OS << '$'; break;
- case LabelPrefix: break;
- case LocalPrefix: OS << '%'; break;
- }
// Scan the name to see if it needs quotes first.
bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
@@ -386,9 +372,31 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
OS << '"';
}
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+ switch (Prefix) {
+ case NoPrefix:
+ break;
+ case GlobalPrefix:
+ OS << '@';
+ break;
+ case ComdatPrefix:
+ OS << '$';
+ break;
+ case LabelPrefix:
+ break;
+ case LocalPrefix:
+ OS << '%';
+ break;
+ }
+ printLLVMNameWithoutPrefix(OS, Name);
+}
+
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
static void PrintLLVMName(raw_ostream &OS, const Value *V) {
PrintLLVMName(OS, V->getName(),
isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
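
[Editor's note] The refactor separates prefix selection from the quoting logic but keeps the quoting rule itself. A standalone sketch that mirrors (does not call) that rule, assuming the usual LLVM identifier set [-a-zA-Z$._][-a-zA-Z$._0-9]*:

    #include <cctype>
    #include <string>

    // True when an IR name must be printed quoted ("...") rather than bare.
    bool needsQuotes(const std::string &Name) {
      if (Name.empty() || std::isdigit((unsigned char)Name[0]))
        return true;
      for (char C : Name)
        if (!std::isalnum((unsigned char)C) &&
            C != '-' && C != '.' && C != '_' && C != '$')
          return true;
      return false;
    }
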
@@ -456,6 +464,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
case Type::LabelTyID: OS << "label"; return;
case Type::MetadataTyID: OS << "metadata"; return;
case Type::X86_MMXTyID: OS << "x86_mmx"; return;
+ case Type::TokenTyID: OS << "token"; return;
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
return;
@@ -691,8 +700,9 @@ void ModuleSlotTracker::incorporateFunction(const Function &F) {
this->F = &F;
}
-static SlotTracker *createSlotTracker(const Module *M) {
- return new SlotTracker(M);
+int ModuleSlotTracker::getLocalSlot(const Value *V) {
+ assert(F && "No function incorporated");
+ return Machine->getLocalSlot(V);
}
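
[Editor's note] getLocalSlot exposes the numbering the printer uses for unnamed values (%0, %1, ...). A plausible use, sketched; incorporateFunction must run first, as the assertion above enforces:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/ModuleSlotTracker.h"

    // Return the %N slot of V inside F, or -1 if V has none.
    int localSlotOf(const llvm::Function &F, const llvm::Value *V) {
      llvm::ModuleSlotTracker MST(F.getParent());
      MST.incorporateFunction(F);
      return MST.getLocalSlot(V);
    }
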
static SlotTracker *createSlotTracker(const Value *V) {
@@ -802,7 +812,7 @@ void SlotTracker::processFunction() {
for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
AE = TheFunction->arg_end(); AI != AE; ++AI)
if (!AI->hasName())
- CreateFunctionSlot(AI);
+ CreateFunctionSlot(&*AI);
ST_DEBUG("Inserting Instructions:\n");
@@ -1093,11 +1103,10 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// the value back and get the same value.
//
bool ignored;
- bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
bool isInf = CFP->getValueAPF().isInfinity();
bool isNaN = CFP->getValueAPF().isNaN();
- if (!isHalf && !isInf && !isNaN) {
+ if (!isInf && !isNaN) {
double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
CFP->getValueAPF().convertToFloat();
SmallString<128> StrVal;
@@ -1123,15 +1132,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// x86, so we must not use these types.
static_assert(sizeof(double) == sizeof(uint64_t),
"assuming that double is 64 bits!");
- char Buffer[40];
APFloat apf = CFP->getValueAPF();
- // Halves and floats are represented in ASCII IR as double, convert.
+ // Floats are represented in ASCII IR as double, convert.
if (!isDouble)
apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
- Out << "0x" <<
- utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
- Buffer+40);
+ Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true);
return;
}
@@ -1139,60 +1145,32 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// These appear as a magic letter identifying the type, then a
// fixed number of hex digits.
Out << "0x";
- // Bit position, in the current word, of the next nibble to print.
- int shiftcount;
-
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
Out << 'K';
- // api needed to prevent premature destruction
- APInt api = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t* p = api.getRawData();
- uint64_t word = p[1];
- shiftcount = 12;
- int width = api.getBitWidth();
- for (int j=0; j<width; j+=4, shiftcount-=4) {
- unsigned int nibble = (word>>shiftcount) & 15;
- if (nibble < 10)
- Out << (unsigned char)(nibble + '0');
- else
- Out << (unsigned char)(nibble - 10 + 'A');
- if (shiftcount == 0 && j+4 < width) {
- word = *p;
- shiftcount = 64;
- if (width-j-4 < 64)
- shiftcount = width-j-4;
- }
- }
+ Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+ /*Upper=*/true);
+ Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
return;
} else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
- shiftcount = 60;
Out << 'L';
+ Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
+ Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
} else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
- shiftcount = 60;
Out << 'M';
+ Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
+ Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
} else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
- shiftcount = 12;
Out << 'H';
+ Out << format_hex_no_prefix(API.getZExtValue(), 4,
+ /*Upper=*/true);
} else
llvm_unreachable("Unsupported floating point type");
- // api needed to prevent premature destruction
- APInt api = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t* p = api.getRawData();
- uint64_t word = *p;
- int width = api.getBitWidth();
- for (int j=0; j<width; j+=4, shiftcount-=4) {
- unsigned int nibble = (word>>shiftcount) & 15;
- if (nibble < 10)
- Out << (unsigned char)(nibble + '0');
- else
- Out << (unsigned char)(nibble - 10 + 'A');
- if (shiftcount == 0 && j+4 < width) {
- word = *(++p);
- shiftcount = 64;
- if (width-j-4 < 64)
- shiftcount = width-j-4;
- }
- }
return;
}
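
[Editor's note] The deleted nibble loops are replaced by the Support/Format.h helpers. A hedged sketch of the two calls, with an arbitrary value:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void printHexForms(llvm::raw_ostream &OS) {
      // With "0x" prefix, uppercase digits, no fixed width.
      OS << llvm::format_hex(0xDEADBEEFull, 0, /*Upper=*/true);
      // Without prefix, zero-padded to 16 digits; this is how the
      // 64-bit halves of fp128 and ppc_fp128 constants are emitted above.
      OS << llvm::format_hex_no_prefix(0xDEADBEEFull, 16, /*Upper=*/true);
    }
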
@@ -1313,6 +1291,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
+ if (isa<ConstantTokenNone>(CV)) {
+ Out << "none";
+ return;
+ }
+
if (isa<UndefValue>(CV)) {
Out << "undef";
return;
@@ -1326,10 +1309,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << " (";
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- TypePrinter.print(
- cast<PointerType>(GEP->getPointerOperandType()->getScalarType())
- ->getElementType(),
- Out);
+ TypePrinter.print(GEP->getSourceElementType(), Out);
Out << ", ";
}
@@ -1409,6 +1389,7 @@ struct MDFieldPrinter {
: Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
}
void printTag(const DINode *N);
+ void printMacinfoType(const DIMacroNode *N);
void printString(StringRef Name, StringRef Value,
bool ShouldSkipEmpty = true);
void printMetadata(StringRef Name, const Metadata *MD,
@@ -1431,6 +1412,14 @@ void MDFieldPrinter::printTag(const DINode *N) {
Out << N->getTag();
}
+void MDFieldPrinter::printMacinfoType(const DIMacroNode *N) {
+ Out << FS << "type: ";
+ if (const char *Type = dwarf::MacinfoString(N->getMacinfoType()))
+ Out << Type;
+ else
+ Out << N->getMacinfoType();
+}
+
void MDFieldPrinter::printString(StringRef Name, StringRef Value,
bool ShouldSkipEmpty) {
if (ShouldSkipEmpty && Value.empty())
@@ -1656,6 +1645,7 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Printer.printMetadata("subprograms", N->getRawSubprograms());
Printer.printMetadata("globals", N->getRawGlobalVariables());
Printer.printMetadata("imports", N->getRawImportedEntities());
+ Printer.printMetadata("macros", N->getRawMacros());
Printer.printInt("dwoId", N->getDWOId());
Out << ")";
}
@@ -1680,7 +1670,6 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printInt("virtualIndex", N->getVirtualIndex());
Printer.printDIFlags("flags", N->getFlags());
Printer.printBool("isOptimized", N->isOptimized());
- Printer.printMetadata("function", N->getRawFunction());
Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printMetadata("declaration", N->getRawDeclaration());
Printer.printMetadata("variables", N->getRawVariables());
@@ -1725,6 +1714,29 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
Out << ")";
}
+static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIMacro(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMacinfoType(N);
+ Printer.printInt("line", N->getLine());
+ Printer.printString("name", N->getName());
+ Printer.printString("value", N->getValue());
+ Out << ")";
+}
+
+static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIMacroFile(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("nodes", N->getRawElements());
+ Out << ")";
+}
+
static void writeDIModule(raw_ostream &Out, const DIModule *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
@@ -1789,11 +1801,8 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
SlotTracker *Machine, const Module *Context) {
Out << "!DILocalVariable(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
- Printer.printTag(N);
Printer.printString("name", N->getName());
- Printer.printInt("arg", N->getArg(),
- /* ShouldSkipZero */
- N->getTag() == dwarf::DW_TAG_auto_variable);
+ Printer.printInt("arg", N->getArg());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
@@ -1998,6 +2007,7 @@ class AssemblyWriter {
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
SetVector<const Comdat *> Comdats;
+ bool IsForDebug;
bool ShouldPreserveUseListOrder;
UseListOrderStack UseListOrders;
SmallVector<StringRef, 8> MDNames;
@@ -2005,12 +2015,7 @@ class AssemblyWriter {
public:
/// Construct an AssemblyWriter with an external SlotTracker
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
- AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder = false);
-
- /// Construct an AssemblyWriter with an internally allocated SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW,
+ AssemblyAnnotationWriter *AAW, bool IsForDebug,
bool ShouldPreserveUseListOrder = false);
void printMDNodeBody(const MDNode *MD);
@@ -2020,6 +2025,7 @@ public:
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+ void writeOperandBundles(ImmutableCallSite CS);
void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
@@ -2043,8 +2049,6 @@ public:
void printUseLists(const Function *F);
private:
- void init();
-
/// \brief Print out metadata attachments.
void printMetadataAttachments(
const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
@@ -2060,7 +2064,12 @@ private:
};
} // namespace
-void AssemblyWriter::init() {
+AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M, AssemblyAnnotationWriter *AAW,
+ bool IsForDebug, bool ShouldPreserveUseListOrder)
+ : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
+ IsForDebug(IsForDebug),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
if (!TheModule)
return;
TypePrinter.incorporateTypes(*TheModule);
@@ -2072,23 +2081,6 @@ void AssemblyWriter::init() {
Comdats.insert(C);
}
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M, AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder)
- : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
- init();
-}
-
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder)
- : Out(o), TheModule(M), SlotTrackerStorage(createSlotTracker(M)),
- Machine(*SlotTrackerStorage), AnnotationWriter(AAW),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
- init();
-}
-
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
Out << "<null operand!>";
@@ -2170,6 +2162,43 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
+void AssemblyWriter::writeOperandBundles(ImmutableCallSite CS) {
+ if (!CS.hasOperandBundles())
+ return;
+
+ Out << " [ ";
+
+ bool FirstBundle = true;
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+ OperandBundleUse BU = CS.getOperandBundleAt(i);
+
+ if (!FirstBundle)
+ Out << ", ";
+ FirstBundle = false;
+
+ Out << '"';
+ PrintEscapedString(BU.getTagName(), Out);
+ Out << '"';
+
+ Out << '(';
+
+ bool FirstInput = true;
+ for (const auto &Input : BU.Inputs) {
+ if (!FirstInput)
+ Out << ", ";
+ FirstInput = false;
+
+ TypePrinter.print(Input->getType(), Out);
+ Out << " ";
+ WriteAsOperandInternal(Out, Input, &TypePrinter, &Machine, TheModule);
+ }
+
+ Out << ')';
+ }
+
+ Out << " ]";
+}
+
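
[Editor's note] writeOperandBundles prints the new bundle syntax, e.g. call void @f() [ "deopt"(i32 10) ]. A hedged sketch of building such a call site; the exact OperandBundleDef constructor shape is assumed from this era's InstrTypes.h:

    #include "llvm/IR/IRBuilder.h"

    // Attach a "deopt" bundle carrying State to a call to F.
    llvm::CallInst *callWithDeopt(llvm::IRBuilder<> &B, llvm::Function *F,
                                  llvm::Value *State) {
      llvm::OperandBundleDef Deopt("deopt",
                                   std::vector<llvm::Value *>{State});
      return B.CreateCall(F, /*Args=*/{}, {Deopt});
    }
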
void AssemblyWriter::printModule(const Module *M) {
Machine.initialize();
@@ -2422,6 +2451,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
Out << "alias ";
+ TypePrinter.print(GA->getValueType(), Out);
+
+ Out << ", ";
+
const Constant *Aliasee = GA->getAliasee();
if (!Aliasee) {
@@ -2536,28 +2569,26 @@ void AssemblyWriter::printFunction(const Function *F) {
Machine.incorporateFunction(F);
// Loop over the arguments, printing them...
-
- unsigned Idx = 1;
- if (!F->isDeclaration()) {
- // If this isn't a declaration, print the argument names as well.
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
+ if (F->isDeclaration() && !IsForDebug) {
+ // We're only interested in the type here - don't print argument names.
+ for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
// Insert commas as we go... the first arg doesn't get a comma
- if (I != F->arg_begin()) Out << ", ";
- printArgument(I, Attrs, Idx);
- Idx++;
+ if (I)
+ Out << ", ";
+ // Output type...
+ TypePrinter.print(FT->getParamType(I), Out);
+
+ if (Attrs.hasAttributes(I + 1))
+ Out << ' ' << Attrs.getAsString(I + 1);
}
} else {
- // Otherwise, print the types from the function type.
- for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // The arguments are meaningful here, print them in detail.
+ unsigned Idx = 1;
+ for (const Argument &Arg : F->args()) {
// Insert commas as we go... the first arg doesn't get a comma
- if (i) Out << ", ";
-
- // Output type...
- TypePrinter.print(FT->getParamType(i), Out);
-
- if (Attrs.hasAttributes(i+1))
- Out << ' ' << Attrs.getAsString(i+1);
+ if (Idx != 1)
+ Out << ", ";
+ printArgument(&Arg, Attrs, Idx++);
}
}
@@ -2604,7 +2635,7 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << " {";
// Output all of the function's basic blocks.
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
- printBasicBlock(I);
+ printBasicBlock(&*I);
// Output the function's use-lists.
printUseLists(F);
@@ -2738,6 +2769,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << "musttail ";
else if (CI->isTailCall())
Out << "tail ";
+ else if (CI->isNoTailCall())
+ Out << "notail ";
}
// Print out the opcode...
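
[Editor's note] "notail" is the printed form of the new TCK_NoTail tail-call kind. Setting it from C++, sketched:

    #include "llvm/IR/Instructions.h"

    // Forbid tail-call optimization of one specific call site.
    void forbidTailCall(llvm::CallInst *CI) {
      CI->setTailCallKind(llvm::CallInst::TCK_NoTail); // printed as "notail"
    }
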
@@ -2850,8 +2883,50 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(LPI->getClause(i), true);
}
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(&I)) {
+ Out << " within ";
+ writeOperand(CatchSwitch->getParentPad(), /*PrintType=*/false);
+ Out << " [";
+ unsigned Op = 0;
+ for (const BasicBlock *PadBB : CatchSwitch->handlers()) {
+ if (Op > 0)
+ Out << ", ";
+ writeOperand(PadBB, /*PrintType=*/true);
+ ++Op;
+ }
+ Out << "] unwind ";
+ if (const BasicBlock *UnwindDest = CatchSwitch->getUnwindDest())
+ writeOperand(UnwindDest, /*PrintType=*/true);
+ else
+ Out << "to caller";
+ } else if (const auto *FPI = dyn_cast<FuncletPadInst>(&I)) {
+ Out << " within ";
+ writeOperand(FPI->getParentPad(), /*PrintType=*/false);
+ Out << " [";
+ for (unsigned Op = 0, NumOps = FPI->getNumArgOperands(); Op < NumOps;
+ ++Op) {
+ if (Op > 0)
+ Out << ", ";
+ writeOperand(FPI->getArgOperand(Op), /*PrintType=*/true);
+ }
+ Out << ']';
} else if (isa<ReturnInst>(I) && !Operand) {
Out << " void";
+ } else if (const auto *CRI = dyn_cast<CatchReturnInst>(&I)) {
+ Out << " from ";
+ writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+ Out << " to ";
+ writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+ } else if (const auto *CRI = dyn_cast<CleanupReturnInst>(&I)) {
+ Out << " from ";
+ writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+ Out << " unwind ";
+ if (CRI->hasUnwindDest())
+ writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+ else
+ Out << "to caller";
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
// Print the calling convention being used.
if (CI->getCallingConv() != CallingConv::C) {
@@ -2892,6 +2967,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ')';
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+ writeOperandBundles(CI);
+
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Operand = II->getCalledValue();
FunctionType *FTy = cast<FunctionType>(II->getFunctionType());
@@ -2926,6 +3004,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ writeOperandBundles(II);
+
Out << "\n to ";
writeOperand(II->getNormalDest(), true);
Out << " unwind ";
@@ -3138,29 +3218,23 @@ void AssemblyWriter::printUseLists(const Function *F) {
// External Interface declarations
//===----------------------------------------------------------------------===//
-void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
- SlotTracker SlotTable(this->getParent());
- formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, this->getParent(), AAW);
- W.printFunction(this);
-}
-
void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder) const {
+ bool ShouldPreserveUseListOrder, bool IsForDebug) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder);
+ AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
+ ShouldPreserveUseListOrder);
W.printModule(this);
}
-void NamedMDNode::print(raw_ostream &ROS) const {
+void NamedMDNode::print(raw_ostream &ROS, bool IsForDebug) const {
SlotTracker SlotTable(getParent());
formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, getParent(), nullptr);
+ AssemblyWriter W(OS, SlotTable, getParent(), nullptr, IsForDebug);
W.printNamedMDNode(this);
}
-void Comdat::print(raw_ostream &ROS) const {
+void Comdat::print(raw_ostream &ROS, bool /*IsForDebug*/) const {
PrintLLVMName(ROS, getName(), ComdatPrefix);
ROS << " = comdat ";
@@ -3185,7 +3259,7 @@ void Comdat::print(raw_ostream &ROS) const {
ROS << '\n';
}
-void Type::print(raw_ostream &OS) const {
+void Type::print(raw_ostream &OS, bool /*IsForDebug*/) const {
TypePrinting TP;
TP.print(const_cast<Type*>(this), OS);
@@ -3208,7 +3282,7 @@ static bool isReferencingMDNode(const Instruction &I) {
return false;
}
-void Value::print(raw_ostream &ROS) const {
+void Value::print(raw_ostream &ROS, bool IsForDebug) const {
bool ShouldInitializeAllMetadata = false;
if (auto *I = dyn_cast<Instruction>(this))
ShouldInitializeAllMetadata = isReferencingMDNode(*I);
@@ -3216,10 +3290,11 @@ void Value::print(raw_ostream &ROS) const {
ShouldInitializeAllMetadata = true;
ModuleSlotTracker MST(getModuleFromVal(this), ShouldInitializeAllMetadata);
- print(ROS, MST);
+ print(ROS, MST, IsForDebug);
}
-void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
+void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST,
+ bool IsForDebug) const {
formatted_raw_ostream OS(ROS);
SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
SlotTracker &SlotTable =
@@ -3231,14 +3306,14 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
if (const Instruction *I = dyn_cast<Instruction>(this)) {
incorporateFunction(I->getParent() ? I->getParent()->getParent() : nullptr);
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr, IsForDebug);
W.printInstruction(*I);
} else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
incorporateFunction(BB->getParent());
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr, IsForDebug);
W.printBasicBlock(BB);
} else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr, IsForDebug);
if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
W.printGlobal(V);
else if (const Function *F = dyn_cast<Function>(GV))
@@ -3261,7 +3336,7 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
/// Print without a type, skipping the TypePrinting object.
///
-/// \return \c true iff printing was succesful.
+/// \return \c true iff printing was successful.
static bool printWithoutType(const Value &V, raw_ostream &O,
SlotTracker *Machine, const Module *M) {
if (V.hasName() || isa<GlobalValue>(V) ||
@@ -3340,41 +3415,45 @@ void Metadata::printAsOperand(raw_ostream &OS, ModuleSlotTracker &MST,
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ true);
}
-void Metadata::print(raw_ostream &OS, const Module *M) const {
+void Metadata::print(raw_ostream &OS, const Module *M,
+ bool /*IsForDebug*/) const {
ModuleSlotTracker MST(M, isa<MDNode>(this));
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
}
void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
- const Module *M) const {
+ const Module *M, bool /*IsForDebug*/) const {
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
}
// Value::dump - allow easy printing of Values from the debugger.
LLVM_DUMP_METHOD
-void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Value::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
// Type::dump - allow easy printing of Types from the debugger.
LLVM_DUMP_METHOD
-void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Type::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
// Module::dump() - Allow printing of Modules from the debugger.
LLVM_DUMP_METHOD
-void Module::dump() const { print(dbgs(), nullptr); }
+void Module::dump() const {
+ print(dbgs(), nullptr,
+ /*ShouldPreserveUseListOrder=*/false, /*IsForDebug=*/true);
+}
// \brief Allow printing of Comdats from the debugger.
LLVM_DUMP_METHOD
-void Comdat::dump() const { print(dbgs()); }
+void Comdat::dump() const { print(dbgs(), /*IsForDebug=*/true); }
// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
LLVM_DUMP_METHOD
-void NamedMDNode::dump() const { print(dbgs()); }
+void NamedMDNode::dump() const { print(dbgs(), /*IsForDebug=*/true); }
LLVM_DUMP_METHOD
void Metadata::dump() const { dump(nullptr); }
LLVM_DUMP_METHOD
void Metadata::dump(const Module *M) const {
- print(dbgs(), M);
+ print(dbgs(), M, /*IsForDebug=*/true);
dbgs() << '\n';
}
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
index 6f338ae..659f956 100644
--- a/contrib/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/Support/TrailingObjects.h"
#include <string>
namespace llvm {
@@ -141,13 +142,16 @@ public:
/// \class
/// \brief This class represents a group of attributes that apply to one
/// element: function, return type, or parameter.
-class AttributeSetNode : public FoldingSetNode {
+class AttributeSetNode final
+ : public FoldingSetNode,
+ private TrailingObjects<AttributeSetNode, Attribute> {
+ friend TrailingObjects;
+
unsigned NumAttrs; ///< Number of attributes in this node.
AttributeSetNode(ArrayRef<Attribute> Attrs) : NumAttrs(Attrs.size()) {
// There's memory after the node where we can store the entries in.
- std::copy(Attrs.begin(), Attrs.end(),
- reinterpret_cast<Attribute *>(this + 1));
+ std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
}
// AttributesSetNode is uniqued, these should not be publicly available.
@@ -170,7 +174,7 @@ public:
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
- iterator begin() const { return reinterpret_cast<iterator>(this + 1); }
+ iterator begin() const { return getTrailingObjects<Attribute>(); }
iterator end() const { return begin() + NumAttrs; }
void Profile(FoldingSetNodeID &ID) const {
@@ -181,27 +185,29 @@ public:
AttrList[I].Profile(ID);
}
};
-static_assert(
- AlignOf<AttributeSetNode>::Alignment >= AlignOf<Attribute>::Alignment,
- "Alignment is insufficient for objects appended to AttributeSetNode");
+
+typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a set of attributes that apply to the function,
/// return type, and parameters.
-class AttributeSetImpl : public FoldingSetNode {
+class AttributeSetImpl final
+ : public FoldingSetNode,
+ private TrailingObjects<AttributeSetImpl, IndexAttrPair> {
friend class AttributeSet;
-
-public:
- typedef std::pair<unsigned, AttributeSetNode*> IndexAttrPair;
+ friend TrailingObjects;
private:
LLVMContext &Context;
unsigned NumAttrs; ///< Number of entries in this set.
+ // Helper fn for TrailingObjects class.
+ size_t numTrailingObjects(OverloadToken<IndexAttrPair>) { return NumAttrs; }
+
/// \brief Return a pointer to the IndexAttrPair for the specified slot.
const IndexAttrPair *getNode(unsigned Slot) const {
- return reinterpret_cast<const IndexAttrPair *>(this + 1) + Slot;
+ return getTrailingObjects<IndexAttrPair>() + Slot;
}
// AttributesSet is uniqued, these should not be publicly available.
@@ -222,8 +228,7 @@ public:
}
#endif
// There's memory after the node where we can store the entries in.
- std::copy(Attrs.begin(), Attrs.end(),
- reinterpret_cast<IndexAttrPair *>(this + 1));
+ std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<IndexAttrPair>());
}
/// \brief Get the context that created this AttributeSetImpl.
@@ -273,10 +278,6 @@ public:
void dump() const;
};
-static_assert(
- AlignOf<AttributeSetImpl>::Alignment >=
- AlignOf<AttributeSetImpl::IndexAttrPair>::Alignment,
- "Alignment is insufficient for objects appended to AttributeSetImpl");
} // end llvm namespace
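
[Editor's note] Both classes now use llvm::TrailingObjects instead of the reinterpret_cast<...>(this + 1) pattern, which also retires the hand-written alignment static_asserts. A minimal self-contained sketch of the same technique, with an illustrative class:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/TrailingObjects.h"
    #include <memory>
    #include <new>

    class IntBag final : private llvm::TrailingObjects<IntBag, int> {
      friend TrailingObjects;
      unsigned NumInts;
      size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }
      explicit IntBag(llvm::ArrayRef<int> Vals) : NumInts(Vals.size()) {
        // The ints live immediately after the object, correctly aligned.
        std::uninitialized_copy(Vals.begin(), Vals.end(),
                                getTrailingObjects<int>());
      }

    public:
      static IntBag *create(llvm::ArrayRef<int> Vals) {
        void *Mem = ::operator new(totalSizeToAlloc<int>(Vals.size()));
        return new (Mem) IntBag(Vals);
      }
      llvm::ArrayRef<int> values() const {
        return llvm::makeArrayRef(getTrailingObjects<int>(), NumInts);
      }
    };
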
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index 546a986..bcf7dc3 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
#include "AttributeImpl.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/STLExtras.h"
@@ -120,28 +121,28 @@ Attribute::AttrKind Attribute::getKindAsEnum() const {
if (!pImpl) return None;
assert((isEnumAttribute() || isIntAttribute()) &&
"Invalid attribute type to get the kind as an enum!");
- return pImpl ? pImpl->getKindAsEnum() : None;
+ return pImpl->getKindAsEnum();
}
uint64_t Attribute::getValueAsInt() const {
if (!pImpl) return 0;
assert(isIntAttribute() &&
"Expected the attribute to be an integer attribute!");
- return pImpl ? pImpl->getValueAsInt() : 0;
+ return pImpl->getValueAsInt();
}
StringRef Attribute::getKindAsString() const {
if (!pImpl) return StringRef();
assert(isStringAttribute() &&
"Invalid attribute type to get the kind as a string!");
- return pImpl ? pImpl->getKindAsString() : StringRef();
+ return pImpl->getKindAsString();
}
StringRef Attribute::getValueAsString() const {
if (!pImpl) return StringRef();
assert(isStringAttribute() &&
"Invalid attribute type to get the value as a string!");
- return pImpl ? pImpl->getValueAsString() : StringRef();
+ return pImpl->getValueAsString();
}
bool Attribute::hasAttribute(AttrKind Kind) const {
@@ -198,6 +199,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "byval";
if (hasAttribute(Attribute::Convergent))
return "convergent";
+ if (hasAttribute(Attribute::InaccessibleMemOnly))
+ return "inaccessiblememonly";
+ if (hasAttribute(Attribute::InaccessibleMemOrArgMemOnly))
+ return "inaccessiblemem_or_argmemonly";
if (hasAttribute(Attribute::InAlloca))
return "inalloca";
if (hasAttribute(Attribute::InlineHint))
@@ -232,6 +237,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "noredzone";
if (hasAttribute(Attribute::NoReturn))
return "noreturn";
+ if (hasAttribute(Attribute::NoRecurse))
+ return "norecurse";
if (hasAttribute(Attribute::NoUnwind))
return "nounwind";
if (hasAttribute(Attribute::OptimizeNone))
@@ -442,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::JumpTable: return 1ULL << 45;
case Attribute::Convergent: return 1ULL << 46;
case Attribute::SafeStack: return 1ULL << 47;
+ case Attribute::NoRecurse: return 1ULL << 48;
+ case Attribute::InaccessibleMemOnly: return 1ULL << 49;
+ case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
break;
@@ -472,9 +482,8 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
array_pod_sort(SortedAttrs.begin(), SortedAttrs.end());
- for (SmallVectorImpl<Attribute>::iterator I = SortedAttrs.begin(),
- E = SortedAttrs.end(); I != E; ++I)
- I->Profile(ID);
+ for (Attribute Attr : SortedAttrs)
+ Attr.Profile(ID);
void *InsertPoint;
AttributeSetNode *PA =
@@ -484,8 +493,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
// new one and insert it.
if (!PA) {
// Coallocate entries after the AttributeSetNode itself.
- void *Mem = ::operator new(sizeof(AttributeSetNode) +
- sizeof(Attribute) * SortedAttrs.size());
+ void *Mem = ::operator new(totalSizeToAlloc<Attribute>(SortedAttrs.size()));
PA = new (Mem) AttributeSetNode(SortedAttrs);
pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
}
@@ -617,9 +625,8 @@ AttributeSet::getImpl(LLVMContext &C,
// create a new one and insert it.
if (!PA) {
// Coallocate entries after the AttributeSetImpl itself.
- void *Mem = ::operator new(sizeof(AttributeSetImpl) +
- sizeof(std::pair<unsigned, AttributeSetNode *>) *
- Attrs.size());
+ void *Mem = ::operator new(
+ AttributeSetImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
PA = new (Mem) AttributeSetImpl(C, Attrs);
pImpl->AttrsLists.InsertNode(PA, InsertPoint);
}
@@ -684,22 +691,26 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
if (!B.contains(Kind))
continue;
- if (Kind == Attribute::Alignment)
- Attrs.push_back(std::make_pair(Index, Attribute::
- getWithAlignment(C, B.getAlignment())));
- else if (Kind == Attribute::StackAlignment)
- Attrs.push_back(std::make_pair(Index, Attribute::
- getWithStackAlignment(C, B.getStackAlignment())));
- else if (Kind == Attribute::Dereferenceable)
- Attrs.push_back(std::make_pair(Index,
- Attribute::getWithDereferenceableBytes(C,
- B.getDereferenceableBytes())));
- else if (Kind == Attribute::DereferenceableOrNull)
- Attrs.push_back(
- std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes(
- C, B.getDereferenceableOrNullBytes())));
- else
- Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
+ Attribute Attr;
+ switch (Kind) {
+ case Attribute::Alignment:
+ Attr = Attribute::getWithAlignment(C, B.getAlignment());
+ break;
+ case Attribute::StackAlignment:
+ Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+ break;
+ case Attribute::Dereferenceable:
+ Attr = Attribute::getWithDereferenceableBytes(
+ C, B.getDereferenceableBytes());
+ break;
+ case Attribute::DereferenceableOrNull:
+ Attr = Attribute::getWithDereferenceableOrNullBytes(
+ C, B.getDereferenceableOrNullBytes());
+ break;
+ default:
+ Attr = Attribute::get(C, Kind);
+ }
+ Attrs.push_back(std::make_pair(Index, Attr));
}
// Add target-dependent (string) attributes.
@@ -713,9 +724,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kind) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
- for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
- E = Kind.end(); I != E; ++I)
- Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
+ for (Attribute::AttrKind K : Kind)
+ Attrs.push_back(std::make_pair(Index, Attribute::get(C, K)));
return get(C, Attrs);
}
@@ -736,9 +746,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
if (!AS) continue;
SmallVector<std::pair<unsigned, AttributeSetNode *>, 8>::iterator
ANVI = AttrNodeVec.begin(), ANVE;
- for (const AttributeSetImpl::IndexAttrPair
- *AI = AS->getNode(0),
- *AE = AS->getNode(AS->getNumAttributes());
+ for (const IndexAttrPair *AI = AS->getNode(0),
+ *AE = AS->getNode(AS->getNumAttributes());
AI != AE; ++AI) {
ANVE = AttrNodeVec.end();
while (ANVI != ANVE && ANVI->first <= AI->first)
@@ -770,6 +779,36 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
return addAttributes(C, Index, AttributeSet::get(C, Index, B));
}
+AttributeSet AttributeSet::addAttribute(LLVMContext &C,
+ ArrayRef<unsigned> Indices,
+ Attribute A) const {
+ unsigned I = 0, E = pImpl ? pImpl->getNumAttributes() : 0;
+ auto IdxI = Indices.begin(), IdxE = Indices.end();
+ SmallVector<AttributeSet, 4> AttrSet;
+
+ while (I != E && IdxI != IdxE) {
+ if (getSlotIndex(I) < *IdxI)
+ AttrSet.emplace_back(getSlotAttributes(I++));
+ else if (getSlotIndex(I) > *IdxI)
+ AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+ else {
+ AttrBuilder B(getSlotAttributes(I), *IdxI);
+ B.addAttribute(A);
+ AttrSet.emplace_back(AttributeSet::get(C, *IdxI, B));
+ ++I;
+ ++IdxI;
+ }
+ }
+
+ while (I != E)
+ AttrSet.emplace_back(getSlotAttributes(I++));
+
+ while (IdxI != IdxE)
+ AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+
+ return get(C, AttrSet);
+}
+
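
[Editor's note] The new overload merges one attribute into several slot indices with a single pass over two sorted sequences. A hedged usage sketch; Indices must be sorted ascending, as the merge loop assumes:

    #include "llvm/IR/Attributes.h"

    // Add nonnull to parameters 1 and 2 in one shot (indices are 1-based
    // for parameters; index 0 is the return value).
    llvm::AttributeSet addNonNullParams(llvm::LLVMContext &C,
                                        llvm::AttributeSet AS) {
      llvm::Attribute NonNull =
          llvm::Attribute::get(C, llvm::Attribute::NonNull);
      unsigned Indices[] = {1, 2};
      return AS.addAttribute(C, Indices, NonNull);
    }
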
AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
AttributeSet Attrs) const {
if (!pImpl) return Attrs;
@@ -955,17 +994,17 @@ AttributeSet AttributeSet::getFnAttributes() const {
bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->hasAttribute(Kind) : false;
+ return ASN && ASN->hasAttribute(Kind);
}
bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->hasAttribute(Kind) : false;
+ return ASN && ASN->hasAttribute(Kind);
}
bool AttributeSet::hasAttributes(unsigned Index) const {
AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->hasAttributes() : false;
+ return ASN && ASN->hasAttributes();
}
/// \brief Return true if the specified attribute is set for at least one
@@ -1111,6 +1150,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
void AttrBuilder::clear() {
Attrs.reset();
+ TargetDepAttrs.clear();
Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
}
@@ -1177,23 +1217,10 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
- Attribute::AttrKind Kind = I->getKindAsEnum();
- Attrs[Kind] = false;
-
- if (Kind == Attribute::Alignment)
- Alignment = 0;
- else if (Kind == Attribute::StackAlignment)
- StackAlignment = 0;
- else if (Kind == Attribute::Dereferenceable)
- DerefBytes = 0;
- else if (Kind == Attribute::DereferenceableOrNull)
- DerefOrNullBytes = 0;
+ removeAttribute(Attr.getKindAsEnum());
} else {
assert(Attr.isStringAttribute() && "Invalid attribute type!");
- std::map<std::string, std::string>::iterator
- Iter = TargetDepAttrs.find(Attr.getKindAsString());
- if (Iter != TargetDepAttrs.end())
- TargetDepAttrs.erase(Iter);
+ removeAttribute(Attr.getKindAsString());
}
}
@@ -1322,8 +1349,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
assert(Slot != ~0U && "Couldn't find the index!");
- for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
- I != E; ++I) {
+ for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
if (Attrs[I->getKindAsEnum()])
@@ -1382,7 +1408,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
//===----------------------------------------------------------------------===//
/// \brief Which attributes cannot be applied to a type.
-AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
+AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) {
AttrBuilder Incompatible;
if (!Ty->isIntegerTy())
@@ -1406,3 +1432,80 @@ AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
return Incompatible;
}
+
+template<typename AttrClass>
+static bool isEqual(const Function &Caller, const Function &Callee) {
+ return Caller.getFnAttribute(AttrClass::getKind()) ==
+ Callee.getFnAttribute(AttrClass::getKind());
+}
+
+/// \brief Compute the logical AND of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to false if the callee's attribute
+/// is false.
+template<typename AttrClass>
+static void setAND(Function &Caller, const Function &Callee) {
+ if (AttrClass::isSet(Caller, AttrClass::getKind()) &&
+ !AttrClass::isSet(Callee, AttrClass::getKind()))
+ AttrClass::set(Caller, AttrClass::getKind(), false);
+}
+
+/// \brief Compute the logical OR of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to true if the callee's attribute
+/// is true.
+template<typename AttrClass>
+static void setOR(Function &Caller, const Function &Callee) {
+ if (!AttrClass::isSet(Caller, AttrClass::getKind()) &&
+ AttrClass::isSet(Callee, AttrClass::getKind()))
+ AttrClass::set(Caller, AttrClass::getKind(), true);
+}
+
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
+ // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+ // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+ // clutter to the IR.
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectStrong)
+ .addAttribute(Attribute::StackProtectReq);
+ AttributeSet OldSSPAttr = AttributeSet::get(Caller.getContext(),
+ AttributeSet::FunctionIndex,
+ B);
+
+ if (Callee.hasFnAttribute(Attribute::SafeStack)) {
+ Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller.addFnAttr(Attribute::SafeStack);
+ } else if (Callee.hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller.hasFnAttribute(Attribute::SafeStack)) {
+ Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller.addFnAttr(Attribute::StackProtectReq);
+ } else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller.hasFnAttribute(Attribute::SafeStack) &&
+ !Caller.hasFnAttribute(Attribute::StackProtectReq)) {
+ Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller.addFnAttr(Attribute::StackProtectStrong);
+ } else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
+ !Caller.hasFnAttribute(Attribute::SafeStack) &&
+ !Caller.hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller.hasFnAttribute(Attribute::StackProtectStrong))
+ Caller.addFnAttr(Attribute::StackProtect);
+}
+
+#define GET_ATTR_COMPAT_FUNC
+#include "AttributesCompatFunc.inc"
+
+bool AttributeFuncs::areInlineCompatible(const Function &Caller,
+ const Function &Callee) {
+ return hasCompatibleFnAttrs(Caller, Callee);
+}
+
+
+void AttributeFuncs::mergeAttributesForInlining(Function &Caller,
+ const Function &Callee) {
+ mergeFnAttrs(Caller, Callee);
+}
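
[Editor's note] The compatibility and merge entry points are driven by the tablegen'd AttributesCompatFunc.inc. A plausible call sequence from an inliner, sketched (the real inliner performs more checks):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    bool tryMergeForInlining(llvm::Function &Caller,
                             const llvm::Function &Callee) {
      if (!llvm::AttributeFuncs::areInlineCompatible(Caller, Callee))
        return false; // e.g. mismatched target-cpu/target-features
      llvm::AttributeFuncs::mergeAttributesForInlining(Caller, Callee);
      return true;
    }
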
diff --git a/contrib/llvm/lib/IR/AttributesCompatFunc.td b/contrib/llvm/lib/IR/AttributesCompatFunc.td
new file mode 100644
index 0000000..7c85b3d
--- /dev/null
+++ b/contrib/llvm/lib/IR/AttributesCompatFunc.td
@@ -0,0 +1 @@
+include "llvm/IR/Attributes.td"
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index f1c6ebd..12c354c 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;
@@ -92,8 +93,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
+ Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
+ if (vldRegex.match(Name)) {
+ auto fArgs = F->getFunctionType()->params();
+ SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
+ // Can't use Intrinsic::getDeclaration here as the return types might
+ // then only be structurally equal.
+ FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
+ NewFn = Function::Create(fType, F->getLinkage(),
+ "llvm." + Name + ".p0i8", F->getParent());
+ return true;
+ }
+ Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
+ if (vstRegex.match(Name)) {
+ static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
+ Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
+
+ static const Intrinsic::ID StoreLaneInts[] = {
+ Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
+ Intrinsic::arm_neon_vst4lane
+ };
+
+ auto fArgs = F->getFunctionType()->params();
+ Type *Tys[] = {fArgs[0], fArgs[1]};
+ if (Name.find("lane") == StringRef::npos)
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ StoreInts[fArgs.size() - 3], Tys);
+ else
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ StoreLaneInts[fArgs.size() - 5], Tys);
+ return true;
+ }
break;
}
+
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
F->setName(Name + ".old");
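
[Editor's note] The intrinsic renaming above is keyed off llvm::Regex from Support/Regex.h. A hedged sketch of the same match:

    #include "llvm/Support/Regex.h"

    bool isNeonVldName(llvm::StringRef Name) {
      // Same pattern as the upgrade code: vld1-vld4 plus the lane variants.
      llvm::Regex VldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
      return VldRegex.match(Name); // true for e.g. "arm.neon.vld2.v4i32"
    }
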
@@ -129,7 +164,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.sse2.pcmpgt.") ||
Name.startswith("x86.avx2.pcmpeq.") ||
Name.startswith("x86.avx2.pcmpgt.") ||
+ Name.startswith("x86.avx2.vbroadcast") ||
+ Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
+ Name.startswith("x86.sse41.pmovsx") ||
Name == "x86.avx.vinsertf128.pd.256" ||
Name == "x86.avx.vinsertf128.ps.256" ||
Name == "x86.avx.vinsertf128.si.256" ||
@@ -162,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx2.pblendd.128" ||
Name == "x86.avx2.pblendd.256" ||
Name == "x86.avx2.vbroadcasti128" ||
+ Name == "x86.xop.vpcmov" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -325,7 +364,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
+ Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
assert(F && "Intrinsic call is not direct?");
@@ -351,7 +390,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "llvm.x86.avx.movnt.ps.256" ||
Name == "llvm.x86.avx.movnt.pd.256") {
IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
+ Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
@@ -368,7 +407,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
"cast");
StoreInst *SI = Builder.CreateStore(Arg1, BC);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
+ SI->setAlignment(32);
// Remove intrinsic.
CI->eraseFromParent();
@@ -419,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
+ } else if (Name == "llvm.x86.xop.vpcmov") {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Sel = CI->getArgOperand(2);
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+ Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
+ Value *NotSel = Builder.CreateXor(Sel, MinusOne);
+ Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
+ Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+ Rep = Builder.CreateOr(Sel0, Sel1);
} else if (Name == "llvm.x86.sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
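
[Editor's note] The vpcmov lowering above is a bitwise select. The per-element identity it expands to, as a scalar sketch:

    #include <cstdint>

    // Each result bit takes A where Sel is 1 and B where Sel is 0.
    uint64_t bitwiseSelect(uint64_t A, uint64_t B, uint64_t Sel) {
      return (A & Sel) | (B & ~Sel);
    }
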
@@ -438,6 +487,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
+ } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
+ VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ unsigned NumDstElts = DstTy->getNumElements();
+
+ // Extract a subvector of the first NumDstElts lanes and sign extend.
+ SmallVector<int, 8> ShuffleMask;
+ for (int i = 0; i != (int)NumDstElts; ++i)
+ ShuffleMask.push_back(i);
+
+ Value *SV = Builder.CreateShuffleVector(
+ CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+ Rep = Builder.CreateSExt(SV, DstTy);
} else if (Name == "llvm.x86.avx2.vbroadcasti128") {
// Replace vbroadcasts with a vector shuffle.
Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
@@ -447,6 +509,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
const int Idxs[4] = { 0, 1, 0, 1 };
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
Idxs);
+ } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
+ Name.startswith("llvm.x86.avx2.vbroadcast")) {
+ // Replace vp?broadcasts with a vector shuffle.
+ Value *Op = CI->getArgOperand(0);
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+ Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
+ Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
+ Constant::getNullValue(MaskTy));
} else if (Name == "llvm.x86.sse2.psll.dq") {
// 128-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
@@ -517,10 +587,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
-
+
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
-
+
// Extend the second operand into a vector that is twice as big.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<Constant*, 8> Idxs;
@@ -562,7 +632,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
-
+
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
@@ -627,6 +697,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
+ CI->eraseFromParent();
+ return;
+ }
+
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 0a04494..f61276f 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <algorithm>
+
using namespace llvm;
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
@@ -35,8 +36,7 @@ LLVMContext &BasicBlock::getContext() const {
// Explicit instantiation of SymbolTableListTraits since some of the methods
// are not in the public header file...
-template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
-
+template class llvm::SymbolTableListTraits<Instruction>;
BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock *InsertBefore)
@@ -56,7 +56,7 @@ void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
assert(!Parent && "Already has a parent");
if (InsertBefore)
- NewParent->getBasicBlockList().insert(InsertBefore, this);
+ NewParent->getBasicBlockList().insert(InsertBefore->getIterator(), this);
else
NewParent->getBasicBlockList().push_back(this);
}
@@ -91,26 +91,26 @@ void BasicBlock::setParent(Function *parent) {
}
void BasicBlock::removeFromParent() {
- getParent()->getBasicBlockList().remove(this);
+ getParent()->getBasicBlockList().remove(getIterator());
}
iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
- return getParent()->getBasicBlockList().erase(this);
+ return getParent()->getBasicBlockList().erase(getIterator());
}
/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right before MovePos.
void BasicBlock::moveBefore(BasicBlock *MovePos) {
- MovePos->getParent()->getBasicBlockList().splice(MovePos,
- getParent()->getBasicBlockList(), this);
+ MovePos->getParent()->getBasicBlockList().splice(
+ MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator());
}
/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) {
- Function::iterator I = MovePos;
- MovePos->getParent()->getBasicBlockList().splice(++I,
- getParent()->getBasicBlockList(), this);
+ MovePos->getParent()->getBasicBlockList().splice(
+ ++MovePos->getIterator(), getParent()->getBasicBlockList(),
+ getIterator());
}
const Module *BasicBlock::getModule() const {
@@ -196,8 +196,8 @@ BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
if (!FirstNonPHI)
return end();
- iterator InsertPt = FirstNonPHI;
- if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
+ iterator InsertPt = FirstNonPHI->getIterator();
+ if (InsertPt->isEHPad()) ++InsertPt;
return InsertPt;
}
@@ -245,12 +245,12 @@ BasicBlock *BasicBlock::getSingleSuccessor() {
BasicBlock *BasicBlock::getUniqueSuccessor() {
succ_iterator SI = succ_begin(this), E = succ_end(this);
- if (SI == E) return NULL; // No successors
+ if (SI == E) return nullptr; // No successors
BasicBlock *SuccBB = *SI;
++SI;
for (;SI != E; ++SI) {
if (*SI != SuccBB)
- return NULL;
+ return nullptr;
// The same successor appears multiple times in the successor list.
// This is OK.
}
@@ -333,6 +333,17 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
}
}
+bool BasicBlock::canSplitPredecessors() const {
+ const Instruction *FirstNonPHI = getFirstNonPHI();
+ if (isa<LandingPadInst>(FirstNonPHI))
+ return true;
+ // This is perhaps a little conservative because constructs like
+ // CleanupBlockInst are pretty easy to split. However, SplitBlockPredecessors
+ // cannot handle such things just yet.
+ if (FirstNonPHI->isEHPad())
+ return false;
+ return true;
+}
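
[Editor's note] A guard a transform might place in front of block splitting; SplitBlockPredecessors is assumed from Transforms/Utils/BasicBlockUtils.h:

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    // Split Preds off BB unless BB starts with an unsplittable EH pad.
    llvm::BasicBlock *maybeSplit(llvm::BasicBlock *BB,
                                 llvm::ArrayRef<llvm::BasicBlock *> Preds) {
      if (!BB->canSplitPredecessors())
        return nullptr;
      return llvm::SplitBlockPredecessors(BB, Preds, ".split");
    }
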
/// This splits a basic block into two at the specified
/// instruction. Note that all instructions BEFORE the specified iterator stay
@@ -393,8 +404,7 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
// Cope with being called on a BasicBlock that doesn't have a terminator
// yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
return;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
+ for (BasicBlock *Succ : TI->successors()) {
// N.B. Succ might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index 46bb20e..ce3fe03 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -83,7 +83,7 @@ foldConstantCastPair(
assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
assert(CastInst::isCast(opc) && "Invalid cast opcode");
- // The the types and opcodes for the two Cast constant expressions
+ // The types and opcodes for the two Cast constant expressions
Type *SrcTy = Op->getOperand(0)->getType();
Type *MidTy = Op->getType();
Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
@@ -109,7 +109,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
if (PTy->getAddressSpace() == DPTy->getAddressSpace()
- && DPTy->getElementType()->isSized()) {
+ && PTy->getElementType()->isSized()) {
SmallVector<Value*, 8> IdxList;
Value *Zero =
Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
@@ -1187,7 +1187,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
(void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
return ConstantFP::get(C1->getContext(), C3V);
case Instruction::FRem:
- (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ (void)C3V.mod(C2V);
return ConstantFP::get(C1->getContext(), C3V);
}
}
@@ -1277,9 +1277,9 @@ static bool isMaybeZeroSizedType(Type *Ty) {
}
/// IdxCompare - Compare the two constants as though they were getelementptr
-/// indices. This allows coersion of the types to be the same thing.
+/// indices. This allows coercion of the types to be the same thing.
///
-/// If the two constants are the "same" (after coersion), return 0. If the
+/// If the two constants are the "same" (after coercion), return 0. If the
/// first is less than the second, return -1, if the second is less than the
/// first, return 1. If the constants are not integral, return -2.
///
@@ -1685,7 +1685,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Otherwise, for integer compare, pick the same value as the non-undef
// operand, and fold it to true or false.
if (isIntegerPredicate)
- return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(Predicate));
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fails.
@@ -1869,7 +1869,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
} else {
// Evaluate the relation between the two constants, per the predicate.
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ switch (evaluateICmpRelation(C1, C2,
+ CmpInst::isSigned((CmpInst::Predicate)pred))) {
default: llvm_unreachable("Unknown relational!");
case ICmpInst::BAD_ICMP_PREDICATE:
break; // Couldn't determine anything about these constants.
@@ -1950,8 +1951,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// If the left hand side is an extension, try eliminating it.
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
- (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
+ if ((CE1->getOpcode() == Instruction::SExt &&
+ ICmpInst::isSigned((ICmpInst::Predicate)pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt &&
+ !ICmpInst::isSigned((ICmpInst::Predicate)pred))){
Constant *CE1Op0 = CE1->getOperand(0);
Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
if (CE1Inverse == CE1Op0) {
@@ -1997,17 +2000,17 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
}
/// \brief Test whether a given ConstantInt is in-range for a SequentialType.
-static bool isIndexInRangeOfSequentialType(const SequentialType *STy,
+static bool isIndexInRangeOfSequentialType(SequentialType *STy,
const ConstantInt *CI) {
- if (const PointerType *PTy = dyn_cast<PointerType>(STy))
- // Only handle pointers to sized types, not pointers to functions.
- return PTy->getElementType()->isSized();
+ // All indices are valid when indexing along a pointer
+ if (isa<PointerType>(STy))
+ return true;
uint64_t NumElements = 0;
// Determine the number of elements in our sequential type.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ if (auto *ATy = dyn_cast<ArrayType>(STy))
NumElements = ATy->getNumElements();
- else if (const VectorType *VTy = dyn_cast<VectorType>(STy))
+ else if (auto *VTy = dyn_cast<VectorType>(STy))
NumElements = VTy->getNumElements();
assert((isa<ArrayType>(STy) || NumElements > 0) &&
@@ -2178,7 +2181,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
// dimension.
NewIdxs.resize(Idxs.size());
uint64_t NumElements = 0;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (auto *ATy = dyn_cast<ArrayType>(Ty))
NumElements = ATy->getNumElements();
else
NumElements = cast<VectorType>(Ty)->getNumElements();
diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp
index 91095cf..48f9b27 100644
--- a/contrib/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm/lib/IR/ConstantRange.cpp
@@ -21,7 +21,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -125,6 +127,57 @@ ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred,
.inverse();
}
+ConstantRange ConstantRange::makeNoWrapRegion(Instruction::BinaryOps BinOp,
+ const APInt &C,
+ unsigned NoWrapKind) {
+ typedef OverflowingBinaryOperator OBO;
+
+ // Computes the intersection of CR0 and CR1. It is different from
+ // intersectWith in that the ConstantRange returned will only contain elements
+ // in both CR0 and CR1 (i.e. SubsetIntersect(X, Y) is a *subset*, proper or
+ // not, of both X and Y).
+ auto SubsetIntersect =
+ [](const ConstantRange &CR0, const ConstantRange &CR1) {
+ return CR0.inverse().unionWith(CR1.inverse()).inverse();
+ };
+
+ assert(BinOp >= Instruction::BinaryOpsBegin &&
+ BinOp < Instruction::BinaryOpsEnd && "Binary operators only!");
+
+ assert((NoWrapKind == OBO::NoSignedWrap ||
+ NoWrapKind == OBO::NoUnsignedWrap ||
+ NoWrapKind == (OBO::NoUnsignedWrap | OBO::NoSignedWrap)) &&
+ "NoWrapKind invalid!");
+
+ unsigned BitWidth = C.getBitWidth();
+ if (BinOp != Instruction::Add)
+ // Conservative answer: empty set
+ return ConstantRange(BitWidth, false);
+
+ if (C.isMinValue())
+ // Full set: nothing signed / unsigned wraps when added to 0.
+ return ConstantRange(BitWidth);
+
+ ConstantRange Result(BitWidth);
+
+ if (NoWrapKind & OBO::NoUnsignedWrap)
+ Result = SubsetIntersect(Result,
+ ConstantRange(APInt::getNullValue(BitWidth), -C));
+
+ if (NoWrapKind & OBO::NoSignedWrap) {
+ if (C.isStrictlyPositive())
+ Result = SubsetIntersect(
+ Result, ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMinValue(BitWidth) - C));
+ else
+ Result = SubsetIntersect(
+ Result, ConstantRange(APInt::getSignedMinValue(BitWidth) - C,
+ APInt::getSignedMinValue(BitWidth)));
+ }
+
+ return Result;
+}
+
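A worked instance of the region computation, sketched at i8 width: adding 1 signed-wraps only when the operand is 127 and unsigned-wraps only when it is 255, so the returned ranges exclude exactly those inputs.

    // NSW: C = 1 is strictly positive, so the region is
    // [SignedMin, SignedMin - C) = [-128, 127), i.e. -128..126.
    ConstantRange NSW = ConstantRange::makeNoWrapRegion(
        Instruction::Add, APInt(8, 1), OverflowingBinaryOperator::NoSignedWrap);
    // NUW: the region is [0, -C) = [0, 255), i.e. everything except 255.
    ConstantRange NUW = ConstantRange::makeNoWrapRegion(
        Instruction::Add, APInt(8, 1), OverflowingBinaryOperator::NoUnsignedWrap);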
/// isFullSet - Return true if this set contains all of the elements possible
/// for this data-type
bool ConstantRange::isFullSet() const {
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 308e6bd..0898bf6 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -53,6 +53,11 @@ bool Constant::isNegativeZeroValue() const {
if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
return true;
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
// We've already handled the true FP case; any other FP vectors can't represent -0.0.
if (getType()->isFPOrFPVectorTy())
return false;
@@ -68,6 +73,17 @@ bool Constant::isZeroValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->isZero();
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero())
+ return true;
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero())
+ return true;
+
// Otherwise, just use +0.0.
return isNullValue();
}
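Net effect of the new splat checks: a ConstantVector or ConstantDataVector splat of -0.0 now answers isZeroValue() with true, matching the scalar ConstantFP path, while isNullValue() continues to reject it because -0.0 is not a positive zero.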
@@ -81,8 +97,10 @@ bool Constant::isNullValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->isZero() && !CFP->isNegative();
- // constant zero is zero for aggregates and cpnull is null for pointers.
- return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+ // constant zero is zero for aggregates, cpnull is null for pointers, none for
+ // tokens.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this) ||
+ isa<ConstantTokenNone>(this);
}
bool Constant::isAllOnesValue() const {
@@ -204,6 +222,8 @@ Constant *Constant::getNullValue(Type *Ty) {
case Type::ArrayTyID:
case Type::VectorTyID:
return ConstantAggregateZero::get(Ty);
+ case Type::TokenTyID:
+ return ConstantTokenNone::get(Ty->getContext());
default:
// Function, Label, or Opaque type?
llvm_unreachable("Cannot create a null constant of that type!");
@@ -410,32 +430,13 @@ bool Constant::isConstantUsed() const {
return false;
}
+bool Constant::needsRelocation() const {
+ if (isa<GlobalValue>(this))
+ return true; // Global reference.
-
-/// getRelocationInfo - This method classifies the entry according to
-/// whether or not it may generate a relocation entry. This must be
-/// conservative, so if it might codegen to a relocatable entry, it should say
-/// so. The return values are:
-///
-/// NoRelocation: This constant pool entry is guaranteed to never have a
-/// relocation applied to it (because it holds a simple constant like
-/// '4').
-/// LocalRelocation: This entry has relocations, but the entries are
-/// guaranteed to be resolvable by the static linker, so the dynamic
-/// linker will never see them.
-/// GlobalRelocations: This entry may have arbitrary relocations.
-///
-/// FIXME: This really should not be in IR.
-Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return LocalRelocation; // Local to this file/library.
- return GlobalRelocations; // Global reference.
- }
-
if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
- return BA->getFunction()->getRelocationInfo();
-
+ return BA->getFunction()->needsRelocation();
+
// While raw uses of blockaddress need to be relocated, differences between
// two of them don't when they are for labels in the same function. This is a
// common idiom when creating a table for the indirect goto extension, so we
@@ -444,20 +445,18 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
if (CE->getOpcode() == Instruction::Sub) {
ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
- if (LHS && RHS &&
- LHS->getOpcode() == Instruction::PtrToInt &&
+ if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt &&
RHS->getOpcode() == Instruction::PtrToInt &&
isa<BlockAddress>(LHS->getOperand(0)) &&
isa<BlockAddress>(RHS->getOperand(0)) &&
cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
- cast<BlockAddress>(RHS->getOperand(0))->getFunction())
- return NoRelocation;
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return false;
}
- PossibleRelocationsTy Result = NoRelocation;
+ bool Result = false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- Result = std::max(Result,
- cast<Constant>(getOperand(i))->getRelocationInfo());
+ Result |= cast<Constant>(getOperand(i))->needsRelocation();
return Result;
}
@@ -797,10 +796,10 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
}
unsigned ConstantAggregateZero::getNumElements() const {
- const Type *Ty = getType();
- if (const auto *AT = dyn_cast<ArrayType>(Ty))
+ Type *Ty = getType();
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
- if (const auto *VT = dyn_cast<VectorType>(Ty))
+ if (auto *VT = dyn_cast<VectorType>(Ty))
return VT->getNumElements();
return Ty->getStructNumElements();
}
@@ -838,10 +837,10 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
}
unsigned UndefValue::getNumElements() const {
- const Type *Ty = getType();
- if (const auto *AT = dyn_cast<ArrayType>(Ty))
+ Type *Ty = getType();
+ if (auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
- if (const auto *VT = dyn_cast<VectorType>(Ty))
+ if (auto *VT = dyn_cast<VectorType>(Ty))
return VT->getNumElements();
return Ty->getStructNumElements();
}
@@ -858,6 +857,59 @@ static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
return true;
}
+template <typename SequentialTy, typename ElementTy>
+static Constant *getIntSequenceIfElementsMatch(ArrayRef<Constant *> V) {
+ assert(!V.empty() && "Cannot get empty int sequence.");
+
+ SmallVector<ElementTy, 16> Elts;
+ for (Constant *C : V)
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ Elts.push_back(CI->getZExtValue());
+ else
+ return nullptr;
+ return SequentialTy::get(V[0]->getContext(), Elts);
+}
+
+template <typename SequentialTy, typename ElementTy>
+static Constant *getFPSequenceIfElementsMatch(ArrayRef<Constant *> V) {
+ assert(!V.empty() && "Cannot get empty FP sequence.");
+
+ SmallVector<ElementTy, 16> Elts;
+ for (Constant *C : V)
+ if (auto *CFP = dyn_cast<ConstantFP>(C))
+ Elts.push_back(CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
+ else
+ return nullptr;
+ return SequentialTy::getFP(V[0]->getContext(), Elts);
+}
+
+template <typename SequenceTy>
+static Constant *getSequenceIfElementsMatch(Constant *C,
+ ArrayRef<Constant *> V) {
+ // We speculatively build the elements here even if it turns out that there is
+ // a constantexpr or something else weird, since it is so uncommon for that to
+ // happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8))
+ return getIntSequenceIfElementsMatch<SequenceTy, uint8_t>(V);
+ else if (CI->getType()->isIntegerTy(16))
+ return getIntSequenceIfElementsMatch<SequenceTy, uint16_t>(V);
+ else if (CI->getType()->isIntegerTy(32))
+ return getIntSequenceIfElementsMatch<SequenceTy, uint32_t>(V);
+ else if (CI->getType()->isIntegerTy(64))
+ return getIntSequenceIfElementsMatch<SequenceTy, uint64_t>(V);
+ } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isHalfTy())
+ return getFPSequenceIfElementsMatch<SequenceTy, uint16_t>(V);
+ else if (CFP->getType()->isFloatTy())
+ return getFPSequenceIfElementsMatch<SequenceTy, uint32_t>(V);
+ else if (CFP->getType()->isDoubleTy())
+ return getFPSequenceIfElementsMatch<SequenceTy, uint64_t>(V);
+ }
+
+ return nullptr;
+}
+
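To make the speculative control flow concrete: for an operand list like { i8 1, i8 2, &lt;constantexpr&gt; }, the dyn_cast&lt;ConstantInt&gt; on the third element fails, getIntSequenceIfElementsMatch returns nullptr, and the caller falls back to building an ordinary ConstantArray or ConstantVector rather than a packed ConstantData sequence.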
ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantArrayVal,
OperandTraits<ConstantArray>::op_end(this) - V.size(),
@@ -875,6 +927,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
return C;
return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V);
}
+
Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant*> V) {
// Empty arrays are canonicalized to ConstantAggregateZero.
if (V.empty())
@@ -897,74 +950,8 @@ Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant*> V) {
// Check to see if all of the elements are ConstantFP or ConstantInt and if
// the element type is compatible with ConstantDataArray. If so, use it.
- if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
- // We speculatively build the elements here even if it turns out that there
- // is a constantexpr or something else weird in the array, since it is so
- // uncommon for that to happen.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->getType()->isIntegerTy(8)) {
- SmallVector<uint8_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(16)) {
- SmallVector<uint16_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(32)) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(64)) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- }
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- if (CFP->getType()->isFloatTy()) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(
- CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::getFP(C->getContext(), Elts);
- } else if (CFP->getType()->isDoubleTy()) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(
- CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::getFP(C->getContext(), Elts);
- }
- }
- }
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType()))
+ return getSequenceIfElementsMatch<ConstantDataArray>(C, V);
// Otherwise, we really do want to create a ConstantArray.
return nullptr;
@@ -1060,6 +1047,7 @@ Constant *ConstantVector::get(ArrayRef<Constant*> V) {
VectorType *Ty = VectorType::get(V.front()->getType(), V.size());
return Ty->getContext().pImpl->VectorConstants.getOrCreate(Ty, V);
}
+
Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
assert(!V.empty() && "Vectors can't be empty");
VectorType *T = VectorType::get(V.front()->getType(), V.size());
@@ -1085,74 +1073,8 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
// Check to see if all of the elements are ConstantFP or ConstantInt and if
// the element type is compatible with ConstantDataVector. If so, use it.
- if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
- // We speculatively build the elements here even if it turns out that there
- // is a constantexpr or something else weird in the array, since it is so
- // uncommon for that to happen.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->getType()->isIntegerTy(8)) {
- SmallVector<uint8_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(16)) {
- SmallVector<uint16_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(32)) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(64)) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- }
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- if (CFP->getType()->isFloatTy()) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(
- CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::getFP(C->getContext(), Elts);
- } else if (CFP->getType()->isDoubleTy()) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(
- CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::getFP(C->getContext(), Elts);
- }
- }
- }
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType()))
+ return getSequenceIfElementsMatch<ConstantDataVector>(C, V);
// Otherwise, the element type isn't compatible with ConstantDataVector, or
// the operand list contains a ConstantExpr or something else strange.
@@ -1170,6 +1092,17 @@ Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
return get(Elts);
}
+ConstantTokenNone *ConstantTokenNone::get(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheNoneToken)
+ pImpl->TheNoneToken.reset(new ConstantTokenNone(Context));
+ return pImpl->TheNoneToken.get();
+}
+
+/// Remove the constant from the constant table.
+void ConstantTokenNone::destroyConstantImpl() {
+ llvm_unreachable("You can't ConstantTokenNone->destroyConstantImpl()!");
+}
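ConstantTokenNone::get implements a per-context lazy singleton: the first call allocates the node into LLVMContextImpl::TheNoneToken (a std::unique_ptr, judging from the reset() call) and later calls return the cached pointer, so 'none' tokens within one LLVMContext always compare equal by pointer.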
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
@@ -1221,8 +1154,7 @@ ArrayRef<unsigned> ConstantExpr::getIndices() const {
}
unsigned ConstantExpr::getPredicate() const {
- assert(isCompare());
- return ((const CompareConstantExpr*)this)->predicate;
+ return cast<CompareConstantExpr>(this)->predicate;
}
/// getWithOperandReplaced - Return a constant expression identical to this
@@ -1245,7 +1177,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
/// operands replaced with the specified values. The specified array must
/// have the same number of operands as our current one.
Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
- bool OnlyIfReduced) const {
+ bool OnlyIfReduced, Type *SrcTy) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
// If no operands changed return self.
@@ -1283,10 +1215,13 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2],
OnlyIfReducedTy);
- case Instruction::GetElementPtr:
- return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1),
- cast<GEPOperator>(this)->isInBounds(),
- OnlyIfReducedTy);
+ case Instruction::GetElementPtr: {
+ auto *GEPO = cast<GEPOperator>(this);
+ assert(SrcTy || (Ops[0]->getType() == getOperand(0)->getType()));
+ return ConstantExpr::getGetElementPtr(
+ SrcTy ? SrcTy : GEPO->getSourceElementType(), Ops[0], Ops.slice(1),
+ GEPO->isInBounds(), OnlyIfReducedTy);
+ }
case Instruction::ICmp:
case Instruction::FCmp:
return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1],
@@ -2430,9 +2365,9 @@ StringRef ConstantDataSequential::getRawDataValues() const {
/// formed with a vector or array of the specified element type.
/// ConstantDataArray only works with normal float and int types that are
/// stored densely in memory, not with things like i42 or x86_f80.
-bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
- if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
- if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) {
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true;
+ if (auto *IT = dyn_cast<IntegerType>(Ty)) {
switch (IT->getBitWidth()) {
case 8:
case 16:
@@ -2587,7 +2522,7 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
/// object.
Constant *ConstantDataArray::getFP(LLVMContext &Context,
ArrayRef<uint16_t> Elts) {
- Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size());
+ Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
}
@@ -2703,6 +2638,11 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
}
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isHalfTy()) {
+ SmallVector<uint16_t, 16> Elts(
+ NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
+ return getFP(V->getContext(), Elts);
+ }
if (CFP->getType()->isFloatTy()) {
SmallVector<uint32_t, 16> Elts(
NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
@@ -2748,6 +2688,10 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
switch (getElementType()->getTypeID()) {
default:
llvm_unreachable("Accessor can only be used when element is float/double!");
+ case Type::HalfTyID: {
+ auto EltVal = *reinterpret_cast<const uint16_t *>(EltPtr);
+ return APFloat(APFloat::IEEEhalf, APInt(16, EltVal));
+ }
case Type::FloatTyID: {
auto EltVal = *reinterpret_cast<const uint32_t *>(EltPtr);
return APFloat(APFloat::IEEEsingle, APInt(32, EltVal));
@@ -2782,7 +2726,8 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
/// Note that this has to compute a new constant to return, so it isn't as
/// efficient as getElementAsInteger/Float/Double.
Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
- if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
+ if (getElementType()->isHalfTy() || getElementType()->isFloatTy() ||
+ getElementType()->isDoubleTy())
return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
@@ -2872,6 +2817,11 @@ Value *ConstantFP::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
llvm_unreachable("Unsupported class for handleOperandChange()!");
}
+Value *ConstantTokenNone::handleOperandChangeImpl(Value *From, Value *To,
+ Use *U) {
+ llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
Value *UndefValue::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
llvm_unreachable("Unsupported class for handleOperandChange()!");
}
@@ -3070,7 +3020,7 @@ Instruction *ConstantExpr::getAsInstruction() {
case Instruction::ICmp:
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
- getPredicate(), Ops[0], Ops[1]);
+ (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index f3ddcd7..13fcbd2 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -179,6 +179,13 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ static bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::ExtractValue;
+ }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
+ }
};
/// InsertValueConstantExpr - This class is private to
@@ -205,6 +212,13 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ static bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::InsertValue;
+ }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
+ }
};
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
@@ -235,6 +249,13 @@ public:
Type *getSourceElementType() const;
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ static bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::GetElementPtr;
+ }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
+ }
};
// CompareConstantExpr - This class is private to Constants.cpp, and is used
@@ -257,6 +278,14 @@ public:
}
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ static bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::ICmp ||
+ CE->getOpcode() == Instruction::FCmp;
+ }
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
+ }
};
template <>
@@ -373,41 +402,45 @@ template <class ConstantClass> struct ConstantAggrKeyType {
struct InlineAsmKeyType {
StringRef AsmString;
StringRef Constraints;
+ FunctionType *FTy;
bool HasSideEffects;
bool IsAlignStack;
InlineAsm::AsmDialect AsmDialect;
InlineAsmKeyType(StringRef AsmString, StringRef Constraints,
- bool HasSideEffects, bool IsAlignStack,
+ FunctionType *FTy, bool HasSideEffects, bool IsAlignStack,
InlineAsm::AsmDialect AsmDialect)
- : AsmString(AsmString), Constraints(Constraints),
+ : AsmString(AsmString), Constraints(Constraints), FTy(FTy),
HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack),
AsmDialect(AsmDialect) {}
InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl<Constant *> &)
: AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()),
- HasSideEffects(Asm->hasSideEffects()),
+ FTy(Asm->getFunctionType()), HasSideEffects(Asm->hasSideEffects()),
IsAlignStack(Asm->isAlignStack()), AsmDialect(Asm->getDialect()) {}
bool operator==(const InlineAsmKeyType &X) const {
return HasSideEffects == X.HasSideEffects &&
IsAlignStack == X.IsAlignStack && AsmDialect == X.AsmDialect &&
- AsmString == X.AsmString && Constraints == X.Constraints;
+ AsmString == X.AsmString && Constraints == X.Constraints &&
+ FTy == X.FTy;
}
bool operator==(const InlineAsm *Asm) const {
return HasSideEffects == Asm->hasSideEffects() &&
IsAlignStack == Asm->isAlignStack() &&
AsmDialect == Asm->getDialect() &&
AsmString == Asm->getAsmString() &&
- Constraints == Asm->getConstraintString();
+ Constraints == Asm->getConstraintString() &&
+ FTy == Asm->getFunctionType();
}
unsigned getHash() const {
return hash_combine(AsmString, Constraints, HasSideEffects, IsAlignStack,
- AsmDialect);
+ AsmDialect, FTy);
}
typedef ConstantInfo<InlineAsm>::TypeClass TypeClass;
InlineAsm *create(TypeClass *Ty) const {
- return new InlineAsm(Ty, AsmString, Constraints, HasSideEffects,
+ assert(PointerType::getUnqual(FTy) == Ty);
+ return new InlineAsm(FTy, AsmString, Constraints, HasSideEffects,
IsAlignStack, AsmDialect);
}
};
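Keying the uniquing on FTy means two inline-asm values with identical strings and constraints but different prototypes (say void() versus i32(i32)) no longer collapse into one InlineAsm: the hash and both operator== overloads now compare the FunctionType, and the assert in create() checks that the pointer type handed in matches that prototype.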
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index 0eb88a9..7f39c80 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -262,6 +262,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMVectorTypeKind;
case Type::X86_MMXTyID:
return LLVMX86_MMXTypeKind;
+ case Type::TokenTyID:
+ return LLVMTokenTypeKind;
}
llvm_unreachable("Unhandled TypeID.");
}
@@ -366,6 +368,9 @@ LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
}
+LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getTokenTy(*unwrap(C));
+}
LLVMTypeRef LLVMHalfType(void) {
return LLVMHalfTypeInContext(LLVMGetGlobalContext());
@@ -1528,7 +1533,7 @@ LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
Module::global_iterator I = Mod->global_begin();
if (I == Mod->global_end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
@@ -1536,23 +1541,23 @@ LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
Module::global_iterator I = Mod->global_end();
if (I == Mod->global_begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
- Module::global_iterator I = GV;
+ Module::global_iterator I(GV);
if (++I == GV->getParent()->global_end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
- Module::global_iterator I = GV;
+ Module::global_iterator I(GV);
if (I == GV->getParent()->global_begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
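The mechanical wrap(I) to wrap(&*I) and 'iterator I = GV' to 'iterator I(GV)' rewrites in this and the following hunks track the ilist changes elsewhere in this import: list iterators no longer convert implicitly to and from node pointers, so code must dereference an iterator (&*I) to obtain a pointer and construct iterators from pointers explicitly.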
void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
@@ -1639,7 +1644,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name) {
auto *PTy = cast<PointerType>(unwrap(Ty));
- return wrap(GlobalAlias::create(PTy, GlobalValue::ExternalLinkage, Name,
+ return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ GlobalValue::ExternalLinkage, Name,
unwrap<Constant>(Aliasee), unwrap(M)));
}
@@ -1660,7 +1666,7 @@ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
Module::iterator I = Mod->begin();
if (I == Mod->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
@@ -1668,23 +1674,23 @@ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
Module::iterator I = Mod->end();
if (I == Mod->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
- Module::iterator I = Func;
+ Module::iterator I(Func);
if (++I == Func->getParent()->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
- Module::iterator I = Func;
+ Module::iterator I(Func);
if (I == Func->getParent()->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
void LLVMDeleteFunction(LLVMValueRef Fn) {
@@ -1779,14 +1785,14 @@ void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
Function *Fn = unwrap<Function>(FnRef);
for (Function::arg_iterator I = Fn->arg_begin(),
E = Fn->arg_end(); I != E; I++)
- *ParamRefs++ = wrap(I);
+ *ParamRefs++ = wrap(&*I);
}
LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
while (index --> 0)
AI++;
- return wrap(AI);
+ return wrap(&*AI);
}
LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
@@ -1798,7 +1804,7 @@ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
Function::arg_iterator I = Func->arg_begin();
if (I == Func->arg_end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
@@ -1806,23 +1812,23 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
Function::arg_iterator I = Func->arg_end();
if (I == Func->arg_begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I = A;
+ Function::arg_iterator I(A);
if (++I == A->getParent()->arg_end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I = A;
+ Function::arg_iterator I(A);
if (I == A->getParent()->arg_begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
@@ -1880,7 +1886,7 @@ unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
Function *Fn = unwrap<Function>(FnRef);
for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
- *BasicBlocksRefs++ = wrap(I);
+ *BasicBlocksRefs++ = wrap(&*I);
}
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
@@ -1892,7 +1898,7 @@ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
Function::iterator I = Func->begin();
if (I == Func->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
@@ -1900,23 +1906,23 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
Function::iterator I = Func->end();
if (I == Func->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
- Function::iterator I = Block;
+ Function::iterator I(Block);
if (++I == Block->getParent()->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
- Function::iterator I = Block;
+ Function::iterator I(Block);
if (I == Block->getParent()->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
@@ -1968,7 +1974,7 @@ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
BasicBlock::iterator I = Block->begin();
if (I == Block->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
@@ -1976,23 +1982,23 @@ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
BasicBlock::iterator I = Block->end();
if (I == Block->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
- BasicBlock::iterator I = Instr;
+ BasicBlock::iterator I(Instr);
if (++I == Instr->getParent()->end())
return nullptr;
- return wrap(I);
+ return wrap(&*I);
}
LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
- BasicBlock::iterator I = Instr;
+ BasicBlock::iterator I(Instr);
if (I == Instr->getParent()->begin())
return nullptr;
- return wrap(--I);
+ return wrap(&*--I);
}
void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
@@ -2160,12 +2166,12 @@ void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
LLVMValueRef Instr) {
BasicBlock *BB = unwrap(Block);
Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
- unwrap(Builder)->SetInsertPoint(BB, I);
+ unwrap(Builder)->SetInsertPoint(BB, I->getIterator());
}
void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
Instruction *I = unwrap<Instruction>(Instr);
- unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I->getIterator());
}
void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
@@ -2489,7 +2495,6 @@ LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
}
-
LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name) {
return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
@@ -2515,6 +2520,21 @@ static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) {
llvm_unreachable("Invalid LLVMAtomicOrdering value!");
}
+static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ case NotAtomic: return LLVMAtomicOrderingNotAtomic;
+ case Unordered: return LLVMAtomicOrderingUnordered;
+ case Monotonic: return LLVMAtomicOrderingMonotonic;
+ case Acquire: return LLVMAtomicOrderingAcquire;
+ case Release: return LLVMAtomicOrderingRelease;
+ case AcquireRelease: return LLVMAtomicOrderingAcquireRelease;
+ case SequentiallyConsistent:
+ return LLVMAtomicOrderingSequentiallyConsistent;
+ }
+
+ llvm_unreachable("Invalid AtomicOrdering value!");
+}
+
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
LLVMBool isSingleThread, const char *Name) {
return wrap(
@@ -2567,6 +2587,25 @@ void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
return cast<StoreInst>(P)->setVolatile(isVolatile);
}
+LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ AtomicOrdering O;
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ O = LI->getOrdering();
+ else
+ O = cast<StoreInst>(P)->getOrdering();
+ return mapToLLVMOrdering(O);
+}
+
+void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ AtomicOrdering O = mapFromLLVMOrdering(Ordering);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setOrdering(O);
+ return cast<StoreInst>(P)->setOrdering(O);
+}
+
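A usage sketch for the new accessor pair from the C API side (Builder and Ptr are assumed to exist; LLVMBuildLoad is the existing builder entry point above):

    // Build a plain load, make it an acquire load, and read the ordering back.
    LLVMValueRef Val = LLVMBuildLoad(Builder, Ptr, "val");
    LLVMSetOrdering(Val, LLVMAtomicOrderingAcquire);
    assert(LLVMGetOrdering(Val) == LLVMAtomicOrderingAcquire);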
/*--.. Casts ...............................................................--*/
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 2a90e70..b7841fe 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -148,7 +148,7 @@ DICompileUnit *DIBuilder::createCompileUnit(
CUNode = DICompileUnit::getDistinct(
VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer,
isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr,
- nullptr, nullptr, nullptr, nullptr, DWOId);
+ nullptr, nullptr, nullptr, nullptr, nullptr, DWOId);
// Create a named metadata so that it is easier to find cu in a module.
// Note that we only generate this when the caller wants to actually
@@ -255,10 +255,12 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
DITypeRef::get(Base));
}
-DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy) {
+DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
- DITypeRef::get(RTy), 0, 0, 0, 0);
+ DITypeRef::get(RTy), SizeInBits, AlignInBits, 0, 0);
}
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
@@ -429,12 +431,23 @@ DICompositeType *DIBuilder::createUnionType(
return R;
}
-DISubroutineType *DIBuilder::createSubroutineType(DIFile *File,
- DITypeRefArray ParameterTypes,
+DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
unsigned Flags) {
return DISubroutineType::get(VMContext, Flags, ParameterTypes);
}
+DICompositeType *DIBuilder::createExternalTypeRef(unsigned Tag, DIFile *File,
+ StringRef UniqueIdentifier) {
+ assert(!UniqueIdentifier.empty() && "external type ref without uid");
+ auto *CTy =
+ DICompositeType::get(VMContext, Tag, "", nullptr, 0, nullptr, nullptr, 0,
+ 0, 0, DINode::FlagExternalTypeRef, nullptr, 0,
+ nullptr, nullptr, UniqueIdentifier);
+ // Types with unique IDs need to be in the type map.
+ retainType(CTy);
+ return CTy;
+}
+
DICompositeType *DIBuilder::createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements,
@@ -590,18 +603,20 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
.release();
}
-DILocalVariable *DIBuilder::createLocalVariable(
- unsigned Tag, DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
- DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
+static DILocalVariable *createLocalVariable(
+ LLVMContext &VMContext,
+ DenseMap<MDNode *, std::vector<TrackingMDNodeRef>> &PreservedVariables,
+ DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
// FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
DIScope *Context = getNonCompileUnitScope(Scope);
- auto *Node = DILocalVariable::get(
- VMContext, Tag, cast_or_null<DILocalScope>(Context), Name, File, LineNo,
- DITypeRef::get(Ty), ArgNo, Flags);
+ auto *Node =
+ DILocalVariable::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
+ File, LineNo, DITypeRef::get(Ty), ArgNo, Flags);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -613,6 +628,23 @@ DILocalVariable *DIBuilder::createLocalVariable(
return Node;
}
+DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ DIType *Ty, bool AlwaysPreserve,
+ unsigned Flags) {
+ return createLocalVariable(VMContext, PreservedVariables, Scope, Name,
+ /* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve,
+ Flags);
+}
+
+DILocalVariable *DIBuilder::createParameterVariable(
+ DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) {
+ assert(ArgNo && "Expected non-zero argument number for parameter");
+ return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo,
+ File, LineNo, Ty, AlwaysPreserve, Flags);
+}
+
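A sketch of the split API (Scope, File, and IntTy are assumed debug-info nodes, and the header's default arguments for AlwaysPreserve and Flags are relied on): what used to be createLocalVariable(DW_TAG_auto_variable, ...) and createLocalVariable(DW_TAG_arg_variable, ...) becomes

    // Block-scope variable: no argument number.
    DILocalVariable *Auto =
        DIB.createAutoVariable(Scope, "x", File, /*LineNo=*/7, IntTy);
    // Formal parameter: carries its 1-based index, asserted non-zero above.
    DILocalVariable *Parm =
        DIB.createParameterVariable(Scope, "arg", /*ArgNo=*/1, File, 7, IntTy);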
DIExpression *DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
return DIExpression::get(VMContext, Addr);
}
@@ -629,36 +661,37 @@ DIExpression *DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
return DIExpression::get(VMContext, Addr);
}
-DISubprogram *DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
- StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParams, MDNode *Decl) {
+DISubprogram *DIBuilder::createFunction(
+ DIScopeRef Context, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+ bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
+ DITemplateParameterArray TParams, DISubprogram *Decl) {
// dragonegg does not generate identifier for types, so using an empty map
// to resolve the context should be fine.
DITypeIdentifierMap EmptyMap;
return createFunction(Context.resolve(EmptyMap), Name, LinkageName, File,
LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
- Flags, isOptimized, Fn, TParams, Decl);
-}
-
-DISubprogram *DIBuilder::createFunction(DIScope *Context, StringRef Name,
- StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParams, MDNode *Decl) {
- assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
- "function types should be subroutines");
- auto *Node = DISubprogram::get(
- VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name,
- LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
- nullptr, 0, 0, Flags, isOptimized, Fn, cast_or_null<MDTuple>(TParams),
- cast_or_null<DISubprogram>(Decl),
- MDTuple::getTemporary(VMContext, None).release());
+ Flags, isOptimized, TParams, Decl);
+}
+
+template <class... Ts>
+static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
+ if (IsDistinct)
+ return DISubprogram::getDistinct(std::forward<Ts>(Args)...);
+ return DISubprogram::get(std::forward<Ts>(Args)...);
+}
+
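The IsDistinct switch matters for merging: subprogram definitions are created as distinct nodes, so two structurally identical definitions never unique into a single metadata node, while mere declarations stay uniqued as before.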
+DISubprogram *DIBuilder::createFunction(
+ DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+ bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
+ DITemplateParameterArray TParams, DISubprogram *Decl) {
+ auto *Node =
+ getSubprogram(/* IsDistinct = */ isDefinition, VMContext,
+ DIScopeRef::get(getNonCompileUnitScope(Context)), Name,
+ LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition,
+ ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl,
+ MDTuple::getTemporary(VMContext, None).release());
if (isDefinition)
AllSubprograms.push_back(Node);
@@ -670,12 +703,11 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
- Function *Fn, MDNode *TParams, MDNode *Decl) {
+ DITemplateParameterArray TParams, DISubprogram *Decl) {
return DISubprogram::getTemporary(
VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name,
LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, nullptr, 0, 0, Flags, isOptimized, Fn,
- cast_or_null<MDTuple>(TParams), cast_or_null<DISubprogram>(Decl),
+ ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl,
nullptr)
.release();
}
@@ -685,18 +717,16 @@ DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName,
DIFile *F, unsigned LineNo, DISubroutineType *Ty,
bool isLocalToUnit, bool isDefinition, unsigned VK,
unsigned VIndex, DIType *VTableHolder, unsigned Flags,
- bool isOptimized, Function *Fn, MDNode *TParam) {
- assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
- "function types should be subroutines");
+ bool isOptimized, DITemplateParameterArray TParams) {
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
// FIXME: Do we want to use different scope/lines?
- auto *SP = DISubprogram::get(
- VMContext, DIScopeRef::get(cast<DIScope>(Context)), Name, LinkageName, F,
- LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
- DITypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn,
- cast_or_null<MDTuple>(TParam), nullptr, nullptr);
+ auto *SP = getSubprogram(
+ /* IsDistinct = */ isDefinition, VMContext,
+ DIScopeRef::get(cast<DIScope>(Context)), Name, LinkageName, F, LineNo, Ty,
+ isLocalToUnit, isDefinition, LineNo, DITypeRef::get(VTableHolder), VK,
+ VIndex, Flags, isOptimized, TParams, nullptr, nullptr);
if (isDefinition)
AllSubprograms.push_back(SP);
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index 4d867ef..5468f47 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -41,6 +41,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
+ IsPadded = false;
NumElements = ST->getNumElements();
// Loop over each of the elements, placing them in memory.
@@ -49,8 +50,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);
// Add padding if necessary to align the data element properly.
- if ((StructSize & (TyAlign-1)) != 0)
+ if ((StructSize & (TyAlign-1)) != 0) {
+ IsPadded = true;
StructSize = RoundUpToAlignment(StructSize, TyAlign);
+ }
// Keep track of maximum alignment constraint.
StructAlignment = std::max(TyAlign, StructAlignment);
@@ -64,8 +67,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
// Add padding to the end of the struct so that it could be put in an array
// and all array elements would be aligned correctly.
- if ((StructSize & (StructAlignment-1)) != 0)
+ if ((StructSize & (StructAlignment-1)) != 0) {
+ IsPadded = true;
StructSize = RoundUpToAlignment(StructSize, StructAlignment);
+ }
}
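A concrete case for the new flag (sketch; Ctx and DL are an assumed LLVMContext and DataLayout giving i32 a 4-byte ABI alignment):

    StructType *T =
        StructType::get(Ctx, {Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx)});
    const StructLayout *SL = DL.getStructLayout(T);
    // Three padding bytes follow the i8, so IsPadded is set and the size
    // rounds up: SL->getSizeInBytes() == 8 rather than 5.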
@@ -461,8 +466,8 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
// The best match so far depends on what we're looking for.
- if (AlignType == INTEGER_ALIGN &&
- Alignments[i].AlignType == INTEGER_ALIGN) {
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
// The "best match" for integers is the smallest size that is larger than
// the BitWidth requested.
if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 9646d1a..a2443be 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -56,21 +56,6 @@ DISubprogram *llvm::getDISubprogram(const Function *F) {
return nullptr;
}
-DICompositeTypeBase *llvm::getDICompositeType(DIType *T) {
- if (auto *C = dyn_cast_or_null<DICompositeTypeBase>(T))
- return C;
-
- if (auto *D = dyn_cast_or_null<DIDerivedTypeBase>(T)) {
- // This function is currently used by dragonegg and dragonegg does
- // not generate identifier for types, so using an empty map to resolve
- // DerivedFrom should be fine.
- DITypeIdentifierMap EmptyMap;
- return getDICompositeType(D->getBaseType().resolve(EmptyMap));
- }
-
- return nullptr;
-}
-
DITypeIdentifierMap
llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
DITypeIdentifierMap Map;
@@ -164,20 +149,22 @@ void DebugInfoFinder::processType(DIType *DT) {
if (!addType(DT))
return;
processScope(DT->getScope().resolve(TypeIdentifierMap));
- if (auto *DCT = dyn_cast<DICompositeTypeBase>(DT)) {
+ if (auto *ST = dyn_cast<DISubroutineType>(DT)) {
+ for (DITypeRef Ref : ST->getTypeArray())
+ processType(Ref.resolve(TypeIdentifierMap));
+ return;
+ }
+ if (auto *DCT = dyn_cast<DICompositeType>(DT)) {
processType(DCT->getBaseType().resolve(TypeIdentifierMap));
- if (auto *ST = dyn_cast<DISubroutineType>(DCT)) {
- for (DITypeRef Ref : ST->getTypeArray())
- processType(Ref.resolve(TypeIdentifierMap));
- return;
- }
for (Metadata *D : DCT->getElements()) {
if (auto *T = dyn_cast<DIType>(D))
processType(T);
else if (auto *SP = dyn_cast<DISubprogram>(D))
processSubprogram(SP);
}
- } else if (auto *DDT = dyn_cast<DIDerivedTypeBase>(DT)) {
+ return;
+ }
+ if (auto *DDT = dyn_cast<DIDerivedType>(DT)) {
processType(DDT->getBaseType().resolve(TypeIdentifierMap));
}
}
@@ -313,6 +300,10 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
bool llvm::stripDebugInfo(Function &F) {
bool Changed = false;
+ if (F.getSubprogram()) {
+ Changed = true;
+ F.setSubprogram(nullptr);
+ }
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (I.getDebugLoc()) {
@@ -349,7 +340,7 @@ bool llvm::StripDebugInfo(Module &M) {
for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
NME = M.named_metadata_end(); NMI != NME;) {
- NamedMDNode *NMD = NMI;
+ NamedMDNode *NMD = &*NMI;
++NMI;
if (NMD->getName().startswith("llvm.dbg.")) {
NMD->eraseFromParent();
@@ -372,21 +363,3 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
return Val->getZExtValue();
return 0;
}
-
-DenseMap<const llvm::Function *, DISubprogram *>
-llvm::makeSubprogramMap(const Module &M) {
- DenseMap<const Function *, DISubprogram *> R;
-
- NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu");
- if (!CU_Nodes)
- return R;
-
- for (MDNode *N : CU_Nodes->operands()) {
- auto *CUNode = cast<DICompileUnit>(N);
- for (auto *SP : CUNode->getSubprograms()) {
- if (Function *F = SP->getFunction())
- R.insert(std::make_pair(F, SP));
- }
- }
- return R;
-}
diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
index 5e01748..58e0abd 100644
--- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -295,8 +295,7 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context,
StorageType Storage,
bool ShouldCreate) {
DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, TypeArray));
- Metadata *Ops[] = {nullptr, nullptr, nullptr, nullptr,
- TypeArray, nullptr, nullptr, nullptr};
+ Metadata *Ops[] = {nullptr, nullptr, nullptr, TypeArray};
DEFINE_GETIMPL_STORE(DISubroutineType, (Flags), Ops);
}
@@ -316,22 +315,20 @@ DICompileUnit *DICompileUnit::getImpl(
unsigned RuntimeVersion, MDString *SplitDebugFilename,
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *Subprograms, Metadata *GlobalVariables,
- Metadata *ImportedEntities, uint64_t DWOId,
+ Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId,
StorageType Storage, bool ShouldCreate) {
+ assert(Storage != Uniqued && "Cannot unique DICompileUnit");
assert(isCanonical(Producer) && "Expected canonical MDString");
assert(isCanonical(Flags) && "Expected canonical MDString");
assert(isCanonical(SplitDebugFilename) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(
- DICompileUnit,
- (SourceLanguage, File, getString(Producer), IsOptimized, getString(Flags),
- RuntimeVersion, getString(SplitDebugFilename), EmissionKind, EnumTypes,
- RetainedTypes, Subprograms, GlobalVariables, ImportedEntities, DWOId));
+
Metadata *Ops[] = {File, Producer, Flags, SplitDebugFilename, EnumTypes,
RetainedTypes, Subprograms, GlobalVariables,
- ImportedEntities};
- DEFINE_GETIMPL_STORE(
- DICompileUnit,
- (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId), Ops);
+ ImportedEntities, Macros};
+ return storeImpl(new (ArrayRef<Metadata *>(Ops).size()) DICompileUnit(
+ Context, Storage, SourceLanguage, IsOptimized,
+ RuntimeVersion, EmissionKind, DWOId, Ops),
+ Storage);
}
DISubprogram *DILocalScope::getSubprogram() const {
@@ -345,34 +342,28 @@ DISubprogram *DISubprogram::getImpl(
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- unsigned Flags, bool IsOptimized, Metadata *Function,
- Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
- StorageType Storage, bool ShouldCreate) {
+ unsigned Flags, bool IsOptimized, Metadata *TemplateParams,
+ Metadata *Declaration, Metadata *Variables, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DISubprogram,
(Scope, getString(Name), getString(LinkageName), File,
Line, Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex, Flags,
- IsOptimized, Function, TemplateParams, Declaration,
- Variables));
- Metadata *Ops[] = {File, Scope, Name, Name,
- LinkageName, Type, ContainingType, Function,
- TemplateParams, Declaration, Variables};
+ IsOptimized, TemplateParams, Declaration, Variables));
+ Metadata *Ops[] = {File, Scope, Name, Name,
+ LinkageName, Type, ContainingType, TemplateParams,
+ Declaration, Variables};
DEFINE_GETIMPL_STORE(DISubprogram,
(Line, ScopeLine, Virtuality, VirtualIndex, Flags,
IsLocalToUnit, IsDefinition, IsOptimized),
Ops);
}
-Function *DISubprogram::getFunction() const {
- // FIXME: Should this be looking through bitcasts?
- return dyn_cast_or_null<Function>(getFunctionConstant());
-}
-
bool DISubprogram::describes(const Function *F) const {
assert(F && "Invalid function");
- if (F == getFunction())
+ if (F->getSubprogram() == this)
return true;
StringRef Name = getLinkageName();
if (Name.empty())
@@ -380,15 +371,13 @@ bool DISubprogram::describes(const Function *F) const {
return F->getName() == Name;
}
-void DISubprogram::replaceFunction(Function *F) {
- replaceFunction(F ? ConstantAsMetadata::get(F)
- : static_cast<ConstantAsMetadata *>(nullptr));
-}
-
DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, unsigned Line,
unsigned Column, StorageType Storage,
bool ShouldCreate) {
+ // Fixup column.
+ adjustColumn(Column);
+
assert(Scope && "Expected scope");
DEFINE_GETIMPL_LOOKUP(DILexicalBlock, (Scope, File, Line, Column));
Metadata *Ops[] = {File, Scope};
@@ -467,21 +456,21 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Ops);
}
-DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, MDString *Name,
- Metadata *File, unsigned Line,
- Metadata *Type, unsigned Arg,
- unsigned Flags, StorageType Storage,
+DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
+ MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Type,
+ unsigned Arg, unsigned Flags,
+ StorageType Storage,
bool ShouldCreate) {
// 64K ought to be enough for any frontend.
assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits");
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Tag, Scope, getString(Name), File,
- Line, Type, Arg, Flags));
+ DEFINE_GETIMPL_LOOKUP(DILocalVariable,
+ (Scope, getString(Name), File, Line, Type, Arg, Flags));
Metadata *Ops[] = {Scope, Name, File, Type};
- DEFINE_GETIMPL_STORE(DILocalVariable, (Tag, Line, Arg, Flags), Ops);
+ DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags), Ops);
}
DIExpression *DIExpression::getImpl(LLVMContext &Context,
@@ -496,6 +485,7 @@ unsigned DIExpression::ExprOperand::getSize() const {
case dwarf::DW_OP_bit_piece:
return 3;
case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus:
return 2;
default:
return 1;
@@ -516,6 +506,7 @@ bool DIExpression::isValid() const {
// Piece expressions must be at the end.
return I->get() + I->getSize() == E->get();
case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus:
case dwarf::DW_OP_deref:
break;
}
@@ -566,3 +557,24 @@ DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag,
Metadata *Ops[] = {Scope, Entity, Name};
DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops);
}
+
+DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType,
+ unsigned Line, MDString *Name, MDString *Value,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIMacro,
+ (MIType, Line, getString(Name), getString(Value)));
+ Metadata *Ops[] = { Name, Value };
+ DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops);
+}
+
+DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType,
+ unsigned Line, Metadata *File,
+ Metadata *Elements, StorageType Storage,
+ bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DIMacroFile,
+ (MIType, Line, File, Elements));
+ Metadata *Ops[] = { File, Elements };
+ DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops);
+}
+
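
The two new node kinds follow the same getImpl pattern as the rest of the file: DEFINE_GETIMPL_LOOKUP consults the context's uniquing map keyed on the non-operand fields plus the operand strings, and DEFINE_GETIMPL_STORE allocates the node with its co-allocated operand array on a miss. A hand-expanded sketch for DIMacro, with the lookupUniqued helper hypothetical (the real macros also handle temporary nodes and ShouldCreate):

  Metadata *Ops[] = {Name, Value};
  if (Storage == Uniqued)
    if (DIMacro *Existing = lookupUniqued<DIMacro>(   // hypothetical helper
            Context, std::make_tuple(MIType, Line, getString(Name),
                                     getString(Value))))
      return Existing;
  return storeImpl(new (2) DIMacro(Context, Storage, MIType, Line, Ops),
                   Storage);
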
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index b8f77ed..6426f76 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -49,7 +49,7 @@ struct PassRemarksOpt {
"' in -pass-remarks: " + RegexError,
false);
}
- };
+ }
};
static PassRemarksOpt PassRemarksOptLoc;
@@ -91,6 +91,8 @@ int llvm::getNextAvailablePluginDiagnosticKind() {
return ++PluginKindID;
}
+const char *DiagnosticInfo::AlwaysPrint = "";
+
DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I,
const Twine &MsgStr,
DiagnosticSeverity Severity)
@@ -121,9 +123,17 @@ void DiagnosticInfoDebugMetadataVersion::print(DiagnosticPrinter &DP) const {
}
void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
- if (getFileName() && getLineNum() > 0)
- DP << getFileName() << ":" << getLineNum() << ": ";
- else if (getFileName())
+ if (!FileName.empty()) {
+ DP << getFileName();
+ if (LineNum > 0)
+ DP << ":" << getLineNum();
+ DP << ": ";
+ }
+ DP << getMsg();
+}
+
+void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const {
+ if (getFileName())
DP << getFileName() << ": ";
DP << getMsg();
}
@@ -166,8 +176,9 @@ bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const {
}
bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const {
- return PassRemarksAnalysisOptLoc.Pattern &&
- PassRemarksAnalysisOptLoc.Pattern->match(getPassName());
+ return getPassName() == DiagnosticInfo::AlwaysPrint ||
+ (PassRemarksAnalysisOptLoc.Pattern &&
+ PassRemarksAnalysisOptLoc.Pattern->match(getPassName()));
}
void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
@@ -196,6 +207,24 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
}
+void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(PassName, Fn,
+ DLoc, Msg));
+}
+
+void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisAliasing(PassName, Fn,
+ DLoc, Msg));
+}
+
bool DiagnosticInfoOptimizationFailure::isEnabled() const {
// Only print warnings.
return getSeverity() == DS_Warning;
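
DiagnosticInfo::AlwaysPrint is the empty string, and the isEnabled() hunk above compares pass-name pointers against it before consulting the -pass-remarks-analysis regex, so a remark tagged with the sentinel is emitted unconditionally. A hedged usage sketch, with Ctx, F and DLoc assumed in scope:

  // Always emitted: the pass name is the AlwaysPrint sentinel itself.
  emitOptimizationRemarkAnalysis(Ctx, DiagnosticInfo::AlwaysPrint, F, DLoc,
                                 "vectorization is not beneficial here");
  // Emitted only when -pass-remarks-analysis matches "my-pass".
  emitOptimizationRemarkAnalysis(Ctx, "my-pass", F, DLoc, "details...");
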
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
index b6a8bbc..b9d4fb7 100644
--- a/contrib/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -91,10 +91,10 @@ bool DominatorTree::dominates(const Instruction *Def,
if (Def == User)
return false;
- // The value defined by an invoke dominates an instruction only if
- // it dominates every instruction in UseBB.
- // A PHI is dominated only if the instruction dominates every possible use
- // in the UseBB.
+ // The value defined by an invoke dominates an instruction only if it
+ // dominates every instruction in UseBB.
+ // A PHI is dominated only if the instruction dominates every possible use in
+ // the UseBB.
if (isa<InvokeInst>(Def) || isa<PHINode>(User))
return dominates(Def, UseBB);
@@ -126,15 +126,15 @@ bool DominatorTree::dominates(const Instruction *Def,
if (DefBB == UseBB)
return false;
- const InvokeInst *II = dyn_cast<InvokeInst>(Def);
- if (!II)
- return dominates(DefBB, UseBB);
-
// Invoke results are only usable in the normal destination, not in the
// exceptional destination.
- BasicBlock *NormalDest = II->getNormalDest();
- BasicBlockEdge E(DefBB, NormalDest);
- return dominates(E, UseBB);
+ if (const auto *II = dyn_cast<InvokeInst>(Def)) {
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, UseBB);
+ }
+
+ return dominates(DefBB, UseBB);
}
bool DominatorTree::dominates(const BasicBlockEdge &BBE,
@@ -142,7 +142,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE,
// Assert that we have a single edge. We could handle them by simply
// returning false, but since isSingleEdge is linear on the number of
// edges, the callers can normally handle them more efficiently.
- assert(BBE.isSingleEdge());
+ assert(BBE.isSingleEdge() &&
+ "This function is not efficient in handling multiple edges");
// If the BB the edge ends in doesn't dominate the use BB, then the
// edge also doesn't.
@@ -192,7 +193,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const {
// Assert that we have a single edge. We could handle them by simply
// returning false, but since isSingleEdge is linear on the number of
// edges, the callers can normally handle them more efficiently.
- assert(BBE.isSingleEdge());
+ assert(BBE.isSingleEdge() &&
+ "This function is not efficient in handling multiple edges");
Instruction *UserInst = cast<Instruction>(U.getUser());
// A PHI in the end of the edge is dominated by it.
@@ -232,8 +234,8 @@ bool DominatorTree::dominates(const Instruction *Def, const Use &U) const {
if (!isReachableFromEntry(DefBB))
return false;
- // Invoke instructions define their return values on the edges
- // to their normal successors, so we have to handle them specially.
+ // Invoke instructions define their return values on the edges to their normal
+ // successors, so we have to handle them specially.
// Among other things, this means they don't dominate anything in
// their own block, except possibly a phi, so we don't need to
// walk the block in any case.
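
The restructured hunks keep the invoke special case: an invoke's result is materialized only on the edge to its normal destination, so def/use dominance collapses to an edge-dominance query. A small sketch of the distinction, assuming a computed DominatorTree DT and an InvokeInst *Def whose result is used in UseBB:

  // %r = invoke ... to label %normal unwind label %lpad
  // %r never dominates code reachable only via %lpad, even though its
  // defining block may: the definition lives on the DefBB -> %normal edge.
  BasicBlockEdge E(Def->getParent(), Def->getNormalDest());
  bool UseIsDominated = DT.dominates(E, UseBB);
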
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index b50ad12..cfb40b1 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -35,8 +35,8 @@ using namespace llvm;
// Explicit instantiations of SymbolTableListTraits since some of the methods
// are not in the public header file...
-template class llvm::SymbolTableListTraits<Argument, Function>;
-template class llvm::SymbolTableListTraits<BasicBlock, Function>;
+template class llvm::SymbolTableListTraits<Argument>;
+template class llvm::SymbolTableListTraits<BasicBlock>;
//===----------------------------------------------------------------------===//
// Argument Implementation
@@ -235,11 +235,11 @@ Type *Function::getReturnType() const {
}
void Function::removeFromParent() {
- getParent()->getFunctionList().remove(this);
+ getParent()->getFunctionList().remove(getIterator());
}
void Function::eraseFromParent() {
- getParent()->getFunctionList().erase(this);
+ getParent()->getFunctionList().erase(getIterator());
}
//===----------------------------------------------------------------------===//
@@ -248,7 +248,7 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
Module *ParentModule)
- : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal,
+ : GlobalObject(Ty, Value::FunctionVal,
OperandTraits<Function>::op_begin(this), 0, Linkage, name),
Ty(Ty) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
@@ -279,9 +279,6 @@ Function::~Function() {
// Remove the function from the on-the-side GC table.
clearGC();
-
- // FIXME: needed by operator delete
- setFunctionNumOperands(1);
}
void Function::BuildLazyArguments() const {
@@ -328,14 +325,15 @@ void Function::dropAllReferences() {
while (!BasicBlocks.empty())
BasicBlocks.begin()->eraseFromParent();
- // Prefix and prologue data are stored in a side table.
- setPrefixData(nullptr);
- setPrologueData(nullptr);
+ // Drop uses of any optional data (real or placeholder).
+ if (getNumOperands()) {
+ User::dropAllReferences();
+ setNumHungOffUseOperands(0);
+ setValueSubclassData(getSubclassDataFromValue() & ~0xe);
+ }
// Metadata is stored in a side-table.
clearMetadata();
-
- setPersonalityFn(nullptr);
}
void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -411,30 +409,26 @@ void Function::clearGC() {
}
}
-/// copyAttributesFrom - copy all additional attributes (those not needed to
-/// create a Function) from the Function Src to this one.
+/// Copy all additional attributes (those not needed to create a Function) from
+/// the Function Src to this one.
void Function::copyAttributesFrom(const GlobalValue *Src) {
- assert(isa<Function>(Src) && "Expected a Function!");
GlobalObject::copyAttributesFrom(Src);
- const Function *SrcF = cast<Function>(Src);
+ const Function *SrcF = dyn_cast<Function>(Src);
+ if (!SrcF)
+ return;
+
setCallingConv(SrcF->getCallingConv());
setAttributes(SrcF->getAttributes());
if (SrcF->hasGC())
setGC(SrcF->getGC());
else
clearGC();
+ if (SrcF->hasPersonalityFn())
+ setPersonalityFn(SrcF->getPersonalityFn());
if (SrcF->hasPrefixData())
setPrefixData(SrcF->getPrefixData());
- else
- setPrefixData(nullptr);
if (SrcF->hasPrologueData())
setPrologueData(SrcF->getPrologueData());
- else
- setPrologueData(nullptr);
- if (SrcF->hasPersonalityFn())
- setPersonalityFn(SrcF->getPersonalityFn());
- else
- setPersonalityFn(nullptr);
}
/// \brief This does the actual lookup of an intrinsic ID which
@@ -492,7 +486,10 @@ static std::string getMangledTypeStr(Type* Ty) {
Result += "vararg";
// Ensure nested function types are distinguishable.
Result += "f";
- } else if (Ty)
+ } else if (isa<VectorType>(Ty))
+ Result += "v" + utostr(Ty->getVectorNumElements()) +
+ getMangledTypeStr(Ty->getVectorElementType());
+ else if (Ty)
Result += EVT::getEVT(Ty).getEVTString();
return Result;
}
@@ -541,22 +538,25 @@ enum IIT_Info {
// Values from 16+ are only encodable with the inefficient encoding.
IIT_V64 = 16,
IIT_MMX = 17,
- IIT_METADATA = 18,
- IIT_EMPTYSTRUCT = 19,
- IIT_STRUCT2 = 20,
- IIT_STRUCT3 = 21,
- IIT_STRUCT4 = 22,
- IIT_STRUCT5 = 23,
- IIT_EXTEND_ARG = 24,
- IIT_TRUNC_ARG = 25,
- IIT_ANYPTR = 26,
- IIT_V1 = 27,
- IIT_VARARG = 28,
- IIT_HALF_VEC_ARG = 29,
- IIT_SAME_VEC_WIDTH_ARG = 30,
- IIT_PTR_TO_ARG = 31,
- IIT_VEC_OF_PTRS_TO_ELT = 32,
- IIT_I128 = 33
+ IIT_TOKEN = 18,
+ IIT_METADATA = 19,
+ IIT_EMPTYSTRUCT = 20,
+ IIT_STRUCT2 = 21,
+ IIT_STRUCT3 = 22,
+ IIT_STRUCT4 = 23,
+ IIT_STRUCT5 = 24,
+ IIT_EXTEND_ARG = 25,
+ IIT_TRUNC_ARG = 26,
+ IIT_ANYPTR = 27,
+ IIT_V1 = 28,
+ IIT_VARARG = 29,
+ IIT_HALF_VEC_ARG = 30,
+ IIT_SAME_VEC_WIDTH_ARG = 31,
+ IIT_PTR_TO_ARG = 32,
+ IIT_VEC_OF_PTRS_TO_ELT = 33,
+ IIT_I128 = 34,
+ IIT_V512 = 35,
+ IIT_V1024 = 36
};
@@ -576,6 +576,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_MMX:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
return;
+ case IIT_TOKEN:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0));
+ return;
case IIT_METADATA:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0));
return;
@@ -634,6 +637,14 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64));
DecodeIITType(NextElt, Infos, OutputTable);
return;
+ case IIT_V512:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 512));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V1024:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1024));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
case IIT_PTR:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
DecodeIITType(NextElt, Infos, OutputTable);
@@ -751,6 +762,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Void: return Type::getVoidTy(Context);
case IITDescriptor::VarArg: return Type::getVoidTy(Context);
case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
+ case IITDescriptor::Token: return Type::getTokenTy(Context);
case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
case IITDescriptor::Half: return Type::getHalfTy(Context);
case IITDescriptor::Float: return Type::getFloatTy(Context);
@@ -924,62 +936,68 @@ bool Function::callsFunctionThatReturnsTwice() const {
return false;
}
+Constant *Function::getPersonalityFn() const {
+ assert(hasPersonalityFn() && getNumOperands());
+ return cast<Constant>(Op<0>());
+}
+
+void Function::setPersonalityFn(Constant *Fn) {
+ setHungoffOperand<0>(Fn);
+ setValueSubclassDataBit(3, Fn != nullptr);
+}
+
Constant *Function::getPrefixData() const {
- assert(hasPrefixData());
- const LLVMContextImpl::PrefixDataMapTy &PDMap =
- getContext().pImpl->PrefixDataMap;
- assert(PDMap.find(this) != PDMap.end());
- return cast<Constant>(PDMap.find(this)->second->getReturnValue());
+ assert(hasPrefixData() && getNumOperands());
+ return cast<Constant>(Op<1>());
}
void Function::setPrefixData(Constant *PrefixData) {
- if (!PrefixData && !hasPrefixData())
- return;
-
- unsigned SCData = getSubclassDataFromValue();
- LLVMContextImpl::PrefixDataMapTy &PDMap = getContext().pImpl->PrefixDataMap;
- ReturnInst *&PDHolder = PDMap[this];
- if (PrefixData) {
- if (PDHolder)
- PDHolder->setOperand(0, PrefixData);
- else
- PDHolder = ReturnInst::Create(getContext(), PrefixData);
- SCData |= (1<<1);
- } else {
- delete PDHolder;
- PDMap.erase(this);
- SCData &= ~(1<<1);
- }
- setValueSubclassData(SCData);
+ setHungoffOperand<1>(PrefixData);
+ setValueSubclassDataBit(1, PrefixData != nullptr);
}
Constant *Function::getPrologueData() const {
- assert(hasPrologueData());
- const LLVMContextImpl::PrologueDataMapTy &SOMap =
- getContext().pImpl->PrologueDataMap;
- assert(SOMap.find(this) != SOMap.end());
- return cast<Constant>(SOMap.find(this)->second->getReturnValue());
+ assert(hasPrologueData() && getNumOperands());
+ return cast<Constant>(Op<2>());
}
void Function::setPrologueData(Constant *PrologueData) {
- if (!PrologueData && !hasPrologueData())
- return;
-
- unsigned PDData = getSubclassDataFromValue();
- LLVMContextImpl::PrologueDataMapTy &PDMap = getContext().pImpl->PrologueDataMap;
- ReturnInst *&PDHolder = PDMap[this];
- if (PrologueData) {
- if (PDHolder)
- PDHolder->setOperand(0, PrologueData);
- else
- PDHolder = ReturnInst::Create(getContext(), PrologueData);
- PDData |= (1<<2);
- } else {
- delete PDHolder;
- PDMap.erase(this);
- PDData &= ~(1<<2);
+ setHungoffOperand<2>(PrologueData);
+ setValueSubclassDataBit(2, PrologueData != nullptr);
+}
+
+void Function::allocHungoffUselist() {
+ // If we've already allocated a uselist, stop here.
+ if (getNumOperands())
+ return;
+
+ allocHungoffUses(3, /*IsPhi=*/ false);
+ setNumHungOffUseOperands(3);
+
+ // Initialize the uselist with placeholder operands to allow traversal.
+ auto *CPN = ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0));
+ Op<0>().set(CPN);
+ Op<1>().set(CPN);
+ Op<2>().set(CPN);
+}
+
+template <int Idx>
+void Function::setHungoffOperand(Constant *C) {
+ if (C) {
+ allocHungoffUselist();
+ Op<Idx>().set(C);
+ } else if (getNumOperands()) {
+ Op<Idx>().set(
+ ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0)));
}
- setValueSubclassData(PDData);
+}
+
+void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
+ assert(Bit < 16 && "SubclassData contains only 16 bits");
+ if (On)
+ setValueSubclassData(getSubclassDataFromValue() | (1 << Bit));
+ else
+ setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}
void Function::setEntryCount(uint64_t Count) {
@@ -997,22 +1015,3 @@ Optional<uint64_t> Function::getEntryCount() const {
}
return None;
}
-
-void Function::setPersonalityFn(Constant *C) {
- if (!C) {
- if (hasPersonalityFn()) {
- // Note, the num operands is used to compute the offset of the operand, so
- // the order here matters. Clearing the operand then clearing the num
- // operands ensures we have the correct offset to the operand.
- Op<0>().set(nullptr);
- setFunctionNumOperands(0);
- }
- } else {
- // Note, the num operands is used to compute the offset of the operand, so
- // the order here matters. We need to set num operands to 1 first so that
- // we get the correct offset to the first operand when we set it.
- if (!hasPersonalityFn())
- setFunctionNumOperands(1);
- Op<0>().set(C);
- }
-}
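
Taken together, these hunks replace two context side tables (prefix and prologue data) and the hand-rolled single-operand scheme for the personality function with one lazily allocated hung-off use list of three slots; presence is tracked in value subclass-data bits, and empty slots hold an i1* null placeholder so the list stays uniformly traversable. A condensed sketch of the resulting behavior, assuming a Function *F and a Constant *SomeConstant:

  // Slot layout after this change (see setHungoffOperand<Idx> above):
  //   Op<0> personality fn (bit 3), Op<1> prefix data (bit 1),
  //   Op<2> prologue data (bit 2)
  F->setPrefixData(SomeConstant); // first set allocates all three slots
  assert(F->hasPrefixData() && !F->hasPrologueData());
  F->setPrefixData(nullptr);      // clears bit 1; slot reverts to placeholder
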
diff --git a/contrib/llvm/lib/IR/FunctionInfo.cpp b/contrib/llvm/lib/IR/FunctionInfo.cpp
new file mode 100644
index 0000000..17a67bc
--- /dev/null
+++ b/contrib/llvm/lib/IR/FunctionInfo.cpp
@@ -0,0 +1,67 @@
+//===-- FunctionInfo.cpp - Function Info Index ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the function info index and summary classes for the
+// IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+// Create the combined function index/summary from multiple
+// per-module instances.
+void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
+ uint64_t NextModuleId) {
+
+ StringRef ModPath;
+ for (auto &OtherFuncInfoLists : *Other) {
+ std::string FuncName = OtherFuncInfoLists.getKey();
+ FunctionInfoList &List = OtherFuncInfoLists.second;
+
+ // Assert that the func info list only has one entry, since we shouldn't
+ // have duplicate names within a single per-module index.
+ assert(List.size() == 1);
+ std::unique_ptr<FunctionInfo> Info = std::move(List.front());
+
+ // Skip if there was no function summary section.
+ if (!Info->functionSummary())
+ continue;
+
+ // Add the module path string ref for this module if we haven't already
+ // saved a reference to it.
+ if (ModPath.empty())
+ ModPath =
+ addModulePath(Info->functionSummary()->modulePath(), NextModuleId);
+ else
+ assert(ModPath == Info->functionSummary()->modulePath() &&
+ "Each module in the combined map should have a unique ID");
+
+ // Note the module path string ref was copied above and is still owned by
+ // the original per-module index. Reset it to the new module path
+ // string reference owned by the combined index.
+ Info->functionSummary()->setModulePath(ModPath);
+
+ // If it is a local function, rename it.
+ if (Info->functionSummary()->isLocalFunction()) {
+ // Any local functions are virtually renamed when being added to the
+ // combined index map, to disambiguate from other functions with
+ // the same name. The symbol table created for the combined index
+ // file should contain the renamed symbols.
+ FuncName =
+ FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId);
+ }
+
+ // Add new function info to existing list. There may be duplicates when
+ // combining FunctionMap entries, due to COMDAT functions. Any local
+ // functions were virtually renamed above.
+ addFunctionInfo(FuncName, std::move(Info));
+ }
+}
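
A sketch of how a combined-index build might drive mergeFrom, with the container of per-module indexes assumed; the renaming of locals via getGlobalNameForLocal keeps keys unique across modules, as the comments above note:

  FunctionInfoIndex Combined;
  uint64_t NextModuleId = 0;
  for (std::unique_ptr<FunctionInfoIndex> &PerModule : PerModuleIndexes)
    Combined.mergeFrom(std::move(PerModule), NextModuleId++);
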
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
index 6ed58913..35b8157 100644
--- a/contrib/llvm/lib/IR/GCOV.cpp
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -448,7 +448,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) {
namespace {
struct formatBranchInfo {
- formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total)
+ formatBranchInfo(const GCOV::Options &Options, uint64_t Count, uint64_t Total)
: Options(Options), Count(Count), Total(Total) {}
void print(raw_ostream &OS) const {
@@ -460,7 +460,7 @@ struct formatBranchInfo {
OS << "taken " << branchDiv(Count, Total) << "%";
}
- const GCOVOptions &Options;
+ const GCOV::Options &Options;
uint64_t Count;
uint64_t Total;
};
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 1d02826..6159f93 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -32,15 +32,9 @@ bool GlobalValue::isMaterializable() const {
return F->isMaterializable();
return false;
}
-bool GlobalValue::isDematerializable() const {
- return getParent() && getParent()->isDematerializable(this);
-}
std::error_code GlobalValue::materialize() {
return getParent()->materialize(this);
}
-void GlobalValue::dematerialize() {
- getParent()->dematerialize(this);
-}
/// Override destroyConstantImpl to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
@@ -97,10 +91,11 @@ void GlobalObject::setGlobalObjectSubClassData(unsigned Val) {
}
void GlobalObject::copyAttributesFrom(const GlobalValue *Src) {
- const auto *GV = cast<GlobalObject>(Src);
- GlobalValue::copyAttributesFrom(GV);
- setAlignment(GV->getAlignment());
- setSection(GV->getSection());
+ GlobalValue::copyAttributesFrom(Src);
+ if (const auto *GV = dyn_cast<GlobalObject>(Src)) {
+ setAlignment(GV->getAlignment());
+ setSection(GV->getSection());
+ }
}
const char *GlobalValue::getSection() const {
@@ -147,9 +142,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
Constant *InitVal, const Twine &Name,
ThreadLocalMode TLMode, unsigned AddressSpace,
bool isExternallyInitialized)
- : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ : GlobalObject(Ty, Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != nullptr, Link, Name),
+ InitVal != nullptr, Link, Name, AddressSpace),
isConstantGlobal(constant),
isExternallyInitializedConstant(isExternallyInitialized) {
setThreadLocalMode(TLMode);
@@ -165,9 +160,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
const Twine &Name, GlobalVariable *Before,
ThreadLocalMode TLMode, unsigned AddressSpace,
bool isExternallyInitialized)
- : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ : GlobalObject(Ty, Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != nullptr, Link, Name),
+ InitVal != nullptr, Link, Name, AddressSpace),
isConstantGlobal(constant),
isExternallyInitializedConstant(isExternallyInitialized) {
setThreadLocalMode(TLMode);
@@ -178,7 +173,7 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
}
if (Before)
- Before->getParent()->getGlobalList().insert(Before, this);
+ Before->getParent()->getGlobalList().insert(Before->getIterator(), this);
else
M.getGlobalList().push_back(this);
}
@@ -188,11 +183,11 @@ void GlobalVariable::setParent(Module *parent) {
}
void GlobalVariable::removeFromParent() {
- getParent()->getGlobalList().remove(this);
+ getParent()->getGlobalList().remove(getIterator());
}
void GlobalVariable::eraseFromParent() {
- getParent()->getGlobalList().erase(this);
+ getParent()->getGlobalList().erase(getIterator());
}
void GlobalVariable::setInitializer(Constant *InitVal) {
@@ -216,14 +211,14 @@ void GlobalVariable::setInitializer(Constant *InitVal) {
}
}
-/// copyAttributesFrom - copy all additional attributes (those not needed to
-/// create a GlobalVariable) from the GlobalVariable Src to this one.
+/// Copy all additional attributes (those not needed to create a GlobalVariable)
+/// from the GlobalVariable Src to this one.
void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
- assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
GlobalObject::copyAttributesFrom(Src);
- const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
- setThreadLocalMode(SrcVar->getThreadLocalMode());
- setExternallyInitialized(SrcVar->isExternallyInitialized());
+ if (const GlobalVariable *SrcVar = dyn_cast<GlobalVariable>(Src)) {
+ setThreadLocalMode(SrcVar->getThreadLocalMode());
+ setExternallyInitialized(SrcVar->isExternallyInitialized());
+ }
}
@@ -231,35 +226,40 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
-GlobalAlias::GlobalAlias(PointerType *Ty, LinkageTypes Link, const Twine &Name,
- Constant *Aliasee, Module *ParentModule)
- : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
+ const Twine &Name, Constant *Aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name,
+ AddressSpace) {
Op<0>() = Aliasee;
if (ParentModule)
ParentModule->getAliasList().push_back(this);
}
-GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Link,
- const Twine &Name, Constant *Aliasee,
- Module *ParentModule) {
- return new GlobalAlias(Ty, Link, Name, Aliasee, ParentModule);
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Link, const Twine &Name,
+ Constant *Aliasee, Module *ParentModule) {
+ return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule);
}
-GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage,
- const Twine &Name, Module *Parent) {
- return create(Ty, Linkage, Name, nullptr, Parent);
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ Module *Parent) {
+ return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent);
}
-GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage,
- const Twine &Name, GlobalValue *Aliasee) {
- return create(Ty, Linkage, Name, Aliasee, Aliasee->getParent());
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ GlobalValue *Aliasee) {
+ return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent());
}
GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name,
GlobalValue *Aliasee) {
PointerType *PTy = Aliasee->getType();
- return create(PTy, Link, Name, Aliasee);
+ return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name,
+ Aliasee);
}
GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) {
@@ -271,11 +271,11 @@ void GlobalAlias::setParent(Module *parent) {
}
void GlobalAlias::removeFromParent() {
- getParent()->getAliasList().remove(this);
+ getParent()->getAliasList().remove(getIterator());
}
void GlobalAlias::eraseFromParent() {
- getParent()->getAliasList().erase(this);
+ getParent()->getAliasList().erase(getIterator());
}
void GlobalAlias::setAliasee(Constant *Aliasee) {
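
The alias creation API now takes the aliasee's value type and address space rather than a finished PointerType, in line with the tree-wide move away from pointer element types; the PointerType-based entry point survives only as the convenience wrapper shown above. A before/after sketch for existing callers:

  // Before: GlobalAlias::create(PTy, Link, "a", Aliasee);
  PointerType *PTy = Aliasee->getType();
  GlobalAlias *GA = GlobalAlias::create(PTy->getElementType(),
                                        PTy->getAddressSpace(), Link, "a",
                                        Aliasee);
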
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index bddb278..4474129 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -247,18 +247,21 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
return createCallHelper(TheFn, Ops, this, Name);
}
+template <typename T0, typename T1, typename T2, typename T3>
static std::vector<Value *>
getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualCallee, ArrayRef<Value *> CallArgs,
- ArrayRef<Value *> DeoptArgs, ArrayRef<Value *> GCArgs) {
+ Value *ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs,
+ ArrayRef<T1> TransitionArgs, ArrayRef<T2> DeoptArgs,
+ ArrayRef<T3> GCArgs) {
std::vector<Value *> Args;
Args.push_back(B.getInt64(ID));
Args.push_back(B.getInt32(NumPatchBytes));
Args.push_back(ActualCallee);
Args.push_back(B.getInt32(CallArgs.size()));
- Args.push_back(B.getInt32((unsigned)StatepointFlags::None));
+ Args.push_back(B.getInt32(Flags));
Args.insert(Args.end(), CallArgs.begin(), CallArgs.end());
- Args.push_back(B.getInt32(0 /* no transition args */));
+ Args.push_back(B.getInt32(TransitionArgs.size()));
+ Args.insert(Args.end(), TransitionArgs.begin(), TransitionArgs.end());
Args.push_back(B.getInt32(DeoptArgs.size()));
Args.insert(Args.end(), DeoptArgs.begin(), DeoptArgs.end());
Args.insert(Args.end(), GCArgs.begin(), GCArgs.end());
@@ -266,69 +269,109 @@ getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,
return Args;
}
-CallInst *IRBuilderBase::CreateGCStatepointCall(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
- ArrayRef<Value *> CallArgs, ArrayRef<Value *> DeoptArgs,
- ArrayRef<Value *> GCArgs, const Twine &Name) {
+template <typename T0, typename T1, typename T2, typename T3>
+static CallInst *CreateGCStatepointCallCommon(
+ IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes,
+ Value *ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs,
+ ArrayRef<T1> TransitionArgs, ArrayRef<T2> DeoptArgs, ArrayRef<T3> GCArgs,
+ const Twine &Name) {
// Extract out the type of the callee.
PointerType *FuncPtrType = cast<PointerType>(ActualCallee->getType());
assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
"actual callee must be a callable value");
- Module *M = BB->getParent()->getParent();
+ Module *M = Builder->GetInsertBlock()->getParent()->getParent();
// Fill in the one generic type'd argument (the function is also vararg)
Type *ArgTypes[] = { FuncPtrType };
Function *FnStatepoint =
Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
ArgTypes);
- std::vector<llvm::Value *> Args = getStatepointArgs(
- *this, ID, NumPatchBytes, ActualCallee, CallArgs, DeoptArgs, GCArgs);
- return createCallHelper(FnStatepoint, Args, this, Name);
+ std::vector<llvm::Value *> Args =
+ getStatepointArgs(*Builder, ID, NumPatchBytes, ActualCallee, Flags,
+ CallArgs, TransitionArgs, DeoptArgs, GCArgs);
+ return createCallHelper(FnStatepoint, Args, Builder, Name);
}
CallInst *IRBuilderBase::CreateGCStatepointCall(
uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
- ArrayRef<Use> CallArgs, ArrayRef<Value *> DeoptArgs,
+ ArrayRef<Value *> CallArgs, ArrayRef<Value *> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name) {
- std::vector<Value *> VCallArgs;
- for (auto &U : CallArgs)
- VCallArgs.push_back(U.get());
- return CreateGCStatepointCall(ID, NumPatchBytes, ActualCallee, VCallArgs,
- DeoptArgs, GCArgs, Name);
+ return CreateGCStatepointCallCommon<Value *, Value *, Value *, Value *>(
+ this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None),
+ CallArgs, None /* No Transition Args */, DeoptArgs, GCArgs, Name);
}
-InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
- BasicBlock *NormalDest, BasicBlock *UnwindDest,
- ArrayRef<Value *> InvokeArgs, ArrayRef<Value *> DeoptArgs,
+CallInst *IRBuilderBase::CreateGCStatepointCall(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags,
+ ArrayRef<Use> CallArgs, ArrayRef<Use> TransitionArgs,
+ ArrayRef<Use> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
+ return CreateGCStatepointCallCommon<Use, Use, Use, Value *>(
+ this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs,
+ DeoptArgs, GCArgs, Name);
+}
+
+CallInst *IRBuilderBase::CreateGCStatepointCall(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
+ ArrayRef<Use> CallArgs, ArrayRef<Value *> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name) {
+ return CreateGCStatepointCallCommon<Use, Value *, Value *, Value *>(
+ this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None),
+ CallArgs, None, DeoptArgs, GCArgs, Name);
+}
+
+template <typename T0, typename T1, typename T2, typename T3>
+static InvokeInst *CreateGCStatepointInvokeCommon(
+ IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes,
+ Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest,
+ uint32_t Flags, ArrayRef<T0> InvokeArgs, ArrayRef<T1> TransitionArgs,
+ ArrayRef<T2> DeoptArgs, ArrayRef<T3> GCArgs, const Twine &Name) {
// Extract out the type of the callee.
PointerType *FuncPtrType = cast<PointerType>(ActualInvokee->getType());
assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
"actual callee must be a callable value");
- Module *M = BB->getParent()->getParent();
+ Module *M = Builder->GetInsertBlock()->getParent()->getParent();
// Fill in the one generic type'd argument (the function is also vararg)
Function *FnStatepoint = Intrinsic::getDeclaration(
M, Intrinsic::experimental_gc_statepoint, {FuncPtrType});
- std::vector<llvm::Value *> Args = getStatepointArgs(
- *this, ID, NumPatchBytes, ActualInvokee, InvokeArgs, DeoptArgs, GCArgs);
- return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, this,
+ std::vector<llvm::Value *> Args =
+ getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee, Flags,
+ InvokeArgs, TransitionArgs, DeoptArgs, GCArgs);
+ return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, Builder,
Name);
}
InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
+ ArrayRef<Value *> InvokeArgs, ArrayRef<Value *> DeoptArgs,
+ ArrayRef<Value *> GCArgs, const Twine &Name) {
+ return CreateGCStatepointInvokeCommon<Value *, Value *, Value *, Value *>(
+ this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest,
+ uint32_t(StatepointFlags::None), InvokeArgs, None /* No Transition Args*/,
+ DeoptArgs, GCArgs, Name);
+}
+
+InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
+ ArrayRef<Use> InvokeArgs, ArrayRef<Use> TransitionArgs,
+ ArrayRef<Use> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
+ return CreateGCStatepointInvokeCommon<Use, Use, Use, Value *>(
+ this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags,
+ InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name);
+}
+
+InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef<Use> InvokeArgs,
ArrayRef<Value *> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
- std::vector<Value *> VCallArgs;
- for (auto &U : InvokeArgs)
- VCallArgs.push_back(U.get());
- return CreateGCStatepointInvoke(ID, NumPatchBytes, ActualInvokee, NormalDest,
- UnwindDest, VCallArgs, DeoptArgs, GCArgs,
- Name);
+ return CreateGCStatepointInvokeCommon<Use, Value *, Value *, Value *>(
+ this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest,
+ uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs,
+ Name);
}
CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
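
getStatepointArgs now lays out a transition-argument section between the call and deopt sections and takes the statepoint flags from the caller instead of hardcoding None. The flat operand order of the resulting gc.statepoint call, plus the simplest overload (which still defaults flags and transition args), sketched with Builder, Callee and the argument vectors assumed and an illustrative ID:

  // Operand order: ID, NumPatchBytes, callee, #call args, flags,
  //                call args..., #transition args, transition args...,
  //                #deopt args, deopt args..., gc args...
  CallInst *SP = Builder.CreateGCStatepointCall(
      /*ID=*/0xABCDEF00, /*NumPatchBytes=*/0, Callee, CallArgs, DeoptArgs,
      GCArgs, "statepoint");
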
diff --git a/contrib/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm/lib/IR/InlineAsm.cpp
index aa9e027..15d3b83 100644
--- a/contrib/llvm/lib/IR/InlineAsm.cpp
+++ b/contrib/llvm/lib/IR/InlineAsm.cpp
@@ -24,23 +24,22 @@ using namespace llvm;
InlineAsm::~InlineAsm() {
}
-
-InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
+InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString,
StringRef Constraints, bool hasSideEffects,
bool isAlignStack, AsmDialect asmDialect) {
- InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack,
- asmDialect);
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
+ InlineAsmKeyType Key(AsmString, Constraints, FTy, hasSideEffects,
+ isAlignStack, asmDialect);
+ LLVMContextImpl *pImpl = FTy->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(FTy), Key);
}
-InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
+InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString,
const std::string &constraints, bool hasSideEffects,
bool isAlignStack, AsmDialect asmDialect)
- : Value(Ty, Value::InlineAsmVal),
- AsmString(asmString), Constraints(constraints),
- HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
- Dialect(asmDialect) {
+ : Value(PointerType::getUnqual(FTy), Value::InlineAsmVal),
+ AsmString(asmString), Constraints(constraints), FTy(FTy),
+ HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
+ Dialect(asmDialect) {
// Do various checks on the constraint string and type.
assert(Verify(getFunctionType(), constraints) &&
@@ -53,7 +52,7 @@ void InlineAsm::destroyConstant() {
}
FunctionType *InlineAsm::getFunctionType() const {
- return cast<FunctionType>(getType()->getElementType());
+ return FTy;
}
/// Default constructor.
@@ -160,6 +159,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// If Operand N already has a matching input, reject this. An output
// can't be constrained to the same value as multiple inputs.
if (isMultipleAlternative) {
+ if (multipleAlternativeIndex >=
+ ConstraintsSoFar[N].multipleAlternatives.size())
+ return true;
InlineAsm::SubConstraintInfo &scInfo =
ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
if (scInfo.MatchingInput != -1)
@@ -291,4 +293,3 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
if (Ty->getNumParams() != NumInputs) return false;
return true;
}
-
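
InlineAsm now carries its FunctionType as a field instead of digging it out of the pointer's element type, and the bounds check added to ConstraintInfo::Parse rejects a matching-output constraint whose alternative index is out of range rather than indexing past the vector. Construction is unchanged for callers; a minimal sketch, with Builder assumed:

  FunctionType *FTy =
      FunctionType::get(Builder.getInt32Ty(), {Builder.getInt32Ty()}, false);
  InlineAsm *IA = InlineAsm::get(FTy, "mov $1, $0", "=r,r",
                                 /*hasSideEffects=*/false);
  assert(IA->getFunctionType() == FTy); // now a stored field, not a type cast
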
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index c57ba16..a0bd2c9 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -28,7 +28,7 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
if (InsertBefore) {
BasicBlock *BB = InsertBefore->getParent();
assert(BB && "Instruction to insert before is not in a basic block!");
- BB->getInstList().insert(InsertBefore, this);
+ BB->getInstList().insert(InsertBefore->getIterator(), this);
}
}
@@ -62,33 +62,39 @@ Module *Instruction::getModule() {
return getParent()->getModule();
}
+Function *Instruction::getFunction() { return getParent()->getParent(); }
+
+const Function *Instruction::getFunction() const {
+ return getParent()->getParent();
+}
void Instruction::removeFromParent() {
- getParent()->getInstList().remove(this);
+ getParent()->getInstList().remove(getIterator());
}
iplist<Instruction>::iterator Instruction::eraseFromParent() {
- return getParent()->getInstList().erase(this);
+ return getParent()->getInstList().erase(getIterator());
}
/// insertBefore - Insert an unlinked instruction into a basic block
/// immediately before the specified instruction.
void Instruction::insertBefore(Instruction *InsertPos) {
- InsertPos->getParent()->getInstList().insert(InsertPos, this);
+ InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
}
/// insertAfter - Insert an unlinked instruction into a basic block
/// immediately after the specified instruction.
void Instruction::insertAfter(Instruction *InsertPos) {
- InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(),
+ this);
}
/// moveBefore - Unlink this instruction from its current basic block and
/// insert it into the basic block that MovePos lives in, right before
/// MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
- MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
- this);
+ MovePos->getParent()->getInstList().splice(
+ MovePos->getIterator(), getParent()->getInstList(), getIterator());
}
/// Set or clear the unsafe-algebra flag on this instruction, which must be an
@@ -196,6 +202,10 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case Invoke: return "invoke";
case Resume: return "resume";
case Unreachable: return "unreachable";
+ case CleanupRet: return "cleanupret";
+ case CatchRet: return "catchret";
+ case CatchPad: return "catchpad";
+ case CatchSwitch: return "catchswitch";
// Standard binary operators...
case Add: return "add";
@@ -256,6 +266,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case ExtractValue: return "extractvalue";
case InsertValue: return "insertvalue";
case LandingPad: return "landingpad";
+ case CleanupPad: return "cleanuppad";
default: return "<Invalid operator> ";
}
@@ -285,11 +296,12 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
if (const CallInst *CI = dyn_cast<CallInst>(I1))
return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes() &&
+ CI->hasIdenticalOperandBundleSchema(*cast<CallInst>(I2));
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
- CI->getAttributes() ==
- cast<InvokeInst>(I2)->getAttributes();
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes() &&
+ CI->hasIdenticalOperandBundleSchema(*cast<InvokeInst>(I2));
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
@@ -407,6 +419,8 @@ bool Instruction::mayReadFromMemory() const {
case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory
case Instruction::AtomicCmpXchg:
case Instruction::AtomicRMW:
+ case Instruction::CatchPad:
+ case Instruction::CatchRet:
return true;
case Instruction::Call:
return !cast<CallInst>(this)->doesNotAccessMemory();
@@ -427,6 +441,8 @@ bool Instruction::mayWriteToMemory() const {
case Instruction::VAArg:
case Instruction::AtomicCmpXchg:
case Instruction::AtomicRMW:
+ case Instruction::CatchPad:
+ case Instruction::CatchRet:
return true;
case Instruction::Call:
return !cast<CallInst>(this)->onlyReadsMemory();
@@ -455,6 +471,10 @@ bool Instruction::isAtomic() const {
bool Instruction::mayThrow() const {
if (const CallInst *CI = dyn_cast<CallInst>(this))
return !CI->doesNotThrow();
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(this))
+ return CRI->unwindsToCaller();
+ if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(this))
+ return CatchSwitch->unwindsToCaller();
return isa<ResumeInst>(this);
}
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 86c921a..4ae2fd5 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -62,7 +62,10 @@ UnaryInstruction::~UnaryInstruction() {
const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
if (Op1->getType() != Op2->getType())
return "both values to select must have same type";
-
+
+ if (Op1->getType()->isTokenTy())
+ return "select values cannot have token type";
+
if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
// Vector select.
if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
@@ -84,6 +87,8 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
// PHINode Class
//===----------------------------------------------------------------------===//
+void PHINode::anchor() {}
+
PHINode::PHINode(const PHINode &PN)
: Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()),
ReservedSpace(PN.getNumOperands()) {
@@ -223,9 +228,10 @@ CallInst::~CallInst() {
}
void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr) {
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
this->FTy = FTy;
- assert(getNumOperands() == Args.size() + 1 && "NumOperands not set up?");
+ assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 &&
+ "NumOperands not set up?");
Op<-1>() = Func;
#ifndef NDEBUG
@@ -240,6 +246,11 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
#endif
std::copy(Args.begin(), Args.end(), op_begin());
+
+ auto It = populateBundleOperandInfos(Bundles, Args.size());
+ (void)It;
+ assert(It + 1 == op_end() && "Should add up!");
+
setName(NameStr);
}
@@ -281,11 +292,26 @@ CallInst::CallInst(const CallInst &CI)
AttributeList(CI.AttributeList), FTy(CI.FTy) {
setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
-
+
std::copy(CI.op_begin(), CI.op_end(), op_begin());
+ std::copy(CI.bundle_op_info_begin(), CI.bundle_op_info_end(),
+ bundle_op_info_begin());
SubclassOptionalData = CI.SubclassOptionalData;
}
+CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
+ Instruction *InsertPt) {
+ std::vector<Value *> Args(CI->arg_begin(), CI->arg_end());
+
+ auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(),
+ InsertPt);
+ NewCI->setTailCallKind(CI->getTailCallKind());
+ NewCI->setCallingConv(CI->getCallingConv());
+ NewCI->SubclassOptionalData = CI->SubclassOptionalData;
+ NewCI->setAttributes(CI->getAttributes());
+ return NewCI;
+}
+
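
The new Create overload clones an existing call with a different operand-bundle set while preserving calling convention, attributes, tail-call kind and the subclass optional data; it is the primitive a pass would use to add or strip bundles in place. A hedged sketch, with OrigCall and an ArrayRef<OperandBundleDef> Bundles assumed:

  CallInst *NewCI = CallInst::Create(OrigCall, Bundles, /*InsertPt=*/OrigCall);
  OrigCall->replaceAllUsesWith(NewCI);
  NewCI->takeName(OrigCall);
  OrigCall->eraseFromParent();
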
void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
AttributeSet PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, attr);
@@ -320,6 +346,8 @@ void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
}
bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
+
if (AttributeList.hasAttribute(i, A))
return true;
if (const Function *F = getCalledFunction())
@@ -327,6 +355,25 @@ bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
return false;
}
+bool CallInst::dataOperandHasImpliedAttr(unsigned i,
+ Attribute::AttrKind A) const {
+
+ // There are getNumOperands() - 1 data operands. The last operand is the
+ // callee.
+ assert(i < getNumOperands() && "Data operand index out of bounds!");
+
+ // The attribute A can either be directly specified, if the operand in
+ // question is a call argument; or be indirectly implied by the kind of its
+ // containing operand bundle, if the operand is a bundle operand.
+
+ if (i < (getNumArgOperands() + 1))
+ return paramHasAttr(i, A);
+
+ assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ "Must be either a call argument or an operand bundle!");
+ return bundleOperandHasAttr(i - 1, A);
+}
+
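
The data-operand index space stitches call arguments and bundle operands together under the attribute numbering (index 0 is the return value). A worked example for a call with two arguments and one two-input bundle:

  // call i32 @g(i32 %a, i32 %b) [ "tag"(i32 %x, i32 %y) ]
  //   i = 1 or 2 -> paramHasAttr(i, A)              (%a, %b)
  //   i = 3      -> bundleOperandHasAttr(2, A)      (%x, raw operand index 2)
  //   i = 4      -> bundleOperandHasAttr(3, A)      (%y, raw operand index 3)
  //   i = 5 would be the callee and trips the bounds assert above.
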
/// IsConstantOne - Return true only if val is constant int 1
static bool IsConstantOne(Value *val) {
assert(val && "IsConstantOne does not work with nullptr val");
@@ -496,10 +543,12 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr) {
this->FTy = FTy;
- assert(getNumOperands() == 3 + Args.size() && "NumOperands not set up?");
+ assert(getNumOperands() == 3 + Args.size() + CountBundleInputs(Bundles) &&
+ "NumOperands not set up?");
Op<-3>() = Fn;
Op<-2>() = IfNormal;
Op<-1>() = IfException;
@@ -516,6 +565,11 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
#endif
std::copy(Args.begin(), Args.end(), op_begin());
+
+ auto It = populateBundleOperandInfos(Bundles, Args.size());
+ (void)It;
+ assert(It + 3 == op_end() && "Should add up!");
+
setName(NameStr);
}
@@ -527,9 +581,24 @@ InvokeInst::InvokeInst(const InvokeInst &II)
AttributeList(II.AttributeList), FTy(II.FTy) {
setCallingConv(II.getCallingConv());
std::copy(II.op_begin(), II.op_end(), op_begin());
+ std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(),
+ bundle_op_info_begin());
SubclassOptionalData = II.SubclassOptionalData;
}
+InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
+ Instruction *InsertPt) {
+ std::vector<Value *> Args(II->arg_begin(), II->arg_end());
+
+ auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(),
+ II->getUnwindDest(), Args, OpB,
+ II->getName(), InsertPt);
+ NewII->setCallingConv(II->getCallingConv());
+ NewII->SubclassOptionalData = II->SubclassOptionalData;
+ NewII->setAttributes(II->getAttributes());
+ return NewII;
+}
+
BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}
@@ -543,12 +612,20 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the invoke instruction.
+ if (isFnAttrDisallowedByOpBundle(A))
+ return false;
+
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
}
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
+
if (AttributeList.hasAttribute(i, A))
return true;
if (const Function *F = getCalledFunction())
@@ -556,6 +633,24 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
return false;
}
+bool InvokeInst::dataOperandHasImpliedAttr(unsigned i,
+ Attribute::AttrKind A) const {
+ // There are getNumOperands() - 3 data operands. The last three operands are
+ // the callee and the two successor basic blocks.
+ assert(i < (getNumOperands() - 2) && "Data operand index out of bounds!");
+
+ // The attribute A can either be directly specified, if the operand in
+ // question is an invoke argument; or be indirectly implied by the kind of its
+ // containing operand bundle, if the operand is a bundle operand.
+
+ if (i < (getNumArgOperands() + 1))
+ return paramHasAttr(i, A);
+
+ assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ "Must be either an invoke argument or an operand bundle!");
+ return bundleOperandHasAttr(i - 1, A);
+}
+
void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
AttributeSet PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, attr);
@@ -671,6 +766,223 @@ BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
}
//===----------------------------------------------------------------------===//
+// CleanupReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI)
+ : TerminatorInst(CRI.getType(), Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) -
+ CRI.getNumOperands(),
+ CRI.getNumOperands()) {
+ setInstructionSubclassData(CRI.getSubclassDataFromInstruction());
+ Op<0>() = CRI.Op<0>();
+ if (CRI.hasUnwindDest())
+ Op<1>() = CRI.Op<1>();
+}
+
+void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) {
+ if (UnwindBB)
+ setInstructionSubclassData(getSubclassDataFromInstruction() | 1);
+
+ Op<0>() = CleanupPad;
+ if (UnwindBB)
+ Op<1>() = UnwindBB;
+}
+
+CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
+ unsigned Values, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertBefore) {
+ init(CleanupPad, UnwindBB);
+}
+
+CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
+ unsigned Values, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
+ init(CleanupPad, UnwindBB);
+}
+
+BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const {
+ assert(Idx == 0);
+ return getUnwindDest();
+}
+unsigned CleanupReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
+ assert(Idx == 0);
+ setUnwindDest(B);
+}
+
+//===----------------------------------------------------------------------===//
+// CatchReturnInst Implementation
+//===----------------------------------------------------------------------===//
+void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) {
+ Op<0>() = CatchPad;
+ Op<1>() = BB;
+}
+
+CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI)
+ : TerminatorInst(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2) {
+ Op<0>() = CRI.Op<0>();
+ Op<1>() = CRI.Op<1>();
+}
+
+CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertBefore) {
+ init(CatchPad, BB);
+}
+
+CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertAtEnd) {
+ init(CatchPad, BB);
+}
+
+BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const {
+ assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+ return getSuccessor();
+}
+unsigned CatchReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
+ assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+ setSuccessor(B);
+}
+
+//===----------------------------------------------------------------------===//
+// CatchSwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumReservedValues,
+ const Twine &NameStr,
+ Instruction *InsertBefore)
+ : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertBefore) {
+ if (UnwindDest)
+ ++NumReservedValues;
+ init(ParentPad, UnwindDest, NumReservedValues + 1);
+ setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumReservedValues,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertAtEnd) {
+ if (UnwindDest)
+ ++NumReservedValues;
+ init(ParentPad, UnwindDest, NumReservedValues + 1);
+ setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI)
+ : TerminatorInst(CSI.getType(), Instruction::CatchSwitch, nullptr,
+ CSI.getNumOperands()) {
+ init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands());
+ setNumHungOffUseOperands(ReservedSpace);
+ Use *OL = getOperandList();
+ const Use *InOL = CSI.getOperandList();
+ for (unsigned I = 1, E = ReservedSpace; I != E; ++I)
+ OL[I] = InOL[I];
+}
+
+void CatchSwitchInst::init(Value *ParentPad, BasicBlock *UnwindDest,
+ unsigned NumReservedValues) {
+ assert(ParentPad && NumReservedValues);
+
+ ReservedSpace = NumReservedValues;
+ setNumHungOffUseOperands(UnwindDest ? 2 : 1);
+ allocHungoffUses(ReservedSpace);
+
+ Op<0>() = ParentPad;
+ if (UnwindDest) {
+ setInstructionSubclassData(getSubclassDataFromInstruction() | 1);
+ setUnwindDest(UnwindDest);
+ }
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, doubling the allocated operand space.
+void CatchSwitchInst::growOperands(unsigned Size) {
+ unsigned NumOperands = getNumOperands();
+ assert(NumOperands >= 1);
+ if (ReservedSpace >= NumOperands + Size)
+ return;
+ ReservedSpace = (NumOperands + Size / 2) * 2;
+ growHungoffUses(ReservedSpace);
+}
+
+void CatchSwitchInst::addHandler(BasicBlock *Handler) {
+ unsigned OpNo = getNumOperands();
+ growOperands(1);
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ setNumHungOffUseOperands(getNumOperands() + 1);
+ getOperandList()[OpNo] = Handler;
+}
+
+BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned CatchSwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// FuncletPadInst Implementation
+//===----------------------------------------------------------------------===//
+void FuncletPadInst::init(Value *ParentPad, ArrayRef<Value *> Args,
+ const Twine &NameStr) {
+ assert(getNumOperands() == 1 + Args.size() && "NumOperands not set up?");
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setParentPad(ParentPad);
+ setName(NameStr);
+}
+
+FuncletPadInst::FuncletPadInst(const FuncletPadInst &FPI)
+ : Instruction(FPI.getType(), FPI.getOpcode(),
+ OperandTraits<FuncletPadInst>::op_end(this) -
+ FPI.getNumOperands(),
+ FPI.getNumOperands()) {
+ std::copy(FPI.op_begin(), FPI.op_end(), op_begin());
+ setParentPad(FPI.getParentPad());
+}
+
+FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : Instruction(ParentPad->getType(), Op,
+ OperandTraits<FuncletPadInst>::op_end(this) - Values, Values,
+ InsertBefore) {
+ init(ParentPad, Args, NameStr);
+}
+
+FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : Instruction(ParentPad->getType(), Op,
+ OperandTraits<FuncletPadInst>::op_end(this) - Values, Values,
+ InsertAtEnd) {
+ init(ParentPad, Args, NameStr);
+}
+
+//===----------------------------------------------------------------------===//
// UnreachableInst Implementation
//===----------------------------------------------------------------------===//
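For orientation, a minimal sketch (not part of this patch) of how the funclet instructions implemented above are typically built through their public Create factories; the helper name, the block arguments, and the empty catchpad operand list are illustrative assumptions:

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Builds, in an existing function:
//   %cs  = catchswitch within none [label %handler] unwind to caller
//   %pad = catchpad within %cs []
//   catchret from %pad to label %cont
void buildCatchFunclet(BasicBlock *Dispatch, BasicBlock *Handler,
                       BasicBlock *Cont) {
  LLVMContext &Ctx = Dispatch->getContext();
  // A top-level funclet's parent pad is "none", modeled by the token constant.
  auto *CS = CatchSwitchInst::Create(ConstantTokenNone::get(Ctx),
                                     /*UnwindDest=*/nullptr,
                                     /*NumHandlers=*/1, "cs", Dispatch);
  CS->addHandler(Handler);
  // Empty argument list; real personalities pass typeinfo/filter operands.
  auto *Pad = CatchPadInst::Create(CS, /*Args=*/{}, "pad", Handler);
  CatchReturnInst::Create(Pad, Cont, Handler);
}
```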
@@ -1193,6 +1505,8 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
// GetElementPtrInst Implementation
//===----------------------------------------------------------------------===//
+void GetElementPtrInst::anchor() {}
+
void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name) {
assert(getNumOperands() == 1 + IdxList.size() &&
@@ -2029,7 +2343,7 @@ bool CastInst::isNoopCast(const DataLayout &DL) const {
/// * %S = secondOpcode MidTy %F to DstTy
/// The function returns a resultOpcode so these two casts can be replaced with:
/// * %Replacement = resultOpcode %SrcTy %x to DstTy
-/// If no such cast is permited, the function returns 0.
+/// If no such cast is permitted, the function returns 0.
unsigned CastInst::isEliminableCastPair(
Instruction::CastOps firstOp, Instruction::CastOps secondOp,
Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy,
@@ -2037,7 +2351,7 @@ unsigned CastInst::isEliminableCastPair(
// Define the 144 possibilities for these two cast instructions. The values
// in this matrix determine what to do in a given situation and select the
// case in the switch below. The rows correspond to firstOp, the columns
- // correspond to secondOp. In looking at the table below, keep in mind
+ // correspond to secondOp. In looking at the table below, keep in mind
// the following cast properties:
//
// Size Compare Source Destination
@@ -2087,17 +2401,19 @@ unsigned CastInst::isEliminableCastPair(
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
};
+ // TODO: This logic could be encoded into the table above and handled in the
+ // switch below.
// If either of the casts are a bitcast from scalar to vector, disallow the
- // merging. However, bitcast of A->B->A are allowed.
- bool isFirstBitcast = (firstOp == Instruction::BitCast);
- bool isSecondBitcast = (secondOp == Instruction::BitCast);
- bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast);
-
- // Check if any of the bitcasts convert scalars<->vectors.
- if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
- (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
- // Unless we are bitcasing to the original type, disallow optimizations.
- if (!chainedBitcast) return 0;
+  // merging. However, any pair of bitcasts is allowed.
+ bool IsFirstBitcast = (firstOp == Instruction::BitCast);
+ bool IsSecondBitcast = (secondOp == Instruction::BitCast);
+ bool AreBothBitcasts = IsFirstBitcast && IsSecondBitcast;
+
+ // Check if any of the casts convert scalars <-> vectors.
+ if ((IsFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (IsSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ if (!AreBothBitcasts)
+ return 0;
int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
[secondOp-Instruction::CastOpsBegin];
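A short usage sketch of the routine driven by this table, assuming a bitcast-of-bitcast pair; the helper name is hypothetical, and the IntPtr arguments are passed as null on the assumption that they are only consulted for pointer/integer cast pairs:

```cpp
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Returns the opcode of a single cast that can replace a bitcast-of-bitcast
// chain, or 0 if the pair may not be folded into one cast.
unsigned foldedBitcastOpcode(Type *SrcTy, Type *MidTy, Type *DstTy) {
  return CastInst::isEliminableCastPair(Instruction::BitCast,
                                        Instruction::BitCast, SrcTy, MidTy,
                                        DstTy, /*SrcIntPtrTy=*/nullptr,
                                        /*MidIntPtrTy=*/nullptr,
                                        /*DstIntPtrTy=*/nullptr);
}
```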
@@ -2966,9 +3282,8 @@ AddrSpaceCastInst::AddrSpaceCastInst(
void CmpInst::anchor() {}
-CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const Twine &Name,
- Instruction *InsertBefore)
+CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
+ Value *RHS, const Twine &Name, Instruction *InsertBefore)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
OperandTraits<CmpInst>::operands(this),
@@ -2979,9 +3294,8 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
setName(Name);
}
-CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const Twine &Name,
- BasicBlock *InsertAtEnd)
+CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
+ Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
OperandTraits<CmpInst>::operands(this),
@@ -2993,8 +3307,7 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
}
CmpInst *
-CmpInst::Create(OtherOps Op, unsigned short predicate,
- Value *S1, Value *S2,
+CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
const Twine &Name, Instruction *InsertBefore) {
if (Op == Instruction::ICmp) {
if (InsertBefore)
@@ -3014,7 +3327,7 @@ CmpInst::Create(OtherOps Op, unsigned short predicate,
}
CmpInst *
-CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
const Twine &Name, BasicBlock *InsertAtEnd) {
if (Op == Instruction::ICmp) {
return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
@@ -3077,6 +3390,8 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
}
}
+void ICmpInst::anchor() {}
+
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
@@ -3196,7 +3511,24 @@ CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
}
}
-bool CmpInst::isUnsigned(unsigned short predicate) {
+CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) {
+  assert(CmpInst::isUnsigned(pred) && "Call only with unsigned predicates!");
+
+ switch (pred) {
+ default:
+ llvm_unreachable("Unknown predicate!");
+ case CmpInst::ICMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::ICMP_ULE:
+ return CmpInst::ICMP_SLE;
+ case CmpInst::ICMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::ICMP_UGE:
+ return CmpInst::ICMP_SGE;
+ }
+}
+
+bool CmpInst::isUnsigned(Predicate predicate) {
switch (predicate) {
default: return false;
case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
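A hedged sketch of how the new CmpInst::getSignedPredicate overload might be used; the wrapper name is ours, and the isUnsigned guard mirrors the assert above:

```cpp
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Swaps ULT/ULE/UGT/UGE for their signed twins; everything else passes
// through unchanged, since getSignedPredicate asserts on non-unsigned input.
CmpInst::Predicate signedFormOf(CmpInst::Predicate P) {
  return CmpInst::isUnsigned(P) ? CmpInst::getSignedPredicate(P) : P;
}
```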
@@ -3204,7 +3536,7 @@ bool CmpInst::isUnsigned(unsigned short predicate) {
}
}
-bool CmpInst::isSigned(unsigned short predicate) {
+bool CmpInst::isSigned(Predicate predicate) {
switch (predicate) {
default: return false;
case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
@@ -3212,7 +3544,7 @@ bool CmpInst::isSigned(unsigned short predicate) {
}
}
-bool CmpInst::isOrdered(unsigned short predicate) {
+bool CmpInst::isOrdered(Predicate predicate) {
switch (predicate) {
default: return false;
case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
@@ -3221,7 +3553,7 @@ bool CmpInst::isOrdered(unsigned short predicate) {
}
}
-bool CmpInst::isUnordered(unsigned short predicate) {
+bool CmpInst::isUnordered(Predicate predicate) {
switch (predicate) {
default: return false;
case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
@@ -3230,7 +3562,7 @@ bool CmpInst::isUnordered(unsigned short predicate) {
}
}
-bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+bool CmpInst::isTrueWhenEqual(Predicate predicate) {
switch(predicate) {
default: return false;
case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
@@ -3238,7 +3570,7 @@ bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
}
}
-bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+bool CmpInst::isFalseWhenEqual(Predicate predicate) {
switch(predicate) {
case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
@@ -3569,6 +3901,10 @@ AddrSpaceCastInst *AddrSpaceCastInst::cloneImpl() const {
}
CallInst *CallInst::cloneImpl() const {
+ if (hasOperandBundles()) {
+ unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo);
+ return new(getNumOperands(), DescriptorBytes) CallInst(*this);
+ }
return new(getNumOperands()) CallInst(*this);
}
@@ -3613,11 +3949,31 @@ IndirectBrInst *IndirectBrInst::cloneImpl() const {
}
InvokeInst *InvokeInst::cloneImpl() const {
+ if (hasOperandBundles()) {
+ unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo);
+ return new(getNumOperands(), DescriptorBytes) InvokeInst(*this);
+ }
return new(getNumOperands()) InvokeInst(*this);
}
ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); }
+CleanupReturnInst *CleanupReturnInst::cloneImpl() const {
+ return new (getNumOperands()) CleanupReturnInst(*this);
+}
+
+CatchReturnInst *CatchReturnInst::cloneImpl() const {
+ return new (getNumOperands()) CatchReturnInst(*this);
+}
+
+CatchSwitchInst *CatchSwitchInst::cloneImpl() const {
+ return new CatchSwitchInst(*this);
+}
+
+FuncletPadInst *FuncletPadInst::cloneImpl() const {
+ return new (getNumOperands()) FuncletPadInst(*this);
+}
+
UnreachableInst *UnreachableInst::cloneImpl() const {
LLVMContext &Context = getContext();
return new UnreachableInst(Context);
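The extra descriptor bytes mean a cloned call or invoke keeps its operand bundles. A minimal sketch, with a hypothetical helper name:

```cpp
#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

// clone() dispatches to cloneImpl(), so the copy carries the original's
// operand bundles; the result is unparented and must be inserted (or
// deleted) by the caller.
CallInst *cloneCallWithBundles(const CallInst *CI) {
  auto *NewCI = cast<CallInst>(CI->clone());
  assert(NewCI->getNumOperandBundles() == CI->getNumOperandBundles());
  return NewCI;
}
```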
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index 6d799e4..8848bcb 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -104,6 +104,39 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
assert(DereferenceableOrNullID == MD_dereferenceable_or_null &&
"dereferenceable_or_null kind id drifted");
(void)DereferenceableOrNullID;
+
+ // Create the 'make.implicit' metadata kind.
+ unsigned MakeImplicitID = getMDKindID("make.implicit");
+ assert(MakeImplicitID == MD_make_implicit &&
+ "make.implicit kind id drifted");
+ (void)MakeImplicitID;
+
+ // Create the 'unpredictable' metadata kind.
+ unsigned UnpredictableID = getMDKindID("unpredictable");
+ assert(UnpredictableID == MD_unpredictable &&
+ "unpredictable kind id drifted");
+ (void)UnpredictableID;
+
+ // Create the 'invariant.group' metadata kind.
+ unsigned InvariantGroupId = getMDKindID("invariant.group");
+ assert(InvariantGroupId == MD_invariant_group &&
+ "invariant.group kind id drifted");
+ (void)InvariantGroupId;
+
+ // Create the 'align' metadata kind.
+ unsigned AlignID = getMDKindID("align");
+ assert(AlignID == MD_align && "align kind id drifted");
+ (void)AlignID;
+
+ auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt");
+ assert(DeoptEntry->second == LLVMContext::OB_deopt &&
+ "deopt operand bundle id drifted!");
+ (void)DeoptEntry;
+
+ auto *FuncletEntry = pImpl->getOrInsertBundleTag("funclet");
+ assert(FuncletEntry->second == LLVMContext::OB_funclet &&
+ "funclet operand bundle id drifted!");
+ (void)FuncletEntry;
}
LLVMContext::~LLVMContext() { delete pImpl; }
@@ -193,6 +226,11 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) {
if (!cast<DiagnosticInfoOptimizationRemarkAnalysis>(DI).isEnabled())
return false;
break;
+ case llvm::DK_OptimizationRemarkAnalysisFPCommute:
+ if (!cast<DiagnosticInfoOptimizationRemarkAnalysisFPCommute>(DI)
+ .isEnabled())
+ return false;
+ break;
default:
break;
}
@@ -250,7 +288,7 @@ unsigned LLVMContext::getMDKindID(StringRef Name) const {
.first->second;
}
-/// getHandlerNames - Populate client supplied smallvector using custome
+/// getMDKindNames - Populate the client-supplied SmallVector with the custom
 /// metadata names and IDs.
void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
Names.resize(pImpl->CustomMDKindNames.size());
@@ -258,3 +296,11 @@ void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
E = pImpl->CustomMDKindNames.end(); I != E; ++I)
Names[I->second] = I->first();
}
+
+void LLVMContext::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const {
+ pImpl->getOperandBundleTags(Tags);
+}
+
+uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const {
+ return pImpl->getOperandBundleTagID(Tag);
+}
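A small sketch of the two accessors added above, assuming a live context; the asserts rely on "deopt" and "funclet" being interned first in the constructor:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

void checkBundleTags(LLVMContext &Ctx) {
  // Tag IDs index the returned list, so Tags[ID] is the tag's string.
  SmallVector<StringRef, 8> Tags;
  Ctx.getOperandBundleTags(Tags);
  assert(Ctx.getOperandBundleTagID("deopt") == LLVMContext::OB_deopt);
  assert(Ctx.getOperandBundleTagID("funclet") == LLVMContext::OB_funclet);
  (void)Tags;
}
```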
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
index 1e20807..5239b4f 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
@@ -27,6 +27,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
FloatTy(C, Type::FloatTyID),
DoubleTy(C, Type::DoubleTyID),
MetadataTy(C, Type::MetadataTyID),
+ TokenTy(C, Type::TokenTyID),
X86_FP80Ty(C, Type::X86_FP80TyID),
FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID),
@@ -78,7 +79,7 @@ LLVMContextImpl::~LLVMContextImpl() {
// unnecessary RAUW when nodes are still unresolved.
for (auto *I : DistinctMDNodes)
I->dropAllReferences();
-#define HANDLE_MDNODE_LEAF(CLASS) \
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
for (auto *I : CLASS##s) \
I->dropAllReferences();
#include "llvm/IR/Metadata.def"
@@ -92,8 +93,8 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy MDNodes.
for (MDNode *I : DistinctMDNodes)
I->deleteAsSubclass();
-#define HANDLE_MDNODE_LEAF(CLASS) \
- for (CLASS *I : CLASS##s) \
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
+ for (CLASS * I : CLASS##s) \
delete I;
#include "llvm/IR/Metadata.def"
@@ -218,6 +219,23 @@ unsigned MDNodeOpsKey::calculateHash(ArrayRef<Metadata *> Ops) {
return hash_combine_range(Ops.begin(), Ops.end());
}
+StringMapEntry<uint32_t> *LLVMContextImpl::getOrInsertBundleTag(StringRef Tag) {
+ uint32_t NewIdx = BundleTagCache.size();
+ return &*(BundleTagCache.insert(std::make_pair(Tag, NewIdx)).first);
+}
+
+void LLVMContextImpl::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const {
+ Tags.resize(BundleTagCache.size());
+ for (const auto &T : BundleTagCache)
+ Tags[T.second] = T.first();
+}
+
+uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const {
+ auto I = BundleTagCache.find(Tag);
+ assert(I != BundleTagCache.end() && "Unknown tag!");
+ return I->second;
+}
+
// ConstantsContext anchors
void UnaryConstantExpr::anchor() { }
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index cbbf11e..a24114d 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -458,67 +458,6 @@ template <> struct MDNodeKeyImpl<DIFile> {
unsigned getHashValue() const { return hash_combine(Filename, Directory); }
};
-template <> struct MDNodeKeyImpl<DICompileUnit> {
- unsigned SourceLanguage;
- Metadata *File;
- StringRef Producer;
- bool IsOptimized;
- StringRef Flags;
- unsigned RuntimeVersion;
- StringRef SplitDebugFilename;
- unsigned EmissionKind;
- Metadata *EnumTypes;
- Metadata *RetainedTypes;
- Metadata *Subprograms;
- Metadata *GlobalVariables;
- Metadata *ImportedEntities;
- uint64_t DWOId;
-
- MDNodeKeyImpl(unsigned SourceLanguage, Metadata *File, StringRef Producer,
- bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
- StringRef SplitDebugFilename, unsigned EmissionKind,
- Metadata *EnumTypes, Metadata *RetainedTypes,
- Metadata *Subprograms, Metadata *GlobalVariables,
- Metadata *ImportedEntities, uint64_t DWOId)
- : SourceLanguage(SourceLanguage), File(File), Producer(Producer),
- IsOptimized(IsOptimized), Flags(Flags), RuntimeVersion(RuntimeVersion),
- SplitDebugFilename(SplitDebugFilename), EmissionKind(EmissionKind),
- EnumTypes(EnumTypes), RetainedTypes(RetainedTypes),
- Subprograms(Subprograms), GlobalVariables(GlobalVariables),
- ImportedEntities(ImportedEntities), DWOId(DWOId) {}
- MDNodeKeyImpl(const DICompileUnit *N)
- : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()),
- Producer(N->getProducer()), IsOptimized(N->isOptimized()),
- Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()),
- SplitDebugFilename(N->getSplitDebugFilename()),
- EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()),
- RetainedTypes(N->getRawRetainedTypes()),
- Subprograms(N->getRawSubprograms()),
- GlobalVariables(N->getRawGlobalVariables()),
- ImportedEntities(N->getRawImportedEntities()), DWOId(N->getDWOId()) {}
-
- bool isKeyOf(const DICompileUnit *RHS) const {
- return SourceLanguage == RHS->getSourceLanguage() &&
- File == RHS->getRawFile() && Producer == RHS->getProducer() &&
- IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() &&
- RuntimeVersion == RHS->getRuntimeVersion() &&
- SplitDebugFilename == RHS->getSplitDebugFilename() &&
- EmissionKind == RHS->getEmissionKind() &&
- EnumTypes == RHS->getRawEnumTypes() &&
- RetainedTypes == RHS->getRawRetainedTypes() &&
- Subprograms == RHS->getRawSubprograms() &&
- GlobalVariables == RHS->getRawGlobalVariables() &&
- ImportedEntities == RHS->getRawImportedEntities() &&
- DWOId == RHS->getDWOId();
- }
- unsigned getHashValue() const {
- return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags,
- RuntimeVersion, SplitDebugFilename, EmissionKind,
- EnumTypes, RetainedTypes, Subprograms, GlobalVariables,
- ImportedEntities, DWOId);
- }
-};
-
template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *Scope;
StringRef Name;
@@ -534,7 +473,6 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
unsigned VirtualIndex;
unsigned Flags;
bool IsOptimized;
- Metadata *Function;
Metadata *TemplateParams;
Metadata *Declaration;
Metadata *Variables;
@@ -544,15 +482,15 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
Metadata *ContainingType, unsigned Virtuality,
unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
- Metadata *Function, Metadata *TemplateParams,
- Metadata *Declaration, Metadata *Variables)
+ Metadata *TemplateParams, Metadata *Declaration,
+ Metadata *Variables)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition), ScopeLine(ScopeLine),
ContainingType(ContainingType), Virtuality(Virtuality),
VirtualIndex(VirtualIndex), Flags(Flags), IsOptimized(IsOptimized),
- Function(Function), TemplateParams(TemplateParams),
- Declaration(Declaration), Variables(Variables) {}
+ TemplateParams(TemplateParams), Declaration(Declaration),
+ Variables(Variables) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getName()),
LinkageName(N->getLinkageName()), File(N->getRawFile()),
@@ -561,7 +499,6 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()),
Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()),
Flags(N->getFlags()), IsOptimized(N->isOptimized()),
- Function(N->getRawFunction()),
TemplateParams(N->getRawTemplateParams()),
Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {}
@@ -576,7 +513,6 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Virtuality == RHS->getVirtuality() &&
VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() &&
IsOptimized == RHS->isOptimized() &&
- Function == RHS->getRawFunction() &&
TemplateParams == RHS->getRawTemplateParams() &&
Declaration == RHS->getRawDeclaration() &&
Variables == RHS->getRawVariables();
@@ -584,7 +520,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
unsigned getHashValue() const {
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
- Virtuality, VirtualIndex, Flags, IsOptimized, Function,
+ Virtuality, VirtualIndex, Flags, IsOptimized,
TemplateParams, Declaration, Variables);
}
};
@@ -759,7 +695,6 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
};
template <> struct MDNodeKeyImpl<DILocalVariable> {
- unsigned Tag;
Metadata *Scope;
StringRef Name;
Metadata *File;
@@ -768,23 +703,23 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
unsigned Arg;
unsigned Flags;
- MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File,
- unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags)
- : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type),
- Arg(Arg), Flags(Flags) {}
+ MDNodeKeyImpl(Metadata *Scope, StringRef Name, Metadata *File, unsigned Line,
+ Metadata *Type, unsigned Arg, unsigned Flags)
+ : Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
+ Flags(Flags) {}
MDNodeKeyImpl(const DILocalVariable *N)
- : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()),
- File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()),
- Arg(N->getArg()), Flags(N->getFlags()) {}
+ : Scope(N->getRawScope()), Name(N->getName()), File(N->getRawFile()),
+ Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
+ Flags(N->getFlags()) {}
bool isKeyOf(const DILocalVariable *RHS) const {
- return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
- Name == RHS->getName() && File == RHS->getRawFile() &&
- Line == RHS->getLine() && Type == RHS->getRawType() &&
- Arg == RHS->getArg() && Flags == RHS->getFlags();
+ return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+ File == RHS->getRawFile() && Line == RHS->getLine() &&
+ Type == RHS->getRawType() && Arg == RHS->getArg() &&
+ Flags == RHS->getFlags();
}
unsigned getHashValue() const {
- return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags);
+ return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
}
};
@@ -857,6 +792,49 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> {
}
};
+template <> struct MDNodeKeyImpl<DIMacro> {
+ unsigned MIType;
+ unsigned Line;
+ StringRef Name;
+ StringRef Value;
+
+ MDNodeKeyImpl(unsigned MIType, unsigned Line, StringRef Name, StringRef Value)
+ : MIType(MIType), Line(Line), Name(Name), Value(Value) {}
+ MDNodeKeyImpl(const DIMacro *N)
+ : MIType(N->getMacinfoType()), Line(N->getLine()), Name(N->getName()),
+ Value(N->getValue()) {}
+
+ bool isKeyOf(const DIMacro *RHS) const {
+ return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+ Name == RHS->getName() && Value == RHS->getValue();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(MIType, Line, Name, Value);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DIMacroFile> {
+ unsigned MIType;
+ unsigned Line;
+ Metadata *File;
+ Metadata *Elements;
+
+ MDNodeKeyImpl(unsigned MIType, unsigned Line, Metadata *File,
+ Metadata *Elements)
+ : MIType(MIType), Line(Line), File(File), Elements(Elements) {}
+ MDNodeKeyImpl(const DIMacroFile *N)
+ : MIType(N->getMacinfoType()), Line(N->getLine()), File(N->getRawFile()),
+ Elements(N->getRawElements()) {}
+
+ bool isKeyOf(const DIMacroFile *RHS) const {
+ return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+           File == RHS->getRawFile() && Elements == RHS->getRawElements();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(MIType, Line, File, Elements);
+ }
+};
+
/// \brief DenseMapInfo for MDNode subclasses.
template <class NodeTy> struct MDNodeInfo {
typedef MDNodeKeyImpl<NodeTy> KeyTy;
@@ -953,7 +931,8 @@ public:
DenseMap<const Value*, ValueName*> ValueNames;
-#define HANDLE_MDNODE_LEAF(CLASS) DenseSet<CLASS *, CLASS##Info> CLASS##s;
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
+ DenseSet<CLASS *, CLASS##Info> CLASS##s;
#include "llvm/IR/Metadata.def"
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
@@ -988,8 +967,10 @@ public:
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
+ std::unique_ptr<ConstantTokenNone> TheNoneToken;
+
// Basic type instances.
- Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
+ Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
@@ -1033,20 +1014,19 @@ public:
/// instructions in different blocks at the same location.
DenseMap<std::pair<const char *, unsigned>, unsigned> DiscriminatorTable;
- /// \brief Mapping from a function to its prefix data, which is stored as the
- /// operand of an unparented ReturnInst so that the prefix data has a Use.
- typedef DenseMap<const Function *, ReturnInst *> PrefixDataMapTy;
- PrefixDataMapTy PrefixDataMap;
-
- /// \brief Mapping from a function to its prologue data, which is stored as
- /// the operand of an unparented ReturnInst so that the prologue data has a
- /// Use.
- typedef DenseMap<const Function *, ReturnInst *> PrologueDataMapTy;
- PrologueDataMapTy PrologueDataMap;
-
int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+ /// \brief A set of interned tags for operand bundles. The StringMap maps
+ /// bundle tags to their IDs.
+ ///
+ /// \see LLVMContext::getOperandBundleTagID
+ StringMap<uint32_t> BundleTagCache;
+
+ StringMapEntry<uint32_t> *getOrInsertBundleTag(StringRef Tag);
+ void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const;
+ uint32_t getOperandBundleTagID(StringRef Tag) const;
+
LLVMContextImpl(LLVMContext &C);
~LLVMContextImpl();
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index 27d98a2..f2e0c7d 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -569,13 +569,33 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
AnalysisUsage *AnUsage = nullptr;
- DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ auto DMI = AnUsageMap.find(P);
if (DMI != AnUsageMap.end())
AnUsage = DMI->second;
else {
- AnUsage = new AnalysisUsage();
- P->getAnalysisUsage(*AnUsage);
- AnUsageMap[P] = AnUsage;
+    // Look up the analysis usage from the pass instance (different instances
+    // of the same pass can produce different results), but unique the
+    // resulting object to reduce memory usage. This helps greatly when there
+    // are many instances of only a few pass types (e.g. instcombine,
+    // simplifycfg, etc.), which tend to share a fixed set of dependencies.
+ AnalysisUsage AU;
+ P->getAnalysisUsage(AU);
+
+      AUFoldingSetNode *Node = nullptr;
+ FoldingSetNodeID ID;
+ AUFoldingSetNode::Profile(ID, AU);
+ void *IP = nullptr;
+ if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP))
+ Node = N;
+ else {
+ Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU);
+ UniqueAnalysisUsages.InsertNode(Node, IP);
+ }
+    assert(Node && "cached analysis usage must be non-null");
+
+    AnUsageMap[P] = &Node->AU;
+    AnUsage = &Node->AU;
}
return AnUsage;
}
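The uniquing above is the standard FoldingSet intern pattern. A self-contained sketch with a hypothetical IntNode standing in for AUFoldingSetNode:

```cpp
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"
using namespace llvm;

// A node profiles its contents into a FoldingSetNodeID; FindNodeOrInsertPos
// either returns an existing node with that profile or an insertion hint.
struct IntNode : FoldingSetNode {
  int Key;
  explicit IntNode(int K) : Key(K) {}
  void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Key); }
};

IntNode *getOrCreate(FoldingSet<IntNode> &Set,
                     SpecificBumpPtrAllocator<IntNode> &Alloc, int Key) {
  FoldingSetNodeID ID;
  ID.AddInteger(Key);
  void *IP = nullptr;
  if (IntNode *N = Set.FindNodeOrInsertPos(ID, IP))
    return N; // reuse the interned node
  IntNode *N = new (Alloc.Allocate()) IntNode(Key);
  Set.InsertNode(N, IP);
  return N;
}
```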
@@ -686,6 +706,10 @@ void PMTopLevelManager::schedulePass(Pass *P) {
/// passes and all pass managers. If desired pass is not found
/// then return NULL.
Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+ // For immutable passes we have a direct mapping from ID to pass, so check
+ // that first.
+ if (Pass *P = ImmutablePassMap.lookup(AID))
+ return P;
// Check pass managers
for (PMDataManager *PassManager : PassManagers)
@@ -697,24 +721,6 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
if (Pass *P = IndirectPassManager->findAnalysisPass(AID, false))
return P;
- // Check the immutable passes. Iterate in reverse order so that we find
- // the most recently registered passes first.
- for (auto I = ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E;
- ++I) {
- AnalysisID PI = (*I)->getPassID();
- if (PI == AID)
- return *I;
-
- // If Pass not found then check the interfaces implemented by Immutable Pass
- const PassInfo *PassInf = findAnalysisPassInfo(PI);
- assert(PassInf && "Expected all immutable passes to be initialized");
- const std::vector<const PassInfo*> &ImmPI =
- PassInf->getInterfacesImplemented();
- for (const PassInfo *PI : ImmPI)
- if (PI->getTypeInfo() == AID)
- return *I;
- }
-
return nullptr;
}
@@ -729,6 +735,24 @@ const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const {
return PI;
}
+void PMTopLevelManager::addImmutablePass(ImmutablePass *P) {
+ P->initializePass();
+ ImmutablePasses.push_back(P);
+
+ // Add this pass to the map from its analysis ID. We clobber any prior runs
+ // of the pass in the map so that the last one added is the one found when
+ // doing lookups.
+ AnalysisID AID = P->getPassID();
+ ImmutablePassMap[AID] = P;
+
+ // Also add any interfaces implemented by the immutable pass to the map for
+ // fast lookup.
+ const PassInfo *PassInf = findAnalysisPassInfo(AID);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ for (const PassInfo *ImmPI : PassInf->getInterfacesImplemented())
+ ImmutablePassMap[ImmPI->getTypeInfo()] = P;
+}
+
// Print passes managed by this top level manager.
void PMTopLevelManager::dumpPasses() const {
@@ -780,15 +804,8 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
DME = LastUser.end(); DMI != DME; ++DMI) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
- InversedLastUser.find(DMI->second);
- if (InvDMI != InversedLastUser.end()) {
- SmallPtrSet<Pass *, 8> &L = InvDMI->second;
- L.insert(DMI->first);
- } else {
- SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
- InversedLastUser[DMI->second] = L;
- }
+ SmallPtrSet<Pass *, 8> &L = InversedLastUser[DMI->second];
+ L.insert(DMI->first);
}
}
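The rewritten loop leans on DenseMap::operator[] default-constructing a missing value. A reduced sketch with placeholder pointer types:

```cpp
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

// Inverts a last-user map; operator[] creates the set on first touch, so no
// find-then-insert dance is needed.
void invertLastUser(const DenseMap<int *, int *> &LastUser,
                    DenseMap<int *, SmallPtrSet<int *, 8>> &Inversed) {
  for (const auto &Entry : LastUser)
    Inversed[Entry.second].insert(Entry.first);
}
```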
@@ -801,10 +818,6 @@ PMTopLevelManager::~PMTopLevelManager() {
for (SmallVectorImpl<ImmutablePass *>::iterator
I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
delete *I;
-
- for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
- DME = AnUsageMap.end(); DMI != DME; ++DMI)
- delete DMI->second;
}
//===----------------------------------------------------------------------===//
@@ -989,31 +1002,28 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
// At the moment, this pass is the last user of all required passes.
SmallVector<Pass *, 12> LastUses;
- SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<Pass *, 8> UsedPasses;
SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
unsigned PDepth = this->getDepth();
- collectRequiredAnalysis(RequiredPasses,
- ReqAnalysisNotAvailable, P);
- for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
- E = RequiredPasses.end(); I != E; ++I) {
- Pass *PRequired = *I;
+ collectRequiredAndUsedAnalyses(UsedPasses, ReqAnalysisNotAvailable, P);
+ for (Pass *PUsed : UsedPasses) {
unsigned RDepth = 0;
- assert(PRequired->getResolver() && "Analysis Resolver is not set");
- PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ assert(PUsed->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PUsed->getResolver()->getPMDataManager();
RDepth = DM.getDepth();
if (PDepth == RDepth)
- LastUses.push_back(PRequired);
+ LastUses.push_back(PUsed);
else if (PDepth > RDepth) {
// Let the parent claim responsibility of last use
- TransferLastUses.push_back(PRequired);
+ TransferLastUses.push_back(PUsed);
// Keep track of higher level analysis used by this manager.
- HigherLevelAnalysis.push_back(PRequired);
+ HigherLevelAnalysis.push_back(PUsed);
} else
- llvm_unreachable("Unable to accommodate Required Pass");
+ llvm_unreachable("Unable to accommodate Used Pass");
}
// Set P as P's last user until someone starts using P.
@@ -1030,10 +1040,8 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
}
// Now, take care of required analyses that are not available.
- for (SmallVectorImpl<AnalysisID>::iterator
- I = ReqAnalysisNotAvailable.begin(),
- E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
- const PassInfo *PI = TPM->findAnalysisPassInfo(*I);
+ for (AnalysisID ID : ReqAnalysisNotAvailable) {
+ const PassInfo *PI = TPM->findAnalysisPassInfo(ID);
Pass *AnalysisPass = PI->createPass();
this->addLowerLevelRequiredPass(P, AnalysisPass);
}
@@ -1048,30 +1056,29 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
}
-/// Populate RP with analysis pass that are required by
+/// Populate UP with analysis passes that are used or required by
 /// pass P and are available. Populate RP_NotAvail with analysis
 /// passes that are required by pass P but are not available.
-void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
- SmallVectorImpl<AnalysisID> &RP_NotAvail,
- Pass *P) {
+void PMDataManager::collectRequiredAndUsedAnalyses(
+ SmallVectorImpl<Pass *> &UP, SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator
- I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+
+ for (const auto &UsedID : AnUsage->getUsedSet())
+ if (Pass *AnalysisPass = findAnalysisPass(UsedID, true))
+ UP.push_back(AnalysisPass);
+
+ for (const auto &RequiredID : AnUsage->getRequiredSet())
+ if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true))
+ UP.push_back(AnalysisPass);
else
- RP_NotAvail.push_back(*I);
- }
+ RP_NotAvail.push_back(RequiredID);
- const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
- for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
- E = IDs.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+ for (const auto &RequiredID : AnUsage->getRequiredTransitiveSet())
+ if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true))
+ UP.push_back(AnalysisPass);
else
- RP_NotAvail.push_back(*I);
- }
+ RP_NotAvail.push_back(RequiredID);
}
// All Required analyses should be available to the pass as it runs! Here
@@ -1206,6 +1213,15 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
}
+void PMDataManager::dumpUsedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Used", P, analysisUsage.getUsedSet());
+}
+
void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const {
assert(PassDebugging >= Details);
@@ -1310,6 +1326,7 @@ bool BBPassManager::runOnFunction(Function &F) {
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
I->getName());
dumpPreservedSet(BP);
+ dumpUsedSet(BP);
verifyPreservedAnalysis(BP);
removeNotPreservedAnalysis(BP);
@@ -1524,6 +1541,7 @@ bool FPPassManager::runOnFunction(Function &F) {
if (LocalChanged)
dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
dumpPreservedSet(FP);
+ dumpUsedSet(FP);
verifyPreservedAnalysis(FP);
removeNotPreservedAnalysis(FP);
@@ -1601,6 +1619,7 @@ MPPassManager::runOnModule(Module &M) {
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
M.getModuleIdentifier());
dumpPreservedSet(MP);
+ dumpUsedSet(MP);
verifyPreservedAnalysis(MP);
removeNotPreservedAnalysis(MP);
diff --git a/contrib/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm/lib/IR/MDBuilder.cpp
index b4c5ca7..4ce3ea2 100644
--- a/contrib/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm/lib/IR/MDBuilder.cpp
@@ -36,8 +36,7 @@ MDNode *MDBuilder::createFPMath(float Accuracy) {
MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight,
uint32_t FalseWeight) {
- uint32_t Weights[] = {TrueWeight, FalseWeight};
- return createBranchWeights(Weights);
+ return createBranchWeights({TrueWeight, FalseWeight});
}
MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
@@ -53,14 +52,15 @@ MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
return MDNode::get(Context, Vals);
}
-MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
- SmallVector<Metadata *, 2> Vals(2);
- Vals[0] = createString("function_entry_count");
+MDNode *MDBuilder::createUnpredictable() {
+ return MDNode::get(Context, None);
+}
+MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
Type *Int64Ty = Type::getInt64Ty(Context);
- Vals[1] = createConstant(ConstantInt::get(Int64Ty, Count));
-
- return MDNode::get(Context, Vals);
+ return MDNode::get(Context,
+ {createString("function_entry_count"),
+ createConstant(ConstantInt::get(Int64Ty, Count))});
}
MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
@@ -76,8 +76,7 @@ MDNode *MDBuilder::createRange(Constant *Lo, Constant *Hi) {
return nullptr;
// Return the range [Lo, Hi).
- Metadata *Range[2] = {createConstant(Lo), createConstant(Hi)};
- return MDNode::get(Context, Range);
+ return MDNode::get(Context, {createConstant(Lo), createConstant(Hi)});
}
MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
@@ -112,12 +111,10 @@ MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent,
bool isConstant) {
if (isConstant) {
Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1);
- Metadata *Ops[3] = {createString(Name), Parent, createConstant(Flags)};
- return MDNode::get(Context, Ops);
- } else {
- Metadata *Ops[2] = {createString(Name), Parent};
- return MDNode::get(Context, Ops);
+ return MDNode::get(Context,
+ {createString(Name), Parent, createConstant(Flags)});
}
+ return MDNode::get(Context, {createString(Name), Parent});
}
MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) {
@@ -125,8 +122,7 @@ MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) {
}
MDNode *MDBuilder::createAliasScope(StringRef Name, MDNode *Domain) {
- Metadata *Ops[2] = {createString(Name), Domain};
- return MDNode::get(Context, Ops);
+ return MDNode::get(Context, {createString(Name), Domain});
}
/// \brief Return metadata for a tbaa.struct node with the given
@@ -161,23 +157,19 @@ MDNode *MDBuilder::createTBAAStructTypeNode(
MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent,
uint64_t Offset) {
ConstantInt *Off = ConstantInt::get(Type::getInt64Ty(Context), Offset);
- Metadata *Ops[3] = {createString(Name), Parent, createConstant(Off)};
- return MDNode::get(Context, Ops);
+ return MDNode::get(Context,
+ {createString(Name), Parent, createConstant(Off)});
}
/// \brief Return metadata for a TBAA tag node with the given
/// base type, access type and offset relative to the base type.
MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
uint64_t Offset, bool IsConstant) {
- Type *Int64 = Type::getInt64Ty(Context);
+ IntegerType *Int64 = Type::getInt64Ty(Context);
+ ConstantInt *Off = ConstantInt::get(Int64, Offset);
if (IsConstant) {
- Metadata *Ops[4] = {BaseType, AccessType,
- createConstant(ConstantInt::get(Int64, Offset)),
- createConstant(ConstantInt::get(Int64, 1))};
- return MDNode::get(Context, Ops);
- } else {
- Metadata *Ops[3] = {BaseType, AccessType,
- createConstant(ConstantInt::get(Int64, Offset))};
- return MDNode::get(Context, Ops);
+ return MDNode::get(Context, {BaseType, AccessType, createConstant(Off),
+ createConstant(ConstantInt::get(Int64, 1))});
}
+ return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)});
}
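A brief sketch combining the new createUnpredictable with the existing branch-weight helper; the 90/10 split and the helper name are illustrative:

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

// Tags a conditional branch with static weights plus the new
// 'unpredictable' marker added in this patch.
void annotateBranch(BranchInst *BI) {
  MDBuilder MDB(BI->getContext());
  BI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(90, 10));
  BI->setMetadata(LLVMContext::MD_unpredictable, MDB.createUnpredictable());
}
```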
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 1abcf0d..ab1ba5e 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -120,6 +120,38 @@ void MetadataAsValue::untrack() {
MetadataTracking::untrack(MD);
}
+bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) {
+ assert(Ref && "Expected live reference");
+ assert((Owner || *static_cast<Metadata **>(Ref) == &MD) &&
+ "Reference without owner must be direct");
+ if (auto *R = ReplaceableMetadataImpl::get(MD)) {
+ R->addRef(Ref, Owner);
+ return true;
+ }
+ return false;
+}
+
+void MetadataTracking::untrack(void *Ref, Metadata &MD) {
+ assert(Ref && "Expected live reference");
+ if (auto *R = ReplaceableMetadataImpl::get(MD))
+ R->dropRef(Ref);
+}
+
+bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) {
+ assert(Ref && "Expected live reference");
+ assert(New && "Expected live reference");
+ assert(Ref != New && "Expected change");
+ if (auto *R = ReplaceableMetadataImpl::get(MD)) {
+ R->moveRef(Ref, New, MD);
+ return true;
+ }
+ return false;
+}
+
+bool MetadataTracking::isReplaceable(const Metadata &MD) {
+ return ReplaceableMetadataImpl::get(const_cast<Metadata &>(MD));
+}
+
void ReplaceableMetadataImpl::addRef(void *Ref, OwnerTy Owner) {
bool WasInserted =
UseMap.insert(std::make_pair(Ref, std::make_pair(Owner, NextIndex)))
@@ -239,6 +271,12 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
}
}
+ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) {
+ if (auto *N = dyn_cast<MDNode>(&MD))
+ return N->Context.getReplaceableUses();
+ return dyn_cast<ValueAsMetadata>(&MD);
+}
+
static Function *getLocalFunction(Value *V) {
assert(V && "Expected value");
if (auto *A = dyn_cast<Argument>(V))
@@ -517,7 +555,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
-void MDNode::resolveCycles() {
+void MDNode::resolveCycles(bool MDMaterialized) {
if (isResolved())
return;
@@ -530,6 +568,8 @@ void MDNode::resolveCycles() {
if (!N)
continue;
+ if (N->isTemporary() && !MDMaterialized)
+ continue;
assert(!N->isTemporary() &&
"Expected all forward declarations to be resolved");
if (!N->isResolved())
@@ -545,6 +585,18 @@ static bool hasSelfReference(MDNode *N) {
}
MDNode *MDNode::replaceWithPermanentImpl() {
+ switch (getMetadataID()) {
+ default:
+ // If this type isn't uniquable, replace with a distinct node.
+ return replaceWithDistinctImpl();
+
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
+ case CLASS##Kind: \
+ break;
+#include "llvm/IR/Metadata.def"
+ }
+
+ // Even if this type is uniquable, self-references have to be distinct.
if (hasSelfReference(this))
return replaceWithDistinctImpl();
return replaceWithUniquedImpl();
@@ -671,8 +723,8 @@ MDNode *MDNode::uniquify() {
// Try to insert into uniquing store.
switch (getMetadataID()) {
default:
- llvm_unreachable("Invalid subclass of MDNode");
-#define HANDLE_MDNODE_LEAF(CLASS) \
+ llvm_unreachable("Invalid or non-uniquable subclass of MDNode");
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
case CLASS##Kind: { \
CLASS *SubclassThis = cast<CLASS>(this); \
std::integral_constant<bool, HasCachedHash<CLASS>::value> \
@@ -687,8 +739,8 @@ MDNode *MDNode::uniquify() {
void MDNode::eraseFromStore() {
switch (getMetadataID()) {
default:
- llvm_unreachable("Invalid subclass of MDNode");
-#define HANDLE_MDNODE_LEAF(CLASS) \
+ llvm_unreachable("Invalid or non-uniquable subclass of MDNode");
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
case CLASS##Kind: \
getContext().pImpl->CLASS##s.erase(cast<CLASS>(this)); \
break;
@@ -941,6 +993,17 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
return MDNode::get(A->getContext(), MDs);
}
+MDNode *MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return nullptr;
+
+ ConstantInt *AVal = mdconst::extract<ConstantInt>(A->getOperand(0));
+ ConstantInt *BVal = mdconst::extract<ConstantInt>(B->getOperand(0));
+ if (AVal->getZExtValue() < BVal->getZExtValue())
+ return A;
+ return B;
+}
+
//===----------------------------------------------------------------------===//
// NamedMDNode implementation.
//
@@ -1045,14 +1108,10 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
-void Instruction::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
+void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
SmallSet<unsigned, 5> KnownSet;
KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
- // Drop debug if needed
- if (KnownSet.erase(LLVMContext::MD_dbg))
- DbgLoc = DebugLoc();
-
if (!hasMetadataHashEntry())
return; // Nothing to remove!
@@ -1077,7 +1136,7 @@ void Instruction::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
}
}
-/// setMetadata - Set the metadata of of the specified kind to the specified
+/// setMetadata - Set the metadata of the specified kind to the specified
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
@@ -1251,3 +1310,11 @@ void Function::clearMetadata() {
getContext().pImpl->FunctionMetadata.erase(this);
setHasMetadataHashEntry(false);
}
+
+void Function::setSubprogram(DISubprogram *SP) {
+ setMetadata(LLVMContext::MD_dbg, SP);
+}
+
+DISubprogram *Function::getSubprogram() const {
+ return cast_or_null<DISubprogram>(getMetadata(LLVMContext::MD_dbg));
+}
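One plausible use of the new getMostGenericAlignmentOrDereferenceable helper, sketched under the assumption that the two instructions carry !align metadata; a null merge result simply drops the annotation, which is always safe:

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Merging !align when combining two loads: the helper keeps the smaller
// (more conservative) value, and returns null if either input lacks it.
void mergeAlignMD(LoadInst *To, const LoadInst *From) {
  To->setMetadata(LLVMContext::MD_align,
                  MDNode::getMostGenericAlignmentOrDereferenceable(
                      To->getMetadata(LLVMContext::MD_align),
                      From->getMetadata(LLVMContext::MD_align)));
}
```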
diff --git a/contrib/llvm/lib/IR/MetadataImpl.h b/contrib/llvm/lib/IR/MetadataImpl.h
index 662a50e..b913746 100644
--- a/contrib/llvm/lib/IR/MetadataImpl.h
+++ b/contrib/llvm/lib/IR/MetadataImpl.h
@@ -26,6 +26,19 @@ static T *getUniqued(DenseSet<T *, InfoT> &Store,
return I == Store.end() ? nullptr : *I;
}
+template <class T> T *MDNode::storeImpl(T *N, StorageType Storage) {
+ switch (Storage) {
+ case Uniqued:
+ llvm_unreachable("Cannot unique without a uniquing-store");
+ case Distinct:
+ N->storeDistinctInContext();
+ break;
+ case Temporary:
+ break;
+ }
+ return N;
+}
+
template <class T, class StoreT>
T *MDNode::storeImpl(T *N, StorageType Storage, StoreT &Store) {
switch (Storage) {
diff --git a/contrib/llvm/lib/IR/MetadataTracking.cpp b/contrib/llvm/lib/IR/MetadataTracking.cpp
deleted file mode 100644
index 47f0b93..0000000
--- a/contrib/llvm/lib/IR/MetadataTracking.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===- MetadataTracking.cpp - Implement metadata tracking -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Metadata tracking.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/MetadataTracking.h"
-#include "llvm/IR/Metadata.h"
-
-using namespace llvm;
-
-ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) {
- if (auto *N = dyn_cast<MDNode>(&MD))
- return N->Context.getReplaceableUses();
- return dyn_cast<ValueAsMetadata>(&MD);
-}
-
-bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) {
- assert(Ref && "Expected live reference");
- assert((Owner || *static_cast<Metadata **>(Ref) == &MD) &&
- "Reference without owner must be direct");
- if (auto *R = ReplaceableMetadataImpl::get(MD)) {
- R->addRef(Ref, Owner);
- return true;
- }
- return false;
-}
-
-void MetadataTracking::untrack(void *Ref, Metadata &MD) {
- assert(Ref && "Expected live reference");
- if (auto *R = ReplaceableMetadataImpl::get(MD))
- R->dropRef(Ref);
-}
-
-bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) {
- assert(Ref && "Expected live reference");
- assert(New && "Expected live reference");
- assert(Ref != New && "Expected change");
- if (auto *R = ReplaceableMetadataImpl::get(MD)) {
- R->moveRef(Ref, New, MD);
- return true;
- }
- return false;
-}
-
-bool MetadataTracking::isReplaceable(const Metadata &MD) {
- return ReplaceableMetadataImpl::get(const_cast<Metadata &>(MD));
-}
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index 043f74e..ac578d6 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -29,6 +29,7 @@
#include <algorithm>
#include <cstdarg>
#include <cstdlib>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -37,9 +38,9 @@ using namespace llvm;
// Explicit instantiations of SymbolTableListTraits since some of the methods
// are not in the public header file.
-template class llvm::SymbolTableListTraits<Function, Module>;
-template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
-template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+template class llvm::SymbolTableListTraits<Function>;
+template class llvm::SymbolTableListTraits<GlobalVariable>;
+template class llvm::SymbolTableListTraits<GlobalAlias>;
//===----------------------------------------------------------------------===//
// Primitive Module methods.
@@ -81,7 +82,6 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const {
return new RandomNumberGenerator(Salt);
}
-
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
@@ -102,6 +102,9 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
return Context.getMDKindNames(Result);
}
+void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
+ return Context.getOperandBundleTags(Result);
+}
//===----------------------------------------------------------------------===//
// Methods for easy access to the functions in the module.
@@ -274,7 +277,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
/// delete it.
void Module::eraseNamedMetadata(NamedMDNode *NMD) {
static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
- NamedMDList.erase(NMD);
+ NamedMDList.erase(NMD->getIterator());
}
bool Module::isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB) {
@@ -376,17 +379,11 @@ const DataLayout &Module::getDataLayout() const { return DL; }
//
void Module::setMaterializer(GVMaterializer *GVM) {
assert(!Materializer &&
- "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+ "Module already has a GVMaterializer. Call materializeAll"
" to clear it out before setting another one.");
Materializer.reset(GVM);
}
-bool Module::isDematerializable(const GlobalValue *GV) const {
- if (Materializer)
- return Materializer->isDematerializable(GV);
- return false;
-}
-
std::error_code Module::materialize(GlobalValue *GV) {
if (!Materializer)
return std::error_code();
@@ -394,23 +391,11 @@ std::error_code Module::materialize(GlobalValue *GV) {
return Materializer->materialize(GV);
}
-void Module::dematerialize(GlobalValue *GV) {
- if (Materializer)
- return Materializer->dematerialize(GV);
-}
-
std::error_code Module::materializeAll() {
if (!Materializer)
return std::error_code();
- return Materializer->materializeModule(this);
-}
-
-std::error_code Module::materializeAllPermanently() {
- if (std::error_code EC = materializeAll())
- return EC;
-
- Materializer.reset();
- return std::error_code();
+ std::unique_ptr<GVMaterializer> M = std::move(Materializer);
+ return M->materializeModule();
}
std::error_code Module::materializeMetadata() {
@@ -458,7 +443,14 @@ void Module::dropAllReferences() {
unsigned Module::getDwarfVersion() const {
auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version"));
if (!Val)
- return dwarf::DWARF_VERSION;
+ return 0;
+ return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
+
+unsigned Module::getCodeViewFlag() const {
+ auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("CodeView"));
+ if (!Val)
+ return 0;
return cast<ConstantInt>(Val->getValue())->getZExtValue();
}
@@ -471,7 +463,7 @@ Comdat *Module::getOrInsertComdat(StringRef Name) {
PICLevel::Level Module::getPICLevel() const {
auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("PIC Level"));
- if (Val == NULL)
+ if (!Val)
return PICLevel::Default;
return static_cast<PICLevel::Level>(
@@ -481,3 +473,15 @@ PICLevel::Level Module::getPICLevel() const {
void Module::setPICLevel(PICLevel::Level PL) {
addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL);
}
+
+void Module::setMaximumFunctionCount(uint64_t Count) {
+ addModuleFlag(ModFlagBehavior::Error, "MaxFunctionCount", Count);
+}
+
+Optional<uint64_t> Module::getMaximumFunctionCount() {
+ auto *Val =
+ cast_or_null<ConstantAsMetadata>(getModuleFlag("MaxFunctionCount"));
+ if (!Val)
+ return None;
+ return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
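A quick sketch of the new MaxFunctionCount accessors; the count value is arbitrary:

```cpp
#include "llvm/ADT/Optional.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Round-trips the new "MaxFunctionCount" module flag.
uint64_t maxFunctionCountOrZero(Module &M) {
  M.setMaximumFunctionCount(1000); // stored as an Error-behavior module flag
  Optional<uint64_t> C = M.getMaximumFunctionCount();
  return C ? *C : 0; // 1000 here; 0 when the flag is absent
}
```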
diff --git a/contrib/llvm/lib/IR/Statepoint.cpp b/contrib/llvm/lib/IR/Statepoint.cpp
index 83ee611..d45c188 100644
--- a/contrib/llvm/lib/IR/Statepoint.cpp
+++ b/contrib/llvm/lib/IR/Statepoint.cpp
@@ -67,10 +67,7 @@ bool llvm::isGCResult(const ImmutableCallSite &CS) {
bool llvm::isGCResult(const Value *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (Function *F = call->getCalledFunction()) {
- return (F->getIntrinsicID() == Intrinsic::experimental_gc_result_int ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_result_float ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_result);
+ return F->getIntrinsicID() == Intrinsic::experimental_gc_result;
}
}
return false;
diff --git a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
index a18f982..50573d8 100644
--- a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
+++ b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
@@ -24,77 +24,73 @@ namespace llvm {
 /// setSymTabObject - This is called when (e.g.) the parent of a basic block
/// changes. This requires us to remove all the instruction symtab entries from
/// the current function and reinsert them into the new function.
-template<typename ValueSubClass, typename ItemParentClass>
-template<typename TPtr>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::setSymTabObject(TPtr *Dest, TPtr Src) {
+template <typename ValueSubClass>
+template <typename TPtr>
+void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
+ TPtr Src) {
// Get the old symtab and value list before doing the assignment.
- ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+ ValueSymbolTable *OldST = getSymTab(getListOwner());
// Do it.
*Dest = Src;
// Get the new SymTab object.
- ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+ ValueSymbolTable *NewST = getSymTab(getListOwner());
// If there is nothing to do, quick exit.
if (OldST == NewST) return;
// Move all the elements from the old symtab to the new one.
- iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ ListTy &ItemList = getList(getListOwner());
if (ItemList.empty()) return;
if (OldST) {
// Remove all entries from the previous symtab.
- for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
- I != ItemList.end(); ++I)
+ for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
if (I->hasName())
OldST->removeValueName(I->getValueName());
}
if (NewST) {
// Add all of the items to the new symtab.
- for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
- I != ItemList.end(); ++I)
+ for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
if (I->hasName())
- NewST->reinsertValue(I);
+ NewST->reinsertValue(&*I);
}
}
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::addNodeToList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::addNodeToList(ValueSubClass *V) {
assert(!V->getParent() && "Value already in a container!!");
ItemParentClass *Owner = getListOwner();
V->setParent(Owner);
if (V->hasName())
- if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ if (ValueSymbolTable *ST = getSymTab(Owner))
ST->reinsertValue(V);
}
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::removeNodeFromList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
+ ValueSubClass *V) {
V->setParent(nullptr);
if (V->hasName())
- if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ if (ValueSymbolTable *ST = getSymTab(getListOwner()))
ST->removeValueName(V->getValueName());
}
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
- ilist_iterator<ValueSubClass> first,
- ilist_iterator<ValueSubClass> last) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
+ SymbolTableListTraits &L2, ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
// We only have to do work here if transferring instructions between BBs
ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
if (NewIP == OldIP) return; // No work to do at all...
// We only have to update symbol table entries if we are transferring the
// instructions to a different symtab object...
- ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
- ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ ValueSymbolTable *NewST = getSymTab(NewIP);
+ ValueSymbolTable *OldST = getSymTab(OldIP);
if (NewST != OldST) {
for (; first != last; ++first) {
ValueSubClass &V = *first;
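The SymbolTableListTraits rework above drops the second template parameter and the static TraitsClass:: indirection; the traits object now reaches the owner and its symbol table through its own members. A toy model of that shape (all names hypothetical, no LLVM dependency):

#include <map>
#include <string>
#include <vector>

struct SymTab { std::map<std::string, int> Names; };
struct Owner {
  SymTab ST;
  std::vector<std::string> Items;
};

template <typename ItemTy> struct ListTraits {
  Owner *O;
  // Formerly static hooks on a separate TraitsClass; now plain members.
  SymTab *getSymTab(Owner *Who) { return Who ? &Who->ST : nullptr; }
  Owner *getListOwner() { return O; }

  void addNodeToList(const ItemTy &Name) {
    if (SymTab *ST = getSymTab(getListOwner()))
      ST->Names[Name] = 1; // register the new node under the owner
  }
};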
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index a9ca800..4c1baf5 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -35,6 +35,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
case LabelTyID : return getLabelTy(C);
case MetadataTyID : return getMetadataTy(C);
case X86_MMXTyID : return getX86_MMXTy(C);
+ case TokenTyID : return getTokenTy(C);
default:
return nullptr;
}
@@ -42,16 +43,10 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
/// getScalarType - If this is a vector type, return the element type,
/// otherwise return this.
-Type *Type::getScalarType() {
- if (VectorType *VTy = dyn_cast<VectorType>(this))
+Type *Type::getScalarType() const {
+ if (auto *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType();
- return this;
-}
-
-const Type *Type::getScalarType() const {
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType();
- return this;
+ return const_cast<Type*>(this);
}
/// isIntegerTy - Return true if this is an IntegerType of the specified width.
@@ -74,8 +69,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// Vector -> Vector conversions are always lossless if the two vector types
// have the same size, otherwise not. Also, 64-bit vector types can be
// converted to x86mmx.
- if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
- if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (auto *thisPTy = dyn_cast<VectorType>(this)) {
+ if (auto *thatPTy = dyn_cast<VectorType>(Ty))
return thisPTy->getBitWidth() == thatPTy->getBitWidth();
if (Ty->getTypeID() == Type::X86_MMXTyID &&
thisPTy->getBitWidth() == 64)
@@ -83,7 +78,7 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
}
if (this->getTypeID() == Type::X86_MMXTyID)
- if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (auto *thatPTy = dyn_cast<VectorType>(Ty))
if (thatPTy->getBitWidth() == 64)
return true;
@@ -91,8 +86,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// remaining and ptr->ptr. Just select the lossless conversions. Everything
// else is not lossless. Conservatively assume we can't losslessly convert
// between pointers with different address spaces.
- if (const PointerType *PTy = dyn_cast<PointerType>(this)) {
- if (const PointerType *OtherPTy = dyn_cast<PointerType>(Ty))
+ if (auto *PTy = dyn_cast<PointerType>(this)) {
+ if (auto *OtherPTy = dyn_cast<PointerType>(Ty))
return PTy->getAddressSpace() == OtherPTy->getAddressSpace();
return false;
}
@@ -100,14 +95,12 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
}
bool Type::isEmptyTy() const {
- const ArrayType *ATy = dyn_cast<ArrayType>(this);
- if (ATy) {
+ if (auto *ATy = dyn_cast<ArrayType>(this)) {
unsigned NumElements = ATy->getNumElements();
return NumElements == 0 || ATy->getElementType()->isEmptyTy();
}
- const StructType *STy = dyn_cast<StructType>(this);
- if (STy) {
+ if (auto *STy = dyn_cast<StructType>(this)) {
unsigned NumElements = STy->getNumElements();
for (unsigned i = 0; i < NumElements; ++i)
if (!STy->getElementType(i)->isEmptyTy())
@@ -144,7 +137,7 @@ unsigned Type::getScalarSizeInBits() const {
/// is only valid on floating point types. If the FP type does not
/// have a stable mantissa (e.g. ppc long double), this method returns -1.
int Type::getFPMantissaWidth() const {
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ if (auto *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType()->getFPMantissaWidth();
assert(isFloatingPointTy() && "Not a floating point type!");
if (getTypeID() == HalfTyID) return 11;
@@ -159,66 +152,17 @@ int Type::getFPMantissaWidth() const {
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
-bool Type::isSizedDerivedType(SmallPtrSetImpl<const Type*> *Visited) const {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+bool Type::isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited) const {
+ if (auto *ATy = dyn_cast<ArrayType>(this))
return ATy->getElementType()->isSized(Visited);
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ if (auto *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType()->isSized(Visited);
return cast<StructType>(this)->isSized(Visited);
}
//===----------------------------------------------------------------------===//
-// Subclass Helper Methods
-//===----------------------------------------------------------------------===//
-
-unsigned Type::getIntegerBitWidth() const {
- return cast<IntegerType>(this)->getBitWidth();
-}
-
-bool Type::isFunctionVarArg() const {
- return cast<FunctionType>(this)->isVarArg();
-}
-
-Type *Type::getFunctionParamType(unsigned i) const {
- return cast<FunctionType>(this)->getParamType(i);
-}
-
-unsigned Type::getFunctionNumParams() const {
- return cast<FunctionType>(this)->getNumParams();
-}
-
-StringRef Type::getStructName() const {
- return cast<StructType>(this)->getName();
-}
-
-unsigned Type::getStructNumElements() const {
- return cast<StructType>(this)->getNumElements();
-}
-
-Type *Type::getStructElementType(unsigned N) const {
- return cast<StructType>(this)->getElementType(N);
-}
-
-Type *Type::getSequentialElementType() const {
- return cast<SequentialType>(this)->getElementType();
-}
-
-uint64_t Type::getArrayNumElements() const {
- return cast<ArrayType>(this)->getNumElements();
-}
-
-unsigned Type::getVectorNumElements() const {
- return cast<VectorType>(this)->getNumElements();
-}
-
-unsigned Type::getPointerAddressSpace() const {
- return cast<PointerType>(getScalarType())->getAddressSpace();
-}
-
-
-//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
@@ -228,6 +172,7 @@ Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getTokenTy(LLVMContext &C) { return &C.pImpl->TokenTy; }
Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
@@ -345,7 +290,7 @@ FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
assert(isValidReturnType(Result) && "invalid return type for function");
setSubclassData(IsVarArgs);
- SubTys[0] = const_cast<Type*>(Result);
+ SubTys[0] = Result;
for (unsigned i = 0, e = Params.size(); i != e; ++i) {
assert(isValidArgumentType(Params[i]) &&
@@ -428,12 +373,14 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
if (isPacked)
setSubclassData(getSubclassData() | SCDB_Packed);
- unsigned NumElements = Elements.size();
- Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
- memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
-
- ContainedTys = Elts;
- NumContainedTys = NumElements;
+ NumContainedTys = Elements.size();
+
+ if (Elements.empty()) {
+ ContainedTys = nullptr;
+ return;
+ }
+
+ ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data();
}
void StructType::setName(StringRef Name) {
@@ -470,7 +417,6 @@ void StructType::setName(StringRef Name) {
do {
TempStr.resize(NameSize + 1);
- TmpStream.resync();
TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
IterBool = getContext().pImpl->NamedStructTypes.insert(
@@ -556,13 +502,13 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
return Ret;
}
-bool StructType::isSized(SmallPtrSetImpl<const Type*> *Visited) const {
+bool StructType::isSized(SmallPtrSetImpl<Type*> *Visited) const {
if ((getSubclassData() & SCDB_IsSized) != 0)
return true;
if (isOpaque())
return false;
- if (Visited && !Visited->insert(this).second)
+ if (Visited && !Visited->insert(const_cast<StructType*>(this)).second)
return false;
// Okay, our struct is sized if all of the elements are, but if one of the
@@ -602,22 +548,19 @@ void StructType::setBody(Type *type, ...) {
bool StructType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
+ !ElemTy->isTokenTy();
}
/// isLayoutIdentical - Return true if this is layout identical to the
/// specified struct.
bool StructType::isLayoutIdentical(StructType *Other) const {
if (this == Other) return true;
-
- if (isPacked() != Other->isPacked() ||
- getNumElements() != Other->getNumElements())
+
+ if (isPacked() != Other->isPacked())
return false;
- if (!getNumElements())
- return true;
-
- return std::equal(element_begin(), element_end(), Other->element_begin());
+ return elements() == Other->elements();
}
/// getTypeByName - Return the type with the specified name, or null if there
@@ -631,8 +574,8 @@ StructType *Module::getTypeByName(StringRef Name) const {
// CompositeType Implementation
//===----------------------------------------------------------------------===//
-Type *CompositeType::getTypeAtIndex(const Value *V) {
- if (StructType *STy = dyn_cast<StructType>(this)) {
+Type *CompositeType::getTypeAtIndex(const Value *V) const {
+ if (auto *STy = dyn_cast<StructType>(this)) {
unsigned Idx =
(unsigned)cast<Constant>(V)->getUniqueInteger().getZExtValue();
assert(indexValid(Idx) && "Invalid structure index!");
@@ -641,16 +584,18 @@ Type *CompositeType::getTypeAtIndex(const Value *V) {
return cast<SequentialType>(this)->getElementType();
}
-Type *CompositeType::getTypeAtIndex(unsigned Idx) {
- if (StructType *STy = dyn_cast<StructType>(this)) {
+
+Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+ if (auto *STy = dyn_cast<StructType>(this)) {
assert(indexValid(Idx) && "Invalid structure index!");
return STy->getElementType(Idx);
}
-
+
return cast<SequentialType>(this)->getElementType();
}
+
bool CompositeType::indexValid(const Value *V) const {
- if (const StructType *STy = dyn_cast<StructType>(this)) {
+ if (auto *STy = dyn_cast<StructType>(this)) {
// Structure indexes require (vectors of) 32-bit integer constants. In the
// vector case all of the indices must be equal.
if (!V->getType()->getScalarType()->isIntegerTy(32))
@@ -667,7 +612,7 @@ bool CompositeType::indexValid(const Value *V) const {
}
bool CompositeType::indexValid(unsigned Idx) const {
- if (const StructType *STy = dyn_cast<StructType>(this))
+ if (auto *STy = dyn_cast<StructType>(this))
return Idx < STy->getNumElements();
// Sequential types can be indexed by any integer.
return true;
@@ -683,10 +628,9 @@ ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
NumElements = NumEl;
}
-ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
- Type *ElementType = const_cast<Type*>(elementType);
+ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
assert(isValidElementType(ElementType) && "Invalid type for array element!");
-
+
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
ArrayType *&Entry =
pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
@@ -698,7 +642,8 @@ ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
+ !ElemTy->isTokenTy();
}
//===----------------------------------------------------------------------===//
@@ -710,8 +655,7 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)
NumElements = NumEl;
}
-VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
- Type *ElementType = const_cast<Type*>(elementType);
+VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
assert(isValidElementType(ElementType) && "Element type of a VectorType must "
"be an integer, floating point, or "
@@ -761,13 +705,13 @@ PointerType::PointerType(Type *E, unsigned AddrSpace)
assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
}
-PointerType *Type::getPointerTo(unsigned addrs) {
- return PointerType::get(this, addrs);
+PointerType *Type::getPointerTo(unsigned addrs) const {
+ return PointerType::get(const_cast<Type*>(this), addrs);
}
bool PointerType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy();
+ !ElemTy->isMetadataTy() && !ElemTy->isTokenTy();
}
bool PointerType::isLoadableOrStorableType(Type *ElemTy) {
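A running theme in the Type.cpp hunks is the new token type: it gains a singleton accessor and is excluded from struct and array elements and from pointees. A minimal sketch exercising those predicates, assuming a tree that already contains this patch:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *Tok = Type::getTokenTy(Ctx);
  assert(Tok->isTokenTy());
  // Token may not be wrapped in aggregates or pointed to.
  assert(!StructType::isValidElementType(Tok));
  assert(!ArrayType::isValidElementType(Tok));
  assert(!PointerType::isValidElementType(Tok));
  return 0;
}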
diff --git a/contrib/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm/lib/IR/TypeFinder.cpp
index 7accc5b..b5bdab0 100644
--- a/contrib/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm/lib/IR/TypeFinder.cpp
@@ -44,19 +44,13 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
incorporateType(FI->getType());
- if (FI->hasPrefixData())
- incorporateValue(FI->getPrefixData());
-
- if (FI->hasPrologueData())
- incorporateValue(FI->getPrologueData());
-
- if (FI->hasPersonalityFn())
- incorporateValue(FI->getPersonalityFn());
+ for (const Use &U : FI->operands())
+ incorporateValue(U.get());
// First incorporate the arguments.
for (Function::const_arg_iterator AI = FI->arg_begin(),
AE = FI->arg_end(); AI != AE; ++AI)
- incorporateValue(AI);
+ incorporateValue(&*AI);
for (Function::const_iterator BB = FI->begin(), E = FI->end();
BB != E;++BB)
@@ -85,7 +79,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end(); I != E; ++I) {
- const NamedMDNode *NMD = I;
+ const NamedMDNode *NMD = &*I;
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
incorporateMDNode(NMD->getOperand(i));
}
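The TypeFinder change swaps three hand-enumerated fields (prefix data, prologue data, personality) for one generic walk over the function's operands, so any operand added later is picked up for free. The shape of that loop, as a hedged standalone helper:

#include "llvm/IR/Function.h"
#include "llvm/IR/Use.h"

using namespace llvm;

// Visits every constant operand the function carries, without naming them.
template <typename Callback>
void forEachFunctionOperand(const Function &F, Callback Visit) {
  for (const Use &U : F.operands())
    Visit(U.get());
}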
diff --git a/contrib/llvm/lib/IR/User.cpp b/contrib/llvm/lib/IR/User.cpp
index 522722d..a75abe6 100644
--- a/contrib/llvm/lib/IR/User.cpp
+++ b/contrib/llvm/lib/IR/User.cpp
@@ -87,22 +87,70 @@ void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) {
Use::zap(OldOps, OldOps + OldNumUses, true);
}
+
+// This is a private struct used by `User` to track the co-allocated descriptor
+// section.
+struct DescriptorInfo {
+ intptr_t SizeInBytes;
+};
+
+ArrayRef<const uint8_t> User::getDescriptor() const {
+ auto MutableARef = const_cast<User *>(this)->getDescriptor();
+ return {MutableARef.begin(), MutableARef.end()};
+}
+
+MutableArrayRef<uint8_t> User::getDescriptor() {
+ assert(HasDescriptor && "Don't call otherwise!");
+ assert(!HasHungOffUses && "Invariant!");
+
+ auto *DI = reinterpret_cast<DescriptorInfo *>(getIntrusiveOperands()) - 1;
+ assert(DI->SizeInBytes != 0 && "Should not have had a descriptor otherwise!");
+
+ return MutableArrayRef<uint8_t>(
+ reinterpret_cast<uint8_t *>(DI) - DI->SizeInBytes, DI->SizeInBytes);
+}
+
//===----------------------------------------------------------------------===//
// User operator new Implementations
//===----------------------------------------------------------------------===//
-void *User::operator new(size_t Size, unsigned Us) {
+void *User::allocateFixedOperandUser(size_t Size, unsigned Us,
+ unsigned DescBytes) {
assert(Us < (1u << NumUserOperandsBits) && "Too many operands");
- void *Storage = ::operator new(Size + sizeof(Use) * Us);
- Use *Start = static_cast<Use*>(Storage);
+
+ static_assert(sizeof(DescriptorInfo) % sizeof(void *) == 0, "Required below");
+
+ unsigned DescBytesToAllocate =
+ DescBytes == 0 ? 0 : (DescBytes + sizeof(DescriptorInfo));
+ assert(DescBytesToAllocate % sizeof(void *) == 0 &&
+ "We need this to satisfy alignment constraints for Uses");
+
+ uint8_t *Storage = static_cast<uint8_t *>(
+ ::operator new(Size + sizeof(Use) * Us + DescBytesToAllocate));
+ Use *Start = reinterpret_cast<Use *>(Storage + DescBytesToAllocate);
Use *End = Start + Us;
User *Obj = reinterpret_cast<User*>(End);
Obj->NumUserOperands = Us;
Obj->HasHungOffUses = false;
+ Obj->HasDescriptor = DescBytes != 0;
Use::initTags(Start, End);
+
+ if (DescBytes != 0) {
+ auto *DescInfo = reinterpret_cast<DescriptorInfo *>(Storage + DescBytes);
+ DescInfo->SizeInBytes = DescBytes;
+ }
+
return Obj;
}
+void *User::operator new(size_t Size, unsigned Us) {
+ return allocateFixedOperandUser(Size, Us, 0);
+}
+
+void *User::operator new(size_t Size, unsigned Us, unsigned DescBytes) {
+ return allocateFixedOperandUser(Size, Us, DescBytes);
+}
+
void *User::operator new(size_t Size) {
// Allocate space for a single Use*
void *Storage = ::operator new(Size + sizeof(Use *));
@@ -110,6 +158,7 @@ void *User::operator new(size_t Size) {
User *Obj = reinterpret_cast<User *>(HungOffOperandList + 1);
Obj->NumUserOperands = 0;
Obj->HasHungOffUses = true;
+ Obj->HasDescriptor = false;
*HungOffOperandList = nullptr;
return Obj;
}
@@ -123,11 +172,20 @@ void User::operator delete(void *Usr) {
// use a Use[] allocated prior to the user.
User *Obj = static_cast<User *>(Usr);
if (Obj->HasHungOffUses) {
+ assert(!Obj->HasDescriptor && "not supported!");
+
Use **HungOffOperandList = static_cast<Use **>(Usr) - 1;
// drop the hung off uses.
Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands,
/* Delete */ true);
::operator delete(HungOffOperandList);
+ } else if (Obj->HasDescriptor) {
+ Use *UseBegin = static_cast<Use *>(Usr) - Obj->NumUserOperands;
+ Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false);
+
+ auto *DI = reinterpret_cast<DescriptorInfo *>(UseBegin) - 1;
+ uint8_t *Storage = reinterpret_cast<uint8_t *>(DI) - DI->SizeInBytes;
+ ::operator delete(Storage);
} else {
Use *Storage = static_cast<Use *>(Usr) - Obj->NumUserOperands;
Use::zap(Storage, Storage + Obj->NumUserOperands,
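The User.cpp hunks co-allocate an optional descriptor blob in front of the object: [descriptor bytes][DescriptorInfo][Use x N][User], with the size header directly before the first Use so that operator delete can recover the true allocation start. A toy of the prefix arithmetic (hypothetical names; the Use array is omitted for brevity):

#include <cstddef>
#include <cstdint>
#include <new>

struct DescriptorInfo { intptr_t SizeInBytes; };

void *allocateWithDescriptor(std::size_t ObjSize, std::size_t DescBytes) {
  std::size_t Prefix = DescBytes ? DescBytes + sizeof(DescriptorInfo) : 0;
  auto *Storage = static_cast<uint8_t *>(::operator new(Prefix + ObjSize));
  if (DescBytes) {
    // The size header sits immediately below the object.
    auto *DI = reinterpret_cast<DescriptorInfo *>(Storage + DescBytes);
    DI->SizeInBytes = static_cast<intptr_t>(DescBytes);
  }
  return Storage + Prefix; // caller constructs the object here
}

void deallocateWithDescriptor(void *Obj, bool HasDescriptor) {
  auto *P = static_cast<uint8_t *>(Obj);
  if (!HasDescriptor) {
    ::operator delete(P);
    return;
  }
  auto *DI = reinterpret_cast<DescriptorInfo *>(P) - 1;
  ::operator delete(reinterpret_cast<uint8_t *>(DI) - DI->SizeInBytes);
}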
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index f554d59..eb9deb6 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -314,6 +314,16 @@ void Value::takeName(Value *V) {
}
#ifndef NDEBUG
+void Value::assertModuleIsMaterialized() const {
+ const GlobalValue *GV = dyn_cast<GlobalValue>(this);
+ if (!GV)
+ return;
+ const Module *M = GV->getParent();
+ if (!M)
+ return;
+ assert(M->isMaterialized());
+}
+
static bool contains(SmallPtrSetImpl<ConstantExpr *> &Cache, ConstantExpr *Expr,
Constant *C) {
if (!Cache.insert(Expr).second)
@@ -490,8 +500,7 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
return V;
Offset = GEPOffset;
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast ||
- Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
V = GA->getAliasee();
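The Value.cpp hunk stops looking through addrspacecast while accumulating in-bounds offsets: the accumulated APInt is sized for one pointer width, and that width can change across address spaces. A sketch making the width mismatch concrete (the datalayout string is hypothetical):

#include "llvm/IR/DataLayout.h"
#include <cassert>

using namespace llvm;

int main() {
  // 64-bit pointers by default, 32-bit pointers in address space 1.
  DataLayout DL("e-p:64:64-p1:32:32");
  assert(DL.getPointerSizeInBits(0) == 64);
  assert(DL.getPointerSizeInBits(1) == 32);
  // An offset sized for AS0 would be the wrong width after a cast into
  // AS1, so the stripping walk now stops at the addrspacecast.
  return 0;
}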
diff --git a/contrib/llvm/lib/IR/ValueSymbolTable.cpp b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
index e10142d..deb6e75 100644
--- a/contrib/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
@@ -32,6 +32,24 @@ ValueSymbolTable::~ValueSymbolTable() {
#endif
}
+ValueName *ValueSymbolTable::makeUniqueName(Value *V,
+ SmallString<256> &UniqueName) {
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(BaseSize);
+ raw_svector_ostream S(UniqueName);
+ if (isa<GlobalValue>(V))
+ S << ".";
+ S << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
+ if (IterBool.second)
+ return &*IterBool.first;
+ }
+}
+
// Insert a value into the symbol table with the specified name...
//
void ValueSymbolTable::reinsertValue(Value* V) {
@@ -49,21 +67,8 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// The name is already used; just free it so we can allocate a new name.
V->getValueName()->Destroy();
- unsigned BaseSize = UniqueName.size();
- while (1) {
- // Trim any suffix off and append the next number.
- UniqueName.resize(BaseSize);
- raw_svector_ostream(UniqueName) << "." << ++LastUnique;
-
- // Try insert the vmap entry with this suffix.
- auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
- if (IterBool.second) {
- // Newly inserted name. Success!
- V->setValueName(&*IterBool.first);
- //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
- return;
- }
- }
+ ValueName *VN = makeUniqueName(V, UniqueName);
+ V->setValueName(VN);
}
void ValueSymbolTable::removeValueName(ValueName *V) {
@@ -86,20 +91,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// Otherwise, there is a naming conflict. Rename this value.
SmallString<256> UniqueName(Name.begin(), Name.end());
-
- while (1) {
- // Trim any suffix off and append the next number.
- UniqueName.resize(Name.size());
- raw_svector_ostream(UniqueName) << ++LastUnique;
-
- // Try insert the vmap entry with this suffix.
- auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
- if (IterBool.second) {
- // DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V <<
- // "\n");
- return &*IterBool.first;
- }
- }
+ return makeUniqueName(V, UniqueName);
}
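Both rename loops in ValueSymbolTable.cpp now funnel into makeUniqueName: trim back to the base name, append the next counter (globals get a '.' separator), and retry until the map insertion succeeds. The scheme in miniature (standalone; unlike the real code, the counter here does not persist across calls):

#include <cstddef>
#include <set>
#include <string>

std::string makeUniqueName(std::set<std::string> &Taken, std::string Base,
                           bool DotSeparator) {
  unsigned LastUnique = 0;
  const std::size_t BaseSize = Base.size();
  while (true) {
    Base.resize(BaseSize);           // trim any suffix from the last attempt
    if (DotSeparator)
      Base += '.';
    Base += std::to_string(++LastUnique);
    if (Taken.insert(Base).second)   // a free name was found
      return Base;
  }
}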
diff --git a/contrib/llvm/lib/IR/ValueTypes.cpp b/contrib/llvm/lib/IR/ValueTypes.cpp
index d95de39..f293230 100644
--- a/contrib/llvm/lib/IR/ValueTypes.cpp
+++ b/contrib/llvm/lib/IR/ValueTypes.cpp
@@ -19,6 +19,11 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+EVT EVT::changeExtendedTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ return getIntegerVT(Context, getSizeInBits());
+}
+
EVT EVT::changeExtendedVectorElementTypeToInteger() const {
LLVMContext &Context = LLVMTy->getContext();
EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
@@ -83,6 +88,10 @@ bool EVT::isExtended1024BitVector() const {
return isExtendedVector() && getExtendedSizeInBits() == 1024;
}
+bool EVT::isExtended2048BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 2048;
+}
+
EVT EVT::getExtendedVectorElementType() const {
assert(isExtended() && "Type is not extended!");
return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -134,6 +143,8 @@ std::string EVT::getEVTString() const {
case MVT::v16i1: return "v16i1";
case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1";
+ case MVT::v512i1: return "v512i1";
+ case MVT::v1024i1: return "v1024i1";
case MVT::v1i8: return "v1i8";
case MVT::v2i8: return "v2i8";
case MVT::v4i8: return "v4i8";
@@ -141,22 +152,29 @@ std::string EVT::getEVTString() const {
case MVT::v16i8: return "v16i8";
case MVT::v32i8: return "v32i8";
case MVT::v64i8: return "v64i8";
+ case MVT::v128i8: return "v128i8";
+ case MVT::v256i8: return "v256i8";
case MVT::v1i16: return "v1i16";
case MVT::v2i16: return "v2i16";
case MVT::v4i16: return "v4i16";
case MVT::v8i16: return "v8i16";
case MVT::v16i16: return "v16i16";
case MVT::v32i16: return "v32i16";
+ case MVT::v64i16: return "v64i16";
+ case MVT::v128i16: return "v128i16";
case MVT::v1i32: return "v1i32";
case MVT::v2i32: return "v2i32";
case MVT::v4i32: return "v4i32";
case MVT::v8i32: return "v8i32";
case MVT::v16i32: return "v16i32";
+ case MVT::v32i32: return "v32i32";
+ case MVT::v64i32: return "v64i32";
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v16i64: return "v16i64";
+ case MVT::v32i64: return "v32i64";
case MVT::v1i128: return "v1i128";
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
@@ -203,6 +221,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512);
+ case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024);
case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
@@ -210,22 +230,29 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128);
+ case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64);
+ case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64);
case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
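The ValueTypes.cpp additions are mechanical: each new wide MVT gets a name and an IR type mapping. They round-trip through EVT like the existing entries; a minimal check, assuming the post-patch headers:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 32); // now a simple type: v32i32
  assert(VT.getEVTString() == "v32i32");
  assert(VT.getTypeForEVT(Ctx)->isVectorTy());
  return 0;
}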
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 2a0a4ff..81c87e4 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -39,8 +39,7 @@
// only by the unwind edge of an invoke instruction.
// * A landingpad instruction must be the first non-PHI instruction in the
// block.
-// * All landingpad instructions must use the same personality function with
-// the same function.
+// * Landingpad instructions must be in a function with a personality function.
// * All other things that are tested by asserts spread about the code...
//
//===----------------------------------------------------------------------===//
@@ -92,6 +91,16 @@ struct VerifierSupport {
: OS(OS), M(nullptr), Broken(false) {}
private:
+ template <class NodeTy> void Write(const ilist_iterator<NodeTy> &I) {
+ Write(&*I);
+ }
+
+ void Write(const Module *M) {
+ if (!M)
+ return;
+ OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+ }
+
void Write(const Value *V) {
if (!V)
return;
@@ -184,6 +193,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// \brief Track unresolved string-based type references.
SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs;
+ /// \brief The result type for a landingpad.
+ Type *LandingPadResultTy;
+
/// \brief Whether we've seen a call to @llvm.localescape in this function
/// already.
bool SawFrameEscape;
@@ -192,9 +204,15 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
+ /// Cache of constants visited in search of ConstantExprs.
+ SmallPtrSet<const Constant *, 32> ConstantExprVisited;
+
+ void checkAtomicMemAccessSize(const Module *M, Type *Ty,
+ const Instruction *I);
public:
explicit Verifier(raw_ostream &OS)
- : VerifierSupport(OS), Context(nullptr), SawFrameEscape(false) {}
+ : VerifierSupport(OS), Context(nullptr), LandingPadResultTy(nullptr),
+ SawFrameEscape(false) {}
bool verify(const Function &F) {
M = F.getParent();
@@ -228,6 +246,7 @@ public:
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
InstsInThisBlock.clear();
+ LandingPadResultTy = nullptr;
SawFrameEscape = false;
return !Broken;
@@ -297,12 +316,12 @@ private:
void visitFunction(const Function &F);
void visitBasicBlock(BasicBlock &BB);
void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty);
+ void visitDereferenceableMetadata(Instruction& I, MDNode* MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
#include "llvm/IR/Metadata.def"
void visitDIScope(const DIScope &N);
- void visitDIDerivedTypeBase(const DIDerivedTypeBase &N);
void visitDIVariable(const DIVariable &N);
void visitDILexicalBlockBase(const DILexicalBlockBase &N);
void visitDITemplateParameter(const DITemplateParameter &N);
@@ -379,7 +398,13 @@ private:
void visitAllocaInst(AllocaInst &AI);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
+ void visitEHPadPredecessors(Instruction &I);
void visitLandingPadInst(LandingPadInst &LPI);
+ void visitCatchPadInst(CatchPadInst &CPI);
+ void visitCatchReturnInst(CatchReturnInst &CatchReturn);
+ void visitCleanupPadInst(CleanupPadInst &CPI);
+ void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch);
+ void visitCleanupReturnInst(CleanupReturnInst &CRI);
void VerifyCallSite(CallSite CS);
void verifyMustTailCall(CallInst &CI);
@@ -399,7 +424,8 @@ private:
void VerifyFunctionMetadata(
const SmallVector<std::pair<unsigned, MDNode *>, 4> MDs);
- void VerifyConstantExprBitcastType(const ConstantExpr *CE);
+ void visitConstantExprsRecursively(const Constant *EntryC);
+ void visitConstantExpr(const ConstantExpr *CE);
void VerifyStatepoint(ImmutableCallSite CS);
void verifyFrameRecoverIndices();
@@ -524,25 +550,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
}
// Walk any aggregate initializers looking for bitcasts between address spaces
- SmallPtrSet<const Value *, 4> Visited;
- SmallVector<const Value *, 4> WorkStack;
- WorkStack.push_back(cast<Value>(GV.getInitializer()));
-
- while (!WorkStack.empty()) {
- const Value *V = WorkStack.pop_back_val();
- if (!Visited.insert(V).second)
- continue;
-
- if (const User *U = dyn_cast<User>(V)) {
- WorkStack.append(U->op_begin(), U->op_end());
- }
-
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- VerifyConstantExprBitcastType(CE);
- if (Broken)
- return;
- }
- }
+ visitConstantExprsRecursively(GV.getInitializer());
visitGlobalValue(GV);
}
@@ -556,7 +564,8 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
const GlobalAlias &GA, const Constant &C) {
if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
- Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA);
+ Assert(!GV->isDeclarationForLinker(), "Alias must point to a definition",
+ &GA);
if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
@@ -571,7 +580,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
}
if (const auto *CE = dyn_cast<ConstantExpr>(&C))
- VerifyConstantExprBitcastType(CE);
+ visitConstantExprsRecursively(CE);
for (const Use &U : C.operands()) {
Value *V = &*U;
@@ -779,39 +788,10 @@ void Verifier::visitDIBasicType(const DIBasicType &N) {
"invalid tag", &N);
}
-void Verifier::visitDIDerivedTypeBase(const DIDerivedTypeBase &N) {
+void Verifier::visitDIDerivedType(const DIDerivedType &N) {
// Common scope checks.
visitDIScope(N);
- Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope());
- Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N,
- N.getBaseType());
-
- // FIXME: Sink this into the subclass verifies.
- if (!N.getFile() || N.getFile()->getFilename().empty()) {
- // Check whether the filename is allowed to be empty.
- uint16_t Tag = N.getTag();
- Assert(
- Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
- Tag == dwarf::DW_TAG_pointer_type ||
- Tag == dwarf::DW_TAG_ptr_to_member_type ||
- Tag == dwarf::DW_TAG_reference_type ||
- Tag == dwarf::DW_TAG_rvalue_reference_type ||
- Tag == dwarf::DW_TAG_restrict_type ||
- Tag == dwarf::DW_TAG_array_type ||
- Tag == dwarf::DW_TAG_enumeration_type ||
- Tag == dwarf::DW_TAG_subroutine_type ||
- Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend ||
- Tag == dwarf::DW_TAG_structure_type ||
- Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef,
- "derived/composite type requires a filename", &N, N.getFile());
- }
-}
-
-void Verifier::visitDIDerivedType(const DIDerivedType &N) {
- // Common derived type checks.
- visitDIDerivedTypeBase(N);
-
Assert(N.getTag() == dwarf::DW_TAG_typedef ||
N.getTag() == dwarf::DW_TAG_pointer_type ||
N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
@@ -828,6 +808,10 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N,
N.getExtraData());
}
+
+ Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope());
+ Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N,
+ N.getBaseType());
}
static bool hasConflictingReferenceFlags(unsigned Flags) {
@@ -845,27 +829,34 @@ void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) {
}
void Verifier::visitDICompositeType(const DICompositeType &N) {
- // Common derived type checks.
- visitDIDerivedTypeBase(N);
+ // Common scope checks.
+ visitDIScope(N);
Assert(N.getTag() == dwarf::DW_TAG_array_type ||
N.getTag() == dwarf::DW_TAG_structure_type ||
N.getTag() == dwarf::DW_TAG_union_type ||
N.getTag() == dwarf::DW_TAG_enumeration_type ||
- N.getTag() == dwarf::DW_TAG_subroutine_type ||
N.getTag() == dwarf::DW_TAG_class_type,
"invalid tag", &N);
+ Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope());
+ Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N,
+ N.getBaseType());
+
Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
"invalid composite elements", &N, N.getRawElements());
Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N,
N.getRawVTableHolder());
- Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
- "invalid composite elements", &N, N.getRawElements());
Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
&N);
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
+
+ if (N.getTag() == dwarf::DW_TAG_class_type ||
+ N.getTag() == dwarf::DW_TAG_union_type) {
+ Assert(N.getFile() && !N.getFile()->getFilename().empty(),
+ "class/union requires a filename", &N, N.getFile());
+ }
}
void Verifier::visitDISubroutineType(const DISubroutineType &N) {
@@ -885,6 +876,7 @@ void Verifier::visitDIFile(const DIFile &N) {
}
void Verifier::visitDICompileUnit(const DICompileUnit &N) {
+ Assert(N.isDistinct(), "compile units must be distinct", &N);
Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
// Don't bother verifying the compilation directory or producer string
@@ -928,6 +920,12 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
Op);
}
}
+ if (auto *Array = N.getRawMacros()) {
+ Assert(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+ for (Metadata *Op : N.getMacros()->operands()) {
+ Assert(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+ }
+ }
}
void Verifier::visitDISubprogram(const DISubprogram &N) {
@@ -937,13 +935,6 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
Assert(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N,
N.getRawContainingType());
- if (auto *RawF = N.getRawFunction()) {
- auto *FMD = dyn_cast<ConstantAsMetadata>(RawF);
- auto *F = FMD ? FMD->getValue() : nullptr;
- auto *FT = F ? dyn_cast<PointerType>(F->getType()) : nullptr;
- Assert(F && FT && isa<FunctionType>(FT->getElementType()),
- "invalid function", &N, F, FT);
- }
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
if (auto *S = N.getRawDeclaration()) {
@@ -961,40 +952,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
&N);
- auto *F = N.getFunction();
- if (!F)
- return;
-
- // Check that all !dbg attachments lead to back to N (or, at least, another
- // subprogram that describes the same function).
- //
- // FIXME: Check this incrementally while visiting !dbg attachments.
- // FIXME: Only check when N is the canonical subprogram for F.
- SmallPtrSet<const MDNode *, 32> Seen;
- for (auto &BB : *F)
- for (auto &I : BB) {
- // Be careful about using DILocation here since we might be dealing with
- // broken code (this is the Verifier after all).
- DILocation *DL =
- dyn_cast_or_null<DILocation>(I.getDebugLoc().getAsMDNode());
- if (!DL)
- continue;
- if (!Seen.insert(DL).second)
- continue;
-
- DILocalScope *Scope = DL->getInlinedAtScope();
- if (Scope && !Seen.insert(Scope).second)
- continue;
-
- DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
- if (SP && !Seen.insert(SP).second)
- continue;
-
- // FIXME: Once N is canonical, check "SP == &N".
- Assert(SP->describes(F),
- "!dbg attachment points at wrong subprogram for function", &N, F,
- &I, DL, Scope, SP);
- }
+ if (N.isDefinition())
+ Assert(N.isDistinct(), "subprogram definitions must be distinct", &N);
}
void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
@@ -1020,6 +979,27 @@ void Verifier::visitDINamespace(const DINamespace &N) {
Assert(isa<DIScope>(S), "invalid scope ref", &N, S);
}
+void Verifier::visitDIMacro(const DIMacro &N) {
+ Assert(N.getMacinfoType() == dwarf::DW_MACINFO_define ||
+ N.getMacinfoType() == dwarf::DW_MACINFO_undef,
+ "invalid macinfo type", &N);
+ Assert(!N.getName().empty(), "anonymous macro", &N);
+}
+
+void Verifier::visitDIMacroFile(const DIMacroFile &N) {
+ Assert(N.getMacinfoType() == dwarf::DW_MACINFO_start_file,
+ "invalid macinfo type", &N);
+ if (auto *F = N.getRawFile())
+ Assert(isa<DIFile>(F), "invalid file", &N, F);
+
+ if (auto *Array = N.getRawElements()) {
+ Assert(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+ for (Metadata *Op : N.getElements()->operands()) {
+ Assert(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+ }
+ }
+}
+
void Verifier::visitDIModule(const DIModule &N) {
Assert(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
Assert(!N.getName().empty(), "anonymous module", &N);
@@ -1075,9 +1055,7 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) {
// Checks common to all variables.
visitDIVariable(N);
- Assert(N.getTag() == dwarf::DW_TAG_auto_variable ||
- N.getTag() == dwarf::DW_TAG_arg_variable,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
Assert(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
"local variable requires a valid scope", &N, N.getRawScope());
}
@@ -1274,7 +1252,10 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::OptimizeNone ||
I->getKindAsEnum() == Attribute::JumpTable ||
I->getKindAsEnum() == Attribute::Convergent ||
- I->getKindAsEnum() == Attribute::ArgMemOnly) {
+ I->getKindAsEnum() == Attribute::ArgMemOnly ||
+ I->getKindAsEnum() == Attribute::NoRecurse ||
+ I->getKindAsEnum() == Attribute::InaccessibleMemOnly ||
+ I->getKindAsEnum() == Attribute::InaccessibleMemOrArgMemOnly) {
if (!isFunction) {
CheckFailed("Attribute '" + I->getAsString() +
"' only applies to functions!", V);
@@ -1365,7 +1346,7 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
- SmallPtrSet<const Type*, 4> Visited;
+ SmallPtrSet<Type*, 4> Visited;
if (!PTy->getElementType()->isSized(&Visited)) {
Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
!Attrs.hasAttribute(Idx, Attribute::InAlloca),
@@ -1445,6 +1426,18 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
"Attributes 'readnone and readonly' are incompatible!", V);
Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InaccessibleMemOrArgMemOnly)),
+ "Attributes 'readnone and inaccessiblemem_or_argmemonly' are incompatible!", V);
+
+ Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InaccessibleMemOnly)),
+ "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
+
+ Assert(
!(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) &&
Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::AlwaysInline)),
@@ -1501,7 +1494,35 @@ void Verifier::VerifyFunctionMetadata(
}
}
-void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) {
+void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
+ if (!ConstantExprVisited.insert(EntryC).second)
+ return;
+
+ SmallVector<const Constant *, 16> Stack;
+ Stack.push_back(EntryC);
+
+ while (!Stack.empty()) {
+ const Constant *C = Stack.pop_back_val();
+
+ // Check this constant expression.
+ if (const auto *CE = dyn_cast<ConstantExpr>(C))
+ visitConstantExpr(CE);
+
+ // Visit all sub-expressions.
+ for (const Use &U : C->operands()) {
+ const auto *OpC = dyn_cast<Constant>(U);
+ if (!OpC)
+ continue;
+ if (isa<GlobalValue>(OpC))
+ continue; // Global values get visited separately.
+ if (!ConstantExprVisited.insert(OpC).second)
+ continue;
+ Stack.push_back(OpC);
+ }
+ }
+}
+
+void Verifier::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() != Instruction::BitCast)
return;
@@ -1554,17 +1575,11 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
&CI);
const Value *Target = CS.getArgument(2);
- const PointerType *PT = dyn_cast<PointerType>(Target->getType());
+ auto *PT = dyn_cast<PointerType>(Target->getType());
Assert(PT && PT->getElementType()->isFunctionTy(),
"gc.statepoint callee must be of function pointer type", &CI, Target);
FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
- if (NumPatchBytes)
- Assert(isa<ConstantPointerNull>(Target->stripPointerCasts()),
- "gc.statepoint must have null as call target if number of patchable "
- "bytes is non zero",
- &CI);
-
const Value *NumCallArgsV = CS.getArgument(3);
Assert(isa<ConstantInt>(NumCallArgsV),
"gc.statepoint number of arguments to underlying call "
@@ -1743,17 +1758,33 @@ void Verifier::visitFunction(const Function &F) {
FT->getParamType(i));
Assert(I->getType()->isFirstClassType(),
"Function arguments must have first-class types!", I);
- if (!isLLVMdotName)
+ if (!isLLVMdotName) {
Assert(!I->getType()->isMetadataTy(),
"Function takes metadata but isn't an intrinsic", I, &F);
+ Assert(!I->getType()->isTokenTy(),
+ "Function takes token but isn't an intrinsic", I, &F);
+ }
}
+ if (!isLLVMdotName)
+ Assert(!F.getReturnType()->isTokenTy(),
+ "Functions returns a token but isn't an intrinsic", &F);
+
// Get the function metadata attachments.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
F.getAllMetadata(MDs);
assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync");
VerifyFunctionMetadata(MDs);
+ // Check validity of the personality function
+ if (F.hasPersonalityFn()) {
+ auto *Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ if (Per)
+ Assert(Per->getParent() == F.getParent(),
+ "Referencing personality function in another module!",
+ &F, F.getParent(), Per, Per->getParent());
+ }
+
if (F.isMaterializable()) {
// Function has a body somewhere we can't see.
Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F,
@@ -1782,13 +1813,27 @@ void Verifier::visitFunction(const Function &F) {
}
// Visit metadata attachments.
- for (const auto &I : MDs)
+ for (const auto &I : MDs) {
+ // Verify that the attachment is legal.
+ switch (I.first) {
+ default:
+ break;
+ case LLVMContext::MD_dbg:
+ Assert(isa<DISubprogram>(I.second),
+ "function !dbg attachment must be a subprogram", &F, I.second);
+ break;
+ }
+
+ // Verify the metadata itself.
visitMDNode(*I.second);
+ }
}
// If this function is actually an intrinsic, verify that it is only used in
// direct call/invokes, never having its "address taken".
- if (F.getIntrinsicID()) {
+ // Only do this if the module is materialized; otherwise we don't have all the
+ // uses.
+ if (F.getIntrinsicID() && F.getParent()->isMaterialized()) {
const User *U;
if (F.hasAddressTaken(&U))
Assert(0, "Invalid user of intrinsic instruction!", U);
@@ -1798,6 +1843,44 @@ void Verifier::visitFunction(const Function &F) {
(F.isDeclaration() && F.hasExternalLinkage()) ||
F.hasAvailableExternallyLinkage(),
"Function is marked as dllimport, but not external.", &F);
+
+ auto *N = F.getSubprogram();
+ if (!N)
+ return;
+
+ // Check that all !dbg attachments lead back to N (or, at least, another
+ // subprogram that describes the same function).
+ //
+ // FIXME: Check this incrementally while visiting !dbg attachments.
+ // FIXME: Only check when N is the canonical subprogram for F.
+ SmallPtrSet<const MDNode *, 32> Seen;
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ // Be careful about using DILocation here since we might be dealing with
+ // broken code (this is the Verifier after all).
+ DILocation *DL =
+ dyn_cast_or_null<DILocation>(I.getDebugLoc().getAsMDNode());
+ if (!DL)
+ continue;
+ if (!Seen.insert(DL).second)
+ continue;
+
+ DILocalScope *Scope = DL->getInlinedAtScope();
+ if (Scope && !Seen.insert(Scope).second)
+ continue;
+
+ DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
+
+ // Scope and SP could be the same MDNode and we don't want to skip
+ // validation in that case
+ if (SP && ((Scope != SP) && !Seen.insert(SP).second))
+ continue;
+
+ // FIXME: Once N is canonical, check "SP == &N".
+ Assert(SP->describes(&F),
+ "!dbg attachment points at wrong subprogram for function", N, &F,
+ &I, DL, Scope, SP);
+ }
}
// verifyBasicBlock - Verify that a basic block is well formed...
@@ -2194,6 +2277,9 @@ void Verifier::visitPHINode(PHINode &PN) {
isa<PHINode>(--BasicBlock::iterator(&PN)),
"PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
+ // Check that a PHI doesn't yield a Token.
+ Assert(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");
+
// Check that all of the values of the PHI node have the same type as the
// result, and that the incoming blocks are really basic blocks.
for (Value *IncValue : PN.incoming_values()) {
@@ -2296,16 +2382,44 @@ void Verifier::VerifyCallSite(CallSite CS) {
// Verify that there's no metadata unless it's a direct call to an intrinsic.
if (CS.getCalledFunction() == nullptr ||
!CS.getCalledFunction()->getName().startswith("llvm.")) {
- for (FunctionType::param_iterator PI = FTy->param_begin(),
- PE = FTy->param_end(); PI != PE; ++PI)
- Assert(!(*PI)->isMetadataTy(),
+ for (Type *ParamTy : FTy->params()) {
+ Assert(!ParamTy->isMetadataTy(),
"Function has metadata parameter but isn't an intrinsic", I);
+ Assert(!ParamTy->isTokenTy(),
+ "Function has token parameter but isn't an intrinsic", I);
+ }
}
+ // Verify that indirect calls don't return tokens.
+ if (CS.getCalledFunction() == nullptr)
+ Assert(!FTy->getReturnType()->isTokenTy(),
+ "Return type cannot be token for indirect call!");
+
if (Function *F = CS.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
visitIntrinsicCallSite(ID, CS);
+ // Verify that a callsite has at most one "deopt" and one "funclet" operand
+ // bundle.
+ bool FoundDeoptBundle = false, FoundFuncletBundle = false;
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i < e; ++i) {
+ OperandBundleUse BU = CS.getOperandBundleAt(i);
+ uint32_t Tag = BU.getTagID();
+ if (Tag == LLVMContext::OB_deopt) {
+ Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", I);
+ FoundDeoptBundle = true;
+ }
+ if (Tag == LLVMContext::OB_funclet) {
+ Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", I);
+ FoundFuncletBundle = true;
+ Assert(BU.Inputs.size() == 1,
+ "Expected exactly one funclet bundle operand", I);
+ Assert(isa<FuncletPadInst>(BU.Inputs.front()),
+ "Funclet bundle operands should correspond to a FuncletPadInst",
+ I);
+ }
+ }
+
visitInstruction(*I);
}
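The new bundle verification allows at most one "deopt" and one "funclet" operand bundle per call site, and a funclet bundle must carry exactly one FuncletPadInst. A sketch of attaching the single permitted deopt bundle (the IRBuilder and OperandBundleDef signatures are assumptions based on this era's API):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include <vector>

using namespace llvm;

CallInst *callWithDeopt(IRBuilder<> &B, Value *Callee, ArrayRef<Value *> Args,
                        ArrayRef<Value *> DeoptState) {
  std::vector<Value *> State(DeoptState.begin(), DeoptState.end());
  // A second "deopt" bundle on the same site would now trip the verifier
  // ("Multiple deopt operand bundles").
  return B.CreateCall(Callee, Args, {OperandBundleDef("deopt", State)});
}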
@@ -2406,10 +2520,12 @@ void Verifier::visitCallInst(CallInst &CI) {
void Verifier::visitInvokeInst(InvokeInst &II) {
VerifyCallSite(&II);
- // Verify that there is a landingpad instruction as the first non-PHI
- // instruction of the 'unwind' destination.
- Assert(II.getUnwindDest()->isLandingPad(),
- "The unwind destination does not have a landingpad instruction!", &II);
+ // Verify that the first non-PHI instruction of the unwind destination is an
+ // exception handling instruction.
+ Assert(
+ II.getUnwindDest()->isEHPad(),
+ "The unwind destination does not have an exception handling instruction!",
+ &II);
visitTerminatorInst(II);
}
@@ -2622,6 +2738,14 @@ void Verifier::visitRangeMetadata(Instruction& I,
}
}
+void Verifier::checkAtomicMemAccessSize(const Module *M, Type *Ty,
+ const Instruction *I) {
+ unsigned Size = M->getDataLayout().getTypeSizeInBits(Ty);
+ Assert(Size >= 8, "atomic memory access's size must be byte-sized", Ty, I);
+ Assert(!(Size & (Size - 1)),
+ "atomic memory access's operand must have a power-of-two size", Ty, I);
+}
+
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
Assert(PTy, "Load operand must be a pointer.", &LI);
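checkAtomicMemAccessSize centralizes the rule that atomic loads, stores, cmpxchg, and atomicrmw all share: the accessed type must be at least one byte and a power of two in size. The predicate in isolation:

#include <cassert>

static bool isValidAtomicSizeInBits(unsigned Size) {
  // Size & (Size - 1) clears the lowest set bit; a zero result means at
  // most one bit was set, i.e. the size is a power of two.
  return Size >= 8 && (Size & (Size - 1)) == 0;
}

int main() {
  assert(isValidAtomicSizeInBits(8) && isValidAtomicSizeInBits(64));
  assert(!isValidAtomicSizeInBits(0));  // empty access
  assert(!isValidAtomicSizeInBits(4));  // sub-byte
  assert(!isValidAtomicSizeInBits(24)); // not a power of two
  return 0;
}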
@@ -2633,14 +2757,12 @@ void Verifier::visitLoadInst(LoadInst &LI) {
"Load cannot have Release ordering", &LI);
Assert(LI.getAlignment() != 0,
"Atomic load must specify explicit alignment", &LI);
- if (!ElTy->isPointerTy()) {
- Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!",
- &LI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert(Size >= 8 && !(Size & (Size - 1)),
- "atomic load operand must be power-of-two byte-sized integer", &LI,
- ElTy);
- }
+ Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() ||
+ ElTy->isFloatingPointTy(),
+ "atomic load operand must have integer, pointer, or floating point "
+ "type!",
+ ElTy, &LI);
+ checkAtomicMemAccessSize(M, ElTy, &LI);
} else {
Assert(LI.getSynchScope() == CrossThread,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
@@ -2662,14 +2784,12 @@ void Verifier::visitStoreInst(StoreInst &SI) {
"Store cannot have Acquire ordering", &SI);
Assert(SI.getAlignment() != 0,
"Atomic store must specify explicit alignment", &SI);
- if (!ElTy->isPointerTy()) {
- Assert(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!", &SI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
- &SI, ElTy);
- }
+ Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() ||
+ ElTy->isFloatingPointTy(),
+ "atomic store operand must have integer, pointer, or floating point "
+ "type!",
+ ElTy, &SI);
+ checkAtomicMemAccessSize(M, ElTy, &SI);
} else {
Assert(SI.getSynchScope() == CrossThread,
"Non-atomic store cannot have SynchronizationScope specified", &SI);
@@ -2678,7 +2798,7 @@ void Verifier::visitStoreInst(StoreInst &SI) {
}
void Verifier::visitAllocaInst(AllocaInst &AI) {
- SmallPtrSet<const Type*, 4> Visited;
+ SmallPtrSet<Type*, 4> Visited;
PointerType *PTy = AI.getType();
Assert(PTy->getAddressSpace() == 0,
"Allocation instruction pointer not in the generic address space!",
@@ -2716,9 +2836,7 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
Type *ElTy = PTy->getElementType();
Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI,
ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert(Size >= 8 && !(Size & (Size - 1)),
- "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy);
+ checkAtomicMemAccessSize(M, ElTy, &CXI);
Assert(ElTy == CXI.getOperand(1)->getType(),
"Expected value type does not match pointer operand type!", &CXI,
ElTy);
@@ -2737,10 +2855,7 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
Type *ElTy = PTy->getElementType();
Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
&RMWI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert(Size >= 8 && !(Size & (Size - 1)),
- "atomicrmw operand must be power-of-two byte-sized integer", &RMWI,
- ElTy);
+ checkAtomicMemAccessSize(M, ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,
ElTy);
@@ -2777,23 +2892,62 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
visitInstruction(IVI);
}
-void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
- BasicBlock *BB = LPI.getParent();
+void Verifier::visitEHPadPredecessors(Instruction &I) {
+ assert(I.isEHPad());
+
+ BasicBlock *BB = I.getParent();
+ Function *F = BB->getParent();
+
+ Assert(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I);
+
+ if (auto *LPI = dyn_cast<LandingPadInst>(&I)) {
+ // The landingpad instruction defines its parent as a landing pad block. The
+ // landing pad block may be branched to only by the unwind edge of an
+ // invoke.
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ const auto *II = dyn_cast<InvokeInst>(PredBB->getTerminator());
+ Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.",
+ LPI);
+ }
+ return;
+ }
+ if (auto *CPI = dyn_cast<CatchPadInst>(&I)) {
+ if (!pred_empty(BB))
+ Assert(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(),
+ "Block containg CatchPadInst must be jumped to "
+ "only by its catchswitch.",
+ CPI);
+ return;
+ }
+
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ TerminatorInst *TI = PredBB->getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "EH pad must be jumped to via an unwind edge", &I, II);
+ } else if (!isa<CleanupReturnInst>(TI) && !isa<CatchSwitchInst>(TI)) {
+ Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI);
+ }
+ }
+}
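The predecessor walk above is a chain of type-dispatched checks: invokes are accepted only via their unwind edge, cleanuprets and catchswitches pass, and anything else fails. A self-contained sketch of that dispatch shape, using standard dynamic_cast in place of LLVM's custom isa<>/dyn_cast<> RTTI (the toy classes are invented for illustration):

#include <cassert>

// A toy terminator hierarchy; LLVM's real isa<>/dyn_cast<> machinery is
// custom RTTI, but standard dynamic_cast shows the same dispatch shape.
struct Terminator { virtual ~Terminator() = default; };
struct InvokeLike final : Terminator { bool UnwindsHere = true; };
struct CleanupRetLike final : Terminator {};
struct BranchLike final : Terminator {};

// Returns true when a predecessor's terminator may legally reach an EH pad,
// mirroring the accept/accept/reject chain in visitEHPadPredecessors.
static bool mayReachEHPad(const Terminator &T) {
  if (auto *II = dynamic_cast<const InvokeLike *>(&T))
    return II->UnwindsHere;      // invoke: only via its unwind edge
  if (dynamic_cast<const CleanupRetLike *>(&T))
    return true;                 // cleanupret unwind edges are fine
  return false;                  // anything else is a verifier failure
}

int main() {
  InvokeLike I;
  CleanupRetLike C;
  BranchLike B;
  assert(mayReachEHPad(I));
  assert(mayReachEHPad(C));
  assert(!mayReachEHPad(B));
}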
+void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
// The landingpad instruction is ill-formed if it doesn't have any clauses and
// isn't a cleanup.
Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(),
"LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
- // The landingpad instruction defines its parent as a landing pad block. The
- // landing pad block may be branched to only by the unwind edge of an invoke.
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
- Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "Block containing LandingPadInst must be jumped to "
- "only by the unwind edge of an invoke.",
+ visitEHPadPredecessors(LPI);
+
+ if (!LandingPadResultTy)
+ LandingPadResultTy = LPI.getType();
+ else
+ Assert(LandingPadResultTy == LPI.getType(),
+ "The landingpad instruction should have a consistent result type "
+ "inside a function.",
&LPI);
- }
Function *F = LPI.getParent()->getParent();
Assert(F->hasPersonalityFn(),
@@ -2820,6 +2974,132 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
visitInstruction(LPI);
}
+void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
+ visitEHPadPredecessors(CPI);
+
+ BasicBlock *BB = CPI.getParent();
+
+ Function *F = BB->getParent();
+ Assert(F->hasPersonalityFn(),
+ "CatchPadInst needs to be in a function with a personality.", &CPI);
+
+ Assert(isa<CatchSwitchInst>(CPI.getParentPad()),
+ "CatchPadInst needs to be directly nested in a CatchSwitchInst.",
+ CPI.getParentPad());
+
+ // The catchpad instruction must be the first non-PHI instruction in the
+ // block.
+ Assert(BB->getFirstNonPHI() == &CPI,
+ "CatchPadInst not the first non-PHI instruction in the block.", &CPI);
+
+ visitInstruction(CPI);
+}
+
+void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
+ Assert(isa<CatchPadInst>(CatchReturn.getOperand(0)),
+ "CatchReturnInst needs to be provided a CatchPad", &CatchReturn,
+ CatchReturn.getOperand(0));
+
+ visitTerminatorInst(CatchReturn);
+}
+
+void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
+ visitEHPadPredecessors(CPI);
+
+ BasicBlock *BB = CPI.getParent();
+
+ Function *F = BB->getParent();
+ Assert(F->hasPersonalityFn(),
+ "CleanupPadInst needs to be in a function with a personality.", &CPI);
+
+ // The cleanuppad instruction must be the first non-PHI instruction in the
+ // block.
+ Assert(BB->getFirstNonPHI() == &CPI,
+ "CleanupPadInst not the first non-PHI instruction in the block.",
+ &CPI);
+
+ auto *ParentPad = CPI.getParentPad();
+ Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
+ isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
+ "CleanupPadInst has an invalid parent.", &CPI);
+
+ User *FirstUser = nullptr;
+ BasicBlock *FirstUnwindDest = nullptr;
+ for (User *U : CPI.users()) {
+ BasicBlock *UnwindDest;
+ if (CleanupReturnInst *CRI = dyn_cast<CleanupReturnInst>(U)) {
+ UnwindDest = CRI->getUnwindDest();
+ } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
+ continue;
+ } else if (CallSite(U)) {
+ continue;
+ } else {
+ Assert(false, "bogus cleanuppad use", &CPI);
+ }
+
+ if (!FirstUser) {
+ FirstUser = U;
+ FirstUnwindDest = UnwindDest;
+ } else {
+ Assert(
+ UnwindDest == FirstUnwindDest,
+ "cleanupret instructions from the same cleanuppad must have the same "
+ "unwind destination",
+ FirstUser, U);
+ }
+ }
+
+ visitInstruction(CPI);
+}
+
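The user loop in visitCleanupPadInst above enforces that every cleanupret reached from one cleanuppad names the same unwind destination, remembering the first user and diagnosing the first disagreement. The same first-element-agreement pattern in generic form (the types here are placeholders, not the verifier's):

#include <cassert>
#include <optional>
#include <vector>

// Generic form of the check above: every element must project to the same
// value; the first mismatch is an error.
template <typename Range, typename Proj>
static bool allAgree(const Range &Xs, Proj P) {
  std::optional<decltype(P(*Xs.begin()))> First;
  for (const auto &X : Xs) {
    auto V = P(X);
    if (!First)
      First = V;        // remember the first user's destination
    else if (*First != V)
      return false;     // a later user disagrees -> verifier error
  }
  return true;
}

int main() {
  std::vector<int> SameDest{3, 3, 3}, MixedDest{3, 4};
  assert(allAgree(SameDest, [](int D) { return D; }));
  assert(!allAgree(MixedDest, [](int D) { return D; }));
}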
+void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
+ visitEHPadPredecessors(CatchSwitch);
+
+ BasicBlock *BB = CatchSwitch.getParent();
+
+ Function *F = BB->getParent();
+ Assert(F->hasPersonalityFn(),
+ "CatchSwitchInst needs to be in a function with a personality.",
+ &CatchSwitch);
+
+ // The catchswitch instruction must be the first non-PHI instruction in the
+ // block.
+ Assert(BB->getFirstNonPHI() == &CatchSwitch,
+ "CatchSwitchInst not the first non-PHI instruction in the block.",
+ &CatchSwitch);
+
+ if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) {
+ Instruction *I = UnwindDest->getFirstNonPHI();
+ Assert(I->isEHPad() && !isa<LandingPadInst>(I),
+ "CatchSwitchInst must unwind to an EH block which is not a "
+ "landingpad.",
+ &CatchSwitch);
+ }
+
+ auto *ParentPad = CatchSwitch.getParentPad();
+ Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
+ isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
+ "CatchSwitchInst has an invalid parent.", ParentPad);
+
+ visitTerminatorInst(CatchSwitch);
+}
+
+void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ Assert(isa<CleanupPadInst>(CRI.getOperand(0)),
+ "CleanupReturnInst needs to be provided a CleanupPad", &CRI,
+ CRI.getOperand(0));
+
+ if (BasicBlock *UnwindDest = CRI.getUnwindDest()) {
+ Instruction *I = UnwindDest->getFirstNonPHI();
+ Assert(I->isEHPad() && !isa<LandingPadInst>(I),
+ "CleanupReturnInst must unwind to an EH block which is not a "
+ "landingpad.",
+ &CRI);
+ }
+
+ visitTerminatorInst(CRI);
+}
+
void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
Instruction *Op = cast<Instruction>(I.getOperand(i));
// If the we have an invalid invoke, don't try to compute the dominance.
@@ -2835,6 +3115,19 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
"Instruction does not dominate all uses!", Op, &I);
}
+void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
+ Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null "
+ "apply only to pointer types", &I);
+ Assert(isa<LoadInst>(I),
+ "dereferenceable, dereferenceable_or_null apply only to load"
+ " instructions, use attributes for calls or invokes", &I);
+ Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null "
+ "take one operand!", &I);
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
+ Assert(CI && CI->getType()->isIntegerTy(64), "dereferenceable, "
+ "dereferenceable_or_null metadata value must be an i64!", &I);
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -2903,7 +3196,7 @@ void Verifier::visitInstruction(Instruction &I) {
" donothing or patchpoint",
&I);
Assert(F->getParent() == M, "Referencing function in another module!",
- &I);
+ &I, M, F, F->getParent());
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
Assert(OpBB->getParent() == BB->getParent(),
"Referring to a basic block in another function!", &I);
@@ -2911,7 +3204,7 @@ void Verifier::visitInstruction(Instruction &I) {
Assert(OpArg->getParent() == BB->getParent(),
"Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert(GV->getParent() == M, "Referencing global in another module!", &I);
+      Assert(GV->getParent() == M, "Referencing global in another module!",
+             &I, M, GV, GV->getParent());
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
@@ -2922,22 +3215,7 @@ void Verifier::visitInstruction(Instruction &I) {
if (CE->getType()->isPtrOrPtrVectorTy()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
      // illegal bitcast (inttoptr <constant int>)
- SmallVector<const ConstantExpr *, 4> Stack;
- SmallPtrSet<const ConstantExpr *, 4> Visited;
- Stack.push_back(CE);
-
- while (!Stack.empty()) {
- const ConstantExpr *V = Stack.pop_back_val();
- if (!Visited.insert(V).second)
- continue;
-
- VerifyConstantExprBitcastType(V);
-
- for (unsigned I = 0, N = V->getNumOperands(); I != N; ++I) {
- if (ConstantExpr *Op = dyn_cast<ConstantExpr>(V->getOperand(I)))
- Stack.push_back(Op);
- }
- }
+ visitConstantExprsRecursively(CE);
}
}
}
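The deleted lines show the pattern that visitConstantExprsRecursively now encapsulates: an explicit stack as the worklist plus a visited set, so that operands shared between constant expressions are expanded only once. A minimal sketch of that iterative DFS over a small node graph (the Node type is invented for illustration):

#include <cassert>
#include <unordered_set>
#include <vector>

// A node with operands, standing in for ConstantExpr's operand graph.
struct Node {
  std::vector<const Node *> Operands;
};

// Iterative DFS with an explicit worklist plus a visited set, the pattern
// the deleted loop used before being folded into
// visitConstantExprsRecursively. Returns how many distinct nodes were seen.
static unsigned visitAll(const Node *Root) {
  std::vector<const Node *> Stack{Root};
  std::unordered_set<const Node *> Visited;
  unsigned Count = 0;
  while (!Stack.empty()) {
    const Node *N = Stack.back();
    Stack.pop_back();
    if (!Visited.insert(N).second)
      continue;             // already handled; graphs may share nodes
    ++Count;                // "visit" N here
    for (const Node *Op : N->Operands)
      Stack.push_back(Op);
  }
  return Count;
}

int main() {
  Node Leaf, Mid{{&Leaf}}, Root{{&Mid, &Leaf}}; // diamond sharing Leaf
  assert(visitAll(&Root) == 3);                 // each node counted once
}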
@@ -2971,6 +3249,28 @@ void Verifier::visitInstruction(Instruction &I) {
&I);
}
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable))
+ visitDereferenceableMetadata(I, MD);
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null))
+ visitDereferenceableMetadata(I, MD);
+
+ if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
+ Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
+ &I);
+ Assert(isa<LoadInst>(I), "align applies only to load instructions, "
+ "use attributes for calls or invokes", &I);
+ Assert(AlignMD->getNumOperands() == 1, "align takes one operand!", &I);
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(AlignMD->getOperand(0));
+ Assert(CI && CI->getType()->isIntegerTy(64),
+ "align metadata value must be an i64!", &I);
+ uint64_t Align = CI->getZExtValue();
+ Assert(isPowerOf2_64(Align),
+ "align metadata value must be a power of 2!", &I);
+ Assert(Align <= Value::MaximumAlignment,
+ "alignment is larger that implementation defined limit", &I);
+ }
+
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
Assert(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N);
@@ -2998,6 +3298,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
case IITDescriptor::Void: return !Ty->isVoidTy();
case IITDescriptor::VarArg: return true;
case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
+ case IITDescriptor::Token: return !Ty->isTokenTy();
case IITDescriptor::Metadata: return !Ty->isMetadataTy();
case IITDescriptor::Half: return !Ty->isHalfTy();
case IITDescriptor::Float: return !Ty->isFloatTy();
@@ -3321,9 +3622,6 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
VerifyStatepoint(CS);
break;
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result: {
Assert(CS.getParent()->getParent()->hasGC(),
"Enclosing function does not use GC.", CS);
@@ -3339,9 +3637,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Assert that result type matches wrapped callee.
const Value *Target = StatepointCS.getArgument(2);
- const PointerType *PT = cast<PointerType>(Target->getType());
- const FunctionType *TargetFuncType =
- cast<FunctionType>(PT->getElementType());
+ auto *PT = cast<PointerType>(Target->getType());
+ auto *TargetFuncType = cast<FunctionType>(PT->getElementType());
Assert(CS.getType() == TargetFuncType->getReturnType(),
"gc.result result type does not match wrapped callee", CS);
break;
@@ -3352,19 +3649,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
- if (ExtractValueInst *ExtractValue =
- dyn_cast<ExtractValueInst>(CS.getArgOperand(0))) {
- Assert(isa<LandingPadInst>(ExtractValue->getAggregateOperand()),
- "gc relocate on unwind path incorrectly linked to the statepoint",
- CS);
+ if (LandingPadInst *LandingPad =
+ dyn_cast<LandingPadInst>(CS.getArgOperand(0))) {
const BasicBlock *InvokeBB =
- ExtractValue->getParent()->getUniquePredecessor();
+ LandingPad->getParent()->getUniquePredecessor();
// Landingpad relocates should have only one predecessor with invoke
// statepoint terminator
Assert(InvokeBB, "safepoints should have unique landingpads",
- ExtractValue->getParent());
+ LandingPad->getParent());
Assert(InvokeBB->getTerminator(), "safepoint block should be well formed",
InvokeBB);
Assert(isStatepoint(InvokeBB->getTerminator()),
@@ -3448,6 +3742,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}
+ case Intrinsic::eh_exceptioncode:
+ case Intrinsic::eh_exceptionpointer: {
+ Assert(isa<CatchPadInst>(CS.getArgOperand(0)),
+ "eh.exceptionpointer argument must be a catchpad", CS);
+ break;
+ }
};
}
@@ -3598,7 +3898,7 @@ void Verifier::verifyTypeRefs() {
for (auto *CU : CUs->operands())
if (auto Ts = cast<DICompileUnit>(CU)->getRetainedTypes())
for (DIType *Op : Ts)
- if (auto *T = dyn_cast<DICompositeType>(Op))
+ if (auto *T = dyn_cast_or_null<DICompositeType>(Op))
if (auto *S = T->getRawIdentifier()) {
UnresolvedTypeRefs.erase(S);
TypeRefs.insert(std::make_pair(S, T));
diff --git a/contrib/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm/lib/IRReader/IRReader.cpp
index 43fee65..9b243fc 100644
--- a/contrib/llvm/lib/IRReader/IRReader.cpp
+++ b/contrib/llvm/lib/IRReader/IRReader.cpp
@@ -31,11 +31,11 @@ static const char *const TimeIRParsingName = "Parse IR";
static std::unique_ptr<Module>
getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
- LLVMContext &Context) {
+ LLVMContext &Context, bool ShouldLazyLoadMetadata) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
- getLazyBitcodeModule(std::move(Buffer), Context);
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr = getLazyBitcodeModule(
+ std::move(Buffer), Context, ShouldLazyLoadMetadata);
if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
@@ -49,7 +49,8 @@ getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename,
SMDiagnostic &Err,
- LLVMContext &Context) {
+ LLVMContext &Context,
+ bool ShouldLazyLoadMetadata) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
@@ -58,7 +59,8 @@ std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename,
return nullptr;
}
- return getLazyIRModule(std::move(FileOrErr.get()), Err, Context);
+ return getLazyIRModule(std::move(FileOrErr.get()), Err, Context,
+ ShouldLazyLoadMetadata);
}
std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index 25ae4ac..6baaaa4 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
@@ -63,47 +64,15 @@ const char* LTOCodeGenerator::getVersionString() {
#endif
}
-static void handleLTODiagnostic(const DiagnosticInfo &DI) {
- DiagnosticPrinterRawOStream DP(errs());
- DI.print(DP);
- errs() << "\n";
-}
-
-LTOCodeGenerator::LTOCodeGenerator()
- : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context),
- handleLTODiagnostic) {
- initializeLTOPasses();
-}
-
-LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
- : OwnedContext(std::move(Context)), Context(*OwnedContext),
- IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) {
+LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
+ : Context(Context), MergedModule(new Module("ld-temp.o", Context)),
+ TheLinker(new Linker(*MergedModule)) {
initializeLTOPasses();
}
-void LTOCodeGenerator::destroyMergedModule() {
- if (OwnedModule) {
- assert(IRLinker.getModule() == &OwnedModule->getModule() &&
- "The linker's module should be the same as the owned module");
- delete OwnedModule;
- OwnedModule = nullptr;
- } else if (IRLinker.getModule())
- IRLinker.deleteModule();
-}
-
-LTOCodeGenerator::~LTOCodeGenerator() {
- destroyMergedModule();
+LTOCodeGenerator::~LTOCodeGenerator() {}
- delete TargetMach;
- TargetMach = nullptr;
-
- for (std::vector<char *>::iterator I = CodegenOptions.begin(),
- E = CodegenOptions.end();
- I != E; ++I)
- free(*I);
-}
-
-// Initialize LTO passes. Please keep this funciton in sync with
+// Initialize LTO passes. Please keep this function in sync with
// PassManagerBuilder::populateLTOPassManager(), and make sure all LTO
// passes are initialized.
void LTOCodeGenerator::initializeLTOPasses() {
@@ -120,11 +89,11 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeGlobalDCEPass(R);
initializeArgPromotionPass(R);
initializeJumpThreadingPass(R);
- initializeSROAPass(R);
+ initializeSROALegacyPassPass(R);
initializeSROA_DTPass(R);
initializeSROA_SSAUpPass(R);
initializeFunctionAttrsPass(R);
- initializeGlobalsModRefPass(R);
+ initializeGlobalsAAWrapperPassPass(R);
initializeLICMPass(R);
initializeMergedLoadStoreMotionPass(R);
initializeGVNPass(R);
@@ -133,41 +102,39 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeCFGSimplifyPassPass(R);
}
-bool LTOCodeGenerator::addModule(LTOModule *mod) {
- assert(&mod->getModule().getContext() == &Context &&
+bool LTOCodeGenerator::addModule(LTOModule *Mod) {
+ assert(&Mod->getModule().getContext() == &Context &&
"Expected module in same context");
- bool ret = IRLinker.linkInModule(&mod->getModule());
+ bool ret = TheLinker->linkInModule(Mod->takeModule());
- const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
+ const std::vector<const char *> &undefs = Mod->getAsmUndefinedRefs();
for (int i = 0, e = undefs.size(); i != e; ++i)
AsmUndefinedRefs[undefs[i]] = 1;
return !ret;
}
-void LTOCodeGenerator::setModule(LTOModule *Mod) {
+void LTOCodeGenerator::setModule(std::unique_ptr<LTOModule> Mod) {
assert(&Mod->getModule().getContext() == &Context &&
"Expected module in same context");
- // Delete the old merged module.
- destroyMergedModule();
AsmUndefinedRefs.clear();
- OwnedModule = Mod;
- IRLinker.setModule(&Mod->getModule());
+ MergedModule = Mod->takeModule();
+ TheLinker = make_unique<Linker>(*MergedModule);
const std::vector<const char*> &Undefs = Mod->getAsmUndefinedRefs();
for (int I = 0, E = Undefs.size(); I != E; ++I)
AsmUndefinedRefs[Undefs[I]] = 1;
}
-void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
- Options = options;
+void LTOCodeGenerator::setTargetOptions(TargetOptions Options) {
+ this->Options = Options;
}
-void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) {
- switch (debug) {
+void LTOCodeGenerator::setDebugInfo(lto_debug_model Debug) {
+ switch (Debug) {
case LTO_DEBUG_MODEL_NONE:
EmitDwarfDebugInfo = false;
return;
@@ -179,21 +146,26 @@ void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) {
llvm_unreachable("Unknown debug format!");
}
-void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) {
- switch (model) {
- case LTO_CODEGEN_PIC_MODEL_STATIC:
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
- case LTO_CODEGEN_PIC_MODEL_DEFAULT:
- CodeModel = model;
- return;
+void LTOCodeGenerator::setOptLevel(unsigned Level) {
+ OptLevel = Level;
+ switch (OptLevel) {
+ case 0:
+ CGOptLevel = CodeGenOpt::None;
+ break;
+ case 1:
+ CGOptLevel = CodeGenOpt::Less;
+ break;
+ case 2:
+ CGOptLevel = CodeGenOpt::Default;
+ break;
+ case 3:
+ CGOptLevel = CodeGenOpt::Aggressive;
+ break;
}
- llvm_unreachable("Unknown PIC model!");
}
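setOptLevel, added above in place of the old PIC-model switch, is a straight mapping from the numeric optimization level onto the CodeGenOpt enum. A standalone version of that mapping follows; note the default clamp for levels above 3 is an assumption of this sketch, while the patched switch handles exactly 0 through 3:

#include <cassert>

enum class CGLevel { None, Less, Default, Aggressive }; // stand-in enum

// Maps 0..3 the way the switch above does; values above 3 clamp to
// Aggressive here, a choice made for the sketch only.
static CGLevel toCGLevel(unsigned OptLevel) {
  switch (OptLevel) {
  case 0: return CGLevel::None;
  case 1: return CGLevel::Less;
  case 2: return CGLevel::Default;
  default: return CGLevel::Aggressive;
  }
}

int main() {
  assert(toCGLevel(0) == CGLevel::None);
  assert(toCGLevel(2) == CGLevel::Default);
  assert(toCGLevel(9) == CGLevel::Aggressive); // clamped (sketch-only)
}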
-bool LTOCodeGenerator::writeMergedModules(const char *path,
- std::string &errMsg) {
- if (!determineTarget(errMsg))
+bool LTOCodeGenerator::writeMergedModules(const char *Path) {
+ if (!determineTarget())
return false;
  // mark which symbols cannot be internalized
@@ -201,20 +173,22 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
// create output file
std::error_code EC;
- tool_output_file Out(path, EC, sys::fs::F_None);
+ tool_output_file Out(Path, EC, sys::fs::F_None);
if (EC) {
- errMsg = "could not open bitcode file for writing: ";
- errMsg += path;
+ std::string ErrMsg = "could not open bitcode file for writing: ";
+ ErrMsg += Path;
+ emitError(ErrMsg);
return false;
}
// write bitcode to it
- WriteBitcodeToFile(IRLinker.getModule(), Out.os(), ShouldEmbedUselists);
+ WriteBitcodeToFile(MergedModule.get(), Out.os(), ShouldEmbedUselists);
Out.os().close();
if (Out.os().has_error()) {
- errMsg = "could not write bitcode file: ";
- errMsg += path;
+ std::string ErrMsg = "could not write bitcode file: ";
+ ErrMsg += Path;
+ emitError(ErrMsg);
Out.os().clear_error();
return false;
}
@@ -223,22 +197,25 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
return true;
}
-bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
- std::string &errMsg) {
- // make unique temp .o file to put generated object file
+bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
+  // Create a unique temporary output file to hold the generated code.
SmallString<128> Filename;
int FD;
+
+ const char *Extension =
+ (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o");
+
std::error_code EC =
- sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename);
+ sys::fs::createTemporaryFile("lto-llvm", Extension, FD, Filename);
if (EC) {
- errMsg = EC.message();
+ emitError(EC.message());
return false;
}
// generate object file
tool_output_file objFile(Filename.c_str(), FD);
- bool genResult = compileOptimized(objFile.os(), errMsg);
+ bool genResult = compileOptimized(&objFile.os());
objFile.os().close();
if (objFile.os().has_error()) {
objFile.os().clear_error();
@@ -253,21 +230,21 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
}
NativeObjectPath = Filename.c_str();
- *name = NativeObjectPath.c_str();
+ *Name = NativeObjectPath.c_str();
return true;
}
std::unique_ptr<MemoryBuffer>
-LTOCodeGenerator::compileOptimized(std::string &errMsg) {
+LTOCodeGenerator::compileOptimized() {
const char *name;
- if (!compileOptimizedToFile(&name, errMsg))
+ if (!compileOptimizedToFile(&name))
return nullptr;
// read .o file into memory buffer
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(name, -1, false);
if (std::error_code EC = BufferOrErr.getError()) {
- errMsg = EC.message();
+ emitError(EC.message());
sys::fs::remove(NativeObjectPath);
return nullptr;
}
@@ -278,66 +255,51 @@ LTOCodeGenerator::compileOptimized(std::string &errMsg) {
return std::move(*BufferOrErr);
}
-
-bool LTOCodeGenerator::compile_to_file(const char **name,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string &errMsg) {
- if (!optimize(disableInline, disableGVNLoadPRE,
- disableVectorization, errMsg))
+bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify,
+ bool DisableInline,
+ bool DisableGVNLoadPRE,
+ bool DisableVectorization) {
+ if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
+ DisableVectorization))
return false;
- return compileOptimizedToFile(name, errMsg);
+ return compileOptimizedToFile(Name);
}
std::unique_ptr<MemoryBuffer>
-LTOCodeGenerator::compile(bool disableInline, bool disableGVNLoadPRE,
- bool disableVectorization, std::string &errMsg) {
- if (!optimize(disableInline, disableGVNLoadPRE,
- disableVectorization, errMsg))
+LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline,
+ bool DisableGVNLoadPRE, bool DisableVectorization) {
+ if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
+ DisableVectorization))
return nullptr;
- return compileOptimized(errMsg);
+ return compileOptimized();
}
-bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
+bool LTOCodeGenerator::determineTarget() {
if (TargetMach)
return true;
- std::string TripleStr = IRLinker.getModule()->getTargetTriple();
- if (TripleStr.empty())
+ std::string TripleStr = MergedModule->getTargetTriple();
+ if (TripleStr.empty()) {
TripleStr = sys::getDefaultTargetTriple();
+ MergedModule->setTargetTriple(TripleStr);
+ }
llvm::Triple Triple(TripleStr);
// create target machine from info for merged modules
- const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
- if (!march)
+ std::string ErrMsg;
+ const Target *march = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
+ if (!march) {
+ emitError(ErrMsg);
return false;
-
- // The relocation model is actually a static member of TargetMachine and
- // needs to be set before the TargetMachine is instantiated.
- Reloc::Model RelocModel = Reloc::Default;
- switch (CodeModel) {
- case LTO_CODEGEN_PIC_MODEL_STATIC:
- RelocModel = Reloc::Static;
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
- RelocModel = Reloc::PIC_;
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
- RelocModel = Reloc::DynamicNoPIC;
- break;
- case LTO_CODEGEN_PIC_MODEL_DEFAULT:
- // RelocModel is already the default, so leave it that way.
- break;
}
// Construct LTOModule, hand over ownership of module and target. Use MAttr as
// the default set of features.
SubtargetFeatures Features(MAttr);
Features.getDefaultSubtargetFeatures(Triple);
- std::string FeatureStr = Features.getString();
+ FeatureStr = Features.getString();
// Set a default CPU for Darwin triples.
if (MCpu.empty() && Triple.isOSDarwin()) {
if (Triple.getArch() == llvm::Triple::x86_64)
@@ -348,25 +310,9 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "cyclone";
}
- CodeGenOpt::Level CGOptLevel;
- switch (OptLevel) {
- case 0:
- CGOptLevel = CodeGenOpt::None;
- break;
- case 1:
- CGOptLevel = CodeGenOpt::Less;
- break;
- case 2:
- CGOptLevel = CodeGenOpt::Default;
- break;
- case 3:
- CGOptLevel = CodeGenOpt::Aggressive;
- break;
- }
-
- TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
- RelocModel, CodeModel::Default,
- CGOptLevel);
+ TargetMach.reset(march->createTargetMachine(TripleStr, MCpu, FeatureStr,
+ Options, RelocModel,
+ CodeModel::Default, CGOptLevel));
return true;
}
@@ -453,7 +399,6 @@ static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
void LTOCodeGenerator::applyScopeRestrictions() {
if (ScopeRestrictionsDone || !ShouldInternalize)
return;
- Module *mergedModule = IRLinker.getModule();
// Start off with a verification pass.
legacy::PassManager passes;
@@ -467,20 +412,17 @@ void LTOCodeGenerator::applyScopeRestrictions() {
TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple()));
TargetLibraryInfo TLI(TLII);
- accumulateAndSortLibcalls(Libcalls, TLI, *mergedModule, *TargetMach);
+ accumulateAndSortLibcalls(Libcalls, TLI, *MergedModule, *TargetMach);
- for (Module::iterator f = mergedModule->begin(),
- e = mergedModule->end(); f != e; ++f)
- applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler);
- for (Module::global_iterator v = mergedModule->global_begin(),
- e = mergedModule->global_end(); v != e; ++v)
- applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler);
- for (Module::alias_iterator a = mergedModule->alias_begin(),
- e = mergedModule->alias_end(); a != e; ++a)
- applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler);
+ for (Function &f : *MergedModule)
+ applyRestriction(f, Libcalls, MustPreserveList, AsmUsed, Mangler);
+ for (GlobalVariable &v : MergedModule->globals())
+ applyRestriction(v, Libcalls, MustPreserveList, AsmUsed, Mangler);
+ for (GlobalAlias &a : MergedModule->aliases())
+ applyRestriction(a, Libcalls, MustPreserveList, AsmUsed, Mangler);
GlobalVariable *LLVMCompilerUsed =
- mergedModule->getGlobalVariable("llvm.compiler.used");
+ MergedModule->getGlobalVariable("llvm.compiler.used");
findUsedValues(LLVMCompilerUsed, AsmUsed);
if (LLVMCompilerUsed)
LLVMCompilerUsed->eraseFromParent();
@@ -495,7 +437,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
LLVMCompilerUsed =
- new llvm::GlobalVariable(*mergedModule, ATy, false,
+ new llvm::GlobalVariable(*MergedModule, ATy, false,
llvm::GlobalValue::AppendingLinkage,
llvm::ConstantArray::get(ATy, asmUsed2),
"llvm.compiler.used");
@@ -506,21 +448,18 @@ void LTOCodeGenerator::applyScopeRestrictions() {
passes.add(createInternalizePass(MustPreserveList));
// apply scope restrictions
- passes.run(*mergedModule);
+ passes.run(*MergedModule);
ScopeRestrictionsDone = true;
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::optimize(bool DisableInline,
+bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
bool DisableGVNLoadPRE,
- bool DisableVectorization,
- std::string &errMsg) {
- if (!this->determineTarget(errMsg))
+ bool DisableVectorization) {
+ if (!this->determineTarget())
return false;
- Module *mergedModule = IRLinker.getModule();
-
  // Mark which symbols cannot be internalized
this->applyScopeRestrictions();
@@ -528,7 +467,7 @@ bool LTOCodeGenerator::optimize(bool DisableInline,
legacy::PassManager passes;
// Add an appropriate DataLayout instance for this module...
- mergedModule->setDataLayout(*TargetMach->getDataLayout());
+ MergedModule->setDataLayout(TargetMach->createDataLayout());
passes.add(
createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis()));
@@ -542,60 +481,57 @@ bool LTOCodeGenerator::optimize(bool DisableInline,
PMB.Inliner = createFunctionInliningPass();
PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
PMB.OptLevel = OptLevel;
- PMB.VerifyInput = true;
- PMB.VerifyOutput = true;
+ PMB.VerifyInput = !DisableVerify;
+ PMB.VerifyOutput = !DisableVerify;
PMB.populateLTOPassManager(passes);
// Run our queue of passes all at once now, efficiently.
- passes.run(*mergedModule);
+ passes.run(*MergedModule);
return true;
}
-bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out,
- std::string &errMsg) {
- if (!this->determineTarget(errMsg))
+bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) {
+ if (!this->determineTarget())
return false;
- Module *mergedModule = IRLinker.getModule();
-
- legacy::PassManager codeGenPasses;
+ legacy::PassManager preCodeGenPasses;
// If the bitcode files contain ARC code and were compiled with optimization,
// the ObjCARCContractPass must be run, so do it unconditionally here.
- codeGenPasses.add(createObjCARCContractPass());
-
- if (TargetMach->addPassesToEmitFile(codeGenPasses, out,
- TargetMachine::CGFT_ObjectFile)) {
- errMsg = "target file type not supported";
- return false;
- }
-
- // Run the code generator, and write assembly file
- codeGenPasses.run(*mergedModule);
+ preCodeGenPasses.add(createObjCARCContractPass());
+ preCodeGenPasses.run(*MergedModule);
+
+ // Do code generation. We need to preserve the module in case the client calls
+ // writeMergedModules() after compilation, but we only need to allow this at
+ // parallelism level 1. This is achieved by having splitCodeGen return the
+ // original module at parallelism level 1 which we then assign back to
+ // MergedModule.
+ MergedModule =
+ splitCodeGen(std::move(MergedModule), Out, MCpu, FeatureStr, Options,
+ RelocModel, CodeModel::Default, CGOptLevel, FileType);
return true;
}
/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging
/// LTO problems.
-void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) {
- for (std::pair<StringRef, StringRef> o = getToken(options);
- !o.first.empty(); o = getToken(o.second)) {
- // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add
- // that.
- if (CodegenOptions.empty())
- CodegenOptions.push_back(strdup("libLLVMLTO"));
- CodegenOptions.push_back(strdup(o.first.str().c_str()));
- }
+void LTOCodeGenerator::setCodeGenDebugOptions(const char *Options) {
+ for (std::pair<StringRef, StringRef> o = getToken(Options); !o.first.empty();
+ o = getToken(o.second))
+ CodegenOptions.push_back(o.first);
}
void LTOCodeGenerator::parseCodeGenDebugOptions() {
// if options were requested, set them
- if (!CodegenOptions.empty())
- cl::ParseCommandLineOptions(CodegenOptions.size(),
- const_cast<char **>(&CodegenOptions[0]));
+ if (!CodegenOptions.empty()) {
+ // ParseCommandLineOptions() expects argv[0] to be program name.
+ std::vector<const char *> CodegenArgv(1, "libLLVMLTO");
+ for (std::string &Arg : CodegenOptions)
+ CodegenArgv.push_back(Arg.c_str());
+ cl::ParseCommandLineOptions(CodegenArgv.size(), CodegenArgv.data());
+ }
}
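The rewritten parseCodeGenDebugOptions stores the tokens as std::strings and only materializes a char* argv vector at parse time, instead of strdup'ing each token as the deleted code did. A small sketch of that borrow-don't-copy argv construction (names are illustrative): the std::string storage must simply outlive the borrowed pointers, which is why the strings live in a member and the argv vector is rebuilt on use.

#include <cassert>
#include <cstring>
#include <string>
#include <vector>

// Builds an argv view over owned strings; no allocation is duplicated and
// nothing needs freeing afterwards.
static std::vector<const char *>
makeArgv(const std::string &Program, const std::vector<std::string> &Opts) {
  std::vector<const char *> Argv;
  Argv.push_back(Program.c_str()); // argv[0] is the program name
  for (const std::string &Opt : Opts)
    Argv.push_back(Opt.c_str());   // borrow, don't copy or strdup
  return Argv;
}

int main() {
  std::string Prog = "libLLVMLTO";
  std::vector<std::string> Opts{"-debug-pass=Arguments"};
  auto Argv = makeArgv(Prog, Opts);
  assert(Argv.size() == 2);
  assert(std::strcmp(Argv[0], "libLLVMLTO") == 0);
}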
void LTOCodeGenerator::DiagnosticHandler(const DiagnosticInfo &DI,
@@ -645,3 +581,20 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler,
Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this,
/* RespectFilters */ true);
}
+
+namespace {
+class LTODiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+public:
+ LTODiagnosticInfo(const Twine &DiagMsg, DiagnosticSeverity Severity=DS_Error)
+ : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {}
+ void print(DiagnosticPrinter &DP) const override { DP << Msg; }
+};
+}
+
+void LTOCodeGenerator::emitError(const std::string &ErrMsg) {
+ if (DiagHandler)
+ (*DiagHandler)(LTO_DS_ERROR, ErrMsg.c_str(), DiagContext);
+ else
+ Context.diagnose(LTODiagnosticInfo(ErrMsg));
+}
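emitError introduces a two-level diagnostic path: a client-installed handler wins, otherwise the message is routed to the context's default diagnostics. The same fallback shape in miniature (the Reporter type is invented for this sketch):

#include <cassert>
#include <functional>
#include <string>

// Prefer a client-installed handler, otherwise fall back to a default sink,
// as emitError above does with DiagHandler versus Context.diagnose().
struct Reporter {
  std::function<void(const std::string &)> Handler; // may be unset
  std::string LastDefault;                          // stands in for Context

  void emitError(const std::string &Msg) {
    if (Handler)
      Handler(Msg);      // client takes over diagnostics
    else
      LastDefault = Msg; // default path
  }
};

int main() {
  Reporter R;
  R.emitError("no handler yet");
  assert(R.LastDefault == "no handler yet");

  std::string Seen;
  R.Handler = [&](const std::string &M) { Seen = M; };
  R.emitError("handled");
  assert(Seen == "handled" && R.LastDefault == "no handler yet");
}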
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 53ed417..409b949 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -91,106 +91,97 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
return StringRef(Triple).startswith(TriplePrefix);
}
-LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
- std::string &errMsg) {
+std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
+ ErrorOr<MemoryBufferRef> BCOrErr =
+ IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
+ if (!BCOrErr)
+ return "";
+ LLVMContext Context;
+ return getBitcodeProducerString(*BCOrErr, Context);
+}
+
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createFromFile(LLVMContext &Context, const char *path,
+ TargetOptions options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(path);
- if (std::error_code EC = BufferOrErr.getError()) {
- errMsg = EC.message();
- return nullptr;
- }
+ if (std::error_code EC = BufferOrErr.getError())
+ return EC;
std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
- return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
- &getGlobalContext());
+ return makeLTOModule(Buffer->getMemBufferRef(), options, &Context);
}
-LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
- TargetOptions options,
- std::string &errMsg) {
- return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path,
+ size_t size, TargetOptions options) {
+ return createFromOpenFileSlice(Context, fd, path, size, 0, options);
}
-LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
- size_t map_size, off_t offset,
- TargetOptions options,
- std::string &errMsg) {
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd,
+ const char *path, size_t map_size,
+ off_t offset, TargetOptions options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
- if (std::error_code EC = BufferOrErr.getError()) {
- errMsg = EC.message();
- return nullptr;
- }
+ if (std::error_code EC = BufferOrErr.getError())
+ return EC;
std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
- return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
- &getGlobalContext());
+ return makeLTOModule(Buffer->getMemBufferRef(), options, &Context);
}
-LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
- TargetOptions options,
- std::string &errMsg, StringRef path) {
- return createInContext(mem, length, options, errMsg, path,
- &getGlobalContext());
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createFromBuffer(LLVMContext &Context, const void *mem,
+ size_t length, TargetOptions options,
+ StringRef path) {
+ return createInContext(mem, length, options, path, &Context);
}
-LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length,
- TargetOptions options,
- std::string &errMsg,
- StringRef path) {
- return createInContext(mem, length, options, errMsg, path, nullptr);
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createInLocalContext(const void *mem, size_t length,
+ TargetOptions options, StringRef path) {
+ return createInContext(mem, length, options, path, nullptr);
}
-LTOModule *LTOModule::createInContext(const void *mem, size_t length,
- TargetOptions options,
- std::string &errMsg, StringRef path,
- LLVMContext *Context) {
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::createInContext(const void *mem, size_t length,
+ TargetOptions options, StringRef path,
+ LLVMContext *Context) {
StringRef Data((const char *)mem, length);
MemoryBufferRef Buffer(Data, path);
- return makeLTOModule(Buffer, options, errMsg, Context);
+ return makeLTOModule(Buffer, options, Context);
}
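createInContext wraps the caller's raw (pointer, length) pair in non-owning views, StringRef and MemoryBufferRef, before forwarding; ownership stays with the caller. A sketch of the same non-owning wrap using std::string_view (the counting function is invented purely for illustration):

#include <cassert>
#include <cstddef>
#include <string_view>

// Wraps a raw (pointer, length) pair in a non-owning view, the role
// StringRef/MemoryBufferRef play above; the caller keeps ownership.
static std::size_t countNuls(const void *Mem, std::size_t Length) {
  std::string_view Data(static_cast<const char *>(Mem), Length);
  std::size_t N = 0;
  for (char C : Data)
    if (C == '\0')
      ++N;
  return N;
}

int main() {
  const char Buf[] = {'B', 'C', '\0', 0x0B, '\0'};
  assert(countNuls(Buf, sizeof(Buf)) == 2);
}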
-static std::unique_ptr<Module> parseBitcodeFileImpl(MemoryBufferRef Buffer,
- LLVMContext &Context,
- bool ShouldBeLazy,
- std::string &ErrMsg) {
+static ErrorOr<std::unique_ptr<Module>>
+parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context,
+ bool ShouldBeLazy) {
// Find the buffer.
ErrorOr<MemoryBufferRef> MBOrErr =
IRObjectFile::findBitcodeInMemBuffer(Buffer);
- if (std::error_code EC = MBOrErr.getError()) {
- ErrMsg = EC.message();
- return nullptr;
- }
-
- std::function<void(const DiagnosticInfo &)> DiagnosticHandler =
- [&ErrMsg](const DiagnosticInfo &DI) {
- raw_string_ostream Stream(ErrMsg);
- DiagnosticPrinterRawOStream DP(Stream);
- DI.print(DP);
- };
+ if (std::error_code EC = MBOrErr.getError())
+ return EC;
if (!ShouldBeLazy) {
// Parse the full file.
- ErrorOr<std::unique_ptr<Module>> M =
- parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler);
- if (!M)
- return nullptr;
+ ErrorOr<std::unique_ptr<Module>> M = parseBitcodeFile(*MBOrErr, Context);
+ if (std::error_code EC = M.getError())
+ return EC;
return std::move(*M);
}
// Parse lazily.
std::unique_ptr<MemoryBuffer> LightweightBuf =
MemoryBuffer::getMemBuffer(*MBOrErr, false);
- ErrorOr<std::unique_ptr<Module>> M =
- getLazyBitcodeModule(std::move(LightweightBuf), Context,
- DiagnosticHandler, true /*ShouldLazyLoadMetadata*/);
- if (!M)
- return nullptr;
+ ErrorOr<std::unique_ptr<Module>> M = getLazyBitcodeModule(
+ std::move(LightweightBuf), Context, true /*ShouldLazyLoadMetadata*/);
+ if (std::error_code EC = M.getError())
+ return EC;
return std::move(*M);
}
-LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
- TargetOptions options, std::string &errMsg,
- LLVMContext *Context) {
+ErrorOr<std::unique_ptr<LTOModule>>
+LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
+ LLVMContext *Context) {
std::unique_ptr<LLVMContext> OwnedContext;
if (!Context) {
OwnedContext = llvm::make_unique<LLVMContext>();
@@ -199,11 +190,12 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
// If we own a context, we know this is being used only for symbol
// extraction, not linking. Be lazy in that case.
- std::unique_ptr<Module> M = parseBitcodeFileImpl(
- Buffer, *Context,
- /* ShouldBeLazy */ static_cast<bool>(OwnedContext), errMsg);
- if (!M)
- return nullptr;
+ ErrorOr<std::unique_ptr<Module>> MOrErr =
+ parseBitcodeFileImpl(Buffer, *Context,
+ /* ShouldBeLazy */ static_cast<bool>(OwnedContext));
+ if (std::error_code EC = MOrErr.getError())
+ return EC;
+ std::unique_ptr<Module> &M = *MOrErr;
std::string TripleStr = M->getTargetTriple();
if (TripleStr.empty())
@@ -211,9 +203,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
llvm::Triple Triple(TripleStr);
// find machine architecture for this module
+ std::string errMsg;
const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
if (!march)
- return nullptr;
+ return std::unique_ptr<LTOModule>(nullptr);
// construct LTOModule, hand over ownership of module and target
SubtargetFeatures Features;
@@ -232,25 +225,21 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
options);
- M->setDataLayout(*target->getDataLayout());
+ M->setDataLayout(target->createDataLayout());
std::unique_ptr<object::IRObjectFile> IRObj(
new object::IRObjectFile(Buffer, std::move(M)));
- LTOModule *Ret;
+ std::unique_ptr<LTOModule> Ret;
if (OwnedContext)
- Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext));
+ Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext)));
else
- Ret = new LTOModule(std::move(IRObj), target);
-
- if (Ret->parseSymbols(errMsg)) {
- delete Ret;
- return nullptr;
- }
+ Ret.reset(new LTOModule(std::move(IRObj), target));
+ Ret->parseSymbols();
Ret->parseMetadata();
- return Ret;
+ return std::move(Ret);
}
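This hunk is part of the file-wide migration visible throughout the diff: factory functions stop returning a raw pointer plus an errMsg out-parameter and instead return ErrorOr<std::unique_ptr<...>>, so the error travels in the return value. A sketch of that shape using std::variant as a stand-in for ErrorOr (the real type carries a std::error_code rather than a string; Widget is invented for the example):

#include <cassert>
#include <memory>
#include <string>
#include <variant>

struct Widget { int Value = 42; };

// Minimal stand-in for llvm::ErrorOr: either the payload or an error.
using WidgetOrErr = std::variant<std::unique_ptr<Widget>, std::string>;

static WidgetOrErr makeWidget(bool Fail) {
  if (Fail)
    return std::string("no target found"); // error rides in the return value
  return std::make_unique<Widget>();       // success owns the object
}

int main() {
  auto Ok = makeWidget(false);
  assert(std::holds_alternative<std::unique_ptr<Widget>>(Ok));
  assert(std::get<std::unique_ptr<Widget>>(Ok)->Value == 42);

  auto Bad = makeWidget(true);
  assert(std::get<std::string>(Bad) == "no target found");
}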
/// Create a MemoryBuffer from a memory range with an optional name.
@@ -583,9 +572,7 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
info.symbol = decl;
}
-/// parseSymbols - Parse the symbols from the module and model-level ASM and add
-/// them to either the defined or undefined lists.
-bool LTOModule::parseSymbols(std::string &errMsg) {
+void LTOModule::parseSymbols() {
for (auto &Sym : IRFile->symbols()) {
const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
uint32_t Flags = Sym.getFlags();
@@ -640,8 +627,6 @@ bool LTOModule::parseSymbols(std::string &errMsg) {
NameAndAttributes info = u->getValue();
_symbols.push_back(info);
}
-
- return false;
}
/// parseMetadata - Parse metadata from the module
diff --git a/contrib/llvm/lib/LibDriver/LibDriver.cpp b/contrib/llvm/lib/LibDriver/LibDriver.cpp
index b33a22f..3ae5434 100644
--- a/contrib/llvm/lib/LibDriver/LibDriver.cpp
+++ b/contrib/llvm/lib/LibDriver/LibDriver.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// Defines an interface to a lib.exe-compatible driver that also understands
-// bitcode files. Used by llvm-lib and lld-link2 /lib.
+// bitcode files. Used by llvm-lib and lld-link /lib.
//
//===----------------------------------------------------------------------===//
@@ -51,7 +51,7 @@ static const llvm::opt::OptTable::Info infoTable[] = {
class LibOptTable : public llvm::opt::OptTable {
public:
- LibOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {}
+ LibOptTable() : OptTable(infoTable, true) {}
};
}
@@ -102,7 +102,7 @@ static Optional<std::string> findInputFile(StringRef File,
int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
SmallVector<const char *, 20> NewArgs(ArgsArr.begin(), ArgsArr.end());
BumpPtrAllocator Alloc;
- BumpPtrStringSaver Saver(Alloc);
+ StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs);
ArgsArr = NewArgs;
@@ -135,14 +135,13 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
llvm::errs() << Arg->getValue() << ": no such file or directory\n";
return 1;
}
- Members.emplace_back(Saver.save(*Path),
- llvm::sys::path::filename(Arg->getValue()));
+ Members.emplace_back(Saver.save(*Path));
}
std::pair<StringRef, std::error_code> Result =
llvm::writeArchive(getOutputPath(&Args, Members[0]), Members,
/*WriteSymtab=*/true, object::Archive::K_GNU,
- /*Deterministic*/ true);
+ /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin));
if (Result.second) {
if (Result.first.empty())
diff --git a/contrib/llvm/lib/LibDriver/Options.td b/contrib/llvm/lib/LibDriver/Options.td
index 0aa1aff..5a56ef7 100644
--- a/contrib/llvm/lib/LibDriver/Options.td
+++ b/contrib/llvm/lib/LibDriver/Options.td
@@ -12,6 +12,8 @@ class P<string name, string help> :
def libpath: P<"libpath", "Object file search path">;
def out : P<"out", "Path to file to write output">;
+def llvmlibthin : F<"llvmlibthin">;
+
//==============================================================================
// The flags below do nothing. They are defined only for lib.exe compatibility.
//==============================================================================
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
new file mode 100644
index 0000000..fa6e375
--- /dev/null
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -0,0 +1,1657 @@
+//===- lib/Linker/IRMover.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker/IRMover.h"
+#include "LinkDiagnosticInfo.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GVMaterializer.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TypeMapTy : public ValueMapTypeRemapper {
+ /// This is a mapping from a source type to a destination type to use.
+ DenseMap<Type *, Type *> MappedTypes;
+
+ /// When checking to see if two subgraphs are isomorphic, we speculatively
+ /// add types to MappedTypes, but keep track of them here in case we need to
+ /// roll back.
+ SmallVector<Type *, 16> SpeculativeTypes;
+
+ SmallVector<StructType *, 16> SpeculativeDstOpaqueTypes;
+
+ /// This is a list of non-opaque structs in the source module that are mapped
+ /// to an opaque struct in the destination module.
+ SmallVector<StructType *, 16> SrcDefinitionsToResolve;
+
+  /// This is the set of opaque types in the destination module that are
+ /// getting a body from the source module.
+ SmallPtrSet<StructType *, 16> DstResolvedOpaqueTypes;
+
+public:
+ TypeMapTy(IRMover::IdentifiedStructTypeSet &DstStructTypesSet)
+ : DstStructTypesSet(DstStructTypesSet) {}
+
+ IRMover::IdentifiedStructTypeSet &DstStructTypesSet;
+ /// Indicate that the specified type in the destination module is conceptually
+ /// equivalent to the specified type in the source module.
+ void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+ /// Produce a body for an opaque type in the dest module from a type
+ /// definition in the source module.
+ void linkDefinedTypeBodies();
+
+ /// Return the mapped type to use for the specified input type from the
+ /// source module.
+ Type *get(Type *SrcTy);
+ Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
+
+ void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
+
+ FunctionType *get(FunctionType *T) {
+ return cast<FunctionType>(get((Type *)T));
+ }
+
+private:
+ Type *remapType(Type *SrcTy) override { return get(SrcTy); }
+
+ bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
+
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+ assert(SpeculativeTypes.empty());
+ assert(SpeculativeDstOpaqueTypes.empty());
+
+ // Check to see if these types are recursively isomorphic and establish a
+ // mapping between them if so.
+ if (!areTypesIsomorphic(DstTy, SrcTy)) {
+ // Oops, they aren't isomorphic. Just discard this request by rolling out
+ // any speculative mappings we've established.
+ for (Type *Ty : SpeculativeTypes)
+ MappedTypes.erase(Ty);
+
+ SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() -
+ SpeculativeDstOpaqueTypes.size());
+ for (StructType *Ty : SpeculativeDstOpaqueTypes)
+ DstResolvedOpaqueTypes.erase(Ty);
+ } else {
+ for (Type *Ty : SpeculativeTypes)
+ if (auto *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ STy->setName("");
+ }
+ SpeculativeTypes.clear();
+ SpeculativeDstOpaqueTypes.clear();
+}
+
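addTypeMapping's rollback relies on logging every tentative map insertion in SpeculativeTypes, so a failed isomorphism check can erase exactly those entries and nothing else. A minimal sketch of that speculate/commit/rollback idiom over a plain map (keys and values are ints purely for illustration):

#include <cassert>
#include <map>
#include <vector>

// Record tentative insertions in a side list; erase them all if the
// speculative attempt fails, keep them if it succeeds.
struct SpeculativeMap {
  std::map<int, int> Mapped;
  std::vector<int> Speculative; // keys added during the current attempt

  void speculate(int Src, int Dst) {
    Mapped[Src] = Dst;
    Speculative.push_back(Src);
  }
  void commit() { Speculative.clear(); } // keep everything
  void rollback() {                      // drop this attempt only
    for (int Src : Speculative)
      Mapped.erase(Src);
    Speculative.clear();
  }
};

int main() {
  SpeculativeMap M;
  M.speculate(1, 10);
  M.commit();   // mapping 1 -> 10 is now permanent
  M.speculate(2, 20);
  M.rollback(); // attempt failed: 2 is forgotten, 1 survives
  assert(M.Mapped.count(1) == 1 && M.Mapped.count(2) == 0);
}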
+/// Recursively walk this pair of types, returning true if they are isomorphic,
+/// false if they are not.
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+ // Two types with differing kinds are clearly not isomorphic.
+ if (DstTy->getTypeID() != SrcTy->getTypeID())
+ return false;
+
+ // If we have an entry in the MappedTypes table, then we have our answer.
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry)
+ return Entry == DstTy;
+
+ // Two identical types are clearly isomorphic. Remember this
+ // non-speculatively.
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return true;
+ }
+
+ // Okay, we have two types with identical kinds that we haven't seen before.
+
+ // If this is an opaque struct type, special case it.
+ if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+    // Mapping an opaque source type onto any struct: keep the dest struct.
+ if (SSTy->isOpaque()) {
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+ return true;
+ }
+
+ // Mapping a non-opaque source type to an opaque dest. If this is the first
+ // type that we're mapping onto this destination type then we succeed. Keep
+ // the dest, but fill it in later. If this is the second (different) type
+ // that we're trying to map onto the same opaque type then we fail.
+ if (cast<StructType>(DstTy)->isOpaque()) {
+ // We can only map one source type onto the opaque destination type.
+ if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)).second)
+ return false;
+ SrcDefinitionsToResolve.push_back(SSTy);
+ SpeculativeTypes.push_back(SrcTy);
+ SpeculativeDstOpaqueTypes.push_back(cast<StructType>(DstTy));
+ Entry = DstTy;
+ return true;
+ }
+ }
+
+ // If the number of subtypes disagree between the two types, then we fail.
+ if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+ return false;
+
+ // Fail if any of the extra properties (e.g. array size) of the type disagree.
+ if (isa<IntegerType>(DstTy))
+ return false; // bitwidth disagrees.
+ if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+ if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+ return false;
+
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+ if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+ return false;
+ } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+ StructType *SSTy = cast<StructType>(SrcTy);
+ if (DSTy->isLiteral() != SSTy->isLiteral() ||
+ DSTy->isPacked() != SSTy->isPacked())
+ return false;
+ } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+ if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ return false;
+ } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+ if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ return false;
+ }
+
+ // Otherwise, we speculate that these two types will line up and recursively
+ // check the subelements.
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+
+ for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I)
+ if (!areTypesIsomorphic(DstTy->getContainedType(I),
+ SrcTy->getContainedType(I)))
+ return false;
+
+ // If everything seems to have lined up, then everything is great.
+ return true;
+}
+
+void TypeMapTy::linkDefinedTypeBodies() {
+ SmallVector<Type *, 16> Elements;
+ for (StructType *SrcSTy : SrcDefinitionsToResolve) {
+ StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+ assert(DstSTy->isOpaque());
+
+ // Map the body of the source type over to a new body for the dest type.
+ Elements.resize(SrcSTy->getNumElements());
+ for (unsigned I = 0, E = Elements.size(); I != E; ++I)
+ Elements[I] = get(SrcSTy->getElementType(I));
+
+ DstSTy->setBody(Elements, SrcSTy->isPacked());
+ DstStructTypesSet.switchToNonOpaque(DstSTy);
+ }
+ SrcDefinitionsToResolve.clear();
+ DstResolvedOpaqueTypes.clear();
+}
+
+void TypeMapTy::finishType(StructType *DTy, StructType *STy,
+ ArrayRef<Type *> ETypes) {
+ DTy->setBody(ETypes, STy->isPacked());
+
+ // Steal STy's name.
+ if (STy->hasName()) {
+ SmallString<16> TmpName = STy->getName();
+ STy->setName("");
+ DTy->setName(TmpName);
+ }
+
+ DstStructTypesSet.addNonOpaque(DTy);
+}
+
+Type *TypeMapTy::get(Type *Ty) {
+ SmallPtrSet<StructType *, 8> Visited;
+ return get(Ty, Visited);
+}
+
+Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
+ // If we already have an entry for this type, return it.
+ Type **Entry = &MappedTypes[Ty];
+ if (*Entry)
+ return *Entry;
+
+ // These are types that LLVM itself will unique.
+ bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
+
+#ifndef NDEBUG
+ if (!IsUniqued) {
+ for (auto &Pair : MappedTypes) {
+ assert(!(Pair.first != Ty && Pair.second == Ty) &&
+ "mapping to a source type");
+ }
+ }
+#endif
+
+ if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
+ StructType *DTy = StructType::create(Ty->getContext());
+ return *Entry = DTy;
+ }
+
+ // If this is not a recursive type, then just map all of the elements and
+ // then rebuild the type from inside out.
+ SmallVector<Type *, 4> ElementTypes;
+
+  // If there are no element types to map, then the type maps to itself. This
+  // is true for the anonymous {} struct, things like 'float', integers, etc.
+ if (Ty->getNumContainedTypes() == 0 && IsUniqued)
+ return *Entry = Ty;
+
+ // Remap all of the elements, keeping track of whether any of them change.
+ bool AnyChange = false;
+ ElementTypes.resize(Ty->getNumContainedTypes());
+ for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
+ ElementTypes[I] = get(Ty->getContainedType(I), Visited);
+ AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
+ }
+
+ // If we found our type while recursively processing stuff, just use it.
+ Entry = &MappedTypes[Ty];
+ if (*Entry) {
+ if (auto *DTy = dyn_cast<StructType>(*Entry)) {
+ if (DTy->isOpaque()) {
+ auto *STy = cast<StructType>(Ty);
+ finishType(DTy, STy, ElementTypes);
+ }
+ }
+ return *Entry;
+ }
+
+ // If all of the element types mapped directly over and the type is not
+  // a named struct, then the type is usable as-is.
+ if (!AnyChange && IsUniqued)
+ return *Entry = Ty;
+
+ // Otherwise, rebuild a modified type.
+ switch (Ty->getTypeID()) {
+ default:
+ llvm_unreachable("unknown derived type to remap");
+ case Type::ArrayTyID:
+ return *Entry = ArrayType::get(ElementTypes[0],
+ cast<ArrayType>(Ty)->getNumElements());
+ case Type::VectorTyID:
+ return *Entry = VectorType::get(ElementTypes[0],
+ cast<VectorType>(Ty)->getNumElements());
+ case Type::PointerTyID:
+ return *Entry = PointerType::get(ElementTypes[0],
+ cast<PointerType>(Ty)->getAddressSpace());
+ case Type::FunctionTyID:
+ return *Entry = FunctionType::get(ElementTypes[0],
+ makeArrayRef(ElementTypes).slice(1),
+ cast<FunctionType>(Ty)->isVarArg());
+ case Type::StructTyID: {
+ auto *STy = cast<StructType>(Ty);
+ bool IsPacked = STy->isPacked();
+ if (IsUniqued)
+ return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
+
+ // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque()) {
+ DstStructTypesSet.addOpaque(STy);
+ return *Entry = Ty;
+ }
+
+ if (StructType *OldT =
+ DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
+ STy->setName("");
+ return *Entry = OldT;
+ }
+
+ if (!AnyChange) {
+ DstStructTypesSet.addNonOpaque(STy);
+ return *Entry = Ty;
+ }
+
+ StructType *DTy = StructType::create(Ty->getContext());
+ finishType(DTy, STy, ElementTypes);
+ return *Entry = DTy;
+ }
+ }
+}
+
+LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity,
+ const Twine &Msg)
+ : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {}
+void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
+
+//===----------------------------------------------------------------------===//
+// IRLinker implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class IRLinker;
+
+/// Creates prototypes for functions that are lazily linked on the fly. This
+/// speeds up linking for modules with many lazily linked functions of which
+/// few get used.
+class GlobalValueMaterializer final : public ValueMaterializer {
+ IRLinker *TheIRLinker;
+
+public:
+ GlobalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {}
+ Value *materializeDeclFor(Value *V) override;
+ void materializeInitFor(GlobalValue *New, GlobalValue *Old) override;
+ Metadata *mapTemporaryMetadata(Metadata *MD) override;
+ void replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) override;
+ bool isMetadataNeeded(Metadata *MD) override;
+};
+
+class LocalValueMaterializer final : public ValueMaterializer {
+ IRLinker *TheIRLinker;
+
+public:
+ LocalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {}
+ Value *materializeDeclFor(Value *V) override;
+ void materializeInitFor(GlobalValue *New, GlobalValue *Old) override;
+ Metadata *mapTemporaryMetadata(Metadata *MD) override;
+ void replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) override;
+ bool isMetadataNeeded(Metadata *MD) override;
+};
+
+/// This is responsible for keeping track of the state used for moving data
+/// from SrcM to DstM.
+class IRLinker {
+ Module &DstM;
+ Module &SrcM;
+
+ std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor;
+
+ TypeMapTy TypeMap;
+ GlobalValueMaterializer GValMaterializer;
+ LocalValueMaterializer LValMaterializer;
+
+ /// Mapping of values from what they used to be in Src, to what they are now
+ /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
+ /// due to the use of Value handles which the Linker doesn't actually need,
+ /// but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
+ ValueToValueMapTy AliasValueMap;
+
+ DenseSet<GlobalValue *> ValuesToLink;
+ std::vector<GlobalValue *> Worklist;
+
+ void maybeAdd(GlobalValue *GV) {
+ if (ValuesToLink.insert(GV).second)
+ Worklist.push_back(GV);
+ }
+
+ /// Set to true when all global value body linking is complete (including
+ /// lazy linking). Used to prevent metadata linking from creating new
+ /// references.
+ bool DoneLinkingBodies = false;
+
+ bool HasError = false;
+
+ /// Flag indicating that we are just linking metadata (after function
+ /// importing).
+ bool IsMetadataLinkingPostpass;
+
+ /// Flags to pass to value mapper invocations.
+ RemapFlags ValueMapperFlags = RF_MoveDistinctMDs;
+
+ /// Association between metadata values created during bitcode parsing and
+ /// the value id. Used to correlate temporary metadata created during
+ /// function importing with the final metadata parsed during the subsequent
+ /// metadata linking postpass.
+ DenseMap<const Metadata *, unsigned> MetadataToIDs;
+
+ /// Association between metadata value id and temporary metadata that
+ /// remains unmapped after function importing. Saved during function
+ /// importing and consumed during the metadata linking postpass.
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap;
+
+ /// Set of subprogram metadata that does not need to be linked into the
+ /// destination module, because the functions were not imported directly
+ /// or via an inlined body in an imported function.
+ SmallPtrSet<const Metadata *, 16> UnneededSubprograms;
+
+ /// Handles cloning of a global value from the source module into
+ /// the destination module, including setting the attributes and visibility.
+ GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, bool ForDefinition);
+
+ /// Helper method for setting a message and returning an error code.
+ bool emitError(const Twine &Message) {
+ SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
+ HasError = true;
+ return true;
+ }
+
+ void emitWarning(const Twine &Message) {
+ SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Warning, Message));
+ }
+
+ /// Check whether we should be linking metadata from the source module.
+ bool shouldLinkMetadata() {
+ // ValIDToTempMDMap will be non-null when we are importing or otherwise
+ // want to link metadata lazily, and then again during the postpass that
+ // links the deferred metadata. Only link metadata when it is not being
+ // deferred, or when performing that postpass.
+ return ValIDToTempMDMap == nullptr || IsMetadataLinkingPostpass;
+ }
+
+ /// Given a global in the source module, return the global in the
+ /// destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
+ // If the source has no name it can't link. If it has local linkage,
+ // there is no name match-up going on.
+ if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ return nullptr;
+
+ // Otherwise see if we have a match in the destination module's symtab.
+ GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
+ if (!DGV)
+ return nullptr;
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV->hasLocalLinkage())
+ return nullptr;
+
+ // Otherwise, we do in fact link to the destination global.
+ return DGV;
+ }
+
+ void computeTypeMapping();
+
+ Constant *linkAppendingVarProto(GlobalVariable *DstGV,
+ const GlobalVariable *SrcGV);
+
+ bool shouldLink(GlobalValue *DGV, GlobalValue &SGV);
+ Constant *linkGlobalValueProto(GlobalValue *GV, bool ForAlias);
+
+ bool linkModuleFlagsMetadata();
+
+ void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src);
+ bool linkFunctionBody(Function &Dst, Function &Src);
+ void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
+ bool linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
+
+ /// Functions that take care of cloning a specific global value type
+ /// into the destination module.
+ GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
+ Function *copyFunctionProto(const Function *SF);
+ GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA);
+
+ void linkNamedMDNodes();
+
+ /// Populate the UnneededSubprograms set with the DISubprogram metadata
+ /// from the source module that we don't need to link into the dest module,
+ /// because the functions were not imported directly or via an inlined body
+ /// in an imported function.
+ void findNeededSubprograms(ValueToValueMapTy &ValueMap);
+
+ /// The value mapper leaves nulls in the list of subprograms for any
+ /// in the UnneededSubprograms map. Strip those out after metadata linking.
+ void stripNullSubprograms();
+
+public:
+ IRLinker(Module &DstM, IRMover::IdentifiedStructTypeSet &Set, Module &SrcM,
+ ArrayRef<GlobalValue *> ValuesToLink,
+ std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+ bool IsMetadataLinkingPostpass = false)
+ : DstM(DstM), SrcM(SrcM), AddLazyFor(AddLazyFor), TypeMap(Set),
+ GValMaterializer(this), LValMaterializer(this),
+ IsMetadataLinkingPostpass(IsMetadataLinkingPostpass),
+ ValIDToTempMDMap(ValIDToTempMDMap) {
+ for (GlobalValue *GV : ValuesToLink)
+ maybeAdd(GV);
+
+ // If appropriate, tell the value mapper that it can expect to see
+ // temporary metadata.
+ if (!shouldLinkMetadata())
+ ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
+ }
+
+ bool run();
+ Value *materializeDeclFor(Value *V, bool ForAlias);
+ void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
+
+ /// Save the mapping between the given temporary metadata and its metadata
+ /// value id. Used to support metadata linking as a postpass for function
+ /// importing.
+ Metadata *mapTemporaryMetadata(Metadata *MD);
+
+ /// Replace any temporary metadata saved for the source metadata's id with
+ /// the new non-temporary metadata. Used when metadata linking as a postpass
+ /// for function importing.
+ void replaceTemporaryMetadata(const Metadata *OrigMD, Metadata *NewMD);
+
+ /// Indicates whether we need to map the given metadata into the destination
+ /// module. Used to prevent linking of metadata only needed by functions not
+ /// linked into the dest module.
+ bool isMetadataNeeded(Metadata *MD);
+};
+}
+
+/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
+/// table. This is good for all clients except for us. Go through the trouble
+/// to force this back.
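+/// For example, if the destination module already holds a '@foo' and the
+/// incoming global should also be named '@foo', the symbol table will have
+/// auto-renamed one of them (e.g. to '@foo.1'); this swaps the names back so
+/// the incoming global keeps '@foo' (hypothetical names for illustration).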
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+ // If the global doesn't force its name or if it already has the right name,
+ // there is nothing for us to do.
+ if (GV->hasLocalLinkage() || GV->getName() == Name)
+ return;
+
+ Module *M = GV->getParent();
+
+ // If there is a conflict, rename the conflict.
+ if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+ GV->takeName(ConflictGV);
+ ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+ assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+ } else {
+ GV->setName(Name); // Force the name back
+ }
+}
+
+Value *GlobalValueMaterializer::materializeDeclFor(Value *V) {
+ return TheIRLinker->materializeDeclFor(V, false);
+}
+
+void GlobalValueMaterializer::materializeInitFor(GlobalValue *New,
+ GlobalValue *Old) {
+ TheIRLinker->materializeInitFor(New, Old, false);
+}
+
+Metadata *GlobalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+ return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void GlobalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) {
+ TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool GlobalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+ return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *LocalValueMaterializer::materializeDeclFor(Value *V) {
+ return TheIRLinker->materializeDeclFor(V, true);
+}
+
+void LocalValueMaterializer::materializeInitFor(GlobalValue *New,
+ GlobalValue *Old) {
+ TheIRLinker->materializeInitFor(New, Old, true);
+}
+
+Metadata *LocalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+ return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void LocalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) {
+ TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool LocalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+ return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *IRLinker::materializeDeclFor(Value *V, bool ForAlias) {
+ auto *SGV = dyn_cast<GlobalValue>(V);
+ if (!SGV)
+ return nullptr;
+
+ return linkGlobalValueProto(SGV, ForAlias);
+}
+
+void IRLinker::materializeInitFor(GlobalValue *New, GlobalValue *Old,
+ bool ForAlias) {
+ // If we already created the body, just return.
+ if (auto *F = dyn_cast<Function>(New)) {
+ if (!F->isDeclaration())
+ return;
+ } else if (auto *V = dyn_cast<GlobalVariable>(New)) {
+ if (V->hasInitializer())
+ return;
+ } else {
+ auto *A = cast<GlobalAlias>(New);
+ if (A->getAliasee())
+ return;
+ }
+
+ if (ForAlias || shouldLink(New, *Old))
+ linkGlobalValueBody(*New, *Old);
+}
+
+Metadata *IRLinker::mapTemporaryMetadata(Metadata *MD) {
+ if (!ValIDToTempMDMap)
+ return nullptr;
+ // If this temporary metadata has a value id recorded during function
+ // parsing, record that in the ValIDToTempMDMap if one was provided.
+ if (MetadataToIDs.count(MD)) {
+ unsigned Idx = MetadataToIDs[MD];
+ // Check if we created a temp MD when importing a different function from
+ // this module. If so, reuse that temporary metadata; otherwise add this
+ // temporary metadata to the map.
+ if (!ValIDToTempMDMap->count(Idx)) {
+ MDNode *Node = cast<MDNode>(MD);
+ assert(Node->isTemporary());
+ (*ValIDToTempMDMap)[Idx] = Node;
+ }
+ return (*ValIDToTempMDMap)[Idx];
+ }
+ return nullptr;
+}
+
+void IRLinker::replaceTemporaryMetadata(const Metadata *OrigMD,
+ Metadata *NewMD) {
+ if (!ValIDToTempMDMap)
+ return;
+#ifndef NDEBUG
+ auto *N = dyn_cast_or_null<MDNode>(NewMD);
+ assert(!N || !N->isTemporary());
+#endif
+ // If a mapping between metadata value ids and temporary metadata
+ // created during function importing was provided, and the source
+ // metadata has a value id recorded during metadata parsing, replace
+ // the temporary metadata with the final mapped metadata now.
+ if (MetadataToIDs.count(OrigMD)) {
+ unsigned Idx = MetadataToIDs[OrigMD];
+ // Nothing to do if we didn't need to create a temporary metadata during
+ // function importing.
+ if (!ValIDToTempMDMap->count(Idx))
+ return;
+ MDNode *TempMD = (*ValIDToTempMDMap)[Idx];
+ TempMD->replaceAllUsesWith(NewMD);
+ MDNode::deleteTemporary(TempMD);
+ ValIDToTempMDMap->erase(Idx);
+ }
+}
+
+bool IRLinker::isMetadataNeeded(Metadata *MD) {
+ // Currently only DISubprogram metadata is marked as being unneeded.
+ if (UnneededSubprograms.empty())
+ return true;
+ MDNode *Node = dyn_cast<MDNode>(MD);
+ if (!Node)
+ return true;
+ DISubprogram *SP = getDISubprogram(Node);
+ if (!SP)
+ return true;
+ return !UnneededSubprograms.count(SP);
+}
+
+/// Copy the prototype of the given global variable from the source module
+/// into the dest module.
+GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by linkGlobalInit.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(DstM, TypeMap.get(SGVar->getType()->getElementType()),
+ SGVar->isConstant(), GlobalValue::ExternalLinkage,
+ /*init*/ nullptr, SGVar->getName(),
+ /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
+ SGVar->getType()->getAddressSpace());
+ NewDGV->setAlignment(SGVar->getAlignment());
+ return NewDGV;
+}
+
+/// Copy the prototype of the given function from the source module into the
+/// destination module.
+Function *IRLinker::copyFunctionProto(const Function *SF) {
+ // If there is no linkage to be performed or we are linking from the source,
+ // bring SF over.
+ return Function::Create(TypeMap.get(SF->getFunctionType()),
+ GlobalValue::ExternalLinkage, SF->getName(), &DstM);
+}
+
+/// Set up prototypes for any aliases that come over from the source module.
+GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) {
+ // If there is no linkage to be performed or we're linking from the source,
+ // bring over SGA.
+ auto *Ty = TypeMap.get(SGA->getValueType());
+ return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(),
+ GlobalValue::ExternalLinkage, SGA->getName(),
+ &DstM);
+}
+
+GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
+ bool ForDefinition) {
+ GlobalValue *NewGV;
+ if (auto *SGVar = dyn_cast<GlobalVariable>(SGV)) {
+ NewGV = copyGlobalVariableProto(SGVar);
+ } else if (auto *SF = dyn_cast<Function>(SGV)) {
+ NewGV = copyFunctionProto(SF);
+ } else {
+ if (ForDefinition)
+ NewGV = copyGlobalAliasProto(cast<GlobalAlias>(SGV));
+ else
+ NewGV = new GlobalVariable(
+ DstM, TypeMap.get(SGV->getType()->getElementType()),
+ /*isConstant*/ false, GlobalValue::ExternalLinkage,
+ /*init*/ nullptr, SGV->getName(),
+ /*insertbefore*/ nullptr, SGV->getThreadLocalMode(),
+ SGV->getType()->getAddressSpace());
+ }
+
+ if (ForDefinition)
+ NewGV->setLinkage(SGV->getLinkage());
+ else if (SGV->hasExternalWeakLinkage() || SGV->hasWeakLinkage() ||
+ SGV->hasLinkOnceLinkage())
+ NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+ NewGV->copyAttributesFrom(SGV);
+ return NewGV;
+}
+
+/// Loop over all of the linked values to compute type mappings. For example,
+/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct
+/// types 'Foo' but one got renamed when the module was loaded into the same
+/// LLVMContext.
+void IRLinker::computeTypeMapping() {
+ for (GlobalValue &SGV : SrcM.globals()) {
+ GlobalValue *DGV = getLinkedToGlobal(&SGV);
+ if (!DGV)
+ continue;
+
+ if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) {
+ TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+ continue;
+ }
+
+ // Unify the element type of appending arrays.
+ ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+ ArrayType *SAT = cast<ArrayType>(SGV.getType()->getElementType());
+ TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+ }
+
+ for (GlobalValue &SGV : SrcM)
+ if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+ TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+ for (GlobalValue &SGV : SrcM.aliases())
+ if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+ TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+ // Incorporate types by name, scanning all the types in the source module.
+ // At this point, the destination module may have a type "%foo = { i32 }" for
+ // example. When the source module got loaded into the same LLVMContext, if
+ // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+ std::vector<StructType *> Types = SrcM.getIdentifiedStructTypes();
+ for (StructType *ST : Types) {
+ if (!ST->hasName())
+ continue;
+
+ // Check to see if there is a dot in the name followed by a digit.
+ size_t DotPos = ST->getName().rfind('.');
+ if (DotPos == 0 || DotPos == StringRef::npos ||
+ ST->getName().back() == '.' ||
+ !isdigit(static_cast<unsigned char>(ST->getName()[DotPos + 1])))
+ continue;
+
+ // Check to see if the destination module has a struct with the prefix name.
+ StructType *DST = DstM.getTypeByName(ST->getName().substr(0, DotPos));
+ if (!DST)
+ continue;
+
+ // Don't use it if this actually came from the source module. They're in
+ // the same LLVMContext after all. Also don't use it unless the type is
+ // actually used in the destination module. This can happen in situations
+ // like this:
+ //
+ // Module A Module B
+ // -------- --------
+ // %Z = type { %A } %B = type { %C.1 }
+ // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* }
+ // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] }
+ // %C = type { i8* } %B.3 = type { %C.1 }
+ //
+ // When we link Module B with Module A, the '%B' in Module B is
+ // used. However, that would then use '%C.1'. But when we process '%C.1',
+ // we prefer to take the '%C' version. So we are then left with both
+ // '%C.1' and '%C' being used for the same types. This leads to some
+ // variables using one type and some using the other.
+ if (TypeMap.DstStructTypesSet.hasType(DST))
+ TypeMap.addTypeMapping(DST, ST);
+ }
+
+ // Now that we have discovered all of the type equivalences, get a body for
+ // any 'opaque' types in the dest module that are now resolved.
+ TypeMap.linkDefinedTypeBodies();
+}
+
+static void getArrayElements(const Constant *C,
+ SmallVectorImpl<Constant *> &Dest) {
+ unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+ for (unsigned i = 0; i != NumElements; ++i)
+ Dest.push_back(C->getAggregateElement(i));
+}
+
+/// If there were any appending global variables, link them together now.
+/// Return true on error.
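+/// For example, linking a source '@llvm.global_ctors' holding a [1 x %t]
+/// array into a destination holding a [2 x %t] array yields a single
+/// [3 x %t] array with the destination's entries followed by the source's
+/// (illustrative sizes).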
+Constant *IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+ const GlobalVariable *SrcGV) {
+ Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()))
+ ->getElementType();
+
+ StringRef Name = SrcGV->getName();
+ bool IsNewStructor = false;
+ bool IsOldStructor = false;
+ if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") {
+ if (cast<StructType>(EltTy)->getNumElements() == 3)
+ IsNewStructor = true;
+ else
+ IsOldStructor = true;
+ }
+
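+ // New-style ctor/dtor entries are '{ i32, void ()*, i8* }'; old-style
+ // two-element entries are upgraded below by appending a null i8* field so
+ // that both arrays share a single element type.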
+ PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
+ if (IsOldStructor) {
+ auto &ST = *cast<StructType>(EltTy);
+ Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+ EltTy = StructType::get(SrcGV->getContext(), Tys, false);
+ }
+
+ if (DstGV) {
+ ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) {
+ emitError(
+ "Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending global!");
+ return nullptr;
+ }
+
+ // Check to see that the two arrays agree on type.
+ if (EltTy != DstTy->getElementType()) {
+ emitError("Appending variables with different element types!");
+ return nullptr;
+ }
+ if (DstGV->isConstant() != SrcGV->isConstant()) {
+ emitError("Appending variables linked with different const'ness!");
+ return nullptr;
+ }
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment()) {
+ emitError(
+ "Appending variables with different alignment need to be linked!");
+ return nullptr;
+ }
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility()) {
+ emitError(
+ "Appending variables with different visibility need to be linked!");
+ return nullptr;
+ }
+
+ if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) {
+ emitError(
+ "Appending variables with different unnamed_addr need to be linked!");
+ return nullptr;
+ }
+
+ if (StringRef(DstGV->getSection()) != SrcGV->getSection()) {
+ emitError(
+ "Appending variables with different section name need to be linked!");
+ return nullptr;
+ }
+ }
+
+ SmallVector<Constant *, 16> DstElements;
+ if (DstGV)
+ getArrayElements(DstGV->getInitializer(), DstElements);
+
+ SmallVector<Constant *, 16> SrcElements;
+ getArrayElements(SrcGV->getInitializer(), SrcElements);
+
+ if (IsNewStructor)
+ SrcElements.erase(
+ std::remove_if(SrcElements.begin(), SrcElements.end(),
+ [this](Constant *E) {
+ auto *Key = dyn_cast<GlobalValue>(
+ E->getAggregateElement(2)->stripPointerCasts());
+ if (!Key)
+ return false;
+ GlobalValue *DGV = getLinkedToGlobal(Key);
+ return !shouldLink(DGV, *Key);
+ }),
+ SrcElements.end());
+ uint64_t NewSize = DstElements.size() + SrcElements.size();
+ ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+ // Create the new global variable.
+ GlobalVariable *NG = new GlobalVariable(
+ DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
+ /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
+ SrcGV->getType()->getAddressSpace());
+
+ NG->copyAttributesFrom(SrcGV);
+ forceRenaming(NG, SrcGV->getName());
+
+ Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+ // Stop recursion.
+ ValueMap[SrcGV] = Ret;
+
+ for (auto *V : SrcElements) {
+ Constant *NewV;
+ if (IsOldStructor) {
+ auto *S = cast<ConstantStruct>(V);
+ auto *E1 = MapValue(S->getOperand(0), ValueMap, ValueMapperFlags,
+ &TypeMap, &GValMaterializer);
+ auto *E2 = MapValue(S->getOperand(1), ValueMap, ValueMapperFlags,
+ &TypeMap, &GValMaterializer);
+ Value *Null = Constant::getNullValue(VoidPtrTy);
+ NewV =
+ ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+ } else {
+ NewV =
+ MapValue(V, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+ }
+ DstElements.push_back(NewV);
+ }
+
+ NG->setInitializer(ConstantArray::get(NewType, DstElements));
+
+ // Replace any uses of the two global variables with uses of the new
+ // global.
+ if (DstGV) {
+ DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+ DstGV->eraseFromParent();
+ }
+
+ return Ret;
+}
+
+static bool useExistingDest(GlobalValue &SGV, GlobalValue *DGV,
+ bool ShouldLink) {
+ if (!DGV)
+ return false;
+
+ if (SGV.isDeclaration())
+ return true;
+
+ if (DGV->isDeclarationForLinker() && !SGV.isDeclarationForLinker())
+ return false;
+
+ if (ShouldLink)
+ return false;
+
+ return true;
+}
+
+bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
+ // Already imported all the values. Just map to the Dest value
+ // in case it is referenced in the metadata.
+ if (IsMetadataLinkingPostpass) {
+ assert(!ValuesToLink.count(&SGV) &&
+ "Source value unexpectedly requested for link during metadata link");
+ return false;
+ }
+
+ if (ValuesToLink.count(&SGV))
+ return true;
+
+ if (SGV.hasLocalLinkage())
+ return true;
+
+ if (DGV && !DGV->isDeclaration())
+ return false;
+
+ if (SGV.hasAvailableExternallyLinkage())
+ return true;
+
+ if (DoneLinkingBodies)
+ return false;
+
+ AddLazyFor(SGV, [this](GlobalValue &GV) { maybeAdd(&GV); });
+ return ValuesToLink.count(&SGV);
+}
+
+Constant *IRLinker::linkGlobalValueProto(GlobalValue *SGV, bool ForAlias) {
+ GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+ bool ShouldLink = shouldLink(DGV, *SGV);
+
+ // The value may already have been mapped (possibly via the other value
+ // map); if so, reuse the existing mapping.
+ if (ShouldLink) {
+ auto I = ValueMap.find(SGV);
+ if (I != ValueMap.end())
+ return cast<Constant>(I->second);
+
+ I = AliasValueMap.find(SGV);
+ if (I != AliasValueMap.end())
+ return cast<Constant>(I->second);
+ }
+
+ DGV = nullptr;
+ if (ShouldLink || !ForAlias)
+ DGV = getLinkedToGlobal(SGV);
+
+ // Handle the ultra special appending linkage case first.
+ assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage());
+ if (SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast_or_null<GlobalVariable>(DGV),
+ cast<GlobalVariable>(SGV));
+
+ GlobalValue *NewGV;
+ if (useExistingDest(*SGV, DGV, ShouldLink)) {
+ NewGV = DGV;
+ } else {
+ // If we are done linking global value bodies (i.e. we are performing
+ // metadata linking), don't link in the global value due to this
+ // reference, simply map it to null.
+ if (DoneLinkingBodies)
+ return nullptr;
+
+ NewGV = copyGlobalValueProto(SGV, ShouldLink);
+ if (!ForAlias)
+ forceRenaming(NewGV, SGV->getName());
+ }
+ if (ShouldLink || ForAlias) {
+ if (const Comdat *SC = SGV->getComdat()) {
+ if (auto *GO = dyn_cast<GlobalObject>(NewGV)) {
+ Comdat *DC = DstM.getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SC->getSelectionKind());
+ GO->setComdat(DC);
+ }
+ }
+ }
+
+ if (!ShouldLink && ForAlias)
+ NewGV->setLinkage(GlobalValue::InternalLinkage);
+
+ Constant *C = NewGV;
+ if (DGV)
+ C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
+
+ if (DGV && NewGV != DGV) {
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ return C;
+}
+
+/// Update the initializers in the Dest module now that all globals that may be
+/// referenced are in Dest.
+void IRLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) {
+ // Figure out what the initializer looks like in the dest module.
+ Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, ValueMapperFlags,
+ &TypeMap, &GValMaterializer));
+}
+
+/// Copy the source function over into the dest function and fix up references
+/// to values. At this point we know that Dest is an external function, and
+/// that Src is not.
+bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
+ assert(Dst.isDeclaration() && !Src.isDeclaration());
+
+ // Materialize if needed.
+ if (std::error_code EC = Src.materialize())
+ return emitError(EC.message());
+
+ if (!shouldLinkMetadata())
+ // This is only supported for lazy links. Do after materialization of
+ // a function and before remapping metadata on instructions below
+ // in RemapInstruction, as the saved mapping is used to handle
+ // the temporary metadata hanging off instructions.
+ SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
+
+ // Link in the prefix data.
+ if (Src.hasPrefixData())
+ Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, ValueMapperFlags,
+ &TypeMap, &GValMaterializer));
+
+ // Link in the prologue data.
+ if (Src.hasPrologueData())
+ Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap,
+ ValueMapperFlags, &TypeMap,
+ &GValMaterializer));
+
+ // Link in the personality function.
+ if (Src.hasPersonalityFn())
+ Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap,
+ ValueMapperFlags, &TypeMap,
+ &GValMaterializer));
+
+ // Go through and convert function arguments over, remembering the mapping.
+ Function::arg_iterator DI = Dst.arg_begin();
+ for (Argument &Arg : Src.args()) {
+ DI->setName(Arg.getName()); // Copy the name over.
+
+ // Add a mapping to our mapping.
+ ValueMap[&Arg] = &*DI;
+ ++DI;
+ }
+
+ // Copy over the metadata attachments.
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ Src.getAllMetadata(MDs);
+ for (const auto &I : MDs)
+ Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, ValueMapperFlags,
+ &TypeMap, &GValMaterializer));
+
+ // Splice the body of the source function into the dest function.
+ Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList());
+
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (BasicBlock &BB : Dst)
+ for (Instruction &I : BB)
+ RemapInstruction(&I, ValueMap, RF_IgnoreMissingEntries | ValueMapperFlags,
+ &TypeMap, &GValMaterializer);
+
+ // There is no need to map the arguments anymore.
+ for (Argument &Arg : Src.args())
+ ValueMap.erase(&Arg);
+
+ return false;
+}
+
+void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) {
+ Constant *Aliasee = Src.getAliasee();
+ Constant *Val = MapValue(Aliasee, AliasValueMap, ValueMapperFlags, &TypeMap,
+ &LValMaterializer);
+ Dst.setAliasee(Val);
+}
+
+bool IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
+ if (auto *F = dyn_cast<Function>(&Src))
+ return linkFunctionBody(cast<Function>(Dst), *F);
+ if (auto *GVar = dyn_cast<GlobalVariable>(&Src)) {
+ linkGlobalInit(cast<GlobalVariable>(Dst), *GVar);
+ return false;
+ }
+ linkAliasBody(cast<GlobalAlias>(Dst), cast<GlobalAlias>(Src));
+ return false;
+}
+
+void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
+ // Track unneeded nodes to make it simpler to handle the case
+ // where we are checking if an already-mapped SP is needed.
+ NamedMDNode *CompileUnits = SrcM.getNamedMetadata("llvm.dbg.cu");
+ if (!CompileUnits)
+ return;
+ for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+ auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+ assert(CU && "Expected valid compile unit");
+ for (auto *Op : CU->getSubprograms()) {
+ // Unless we were doing function importing and deferred metadata linking,
+ // any needed SPs should have been mapped as they would be reached
+ // from the function linked in (either on the function itself for linked
+ // function bodies, or from DILocation on inlined instructions).
+ assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) &&
+ "DISubprogram shouldn't be mapped yet");
+ if (!ValueMap.MD()[Op])
+ UnneededSubprograms.insert(Op);
+ }
+ }
+ if (!IsMetadataLinkingPostpass)
+ return;
+ // In the case of metadata linking as a postpass (e.g. for function
+ // importing), see which DISubprogram MD from the source has an associated
+ // temporary metadata node, which means the SP was needed by an imported
+ // function.
+ for (auto MDI : MetadataToIDs) {
+ const MDNode *Node = dyn_cast<MDNode>(MDI.first);
+ if (!Node)
+ continue;
+ DISubprogram *SP = getDISubprogram(Node);
+ if (!SP || !ValIDToTempMDMap->count(MDI.second))
+ continue;
+ UnneededSubprograms.erase(SP);
+ }
+}
+
+// Squash null subprograms from compile unit subprogram lists.
+void IRLinker::stripNullSubprograms() {
+ NamedMDNode *CompileUnits = DstM.getNamedMetadata("llvm.dbg.cu");
+ if (!CompileUnits)
+ return;
+ for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+ auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+ assert(CU && "Expected valid compile unit");
+
+ SmallVector<Metadata *, 16> NewSPs;
+ NewSPs.reserve(CU->getSubprograms().size());
+ bool FoundNull = false;
+ for (DISubprogram *SP : CU->getSubprograms()) {
+ if (!SP) {
+ FoundNull = true;
+ continue;
+ }
+ NewSPs.push_back(SP);
+ }
+ if (FoundNull)
+ CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
+ }
+}
+
+/// Insert all of the named MDNodes in Src into the Dest module.
+void IRLinker::linkNamedMDNodes() {
+ findNeededSubprograms(ValueMap);
+ const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+ for (const NamedMDNode &NMD : SrcM.named_metadata()) {
+ // Don't link module flags here. Do them separately.
+ if (&NMD == SrcModFlags)
+ continue;
+ NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName());
+ // Add Src elements into Dest node.
+ for (const MDNode *op : NMD.operands())
+ DestNMD->addOperand(MapMetadata(
+ op, ValueMap, ValueMapperFlags | RF_NullMapMissingGlobalValues,
+ &TypeMap, &GValMaterializer));
+ }
+ stripNullSubprograms();
+}
+
+/// Merge the linker flags in Src into the Dest module.
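+/// Each module flag is an MDNode of the form '{ behavior, ID, value }',
+/// e.g. '!{i32 1, !"wchar_size", i32 4}', where behavior 1 (Error) requires
+/// both modules to agree on the value (illustrative flag).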
+bool IRLinker::linkModuleFlagsMetadata() {
+ // If the source module has no module flags, we are done.
+ const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+ if (!SrcModFlags)
+ return false;
+
+ // If the destination module doesn't have module flags yet, then just copy
+ // over the source module's flags.
+ NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
+ if (DstModFlags->getNumOperands() == 0) {
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+ DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+ return false;
+ }
+
+ // First build a map of the existing module flags and requirements.
+ DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
+ SmallSetVector<MDNode *, 16> Requirements;
+ for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = DstModFlags->getOperand(I);
+ ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
+ MDString *ID = cast<MDString>(Op->getOperand(1));
+
+ if (Behavior->getZExtValue() == Module::Require) {
+ Requirements.insert(cast<MDNode>(Op->getOperand(2)));
+ } else {
+ Flags[ID] = std::make_pair(Op, I);
+ }
+ }
+
+ // Merge in the flags from the source module, and also collect its set of
+ // requirements.
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *SrcOp = SrcModFlags->getOperand(I);
+ ConstantInt *SrcBehavior =
+ mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
+ MDString *ID = cast<MDString>(SrcOp->getOperand(1));
+ MDNode *DstOp;
+ unsigned DstIndex;
+ std::tie(DstOp, DstIndex) = Flags.lookup(ID);
+ unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+
+ // If this is a requirement, add it and continue.
+ if (SrcBehaviorValue == Module::Require) {
+ // If the destination module does not already have this requirement, add
+ // it.
+ if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
+ DstModFlags->addOperand(SrcOp);
+ }
+ continue;
+ }
+
+ // If there is no existing flag with this ID, just add it.
+ if (!DstOp) {
+ Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
+ DstModFlags->addOperand(SrcOp);
+ continue;
+ }
+
+ // Otherwise, perform a merge.
+ ConstantInt *DstBehavior =
+ mdconst::extract<ConstantInt>(DstOp->getOperand(0));
+ unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+
+ // If either flag has override behavior, handle it first.
+ if (DstBehaviorValue == Module::Override) {
+ // Diagnose inconsistent flags which both have override behavior.
+ if (SrcBehaviorValue == Module::Override &&
+ SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting override values");
+ }
+ continue;
+ } else if (SrcBehaviorValue == Module::Override) {
+ // Update the destination flag to that of the source.
+ DstModFlags->setOperand(DstIndex, SrcOp);
+ Flags[ID].first = SrcOp;
+ continue;
+ }
+
+ // Diagnose inconsistent merge behavior types.
+ if (SrcBehaviorValue != DstBehaviorValue) {
+ emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors");
+ continue;
+ }
+
+ auto replaceDstValue = [&](MDNode *New) {
+ Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
+ MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+ DstModFlags->setOperand(DstIndex, Flag);
+ Flags[ID].first = Flag;
+ };
+
+ // Perform the merge for standard behavior types.
+ switch (SrcBehaviorValue) {
+ case Module::Require:
+ case Module::Override:
+ llvm_unreachable("not possible");
+ case Module::Error: {
+ // Emit an error if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ }
+ continue;
+ }
+ case Module::Warning: {
+ // Emit a warning if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ emitWarning("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ }
+ continue;
+ }
+ case Module::Append: {
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ SmallVector<Metadata *, 8> MDs;
+ MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
+ MDs.append(DstValue->op_begin(), DstValue->op_end());
+ MDs.append(SrcValue->op_begin(), SrcValue->op_end());
+
+ replaceDstValue(MDNode::get(DstM.getContext(), MDs));
+ break;
+ }
+ case Module::AppendUnique: {
+ SmallSetVector<Metadata *, 16> Elts;
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ Elts.insert(DstValue->op_begin(), DstValue->op_end());
+ Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
+
+ replaceDstValue(MDNode::get(DstM.getContext(),
+ makeArrayRef(Elts.begin(), Elts.end())));
+ break;
+ }
+ }
+ }
+
+ // Check all of the requirements.
+ for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+ MDNode *Requirement = Requirements[I];
+ MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+ Metadata *ReqValue = Requirement->getOperand(1);
+
+ MDNode *Op = Flags[Flag].first;
+ if (!Op || Op->getOperand(2) != ReqValue) {
+ emitError("linking module flags '" + Flag->getString() +
+ "': does not have the required value");
+ continue;
+ }
+ }
+
+ return HasError;
+}
+
+// This function returns true if the triples match.
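+// For Apple vendors the OS version is ignored, so e.g.
+// 'x86_64-apple-macosx10.9.0' and 'x86_64-apple-macosx10.10.0' are treated
+// as matching (illustrative triples).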
+static bool triplesMatch(const Triple &T0, const Triple &T1) {
+ // If vendor is apple, ignore the version number.
+ if (T0.getVendor() == Triple::Apple)
+ return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() &&
+ T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS();
+
+ return T0 == T1;
+}
+
+// This function returns the merged triple.
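+// E.g. merging src 'x86_64-apple-macosx10.10.0' into dst
+// 'x86_64-apple-macosx10.9.0' picks the src triple, while for non-Apple
+// vendors the dst triple always wins (illustrative triples).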
+static std::string mergeTriples(const Triple &SrcTriple,
+ const Triple &DstTriple) {
+ // If vendor is apple, pick the triple with the larger version number.
+ if (SrcTriple.getVendor() == Triple::Apple)
+ if (DstTriple.isOSVersionLT(SrcTriple))
+ return SrcTriple.str();
+
+ return DstTriple.str();
+}
+
+bool IRLinker::run() {
+ // Inherit the target data from the source module if the destination module
+ // doesn't have one already.
+ if (DstM.getDataLayout().isDefault())
+ DstM.setDataLayout(SrcM.getDataLayout());
+
+ if (SrcM.getDataLayout() != DstM.getDataLayout()) {
+ emitWarning("Linking two modules of different data layouts: '" +
+ SrcM.getModuleIdentifier() + "' is '" +
+ SrcM.getDataLayoutStr() + "' whereas '" +
+ DstM.getModuleIdentifier() + "' is '" +
+ DstM.getDataLayoutStr() + "'\n");
+ }
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM.getTargetTriple().empty() && !SrcM.getTargetTriple().empty())
+ DstM.setTargetTriple(SrcM.getTargetTriple());
+
+ Triple SrcTriple(SrcM.getTargetTriple()), DstTriple(DstM.getTargetTriple());
+
+ if (!SrcM.getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
+ emitWarning("Linking two modules of different target triples: " +
+ SrcM.getModuleIdentifier() + "' is '" + SrcM.getTargetTriple() +
+ "' whereas '" + DstM.getModuleIdentifier() + "' is '" +
+ DstM.getTargetTriple() + "'\n");
+
+ DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
+
+ // Append the module inline asm string.
+ if (!SrcM.getModuleInlineAsm().empty()) {
+ if (DstM.getModuleInlineAsm().empty())
+ DstM.setModuleInlineAsm(SrcM.getModuleInlineAsm());
+ else
+ DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" +
+ SrcM.getModuleInlineAsm());
+ }
+
+ // Loop over all of the linked values to compute type mappings.
+ computeTypeMapping();
+
+ std::reverse(Worklist.begin(), Worklist.end());
+ while (!Worklist.empty()) {
+ GlobalValue *GV = Worklist.back();
+ Worklist.pop_back();
+
+ // Already mapped.
+ if (ValueMap.find(GV) != ValueMap.end() ||
+ AliasValueMap.find(GV) != AliasValueMap.end())
+ continue;
+
+ assert(!GV->isDeclaration());
+ MapValue(GV, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+ if (HasError)
+ return true;
+ }
+
+ // Note that we are done linking global value bodies. This prevents
+ // metadata linking from creating new references.
+ DoneLinkingBodies = true;
+
+ // Remap all of the named MDNodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ if (shouldLinkMetadata()) {
+ // Even if we are only linking metadata, the declarations must be linked
+ // above in case any are referenced by metadata. IRLinker::shouldLink
+ // ensures that we don't actually link anything else from the source.
+ if (IsMetadataLinkingPostpass) {
+ // Ensure metadata materialized
+ if (SrcM.getMaterializer()->materializeMetadata())
+ return true;
+ SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
+ }
+
+ linkNamedMDNodes();
+
+ if (IsMetadataLinkingPostpass) {
+ // Handle anything left in the ValIDToTempMDMap, such as metadata nodes
+ // not reached by the dbg.cu NamedMD (i.e. only reached from
+ // instructions).
+ // Walk the MetadataToIDs once to find the set of new (imported) MD
+ // that still has corresponding temporary metadata, and invoke metadata
+ // mapping on each one.
+ for (auto MDI : MetadataToIDs) {
+ if (!ValIDToTempMDMap->count(MDI.second))
+ continue;
+ MapMetadata(MDI.first, ValueMap, ValueMapperFlags, &TypeMap,
+ &GValMaterializer);
+ }
+ assert(ValIDToTempMDMap->empty());
+ }
+
+ // Merge the module flags into the DstM module.
+ if (linkModuleFlagsMetadata())
+ return true;
+ }
+
+ return false;
+}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
+ : ETypes(E), IsPacked(P) {}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
+ : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
+ if (IsPacked != That.IsPacked)
+ return false;
+ if (ETypes != That.ETypes)
+ return false;
+ return true;
+}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
+ return !this->operator==(That);
+}
+
+StructType *IRMover::StructTypeKeyInfo::getEmptyKey() {
+ return DenseMapInfo<StructType *>::getEmptyKey();
+}
+
+StructType *IRMover::StructTypeKeyInfo::getTombstoneKey() {
+ return DenseMapInfo<StructType *>::getTombstoneKey();
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
+ return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
+ Key.IsPacked);
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const StructType *ST) {
+ return getHashValue(KeyTy(ST));
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
+ const StructType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const StructType *LHS,
+ const StructType *RHS) {
+ if (RHS == getEmptyKey())
+ return LHS == getEmptyKey();
+
+ if (RHS == getTombstoneKey())
+ return LHS == getTombstoneKey();
+
+ return KeyTy(LHS) == KeyTy(RHS);
+}
+
+void IRMover::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
+ assert(!Ty->isOpaque());
+ NonOpaqueStructTypes.insert(Ty);
+}
+
+void IRMover::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
+ assert(!Ty->isOpaque());
+ NonOpaqueStructTypes.insert(Ty);
+ bool Removed = OpaqueStructTypes.erase(Ty);
+ (void)Removed;
+ assert(Removed);
+}
+
+void IRMover::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
+ assert(Ty->isOpaque());
+ OpaqueStructTypes.insert(Ty);
+}
+
+StructType *
+IRMover::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
+ bool IsPacked) {
+ IRMover::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
+ auto I = NonOpaqueStructTypes.find_as(Key);
+ if (I == NonOpaqueStructTypes.end())
+ return nullptr;
+ return *I;
+}
+
+bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
+ if (Ty->isOpaque())
+ return OpaqueStructTypes.count(Ty);
+ auto I = NonOpaqueStructTypes.find(Ty);
+ if (I == NonOpaqueStructTypes.end())
+ return false;
+ return *I == Ty;
+}
+
+IRMover::IRMover(Module &M) : Composite(M) {
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (StructType *Ty : StructTypes) {
+ if (Ty->isOpaque())
+ IdentifiedStructTypes.addOpaque(Ty);
+ else
+ IdentifiedStructTypes.addNonOpaque(Ty);
+ }
+}
+
+bool IRMover::move(
+ Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+ std::function<void(GlobalValue &, ValueAdder Add)> AddLazyFor,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap,
+ bool IsMetadataLinkingPostpass) {
+ IRLinker TheIRLinker(Composite, IdentifiedStructTypes, Src, ValuesToLink,
+ AddLazyFor, ValIDToTempMDMap, IsMetadataLinkingPostpass);
+ bool RetCode = TheIRLinker.run();
+ Composite.dropTriviallyDeadConstantArrays();
+ return RetCode;
+}
diff --git a/contrib/llvm/lib/Linker/LinkDiagnosticInfo.h b/contrib/llvm/lib/Linker/LinkDiagnosticInfo.h
new file mode 100644
index 0000000..d91f19c
--- /dev/null
+++ b/contrib/llvm/lib/Linker/LinkDiagnosticInfo.h
@@ -0,0 +1,25 @@
+//===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+#define LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+
+namespace llvm {
+class LinkDiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+
+public:
+ LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
+ void print(DiagnosticPrinter &DP) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index f090680..9de3be4 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -12,447 +12,72 @@
//===----------------------------------------------------------------------===//
#include "llvm/Linker/Linker.h"
+#include "LinkDiagnosticInfo.h"
#include "llvm-c/Linker.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include <cctype>
-#include <tuple>
using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TypeMap implementation.
-//===----------------------------------------------------------------------===//
-
namespace {
-class TypeMapTy : public ValueMapTypeRemapper {
- /// This is a mapping from a source type to a destination type to use.
- DenseMap<Type*, Type*> MappedTypes;
-
- /// When checking to see if two subgraphs are isomorphic, we speculatively
- /// add types to MappedTypes, but keep track of them here in case we need to
- /// roll back.
- SmallVector<Type*, 16> SpeculativeTypes;
-
- SmallVector<StructType*, 16> SpeculativeDstOpaqueTypes;
-
- /// This is a list of non-opaque structs in the source module that are mapped
- /// to an opaque struct in the destination module.
- SmallVector<StructType*, 16> SrcDefinitionsToResolve;
-
- /// This is the set of opaque types in the destination modules who are
- /// getting a body from the source module.
- SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
-
-public:
- TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet)
- : DstStructTypesSet(DstStructTypesSet) {}
-
- Linker::IdentifiedStructTypeSet &DstStructTypesSet;
- /// Indicate that the specified type in the destination module is conceptually
- /// equivalent to the specified type in the source module.
- void addTypeMapping(Type *DstTy, Type *SrcTy);
-
- /// Produce a body for an opaque type in the dest module from a type
- /// definition in the source module.
- void linkDefinedTypeBodies();
-
- /// Return the mapped type to use for the specified input type from the
- /// source module.
- Type *get(Type *SrcTy);
- Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
-
- void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
-
- FunctionType *get(FunctionType *T) {
- return cast<FunctionType>(get((Type *)T));
- }
-
- /// Dump out the type map for debugging purposes.
- void dump() const {
- for (auto &Pair : MappedTypes) {
- dbgs() << "TypeMap: ";
- Pair.first->print(dbgs());
- dbgs() << " => ";
- Pair.second->print(dbgs());
- dbgs() << '\n';
- }
- }
-
-private:
- Type *remapType(Type *SrcTy) override { return get(SrcTy); }
-
- bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
-};
-}
-
-void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
- assert(SpeculativeTypes.empty());
- assert(SpeculativeDstOpaqueTypes.empty());
-
- // Check to see if these types are recursively isomorphic and establish a
- // mapping between them if so.
- if (!areTypesIsomorphic(DstTy, SrcTy)) {
- // Oops, they aren't isomorphic. Just discard this request by rolling out
- // any speculative mappings we've established.
- for (Type *Ty : SpeculativeTypes)
- MappedTypes.erase(Ty);
-
- SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() -
- SpeculativeDstOpaqueTypes.size());
- for (StructType *Ty : SpeculativeDstOpaqueTypes)
- DstResolvedOpaqueTypes.erase(Ty);
- } else {
- for (Type *Ty : SpeculativeTypes)
- if (auto *STy = dyn_cast<StructType>(Ty))
- if (STy->hasName())
- STy->setName("");
- }
- SpeculativeTypes.clear();
- SpeculativeDstOpaqueTypes.clear();
-}
-
-/// Recursively walk this pair of types, returning true if they are isomorphic,
-/// false if they are not.
-bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
- // Two types with differing kinds are clearly not isomorphic.
- if (DstTy->getTypeID() != SrcTy->getTypeID())
- return false;
-
- // If we have an entry in the MappedTypes table, then we have our answer.
- Type *&Entry = MappedTypes[SrcTy];
- if (Entry)
- return Entry == DstTy;
-
- // Two identical types are clearly isomorphic. Remember this
- // non-speculatively.
- if (DstTy == SrcTy) {
- Entry = DstTy;
- return true;
- }
-
- // Okay, we have two types with identical kinds that we haven't seen before.
-
- // If this is an opaque struct type, special case it.
- if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
- // Mapping an opaque type to any struct, just keep the dest struct.
- if (SSTy->isOpaque()) {
- Entry = DstTy;
- SpeculativeTypes.push_back(SrcTy);
- return true;
- }
-
- // Mapping a non-opaque source type to an opaque dest. If this is the first
- // type that we're mapping onto this destination type then we succeed. Keep
- // the dest, but fill it in later. If this is the second (different) type
- // that we're trying to map onto the same opaque type then we fail.
- if (cast<StructType>(DstTy)->isOpaque()) {
- // We can only map one source type onto the opaque destination type.
- if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)).second)
- return false;
- SrcDefinitionsToResolve.push_back(SSTy);
- SpeculativeTypes.push_back(SrcTy);
- SpeculativeDstOpaqueTypes.push_back(cast<StructType>(DstTy));
- Entry = DstTy;
- return true;
- }
- }
-
- // If the number of subtypes disagree between the two types, then we fail.
- if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
- return false;
-
- // Fail if any of the extra properties (e.g. array size) of the type disagree.
- if (isa<IntegerType>(DstTy))
- return false; // bitwidth disagrees.
- if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
- if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
- return false;
-
- } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
- if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
- return false;
- } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
- StructType *SSTy = cast<StructType>(SrcTy);
- if (DSTy->isLiteral() != SSTy->isLiteral() ||
- DSTy->isPacked() != SSTy->isPacked())
- return false;
- } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
- if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
- return false;
- } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
- if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
- return false;
- }
-
- // Otherwise, we speculate that these two types will line up and recursively
- // check the subelements.
- Entry = DstTy;
- SpeculativeTypes.push_back(SrcTy);
-
- for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I)
- if (!areTypesIsomorphic(DstTy->getContainedType(I),
- SrcTy->getContainedType(I)))
- return false;
-
- // If everything seems to have lined up, then everything is great.
- return true;
-}
-
-void TypeMapTy::linkDefinedTypeBodies() {
- SmallVector<Type*, 16> Elements;
- for (StructType *SrcSTy : SrcDefinitionsToResolve) {
- StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
- assert(DstSTy->isOpaque());
-
- // Map the body of the source type over to a new body for the dest type.
- Elements.resize(SrcSTy->getNumElements());
- for (unsigned I = 0, E = Elements.size(); I != E; ++I)
- Elements[I] = get(SrcSTy->getElementType(I));
-
- DstSTy->setBody(Elements, SrcSTy->isPacked());
- DstStructTypesSet.switchToNonOpaque(DstSTy);
- }
- SrcDefinitionsToResolve.clear();
- DstResolvedOpaqueTypes.clear();
-}
-
-void TypeMapTy::finishType(StructType *DTy, StructType *STy,
- ArrayRef<Type *> ETypes) {
- DTy->setBody(ETypes, STy->isPacked());
-
- // Steal STy's name.
- if (STy->hasName()) {
- SmallString<16> TmpName = STy->getName();
- STy->setName("");
- DTy->setName(TmpName);
- }
-
- DstStructTypesSet.addNonOpaque(DTy);
-}
-
-Type *TypeMapTy::get(Type *Ty) {
- SmallPtrSet<StructType *, 8> Visited;
- return get(Ty, Visited);
-}
-
-Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
- // If we already have an entry for this type, return it.
- Type **Entry = &MappedTypes[Ty];
- if (*Entry)
- return *Entry;
-
- // These are types that LLVM itself will unique.
- bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
-
-#ifndef NDEBUG
- if (!IsUniqued) {
- for (auto &Pair : MappedTypes) {
- assert(!(Pair.first != Ty && Pair.second == Ty) &&
- "mapping to a source type");
- }
- }
-#endif
-
- if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
- StructType *DTy = StructType::create(Ty->getContext());
- return *Entry = DTy;
- }
-
- // If this is not a recursive type, then just map all of the elements and
- // then rebuild the type from inside out.
- SmallVector<Type *, 4> ElementTypes;
-
- // If there are no element types to map, then the type is itself. This is
- // true for the anonymous {} struct, things like 'float', integers, etc.
- if (Ty->getNumContainedTypes() == 0 && IsUniqued)
- return *Entry = Ty;
-
- // Remap all of the elements, keeping track of whether any of them change.
- bool AnyChange = false;
- ElementTypes.resize(Ty->getNumContainedTypes());
- for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
- ElementTypes[I] = get(Ty->getContainedType(I), Visited);
- AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
- }
-
- // If we found our type while recursively processing stuff, just use it.
- Entry = &MappedTypes[Ty];
- if (*Entry) {
- if (auto *DTy = dyn_cast<StructType>(*Entry)) {
- if (DTy->isOpaque()) {
- auto *STy = cast<StructType>(Ty);
- finishType(DTy, STy, ElementTypes);
- }
- }
- return *Entry;
- }
-
- // If all of the element types mapped directly over and the type is not
- // a nomed struct, then the type is usable as-is.
- if (!AnyChange && IsUniqued)
- return *Entry = Ty;
-
- // Otherwise, rebuild a modified type.
- switch (Ty->getTypeID()) {
- default:
- llvm_unreachable("unknown derived type to remap");
- case Type::ArrayTyID:
- return *Entry = ArrayType::get(ElementTypes[0],
- cast<ArrayType>(Ty)->getNumElements());
- case Type::VectorTyID:
- return *Entry = VectorType::get(ElementTypes[0],
- cast<VectorType>(Ty)->getNumElements());
- case Type::PointerTyID:
- return *Entry = PointerType::get(ElementTypes[0],
- cast<PointerType>(Ty)->getAddressSpace());
- case Type::FunctionTyID:
- return *Entry = FunctionType::get(ElementTypes[0],
- makeArrayRef(ElementTypes).slice(1),
- cast<FunctionType>(Ty)->isVarArg());
- case Type::StructTyID: {
- auto *STy = cast<StructType>(Ty);
- bool IsPacked = STy->isPacked();
- if (IsUniqued)
- return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
-
- // If the type is opaque, we can just use it directly.
- if (STy->isOpaque()) {
- DstStructTypesSet.addOpaque(STy);
- return *Entry = Ty;
- }
-
- if (StructType *OldT =
- DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
- STy->setName("");
- return *Entry = OldT;
- }
-
- if (!AnyChange) {
- DstStructTypesSet.addNonOpaque(STy);
- return *Entry = Ty;
- }
-
- StructType *DTy = StructType::create(Ty->getContext());
- finishType(DTy, STy, ElementTypes);
- return *Entry = DTy;
- }
- }
-}
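// [Editor's sketch] The opaque-placeholder technique used above for
// recursive structs, shown against the LLVM type API itself: create an
// opaque StructType first so self-references can be constructed, then
// fill in the body afterwards (which is what StructType::create plus
// finishType do above). Minimal example:
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
static StructType *makeRecursiveNode(LLVMContext &Ctx) {
  // %node = type { i32, %node* }
  StructType *Node = StructType::create(Ctx, "node");
  Node->setBody({Type::getInt32Ty(Ctx), PointerType::getUnqual(Node)},
                /*isPacked=*/false);
  return Node;
}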
-
-//===----------------------------------------------------------------------===//
-// ModuleLinker implementation.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ModuleLinker;
-
-/// Creates prototypes for functions that are lazily linked on the fly. This
-/// speeds up linking for modules with many lazily linked functions of which
-/// few get used.
-class ValueMaterializerTy : public ValueMaterializer {
- TypeMapTy &TypeMap;
- Module *DstM;
- std::vector<GlobalValue *> &LazilyLinkGlobalValues;
-
-public:
- ValueMaterializerTy(TypeMapTy &TypeMap, Module *DstM,
- std::vector<GlobalValue *> &LazilyLinkGlobalValues)
- : ValueMaterializer(), TypeMap(TypeMap), DstM(DstM),
- LazilyLinkGlobalValues(LazilyLinkGlobalValues) {}
-
- Value *materializeValueFor(Value *V) override;
-};
-
-class LinkDiagnosticInfo : public DiagnosticInfo {
- const Twine &Msg;
-
-public:
- LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
- void print(DiagnosticPrinter &DP) const override;
-};
-LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity,
- const Twine &Msg)
- : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {}
-void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
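// [Editor's sketch] The DiagnosticInfo pattern above in miniature: a
// severity-tagged message object handed to a pluggable handler, which is
// how LLVMContext::diagnose() dispatches LinkDiagnosticInfo. Toy types
// for illustration, not the LLVM classes themselves.
#include <functional>
#include <iostream>
#include <string>
enum Severity { Error, Warning };
struct Diag { Severity Sev; std::string Msg; };
using Handler = std::function<void(const Diag &)>;
static void diagnose(const Diag &D, const Handler &H) { H(D); }
// usage:
//   diagnose({Error, "symbol multiply defined!"},
//            [](const Diag &D) { std::cerr << D.Msg << "\n"; });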
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
- Module *DstM, *SrcM;
-
- TypeMapTy TypeMap;
- ValueMaterializerTy ValMaterializer;
+ IRMover &Mover;
+ Module &SrcM;
- /// Mapping of values from what they used to be in Src, to what they are now
- /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
- /// due to the use of Value handles which the Linker doesn't actually need,
- /// but this allows us to reuse the ValueMapper code.
- ValueToValueMapTy ValueMap;
+ SetVector<GlobalValue *> ValuesToLink;
+ StringSet<> Internalize;
- struct AppendingVarInfo {
- GlobalVariable *NewGV; // New aggregate global in dest module.
- const Constant *DstInit; // Old initializer from dest module.
- const Constant *SrcInit; // Old initializer from src module.
- };
-
- std::vector<AppendingVarInfo> AppendingVars;
+ /// For symbol clashes, prefer those from Src.
+ unsigned Flags;
- // Set of items not to link in from source.
- SmallPtrSet<const Value *, 16> DoNotLinkFromSource;
+ /// Function index passed into ModuleLinker for use in function
+ /// importing/exporting handling.
+ const FunctionInfoIndex *ImportIndex;
- // Vector of GlobalValues to lazily link in.
- std::vector<GlobalValue *> LazilyLinkGlobalValues;
+ /// Functions to import from the source module; all other functions are
+ /// imported as declarations instead of definitions.
+ DenseSet<const GlobalValue *> *FunctionsToImport;
- /// Functions that have replaced other functions.
- SmallPtrSet<const Function *, 16> OverridingFunctions;
+ /// Set to true if the given FunctionInfoIndex contains any functions
+ /// from this source module, in which case we must conservatively assume
+ /// that any of its functions may be imported into another module
+ /// as part of a different backend compilation process.
+ bool HasExportedFunctions = false;
- DiagnosticHandlerFunction DiagnosticHandler;
+ /// Association between metadata value id and temporary metadata that
+ /// remains unmapped after function importing. Saved during function
+ /// importing and consumed during the metadata linking postpass.
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap;
- /// For symbol clashes, prefer those from Src.
- bool OverrideFromSrc;
+ /// Used as the callback for lazy linking.
+ /// The mover has just hit GV and we have to decide if it, and other members
+ /// of the same comdat, should be linked. Every member to be linked is passed
+ /// to Add.
+ void addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add);
-public:
- ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
- DiagnosticHandlerFunction DiagnosticHandler,
- bool OverrideFromSrc)
- : DstM(dstM), SrcM(srcM), TypeMap(Set),
- ValMaterializer(TypeMap, DstM, LazilyLinkGlobalValues),
- DiagnosticHandler(DiagnosticHandler), OverrideFromSrc(OverrideFromSrc) {
+ bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
+ bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
+ bool shouldInternalizeLinkedSymbols() {
+ return Flags & Linker::InternalizeLinkedSymbols;
}
- bool run();
+ /// Check if we should promote the given local value to global scope.
+ bool doPromoteLocalToGlobal(const GlobalValue *SGV);
-private:
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
- /// Helper method for setting a message and returning an error code.
+ /// Emit an error through the source context's diagnostic handler.
bool emitError(const Twine &Message) {
- DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message));
+ SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
return true;
}
- void emitWarning(const Twine &Message) {
- DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message));
- }
-
- bool getComdatLeader(Module *M, StringRef ComdatName,
+ bool getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar);
bool computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
@@ -463,17 +88,20 @@ private:
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
bool &LinkFromSrc);
+ // Keep track of the global value members of each comdat in source.
+ DenseMap<const Comdat *, std::vector<GlobalValue *>> ComdatMembers;
/// Given a global in the source module, return the global in the
/// destination module that is being linked to, if any.
GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
+ Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
- if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV)))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
- GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+ GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV));
if (!DGV)
return nullptr;
@@ -486,139 +114,237 @@ private:
return DGV;
}
- void computeTypeMapping();
-
- void upgradeMismatchedGlobalArray(StringRef Name);
- void upgradeMismatchedGlobals();
-
- bool linkAppendingVarProto(GlobalVariable *DstGV,
- const GlobalVariable *SrcGV);
+ bool linkIfNeeded(GlobalValue &GV);
- bool linkGlobalValueProto(GlobalValue *GV);
- bool linkModuleFlagsMetadata();
+ /// Helper methods to check if we are importing from or potentially
+ /// exporting from the current source module.
+ bool isPerformingImport() const { return FunctionsToImport != nullptr; }
+ bool isModuleExporting() const { return HasExportedFunctions; }
- void linkAppendingVarInit(const AppendingVarInfo &AVI);
+ /// If we are importing from the source module, checks if we should
+ /// import SGV as a definition, otherwise import as a declaration.
+ bool doImportAsDefinition(const GlobalValue *SGV);
- void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src);
- bool linkFunctionBody(Function &Dst, Function &Src);
- void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
- bool linkGlobalValueBody(GlobalValue &Src);
+ /// Get the name for SGV that should be used in the linked destination
+ /// module. Specifically, this handles the case where we need to rename
+ /// a local that is being promoted to global scope.
+ std::string getName(const GlobalValue *SGV);
- void linkNamedMDNodes();
- void stripReplacedSubprograms();
-};
-}
-
-/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
-/// table. This is good for all clients except for us. Go through the trouble
-/// to force this back.
-static void forceRenaming(GlobalValue *GV, StringRef Name) {
- // If the global doesn't force its name or if it already has the right name,
- // there is nothing for us to do.
- if (GV->hasLocalLinkage() || GV->getName() == Name)
- return;
+ /// Process globals so that they can be used in ThinLTO. This includes
+ /// promoting local variables so that they can be referenced externally by
+ /// ThinLTO-imported globals and converting strong external globals to
+ /// available_externally.
+ void processGlobalsForThinLTO();
+ void processGlobalForThinLTO(GlobalValue &GV);
- Module *M = GV->getParent();
+ /// Get the new linkage for SGV that should be used in the linked destination
+ /// module. Specifically, for ThinLTO importing or exporting it may need
+ /// to be adjusted.
+ GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
- // If there is a conflict, rename the conflict.
- if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
- GV->takeName(ConflictGV);
- ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
- assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
- } else {
- GV->setName(Name); // Force the name back
+public:
+ ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
+ const FunctionInfoIndex *Index = nullptr,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
+ : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
+ FunctionsToImport(FunctionsToImport),
+ ValIDToTempMDMap(ValIDToTempMDMap) {
+ assert((ImportIndex || !FunctionsToImport) &&
+ "Expect a FunctionInfoIndex when importing");
+ // If we have a FunctionInfoIndex but no function to import,
+ // then this is the primary module being compiled in a ThinLTO
+ // backend compilation, and we need to see if it has functions that
+ // may be exported to another backend compilation.
+ if (ImportIndex && !FunctionsToImport)
+ HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
+ assert((ValIDToTempMDMap || !FunctionsToImport) &&
+ "Function importing must provide a ValIDToTempMDMap");
}
-}
-/// copy additional attributes (those not needed to construct a GlobalValue)
-/// from the SrcGV to the DestGV.
-static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
- DestGV->copyAttributesFrom(SrcGV);
- forceRenaming(DestGV, SrcGV->getName());
+ bool run();
+};
}
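// [Editor's sketch] How the refactored pieces fit together, per the
// constructor and run() declared above. Hypothetical call site: Composite,
// Src, and the flag value are placeholders owned by the caller.
//
//   IRMover Mover(Composite);          // wraps the destination module
//   ModuleLinker ML(Mover, Src, Linker::None /*, Index,
//                   FunctionsToImport, ValIDToTempMDMap */);
//   if (ML.run())
//     /* handle failure */;            // run() returns true on error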
-static bool isLessConstraining(GlobalValue::VisibilityTypes a,
- GlobalValue::VisibilityTypes b) {
- if (a == GlobalValue::HiddenVisibility)
+bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
+ if (!isPerformingImport())
return false;
- if (b == GlobalValue::HiddenVisibility)
+ auto *GA = dyn_cast<GlobalAlias>(SGV);
+ if (GA) {
+ if (GA->hasWeakAnyLinkage())
+ return false;
+ const GlobalObject *GO = GA->getBaseObject();
+ if (!GO->hasLinkOnceODRLinkage())
+ return false;
+ return doImportAsDefinition(GO);
+ }
+ // Always import GlobalVariable definitions, except for the special
+ // case of WeakAny which are imported as ExternalWeak declarations
+ // (see comments in ModuleLinker::getLinkage). The linkage changes
+ // described in ModuleLinker::getLinkage ensure the correct behavior (e.g.
+ // global variables with external linkage are transformed to
+ // available_externally definitions, which are ultimately turned into
+ // declarations after the EliminateAvailableExternally pass).
+ if (isa<GlobalVariable>(SGV) && !SGV->isDeclaration() &&
+ !SGV->hasWeakAnyLinkage())
return true;
- if (a == GlobalValue::ProtectedVisibility)
- return false;
- if (b == GlobalValue::ProtectedVisibility)
+ // Only import the function requested for importing.
+ auto *SF = dyn_cast<Function>(SGV);
+ if (SF && FunctionsToImport->count(SF))
return true;
+ // Otherwise no.
return false;
}
-/// Loop through the global variables in the src module and merge them into the
-/// dest module.
-static GlobalVariable *copyGlobalVariableProto(TypeMapTy &TypeMap, Module &DstM,
- const GlobalVariable *SGVar) {
- // No linking to be performed or linking from the source: simply create an
- // identical version of the symbol over in the dest module... the
- // initializer will be filled in later by LinkGlobalInits.
- GlobalVariable *NewDGV = new GlobalVariable(
- DstM, TypeMap.get(SGVar->getType()->getElementType()),
- SGVar->isConstant(), SGVar->getLinkage(), /*init*/ nullptr,
- SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
- SGVar->getType()->getAddressSpace());
-
- return NewDGV;
-}
-
-/// Link the function in the source module into the destination module if
-/// needed, setting up mapping information.
-static Function *copyFunctionProto(TypeMapTy &TypeMap, Module &DstM,
- const Function *SF) {
- // If there is no linkage to be performed or we are linking from the source,
- // bring SF over.
- return Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(),
- SF->getName(), &DstM);
-}
+bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) {
+ assert(SGV->hasLocalLinkage());
+ // Both the imported references and the original local variable must
+ // be promoted.
+ if (!isPerformingImport() && !isModuleExporting())
+ return false;
-/// Set up prototypes for any aliases that come over from the source module.
-static GlobalAlias *copyGlobalAliasProto(TypeMapTy &TypeMap, Module &DstM,
- const GlobalAlias *SGA) {
- // If there is no linkage to be performed or we're linking from the source,
- // bring over SGA.
- auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
- return GlobalAlias::create(PTy, SGA->getLinkage(), SGA->getName(), &DstM);
-}
+ // Local const variables never need to be promoted unless they are address
+ // taken. The imported uses can simply use the clone created in this module.
+ // For now we are conservative in determining which variables are not
+ // address taken by checking the unnamed addr flag. To be more aggressive,
+ // the address taken information must be checked earlier during parsing
+ // of the module and recorded in the function index for use when importing
+ // from that module.
+ auto *GVar = dyn_cast<GlobalVariable>(SGV);
+ if (GVar && GVar->isConstant() && GVar->hasUnnamedAddr())
+ return false;
-static GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, Module &DstM,
- const GlobalValue *SGV) {
- GlobalValue *NewGV;
- if (auto *SGVar = dyn_cast<GlobalVariable>(SGV))
- NewGV = copyGlobalVariableProto(TypeMap, DstM, SGVar);
- else if (auto *SF = dyn_cast<Function>(SGV))
- NewGV = copyFunctionProto(TypeMap, DstM, SF);
- else
- NewGV = copyGlobalAliasProto(TypeMap, DstM, cast<GlobalAlias>(SGV));
- copyGVAttributes(NewGV, SGV);
- return NewGV;
+ // Eventually we only need to promote functions in the exporting module that
+ // are referenced by a potentially exported function (i.e. one that is in the
+ // function index).
+ return true;
}
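// [Editor's example] e.g. a file-local constant such as
//   @.str = private unnamed_addr constant [4 x i8] c"abc\00"
// is left unpromoted: its address is not significant, so importing
// modules can simply use their own clone of it.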
-Value *ValueMaterializerTy::materializeValueFor(Value *V) {
- auto *SGV = dyn_cast<GlobalValue>(V);
- if (!SGV)
- return nullptr;
+std::string ModuleLinker::getName(const GlobalValue *SGV) {
+ // For locals that must be promoted to global scope, ensure that
+ // the promoted name uniquely identifies the copy in the original module,
+ // using the ID assigned during combined index creation. When importing,
+ // we rename all locals (not just those that are promoted) in order to
+ // avoid naming conflicts between locals imported from different modules.
+ if (SGV->hasLocalLinkage() &&
+ (doPromoteLocalToGlobal(SGV) || isPerformingImport()))
+ return FunctionInfoIndex::getGlobalNameForLocal(
+ SGV->getName(),
+ ImportIndex->getModuleId(SGV->getParent()->getModuleIdentifier()));
+ return SGV->getName();
+}
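// [Editor's sketch] Illustration of the promoted-local renaming getName
// relies on. The authoritative scheme lives in
// FunctionInfoIndex::getGlobalNameForLocal; this standalone stand-in
// assumes a "<name>.llvm.<moduleId>" format.
#include <string>
static std::string globalNameForLocal(const std::string &Name,
                                      unsigned ModuleId) {
  // e.g. local "foo" from the module with id 42 -> "foo.llvm.42"
  return Name + ".llvm." + std::to_string(ModuleId);
}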
+
+GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
+ // Any local variable that is referenced by an exported function needs
+ // to be promoted to global scope. Since we don't currently know which
+ // functions reference which local variables/functions, we must treat
+ // all as potentially exported if this module is exporting anything.
+ if (isModuleExporting()) {
+ if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV))
+ return GlobalValue::ExternalLinkage;
+ return SGV->getLinkage();
+ }
+
+ // Otherwise, if we aren't importing, no linkage change is needed.
+ if (!isPerformingImport())
+ return SGV->getLinkage();
+
+ switch (SGV->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ // External definitions are converted to available_externally
+ // definitions upon import, so that they are available for inlining
+ // and/or optimization, but are turned into declarations later
+ // during the EliminateAvailableExternally pass.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ // An imported external declaration stays external.
+ return SGV->getLinkage();
+
+ case GlobalValue::AvailableExternallyLinkage:
+ // An imported available_externally definition converts
+ // to external if imported as a declaration.
+ if (!doImportAsDefinition(SGV))
+ return GlobalValue::ExternalLinkage;
+ // An imported available_externally declaration stays that way.
+ return SGV->getLinkage();
+
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ // These both stay the same when importing the definition.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakAnyLinkage:
+ // Can't import weak_any definitions correctly, or we might change the
+ // program semantics, since the linker will pick the first weak_any
+ // definition and importing would change the order they are seen by the
+ // linker. The module linking caller needs to enforce this.
+ assert(!doImportAsDefinition(SGV));
+ // If imported as a declaration, it becomes external_weak.
+ return GlobalValue::ExternalWeakLinkage;
+
+ case GlobalValue::WeakODRLinkage:
+ // For weak_odr linkage, there is a guarantee that all copies will be
+ // equivalent, so the issue described above for weak_any does not exist,
+ // and the definition can be imported. It can be treated similarly
+ // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+
+ case GlobalValue::AppendingLinkage:
+ // It would be incorrect to import an appending linkage variable,
+ // since it would cause global constructors/destructors to be
+ // executed multiple times. This should have already been handled
+ // by linkIfNeeded, and we will assert in shouldLinkFromSource
+ // if we try to import, so we simply return AppendingLinkage here
+ // as this helper is called more widely in getLinkedToGlobal.
+ return GlobalValue::AppendingLinkage;
+
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ // If we are promoting the local to global scope, it is handled
+ // similarly to a normal externally visible global.
+ if (doPromoteLocalToGlobal(SGV)) {
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+ }
+ // A non-promoted imported local definition stays local.
+ // The ThinLTO pass will eventually force-import its definition.
+ return SGV->getLinkage();
- GlobalValue *DGV = copyGlobalValueProto(TypeMap, *DstM, SGV);
+ case GlobalValue::ExternalWeakLinkage:
+ // External weak doesn't apply to definitions, must be a declaration.
+ assert(!doImportAsDefinition(SGV));
+ // Linkage stays external_weak.
+ return SGV->getLinkage();
- if (Comdat *SC = SGV->getComdat()) {
- if (auto *DGO = dyn_cast<GlobalObject>(DGV)) {
- Comdat *DC = DstM->getOrInsertComdat(SC->getName());
- DGO->setComdat(DC);
- }
+ case GlobalValue::CommonLinkage:
+ // Linkage stays common on definitions.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
}
- LazilyLinkGlobalValues.push_back(SGV);
- return DGV;
+ llvm_unreachable("unknown linkage type");
}
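// [Editor's note] Net effect of the import-time switch above (aliases are
// never converted to available_externally):
//   external             -> available_externally when imported as a def
//   available_externally -> external when imported as a decl, else unchanged
//   linkonce{,_odr}      -> unchanged
//   weak                 -> external_weak (weak_any defs are never imported)
//   weak_odr             -> available_externally as a def, else external
//   appending            -> unchanged (never imported; see linkIfNeeded)
//   internal / private   -> external/available_externally when promoted
//   external_weak        -> unchanged (declarations only)
//   common               -> unchanged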
-bool ModuleLinker::getComdatLeader(Module *M, StringRef ComdatName,
+static GlobalValue::VisibilityTypes
+getMinVisibility(GlobalValue::VisibilityTypes A,
+ GlobalValue::VisibilityTypes B) {
+ if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
+ return GlobalValue::HiddenVisibility;
+ if (A == GlobalValue::ProtectedVisibility ||
+ B == GlobalValue::ProtectedVisibility)
+ return GlobalValue::ProtectedVisibility;
+ return GlobalValue::DefaultVisibility;
+}
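// [Editor's sketch] Standalone mirror of the lattice above, for
// illustration: Hidden < Protected < Default, where "min" means the most
// restrictive visibility wins.
enum Visibility { VisDefault, VisProtected, VisHidden };
static Visibility minVisibility(Visibility A, Visibility B) {
  if (A == VisHidden || B == VisHidden)
    return VisHidden;
  if (A == VisProtected || B == VisProtected)
    return VisProtected;
  return VisDefault;
}
// e.g. minVisibility(VisHidden, VisDefault) == VisHidden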
+
+bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
- const GlobalValue *GVal = M->getNamedValue(ComdatName);
+ const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
GVal = GA->getBaseObject();
if (!GVal)
@@ -641,6 +367,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
+ Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
@@ -677,8 +404,8 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
getComdatLeader(SrcM, ComdatName, SrcGV))
return true;
- const DataLayout &DstDL = DstM->getDataLayout();
- const DataLayout &SrcDL = SrcM->getDataLayout();
+ const DataLayout &DstDL = DstM.getDataLayout();
+ const DataLayout &SrcDL = SrcM.getDataLayout();
uint64_t DstSize =
DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType());
uint64_t SrcSize =
@@ -708,9 +435,10 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
+ Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
- Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable();
+ Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
if (DstCI == ComdatSymTab.end()) {
@@ -729,14 +457,17 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC,
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
const GlobalValue &Dest,
const GlobalValue &Src) {
+
// Should we unconditionally use the Src?
- if (OverrideFromSrc) {
+ if (shouldOverrideFromSrc()) {
LinkFromSrc = true;
return false;
}
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage()) {
+ // Should have prevented importing for appending linkage in linkIfNeeded.
+ assert(!isPerformingImport());
LinkFromSrc = true;
return false;
}
@@ -744,6 +475,28 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
bool SrcIsDeclaration = Src.isDeclarationForLinker();
bool DestIsDeclaration = Dest.isDeclarationForLinker();
+ if (isPerformingImport()) {
+ if (isa<Function>(&Src)) {
+ // For functions, set LinkFromSrc iff this function was requested
+ // for importing. Variables are decided below as usual.
+ LinkFromSrc = FunctionsToImport->count(&Src);
+ return false;
+ }
+
+ // Check if this is an alias with an already existing definition
+ // in Dest, which must have come from a prior importing pass from
+ // the same Src module. Unlike imported function and variable
+ // definitions, which are imported as available_externally and are
+ // not definitions for the linker, that is not a valid linkage for
+ // imported aliases which must be definitions. Simply use the existing
+ // Dest copy.
+ if (isa<GlobalAlias>(&Src) && !DestIsDeclaration) {
+ assert(isa<GlobalAlias>(&Dest));
+ LinkFromSrc = false;
+ return false;
+ }
+ }
+
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external.. Just link the
// external globals, we aren't adding anything.
@@ -753,7 +506,12 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
return false;
}
// If the Dest is weak, use the source linkage.
- LinkFromSrc = Dest.hasExternalWeakLinkage();
+ if (Dest.hasExternalWeakLinkage()) {
+ LinkFromSrc = true;
+ return false;
+ }
+ // Link an available_externally over a declaration.
+ LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
return false;
}
@@ -808,730 +566,117 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
"': symbol multiply defined!");
}
-/// Loop over all of the linked values to compute type mappings. For example,
-/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct
-/// types 'Foo' but one got renamed when the module was loaded into the same
-/// LLVMContext.
-void ModuleLinker::computeTypeMapping() {
- for (GlobalValue &SGV : SrcM->globals()) {
- GlobalValue *DGV = getLinkedToGlobal(&SGV);
- if (!DGV)
- continue;
-
- if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) {
- TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
- continue;
- }
-
- // Unify the element type of appending arrays.
- ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
- ArrayType *SAT = cast<ArrayType>(SGV.getType()->getElementType());
- TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
- }
-
- for (GlobalValue &SGV : *SrcM) {
- if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
- TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
- }
-
- for (GlobalValue &SGV : SrcM->aliases()) {
- if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
- TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
- }
-
- // Incorporate types by name, scanning all the types in the source module.
- // At this point, the destination module may have a type "%foo = { i32 }" for
- // example. When the source module got loaded into the same LLVMContext, if
- // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
- std::vector<StructType *> Types = SrcM->getIdentifiedStructTypes();
- for (StructType *ST : Types) {
- if (!ST->hasName())
- continue;
-
- // Check to see if there is a dot in the name followed by a digit.
- size_t DotPos = ST->getName().rfind('.');
- if (DotPos == 0 || DotPos == StringRef::npos ||
- ST->getName().back() == '.' ||
- !isdigit(static_cast<unsigned char>(ST->getName()[DotPos + 1])))
- continue;
-
- // Check to see if the destination module has a struct with the prefix name.
- StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos));
- if (!DST)
- continue;
+bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
+ GlobalValue *DGV = getLinkedToGlobal(&GV);
- // Don't use it if this actually came from the source module. They're in
- // the same LLVMContext after all. Also don't use it unless the type is
- // actually used in the destination module. This can happen in situations
- // like this:
- //
- // Module A Module B
- // -------- --------
- // %Z = type { %A } %B = type { %C.1 }
- // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* }
- // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] }
- // %C = type { i8* } %B.3 = type { %C.1 }
- //
- // When we link Module B with Module A, the '%B' in Module B is
- // used. However, that would then use '%C.1'. But when we process '%C.1',
- // we prefer to take the '%C' version. So we are then left with both
- // '%C.1' and '%C' being used for the same types. This leads to some
- // variables using one type and some using the other.
- if (TypeMap.DstStructTypesSet.hasType(DST))
- TypeMap.addTypeMapping(DST, ST);
- }
-
- // Now that we have discovered all of the type equivalences, get a body for
- // any 'opaque' types in the dest module that are now resolved.
- TypeMap.linkDefinedTypeBodies();
-}
+ if (shouldLinkOnlyNeeded() && !(DGV && DGV->isDeclaration()))
+ return false;
-static void upgradeGlobalArray(GlobalVariable *GV) {
- ArrayType *ATy = cast<ArrayType>(GV->getType()->getElementType());
- StructType *OldTy = cast<StructType>(ATy->getElementType());
- assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements");
-
- // Get the upgraded 3 element type.
- PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo();
- Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1),
- VoidPtrTy};
- StructType *NewTy = StructType::get(GV->getContext(), Tys, false);
-
- // Build new constants with a null third field filled in.
- Constant *OldInitC = GV->getInitializer();
- ConstantArray *OldInit = dyn_cast<ConstantArray>(OldInitC);
- if (!OldInit && !isa<ConstantAggregateZero>(OldInitC))
- // Invalid initializer; give up.
- return;
- std::vector<Constant *> Initializers;
- if (OldInit && OldInit->getNumOperands()) {
- Value *Null = Constant::getNullValue(VoidPtrTy);
- for (Use &U : OldInit->operands()) {
- ConstantStruct *Init = cast<ConstantStruct>(U.get());
- Initializers.push_back(ConstantStruct::get(
- NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr));
+ if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
+ auto *DGVar = dyn_cast<GlobalVariable>(DGV);
+ auto *SGVar = dyn_cast<GlobalVariable>(&GV);
+ if (DGVar && SGVar) {
+ if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
+ (!DGVar->isConstant() || !SGVar->isConstant())) {
+ DGVar->setConstant(false);
+ SGVar->setConstant(false);
+ }
+ if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
+ unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment());
+ SGVar->setAlignment(Align);
+ DGVar->setAlignment(Align);
+ }
}
- }
- assert(Initializers.size() == ATy->getNumElements() &&
- "Failed to copy all array elements");
-
- // Replace the old GV with a new one.
- ATy = ArrayType::get(NewTy, Initializers.size());
- Constant *NewInit = ConstantArray::get(ATy, Initializers);
- GlobalVariable *NewGV = new GlobalVariable(
- *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "",
- GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(),
- GV->isExternallyInitialized());
- NewGV->copyAttributesFrom(GV);
- NewGV->takeName(GV);
- assert(GV->use_empty() && "program cannot use initializer list");
- GV->eraseFromParent();
-}
-void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) {
- // Look for the global arrays.
- auto *DstGV = dyn_cast_or_null<GlobalVariable>(DstM->getNamedValue(Name));
- if (!DstGV)
- return;
- auto *SrcGV = dyn_cast_or_null<GlobalVariable>(SrcM->getNamedValue(Name));
- if (!SrcGV)
- return;
-
- // Check if the types already match.
- auto *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
- auto *SrcTy =
- cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
- if (DstTy == SrcTy)
- return;
+ GlobalValue::VisibilityTypes Visibility =
+ getMinVisibility(DGV->getVisibility(), GV.getVisibility());
+ DGV->setVisibility(Visibility);
+ GV.setVisibility(Visibility);
- // Grab the element types. We can only upgrade an array of a two-field
- // struct. Only bother if the other one has three fields.
- auto *DstEltTy = cast<StructType>(DstTy->getElementType());
- auto *SrcEltTy = cast<StructType>(SrcTy->getElementType());
- if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) {
- upgradeGlobalArray(DstGV);
- return;
+ bool HasUnnamedAddr = GV.hasUnnamedAddr() && DGV->hasUnnamedAddr();
+ DGV->setUnnamedAddr(HasUnnamedAddr);
+ GV.setUnnamedAddr(HasUnnamedAddr);
}
- if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2)
- upgradeGlobalArray(SrcGV);
-
- // We can't upgrade any other differences.
-}
-
-void ModuleLinker::upgradeMismatchedGlobals() {
- upgradeMismatchedGlobalArray("llvm.global_ctors");
- upgradeMismatchedGlobalArray("llvm.global_dtors");
-}
-
-/// If there were any appending global variables, link them together now.
-/// Return true on error.
-bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
- const GlobalVariable *SrcGV) {
-
- if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
- return emitError("Linking globals named '" + SrcGV->getName() +
- "': can only link appending global with another appending global!");
-
- ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
- ArrayType *SrcTy =
- cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
- Type *EltTy = DstTy->getElementType();
-
- // Check to see that the two arrays agree on type.
- if (EltTy != SrcTy->getElementType())
- return emitError("Appending variables with different element types!");
- if (DstGV->isConstant() != SrcGV->isConstant())
- return emitError("Appending variables linked with different const'ness!");
-
- if (DstGV->getAlignment() != SrcGV->getAlignment())
- return emitError(
- "Appending variables with different alignment need to be linked!");
-
- if (DstGV->getVisibility() != SrcGV->getVisibility())
- return emitError(
- "Appending variables with different visibility need to be linked!");
-
- if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr())
- return emitError(
- "Appending variables with different unnamed_addr need to be linked!");
-
- if (StringRef(DstGV->getSection()) != SrcGV->getSection())
- return emitError(
- "Appending variables with different section name need to be linked!");
-
- uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
- ArrayType *NewType = ArrayType::get(EltTy, NewSize);
-
- // Create the new global variable.
- GlobalVariable *NG =
- new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
- DstGV->getLinkage(), /*init*/nullptr, /*name*/"", DstGV,
- DstGV->getThreadLocalMode(),
- DstGV->getType()->getAddressSpace());
-
- // Propagate alignment, visibility and section info.
- copyGVAttributes(NG, DstGV);
-
- AppendingVarInfo AVI;
- AVI.NewGV = NG;
- AVI.DstInit = DstGV->getInitializer();
- AVI.SrcInit = SrcGV->getInitializer();
- AppendingVars.push_back(AVI);
-
- // Replace any uses of the two global variables with uses of the new
- // global.
- ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
- DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
- DstGV->eraseFromParent();
-
- // Track the source variable so we don't try to link it.
- DoNotLinkFromSource.insert(SrcGV);
-
- return false;
-}
+ // Don't append to the global_ctors list, for example, when importing
+ // for ThinLTO; otherwise the global ctors and dtors would get executed
+ // multiple times for local variables (the latter causing double frees).
+ if (GV.hasAppendingLinkage() && isPerformingImport())
+ return false;
-bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) {
- GlobalValue *DGV = getLinkedToGlobal(SGV);
+ if (isPerformingImport() && !doImportAsDefinition(&GV))
+ return false;
- // Handle the ultra special appending linkage case first.
- if (DGV && DGV->hasAppendingLinkage())
- return linkAppendingVarProto(cast<GlobalVariable>(DGV),
- cast<GlobalVariable>(SGV));
+ if (!DGV && !shouldOverrideFromSrc() &&
+ (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
+ GV.hasAvailableExternallyLinkage()))
+ return false;
- bool LinkFromSrc = true;
- Comdat *C = nullptr;
- GlobalValue::VisibilityTypes Visibility = SGV->getVisibility();
- bool HasUnnamedAddr = SGV->hasUnnamedAddr();
+ if (GV.isDeclaration())
+ return false;
- if (const Comdat *SC = SGV->getComdat()) {
+ if (const Comdat *SC = GV.getComdat()) {
+ bool LinkFromSrc;
Comdat::SelectionKind SK;
std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- C = DstM->getOrInsertComdat(SC->getName());
- C->setSelectionKind(SK);
- } else if (DGV) {
- if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV))
- return true;
- }
-
- if (!LinkFromSrc) {
- // Track the source global so that we don't attempt to copy it over when
- // processing global initializers.
- DoNotLinkFromSource.insert(SGV);
-
- if (DGV)
- // Make sure to remember this mapping.
- ValueMap[SGV] =
- ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType()));
- }
-
- if (DGV) {
- Visibility = isLessConstraining(Visibility, DGV->getVisibility())
- ? DGV->getVisibility()
- : Visibility;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
- }
-
- if (!LinkFromSrc && !DGV)
+ if (LinkFromSrc)
+ ValuesToLink.insert(&GV);
return false;
-
- GlobalValue *NewGV;
- if (!LinkFromSrc) {
- NewGV = DGV;
- } else {
- // If the GV is to be lazily linked, don't create it just yet.
- // The ValueMaterializerTy will deal with creating it if it's used.
- if (!DGV && !OverrideFromSrc &&
- (SGV->hasLocalLinkage() || SGV->hasLinkOnceLinkage() ||
- SGV->hasAvailableExternallyLinkage())) {
- DoNotLinkFromSource.insert(SGV);
- return false;
- }
-
- NewGV = copyGlobalValueProto(TypeMap, *DstM, SGV);
-
- if (DGV && isa<Function>(DGV))
- if (auto *NewF = dyn_cast<Function>(NewGV))
- OverridingFunctions.insert(NewF);
- }
-
- NewGV->setUnnamedAddr(HasUnnamedAddr);
- NewGV->setVisibility(Visibility);
-
- if (auto *NewGO = dyn_cast<GlobalObject>(NewGV)) {
- if (C)
- NewGO->setComdat(C);
-
- if (DGV && DGV->hasCommonLinkage() && SGV->hasCommonLinkage())
- NewGO->setAlignment(std::max(DGV->getAlignment(), SGV->getAlignment()));
- }
-
- if (auto *NewGVar = dyn_cast<GlobalVariable>(NewGV)) {
- auto *DGVar = dyn_cast_or_null<GlobalVariable>(DGV);
- auto *SGVar = dyn_cast<GlobalVariable>(SGV);
- if (DGVar && SGVar && DGVar->isDeclaration() && SGVar->isDeclaration() &&
- (!DGVar->isConstant() || !SGVar->isConstant()))
- NewGVar->setConstant(false);
- }
-
- // Make sure to remember this mapping.
- if (NewGV != DGV) {
- if (DGV) {
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
- DGV->eraseFromParent();
- }
- ValueMap[SGV] = NewGV;
- }
-
- return false;
-}
-
-static void getArrayElements(const Constant *C,
- SmallVectorImpl<Constant *> &Dest) {
- unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
-
- for (unsigned i = 0; i != NumElements; ++i)
- Dest.push_back(C->getAggregateElement(i));
-}
-
-void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
- // Merge the initializer.
- SmallVector<Constant *, 16> DstElements;
- getArrayElements(AVI.DstInit, DstElements);
-
- SmallVector<Constant *, 16> SrcElements;
- getArrayElements(AVI.SrcInit, SrcElements);
-
- ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
-
- StringRef Name = AVI.NewGV->getName();
- bool IsNewStructor =
- (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") &&
- cast<StructType>(NewType->getElementType())->getNumElements() == 3;
-
- for (auto *V : SrcElements) {
- if (IsNewStructor) {
- Constant *Key = V->getAggregateElement(2);
- if (DoNotLinkFromSource.count(Key))
- continue;
- }
- DstElements.push_back(
- MapValue(V, ValueMap, RF_None, &TypeMap, &ValMaterializer));
}
- if (IsNewStructor) {
- NewType = ArrayType::get(NewType->getElementType(), DstElements.size());
- AVI.NewGV->mutateType(PointerType::get(NewType, 0));
- }
-
- AVI.NewGV->setInitializer(ConstantArray::get(NewType, DstElements));
-}
-/// Update the initializers in the Dest module now that all globals that may be
-/// referenced are in Dest.
-void ModuleLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) {
- // Figure out what the initializer looks like in the dest module.
- Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, RF_None, &TypeMap,
- &ValMaterializer));
-}
-
-/// Copy the source function over into the dest function and fix up references
-/// to values. At this point we know that Dest is an external function, and
-/// that Src is not.
-bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) {
- assert(Dst.isDeclaration() && !Src.isDeclaration());
-
- // Materialize if needed.
- if (std::error_code EC = Src.materialize())
- return emitError(EC.message());
-
- // Link in the prefix data.
- if (Src.hasPrefixData())
- Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, RF_None, &TypeMap,
- &ValMaterializer));
-
- // Link in the prologue data.
- if (Src.hasPrologueData())
- Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, RF_None,
- &TypeMap, &ValMaterializer));
-
- // Link in the personality function.
- if (Src.hasPersonalityFn())
- Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, RF_None,
- &TypeMap, &ValMaterializer));
-
- // Go through and convert function arguments over, remembering the mapping.
- Function::arg_iterator DI = Dst.arg_begin();
- for (Argument &Arg : Src.args()) {
- DI->setName(Arg.getName()); // Copy the name over.
-
- // Add a mapping to our mapping.
- ValueMap[&Arg] = DI;
- ++DI;
- }
-
- // Copy over the metadata attachments.
- SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
- Src.getAllMetadata(MDs);
- for (const auto &I : MDs)
- Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_None, &TypeMap,
- &ValMaterializer));
-
- // Splice the body of the source function into the dest function.
- Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList());
-
- // At this point, all of the instructions and values of the function are now
- // copied over. The only problem is that they are still referencing values in
- // the Source function as operands. Loop through all of the operands of the
- // functions and patch them up to point to the local versions.
- for (BasicBlock &BB : Dst)
- for (Instruction &I : BB)
- RemapInstruction(&I, ValueMap, RF_IgnoreMissingEntries, &TypeMap,
- &ValMaterializer);
-
- // There is no need to map the arguments anymore.
- for (Argument &Arg : Src.args())
- ValueMap.erase(&Arg);
-
- Src.dematerialize();
- return false;
-}
-
-void ModuleLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) {
- Constant *Aliasee = Src.getAliasee();
- Constant *Val =
- MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer);
- Dst.setAliasee(Val);
-}
-
-bool ModuleLinker::linkGlobalValueBody(GlobalValue &Src) {
- Value *Dst = ValueMap[&Src];
- assert(Dst);
- if (auto *F = dyn_cast<Function>(&Src))
- return linkFunctionBody(cast<Function>(*Dst), *F);
- if (auto *GVar = dyn_cast<GlobalVariable>(&Src)) {
- linkGlobalInit(cast<GlobalVariable>(*Dst), *GVar);
- return false;
- }
- linkAliasBody(cast<GlobalAlias>(*Dst), cast<GlobalAlias>(Src));
+ bool LinkFromSrc = true;
+ if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
+ return true;
+ if (LinkFromSrc)
+ ValuesToLink.insert(&GV);
return false;
}
-/// Insert all of the named MDNodes in Src into the Dest module.
-void ModuleLinker::linkNamedMDNodes() {
- const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
- for (const NamedMDNode &NMD : SrcM->named_metadata()) {
- // Don't link module flags here. Do them separately.
- if (&NMD == SrcModFlags)
- continue;
- NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(NMD.getName());
- // Add Src elements into Dest node.
- for (const MDNode *op : NMD.operands())
- DestNMD->addOperand(
- MapMetadata(op, ValueMap, RF_None, &TypeMap, &ValMaterializer));
- }
-}
-
-/// Drop DISubprograms that have been superseded.
-///
-/// FIXME: this creates an asymmetric result: we strip functions from losing
-/// subprograms in DstM, but leave losing subprograms in SrcM.
-/// TODO: Remove this logic once the backend can correctly determine canonical
-/// subprograms.
-void ModuleLinker::stripReplacedSubprograms() {
- // Avoid quadratic runtime by returning early when there's nothing to do.
- if (OverridingFunctions.empty())
+void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) {
+ // Lazily link linkonce values (and their comdat members), adding them
+ // to the internalize list if requested.
+ if (!GV.hasLinkOnceLinkage())
return;
- // Move the functions now, so the set gets cleared even on early returns.
- auto Functions = std::move(OverridingFunctions);
- OverridingFunctions.clear();
+ if (shouldInternalizeLinkedSymbols())
+ Internalize.insert(GV.getName());
+ Add(GV);
- // Drop functions from subprograms if they've been overridden by the new
- // compile unit.
- NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu");
- if (!CompileUnits)
+ const Comdat *SC = GV.getComdat();
+ if (!SC)
+ return;
+ for (GlobalValue *GV2 : ComdatMembers[SC]) {
+ if (!GV2->hasLocalLinkage() && shouldInternalizeLinkedSymbols())
+ Internalize.insert(GV2->getName());
+ Add(*GV2);
+ }
+}
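// [Editor's sketch] Contract of the lazy callback above: when the mover
// reaches a lazily-linkable value it calls back, and the callback passes
// every value that should be linked (GV plus its comdat co-members)
// through Add. run() below wires it up roughly like this, assuming
// IRMover::ValueAdder is a std::function<void(GlobalValue &)>:
//
//   Mover.move(SrcM, ValuesToLink.getArrayRef(),
//              [this](GlobalValue &GV, IRMover::ValueAdder Add) {
//                addLazyFor(GV, Add);
//              },
//              ValIDToTempMDMap, false);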
+
+void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) {
+ if (GV.hasLocalLinkage() &&
+ (doPromoteLocalToGlobal(&GV) || isPerformingImport())) {
+ GV.setName(getName(&GV));
+ GV.setLinkage(getLinkage(&GV));
+ if (!GV.hasLocalLinkage())
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ if (isModuleExporting())
+ ValuesToLink.insert(&GV);
return;
- for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
- auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
- assert(CU && "Expected valid compile unit");
-
- for (DISubprogram *SP : CU->getSubprograms()) {
- if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction()))
- continue;
-
- // Prevent DebugInfoFinder from tagging this as the canonical subprogram,
- // since the canonical one is in the incoming module.
- SP->replaceFunction(nullptr);
- }
- }
-}
-
-/// Merge the linker flags in Src into the Dest module.
-bool ModuleLinker::linkModuleFlagsMetadata() {
- // If the source module has no module flags, we are done.
- const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
- if (!SrcModFlags) return false;
-
- // If the destination module doesn't have module flags yet, then just copy
- // over the source module's flags.
- NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
- if (DstModFlags->getNumOperands() == 0) {
- for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
- DstModFlags->addOperand(SrcModFlags->getOperand(I));
-
- return false;
- }
-
- // First build a map of the existing module flags and requirements.
- DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
- SmallSetVector<MDNode*, 16> Requirements;
- for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
- MDNode *Op = DstModFlags->getOperand(I);
- ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
- MDString *ID = cast<MDString>(Op->getOperand(1));
-
- if (Behavior->getZExtValue() == Module::Require) {
- Requirements.insert(cast<MDNode>(Op->getOperand(2)));
- } else {
- Flags[ID] = std::make_pair(Op, I);
- }
- }
-
- // Merge in the flags from the source module, and also collect its set of
- // requirements.
- bool HasErr = false;
- for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
- MDNode *SrcOp = SrcModFlags->getOperand(I);
- ConstantInt *SrcBehavior =
- mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
- MDString *ID = cast<MDString>(SrcOp->getOperand(1));
- MDNode *DstOp;
- unsigned DstIndex;
- std::tie(DstOp, DstIndex) = Flags.lookup(ID);
- unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
-
- // If this is a requirement, add it and continue.
- if (SrcBehaviorValue == Module::Require) {
- // If the destination module does not already have this requirement, add
- // it.
- if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
- DstModFlags->addOperand(SrcOp);
- }
- continue;
- }
-
- // If there is no existing flag with this ID, just add it.
- if (!DstOp) {
- Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
- DstModFlags->addOperand(SrcOp);
- continue;
- }
-
- // Otherwise, perform a merge.
- ConstantInt *DstBehavior =
- mdconst::extract<ConstantInt>(DstOp->getOperand(0));
- unsigned DstBehaviorValue = DstBehavior->getZExtValue();
-
- // If either flag has override behavior, handle it first.
- if (DstBehaviorValue == Module::Override) {
- // Diagnose inconsistent flags which both have override behavior.
- if (SrcBehaviorValue == Module::Override &&
- SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- HasErr |= emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting override values");
- }
- continue;
- } else if (SrcBehaviorValue == Module::Override) {
- // Update the destination flag to that of the source.
- DstModFlags->setOperand(DstIndex, SrcOp);
- Flags[ID].first = SrcOp;
- continue;
- }
-
- // Diagnose inconsistent merge behavior types.
- if (SrcBehaviorValue != DstBehaviorValue) {
- HasErr |= emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting behaviors");
- continue;
- }
-
- auto replaceDstValue = [&](MDNode *New) {
- Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
- MDNode *Flag = MDNode::get(DstM->getContext(), FlagOps);
- DstModFlags->setOperand(DstIndex, Flag);
- Flags[ID].first = Flag;
- };
-
- // Perform the merge for standard behavior types.
- switch (SrcBehaviorValue) {
- case Module::Require:
- case Module::Override: llvm_unreachable("not possible");
- case Module::Error: {
- // Emit an error if the values differ.
- if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- HasErr |= emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
- }
- continue;
- }
- case Module::Warning: {
- // Emit a warning if the values differ.
- if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- emitWarning("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
- }
- continue;
- }
- case Module::Append: {
- MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
- MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
- SmallVector<Metadata *, 8> MDs;
- MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
- MDs.append(DstValue->op_begin(), DstValue->op_end());
- MDs.append(SrcValue->op_begin(), SrcValue->op_end());
-
- replaceDstValue(MDNode::get(DstM->getContext(), MDs));
- break;
- }
- case Module::AppendUnique: {
- SmallSetVector<Metadata *, 16> Elts;
- MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
- MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
- Elts.insert(DstValue->op_begin(), DstValue->op_end());
- Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
-
- replaceDstValue(MDNode::get(DstM->getContext(),
- makeArrayRef(Elts.begin(), Elts.end())));
- break;
- }
- }
- }
-
- // Check all of the requirements.
- for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
- MDNode *Requirement = Requirements[I];
- MDString *Flag = cast<MDString>(Requirement->getOperand(0));
- Metadata *ReqValue = Requirement->getOperand(1);
-
- MDNode *Op = Flags[Flag].first;
- if (!Op || Op->getOperand(2) != ReqValue) {
- HasErr |= emitError("linking module flags '" + Flag->getString() +
- "': does not have the required value");
- continue;
- }
}
-
- return HasErr;
-}
-
-// This function returns true if the triples match.
-static bool triplesMatch(const Triple &T0, const Triple &T1) {
- // If vendor is apple, ignore the version number.
- if (T0.getVendor() == Triple::Apple)
- return T0.getArch() == T1.getArch() &&
- T0.getSubArch() == T1.getSubArch() &&
- T0.getVendor() == T1.getVendor() &&
- T0.getOS() == T1.getOS();
-
- return T0 == T1;
+ GV.setLinkage(getLinkage(&GV));
}
-// This function returns the merged triple.
-static std::string mergeTriples(const Triple &SrcTriple, const Triple &DstTriple) {
- // If vendor is apple, pick the triple with the larger version number.
- if (SrcTriple.getVendor() == Triple::Apple)
- if (DstTriple.isOSVersionLT(SrcTriple))
- return SrcTriple.str();
-
- return DstTriple.str();
+void ModuleLinker::processGlobalsForThinLTO() {
+ for (GlobalVariable &GV : SrcM.globals())
+ processGlobalForThinLTO(GV);
+ for (Function &SF : SrcM)
+ processGlobalForThinLTO(SF);
+ for (GlobalAlias &GA : SrcM.aliases())
+ processGlobalForThinLTO(GA);
}
bool ModuleLinker::run() {
- assert(DstM && "Null destination module");
- assert(SrcM && "Null source module");
-
- // Inherit the target data from the source module if the destination module
- // doesn't have one already.
- if (DstM->getDataLayout().isDefault())
- DstM->setDataLayout(SrcM->getDataLayout());
-
- if (SrcM->getDataLayout() != DstM->getDataLayout()) {
- emitWarning("Linking two modules of different data layouts: '" +
- SrcM->getModuleIdentifier() + "' is '" +
- SrcM->getDataLayoutStr() + "' whereas '" +
- DstM->getModuleIdentifier() + "' is '" +
- DstM->getDataLayoutStr() + "'\n");
- }
-
- // Copy the target triple from the source to dest if the dest's is empty.
- if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
- DstM->setTargetTriple(SrcM->getTargetTriple());
-
- Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM->getTargetTriple());
-
- if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
- emitWarning("Linking two modules of different target triples: " +
- SrcM->getModuleIdentifier() + "' is '" +
- SrcM->getTargetTriple() + "' whereas '" +
- DstM->getModuleIdentifier() + "' is '" +
- DstM->getTargetTriple() + "'\n");
-
- DstM->setTargetTriple(mergeTriples(SrcTriple, DstTriple));
-
- // Append the module inline asm string.
- if (!SrcM->getModuleInlineAsm().empty()) {
- if (DstM->getModuleInlineAsm().empty())
- DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
- else
- DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
- SrcM->getModuleInlineAsm());
- }
-
- // Loop over all of the linked values to compute type mappings.
- computeTypeMapping();
-
- ComdatsChosen.clear();
- for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
+ for (const auto &SMEC : SrcM.getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
if (ComdatsChosen.count(&C))
continue;
@@ -1542,233 +687,88 @@ bool ModuleLinker::run() {
ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
}
- // Upgrade mismatched global arrays.
- upgradeMismatchedGlobals();
+ for (GlobalVariable &GV : SrcM.globals())
+ if (const Comdat *SC = GV.getComdat())
+ ComdatMembers[SC].push_back(&GV);
+
+ for (Function &SF : SrcM)
+ if (const Comdat *SC = SF.getComdat())
+ ComdatMembers[SC].push_back(&SF);
+
+ for (GlobalAlias &GA : SrcM.aliases())
+ if (const Comdat *SC = GA.getComdat())
+ ComdatMembers[SC].push_back(&GA);
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
- for (GlobalVariable &GV : SrcM->globals())
- if (linkGlobalValueProto(&GV))
+ for (GlobalVariable &GV : SrcM.globals())
+ if (linkIfNeeded(GV))
return true;
- // Link the functions together between the two modules, without doing function
- // bodies... this just adds external function prototypes to the DstM
- // function... We do this so that when we begin processing function bodies,
- // all of the global values that may be referenced are available in our
- // ValueMap.
- for (Function &F : *SrcM)
- if (linkGlobalValueProto(&F))
+ for (Function &SF : SrcM)
+ if (linkIfNeeded(SF))
return true;
- // If there were any aliases, link them now.
- for (GlobalAlias &GA : SrcM->aliases())
- if (linkGlobalValueProto(&GA))
+ for (GlobalAlias &GA : SrcM.aliases())
+ if (linkIfNeeded(GA))
return true;
- for (const AppendingVarInfo &AppendingVar : AppendingVars)
- linkAppendingVarInit(AppendingVar);
+ processGlobalsForThinLTO();
- for (const auto &Entry : DstM->getComdatSymbolTable()) {
- const Comdat &C = Entry.getValue();
- if (C.getSelectionKind() == Comdat::Any)
+ for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
+ GlobalValue *GV = ValuesToLink[I];
+ const Comdat *SC = GV->getComdat();
+ if (!SC)
continue;
- const GlobalValue *GV = SrcM->getNamedValue(C.getName());
- if (GV)
- MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer);
+ for (GlobalValue *GV2 : ComdatMembers[SC])
+ ValuesToLink.insert(GV2);
}
- // Strip replaced subprograms before mapping any metadata -- so that we're
- // not changing metadata from the source module (note that
- // linkGlobalValueBody() eventually calls RemapInstruction() and therefore
- // MapMetadata()) -- but after linking global value protocols -- so that
- // OverridingFunctions has been built.
- stripReplacedSubprograms();
-
- // Link in the function bodies that are defined in the source module into
- // DstM.
- for (Function &SF : *SrcM) {
- // Skip if no body (function is external).
- if (SF.isDeclaration())
- continue;
-
- // Skip if not linking from source.
- if (DoNotLinkFromSource.count(&SF))
- continue;
-
- if (linkGlobalValueBody(SF))
- return true;
+ if (shouldInternalizeLinkedSymbols()) {
+ for (GlobalValue *GV : ValuesToLink)
+ Internalize.insert(GV->getName());
}
- // Resolve all uses of aliases with aliasees.
- for (GlobalAlias &Src : SrcM->aliases()) {
- if (DoNotLinkFromSource.count(&Src))
- continue;
- linkGlobalValueBody(Src);
- }
-
- // Remap all of the named MDNodes in Src into the DstM module. We do this
- // after linking GlobalValues so that MDNodes that reference GlobalValues
- // are properly remapped.
- linkNamedMDNodes();
-
- // Merge the module flags into the DstM module.
- if (linkModuleFlagsMetadata())
+ if (Mover.move(SrcM, ValuesToLink.getArrayRef(),
+ [this](GlobalValue &GV, IRMover::ValueAdder Add) {
+ addLazyFor(GV, Add);
+ },
+ ValIDToTempMDMap, false))
return true;
-
- // Update the initializers in the DstM module now that all globals that may
- // be referenced are in DstM.
- for (GlobalVariable &Src : SrcM->globals()) {
- // Only process initialized GV's or ones not already in dest.
- if (!Src.hasInitializer() || DoNotLinkFromSource.count(&Src))
- continue;
- linkGlobalValueBody(Src);
- }
-
- // Process vector of lazily linked in functions.
- while (!LazilyLinkGlobalValues.empty()) {
- GlobalValue *SGV = LazilyLinkGlobalValues.back();
- LazilyLinkGlobalValues.pop_back();
-
- assert(!SGV->isDeclaration() && "users should not pass down decls");
- if (linkGlobalValueBody(*SGV))
- return true;
+ Module &DstM = Mover.getModule();
+ for (auto &P : Internalize) {
+ GlobalValue *GV = DstM.getNamedValue(P.first());
+ GV->setLinkage(GlobalValue::InternalLinkage);
}
return false;
}
-Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
- : ETypes(E), IsPacked(P) {}
-
-Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
- : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
-
-bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
- if (IsPacked != That.IsPacked)
- return false;
- if (ETypes != That.ETypes)
- return false;
- return true;
-}
-
-bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
- return !this->operator==(That);
-}
-
-StructType *Linker::StructTypeKeyInfo::getEmptyKey() {
- return DenseMapInfo<StructType *>::getEmptyKey();
-}
-
-StructType *Linker::StructTypeKeyInfo::getTombstoneKey() {
- return DenseMapInfo<StructType *>::getTombstoneKey();
-}
-
-unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
- return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
- Key.IsPacked);
-}
-
-unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) {
- return getHashValue(KeyTy(ST));
-}
-
-bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
- const StructType *RHS) {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- return LHS == KeyTy(RHS);
-}
-
-bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS,
- const StructType *RHS) {
- if (RHS == getEmptyKey())
- return LHS == getEmptyKey();
-
- if (RHS == getTombstoneKey())
- return LHS == getTombstoneKey();
-
- return KeyTy(LHS) == KeyTy(RHS);
-}
-
-void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
- assert(!Ty->isOpaque());
- NonOpaqueStructTypes.insert(Ty);
-}
-
-void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
- assert(!Ty->isOpaque());
- NonOpaqueStructTypes.insert(Ty);
- bool Removed = OpaqueStructTypes.erase(Ty);
- (void)Removed;
- assert(Removed);
-}
+Linker::Linker(Module &M) : Mover(M) {}
-void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
- assert(Ty->isOpaque());
- OpaqueStructTypes.insert(Ty);
-}
-
-StructType *
-Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
- bool IsPacked) {
- Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
- auto I = NonOpaqueStructTypes.find_as(Key);
- if (I == NonOpaqueStructTypes.end())
- return nullptr;
- return *I;
+bool Linker::linkInModule(std::unique_ptr<Module> Src, unsigned Flags,
+ const FunctionInfoIndex *Index,
+ DenseSet<const GlobalValue *> *FunctionsToImport,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap) {
+ ModuleLinker ModLinker(Mover, *Src, Flags, Index, FunctionsToImport,
+ ValIDToTempMDMap);
+ return ModLinker.run();
}
-bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) {
- if (Ty->isOpaque())
- return OpaqueStructTypes.count(Ty);
- auto I = NonOpaqueStructTypes.find(Ty);
- if (I == NonOpaqueStructTypes.end())
- return false;
- return *I == Ty;
-}
-
-void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
- this->Composite = M;
- this->DiagnosticHandler = DiagnosticHandler;
-
- TypeFinder StructTypes;
- StructTypes.run(*M, true);
- for (StructType *Ty : StructTypes) {
- if (Ty->isOpaque())
- IdentifiedStructTypes.addOpaque(Ty);
- else
- IdentifiedStructTypes.addNonOpaque(Ty);
- }
+bool Linker::linkInModuleForCAPI(Module &Src) {
+ ModuleLinker ModLinker(Mover, Src, 0, nullptr, nullptr);
+ return ModLinker.run();
}
-Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
- init(M, DiagnosticHandler);
-}
-
-Linker::Linker(Module *M) {
- init(M, [this](const DiagnosticInfo &DI) {
- Composite->getContext().diagnose(DI);
- });
-}
-
-Linker::~Linker() {
-}
-
-void Linker::deleteModule() {
- delete Composite;
- Composite = nullptr;
-}
-
-bool Linker::linkInModule(Module *Src, bool OverrideSymbols) {
- ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src,
- DiagnosticHandler, OverrideSymbols);
- bool RetCode = TheLinker.run();
- Composite->dropTriviallyDeadConstantArrays();
- return RetCode;
-}
-
-void Linker::setModule(Module *Dst) {
- init(Dst, DiagnosticHandler);
+bool Linker::linkInMetadata(Module &Src,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap) {
+ SetVector<GlobalValue *> ValuesToLink;
+ if (Mover.move(
+ Src, ValuesToLink.getArrayRef(),
+ [this](GlobalValue &GV, IRMover::ValueAdder Add) { assert(false); },
+ ValIDToTempMDMap, true))
+ return true;
+ return false;
}
//===----------------------------------------------------------------------===//
@@ -1780,34 +780,58 @@ void Linker::setModule(Module *Dst) {
/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
/// Upon failure, the Dest module could be in a modified state, and shouldn't be
/// relied on to be consistent.
-bool Linker::LinkModules(Module *Dest, Module *Src,
- DiagnosticHandlerFunction DiagnosticHandler) {
- Linker L(Dest, DiagnosticHandler);
- return L.linkInModule(Src);
+bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src,
+ unsigned Flags) {
+ Linker L(Dest);
+ return L.linkInModule(std::move(Src), Flags);
}
-bool Linker::LinkModules(Module *Dest, Module *Src) {
- Linker L(Dest);
- return L.linkInModule(Src);
+std::unique_ptr<Module>
+llvm::renameModuleForThinLTO(std::unique_ptr<Module> M,
+ const FunctionInfoIndex *Index) {
+ std::unique_ptr<llvm::Module> RenamedModule(
+ new llvm::Module(M->getModuleIdentifier(), M->getContext()));
+ Linker L(*RenamedModule.get());
+ if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index))
+ return nullptr;
+ return RenamedModule;
}
//===----------------------------------------------------------------------===//
// C API.
//===----------------------------------------------------------------------===//
+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+ auto *Message = reinterpret_cast<std::string *>(C);
+ raw_string_ostream Stream(*Message);
+ DiagnosticPrinterRawOStream DP(Stream);
+ DI.print(DP);
+}
+
LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
LLVMLinkerMode Unused, char **OutMessages) {
Module *D = unwrap(Dest);
+ LLVMContext &Ctx = D->getContext();
+
+ LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+ Ctx.getDiagnosticHandler();
+ void *OldDiagnosticContext = Ctx.getDiagnosticContext();
std::string Message;
- raw_string_ostream Stream(Message);
- DiagnosticPrinterRawOStream DP(Stream);
+ Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
+
+ Linker L(*D);
+ Module *M = unwrap(Src);
+ LLVMBool Result = L.linkInModuleForCAPI(*M);
- LLVMBool Result = Linker::LinkModules(
- D, unwrap(Src), [&](const DiagnosticInfo &DI) { DI.print(DP); });
+ Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
- if (OutMessages && Result) {
- Stream.flush();
+ if (OutMessages && Result)
*OutMessages = strdup(Message.c_str());
- }
return Result;
}
+
+LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
+ Module *D = unwrap(Dest);
+ std::unique_ptr<Module> M(unwrap(Src));
+ return Linker::linkModules(*D, std::move(M));
+}
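
The LinkModules.cpp rewrite above replaces the pointer-based entry points (and the per-call DiagnosticHandlerFunction) with a reference-plus-ownership model: the destination module is passed by reference and the source module is consumed. A minimal caller sketch under that reading; linkIntoComposite is a hypothetical helper, not LLVM API:

    // Minimal sketch: the new entry point takes the destination by reference
    // and consumes the source module, so the source is gone whether or not
    // linking succeeds.
    #include "llvm/IR/Module.h"
    #include "llvm/Linker/Linker.h"
    #include <memory>
    #include <utility>

    static bool linkIntoComposite(llvm::Module &Composite,
                                  std::unique_ptr<llvm::Module> Src) {
      // Returns true on error, matching Linker::linkModules above.
      return llvm::Linker::linkModules(Composite, std::move(Src),
                                       llvm::Linker::Flags::None);
    }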
diff --git a/contrib/llvm/lib/MC/ConstantPools.cpp b/contrib/llvm/lib/MC/ConstantPools.cpp
index f7649fb..9643b75 100644
--- a/contrib/llvm/lib/MC/ConstantPools.cpp
+++ b/contrib/llvm/lib/MC/ConstantPools.cpp
@@ -29,17 +29,17 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
I != E; ++I) {
Streamer.EmitCodeAlignment(I->Size); // align naturally
Streamer.EmitLabel(I->Label);
- Streamer.EmitValue(I->Value, I->Size);
+ Streamer.EmitValue(I->Value, I->Size, I->Loc);
}
Streamer.EmitDataRegion(MCDR_DataRegionEnd);
Entries.clear();
}
const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
- unsigned Size) {
+ unsigned Size, SMLoc Loc) {
MCSymbol *CPEntryLabel = Context.createTempSymbol();
- Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
+ Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
return MCSymbolRefExpr::create(CPEntryLabel, Context);
}
@@ -90,8 +90,8 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
const MCExpr *Expr,
- unsigned Size) {
+ unsigned Size, SMLoc Loc) {
MCSection *Section = Streamer.getCurrentSection().first;
return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
- Size);
+ Size, Loc);
}
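
The ConstantPools change threads an SMLoc through addEntry so the location captured when a value is deferred into the pool is replayed when EmitValue finally writes the entry, letting late diagnostics point at the original directive. A sketch of a call site under that assumption; deferToPool is illustrative:

    // Illustrative call site: Loc is stored with the pool entry and later
    // passed to EmitValue(Value, Size, Loc) when the pool is emitted.
    #include "llvm/MC/ConstantPools.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/SMLoc.h"

    static const llvm::MCExpr *deferToPool(llvm::ConstantPool &Pool,
                                           const llvm::MCExpr *Value,
                                           llvm::MCContext &Ctx,
                                           unsigned Size, llvm::SMLoc Loc) {
      return Pool.addEntry(Value, Ctx, Size, Loc);
    }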
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index e925bc2..e6552be 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
#include <vector>
using namespace llvm;
@@ -106,7 +107,9 @@ class ELFObjectWriter : public MCObjectWriter {
/// @name Symbol Table Data
/// @{
- StringTableBuilder StrTabBuilder;
+ BumpPtrAllocator Alloc;
+ StringSaver VersionSymSaver{Alloc};
+ StringTableBuilder StrTabBuilder{StringTableBuilder::ELF};
/// @}
@@ -157,9 +160,9 @@ class ELFObjectWriter : public MCObjectWriter {
template <typename T> void write(T Val) {
if (IsLittleEndian)
- support::endian::Writer<support::little>(OS).write(Val);
+ support::endian::Writer<support::little>(getStream()).write(Val);
else
- support::endian::Writer<support::big>(OS).write(Val);
+ support::endian::Writer<support::big>(getStream()).write(Val);
}
void writeHeader(const MCAssembler &Asm);
@@ -232,7 +235,7 @@ class ELFObjectWriter : public MCObjectWriter {
}
void ELFObjectWriter::align(unsigned Alignment) {
- uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
+ uint64_t Padding = OffsetToAlignment(getStream().tell(), Alignment);
WriteZeros(Padding);
}
@@ -447,9 +450,6 @@ void ELFObjectWriter::writeSymbol(SymbolTableWriter &Writer,
uint32_t StringIndex, ELFSymbolData &MSD,
const MCAsmLayout &Layout) {
const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
- assert((!Symbol.getFragment() ||
- (Symbol.getFragment()->getParent() == &Symbol.getSection())) &&
- "The symbol's section doesn't match the fragment's symbol");
const MCSymbolELF *Base =
cast_or_null<MCSymbolELF>(Layout.getBaseSymbol(Symbol));
@@ -630,28 +630,36 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
// In general, ELF has no relocations for -B. It can only represent (A + C)
// or (A + C - R). If B = R + K and the relocation is not pcrel, we can
// replace B to implement it: (A - R - K + C)
- if (IsPCRel)
- Asm.getContext().reportFatalError(
+ if (IsPCRel) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"No relocation available to represent this relative expression");
+ return;
+ }
const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol());
- if (SymB.isUndefined())
- Asm.getContext().reportFatalError(
+ if (SymB.isUndefined()) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
Twine("symbol '") + SymB.getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
assert(!SymB.isAbsolute() && "Should have been folded");
const MCSection &SecB = SymB.getSection();
- if (&SecB != &FixupSection)
- Asm.getContext().reportFatalError(
+ if (&SecB != &FixupSection) {
+ Asm.getContext().reportError(
Fixup.getLoc(), "Cannot represent a difference across sections");
+ return;
+ }
- if (::isWeak(SymB))
- Asm.getContext().reportFatalError(
+ if (::isWeak(SymB)) {
+ Asm.getContext().reportError(
Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
+ return;
+ }
uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
uint64_t K = SymBOffset - FixupOffset;
@@ -764,7 +772,7 @@ void ELFObjectWriter::computeSymbolTable(
SymbolTableIndex = addToSectionTable(SymtabSection);
align(SymtabSection->getAlignment());
- uint64_t SecStart = OS.tell();
+ uint64_t SecStart = getStream().tell();
// The first entry is the undefined symbol entry.
Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
@@ -784,8 +792,10 @@ void ELFObjectWriter::computeSymbolTable(
Renames.count(&Symbol)))
continue;
- if (Symbol.isTemporary() && Symbol.isUndefined())
- Ctx.reportFatalError(SMLoc(), "Undefined temporary");
+ if (Symbol.isTemporary() && Symbol.isUndefined()) {
+ Ctx.reportError(SMLoc(), "Undefined temporary symbol");
+ continue;
+ }
ELFSymbolData MSD;
MSD.Symbol = cast<MCSymbolELF>(&Symbol);
@@ -850,13 +860,15 @@ void ELFObjectWriter::computeSymbolTable(
Buf += Name.substr(0, Pos);
unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
Buf += Name.substr(Pos + Skip);
- Name = Buf;
+ Name = VersionSymSaver.save(Buf.c_str());
}
}
// Sections have their own string table
- if (Symbol.getType() != ELF::STT_SECTION)
- MSD.Name = StrTabBuilder.add(Name);
+ if (Symbol.getType() != ELF::STT_SECTION) {
+ MSD.Name = Name;
+ StrTabBuilder.add(Name);
+ }
if (Local)
LocalSymbolData.push_back(MSD);
@@ -878,7 +890,7 @@ void ELFObjectWriter::computeSymbolTable(
for (const std::string &Name : FileNames)
StrTabBuilder.add(Name);
- StrTabBuilder.finalize(StringTableBuilder::ELF);
+ StrTabBuilder.finalize();
for (const std::string &Name : FileNames)
Writer.writeSymbol(StrTabBuilder.getOffset(Name),
@@ -911,7 +923,7 @@ void ELFObjectWriter::computeSymbolTable(
assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL);
}
- uint64_t SecEnd = OS.tell();
+ uint64_t SecEnd = getStream().tell();
SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);
ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
@@ -921,12 +933,12 @@ void ELFObjectWriter::computeSymbolTable(
}
assert(SymtabShndxSectionIndex != 0);
- SecStart = OS.tell();
+ SecStart = getStream().tell();
const MCSectionELF *SymtabShndxSection =
SectionTable[SymtabShndxSectionIndex - 1];
for (uint32_t Index : ShndxIndexes)
write(Index);
- SecEnd = OS.tell();
+ SecEnd = getStream().tell();
SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
}
@@ -957,31 +969,6 @@ ELFObjectWriter::createRelocationSection(MCContext &Ctx,
return RelaSection;
}
-static SmallVector<char, 128>
-getUncompressedData(const MCAsmLayout &Layout,
- const MCSection::FragmentListType &Fragments) {
- SmallVector<char, 128> UncompressedData;
- for (const MCFragment &F : Fragments) {
- const SmallVectorImpl<char> *Contents;
- switch (F.getKind()) {
- case MCFragment::FT_Data:
- Contents = &cast<MCDataFragment>(F).getContents();
- break;
- case MCFragment::FT_Dwarf:
- Contents = &cast<MCDwarfLineAddrFragment>(F).getContents();
- break;
- case MCFragment::FT_DwarfFrame:
- Contents = &cast<MCDwarfCallFrameFragment>(F).getContents();
- break;
- default:
- llvm_unreachable(
- "Not expecting any other fragment types in a debug_* section");
- }
- UncompressedData.append(Contents->begin(), Contents->end());
- }
- return UncompressedData;
-}
-
// Include the debug info compression header:
// "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
// useful for consumers to preallocate a buffer to decompress into.
@@ -1016,27 +1003,29 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
return;
}
- // Gather the uncompressed data from all the fragments.
- const MCSection::FragmentListType &Fragments = Section.getFragmentList();
- SmallVector<char, 128> UncompressedData =
- getUncompressedData(Layout, Fragments);
+ SmallVector<char, 128> UncompressedData;
+ raw_svector_ostream VecOS(UncompressedData);
+ raw_pwrite_stream &OldStream = getStream();
+ setStream(VecOS);
+ Asm.writeSectionData(&Section, Layout);
+ setStream(OldStream);
SmallVector<char, 128> CompressedContents;
zlib::Status Success = zlib::compress(
StringRef(UncompressedData.data(), UncompressedData.size()),
CompressedContents);
if (Success != zlib::StatusOK) {
- Asm.writeSectionData(&Section, Layout);
+ getStream() << UncompressedData;
return;
}
if (!prependCompressionHeader(UncompressedData.size(), CompressedContents)) {
- Asm.writeSectionData(&Section, Layout);
+ getStream() << UncompressedData;
return;
}
Asm.getContext().renameELFSection(&Section,
(".z" + SectionName.drop_front(1)).str());
- OS << CompressedContents;
+ getStream() << CompressedContents;
}
void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
@@ -1061,8 +1050,13 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
const MCSectionELF &Sec) {
std::vector<ELFRelocationEntry> &Relocs = Relocations[&Sec];
- // Sort the relocation entries. Most targets just sort by Offset, but some
- // (e.g., MIPS) have additional constraints.
+ // We record relocations by pushing to the end of a vector. Reverse the vector
+ // to get the relocations in the order they were created.
+ // In most cases that is not important, but it can be for special sections
+ // (.eh_frame) or specific relocations (TLS optimizations on SystemZ).
+ std::reverse(Relocs.begin(), Relocs.end());
+
+ // Sort the relocation entries. MIPS needs this.
TargetObjectWriter->sortRelocs(Asm, Relocs);
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
@@ -1100,7 +1094,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) {
const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1];
- OS << StrTabBuilder.data();
+ getStream() << StrTabBuilder.data();
return StrtabSection;
}
@@ -1209,12 +1203,12 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
align(Section.getAlignment());
// Remember the offset into the file for this section.
- uint64_t SecStart = OS.tell();
+ uint64_t SecStart = getStream().tell();
const MCSymbolELF *SignatureSymbol = Section.getGroup();
writeSectionData(Asm, Section, Layout);
- uint64_t SecEnd = OS.tell();
+ uint64_t SecEnd = getStream().tell();
SectionOffsets[&Section] = std::make_pair(SecStart, SecEnd);
MCSectionELF *RelSection = createRelocationSection(Ctx, Section);
@@ -1246,7 +1240,7 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
align(Group->getAlignment());
// Remember the offset into the file for this section.
- uint64_t SecStart = OS.tell();
+ uint64_t SecStart = getStream().tell();
const MCSymbol *SignatureSymbol = Group->getGroup();
assert(SignatureSymbol);
@@ -1256,7 +1250,7 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
write(SecIndex);
}
- uint64_t SecEnd = OS.tell();
+ uint64_t SecEnd = getStream().tell();
SectionOffsets[Group] = std::make_pair(SecStart, SecEnd);
}
@@ -1267,25 +1261,25 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
align(RelSection->getAlignment());
// Remember the offset into the file for this section.
- uint64_t SecStart = OS.tell();
+ uint64_t SecStart = getStream().tell();
writeRelocations(Asm, *RelSection->getAssociatedSection());
- uint64_t SecEnd = OS.tell();
+ uint64_t SecEnd = getStream().tell();
SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd);
}
{
- uint64_t SecStart = OS.tell();
+ uint64_t SecStart = getStream().tell();
const MCSectionELF *Sec = createStringTable(Ctx);
- uint64_t SecEnd = OS.tell();
+ uint64_t SecEnd = getStream().tell();
SectionOffsets[Sec] = std::make_pair(SecStart, SecEnd);
}
uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
align(NaturalAlignment);
- const unsigned SectionHeaderOffset = OS.tell();
+ const unsigned SectionHeaderOffset = getStream().tell();
// ... then the section header table ...
writeSectionHeader(Layout, SectionIndexMap, SectionOffsets);
@@ -1301,19 +1295,19 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
uint64_t Val = SectionHeaderOffset;
if (sys::IsLittleEndianHost != IsLittleEndian)
sys::swapByteOrder(Val);
- OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
- offsetof(ELF::Elf64_Ehdr, e_shoff));
+ getStream().pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf64_Ehdr, e_shoff));
NumSectionsOffset = offsetof(ELF::Elf64_Ehdr, e_shnum);
} else {
uint32_t Val = SectionHeaderOffset;
if (sys::IsLittleEndianHost != IsLittleEndian)
sys::swapByteOrder(Val);
- OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
- offsetof(ELF::Elf32_Ehdr, e_shoff));
+ getStream().pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf32_Ehdr, e_shoff));
NumSectionsOffset = offsetof(ELF::Elf32_Ehdr, e_shnum);
}
- OS.pwrite(reinterpret_cast<char *>(&NumSections), sizeof(NumSections),
- NumSectionsOffset);
+ getStream().pwrite(reinterpret_cast<char *>(&NumSections),
+ sizeof(NumSections), NumSectionsOffset);
}
bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
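
The writeSectionData hunk keeps the compression header format its comment describes: the four bytes "ZLIB" followed by eight bytes giving the uncompressed section size. A standalone sketch of that layout, assuming the big-endian size encoding used by the GNU .zdebug convention; this is not a copy of prependCompressionHeader:

    // Standalone sketch of the .zdebug header: "ZLIB" magic plus the
    // uncompressed size as a big-endian 64-bit integer (assumed encoding).
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static std::vector<char> makeCompressionHeader(uint64_t UncompressedSize) {
      std::vector<char> Header(4 + 8);
      std::memcpy(Header.data(), "ZLIB", 4);
      for (int I = 0; I < 8; ++I) // most significant byte first
        Header[4 + I] = char(UncompressedSize >> (8 * (7 - I)));
      return Header;
    }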
diff --git a/contrib/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm/lib/MC/MCAsmBackend.cpp
index 36c65b7..fcf139b 100644
--- a/contrib/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm/lib/MC/MCAsmBackend.cpp
@@ -16,6 +16,10 @@ MCAsmBackend::MCAsmBackend() : HasDataInCodeSupport(false) {}
MCAsmBackend::~MCAsmBackend() {}
+bool MCAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const {
+ return false;
+}
+
const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
{"FK_Data_1", 0, 8, 0},
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 100dc7c..36e10b3 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -157,3 +157,9 @@ bool MCAsmInfo::isValidUnquotedName(StringRef Name) const {
return true;
}
+
+bool MCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
+ // FIXME: Does .section .bss/.data/.text work everywhere?
+ return SectionName == ".text" || SectionName == ".data" ||
+ (SectionName == ".bss" && !usesELFSectionDirectiveForBSS());
+}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index 97fc76a..5b9dd20 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -37,8 +37,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
UseIntegratedAssembler = true;
- // FIXME: For now keep the previous behavior, AShr. Need to double-check
- // other COFF-targeting assemblers and change this if necessary.
+ // At least MSVC inline-asm does AShr.
UseLogicalShr = false;
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
index bb90ff2..ae9486d 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -93,9 +93,4 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
UseIntegratedAssembler = true;
SetDirectiveSuppressesReloc = true;
-
- // FIXME: For now keep the previous behavior, AShr, matching the previous
- // behavior of as(1) (both -q and -Q: resp. LLVM and gas v1.38).
- // If/when this changes, the AArch64 Darwin special case can go away.
- UseLogicalShr = false;
}
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 227c937..c99ce77 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,9 +29,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include <cctype>
+
using namespace llvm;
namespace {
@@ -78,6 +80,9 @@ public:
}
EmitCommentsAndEOL();
}
+
+ void EmitSyntaxDirective() override;
+
void EmitCommentsAndEOL();
/// isVerboseAsm - Return true if this streamer supports verbose assembly at
@@ -160,7 +165,7 @@ public:
void EmitBytes(StringRef Data) override;
void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc()) override;
+ SMLoc Loc = SMLoc()) override;
void EmitIntValue(uint64_t Value, unsigned Size) override;
void EmitULEB128Value(const MCExpr *Value) override;
@@ -181,7 +186,7 @@ public:
void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0) override;
- bool EmitValueToOffset(const MCExpr *Offset,
+ void emitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0) override;
void EmitFileDirective(StringRef Filename) override;
@@ -207,6 +212,8 @@ public:
void EmitCFISameValue(int64_t Register) override;
void EmitCFIRelOffset(int64_t Register, int64_t Offset) override;
void EmitCFIAdjustCfaOffset(int64_t Adjustment) override;
+ void EmitCFIEscape(StringRef Values) override;
+ void EmitCFIGnuArgsSize(int64_t Size) override;
void EmitCFISignalFrame() override;
void EmitCFIUndefined(int64_t Register) override;
void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
@@ -233,6 +240,9 @@ public:
void EmitBundleLock(bool AlignToEnd) override;
void EmitBundleUnlock() override;
+ bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc) override;
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
@@ -250,15 +260,9 @@ public:
void MCAsmStreamer::AddComment(const Twine &T) {
if (!IsVerboseAsm) return;
- // Make sure that CommentStream is flushed.
- CommentStream.flush();
-
T.toVector(CommentToEmit);
// Each comment goes on its own line.
CommentToEmit.push_back('\n');
-
- // Tell the comment stream that the vector changed underneath it.
- CommentStream.resync();
}
void MCAsmStreamer::EmitCommentsAndEOL() {
@@ -267,7 +271,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
return;
}
- CommentStream.flush();
StringRef Comments = CommentToEmit;
assert(Comments.back() == '\n' &&
@@ -282,8 +285,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
} while (!Comments.empty());
CommentToEmit.clear();
- // Tell the comment stream that the vector changed underneath it.
- CommentStream.resync();
}
static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
@@ -372,6 +373,8 @@ void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
void MCAsmStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major,
unsigned Minor, unsigned Update) {
switch (Kind) {
+ case MCVM_WatchOSVersionMin: OS << "\t.watchos_version_min"; break;
+ case MCVM_TvOSVersionMin: OS << "\t.tvos_version_min"; break;
case MCVM_IOSVersionMin: OS << "\t.ios_version_min"; break;
case MCVM_OSXVersionMin: OS << "\t.macosx_version_min"; break;
}
@@ -480,6 +483,14 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
EmitEOL();
}
+void MCAsmStreamer::EmitSyntaxDirective() {
+ if (MAI->getAssemblerDialect() == 1)
+ OS << "\t.intel_syntax noprefix\n";
+ // FIXME: Currently we emit unprefixed registers.
+ // The intel_syntax directive has one optional argument,
+ // which may have a value of prefix or noprefix.
+}
+
void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
OS << "\t.def\t ";
Symbol->print(OS, MAI);
@@ -531,9 +542,6 @@ void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {
void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
- // Common symbols do not belong to any actual section.
- AssignSection(Symbol, nullptr);
-
OS << "\t.comm\t";
Symbol->print(OS, MAI);
OS << ',' << Size;
@@ -553,9 +561,6 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
/// @param Size - The size of the common symbol.
void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlign) {
- // Common symbols do not belong to any actual section.
- AssignSection(Symbol, nullptr);
-
OS << "\t.lcomm\t";
Symbol->print(OS, MAI);
OS << ',' << Size;
@@ -579,7 +584,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
if (Symbol)
- AssignSection(Symbol, Section);
+ AssignFragment(Symbol, &Section->getDummyFragment());
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
@@ -603,7 +608,7 @@ void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
// e.g. _a.
void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
- AssignSection(Symbol, Section);
+ AssignFragment(Symbol, &Section->getDummyFragment());
assert(Symbol && "Symbol shouldn't be NULL!");
// Instead of using the Section we'll just use the shortcut.
@@ -654,7 +659,6 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
OS << '"';
}
-
void MCAsmStreamer::EmitBytes(StringRef Data) {
assert(getCurrentSection().first &&
"Cannot emit contents before setting section!");
@@ -685,7 +689,7 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
}
void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+ SMLoc Loc) {
assert(Size <= 8 && "Invalid size");
assert(getCurrentSection().first &&
"Cannot emit contents before setting section!");
@@ -776,7 +780,6 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
EmitEOL();
}
-
/// EmitFill - Emit NumBytes bytes worth of the value specified by
/// FillValue. This implements directives such as '.space'.
void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
@@ -856,17 +859,15 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
1, MaxBytesToEmit);
}
-bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
OS << ".org ";
Offset->print(OS, MAI);
OS << ", " << (unsigned)Value;
EmitEOL();
- return false;
}
-
void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
assert(MAI->hasSingleParameterDotFile());
OS << "\t.file\t";
@@ -1014,6 +1015,32 @@ void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
EmitEOL();
}
+static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) {
+ OS << "\t.cfi_escape ";
+ if (!Values.empty()) {
+ size_t e = Values.size() - 1;
+ for (size_t i = 0; i < e; ++i)
+ OS << format("0x%02x", uint8_t(Values[i])) << ", ";
+ OS << format("0x%02x", uint8_t(Values[e]));
+ }
+}
+
+void MCAsmStreamer::EmitCFIEscape(StringRef Values) {
+ MCStreamer::EmitCFIEscape(Values);
+ PrintCFIEscape(OS, Values);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIGnuArgsSize(int64_t Size) {
+ MCStreamer::EmitCFIGnuArgsSize(Size);
+
+ uint8_t Buffer[16] = { dwarf::DW_CFA_GNU_args_size };
+ unsigned Len = encodeULEB128(Size, Buffer + 1) + 1;
+
+ PrintCFIEscape(OS, StringRef((const char *)&Buffer[0], Len));
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
MCStreamer::EmitCFIDefCfaRegister(Register);
OS << "\t.cfi_def_cfa_register ";
@@ -1203,7 +1230,7 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) {
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIEndProlog(void) {
+void MCAsmStreamer::EmitWinCFIEndProlog() {
MCStreamer::EmitWinCFIEndProlog();
OS << "\t.seh_endprologue";
@@ -1217,7 +1244,6 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
Emitter->encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
// If we are showing fixups, create symbolic markers in the encoded
// representation. We do this by making a per-bit map to the fixup item index,
@@ -1334,6 +1360,19 @@ void MCAsmStreamer::EmitBundleUnlock() {
EmitEOL();
}
+bool MCAsmStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc) {
+ OS << "\t.reloc ";
+ Offset.print(OS, MAI);
+ OS << ", " << Name;
+ if (Expr) {
+ OS << ", ";
+ Expr->print(OS, MAI);
+ }
+ EmitEOL();
+ return false;
+}
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
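
EmitCFIGnuArgsSize above builds a DW_CFA_GNU_args_size instruction by hand, one opcode byte followed by the size as an unsigned LEB128, and then routes it through the .cfi_escape printer. A self-contained sketch of that encoding (0x2e is the DWARF value of DW_CFA_GNU_args_size):

    // Sketch of the byte sequence EmitCFIGnuArgsSize produces: the opcode
    // byte, then the argument size encoded as ULEB128.
    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeGnuArgsSize(uint64_t Size) {
      std::vector<uint8_t> Out{0x2e}; // DW_CFA_GNU_args_size
      do {
        uint8_t Byte = Size & 0x7f;
        Size >>= 7;
        if (Size)
          Byte |= 0x80; // more ULEB128 bytes follow
        Out.push_back(Byte);
      } while (Size);
      return Out;
    }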
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index f53b589..15e82fa 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -64,272 +64,11 @@ STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
/* *** */
-MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
- : Assembler(Asm), LastValidFragment()
- {
- // Compute the section layout order. Virtual sections must go last.
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (!it->isVirtualSection())
- SectionOrder.push_back(&*it);
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (it->isVirtualSection())
- SectionOrder.push_back(&*it);
-}
-
-bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
- const MCSection *Sec = F->getParent();
- const MCFragment *LastValid = LastValidFragment.lookup(Sec);
- if (!LastValid)
- return false;
- assert(LastValid->getParent() == Sec);
- return F->getLayoutOrder() <= LastValid->getLayoutOrder();
-}
-
-void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
- // If this fragment wasn't already valid, we don't need to do anything.
- if (!isFragmentValid(F))
- return;
-
- // Otherwise, reset the last valid fragment to the previous fragment
- // (if this is the first fragment, it will be NULL).
- LastValidFragment[F->getParent()] = F->getPrevNode();
-}
-
-void MCAsmLayout::ensureValid(const MCFragment *F) const {
- MCSection *Sec = F->getParent();
- MCFragment *Cur = LastValidFragment[Sec];
- if (!Cur)
- Cur = Sec->begin();
- else
- Cur = Cur->getNextNode();
-
- // Advance the layout position until the fragment is valid.
- while (!isFragmentValid(F)) {
- assert(Cur && "Layout bookkeeping error");
- const_cast<MCAsmLayout*>(this)->layoutFragment(Cur);
- Cur = Cur->getNextNode();
- }
-}
-
-uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
- ensureValid(F);
- assert(F->Offset != ~UINT64_C(0) && "Address not set!");
- return F->Offset;
-}
-
-// Simple getSymbolOffset helper for the non-variable case.
-static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
- bool ReportError, uint64_t &Val) {
- if (!S.getFragment()) {
- if (ReportError)
- report_fatal_error("unable to evaluate offset to undefined symbol '" +
- S.getName() + "'");
- return false;
- }
- Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset();
- return true;
-}
-
-static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
- bool ReportError, uint64_t &Val) {
- if (!S.isVariable())
- return getLabelOffset(Layout, S, ReportError, Val);
-
- // If SD is a variable, evaluate it.
- MCValue Target;
- if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
- report_fatal_error("unable to evaluate offset for variable '" +
- S.getName() + "'");
-
- uint64_t Offset = Target.getConstant();
-
- const MCSymbolRefExpr *A = Target.getSymA();
- if (A) {
- uint64_t ValA;
- if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
- return false;
- Offset += ValA;
- }
-
- const MCSymbolRefExpr *B = Target.getSymB();
- if (B) {
- uint64_t ValB;
- if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
- return false;
- Offset -= ValB;
- }
-
- Val = Offset;
- return true;
-}
-
-bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const {
- return getSymbolOffsetImpl(*this, S, false, Val);
-}
-
-uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const {
- uint64_t Val;
- getSymbolOffsetImpl(*this, S, true, Val);
- return Val;
-}
-
-const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
- if (!Symbol.isVariable())
- return &Symbol;
-
- const MCExpr *Expr = Symbol.getVariableValue();
- MCValue Value;
- if (!Expr->evaluateAsValue(Value, *this))
- llvm_unreachable("Invalid Expression");
-
- const MCSymbolRefExpr *RefB = Value.getSymB();
- if (RefB)
- Assembler.getContext().reportFatalError(
- SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
- "' could not be evaluated in a subtraction expression");
-
- const MCSymbolRefExpr *A = Value.getSymA();
- if (!A)
- return nullptr;
-
- const MCSymbol &ASym = A->getSymbol();
- const MCAssembler &Asm = getAssembler();
- if (ASym.isCommon()) {
- // FIXME: we should probably add a SMLoc to MCExpr.
- Asm.getContext().reportFatalError(SMLoc(),
- "Common symbol " + ASym.getName() +
- " cannot be used in assignment expr");
- }
-
- return &ASym;
-}
-
-uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
- // The size is the last fragment's end offset.
- const MCFragment &F = Sec->getFragmentList().back();
- return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
-}
-
-uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const {
- // Virtual sections have no file size.
- if (Sec->isVirtualSection())
- return 0;
-
- // Otherwise, the file size is the same as the address space size.
- return getSectionAddressSize(Sec);
-}
-
-uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
- const MCFragment *F,
- uint64_t FOffset, uint64_t FSize) {
- uint64_t BundleSize = Assembler.getBundleAlignSize();
- assert(BundleSize > 0 &&
- "computeBundlePadding should only be called if bundling is enabled");
- uint64_t BundleMask = BundleSize - 1;
- uint64_t OffsetInBundle = FOffset & BundleMask;
- uint64_t EndOfFragment = OffsetInBundle + FSize;
-
- // There are two kinds of bundling restrictions:
- //
- // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
- // *end* on a bundle boundary.
- // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
- // would, add padding until the end of the bundle so that the fragment
- // will start in a new one.
- if (F->alignToBundleEnd()) {
- // Three possibilities here:
- //
- // A) The fragment just happens to end at a bundle boundary, so we're good.
- // B) The fragment ends before the current bundle boundary: pad it just
- // enough to reach the boundary.
- // C) The fragment ends after the current bundle boundary: pad it until it
- // reaches the end of the next bundle boundary.
- //
- // Note: this code could be made shorter with some modulo trickery, but it's
- // intentionally kept in its more explicit form for simplicity.
- if (EndOfFragment == BundleSize)
- return 0;
- else if (EndOfFragment < BundleSize)
- return BundleSize - EndOfFragment;
- else { // EndOfFragment > BundleSize
- return 2 * BundleSize - EndOfFragment;
- }
- } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
- return BundleSize - OffsetInBundle;
- else
- return 0;
-}
-
-/* *** */
-
-void ilist_node_traits<MCFragment>::deleteNode(MCFragment *V) {
- V->destroy();
-}
-
-MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false),
- AlignToBundleEnd(false), BundlePadding(0) {
-}
-
-MCFragment::~MCFragment() { }
-
-MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
- uint8_t BundlePadding, MCSection *Parent)
- : Kind(Kind), HasInstructions(HasInstructions), AlignToBundleEnd(false),
- BundlePadding(BundlePadding), Parent(Parent), Atom(nullptr),
- Offset(~UINT64_C(0)) {
- if (Parent)
- Parent->getFragmentList().push_back(this);
-}
-
-void MCFragment::destroy() {
- // First check if we are the sentinel.
- if (Kind == FragmentType(~0)) {
- delete this;
- return;
- }
-
- switch (Kind) {
- case FT_Align:
- delete cast<MCAlignFragment>(this);
- return;
- case FT_Data:
- delete cast<MCDataFragment>(this);
- return;
- case FT_CompactEncodedInst:
- delete cast<MCCompactEncodedInstFragment>(this);
- return;
- case FT_Fill:
- delete cast<MCFillFragment>(this);
- return;
- case FT_Relaxable:
- delete cast<MCRelaxableFragment>(this);
- return;
- case FT_Org:
- delete cast<MCOrgFragment>(this);
- return;
- case FT_Dwarf:
- delete cast<MCDwarfLineAddrFragment>(this);
- return;
- case FT_DwarfFrame:
- delete cast<MCDwarfCallFrameFragment>(this);
- return;
- case FT_LEB:
- delete cast<MCLEBFragment>(this);
- return;
- case FT_SafeSEH:
- delete cast<MCSafeSEHFragment>(this);
- return;
- }
-}
-
-/* *** */
-
MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
- MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
- raw_ostream &OS_)
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_)
: Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
- OS(OS_), BundleAlignSize(0), RelaxAll(false),
- SubsectionsViaSymbols(false), ELFHeaderEFlags(0) {
+ BundleAlignSize(0), RelaxAll(false), SubsectionsViaSymbols(false),
+ IncrementalLinkerCompatible(false), ELFHeaderEFlags(0) {
VersionMinInfo.Major = 0; // Major version == 0 for "none specified"
}
@@ -347,6 +86,7 @@ void MCAssembler::reset() {
BundleAlignSize = 0;
RelaxAll = false;
SubsectionsViaSymbols = false;
+ IncrementalLinkerCompatible = false;
ELFHeaderEFlags = 0;
LOHContainer.reset();
VersionMinInfo.Major = 0;
@@ -358,6 +98,14 @@ void MCAssembler::reset() {
getLOHContainer().reset();
}
+bool MCAssembler::registerSection(MCSection &Section) {
+ if (Section.isRegistered())
+ return false;
+ Sections.push_back(&Section);
+ Section.setIsRegistered(true);
+ return true;
+}
+
bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
if (ThumbFuncs.count(Symbol))
return true;
@@ -404,7 +152,7 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const {
return &S;
// Absolute and undefined symbols have no defining atom.
- if (!S.getFragment())
+ if (!S.isInSection())
return nullptr;
// Non-linker visible symbols in sections which can't be atomized have no
@@ -426,8 +174,13 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
const MCExpr *Expr = Fixup.getValue();
- if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup))
- getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression");
+ if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) {
+ getContext().reportError(Fixup.getLoc(), "expected relocatable expression");
+ // Claim to have completely evaluated the fixup, to prevent any further
+ // processing from being done.
+ Value = 0;
+ return true;
+ }
bool IsPCRel = Backend.getFixupKindInfo(
Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
@@ -523,12 +276,19 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Org: {
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
- int64_t TargetLocation;
- if (!OF.getOffset().evaluateAsAbsolute(TargetLocation, Layout))
+ MCValue Value;
+ if (!OF.getOffset().evaluateAsValue(Value, Layout))
report_fatal_error("expected assembly-time absolute expression");
// FIXME: We need a way to communicate this error.
uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+ int64_t TargetLocation = Value.getConstant();
+ if (const MCSymbolRefExpr *A = Value.getSymA()) {
+ uint64_t Val;
+ if (!Layout.getSymbolOffset(A->getSymbol(), Val))
+ report_fatal_error("expected absolute expression");
+ TargetLocation += Val;
+ }
int64_t Size = TargetLocation - FragmentOffset;
if (Size < 0 || Size >= 0x40000000)
report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
@@ -540,6 +300,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
return cast<MCDwarfLineAddrFragment>(F).getContents().size();
case MCFragment::FT_DwarfFrame:
return cast<MCDwarfCallFrameFragment>(F).getContents().size();
+ case MCFragment::FT_Dummy:
+ llvm_unreachable("Should not have been added");
}
llvm_unreachable("invalid fragment kind");
@@ -773,6 +535,8 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
OW->writeBytes(CF.getContents());
break;
}
+ case MCFragment::FT_Dummy:
+ llvm_unreachable("Should not have been added");
}
assert(OW->getStream().tell() - Start == FragmentSize &&
@@ -786,15 +550,14 @@ void MCAssembler::writeSectionData(const MCSection *Sec,
assert(Layout.getSectionFileSize(Sec) == 0 && "Invalid size for section!");
// Check that contents are only things legal inside a virtual section.
- for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie;
- ++it) {
- switch (it->getKind()) {
+ for (const MCFragment &F : *Sec) {
+ switch (F.getKind()) {
default: llvm_unreachable("Invalid fragment in virtual section!");
case MCFragment::FT_Data: {
// Check that we aren't trying to write a non-zero contents (or fixups)
// into a virtual section. This is to support clients which use standard
// directives to fill the contents of virtual sections.
- const MCDataFragment &DF = cast<MCDataFragment>(*it);
+ const MCDataFragment &DF = cast<MCDataFragment>(F);
assert(DF.fixup_begin() == DF.fixup_end() &&
"Cannot have fixups in virtual section!");
for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
@@ -810,13 +573,13 @@ void MCAssembler::writeSectionData(const MCSection *Sec,
case MCFragment::FT_Align:
// Check that we aren't trying to write a non-zero value into a virtual
// section.
- assert((cast<MCAlignFragment>(it)->getValueSize() == 0 ||
- cast<MCAlignFragment>(it)->getValue() == 0) &&
+ assert((cast<MCAlignFragment>(F).getValueSize() == 0 ||
+ cast<MCAlignFragment>(F).getValue() == 0) &&
"Invalid align in virtual section!");
break;
case MCFragment::FT_Fill:
- assert((cast<MCFillFragment>(it)->getValueSize() == 0 ||
- cast<MCFillFragment>(it)->getValue() == 0) &&
+ assert((cast<MCFillFragment>(F).getValueSize() == 0 ||
+ cast<MCFillFragment>(F).getValue() == 0) &&
"Invalid fill in virtual section!");
break;
}
@@ -828,9 +591,8 @@ void MCAssembler::writeSectionData(const MCSection *Sec,
uint64_t Start = getWriter().getStream().tell();
(void)Start;
- for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie;
- ++it)
- writeFragment(*this, Layout, *it);
+ for (const MCFragment &F : *Sec)
+ writeFragment(*this, Layout, F);
assert(getWriter().getStream().tell() - Start ==
Layout.getSectionAddressSize(Sec));
@@ -854,23 +616,20 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
return std::make_pair(FixedValue, IsPCRel);
}
-void MCAssembler::Finish() {
+void MCAssembler::layout(MCAsmLayout &Layout) {
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
- // Create the layout object.
- MCAsmLayout Layout(*this);
-
// Create dummy fragments and assign section ordinals.
unsigned SectionIndex = 0;
- for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ for (MCSection &Sec : *this) {
// Create dummy fragments to eliminate any empty sections; this simplifies
// layout.
- if (it->getFragmentList().empty())
- new MCDataFragment(&*it);
+ if (Sec.getFragmentList().empty())
+ new MCDataFragment(&Sec);
- it->setOrdinal(SectionIndex++);
+ Sec.setOrdinal(SectionIndex++);
}
// Assign layout order indices to sections and fragments.
@@ -879,9 +638,8 @@ void MCAssembler::Finish() {
Sec->setLayoutOrder(i);
unsigned FragmentIndex = 0;
- for (MCSection::iterator iFrag = Sec->begin(), iFragEnd = Sec->end();
- iFrag != iFragEnd; ++iFrag)
- iFrag->setLayoutOrder(FragmentIndex++);
+ for (MCFragment &Frag : *Sec)
+ Frag.setLayoutOrder(FragmentIndex++);
}
// Layout until everything fits.
@@ -899,17 +657,14 @@ void MCAssembler::Finish() {
llvm::errs() << "assembler backend - final-layout\n--\n";
dump(); });
- uint64_t StartOffset = OS.tell();
-
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
getWriter().executePostLayoutBinding(*this, Layout);
// Evaluate and apply the fixups, generating relocation entries as necessary.
- for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
- for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2;
- ++it2) {
- MCEncodedFragment *F = dyn_cast<MCEncodedFragment>(it2);
+ for (MCSection &Sec : *this) {
+ for (MCFragment &Frag : Sec) {
+ MCEncodedFragment *F = dyn_cast<MCEncodedFragment>(&Frag);
// Data and relaxable fragments both have fixups. So only process
// those here.
// FIXME: Is there a better way to do this? MCEncodedFragmentWithFixups
@@ -935,6 +690,15 @@ void MCAssembler::Finish() {
}
}
}
+}
+
+void MCAssembler::Finish() {
+ // Create the layout object.
+ MCAsmLayout Layout(*this);
+ layout(Layout);
+
+ raw_ostream &OS = getWriter().getStream();
+ uint64_t StartOffset = OS.tell();
// Write the object file.
getWriter().writeObject(*this, Layout);
@@ -960,9 +724,8 @@ bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F,
if (!getBackend().mayNeedRelaxation(F->getInst()))
return false;
- for (MCRelaxableFragment::const_fixup_iterator it = F->fixup_begin(),
- ie = F->fixup_end(); it != ie; ++it)
- if (fixupNeedsRelaxation(*it, F, Layout))
+ for (const MCFixup &Fixup : F->getFixups())
+ if (fixupNeedsRelaxation(Fixup, F, Layout))
return true;
return false;
@@ -991,7 +754,6 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
getEmitter().encodeInstruction(Relaxed, VecOS, Fixups, F.getSubtargetInfo());
- VecOS.flush();
// Update the fragment.
F.setInst(Relaxed);
@@ -1014,7 +776,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
encodeSLEB128(Value, OSE);
else
encodeULEB128(Value, OSE);
- OSE.flush();
return OldSize != LF.getContents().size();
}
@@ -1031,8 +792,8 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
SmallString<8> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
- MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OSE);
- OSE.flush();
+ MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
+ AddrDelta, OSE);
return OldSize != Data.size();
}
@@ -1048,7 +809,6 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
Data.clear();
raw_svector_ostream OSE(Data);
MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE);
- OSE.flush();
return OldSize != Data.size();
}
@@ -1085,7 +845,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) {
break;
}
if (RelaxedFrag && !FirstRelaxedFragment)
- FirstRelaxedFragment = I;
+ FirstRelaxedFragment = &*I;
}
if (FirstRelaxedFragment) {
Layout.invalidateFragmentsFrom(FirstRelaxedFragment);
@@ -1113,158 +873,3 @@ void MCAssembler::finishLayout(MCAsmLayout &Layout) {
Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
}
}
-
-// Debugging methods
-
-namespace llvm {
-
-raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
- OS << "<MCFixup" << " Offset:" << AF.getOffset()
- << " Value:" << *AF.getValue()
- << " Kind:" << AF.getKind() << ">";
- return OS;
-}
-
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MCFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<";
- switch (getKind()) {
- case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
- case MCFragment::FT_Data: OS << "MCDataFragment"; break;
- case MCFragment::FT_CompactEncodedInst:
- OS << "MCCompactEncodedInstFragment"; break;
- case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
- case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break;
- case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
- case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
- case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
- case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
- case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break;
- }
-
- OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
- << " Offset:" << Offset
- << " HasInstructions:" << hasInstructions()
- << " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">";
-
- switch (getKind()) {
- case MCFragment::FT_Align: {
- const MCAlignFragment *AF = cast<MCAlignFragment>(this);
- if (AF->hasEmitNops())
- OS << " (emit nops)";
- OS << "\n ";
- OS << " Alignment:" << AF->getAlignment()
- << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
- << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
- break;
- }
- case MCFragment::FT_Data: {
- const MCDataFragment *DF = cast<MCDataFragment>(this);
- OS << "\n ";
- OS << " Contents:[";
- const SmallVectorImpl<char> &Contents = DF->getContents();
- for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
- if (i) OS << ",";
- OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
- }
- OS << "] (" << Contents.size() << " bytes)";
-
- if (DF->fixup_begin() != DF->fixup_end()) {
- OS << ",\n ";
- OS << " Fixups:[";
- for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
- ie = DF->fixup_end(); it != ie; ++it) {
- if (it != DF->fixup_begin()) OS << ",\n ";
- OS << *it;
- }
- OS << "]";
- }
- break;
- }
- case MCFragment::FT_CompactEncodedInst: {
- const MCCompactEncodedInstFragment *CEIF =
- cast<MCCompactEncodedInstFragment>(this);
- OS << "\n ";
- OS << " Contents:[";
- const SmallVectorImpl<char> &Contents = CEIF->getContents();
- for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
- if (i) OS << ",";
- OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
- }
- OS << "] (" << Contents.size() << " bytes)";
- break;
- }
- case MCFragment::FT_Fill: {
- const MCFillFragment *FF = cast<MCFillFragment>(this);
- OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
- << " Size:" << FF->getSize();
- break;
- }
- case MCFragment::FT_Relaxable: {
- const MCRelaxableFragment *F = cast<MCRelaxableFragment>(this);
- OS << "\n ";
- OS << " Inst:";
- F->getInst().dump_pretty(OS);
- break;
- }
- case MCFragment::FT_Org: {
- const MCOrgFragment *OF = cast<MCOrgFragment>(this);
- OS << "\n ";
- OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
- break;
- }
- case MCFragment::FT_Dwarf: {
- const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
- OS << "\n ";
- OS << " AddrDelta:" << OF->getAddrDelta()
- << " LineDelta:" << OF->getLineDelta();
- break;
- }
- case MCFragment::FT_DwarfFrame: {
- const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
- OS << "\n ";
- OS << " AddrDelta:" << CF->getAddrDelta();
- break;
- }
- case MCFragment::FT_LEB: {
- const MCLEBFragment *LF = cast<MCLEBFragment>(this);
- OS << "\n ";
- OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
- break;
- }
- case MCFragment::FT_SafeSEH: {
- const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this);
- OS << "\n ";
- OS << " Sym:" << F->getSymbol();
- break;
- }
- }
- OS << ">";
-}
-
-void MCAssembler::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCAssembler\n";
- OS << " Sections:[\n ";
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- if (it != begin()) OS << ",\n ";
- it->dump();
- }
- OS << "],\n";
- OS << " Symbols:[";
-
- for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
- if (it != symbol_begin()) OS << ",\n ";
- OS << "(";
- it->dump();
- OS << ", Index:" << it->getIndex() << ", ";
- OS << ")";
- }
- OS << "]>\n";
-}
-#endif
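
Among the MCAssembler changes, the FT_Org hunk generalizes .org targets from assembly-time constants to symbol-plus-constant expressions: the symbol's layout offset is folded into the target before the fragment's own offset is subtracted. A minimal model of that computation; the names and the bool-return error strategy are illustrative, the real code calls report_fatal_error:

    // Minimal model of the new .org size computation.
    #include <cstdint>

    static bool computeOrgSize(int64_t Constant, bool HasSym,
                               uint64_t SymOffset, uint64_t FragmentOffset,
                               uint64_t &SizeOut) {
      int64_t TargetLocation = Constant;
      if (HasSym)
        TargetLocation += int64_t(SymOffset); // fold in the symbol's offset
      int64_t Size = TargetLocation - int64_t(FragmentOffset);
      if (Size < 0 || Size >= 0x40000000) // same bounds as the hunk above
        return false;
      SizeOut = uint64_t(Size);
      return true;
    }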
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index a85796c..b5ad518 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolMachO.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -41,7 +42,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4),
AllowTemporaryLabels(true), DwarfCompileUnitID(0),
- AutoReset(DoAutoReset) {
+ AutoReset(DoAutoReset), HadError(false) {
std::error_code EC = llvm::sys::fs::current_path(CompilationDir);
if (EC)
@@ -62,9 +63,6 @@ MCContext::~MCContext() {
// NOTE: The symbols are all allocated out of a bump pointer allocator,
// so we don't need to free them here.
-
- // If the stream for the .secure_log_unique directive was created free it.
- delete (raw_ostream *)SecureLog;
}
//===----------------------------------------------------------------------===//
@@ -73,13 +71,11 @@ MCContext::~MCContext() {
void MCContext::reset() {
// Call the destructors so the fragments are freed
- for (auto &I : ELFUniquingMap)
- I.second->~MCSectionELF();
- for (auto &I : COFFUniquingMap)
- I.second->~MCSectionCOFF();
- for (auto &I : MachOUniquingMap)
- I.second->~MCSectionMachO();
+ COFFAllocator.DestroyAll();
+ ELFAllocator.DestroyAll();
+ MachOAllocator.DestroyAll();
+ MCSubtargetAllocator.DestroyAll();
UsedNames.clear();
Symbols.clear();
SectionSymbols.clear();
@@ -103,6 +99,8 @@ void MCContext::reset() {
DwarfLocSeen = false;
GenDwarfForAssembly = false;
GenDwarfFileNumber = 0;
+
+ HadError = false;
}
//===----------------------------------------------------------------------===//
@@ -294,8 +292,8 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section,
Begin = createTempSymbol(BeginSymName, false);
// Otherwise, return a new section.
- return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
- Reserved2, Kind, Begin);
+ return Entry = new (MachOAllocator.Allocate()) MCSectionMachO(
+ Segment, Section, TypeAndAttributes, Reserved2, Kind, Begin);
}
void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) {
@@ -322,7 +320,7 @@ MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type,
bool Inserted;
std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true));
- return new (*this)
+ return new (ELFAllocator.Allocate())
MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(),
EntrySize, Group, true, nullptr, Associated);
}
@@ -367,15 +365,15 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
if (BeginSymName)
Begin = createTempSymbol(BeginSymName, false);
- MCSectionELF *Result =
- new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize,
- GroupSym, UniqueID, Begin, Associated);
+ MCSectionELF *Result = new (ELFAllocator.Allocate())
+ MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID,
+ Begin, Associated);
Entry.second = Result;
return Result;
}
MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) {
- MCSectionELF *Result = new (*this)
+ MCSectionELF *Result = new (ELFAllocator.Allocate())
MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
Group, ~0, nullptr, nullptr);
return Result;
@@ -404,7 +402,7 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
Begin = createTempSymbol(BeginSymName, false);
StringRef CachedName = Iter->first.SectionName;
- MCSectionCOFF *Result = new (*this) MCSectionCOFF(
+ MCSectionCOFF *Result = new (COFFAllocator.Allocate()) MCSectionCOFF(
CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin);
Iter->second = Result;
@@ -441,6 +439,10 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
}
+MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
+ return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI);
+}
+
//===----------------------------------------------------------------------===//
// Dwarf Management
//===----------------------------------------------------------------------===//
@@ -472,14 +474,24 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
[&](MCSection *Sec) { return !MCOS.mayHaveInstructions(*Sec); });
}
-void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const {
- // If we have a source manager and a location, use it. Otherwise just
- // use the generic report_fatal_error().
- if (!SrcMgr || Loc == SMLoc())
+//===----------------------------------------------------------------------===//
+// Error Reporting
+//===----------------------------------------------------------------------===//
+
+void MCContext::reportError(SMLoc Loc, const Twine &Msg) {
+ HadError = true;
+
+ // If we have a source manager use it. Otherwise just use the generic
+ // report_fatal_error().
+ if (!SrcMgr)
report_fatal_error(Msg, false);
// Use the source manager to print the message.
SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+}
+
+void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) {
+ reportError(Loc, Msg);
// If we reached here, we are failing ungracefully. Run the interrupt handlers
// to make sure any special cleanups get done, in particular that we remove
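
The MCContext hunk above splits diagnostics into a recoverable path and a fatal one: reportError() records HadError and prints through the SourceMgr when one is attached, while reportFatalError() now delegates to reportError() before terminating. A minimal sketch of how a client would use the recoverable path (checkDirective and its message are hypothetical, not part of this patch):

  // Diagnose a problem but keep going; the driver can test HadError later.
  void checkDirective(MCContext &Ctx, SMLoc Loc, bool Valid) {
    if (!Valid)
      Ctx.reportError(Loc, "invalid directive operand"); // sets HadError
  }

Note that without a SourceMgr attached, reportError() still falls back to report_fatal_error(), so the path is only recoverable when assembling from source.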
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 716d76a..82063fb 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -125,7 +125,6 @@ void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
static void emitComments(LLVMDisasmContext *DC,
formatted_raw_ostream &FormattedOS) {
// Flush the stream before taking its content.
- DC->CommentStream.flush();
StringRef Comments = DC->CommentsToEmit.str();
// Get the default information for printing a comment.
const MCAsmInfo *MAI = DC->getAsmInfo();
@@ -147,7 +146,6 @@ static void emitComments(LLVMDisasmContext *DC,
// Tell the comment stream that the vector changed underneath it.
DC->CommentsToEmit.clear();
- DC->CommentStream.resync();
}
/// \brief Gets latency information for \p Inst from the itinerary
@@ -261,7 +259,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
return 0;
case MCDisassembler::Success: {
- Annotations.flush();
StringRef AnnotationsStr = Annotations.str();
SmallVector<char, 64> InsnStr;
@@ -273,7 +270,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
emitLatency(DC, Inst);
emitComments(DC, FormattedOS);
- OS.flush();
assert(OutStringSize != 0 && "Output buffer cannot be zero size");
size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
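
The flush()/resync() deletions in this file track the LLVM 3.8-era change that made raw_svector_ostream write straight into its SmallVector with no internal buffer, so str() always sees complete contents. A sketch of the now-safe pattern, assuming that unbuffered behavior:

  SmallString<64> Buf;
  raw_svector_ostream OS(Buf);
  OS << "some text";       // appended to Buf immediately
  StringRef S = OS.str();  // complete without an explicit flush()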
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index c84c486..a99ac4e 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -27,26 +27,8 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-// Given a special op, return the address skip amount (in units of
-// DWARF2_LINE_MIN_INSN_LENGTH.
-#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
-
-// The maximum address skip amount that can be encoded with a special op.
-#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
-
-// First special line opcode - leave room for the standard opcodes.
-// Note: If you want to change this, you'll have to update the
-// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
-#define DWARF2_LINE_OPCODE_BASE 13
-
-// Minimum line offset in a special line info. opcode. This value
-// was chosen to give a reasonable range of values.
-#define DWARF2_LINE_BASE -5
-// Range of line offsets in a special line info. opcode.
-#define DWARF2_LINE_RANGE 14
+using namespace llvm;
static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment();
@@ -197,7 +179,8 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section,
//
// This emits the Dwarf file and the line tables.
//
-void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) {
+void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS,
+ MCDwarfLineTableParams Params) {
MCContext &context = MCOS->getContext();
auto &LineTables = context.getMCDwarfLineTables();
@@ -212,14 +195,17 @@ void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) {
// Handle the rest of the Compile Units.
for (const auto &CUIDTablePair : LineTables)
- CUIDTablePair.second.EmitCU(MCOS);
+ CUIDTablePair.second.EmitCU(MCOS, Params);
}
-void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS) const {
- MCOS.EmitLabel(Header.Emit(&MCOS, None).second);
+void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS,
+ MCDwarfLineTableParams Params) const {
+ MCOS.EmitLabel(Header.Emit(&MCOS, Params, None).second);
}
-std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) const {
+std::pair<MCSymbol *, MCSymbol *>
+MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
+ MCDwarfLineTableParams Params) const {
static const char StandardOpcodeLengths[] = {
0, // length of DW_LNS_copy
1, // length of DW_LNS_advance_pc
@@ -234,9 +220,10 @@ std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS)
0, // length of DW_LNS_set_epilogue_begin
1 // DW_LNS_set_isa
};
- assert(array_lengthof(StandardOpcodeLengths) ==
- (DWARF2_LINE_OPCODE_BASE - 1));
- return Emit(MCOS, StandardOpcodeLengths);
+ assert(array_lengthof(StandardOpcodeLengths) >=
+ (Params.DWARF2LineOpcodeBase - 1U));
+ return Emit(MCOS, Params, makeArrayRef(StandardOpcodeLengths,
+ Params.DWARF2LineOpcodeBase - 1));
}
static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) {
@@ -256,9 +243,8 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) {
}
std::pair<MCSymbol *, MCSymbol *>
-MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
+MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
ArrayRef<char> StandardOpcodeLengths) const {
-
MCContext &context = MCOS->getContext();
// Create a symbol at the beginning of the line table.
@@ -293,8 +279,8 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
// Parameters of the state machine, are next.
MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1);
MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
- MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
- MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+ MCOS->EmitIntValue(Params.DWARF2LineBase, 1);
+ MCOS->EmitIntValue(Params.DWARF2LineRange, 1);
MCOS->EmitIntValue(StandardOpcodeLengths.size() + 1, 1);
// Standard opcode lengths
@@ -329,8 +315,9 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
return std::make_pair(LineStartSym, LineEndSym);
}
-void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const {
- MCSymbol *LineEndSym = Header.Emit(MCOS).second;
+void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS,
+ MCDwarfLineTableParams Params) const {
+ MCSymbol *LineEndSym = Header.Emit(MCOS, Params).second;
// Put out the line tables.
for (const auto &LineSec : MCLineSections.getMCLineEntries())
@@ -416,21 +403,31 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory,
}
/// Utility function to emit the encoding to a streamer.
-void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
- uint64_t AddrDelta) {
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta) {
MCContext &Context = MCOS->getContext();
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
- MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OS);
+ MCDwarfLineAddr::Encode(Context, Params, LineDelta, AddrDelta, OS);
MCOS->EmitBytes(OS.str());
}
+/// Given a special op, return the address skip amount (in units of
+/// DWARF2_LINE_MIN_INSN_LENGTH).
+static uint64_t SpecialAddr(MCDwarfLineTableParams Params, uint64_t op) {
+ return (op - Params.DWARF2LineOpcodeBase) / Params.DWARF2LineRange;
+}
+
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
-void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
- uint64_t AddrDelta, raw_ostream &OS) {
+void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS) {
uint64_t Temp, Opcode;
bool NeedCopy = false;
+ // The maximum address skip amount that can be encoded with a special op.
+ uint64_t MaxSpecialAddrDelta = SpecialAddr(Params, 255);
+
// Scale the address delta by the minimum instruction length.
AddrDelta = ScaleAddrDelta(Context, AddrDelta);
@@ -438,7 +435,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
// DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
// end_sequence to emit the matrix entry.
if (LineDelta == INT64_MAX) {
- if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+ if (AddrDelta == MaxSpecialAddrDelta)
OS << char(dwarf::DW_LNS_const_add_pc);
else if (AddrDelta) {
OS << char(dwarf::DW_LNS_advance_pc);
@@ -451,16 +448,16 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
}
// Bias the line delta by the base.
- Temp = LineDelta - DWARF2_LINE_BASE;
+ Temp = LineDelta - Params.DWARF2LineBase;
// If the line increment is out of range of a special opcode, we must encode
// it with DW_LNS_advance_line.
- if (Temp >= DWARF2_LINE_RANGE) {
+ if (Temp >= Params.DWARF2LineRange) {
OS << char(dwarf::DW_LNS_advance_line);
encodeSLEB128(LineDelta, OS);
LineDelta = 0;
- Temp = 0 - DWARF2_LINE_BASE;
+ Temp = 0 - Params.DWARF2LineBase;
NeedCopy = true;
}
@@ -471,19 +468,19 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
}
// Bias the opcode by the special opcode base.
- Temp += DWARF2_LINE_OPCODE_BASE;
+ Temp += Params.DWARF2LineOpcodeBase;
// Avoid overflow when addr_delta is large.
- if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+ if (AddrDelta < 256 + MaxSpecialAddrDelta) {
// Try using a special opcode.
- Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+ Opcode = Temp + AddrDelta * Params.DWARF2LineRange;
if (Opcode <= 255) {
OS << char(Opcode);
return;
}
// Try using DW_LNS_const_add_pc followed by special op.
- Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+ Opcode = Temp + (AddrDelta - MaxSpecialAddrDelta) * Params.DWARF2LineRange;
if (Opcode <= 255) {
OS << char(dwarf::DW_LNS_const_add_pc);
OS << char(Opcode);
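
For reference, the special-opcode arithmetic this hunk parameterizes follows the standard DWARF formula; with the defaults the deleted macros encoded (opcode base 13, line base -5, line range 14), a row advance of +2 lines and +3 scaled address units folds into a single byte. A worked sketch — the hard-coded defaults are illustrative, since real values now come from MCDwarfLineTableParams:

  #include <cstdint>

  // opcode = (LineDelta - LineBase) + (LineRange * AddrDelta) + OpcodeBase
  static uint64_t specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
    const int64_t LineBase = -5;     // Params.DWARF2LineBase
    const uint64_t LineRange = 14;   // Params.DWARF2LineRange
    const uint64_t OpcodeBase = 13;  // Params.DWARF2LineOpcodeBase
    return (LineDelta - LineBase) + LineRange * AddrDelta + OpcodeBase;
  }
  // specialOpcode(2, 3) == 7 + 42 + 13 == 62 <= 255, so one byte encodes
  // both deltas; SpecialAddr(Params, 255) == (255 - 13) / 14 == 17 is the
  // largest address skip such an opcode can carry.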
@@ -517,10 +514,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(1);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
- EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
- if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1 &&
- MCOS->getContext().getDwarfVersion() >= 3) {
- EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list,
+ context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4);
+ if (context.getGenDwarfSectionSyms().size() > 1 &&
+ context.getDwarfVersion() >= 3) {
+ EmitAbbrev(MCOS, dwarf::DW_AT_ranges,
+ context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4);
} else {
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
@@ -845,7 +846,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0);
MCSymbol *AbbrevSectionSymbol = nullptr;
MCSymbol *InfoSectionSymbol = nullptr;
- MCSymbol *RangesSectionSymbol = NULL;
+ MCSymbol *RangesSectionSymbol = nullptr;
// Create end symbols for each section, and remove empty sections
MCOS->getContext().finalizeDwarfSections(*MCOS);
@@ -998,38 +999,29 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
}
namespace {
- class FrameEmitterImpl {
- int CFAOffset;
- int InitialCFAOffset;
- bool IsEH;
- const MCSymbol *SectionStart;
- public:
- FrameEmitterImpl(bool isEH)
- : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) {
- }
-
- void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
-
- /// Emit the unwind information in a compact way.
- void EmitCompactUnwind(MCObjectStreamer &streamer,
- const MCDwarfFrameInfo &frame);
-
- const MCSymbol &EmitCIE(MCObjectStreamer &streamer,
- const MCSymbol *personality,
- unsigned personalityEncoding,
- const MCSymbol *lsda,
- bool IsSignalFrame,
- unsigned lsdaEncoding,
- bool IsSimple);
- MCSymbol *EmitFDE(MCObjectStreamer &streamer,
- const MCSymbol &cieStart,
- const MCDwarfFrameInfo &frame);
- void EmitCFIInstructions(MCObjectStreamer &streamer,
- ArrayRef<MCCFIInstruction> Instrs,
- MCSymbol *BaseLabel);
- void EmitCFIInstruction(MCObjectStreamer &Streamer,
- const MCCFIInstruction &Instr);
- };
+class FrameEmitterImpl {
+ int CFAOffset = 0;
+ int InitialCFAOffset = 0;
+ bool IsEH;
+ MCObjectStreamer &Streamer;
+
+public:
+ FrameEmitterImpl(bool IsEH, MCObjectStreamer &Streamer)
+ : IsEH(IsEH), Streamer(Streamer) {}
+
+ /// Emit the unwind information in a compact way.
+ void EmitCompactUnwind(const MCDwarfFrameInfo &frame);
+
+ const MCSymbol &EmitCIE(const MCSymbol *personality,
+ unsigned personalityEncoding, const MCSymbol *lsda,
+ bool IsSignalFrame, unsigned lsdaEncoding,
+ bool IsSimple);
+ void EmitFDE(const MCSymbol &cieStart, const MCDwarfFrameInfo &frame,
+ bool LastInSection, const MCSymbol &SectionStart);
+ void EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
+ MCSymbol *BaseLabel);
+ void EmitCFIInstruction(const MCCFIInstruction &Instr);
+};
} // end anonymous namespace
@@ -1037,8 +1029,7 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) {
Streamer.EmitIntValue(Encoding, 1);
}
-void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
- const MCCFIInstruction &Instr) {
+void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
auto *MRI = Streamer.getContext().getRegisterInfo();
@@ -1150,6 +1141,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
+ case MCCFIInstruction::OpGnuArgsSize: {
+ Streamer.EmitIntValue(dwarf::DW_CFA_GNU_args_size, 1);
+ Streamer.EmitULEB128IntValue(Instr.getOffset());
+ return;
+ }
case MCCFIInstruction::OpEscape:
Streamer.EmitBytes(Instr.getValues());
return;
@@ -1158,8 +1154,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
}
/// Emit frame instructions to describe the layout of the frame.
-void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
- ArrayRef<MCCFIInstruction> Instrs,
+void FrameEmitterImpl::EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel) {
for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
const MCCFIInstruction &Instr = Instrs[i];
@@ -1171,18 +1166,17 @@ void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
if (BaseLabel && Label) {
MCSymbol *ThisSym = Label;
if (ThisSym != BaseLabel) {
- streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ Streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
BaseLabel = ThisSym;
}
}
- EmitCFIInstruction(streamer, Instr);
+ EmitCFIInstruction(Instr);
}
}
/// Emit the unwind information in a compact way.
-void FrameEmitterImpl::EmitCompactUnwind(MCObjectStreamer &Streamer,
- const MCDwarfFrameInfo &Frame) {
+void FrameEmitterImpl::EmitCompactUnwind(const MCDwarfFrameInfo &Frame) {
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
@@ -1254,39 +1248,39 @@ static unsigned getCIEVersion(bool IsEH, unsigned DwarfVersion) {
case 3:
return 3;
case 4:
+ case 5:
return 4;
}
llvm_unreachable("Unknown version");
}
-const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
- const MCSymbol *personality,
+const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
bool IsSignalFrame,
unsigned lsdaEncoding,
bool IsSimple) {
- MCContext &context = streamer.getContext();
+ MCContext &context = Streamer.getContext();
const MCRegisterInfo *MRI = context.getRegisterInfo();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
MCSymbol *sectionStart = context.createTempSymbol();
- streamer.EmitLabel(sectionStart);
+ Streamer.EmitLabel(sectionStart);
MCSymbol *sectionEnd = context.createTempSymbol();
// Length
- const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
- *sectionEnd, 4);
- emitAbsValue(streamer, Length, 4);
+ const MCExpr *Length =
+ MakeStartMinusEndExpr(Streamer, *sectionStart, *sectionEnd, 4);
+ emitAbsValue(Streamer, Length, 4);
// CIE ID
unsigned CIE_ID = IsEH ? 0 : -1;
- streamer.EmitIntValue(CIE_ID, 4);
+ Streamer.EmitIntValue(CIE_ID, 4);
// Version
uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
- streamer.EmitIntValue(CIEVersion, 1);
+ Streamer.EmitIntValue(CIEVersion, 1);
// Augmentation String
SmallString<8> Augmentation;
@@ -1299,31 +1293,31 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
Augmentation += "R";
if (IsSignalFrame)
Augmentation += "S";
- streamer.EmitBytes(Augmentation);
+ Streamer.EmitBytes(Augmentation);
}
- streamer.EmitIntValue(0, 1);
+ Streamer.EmitIntValue(0, 1);
if (CIEVersion >= 4) {
// Address Size
- streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
+ Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
// Segment Descriptor Size
- streamer.EmitIntValue(0, 1);
+ Streamer.EmitIntValue(0, 1);
}
// Code Alignment Factor
- streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
+ Streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
// Data Alignment Factor
- streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+ Streamer.EmitSLEB128IntValue(getDataAlignmentFactor(Streamer));
// Return Address Register
if (CIEVersion == 1) {
assert(MRI->getRARegister() <= 255 &&
"DWARF 2 encodes return_address_register in one byte");
- streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
+ Streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
} else {
- streamer.EmitULEB128IntValue(
+ Streamer.EmitULEB128IntValue(
MRI->getDwarfRegNum(MRI->getRARegister(), IsEH));
}
@@ -1335,28 +1329,28 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
// Personality Encoding
augmentationLength += 1;
// Personality
- augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+ augmentationLength += getSizeForEncoding(Streamer, personalityEncoding);
}
if (lsda)
augmentationLength += 1;
// Encoding of the FDE pointers
augmentationLength += 1;
- streamer.EmitULEB128IntValue(augmentationLength);
+ Streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data (optional)
if (personality) {
// Personality Encoding
- emitEncodingByte(streamer, personalityEncoding);
+ emitEncodingByte(Streamer, personalityEncoding);
// Personality
- EmitPersonality(streamer, *personality, personalityEncoding);
+ EmitPersonality(Streamer, *personality, personalityEncoding);
}
if (lsda)
- emitEncodingByte(streamer, lsdaEncoding);
+ emitEncodingByte(Streamer, lsdaEncoding);
// Encoding of the FDE pointers
- emitEncodingByte(streamer, MOFI->getFDEEncoding());
+ emitEncodingByte(Streamer, MOFI->getFDEEncoding());
}
// Initial Instructions
@@ -1365,22 +1359,23 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
if (!IsSimple) {
const std::vector<MCCFIInstruction> &Instructions =
MAI->getInitialFrameState();
- EmitCFIInstructions(streamer, Instructions, nullptr);
+ EmitCFIInstructions(Instructions, nullptr);
}
InitialCFAOffset = CFAOffset;
// Padding
- streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
+ Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
- streamer.EmitLabel(sectionEnd);
+ Streamer.EmitLabel(sectionEnd);
return *sectionStart;
}
-MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
- const MCSymbol &cieStart,
- const MCDwarfFrameInfo &frame) {
- MCContext &context = streamer.getContext();
+void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame,
+ bool LastInSection,
+ const MCSymbol &SectionStart) {
+ MCContext &context = Streamer.getContext();
MCSymbol *fdeStart = context.createTempSymbol();
MCSymbol *fdeEnd = context.createTempSymbol();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
@@ -1388,107 +1383,103 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
CFAOffset = InitialCFAOffset;
// Length
- const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
- emitAbsValue(streamer, Length, 4);
+ const MCExpr *Length = MakeStartMinusEndExpr(Streamer, *fdeStart, *fdeEnd, 0);
+ emitAbsValue(Streamer, Length, 4);
- streamer.EmitLabel(fdeStart);
+ Streamer.EmitLabel(fdeStart);
// CIE Pointer
const MCAsmInfo *asmInfo = context.getAsmInfo();
if (IsEH) {
- const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
- 0);
- emitAbsValue(streamer, offset, 4);
+ const MCExpr *offset =
+ MakeStartMinusEndExpr(Streamer, cieStart, *fdeStart, 0);
+ emitAbsValue(Streamer, offset, 4);
} else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) {
- const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
- cieStart, 0);
- emitAbsValue(streamer, offset, 4);
+ const MCExpr *offset =
+ MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0);
+ emitAbsValue(Streamer, offset, 4);
} else {
- streamer.EmitSymbolValue(&cieStart, 4);
+ Streamer.EmitSymbolValue(&cieStart, 4);
}
// PC Begin
unsigned PCEncoding =
IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr;
- unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
- emitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH);
+ unsigned PCSize = getSizeForEncoding(Streamer, PCEncoding);
+ emitFDESymbol(Streamer, *frame.Begin, PCEncoding, IsEH);
// PC Range
- const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
- *frame.End, 0);
- emitAbsValue(streamer, Range, PCSize);
+ const MCExpr *Range =
+ MakeStartMinusEndExpr(Streamer, *frame.Begin, *frame.End, 0);
+ emitAbsValue(Streamer, Range, PCSize);
if (IsEH) {
// Augmentation Data Length
unsigned augmentationLength = 0;
if (frame.Lsda)
- augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+ augmentationLength += getSizeForEncoding(Streamer, frame.LsdaEncoding);
- streamer.EmitULEB128IntValue(augmentationLength);
+ Streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data
if (frame.Lsda)
- emitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true);
+ emitFDESymbol(Streamer, *frame.Lsda, frame.LsdaEncoding, true);
}
// Call Frame Instructions
- EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+ EmitCFIInstructions(frame.Instructions, frame.Begin);
// Padding
- streamer.EmitValueToAlignment(PCSize);
+ // The size of a .eh_frame section has to be a multiple of the alignment
+ // since a null CIE is interpreted as the end. Old systems overaligned
+ // .eh_frame, so we do too and account for it in the last FDE.
+ unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize;
+ Streamer.EmitValueToAlignment(Align);
- return fdeEnd;
+ Streamer.EmitLabel(fdeEnd);
}
namespace {
- struct CIEKey {
- static const CIEKey getEmptyKey() {
- return CIEKey(nullptr, 0, -1, false, false);
- }
- static const CIEKey getTombstoneKey() {
- return CIEKey(nullptr, -1, 0, false, false);
- }
+struct CIEKey {
+ static const CIEKey getEmptyKey() {
+ return CIEKey(nullptr, 0, -1, false, false);
+ }
+ static const CIEKey getTombstoneKey() {
+ return CIEKey(nullptr, -1, 0, false, false);
+ }
- CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_,
- unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_)
- : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
- LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_),
- IsSimple(IsSimple_) {}
- const MCSymbol *Personality;
- unsigned PersonalityEncoding;
- unsigned LsdaEncoding;
- bool IsSignalFrame;
- bool IsSimple;
- };
-}
+ CIEKey(const MCSymbol *Personality, unsigned PersonalityEncoding,
+ unsigned LsdaEncoding, bool IsSignalFrame, bool IsSimple)
+ : Personality(Personality), PersonalityEncoding(PersonalityEncoding),
+ LsdaEncoding(LsdaEncoding), IsSignalFrame(IsSignalFrame),
+ IsSimple(IsSimple) {}
+ const MCSymbol *Personality;
+ unsigned PersonalityEncoding;
+ unsigned LsdaEncoding;
+ bool IsSignalFrame;
+ bool IsSimple;
+};
+} // anonymous namespace
namespace llvm {
- template <>
- struct DenseMapInfo<CIEKey> {
- static CIEKey getEmptyKey() {
- return CIEKey::getEmptyKey();
- }
- static CIEKey getTombstoneKey() {
- return CIEKey::getTombstoneKey();
- }
- static unsigned getHashValue(const CIEKey &Key) {
- return static_cast<unsigned>(hash_combine(Key.Personality,
- Key.PersonalityEncoding,
- Key.LsdaEncoding,
- Key.IsSignalFrame,
- Key.IsSimple));
- }
- static bool isEqual(const CIEKey &LHS,
- const CIEKey &RHS) {
- return LHS.Personality == RHS.Personality &&
- LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
- LHS.LsdaEncoding == RHS.LsdaEncoding &&
- LHS.IsSignalFrame == RHS.IsSignalFrame &&
- LHS.IsSimple == RHS.IsSimple;
- }
- };
-}
+template <> struct DenseMapInfo<CIEKey> {
+ static CIEKey getEmptyKey() { return CIEKey::getEmptyKey(); }
+ static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
+ static unsigned getHashValue(const CIEKey &Key) {
+ return static_cast<unsigned>(
+ hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
+ Key.IsSignalFrame, Key.IsSimple));
+ }
+ static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
+ return LHS.Personality == RHS.Personality &&
+ LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+ LHS.LsdaEncoding == RHS.LsdaEncoding &&
+ LHS.IsSignalFrame == RHS.IsSignalFrame &&
+ LHS.IsSimple == RHS.IsSimple;
+ }
+};
+} // namespace llvm
void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
bool IsEH) {
@@ -1496,7 +1487,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
- FrameEmitterImpl Emitter(IsEH);
+ FrameEmitterImpl Emitter(IsEH, Streamer);
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos();
// Emit the compact unwind info if available.
@@ -1514,7 +1505,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
NeedsEHFrameSection |=
Frame.CompactUnwindEncoding ==
MOFI->getCompactUnwindDwarfEHFrameOnly();
- Emitter.EmitCompactUnwind(Streamer, Frame);
+ Emitter.EmitCompactUnwind(Frame);
}
}
@@ -1527,23 +1518,15 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
Streamer.SwitchSection(&Section);
MCSymbol *SectionStart = Context.createTempSymbol();
Streamer.EmitLabel(SectionStart);
- Emitter.setSectionStart(SectionStart);
- MCSymbol *FDEEnd = nullptr;
DenseMap<CIEKey, const MCSymbol *> CIEStarts;
const MCSymbol *DummyDebugKey = nullptr;
- NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
- for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
- const MCDwarfFrameInfo &Frame = FrameArray[i];
-
- // Emit the label from the previous iteration
- if (FDEEnd) {
- Streamer.EmitLabel(FDEEnd);
- FDEEnd = nullptr;
- }
-
- if (!NeedsEHFrameSection && Frame.CompactUnwindEncoding !=
+ bool CanOmitDwarf = MOFI->getOmitDwarfIfHaveCompactUnwind();
+ for (auto I = FrameArray.begin(), E = FrameArray.end(); I != E;) {
+ const MCDwarfFrameInfo &Frame = *I;
+ ++I;
+ if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
MOFI->getCompactUnwindDwarfEHFrameOnly())
// Don't generate an EH frame if we don't need one. I.e., it's taken care
// of by the compact unwind encoding.
@@ -1553,18 +1536,12 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
Frame.LsdaEncoding, Frame.IsSignalFrame, Frame.IsSimple);
const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
if (!CIEStart)
- CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
- Frame.PersonalityEncoding, Frame.Lsda,
- Frame.IsSignalFrame,
- Frame.LsdaEncoding,
- Frame.IsSimple);
+ CIEStart = &Emitter.EmitCIE(Frame.Personality, Frame.PersonalityEncoding,
+ Frame.Lsda, Frame.IsSignalFrame,
+ Frame.LsdaEncoding, Frame.IsSimple);
- FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+ Emitter.EmitFDE(*CIEStart, Frame, I == E, *SectionStart);
}
-
- Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
- if (FDEEnd)
- Streamer.EmitLabel(FDEEnd);
}
void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
index bc0ba85..de645ca 100644
--- a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -29,23 +29,7 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return false;
}
-// ELF doesn't require relocations to be in any order. We sort by the Offset,
-// just to match gnu as for easier comparison. The use type is an arbitrary way
-// of making the sort deterministic.
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
- const ELFRelocationEntry &A = *AP;
- const ELFRelocationEntry &B = *BP;
- if (A.Offset != B.Offset)
- return B.Offset - A.Offset;
- if (B.Type != A.Type)
- return A.Type - B.Type;
- //llvm_unreachable("ELFRelocs might be unstable!");
- return 0;
-}
-
-
void
MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
- array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
}
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index fe9ac21..06d161b 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -68,7 +68,6 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
Assembler.writeFragmentPadding(*EF, FSize, OW);
- VecOS.flush();
delete OW;
DF->getContents().append(Code.begin(), Code.end());
@@ -87,20 +86,10 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
}
void MCELFStreamer::InitSections(bool NoExecStack) {
- // This emulates the same behavior of GNU as. This makes it easier
- // to compare the output as the major sections are in the same order.
MCContext &Ctx = getContext();
SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
EmitCodeAlignment(4);
- SwitchSection(Ctx.getObjectFileInfo()->getDataSection());
- EmitCodeAlignment(4);
-
- SwitchSection(Ctx.getObjectFileInfo()->getBSSSection());
- EmitCodeAlignment(4);
-
- SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
-
if (NoExecStack)
SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
}
@@ -112,7 +101,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *S) {
MCObjectStreamer::EmitLabel(Symbol);
const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(Symbol->getSection());
+ static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
if (Section.getFlags() & ELF::SHF_TLS)
Symbol->setType(ELF::STT_TLS);
}
@@ -134,7 +123,7 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("invalid assembler flag!");
}
-// If bundle aligment is used and there are any instructions in the section, it
+// If bundle alignment is used and there are any instructions in the section, it
// needs to be aligned to at least the bundle size.
static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
MCSection *Section) {
@@ -312,13 +301,20 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
Symbol->setType(ELF::STT_OBJECT);
if (Symbol->getBinding() == ELF::STB_LOCAL) {
- MCSection *Section = getAssembler().getContext().getELFSection(
+ MCSection &Section = *getAssembler().getContext().getELFSection(
".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ MCSectionSubPair P = getCurrentSection();
+ SwitchSection(&Section);
- AssignSection(Symbol, Section);
+ EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+ EmitLabel(Symbol);
+ EmitZeros(Size);
- struct LocalCommon L = {Symbol, Size, ByteAlignment};
- LocalCommons.push_back(L);
+ // Update the maximum alignment of the section if necessary.
+ if (ByteAlignment > Section.getAlignment())
+ Section.setAlignment(ByteAlignment);
+
+ SwitchSection(P.first, P.second);
} else {
if(Symbol->declareCommon(Size, ByteAlignment))
report_fatal_error("Symbol: " + Symbol->getName() +
@@ -344,7 +340,7 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
}
void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+ SMLoc Loc) {
if (isBundleLocked())
report_fatal_error("Emitting values inside a locked bundle is forbidden");
fixSymbolsInTLSFixups(Value);
@@ -480,7 +476,6 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
fixSymbolsInTLSFixups(Fixups[i].getValue());
@@ -603,7 +598,7 @@ void MCELFStreamer::EmitBundleUnlock() {
report_fatal_error("Empty bundle-locked group is forbidden");
// When the -mc-relax-all flag is used, we emit instructions to fragments
- // stored on a stack. When the bundle unlock is emited, we pop a fragment
+ // stored on a stack. When the bundle unlock is emitted, we pop a fragment
// from the stack and merge it into the one below.
if (getAssembler().getRelaxAll()) {
assert(!BundleGroups.empty() && "There are no bundle groups");
@@ -625,29 +620,6 @@ void MCELFStreamer::EmitBundleUnlock() {
Sec.setBundleLockState(MCSection::NotBundleLocked);
}
-void MCELFStreamer::Flush() {
- for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
- e = LocalCommons.end();
- i != e; ++i) {
- const MCSymbol &Symbol = *i->Symbol;
- uint64_t Size = i->Size;
- unsigned ByteAlignment = i->ByteAlignment;
- MCSection &Section = Symbol.getSection();
-
- getAssembler().registerSection(Section);
- new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section);
-
- MCFragment *F = new MCFillFragment(0, 0, Size, &Section);
- Symbol.setFragment(F);
-
- // Update the maximum alignment of the section if necessary.
- if (ByteAlignment > Section.getAlignment())
- Section.setAlignment(ByteAlignment);
- }
-
- LocalCommons.clear();
-}
-
void MCELFStreamer::FinishImpl() {
// Ensure the last section gets aligned if necessary.
MCSection *CurSection = getCurrentSectionOnly();
@@ -655,8 +627,6 @@ void MCELFStreamer::FinishImpl() {
EmitFrames(nullptr);
- Flush();
-
this->MCObjectStreamer::FinishImpl();
}
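
The EmitCommonSymbol change above replaces the deferred LocalCommons/Flush() machinery with eager emission: a local common is materialized in .bss on the spot and the previous section is restored, which is why Flush() disappears from FinishImpl(). In streamer calls the eager path amounts to this sketch (condensed from the patch, not a verbatim excerpt):

  // Inside EmitCommonSymbol, for a symbol with STB_LOCAL binding:
  MCSectionSubPair Prev = getCurrentSection();
  SwitchSection(BSS);                           // .bss, SHT_NOBITS
  EmitValueToAlignment(ByteAlignment, 0, 1, 0); // pad to the alignment
  EmitLabel(Symbol);                            // define the symbol here
  EmitZeros(Size);                              // reserve its storage
  SwitchSection(Prev.first, Prev.second);       // resume where we were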
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index a30ceec..0f26b38 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -43,7 +43,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
const MCSymbol &Sym = SRE.getSymbol();
// Parenthesize names that start with $ so that they don't look like
// absolute names.
- bool UseParens = Sym.getName()[0] == '$';
+ bool UseParens = Sym.getName().size() && Sym.getName()[0] == '$';
if (UseParens) {
OS << '(';
Sym.print(OS, MAI);
@@ -202,6 +202,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
case VK_ARM_NONE: return "none";
+ case VK_ARM_GOT_PREL: return "GOT_PREL";
case VK_ARM_TARGET1: return "target1";
case VK_ARM_TARGET2: return "target2";
case VK_ARM_PREL31: return "prel31";
@@ -311,7 +312,6 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got", VK_GOT)
.Case("gotoff", VK_GOTOFF)
.Case("gotpcrel", VK_GOTPCREL)
- .Case("got_prel", VK_GOTPCREL)
.Case("gottpoff", VK_GOTTPOFF)
.Case("indntpoff", VK_INDNTPOFF)
.Case("ntpoff", VK_NTPOFF)
@@ -382,7 +382,15 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
.Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
+ .Case("gdgot", VK_Hexagon_GD_GOT)
+ .Case("gdplt", VK_Hexagon_GD_PLT)
+ .Case("iegot", VK_Hexagon_IE_GOT)
+ .Case("ie", VK_Hexagon_IE)
+ .Case("ldgot", VK_Hexagon_LD_GOT)
+ .Case("ldplt", VK_Hexagon_LD_PLT)
+ .Case("pcrel", VK_Hexagon_PCREL)
.Case("none", VK_ARM_NONE)
+ .Case("got_prel", VK_ARM_GOT_PREL)
.Case("target1", VK_ARM_TARGET1)
.Case("target2", VK_ARM_TARGET2)
.Case("prel31", VK_ARM_PREL31)
@@ -477,7 +485,8 @@ static void AttemptToFoldSymbolOffsetDifference(
if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
return;
- if (SA.getFragment() == SB.getFragment()) {
+ if (SA.getFragment() == SB.getFragment() && !SA.isVariable() &&
+ !SB.isVariable()) {
Addend += (SA.getOffset() - SB.getOffset());
// Pointers to Thumb symbols need to have their low-bit set to allow
@@ -583,11 +592,6 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
- // If we have a negated symbol, then we must have also have a non-negated
- // symbol in order to encode the expression.
- if (B && !A)
- return false;
-
Res = MCValue::get(A, B, Result_Cst);
return true;
}
@@ -606,7 +610,7 @@ bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
true);
}
-static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+static bool canExpand(const MCSymbol &Sym, bool InSet) {
const MCExpr *Expr = Sym.getVariableValue();
const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
if (Inner) {
@@ -616,9 +620,7 @@ static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
if (InSet)
return true;
- if (!Asm)
- return false;
- return !Asm->getWriter().isWeak(Sym);
+ return !Sym.isInSection();
}
bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
@@ -643,7 +645,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
// Evaluate recursively if this is a variable.
if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
- canExpand(Sym, Asm, InSet)) {
+ canExpand(Sym, InSet)) {
bool IsMachO = SRE->hasSubsectionsViaSymbols();
if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
@@ -739,7 +741,17 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
case MCBinaryExpr::Add: Result = LHS + RHS; break;
case MCBinaryExpr::And: Result = LHS & RHS; break;
- case MCBinaryExpr::Div: Result = LHS / RHS; break;
+ case MCBinaryExpr::Div:
+ // Handle division by zero. gas just emits a warning and keeps going,
+ // we try to be stricter.
+ // FIXME: Currently the caller of this function has no way to understand
+ // we're bailing out because of 'division by zero'. Therefore, it will
+ // emit a 'expected relocatable expression' error. It would be nice to
+ // change this code to emit a better diagnostic.
+ if (RHS == 0)
+ return false;
+ Result = LHS / RHS;
+ break;
case MCBinaryExpr::EQ: Result = LHS == RHS; break;
case MCBinaryExpr::GT: Result = LHS > RHS; break;
case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
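
As the FIXME in the division hunk notes, a zero divisor now makes evaluateAsRelocatableImpl() return false rather than executing an undefined integer division; the guard, reduced to its essentials (a sketch, not the full evaluator):

  // Constant folding bails out instead of dividing by zero.
  bool foldDiv(int64_t LHS, int64_t RHS, int64_t &Result) {
    if (RHS == 0)
      return false;  // caller reports 'expected relocatable expression'
    Result = LHS / RHS;
    return true;
  }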
@@ -765,45 +777,41 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
llvm_unreachable("Invalid assembly expression kind!");
}
-MCSection *MCExpr::findAssociatedSection() const {
+MCFragment *MCExpr::findAssociatedFragment() const {
switch (getKind()) {
case Target:
// We never look through target specific expressions.
- return cast<MCTargetExpr>(this)->findAssociatedSection();
+ return cast<MCTargetExpr>(this)->findAssociatedFragment();
case Constant:
- return MCSymbol::AbsolutePseudoSection;
+ return MCSymbol::AbsolutePseudoFragment;
case SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
const MCSymbol &Sym = SRE->getSymbol();
-
- if (Sym.isDefined())
- return &Sym.getSection();
-
- return nullptr;
+ return Sym.getFragment();
}
case Unary:
- return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedSection();
+ return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedFragment();
case Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
- MCSection *LHS_S = BE->getLHS()->findAssociatedSection();
- MCSection *RHS_S = BE->getRHS()->findAssociatedSection();
+ MCFragment *LHS_F = BE->getLHS()->findAssociatedFragment();
+ MCFragment *RHS_F = BE->getRHS()->findAssociatedFragment();
- // If either section is absolute, return the other.
- if (LHS_S == MCSymbol::AbsolutePseudoSection)
- return RHS_S;
- if (RHS_S == MCSymbol::AbsolutePseudoSection)
- return LHS_S;
+ // If either is absolute, return the other.
+ if (LHS_F == MCSymbol::AbsolutePseudoFragment)
+ return RHS_F;
+ if (RHS_F == MCSymbol::AbsolutePseudoFragment)
+ return LHS_F;
// Not always correct, but probably the best we can do without more context.
if (BE->getOpcode() == MCBinaryExpr::Sub)
- return MCSymbol::AbsolutePseudoSection;
+ return MCSymbol::AbsolutePseudoFragment;
- // Otherwise, return the first non-null section.
- return LHS_S ? LHS_S : RHS_S;
+ // Otherwise, return the first non-null fragment.
+ return LHS_F ? LHS_F : RHS_F;
}
}
diff --git a/contrib/llvm/lib/MC/MCFragment.cpp b/contrib/llvm/lib/MC/MCFragment.cpp
new file mode 100644
index 0000000..efdb704
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCFragment.cpp
@@ -0,0 +1,458 @@
+//===- lib/MC/MCFragment.cpp - Assembler Fragment Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFragment.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <tuple>
+using namespace llvm;
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+ : Assembler(Asm), LastValidFragment()
+ {
+ // Compute the section layout order. Virtual sections must go last.
+ for (MCSection &Sec : Asm)
+ if (!Sec.isVirtualSection())
+ SectionOrder.push_back(&Sec);
+ for (MCSection &Sec : Asm)
+ if (Sec.isVirtualSection())
+ SectionOrder.push_back(&Sec);
+}
+
+bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
+ const MCSection *Sec = F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(Sec);
+ if (!LastValid)
+ return false;
+ assert(LastValid->getParent() == Sec);
+ return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
+ // If this fragment wasn't already valid, we don't need to do anything.
+ if (!isFragmentValid(F))
+ return;
+
+ // Otherwise, reset the last valid fragment to the previous fragment
+ // (if this is the first fragment, it will be NULL).
+ LastValidFragment[F->getParent()] = F->getPrevNode();
+}
+
+void MCAsmLayout::ensureValid(const MCFragment *F) const {
+ MCSection *Sec = F->getParent();
+ MCSection::iterator I;
+ if (MCFragment *Cur = LastValidFragment[Sec])
+ I = ++MCSection::iterator(Cur);
+ else
+ I = Sec->begin();
+
+ // Advance the layout position until the fragment is valid.
+ while (!isFragmentValid(F)) {
+ assert(I != Sec->end() && "Layout bookkeeping error");
+ const_cast<MCAsmLayout *>(this)->layoutFragment(&*I);
+ ++I;
+ }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+ ensureValid(F);
+ assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+ return F->Offset;
+}
+
+// Simple getSymbolOffset helper for the non-variable case.
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
+ bool ReportError, uint64_t &Val) {
+ if (!S.getFragment()) {
+ if (ReportError)
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ S.getName() + "'");
+ return false;
+ }
+ Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset();
+ return true;
+}
+
+static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
+ bool ReportError, uint64_t &Val) {
+ if (!S.isVariable())
+ return getLabelOffset(Layout, S, ReportError, Val);
+
+ // If S is a variable, evaluate it.
+ MCValue Target;
+ if (!S.getVariableValue()->evaluateAsValue(Target, Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ uint64_t Offset = Target.getConstant();
+
+ const MCSymbolRefExpr *A = Target.getSymA();
+ if (A) {
+ uint64_t ValA;
+ if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
+ return false;
+ Offset += ValA;
+ }
+
+ const MCSymbolRefExpr *B = Target.getSymB();
+ if (B) {
+ uint64_t ValB;
+ if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
+ return false;
+ Offset -= ValB;
+ }
+
+ Val = Offset;
+ return true;
+}
+
+bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const {
+ return getSymbolOffsetImpl(*this, S, false, Val);
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const {
+ uint64_t Val;
+ getSymbolOffsetImpl(*this, S, true, Val);
+ return Val;
+}
+
+const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
+ if (!Symbol.isVariable())
+ return &Symbol;
+
+ const MCExpr *Expr = Symbol.getVariableValue();
+ MCValue Value;
+ if (!Expr->evaluateAsValue(Value, *this)) {
+ Assembler.getContext().reportError(
+ SMLoc(), "expression could not be evaluated");
+ return nullptr;
+ }
+
+ const MCSymbolRefExpr *RefB = Value.getSymB();
+ if (RefB) {
+ Assembler.getContext().reportError(
+ SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
+ "' could not be evaluated in a subtraction expression");
+ return nullptr;
+ }
+
+ const MCSymbolRefExpr *A = Value.getSymA();
+ if (!A)
+ return nullptr;
+
+ const MCSymbol &ASym = A->getSymbol();
+ const MCAssembler &Asm = getAssembler();
+ if (ASym.isCommon()) {
+ // FIXME: we should probably add a SMLoc to MCExpr.
+ Asm.getContext().reportError(SMLoc(),
+ "Common symbol '" + ASym.getName() +
+ "' cannot be used in assignment expr");
+ return nullptr;
+ }
+
+ return &ASym;
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
+ // The size is the last fragment's end offset.
+ const MCFragment &F = Sec->getFragmentList().back();
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const {
+ // Virtual sections have no file size.
+ if (Sec->isVirtualSection())
+ return 0;
+
+ // Otherwise, the file size is the same as the address space size.
+ return getSectionAddressSize(Sec);
+}
+
+uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
+ const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize) {
+ uint64_t BundleSize = Assembler.getBundleAlignSize();
+ assert(BundleSize > 0 &&
+ "computeBundlePadding should only be called if bundling is enabled");
+ uint64_t BundleMask = BundleSize - 1;
+ uint64_t OffsetInBundle = FOffset & BundleMask;
+ uint64_t EndOfFragment = OffsetInBundle + FSize;
+
+ // There are two kinds of bundling restrictions:
+ //
+ // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
+ // *end* on a bundle boundary.
+ // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
+ // would, add padding until the end of the bundle so that the fragment
+ // will start in a new one.
+ if (F->alignToBundleEnd()) {
+ // Three possibilities here:
+ //
+ // A) The fragment just happens to end at a bundle boundary, so we're good.
+ // B) The fragment ends before the current bundle boundary: pad it just
+ // enough to reach the boundary.
+ // C) The fragment ends after the current bundle boundary: pad it until it
+ // reaches the end of the next bundle boundary.
+ //
+ // Note: this code could be made shorter with some modulo trickery, but it's
+ // intentionally kept in its more explicit form for simplicity.
+ if (EndOfFragment == BundleSize)
+ return 0;
+ else if (EndOfFragment < BundleSize)
+ return BundleSize - EndOfFragment;
+ else { // EndOfFragment > BundleSize
+ return 2 * BundleSize - EndOfFragment;
+ }
+ } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
+ return BundleSize - OffsetInBundle;
+ else
+ return 0;
+}
+
+/* *** */
+
+void ilist_node_traits<MCFragment>::deleteNode(MCFragment *V) {
+ V->destroy();
+}
+
+MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false),
+ AlignToBundleEnd(false), BundlePadding(0) {
+}
+
+MCFragment::~MCFragment() { }
+
+MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
+ uint8_t BundlePadding, MCSection *Parent)
+ : Kind(Kind), HasInstructions(HasInstructions), AlignToBundleEnd(false),
+ BundlePadding(BundlePadding), Parent(Parent), Atom(nullptr),
+ Offset(~UINT64_C(0)) {
+ if (Parent && !isDummy())
+ Parent->getFragmentList().push_back(this);
+}
+
+void MCFragment::destroy() {
+ // First check if we are the sentinel.
+ if (Kind == FragmentType(~0)) {
+ delete this;
+ return;
+ }
+
+ switch (Kind) {
+ case FT_Align:
+ delete cast<MCAlignFragment>(this);
+ return;
+ case FT_Data:
+ delete cast<MCDataFragment>(this);
+ return;
+ case FT_CompactEncodedInst:
+ delete cast<MCCompactEncodedInstFragment>(this);
+ return;
+ case FT_Fill:
+ delete cast<MCFillFragment>(this);
+ return;
+ case FT_Relaxable:
+ delete cast<MCRelaxableFragment>(this);
+ return;
+ case FT_Org:
+ delete cast<MCOrgFragment>(this);
+ return;
+ case FT_Dwarf:
+ delete cast<MCDwarfLineAddrFragment>(this);
+ return;
+ case FT_DwarfFrame:
+ delete cast<MCDwarfCallFrameFragment>(this);
+ return;
+ case FT_LEB:
+ delete cast<MCLEBFragment>(this);
+ return;
+ case FT_SafeSEH:
+ delete cast<MCSafeSEHFragment>(this);
+ return;
+ case FT_Dummy:
+ delete cast<MCDummyFragment>(this);
+ return;
+ }
+}
+
+/* *** */
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+ OS << "<MCFixup" << " Offset:" << AF.getOffset()
+ << " Value:" << *AF.getValue()
+ << " Kind:" << AF.getKind() << ">";
+ return OS;
+}
+
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<";
+ switch (getKind()) {
+ case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_CompactEncodedInst:
+ OS << "MCCompactEncodedInstFragment"; break;
+ case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break;
+ case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+ case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+ case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+ case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break;
+ case MCFragment::FT_Dummy:
+ OS << "MCDummyFragment";
+ break;
+ }
+
+ OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ << " Offset:" << Offset
+ << " HasInstructions:" << hasInstructions()
+ << " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">";
+
+ switch (getKind()) {
+ case MCFragment::FT_Align: {
+ const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ if (AF->hasEmitNops())
+ OS << " (emit nops)";
+ OS << "\n ";
+ OS << " Alignment:" << AF->getAlignment()
+ << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+ << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+ break;
+ }
+ case MCFragment::FT_Data: {
+ const MCDataFragment *DF = cast<MCDataFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = DF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+
+ if (DF->fixup_begin() != DF->fixup_end()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+ ie = DF->fixup_end(); it != ie; ++it) {
+ if (it != DF->fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+ break;
+ }
+ case MCFragment::FT_CompactEncodedInst: {
+ const MCCompactEncodedInstFragment *CEIF =
+ cast<MCCompactEncodedInstFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = CEIF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+ break;
+ }
+ case MCFragment::FT_Fill: {
+ const MCFillFragment *FF = cast<MCFillFragment>(this);
+ OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+ << " Size:" << FF->getSize();
+ break;
+ }
+ case MCFragment::FT_Relaxable: {
+ const MCRelaxableFragment *F = cast<MCRelaxableFragment>(this);
+ OS << "\n ";
+ OS << " Inst:";
+ F->getInst().dump_pretty(OS);
+ break;
+ }
+ case MCFragment::FT_Org: {
+ const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ OS << "\n ";
+ OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ break;
+ }
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta()
+ << " LineDelta:" << OF->getLineDelta();
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << CF->getAddrDelta();
+ break;
+ }
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ OS << "\n ";
+ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ break;
+ }
+ case MCFragment::FT_SafeSEH: {
+ const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this);
+ OS << "\n ";
+ OS << " Sym:" << F->getSymbol();
+ break;
+ }
+ case MCFragment::FT_Dummy:
+ break;
+ }
+ OS << ">";
+}
+
+void MCAssembler::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAssembler\n";
+ OS << " Sections:[\n ";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "],\n";
+ OS << " Symbols:[";
+
+ for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ if (it != symbol_begin()) OS << ",\n ";
+ OS << "(";
+ it->dump();
+ OS << ", Index:" << it->getIndex() << ", ";
+ OS << ")";
+ }
+ OS << "]>\n";
+}
+#endif
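The Contents:[..] lines in these dump routines print each byte as two hex digits by splitting it into nibbles, high nibble first. A standalone sketch of the same formatting, where hexDigit stands in for llvm::hexdigit():

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Same nibble arithmetic as the dump() loops above.
    static char hexDigit(unsigned V) { return "0123456789ABCDEF"[V & 0xF]; }

    int main() {
      std::vector<char> Contents = {'\x48', '\x89', '\xe5'};
      std::printf("Contents:[");
      for (std::size_t I = 0; I != Contents.size(); ++I) {
        if (I) std::printf(",");
        std::printf("%c%c", hexDigit((Contents[I] >> 4) & 0xF),
                    hexDigit(Contents[I] & 0xF));
      }
      // Prints: Contents:[48,89,E5] (3 bytes)
      std::printf("] (%zu bytes)\n", Contents.size());
    }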
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
index 7ef69be..5f829ae 100644
--- a/contrib/llvm/lib/MC/MCInst.cpp
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -23,6 +23,8 @@ void MCOperand::print(raw_ostream &OS) const {
OS << "Reg:" << getReg();
else if (isImm())
OS << "Imm:" << getImm();
+ else if (isFPImm())
+ OS << "FPImm:" << getFPImm();
else if (isExpr()) {
OS << "Expr:(" << *getExpr() << ")";
} else if (isInst()) {
diff --git a/contrib/llvm/lib/MC/MCInstrDesc.cpp b/contrib/llvm/lib/MC/MCInstrDesc.cpp
index 5be2fa1..ee55f3e 100644
--- a/contrib/llvm/lib/MC/MCInstrDesc.cpp
+++ b/contrib/llvm/lib/MC/MCInstrDesc.cpp
@@ -53,7 +53,7 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI,
bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg,
const MCRegisterInfo *MRI) const {
- if (const uint16_t *ImpDefs = ImplicitDefs)
+ if (const MCPhysReg *ImpDefs = ImplicitDefs)
for (; *ImpDefs; ++ImpDefs)
if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs)))
return true;
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index 53cd131..21f7571 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -60,6 +60,7 @@ public:
/// state management
void reset() override {
+ CreatedADWARFSection = false;
HasSectionLabel.clear();
MCObjectStreamer::reset();
}
@@ -180,8 +181,6 @@ void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- // isSymbolLinkerVisible uses the section.
- AssignSection(Symbol, getCurrentSection().first);
// We have to create a new fragment if this is an atom defining symbol;
// fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(*Symbol))
@@ -384,8 +383,6 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- AssignSection(Symbol, nullptr);
-
getAssembler().registerSymbol(*Symbol);
Symbol->setExternal(true);
Symbol->setCommon(Size, ByteAlignment);
@@ -417,8 +414,6 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
if (ByteAlignment != 1)
new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section);
- AssignSection(Symbol, Section);
-
MCFragment *F = new MCFillFragment(0, 0, Size, Section);
Symbol->setFragment(F);
@@ -443,12 +438,11 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst,
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
// Add the fixups and data.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->getFixups().push_back(Fixups[i]);
+ for (MCFixup &Fixup : Fixups) {
+ Fixup.setOffset(Fixup.getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixup);
}
DF->getContents().append(Code.begin(), Code.end());
}
@@ -463,7 +457,8 @@ void MCMachOStreamer::FinishImpl() {
// defining symbols.
DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
for (const MCSymbol &Symbol : getAssembler().symbols()) {
- if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.getFragment()) {
+ if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.isInSection() &&
+ !Symbol.isVariable()) {
// An atom defining symbol should never be internal to a fragment.
assert(Symbol.getOffset() == 0 &&
"Invalid offset in atom defining symbol!");
@@ -473,14 +468,12 @@ void MCMachOStreamer::FinishImpl() {
// Set the fragment atom associations by tracking the last seen atom defining
// symbol.
- for (MCAssembler::iterator it = getAssembler().begin(),
- ie = getAssembler().end(); it != ie; ++it) {
+ for (MCSection &Sec : getAssembler()) {
const MCSymbol *CurrentAtom = nullptr;
- for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2;
- ++it2) {
- if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(it2))
+ for (MCFragment &Frag : Sec) {
+ if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag))
CurrentAtom = Symbol;
- it2->setAtom(CurrentAtom);
+ Frag.setAtom(CurrentAtom);
}
}
@@ -493,6 +486,26 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
bool LabelSections) {
MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE,
DWARFMustBeAtTheEnd, LabelSections);
+ const Triple &TT = Context.getObjectFileInfo()->getTargetTriple();
+ if (TT.isOSDarwin()) {
+ unsigned Major, Minor, Update;
+ TT.getOSVersion(Major, Minor, Update);
+ // If there is a version specified, Major will be non-zero.
+ if (Major) {
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS())
+ VersionType = MCVM_WatchOSVersionMin;
+ else if (TT.isTvOS())
+ VersionType = MCVM_TvOSVersionMin;
+ else if (TT.isMacOSX())
+ VersionType = MCVM_OSXVersionMin;
+ else {
+ assert(TT.isiOS() && "Must only be iOS platform left");
+ VersionType = MCVM_IOSVersionMin;
+ }
+ S->EmitVersionMin(VersionType, Major, Minor, Update);
+ }
+ }
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
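The platform cascade above is order-sensitive: watchOS and tvOS must be tested before the broader macOS/iOS buckets, with iOS as the asserted remainder. A standalone mimic of that selection, with invented names standing in for the Triple queries:

    #include <cassert>

    enum VersionMinType { WatchOSVersionMin, TvOSVersionMin,
                          OSXVersionMin, IOSVersionMin };

    // Mirrors the cascade in createMachOStreamer: the watchOS/tvOS checks
    // must win before the generic macOS/iOS buckets, and iOS is the only
    // Darwin platform left at the end.
    static VersionMinType pickVersionMin(bool IsWatchOS, bool IsTvOS,
                                         bool IsMacOSX) {
      if (IsWatchOS) return WatchOSVersionMin;
      if (IsTvOS)    return TvOSVersionMin;
      if (IsMacOSX)  return OSXVersionMin;
      return IOSVersionMin;
    }

    int main() {
      assert(pickVersionMin(true, false, false) == WatchOSVersionMin);
      assert(pickVersionMin(false, false, false) == IOSVersionMin);
    }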
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index 576827a..028f2e9 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -16,6 +16,8 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/COFF.h"
+
using namespace llvm;
static bool useCompactUnwind(const Triple &T) {
@@ -27,6 +29,10 @@ static bool useCompactUnwind(const Triple &T) {
if (T.getArch() == Triple::aarch64)
return true;
+ // armv7k always has it.
+ if (T.isWatchOS())
+ return true;
+
// Use it on newer versions of OS X.
if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
return true;
@@ -43,9 +49,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
// MachO
SupportsWeakOmittedEHFrame = false;
+ EHFrameSection = Ctx->getMachOSection(
+ "__TEXT", "__eh_frame",
+ MachO::S_COALESCED | MachO::S_ATTR_NO_TOC |
+ MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+
if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
SupportsCompactUnwindWithoutEHFrame = true;
+ if (T.isWatchOS())
+ OmitDwarfIfHaveCompactUnwind = true;
+
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
| dwarf::DW_EH_PE_sdata4;
LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
@@ -61,16 +76,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
MachO::S_ATTR_PURE_INSTRUCTIONS,
SectionKind::getText());
DataSection // .data
- = Ctx->getMachOSection("__DATA", "__data", 0,
- SectionKind::getDataRel());
+ = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getData());
// BSSSection might not be expected to be initialized on MSVC.
BSSSection = nullptr;
TLSDataSection // .tdata
- = Ctx->getMachOSection("__DATA", "__thread_data",
- MachO::S_THREAD_LOCAL_REGULAR,
- SectionKind::getDataRel());
+ = Ctx->getMachOSection("__DATA", "__thread_data",
+ MachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getData());
TLSBSSSection // .tbss
= Ctx->getMachOSection("__DATA", "__thread_bss",
MachO::S_THREAD_LOCAL_ZEROFILL,
@@ -78,14 +92,13 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
// TODO: Verify datarel below.
TLSTLVSection // .tlv
- = Ctx->getMachOSection("__DATA", "__thread_vars",
- MachO::S_THREAD_LOCAL_VARIABLES,
- SectionKind::getDataRel());
+ = Ctx->getMachOSection("__DATA", "__thread_vars",
+ MachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getData());
- TLSThreadInitSection
- = Ctx->getMachOSection("__DATA", "__thread_init",
- MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
- SectionKind::getDataRel());
+ TLSThreadInitSection = Ctx->getMachOSection(
+ "__DATA", "__thread_init", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getData());
CStringSection // .cstring
= Ctx->getMachOSection("__TEXT", "__cstring",
@@ -112,22 +125,35 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
= Ctx->getMachOSection("__TEXT", "__const", 0,
SectionKind::getReadOnly());
- TextCoalSection
- = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
- MachO::S_COALESCED |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- ConstTextCoalSection
- = Ctx->getMachOSection("__TEXT", "__const_coal",
- MachO::S_COALESCED,
- SectionKind::getReadOnly());
+ // If the target is not powerpc, map the coal sections to the non-coal
+ // sections.
+ //
+ // "__TEXT/__textcoal_nt" => section "__TEXT/__text"
+ // "__TEXT/__const_coal" => section "__TEXT/__const"
+ // "__DATA/__datacoal_nt" => section "__DATA/__data"
+ Triple::ArchType ArchTy = T.getArch();
+
+ if (ArchTy == Triple::ppc || ArchTy == Triple::ppc64) {
+ TextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
+ MachO::S_COALESCED |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = Ctx->getMachOSection("__TEXT", "__const_coal",
+ MachO::S_COALESCED,
+ SectionKind::getReadOnly());
+ DataCoalSection = Ctx->getMachOSection(
+ "__DATA", "__datacoal_nt", MachO::S_COALESCED, SectionKind::getData());
+ } else {
+ TextCoalSection = TextSection;
+ ConstTextCoalSection = ReadOnlySection;
+ DataCoalSection = DataSection;
+ }
+
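On non-PowerPC targets the three coal handles are plain aliases, so anything directed at a deprecated *coal* section simply lands in its ordinary counterpart. A name-level standalone sketch of the same mapping, with an invented function name:

    #include <cassert>
    #include <string>

    // Name-level view of the aliasing above: on non-PowerPC Mach-O targets
    // the deprecated *coal* sections collapse onto ordinary sections.
    static std::string mapCoalSection(const std::string &Name, bool IsPPC) {
      if (IsPPC) return Name; // PowerPC keeps real coalesced sections
      if (Name == "__textcoal_nt") return "__text";
      if (Name == "__const_coal")  return "__const";
      if (Name == "__datacoal_nt") return "__data";
      return Name;
    }

    int main() {
      assert(mapCoalSection("__textcoal_nt", false) == "__text");
      assert(mapCoalSection("__textcoal_nt", true) == "__textcoal_nt");
    }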
ConstDataSection // .const_data
= Ctx->getMachOSection("__DATA", "__const", 0,
SectionKind::getReadOnlyWithRel());
- DataCoalSection
- = Ctx->getMachOSection("__DATA","__datacoal_nt",
- MachO::S_COALESCED,
- SectionKind::getDataRel());
DataCommonSection
= Ctx->getMachOSection("__DATA","__common",
MachO::S_ZEROFILL,
@@ -147,21 +173,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
SectionKind::getMetadata());
if (RelocM == Reloc::Static) {
- StaticCtorSection
- = Ctx->getMachOSection("__TEXT", "__constructor", 0,
- SectionKind::getDataRel());
- StaticDtorSection
- = Ctx->getMachOSection("__TEXT", "__destructor", 0,
- SectionKind::getDataRel());
+ StaticCtorSection = Ctx->getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getData());
+ StaticDtorSection = Ctx->getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getData());
} else {
- StaticCtorSection
- = Ctx->getMachOSection("__DATA", "__mod_init_func",
- MachO::S_MOD_INIT_FUNC_POINTERS,
- SectionKind::getDataRel());
- StaticDtorSection
- = Ctx->getMachOSection("__DATA", "__mod_term_func",
- MachO::S_MOD_TERM_FUNC_POINTERS,
- SectionKind::getDataRel());
+ StaticCtorSection = Ctx->getMachOSection("__DATA", "__mod_init_func",
+ MachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getData());
+ StaticDtorSection = Ctx->getMachOSection("__DATA", "__mod_term_func",
+ MachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getData());
}
// Exception Handling.
@@ -176,9 +198,11 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
SectionKind::getReadOnly());
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
- CompactUnwindDwarfEHFrameOnly = 0x04000000;
+ CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF
else if (T.getArch() == Triple::aarch64)
- CompactUnwindDwarfEHFrameOnly = 0x03000000;
+ CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF
+ else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
+ CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF
}
// Debug Information.
@@ -235,6 +259,12 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
DwarfDebugInlineSection =
Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
+ DwarfCUIndexSection =
+ Ctx->getMachOSection("__DWARF", "__debug_cu_index", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfTUIndexSection =
+ Ctx->getMachOSection("__DWARF", "__debug_tu_index", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
0, SectionKind::getMetadata());
@@ -258,7 +288,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8
: dwarf::DW_EH_PE_sdata4);
-
break;
default:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -391,17 +420,15 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
break;
}
+ unsigned EHSectionType = T.getArch() == Triple::x86_64
+ ? ELF::SHT_X86_64_UNWIND
+ : ELF::SHT_PROGBITS;
+
// Solaris requires different flags for .eh_frame to seemingly every other
// platform.
- EHSectionType = ELF::SHT_PROGBITS;
- EHSectionFlags = ELF::SHF_ALLOC;
- if (T.isOSSolaris()) {
- if (T.getArch() == Triple::x86_64)
- EHSectionType = ELF::SHT_X86_64_UNWIND;
- else
- EHSectionFlags |= ELF::SHF_WRITE;
- }
-
+ unsigned EHSectionFlags = ELF::SHF_ALLOC;
+ if (T.isOSSolaris() && T.getArch() != Triple::x86_64)
+ EHSectionFlags |= ELF::SHF_WRITE;
// ELF
BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
@@ -423,18 +450,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
TLSBSSSection = Ctx->getELFSection(
".tbss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE);
- DataRelSection = Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
- DataRelLocalSection = Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
DataRelROSection = Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_WRITE);
- DataRelROLocalSection = Ctx->getELFSection(
- ".data.rel.ro.local", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
MergeableConst4Section =
Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 4, "");
@@ -519,14 +537,28 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
DwarfAddrSection =
Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
+ // DWP Sections
+ DwarfCUIndexSection =
+ Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0);
+ DwarfTUIndexSection =
+ Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0);
+
StackMapSection =
Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
FaultMapSection =
Ctx->getELFSection(".llvm_faultmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ EHFrameSection =
+ Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
+ EHFrameSection = Ctx->getCOFFSection(
+ ".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+
bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
CommDirectiveSupportsAlignment = true;
@@ -545,7 +577,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
DataSection = Ctx->getCOFFSection(
".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ SectionKind::getData());
ReadOnlySection = Ctx->getCOFFSection(
".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
@@ -563,21 +595,20 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
StaticCtorSection = Ctx->getCOFFSection(
".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ SectionKind::getData());
StaticDtorSection = Ctx->getCOFFSection(
".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ SectionKind::getData());
}
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
// though it contains relocatable pointers. In PIC mode, this is probably a
// big runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
- assert(T.isOSWindows() && "Windows is the only supported COFF target");
if (T.getArch() == Triple::x86_64) {
// On Windows 64 with SEH, the LSDA is emitted into the .xdata section
- LSDASection = 0;
+ LSDASection = nullptr;
} else {
LSDASection = Ctx->getCOFFSection(".gcc_except_table",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -693,6 +724,16 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata(), "addr_sec");
+ DwarfCUIndexSection = Ctx->getCOFFSection(
+ ".debug_cu_index",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfTUIndexSection = Ctx->getCOFFSection(
+ ".debug_tu_index",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfAccelNamesSection = Ctx->getCOFFSection(
".apple_names",
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -720,11 +761,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
PDataSection = Ctx->getCOFFSection(
".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
+ SectionKind::getData());
XDataSection = Ctx->getCOFFSection(
".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
+ SectionKind::getData());
SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO,
SectionKind::getMetadata());
@@ -732,12 +773,12 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
TLSDataSection = Ctx->getCOFFSection(
".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-
+ SectionKind::getData());
+
StackMapSection = Ctx->getCOFFSection(".llvm_stackmaps",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ SectionKind::getReadOnly());
}
void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
@@ -752,6 +793,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
CommDirectiveSupportsAlignment = true;
SupportsWeakOmittedEHFrame = true;
SupportsCompactUnwindWithoutEHFrame = false;
+ OmitDwarfIfHaveCompactUnwind = false;
PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
dwarf::DW_EH_PE_absptr;
@@ -767,25 +809,26 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
TT = TheTriple;
- Triple::ArchType Arch = TT.getArch();
- // FIXME: Checking for Arch here to filter out bogus triples such as
- // cellspu-apple-darwin. Perhaps we should fix in Triple?
- if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
- Arch == Triple::arm || Arch == Triple::thumb ||
- Arch == Triple::aarch64 ||
- Arch == Triple::ppc || Arch == Triple::ppc64 ||
- Arch == Triple::UnknownArch) &&
- TT.isOSBinFormatMachO()) {
+ switch (TT.getObjectFormat()) {
+ case Triple::MachO:
Env = IsMachO;
initMachOMCObjectFileInfo(TT);
- } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
- Arch == Triple::arm || Arch == Triple::thumb) &&
- (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
+ break;
+ case Triple::COFF:
+ if (!TT.isOSWindows())
+ report_fatal_error(
+ "Cannot initialize MC for non-Windows COFF object files.");
+
Env = IsCOFF;
initCOFFMCObjectFileInfo(TT);
- } else {
+ break;
+ case Triple::ELF:
Env = IsELF;
initELFMCObjectFileInfo(TT);
+ break;
+ case Triple::UnknownObjectFormat:
+ report_fatal_error("Cannot initialize MC for unknown object file format.");
+ break;
}
}
@@ -799,24 +842,3 @@ MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
0, utostr(Hash));
}
-
-void MCObjectFileInfo::InitEHFrameSection() {
- if (Env == IsMachO)
- EHFrameSection =
- Ctx->getMachOSection("__TEXT", "__eh_frame",
- MachO::S_COALESCED |
- MachO::S_ATTR_NO_TOC |
- MachO::S_ATTR_STRIP_STATIC_SYMS |
- MachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
- else if (Env == IsELF)
- EHFrameSection =
- Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
- else
- EHFrameSection =
- Ctx->getCOFFSection(".eh_frame",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 0a63777..d0a7daf 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -28,7 +28,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
MCCodeEmitter *Emitter_)
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
- *TAB.createObjectWriter(OS), OS)),
+ *TAB.createObjectWriter(OS))),
EmitEHFrame(true), EmitDebugFrame(false) {}
MCObjectStreamer::~MCObjectStreamer() {
@@ -39,26 +39,27 @@ MCObjectStreamer::~MCObjectStreamer() {
}
void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
- if (PendingLabels.size()) {
- if (!F) {
- F = new MCDataFragment();
- MCSection *CurSection = getCurrentSectionOnly();
- CurSection->getFragmentList().insert(CurInsertionPoint, F);
- F->setParent(CurSection);
- }
- for (MCSymbol *Sym : PendingLabels) {
- Sym->setFragment(F);
- Sym->setOffset(FOffset);
- }
- PendingLabels.clear();
+ if (PendingLabels.empty())
+ return;
+ if (!F) {
+ F = new MCDataFragment();
+ MCSection *CurSection = getCurrentSectionOnly();
+ CurSection->getFragmentList().insert(CurInsertionPoint, F);
+ F->setParent(CurSection);
+ }
+ for (MCSymbol *Sym : PendingLabels) {
+ Sym->setFragment(F);
+ Sym->setOffset(FOffset);
}
+ PendingLabels.clear();
}
void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
const MCSymbol *Lo,
unsigned Size) {
// If not assigned to the same (valid) fragment, fall back.
- if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment()) {
+ if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() ||
+ Hi->isVariable() || Lo->isVariable()) {
MCStreamer::emitAbsoluteSymbolDiff(Hi, Lo, Size);
return;
}
@@ -93,7 +94,7 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const {
assert(getCurrentSectionOnly() && "No current section!");
if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin())
- return std::prev(CurInsertionPoint);
+ return &*std::prev(CurInsertionPoint);
return nullptr;
}
@@ -121,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
}
void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+ SMLoc Loc) {
MCStreamer::EmitValueImpl(Value, Size, Loc);
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
@@ -155,7 +156,6 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
MCStreamer::EmitLabel(Symbol);
getAssembler().registerSymbol(*Symbol);
- assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");
// If there is a current fragment, mark the symbol as pointing into it.
// Otherwise queue the label and set its fragment pointer when we emit the
@@ -276,7 +276,6 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
raw_svector_ostream VecOS(Code);
getAssembler().getEmitter().encodeInstruction(Inst, VecOS, IF->getFixups(),
STI);
- VecOS.flush();
IF->getContents().append(Code.begin(), Code.end());
}
@@ -321,8 +320,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A,
return AddrDelta;
}
-static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta,
- const MCSymbol *Label, int PointerSize) {
+static void emitDwarfSetLineAddr(MCObjectStreamer &OS,
+ MCDwarfLineTableParams Params,
+ int64_t LineDelta, const MCSymbol *Label,
+ int PointerSize) {
// emit the sequence to set the address
OS.EmitIntValue(dwarf::DW_LNS_extended_op, 1);
OS.EmitULEB128IntValue(PointerSize + 1);
@@ -330,7 +331,7 @@ static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta,
OS.EmitSymbolValue(Label, PointerSize);
// emit the sequence for the LineDelta (from 1) and a zero address delta.
- MCDwarfLineAddr::Emit(&OS, LineDelta, 0);
+ MCDwarfLineAddr::Emit(&OS, Params, LineDelta, 0);
}
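For an 8-byte pointer the sequence above is: the 0x00 extended-op escape, a one-byte ULEB128 length of 9, the DW_LNE_set_address sub-opcode, then the address bytes (emitted through the symbol so they relocate). A standalone sketch that builds the same bytes from a literal address, using the DWARF constants from the spec and invented helper names:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // DWARF line-program constants (DWARF4, section 6.2.5.3).
    static const uint8_t DW_LNS_extended_op = 0x00;
    static const uint8_t DW_LNE_set_address = 0x02;

    // Builds the "set address" escape emitted by emitDwarfSetLineAddr:
    // escape byte, ULEB128 payload length (sub-opcode + PointerSize),
    // sub-opcode, then the little-endian address bytes.
    static void emitSetAddress(std::vector<uint8_t> &Out, uint64_t Addr,
                               int PointerSize) {
      Out.push_back(DW_LNS_extended_op);
      Out.push_back(uint8_t(PointerSize + 1)); // one ULEB128 byte suffices
      Out.push_back(DW_LNE_set_address);
      for (int I = 0; I < PointerSize; ++I)
        Out.push_back(uint8_t(Addr >> (8 * I)));
    }

    int main() {
      std::vector<uint8_t> Out;
      emitSetAddress(Out, 0x1000, 8);
      assert(Out.size() == 11 && Out[1] == 9 &&
             Out[2] == DW_LNE_set_address);
    }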
void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
@@ -338,13 +339,15 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *Label,
unsigned PointerSize) {
if (!LastLabel) {
- emitDwarfSetLineAddr(*this, LineDelta, Label, PointerSize);
+ emitDwarfSetLineAddr(*this, Assembler->getDWARFLinetableParams(), LineDelta,
+ Label, PointerSize);
return;
}
const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
int64_t Res;
if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) {
- MCDwarfLineAddr::Emit(this, LineDelta, Res);
+ MCDwarfLineAddr::Emit(this, Assembler->getDWARFLinetableParams(), LineDelta,
+ Res);
return;
}
insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
@@ -388,26 +391,9 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true);
}
-bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
- int64_t Res;
- if (Offset->evaluateAsAbsolute(Res, getAssembler())) {
- insert(new MCOrgFragment(*Offset, Value));
- return false;
- }
-
- MCSymbol *CurrentPos = getContext().createTempSymbol();
- EmitLabel(CurrentPos);
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *Ref =
- MCSymbolRefExpr::create(CurrentPos, Variant, getContext());
- const MCExpr *Delta =
- MCBinaryExpr::create(MCBinaryExpr::Sub, Offset, Ref, getContext());
-
- if (!Delta->evaluateAsAbsolute(Res, getAssembler()))
- return true;
- EmitFill(Res, Value);
- return false;
+ insert(new MCOrgFragment(*Offset, Value));
}
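With the directive now always funneled into an MCOrgFragment, the padding is resolved at layout time: the fragment occupies the bytes between its own offset and the requested target offset. A standalone sketch of that layout-time computation, with invented names:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Layout-time view of an org fragment: it covers the gap between the
    // fragment's section offset and the requested target offset, filled
    // with the directive's fill value.
    static std::vector<uint8_t> layoutOrg(uint64_t FragmentOffset,
                                          uint64_t TargetOffset,
                                          uint8_t Value) {
      assert(TargetOffset >= FragmentOffset && "cannot move backwards");
      return std::vector<uint8_t>(TargetOffset - FragmentOffset, Value);
    }

    int main() {
      // e.g. ".org 0x10, 0x90" while the section cursor sits at 0x0c
      assert(layoutOrg(0x0c, 0x10, 0x90).size() == 4);
    }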
// Associate GPRel32 fixup with data and resize data area
@@ -430,19 +416,31 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
-void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
- // FIXME: A MCFillFragment would be more memory efficient but MCExpr has
- // problems evaluating expressions across multiple fragments.
+bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+ const MCExpr *Expr, SMLoc Loc) {
+ int64_t OffsetValue;
+ if (!Offset.evaluateAsAbsolute(OffsetValue))
+ llvm_unreachable("Offset is not absolute");
+
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
- DF->getContents().append(NumBytes, FillValue);
+
+ MCFixupKind Kind;
+ if (!Assembler->getBackend().getFixupKind(Name, Kind))
+ return true;
+
+ if (Expr == nullptr)
+ Expr =
+ MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext());
+ DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc));
+ return false;
}
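Note the MC convention in play: returning true means failure, which is how the unknown-name case surfaces as an "unknown relocation name" error in the .reloc parser further down. A standalone mimic of the name-to-kind lookup and that convention; the table entries here are invented:

    #include <cassert>
    #include <map>
    #include <string>

    // Mimics the Name -> fixup-kind lookup above, including the convention
    // that a 'true' return means failure.
    static bool getFixupKind(const std::string &Name, int &Kind) {
      static const std::map<std::string, int> Kinds = {
          {"reloc_example_hi16", 1}, {"reloc_example_lo16", 2}};
      auto It = Kinds.find(Name);
      if (It == Kinds.end())
        return true; // unknown name: caller reports the error
      Kind = It->second;
      return false;
    }

    int main() {
      int Kind = 0;
      assert(!getFixupKind("reloc_example_hi16", Kind) && Kind == 1);
      assert(getFixupKind("no_such_reloc", Kind)); // failure path
    }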
-void MCObjectStreamer::EmitZeros(uint64_t NumBytes) {
+void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
const MCSection *Sec = getCurrentSection().first;
assert(Sec && "need a section");
unsigned ItemSize = Sec->isVirtualSection() ? 0 : 1;
- insert(new MCFillFragment(0, ItemSize, NumBytes));
+ insert(new MCFillFragment(FillValue, ItemSize, NumBytes));
}
void MCObjectStreamer::FinishImpl() {
@@ -451,7 +449,7 @@ void MCObjectStreamer::FinishImpl() {
MCGenDwarfInfo::Emit(this);
// Dump out the dwarf file & directory tables and line tables.
- MCDwarfLineTable::Emit(this);
+ MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
flushPendingLabels(nullptr);
getAssembler().Finish();
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
index 3479034..e84f74a 100644
--- a/contrib/llvm/lib/MC/MCObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -33,8 +33,14 @@ bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
if (!SA.getFragment() || !SB.getFragment())
return false;
- return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(),
- InSet, false);
+ return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, SB, InSet);
+}
+
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
+ bool InSet) const {
+ return isSymbolRefDifferenceFullyResolvedImpl(Asm, A, *B.getFragment(), InSet,
+ false);
}
bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index b983d99..36c1920 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -436,7 +436,8 @@ StringRef AsmLexer::LexUntilEndOfLine() {
return StringRef(TokStart, CurPtr-TokStart);
}
-const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
+size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
+ bool ShouldSkipSpace) {
const char *SavedTokStart = TokStart;
const char *SavedCurPtr = CurPtr;
bool SavedAtStartOfLine = isAtStartOfLine;
@@ -446,7 +447,16 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
SMLoc SavedErrLoc = getErrLoc();
SkipSpace = ShouldSkipSpace;
- AsmToken Token = LexToken();
+
+ size_t ReadCount;
+ for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
+ AsmToken Token = LexToken();
+
+ Buf[ReadCount] = Token;
+
+ if (Token.is(AsmToken::Eof))
+ break;
+ }
SetError(SavedErrLoc, SavedErr);
@@ -455,7 +465,7 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
CurPtr = SavedCurPtr;
TokStart = SavedTokStart;
- return Token;
+ return ReadCount;
}
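The shape is plain save/lex/restore: snapshot the cursor, read up to Buf.size() tokens (stopping after storing Eof), then put the cursor back so the peek is invisible to later lexing. A standalone sketch of the same shape over a trivial character lexer, with an invented type:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Minimal stand-in for the save/lex/restore shape of peekTokens().
    struct TinyLexer {
      std::string Text;
      std::size_t Pos = 0;

      char lex() { return Pos < Text.size() ? Text[Pos++] : '\0'; } // EOF

      std::size_t peekTokens(std::vector<char> &Buf) {
        std::size_t SavedPos = Pos; // snapshot
        std::size_t ReadCount;
        for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
          char Tok = lex();
          Buf[ReadCount] = Tok;
          if (Tok == '\0')
            break; // Eof is stored but not counted
        }
        Pos = SavedPos; // restore: the peek has no side effects
        return ReadCount;
      }
    };

    int main() {
      TinyLexer L{"ab"};
      std::vector<char> Buf(3);
      assert(L.peekTokens(Buf) == 2 && Buf[0] == 'a' && Buf[2] == '\0');
      assert(L.lex() == 'a'); // cursor unchanged by the peek
    }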
bool AsmLexer::isAtStartOfComment(const char *Ptr) {
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index 04d1413..646cbb4 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -33,6 +33,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -251,14 +252,14 @@ private:
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
void eatToEndOfLine();
- bool parseCppHashLineFilenameComment(const SMLoc &L);
+ bool parseCppHashLineFilenameComment(SMLoc L);
void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
ArrayRef<MCAsmMacroArgument> A, bool EnableAtPseudoVariable,
- const SMLoc &L);
+ SMLoc L);
/// \brief Are macros enabled in the parser?
bool areMacrosEnabled() {return MacrosEnabledFlag;}
@@ -342,6 +343,7 @@ private:
enum DirectiveKind {
DK_NO_DIRECTIVE, // Placeholder
DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT,
+ DK_RELOC,
DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_OCTA,
DK_SINGLE, DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
@@ -374,6 +376,7 @@ private:
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+ bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc"
bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ...
bool parseDirectiveOctaValue(); // ".octa"
bool parseDirectiveRealValue(const fltSemantics &); // ".single", ...
@@ -553,6 +556,8 @@ void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
}
bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ if (getTargetParser().getTargetOptions().MCNoWarn)
+ return false;
if (getTargetParser().getTargetOptions().MCFatalWarnings)
return Error(L, Msg, Ranges);
printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
@@ -679,11 +684,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// so conservatively exclude them. Only do this if we're finalizing, though,
// as otherwise we won't necessarily have seen everything yet.
if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
- const MCContext::SymbolTable &Symbols = getContext().getSymbols();
- for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
- e = Symbols.end();
- i != e; ++i) {
- MCSymbol *Sym = i->getValue();
+ for (const auto &TableEntry : getContext().getSymbols()) {
+ MCSymbol *Sym = TableEntry.getValue();
// Variable symbols may not be marked as defined, so check those
// explicitly. If we know it's a variable, we have a definition for
// the purposes of this check.
@@ -691,9 +693,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
- printMessage(
- getLexer().getLoc(), SourceMgr::DK_Error,
- "assembler local symbol '" + Sym->getName() + "' not defined");
+ return Error(getLexer().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined");
}
}
@@ -702,7 +703,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (!HadError && !NoFinalize)
Out.Finish();
- return HadError;
+ return HadError || getContext().hadError();
}
void AsmParser::checkForValidSection() {
@@ -865,11 +866,12 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
- if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
+ if (Sym->isVariable() &&
+ isa<MCConstantExpr>(Sym->getVariableValue(/*SetUsed*/ false))) {
if (Variant)
return Error(EndLoc, "unexpected modifier on variable reference");
- Res = Sym->getVariableValue();
+ Res = Sym->getVariableValue(/*SetUsed*/ false);
return false;
}
@@ -1102,8 +1104,9 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
return false;
}
-unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
- MCBinaryExpr::Opcode &Kind) {
+static unsigned getDarwinBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind,
+ bool ShouldUseLogicalShr) {
switch (K) {
default:
return 0; // not a binop.
@@ -1155,7 +1158,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::Shl;
return 4;
case AsmToken::GreaterGreater:
- Kind = MAI.shouldUseLogicalShr() ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
+ Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
return 4;
// High Intermediate Precedence: +, -
@@ -1179,6 +1182,89 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
}
}
+static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind,
+ bool ShouldUseLogicalShr) {
+ switch (K) {
+ default:
+ return 0; // not a binop.
+
+ // Lowest Precedence: &&, ||
+ case AsmToken::AmpAmp:
+ Kind = MCBinaryExpr::LAnd;
+ return 2;
+ case AsmToken::PipePipe:
+ Kind = MCBinaryExpr::LOr;
+ return 1;
+
+ // Low Precedence: ==, !=, <>, <, <=, >, >=
+ case AsmToken::EqualEqual:
+ Kind = MCBinaryExpr::EQ;
+ return 3;
+ case AsmToken::ExclaimEqual:
+ case AsmToken::LessGreater:
+ Kind = MCBinaryExpr::NE;
+ return 3;
+ case AsmToken::Less:
+ Kind = MCBinaryExpr::LT;
+ return 3;
+ case AsmToken::LessEqual:
+ Kind = MCBinaryExpr::LTE;
+ return 3;
+ case AsmToken::Greater:
+ Kind = MCBinaryExpr::GT;
+ return 3;
+ case AsmToken::GreaterEqual:
+ Kind = MCBinaryExpr::GTE;
+ return 3;
+
+ // Low Intermediate Precedence: +, -
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
+ return 4;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
+ return 4;
+
+ // High Intermediate Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
+ return 5;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
+ return 5;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
+ return 5;
+
+ // Highest Precedence: *, /, %, <<, >>
+ case AsmToken::Star:
+ Kind = MCBinaryExpr::Mul;
+ return 6;
+ case AsmToken::Slash:
+ Kind = MCBinaryExpr::Div;
+ return 6;
+ case AsmToken::Percent:
+ Kind = MCBinaryExpr::Mod;
+ return 6;
+ case AsmToken::LessLess:
+ Kind = MCBinaryExpr::Shl;
+ return 6;
+ case AsmToken::GreaterGreater:
+ Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
+ return 6;
+ }
+}
+
+unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
+ bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
+ return IsDarwin ? getDarwinBinOpPrecedence(K, Kind, ShouldUseLogicalShr)
+ : getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr);
+}
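The visible difference between the two tables is where shifts sit relative to addition: GNU puts << in the top tier with *, while the Darwin table keeps + above <<. So 1 << 2 + 3 is (1 << 2) + 3 = 7 in GNU mode but 1 << (2 + 3) = 32 in Darwin mode. A standalone precedence-climbing sketch over just those two operators, using '<' as a one-character stand-in for '<<':

    #include <cassert>
    #include <cctype>
    #include <cstddef>
    #include <string>

    // Just enough precedence climbing to show the table difference: GNU
    // ranks the shift over '+', Darwin ranks '+' over the shift.
    struct MiniParser {
      std::string Src;
      std::size_t Pos = 0;
      bool Darwin = false;

      int precedence(char Op) const {
        if (Op == '+') return Darwin ? 5 : 4;
        if (Op == '<') return Darwin ? 4 : 6;
        return 0; // not a binop
      }

      long long parsePrimary() {
        long long V = 0;
        while (Pos < Src.size() && std::isdigit((unsigned char)Src[Pos]))
          V = V * 10 + (Src[Pos++] - '0');
        return V;
      }

      long long parseBinOpRHS(int MinPrec, long long LHS) {
        while (true) {
          char Op = Pos < Src.size() ? Src[Pos] : '\0';
          int Prec = precedence(Op);
          if (Prec < MinPrec)
            return LHS;
          ++Pos;
          long long RHS = parsePrimary();
          // A tighter-binding next operator claims the RHS first.
          char Next = Pos < Src.size() ? Src[Pos] : '\0';
          if (precedence(Next) > Prec)
            RHS = parseBinOpRHS(Prec + 1, RHS);
          LHS = (Op == '+') ? LHS + RHS : LHS << RHS;
        }
      }

      long long eval() { return parseBinOpRHS(1, parsePrimary()); }
    };

    int main() {
      assert((MiniParser{"1<2+3", 0, false}.eval()) == 7);  // GNU
      assert((MiniParser{"1<2+3", 0, true}.eval()) == 32);  // Darwin
    }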
+
/// \brief Parse all binary operators with precedence >= 'Precedence'.
/// Res contains the LHS of the expression on input.
bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
@@ -1251,6 +1337,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Treat '.' as a valid identifier in this context.
Lex();
IDVal = ".";
+ } else if (Lexer.is(AsmToken::LCurly)) {
+ // Treat '{' as a valid identifier in this context.
+ Lex();
+ IDVal = "{";
+
+ } else if (Lexer.is(AsmToken::RCurly)) {
+ // Treat '}' as a valid identifier in this context.
+ Lex();
+ IDVal = "}";
} else if (parseIdentifier(IDVal)) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
@@ -1313,6 +1408,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// See what kind of statement we have.
switch (Lexer.getKind()) {
case AsmToken::Colon: {
+ if (!getTargetParser().isLabel(ID))
+ break;
checkForValidSection();
// identifier ':' -> Label.
@@ -1334,8 +1431,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
assert(RewrittenLabel.size() &&
"We should have an internal name here.");
- Info.AsmRewrites->push_back(AsmRewrite(AOK_Label, IDLoc,
- IDVal.size(), RewrittenLabel));
+ Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
+ RewrittenLabel);
IDVal = RewrittenLabel;
}
Sym = getContext().getOrCreateSymbol(IDVal);
@@ -1371,6 +1468,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
}
case AsmToken::Equal:
+ if (!getTargetParser().equalIsAsmAssignment())
+ break;
// identifier '=' ... -> assignment statement
Lex();
@@ -1599,6 +1698,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveError(IDLoc, true);
case DK_WARNING:
return parseDirectiveWarning(IDLoc);
+ case DK_RELOC:
+ return parseDirectiveReloc(IDLoc);
}
return Error(IDLoc, "unknown directive");
@@ -1613,12 +1714,14 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
return parseDirectiveMSAlign(IDLoc, Info);
+ if (ParsingInlineAsm && (IDVal == "even"))
+ Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
checkForValidSection();
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc,
+ bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
Info.ParsedOperands);
Info.ParseError = HadError;
@@ -1703,7 +1806,7 @@ void AsmParser::eatToEndOfLine() {
/// parseCppHashLineFilenameComment as this:
/// ::= # number "filename"
/// or just as a full line comment if it doesn't have a number and a string.
-bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) {
+bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
Lex(); // Eat the hash token.
if (getLexer().isNot(AsmToken::Integer)) {
@@ -1743,7 +1846,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
raw_ostream &OS = errs();
const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
- const SMLoc &DiagLoc = Diag.getLoc();
+ SMLoc DiagLoc = Diag.getLoc();
unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
unsigned CppHashBuf =
Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
@@ -1802,7 +1905,7 @@ static bool isIdentifierChar(char c) {
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
ArrayRef<MCAsmMacroArgument> A,
- bool EnableAtPseudoVariable, const SMLoc &L) {
+ bool EnableAtPseudoVariable, SMLoc L) {
unsigned NParameters = Parameters.size();
bool HasVararg = NParameters ? Parameters.back().Vararg : false;
if ((!IsDarwin || NParameters != 0) && NParameters != A.size())
@@ -1858,10 +1961,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
break;
// Otherwise substitute with the token values, with spaces eliminated.
- for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
- ie = A[Index].end();
- it != ie; ++it)
- OS << it->getString();
+ for (const AsmToken &Token : A[Index])
+ OS << Token.getString();
break;
}
}
@@ -1897,15 +1998,13 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
}
} else {
bool VarargParameter = HasVararg && Index == (NParameters - 1);
- for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
- ie = A[Index].end();
- it != ie; ++it)
+ for (const AsmToken &Token : A[Index])
// We expect no quotes around the string's contents when
// parsing for varargs.
- if (it->getKind() != AsmToken::String || VarargParameter)
- OS << it->getString();
+ if (Token.getKind() != AsmToken::String || VarargParameter)
+ OS << Token.getString();
else
- OS << it->getStringContents();
+ OS << Token.getStringContents();
Pos += 1 + Argument.size();
}
@@ -2371,6 +2470,51 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
return false;
}
+/// parseDirectiveReloc
+/// ::= .reloc expression , identifier [ , expression ]
+bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
+ const MCExpr *Offset;
+ const MCExpr *Expr = nullptr;
+
+ SMLoc OffsetLoc = Lexer.getTok().getLoc();
+ if (parseExpression(Offset))
+ return true;
+
+ // We can only deal with constant expressions at the moment.
+ int64_t OffsetValue;
+ if (!Offset->evaluateAsAbsolute(OffsetValue))
+ return Error(OffsetLoc, "expression is not a constant value");
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("expected comma");
+ Lexer.Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("expected relocation name");
+ SMLoc NameLoc = Lexer.getTok().getLoc();
+ StringRef Name = Lexer.getTok().getIdentifier();
+ Lexer.Lex();
+
+ if (Lexer.is(AsmToken::Comma)) {
+ Lexer.Lex();
+ SMLoc ExprLoc = Lexer.getLoc();
+ if (parseExpression(Expr))
+ return true;
+
+ MCValue Value;
+ if (!Expr->evaluateAsRelocatable(Value, nullptr, nullptr))
+ return Error(ExprLoc, "expression must be relocatable");
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in .reloc directive");
+
+ if (getStreamer().EmitRelocDirective(*Offset, Name, Expr, DirectiveLoc))
+ return Error(NameLoc, "unknown relocation name");
+
+ return false;
+}
+
/// parseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
bool AsmParser::parseDirectiveValue(unsigned Size) {
@@ -2617,7 +2761,6 @@ bool AsmParser::parseDirectiveOrg() {
checkForValidSection();
const MCExpr *Offset;
- SMLoc Loc = getTok().getLoc();
if (parseExpression(Offset))
return true;
@@ -2636,13 +2779,7 @@ bool AsmParser::parseDirectiveOrg() {
}
Lex();
-
- // Only limited forms of relocatable expressions are accepted here, it
- // has to be relative to the current section. The streamer will return
- // 'true' if the expression wasn't evaluatable.
- if (getStreamer().EmitValueToOffset(Offset, FillExpr))
- return Error(Loc, "expected assembly-time absolute expression");
-
+ getStreamer().emitValueToOffset(Offset, FillExpr);
return false;
}
@@ -2703,7 +2840,11 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Alignment = 1ULL << Alignment;
} else {
- // Reject alignments that aren't a power of two, for gas compatibility.
+ // Reject alignments that aren't either a power of two or zero,
+ // for gas compatibility. Alignment of zero is silently rounded
+ // up to one.
+ if (Alignment == 0)
+ Alignment = 1;
if (!isPowerOf2_64(Alignment))
Error(AlignmentLoc, "alignment must be a power of 2");
}
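After this change a non-power-of-two byte count is still rejected, but a zero operand quietly becomes an alignment of one, matching gas. A standalone sketch of the normalization for both directive flavors, with invented names (IsPow2 distinguishes .p2align-style exponents from .align/.balign-style byte counts):

    #include <cassert>
    #include <cstdint>

    // Mirrors the operand handling above: exponent form for .p2align-style
    // directives; byte counts otherwise, with zero rounded up to one and
    // non-powers-of-two rejected.
    static bool normalizeAlignment(bool IsPow2, uint64_t Operand,
                                   uint64_t &Alignment) {
      if (IsPow2) {
        Alignment = uint64_t(1) << Operand; // exponent form
        return true;
      }
      if (Operand == 0)
        Operand = 1; // gas compatibility: zero means "no alignment"
      if ((Operand & (Operand - 1)) != 0)
        return false; // "alignment must be a power of 2"
      Alignment = Operand;
      return true;
    }

    int main() {
      uint64_t A = 0;
      assert(normalizeAlignment(true, 3, A) && A == 8);  // .p2align 3
      assert(normalizeAlignment(false, 0, A) && A == 1); // .balign 0 -> 1
      assert(!normalizeAlignment(false, 6, A));          // rejected
    }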
@@ -4269,6 +4410,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".err"] = DK_ERR;
DirectiveKindMap[".error"] = DK_ERROR;
DirectiveKindMap[".warning"] = DK_WARNING;
+ DirectiveKindMap[".reloc"] = DK_RELOC;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -4405,10 +4547,10 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
- for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
+ for (const MCAsmMacroArgument &Arg : A) {
// Note that the AtPseudoVariable is enabled for instantiations of .irp.
// This is undocumented, but GAS seems to support it.
- if (expandMacro(OS, M->Body, Parameter, *i, true, getTok().getLoc()))
+ if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc()))
return true;
}
@@ -4488,10 +4630,10 @@ bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
if (!MCE)
return Error(ExprLoc, "unexpected expression in _emit");
uint64_t IntValue = MCE->getValue();
- if (!isUIntN(8, IntValue) && !isIntN(8, IntValue))
+ if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
return Error(ExprLoc, "literal value out of range for directive");
- Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len));
+ Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
return false;
}
@@ -4507,8 +4649,7 @@ bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
if (!isPowerOf2_64(IntValue))
return Error(ExprLoc, "literal value not a power of two greater than zero");
- Info.AsmRewrites->push_back(
- AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue)));
+ Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
return false;
}
@@ -4604,18 +4745,18 @@ bool AsmParser::parseMSInlineAsm(
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
OutputConstraints.push_back(("=" + Operand.getConstraint()).str());
- AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
+ AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand.needAddressOf());
InputConstraints.push_back(Operand.getConstraint().str());
- AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
+ AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
}
}
// Consider implicit defs to be clobbers. Think of cpuid and push.
- ArrayRef<uint16_t> ImpDefs(Desc.getImplicitDefs(),
- Desc.getNumImplicitDefs());
+ ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
+ Desc.getNumImplicitDefs());
ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end());
}
@@ -4710,14 +4851,23 @@ bool AsmParser::parseMSInlineAsm(
OS << ".byte";
break;
case AOK_Align: {
- unsigned Val = AR.Val;
- OS << ".align " << Val;
+ // MS alignment directives are measured in bytes. If the native assembler
+ // measures alignment in bytes, we can pass it straight through.
+ OS << ".align";
+ if (getContext().getAsmInfo()->getAlignmentIsInBytes())
+ break;
- // Skip the original immediate.
+ // Alignment is in log2 form, so print that instead and skip the original
+ // immediate.
+ unsigned Val = AR.Val;
+ OS << ' ' << Val;
assert(Val < 10 && "Expected alignment less than 2^10.");
AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
break;
}
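In short: on byte-based assemblers the rewriter emits a bare .align and leaves the MS byte count in place, while on log2-based ones it prints the exponent itself and skips the original immediate. A standalone stand-in for the exact-power-of-two log2 this relies on (llvm::Log2_64 in the real code):

    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm::Log2_64 on exact powers of two, used when the
    // native assembler expects ".align <log2>" rather than a byte count.
    static unsigned log2Exact(uint64_t Bytes) {
      unsigned Exp = 0;
      while (Bytes > 1) {
        Bytes >>= 1;
        ++Exp;
      }
      return Exp;
    }

    int main() {
      // MS "align 8" becomes ".align 3" on a log2-based assembler and
      // stays ".align 8" on a byte-based one.
      assert(log2Exact(8) == 3);
      assert(log2Exact(1) == 0);
    }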
+ case AOK_EVEN:
+ OS << ".even";
+ break;
case AOK_DotOperator:
// Insert the dot if the user omitted it.
OS.flush();
@@ -4803,7 +4953,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
if (isSymbolUsedInExpression(Sym, Value))
return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'");
- else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() &&
+ !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
; // Allow redefinitions of variables that haven't yet been used.
@@ -4815,15 +4966,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
return Parser.Error(EqualLoc,
"invalid reassignment of non-absolute variable '" +
Name + "'");
-
- // Don't count these checks as uses.
- Sym->setUsed(false);
} else if (Name == ".") {
- if (Parser.getStreamer().EmitValueToOffset(Value, 0)) {
- Parser.Error(EqualLoc, "expected absolute expression");
- Parser.eatToEndOfStatement();
- return true;
- }
+ Parser.getStreamer().emitValueToOffset(Value, 0);
return false;
} else
Sym = Parser.getContext().getOrCreateSymbol(Name);
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index f09bce0..a4b2b19 100644
--- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -98,11 +98,10 @@ class COFFAsmParser : public MCAsmParserExtension {
SectionKind::getText());
}
bool ParseSectionDirectiveData(StringRef, SMLoc) {
- return ParseSectionSwitch(".data",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
- | COFF::IMAGE_SCN_MEM_READ
- | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ return ParseSectionSwitch(".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
}
bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
return ParseSectionSwitch(".bss",
@@ -153,7 +152,7 @@ static SectionKind computeSectionKind(unsigned Flags) {
if (Flags & COFF::IMAGE_SCN_MEM_READ &&
(Flags & COFF::IMAGE_SCN_MEM_WRITE) == 0)
return SectionKind::getReadOnly();
- return SectionKind::getDataRel();
+ return SectionKind::getData();
}
bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) {
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index dc664e8..73e068a 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -8,10 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -38,6 +41,8 @@ class DarwinAsmParser : public MCAsmParserExtension {
unsigned TAA = 0, unsigned ImplicitAlign = 0,
unsigned StubSize = 0);
+ SMLoc LastVersionMinDirective;
+
public:
DarwinAsmParser() {}
@@ -164,9 +169,14 @@ public:
addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveTLV>(".tlv");
addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveIdent>(".ident");
+ addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(
+ ".watchos_version_min");
+ addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".tvos_version_min");
addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".ios_version_min");
addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(
".macosx_version_min");
+
+ LastVersionMinDirective = SMLoc();
}
bool parseDirectiveDesc(StringRef, SMLoc);
@@ -381,9 +391,8 @@ bool DarwinAsmParser::parseSectionSwitch(const char *Segment,
// FIXME: Arch specific.
bool isText = TAA & MachO::S_ATTR_PURE_INSTRUCTIONS;
getStreamer().SwitchSection(getContext().getMachOSection(
- Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText() : SectionKind::getData()));
// Set the implicit alignment, if any.
//
@@ -579,12 +588,34 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
if (!ErrorStr.empty())
return Error(Loc, ErrorStr.c_str());
+ // Issue a warning if the target is not powerpc and Section is a *coal* section.
+ Triple TT = getParser().getContext().getObjectFileInfo()->getTargetTriple();
+ Triple::ArchType ArchTy = TT.getArch();
+
+ if (ArchTy != Triple::ppc && ArchTy != Triple::ppc64) {
+ StringRef NonCoalSection = StringSwitch<StringRef>(Section)
+ .Case("__textcoal_nt", "__text")
+ .Case("__const_coal", "__const")
+ .Case("__datacoal_nt", "__data")
+ .Default(Section);
+
+ if (!Section.equals(NonCoalSection)) {
+ StringRef SectionVal(Loc.getPointer());
+ size_t B = SectionVal.find(',') + 1, E = SectionVal.find(',', B);
+ SMLoc BLoc = SMLoc::getFromPointer(SectionVal.data() + B);
+ SMLoc ELoc = SMLoc::getFromPointer(SectionVal.data() + E);
+ getParser().Warning(Loc, "section \"" + Section + "\" is deprecated",
+ SMRange(BLoc, ELoc));
+ getParser().Note(Loc, "change section name to \"" + NonCoalSection +
+ "\"", SMRange(BLoc, ELoc));
+ }
+ }
+
// FIXME: Arch specific.
bool isText = Segment == "__TEXT"; // FIXME: Hack.
getStreamer().SwitchSection(getContext().getMachOSection(
- Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText() : SectionKind::getData()));
return false;
}
@@ -636,17 +667,16 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
"environment variable unset.");
// Open the secure log file if we haven't already.
- raw_ostream *OS = getContext().getSecureLog();
+ raw_fd_ostream *OS = getContext().getSecureLog();
if (!OS) {
std::error_code EC;
- OS = new raw_fd_ostream(SecureLogFile, EC,
- sys::fs::F_Append | sys::fs::F_Text);
- if (EC) {
- delete OS;
+ auto NewOS = llvm::make_unique<raw_fd_ostream>(
+ SecureLogFile, EC, sys::fs::F_Append | sys::fs::F_Text);
+ if (EC)
return Error(IDLoc, Twine("can't open secure log file: ") +
SecureLogFile + " (" + EC.message() + ")");
- }
- getContext().setSecureLog(OS);
+ OS = NewOS.get();
+ getContext().setSecureLog(std::move(NewOS));
}
// Write the message.
@@ -867,9 +897,11 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) {
/// parseVersionMin
/// ::= .ios_version_min major,minor[,update]
/// ::= .macosx_version_min major,minor[,update]
-bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) {
+bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc) {
int64_t Major = 0, Minor = 0, Update = 0;
int Kind = StringSwitch<int>(Directive)
+ .Case(".watchos_version_min", MCVM_WatchOSVersionMin)
+ .Case(".tvos_version_min", MCVM_TvOSVersionMin)
.Case(".ios_version_min", MCVM_IOSVersionMin)
.Case(".macosx_version_min", MCVM_OSXVersionMin);
// Get the major version number.
@@ -902,6 +934,24 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) {
Lex();
}
+ const Triple &T = getContext().getObjectFileInfo()->getTargetTriple();
+ Triple::OSType ExpectedOS = Triple::UnknownOS;
+ switch ((MCVersionMinType)Kind) {
+ case MCVM_WatchOSVersionMin: ExpectedOS = Triple::WatchOS; break;
+ case MCVM_TvOSVersionMin: ExpectedOS = Triple::TvOS; break;
+ case MCVM_IOSVersionMin: ExpectedOS = Triple::IOS; break;
+ case MCVM_OSXVersionMin: ExpectedOS = Triple::MacOSX; break;
+ }
+ if (T.getOS() != ExpectedOS)
+ Warning(Loc, Directive + " should only be used for " +
+ Triple::getOSTypeName(ExpectedOS) + " targets");
+
+ if (LastVersionMinDirective.isValid()) {
+ Warning(Loc, "overriding previous version_min directive");
+ Note(LastVersionMinDirective, "previous definition is here");
+ }
+ LastVersionMinDirective = Loc;
+
// We've parsed a correct version specifier, so send it to the streamer.
getStreamer().EmitVersionMin((MCVersionMinType)Kind, Major, Minor, Update);
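The version-min handling added above does three things: it registers the new .watchos_version_min and .tvos_version_min directives, warns when a directive does not match the OS of the target triple, and warns when a later directive overrides an earlier one. A minimal standalone sketch of that checking logic follows; every name in it is illustrative, not the MC API:

// Standalone sketch (not the MC API): map each version-min directive to the
// OS it implies, warn on a mismatch with the target, and warn when a second
// directive overrides the first.
#include <iostream>
#include <map>
#include <string>

enum class OS { MacOSX, IOS, TvOS, WatchOS };

struct VersionMinChecker {
  OS TargetOS = OS::IOS;   // assumed to come from the target triple
  bool SeenDirective = false;

  void check(const std::string &Directive) {
    static const std::map<std::string, OS> Expected = {
        {".macosx_version_min", OS::MacOSX},
        {".ios_version_min", OS::IOS},
        {".tvos_version_min", OS::TvOS},
        {".watchos_version_min", OS::WatchOS},
    };
    auto It = Expected.find(Directive);
    if (It == Expected.end())
      return;                // not a version-min directive
    if (It->second != TargetOS)
      std::cerr << "warning: " << Directive
                << " should only be used for matching targets\n";
    if (SeenDirective)
      std::cerr << "warning: overriding previous version_min directive\n";
    SeenDirective = true;    // the real patch keeps an SMLoc, not a bool
  }
};

int main() {
  VersionMinChecker C;
  C.check(".macosx_version_min"); // mismatch for an iOS target
  C.check(".ios_version_min");    // overrides the previous directive
}

The real patch stores the previous directive's SMLoc rather than a flag, which is what lets it attach the "previous definition is here" note to the exact earlier line.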
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 5f8a603..6cbcdec 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -52,8 +52,6 @@ public:
addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
addDirectiveHandler<
- &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
- addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
addDirectiveHandler<
@@ -81,8 +79,8 @@ public:
// the best way for us to get access to it?
bool ParseSectionDirectiveData(StringRef, SMLoc) {
return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getData());
}
bool ParseSectionDirectiveText(StringRef, SMLoc) {
return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
@@ -113,9 +111,8 @@ public:
}
bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getData());
}
bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
@@ -123,17 +120,10 @@ public:
ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRel());
}
- bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
- return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRelLocal());
- }
bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getData());
}
bool ParseDirectivePushSection(StringRef, SMLoc);
bool ParseDirectivePopSection(StringRef, SMLoc);
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 795cc85..e891bd2 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -12,8 +12,8 @@
using namespace llvm;
-MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
- TokStart(nullptr), SkipSpace(true) {
+MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true) {
+ CurTok.emplace_back(AsmToken::Error, StringRef());
}
MCAsmLexer::~MCAsmLexer() {
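The constructor rewrite above is small but telling: CurTok is no longer a single AsmToken but a buffer seeded with an Error token via emplace_back, which is the usual shape for a lexer that supports lookahead without re-lexing. A self-contained sketch of that pattern, with hypothetical names:

#include <cstddef>
#include <string>
#include <vector>

struct Token { int Kind; std::string Text; };

class BufferedLexer {
  std::vector<Token> Buf;            // Buf[0] is the current token
  Token lexOne() { return {0, ""}; } // stand-in for the real tokenizer
public:
  BufferedLexer() { Buf.push_back({-1, ""}); } // seed with an Error token

  const Token &current() const { return Buf.front(); }

  // Guarantee N tokens of lookahead, lexing on demand.
  const Token &peek(size_t N) {
    while (Buf.size() <= N)
      Buf.push_back(lexOne());
    return Buf[N];
  }

  void consume() {
    peek(1);               // make sure a successor exists
    Buf.erase(Buf.begin());
  }
};

int main() {
  BufferedLexer L;
  (void)L.peek(2); // look two tokens ahead without consuming anything
  L.consume();     // advance; the first lookahead token becomes current
}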
diff --git a/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
index 60a3a3b..4e4b478 100644
--- a/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -7,13 +7,26 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCTargetAsmParser.h"
using namespace llvm;
-MCTargetAsmParser::MCTargetAsmParser()
- : AvailableFeatures(0), ParsingInlineAsm(false)
+MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions,
+ const MCSubtargetInfo &STI)
+ : AvailableFeatures(0), ParsingInlineAsm(false), MCOptions(MCOptions),
+ STI(&STI)
{
}
MCTargetAsmParser::~MCTargetAsmParser() {
}
+
+MCSubtargetInfo &MCTargetAsmParser::copySTI() {
+ MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI());
+ STI = &STICopy;
+ return STICopy;
+}
+
+const MCSubtargetInfo &MCTargetAsmParser::getSTI() const {
+ return *STI;
+}
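copySTI() above implements copy-on-write for the subtarget: parsers share one immutable MCSubtargetInfo by pointer, and only when a directive must change features do they ask the context for a private copy and repoint STI at it. A sketch of the idiom under invented types; ownership by a long-lived context is the assumption that makes the returned reference safe:

#include <memory>
#include <vector>

struct SubtargetInfo { unsigned FeatureBits = 0; };

// Stand-in for MCContext: owns every copy, so references stay valid for
// the lifetime of the compilation.
struct Context {
  std::vector<std::unique_ptr<SubtargetInfo>> Copies;
  SubtargetInfo &getSubtargetCopy(const SubtargetInfo &STI) {
    Copies.push_back(std::make_unique<SubtargetInfo>(STI));
    return *Copies.back();
  }
};

class Parser {
  Context &Ctx;
  const SubtargetInfo *STI; // shared and read-only by default
public:
  Parser(Context &C, const SubtargetInfo &S) : Ctx(C), STI(&S) {}
  const SubtargetInfo &getSTI() const { return *STI; }

  // Called only when a directive (e.g. a .cpu switch) must mutate features.
  SubtargetInfo &copySTI() {
    SubtargetInfo &Copy = Ctx.getSubtargetCopy(*STI);
    STI = &Copy;          // all later reads see the private copy
    return Copy;
  }
};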
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
index 9152f2b..dbd544a 100644
--- a/contrib/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
: Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
- IsRegistered(false), Variant(V), Kind(K) {}
+ IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
@@ -72,7 +72,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
if (MI == SubsectionFragmentMap.end())
IP = end();
else
- IP = MI->second;
+ IP = MI->second->getIterator();
if (!ExactMatch && Subsection != 0) {
// The GNU as documentation claims that subsections have an alignment of 4,
// although this appears not to be the case.
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
index ce0b4f5..b8373f4 100644
--- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index b4448d7..5a0bb7f 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -27,12 +27,7 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
if (isUnique())
return false;
- // FIXME: Does .section .bss/.data/.text work everywhere??
- if (Name == ".text" || Name == ".data" ||
- (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS()))
- return true;
-
- return false;
+ return MAI.shouldOmitSectionDirective(Name);
}
static void printName(raw_ostream &OS, StringRef Name) {
@@ -138,6 +133,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "note";
else if (Type == ELF::SHT_PROGBITS)
OS << "progbits";
+ else if (Type == ELF::SHT_X86_64_UNWIND)
+ OS << "unwind";
if (EntrySize) {
assert(Flags & ELF::SHF_MERGE);
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
index c9f1591..879c6e5 100644
--- a/contrib/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -177,7 +177,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
TAAParsed = false;
SmallVector<StringRef, 5> SplitSpec;
- Spec.split(SplitSpec, ",");
+ Spec.split(SplitSpec, ',');
// Remove leading and trailing whitespace.
auto GetEmptyOrTrim = [&SplitSpec](size_t Idx) -> StringRef {
return SplitSpec.size() > Idx ? SplitSpec[Idx].trim() : StringRef();
@@ -235,7 +235,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
// The attribute list is a '+' separated list of attributes.
SmallVector<StringRef, 1> SectionAttrs;
- Attrs.split(SectionAttrs, "+", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+ Attrs.split(SectionAttrs, '+', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef &SectionAttr : SectionAttrs) {
auto AttrDescriptorI = std::find_if(
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 7fbbbd9..836b405 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -107,8 +107,7 @@ void MCStreamer::EmitSLEB128IntValue(int64_t Value) {
EmitBytes(OSE.str());
}
-void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc) {
EmitValueImpl(Value, Size, Loc);
}
@@ -189,11 +188,9 @@ void MCStreamer::InitSections(bool NoExecStack) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
-void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) {
- if (Section)
- Symbol->setSection(*Section);
- else
- Symbol->setUndefined();
+void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) {
+ assert(Fragment);
+ Symbol->setFragment(Fragment);
// As we emit symbols into a section, track the order so that they can
// be sorted upon later. Zero is reserved to mean 'unemitted'.
@@ -203,7 +200,8 @@ void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) {
void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection().first && "Cannot emit before setting section!");
- AssignSection(Symbol, getCurrentSection().first);
+ assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");
+ Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment());
MCTargetStreamer *TS = getTargetStreamer();
if (TS)
@@ -361,6 +359,14 @@ void MCStreamer::EmitCFIEscape(StringRef Values) {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createGnuArgsSize(Label, Size);
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
void MCStreamer::EmitCFISignalFrame() {
EnsureValidDwarfFrame();
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
@@ -467,6 +473,8 @@ void MCStreamer::EmitWinEHHandlerData() {
report_fatal_error("Chained unwind areas can't have handlers!");
}
+void MCStreamer::EmitSyntaxDirective() {}
+
void MCStreamer::EmitWinCFIPushReg(unsigned Register) {
EnsureValidWinFrameInfo();
@@ -679,8 +687,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
void MCStreamer::ChangeSection(MCSection *, const MCExpr *) {}
void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
void MCStreamer::EmitBytes(StringRef Data) {}
-void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) {
visitUsedExpr(*Value);
}
void MCStreamer::EmitULEB128Value(const MCExpr *Value) {}
@@ -690,9 +697,7 @@ void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned MaxBytesToEmit) {}
void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
-bool MCStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) {
- return false;
-}
+void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) {}
void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
void MCStreamer::FinishImpl() {}
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
index 9210cf5..dc864d3 100644
--- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -32,8 +32,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
CPUSchedModel = &MCSchedModel::GetDefaultSchedModel();
}
-void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) {
- FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures);
+void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) {
+ FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures);
}
MCSubtargetInfo::MCSubtargetInfo(
@@ -77,13 +77,12 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
- unsigned NumProcs = ProcDesc.size();
-#ifndef NDEBUG
- for (size_t i = 1; i < NumProcs; i++) {
- assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 &&
- "Processor machine model table is not sorted");
- }
-#endif
+ size_t NumProcs = ProcDesc.size();
+ assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs,
+ [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
+ return strcmp(LHS.Key, RHS.Key) < 0;
+ }) &&
+ "Processor machine model table is not sorted");
// Find entry
const SubtargetInfoKV *Found =
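The getSchedModelForCPU hunk above replaces a hand-rolled, NDEBUG-guarded verification loop with a single std::is_sorted assertion over the table. A minimal standalone equivalent of the new form:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>

struct KV { const char *Key; int Value; };

void checkSorted(const KV *Table, size_t N) {
  assert(std::is_sorted(Table, Table + N,
                        [](const KV &L, const KV &R) {
                          return std::strcmp(L.Key, R.Key) < 0;
                        }) &&
         "table is not sorted");
  (void)Table; (void)N; // silence unused warnings in release builds
}

int main() {
  KV Table[] = {{"cortex-a53", 1}, {"cortex-a57", 2}, {"cortex-a72", 3}};
  checkSorted(Table, 3); // passes; a mis-ordered table would assert
}

Both forms compile away in release builds; the std::is_sorted version simply states what is being checked instead of how.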
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
index 125380a..ab3b8eb 100644
--- a/contrib/llvm/lib/MC/MCSymbol.cpp
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -16,8 +16,11 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// Sentinel value for the absolute pseudo section.
-MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
+// Only the address of this fragment is ever actually used.
+static MCDummyFragment SentinelFragment(nullptr);
+
+// Sentinel value for the absolute pseudo fragment.
+MCFragment *MCSymbol::AbsolutePseudoFragment = &SentinelFragment;
void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
MCContext &Ctx) {
diff --git a/contrib/llvm/lib/MC/MCTargetOptions.cpp b/contrib/llvm/lib/MC/MCTargetOptions.cpp
index 1258d9e..4656227 100644
--- a/contrib/llvm/lib/MC/MCTargetOptions.cpp
+++ b/contrib/llvm/lib/MC/MCTargetOptions.cpp
@@ -14,9 +14,10 @@ namespace llvm {
MCTargetOptions::MCTargetOptions()
: SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
- MCFatalWarnings(false), MCSaveTempLabels(false),
- MCUseDwarfDirectory(false), ShowMCEncoding(false), ShowMCInst(false),
- AsmVerbose(false), DwarfVersion(0), ABIName() {}
+ MCFatalWarnings(false), MCNoWarn(false), MCSaveTempLabels(false),
+ MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
+ ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
+ DwarfVersion(0), ABIName() {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/contrib/llvm/lib/MC/MCWinEH.cpp b/contrib/llvm/lib/MC/MCWinEH.cpp
index d5d9ead..83af203 100644
--- a/contrib/llvm/lib/MC/MCWinEH.cpp
+++ b/contrib/llvm/lib/MC/MCWinEH.cpp
@@ -49,10 +49,10 @@ static MCSection *getUnwindInfoSection(StringRef SecName,
if (CodeSecName.startswith(".text$"))
CodeSecName = CodeSecName.substr(6);
- return Context.getCOFFSection(
- (SecName + Twine('$') + CodeSecName).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
+ return Context.getCOFFSection((SecName + Twine('$') + CodeSecName).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getData());
}
}
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index 8ce6127..324385f 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -78,7 +78,6 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
dyn_cast<const MCConstantExpr>(S.getVariableValue()))
return C->getValue();
-
MCValue Target;
if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
@@ -117,7 +116,8 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
return OffsetToAlignment(EndAddr, NextSec.getAlignment());
}
-void MachObjectWriter::writeHeader(unsigned NumLoadCommands,
+void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
+ unsigned NumLoadCommands,
unsigned LoadCommandsSize,
bool SubsectionsViaSymbols) {
uint32_t Flags = 0;
@@ -128,7 +128,7 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands,
// struct mach_header (28 bytes) or
// struct mach_header_64 (32 bytes)
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
@@ -136,29 +136,30 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands,
write32(TargetObjectWriter->getCPUType());
write32(TargetObjectWriter->getCPUSubtype());
- write32(MachO::MH_OBJECT);
+ write32(Type);
write32(NumLoadCommands);
write32(LoadCommandsSize);
write32(Flags);
if (is64Bit())
write32(0); // reserved
- assert(OS.tell() - Start ==
- (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header)));
+ assert(
+ getStream().tell() - Start ==
+ (is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header)));
}
/// writeSegmentLoadCommand - Write a segment load command.
///
/// \param NumSections The number of sections in this segment.
/// \param SectionDataSize The total size of the sections.
-void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections,
- uint64_t VMSize,
- uint64_t SectionDataStartOffset,
- uint64_t SectionDataSize) {
+void MachObjectWriter::writeSegmentLoadCommand(
+ StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
+ uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
+ uint32_t InitProt) {
// struct segment_command (56 bytes) or
// struct segment_command_64 (72 bytes)
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
unsigned SegmentLoadCommandSize =
@@ -169,31 +170,32 @@ void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections,
NumSections * (is64Bit() ? sizeof(MachO::section_64) :
sizeof(MachO::section)));
- writeBytes("", 16);
+ assert(Name.size() <= 16);
+ writeBytes(Name, 16);
if (is64Bit()) {
- write64(0); // vmaddr
+ write64(VMAddr); // vmaddr
write64(VMSize); // vmsize
write64(SectionDataStartOffset); // file offset
write64(SectionDataSize); // file size
} else {
- write32(0); // vmaddr
+ write32(VMAddr); // vmaddr
write32(VMSize); // vmsize
write32(SectionDataStartOffset); // file offset
write32(SectionDataSize); // file size
}
// maxprot
- write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+ write32(MaxProt);
// initprot
- write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+ write32(InitProt);
write32(NumSections);
write32(0); // flags
- assert(OS.tell() - Start == SegmentLoadCommandSize);
+ assert(getStream().tell() - Start == SegmentLoadCommandSize);
}
-void MachObjectWriter::writeSection(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSection &Sec, uint64_t FileOffset,
+void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
+ const MCSection &Sec, uint64_t VMAddr,
+ uint64_t FileOffset, unsigned Flags,
uint64_t RelocationsStart,
unsigned NumRelocations) {
uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
@@ -208,24 +210,20 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
// struct section (68 bytes) or
// struct section_64 (80 bytes)
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
writeBytes(Section.getSectionName(), 16);
writeBytes(Section.getSegmentName(), 16);
if (is64Bit()) {
- write64(getSectionAddress(&Sec)); // address
+ write64(VMAddr); // address
write64(SectionSize); // size
} else {
- write32(getSectionAddress(&Sec)); // address
+ write32(VMAddr); // address
write32(SectionSize); // size
}
write32(FileOffset);
- unsigned Flags = Section.getTypeAndAttributes();
- if (Section.hasInstructions())
- Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
-
assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
write32(Log2_32(Section.getAlignment()));
write32(NumRelocations ? RelocationsStart : 0);
@@ -236,8 +234,8 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
if (is64Bit())
write32(0); // reserved3
- assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) :
- sizeof(MachO::section)));
+ assert(getStream().tell() - Start ==
+ (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
}
void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
@@ -246,7 +244,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
uint32_t StringTableSize) {
// struct symtab_command (24 bytes)
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
write32(MachO::LC_SYMTAB);
@@ -256,7 +254,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
write32(StringTableOffset);
write32(StringTableSize);
- assert(OS.tell() - Start == sizeof(MachO::symtab_command));
+ assert(getStream().tell() - Start == sizeof(MachO::symtab_command));
}
void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
@@ -269,7 +267,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
uint32_t NumIndirectSymbols) {
// struct dysymtab_command (80 bytes)
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
write32(MachO::LC_DYSYMTAB);
@@ -293,7 +291,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
write32(0); // locreloff
write32(0); // nlocrel
- assert(OS.tell() - Start == sizeof(MachO::dysymtab_command));
+ assert(getStream().tell() - Start == sizeof(MachO::dysymtab_command));
}
MachObjectWriter::MachSymbolData *
@@ -389,7 +387,7 @@ void MachObjectWriter::writeNlist(MachSymbolData &MSD,
void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
uint32_t DataOffset,
uint32_t DataSize) {
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
write32(Type);
@@ -397,7 +395,7 @@ void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
write32(DataOffset);
write32(DataSize);
- assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command));
+ assert(getStream().tell() - Start == sizeof(MachO::linkedit_data_command));
}
static unsigned ComputeLinkerOptionsLoadCommandSize(
@@ -413,7 +411,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
const std::vector<std::string> &Options)
{
unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
- uint64_t Start = OS.tell();
+ uint64_t Start = getStream().tell();
(void) Start;
write32(MachO::LC_LINKER_OPTION);
@@ -429,7 +427,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
// Pad to a multiple of the pointer size.
writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
- assert(OS.tell() - Start == Size);
+ assert(getStream().tell() - Start == Size);
}
void MachObjectWriter::recordRelocation(MCAssembler &Asm,
@@ -458,9 +456,9 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_SYMBOL_STUBS) {
- MCSymbol &Symbol = *it->Symbol;
- report_fatal_error("indirect symbol '" + Symbol.getName() +
- "' not in a symbol pointer or stub section");
+ MCSymbol &Symbol = *it->Symbol;
+ report_fatal_error("indirect symbol '" + Symbol.getName() +
+ "' not in a symbol pointer or stub section");
}
}
@@ -522,7 +520,7 @@ void MachObjectWriter::computeSymbolTable(
StringTable.add(Symbol.getName());
}
- StringTable.finalize(StringTableBuilder::MachO);
+ StringTable.finalize();
// Build the symbol arrays but only for non-local symbols.
//
@@ -628,6 +626,18 @@ void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
}
bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
+ bool InSet) const {
+ // FIXME: We don't handle things like
+ // foo = .
+ // creating atoms.
+ if (A.isVariable() || B.isVariable())
+ return false;
+ return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
+ InSet);
+}
+
+bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
if (InSet)
@@ -746,7 +756,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
++NumLoadCommands;
LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
}
-
+
// Compute the total size of the section data, as well as its file size and vm
// size.
uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
@@ -776,18 +786,25 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
SectionDataFileSize += SectionDataPadding;
// Write the prolog, starting with the header and load command...
- writeHeader(NumLoadCommands, LoadCommandsSize,
+ writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
Asm.getSubsectionsViaSymbols());
- writeSegmentLoadCommand(NumSections, VMSize,
- SectionDataStart, SectionDataSize);
+ uint32_t Prot =
+ MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
+ writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
+ SectionDataSize, Prot, Prot);
// ... and then the section headers.
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
- for (const MCSection &Sec : Asm) {
+ for (const MCSection &Section : Asm) {
+ const auto &Sec = cast<MCSectionMachO>(Section);
std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
unsigned NumRelocs = Relocs.size();
uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
- writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs);
+ unsigned Flags = Sec.getTypeAndAttributes();
+ if (Sec.hasInstructions())
+ Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
+ writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
+ RelocTableEnd, NumRelocs);
RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
}
@@ -798,8 +815,22 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
assert(VersionInfo.Major < 65536 && "unencodable major target version");
uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) |
(VersionInfo.Major << 16);
- write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX :
- MachO::LC_VERSION_MIN_IPHONEOS);
+ MachO::LoadCommandType LCType;
+ switch (VersionInfo.Kind) {
+ case MCVM_OSXVersionMin:
+ LCType = MachO::LC_VERSION_MIN_MACOSX;
+ break;
+ case MCVM_IOSVersionMin:
+ LCType = MachO::LC_VERSION_MIN_IPHONEOS;
+ break;
+ case MCVM_TvOSVersionMin:
+ LCType = MachO::LC_VERSION_MIN_TVOS;
+ break;
+ case MCVM_WatchOSVersionMin:
+ LCType = MachO::LC_VERSION_MIN_WATCHOS;
+ break;
+ }
+ write32(LCType);
write32(sizeof(MachO::version_min_command));
write32(EncodedVersion);
write32(0); // reserved.
@@ -901,12 +932,12 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
// Write out the loh commands, if there is one.
if (LOHSize) {
#ifndef NDEBUG
- unsigned Start = OS.tell();
+ unsigned Start = getStream().tell();
#endif
Asm.getLOHContainer().emit(*this, Layout);
// Pad to a multiple of the pointer size.
writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
- assert(OS.tell() - Start == LOHSize);
+ assert(getStream().tell() - Start == LOHSize);
}
// Write the symbol table data, if used.
@@ -942,7 +973,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
writeNlist(Entry, Layout);
// Write the string table.
- OS << StringTable.data();
+ getStream() << StringTable.data();
}
}
diff --git a/contrib/llvm/lib/MC/StringTableBuilder.cpp b/contrib/llvm/lib/MC/StringTableBuilder.cpp
index 9de9363..80e5522 100644
--- a/contrib/llvm/lib/MC/StringTableBuilder.cpp
+++ b/contrib/llvm/lib/MC/StringTableBuilder.cpp
@@ -8,35 +8,71 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Endian.h"
+#include <vector>
+
using namespace llvm;
-static bool compareBySuffix(StringRef a, StringRef b) {
- size_t sizeA = a.size();
- size_t sizeB = b.size();
- size_t len = std::min(sizeA, sizeB);
- for (size_t i = 0; i < len; ++i) {
- char ca = a[sizeA - i - 1];
- char cb = b[sizeB - i - 1];
- if (ca != cb)
- return ca > cb;
+StringTableBuilder::StringTableBuilder(Kind K) : K(K) {}
+
+typedef std::pair<StringRef, size_t> StringPair;
+
+// Returns the character at Pos from the end of the string, or -1 past the end.
+static int charTailAt(StringPair *P, size_t Pos) {
+ StringRef S = P->first;
+ if (Pos >= S.size())
+ return -1;
+ return (unsigned char)S[S.size() - Pos - 1];
+}
+
+// Three-way radix quicksort. This is much faster than std::sort with strcmp
+// because it does not compare characters that we already know are the same.
+static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
+tailcall:
+ if (End - Begin <= 1)
+ return;
+
+ // Partition items. Items in [Begin, P) are greater than the pivot,
+ // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
+ int Pivot = charTailAt(*Begin, Pos);
+ StringPair **P = Begin;
+ StringPair **Q = End;
+ for (StringPair **R = Begin + 1; R < Q;) {
+ int C = charTailAt(*R, Pos);
+ if (C > Pivot)
+ std::swap(*P++, *R++);
+ else if (C < Pivot)
+ std::swap(*--Q, *R);
+ else
+ R++;
+ }
+
+ multikey_qsort(Begin, P, Pos);
+ multikey_qsort(Q, End, Pos);
+ if (Pivot != -1) {
+ // multikey_qsort(P, Q, Pos + 1), but with the tail call made explicit.
+ Begin = P;
+ End = Q;
+ ++Pos;
+ goto tailcall;
}
- return sizeA > sizeB;
}
-void StringTableBuilder::finalize(Kind kind) {
- SmallVector<StringRef, 8> Strings;
+void StringTableBuilder::finalize() {
+ std::vector<std::pair<StringRef, size_t> *> Strings;
Strings.reserve(StringIndexMap.size());
+ for (std::pair<StringRef, size_t> &P : StringIndexMap)
+ Strings.push_back(&P);
- for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i)
- Strings.push_back(i->getKey());
-
- std::sort(Strings.begin(), Strings.end(), compareBySuffix);
+ if (!Strings.empty())
+ multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
- switch (kind) {
+ switch (K) {
+ case RAW:
+ break;
case ELF:
case MachO:
// Start the table with a NUL byte.
@@ -49,22 +85,25 @@ void StringTableBuilder::finalize(Kind kind) {
}
StringRef Previous;
- for (StringRef s : Strings) {
- if (kind == WinCOFF)
- assert(s.size() > COFF::NameSize && "Short string in COFF string table!");
+ for (std::pair<StringRef, size_t> *P : Strings) {
+ StringRef S = P->first;
+ if (K == WinCOFF)
+ assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
- if (Previous.endswith(s)) {
- StringIndexMap[s] = StringTable.size() - 1 - s.size();
+ if (Previous.endswith(S)) {
+ P->second = StringTable.size() - S.size() - (K != RAW);
continue;
}
- StringIndexMap[s] = StringTable.size();
- StringTable += s;
- StringTable += '\x00';
- Previous = s;
+ P->second = StringTable.size();
+ StringTable += S;
+ if (K != RAW)
+ StringTable += '\x00';
+ Previous = S;
}
- switch (kind) {
+ switch (K) {
+ case RAW:
case ELF:
break;
case MachO:
@@ -75,14 +114,31 @@ void StringTableBuilder::finalize(Kind kind) {
case WinCOFF:
// Write the table size in the first word.
assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
- uint32_t size = static_cast<uint32_t>(StringTable.size());
+ uint32_t Size = static_cast<uint32_t>(StringTable.size());
support::endian::write<uint32_t, support::little, support::unaligned>(
- StringTable.data(), size);
+ StringTable.data(), Size);
break;
}
+
+ Size = StringTable.size();
}
void StringTableBuilder::clear() {
StringTable.clear();
StringIndexMap.clear();
}
+
+size_t StringTableBuilder::getOffset(StringRef S) const {
+ assert(isFinalized());
+ auto I = StringIndexMap.find(S);
+ assert(I != StringIndexMap.end() && "String is not in table!");
+ return I->second;
+}
+
+size_t StringTableBuilder::add(StringRef S) {
+ assert(!isFinalized());
+ auto P = StringIndexMap.insert(std::make_pair(S, Size));
+ if (P.second)
+ Size += S.size() + (K != RAW);
+ return P.first->second;
+}
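Two ideas above are worth unpacking: the three-way radix quicksort orders strings by suffix so that a string which is a suffix of its neighbour ends up adjacent to it, and finalize() then emits each string only when it cannot reuse the previous string's tail (so "bar\0" lives inside "foobar\0"). A standalone sketch of the tail-merging step, assuming a convenient pre-sorted order:

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Strings = {"foobar", "bar", "baz"};
  // Assume suffix order: a string sorts next to anything it is a suffix of.
  // (The real code uses a multikey quicksort on reversed characters.)
  std::string Table = std::string(1, '\0'); // leading NUL, as for ELF/MachO
  std::string Previous;
  for (const std::string &S : Strings) {
    size_t Off;
    if (!Previous.empty() && Previous.size() >= S.size() &&
        Previous.compare(Previous.size() - S.size(), S.size(), S) == 0) {
      Off = Table.size() - S.size() - 1; // share the tail and its NUL
    } else {
      Off = Table.size();
      Table += S;
      Table += '\0';
      Previous = S;
    }
    std::printf("%-6s -> offset %zu\n", S.c_str(), Off);
  }
  std::printf("table size: %zu bytes\n", Table.size());
}

Running this stores "foobar" and "baz" but gives "bar" an offset inside the bytes already written for "foobar", which is exactly the size win the builder is after.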
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
index 76574e9..b642f17 100644
--- a/contrib/llvm/lib/MC/SubtargetFeature.cpp
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -56,7 +56,7 @@ static inline bool isEnabled(StringRef Feature) {
///
static void Split(std::vector<std::string> &V, StringRef S) {
SmallVector<StringRef, 3> Tmp;
- S.split(Tmp, ",", -1, false /* KeepEmpty */);
+ S.split(Tmp, ',', -1, false /* KeepEmpty */);
V.assign(Tmp.begin(), Tmp.end());
}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 56ef1c7..a382090 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -32,8 +33,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/JamCRC.h"
#include "llvm/Support/TimeValue.h"
#include <cstdio>
+#include <ctime>
using namespace llvm;
@@ -76,8 +79,6 @@ public:
COFFSymbol(StringRef name);
void set_name_offset(uint32_t Offset);
- bool should_keep() const;
-
int64_t getIndex() const { return Index; }
void setIndex(int Value) {
Index = Value;
@@ -125,7 +126,7 @@ public:
COFF::header Header;
sections Sections;
symbols Symbols;
- StringTableBuilder Strings;
+ StringTableBuilder Strings{StringTableBuilder::WinCOFF};
// Maps used during object file creation.
section_map SectionMap;
@@ -160,8 +161,6 @@ public:
void SetSymbolName(COFFSymbol &S);
void SetSectionName(COFFSection &S);
- bool ExportSymbol(const MCSymbol &Symbol, MCAssembler &Asm);
-
bool IsPhysicalSection(COFFSection *S);
// Entity writing methods.
@@ -215,38 +214,6 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
write_uint32_le(Data.Name + 4, Offset);
}
-/// logic to decide if the symbol should be reported in the symbol table
-bool COFFSymbol::should_keep() const {
- // no section means its external, keep it
- if (!Section)
- return true;
-
- // if it has relocations pointing at it, keep it
- if (Relocations > 0) {
- assert(Section->Number != -1 && "Sections with relocations must be real!");
- return true;
- }
-
- // if this is a safeseh handler, keep it
- if (MC && (cast<MCSymbolCOFF>(MC)->isSafeSEH()))
- return true;
-
- // if the section its in is being droped, drop it
- if (Section->Number == -1)
- return false;
-
- // if it is the section symbol, keep it
- if (Section->Symbol == this)
- return true;
-
- // if its temporary, drop it
- if (MC && MC->isTemporary())
- return false;
-
- // otherwise, keep it
- return true;
-}
-
//------------------------------------------------------------------------------
// Section class implementation
@@ -392,7 +359,6 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
MCAssembler &Assembler,
const MCAsmLayout &Layout) {
COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
- SymbolMap[&Symbol] = coff_symbol;
if (cast<MCSymbolCOFF>(Symbol).isWeakExternal()) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
@@ -515,25 +481,6 @@ void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
}
-bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol,
- MCAssembler &Asm) {
- // This doesn't seem to be right. Strings referred to from the .data section
- // need symbols so they can be linked to code in the .text section right?
-
- // return Asm.isSymbolLinkerVisible(Symbol);
-
- // Non-temporary labels should always be visible to the linker.
- if (!Symbol.isTemporary())
- return true;
-
- // Temporary variable symbols are invisible.
- if (Symbol.isVariable())
- return false;
-
- // Absolute temporary labels are never visible.
- return !Symbol.isAbsolute();
-}
-
bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) ==
0;
@@ -663,7 +610,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
defineSection(static_cast<const MCSectionCOFF &>(Section));
for (const MCSymbol &Symbol : Asm.symbols())
- if (ExportSymbol(Symbol, Asm))
+ if (!Symbol.isTemporary())
DefineSymbol(Symbol, Asm, Layout);
}
@@ -674,7 +621,8 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
// thunk to implement their /INCREMENTAL feature. Make sure we don't optimize
// away any relocations to functions.
uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
- if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
+ if (Asm.isIncrementalLinkerCompatible() &&
+ (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
return false;
return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
@@ -702,41 +650,49 @@ void WinCOFFObjectWriter::recordRelocation(
const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");
- const MCSymbol &Symbol = Target.getSymA()->getSymbol();
- const MCSymbol &A = Symbol;
- if (!A.isRegistered())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ const MCSymbol &A = Target.getSymA()->getSymbol();
+ if (!A.isRegistered()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
Twine("symbol '") + A.getName() +
"' can not be undefined");
+ return;
+ }
+ if (A.isTemporary() && A.isUndefined()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ Twine("assembler label '") + A.getName() +
+ "' can not be undefined");
+ return;
+ }
MCSection *Section = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
assert(SectionMap.find(Section) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
- assert(SymbolMap.find(&A) != SymbolMap.end() &&
- "Symbol must already have been defined in executePostLayoutBinding!");
COFFSection *coff_section = SectionMap[Section];
- COFFSymbol *coff_symbol = SymbolMap[&A];
const MCSymbolRefExpr *SymB = Target.getSymB();
bool CrossSection = false;
if (SymB) {
const MCSymbol *B = &SymB->getSymbol();
- if (!B->getFragment())
- Asm.getContext().reportFatalError(
+ if (!B->getFragment()) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
Twine("symbol '") + B->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
- if (!A.getFragment())
- Asm.getContext().reportFatalError(
+ if (!A.getFragment()) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
- Twine("symbol '") + Symbol.getName() +
+ Twine("symbol '") + A.getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
- CrossSection = &Symbol.getSection() != &B->getSection();
+ CrossSection = &A.getSection() != &B->getSection();
// Offset of the symbol in the section
int64_t OffsetOfB = Layout.getSymbolOffset(*B);
@@ -765,12 +721,19 @@ void WinCOFFObjectWriter::recordRelocation(
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
// Turn relocations for temporary symbols into section relocations.
- if (coff_symbol->MC->isTemporary() || CrossSection) {
- Reloc.Symb = coff_symbol->Section->Symbol;
- FixedValue += Layout.getFragmentOffset(coff_symbol->MC->getFragment()) +
- coff_symbol->MC->getOffset();
- } else
- Reloc.Symb = coff_symbol;
+ if (A.isTemporary() || CrossSection) {
+ MCSection *TargetSection = &A.getSection();
+ assert(
+ SectionMap.find(TargetSection) != SectionMap.end() &&
+ "Section must already have been defined in executePostLayoutBinding!");
+ Reloc.Symb = SectionMap[TargetSection]->Symbol;
+ FixedValue += Layout.getSymbolOffset(A);
+ } else {
+ assert(
+ SymbolMap.find(&A) != SymbolMap.end() &&
+ "Symbol must already have been defined in executePostLayoutBinding!");
+ Reloc.Symb = SymbolMap[&A];
+ }
++Reloc.Symb->Relocations;
@@ -884,14 +847,10 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
// Update section number & offset for symbols that have them.
if (Symbol->Section)
Symbol->Data.SectionNumber = Symbol->Section->Number;
- if (Symbol->should_keep()) {
- Symbol->setIndex(Header.NumberOfSymbols++);
- // Update auxiliary symbol info.
- Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
- Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
- } else {
- Symbol->setIndex(-1);
- }
+ Symbol->setIndex(Header.NumberOfSymbols++);
+ // Update auxiliary symbol info.
+ Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
+ Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
}
// Build string table.
@@ -899,16 +858,15 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
if (S->Name.size() > COFF::NameSize)
Strings.add(S->Name);
for (const auto &S : Symbols)
- if (S->should_keep() && S->Name.size() > COFF::NameSize)
+ if (S->Name.size() > COFF::NameSize)
Strings.add(S->Name);
- Strings.finalize(StringTableBuilder::WinCOFF);
+ Strings.finalize();
// Set names.
for (const auto &S : Sections)
SetSectionName(*S);
for (auto &S : Symbols)
- if (S->should_keep())
- SetSymbolName(*S);
+ SetSymbolName(*S);
// Fixup weak external references.
for (auto &Symbol : Symbols) {
@@ -948,7 +906,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
// Assign file offsets to COFF object file structures.
- unsigned offset = 0;
+ unsigned offset = getInitialOffset();
if (UseBigObj)
offset += COFF::Header32Size;
@@ -1011,8 +969,23 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Header.PointerToSymbolTable = offset;
+ // FIXME: Remove the #else branch and make the #if branch unconditional once
+ // LLVM's self-host configuration is aware of /Brepro.
+#if (ENABLE_TIMESTAMPS == 1)
+ // MS LINK expects to be able to use this timestamp to implement their
+ // /INCREMENTAL feature.
+ if (Asm.isIncrementalLinkerCompatible()) {
+ std::time_t Now = time(nullptr);
+ if (Now < 0 || !isUInt<32>(Now))
+ Now = UINT32_MAX;
+ Header.TimeDateStamp = Now;
+ } else {
+ Header.TimeDateStamp = 0;
+ }
+#else
// We want a deterministic output. It looks like GNU as also writes 0 in here.
Header.TimeDateStamp = 0;
+#endif
// Write it all to disk...
WriteFileHeader(Header);
@@ -1029,6 +1002,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
}
+ SmallVector<char, 128> SectionContents;
for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(),
je = Asm.end();
(i != ie) && (j != je); ++i, ++j) {
@@ -1037,20 +1011,47 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
continue;
if ((*i)->Header.PointerToRawData != 0) {
- assert(OS.tell() <= (*i)->Header.PointerToRawData &&
+ assert(getStream().tell() <= (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- unsigned SectionDataPadding = (*i)->Header.PointerToRawData - OS.tell();
+ unsigned SectionDataPadding =
+ (*i)->Header.PointerToRawData - getStream().tell();
assert(SectionDataPadding < 4 &&
"Should only need at most three bytes of padding!");
WriteZeros(SectionDataPadding);
+ // Save the contents of the section to a temporary buffer; we need this
+ // so we can CRC the data before writing it into the object file.
+ SectionContents.clear();
+ raw_svector_ostream VecOS(SectionContents);
+ raw_pwrite_stream &OldStream = getStream();
+ // Redirect the output stream to our buffer.
+ setStream(VecOS);
+ // Fill our buffer with the section data.
Asm.writeSectionData(&*j, Layout);
+ // Reset the stream back to what it was before.
+ setStream(OldStream);
+
+ // Calculate our CRC with an initial value of '0'; this is not how
+ // JamCRC is specified, but it matches the expected output.
+ JamCRC JC(/*Init=*/0x00000000U);
+ JC.update(SectionContents);
+
+ // Write the section contents to the object file.
+ getStream() << SectionContents;
+
+ // Update the section definition auxiliary symbol to record the CRC.
+ COFFSection *Sec = SectionMap[&*j];
+ COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux;
+ assert(AuxSyms.size() == 1 &&
+ AuxSyms[0].AuxType == ATSectionDefinition);
+ AuxSymbol &SecDef = AuxSyms[0];
+ SecDef.Aux.SectionDefinition.CheckSum = JC.getCRC();
}
if ((*i)->Relocations.size() > 0) {
- assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ assert(getStream().tell() == (*i)->Header.PointerToRelocations &&
"Section::PointerToRelocations is insane!");
if ((*i)->Relocations.size() >= 0xffff) {
@@ -1071,14 +1072,14 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
}
- assert(OS.tell() == Header.PointerToSymbolTable &&
+ assert(getStream().tell() == Header.PointerToSymbolTable &&
"Header::PointerToSymbolTable is insane!");
for (auto &Symbol : Symbols)
if (Symbol->getIndex() != -1)
WriteSymbol(*Symbol);
- OS.write(Strings.data().data(), Strings.data().size());
+ getStream().write(Strings.data().data(), Strings.data().size());
}
MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
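The WinCOFF writer above now redirects its output stream into a buffer, checksums each section's bytes, and records the result in the section-definition auxiliary symbol. A standalone sketch of that checksum: reflected CRC-32 with polynomial 0xEDB88320 and no final xor, seeded with 0 as the patch deliberately does rather than the 0xFFFFFFFF that the JamCRC specification uses:

#include <cstdint>
#include <cstdio>
#include <vector>

static uint32_t sectionCRC(const std::vector<char> &Data) {
  uint32_t CRC = 0; // the patch passes Init=0 explicitly
  for (char Byte : Data) {
    CRC ^= static_cast<uint8_t>(Byte);
    for (int I = 0; I < 8; ++I) // bitwise, reflected update
      CRC = (CRC >> 1) ^ (0xEDB88320u & (0u - (CRC & 1u)));
  }
  return CRC; // no final bit-flip, unlike standard CRC-32
}

int main() {
  std::vector<char> Section = {'t', 'e', 'x', 't'};
  std::printf("checksum: 0x%08X\n",
              static_cast<unsigned>(sectionCRC(Section)));
}

Buffering the section first is what makes this possible: the bytes have to be visible twice, once for the CRC and once for the actual write to the object file.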
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
index 36dd691..a38b1a4 100644
--- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -49,7 +49,6 @@ void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
@@ -123,29 +122,37 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
"Got non-COFF section in the COFF backend!");
if (CurSymbol)
- FatalError("starting a new symbol definition without completing the "
- "previous one");
+ Error("starting a new symbol definition without completing the "
+ "previous one");
CurSymbol = Symbol;
}
void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
- if (!CurSymbol)
- FatalError("storage class specified outside of symbol definition");
+ if (!CurSymbol) {
+ Error("storage class specified outside of symbol definition");
+ return;
+ }
- if (StorageClass & ~COFF::SSC_Invalid)
- FatalError("storage class value '" + Twine(StorageClass) +
+ if (StorageClass & ~COFF::SSC_Invalid) {
+ Error("storage class value '" + Twine(StorageClass) +
"' out of range");
+ return;
+ }
getAssembler().registerSymbol(*CurSymbol);
cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass);
}
void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
- if (!CurSymbol)
- FatalError("symbol type specified outside of a symbol definition");
+ if (!CurSymbol) {
+ Error("symbol type specified outside of a symbol definition");
+ return;
+ }
- if (Type & ~0xffff)
- FatalError("type value '" + Twine(Type) + "' out of range");
+ if (Type & ~0xffff) {
+ Error("type value '" + Twine(Type) + "' out of range");
+ return;
+ }
getAssembler().registerSymbol(*CurSymbol);
cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type);
@@ -153,7 +160,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
void MCWinCOFFStreamer::EndCOFFSymbolDef() {
if (!CurSymbol)
- FatalError("ending symbol definition without starting one");
+ Error("ending symbol definition without starting one");
CurSymbol = nullptr;
}
@@ -215,8 +222,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
Size = std::max(Size, static_cast<uint64_t>(ByteAlignment));
}
- AssignSection(Symbol, nullptr);
-
getAssembler().registerSymbol(*Symbol);
Symbol->setExternal(true);
Symbol->setCommon(Size, ByteAlignment);
@@ -228,7 +233,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
OS << " -aligncomm:\"" << Symbol->getName() << "\","
<< Log2_32_Ceil(ByteAlignment);
- OS.flush();
PushSection();
SwitchSection(MFI->getDrectveSection());
@@ -249,8 +253,6 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
getAssembler().registerSymbol(*Symbol);
Symbol->setExternal(false);
- AssignSection(Symbol, Section);
-
if (ByteAlignment != 1)
new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0,
ByteAlignment, Section);
@@ -287,9 +289,8 @@ void MCWinCOFFStreamer::FinishImpl() {
MCObjectStreamer::FinishImpl();
}
-LLVM_ATTRIBUTE_NORETURN
-void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
- getContext().reportFatalError(SMLoc(), Msg);
+void MCWinCOFFStreamer::Error(const Twine &Msg) const {
+ getContext().reportError(SMLoc(), Msg);
}
}
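The streamer changes above downgrade FatalError (noreturn) to Error plus an early return, so a malformed directive produces a diagnostic instead of aborting the whole assembly. A sketch of the recoverable-diagnostic pattern with invented names:

#include <iostream>
#include <string>

struct SymbolStreamer {
  const std::string *CurSymbol = nullptr;

  void error(const std::string &Msg) { // report and carry on, no abort
    std::cerr << "error: " << Msg << '\n';
  }

  void emitStorageClass(int StorageClass) {
    if (!CurSymbol) {
      error("storage class specified outside of symbol definition");
      return; // recoverable: later directives still get diagnosed
    }
    if (StorageClass & ~0xffff) {
      error("storage class value '" + std::to_string(StorageClass) +
            "' out of range");
      return;
    }
    // ... apply the storage class to *CurSymbol ...
  }
};

int main() {
  SymbolStreamer S;
  S.emitStorageClass(2); // no symbol definition is active: error, then return
}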
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
index d482119..99b0650 100644
--- a/contrib/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -43,10 +43,10 @@ StringRef ArchiveMemberHeader::getName() const {
return llvm::StringRef(Name, end);
}
-uint32_t ArchiveMemberHeader::getSize() const {
+ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const {
uint32_t Ret;
if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
- llvm_unreachable("Size is not a decimal number.");
+ return object_error::parse_failed; // Size is not a decimal number.
return Ret;
}
@@ -82,22 +82,30 @@ unsigned ArchiveMemberHeader::getGID() const {
return Ret;
}
-Archive::Child::Child(const Archive *Parent, const char *Start)
+Archive::Child::Child(const Archive *Parent, StringRef Data,
+ uint16_t StartOfFile)
+ : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {}
+
+Archive::Child::Child(const Archive *Parent, const char *Start,
+ std::error_code *EC)
: Parent(Parent) {
if (!Start)
return;
- const ArchiveMemberHeader *Header =
- reinterpret_cast<const ArchiveMemberHeader *>(Start);
uint64_t Size = sizeof(ArchiveMemberHeader);
- if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
- Size += Header->getSize();
Data = StringRef(Start, Size);
+ if (!isThinMember()) {
+ ErrorOr<uint64_t> MemberSize = getRawSize();
+ if ((*EC = MemberSize.getError()))
+ return;
+ Size += MemberSize.get();
+ Data = StringRef(Start, Size);
+ }
// Setup StartOfFile and PaddingBytes.
StartOfFile = sizeof(ArchiveMemberHeader);
// Don't include attached name.
- StringRef Name = Header->getName();
+ StringRef Name = getRawName();
if (Name.startswith("#1/")) {
uint64_t NameSize;
if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
@@ -106,25 +114,40 @@ Archive::Child::Child(const Archive *Parent, const char *Start)
}
}
-uint64_t Archive::Child::getSize() const {
- if (Parent->IsThin)
- return getHeader()->getSize();
+ErrorOr<uint64_t> Archive::Child::getSize() const {
+ if (Parent->IsThin) {
+ ErrorOr<uint32_t> Size = getHeader()->getSize();
+ if (std::error_code EC = Size.getError())
+ return EC;
+ return Size.get();
+ }
return Data.size() - StartOfFile;
}
-uint64_t Archive::Child::getRawSize() const {
- return getHeader()->getSize();
+ErrorOr<uint64_t> Archive::Child::getRawSize() const {
+ ErrorOr<uint32_t> Size = getHeader()->getSize();
+ if (std::error_code EC = Size.getError())
+ return EC;
+ return Size.get();
+}
+
+bool Archive::Child::isThinMember() const {
+ StringRef Name = getHeader()->getName();
+ return Parent->IsThin && Name != "/" && Name != "//";
}
ErrorOr<StringRef> Archive::Child::getBuffer() const {
- if (!Parent->IsThin)
- return StringRef(Data.data() + StartOfFile, getSize());
+ if (!isThinMember()) {
+ ErrorOr<uint32_t> Size = getSize();
+ if (std::error_code EC = Size.getError())
+ return EC;
+ return StringRef(Data.data() + StartOfFile, Size.get());
+ }
ErrorOr<StringRef> Name = getName();
if (std::error_code EC = Name.getError())
return EC;
- SmallString<128> FullName =
- Parent->getMemoryBufferRef().getBufferIdentifier();
- sys::path::remove_filename(FullName);
+ SmallString<128> FullName = sys::path::parent_path(
+ Parent->getMemoryBufferRef().getBufferIdentifier());
sys::path::append(FullName, *Name);
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
if (std::error_code EC = Buf.getError())
@@ -133,7 +156,7 @@ ErrorOr<StringRef> Archive::Child::getBuffer() const {
return Parent->ThinBuffers.back()->getBuffer();
}
-Archive::Child Archive::Child::getNext() const {
+ErrorOr<Archive::Child> Archive::Child::getNext() const {
size_t SpaceToSkip = Data.size();
// If it's odd, add 1 to make it even.
if (SpaceToSkip & 1)
@@ -141,11 +164,19 @@ Archive::Child Archive::Child::getNext() const {
const char *NextLoc = Data.data() + SpaceToSkip;
+ // Check to see if this is at the end of the archive.
+ if (NextLoc == Parent->Data.getBufferEnd())
+ return Child(Parent, nullptr, nullptr);
+
// Check to see if this is past the end of the archive.
- if (NextLoc >= Parent->Data.getBufferEnd())
- return Child(Parent, nullptr);
+ if (NextLoc > Parent->Data.getBufferEnd())
+ return object_error::parse_failed;
- return Child(Parent, NextLoc);
+ std::error_code EC;
+ Child Ret(Parent, NextLoc, &EC);
+ if (EC)
+ return EC;
+ return Ret;
}
uint64_t Archive::Child::getChildOffset() const {
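The Archive rework in the hunks above converts fallible accessors such as ArchiveMemberHeader::getSize() from "assert it cannot fail" to returning ErrorOr<T>, with every caller checking getError() and propagating. A stripped-down sketch of that propagation pattern; the ErrorOr here is a simplified stand-in for llvm::ErrorOr, not the real class:

#include <cstdint>
#include <cstdio>
#include <string>
#include <system_error>

template <typename T> class ErrorOr {
  std::error_code EC;
  T Val{};
public:
  ErrorOr(T V) : Val(V) {}
  ErrorOr(std::error_code E) : EC(E) {}
  std::error_code getError() const { return EC; }
  T get() const { return Val; }
};

// Parsing the size field can fail on corrupt input; return the error
// instead of asserting it cannot happen.
ErrorOr<uint32_t> parseSize(const std::string &Field) {
  uint32_t Ret = 0;
  for (char C : Field) {
    if (C < '0' || C > '9')
      return std::make_error_code(std::errc::invalid_argument);
    Ret = Ret * 10 + (C - '0');
  }
  return Ret;
}

// Callers check and propagate, as the patched getSize()/getRawSize() do.
ErrorOr<uint64_t> memberSize(const std::string &SizeField) {
  ErrorOr<uint32_t> Size = parseSize(SizeField);
  if (std::error_code EC = Size.getError())
    return EC;
  return static_cast<uint64_t>(Size.get());
}

int main() {
  ErrorOr<uint64_t> Good = memberSize("1234");
  ErrorOr<uint64_t> Bad = memberSize("12x4");
  std::printf("good: %llu, bad has error: %d\n",
              (unsigned long long)Good.get(), (bool)Bad.getError());
}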
@@ -168,17 +199,11 @@ ErrorOr<StringRef> Archive::Child::getName() const {
std::size_t offset;
if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
llvm_unreachable("Long name offset is not an integer");
- const char *addr = Parent->StringTable->Data.begin()
- + sizeof(ArchiveMemberHeader)
- + offset;
+
// Verify it.
- if (Parent->StringTable == Parent->child_end()
- || addr < (Parent->StringTable->Data.begin()
- + sizeof(ArchiveMemberHeader))
- || addr > (Parent->StringTable->Data.begin()
- + sizeof(ArchiveMemberHeader)
- + Parent->StringTable->getSize()))
+ if (offset >= Parent->StringTable.size())
return object_error::parse_failed;
+ const char *addr = Parent->StringTable.begin() + offset;
// GNU long file names end with a "/\n".
if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
@@ -227,9 +252,13 @@ ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
return std::move(Ret);
}
+void Archive::setFirstRegular(const Child &C) {
+ FirstRegularData = C.Data;
+ FirstRegularStartOfFile = C.StartOfFile;
+}
+
Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
- : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()),
- StringTable(child_end()), FirstRegular(child_end()) {
+ : Binary(Binary::ID_Archive, Source) {
StringRef Buffer = Data.getBuffer();
// Check for sufficient magic.
if (Buffer.startswith(ThinMagic)) {
@@ -242,15 +271,26 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
}
// Get the special members.
- child_iterator i = child_begin(false);
- child_iterator e = child_end();
+ child_iterator I = child_begin(false);
+ if ((ec = I->getError()))
+ return;
+ child_iterator E = child_end();
- if (i == e) {
+ if (I == E) {
ec = std::error_code();
return;
}
+ const Child *C = &**I;
- StringRef Name = i->getRawName();
+ auto Increment = [&]() {
+ ++I;
+ if ((ec = I->getError()))
+ return true;
+ C = &**I;
+ return false;
+ };
+
+ StringRef Name = C->getRawName();
// Below is the pattern that is used to figure out the archive format
// GNU archive format
@@ -273,9 +313,13 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
if (Name == "__.SYMDEF") {
Format = K_BSD;
- SymbolTable = i;
- ++i;
- FirstRegular = i;
+ // We know that the symbol table is not an external file, so we just assert
+ // there is no error.
+ SymbolTable = *C->getBuffer();
+ if (Increment())
+ return;
+ setFirstRegular(*C);
+
ec = std::error_code();
return;
}
@@ -283,16 +327,19 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
if (Name.startswith("#1/")) {
Format = K_BSD;
// We know this is BSD, so getName will work since there is no string table.
- ErrorOr<StringRef> NameOrErr = i->getName();
+ ErrorOr<StringRef> NameOrErr = C->getName();
ec = NameOrErr.getError();
if (ec)
return;
Name = NameOrErr.get();
if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
- SymbolTable = i;
- ++i;
+ // We know that the symbol table is not an external file, so we just
+ // assert there is no error.
+ SymbolTable = *C->getBuffer();
+ if (Increment())
+ return;
}
- FirstRegular = i;
+ setFirstRegular(*C);
return;
}
@@ -303,30 +350,36 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
bool has64SymTable = false;
if (Name == "/" || Name == "/SYM64/") {
- SymbolTable = i;
+ // We know that the symbol table is not an external file, so we just assert
+ // there is no error.
+ SymbolTable = *C->getBuffer();
if (Name == "/SYM64/")
has64SymTable = true;
- ++i;
- if (i == e) {
+ if (Increment())
+ return;
+ if (I == E) {
ec = std::error_code();
return;
}
- Name = i->getRawName();
+ Name = C->getRawName();
}
if (Name == "//") {
Format = has64SymTable ? K_MIPS64 : K_GNU;
- StringTable = i;
- ++i;
- FirstRegular = i;
+ // The string table is never an external member, so we just assert on the
+ // ErrorOr.
+ StringTable = *C->getBuffer();
+ if (Increment())
+ return;
+ setFirstRegular(*C);
ec = std::error_code();
return;
}
if (Name[0] != '/') {
Format = has64SymTable ? K_MIPS64 : K_GNU;
- FirstRegular = i;
+ setFirstRegular(*C);
ec = std::error_code();
return;
}
@@ -337,23 +390,30 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
}
Format = K_COFF;
- SymbolTable = i;
+ // We know that the symbol table is not an external file, so we just assert
+ // there is no error.
+ SymbolTable = *C->getBuffer();
- ++i;
- if (i == e) {
- FirstRegular = i;
+ if (Increment())
+ return;
+
+ if (I == E) {
+ setFirstRegular(*C);
ec = std::error_code();
return;
}
- Name = i->getRawName();
+ Name = C->getRawName();
if (Name == "//") {
- StringTable = i;
- ++i;
+ // The string table is never an external member, so we just assert on the
+ // ErrorOr.
+ StringTable = *C->getBuffer();
+ if (Increment())
+ return;
}
- FirstRegular = i;
+ setFirstRegular(*C);
ec = std::error_code();
}
@@ -362,22 +422,25 @@ Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
return child_end();
if (SkipInternal)
- return FirstRegular;
+ return Child(this, FirstRegularData, FirstRegularStartOfFile);
const char *Loc = Data.getBufferStart() + strlen(Magic);
- Child c(this, Loc);
- return c;
+ std::error_code EC;
+ Child c(this, Loc, &EC);
+ if (EC)
+ return child_iterator(EC);
+ return child_iterator(c);
}
Archive::child_iterator Archive::child_end() const {
- return Child(this, nullptr);
+ return Child(this, nullptr, nullptr);
}
StringRef Archive::Symbol::getName() const {
return Parent->getSymbolTable().begin() + StringIndex;
}
-ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
+ErrorOr<Archive::Child> Archive::Symbol::getMember() const {
const char *Buf = Parent->getSymbolTable().begin();
const char *Offsets = Buf;
if (Parent->kind() == K_MIPS64)
@@ -422,8 +485,11 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
}
const char *Loc = Parent->getData().begin() + Offset;
- child_iterator Iter(Child(Parent, Loc));
- return Iter;
+ std::error_code EC;
+ Child C(Parent, Loc, &EC);
+ if (EC)
+ return EC;
+ return C;
}
Archive::Symbol Archive::Symbol::getNext() const {
@@ -506,12 +572,12 @@ Archive::symbol_iterator Archive::symbol_begin() const {
}
Archive::symbol_iterator Archive::symbol_end() const {
- if (!hasSymbolTable())
- return symbol_iterator(Symbol(this, 0, 0));
return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
}
uint32_t Archive::getNumberOfSymbols() const {
+ if (!hasSymbolTable())
+ return 0;
const char *buf = getSymbolTable().begin();
if (kind() == K_GNU)
return read32be(buf);
@@ -542,6 +608,4 @@ Archive::child_iterator Archive::findSym(StringRef name) const {
return child_end();
}
-bool Archive::hasSymbolTable() const {
- return SymbolTable != child_end();
-}
+bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
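
Note on the Archive.cpp changes above: child_iterator now wraps an ErrorOr<Child>, so a malformed member header surfaces as an error code at the point of iteration instead of asserting. A minimal consumption sketch, assuming this revision's API (the loop body is illustrative):

    #include "llvm/Object/Archive.h"
    using namespace llvm;
    using namespace llvm::object;

    static std::error_code listMembers(const Archive &A) {
      for (auto I = A.child_begin(), E = A.child_end(); I != E; ++I) {
        if (std::error_code EC = I->getError())
          return EC;                     // malformed header: stop iterating
        const Archive::Child &C = **I;   // unwrap the ErrorOr to get the Child
        StringRef Name = C.getRawName(); // e.g. "/", "//", or a member name
        (void)Name;
      }
      return std::error_code();
    }
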
diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp
index a40901c..c7343fd 100644
--- a/contrib/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp
@@ -34,32 +34,32 @@
using namespace llvm;
-NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I,
+NewArchiveIterator::NewArchiveIterator(const object::Archive::Child &OldMember,
StringRef Name)
- : IsNewMember(false), Name(Name), OldI(I) {}
+ : IsNewMember(false), Name(Name), OldMember(OldMember) {}
-NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name)
- : IsNewMember(true), Name(Name), NewFilename(NewFilename) {}
+NewArchiveIterator::NewArchiveIterator(StringRef FileName)
+ : IsNewMember(true), Name(FileName), OldMember(nullptr, nullptr, nullptr) {}
StringRef NewArchiveIterator::getName() const { return Name; }
bool NewArchiveIterator::isNewMember() const { return IsNewMember; }
-object::Archive::child_iterator NewArchiveIterator::getOld() const {
+const object::Archive::Child &NewArchiveIterator::getOld() const {
assert(!IsNewMember);
- return OldI;
+ return OldMember;
}
StringRef NewArchiveIterator::getNew() const {
assert(IsNewMember);
- return NewFilename;
+ return Name;
}
llvm::ErrorOr<int>
NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const {
assert(IsNewMember);
int NewFD;
- if (auto EC = sys::fs::openFileForRead(NewFilename, NewFD))
+ if (auto EC = sys::fs::openFileForRead(Name, NewFD))
return EC;
assert(NewFD != -1);
@@ -77,7 +77,7 @@ NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const {
template <typename T>
static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
- bool MayTruncate = false) {
+ bool MayTruncate = false) {
uint64_t OldPos = OS.tell();
OS << Data;
unsigned SizeSoFar = OS.tell() - OldPos;
@@ -135,30 +135,56 @@ static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name,
Out.write(uint8_t(0));
}
+static bool useStringTable(bool Thin, StringRef Name) {
+ return Thin || Name.size() >= 16;
+}
+
static void
-printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind,
+printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin,
StringRef Name,
std::vector<unsigned>::iterator &StringMapIndexIter,
const sys::TimeValue &ModTime, unsigned UID, unsigned GID,
unsigned Perms, unsigned Size) {
if (Kind == object::Archive::K_BSD)
return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
- if (Name.size() < 16)
+ if (!useStringTable(Thin, Name))
return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
Out << '/';
printWithSpacePadding(Out, *StringMapIndexIter++, 15);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
-static void writeStringTable(raw_fd_ostream &Out,
+// Compute the relative path from From to To.
+static std::string computeRelativePath(StringRef From, StringRef To) {
+ if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
+ return To;
+
+ StringRef DirFrom = sys::path::parent_path(From);
+ auto FromI = sys::path::begin(DirFrom);
+ auto ToI = sys::path::begin(To);
+ while (*FromI == *ToI) {
+ ++FromI;
+ ++ToI;
+ }
+
+ SmallString<128> Relative;
+ for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
+ sys::path::append(Relative, "..");
+
+ for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
+ sys::path::append(Relative, *ToI);
+
+ return Relative.str();
+}
+
+static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName,
ArrayRef<NewArchiveIterator> Members,
- std::vector<unsigned> &StringMapIndexes) {
+ std::vector<unsigned> &StringMapIndexes,
+ bool Thin) {
unsigned StartOffset = 0;
- for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
- E = Members.end();
- I != E; ++I) {
- StringRef Name = I->getName();
- if (Name.size() < 16)
+ for (const NewArchiveIterator &I : Members) {
+ StringRef Name = sys::path::filename(I.getName());
+ if (!useStringTable(Thin, Name))
continue;
if (StartOffset == 0) {
printWithSpacePadding(Out, "//", 58);
@@ -166,7 +192,13 @@ static void writeStringTable(raw_fd_ostream &Out,
StartOffset = Out.tell();
}
StringMapIndexes.push_back(Out.tell() - StartOffset);
- Out << Name << "/\n";
+
+ if (Thin)
+ Out << computeRelativePath(ArcName, I.getName());
+ else
+ Out << Name;
+
+ Out << "/\n";
}
if (StartOffset == 0)
return;
@@ -268,9 +300,11 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
return BodyStartOffset + 4;
}
-std::pair<StringRef, std::error_code> llvm::writeArchive(
- StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) {
+std::pair<StringRef, std::error_code>
+llvm::writeArchive(StringRef ArcName,
+ std::vector<NewArchiveIterator> &NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin) {
SmallString<128> TmpArchive;
int TmpArchiveFD;
if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a",
@@ -279,7 +313,10 @@ std::pair<StringRef, std::error_code> llvm::writeArchive(
tool_output_file Output(TmpArchive, TmpArchiveFD);
raw_fd_ostream &Out = Output.os();
- Out << "!<arch>\n";
+ if (Thin)
+ Out << "!<thin>\n";
+ else
+ Out << "!<arch>\n";
std::vector<unsigned> MemberOffsetRefs;
@@ -309,9 +346,11 @@ std::pair<StringRef, std::error_code> llvm::writeArchive(
Buffers.push_back(std::move(MemberBufferOrErr.get()));
MemberRef = Buffers.back()->getMemBufferRef();
} else {
- object::Archive::child_iterator OldMember = Member.getOld();
+ const object::Archive::Child &OldMember = Member.getOld();
+ assert((!Thin || OldMember.getParent()->isThin()) &&
+ "Thin archives cannot refers to member of other archives");
ErrorOr<MemoryBufferRef> MemberBufferOrErr =
- OldMember->getMemoryBufferRef();
+ OldMember.getMemoryBufferRef();
if (auto EC = MemberBufferOrErr.getError())
return std::make_pair("", EC);
MemberRef = MemberBufferOrErr.get();
@@ -330,7 +369,7 @@ std::pair<StringRef, std::error_code> llvm::writeArchive(
std::vector<unsigned> StringMapIndexes;
if (Kind != object::Archive::K_BSD)
- writeStringTable(Out, NewMembers, StringMapIndexes);
+ writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin);
unsigned MemberNum = 0;
unsigned NewMemberNum = 0;
@@ -358,26 +397,32 @@ std::pair<StringRef, std::error_code> llvm::writeArchive(
GID = Status.getGroup();
Perms = Status.permissions();
} else {
- object::Archive::child_iterator OldMember = I.getOld();
- ModTime = OldMember->getLastModified();
- UID = OldMember->getUID();
- GID = OldMember->getGID();
- Perms = OldMember->getAccessMode();
+ const object::Archive::Child &OldMember = I.getOld();
+ ModTime = OldMember.getLastModified();
+ UID = OldMember.getUID();
+ GID = OldMember.getGID();
+ Perms = OldMember.getAccessMode();
}
if (I.isNewMember()) {
StringRef FileName = I.getNew();
const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++];
- printMemberHeader(Out, Kind, sys::path::filename(FileName),
+ printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName),
StringMapIndexIter, ModTime, UID, GID, Perms,
Status.getSize());
} else {
- object::Archive::child_iterator OldMember = I.getOld();
- printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime,
- UID, GID, Perms, OldMember->getSize());
+ const object::Archive::Child &OldMember = I.getOld();
+ ErrorOr<uint32_t> Size = OldMember.getSize();
+ if (std::error_code EC = Size.getError())
+ return std::make_pair("", EC);
+ StringRef FileName = I.getName();
+ printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName),
+ StringMapIndexIter, ModTime, UID, GID, Perms,
+ Size.get());
}
- Out << File.getBuffer();
+ if (!Thin)
+ Out << File.getBuffer();
if (Out.tell() % 2)
Out << '\n';
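
To make the thin-archive string table concrete, here is a worked illustration of computeRelativePath from the hunk above, using hypothetical POSIX-style paths (intended behavior, not a verbatim test from the tree):

    // Assume the archive is being written to "lib/libfoo.a":
    //   computeRelativePath("lib/libfoo.a", "lib/sub/a.o") -> "sub/a.o"
    //   computeRelativePath("lib/libfoo.a", "other/b.o")   -> "../other/b.o"
    //   computeRelativePath("lib/libfoo.a", "/abs/c.o")    -> "/abs/c.o"
    // Absolute paths are returned unchanged. Thin archives store these
    // relative names in the long-name table, so an archive and its member
    // files stay valid when the containing directory is moved as a whole.
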
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index bcca983..1f21117 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -171,6 +171,11 @@ ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const {
if (std::error_code EC = getSection(SectionNumber, Section))
return EC;
Result += Section->VirtualAddress;
+
+ // The section VirtualAddress does not include ImageBase, and we want to
+ // return virtual addresses.
+ Result += getImageBase();
+
return Result;
}
@@ -178,10 +183,10 @@ SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const {
COFFSymbolRef Symb = getCOFFSymbol(Ref);
int32_t SectionNumber = Symb.getSectionNumber();
+ if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION)
+ return SymbolRef::ST_Function;
if (Symb.isAnyUndefined())
return SymbolRef::ST_Unknown;
- if (Symb.isFunctionDefinition())
- return SymbolRef::ST_Function;
if (Symb.isCommon())
return SymbolRef::ST_Data;
if (Symb.isFileRecord())
@@ -230,21 +235,17 @@ uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const {
return Symb.getValue();
}
-std::error_code
-COFFObjectFile::getSymbolSection(DataRefImpl Ref,
- section_iterator &Result) const {
+ErrorOr<section_iterator>
+COFFObjectFile::getSymbolSection(DataRefImpl Ref) const {
COFFSymbolRef Symb = getCOFFSymbol(Ref);
- if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
- Result = section_end();
- } else {
- const coff_section *Sec = nullptr;
- if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec))
- return EC;
- DataRefImpl Ref;
- Ref.p = reinterpret_cast<uintptr_t>(Sec);
- Result = section_iterator(SectionRef(Ref, this));
- }
- return std::error_code();
+ if (COFF::isReservedSectionNumber(Symb.getSectionNumber()))
+ return section_end();
+ const coff_section *Sec = nullptr;
+ if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec))
+ return EC;
+ DataRefImpl Ret;
+ Ret.p = reinterpret_cast<uintptr_t>(Sec);
+ return section_iterator(SectionRef(Ret, this));
}
unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const {
@@ -266,7 +267,12 @@ std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return Sec->VirtualAddress;
+ uint64_t Result = Sec->VirtualAddress;
+
+ // The section VirtualAddress does not include ImageBase, and we want to
+ // return virtual addresses.
+ Result += getImageBase();
+ return Result;
}
uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
@@ -412,10 +418,18 @@ std::error_code COFFObjectFile::initSymbolTablePtr() {
return std::error_code();
}
+uint64_t COFFObjectFile::getImageBase() const {
+ if (PE32Header)
+ return PE32Header->ImageBase;
+ else if (PE32PlusHeader)
+ return PE32PlusHeader->ImageBase;
+ // This actually comes up in practice.
+ return 0;
+}
+
// Returns the file offset for the given VA.
std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const {
- uint64_t ImageBase = PE32Header ? (uint64_t)PE32Header->ImageBase
- : (uint64_t)PE32PlusHeader->ImageBase;
+ uint64_t ImageBase = getImageBase();
uint64_t Rva = Addr - ImageBase;
assert(Rva <= UINT32_MAX);
return getRvaPtr((uint32_t)Rva, Res);
@@ -744,6 +758,8 @@ StringRef COFFObjectFile::getFileFormatName() const {
return "COFF-x86-64";
case COFF::IMAGE_FILE_MACHINE_ARMNT:
return "COFF-ARM";
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return "COFF-ARM64";
default:
return "COFF-<unknown arch>";
}
@@ -757,6 +773,8 @@ unsigned COFFObjectFile::getArch() const {
return Triple::x86_64;
case COFF::IMAGE_FILE_MACHINE_ARMNT:
return Triple::thumb;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return Triple::aarch64;
default:
return Triple::UnknownArch;
}
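
Two COFF changes above matter to callers: getSymbolSection now returns ErrorOr<section_iterator>, and symbol and section addresses now include the image base. A hedged usage sketch (setup omitted; names follow this revision's API):

    #include "llvm/Object/Error.h"
    #include "llvm/Object/ObjectFile.h"
    using namespace llvm;
    using namespace llvm::object;

    static ErrorOr<uint64_t> sectionVAOfSymbol(const ObjectFile &Obj,
                                               SymbolRef Sym) {
      ErrorOr<section_iterator> SecOrErr = Sym.getSection();
      if (std::error_code EC = SecOrErr.getError())
        return EC;
      section_iterator Sec = *SecOrErr;
      if (Sec == Obj.section_end())
        return object_error::parse_failed; // reserved section number
      return Sec->getAddress();            // now includes ImageBase
    }
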
diff --git a/contrib/llvm/lib/Object/COFFYAML.cpp b/contrib/llvm/lib/Object/COFFYAML.cpp
index 9a24b53..4c1fca1 100644
--- a/contrib/llvm/lib/Object/COFFYAML.cpp
+++ b/contrib/llvm/lib/Object/COFFYAML.cpp
@@ -56,6 +56,7 @@ void ScalarEnumerationTraits<COFF::MachineTypes>::enumeration(
ECase(IMAGE_FILE_MACHINE_AMD64);
ECase(IMAGE_FILE_MACHINE_ARM);
ECase(IMAGE_FILE_MACHINE_ARMNT);
+ ECase(IMAGE_FILE_MACHINE_ARM64);
ECase(IMAGE_FILE_MACHINE_EBC);
ECase(IMAGE_FILE_MACHINE_I386);
ECase(IMAGE_FILE_MACHINE_IA64);
@@ -210,6 +211,7 @@ void ScalarBitSetTraits<COFF::Characteristics>::bitset(
void ScalarBitSetTraits<COFF::SectionCharacteristics>::bitset(
IO &IO, COFF::SectionCharacteristics &Value) {
+ BCase(IMAGE_SCN_TYPE_NOLOAD);
BCase(IMAGE_SCN_TYPE_NO_PAD);
BCase(IMAGE_SCN_CNT_CODE);
BCase(IMAGE_SCN_CNT_INITIALIZED_DATA);
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
index 398e9e4..62c27cc 100644
--- a/contrib/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -26,6 +26,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
}
break;
case ELF::EM_386:
+ case ELF::EM_IAMCU:
switch (Type) {
#include "llvm/Support/ELFRelocs/i386.def"
default:
diff --git a/contrib/llvm/lib/Object/ELFYAML.cpp b/contrib/llvm/lib/Object/ELFYAML.cpp
index 72c232c..4a4b227 100644
--- a/contrib/llvm/lib/Object/ELFYAML.cpp
+++ b/contrib/llvm/lib/Object/ELFYAML.cpp
@@ -193,6 +193,7 @@ ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(IO &IO,
ECase(EM_VIDEOCORE5)
ECase(EM_78KOR)
ECase(EM_56800EX)
+ ECase(EM_AMDGPU)
#undef ECase
}
@@ -316,6 +317,25 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_HEXAGON_ISA_V4)
BCase(EF_HEXAGON_ISA_V5)
break;
+ case ELF::EM_AVR:
+ BCase(EF_AVR_ARCH_AVR1)
+ BCase(EF_AVR_ARCH_AVR2)
+ BCase(EF_AVR_ARCH_AVR25)
+ BCase(EF_AVR_ARCH_AVR3)
+ BCase(EF_AVR_ARCH_AVR31)
+ BCase(EF_AVR_ARCH_AVR35)
+ BCase(EF_AVR_ARCH_AVR4)
+ BCase(EF_AVR_ARCH_AVR51)
+ BCase(EF_AVR_ARCH_AVR6)
+ BCase(EF_AVR_ARCH_AVRTINY)
+ BCase(EF_AVR_ARCH_XMEGA1)
+ BCase(EF_AVR_ARCH_XMEGA2)
+ BCase(EF_AVR_ARCH_XMEGA3)
+ BCase(EF_AVR_ARCH_XMEGA4)
+ BCase(EF_AVR_ARCH_XMEGA5)
+ BCase(EF_AVR_ARCH_XMEGA6)
+ BCase(EF_AVR_ARCH_XMEGA7)
+ break;
default:
llvm_unreachable("Unsupported architecture");
}
@@ -382,6 +402,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
ELFYAML::ELF_SHF &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
BCase(SHF_WRITE)
BCase(SHF_ALLOC)
@@ -394,6 +415,17 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
BCase(SHF_OS_NONCONFORMING)
BCase(SHF_GROUP)
BCase(SHF_TLS)
+ switch(Object->Header.Machine) {
+ case ELF::EM_AMDGPU:
+ BCase(SHF_AMDGPU_HSA_GLOBAL)
+ BCase(SHF_AMDGPU_HSA_READONLY)
+ BCase(SHF_AMDGPU_HSA_CODE)
+ BCase(SHF_AMDGPU_HSA_AGENT)
+ break;
+ default:
+ // Nothing to do.
+ break;
+ }
#undef BCase
}
@@ -466,6 +498,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
#include "llvm/Support/ELFRelocs/Hexagon.def"
break;
case ELF::EM_386:
+ case ELF::EM_IAMCU:
#include "llvm/Support/ELFRelocs/i386.def"
break;
case ELF::EM_AARCH64:
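
The machine-dependent SHF flags above read the ELF header through the YAML IO context, which assumes the ELFYAML::Object was installed as that context before section flags are mapped. A sketch of the assumed handshake (illustrative; the real MappingTraits code in ELFYAML.cpp maps more fields):

    namespace llvm {
    namespace yaml {
    void MappingTraits<ELFYAML::Object>::mapping(IO &IO,
                                                 ELFYAML::Object &Object) {
      IO.setContext(&Object); // makes Header.Machine visible to the bitset
      IO.mapRequired("FileHeader", Object.Header);
      // ... sections, symbols ...
      IO.setContext(nullptr);
    }
    } // end namespace yaml
    } // end namespace llvm
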
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
index 7ca2f12..7ecc3a1 100644
--- a/contrib/llvm/lib/Object/Error.cpp
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -47,6 +47,8 @@ std::string _object_error_category::message(int EV) const {
return "Invalid section index";
case object_error::bitcode_section_not_found:
return "Bitcode section not found in object file";
+ case object_error::elf_invalid_dynamic_table_size:
+ return "Invalid dynamic table size";
case object_error::macho_small_load_command:
return "Mach-O load command with size < 8 bytes";
case object_error::macho_load_segment_too_many_sections:
diff --git a/contrib/llvm/lib/Object/FunctionIndexObjectFile.cpp b/contrib/llvm/lib/Object/FunctionIndexObjectFile.cpp
new file mode 100644
index 0000000..fe111de
--- /dev/null
+++ b/contrib/llvm/lib/Object/FunctionIndexObjectFile.cpp
@@ -0,0 +1,143 @@
+//===- FunctionIndexObjectFile.cpp - Function index file implementation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the FunctionIndexObjectFile class implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace object;
+
+FunctionIndexObjectFile::FunctionIndexObjectFile(
+ MemoryBufferRef Object, std::unique_ptr<FunctionInfoIndex> I)
+ : SymbolicFile(Binary::ID_FunctionIndex, Object), Index(std::move(I)) {}
+
+FunctionIndexObjectFile::~FunctionIndexObjectFile() {}
+
+std::unique_ptr<FunctionInfoIndex> FunctionIndexObjectFile::takeIndex() {
+ return std::move(Index);
+}
+
+ErrorOr<MemoryBufferRef>
+FunctionIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
+ for (const SectionRef &Sec : Obj.sections()) {
+ StringRef SecName;
+ if (std::error_code EC = Sec.getName(SecName))
+ return EC;
+ if (SecName == ".llvmbc") {
+ StringRef SecContents;
+ if (std::error_code EC = Sec.getContents(SecContents))
+ return EC;
+ return MemoryBufferRef(SecContents, Obj.getFileName());
+ }
+ }
+
+ return object_error::bitcode_section_not_found;
+}
+
+ErrorOr<MemoryBufferRef>
+FunctionIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
+ sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer());
+ switch (Type) {
+ case sys::fs::file_magic::bitcode:
+ return Object;
+ case sys::fs::file_magic::elf_relocatable:
+ case sys::fs::file_magic::macho_object:
+ case sys::fs::file_magic::coff_object: {
+ ErrorOr<std::unique_ptr<ObjectFile>> ObjFile =
+ ObjectFile::createObjectFile(Object, Type);
+ if (!ObjFile)
+ return ObjFile.getError();
+ return findBitcodeInObject(*ObjFile->get());
+ }
+ default:
+ return object_error::invalid_file_type;
+ }
+}
+
+// Looks for a function index in the given memory buffer.
+// Returns true if found, else false.
+bool FunctionIndexObjectFile::hasFunctionSummaryInMemBuffer(
+ MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler) {
+ ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
+ if (!BCOrErr)
+ return false;
+
+ return hasFunctionSummary(BCOrErr.get(), DiagnosticHandler);
+}
+
+// Parses the function index in the given memory buffer and returns a new
+// FunctionIndexObjectFile instance containing the parsed function
+// summary/index.
+ErrorOr<std::unique_ptr<FunctionIndexObjectFile>>
+FunctionIndexObjectFile::create(MemoryBufferRef Object,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool IsLazy) {
+ std::unique_ptr<FunctionInfoIndex> Index;
+
+ ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
+ if (!BCOrErr)
+ return BCOrErr.getError();
+
+ ErrorOr<std::unique_ptr<FunctionInfoIndex>> IOrErr = getFunctionInfoIndex(
+ BCOrErr.get(), DiagnosticHandler, IsLazy);
+
+ if (std::error_code EC = IOrErr.getError())
+ return EC;
+
+ Index = std::move(IOrErr.get());
+
+ return llvm::make_unique<FunctionIndexObjectFile>(Object, std::move(Index));
+}
+
+// Parse the function summary information for the function with the given
+// name out of the given buffer. The parsed information is stored on the
+// index object saved in this object.
+std::error_code FunctionIndexObjectFile::findFunctionSummaryInMemBuffer(
+ MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler,
+ StringRef FunctionName) {
+ sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer());
+ switch (Type) {
+ case sys::fs::file_magic::bitcode: {
+ return readFunctionSummary(Object, DiagnosticHandler, FunctionName,
+ std::move(Index));
+ }
+ default:
+ return object_error::invalid_file_type;
+ }
+}
+
+// Parse the function index out of an IR file and return the function
+// index object if found, or an error code if not.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+llvm::getFunctionIndexForFile(StringRef Path,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(Path);
+ std::error_code EC = FileOrErr.getError();
+ if (EC)
+ return EC;
+ MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
+ ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+ object::FunctionIndexObjectFile::create(BufferRef, DiagnosticHandler);
+ EC = ObjOrErr.getError();
+ if (EC)
+ return EC;
+
+ object::FunctionIndexObjectFile &Obj = **ObjOrErr;
+ return Obj.takeIndex();
+}
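
A hypothetical caller of the new getFunctionIndexForFile entry point (the handler body and the fatal-error policy are illustrative):

    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/FunctionInfo.h"
    #include "llvm/Object/FunctionIndexObjectFile.h"
    #include "llvm/Support/ErrorHandling.h"
    using namespace llvm;

    static void handleDiag(const DiagnosticInfo &DI) {
      // A real tool would print DI with a DiagnosticPrinter; ignored here.
    }

    static std::unique_ptr<FunctionInfoIndex> loadIndex(StringRef Path) {
      ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
          getFunctionIndexForFile(Path, handleDiag);
      if (std::error_code EC = IndexOrErr.getError())
        report_fatal_error("cannot load function index: " + EC.message());
      return std::move(*IndexOrErr);
    }
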
diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp
index 9f5132e..c35c413 100644
--- a/contrib/llvm/lib/Object/IRObjectFile.cpp
+++ b/contrib/llvm/lib/Object/IRObjectFile.cpp
@@ -219,6 +219,12 @@ uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
uint32_t Res = BasicSymbolRef::SF_None;
if (GV->isDeclarationForLinker())
Res |= BasicSymbolRef::SF_Undefined;
+ else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
+ Res |= BasicSymbolRef::SF_Hidden;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->isConstant())
+ Res |= BasicSymbolRef::SF_Const;
+ }
if (GV->hasPrivateLinkage())
Res |= BasicSymbolRef::SF_FormatSpecific;
if (!GV->hasLocalLinkage())
@@ -303,7 +309,7 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object,
MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
ErrorOr<std::unique_ptr<Module>> MOrErr =
- getLazyBitcodeModule(std::move(Buff), Context, nullptr,
+ getLazyBitcodeModule(std::move(Buff), Context,
/*ShouldLazyLoadMetadata*/ true);
if (std::error_code EC = MOrErr.getError())
return EC;
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index 0590063..d1f79b2 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -278,7 +278,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
}
LinkOptHintsLoadCmd = Load.Ptr;
- } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
+ } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
// Multiple dyldinfo load commands
if (DyldInfoLoadCmd) {
@@ -401,6 +401,9 @@ SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
case MachO::N_UNDF :
return SymbolRef::ST_Unknown;
case MachO::N_SECT :
+ section_iterator Sec = *getSymbolSection(Symb);
+ if (Sec->isData() || Sec->isBSS())
+ return SymbolRef::ST_Data;
return SymbolRef::ST_Function;
}
return SymbolRef::ST_Other;
@@ -445,22 +448,18 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
return Result;
}
-std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const {
+ErrorOr<section_iterator>
+MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
uint8_t index = Entry.n_sect;
- if (index == 0) {
- Res = section_end();
- } else {
- DataRefImpl DRI;
- DRI.d.a = index - 1;
- if (DRI.d.a >= Sections.size())
- report_fatal_error("getSymbolSection: Invalid section index.");
- Res = section_iterator(SectionRef(DRI, this));
- }
-
- return std::error_code();
+ if (index == 0)
+ return section_end();
+ DataRefImpl DRI;
+ DRI.d.a = index - 1;
+ if (DRI.d.a >= Sections.size())
+ report_fatal_error("getSymbolSection: Invalid section index.");
+ return section_iterator(SectionRef(DRI, this));
}
unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
@@ -487,9 +486,32 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
}
uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
- if (is64Bit())
- return getSection64(Sec).size;
- return getSection(Sec).size;
+  // In the case of a malformed Mach-O file, where the section offset is past
+  // the end of the file or some part of the section size is past the end of
+  // the file, return a size of zero or a size that covers the rest of the
+  // file but does not extend past the end of the file.
+ uint32_t SectOffset, SectType;
+ uint64_t SectSize;
+
+ if (is64Bit()) {
+ MachO::section_64 Sect = getSection64(Sec);
+ SectOffset = Sect.offset;
+ SectSize = Sect.size;
+ SectType = Sect.flags & MachO::SECTION_TYPE;
+ } else {
+ MachO::section Sect = getSection(Sec);
+ SectOffset = Sect.offset;
+ SectSize = Sect.size;
+ SectType = Sect.flags & MachO::SECTION_TYPE;
+ }
+ if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL)
+ return SectSize;
+ uint64_t FileSize = getData().size();
+ if (SectOffset > FileSize)
+ return 0;
+ if (FileSize - SectOffset < SectSize)
+ return FileSize - SectOffset;
+ return SectSize;
}
std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
@@ -1136,8 +1158,7 @@ Triple MachOObjectFile::getThumbArch(uint32_t CPUType, uint32_t CPUSubType,
}
Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType,
- const char **McpuDefault,
- Triple *ThumbTriple) {
+ const char **McpuDefault, Triple *ThumbTriple) {
Triple T = MachOObjectFile::getArch(CPUType, CPUSubType, McpuDefault);
*ThumbTriple = MachOObjectFile::getThumbArch(CPUType, CPUSubType,
McpuDefault);
@@ -1212,8 +1233,8 @@ dice_iterator MachOObjectFile::end_dices() const {
return dice_iterator(DiceRef(DRI, this));
}
-ExportEntry::ExportEntry(ArrayRef<uint8_t> T)
- : Trie(T), Malformed(false), Done(false) { }
+ExportEntry::ExportEntry(ArrayRef<uint8_t> T)
+ : Trie(T), Malformed(false), Done(false) {}
void ExportEntry::moveToFirst() {
pushNode(0);
@@ -1226,7 +1247,7 @@ void ExportEntry::moveToEnd() {
}
bool ExportEntry::operator==(const ExportEntry &Other) const {
- // Common case, one at end, other iterating from begin.
+ // Common case, one at end, other iterating from begin.
if (Done || Other.Done)
return (Done == Other.Done);
// Not equal if different stack sizes.
@@ -1240,7 +1261,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const {
if (Stack[i].Start != Other.Stack[i].Start)
return false;
}
- return true;
+ return true;
}
uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) {
@@ -1281,11 +1302,10 @@ uint32_t ExportEntry::nodeOffset() const {
return Stack.back().Start - Trie.begin();
}
-ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
- : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0),
- ImportName(nullptr), ChildCount(0), NextChildIndex(0),
- ParentStringLength(0), IsExportNode(false) {
-}
+ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
+ : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0),
+ ImportName(nullptr), ChildCount(0), NextChildIndex(0),
+ ParentStringLength(0), IsExportNode(false) {}
void ExportEntry::pushNode(uint64_t offset) {
const uint8_t *Ptr = Trie.begin() + offset;
@@ -1302,7 +1322,7 @@ void ExportEntry::pushNode(uint64_t offset) {
} else {
State.Address = readULEB128(State.Current);
if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
- State.Other = readULEB128(State.Current);
+ State.Other = readULEB128(State.Current);
}
}
State.ChildCount = *Children;
@@ -1339,7 +1359,7 @@ void ExportEntry::pushDownUntilBottom() {
//
// There is one "export" node for each exported symbol. But because some
// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export
-// node may have child nodes too.
+// node may have child nodes too.
//
// The algorithm for moveNext() is to keep moving down the leftmost unvisited
// child until hitting a node with no children (which is an export node or
@@ -1372,7 +1392,7 @@ void ExportEntry::moveNext() {
Done = true;
}
-iterator_range<export_iterator>
+iterator_range<export_iterator>
MachOObjectFile::exports(ArrayRef<uint8_t> Trie) {
ExportEntry Start(Trie);
if (Trie.size() == 0)
@@ -1383,15 +1403,13 @@ MachOObjectFile::exports(ArrayRef<uint8_t> Trie) {
ExportEntry Finish(Trie);
Finish.moveToEnd();
- return iterator_range<export_iterator>(export_iterator(Start),
- export_iterator(Finish));
+ return make_range(export_iterator(Start), export_iterator(Finish));
}
iterator_range<export_iterator> MachOObjectFile::exports() const {
return exports(getDyldInfoExportsTrie());
}
-
MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit)
: Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
@@ -1555,17 +1573,14 @@ MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) {
MachORebaseEntry Finish(Opcodes, is64);
Finish.moveToEnd();
- return iterator_range<rebase_iterator>(rebase_iterator(Start),
- rebase_iterator(Finish));
+ return make_range(rebase_iterator(Start), rebase_iterator(Finish));
}
iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const {
return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit());
}
-
-MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit,
- Kind BK)
+MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK)
: Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0),
BindType(0), PointerSize(is64Bit ? 8 : 4),
@@ -1769,7 +1784,6 @@ int64_t MachOBindEntry::readSLEB128() {
return Result;
}
-
uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; }
@@ -1810,8 +1824,7 @@ MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64,
MachOBindEntry Finish(Opcodes, is64, BKind);
Finish.moveToEnd();
- return iterator_range<bind_iterator>(bind_iterator(Start),
- bind_iterator(Finish));
+ return make_range(bind_iterator(Start), bind_iterator(Finish));
}
iterator_range<bind_iterator> MachOObjectFile::bindTable() const {
@@ -1841,8 +1854,7 @@ MachOObjectFile::end_load_commands() const {
iterator_range<MachOObjectFile::load_command_iterator>
MachOObjectFile::load_commands() const {
- return iterator_range<load_command_iterator>(begin_load_commands(),
- end_load_commands());
+ return make_range(begin_load_commands(), end_load_commands());
}
StringRef
@@ -2207,66 +2219,66 @@ MachOObjectFile::getLinkOptHintsLoadCommand() const {
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
- if (!DyldInfoLoadCmd)
- return ArrayRef<uint8_t>();
+ if (!DyldInfoLoadCmd)
+ return None;
- MachO::dyld_info_command DyldInfo
- = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
- const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
- getPtr(this, DyldInfo.rebase_off));
- return ArrayRef<uint8_t>(Ptr, DyldInfo.rebase_size);
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr =
+ reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.rebase_off));
+ return makeArrayRef(Ptr, DyldInfo.rebase_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
- if (!DyldInfoLoadCmd)
- return ArrayRef<uint8_t>();
+ if (!DyldInfoLoadCmd)
+ return None;
- MachO::dyld_info_command DyldInfo
- = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
- const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
- getPtr(this, DyldInfo.bind_off));
- return ArrayRef<uint8_t>(Ptr, DyldInfo.bind_size);
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr =
+ reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.bind_off));
+ return makeArrayRef(Ptr, DyldInfo.bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
- if (!DyldInfoLoadCmd)
- return ArrayRef<uint8_t>();
+ if (!DyldInfoLoadCmd)
+ return None;
- MachO::dyld_info_command DyldInfo
- = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
- const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
- getPtr(this, DyldInfo.weak_bind_off));
- return ArrayRef<uint8_t>(Ptr, DyldInfo.weak_bind_size);
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr =
+ reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.weak_bind_off));
+ return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
- if (!DyldInfoLoadCmd)
- return ArrayRef<uint8_t>();
+ if (!DyldInfoLoadCmd)
+ return None;
- MachO::dyld_info_command DyldInfo
- = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
- const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
- getPtr(this, DyldInfo.lazy_bind_off));
- return ArrayRef<uint8_t>(Ptr, DyldInfo.lazy_bind_size);
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr =
+ reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.lazy_bind_off));
+ return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
- if (!DyldInfoLoadCmd)
- return ArrayRef<uint8_t>();
+ if (!DyldInfoLoadCmd)
+ return None;
- MachO::dyld_info_command DyldInfo
- = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
- const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
- getPtr(this, DyldInfo.export_off));
- return ArrayRef<uint8_t>(Ptr, DyldInfo.export_size);
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr =
+ reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.export_off));
+ return makeArrayRef(Ptr, DyldInfo.export_size);
}
ArrayRef<uint8_t> MachOObjectFile::getUuid() const {
if (!UuidLoadCmd)
- return ArrayRef<uint8_t>();
+ return None;
// Returning a pointer is fine as uuid doesn't need endian swapping.
const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid);
- return ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), 16);
+ return makeArrayRef(reinterpret_cast<const uint8_t *>(Ptr), 16);
}
StringRef MachOObjectFile::getStringTableData() const {
@@ -2315,4 +2327,3 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) {
return EC;
return std::move(Ret);
}
-
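
A worked illustration (hypothetical numbers) of the getSectionSize clamping added above:

    // Assume getData().size() == 4096 for a truncated Mach-O file:
    //   SectOffset = 5000, SectSize = 100 -> 0    (offset already past EOF)
    //   SectOffset = 4000, SectSize = 500 -> 96   (clamped to 4096 - 4000)
    //   SectOffset = 1000, SectSize = 500 -> 500  (fits; returned unchanged)
    // S_ZEROFILL / S_GB_ZEROFILL sections return SectSize as-is: they occupy
    // no bytes in the file, so the file size cannot truncate them.
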
diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp
index 1d0e69e..a1c83b9 100644
--- a/contrib/llvm/lib/Object/MachOUniversal.cpp
+++ b/contrib/llvm/lib/Object/MachOUniversal.cpp
@@ -69,14 +69,14 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch(
ErrorOr<std::unique_ptr<MachOObjectFile>>
MachOUniversalBinary::ObjectForArch::getAsObjectFile() const {
- if (Parent) {
- StringRef ParentData = Parent->getData();
- StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
- StringRef ObjectName = Parent->getFileName();
- MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
- return ObjectFile::createMachOObjectFile(ObjBuffer);
- }
- return object_error::parse_failed;
+ if (!Parent)
+ return object_error::parse_failed;
+
+ StringRef ParentData = Parent->getData();
+ StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
+ StringRef ObjectName = Parent->getFileName();
+ MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
+ return ObjectFile::createMachOObjectFile(ObjBuffer);
}
ErrorOr<std::unique_ptr<Archive>>
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
index 5c4b7a6..b44c1a1 100644
--- a/contrib/llvm/lib/Object/Object.cpp
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -98,8 +98,10 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
LLVMSymbolIteratorRef Sym) {
- if (std::error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
+ ErrorOr<section_iterator> SecOrErr = (*unwrap(Sym))->getSection();
+ if (std::error_code ec = SecOrErr.getError())
report_fatal_error(ec.message());
+ *unwrap(Sect) = *SecOrErr;
}
// ObjectFile Symbol iterators
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index f82edae..d12dc41 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -29,10 +29,10 @@ ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source)
: SymbolicFile(Type, Source) {}
bool SectionRef::containsSymbol(SymbolRef S) const {
- section_iterator SymSec = getObject()->section_end();
- if (S.getSection(SymSec))
+ ErrorOr<section_iterator> SymSec = S.getSection();
+ if (!SymSec)
return false;
- return *this == *SymSec;
+ return *this == **SymSec;
}
uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const {
diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp
index 854e68e..bf79dfb 100644
--- a/contrib/llvm/lib/Object/SymbolicFile.cpp
+++ b/contrib/llvm/lib/Object/SymbolicFile.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
@@ -54,9 +56,10 @@ ErrorOr<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
case sys::fs::file_magic::macho_kext_bundle:
- case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return ObjectFile::createObjectFile(Object, Type);
+ case sys::fs::file_magic::coff_import_library:
+ return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object));
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::coff_object: {
diff --git a/contrib/llvm/lib/Option/Arg.cpp b/contrib/llvm/lib/Option/Arg.cpp
index ac00073..c3de2d1 100644
--- a/contrib/llvm/lib/Option/Arg.cpp
+++ b/contrib/llvm/lib/Option/Arg.cpp
@@ -13,6 +13,7 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
using namespace llvm::opt;
@@ -43,23 +44,25 @@ Arg::~Arg() {
}
}
-void Arg::dump() const {
- llvm::errs() << "<";
+void Arg::print(raw_ostream& O) const {
+ O << "<";
- llvm::errs() << " Opt:";
- Opt.dump();
+ O << " Opt:";
+ Opt.print(O);
- llvm::errs() << " Index:" << Index;
+ O << " Index:" << Index;
- llvm::errs() << " Values: [";
+ O << " Values: [";
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
- if (i) llvm::errs() << ", ";
- llvm::errs() << "'" << Values[i] << "'";
+ if (i) O << ", ";
+ O << "'" << Values[i] << "'";
}
- llvm::errs() << "]>\n";
+ O << "]>\n";
}
+LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); }
+
std::string Arg::getAsString(const ArgList &Args) const {
SmallString<256> Res;
llvm::raw_svector_ostream OS(Res);
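
The Arg change above (and the matching ArgList and Option changes below) apply one refactoring pattern: dump() becomes a thin wrapper over a stream-taking print(). A minimal sketch of the pattern for a class adopting it (MyClass is a placeholder):

    #include "llvm/Support/Compiler.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    struct MyClass {
      void print(llvm::raw_ostream &O) const { O << "<MyClass>\n"; }
      // dump() remains for debugger use but no longer hard-codes errs().
      LLVM_DUMP_METHOD void dump() const { print(llvm::dbgs()); }
    };
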
diff --git a/contrib/llvm/lib/Option/ArgList.cpp b/contrib/llvm/lib/Option/ArgList.cpp
index a74ead6..0826ef8 100644
--- a/contrib/llvm/lib/Option/ArgList.cpp
+++ b/contrib/llvm/lib/Option/ArgList.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/Option.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -258,6 +259,21 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
}
}
+void ArgList::AddAllArgs(ArgStringList &Output,
+ ArrayRef<OptSpecifier> Ids) const {
+ for (const Arg *Arg : Args) {
+ for (OptSpecifier Id : Ids) {
+ if (Arg->getOption().matches(Id)) {
+ Arg->claim();
+ Arg->render(*this, Output);
+ break;
+ }
+ }
+ }
+}
+
+/// This 3-opt variant of AddAllArgs could be eliminated in favor of one
+/// that accepts a single specifier, since the ArrayRef overload above
+/// already accepts any number of them.
void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1, OptSpecifier Id2) const {
for (auto Arg: filtered(Id0, Id1, Id2)) {
@@ -313,6 +329,15 @@ const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
return MakeArgString(LHS + RHS);
}
+void ArgList::print(raw_ostream &O) const {
+ for (Arg *A : *this) {
+ O << "* ";
+ A->print(O);
+ }
+}
+
+LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); }
+
//
void InputArgList::releaseMemory() {
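
A hedged example of the new ArrayRef-based AddAllArgs overload added above (OPT_I and OPT_D are placeholder IDs; real tools use their tablegen'd OPT_ enums):

    #include "llvm/Option/ArgList.h"
    using namespace llvm::opt;

    enum ID { OPT_INVALID = 0, OPT_I, OPT_D }; // hypothetical option IDs

    static void render(const ArgList &Args, ArgStringList &CmdArgs) {
      // Claims and renders every -I and -D occurrence in command-line order;
      // the inner loop breaks after the first specifier an Arg matches.
      Args.AddAllArgs(CmdArgs, {OPT_I, OPT_D});
    }
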
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index e83536f..09d4ceb 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -84,11 +84,9 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) {
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
-OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos,
- bool IgnoreCase)
- : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos),
- IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0),
- FirstSearchableIndex(0) {
+OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
+ : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0),
+ TheUnknownOptionID(0), FirstSearchableIndex(0) {
// Explicitly zero initialize the error to work around a bug in array
// value-initialization on MinGW with gcc 4.3.5.
@@ -199,8 +197,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
if (isInput(PrefixesUnion, Str))
return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
- const Info *Start = OptionInfos + FirstSearchableIndex;
- const Info *End = OptionInfos + getNumOptions();
+ const Info *Start = OptionInfos.begin() + FirstSearchableIndex;
+ const Info *End = OptionInfos.end();
StringRef Name = StringRef(Str).ltrim(PrefixChars);
// Search for the first next option which could be a prefix.
diff --git a/contrib/llvm/lib/Option/Option.cpp b/contrib/llvm/lib/Option/Option.cpp
index 221414d..ebf05aa 100644
--- a/contrib/llvm/lib/Option/Option.cpp
+++ b/contrib/llvm/lib/Option/Option.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -35,10 +36,10 @@ Option::Option(const OptTable::Info *info, const OptTable *owner)
}
}
-void Option::dump() const {
- llvm::errs() << "<";
+void Option::print(raw_ostream &O) const {
+ O << "<";
switch (getKind()) {
-#define P(N) case N: llvm::errs() << #N; break
+#define P(N) case N: O << #N; break
P(GroupClass);
P(InputClass);
P(UnknownClass);
@@ -54,33 +55,35 @@ void Option::dump() const {
}
if (Info->Prefixes) {
- llvm::errs() << " Prefixes:[";
- for (const char * const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) {
- llvm::errs() << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", ");
+ O << " Prefixes:[";
+ for (const char *const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) {
+ O << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", ");
}
- llvm::errs() << ']';
+ O << ']';
}
- llvm::errs() << " Name:\"" << getName() << '"';
+ O << " Name:\"" << getName() << '"';
const Option Group = getGroup();
if (Group.isValid()) {
- llvm::errs() << " Group:";
- Group.dump();
+ O << " Group:";
+ Group.print(O);
}
const Option Alias = getAlias();
if (Alias.isValid()) {
- llvm::errs() << " Alias:";
- Alias.dump();
+ O << " Alias:";
+ Alias.print(O);
}
if (getKind() == MultiArgClass)
- llvm::errs() << " NumArgs:" << getNumArgs();
+ O << " NumArgs:" << getNumArgs();
- llvm::errs() << ">\n";
+ O << ">\n";
}
+void Option::dump() const { print(dbgs()); }
+
bool Option::matches(OptSpecifier Opt) const {
// Aliases are never considered in matching, look through them.
const Option Alias = getAlias();
diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp
index ba71320..8ba81f7 100644
--- a/contrib/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm/lib/Passes/PassBuilder.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -28,9 +29,14 @@
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def
index d768a3a..241a789 100644
--- a/contrib/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm/lib/Passes/PassRegistry.def
@@ -27,10 +27,13 @@ MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
#ifndef MODULE_PASS
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
+MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
+MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs()))
+MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("verify", VerifierPass())
#undef MODULE_PASS
@@ -54,6 +57,7 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
FUNCTION_ANALYSIS("loops", LoopAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
+FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
@@ -62,6 +66,7 @@ FUNCTION_ANALYSIS("targetir",
#ifndef FUNCTION_PASS
#define FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+FUNCTION_PASS("adce", ADCEPass())
FUNCTION_PASS("early-cse", EarlyCSEPass())
FUNCTION_PASS("instcombine", InstCombinePass())
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
@@ -71,7 +76,9 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
+FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
+FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
#undef FUNCTION_PASS
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
index cf04fea..55c0fb4 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
@@ -181,18 +181,6 @@ void FunctionRecordIterator::skipOtherFiles() {
*this = FunctionRecordIterator();
}
-/// Get the function name from the record, removing the filename prefix if
-/// necessary.
-static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) {
- StringRef FunctionName = Record.FunctionName;
- if (Record.Filenames.empty())
- return FunctionName;
- StringRef Filename = sys::path::filename(Record.Filenames[0]);
- if (FunctionName.startswith(Filename))
- FunctionName = FunctionName.drop_front(Filename.size() + 1);
- return FunctionName;
-}
-
ErrorOr<std::unique_ptr<CoverageMapping>>
CoverageMapping::load(CoverageMappingReader &CoverageReader,
IndexedInstrProfReader &ProfileReader) {
@@ -216,7 +204,11 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader,
assert(!Record.MappingRegions.empty() && "Function has no regions");
- FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames);
+ StringRef OrigFuncName = Record.FunctionName;
+ if (!Record.Filenames.empty())
+ OrigFuncName =
+ getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
+ FunctionRecord Function(OrigFuncName, Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
if (!ExecutionCount)
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
index 334a3f5..a0f82a0 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
@@ -290,36 +290,25 @@ std::error_code RawCoverageMappingReader::read() {
return std::error_code();
}
-namespace {
-
-/// \brief A helper structure to access the data from a section
-/// in an object file.
-struct SectionData {
- StringRef Data;
- uint64_t Address;
-
- std::error_code load(SectionRef &Section) {
- if (auto Err = Section.getContents(Data))
- return Err;
- Address = Section.getAddress();
- return std::error_code();
- }
+std::error_code InstrProfSymtab::create(SectionRef &Section) {
+ if (auto Err = Section.getContents(Data))
+ return Err;
+ Address = Section.getAddress();
+ return std::error_code();
+}
- std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) {
- if (Pointer < Address)
- return coveragemap_error::malformed;
- auto Offset = Pointer - Address;
- if (Offset + Size > Data.size())
- return coveragemap_error::malformed;
- Result = Data.substr(Pointer - Address, Size);
- return std::error_code();
- }
-};
+StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) {
+ if (Pointer < Address)
+ return StringRef();
+ auto Offset = Pointer - Address;
+ if (Offset + Size > Data.size())
+ return StringRef();
+ return Data.substr(Pointer - Address, Size);
}
template <typename T, support::endianness Endian>
-std::error_code readCoverageMappingData(
- SectionData &ProfileNames, StringRef Data,
+static std::error_code readCoverageMappingData(
+ InstrProfSymtab &ProfileNames, StringRef Data,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
std::vector<StringRef> &Filenames) {
using namespace support;
@@ -343,7 +332,7 @@ std::error_code readCoverageMappingData(
// Skip past the function records, saving the start and end for later.
const char *FunBuf = Buf;
- Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t));
+ Buf += NRecords * sizeof(coverage::CovMapFunctionRecord<T>);
const char *FunEnd = Buf;
// Get the filenames.
@@ -366,12 +355,15 @@ std::error_code readCoverageMappingData(
// before reading the next map.
Buf += alignmentAdjustment(Buf, 8);
- while (FunBuf < FunEnd) {
+ auto CFR =
+ reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf);
+ while ((const char *)CFR < FunEnd) {
// Read the function information
- T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf);
- uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
- uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
- uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf);
+ T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr);
+ uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize);
+ uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize);
+ uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash);
+ CFR++;
// Now use that to read the coverage data.
if (CovBuf + DataSize > CovEnd)
@@ -386,9 +378,9 @@ std::error_code readCoverageMappingData(
continue;
// Finally, grab the name and create a record.
- StringRef FuncName;
- if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName))
- return EC;
+ StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize);
+ if (NameSize && FuncName.empty())
+ return coveragemap_error::malformed;
Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
FilenamesBegin, Filenames.size() - FilenamesBegin));
@@ -401,7 +393,7 @@ std::error_code readCoverageMappingData(
static const char *TestingFormatMagic = "llvmcovmtestdata";
static std::error_code loadTestingFormat(StringRef Data,
- SectionData &ProfileNames,
+ InstrProfSymtab &ProfileNames,
StringRef &CoverageMapping,
uint8_t &BytesInAddress,
support::endianness &Endian) {
@@ -420,14 +412,14 @@ static std::error_code loadTestingFormat(StringRef Data,
if (Data.size() < 1)
return coveragemap_error::truncated;
N = 0;
- ProfileNames.Address =
+ uint64_t Address =
decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
if (N > Data.size())
return coveragemap_error::malformed;
Data = Data.substr(N);
if (Data.size() < ProfileNamesSize)
return coveragemap_error::malformed;
- ProfileNames.Data = Data.substr(0, ProfileNamesSize);
+ ProfileNames.create(Data.substr(0, ProfileNamesSize), Address);
CoverageMapping = Data.substr(ProfileNamesSize);
return std::error_code();
}
@@ -443,12 +435,10 @@ static ErrorOr<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) {
return coveragemap_error::no_data_found;
}
-static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
- SectionData &ProfileNames,
- StringRef &CoverageMapping,
- uint8_t &BytesInAddress,
- support::endianness &Endian,
- StringRef Arch) {
+static std::error_code
+loadBinaryFormat(MemoryBufferRef ObjectBuffer, InstrProfSymtab &ProfileNames,
+ StringRef &CoverageMapping, uint8_t &BytesInAddress,
+ support::endianness &Endian, StringRef Arch) {
auto BinOrErr = object::createBinary(ObjectBuffer);
if (std::error_code EC = BinOrErr.getError())
return EC;
@@ -477,17 +467,18 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
: support::endianness::big;
// Look for the sections that we are interested in.
- auto NamesSection = lookupSection(*OF, "__llvm_prf_names");
+ auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false));
if (auto EC = NamesSection.getError())
return EC;
- auto CoverageSection = lookupSection(*OF, "__llvm_covmap");
+ auto CoverageSection =
+ lookupSection(*OF, getInstrProfCoverageSectionName(false));
if (auto EC = CoverageSection.getError())
return EC;
// Get the contents of the given sections.
if (std::error_code EC = CoverageSection->getContents(CoverageMapping))
return EC;
- if (std::error_code EC = ProfileNames.load(*NamesSection))
+ if (std::error_code EC = ProfileNames.create(*NamesSection))
return EC;
return std::error_code();
@@ -498,33 +489,33 @@ BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
StringRef Arch) {
std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
- SectionData Profile;
+ InstrProfSymtab ProfileNames;
StringRef Coverage;
uint8_t BytesInAddress;
support::endianness Endian;
std::error_code EC;
if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
// This is a special format used for testing.
- EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage,
+ EC = loadTestingFormat(ObjectBuffer->getBuffer(), ProfileNames, Coverage,
BytesInAddress, Endian);
else
- EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage,
- BytesInAddress, Endian, Arch);
+ EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), ProfileNames,
+ Coverage, BytesInAddress, Endian, Arch);
if (EC)
return EC;
if (BytesInAddress == 4 && Endian == support::endianness::little)
EC = readCoverageMappingData<uint32_t, support::endianness::little>(
- Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames);
else if (BytesInAddress == 4 && Endian == support::endianness::big)
EC = readCoverageMappingData<uint32_t, support::endianness::big>(
- Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames);
else if (BytesInAddress == 8 && Endian == support::endianness::little)
EC = readCoverageMappingData<uint64_t, support::endianness::little>(
- Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames);
else if (BytesInAddress == 8 && Endian == support::endianness::big)
EC = readCoverageMappingData<uint64_t, support::endianness::big>(
- Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames);
else
return coveragemap_error::malformed;
if (EC)
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 92822a7..f5acd23 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -12,6 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -28,28 +32,32 @@ class InstrProfErrorCategoryType : public std::error_category {
return "Success";
case instrprof_error::eof:
return "End of File";
+ case instrprof_error::unrecognized_format:
+ return "Unrecognized instrumentation profile encoding format";
case instrprof_error::bad_magic:
- return "Invalid profile data (bad magic)";
+ return "Invalid instrumentation profile data (bad magic)";
case instrprof_error::bad_header:
- return "Invalid profile data (file header is corrupt)";
+ return "Invalid instrumentation profile data (file header is corrupt)";
case instrprof_error::unsupported_version:
- return "Unsupported profiling format version";
+ return "Unsupported instrumentation profile format version";
case instrprof_error::unsupported_hash_type:
- return "Unsupported profiling hash";
+ return "Unsupported instrumentation profile hash type";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
return "Truncated profile data";
case instrprof_error::malformed:
- return "Malformed profile data";
+ return "Malformed instrumentation profile data";
case instrprof_error::unknown_function:
return "No profile data available for function";
case instrprof_error::hash_mismatch:
- return "Function hash mismatch";
+ return "Function control flow change detected (hash mismatch)";
case instrprof_error::count_mismatch:
- return "Function count mismatch";
+ return "Function basic block count change detected (counter mismatch)";
case instrprof_error::counter_overflow:
return "Counter overflow";
+ case instrprof_error::value_site_count_mismatch:
+ return "Function value site count change detected (counter mismatch)";
}
llvm_unreachable("A value of instrprof_error has no message.");
}
@@ -61,3 +69,415 @@ static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
const std::error_category &llvm::instrprof_category() {
return *ErrorCategory;
}
+
+namespace llvm {
+
+std::string getPGOFuncName(StringRef RawFuncName,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FileName,
+ uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
+
+ // Function names may be prefixed with a binary '1' to indicate
+ // that the backend should not modify the symbols due to any platform
+ // naming convention. Do not include that '1' in the PGO profile name.
+ if (RawFuncName[0] == '\1')
+ RawFuncName = RawFuncName.substr(1);
+
+ std::string FuncName = RawFuncName;
+ if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+ // For local symbols, prepend the main file name to distinguish them.
+ // Do not include the full path in the file name since there's no guarantee
+ // that it will stay the same, e.g., if the files are checked out from
+ // version control in different locations.
+ if (FileName.empty())
+ FuncName = FuncName.insert(0, "<unknown>:");
+ else
+ FuncName = FuncName.insert(0, FileName.str() + ":");
+ }
+ return FuncName;
+}
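
A few hypothetical inputs make the keying scheme concrete (a hedged usage sketch, assuming only the declarations above):

//   getPGOFuncName("foo", GlobalValue::ExternalLinkage, "a.c", 0) -> "foo"
//   getPGOFuncName("bar", GlobalValue::InternalLinkage, "a.c", 0) -> "a.c:bar"
//   getPGOFuncName("bar", GlobalValue::InternalLinkage, "",    0) -> "<unknown>:bar"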
+
+std::string getPGOFuncName(const Function &F, uint64_t Version) {
+ return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(),
+ Version);
+}
+
+StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
+ if (FileName.empty())
+ return PGOFuncName;
+ // Drop the file name including ':'. See also getPGOFuncName.
+ if (PGOFuncName.startswith(FileName))
+ PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1);
+ return PGOFuncName;
+}
+
+// \p FuncName is the string used as the profile lookup key for the function.
+// A symbol is created to hold the name. Return the legalized symbol name.
+static std::string getPGOFuncNameVarName(StringRef FuncName,
+ GlobalValue::LinkageTypes Linkage) {
+ std::string VarName = getInstrProfNameVarPrefix();
+ VarName += FuncName;
+
+ if (!GlobalValue::isLocalLinkage(Linkage))
+ return VarName;
+
+ // Now fix up illegal chars in local VarName that may upset the assembler.
+ const char *InvalidChars = "-:<>\"'";
+ size_t found = VarName.find_first_of(InvalidChars);
+ while (found != std::string::npos) {
+ VarName[found] = '_';
+ found = VarName.find_first_of(InvalidChars, found + 1);
+ }
+ return VarName;
+}
+
+GlobalVariable *createPGOFuncNameVar(Module &M,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FuncName) {
+
+ // We generally want to match the function's linkage, but available_externally
+ // and extern_weak both have the wrong semantics, and anything that doesn't
+ // need to link across compilation units doesn't need to be visible at all.
+ if (Linkage == GlobalValue::ExternalWeakLinkage)
+ Linkage = GlobalValue::LinkOnceAnyLinkage;
+ else if (Linkage == GlobalValue::AvailableExternallyLinkage)
+ Linkage = GlobalValue::LinkOnceODRLinkage;
+ else if (Linkage == GlobalValue::InternalLinkage ||
+ Linkage == GlobalValue::ExternalLinkage)
+ Linkage = GlobalValue::PrivateLinkage;
+
+ auto *Value = ConstantDataArray::getString(M.getContext(), FuncName, false);
+ auto FuncNameVar =
+ new GlobalVariable(M, Value->getType(), true, Linkage, Value,
+ getPGOFuncNameVarName(FuncName, Linkage));
+
+ // Hide the symbol so that we correctly get a copy for each executable.
+ if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+ FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
+
+ return FuncNameVar;
+}
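
Summarizing the linkage fix-ups above (derived from the code itself, not an external spec):

//   extern_weak          -> linkonce      (must exist, may be merged)
//   available_externally -> linkonce_odr
//   internal / external  -> private       (no cross-TU reference needed)
//   any non-local result additionally gets hidden visibility, so each
//   executable ends up with its own copy of the name variable.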
+
+GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
+ return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
+}
+
+instrprof_error
+InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
+ uint64_t Weight) {
+ this->sortByTargetValues();
+ Input.sortByTargetValues();
+ auto I = ValueData.begin();
+ auto IE = ValueData.end();
+ instrprof_error Result = instrprof_error::success;
+ for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE;
+ ++J) {
+ while (I != IE && I->Value < J->Value)
+ ++I;
+ if (I != IE && I->Value == J->Value) {
+ uint64_t JCount = J->Count;
+ bool Overflowed;
+ if (Weight > 1) {
+ JCount = SaturatingMultiply(JCount, Weight, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ }
+ I->Count = SaturatingAdd(I->Count, JCount, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ ++I;
+ continue;
+ }
+ ValueData.insert(I, *J);
+ }
+ return Result;
+}
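
SaturatingAdd and SaturatingMultiply clamp at UINT64_MAX instead of wrapping, so the merge records an overflow but still produces usable counts. A sketch of the add, under the assumption that the LLVM helpers (llvm/Support/MathExtras.h) behave this way:

#include <cstdint>
#include <limits>

uint64_t saturatingAdd(uint64_t A, uint64_t B, bool *Overflowed) {
  uint64_t R = A + B;
  *Overflowed = R < A; // unsigned wrap-around means overflow
  return *Overflowed ? std::numeric_limits<uint64_t>::max() : R;
}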
+
+// Merge Value Profile data from Src record to this record for ValueKind.
+// Scale merged value counts by \p Weight.
+instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
+ InstrProfRecord &Src,
+ uint64_t Weight) {
+ uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+ uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
+ if (ThisNumValueSites != OtherNumValueSites)
+ return instrprof_error::value_site_count_mismatch;
+ std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+ getValueSitesForKind(ValueKind);
+ std::vector<InstrProfValueSiteRecord> &OtherSiteRecords =
+ Src.getValueSitesForKind(ValueKind);
+ instrprof_error Result = instrprof_error::success;
+ for (uint32_t I = 0; I < ThisNumValueSites; I++)
+ MergeResult(Result,
+ ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight));
+ return Result;
+}
+
+instrprof_error InstrProfRecord::merge(InstrProfRecord &Other,
+ uint64_t Weight) {
+ // If the number of counters doesn't match we either have bad data
+ // or a hash collision.
+ if (Counts.size() != Other.Counts.size())
+ return instrprof_error::count_mismatch;
+
+ instrprof_error Result = instrprof_error::success;
+
+ for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
+ bool Overflowed;
+ uint64_t OtherCount = Other.Counts[I];
+ if (Weight > 1) {
+ OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ }
+ Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ }
+
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ MergeResult(Result, mergeValueProfData(Kind, Other, Weight));
+
+ return Result;
+}
+
+// Map indirect call target name hash to name string.
+uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
+ ValueMapType *ValueMap) {
+ if (!ValueMap)
+ return Value;
+ switch (ValueKind) {
+ case IPVK_IndirectCallTarget: {
+ auto Result =
+ std::lower_bound(ValueMap->begin(), ValueMap->end(), Value,
+ [](const std::pair<uint64_t, uint64_t> &LHS,
+ uint64_t RHS) { return LHS.first < RHS; });
+ if (Result != ValueMap->end())
+ Value = (uint64_t)Result->second;
+ break;
+ }
+ }
+ return Value;
+}
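
The remapping above relies on ValueMap being sorted by hash so std::lower_bound can binary-search it. A self-contained sketch of the same lookup (hypothetical names; this version also insists on an exact match before substituting):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using HashMap = std::vector<std::pair<uint64_t, uint64_t>>; // sorted by .first

uint64_t remapHash(uint64_t Hash, const HashMap &Map) {
  auto It = std::lower_bound(Map.begin(), Map.end(), Hash,
                             [](const std::pair<uint64_t, uint64_t> &LHS,
                                uint64_t RHS) { return LHS.first < RHS; });
  if (It != Map.end() && It->first == Hash)
    return It->second;
  return Hash; // no mapping: keep the raw hash value
}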
+
+void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
+ InstrProfValueData *VData, uint32_t N,
+ ValueMapType *ValueMap) {
+ for (uint32_t I = 0; I < N; I++) {
+ VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
+ }
+ std::vector<InstrProfValueSiteRecord> &ValueSites =
+ getValueSitesForKind(ValueKind);
+ if (N == 0)
+ ValueSites.push_back(InstrProfValueSiteRecord());
+ else
+ ValueSites.emplace_back(VData, VData + N);
+}
+
+#define INSTR_PROF_COMMON_API_IMPL
+#include "llvm/ProfileData/InstrProfData.inc"
+
+/*!
+ * \brief ValueProfRecordClosure Interface implementation for InstrProfRecord
+ * class. These C wrappers are used as adaptors so that C++ code can be
+ * invoked as callbacks.
+ */
+uint32_t getNumValueKindsInstrProf(const void *Record) {
+ return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds();
+}
+
+uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) {
+ return reinterpret_cast<const InstrProfRecord *>(Record)
+ ->getNumValueSites(VKind);
+}
+
+uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) {
+ return reinterpret_cast<const InstrProfRecord *>(Record)
+ ->getNumValueData(VKind);
+}
+
+uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK,
+ uint32_t S) {
+ return reinterpret_cast<const InstrProfRecord *>(R)
+ ->getNumValueDataForSite(VK, S);
+}
+
+void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
+ uint32_t K, uint32_t S,
+ uint64_t (*Mapper)(uint32_t, uint64_t)) {
+ return reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite(
+ Dst, K, S, Mapper);
+}
+
+ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
+ ValueProfData *VD =
+ (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData());
+ memset(VD, 0, TotalSizeInBytes);
+ return VD;
+}
+
+static ValueProfRecordClosure InstrProfRecordClosure = {
+ 0,
+ getNumValueKindsInstrProf,
+ getNumValueSitesInstrProf,
+ getNumValueDataInstrProf,
+ getNumValueDataForSiteInstrProf,
+ 0,
+ getValueForSiteInstrProf,
+ allocValueProfDataInstrProf};
+
+// Wrapper implementation using the closure mechanism.
+uint32_t ValueProfData::getSize(const InstrProfRecord &Record) {
+ InstrProfRecordClosure.Record = &Record;
+ return getValueProfDataSize(&InstrProfRecordClosure);
+}
+
+// Wrapper implementation using the closure mechanism.
+std::unique_ptr<ValueProfData>
+ValueProfData::serializeFrom(const InstrProfRecord &Record) {
+ InstrProfRecordClosure.Record = &Record;
+
+ std::unique_ptr<ValueProfData> VPD(
+ serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr));
+ return VPD;
+}
+
+void ValueProfRecord::deserializeTo(InstrProfRecord &Record,
+ InstrProfRecord::ValueMapType *VMap) {
+ Record.reserveSites(Kind, NumValueSites);
+
+ InstrProfValueData *ValueData = getValueProfRecordValueData(this);
+ for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) {
+ uint8_t ValueDataCount = this->SiteCountArray[VSite];
+ Record.addValueData(Kind, VSite, ValueData, ValueDataCount, VMap);
+ ValueData += ValueDataCount;
+ }
+}
+
+// For writing/serializing, Old is the host endianness and New is the byte
+// order intended on disk. For reading/deserialization, Old is the on-disk
+// source endianness and New is the host endianness.
+void ValueProfRecord::swapBytes(support::endianness Old,
+ support::endianness New) {
+ using namespace support;
+ if (Old == New)
+ return;
+
+ if (getHostEndianness() != Old) {
+ sys::swapByteOrder<uint32_t>(NumValueSites);
+ sys::swapByteOrder<uint32_t>(Kind);
+ }
+ uint32_t ND = getValueProfRecordNumValueData(this);
+ InstrProfValueData *VD = getValueProfRecordValueData(this);
+
+  // No need to swap the byte array SiteCountArray.
+ for (uint32_t I = 0; I < ND; I++) {
+ sys::swapByteOrder<uint64_t>(VD[I].Value);
+ sys::swapByteOrder<uint64_t>(VD[I].Count);
+ }
+ if (getHostEndianness() == Old) {
+ sys::swapByteOrder<uint32_t>(NumValueSites);
+ sys::swapByteOrder<uint32_t>(Kind);
+ }
+}
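
The Old/New convention lets one routine serve both serialization directions: a field is only swapped while it is stored in a non-host byte order. For reference, a minimal 32-bit swap with no LLVM helpers assumed:

#include <cstdint>

uint32_t byteSwap32(uint32_t V) {
  return ((V & 0x000000FFu) << 24) | ((V & 0x0000FF00u) << 8) |
         ((V & 0x00FF0000u) >> 8) | ((V & 0xFF000000u) >> 24);
}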
+
+void ValueProfData::deserializeTo(InstrProfRecord &Record,
+ InstrProfRecord::ValueMapType *VMap) {
+ if (NumValueKinds == 0)
+ return;
+
+ ValueProfRecord *VR = getFirstValueProfRecord(this);
+ for (uint32_t K = 0; K < NumValueKinds; K++) {
+ VR->deserializeTo(Record, VMap);
+ VR = getValueProfRecordNext(VR);
+ }
+}
+
+template <class T>
+static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
+ using namespace support;
+ if (Orig == little)
+ return endian::readNext<T, little, unaligned>(D);
+ else
+ return endian::readNext<T, big, unaligned>(D);
+}
+
+static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
+ return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
+ ValueProfData());
+}
+
+instrprof_error ValueProfData::checkIntegrity() {
+ if (NumValueKinds > IPVK_Last + 1)
+ return instrprof_error::malformed;
+  // Total size needs to be a multiple of the quadword size.
+ if (TotalSize % sizeof(uint64_t))
+ return instrprof_error::malformed;
+
+ ValueProfRecord *VR = getFirstValueProfRecord(this);
+ for (uint32_t K = 0; K < this->NumValueKinds; K++) {
+ if (VR->Kind > IPVK_Last)
+ return instrprof_error::malformed;
+ VR = getValueProfRecordNext(VR);
+ if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
+ return instrprof_error::malformed;
+ }
+ return instrprof_error::success;
+}
+
+ErrorOr<std::unique_ptr<ValueProfData>>
+ValueProfData::getValueProfData(const unsigned char *D,
+ const unsigned char *const BufferEnd,
+ support::endianness Endianness) {
+ using namespace support;
+ if (D + sizeof(ValueProfData) > BufferEnd)
+ return instrprof_error::truncated;
+
+ const unsigned char *Header = D;
+ uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness);
+ if (D + TotalSize > BufferEnd)
+ return instrprof_error::too_large;
+
+ std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize);
+ memcpy(VPD.get(), D, TotalSize);
+ // Byte swap.
+ VPD->swapBytesToHost(Endianness);
+
+ instrprof_error EC = VPD->checkIntegrity();
+ if (EC != instrprof_error::success)
+ return EC;
+
+ return std::move(VPD);
+}
+
+void ValueProfData::swapBytesToHost(support::endianness Endianness) {
+ using namespace support;
+ if (Endianness == getHostEndianness())
+ return;
+
+ sys::swapByteOrder<uint32_t>(TotalSize);
+ sys::swapByteOrder<uint32_t>(NumValueKinds);
+
+ ValueProfRecord *VR = getFirstValueProfRecord(this);
+ for (uint32_t K = 0; K < NumValueKinds; K++) {
+ VR->swapBytes(Endianness, getHostEndianness());
+ VR = getValueProfRecordNext(VR);
+ }
+}
+
+void ValueProfData::swapBytesFromHost(support::endianness Endianness) {
+ using namespace support;
+ if (Endianness == getHostEndianness())
+ return;
+
+ ValueProfRecord *VR = getFirstValueProfRecord(this);
+ for (uint32_t K = 0; K < NumValueKinds; K++) {
+ ValueProfRecord *NVR = getValueProfRecordNext(VR);
+ VR->swapBytes(getHostEndianness(), Endianness);
+ VR = NVR;
+ }
+ sys::swapByteOrder<uint32_t>(TotalSize);
+ sys::swapByteOrder<uint32_t>(NumValueKinds);
+}
+
+}
diff --git a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h
deleted file mode 100644
index ebca7b2..0000000
--- a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Shared header for the instrumented profile data reader and writer.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
-#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
-
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MD5.h"
-
-namespace llvm {
-
-namespace IndexedInstrProf {
-enum class HashT : uint32_t {
- MD5,
-
- Last = MD5
-};
-
-static inline uint64_t MD5Hash(StringRef Str) {
- MD5 Hash;
- Hash.update(Str);
- llvm::MD5::MD5Result Result;
- Hash.final(Result);
- // Return the least significant 8 bytes. Our MD5 implementation returns the
- // result in little endian, so we may need to swap bytes.
- using namespace llvm::support;
- return endian::read<uint64_t, little, unaligned>(Result);
-}
-
-static inline uint64_t ComputeHash(HashT Type, StringRef K) {
- switch (Type) {
- case HashT::MD5:
- return IndexedInstrProf::MD5Hash(K);
- }
- llvm_unreachable("Unhandled hash type");
-}
-
-const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
-const uint64_t Version = 2;
-const HashT HashType = HashT::MD5;
-}
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
index 8a529a0..5e83456 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfReader.h"
-#include "InstrProfIndexed.h"
#include "llvm/ADT/STLExtras.h"
#include <cassert>
@@ -55,8 +54,10 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
Result.reset(new RawInstrProfReader64(std::move(Buffer)));
else if (RawInstrProfReader32::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader32(std::move(Buffer)));
- else
+ else if (TextInstrProfReader::hasFormat(*Buffer))
Result.reset(new TextInstrProfReader(std::move(Buffer)));
+ else
+ return instrprof_error::unrecognized_format;
// Initialize the reader and return the result.
if (std::error_code EC = initializeReader(*Result))
@@ -98,16 +99,98 @@ void InstrProfIterator::Increment() {
*this = InstrProfIterator();
}
+bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
+ // Verify that this really looks like plain ASCII text by checking a
+ // 'reasonable' number of characters (up to profile magic size).
+ size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
+ StringRef buffer = Buffer.getBufferStart();
+ return count == 0 ||
+ std::all_of(buffer.begin(), buffer.begin() + count,
+ [](char c) { return ::isprint(c) || ::isspace(c); });
+}
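
The sniffer above inspects at most the first eight bytes (the size of the binary magic), so binary profiles are rejected cheaply. The same heuristic, sketched over a raw buffer with only standard headers:

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>

bool looksLikeTextProfile(const char *Buf, size_t Len) {
  size_t N = std::min(Len, sizeof(uint64_t));
  return N == 0 || std::all_of(Buf, Buf + N, [](unsigned char C) {
           return std::isprint(C) || std::isspace(C);
         });
}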
+
+std::error_code TextInstrProfReader::readHeader() {
+ Symtab.reset(new InstrProfSymtab());
+ return success();
+}
+
+std::error_code
+TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
+
+#define CHECK_LINE_END(Line) \
+ if (Line.is_at_end()) \
+ return error(instrprof_error::truncated);
+#define READ_NUM(Str, Dst) \
+ if ((Str).getAsInteger(10, (Dst))) \
+ return error(instrprof_error::malformed);
+#define VP_READ_ADVANCE(Val) \
+ CHECK_LINE_END(Line); \
+ uint32_t Val; \
+ READ_NUM((*Line), (Val)); \
+ Line++;
+
+ if (Line.is_at_end())
+ return success();
+
+ uint32_t NumValueKinds;
+ if (Line->getAsInteger(10, NumValueKinds)) {
+ // No value profile data
+ return success();
+ }
+ if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
+ return error(instrprof_error::malformed);
+ Line++;
+
+ for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
+ VP_READ_ADVANCE(ValueKind);
+ if (ValueKind > IPVK_Last)
+ return error(instrprof_error::malformed);
+ VP_READ_ADVANCE(NumValueSites);
+ if (!NumValueSites)
+ continue;
+
+ Record.reserveSites(VK, NumValueSites);
+ for (uint32_t S = 0; S < NumValueSites; S++) {
+ VP_READ_ADVANCE(NumValueData);
+
+ std::vector<InstrProfValueData> CurrentValues;
+ for (uint32_t V = 0; V < NumValueData; V++) {
+ CHECK_LINE_END(Line);
+ std::pair<StringRef, StringRef> VD = Line->split(':');
+ uint64_t TakenCount, Value;
+ if (VK == IPVK_IndirectCallTarget) {
+ Symtab->addFuncName(VD.first);
+ Value = IndexedInstrProf::ComputeHash(VD.first);
+ } else {
+ READ_NUM(VD.first, Value);
+ }
+ READ_NUM(VD.second, TakenCount);
+ CurrentValues.push_back({Value, TakenCount});
+ Line++;
+ }
+ Record.addValueData(VK, S, CurrentValues.data(), NumValueData, nullptr);
+ }
+ }
+ return success();
+
+#undef CHECK_LINE_END
+#undef READ_NUM
+#undef VP_READ_ADVANCE
+}
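
For reference, the text-format payload this routine accepts looks like the following hypothetical input (the "<-" annotations are not part of the format):

  1          <- NumValueKinds
  0          <- ValueKind (0 = indirect call targets)
  1          <- NumValueSites for this kind
  2          <- NumValueData at site 0
  foo:100    <- target:count pairs
  bar:24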
+
std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Skip empty lines and comments.
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
- if (Line.is_at_end())
+ if (Line.is_at_end()) {
+ Symtab->finalizeSymtab();
return error(instrprof_error::eof);
+ }
// Read the function name.
Record.Name = *Line++;
+ Symtab->addFuncName(Record.Name);
// Read the function hash.
if (Line.is_at_end())
@@ -136,36 +219,14 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
Record.Counts.push_back(Count);
}
- return success();
-}
+ // Check if value profile data exists and read it if so.
+ if (std::error_code EC = readValueProfileData(Record))
+ return EC;
-template <class IntPtrT>
-static uint64_t getRawMagic();
-
-template <>
-uint64_t getRawMagic<uint64_t>() {
- return
- uint64_t(255) << 56 |
- uint64_t('l') << 48 |
- uint64_t('p') << 40 |
- uint64_t('r') << 32 |
- uint64_t('o') << 24 |
- uint64_t('f') << 16 |
- uint64_t('r') << 8 |
- uint64_t(129);
-}
-
-template <>
-uint64_t getRawMagic<uint32_t>() {
- return
- uint64_t(255) << 56 |
- uint64_t('l') << 48 |
- uint64_t('p') << 40 |
- uint64_t('r') << 32 |
- uint64_t('o') << 24 |
- uint64_t('f') << 16 |
- uint64_t('R') << 8 |
- uint64_t(129);
+  // Finalize the symtab here to avoid a second parsing pass: llvm-profdata
+  // dumps records while it is still reading them.
+ Symtab->finalizeSymtab();
+ return success();
}
template <class IntPtrT>
@@ -174,19 +235,19 @@ bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
return false;
uint64_t Magic =
*reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
- return getRawMagic<IntPtrT>() == Magic ||
- sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
+ return RawInstrProf::getMagic<IntPtrT>() == Magic ||
+ sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
}
template <class IntPtrT>
std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
if (!hasFormat(*DataBuffer))
return error(instrprof_error::bad_magic);
- if (DataBuffer->getBufferSize() < sizeof(RawHeader))
+ if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
return error(instrprof_error::bad_header);
- auto *Header =
- reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
- ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
+ auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
+ DataBuffer->getBufferStart());
+ ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
return readHeader(*Header);
}
@@ -202,29 +263,38 @@ RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
return instrprof_error::eof;
// If there isn't enough space for another header, this is probably just
// garbage at the end of the file.
- if (CurrentPos + sizeof(RawHeader) > End)
+ if (CurrentPos + sizeof(RawInstrProf::Header) > End)
return instrprof_error::malformed;
// The writer ensures each profile is padded to start at an aligned address.
if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
return instrprof_error::malformed;
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
- if (Magic != swap(getRawMagic<IntPtrT>()))
+ if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
return instrprof_error::bad_magic;
// There's another profile to read, so we need to process the header.
- auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);
+ auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
return readHeader(*Header);
}
-static uint64_t getRawVersion() {
- return 1;
+template <class IntPtrT>
+void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
+ for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
+ StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
+ Symtab.addFuncName(FunctionName);
+ const IntPtrT FPtr = swap(I->FunctionPointer);
+ if (!FPtr)
+ continue;
+ Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName));
+ }
+ Symtab.finalizeSymtab();
}
template <class IntPtrT>
std::error_code
-RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
- if (swap(Header.Version) != getRawVersion())
+RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) {
+ if (swap(Header.Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
CountersDelta = swap(Header.CountersDelta);
@@ -232,50 +302,69 @@ RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
auto DataSize = swap(Header.DataSize);
auto CountersSize = swap(Header.CountersSize);
auto NamesSize = swap(Header.NamesSize);
+ auto ValueDataSize = swap(Header.ValueDataSize);
+ ValueKindLast = swap(Header.ValueKindLast);
+
+ auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
+ auto PaddingSize = getNumPaddingBytes(NamesSize);
- ptrdiff_t DataOffset = sizeof(RawHeader);
- ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
+ ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
+ ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes;
ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
- size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
+ ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
+ size_t ProfileSize = ValueDataOffset + ValueDataSize;
auto *Start = reinterpret_cast<const char *>(&Header);
if (Start + ProfileSize > DataBuffer->getBufferEnd())
return error(instrprof_error::bad_header);
- Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);
+ Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
+ Start + DataOffset);
DataEnd = Data + DataSize;
CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
NamesStart = Start + NamesOffset;
+ ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
ProfileEnd = Start + ProfileSize;
+ std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ createSymtab(*NewSymtab.get());
+ Symtab = std::move(NewSymtab);
return success();
}
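
The offsets computed above pin down the raw profile layout: header, then DataSize record structs, then CountersSize 64-bit counters, then NamesSize name bytes, then padding to an 8-byte boundary, then value data. A sketch of the padding rule, assuming getNumPaddingBytes matches the compiler-rt definition:

#include <cstdint>

uint64_t numPaddingBytes(uint64_t SizeInBytes) {
  // Distance to the next 8-byte boundary; 0 when already aligned.
  return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
}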
template <class IntPtrT>
-std::error_code
-RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
- if (Data == DataEnd)
- if (std::error_code EC = readNextHeader(ProfileEnd))
- return EC;
+std::error_code RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) {
+ Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize));
+ if (Record.Name.data() < NamesStart ||
+ Record.Name.data() + Record.Name.size() >
+ reinterpret_cast<const char *>(ValueDataStart))
+ return error(instrprof_error::malformed);
+ return success();
+}
+
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readFuncHash(
+ InstrProfRecord &Record) {
+ Record.Hash = swap(Data->FuncHash);
+ return success();
+}
- // Get the raw data.
- StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize));
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readRawCounts(
+ InstrProfRecord &Record) {
uint32_t NumCounters = swap(Data->NumCounters);
+ IntPtrT CounterPtr = Data->CounterPtr;
if (NumCounters == 0)
return error(instrprof_error::malformed);
- auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters);
- // Check bounds.
+ auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters);
auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
- if (RawName.data() < NamesStart ||
- RawName.data() + RawName.size() > DataBuffer->getBufferEnd() ||
- RawCounts.data() < CountersStart ||
+
+ // Check bounds.
+ if (RawCounts.data() < CountersStart ||
RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
return error(instrprof_error::malformed);
- // Store the data in Record, byte-swapping as necessary.
- Record.Hash = swap(Data->FuncHash);
- Record.Name = RawName;
if (ShouldSwapBytes) {
Record.Counts.clear();
Record.Counts.reserve(RawCounts.size());
@@ -284,8 +373,61 @@ RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
} else
Record.Counts = RawCounts;
+ return success();
+}
+
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readValueProfilingData(InstrProfRecord &Record) {
+
+ Record.clearValueData();
+ CurValueDataSize = 0;
+  // This needs to match the value profile dumper logic in compiler-rt:
+ uint32_t NumValueKinds = 0;
+ for (uint32_t I = 0; I < IPVK_Last + 1; I++)
+ NumValueKinds += (Data->NumValueSites[I] != 0);
+
+ if (!NumValueKinds)
+ return success();
+
+ ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
+ ValueProfData::getValueProfData(ValueDataStart,
+ (const unsigned char *)ProfileEnd,
+ getDataEndianness());
+
+ if (VDataPtrOrErr.getError())
+ return VDataPtrOrErr.getError();
+
+ VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap());
+ CurValueDataSize = VDataPtrOrErr.get()->getSize();
+ return success();
+}
+
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
+ if (atEnd())
+ if (std::error_code EC = readNextHeader(ProfileEnd))
+ return EC;
+
+  // Read the name and set it in Record.
+ if (std::error_code EC = readName(Record))
+ return EC;
+
+ // Read FuncHash and set it in Record.
+ if (std::error_code EC = readFuncHash(Record))
+ return EC;
+
+ // Read raw counts and set Record.
+ if (std::error_code EC = readRawCounts(Record))
+ return EC;
+
+ // Read value data and set Record.
+ if (std::error_code EC = readValueProfilingData(Record))
+ return EC;
+
// Iterate.
- ++Data;
+ advanceData();
return success();
}
@@ -302,52 +444,112 @@ InstrProfLookupTrait::ComputeHash(StringRef K) {
typedef InstrProfLookupTrait::data_type data_type;
typedef InstrProfLookupTrait::offset_type offset_type;
+bool InstrProfLookupTrait::readValueProfilingData(
+ const unsigned char *&D, const unsigned char *const End) {
+ ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
+ ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
+
+ if (VDataPtrOrErr.getError())
+ return false;
+
+ VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
+ D += VDataPtrOrErr.get()->TotalSize;
+
+ return true;
+}
+
data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
offset_type N) {
-
// Check if the data is corrupt. If so, don't try to read it.
if (N % sizeof(uint64_t))
return data_type();
DataBuffer.clear();
- uint64_t NumCounts;
- uint64_t NumEntries = N / sizeof(uint64_t);
std::vector<uint64_t> CounterBuffer;
- for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
- using namespace support;
- // The function hash comes first.
- uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
- if (++I >= NumEntries)
+ using namespace support;
+ const unsigned char *End = D + N;
+ while (D < End) {
+ // Read hash.
+ if (D + sizeof(uint64_t) >= End)
return data_type();
+ uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
- // In v1, we have at least one count.
- // Later, we have the number of counts.
- NumCounts = (1 == FormatVersion)
- ? NumEntries - I
- : endian::readNext<uint64_t, little, unaligned>(D);
- if (1 != FormatVersion)
- ++I;
-
- // If we have more counts than data, this is bogus.
- if (I + NumCounts > NumEntries)
+    // Initialize the number of counters, as implied for FormatVersion == 1.
+    uint64_t CountsSize = N / sizeof(uint64_t) - 1;
+    // For any other format version, read the number of counters explicitly.
+ if (FormatVersion != 1) {
+ if (D + sizeof(uint64_t) > End)
+ return data_type();
+ CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
+ }
+ // Read counter values.
+ if (D + CountsSize * sizeof(uint64_t) > End)
return data_type();
CounterBuffer.clear();
- for (unsigned J = 0; J < NumCounts; ++J)
+ CounterBuffer.reserve(CountsSize);
+ for (uint64_t J = 0; J < CountsSize; ++J)
CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
- DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer)));
+ DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
+
+ // Read value profiling data.
+ if (FormatVersion > 2 && !readValueProfilingData(D, End)) {
+ DataBuffer.clear();
+ return data_type();
+ }
}
return DataBuffer;
}
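
Decoded per function-name key, each on-disk entry above is laid out as follows (annotations only; not part of the format):

  hash         u64, little-endian
  counts-size  u64 (absent in version 1, where it is implied by the payload)
  counts       counts-size x u64
  value data   only for FormatVersion > 2; self-describing, TotalSize first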
+template <typename HashTableImpl>
+std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
+ StringRef FuncName, ArrayRef<InstrProfRecord> &Data) {
+ auto Iter = HashTable->find(FuncName);
+ if (Iter == HashTable->end())
+ return instrprof_error::unknown_function;
+
+ Data = (*Iter);
+ if (Data.empty())
+ return instrprof_error::malformed;
+
+ return instrprof_error::success;
+}
+
+template <typename HashTableImpl>
+std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
+ ArrayRef<InstrProfRecord> &Data) {
+ if (atEnd())
+ return instrprof_error::eof;
+
+ Data = *RecordIterator;
+
+ if (Data.empty())
+ return instrprof_error::malformed;
+
+ return instrprof_error::success;
+}
+
+template <typename HashTableImpl>
+InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
+ const unsigned char *Buckets, const unsigned char *const Payload,
+ const unsigned char *const Base, IndexedInstrProf::HashT HashType,
+ uint64_t Version) {
+ FormatVersion = Version;
+ HashTable.reset(HashTableImpl::Create(
+ Buckets, Payload, Base,
+ typename HashTableImpl::InfoType(HashType, Version)));
+ RecordIterator = HashTable->data_begin();
+}
+
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
if (DataBuffer.getBufferSize() < 8)
return false;
using namespace support;
uint64_t Magic =
endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+ // Verify that it's magical.
return Magic == IndexedInstrProf::Magic;
}
@@ -360,71 +562,91 @@ std::error_code IndexedInstrProfReader::readHeader() {
using namespace support;
+ auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
+ Cur += sizeof(IndexedInstrProf::Header);
+
// Check the magic number.
- uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
+ uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
if (Magic != IndexedInstrProf::Magic)
return error(instrprof_error::bad_magic);
// Read the version.
- FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
+ uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
if (FormatVersion > IndexedInstrProf::Version)
return error(instrprof_error::unsupported_version);
// Read the maximal function count.
- MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
+ MaxFunctionCount =
+ endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
// Read the hash type and start offset.
IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
- endian::readNext<uint64_t, little, unaligned>(Cur));
+ endian::byte_swap<uint64_t, little>(Header->HashType));
if (HashType > IndexedInstrProf::HashT::Last)
return error(instrprof_error::unsupported_hash_type);
- uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
- // The rest of the file is an on disk hash table.
- Index.reset(InstrProfReaderIndex::Create(
- Start + HashOffset, Cur, Start,
- InstrProfLookupTrait(HashType, FormatVersion)));
- // Set up our iterator for readNextRecord.
- RecordIterator = Index->data_begin();
+ uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
+ // The rest of the file is an on disk hash table.
+ InstrProfReaderIndexBase *IndexPtr = nullptr;
+ IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
+ Start + HashOffset, Cur, Start, HashType, FormatVersion);
+ Index.reset(IndexPtr);
return success();
}
-std::error_code IndexedInstrProfReader::getFunctionCounts(
- StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
- auto Iter = Index->find(FuncName);
- if (Iter == Index->end())
- return error(instrprof_error::unknown_function);
+InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
+ if (Symtab.get())
+ return *Symtab.get();
- // Found it. Look for counters with the right hash.
- ArrayRef<InstrProfRecord> Data = (*Iter);
- if (Data.empty())
- return error(instrprof_error::malformed);
+ std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ Index->populateSymtab(*NewSymtab.get());
+
+ Symtab = std::move(NewSymtab);
+ return *Symtab.get();
+}
+ErrorOr<InstrProfRecord>
+IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
+ uint64_t FuncHash) {
+ ArrayRef<InstrProfRecord> Data;
+ std::error_code EC = Index->getRecords(FuncName, Data);
+ if (EC != instrprof_error::success)
+ return EC;
+ // Found it. Look for counters with the right hash.
for (unsigned I = 0, E = Data.size(); I < E; ++I) {
// Check for a match and fill the vector if there is one.
if (Data[I].Hash == FuncHash) {
- Counts = Data[I].Counts;
- return success();
+ return std::move(Data[I]);
}
}
return error(instrprof_error::hash_mismatch);
}
std::error_code
-IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
- // Are we out of records?
- if (RecordIterator == Index->data_end())
- return error(instrprof_error::eof);
+IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
+ std::vector<uint64_t> &Counts) {
+ ErrorOr<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
+ if (std::error_code EC = Record.getError())
+ return EC;
- if ((*RecordIterator).empty())
- return error(instrprof_error::malformed);
+ Counts = Record.get().Counts;
+ return success();
+}
+std::error_code IndexedInstrProfReader::readNextRecord(
+ InstrProfRecord &Record) {
static unsigned RecordIndex = 0;
- ArrayRef<InstrProfRecord> Data = (*RecordIterator);
+
+ ArrayRef<InstrProfRecord> Data;
+
+ std::error_code EC = Index->getRecords(Data);
+ if (EC != instrprof_error::success)
+ return error(EC);
+
Record = Data[RecordIndex++];
if (RecordIndex >= Data.size()) {
- ++RecordIterator;
+ Index->advanceToNextKey();
RecordIndex = 0;
}
return success();
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2188543..9bb03e1 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -13,27 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfWriter.h"
-#include "InstrProfIndexed.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/OnDiskHashTable.h"
+#include <tuple>
using namespace llvm;
namespace {
+static support::endianness ValueProfDataEndianness = support::little;
+
class InstrProfRecordTrait {
public:
typedef StringRef key_type;
typedef StringRef key_type_ref;
- typedef const InstrProfWriter::CounterData *const data_type;
- typedef const InstrProfWriter::CounterData *const data_type_ref;
+ typedef const InstrProfWriter::ProfilingData *const data_type;
+ typedef const InstrProfWriter::ProfilingData *const data_type_ref;
typedef uint64_t hash_value_type;
typedef uint64_t offset_type;
static hash_value_type ComputeHash(key_type_ref K) {
- return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K);
+ return IndexedInstrProf::ComputeHash(K);
}
static std::pair<offset_type, offset_type>
@@ -45,8 +47,15 @@ public:
LE.write<offset_type>(N);
offset_type M = 0;
- for (const auto &Counts : *V)
- M += (2 + Counts.second.size()) * sizeof(uint64_t);
+ for (const auto &ProfileData : *V) {
+ const InstrProfRecord &ProfRecord = ProfileData.second;
+ M += sizeof(uint64_t); // The function hash
+ M += sizeof(uint64_t); // The size of the Counts vector
+ M += ProfRecord.Counts.size() * sizeof(uint64_t);
+
+ // Value data
+ M += ValueProfData::getSize(ProfileData.second);
+ }
LE.write<offset_type>(M);
return std::make_pair(N, M);
@@ -60,50 +69,68 @@ public:
offset_type) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
+ for (const auto &ProfileData : *V) {
+ const InstrProfRecord &ProfRecord = ProfileData.second;
- for (const auto &Counts : *V) {
- LE.write<uint64_t>(Counts.first);
- LE.write<uint64_t>(Counts.second.size());
- for (uint64_t I : Counts.second)
+ LE.write<uint64_t>(ProfileData.first); // Function hash
+ LE.write<uint64_t>(ProfRecord.Counts.size());
+ for (uint64_t I : ProfRecord.Counts)
LE.write<uint64_t>(I);
+
+ // Write value data
+ std::unique_ptr<ValueProfData> VDataPtr =
+ ValueProfData::serializeFrom(ProfileData.second);
+ uint32_t S = VDataPtr->getSize();
+ VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
+ Out.write((const char *)VDataPtr.get(), S);
}
}
};
}
-std::error_code
-InstrProfWriter::addFunctionCounts(StringRef FunctionName,
- uint64_t FunctionHash,
- ArrayRef<uint64_t> Counters) {
- auto &CounterData = FunctionData[FunctionName];
+// Internal interface for testing purpose only.
+void InstrProfWriter::setValueProfDataEndianness(
+ support::endianness Endianness) {
+ ValueProfDataEndianness = Endianness;
+}
+
+std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
+ uint64_t Weight) {
+ auto &ProfileDataMap = FunctionData[I.Name];
+
+ bool NewFunc;
+ ProfilingData::iterator Where;
+ std::tie(Where, NewFunc) =
+ ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
+ InstrProfRecord &Dest = Where->second;
- auto Where = CounterData.find(FunctionHash);
- if (Where == CounterData.end()) {
+ instrprof_error Result;
+ if (NewFunc) {
// We've never seen a function with this name and hash, add it.
- CounterData[FunctionHash] = Counters;
- // We keep track of the max function count as we go for simplicity.
- if (Counters[0] > MaxFunctionCount)
- MaxFunctionCount = Counters[0];
- return instrprof_error::success;
+ Dest = std::move(I);
+    // Fix up the name to avoid a dangling reference.
+ Dest.Name = FunctionData.find(Dest.Name)->getKey();
+ Result = instrprof_error::success;
+ if (Weight > 1) {
+ for (auto &Count : Dest.Counts) {
+ bool Overflowed;
+ Count = SaturatingMultiply(Count, Weight, &Overflowed);
+ if (Overflowed && Result == instrprof_error::success) {
+ Result = instrprof_error::counter_overflow;
+ }
+ }
+ }
+ } else {
+ // We're updating a function we've seen before.
+ Result = Dest.merge(I, Weight);
}
- // We're updating a function we've seen before.
- auto &FoundCounters = Where->second;
- // If the number of counters doesn't match we either have bad data or a hash
- // collision.
- if (FoundCounters.size() != Counters.size())
- return instrprof_error::count_mismatch;
-
- for (size_t I = 0, E = Counters.size(); I < E; ++I) {
- if (FoundCounters[I] + Counters[I] < FoundCounters[I])
- return instrprof_error::counter_overflow;
- FoundCounters[I] += Counters[I];
- }
// We keep track of the max function count as we go for simplicity.
- if (FoundCounters[0] > MaxFunctionCount)
- MaxFunctionCount = FoundCounters[0];
+ // Update this statistic no matter the result of the merge.
+ if (Dest.Counts[0] > MaxFunctionCount)
+ MaxFunctionCount = Dest.Counts[0];
- return instrprof_error::success;
+ return Result;
}
std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
@@ -117,13 +144,23 @@ std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
endian::Writer<little> LE(OS);
// Write the header.
- LE.write<uint64_t>(IndexedInstrProf::Magic);
- LE.write<uint64_t>(IndexedInstrProf::Version);
- LE.write<uint64_t>(MaxFunctionCount);
- LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
+ IndexedInstrProf::Header Header;
+ Header.Magic = IndexedInstrProf::Magic;
+ Header.Version = IndexedInstrProf::Version;
+ Header.MaxFunctionCount = MaxFunctionCount;
+ Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
+ Header.HashOffset = 0;
+ int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
+
+  // Write out all the fields except 'HashOffset'. We need to remember the
+  // offset of that field to allow back-patching it later.
+ for (int I = 0; I < N - 1; I++)
+ LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save a space to write the hash table start location.
uint64_t HashTableStartLoc = OS.tell();
+ // Reserve the space for HashOffset field.
LE.write<uint64_t>(0);
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS);
@@ -138,9 +175,65 @@ void InstrProfWriter::write(raw_fd_ostream &OS) {
// Go back and fill in the hash table start.
using namespace support;
OS.seek(TableStart.first);
+ // Now patch the HashOffset field previously reserved.
endian::Writer<little>(OS).write<uint64_t>(TableStart.second);
}
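
The reserve-then-backpatch sequence above writes a zero placeholder for HashOffset, remembers its file offset, and seeks back once Emit returns. A stripped-down sketch of the same trick with a std::string standing in for the stream (helper names are hypothetical):

#include <cstddef>
#include <cstdint>
#include <string>

void writeU64LE(std::string &Out, uint64_t V) {
  for (int I = 0; I < 8; ++I)
    Out.push_back(char((V >> (8 * I)) & 0xFF));
}

void patchU64LE(std::string &Out, size_t Loc, uint64_t V) {
  std::string Tmp;
  writeU64LE(Tmp, V);
  Out.replace(Loc, 8, Tmp); // overwrite the reserved placeholder
}

// Usage sketch:
//   size_t Loc = Out.size();
//   writeU64LE(Out, 0);               // reserve
//   ... emit the hash table ...
//   patchU64LE(Out, Loc, TableStart); // back-patch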
+static const char *ValueProfKindStr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
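
ValueProfKindStr is built with the X-macro idiom: InstrProfData.inc expands VALUE_PROF_KIND(...) once per kind, and the local #define chooses what each expansion emits. A self-contained sketch of the pattern with made-up kinds:

#define HYPOTHETICAL_KINDS(X) \
  X(IndirectCallTarget, 0)    \
  X(SomeOtherKind, 1)

#define KIND_NAME(Enumerator, Value) #Enumerator,
static const char *KindNames[] = {HYPOTHETICAL_KINDS(KIND_NAME)};
#undef KIND_NAME
// KindNames == {"IndirectCallTarget", "SomeOtherKind"}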
+
+void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
+ InstrProfSymtab &Symtab,
+ raw_fd_ostream &OS) {
+ OS << Func.Name << "\n";
+ OS << "# Func Hash:\n" << Func.Hash << "\n";
+ OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
+ OS << "# Counter Values:\n";
+ for (uint64_t Count : Func.Counts)
+ OS << Count << "\n";
+
+ uint32_t NumValueKinds = Func.getNumValueKinds();
+ if (!NumValueKinds) {
+ OS << "\n";
+ return;
+ }
+
+ OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
+ for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
+ uint32_t NS = Func.getNumValueSites(VK);
+ if (!NS)
+ continue;
+ OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
+ OS << "# NumValueSites:\n" << NS << "\n";
+ for (uint32_t S = 0; S < NS; S++) {
+ uint32_t ND = Func.getNumValueDataForSite(VK, S);
+ OS << ND << "\n";
+ std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
+ for (uint32_t I = 0; I < ND; I++) {
+ if (VK == IPVK_IndirectCallTarget)
+ OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n";
+ else
+ OS << VD[I].Value << ":" << VD[I].Count << "\n";
+ }
+ }
+ }
+
+ OS << "\n";
+}
+
+void InstrProfWriter::writeText(raw_fd_ostream &OS) {
+ InstrProfSymtab Symtab;
+ for (const auto &I : FunctionData)
+ Symtab.addFuncName(I.getKey());
+ Symtab.finalizeSymtab();
+
+ for (const auto &I : FunctionData)
+ for (const auto &Func : I.getValue())
+ writeRecordInText(Func.second, Symtab, OS);
+}
+
std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
std::string Data;
llvm::raw_string_ostream OS(Data);
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
index 920c48a..9ded757 100644
--- a/contrib/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+using namespace llvm::sampleprof;
using namespace llvm;
namespace {
@@ -27,17 +28,25 @@ class SampleProfErrorCategoryType : public std::error_category {
case sampleprof_error::success:
return "Success";
case sampleprof_error::bad_magic:
- return "Invalid file format (bad magic)";
+ return "Invalid sample profile data (bad magic)";
case sampleprof_error::unsupported_version:
- return "Unsupported format version";
+ return "Unsupported sample profile format version";
case sampleprof_error::too_large:
return "Too much profile data";
case sampleprof_error::truncated:
return "Truncated profile data";
case sampleprof_error::malformed:
- return "Malformed profile data";
+ return "Malformed sample profile data";
case sampleprof_error::unrecognized_format:
- return "Unrecognized profile encoding format";
+ return "Unrecognized sample profile encoding format";
+ case sampleprof_error::unsupported_writing_format:
+ return "Profile encoding format unsupported for writing operations";
+ case sampleprof_error::truncated_name_table:
+ return "Truncated function name table";
+ case sampleprof_error::not_implemented:
+ return "Unimplemented feature";
+ case sampleprof_error::counter_overflow:
+ return "Counter overflow";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
@@ -49,3 +58,92 @@ static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
const std::error_category &llvm::sampleprof_category() {
return *ErrorCategory;
}
+
+void LineLocation::print(raw_ostream &OS) const {
+ OS << LineOffset;
+ if (Discriminator > 0)
+ OS << "." << Discriminator;
+}
+
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+ const LineLocation &Loc) {
+ Loc.print(OS);
+ return OS;
+}
+
+void LineLocation::dump() const { print(dbgs()); }
+
+void CallsiteLocation::print(raw_ostream &OS) const {
+ LineLocation::print(OS);
+ OS << ": inlined callee: " << CalleeName;
+}
+
+void CallsiteLocation::dump() const { print(dbgs()); }
+
+inline raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+ const CallsiteLocation &Loc) {
+ Loc.print(OS);
+ return OS;
+}
+
+/// \brief Print the sample record to the stream \p OS indented by \p Indent.
+void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
+ OS << NumSamples;
+ if (hasCalls()) {
+ OS << ", calls:";
+ for (const auto &I : getCallTargets())
+ OS << " " << I.first() << ":" << I.second;
+ }
+ OS << "\n";
+}
+
+void SampleRecord::dump() const { print(dbgs(), 0); }
+
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+ const SampleRecord &Sample) {
+ Sample.print(OS, 0);
+ return OS;
+}
+
+/// \brief Print the samples collected for a function on stream \p OS.
+void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
+ OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
+ << " sampled lines\n";
+
+ OS.indent(Indent);
+ if (BodySamples.size() > 0) {
+ OS << "Samples collected in the function's body {\n";
+ SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples);
+ for (const auto &SI : SortedBodySamples.get()) {
+ OS.indent(Indent + 2);
+ OS << SI->first << ": " << SI->second;
+ }
+ OS.indent(Indent);
+ OS << "}\n";
+ } else {
+ OS << "No samples collected in the function's body\n";
+ }
+
+ OS.indent(Indent);
+ if (CallsiteSamples.size() > 0) {
+ OS << "Samples collected in inlined callsites {\n";
+ SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsiteSamples(
+ CallsiteSamples);
+ for (const auto &CS : SortedCallsiteSamples.get()) {
+ OS.indent(Indent + 2);
+ OS << CS->first << ": ";
+ CS->second.print(OS, Indent + 4);
+ }
+ OS << "}\n";
+ } else {
+ OS << "No inlined callsites in this function\n";
+ }
+}
+
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+ const FunctionSamples &FS) {
+ FS.print(OS);
+ return OS;
+}
+
+void FunctionSamples::dump(void) const { print(dbgs(), 0); }
diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
index b39bfd6..93cd87b 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -8,133 +8,37 @@
//===----------------------------------------------------------------------===//
//
// This file implements the class that reads LLVM sample profiles. It
-// supports two file formats: text and binary. The textual representation
-// is useful for debugging and testing purposes. The binary representation
-// is more compact, resulting in smaller file sizes. However, they can
-// both be used interchangeably.
+// supports three file formats: text, binary and gcov.
//
-// NOTE: If you are making changes to the file format, please remember
-// to document them in the Clang documentation at
-// tools/clang/docs/UsersManual.rst.
+// The textual representation is useful for debugging and testing purposes. The
+// binary representation is more compact, resulting in smaller file sizes.
//
-// Text format
-// -----------
+// The gcov encoding is the one generated by GCC's AutoFDO profile creation
+// tool (https://github.com/google/autofdo).
//
-// Sample profiles are written as ASCII text. The file is divided into
-// sections, which correspond to each of the functions executed at runtime.
-// Each section has the following format
-//
-// function1:total_samples:total_head_samples
-// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
-// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
-// ...
-// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
-//
-// The file may contain blank lines between sections and within a
-// section. However, the spacing within a single line is fixed. Additional
-// spaces will result in an error while reading the file.
-//
-// Function names must be mangled in order for the profile loader to
-// match them in the current translation unit. The two numbers in the
-// function header specify how many total samples were accumulated in the
-// function (first number), and the total number of samples accumulated
-// in the prologue of the function (second number). This head sample
-// count provides an indicator of how frequently the function is invoked.
-//
-// Each sampled line may contain several items. Some are optional (marked
-// below):
-//
-// a. Source line offset. This number represents the line number
-// in the function where the sample was collected. The line number is
-//    always relative to the line where the symbol of the function is
-// defined. So, if the function has its header at line 280, the offset
-// 13 is at line 293 in the file.
-//
-// Note that this offset should never be a negative number. This could
-// happen in cases like macros. The debug machinery will register the
-// line number at the point of macro expansion. So, if the macro was
-// expanded in a line before the start of the function, the profile
-// converter should emit a 0 as the offset (this means that the optimizers
-// will not be able to associate a meaningful weight to the instructions
-// in the macro).
-//
-// b. [OPTIONAL] Discriminator. This is used if the sampled program
-// was compiled with DWARF discriminator support
-// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
-// DWARF discriminators are unsigned integer values that allow the
-// compiler to distinguish between multiple execution paths on the
-// same source line location.
-//
-// For example, consider the line of code ``if (cond) foo(); else bar();``.
-// If the predicate ``cond`` is true 80% of the time, then the edge
-// into function ``foo`` should be considered to be taken most of the
-// time. But both calls to ``foo`` and ``bar`` are at the same source
-// line, so a sample count at that line is not sufficient. The
-// compiler needs to know which part of that line is taken more
-// frequently.
-//
-// This is what discriminators provide. In this case, the calls to
-// ``foo`` and ``bar`` will be at the same line, but will have
-// different discriminator values. This allows the compiler to correctly
-// set edge weights into ``foo`` and ``bar``.
-//
-// c. Number of samples. This is an integer quantity representing the
-// number of samples collected by the profiler at this source
-// location.
-//
-// d. [OPTIONAL] Potential call targets and samples. If present, this
-//    line contains a call instruction. This models both direct and
-//    indirect calls. Each called target is listed together with its
-//    sample count. For example,
-//
-// 130: 7 foo:3 bar:2 baz:7
-//
-// The above means that at relative line offset 130 there is a call
-// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
-//    with ``baz()`` being the most frequently called target.
+// All three encodings can be used interchangeably as an input sample profile.
//
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
using namespace llvm::sampleprof;
using namespace llvm;
-/// \brief Print the samples collected for a function on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-void FunctionSamples::print(raw_ostream &OS) {
- OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
- << " sampled lines\n";
- for (const auto &SI : BodySamples) {
- LineLocation Loc = SI.first;
- const SampleRecord &Sample = SI.second;
- OS << "\tline offset: " << Loc.LineOffset
- << ", discriminator: " << Loc.Discriminator
- << ", number of samples: " << Sample.getSamples();
- if (Sample.hasCalls()) {
- OS << ", calls:";
- for (const auto &I : Sample.getCallTargets())
- OS << " " << I.first() << ":" << I.second;
- }
- OS << "\n";
- }
- OS << "\n";
-}
-
/// \brief Dump the function profile for \p FName.
///
/// \param FName Name of the function to print.
/// \param OS Stream to emit the output to.
void SampleProfileReader::dumpFunctionProfile(StringRef FName,
raw_ostream &OS) {
- OS << "Function: " << FName << ": ";
- Profiles[FName].print(OS);
+ OS << "Function: " << FName << ": " << Profiles[FName];
}
/// \brief Dump all the function profiles found on stream \p OS.
@@ -143,6 +47,102 @@ void SampleProfileReader::dump(raw_ostream &OS) {
dumpFunctionProfile(I.getKey(), OS);
}
+/// \brief Parse \p Input as a function header.
+///
+/// Parse one line of \p Input, and update the function name in \p FName,
+/// the function's total sample count in \p NumSamples, and the function's
+/// entry count in \p NumHeadSamples.
+///
+/// \returns true if parsing is successful.
+static bool ParseHead(const StringRef &Input, StringRef &FName,
+ uint64_t &NumSamples, uint64_t &NumHeadSamples) {
+ if (Input[0] == ' ')
+ return false;
+ size_t n2 = Input.rfind(':');
+ size_t n1 = Input.rfind(':', n2 - 1);
+ FName = Input.substr(0, n1);
+ if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
+ return false;
+ if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
+ return false;
+ return true;
+}
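To make the splitting logic above concrete, here is a minimal standalone
sketch of the same right-to-left split, using only the standard library
instead of StringRef (the parseHead name and the driver are illustrative,
not part of the patch):

#include <cstdint>
#include <iostream>
#include <string>

// Split "mangled_name:total_samples:head_samples" from the right, so that
// colons inside the mangled name stay part of the name.
static bool parseHead(const std::string &Input, std::string &FName,
                      uint64_t &NumSamples, uint64_t &NumHeadSamples) {
  if (Input.empty() || Input[0] == ' ')
    return false;
  size_t N2 = Input.rfind(':');
  if (N2 == std::string::npos || N2 == 0)
    return false;
  size_t N1 = Input.rfind(':', N2 - 1);
  if (N1 == std::string::npos)
    return false;
  FName = Input.substr(0, N1);
  try {
    NumSamples = std::stoull(Input.substr(N1 + 1, N2 - N1 - 1));
    NumHeadSamples = std::stoull(Input.substr(N2 + 1));
  } catch (...) { // std::stoull throws on non-numeric fields
    return false;
  }
  return true;
}

int main() {
  std::string Name;
  uint64_t Total, Head;
  if (parseHead("_Z3foov:184019:102", Name, Total, Head))
    std::cout << Name << " total=" << Total << " head=" << Head << "\n";
  // Prints: _Z3foov total=184019 head=102
}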
+
+/// \brief Returns true if line offset \p L is legal (only has 16 bits).
+static bool isOffsetLegal(unsigned L) {
+ return (L & 0xffff) == L;
+}
+
+/// \brief Parse \p Input as line sample.
+///
+/// \param Input input line.
+/// \param IsCallsite true if the line represents an inlined callsite.
+/// \param Depth the depth of the inline stack.
+/// \param NumSamples total samples of the line/inlined callsite.
+/// \param LineOffset line offset to the start of the function.
+/// \param Discriminator discriminator of the line.
+/// \param TargetCountMap map from indirect call target to count.
+///
+/// \returns true if parsing is successful.
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
+ uint64_t &NumSamples, uint32_t &LineOffset,
+ uint32_t &Discriminator, StringRef &CalleeName,
+ DenseMap<StringRef, uint64_t> &TargetCountMap) {
+ for (Depth = 0; Input[Depth] == ' '; Depth++)
+ ;
+ if (Depth == 0)
+ return false;
+
+ size_t n1 = Input.find(':');
+ StringRef Loc = Input.substr(Depth, n1 - Depth);
+ size_t n2 = Loc.find('.');
+ if (n2 == StringRef::npos) {
+ if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
+ return false;
+ Discriminator = 0;
+ } else {
+ if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
+ return false;
+ if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
+ return false;
+ }
+
+ StringRef Rest = Input.substr(n1 + 2);
+ if (Rest[0] >= '0' && Rest[0] <= '9') {
+ IsCallsite = false;
+ size_t n3 = Rest.find(' ');
+ if (n3 == StringRef::npos) {
+ if (Rest.getAsInteger(10, NumSamples))
+ return false;
+ } else {
+ if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
+ return false;
+ }
+ while (n3 != StringRef::npos) {
+ n3 += Rest.substr(n3).find_first_not_of(' ');
+ Rest = Rest.substr(n3);
+ n3 = Rest.find(' ');
+ StringRef pair = Rest;
+ if (n3 != StringRef::npos) {
+ pair = Rest.substr(0, n3);
+ }
+ size_t n4 = pair.find(':');
+ uint64_t count;
+ if (pair.substr(n4 + 1).getAsInteger(10, count))
+ return false;
+ TargetCountMap[pair.substr(0, n4)] = count;
+ }
+ } else {
+ IsCallsite = true;
+ size_t n3 = Rest.find_last_of(':');
+ CalleeName = Rest.substr(0, n3);
+ if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
+ return false;
+ }
+ return true;
+}
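A worked example: on the body line "2.3: 10 foo:5 bar:3" (indented by two
spaces in a real profile) the parser above produces Depth=2, LineOffset=2,
Discriminator=3, NumSamples=10 and TargetCountMap = {foo:5, bar:3}. A
simplified standalone re-implementation of just the body-line case follows
(hypothetical helper; error handling abbreviated, std::stoul throws on
malformed numbers):

#include <cstdint>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Parse "offset[.discriminator]: num_samples [target:count ...]"
// (leading indentation already stripped).
static bool parseBodyLine(const std::string &Input, uint32_t &LineOffset,
                          uint32_t &Discriminator, uint64_t &NumSamples,
                          std::map<std::string, uint64_t> &Targets) {
  std::istringstream SS(Input);
  std::string Loc;
  if (!(SS >> Loc) || Loc.empty() || Loc.back() != ':')
    return false;
  Loc.pop_back(); // drop the trailing ':'
  size_t Dot = Loc.find('.');
  LineOffset = std::stoul(Loc.substr(0, Dot));
  Discriminator =
      Dot == std::string::npos ? 0 : uint32_t(std::stoul(Loc.substr(Dot + 1)));
  if (!(SS >> NumSamples))
    return false;
  std::string Pair;
  while (SS >> Pair) { // optional "target:count" records
    size_t Colon = Pair.rfind(':');
    if (Colon == std::string::npos)
      return false;
    Targets[Pair.substr(0, Colon)] = std::stoull(Pair.substr(Colon + 1));
  }
  return true;
}

int main() {
  uint32_t Off = 0, Disc = 0;
  uint64_t Num = 0;
  std::map<std::string, uint64_t> Targets;
  if (parseBodyLine("2.3: 10 foo:5 bar:3", Off, Disc, Num, Targets))
    std::cout << Off << "." << Disc << ": " << Num << " samples, "
              << Targets.size() << " call targets\n";
  // Prints: 2.3: 10 samples, 2 call targets
}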
+
/// \brief Load samples from a text file.
///
/// See the documentation at the top of the file for an explanation of
@@ -151,14 +151,13 @@ void SampleProfileReader::dump(raw_ostream &OS) {
/// \returns true if the file was loaded successfully, false otherwise.
std::error_code SampleProfileReaderText::read() {
line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
+ sampleprof_error Result = sampleprof_error::success;
+
+ InlineCallStack InlineStack;
- // Read the profile of each function. Since each function may be
- // mentioned more than once, and we are collecting flat profiles,
- // accumulate samples as we parse them.
- Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
- Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
- Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)");
- while (!LineIt.is_at_eof()) {
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
+ continue;
// Read the header of each function.
//
// Note that for function identifiers we are actually expecting
@@ -171,63 +170,74 @@ std::error_code SampleProfileReaderText::read() {
//
// The only requirement we place on the identifier, then, is that it
// should not begin with a number.
- SmallVector<StringRef, 4> Matches;
- if (!HeadRE.match(*LineIt, &Matches)) {
- reportParseError(LineIt.line_number(),
- "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
- return sampleprof_error::malformed;
- }
- assert(Matches.size() == 4);
- StringRef FName = Matches[1];
- unsigned NumSamples, NumHeadSamples;
- Matches[2].getAsInteger(10, NumSamples);
- Matches[3].getAsInteger(10, NumHeadSamples);
- Profiles[FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FName];
- FProfile.addTotalSamples(NumSamples);
- FProfile.addHeadSamples(NumHeadSamples);
- ++LineIt;
-
- // Now read the body. The body of the function ends when we reach
- // EOF or when we see the start of the next function.
- while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
- if (!LineSampleRE.match(*LineIt, &Matches)) {
- reportParseError(
- LineIt.line_number(),
- "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+ if ((*LineIt)[0] != ' ') {
+ uint64_t NumSamples, NumHeadSamples;
+ StringRef FName;
+ if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
+ reportError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ Profiles[FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FName];
+ MergeResult(Result, FProfile.addTotalSamples(NumSamples));
+ MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
+ InlineStack.clear();
+ InlineStack.push_back(&FProfile);
+ } else {
+ uint64_t NumSamples;
+ StringRef FName;
+ DenseMap<StringRef, uint64_t> TargetCountMap;
+ bool IsCallsite;
+ uint32_t Depth, LineOffset, Discriminator;
+ if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
+ Discriminator, FName, TargetCountMap)) {
+ reportError(LineIt.line_number(),
+ "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
+ *LineIt);
return sampleprof_error::malformed;
}
- assert(Matches.size() == 5);
- unsigned LineOffset, NumSamples, Discriminator = 0;
- Matches[1].getAsInteger(10, LineOffset);
- if (Matches[2] != "")
- Matches[2].getAsInteger(10, Discriminator);
- Matches[3].getAsInteger(10, NumSamples);
-
- // If there are function calls in this line, generate a call sample
- // entry for each call.
- std::string CallsLine(Matches[4]);
- while (CallsLine != "") {
- SmallVector<StringRef, 3> CallSample;
- if (!CallSampleRE.match(CallsLine, &CallSample)) {
- reportParseError(LineIt.line_number(),
- "Expected 'mangled_name:NUM', found " + CallsLine);
- return sampleprof_error::malformed;
+ if (IsCallsite) {
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
}
- StringRef CalledFunction = CallSample[1];
- unsigned CalledFunctionSamples;
- CallSample[2].getAsInteger(10, CalledFunctionSamples);
- FProfile.addCalledTargetSamples(LineOffset, Discriminator,
- CalledFunction, CalledFunctionSamples);
- CallsLine = CallSampleRE.sub("", CallsLine);
+ FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
+ CallsiteLocation(LineOffset, Discriminator, FName));
+ MergeResult(Result, FSamples.addTotalSamples(NumSamples));
+ InlineStack.push_back(&FSamples);
+ } else {
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
+ }
+ FunctionSamples &FProfile = *InlineStack.back();
+ for (const auto &name_count : TargetCountMap) {
+ MergeResult(Result, FProfile.addCalledTargetSamples(
+ LineOffset, Discriminator, name_count.first,
+ name_count.second));
+ }
+ MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
+ NumSamples));
}
+ }
+ }
- FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
- ++LineIt;
+ return Result;
+}
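The reader keeps one FunctionSamples pointer per indentation level: before
attaching a line at depth N, it pops the inline stack back to N entries so
the new sample nests under the correct caller. A standalone sketch of that
invariant, with strings standing in for profiles:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // One entry per inline level; entry 0 is the top-level function.
  std::vector<std::string> InlineStack;
  struct Line { unsigned Depth; std::string Name; };
  // "main" inlines "foo" at depth 1, which inlines "bar" at depth 2;
  // a new depth-1 callsite "baz" then pops back to the top level.
  Line Lines[] = {{0, "main"}, {1, "foo"}, {2, "bar"}, {1, "baz"}};
  for (const Line &L : Lines) {
    while (InlineStack.size() > L.Depth)
      InlineStack.pop_back();
    InlineStack.push_back(L.Name);
    std::cout << "attached " << L.Name << " at depth "
              << InlineStack.size() - 1 << "\n";
  }
}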
+
+bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
+ bool result = false;
+
+ // Check that the first non-comment line is a valid function header.
+ line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
+ if (!LineIt.is_at_eof()) {
+ if ((*LineIt)[0] != ' ') {
+ uint64_t NumSamples, NumHeadSamples;
+ StringRef FName;
+ result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
}
}
- return sampleprof_error::success;
+ return result;
}
template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
@@ -243,7 +253,7 @@ template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
EC = sampleprof_error::success;
if (EC) {
- reportParseError(0, EC.message());
+ reportError(0, EC.message());
return EC;
}
@@ -256,7 +266,7 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
StringRef Str(reinterpret_cast<const char *>(Data));
if (Data + Str.size() + 1 > End) {
EC = sampleprof_error::truncated;
- reportParseError(0, EC.message());
+ reportError(0, EC.message());
return EC;
}
@@ -264,62 +274,109 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
return Str;
}
-std::error_code SampleProfileReaderBinary::read() {
- while (!at_eof()) {
- auto FName(readString());
- if (std::error_code EC = FName.getError())
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
+ std::error_code EC;
+ auto Idx = readNumber<uint32_t>();
+ if (std::error_code EC = Idx.getError())
+ return EC;
+ if (*Idx >= NameTable.size())
+ return sampleprof_error::truncated_name_table;
+ return NameTable[*Idx];
+}
+
+std::error_code
+SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
+ auto NumSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumSamples.getError())
+ return EC;
+ FProfile.addTotalSamples(*NumSamples);
+
+ // Read the samples in the body.
+ auto NumRecords = readNumber<uint32_t>();
+ if (std::error_code EC = NumRecords.getError())
+ return EC;
+
+ for (uint32_t I = 0; I < *NumRecords; ++I) {
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
return EC;
- Profiles[*FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FName];
+ if (!isOffsetLegal(*LineOffset)) {
+ return std::error_code();
+ }
- auto Val = readNumber<unsigned>();
- if (std::error_code EC = Val.getError())
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
return EC;
- FProfile.addTotalSamples(*Val);
- Val = readNumber<unsigned>();
- if (std::error_code EC = Val.getError())
+ auto NumSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumSamples.getError())
return EC;
- FProfile.addHeadSamples(*Val);
- // Read the samples in the body.
- auto NumRecords = readNumber<unsigned>();
- if (std::error_code EC = NumRecords.getError())
+ auto NumCalls = readNumber<uint32_t>();
+ if (std::error_code EC = NumCalls.getError())
return EC;
- for (unsigned I = 0; I < *NumRecords; ++I) {
- auto LineOffset = readNumber<uint64_t>();
- if (std::error_code EC = LineOffset.getError())
- return EC;
- auto Discriminator = readNumber<uint64_t>();
- if (std::error_code EC = Discriminator.getError())
+ for (uint32_t J = 0; J < *NumCalls; ++J) {
+ auto CalledFunction(readStringFromTable());
+ if (std::error_code EC = CalledFunction.getError())
return EC;
- auto NumSamples = readNumber<uint64_t>();
- if (std::error_code EC = NumSamples.getError())
+ auto CalledFunctionSamples = readNumber<uint64_t>();
+ if (std::error_code EC = CalledFunctionSamples.getError())
return EC;
- auto NumCalls = readNumber<unsigned>();
- if (std::error_code EC = NumCalls.getError())
- return EC;
+ FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
+ *CalledFunction, *CalledFunctionSamples);
+ }
+
+ FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
+ }
- for (unsigned J = 0; J < *NumCalls; ++J) {
- auto CalledFunction(readString());
- if (std::error_code EC = CalledFunction.getError())
- return EC;
+ // Read all the samples for inlined function calls.
+ auto NumCallsites = readNumber<uint32_t>();
+ if (std::error_code EC = NumCallsites.getError())
+ return EC;
- auto CalledFunctionSamples = readNumber<uint64_t>();
- if (std::error_code EC = CalledFunctionSamples.getError())
- return EC;
+ for (uint32_t J = 0; J < *NumCallsites; ++J) {
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
- FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
- *CalledFunction,
- *CalledFunctionSamples);
- }
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
- FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
- }
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
+ CallsiteLocation(*LineOffset, *Discriminator, *FName));
+ if (std::error_code EC = readProfile(CalleeProfile))
+ return EC;
+ }
+
+ return sampleprof_error::success;
+}
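readNumber() above is built on the ULEB128 decoding from
llvm/Support/LEB128.h. For reference, a self-contained decoder equivalent to
what the binary format relies on (simplified; no truncation or overflow
diagnostics):

#include <cstdint>
#include <iostream>

// Decode an unsigned LEB128 value: 7 payload bits per byte, low bits first;
// the high bit of each byte marks "more bytes follow".
static uint64_t decodeULEB128(const uint8_t *P, unsigned *Len = nullptr) {
  const uint8_t *Start = P;
  uint64_t Value = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  if (Len)
    *Len = P - Start;
  return Value;
}

int main() {
  // 624485 (0x98765) encodes as e5 8e 26.
  const uint8_t Buf[] = {0xe5, 0x8e, 0x26};
  unsigned Len;
  std::cout << decodeULEB128(Buf, &Len) << " (" << Len << " bytes)\n";
  // Prints: 624485 (3 bytes)
}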
+
+std::error_code SampleProfileReaderBinary::read() {
+ while (!at_eof()) {
+ auto NumHeadSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumHeadSamples.getError())
+ return EC;
+
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ Profiles[*FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FName];
+
+ FProfile.addHeadSamples(*NumHeadSamples);
+
+ if (std::error_code EC = readProfile(FProfile))
+ return EC;
}
return sampleprof_error::success;
@@ -343,6 +400,18 @@ std::error_code SampleProfileReaderBinary::readHeader() {
else if (*Version != SPVersion())
return sampleprof_error::unsupported_version;
+ // Read the name table.
+ auto Size = readNumber<uint32_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+ NameTable.reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ auto Name(readString());
+ if (std::error_code EC = Name.getError())
+ return EC;
+ NameTable.push_back(*Name);
+ }
+
return sampleprof_error::success;
}
@@ -353,6 +422,249 @@ bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
return Magic == SPMagic();
}
+std::error_code SampleProfileReaderGCC::skipNextWord() {
+ uint32_t dummy;
+ if (!GcovBuffer.readInt(dummy))
+ return sampleprof_error::truncated;
+ return sampleprof_error::success;
+}
+
+template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
+ if (sizeof(T) <= sizeof(uint32_t)) {
+ uint32_t Val;
+ if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
+ return static_cast<T>(Val);
+ } else if (sizeof(T) <= sizeof(uint64_t)) {
+ uint64_t Val;
+ if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
+ return static_cast<T>(Val);
+ }
+
+ std::error_code EC = sampleprof_error::malformed;
+ reportError(0, EC.message());
+ return EC;
+}
+
+ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
+ StringRef Str;
+ if (!GcovBuffer.readString(Str))
+ return sampleprof_error::truncated;
+ return Str;
+}
+
+std::error_code SampleProfileReaderGCC::readHeader() {
+ // Read the magic identifier.
+ if (!GcovBuffer.readGCDAFormat())
+ return sampleprof_error::unrecognized_format;
+
+ // Read the version number. Note - the GCC reader does not validate this
+ // version, but the profile creator generates v704.
+ GCOV::GCOVVersion version;
+ if (!GcovBuffer.readGCOVVersion(version))
+ return sampleprof_error::unrecognized_format;
+
+ if (version != GCOV::V704)
+ return sampleprof_error::unsupported_version;
+
+ // Skip the empty integer.
+ if (std::error_code EC = skipNextWord())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
+ uint32_t Tag;
+ if (!GcovBuffer.readInt(Tag))
+ return sampleprof_error::truncated;
+
+ if (Tag != Expected)
+ return sampleprof_error::malformed;
+
+ if (std::error_code EC = skipNextWord())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderGCC::readNameTable() {
+ if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
+ return EC;
+
+ uint32_t Size;
+ if (!GcovBuffer.readInt(Size))
+ return sampleprof_error::truncated;
+
+ for (uint32_t I = 0; I < Size; ++I) {
+ StringRef Str;
+ if (!GcovBuffer.readString(Str))
+ return sampleprof_error::truncated;
+ Names.push_back(Str);
+ }
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
+ if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
+ return EC;
+
+ uint32_t NumFunctions;
+ if (!GcovBuffer.readInt(NumFunctions))
+ return sampleprof_error::truncated;
+
+ InlineCallStack Stack;
+ for (uint32_t I = 0; I < NumFunctions; ++I)
+ if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
+ const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
+ uint64_t HeadCount = 0;
+ if (InlineStack.size() == 0)
+ if (!GcovBuffer.readInt64(HeadCount))
+ return sampleprof_error::truncated;
+
+ uint32_t NameIdx;
+ if (!GcovBuffer.readInt(NameIdx))
+ return sampleprof_error::truncated;
+
+ StringRef Name(Names[NameIdx]);
+
+ uint32_t NumPosCounts;
+ if (!GcovBuffer.readInt(NumPosCounts))
+ return sampleprof_error::truncated;
+
+ uint32_t NumCallsites;
+ if (!GcovBuffer.readInt(NumCallsites))
+ return sampleprof_error::truncated;
+
+ FunctionSamples *FProfile = nullptr;
+ if (InlineStack.size() == 0) {
+ // If this is a top function that we have already processed, do not
+ // update its profile again. This happens in the presence of
+ // function aliases. Since these aliases share the same function
+ // body, there will be identical replicated profiles for the
+  // original function. In this case, we simply do not bother updating
+ // the profile of the original function.
+ FProfile = &Profiles[Name];
+ FProfile->addHeadSamples(HeadCount);
+ if (FProfile->getTotalSamples() > 0)
+ Update = false;
+ } else {
+ // Otherwise, we are reading an inlined instance. The top of the
+ // inline stack contains the profile of the caller. Insert this
+ // callee in the caller's CallsiteMap.
+ FunctionSamples *CallerProfile = InlineStack.front();
+ uint32_t LineOffset = Offset >> 16;
+ uint32_t Discriminator = Offset & 0xffff;
+ FProfile = &CallerProfile->functionSamplesAt(
+ CallsiteLocation(LineOffset, Discriminator, Name));
+ }
+
+ for (uint32_t I = 0; I < NumPosCounts; ++I) {
+ uint32_t Offset;
+ if (!GcovBuffer.readInt(Offset))
+ return sampleprof_error::truncated;
+
+ uint32_t NumTargets;
+ if (!GcovBuffer.readInt(NumTargets))
+ return sampleprof_error::truncated;
+
+ uint64_t Count;
+ if (!GcovBuffer.readInt64(Count))
+ return sampleprof_error::truncated;
+
+ // The line location is encoded in the offset as:
+ // high 16 bits: line offset to the start of the function.
+ // low 16 bits: discriminator.
+ uint32_t LineOffset = Offset >> 16;
+ uint32_t Discriminator = Offset & 0xffff;
+
+ InlineCallStack NewStack;
+ NewStack.push_back(FProfile);
+ NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+ if (Update) {
+ // Walk up the inline stack, adding the samples on this line to
+ // the total sample count of the callers in the chain.
+ for (auto CallerProfile : NewStack)
+ CallerProfile->addTotalSamples(Count);
+
+ // Update the body samples for the current profile.
+ FProfile->addBodySamples(LineOffset, Discriminator, Count);
+ }
+
+ // Process the list of functions called at an indirect call site.
+ // These are all the targets that a function pointer (or virtual
+ // function) resolved at runtime.
+ for (uint32_t J = 0; J < NumTargets; J++) {
+ uint32_t HistVal;
+ if (!GcovBuffer.readInt(HistVal))
+ return sampleprof_error::truncated;
+
+ if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
+ return sampleprof_error::malformed;
+
+ uint64_t TargetIdx;
+ if (!GcovBuffer.readInt64(TargetIdx))
+ return sampleprof_error::truncated;
+ StringRef TargetName(Names[TargetIdx]);
+
+ uint64_t TargetCount;
+ if (!GcovBuffer.readInt64(TargetCount))
+ return sampleprof_error::truncated;
+
+ if (Update) {
+ FunctionSamples &TargetProfile = Profiles[TargetName];
+ TargetProfile.addCalledTargetSamples(LineOffset, Discriminator,
+ TargetName, TargetCount);
+ }
+ }
+ }
+
+ // Process all the inlined callers into the current function. These
+ // are all the callsites that were inlined into this function.
+ for (uint32_t I = 0; I < NumCallsites; I++) {
+ // The offset is encoded as:
+ // high 16 bits: line offset to the start of the function.
+ // low 16 bits: discriminator.
+ uint32_t Offset;
+ if (!GcovBuffer.readInt(Offset))
+ return sampleprof_error::truncated;
+ InlineCallStack NewStack;
+ NewStack.push_back(FProfile);
+ NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
+ if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
+ return EC;
+ }
+
+ return sampleprof_error::success;
+}
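Both loops above decode the same 32-bit packing of a source location. A
small round-trip sketch of the encoding (standalone; the 16-bit line-offset
limit mirrors what isOffsetLegal enforces in the text reader):

#include <cassert>
#include <cstdint>
#include <iostream>

// high 16 bits: line offset from the start of the function.
// low 16 bits:  DWARF discriminator.
static uint32_t packLocation(uint32_t LineOffset, uint32_t Discriminator) {
  assert(LineOffset <= 0xffff && Discriminator <= 0xffff);
  return (LineOffset << 16) | Discriminator;
}

int main() {
  uint32_t Offset = packLocation(42, 3);
  uint32_t LineOffset = Offset >> 16;
  uint32_t Discriminator = Offset & 0xffff;
  std::cout << LineOffset << "." << Discriminator << "\n"; // 42.3
}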
+
+/// \brief Read a GCC AutoFDO profile.
+///
+/// This format is generated by the Linux Perf conversion tool at
+/// https://github.com/google/autofdo.
+std::error_code SampleProfileReaderGCC::read() {
+ // Read the string table.
+ if (std::error_code EC = readNameTable())
+ return EC;
+
+ // Read the source profile.
+ if (std::error_code EC = readFunctionProfiles())
+ return EC;
+
+ return sampleprof_error::success;
+}
+
+bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
+ StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
+ return Magic == "adcg*704";
+}
+
/// \brief Prepare a memory buffer for the contents of \p Filename.
///
/// \returns an error code indicating the status of the buffer.
@@ -364,7 +676,7 @@ setupMemoryBuffer(std::string Filename) {
auto Buffer = std::move(BufferOrErr.get());
// Sanity check the file.
- if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+ if (Buffer->getBufferSize() > std::numeric_limits<uint32_t>::max())
return sampleprof_error::too_large;
return std::move(Buffer);
@@ -384,13 +696,29 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) {
auto BufferOrError = setupMemoryBuffer(Filename);
if (std::error_code EC = BufferOrError.getError())
return EC;
+ return create(BufferOrError.get(), C);
+}
- auto Buffer = std::move(BufferOrError.get());
+/// \brief Create a sample profile reader based on the format of the input data.
+///
+/// \param B The memory buffer to create the reader from (assumes ownership).
+///
+/// \param Reader The reader to instantiate according to the format of \p B.
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \returns an error code indicating the status of the created reader.
+ErrorOr<std::unique_ptr<SampleProfileReader>>
+SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
std::unique_ptr<SampleProfileReader> Reader;
- if (SampleProfileReaderBinary::hasFormat(*Buffer))
- Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C));
+ if (SampleProfileReaderBinary::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderBinary(std::move(B), C));
+ else if (SampleProfileReaderGCC::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
+ else if (SampleProfileReaderText::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderText(std::move(B), C));
else
- Reader.reset(new SampleProfileReaderText(std::move(Buffer), C));
+ return sampleprof_error::unrecognized_format;
if (std::error_code EC = Reader->readHeader())
return EC;
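Putting the pieces together, a plausible call sequence against the
interfaces visible in this patch would look like the following sketch
(profile.prof is a made-up filename; error handling abbreviated):

#include "llvm/IR/LLVMContext.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::sampleprof;

int main() {
  LLVMContext Ctx;
  // create() sniffs the buffer and picks the binary, gcov or text reader.
  auto ReaderOrErr = SampleProfileReader::create("profile.prof", Ctx);
  if (std::error_code EC = ReaderOrErr.getError()) {
    errs() << "cannot open profile: " << EC.message() << "\n";
    return 1;
  }
  std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
  if (std::error_code EC = Reader->read()) {
    errs() << "malformed profile: " << EC.message() << "\n";
    return 1;
  }
  Reader->dump(errs()); // print every function profile
  return 0;
}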
diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
index c95267a..51feee5 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -30,16 +30,27 @@ using namespace llvm::sampleprof;
using namespace llvm;
/// \brief Write samples to a text file.
-bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) {
- if (S.empty())
- return true;
-
- OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples()
- << "\n";
-
- for (const auto &I : S.getBodySamples()) {
- LineLocation Loc = I.first;
- const SampleRecord &Sample = I.second;
+///
+/// Note: it may be tempting to implement this in terms of
+/// FunctionSamples::print(). Please don't. The dump functionality is intended
+/// for debugging and has no specified form.
+///
+/// The format used here is more structured and deliberate because
+/// it needs to be parsed by the SampleProfileReaderText class.
+std::error_code SampleProfileWriterText::write(StringRef FName,
+ const FunctionSamples &S) {
+ auto &OS = *OutputStream;
+
+ OS << FName << ":" << S.getTotalSamples();
+ if (Indent == 0)
+ OS << ":" << S.getHeadSamples();
+ OS << "\n";
+
+ SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
+ for (const auto &I : SortedSamples.get()) {
+ LineLocation Loc = I->first;
+ const SampleRecord &Sample = I->second;
+ OS.indent(Indent + 1);
if (Loc.Discriminator == 0)
OS << Loc.LineOffset << ": ";
else
@@ -52,32 +63,89 @@ bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) {
OS << "\n";
}
- return true;
+ SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsiteSamples(
+ S.getCallsiteSamples());
+ Indent += 1;
+ for (const auto &I : SortedCallsiteSamples.get()) {
+ CallsiteLocation Loc = I->first;
+ const FunctionSamples &CalleeSamples = I->second;
+ OS.indent(Indent);
+ if (Loc.Discriminator == 0)
+ OS << Loc.LineOffset << ": ";
+ else
+ OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
+ if (std::error_code EC = write(Loc.CalleeName, CalleeSamples))
+ return EC;
+ }
+ Indent -= 1;
+
+ return sampleprof_error::success;
}
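Assuming this reading of the writer, a function with two body samples and
one inlined callsite would be emitted roughly as follows (illustrative
data, not output from the patch); note the callee header omits the head
sample count because Indent is nonzero on the recursive call:

main:366846:0
 2.1: 1000
 4: 200 _Z5helpv:170
 6: _Z3foov:300
  1: 300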
-SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F,
- std::error_code &EC)
- : SampleProfileWriter(F, EC, sys::fs::F_None) {
- if (EC)
- return;
+std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
+ const auto &ret = NameTable.find(FName);
+ if (ret == NameTable.end())
+ return sampleprof_error::truncated_name_table;
+ encodeULEB128(ret->second, *OutputStream);
+ return sampleprof_error::success;
+}
- // Write the file header.
+void SampleProfileWriterBinary::addName(StringRef FName) {
+ auto NextIdx = NameTable.size();
+ NameTable.insert(std::make_pair(FName, NextIdx));
+}
+
+void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
+ // Add all the names in indirect call targets.
+ for (const auto &I : S.getBodySamples()) {
+ const SampleRecord &Sample = I.second;
+ for (const auto &J : Sample.getCallTargets())
+ addName(J.first());
+ }
+
+ // Recursively add all the names for inlined callsites.
+ for (const auto &J : S.getCallsiteSamples()) {
+ CallsiteLocation Loc = J.first;
+ const FunctionSamples &CalleeSamples = J.second;
+ addName(Loc.CalleeName);
+ addNames(CalleeSamples);
+ }
+}
+
+std::error_code SampleProfileWriterBinary::writeHeader(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ auto &OS = *OutputStream;
+
+ // Write file magic identifier.
encodeULEB128(SPMagic(), OS);
encodeULEB128(SPVersion(), OS);
+
+ // Generate the name table for all the functions referenced in the profile.
+ for (const auto &I : ProfileMap) {
+ addName(I.first());
+ addNames(I.second);
+ }
+
+ // Write out the name table.
+ encodeULEB128(NameTable.size(), OS);
+ for (auto N : NameTable) {
+ OS << N.first;
+ encodeULEB128(0, OS);
+ }
+
+ return sampleprof_error::success;
}
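The header writer interns every referenced name before any body is emitted,
so writeBody can encode functions as table indices. A standalone sketch of
that interning scheme (a vector plus hash map standing in for the writer's
table; the names are hypothetical):

#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Intern names first; bodies then reference them by table index.
struct NameTable {
  std::vector<std::string> Order;              // write-out order
  std::unordered_map<std::string, size_t> Idx; // name -> index
  void add(const std::string &N) {
    if (Idx.emplace(N, Order.size()).second) // keep first index seen
      Order.push_back(N);
  }
  size_t index(const std::string &N) const { return Idx.at(N); }
};

int main() {
  NameTable T;
  for (const char *N : {"main", "_Z3foov", "_Z3barv", "_Z3foov"})
    T.add(N); // the duplicate "_Z3foov" is interned once
  std::cout << T.Order.size() << " names, _Z3barv -> " << T.index("_Z3barv")
            << "\n"; // 3 names, _Z3barv -> 2
}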
-/// \brief Write samples to a binary file.
-///
-/// \returns true if the samples were written successfully, false otherwise.
-bool SampleProfileWriterBinary::write(StringRef FName,
- const FunctionSamples &S) {
- if (S.empty())
- return true;
+std::error_code SampleProfileWriterBinary::writeBody(StringRef FName,
+ const FunctionSamples &S) {
+ auto &OS = *OutputStream;
+
+ if (std::error_code EC = writeNameIdx(FName))
+ return EC;
- OS << FName;
- encodeULEB128(0, OS);
encodeULEB128(S.getTotalSamples(), OS);
- encodeULEB128(S.getHeadSamples(), OS);
+
+ // Emit all the body samples.
encodeULEB128(S.getBodySamples().size(), OS);
for (const auto &I : S.getBodySamples()) {
LineLocation Loc = I.first;
@@ -87,18 +155,38 @@ bool SampleProfileWriterBinary::write(StringRef FName,
encodeULEB128(Sample.getSamples(), OS);
encodeULEB128(Sample.getCallTargets().size(), OS);
for (const auto &J : Sample.getCallTargets()) {
- std::string Callee = J.first();
- unsigned CalleeSamples = J.second;
- OS << Callee;
- encodeULEB128(0, OS);
+ StringRef Callee = J.first();
+ uint64_t CalleeSamples = J.second;
+ if (std::error_code EC = writeNameIdx(Callee))
+ return EC;
encodeULEB128(CalleeSamples, OS);
}
}
- return true;
+ // Recursively emit all the callsite samples.
+ encodeULEB128(S.getCallsiteSamples().size(), OS);
+ for (const auto &J : S.getCallsiteSamples()) {
+ CallsiteLocation Loc = J.first;
+ const FunctionSamples &CalleeSamples = J.second;
+ encodeULEB128(Loc.LineOffset, OS);
+ encodeULEB128(Loc.Discriminator, OS);
+ if (std::error_code EC = writeBody(Loc.CalleeName, CalleeSamples))
+ return EC;
+ }
+
+ return sampleprof_error::success;
}
-/// \brief Create a sample profile writer based on the specified format.
+/// \brief Write samples of a top-level function to a binary file.
+///
+/// \returns an error code indicating the status of the write.
+std::error_code SampleProfileWriterBinary::write(StringRef FName,
+ const FunctionSamples &S) {
+ encodeULEB128(S.getHeadSamples(), *OutputStream);
+ return writeBody(FName, S);
+}
+
+/// \brief Create a sample profile file writer based on the specified format.
///
/// \param Filename The file to create.
///
@@ -110,12 +198,38 @@ bool SampleProfileWriterBinary::write(StringRef FName,
ErrorOr<std::unique_ptr<SampleProfileWriter>>
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
std::error_code EC;
+ std::unique_ptr<raw_ostream> OS;
+ if (Format == SPF_Binary)
+ OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None));
+ else
+ OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text));
+ if (EC)
+ return EC;
+
+ return create(OS, Format);
+}
+
+/// \brief Create a sample profile stream writer based on the specified format.
+///
+/// \param OS The output stream to store the profile data to.
+///
+/// \param Writer The writer to instantiate according to the specified format.
+///
+/// \param Format Encoding format for the profile file.
+///
+/// \returns an error code indicating the status of the created writer.
+ErrorOr<std::unique_ptr<SampleProfileWriter>>
+SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
+ SampleProfileFormat Format) {
+ std::error_code EC;
std::unique_ptr<SampleProfileWriter> Writer;
if (Format == SPF_Binary)
- Writer.reset(new SampleProfileWriterBinary(Filename, EC));
+ Writer.reset(new SampleProfileWriterBinary(OS));
else if (Format == SPF_Text)
- Writer.reset(new SampleProfileWriterText(Filename, EC));
+ Writer.reset(new SampleProfileWriterText(OS));
+ else if (Format == SPF_GCC)
+ EC = sampleprof_error::unsupported_writing_format;
else
EC = sampleprof_error::unrecognized_format;
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index 5d31225..19b8221 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -768,6 +768,15 @@ APFloat::isLargest() const {
}
bool
+APFloat::isInteger() const {
+ // This could be made more efficient; I'm going for obviously correct.
+ if (!isFinite()) return false;
+ APFloat truncated = *this;
+ truncated.roundToIntegral(rmTowardZero);
+ return compare(truncated) == cmpEqual;
+}
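A usage sketch for the new predicate, assuming the long-standing APFloat
double constructor and getInf helper (the driver is not part of this patch):

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  APFloat A(3.0), B(2.5);
  APFloat Inf = APFloat::getInf(APFloat::IEEEdouble);
  outs() << (A.isInteger() ? "yes" : "no") << " "   // yes
         << (B.isInteger() ? "yes" : "no") << " "   // no
         << (Inf.isInteger() ? "yes" : "no") << "\n"; // no (not finite)
  return 0;
}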
+
+bool
APFloat::bitwiseIsEqual(const APFloat &rhs) const {
if (this == &rhs)
return true;
@@ -777,18 +786,12 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const {
return false;
if (category==fcZero || category==fcInfinity)
return true;
- else if (isFiniteNonZero() && exponent!=rhs.exponent)
+
+ if (isFiniteNonZero() && exponent != rhs.exponent)
return false;
- else {
- int i= partCount();
- const integerPart* p=significandParts();
- const integerPart* q=rhs.significandParts();
- for (; i>0; i--, p++, q++) {
- if (*p != *q)
- return false;
- }
- return true;
- }
+
+ return std::equal(significandParts(), significandParts() + partCount(),
+ rhs.significandParts());
}
APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
@@ -847,6 +850,21 @@ APFloat::semanticsPrecision(const fltSemantics &semantics)
{
return semantics.precision;
}
+APFloat::ExponentType
+APFloat::semanticsMaxExponent(const fltSemantics &semantics)
+{
+ return semantics.maxExponent;
+}
+APFloat::ExponentType
+APFloat::semanticsMinExponent(const fltSemantics &semantics)
+{
+ return semantics.minExponent;
+}
+unsigned int
+APFloat::semanticsSizeInBits(const fltSemantics &semantics)
+{
+ return semantics.sizeInBits;
+}
const integerPart *
APFloat::significandParts() const
@@ -1762,7 +1780,7 @@ APFloat::remainder(const APFloat &rhs)
/* Normalized llvm frem (C fmod).
This is not currently correct in all cases. */
APFloat::opStatus
-APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
+APFloat::mod(const APFloat &rhs)
{
opStatus fs;
fs = modSpecials(rhs);
@@ -1787,10 +1805,10 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
rmNearestTiesToEven);
assert(fs==opOK); // should always work
- fs = V.multiply(rhs, rounding_mode);
+ fs = V.multiply(rhs, rmNearestTiesToEven);
assert(fs==opOK || fs==opInexact); // should not overflow or underflow
- fs = subtract(V, rounding_mode);
+ fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK || fs==opInexact); // likewise
if (isZero())
diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp
index 6f7e341..e7f3e17 100644
--- a/contrib/llvm/lib/Support/BlockFrequency.cpp
+++ b/contrib/llvm/lib/Support/BlockFrequency.cpp
@@ -11,37 +11,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;
-BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
+BlockFrequency &BlockFrequency::operator*=(BranchProbability Prob) {
Frequency = Prob.scale(Frequency);
return *this;
}
-const BlockFrequency
-BlockFrequency::operator*(const BranchProbability &Prob) const {
+BlockFrequency BlockFrequency::operator*(BranchProbability Prob) const {
BlockFrequency Freq(Frequency);
Freq *= Prob;
return Freq;
}
-BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
+BlockFrequency &BlockFrequency::operator/=(BranchProbability Prob) {
Frequency = Prob.scaleByInverse(Frequency);
return *this;
}
-BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
+BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const {
BlockFrequency Freq(Frequency);
Freq /= Prob;
return Freq;
}
-BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
+BlockFrequency &BlockFrequency::operator+=(BlockFrequency Freq) {
uint64_t Before = Freq.Frequency;
Frequency += Freq.Frequency;
@@ -52,11 +50,25 @@ BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
return *this;
}
-const BlockFrequency
-BlockFrequency::operator+(const BlockFrequency &Prob) const {
- BlockFrequency Freq(Frequency);
- Freq += Prob;
- return Freq;
+BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const {
+ BlockFrequency NewFreq(Frequency);
+ NewFreq += Freq;
+ return NewFreq;
+}
+
+BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) {
+ // If underflow, set frequency to 0.
+ if (Frequency <= Freq.Frequency)
+ Frequency = 0;
+ else
+ Frequency -= Freq.Frequency;
+ return *this;
+}
+
+BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const {
+ BlockFrequency NewFreq(Frequency);
+ NewFreq -= Freq;
+ return NewFreq;
}
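The new operator-= saturates at zero instead of wrapping, so subtracting a
larger estimated frequency from a smaller one stays well-defined. The core
idiom, standalone:

#include <cstdint>
#include <iostream>

// Saturating subtraction: clamp to 0 instead of wrapping around.
static uint64_t satSub(uint64_t A, uint64_t B) {
  return A <= B ? 0 : A - B;
}

int main() {
  std::cout << satSub(100, 30) << " " << satSub(30, 100) << "\n"; // 70 0
}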
BlockFrequency &BlockFrequency::operator>>=(const unsigned count) {
diff --git a/contrib/llvm/lib/Support/BranchProbability.cpp b/contrib/llvm/lib/Support/BranchProbability.cpp
index 65878d6..771d02c 100644
--- a/contrib/llvm/lib/Support/BranchProbability.cpp
+++ b/contrib/llvm/lib/Support/BranchProbability.cpp
@@ -15,17 +15,58 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
+const uint32_t BranchProbability::D;
+
raw_ostream &BranchProbability::print(raw_ostream &OS) const {
- return OS << N << " / " << D << " = "
- << format("%g%%", ((double)N / D) * 100.0);
+ if (isUnknown())
+ return OS << "?%";
+
+ // Get a percentage rounded to two decimal digits. This avoids
+ // implementation-defined rounding inside printf.
+ double Percent = rint(((double)N / D) * 100.0 * 100.0) / 100.0;
+ return OS << format("0x%08" PRIx32 " / 0x%08" PRIx32 " = %.2f%%", N, D,
+ Percent);
}
void BranchProbability::dump() const { print(dbgs()) << '\n'; }
+BranchProbability::BranchProbability(uint32_t Numerator, uint32_t Denominator) {
+ assert(Denominator > 0 && "Denominator cannot be 0!");
+ assert(Numerator <= Denominator && "Probability cannot be bigger than 1!");
+ if (Denominator == D)
+ N = Numerator;
+ else {
+ uint64_t Prob64 =
+ (Numerator * static_cast<uint64_t>(D) + Denominator / 2) / Denominator;
+ N = static_cast<uint32_t>(Prob64);
+ }
+}
+
+BranchProbability
+BranchProbability::getBranchProbability(uint64_t Numerator,
+ uint64_t Denominator) {
+ assert(Numerator <= Denominator && "Probability cannot be bigger than 1!");
+ // Scale down Denominator to fit in a 32-bit integer.
+ int Scale = 0;
+ while (Denominator > UINT32_MAX) {
+ Denominator >>= 1;
+ Scale++;
+ }
+ return BranchProbability(Numerator >> Scale, Denominator);
+}
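Numerically, the constructor rescales Numerator/Denominator onto the
class-wide denominator D with round-half-up:
N = (Numerator * D + Denominator / 2) / Denominator. A standalone check of
that arithmetic, assuming D = 1u << 31 (the value the in-tree header
defines; treated as an assumption here):

#include <cstdint>
#include <iostream>

int main() {
  const uint32_t D = 1u << 31; // assumed fixed denominator
  uint64_t Numerator = 1, Denominator = 3;
  // Round-half-up rescale of 1/3 onto N/D.
  uint64_t N = (Numerator * uint64_t(D) + Denominator / 2) / Denominator;
  std::cout << N << " / " << D << " = " << double(N) / D << "\n";
  // Prints: 715827883 / 2147483648 = 0.333333
}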
+
+// If ConstD is not zero, replace D with ConstD so that division and modulo
+// operations by D can be optimized with a compile-time constant even when
+// this function is not inlined by the compiler.
+template <uint32_t ConstD>
static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) {
+ if (ConstD > 0)
+ D = ConstD;
+
assert(D && "divide by 0");
// Fast path for multiplying by 1.0.
@@ -65,9 +106,9 @@ static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) {
}
uint64_t BranchProbability::scale(uint64_t Num) const {
- return ::scale(Num, N, D);
+ return ::scale<D>(Num, N, D);
}
uint64_t BranchProbability::scaleByInverse(uint64_t Num) const {
- return ::scale(Num, D, N);
+ return ::scale<0>(Num, D, N);
}
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index 17fba95..fdcdb03 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -120,7 +120,7 @@ public:
void addOption(Option *O) {
bool HadErrors = false;
- if (O->ArgStr[0]) {
+ if (O->hasArgStr()) {
// Add argument to the argument map!
if (!OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) {
errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
@@ -151,12 +151,12 @@ public:
}
void removeOption(Option *O) {
- SmallVector<const char *, 16> OptionNames;
+ SmallVector<StringRef, 16> OptionNames;
O->getExtraOptionNames(OptionNames);
- if (O->ArgStr[0])
+ if (O->hasArgStr())
OptionNames.push_back(O->ArgStr);
for (auto Name : OptionNames)
- OptionsMap.erase(StringRef(Name));
+ OptionsMap.erase(Name);
if (O->getFormattingFlag() == cl::Positional)
for (auto Opt = PositionalOpts.begin(); Opt != PositionalOpts.end();
@@ -182,13 +182,13 @@ public:
nullptr != ConsumeAfterOpt);
}
- void updateArgStr(Option *O, const char *NewName) {
+ void updateArgStr(Option *O, StringRef NewName) {
if (!OptionsMap.insert(std::make_pair(NewName, O)).second) {
errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
<< "' registered more than once!\n";
report_fatal_error("inconsistency in registered CommandLine options");
}
- OptionsMap.erase(StringRef(O->ArgStr));
+ OptionsMap.erase(O->ArgStr);
}
void printOptionValues();
@@ -227,7 +227,7 @@ void Option::addArgument() {
void Option::removeArgument() { GlobalParser->removeOption(this); }
-void Option::setArgStr(const char *S) {
+void Option::setArgStr(StringRef S) {
if (FullyInitialized)
GlobalParser->updateArgStr(this, S);
ArgStr = S;
@@ -296,24 +296,23 @@ static Option *LookupNearestOption(StringRef Arg,
ie = OptionsMap.end();
it != ie; ++it) {
Option *O = it->second;
- SmallVector<const char *, 16> OptionNames;
+ SmallVector<StringRef, 16> OptionNames;
O->getExtraOptionNames(OptionNames);
- if (O->ArgStr[0])
+ if (O->hasArgStr())
OptionNames.push_back(O->ArgStr);
bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed;
StringRef Flag = PermitValue ? LHS : Arg;
- for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
- StringRef Name = OptionNames[i];
+ for (auto Name : OptionNames) {
unsigned Distance = StringRef(Name).edit_distance(
Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
if (!Best || Distance < BestDistance) {
Best = O;
BestDistance = Distance;
if (RHS.empty() || !PermitValue)
- NearestString = OptionNames[i];
+ NearestString = Name;
else
- NearestString = (Twine(OptionNames[i]) + "=" + RHS).str();
+ NearestString = (Twine(Name) + "=" + RHS).str();
}
}
}
@@ -346,10 +345,7 @@ static bool CommaSeparateAndAddOccurrence(Option *Handler, unsigned pos,
Value = Val;
}
- if (Handler->addOccurrence(pos, ArgName, Value, MultiArg))
- return true;
-
- return false;
+ return Handler->addOccurrence(pos, ArgName, Value, MultiArg);
}
/// ProvideOption - For Value, this differentiates between an empty value ("")
@@ -799,7 +795,7 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
// telling us.
SmallVector<const char *, 20> newArgv;
BumpPtrAllocator A;
- BumpPtrStringSaver Saver(A);
+ StringSaver Saver(A);
newArgv.push_back(Saver.save(progName));
// Parse the value of the environment variable into a "command line"
@@ -822,7 +818,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc,
// Expand response files.
SmallVector<const char *, 20> newArgv(argv, argv + argc);
BumpPtrAllocator A;
- BumpPtrStringSaver Saver(A);
+ StringSaver Saver(A);
ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);
argv = &newArgv[0];
argc = static_cast<int>(newArgv.size());
@@ -859,7 +855,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc,
"error - this positional option will never be matched, "
"because it does not Require a value, and a "
"cl::ConsumeAfter option is active!");
- } else if (UnboundedFound && !Opt->ArgStr[0]) {
+ } else if (UnboundedFound && !Opt->hasArgStr()) {
// This option does not "require" a value... Make sure this option is
// not specified after an option that eats all extra arguments, or this
// one will never get any!
@@ -1144,8 +1140,8 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value,
// getValueStr - Get the value description string, using "DefaultMsg" if nothing
// has been specified yet.
//
-static const char *getValueStr(const Option &O, const char *DefaultMsg) {
- if (O.ValueStr[0] == 0)
+static StringRef getValueStr(const Option &O, StringRef DefaultMsg) {
+ if (O.ValueStr.empty())
return DefaultMsg;
return O.ValueStr;
}
@@ -1155,7 +1151,7 @@ static const char *getValueStr(const Option &O, const char *DefaultMsg) {
//
// Return the width of the option tag for printing...
-size_t alias::getOptionWidth() const { return std::strlen(ArgStr) + 6; }
+size_t alias::getOptionWidth() const { return ArgStr.size() + 6; }
static void printHelpStr(StringRef HelpStr, size_t Indent,
size_t FirstLineIndentedBy) {
@@ -1170,7 +1166,7 @@ static void printHelpStr(StringRef HelpStr, size_t Indent,
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
outs() << " -" << ArgStr;
- printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6);
+ printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6);
}
//===----------------------------------------------------------------------===//
@@ -1182,9 +1178,9 @@ void alias::printOptionInfo(size_t GlobalWidth) const {
// Return the width of the option tag for printing...
size_t basic_parser_impl::getOptionWidth(const Option &O) const {
- size_t Len = std::strlen(O.ArgStr);
+ size_t Len = O.ArgStr.size();
if (const char *ValName = getValueName())
- Len += std::strlen(getValueStr(O, ValName)) + 3;
+ Len += getValueStr(O, ValName).size() + 3;
return Len + 6;
}
@@ -1205,7 +1201,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,
void basic_parser_impl::printOptionName(const Option &O,
size_t GlobalWidth) const {
outs() << " -" << O.ArgStr;
- outs().indent(GlobalWidth - std::strlen(O.ArgStr));
+ outs().indent(GlobalWidth - O.ArgStr.size());
}
// parser<bool> implementation
@@ -1319,7 +1315,7 @@ unsigned generic_parser_base::findOption(const char *Name) {
// Return the width of the option tag for printing...
size_t generic_parser_base::getOptionWidth(const Option &O) const {
if (O.hasArgStr()) {
- size_t Size = std::strlen(O.ArgStr) + 6;
+ size_t Size = O.ArgStr.size() + 6;
for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
Size = std::max(Size, std::strlen(getOption(i)) + 8);
return Size;
@@ -1338,7 +1334,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
if (O.hasArgStr()) {
outs() << " -" << O.ArgStr;
- printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6);
+ printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
size_t NumSpaces = GlobalWidth - strlen(getOption(i)) - 8;
@@ -1346,7 +1342,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
}
} else {
- if (O.HelpStr[0])
+ if (!O.HelpStr.empty())
outs() << " " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
const char *Option = getOption(i);
@@ -1365,7 +1361,7 @@ void generic_parser_base::printGenericOptionDiff(
const Option &O, const GenericOptionValue &Value,
const GenericOptionValue &Default, size_t GlobalWidth) const {
outs() << " -" << O.ArgStr;
- outs().indent(GlobalWidth - std::strlen(O.ArgStr));
+ outs().indent(GlobalWidth - O.ArgStr.size());
unsigned NumOpts = getNumOptions();
for (unsigned i = 0; i != NumOpts; ++i) {
@@ -1508,7 +1504,7 @@ public:
outs() << "USAGE: " << GlobalParser->ProgramName << " [options]";
for (auto Opt : GlobalParser->PositionalOpts) {
- if (Opt->ArgStr[0])
+ if (Opt->hasArgStr())
outs() << " --" << Opt->ArgStr;
outs() << " " << Opt->HelpStr;
}
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
index aba0f1d..3f4ef9d 100644
--- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -24,6 +24,12 @@ static ManagedStatic<
sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;
struct CrashRecoveryContextImpl {
+ // When threads are disabled, this links up all active
+ // CrashRecoveryContextImpls. When threads are enabled there's one thread
+ // per CrashRecoveryContext and CurrentContext is a thread-local, so only one
+ // CrashRecoveryContextImpl is active per thread and this is always null.
+ const CrashRecoveryContextImpl *Next;
+
CrashRecoveryContext *CRC;
std::string Backtrace;
::jmp_buf JumpBuffer;
@@ -34,21 +40,26 @@ public:
CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
Failed(false),
SwitchedThread(false) {
+ Next = CurrentContext->get();
CurrentContext->set(this);
}
~CrashRecoveryContextImpl() {
if (!SwitchedThread)
- CurrentContext->erase();
+ CurrentContext->set(Next);
}
/// \brief Called when the separate crash-recovery thread was finished, to
/// indicate that we don't need to clear the thread-local CurrentContext.
- void setSwitchedThread() { SwitchedThread = true; }
+ void setSwitchedThread() {
+#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0
+ SwitchedThread = true;
+#endif
+ }
void HandleCrash() {
// Eliminate the current context entry, to avoid re-entering in case the
// cleanup code crashes.
- CurrentContext->erase();
+ CurrentContext->set(Next);
assert(!Failed && "Crash recovery context already failed!");
Failed = true;
@@ -65,7 +76,7 @@ public:
static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex;
static bool gCrashRecoveryEnabled = false;
-static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextCleanup> >
+static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
tlIsRecoveringFromCrash;
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
@@ -73,7 +84,8 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
CrashRecoveryContext::~CrashRecoveryContext() {
// Reclaim registered resources.
CrashRecoveryContextCleanup *i = head;
- tlIsRecoveringFromCrash->set(head);
+ const CrashRecoveryContext *PC = tlIsRecoveringFromCrash->get();
+ tlIsRecoveringFromCrash->set(this);
while (i) {
CrashRecoveryContextCleanup *tmp = i;
i = tmp->next;
@@ -81,7 +93,7 @@ CrashRecoveryContext::~CrashRecoveryContext() {
tmp->recoverResources();
delete tmp;
}
- tlIsRecoveringFromCrash->erase();
+ tlIsRecoveringFromCrash->set(PC);
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
delete CRCI;
@@ -232,7 +244,7 @@ void CrashRecoveryContext::Disable() {
static const int Signals[] =
{ SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
-static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static const unsigned NumSignals = array_lengthof(Signals);
static struct sigaction PrevActions[NumSignals];
static void CrashRecoverySignalHandler(int Signal) {
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
index 13a4155..7d72256 100644
--- a/contrib/llvm/lib/Support/Dwarf.cpp
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -177,6 +177,23 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size";
case DW_AT_lo_user: return "DW_AT_lo_user";
case DW_AT_hi_user: return "DW_AT_hi_user";
+ case DW_AT_BORLAND_property_read: return "DW_AT_BORLAND_property_read";
+ case DW_AT_BORLAND_property_write: return "DW_AT_BORLAND_property_write";
+ case DW_AT_BORLAND_property_implements: return "DW_AT_BORLAND_property_implements";
+ case DW_AT_BORLAND_property_index: return "DW_AT_BORLAND_property_index";
+ case DW_AT_BORLAND_property_default: return "DW_AT_BORLAND_property_default";
+ case DW_AT_BORLAND_Delphi_unit: return "DW_AT_BORLAND_Delphi_unit";
+ case DW_AT_BORLAND_Delphi_class: return "DW_AT_BORLAND_Delphi_class";
+ case DW_AT_BORLAND_Delphi_record: return "DW_AT_BORLAND_Delphi_record";
+ case DW_AT_BORLAND_Delphi_metaclass: return "DW_AT_BORLAND_Delphi_metaclass";
+ case DW_AT_BORLAND_Delphi_constructor: return "DW_AT_BORLAND_Delphi_constructor";
+ case DW_AT_BORLAND_Delphi_destructor: return "DW_AT_BORLAND_Delphi_destructor";
+ case DW_AT_BORLAND_Delphi_anonymous_method: return "DW_AT_BORLAND_Delphi_anonymous_method";
+ case DW_AT_BORLAND_Delphi_interface: return "DW_AT_BORLAND_Delphi_interface";
+ case DW_AT_BORLAND_Delphi_ABI: return "DW_AT_BORLAND_Delphi_ABI";
+ case DW_AT_BORLAND_Delphi_return: return "DW_AT_BORLAND_Delphi_return";
+ case DW_AT_BORLAND_Delphi_frameptr: return "DW_AT_BORLAND_Delphi_frameptr";
+ case DW_AT_BORLAND_closure: return "DW_AT_BORLAND_closure";
case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
@@ -201,6 +218,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base";
case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames";
case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes";
+ case DW_AT_GNU_discriminator: return "DW_AT_GNU_discriminator";
}
return nullptr;
}
@@ -373,6 +391,14 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) {
case DW_CC_nocall: return "DW_CC_nocall";
case DW_CC_lo_user: return "DW_CC_lo_user";
case DW_CC_hi_user: return "DW_CC_hi_user";
+ case DW_CC_GNU_borland_fastcall_i386: return "DW_CC_GNU_borland_fastcall_i386";
+ case DW_CC_BORLAND_safecall: return "DW_CC_BORLAND_safecall";
+ case DW_CC_BORLAND_stdcall: return "DW_CC_BORLAND_stdcall";
+ case DW_CC_BORLAND_pascal: return "DW_CC_BORLAND_pascal";
+ case DW_CC_BORLAND_msfastcall: return "DW_CC_BORLAND_msfastcall";
+ case DW_CC_BORLAND_msreturn: return "DW_CC_BORLAND_msreturn";
+ case DW_CC_BORLAND_thiscall: return "DW_CC_BORLAND_thiscall";
+ case DW_CC_BORLAND_fastcall: return "DW_CC_BORLAND_fastcall";
}
return nullptr;
}
@@ -442,10 +468,21 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
case DW_MACINFO_start_file: return "DW_MACINFO_start_file";
case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
+ case DW_MACINFO_invalid: return "DW_MACINFO_invalid";
}
return nullptr;
}
+unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) {
+ return StringSwitch<unsigned>(MacinfoString)
+ .Case("DW_MACINFO_define", DW_MACINFO_define)
+ .Case("DW_MACINFO_undef", DW_MACINFO_undef)
+ .Case("DW_MACINFO_start_file", DW_MACINFO_start_file)
+ .Case("DW_MACINFO_end_file", DW_MACINFO_end_file)
+ .Case("DW_MACINFO_vendor_ext", DW_MACINFO_vendor_ext)
+ .Default(DW_MACINFO_invalid);
+}
+
const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
switch (Encoding) {
case DW_CFA_nop: return "DW_CFA_nop";
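
The new getMacinfo() is the inverse of MacinfoString(), with DW_MACINFO_invalid as the StringSwitch default. A self-contained sketch of that round trip in plain C++ (DW_MACINFO_define/undef values follow the DWARF spec; the invalid sentinel value below is made up, LLVM defines its own):

    #include <cassert>
    #include <cstring>

    enum : unsigned {
      DW_MACINFO_define = 0x01,
      DW_MACINFO_undef = 0x02,
      DW_MACINFO_invalid = ~0u // hypothetical sentinel for this sketch
    };

    unsigned getMacinfo(const char *S) {
      if (std::strcmp(S, "DW_MACINFO_define") == 0) return DW_MACINFO_define;
      if (std::strcmp(S, "DW_MACINFO_undef") == 0) return DW_MACINFO_undef;
      return DW_MACINFO_invalid; // plays the role of StringSwitch::Default()
    }

    int main() {
      assert(getMacinfo("DW_MACINFO_define") == DW_MACINFO_define);
      assert(getMacinfo("bogus") == DW_MACINFO_invalid);
    }
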
diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp
index a25e21a..2808bd3 100644
--- a/contrib/llvm/lib/Support/ErrorHandling.cpp
+++ b/contrib/llvm/lib/Support/ErrorHandling.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ErrorHandling.h"
-#include "llvm-c/Core.h"
+#include "llvm-c/ErrorHandling.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index 307ff09..651e679 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/Signals.h"
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -34,10 +35,8 @@ FileOutputBuffer::~FileOutputBuffer() {
sys::fs::remove(Twine(TempPath));
}
-std::error_code
-FileOutputBuffer::create(StringRef FilePath, size_t Size,
- std::unique_ptr<FileOutputBuffer> &Result,
- unsigned Flags) {
+ErrorOr<std::unique_ptr<FileOutputBuffer>>
+FileOutputBuffer::create(StringRef FilePath, size_t Size, unsigned Flags) {
// If file already exists, it must be a regular file (to be mappable).
sys::fs::file_status Stat;
std::error_code EC = sys::fs::status(FilePath, Stat);
@@ -76,6 +75,8 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size,
if (EC)
return EC;
+ sys::RemoveFileOnSignal(TempFilePath);
+
#ifndef LLVM_ON_WIN32
// On Windows, CreateFileMapping (the mmap function on Windows)
// automatically extends the underlying file. We don't need to
@@ -95,10 +96,9 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size,
if (Ret)
return std::error_code(errno, std::generic_category());
- Result.reset(
+ std::unique_ptr<FileOutputBuffer> Buf(
new FileOutputBuffer(std::move(MappedFile), FilePath, TempFilePath));
-
- return std::error_code();
+ return std::move(Buf);
}
std::error_code FileOutputBuffer::commit() {
@@ -107,6 +107,8 @@ std::error_code FileOutputBuffer::commit() {
// Rename file to final name.
- return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
+ std::error_code EC = sys::fs::rename(Twine(TempPath), Twine(FinalPath));
+ sys::DontRemoveFileOnSignal(TempPath);
+ return EC;
}
} // namespace
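
With create() now returning ErrorOr<std::unique_ptr<FileOutputBuffer>>, callers drop the out-parameter and receive ownership directly. A hedged usage sketch, assuming the header as modified above and that the Flags parameter keeps a default value ("out.bin" and the size are arbitrary):

    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>
    using namespace llvm;

    std::error_code writeZeros() {
      ErrorOr<std::unique_ptr<FileOutputBuffer>> BufOrErr =
          FileOutputBuffer::create("out.bin", /*Size=*/4096);
      if (std::error_code EC = BufOrErr.getError())
        return EC;                      // no out-parameter dance anymore
      std::unique_ptr<FileOutputBuffer> &Buf = *BufOrErr;
      std::memset(Buf->getBufferStart(), 0, Buf->getBufferSize());
      return Buf->commit();             // renames the temp file into place
    }
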
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index b8538ff..bb0ec2d 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -232,9 +232,29 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
Buckets = AllocateBuckets(NumBuckets);
NumNodes = 0;
}
+
+FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg)
+ : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
+ Arg.Buckets = nullptr;
+ Arg.NumBuckets = 0;
+ Arg.NumNodes = 0;
+}
+
+FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) {
+ free(Buckets); // This may be null if the set is in a moved-from state.
+ Buckets = RHS.Buckets;
+ NumBuckets = RHS.NumBuckets;
+ NumNodes = RHS.NumNodes;
+ RHS.Buckets = nullptr;
+ RHS.NumBuckets = 0;
+ RHS.NumNodes = 0;
+ return *this;
+}
+
FoldingSetImpl::~FoldingSetImpl() {
free(Buckets);
}
+
void FoldingSetImpl::clear() {
// Set all but the last bucket to null pointers.
memset(Buckets, 0, NumBuckets*sizeof(void*));
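
The move operations above rely on the nulled-out moved-from state: the source's destructor, and any later move-assignment into it, call free() on a null pointer, which is defined to be a no-op. A stand-alone sketch of that protocol:

    #include <cstdlib>
    #include <utility>

    struct Table {
      void **Buckets = static_cast<void **>(std::calloc(8, sizeof(void *)));
      Table() = default;
      Table(Table &&Arg) : Buckets(Arg.Buckets) { Arg.Buckets = nullptr; }
      Table &operator=(Table &&RHS) {
        std::free(Buckets);            // may be null if *this was moved from
        Buckets = RHS.Buckets;
        RHS.Buckets = nullptr;
        return *this;
      }
      ~Table() { std::free(Buckets); } // free(nullptr) is a no-op
    };

    int main() {
      Table A;
      Table B(std::move(A)); // A.Buckets is now null; A destructs safely
      A = std::move(B);      // frees A's (null) buffer, steals B's
    }
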
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
index a9b0220..d0e1d50 100644
--- a/contrib/llvm/lib/Support/GraphWriter.cpp
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -103,7 +103,7 @@ struct GraphSession {
bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
raw_string_ostream Log(LogBuffer);
SmallVector<StringRef, 8> parts;
- Names.split(parts, "|");
+ Names.split(parts, '|');
for (auto Name : parts) {
if (ErrorOr<std::string> P = sys::findProgramByName(Name)) {
ProgramPath = *P;
@@ -189,61 +189,87 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
}
- enum PSViewerKind { PSV_None, PSV_OSXOpen, PSV_XDGOpen, PSV_Ghostview };
- PSViewerKind PSViewer = PSV_None;
+ enum ViewerKind {
+ VK_None,
+ VK_OSXOpen,
+ VK_XDGOpen,
+ VK_Ghostview,
+ VK_CmdStart
+ };
+ ViewerKind Viewer = VK_None;
#ifdef __APPLE__
- if (!PSViewer && S.TryFindProgram("open", ViewerPath))
- PSViewer = PSV_OSXOpen;
+ if (!Viewer && S.TryFindProgram("open", ViewerPath))
+ Viewer = VK_OSXOpen;
+#endif
+ if (!Viewer && S.TryFindProgram("gv", ViewerPath))
+ Viewer = VK_Ghostview;
+ if (!Viewer && S.TryFindProgram("xdg-open", ViewerPath))
+ Viewer = VK_XDGOpen;
+#ifdef LLVM_ON_WIN32
+ if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) {
+ Viewer = VK_CmdStart;
+ }
#endif
- if (!PSViewer && S.TryFindProgram("gv", ViewerPath))
- PSViewer = PSV_Ghostview;
- if (!PSViewer && S.TryFindProgram("xdg-open", ViewerPath))
- PSViewer = PSV_XDGOpen;
- // PostScript graph generator + PostScript viewer
+ // PostScript or PDF graph generator + PostScript/PDF viewer
std::string GeneratorPath;
- if (PSViewer &&
+ if (Viewer &&
(S.TryFindProgram(getProgramName(program), GeneratorPath) ||
S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) {
- std::string PSFilename = Filename + ".ps";
+ std::string OutputFilename =
+ Filename + (Viewer == VK_CmdStart ? ".pdf" : ".ps");
std::vector<const char *> args;
args.push_back(GeneratorPath.c_str());
- args.push_back("-Tps");
+ if (Viewer == VK_CmdStart)
+ args.push_back("-Tpdf");
+ else
+ args.push_back("-Tps");
args.push_back("-Nfontname=Courier");
args.push_back("-Gsize=7.5,10");
args.push_back(Filename.c_str());
args.push_back("-o");
- args.push_back(PSFilename.c_str());
+ args.push_back(OutputFilename.c_str());
args.push_back(nullptr);
errs() << "Running '" << GeneratorPath << "' program... ";
- if (ExecGraphViewer(GeneratorPath, args, Filename, wait, ErrMsg))
+ if (ExecGraphViewer(GeneratorPath, args, Filename, true, ErrMsg))
return true;
+ // The lifetime of StartArg must extend through the call to ExecGraphViewer
+ // because the args are passed as a vector of char*.
+ std::string StartArg;
+
args.clear();
args.push_back(ViewerPath.c_str());
- switch (PSViewer) {
- case PSV_OSXOpen:
+ switch (Viewer) {
+ case VK_OSXOpen:
args.push_back("-W");
- args.push_back(PSFilename.c_str());
+ args.push_back(OutputFilename.c_str());
break;
- case PSV_XDGOpen:
+ case VK_XDGOpen:
wait = false;
- args.push_back(PSFilename.c_str());
+ args.push_back(OutputFilename.c_str());
break;
- case PSV_Ghostview:
+ case VK_Ghostview:
args.push_back("--spartan");
- args.push_back(PSFilename.c_str());
+ args.push_back(OutputFilename.c_str());
+ break;
+ case VK_CmdStart:
+ args.push_back("/S");
+ args.push_back("/C");
+ StartArg =
+ (StringRef("start ") + (wait ? "/WAIT " : "") + OutputFilename).str();
+ args.push_back(StartArg.c_str());
break;
- case PSV_None:
+ case VK_None:
llvm_unreachable("Invalid viewer");
}
args.push_back(nullptr);
ErrMsg.clear();
- return ExecGraphViewer(ViewerPath, args, PSFilename, wait, ErrMsg);
+ return ExecGraphViewer(ViewerPath, args, OutputFilename, wait, ErrMsg);
}
// dotty
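
The StartArg comment above points at a classic argv pitfall: the std::string whose c_str() is pushed into the argument vector must outlive the exec call. A stand-alone illustration (runViewer is a stand-in for ExecGraphViewer):

    #include <string>
    #include <vector>

    static void runViewer(const std::vector<const char *> &) {} // stand-in

    void good(const std::string &File, bool Wait) {
      std::string StartArg =
          "start " + std::string(Wait ? "/WAIT " : "") + File;
      std::vector<const char *> Args = {"cmd", "/S", "/C", StartArg.c_str(),
                                        nullptr};
      runViewer(Args); // OK: StartArg is still alive here
      // BAD: Args.push_back(("start " + File).c_str()); -- the temporary
      // string dies at the end of the full expression, leaving a dangling
      // pointer in Args.
    }

    int main() { good("g.pdf", true); }
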
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 1bd1fe2..c0f9e07 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -368,8 +368,14 @@ StringRef sys::getHostCPUName() {
// Broadwell:
case 61:
+ case 71:
return "broadwell";
+ // Skylake:
+ case 78:
+ case 94:
+ return "skylake";
+
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield
@@ -381,6 +387,8 @@ StringRef sys::getHostCPUName() {
case 55:
case 74:
case 77:
+ case 90:
+ case 93:
return "silvermont";
default: // Unknown family 6 CPU, try to guess.
@@ -689,7 +697,7 @@ StringRef sys::getHostCPUName() {
if (Lines[I].startswith("features")) {
size_t Pos = Lines[I].find(":");
if (Pos != StringRef::npos) {
- Lines[I].drop_front(Pos + 1).split(CPUFeatures, " ");
+ Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
break;
}
}
@@ -766,14 +774,17 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
- bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
- !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
- Features["avx"] = HasAVX;
- Features["fma"] = HasAVX && (ECX >> 12) & 1;
- Features["f16c"] = HasAVX && (ECX >> 29) & 1;
+ bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
+ !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
+ Features["avx"] = HasAVXSave;
+ Features["fma"] = HasAVXSave && (ECX >> 12) & 1;
+ Features["f16c"] = HasAVXSave && (ECX >> 29) & 1;
+
+ // Only enable XSAVE if OS has enabled support for saving YMM state.
+ Features["xsave"] = HasAVXSave && (ECX >> 26) & 1;
// AVX512 requires additional context to be saved by the OS.
- bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
unsigned MaxExtLevel;
GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -783,15 +794,15 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
- Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1);
- Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1);
+ Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
+ Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
bool HasLeaf7 = MaxLevel >= 7 &&
!GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
// AVX2 is only supported if we have the OS save support from AVX.
- Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1;
+ Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
@@ -801,6 +812,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
+ // Enable protection keys
+ Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
// AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
@@ -811,6 +824,14 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
+ bool HasLeafD = MaxLevel >= 0xd &&
+ !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+
+ // Only enable XSAVE if OS has enabled support for saving YMM state.
+ Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1);
+ Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1);
+ Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1);
+
return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
@@ -832,7 +853,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// Look for the CPU features.
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
if (Lines[I].startswith("Features")) {
- Lines[I].split(CPUFeatures, " ");
+ Lines[I].split(CPUFeatures, ' ');
break;
}
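
All of these feature flags follow the same pattern: shift the CPUID register right by the feature's bit index, mask with 1, and, for AVX-family features, additionally gate on the OS having enabled YMM state saving via XCR0. A self-contained sketch with made-up register values (bit positions follow CPUID leaf 1 ECX as used above):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t ECX = (1u << 27) | (1u << 28) | (1u << 12); // OSXSAVE, AVX, FMA
      uint32_t XCR0 = 0x7;                                 // x87/SSE/YMM saved

      // "HasAVXSave": CPU supports AVX and XSAVE *and* the OS saves YMM state.
      bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
                        ((XCR0 & 0x6) == 0x6);
      bool FMA = HasAVXSave && ((ECX >> 12) & 1); // gated on OS support
      assert(HasAVXSave && FMA);
    }
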
diff --git a/contrib/llvm/lib/Support/JamCRC.cpp b/contrib/llvm/lib/Support/JamCRC.cpp
new file mode 100644
index 0000000..bc21c91
--- /dev/null
+++ b/contrib/llvm/lib/Support/JamCRC.cpp
@@ -0,0 +1,96 @@
+//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of JamCRC.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation technique is the one mentioned in:
+// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table
+// look-up. Commun. ACM 31, 8 (August 1988)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/JamCRC.h"
+
+using namespace llvm;
+
+static const uint32_t CRCTable[256] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+void JamCRC::update(ArrayRef<char> Data) {
+ for (char Byte : Data) {
+ int TableIdx = (CRC ^ Byte) & 0xff;
+ CRC = CRCTable[TableIdx] ^ (CRC >> 8);
+ }
+}
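
A hedged usage sketch for the new class, assuming the accompanying llvm/Support/JamCRC.h header (with a getCRC() accessor). JamCRC is CRC-32 with an all-ones initial value and no final inversion, so its output is the bitwise NOT of the usual zlib-style CRC-32:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/JamCRC.h"

    uint32_t checksum(llvm::ArrayRef<char> Data) {
      llvm::JamCRC CRC; // default seed is all ones
      CRC.update(Data);
      return CRC.getCRC();
    }
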
diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp
index d5cb72b..53bc0e3 100644
--- a/contrib/llvm/lib/Support/Locale.cpp
+++ b/contrib/llvm/lib/Support/Locale.cpp
@@ -1,3 +1,4 @@
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/Unicode.h"
diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp
index b8fb284..9868207 100644
--- a/contrib/llvm/lib/Support/ManagedStatic.cpp
+++ b/contrib/llvm/lib/Support/ManagedStatic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include <cassert>
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
index d09ef3a..faee10b 100644
--- a/contrib/llvm/lib/Support/MemoryBuffer.cpp
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -162,13 +162,14 @@ MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize) {
+MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
+ bool RequiresNullTerminator) {
SmallString<256> NameBuf;
StringRef NameRef = Filename.toStringRef(NameBuf);
if (NameRef == "-")
return getSTDIN();
- return getFile(Filename, FileSize);
+ return getFile(Filename, FileSize, RequiresNullTerminator);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index cf46738..4952f59 100644
--- a/contrib/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -455,17 +455,15 @@ void append(SmallVectorImpl<char> &path, const Twine &a,
if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
- for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
- e = components.end();
- i != e; ++i) {
+ for (auto &component : components) {
bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
- bool component_has_sep = !i->empty() && is_separator((*i)[0]);
- bool is_root_name = has_root_name(*i);
+ bool component_has_sep = !component.empty() && is_separator(component[0]);
+ bool is_root_name = has_root_name(component);
if (path_has_sep) {
// Strip separators from beginning of component.
- size_t loc = i->find_first_not_of(separators);
- StringRef c = i->substr(loc);
+ size_t loc = component.find_first_not_of(separators);
+ StringRef c = component.substr(loc);
// Append it.
path.append(c.begin(), c.end());
@@ -477,7 +475,7 @@ void append(SmallVectorImpl<char> &path, const Twine &a,
path.push_back(preferred_separator);
}
- path.append(i->begin(), i->end());
+ path.append(component.begin(), component.end());
}
}
@@ -661,8 +659,51 @@ bool is_absolute(const Twine &path) {
return rootDir && rootName;
}
-bool is_relative(const Twine &path) {
- return !is_absolute(path);
+bool is_relative(const Twine &path) { return !is_absolute(path); }
+
+StringRef remove_leading_dotslash(StringRef Path) {
+ // Remove leading "./" (or ".//" or "././" etc.)
+ while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1])) {
+ Path = Path.substr(2);
+ while (Path.size() > 0 && is_separator(Path[0]))
+ Path = Path.substr(1);
+ }
+ return Path;
+}
+
+static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) {
+ SmallVector<StringRef, 16> components;
+
+ // Skip the root path, then look for traversal in the components.
+ StringRef rel = path::relative_path(path);
+ for (StringRef C : llvm::make_range(path::begin(rel), path::end(rel))) {
+ if (C == ".")
+ continue;
+ if (remove_dot_dot) {
+ if (C == "..") {
+ if (!components.empty())
+ components.pop_back();
+ continue;
+ }
+ }
+ components.push_back(C);
+ }
+
+ SmallString<256> buffer = path::root_path(path);
+ for (StringRef C : components)
+ path::append(buffer, C);
+ return buffer;
+}
+
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot) {
+ StringRef p(path.data(), path.size());
+
+ SmallString<256> result = remove_dots(p, remove_dot_dot);
+ if (result == path)
+ return false;
+
+ path.swap(result);
+ return true;
}
} // end namespace path
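
A hedged usage sketch for the new remove_dots() overload, assuming the matching declaration in llvm/Support/Path.h (the input path is arbitrary):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    using namespace llvm;

    void demo() {
      SmallString<64> P("a/./b/../c");
      // With remove_dot_dot=true both "." and ".." components are folded.
      sys::path::remove_dots(P, /*remove_dot_dot=*/true); // P == "a/c"
    }
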
@@ -732,7 +773,9 @@ std::error_code createUniqueDirectory(const Twine &Prefix,
true, 0, FS_Dir);
}
-std::error_code make_absolute(SmallVectorImpl<char> &path) {
+static std::error_code make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path,
+ bool use_current_directory) {
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p),
@@ -748,7 +791,9 @@ std::error_code make_absolute(SmallVectorImpl<char> &path) {
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
- if (std::error_code ec = current_path(current_dir))
+ if (use_current_directory)
+ current_directory.toVector(current_dir);
+ else if (std::error_code ec = current_path(current_dir))
return ec;
// Relative path. Prepend the current directory.
@@ -785,12 +830,22 @@ std::error_code make_absolute(SmallVectorImpl<char> &path) {
"occurred above!");
}
-std::error_code create_directories(const Twine &Path, bool IgnoreExisting) {
+std::error_code make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path) {
+ return make_absolute(current_directory, path, true);
+}
+
+std::error_code make_absolute(SmallVectorImpl<char> &path) {
+ return make_absolute(Twine(), path, false);
+}
+
+std::error_code create_directories(const Twine &Path, bool IgnoreExisting,
+ perms Perms) {
SmallString<128> PathStorage;
StringRef P = Path.toStringRef(PathStorage);
// Be optimistic and try to create the directory
- std::error_code EC = create_directory(P, IgnoreExisting);
+ std::error_code EC = create_directory(P, IgnoreExisting, Perms);
// If we succeeded, or had any error other than the parent not existing, just
// return it.
if (EC != errc::no_such_file_or_directory)
@@ -802,10 +857,10 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) {
if (Parent.empty())
return EC;
- if ((EC = create_directories(Parent)))
+ if ((EC = create_directories(Parent, IgnoreExisting, Perms)))
return EC;
- return create_directory(P, IgnoreExisting);
+ return create_directory(P, IgnoreExisting, Perms);
}
std::error_code copy_file(const Twine &From, const Twine &To) {
@@ -889,8 +944,7 @@ std::error_code is_other(const Twine &Path, bool &Result) {
}
void directory_entry::replace_filename(const Twine &filename, file_status st) {
- SmallString<128> path(Path.begin(), Path.end());
- path::remove_filename(path);
+ SmallString<128> path = path::parent_path(Path);
path::append(path, filename);
Path = path.str();
Status = st;
@@ -940,7 +994,8 @@ file_magic identify_magic(StringRef Magic) {
break;
case '!':
if (Magic.size() >= 8)
- if (memcmp(Magic.data(),"!<arch>\n",8) == 0)
+ if (memcmp(Magic.data(), "!<arch>\n", 8) == 0 ||
+ memcmp(Magic.data(), "!<thin>\n", 8) == 0)
return file_magic::archive;
break;
@@ -1074,3 +1129,20 @@ std::error_code directory_entry::status(file_status &result) const {
#if defined(LLVM_ON_WIN32)
#include "Windows/Path.inc"
#endif
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
+ const Twine &Path2, const Twine &Path3) {
+ if (getUserCacheDir(Result)) {
+ append(Result, Path1, Path2, Path3);
+ return true;
+ }
+ return false;
+}
+
+} // end namespace path
+} // end namespace sys
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
index f9f8cab..05b3e31 100644
--- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm-c/Core.h"
+#include "llvm-c/ErrorHandling.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/Compiler.h"
@@ -154,6 +154,20 @@ void llvm::EnablePrettyStackTrace() {
#endif
}
+const void* llvm::SavePrettyStackState() {
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+ return PrettyStackTraceHead;
+#else
+ return nullptr;
+#endif
+}
+
+void llvm::RestorePrettyStackState(const void* Top) {
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+ PrettyStackTraceHead = (const PrettyStackTraceEntry*)Top;
+#endif
+}
+
void LLVMEnablePrettyStackTrace() {
EnablePrettyStackTrace();
}
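
A hedged sketch of how the new save/restore pair can be used, assuming the matching declarations in llvm/Support/PrettyStackTrace.h; the guarded function below is illustrative. It lets a caller discard stack-trace entries left behind when recovery unwinds past their RAII owners:

    #include "llvm/Support/PrettyStackTrace.h"

    void guardedWork(void (*Work)()) {
      const void *Saved = llvm::SavePrettyStackState();
      Work();                               // may be unwound on crash recovery
      llvm::RestorePrettyStackState(Saved); // drop any stale entries
    }
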
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
index a117893..3dc6b7c 100644
--- a/contrib/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -12,8 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Signals.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
namespace llvm {
using namespace sys;
@@ -23,6 +36,131 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
+static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
+ CallBacksToRun;
+void sys::RunSignalHandlers() {
+ if (!CallBacksToRun.isConstructed())
+ return;
+ for (auto &I : *CallBacksToRun)
+ I.first(I.second);
+ CallBacksToRun->clear();
+}
+}
+
+using namespace llvm;
+
+static bool findModulesAndOffsets(void **StackTrace, int Depth,
+ const char **Modules, intptr_t *Offsets,
+ const char *MainExecutableName,
+ StringSaver &StrPool);
+
+/// Format a pointer value as hexadecimal. Zero pad it out so it's always the
+/// same width.
+static FormattedNumber format_ptr(void *PC) {
+ // Each byte is two hex digits plus 2 for the 0x prefix.
+ unsigned PtrWidth = 2 + 2 * sizeof(void *);
+ return format_hex((uint64_t)PC, PtrWidth);
+}
+
+static bool printSymbolizedStackTrace(void **StackTrace, int Depth,
+ llvm::raw_ostream &OS)
+ LLVM_ATTRIBUTE_USED;
+
+/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
+static bool printSymbolizedStackTrace(void **StackTrace, int Depth,
+ llvm::raw_ostream &OS) {
+ // FIXME: Subtract the necessary number from StackTrace entries to turn
+ // return addresses into actual instruction addresses.
+ // Use llvm-symbolizer tool to symbolize the stack traces.
+ ErrorOr<std::string> LLVMSymbolizerPathOrErr =
+ sys::findProgramByName("llvm-symbolizer");
+ if (!LLVMSymbolizerPathOrErr)
+ return false;
+ const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
+ // We don't know argv0 or the address of main() at this point, but try
+ // to guess it anyway (it's possible on some platforms).
+ std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
+ if (MainExecutableName.empty() ||
+ MainExecutableName.find("llvm-symbolizer") != std::string::npos)
+ return false;
+
+ BumpPtrAllocator Allocator;
+ StringSaver StrPool(Allocator);
+ std::vector<const char *> Modules(Depth, nullptr);
+ std::vector<intptr_t> Offsets(Depth, 0);
+ if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
+ MainExecutableName.c_str(), StrPool))
+ return false;
+ int InputFD;
+ SmallString<32> InputFile, OutputFile;
+ sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
+ sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile);
+ FileRemover InputRemover(InputFile.c_str());
+ FileRemover OutputRemover(OutputFile.c_str());
+
+ {
+ raw_fd_ostream Input(InputFD, true);
+ for (int i = 0; i < Depth; i++) {
+ if (Modules[i])
+ Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
+ }
+ }
+
+ StringRef InputFileStr(InputFile);
+ StringRef OutputFileStr(OutputFile);
+ StringRef StderrFileStr;
+ const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr,
+ &StderrFileStr};
+ const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
+#ifdef LLVM_ON_WIN32
+ // Pass --relative-address on Windows so that we don't
+ // have to add ImageBase from PE file.
+ // FIXME: Make this the default for llvm-symbolizer.
+ "--relative-address",
+#endif
+ "--demangle", nullptr};
+ int RunResult =
+ sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects);
+ if (RunResult != 0)
+ return false;
+
+ // This report format is based on the sanitizer stack trace printer. See
+ // sanitizer_stacktrace_printer.cc in compiler-rt.
+ auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
+ if (!OutputBuf)
+ return false;
+ StringRef Output = OutputBuf.get()->getBuffer();
+ SmallVector<StringRef, 32> Lines;
+ Output.split(Lines, "\n");
+ auto CurLine = Lines.begin();
+ int frame_no = 0;
+ for (int i = 0; i < Depth; i++) {
+ if (!Modules[i]) {
+ OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << '\n';
+ continue;
+ }
+ // Read pairs of lines (function name and file/line info) until we
+ // encounter an empty line.
+ for (;;) {
+ if (CurLine == Lines.end())
+ return false;
+ StringRef FunctionName = *CurLine++;
+ if (FunctionName.empty())
+ break;
+ OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << ' ';
+ if (!FunctionName.startswith("??"))
+ OS << FunctionName << ' ';
+ if (CurLine == Lines.end())
+ return false;
+ StringRef FileLineInfo = *CurLine++;
+ if (!FileLineInfo.startswith("??"))
+ OS << FileLineInfo;
+ else
+ OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
+ OS << "\n";
+ }
+ }
+ return true;
}
// Include the platform-specific parts of this class.
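
A quick stand-alone check of the width arithmetic in format_ptr() above: two hex digits per byte plus two for the "0x" prefix gives a constant-width column:

    #include <cstdio>

    int main() {
      unsigned PtrWidth = 2 + 2 * sizeof(void *); // 18 on a 64-bit target
      // format_hex counts the "0x" prefix into the width; emulate that here.
      std::printf("#0 0x%0*llx\n", (int)(PtrWidth - 2), 0xdeadbeefULL);
    }
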
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
index 56c3b0f..e49d1cb 100644
--- a/contrib/llvm/lib/Support/Statistic.cpp
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
@@ -33,9 +34,6 @@
#include <cstring>
using namespace llvm;
-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
/// -stats - Command line option to cause transformations to emit stats about
/// what they did.
///
@@ -144,20 +142,18 @@ void llvm::PrintStatistics() {
if (Stats.Stats.empty()) return;
// Get the stream to write to.
- raw_ostream &OutStream = *CreateInfoOutputFile();
- PrintStatistics(OutStream);
- delete &OutStream; // Close the file.
+ std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
+ PrintStatistics(*OutStream);
+
#else
// Check if the -stats option is set instead of checking
// !Stats.Stats.empty(). In release builds, Statistics operators
// do nothing, so stats are never Registered.
if (Enabled) {
// Get the stream to write to.
- raw_ostream &OutStream = *CreateInfoOutputFile();
- OutStream << "Statistics are disabled. "
- << "Build with asserts or with -DLLVM_ENABLE_STATS\n";
- OutStream.flush();
- delete &OutStream; // Close the file.
+ std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
+ (*OutStream) << "Statistics are disabled. "
+ << "Build with asserts or with -DLLVM_ENABLE_STATS\n";
}
#endif
}
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
index ddece08..7ecff29 100644
--- a/contrib/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -140,37 +140,44 @@ std::string StringRef::upper() const {
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
size_t StringRef::find(StringRef Str, size_t From) const {
+ if (From > Length)
+ return npos;
+
+ const char *Needle = Str.data();
size_t N = Str.size();
- if (N > Length)
+ if (N == 0)
+ return From;
+
+ size_t Size = Length - From;
+ if (Size < N)
return npos;
+ const char *Start = Data + From;
+ const char *Stop = Start + (Size - N + 1);
+
// For short haystacks or unsupported needles, fall back to the naive algorithm
- if (Length < 16 || N > 255 || N == 0) {
- for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
- if (substr(i, N).equals(Str))
- return i;
+ if (Size < 16 || N > 255) {
+ do {
+ if (std::memcmp(Start, Needle, N) == 0)
+ return Start - Data;
+ ++Start;
+ } while (Start < Stop);
return npos;
}
- if (From >= Length)
- return npos;
-
// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
uint8_t BadCharSkip[256];
std::memset(BadCharSkip, N, 256);
for (unsigned i = 0; i != N-1; ++i)
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
- unsigned Len = Length-From, Pos = From;
- while (Len >= N) {
- if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
- return Pos;
+ do {
+ if (std::memcmp(Start, Needle, N) == 0)
+ return Start - Data;
// Otherwise skip the appropriate number of bytes.
- uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
- Len -= Skip;
- Pos += Skip;
- }
+ Start += BadCharSkip[(uint8_t)Start[N-1]];
+ } while (Start < Stop);
return npos;
}
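
The BadCharSkip table above is the Boyer-Moore-Horspool bad-character heuristic: for every needle byte except the last, store the distance from its final occurrence to the needle's end; bytes absent from the needle skip a full needle length. A self-contained worked example:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const char *Needle = "abcab";
      size_t N = std::strlen(Needle);
      uint8_t Skip[256];
      std::memset(Skip, N, sizeof(Skip));  // default: jump a whole needle
      for (unsigned i = 0; i != N - 1; ++i)
        Skip[(uint8_t)Needle[i]] = N - 1 - i;
      assert(Skip[(uint8_t)'a'] == 1);     // last 'a' is 1 from the end
      assert(Skip[(uint8_t)'c'] == 2);
      assert(Skip[(uint8_t)'x'] == 5);     // absent byte: skip N
    }
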
@@ -274,24 +281,56 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
}
void StringRef::split(SmallVectorImpl<StringRef> &A,
- StringRef Separators, int MaxSplit,
+ StringRef Separator, int MaxSplit,
bool KeepEmpty) const {
- StringRef rest = *this;
-
- // rest.data() is used to distinguish cases like "a," that splits into
- // "a" + "" and "a" that splits into "a" + 0.
- for (int splits = 0;
- rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
- ++splits) {
- std::pair<StringRef, StringRef> p = rest.split(Separators);
-
- if (KeepEmpty || p.first.size() != 0)
- A.push_back(p.first);
- rest = p.second;
+ StringRef S = *this;
+
+ // Count down from MaxSplit. When MaxSplit is -1, this will just split
+ // "forever". This doesn't support splitting more than 2^31 times
+ // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+ // but that seems unlikely to be useful.
+ while (MaxSplit-- != 0) {
+ size_t Idx = S.find(Separator);
+ if (Idx == npos)
+ break;
+
+ // Push this split.
+ if (KeepEmpty || Idx > 0)
+ A.push_back(S.slice(0, Idx));
+
+ // Jump forward.
+ S = S.slice(Idx + Separator.size(), npos);
+ }
+
+ // Push the tail.
+ if (KeepEmpty || !S.empty())
+ A.push_back(S);
+}
+
+void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
+ int MaxSplit, bool KeepEmpty) const {
+ StringRef S = *this;
+
+ // Count down from MaxSplit. When MaxSplit is -1, this will just split
+ // "forever". This doesn't support splitting more than 2^31 times
+ // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+ // but that seems unlikely to be useful.
+ while (MaxSplit-- != 0) {
+ size_t Idx = S.find(Separator);
+ if (Idx == npos)
+ break;
+
+ // Push this split.
+ if (KeepEmpty || Idx > 0)
+ A.push_back(S.slice(0, Idx));
+
+ // Jump forward.
+ S = S.slice(Idx + 1, npos);
}
- // If we have a tail left, add it.
- if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
- A.push_back(rest);
+
+ // Push the tail.
+ if (KeepEmpty || !S.empty())
+ A.push_back(S);
}
//===----------------------------------------------------------------------===//
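
A hedged behavior sketch for the rewritten split() overloads, assuming StringRef/SmallVector as declared in the LLVM ADT headers; it shows how MaxSplit and KeepEmpty interact (expected results noted in comments):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    void demo() {
      SmallVector<StringRef, 4> Parts;
      StringRef("a,,b,c").split(Parts, ',', /*MaxSplit=*/-1,
                                /*KeepEmpty=*/true);
      // Parts == {"a", "", "b", "c"}
      Parts.clear();
      StringRef("a,,b,c").split(Parts, ',', /*MaxSplit=*/1,
                                /*KeepEmpty=*/false);
      // One split is made, then the tail is pushed: Parts == {"a", ",b,c"}
    }
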
diff --git a/contrib/llvm/lib/Support/StringSaver.cpp b/contrib/llvm/lib/Support/StringSaver.cpp
index d6b84e5..bbc1fd2 100644
--- a/contrib/llvm/lib/Support/StringSaver.cpp
+++ b/contrib/llvm/lib/Support/StringSaver.cpp
@@ -11,7 +11,7 @@
using namespace llvm;
-const char *StringSaver::saveImpl(StringRef S) {
+const char *StringSaver::save(StringRef S) {
char *P = Alloc.Allocate<char>(S.size() + 1);
memcpy(P, S.data(), S.size());
P[S.size()] = '\0';
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index 4d4c041..337532e 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -16,9 +16,11 @@
#include "llvm/Support/TargetParser.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include <cctype>
using namespace llvm;
+using namespace ARM;
namespace {
@@ -26,36 +28,19 @@ namespace {
// features they correspond to (use getFPUFeatures).
// FIXME: TableGen this.
// The entries must appear in the order listed in ARM::FPUKind for correct indexing
-struct {
- const char * Name;
+static const struct {
+ const char *NameCStr;
+ size_t NameLength;
ARM::FPUKind ID;
ARM::FPUVersion FPUVersion;
ARM::NeonSupportLevel NeonSupport;
ARM::FPURestriction Restriction;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
} FPUNames[] = {
- { "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
- { "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
- { "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None},
- { "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None},
- { "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None},
- { "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None},
- { "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16},
- { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16},
- { "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16},
- { "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16},
- { "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None},
- { "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16},
- { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16},
- { "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16},
- { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16},
- { "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None},
- { "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None},
- { "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None},
- { "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None},
- { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None},
- { "crypto-neon-fp-armv8",
- ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None},
- { "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None},
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \
+ { NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION },
+#include "llvm/Support/ARMTargetParser.def"
};
// List of canonical arch names (use getArchSynonym).
@@ -66,165 +51,79 @@ struct {
// of the triples and are not conforming with their official names.
// Check to see if the expectation should be changed.
// FIXME: TableGen this.
-struct {
- const char *Name;
+static const struct {
+ const char *NameCStr;
+ size_t NameLength;
+ const char *CPUAttrCStr;
+ size_t CPUAttrLength;
+ const char *SubArchCStr;
+ size_t SubArchLength;
+ unsigned DefaultFPU;
+ unsigned ArchBaseExtensions;
ARM::ArchKind ID;
- const char *CPUAttr; // CPU class in build attributes.
- const char *SubArch; // Sub-Arch name.
ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+
+ // CPU class in build attributes.
+ StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); }
+
+ // Sub-Arch name.
+ StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
} ARCHNames[] = {
- { "invalid", ARM::AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv2", ARM::AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv2a", ARM::AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv3", ARM::AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv3m", ARM::AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4 },
- { "armv4", ARM::AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4 },
- { "armv4t", ARM::AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T },
- { "armv5t", ARM::AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T },
- { "armv5te", ARM::AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE },
- { "armv5tej", ARM::AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ },
- { "armv6", ARM::AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6 },
- { "armv6k", ARM::AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K },
- { "armv6t2", ARM::AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2 },
- { "armv6z", ARM::AK_ARMV6Z, "6Z", "v6z", ARMBuildAttrs::CPUArch::v6KZ },
- { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", "v6zk", ARMBuildAttrs::CPUArch::v6KZ },
- { "armv6-m", ARM::AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M },
- { "armv6s-m", ARM::AK_ARMV6SM, "6S-M", "v6sm", ARMBuildAttrs::CPUArch::v6S_M },
- { "armv7-a", ARM::AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7 },
- { "armv7-r", ARM::AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7 },
- { "armv7-m", ARM::AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7 },
- { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M },
- { "armv8-a", ARM::AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8 },
- { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8 },
- // Non-standard Arch names.
- { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE },
- { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE },
- { "xscale", ARM::AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE },
- { "armv5", ARM::AK_ARMV5, "5T", "v5", ARMBuildAttrs::CPUArch::v5T },
- { "armv5e", ARM::AK_ARMV5E, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE },
- { "armv6j", ARM::AK_ARMV6J, "6J", "v6", ARMBuildAttrs::CPUArch::v6 },
- { "armv6hl", ARM::AK_ARMV6HL, "6-M", "v6hl", ARMBuildAttrs::CPUArch::v6_M },
- { "armv7", ARM::AK_ARMV7, "7", "v7", ARMBuildAttrs::CPUArch::v7 },
- { "armv7l", ARM::AK_ARMV7L, "7-L", "v7l", ARMBuildAttrs::CPUArch::v7 },
- { "armv7hl", ARM::AK_ARMV7HL, "7-L", "v7hl", ARMBuildAttrs::CPUArch::v7 },
- { "armv7s", ARM::AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7 }
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
+ {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
+ sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ID, ARCH_ATTR},
+#include "llvm/Support/ARMTargetParser.def"
};
+
// List of Arch Extension names.
// FIXME: TableGen this.
-struct {
- const char *Name;
- ARM::ArchExtKind ID;
+static const struct {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+ const char *Feature;
+ const char *NegFeature;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
} ARCHExtNames[] = {
- { "invalid", ARM::AEK_INVALID },
- { "crc", ARM::AEK_CRC },
- { "crypto", ARM::AEK_CRYPTO },
- { "fp", ARM::AEK_FP },
- { "idiv", ARM::AEK_HWDIV },
- { "mp", ARM::AEK_MP },
- { "simd", ARM::AEK_SIMD },
- { "sec", ARM::AEK_SEC },
- { "virt", ARM::AEK_VIRT },
- { "os", ARM::AEK_OS },
- { "iwmmxt", ARM::AEK_IWMMXT },
- { "iwmmxt2", ARM::AEK_IWMMXT2 },
- { "maverick", ARM::AEK_MAVERICK },
- { "xscale", ARM::AEK_XSCALE }
+#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE },
+#include "llvm/Support/ARMTargetParser.def"
};
+
+// List of HWDiv names (use getHWDivSynonym) and which architectural
+// features they correspond to (use getHWDivFeatures).
+// FIXME: TableGen this.
+static const struct {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+} HWDivNames[] = {
+#define ARM_HW_DIV_NAME(NAME, ID) { NAME, sizeof(NAME) - 1, ID },
+#include "llvm/Support/ARMTargetParser.def"
+};
+
// List of CPU names and their arches.
// The same CPU can have multiple arches and can be default on multiple arches.
// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
// When this becomes table-generated, we'd probably need two tables.
// FIXME: TableGen this.
-struct {
- const char *Name;
+static const struct {
+ const char *NameCStr;
+ size_t NameLength;
ARM::ArchKind ArchID;
- bool Default;
+ bool Default; // is $Name the default CPU for $ArchID?
+ unsigned DefaultExtensions;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
} CPUNames[] = {
- { "arm2", ARM::AK_ARMV2, true },
- { "arm3", ARM::AK_ARMV2A, true },
- { "arm6", ARM::AK_ARMV3, true },
- { "arm7m", ARM::AK_ARMV3M, true },
- { "arm8", ARM::AK_ARMV4, false },
- { "arm810", ARM::AK_ARMV4, false },
- { "strongarm", ARM::AK_ARMV4, true },
- { "strongarm110", ARM::AK_ARMV4, false },
- { "strongarm1100", ARM::AK_ARMV4, false },
- { "strongarm1110", ARM::AK_ARMV4, false },
- { "arm7tdmi", ARM::AK_ARMV4T, true },
- { "arm7tdmi-s", ARM::AK_ARMV4T, false },
- { "arm710t", ARM::AK_ARMV4T, false },
- { "arm720t", ARM::AK_ARMV4T, false },
- { "arm9", ARM::AK_ARMV4T, false },
- { "arm9tdmi", ARM::AK_ARMV4T, false },
- { "arm920", ARM::AK_ARMV4T, false },
- { "arm920t", ARM::AK_ARMV4T, false },
- { "arm922t", ARM::AK_ARMV4T, false },
- { "arm9312", ARM::AK_ARMV4T, false },
- { "arm940t", ARM::AK_ARMV4T, false },
- { "ep9312", ARM::AK_ARMV4T, false },
- { "arm10tdmi", ARM::AK_ARMV5T, true },
- { "arm1020t", ARM::AK_ARMV5T, false },
- { "arm9e", ARM::AK_ARMV5TE, false },
- { "arm946e-s", ARM::AK_ARMV5TE, false },
- { "arm966e-s", ARM::AK_ARMV5TE, false },
- { "arm968e-s", ARM::AK_ARMV5TE, false },
- { "arm10e", ARM::AK_ARMV5TE, false },
- { "arm1020e", ARM::AK_ARMV5TE, false },
- { "arm1022e", ARM::AK_ARMV5TE, true },
- { "iwmmxt", ARM::AK_ARMV5TE, false },
- { "xscale", ARM::AK_ARMV5TE, false },
- { "arm926ej-s", ARM::AK_ARMV5TEJ, true },
- { "arm1136jf-s", ARM::AK_ARMV6, true },
- { "arm1176j-s", ARM::AK_ARMV6K, false },
- { "arm1176jz-s", ARM::AK_ARMV6K, false },
- { "mpcore", ARM::AK_ARMV6K, false },
- { "mpcorenovfp", ARM::AK_ARMV6K, false },
- { "arm1176jzf-s", ARM::AK_ARMV6K, true },
- { "arm1176jzf-s", ARM::AK_ARMV6Z, true },
- { "arm1176jzf-s", ARM::AK_ARMV6ZK, true },
- { "arm1156t2-s", ARM::AK_ARMV6T2, true },
- { "arm1156t2f-s", ARM::AK_ARMV6T2, false },
- { "cortex-m0", ARM::AK_ARMV6M, true },
- { "cortex-m0plus", ARM::AK_ARMV6M, false },
- { "cortex-m1", ARM::AK_ARMV6M, false },
- { "sc000", ARM::AK_ARMV6M, false },
- { "cortex-a5", ARM::AK_ARMV7A, false },
- { "cortex-a7", ARM::AK_ARMV7A, false },
- { "cortex-a8", ARM::AK_ARMV7A, true },
- { "cortex-a9", ARM::AK_ARMV7A, false },
- { "cortex-a12", ARM::AK_ARMV7A, false },
- { "cortex-a15", ARM::AK_ARMV7A, false },
- { "cortex-a17", ARM::AK_ARMV7A, false },
- { "krait", ARM::AK_ARMV7A, false },
- { "cortex-r4", ARM::AK_ARMV7R, true },
- { "cortex-r4f", ARM::AK_ARMV7R, false },
- { "cortex-r5", ARM::AK_ARMV7R, false },
- { "cortex-r7", ARM::AK_ARMV7R, false },
- { "sc300", ARM::AK_ARMV7M, false },
- { "cortex-m3", ARM::AK_ARMV7M, true },
- { "cortex-m4", ARM::AK_ARMV7EM, true },
- { "cortex-m7", ARM::AK_ARMV7EM, false },
- { "cortex-a53", ARM::AK_ARMV8A, true },
- { "cortex-a57", ARM::AK_ARMV8A, false },
- { "cortex-a72", ARM::AK_ARMV8A, false },
- { "cyclone", ARM::AK_ARMV8A, false },
- { "generic", ARM::AK_ARMV8_1A, true },
- // Non-standard Arch names.
- { "iwmmxt", ARM::AK_IWMMXT, true },
- { "xscale", ARM::AK_XSCALE, true },
- { "arm10tdmi", ARM::AK_ARMV5, true },
- { "arm1022e", ARM::AK_ARMV5E, true },
- { "arm1136j-s", ARM::AK_ARMV6J, true },
- { "arm1136jz-s", ARM::AK_ARMV6J, false },
- { "cortex-m0", ARM::AK_ARMV6SM, true },
- { "arm1176jzf-s", ARM::AK_ARMV6HL, true },
- { "cortex-a8", ARM::AK_ARMV7, true },
- { "cortex-a8", ARM::AK_ARMV7L, true },
- { "cortex-a8", ARM::AK_ARMV7HL, true },
- { "cortex-m4", ARM::AK_ARMV7EM, true },
- { "swift", ARM::AK_ARMV7S, true },
- // Invalid CPU
- { "invalid", ARM::AK_INVALID, true }
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ { NAME, sizeof(NAME) - 1, ID, IS_DEFAULT, DEFAULT_EXT },
+#include "llvm/Support/ARMTargetParser.def"
};
} // namespace
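
All of these tables now share the X-macro pattern: the rows live once in ARMTargetParser.def, and each table #includes them through a row-shaping macro, with sizeof(NAME)-1 yielding the string length at compile time. A self-contained miniature of the technique (names below are invented, not LLVM's):

    #include <cstddef>
    #include <cstdio>

    // The rows; in LLVM these live in llvm/Support/ARMTargetParser.def and
    // are pulled in with #include, so every table stays in sync with one list.
    #define DEMO_FPU_ROWS \
      DEMO_FPU("none", 0) \
      DEMO_FPU("vfpv2", 1)

    static const struct {
      const char *NameCStr;
      size_t NameLength; // sizeof(literal)-1, computed at compile time
      unsigned ID;
    } FPUNames[] = {
    #define DEMO_FPU(NAME, ID) {NAME, sizeof(NAME) - 1, ID},
        DEMO_FPU_ROWS
    #undef DEMO_FPU
    };

    int main() {
      for (const auto &F : FPUNames)
        std::printf("%.*s -> %u\n", (int)F.NameLength, F.NameCStr, F.ID);
    }
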
@@ -233,33 +132,93 @@ struct {
// Information by ID
// ======================================================= //
-const char *ARMTargetParser::getFPUName(unsigned FPUKind) {
+StringRef llvm::ARM::getFPUName(unsigned FPUKind) {
if (FPUKind >= ARM::FK_LAST)
- return nullptr;
- return FPUNames[FPUKind].Name;
+ return StringRef();
+ return FPUNames[FPUKind].getName();
}
-unsigned ARMTargetParser::getFPUVersion(unsigned FPUKind) {
+unsigned llvm::ARM::getFPUVersion(unsigned FPUKind) {
if (FPUKind >= ARM::FK_LAST)
return 0;
return FPUNames[FPUKind].FPUVersion;
}
-unsigned ARMTargetParser::getFPUNeonSupportLevel(unsigned FPUKind) {
+unsigned llvm::ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
if (FPUKind >= ARM::FK_LAST)
return 0;
return FPUNames[FPUKind].NeonSupport;
}
-unsigned ARMTargetParser::getFPURestriction(unsigned FPUKind) {
+unsigned llvm::ARM::getFPURestriction(unsigned FPUKind) {
if (FPUKind >= ARM::FK_LAST)
return 0;
return FPUNames[FPUKind].Restriction;
}
-bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
+unsigned llvm::ARM::getDefaultFPU(StringRef CPU, unsigned ArchKind) {
+ if (CPU == "generic")
+ return ARCHNames[ArchKind].DefaultFPU;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, DEFAULT_FPU)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::FK_INVALID);
+}
+
+unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, unsigned ArchKind) {
+ if (CPU == "generic")
+ return ARCHNames[ArchKind].ArchBaseExtensions;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, ARCHNames[ID].ArchBaseExtensions | DEFAULT_EXT)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::AEK_INVALID);
+}
+
+bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
+ std::vector<const char *> &Features) {
+
+ if (HWDivKind == ARM::AEK_INVALID)
+ return false;
+
+ if (HWDivKind & ARM::AEK_HWDIVARM)
+ Features.push_back("+hwdiv-arm");
+ else
+ Features.push_back("-hwdiv-arm");
+
+ if (HWDivKind & ARM::AEK_HWDIV)
+ Features.push_back("+hwdiv");
+ else
+ Features.push_back("-hwdiv");
+
+ return true;
+}
+
+bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
std::vector<const char *> &Features) {
+ if (Extensions == ARM::AEK_INVALID)
+ return false;
+
+ if (Extensions & ARM::AEK_CRC)
+ Features.push_back("+crc");
+ else
+ Features.push_back("-crc");
+
+ if (Extensions & ARM::AEK_DSP)
+ Features.push_back("+dsp");
+ else
+ Features.push_back("-dsp");
+
+ return getHWDivFeatures(Extensions, Features);
+}
+
+bool llvm::ARM::getFPUFeatures(unsigned FPUKind,
+ std::vector<const char *> &Features) {
+
if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID)
return false;
@@ -323,6 +282,7 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
// crypto includes neon, so we handle this similarly to FPU version.
switch (FPUNames[FPUKind].NeonSupport) {
case ARM::NS_Crypto:
+ Features.push_back("+neon");
Features.push_back("+crypto");
break;
case ARM::NS_Neon:
@@ -338,88 +298,127 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
return true;
}
-const char *ARMTargetParser::getArchName(unsigned ArchKind) {
+StringRef llvm::ARM::getArchName(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
- return nullptr;
- return ARCHNames[ArchKind].Name;
+ return StringRef();
+ return ARCHNames[ArchKind].getName();
}
-const char *ARMTargetParser::getCPUAttr(unsigned ArchKind) {
+StringRef llvm::ARM::getCPUAttr(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
- return nullptr;
- return ARCHNames[ArchKind].CPUAttr;
+ return StringRef();
+ return ARCHNames[ArchKind].getCPUAttr();
}
-const char *ARMTargetParser::getSubArch(unsigned ArchKind) {
+StringRef llvm::ARM::getSubArch(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
- return nullptr;
- return ARCHNames[ArchKind].SubArch;
+ return StringRef();
+ return ARCHNames[ArchKind].getSubArch();
}
-unsigned ARMTargetParser::getArchAttr(unsigned ArchKind) {
+unsigned llvm::ARM::getArchAttr(unsigned ArchKind) {
if (ArchKind >= ARM::AK_LAST)
return ARMBuildAttrs::CPUArch::Pre_v4;
return ARCHNames[ArchKind].ArchAttr;
}
-const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) {
- if (ArchExtKind >= ARM::AEK_LAST)
- return nullptr;
- return ARCHExtNames[ArchExtKind].Name;
+StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) {
+ for (const auto AE : ARCHExtNames) {
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ }
+ return StringRef();
}
-const char *ARMTargetParser::getDefaultCPU(StringRef Arch) {
+const char *llvm::ARM::getArchExtFeature(StringRef ArchExt) {
+ if (ArchExt.startswith("no")) {
+ StringRef ArchExtBase(ArchExt.substr(2));
+ for (const auto AE : ARCHExtNames) {
+ if (AE.NegFeature && ArchExtBase == AE.getName())
+ return AE.NegFeature;
+ }
+ }
+ for (const auto AE : ARCHExtNames) {
+ if (AE.Feature && ArchExt == AE.getName())
+ return AE.Feature;
+ }
+
+ return nullptr;
+}
+
+StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) {
+ for (const auto D : HWDivNames) {
+ if (HWDivKind == D.ID)
+ return D.getName();
+ }
+ return StringRef();
+}
+
+StringRef llvm::ARM::getDefaultCPU(StringRef Arch) {
unsigned AK = parseArch(Arch);
if (AK == ARM::AK_INVALID)
- return nullptr;
+ return StringRef();
// Look for multiple AKs to find the default for pair AK+Name.
for (const auto CPU : CPUNames) {
if (CPU.ArchID == AK && CPU.Default)
- return CPU.Name;
+ return CPU.getName();
}
- return nullptr;
+
+ // If we can't find a default then target the architecture instead
+ return "generic";
}
// ======================================================= //
// Parsers
// ======================================================= //
-StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) {
+static StringRef getHWDivSynonym(StringRef HWDiv) {
+ return StringSwitch<StringRef>(HWDiv)
+ .Case("thumb,arm", "arm,thumb")
+ .Default(HWDiv);
+}
+
+static StringRef getFPUSynonym(StringRef FPU) {
return StringSwitch<StringRef>(FPU)
- .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
- .Case("vfp2", "vfpv2")
- .Case("vfp3", "vfpv3")
- .Case("vfp4", "vfpv4")
- .Case("vfp3-d16", "vfpv3-d16")
- .Case("vfp4-d16", "vfpv4-d16")
- .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
- .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
- .Case("fp5-sp-d16", "fpv5-sp-d16")
- .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
- // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
- .Case("neon-vfpv3", "neon")
- .Default(FPU);
+ .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Case("vfp2", "vfpv2")
+ .Case("vfp3", "vfpv3")
+ .Case("vfp4", "vfpv4")
+ .Case("vfp3-d16", "vfpv3-d16")
+ .Case("vfp4-d16", "vfpv4-d16")
+ .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
+ .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Case("fp5-sp-d16", "fpv5-sp-d16")
+ .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
+ .Case("neon-vfpv3", "neon")
+ .Default(FPU);
}
-StringRef ARMTargetParser::getArchSynonym(StringRef Arch) {
+static StringRef getArchSynonym(StringRef Arch) {
return StringSwitch<StringRef>(Arch)
- .Case("v6sm", "v6s-m")
- .Case("v6m", "v6-m")
- .Case("v7a", "v7-a")
- .Case("v7r", "v7-r")
- .Case("v7m", "v7-m")
- .Case("v7em", "v7e-m")
- .Cases("v8", "v8a", "aarch64", "arm64", "v8-a")
- .Case("v8.1a", "v8.1-a")
- .Default(Arch);
+ .Case("v5", "v5t")
+ .Case("v5e", "v5te")
+ .Case("v6j", "v6")
+ .Case("v6hl", "v6k")
+ .Cases("v6m", "v6sm", "v6s-m", "v6-m")
+ .Cases("v6z", "v6zk", "v6kz")
+ .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+ .Case("v7r", "v7-r")
+ .Case("v7m", "v7-m")
+ .Case("v7em", "v7e-m")
+ .Cases("v8", "v8a", "aarch64", "arm64", "v8-a")
+ .Case("v8.1a", "v8.1-a")
+ .Case("v8.2a", "v8.2-a")
+ .Default(Arch);
}
// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but
// (iwmmxt|xscale)(eb)? is also permitted. If the former, return
// "v.+", if the latter, return unmodified string, minus 'eb'.
// If invalid, return empty string.
-StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) {
+StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) {
size_t offset = StringRef::npos;
StringRef A = Arch;
StringRef Error = "";
@@ -436,7 +435,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) {
// AArch64 uses "_be", not "eb" suffix.
if (A.find("eb") != StringRef::npos)
return Error;
- if (A.substr(offset,3) == "_be")
+ if (A.substr(offset, 3) == "_be")
offset += 3;
}
@@ -456,7 +455,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) {
// Only match non-marketing names
if (offset != StringRef::npos) {
- // Must start with 'vN'.
+ // Must start with 'vN'.
if (A[0] != 'v' || !std::isdigit(A[1]))
return Error;
// Can't have an extra 'eb'.
@@ -468,56 +467,64 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) {
return A;
}
-unsigned ARMTargetParser::parseFPU(StringRef FPU) {
+unsigned llvm::ARM::parseHWDiv(StringRef HWDiv) {
+ StringRef Syn = getHWDivSynonym(HWDiv);
+ for (const auto D : HWDivNames) {
+ if (Syn == D.getName())
+ return D.ID;
+ }
+ return ARM::AEK_INVALID;
+}
+
+unsigned llvm::ARM::parseFPU(StringRef FPU) {
StringRef Syn = getFPUSynonym(FPU);
for (const auto F : FPUNames) {
- if (Syn == F.Name)
+ if (Syn == F.getName())
return F.ID;
}
return ARM::FK_INVALID;
}
// Allows partial match, ex. "v7a" matches "armv7a".
-unsigned ARMTargetParser::parseArch(StringRef Arch) {
+unsigned llvm::ARM::parseArch(StringRef Arch) {
Arch = getCanonicalArchName(Arch);
StringRef Syn = getArchSynonym(Arch);
for (const auto A : ARCHNames) {
- if (StringRef(A.Name).endswith(Syn))
+ if (A.getName().endswith(Syn))
return A.ID;
}
return ARM::AK_INVALID;
}
-unsigned ARMTargetParser::parseArchExt(StringRef ArchExt) {
+unsigned llvm::ARM::parseArchExt(StringRef ArchExt) {
for (const auto A : ARCHExtNames) {
- if (ArchExt == A.Name)
+ if (ArchExt == A.getName())
return A.ID;
}
return ARM::AEK_INVALID;
}
-unsigned ARMTargetParser::parseCPUArch(StringRef CPU) {
+unsigned llvm::ARM::parseCPUArch(StringRef CPU) {
for (const auto C : CPUNames) {
- if (CPU == C.Name)
+ if (CPU == C.getName())
return C.ArchID;
}
return ARM::AK_INVALID;
}
// ARM, Thumb, AArch64
-unsigned ARMTargetParser::parseArchISA(StringRef Arch) {
+unsigned llvm::ARM::parseArchISA(StringRef Arch) {
return StringSwitch<unsigned>(Arch)
.StartsWith("aarch64", ARM::IK_AARCH64)
- .StartsWith("arm64", ARM::IK_AARCH64)
- .StartsWith("thumb", ARM::IK_THUMB)
- .StartsWith("arm", ARM::IK_ARM)
+ .StartsWith("arm64", ARM::IK_AARCH64)
+ .StartsWith("thumb", ARM::IK_THUMB)
+ .StartsWith("arm", ARM::IK_ARM)
.Default(ARM::EK_INVALID);
}
// Little/Big endian
-unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
- if (Arch.startswith("armeb") ||
- Arch.startswith("thumbeb") ||
+unsigned llvm::ARM::parseArchEndian(StringRef Arch) {
+ if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
Arch.startswith("aarch64_be"))
return ARM::EK_BIG;
@@ -535,29 +542,29 @@ unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
}
// Profile A/R/M
-unsigned ARMTargetParser::parseArchProfile(StringRef Arch) {
+unsigned llvm::ARM::parseArchProfile(StringRef Arch) {
Arch = getCanonicalArchName(Arch);
- switch(parseArch(Arch)) {
+ switch (parseArch(Arch)) {
case ARM::AK_ARMV6M:
case ARM::AK_ARMV7M:
- case ARM::AK_ARMV6SM:
case ARM::AK_ARMV7EM:
return ARM::PK_M;
case ARM::AK_ARMV7R:
return ARM::PK_R;
- case ARM::AK_ARMV7:
case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV7K:
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
return ARM::PK_A;
}
return ARM::PK_INVALID;
}
// Version number (ex. v7 = 7).
-unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
+unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
Arch = getCanonicalArchName(Arch);
- switch(parseArch(Arch)) {
+ switch (parseArch(Arch)) {
case ARM::AK_ARMV2:
case ARM::AK_ARMV2A:
return 2;
@@ -567,36 +574,29 @@ unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
case ARM::AK_ARMV4:
case ARM::AK_ARMV4T:
return 4;
- case ARM::AK_ARMV5:
case ARM::AK_ARMV5T:
case ARM::AK_ARMV5TE:
case ARM::AK_IWMMXT:
case ARM::AK_IWMMXT2:
case ARM::AK_XSCALE:
- case ARM::AK_ARMV5E:
case ARM::AK_ARMV5TEJ:
return 5;
case ARM::AK_ARMV6:
- case ARM::AK_ARMV6J:
case ARM::AK_ARMV6K:
case ARM::AK_ARMV6T2:
- case ARM::AK_ARMV6Z:
- case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6KZ:
case ARM::AK_ARMV6M:
- case ARM::AK_ARMV6SM:
- case ARM::AK_ARMV6HL:
return 6;
- case ARM::AK_ARMV7:
case ARM::AK_ARMV7A:
case ARM::AK_ARMV7R:
case ARM::AK_ARMV7M:
- case ARM::AK_ARMV7L:
- case ARM::AK_ARMV7HL:
case ARM::AK_ARMV7S:
case ARM::AK_ARMV7EM:
+ case ARM::AK_ARMV7K:
return 7;
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
return 8;
}
return 0;
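
The hunks above fold the old ARMTargetParser class into free functions in the llvm::ARM namespace and swap const char* returns for StringRef, so failure is an empty string rather than a null pointer, and getDefaultCPU now falls back to "generic". A minimal sketch of the new call style, assuming the declarations sit in llvm/Support/TargetParser.h as in this LLVM revision:

// Sketch only; assumes the llvm::ARM declarations from
// llvm/Support/TargetParser.h at this revision.
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"

void dumpArmArch(llvm::StringRef MArch) {   // e.g. MArch = "armv7a"
  unsigned AK = llvm::ARM::parseArch(MArch);
  if (AK == llvm::ARM::AK_INVALID)
    return;
  // StringRef results are empty, not nullptr, when a kind is out of range.
  llvm::outs() << llvm::ARM::getArchName(AK)
               << " default CPU: " << llvm::ARM::getDefaultCPU(MArch) << "\n";
}
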
diff --git a/contrib/llvm/lib/Support/ThreadPool.cpp b/contrib/llvm/lib/Support/ThreadPool.cpp
new file mode 100644
index 0000000..d4dcb2e
--- /dev/null
+++ b/contrib/llvm/lib/Support/ThreadPool.cpp
@@ -0,0 +1,155 @@
+//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ThreadPool.h"
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#if LLVM_ENABLE_THREADS
+
+// Default to std::thread::hardware_concurrency
+ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
+
+ThreadPool::ThreadPool(unsigned ThreadCount)
+ : ActiveThreads(0), EnableFlag(true) {
+ // Create ThreadCount threads that will loop forever, wait on QueueCondition
+ // for tasks to be queued or the Pool to be destroyed.
+ Threads.reserve(ThreadCount);
+ for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
+ Threads.emplace_back([&] {
+ while (true) {
+ PackagedTaskTy Task;
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ // Wait for tasks to be pushed in the queue
+ QueueCondition.wait(LockGuard,
+ [&] { return !EnableFlag || !Tasks.empty(); });
+ // Exit condition
+ if (!EnableFlag && Tasks.empty())
+ return;
+ // Yeah, we have a task, grab it and release the lock on the queue
+
+ // We first need to signal that we are active before popping the queue
+ // in order for wait() to properly detect that even if the queue is
+ // empty, there is still a task in flight.
+ {
+ ++ActiveThreads;
+ std::unique_lock<std::mutex> LockGuard(CompletionLock);
+ }
+ Task = std::move(Tasks.front());
+ Tasks.pop();
+ }
+ // Run the task we just grabbed
+#ifndef _MSC_VER
+ Task();
+#else
+ Task(/* unused */ false);
+#endif
+
+ {
+ // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
+ std::unique_lock<std::mutex> LockGuard(CompletionLock);
+ --ActiveThreads;
+ }
+
+ // Notify task completion, in case someone waits on ThreadPool::wait()
+ CompletionCondition.notify_all();
+ }
+ });
+ }
+}
+
+void ThreadPool::wait() {
+ // Wait for all threads to complete and the queue to be empty
+ std::unique_lock<std::mutex> LockGuard(CompletionLock);
+ CompletionCondition.wait(LockGuard,
+ [&] { return Tasks.empty() && !ActiveThreads; });
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+ /// Wrap the Task in a packaged_task to return a future object.
+ PackagedTaskTy PackagedTask(std::move(Task));
+ auto Future = PackagedTask.get_future();
+ {
+ // Lock the queue and push the new task
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Don't allow enqueueing after disabling the pool
+ assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+
+ Tasks.push(std::move(PackagedTask));
+ }
+ QueueCondition.notify_one();
+ return Future.share();
+}
+
+// The destructor joins all threads, waiting for completion.
+ThreadPool::~ThreadPool() {
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ EnableFlag = false;
+ }
+ QueueCondition.notify_all();
+ for (auto &Worker : Threads)
+ Worker.join();
+}
+
+#else // LLVM_ENABLE_THREADS Disabled
+
+ThreadPool::ThreadPool() : ThreadPool(0) {}
+
+// No threads are launched, issue a warning if ThreadCount is not 0
+ThreadPool::ThreadPool(unsigned ThreadCount)
+ : ActiveThreads(0) {
+ if (ThreadCount) {
+ errs() << "Warning: request a ThreadPool with " << ThreadCount
+ << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
+ }
+}
+
+void ThreadPool::wait() {
+ // Sequential implementation running the tasks
+ while (!Tasks.empty()) {
+ auto Task = std::move(Tasks.front());
+ Tasks.pop();
+#ifndef _MSC_VER
+ Task();
+#else
+ Task(/* unused */ false);
+#endif
+ }
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+#ifndef _MSC_VER
+ // Get a Future with launch::deferred execution using std::async
+ auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+ // Wrap the future so that both ThreadPool::wait() can operate and the
+ // returned future can be sync'ed on.
+ PackagedTaskTy PackagedTask([Future]() { Future.get(); });
+#else
+ auto Future = std::async(std::launch::deferred, std::move(Task), false).share();
+ PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; });
+#endif
+ Tasks.push(std::move(PackagedTask));
+ return Future;
+}
+
+ThreadPool::~ThreadPool() {
+ wait();
+}
+
+#endif
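
The new ThreadPool.cpp above wires std::packaged_task tasks through a mutex-protected queue and a pair of condition variables. A minimal usage sketch, assuming the public ThreadPool::async() template wrapper declared in llvm/Support/ThreadPool.h (only asyncImpl appears in this file):

// Usage sketch; ThreadPool::async() is assumed from llvm/Support/ThreadPool.h.
#include "llvm/Support/ThreadPool.h"
#include <atomic>

int main() {
  std::atomic<int> Sum(0);
  llvm::ThreadPool Pool(4);        // default ctor uses hardware_concurrency()
  for (int I = 1; I <= 10; ++I)
    Pool.async([&Sum, I] { Sum += I; });
  Pool.wait();                     // returns once the queue is empty and no
                                   // worker is mid-task (see ActiveThreads)
  return Sum == 55 ? 0 : 1;
}
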
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
index 136b93e..94a4c01 100644
--- a/contrib/llvm/lib/Support/TimeValue.cpp
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -15,6 +15,7 @@
#include "llvm/Config/config.h"
namespace llvm {
+
using namespace sys;
const TimeValue::SecondsType
@@ -22,8 +23,7 @@ const TimeValue::SecondsType
const TimeValue::SecondsType
TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;
-void
-TimeValue::normalize( void ) {
+void TimeValue::normalize() {
if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
do {
seconds_++;
@@ -45,7 +45,7 @@ TimeValue::normalize( void ) {
}
}
-}
+} // namespace llvm
/// Include the platform-specific portion of TimeValue class
#ifdef LLVM_ON_UNIX
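
For reference, the normalize() routine reformatted above carries excess nanoseconds into the seconds field and keeps the signs of the two fields consistent; a self-contained sketch of the same arithmetic, under the assumption that the elided loops mirror the first one shown:

// Self-contained sketch of the TimeValue::normalize() arithmetic.
#include <cstdint>

const int64_t NANOSECONDS_PER_SECOND = 1000000000;

void normalize(int64_t &Seconds, int64_t &Nanos) {
  // Carry whole seconds out of the nanosecond field.
  Seconds += Nanos / NANOSECONDS_PER_SECOND;
  Nanos %= NANOSECONDS_PER_SECOND;
  // Make the signs of the two fields agree, as the original loop pairs do.
  if (Seconds > 0 && Nanos < 0) {
    --Seconds;
    Nanos += NANOSECONDS_PER_SECOND;
  } else if (Seconds < 0 && Nanos > 0) {
    ++Seconds;
    Nanos -= NANOSECONDS_PER_SECOND;
  }
}
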
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index d7b6515..414f559 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Timer.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
@@ -22,9 +23,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
// of constructor/destructor ordering being unspecified by C++. Basically the
// problem is that a Statistic object gets destroyed, which ends up calling
@@ -52,28 +50,27 @@ namespace {
cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
}
-// CreateInfoOutputFile - Return a file stream to print our output on.
-raw_ostream *llvm::CreateInfoOutputFile() {
+// Return a file stream to print our output on.
+std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
const std::string &OutputFilename = getLibSupportInfoOutputFilename();
if (OutputFilename.empty())
- return new raw_fd_ostream(2, false); // stderr.
+ return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
if (OutputFilename == "-")
- return new raw_fd_ostream(1, false); // stdout.
-
+ return llvm::make_unique<raw_fd_ostream>(1, false); // stdout.
+
// Append mode is used because the info output file is opened and closed
// each time -stats or -time-passes wants to print output to it. To
// compensate for this, the test-suite Makefiles have code to delete the
// info output file before running commands which write to it.
std::error_code EC;
- raw_ostream *Result = new raw_fd_ostream(OutputFilename, EC,
- sys::fs::F_Append | sys::fs::F_Text);
+ auto Result = llvm::make_unique<raw_fd_ostream>(
+ OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text);
if (!EC)
return Result;
-
+
errs() << "Error opening info-output-file '"
         << OutputFilename << "' for appending!\n";
- delete Result;
- return new raw_fd_ostream(2, false); // stderr.
+ return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
}
@@ -99,17 +96,13 @@ static TimerGroup *getDefaultTimerGroup() {
//===----------------------------------------------------------------------===//
void Timer::init(StringRef N) {
- assert(!TG && "Timer already initialized");
- Name.assign(N.begin(), N.end());
- Started = false;
- TG = getDefaultTimerGroup();
- TG->addTimer(*this);
+ init(N, *getDefaultTimerGroup());
}
void Timer::init(StringRef N, TimerGroup &tg) {
assert(!TG && "Timer already initialized");
Name.assign(N.begin(), N.end());
- Started = false;
+ Running = Triggered = false;
TG = &tg;
TG->addTimer(*this);
}
@@ -142,25 +135,22 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
return Result;
}
-static ManagedStatic<std::vector<Timer*> > ActiveTimers;
-
void Timer::startTimer() {
- Started = true;
- ActiveTimers->push_back(this);
- Time -= TimeRecord::getCurrentTime(true);
+ assert(!Running && "Cannot start a running timer");
+ Running = Triggered = true;
+ StartTime = TimeRecord::getCurrentTime(true);
}
void Timer::stopTimer() {
+ assert(Running && "Cannot stop a paused timer");
+ Running = false;
Time += TimeRecord::getCurrentTime(false);
+ Time -= StartTime;
+}
- if (ActiveTimers->back() == this) {
- ActiveTimers->pop_back();
- } else {
- std::vector<Timer*>::iterator I =
- std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
- assert(I != ActiveTimers->end() && "stop but no startTimer?");
- ActiveTimers->erase(I);
- }
+void Timer::clear() {
+ Running = Triggered = false;
+ Time = StartTime = TimeRecord();
}
static void printVal(double Val, double Total, raw_ostream &OS) {
@@ -278,8 +268,8 @@ void TimerGroup::removeTimer(Timer &T) {
sys::SmartScopedLock<true> L(*TimerLock);
// If the timer was started, move its data to TimersToPrint.
- if (T.Started)
- TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+ if (T.hasTriggered())
+ TimersToPrint.emplace_back(T.Time, T.Name);
T.TG = nullptr;
@@ -292,10 +282,9 @@ void TimerGroup::removeTimer(Timer &T) {
// them were started.
if (FirstTimer || TimersToPrint.empty())
return;
-
- raw_ostream *OutStream = CreateInfoOutputFile();
+
+ std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
PrintQueuedTimers(*OutStream);
- delete OutStream; // Close the file.
}
void TimerGroup::addTimer(Timer &T) {
@@ -314,8 +303,8 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
std::sort(TimersToPrint.begin(), TimersToPrint.end());
TimeRecord Total;
- for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
- Total += TimersToPrint[i].first;
+ for (auto &RecordNamePair : TimersToPrint)
+ Total += RecordNamePair.first;
// Print out timing header.
OS << "===" << std::string(73, '-') << "===\n";
@@ -365,12 +354,11 @@ void TimerGroup::print(raw_ostream &OS) {
// See if any of our timers were started, if so add them to TimersToPrint and
// reset them.
for (Timer *T = FirstTimer; T; T = T->Next) {
- if (!T->Started) continue;
- TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+ if (!T->hasTriggered()) continue;
+ TimersToPrint.emplace_back(T->Time, T->Name);
// Clear out the time.
- T->Started = 0;
- T->Time = TimeRecord();
+ T->clear();
}
// If any timers were started, print the group.
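
With the Timer changes above, start/stop is now a strict state machine: startTimer() asserts on an already-running timer, stopTimer() on a stopped one, and hasTriggered() replaces the old Started flag when groups decide what to print. A short sketch, assuming Timer's two-argument constructor from llvm/Support/Timer.h at this revision:

// Sketch of the new start/stop contract; the constructors are assumed from
// llvm/Support/Timer.h at this revision.
#include "llvm/Support/Timer.h"

void timedWork() {
  llvm::TimerGroup TG("frontend");     // prints its timers when destroyed
  llvm::Timer T("parse", TG);
  T.startTimer();                      // asserts if the timer is running
  // ... the work being measured ...
  T.stopTimer();                       // asserts if the timer is stopped
  // T.hasTriggered() is now true, so removeTimer()/print() will report it.
}
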
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index c6646fb..3bb1116 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -25,6 +25,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case aarch64_be: return "aarch64_be";
case arm: return "arm";
case armeb: return "armeb";
+ case avr: return "avr";
case bpfel: return "bpfel";
case bpfeb: return "bpfeb";
case hexagon: return "hexagon";
@@ -80,6 +81,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case thumb:
case thumbeb: return "arm";
+ case avr: return "avr";
+
case ppc64:
case ppc64le:
case ppc: return "ppc";
@@ -124,8 +127,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case spir64: return "spir";
case kalimba: return "kalimba";
case shave: return "shave";
- case wasm32: return "wasm32";
- case wasm64: return "wasm64";
+ case wasm32:
+ case wasm64: return "wasm";
}
}
@@ -144,6 +147,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case MipsTechnologies: return "mti";
case NVIDIA: return "nvidia";
case CSR: return "csr";
+ case Myriad: return "myriad";
}
llvm_unreachable("Invalid VendorType!");
@@ -177,6 +181,9 @@ const char *Triple::getOSTypeName(OSType Kind) {
case NVCL: return "nvcl";
case AMDHSA: return "amdhsa";
case PS4: return "ps4";
+ case ELFIAMCU: return "elfiamcu";
+ case TvOS: return "tvos";
+ case WatchOS: return "watchos";
}
llvm_unreachable("Invalid OSType");
@@ -196,6 +203,8 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case MSVC: return "msvc";
case Itanium: return "itanium";
case Cygnus: return "cygnus";
+ case AMDOpenCL: return "amdopencl";
+ case CoreCLR: return "coreclr";
}
llvm_unreachable("Invalid EnvironmentType!");
@@ -224,6 +233,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
.Case("arm", arm)
.Case("armeb", armeb)
+ .Case("avr", avr)
.StartsWith("bpf", BPFArch)
.Case("mips", mips)
.Case("mipsel", mipsel)
@@ -265,8 +275,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
}
static Triple::ArchType parseARMArch(StringRef ArchName) {
- unsigned ISA = ARMTargetParser::parseArchISA(ArchName);
- unsigned ENDIAN = ARMTargetParser::parseArchEndian(ArchName);
+ unsigned ISA = ARM::parseArchISA(ArchName);
+ unsigned ENDIAN = ARM::parseArchEndian(ArchName);
Triple::ArchType arch = Triple::UnknownArch;
switch (ENDIAN) {
@@ -300,7 +310,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}
}
- ArchName = ARMTargetParser::getCanonicalArchName(ArchName);
+ ArchName = ARM::getCanonicalArchName(ArchName);
if (ArchName.empty())
return Triple::UnknownArch;
@@ -310,8 +320,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
return Triple::UnknownArch;
// Thumb only for v6m
- unsigned Profile = ARMTargetParser::parseArchProfile(ArchName);
- unsigned Version = ARMTargetParser::parseArchVersion(ArchName);
+ unsigned Profile = ARM::parseArchProfile(ArchName);
+ unsigned Version = ARM::parseArchVersion(ArchName);
if (Profile == ARM::PK_M && Version == 6) {
if (ENDIAN == ARM::EK_BIG)
return Triple::thumbeb;
@@ -323,10 +333,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}
static Triple::ArchType parseArch(StringRef ArchName) {
- Triple::ArchType ARMArch(parseARMArch(ArchName));
- Triple::ArchType BPFArch(parseBPFArch(ArchName));
-
- return StringSwitch<Triple::ArchType>(ArchName)
+ auto AT = StringSwitch<Triple::ArchType>(ArchName)
.Cases("i386", "i486", "i586", "i686", Triple::x86)
// FIXME: Do we need to support these?
.Cases("i786", "i886", "i986", Triple::x86)
@@ -336,9 +343,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("powerpc64le", Triple::ppc64le)
.Case("xscale", Triple::arm)
.Case("xscaleeb", Triple::armeb)
- .StartsWith("arm", ARMArch)
- .StartsWith("thumb", ARMArch)
- .StartsWith("aarch64", ARMArch)
+ .Case("aarch64", Triple::aarch64)
+ .Case("aarch64_be", Triple::aarch64_be)
+ .Case("arm64", Triple::aarch64)
+ .Case("arm", Triple::arm)
+ .Case("armeb", Triple::armeb)
+ .Case("thumb", Triple::thumb)
+ .Case("thumbeb", Triple::thumbeb)
+ .Case("avr", Triple::avr)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -346,7 +358,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("mips64el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
- .StartsWith("bpf", BPFArch)
.Case("hexagon", Triple::hexagon)
.Case("s390x", Triple::systemz)
.Case("sparc", Triple::sparc)
@@ -369,6 +380,18 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("wasm32", Triple::wasm32)
.Case("wasm64", Triple::wasm64)
.Default(Triple::UnknownArch);
+
+ // Some architectures require special parsing logic just to compute the
+ // ArchType result.
+ if (AT == Triple::UnknownArch) {
+ if (ArchName.startswith("arm") || ArchName.startswith("thumb") ||
+ ArchName.startswith("aarch64"))
+ return parseARMArch(ArchName);
+ if (ArchName.startswith("bpf"))
+ return parseBPFArch(ArchName);
+ }
+
+ return AT;
}
static Triple::VendorType parseVendor(StringRef VendorName) {
@@ -384,6 +407,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("mti", Triple::MipsTechnologies)
.Case("nvidia", Triple::NVIDIA)
.Case("csr", Triple::CSR)
+ .Case("myriad", Triple::Myriad)
.Default(Triple::UnknownVendor);
}
@@ -414,6 +438,9 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("nvcl", Triple::NVCL)
.StartsWith("amdhsa", Triple::AMDHSA)
.StartsWith("ps4", Triple::PS4)
+ .StartsWith("elfiamcu", Triple::ELFIAMCU)
+ .StartsWith("tvos", Triple::TvOS)
+ .StartsWith("watchos", Triple::WatchOS)
.Default(Triple::UnknownOS);
}
@@ -430,6 +457,8 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("msvc", Triple::MSVC)
.StartsWith("itanium", Triple::Itanium)
.StartsWith("cygnus", Triple::Cygnus)
+ .StartsWith("amdopencl", Triple::AMDOpenCL)
+ .StartsWith("coreclr", Triple::CoreCLR)
.Default(Triple::UnknownEnvironment);
}
@@ -442,7 +471,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
- StringRef ARMSubArch = ARMTargetParser::getCanonicalArchName(SubArchName);
+ StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
// For now, this is the small part. Early return.
if (ARMSubArch.empty())
@@ -453,14 +482,12 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
.Default(Triple::NoSubArch);
// ARM sub arch.
- switch(ARMTargetParser::parseArch(ARMSubArch)) {
+ switch(ARM::parseArch(ARMSubArch)) {
case ARM::AK_ARMV4:
return Triple::NoSubArch;
case ARM::AK_ARMV4T:
return Triple::ARMSubArch_v4t;
- case ARM::AK_ARMV5:
case ARM::AK_ARMV5T:
- case ARM::AK_ARMV5E:
return Triple::ARMSubArch_v5;
case ARM::AK_ARMV5TE:
case ARM::AK_IWMMXT:
@@ -469,24 +496,19 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
case ARM::AK_ARMV5TEJ:
return Triple::ARMSubArch_v5te;
case ARM::AK_ARMV6:
- case ARM::AK_ARMV6J:
- case ARM::AK_ARMV6Z:
return Triple::ARMSubArch_v6;
case ARM::AK_ARMV6K:
- case ARM::AK_ARMV6ZK:
- case ARM::AK_ARMV6HL:
+ case ARM::AK_ARMV6KZ:
return Triple::ARMSubArch_v6k;
case ARM::AK_ARMV6T2:
return Triple::ARMSubArch_v6t2;
case ARM::AK_ARMV6M:
- case ARM::AK_ARMV6SM:
return Triple::ARMSubArch_v6m;
- case ARM::AK_ARMV7:
case ARM::AK_ARMV7A:
case ARM::AK_ARMV7R:
- case ARM::AK_ARMV7L:
- case ARM::AK_ARMV7HL:
return Triple::ARMSubArch_v7;
+ case ARM::AK_ARMV7K:
+ return Triple::ARMSubArch_v7k;
case ARM::AK_ARMV7M:
return Triple::ARMSubArch_v7m;
case ARM::AK_ARMV7S:
@@ -497,6 +519,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8;
case ARM::AK_ARMV8_1A:
return Triple::ARMSubArch_v8_1a;
+ case ARM::AK_ARMV8_2A:
+ return Triple::ARMSubArch_v8_2a;
default:
return Triple::NoSubArch;
}
@@ -514,20 +538,53 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
switch (T.getArch()) {
- default:
- break;
+ case Triple::UnknownArch:
+ case Triple::aarch64:
+ case Triple::arm:
+ case Triple::thumb:
+ case Triple::x86:
+ case Triple::x86_64:
+ if (T.isOSDarwin())
+ return Triple::MachO;
+ else if (T.isOSWindows())
+ return Triple::COFF;
+ return Triple::ELF;
+
+ case Triple::aarch64_be:
+ case Triple::amdgcn:
+ case Triple::amdil:
+ case Triple::amdil64:
+ case Triple::armeb:
+ case Triple::avr:
+ case Triple::bpfeb:
+ case Triple::bpfel:
case Triple::hexagon:
+ case Triple::hsail:
+ case Triple::hsail64:
+ case Triple::kalimba:
+ case Triple::le32:
+ case Triple::le64:
case Triple::mips:
- case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
+ case Triple::mipsel:
+ case Triple::msp430:
+ case Triple::nvptx:
+ case Triple::nvptx64:
+ case Triple::ppc64le:
case Triple::r600:
- case Triple::amdgcn:
+ case Triple::shave:
case Triple::sparc:
+ case Triple::sparcel:
case Triple::sparcv9:
+ case Triple::spir:
+ case Triple::spir64:
case Triple::systemz:
+ case Triple::tce:
+ case Triple::thumbeb:
+ case Triple::wasm32:
+ case Triple::wasm64:
case Triple::xcore:
- case Triple::ppc64le:
return Triple::ELF;
case Triple::ppc:
@@ -536,12 +593,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
return Triple::MachO;
return Triple::ELF;
}
-
- if (T.isOSDarwin())
- return Triple::MachO;
- else if (T.isOSWindows())
- return Triple::COFF;
- return Triple::ELF;
+ llvm_unreachable("unknown architecture");
}
/// \brief Construct a triple from the string representation provided.
@@ -549,14 +601,27 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
/// This stores the string representation and parses the various pieces into
/// enum members.
Triple::Triple(const Twine &Str)
- : Data(Str.str()),
- Arch(parseArch(getArchName())),
- SubArch(parseSubArch(getArchName())),
- Vendor(parseVendor(getVendorName())),
- OS(parseOS(getOSName())),
- Environment(parseEnvironment(getEnvironmentName())),
- ObjectFormat(parseFormat(getEnvironmentName())) {
- if (ObjectFormat == Triple::UnknownObjectFormat)
+ : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch),
+ Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment),
+ ObjectFormat(UnknownObjectFormat) {
+ // Do minimal parsing by hand here.
+ SmallVector<StringRef, 4> Components;
+ StringRef(Data).split(Components, '-', /*MaxSplit*/ 3);
+ if (Components.size() > 0) {
+ Arch = parseArch(Components[0]);
+ SubArch = parseSubArch(Components[0]);
+ if (Components.size() > 1) {
+ Vendor = parseVendor(Components[1]);
+ if (Components.size() > 2) {
+ OS = parseOS(Components[2]);
+ if (Components.size() > 3) {
+ Environment = parseEnvironment(Components[3]);
+ ObjectFormat = parseFormat(Components[3]);
+ }
+ }
+ }
+ }
+ if (ObjectFormat == UnknownObjectFormat)
ObjectFormat = getDefaultFormat(*this);
}
@@ -601,7 +666,7 @@ std::string Triple::normalize(StringRef Str) {
// Parse into components.
SmallVector<StringRef, 4> Components;
- Str.split(Components, "-");
+ Str.split(Components, '-');
// If the first component corresponds to a known architecture, preferentially
// use it for the architecture. If the second component corresponds to a
@@ -889,6 +954,8 @@ bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
return false;
break;
case IOS:
+ case TvOS:
+ case WatchOS:
    // Ignore the version from the triple. This is only handled because
    // the clang driver combines OS X and IOS support into a common Darwin
// toolchain that wants to know the OS X version number even when targeting
@@ -916,11 +983,38 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
Micro = 0;
break;
case IOS:
+ case TvOS:
getOSVersion(Major, Minor, Micro);
// Default to 5.0 (or 7.0 for arm64).
if (Major == 0)
Major = (getArch() == aarch64) ? 7 : 5;
break;
+ case WatchOS:
+ llvm_unreachable("conflicting triple info");
+ }
+}
+
+void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ switch (getOS()) {
+ default: llvm_unreachable("unexpected OS for Darwin triple");
+ case Darwin:
+ case MacOSX:
+  // Ignore the version from the triple. This is only handled because
+  // the clang driver combines OS X and IOS support into a common Darwin
+ // toolchain that wants to know the iOS version number even when targeting
+ // OS X.
+ Major = 2;
+ Minor = 0;
+ Micro = 0;
+ break;
+ case WatchOS:
+ getOSVersion(Major, Minor, Micro);
+ if (Major == 0)
+ Major = 2;
+ break;
+ case IOS:
+ llvm_unreachable("conflicting triple info");
}
}
@@ -993,6 +1087,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::UnknownArch:
return 0;
+ case llvm::Triple::avr:
case llvm::Triple::msp430:
return 16;
@@ -1062,6 +1157,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::amdgcn:
+ case Triple::avr:
case Triple::bpfel:
case Triple::bpfeb:
case Triple::msp430:
@@ -1116,6 +1212,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::UnknownArch:
case Triple::arm:
case Triple::armeb:
+ case Triple::avr:
case Triple::hexagon:
case Triple::kalimba:
case Triple::msp430:
@@ -1172,6 +1269,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::amdgcn:
case Triple::amdil64:
case Triple::amdil:
+ case Triple::avr:
case Triple::hexagon:
case Triple::hsail64:
case Triple::hsail:
@@ -1244,6 +1342,7 @@ Triple Triple::getLittleEndianArchVariant() const {
case Triple::amdil64:
case Triple::amdil:
case Triple::arm:
+ case Triple::avr:
case Triple::bpfel:
case Triple::hexagon:
case Triple::hsail64:
@@ -1281,10 +1380,10 @@ Triple Triple::getLittleEndianArchVariant() const {
return T;
}
-const char *Triple::getARMCPUForArch(StringRef MArch) const {
+StringRef Triple::getARMCPUForArch(StringRef MArch) const {
if (MArch.empty())
MArch = getArchName();
- MArch = ARMTargetParser::getCanonicalArchName(MArch);
+ MArch = ARM::getCanonicalArchName(MArch);
// Some defaults are forced.
switch (getOS()) {
@@ -1296,15 +1395,21 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
case llvm::Triple::Win32:
// FIXME: this is invalid for WindowsCE
return "cortex-a9";
+ case llvm::Triple::MacOSX:
+ case llvm::Triple::IOS:
+ case llvm::Triple::WatchOS:
+ if (MArch == "v7k")
+ return "cortex-a7";
+ break;
default:
break;
}
if (MArch.empty())
- return nullptr;
+ return StringRef();
- const char *CPU = ARMTargetParser::getDefaultCPU(MArch);
- if (CPU)
+ StringRef CPU = ARM::getDefaultCPU(MArch);
+ if (!CPU.empty())
return CPU;
// If no specific architecture version is requested, return the minimum CPU
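
Taken together, the Triple hunks above split the triple string once (MaxSplit 3) instead of re-deriving each component, and teach the ARM paths about v7k and watchOS. A behavior sketch, with enum values assumed from llvm/ADT/Triple.h at this revision:

// Behavior sketch for the hand-split constructor and the new watchOS/v7k
// cases; enum values are assumed from llvm/ADT/Triple.h at this revision.
#include "llvm/ADT/Triple.h"
#include <cassert>

void checkWatchTriple() {
  llvm::Triple T("armv7k-apple-watchos2.0");
  assert(T.getArch() == llvm::Triple::arm);
  assert(T.getSubArch() == llvm::Triple::ARMSubArch_v7k); // added above
  assert(T.getOS() == llvm::Triple::WatchOS);             // new OSType
  assert(T.getARMCPUForArch() == "cortex-a7");            // forced for v7k
}
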
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
index c421ee8..d703191 100644
--- a/contrib/llvm/lib/Support/Unix/Memory.inc
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -50,9 +50,8 @@ int getPosixProtectionFlags(unsigned Flags) {
return PROT_READ | PROT_WRITE;
case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
return PROT_READ | PROT_EXEC;
- case llvm::sys::Memory::MF_READ |
- llvm::sys::Memory::MF_WRITE |
- llvm::sys::Memory::MF_EXEC:
+ case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE |
+ llvm::sys::Memory::MF_EXEC:
return PROT_READ | PROT_WRITE | PROT_EXEC;
case llvm::sys::Memory::MF_EXEC:
#if defined(__FreeBSD__)
@@ -153,6 +152,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) {
std::error_code
Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
+ static const size_t PageSize = Process::getPageSize();
if (M.Address == nullptr || M.Size == 0)
return std::error_code();
@@ -161,7 +161,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
int Protect = getPosixProtectionFlags(Flags);
- int Result = ::mprotect(M.Address, M.Size, Protect);
+ int Result = ::mprotect((void*)((uintptr_t)M.Address & ~(PageSize-1)), PageSize*((M.Size+PageSize-1)/PageSize), Protect);
if (Result != 0)
return std::error_code(errno, std::generic_category());
@@ -181,7 +181,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
std::string *ErrMsg) {
if (NumBytes == 0) return MemoryBlock();
- size_t PageSize = Process::getPageSize();
+ static const size_t PageSize = Process::getPageSize();
size_t NumPages = (NumBytes+PageSize-1)/PageSize;
int fd = -1;
@@ -265,15 +265,12 @@ bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
}
bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
-#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
if (M.Address == 0 || M.Size == 0) return false;
Memory::InvalidateInstructionCache(M.Address, M.Size);
+#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
(vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
return KERN_SUCCESS == kr;
-#elif defined(__arm__) || defined(__aarch64__)
- Memory::InvalidateInstructionCache(M.Address, M.Size);
- return true;
#else
return true;
#endif
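
The mprotect() change above matters because POSIX requires a page-aligned address: the start is rounded down and the length up to page granularity before the protection is changed. The rounding, as a self-contained sketch:

// Self-contained illustration of the page rounding used in the mprotect()
// call above (PageSize must be a power of two; values are hypothetical).
#include <cstdint>
#include <cstdio>
#include <unistd.h>

int main() {
  const uintptr_t PageSize = (uintptr_t)sysconf(_SC_PAGESIZE);
  const uintptr_t Addr = 0x1234567;  // hypothetical unaligned block start
  const uintptr_t Size = 100;        // hypothetical block size in bytes
  uintptr_t Begin = Addr & ~(PageSize - 1);                     // round down
  uintptr_t Len = PageSize * ((Size + PageSize - 1) / PageSize); // round up
  std::printf("mprotect range: [%#jx, %#jx)\n", (uintmax_t)Begin,
              (uintmax_t)(Begin + Len));
  return 0;
}
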
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
index 973d010..d85c37a 100644
--- a/contrib/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -75,12 +75,12 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
char fullpath[PATH_MAX];
snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
- if (realpath(fullpath, ret) == NULL)
- return (1);
+ if (!realpath(fullpath, ret))
+ return 1;
if (stat(fullpath, &sb) != 0)
- return (1);
+ return 1;
- return (0);
+ return 0;
}
static char *
@@ -91,34 +91,34 @@ getprogpath(char ret[PATH_MAX], const char *bin)
/* First approach: absolute path. */
if (bin[0] == '/') {
if (test_dir(ret, "/", bin) == 0)
- return (ret);
- return (NULL);
+ return ret;
+ return nullptr;
}
/* Second approach: relative path. */
- if (strchr(bin, '/') != NULL) {
+ if (strchr(bin, '/')) {
char cwd[PATH_MAX];
- if (getcwd(cwd, PATH_MAX) == NULL)
- return (NULL);
+ if (!getcwd(cwd, PATH_MAX))
+ return nullptr;
if (test_dir(ret, cwd, bin) == 0)
- return (ret);
- return (NULL);
+ return ret;
+ return nullptr;
}
/* Third approach: $PATH */
- if ((pv = getenv("PATH")) == NULL)
- return (NULL);
+ if ((pv = getenv("PATH")) == nullptr)
+ return nullptr;
s = pv = strdup(pv);
- if (pv == NULL)
- return (NULL);
- while ((t = strsep(&s, ":")) != NULL) {
+ if (!pv)
+ return nullptr;
+ while ((t = strsep(&s, ":")) != nullptr) {
if (test_dir(ret, t, bin) == 0) {
free(pv);
- return (ret);
+ return ret;
}
}
free(pv);
- return (NULL);
+ return nullptr;
}
#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
@@ -153,8 +153,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
return std::string(exe_path, len);
} else {
// Fall back to the classical detection.
- if (getprogpath(exe_path, argv0) != NULL)
- return exe_path;
+ if (getprogpath(exe_path, argv0))
+ return exe_path;
}
#elif defined(HAVE_DLFCN_H)
// Use dladdr to get executable path if available.
@@ -219,11 +219,12 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
return std::error_code();
}
-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+ perms Perms) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
- if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+ if (::mkdir(p.begin(), Perms) == -1) {
if (errno != EEXIST || !IgnoreExisting)
return std::error_code(errno, std::generic_category());
}
@@ -324,6 +325,10 @@ std::error_code access(const Twine &Path, AccessMode Mode) {
return std::error_code();
}
+bool can_execute(const Twine &Path) {
+ return !access(Path, AccessMode::Execute);
+}
+
bool equivalent(file_status A, file_status B) {
assert(status_known(A) && status_known(B));
return A.fs_st_dev == B.fs_st_dev &&
@@ -555,6 +560,54 @@ bool home_directory(SmallVectorImpl<char> &result) {
return false;
}
+static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
+ #if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+ // macros defined in <unistd.h> on darwin >= 9
+ int ConfName = TempDir ? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, nullptr, 0);
+ if (ConfLen > 0) {
+ do {
+ Result.resize(ConfLen);
+ ConfLen = confstr(ConfName, Result.data(), Result.size());
+ } while (ConfLen > 0 && ConfLen != Result.size());
+
+ if (ConfLen > 0) {
+ assert(Result.back() == 0);
+ Result.pop_back();
+ return true;
+ }
+
+ Result.clear();
+ }
+ #endif
+ return false;
+}
+
+static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+ // First try using XDS_CACHE_HOME env variable,
+ // as specified in XDG Base Directory Specification at
+ // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+ if (const char *XdsCacheDir = std::getenv("XDS_CACHE_HOME")) {
+ Result.clear();
+ Result.append(XdsCacheDir, XdsCacheDir + strlen(XdsCacheDir));
+ return true;
+ }
+
+ // Try Darwin configuration query
+ if (getDarwinConfDir(false, Result))
+ return true;
+
+ // Use "$HOME/.cache" if $HOME is available
+ if (home_directory(Result)) {
+ append(Result, ".cache");
+ return true;
+ }
+
+ return false;
+}
+
static const char *getEnvTempDir() {
// Check whether the temporary directory is specified by an environment
// variable.
@@ -589,27 +642,8 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
}
}
-#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
- // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
- // macros defined in <unistd.h> on darwin >= 9
- int ConfName = ErasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
- : _CS_DARWIN_USER_CACHE_DIR;
- size_t ConfLen = confstr(ConfName, nullptr, 0);
- if (ConfLen > 0) {
- do {
- Result.resize(ConfLen);
- ConfLen = confstr(ConfName, Result.data(), Result.size());
- } while (ConfLen > 0 && ConfLen != Result.size());
-
- if (ConfLen > 0) {
- assert(Result.back() == 0);
- Result.pop_back();
- return;
- }
-
- Result.clear();
- }
-#endif
+ if (getDarwinConfDir(ErasedOnReboot, Result))
+ return;
const char *RequestedDir = getDefaultTempDir(ErasedOnReboot);
Result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
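
The Path.inc hunks above factor the Darwin confstr() logic into getDarwinConfDir() and add getUserCacheDir() with an environment-variable, then Darwin, then $HOME/.cache fallback chain. Note the patch spells the variable XDS_CACHE_HOME, while the XDG Base Directory Specification it cites names it XDG_CACHE_HOME. A portable sketch of the same chain, using the spec's spelling:

// Portable sketch of the cache-directory fallback order introduced above;
// uses the spec's XDG_CACHE_HOME spelling rather than the patch's.
#include <cstdlib>
#include <string>

bool userCacheDir(std::string &Out) {
  if (const char *Env = std::getenv("XDG_CACHE_HOME")) { // explicit override
    Out = Env;
    return true;
  }
  // (Darwin would consult confstr(_CS_DARWIN_USER_CACHE_DIR) here.)
  if (const char *Home = std::getenv("HOME")) {          // default ~/.cache
    Out = std::string(Home) + "/.cache";
    return true;
  }
  return false;                                          // no usable location
}
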
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
index df13bd2..27083ee 100644
--- a/contrib/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -430,13 +430,18 @@ const char *Process::ResetColor() {
#if !defined(HAVE_DECL_ARC4RANDOM) || !HAVE_DECL_ARC4RANDOM
static unsigned GetRandomNumberSeed() {
// Attempt to get the initial seed from /dev/urandom, if possible.
- if (FILE *RandomSource = ::fopen("/dev/urandom", "r")) {
+ int urandomFD = open("/dev/urandom", O_RDONLY);
+
+ if (urandomFD != -1) {
unsigned seed;
- int count = ::fread((void *)&seed, sizeof(seed), 1, RandomSource);
- ::fclose(RandomSource);
+ // Don't use a buffered read to avoid reading more data
+ // from /dev/urandom than we need.
+ int count = read(urandomFD, (void *)&seed, sizeof(seed));
+
+ close(urandomFD);
// Return the seed if the read was successful.
- if (count == 1)
+ if (count == sizeof(seed))
return seed;
}
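
The Process.inc change above swaps buffered stdio for a raw read(2) so the runtime cannot pull more bytes out of /dev/urandom than the seed needs. The same pattern, as a minimal sketch:

// Minimal sketch of the unbuffered /dev/urandom read used above (POSIX).
#include <fcntl.h>
#include <unistd.h>

bool readSeed(unsigned &Seed) {
  int FD = open("/dev/urandom", O_RDONLY);
  if (FD == -1)
    return false;
  // One read(2) of exactly sizeof(Seed) bytes; no stdio buffer over-read.
  ssize_t N = read(FD, &Seed, sizeof(Seed));
  close(FD);
  return N == (ssize_t)sizeof(Seed);
}
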
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index 8947b62..a8d1fe3 100644
--- a/contrib/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -323,7 +323,6 @@ namespace llvm {
ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
bool WaitUntilTerminates, std::string *ErrMsg) {
-#ifdef HAVE_SYS_WAIT_H
struct sigaction Act, Old;
assert(PI.Pid && "invalid pid to wait on, process not started?");
@@ -417,12 +416,6 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
// signal during execution as opposed to failing to execute.
WaitResult.ReturnCode = -2;
}
-#else
- if (ErrMsg)
- *ErrMsg = "Program::Wait is not implemented on this platform yet!";
- ProcessInfo WaitResult;
- WaitResult.ReturnCode = -2;
-#endif
return WaitResult;
}
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index bfe2a3a..061cdb3 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -17,7 +17,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
@@ -25,7 +24,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
-#include <vector>
#if HAVE_EXECINFO_H
# include <execinfo.h> // For backtrace().
#endif
@@ -58,8 +56,6 @@ static ManagedStatic<SmartMutex<true> > SignalsMutex;
static void (*InterruptFunction)() = nullptr;
static ManagedStatic<std::vector<std::string>> FilesToRemove;
-static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
- CallBacksToRun;
// IntSigs - Signals that represent requested termination. There's no bug
// or failure, or if there is, it's not our direct responsibility. For whatever
@@ -90,12 +86,11 @@ static unsigned NumRegisteredSignals = 0;
static struct {
struct sigaction SA;
int SigNo;
-} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+} RegisteredSignalInfo[array_lengthof(IntSigs) + array_lengthof(KillSigs)];
static void RegisterHandler(int Signal) {
- assert(NumRegisteredSignals <
- sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+ assert(NumRegisteredSignals < array_lengthof(RegisteredSignalInfo) &&
"Out of space for signal handlers!");
struct sigaction NewHandler;
@@ -117,7 +112,7 @@ static void RegisterHandlers() {
// during handling an actual signal because you can't safely call new in a
// signal handler.
*SignalsMutex;
-
+
// If the handlers are already registered, we're done.
if (NumRegisteredSignals != 0) return;
@@ -148,9 +143,6 @@ static void RemoveFilesToRemove() {
// memory.
std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i) {
- // We rely on a std::string implementation for which repeated calls to
- // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
- // strings to try to ensure this is safe.
const char *path = FilesToRemoveRef[i].c_str();
// Get the status so we can determine if it's a file or directory. If we
@@ -164,7 +156,7 @@ static void RemoveFilesToRemove() {
// super-user permissions.
if (!S_ISREG(buf.st_mode))
continue;
-
+
// Otherwise, remove the file. We ignore any errors here as there is nothing
// else we can do.
unlink(path);
@@ -205,11 +197,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
}
// Otherwise if it is a fault (like SEGV) run any handler.
- if (CallBacksToRun.isConstructed()) {
- auto &CallBacksToRunRef = *CallBacksToRun;
- for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
- CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
- }
+ llvm::sys::RunSignalHandlers();
#ifdef __s390__
// On S/390, certain signals are delivered with PSW Address pointing to
@@ -239,21 +227,7 @@ bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
{
sys::SmartScopedLock<true> Guard(*SignalsMutex);
- std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
- std::string *OldPtr =
- FilesToRemoveRef.empty() ? nullptr : &FilesToRemoveRef[0];
- FilesToRemoveRef.push_back(Filename);
-
- // We want to call 'c_str()' on every std::string in this vector so that if
- // the underlying implementation requires a re-allocation, it happens here
- // rather than inside of the signal handler. If we see the vector grow, we
- // have to call it on every entry. If it remains in place, we only need to
- // call it on the latest one.
- if (OldPtr == &FilesToRemoveRef[0])
- FilesToRemoveRef.back().c_str();
- else
- for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i)
- FilesToRemoveRef[i].c_str();
+ FilesToRemove->push_back(Filename);
}
RegisterHandlers();
@@ -268,13 +242,6 @@ void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
std::vector<std::string>::iterator I = FilesToRemove->end();
if (RI != FilesToRemove->rend())
I = FilesToRemove->erase(RI.base()-1);
-
- // We need to call c_str() on every element which would have been moved by
- // the erase. These elements, in a C++98 implementation where c_str()
- // requires a reallocation on the first call may have had the call to c_str()
- // made on insertion become invalid by being copied down an element.
- for (std::vector<std::string>::iterator E = FilesToRemove->end(); I != E; ++I)
- I->c_str();
}
/// AddSignalHandler - Add a function to be called when a signal is delivered
@@ -285,10 +252,9 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
RegisterHandlers();
}
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
-
-#if HAVE_LINK_H && (defined(__linux__) || defined(__FreeBSD__) || \
- defined(__FreeBSD_kernel__) || defined(__NetBSD__))
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && HAVE_LINK_H && \
+ (defined(__linux__) || defined(__FreeBSD__) || \
+ defined(__FreeBSD_kernel__) || defined(__NetBSD__))
struct DlIteratePhdrData {
void **StackTrace;
int depth;
@@ -321,108 +287,27 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
return 0;
}
+/// If this is an ELF platform, we can find all loaded modules and their virtual
+/// addresses with dl_iterate_phdr.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
- const char *MainExecutableName) {
+ const char *MainExecutableName,
+ StringSaver &StrPool) {
DlIteratePhdrData data = {StackTrace, Depth, true,
Modules, Offsets, MainExecutableName};
dl_iterate_phdr(dl_iterate_phdr_cb, &data);
return true;
}
#else
+/// This platform does not have dl_iterate_phdr, so we do not yet know how to
+/// find all loaded DSOs.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
- const char *MainExecutableName) {
+ const char *MainExecutableName,
+ StringSaver &StrPool) {
return false;
}
-#endif
-
-static bool printSymbolizedStackTrace(void **StackTrace, int Depth,
- llvm::raw_ostream &OS) {
- // FIXME: Subtract necessary number from StackTrace entries to turn return addresses
- // into actual instruction addresses.
- // Use llvm-symbolizer tool to symbolize the stack traces.
- ErrorOr<std::string> LLVMSymbolizerPathOrErr =
- sys::findProgramByName("llvm-symbolizer");
- if (!LLVMSymbolizerPathOrErr)
- return false;
- const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
- // We don't know argv0 or the address of main() at this point, but try
- // to guess it anyway (it's possible on some platforms).
- std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
- if (MainExecutableName.empty() ||
- MainExecutableName.find("llvm-symbolizer") != std::string::npos)
- return false;
-
- std::vector<const char *> Modules(Depth, nullptr);
- std::vector<intptr_t> Offsets(Depth, 0);
- if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
- MainExecutableName.c_str()))
- return false;
- int InputFD;
- SmallString<32> InputFile, OutputFile;
- sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
- sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile);
- FileRemover InputRemover(InputFile.c_str());
- FileRemover OutputRemover(OutputFile.c_str());
-
- {
- raw_fd_ostream Input(InputFD, true);
- for (int i = 0; i < Depth; i++) {
- if (Modules[i])
- Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
- }
- }
-
- StringRef InputFileStr(InputFile);
- StringRef OutputFileStr(OutputFile);
- StringRef StderrFileStr;
- const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr,
- &StderrFileStr};
- const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
- "--demangle", nullptr};
- int RunResult =
- sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects);
- if (RunResult != 0)
- return false;
-
- auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
- if (!OutputBuf)
- return false;
- StringRef Output = OutputBuf.get()->getBuffer();
- SmallVector<StringRef, 32> Lines;
- Output.split(Lines, "\n");
- auto CurLine = Lines.begin();
- int frame_no = 0;
- for (int i = 0; i < Depth; i++) {
- if (!Modules[i]) {
- OS << format("#%d %p\n", frame_no++, StackTrace[i]);
- continue;
- }
- // Read pairs of lines (function name and file/line info) until we
- // encounter empty line.
- for (;;) {
- if (CurLine == Lines.end())
- return false;
- StringRef FunctionName = *CurLine++;
- if (FunctionName.empty())
- break;
- OS << format("#%d %p ", frame_no++, StackTrace[i]);
- if (!FunctionName.startswith("??"))
- OS << format("%s ", FunctionName.str().c_str());
- if (CurLine == Lines.end())
- return false;
- StringRef FileLineInfo = *CurLine++;
- if (!FileLineInfo.startswith("??"))
- OS << format("%s", FileLineInfo.str().c_str());
- else
- OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]);
- OS << "\n";
- }
- }
- return true;
-}
-#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && ...
// PrintStackTrace - In the case of a program crash or fault, print out a stack
// trace so that the user has an indication of why and where we died.
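
The Signals.inc rework above keeps module discovery behind dl_iterate_phdr() on ELF platforms, while the generic symbolizer driver is deleted from this platform file. For orientation, a sketch of the callback shape that code relies on:

// Sketch of the dl_iterate_phdr() callback shape used above (ELF platforms
// with <link.h>, e.g. Linux and the BSDs).
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <link.h>
#include <cstdio>

static int printModule(struct dl_phdr_info *Info, size_t, void *) {
  // dlpi_name is "" for the main executable; dlpi_addr is the load base.
  std::printf("%s @ %#lx\n",
              Info->dlpi_name[0] ? Info->dlpi_name : "(main executable)",
              (unsigned long)Info->dlpi_addr);
  return 0; // non-zero would stop the iteration
}

void listLoadedModules() { dl_iterate_phdr(printModule, nullptr); }
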
diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
index e16a226..871e612 100644
--- a/contrib/llvm/lib/Support/Unix/Unix.h
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -29,6 +29,7 @@
#include <cstring>
#include <string>
#include <sys/types.h>
+#include <sys/wait.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -43,22 +44,10 @@
#endif
#include <time.h>
-#ifdef HAVE_SYS_WAIT_H
-# include <sys/wait.h>
-#endif
-
#ifdef HAVE_DLFCN_H
# include <dlfcn.h>
#endif
-#ifndef WEXITSTATUS
-# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
-#endif
-
-#ifndef WIFEXITED
-# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
-#endif
-
/// This function builds an error message into \p ErrMsg using the \p prefix
/// string and the Unix error number given by \p errnum. If errnum is -1, the
/// default then the value of errno is used.
diff --git a/contrib/llvm/lib/Support/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
index facf8d9..8d852a6 100644
--- a/contrib/llvm/lib/Support/Valgrind.cpp
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
+#include <cstddef>
#if HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
@@ -52,23 +53,3 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
}
#endif // !HAVE_VALGRIND_VALGRIND_H
-
-// These functions require no implementation, tsan just looks at the arguments
-// they're called with. However, they are required to be weak as some other
-// application or library may already be providing these definitions for the
-// same reason we are.
-extern "C" {
-LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line,
- const volatile void *cv);
-void AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {
-}
-LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line,
- const volatile void *cv);
-void AnnotateHappensBefore(const char *file, int line,
- const volatile void *cv) {}
-LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line);
-void AnnotateIgnoreWritesBegin(const char *file, int line) {}
-LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line);
-void AnnotateIgnoreWritesEnd(const char *file, int line) {}
-}
-
diff --git a/contrib/llvm/lib/Support/Windows/COM.inc b/contrib/llvm/lib/Support/Windows/COM.inc
index 0c50d6f..54f3ecf 100644
--- a/contrib/llvm/lib/Support/Windows/COM.inc
+++ b/contrib/llvm/lib/Support/Windows/COM.inc
@@ -1,4 +1,4 @@
-//===- llvm/Support/Windows/COM.inc - Windows COM Implementation *- C++ -*-===//
+//==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
index d38f197..17418b0 100644
--- a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -76,14 +76,14 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
SmallVector<wchar_t, MAX_PATH> filenameUnicode;
if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
SetLastError(ec.value());
- MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: ");
+ MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16");
return DynamicLibrary();
}
HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
if (a_handle == 0) {
- MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");
+ MakeErrMsg(errMsg, std::string(filename) + ": Can't open");
return DynamicLibrary();
}
diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
index 4b2ff2e..7eab9ff 100644
--- a/contrib/llvm/lib/Support/Windows/Memory.inc
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -192,14 +192,14 @@ static DWORD getProtection(const void *addr) {
bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
if (!setRangeWritable(M.Address, M.Size)) {
- return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: ");
+ return MakeErrMsg(ErrMsg, "Cannot set memory to writeable");
}
return true;
}
bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
if (!setRangeExecutable(M.Address, M.Size)) {
- return MakeErrMsg(ErrMsg, "Cannot set memory to executable: ");
+ return MakeErrMsg(ErrMsg, "Cannot set memory to executable");
}
return true;
}
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index 72da7c5..4e48412 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -182,7 +182,8 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}
-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+ perms Perms) {
SmallVector<wchar_t, 128> path_utf16;
if (std::error_code ec = widenPath(path, path_utf16))
@@ -252,17 +253,34 @@ std::error_code rename(const Twine &from, const Twine &to) {
return ec;
std::error_code ec = std::error_code();
+
+ // Retry while we see ERROR_ACCESS_DENIED.
+ // System scanners (eg. indexer) might open the source file when it is written
+ // and closed.
+
for (int i = 0; i < 2000; i++) {
+ // Try ReplaceFile first, as it is able to associate a new data stream with
+ // the destination even if the destination file is currently open.
+ if (::ReplaceFileW(wide_to.begin(), wide_from.begin(), NULL, 0, NULL, NULL))
+ return std::error_code();
+
+ // We get ERROR_FILE_NOT_FOUND if the destination file is missing.
+ // MoveFileEx can handle this case.
+ DWORD ReplaceError = ::GetLastError();
+ ec = mapWindowsError(ReplaceError);
+ if (ReplaceError != ERROR_ACCESS_DENIED &&
+ ReplaceError != ERROR_FILE_NOT_FOUND &&
+ ReplaceError != ERROR_SHARING_VIOLATION)
+ break;
+
if (::MoveFileExW(wide_from.begin(), wide_to.begin(),
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
return std::error_code();
- DWORD LastError = ::GetLastError();
- ec = mapWindowsError(LastError);
- if (LastError != ERROR_ACCESS_DENIED)
- break;
- // Retry MoveFile() at ACCESS_DENIED.
- // System scanners (eg. indexer) might open the source file when
- // It is written and closed.
+
+ DWORD MoveError = ::GetLastError();
+ ec = mapWindowsError(MoveError);
+ if (MoveError != ERROR_ACCESS_DENIED) break;
+
::Sleep(1);
}
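
The retry strategy above leans on two Win32 contracts: ReplaceFileW takes the destination name first and can swap in a new data stream even while the destination is open, while MoveFileExW covers the missing-destination case. A minimal standalone sketch of the same loop, assuming only <windows.h> rather than the LLVM sources:

#include <windows.h>

static bool renameWithRetry(const wchar_t *From, const wchar_t *To) {
  for (int I = 0; I < 2000; ++I) {
    // Destination comes first in ReplaceFileW's parameter list.
    if (::ReplaceFileW(To, From, nullptr, 0, nullptr, nullptr))
      return true;
    DWORD RE = ::GetLastError();
    if (RE != ERROR_ACCESS_DENIED && RE != ERROR_FILE_NOT_FOUND &&
        RE != ERROR_SHARING_VIOLATION)
      return false;
    // ReplaceFileW fails with ERROR_FILE_NOT_FOUND when To is missing;
    // MoveFileExW handles that case.
    if (::MoveFileExW(From, To,
                      MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
      return true;
    if (::GetLastError() != ERROR_ACCESS_DENIED)
      return false;
    ::Sleep(1); // Let a scanner (e.g. an indexer) release the file.
  }
  return false;
}
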
@@ -301,6 +319,11 @@ std::error_code access(const Twine &Path, AccessMode Mode) {
return std::error_code();
}
+bool can_execute(const Twine &Path) {
+ return !access(Path, AccessMode::Execute) ||
+ !access(Path + ".exe", AccessMode::Execute);
+}
+
bool equivalent(file_status A, file_status B) {
assert(status_known(A) && status_known(B));
return A.FileIndexHigh == B.FileIndexHigh &&
@@ -325,10 +348,12 @@ std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
static bool isReservedName(StringRef path) {
// This list of reserved names comes from MSDN, at:
// http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
- static const char *sReservedNames[] = { "nul", "con", "prn", "aux",
- "com1", "com2", "com3", "com4", "com5", "com6",
- "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
- "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" };
+ static const char *const sReservedNames[] = { "nul", "con", "prn", "aux",
+ "com1", "com2", "com3", "com4",
+ "com5", "com6", "com7", "com8",
+ "com9", "lpt1", "lpt2", "lpt3",
+ "lpt4", "lpt5", "lpt6", "lpt7",
+ "lpt8", "lpt9" };
// First, check to see if this is a device namespace, which always
// starts with \\.\, since device namespaces are not legal file paths.
@@ -643,9 +668,10 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
if (std::error_code EC = widenPath(Name, PathUTF16))
return EC;
- HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
- FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+ HANDLE H =
+ ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
std::error_code EC = mapWindowsError(LastError);
@@ -728,30 +754,31 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
} // end namespace fs
namespace path {
-
-bool home_directory(SmallVectorImpl<char> &result) {
- wchar_t Path[MAX_PATH];
- if (::SHGetFolderPathW(0, CSIDL_APPDATA | CSIDL_FLAG_CREATE, 0,
- /*SHGFP_TYPE_CURRENT*/0, Path) != S_OK)
+static bool getKnownFolderPath(KNOWNFOLDERID folderId,
+ SmallVectorImpl<char> &result) {
+ wchar_t *path = nullptr;
+ if (::SHGetKnownFolderPath(folderId, KF_FLAG_CREATE, nullptr, &path) != S_OK)
return false;
- if (UTF16ToUTF8(Path, ::wcslen(Path), result))
- return false;
+ bool ok = !UTF16ToUTF8(path, ::wcslen(path), result);
+ ::CoTaskMemFree(path);
+ return ok;
+}
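
The helper relies on the SHGetKnownFolderPath ownership rule: the shell allocates the returned wide string and the caller must release it with CoTaskMemFree, even on the success path. A hedged, self-contained rendering of that contract (hypothetical helper, not part of the patch):

#include <shlobj.h>
#include <string>

static bool knownFolder(REFKNOWNFOLDERID Id, std::wstring &Out) {
  wchar_t *Path = nullptr;
  if (::SHGetKnownFolderPath(Id, KF_FLAG_CREATE, nullptr, &Path) != S_OK)
    return false;
  Out = Path;            // Copy before releasing the shell's allocation.
  ::CoTaskMemFree(Path); // Mandatory, even though we did not allocate it.
  return true;
}
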
- return true;
+bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+ return getKnownFolderPath(FOLDERID_LocalAppData, Result);
}
-static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) {
- SmallVector<wchar_t, 128> NameUTF16;
- if (windows::UTF8ToUTF16(Var, NameUTF16))
- return false;
+bool home_directory(SmallVectorImpl<char> &result) {
+ return getKnownFolderPath(FOLDERID_Profile, result);
+}
+static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl<char> &Res) {
SmallVector<wchar_t, 1024> Buf;
size_t Size = 1024;
do {
Buf.reserve(Size);
- Size =
- GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity());
+ Size = GetEnvironmentVariableW(Var, Buf.data(), Buf.capacity());
if (Size == 0)
return false;
@@ -759,14 +786,12 @@ static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) {
} while (Size > Buf.capacity());
Buf.set_size(Size);
- if (windows::UTF16ToUTF8(Buf.data(), Size, Res))
- return false;
- return true;
+ return !windows::UTF16ToUTF8(Buf.data(), Size, Res);
}
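
The grow-and-retry loop works because GetEnvironmentVariableW reports, when the buffer is too small, the required size in characters including the terminator, while on success it returns the length copied without it. A standalone sketch of that contract, for illustration only:

#include <windows.h>
#include <vector>

static bool getEnvW(const wchar_t *Var, std::vector<wchar_t> &Buf) {
  DWORD Size = 128;
  do {
    Buf.resize(Size);
    Size = ::GetEnvironmentVariableW(Var, Buf.data(),
                                     static_cast<DWORD>(Buf.size()));
    if (Size == 0)
      return false;            // Not set (or empty value).
  } while (Size > Buf.size()); // Too small: Size is the needed capacity.
  Buf.resize(Size);            // On success, Size excludes the terminator.
  return true;
}
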
static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) {
- const char *EnvironmentVariables[] = {"TMP", "TEMP", "USERPROFILE"};
- for (const char *Env : EnvironmentVariables) {
+ const wchar_t *EnvironmentVariables[] = {L"TMP", L"TEMP", L"USERPROFILE"};
+ for (auto *Env : EnvironmentVariables) {
if (getTempDirEnvVar(Env, Res))
return true;
}
@@ -777,13 +802,19 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
(void)ErasedOnReboot;
Result.clear();
- // Check whether the temporary directory is specified by an environment
- // variable.
- if (getTempDirEnvVar(Result))
+ // Check whether the temporary directory is specified by an environment var.
+ // This matches GetTempPath logic to some degree. GetTempPath is not used
+ // directly as it cannot handle env vars longer than 130 chars on Windows 7
+ // (fixed on Windows 8).
+ if (getTempDirEnvVar(Result)) {
+ assert(!Result.empty() && "Unexpected empty path");
+ native(Result); // Some Unix-like shells use Unix path separators in $TMP.
+ fs::make_absolute(Result); // Make it absolute if not already.
return;
+ }
// Fall back to a system default.
- const char *DefaultResult = "C:\\TEMP";
+ const char *DefaultResult = "C:\\Temp";
Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
} // end namespace path
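
The two normalization calls added above matter when $TMP comes from a Unix-like shell. A hedged illustration using the public LLVM path APIs as named in this tree:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

void normalizeTempDir(llvm::SmallVectorImpl<char> &Dir) {
  llvm::sys::path::native(Dir);      // "c:/tmp" -> "c:\tmp" on Windows.
  llvm::sys::fs::make_absolute(Dir); // Resolve a relative "tmp" entry.
}
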
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index 8164956..dae35a8 100644
--- a/contrib/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -417,16 +417,23 @@ const char *Process::ResetColor() {
return 0;
}
+// Include GetLastError() in a fatal error message.
+static void ReportLastErrorFatal(const char *Msg) {
+ std::string ErrMsg;
+ MakeErrMsg(&ErrMsg, Msg);
+ report_fatal_error(ErrMsg);
+}
+
unsigned Process::GetRandomNumber() {
HCRYPTPROV HCPC;
if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL,
CRYPT_VERIFYCONTEXT))
- report_fatal_error("Could not acquire a cryptographic context");
+ ReportLastErrorFatal("Could not acquire a cryptographic context");
ScopedCryptContext CryptoProvider(HCPC);
unsigned Ret;
if (!::CryptGenRandom(CryptoProvider, sizeof(Ret),
reinterpret_cast<BYTE *>(&Ret)))
- report_fatal_error("Could not generate a random number");
+ ReportLastErrorFatal("Could not generate a random number");
return Ret;
}
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index c29d872..d4e14dd 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -75,8 +75,15 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
do {
U16Result.reserve(Len);
- Len = ::SearchPathW(Path, c_str(U16Name),
- U16Ext.empty() ? nullptr : c_str(U16Ext),
+ // Let's attach the extension manually. That is needed for names that
+ // contain a dot, like aaa.bbb: SearchPathW will not append its extension
+ // argument to such names because it assumes they already have one.
+ SmallVector<wchar_t, MAX_PATH> U16NameExt;
+ if (std::error_code EC =
+ windows::UTF8ToUTF16(Twine(Name + Ext).str(), U16NameExt))
+ return EC;
+
+ Len = ::SearchPathW(Path, c_str(U16NameExt), nullptr,
U16Result.capacity(), U16Result.data(), nullptr);
} while (Len > U16Result.capacity());
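
SearchPathW follows the same probe-and-grow convention as the environment query earlier: a too-small buffer makes it return the required size with the terminator, success returns the copied length without it. A minimal sketch, assuming nothing beyond <windows.h>:

#include <windows.h>
#include <vector>

static bool searchPath(const wchar_t *Name, std::vector<wchar_t> &Result) {
  DWORD Len = MAX_PATH;
  do {
    Result.resize(Len);
    Len = ::SearchPathW(nullptr, Name, nullptr,
                        static_cast<DWORD>(Result.size()), Result.data(),
                        nullptr);
    if (Len == 0)
      return false;              // Nothing found on the search path.
  } while (Len > Result.size()); // Len was the required capacity.
  Result.resize(Len);            // Now Len excludes the terminator.
  return true;
}
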
@@ -132,7 +139,7 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
FILE_ATTRIBUTE_NORMAL, NULL);
if (h == INVALID_HANDLE_VALUE) {
MakeErrMsg(ErrMsg, fname + ": Can't open file for " +
- (fd ? "input: " : "output: "));
+ (fd ? "input" : "output"));
}
return h;
@@ -251,6 +258,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
return false;
}
+ // can_execute may succeed by looking at Program + ".exe". CreateProcessW
+ // will implicitly add the .exe if we provide a command line without an
+ // executable path, but since we use an explicit executable, we have to add
+ // ".exe" ourselves.
+ SmallString<64> ProgramStorage;
+ if (!sys::fs::exists(Program))
+ Program = Twine(Program + ".exe").toStringRef(ProgramStorage);
+
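
The probe mirrors what can_execute accepted earlier: if the bare path does not exist, retry with the ".exe" suffix that CreateProcessW would have implied for a bare command line. A sketch of just that step (hypothetical helper):

#include <windows.h>
#include <string>

static std::string withExeIfMissing(std::string Program) {
  if (::GetFileAttributesA(Program.c_str()) == INVALID_FILE_ATTRIBUTES)
    Program += ".exe"; // Match the name can_execute already probed.
  return Program;
}
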
// Windows wants a command line, not an array of args, to pass to the new
// process. We have to concatenate them all, while quoting the args that
// have embedded spaces (or are empty).
@@ -416,7 +431,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
if (SecondsToWait) {
if (!TerminateProcess(PI.ProcessHandle, 1)) {
if (ErrMsg)
- MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program");
// -2 indicates a crash or timeout as opposed to failure to execute.
WaitResult.ReturnCode = -2;
@@ -441,7 +456,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
if (!rc) {
SetLastError(err);
if (ErrMsg)
- MakeErrMsg(ErrMsg, "Failed getting status for program.");
+ MakeErrMsg(ErrMsg, "Failed getting status for program");
// -2 indicates a crash or timeout as opposed to failure to execute.
WaitResult.ReturnCode = -2;
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index 5c8c239..d109a66 100644
--- a/contrib/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -14,7 +14,6 @@
#include <algorithm>
#include <signal.h>
#include <stdio.h>
-#include <vector>
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -136,6 +135,10 @@ typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64,
PDWORD, PIMAGEHLP_LINE64);
static fpSymGetLineFromAddr64 fSymGetLineFromAddr64;
+typedef BOOL(WINAPI *fpSymGetModuleInfo64)(HANDLE hProcess, DWORD64 dwAddr,
+ PIMAGEHLP_MODULE64 ModuleInfo);
+static fpSymGetModuleInfo64 fSymGetModuleInfo64;
+
typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
static fpSymFunctionTableAccess64 fSymFunctionTableAccess64;
@@ -145,6 +148,9 @@ static fpSymSetOptions fSymSetOptions;
typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL);
static fpSymInitialize fSymInitialize;
+typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
+static fpEnumerateLoadedModules fEnumerateLoadedModules;
+
static bool load64BitDebugHelp(void) {
HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
if (hLib) {
@@ -156,14 +162,20 @@ static bool load64BitDebugHelp(void) {
::GetProcAddress(hLib, "SymGetSymFromAddr64");
fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)
::GetProcAddress(hLib, "SymGetLineFromAddr64");
+ fSymGetModuleInfo64 = (fpSymGetModuleInfo64)
+ ::GetProcAddress(hLib, "SymGetModuleInfo64");
fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)
::GetProcAddress(hLib, "SymFunctionTableAccess64");
fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions");
fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize");
+ fEnumerateLoadedModules = (fpEnumerateLoadedModules)
+ ::GetProcAddress(hLib, "EnumerateLoadedModules64");
}
return fStackWalk64 && fSymInitialize && fSymSetOptions;
}
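
All of these entry points are resolved lazily so that a missing or older Dbghelp.dll only disables symbolization instead of preventing the process from loading. The pattern in isolation, under the same assumptions:

#include <windows.h>

typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL);

static fpSymInitialize resolveSymInitialize() {
  if (HMODULE H = ::LoadLibraryW(L"Dbghelp.dll"))
    return (fpSymInitialize)::GetProcAddress(H, "SymInitialize");
  return nullptr; // Callers must tolerate a null result.
}
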
+using namespace llvm;
+
// Forward declare.
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
@@ -172,7 +184,6 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
static void (*InterruptFunction)() = 0;
static std::vector<std::string> *FilesToRemove = NULL;
-static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
static bool RegisteredUnhandledExceptionFilter = false;
static bool CleanupExecuted = false;
static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
@@ -183,23 +194,106 @@ static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
static CRITICAL_SECTION CriticalSection;
static bool CriticalSectionInitialized = false;
-static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
- HANDLE hThread, STACKFRAME64 &StackFrame,
- CONTEXT *Context) {
- DWORD machineType;
+enum {
#if defined(_M_X64)
- machineType = IMAGE_FILE_MACHINE_AMD64;
+ NativeMachineType = IMAGE_FILE_MACHINE_AMD64
#else
- machineType = IMAGE_FILE_MACHINE_I386;
+ NativeMachineType = IMAGE_FILE_MACHINE_I386
#endif
+};
+
+static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS,
+ HANDLE hProcess, HANDLE hThread,
+ STACKFRAME64 &StackFrameOrig,
+ CONTEXT *ContextOrig) {
+ // StackWalk64 modifies the incoming stack frame and context, so copy them.
+ STACKFRAME64 StackFrame = StackFrameOrig;
+
+ // Copy the register context so that we don't modify it while we unwind. We
+ // could use InitializeContext + CopyContext, but that's only required to get
+ // at AVX registers, which typically aren't needed by StackWalk64. Reduce the
+ // flag set to indicate that there's less data.
+ CONTEXT Context = *ContextOrig;
+ Context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+
+ static void *StackTrace[256];
+ size_t Depth = 0;
+ while (fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame,
+ &Context, 0, fSymFunctionTableAccess64,
+ fSymGetModuleBase64, 0)) {
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+ StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset;
+ if (Depth >= array_lengthof(StackTrace))
+ break;
+ }
+
+ return printSymbolizedStackTrace(&StackTrace[0], Depth, OS);
+}
+
+namespace {
+struct FindModuleData {
+ void **StackTrace;
+ int Depth;
+ const char **Modules;
+ intptr_t *Offsets;
+ StringSaver *StrPool;
+};
+}
+
+static BOOL CALLBACK findModuleCallback(WIN32_ELMCB_PCSTR ModuleName,
+ DWORD64 ModuleBase, ULONG ModuleSize,
+ void *VoidData) {
+ FindModuleData *Data = (FindModuleData*)VoidData;
+ intptr_t Beg = ModuleBase;
+ intptr_t End = Beg + ModuleSize;
+ for (int I = 0; I < Data->Depth; I++) {
+ if (Data->Modules[I])
+ continue;
+ intptr_t Addr = (intptr_t)Data->StackTrace[I];
+ if (Beg <= Addr && Addr < End) {
+ Data->Modules[I] = Data->StrPool->save(ModuleName);
+ Data->Offsets[I] = Addr - Beg;
+ }
+ }
+ return TRUE;
+}
+
+static bool findModulesAndOffsets(void **StackTrace, int Depth,
+ const char **Modules, intptr_t *Offsets,
+ const char *MainExecutableName,
+ StringSaver &StrPool) {
+ if (!fEnumerateLoadedModules)
+ return false;
+ FindModuleData Data;
+ Data.StackTrace = StackTrace;
+ Data.Depth = Depth;
+ Data.Modules = Modules;
+ Data.Offsets = Offsets;
+ Data.StrPool = &StrPool;
+ fEnumerateLoadedModules(GetCurrentProcess(), findModuleCallback, &Data);
+ return true;
+}
+static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
+ HANDLE hThread, STACKFRAME64 &StackFrame,
+ CONTEXT *Context) {
// Initialize the symbol handler.
fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
fSymInitialize(hProcess, NULL, TRUE);
+ // Try llvm-symbolizer first. llvm-symbolizer knows how to deal with both PDBs
+ // and DWARF, so it should do a good job regardless of what debug info or
+ // linker is in use.
+ if (printStackTraceWithLLVMSymbolizer(OS, hProcess, hThread, StackFrame,
+ Context)) {
+ return;
+ }
+
while (true) {
- if (!fStackWalk64(machineType, hProcess, hThread, &StackFrame, Context, 0,
- fSymFunctionTableAccess64, fSymGetModuleBase64, 0)) {
+ if (!fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame,
+ Context, 0, fSymFunctionTableAccess64,
+ fSymGetModuleBase64, 0)) {
break;
}
@@ -311,10 +405,7 @@ static void RegisterHandler() {
// If we cannot load up the APIs (which would be unexpected as they should
// exist on every version of Windows we support), we will bail out since
// there would be nothing to report.
- if (!load64BitDebugHelp()) {
- assert(false && "These APIs should always be available");
- return;
- }
+ assert(load64BitDebugHelp() && "These APIs should always be available");
if (RegisteredUnhandledExceptionFilter) {
EnterCriticalSection(&CriticalSection);
@@ -404,7 +495,6 @@ extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
#endif
void llvm::sys::PrintStackTrace(raw_ostream &OS) {
-
STACKFRAME64 StackFrame = {};
CONTEXT Context = {};
::RtlCaptureContext(&Context);
@@ -436,8 +526,6 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
- if (CallBacksToRun == 0)
- CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
@@ -454,17 +542,12 @@ static void Cleanup() {
CleanupExecuted = true;
// FIXME: open files cannot be deleted.
-
if (FilesToRemove != NULL)
while (!FilesToRemove->empty()) {
llvm::sys::fs::remove(FilesToRemove->back());
FilesToRemove->pop_back();
}
-
- if (CallBacksToRun)
- for (auto &I : *CallBacksToRun)
- I.first(I.second);
-
+ llvm::sys::RunSignalHandlers();
LeaveCriticalSection(&CriticalSection);
}
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index 5bb0b8d..34d961b 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -26,12 +26,13 @@
#undef _WIN32_WINNT
#undef _WIN32_IE
-// Require at least Windows XP(5.1) API.
-#define _WIN32_WINNT 0x0501
-#define _WIN32_IE 0x0600 // MinGW at it again.
+// Require at least Windows 7 API.
+#define _WIN32_WINNT 0x0601
+#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
#define WIN32_LEAN_AND_MEAN
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // Get build system configuration settings
@@ -47,13 +48,16 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
return true;
char *buffer = NULL;
+ DWORD LastError = GetLastError();
DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM,
- NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
if (R)
- *ErrMsg = prefix + buffer;
+ *ErrMsg = prefix + ": " + buffer;
else
- *ErrMsg = prefix + "Unknown error";
+ *ErrMsg = prefix + ": Unknown error";
+ *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
LocalFree(buffer);
return R != 0;
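
Two FormatMessage details carry this change: FORMAT_MESSAGE_ALLOCATE_BUFFER makes the system allocate the text, released with LocalFree, and FORMAT_MESSAGE_MAX_WIDTH_MASK strips the trailing line break so the message can be embedded mid-sentence. A hedged standalone equivalent:

#include <windows.h>
#include <string>

static std::string lastErrorText() {
  DWORD Err = ::GetLastError();
  char *Buf = nullptr;
  DWORD N = ::FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                                 FORMAT_MESSAGE_FROM_SYSTEM |
                                 FORMAT_MESSAGE_MAX_WIDTH_MASK,
                             nullptr, Err, 0, (LPSTR)&Buf, 1, nullptr);
  std::string Msg = N ? std::string(Buf, N) : std::string("Unknown error");
  ::LocalFree(Buf); // Mirrors the code above: freed unconditionally.
  return Msg;
}
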
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
index d55da5e..c4384ca 100644
--- a/contrib/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -801,7 +801,7 @@ Token &Scanner::peekNext() {
removeStaleSimpleKeyCandidates();
SimpleKey SK;
- SK.Tok = TokenQueue.front();
+ SK.Tok = TokenQueue.begin();
if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
== SimpleKeys.end())
break;
@@ -962,10 +962,8 @@ void Scanner::skip(uint32_t Distance) {
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
if (Position == End)
return false;
- if ( *Position == ' ' || *Position == '\t'
- || *Position == '\r' || *Position == '\n')
- return true;
- return false;
+ return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
+ *Position == '\n';
}
bool Scanner::consumeLineBreakIfPresent() {
@@ -1163,7 +1161,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) {
TokenQueue.push_back(T);
// [ and { may begin a simple key.
- saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+ saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
// And may also be followed by a simple key.
IsSimpleKeyAllowed = true;
@@ -1326,7 +1324,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+ saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
@@ -1404,7 +1402,7 @@ bool Scanner::scanPlainScalar() {
TokenQueue.push_back(T);
// Plain scalars can be simple keys.
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+ saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
@@ -1439,7 +1437,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
TokenQueue.push_back(T);
// Alias and anchors can be simple keys.
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+ saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
@@ -1669,7 +1667,7 @@ bool Scanner::scanTag() {
TokenQueue.push_back(T);
// Tags can be simple keys.
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+ saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
index 6b59a16..2aa6e9b 100644
--- a/contrib/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -332,17 +332,12 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
StringRef KeyStr = SN->getValue(StringStorage);
if (!StringStorage.empty()) {
// Copy string to permanent storage
- unsigned Len = StringStorage.size();
- char *Buf = StringAllocator.Allocate<char>(Len);
- memcpy(Buf, &StringStorage[0], Len);
- KeyStr = StringRef(Buf, Len);
+ KeyStr = StringStorage.str().copy(StringAllocator);
}
return llvm::make_unique<ScalarHNode>(N, KeyStr);
} else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) {
- StringRef Value = BSN->getValue();
- char *Buf = StringAllocator.Allocate<char>(Value.size());
- memcpy(Buf, Value.data(), Value.size());
- return llvm::make_unique<ScalarHNode>(N, StringRef(Buf, Value.size()));
+ StringRef ValueCopy = BSN->getValue().copy(StringAllocator);
+ return llvm::make_unique<ScalarHNode>(N, ValueCopy);
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
auto SQHNode = llvm::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
@@ -365,10 +360,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
StringRef KeyStr = KeyScalar->getValue(StringStorage);
if (!StringStorage.empty()) {
// Copy string to permanent storage
- unsigned Len = StringStorage.size();
- char *Buf = StringAllocator.Allocate<char>(Len);
- memcpy(Buf, &StringStorage[0], Len);
- KeyStr = StringRef(Buf, Len);
+ KeyStr = StringStorage.str().copy(StringAllocator);
}
auto ValueHNode = this->createHNodes(KVN.getValue());
if (EC)
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 42f830b..57c7ac3 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -517,7 +517,7 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
/// closes the file when the stream is destroyed.
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
: raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
- Error(false), UseAtomicWrites(false) {
+ Error(false) {
if (FD < 0 ) {
ShouldClose = false;
return;
@@ -568,21 +568,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
pos += Size;
do {
- ssize_t ret;
-
- // Check whether we should attempt to use atomic writes.
- if (LLVM_LIKELY(!UseAtomicWrites)) {
- ret = ::write(FD, Ptr, Size);
- } else {
- // Use ::writev() where available.
-#if defined(HAVE_WRITEV)
- const void *Addr = static_cast<const void *>(Ptr);
- struct iovec IOV = {const_cast<void *>(Addr), Size };
- ret = ::writev(FD, &IOV, 1);
-#else
- ret = ::write(FD, Ptr, Size);
-#endif
- }
+ ssize_t ret = ::write(FD, Ptr, Size);
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
@@ -755,72 +741,15 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
// raw_svector_ostream
//===----------------------------------------------------------------------===//
-// The raw_svector_ostream implementation uses the SmallVector itself as the
-// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
-// always pointing past the end of the vector, but within the vector
-// capacity. This allows raw_ostream to write directly into the correct place,
-// and we only need to set the vector size when the data is flushed.
-
-raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O, unsigned)
- : OS(O) {}
+uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
-raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
- init();
-}
-
-void raw_svector_ostream::init() {
- // Set up the initial external buffer. We make sure that the buffer has at
- // least 128 bytes free; raw_ostream itself only requires 64, but we want to
- // make sure that we don't grow the buffer unnecessarily on destruction (when
- // the data is flushed). See the FIXME below.
- OS.reserve(OS.size() + 128);
- SetBuffer(OS.end(), OS.capacity() - OS.size());
-}
-
-raw_svector_ostream::~raw_svector_ostream() {
- // FIXME: Prevent resizing during this flush().
- flush();
+void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
+ OS.append(Ptr, Ptr + Size);
}
void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
uint64_t Offset) {
- flush();
- memcpy(OS.begin() + Offset, Ptr, Size);
-}
-
-/// resync - This is called when the SmallVector we're appending to is changed
-/// outside of the raw_svector_ostream's control. It is only safe to do this
-/// if the raw_svector_ostream has previously been flushed.
-void raw_svector_ostream::resync() {
- assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector");
-
- if (OS.capacity() - OS.size() < 64)
- OS.reserve(OS.capacity() * 2);
- SetBuffer(OS.end(), OS.capacity() - OS.size());
-}
-
-void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
- if (Ptr == OS.end()) {
- // Grow the buffer to include the scratch area without copying.
- size_t NewSize = OS.size() + Size;
- assert(NewSize <= OS.capacity() && "Invalid write_impl() call!");
- OS.set_size(NewSize);
- } else {
- assert(!GetNumBytesInBuffer());
- OS.append(Ptr, Ptr + Size);
- }
-
- OS.reserve(OS.size() + 64);
- SetBuffer(OS.end(), OS.capacity() - OS.size());
-}
-
-uint64_t raw_svector_ostream::current_pos() const {
- return OS.size();
-}
-
-StringRef raw_svector_ostream::str() {
- flush();
- return StringRef(OS.begin(), OS.size());
+ memcpy(OS.data() + Offset, Ptr, Size);
}
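
After this rewrite raw_svector_ostream keeps no buffer of its own: every write appends straight to the SmallVector, so resync() and the destructor flush disappear and pwrite can patch the vector in place. A hedged usage sketch against the in-tree API:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

void demo() {
  llvm::SmallString<64> Buf;
  llvm::raw_svector_ostream OS(Buf);
  OS << "value = " << 42;
  // Buf already holds "value = 42"; no flush() is needed any more.
}
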
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index c9a31b6..87a3422 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -673,6 +673,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
PrintFatalError(CurRec->getLoc(),
"Undefined reference:'" + Name + "'\n");
}
+
+ if (isa<IntRecTy>(getType())) {
+ if (BitsInit *BI = dyn_cast<BitsInit>(LHS)) {
+ if (Init *NewInit = BI->convertInitializerTo(IntRecTy::get()))
+ return NewInit;
+ break;
+ }
+ }
}
break;
}
@@ -1633,7 +1641,7 @@ void Record::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
OS << R.getNameInitAsString();
- const std::vector<Init *> &TArgs = R.getTemplateArgs();
+ ArrayRef<Init *> TArgs = R.getTemplateArgs();
if (!TArgs.empty()) {
OS << "<";
bool NeedComma = false;
diff --git a/contrib/llvm/lib/TableGen/SetTheory.cpp b/contrib/llvm/lib/TableGen/SetTheory.cpp
index 07c5381..f56b17a 100644
--- a/contrib/llvm/lib/TableGen/SetTheory.cpp
+++ b/contrib/llvm/lib/TableGen/SetTheory.cpp
@@ -196,7 +196,7 @@ struct SequenceOp : public SetTheory::Operator {
if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
To = II->getValue();
else
- PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
+ PrintFatalError(Loc, "To must be an integer: " + Expr->getAsString());
if (To < 0 || To >= (1 << 30))
PrintFatalError(Loc, "To out of range");
diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp
index 5c36fda..e5f6f16 100644
--- a/contrib/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm/lib/TableGen/TGParser.cpp
@@ -152,7 +152,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (AddValue(CurRec, SubClass.RefRange.Start, Val))
return true;
- const std::vector<Init *> &TArgs = SC->getTemplateArgs();
+ ArrayRef<Init *> TArgs = SC->getTemplateArgs();
// Ensure that an appropriate number of template arguments are specified.
if (TArgs.size() < SubClass.TemplateArgs.size())
@@ -228,7 +228,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
CurMC->DefPrototypes.push_back(std::move(NewDef));
}
- const std::vector<Init *> &SMCTArgs = SMC->Rec.getTemplateArgs();
+ ArrayRef<Init *> SMCTArgs = SMC->Rec.getTemplateArgs();
// Ensure that an appropriate number of template arguments are
// specified.
@@ -1641,7 +1641,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
RecTy *ItemType = EltTy;
unsigned int ArgN = 0;
if (ArgsRec && !EltTy) {
- const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
if (TArgs.empty()) {
TokError("template argument provided to non-template class");
return std::vector<Init*>();
@@ -1662,7 +1662,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
Lex.Lex(); // Eat the comma
if (ArgsRec && !EltTy) {
- const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
TokError("too many template arguments");
return std::vector<Init*>();
@@ -2313,13 +2313,11 @@ bool TGParser::ParseMultiClass() {
return false;
}
-Record *TGParser::
-InstantiateMulticlassDef(MultiClass &MC,
- Record *DefProto,
- Init *&DefmPrefix,
- SMRange DefmPrefixRange,
- const std::vector<Init *> &TArgs,
- std::vector<Init *> &TemplateVals) {
+Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
+ Init *&DefmPrefix,
+ SMRange DefmPrefixRange,
+ ArrayRef<Init *> TArgs,
+ std::vector<Init *> &TemplateVals) {
// We need to preserve DefProto so it can be reused for later
// instantiations, so create a new Record to inherit from it.
@@ -2437,11 +2435,9 @@ InstantiateMulticlassDef(MultiClass &MC,
return CurRec.release();
}
-bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
- Record *CurRec,
- SMLoc DefmPrefixLoc,
- SMLoc SubClassLoc,
- const std::vector<Init *> &TArgs,
+bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
+ SMLoc DefmPrefixLoc, SMLoc SubClassLoc,
+ ArrayRef<Init *> TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs) {
// Loop over all of the template arguments, setting them to the specified
@@ -2540,7 +2536,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
// Verify that the correct number of template arguments were specified.
- const std::vector<Init *> &TArgs = MC->Rec.getTemplateArgs();
+ ArrayRef<Init *> TArgs = MC->Rec.getTemplateArgs();
if (TArgs.size() < TemplateVals.size())
return Error(SubClassLoc,
"more template args specified than multiclass expects");
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index d69d1f4..8b41134 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -135,17 +135,13 @@ private: // Parser methods.
bool ParseObject(MultiClass *MC);
bool ParseClass();
bool ParseMultiClass();
- Record *InstantiateMulticlassDef(MultiClass &MC,
- Record *DefProto,
- Init *&DefmPrefix,
- SMRange DefmPrefixRange,
- const std::vector<Init *> &TArgs,
+ Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
+ Init *&DefmPrefix, SMRange DefmPrefixRange,
+ ArrayRef<Init *> TArgs,
std::vector<Init *> &TemplateVals);
- bool ResolveMulticlassDefArgs(MultiClass &MC,
- Record *DefProto,
- SMLoc DefmPrefixLoc,
- SMLoc SubClassLoc,
- const std::vector<Init *> &TArgs,
+ bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto,
+ SMLoc DefmPrefixLoc, SMLoc SubClassLoc,
+ ArrayRef<Init *> TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs);
bool ResolveMulticlassDef(MultiClass &MC,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index 9a7d6c8..0bff9b5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,15 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable ARMv8 PMUv3 Performance Monitors extension">;
+
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Full FP16", [FeatureFPARMv8]>;
+
+def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
+ "Enable Statistical Profiling extension">;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -40,6 +49,15 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
+ "Reserve X18, making it unavailable "
+ "as a GPR">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -47,6 +65,9 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions", [FeatureCRC]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions", [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -70,19 +91,29 @@ include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
"Cyclone",
@@ -90,12 +121,16 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureNEON,
FeatureCrypto,
FeatureCRC,
+ FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
+// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
@@ -109,11 +144,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def GenericAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "generic";
+ string BreakCharacters = ".";
}
def AppleAsmParserVariant : AsmParserVariant {
int Variant = 1;
string Name = "apple-neon";
+ string BreakCharacters = ".";
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index d7ef3f4..d215d9e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -122,7 +122,7 @@ AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
// Get the previous machine basic block in the function.
- MachineFunction::iterator MBBI = *MBB;
+ MachineFunction::iterator MBBI(MBB);
// Can't go off top of function.
if (MBBI == MBB->getParent()->begin())
@@ -131,7 +131,7 @@ static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 2> Cond;
- MachineBasicBlock *PrevBB = std::prev(MBBI);
+ MachineBasicBlock *PrevBB = &*std::prev(MBBI);
for (MachineBasicBlock *S : MBB->predecessors())
if (S == PrevBB && !TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond) &&
!TBB && !FBB)
@@ -151,10 +151,9 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
// If there is no non-pseudo in the current block, loop back around and try
// the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) {
- for (auto I = FMBB->rbegin(), E = FMBB->rend(); I != E; ++I) {
- if (!I->isPseudo())
- return &*I;
- }
+ for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend()))
+ if (!I.isPseudo())
+ return &I;
}
// There was no previous non-pseudo in the fallen through blocks
@@ -217,8 +216,8 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) {
++Idx;
}
- DEBUG(dbgs() << "Scan complete, "<< Sequences.size()
- << " occurences of pattern found.\n");
+ DEBUG(dbgs() << "Scan complete, " << Sequences.size()
+ << " occurrences of pattern found.\n");
// Then update the basic block, inserting nops between the detected sequences.
for (auto &MI : Sequences) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 9d6dbd6..79a84ad 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -593,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
if (Change) {
Substs[MO.getReg()] = Reg;
MO.setReg(Reg);
- MRI->setPhysRegUsed(Reg);
Changed = true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 716e1a3..3afcdfb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -57,6 +57,8 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
" the other."),
cl::init(true));
+#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion"
+
//===----------------------------------------------------------------------===//
// AArch64AddressTypePromotion
//===----------------------------------------------------------------------===//
@@ -76,7 +78,7 @@ public:
}
const char *getPassName() const override {
- return "AArch64 Address Type Promotion";
+ return AARCH64_TYPE_PROMO_NAME;
}
/// Iterate over the functions and promote the computation of interesting
@@ -143,10 +145,10 @@ private:
char AArch64AddressTypePromotion::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
- "AArch64 Type Promotion Pass", false, false)
+ AARCH64_TYPE_PROMO_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
- "AArch64 Type Promotion Pass", false, false)
+ AARCH64_TYPE_PROMO_NAME, false, false)
FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
return new AArch64AddressTypePromotion();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 18d21fd..1644d71 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -61,6 +61,12 @@ STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
+namespace llvm {
+void initializeAArch64AdvSIMDScalarPass(PassRegistry &);
+}
+
+#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization"
+
namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
@@ -82,12 +88,14 @@ private:
public:
static char ID; // Pass identification, replacement for typeid.
- explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {}
+ explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {
+ initializeAArch64AdvSIMDScalarPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
const char *getPassName() const override {
- return "AdvSIMD Scalar Operation Optimization";
+ return AARCH64_ADVSIMD_NAME;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -98,6 +106,9 @@ public:
char AArch64AdvSIMDScalar::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(AArch64AdvSIMDScalar, "aarch64-simd-scalar",
+ AARCH64_ADVSIMD_NAME, false, false)
+
static bool isGPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (SubReg)
@@ -381,7 +392,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
- if (processMachineBasicBlock(I))
+ if (processMachineBasicBlock(&*I))
Changed = true;
return Changed;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index d973234..a614f55 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -45,6 +45,12 @@ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
STATISTIC(NumSplit, "Number of basic blocks split");
STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
+namespace llvm {
+void initializeAArch64BranchRelaxationPass(PassRegistry &);
+}
+
+#define AARCH64_BR_RELAX_NAME "AArch64 branch relaxation pass"
+
namespace {
class AArch64BranchRelaxation : public MachineFunctionPass {
/// BasicBlockInfo - Information about the offset and size of a single
@@ -93,17 +99,22 @@ class AArch64BranchRelaxation : public MachineFunctionPass {
public:
static char ID;
- AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
+ AArch64BranchRelaxation() : MachineFunctionPass(ID) {
+ initializeAArch64BranchRelaxationPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "AArch64 branch relaxation pass";
+ return AARCH64_BR_RELAX_NAME;
}
};
char AArch64BranchRelaxation::ID = 0;
}
+INITIALIZE_PASS(AArch64BranchRelaxation, "aarch64-branch-relax",
+ AARCH64_BR_RELAX_NAME, false, false)
+
/// verify - check BBOffsets, BBSizes, alignment of islands
void AArch64BranchRelaxation::verify() {
#ifndef NDEBUG
@@ -131,14 +142,14 @@ void AArch64BranchRelaxation::dumpBBs() {
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI(MBB);
// Can't fall off end of function.
- MachineBasicBlock *NextBB = std::next(MBBI);
+ auto NextBB = std::next(MBBI);
if (NextBB == MBB->getParent()->end())
return false;
for (MachineBasicBlock *S : MBB->successors())
- if (S == NextBB)
+ if (S == &*NextBB)
return true;
return false;
@@ -216,9 +227,7 @@ AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB;
- ++MBBI;
- MF->insert(MBBI, NewBB);
+ MF->insert(++OrigBB->getIterator(), NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -421,7 +430,7 @@ bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MBB->replaceSuccessor(FBB, NewBB);
NewBB->addSuccessor(FBB);
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*std::next(MachineFunction::iterator(MBB));
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< ", invert condition and change dest. to BB#"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 1e2d1c3..bc44bc5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -25,30 +25,28 @@
namespace {
using namespace llvm;
-static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
- AArch64::X6, AArch64::X7};
-static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
- AArch64::H3, AArch64::H4, AArch64::H5,
- AArch64::H6, AArch64::H7};
-static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
- AArch64::S3, AArch64::S4, AArch64::S5,
- AArch64::S6, AArch64::S7};
-static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
- AArch64::D3, AArch64::D4, AArch64::D5,
- AArch64::D6, AArch64::D7};
-static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
- AArch64::Q3, AArch64::Q4, AArch64::Q5,
- AArch64::Q6, AArch64::Q7};
+static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+ AArch64::H3, AArch64::H4, AArch64::H5,
+ AArch64::H6, AArch64::H7};
+static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
CCState &State, unsigned SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned StackAlign = State.getMachineFunction()
- .getTarget()
- .getDataLayout()
- ->getStackAlignment();
+ unsigned StackAlign =
+ State.getMachineFunction().getDataLayout().getStackAlignment();
unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
for (auto &It : PendingMembers) {
@@ -88,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- ArrayRef<uint16_t> RegList;
+ ArrayRef<MCPhysReg> RegList;
if (LocVT.SimpleTy == MVT::i64)
RegList = XRegList;
else if (LocVT.SimpleTy == MVT::f16)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 815ebef..388d64e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -279,6 +279,23 @@ def CSR_AArch64_TLS_Darwin
FP,
(sequence "Q%u", 0, 31))>;
+// We can only handle a register pair with adjacent registers; the register
+// pair must belong to the same class as well. Since the access function on the
+// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
+// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
+def CSR_AArch64_CXX_TLS_Darwin
+ : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
+ (sequence "D%u", 0, 31))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_AArch64_CXX_TLS_Darwin_PE
+ : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
+ : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
def CSR_AArch64_TLS_ELF
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 06ff9af..9310ac4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -117,10 +117,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
// Insert a copy from X0 to TLSBaseAddrReg for later.
- MachineInstr *Next = I->getNextNode();
- MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- *TLSBaseAddrReg).addReg(AArch64::X0);
+ MachineInstr *Copy =
+ BuildMI(*I->getParent(), ++I->getIterator(), I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
return Copy;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index efdb2e3..78c239b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -168,6 +168,8 @@ namespace llvm {
void initializeAArch64CollectLOHPass(PassRegistry &);
}
+#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
+
namespace {
struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
@@ -178,7 +180,7 @@ struct AArch64CollectLOH : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "AArch64 Collect Linker Optimization Hint (LOH)";
+ return AARCH64_COLLECT_LOH_NAME;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -220,12 +222,10 @@ typedef SmallVector<unsigned, 32> MapIdToReg;
char AArch64CollectLOH::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
- "AArch64 Collect Linker Optimization Hint (LOH)", false,
- false)
+ AARCH64_COLLECT_LOH_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
- "AArch64 Collect Linker Optimization Hint (LOH)", false,
- false)
+ AARCH64_COLLECT_LOH_NAME, false, false)
/// Given a couple (MBB, reg) get the corresponding set of instruction from
/// the given "sets".
@@ -353,9 +353,17 @@ static void initReachingDef(const MachineFunction &MF,
for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
- assert(ItRegId != RegToId.end() &&
- "Sub-register of an "
- "involved register, not recorded as involved!");
+ // If this alias has not been recorded, then it is not interesting
+ // for the current analysis.
+ // We can end up in this situation because of tuple registers.
+ // E.g., let's say we are interested in S1. When we register
+ // S1, we will also register its aliases and in particular
+ // the tuple Q1_Q2.
+ // Now, when we encounter Q1_Q2, we will look through its aliases
+ // and will find that S2 is not registered.
+ if (ItRegId == RegToId.end())
+ continue;
+
BBKillSet.set(ItRegId->second);
BBGen[ItRegId->second] = &MI;
}
@@ -523,6 +531,8 @@ static bool isCandidateStore(const MachineInstr *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
+ case AArch64::STRBBui:
+ case AArch64::STRHHui:
case AArch64::STRBui:
case AArch64::STRHui:
case AArch64::STRWui:
@@ -884,7 +894,8 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
// If the chain is three instructions long and ldr is the second element,
// then this ldr must load from the GOT, otherwise this is not a correct chain.
- if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
+ if (L2 && !IsL2Add &&
+ !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
continue;
SmallVector<const MachineInstr *, 3> Args;
MCLOHType Kind;
@@ -1000,7 +1011,8 @@ static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
DEBUG(dbgs() << "** Collect Involved Register\n");
for (const auto &MBB : MF) {
for (const MachineInstr &MI : MBB) {
- if (!canDefBePartOfLOH(&MI))
+ if (!canDefBePartOfLOH(&MI) &&
+ !isCandidateLoad(&MI) && !isCandidateStore(&MI))
continue;
// Process defs
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index b9e41c6..fc27bfe 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -59,6 +59,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -153,13 +154,20 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
case AArch64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- if (MRI->use_empty(I->getOperand(0).getReg()))
- return I;
-
- DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
- return nullptr;
-
+ case AArch64::ADDSXri: {
+ unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
+ if (!I->getOperand(2).isImm()) {
+ DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
+ return nullptr;
+ } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
+ DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n');
+ return nullptr;
+ } else if (!MRI->use_empty(I->getOperand(0).getReg())) {
+ DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
+ return nullptr;
+ }
+ return I;
+ }
// Prevent false positive case like:
// cmp w19, #0
// cinc w0, w19, gt
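
A worked instance of the new range guard (a hedged reading of the code above): for cmp w0, #1, lsl #12 the shift amount is 12, so the shifted immediate is 4096 >= 0xfff and the compare is rejected, since the optimizer may later nudge the immediate by one and fall outside the encodable 12-bit range.

// Compile-time check of the arithmetic in the guard above.
static_assert((1 << 12) >= 0xfff,
              "a cmp immediate of #1, lsl #12 exceeds the safe range");
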
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 2b0c92f..df1320f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -353,7 +353,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
MIOperands::PhysRegInfo PRI =
MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI);
- if (PRI.Reads) {
+ if (PRI.Read) {
// The ccmp doesn't produce exactly the same flags as the original
// compare, so reject the transform if there are uses of the flags
// besides the terminators.
@@ -362,7 +362,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
return nullptr;
}
- if (PRI.Clobbers) {
+ if (PRI.Defined || PRI.Clobbered) {
DEBUG(dbgs() << "Not convertible compare: " << *I);
++NumUnknNZCVDefs;
return nullptr;
@@ -567,8 +567,8 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
- Head->removeSuccessor(CmpBB);
- CmpBB->removeSuccessor(Tail);
+ Head->removeSuccessor(CmpBB, true);
+ CmpBB->removeSuccessor(Tail, true);
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->RemoveBranch(*Head);
@@ -786,13 +786,13 @@ void AArch64ConditionalCompares::updateDomTree(
// convert() removes CmpBB which was previously dominated by Head.
// CmpBB children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
- for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
- MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ for (MachineBasicBlock *RemovedMBB : Removed) {
+ MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
assert(Node != HeadNode && "Cannot erase the head node");
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
while (Node->getNumChildren())
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
- DomTree->eraseNode(Removed[i]);
+ DomTree->eraseNode(RemovedMBB);
}
}
@@ -801,8 +801,8 @@ void
AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
- for (unsigned i = 0, e = Removed.size(); i != e; ++i)
- Loops->removeBlock(Removed[i]);
+ for (MachineBasicBlock *RemovedMBB : Removed)
+ Loops->removeBlock(RemovedMBB);
}
/// Invalidate MachineTraceMetrics before if-conversion.
@@ -899,7 +899,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ MinSize = MF.getFunction()->optForMinSize();
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 74fc167..576cf4a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -26,6 +26,12 @@ using namespace llvm;
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+namespace llvm {
+void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry &);
+}
+
+#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions"
+
namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
@@ -35,11 +41,14 @@ private:
bool usesFrameIndex(const MachineInstr &MI);
public:
static char ID; // Pass identification, replacement for typeid.
- explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+ explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
+ initializeAArch64DeadRegisterDefinitionsPass(
+ *PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
- const char *getPassName() const override { return "Dead register definitions"; }
+ const char *getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -49,6 +58,9 @@ public:
char AArch64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
+ AARCH64_DEAD_REG_DEF_NAME, false, false)
+
bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
unsigned Reg, const MachineInstr &MI) {
for (const MachineOperand &MO : MI.implicit_operands())
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c2470f7..d24e42a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -22,18 +22,26 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
+namespace llvm {
+void initializeAArch64ExpandPseudoPass(PassRegistry &);
+}
+
+#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
+
namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
static char ID;
- AArch64ExpandPseudo() : MachineFunctionPass(ID) {}
+ AArch64ExpandPseudo() : MachineFunctionPass(ID) {
+ initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
const AArch64InstrInfo *TII;
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "AArch64 pseudo instruction expansion pass";
+ return AARCH64_EXPAND_PSEUDO_NAME;
}
private:
@@ -45,6 +53,9 @@ private:
char AArch64ExpandPseudo::ID = 0;
}
+INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
+ AARCH64_EXPAND_PSEUDO_NAME, false, false)
+
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 0728198..0ac4b39 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -523,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
- // emit an additonal add to take care of the offset register.
+ // emit an additional add to take care of the offset register.
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
@@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr,
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset);
} else {
@@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
}
// Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(MulLHS);
- return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
- AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
+ WantResult);
+ if (ResultReg)
+ return ResultReg;
}
+ }
// Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
@@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftType, ShiftVal, SetFlags,
- WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftVal, SetFlags,
+ WantResult);
+ if (ResultReg)
+ return ResultReg;
}
}
}
+ }
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
@@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
+ // Don't deal with undefined shifts.
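+ // (e.g. an i32 add with LSL #32: the shifted-register form only defines
+ // shift amounts below the register width.)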
+ if (ShiftImm >= RetVT.getSizeInBits())
+ return 0;
+
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWrs, AArch64::SUBXrs },
{ AArch64::ADDWrs, AArch64::ADDXrs } },
@@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
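+ // Extended-register forms only allow a left shift of 0 to 4 (e.g.
+ // "add x0, x1, w2, uxtw #4" is the widest legal scaling).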
+ if (ShiftImm >= 4)
+ return 0;
+
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWrx, AArch64::SUBXrx },
{ AArch64::ADDWrx, AArch64::ADDXrx } },
@@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
return ResultReg;
// Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(MulLHS);
- return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ if (ResultReg)
+ return ResultReg;
}
+ }
// Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<ShlOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
uint64_t ShiftVal = C->getZExtValue();
@@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ if (ResultReg)
+ return ResultReg;
}
+ }
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
@@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
{ AArch64::ORRWrs, AArch64::ORRXrs },
{ AArch64::EORWrs, AArch64::EORXrs }
};
+
+ // Don't deal with undefined shifts.
+ if (ShiftImm >= RetVT.getSizeInBits())
+ return 0;
+
const TargetRegisterClass *RC;
unsigned Opc;
switch (RetVT.SimpleTy) {
@@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
MIB.addImm(TestBit);
MIB.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
- fastEmitBranch(FBB, DbgLoc);
-
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -2257,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && isValueAvailable(CI)) {
// Try to optimize or fold the cmp.
@@ -2289,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
// instruction.
- CC = getCompareCC(Predicate);
+ AArch64CC::CondCode CC = getCompareCC(Predicate);
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
switch (Predicate) {
default:
@@ -2317,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
.addImm(CC)
.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
- return true;
- }
- } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
- MVT SrcVT;
- if (TI->hasOneUse() && isValueAvailable(TI) &&
- isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
- unsigned CondReg = getRegForValue(TI->getOperand(0));
- if (!CondReg)
- return false;
- bool CondIsKill = hasTrivialKill(TI->getOperand(0));
-
- // Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64) {
- CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
- AArch64::sub_32);
- CondIsKill = true;
- }
-
- unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
- }
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
-
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
@@ -2371,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- // Obtain the branch weight and add the target to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- Target->getBasicBlock());
- FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+ // Obtain the branch probability and add the target to the successor list.
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ BI->getParent(), Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(Target);
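+ // (Successor edges now carry BranchProbability values rather than raw
+ // uint32_t weights; addSuccessorWithoutProb leaves the edge without
+ // meaningful probability information when no BPI result is available.)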
return true;
- } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- unsigned CondReg = getRegForValue(BI->getCondition());
- if (!CondReg)
- return false;
-
- // Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
+ } else {
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- return true;
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
}
unsigned CondReg = getRegForValue(BI->getCondition());
@@ -2406,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
return false;
bool CondRegIsKill = hasTrivialKill(BI->getCondition());
- // We've been divorced from our compare! Our block was split, and
- // now our compare lives in a predecessor block. We musn't
- // re-compare here, as the children of the compare aren't guaranteed
- // live across the block boundary (we *could* check for this).
- // Regardless, the compare has been done in the predecessor block,
- // and it left a value for us in a virtual register. Ergo, we test
- // the one-bit value left in the virtual register.
- emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
-
+ // i1 conditions come in as i32 values; test the lowest bit with tb(n)z.
+ unsigned Opcode = AArch64::TBNZW;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
+ Opcode = AArch64::TBZW;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
+ const MCInstrDesc &II = TII.get(Opcode);
+ unsigned ConstrainedCondReg
+ = constrainOperandRegClass(II, CondReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
+ .addImm(0)
.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -2447,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
// Make sure the CFG is up-to-date.
- for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
+ for (auto *Succ : BI->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
return true;
}
@@ -2456,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
bool AArch64FastISel::selectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
+ // Vectors of i1 are weird: bail out.
+ if (CI->getType()->isVectorTy())
+ return false;
+
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
unsigned ResultReg = 0;
@@ -2954,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
.addImm(NumBytes);
// Process the args.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (CCValAssign &VA : ArgLocs) {
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
MVT ArgVT = OutVTs[VA.getValNo()];
@@ -3018,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(Addr.getOffset()),
- MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (!emitStore(ArgVT, ArgReg, Addr, MMO))
return false;
@@ -3318,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
return false;
// Make sure nothing is in the way
- BasicBlock::const_iterator Start = I;
- BasicBlock::const_iterator End = II;
+ BasicBlock::const_iterator Start(I);
+ BasicBlock::const_iterator End(II);
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
// We only expect extractvalue instructions between the intrinsic and the
// instruction to be selected.
@@ -3684,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
@@ -3763,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::RET_ReallyLR));
- for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
- MIB.addReg(RetRegs[i], RegState::Implicit);
+ for (unsigned RetReg : RetRegs)
+ MIB.addReg(RetReg, RegState::Implicit);
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a76473f..11ae800 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -72,9 +72,9 @@
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
-// * A base pointer is definitly needed when there are both VLAs and local
+// * A base pointer is definitely needed when there are both VLAs and local
// variables with more-than-default alignment requirements.
-// * A frame pointer is definitly needed when there are local variables with
+// * A frame pointer is definitely needed when there are local variables with
// more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
@@ -216,11 +216,11 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout &TD = MF.getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize(0);
+ int stackGrowth = -TD.getPointerSize(0);
// Calculate offsets.
int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
@@ -280,14 +280,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
bool HasFP = hasFP(MF);
- DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -354,7 +357,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes && NeedsRealignment) {
// Use the first callee-saved register as a scratch register.
scratchSPReg = AArch64::X9;
- MF.getRegInfo().setPhysRegUsed(scratchSPReg);
}
// If we're a leaf function, try using the red zone.
@@ -400,8 +402,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
- const int StackGrowth = -TD->getPointerSize(0);
+ const DataLayout &TD = MF.getDataLayout();
+ const int StackGrowth = -TD.getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
@@ -513,33 +515,33 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
return false;
}
-static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+/// Checks whether the given instruction restores callee-saved registers
+/// and, if so, returns how many.
+static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) {
unsigned RtIdx = 0;
- if (MI->getOpcode() == AArch64::LDPXpost ||
- MI->getOpcode() == AArch64::LDPDpost)
+ switch (MI.getOpcode()) {
+ case AArch64::LDPXpost:
+ case AArch64::LDPDpost:
RtIdx = 1;
-
- if (MI->getOpcode() == AArch64::LDPXpost ||
- MI->getOpcode() == AArch64::LDPDpost ||
- MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
- if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
- MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
- return false;
- return true;
+ // FALLTHROUGH
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) ||
+ !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) ||
+ MI.getOperand(RtIdx + 2).getReg() != AArch64::SP)
+ return 0;
+ return 2;
}
-
- return false;
+ return 0;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
if (MBB.end() != MBBI) {
@@ -585,7 +587,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// ---------------------| --- |
// | | | |
// | CalleeSavedReg | | |
- // | (NumRestores * 16) | | |
+ // | (NumRestores * 8) | | |
// | | | |
// ---------------------| | NumBytes
// | | StackSize (StackAdjustUp)
@@ -606,17 +608,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
- if (LastPopI != MBB.begin()) {
- do {
- ++NumRestores;
- --LastPopI;
- } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
- if (!isCSRestore(LastPopI, CSRegs)) {
+ MachineBasicBlock::iterator Begin = MBB.begin();
+ while (LastPopI != Begin) {
+ --LastPopI;
+ unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
+ NumRestores += Restores;
+ if (Restores == 0) {
++LastPopI;
- --NumRestores;
+ break;
}
}
- NumBytes -= NumRestores * 16;
+ NumBytes -= NumRestores * 8;
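+ // e.g. (sketch): restoring x19,x20 and x21,x22 via two LDP instructions
+ // gives NumRestores = 4 registers, i.e. 4 * 8 = 32 bytes, matching the
+ // old per-instruction count of 2 * 16.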
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (!hasFP(MF)) {
@@ -634,15 +636,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be able to save any instructions.
if (NumBytes || MFI->hasVarSizedObjects())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index.
-int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
+ -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -739,9 +733,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
for (unsigned i = 0; i < Count; i += 2) {
unsigned idx = Count - i - 2;
unsigned Reg1 = CSI[idx].getReg();
@@ -911,7 +902,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
+ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Check pairs of consecutive callee-saved registers.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 731f031..427afdf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -37,7 +37,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
@@ -61,6 +60,11 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
+
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 772e894..6c86888 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -34,7 +34,6 @@ using namespace llvm;
namespace {
class AArch64DAGToDAGISel : public SelectionDAGISel {
- AArch64TargetMachine &TM;
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -45,7 +44,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
public:
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
ForCodeSize(false) {}
const char *getPassName() const override {
@@ -53,9 +52,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = MF.getFunction()->optForSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -79,6 +76,21 @@ public:
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
return SelectShiftedRegister(N, true, Reg, Shift);
}
+ bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
@@ -153,8 +165,7 @@ public:
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
-
- SDNode *SelectLIBM(SDNode *N);
+ SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
SDNode *SelectReadRegister(SDNode *N);
SDNode *SelectWriteRegister(SDNode *N);
@@ -165,6 +176,8 @@ public:
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
@@ -422,7 +435,7 @@ static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
return true;
}
-// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
SDValue &LaneOp, int &LaneIdx) {
@@ -572,7 +585,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
}
// AArch64 mandates that the RHS of the operation must use the smallest
- // register classs that could contain the size being extended from. Thus,
+ // register class that could contain the size being extended from. Thus,
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though
// there might not be an actual 32-bit value in the program. We can
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
@@ -587,7 +600,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
-/// leads to duplaicated ADRP instructions.
+/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
for (auto Use : N->uses()) {
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
@@ -604,6 +617,51 @@ static bool isWorthFoldingADDlow(SDValue N) {
return true;
}
+/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+ // mode selected here doesn't support labels/immediates, only base+offset.
+
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
+ RHSC < (0x40 << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
+ }
+ }
+ }
+
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // stp x1, x2, [x0]
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+}
+
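+// Worked example (a sketch, not part of the change itself): with Size == 8
+// the scale is 3, so the accepted offsets are the multiples of 8 in
+// [-512, 504]:
+//   stp x1, x2, [x0, #-512]  ; RHSC = -512, OffImm = -512 >> 3 = -64
+//   stp x1, x2, [x0, #504]   ; RHSC =  504, OffImm =  504 >> 3 =  63
+// Anything else falls through to the base-only form above.
+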
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
@@ -867,7 +925,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (isa<ConstantSDNode>(RHS)) {
int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
- // Skip the immediate can be seleced by load/store addressing mode.
+ // Skip the immediate if it can be selected by the load/store addressing mode.
// Also skip the immediate if it can be encoded by a single ADD (SUB is also
// checked by using -ImmOff).
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
@@ -1034,6 +1092,8 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
// it into an i64.
DstVT = MVT::i32;
}
+ } else if (VT == MVT::f16) {
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1222,8 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue SuperReg = SDValue(Ld, 0);
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
if (Narrow)
@@ -1275,8 +1335,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
} else {
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
SuperReg);
@@ -1420,7 +1480,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
// The resulting code will be at least as good as the original one
// plus it may expose more opportunities for bitfield insert pattern.
// FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM
+ // some optimizations expect AND and not UBFM.
Opd0 = N->getOperand(0);
} else
return false;
@@ -1852,6 +1912,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+ bool BiggerPattern,
SDValue &Src, int &ShiftAmount,
int &MaskWidth) {
EVT VT = Op.getValueType();
@@ -1874,6 +1935,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
Op = Op.getOperand(0);
}
+ // Don't match if the SHL has more than one use, since then we'll end up
+ // generating SHL+UBFIZ instead of just keeping SHL+AND.
+ if (!BiggerPattern && !Op.hasOneUse())
+ return false;
+
uint64_t ShlImm;
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
return false;
@@ -1887,7 +1953,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
- // amount.
+ // amount. BiggerPattern is true when this pattern is being matched for BFI,
+ // BiggerPattern is false when this pattern is being matched for UBFIZ, in
+ // which case it is not profitable to insert an extra shift.
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+ return false;
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
return true;
@@ -1904,7 +1974,8 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
SDValue &Src, unsigned &ImmR,
- unsigned &ImmS, SelectionDAG *CurDAG) {
+ unsigned &ImmS, const APInt &UsefulBits,
+ SelectionDAG *CurDAG) {
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
// Set Opc
@@ -1918,23 +1989,30 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
// Because of simplify-demanded-bits in DAGCombine, involved masks may not
// have the expected shape. Try to undo that.
- APInt UsefulBits;
- getUsefulBits(SDValue(N, 0), UsefulBits);
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
- // OR is commutative, check both possibilities (does llvm provide a
- // way to do that directely, e.g., via code matcher?)
- SDValue OrOpd1Val = N->getOperand(1);
- SDNode *OrOpd0 = N->getOperand(0).getNode();
- SDNode *OrOpd1 = N->getOperand(1).getNode();
- for (int i = 0; i < 2;
- ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+ // OR is commutative, check all combinations of operand order and values of
+ // BiggerPattern, i.e.
+ // Opd0, Opd1, BiggerPattern=false
+ // Opd1, Opd0, BiggerPattern=false
+ // Opd0, Opd1, BiggerPattern=true
+ // Opd1, Opd0, BiggerPattern=true
+ // Several of these combinations may match, so check with BiggerPattern=false
+ // first since that will produce better results by matching more instructions
+ // and/or inserting fewer extra instructions.
+ for (int I = 0; I < 4; ++I) {
+
+ bool BiggerPattern = I / 2;
+ SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+ SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+ SDNode *OrOpd1 = OrOpd1Val.getNode();
+
unsigned BFXOpc;
int DstLSB, Width;
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
- NumberOfIgnoredLowBits, true)) {
+ NumberOfIgnoredLowBits, BiggerPattern)) {
// Check that the returned opcode is compatible with the pattern,
// i.e., same type and zero extended (U and not S)
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -1952,8 +2030,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
// If the mask on the insertee is correct, we have a BFXIL operation. We
// can share the ImmR and ImmS values from the already-computed UBFM.
- } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
- DstLSB, Width)) {
+ } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+ BiggerPattern,
+ Src, DstLSB, Width)) {
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
ImmS = Width - 1;
} else
@@ -2003,11 +2082,18 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
unsigned Opc;
unsigned LSB, MSB;
SDValue Opd0, Opd1;
+ EVT VT = N->getValueType(0);
+ APInt NUsefulBits;
+ getUsefulBits(SDValue(N, 0), NUsefulBits);
+
+ // If all bits are not useful, just return UNDEF.
+ if (!NUsefulBits)
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);
- if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
+ if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits,
+ CurDAG))
return nullptr;
- EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[] = { Opd0,
Opd1,
@@ -2016,58 +2102,37 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
-SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an and masking
+/// out a contiguous set of bits.
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return nullptr;
+
EVT VT = N->getValueType(0);
- unsigned Variant;
unsigned Opc;
- unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
-
- if (VT == MVT::f32) {
- Variant = 0;
- } else if (VT == MVT::f64) {
- Variant = 1;
- } else
- return nullptr; // Unrecognized argument type. Fall back on default codegen.
-
- // Pick the FRINTX variant needed to set the flags.
- unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
- switch (N->getOpcode()) {
- default:
- return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
- case ISD::FCEIL: {
- unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
- Opc = FRINTPOpcs[Variant];
- break;
- }
- case ISD::FFLOOR: {
- unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
- Opc = FRINTMOpcs[Variant];
- break;
- }
- case ISD::FTRUNC: {
- unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
- Opc = FRINTZOpcs[Variant];
- break;
- }
- case ISD::FROUND: {
- unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
- Opc = FRINTAOpcs[Variant];
- break;
- }
- }
+ if (VT == MVT::i32)
+ Opc = AArch64::UBFMWri;
+ else if (VT == MVT::i64)
+ Opc = AArch64::UBFMXri;
+ else
+ return nullptr;
- SDLoc dl(N);
- SDValue In = N->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- Ops.push_back(In);
+ SDValue Op0;
+ int DstLSB, Width;
+ if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+ Op0, DstLSB, Width))
+ return nullptr;
- if (!TM.Options.UnsafeFPMath) {
- SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
- Ops.push_back(SDValue(FRINTX, 1));
- }
+ // ImmR is the rotate right amount.
+ unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+ // ImmS is the most significant bit of the source to be moved.
+ unsigned ImmS = Width - 1;
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ SDLoc DL(N);
+ SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+ CurDAG->getTargetConstant(ImmS, DL, VT)};
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
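+// For example (an illustrative sketch): on i32, "(x << 3) & 0xf8" keeps a
+// 5-bit field, so isBitfieldPositioningOp reports DstLSB = 3, Width = 5 and
+// the node becomes UBFMWri with ImmR = (32 - 3) % 32 = 29 and
+// ImmS = 5 - 1 = 4, printed as "ubfiz w0, w0, #3, #5".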
bool
@@ -2119,7 +2184,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// into a single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() == 1)
return -1;
@@ -2206,7 +2271,15 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
assert (isa<ConstantSDNode>(N->getOperand(2))
&& "Expected a constant integer expression.");
uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
+ unsigned State;
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ assert(Immed < 2 && "Bad imm");
+ State = AArch64::MSRpstateImm1;
+ } else {
+ assert(Immed < 16 && "Bad imm");
+ State = AArch64::MSRpstateImm4;
+ }
+ return CurDAG->getMachineNode(State, DL, MVT::Other,
CurDAG->getTargetConstant(Reg, DL, MVT::i32),
CurDAG->getTargetConstant(Immed, DL, MVT::i16),
N->getOperand(0));
@@ -2279,6 +2352,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case ISD::SRA:
if (SDNode *I = SelectBitfieldExtractOp(Node))
return I;
+ if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+ return I;
break;
case ISD::OR:
@@ -2802,6 +2877,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
}
+ break;
}
case AArch64ISD::LD2post: {
if (VT == MVT::v8i8)
@@ -3214,14 +3290,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
break;
}
-
- case ISD::FCEIL:
- case ISD::FFLOOR:
- case ISD::FTRUNC:
- case ISD::FROUND:
- if (SDNode *I = SelectLIBM(Node))
- return I;
- break;
}
// Select the default instruction
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3e8f46c..9f5beff 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -40,23 +40,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
-namespace {
-enum AlignMode {
- StrictAlign,
- NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
- cl::Hidden, cl::init(NoStrictAlign),
- cl::values(
- clEnumValN(StrictAlign, "aarch64-strict-align",
- "Disallow all unaligned memory accesses"),
- clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
- "Allow unaligned memory accesses"),
- clEnumValEnd));
-
// Place holder until extr generation is tested fully.
static cl::opt<bool>
EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
@@ -76,6 +59,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
+/// Value type used for condition codes.
+static const MVT MVT_CC = MVT::i32;
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -210,11 +196,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- // Exception handling.
- // FIXME: These are guesses. Has this been defined yet?
- setExceptionPointerRegister(AArch64::X0);
- setExceptionSelectorRegister(AArch64::X1);
-
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
@@ -234,6 +215,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
@@ -252,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ }
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
@@ -315,6 +304,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
@@ -403,10 +394,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
+ setOperationAction(ISD::FMINNUM, Ty, Legal);
+ setOperationAction(ISD::FMAXNUM, Ty, Legal);
+ setOperationAction(ISD::FMINNAN, Ty, Legal);
+ setOperationAction(ISD::FMAXNAN, Ty, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
+ // This requires the Performance Monitors extension.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+
if (Subtarget->isTargetMachO()) {
// For iOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret to avoid memory
@@ -456,12 +456,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
+ setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
+ setIndexedStoreAction(im, MVT::f16, Legal);
}
// Trap.
@@ -479,6 +481,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
@@ -487,16 +493,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
+ if (Subtarget->supportsAddressTopByteIgnored())
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
@@ -512,10 +520,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(2);
- RequireStrictAlign = (Align == StrictAlign);
-
setHasExtractBitsInsn(true);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
@@ -646,6 +654,9 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
+
+ // But we do support custom-lowering for FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
@@ -686,6 +697,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+ // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
+ if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16)
+ for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
+ ISD::FMINNUM, ISD::FMAXNUM})
+ setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -730,7 +747,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
@@ -780,6 +797,34 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
return MVT::i64;
}
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const {
+ if (Subtarget->requiresStrictAlign())
+ return false;
+
+ // FIXME: This is mostly true for Cyclone, but not necessarily others.
+ if (Fast) {
+ // FIXME: Define an attribute for slow unaligned accesses instead of
+ // relying on the CPU type as a proxy.
+ // On Cyclone, unaligned 128-bit stores are slow.
+ *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
+ // See comments in performSTORECombine() for more details about
+ // these conditions.
+
+ // Code that uses clang vector extensions can mark that it
+ // wants unaligned accesses to be treated as fast by
+ // underspecifying alignment to be 1 or 2.
+ Align <= 2 ||
+
+ // Disregard v2i64. Memcpy lowering produces those and splitting
+ // them regresses performance on micro-benchmarks and olden/bh.
+ VT == MVT::v2i64;
+ }
+ return true;
+}
+
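+// Minimal usage sketch (assumed caller, not from this change): memcpy
+// lowering can query
+//   bool Fast;
+//   if (TLI.allowsMisalignedMemoryAccesses(MVT::v2i64, 0, 1, &Fast) && Fast)
+//     ... use wide unaligned accesses ...
+// so reporting v2i64 as fast above keeps 16-byte copies cheap on Cyclone.
+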
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -809,9 +854,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
+ case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
+ case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
+ case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
- case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
- case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
case AArch64ISD::DUP: return "AArch64ISD::DUP";
case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
@@ -931,8 +977,7 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
- MachineFunction::iterator It = MBB;
- ++It;
+ MachineFunction::iterator It = ++MBB->getIterator();
unsigned DestReg = MI->getOperand(0).getReg();
unsigned IfTrueReg = MI->getOperand(1).getReg();
@@ -1141,8 +1186,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
- cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
@@ -1156,8 +1200,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// the absence of information about op2.
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
- } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
!isUnsignedIntSetCC(CC)) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
@@ -1167,14 +1210,230 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = LHS.getOperand(0);
}
- return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
+/// \defgroup AArch64CCMP CMP;CCMP matching
+///
+/// These functions deal with the formation of CMP;CCMP;... sequences.
+/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
+/// a comparison. They set the NZCV flags to a predefined value if their
+/// predicate is false. This allows us to express arbitrary conjunctions, for
+/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
+/// expressed as:
+/// cmp A
+/// ccmp B, inv(CB), CA
+/// check for CB flags
+///
+/// In general we can create code for arbitrary "... (and (and A B) C)"
+/// sequences. We can also implement some "or" expressions, because "(or A B)"
+/// is equivalent to "not (and (not A) (not B))" and we can implement some
+/// negation operations:
+/// We can negate the results of a single comparison by inverting the flags
+/// used when the predicate fails and inverting the flags tested in the next
+/// instruction; we can also negate the results of the whole previous
+/// conditional compare sequence by inverting the flags tested in the next
+/// instruction. However there is no way to negate the result of a partial
+/// sequence.
+///
+/// Therefore on encountering an "or" expression we can negate the subtree on
+/// one side and have to be able to push the negate to the leaves of the subtree
+/// on the other side (see also the comments in code). As complete example:
+/// "or (or (setCA (cmp A)) (setCB (cmp B)))
+/// (and (setCC (cmp C)) (setCD (cmp D)))"
+/// is transformed to
+/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
+/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
+/// and implemented as:
+/// cmp C
+/// ccmp D, inv(CD), CC
+/// ccmp A, CA, inv(CD)
+/// ccmp B, CB, inv(CA)
+/// check for CB flags
+/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
+/// by conditional compare sequences.
+/// @{
+
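As a concrete, hand-written illustration of the sequences described above (not compiler output; registers, immediates and the label are made up), a C condition such as "a == 0 && b > 5" can be evaluated without branches:

    cmp  w0, #0          // flags = compare(a, 0)
    ccmp w1, #5, #4, eq  // if eq: flags = compare(b, 5); else NZCV = 0b0100 (Z=1, so "gt" fails)
    b.gt .Ltaken         // taken iff a == 0 && b > 5
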
+/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
+static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue CCOp,
+ SDValue Condition, unsigned NZCV,
+ SDLoc DL, SelectionDAG &DAG) {
+ unsigned Opcode = 0;
+ if (LHS.getValueType().isFloatingPoint())
+ Opcode = AArch64ISD::FCCMP;
+ else if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = RHS.getOperand(0);
+ if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // See emitComparison() on why we can only do this for SETEQ and SETNE.
+ Opcode = AArch64ISD::CCMN;
+ RHS = RHS.getOperand(1);
+ }
+ }
+ if (Opcode == 0)
+ Opcode = AArch64ISD::CCMP;
+
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+}
+
+/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
+/// CanPushNegate is set to true if we can push a negate operation through
+/// the tree in a way that we are left with AND operations and negate operations
+/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
+/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
+/// brought into such a form.
+static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
+ unsigned Depth = 0) {
+ if (!Val.hasOneUse())
+ return false;
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ CanPushNegate = true;
+ return true;
+ }
+ // Protect against stack overflow.
+ if (Depth > 15)
+ return false;
+ if (Opcode == ISD::AND || Opcode == ISD::OR) {
+ SDValue O0 = Val->getOperand(0);
+ SDValue O1 = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
+ return false;
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
+ return false;
+ // We cannot push a negate through an AND operation (it would become an OR),
+ // we can however change a (not (or x y)) to (and (not x) (not y)) if we can
+ // push the negate through the x/y subtrees.
+ CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
+ return true;
+ }
+ return false;
+}
+
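The push-negate rule used above is De Morgan's law applied recursively. A minimal, self-contained C++ check of the two identities involved (illustrative only, not part of the patch):

    #include <cassert>
    int main() {
      for (bool x : {false, true})
        for (bool y : {false, true}) {
          assert(!(x || y) == (!x && !y)); // a negate pushes through an OR...
          assert(!(x && y) == (!x || !y)); // ...but through an AND it creates an OR,
        }                                  // which a partial CCMP chain cannot express.
      return 0;
    }
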
+/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
+/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
+/// Tries to transform the given i1 producing node @p Val to a series of
+/// compare and conditional compare operations. @returns an NZCV flags
+/// producing node and sets @p OutCC to the flags that should be tested, or
+/// returns SDValue() if the transformation was not possible.
+/// On recursive invocations @p PushNegate may be set to true to have negation
+/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
+/// for the comparisons in the current subtree; @p Depth limits the search
+/// depth to avoid stack overflow.
+static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
+ AArch64CC::CondCode &OutCC, bool PushNegate = false,
+ SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
+ unsigned Depth = 0) {
+ // We're at a tree leaf: produce a compare or a conditional compare operation.
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
+ bool isInteger = LHS.getValueType().isInteger();
+ if (PushNegate)
+ CC = getSetCCInverse(CC, isInteger);
+ SDLoc DL(Val);
+ // Determine OutCC and handle FP special case.
+ if (isInteger) {
+ OutCC = changeIntCCToAArch64CC(CC);
+ } else {
+ assert(LHS.getValueType().isFloatingPoint());
+ AArch64CC::CondCode ExtraCC;
+ changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
+ // Surprisingly, some floating point conditions can't be tested with a
+ // single condition code. Construct an additional comparison in this case.
+ // See comment below on how we deal with OR conditions.
+ if (ExtraCC != AArch64CC::AL) {
+ SDValue ExtraCmp;
+ if (!CCOp.getNode())
+ ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ else {
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ // Note that we want the inverse of ExtraCC, so NZCV is not inverted.
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
+ ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
+ NZCV, DL, DAG);
+ }
+ CCOp = ExtraCmp;
+ Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ }
+ }
+
+ // Produce a normal comparison if we are first in the chain
+ if (!CCOp.getNode())
+ return emitComparison(LHS, RHS, CC, DL, DAG);
+ // Otherwise produce a ccmp.
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+ return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
+ DAG);
+ } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse())
+ return SDValue();
+
+ assert((Opcode == ISD::OR || !PushNegate)
+ && "Can only push negate through OR operation");
+
+ // Check if both sides can be transformed.
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
+ return SDValue();
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
+ return SDValue();
+
+ // Do we need to negate our operands?
+ bool NegateOperands = Opcode == ISD::OR;
+ // We can negate the results of all previous operations by inverting the
+ // predicate flags giving us a free negation for one side. For the other side
+ // we need to be able to push the negation to the leafs of the tree.
+ if (NegateOperands) {
+ if (!CanPushNegateL && !CanPushNegateR)
+ return SDValue();
+ // Order the side where we can push the negate through to LHS.
+ if (!CanPushNegateL && CanPushNegateR)
+ std::swap(LHS, RHS);
+ } else {
+ bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
+ bool NeedsNegOutR = RHS->getOpcode() == ISD::OR;
+ if (NeedsNegOutL && NeedsNegOutR)
+ return SDValue();
+ // Order the side where we need to negate the output flags to RHS so it
+ // gets emitted first.
+ if (NeedsNegOutL)
+ std::swap(LHS, RHS);
+ }
+
+ // Emit RHS. If we want to negate the tree we only need to push a negate
+ // through if we are already in a PushNegate case, otherwise we can negate
+ // the "flags to test" afterwards.
+ AArch64CC::CondCode RHSCC;
+ SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
+ CCOp, Predicate, Depth+1);
+ if (NegateOperands && !PushNegate)
+ RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+ // Emit LHS. We must push the negate through if we need to negate it.
+ SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
+ CmpR, RHSCC, Depth+1);
+ // If we transformed an OR to an AND then we have to negate the result
+ // (or absorb a PushNegate resulting in a double negation).
+ if (Opcode == ISD::OR && !PushNegate)
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ return CmpL;
+}
+
+/// @}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
- SDValue Cmp;
- AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
@@ -1229,47 +1488,56 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
- // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
- // For the i8 operand, the largest immediate is 255, so this can be easily
- // encoded in the compare instruction. For the i16 operand, however, the
- // largest immediate cannot be encoded in the compare.
- // Therefore, use a sign extending load and cmn to avoid materializing the -1
- // constant. For example,
- // movz w1, #65535
- // ldrh w0, [x0, #0]
- // cmp w0, w1
- // >
- // ldrsh w0, [x0, #0]
- // cmn w0, #1
- // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
- // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
- // both the LHS and RHS are truely zero extended and to make sure the
- // transformation is profitable.
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
- if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
- isa<LoadSDNode>(LHS)) {
- if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
- cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
- LHS.getNode()->hasNUsesOfValue(1, 0)) {
- int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
- if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
- SDValue SExt =
- DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
- DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
- return Cmp;
- }
+ const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
+
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the
+ // -1 constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to
+ // ensure both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
+ cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
}
+
+ if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
+ if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
+ }
+ }
+
+ if (!Cmp) {
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
- Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
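A hedged source-level illustration of the sign-extending-load trick documented in the comment above (the C function and the register allocation are invented for exposition):

    // int f(unsigned short *p) { return *p == 0xFFFF; }
    //
    // without the transform:        with it:
    //   ldrh w8, [x0]                 ldrsh w8, [x0]
    //   mov  w9, #65535               cmn   w8, #1
    //   cmp  w8, w9                   cset  w0, eq
    //   cset w0, eq
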
@@ -1391,8 +1659,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
}
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
@@ -1571,8 +1838,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
@@ -1581,6 +1848,16 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
+ unsigned NumElts = InVT.getVectorNumElements();
+
+ // f16 vectors are promoted to f32 before a conversion.
+ if (InVT.getVectorElementType() == MVT::f16) {
+ MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
+ }
if (VT.getSizeInBits() < InVT.getSizeInBits()) {
SDLoc dl(Op);
@@ -1628,8 +1905,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -1931,6 +2207,31 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDLoc dl(Op);
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_thread_pointer: {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::aarch64_neon_smax:
+ return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_umax:
+ return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_smin:
+ return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_umin:
+ return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+}
+
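For example, with this hook in place a NEON min/max intrinsic is rewritten to the generic node and then selected through the Legal markings added earlier; a hedged IR-to-assembly sketch (virtual register names are illustrative):

    ; %r = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
    ; now lowers via ISD::SMAX and selects to:
    ;   smax v0.4s, v0.4s, v1.4s
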
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2032,14 +2333,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFSINCOS(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
}
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
- return 2;
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -2214,9 +2512,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- MemVT, false, false, false, 0);
+ ArgValue = DAG.getExtLoad(
+ ExtType, DL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MemVT, false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -2289,9 +2588,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 8), false, false, 0);
+ SDValue Store = DAG.getStore(
+ Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8), false,
+ false, 0);
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -2318,9 +2618,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16), false, false, 0);
+ SDValue Store = DAG.getStore(
+ Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16),
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
@@ -2453,8 +2754,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
- if (!ArgLocs[i].isRegLoc())
+ for (const CCValAssign &ArgLoc : ArgLocs)
+ if (!ArgLoc.isRegLoc())
return false;
}
@@ -2758,7 +3059,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
- DstInfo = MachinePointerInfo::getFixedStack(FI);
+ DstInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
@@ -2768,7 +3070,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
- DstInfo = MachinePointerInfo::getStack(LocMemOffset);
+ DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
+ LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2802,9 +3105,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
+ for (auto &RegToPass : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
+ RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -2860,9 +3163,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add argument registers to the end of the list so that they are known live
// into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ for (auto &RegToPass : RegsToPass)
+ Ops.push_back(DAG.getRegister(RegToPass.first,
+ RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
@@ -2968,6 +3271,19 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (AArch64::GPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
RetOps[0] = Chain; // Update chain.
@@ -3010,11 +3326,12 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags);
SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
- SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /*isVolatile=*/ false,
- /*isNonTemporal=*/ true,
- /*isInvariant=*/ true, 8);
+ SDValue GlobalAddr = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ /*isVolatile=*/false,
+ /*isNonTemporal=*/true,
+ /*isInvariant=*/true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
DAG.getConstant(GN->getOffset(), DL, PtrVT));
@@ -3087,8 +3404,9 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet =
- DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
- false, true, true, 8);
+ DAG.getLoad(MVT::i64, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), false,
+ true, true, 8);
Chain = FuncTLVGet.getValue(1);
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -3160,6 +3478,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
@@ -3277,8 +3599,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
unsigned Opc = LHS.getOpcode();
- if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isOne() &&
+ if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
@@ -3392,17 +3713,11 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
- if (SrcVT != VT) {
- if (SrcVT == MVT::f32 && VT == MVT::f64)
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
- DAG.getIntPtrConstant(0, DL));
- else
- // FIXME: Src type is different, bail out for now. Can VT really be a
- // vector type?
- return SDValue();
- }
+
+ if (SrcVT.bitsLT(VT))
+ In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
+ else if (SrcVT.bitsGT(VT))
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
EVT EltVT;
@@ -3410,7 +3725,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
- VecVT = MVT::v4i32;
+ VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
if (!VT.isVector()) {
@@ -3571,32 +3886,6 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
-/// A SELECT_CC operation is really some kind of max or min if both values being
-/// compared are, in some sense, equal to the results in either case. However,
-/// it is permissible to compare f32 values and produce directly extended f64
-/// values.
-///
-/// Extending the comparison operands would also be allowed, but is less likely
-/// to happen in practice since their use is right here. Note that truncate
-/// operations would *not* be semantically equivalent.
-static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
- if (Cmp == Result)
- return (Cmp.getValueType() == MVT::f32 ||
- Cmp.getValueType() == MVT::f64);
-
- ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
- ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
- if (CCmp && CResult && Cmp.getValueType() == MVT::f32 &&
- Result.getValueType() == MVT::f64) {
- bool Lossy;
- APFloat CmpVal = CCmp->getValueAPF();
- CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy);
- return CResult->getValueAPF().bitwiseIsEqual(CmpVal);
- }
-
- return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
-}
-
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, SDLoc dl,
@@ -3614,7 +3903,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
}
}
- // Handle integers first.
+ // Also handle f16, for which we need to do a f32 comparison.
+ if (LHS.getValueType() == MVT::f16) {
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
+ RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
+ }
+
+ // Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
@@ -3637,9 +3932,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
- if (CVal && CVal->isAllOnesValue()) {
+ if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, true);
@@ -3647,9 +3940,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
- if (CVal && CVal->isNullValue()) {
+ if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, true);
@@ -4109,46 +4400,57 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+
+ // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
+ // is "undef". We wanted 0, so CSEL it directly.
+ SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+ ISD::SETEQ, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+ HiBitsForLo =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+ HiBitsForLo, CCVal, Cmp);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
- ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue LoForNormalShift =
+ DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
- SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
- SDValue Lo =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+ Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+ dl, DAG);
+ CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+ SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+ LoForNormalShift, CCVal, Cmp);
// AArch64 shifts larger than the register width are wrapped rather than
// clamped, so we can't just emit "hi >> x".
- SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue TrueValHi = Opc == ISD::SRA
- ? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, dl,
- MVT::i64))
- : DAG.getConstant(0, dl, VT);
- SDValue Hi =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
+ SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue HiForBigShift =
+ Opc == ISD::SRA
+ ? DAG.getNode(Opc, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, MVT::i64))
+ : DAG.getConstant(0, dl, VT);
+ SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+ HiForNormalShift, CCVal, Cmp);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
+
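Conceptually, the lowering above computes a 128-bit right shift; a hedged C-style sketch of the values being selected (names are illustrative):

    // For SRL_PARTS with 0 <= shamt < 128 (SRA_PARTS analogous, arithmetic):
    //   hiForLo = (shamt == 0) ? 0 : hi << (64 - shamt); // CSEL guards the undef shl-by-64
    //   lo      = (shamt < 64) ? (lo >> shamt) | hiForLo // "normal" shift
    //                          : hi >> (shamt - 64);     // "big" shift, selected via GE
    //   hi      = (shamt < 64) ? hi >> shamt
    //                          : 0;                      // or hi >> 63 for SRA_PARTS
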
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -4156,31 +4458,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+
+ // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
+ // is "undef". We wanted 0, so CSEL it directly.
+ SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+ ISD::SETEQ, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+ LoBitsForHi =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+ LoBitsForHi, CCVal, Cmp);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue HiForNormalShift =
+ DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
- ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
- SDValue Hi =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
+ Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+ dl, DAG);
+ CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+ HiForNormalShift, CCVal, Cmp);
// AArch64 shifts of larger than register sizes are wrapped rather than
// clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, dl, VT);
- SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Lo =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+ SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
+ SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+ LoForNormalShift, CCVal, Cmp);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -4362,8 +4674,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C || C->getZExtValue() != 0)
+ if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
@@ -5653,11 +5964,10 @@ static SDValue NormalizeBuildVector(SDValue Op,
return Op;
SmallVector<SDValue, 16> Ops;
- for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
- SDValue Lane = Op.getOperand(I);
- if (Lane.getOpcode() == ISD::Constant) {
+ for (SDValue Lane : Op->ops()) {
+ if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
- cast<ConstantSDNode>(Lane)->getZExtValue());
+ CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
}
Ops.push_back(Lane);
@@ -5997,8 +6307,7 @@ FailedModImm:
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
- SDValue shuffle = ReconstructShuffle(Op, DAG);
- if (shuffle != SDValue())
+ if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
@@ -6017,7 +6326,10 @@ FailedModImm:
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register.
- if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) {
+ // Do not do this for UNDEF/LOAD nodes because we have better patterns
+ // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
+ if (Op0.getOpcode() != ISD::UNDEF && Op0.getOpcode() != ISD::LOAD &&
+ (ElemSize == 32 || ElemSize == 64)) {
unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
MachineSDNode *N =
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
@@ -6123,24 +6435,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned Val = Cst->getZExtValue();
unsigned Size = Op.getValueType().getSizeInBits();
- if (Val == 0) {
- switch (Size) {
- case 8:
- return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 16:
- return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 32:
- return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 64:
- return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(),
- Op.getOperand(0));
- default:
- llvm_unreachable("Unexpected vector type in extract_subvector!");
- }
- }
+
+ // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
+ if (Val == 0)
+ return Op;
+
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
@@ -6213,26 +6512,20 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation. For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-/// 1 <= |Value| <= ElementBits for a right shift; or
-/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
- int64_t &Cnt) {
+/// operand of a vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits for a right shift; or
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
- if (isIntrinsic)
- Cnt = -Cnt;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
@@ -6261,8 +6554,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SRA:
case ISD::SRL:
// Right shift immediate
- if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) &&
- Cnt < EltSize) {
+ if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
@@ -6451,7 +6743,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
@@ -6477,7 +6769,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
@@ -6720,10 +7012,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- // Skip illegal vector types.
- if (VecSize != 64 && VecSize != 128)
+ // Skip if we do not have NEON and skip illegal vector types.
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
return false;
// A pointer vector can not be the return type of the ldN intrinsics. Need to
@@ -6806,10 +7098,10 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- // Skip illegal vector types.
- if (SubVecSize != 64 && SubVecSize != 128)
+ // Skip if we do not have NEON and skip illegal vector types.
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
return false;
Value *Op0 = SVI->getOperand(0);
@@ -7228,8 +7520,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
- if (Res != SDValue())
+ if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
@@ -7242,7 +7533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// If the result of an integer load is only used by an integer-to-float
// conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead.
- // This eliminates an "integer-to-vector-move UOP and improve throughput.
+ // This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
@@ -7265,6 +7556,134 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Fold a floating-point multiply by a power of two into a floating-point to
+/// fixed-point conversion.
+static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ SDValue ConstVec = Op->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
+ if (FloatBits != 32 && FloatBits != 64)
+ return SDValue();
+
+ MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
+ if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+ return SDValue();
+
+ // Avoid conversions where iN is larger than the float (e.g., float -> i64).
+ if (IntBits > FloatBits)
+ return SDValue();
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t Bits = IntBits == 64 ? 64 : 32;
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
+ if (C == -1 || C == 0 || C > Bits)
+ return SDValue();
+
+ MVT ResTy;
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ switch (NumLanes) {
+ default:
+ return SDValue();
+ case 2:
+ ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+ break;
+ case 4:
+ ResTy = MVT::v4i32;
+ break;
+ }
+
+ SDLoc DL(N);
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
+ : Intrinsic::aarch64_neon_vcvtfp2fxu;
+ SDValue FixConv =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
+ DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
+ Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ // We can handle smaller integers by generating an extra trunc.
+ if (IntBits < FloatBits)
+ FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
+
+ return FixConv;
+}
+
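The combine above folds a multiply by 2^C feeding a float-to-int conversion into a single fixed-point convert; a hedged before/after sketch (the constant 16.0 = 2^4 is illustrative):

    ;   %m = fmul <4 x float> %v, <float 16.0, float 16.0, float 16.0, float 16.0>
    ;   %i = fptosi <4 x float> %m to <4 x i32>
    ; becomes a single convert with 4 fractional bits:
    ;   fcvtzs v0.4s, v0.4s, #4
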
+/// Fold a floating-point divide by a power of two into a fixed-point to
+/// floating-point conversion.
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ unsigned Opc = Op->getOpcode();
+ if (!Op.getValueType().isVector() ||
+ (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+ return SDValue();
+
+ SDValue ConstVec = N->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ int32_t IntBits = IntTy.getSizeInBits();
+ if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+ return SDValue();
+
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ int32_t FloatBits = FloatTy.getSizeInBits();
+ if (FloatBits != 32 && FloatBits != 64)
+ return SDValue();
+
+ // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
+ if (IntBits > FloatBits)
+ return SDValue();
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
+ if (C == -1 || C == 0 || C > FloatBits)
+ return SDValue();
+
+ MVT ResTy;
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ switch (NumLanes) {
+ default:
+ return SDValue();
+ case 2:
+ ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+ break;
+ case 4:
+ ResTy = MVT::v4i32;
+ break;
+ }
+
+ SDLoc DL(N);
+ SDValue ConvInput = Op.getOperand(0);
+ bool IsSigned = Opc == ISD::SINT_TO_FP;
+ if (IntBits < FloatBits)
+ ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+ ResTy, ConvInput);
+
+ unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
+ : Intrinsic::aarch64_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
+ DAG.getConstant(C, DL, MVT::i32));
+}
+
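Symmetrically, the combine above folds an int-to-float conversion followed by a divide by 2^C into a single fixed-point convert; a hedged sketch (constants illustrative):

    ;   %f = sitofp <4 x i32> %v to <4 x float>
    ;   %r = fdiv <4 x float> %f, <float 16.0, float 16.0, float 16.0, float 16.0>
    ; becomes a single convert with 4 fractional bits:
    ;   scvtf v0.4s, v0.4s, #4
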
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
@@ -7964,7 +8383,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
- break;
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
@@ -7978,10 +8396,16 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
- return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
- return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_fmaxnm:
+ return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_fminnm:
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
@@ -8141,7 +8565,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
unsigned Alignment = std::min(OrigAlignment, EltOffset);
// Create scalar stores. This is at least as good as the code sequence for a
- // split unaligned store wich is a dup.s, ext.b, and two stores.
+ // split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(St);
@@ -8162,10 +8586,9 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
return NewST1;
}
-static SDValue performSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG,
- const AArch64Subtarget *Subtarget) {
+static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
if (!DCI.isBeforeLegalize())
return SDValue();
@@ -8173,15 +8596,17 @@ static SDValue performSTORECombine(SDNode *N,
if (S->isVolatile())
return SDValue();
+ // FIXME: The logic for deciding if an unaligned store should be split should
+ // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
+ // a call to that function here.
+
// Cyclone has bad performance on unaligned 16B stores when crossing line and
// page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
- // Don't split at Oz.
- MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
- if (IsMinSize)
+ // Don't split at -Oz.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
SDValue StVal = S->getValue();
@@ -8204,8 +8629,7 @@ static SDValue performSTORECombine(SDNode *N,
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
- SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
- if (ReplacedSplat != SDValue())
+ if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S))
return ReplacedSplat;
SDLoc DL(S);
@@ -8326,6 +8750,299 @@ static SDValue performPostLD1Combine(SDNode *N,
return SDValue();
}
+/// Simplify \p Addr given that the top byte of it is ignored by HW during
+/// address translation.
+static bool performTBISimplification(SDValue Addr,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ APInt DemandedMask = APInt::getLowBitsSet(64, 56);
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ return true;
+ }
+ return false;
+}
+
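AArch64's Top Byte Ignore (TBI) feature means bits [63:56] of a pointer take no part in address translation, so explicitly masking them off before a memory access is dead work. A hedged sketch of the kind of pattern this simplification removes (registers illustrative):

    //   and x8, x0, #0x00ffffffffffffff   // strip the tag byte
    //   str w1, [x8]
    // can become simply:
    //   str w1, [x0]                      // hardware ignores bits [63:56]
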
+static SDValue performSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ SDValue Split = split16BStores(N, DCI, DAG, Subtarget);
+ if (Split.getNode())
+ return Split;
+
+ if (Subtarget->supportsAddressTopByteIgnored() &&
+ performTBISimplification(N->getOperand(2), DCI, DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// This function handles the log2-shuffle pattern produced by the
+/// LoopVectorizer for the across vector reduction. It consists of
+/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
+/// are reduced, where s is an induction variable running from 0 to
+/// log2(NumVectorElements) - 1.
+static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
+ unsigned Op,
+ SelectionDAG &DAG) {
+ EVT VTy = OpV->getOperand(0).getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ int NumVecElts = VTy.getVectorNumElements();
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+ if (NumVecElts != 4)
+ return SDValue();
+ } else {
+ if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+ return SDValue();
+ }
+
+ int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
+ SDValue PreOp = OpV;
+ // Iterate over each step of the across vector reduction.
+ for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
+ SDValue CurOp = PreOp.getOperand(0);
+ SDValue Shuffle = PreOp.getOperand(1);
+ if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // Try to swap the 1st and 2nd operand as add and min/max instructions
+ // are commutative.
+ CurOp = PreOp.getOperand(1);
+ Shuffle = PreOp.getOperand(0);
+ if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return SDValue();
+ }
+
+ // Check that the input vector is fed by the operator we want to handle,
+ // except at the last step, where the very first input vector is not
+ // necessarily produced by the same operator we are handling.
+ if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
+ return SDValue();
+
+ // Check if it forms one step of the across vector reduction.
+ // E.g.,
+ // %cur = add %1, %0
+ // %shuffle = vector_shuffle %cur, <2, 3, u, u>
+ // %pre = add %cur, %shuffle
+ if (Shuffle.getOperand(0) != CurOp)
+ return SDValue();
+
+ int NumMaskElts = 1 << CurStep;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
+ // Check mask values in each step.
+ // We expect the shuffle mask in each step to follow a specific pattern
+ // denoted here by the <M, U> form, where M is a sequence of integers
+ // starting at NumMaskElts and increasing by 1, and the number of integers
+ // in M should be NumMaskElts. U is a sequence of UNDEFs, and the number
+ // of UNDEFs in U should be NumVecElts - NumMaskElts.
+ // E.g., for <8 x i16>, the mask values in each step should be:
+ // step 0 : <1,u,u,u,u,u,u,u>
+ // step 1 : <2,3,u,u,u,u,u,u>
+ // step 2 : <4,5,6,7,u,u,u,u>
+ for (int i = 0; i < NumVecElts; ++i)
+ if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
+ (i >= NumMaskElts && !(Mask[i] < 0)))
+ return SDValue();
+
+ PreOp = CurOp;
+ }
+ unsigned Opcode;
+ bool IsIntrinsic = false;
+
+ switch (Op) {
+ default:
+ llvm_unreachable("Unexpected operator for across vector reduction");
+ case ISD::ADD:
+ Opcode = AArch64ISD::UADDV;
+ break;
+ case ISD::SMAX:
+ Opcode = AArch64ISD::SMAXV;
+ break;
+ case ISD::UMAX:
+ Opcode = AArch64ISD::UMAXV;
+ break;
+ case ISD::SMIN:
+ Opcode = AArch64ISD::SMINV;
+ break;
+ case ISD::UMIN:
+ Opcode = AArch64ISD::UMINV;
+ break;
+ case ISD::FMAXNUM:
+ Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+ IsIntrinsic = true;
+ break;
+ case ISD::FMINNUM:
+ Opcode = Intrinsic::aarch64_neon_fminnmv;
+ IsIntrinsic = true;
+ break;
+ }
+ SDLoc DL(N);
+
+ return IsIntrinsic
+ ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+ DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+ : DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+ DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
+/// Target-specific DAG combine for the across vector min/max reductions.
+/// This function specifically handles the final clean-up step of the vector
+/// min/max reductions produced by the LoopVectorizer. It matches the
+/// log2-shuffle pattern, which progressively narrows the vector to find the
+/// final min/max value across all of its elements.
+/// For example, for a <16 x i8> vector :
+/// svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
+/// %smax0 = smax %arr, svn0
+/// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %smax1 = smax %smax0, %svn1
+/// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %smax2 = smax %smax1, svn2
+/// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %sc = setcc %smax2, %svn3, gt
+/// %n0 = extract_vector_elt %sc, #0
+/// %n1 = extract_vector_elt %smax2, #0
+/// %n2 = extract_vector_elt $smax2, #1
+/// %result = select %n0, %n1, n2
+/// becomes:
+/// %1 = smaxv %0
+/// %result = extract_vector_elt %1, 0
+static SDValue
+performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue IfTrue = N->getOperand(1);
+ SDValue IfFalse = N->getOperand(2);
+
+ // Check if the SELECT merges up the final result of the min/max
+ // from a vector.
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // Expect N0 to be fed by a SETCC.
+ SDValue SetCC = N0.getOperand(0);
+ EVT SetCCVT = SetCC.getValueType();
+ if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
+ SetCCVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue VectorOp = SetCC.getOperand(0);
+ unsigned Op = VectorOp->getOpcode();
+ // Check if the input vector is fed by the operator we want to handle.
+ if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+ Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
+ return SDValue();
+
+ EVT VTy = VectorOp.getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ if (VTy.getSizeInBits() < 64)
+ return SDValue();
+
+ EVT EltTy = VTy.getVectorElementType();
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+ if (EltTy != MVT::f32)
+ return SDValue();
+ } else {
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+ return SDValue();
+ }
+
+ // Check if extracting from the same vector.
+ // For example,
+ // %sc = setcc %vector, %svn1, gt
+ // %n0 = extract_vector_elt %sc, #0
+ // %n1 = extract_vector_elt %vector, #0
+ // %n2 = extract_vector_elt $vector, #1
+ if (!(VectorOp == IfTrue->getOperand(0) &&
+ VectorOp == IfFalse->getOperand(0)))
+ return SDValue();
+
+ // Check that the condition code matches the operator type.
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
+ (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
+ (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
+ (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+ (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+ CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+ CC != ISD::SETGE) ||
+ (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+ CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+ CC != ISD::SETLE))
+ return SDValue();
+
+ // Expect to check only lane 0 from the vector SETCC.
+ if (!isNullConstant(N0.getOperand(1)))
+ return SDValue();
+
+ // Expect to extract the true value from lane 0.
+ if (!isNullConstant(IfTrue.getOperand(1)))
+ return SDValue();
+
+ // Expect to extract the false value from lane 1.
+ if (!isOneConstant(IfFalse.getOperand(1)))
+ return SDValue();
+
+ return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
+}
+
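For orientation, an assumed source-level origin of this pattern (not taken from the patch): the LoopVectorizer's epilogue for a max-reduction loop ends in exactly the setcc/extract/select sequence matched above.

    // Hedged example: a signed max reduction over bytes; the vectorized
    // epilogue becomes the log2-shuffle ladder that folds into SMAXV.
    signed char max_reduce(const signed char *A, int N) {
      signed char M = A[0];
      for (int I = 1; I < N; ++I)
        if (A[I] > M)
          M = A[I];
      return M;
    }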
+/// Target-specific DAG combine for the across vector add reduction.
+/// This function specifically handles the final clean-up step of the vector
+/// add reduction produced by the LoopVectorizer. It matches the log2-shuffle
+/// pattern, which adds all elements of a vector together.
+/// For example, for a <4 x i32> vector :
+/// %1 = vector_shuffle %0, <2,3,u,u>
+/// %2 = add %0, %1
+/// %3 = vector_shuffle %2, <1,u,u,u>
+/// %4 = add %2, %3
+/// %result = extract_vector_elt %4, 0
+/// becomes :
+/// %1 = uaddv %0
+/// %result = extract_vector_elt %1, 0
+static SDValue
+performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Check if the input vector is fed by the ADD.
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // The vector extract index must be constant zero because we expect the
+ // final result of the reduction to be placed in lane 0.
+ if (!isNullConstant(N1))
+ return SDValue();
+
+ EVT VTy = N0.getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ EVT EltTy = VTy.getVectorElementType();
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+ return SDValue();
+
+ if (VTy.getSizeInBits() < 64)
+ return SDValue();
+
+ return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
+}
+
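Again for orientation (an assumed example, not from the patch), the add pattern is what a plain sum reduction vectorizes into:

    // Hedged example: the vectorized epilogue of this loop is the
    // shuffle/add ladder above, which the combine folds into UADDV.
    int sum(const int *A, int N) {
      int S = 0;
      for (int I = 0; I < N; ++I)
        S += A[I];
      return S;
    }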
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -8751,10 +9468,10 @@ static SDValue performBRCONDCombine(SDNode *N,
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
- if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+ if (isNullConstant(LHS))
std::swap(LHS, RHS);
- if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
+ if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
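The isNullConstant helper adopted in this hunk is the generic SelectionDAG predicate; its behavior is roughly the following (a sketch assuming the usual LLVM headers, not part of the diff):

    // True iff V is a ConstantSDNode whose value is zero.
    static bool isNullConstantSketch(SDValue V) {
      auto *C = dyn_cast<ConstantSDNode>(V);
      return C && C->isNullValue();
    }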
@@ -8868,75 +9585,6 @@ static SDValue performSelectCombine(SDNode *N,
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
-/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match FMIN/FMAX patterns.
-static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
- // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
- // Unless the NoNaNsFPMath option is set, be careful about NaNs:
- // vmax/vmin return NaN if either operand is a NaN;
- // only do the transformation when it matches that behavior.
-
- SDValue CondLHS = N->getOperand(0);
- SDValue CondRHS = N->getOperand(1);
- SDValue LHS = N->getOperand(2);
- SDValue RHS = N->getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- unsigned Opcode;
- bool IsReversed;
- if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
- selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
- IsReversed = false; // x CC y ? x : y
- } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
- selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
- IsReversed = true ; // x CC y ? y : x
- } else {
- return SDValue();
- }
-
- bool IsUnordered = false, IsOrEqual;
- switch (CC) {
- default:
- return SDValue();
- case ISD::SETULT:
- case ISD::SETULE:
- IsUnordered = true;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETLT:
- case ISD::SETLE:
- IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
- Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
- break;
-
- case ISD::SETUGT:
- case ISD::SETUGE:
- IsUnordered = true;
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETGT:
- case ISD::SETGE:
- IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
- Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
- break;
- }
-
- // If LHS is NaN, an ordered comparison will be false and the result will be
- // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
- // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- return SDValue();
-
- // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
- // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
- // used for unsafe math or if one of the operands is known to be nonzero.
- if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- return SDValue();
-
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
-}
-
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
@@ -8961,6 +9609,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return performFpToIntCombine(N, DAG, Subtarget);
+ case ISD::FDIV:
+ return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -8973,12 +9626,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performBitcastCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
- case ISD::SELECT:
- return performSelectCombine(N, DCI);
+ case ISD::SELECT: {
+ SDValue RV = performSelectCombine(N, DCI);
+ if (!RV.getNode())
+ RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
+ return RV;
+ }
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
- case ISD::SELECT_CC:
- return performSelectCCCombine(N, DCI.DAG);
+ case ISD::LOAD:
+ if (performTBISimplification(N->getOperand(1), DCI, DAG))
+ return SDValue(N, 0);
+ break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
@@ -8991,6 +9650,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNVCASTCombine(N);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -9157,6 +9818,20 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
+static void ReplaceReductionResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned InterOp,
+ unsigned AcrossOp) {
+ EVT LoVT, HiVT;
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
+ SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
+ Results.push_back(SplitVal);
+}
+
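A scalar model of what ReplaceReductionResults computes, under the assumption InterOp = ISD::ADD and AcrossOp = UADDV (an illustration, not DAG code): combine the two halves lane-wise, then reduce the narrower vector.

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Split a 2N-lane vector, ADD the halves lane-wise (InterOp), then do
    // the across-lanes add on the N-lane result (AcrossOp).
    template <std::size_t N>
    uint32_t uaddv_split(const std::array<uint32_t, 2 * N> &V) {
      std::array<uint32_t, N> Inter{};
      for (std::size_t I = 0; I < N; ++I)
        Inter[I] = V[I] + V[I + N];
      uint32_t Acc = 0;
      for (uint32_t E : Inter)
        Acc += E;
      return Acc;
    }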
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
@@ -9165,6 +9840,24 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
+ case AArch64ISD::SADDV:
+ ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
+ return;
+ case AArch64ISD::UADDV:
+ ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
+ return;
+ case AArch64ISD::SMINV:
+ ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
+ return;
+ case AArch64ISD::UMINV:
+ ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
+ return;
+ case AArch64ISD::SMAXV:
+ ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
+ return;
+ case AArch64ISD::UMAXV:
+ ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -9177,10 +9870,10 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
return true;
}
-bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
- return NumUsers > 2;
+ return 3;
}
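Schematically, the threshold of three admits the classic reciprocal rewrite only when it pays for itself (an illustration, not from the patch; subject to the pass's usual fast-math constraints):

    // One fdiv plus three fmuls replaces three fdivs by the same divisor.
    void scale3(float *X, float *Y, float *Z, float D) {
      float R = 1.0f / D; // single expensive divide
      *X *= R;
      *Y *= R;
      *Z *= R;
    }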
TargetLoweringBase::LegalizeTypeAction
@@ -9206,20 +9899,21 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return Size == 128;
+ return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128 ? AtomicRMWExpansionKind::LLSC
- : AtomicRMWExpansionKind::None;
+ return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
-bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
return true;
}
@@ -9258,6 +9952,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
cast<PointerType>(Addr->getType())->getElementType());
}
+void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+}
+
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
@@ -9294,3 +9995,70 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
return Ty->isArrayTy();
}
+
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+ EVT) const {
+ return false;
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!Subtarget->isTargetAndroid())
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ const unsigned TlsOffset = 0x48;
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_thread_pointer);
+ return IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+}
+
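In effect (a scalar model assuming the Android TLS layout cited above; thread_pointer is a hypothetical stand-in for llvm.aarch64.thread_pointer):

    #include <cstdint>

    extern "C" uint8_t *thread_pointer(); // assumed stand-in

    // On Android the SafeStack pointer slot is thread_pointer() + 0x48.
    uint8_t **safestack_pointer_location() {
      return reinterpret_cast<uint8_t **>(thread_pointer() + 0x48);
    }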
+void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in AArch64FunctionInfo.
+ AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void AArch64TargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (AArch64::GPR64RegClass.contains(*I))
+ RC = &AArch64::GPR64RegClass;
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RC = &AArch64::FPR64RegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c73ce1e..e99616c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
+#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
@@ -58,13 +59,14 @@ enum NodeType : unsigned {
SBCS,
ANDS,
+ // Conditional compares. Operands: left, right, falsecc, cc, flags
+ CCMP,
+ CCMN,
+ FCCMP,
+
// Floating point comparison
FCMP,
- // Floating point max and min instructions.
- FMAX,
- FMIN,
-
// Scalar extract
EXTR,
@@ -217,8 +219,6 @@ class AArch64Subtarget;
class AArch64TargetMachine;
class AArch64TargetLowering : public TargetLowering {
- bool RequireStrictAlign;
-
public:
explicit AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI);
@@ -226,46 +226,35 @@ public:
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
- /// computeKnownBitsForTargetNode - Determine which of the bits specified in
- /// Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
APInt &KnownOne, const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
- /// allowsMisalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses of the specified type.
+ /// Returns true if the target allows unaligned memory accesses of the
+ /// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
unsigned Align = 1,
- bool *Fast = nullptr) const override {
- if (RequireStrictAlign)
- return false;
- // FIXME: True for Cyclone, but not necessary others.
- if (Fast)
- *Fast = true;
- return true;
- }
+ bool *Fast = nullptr) const override;
- /// LowerOperation - Provide custom lowering hooks for some operations.
+ /// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
return true;
}
- /// createFastISel - This method returns a target specific FastISel object,
- /// or null if the target does not support "fast" ISel.
+ /// This method returns a target specific FastISel object, or null if the
+ /// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
@@ -273,11 +262,11 @@ public:
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- /// isShuffleMaskLegal - Return true if the given shuffle mask can be
- /// codegen'd directly, or if it should be stack expanded.
+ /// Return true if the given shuffle mask can be codegen'd directly, or if it
+ /// should be stack expanded.
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
- /// getSetCCResultType - Return the ISD::SETCC ValueType
+ /// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -322,8 +311,8 @@ public:
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const override;
- /// isLegalAddressingMode - Return true if the addressing mode represented
- /// by AM is legal for this target, for a load/store of the specified type.
+ /// Return true if the addressing mode represented by AM is legal for this
+ /// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
@@ -335,10 +324,9 @@ public:
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+ /// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
@@ -351,25 +339,65 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
- bool hasLoadLinkedStoreConditional() const override;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
+ /// If the target has a standard location for the unsafe stack pointer,
+ /// returns the address of that location. Otherwise, returns nullptr.
+ Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ // FIXME: This is a guess. Has this been defined yet?
+ return AArch64::X0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ // FIXME: This is a guess. Has this been defined yet?
+ return AArch64::X1;
+ }
+
+ bool isCheapToSpeculateCttz() const override {
+ return true;
+ }
+
+ bool isCheapToSpeculateCtlz() const override {
+ return true;
+ }
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -392,6 +420,8 @@ private:
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
bool isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet,
@@ -470,7 +500,7 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
@@ -516,6 +546,8 @@ private:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
};
namespace AArch64 {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3f2e772..6ac2175 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -248,6 +248,12 @@ def simm7s16 : Operand<i32> {
let PrintMethod = "printImmScale<16>";
}
+def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
+def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
+def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
+def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
+def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
+
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
let DiagnosticType = "InvalidImm" # Low # "_" # High;
@@ -346,9 +352,11 @@ class fixedpoint_i64<ValueType FloatVT>
let ParserMatchClass = Imm1_64Operand;
}
+def fixedpoint_f16_i32 : fixedpoint_i32<f16>;
def fixedpoint_f32_i32 : fixedpoint_i32<f32>;
def fixedpoint_f64_i32 : fixedpoint_i32<f64>;
+def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
@@ -402,6 +410,7 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
+def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
@@ -525,6 +534,20 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
}
+// True if the 32-bit immediate is in the range [0,31]
+def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// imm0_1 predicate - True if the immediate is in the range [0,1]
+def imm0_1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -542,7 +565,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
-}]>;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
@@ -690,6 +715,17 @@ class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
}
// Floating-point immediate.
+def fpimm16 : Operand<f16>,
+ PatLeaf<(f16 fpimm), [{
+ return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
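The fpimm16 predicate admits exactly the halves representable as an AArch64 8-bit FP immediate. A sketch of the check, under the assumption that getFP16Imm mirrors the existing getFP32Imm logic:

    #include <cstdint>

    // An IEEE half fits the 8-bit immediate iff it is
    // +/- (16..31)/16 * 2^e with e in [-3, 4]; returns -1 otherwise.
    int getFP16ImmSketch(uint16_t Bits) {
      uint32_t Sign = (Bits >> 15) & 1;
      int32_t Exp = (int32_t)((Bits >> 10) & 0x1f) - 15; // unbias
      uint32_t Mantissa = Bits & 0x3ff;

      if (Mantissa & 0x3f) // only 4 mantissa bits are encodable
        return -1;
      Mantissa >>= 6;

      if (Exp < -3 || Exp > 4) // only 3 exponent bits are encodable
        return -1;
      uint32_t EncExp = (uint32_t)((Exp + 3) & 0x7) ^ 4;

      return (int)(Sign << 7 | EncExp << 4 | Mantissa);
    }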
def fpimm32 : Operand<f32>,
PatLeaf<(f32 fpimm), [{
return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1;
@@ -822,7 +858,7 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
// model patterns with sufficiently fine granularity
let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
class HintI<string mnemonic>
- : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "",
+ : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "",
[(int_aarch64_hint imm0_127:$imm)]>,
Sched<[WriteHint]> {
bits <7> imm;
@@ -875,6 +911,25 @@ def msr_sysreg_op : Operand<i32> {
let PrintMethod = "printMSRSystemRegister";
}
+def PSBHintOperand : AsmOperandClass {
+ let Name = "PSBHint";
+ let ParserMethod = "tryParsePSBHint";
+}
+def psbhint_op : Operand<i32> {
+ let ParserMatchClass = PSBHintOperand;
+ let PrintMethod = "printPSBHintOp";
+ let MCOperandPredicate = [{
+ // Check whether the operand is valid, to fix exhaustive aliasing in
+ // disassembly. "psb" is an alias of "hint" only for certain values of
+ // the CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ bool ValidNamed;
+ (void)AArch64PSBHint::PSBHintMapper().toString(MCOp.getImm(),
+ STI.getFeatureBits(), ValidNamed);
+ return ValidNamed;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -890,19 +945,19 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
let Inst{20-5} = systemreg;
}
-def SystemPStateFieldOperand : AsmOperandClass {
- let Name = "SystemPStateField";
+def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_15";
let ParserMethod = "tryParseSysReg";
}
-def pstatefield_op : Operand<i32> {
- let ParserMatchClass = SystemPStateFieldOperand;
+def pstatefield4_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_15Operand;
let PrintMethod = "printSystemPStateField";
}
let Defs = [NZCV] in
-class MSRpstateI
- : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm),
- "msr", "\t$pstate_field, $imm">,
+class MSRpstateImm0_15
+ : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
+ "msr", "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
bits<6> pstatefield;
bits<4> imm;
@@ -913,6 +968,37 @@ class MSRpstateI
let Inst{7-5} = pstatefield{2-0};
let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+ // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
+}
+
+def SystemPStateFieldWithImm0_1Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_1";
+ let ParserMethod = "tryParseSysReg";
+}
+def pstatefield1_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_1Operand;
+ let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_1
+ : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
+ "msr", "\t$pstatefield, $imm">,
+ Sched<[WriteSys]> {
+ bits<6> pstatefield;
+ bit imm;
+ let Inst{20-19} = 0b00;
+ let Inst{18-16} = pstatefield{5-3};
+ let Inst{15-9} = 0b0100000;
+ let Inst{8} = imm;
+ let Inst{7-5} = pstatefield{2-0};
+
+ let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+ // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
}
// SYS and SYSL generic system instructions.
@@ -1341,7 +1427,7 @@ multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
}
class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2), 0>;
class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
@@ -1407,13 +1493,13 @@ class MulHi<bits<3> opc, string asm, SDNode OpNode>
}
class MulAccumWAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>;
class MulAccumXAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>;
class WideMulAccumAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>;
class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
@@ -1643,7 +1729,7 @@ class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
RegisterClass src1Regtype, RegisterClass src2Regtype,
int shiftExt>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
shiftExt)>;
@@ -1701,10 +1787,10 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
}
// add Rd, Rn, -imm -> sub Rd, Rn, imm
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1776,43 +1862,43 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
} // Defs = [NZCV]
// Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
// Compare aliases
- def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
- def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
// Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
- def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
// Compare shorthands
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrs")
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrx")
WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
// Register/register aliases with no shift when SP is not used.
@@ -1998,7 +2084,7 @@ class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
// Aliases for register+register logical instructions.
class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
@@ -2017,10 +2103,10 @@ multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
let Inst{31} = 1;
}
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2039,10 +2125,10 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
}
} // end Defs = [NZCV]
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2105,9 +2191,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
+class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
+ string mnemonic, SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2127,19 +2216,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
}
-multiclass CondSetFlagsImm<bit op, string asm> {
- def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
- let Inst{31} = 1;
- }
-}
-
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
+ SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2159,11 +2242,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
}
-multiclass CondSetFlagsReg<bit op, string asm> {
- def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
+multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
+ // immediate operand variants
+ def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
let Inst{31} = 0;
}
- def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
+ def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ // register operand variants
+ def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
let Inst{31} = 1;
}
}
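The new patterns give the conditional-compare nodes concrete semantics. A scalar model (an illustration; the struct and function names are hypothetical): when the incoming condition held, NZCV comes from a SUBS-style compare, otherwise from the 4-bit immediate.

    #include <cstdint>

    struct Flags { bool N, Z, C, V; };

    // Model of CCMP Rn, Rm, #nzcv, cond.
    Flags ccmp(int64_t Rn, int64_t Rm, unsigned NZCVImm, bool CondHeld) {
      if (!CondHeld)
        return {(NZCVImm & 8) != 0, (NZCVImm & 4) != 0,
                (NZCVImm & 2) != 0, (NZCVImm & 1) != 0};
      uint64_t URn = (uint64_t)Rn, URm = (uint64_t)Rm, R = URn - URm;
      return {(int64_t)R < 0,                          // N
              R == 0,                                  // Z
              URn >= URm,                              // C: no borrow
              (((URn ^ URm) & (URn ^ R)) >> 63) != 0}; // V: signed overflow
    }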
@@ -2328,7 +2419,7 @@ multiclass LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2340,7 +2431,7 @@ multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2508,7 +2599,7 @@ class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
}
class ROInstAlias<string asm, RegisterClass regtype, Instruction INST>
- : InstAlias<asm # " $Rt, [$Rn, $Rm]",
+ : InstAlias<asm # "\t$Rt, [$Rn, $Rm]",
(INST regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
@@ -2934,7 +3025,7 @@ multiclass LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
(ins GPR64sp:$Rn, simm9:$offset), asm, pattern>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2946,7 +3037,7 @@ multiclass StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2958,7 +3049,7 @@ multiclass PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm,
asm, pat>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2993,7 +3084,7 @@ multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc,
(ins GPR64sp:$Rn, simm9:$offset), asm>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -3005,7 +3096,7 @@ multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
asm>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -3136,7 +3227,7 @@ multiclass LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype,
(ins GPR64sp:$Rn, indextype:$offset), asm>,
Sched<[WriteLD, WriteLDHi]>;
- def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, 0)>;
}
@@ -3151,7 +3242,7 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
asm>,
Sched<[WriteSTP]>;
- def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, 0)>;
}
@@ -3230,8 +3321,8 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
let mayStore = 1, mayLoad = 0 in
class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
Operand idxtype, string asm>
- : BaseLoadStorePairPostIdx<opc, V, 0, (outs),
- (ins GPR64sp:$wback, regtype:$Rt, regtype:$Rt2,
+ : BaseLoadStorePairPostIdx<opc, V, 0, (outs GPR64sp:$wback),
+ (ins regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, idxtype:$offset),
asm>,
Sched<[WriteAdr, WriteSTP]>;
@@ -3477,6 +3568,20 @@ class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
+ // Unscaled half-precision to 32-bit
+ def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm,
+ [(set GPR32:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ // Unscaled half-precision to 64-bit
+ def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm,
+ [(set GPR64:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
// Unscaled single-precision to 32-bit
def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm,
[(set GPR32:$Rd, (OpN FPR32:$Rn))]> {
@@ -3504,6 +3609,25 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
+ // Scaled half-precision to 32-bit
+ def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32,
+ fixedpoint_f16_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
+ // Scaled half-precision to 64-bit
+ def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64,
+ fixedpoint_f16_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
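These scaled variants implement fixed-point conversion via the (OpN (fmul $Rn, $scale)) fragment, where $scale encodes 2^fbits. A scalar model (an illustration; the real instruction additionally saturates on overflow):

    #include <cmath>
    #include <cstdint>

    // Scaled fcvtzs: round toward zero after multiplying by 2^FBits.
    int32_t fcvtzs_fixed(float X, unsigned FBits) {
      return (int32_t)std::trunc(X * std::ldexp(1.0f, (int)FBits));
    }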
// Scaled single-precision to 32-bit
def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
fixedpoint_f32_i32, asm,
@@ -3553,7 +3677,7 @@ class BaseIntegerToFP<bit isUnsigned,
bits<5> Rd;
bits<5> Rn;
bits<6> scale;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b00001;
let Inst{16} = isUnsigned;
let Inst{15-10} = scale;
@@ -3570,7 +3694,7 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
bits<5> Rd;
bits<5> Rn;
bits<6> scale;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b10001;
let Inst{16} = isUnsigned;
let Inst{15-10} = 0b000000;
@@ -3580,33 +3704,55 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
// Unscaled
+ def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
}
def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
// Scaled
+ def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f16_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
[(set FPR32:$Rd,
(fdiv (node GPR32:$Rn),
fixedpoint_f32_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
let scale{5} = 1;
}
@@ -3615,16 +3761,25 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
(fdiv (node GPR32:$Rn),
fixedpoint_f64_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
let scale{5} = 1;
}
+ def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f16_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
[(set FPR32:$Rd,
(fdiv (node GPR64:$Rn),
fixedpoint_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
@@ -3632,7 +3787,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
(fdiv (node GPR64:$Rn),
fixedpoint_f64_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
}
@@ -3654,7 +3809,7 @@ class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode,
Sched<[WriteFCopy]> {
bits<5> Rd;
bits<5> Rn;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21} = 1;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
@@ -3704,26 +3859,49 @@ class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode,
}
-
multiclass UnscaledConversion<string asm> {
+ def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
}
def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128,
@@ -3796,7 +3974,7 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
Sched<[WriteF]> {
bits<5> Rd;
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21-19} = 0b100;
let Inst{18-15} = opcode;
let Inst{14-10} = 0b10000;
@@ -3806,12 +3984,17 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass SingleOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
+ def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3828,7 +4011,7 @@ class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = opcode;
@@ -3839,28 +4022,41 @@ class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass TwoOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set (f16 FPR16:$Rd),
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
[(set (f32 FPR32:$Rd),
(node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
[(set (f64 FPR64:$Rd),
(node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set FPR16:$Rd, (fneg (node FPR16:$Rn, (f16 FPR16:$Rm))))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
[(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
[(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3878,7 +4074,7 @@ class BaseThreeOperandFPData<bit isNegated, bit isSub,
bits<5> Rn;
bits<5> Rm;
bits<5> Ra;
- let Inst{31-23} = 0b000111110;
+ let Inst{31-24} = 0b00011111;
let Inst{21} = isNegated;
let Inst{20-16} = Rm;
let Inst{15} = isSub;
@@ -3889,16 +4085,23 @@ class BaseThreeOperandFPData<bit isNegated, bit isSub,
multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm,
SDPatternOperator node> {
+ def Hrrr : BaseThreeOperandFPData<isNegated, isSub, FPR16, asm,
+ [(set FPR16:$Rd,
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm), (f16 FPR16:$Ra)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm,
[(set FPR32:$Rd,
(node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm,
[(set FPR64:$Rd,
(node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3913,7 +4116,7 @@ class BaseOneOperandFPComparison<bit signalAllNans,
: I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>,
Sched<[WriteFCmp]> {
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{15-10} = 0b001000;
@@ -3932,7 +4135,7 @@ class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
Sched<[WriteFCmp]> {
bits<5> Rm;
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-10} = 0b001000;
@@ -3944,24 +4147,36 @@ class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
multiclass FPComparison<bit signalAllNans, string asm,
SDPatternOperator OpNode = null_frag> {
let Defs = [NZCV] in {
+ def Hrr : BaseTwoOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode FPR16:$Rn, (f16 FPR16:$Rm)), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Hri : BaseOneOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode (f16 FPR16:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm,
[(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit NZCV)]> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm,
[(OpNode (f32 FPR32:$Rn), fpimm0), (implicit NZCV)]> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm,
[(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit NZCV)]> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm,
[(OpNode (f64 FPR64:$Rn), fpimm0), (implicit NZCV)]> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
} // Defs = [NZCV]
}
@@ -3971,17 +4186,20 @@ multiclass FPComparison<bit signalAllNans, string asm,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPCondComparison<bit signalAllNans,
- RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
+ string mnemonic, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
Sched<[WriteFCmp]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
bits<5> Rn;
bits<5> Rm;
bits<4> nzcv;
bits<4> cond;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = cond;
@@ -3991,16 +4209,24 @@ class BaseFPCondComparison<bit signalAllNans,
let Inst{3-0} = nzcv;
}
-multiclass FPCondComparison<bit signalAllNans, string asm> {
- let Defs = [NZCV], Uses = [NZCV] in {
- def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
- let Inst{22} = 0;
+multiclass FPCondComparison<bit signalAllNans, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic, []> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
}
- def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
- let Inst{22} = 1;
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
+ [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
+ [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b01;
}
- } // Defs = [NZCV], Uses = [NZCV]
}
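
The rewritten FPCondComparison also attaches a selection pattern: the node produces NZCV and consumes the incoming NZCV, matching fccmp's behaviour of setting the flags from the comparison when the condition holds and from the #nzcv immediate otherwise. A rough behavioural model (a sketch of the semantics, not LLVM code; condition evaluation is abstracted into a boolean):

    #include <cstdint>

    struct Flags { bool N, Z, C, V; };

    // Model of fccmp: if the condition holds, flags come from comparing
    // Rn and Rm (as for fcmp); otherwise the 4-bit immediate is copied in.
    static Flags fccmp(double Rn, double Rm, uint8_t NZCV, bool CondHolds) {
      if (!CondHolds)
        return {bool(NZCV & 8), bool(NZCV & 4), bool(NZCV & 2), bool(NZCV & 1)};
      if (Rn != Rn || Rm != Rm) return {false, false, true,  true};  // unordered
      if (Rn == Rm)             return {false, true,  true,  false}; // equal
      if (Rn <  Rm)             return {true,  false, false, false}; // less
      return                           {false, false, true,  false}; // greater
    }
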
//---
@@ -4019,7 +4245,7 @@ class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
bits<5> Rm;
bits<4> cond;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = cond;
@@ -4030,12 +4256,17 @@ class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
multiclass FPCondSelect<string asm> {
let Uses = [NZCV] in {
+ def Hrrr : BaseFPCondSelect<FPR16, f16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Srrr : BaseFPCondSelect<FPR32, f32, asm> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Drrr : BaseFPCondSelect<FPR64, f64, asm> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
} // Uses = [NZCV]
}
@@ -4050,7 +4281,7 @@ class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
Sched<[WriteFImm]> {
bits<5> Rd;
bits<8> imm;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-13} = imm;
let Inst{12-5} = 0b10000000;
@@ -4058,12 +4289,17 @@ class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
}
multiclass FPMoveImmediate<string asm> {
+ def Hi : BaseFPMoveImmediate<FPR16, fpimm16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
}
} // end of 'let Predicates = [HasFPARMv8]'
@@ -4079,7 +4315,7 @@ let Predicates = [HasNEON] in {
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
RegisterOperand regtype, string asm, string kind,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -4093,8 +4329,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -4103,7 +4338,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
}
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
RegisterOperand regtype, string asm, string kind,
list<dag> pattern>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
@@ -4117,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -4129,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
- def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128,
asm, ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}
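
Note the pattern in this hunk: BaseSIMDThreeSameVector's size parameter grows from bits<2> to bits<3> because the previously constant Inst{21} = 1 is folded into it, so every integer instantiation simply appends a 1 (0b00 -> 0b001, 0b01 -> 0b011, and so on), while the FP16 forms added below can encode a 0 there. A one-line sketch of the rewrite (hypothetical helper name):

    #include <cstdint>

    // The new 3-bit size for an integer "three same" op is the old 2-bit
    // size with the formerly hardwired bit 21 appended as the LSB.
    static uint32_t widenIntSize(uint32_t OldSize) { // OldSize in [0, 3]
      return (OldSize << 1) | 1;                     // {size, 1}
    }
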
@@ -4155,49 +4389,49 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
}
multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$dst),
(OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$dst),
(OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$dst),
(OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$dst),
(OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$dst),
(OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
@@ -4206,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
// As above, but only B sized elements supported.
multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd),
(OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
}
-// As above, but only S and D sized floating point elements supported.
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
+// As above, but only floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
-multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
-multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$dst),
+ (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$dst),
+ (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2f32 V64:$dst),
(OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4f32 V128:$dst),
(OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2f64 V128:$dst),
(OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
@@ -4262,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
// As above, but D and B sized elements unsupported.
multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
}
@@ -4279,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
// Logical three vector ops share opcode bits, and only use B sized elements.
multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
@@ -4303,11 +4563,11 @@ multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64,
asm, ".8b",
[(set (v8i8 V64:$dst),
(OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128,
asm, ".16b",
[(set (v16i8 V128:$dst),
(OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
@@ -4347,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
+ bits<2> size2, RegisterOperand regtype, string asm,
+ string dstkind, string srckind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
@@ -4360,7 +4620,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
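
Here the fixed run Inst{21-17} = 0b10000 is split so that bits 20-19 become a new size2 parameter: all pre-existing instantiations pass 0b00 and encode exactly as before, while the FP16 two-register forms later in the patch pass 0b11 to claim previously unallocated encodings. A sketch of the reassembled field (hypothetical helper):

    #include <cassert>
    #include <cstdint>

    // Value placed at Inst{21-17}: bit 21 stays 1, bits 20-19 carry size2,
    // bits 18-17 stay 00.
    static uint32_t bits21to17(uint32_t Size2) { // Size2 in [0, 3]
      return (1u << 4) | (Size2 << 2);
    }

    int main() {
      assert(bits21to17(0b00) == 0b10000); // old constant, unchanged ops
      assert(bits21to17(0b11) == 0b11100); // the new v4f16/v8f16 encodings
    }
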
@@ -4369,8 +4631,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
+ bits<2> size2, RegisterOperand regtype,
+ string asm, string dstkind, string srckind,
+ list<dag> pattern>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
@@ -4382,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -4392,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
// Supports B, H, and S element sizes.
multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
@@ -4450,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS {
// Supports all element sizes.
multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".4h", ".8b",
[(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".8h", ".16b",
[(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".2s", ".4h",
[(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".4s", ".8h",
[(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".1d", ".2s",
[(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".2d", ".4s",
[(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
asm, ".4h", ".8b",
[(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
(v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
asm, ".8h", ".16b",
[(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
(v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
asm, ".2s", ".4h",
[(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
(v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
asm, ".4s", ".8h",
[(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
(v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
asm, ".1d", ".2s",
[(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
(v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
asm, ".2d", ".4s",
[(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
(v4i32 V128:$Rn)))]>;
@@ -4501,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
// Supports all element sizes, except 1xD.
multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
}
multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
@@ -4553,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
// Supports only B element sizes.
multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
@@ -4565,16 +4830,16 @@ multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
// Supports only B and H element sizes.
multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
}
@@ -4583,13 +4848,21 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
@@ -4597,10 +4870,10 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
// Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
@@ -4608,26 +4881,42 @@ multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
@@ -4706,10 +4995,10 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
}
-class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype,
- string asm, string kind, string zero,
- ValueType dty, ValueType sty, SDNode OpNode>
+class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
+ bits<5> opcode, RegisterOperand regtype, string asm,
+ string kind, string zero, ValueType dty,
+ ValueType sty, SDNode OpNode>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
@@ -4722,7 +5011,9 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -4732,54 +5023,74 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
// Comparisons support all element sizes, except 1xD.
multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
SDNode OpNode> {
- def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64,
+ def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64,
asm, ".8b", "0",
v8i8, v8i8, OpNode>;
- def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128,
+ def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128,
asm, ".16b", "0",
v16i8, v16i8, OpNode>;
- def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64,
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64,
asm, ".4h", "0",
v4i16, v4i16, OpNode>;
- def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128,
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128,
asm, ".8h", "0",
v8i16, v8i16, OpNode>;
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64,
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64,
asm, ".2s", "0",
v2i32, v2i32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128,
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128,
asm, ".4s", "0",
v4i32, v4i32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128,
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128,
asm, ".2d", "0",
v2i64, v2i64, OpNode>;
}
-// FP Comparisons support only S and D element sizes.
+// FP Comparisons support only S and D element sizes (and H for v8.2a).
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
string asm, SDNode OpNode> {
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64,
+ asm, ".4h", "0.0",
+ v4i16, v4f16, OpNode>;
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128,
+ asm, ".8h", "0.0",
+ v8i16, v8f16, OpNode>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64,
asm, ".2s", "0.0",
v2i32, v2f32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128,
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128,
asm, ".4s", "0.0",
v4i32, v4f32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128,
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128,
asm, ".2d", "0.0",
v2i64, v2f64, OpNode>;
- def : InstAlias<asm # " $Vd.2s, $Vn.2s, #0",
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # "\t$Vd.8h, $Vn.8h, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # "\t$Vd.2s, $Vn.2s, #0",
(!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
- def : InstAlias<asm # " $Vd.4s, $Vn.4s, #0",
+ def : InstAlias<asm # "\t$Vd.4s, $Vn.4s, #0",
(!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # " $Vd.2d, $Vn.2d, #0",
+ def : InstAlias<asm # "\t$Vd.2d, $Vn.2d, #0",
(!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # ".2s $Vd, $Vn, #0",
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # ".4h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # ".8h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # ".2s\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
- def : InstAlias<asm # ".4s $Vd, $Vn, #0",
+ def : InstAlias<asm # ".4s\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # ".2d $Vd, $Vn, #0",
+ def : InstAlias<asm # ".2d\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
}
@@ -5325,7 +5636,7 @@ multiclass SIMDZipVector<bits<3>opc, string asm,
//----------------------------------------------------------------------------
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
RegisterClass regtype, string asm,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -5337,8 +5648,7 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{31-30} = 0b01;
let Inst{29} = U;
let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -5369,17 +5679,17 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
}
multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
- def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>;
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
+ def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
@@ -5389,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm,
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
}
multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
@@ -5404,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
asm, []>;
}
-multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
}
def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
}
-multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ []>;
+ } // Predicates = [HasNEON, HasFullFP16]
}
def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -5482,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, RegisterClass regtype2,
string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
@@ -5494,7 +5812,9 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -5523,7 +5843,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, string asm, string zero>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"\t$Rd, $Rn, #" # zero, "", []>,
@@ -5534,7 +5854,9 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -5556,21 +5878,28 @@ class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm, "0">;
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">;
def : Pat<(v1i64 (OpNode FPR64:$Rn)),
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
}
-multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm, "0.0">;
- def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm, "0.0">;
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">;
+ def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">;
+ }
- def : InstAlias<asm # " $Rd, $Rn, #0",
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
- def : InstAlias<asm # " $Rd, $Rn, #0",
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
(!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>;
+ }
def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
@@ -5578,35 +5907,42 @@ multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
-multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>;
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
+ }
}
-multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>;
+ }
}
multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
@@ -5633,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm,
[(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR16, asm, []>;
}
//----------------------------------------------------------------------------
@@ -5668,10 +6004,14 @@ multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
asm, ".2d">;
}
-multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64,
+multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64,
+ asm, ".2h">;
+ }
+ def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64,
asm, ".2s">;
- def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128,
+ def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128,
asm, ".2d">;
}
@@ -5727,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
asm, ".4s", []>;
}
-multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm,
+multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
Intrinsic intOp> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
+ asm, ".4h",
+ [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>;
+ def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128,
+ asm, ".8h",
+ [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
asm, ".4s",
[(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
@@ -5925,7 +6273,7 @@ class SIMDInsMainMovAlias<string size, Instruction inst,
class SIMDInsElementMovAlias<string size, Instruction inst,
Operand idxtype>
: InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
- # "|" # size #" $dst$idx, $src$idx2}",
+ # "|" # size #"\t$dst$idx, $src$idx2}",
(inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
@@ -6215,7 +6563,7 @@ multiclass SIMDScalarCPY<string asm> {
// AdvSIMD modified immediate instructions
//----------------------------------------------------------------------------
-class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
+class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
string asm, string op_string,
string cstr, list<dag> pattern>
: I<oops, iops, asm, op_string, cstr, pattern>,
@@ -6227,16 +6575,17 @@ class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
let Inst{29} = op;
let Inst{28-19} = 0b0111100000;
let Inst{18-16} = imm8{7-5};
- let Inst{11-10} = 0b01;
+ let Inst{11} = op2;
+ let Inst{10} = 1;
let Inst{9-5} = imm8{4-0};
let Inst{4-0} = Rd;
}
-class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype,
+class BaseSIMDModifiedImmVector<bit Q, bit op, bit op2, RegisterOperand vectype,
Operand immtype, dag opt_shift_iop,
string opt_shift, string asm, string kind,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd),
+ : BaseSIMDModifiedImm<Q, op, op2, (outs vectype:$Rd),
!con((ins immtype:$imm8), opt_shift_iop), asm,
"{\t$Rd" # kind # ", $imm8" # opt_shift #
"|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6248,7 +6597,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
Operand immtype, dag opt_shift_iop,
string opt_shift, string asm, string kind,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst),
+ : BaseSIMDModifiedImm<Q, op, 0, (outs vectype:$dst),
!con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
"|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6259,7 +6608,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins logical_vec_shift:$shift),
"$shift", asm, kind, pattern> {
bits<2> shift;
@@ -6284,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins logical_vec_hw_shift:$shift),
"$shift", asm, kind, pattern> {
bits<2> shift;
@@ -6349,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins move_vec_shift:$shift),
"$shift", asm, kind, pattern> {
bits<1> shift;
@@ -6357,18 +6706,18 @@ class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
let Inst{12} = shift;
}
-class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode,
+class SIMDModifiedImmVectorNoShift<bit Q, bit op, bit op2, bits<4> cmode,
RegisterOperand vectype,
Operand imm_type, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "",
+ : BaseSIMDModifiedImmVector<Q, op, op2, vectype, imm_type, (ins), "",
asm, kind, pattern> {
let Inst{15-12} = cmode;
}
class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
+ : BaseSIMDModifiedImm<Q, op, 0, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
"\t$Rd, $imm8", "", pattern> {
let Inst{15-12} = cmode;
let DecoderMethod = "DecodeModImmInstruction";
@@ -6438,8 +6787,36 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
-multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
+multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc,
+ V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4f16 V64:$Rd),
+ (OpNode (v4f16 V64:$Rn),
+ (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8f16 V128:$Rd),
+ (OpNode (v8f16 V128:$Rn),
+ (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
V64, V64,
V128, VectorIndexS,
@@ -6476,6 +6853,21 @@ multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
let Inst{21} = 0;
}
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h",
+ [(set (f16 FPR16Op:$Rd),
+ (OpNode (f16 FPR16Op:$Rn),
+ (f16 (vector_extract (v8f16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
FPR32Op, FPR32Op, V128, VectorIndexS,
asm, ".s", "", "", ".s",
@@ -6501,7 +6893,7 @@ multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
}
}
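
For the .h indexed forms the three lane-index bits no longer fit the usual H:L pair, so the definitions above scatter idx{2}, idx{1} and idx{0} across Inst{11}, Inst{21} and Inst{20} (the H, M and L bits) and restrict Rm to V128_lo, since only four register bits remain once M is repurposed. A sketch of the scatter (hypothetical helper):

    #include <cstdint>

    // idx{2} -> Inst{11} (H), idx{1} -> Inst{21} (M), idx{0} -> Inst{20} (L),
    // exactly as the three 'let' lines in the v4i16/v8i16/v1i16 defs above.
    static uint32_t encodeHLaneIndex(uint32_t Idx) { // Idx in [0, 7]
      uint32_t Inst = 0;
      Inst |= ((Idx >> 2) & 1) << 11;
      Inst |= ((Idx >> 1) & 1) << 21;
      Inst |= (Idx & 1) << 20;
      return Inst;
    }
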
-multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
+multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
// 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 V128:$Rm),
@@ -6553,7 +6945,28 @@ multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
V128:$Rm, VectorIndexD:$idx)>;
}
-multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
+multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
V128, VectorIndexS,
asm, ".2s", ".2s", ".2s", ".s", []> {
@@ -6580,6 +6993,16 @@ multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
let Inst{21} = 0;
}
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
FPR32Op, FPR32Op, V128, VectorIndexS,
@@ -7117,7 +7540,13 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
}
-multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> {
+multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR32, vecshiftR32, asm, []> {
let Inst{20-16} = imm{4-0};
@@ -7297,6 +7726,23 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
@@ -7322,8 +7768,26 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
}
}
-multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
+multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
@@ -8604,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in {
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand regtype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind,
pattern> {
- let Inst{21}=0;
}
multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
SDPatternOperator Accum> {
@@ -9041,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">;
def : TokenAlias<".4S", ".4s">;
def : TokenAlias<".2D", ".2d">;
def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
def : TokenAlias<".B", ".b">;
def : TokenAlias<".H", ".h">;
def : TokenAlias<".S", ".s">;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c0b3f2c..3ef3c8b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
-#include "AArch64MachineCombinerPattern.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -533,6 +532,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
CC);
}
+/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
+static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
+ uint64_t Imm = MI->getOperand(1).getImm();
+ uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
+}
+
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
@@ -573,6 +580,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ORRWrr:
case AArch64::ORRXrr:
return true;
+  // If a MOVi32imm or MOVi64imm can be expanded into ORRWri or
+  // ORRXri, it is as cheap as a MOV.
+ case AArch64::MOVi32imm:
+ return canBeExpandedToORR(MI, 32);
+ case AArch64::MOVi64imm:
+ return canBeExpandedToORR(MI, 64);
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
@@ -1379,42 +1392,34 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Width = 1;
Scale = 1;
break;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = Width = 16;
+ break;
case AArch64::LDRXui:
+ case AArch64::LDRDui:
case AArch64::STRXui:
+ case AArch64::STRDui:
Scale = Width = 8;
break;
case AArch64::LDRWui:
+ case AArch64::LDRSui:
case AArch64::STRWui:
+ case AArch64::STRSui:
Scale = Width = 4;
break;
- case AArch64::LDRBui:
- case AArch64::STRBui:
- Scale = Width = 1;
- break;
case AArch64::LDRHui:
+ case AArch64::LDRHHui:
case AArch64::STRHui:
+ case AArch64::STRHHui:
Scale = Width = 2;
break;
- case AArch64::LDRSui:
- case AArch64::STRSui:
- Scale = Width = 4;
- break;
- case AArch64::LDRDui:
- case AArch64::STRDui:
- Scale = Width = 8;
- break;
- case AArch64::LDRQui:
- case AArch64::STRQui:
- Scale = Width = 16;
- break;
+ case AArch64::LDRBui:
case AArch64::LDRBBui:
+ case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
break;
- case AArch64::LDRHHui:
- case AArch64::STRHHui:
- Scale = Width = 2;
- break;
};
BaseReg = LdSt->getOperand(1).getReg();
@@ -1445,23 +1450,43 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *Second) const {
- // Cyclone can fuse CMN, CMP followed by Bcc.
-
- // FIXME: B0 can also fuse:
- // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
- if (Second->getOpcode() != AArch64::Bcc)
- return false;
- switch (First->getOpcode()) {
- default:
- return false;
- case AArch64::SUBSWri:
- case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
- case AArch64::ADDSXri:
- case AArch64::ANDSXri:
- return true;
+ if (Subtarget.isCyclone()) {
+ // Cyclone can fuse CMN, CMP, TST followed by Bcc.
+ unsigned SecondOpcode = Second->getOpcode();
+ if (SecondOpcode == AArch64::Bcc) {
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case AArch64::SUBSWri:
+ case AArch64::ADDSWri:
+ case AArch64::ANDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::ADDSXri:
+ case AArch64::ANDSXri:
+ return true;
+ }
+ }
+ // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
+ SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::ANDWri:
+ case AArch64::ANDXri:
+ case AArch64::EORWri:
+ case AArch64::EORXri:
+ case AArch64::ORRWri:
+ case AArch64::ORRXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ return true;
+ }
+ }
}
+ return false;
}
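The restructured hunk is a pairing table: on Cyclone, flag-setting ALU immediates fuse with a following Bcc, and plain ALU immediates fuse with CBZ/CBNZ. A condensed sketch of that shape (a subset of the opcodes, with a stand-in enum rather than the real AArch64:: namespace):

    // Stand-in opcodes; the real code switches over AArch64::SUBSWri etc.
    enum Opc { SUBSWri, ADDSWri, ANDSWri, ADDWri, ORRWri, EORWri,
               Bcc, CBZW, CBNZW, Other };

    static bool shouldFuse(Opc First, Opc Second) {
      if (Second == Bcc)                      // CMP/CMN/TST + cond. branch
        return First == SUBSWri || First == ADDSWri || First == ANDSWri;
      if (Second == CBZW || Second == CBNZW)  // ALU-imm + compare-and-branch
        return First == ADDWri || First == ORRWri || First == EORWri;
      return false;
    }

    int main() {
      // "subs w8, w0, #5 ; b.eq L" fuses; "add w8, w0, #1 ; b.eq L" does not.
      return (shouldFuse(SUBSWri, Bcc) && !shouldFuse(ADDWri, Bcc)) ? 0 : 1;
    }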
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
@@ -1814,7 +1839,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
unsigned Opc = 0;
@@ -1911,7 +1936,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
@@ -2226,11 +2251,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
case AArch64::LDPDi:
case AArch64::STPXi:
case AArch64::STPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 8;
break;
case AArch64::LDPQi:
case AArch64::STPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STNPQi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 16;
break;
@@ -2238,6 +2271,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
case AArch64::LDPSi:
case AArch64::STPWi:
case AArch64::STPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 4;
break;
@@ -2457,7 +2495,7 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -2485,76 +2523,76 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
"ADDWrr does not have register operands");
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
Found = true;
}
break;
case AArch64::ADDXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
Found = true;
}
break;
case AArch64::SUBWrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
Found = true;
}
break;
case AArch64::SUBXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
Found = true;
}
break;
case AArch64::ADDWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
Found = true;
}
break;
case AArch64::ADDXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
Found = true;
}
break;
case AArch64::SUBWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
Found = true;
}
break;
case AArch64::SUBXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
Found = true;
}
break;
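All of the MULADD*/MULSUB* patterns collected above describe one scalar rewrite: a MUL whose result feeds an ADD or SUB can be folded into a single multiply-accumulate. In C terms, roughly:

    #include <cstdint>
    #include <cassert>

    // The shape getMachineCombinerPatterns() looks for (the MULADDX_OP1 case):
    //   mul x8, x0, x1          MUL I = A, B
    //   add x0, x8, x2          ADD R = I + C
    // which genAlternativeCodeSequence() replaces with:
    //   madd x0, x0, x1, x2     MADD R = A * B + C
    static int64_t mulThenAdd(int64_t A, int64_t B, int64_t C) {
      return A * B + C; // a single MADD after the combiner runs
    }

    int main() {
      assert(mulThenAdd(3, 4, 5) == 17);
      return 0;
    }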
@@ -2661,7 +2699,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -2677,13 +2715,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
default:
// signal error.
break;
- case MachineCombinerPattern::MC_MULADDW_OP1:
- case MachineCombinerPattern::MC_MULADDX_OP1:
+ case MachineCombinerPattern::MULADDW_OP1:
+ case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
// ADD R,I,C
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
+ if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2692,13 +2730,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULADDW_OP2:
- case MachineCombinerPattern::MC_MULADDX_OP2:
+ case MachineCombinerPattern::MULADDW_OP2:
+ case MachineCombinerPattern::MULADDX_OP2:
// MUL I=A,B,0
// ADD R,C,I
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
+ if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2707,8 +2745,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULADDWI_OP1:
- case MachineCombinerPattern::MC_MULADDXI_OP1: {
+ case MachineCombinerPattern::MULADDWI_OP1:
+ case MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> ORR V, ZR, Imm
@@ -2716,7 +2754,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
+ if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -2751,8 +2789,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::MC_MULSUBW_OP1:
- case MachineCombinerPattern::MC_MULSUBX_OP1: {
+ case MachineCombinerPattern::MULSUBW_OP1:
+ case MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
// SUB R,I, C
// ==> SUB V, 0, C
@@ -2760,7 +2798,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *SubRC;
unsigned SubOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
+ if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
SubOpc = AArch64::SUBWrr;
SubRC = &AArch64::GPR32spRegClass;
ZeroReg = AArch64::WZR;
@@ -2784,13 +2822,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
- case MachineCombinerPattern::MC_MULSUBW_OP2:
- case MachineCombinerPattern::MC_MULSUBX_OP2:
+ case MachineCombinerPattern::MULSUBW_OP2:
+ case MachineCombinerPattern::MULSUBX_OP2:
// MUL I=A,B,0
// SUB R,C,I
// ==> MSUB R,A,B,C (computes C - A*B)
// --- Create(MSUB);
- if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
+ if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Opc = AArch64::MSUBWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2799,8 +2837,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULSUBWI_OP1:
- case MachineCombinerPattern::MC_MULSUBXI_OP1: {
+ case MachineCombinerPattern::MULSUBWI_OP1:
+ case MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> ORR V, ZR, -Imm
@@ -2808,7 +2846,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+ if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -2944,3 +2982,34 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
MI->eraseFromParent();
return true;
}
+
+std::pair<unsigned, unsigned>
+AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = AArch64II::MO_FRAGMENT;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace AArch64II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_PAGE, "aarch64-page"},
+ {MO_PAGEOFF, "aarch64-pageoff"},
+ {MO_G3, "aarch64-g3"},
+ {MO_G2, "aarch64-g2"},
+ {MO_G1, "aarch64-g1"},
+ {MO_G0, "aarch64-g0"},
+ {MO_HI12, "aarch64-hi12"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace AArch64II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_GOT, "aarch64-got"},
+ {MO_NC, "aarch64-nc"},
+ {MO_TLS, "aarch64-tls"},
+ {MO_CONSTPOOL, "aarch64-constant-pool"}};
+ return makeArrayRef(TargetFlags);
+}
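The two new serialization hooks split the target-flag word along MO_FRAGMENT: whatever falls inside the mask is a single "direct" flag, everything outside is an OR-able bitmask flag. A toy sketch of the split — the flag values below are illustrative assumptions, not the real AArch64II constants:

    #include <cassert>
    #include <utility>

    // Illustrative layout (assumed values, for demonstration only): a 3-bit
    // fragment field holding one direct flag, bitmask flags above it.
    static const unsigned MO_FRAGMENT = 0x7;
    static const unsigned MO_PAGE     = 0x1; // direct: inside the fragment field
    static const unsigned MO_NC       = 0x8; // bitmask: outside the field

    static std::pair<unsigned, unsigned> decompose(unsigned TF) {
      return std::make_pair(TF & MO_FRAGMENT, TF & ~MO_FRAGMENT);
    }

    int main() {
      std::pair<unsigned, unsigned> P = decompose(MO_PAGE | MO_NC);
      assert(P.first == MO_PAGE && P.second == MO_NC);
      return 0;
    }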
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 68c2a28..ae02822 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -167,13 +167,13 @@ public:
/// for an instruction chain ending in <Root>. All potential patterns are
/// listed in the <Patterns> array.
bool getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns)
+ SmallVectorImpl<MachineCombinerPattern> &Patterns)
const override;
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -181,6 +181,14 @@ public:
bool useMachineCombiner() const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fa1a46a..d02bc9f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -16,6 +16,8 @@
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -24,6 +26,12 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasSPE : Predicate<"Subtarget->hasSPE()">,
+ AssemblerPredicate<"FeatureSPE", "spe">;
+
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -66,6 +74,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
+def SDT_AArch64CCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
@@ -160,13 +182,14 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
+def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
+def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
+
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
-def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
-def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;
-
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
@@ -361,6 +384,9 @@ def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
+// v8.2a Statistical Profiling extension
+def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
+
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
@@ -383,12 +409,17 @@ def : InstAlias<"isb", (ISB 0xf)>;
def MRS : MRSI;
def MSR : MSRI;
-def MSRpstate: MSRpstateI;
+def MSRpstateImm1 : MSRpstateImm0_1;
+def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// The cycle counter PMC register is PMCCNTR_EL0.
+let Predicates = [HasPerfMon] in
+def : Pat<(readcyclecounter), (MRS 0xdce8)>;
+
// Generic system instructions
def SYSxt : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;
@@ -595,10 +626,12 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
(SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
(SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
+let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
(SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
(SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+}
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
@@ -823,7 +856,7 @@ defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON : LogicalReg<0b10, 1, "eon",
- BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
@@ -1020,13 +1053,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
@@ -2421,6 +2451,26 @@ defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvt
defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
}
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, frnd, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, frnd, "FCVTAU">;
+
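Each FPToIntegerPats line folds an explicit rounding node plus a conversion into one directed-rounding convert: ceil feeds FCVTP* (round toward +infinity), floor feeds FCVTM*, trunc feeds FCVTZ*, and round-to-nearest feeds FCVTA*. In scalar C terms, the first defm turns the following into a single instruction:

    #include <cmath>
    #include <cstdint>
    #include <cassert>

    // Matched by Pat<(i32 (fp_to_sint (fceil f32:$Rn))), (FCVTPSUWSr $Rn)>:
    // the ceil and the conversion collapse into one "fcvtps w0, s0".
    static int32_t ceilToInt(float X) {
      return (int32_t)std::ceil(X);
    }

    int main() {
      assert(ceilToInt(1.25f) == 2 && ceilToInt(-1.25f) == -1);
      return 0;
    }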
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
@@ -2466,14 +2516,7 @@ defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
(FRINTNDr FPR64:$Rn)>;
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-// <rdar://problem/13715968>
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in {
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-}
-
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
let SchedRW = [WriteFDiv] in {
@@ -2488,23 +2531,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
//===----------------------------------------------------------------------===//
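The switch from the target-private AArch64fmax/AArch64fmin nodes (and the *nm intrinsics) to the generic fmaxnan/fminnan and fmaxnum/fminnum selectors is about NaN semantics: FMAXNM implements IEEE maxNum, where a quiet NaN is treated as missing data, while FMAX propagates NaNs. A quick illustration of the maxNum half via C's fmax:

    #include <cmath>
    #include <cassert>

    int main() {
      double N = std::nan("");
      // C's fmax has maxNum semantics, i.e. FMAXNM/fmaxnum behavior:
      assert(std::fmax(N, 1.0) == 1.0);
      // FMAX (the fmaxnan node) would instead return NaN for these inputs.
      assert(std::isnan(std::fmax(N, N))); // NaN only when both are NaN
      return 0;
    }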
@@ -2556,7 +2599,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
//===----------------------------------------------------------------------===//
defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
+defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
@@ -2589,6 +2632,40 @@ defm FMOV : FPMoveImmediate<"fmov">;
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+ int_aarch64_neon_uabd>;
+// Match UABDL in log2-shuffle patterns.
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (v8i8 V64:$opA)),
+ (zext (v8i8 V64:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
+ (zext (extract_high_v16i8 V128:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
+def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
+ (v4i32 (add (sub (zext (v4i16 V64:$opA)),
+ (zext (v4i16 V64:$opB))),
+ (AArch64vashr v4i32:$src, (i32 31))))),
+ (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
+ (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)),
+ (zext (extract_high_v8i16 V128:$opB))),
+ (AArch64vashr v4i32:$src, (i32 31))))),
+ (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
+def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
+ (v2i64 (add (sub (zext (v2i32 V64:$opA)),
+ (zext (v2i32 V64:$opB))),
+ (AArch64vashr v2i64:$src, (i32 63))))),
+ (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
+ (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)),
+ (zext (extract_high_v4i32 V128:$opB))),
+ (AArch64vashr v2i64:$src, (i32 63))))),
+ (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
+
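The six xor/add/sub patterns above all recognize the branch-free absolute-value idiom applied to a widened difference: with s = d >> (bits-1) arithmetic, (d + s) ^ s equals |d|, so the whole DAG is |zext(a) - zext(b)|, which is exactly what UABDL computes. A scalar rendition of the 8-to-16-bit case:

    #include <cstdint>
    #include <cassert>

    // The log2-shuffle shape matched above, for one lane: widen, subtract,
    // then take the absolute value via the sign-mask trick.
    static uint16_t absDiffU8(uint8_t A, uint8_t B) {
      int16_t D = (int16_t)A - (int16_t)B;
      int16_t S = D >> 15;               // AArch64vashr ..., #15: sign mask
      return (uint16_t)((D + S) ^ S);    // the add/xor pair in the pattern
    }

    int main() {
      assert(absDiffU8(3, 10) == 7 && absDiffU8(10, 3) == 7);
      return 0;
    }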
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
(v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
@@ -2780,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
-defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
-defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
-defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
-defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
-defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
-defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
-defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
-defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
-defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
-defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
-defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
+defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
// The following def pats catch the case where the LHS of an FMA is negated.
@@ -2816,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
(FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
-defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
-defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
-defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
+defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
+defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
@@ -2833,9 +2910,9 @@ defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
-defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
+defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
-defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
+defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
@@ -2852,9 +2929,9 @@ defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
-defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
+defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
-defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
+defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
@@ -2879,54 +2956,6 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
-def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
- (SMINv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
- (SMINv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
- (SMINv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
- (SMINv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
- (SMINv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
- (SMINv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
- (SMAXv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
- (SMAXv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
- (SMAXv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
- (SMAXv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
- (SMAXv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
- (SMAXv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
- (UMINv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
- (UMINv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
- (UMINv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
- (UMINv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
- (UMINv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
- (UMINv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
- (UMAXv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
- (UMAXv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
- (UMAXv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
- (UMAXv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
- (UMAXv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
- (UMAXv4i32 V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
@@ -3052,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|cmlt.2d\t$dst, $src1, $src2}",
(CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmle.4h\t$dst, $src1, $src2}",
+ (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmle.8h\t$dst, $src1, $src2}",
+ (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmle.2s\t$dst, $src1, $src2}",
(FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3062,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmle.2d\t$dst, $src1, $src2}",
(FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmlt.4h\t$dst, $src1, $src2}",
+ (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmlt.8h\t$dst, $src1, $src2}",
+ (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmlt.2s\t$dst, $src1, $src2}",
(FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3072,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmlt.2d\t$dst, $src1, $src2}",
(FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|facle.4h\t$dst, $src1, $src2}",
+ (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|facle.8h\t$dst, $src1, $src2}",
+ (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
"|facle.2s\t$dst, $src1, $src2}",
(FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3082,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
"|facle.2d\t$dst, $src1, $src2}",
(FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|faclt.4h\t$dst, $src1, $src2}",
+ (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|faclt.8h\t$dst, $src1, $src2}",
+ (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
"|faclt.2s\t$dst, $src1, $src2}",
(FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3103,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
+defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FABD64 FPR64:$Rn, FPR64:$Rm)>;
-defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
+defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
int_aarch64_neon_facge>;
-defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
+defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
int_aarch64_neon_facgt>;
-defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -3198,35 +3259,35 @@ defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
-defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
-defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
-defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
-defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
-defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">;
+defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
int_aarch64_neon_suqadd>;
-defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
@@ -3390,8 +3451,6 @@ defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
int_aarch64_neon_uabd>;
-defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
@@ -3449,8 +3508,8 @@ defm : Neon_mulacc_widen_patterns<
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
-def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
- (vector_extract (v2i64 V128:$Rm), (i64 1))),
+def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
+ (extractelt (v2i64 V128:$Rm), (i64 1))),
(PMULLv2i64 V128:$Rn, V128:$Rm)>;
// CodeGen patterns for addhn and subhn instructions, which can actually be
@@ -3593,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">;
//----------------------------------------------------------------------------
defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
-defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
-defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
-defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
-defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
-defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
+defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
+defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
+defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
@@ -3713,12 +3772,12 @@ defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
SDNodeXForm IdxXFORM> {
- def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
imm:$idx))))),
(DUP V128:$Rn, (IdxXFORM imm:$idx))>;
- def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
- imm:$idx))))),
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
+ imm:$idx))))),
(DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}
@@ -3747,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
(i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+ VectorIndexB:$idx)))), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+ VectorIndexH:$idx)))), i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
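These two patterns catch a lane extract that legalization has wrapped as anyext-to-i64 followed by sext_inreg, and select the single SMOV that performs the extract and the 64-bit sign extension together. The C-level shape being matched:

    #include <cstdint>
    #include <cassert>

    // Extracting a byte lane and sign-extending it to 64 bits becomes one
    // "smov x0, v0.b[Idx]" once the pattern above fires.
    static int64_t extractSExtLane(const int8_t Lanes[16], unsigned Idx) {
      return (int64_t)Lanes[Idx];
    }

    int main() {
      int8_t L[16] = {}; L[3] = -5;
      assert(extractSExtLane(L, 3) == -5);
      return 0;
    }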
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
@@ -3784,6 +3850,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(i64 FPR64:$Rn), dsub))>;
+def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
@@ -3949,10 +4020,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
-defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
-defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
-defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
@@ -4199,15 +4270,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
// AdvSIMD FMOV
-def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
"fmov", ".2d",
[(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8,
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
"fmov", ".2s",
[(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
"fmov", ".4s",
[(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
+ "fmov", ".4h",
+ [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
+ "fmov", ".8h",
+ [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+} // Predicates = [HasNEON, HasFullFP16]
// AdvSIMD MOVI
@@ -4235,7 +4314,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
+def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
simdimmtype10,
"movi", ".2d",
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
@@ -4296,10 +4375,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
(AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
// Per byte: 8b & 16b
-def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255,
+def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
"movi", ".8b",
[(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
-def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
+def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
"movi", ".16b",
[(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
@@ -4340,8 +4419,8 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
//----------------------------------------------------------------------------
let hasSideEffects = 0 in {
- defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
- defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
+ defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
+ defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}
// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
@@ -4349,18 +4428,18 @@ let hasSideEffects = 0 in {
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
@@ -4424,7 +4503,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
V128:$Rm, VectorIndexS:$idx)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v2f32 (fneg V64:$Rm)),
+ (vector_extract (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
VectorIndexS:$idx))),
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
@@ -4442,8 +4523,8 @@ defm : FMLSIndexedAfterNegPatterns<
defm : FMLSIndexedAfterNegPatterns<
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
-defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
+defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv2i32_indexed V64:$Rn,
@@ -4497,10 +4578,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
@@ -4573,7 +4654,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
-defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
+defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
int_aarch64_neon_rshrn>;
@@ -4608,7 +4689,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
-defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
+defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
@@ -5133,10 +5214,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
def : Pat<(i64 (anyext GPR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
-// When we need to explicitly zero-extend, we use an unsigned bitfield move
-// instruction (UBFM) on the enclosing super-reg.
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+ (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
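+// e.g. (illustrative), for "%r = zext i32 %x to i64" this emits:
+//   mov w0, w0
+// A write to a W register architecturally zeroes bits 63:32, and the
+// SUBREG_TO_REG wrapper records that the upper half is already zero, so no
+// explicit UBFM is needed.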
// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
@@ -5801,6 +5882,21 @@ def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
}
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
@@ -5852,6 +5948,45 @@ def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+// Patterns for nontemporal/no-allocate stores.
+// We have to resort to tricks to turn a single-input store into a store pair,
+// because there is no single-input nontemporal store, only STNP.
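+// e.g. (illustrative), a v2i64 nontemporal store of q0 to [x0] becomes:
+//   stnp d0, d<hi>, [x0]
+// where d<hi> is the high 64-bit element copied out of q0 (here via CPYi64).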
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR128:$Rt),
+ (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+ (CPYi64 FPR128:$Rt, (i64 1)),
+ GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
+class NTStore64Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR64:$Rt),
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+ (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
+def : Pat<(nontemporalstore GPR64:$Rt,
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
+ GPR64sp:$Rn, simm7s4:$offset)>;
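+// e.g. (illustrative), a nontemporal "str x1, [x0]" becomes:
+//   lsr x<t>, x1, #32
+//   stnp w1, w<t>, [x0]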
+} // AddedComplexity = 15
+} // Predicates = [IsLE]
+
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 82f77a7..566aa2c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -41,54 +41,85 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
+STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
+STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
-// Place holder while testing unscaled load/store combining
-static cl::opt<bool> EnableAArch64UnscaledMemOp(
- "aarch64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
+namespace llvm {
+void initializeAArch64LoadStoreOptPass(PassRegistry &);
+}
+
+#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
+
+typedef struct LdStPairFlags {
+ // If a matching instruction is found, MergeForward is set to true if the
+ // merge is to remove the first instruction and replace the second with
+ // a pair-wise insn, and false if the reverse is true.
+ bool MergeForward;
+
+ // SExtIdx gives the index of the result of the load pair that must be
+ // extended. The value of SExtIdx assumes that the paired load produces the
+ // value in this order: (I, returned iterator), i.e., -1 means no value has
+ // to be extended, 0 means I, and 1 means the returned iterator.
+ int SExtIdx;
+
+ LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+
+ void setMergeForward(bool V = true) { MergeForward = V; }
+ bool getMergeForward() const { return MergeForward; }
+
+ void setSExtIdx(int V) { SExtIdx = V; }
+ int getSExtIdx() const { return SExtIdx; }
+
+} LdStPairFlags;
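+// Intended use, as a sketch (mirroring tryToMergeLdStInst below; E is the
+// basic block end iterator):
+//   LdStPairFlags Flags;
+//   MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, Limit);
+//   if (Paired != E)
+//     MBBI = mergePairedInsns(MBBI, Paired, Flags);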
+
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const AArch64Subtarget *Subtarget;
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
- // If a matching instruction is found, MergeForward is set to true if the
- // merge is to remove the first instruction and replace the second with
- // a pair-wise insn, and false if the reverse is true.
- // \p SExtIdx[out] gives the index of the result of the load pair that
- // must be extended. The value of SExtIdx assumes that the paired load
- // produces the value in this order: (I, returned iterator), i.e.,
- // -1 means no value has to be extended, 0 means I, and 1 means the
- // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, int &SExtIdx,
+ LdStPairFlags &Flags,
unsigned Limit);
+
+ // Scan the instructions looking for a store that writes to the address from
+ // which the current load instruction reads. Return true if one is found.
+ bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
+ MachineBasicBlock::iterator &StoreI);
+
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
- // \p SExtIdx index of the result that must be extended for a paired load.
- // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward,
- int SExtIdx);
+ MachineBasicBlock::iterator Paired,
+ const LdStPairFlags &Flags);
+
+ // Promote the load that reads directly from the address stored to.
+ MachineBasicBlock::iterator
+ promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
+ MachineBasicBlock::iterator StoreI);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan forwards.
MachineBasicBlock::iterator
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
- int Value);
+ int UnscaledOffset);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -96,97 +127,177 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
MachineBasicBlock::iterator
findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
- // Merge a pre-index base register update into a ld/st instruction.
- MachineBasicBlock::iterator
- mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update);
+ // Find an instruction that updates the base register of the ld/st
+ // instruction.
+ bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
+ unsigned BaseReg, int Offset);
- // Merge a post-index base register update into a ld/st instruction.
+ // Merge a pre- or post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
- mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update);
+ mergeUpdateInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update, bool IsPreIdx);
+
+ // Find and merge foldable ldr/str instructions.
+ bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB);
+ // Find and promote load instructions which read directly from store.
+ bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
+
+ // Check if converting two narrow loads into a single wider load with
+ // bitfield extracts could be enabled.
+ bool enableNarrowLdMerge(MachineFunction &Fn);
+
+ bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "AArch64 load / store optimization pass";
+ return AARCH64_LOAD_STORE_OPT_NAME;
}
-
-private:
- int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace
-static bool isUnscaledLdst(unsigned Opc) {
+INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
+ AARCH64_LOAD_STORE_OPT_NAME, false, false)
+
+static bool isUnscaledLdSt(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::STURSi:
- return true;
case AArch64::STURDi:
- return true;
case AArch64::STURQi:
- return true;
+ case AArch64::STURBBi:
+ case AArch64::STURHHi:
case AArch64::STURWi:
- return true;
case AArch64::STURXi:
- return true;
case AArch64::LDURSi:
- return true;
case AArch64::LDURDi:
- return true;
case AArch64::LDURQi:
- return true;
case AArch64::LDURWi:
- return true;
case AArch64::LDURXi:
- return true;
case AArch64::LDURSWi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
return true;
}
}
-// Size in bytes of the data moved by an unscaled load or store
-int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
- switch (MemMI->getOpcode()) {
+static bool isUnscaledLdSt(MachineInstr *MI) {
+ return isUnscaledLdSt(MI->getOpcode());
+}
+
+static unsigned getBitExtrOpcode(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode.");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ return AArch64::UBFMWri;
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ return AArch64::SBFMWri;
+ }
+}
+
+static bool isNarrowStore(unsigned Opc) {
+ switch (Opc) {
default:
- llvm_unreachable("Opcode has unknown size!");
+ return false;
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return true;
+ }
+}
+
+static bool isNarrowStore(MachineInstr *MI) {
+ return isNarrowStore(MI->getOpcode());
+}
+
+static bool isNarrowLoad(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ return true;
+ }
+}
+
+static bool isNarrowLoad(MachineInstr *MI) {
+ return isNarrowLoad(MI->getOpcode());
+}
+
+// Size in bytes of the data moved by MI, used both to convert scaled
+// immediates to byte offsets and as the pairing stride for unscaled insns.
+static int getMemScale(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Opcode has unknown scale!");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ return 1;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return 2;
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
case AArch64::STRSui:
case AArch64::STURSi:
- return 4;
- case AArch64::STRDui:
- case AArch64::STURDi:
- return 8;
- case AArch64::STRQui:
- case AArch64::STURQi:
- return 16;
case AArch64::STRWui:
case AArch64::STURWi:
- return 4;
- case AArch64::STRXui:
- case AArch64::STURXi:
- return 8;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPWi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDPDi:
+ case AArch64::LDPXi:
+ case AArch64::STPDi:
+ case AArch64::STPXi:
return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
return 16;
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- return 4;
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- return 8;
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- return 4;
}
}
@@ -203,6 +314,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURDi:
case AArch64::STRQui:
case AArch64::STURQi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
case AArch64::STRWui:
case AArch64::STURWi:
case AArch64::STRXui:
@@ -219,11 +334,23 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURSi:
case AArch64::LDRSui:
case AArch64::LDURSi:
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDRBBui;
+ case AArch64::LDRSHWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURSBWi:
+ return AArch64::LDURBBi;
+ case AArch64::LDURSHWi:
+ return AArch64::LDURHHi;
}
}
@@ -240,6 +367,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::STRQui:
case AArch64::STURQi:
return AArch64::STPQi;
+ case AArch64::STRBBui:
+ return AArch64::STRHHui;
+ case AArch64::STRHHui:
+ return AArch64::STRWui;
+ case AArch64::STURBBi:
+ return AArch64::STURHHi;
+ case AArch64::STURHHi:
+ return AArch64::STURWi;
case AArch64::STRWui:
case AArch64::STURWi:
return AArch64::STPWi;
@@ -264,6 +399,48 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
+ case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHWi:
+ return AArch64::LDURWi;
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ return AArch64::LDURHHi;
+ }
+}
+
+static bool isMatchingStore(MachineInstr *LoadInst,
+ MachineInstr *StoreInst) {
+ unsigned LdOpc = LoadInst->getOpcode();
+ unsigned StOpc = StoreInst->getOpcode();
+ switch (LdOpc) {
+ default:
+ llvm_unreachable("Unsupported load instruction!");
+ case AArch64::LDRBBui:
+ return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
+ StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
+ case AArch64::LDURBBi:
+ return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
+ StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
+ case AArch64::LDRHHui:
+ return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
+ StOpc == AArch64::STRXui;
+ case AArch64::LDURHHi:
+ return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
+ StOpc == AArch64::STURXi;
+ case AArch64::LDRWui:
+ return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
+ case AArch64::LDURWi:
+ return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
+ case AArch64::LDRXui:
+ return StOpc == AArch64::STRXui;
+ case AArch64::LDURXi:
+ return StOpc == AArch64::STURXi;
}
}
@@ -277,6 +454,10 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::STRDpre;
case AArch64::STRQui:
return AArch64::STRQpre;
+ case AArch64::STRBBui:
+ return AArch64::STRBBpre;
+ case AArch64::STRHHui:
+ return AArch64::STRHHpre;
case AArch64::STRWui:
return AArch64::STRWpre;
case AArch64::STRXui:
@@ -287,12 +468,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::LDRDpre;
case AArch64::LDRQui:
return AArch64::LDRQpre;
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBpre;
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHpre;
case AArch64::LDRWui:
return AArch64::LDRWpre;
case AArch64::LDRXui:
return AArch64::LDRXpre;
case AArch64::LDRSWui:
return AArch64::LDRSWpre;
+ case AArch64::LDPSi:
+ return AArch64::LDPSpre;
+ case AArch64::LDPSWi:
+ return AArch64::LDPSWpre;
+ case AArch64::LDPDi:
+ return AArch64::LDPDpre;
+ case AArch64::LDPQi:
+ return AArch64::LDPQpre;
+ case AArch64::LDPWi:
+ return AArch64::LDPWpre;
+ case AArch64::LDPXi:
+ return AArch64::LDPXpre;
+ case AArch64::STPSi:
+ return AArch64::STPSpre;
+ case AArch64::STPDi:
+ return AArch64::STPDpre;
+ case AArch64::STPQi:
+ return AArch64::STPQpre;
+ case AArch64::STPWi:
+ return AArch64::STPWpre;
+ case AArch64::STPXi:
+ return AArch64::STPXpre;
}
}
@@ -306,6 +513,10 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::STRDpost;
case AArch64::STRQui:
return AArch64::STRQpost;
+ case AArch64::STRBBui:
+ return AArch64::STRBBpost;
+ case AArch64::STRHHui:
+ return AArch64::STRHHpost;
case AArch64::STRWui:
return AArch64::STRWpost;
case AArch64::STRXui:
@@ -316,19 +527,111 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::LDRDpost;
case AArch64::LDRQui:
return AArch64::LDRQpost;
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBpost;
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHpost;
case AArch64::LDRWui:
return AArch64::LDRWpost;
case AArch64::LDRXui:
return AArch64::LDRXpost;
case AArch64::LDRSWui:
return AArch64::LDRSWpost;
+ case AArch64::LDPSi:
+ return AArch64::LDPSpost;
+ case AArch64::LDPSWi:
+ return AArch64::LDPSWpost;
+ case AArch64::LDPDi:
+ return AArch64::LDPDpost;
+ case AArch64::LDPQi:
+ return AArch64::LDPQpost;
+ case AArch64::LDPWi:
+ return AArch64::LDPWpost;
+ case AArch64::LDPXi:
+ return AArch64::LDPXpost;
+ case AArch64::STPSi:
+ return AArch64::STPSpost;
+ case AArch64::STPDi:
+ return AArch64::STPDpost;
+ case AArch64::STPQi:
+ return AArch64::STPQpost;
+ case AArch64::STPWi:
+ return AArch64::STPWpost;
+ case AArch64::STPXi:
+ return AArch64::STPXpost;
}
}
+static bool isPairedLdSt(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ return true;
+ }
+}
+
+static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
+ unsigned PairedRegOp = 0) {
+ assert(PairedRegOp < 2 && "Unexpected register operand idx.");
+ unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
+ return MI->getOperand(Idx);
+}
+
+static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
+ unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
+ return MI->getOperand(Idx);
+}
+
+static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
+ unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
+ return MI->getOperand(Idx);
+}
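+// e.g., for the paired store "stp x0, x1, [sp, #16]" (machine operands
+// x0, x1, sp, #2 after scaling by 8): getLdStRegOp(MI, 1) is x1,
+// getLdStBaseOp(MI) is sp, and getLdStOffsetOp(MI) is the immediate #2.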
+
+static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
+ MachineInstr *StoreInst) {
+ assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
+ int LoadSize = getMemScale(LoadInst);
+ int StoreSize = getMemScale(StoreInst);
+ int UnscaledStOffset = isUnscaledLdSt(StoreInst)
+ ? getLdStOffsetOp(StoreInst).getImm()
+ : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
+ int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
+ ? getLdStOffsetOp(LoadInst).getImm()
+ : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
+ return (UnscaledStOffset <= UnscaledLdOffset) &&
+ (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
+}
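+// e.g., for "str w1, [x0, #4]" followed by "ldrh w2, [x0, #6]":
+// UnscaledStOffset = 4, StoreSize = 4, UnscaledLdOffset = 6, LoadSize = 2,
+// so 4 <= 6 and 6 + 2 <= 4 + 4: the loaded halfword lies entirely within
+// the stored word and the load can be promoted.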
+
+// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
+ (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward, int SExtIdx) {
+ const LdStPairFlags &Flags) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -338,25 +641,26 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
+ int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
- bool IsUnscaled = isUnscaledLdst(Opc);
- int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
+ bool IsUnscaled = isUnscaledLdSt(Opc);
+ int OffsetStride = IsUnscaled ? getMemScale(I) : 1;
+ bool MergeForward = Flags.getMergeForward();
unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
- MachineOperand &BaseRegOp =
- MergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ const MachineOperand &BaseRegOp =
+ MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
- if (I->getOperand(2).getImm() ==
- Paired->getOperand(2).getImm() + OffsetStride) {
+ if (getLdStOffsetOp(I).getImm() ==
+ getLdStOffsetOp(Paired).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
// Here we swapped the assumption made for SExtIdx.
@@ -368,18 +672,135 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
RtMI = I;
Rt2MI = Paired;
}
- // Handle Unscaled
- int OffsetImm = RtMI->getOperand(2).getImm();
- if (IsUnscaled && EnableAArch64UnscaledMemOp)
- OffsetImm /= OffsetStride;
+
+ int OffsetImm = getLdStOffsetOp(RtMI).getImm();
+
+ if (isNarrowLoad(Opc)) {
+ // Change the scaled offset from small to large type.
+ if (!IsUnscaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+ OffsetImm /= 2;
+ }
+ MachineInstr *RtNewDest = MergeForward ? I : Paired;
+ // When merging small (< 32 bit) loads for big-endian targets, the order of
+ // the component parts gets swapped.
+ if (!Subtarget->isLittleEndian())
+ std::swap(RtMI, Rt2MI);
+ // Construct the new load instruction.
+ MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
+ NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(RtNewDest))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+
+ // Copy MachineMemOperands from the original loads.
+ concatenateMemOperands(NewMemMI, I, Paired);
+
+ DEBUG(
+ dbgs()
+ << "Creating the new load and extract. Replacing instructions:\n ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(Paired->print(dbgs()));
+ DEBUG(dbgs() << " with instructions:\n ");
+ DEBUG((NewMemMI)->print(dbgs()));
+
+ int Width = getMemScale(I) == 1 ? 8 : 16;
+ int LSBLow = 0;
+ int LSBHigh = Width;
+ int ImmsLow = LSBLow + Width - 1;
+ int ImmsHigh = LSBHigh + Width - 1;
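+ // e.g., when merging two halfword loads into one word load, Width = 16,
+ // so LSBLow = 0, LSBHigh = 16, ImmsLow = 15, ImmsHigh = 31; the extracts
+ // built below materialize as "ubfx wHi, wNew, #16, #16" and
+ // "and wLo, wNew, #0xffff" (register names illustrative).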
+ MachineInstr *ExtDestMI = MergeForward ? Paired : I;
+ if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
+ // Create the bitfield extract for high bits.
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(Rt2MI)))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
+ } else {
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
+
+ // Create the bitfield extract for high bits.
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(Rt2MI)))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
+ }
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI1)->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI2)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions.
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+ return NextI;
+ }
// Construct the new instruction.
- MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
- I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(RtMI->getOperand(0))
- .addOperand(Rt2MI->getOperand(0))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm);
+ MachineInstrBuilder MIB;
+ if (isNarrowStore(Opc)) {
+ // Change the scaled offset from small to large type.
+ if (!IsUnscaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+ OffsetImm /= 2;
+ }
+ MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+ // Copy MachineMemOperands from the original stores.
+ concatenateMemOperands(MIB, I, Paired);
+ } else {
+ // Handle Unscaled
+ if (IsUnscaled)
+ OffsetImm /= OffsetStride;
+ MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(RtMI))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+ }
+
(void)MIB;
// FIXME: Do we need/want to copy the mem operands from the source
@@ -439,13 +860,112 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
+ MachineBasicBlock::iterator StoreI) {
+ MachineBasicBlock::iterator NextI = LoadI;
+ ++NextI;
+
+ int LoadSize = getMemScale(LoadI);
+ int StoreSize = getMemScale(StoreI);
+ unsigned LdRt = getLdStRegOp(LoadI).getReg();
+ unsigned StRt = getLdStRegOp(StoreI).getReg();
+ bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
+
+ assert((IsStoreXReg ||
+ TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
+ "Unexpected RegClass");
+
+ MachineInstr *BitExtMI;
+ if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
+ // Remove the load entirely if it targets the same register that holds
+ // the stored value (the reloaded value is already in place).
+ if (StRt == LdRt && LoadSize == 8) {
+ DEBUG(dbgs() << "Remove load instruction:\n ");
+ DEBUG(LoadI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ LoadI->eraseFromParent();
+ return NextI;
+ }
+ // Replace the load with a mov if the load and store are the same size.
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
+ .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
+ .addReg(StRt)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
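+ // i.e., this emits "mov <LdRt>, <StRt>", forwarding the stored register
+ // to the load's users instead of reloading the value from memory.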
+ } else {
+ // FIXME: Currently we disable this transformation in big-endian targets as
+ // performance and correctness are verified only in little-endian.
+ if (!Subtarget->isLittleEndian())
+ return NextI;
+ bool IsUnscaled = isUnscaledLdSt(LoadI);
+ assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
+ assert(LoadSize <= StoreSize && "Invalid load size");
+ int UnscaledLdOffset = IsUnscaled
+ ? getLdStOffsetOp(LoadI).getImm()
+ : getLdStOffsetOp(LoadI).getImm() * LoadSize;
+ int UnscaledStOffset = IsUnscaled
+ ? getLdStOffsetOp(StoreI).getImm()
+ : getLdStOffsetOp(StoreI).getImm() * StoreSize;
+ int Width = LoadSize * 8;
+ int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
+ int Imms = Immr + Width - 1;
+ unsigned DestReg = IsStoreXReg
+ ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
+ &AArch64::GPR64RegClass)
+ : LdRt;
+
+ assert((UnscaledLdOffset >= UnscaledStOffset &&
+ (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
+ "Invalid offset");
+
+ if (UnscaledLdOffset == UnscaledStOffset) {
+ uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
+ | ((Immr) << 6) // immr
+ | ((Imms) << 0) // imms
+ ;
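+ // e.g., promoting "ldrh w2, [x0]" from "str w1, [x0]": Width = 16,
+ // Immr = 0, Imms = 15, so the encoding selects the #0xffff mask and the
+ // load becomes "and w2, w1, #0xffff" (registers illustrative).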
+
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
+ DestReg)
+ .addReg(StRt)
+ .addImm(AndMaskEncoded);
+ } else {
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
+ DestReg)
+ .addReg(StRt)
+ .addImm(Immr)
+ .addImm(Imms);
+ }
+ }
+
+ DEBUG(dbgs() << "Promoting load by replacing :\n ");
+ DEBUG(StoreI->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(LoadI->print(dbgs()));
+ DEBUG(dbgs() << " with instructions:\n ");
+ DEBUG(StoreI->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions.
+ LoadI->eraseFromParent();
+ return NextI;
+}
+
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
+static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
BitVector &UsedRegs,
const TargetRegisterInfo *TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
@@ -464,16 +984,12 @@ static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
}
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
- if (!IsUnscaled && (Offset > 63 || Offset < -64))
- return false;
- if (IsUnscaled) {
- // Convert the byte-offset used by unscaled into an "element" offset used
- // by the scaled pair load/store instructions.
- int ElemOffset = Offset / OffsetStride;
- if (ElemOffset > 63 || ElemOffset < -64)
- return false;
- }
- return true;
+ // Convert the byte-offset used by unscaled into an "element" offset used
+ // by the scaled pair load/store instructions.
+ if (IsUnscaled)
+ Offset /= OffsetStride;
+
+ return Offset <= 63 && Offset >= -64;
}
// Do alignment, specialized to power of 2 and for signed ints,
@@ -507,12 +1023,65 @@ static bool mayAlias(MachineInstr *MIa,
return false;
}
+bool AArch64LoadStoreOpt::findMatchingStore(
+ MachineBasicBlock::iterator I, unsigned Limit,
+ MachineBasicBlock::iterator &StoreI) {
+ MachineBasicBlock::iterator E = I->getParent()->begin();
+ MachineBasicBlock::iterator MBBI = I;
+ MachineInstr *FirstMI = I;
+ unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+
+ // Track which registers have been modified and used between the first insn
+ // and the second insn.
+ BitVector ModifiedRegs, UsedRegs;
+ ModifiedRegs.resize(TRI->getNumRegs());
+ UsedRegs.resize(TRI->getNumRegs());
+
+ for (unsigned Count = 0; MBBI != E && Count < Limit;) {
+ --MBBI;
+ MachineInstr *MI = MBBI;
+ // Skip DBG_VALUE instructions. Otherwise debug info can affect the
+ // optimization by changing how far we scan.
+ if (MI->isDebugValue())
+ continue;
+ // Now that we know this is a real instruction, count it.
+ ++Count;
+
+ // If the load instruction reads directly from the address to which the
+ // store instruction writes and the stored value is not modified, we can
+ // promote the load. Since we do not handle stores with pre-/post-index,
+ // it's unnecessary to check if BaseReg is modified by the store itself.
+ if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
+ BaseReg == getLdStBaseOp(MI).getReg() &&
+ isLdOffsetInRangeOfSt(FirstMI, MI) &&
+ !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
+ StoreI = MBBI;
+ return true;
+ }
+
+ if (MI->isCall())
+ return false;
+
+ // Update modified / uses register lists.
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+
+ // Otherwise, if the base register is modified, we have no match, so
+ // return early.
+ if (ModifiedRegs[BaseReg])
+ return false;
+
+ // If we encounter a store aliased with the load, return early.
+ if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
+ return false;
+ }
+ return false;
+}
+
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, int &SExtIdx,
- unsigned Limit) {
+ LdStPairFlags &Flags, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -520,21 +1089,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
unsigned Opc = FirstMI->getOpcode();
bool MayLoad = FirstMI->mayLoad();
- bool IsUnscaled = isUnscaledLdst(Opc);
- unsigned Reg = FirstMI->getOperand(0).getReg();
- unsigned BaseReg = FirstMI->getOperand(1).getReg();
- int Offset = FirstMI->getOperand(2).getImm();
+ bool IsUnscaled = isUnscaledLdSt(FirstMI);
+ unsigned Reg = getLdStRegOp(FirstMI).getReg();
+ unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+ int Offset = getLdStOffsetOp(FirstMI).getImm();
+ bool IsNarrowStore = isNarrowStore(Opc);
+
+ // For narrow stores, find only the case where the stored value is WZR.
+ if (IsNarrowStore && Reg != AArch64::WZR)
+ return E;
// Early exit if the first instruction modifies the base register.
// e.g., ldr x0, [x0]
- // Early exit if the offset if not possible to match. (6 bits of positive
- // range, plus allow an extra one in case we find a later insn that matches
- // with Offset-1
if (FirstMI->modifiesRegister(BaseReg, TRI))
return E;
- int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
- if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
+
+ // Early exit if the offset is not possible to match. (6 bits of positive
+ // range, plus allow an extra one in case we find a later insn that matches
+ // with Offset-1)
+ int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+ if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
+ !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return E;
// Track which registers have been modified and used between the first insn
@@ -557,18 +1132,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
++Count;
bool CanMergeOpc = Opc == MI->getOpcode();
- SExtIdx = -1;
+ Flags.setSExtIdx(-1);
if (!CanMergeOpc) {
bool IsValidLdStrOpc;
unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
- if (!IsValidLdStrOpc)
- continue;
+ assert(IsValidLdStrOpc &&
+ "Given Opc should be a Load or Store with an immediate");
// Opc will be the first instruction in the pair.
- SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
}
- if (CanMergeOpc && MI->getOperand(2).isImm()) {
+ if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
+ assert(MI->mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -579,8 +1155,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Pairwise instructions have a 7-bit signed offset field. Single insns
// have a 12-bit unsigned offset field. To be a valid combine, the
// final offset must be in range.
- unsigned MIBaseReg = MI->getOperand(1).getReg();
- int MIOffset = MI->getOperand(2).getImm();
+ unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
+ int MIOffset = getLdStOffsetOp(MI).getImm();
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
(Offset + OffsetStride == MIOffset))) {
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
@@ -591,30 +1167,43 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return E;
// If the resultant immediate offset of merging these instructions
// is out of range for a pairwise instruction, bail and keep looking.
- bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
- if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
+ bool MIIsUnscaled = isUnscaledLdSt(MI);
+ bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
+ if (!IsNarrowLoad &&
+ !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
- // If the alignment requirements of the paired (scaled) instruction
- // can't express the offset of the unscaled input, bail and keep
- // looking.
- if (IsUnscaled && EnableAArch64UnscaledMemOp &&
- (alignTo(MinOffset, OffsetStride) != MinOffset)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
+
+ if (IsNarrowLoad || IsNarrowStore) {
+ // If the alignment requirements of the scaled wide load/store
+ // instruction can't express the offset of the scaled narrow
+ // input, bail and keep looking.
+ if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(MI);
- continue;
+ continue;
+ }
+ } else {
+ // If the alignment requirements of the paired (scaled) instruction
+ // can't express the offset of the unscaled input, bail and keep
+ // looking.
+ if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ MemInsns.push_back(MI);
+ continue;
+ }
}
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (MayLoad && Reg == MI->getOperand(0).getReg()) {
+ // For narrow stores, allow only when the stored value is the same
+ // (i.e., WZR).
+ if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
+ (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
@@ -622,10 +1211,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
- if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
+ !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
!mayAlias(MI, MemInsns, TII)) {
- MergeForward = false;
+ Flags.setMergeForward(false);
return MBBI;
}
@@ -633,11 +1222,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !(FirstMI->mayLoad() &&
- UsedRegs[FirstMI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
+ !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
!mayAlias(FirstMI, MemInsns, TII)) {
- MergeForward = true;
+ Flags.setMergeForward(true);
return MBBI;
}
// Unable to combine these instructions due to interference in between.
@@ -666,51 +1254,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update) {
- assert((Update->getOpcode() == AArch64::ADDXri ||
- Update->getOpcode() == AArch64::SUBXri) &&
- "Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
- // Return the instruction following the merged instruction, which is
- // the instruction following our unmerged load. Unless that's the add/sub
- // instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
-
- int Value = Update->getOperand(2).getImm();
- assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
- "Can't merge 1 << 12 offset into pre-indexed load / store");
- if (Update->getOpcode() == AArch64::SUBXri)
- Value = -Value;
-
- unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
- MachineInstrBuilder MIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
- .addImm(Value);
- (void)MIB;
-
- DEBUG(dbgs() << "Creating pre-indexed load/store.");
- DEBUG(dbgs() << " Replacing instructions:\n ");
- DEBUG(I->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG(Update->print(dbgs()));
- DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
-
- // Erase the old instructions for the block.
- I->eraseFromParent();
- Update->eraseFromParent();
-
- return NextI;
-}
-
-MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
- MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
+AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update,
+ bool IsPreIdx) {
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
@@ -723,20 +1269,36 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
- "Can't merge 1 << 12 offset into post-indexed load / store");
+ "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
if (Update->getOpcode() == AArch64::SUBXri)
Value = -Value;
- unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
- MachineInstrBuilder MIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
- .addImm(Value);
+ unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
+ : getPostIndexedOpcode(I->getOpcode());
+ MachineInstrBuilder MIB;
+ if (!isPairedLdSt(I)) {
+ // Non-paired instruction.
+ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(getLdStBaseOp(I))
+ .addImm(Value);
+ } else {
+ // Paired instruction.
+ int Scale = getMemScale(I);
+ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I, 0))
+ .addOperand(getLdStRegOp(I, 1))
+ .addOperand(getLdStBaseOp(I))
+ .addImm(Value / Scale);
+ }
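+ // e.g., folding "add x0, x0, #16" into "ldp x1, x2, [x0]" yields the
+ // post-indexed "ldp x1, x2, [x0], #16"; the encoded immediate is the
+ // update value divided by the 8-byte scale, i.e. 2.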
(void)MIB;
- DEBUG(dbgs() << "Creating post-indexed load/store.");
+ if (IsPreIdx)
+ DEBUG(dbgs() << "Creating pre-indexed load/store.");
+ else
+ DEBUG(dbgs() << "Creating post-indexed load/store.");
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
@@ -752,8 +1314,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
return NextI;
}
-static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
- int Offset) {
+bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
+ MachineInstr *MI,
+ unsigned BaseReg, int Offset) {
switch (MI->getOpcode()) {
default:
break;
@@ -769,44 +1332,65 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
// Watch out for 1 << 12 shifted value.
if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
break;
- // If the instruction has the base register as source and dest and the
- // immediate will fit in a signed 9-bit integer, then we have a match.
- if (MI->getOperand(0).getReg() == BaseReg &&
- MI->getOperand(1).getReg() == BaseReg &&
- MI->getOperand(2).getImm() <= 255 &&
- MI->getOperand(2).getImm() >= -256) {
- // If we have a non-zero Offset, we check that it matches the amount
- // we're adding to the register.
- if (!Offset || Offset == MI->getOperand(2).getImm())
- return true;
+
+ // The update instruction source and destination register must be the
+ // same as the load/store base register.
+ if (MI->getOperand(0).getReg() != BaseReg ||
+ MI->getOperand(1).getReg() != BaseReg)
+ break;
+
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ int UpdateOffset = MI->getOperand(2).getImm();
+ // For non-paired load/store instructions, the immediate must fit in a
+ // signed 9-bit integer.
+ if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
+ break;
+
+ // For paired load/store instructions, the immediate must be a multiple of
+ // the scaling factor. The scaled offset must also fit into a signed 7-bit
+ // integer.
+ if (IsPairedInsn) {
+ int Scale = getMemScale(MemMI);
+ if (UpdateOffset % Scale != 0)
+ break;
+
+ int ScaledOffset = UpdateOffset / Scale;
+ if (ScaledOffset > 63 || ScaledOffset < -64)
+ break;
}
+
+ // If we have a non-zero Offset, we check that it matches the amount
+ // we're adding to the register.
+ if (!Offset || Offset == MI->getOperand(2).getImm())
+ return true;
break;
}
return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
- MachineBasicBlock::iterator I, unsigned Limit, int Value) {
+ MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm() *
- TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
- // If the base register overlaps the destination register, we can't
- // merge the update.
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ // Scan forward looking for post-index opportunities. Updating instructions
+ // can't be formed if the memory instruction doesn't have the offset we're
+ // looking for.
+ if (MIUnscaledOffset != UnscaledOffset)
return E;
- // Scan forward looking for post-index opportunities.
- // Updating instructions can't be formed if the memory insn already
- // has an offset other than the value we're looking for.
- if (Offset != Value)
- return E;
+ // If the base register overlaps a destination register, we can't
+ // merge the update.
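+ // e.g., "ldp x0, x1, [x0]" cannot merge a later "add x0, x0, #16",
+ // because the load already overwrites the base register.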
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
@@ -825,7 +1409,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(MI, BaseReg, Value))
+ if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -845,21 +1429,22 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm();
- unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int Offset = getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
return E;
- // If the base register overlaps the destination register, we can't
+ // If the base register overlaps a destination register, we can't
// merge the update.
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
- return E;
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
@@ -878,7 +1463,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
+ if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -892,17 +1477,101 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
-bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ // If this is a volatile load, don't mess with it.
+ if (MI->hasOrderedMemoryRef())
+ return false;
+
+ // Make sure this is a reg+imm.
+ // FIXME: It is possible to extend it to handle reg+reg cases.
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ // Look backward up to ScanLimit instructions.
+ MachineBasicBlock::iterator StoreI;
+ if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
+ ++NumLoadsFromStoresPromoted;
+ // Promote the load. Keeping the iterator straight is a
+ // pain, so we let the merge routine tell us what the next instruction
+ // is after it's done mucking about.
+ MBBI = promoteLoadFromStore(MBBI, StoreI);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64LoadStoreOpt::tryToMergeLdStInst(
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ // If this is a volatile load/store, don't mess with it.
+ if (MI->hasOrderedMemoryRef())
+ return false;
+
+ // Make sure this is a reg+imm (as opposed to an address reloc).
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ // Check if this load/store has a hint to avoid pair formation.
+ // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+ if (TII->isLdStPairSuppressed(MI))
+ return false;
+
+ // Look ahead up to ScanLimit instructions for a pairable instruction.
+ LdStPairFlags Flags;
+ MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit);
+ if (Paired != E) {
+ if (isNarrowLoad(MI)) {
+ ++NumNarrowLoadsPromoted;
+ } else if (isNarrowStore(MI)) {
+ ++NumZeroStoresPromoted;
+ } else {
+ ++NumPairCreated;
+ if (isUnscaledLdSt(MI))
+ ++NumUnscaledPairCreated;
+ }
+
+ // Merge the loads into a pair. Keeping the iterator straight is a
+ // pain, so we let the merge routine tell us what the next instruction
+ // is after it's done mucking about.
+ MBBI = mergePairedInsns(MBBI, Paired, Flags);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
+ bool enableNarrowLdOpt) {
bool Modified = false;
- // Two tranformations to do here:
- // 1) Find loads and stores that can be merged into a single load or store
+ // Four transformations to do here:
+ // 1) Find loads that directly read from stores and promote them by
+ // replacing with mov instructions. If the store is wider than the load,
+ // the load will be replaced with a bitfield extract.
+ // e.g.,
+ // str w1, [x0, #4]
+ // ldrh w2, [x0, #6]
+ // ; becomes
+ // str w1, [x0, #4]
+ // lsr w2, w1, #16
+ // 2) Find narrow loads that can be converted into a single wider load
+ // with bitfield extract instructions.
+ // e.g.,
+ // ldrh w0, [x2]
+ // ldrh w1, [x2, #2]
+ // ; becomes
+ // ldr w0, [x2]
+ // ubfx w1, w0, #16, #16
+ // and w0, w0, #ffff
+ // 3) Find loads and stores that can be merged into a single load or store
// pair instruction.
// e.g.,
// ldr x0, [x2]
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
- // 2) Find base register updates that can be merged into the load or store
+ // 4) Find base register updates that can be merged into the load or store
// as a base-reg writeback.
// e.g.,
// ldr x0, [x2]
@@ -918,6 +1587,69 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi: {
+ if (tryToPromoteLoadFromStore(MBBI)) {
+ Modified = true;
+ break;
+ }
+ ++MBBI;
+ break;
+ }
+ // FIXME: Do the other instructions.
+ }
+ }
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ enableNarrowLdOpt && MBBI != E;) {
+ MachineInstr *MI = MBBI;
+ switch (MI->getOpcode()) {
+ default:
+ // Just move on to the next instruction.
+ ++MBBI;
+ break;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
+ case AArch64::STRBBui:
+ case AArch64::STRHHui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ case AArch64::STURBBi:
+ case AArch64::STURHHi: {
+ if (tryToMergeLdStInst(MBBI)) {
+ Modified = true;
+ break;
+ }
+ ++MBBI;
+ break;
+ }
+ // FIXME: Do the other instructions.
+ }
+ }
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ MachineInstr *MI = MBBI;
+ switch (MI->getOpcode()) {
+ default:
+ // Just move on to the next instruction.
+ ++MBBI;
+ break;
+ // Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
@@ -929,7 +1661,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDRXui:
case AArch64::LDRWui:
case AArch64::LDRSWui:
- // do the unscaled versions as well
+ // Unscaled instructions.
case AArch64::STURSi:
case AArch64::STURDi:
case AArch64::STURQi:
@@ -941,37 +1673,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURWi:
case AArch64::LDURXi:
case AArch64::LDURSWi: {
- // If this is a volatile load/store, don't mess with it.
- if (MI->hasOrderedMemoryRef()) {
- ++MBBI;
- break;
- }
- // Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
- ++MBBI;
- break;
- }
- // Check if this load/store has a hint to avoid pair formation.
- // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
- if (TII->isLdStPairSuppressed(MI)) {
- ++MBBI;
- break;
- }
- // Look ahead up to ScanLimit instructions for a pairable instruction.
- bool MergeForward = false;
- int SExtIdx = -1;
- MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
- if (Paired != E) {
- // Merge the loads into a pair. Keeping the iterator straight is a
- // pain, so we let the merge routine tell us what the next instruction
- // is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
-
+ if (tryToMergeLdStInst(MBBI)) {
Modified = true;
- ++NumPairCreated;
- if (isUnscaledLdst(MI->getOpcode()))
- ++NumUnscaledPairCreated;
break;
}
++MBBI;
@@ -992,17 +1695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
+ // Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STRXui:
case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
- // do the unscaled versions as well
+ case AArch64::LDRHHui:
+ case AArch64::LDRBBui:
+ // Unscaled instructions.
case AArch64::STURSi:
case AArch64::STURDi:
case AArch64::STURQi:
@@ -1012,25 +1720,41 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURDi:
case AArch64::LDURQi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
+ // Paired instructions.
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi: {
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
+ if (!getLdStOffsetOp(MI).isImm()) {
++MBBI;
break;
}
- // Look ahead up to ScanLimit instructions for a mergable instruction.
+ // Look forward to try to form a post-index instruction. For example,
+ // ldr x0, [x20]
+ // add x20, x20, #32
+ // merged into:
+ // ldr x0, [x20], #32
MachineBasicBlock::iterator Update =
findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePostIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
Modified = true;
++NumPostFolded;
break;
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
- if (isUnscaledLdst(Opc)) {
+ if (isUnscaledLdSt(Opc)) {
++MBBI;
break;
}
@@ -1043,28 +1767,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePreIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
Modified = true;
++NumPreFolded;
break;
}
+ // The immediate in the load/store is scaled by the size of the memory
+ // operation. The immediate in the add we're looking for,
+ // however, is not, so adjust here.
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
// Look forward to try to find a post-index instruction. For example,
// ldr x1, [x0, #64]
// add x0, x0, #64
// merged into:
// ldr x1, [x0, #64]!
-
- // The immediate in the load/store is scaled by the size of the register
- // being loaded. The immediate in the add we're looking for,
- // however, is not, so adjust here.
- int Value = MI->getOperand(2).getImm() *
- TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
- ->getSize();
- Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
+ Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePreIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
Modified = true;
++NumPreFolded;
break;
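A worked example of the scaling adjustment above, assuming the usual scaled-immediate encoding: for "ldr x1, [x0, #64]" (an LDRXui) getLdStOffsetOp(MI).getImm() is 8 and getMemScale(MI) is 8, so UnscaledOffset = 8 * 8 = 64, which matches the immediate of "add x0, x0, #64".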
@@ -1081,13 +1802,24 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
return Modified;
}
+bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
+ bool ProfitableArch = Subtarget->isCortexA57();
+ // FIXME: The benefit of converting narrow loads into a wider load could be
+ // microarchitecture-specific, as it assumes that a single load with two
+ // bitfield extracts is cheaper than two narrow loads. Currently, this
+ // conversion is enabled only on Cortex-A57, where the performance benefit
+ // was verified.
+ return ProfitableArch && !Subtarget->requiresStrictAlign();
+}
+
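The !requiresStrictAlign() test matters because the merged access can be less aligned than the originals; an illustrative case:

    ldrh w0, [x2]
    ldrh w1, [x2, #2]
    ; merged: ldr w0, [x2] -- a 4-byte access that is only guaranteed
    ; 2-byte aligned, which would trap under strict alignment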
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
+ TRI = Subtarget->getRegisterInfo();
bool Modified = false;
+ bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB);
+ Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
return Modified;
}
@@ -1095,8 +1827,8 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// createAArch64LoadStoreOptimizationPass - returns an instance of the
+/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
return new AArch64LoadStoreOpt();
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 580427a..2b4cdf1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -207,9 +207,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- if (lowerOperand(MI->getOperand(i), MCOp))
+ if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h
deleted file mode 100644
index 4164b33..0000000
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===- AArch64MachineCombinerPattern.h -===//
-//===- AArch64 instruction pattern supported by combiner -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines instruction pattern supported by combiner
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
-
-namespace llvm {
-
-/// Enumeration of instruction pattern supported by machine combiner
-///
-///
-namespace MachineCombinerPattern {
-enum MC_PATTERN : int {
- MC_NONE = 0,
- MC_MULADDW_OP1 = 1,
- MC_MULADDW_OP2 = 2,
- MC_MULSUBW_OP1 = 3,
- MC_MULSUBW_OP2 = 4,
- MC_MULADDWI_OP1 = 5,
- MC_MULSUBWI_OP1 = 6,
- MC_MULADDX_OP1 = 7,
- MC_MULADDX_OP2 = 8,
- MC_MULSUBX_OP1 = 9,
- MC_MULSUBX_OP2 = 10,
- MC_MULADDXI_OP1 = 11,
- MC_MULSUBXI_OP1 = 12
-};
-} // end namespace MachineCombinerPattern
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 536a8d0..318f839 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=//
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -42,7 +42,7 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
unsigned ArgumentStackToRestore;
/// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
bool HasStackFrame;
/// \brief Size of the stack frame, not including callee-saved registers.
@@ -72,16 +72,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// registers.
unsigned VarArgsFPRSize;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {}
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {
(void)MF;
}
@@ -96,6 +102,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
unsigned getLocalStackSize() const { return LocalStackSize; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index e1b93bf..79c09d9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -489,7 +489,7 @@ bool AArch64PromoteConstant::insertDefinitions(
for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
- IRBuilder<> Builder(IPI.first->getParent(), IPI.first);
+ IRBuilder<> Builder(IPI.first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
@@ -540,7 +540,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
bool LocalChange = false;
SmallPtrSet<Constant *, 8> AlreadyChecked;
- for (Instruction &I : inst_range(&F)) {
+ for (Instruction &I : instructions(&F)) {
// Traverse the operand, looking for constant vectors. Replace them by a
// load of a global variable of constant vector type.
for (Value *Op : I.operand_values()) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 841af55..32b4888 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -34,10 +35,6 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
-static cl::opt<bool>
-ReserveX18("aarch64-reserve-x18", cl::Hidden,
- cl::desc("Reserve X18, making it unavailable as GPR"));
-
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
: AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
@@ -50,10 +47,23 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
+ CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
+ CSR_AArch64_CXX_TLS_Darwin_SaveList;
else
return CSR_AArch64_AAPCS_SaveList;
}
+const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
+ return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return nullptr;
+}
+
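A minimal sketch of how such a null-terminated save list is typically consumed (illustrative only; the real driver lives in the generic ISel/frame-lowering code, not in this patch, and TRI/MF are assumed context):

    if (const MCPhysReg *CSRs = TRI->getCalleeSavedRegsViaCopy(&MF))
      for (unsigned i = 0; CSRs[i]; ++i)
        ; // arrange for CSRs[i] to be saved/restored via COPYs in the
          // entry and return blocks instead of prologue/epilogue spills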
const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -62,6 +72,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_RegMask;
+ if (CC == CallingConv::CXX_FAST_TLS)
+ return CSR_AArch64_CXX_TLS_Darwin_RegMask;
else
return CSR_AArch64_AAPCS_RegMask;
}
@@ -104,7 +116,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::W29);
}
- if (TT.isOSDarwin() || ReserveX18) {
+ if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -131,7 +143,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return TT.isOSDarwin() || ReserveX18;
+ return MF.getSubtarget<AArch64Subtarget>().isX18Reserved();
case AArch64::FP:
case AArch64::W29:
return TFI->hasFP(MF) || TT.isOSDarwin();
@@ -186,29 +198,6 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
-bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
-
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- return true;
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool
-AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64FrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
@@ -424,10 +413,11 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - MF.getSubtarget<AArch64Subtarget>()
+ .isX18Reserved() // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
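For example, on Darwin (X18 reserved) with a frame pointer in use and a base pointer required, the GPR limit works out to 32 - 1 - 1 - 1 - 1 = 28 allocatable registers.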
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 8c379d9..f33f788 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -35,6 +35,8 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
@@ -93,9 +95,6 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- // Base pointer (stack realignment) support.
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index b2efca0..a8c8b17 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64",
// The lower 16 vector registers. Some instructions can only take registers
// in this range.
def FPR128_lo : RegisterClass<"AArch64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
128, (trunc FPR128, 16)>;
// Pairs, triples, and quads of 64-bit vector registers.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 486efd6..f6ee8cf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -31,6 +31,11 @@ static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
"converter pass"), cl::init(true), cl::Hidden);
+// If the OS supports TBI, use this flag to enable it.
+static cl::opt<bool>
+UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
+ "an address is ignored"), cl::init(false), cl::Hidden);
+
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
// Determine default and user-specified characteristics
@@ -46,9 +51,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false),
- HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
- IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false),
+ HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian),
+ CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
@@ -113,12 +120,30 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// bi-directional scheduling. 253.perlbmk.
Policy.OnlyTopDown = false;
Policy.OnlyBottomUp = false;
+ // Enabling or disabling the latency heuristic is a close call: it seems to
+ // help nearly no benchmark on out-of-order architectures, while on the other
+ // hand it regresses register pressure on a few benchmarks.
+ if (isCyclone())
+ Policy.DisableLatencyHeuristic = true;
}
bool AArch64Subtarget::enableEarlyIfConversion() const {
return EnableEarlyIfConvert;
}
+bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
+ if (!UseAddressTopByteIgnored)
+ return false;
+
+ if (TargetTriple.isiOS()) {
+ unsigned Major, Minor, Micro;
+ TargetTriple.getiOSVersion(Major, Minor, Micro);
+ return Major >= 8;
+ }
+
+ return false;
+}
+
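An illustrative use of TBI (the pointer P is hypothetical): when the OS enables it, the top byte of a pointer is ignored by hardware address translation, so a tag can ride in the high bits:

    uintptr_t Tagged = reinterpret_cast<uintptr_t>(P) | (0xA5ULL << 56);
    int V = *reinterpret_cast<int *>(Tagged); // still loads from P under TBI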
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
if (!isCortexA57())
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 6bb0694..1b8b9b2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -33,17 +33,21 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
- enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone};
+ enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
bool HasV8_1aOps;
+ bool HasV8_2aOps;
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
bool HasCRC;
+ bool HasPerfMon;
+ bool HasFullFP16;
+ bool HasSPE;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -51,6 +55,12 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
+ // StrictAlign - Disallow unaligned memory accesses.
+ bool StrictAlign;
+
+ // ReserveX18 - X18 is not available as a general purpose register.
+ bool ReserveX18;
+
bool IsLittle;
/// CPUString - String name of used CPU.
@@ -92,19 +102,30 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
- return isCortexA53() || isCortexA57();
+ return isGeneric() || isCortexA53() || isCortexA57();
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool requiresStrictAlign() const { return StrictAlign; }
+
+ bool isX18Reserved() const { return ReserveX18; }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ /// CPU has TBI (top byte of addresses is ignored during HW address
+ /// translation) and OS enables it.
+ bool supportsAddressTopByteIgnored() const;
+
+ bool hasPerfMon() const { return HasPerfMon; }
+ bool hasFullFP16() const { return HasFullFP16; }
+ bool hasSPE() const { return HasSPE; }
bool isLittleEndian() const { return IsLittle; }
@@ -112,11 +133,13 @@ public:
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+ bool isGeneric() const { return CPUString == "generic"; }
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index db6e244..c52c554 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -203,7 +203,7 @@ public:
} // namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(AArch64TTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e085cca..9af0e64 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
+int AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -37,7 +37,7 @@ unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
}
/// \brief Calculate the cost of materializing the given constant.
-unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -51,18 +51,18 @@ unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
- unsigned Cost = 0;
+ int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
// We need at least one instruction to materialize the constant.
- return std::max(1U, Cost);
+ return std::max(1, Cost);
}
-unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -118,17 +118,17 @@ unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}
if (Idx == ImmIdx) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -147,10 +147,10 @@ unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
break;
@@ -176,8 +176,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) {
+int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -187,7 +186,31 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
- static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
+ static const TypeConversionCostTblEntry
+ ConversionTbl[] = {
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+
+ // The number of shll instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@@ -210,6 +233,16 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ // Complex: to v8f32
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+
+ // Complex: to v16f32
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
+
// Complex: to v2f64
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
@@ -250,22 +283,21 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
};
- int Idx = ConvertCostTableLookup<MVT>(
- ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
- if (Idx != -1)
- return ConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
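With the expanded table, for instance, "%e = zext <8 x i8> %v to <8 x i32>" is now costed at 3 (matching the three shll steps noted above), and "trunc <4 x i64> to <4 x i32>" is modeled as free, instead of falling back to the generic BaseT estimate.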
-unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -281,15 +313,15 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
// All other insert/extracts cost this much.
- return 2;
+ return 3;
}
-unsigned AArch64TTIImpl::getArithmeticInstrCost(
+int AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -300,10 +332,9 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
// The OperandValue properties may not be the same as those of the previous
// operation; conservatively assume OP_None.
- unsigned Cost =
- getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
+ int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -331,7 +362,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
}
}
-unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -346,19 +377,20 @@ unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
-unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
- // We don't lower vector selects well that are wider than the register width.
+ // We don't lower some vector selects well when they are wider than the
+ // register width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- const unsigned AmortizationCost = 20;
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ const int AmortizationCost = 20;
+ static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
@@ -367,20 +399,18 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx =
- ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return VectorSelectTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
}
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
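The net effect of dropping the amortization factor on the first three entries: a select on <16 x i1>/<16 x i16>, for example, goes from 16 * 20 = 320 down to 16, while the wider i64 cases keep the scalarization penalty.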
-unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
@@ -389,7 +419,7 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// practice on inlined memcpy code.
// We make v2i64 stores expensive so that we will only vectorize if there
// are 6 other instructions getting vectorized.
- unsigned AmortizationCost = 6;
+ int AmortizationCost = 6;
return LT.first * 2 * AmortizationCost;
}
@@ -407,16 +437,18 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
+int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
@@ -427,8 +459,8 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
Alignment, AddressSpace);
}
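The getTypeSizeInBits change fixes a units bug: getTypeAllocSize returns bytes, so a legal 64-bit subvector reported 8 and never matched the 64/128 check. For example, <8 x i16> with Factor == 2 yields SubVecTy <4 x i16>, whose size in bits is 64, so the cheap ldN/stN cost path is now taken.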
-unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
- unsigned Cost = 0;
+int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
+ int Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
@@ -506,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_ld4:
Info.ReadMem = true;
Info.WriteMem = false;
- Info.Vol = false;
+ Info.IsSimple = true;
Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(0);
break;
@@ -515,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.Vol = false;
+ Info.IsSimple = true;
Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
break;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 444d3cc..ec58c4f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -48,7 +48,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
};
public:
- explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
+ explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -63,12 +63,11 @@ public:
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(int64_t Val);
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(int64_t Val);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
@@ -76,6 +75,8 @@ public:
/// \name Vector TTI Implementations
/// @{
+ bool enableInterleavedAccessVectorization() { return true; }
+
unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
if (ST->hasNEON())
@@ -96,25 +97,25 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, bool IsComplex);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
+ int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -123,11 +124,9 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 38e8b4d..394c8e7 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -43,7 +43,6 @@ class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
- MCSubtargetInfo &STI;
// Map of register aliases registers via the .req directive.
StringMap<std::pair<bool, unsigned> > RegisterReqs;
@@ -101,6 +100,7 @@ private:
OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
+ OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
@@ -115,16 +115,16 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI) {
+ : MCTargetAsmParser(Options, STI) {
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -160,7 +160,8 @@ private:
k_Prefetch,
k_ShiftExtend,
k_FPImm,
- k_Barrier
+ k_Barrier,
+ k_PSBHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -228,6 +229,12 @@ private:
unsigned Length;
};
+ struct PSBHintOp {
+ unsigned Val;
+ const char *Data;
+ unsigned Length;
+ };
+
struct ShiftExtendOp {
AArch64_AM::ShiftExtendType Type;
unsigned Amount;
@@ -251,6 +258,7 @@ private:
struct SysRegOp SysReg;
struct SysCRImmOp SysCRImm;
struct PrefetchOp Prefetch;
+ struct PSBHintOp PSBHint;
struct ShiftExtendOp ShiftExtend;
};
@@ -302,6 +310,9 @@ public:
case k_Prefetch:
Prefetch = o.Prefetch;
break;
+ case k_PSBHint:
+ PSBHint = o.PSBHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -393,6 +404,16 @@ public:
return Prefetch.Val;
}
+ unsigned getPSBHint() const {
+ assert(Kind == k_PSBHint && "Invalid access!");
+ return PSBHint.Val;
+ }
+
+ StringRef getPSBHintName() const {
+ assert(Kind == k_PSBHint && "Invalid access!");
+ return StringRef(PSBHint.Data, PSBHint.Length);
+ }
+
StringRef getPrefetchName() const {
assert(Kind == k_Prefetch && "Invalid access!");
return StringRef(Prefetch.Data, Prefetch.Length);
@@ -497,6 +518,15 @@ public:
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
+ bool isImm0_1() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 2);
+ }
bool isImm0_7() const {
if (!isImm())
return false;
@@ -876,12 +906,15 @@ public:
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
-
return SysReg.MSRReg != -1U;
}
- bool isSystemPStateField() const {
+ bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
-
+ return (SysReg.PStateField == AArch64PState::PAN ||
+ SysReg.PStateField == AArch64PState::UAO);
+ }
+ bool isSystemPStateFieldWithImm0_15() const {
+ if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
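The split exists because the PAN (ARMv8.1) and UAO (ARMv8.2) PSTATE fields accept only a one-bit immediate, e.g.:

    msr pan, #1
    msr uao, #0

while the remaining PState fields keep the 4-bit [0, 15] form.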
@@ -950,6 +983,7 @@ public:
}
bool isSysCR() const { return Kind == k_SysCR; }
bool isPrefetch() const { return Kind == k_Prefetch; }
+ bool isPSBHint() const { return Kind == k_PSBHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -1175,8 +1209,10 @@ public:
template <unsigned NumRegs>
void addVectorList64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1,
- AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 };
+ static const unsigned FirstRegs[] = { AArch64::D0,
+ AArch64::D0_D1,
+ AArch64::D0_D1_D2,
+ AArch64::D0_D1_D2_D3 };
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
@@ -1186,8 +1222,10 @@ public:
template <unsigned NumRegs>
void addVectorList128Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1,
- AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 };
+ static const unsigned FirstRegs[] = { AArch64::Q0,
+ AArch64::Q0_Q1,
+ AArch64::Q0_Q1_Q2,
+ AArch64::Q0_Q1_Q2_Q3 };
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
@@ -1304,6 +1342,12 @@ public:
Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16));
}
+ void addImm0_1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
+ }
+
void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
@@ -1491,7 +1535,13 @@ public:
Inst.addOperand(MCOperand::createImm(SysReg.MSRReg));
}
- void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
+ void addSystemPStateFieldWithImm0_1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
+ }
+
+ void addSystemPStateFieldWithImm0_15Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
@@ -1507,6 +1557,11 @@ public:
Inst.addOperand(MCOperand::createImm(getPrefetch()));
}
+ void addPSBHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getPSBHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -1703,6 +1758,19 @@ public:
return Op;
}
+ static std::unique_ptr<AArch64Operand> CreatePSBHint(unsigned Val,
+ StringRef Str,
+ SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_PSBHint, Ctx);
+ Op->PSBHint.Val = Val;
+ Op->PSBHint.Data = Str.data();
+ Op->PSBHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -1776,6 +1844,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
+ case k_PSBHint: {
+ OS << getPSBHintName();
+ break;
+ }
case k_ShiftExtend: {
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -1849,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) {
.Case(".h", true)
.Case(".s", true)
.Case(".d", true)
+ // Needed for fp16 scalar pairwise reductions
+ .Case(".2h", true)
.Default(false);
}
@@ -2016,7 +2090,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
StringRef Name =
- Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid);
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
S, getContext()));
return MatchOperand_Success;
@@ -2030,7 +2104,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
unsigned prfop =
- Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
@@ -2042,6 +2116,32 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// tryParsePSBHint - Try to parse a PSB operand, mapped to a Hint command.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ bool Valid;
+ auto Mapper = AArch64PSBHint::PSBHintMapper();
+ unsigned psbhint =
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
+ if (!Valid) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreatePSBHint(psbhint, Tok.getString(),
+ S, getContext()));
+ return MatchOperand_Success;
+}
+
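This enables parsing the profiling synchronization barrier added with the ARMv8.2 Statistical Profiling Extension, whose only defined hint operand is csync:

    psb csync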
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
AArch64AsmParser::OperandMatchResultTy
@@ -2439,6 +2539,13 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
} else if (!Op.compare_lower("cisw")) {
// SYS #0, C7, C14, #2
SYS_ALIAS(0, 7, 14, 2);
+ } else if (!Op.compare_lower("cvap")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #3, C7, C12, #1
+ SYS_ALIAS(3, 7, 12, 1);
+ } else {
+ return TokError("DC CVAP requires ARMv8.2a");
+ }
} else {
return TokError("invalid operand for DC instruction");
}
@@ -2479,6 +2586,20 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
} else if (!Op.compare_lower("s12e0w")) {
// SYS #4, C7, C8, #7
SYS_ALIAS(4, 7, 8, 7);
+ } else if (!Op.compare_lower("s1e1rp")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #0, C7, C9, #0
+ SYS_ALIAS(0, 7, 9, 0);
+ } else {
+ return TokError("AT S1E1RP requires ARMv8.2a");
+ }
+ } else if (!Op.compare_lower("s1e1wp")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #0, C7, C9, #1
+ SYS_ALIAS(0, 7, 9, 1);
+ } else {
+ return TokError("AT S1E1WP requires ARMv8.2a");
+ }
} else {
return TokError("invalid operand for AT instruction");
}
@@ -2644,7 +2765,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
StringRef Name =
- Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid);
Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name,
ExprLoc, getContext()));
return MatchOperand_Success;
@@ -2658,7 +2779,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
unsigned Opt =
- Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2687,20 +2808,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
bool IsKnown;
auto MRSMapper = AArch64SysReg::MRSMapper();
- uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
- IsKnown);
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (MRSReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto MSRMapper = AArch64SysReg::MSRMapper();
- uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
- IsKnown);
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto PStateMapper = AArch64PState::PStateMapper();
uint32_t PStateField =
- PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
+ PStateMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
@@ -3151,7 +3273,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (Operands.size() < 2 ||
!static_cast<AArch64Operand &>(*Operands[1]).isReg())
- return true;
+ return Error(Loc, "Only valid when first operand is register");
bool IsXReg =
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
@@ -3183,7 +3305,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
}
// If it is a label or an imm that cannot fit in a movz, put it into CP.
const MCExpr *CPLoc =
- getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4);
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4, Loc);
Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
return false;
}
@@ -3601,6 +3723,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
case Match_InvalidMemoryIndexed16:
return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+ case Match_InvalidImm0_1:
+ return Error(Loc, "immediate must be an integer in range [0, 1].");
case Match_InvalidImm0_7:
return Error(Loc, "immediate must be an integer in range [0, 7].");
case Match_InvalidImm0_15:
@@ -3912,7 +4036,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
- AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
+ !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(
RegOp.getReg())
? AArch64::WZR
: AArch64::XZR;
@@ -3929,10 +4053,27 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If that fails, try against the alternate table containing long-form NEON:
// "fadd v0.2s, v1.2s, v2.2s"
- if (MatchResult != Match_Success)
+ if (MatchResult != Match_Success) {
+ // But first, save the short-form match result: we can use it in case the
+ // long-form match also fails.
+ auto ShortFormNEONErrorInfo = ErrorInfo;
+ auto ShortFormNEONMatchResult = MatchResult;
+
MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+ // If both matches failed, and the long-form match failed on the mnemonic
+ // suffix token operand, the short-form match failure is probably more
+ // relevant: use it instead.
+ if (MatchResult == Match_InvalidOperand && ErrorInfo == 1 &&
+ Operands.size() > 1 && ((AArch64Operand &)*Operands[1]).isToken() &&
+ ((AArch64Operand &)*Operands[1]).isTokenSuffix()) {
+ MatchResult = ShortFormNEONMatchResult;
+ ErrorInfo = ShortFormNEONErrorInfo;
+ }
+ }
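+ // Illustrative scenario: for a short-form instruction with a bad operand,
+ // the long-form table usually fails immediately on the mnemonic-suffix
+ // token at index 1; reporting that would be confusing, so the saved
+ // short-form diagnostic is preferred.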
+
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@@ -3944,7 +4085,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature: {
@@ -3966,6 +4107,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return showMatchError(IDLoc, MatchResult);
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
+
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -4011,6 +4153,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed8SImm7:
case Match_InvalidMemoryIndexed16SImm7:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
case Match_InvalidImm0_15:
case Match_InvalidImm0_31:
@@ -4083,7 +4226,7 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size);
+ getParser().getStreamer().EmitValue(Value, Size, L);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -4155,7 +4298,7 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
Inst.setOpcode(AArch64::TLSDESCCALL);
Inst.addOperand(MCOperand::createExpr(Expr));
- getParser().getStreamer().EmitInstruction(Inst, STI);
+ getParser().getStreamer().EmitInstruction(Inst, getSTI());
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index db9fb0e..f1f968e 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1516,6 +1516,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
uint64_t pstate_field = (op1 << 3) | op2;
+ if ((pstate_field == AArch64PState::PAN ||
+ pstate_field == AArch64PState::UAO) && crm > 1)
+ return Fail;
+
Inst.addOperand(MCOperand::createImm(pstate_field));
Inst.addOperand(MCOperand::createImm(crm));
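// Rationale (inferred from the architecture definitions): PAN and UAO are
// one-bit PSTATE fields whose immediate is carried in CRm, so CRm values
// above 1 are not valid "msr pan/uao, #imm" encodings and are rejected here.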
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 7f56c2c..d8a8108 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -55,7 +56,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
unsigned Opcode = MI->getOpcode();
if (Opcode == AArch64::SYSxt)
- if (printSysAlias(MI, O)) {
+ if (printSysAlias(MI, STI, O)) {
printAnnotation(O, Annot);
return;
}
@@ -269,7 +270,7 @@ struct LdStNInstrDesc {
int NaturalOffset;
};
-static LdStNInstrDesc LdStNInstInfo[] = {
+static const LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::LD1i8, "ld1", ".b", 1, true, 0 },
{ AArch64::LD1i16, "ld1", ".h", 1, true, 0 },
{ AArch64::LD1i32, "ld1", ".s", 1, true, 0 },
@@ -612,7 +613,7 @@ static LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 },
};
-static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
+static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
unsigned Idx;
for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
if (LdStNInstInfo[Idx].Opcode == Opcode)
@@ -641,7 +642,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
+ if (const LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
// Now onto the operands: first a vector list with possible lane
@@ -674,7 +675,9 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
-bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
+bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
#ifndef NDEBUG
unsigned Opcode = MI->getOpcode();
assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
@@ -729,6 +732,11 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
if (Op1Val == 3 && Op2Val == 1)
Asm = "dc\tcvau";
break;
+ case 12:
+ if (Op1Val == 3 && Op2Val == 1 &&
+ (STI.getFeatureBits()[AArch64::HasV8_2aOps]))
+ Asm = "dc\tcvap";
+ break;
case 14:
if (Op1Val == 3 && Op2Val == 1)
Asm = "dc\tcivac";
@@ -773,6 +781,21 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
break;
}
break;
+ case 9:
+ switch (Op1Val) {
+ default:
+ break;
+ case 0:
+ if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "at\ts1e1rp"; break;
+ case 1: Asm = "at\ts1e1wp"; break;
+ }
+ }
+ break;
+ }
}
} else if (CnVal == 8) {
// TLBI aliases
@@ -1122,6 +1145,19 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
O << '#' << prfop;
}
+void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned psbhintop = MI->getOperand(OpNum).getImm();
+ bool Valid;
+ StringRef Name =
+ AArch64PSBHint::PSBHintMapper().toString(psbhintop, STI.getFeatureBits(), Valid);
+ if (Valid)
+ O << Name;
+ else
+ O << '#' << psbhintop;
+}
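+// Illustrative output: operand value 0x11 prints as "csync" (i.e.
+// "psb csync") when FeatureSPE is available, and falls back to the raw
+// "#0x11" form otherwise.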
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 15dee97..ea68d98 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -15,14 +15,10 @@
#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
@@ -48,7 +44,8 @@ public:
unsigned AltIdx = AArch64::NoRegAltName);
protected:
- bool printSysAlias(const MCInst *MI, raw_ostream &O);
+ bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
// Operand printers
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
@@ -122,6 +119,9 @@ protected:
void printPrefetchOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPSBHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index ed24343..648b1df 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -364,6 +364,32 @@ static inline float getFPImmFloat(unsigned Imm) {
return FPUnion.F;
}
+/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
+/// floating-point value. If the value cannot be represented as an 8-bit
+/// floating-point value, then return -1.
+static inline int getFP16Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
+ int32_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x3f)
+ return -1;
+ Mantissa >>= 6;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+static inline int getFP16Imm(const APFloat &FPImm) {
+ return getFP16Imm(FPImm.bitcastToAPInt());
+}
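+// Worked example (illustrative): +1.0 in IEEE half precision is 0x3c00, so
+// Sign = 0, Exp = 15 - 15 = 0 and Mantissa = 0; the function returns
+// (0 << 7) | ((((0 + 3) & 0x7) ^ 4) << 4) | 0 == 0x70, the AArch64 imm8
+// encoding of 1.0. A value such as 0.1, which needs more than 4 mantissa
+// bits, yields -1.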
+
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 16d5356..d26604f 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -128,10 +128,9 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
private:
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 921c4b9..fbce26e 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -48,10 +48,6 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
UseDataRegionDirectives = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use
- // LShr instead of AShr.
- UseLogicalShr = true;
}
const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 2870341..a540f49 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -85,13 +85,13 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
-MCSection *AArch64MCExpr::findAssociatedSection() const {
+MCFragment *AArch64MCExpr::findAssociatedFragment() const {
llvm_unreachable("FIXME: what goes here?");
}
bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 1165314..db36a65 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -149,11 +149,10 @@ public:
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override;
+ MCFragment *findAssociatedFragment() const override;
- bool evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
@@ -162,7 +161,6 @@ public:
}
static bool classof(const AArch64MCExpr *) { return true; }
-
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 741b273..61c96f1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -90,9 +90,11 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
- default:
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "ADR/ADRP relocations must be GOT relative");
+ default: {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "ADR/ADRP relocations must be GOT relative");
+ return false;
+ }
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
@@ -170,25 +172,25 @@ void AArch64MachObjectWriter::recordRelocation(
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
if (Kind == AArch64::fixup_aarch64_pcrel_branch19) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "conditional branch requires assembler-local"
- " label. '" +
- Target.getSymA()->getSymbol().getName() +
- "' is external.");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "conditional branch requires assembler-local"
+ " label. '" +
+ Target.getSymA()->getSymbol().getName() +
+ "' is external.");
return;
}
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "Invalid relocation on conditional branch!");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "Invalid relocation on conditional branch!");
return;
}
if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
- Asm)) {
- Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
+ Asm)) {
+ Asm.getContext().reportError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
return;
}
@@ -200,8 +202,9 @@ void AArch64MachObjectWriter::recordRelocation(
Type = MachO::ARM64_RELOC_UNSIGNED;
if (IsPCRel) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "PC relative absolute relocation!");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "PC relative absolute relocation!");
+ return;
// FIXME: x86_64 sets the type to a branch reloc here. Should we do
// something similar?
@@ -229,16 +232,20 @@ void AArch64MachObjectWriter::recordRelocation(
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) {
// Otherwise, neither symbol can be modified.
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation of modified symbol");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of modified symbol");
+ return;
+ }
// We don't support PCrel relocations of differences.
- if (IsPCRel)
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported pc-relative relocation of "
- "difference");
+ if (IsPCRel) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported pc-relative relocation of "
+ "difference");
+ return;
+ }
// AArch64 always uses external relocations. If there is no symbol to use as
// a base address (a local symbol with no preceding non-local symbol),
@@ -246,20 +253,26 @@ void AArch64MachObjectWriter::recordRelocation(
//
// FIXME: We should probably just synthesize an external symbol and use
// that.
- if (!A_Base)
- Asm.getContext().reportFatalError(
+ if (!A_Base) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + A->getName() +
"'. Must have non-local symbol earlier in section.");
- if (!B_Base)
- Asm.getContext().reportFatalError(
+ return;
+ }
+ if (!B_Base) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + B->getName() +
"'. Must have non-local symbol earlier in section.");
+ return;
+ }
- if (A_Base == B_Base && A_Base)
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation with identical base");
+ if (A_Base == B_Base && A_Base) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation with identical base");
+ return;
+ }
Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
(!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress(
@@ -309,10 +322,12 @@ void AArch64MachObjectWriter::recordRelocation(
// we need to preserve and merge with the new Target? How about
// the FixedValue?
if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout,
- &Fixup))
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unable to resolve variable '" +
- Symbol->getName() + "'");
+ &Fixup)) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unable to resolve variable '" +
+ Symbol->getName() + "'");
+ return;
+ }
return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
}
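// Note the pattern repeated throughout this writer: each former
// reportFatalError becomes reportError plus an early return, so a bad fixup
// now produces a diagnostic and lets emission continue instead of aborting.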
@@ -337,11 +352,13 @@ void AArch64MachObjectWriter::recordRelocation(
Value +=
Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base);
} else if (Symbol->isInSection()) {
- if (!CanUseLocalRelocation)
- Asm.getContext().reportFatalError(
+ if (!CanUseLocalRelocation) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
"'. Must have non-local symbol earlier in section.");
+ return;
+ }
// Adjust the relocation to be section-relative.
// The index is the section ordinal (1-based).
const MCSection &Sec = Symbol->getSection();
@@ -361,9 +378,10 @@ void AArch64MachObjectWriter::recordRelocation(
return;
}
}
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
"unsupported relocation of variable '" +
Symbol->getName() + "'");
+ return;
}
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 52b000d..3e86a42 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -26,8 +26,9 @@ AArch64TargetStreamer::~AArch64TargetStreamer() {}
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr,
- unsigned Size) {
- return ConstantPools->addEntry(Streamer, Expr, Size);
+ unsigned Size,
+ SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, Size, Loc);
}
void AArch64TargetStreamer::emitCurrentConstantPool() {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index fcc0d05..51432830 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -24,7 +24,7 @@ public:
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
/// MCExpr that can be used to refer to the constant pool location.
- const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size);
+ const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size, SMLoc Loc);
/// Callback used to implement the .ltorg directive.
/// Emit contents of constant pool for the current section.
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index ee85b65b..78f5289 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -146,11 +146,22 @@ const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings
// v8.1a "Privileged Access Never" extension-specific PStates
{"pan", PAN, {AArch64::HasV8_1aOps}},
+
+ // v8.2a
+ {"uao", UAO, {AArch64::HasV8_2aOps}},
};
AArch64PState::PStateMapper::PStateMapper()
: AArch64NamedImmMapper(PStateMappings, 0) {}
+const AArch64NamedImmMapper::Mapping AArch64PSBHint::PSBHintMapper::PSBHintMappings[] = {
+ // v8.2a "Statistical Profiling" extension-specific PSB operand
+ {"csync", CSync, {AArch64::FeatureSPE}},
+};
+
+AArch64PSBHint::PSBHintMapper::PSBHintMapper()
+ : AArch64NamedImmMapper(PSBHintMappings, 0) {}
+
const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"mdccsr_el0", MDCCSR_EL0, {}},
{"dbgdtrrx_el0", DBGDTRRX_EL0, {}},
@@ -192,6 +203,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}},
{"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}},
{"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}},
+ {"id_aa64mmfr2_el1", ID_A64MMFR2_EL1, {AArch64::HasV8_2aOps}},
{"mvfr0_el1", MVFR0_EL1, {}},
{"mvfr1_el1", MVFR1_EL1, {}},
{"mvfr2_el1", MVFR2_EL1, {}},
@@ -275,9 +287,6 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
{"icc_sgi1r_el1", ICC_SGI1R_EL1, {}},
{"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}},
{"icc_sgi0r_el1", ICC_SGI0R_EL1, {}},
-
- // v8.1a "Privileged Access Never" extension-specific system registers
- {"pan", PAN, {AArch64::HasV8_1aOps}},
};
AArch64SysReg::MSRMapper::MSRMapper() {
@@ -804,6 +813,24 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
{"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}},
{"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}},
{"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}},
+
+ // v8.2a registers
+ {"uao", UAO, {AArch64::HasV8_2aOps}},
+
+ // v8.2a "Statistical Profiling extension" registers
+ {"pmblimitr_el1", PMBLIMITR_EL1, {AArch64::FeatureSPE}},
+ {"pmbptr_el1", PMBPTR_EL1, {AArch64::FeatureSPE}},
+ {"pmbsr_el1", PMBSR_EL1, {AArch64::FeatureSPE}},
+ {"pmbidr_el1", PMBIDR_EL1, {AArch64::FeatureSPE}},
+ {"pmscr_el2", PMSCR_EL2, {AArch64::FeatureSPE}},
+ {"pmscr_el12", PMSCR_EL12, {AArch64::FeatureSPE}},
+ {"pmscr_el1", PMSCR_EL1, {AArch64::FeatureSPE}},
+ {"pmsicr_el1", PMSICR_EL1, {AArch64::FeatureSPE}},
+ {"pmsirr_el1", PMSIRR_EL1, {AArch64::FeatureSPE}},
+ {"pmsfcr_el1", PMSFCR_EL1, {AArch64::FeatureSPE}},
+ {"pmsevfr_el1", PMSEVFR_EL1, {AArch64::FeatureSPE}},
+ {"pmslatfr_el1", PMSLATFR_EL1, {AArch64::FeatureSPE}},
+ {"pmsidr_el1", PMSIDR_EL1, {AArch64::FeatureSPE}},
};
uint32_t
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 7e42f8e..f649cb9 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -337,7 +337,9 @@ namespace AArch64AT {
S12E1R = 0x63c4, // 01 100 0111 1000 100
S12E1W = 0x63c5, // 01 100 0111 1000 101
S12E0R = 0x63c6, // 01 100 0111 1000 110
- S12E0W = 0x63c7 // 01 100 0111 1000 111
+ S12E0W = 0x63c7, // 01 100 0111 1000 111
+ S1E1RP = 0x43c8, // 01 000 0111 1001 000
+ S1E1WP = 0x43c9 // 01 000 0111 1001 001
};
struct ATMapper : AArch64NamedImmMapper {
@@ -463,6 +465,9 @@ namespace AArch64PState {
// v8.1a "Privileged Access Never" extension-specific PStates
PAN = 0x04,
+
+ // v8.2a "User Access Override" extension-specific PStates
+ UAO = 0x03
};
struct PStateMapper : AArch64NamedImmMapper {
@@ -473,6 +478,21 @@ namespace AArch64PState {
}
+namespace AArch64PSBHint {
+ enum PSBHintValues {
+ Invalid = -1,
+ // v8.2a "Statistical Profiling" extension-specific PSB operands
+ CSync = 0x11, // psb csync = hint #0x11
+ };
+
+ struct PSBHintMapper : AArch64NamedImmMapper {
+ const static Mapping PSBHintMappings[];
+
+ PSBHintMapper();
+ };
+
+}
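+// Note: CSync (0x11) sits in the HINT immediate space, so "psb csync" is the
+// operand form of "hint #0x11", as the comment on the enumerator indicates.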
+
namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
@@ -594,6 +614,7 @@ namespace AArch64SysReg {
ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ ID_A64MMFR2_EL1 = 0xc03a, // 11 000 0000 0111 010
MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
@@ -1190,6 +1211,24 @@ namespace AArch64SysReg {
SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+ // v8.2a registers
+ UAO = 0xc214, // 11 000 0100 0010 100
+
+ // v8.2a "Statistical Profiling extension" registers
+ PMBLIMITR_EL1 = 0xc4d0, // 11 000 1001 1010 000
+ PMBPTR_EL1 = 0xc4d1, // 11 000 1001 1010 001
+ PMBSR_EL1 = 0xc4d3, // 11 000 1001 1010 011
+ PMBIDR_EL1 = 0xc4d7, // 11 000 1001 1010 111
+ PMSCR_EL2 = 0xe4c8, // 11 100 1001 1001 000
+ PMSCR_EL12 = 0xecc8, // 11 101 1001 1001 000
+ PMSCR_EL1 = 0xc4c8, // 11 000 1001 1001 000
+ PMSICR_EL1 = 0xc4ca, // 11 000 1001 1001 010
+ PMSIRR_EL1 = 0xc4cb, // 11 000 1001 1001 011
+ PMSFCR_EL1 = 0xc4cc, // 11 000 1001 1001 100
+ PMSEVFR_EL1 = 0xc4cd, // 11 000 1001 1001 101
+ PMSLATFR_EL1 = 0xc4ce, // 11 000 1001 1001 110
+ PMSIDR_EL1 = 0xc4cf, // 11 000 1001 1001 111
+
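+ // The trailing comments spell out op0:op1:CRn:CRm:op2, packed as
+ // (op0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2. For example
+ // PMSIDR_EL1: (3 << 14) | (9 << 7) | (9 << 3) | 7 == 0xc4cf, matching
+ // "11 000 1001 1001 111" (illustrative check, not part of the patch).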
// Cyclone specific system registers
CPM_IOACC_CTL_EL3 = 0xff90,
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..8c3cb56 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -44,15 +44,21 @@ FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
-FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
-FunctionPass *createSIPrepareScratchRegs();
+
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
+extern char &AMDGPUAnnotateKernelFeaturesID;
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
+void initializeSIFixSGPRCopiesPass(PassRegistry &);
+extern char &SIFixSGPRCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -64,6 +70,8 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
ModulePass *createAMDGPUAlwaysInlinePass();
+ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
+FunctionPass *createAMDGPUAnnotateUniformValues();
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
extern char &SIFixControlFlowLiveIntervalsID;
@@ -71,6 +79,8 @@ extern char &SIFixControlFlowLiveIntervalsID;
void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
extern char &SIFixSGPRLiveRangesID;
+void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
+extern char &AMDGPUAnnotateUniformValuesPassID;
extern Target TheAMDGPUTarget;
extern Target TheGCNTarget;
@@ -85,8 +95,6 @@ enum TargetIndex {
};
}
-#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
-
} // End namespace llvm
namespace ShaderType {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 68b5050..d4af8d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -108,6 +108,11 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-fol
"true",
"Force using DS instruction immediate offsets on SI">;
+def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
+ "FlatForGlobal",
+ "true",
+ "Force to generate flat instruction for global">;
+
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",
@@ -272,9 +277,14 @@ def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
>, AssemblerPredicate<"FeatureGCN1Encoding">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
+ Predicate VIAssemblerPredicate = isVI;
list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
new file mode 100644
index 0000000..3781839
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -0,0 +1,126 @@
+//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass adds target attributes to functions that use intrinsics
+/// whose presence will impact calling convention lowering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAnnotateKernelFeatures : public ModulePass {
+private:
+ void addAttrToCallers(Function *Intrin, StringRef AttrName);
+ bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
+
+public:
+ static char ID;
+
+ AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
+ bool runOnModule(Module &M) override;
+ const char *getPassName() const override {
+ return "AMDGPU Annotate Kernel Features";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+};
+
+}
+
+char AMDGPUAnnotateKernelFeatures::ID = 0;
+
+char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
+
+
+INITIALIZE_PASS_BEGIN(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
+ "Add AMDGPU function attributes", false, false)
+INITIALIZE_PASS_END(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
+ "Add AMDGPU function attributes", false, false)
+
+
+void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
+ StringRef AttrName) {
+ SmallPtrSet<Function *, 4> SeenFuncs;
+
+ for (User *U : Intrin->users()) {
+ // CallInst is the only valid user for an intrinsic.
+ CallInst *CI = cast<CallInst>(U);
+
+ Function *CallingFunction = CI->getParent()->getParent();
+ if (SeenFuncs.insert(CallingFunction).second)
+ CallingFunction->addFnAttr(AttrName);
+ }
+}
+
+bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
+ Module &M,
+ ArrayRef<StringRef[2]> IntrinsicToAttr) {
+ bool Changed = false;
+
+ for (const StringRef *Arr : IntrinsicToAttr) {
+ if (Function *Fn = M.getFunction(Arr[0])) {
+ addAttrToCallers(Fn, Arr[1]);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
+ Triple TT(M.getTargetTriple());
+
+ static const StringRef IntrinsicToAttr[][2] = {
+ // .x omitted
+ { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
+ { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
+
+ // .x omitted
+ { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
+ { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
+
+ };
+
+ static const StringRef HSAIntrinsicToAttr[][2] = {
+ { "llvm.r600.read.local.size.x", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.local.size.y", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.local.size.z", "amdgpu-dispatch-ptr" },
+
+ { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
+ { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
+ };
+
+ // TODO: Intrinsics that require queue ptr.
+
+ // We do not need to note the x workitem or workgroup id because they are
+ // always initialized.
+
+ bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
+ if (TT.getOS() == Triple::AMDHSA)
+ Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
+
+ return Changed;
+}
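+// Illustrative effect (not part of the patch): a function containing
+//   %y = call i32 @llvm.r600.read.tgid.y()
+// gains the "amdgpu-work-group-id-y" string attribute, and on amdhsa
+// triples the dispatch-pointer intrinsics add "amdgpu-dispatch-ptr" as well.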
+
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
+ return new AMDGPUAnnotateKernelFeatures();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
new file mode 100644
index 0000000..dfddc34
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -0,0 +1,84 @@
+//===-- AMDGPUAnnotateUniformValues.cpp - Add amdgpu.uniform metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass adds amdgpu.uniform metadata to IR values so this information
+/// can be used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-annotate-uniform"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAnnotateUniformValues : public FunctionPass,
+ public InstVisitor<AMDGPUAnnotateUniformValues> {
+ DivergenceAnalysis *DA;
+
+public:
+ static char ID;
+ AMDGPUAnnotateUniformValues() :
+ FunctionPass(ID) { }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override { return "AMDGPU Annotate Uniform Values"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DivergenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ void visitLoadInst(LoadInst &I);
+
+};
+
+} // End anonymous namespace
+
+INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
+ "Add AMDGPU uniform metadata", false, false)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
+ "Add AMDGPU uniform metadata", false, false)
+
+char AMDGPUAnnotateUniformValues::ID = 0;
+
+void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
+ Value *Ptr = I.getPointerOperand();
+ if (!DA->isUniform(Ptr))
+ return;
+
+ if (Instruction *PtrI = dyn_cast<Instruction>(Ptr))
+ PtrI->setMetadata("amdgpu.uniform", MDNode::get(I.getContext(), {}));
+
+}
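+// Illustrative result (not part of the patch): a load whose address
+// DivergenceAnalysis proves uniform, e.g.
+//   %v = load i32, i32 addrspace(2)* %p, !amdgpu.uniform !0
+// carries metadata that instruction selection can later use to prefer a
+// scalar (SMRD) load over a vector one.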
+
+bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
+ return false;
+}
+
+bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
+ DA = &getAnalysis<DivergenceAnalysis>();
+ visit(F);
+
+ return true;
+}
+
+FunctionPass *
+llvm::createAMDGPUAnnotateUniformValues() {
+ return new AMDGPUAnnotateUniformValues();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0a5309b..ba71dc0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -100,14 +100,63 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
}
}
-void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
-
- // This label is used to mark the end of the .text section.
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer->SwitchSection(TLOF.getTextSection());
- MCSymbol *EndOfTextLabel =
- OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer->EmitLabel(EndOfTextLabel);
+void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+ if (MFI->isKernel() && STM.isAmdHsaOS()) {
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(),
+ ELF::STT_AMDGPU_HSA_KERNEL);
+ }
+
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+static bool isModuleLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::CommonLinkage:
+ return true;
+ case GlobalValue::ExternalLinkage:
+ return false;
+ default: llvm_unreachable("unknown linkage type");
+ }
+}
+
+void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA) {
+ AsmPrinter::EmitGlobalVariable(GV);
+ return;
+ }
+
+ if (GV->isDeclaration() || GV->getLinkage() == GlobalValue::PrivateLinkage) {
+ AsmPrinter::EmitGlobalVariable(GV);
+ return;
+ }
+
+ // Group segment variables aren't emitted in HSA.
+ if (AMDGPU::isGroupSegment(GV))
+ return;
+
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (isModuleLinkage(GV)) {
+ TS->EmitAMDGPUHsaModuleScopeGlobal(GV->getName());
+ } else {
+ TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName());
+ }
+
+ const DataLayout &DL = getDataLayout();
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(
+ getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
+ MCSymbol *GVSym = getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ OutStreamer->EmitLabel(GVSym);
+ EmitGlobalConstant(DL, C);
+ OutStreamer->PopSection();
}
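// Illustrative example (not from the patch): on amdhsa, a definition such as
//   @gv = internal global i32 0
// is announced via EmitAMDGPUHsaModuleScopeGlobal("gv") before its label and
// initializer are emitted, while a group-segment (LDS) variable is skipped
// entirely.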
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -125,8 +174,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ getSIProgramInfo(KernelInfo, MF);
if (!STM.isAmdHsaOS()) {
- getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
}
// Emit directives
@@ -165,6 +214,23 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
false);
+
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
+ Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
+ Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
+ Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
+ Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
+ Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
+ false);
+
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer->emitRawComment(
@@ -278,27 +344,30 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned width = 0;
bool isSGPR = false;
- if (!MO.isReg()) {
+ if (!MO.isReg())
continue;
- }
+
unsigned reg = MO.getReg();
- if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
- reg == AMDGPU::VCC_HI) {
+ switch (reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::SCC:
+ case AMDGPU::M0:
+ continue;
+
+ case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
VCCUsed = true;
continue;
- } else if (reg == AMDGPU::FLAT_SCR ||
- reg == AMDGPU::FLAT_SCR_LO ||
- reg == AMDGPU::FLAT_SCR_HI) {
+
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
FlatUsed = true;
continue;
- }
- switch (reg) {
- default: break;
- case AMDGPU::SCC:
- case AMDGPU::EXEC:
- case AMDGPU::M0:
- continue;
+ default:
+ break;
}
if (AMDGPU::SReg_32RegClass.contains(reg)) {
@@ -348,11 +417,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
- if (VCCUsed)
+ if (VCCUsed || FlatUsed)
MaxSGPR += 2;
- if (FlatUsed)
+ if (FlatUsed) {
MaxSGPR += 2;
+ // 2 additional for VI+.
+ if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ MaxSGPR += 2;
+ }
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
@@ -368,6 +441,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
+ if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("too many user SGPRs used");
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
@@ -419,18 +497,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
- S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
+ S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+ // 0 = X, 1 = XY, 2 = XYZ
+ unsigned TIDIGCompCnt = 0;
+ if (MFI->hasWorkItemIDZ())
+ TIDIGCompCnt = 2;
+ else if (MFI->hasWorkItemIDY())
+ TIDIGCompCnt = 1;
+
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
- S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
- S_00B84C_TGID_X_EN(1) |
- S_00B84C_TGID_Y_EN(1) |
- S_00B84C_TGID_Z_EN(1) |
- S_00B84C_TG_SIZE_EN(1) |
- S_00B84C_TIDIG_COMP_CNT(2) |
- S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
+ S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+ S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
+ S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
+ S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
+ S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
+ S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
+ S_00B84C_EXCP_EN_MSB(0) |
+ S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
+ S_00B84C_EXCP_EN(0);
}
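// Example of the TIDIG_COMP_CNT encoding above (illustrative): a kernel that
// reads only the x work-item id leaves hasWorkItemIDY/Z false, so the field
// stays 0 (X only); any use of the z id forces 2 (XYZ), even if y is unused.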
static unsigned getRsrcReg(unsigned ShaderType) {
@@ -491,14 +578,53 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
- header.code_properties =
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
+ header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+
+ if (MFI->hasPrivateSegmentBuffer()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ }
+
+ if (MFI->hasDispatchPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+
+ if (MFI->hasQueuePtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+
+ if (MFI->hasKernargSegmentPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+
+ if (MFI->hasDispatchID())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+
+ if (MFI->hasFlatScratchInit())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+
+ // TODO: Private segment size
+
+ if (MFI->hasGridWorkgroupCountX()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
+ }
+
+ if (MFI->hasGridWorkgroupCountY()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
+ }
+
+ if (MFI->hasGridWorkgroupCountZ()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
+ }
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
-
+ header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 345af9b..817cbfc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -99,7 +99,9 @@ public:
void EmitFunctionBodyStart() override;
- void EmitEndOfAsmFile(Module &M) override;
+ void EmitFunctionEntryLabel() override;
+
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp
new file mode 100644
index 0000000..2f6b302
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp
@@ -0,0 +1,26 @@
+//===-- AMDGPUDiagnosticInfoUnsupported.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDiagnosticInfoUnsupported.h"
+
+using namespace llvm;
+
+DiagnosticInfoUnsupported::DiagnosticInfoUnsupported(
+ const Function &Fn,
+ const Twine &Desc,
+ DiagnosticSeverity Severity)
+ : DiagnosticInfo(getKindID(), Severity),
+ Description(Desc),
+ Fn(Fn) { }
+
+int DiagnosticInfoUnsupported::KindID = 0;
+
+void DiagnosticInfoUnsupported::print(DiagnosticPrinter &DP) const {
+ DP << "unsupported " << getDescription() << " in " << Fn.getName();
+}
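+// Typical use (illustrative; mirrors call sites elsewhere in the backend):
+//   DiagnosticInfoUnsupported D(Fn, "flat scratch");
+//   Fn.getContext().diagnose(D); // "unsupported flat scratch in <fn name>"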
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h
new file mode 100644
index 0000000..0fd37e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h
@@ -0,0 +1,48 @@
+//===-- AMDGPUDiagnosticInfoUnsupported.h - Error reporting -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+
+namespace llvm {
+
+/// Diagnostic information for unimplemented or unsupported feature reporting.
+class DiagnosticInfoUnsupported : public DiagnosticInfo {
+private:
+ const Twine &Description;
+ const Function &Fn;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
+ DiagnosticSeverity Severity = DS_Error);
+
+ const Function &getFunction() const { return Fn; }
+ const Twine &getDescription() const { return Description; }
+
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index 8175786..4d84d28 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -71,9 +71,15 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
}
/// \returns The number of registers allocated for \p FI.
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
+
// Start the offset at 2 so we don't overwrite work group information.
// XXX: We should only do this when the shader actually uses this
// information.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 9f31be1..257a3da 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -8,14 +8,12 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Interface to describe a layout of a stack frame on a AMDIL target
-/// machine.
+/// \brief Interface to describe a layout of a stack frame on an AMDGPU target.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
-#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -34,7 +32,8 @@ public:
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 64c54cc..b33040b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,6 +11,8 @@
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
@@ -20,9 +22,9 @@
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
@@ -40,12 +42,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
+ void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
private:
@@ -91,7 +95,7 @@ private:
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
- void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
@@ -108,6 +112,16 @@ private:
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
@@ -273,6 +287,23 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
return N;
}
+static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
+ switch (NumVectorElts) {
+ case 1:
+ return AMDGPU::SReg_32RegClassID;
+ case 2:
+ return AMDGPU::SReg_64RegClassID;
+ case 4:
+ return AMDGPU::SReg_128RegClassID;
+ case 8:
+ return AMDGPU::SReg_256RegClassID;
+ case 16:
+ return AMDGPU::SReg_512RegClassID;
+ }
+
+ llvm_unreachable("invalid vector size");
+}
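+// Illustrative consequence: a v4i32 BUILD_VECTOR now always receives
+// SReg_128 here, where the removed code below scanned the uses to choose
+// between VGPR and SGPR classes; copies that turn out to need VGPRs are
+// left to SIFixSGPRCopies (registered as a standalone pass in this change).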
+
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -306,38 +337,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsEq(MVT::i32));
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- bool UseVReg = true;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
- if (!U->isMachineOpcode()) {
- continue;
- }
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
- if (!RC) {
- continue;
- }
- if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
- UseVReg = false;
- }
- }
- switch(NumVectorElts) {
- case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
- AMDGPU::SReg_32RegClassID;
- break;
- case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
- AMDGPU::SReg_64RegClassID;
- break;
- case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
- AMDGPU::SReg_128RegClassID;
- break;
- case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
- AMDGPU::SReg_256RegClassID;
- break;
- case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
- AMDGPU::SReg_512RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
+ RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
} else {
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
// that adds a 128 bits reg copy when going through TwoAddressInstructions
@@ -455,98 +455,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
-
- case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
- N = glueCopyToM0(N);
- break;
- }
-
- // To simplify the TableGen patterns, we replace all i64 loads with
- // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
- // during DAG legalization; however, some places in the DAG legalizer
- // (e.g. ExpandUnalignedLoad) assume that i64 loads are legal whenever
- // i64 is, so doing this promotion that early can cause problems.
-
- SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
- MVT::i64, NewLoad);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SDNode *Load = glueCopyToM0(NewLoad.getNode());
- SelectCode(Load);
- N = BitCast.getNode();
- break;
- }
-
+ case ISD::LOAD:
case ISD::STORE: {
- // Handle i64 stores here for the same reason mentioned above for loads.
- StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Value = ST->getValue();
- if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
-
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
-
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
-
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
- }
-
- // getNode() may fold the bitcast if its input was another bitcast. If that
- // happens we should only select the new store.
- N = NewStore.getNode();
- }
-
N = glueCopyToM0(N);
break;
}
- case AMDGPUISD::REGISTER_LOAD: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
-
- SDLoc DL(N);
- SelectADDRIndirect(N->getOperand(1), Addr, Offset);
- const SDValue Ops[] = {
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
- CurDAG->getVTList(MVT::i32, MVT::i64,
- MVT::Other),
- Ops);
- }
- case AMDGPUISD::REGISTER_STORE: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
- SelectADDRIndirect(N->getOperand(2), Addr, Offset);
- SDLoc DL(N);
- const SDValue Ops[] = {
- N->getOperand(1),
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
- CurDAG->getVTList(MVT::Other),
- Ops);
- }
-
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -575,7 +489,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
N->getOperand(0), OffsetVal, WidthVal);
-
}
case AMDGPUISD::DIV_SCALE: {
return SelectDIV_SCALE(N);
@@ -601,7 +514,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
-
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
assert(AS != 0 && "Use checkPrivateAddress instead.");
if (!Ptr)
@@ -681,7 +593,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
if (checkPrivateAddress(N->getMemOperand())) {
if (MMO) {
const PseudoSourceValue *PSV = MMO->getPseudoValue();
- if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ if (PSV && PSV->isConstantPool()) {
return true;
}
}
@@ -847,7 +759,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
SDValue Ops[8];
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
@@ -883,15 +796,39 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
Offset = N1;
return true;
}
- }
+ } else if (Addr.getOpcode() == ISD::SUB) {
+ // sub C, x -> add (sub 0, x), C
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ int64_t ByteOffset = C->getSExtValue();
+ if (isUInt<16>(ByteOffset)) {
+ SDLoc DL(Addr);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ // XXX - This is kind of hacky. Create a dummy sub node so we can check
+ // the known bits in isDSOffsetLegal. The selected machine node is
+ // emitted below, so this dummy node is thrown away.
+ SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+ MachineSDNode *MachineSub
+ = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ Base = SDValue(MachineSub, 0);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+ }
+ }
+ } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ // If we have a constant address, prefer to put the constant into the
+ // offset. This can save moves to load the constant address since multiple
+ // operations can share the zero base address register, and enables merging
+ // into read2 / write2 instructions.
- SDLoc DL(Addr);
+ SDLoc DL(Addr);
- // If we have a constant address, prefer to put the constant into the
- // offset. This can save moves to load the constant address since multiple
- // operations can share the zero base address register, and enables merging
- // into read2 / write2 instructions.
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
if (isUInt<16>(CAddr->getZExtValue())) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
@@ -904,10 +841,11 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// default case
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
return true;
}
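
A note on the sub C, x -> add (sub 0, x), C rewrite above: it is a two's
complement identity, which is what lets the constant move into the DS
instruction's 16-bit offset field while the negated register becomes the
base. A minimal standalone check (illustration only, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t C = 16, X = 0xdeadbeefu;
      uint32_t Direct = C - X;        // the original (sub C, x) address
      uint32_t Folded = (0u - X) + C; // base = (sub 0, x), offset = C
      printf("%s\n", Direct == Folded ? "identical addresses" : "bug");
      return 0;
    }
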
+// TODO: If the offset is too big, put the low 16 bits into the offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
@@ -926,9 +864,35 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
- }
-
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ } else if (Addr.getOpcode() == ISD::SUB) {
+ // sub C, x -> add (sub 0, x), C
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ unsigned DWordOffset0 = C->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+
+ if (isUInt<8>(DWordOffset0)) {
+ SDLoc DL(Addr);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ // XXX - This is kind of hacky. Create a dummy sub node so we can check
+ // the known bits in isDSOffsetLegal. We need to emit the selected node
+ // here, so this is thrown away.
+ SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
+ MachineSDNode *MachineSub
+ = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ Base = SDValue(MachineSub, 0);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ return true;
+ }
+ }
+ }
+ } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
unsigned DWordOffset1 = DWordOffset0 + 1;
assert(4 * DWordOffset0 == CAddr->getZExtValue());
@@ -956,12 +920,16 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
return isUInt<12>(Imm->getZExtValue());
}
-void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
SDValue &TFE) const {
+ // The subtarget prefers to use flat instructions.
+ if (Subtarget->useFlatForGlobal())
+ return false;
+
SDLoc DL(Addr);
GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -994,14 +962,14 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
if (isLegalMUBUFImmOffset(C1)) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return;
+ return true;
} else if (isUInt<32>(C1->getZExtValue())) {
// Illegal offset, store it in soffset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
0);
- return;
+ return true;
}
}
@@ -1013,7 +981,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = N0;
VAddr = N1;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- return;
+ return true;
}
// default case -> offset
@@ -1021,6 +989,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
@@ -1033,8 +1002,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return false;
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE))
+ return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
if (C->getSExtValue()) {
@@ -1052,8 +1022,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue &Offset,
+ SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
SDValue GLC, TFE;
@@ -1066,36 +1036,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- unsigned ScratchOffsetReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
- Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
- ScratchOffsetReg, MVT::i32);
- SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
- SDValue ScratchRsrcDword0 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
- SDValue ScratchRsrcDword1 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
-
- const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
- };
- SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, RsrcOps), 0);
- Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
- SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+ Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+ SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
// (add n0, c1)
if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -1126,8 +1070,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE))
+ return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
@@ -1153,18 +1098,134 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+///
+/// \param EncodedOffset This is the immediate value that will be encoded
+/// directly into the instruction. On SI/CI the \p EncodedOffset
+/// will be in units of dwords and on VI+ it will be in units of bytes.
+static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
+ int64_t EncodedOffset) {
+ return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+ isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
+}
+
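
To make the dword-vs-byte split concrete, here is a standalone model of the
rule isLegalSMRDImmOffset encodes (a sketch under the stated assumptions,
not the LLVM helper itself): SI/CI encode the offset in dwords in an 8-bit
field, VI+ in bytes in a 20-bit field.

    #include <cstdint>
    #include <cstdio>

    // Sketch only: mirrors the generation split described above.
    static bool legalSMRDImm(bool IsVIPlus, int64_t ByteOffset) {
      int64_t Encoded = IsVIPlus ? ByteOffset : ByteOffset >> 2;
      int Bits = IsVIPlus ? 20 : 8;
      return Encoded >= 0 && Encoded < (int64_t(1) << Bits);
    }

    int main() {
      // 1024 bytes is 256 dwords: too big for the 8-bit SI/CI field,
      // but well within the 20-bit byte field on VI+.
      printf("SI/CI: %d, VI+: %d\n", legalSMRDImm(false, 1024),
             legalSMRDImm(true, 1024));
      return 0;
    }
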
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
+ SDValue &Offset, bool &Imm) const {
+
+ // FIXME: Handle non-constant offsets.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
+ if (!C)
+ return false;
+
+ SDLoc SL(ByteOffsetNode);
+ AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
+ int64_t ByteOffset = C->getSExtValue();
+ int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+ ByteOffset >> 2 : ByteOffset;
+
+ if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+ Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ Imm = true;
+ return true;
+ }
+
+ if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
+ return false;
+
+ if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
+ // 32-bit Immediates are supported on Sea Islands.
+ Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ } else {
+ SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+ Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
+ C32Bit), 0);
+ }
+ Imm = false;
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+ SDValue &Offset, bool &Imm) const {
+
+ SDLoc SL(Addr);
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+
+ if (SelectSMRDOffset(N1, Offset, Imm)) {
+ SBase = N0;
+ return true;
+ }
+ }
+ SBase = Addr;
+ Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ Imm = true;
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+
+ if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ bool Imm;
+ if (!SelectSMRD(Addr, SBase, Offset, Imm))
+ return false;
+
+ return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
+ !isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
+ SDValue &Offset) const {
+ if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ bool Imm;
+ if (!SelectSMRDOffset(Addr, Offset, Imm))
+ return false;
+
+ return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
+ !isa<ConstantSDNode>(Offset);
+}
+
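
The selector family above partitions SMRD addresses into mutually exclusive
ComplexPatterns. A rough model of the Sea Islands case (assumptions: the
8-bit dword immediate and the CI-only 32-bit literal described earlier;
this is a sketch, not the selector):

    #include <cstdint>
    #include <cstdio>

    enum OffsetKind { Imm, Imm32, Sgpr };

    static OffsetKind classifyOnCI(bool IsConstant, int64_t ByteOffset) {
      if (!IsConstant)
        return Sgpr;             // SelectSMRDSgpr: offset lives in an SGPR
      int64_t Dwords = ByteOffset >> 2;
      if (Dwords >= 0 && Dwords < 256)
        return Imm;              // SelectSMRDImm: fits the 8-bit field
      return Imm32;              // SelectSMRDImm32: CI 32-bit literal
    }

    int main() {
      printf("%d %d %d\n",
             classifyOnCI(true, 64),    // Imm
             classifyOnCI(true, 4096),  // Imm32
             classifyOnCI(false, 0));   // Sgpr
      return 0;
    }
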
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
SDLoc DL(N);
+ const MachineFunction &MF = CurDAG->getMachineFunction();
+ DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
+ "addrspacecast not implemented");
+ CurDAG->getContext()->diagnose(NotImplemented);
+
assert(Subtarget->hasFlatAddressSpace() &&
"addrspacecast only supported with flat address space!");
- assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
- "Cannot cast address space to / from constant address!");
-
assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
"Can only cast to / from flat address space!");
@@ -1190,7 +1251,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
}
-
if (DestSize > SrcSize) {
assert(SrcSize == 32 && DestSize == 64);
@@ -1371,6 +1431,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+ bool Modified = false;
+
+ // XXX - Other targets seem to be able to do this without a worklist.
+ SmallVector<LoadSDNode *, 8> LoadsToReplace;
+ SmallVector<StoreSDNode *, 8> StoresToReplace;
+
+ for (SDNode &Node : CurDAG->allnodes()) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
+ EVT VT = LD->getValueType(0);
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ continue;
+
+ // To simplify the TableGen patterns, we replace all i64 loads with v2i32
+ // loads. Alternatively, we could promote i64 loads to v2i32 during DAG
+ // legalization; however, some places (ExpandUnalignedLoad) in the DAG
+ // legalizer assume that i64 loads exist if i64 is legal, so doing this
+ // promotion early can cause problems.
+ LoadsToReplace.push_back(LD);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
+ // Handle i64 stores here for the same reason mentioned above for loads.
+ SDValue Value = ST->getValue();
+ if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+ continue;
+ StoresToReplace.push_back(ST);
+ }
+ }
+
+ for (LoadSDNode *LD : LoadsToReplace) {
+ SDLoc SL(LD);
+
+ SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
+ MVT::i64, NewLoad);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
+ Modified = true;
+ }
+
+ for (StoreSDNode *ST : StoresToReplace) {
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
+ MVT::v2i32, ST->getValue());
+ const SDValue StoreOps[] = {
+ ST->getChain(),
+ NewValue,
+ ST->getBasePtr(),
+ ST->getOffset()
+ };
+
+ CurDAG->UpdateNodeOperands(ST, StoreOps);
+ Modified = true;
+ }
+
+ // XXX - Is this necessary?
+ if (Modified)
+ CurDAG->RemoveDeadNodes();
+}
+
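
The preprocessing step above rewrites every non-extending i64 load as a
v2i32 load plus an ISD::BITCAST back to i64 (with the mirror rewrite for
stores). The bitcast is a pure reinterpretation, as this round-trip sketch
illustrates (illustration only; the lane order assumes the target's
little-endian layout):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t Value = 0x1122334455667788ull;
      uint32_t Lanes[2];                // what the v2i32 load yields
      std::memcpy(Lanes, &Value, sizeof(Value));
      uint64_t Rebuilt;                 // the bitcast back to i64
      std::memcpy(&Rebuilt, Lanes, sizeof(Rebuilt));
      printf("%s\n", Rebuilt == Value ? "round-trips" : "mismatch");
      return 0;
    }
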
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3a65f3b..222f631 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
@@ -27,50 +28,9 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DiagnosticPrinter.h"
using namespace llvm;
-namespace {
-
-/// Diagnostic information for unimplemented or unsupported feature reporting.
-class DiagnosticInfoUnsupported : public DiagnosticInfo {
-private:
- const Twine &Description;
- const Function &Fn;
-
- static int KindID;
-
- static int getKindID() {
- if (KindID == 0)
- KindID = llvm::getNextAvailablePluginDiagnosticKind();
- return KindID;
- }
-
-public:
- DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
- DiagnosticSeverity Severity = DS_Error)
- : DiagnosticInfo(getKindID(), Severity),
- Description(Desc),
- Fn(Fn) { }
-
- const Function &getFunction() const { return Fn; }
- const Twine &getDescription() const { return Description; }
-
- void print(DiagnosticPrinter &DP) const override {
- DP << "unsupported " << getDescription() << " in " << Fn.getName();
- }
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == getKindID();
- }
-};
-
-int DiagnosticInfoUnsupported::KindID = 0;
-}
-
-
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -113,6 +73,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ // This is totally unsupported, just custom lower to produce an error.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -352,7 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
@@ -429,12 +392,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
- // There are no integer divide instructions, and these expand to a pretty
- // large sequence of instructions.
- setIntDivIsCheap(false);
- setPow2SDivIsCheap(false);
setFsqrtIsCheap(true);
+ // We want to find all load dependencies for long chains of stores to enable
+ // merging into very wide vectors. The problem is with vectors with > 4
+ // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
+ // vectors are a legal type, even though we have to split the loads
+ // usually. When we can more precisely specify load legality per address
+ // space, we should be able to make FindBetterChain/MergeConsecutiveStores
+ // smarter so that they can figure out what to do in 2 iterations without all
+ // N > 4 stores on the same chain.
+ GatherAllAliasesMaxDepth = 16;
+
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
MaxStoresPerMemmove = 4096;
@@ -534,6 +503,18 @@ bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
return true;
}
+bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
+ // There are few operations which truly have vector input operands. Any vector
+ // operation is going to involve operations on each component, and a
+ // build_vector will be a copy per element, so it always makes sense to use a
+ // build_vector input in place of the extracted element to avoid a copy into a
+ // super register.
+ //
+ // We should probably only do this if all users are extracts only, but this
+ // should be the common case.
+ return true;
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
@@ -617,6 +598,15 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca");
+ DAG.getContext()->diagnose(NoDynamicAlloca);
+ return SDValue();
+}
+
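
For reference, the kind of source that reaches this path: any runtime-sized
stack allocation becomes ISD::DYNAMIC_STACKALLOC and now produces the
"unsupported dynamic alloca" diagnostic instead of a selection failure. A
hedged example (hypothetical input, not from the patch):

    #include <alloca.h>
    #include <cstdio>

    static int consume(int *P) { return P != nullptr; }

    int f(int N) {
      // Runtime-sized allocation: becomes ISD::DYNAMIC_STACKALLOC, which
      // the AMDGPU backend now diagnoses up front.
      int *Buf = static_cast<int *>(alloca(N * sizeof(int)));
      return consume(Buf);
    }

    int main() { return printf("%d\n", f(8)) > 0 ? 0 : 1; }
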
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -643,6 +633,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
}
@@ -892,7 +883,9 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ unsigned IgnoredFrameReg;
+ unsigned Offset =
+ TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
Op.getValueType());
}
@@ -1043,9 +1036,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_brev:
- return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
-
case Intrinsic::AMDGPU_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
@@ -1057,6 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_brev: // Legacy name
+ return DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(1));
}
}
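
Mapping the legacy AMDGPU_brev intrinsic onto the generic ISD::BITREVERSE
node is what allows the target-specific BREV node to be deleted below. The
intended semantics, as a plain reference model (a sketch, not the lowering
itself):

    #include <cstdint>
    #include <cstdio>

    // Reference bit reversal: bit 0 swaps with bit 31, bit 1 with bit 30, ...
    static uint32_t bitreverse32(uint32_t X) {
      uint32_t R = 0;
      for (int I = 0; I < 32; ++I, X >>= 1)
        R = (R << 1) | (X & 1u);
      return R;
    }

    int main() {
      printf("0x%08x\n", bitreverse32(0x00000001u)); // prints 0x80000000
      return 0;
    }
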
@@ -1077,6 +1069,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ // TODO: Should this propagate fast-math-flags?
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
@@ -1167,45 +1160,6 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
return SDValue();
}
-// FIXME: Remove this when combines added to DAGCombiner.
-SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- switch (CCOpcode) {
- case ISD::SETULE:
- case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETLE:
- case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETGT:
- case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETUGE:
- case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- default:
- return SDValue();
- }
-}
-
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1260,7 +1214,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
EVT PtrVT = BasePtr.getValueType();
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
- MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
EVT LoVT, HiVT;
EVT LoMemVT, HiMemVT;
@@ -1269,23 +1224,27 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+
+ unsigned Size = LoMemVT.getStoreSize();
+ unsigned BaseAlign = Load->getAlignment();
+ unsigned HiAlign = MinAlign(BaseAlign, Size);
+
SDValue LoLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr,
SrcValue,
LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
+ Load->isInvariant(), BaseAlign);
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), SL,
- PtrVT));
+ DAG.getConstant(Size, SL, PtrVT));
SDValue HiLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
Load->getChain(), HiPtr,
SrcValue.getWithOffset(LoMemVT.getStoreSize()),
HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
+ Load->isInvariant(), HiAlign);
SDValue Ops[] = {
DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
@@ -1415,7 +1374,11 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
DAG.getConstant(LoMemVT.getStoreSize(), SL,
PtrVT));
- MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+ const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
+ unsigned BaseAlign = Store->getAlignment();
+ unsigned Size = LoMemVT.getStoreSize();
+ unsigned HiAlign = MinAlign(BaseAlign, Size);
+
SDValue LoStore
= DAG.getTruncStore(Chain, SL, Lo,
BasePtr,
@@ -1423,15 +1386,15 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
LoMemVT,
Store->isNonTemporal(),
Store->isVolatile(),
- Store->getAlignment());
+ BaseAlign);
SDValue HiStore
= DAG.getTruncStore(Chain, SL, Hi,
HiPtr,
- SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ SrcValue.getWithOffset(Size),
HiMemVT,
Store->isNonTemporal(),
Store->isVolatile(),
- Store->getAlignment());
+ HiAlign);
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
}
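
Both split paths now clamp the high half's alignment with
MinAlign(BaseAlign, Size) instead of reusing the base alignment. Assuming
MinAlign keeps llvm::MinAlign's meaning (the largest power of two dividing
both inputs), a worked check:

    #include <cstdint>
    #include <cstdio>

    // Same formula as llvm::MinAlign: lowest set bit of A | B.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (~(A | B) + 1);
    }

    int main() {
      // 16-byte aligned base, 16-byte low half: base+16 stays 16-aligned.
      printf("%llu\n", (unsigned long long)minAlign(16, 16)); // 16
      // Only 4-byte aligned base: base+16 is guaranteed just 4 bytes.
      printf("%llu\n", (unsigned long long)minAlign(4, 16));  // 4
      return 0;
    }
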
@@ -1529,7 +1492,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
- return ScalarizeVectorStore(Op, DAG);
+ return SplitVectorStore(Op, DAG);
}
EVT MemVT = Store->getMemoryVT();
@@ -1630,6 +1593,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
+ // TODO: Should this propagate fast-math-flags?
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
@@ -1940,6 +1904,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
@@ -1968,6 +1934,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2045,6 +2012,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
@@ -2074,6 +2043,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const
SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
@@ -2184,6 +2155,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2206,7 +2178,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
-
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
@@ -2231,6 +2203,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ // TODO: Should this propagate fast-math-flags?
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
@@ -2257,7 +2230,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
MVT::f64);
SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
MVT::f64);
-
+ // TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
@@ -2511,12 +2484,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
if (VT == MVT::f32)
return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
-
- // TODO: Implement min / max Evergreen instructions.
- if (VT == MVT::i32 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
}
break;
@@ -2652,20 +2619,14 @@ bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isAllOnesValue();
- }
- return false;
+ return isAllOnesConstant(Op);
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isNullValue();
- }
- return false;
+ return isNullConstant(Op);
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
@@ -2738,7 +2699,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
- NODE_NAME_CASE(BREV)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
@@ -2893,8 +2853,7 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
unsigned SignBits = 32 - Width->getZExtValue() + 1;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Offset || !Offset->isNullValue())
+ if (!isNullConstant(Op.getOperand(1)))
return SignBits;
// TODO: Could probably figure something out with non-0 offsets.
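
The 32 - Width + 1 count above can be sanity-checked numerically: a signed
bitfield extract of width 8 at offset 0 sign-extends an 8-bit value, so 25
of the 32 result bits equal the sign bit. A small check (relies on the
GCC/Clang __builtin_clrsb extension, which reports redundant sign bits):

    #include <cstdio>

    int main() {
      // Models BFE_I32 with width = 8, offset = 0 applied to 0x80.
      int Extracted = (int)(signed char)0x80; // 0xffffff80
      int SignBits = __builtin_clrsb(Extracted) + 1;
      printf("%d sign bits\n", SignBits);     // 25 == 32 - 8 + 1
      return 0;
    }
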
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 478b203..7314cc0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -138,6 +138,7 @@ public:
bool storeOfVectorConstantIsCheap(EVT MemVT,
unsigned NumElem,
unsigned AS) const override;
+ bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
@@ -149,6 +150,9 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const;
+
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void ReplaceNodeResults(SDNode * N,
@@ -165,14 +169,6 @@ public:
SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const;
- SDValue CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const;
const char* getTargetNodeName(unsigned Opcode) const override;
@@ -216,7 +212,7 @@ public:
/// \brief Helper function that returns the byte offset of the given
/// type of implicit parameter.
- unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+ uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
};
@@ -267,7 +263,6 @@ enum NodeType : unsigned {
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
- BREV, // Reverse bits.
MUL_U24,
MUL_I24,
MAD_U24,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 15a3d54..a266e71 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -164,11 +164,6 @@ MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
// TODO: Implement this function
return nullptr;
}
-bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- // TODO: Implement this function
- return false;
-}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad,
@@ -312,7 +307,9 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+ unsigned IgnoredFrameReg;
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
+ MF, -1, IgnoredFrameReg);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -367,3 +364,14 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
+
+ArrayRef<std::pair<int, const char *>>
+AMDGPUInstrInfo::getSerializableTargetIndices() const {
+ static const std::pair<int, const char *> TargetIndices[] = {
+ {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
+ return makeArrayRef(TargetIndices);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 86d3962..53e8b23 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -103,8 +103,6 @@ public:
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexEnd(const MachineFunction &MF) const;
- bool canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
@@ -147,6 +145,9 @@ public:
return get(pseudoToMCOpcode(Opcode));
}
+ ArrayRef<std::pair<int, const char *>>
+ getSerializableTargetIndices() const override;
+
//===---------------------------------------------------------------------===//
// Pure virtual functions to be implemented by subclasses.
//===---------------------------------------------------------------------===//
@@ -195,6 +196,7 @@ public:
};
namespace AMDGPU {
+ LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
} // End namespace AMDGPU
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index b413897..70e589c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -191,8 +191,6 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
-def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
-
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
// performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 72cab39..11f6139 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -514,7 +514,7 @@ class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
SubRegIndex sub_reg>
: Pat<
- (sub_type (vector_extract vec_type:$src, sub_idx)),
+ (sub_type (extractelt vec_type:$src, sub_idx)),
(EXTRACT_SUBREG $src, sub_reg)
>;
@@ -522,7 +522,7 @@ class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
class Insert_Element <ValueType elem_type, ValueType vec_type,
int sub_idx, SubRegIndex sub_reg>
: Pat <
- (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
+ (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
(INSERT_SUBREG $vec, $elem, sub_reg)
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index ab489cd..1de3546 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -69,8 +69,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
- def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+ def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
}
// Legacy names for compatibility.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 2083146..dfc652f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -61,7 +61,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(MO.getReg());
+ MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
@@ -73,13 +73,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
break;
}
- case MachineOperand::MO_TargetIndex: {
- assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
@@ -104,10 +97,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#endif
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = MI;
- ++I;
- while (I != MBB->end() && I->isInsideBundle()) {
- EmitInstruction(I);
+ MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+ while (I != MBB->instr_end() && I->isInsideBundle()) {
+ EmitInstruction(&*I);
++I;
}
} else {
@@ -136,8 +128,6 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
MF->getSubtarget<MCSubtargetInfo>());
- CodeStream.flush();
-
HexLines.resize(HexLines.size() + 1);
std::string &HexLine = HexLines.back();
raw_string_ostream HexStream(HexLine);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 21c7da6..5413717 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,11 +1,10 @@
#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;
-static const char *const ShaderTypeAttribute = "ShaderType";
-
// Pin the vtable to this file.
void AMDGPUMachineFunction::anchor() {}
@@ -13,13 +12,9 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
ShaderType(ShaderType::COMPUTE),
LDSSize(0),
+ ABIArgOffset(0),
ScratchSize(0),
IsKernel(true) {
- Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType))
- llvm_unreachable("Can't parse shader type!");
- }
+ ShaderType = AMDGPU::getShaderType(*MF.getFunction());
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index f5e4694..46fcee8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -37,6 +37,11 @@ public:
return ShaderType;
}
+ bool isKernel() const {
+ // FIXME: Assume everything is a kernel until function calls are supported.
+ return true;
+ }
+
unsigned ScratchSize;
bool IsKernel;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
new file mode 100644
index 0000000..554bf1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
@@ -0,0 +1,373 @@
+//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass resolves calls to OpenCL image attribute, image resource ID and
+/// sampler resource ID getter functions.
+///
+/// Image attributes (size and format) are expected to be passed to the kernel
+/// as kernel arguments immediately following the image argument itself,
+/// therefore this pass adds image size and format arguments to the kernel
+/// functions in the module. The kernel functions with image arguments are
+/// re-created using the new signature. The new arguments are added to the
+/// kernel metadata with kernel_arg_type set to "image_size" or "image_format".
+/// Note: this pass may invalidate pointers to functions.
+///
+/// Resource IDs of read-only images, write-only images and samplers are
+/// defined to be their index among the kernel arguments of the same
+/// type and access qualifier.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
+StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
+StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
+StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id";
+
+StringRef ImageSizeArgMDType = "__llvm_image_size";
+StringRef ImageFormatArgMDType = "__llvm_image_format";
+
+StringRef KernelsMDNodeName = "opencl.kernels";
+StringRef KernelArgMDNodeNames[] = {
+ "kernel_arg_addr_space",
+ "kernel_arg_access_qual",
+ "kernel_arg_type",
+ "kernel_arg_base_type",
+ "kernel_arg_type_qual"};
+const unsigned NumKernelArgMDNodes = 5;
+
+typedef SmallVector<Metadata *, 8> MDVector;
+struct KernelArgMD {
+ MDVector ArgVector[NumKernelArgMDNodes];
+};
+
+} // end anonymous namespace
+
+static inline bool
+IsImageType(StringRef TypeString) {
+ return TypeString == "image2d_t" || TypeString == "image3d_t";
+}
+
+static inline bool
+IsSamplerType(StringRef TypeString) {
+ return TypeString == "sampler_t";
+}
+
+static Function *
+GetFunctionFromMDNode(MDNode *Node) {
+ if (!Node)
+ return nullptr;
+
+ size_t NumOps = Node->getNumOperands();
+ if (NumOps != NumKernelArgMDNodes + 1)
+ return nullptr;
+
+ auto F = mdconst::dyn_extract<Function>(Node->getOperand(0));
+ if (!F)
+ return nullptr;
+
+ // Sanity checks.
+ size_t ExpectNumArgNodeOps = F->arg_size() + 1;
+ for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
+ MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
+ if (ArgNode->getNumOperands() != ExpectNumArgNodeOps)
+ return nullptr;
+ if (!ArgNode->getOperand(0))
+ return nullptr;
+
+ // FIXME: It should be possible to do image lowering when some metadata
+ // args are missing or are not in the expected order.
+ MDString *StringNode = dyn_cast<MDString>(ArgNode->getOperand(0));
+ if (!StringNode || StringNode->getString() != KernelArgMDNodeNames[i])
+ return nullptr;
+ }
+
+ return F;
+}
+
+static StringRef
+AccessQualFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+ MDNode *ArgAQNode = cast<MDNode>(KernelMDNode->getOperand(2));
+ return cast<MDString>(ArgAQNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static StringRef
+ArgTypeFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+ MDNode *ArgTypeNode = cast<MDNode>(KernelMDNode->getOperand(3));
+ return cast<MDString>(ArgTypeNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static MDVector
+GetArgMD(MDNode *KernelMDNode, unsigned OpIdx) {
+ MDVector Res;
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+ MDNode *Node = cast<MDNode>(KernelMDNode->getOperand(i + 1));
+ Res.push_back(Node->getOperand(OpIdx));
+ }
+ return Res;
+}
+
+static void
+PushArgMD(KernelArgMD &MD, const MDVector &V) {
+ assert(V.size() == NumKernelArgMDNodes);
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+ MD.ArgVector[i].push_back(V[i]);
+ }
+}
+
+namespace {
+
+class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
+ static char ID;
+
+ LLVMContext *Context;
+ Type *Int32Type;
+ Type *ImageSizeType;
+ Type *ImageFormatType;
+ SmallVector<Instruction *, 4> InstsToErase;
+
+ bool replaceImageUses(Argument &ImageArg, uint32_t ResourceID,
+ Argument &ImageSizeArg,
+ Argument &ImageFormatArg) {
+ bool Modified = false;
+
+ for (auto &Use : ImageArg.uses()) {
+ auto Inst = dyn_cast<CallInst>(Use.getUser());
+ if (!Inst) {
+ continue;
+ }
+
+ Function *F = Inst->getCalledFunction();
+ if (!F)
+ continue;
+
+ Value *Replacement = nullptr;
+ StringRef Name = F->getName();
+ if (Name.startswith(GetImageResourceIDFunc)) {
+ Replacement = ConstantInt::get(Int32Type, ResourceID);
+ } else if (Name.startswith(GetImageSizeFunc)) {
+ Replacement = &ImageSizeArg;
+ } else if (Name.startswith(GetImageFormatFunc)) {
+ Replacement = &ImageFormatArg;
+ } else {
+ continue;
+ }
+
+ Inst->replaceAllUsesWith(Replacement);
+ InstsToErase.push_back(Inst);
+ Modified = true;
+ }
+
+ return Modified;
+ }
+
+ bool replaceSamplerUses(Argument &SamplerArg, uint32_t ResourceID) {
+ bool Modified = false;
+
+ for (const auto &Use : SamplerArg.uses()) {
+ auto Inst = dyn_cast<CallInst>(Use.getUser());
+ if (!Inst) {
+ continue;
+ }
+
+ Function *F = Inst->getCalledFunction();
+ if (!F)
+ continue;
+
+ Value *Replacement = nullptr;
+ StringRef Name = F->getName();
+ if (Name == GetSamplerResourceIDFunc) {
+ Replacement = ConstantInt::get(Int32Type, ResourceID);
+ } else {
+ continue;
+ }
+
+ Inst->replaceAllUsesWith(Replacement);
+ InstsToErase.push_back(Inst);
+ Modified = true;
+ }
+
+ return Modified;
+ }
+
+ bool replaceImageAndSamplerUses(Function *F, MDNode *KernelMDNode) {
+ uint32_t NumReadOnlyImageArgs = 0;
+ uint32_t NumWriteOnlyImageArgs = 0;
+ uint32_t NumSamplerArgs = 0;
+
+ bool Modified = false;
+ InstsToErase.clear();
+ for (auto ArgI = F->arg_begin(); ArgI != F->arg_end(); ++ArgI) {
+ Argument &Arg = *ArgI;
+ StringRef Type = ArgTypeFromMD(KernelMDNode, Arg.getArgNo());
+
+ // Handle image types.
+ if (IsImageType(Type)) {
+ StringRef AccessQual = AccessQualFromMD(KernelMDNode, Arg.getArgNo());
+ uint32_t ResourceID;
+ if (AccessQual == "read_only") {
+ ResourceID = NumReadOnlyImageArgs++;
+ } else if (AccessQual == "write_only") {
+ ResourceID = NumWriteOnlyImageArgs++;
+ } else {
+ llvm_unreachable("Wrong image access qualifier.");
+ }
+
+ Argument &SizeArg = *(++ArgI);
+ Argument &FormatArg = *(++ArgI);
+ Modified |= replaceImageUses(Arg, ResourceID, SizeArg, FormatArg);
+
+ // Handle sampler type.
+ } else if (IsSamplerType(Type)) {
+ uint32_t ResourceID = NumSamplerArgs++;
+ Modified |= replaceSamplerUses(Arg, ResourceID);
+ }
+ }
+ for (unsigned i = 0; i < InstsToErase.size(); ++i) {
+ InstsToErase[i]->eraseFromParent();
+ }
+
+ return Modified;
+ }
+
+ std::tuple<Function *, MDNode *>
+ addImplicitArgs(Function *F, MDNode *KernelMDNode) {
+ bool Modified = false;
+
+ FunctionType *FT = F->getFunctionType();
+ SmallVector<Type *, 8> ArgTypes;
+
+ // Metadata operands for new MDNode.
+ KernelArgMD NewArgMDs;
+ PushArgMD(NewArgMDs, GetArgMD(KernelMDNode, 0));
+
+ // Add implicit arguments to the signature.
+ for (unsigned i = 0; i < FT->getNumParams(); ++i) {
+ ArgTypes.push_back(FT->getParamType(i));
+ MDVector ArgMD = GetArgMD(KernelMDNode, i + 1);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ if (!IsImageType(ArgTypeFromMD(KernelMDNode, i)))
+ continue;
+
+ // Add size implicit argument.
+ ArgTypes.push_back(ImageSizeType);
+ ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageSizeArgMDType);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ // Add format implicit argument.
+ ArgTypes.push_back(ImageFormatType);
+ ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageFormatArgMDType);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ Modified = true;
+ }
+ if (!Modified) {
+ return std::make_tuple(nullptr, nullptr);
+ }
+
+ // Create function with new signature and clone the old body into it.
+ auto NewFT = FunctionType::get(FT->getReturnType(), ArgTypes, false);
+ auto NewF = Function::Create(NewFT, F->getLinkage(), F->getName());
+ ValueToValueMapTy VMap;
+ auto NewFArgIt = NewF->arg_begin();
+ for (auto &Arg: F->args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ if (IsImageType(ArgTypeFromMD(KernelMDNode, Arg.getArgNo()))) {
+ (NewFArgIt++)->setName(Twine("__size_") + ArgName);
+ (NewFArgIt++)->setName(Twine("__format_") + ArgName);
+ }
+ }
+ SmallVector<ReturnInst*, 8> Returns;
+ CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns);
+
+ // Build new MDNode.
+ SmallVector<llvm::Metadata *, 6> KernelMDArgs;
+ KernelMDArgs.push_back(ConstantAsMetadata::get(NewF));
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i)
+ KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i]));
+ MDNode *NewMDNode = MDNode::get(*Context, KernelMDArgs);
+
+ return std::make_tuple(NewF, NewMDNode);
+ }
+
+ bool transformKernels(Module &M) {
+ NamedMDNode *KernelsMDNode = M.getNamedMetadata(KernelsMDNodeName);
+ if (!KernelsMDNode)
+ return false;
+
+ bool Modified = false;
+ for (unsigned i = 0; i < KernelsMDNode->getNumOperands(); ++i) {
+ MDNode *KernelMDNode = KernelsMDNode->getOperand(i);
+ Function *F = GetFunctionFromMDNode(KernelMDNode);
+ if (!F)
+ continue;
+
+ Function *NewF;
+ MDNode *NewMDNode;
+ std::tie(NewF, NewMDNode) = addImplicitArgs(F, KernelMDNode);
+ if (NewF) {
+ // Replace old function and metadata with new ones.
+ F->eraseFromParent();
+ M.getFunctionList().push_back(NewF);
+ M.getOrInsertFunction(NewF->getName(), NewF->getFunctionType(),
+ NewF->getAttributes());
+ KernelsMDNode->setOperand(i, NewMDNode);
+
+ F = NewF;
+ KernelMDNode = NewMDNode;
+ Modified = true;
+ }
+
+ Modified |= replaceImageAndSamplerUses(F, KernelMDNode);
+ }
+
+ return Modified;
+ }
+
+ public:
+ AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override {
+ Context = &M.getContext();
+ Int32Type = Type::getInt32Ty(M.getContext());
+ ImageSizeType = ArrayType::get(Int32Type, 3);
+ ImageFormatType = ArrayType::get(Int32Type, 2);
+
+ return transformKernels(M);
+ }
+
+ const char *getPassName() const override {
+ return "AMDGPU OpenCL Image Type Pass";
+ }
+};
+
+char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
+
+} // end anonymous namespace
+
+ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() {
+ return new AMDGPUOpenCLImageTypeLoweringPass();
+}
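
The pass cannot append parameters to an existing Function, since LLVM function
types are immutable; it instead builds a new Function with the widened
signature, clones the old body into it, and patches the kernel metadata. A
minimal sketch of that clone-with-appended-argument pattern, using the same
3.8-era CloneFunctionInto signature the pass calls (the helper name and the
single appended i32 are illustrative, not part of the change):

    // Assumes llvm/Transforms/Utils/Cloning.h and ValueMapper.h.
    static Function *cloneWithExtraI32Arg(Function *F) {
      SmallVector<Type *, 8> ArgTypes(F->getFunctionType()->param_begin(),
                                      F->getFunctionType()->param_end());
      ArgTypes.push_back(Type::getInt32Ty(F->getContext())); // appended arg

      FunctionType *NewFT =
          FunctionType::get(F->getReturnType(), ArgTypes, /*isVarArg=*/false);
      Function *NewF = Function::Create(NewFT, F->getLinkage(), F->getName());

      // Map each old argument to its new counterpart so the cloned body
      // refers to the new Function's arguments.
      ValueToValueMapTy VMap;
      auto NewArgIt = NewF->arg_begin();
      for (Argument &Arg : F->args()) {
        NewArgIt->setName(Arg.getName());
        VMap[&Arg] = &*NewArgIt++;
      }
      NewArgIt->setName("__implicit"); // the new trailing argument

      SmallVector<ReturnInst *, 8> Returns;
      CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns);
      return NewF;
    }

As in addImplicitArgs above, the caller still has to insert NewF into the
module and erase the original before any uses are rewritten.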
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 57b7a73..87d50d5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -54,7 +54,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
- const FunctionType *FTy = F.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
LocalMemAvailable = ST.getLocalMemorySize();
@@ -63,7 +63,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// possible these arguments require the entire local memory space, so
// we cannot use local memory in the pass.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
- const Type *ParamTy = FTy->getParamType(i);
+ Type *ParamTy = FTy->getParamType(i);
if (ParamTy->isPointerTy() &&
ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
LocalMemAvailable = 0;
@@ -77,7 +77,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Check how much local memory is being used by global objects
for (Module::global_iterator I = Mod->global_begin(),
E = Mod->global_end(); I != E; ++I) {
- GlobalVariable *GV = I;
+ GlobalVariable *GV = &*I;
PointerType *GVTy = GV->getType();
if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
@@ -101,7 +101,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
return false;
}
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+static VectorType *arrayTypeToVecType(Type *ArrayTy) {
return VectorType::get(ArrayTy->getArrayElementType(),
ArrayTy->getArrayNumElements());
}
@@ -276,6 +276,9 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
}
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+ if (!I.isStaticAlloca())
+ return;
+
IRBuilder<> Builder(&I);
// First try to replace the alloca with a vector
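
The added guard admits only static allocas: constant-sized allocas in the
entry block, the only kind the pass can map to a fixed LDS slot or vector. A
hedged sketch of the distinction (F, LaterBB, and RuntimeCount are
illustrative, not from the patch):

    // Entry-block alloca with a constant size: isStaticAlloca() is true,
    // so promotion may consider it.
    IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
    AllocaInst *Fixed = B.CreateAlloca(B.getInt32Ty(), nullptr, "fixed");
    assert(Fixed->isStaticAlloca());

    // Runtime-sized alloca outside the entry block: isStaticAlloca() is
    // false, and visitAlloca now returns before touching it.
    IRBuilder<> B2(LaterBB);
    AllocaInst *Dyn = B2.CreateAlloca(B.getInt32Ty(), RuntimeCount, "dyn");
    assert(!Dyn->isStaticAlloca());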
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index cfd800b..0344834 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -37,10 +37,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
assert(!"Unimplemented"); return BitVector();
}
- virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
- assert(!"Unimplemented"); return nullptr;
- }
-
virtual unsigned getHWRegIndex(unsigned Reg) const {
assert(!"Unimplemented"); return 0;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5f32a65..44e0c47 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -16,6 +16,7 @@
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
+#include "SIFrameLowering.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -44,6 +45,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// disable it.
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
+ if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
+ FullFS += "+flat-for-global,";
FullFS += FS;
if (GPU == "" && TT.getArch() == Triple::amdgcn)
@@ -67,26 +70,36 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
- CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
- EnableUnsafeDSOffsetFolding(false),
+ CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
+ EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
+ EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- 64 * 16, // Maximum stack alignment (long16)
- 0),
+ FrameLowering(nullptr),
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
initializeSubtargetDependencies(TT, GPU, FS);
+ const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16)
+
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
TLInfo.reset(new R600TargetLowering(TM, *this));
+
+ // FIXME: Should have R600 specific FrameLowering
+ FrameLowering.reset(new AMDGPUFrameLowering(
+ TargetFrameLowering::StackGrowsUp,
+ MaxStackAlign,
+ 0));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
TLInfo.reset(new SITargetLowering(TM, *this));
+ FrameLowering.reset(new SIFrameLowering(
+ TargetFrameLowering::StackGrowsUp,
+ MaxStackAlign,
+ 0));
}
}
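
Two things happen in this constructor. First, the feature string is built
defaults-first so that an explicit -mattr flag still wins: LLVM applies
subtarget features in order, and a later entry overrides an earlier one. An
illustrative recap (IsHSA and UserFS stand in for the real inputs):

    SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
    if (IsHSA)
      FullFS += "+flat-for-global,"; // default on HSA...
    FullFS += UserFS;                // ..."-flat-for-global" here overrides

Second, FrameLowering moves behind a std::unique_ptr (see the header change
below) precisely so this constructor can choose AMDGPUFrameLowering or
SIFrameLowering once the generation is known; the old by-value member could
only ever be the base class.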
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 735f01d..9c7bb88 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
@@ -12,17 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
-#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
+
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -72,6 +70,7 @@ private:
bool FastFMAF32;
bool CaymanISA;
bool FlatAddressSpace;
+ bool FlatForGlobal;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
bool EnableIfCvt;
@@ -88,10 +87,10 @@ private:
bool CIInsts;
bool FeatureDisable;
int LDSBankCount;
- unsigned IsaVersion;
+ unsigned IsaVersion;
bool EnableHugeScratchBuffer;
- AMDGPUFrameLowering FrameLowering;
+ std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
@@ -104,7 +103,7 @@ public:
StringRef GPU, StringRef FS);
const AMDGPUFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return FrameLowering.get();
}
const AMDGPUInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
@@ -161,6 +160,10 @@ public:
return FlatAddressSpace;
}
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
+ }
+
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
@@ -305,6 +308,9 @@ public:
return isAmdHsaOS() ? 0 : 36;
}
+ unsigned getMaxNumUserSGPRs() const {
+ return 16;
+ }
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2297b52..22f85b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
+#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
@@ -41,6 +42,23 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
+
+ PassRegistry *PR = PassRegistry::getPassRegistry();
+ initializeSILowerI1CopiesPass(*PR);
+ initializeSIFixSGPRCopiesPass(*PR);
+ initializeSIFoldOperandsPass(*PR);
+ initializeSIFixSGPRLiveRangesPass(*PR);
+ initializeSIFixControlFlowLiveIntervalsPass(*PR);
+ initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
+ initializeAMDGPUAnnotateUniformValuesPass(*PR);
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.getOS() == Triple::AMDHSA)
+ return make_unique<AMDGPUHSATargetObjectFile>();
+
+ return make_unique<AMDGPUTargetObjectFile>();
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
@@ -72,15 +90,13 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
OptLevel),
- TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this),
IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
-AMDGPUTargetMachine::~AMDGPUTargetMachine() {
- delete TLOF;
-}
+AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
@@ -110,7 +126,13 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+
+ // Exceptions and StackMaps are not supported, so these passes will never do
+ // anything.
+ disablePass(&StackMapLivenessID);
+ disablePass(&FuncletLayoutID);
+ }
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
@@ -126,8 +148,9 @@ public:
void addIRPasses() override;
void addCodeGenPrepare() override;
- virtual bool addPreISel() override;
- virtual bool addInstSelector() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addGCPasses() override;
};
class R600PassConfig : public AMDGPUPassConfig {
@@ -147,6 +170,8 @@ public:
: AMDGPUPassConfig(TM, PM) { }
bool addPreISel() override;
bool addInstSelector() override;
+ void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addPreSched2() override;
@@ -156,7 +181,7 @@ public:
} // End of anonymous namespace
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(
AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
});
@@ -172,6 +197,10 @@ void AMDGPUPassConfig::addIRPasses() {
// functions, then we will generate code for the first function
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
+
+ // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
+ addPass(createAMDGPUOpenCLImageTypeLoweringPass());
+
TargetPassConfig::addIRPasses();
}
@@ -198,6 +227,11 @@ bool AMDGPUPassConfig::addInstSelector() {
return false;
}
+bool AMDGPUPassConfig::addGCPasses() {
+ // Do nothing. GC is not supported.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//
@@ -238,16 +272,23 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+
+ // FIXME: We need to run a pass to propagate the attributes when calls are
+ // supported.
+ addPass(&AMDGPUAnnotateKernelFeaturesID);
+
addPass(createSinkingPass());
addPass(createSITypeRewriter());
addPass(createSIAnnotateControlFlowPass());
+ addPass(createAMDGPUAnnotateUniformValues());
+
return false;
}
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());
- addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(&SIFixSGPRCopiesID);
addPass(createSIFoldOperandsPass());
return false;
}
@@ -259,7 +300,6 @@ void GCNPassConfig::addPreRegAlloc() {
// earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
if (getOptLevel() > CodeGenOpt::None) {
- initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
@@ -269,16 +309,27 @@ void GCNPassConfig::addPreRegAlloc() {
// This should be run after scheduling, but before register allocation. It
// also needs extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
}
addPass(createSIShrinkInstructionsPass(), false);
- addPass(createSIFixSGPRLiveRangesPass(), false);
+}
+
+void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&SIFixSGPRLiveRangesID);
+ TargetPassConfig::addFastRegAlloc(RegAllocPass);
+}
+
+void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // We want to run this after LiveVariables is computed, to avoid computing
+ // it twice.
+ // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure
+ // that needs to be fixed.
+ insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false);
+ TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
void GCNPassConfig::addPostRegAlloc() {
- addPass(createSIPrepareScratchRegs(), false);
addPass(createSIShrinkInstructionsPass(), false);
}
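
Registering the SI passes once in LLVMInitializeAMDGPUTarget (above) replaces
the ad-hoc initialize*Pass calls deleted from addPreRegAlloc, so the
PassRegistry entries exist before any pipeline is built. The pipeline hooks
themselves follow the usual TargetPassConfig shape; a minimal sketch
(MyGPUPassConfig is illustrative, the pass IDs are the real ones used above):

    class MyGPUPassConfig : public TargetPassConfig {
    public:
      MyGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
          : TargetPassConfig(TM, PM) {
        disablePass(&StackMapLivenessID); // unsupported feature: never run it
      }

      void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override {
        // Splice a target pass in right after LiveVariables, then defer to
        // the default register-allocation pipeline.
        insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID,
                   /*VerifyAfter=*/false);
        TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
      }
    };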
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 14792e3..236e3f8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -32,7 +32,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
private:
protected:
- TargetLoweringObjectFile *TLOF;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUSubtarget Subtarget;
AMDGPUIntrinsicInfo IntrinsicInfo;
@@ -52,7 +52,7 @@ public:
TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF;
+ return TLOF.get();
}
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
new file mode 100644
index 0000000..e050f21
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUTargetObjectFile.cpp - AMDGPU Object Files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetObjectFile.h"
+#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Object File
+//===----------------------------------------------------------------------===//
+
+MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
+ return TextSection;
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+//===----------------------------------------------------------------------===//
+// HSA Object File
+//===----------------------------------------------------------------------===//
+
+
+void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+
+ TextSection = AMDGPU::getHSATextSection(Ctx);
+
+ DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx);
+ DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx);
+
+ RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocationSection(
+ const char *SectionName) const {
+ return cast<MCSectionELF>(DataGlobalAgentSection)
+ ->getSectionName()
+ .equals(SectionName);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const {
+ // Read-only segments can only have agent allocation.
+ return AMDGPU::isReadOnlySegment(GV) ||
+ (AMDGPU::isGlobalSegment(GV) && GV->hasSection() &&
+ isAgentAllocationSection(GV->getSection()));
+}
+
+bool AMDGPUHSATargetObjectFile::isProgramAllocation(
+ const GlobalValue *GV) const {
+ // The default for global segments is program allocation.
+ return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV);
+}
+
+MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isText() && !GV->hasComdat())
+ return getTextSection();
+
+ if (AMDGPU::isGlobalSegment(GV)) {
+ if (isAgentAllocation(GV))
+ return DataGlobalAgentSection;
+
+ if (isProgramAllocation(GV))
+ return DataGlobalProgramSection;
+ }
+
+ return AMDGPUTargetObjectFile::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
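
Both classes hang off the standard ELF lowering and intervene only in
SelectSectionForGlobal, so any global they decline falls through to the
default ELF placement. A sketch of the override shape (MyTLOF and
wantsTextPlacement are illustrative; the 3.8-era hook still takes a Mangler):

    MCSection *MyTLOF::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
      if (Kind.isReadOnly() && wantsTextPlacement(GV))
        return TextSection; // inherited member, assigned in Initialize()
      return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
                                                                 Mang, TM);
    }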
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
new file mode 100644
index 0000000..921341e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -0,0 +1,51 @@
+//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the AMDGPU-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile {
+private:
+ MCSection *DataGlobalAgentSection;
+ MCSection *DataGlobalProgramSection;
+ MCSection *RodataReadonlyAgentSection;
+
+ bool isAgentAllocationSection(const char *SectionName) const;
+ bool isAgentAllocation(const GlobalValue *GV) const;
+ bool isProgramAllocation(const GlobalValue *GV) const;
+
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6dacc74..54a003d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -74,9 +74,109 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
-unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
+ return Vector ? 0 : 32;
+}
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Semi-arbitrary large amount.
return 64;
}
+
+unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode);
+ }
+}
+
+int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
+ const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::not_intrinsic:
+ // This means we have an intrinsic that isn't defined in
+ // IntrinsicsAMDGPU.td
+ break;
+
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_mbcnt_hi:
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_z:
+ return true;
+ }
+
+ StringRef Name = I->getCalledFunction()->getName();
+ switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
+ default:
+ return false;
+ case AMDGPUIntrinsic::SI_tid:
+ case AMDGPUIntrinsic::SI_fs_interp:
+ return true;
+ }
+}
+
+static bool isArgPassedInSGPR(const Argument *A) {
+ const Function *F = A->getParent();
+ unsigned ShaderType = AMDGPU::getShaderType(*F);
+
+ // Arguments to compute shaders are never a source of divergence.
+ if (ShaderType == ShaderType::COMPUTE)
+ return true;
+
+ // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+ if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+ return true;
+
+ // Everything else is in VGPRs.
+ return false;
+}
+
+///
+/// \returns true if the result of the value could potentially be
+/// different across workitems in a wavefront.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return !isArgPassedInSGPR(A);
+
+ // Loads from the private address space are divergent, because threads
+ // can execute the load instruction with the same inputs and get different
+ // results.
+ //
+ // All other loads are not divergent, because if threads issue loads with the
+ // same arguments, they will always get the same result.
+ if (const LoadInst *Load = dyn_cast<LoadInst>(V))
+ return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
+ }
+
+ // Assume all function calls are a source of divergence.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return true;
+
+ return false;
+}
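
isSourceOfDivergence only provides the seeds: DivergenceAnalysis propagates
them through data and sync dependencies, and any pass can reach the same hook
through TargetTransformInfo. A small illustrative consumer (the function is
made up; the TTI query is real):

    // Assumes llvm/Analysis/TargetTransformInfo.h and Support/raw_ostream.h.
    void printDivergenceSeeds(Function &F, const TargetTransformInfo &TTI) {
      for (Argument &A : F.args())
        if (TTI.isSourceOfDivergence(&A))
          errs() << "divergent argument: " << A.getName() << '\n';
      for (BasicBlock &BB : F)
        for (Instruction &I : BB)
          if (TTI.isSourceOfDivergence(&I))
            errs() << "divergence seed: " << I << '\n';
    }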
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index dee0a69..976afb0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -60,6 +60,11 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
+
+ unsigned getCFInstrCost(unsigned Opcode);
+
+ int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+ bool isSourceOfDivergence(const Value *V) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index d918ac3..917efd1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -185,7 +185,7 @@ protected:
MachinePostDominatorTree *PDT;
MachineLoopInfo *MLI;
const R600InstrInfo *TII;
- const AMDGPURegisterInfo *TRI;
+ const R600RegisterInfo *TRI;
// PRINT FUNCTIONS
/// Print the ordered Blocks.
@@ -881,7 +881,7 @@ bool AMDGPUCFGStructurizer::run() {
} //while, "one iteration" over the function.
MachineBasicBlock *EntryMBB =
- GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+ &*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
if (EntryMBB->succ_size() == 0) {
Finish = true;
DEBUG(
@@ -904,7 +904,7 @@ bool AMDGPUCFGStructurizer::run() {
} while (!Finish && MakeProgress);
// Misc wrap up to maintain the consistency of the Function representation.
- wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+ wrapup(&*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
// Detach retired Block, release memory.
for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
@@ -1164,7 +1164,7 @@ int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
E = ContMBB.end(); It != E; ++It) {
- (*It)->removeSuccessor(LoopHeader);
+ (*It)->removeSuccessor(LoopHeader, true);
}
numLoopcontPatternMatch += NumCont;
@@ -1353,7 +1353,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// If MigrateTrue is true, then TrueBB is the block being "branched into"
// and if MigrateFalse is true, then FalseBB is the block being
// "branched into"
- //
+ //
// Here is the pseudo code for how I think the optimization should work:
// 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
// 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
@@ -1372,7 +1372,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// the late machine optimization passes, however if we implement
// bool TargetRegisterInfo::requiresRegisterScavenging(
// const MachineFunction &MF)
- // and have it return true, liveness will be tracked correctly
+ // and have it return true, liveness will be tracked correctly
// by generic optimization passes. We will also need to make sure that
// all of our target-specific passes that run after regalloc and before
// the CFGStructurizer track liveness and we will need to modify this pass
@@ -1487,7 +1487,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
);
DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
- DstMBB->removeSuccessor(SrcMBB);
+ DstMBB->removeSuccessor(SrcMBB, true);
cloneSuccessorList(DstMBB, SrcMBB);
removeSuccessor(SrcMBB);
@@ -1537,9 +1537,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
if (TrueMBB) {
MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
- MBB->removeSuccessor(TrueMBB);
+ MBB->removeSuccessor(TrueMBB, true);
if (LandMBB && TrueMBB->succ_size()!=0)
- TrueMBB->removeSuccessor(LandMBB);
+ TrueMBB->removeSuccessor(LandMBB, true);
retireBlock(TrueMBB);
MLI->removeBlock(TrueMBB);
}
@@ -1548,9 +1548,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
insertInstrBefore(I, AMDGPU::ELSE);
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
- MBB->removeSuccessor(FalseMBB);
+ MBB->removeSuccessor(FalseMBB, true);
if (LandMBB && FalseMBB->succ_size() != 0)
- FalseMBB->removeSuccessor(LandMBB);
+ FalseMBB->removeSuccessor(LandMBB, true);
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
}
@@ -1570,8 +1570,7 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
- DstBlk->addSuccessor(LandMBB);
- DstBlk->removeSuccessor(DstBlk);
+ DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
@@ -1592,7 +1591,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
// now branchInst can be erased safely
BranchMI->eraseFromParent();
// now take care of successors, retire blocks
- ExitingMBB->removeSuccessor(LandMBB);
+ ExitingMBB->removeSuccessor(LandMBB, true);
}
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
@@ -1666,8 +1665,7 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
//srcBlk, oldBlk, newBlk
- PredMBB->removeSuccessor(MBB);
- PredMBB->addSuccessor(CloneMBB);
+ PredMBB->replaceSuccessor(MBB, CloneMBB);
// add all successor to cloneBlk
cloneSuccessorList(CloneMBB, MBB);
@@ -1695,10 +1693,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
);
SpliceEnd = SrcMBB->end();
} else {
- DEBUG(
- dbgs() << "migrateInstruction see branch instr\n" ;
- BranchMI->dump();
- );
+ DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
SpliceEnd = BranchMI;
}
DEBUG(
@@ -1711,7 +1706,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
DEBUG(
dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << "\n";
+ << "srcSize = " << SrcMBB->size() << '\n';
);
}
@@ -1743,7 +1738,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
// test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
while ((BranchMI = getLoopendBlockBranchInstr(MBB))
&& isUncondBranch(BranchMI)) {
- DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+ DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
}
}
@@ -1759,10 +1754,10 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
assert(BranchMI && isCondBranch(BranchMI));
- DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+ DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
SHOWNEWBLK(MBB1, "Removing redundant successor");
- MBB->removeSuccessor(MBB1);
+ MBB->removeSuccessor(MBB1, true);
}
void AMDGPUCFGStructurizer::addDummyExitBlock(
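
The second argument threaded through every removeSuccessor call here is the
3.8-era NormalizeSuccProbs flag: after a successor is dropped, the branch
probabilities of the remaining successors are renormalized to sum to one.
Where a successor is swapped rather than dropped, the patch switches to
replaceSuccessor, which carries the old edge's probability over in a single
call. A labeled sketch (Pred, Old, New are illustrative blocks):

    // Drop an edge and renormalize the surviving probabilities:
    Pred->removeSuccessor(Old, /*NormalizeSuccProbs=*/true);

    // Swap an edge while keeping its probability; this replaces the old
    // removeSuccessor(Old) + addSuccessor(New) pair:
    Pred->replaceSuccessor(Old, New);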
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2018983..d9f753f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,7 +28,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -83,6 +85,7 @@ public:
unsigned RegNo;
int Modifiers;
const MCRegisterInfo *TRI;
+ const MCSubtargetInfo *STI;
bool IsForcedVOP3;
};
@@ -102,7 +105,7 @@ public:
}
void addRegOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
}
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
@@ -215,6 +218,10 @@ public:
(isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
}
+ bool isSCSrc64() const {
+ return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+ }
+
bool isVCSrc32() const {
return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
}
@@ -251,7 +258,22 @@ public:
return EndLoc;
}
- void print(raw_ostream &OS) const override { }
+ void print(raw_ostream &OS) const override {
+ switch (Kind) {
+ case Register:
+ OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
+ break;
+ case Immediate:
+ OS << getImm();
+ break;
+ case Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case Expression:
+ OS << "<expr " << *Expr << '>';
+ break;
+ }
+ }
static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
enum ImmTy Type = ImmTyNone,
@@ -278,10 +300,12 @@ public:
static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
SMLoc E,
const MCRegisterInfo *TRI,
+ const MCSubtargetInfo *STI,
bool ForceVOP3) {
auto Op = llvm::make_unique<AMDGPUOperand>(Register);
Op->Reg.RegNo = RegNo;
Op->Reg.TRI = TRI;
+ Op->Reg.STI = STI;
Op->Reg.Modifiers = -1;
Op->Reg.IsForcedVOP3 = ForceVOP3;
Op->StartLoc = S;
@@ -301,14 +325,32 @@ public:
bool isDSOffset01() const;
bool isSWaitCnt() const;
bool isMubufOffset() const;
+ bool isSMRDOffset() const;
+ bool isSMRDLiteralOffset() const;
};
class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
MCAsmParser &Parser;
unsigned ForcedEncodingSize;
+
+ bool isSI() const {
+ return AMDGPU::isSI(getSTI());
+ }
+
+ bool isCI() const {
+ return AMDGPU::isCI(getSTI());
+ }
+
+ bool isVI() const {
+ return AMDGPU::isVI(getSTI());
+ }
+
+ bool hasSGPR102_SGPR103() const {
+ return !isVI();
+ }
+
/// @name Auto-generated Match Functions
/// {
@@ -323,20 +365,34 @@ private:
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
+ bool ParseSectionDirectiveHSAText();
+ bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+ bool ParseDirectiveAMDGPUHsaKernel();
+ bool ParseDirectiveAMDGPUHsaModuleGlobal();
+ bool ParseDirectiveAMDGPUHsaProgramGlobal();
+ bool ParseSectionDirectiveHSADataGlobalAgent();
+ bool ParseSectionDirectiveHSADataGlobalProgram();
+ bool ParseSectionDirectiveHSARodataReadonlyAgent();
public:
- AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+public:
+ enum AMDGPUMatchResultTy {
+ Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
+ };
+
+ AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
- ForcedEncodingSize(0){
+ : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0) {
+ MCAsmParserExtension::Initialize(Parser);
- if (STI.getFeatureBits().none()) {
+ if (getSTI().getFeatureBits().none()) {
// Set default features.
- STI.ToggleFeature("SOUTHERN_ISLANDS");
+ copySTI().ToggleFeature("SOUTHERN_ISLANDS");
}
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
AMDGPUTargetStreamer &getTargetStreamer() {
@@ -420,10 +476,10 @@ struct OptionalOperand {
}
-static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+static int getRegClass(bool IsVgpr, unsigned RegWidth) {
if (IsVgpr) {
switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
+ default: return -1;
case 1: return AMDGPU::VGPR_32RegClassID;
case 2: return AMDGPU::VReg_64RegClassID;
case 3: return AMDGPU::VReg_96RegClassID;
@@ -434,7 +490,7 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
}
switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
+ default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SReg_128RegClassID;
@@ -443,16 +499,16 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
}
}
-static unsigned getRegForName(const StringRef &RegName) {
+static unsigned getRegForName(StringRef RegName) {
return StringSwitch<unsigned>(RegName)
.Case("exec", AMDGPU::EXEC)
.Case("vcc", AMDGPU::VCC)
- .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("m0", AMDGPU::M0)
.Case("scc", AMDGPU::SCC)
- .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
- .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
.Case("vcc_lo", AMDGPU::VCC_LO)
.Case("vcc_hi", AMDGPU::VCC_HI)
.Case("exec_lo", AMDGPU::EXEC_LO)
@@ -464,12 +520,14 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
const AsmToken Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- const StringRef &RegName = Tok.getString();
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+ StringRef RegName = Tok.getString();
RegNo = getRegForName(RegName);
if (RegNo) {
Parser.Lex();
- return false;
+ return !subtargetHasRegister(*TRI, RegNo);
}
// Match vgprs and sgprs
@@ -514,16 +572,24 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
RegIndexInClass = RegLo;
} else {
// SGPR registers are aligned. Max alignment is 4 dwords.
- RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ unsigned Size = std::min(RegWidth, 4u);
+ if (RegLo % Size != 0)
+ return true;
+
+ RegIndexInClass = RegLo / Size;
}
}
- const MCRegisterInfo *TRC = getContext().getRegisterInfo();
- unsigned RC = getRegClass(IsVgpr, RegWidth);
- if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
+ int RCID = getRegClass(IsVgpr, RegWidth);
+ if (RCID == -1)
return true;
- RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
- return false;
+
+ const MCRegisterClass RC = TRI->getRegClass(RCID);
+ if (RegIndexInClass >= RC.getNumRegs())
+ return true;
+
+ RegNo = RC.getRegister(RegIndexInClass);
+ return !subtargetHasRegister(*TRI, RegNo);
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
@@ -534,6 +600,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
return Match_InvalidOperand;
+ if ((TSFlags & SIInstrFlags::VOP3) &&
+ (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
+ getForcedEncodingSize() != 64)
+ return Match_PreferE32;
+
return Match_Success;
}
@@ -549,7 +620,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default: break;
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction not supported on this GPU");
@@ -592,6 +663,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
}
+ case Match_PreferE32:
+ return Error(IDLoc, "internal error: instruction without _e64 suffix "
+ "should be encoded as e32");
}
llvm_unreachable("Implement any new match types added!");
}
@@ -640,7 +714,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
Isa.Stepping,
"AMD", "AMDGPU");
@@ -852,7 +926,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
while (true) {
@@ -882,6 +956,64 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
return false;
}
+bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSATextSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef KernelName = Parser.getTok().getString();
+
+ getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
+ ELF::STT_AMDGPU_HSA_KERNEL);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef GlobalName = Parser.getTok().getIdentifier();
+
+ getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef GlobalName = Parser.getTok().getIdentifier();
+
+ getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSADataGlobalAgentSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSADataGlobalProgramSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -894,6 +1026,55 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
+ if (IDVal == ".hsatext" || IDVal == ".text")
+ return ParseSectionDirectiveHSAText();
+
+ if (IDVal == ".amdgpu_hsa_kernel")
+ return ParseDirectiveAMDGPUHsaKernel();
+
+ if (IDVal == ".amdgpu_hsa_module_global")
+ return ParseDirectiveAMDGPUHsaModuleGlobal();
+
+ if (IDVal == ".amdgpu_hsa_program_global")
+ return ParseDirectiveAMDGPUHsaProgramGlobal();
+
+ if (IDVal == ".hsadata_global_agent")
+ return ParseSectionDirectiveHSADataGlobalAgent();
+
+ if (IDVal == ".hsadata_global_program")
+ return ParseSectionDirectiveHSADataGlobalProgram();
+
+ if (IDVal == ".hsarodata_readonly_agent")
+ return ParseSectionDirectiveHSARodataReadonlyAgent();
+
+ return true;
+}
+
+bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
+ unsigned RegNo) const {
+ if (isCI())
+ return true;
+
+ if (isSI()) {
+ // No flat_scr
+ switch (RegNo) {
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
+ // SI/CI have.
+ for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
+ R.isValid(); ++R) {
+ if (*R == RegNo)
+ return false;
+ }
+
return true;
}
@@ -943,13 +1124,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- APInt IntVal32(32, IntVal);
- if (IntVal32.getSExtValue() != IntVal) {
+ if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
Error(S, "invalid immediate: only 32-bit values are legal");
return MatchOperand_ParseFail;
}
- IntVal = IntVal32.getSExtValue();
if (Negate)
IntVal *= -1;
Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
@@ -1002,7 +1181,7 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
Operands.push_back(AMDGPUOperand::CreateReg(
- RegNo, S, E, getContext().getRegisterInfo(),
+ RegNo, S, E, getContext().getRegisterInfo(), &getSTI(),
isForcedVOP3()));
if (HasModifiers || Modifiers) {
@@ -1571,6 +1750,23 @@ AMDGPUAsmParser::parseR128(OperandVector &Operands) {
}
//===----------------------------------------------------------------------===//
+// smrd
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isSMRDOffset() const {
+ // FIXME: Support 20-bit offsets on VI. We need to pass subtarget
+ // information here.
+ return isImm() && isUInt<8>(getImm());
+}
+
+bool AMDGPUOperand::isSMRDLiteralOffset() const {
+ // 32-bit literals are only supported on CI and we only want to use them
+ // when the offset is > 8 bits.
+ return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
+}
+
+//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
@@ -1653,8 +1849,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
- unsigned i = 2;
+
+ unsigned i = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ if (Desc.getNumDefs() > 0) {
+ ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1);
+ }
std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
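
subtargetHasRegister (above) rejects SGPR102/SGPR103 on VI by walking
register aliases rather than comparing against a fixed list, so sub- and
super-registers of the forbidden pair are caught as well. The idiom, pulled
out as a hedged sketch (the helper name is illustrative):

    // True if RegNo is Root itself or any register aliasing it.
    static bool overlapsRegister(const MCRegisterInfo &MRI, unsigned RegNo,
                                 unsigned Root) {
      for (MCRegAliasIterator R(Root, &MRI, /*IncludeSelf=*/true);
           R.isValid(); ++R)
        if (*R == RegNo)
          return true;
      return false;
    }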
diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
index 2f5fdbe..88a090d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -8,6 +8,22 @@
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
+// Remaining instructions:
+// S_CBRANCH_CDBGUSER
+// S_CBRANCH_CDBGSYS
+// S_CBRANCH_CDBGSYS_OR_USER
+// S_CBRANCH_CDBGSYS_AND_USER
+// DS_NOP
+// DS_GWS_SEMA_RELEASE_ALL
+// DS_WRAP_RTN_B32
+// DS_CNDXCHG32_RTN_B64
+// DS_WRITE_B96
+// DS_WRITE_B128
+// DS_CONDXCHG32_RTN_B128
+// DS_READ_B96
+// DS_READ_B128
+// BUFFER_LOAD_DWORDX3
+// BUFFER_STORE_DWORDX3
def isCIVI : Predicate <
@@ -23,6 +39,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
let SubtargetPredicate = isCIVI in {
+let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
VOP_F64_F64, ftrunc
>;
@@ -35,82 +52,218 @@ defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
VOP_F64_F64, frint
>;
+} // End SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
VOP_F32_F32
>;
defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
VOP_F32_F32
>;
+} // End SchedRW = [WriteQuarterRate32]
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
+ VOP_I32_I32_I32
+>;
+
+let isCommutable = 1 in {
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
+ VOP_I64_I32_I32_I64
+>;
+
+// XXX - Does this set VCC?
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
+ VOP_I64_I32_I32_I64
+>;
+} // End isCommutable = 1
+
+
+//===----------------------------------------------------------------------===//
+// DS Instructions
+//===----------------------------------------------------------------------===//
+defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
+
+// DS_CONDXCHG32_RTN_B64
+// DS_CONDXCHG32_RTN_B128
+
+//===----------------------------------------------------------------------===//
+// SMRD Instructions
+//===----------------------------------------------------------------------===//
+
+defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
+ "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF Instructions
+//===----------------------------------------------------------------------===//
+
+defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
+ "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
+>;
//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//
-def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>;
-def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>;
-def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>;
-def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>;
-def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>;
-def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>;
-def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>;
-def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>;
-def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>;
-def FLAT_STORE_SHORT : FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>;
-def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>;
-def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
- 0x1d, "flat_store_dwordx2", VReg_64
+defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
+ flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
+>;
+defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
+ flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
+>;
+defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
+ flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
+>;
+defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
+ flat<0xb, 0x13>, "flat_load_sshort", VGPR_32
+>;
+defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
+ flat<0xc, 0x14>, "flat_load_dword", VGPR_32
+>;
+defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
+ flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
+>;
+defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
+ flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
+>;
+defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
+ flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
+>;
+defm FLAT_STORE_BYTE : FLAT_Store_Helper <
+ flat<0x18>, "flat_store_byte", VGPR_32
+>;
+defm FLAT_STORE_SHORT : FLAT_Store_Helper <
+ flat<0x1a>, "flat_store_short", VGPR_32
+>;
+defm FLAT_STORE_DWORD : FLAT_Store_Helper <
+ flat<0x1c>, "flat_store_dword", VGPR_32
+>;
+defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+ flat<0x1d>, "flat_store_dwordx2", VReg_64
+>;
+defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+ flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
>;
-def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
- 0x1e, "flat_store_dwordx4", VReg_128
+defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+ flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
>;
-def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
- 0x1f, "flat_store_dwordx3", VReg_96
+defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
+ flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32
>;
-defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
- 0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64
->;
-defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>;
-defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>;
-defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>;
-defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>;
-defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>;
-defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>;
-defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>;
-defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>;
-defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>;
-defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>;
-defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>;
-defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>;
-defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
- 0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+ flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
+ flat<0x32, 0x42>, "flat_atomic_add", VGPR_32
+>;
+defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
+ flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32
+>;
+defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
+ flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32
+>;
+defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
+ flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32
+>;
+defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
+ flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32
+>;
+defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
+ flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32
+>;
+defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
+ flat<0x39, 0x48>, "flat_atomic_and", VGPR_32
+>;
+defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
+ flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32
+>;
+defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
+ flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32
+>;
+defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
+ flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32
+>;
+defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
+ flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32
+>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
+ flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64
>;
-defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>;
-defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>;
-defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
- 0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
->;
-defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>;
-defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>;
-defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>;
-defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>;
-defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>;
-defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>;
-defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>;
-defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>;
-defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>;
-defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>;
-defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>;
-defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>;
-defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
- 0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+ flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
+ flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
+ flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
+ flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
+ flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
+ flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64
+>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
+ flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64
+>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
+ flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64
+>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
+ flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64
+>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
+ flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64
+>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
+ flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64
+>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
+ flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64
>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>;
} // End SubtargetPredicate = isCIVI
+// CI-only flat instructions
+
+let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst in {
+
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
+ flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
+ flat<0x3f>, "flat_atomic_fmin", VGPR_32
+>;
+defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
+ flat<0x40>, "flat_atomic_fmax", VGPR_32
+>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
+ flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
+ flat<0x5f>, "flat_atomic_fmin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
+ flat<0x60>, "flat_atomic_fmax_x2", VReg_64
+>;
+
+} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -147,3 +300,80 @@ def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
} // End HasFlatAddressSpace predicate
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
+
+//===----------------------------------------------------------------------===//
+// Patterns to generate flat for global
+//===----------------------------------------------------------------------===//
+
+def useFlatForGlobal : Predicate <
+ "Subtarget->useFlatForGlobal() || "
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
+
+let Predicates = [useFlatForGlobal] in {
+
+// 1. Offset as a 20-bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+// Patterns for global loads with no offset
+class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 0, 0, 0)
+>;
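+
+// Editorial sketch: each FlatLoadPat instance below expands to a single Pat;
+// e.g. FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32> is shorthand for
+//   def : Pat <(i32 (global_load i64:$addr)),
+//              (FLAT_LOAD_DWORD $addr, 0, 0, 0)>;
+// FlatStorePat and FlatAtomicPat below expand analogously.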
+
+def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
+
+class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (node vt:$data, i64:$addr),
+ (inst $data, $addr, 0, 0, 0)
+>;
+
+def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
+
+class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr, vt:$data)),
+ (inst $addr, $data, 0, 0)
+>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+
+} // End Predicates = [useFlatForGlobal]
diff --git a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index ba4df82..a6c3785 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -82,6 +82,10 @@ def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
+def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> {
+ let eop = 0; // This bit is not used on Cayman.
+}
+
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 7adcd46..779a14e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -40,6 +40,15 @@ class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
: EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
"MEM_RAT "#name, pattern>;
+class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
+ : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+ i32imm:$rat_id, InstFlag:$eop),
+ "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
+ #!if(has_eop, ", $eop", ""),
+ [(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
+ R600_Reg128:$index_gpr,
+ (i32 imm:$rat_id))]>;
+
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
"MSKOR $rw_gpr.XW, $index_gpr",
@@ -105,6 +114,8 @@ def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
+def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>;
+
} // End usesCustomInserter = 1
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index e811d5c..a187de8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -283,8 +284,13 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == DoubleToBits(-4.0))
O << "-4.0";
- else
- llvm_unreachable("64-bit literal constants not supported");
+ else {
+ assert(isUInt<32>(Imm));
+
+ // In rare situations, we will have a 32-bit literal in a 64-bit
+ // operand. This is technically allowed for the encoding of s_mov_b64.
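+ //
+ // Illustrative case (hypothetical value): "s_mov_b64 s[0:1], 0xabcd1234"
+ // encodes the 32-bit literal 0xabcd1234 in a 64-bit operand; the assertion
+ // above guarantees the high 32 bits are zero, so printing the low word is
+ // lossless.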
+ O << formatHex(static_cast<uint64_t>(Imm));
+ }
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -592,11 +598,11 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
} else {
unsigned Stream = (SImm16 >> 8) & 0x3;
if (Op == 1)
- O << "cut";
+ O << "cut";
else if (Op == 2)
- O << "emit";
+ O << "emit";
else if (Op == 3)
- O << "emit-cut";
+ O << "emit-cut";
O << " stream " << Stream;
}
O << "), [m0] ";
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 14fb511..90541d8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -13,9 +13,7 @@
#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 4434d9b..60e8c8f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -99,14 +99,22 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
case AMDGPU::fixup_si_rodata: {
uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- *Dst = Value;
- break;
- }
-
- case AMDGPU::fixup_si_end_of_text: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- // The value points to the last instruction in the text section, so we
- // need to add 4 bytes to get to the start of the constants.
+ // We emit constant data at the end of the text section and generate its
+ // address using the following code sequence:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // the fixup replaces $symbol with a literal constant, which is a
+ // pc-relative offset from the encoding of the $symbol operand to the
+ // constant data.
+ //
+ // What we want here is an offset from the start of the s_add_u32
+ // instruction to the constant data, but since the encoding of $symbol
+ // starts 4 bytes after the start of the add instruction, we end up
+ // with an offset that is 4 bytes too small. This requires us to
+ // add 4 to the fixup value before applying it.
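+ //
+ // Worked example (addresses hypothetical):
+ //   0x00  s_getpc_b64 s[0:1]         ; s[0:1] <- 0x04
+ //   0x04  s_add_u32 s0, s0, $symbol  ; literal encoded at 0x08
+ //   0x0c  s_addc_u32 s1, s1, 0
+ //   ...
+ //   0x40  constant data
+ // The pc-relative Value is 0x40 - 0x08 = 0x38, but the add needs
+ // 0x40 - 0x04 = 0x3c, i.e. Value + 4.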
*Dst = Value + 4;
break;
}
@@ -136,8 +144,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_si_rodata", 0, 32, 0 },
- { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
};
if (Kind < FirstTargetFixupKind)
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
new file mode 100644
index 0000000..9ff9fe7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -0,0 +1,26 @@
+//===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUELFStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+
+using namespace llvm;
+
+void AMDGPUELFStreamer::InitSections(bool NoExecStack) {
+ // Start with the .hsatext section by default.
+ SwitchSection(AMDGPU::getHSATextSection(getContext()));
+}
+
+MCELFStreamer *llvm::createAMDGPUELFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new AMDGPUELFStreamer(Context, MAB, OS, Emitter);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
new file mode 100644
index 0000000..488d7e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -0,0 +1,40 @@
+//===-------- AMDGPUELFStreamer.h - ELF Object Output ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer which allows us to insert some hooks before
+// emitting data into an actual object file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+
+class AMDGPUELFStreamer : public MCELFStreamer {
+public:
+ AMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, MAB, OS, Emitter) { }
+
+ virtual void InitSections(bool NoExecStack) override;
+};
+
+MCELFStreamer *createAMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index 01021d6..59a9178 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -21,9 +21,6 @@ enum Fixups {
/// fixup for global addresses with constant initializers
fixup_si_rodata,
- /// fixup for offset from instruction to end of text section
- fixup_si_end_of_text,
-
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 028a86d..68b1d1a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -22,13 +22,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
InlineAsmEnd = ";#ASMEND";
//===--- Data Emission Directives -------------------------------------===//
- ZeroDirective = ".zero";
- AsciiDirective = ".ascii\t";
- AscizDirective = ".asciz\t";
- Data8bitsDirective = ".byte\t";
- Data16bitsDirective = ".short\t";
- Data32bitsDirective = ".long\t";
- Data64bitsDirective = ".quad\t";
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
@@ -41,3 +34,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
}
+
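+// Editorial note (sketch of the effect): returning true makes the streamer
+// print the bare name as its own directive, e.g.
+//   .hsatext
+// instead of
+//   .section .hsatext
+// mirroring how .text/.data are handled under the Sun-style section-switch
+// syntax enabled in the constructor.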
+bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
+ return SectionName == ".hsatext" || SectionName == ".hsadata_global_agent" ||
+ SectionName == ".hsadata_global_program" ||
+ SectionName == ".hsarodata_readonly_agent" ||
+ MCAsmInfo::shouldOmitSectionDirective(SectionName);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
index a5bac51..a546961 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -21,12 +21,13 @@ class Triple;
// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
// you will need to make sure your new class sets PrivateGlobalPrefix to
-// a prefix that won't appeary in a fuction name. The default value
+// a prefix that won't appear in a function name. The default value
// for PrivateGlobalPrefix is 'L', so it will consider any function starting
// with 'L' as a local symbol.
class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
explicit AMDGPUMCAsmInfo(const Triple &TT);
+ bool shouldOmitSectionDirective(StringRef SectionName) const override;
};
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index c709741..f704094 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUELFStreamer.h"
#include "AMDGPUMCAsmInfo.h"
#include "AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
@@ -85,6 +86,15 @@ static MCTargetStreamer * createAMDGPUObjectTargetStreamer(
return new AMDGPUTargetELFStreamer(S);
}
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ if (T.getOS() == Triple::AMDHSA)
+ return createAMDGPUELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+}
+
extern "C" void LLVMInitializeAMDGPUTargetMC() {
for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
@@ -95,6 +105,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
}
// R600 specific registration
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 09e6cb1..b91134d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUTargetStreamer.h"
#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -220,6 +221,26 @@ AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
}
+void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+ unsigned Type) {
+ switch (Type) {
+ default: llvm_unreachable("Invalid AMDGPU symbol type");
+ case ELF::STT_AMDGPU_HSA_KERNEL:
+ OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
+ break;
+ }
+}
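+// For example (illustrative symbol name), EmitAMDGPUSymbolType("hello_world",
+// ELF::STT_AMDGPU_HSA_KERNEL) prints:
+//   .amdgpu_hsa_kernel hello_world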
+
+void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+ StringRef GlobalName) {
+ OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
+}
+
+void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+ StringRef GlobalName) {
+ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@@ -291,7 +312,35 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+ // The MCObjectFileInfo that is available to the assembler is a generic
+ // implementation and not AMDGPUHSATargetObjectFile, so we can't use
+ // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
+ OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
OS.PopSection();
}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+ unsigned Type) {
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(SymbolName));
+ Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+ StringRef GlobalName) {
+
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(GlobalName));
+ Symbol->setType(ELF::STT_OBJECT);
+ Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+ StringRef GlobalName) {
+
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(GlobalName));
+ Symbol->setType(ELF::STT_OBJECT);
+ Symbol->setBinding(ELF::STB_GLOBAL);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index d37677c..83bb728 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+
#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -27,6 +30,12 @@ public:
StringRef ArchName) = 0;
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+
+ virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+
+ virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
+
+ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -41,6 +50,12 @@ public:
StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+ void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
};
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -72,6 +87,12 @@ public:
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+ void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
};
}
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index e683498..3c1142d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -37,7 +37,6 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCRegisterInfo &MRI;
public:
-
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
: MCII(mcii), MRI(mri) { }
@@ -50,8 +49,8 @@ public:
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
-private:
+private:
void EmitByte(unsigned int byte, raw_ostream &OS) const;
void Emit(uint32_t value, raw_ostream &OS) const;
@@ -59,7 +58,6 @@ private:
unsigned getHWRegChan(unsigned reg) const;
unsigned getHWReg(unsigned regNo) const;
-
};
} // End anonymous namespace
@@ -83,7 +81,7 @@ enum FCInstr {
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- MCContext &Ctx) {
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 65a0eeb..9eb3dad 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -36,7 +36,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
void operator=(const SIMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
- MCContext &Ctx;
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -47,7 +46,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
- : MCII(mcii), MRI(mri), Ctx(ctx) { }
+ : MCII(mcii), MRI(mri) { }
~SIMCCodeEmitter() override {}
@@ -250,17 +249,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
if (MO.isExpr()) {
const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
- MCFixupKind Kind;
- const MCSymbol *Sym =
- Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
- if (&Expr->getSymbol() == Sym) {
- // Add the offset to the beginning of the constant values.
- Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
- } else {
- // This is used for constant data stored in .rodata.
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
- }
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/Processors.td b/contrib/llvm/lib/Target/AMDGPU/Processors.td
index d9a0723..a1584a2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Processors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/Processors.td
@@ -142,3 +142,7 @@ def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
def : ProcessorModel<"fiji", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;
+
+def : ProcessorModel<"stoney", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index c8f37f6..bd80bb2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -405,8 +405,8 @@ private:
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
- getLiteral(BI, Literals);
- ClauseContent.push_back(BI);
+ getLiteral(&*BI, Literals);
+ ClauseContent.push_back(&*BI);
}
I = BI;
DeleteMI->eraseFromParent();
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 4e4d554..124a9c6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -190,6 +190,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM,
setSchedulingPreference(Sched::Source);
}
+static inline bool isEOP(MachineBasicBlock::iterator I) {
+ return std::next(I)->getOpcode() == AMDGPU::RETURN;
+}
+
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineFunction * MF = BB->getParent();
@@ -276,12 +280,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addImm(EOP); // Set End of program bit
+ .addImm(isEOP(I)); // Set End of program bit
+ break;
+ }
+ case AMDGPU::RAT_STORE_TYPED_eg: {
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addImm(isEOP(I)); // Set End of program bit
break;
}
@@ -539,7 +549,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
}
}
}
- bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+ bool EOP = isEOP(I);
if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
@@ -946,6 +956,8 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
SDLoc DL(Op);
+
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FADD, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
@@ -1936,6 +1948,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
Arg->getOperand(0).getOperand(Element));
}
}
+ break;
}
case ISD::SELECT_CC: {
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 855fa9f..8b6eea1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -922,7 +922,7 @@ bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const{
+ BranchProbability Probability) const {
return true;
}
@@ -933,14 +933,14 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB,
unsigned NumFCycles,
unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return true;
}
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
- const BranchProbability &Probability)
+ BranchProbability Probability)
const {
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index dee4c2b..e7251c3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -174,18 +174,18 @@ namespace llvm {
bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override ;
+ BranchProbability Probability) const override;
bool
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index 7beed09..33ef6a4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1655,7 +1655,7 @@ def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
// ISel Patterns
//===----------------------------------------------------------------------===//
-// CND*_INT Pattterns for f32 True / False values
+// CND*_INT Patterns for f32 True / False values
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
(selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 0c06ccc..5efb3b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -318,7 +318,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
MRI = &(Fn.getRegInfo());
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
- MachineBasicBlock *MB = MBB;
+ MachineBasicBlock *MB = &*MBB;
PreviousRegSeq.clear();
PreviousRegSeqByReg.clear();
PreviousRegSeqByUndefCount.clear();
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index deee5bc..2126961 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -81,11 +81,11 @@ private:
int LastDstChan = -1;
do {
bool isTrans = false;
- int BISlot = getSlot(BI);
+ int BISlot = getSlot(&*BI);
if (LastDstChan >= BISlot)
isTrans = true;
LastDstChan = BISlot;
- if (TII->isPredicated(BI))
+ if (TII->isPredicated(&*BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
@@ -95,7 +95,7 @@ private:
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (isTrans || TII->isTransOnly(BI)) {
+ if (isTrans || TII->isTransOnly(&*BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
@@ -149,7 +149,7 @@ private:
public:
// Ctor.
R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
- : VLIWPacketizerList(MF, MLI, true),
+ : VLIWPacketizerList(MF, MLI, nullptr),
TII(static_cast<const R600InstrInfo *>(
MF.getSubtarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) {
@@ -162,14 +162,14 @@ public:
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) override {
+ bool ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock *MBB) override {
return false;
}
// isSoloInstruction - return true if instruction MI can not be packetized
// with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) override {
+ bool isSoloInstruction(const MachineInstr *MI) override {
if (TII->isVector(*MI))
return true;
if (!TII->isALUInstr(MI->getOpcode()))
@@ -375,7 +375,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
+ if (TII->isSchedulingBoundary(&*std::prev(I), &*MBB, Fn))
break;
}
I = MBB->begin();
@@ -392,7 +392,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
continue;
}
- Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
RegionEnd = I;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 9713e60..4f8a129 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -35,7 +35,7 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
+ const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const;
const RegClassWeight &
getRegClassWeight(const TargetRegisterClass *RC) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index ccfbf1b..fa4d24a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -312,11 +312,10 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
Preds.push_back(*PI);
}
- BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
- LI, false);
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
}
- CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+ CallInst::Create(EndCf, popSaved(), "", &*BB->getFirstInsertionPt());
}
/// \brief Annotate the control flow with intrinsics so the backend can
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index 4c32639..7f79dd3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -37,7 +37,8 @@ enum {
MIMG = 1 << 18,
FLAT = 1 << 19,
WQM = 1 << 20,
- VGPRSpill = 1 << 21
+ VGPRSpill = 1 << 21,
+ VOPAsmPrefer32Bit = 1 << 22
};
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
index 5fe8d19..636750d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
@@ -16,15 +16,9 @@
#include "AMDGPU.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 23502b4..96e37c5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -82,22 +82,10 @@ using namespace llvm;
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
-
-private:
+public:
static char ID;
- const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const;
-public:
- SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
+ SIFixSGPRCopies() : MachineFunctionPass(ID) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -105,14 +93,23 @@ public:
return "SI Fix SGPR copies";
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
+INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+
char SIFixSGPRCopies::ID = 0;
-FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
- return new SIFixSGPRCopies(tm);
+char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
+
+FunctionPass *llvm::createSIFixSGPRCopiesPass() {
+ return new SIFixSGPRCopies();
}
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
@@ -128,77 +125,115 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
return false;
}
-/// This functions walks the use list of Reg until it finds an Instruction
-/// that isn't a COPY returns the register class of that instruction.
-/// \return The register defined by the first non-COPY instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
-
- const TargetRegisterClass *RC
- = TargetRegisterInfo::isVirtualRegister(Reg) ?
- MRI.getRegClass(Reg) :
- TRI->getPhysRegClass(Reg);
-
- RC = TRI->getSubRegClass(RC, SubReg);
- for (MachineRegisterInfo::use_instr_iterator
- I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
- switch (I->getOpcode()) {
- case AMDGPU::COPY:
- RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
- I->getOperand(0).getReg(),
- I->getOperand(0).getSubReg()));
- break;
- }
- }
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getCopyRegClasses(const MachineInstr &Copy,
+ const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+
+ const TargetRegisterClass *SrcRC =
+ TargetRegisterInfo::isVirtualRegister(SrcReg) ?
+ MRI.getRegClass(SrcReg) :
+ TRI.getPhysRegClass(SrcReg);
- return RC;
+ // We don't really care about the subregister here.
+ // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
+
+ const TargetRegisterClass *DstRC =
+ TargetRegisterInfo::isVirtualRegister(DstReg) ?
+ MRI.getRegClass(DstReg) :
+ TRI.getPhysRegClass(DstReg);
+
+ return std::make_pair(SrcRC, DstRC);
}
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
- return TRI->getSubRegClass(RC, SubReg);
- }
- MachineInstr *Def = MRI.getVRegDef(Reg);
- if (Def->getOpcode() != AMDGPU::COPY) {
- return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
- }
+static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
+ return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
+}
- return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
- Def->getOperand(1).getSubReg());
+static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
+ return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
-bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const {
+// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
+//
+// SGPRx = ...
+// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+// VGPRz = COPY SGPRy
+//
+// ==>
+//
+// VGPRx = COPY SGPRx
+// VGPRz = REG_SEQUENCE VGPRx, sub0
+//
+// This exposes immediate folding opportunities when materializing 64-bit
+// immediates.
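+//
+// For example (sketch, vregs hypothetical): a 64-bit constant materialized as
+//   %lo  = S_MOV_B32 0
+//   %hi  = S_MOV_B32 1
+//   %s64 = REG_SEQUENCE %lo, sub0, %hi, sub1
+//   %v64 = COPY %s64
+// becomes a VGPR REG_SEQUENCE of two copies, which the operand folder can
+// then rewrite into V_MOV_B32s of the immediates.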
+static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI) {
+ assert(MI.isRegSequence());
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
+ return false;
- unsigned DstReg = Copy.getOperand(0).getReg();
- unsigned SrcReg = Copy.getOperand(1).getReg();
- unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+ if (!MRI.hasOneUse(DstReg))
+ return false;
- if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // If the destination register is a physical register there isn't really
- // much we can do to fix this.
+ MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
+ if (!CopyUse.isCopy())
return false;
- }
- const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
- const TargetRegisterClass *SrcRC;
+ if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
+ return false;
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
+ // TODO: Could have multiple extracts?
+ unsigned SubReg = CopyUse.getOperand(1).getSubReg();
+ if (SubReg != AMDGPU::NoSubRegister)
return false;
- SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
- return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+ MRI.setRegClass(DstReg, DstRC);
+
+ // SGPRx = ...
+ // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+ // VGPRz = COPY SGPRy
+
+ // =>
+ // VGPRx = COPY SGPRx
+ // VGPRz = REG_SEQUENCE VGPRx, sub0
+
+ MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+
+ for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+ unsigned SrcReg = MI.getOperand(I).getReg();
+ unsigned SrcSubReg = MI.getOperand(I).getSubReg();
+
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ assert(TRI->isSGPRClass(SrcRC) &&
+ "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
+
+ SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
+ const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
+
+ unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
+ .addOperand(MI.getOperand(I));
+
+ MI.getOperand(I).setReg(TmpReg);
+ }
+
+ CopyUse.eraseFromParent();
+ return true;
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
@@ -207,40 +242,38 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ SmallVector<MachineInstr *, 16> Worklist;
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
+ I != E; ++I) {
MachineInstr &MI = *I;
- if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
- DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
- DEBUG(MI.print(dbgs()));
- TII->moveToVALU(MI);
-
- }
switch (MI.getOpcode()) {
- default: continue;
- case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
-
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- const MachineOperand &Op = MI.getOperand(i);
- unsigned Reg = Op.getReg();
- const TargetRegisterClass *RC
- = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
+ default:
+ continue;
+ case AMDGPU::COPY: {
+ // If the destination register is a physical register there isn't really
+ // much we can do to fix this.
+ if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
+ continue;
- MRI.constrainRegClass(Op.getReg(), RC);
- }
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
- MI.getOperand(0).getSubReg());
- if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
- MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
+ if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+ TII->moveToVALU(MI);
}
+ break;
+ }
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
+ unsigned Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
@@ -310,8 +343,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
case AMDGPU::REG_SEQUENCE: {
if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI))
+ !hasVGPROperands(MI, TRI)) {
+ foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
+ }
DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
index 0c54446..8bda283 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
@@ -7,9 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file
-/// SALU instructions ignore control flow, so we need to modify the live ranges
-/// of the registers they define in some cases.
+/// \file SALU instructions ignore the execution mask, so we need to modify the
+/// live ranges of the registers they define in some cases.
///
/// The main case we need to handle is when a def is used in one side of a
/// branch and not another. For example:
@@ -42,13 +41,15 @@
/// ENDIF
/// %use
///
-/// Adding this use will make the def live thoughout the IF branch, which is
+/// Adding this use will make the def live throughout the IF branch, which is
/// what we want.
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -79,9 +80,13 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+
AU.addRequired<MachinePostDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
AU.setPreservesCFG();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -90,7 +95,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
@@ -108,40 +113,48 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
- MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
- std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
+ bool MadeChange = false;
+
+ MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+ SmallVector<unsigned, 16> SGPRLiveRanges;
+
+ LiveVariables *LV = &getAnalysis<LiveVariables>();
+ MachineBasicBlock *Entry = &MF.front();
- // First pass, collect all live intervals for SGPRs
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ // Use a depth first order so that in SSA, we encounter all defs before
+ // uses. Once the defs of the block have been found, attempt to insert
+ // SGPR_USE instructions in successor blocks if required.
+ for (MachineBasicBlock *MBB : depth_first(Entry)) {
+ for (const MachineInstr &MI : *MBB) {
for (const MachineOperand &MO : MI.defs()) {
- if (MO.isImplicit())
- continue;
+ // We should never see a live out def of a physical register, so we also
+ // do not need to worry about implicit_defs().
unsigned Def = MO.getReg();
if (TargetRegisterInfo::isVirtualRegister(Def)) {
- if (TRI->isSGPRClass(MRI.getRegClass(Def)))
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getInterval(Def)));
- } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getRegUnit(Def)));
+ if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
+ // Only consider defs that are live outs. We don't care about def /
+ // use within the same block.
+
+ // LiveVariables does not consider registers that are only used in a
+ // phi in a successor block as live out, unlike LiveIntervals.
+ //
+ // This is OK because SIFixSGPRCopies replaced any SGPR phis with
+ // VGPRs.
+ if (LV->isLiveOut(Def, *MBB))
+ SGPRLiveRanges.push_back(Def);
+ }
}
}
}
- }
- // Second pass fix the intervals
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
- MachineBasicBlock &MBB = *BI;
- if (MBB.succ_size() < 2)
+ if (MBB->succ_size() < 2)
continue;
- // We have structured control flow, so number of succesors should be two.
- assert(MBB.succ_size() == 2);
- MachineBasicBlock *SuccA = *MBB.succ_begin();
- MachineBasicBlock *SuccB = *(++MBB.succ_begin());
+ // We have structured control flow, so the number of successors should be
+ // two.
+ assert(MBB->succ_size() == 2);
+ MachineBasicBlock *SuccA = *MBB->succ_begin();
+ MachineBasicBlock *SuccB = *(++MBB->succ_begin());
MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
if (!NCD)
@@ -156,37 +169,51 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
*(++NCD->succ_begin()));
}
- assert(SuccA && SuccB);
- for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
- unsigned Reg = RegLR.first;
- LiveRange *LR = RegLR.second;
-
- // FIXME: We could be smarter here. If the register is Live-In to
- // one block, but the other doesn't have any SGPR defs, then there
- // won't be a conflict. Also, if the branch decision is based on
- // a value in an SGPR, then there will be no conflict.
- bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
- bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
-
- if ((!LiveInToA && !LiveInToB) ||
- (LiveInToA && LiveInToB))
+
+ for (unsigned Reg : SGPRLiveRanges) {
+ // FIXME: We could be smarter here. If the register is Live-In to one
+ // block, but the other doesn't have any SGPR defs, then there won't be a
+ // conflict. Also, if the branch condition is uniform then there will be
+ // no conflict.
+ bool LiveInToA = LV->isLiveIn(Reg, *SuccA);
+ bool LiveInToB = LV->isLiveIn(Reg, *SuccB);
+
+ if (!LiveInToA && !LiveInToB) {
+ DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
+ << " is live into neither successor\n");
continue;
+ }
+
+ if (LiveInToA && LiveInToB) {
+ DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
+ << " is live into both successors\n");
+ continue;
+ }
// This interval is live in to one successor, but not the other, so
// we need to update its range so it is live in to both.
- DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
- " BB#" << SuccA->getNumber() << ", BB#" <<
- SuccB->getNumber() <<
- " with NCD = " << NCD->getNumber() << '\n');
+ DEBUG(dbgs() << "Possible SGPR conflict detected for "
+ << PrintReg(Reg, TRI, 0)
+ << " BB#" << SuccA->getNumber()
+ << ", BB#" << SuccB->getNumber()
+ << " with NCD = BB#" << NCD->getNumber() << '\n');
+
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Not expecting to extend live range of physreg");
// FIXME: Need to figure out how to update LiveRange here so this pass
// will be able to preserve LiveInterval analysis.
- BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
- TII->get(AMDGPU::SGPR_USE))
- .addReg(Reg, RegState::Implicit);
- DEBUG(NCD->getFirstNonPHI()->dump());
+ MachineInstr *NCDSGPRUse =
+ BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::SGPR_USE))
+ .addReg(Reg, RegState::Implicit);
+
+ MadeChange = true;
+ LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
+
+ DEBUG(NCDSGPRUse->dump());
}
}
- return false;
+ return MadeChange;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c288725..02a3930 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -45,6 +45,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -164,8 +165,8 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
- unsigned CommuteIdx0;
- unsigned CommuteIdx1;
+ unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
if (CanCommute) {
@@ -175,7 +176,16 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
OpNo = CommuteIdx0;
}
- if (!CanCommute || !TII->commuteInstruction(MI))
+ // One of the operands might be an Imm operand, and OpNo may refer to it
+ // after the call to commuteInstruction() below. Such situations are
+ // avoided here explicitly, as OpNo must be a register operand to be a
+ // candidate for memory folding.
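+ // For example (hypothetical): commuting "%dst = V_ADD_I32 %a, 10" pairs
+ // %a with the immediate 10; since one side is not a register, the fold
+ // cannot proceed and we bail out below.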
+ if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
+ !MI->getOperand(CommuteIdx1).isReg()))
+ return false;
+
+ if (!CanCommute ||
+ !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1))
return false;
if (!TII->isOperandLegal(MI, OpNo, OpToFold))
@@ -186,6 +196,110 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
return true;
}
+static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
+ unsigned UseOpIdx,
+ std::vector<FoldCandidate> &FoldList,
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace,
+ const SIInstrInfo *TII, const SIRegisterInfo &TRI,
+ MachineRegisterInfo &MRI) {
+ const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
+
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
+ return;
+ }
+
+ bool FoldingImm = OpToFold.isImm();
+ APInt Imm;
+
+ if (FoldingImm) {
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getPhysRegClass(UseReg);
+
+ Imm = APInt(64, OpToFold.getImm());
+
+ const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
+ const TargetRegisterClass *FoldRC =
+ TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ return;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
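+ // e.g. (hypothetical) folding the 64-bit constant 0x100000002 into a use
+ // of sub0 yields the 32-bit immediate 2; a use of sub1 yields 1.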
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ return;
+
+ UseMI->setDesc(TII->get(MovOp));
+ CopiesToReplace.push_back(UseMI);
+ }
+ }
+
+ // Special case for REG_SEQUENCE: We can't fold literals into
+ // REG_SEQUENCE instructions, so we have to fold them into the
+ // uses of REG_SEQUENCE.
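+ // Sketch (vregs hypothetical):
+ //   %imm = S_MOV_B32 123                      ; OpToFold = 123
+ //   %seq = REG_SEQUENCE %imm, sub0, %x, sub1
+ //   %use = V_ADD_I32 ..., %seq:sub0, ...
+ // The 123 is folded into %use, the consumer of sub0, not into the
+ // REG_SEQUENCE itself.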
+ if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+
+ for (MachineRegisterInfo::use_iterator
+ RSUse = MRI.use_begin(RegSeqDstReg),
+ RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+
+ MachineInstr *RSUseMI = RSUse->getParent();
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
+ continue;
+
+ foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
+ return;
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+ return;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+ return;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ return;
+}
+
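A worked instance of the 64-bit split in foldOperand above, as a standalone sketch (the immediate value is hypothetical):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void splitExample() {
      // OpToFold.getImm() == 0x00000001ffffffff, reaching a use through a
      // 64-bit register (FoldRC->getSize() == 8) built from sub0/sub1 halves:
      APInt Imm(64, 0x00000001ffffffffULL);
      APInt Lo = Imm.getLoBits(32); // 0xffffffff -> folded into the sub0 use
      APInt Hi = Imm.getHiBits(32); // 0x1        -> folded into the sub1 use
    }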
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -226,88 +340,36 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
OpToFold.getSubReg()))
continue;
+
+ // We need to mutate the operands of new mov instructions to add implicit
+ // uses of EXEC, but adding them invalidates the use_iterator, so defer
+ // this.
+ SmallVector<MachineInstr *, 4> CopiesToReplace;
+
std::vector<FoldCandidate> FoldList;
for (MachineRegisterInfo::use_iterator
Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
Use != E; ++Use) {
MachineInstr *UseMI = Use->getParent();
- const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
- // FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
- continue;
- }
-
- APInt Imm;
-
- if (FoldingImm) {
- unsigned UseReg = UseOp.getReg();
- const TargetRegisterClass *UseRC
- = TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI.getRegClass(UseReg) :
- TRI.getPhysRegClass(UseReg);
-
- Imm = APInt(64, OpToFold.getImm());
-
- // Split 64-bit constants into 32-bits for folding.
- if (UseOp.getSubReg()) {
- if (UseRC->getSize() != 8)
- continue;
-
- if (UseOp.getSubReg() == AMDGPU::sub0) {
- Imm = Imm.getLoBits(32);
- } else {
- assert(UseOp.getSubReg() == AMDGPU::sub1);
- Imm = Imm.getHiBits(32);
- }
- }
-
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
- if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
-
- unsigned MovOp = TII->getMovOpcode(DestRC);
- if (MovOp == AMDGPU::COPY)
- continue;
-
- UseMI->setDesc(TII->get(MovOp));
- }
- }
-
- const MCInstrDesc &UseDesc = UseMI->getDesc();
-
- // Don't fold into target independent nodes. Target independent opcodes
- // don't have defined register classes.
- if (UseDesc.isVariadic() ||
- UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
- continue;
-
- if (FoldingImm) {
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
- continue;
- }
-
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
-
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
- // already does this.
+ foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
}
+ // Make sure we add EXEC uses to any new v_mov instructions created.
+ for (MachineInstr *Copy : CopiesToReplace)
+ Copy->addImplicitDefUseOperands(MF);
+
for (FoldCandidate &Fold : FoldList) {
if (updateOperand(Fold, TRI)) {
// Clear kill flags.
if (!Fold.isImm()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
- Fold.OpToFold->setIsKill(false);
+ // FIXME: Probably shouldn't bother trying to fold if not an
+ // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
+ // copies.
+ MRI.clearKillFlags(Fold.OpToFold->getReg());
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
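The CopiesToReplace list above is the usual defer-mutation pattern for use_iterator walks; in miniature (a hypothetical standalone sketch, simplified from the pass):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    void deferredMutation(MachineRegisterInfo &MRI, MachineFunction &MF,
                          unsigned Reg) {
      SmallVector<MachineInstr *, 4> Worklist;
      for (auto U = MRI.use_begin(Reg), E = MRI.use_end(); U != E; ++U)
        Worklist.push_back(U->getParent()); // record only; don't mutate here
      for (MachineInstr *MI : Worklist)
        MI->addImplicitDefUseOperands(MF);  // safe once iteration is finished
    }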
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
new file mode 100644
index 0000000..6b3c81c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -0,0 +1,243 @@
+//===----------------------- SIFrameLowering.cpp --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#include "SIFrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+
+using namespace llvm;
+
+
+static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
+ const MachineFrameInfo *FrameInfo) {
+ if (!FuncInfo->hasSpilledSGPRs())
+ return false;
+
+ if (FuncInfo->hasSpilledVGPRs())
+ return false;
+
+ for (int I = FrameInfo->getObjectIndexBegin(),
+ E = FrameInfo->getObjectIndexEnd(); I != E; ++I) {
+ if (!FrameInfo->isSpillSlotObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
+static ArrayRef<MCPhysReg> getAllSGPR128() {
+ return makeArrayRef(AMDGPU::SReg_128RegClass.begin(),
+ AMDGPU::SReg_128RegClass.getNumRegs());
+}
+
+static ArrayRef<MCPhysReg> getAllSGPRs() {
+ return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
+ AMDGPU::SGPR_32RegClass.getNumRegs());
+}
+
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ if (!MF.getFrameInfo()->hasStackObjects())
+ return;
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // If we only have SGPR spills, we won't actually be using scratch memory
+ // since these spill to VGPRs.
+ //
+ // FIXME: We should be cleaning up these unused SGPR spill frame indices
+ // somewhere.
+ if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
+ return;
+
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ // We need to insert initialization of the scratch resource descriptor.
+ unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
+ assert(ScratchRsrcReg != AMDGPU::NoRegister);
+
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
+
+ unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+ unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
+ if (ST.isAmdHsaOS()) {
+ PreloadedPrivateBufferReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ }
+
+ // If we reserved the original input registers, we don't need to copy to the
+ // reserved registers.
+ if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
+ // We should always reserve these 5 registers at the same time.
+ assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
+ "scratch wave offset and private segment buffer inconsistent");
+ return;
+ }
+
+
+ // We added live-ins during argument lowering, but since they were not used
+ // they were deleted. We're adding the uses now, so add them back.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+
+ if (ST.isAmdHsaOS()) {
+ MRI.addLiveIn(PreloadedPrivateBufferReg);
+ MBB.addLiveIn(PreloadedPrivateBufferReg);
+ }
+
+ // We reserved the last registers for this. Shift it down to the end of those
+ // which were actually used.
+ //
+ // FIXME: It might be safer to use a pseudoregister before replacement.
+
+ // FIXME: We should be able to eliminate unused input registers; the only
+ // exception is the resources required for scratch access. For now we
+ // skip over user SGPRs and may leave unused holes.
+
+ // We find the resource first because it has an alignment requirement.
+ if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+ // Pick the first unallocated one. Make sure we don't clobber the other
+ // reserved input we needed.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg));
+ MRI.replaceRegWith(ScratchRsrcReg, Reg);
+ ScratchRsrcReg = Reg;
+ MFI->setScratchRSrcReg(ScratchRsrcReg);
+ break;
+ }
+ }
+ }
+
+ if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+ for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+ // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+ // scratch descriptor, since we haven't added its uses yet.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ ScratchWaveOffsetReg = Reg;
+ MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ break;
+ }
+ }
+ }
+
+
+ assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
+
+ const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL;
+
+ if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+ // Make sure we emit the copy for the offset first. We may have chosen to copy
+ // the buffer resource into a register that aliases the input offset register.
+ BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
+ .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+ }
+
+ if (ST.isAmdHsaOS()) {
+ // Insert copies from argument register.
+ assert(
+ !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) &&
+ !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg));
+
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+ unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3);
+
+ unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1);
+ unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3);
+
+ const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64);
+
+ BuildMI(MBB, I, DL, SMovB64, Rsrc01)
+ .addReg(Lo, RegState::Kill);
+ BuildMI(MBB, I, DL, SMovB64, Rsrc23)
+ .addReg(Hi, RegState::Kill);
+ } else {
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ // Use relocations to get the pointer, and setup the other bits manually.
+ uint64_t Rsrc23 = TII->getScratchRsrcWords23();
+ BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc2)
+ .addImm(Rsrc23 & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc3)
+ .addImm(Rsrc23 >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ }
+
+ // Make the selected registers live throughout the function.
+ for (MachineBasicBlock &OtherBB : MF) {
+ if (&OtherBB == &MBB)
+ continue;
+
+ OtherBB.addLiveIn(ScratchRsrcReg);
+ OtherBB.addLiveIn(ScratchWaveOffsetReg);
+ }
+}
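For reference, the ArrayRef arithmetic used by both selection loops above: drop_back(K).slice(NumPreloaded) on an N-entry register list scans the half-open range [NumPreloaded, N - K), i.e.

    // getAllSGPR128().drop_back(2).slice(NumPreloaded)
    //   -> SReg_128 entries NumPreloaded .. N-3 (inclusive)
    // getAllSGPRs().drop_back(6).slice(NumPreloaded)
    //   -> SGPR_32 entries NumPreloaded .. N-7 (inclusive)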
+
+void SIFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ if (!MFI->hasStackObjects())
+ return;
+
+ bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects();
+
+ assert((RS || !MayNeedScavengingEmergencySlot) &&
+ "RegScavenger required if spilling");
+
+ if (MayNeedScavengingEmergencySlot) {
+ int ScavengeFI = MFI->CreateSpillStackObject(
+ AMDGPU::SGPR_32RegClass.getSize(),
+ AMDGPU::SGPR_32RegClass.getAlignment());
+ RS->addScavengingFrameIndex(ScavengeFI);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
new file mode 100644
index 0000000..a9152fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -0,0 +1,34 @@
+//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
+
+#include "AMDGPUFrameLowering.h"
+
+namespace llvm {
+
+class SIFrameLowering final : public AMDGPUFrameLowering {
+public:
+ SIFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1) :
+ AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+ ~SIFrameLowering() override {}
+
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
+
+ void processFunctionBeforeFrameFinalized(
+ MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2db9ff..0e043cb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -20,6 +20,7 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
@@ -51,6 +52,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
+
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
@@ -103,6 +107,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
@@ -155,13 +160,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
+
setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
+
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
+
+ setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
@@ -173,9 +195,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
+
+
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -186,6 +213,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
case ISD::INSERT_VECTOR_ELT:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
+ case ISD::SCALAR_TO_VECTOR:
break;
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
@@ -197,6 +225,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
}
}
+ // Most operations are naturally 32-bit vector operations. We only support
+ // load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
+ for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
+ }
+
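A sketch of what the promotions above do during type legalization (hedged; operand bitcasts elided):

    // t1: v2i64 = BUILD_VECTOR a, b          -- before legalization
    // t1: v4i32 = BUILD_VECTOR a0,a1,b0,b1   -- after, via AddPromotedToType
    // so only v4i32 patterns are needed; the extract/insert/scalar_to_vector
    // cases are re-typed the same way.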
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -261,6 +305,41 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
+bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
+
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
+ return false;
+ }
+}
+
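Worked cases for the predicate above, in terms of the TargetLowering::AddrMode fields it reads (values hypothetical):

    // {BaseOffs: 4095, Scale: 0}    -> true   (r + i or i; isUInt<12> holds)
    // {BaseOffs: 4096, Scale: 0}    -> false  (offset needs 13 bits)
    // {BaseOffs: 0,    Scale: 1}    -> true   (r + r or r + i)
    // {Scale: 2, HasBaseReg: true}  -> false  (would be 2*r + r)
    // {Scale: 2, HasBaseReg: false} -> true   (2*r lowered as r + r)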
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
@@ -269,7 +348,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::GLOBAL_ADDRESS: {
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Assume we will use FLAT for all global memory accesses
// on VI.
@@ -282,51 +361,51 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
- // fall-through
- case AMDGPUAS::PRIVATE_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
- // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
- // additionally can do r + r + i with addr64. 32-bit has more addressing
- // mode options. Depending on the resource constant, it can also do
- // (i64 r0) + (i32 r1) * (i14 i).
- //
- // SMRD instructions have an 8-bit, dword offset.
- //
- // Assume nonunifom access, since the address space isn't enough to know
- // what instruction we will use, and since we don't know if this is a load
- // or store and scalar stores are only available on VI.
- //
- // We also know if we are doing an extload, we can't do a scalar load.
- //
- // Private arrays end up using a scratch buffer most of the time, so also
- // assume those use MUBUF instructions. Scratch loads / stores are currently
- // implemented as mubuf instructions with offen bit set, so slightly
- // different than the normal addr64.
- if (!isUInt<12>(AM.BaseOffs))
- return false;
- // FIXME: Since we can split immediate into soffset and immediate offset,
- // would it make sense to allow any immediate?
+ return isLegalMUBUFAddressingMode(AM);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // If the offset isn't a multiple of 4, it probably isn't going to be
+ // correctly aligned.
+ if (AM.BaseOffs % 4 != 0)
+ return isLegalMUBUFAddressingMode(AM);
+
+ // There are no SMRD extloads, so if we have to do a small type access we
+ // will use a MUBUF load.
+ // FIXME?: We also need to do this if unaligned, but we don't know the
+ // alignment here.
+ if (DL.getTypeStoreSize(Ty) < 4)
+ return isLegalMUBUFAddressingMode(AM);
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // SMRD instructions have an 8-bit, dword offset on SI.
+ if (!isUInt<8>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
+ // On CI+, this can also be a 32-bit literal constant offset. If it fits
+ // in 8-bits, it can use a smaller encoding.
+ if (!isUInt<32>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // On VI, these use the SMEM format and the offset is 20-bit in bytes.
+ if (!isUInt<20>(AM.BaseOffs))
+ return false;
+ } else
+ llvm_unreachable("unhandled generation");
- switch (AM.Scale) {
- case 0: // r + i or just i, depending on HasBaseReg.
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
- case 1:
- return true; // We have r + r or r + i.
- case 2:
- if (AM.HasBaseReg) {
- // Reject 2 * r + r.
- return false;
- }
- // Allow 2 * r as r + r
- // Or 2 * r + i is allowed as r + r + i.
+ if (AM.Scale == 1 && AM.HasBaseReg)
return true;
- default: // Don't allow n * r
- return false;
- }
+
+ return false;
}
+
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ return isLegalMUBUFAddressingMode(AM);
+
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS: {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
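The per-generation checks in the CONSTANT_ADDRESS case above work out to these offset limits (offsets that are not multiples of 4 were already routed to the MUBUF path):

    // SI:  isUInt<8>(BaseOffs / 4)  -> dword offsets up to 255 * 4 = 1020
    // CI:  isUInt<32>(BaseOffs / 4) -> dword offset in a 32-bit literal
    // VI:  isUInt<20>(BaseOffs)     -> byte offsets up to 2^20 - 1 = 1048575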
@@ -374,7 +453,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
- return Align % 4 == 0;
+ bool AlignedBy4 = (Align % 4 == 0);
+ if (IsFast)
+ *IsFast = AlignedBy4;
+ return AlignedBy4;
}
// Smaller than dword value must be aligned.
@@ -411,6 +493,32 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
+static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
+bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
+}
+
+
+bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
+ const MemSDNode *MemNode = cast<MemSDNode>(N);
+ const Value *Ptr = MemNode->getMemOperand()->getValue();
+
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
+ isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
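A hedged sketch of producing the hint isMemOpUniform tests for: "amdgpu.uniform" is the metadata kind checked above, and only its presence matters, so an empty node suffices (helper name hypothetical):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"

    static void markUniform(llvm::LoadInst *LI) {
      // Attach !amdgpu.uniform so the lowering treats the load as uniform.
      LI->setMetadata("amdgpu.uniform",
                      llvm::MDNode::get(LI->getContext(), {}));
    }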
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -426,12 +534,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return TII->isInlineConstant(Imm);
}
-static EVT toIntegerVT(EVT VT) {
- if (VT.isVector())
- return VT.changeVectorElementTypeToInteger();
- return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
@@ -439,7 +541,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -455,30 +557,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
unsigned Align = DL.getABITypeAlignment(Ty);
- if (VT != MemVT && VT.isFloatingPoint()) {
- // Do an integer load and convert.
- // FIXME: This is mostly because load legalization after type legalization
- // doesn't handle FP extloads.
- assert(VT.getScalarType() == MVT::f32 &&
- MemVT.getScalarType() == MVT::f16);
-
- EVT IVT = toIntegerVT(VT);
- EVT MemIVT = toIntegerVT(MemVT);
- SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
- IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
- false, // isVolatile
- true, // isNonTemporal
- true, // isInvariant
- Align); // Alignment
- SDValue Ops[] = {
- DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
- Load.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, SL);
- }
-
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (MemVT.isFloatingPoint())
+ ExtTy = ISD::EXTLOAD;
+
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
@@ -497,8 +579,16 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) {
+ const Function *Fn = MF.getFunction();
+ DiagnosticInfoUnsupported NoGraphicsHSA(*Fn, "non-compute shaders with HSA");
+ DAG.getContext()->diagnose(NoGraphicsHSA);
+ return SDValue();
+ }
- assert(CallConv == CallingConv::C);
+ // FIXME: We currently assume all calling conventions are kernels.
SmallVector<ISD::InputArg, 16> Splits;
BitVector Skipped(Ins.size());
@@ -513,7 +603,7 @@ SDValue SITargetLowering::LowerFormalArguments(
assert((PSInputNum <= 15) && "Too many PS inputs!");
if (!Arg.Used) {
- // We can savely skip PS inputs
+ // We can safely skip PS inputs
Skipped.set(i);
++PSInputNum;
continue;
@@ -530,7 +620,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
- // NOT four or eigth.
+ // NOT four or eight.
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
@@ -556,41 +646,30 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(AMDGPU::VGPR1);
}
- // The pointer to the list of arguments is stored in SGPR0, SGPR1
- // The pointer to the scratch buffer is stored in SGPR2, SGPR3
- if (Info->getShaderType() == ShaderType::COMPUTE) {
- if (Subtarget->isAmdHsaOS())
- Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
- else
- Info->NumUserSGPRs = 4;
-
- unsigned InputPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
- unsigned InputPtrRegLo =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned InputPtrRegHi =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- unsigned ScratchPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchPtrRegLo =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned ScratchPtrRegHi =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- CCInfo.AllocateReg(InputPtrRegLo);
- CCInfo.AllocateReg(InputPtrRegHi);
- CCInfo.AllocateReg(ScratchPtrRegLo);
- CCInfo.AllocateReg(ScratchPtrRegHi);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
- MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
- }
-
if (Info->getShaderType() == ShaderType::COMPUTE) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
}
+ // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+ if (Info->hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ CCInfo.AllocateReg(PrivateSegmentBufferReg);
+ }
+
+ if (Info->hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(DispatchPtrReg);
+ }
+
+ if (Info->hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(InputPtrReg);
+ }
+
AnalyzeFormalArguments(CCInfo, Splits);
SmallVector<SDValue, 16> Chains;
@@ -617,7 +696,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());
Chains.push_back(Arg.getValue(1));
- const PointerType *ParamTy =
+ auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
@@ -678,10 +757,113 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- if (Info->getShaderType() != ShaderType::COMPUTE) {
- unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
- AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
- Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+ // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+ // these from the dispatch pointer.
+
+ // Start adding system SGPRs.
+ if (Info->hasWorkGroupIDX()) {
+ unsigned Reg = Info->addWorkGroupIDX();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ } else
+ llvm_unreachable("work group id x is always enabled");
+
+ if (Info->hasWorkGroupIDY()) {
+ unsigned Reg = Info->addWorkGroupIDY();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkGroupIDZ()) {
+ unsigned Reg = Info->addWorkGroupIDZ();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkGroupInfo()) {
+ unsigned Reg = Info->addWorkGroupInfo();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasPrivateSegmentWaveByteOffset()) {
+ // Scratch wave offset passed in system SGPR.
+ unsigned PrivateSegmentWaveByteOffsetReg
+ = Info->addPrivateSegmentWaveByteOffset();
+
+ MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+ }
+
+ // Now that we've figured out where the scratch register inputs are, see if
+ // we should reserve the arguments and use them directly.
+
+ bool HasStackObjects = MF.getFrameInfo()->hasStackObjects();
+
+ if (ST.isAmdHsaOS()) {
+ // TODO: Assume we will spill without optimizations.
+ if (HasStackObjects) {
+ // If we have stack objects, we unquestionably need the private buffer
+ // resource. For the HSA ABI, this will be the first 4 user SGPR
+ // inputs. We can reserve those and use them directly.
+
+ unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ Info->setScratchRSrcReg(PrivateSegmentBufferReg);
+
+ unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
+ } else {
+ unsigned ReservedBufferReg
+ = TRI->reservedPrivateSegmentBufferReg(MF);
+ unsigned ReservedOffsetReg
+ = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+
+ // We tentatively reserve the last registers (skipping the last two
+ // which may contain VCC). After register allocation, we'll replace
+ // these with the ones immediately after those which were really
+ // allocated. In the prologue, copies will be inserted from the argument
+ // to these reserved registers.
+ Info->setScratchRSrcReg(ReservedBufferReg);
+ Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ }
+ } else {
+ unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF);
+
+ // Without HSA, relocations are used for the scratch pointer and the
+ // buffer resource setup is always inserted in the prologue. Scratch wave
+ // offset is still in an input SGPR.
+ Info->setScratchRSrcReg(ReservedBufferReg);
+
+ if (HasStackObjects) {
+ unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ } else {
+ unsigned ReservedOffsetReg
+ = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ }
+ }
+
+ if (Info->hasWorkItemIDX()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ } else
+ llvm_unreachable("workitem id x should always be enabled");
+
+ if (Info->hasWorkItemIDY()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkItemIDZ()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
if (Chains.empty())
@@ -693,27 +875,11 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
- MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH:
return BB;
- case AMDGPU::SI_RegisterStorePseudo: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- MachineInstrBuilder MIB =
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
- Reg);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- MIB.addOperand(MI->getOperand(i));
-
- MI->eraseFromParent();
- break;
- }
}
return BB;
}
@@ -944,20 +1110,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
const GlobalValue *GV = GSD->getGlobal();
MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
- SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
-
- SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, DL, MVT::i32));
-
- SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrLo, GA);
- SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, DL, MVT::i32),
- SDValue(Lo.getNode(), 1));
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+ return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA);
}
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
@@ -977,6 +1131,18 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
// a glue result.
}
+SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
+ SDValue Op,
+ MVT VT,
+ unsigned Offset) const {
+ SDLoc SL(Op);
+ SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
+ DAG.getEntryNode(), Offset, false);
+ // The local size values will have the hi 16-bits as zero.
+ return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
+ DAG.getValueType(VT));
+}
+
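Illustrating lowerImplicitZextParam above: the argument is loaded as a full i32, and AssertZext with the narrower VT records the known-zero high bits for later combines (a sketch of the effect):

    // Param            : i32 zext load of the kernel argument
    // AssertZext, i16  : asserts bits [31:16] are zero, so e.g.
    //                    (and Param, 0xffff) folds away; workdim uses i8.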
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -988,7 +1154,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Should this propagate fast-math-flags?
+
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
@@ -1008,37 +1180,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_X, false);
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
-
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::AMDGPU_read_workdim:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- getImplicitParameterOffset(MFI, GRID_DIM), false);
-
+ // Really only 2 bits.
+ return lowerImplicitZextParam(DAG, Op, MVT::i8,
+ getImplicitParameterOffset(MFI, GRID_DIM));
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT);
case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), VT);
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops[] = {
Op.getOperand(1),
@@ -1077,6 +1248,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case AMDGPUIntrinsic::SI_packf16:
+ if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
+ return DAG.getUNDEF(MVT::i32);
+ return Op;
case AMDGPUIntrinsic::SI_fs_interp: {
SDValue IJ = Op.getOperand(4);
SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
@@ -1092,6 +1267,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_interp_p1: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Glue);
+ }
+ case Intrinsic::amdgcn_interp_p2: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+ SDValue Glue = SDValue(M0.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+ Glue);
+ }
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -1152,16 +1340,29 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
"Custom lowering for non-i32 vectors hasn't been implemented.");
unsigned NumElements = Op.getValueType().getVectorNumElements();
assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+
switch (Load->getAddressSpace()) {
default: break;
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ if (isMemOpUniform(Load))
+ break;
+ // Non-uniform loads will be selected to MUBUF instructions, so they
+ // have the same legalization requirements as global and private
+ // loads.
+ //
+ // Fall-through
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::PRIVATE_ADDRESS:
+ if (NumElements >= 8)
+ return SplitVectorLoad(Op, DAG);
+
// v4 loads are supported for private and global memory.
if (NumElements <= 4)
break;
// fall-through
case AMDGPUAS::LOCAL_ADDRESS:
- return ScalarizeVectorLoad(Op, DAG);
+ // If properly aligned, splitting might let us use ds_read_b64.
+ return SplitVectorLoad(Op, DAG);
}
}
@@ -1236,8 +1437,10 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
if (Unsafe) {
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags);
}
return SDValue();
@@ -1274,6 +1477,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ // TODO: Should this propagate fast-math-flags?
+
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
@@ -1379,7 +1584,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Ret;
if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return ScalarizeVectorStore(Op, DAG);
+ return SplitVectorStore(Op, DAG);
if (VT == MVT::i1)
return DAG.getTruncStore(Store->getChain(), DL,
@@ -1393,6 +1598,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
DAG.getConstantFP(0.5/M_PI, DL,
@@ -2125,9 +2331,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- TII->legalizeOperands(MI);
- if (TII->isMIMG(MI->getOpcode())) {
+ if (TII->isVOP3(MI->getOpcode())) {
+ // Make sure constant bus requirements are respected.
+ TII->legalizeOperandsVOP3(MRI, MI);
+ return;
+ }
+
+ if (TII->isMIMG(*MI)) {
unsigned VReg = MI->getOperand(0).getReg();
unsigned Writemask = MI->getOperand(1).getImm();
unsigned BitsSet = 0;
@@ -2169,53 +2380,38 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-#if 1
- // XXX - Workaround for moveToVALU not handling different register class
- // inserts for REG_SEQUENCE.
-
- // Build the half of the subregister with the constants.
- const SDValue Ops0[] = {
- DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, Ops0), 0);
-
- // Combine the constants and the pointer.
- const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
- SubRegHi,
- DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
- };
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ // Build the half of the subregister with the constants before building the
+ // full 128-bit register. If we are building multiple resource descriptors,
+ // this will allow CSEing of the 2-component register.
+ const SDValue Ops0[] = {
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+ };
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
-#else
- const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
- };
+ SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, Ops0), 0);
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+ // Combine the constants and the pointer.
+ const SDValue Ops1[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
+ SubRegHi,
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
+ };
-#endif
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
}
/// \brief Return a resource descriptor with the 'Add TID' bit enabled
-/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
-/// of the resource descriptor) to create an offset, which is added to the
-/// resource ponter.
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to
+/// the resource pointer.
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr,
@@ -2248,15 +2444,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
-MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
-}
-
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
@@ -2274,13 +2461,41 @@ std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- if (Constraint == "r") {
- switch(VT.SimpleTy) {
- default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
- case MVT::i64:
- return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
- case MVT::i32:
+
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 's':
+ case 'r':
+ switch (VT.getSizeInBits()) {
+ default:
+ return std::make_pair(0U, nullptr);
+ case 32:
return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ case 64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case 128:
+ return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
+ case 256:
+ return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ }
+
+ case 'v':
+ switch (VT.getSizeInBits()) {
+ default:
+ return std::make_pair(0U, nullptr);
+ case 32:
+ return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
+ case 64:
+ return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
+ case 96:
+ return std::make_pair(0U, &AMDGPU::VReg_96RegClass);
+ case 128:
+ return std::make_pair(0U, &AMDGPU::VReg_128RegClass);
+ case 256:
+ return std::make_pair(0U, &AMDGPU::VReg_256RegClass);
+ case 512:
+ return std::make_pair(0U, &AMDGPU::VReg_512RegClass);
+ }
}
}
@@ -2301,3 +2516,16 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+
+SITargetLowering::ConstraintType
+SITargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 's':
+ case 'v':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
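A hedged source-level illustration of the constraints handled above ('v' picks a VGPR class sized to the operand, 's' an SGPR class; the asm body is illustrative only):

    void example() {
      float out;
      int in = 0;
      // "=v": 32-bit result in a VGPR_32; "s": 32-bit input in an SGPR_32.
      asm("v_mov_b32 %0, %1" : "=v"(out) : "s"(in));
      (void)out;
    }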
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d84c32e..e2f8cb1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -28,6 +28,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
+ SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
+ MVT VT, unsigned Offset) const;
+
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
@@ -57,6 +60,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
+ bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
@@ -76,6 +80,9 @@ public:
bool MemcpyStrSrc,
MachineFunction &MF) const override;
+ bool isMemOpUniform(const SDNode *N) const;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -112,13 +119,10 @@ public:
SDValue Ptr,
uint32_t RsrcDword1,
uint64_t RsrcDword2And3) const;
- MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const;
-
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 90a37f1..821aada 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -91,7 +91,8 @@ private:
bool isOpRelevant(MachineOperand &Op);
/// \brief Get register interval an operand affects.
- RegInterval getRegInterval(MachineOperand &Op);
+ RegInterval getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const;
/// \brief Handle instructions async components
void pushInstruction(MachineBasicBlock &MBB,
@@ -121,9 +122,13 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "SI insert wait instructions";
+ return "SI insert wait instructions";
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
@@ -138,9 +143,8 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- Counters Result;
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ Counters Result = { { 0, 0, 0 } };
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
@@ -151,15 +155,22 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
// LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
- if (TII->isSMRD(MI.getOpcode())) {
-
- MachineOperand &Op = MI.getOperand(0);
- assert(Op.isReg() && "First LGKM operand must be a register!");
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
-
+ if (TII->isSMRD(MI)) {
+
+ if (MI.getNumOperands() != 0) {
+ assert(MI.getOperand(0).isReg() &&
+ "First LGKM operand must be a register!");
+
+ // XXX - What if this is a write into a super register?
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
+ unsigned Size = RC->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+ } else {
+ // s_dcache_inv etc. do not have a destination register. Assume we
+ // want a wait on these.
+ // XXX - What is the right value?
+ Result.Named.LGKM = 1;
+ }
} else {
// DS
Result.Named.LGKM = 1;
@@ -173,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
}
bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
-
// Constants are always irrelevant
- if (!Op.isReg())
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return false;
// Defines are always relevant
@@ -196,7 +206,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
// operand comes before the value operand and it may have
// multiple data operands.
- if (TII->isDS(MI.getOpcode())) {
+ if (TII->isDS(MI)) {
MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
if (Data && Op.isIdenticalTo(*Data))
return true;
@@ -224,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
return false;
}
-RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
-
- if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
- return std::make_pair(0, 0);
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
-
+RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const {
+ unsigned Size = RC->getSize();
assert(Size >= 4);
RegInterval Result;
- Result.first = TRI->getEncodingValue(Reg);
+ Result.first = TRI->getEncodingValue(Reg.getReg());
Result.second = Result.first + Size / 4;
return Result;
@@ -246,10 +251,13 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Get the hardware counter increments and sum them up
Counters Increment = getHwCounts(*I);
+ Counters Limit = ZeroCounts;
unsigned Sum = 0;
for (unsigned i = 0; i < 3; ++i) {
LastIssued.Array[i] += Increment.Array[i];
+ if (Increment.Array[i])
+ Limit.Array[i] = LastIssued.Array[i];
Sum += Increment.Array[i];
}
@@ -261,7 +269,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
+ // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
// The temporary workaround is to break the clauses with S_NOP.
@@ -270,7 +278,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// and destination registers don't overlap, e.g. this is illegal:
// r0 = load r2
// r2 = load r0
- if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
+ if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) ||
(LastOpcodeType == VMEM && Increment.Named.VM)) {
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
@@ -278,7 +286,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
LastInstWritesM0 = false;
}
- if (TII->isSMRD(I->getOpcode()))
+ if (TII->isSMRD(*I))
LastOpcodeType = SMEM;
else if (Increment.Named.VM)
LastOpcodeType = VMEM;
@@ -290,21 +298,21 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-
MachineOperand &Op = I->getOperand(i);
if (!isOpRelevant(Op))
continue;
- RegInterval Interval = getRegInterval(Op);
+ const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
// Remember which registers we define
if (Op.isDef())
- DefinedRegs[j] = LastIssued;
+ DefinedRegs[j] = Limit;
// and which one we are using
if (Op.isUse())
- UsedRegs[j] = LastIssued;
+ UsedRegs[j] = Limit;
}
}
}
@@ -390,12 +398,18 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
if (MI.getOpcode() == AMDGPU::S_SENDMSG)
return LastIssued;
- // For each register affected by this
- // instruction increase the result sequence
+ // For each register affected by this instruction increase the result
+ // sequence.
+ //
+ // TODO: We could probably just look at explicit operands if we removed VCC /
+ // EXEC from SMRD dest reg classes.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-
MachineOperand &Op = MI.getOperand(i);
- RegInterval Interval = getRegInterval(Op);
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
if (Op.isDef()) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 211666a..0e883f6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -41,6 +41,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> WQM = 0;
field bits<1> VGPRSpill = 0;
+ // This bit tells the assembler to use the 32-bit encoding in case it
+ // is unable to infer the encoding from the operands.
+ field bits<1> VOPAsmPrefer32Bit = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -68,10 +72,8 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;
let TSFlags{21} = VGPRSpill;
+ let TSFlags{22} = VOPAsmPrefer32Bit;
- // Most instructions require adjustments after selection to satisfy
- // operand requirements.
- let hasPostISelHook = 1;
let SchedRW = [Write32Bit];
}
@@ -86,7 +88,6 @@ class Enc64 {
}
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
-def VOPDstVCC : VOPDstOperand <VCCReg>;
let Uses = [EXEC] in {
@@ -101,11 +102,11 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs), ins, asm, pattern> {
- let DisableEncoding = "$dst";
let VOPC = 1;
let Size = 4;
+ let Defs = [VCC];
}
class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -138,6 +139,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let isCodeGenOnly = 0;
int Size = 8;
+
+ // Because SGPRs may be allowed if there are multiple operands, we
+ // need a post-isel hook to insert copies in order to avoid
+ // violating constant bus requirements.
+ let hasPostISelHook = 1;
}
} // End Uses = [EXEC]
@@ -222,6 +228,20 @@ class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
let Inst{31-27} = 0x18; //encoding
}
+class SMRD_IMMe_ci <bits<5> op> : Enc64 {
+ bits<7> sdst;
+ bits<7> sbase;
+ bits<32> offset;
+
+ let Inst{7-0} = 0xff;
+ let Inst{8} = 0;
+ let Inst{14-9} = sbase{6-1};
+ let Inst{21-15} = sdst;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+ let Inst{63-32} = offset;
+}
+
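The bit layout of the new CI encoding class can be cross-checked with a plain C++ sketch (the packing helper is hypothetical; field positions follow the let Inst{...} assignments above):

#include <cassert>
#include <cstdint>

static uint64_t encodeSMRDImmCI(uint8_t Op5, uint8_t SBase7, uint8_t SDst7,
                                uint32_t Offset32) {
  uint64_t Inst = 0;
  Inst |= 0xffull;                               // Inst{7-0} literal marker
  // Inst{8} stays 0.
  Inst |= (uint64_t)((SBase7 >> 1) & 0x3f) << 9; // Inst{14-9} = sbase{6-1}
  Inst |= (uint64_t)(SDst7 & 0x7f) << 15;        // Inst{21-15} = sdst
  Inst |= (uint64_t)(Op5 & 0x1f) << 22;          // Inst{26-22} = op
  Inst |= 0x18ull << 27;                         // Inst{31-27} encoding
  Inst |= (uint64_t)Offset32 << 32;              // Inst{63-32} = offset
  return Inst;
}

int main() {
  uint64_t I = encodeSMRDImmCI(0, 0, 0, 0x1234);
  assert((I & 0xff) == 0xff && (I >> 32) == 0x1234);
}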
let SchedRW = [WriteSALU] in {
class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern> {
@@ -249,13 +269,13 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern>, SOPCe <op> {
- let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
let isCodeGenOnly = 0;
+ let Defs = [SCC];
let UseNamedOperandTable = 1;
}
@@ -598,15 +618,13 @@ class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
// Vector I/O operations
//===----------------------------------------------------------------------===//
-let Uses = [EXEC] in {
-
class DS <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
let LGKM_CNT = 1;
let DS = 1;
let UseNamedOperandTable = 1;
- let Uses = [M0];
+ let Uses = [M0, EXEC];
 // Most instructions load and store data, so set this as the default.
let mayLoad = 1;
@@ -623,6 +641,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MUBUF = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
@@ -636,6 +655,7 @@ class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MTBUF = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
@@ -665,9 +685,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MIMG = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0; // XXX ????
}
-
-
-} // End Uses = [EXEC]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cfd2c42..a08a5a8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -82,6 +82,7 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
switch (MI->getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
return true;
default:
return false;
@@ -204,7 +205,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
unsigned &Offset,
const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt->getOpcode();
- if (isDS(Opc)) {
+
+ if (isDS(*LdSt)) {
const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
AMDGPU::OpName::offset);
if (OffsetImm) {
@@ -254,7 +256,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
return false;
}
- if (isMUBUF(Opc) || isMTBUF(Opc)) {
+ if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
return false;
@@ -270,7 +272,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
return true;
}
- if (isSMRD(Opc)) {
+ if (isSMRD(*LdSt)) {
const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
AMDGPU::OpName::offset);
if (!OffsetImm)
@@ -289,20 +291,18 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
MachineInstr *SecondLdSt,
unsigned NumLoads) const {
- unsigned Opc0 = FirstLdSt->getOpcode();
- unsigned Opc1 = SecondLdSt->getOpcode();
-
// TODO: This needs finer tuning
if (NumLoads > 4)
return false;
- if (isDS(Opc0) && isDS(Opc1))
+ if (isDS(*FirstLdSt) && isDS(*SecondLdSt))
return true;
- if (isSMRD(Opc0) && isSMRD(Opc1))
+ if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt))
return true;
- if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
+ if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) &&
+ (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt)))
return true;
return false;
@@ -323,28 +323,45 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
};
static const int16_t Sub0_7[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ };
+
+ static const int16_t Sub0_7_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
};
static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ };
+
+ static const int16_t Sub0_3_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
};
static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
};
static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1, 0
+ AMDGPU::sub0, AMDGPU::sub1,
};
unsigned Opcode;
- const int16_t *SubIndices;
+ ArrayRef<int16_t> SubIndices;
+ bool Forward;
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
@@ -360,7 +377,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
@@ -375,18 +392,18 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_3;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_3_64;
} else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_7;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_7_64;
} else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_15;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_15_64;
} else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
@@ -428,13 +445,27 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("Can't copy register!");
}
- while (unsigned SubIdx = *SubIndices++) {
+ Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
+
+ for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
+ unsigned SubIdx;
+ if (Forward)
+ SubIdx = SubIndices[Idx];
+ else
+ SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+
MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
get(Opcode), RI.getSubReg(DestReg, SubIdx));
- Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
+
+ if (Idx == SubIndices.size() - 1)
+ Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);
- if (*SubIndices)
+ if (Idx == 0)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
}
}
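Why the new Forward flag matters can be seen in a memmove-style sketch (plain arrays stand in for sub-registers): when the source and destination tuples overlap, copying from the low sub-register upward is only safe if the destination starts at or below the source; otherwise the loop must run downward.

#include <cassert>
#include <vector>

static void copyTuple(std::vector<int> &Regs, unsigned Dst, unsigned Src,
                      unsigned NumSubRegs) {
  bool Forward = Dst <= Src;
  for (unsigned I = 0; I < NumSubRegs; ++I) {
    unsigned Idx = Forward ? I : NumSubRegs - 1 - I;
    Regs[Dst + Idx] = Regs[Src + Idx]; // one sub-register move
  }
}

int main() {
  std::vector<int> R = {0, 1, 2, 3, 4, 5};
  copyTuple(R, 1, 2, 3); // overlapping copy: forward order is safe here
  assert(R[1] == 2 && R[2] == 3 && R[3] == 4);
}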
@@ -471,6 +502,40 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}
+static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_S32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_S64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_S128_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_S256_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_S512_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_V32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_V64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_V128_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_V256_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_V512_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
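A stand-alone model of the new size-to-opcode helpers (opcode names are returned as strings purely for illustration; sizes are in bytes, as in RC->getSize()):

#include <cassert>
#include <string>

static std::string sgprSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:  return "SI_SPILL_S32_SAVE";
  case 8:  return "SI_SPILL_S64_SAVE";
  case 16: return "SI_SPILL_S128_SAVE";
  case 32: return "SI_SPILL_S256_SAVE";
  case 64: return "SI_SPILL_S512_SAVE";
  default: assert(false && "unknown register size"); return "";
  }
}

int main() {
  assert(sgprSpillSaveOpcode(16) == "SI_SPILL_S128_SAVE");
  assert(sgprSpillSaveOpcode(64) == "SI_SPILL_S512_SAVE");
}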
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@@ -481,47 +546,83 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
+
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+ unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ Size, Align);
if (RI.isSGPRClass(RC)) {
+ MFI->setHasSpilledSGPRs();
+
// We are only allowed to create one new instruction when spilling
 // registers, so we need to use a pseudo instruction for spilling
// SGPRs.
- switch (RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- MFI->setHasSpilledVGPRs();
-
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
- }
+ unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg) // src
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addMemOperand(MMO);
+
+ return;
}
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else {
+ if (!ST.isVGPRSpillingEnabled(MFI)) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
" spill register");
BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
+ .addReg(SrcReg);
+
+ return;
+ }
+
+ assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+
+ unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
+ MFI->setHasSpilledVGPRs();
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg) // src
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addMemOperand(MMO);
+}
+
+static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_S32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_S64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_S128_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_S256_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_S512_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_V32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_V64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_V128_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_V256_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_V512_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
}
}
@@ -534,42 +635,43 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
-
- if (RI.isSGPRClass(RC)){
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
- }
- }
+ unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+
+ if (RI.isSGPRClass(RC)) {
+ // FIXME: Maybe this should not include a memoperand because it will be
+ // lowered to non-memory instructions.
+ unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addMemOperand(MMO);
- } else {
+ return;
+ }
+
+ if (!ST.isVGPRSpillingEnabled(MFI)) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
" restore register");
BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
+
+ return;
}
+
+ assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+
+ unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addMemOperand(MMO);
}
/// \param Offset Offset in bytes of the FrameIndex being spilled
@@ -601,17 +703,21 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
if (MFI->getShaderType() == ShaderType::COMPUTE &&
WorkGroupSize > WavefrontSize) {
- unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
- unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
- unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned TIDIGXReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
+ unsigned TIDIGYReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
+ unsigned TIDIGZReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
unsigned InputPtrReg =
- TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
RS->enterBasicBlock(&Entry);
+ // FIXME: Can we scavenge an SReg_64 and access the subregs?
unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
@@ -667,8 +773,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
return TmpReg;
}
-void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
- int Count) const {
+void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
+ int Count) const {
while (Count > 0) {
int Arg;
if (Count >= 8)
@@ -687,26 +793,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- case AMDGPU::SI_CONSTDATA_PTR: {
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
-
- // Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
- .addReg(AMDGPU::SCC, RegState::Implicit);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::SGPR_USE:
// This is just a placeholder for register allocation.
MI->eraseFromParent();
@@ -760,49 +846,90 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ MachineFunction &MF = *MBB.getParent();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
+
+ // Create a bundle so these instructions won't be re-ordered by the
+ // post-RA scheduler.
+ MIBundleBuilder Bundler(MBB, MI);
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
+
+ // Add 32-bit offset from this instruction to the start of the
+ // constant data.
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addOperand(MI->getOperand(1)));
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0));
+
+ llvm::finalizeBundle(MBB, Bundler.begin());
+
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
-MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
-
- if (MI->getNumOperands() < 3)
- return nullptr;
-
+/// Commutes the operands in the given instruction.
+/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
+///
+/// Do not call this method for a non-commutable instruction or for a
+/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
+/// Even though the instruction is commutable, the method may still
+/// fail to commute the operands; a null pointer is returned in such cases.
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx0,
+ unsigned OpIdx1) const {
int CommutedOpcode = commuteOpcode(*MI);
if (CommutedOpcode == -1)
return nullptr;
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
- assert(Src0Idx != -1 && "Should always have src0 operand");
-
MachineOperand &Src0 = MI->getOperand(Src0Idx);
if (!Src0.isReg())
return nullptr;
int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1);
- if (Src1Idx == -1)
+
+ if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
+ OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
+ (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
+ OpIdx1 != static_cast<unsigned>(Src0Idx)))
return nullptr;
MachineOperand &Src1 = MI->getOperand(Src1Idx);
- // Make sure it's legal to commute operands for VOP2.
- if (isVOP2(MI->getOpcode()) &&
- (!isOperandLegal(MI, Src0Idx, &Src1) ||
- !isOperandLegal(MI, Src1Idx, &Src0))) {
- return nullptr;
+
+ if (isVOP2(*MI)) {
+ const MCInstrDesc &InstrDesc = MI->getDesc();
+ // For VOP2 instructions, any operand type is valid to use for src0. Make
+ // sure we can use src1 as src0.
+ //
+ // We could be stricter here and only allow commuting if there is a reason
+ // to do so, e.g. if both operands are VGPRs there is no real benefit,
+ // although MachineCSE attempts to find matches by commuting.
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
+ return nullptr;
}
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
if (NewMI || !Src1.isImm() ||
- (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ (!isVOP2(*MI) && !isVOP3(*MI))) {
return nullptr;
}
-
// Be sure to copy the source modifiers to the right place.
if (MachineOperand *Src0Mods
= getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
@@ -832,7 +959,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
Src1.ChangeToRegister(Reg, false);
Src1.setSubReg(SubReg);
} else {
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
}
if (MI)
@@ -845,8 +972,8 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
+ unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const {
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -857,7 +984,8 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
return false;
// FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
- // immediate.
+ // immediate. Also, an immediate src0 operand is not handled in
+ // SIInstrInfo::commuteInstruction().
if (!MI->getOperand(Src0Idx).isReg())
return false;
@@ -865,18 +993,22 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
if (Src1Idx == -1)
return false;
- if (!MI->getOperand(Src1Idx).isReg())
- return false;
-
- // If any source modifiers are set, the generic instruction commuting won't
- // understand how to copy the source modifiers.
- if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm()) {
+ // SIInstrInfo::commuteInstruction() does support commuting the immediate
+ // operand src1 in 2- and 3-operand instructions.
+ if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
+ return false;
+ } else if (Src1.isReg()) {
+ // If any source modifiers are set, the generic instruction commuting won't
+ // understand how to copy the source modifiers.
+ if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ return false;
+ } else
return false;
- SrcOpIdx1 = Src0Idx;
- SrcOpIdx2 = Src1Idx;
- return true;
+ return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}
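The operand-index check added in the commute hunks above reduces to a small predicate (a hypothetical standalone version): the requested pair (OpIdx0, OpIdx1) must name src0 and src1 in either order, or the commute is rejected.

#include <cassert>

static bool isValidCommutePair(int Src0Idx, int Src1Idx,
                               unsigned OpIdx0, unsigned OpIdx1) {
  return (OpIdx0 == (unsigned)Src0Idx && OpIdx1 == (unsigned)Src1Idx) ||
         (OpIdx0 == (unsigned)Src1Idx && OpIdx1 == (unsigned)Src0Idx);
}

int main() {
  assert(isValidCommutePair(1, 2, 1, 2));  // straight order
  assert(isValidCommutePair(1, 2, 2, 1));  // swapped order
  assert(!isValidCommutePair(1, 2, 0, 2)); // not the src pair
}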
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
@@ -898,11 +1030,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const {
}
}
-bool
-SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- return RC != &AMDGPU::EXECRegRegClass;
-}
-
static void removeModOperands(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
@@ -984,9 +1111,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
}
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src2));
- // ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
removeModOperands(*UseMI);
@@ -1045,18 +1169,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return false;
}
-bool
-SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- switch(MI->getOpcode()) {
- default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- return MI->getOperand(1).isImm();
- }
-}
-
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
int WidthB, int OffsetB) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
@@ -1088,9 +1200,6 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
MachineInstr *MIb,
AliasAnalysis *AA) const {
- unsigned Opc0 = MIa->getOpcode();
- unsigned Opc1 = MIb->getOpcode();
-
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
"MIa must load from or modify a memory location");
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
@@ -1105,32 +1214,32 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
- // underlying addres space, even if it was lowered to a different one,
+ // underlying address space, even if it was lowered to a different one,
// e.g. private accesses lowered to use MUBUF instructions on a scratch
// buffer.
- if (isDS(Opc0)) {
- if (isDS(Opc1))
+ if (isDS(*MIa)) {
+ if (isDS(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1);
+ return !isFLAT(*MIb);
}
- if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
- if (isMUBUF(Opc1) || isMTBUF(Opc1))
+ if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
+ if (isMUBUF(*MIb) || isMTBUF(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1) && !isSMRD(Opc1);
+ return !isFLAT(*MIb) && !isSMRD(*MIb);
}
- if (isSMRD(Opc0)) {
- if (isSMRD(Opc1))
+ if (isSMRD(*MIa)) {
+ if (isSMRD(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
+ return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa);
}
- if (isFLAT(Opc0)) {
- if (isFLAT(Opc1))
+ if (isFLAT(*MIa)) {
+ if (isFLAT(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
return false;
@@ -1319,6 +1428,26 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
return false;
}
+static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ switch (MO.getReg()) {
+ case AMDGPU::VCC:
+ case AMDGPU::M0:
+ case AMDGPU::FLAT_SCR:
+ return MO.getReg();
+
+ default:
+ break;
+ }
+ }
+
+ return AMDGPU::NoRegister;
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
@@ -1335,7 +1464,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
- // Make sure the register classes are correct
+ // Make sure the register classes are correct.
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isFPImm()) {
ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
@@ -1392,14 +1521,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify VOP*
- if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
// Only look at the true operands. Only a real operand can use the constant
// bus, and we don't want to check pseudo-operands like the source modifier
// flags.
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
unsigned ConstantBusCount = 0;
- unsigned SGPRUsed = AMDGPU::NoRegister;
+ unsigned SGPRUsed = findImplicitSGPRRead(*MI);
+ if (SGPRUsed != AMDGPU::NoRegister)
+ ++ConstantBusCount;
+
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
break;
@@ -1435,6 +1567,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
}
+ // Make sure we aren't losing exec uses in the td files. This mostly requires
+ // being careful when using let Uses to add other use registers.
+ if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
+ const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC);
+ if (!Exec || !Exec->isImplicit()) {
+ ErrInfo = "VALU instruction does not implicitly read exec mask";
+ return false;
+ }
+ }
+
return true;
}
@@ -1483,11 +1625,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ case AMDGPU::S_LOAD_DWORD_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX2_SGPR:
+ case AMDGPU::S_LOAD_DWORDX2_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
@@ -1562,17 +1710,21 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
unsigned SubIdx,
const TargetRegisterClass *SubRC)
const {
- assert(SuperReg.isReg());
-
- unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
unsigned SubReg = MRI.createVirtualRegister(SubRC);
+ if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+ .addReg(SuperReg.getReg(), 0, SubIdx);
+ return SubReg;
+ }
+
// Just in case the super register is itself a sub-register, copy it to a new
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
@@ -1605,36 +1757,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
return MachineOperand::CreateReg(SubReg, false);
}
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const {
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned Dst = MRI.createVirtualRegister(RC);
-
- MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- LoDst)
- .addImm(Op.getImm() & 0xFFFFFFFF);
- MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- HiDst)
- .addImm(Op.getImm() >> 32);
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
- .addReg(LoDst)
- .addImm(AMDGPU::sub0)
- .addReg(HiDst)
- .addImm(AMDGPU::sub1);
-
- Worklist.push_back(Lo);
- Worklist.push_back(Hi);
-
- return Dst;
-}
-
// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
assert(Inst->getNumExplicitOperands() == 3);
@@ -1643,6 +1765,41 @@ void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
Inst->addOperand(Op1);
}
+bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ if (!MO.isReg())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isVirtualRegister(Reg) ?
+ MRI.getRegClass(Reg) :
+ RI.getPhysRegClass(Reg);
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+}
+
+bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return isLegalRegOperand(MRI, OpInfo, MO);
+
+ // Handle non-register types that are treated like immediates.
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ return true;
+}
+
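The legality rule that isLegalRegOperand centralizes can be modeled with plain sets (an illustrative model, not the TargetRegisterInfo API): an operand's class RC is legal for a slot of class OpRC exactly when the common subclass of the two is RC itself, i.e. RC is contained in OpRC.

#include <algorithm>
#include <cassert>
#include <iterator>
#include <set>

using RegClass = std::set<int>; // a class modeled as its register set

static RegClass commonSubClass(const RegClass &A, const RegClass &B) {
  RegClass C;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::inserter(C, C.begin()));
  return C;
}

int main() {
  RegClass SGPR = {0, 1, 2, 3};
  RegClass VSrc = {0, 1, 2, 3, 10, 11}; // accepts SGPRs and VGPRs
  RegClass M0   = {5};
  assert(commonSubClass(SGPR, VSrc) == SGPR); // v_mov_b32 s0: legal
  assert(commonSubClass(SGPR, M0) != SGPR);   // s_sendmsg 0, s0: not legal
}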
bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -1653,7 +1810,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (!MO)
MO = &MI->getOperand(OpIdx);
- if (isVALU(InstDesc.Opcode) &&
+ if (isVALU(*MI) &&
usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
unsigned SGPRUsed =
MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
@@ -1670,21 +1827,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (MO->isReg()) {
assert(DefinedRC);
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
- MRI.getRegClass(MO->getReg()) :
- RI.getPhysRegClass(MO->getReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_32
- // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
-
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ return isLegalRegOperand(MRI, OpInfo, *MO);
}
@@ -1699,81 +1842,143 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
return isImmOperandLegal(MI, OpIdx, *MO);
}
-void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
+ MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &InstrDesc = get(Opc);
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src1);
- int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src2);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
- // Legalize VOP2
- if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
- // Legalize src0
- if (!isOperandLegal(MI, Src0Idx))
+ // If there is an implicit SGPR use such as VCC for v_addc_u32/v_subb_u32,
+ // we may only have one constant bus use.
+ //
+ // Note we do not need to worry about literal constants here. They are
+ // disabled for the operand type for instructions because they will always
+ // violate the one constant bus use rule.
+ bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
+ if (HasImplicitSGPR) {
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
legalizeOpWithMove(MI, Src0Idx);
+ }
- // Legalize src1
- if (isOperandLegal(MI, Src1Idx))
- return;
+ // src0 of a VOP2 instruction supports all operand types, so we don't need
+ // to check its legality. If src1 is already legal, we don't need to do
+ // anything.
+ if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
+ return;
- // Usually src0 of VOP2 instructions allow more types of inputs
- // than src1, so try to commute the instruction to decrease our
- // chances of having to insert a MOV instruction to legalize src1.
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- // If we are successful in commuting, then we know MI is legal, so
- // we are done.
- return;
- }
+ // We do not use commuteInstruction here because it is too aggressive and will
+ // commute whenever possible. We only want to commute here if it improves
+ // legality. This can be called a fairly large number of times, so don't waste
+ // compile time pointlessly swapping and checking legality again.
+ if (HasImplicitSGPR || !MI->isCommutable()) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ // If src0 can be used as src1, commuting will make the operands legal.
+ // Otherwise we have to give up and insert a move.
+ //
+ // TODO: Other immediate-like operand kinds could be commuted if there was a
+ // MachineOperand::ChangeTo* for them.
+ if ((!Src1.isImm() && !Src1.isReg()) ||
+ !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
legalizeOpWithMove(MI, Src1Idx);
return;
}
- // XXX - Do any VOP3 instructions read VCC?
- // Legalize VOP3
- if (isVOP3(MI->getOpcode())) {
- int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
+ int CommutedOpc = commuteOpcode(*MI);
+ if (CommutedOpc == -1) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
- // Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+ MI->setDesc(get(CommutedOpc));
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = VOP3Idx[i];
- if (Idx == -1)
- break;
- MachineOperand &MO = MI->getOperand(Idx);
+ unsigned Src0Reg = Src0.getReg();
+ unsigned Src0SubReg = Src0.getSubReg();
+ bool Src0Kill = Src0.isKill();
- if (MO.isReg()) {
- if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- continue; // VGPRs are legal
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else if (Src1.isReg()) {
+ Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
+ Src0.setSubReg(Src1.getSubReg());
+ } else
+ llvm_unreachable("Should only have register or immediate operands");
- assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+ Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
+ Src1.setSubReg(Src0SubReg);
+}
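The control flow of legalizeOperandsVOP2 boils down to a small decision table (a simplified sketch; in the real code an SGPR src0 is also moved first when an implicit SGPR read exists, and the commute path additionally checks that a commuted opcode is available):

#include <cassert>

enum class Fix { None, MoveSrc1, Commute };

// Prefer commuting over inserting a copy, but only when it is
// guaranteed to help, since this runs for many instructions.
static Fix legalizeVOP2(bool Src1Legal, bool HasImplicitSGPR,
                        bool Commutable, bool Src0UsableAsSrc1) {
  if (Src1Legal)
    return Fix::None;
  if (HasImplicitSGPR || !Commutable || !Src0UsableAsSrc1)
    return Fix::MoveSrc1; // fall back to a VGPR copy of src1
  return Fix::Commute;
}

int main() {
  assert(legalizeVOP2(true, false, true, true) == Fix::None);
  assert(legalizeVOP2(false, true, true, true) == Fix::MoveSrc1);
  assert(legalizeVOP2(false, false, true, true) == Fix::Commute);
}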
- if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
- SGPRReg = MO.getReg();
- // We can use one SGPR in each VOP3 instruction.
- continue;
- }
- } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
- // If it is not a register and not a literal constant, then it must be
- // an inline constant which is always legal.
- continue;
- }
- // If we make it this far, then the operand is not legal and we must
- // legalize it.
- legalizeOpWithMove(MI, Idx);
+// Legalize VOP3 operands. Because all operand types are supported for any
+// operand, and since literal constants are not allowed and should never be
+// seen, we only need to worry about inserting copies if we use multiple SGPR
+// operands.
+void SIInstrInfo::legalizeOperandsVOP3(
+ MachineRegisterInfo &MRI,
+ MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ int VOP3Idx[3] = {
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
+ };
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ break;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ // We should never see a VOP3 instruction with an illegal immediate operand.
+ if (!MO.isReg())
+ continue;
+
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
}
+
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+}
+
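A standalone sketch of the one-SGPR constant bus rule legalizeOperandsVOP3 enforces (register 0 stands for "not an SGPR"; the counting helper is hypothetical): across a VOP3's sources at most one distinct SGPR may be read, and every further distinct SGPR needs a copy to a VGPR.

#include <cassert>
#include <vector>

static unsigned countCopiesNeeded(const std::vector<unsigned> &Srcs) {
  unsigned UsedSGPR = 0, Copies = 0;
  for (unsigned Reg : Srcs) {
    if (Reg == 0)
      continue; // VGPR or inline constant: always legal
    if (UsedSGPR == 0 || UsedSGPR == Reg)
      UsedSGPR = Reg; // the one allowed SGPR (repeated reads are fine)
    else
      ++Copies; // a second distinct SGPR must be copied to a VGPR
  }
  return Copies;
}

int main() {
  assert(countCopiesNeeded({1, 0, 1}) == 0); // same SGPR twice: ok
  assert(countCopiesNeeded({1, 2, 0}) == 1); // two SGPRs: one copy
}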
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Legalize VOP2
+ if (isVOP2(*MI)) {
+ legalizeOperandsVOP2(MRI, MI);
+ return;
+ }
+
+ // Legalize VOP3
+ if (isVOP3(*MI)) {
+ legalizeOperandsVOP3(MRI, MI);
+ return;
}
// Legalize REG_SEQUENCE and PHI
 // The register class of the operands must be the same type as the register
// class of the output.
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
- MI->getOpcode() == AMDGPU::PHI) {
+ if (MI->getOpcode() == AMDGPU::PHI) {
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
if (!MI->getOperand(i).isReg() ||
@@ -1802,26 +2007,53 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
// Update all the operands so they have the same type.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
- if (!MI->getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
unsigned DstReg = MRI.createVirtualRegister(RC);
- MachineBasicBlock *InsertBB;
- MachineBasicBlock::iterator Insert;
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
- InsertBB = MI->getParent();
- Insert = MI;
- } else {
- // MI is a PHI instruction.
- InsertBB = MI->getOperand(i + 1).getMBB();
- Insert = InsertBB->getFirstTerminator();
+
+ // MI is a PHI instruction.
+ MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
+ MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
+
+ BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+ Op.setReg(DstReg);
+ }
+ }
+
+ // REG_SEQUENCE doesn't really require operand legalization, but if one has a
+ // VGPR dest type and SGPR sources, insert copies so all operands are
+ // VGPRs. This seems to help operand folding / the register coalescer.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
+ if (RI.hasVGPRs(DstRC)) {
+ // Update all the operands so they are VGPR register classes. These may
+ // not be the same register class because REG_SEQUENCE supports mixing
+ // subregister index types, e.g. sub0_sub1 + sub2 + sub3.
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ continue;
+
+ const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
+ if (VRC == OpRC)
+ continue;
+
+ unsigned DstReg = MRI.createVirtualRegister(VRC);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+
+ Op.setReg(DstReg);
+ Op.setIsKill();
}
- BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
- get(AMDGPU::COPY), DstReg)
- .addOperand(MI->getOperand(i));
- MI->getOperand(i).setReg(DstReg);
}
+
+ return;
}
// Legalize INSERT_SUBREG
@@ -1858,15 +2090,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
MachineBasicBlock &MBB = *MI->getParent();
- // Extract the ptr from the resource descriptor.
-
- // SRsrcPtrLo = srsrc:sub0
- unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
- // SRsrcPtrHi = srsrc:sub1
- unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+ // Extract the ptr from the resource descriptor.
+ unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
// Create an empty resource descriptor
unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -1891,80 +2118,112 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
.addImm(RsrcDataFormat >> 32);
// NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned NewVAddrLo;
- unsigned NewVAddrHi;
if (VAddr) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
- NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
- NewVAddrLo)
- .addReg(SRsrcPtrLo)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
-
- // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
- NewVAddrHi)
- .addReg(SRsrcPtrHi)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
- .addReg(AMDGPU::VCC, RegState::Implicit);
-
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+
+ // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
} else {
 // This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
+ assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
+ < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ "FIXME: Need to emit flat atomics here");
+
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
-
- // Create the new instruction.
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
- MachineInstr *Addr64 =
- BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // tfe
+
+ // Atomics with return have an additional tied operand and are
+ // missing some of the special bits.
+ MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
+ MachineInstr *Addr64;
+
+ if (!VDataIn) {
+ // Regular buffer load / store.
+ MachineInstrBuilder MIB
+ = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset);
+
+ // Atomics do not have this operand.
+ if (const MachineOperand *GLC
+ = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
+ MIB.addImm(GLC->getImm());
+ }
+
+ MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
+
+ if (const MachineOperand *TFE
+ = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
+ MIB.addImm(TFE->getImm());
+ }
+
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ Addr64 = MIB;
+ } else {
+ // Atomics with return.
+ Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addOperand(*VDataIn)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ }
MI->removeFromParent();
MI = Addr64;
- NewVAddrLo = SRsrcPtrLo;
- NewVAddrHi = SRsrcPtrHi;
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addImm(AMDGPU::sub1);
+
VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
}
- // NewVaddr = {NewVaddrHi, NewVaddrLo}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
-
-
// Update the instruction to use NewVaddr
VAddr->setReg(NewVAddr);
// Update the instruction to use NewSRsrc
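The V_ADD_I32 / V_ADDC_U32 pair built above implements a 64-bit add one 32-bit half at a time, with the low half's carry-out (VCC in hardware) consumed by the high half. A scalar model:

#include <cassert>
#include <cstdint>

static uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = (uint32_t)A + (uint32_t)B;
  uint32_t Carry = Lo < (uint32_t)A ? 1u : 0u;          // VCC
  uint32_t Hi = (uint32_t)(A >> 32) + (uint32_t)(B >> 32) + Carry;
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  assert(add64ViaHalves(0xffffffffull, 1) == 0x100000000ull);
  assert(add64ViaHalves(0x123456789aull, 0xfedcba98ull) ==
         0x123456789aull + 0xfedcba98ull);
}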
@@ -2028,53 +2287,64 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
- .addOperand(*SOff)
- .addImm(HalfSize);
- Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
+ .addReg(SOff->getReg(), 0, SOff->getSubReg())
+ .addImm(HalfSize);
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
.addReg(SBase->getReg(), getKillRegState(IsKill),
SBase->getSubReg())
.addReg(OffsetSGPR);
}
unsigned SubLo, SubHi;
+ const TargetRegisterClass *NewDstRC;
switch (HalfSize) {
case 4:
SubLo = AMDGPU::sub0;
SubHi = AMDGPU::sub1;
+ NewDstRC = &AMDGPU::VReg_64RegClass;
break;
case 8:
SubLo = AMDGPU::sub0_sub1;
SubHi = AMDGPU::sub2_sub3;
+ NewDstRC = &AMDGPU::VReg_128RegClass;
break;
case 16:
SubLo = AMDGPU::sub0_sub1_sub2_sub3;
SubHi = AMDGPU::sub4_sub5_sub6_sub7;
+ NewDstRC = &AMDGPU::VReg_256RegClass;
break;
case 32:
SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
+ NewDstRC = &AMDGPU::VReg_512RegClass;
break;
default:
llvm_unreachable("Unhandled HalfSize");
}
- BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
- .addOperand(MI->getOperand(0))
- .addReg(RegLo)
- .addImm(SubLo)
- .addReg(RegHi)
- .addImm(SubHi);
+ unsigned OldDst = MI->getOperand(0).getReg();
+ unsigned NewDst = MRI.createVirtualRegister(NewDstRC);
+
+ MRI.replaceRegWith(OldDst, NewDst);
+
+ BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDst)
+ .addReg(RegLo)
+ .addImm(SubLo)
+ .addReg(RegHi)
+ .addImm(SubHi);
}
-void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
+void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI,
+ MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const {
MachineBasicBlock *MBB = MI->getParent();
- switch (MI->getOpcode()) {
- case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR:
- case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
+ int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
+ switch(RI.getRegClass(DstRCID)->getSize()) {
+ case 4:
+ case 8:
+ case 16: {
unsigned NewOpcode = getVALUOp(*MI);
unsigned RegOffset;
unsigned ImmOffset;
@@ -2118,53 +2388,55 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
.addImm(RsrcDataFormat >> 32);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
- .addReg(DWord0)
- .addImm(AMDGPU::sub0)
- .addReg(DWord1)
- .addImm(AMDGPU::sub1)
- .addReg(DWord2)
- .addImm(AMDGPU::sub2)
- .addReg(DWord3)
- .addImm(AMDGPU::sub3);
- MI->setDesc(get(NewOpcode));
- if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(SRsrc);
- } else {
- MI->getOperand(2).ChangeToRegister(SRsrc, false);
- }
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
-
- const TargetRegisterClass *NewDstRC =
- RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
-
- unsigned DstReg = MI->getOperand(0).getReg();
+ .addReg(DWord0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DWord1)
+ .addImm(AMDGPU::sub1)
+ .addReg(DWord2)
+ .addImm(AMDGPU::sub2)
+ .addReg(DWord3)
+ .addImm(AMDGPU::sub3);
+
+ const MCInstrDesc &NewInstDesc = get(NewOpcode);
+ const TargetRegisterClass *NewDstRC
+ = RI.getRegClass(NewInstDesc.OpInfo[0].RegClass);
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ unsigned DstReg = MI->getOperand(0).getReg();
MRI.replaceRegWith(DstReg, NewDstReg);
+
+ MachineInstr *NewInst =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), NewInstDesc, NewDstReg)
+ .addOperand(MI->getOperand(1)) // sbase
+ .addReg(SRsrc)
+ .addImm(0)
+ .addImm(ImmOffset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MI->eraseFromParent();
+
+ legalizeOperands(NewInst);
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
break;
}
- case AMDGPU::S_LOAD_DWORDX8_IMM:
- case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ case 32: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
+ moveSMRDToVALU(Lo, MRI, Worklist);
+ moveSMRDToVALU(Hi, MRI, Worklist);
break;
}
- case AMDGPU::S_LOAD_DWORDX16_IMM:
- case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ case 64: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
+ moveSMRDToVALU(Lo, MRI, Worklist);
+ moveSMRDToVALU(Hi, MRI, Worklist);
break;
}
}
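[Editor's note] All three size cases now thread the same worklist through, which is what lets moveToVALU below drive the whole conversion iteratively. A sketch of that driver loop, written as if inside the member function (NewDstReg stands for whatever register the per-instruction rewrite defines):

```cpp
// Sketch of the worklist scheme: converting one instruction queues every
// user that still cannot read a VGPR, so entire def-use chains migrate.
SmallVector<MachineInstr *, 128> Worklist;
Worklist.push_back(&TopInst);
while (!Worklist.empty()) {
  MachineInstr *Inst = Worklist.pop_back_val();
  unsigned NewDstReg = 0;
  // ... rewrite Inst to its VALU equivalent, defining NewDstReg ...
  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
```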
@@ -2185,51 +2457,28 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// Handle some special cases
switch (Opcode) {
default:
- if (isSMRD(Inst->getOpcode())) {
- moveSMRDToVALU(Inst, MRI);
+ if (isSMRD(*Inst)) {
+ moveSMRDToVALU(Inst, MRI, Worklist);
+ continue;
}
break;
- case AMDGPU::S_MOV_B64: {
- DebugLoc DL = Inst->getDebugLoc();
-
- // If the source operand is a register we can replace this with a
- // copy.
- if (Inst->getOperand(1).isReg()) {
- MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
- .addOperand(Inst->getOperand(0))
- .addOperand(Inst->getOperand(1));
- Worklist.push_back(Copy);
- } else {
- // Otherwise, we need to split this into two movs, because there is
- // no 64-bit VALU move instruction.
- unsigned Reg = Inst->getOperand(0).getReg();
- unsigned Dst = split64BitImm(Worklist,
- Inst,
- MRI,
- MRI.getRegClass(Reg),
- Inst->getOperand(1));
- MRI.replaceRegWith(Reg, Dst);
- }
- Inst->eraseFromParent();
- continue;
- }
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Inst->eraseFromParent();
continue;
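[Editor's note] These cases retarget the scalar 64-bit bitwise ops straight to their 32-bit VALU counterparts because AND/OR/XOR distribute over the two halves of a 64-bit value. A standalone check of that identity:

```cpp
#include <cassert>
#include <cstdint>

// s_and_b64 can be lowered to v_and_b32 on sub0 and sub1 independently.
int main() {
  uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
  uint32_t lo = uint32_t(a) & uint32_t(b);
  uint32_t hi = uint32_t(a >> 32) & uint32_t(b >> 32);
  assert(((uint64_t(hi) << 32) | lo) == (a & b));
}
```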
@@ -2281,6 +2530,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
break;
+ case AMDGPU::S_ABS_I32:
+ lowerScalarAbs(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
@@ -2319,7 +2573,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst->addOperand(MachineOperand::CreateImm(0));
}
- addDescImplicitUseDef(NewDesc, Inst);
+ Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
@@ -2337,27 +2591,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Update the destination register class.
-
- const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
-
- switch (Opcode) {
- // For target instructions, getOpRegClass just returns the virtual
- // register class associated with the operand, so we need to find an
- // equivalent VGPR register class in order to move the instruction to the
- // VALU.
- case AMDGPU::COPY:
- case AMDGPU::PHI:
- case AMDGPU::REG_SEQUENCE:
- case AMDGPU::INSERT_SUBREG:
- if (RI.hasVGPRs(NewDstRC))
- continue;
- NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
- if (!NewDstRC)
- continue;
- break;
- default:
- break;
- }
+ const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
+ if (!NewDstRC)
+ continue;
unsigned DstReg = Inst->getOperand(0).getReg();
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
@@ -2366,13 +2602,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// Legalize the operands
legalizeOperands(Inst);
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
- E = MRI.use_end(); I != E; ++I) {
- MachineInstr &UseMI = *I->getParent();
- if (!canReadVGPR(UseMI, I.getOperandNo())) {
- Worklist.push_back(&UseMI);
- }
- }
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
}
@@ -2390,6 +2620,30 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::VGPR_32RegClass;
}
+void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
+ .addImm(0)
+ .addReg(Src.getReg());
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
+ .addReg(Src.getReg())
+ .addReg(TmpReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
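[Editor's note] lowerScalarAbs implements abs(x) as max(x, 0 - x): the V_SUB produces the wrapped negation and the V_MAX picks the non-negative half. A standalone model (the INT32_MIN case wraps, matching V_SUB_I32 behavior rather than mathematical abs):

```cpp
#include <cassert>
#include <cstdint>

// abs(x) = max(x, 0 - x), with the subtraction wrapping like V_SUB_I32.
int32_t scalarAbs(int32_t x) {
  int32_t neg = int32_t(0u - uint32_t(x)); // wrapping negate
  return x > neg ? x : neg;                // V_MAX_I32
}

int main() {
  assert(scalarAbs(-5) == 5);
  assert(scalarAbs(7) == 7);
  assert(scalarAbs(INT32_MIN) == INT32_MIN); // wraps, as on hardware
}
```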
void SIInstrInfo::splitScalar64BitUnaryOp(
SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst,
@@ -2414,20 +2668,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
- MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
- MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -2436,10 +2691,11 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ // We don't need to legalizeOperands here because for a single operand, src0
+ // will support any kind of input.
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBinaryOp(
@@ -2474,9 +2730,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0)
.addOperand(SrcReg1Sub0);
@@ -2486,12 +2743,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub1, Src1SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1)
.addOperand(SrcReg1Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -2502,8 +2759,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ legalizeOperands(LoHalf);
+ legalizeOperands(HiHalf);
+
+  // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2532,18 +2792,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub1, SrcSubRC);
- MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ BuildMI(MBB, MII, DL, InstDesc, MidReg)
.addOperand(SrcRegSub0)
.addImm(0);
- MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ BuildMI(MBB, MII, DL, InstDesc, ResultReg)
.addOperand(SrcRegSub1)
.addReg(MidReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- Worklist.push_back(First);
- Worklist.push_back(Second);
+  // We don't need to legalize operands here. src0 for either instruction can be
+ // an SGPR, and the second input is unused or determined here.
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
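[Editor's note] The BCNT split works because popcount(x) = popcount(lo32) + popcount(hi32); the second V_BCNT consumes MidReg through its add-in operand, so no separate add is needed. A standalone check (C++20 for std::popcount):

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xf0f0f0f00000ffffULL;
  unsigned mid = std::popcount(uint32_t(x));                 // bcnt(lo) + 0
  unsigned result = std::popcount(uint32_t(x >> 32)) + mid;  // bcnt(hi) + mid
  assert(result == std::popcount(x));
}
```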
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2587,6 +2848,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return;
}
@@ -2605,33 +2867,53 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
- MachineInstr *Inst) const {
- // Add the implict and explicit register definitions.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+ unsigned DstReg,
+ MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
}
}
+}
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
- }
+const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
+ const MachineInstr &Inst) const {
+ const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
+
+ switch (Inst.getOpcode()) {
+ // For target instructions, getOpRegClass just returns the virtual register
+ // class associated with the operand, so we need to find an equivalent VGPR
+ // register class in order to move the instruction to the VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ if (RI.hasVGPRs(NewDstRC))
+ return nullptr;
+
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ return nullptr;
+ return NewDstRC;
+ default:
+ return NewDstRC;
}
}
+// Find the one SGPR operand we are allowed to use.
unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
int OpIndices[3]) const {
- const MCInstrDesc &Desc = get(MI->getOpcode());
+ const MCInstrDesc &Desc = MI->getDesc();
// Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = AMDGPU::NoRegister;
-
+ //
// First we need to consider the instruction's operand requirements before
// legalizing. Some operands are required to be SGPRs, such as implicit uses
// of VCC, but we are still bound by the constant bus requirement to only use
@@ -2639,17 +2921,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
//
// If the operand's class is an SGPR, we can never move it.
- for (const MachineOperand &MO : MI->implicit_operands()) {
- // We only care about reads.
- if (MO.isDef())
- continue;
-
- if (MO.getReg() == AMDGPU::VCC)
- return AMDGPU::VCC;
-
- if (MO.getReg() == AMDGPU::FLAT_SCR)
- return AMDGPU::FLAT_SCR;
- }
+ unsigned SGPRReg = findImplicitSGPRRead(*MI);
+ if (SGPRReg != AMDGPU::NoRegister)
+ return SGPRReg;
unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -2660,15 +2934,22 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
break;
const MachineOperand &MO = MI->getOperand(Idx);
- if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
- SGPRReg = MO.getReg();
+ if (!MO.isReg())
+ continue;
- if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- UsedSGPRs[i] = MO.getReg();
- }
+ // Is this operand statically required to be an SGPR based on the operand
+ // constraints?
+ const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
+ bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
+ if (IsRequiredSGPR)
+ return MO.getReg();
- if (SGPRReg != AMDGPU::NoRegister)
- return SGPRReg;
+  // If this could be a VGPR or an SGPR, check the dynamic register class.
+ unsigned Reg = MO.getReg();
+ const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
+ if (RI.isSGPRClass(RegRC))
+ UsedSGPRs[i] = Reg;
+ }
// We don't have a required SGPR operand, so we have a bit more freedom in
// selecting operands to move.
@@ -2680,6 +2961,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
// V_FMA_F32 v0, s0, s0, s0 -> No moves
// V_FMA_F32 v0, s0, s1, s0 -> Move s1
+ // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
+ // prefer those.
+
if (UsedSGPRs[0] != AMDGPU::NoRegister) {
if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
SGPRReg = UsedSGPRs[0];
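[Editor's note] The worked V_FMA_F32 examples encode the constant-bus rule: one unique SGPR may be read per VALU instruction, so an SGPR reused by several operands is the one worth keeping. A hypothetical standalone distillation of that tie-break (0 plays the role of AMDGPU::NoRegister):

```cpp
#include <cassert>

// Hypothetical reduction of findUsedSGPR's selection: keep an SGPR only if
// it is reused, since every other SGPR operand must be copied to a VGPR.
unsigned pickReusedSGPR(unsigned s0, unsigned s1, unsigned s2) {
  if (s0 && (s0 == s1 || s0 == s2))
    return s0;
  if (s1 && s1 == s2)
    return s1;
  return 0; // no reuse: nothing is forced to stay
}

int main() {
  assert(pickReusedSGPR(10, 10, 10) == 10); // v0, s0, s0, s0 -> no moves
  assert(pickReusedSGPR(10, 11, 10) == 10); // v0, s0, s1, s0 -> move s1
  assert(pickReusedSGPR(10, 11, 12) == 0);  // all distinct -> free choice
}
```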
@@ -2720,7 +3004,7 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
getIndirectIndexBegin(*MBB->getParent()));
- return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC_V1))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addReg(IndirectBaseReg)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 5053786..307ef67 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -39,14 +39,11 @@ private:
unsigned SubIdx,
const TargetRegisterClass *SubRC) const;
- unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const;
-
void swapOperands(MachineBasicBlock::iterator Inst) const;
+ void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
+
void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst, unsigned Opcode) const;
@@ -58,13 +55,24 @@ private:
void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst) const;
- void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
+ void addUsersToMoveToVALUWorklist(
+ unsigned Reg, MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const;
+
+ const TargetRegisterClass *
+ getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
MachineInstr *MIb) const;
unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
+protected:
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx0,
+ unsigned OpIdx1) const override;
+
public:
explicit SIInstrInfo(const AMDGPUSubtarget &st);
@@ -117,17 +125,14 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
+
+ LLVM_READONLY
int commuteOpcode(const MachineInstr &MI) const;
- MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const override;
bool findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
- bool isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA = nullptr) const;
-
bool areMemAccessesTriviallyDisjoint(
MachineInstr *MIa, MachineInstr *MIb,
AliasAnalysis *AA = nullptr) const override;
@@ -137,8 +142,6 @@ public:
unsigned DstReg, unsigned SrcReg) const override;
bool isMov(unsigned Opcode) const override;
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
-
bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const final;
@@ -148,78 +151,154 @@ public:
MachineBasicBlock::iterator &MI,
LiveVariables *LV) const override;
+ static bool isSALU(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SALU;
+ }
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
+ static bool isVALU(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VALU;
+ }
+
bool isVALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VALU;
}
+ static bool isSOP1(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
+ }
+
bool isSOP1(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOP1;
}
+ static bool isSOP2(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
+ }
+
bool isSOP2(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOP2;
}
+ static bool isSOPC(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
+ }
+
bool isSOPC(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPC;
}
+ static bool isSOPK(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
+ }
+
bool isSOPK(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPK;
}
+ static bool isSOPP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
+ }
+
bool isSOPP(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPP;
}
+ static bool isVOP1(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
+ }
+
bool isVOP1(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}
+ static bool isVOP2(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
+ }
+
bool isVOP2(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}
+ static bool isVOP3(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
+ }
+
bool isVOP3(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
+ static bool isVOPC(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
+ }
+
bool isVOPC(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}
+ static bool isMUBUF(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
+ }
+
bool isMUBUF(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
}
+ static bool isMTBUF(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
+ }
+
bool isMTBUF(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
}
+ static bool isSMRD(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
+ }
+
bool isSMRD(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}
+ static bool isDS(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::DS;
+ }
+
bool isDS(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
+ static bool isMIMG(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
+ }
+
bool isMIMG(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
+ static bool isFLAT(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
+ }
+
bool isFLAT(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ static bool isWQM(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::WQM;
+ }
+
bool isWQM(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
+ static bool isVGPRSpill(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
+ }
+
bool isVGPRSpill(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
}
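[Editor's note] Every new static overload is the same one-bit TSFlags test as its opcode-based sibling, just read from the instruction's own descriptor so no TII instance is needed. A hypothetical standalone reduction of the pattern (flag values here are illustrative, not the real encoding):

```cpp
#include <cassert>
#include <cstdint>

// Each instruction format is one bit in the descriptor's TSFlags word,
// so classification is a single mask test.
namespace FakeFlags {
enum : uint64_t { SALU = 1u << 0, VALU = 1u << 1, SMRD = 1u << 2 };
}
struct FakeDesc { uint64_t TSFlags; };

static bool isVALU(const FakeDesc &D) { return D.TSFlags & FakeFlags::VALU; }

int main() {
  FakeDesc D{FakeFlags::VALU | FakeFlags::SMRD};
  assert(isVALU(D) && (D.TSFlags & FakeFlags::SMRD));
}
```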
@@ -302,6 +381,26 @@ public:
bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
const MachineOperand *MO = nullptr) const;
+ /// \brief Check if \p MO would be a valid operand for the given operand
+ /// definition \p OpInfo. Note this does not attempt to validate constant bus
+ /// restrictions (e.g. literal constant usage).
+ bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const;
+
+ /// \brief Check if \p MO (a register operand) is a legal register for the
+ /// given operand description.
+ bool isLegalRegOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const;
+
+ /// \brief Legalize operands in \p MI by either commuting it or inserting a
+ /// copy of src1.
+ void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr *MI) const;
+
+ /// \brief Fix operands in \p MI to satisfy constant bus requirements.
+ void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
@@ -312,7 +411,8 @@ public:
unsigned HalfImmOp, unsigned HalfSGPROp,
MachineInstr *&Lo, MachineInstr *&Hi) const;
- void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
+ void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const;
/// \brief Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
@@ -341,29 +441,49 @@ public:
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
unsigned SavReg, unsigned IndexReg) const;
- void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+ void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const;
/// \brief Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
+ LLVM_READONLY
MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
+ LLVM_READONLY
const MachineOperand *getNamedOperand(const MachineInstr &MI,
unsigned OpName) const {
return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
}
+ /// Get required immediate operand
+ int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
+ return MI.getOperand(Idx).getImm();
+ }
+
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;
};
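[Editor's note] getNamedImmOperand pairs with the existing getNamedOperand accessors: it is for operands the caller knows are present and immediate, since it indexes the operand directly. Hypothetical call sites (the operand names are assumptions for illustration):

```cpp
// Hypothetical usage, with TII and MI in scope:
int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);

// Prefer the pointer-returning form when the operand may be absent:
if (const MachineOperand *GLC = TII->getNamedOperand(MI, AMDGPU::OpName::glc))
  (void)GLC->getImm();
```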
namespace AMDGPU {
-
+ LLVM_READONLY
int getVOPe64(uint16_t Opcode);
+
+ LLVM_READONLY
int getVOPe32(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);
+
+ LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);
+
+ LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8d8110b..10f2adde 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
def isCI : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate<"FeatureGCN3Encoding">;
+def isCIOnly : Predicate<"Subtarget->getGeneration() =="
+ "AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate <"FeatureSeaIslands">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
@@ -69,6 +69,15 @@ class sopk <bits<5> si, bits<5> vi = si> {
field bits<5> VI = vi;
}
+// Specify an SMRD opcode for SI and SMEM opcode for VI
+
+// FIXME: This should really be bits<5> si, but TableGen crashes if a
+// parameter's default value is another parameter with a different bit size.
+class smrd<bits<8> si, bits<8> vi = si> {
+ field bits<5> SI = si{4-0};
+ field bits<8> VI = vi;
+}
+
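[Editor's note] The si{4-0} slice keeps only the low five bits of the shared 8-bit opcode parameter for the SI encoding, sidestepping the TableGen crash noted in the FIXME. A hypothetical C++ equivalent of that field extraction:

```cpp
#include <cstdint>

// si{4-0}: retain bits 4..0 of the 8-bit opcode for the 5-bit SI field.
inline uint8_t smrdOpcodeSI(uint8_t si) { return si & 0x1f; }
```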
// Except for the NONE field, this must be kept in sync with the SISubtarget enum
// in AMDGPUInstrInfo.cpp
def SISubtarget {
@@ -121,9 +130,20 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
def SIconstdata_ptr : SDNode<
- "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+ "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>,
+ SDTCisVT<0, i64>]>
>;
+def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(cast<LoadSDNode>(N)) ||
+ isConstantLoad(cast<LoadSDNode>(N), -1);
+}]>;
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(cast<LoadSDNode>(N), -1) &&
+ static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
+}]>;
+
//===----------------------------------------------------------------------===//
// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
// to be glued to the memory instructions.
@@ -328,9 +348,9 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
- if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (RC && SIRI->isSGPRClass(RC))
return true;
- }
}
return false;
}]>;
@@ -354,6 +374,8 @@ def sopp_brtarget : Operand<OtherVT> {
let ParserMatchClass = SoppBrTarget;
}
+def const_ga : Operand<iPTR>;
+
include "SIInstrFormats.td"
include "VIInstrFormats.td"
@@ -393,7 +415,7 @@ def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
class GLCBaseMatchClass <string parser> : AsmOperandClass {
let Name = "GLC"#parser;
let PredicateMethod = "isImm";
- let ParserMethod = parser;
+ let ParserMethod = parser;
let RenderMethod = "addImmOperands";
}
@@ -436,6 +458,17 @@ def ClampMatchClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+class SMRDOffsetBaseMatchClass <string predicate> : AsmOperandClass {
+ let Name = "SMRDOffset"#predicate;
+ let PredicateMethod = predicate;
+ let RenderMethod = "addImmOperands";
+}
+
+def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">;
+def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass <
+ "isSMRDLiteralOffset"
+>;
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -510,6 +543,16 @@ def ClampMod : Operand <i1> {
let ParserMatchClass = ClampMatchClass;
}
+def smrd_offset : Operand <i32> {
+ let PrintMethod = "printU32ImmOperand";
+ let ParserMatchClass = SMRDOffsetMatchClass;
+}
+
+def smrd_literal_offset : Operand <i32> {
+ let PrintMethod = "printU32ImmOperand";
+ let ParserMatchClass = SMRDLiteralOffsetMatchClass;
+}
+
} // End OperandType = "OPERAND_IMMEDIATE"
def VOPDstS64 : VOPDstOperand <SReg_64>;
@@ -528,6 +571,13 @@ def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
+
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
@@ -717,19 +767,6 @@ class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), pattern>;
-
- def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-}
-
multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
list<dag> pattern> {
@@ -758,8 +795,10 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
- op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $src0, $src1", []>;
+ op, (outs), (ins rc:$src0, rc:$src1),
+ opName#" $src0, $src1", []> {
+ let Defs = [SCC];
+}
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -812,15 +851,20 @@ multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
}
multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), pattern>;
+ def "" : SOPK_Pseudo <opName, (outs),
+ (ins SReg_32:$src0, u16imm:$src1), pattern> {
+ let Defs = [SCC];
+ }
+
- let DisableEncoding = "$dst" in {
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ def _si : SOPK_Real_si <op, opName, (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
+ let Defs = [SCC];
+ }
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ def _vi : SOPK_Real_vi <op, opName, (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
+ let Defs = [SCC];
}
}
@@ -868,35 +912,68 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
+ string asm, list<dag> pattern = []> :
+ SMRD <outs, ins, asm, pattern>,
SMEMe_vi <op, imm>,
SIMCInstr<opName, SISubtarget.VI> {
let AssemblerPredicates = [isVI];
}
-multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
+multiclass SMRD_m <smrd op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
- def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
+ def _si : SMRD_Real_si <op.SI, opName, imm, outs, ins, asm>;
// glc is only applicable to scalar stores, which are not yet
// implemented.
let glc = 0 in {
- def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ def _vi : SMRD_Real_vi <op.VI, opName, imm, outs, ins, asm>;
}
}
-multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
+multiclass SMRD_Inval <smrd op, string opName,
+ SDPatternOperator node> {
+ let hasSideEffects = 1, mayStore = 1 in {
+ def "" : SMRD_Pseudo <opName, (outs), (ins), [(node)]>;
+
+ let sbase = 0, offset = 0 in {
+ let sdst = 0 in {
+ def _si : SMRD_Real_si <op.SI, opName, 0, (outs), (ins), opName>;
+ }
+
+ let glc = 0, sdata = 0 in {
+ def _vi : SMRD_Real_vi <op.VI, opName, 0, (outs), (ins), opName>;
+ }
+ }
+ }
+}
+
+class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
+ SMRD_Real_vi<op, opName, 0, (outs), (ins), opName, [(node)]> {
+ let hasSideEffects = 1;
+ let mayStore = 1;
+ let sbase = 0;
+ let sdata = 0;
+ let glc = 0;
+ let offset = 0;
+}
+
+multiclass SMRD_Helper <smrd op, string opName, RegisterClass baseClass,
RegisterClass dstClass> {
defm _IMM : SMRD_m <
op, opName#"_IMM", 1, (outs dstClass:$dst),
- (ins baseClass:$sbase, u32imm:$offset),
+ (ins baseClass:$sbase, smrd_offset:$offset),
opName#" $dst, $sbase, $offset", []
>;
+ def _IMM_ci : SMRD <
+ (outs dstClass:$dst), (ins baseClass:$sbase, smrd_literal_offset:$offset),
+ opName#" $dst, $sbase, $offset", []>, SMRD_IMMe_ci <op.SI> {
+ let AssemblerPredicates = [isCIOnly];
+ }
+
defm _SGPR : SMRD_m <
op, opName#"_SGPR", 0, (outs dstClass:$dst),
(ins baseClass:$sbase, SReg_32:$soff),
@@ -922,11 +999,12 @@ def InputModsNoDefault : Operand <i32> {
let ParserMatchClass = InputModsMatchClass;
}
-class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
int ret =
- !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
+ !if (!eq(Src0.Value, untyped.Value), 0,
+ !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
!if (!eq(Src2.Value, untyped.Value), 2, // VOP2
- 3)); // VOP3
+ 3))); // VOP3
}
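[Editor's note] getNumSrcArgs now peels off a zero-operand case before the old VOP1/VOP2/VOP3 chain, which is what lets the new VOP_NONE profile work. A hypothetical C++ rendering of the nested !if chain:

```cpp
// Hypothetical C++ analogue of getNumSrcArgs: each 'untyped' slot ends the
// operand list, mirroring the nested !if chain above.
int getNumSrcArgs(bool src0Untyped, bool src1Untyped, bool src2Untyped) {
  if (src0Untyped) return 0; // e.g. VOP_NONE
  if (src1Untyped) return 1; // VOP1
  if (src2Untyped) return 2; // VOP2
  return 3;                  // VOP3
}
```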
// Returns the register class to use for the destination of VOP[123C]
@@ -934,28 +1012,37 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
class getVALUDstForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
- VOPDstOperand<SReg_64>)); // else VT == i1
+ !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+ VOPDstOperand<SReg_64>))); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+ RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
}
// Returns the register class to use for source 1 of VOP[12C] for the
// given VT.
class getVOPSrc1ForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
+ RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
+ RegisterOperand ret =
+ !if(!eq(VT.Size, 64),
+ VCSrc_64,
+ !if(!eq(VT.Value, i1.Value),
+ SCSrc_64,
+ VCSrc_32
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
+// XXX - do f16 instructions?
class hasModifiers<ValueType SrcVT> {
bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1, 0));
@@ -1009,17 +1096,20 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction. This does not add the _e32 suffix, so it can be reused
// by getAsm64.
-class getAsm32 <int NumSrcArgs> {
+class getAsm32 <bit HasDst, int NumSrcArgs> {
+ string dst = "$dst";
+ string src0 = ", $src0";
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = "$dst, $src0"#
- !if(!eq(NumSrcArgs, 1), "", src1)#
- !if(!eq(NumSrcArgs, 3), src2, "");
+ string ret = !if(HasDst, dst, "") #
+ !if(!eq(NumSrcArgs, 1), src0, "") #
+ !if(!eq(NumSrcArgs, 2), src0#src1, "") #
+ !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers> {
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
@@ -1027,11 +1117,10 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
string ret =
!if(!eq(HasModifiers, 0),
- getAsm32<NumSrcArgs>.ret,
+ getAsm32<HasDst, NumSrcArgs>.ret,
"$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
-
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1047,29 +1136,38 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
- field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+ field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
+ field bit HasDst32 = HasDst;
+ field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
field bit HasModifiers = hasModifiers<Src0VT>.ret;
- field dag Outs = (outs DstRC:$dst);
+ field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs));
+
+ // VOP3b instructions are a special case with a second explicit
+ // output. This is manually overridden for them.
+ field dag Outs32 = Outs;
+ field dag Outs64 = Outs;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = getAsm32<NumSrcArgs>.ret;
- field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+ field string Asm32 = getAsm32<HasDst, NumSrcArgs>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers>.ret;
}
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
// for the instruction patterns to work.
-def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
-def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
-def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f16, i32, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i32, f16, untyped, untyped]>;
-def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
-def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
+
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -1087,25 +1185,76 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Asm32 = "$dst, vcc, $src0, $src1";
+ let Asm64 = "$dst, $sdst, $src0, $src1";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+}
+
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ // We use VCSrc_32 to exclude literal constants, even though the
+ // encoding normally allows them since the implicit VCC use means
+ // using one would always violate the constant bus
+ // restriction. SGPRs are still allowed because it should
+ // technically be possible to use VCC again as src0.
let Src0RC32 = VCSrc_32;
+ let Asm32 = "$dst, vcc, $src0, $src1, vcc";
+ let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
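[Editor's note] The VOP2b profiles model add/sub with carry: the 32-bit encoding writes the carry bit to VCC implicitly, while the VOP3 encoding can name any SGPR pair as $sdst. A standalone model of that carry-out:

```cpp
#include <cassert>
#include <cstdint>

// Carry-out lands in VCC (e32 form) or in the named $sdst SGPR (e64 form).
uint32_t addCarryOut(uint32_t a, uint32_t b, bool &carry) {
  uint64_t wide = uint64_t(a) + uint64_t(b);
  carry = (wide >> 32) != 0;
  return uint32_t(wide);
}

int main() {
  bool vcc;
  assert(addCarryOut(0xffffffffu, 1, vcc) == 0 && vcc);
  assert(addCarryOut(1, 2, vcc) == 3 && !vcc);
}
```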
-def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
- let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = "$dst, $src0_modifiers, $src1";
+class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod";
+}
+
+def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VGPR_32>;
}
-def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
+def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VReg_64>;
+}
+
+// VOPC instructions are a special case because for the 32-bit
+// encoding, we want to display the implicit vcc write as if it were
+// an explicit $dst.
+class VOPC_Profile<ValueType vt0, ValueType vt1 = vt0> : VOPProfile <[i1, vt0, vt1, untyped]> {
+ let Asm32 = "vcc, $src0, $src1";
+ // The destination for 32-bit encoding is implicit.
+ let HasDst32 = 0;
+}
+
+class VOPC_Class_Profile<ValueType vt> : VOPC_Profile<vt, i32> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$dst, $src0_modifiers, $src1";
}
+def VOPC_I1_F32_F32 : VOPC_Profile<f32>;
+def VOPC_I1_F64_F64 : VOPC_Profile<f64>;
+def VOPC_I1_I32_I32 : VOPC_Profile<i32>;
+def VOPC_I1_I64_I64 : VOPC_Profile<i64>;
+
+def VOPC_I1_F32_I32 : VOPC_Class_Profile<f32>;
+def VOPC_I1_F64_I32 : VOPC_Class_Profile<f64>;
+
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
let Asm64 = "$dst, $src0, $src1, $src2";
}
@@ -1119,13 +1268,60 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers>.ret;
- let Asm32 = getAsm32<2>.ret;
- let Asm64 = getAsm64<2, HasModifiers>.ret;
+ let Asm32 = getAsm32<1, 2>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers>.ret;
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+class SIInstAlias <string asm, Instruction inst, VOPProfile p> :
+ InstAlias <asm, (inst)>, PredicateControl {
+
+ field bit isCompare;
+ field bit isCommutable;
+
+ let ResultInst =
+ !if (p.HasDst32,
+ !if (!eq(p.NumSrcArgs, 0),
+ // 1 dst, 0 src
+ (inst p.DstRC:$dst),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 1 dst, 1 src
+ (inst p.DstRC:$dst, p.Src0RC32:$src0),
+ !if (!eq(p.NumSrcArgs, 2),
+ // 1 dst, 2 src
+ (inst p.DstRC:$dst, p.Src0RC32:$src0, p.Src1RC32:$src1),
+ // else - unreachable
+ (inst)))),
+ // else
+ !if (!eq(p.NumSrcArgs, 2),
+ // 0 dst, 2 src
+ (inst p.Src0RC32:$src0, p.Src1RC32:$src1),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 0 dst, 1 src
+ (inst p.Src0RC32:$src1),
+ // else
+ // 0 dst, 0 src
+ (inst))));
+}
+
+class SIInstAliasSI <string asm, string op_name, VOPProfile p> :
+ SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_si"), p> {
+ let AssemblerPredicate = SIAssemblerPredicate;
+}
+
+class SIInstAliasVI <string asm, string op_name, VOPProfile p> :
+ SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_vi"), p> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass SIInstAliasBuilder <string asm, VOPProfile p> {
+
+ def : SIInstAliasSI <asm, NAME, p>;
+
+ def : SIInstAliasVI <asm, NAME, p>;
+}
class VOP <string opName> {
string OpName = opName;
@@ -1165,20 +1361,22 @@ class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
+ string asm = opName#p.Asm32> {
+ def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, p.Outs, p.Ins32, asm>;
- def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
-multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
+ string asm = opName#p.Asm32> {
+
+ def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
@@ -1202,22 +1400,24 @@ class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
+ string revOp> {
+
+ def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
-multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
+ string revOp> {
+
+ def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
- def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+ def _vi : VOP2_Real_vi <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
@@ -1250,6 +1450,9 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
+
+ field bit vdst;
+ field bit src0;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
@@ -1295,22 +1498,6 @@ multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
HasMods>;
}
-// VOP3_m without source modifiers
-multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, int NumSrcArgs, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- let src0_modifiers = 0,
- src1_modifiers = 0,
- src2_modifiers = 0,
- clamp = 0,
- omod = 0 in {
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
- }
-}
-
multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, bit HasMods = 1> {
@@ -1335,7 +1522,7 @@ multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+ bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
@@ -1349,7 +1536,7 @@ multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+ bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
@@ -1360,54 +1547,41 @@ multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
// No VI instruction. This class is for SI only.
}
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- // The VOP2 variant puts the carry out into VCC, the VOP3 variant
- // can write it into any SGPR. We currently don't use the carry out,
- // so for now hardcode it to VCC as well.
- let sdst = SIOperand.VCC, Defs = [VCC] in {
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
- } // End sdst = SIOperand.VCC, Defs = [VCC]
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
+// instead of an implicit VCC as in the VOP2b format.
+multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit useSrc2Input = 0> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSrc2Input, HasMods>;
def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSrc2Input, HasMods>;
}
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec, string revOp> {
+ bit HasMods, bit defExec,
+ string revOp, list<SchedReadWrite> sched> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
+ }
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
}
@@ -1432,32 +1606,28 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
}
}
-multiclass VOP1_Helper <vop1 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- bit HasMods> {
+multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
+ list<dag> pat64> {
- defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
+ defm _e32 : VOP1_m <op, opName, p, pat32>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ p.HasModifiers>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP1_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
- P.HasModifiers
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))])
>;
multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
- defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
+ defm _e32 : VOP1SI_m <op, opName, P, []>;
defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
@@ -1467,36 +1637,33 @@ multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
opName, P.HasModifiers>;
}
-multiclass VOP2_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
+ list<dag> pat64, string revOp> {
- defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
+ defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
+
+ defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ revOp, p.HasModifiers>;
}
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp
>;
multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> {
- defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
+
+ defm _e32 : VOP2SI_m <op, opName, P, [], revOp>;
defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
@@ -1508,58 +1675,55 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
opName, revOp, P.HasModifiers>;
}
-multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
+multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
+ list<dag> pat32, list<dag> pat64,
+ string revOp, bit useSGPRInput> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+ let SchedRW = [Write32Bit, WriteSALU] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
+ }
- defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
+ defm _e64 : VOP3b_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
+ opName, revOp, p.HasModifiers, useSGPRInput>;
+ }
}
multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2b_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp, !eq(P.NumSrcArgs, 3)
>;
// A VOP2 instruction that is VOP3-only on VI.
-multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+multiclass VOP2_VI3_Helper <vop23 op, string opName, VOPProfile p,
+ list<dag> pat32, list<dag> pat64, string revOp> {
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
- revOp, HasMods>;
+ defm _e32 : VOP2SI_m <op, opName, p, pat32, revOp>;
+
+ defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ revOp, p.HasModifiers>;
}
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName>
: VOP2_VI3_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp
>;
multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> {
@@ -1583,64 +1747,75 @@ let isCodeGenOnly = 0 in {
} // End isCodeGenOnly = 0
}
-class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
-multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec, string revOpName = ""> {
- def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- let AssemblerPredicates = [isSICI];
+multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,
+ string opName, bit DefExec, VOPProfile p,
+ list<SchedReadWrite> sched,
+ string revOpName = "", string asm = opName#"_e32 "#op_asm,
+ string alias_asm = opName#" "#op_asm> {
+ def "" : VOPC_Pseudo <ins, pattern, opName> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = sched;
}
- def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- let AssemblerPredicates = [isVI];
- }
+ let AssemblerPredicates = [isSICI] in {
+ def _si : VOPC<op.SI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
+
+ } // End AssemblerPredicates = [isSICI]
+
+ let AssemblerPredicates = [isVI] in {
+ def _vi : VOPC<op.VI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
+
+ } // End AssemblerPredicates = [isVI]
+
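+ // Also accept the bare mnemonic, without the _e32 suffix, standing in for
+ // the MnemonicAlias removed from VOPC_Pseudo above.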
+ defm : SIInstAliasBuilder<alias_asm, p>;
}
-multiclass VOPC_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+multiclass VOPC_Helper <vopc op, string opName, list<dag> pat32,
+ list<dag> pat64, bit DefExec, string revOp,
+ VOPProfile p, list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>;
+ defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$dst), p.Ins64, opName#p.Asm64, pat64,
+ opName, p.HasModifiers, DefExec, revOp, sched>;
}
// Special case for class instructions which only have modifiers on
// the 1st source operand.
-multiclass VOPC_Class_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
-
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>,
+multiclass VOPC_Class_Helper <vopc op, string opName, list<dag> pat32,
+ list<dag> pat64, bit DefExec, string revOp,
+ VOPProfile p, list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched>;
+
+ defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$dst), p.Ins64, opName#p.Asm64, pat64,
+ opName, p.HasModifiers, DefExec, revOp, sched>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
string revOp = opName,
- bit DefExec = 0> : VOPC_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ bit DefExec = 0,
+ list<SchedReadWrite> sched = [Write32Bit]> :
+ VOPC_Helper <
+ op, opName, [],
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1648,51 +1823,51 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec, revOp
+ DefExec, revOp, P, sched
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Class_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ bit DefExec = 0,
+ list<SchedReadWrite> sched> : VOPC_Class_Helper <
+ op, opName, [],
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec, opName
+ DefExec, opName, P, sched
>;
multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_F32_F32, cond, revOp>;
multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_F64_F64, cond, revOp, 0, [WriteDoubleAdd]>;
multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_I32_I32, cond, revOp>;
multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_I64_I64, cond, revOp, 0, [Write64Bit]>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
PatLeaf cond = COND_NULL,
+ list<SchedReadWrite> sched,
string revOp = "">
- : VOPCInst <op, opName, P, cond, revOp, 1>;
+ : VOPCInst <op, opName, P, cond, revOp, 1, sched>;
multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F32_F32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F64_F64, COND_NULL, [WriteDoubleAdd], revOp>;
multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I32_I32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I64_I64, COND_NULL, [Write64Bit], revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
@@ -1700,16 +1875,16 @@ multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 0, [Write32Bit]>;
multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 1, [Write32Bit]>;
multiclass VOPC_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 0, [WriteDoubleAdd]>;
multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 1, [WriteDoubleAdd]>;
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
@@ -1761,25 +1936,13 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
3, 1
>;
-multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
- string opName, list<dag> pattern> :
- VOP3b_3_m <
- op, (outs vrc:$vdst, SReg_64:$sdst),
- (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
- InputModsNoDefault:$src1_modifiers, arc:$src1,
- InputModsNoDefault:$src2_modifiers, arc:$src2,
- ClampMod:$clamp, omod:$omod),
- opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
- opName, opName, 1, 1
+multiclass VOP3bInst <vop op, string opName, VOPProfile P, list<dag> pattern = []> :
+ VOP3b_2_3_m <
+ op, P.Outs64, P.Ins64,
+ opName#" "#P.Asm64, pattern,
+ opName, "", 1, 1
>;
-multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-
-multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VGPR_32, VSrc_32, opName, pattern>;
-
-
class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
@@ -1925,12 +2088,14 @@ multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
+ let hasPostISelHook = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
- let data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
}
}
@@ -1939,11 +2104,13 @@ multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs rc:$vdst),
string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
+ let hasPostISelHook = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
@@ -2214,7 +2381,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
@@ -2233,7 +2400,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
@@ -2245,7 +2412,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
op, name#"_rtn_offset", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc$slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
i1:$slc), vt:$vdata_in))], 1
@@ -2256,6 +2423,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
} // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
+// FIXME: tfe can't be an operand because it would require a separate
+// opcode, since it needs an N+1 register class dest register.
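+// (For example, a buffer_load_dwordx4 with tfe set writes four data dwords
+// plus one status dword, so its result would need a five-dword class.)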
multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
@@ -2368,47 +2537,121 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
} // End mayLoad = 0, mayStore = 1
}
-class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
- FLAT <op, (outs regClass:$vdst),
- (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
- asm#" $vdst, $addr"#"$glc"#"$slc"#"$tfe", []> {
- let data = 0;
- let mayLoad = 1;
+// For cache invalidation instructions.
+multiclass MUBUF_Invalidate <mubuf op, string opName, SDPatternOperator node> {
+ let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in {
+ def "" : MUBUF_Pseudo <opName, (outs), (ins), [(node)]>;
+
+ // Set everything to 0.
+ let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0,
+ vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in {
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, (outs), (ins), opName>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, (outs), (ins), opName>;
+ }
+ } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = ""
}
-class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
- FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr,
- glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
- name#" $data, $addr"#"$glc"#"$slc"#"$tfe",
- []> {
+//===----------------------------------------------------------------------===//
+// FLAT classes
+//===----------------------------------------------------------------------===//
+
+class flat <bits<7> ci, bits<7> vi = ci> {
+ field bits<7> CI = ci;
+ field bits<7> VI = vi;
+}
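+// For example, flat<0x08> describes an opcode encoded as 0x08 on both CI and
+// VI; the second argument is only needed when the two encodings differ.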
- let mayLoad = 0;
- let mayStore = 1;
+class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ FLAT <0, outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
- // Encoding
- let vdst = 0;
+class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
+ FLAT <op, outs, ins, asm, []>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicate = isCIOnly;
}
-multiclass FLAT_ATOMIC <bits<7> op, string name, RegisterClass vdst_rc,
- RegisterClass data_rc = vdst_rc> {
+class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
+ FLAT <op, outs, ins, asm, []>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicate = VIAssemblerPredicate;
+}
- let mayLoad = 1, mayStore = 1 in {
- def "" : FLAT <op, (outs),
- (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
- tfe_flat_atomic:$tfe),
- name#" $addr, $data"#"$slc"#"$tfe", []>,
- AtomicNoRet <NAME, 0> {
- let glc = 0;
- let vdst = 0;
- }
+multiclass FLAT_AtomicRet_m <flat op, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+ def "" : FLAT_Pseudo <NAME#"_RTN", outs, ins, pattern>,
+ AtomicNoRet <NAME, 1>;
- def _RTN : FLAT <op, (outs vdst_rc:$vdst),
- (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
- tfe_flat_atomic:$tfe),
- name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>,
- AtomicNoRet <NAME, 1> {
- let glc = 1;
- }
+ def _ci : FLAT_Real_ci <op.CI, NAME#"_RTN", outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME#"_RTN", outs, ins, asm>;
+}
+
+multiclass FLAT_Load_Helper <flat op, string asm_name,
+ RegisterClass regClass,
+ dag outs = (outs regClass:$vdst),
+ dag ins = (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
+ string asm = asm_name#" $vdst, $addr"#"$glc"#"$slc"#"$tfe"> {
+
+ let data = 0, mayLoad = 1 in {
+
+ def "" : FLAT_Pseudo <NAME, outs, ins, []>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
+ }
+}
+
+multiclass FLAT_Store_Helper <flat op, string asm_name,
+ RegisterClass vdataClass,
+ dag outs = (outs),
+ dag ins = (ins vdataClass:$data, VReg_64:$addr, glc_flat:$glc,
+ slc_flat:$slc, tfe_flat:$tfe),
+ string asm = asm_name#" $data, $addr"#"$glc"#"$slc"#"$tfe"> {
+
+ let mayLoad = 0, mayStore = 1, vdst = 0 in {
+
+ def "" : FLAT_Pseudo <NAME, outs, ins, []>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
+ }
+}
+
+multiclass FLAT_ATOMIC <flat op, string asm_name, RegisterClass vdst_rc,
+ RegisterClass data_rc = vdst_rc,
+ dag outs_noret = (outs),
+ string asm_noret = asm_name#" $addr, $data"#"$slc"#"$tfe"> {
+
+ let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in {
+ def "" : FLAT_Pseudo <NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe), []>,
+ AtomicNoRet <NAME, 0>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+ asm_noret>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+ asm_noret>;
+ }
+
+ let glc = 1, hasPostISelHook = 1 in {
+ defm _RTN : FLAT_AtomicRet_m <op, (outs vdst_rc:$vdst),
+ (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
+ tfe_flat_atomic:$tfe),
+ asm_name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>;
}
}
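// A typical instantiation, with illustrative opcode values, would be:
//   defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <flat<0x32, 0x42>, "flat_atomic_add",
//                                       VGPR_32>;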
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index e0eeea9..6f653c7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -30,7 +30,9 @@ def isGCN : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureSouthernIslands">;
+
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
@@ -62,36 +64,38 @@ let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
+ smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
+ smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
+ smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
+ smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
+ smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
+
+defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
+ int_amdgcn_s_dcache_inv>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
@@ -123,7 +127,7 @@ let Defs = [SCC] in {
defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
- [(set i32:$dst, (AMDGPUbrev i32:$src0))]
+ [(set i32:$dst, (bitreverse i32:$src0))]
>;
defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
@@ -183,10 +187,14 @@ defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>
defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
+
+let Uses = [M0] in {
defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
+} // End Uses = [M0]
+
defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
let Defs = [SCC] in {
@@ -354,7 +362,7 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
// SOPK Instructions
//===----------------------------------------------------------------------===//
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isMoveImm = 1 in {
defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
} // End isReMaterializable = 1, isMoveImm = 1
let Uses = [SCC] in {
@@ -438,36 +446,38 @@ def S_BRANCH : SOPP <
let isBarrier = 1;
}
-let DisableEncoding = "$scc" in {
+let Uses = [SCC] in {
def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ 0x00000004, (ins sopp_brtarget:$simm16),
"s_cbranch_scc0 $simm16"
>;
def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ 0x00000005, (ins sopp_brtarget:$simm16),
"s_cbranch_scc1 $simm16"
>;
-} // End DisableEncoding = "$scc"
+} // End Uses = [SCC]
+let Uses = [VCC] in {
def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000006, (ins sopp_brtarget:$simm16),
"s_cbranch_vccz $simm16"
>;
def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16"
>;
+} // End Uses = [VCC]
-let DisableEncoding = "$exec" in {
+let Uses = [EXEC] in {
def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ 0x00000008, (ins sopp_brtarget:$simm16),
"s_cbranch_execz $simm16"
>;
def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ 0x00000009, (ins sopp_brtarget:$simm16),
"s_cbranch_execnz $simm16"
>;
-} // End DisableEncoding = "$exec"
+} // End Uses = [EXEC]
} // End isBranch = 1
@@ -477,11 +487,11 @@ let hasSideEffects = 1 in {
def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
+ let SchedRW = [WriteBarrier];
let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
+ let isConvergent = 1;
}
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
@@ -805,9 +815,6 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-let SubtargetPredicate = isCI in {
-defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
-} // End isCI
defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
let mayStore = 0 in {
defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
@@ -905,11 +912,6 @@ defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
-//let SubtargetPredicate = isCI in {
-// DS_CONDXCHG32_RTN_B64
-// DS_CONDXCHG32_RTN_B128
-//} // End isCI
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
@@ -951,13 +953,13 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
+ mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
+ mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
+ mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
@@ -1034,9 +1036,12 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
-//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
+
+let SubtargetPredicate = isSI in {
+defm BUFFER_WBINVL1_SC : MUBUF_Invalidate <mubuf<0x70>, "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI
+}
+
+defm BUFFER_WBINVL1 : MUBUF_Invalidate <mubuf<0x71, 0x3e>, "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
@@ -1155,8 +1160,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-let vdst = 0, src0 = 0 in {
-defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
}
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1292,7 +1297,9 @@ defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
VOP_F64_F64, fsqrt
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble]
+
+let SchedRW = [WriteQuarterRate32] in {
defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
VOP_F32_F32, AMDGPUsin
@@ -1300,6 +1307,9 @@ defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
VOP_F32_F32, AMDGPUcos
>;
+
+} // End SchedRW = [WriteQuarterRate32]
+
defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
@@ -1308,24 +1318,33 @@ defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
VOP_I32_F64
>;
+
+let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
VOP_F64_F64
>;
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
+
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
+ VOP_F64_F64
+>;
+} // End SchedRW = [WriteDoubleAdd]
+
defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
VOP_I32_F32
>;
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32
>;
-let vdst = 0, src0 = 0 in {
-defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
- "v_clrexcp"
->;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NONE>;
}
+
+let Uses = [M0, EXEC] in {
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
+} // End Uses = [M0, EXEC]
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
@@ -1343,7 +1362,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
VOP_F32_F32, AMDGPUrsq_legacy
>;
-} // End let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteDouble] in {
@@ -1360,7 +1379,7 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
// VINTRP Instructions
//===----------------------------------------------------------------------===//
-let Uses = [M0] in {
+let Uses = [M0, EXEC] in {
// FIXME: Specify SchedRW for VINTRP instructions.
@@ -1405,16 +1424,14 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
[(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;
-} // End Uses = [M0]
+} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
multiclass V_CNDMASK <vop2 op, string name> {
- defm _e32 : VOP2_m <
- op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
- name, name>;
+ defm _e32 : VOP2_m <op, name, VOP_CNDMASK, [], name>;
defm _e64 : VOP3_m <
op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
@@ -1500,34 +1517,32 @@ let isCommutable = 1 in {
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
} // End isCommutable = 1
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+let isCommutable = 1 in {
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP_I32_I32_I32, add
+ VOP2b_I32_I1_I32_I32
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP_I32_I32_I32, null_frag, "v_sub_i32"
+ VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
>;
-let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+ VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
>;
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+} // End isCommutable = 1
defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
@@ -1575,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
>;
defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32
+ VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
>;
defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32
+ VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
>;
defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
@@ -1704,15 +1719,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteDoubleAdd] in {
defm V_DIV_FIXUP_F64 : VOP3Inst <
vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDoubleAdd]
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteDoubleAdd] in {
let isCommutable = 1 in {
defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
@@ -1735,7 +1750,7 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
VOP_F64_F64_I32, AMDGPUldexp
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDoubleAdd]
let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
@@ -1756,16 +1771,21 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteFloatFMA, WriteSALU] in {
-defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
+ VOP3b_F32_I1_F32_F32_F32
+>;
}
let SchedRW = [WriteDouble, WriteSALU] in {
// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
+defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
+ VOP3b_F64_I1_F64_F64_F64
+>;
} // End SchedRW = [WriteDouble, WriteSALU]
-let isCommutable = 1, Uses = [VCC] in {
+let isCommutable = 1, Uses = [VCC, EXEC] in {
+let SchedRW = [WriteFloatFMA] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
@@ -1774,6 +1794,7 @@ let isCommutable = 1, Uses = [VCC] in {
defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
+}
let SchedRW = [WriteDouble] in {
// v_div_fmas_f64:
@@ -1786,7 +1807,7 @@ defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
>;
} // End SchedRW = [WriteDouble]
-} // End isCommutable = 1
+} // End isCommutable = 1, Uses = [VCC, EXEC]
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
@@ -1835,13 +1856,13 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
} // End hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, SALU = 1 in {
def SGPR_USE : InstSI <(outs),(ins), "", []>;
}
@@ -1921,39 +1942,9 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
-
-let UseNamedOperandTable = 1 in {
-
-def SI_RegisterLoad : InstSI <
+class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
(outs VGPR_32:$dst, SReg_64:$temp),
- (ins FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterLoad = 1;
- let mayLoad = 1;
-}
-
-class SIRegStore<dag outs> : InstSI <
- outs,
- (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterStore = 1;
- let mayStore = 1;
-}
-
-let usesCustomInserter = 1 in {
-def SI_RegisterStorePseudo : SIRegStore<(outs)>;
-} // End usesCustomInserter = 1
-def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
-
-
-} // End UseNamedOperandTable = 1
-
-def SI_INDIRECT_SRC : InstSI <
- (outs VGPR_32:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ (ins rc:$src, VSrc_32:$idx, i32imm:$off),
"si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
@@ -1967,6 +1958,13 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
let Constraints = "$src = $dst";
}
+// TODO: We can support indirect SGPR access.
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
+def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
+def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
+def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
+def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
+
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
@@ -1977,19 +1975,24 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- let UseNamedOperandTable = 1 in {
+ let UseNamedOperandTable = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
+ (ins sgpr_class:$src, i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1, Uses = [EXEC]
}
@@ -2003,19 +2006,25 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1 in {
+ let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC]
}
@@ -2030,9 +2039,11 @@ let Defs = [SCC] in {
def SI_CONSTDATA_PTR : InstSI <
(outs SReg_64:$dst),
- (ins),
- "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
->;
+ (ins const_ga:$ptr),
+ "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))]
+> {
+ let SALU = 1;
+}
} // End Defs = [SCC]
@@ -2072,84 +2083,63 @@ def : Pat <
// SMRD Patterns
//===----------------------------------------------------------------------===//
-multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
- // 1. SI-CI: Offset as 8bit DWORD immediate
+ // 1. IMM offset
def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
- (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
>;
- // 2. Offset loaded in an 32bit SGPR
+ // 2. SGPR offset
def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
>;
- // 3. No offset at all
def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
+ > {
+ let Predicates = [isCIOnly];
+ }
}
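// The SMRDImm, SMRDSgpr and SMRDImm32 complex patterns fold the base plus
// offset addressing, replacing the separate SI/CI and VI pattern
// multiclasses removed below.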
-multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
-
- // 1. VI: Offset as 20bit immediate in bytes
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))),
- (vt (Instr_IMM $sbase, (as_i32imm $offset)))
- >;
-
- // 2. Offset loaded in an 32bit SGPR
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
- >;
-
- // 3. No offset at all
- def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
-}
-
-let Predicates = [isSICI] in {
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isSICI]
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
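+// (When several patterns match the same load, TableGen selects the one with
+// the highest AddedComplexity, so these SMRD patterns win over MUBUF.)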
-let Predicates = [isVI] in {
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isVI]
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-let Predicates = [isSICI] in {
+// 1. Offset as an immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
+>;
-// 1. Offset as 8bit DWORD immediate
+// 2. Offset loaded in a 32-bit SGPR
def : Pat <
- (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
+ (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
>;
-} // End Predicates = [isSICI]
+let Predicates = [isCI] in {
-// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (SIload_constant v4i32:$sbase, imm:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)
>;
+} // End Predicates = [isCI]
+
+} // End AddedComplexity = 100
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -2161,6 +2151,11 @@ def : Pat <
(S_MOV_B32 0), sub1))
>;
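+// Match integer abs, written as max(x, -x), to the scalar S_ABS_I32.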
+def : Pat <
+ (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (S_ABS_I32 $x)
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
@@ -2488,6 +2483,11 @@ def : Pat <
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
+//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
+//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
+//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
+//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
+
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -2568,11 +2568,25 @@ def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v2f32, i64, VReg_64>;
def : BitConvert <i64, v2f32, VReg_64>;
+def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+
+def : BitConvert <v2i64, v4i32, SReg_128>;
+def : BitConvert <v4i32, v2i64, SReg_128>;
+
+def : BitConvert <v2f64, v4f32, VReg_128>;
+def : BitConvert <v2f64, v4i32, VReg_128>;
+def : BitConvert <v4f32, v2f64, VReg_128>;
+def : BitConvert <v4i32, v2f64, VReg_128>;
+
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8i32, v32i8, SReg_256>;
@@ -2601,10 +2615,9 @@ def : Pat <
// Prevent expanding both fneg and fabs.
-// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
- (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+ (S_OR_B32 $src, 0x80000000) /* Set sign bit */
>;
// FIXME: Should use S_OR_B32
@@ -2836,10 +2849,6 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
// -1. For the non-rtn variants, the manual says it does
// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
// will always do the increment so I'm assuming it's the same.
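// (With D0 = uint_max, DS[A] >= D0 holds only when DS[A] is already uint_max,
// and the wrap to 0 then equals DS[A] + 1 mod 2^32, i.e. a plain increment.)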
-//
-// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
-// needs to be a VGPR. The SGPR copy pass will fix this, and it's
-// easier since there is no v_mov_b64.
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
@@ -2855,9 +2864,9 @@ class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_add_local>;
+ V_MOV_B32_e32, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_sub_local>;
+ V_MOV_B32_e32, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
@@ -2874,9 +2883,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_add_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_sub_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
@@ -3019,90 +3028,46 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-let SubtargetPredicate = isCI in {
-
-defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
- VOP_I32_I32_I32
->;
-
-let isCommutable = 1 in {
-defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
- VOP_I64_I32_I32_I64
->;
-
-// XXX - Does this set VCC?
-defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
- VOP_I64_I32_I32_I64
->;
-} // End isCommutable = 1
-
-// Remaining instructions:
-// FLAT_*
-// S_CBRANCH_CDBGUSER
-// S_CBRANCH_CDBGSYS
-// S_CBRANCH_CDBGSYS_OR_USER
-// S_CBRANCH_CDBGSYS_AND_USER
-// S_DCACHE_INV_VOL
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
-
-} // End isCI
-
/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
// 1. Extract with offset
def : Pat<
- (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
- (SI_INDIRECT_SRC $vec, $idx, imm:$off)
+ (eltvt (extractelt vt:$vec, (add i32:$idx, imm:$off))),
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (eltvt (vector_extract vt:$vec, i32:$idx)),
- (SI_INDIRECT_SRC $vec, $idx, 0)
+ (eltvt (extractelt vt:$vec, i32:$idx)),
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
- (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst $vec, $idx, imm:$off, $val)
+ (insertelt vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
- (vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst $vec, $idx, 0, $val)
+ (insertelt vt:$vec, eltvt:$val, i32:$idx),
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
+defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
+defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
+defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
-defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
+defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
+defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
+defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
//===----------------------------------------------------------------------===//
// Conversion Patterns
@@ -3215,12 +3180,12 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
>;
def : Pat <
(i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)
>;
@@ -3301,24 +3266,6 @@ def : Pat <
} // End Predicates = [isSI]
-let Predicates = [isCI] in {
-
-// Convert (x - floor(x)) to fract(x)
-def : Pat <
- (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
- (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
- (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-// Convert (x + (-floor(x))) to fract(x)
-def : Pat <
- (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
- (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-} // End Predicates = [isCI]
-
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index c319b32..126f624 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -103,6 +103,10 @@ public:
return "SI Lower control flow instructions";
}
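+ // This pass rewrites instructions in place and does not add, remove, or
+ // reorder basic blocks, so the CFG analyses can be preserved.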
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
@@ -140,8 +144,7 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
DebugLoc DL = From.getDebugLoc();
BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addOperand(To)
- .addReg(AMDGPU::EXEC);
+ .addOperand(To);
}
void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
@@ -159,8 +162,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
+ .addImm(3);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
@@ -269,8 +271,7 @@ void SILowerControlFlowPass::Loop(MachineInstr &MI) {
.addReg(Src);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1))
- .addReg(AMDGPU::EXEC);
+ .addOperand(MI.getOperand(1));
MI.eraseFromParent();
}
@@ -316,7 +317,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
.addImm(0);
}
} else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
.addImm(0)
.addOperand(Op);
}
@@ -362,9 +363,9 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int
.addReg(AMDGPU::VCC_LO);
// Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
// Update EXEC, save the original EXEC value to VCC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
@@ -385,8 +386,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
+ .addImm(-7);
// Restore EXEC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
@@ -438,7 +438,6 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(Reg)
- .addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
LoadM0(MI, MovRel, Off);
@@ -460,7 +459,6 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
.addReg(Reg, RegState::Define)
.addReg(Val)
- .addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
LoadM0(MI, MovRel, Off);
@@ -486,11 +484,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
+ if (TII->isWQM(MI) || TII->isDS(MI))
NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
- if (TII->isFLAT(MI.getOpcode()))
+ if (TII->isFLAT(MI))
NeedFlat = true;
switch (MI.getOpcode()) {
@@ -541,7 +539,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Branch(MI);
break;
- case AMDGPU::SI_INDIRECT_SRC:
+ case AMDGPU::SI_INDIRECT_SRC_V1:
+ case AMDGPU::SI_INDIRECT_SRC_V2:
+ case AMDGPU::SI_INDIRECT_SRC_V4:
+ case AMDGPU::SI_INDIRECT_SRC_V8:
+ case AMDGPU::SI_INDIRECT_SRC_V16:
IndirectSrc(MI);
break;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 67421e2..a2fa5fd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -48,6 +48,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
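+ // Lowering i1 copies neither changes the CFG nor moves blocks, so the
+ // dominator tree remains valid.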
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 587ea63..935aad4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -29,10 +29,114 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- HasSpilledVGPRs(false),
+ ScratchRSrcReg(AMDGPU::NoRegister),
+ ScratchWaveOffsetReg(AMDGPU::NoRegister),
+ PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
+ DispatchPtrUserSGPR(AMDGPU::NoRegister),
+ QueuePtrUserSGPR(AMDGPU::NoRegister),
+ KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
+ DispatchIDUserSGPR(AMDGPU::NoRegister),
+ FlatScratchInitUserSGPR(AMDGPU::NoRegister),
+ PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
+ WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
+ PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
+ LDSWaveSpillSize(0),
PSInputAddr(0),
NumUserSGPRs(0),
- LDSWaveSpillSize(0) { }
+ NumSystemSGPRs(0),
+ HasSpilledSGPRs(false),
+ HasSpilledVGPRs(false),
+ PrivateSegmentBuffer(false),
+ DispatchPtr(false),
+ QueuePtr(false),
+ DispatchID(false),
+ KernargSegmentPtr(false),
+ FlatScratchInit(false),
+ GridWorkgroupCountX(false),
+ GridWorkgroupCountY(false),
+ GridWorkgroupCountZ(false),
+ WorkGroupIDX(true),
+ WorkGroupIDY(false),
+ WorkGroupIDZ(false),
+ WorkGroupInfo(false),
+ PrivateSegmentWaveByteOffset(false),
+ WorkItemIDX(true),
+ WorkItemIDY(false),
+ WorkItemIDZ(false) {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const Function *F = MF.getFunction();
+
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+ if (getShaderType() == ShaderType::COMPUTE)
+ KernargSegmentPtr = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+
+ bool MaySpill = ST.isVGPRSpillingEnabled(this);
+ bool HasStackObjects = FrameInfo->hasStackObjects();
+
+ if (HasStackObjects || MaySpill)
+ PrivateSegmentWaveByteOffset = true;
+
+ if (ST.isAmdHsaOS()) {
+ if (HasStackObjects || MaySpill)
+ PrivateSegmentBuffer = true;
+
+ if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
+ DispatchPtr = true;
+ }
+
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
+}
+
+unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
+ const SIRegisterInfo &TRI) {
+ PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ NumUserSGPRs += 4;
+ return PrivateSegmentBufferUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
+ DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return DispatchPtrUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
+ QueuePtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return QueuePtrUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
+ KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return KernargSegmentPtrUserSGPR;
+}
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
@@ -53,7 +157,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
- MRI.setPhysRegUsed(LaneVGPR);
// Add this register as live-in to all blocks to avoid the machine verifier
// complaining about use of an undefined physical register.
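A hedged sketch of how a caller (e.g. argument lowering; the exact call site is
outside this diff) is expected to drive the new add* helpers, assuming TRI is
the SIRegisterInfo and MF the current MachineFunction:

    SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

    if (Info->hasPrivateSegmentBuffer()) {
      // Consumes 4 user SGPRs (an SReg_128 starting at the next user SGPR).
      unsigned Reg = Info->addPrivateSegmentBuffer(*TRI);
      MF.addLiveIn(Reg, &AMDGPU::SReg_128RegClass);
    }
    if (Info->hasKernargSegmentPtr()) {
      // Consumes 2 user SGPRs.
      unsigned Reg = Info->addKernargSegmentPtr(*TRI);
      MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
    }
    // System SGPRs may only be added once all user SGPRs are in place;
    // getNextUserSGPR() asserts on this ordering.
    if (Info->hasWorkGroupIDX())
      MF.addLiveIn(Info->addWorkGroupIDX(), &AMDGPU::SReg_32RegClass);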
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 667da4c..9c528d6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -26,13 +26,83 @@ class MachineRegisterInfo;
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+ // FIXME: This should be removed and getPreloadedValue moved here.
+ friend struct SIRegisterInfo;
void anchor() override;
unsigned TIDReg;
- bool HasSpilledVGPRs;
+
+ // Registers that may be reserved for spilling purposes. These may be the same
+ // as the input registers.
+ unsigned ScratchRSrcReg;
+ unsigned ScratchWaveOffsetReg;
+
+ // Input registers setup for the HSA ABI.
+ // User SGPRs in allocation order.
+ unsigned PrivateSegmentBufferUserSGPR;
+ unsigned DispatchPtrUserSGPR;
+ unsigned QueuePtrUserSGPR;
+ unsigned KernargSegmentPtrUserSGPR;
+ unsigned DispatchIDUserSGPR;
+ unsigned FlatScratchInitUserSGPR;
+ unsigned PrivateSegmentSizeUserSGPR;
+ unsigned GridWorkGroupCountXUserSGPR;
+ unsigned GridWorkGroupCountYUserSGPR;
+ unsigned GridWorkGroupCountZUserSGPR;
+
+ // System SGPRs in allocation order.
+ unsigned WorkGroupIDXSystemSGPR;
+ unsigned WorkGroupIDYSystemSGPR;
+ unsigned WorkGroupIDZSystemSGPR;
+ unsigned WorkGroupInfoSystemSGPR;
+ unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
public:
+ // FIXME: Make private
+ unsigned LDSWaveSpillSize;
+ unsigned PSInputAddr;
+ std::map<unsigned, unsigned> LaneVGPRs;
+ unsigned ScratchOffsetReg;
+ unsigned NumUserSGPRs;
+ unsigned NumSystemSGPRs;
+
+private:
+ bool HasSpilledSGPRs;
+ bool HasSpilledVGPRs;
+
+ // Feature bits required for inputs passed in user SGPRs.
+ bool PrivateSegmentBuffer : 1;
+ bool DispatchPtr : 1;
+ bool QueuePtr : 1;
+ bool DispatchID : 1;
+ bool KernargSegmentPtr : 1;
+ bool FlatScratchInit : 1;
+ bool GridWorkgroupCountX : 1;
+ bool GridWorkgroupCountY : 1;
+ bool GridWorkgroupCountZ : 1;
+
+ // Feature bits required for inputs passed in system SGPRs.
+ bool WorkGroupIDX : 1; // Always initialized.
+ bool WorkGroupIDY : 1;
+ bool WorkGroupIDZ : 1;
+ bool WorkGroupInfo : 1;
+ bool PrivateSegmentWaveByteOffset : 1;
+
+ bool WorkItemIDX : 1; // Always initialized.
+ bool WorkItemIDY : 1;
+ bool WorkItemIDZ : 1;
+
+ MCPhysReg getNextUserSGPR() const {
+ assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
+ return AMDGPU::SGPR0 + NumUserSGPRs;
+ }
+
+ MCPhysReg getNextSystemSGPR() const {
+ return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
+ }
+
+public:
struct SpilledReg {
unsigned VGPR;
int Lane;
@@ -46,16 +116,162 @@ public:
SIMachineFunctionInfo(const MachineFunction &MF);
SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
unsigned SubIdx);
- unsigned PSInputAddr;
- unsigned NumUserSGPRs;
- std::map<unsigned, unsigned> LaneVGPRs;
- unsigned LDSWaveSpillSize;
- unsigned ScratchOffsetReg;
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
- bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
- void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
+
+ // Add user SGPRs.
+ unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
+ unsigned addDispatchPtr(const SIRegisterInfo &TRI);
+ unsigned addQueuePtr(const SIRegisterInfo &TRI);
+ unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
+
+ // Add system SGPRs.
+ unsigned addWorkGroupIDX() {
+ WorkGroupIDXSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDXSystemSGPR;
+ }
+
+ unsigned addWorkGroupIDY() {
+ WorkGroupIDYSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDYSystemSGPR;
+ }
+
+ unsigned addWorkGroupIDZ() {
+ WorkGroupIDZSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDZSystemSGPR;
+ }
+
+ unsigned addWorkGroupInfo() {
+ WorkGroupInfoSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupInfoSystemSGPR;
+ }
+
+ unsigned addPrivateSegmentWaveByteOffset() {
+ PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return PrivateSegmentWaveByteOffsetSystemSGPR;
+ }
+
+ bool hasPrivateSegmentBuffer() const {
+ return PrivateSegmentBuffer;
+ }
+
+ bool hasDispatchPtr() const {
+ return DispatchPtr;
+ }
+
+ bool hasQueuePtr() const {
+ return QueuePtr;
+ }
+
+ bool hasDispatchID() const {
+ return DispatchID;
+ }
+
+ bool hasKernargSegmentPtr() const {
+ return KernargSegmentPtr;
+ }
+
+ bool hasFlatScratchInit() const {
+ return FlatScratchInit;
+ }
+
+ bool hasGridWorkgroupCountX() const {
+ return GridWorkgroupCountX;
+ }
+
+ bool hasGridWorkgroupCountY() const {
+ return GridWorkgroupCountY;
+ }
+
+ bool hasGridWorkgroupCountZ() const {
+ return GridWorkgroupCountZ;
+ }
+
+ bool hasWorkGroupIDX() const {
+ return WorkGroupIDX;
+ }
+
+ bool hasWorkGroupIDY() const {
+ return WorkGroupIDY;
+ }
+
+ bool hasWorkGroupIDZ() const {
+ return WorkGroupIDZ;
+ }
+
+ bool hasWorkGroupInfo() const {
+ return WorkGroupInfo;
+ }
+
+ bool hasPrivateSegmentWaveByteOffset() const {
+ return PrivateSegmentWaveByteOffset;
+ }
+
+ bool hasWorkItemIDX() const {
+ return WorkItemIDX;
+ }
+
+ bool hasWorkItemIDY() const {
+ return WorkItemIDY;
+ }
+
+ bool hasWorkItemIDZ() const {
+ return WorkItemIDZ;
+ }
+
+ unsigned getNumUserSGPRs() const {
+ return NumUserSGPRs;
+ }
+
+ unsigned getNumPreloadedSGPRs() const {
+ return NumUserSGPRs + NumSystemSGPRs;
+ }
+
+ unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
+ return PrivateSegmentWaveByteOffsetSystemSGPR;
+ }
+
+ /// \brief Returns the physical register reserved for use as the resource
+ /// descriptor for scratch accesses.
+ unsigned getScratchRSrcReg() const {
+ return ScratchRSrcReg;
+ }
+
+ void setScratchRSrcReg(unsigned Reg) {
+ assert(Reg != AMDGPU::NoRegister && "Should never be unset");
+ ScratchRSrcReg = Reg;
+ }
+
+ unsigned getScratchWaveOffsetReg() const {
+ return ScratchWaveOffsetReg;
+ }
+
+ void setScratchWaveOffsetReg(unsigned Reg) {
+ assert(Reg != AMDGPU::NoRegister && "Should never be unset");
+ ScratchWaveOffsetReg = Reg;
+ }
+
+ bool hasSpilledSGPRs() const {
+ return HasSpilledSGPRs;
+ }
+
+ void setHasSpilledSGPRs(bool Spill = true) {
+ HasSpilledSGPRs = Spill;
+ }
+
+ bool hasSpilledVGPRs() const {
+ return HasSpilledVGPRs;
+ }
+
+ void setHasSpilledVGPRs(bool Spill = true) {
+ HasSpilledVGPRs = Spill;
+ }
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
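The numbering behind getNextUserSGPR()/getNextSystemSGPR() is purely
positional; a small illustration under assumed inputs (not taken from the
patch):

    // Mirror of getNextSystemSGPR(), for illustration:
    unsigned firstFreeSGPR(unsigned NumUserSGPRs, unsigned NumSystemSGPRs) {
      return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
    }
    // e.g. a private segment buffer (4 user SGPRs) plus a kernarg segment
    // pointer (2 user SGPRs), with no system SGPRs added yet:
    //   firstFreeSGPR(6, 0) == AMDGPU::SGPR6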
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
deleted file mode 100644
index 2cd600d..0000000
--- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// This pass loads scratch pointer and scratch offset into a register or a
-/// frame index which can be used anywhere in the program. These values will
-/// be used for spilling VGPRs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIPrepareScratchRegs : public MachineFunctionPass {
-
-private:
- static char ID;
-
-public:
- SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI prepare scratch registers";
- }
-
-};
-
-} // End anonymous namespace
-
-char SIPrepareScratchRegs::ID = 0;
-
-FunctionPass *llvm::createSIPrepareScratchRegs() {
- return new SIPrepareScratchRegs();
-}
-
-bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- MachineBasicBlock *Entry = MF.begin();
- MachineBasicBlock::iterator I = Entry->begin();
- DebugLoc DL = I->getDebugLoc();
-
- // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
- // run this pass.
- if (!MFI->hasSpilledVGPRs())
- return false;
-
- unsigned ScratchPtrPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchOffsetPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
-
- if (!Entry->isLiveIn(ScratchPtrPreloadReg))
- Entry->addLiveIn(ScratchPtrPreloadReg);
-
- if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
- Entry->addLiveIn(ScratchOffsetPreloadReg);
-
- // Load the scratch offset.
- unsigned ScratchOffsetReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = -1;
-
- if (ScratchOffsetReg != AMDGPU::NoRegister) {
- // Found an SGPR to use
- MRI.setPhysRegUsed(ScratchOffsetReg);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
- .addReg(ScratchOffsetPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
- .addReg(ScratchOffsetPreloadReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- }
-
-
- // Now that we have the scratch pointer and offset values, we need to
- // add them to all the SI_SPILL_V* instructions.
-
- RegScavenger RS;
- unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
- RS.addScavengingFrameIndex(ScratchRsrcFI);
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- // Add the scratch offset reg as a live-in so that the register scavenger
- // doesn't re-use it.
- if (!MBB.isLiveIn(ScratchOffsetReg) &&
- ScratchOffsetReg != AMDGPU::NoRegister)
- MBB.addLiveIn(ScratchOffsetReg);
- RS.enterBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- RS.forward(I);
- DebugLoc DL = MI.getDebugLoc();
- if (!TII->isVGPRSpill(MI.getOpcode()))
- continue;
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc23 & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc23 >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
- }
- }
- return true;
-}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index e9e8412..3cdffef 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -33,6 +32,40 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
Reserved.set(*R);
}
+unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ if (ST.hasSGPRInitBug()) {
+ unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
+ unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
+ return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ }
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
+ // next sgpr128 down.
+ return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
+ }
+
+ return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99;
+}
+
+unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ if (ST.hasSGPRInitBug()) {
+ unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ return AMDGPU::SGPR_32RegClass.getRegister(Idx);
+ }
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // Next register before reservations for flat_scr and vcc.
+ return AMDGPU::SGPR97;
+ }
+
+ return AMDGPU::SGPR95;
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
@@ -42,13 +75,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
- // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
- reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
- reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
+ // Reserve the last 2 registers so we will always have at least 2 more that
+ // will physically contain VCC.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);
+
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
+ // for VCC/FLAT_SCR.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
+ }
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
- if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
@@ -60,34 +102,57 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
+ // Reserve 1 SGPR for scratch wave offset in case we need to spill.
+ reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
+ }
+
+ unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
+ if (ScratchRSrcReg != AMDGPU::NoRegister) {
+ // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
+ // to spill.
+ // TODO: May need to reserve a VGPR if doing LDS spilling.
+ reserveRegisterTuples(Reserved, ScratchRSrcReg);
+ assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
+ }
+
return Reserved;
}
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
-
const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
STI.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
+ unsigned VSLimit = SGPRLimit + VGPRLimit;
+
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
- unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+ unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1);
unsigned Limit;
- if (isSGPRClass(*I)) {
+ if (isPseudoRegClass(RC)) {
+ // FIXME: This is a hack. We should never be considering the pressure of
+ // these since no virtual register should ever have this class.
+ Limit = VSLimit;
+ } else if (isSGPRClass(RC)) {
Limit = SGPRLimit / NumSubRegs;
} else {
Limit = VGPRLimit / NumSubRegs;
}
- const int *Sets = getRegClassPressureSets(*I);
+ const int *Sets = getRegClassPressureSets(RC);
assert(Sets);
for (unsigned i = 0; Sets[i] != -1; ++i) {
- if (Sets[i] == (int)Idx)
+ if (Sets[i] == (int)Idx)
return Limit;
}
}
@@ -174,17 +239,17 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned SubReg = NumSubRegs > 1 ?
getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
Value;
- bool IsKill = (i == e - 1);
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(SubReg, getDefRegState(IsLoad))
- .addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addReg(SOffset)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+ .addReg(SubReg, getDefRegState(IsLoad))
+ .addReg(ScratchRsrcReg)
+ .addReg(SOffset)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
}
}
@@ -228,6 +293,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg)
.addImm(Spill.Lane);
+ // FIXME: Since this spills to another register instead of an actual
+ // frame index, we should delete the frame index when all references to
+ // it are fixed.
}
MI->eraseFromParent();
break;
@@ -263,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// TODO: only do this when it is needed
switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
- TII->insertNOPs(MI, 3);
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
+ // ("S_NOP 3") on SI
+ TII->insertWaitStates(MI, 4);
break;
case AMDGPUSubtarget::SEA_ISLANDS:
break;
default: // VOLCANIC_ISLANDS and later
- // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
- // and later. This also applies to VALUs which write VCC, but we're
- // unlikely to see VMEM use VCC.
- TII->insertNOPs(MI, 4);
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
+ // ("S_NOP 4") on VI and later. This also applies to VALUs which write
+ // VCC, but we're unlikely to see VMEM use VCC.
+ TII->insertWaitStates(MI, 5);
}
MI->eraseFromParent();
@@ -322,22 +391,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
-const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const {
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::VGPR_32RegClass;
- }
-}
-
unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
+// FIXME: This is very slow. It might be worth creating a map from physreg to
+// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
- static const TargetRegisterClass *BaseClasses[] = {
+ static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::VReg_64RegClass,
@@ -359,33 +422,45 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
return nullptr;
}
+// TODO: It might be helpful to have some target specific flags in
+// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+ switch (RC->getSize()) {
+ case 4:
+ return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
+ case 8:
+ return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
+ case 12:
+ return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
+ case 16:
+ return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+ case 32:
+ return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
+ case 64:
+ return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
}
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const {
- if (hasVGPRs(SRC)) {
- return SRC;
- } else if (SRC == &AMDGPU::SCCRegRegClass) {
- return &AMDGPU::VCCRegRegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
- return &AMDGPU::VGPR_32RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
- return &AMDGPU::VReg_64RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
- return &AMDGPU::VReg_128RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
- return &AMDGPU::VReg_256RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
- return &AMDGPU::VReg_512RegClass;
- }
- return nullptr;
+ switch (SRC->getSize()) {
+ case 4:
+ return &AMDGPU::VGPR_32RegClass;
+ case 8:
+ return &AMDGPU::VReg_64RegClass;
+ case 12:
+ return &AMDGPU::VReg_96RegClass;
+ case 16:
+ return &AMDGPU::VReg_128RegClass;
+ case 32:
+ return &AMDGPU::VReg_256RegClass;
+ case 64:
+ return &AMDGPU::VReg_512RegClass;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
}
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -402,6 +477,30 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
}
}
+bool SIRegisterInfo::shouldRewriteCopySrc(
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // We want to prefer the smallest register class possible, so we don't want to
+ // stop and rewrite on anything that looks like a subregister
+ // extract. Operations mostly don't care about the super register class, so we
+  // only want to stop on the most basic of copies between the same register
+ // class.
+ //
+ // e.g. if we have something like
+ // vreg0 = ...
+ // vreg1 = ...
+ // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
+ // vreg3 = COPY vreg2, sub0
+ //
+ // We want to look through the COPY to find:
+ // => vreg3 = COPY vreg0
+
+ // Plain copy.
+ return getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
const TargetRegisterClass *SubRC,
unsigned Channel) const {
@@ -462,30 +561,47 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
+// FIXME: Most of these are flexible with HSA and we don't need to reserve them
+// as input registers if unused. Whether the dispatch ptr is necessary should be
+// easy to detect from used intrinsics. Scratch setup is harder to know.
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ (void)ST;
switch (Value) {
- case SIRegisterInfo::TGID_X:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
- case SIRegisterInfo::TGID_Y:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
- case SIRegisterInfo::TGID_Z:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
- case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
- if (MFI->getShaderType() != ShaderType::COMPUTE)
- return MFI->ScratchOffsetReg;
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
- case SIRegisterInfo::SCRATCH_PTR:
- return AMDGPU::SGPR2_SGPR3;
- case SIRegisterInfo::INPUT_PTR:
- return AMDGPU::SGPR0_SGPR1;
- case SIRegisterInfo::TIDIG_X:
+ case SIRegisterInfo::WORKGROUP_ID_X:
+ assert(MFI->hasWorkGroupIDX());
+ return MFI->WorkGroupIDXSystemSGPR;
+ case SIRegisterInfo::WORKGROUP_ID_Y:
+ assert(MFI->hasWorkGroupIDY());
+ return MFI->WorkGroupIDYSystemSGPR;
+ case SIRegisterInfo::WORKGROUP_ID_Z:
+ assert(MFI->hasWorkGroupIDZ());
+ return MFI->WorkGroupIDZSystemSGPR;
+ case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
+ return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
+ case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
+ assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ case SIRegisterInfo::KERNARG_SEGMENT_PTR:
+ assert(MFI->hasKernargSegmentPtr());
+ return MFI->KernargSegmentPtrUserSGPR;
+ case SIRegisterInfo::DISPATCH_PTR:
+ assert(MFI->hasDispatchPtr());
+ return MFI->DispatchPtrUserSGPR;
+ case SIRegisterInfo::QUEUE_PTR:
+ llvm_unreachable("not implemented");
+ case SIRegisterInfo::WORKITEM_ID_X:
+ assert(MFI->hasWorkItemIDX());
return AMDGPU::VGPR0;
- case SIRegisterInfo::TIDIG_Y:
+ case SIRegisterInfo::WORKITEM_ID_Y:
+ assert(MFI->hasWorkItemIDY());
return AMDGPU::VGPR1;
- case SIRegisterInfo::TIDIG_Z:
+ case SIRegisterInfo::WORKITEM_ID_Z:
+ assert(MFI->hasWorkItemIDZ());
return AMDGPU::VGPR2;
}
llvm_unreachable("unexpected preloaded value type");
@@ -496,12 +612,9 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- if (!MRI.isPhysRegUsed(*I))
- return *I;
- }
+ for (unsigned Reg : *RC)
+ if (!MRI.isPhysRegUsed(Reg))
+ return Reg;
return AMDGPU::NoRegister;
}
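A hedged example of a call site for the renamed preloaded values (assumed
code, mirroring how the old TGID_*/TIDIG_* queries were used):

    // Fetch the system SGPR the hardware preloads with the work-group ID in
    // X, and mark it live-in so uses of it pass the machine verifier.
    unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X);
    assert(Reg != AMDGPU::NoRegister);
    MRI.addLiveIn(Reg);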
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 7da6de2..1795237 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -18,6 +18,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
namespace llvm {
@@ -29,6 +30,15 @@ private:
public:
SIRegisterInfo();
+ /// Return the end register initially reserved for the scratch buffer in case
+ /// spilling is needed.
+ unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
+
+ /// Return the end register initially reserved for the scratch wave offset in
+ /// case spilling is needed.
+ unsigned reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
unsigned getRegPressureSetLimit(const MachineFunction &MF,
@@ -40,10 +50,6 @@ public:
unsigned FIOperandNum,
RegScavenger *RS) const override;
- /// \brief get the register class of the specified type to use in the
- /// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
-
unsigned getHWRegIndex(unsigned Reg) const override;
/// \brief Return the 'base' register class for this register.
@@ -52,23 +58,30 @@ public:
/// \returns true if this class contains only SGPR registers
bool isSGPRClass(const TargetRegisterClass *RC) const {
- if (!RC)
- return false;
-
return !hasVGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
bool isSGPRClassID(unsigned RCID) const {
- if (static_cast<int>(RCID) == -1)
- return false;
-
return isSGPRClass(getRegClass(RCID));
}
+ bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return isSGPRClass(MRI.getRegClass(Reg));
+ return isSGPRClass(getPhysRegClass(Reg));
+ }
+
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
+ /// \returns true if this is a pseudo register class combination of VGPRs and
+ /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on
+ /// them.
+ static bool isPseudoRegClass(const TargetRegisterClass *RC) {
+ return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass;
+ }
+
/// \returns A VGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const;
@@ -79,6 +92,11 @@ public:
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;
+ bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const override;
+
/// \p Channel This is the register channel (e.g. a value from 0-16), not the
/// SubReg index.
/// \returns The sub-register of Reg that is in Channel.
@@ -91,19 +109,25 @@ public:
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
- /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
+ /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
bool opCanUseInlineConstant(unsigned OpType) const;
enum PreloadedValue {
- TGID_X,
- TGID_Y,
- TGID_Z,
- SCRATCH_WAVE_OFFSET,
- SCRATCH_PTR,
- INPUT_PTR,
- TIDIG_X,
- TIDIG_Y,
- TIDIG_Z
+ // SGPRS:
+ PRIVATE_SEGMENT_BUFFER = 0,
+ DISPATCH_PTR = 1,
+ QUEUE_PTR = 2,
+ KERNARG_SEGMENT_PTR = 3,
+ WORKGROUP_ID_X = 10,
+ WORKGROUP_ID_Y = 11,
+ WORKGROUP_ID_Z = 12,
+ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+
+ // VGPRS:
+ FIRST_VGPR_VALUE = 15,
+ WORKITEM_ID_X = FIRST_VGPR_VALUE,
+ WORKITEM_ID_Y = 16,
+ WORKITEM_ID_Z = 17
};
/// \brief Returns the physical register that \p Value is stored in.
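A short usage sketch for the isSGPRReg() helper added above (illustrative;
virtual registers consult MRI's register class, physical registers the class
recovered via getPhysRegClass()):

    bool A = TRI->isSGPRReg(MRI, AMDGPU::SGPR4); // true: scalar register
    bool B = TRI->isSGPRReg(MRI, AMDGPU::VGPR0); // false: vector register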
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 2a9017f..bfaf937 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,10 +10,13 @@
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-
-class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+ DwarfRegNum<[!cast<int>(HWEncoding)]> {
let Namespace = "AMDGPU";
- let HWEncoding = encoding;
+
+ // This is not yet the complete register encoding. An additional
+ // bit is set for VGPRs.
+ let HWEncoding = regIdx;
}
// Special Registers
@@ -21,7 +24,8 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+ DwarfRegAlias<VCC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 106;
@@ -30,7 +34,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+ DwarfRegAlias<EXEC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 126;
@@ -39,18 +44,29 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;
-def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
-def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
+ def _ci : SIReg<n, ci_e>;
+ def _vi : SIReg<n, vi_e>;
+ def "" : SIReg<"", 0>;
+}
-// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+class FlatReg <Register lo, Register hi, bits<16> encoding> :
+ RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+ DwarfRegAlias<lo> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 104;
+ let HWEncoding = encoding;
}
+defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
+defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
+
+def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
+def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
+def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
+
// SGPR registers
-foreach Index = 0-101 in {
+foreach Index = 0-103 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}
@@ -65,25 +81,27 @@ foreach Index = 0-255 in {
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
+// TODO: Do we need to set DwarfRegAlias on register tuples?
+
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add (sequence "SGPR%u", 0, 101))>;
+ (add (sequence "SGPR%u", 0, 103))>;
// SGPR 64-bit registers
def SGPR_64Regs : RegisterTuples<[sub0, sub1],
- [(add (decimate (trunc SGPR_32, 101), 2)),
+ [(add (decimate SGPR_32, 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add (decimate (trunc SGPR_32, 99), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (decimate (trunc SGPR_32, 95), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
@@ -95,7 +113,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
// SGPR 512-bit registers
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
- [(add (decimate (trunc SGPR_32, 87), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
@@ -174,44 +192,57 @@ class RegImmMatcher<string name> : AsmOperandClass {
let RenderMethod = "addRegOrImmOperands";
}
-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
- let CopyCost = -1; // Theoretically it is possible to read from SCC,
- // but it should never be necessary.
-}
-
-def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
- (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+ (add SGPR_64, VCC, EXEC, FLAT_SCR)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
+ // Requires 2 s_mov_b64 to copy
+ let CopyCost = 2;
+}
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
+ // Requires 4 s_mov_b64 to copy
+ let CopyCost = 4;
+}
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+ // Requires 8 s_mov_b64 to copy
+ let CopyCost = 8;
+}
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+ // Requires 2 v_mov_b32 to copy
+ let CopyCost = 2;
+}
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
let Size = 96;
+
+ // Requires 3 v_mov_b32 to copy
+ let CopyCost = 3;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+ // Requires 4 v_mov_b32 to copy
+ let CopyCost = 4;
+}
-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
+ let CopyCost = 8;
+}
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+ let CopyCost = 16;
+}
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
@@ -253,7 +284,9 @@ def SCSrc_32 : RegInlineOperand<SReg_32> {
def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+ let CopyCost = 2;
+}
def VSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
@@ -282,3 +315,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
let OperandType = "OPERAND_REG_INLINE_C";
let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}
+
+//===----------------------------------------------------------------------===//
+// SCSrc_* Operands with an SGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def SCSrc_64 : RegisterOperand<SReg_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"SCSrc64">;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
index 9b1f676..cd77e51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -17,16 +17,28 @@ def WriteLDS : SchedWrite;
def WriteSALU : SchedWrite;
def WriteSMEM : SchedWrite;
def WriteVMEM : SchedWrite;
+def WriteBarrier : SchedWrite;
// Vector ALU instructions
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
+def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
-def WriteDouble : SchedWrite;
+// Slow quarter rate f64 instruction.
+def WriteDouble : SchedWrite;
+
+// Half rate f64 instruction (same as v_add_f64).
def WriteDoubleAdd : SchedWrite;
+// Half rate 64-bit instructions.
+def Write64Bit : SchedWrite;
+
+// FIXME: Should there be a class for instructions which are VALU
+// instructions and have VALU rates, but write to the SALU (i.e. VOPC
+// instructions)?
+
def SIFullSpeedModel : SchedMachineModel;
def SIQuarterSpeedModel : SchedMachineModel;
@@ -53,7 +65,7 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
// The latency numbers are taken from AMD Accelerated Parallel Processing
-// guide. They may not be acurate.
+// guide. They may not be accurate.
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
@@ -64,8 +76,10 @@ multiclass SICommonWriteRes {
def : HWWriteRes<WriteSALU, [HWSALU], 1>;
def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+ def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
def : HWVALUWriteRes<Write32Bit, 1>;
+ def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 5d00bdd..4f0913f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -141,8 +141,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
if (!MRI.isSSA())
return;
- assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
- TII->isVOPC(MI.getOpcode()));
+ assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
@@ -187,6 +186,21 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
}
+// Copy MachineOperand with all flags except setting it as implicit.
+static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) {
+ assert(!Orig.isImplicit());
+ return MachineOperand::CreateReg(Orig.getReg(),
+ Orig.isDef(),
+ true,
+ Orig.isKill(),
+ Orig.isDead(),
+ Orig.isUndef(),
+ Orig.isEarlyClobber(),
+ Orig.getSubReg(),
+ Orig.isDebug(),
+ Orig.isInternalRead());
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -236,14 +250,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because the register allocator has
- // trouble with sequences like this, which cause the allocator to run
- // out of registers if vreg0 and vreg1 belong to the VCCReg register
- // class:
- // vreg0 = VOPC;
- // vreg1 = VOPC;
- // S_AND_B64 vreg0, vreg1
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because this is only one register and
+ // cannot deal with sequences which would require multiple copies of
+ // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
//
// So, instead of forcing the instruction to write to VCC, we provide
// a hint to the register allocator to use VCC and then we will run
@@ -272,13 +282,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
// We can shrink this instruction
- DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+ DEBUG(dbgs() << "Shrinking " << MI);
MachineInstrBuilder Inst32 =
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
- // dst
- Inst32.addOperand(MI.getOperand(0));
+ // Add the dst operand if the 32-bit encoding also has an explicit $dst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.addOperand(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
@@ -288,9 +307,19 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
Inst32.addOperand(*Src1);
const MachineOperand *Src2 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2)
- Inst32.addOperand(*Src2);
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.addOperand(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc.
+ assert(Src2->getReg() == AMDGPU::VCC &&
+ "Unexpected missing register operand");
+ Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
+ }
+ }
++NumInstructionsShrunk;
MI.eraseFromParent();
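The copyRegOperandAsImplicit() helper above preserves every register flag
while flipping the operand to implicit; a minimal self-contained check
(illustrative only):

    MachineOperand Orig = MachineOperand::CreateReg(AMDGPU::VCC,
                                                    /*isDef=*/false,
                                                    /*isImp=*/false,
                                                    /*isKill=*/true);
    MachineOperand Imp = copyRegOperandAsImplicit(Orig);
    assert(Imp.isImplicit() && Imp.isKill() && Imp.getReg() == AMDGPU::VCC);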
diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
index 591ce85..dbdc76b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -22,6 +22,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -61,14 +62,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
}
bool SITypeRewriter::runOnFunction(Function &F) {
- Attribute A = F.getFnAttribute("ShaderType");
-
- unsigned ShaderType = ShaderType::COMPUTE;
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- Str.getAsInteger(0, ShaderType);
- }
- if (ShaderType == ShaderType::COMPUTE)
+ if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
return false;
visit(F);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b76b400..add415e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,12 +7,23 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
+
namespace llvm {
namespace AMDGPU {
@@ -56,5 +67,91 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
+MCSection *getHSATextSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_AMDGPU_HSA_AGENT |
+ ELF::SHF_AMDGPU_HSA_CODE);
+}
+
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_AMDGPU_HSA_GLOBAL |
+ ELF::SHF_AMDGPU_HSA_AGENT);
+}
+
+MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_AMDGPU_HSA_GLOBAL);
+}
+
+MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
+ ELF::SHF_AMDGPU_HSA_AGENT);
+}
+
+bool isGroupSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}
+
+bool isGlobalSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}
+
+bool isReadOnlySegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
+static const char ShaderTypeAttribute[] = "ShaderType";
+
+unsigned getShaderType(const Function &F) {
+ Attribute A = F.getFnAttribute(ShaderTypeAttribute);
+ unsigned ShaderType = ShaderType::COMPUTE;
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType)) {
+ LLVMContext &Ctx = F.getContext();
+ Ctx.emitError("can't parse shader type");
+ }
+ }
+ return ShaderType;
+}
+
+bool isSI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
+}
+
+bool isCI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
+}
+
+bool isVI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+}
+
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+
+ switch(Reg) {
+ default: break;
+ case AMDGPU::FLAT_SCR:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
+
+ case AMDGPU::FLAT_SCR_LO:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
+
+ case AMDGPU::FLAT_SCR_HI:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
+ }
+ return Reg;
+}
+
} // End namespace AMDGPU
} // End namespace llvm
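A hedged example of the intended getMCReg() call site (e.g. in the MC layer
when printing or encoding a register operand; the exact caller is outside
this diff):

    // Resolve the FLAT_SCR pseudo register to the subtarget's hardware
    // register: encoding 104 on CI, 102 on VI (SI has no flat scratch).
    unsigned HWReg = AMDGPU::getMCReg(AMDGPU::FLAT_SCR, STI);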
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f57028c..19419a2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -15,6 +15,11 @@
namespace llvm {
class FeatureBitset;
+class Function;
+class GlobalValue;
+class MCContext;
+class MCSection;
+class MCSubtargetInfo;
namespace AMDGPU {
@@ -27,6 +32,27 @@ struct IsaVersion {
IsaVersion getIsaVersion(const FeatureBitset &Features);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
+MCSection *getHSATextSection(MCContext &Ctx);
+
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
+
+MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
+
+MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
+
+bool isGroupSegment(const GlobalValue *GV);
+bool isGlobalSegment(const GlobalValue *GV);
+bool isReadOnlySegment(const GlobalValue *GV);
+
+unsigned getShaderType(const Function &F);
+
+bool isSI(const MCSubtargetInfo &STI);
+bool isCI(const MCSubtargetInfo &STI);
+bool isVI(const MCSubtargetInfo &STI);
+
+/// If \p Reg is a pseudo reg, return the correct hardware register given
+/// \p STI; otherwise return \p Reg.
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
index aca4673..20a026a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -73,8 +73,8 @@ defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
} // End isCommutable = 1
defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
-// Aliases to simplify matching of floating-pint instructions that are VOP2 on
-// SI and VOP3 on VI.
+// Aliases to simplify matching of floating-point instructions that
+// are VOP2 on SI and VOP3 on VI.
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
name#" $dst, $src0, $src1",
@@ -89,60 +89,15 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-
//===----------------------------------------------------------------------===//
-// SMEM Patterns
+// SMEM Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [isVI] in {
+def S_DCACHE_WB : SMEM_Inval <0x21,
+ "s_dcache_wb", int_amdgcn_s_dcache_wb>;
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
-
-// Patterns for global loads with no offset
-class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 0, 0)
->;
-
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
-
-class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, i64:$addr),
- (inst $data, $addr, 0, 0, 0)
->;
-
-def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
-
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr, vt:$data)),
- (inst $addr, $data, 0, 0)
->;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
+ "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-} // End Predicates = [isVI]
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 9550a3a..cd7540e 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -35,7 +35,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef609a6..a44dc83 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -17,6 +17,17 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// ARM Helper classes.
+//
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+class Architecture<string fname, string aname, list<SubtargetFeature> features >
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+//===----------------------------------------------------------------------===//
// ARM Subtarget state.
//
@@ -51,8 +62,11 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
"true", "Enable ARMv8 FP",
[FeatureVFP4]>;
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision floating point",
+ [FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict VFP3 to 16 double registers">;
+ "Restrict FP to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
@@ -119,9 +133,9 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Has return address stack">;
-/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
-def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
- "Supports v7 DSP instructions in Thumb2">;
+/// DSP extension.
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in ARM and/or Thumb2">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
@@ -150,11 +164,28 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
-// ARM ISAs.
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable as "
+ "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for 32-bit "
+ "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAs.
+//
+
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
@@ -180,302 +211,444 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
[HasV6T2Ops, FeaturePerfMon]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureVirtualization,
- FeatureMP]>;
+ [HasV7Ops]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
- [HasV8Ops, FeatureAClass, FeatureCRC]>;
+ [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions",
+ [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
-// ARM Processors supported.
+// ARM Processor subtarget features.
//
-include "ARMSchedule.td"
-
-// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
- "Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone, FeatureMP]>;
+ "Cortex-A5 ARM processors", []>;
def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
- "Cortex-A7 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureVFP4, FeatureMP,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureVirtualization]>;
+ "Cortex-A7 ARM processors", []>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
- "Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone]>;
+ "Cortex-A8 ARM processors", []>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
- "Cortex-A9 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
-def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
- "Swift ARM processors",
- [FeatureNEONForFP, FeatureT2XtPk,
- FeatureVFP4, FeatureMP, FeatureHWDiv,
- FeatureHWDivARM, FeatureAvoidPartialCPSR,
- FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx, FeatureTrustZone]>;
+ "Cortex-A9 ARM processors", []>;
def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
- "Cortex-A12 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
-
-// FIXME: It has not been determined if A15 has these features.
-def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
- "Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureVFP4,
- FeatureMP, FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone, FeatureVirtualization]>;
-
+ "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors", []>;
def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
- "Cortex-A17 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
+ "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors", []>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
- "Cortex-A53 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
-
+ "Cortex-A53 ARM processors", []>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
- "Cortex-A57 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
+ "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+ "Cortex-A72 ARM processors", []>;
-def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
- "Cortex-R4 ARM processors",
- [FeatureHWDiv,
- FeatureAvoidPartialCPSR,
- FeatureDSPThumb2, FeatureT2XtPk,
- HasV7Ops, FeatureDB, FeatureHasRAS,
- FeatureRClass]>;
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+ "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors", []>;
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors", []>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
- "Cortex-R5 ARM processors",
- [FeatureSlowFPBrcc,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
-
-// FIXME: krait has currently the same features as A9
-// plus VFP4 and hardware division features.
-def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone,
- FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM]>;
+ "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+ "Cortex-R7 ARM processors", []>;
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//
+
+include "ARMSchedule.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM architectures
+//
+
+def ARMv2 : Architecture<"armv2", "ARMv2", []>;
+
+def ARMv2a : Architecture<"armv2a", "ARMv2a", []>;
+
+def ARMv3 : Architecture<"armv3", "ARMv3", []>;
+
+def ARMv3m : Architecture<"armv3m", "ARMv3m", []>;
+
+def ARMv4 : Architecture<"armv4", "ARMv4", []>;
+
+def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>;
+
+def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>;
+
+def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>;
+
+def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
+
+def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
+
+def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
+ FeatureDSP]>;
+
+def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
+
+def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps,
+ FeatureTrustZone]>;
+
+def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureAClass]>;
+
+def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
+ FeatureDB,
+ FeatureDSP,
+ FeatureHWDiv,
+ FeatureRClass]>;
+
+def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass]>;
+
+def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass,
+ FeatureDSP,
+ FeatureT2XtPk]>;
+
+def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+// Aliases
+def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
+def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>;
+def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>;
+def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
+def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
+def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM processors
+//
+
+// Dummy CPU, used to target architectures
+def : ProcNoItin<"generic", []>;
+
+def : ProcNoItin<"arm8", [ARMv4]>;
+def : ProcNoItin<"arm810", [ARMv4]>;
+def : ProcNoItin<"strongarm", [ARMv4]>;
+def : ProcNoItin<"strongarm110", [ARMv4]>;
+def : ProcNoItin<"strongarm1100", [ARMv4]>;
+def : ProcNoItin<"strongarm1110", [ARMv4]>;
+
+def : ProcNoItin<"arm7tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>;
+def : ProcNoItin<"arm710t", [ARMv4t]>;
+def : ProcNoItin<"arm720t", [ARMv4t]>;
+def : ProcNoItin<"arm9", [ARMv4t]>;
+def : ProcNoItin<"arm9tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm920", [ARMv4t]>;
+def : ProcNoItin<"arm920t", [ARMv4t]>;
+def : ProcNoItin<"arm922t", [ARMv4t]>;
+def : ProcNoItin<"arm940t", [ARMv4t]>;
+def : ProcNoItin<"ep9312", [ARMv4t]>;
+
+def : ProcNoItin<"arm10tdmi", [ARMv5t]>;
+def : ProcNoItin<"arm1020t", [ARMv5t]>;
+
+def : ProcNoItin<"arm9e", [ARMv5te]>;
+def : ProcNoItin<"arm926ej-s", [ARMv5te]>;
+def : ProcNoItin<"arm946e-s", [ARMv5te]>;
+def : ProcNoItin<"arm966e-s", [ARMv5te]>;
+def : ProcNoItin<"arm968e-s", [ARMv5te]>;
+def : ProcNoItin<"arm10e", [ARMv5te]>;
+def : ProcNoItin<"arm1020e", [ARMv5te]>;
+def : ProcNoItin<"arm1022e", [ARMv5te]>;
+def : ProcNoItin<"xscale", [ARMv5te]>;
+def : ProcNoItin<"iwmmxt", [ARMv5te]>;
+
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>;
+
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
-// V4 Processors.
-def : ProcNoItin<"generic", []>;
-def : ProcNoItin<"arm8", []>;
-def : ProcNoItin<"arm810", []>;
-def : ProcNoItin<"strongarm", []>;
-def : ProcNoItin<"strongarm110", []>;
-def : ProcNoItin<"strongarm1100", []>;
-def : ProcNoItin<"strongarm1110", []>;
-
-// V4T Processors.
-def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
-def : ProcNoItin<"arm710t", [HasV4TOps]>;
-def : ProcNoItin<"arm720t", [HasV4TOps]>;
-def : ProcNoItin<"arm9", [HasV4TOps]>;
-def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm920", [HasV4TOps]>;
-def : ProcNoItin<"arm920t", [HasV4TOps]>;
-def : ProcNoItin<"arm922t", [HasV4TOps]>;
-def : ProcNoItin<"arm940t", [HasV4TOps]>;
-def : ProcNoItin<"ep9312", [HasV4TOps]>;
-
-// V5T Processors.
-def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
-def : ProcNoItin<"arm1020t", [HasV5TOps]>;
-
-// V5TE Processors.
-def : ProcNoItin<"arm9e", [HasV5TEOps]>;
-def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm10e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
-def : ProcNoItin<"xscale", [HasV5TEOps]>;
-def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
-
-// V6 Processors.
-def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-
-// V6K Processors.
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
- FeatureDSPThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx,
- FeatureDSPThumb2]>;
-
-// V7a Processors.
// FIXME: A5 has currently the same Schedule model as A8
-def : ProcessorModel<"cortex-a5", CortexA8Model,
- [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-def : ProcessorModel<"cortex-a7", CortexA8Model,
- [ProcA7, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a8", CortexA8Model,
- [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a9", CortexA9Model,
- [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS, FeatureMP,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4]>;
+
+def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureVirtualization]>;
+
+def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+
+def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureMP]>;
// FIXME: A12 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a12", CortexA9Model,
- [ProcA12, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
-
-// FIXME: A15 has currently the same ProcessorModel as A9.
-def : ProcessorModel<"cortex-a15", CortexA9Model,
- [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureMP]>;
+
+// FIXME: A15 currently has the same Schedule model as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: A17 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a17", CortexA9Model,
- [ProcA17, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
+def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: krait has currently the same Schedule model as A9
-def : ProcessorModel<"krait", CortexA9Model,
- [ProcKrait, HasV7Ops,
- FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+// FIXME: krait currently has the same features as A9 plus VFP4 and hardware
+// division features.
+def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
+ FeatureHasRAS,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM]>;
+
+def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
// FIXME: R4 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4", CortexA8Model,
- [ProcR4]>;
+def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4f", CortexA8Model,
- [ProcR4,
- FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVFP3, FeatureD16]>;
+def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r5", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS,
- FeatureD16, FeatureRClass]>;
+def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
-def : ProcessorModel<"cortex-r7", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS, FeatureVFPOnlySP,
- FeatureD16, FeatureMP, FeatureRClass]>;
-
-// V7M Processors.
-def : ProcNoItin<"cortex-m3", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-def : ProcNoItin<"sc300", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-
-// V7EM Processors.
-def : ProcNoItin<"cortex-m4", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureD16,
- FeatureMClass]>;
-def : ProcNoItin<"cortex-m7", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureFPARMv8,
- FeatureD16, FeatureMClass]>;
-
-
-// Swift uArch Processors.
-def : ProcessorModel<"swift", SwiftModel,
- [ProcSwift, HasV7Ops, FeatureNEON,
- FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-
-// V8 Processors
-def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
-def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
+def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureVFPOnlySP,
+ FeatureD16,
+ FeatureFP16,
+ FeatureMP,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+def : ProcNoItin<"cortex-m3", [ARMv7m]>;
+def : ProcNoItin<"sc300", [ARMv7m]>;
+
+def : ProcNoItin<"cortex-m4", [ARMv7em,
+ FeatureVFP4,
+ FeatureVFPOnlySP,
+ FeatureD16]>;
+
+def : ProcNoItin<"cortex-m7", [ARMv7em,
+ FeatureFPARMv8,
+ FeatureD16]>;
+
+
+def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
// Cyclone is very similar to swift
-def : ProcessorModel<"cyclone", SwiftModel,
- [ProcSwift, HasV8Ops, HasV7Ops,
- FeatureCrypto, FeatureFPARMv8,
- FeatureDB,FeatureDSPThumb2,
- FeatureHasRAS, FeatureZCZeroing]>;
+def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx,
+ FeatureCrypto,
+ FeatureZCZeroing]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
@@ -504,8 +677,15 @@ def ARMAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def ARMAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "ARM";
+ string BreakCharacters = ".";
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
let AssemblyWriters = [ARMAsmWriter];
+ let AssemblyParserVariants = [ARMAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 738dded..206db96 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -60,7 +60,7 @@ using namespace llvm;
ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
- InConstantPool(false) {}
+ InConstantPool(false), OptimizationGoals(-1) {}
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
@@ -80,8 +80,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer->EmitLabel(CurrentFnSym);
}
-void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
+ const Function* F = MF.getFunction();
+ const TargetMachine& TM = MF.getTarget();
+
+ // Calculate this function's optimization goal.
+ unsigned OptimizationGoal;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ // For best debugging illusion, speed and small size sacrificed
+ OptimizationGoal = 6;
+ else if (F->optForMinSize())
+ // Aggressively for small size, speed and debug illusion sacrificed
+ OptimizationGoal = 4;
+ else if (F->optForSize())
+ // For small size, but speed and debugging illusion preserved
+ OptimizationGoal = 3;
+ else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ // Aggressively for speed, small size and debug illusion sacrificed
+ OptimizationGoal = 2;
+ else if (TM.getOptLevel() > CodeGenOpt::None)
+ // For speed, but small size and good debug illusion preserved
+ OptimizationGoal = 1;
+ else // TM.getOptLevel() == CodeGenOpt::None
+ // For good debugging, but speed and small size preserved
+ OptimizationGoal = 5;
+
+ // Combine a new optimization goal with existing ones.
+ if (OptimizationGoals == -1) // uninitialized goals
+ OptimizationGoals = OptimizationGoal;
+ else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
+ OptimizationGoals = 0;
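The module-wide value behaves as a small three-state lattice. A standalone sketch of the same combine step (plain C++, detached from the AsmPrinter, for illustration only):

    // -1: no function seen yet; 0: functions disagree; otherwise the single
    // goal shared by every function processed so far.
    int combineGoals(int Combined, unsigned Goal) {
      if (Combined == -1)
        return (int)Goal;   // first function initializes the combined goal
      if (Combined != (int)Goal)
        return 0;           // conflicting goals collapse to 0
      return Combined;      // still unanimous
    }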
if (Subtarget->isTargetCOFF()) {
- bool Internal = MF.getFunction()->hasInternalLinkage();
+ bool Internal = F->hasInternalLinkage();
COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
@@ -198,22 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel(unsigned uid) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid;
return OutContext.getOrCreateSymbol(Name);
}
-
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getDataLayout();
- SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
- << getFunctionNumber();
- return OutContext.getOrCreateSymbol(Name);
-}
-
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -515,6 +535,17 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // The last attribute to be emitted is ABI_optimization_goals
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ if (OptimizationGoals > 0 &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI()))
+ ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
+ OptimizationGoals = -1;
+
+ ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -532,7 +563,7 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
if (Subtarget->hasV8Ops())
return ARMBuildAttrs::v8;
else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ if (Subtarget->isMClass() && Subtarget->hasDSP())
return ARMBuildAttrs::v7E_M;
return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
@@ -587,7 +618,7 @@ void ARMAsmPrinter::emitAttributes() {
// We consider krait as a "cortex-a9" + hwdiv CPU
// Enable hwdiv through ".arch_extension idiv"
if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV);
+ ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
} else
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
}
@@ -807,8 +838,6 @@ void ARMAsmPrinter::emitAttributes() {
else if (STI.hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowVirtualization);
-
- ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -828,8 +857,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF;
case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF;
- case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT;
- case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF;
+ case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL;
}
llvm_unreachable("Invalid ARMCPModifier!");
}
@@ -875,8 +903,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getDataLayout();
- int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout &DL = getDataLayout();
+ int Size = DL.getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -909,10 +937,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutContext);
if (ACPV->getPCAdjustment()) {
- MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- ACPV->getLabelId(),
- OutContext);
+ MCSymbol *PCLabel =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ ACPV->getLabelId(), OutContext);
const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
PCRelExpr =
MCBinaryExpr::createAdd(PCRelExpr,
@@ -1136,6 +1163,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = 0;
break;
case ARM::ADDri:
+ case ARM::t2ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
@@ -1198,7 +1226,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1355,9 +1383,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(2).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1388,9 +1416,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(3).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1414,10 +1442,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
@@ -1436,10 +1463,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
@@ -1468,10 +1494,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// a PC-relative address at the ldr instruction.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the load
unsigned Opcode;
@@ -1519,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MCPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MCPE.Val.ConstVal);
+ EmitGlobalConstant(DL, MCPE.Val.ConstVal);
return;
}
case ARM::JUMPTABLE_ADDRS:
@@ -1653,12 +1678,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// adds $val, #7
// str $val, [$src, #4]
// movs r0, #0
- // b 1f
+ // b LSJLJEH
// movs r0, #1
- // 1:
+ // LSJLJEH:
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- MCSymbol *Label = GetARMSJLJEHLabel();
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 3d25121..ed7be2d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// labels used for ARMv4t thumb code to make register indirect calls.
SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+ /// OptimizationGoals - Maintain a combined optimization goal for all
+ /// functions in a module: one of Tag_ABI_optimization_goals values,
+ /// -1 if uninitialized, 0 if conflicting goals
+ int OptimizationGoals;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -84,7 +89,7 @@ public:
void EmitFunctionEntryLabel() override;
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
- void EmitXXStructor(const Constant *CV) override;
+ void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -119,8 +124,6 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const;
- MCSymbol *GetARMSJLJEHLabel() const;
-
MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
public:
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9f43e73..49f3288 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -97,7 +97,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
}
@@ -440,7 +440,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
if (MI->isBundle()) {
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
@@ -518,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
static bool isCPSRDefined(const MachineInstr *MI) {
for (const auto &MO : MI->operands())
- if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
@@ -647,7 +647,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
unsigned Size = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
@@ -853,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1043,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1224,6 +1219,60 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
+/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+ bool isThumb1 = Subtarget.isThumb1Only();
+ bool isThumb2 = Subtarget.isThumb2();
+ const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineInstr *MI = MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ MachineBasicBlock *BB = MI->getParent();
+
+ MachineInstrBuilder LDM, STM;
+ if (isThumb1 || !MI->getOperand(1).isDead()) {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+ : isThumb1 ? ARM::tLDMIA_UPD
+ : ARM::LDMIA_UPD))
+ .addOperand(MI->getOperand(1));
+ } else {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+ }
+
+ if (isThumb1 || !MI->getOperand(0).isDead()) {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+ : isThumb1 ? ARM::tSTMIA_UPD
+ : ARM::STMIA_UPD))
+ .addOperand(MI->getOperand(0));
+ } else {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+ }
+
+ AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+ AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+ // Sort the scratch registers into ascending order.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ llvm::SmallVector<unsigned, 6> ScratchRegs;
+ for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+ ScratchRegs.push_back(MI->getOperand(I).getReg());
+ std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+ [&TRI](const unsigned &Reg1,
+ const unsigned &Reg2) -> bool {
+ return TRI.getEncodingValue(Reg1) <
+ TRI.getEncodingValue(Reg2);
+ });
+
+ for (const auto &Reg : ScratchRegs) {
+ LDM.addReg(Reg, RegState::Define);
+ STM.addReg(Reg, RegState::Kill);
+ }
+
+ BB->erase(MBBI);
+}
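One note on the sort above: LDM/STM register lists are encoded as a bitmask and transfer registers in ascending encoding order, so the scratch registers must be ordered by their hardware encoding rather than by their (arbitrary) enum value. A standalone sketch of that ordering, with the LLVM types assumed available:

    #include <algorithm>
    #include <vector>

    void sortByEncoding(std::vector<unsigned> &Regs,
                        const llvm::TargetRegisterInfo &TRI) {
      // Order by hardware encoding so the emitted register list matches
      // the ascending order the LDM/STM encoding implies.
      std::sort(Regs.begin(), Regs.end(), [&TRI](unsigned A, unsigned B) {
        return TRI.getEncodingValue(A) < TRI.getEncodingValue(B);
      });
    }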
+
+
bool
ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
@@ -1237,6 +1286,11 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return true;
}
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ expandMEMCPY(MI);
+ return true;
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
@@ -1325,9 +1379,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = ARMConstantPoolConstant::
- Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::Create(
+ cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+ 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
else if (ACPV->isExtSymbol())
NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction()->getContext(),
@@ -1645,16 +1699,14 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!NumCycles)
return false;
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
- const Function *F = MBB.getParent()->getFunction();
- if (F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize)) {
+ if (MBB.getParent()->getFunction()->optForSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
@@ -1677,12 +1729,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
// Attempt to estimate the relative costs of predication versus branching.
- unsigned UnpredCost = Probability.getNumerator() * NumCycles;
- UnpredCost /= Probability.getDenominator();
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
-
- return (NumCycles + ExtraPredCycles) <= UnpredCost;
+ // Here we scale up each component of UnpredCost to avoid precision issues
+ // when scaling NumCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+ UnpredCost += ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+ return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
}
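The ScalingUpFactor matters because the old Numerator*Cycles/Denominator form truncates: with Probability = 1/3 and NumCycles = 2 it yields 0, discarding the cost entirely. A standalone sketch of the scaled computation (BranchProbability::scale(x) is essentially x * Numerator / Denominator in 64-bit arithmetic; the free function below is illustrative, not LLVM API):

    #include <cstdint>

    unsigned scaledUnpredCost(unsigned NumCycles, uint32_t Num, uint32_t Denom,
                              unsigned MispredictPenalty) {
      const unsigned ScalingUpFactor = 1024;
      // 2 cycles at p = 1/3 -> 682/1024 of a cycle instead of rounding to 0.
      unsigned UnpredCost =
          (uint64_t)NumCycles * ScalingUpFactor * Num / Denom;
      UnpredCost += ScalingUpFactor;                          // the branch itself
      UnpredCost += MispredictPenalty * ScalingUpFactor / 10; // misprediction
      return UnpredCost;
    }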
bool ARMBaseInstrInfo::
@@ -1690,23 +1744,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FMBB,
unsigned FCycles, unsigned FExtra,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- unsigned TUnpredCost = Probability.getNumerator() * TCycles;
- TUnpredCost /= Probability.getDenominator();
-
- uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
- unsigned FUnpredCost = Comp * FCycles;
- FUnpredCost /= Probability.getDenominator();
-
+ // Here we scale up each component of UnpredCost to avoid precision issues
+ // when scaling TCycles/FCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
+ Probability.getCompl().scale(FCycles * ScalingUpFactor);
unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
- return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+ return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
}
bool
@@ -1744,9 +1797,10 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
llvm_unreachable("Unknown unconditional branch opcode!");
}
-/// commuteInstruction - Handle commutable instructions.
-MachineInstr *
-ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
switch (MI->getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
@@ -1756,7 +1810,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return nullptr;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
if (!MI)
return nullptr;
// After swapping the MOVCC operands, also invert the condition.
@@ -1765,7 +1819,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1975,21 +2029,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
- MachineInstr *MI) {
- for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
- Subreg.isValid(); ++Subreg)
- if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
- MachineBasicBlock::LQR_Dead)
- return true;
- return false;
-}
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ if (!MF.getFunction()->optForMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2058,11 +2103,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// registers live within the function we might clobber a return value
// register; the other way a register can be live here is if it's
// callee-saved.
- // TODO: Currently, computeRegisterLiveness() does not report "live" if a
- // sub reg is live. When computeRegisterLiveness() works for sub reg, it
- // can replace isAnySubRegLive().
if (isCalleeSavedRegister(CurReg, CSRegs) ||
- isAnySubRegLive(CurReg, TRI, MI)) {
+ MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
// VFP pops don't allow holes in the register list, so any skip is fatal
// for our transformation. GPR pops do, so we should just keep looking.
if (IsVFPPushPop)
@@ -3381,7 +3424,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
assert(Idx != -1 && "Cannot find bundled definition!");
DefIdx = Idx;
- return II;
+ return &*II;
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
@@ -3389,7 +3432,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
- MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ MachineBasicBlock::const_instr_iterator II = ++MI->getIterator();
assert(II->isInsideBundle() && "Empty bundle?");
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -3410,7 +3453,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
}
UseIdx = Idx;
- return II;
+ return &*II;
}
/// Return the number of cycles to add to (or subtract from) the static
@@ -3652,6 +3695,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
+ // FIXME: Use Function::optForSize().
if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
@@ -3931,11 +3975,11 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
unsigned Latency = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
if (I->getOpcode() != ARM::t2IT)
- Latency += getInstrLatency(ItinData, I, PredCost);
+ Latency += getInstrLatency(ItinData, &*I, PredCost);
}
return Latency;
}
@@ -4054,8 +4098,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->
- getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
AddDefaultPred(MIB);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b4706e3..d80c494 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -86,6 +86,18 @@ protected:
RegSubRegPair &BaseReg,
RegSubRegPairAndIdx &InsertedReg) const override;
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+ /// Do not call this method for a non-commutable instruction or for
+ /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -188,9 +200,6 @@ public:
MachineInstr *duplicate(MachineInstr *Orig,
MachineFunction &MF) const override;
- MachineInstr *commuteInstruction(MachineInstr*,
- bool=false) const override;
-
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const;
@@ -224,15 +233,15 @@ public:
bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
unsigned ExtraT, MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const override {
+ BranchProbability Probability) const override {
return NumCycles == 1;
}
@@ -343,6 +352,8 @@ private:
virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const = 0;
+ void expandMEMCPY(MachineBasicBlock::iterator) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e7d5be77..419717c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -225,7 +225,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const {
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
@@ -338,7 +339,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ if (!TargetRegisterInfo::canRealignStack(MF))
return false;
if (AFI->isThumb1OnlyFunction())
return false;
@@ -356,18 +357,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
-needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
-bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index fdc1ef9..cea8b80 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -94,7 +94,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
@@ -126,15 +126,15 @@ public:
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const override;
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index d687568..a731d00 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
-static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
-static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
@@ -199,9 +199,11 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+ auto &DL = State.getMachineFunction().getDataLayout();
+ unsigned StackAlign = DL.getStackAlignment();
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
- ArrayRef<uint16_t> RegList;
+ ArrayRef<MCPhysReg> RegList;
switch (LocVT.SimpleTy) {
case MVT::i32: {
RegList = RRegList;
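The alignment change in this hunk stops hard-coding 8 and instead clamps each HFA/HVA member's alignment to the stack alignment from the DataLayout. A standalone sketch of just that arithmetic, with hypothetical values:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Hypothetical member/stack alignments; the real values come from
      // getExtraInfo() and DataLayout::getStackAlignment().
      unsigned MemberAlign = 16; // e.g. a 16-byte aligned v2f64 member
      unsigned StackAlign = 8;   // e.g. AAPCS 8-byte stack alignment
      unsigned Align = std::min(MemberAlign, StackAlign);
      std::printf("alignment used: %u\n", Align); // prints 8
      return 0;
    }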
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 27cf06b..2335164 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[v2f64], CCIfAlign<"16",
+ CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f4ec8c6..e89757c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -342,7 +342,7 @@ void ARMConstantIslands::verify() {
#ifndef NDEBUG
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
+ MachineBasicBlock *MBB = &*MBBI;
unsigned MBBId = MBB->getNumber();
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
@@ -542,7 +542,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -589,6 +589,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
+ if (MI == MBB.end())
+ continue;
unsigned JTOpcode;
switch (MI->getOpcode()) {
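The two added lines guard against getLastNonDebugInstr() returning end() for a block with no non-debug instructions, which the old code would have dereferenced. The guard pattern in isolation, as a sketch against the MachineBasicBlock API:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineFunction.h"

    using namespace llvm;

    // Only dereference the result of getLastNonDebugInstr() after
    // comparing it against end(); empty blocks are skipped.
    static void visitLastInstructions(MachineFunction &MF) {
      for (MachineBasicBlock &MBB : MF) {
        MachineBasicBlock::iterator MI = MBB.getLastNonDebugInstr();
        if (MI == MBB.end())
          continue; // nothing (non-debug) in this block
        (void)MI->getOpcode(); // now safe
      }
    }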
@@ -639,12 +641,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI = MBB->getIterator();
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
return false;
@@ -722,15 +724,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
// The known bits of the entry block offset are determined by the function
// alignment.
BBInfo.front().KnownBits = MF->getAlignment();
// Compute block offsets and known bits.
- adjustBBOffsetsAfter(MF->begin());
+ adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -968,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
@@ -1088,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
- MachineFunction::const_iterator NextBlock = Water;
+ MachineFunction::const_iterator NextBlock = Water->getIterator();
if (++NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
NextBlockAlignment = 0;
@@ -1350,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = std::next(MachineFunction::iterator(UserMBB));
+ NewMBB = &*++UserMBB->getIterator();
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
@@ -1503,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
NewWaterList.insert(NewIsland);
// The new CPE goes before the following block (NewMBB).
- NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+ NewMBB = &*++WaterBB->getIterator();
} else {
// No water found.
DEBUG(dbgs() << "No water found\n");
@@ -1515,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1532,7 +1533,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF->insert(NewMBB, NewIsland);
+ MF->insert(NewMBB->getIterator(), NewIsland);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
@@ -1553,7 +1554,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1732,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
@@ -2058,9 +2059,9 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
/// \brief Returns whether CPEMI is the first instruction in the block
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
/// we can switch the first register to PC and usually remove the address
-/// calculation that preceeded it.
+/// calculation that preceded it.
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
- MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
MachineFunction *MF = MBB->getParent();
++MBB;
@@ -2235,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
- MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator BBi = BB->getIterator();
MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
@@ -2258,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
- MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++JTBB->getIterator();
MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
@@ -2273,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Update the CFG.
NewBB->addSuccessor(BB);
- JTBB->removeSuccessor(BB);
- JTBB->addSuccessor(NewBB);
+ JTBB->replaceSuccessor(BB, NewBB);
++NumJTInserted;
return NewBB;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7d41c69..c9849b2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -52,8 +52,7 @@ const char *ARMConstantPoolValue::getModifierText() const {
// strings if that's legal.
case ARMCP::no_modifier: return "none";
case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT: return "GOT";
- case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOT_PREL: return "GOT_PREL";
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e2..6b18a4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -39,8 +39,7 @@ namespace ARMCP {
enum ARMCPModifier {
no_modifier,
TLSGD,
- GOT,
- GOTOFF,
+ GOT_PREL,
GOTTPOFF,
TPOFF
};
@@ -103,8 +102,6 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
- unsigned getRelocationInfo() const override { return 2; }
-
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50..56f3498 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -330,22 +330,19 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- const unsigned NumEntries = array_lengthof(NEONLdStTable);
-
#ifndef NDEBUG
// Make sure the table is sorted.
static bool TableChecked = false;
if (!TableChecked) {
- for (unsigned i = 0; i != NumEntries-1; ++i)
- assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
- "NEONLdStTable is not sorted!");
+ assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+ "NEONLdStTable is not sorted!");
TableChecked = true;
}
#endif
- const NEONLdStTableEntry *I =
- std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
- if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ auto I = std::lower_bound(std::begin(NEONLdStTable),
+ std::end(NEONLdStTable), Opcode);
+ if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
return I;
return nullptr;
}
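The rewritten lookup is the standard idiom for a statically sorted table: assert sortedness once (debug builds only), then binary-search with std::lower_bound. A self-contained sketch with a hypothetical two-field table:

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <iterator>

    // Stand-in for a NEONLdStTable-style entry keyed by pseudo opcode.
    struct Entry {
      unsigned PseudoOpc;
      unsigned RealOpc;
      bool operator<(const Entry &O) const { return PseudoOpc < O.PseudoOpc; }
      bool operator<(unsigned Opc) const { return PseudoOpc < Opc; }
    };

    static const Entry Table[] = {{10, 100}, {20, 200}, {30, 300}};

    static const Entry *lookup(unsigned Opcode) {
      assert(std::is_sorted(std::begin(Table), std::end(Table)) &&
             "table must stay sorted for lower_bound");
      auto I = std::lower_bound(std::begin(Table), std::end(Table), Opcode);
      return (I != std::end(Table) && I->PseudoOpc == Opcode) ? I : nullptr;
    }

    int main() {
      if (const Entry *E = lookup(20))
        std::printf("20 -> %u\n", E->RealOpc); // prints 200
      return 0;
    }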
@@ -734,7 +731,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16.addImm(Pred).addReg(PredReg);
if (RequiresBundling)
- finalizeBundle(MBB, &*LO16, &*MBBI);
+ finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
@@ -747,6 +744,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
switch (Opcode) {
default:
return false;
+
+ case ARM::TCRETURNdi:
+ case ARM::TCRETURNri: {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode =
+ STI->isThumb()
+ ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI->isThumb())
+ MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ return true;
+ }
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index fdd0763..9bdf823c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -922,12 +922,9 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
if (Addr.BaseType == Address::FrameIndexBase) {
int FI = Addr.Base.FI;
int Offset = Addr.Offset;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset),
- Flags,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
@@ -1278,8 +1275,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1303,8 +1299,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
@@ -1341,8 +1336,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -1355,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
TII.get(Opc)).addReg(AddrReg));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
- for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
@@ -1860,8 +1854,9 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ } else {
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
case CallingConv::ARM_AAPCS_VFP:
if (!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
@@ -2944,48 +2939,51 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned Align, MVT VT) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolConstant *CPV =
- ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ LLVMContext *Context = &MF->getFunction()->getContext();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
+
+ unsigned ConstAlign =
+ MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+
+ unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
- unsigned Opc;
- unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
- // Load value.
- if (isThumb2) {
- DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::t2LDRpci), DestReg1)
- .addConstantPoolIndex(Idx));
- Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
- } else {
- // The extra immediate is for addrmode2.
- DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(ARM::LDRcp), DestReg1)
- .addConstantPoolIndex(Idx).addImm(0));
- Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
- }
+ // Fix the address by adding pc.
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
+ : ARM::PICADD;
+ DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (!Subtarget->isThumb())
+ AddDefaultPred(MIB);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- if (GlobalBaseReg == 0) {
- GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
- AFI->setGlobalBaseReg(GlobalBaseReg);
+ if (UseGOT_PREL && Subtarget->isThumb()) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
}
-
- unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
- DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
- DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
- GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc), DestReg2)
- .addReg(DestReg1)
- .addReg(GlobalBaseReg);
- if (!UseGOTOFF)
- MIB.addImm(0);
- AddOptionalDefs(MIB);
-
- return DestReg2;
+ return DestReg;
}
bool ARMFastISel::fastLowerArguments() {
@@ -3038,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() {
}
- static const uint16_t GPRArgRegs[] = {
+ static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -3055,7 +3053,7 @@ bool ARMFastISel::fastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- updateValueMap(I, ResultReg);
+ updateValueMap(&*I, ResultReg);
}
return true;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 6744000..c5990bb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// iOS requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetIOS())
+ if (STI.isTargetIOS() || STI.isTargetWatchOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -288,7 +289,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -305,7 +305,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
@@ -489,7 +493,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -689,60 +694,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl = MBBI->getDebugLoc();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
- return;
-
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
-}
-
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +711,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
- fixTCReturn(MF, MBB);
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- }
+
+ // First, position ourselves at the first (from the top) terminator instruction.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +795,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- fixTCReturn(MF, MBB);
-
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
@@ -932,12 +885,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
return Offset;
}
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -950,7 +897,6 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
SmallVector<std::pair<unsigned,bool>, 4> Regs;
unsigned i = CSI.size();
@@ -1008,7 +954,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
// Put any subsequent vpush instructions before this one: they will refer to
// higher register numbers so need to be pushed first in order to preserve
// monotonicity.
- --MI;
+ if (MI != MBB.begin())
+ --MI;
}
}
@@ -1022,12 +969,20 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
- unsigned RetOpcode = MI->getOpcode();
- bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNri);
- bool isInterrupt =
- RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ DebugLoc DL;
+ bool isTailCall = false;
+ bool isInterrupt = false;
+ bool isTrap = false;
+ if (MBB.end() != MI) {
+ DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+ isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ isTrap =
+ RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+ RetOpcode == ARM::tTRAP;
+ }
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -1043,11 +998,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
- STI.hasV5TOps()) {
- Reg = ARM::PC;
- LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ !isTrap && STI.hasV5TOps()) {
+ if (MBB.succ_empty()) {
+ Reg = ARM::PC;
+ DeleteRet = true;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ } else
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
// Fold the return instruction into the LDM.
- DeleteRet = true;
}
// If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1026,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet) {
+ if (DeleteRet && MI != MBB.end()) {
MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
@@ -1095,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Put any subsequent vpop instructions after this one: they will refer to
// higher register numbers so need to be popped afterwards.
- ++MI;
+ if (MI != MBB.end())
+ ++MI;
}
}
@@ -1109,7 +1068,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -1118,7 +1077,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// slot offsets can be wrong. The offset for d8 will always be correct.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned DNum = CSI[i].getReg() - ARM::D8;
- if (DNum >= 8)
+ if (DNum > NumAlignedDPRCS2Regs - 1)
continue;
int FI = CSI[i].getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
@@ -1269,7 +1228,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Find the frame index assigned to d8.
@@ -1654,13 +1613,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
- bool BigStack =
- (RS &&
- (MFI->estimateStackSize(MF) +
- ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
- estimateRSStackSizeLimit(MF, this)))
- || MFI->hasVarSizedObjects()
- || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+ estimateRSStackSizeLimit(MF, this))) ||
+ MFI->hasVarSizedObjects() ||
+ (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
@@ -1698,8 +1655,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill high register if the function is thumb
+ // Don't spill a high register if the function is Thumb. In the case of
+ // Windows on ARM, accept R11 (the frame pointer).
if (!AFI->isThumbFunction() ||
+ (STI.isTargetWindows() && Reg == ARM::R11) ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
@@ -1784,8 +1743,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignSPAdjust(Amount);
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
@@ -1885,7 +1843,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -1913,21 +1870,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
- e = PrologueMBB.livein_end();
- i != e; ++i) {
- AllocMBB->addLiveIn(*i);
- GetMBB->addLiveIn(*i);
- McrMBB->addLiveIn(*i);
- PrevStackMBB->addLiveIn(*i);
- PostStackMBB->addLiveIn(*i);
+ // Grab everything that reaches PrologueMBB to update their liveness as well.
+ SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+ SmallVector<MachineBasicBlock *, 2> WalkList;
+ WalkList.push_back(&PrologueMBB);
+
+ do {
+ MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+ for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+ if (BeforePrologueRegion.insert(PredBB).second)
+ WalkList.push_back(PredBB);
+ }
+ } while (!WalkList.empty());
+
+ // The order in that list is important.
+ // The blocks will all be inserted before PrologueMBB using that order.
+ // Therefore the block that should appear first in the CFG should appear
+ // first in the list.
+ MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+ PostStackMBB};
+
+ for (MachineBasicBlock *B : AddedBlocks)
+ BeforePrologueRegion.insert(B);
+
+ for (const auto &LI : PrologueMBB.liveins()) {
+ for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+ PredBB->addLiveIn(LI);
+ }
+
+ // Remove the newly added blocks from the list, since we know
+ // we do not have to do the following updates for them.
+ for (MachineBasicBlock *B : AddedBlocks) {
+ BeforePrologueRegion.erase(B);
+ MF.insert(PrologueMBB.getIterator(), B);
}
- MF.push_front(PostStackMBB);
- MF.push_front(AllocMBB);
- MF.push_front(GetMBB);
- MF.push_front(McrMBB);
- MF.push_front(PrevStackMBB);
+ for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+ // Make sure the LiveIns are still sorted and unique.
+ MBB->sortUniqueLiveIns();
+ // Replace the edges to PrologueMBB with edges to the sequences
+ // we are about to add.
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ }
// The required stack size that is aligned to ARM constant criterion.
AlignedStackSize = alignToARMConstant(StackSize);
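The new live-in propagation first computes every block that can reach PrologueMBB via a worklist fixpoint over predecessor edges. The shape of that walk, sketched over a generic graph rather than MachineBasicBlocks:

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Node {
      int Id;
      std::vector<Node *> Preds;
    };

    // Collect every transitive predecessor of Start exactly once,
    // mirroring how BeforePrologueRegion is built above.
    static std::set<Node *> regionBefore(Node *Start) {
      std::set<Node *> Region;
      std::vector<Node *> WorkList = {Start};
      do {
        Node *Cur = WorkList.back();
        WorkList.pop_back();
        for (Node *Pred : Cur->Preds)
          if (Region.insert(Pred).second) // first visit only
            WorkList.push_back(Pred);
      } while (!WorkList.empty());
      return Region;
    }

    int main() {
      Node A{0, {}}, B{1, {&A}}, C{2, {&A, &B}};
      std::printf("blocks before C: %zu\n", regionBefore(&C).size()); // 2
      return 0;
    }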
@@ -1991,7 +1975,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
MachineConstantPool *MCP = MF.getConstantPool();
- unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6fdc5ef..66f4dfb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -31,8 +31,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -52,7 +50,6 @@ public:
unsigned &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg, int SPAdj) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -60,6 +57,11 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
+
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b110628..0242440 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -160,11 +160,6 @@ public:
// Thumb Addressing Modes:
bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
- unsigned Scale);
- bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -176,8 +171,6 @@ public:
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
// Thumb 2 Addressing Modes:
- bool SelectT2ShifterOperandReg(SDValue N,
- SDValue &BaseReg, SDValue &Opc);
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
@@ -278,6 +271,22 @@ private:
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
bool is64BitVector);
+
+ /// Returns the number of instructions required to materialize the given
+ /// constant in a register, or 3 if a literal pool load is needed.
+ unsigned ConstantMaterializationCost(unsigned Val) const;
+
+ /// Checks if N is a multiplication by a constant where we can extract out a
+ /// power of two from the constant so that it can be used in a shift, but only
+ /// if it simplifies the materialization of the constant. Returns true if it
+ /// is, and assigns to PowerOfTwo the power of two that should be extracted
+ /// out and to NewMulConst the new constant to be multiplied by.
+ bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+ unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+ /// Replace N with M in CurDAG, in a way that also ensures that M gets
+ /// selected when N would have been selected.
+ void replaceDAGValue(const SDValue &N, SDValue M);
};
}
@@ -334,7 +343,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
bool isThumb2 = Subtarget->isThumb();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
if (N->getOpcode() != ISD::ADD)
continue;
@@ -388,7 +397,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
SDValue CPTmp1;
SDValue CPTmp2;
if (isThumb2) {
- if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
continue;
} else {
if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
@@ -471,6 +480,61 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+ if (Subtarget->isThumb()) {
+ if (Val <= 255) return 1; // MOV
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (~Val <= 255) return 2; // MOV + MVN
+ if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
+ if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
+ }
+ if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ return 3; // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+ unsigned MaxShift,
+ unsigned &PowerOfTwo,
+ SDValue &NewMulConst) const {
+ assert(N.getOpcode() == ISD::MUL);
+ assert(MaxShift > 0);
+
+ // If the multiply is used in more than one place then changing the constant
+ // will make other uses incorrect, so don't.
+ if (!N.hasOneUse()) return false;
+ // Check if the multiply is by a constant
+ ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!MulConst) return false;
+ // If the constant is used in more than one place then modifying it will mean
+ // we need to materialize two constants instead of one, which is a bad idea.
+ if (!MulConst->hasOneUse()) return false;
+ unsigned MulConstVal = MulConst->getZExtValue();
+ if (MulConstVal == 0) return false;
+
+ // Find the largest power of 2 that MulConstVal is a multiple of
+ PowerOfTwo = MaxShift;
+ while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+ --PowerOfTwo;
+ if (PowerOfTwo == 0) return false;
+ }
+
+ // Only optimise if the new cost is better
+ unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+ NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+ unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+ unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+ return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+ CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+ CurDAG->ReplaceAllUsesWith(N, M);
+}
+
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &Opc,
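ConstantMaterializationCost, added in the hunk above, folds the old scattered checks into one instruction count (3 meaning a literal pool load). A deliberately simplified, standalone model of the MOVW/MOVT-capable case, with hypothetical sample values:

    #include <cstdio>

    // Simplified stand-in: the real function also consults the ARM_AM
    // helpers for shifted-immediate and two-part encodings.
    static unsigned materializationCost(unsigned Val) {
      if (Val <= 255)     return 1; // MOV
      if (Val <= 0xffff)  return 1; // MOVW
      if (~Val <= 255)    return 2; // MOV + MVN
      return 2;                     // MOVW + MOVT
    }

    int main() {
      std::printf("0x2a -> %u\n", materializationCost(0x2a));             // 1
      std::printf("0x1234 -> %u\n", materializationCost(0x1234));         // 1
      std::printf("0x12345678 -> %u\n", materializationCost(0x12345678)); // 2
      return 0;
    }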
@@ -478,6 +542,24 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
if (DisableShifterOp)
return false;
+ // If N is a multiply-by-constant and it's profitable to extract a shift and
+ // use it in a shifted operand, do so.
+ if (N.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+ BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+ N.getOperand(0), NewMulConst)
+ .getNode()),
+ 0);
+ replaceDAGValue(N.getOperand(1), NewMulConst);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+ PowerOfTwo),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
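canExtractShiftFromMul, used by the hunk above, rewrites x * C as (x * C') << k when C = C' << k and materializing C' is cheaper; the shift then rides along in the shifted-operand encoding. A standalone check of the decomposition loop:

    #include <cassert>
    #include <cstdio>

    // Mirror of the power-of-two extraction loop in canExtractShiftFromMul
    // (the cost comparison between C and C' is omitted here).
    static bool extractShift(unsigned MulConst, unsigned MaxShift,
                             unsigned &PowerOfTwo, unsigned &NewMulConst) {
      if (MulConst == 0)
        return false;
      PowerOfTwo = MaxShift;
      while ((MulConst % (1u << PowerOfTwo)) != 0) {
        --PowerOfTwo;
        if (PowerOfTwo == 0)
          return false; // odd constant: nothing to extract
      }
      NewMulConst = MulConst / (1u << PowerOfTwo);
      return true;
    }

    int main() {
      unsigned K = 0, C = 0;
      if (extractShift(40, 31, K, C))
        std::printf("x * 40 == (x * %u) << %u\n", C, K); // (x * 5) << 3
      assert((5u << 3) == 40u);
      return 0;
    }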
@@ -662,6 +744,18 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}
+ // If Offset is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand, do so.
+ if (Offset.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(Offset.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ ShOpcVal = ARM_AM::lsl;
+ }
+ }
+
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
SDLoc(N), MVT::i32);
return true;
@@ -1086,77 +1180,13 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
- SDValue &Offset, unsigned Scale) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
- if (!CurDAG->isBaseWithConstantOffset(N))
- return false;
-
- // Thumb does not have [sp, r] address mode.
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP))
- return false;
-
- // FIXME: Why do we explicitly check for a match here and then return false?
- // Presumably to allow something else to match, but shouldn't this be
- // documented?
- int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
- return false;
-
- Base = N.getOperand(0);
- Offset = N.getOperand(1);
- return true;
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 1);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 2);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 4);
-}
-
-bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
if (!CurDAG->isBaseWithConstantOffset(N)) {
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ if (N.getOpcode() == ISD::ADD) {
+ return false; // We want to select register offset instead
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1166,23 +1196,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP)) {
- ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
- ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
- unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
- unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
-
- // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
- if (LHSC != 0 || RHSC != 0) return false;
-
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
- }
-
// If the RHS is + imm5 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
@@ -1191,9 +1204,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
+ // Offset is too large, so use register offset instead.
+ return false;
}
bool
@@ -1263,28 +1275,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
//===----------------------------------------------------------------------===//
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
- SDValue &Opc) {
- if (DisableShifterOp)
- return false;
-
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
-
- // Don't match base register only case. That is matched to a separate
- // lower complexity pattern with explicit register operand.
- if (ShOpcVal == ARM_AM::no_shift) return false;
-
- BaseReg = N.getOperand(0);
- unsigned ShImmVal = 0;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShImmVal = RHS->getZExtValue() & 31;
- Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
- return true;
- }
-
- return false;
-}
-
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
@@ -1425,6 +1415,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
}
}
+ // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+ // and use it in a shifted operand, do so.
+ if (OffReg.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ }
+ }
+
ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
return true;
@@ -2503,25 +2504,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
- bool UseCP = true;
- if (Subtarget->useMovt(*MF))
- // Thumb2-aware targets have the MOVT instruction, so all immediates can
- // be done with MOV + MOVT, at worst.
- UseCP = false;
- else {
- if (Subtarget->isThumb()) {
- UseCP = (Val > 255 && // MOV
- ~Val > 255 && // MOV + MVN
- !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- } else
- UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
- ARM_AM::getSOImmVal(~Val) == -1 && // MVN
- !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- }
-
- if (UseCP) {
+ // If we can't materialize the constant we need to use a literal pool
+ if (ConstantMaterializationCost(Val) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -3376,7 +3360,7 @@ static void getIntOperandsFromRegisterString(StringRef RegString,
SelectionDAG *CurDAG, SDLoc DL,
std::vector<SDValue>& Ops) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() > 1) {
bool AllIntFields = true;
@@ -3461,9 +3445,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags) {
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
if (Flags.empty())
- return 0x3;
+ return 0x2 | (int)hasDSP;
return StringSwitch<int>(Flags)
.Case("g", 0x1)
@@ -3492,7 +3476,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write so need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags);
+ int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -3501,7 +3485,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
- if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+ if (!Subtarget->hasDSP() && (Mask & 0x1))
return -1;
// The register was valid so need to put the mask in the correct place
@@ -3523,7 +3507,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags);
+ Mask = getMClassFlagsMask(Flags, true);
if (Mask == -1)
return -1;
return Mask << 2;
@@ -3742,7 +3726,7 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
}
SmallVector<StringRef, 5> Fields;
- StringRef(SpecialReg).split(Fields, "_", 1, false);
+ StringRef(SpecialReg).split(Fields, '_', 1, false);
std::string Reg = Fields[0].str();
StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
@@ -3943,6 +3927,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
// be an immediate and not a memory constraint.
// Fallthrough.
case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
case InlineAsm::Constraint_Um:
case InlineAsm::Constraint_Un:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8cc06df..9cfb06b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -142,6 +142,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+
+ if (!VT.isFloatingPoint() &&
+ VT != MVT::v2i64 && VT != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -166,77 +171,78 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
- // Single-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
- setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
- setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
- setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
-
- // Double-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
- setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
- setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
- setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
-
- // Single-precision comparisons.
- setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
- setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
- setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
- setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
- setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
- setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
- setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
- setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
-
- // Double-precision comparisons.
- setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
- setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
- setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
- setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
- setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
- setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
- setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
- setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
-
- // Floating-point to integer conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
-
- // Conversions between floating types.
- setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Single-precision floating-point arithmetic.
+ { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
+
+ // Double-precision floating-point arithmetic.
+ { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
+
+ // Single-precision comparisons.
+ { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+ { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
+ { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
+
+ // Double-precision comparisons.
+ { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+ { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
+ { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
- // Integer to floating-point conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- // FIXME: There appears to be some naming inconsistency in ARM libgcc:
- // e.g., __floatunsidf vs. __floatunssidfvfp.
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ // Set the correct calling convention for ARMv7k WatchOS: AAPCS_VFP is used
+ // for everything, even functions as simple as libcalls.
+ if (Subtarget->isTargetWatchOS()) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+ setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
}
@@ -245,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
- !Subtarget->isTargetWindows()) {
+ // RTLIB calls that depend on the AEABI run-time helpers
+ if (Subtarget->isAAPCS_ABI() &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+ Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -334,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Memory operations
- // RTABI chapter 4.3.4
- { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
@@ -348,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
+
+ // EABI dependent RTLIB
+ if (TM.Options.EABIVersion == EABI::EABI4 ||
+ TM.Options.EABIVersion == EABI::EABI5) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char *const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } MemOpsLibraryCalls[] = {
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : MemOpsLibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
}
if (Subtarget->isTargetWindows()) {
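
For reference, the RTABI 4.3.4 entry points whose registration now depends on the EABI version have these signatures (as given by the ARM RTABI; note that they return void and that __aeabi_memset takes the size before the fill value, unlike ISO memset):

    #include <cstddef>

    extern "C" {
    void __aeabi_memcpy (void *dest, const void *src, std::size_t n);
    void __aeabi_memmove(void *dest, const void *src, std::size_t n);
    void __aeabi_memset (void *dest, std::size_t n, int c); // size, then value
    }
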
@@ -364,6 +390,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
@@ -373,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().isiOS() &&
- !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ if (Subtarget->isTargetWatchOS() ||
+ (Subtarget->isTargetIOS() &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
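
The compiler-rt divmod helpers selected here return the quotient directly and hand back the remainder through a pointer, per their compiler-rt implementations:

    extern "C" {
    int          __divmodsi4 (int a, int b, int *rem);           // *rem = a % b
    unsigned int __udivmodsi4(unsigned int a, unsigned int b,
                              unsigned int *rem);
    }
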
@@ -392,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
+ // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+ // a __gnu_ prefix (which is the default).
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+ setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ }
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
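
The half-precision helpers renamed in this hunk have the following shapes in compiler-rt (the GNUEABI defaults, __gnu_f2h_ieee and __gnu_h2f_ieee, share them); here uint16_t carries the raw f16 bit pattern:

    #include <cstdint>

    extern "C" {
    uint16_t __aeabi_f2h(float a);    // round f32 to f16 bits
    uint16_t __aeabi_d2h(double a);   // round f64 to f16 bits
    float    __aeabi_h2f(uint16_t a); // extend f16 bits to f32
    }
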
@@ -579,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -605,7 +643,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADDC);
if (Subtarget->isFPOnlySP()) {
- // When targetting a floating-point unit with only single-precision
+ // When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present. However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
@@ -689,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
- || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -706,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
// ARM does not have ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
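
Marking ROTL/ROTR as Expand tells the legalizer to rewrite rotates as two shifts and an OR, applied per element for the vector types covered by the new loop. A scalar model of that expansion:

    #include <cstdint>

    // rotl(x, n) == (x << n) | (x >> (32 - n)), with both shift amounts
    // masked so that n == 0 stays well-defined.
    static inline uint32_t rotl32(uint32_t x, unsigned n) {
      n &= 31;
      return (x << n) | (x >> ((32 - n) & 31));
    }
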
@@ -717,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ // @llvm.readcyclecounter requires the Performance Monitors extension.
+ // Default to the 0 expansion on unsupported platforms.
+ // FIXME: Technically there are older ARM CPUs that have
+ // implementation-specific ways of obtaining this information.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
@@ -726,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
!(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, LibCall);
+ setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
- if (Subtarget->isTargetAEABI()) {
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
@@ -762,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
@@ -776,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetMachO()) {
- // Non-MachO platforms may return values in these registers via the
- // personality function.
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
- }
-
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
@@ -849,11 +893,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget->isTargetDarwin()) {
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
- }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -912,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().isiOS()) {
+ if (Subtarget->isTargetWatchOS()) {
+ setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+ }
+ if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
// For iOS and watchOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -928,6 +976,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
if (!Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -935,8 +990,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
+
+ if (Subtarget->hasNEON()) {
+ // vmin and vmax aren't available in a scalar form, so we use
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ }
+
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
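
The two min/max families legalized above differ only in NaN handling: FMINNUM/FMAXNUM (VMINNM/VMAXNM) return the non-NaN operand, while FMINNAN/FMAXNAN (plain NEON VMIN/VMAX) propagate a NaN. A scalar model of the distinction:

    static float fminnum_model(float a, float b) { // ISD::FMINNUM / VMINNM
      if (a != a) return b;                        // NaN loses: keep the number
      if (b != b) return a;
      return a < b ? a : b;
    }

    static float fminnan_model(float a, float b) { // ISD::FMINNAN / NEON VMIN
      if (a != a) return a;                        // NaN wins: propagate it
      if (b != b) return b;
      return a < b ? a : b;
    }
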
@@ -959,11 +1028,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -1054,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::RBIT: return "ARMISD::RBIT";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1069,7 +1136,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
- case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -1082,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
+ case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
@@ -1133,14 +1202,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
- case ARMISD::FMAX: return "ARMISD::FMAX";
- case ARMISD::FMIN: return "ARMISD::FMIN";
- case ARMISD::VMAXNM: return "ARMISD::VMAX";
- case ARMISD::VMINNM: return "ARMISD::VMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -1449,9 +1515,10 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
- return DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
@@ -1734,9 +1801,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1748,9 +1816,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1768,7 +1837,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(), false, false, true, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1781,7 +1851,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@@ -1804,9 +1875,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
@@ -1821,7 +1893,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1831,8 +1902,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
- // Emit regular call when code size is the priority
- !HasMinSizeAttr)
+ // Emit regular call when code size is the priority
+ !MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -2014,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
+ assert(Subtarget->supportsTailCall());
+
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -2033,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
- // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
- // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
- // support in the assembler and linker to be used. This would need to be
- // fixed to fully support tail calls in Thumb1.
- //
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
-
- // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
- // but we need to make sure there are enough registers; the only valid
- // registers are the 4 used for parameters. We don't currently do this
- // case.
- if (Subtarget->isThumb1Only())
- return false;
-
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
@@ -2400,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
- return !Subtarget->isThumb1Only();
+ return true;
}
// Trying to write a 64-bit value, so we need to split it into two 32-bit values first,
@@ -2467,9 +2520,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
@@ -2491,9 +2545,10 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Argument =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2543,17 +2598,19 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else {
// local exec model
assert(model == TLSModel::LocalExec);
@@ -2561,9 +2618,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2577,6 +2635,8 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2597,22 +2657,31 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV,
- UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
- SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
- Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
- if (!UseGOTOFF)
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ if (UseGOT_PREL)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
return Result;
}
@@ -2628,9 +2697,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ return DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
}
@@ -2654,7 +2724,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
@@ -2680,32 +2751,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
-SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetELF() &&
- "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc dl(Op);
- unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV =
- ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex, PCAdj);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
-}
-
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -2722,6 +2772,13 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
+SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
+ Op.getOperand(0));
+}
+
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
@@ -2732,7 +2789,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_rbit: {
assert(Op.getOperand(1).getValueType() == MVT::i32 &&
"RBIT intrinsic must have i32 type!");
- return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
+ return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2752,10 +2809,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2770,6 +2827,36 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vminnm:
+ case Intrinsic::arm_neon_vmaxnm: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
+ ? ISD::FMINNUM : ISD::FMAXNUM;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vminu:
+ case Intrinsic::arm_neon_vmaxu: {
+ if (Op.getValueType().isFloatingPoint())
+ return SDValue();
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
+ ? ISD::UMIN : ISD::UMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vmins:
+ case Intrinsic::arm_neon_vmaxs: {
+ // v{min,max}s is overloaded between signed integers and floats.
+ if (!Op.getValueType().isFloatingPoint()) {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::SMIN : ISD::SMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::FMINNAN : ISD::FMAXNAN;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
@@ -2870,9 +2957,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -3056,9 +3144,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -3139,9 +3228,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex, VA.getLocMemOffset(),
- Flags.getByValSize());
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
+ VA.getLocMemOffset(), Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
@@ -3151,9 +3240,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(
+ VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -3188,13 +3278,9 @@ static bool isFloatingPointZero(SDValue Op) {
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
- if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
- SDValue MoveOp = BitcastOp->getOperand(0);
- if (MoveOp->getOpcode() == ISD::TargetConstant &&
- cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
- return true;
- }
- }
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(BitcastOp->getOperand(0)))
+ return true;
}
return false;
}
@@ -3559,113 +3645,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
- // We can use VMAXNM/VMINNM for a compare followed by a select with the
- // same operands, as follows:
- // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
- // select c, a, b
- // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
- bool swapSides = false;
- if (!getTargetMachine().Options.NoNaNsFPMath) {
- // transformability may depend on which way around we compare
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- // the non-NaN should be RHS
- swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
- break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // the non-NaN should be LHS
- swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
- break;
- }
- }
- swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
- if (swapSides) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(LHS, RHS);
- }
- if (LHS == TrueVal && RHS == FalseVal) {
- bool canTransform = true;
- // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
- if (!getTargetMachine().Options.UnsafeFPMath &&
- !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- const ConstantFPSDNode *Zero;
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- case ISD::SETGT:
- // RHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- case ISD::SETGE:
- // LHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOLT:
- case ISD::SETULT:
- case ISD::SETLT:
- // RHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- Zero->isNegative();
- break;
- case ISD::SETOLE:
- case ISD::SETULE:
- case ISD::SETLE:
- // LHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- Zero->isNegative();
- break;
- }
- }
- if (canTransform) {
- // Note: If one of the elements in a pair is a number and the other
- // element is NaN, the corresponding result element is the number.
- // This is consistent with the IEEE 754-2008 standard.
- // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETUGT:
- case ISD::SETUGE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETOLT:
- case ISD::SETOLE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- case ISD::SETULT:
- case ISD::SETULE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- }
- }
- }
-
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -3890,16 +3869,18 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
Addr, Op.getOperand(2), JTI);
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
- Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
@@ -3936,7 +3917,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -3988,7 +3969,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -4153,6 +4134,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
Results.push_back(Read.getOperand(0));
}
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of bitcast, \p Op, operates on the same bank,
+/// it might be possible to combine them, such that everything stays on the
+/// vector register bank.
+/// \return The node that would replace \p BC, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+ SelectionDAG &DAG) {
+ SDValue Op = BC->getOperand(0);
+ EVT DstVT = BC->getValueType(0);
+
+ // The only vector instruction that can produce a scalar (remember,
+ // since the bitcast was about to be turned into VMOVDRR, the source
+ // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+ // Moreover, we can do this combine only if there is one use.
+ // Finally, if the destination type is not a vector, there is not
+ // much point in forcing everything onto the vector bank.
+ if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !Op.hasOneUse())
+ return SDValue();
+
+ // If the index is not constant, we will introduce an additional
+ // multiply that will stick.
+ // Give up in that case.
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Index)
+ return SDValue();
+ unsigned DstNumElt = DstVT.getVectorNumElements();
+
+ // Compute the new index.
+ const APInt &APIntIndex = Index->getAPIntValue();
+ APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+ NewIndex *= APIntIndex;
+ // Check if the new constant index fits into i32.
+ if (NewIndex.getBitWidth() > 32)
+ return SDValue();
+
+ // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
+ // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
+ SDLoc dl(Op);
+ SDValue ExtractSrc = Op.getOperand(0);
+ EVT VecVT = EVT::getVectorVT(
+ *DAG.getContext(), DstVT.getScalarType(),
+ ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+ DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
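
A concrete check of the index arithmetic in CombineVMOVDRRCandidateWithVecOp, assuming the little-endian lane layout NEON uses: the v2f32 view of i64 lane Idx equals the two-element subvector starting at Idx * DstNumElt in the v4f32 view of the whole source.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t src[2] = {0x3f8000003f000000ull, 0x4000000040400000ull}; // v2i64
      unsigned Idx = 1, DstNumElt = 2;       // extract lane 1, view as v2f32
      float whole[4];                        // v4f32 view of the full register
      std::memcpy(whole, src, sizeof src);
      float viaScalar[2];                    // v2f32 view of the extracted i64
      std::memcpy(viaScalar, &src[Idx], sizeof viaScalar);
      unsigned NewIndex = Idx * DstNumElt;   // the combine's subvector index
      std::printf("%s\n", std::memcmp(viaScalar, whole + NewIndex,
                                      sizeof viaScalar) == 0 ? "match"
                                                             : "differ");
    }
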
@@ -4172,6 +4203,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+ // if we can combine the bitcast with its source.
+ if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+ return Val;
+
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
@@ -4383,7 +4419,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
if (!ST->hasV6T2Ops())
return SDValue();
- SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
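
The identity this lowering relies on, cttz(x) == ctlz(bitreverse(x)), is what becomes RBIT + CLZ on v6T2 and later. A portable scalar sketch (assuming the GCC/Clang __builtin_clz builtin):

    #include <cstdint>

    static unsigned cttz32(uint32_t x) {
      if (x == 0)
        return 32;                       // pick the defined-at-zero behavior
      uint32_t r = 0;
      for (unsigned i = 0; i < 32; ++i)  // software stand-in for RBIT
        r |= ((x >> i) & 1u) << (31 - i);
      return (unsigned)__builtin_clz(r); // CLZ of the bit-reversed value
    }
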
@@ -4544,8 +4580,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isa<ConstantSDNode>(N->getOperand(1)) ||
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
@@ -5036,18 +5071,56 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
return VT == MVT::v8i8 && M.size() == 8;
}
+// Checks whether the shuffle mask represents a vector transpose (VTRN) by
+// checking that pairs of elements in the shuffle mask represent the same index
+// in each vector, incrementing the expected index by 2 at each step.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
+// v2={e,f,g,h}
+// WhichResult gives the offset for each element in the mask based on which
+// of the two results it belongs to.
+//
+// The transpose can be represented either as:
+// result1 = shufflevector v1, v2, result1_shuffle_mask
+// result2 = shufflevector v1, v2, result2_shuffle_mask
+// where v1/v2 and the shuffle masks have the same number of elements
+// (here WhichResult (see below) indicates which result is being checked)
+//
+// or as:
+// results = shufflevector v1, v2, shuffle_mask
+// where both results are returned in one vector and the shuffle mask has twice
+// as many elements as v1/v2 (here WhichResult will always be 0 if true). In
+// this form we check the low half and the high half of the shuffle mask as
+// if each were a mask of the first form.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ // If the mask is twice as long as the input vector then we need to check the
+ // upper and lower parts of the mask with a matching value for WhichResult
+ // FIXME: A mask with only even values will be rejected in case the first
+ // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+ // M[0] is used to determine WhichResult
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
@@ -5060,28 +5133,55 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
+// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
+// that the mask elements are all even (result 0) or all odd (result 1),
+// increasing in steps of 2.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
+// v2={e,f,g,h}
+// Requires checks similar to those of isVTRNMask with
+// respect to how the results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (M[i] < 0) continue; // ignore UNDEF indices
- if ((unsigned) M[i] != 2 * i + WhichResult)
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; ++j) {
+ if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
+ return false;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5097,18 +5197,27 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
if (EltSz == 64)
return false;
- unsigned Half = VT.getVectorNumElements() / 2;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned j = 0; j != 2; ++j) {
- unsigned Idx = WhichResult;
- for (unsigned i = 0; i != Half; ++i) {
- int MIdx = M[i + j * Half];
- if (MIdx >= 0 && (unsigned) MIdx != Idx)
- return false;
- Idx += 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ unsigned Half = NumElts / 2;
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += Half) {
+ unsigned Idx = WhichResult;
+ for (unsigned k = 0; k < Half; ++k) {
+ int MIdx = M[i + j + k];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
}
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5116,21 +5225,37 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
+// that pairs of elements of the shuffle mask represent the same index in each
+// vector, incrementing sequentially through the vectors.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
+// v2={e,f,g,h}
+// Requires checks similar to those of isVTRNMask with respect to how the
+// results are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5147,15 +5272,23 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5329,16 +5462,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
+ ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Value->getOperand(1))) {
+ (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
- ConstantSDNode *constIndex;
- constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
- assert(constIndex && "The index is not a constant!");
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
@@ -5437,14 +5568,35 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+  // Code should guarantee that element i in Vec starts at element "WindowBase
+  // + i * WindowScale" in ShuffleVec.
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
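+  // For example, if a v4i16 source later gets bitcast to v8i8 for the
+  // shuffle, WindowScale becomes 2 and element i of Vec occupies ShuffleVec
+  // lanes 2*i and 2*i + 1 (with WindowBase 0).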
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
@@ -5453,127 +5605,166 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
- } else if (V.getOperand(0).getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // This code doesn't know how to handle shuffles where the vector
- // element types do not match (this happens because type legalization
- // promotes the return type of EXTRACT_VECTOR_ELT).
- // FIXME: It might be appropriate to extend this code to handle
- // mismatched types.
+ } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+ // Furthermore, shuffles require a constant mask, whereas extractelts
+ // accept variable indices.
return SDValue();
}
- // Record this extraction against the appropriate vector if possible...
+ // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- // If the element number isn't a constant, we can't effectively
- // analyze what's going on.
- if (!isa<ConstantSDNode>(V.getOperand(1)))
- return SDValue();
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
- // involved.
- if (SourceVecs.size() > 2)
+ // are involved.
+ if (Sources.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = {0, 0};
+ // Find out the smallest element size among result and two sources, and use
+ // it as element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy))
+ SmallestEltTy = SrcEltTy;
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
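+  // For example, a v4i32 result built from a v8i16 source makes SmallestEltTy
+  // i16, so NumElts becomes 8, ShuffleVT is v8i16 and ResMultiplier is 2:
+  // each i32 result lane spans two i16 shuffle lanes.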
+
+  // If the source vector is too wide or too narrow, we may nevertheless be
+  // able to construct a compatible shuffle either by concatenating it with
+  // UNDEF or extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ continue;
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
- VEXTOffsets[i] = 0;
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+ // We can pad out the smaller vector for free, so if it's part of a
+ // shuffle...
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
- // It probably isn't worth padding out a smaller vector just to
- // break it down again in a shuffle.
- return SDValue();
}
- // Since only 64-bit and 128-bit vectors are legal on ARM and
- // we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
+ if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
+ return SDValue();
- if (MaxElts[i] - MinElts[i] >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (MinElts[i] >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- } else if (MaxElts[i] < NumElts) {
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
} else {
// An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(VEXTOffsets[i], dl,
- MVT::i32));
- }
- }
-
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+
+ Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2,
+ DAG.getConstant(Src.MinElt, dl, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
+ }
+ }
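+  // e.g. for a double-width source with NumSrcElts = 4 whose used lanes are
+  // 3..5, the VEXT path above slides elements 3..6 down to positions 0..3 and
+  // sets WindowBase = -3, so original element i is now at lane i - 3.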
+
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+  // The stars all align; our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
+ if (Entry.getOpcode() == ISD::UNDEF)
continue;
- }
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
- .getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
- }
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
+    // trunc. So only std::min(SrcBits, DestBits) bits actually get defined in
+    // this segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
}
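+  // e.g. in the loop above, extracting i32 element 1 from a source bitcast to
+  // v8i16 (WindowScale 2, WindowBase 0) gives ExtractBase 2 and defines
+  // shuffle lanes 2 and 3 (offset by NumElts for the second source).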
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
- return SDValue();
+ // We can't handle more than two sources. This should have already
+ // been checked before this point.
+ assert(Sources.size() <= 2 && "Too many sources!");
+
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only
@@ -6235,6 +6426,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6265,6 +6458,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6337,6 +6532,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
@@ -6445,45 +6641,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
-
- // Create stack object for sret.
+ Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
auto &DL = DAG.getDataLayout();
- const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
- const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
ArgListTy Args;
- ArgListEntry Entry;
-
- Entry.Node = SRet;
- Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
- Args.push_back(Entry);
+ bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+ SDValue SRet;
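+  // Under APCS the __sincos*_stret helpers return the {sin, cos} pair through
+  // an sret pointer, so a stack slot is needed; otherwise the pair comes back
+  // directly and no slot is required.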
+ if (ShouldUseSRet) {
+ // Create stack object for sret.
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+ ArgListEntry Entry;
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(*DAG.getContext());
+ }
+ ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
- const char *LibcallName = (ArgVT == MVT::f64)
- ? "__sincos_stret" : "__sincosf_stret";
+ const char *LibcallName =
+ (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ RTLIB::Libcall LC =
+ (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+ CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
- std::move(Args), 0)
- .setDiscardResult();
-
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ if (!ShouldUseSRet)
+ return CallResult.first;
+
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
MachinePointerInfo(), false, false, false, 0);
@@ -6498,6 +6705,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
LoadSin.getValue(0), LoadCos.getValue(0));
}
+SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+ bool Signed,
+ SDValue &Chain) const {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ const char *Name = nullptr;
+ if (Signed)
+ Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+ else
+ Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
+
+ SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
+
+ ARMTargetLowering::ArgListTy Args;
+
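+  // Push the operands in reverse order ({1, 0}): the Windows __rt_*div
+  // helpers expect the divisor before the dividend.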
+ for (auto AI : {1, 0}) {
+ ArgListEntry Arg;
+ Arg.Node = Op.getOperand(AI);
+ Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Arg);
+ }
+
+ CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
+ ES, std::move(Args), 0);
+
+ return LowerCallTo(CLI).first;
+}
+
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ assert(Op.getValueType() == MVT::i32 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
+ DAG.getEntryNode(), Op.getOperand(1));
+
+ return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+}
+
+void ARMTargetLowering::ExpandDIV_Windows(
+ SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const {
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Op.getValueType() == MVT::i64 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
+
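+  // The 64-bit divisor is zero iff both 32-bit halves are zero, so a single
+  // WIN__DBZCHK on (Lo | Hi) guards the whole division.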
+ SDValue DBZCHK =
+ DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+
+ SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+
+ SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
+ SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
+ DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
+ Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
+
+ Results.push_back(Lower);
+ Results.push_back(Upper);
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -6513,36 +6799,22 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc DL(N);
- SDValue Cycles32, OutChain;
-
- if (Subtarget->hasPerfMon()) {
- // Under Power Management extensions, the cycle-count is:
- // mrc p15, #0, <Rt>, c9, c13, #0
- SDValue Ops[] = { N->getOperand(0), // Chain
- DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
- DAG.getConstant(15, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(9, DL, MVT::i32),
- DAG.getConstant(13, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32)
- };
-
- Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Ops);
- OutChain = Cycles32.getValue(1);
- } else {
- // Intrinsic is defined to return 0 on unsupported platforms. Technically
- // there are older ARM CPUs that have implementation-specific ways of
- // obtaining this information (FIXME!).
- Cycles32 = DAG.getConstant(0, DL, MVT::i32);
- OutChain = DAG.getEntryNode();
- }
-
+ // Under Power Management extensions, the cycle-count is:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(9, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32)
+ };
- SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
- Cycles32, DAG.getConstant(0, DL, MVT::i32));
- Results.push_back(Cycles64);
- Results.push_back(OutChain);
+ SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+ DAG.getConstant(0, DL, MVT::i32)));
+ Results.push_back(Cycles32.getValue(1));
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -6576,15 +6848,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SREM: return LowerREM(Op.getNode(), DAG);
+ case ISD::UREM: return LowerREM(Op.getNode(), DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
@@ -6622,13 +6896,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ARMISD::WIN__DBZCHK: return SDValue();
}
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
+ SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res;
switch (N->getOpcode()) {
@@ -6644,9 +6919,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = LowerREM(N, DAG);
+ break;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::UDIV:
+ case ISD::SDIV:
+ assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+ return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+ Results);
}
if (Res.getNode())
Results.push_back(Res);
@@ -6683,12 +6967,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
- MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
- MachineMemOperand::MOLoad, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
MachineMemOperand *FIMMOSt =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore, 4, 4);
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
@@ -6792,7 +7076,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
MachineModuleInfo &MMI = MF->getMMI();
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
- if (!BB->isLandingPad()) continue;
+ if (!BB->isEHPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
@@ -6807,7 +7091,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
- CallSiteNumToLPad[*CSI].push_back(BB);
+ CallSiteNumToLPad[*CSI].push_back(&*BB);
MaxCSNum = std::max(MaxCSNum, *CSI);
}
break;
@@ -6840,7 +7124,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
- DispatchBB->setIsLandingPad();
+ DispatchBB->setIsEHPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
@@ -6864,10 +7148,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
- MachineMemOperand *FIMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOVolatile, 4, 4);
+ MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
@@ -6982,9 +7265,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
@@ -7066,9 +7348,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
@@ -7109,13 +7390,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
BB->succ_end());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
- if (SMBB->isLandingPad()) {
+ if (SMBB->isEHPad()) {
BB->removeSuccessor(SMBB);
MBBLPads.push_back(SMBB);
}
}
- BB->addSuccessor(DispatchBB);
+ BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+ BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
@@ -7157,7 +7439,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// landing pad now.
for (SmallVectorImpl<MachineBasicBlock*>::iterator
I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
- (*I)->setIsLandingPad(false);
+ (*I)->setIsEHPad(false);
// The instruction is gone now.
MI->eraseFromParent();
@@ -7280,8 +7562,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned src = MI->getOperand(1).getReg();
@@ -7574,6 +7855,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
+ MF->push_back(ContBB);
+ ContBB->splice(ContBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ MBB->addSuccessor(ContBB);
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
+ MBB->addSuccessor(TrapBB);
+
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
+ .addReg(MI->getOperand(0).getReg())
+ .addMBB(TrapBB);
+
+ MI->eraseFromParent();
+ return ContBB;
+}
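+// The emitted control flow is, roughly: a tCBZ on the divisor that branches
+// to TrapBB (which raises the division-by-zero trap via t2UDF #249), with the
+// rest of the original block falling through into ContBB.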
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -7643,8 +7950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -7741,6 +8047,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return BB;
+
+ case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
@@ -7759,8 +8068,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
// SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = BB;
- ++BBI;
+ MachineFunction::iterator BBI = ++BB->getIterator();
MachineFunction *Fn = BB->getParent();
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
@@ -7824,11 +8132,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitStructByval(MI, BB);
case ARM::WIN__CHKSTK:
return EmitLowered__chkstk(MI, BB);
+ case ARM::WIN__DBZCHK:
+ return EmitLowered__dbzchk(MI, BB);
+ }
+}
+
+/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
+/// when it is expanded into LDM/STM. This is done as a post-isel lowering
+/// instead of as a custom inserter because we need the use list from the SDNode.
+static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
+ MachineInstr *MI, const SDNode *Node) {
+ bool isThumb1 = Subtarget->isThumb1Only();
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+
+ // If the new dst/src is unused mark it as dead.
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->getOperand(0).setIsDead(true);
+ }
+ if (!Node->hasAnyUseOfValue(1)) {
+ MI->getOperand(1).setIsDead(true);
+ }
+
+ // The MEMCPY both defines and kills the scratch registers.
+ for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
+ MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
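+// For example, a MEMCPY whose operand 4 is 2 gets two fresh virtual registers
+// appended as dead defs; the later LDM/STM expansion uses them as scratch.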
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ attachMEMCPYScratchRegs(Subtarget, MI, Node);
+ return;
+ }
+
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7898,10 +8241,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (!C)
- return false;
- return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+ return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
@@ -8723,12 +9063,88 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn)
+// and return it, and fill in FromMask and ToMask with (consecutive) bits
+// in "from" to be extracted and their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+ assert(N->getOpcode() == ARMISD::BFI);
+
+ SDValue From = N->getOperand(1);
+ ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+
+ // If the Base came from a SHR #C, we can deduce that it is really testing bit
+ // #C in the base of the SHR.
+ if (From->getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(From->getOperand(1))) {
+ APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+ FromMask <<= Shift.getLimitedValue(31);
+ From = From->getOperand(0);
+ }
+
+ return From;
+}
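+// For example (illustrative constants): a BFI whose mask operand is
+// 0xFFFF00FF writes 8 bits, so ToMask = 0x0000FF00 and FromMask = 0x000000FF;
+// if the "from" value was (X srl 4), FromMask is shifted up to 0x00000FF0 and
+// X itself is returned as the base.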
+
+// If A and B each contain one contiguous set of bits, does A | B equal the
+// two ranges concatenated (A . B)?
+//
+// Neither A nor B may be zero.
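+//
+// For example, A = 0b1100 and B = 0b0011: A's lowest set bit (index 2) sits
+// directly above B's highest set bit (index 1), so A | B == 0b1111, the
+// concatenation of the two ranges.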
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+ unsigned LastActiveBitInA = A.countTrailingZeros();
+ unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+ return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it
+  // can combine with, if one exists.
+ APInt ToMask, FromMask;
+ SDValue From = ParseBFI(N, ToMask, FromMask);
+ SDValue To = N->getOperand(0);
+
+ // Now check for a compatible BFI to merge with. We can pass through BFIs that
+ // aren't compatible, but not if they set the same bit in their destination as
+ // we do (or that of any BFI we're going to combine with).
+ SDValue V = To;
+ APInt CombinedToMask = ToMask;
+ while (V.getOpcode() == ARMISD::BFI) {
+ APInt NewToMask, NewFromMask;
+ SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+ if (NewFrom != From) {
+ // This BFI has a different base. Keep going.
+ CombinedToMask |= NewToMask;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ // Do the written bits conflict with any we've seen so far?
+ if ((NewToMask & CombinedToMask).getBoolValue())
+ // Conflicting bits - bail out because going further is unsafe.
+ return SDValue();
+
+ // Are the new bits contiguous when combined with the old bits?
+ if (BitsProperlyConcatenate(ToMask, NewToMask) &&
+ BitsProperlyConcatenate(FromMask, NewFromMask))
+ return V;
+ if (BitsProperlyConcatenate(NewToMask, ToMask) &&
+ BitsProperlyConcatenate(NewFromMask, FromMask))
+ return V;
+
+ // We've seen a write to some bits, so track it.
+ CombinedToMask |= NewToMask;
+ // Keep going...
+ V = V.getOperand(0);
+ }
+
+ return SDValue();
+}
+
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
+ // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+ // the bits being cleared by the AND are not demanded by the BFI.
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
@@ -8744,6 +9160,38 @@ static SDValue PerformBFICombine(SDNode *N,
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
+ } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
+ // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
+ // Keep track of any consecutive bits set that all come from the same base
+ // value. We can combine these together into a single BFI.
+ SDValue CombineBFI = FindBFIToCombineWith(N);
+ if (CombineBFI == SDValue())
+ return SDValue();
+
+ // We've found a BFI.
+ APInt ToMask1, FromMask1;
+ SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
+
+ APInt ToMask2, FromMask2;
+ SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
+ assert(From1 == From2);
+ (void)From2;
+
+ // First, unlink CombineBFI.
+ DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
+ // Then create a new BFI, combining the two together.
+ APInt NewFromMask = FromMask1 | FromMask2;
+ APInt NewToMask = ToMask1 | ToMask2;
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (NewFromMask[0] == 0)
+ From1 = DCI.DAG.getNode(
+ ISD::SRL, dl, VT, From1,
+ DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+ return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
+ DCI.DAG.getConstant(~NewToMask, dl, VT));
}
return SDValue();
}
@@ -9521,32 +9969,6 @@ static SDValue PerformSTORECombine(SDNode *N,
return SDValue();
}
-// isConstVecPow2 - Return true if each vector element is a power of 2, all
-// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
-static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
-{
- integerPart cN;
- integerPart c0 = 0;
- for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
- I != E; I++) {
- ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
- if (!C)
- return false;
-
- bool isExact;
- APFloat APF = C->getValueAPF();
- if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
- != APFloat::opOK || !isExact)
- return false;
-
- c0 = (I == 0) ? cN : c0;
- if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
- return false;
- }
- C = c0;
- return true;
-}
-
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
@@ -9556,30 +9978,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
/// vcvt.s32.f32 d16, d16
/// becomes:
/// vcvt.s32.f32 d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op = N->getOperand(0);
+ if (!Subtarget->hasNEON())
+ return SDValue();
- if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
- Op.getOpcode() != ISD::FMUL)
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
return SDValue();
- uint64_t C;
- SDValue N0 = Op->getOperand(0);
SDValue ConstVec = Op->getOperand(1);
- bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
- NumLanes > 4) {
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
// be lossy. We also can't handle more than 4 lanes, since these instructions
@@ -9587,16 +10004,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
- NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
- DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- N0,
- DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ SDValue FixConv = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+ DAG.getConstant(C, dl, MVT::i32));
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
@@ -9611,38 +10034,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
/// vdiv.f32 d16, d17, d16
/// becomes:
/// vcvt.f32.s32 d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
-
- if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ if (!N->getValueType(0).isVector() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
- uint64_t C;
SDValue ConstVec = N->getOperand(1);
- bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ uint32_t IntBits = IntTy.getSizeInBits();
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
- // be lossy.
+  // be lossy. We also can't handle more than 4 lanes, since these instructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
SDValue ConvInput = Op.getOperand(0);
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
@@ -9652,7 +10081,7 @@ static SDValue PerformVDIVCombine(SDNode *N,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -9680,7 +10109,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
@@ -9695,12 +10124,16 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
- if (isIntrinsic)
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
Cnt = -Cnt;
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ return true;
+ }
+ return false;
}
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
@@ -9939,89 +10372,123 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
-static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- // If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
- // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
- // a NaN; only do the transformation when it matches that behavior.
-
- // For now only do this when using NEON for FP operations; if using VFP, it
- // is not obvious that the benefit outweighs the cost of switching to the
- // NEON pipeline.
- if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
- N->getValueType(0) != MVT::f32)
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+ APInt &KnownOne) {
+ if (Op.getOpcode() == ARMISD::BFI) {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ APInt Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
+ if (Op.getOpcode() == ARMISD::CMOV) {
+ APInt KZ2(KnownZero.getBitWidth(), 0);
+ APInt KO2(KnownOne.getBitWidth(), 0);
+ computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+
+ KnownZero &= KZ2;
+ KnownOne &= KO2;
+ return;
+ }
+ return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
+
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
+ // If we have a CMOV, OR and AND combination such as:
+ // if (x & CN)
+ // y |= CM;
+ //
+ // And:
+ // * CN is a single bit;
+ // * All bits covered by CM are known zero in y
+ //
+ // Then we can convert this into a sequence of BFI instructions. This will
+ // always be a win if CM is a single bit, will always be no worse than the
+  // TST&OR sequence if CM is two bits, and for Thumb will be no worse if CM is
+ // three bits (due to the extra IT instruction).
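+  //
+  // For example (illustrative constants):
+  //   if (x & 4)    // CN = 4: a single bit (bit 2)
+  //     y |= 0x30;  // CM = 0x30: bits 4 and 5, known zero in y
+  // becomes X = x >> 2 followed by two single-bit BFIs that copy bit 0 of X
+  // into bits 4 and 5 of y.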
+
+ SDValue Op0 = CMOV->getOperand(0);
+ SDValue Op1 = CMOV->getOperand(1);
+ auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
+ auto CC = CCNode->getAPIntValue().getLimitedValue();
+ SDValue CmpZ = CMOV->getOperand(4);
+
+ // The compare must be against zero.
+ if (!isNullConstant(CmpZ->getOperand(1)))
+ return SDValue();
+
+ assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+ SDValue And = CmpZ->getOperand(0);
+ if (And->getOpcode() != ISD::AND)
+ return SDValue();
+ ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isPowerOf2())
return SDValue();
+ SDValue X = And->getOperand(0);
- SDValue CondLHS = N->getOperand(0);
- SDValue CondRHS = N->getOperand(1);
- SDValue LHS = N->getOperand(2);
- SDValue RHS = N->getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- unsigned Opcode = 0;
- bool IsReversed;
- if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
- IsReversed = false; // x CC y ? x : y
- } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
- IsReversed = true ; // x CC y ? y : x
+ if (CC == ARMCC::EQ) {
+ // We're performing an "equal to zero" compare. Swap the operands so we
+ // canonicalize on a "not equal to zero" compare.
+ std::swap(Op0, Op1);
} else {
- return SDValue();
+ assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
+
+ if (Op1->getOpcode() != ISD::OR)
+ return SDValue();
- bool IsUnordered;
- switch (CC) {
- default: break;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
- // will return -0, so vmin can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
- break;
+ ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
+ if (!OrC)
+ return SDValue();
+ SDValue Y = Op1->getOperand(0);
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
- // will return +0, so vmax can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
- break;
- }
+ if (Op0 != Y)
+ return SDValue();
+
+ // Now, is it profitable to continue?
+ APInt OrCI = OrC->getAPIntValue();
+ unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+ if (OrCI.countPopulation() > Heuristic)
+ return SDValue();
- if (!Opcode)
+ // Lastly, can we determine that the bits defined by OrCI
+ // are zero in Y?
+ APInt KnownZero, KnownOne;
+ computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ if ((OrCI & KnownZero) != OrCI)
return SDValue();
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+
+ // OK, we can do the combine.
+ SDValue V = Y;
+ SDLoc dl(X);
+ EVT VT = X.getValueType();
+ unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+ if (BitInX != 0) {
+ // We must shift X first.
+ X = DAG.getNode(ISD::SRL, dl, VT, X,
+ DAG.getConstant(BitInX, dl, VT));
+ }
+
+ for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+ BitInY < NumActiveBits; ++BitInY) {
+ if (OrCI[BitInY] == 0)
+ continue;
+ APInt Mask(VT.getSizeInBits(), 0);
+ Mask.setBit(BitInY);
+ V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+ // Confusingly, the operand is an *inverted* mask.
+ DAG.getConstant(~Mask, dl, VT));
+ }
+
+ return V;
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
@@ -10042,6 +10509,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+ // BFI is only available on V6T2+.
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+ SDValue R = PerformCMOVToBFICombine(N, DAG);
+ if (R)
+ return R;
+ }
+
// Simplify
// mov r1, r0
// cmp r1, x
@@ -10108,8 +10582,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
- case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::FP_TO_UINT:
+ return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FDIV:
+ return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -10117,7 +10593,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
- case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
@@ -11043,37 +11518,61 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
- unsigned Opcode = Op->getOpcode();
- assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
- "Invalid opcode for Div/Rem lowering");
- bool isSigned = (Opcode == ISD::SDIVREM);
- EVT VT = Op->getValueType(0);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
+static RTLIB::Libcall getDivRemLibcall(
+ const SDNode *N, MVT::SimpleValueType SVT) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemLibcall");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
RTLIB::Libcall LC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (SVT) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
+ return LC;
+}
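+// For example, an i32 SREM or SDIVREM selects RTLIB::SDIVREM_I32, which on
+// AEABI targets resolves to the __aeabi_idivmod helper.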
- SDValue InChain = DAG.getEntryNode();
-
+static TargetLowering::ArgListTy getDivRemArgList(
+ const SDNode *N, LLVMContext *Context) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemArgList");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op->getOperand(i);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*Context);
+ Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
+ return Args;
+}
+
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+ VT.getSimpleVT().SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+ DAG.getContext());
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
@@ -11090,6 +11589,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
return CallInfo.first;
}
+// Lowers REM using the divmod helpers;
+// see RTABI sections 4.2 and 4.3.
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+ // Build return types (div and rem)
+ std::vector<Type*> RetTyParams;
+ Type *RetTyElement;
+
+ switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
+ case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+ case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+ case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+ }
+
+ RetTyParams.push_back(RetTyElement);
+ RetTyParams.push_back(RetTyElement);
+ ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+ Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+ RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+ SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ bool isSigned = N->getOpcode() == ISD::SREM;
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ // Lower call
+ CallLoweringInfo CLI(DAG);
+ CLI.setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // Return second (rem) result operand (first contains div)
+ SDNode *ResNode = CallResult.first.getNode();
+ assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+ return ResNode->getOperand(1);
+}
+
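For context, the reason LowerREM can be satisfied by a single call: the AEABI divmod helpers hand back quotient and remainder together. A minimal sketch, assuming the RTABI 4.3.1 declaration (paraphrased; the real helper returns the pair in r0/r1 via __value_in_regs):

    struct idiv_return { int quot; int rem; };            // RTABI pair shape
    extern "C" idiv_return __aeabi_idivmod(int num, int den);

    int remainder32(int a, int b) {
      return __aeabi_idivmod(a, b).rem;  // LowerREM keeps operand 1, the rem half
    }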
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11124,8 +11664,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11137,8 +11677,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
bool
@@ -11186,7 +11726,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -11212,7 +11752,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -11295,8 +11835,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11392,19 +11930,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
// guarantee, see DDI0406C ARM architecture reference manual,
// sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return (Size == 64) && !Subtarget->isMClass();
+ return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
return (Size <= (Subtarget->isMClass() ? 32U : 64U))
- ? AtomicRMWExpansionKind::LLSC
- : AtomicRMWExpansionKind::None;
+ ? AtomicExpansionKind::LLSC
+ : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
+ return true;
}
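Answering AtomicExpansionKind::LLSC asks the generic AtomicExpand pass to build a load-exclusive/store-exclusive retry loop in IR. A sketch of that loop's shape using clang's ARM builtins (illustrative only; the pass emits the IR directly):

    int atomic_fetch_add_sketch(volatile int *p, int v) {
      int old;
      do {
        old = __builtin_arm_ldrex(p);            // ldrex: open exclusive access
      } while (__builtin_arm_strex(old + v, p)); // strex: returns 0 on success
      return old;
    }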
// This has so far only been implemented for MachO.
@@ -11419,7 +11964,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
// Floating point values and vector values map to the same register file.
- // Therefore, althought we could do a store extract of a vector type, this is
+ // Therefore, although we could do a store extract of a vector type, this is
// better to leave at float as we have more freedom in the addressing mode for
// those.
if (VectorTy->isFPOrFPVectorTy())
@@ -11441,6 +11986,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
}
+bool ARMTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
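The v6T2 gate is presumably because CLZ and RBIT make both counts short branch-free sequences (ctlz is a single clz; cttz is rbit then clz), so speculating them costs no guarding branch. A small sketch with the usual builtins:

    int ctlz32(unsigned x) { return x ? __builtin_clz(x) : 32; } // clz
    int cttz32(unsigned x) { return x ? __builtin_ctz(x) : 32; } // rbit + clz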
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11477,6 +12030,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
cast<PointerType>(Addr->getType())->getElementType());
}
+void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ if (!Subtarget->hasV7Ops())
+ return;
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+}
+
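Hedged rationale for the new hook: when a compare-exchange fails before reaching its store-exclusive, the monitor armed by ldrex is never cleared, and clrex (exposed by clang as a builtin) releases it:

    void release_exclusive_monitor() { __builtin_arm_clrex(); } // maps to clrex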
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
@@ -11534,12 +12095,12 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
- // support i64/f64 element).
- if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON; also skip illegal vector types and vector
+ // types with i64/f64 elements (vldN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
return false;
// A pointer vector can not be the return type of the ldN intrinsics. Need to
@@ -11552,9 +12113,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
-
IRBuilder<> Builder(LI);
SmallVector<Value *, 2> Ops;
@@ -11562,6 +12120,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ Type *Tys[] = { VecTy, Int8Ptr };
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
// Replace uses of each shufflevector with the corresponding vector loaded
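To illustrate the pattern being matched (assuming the standard arm_neon.h intrinsics), a stride-2 interleaved load feeding even/odd shuffles collapses into one vld2:

    #include <arm_neon.h>
    // .val[0] receives lanes 0,2,4,6 and .val[1] lanes 1,3,5,7: the two
    // shufflevector results the pass replaces (sketch, not pass output).
    int16x4x2_t deinterleave(const int16_t *p) { return vld2_s16(p); }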
@@ -11624,12 +12185,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal sub vector types and vector types of i64/f64 element (vstN
- // doesn't support i64/f64 element).
- if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON; also skip illegal vector types and vector
+ // types with i64/f64 elements (vstN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
+ EltIs64Bits)
return false;
Value *Op0 = SVI->getOperand(0);
@@ -11650,17 +12212,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, NumSubElts);
}
- static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
- Intrinsic::arm_neon_vst3,
- Intrinsic::arm_neon_vst4};
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], SubVecTy);
-
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
SmallVector<Value *, 6> Ops;
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ Type *Tys[] = { Int8Ptr, SubVecTy };
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], Tys);
+
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++)
Ops.push_back(Builder.CreateShuffleVector(
@@ -11681,14 +12244,14 @@ enum HABaseType {
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
uint64_t &Members) {
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0; i < ST->getNumElements(); ++i) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
return false;
Members += SubMembers;
}
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
return false;
@@ -11703,7 +12266,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
return false;
Members = 1;
Base = HA_DOUBLE;
- } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
Members = 1;
switch (Base) {
case HA_FLOAT:
@@ -11747,3 +12310,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
return IsHA || IsIntArray;
}
+
+unsigned ARMTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+}
+
+unsigned ARMTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+}
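For orientation (an assumption consistent with Itanium-style EH, not stated in the patch): these are the registers in which a landingpad's two results arrive,

    // %lp = landingpad { i8*, i32 } cleanup     (LLVM IR shape)
    //   i8* exception pointer -> R0, i32 type selector -> R1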
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index efc9020..b764624 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -63,8 +63,6 @@ namespace llvm {
BCC_i64,
- RBIT, // ARM bitreverse instruction
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -79,6 +77,7 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
TC_RETURN, // Tail call return pseudo.
@@ -91,6 +90,7 @@ namespace llvm {
PRELOAD, // Preload
WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+ WIN__DBZCHK, // Windows' divide by zero check
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
@@ -172,12 +172,6 @@ namespace llvm {
// BUILD_VECTOR for this purpose.
BUILD_VECTOR,
- // Floating-point max and min:
- FMAX,
- FMIN,
- VMAXNM,
- VMINNM,
-
// Bit-field insert
BFI,
@@ -189,6 +183,10 @@ namespace llvm {
// Vector bitwise select
VBSL,
+ // Pseudo-instruction representing a memory copy using ldm/stm
+ // instructions.
+ MEMCPY,
+
// Vector load N-element structure to all lanes:
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD3DUP,
@@ -260,6 +258,7 @@ namespace llvm {
SDNode *Node) const override;
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
@@ -348,6 +347,8 @@ namespace llvm {
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
else if (ConstraintCode.size() == 2) {
if (ConstraintCode[0] == 'U') {
switch(ConstraintCode[1]) {
@@ -420,13 +421,24 @@ namespace llvm {
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
- bool hasLoadLinkedStoreConditional() const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -441,16 +453,21 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -496,6 +513,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -508,7 +526,6 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -526,6 +543,12 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const;
+ SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SDValue &Chain) const;
+ SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -635,6 +658,8 @@ namespace llvm {
MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be..cf973d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -51,7 +51,8 @@ void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
- default: break;
+ default:
+ break;
case ARM::LDR_PRE_IMM:
case ARM::LDR_PRE_REG:
case ARM::LDR_POST_IMM:
@@ -124,82 +125,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
AddDefaultPred(MIB);
}
-
-namespace {
- /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
- /// global base register for ARM ELF.
- struct ARMCGBR : public MachineFunctionPass {
- static char ID;
- ARMCGBR() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->getGlobalBaseReg() == 0)
- return false;
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
- // Don't do this for Thumb1.
- if (STI.isThumb1Only())
- return false;
-
- const TargetMachine &TM = MF.getTarget();
- if (TM.getRelocationModel() != Reloc::PIC_)
- return false;
-
- LLVMContext *Context = &MF.getFunction()->getContext();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = STI.isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
- *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
-
- unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
- Type::getInt32PtrTy(*Context));
- unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
-
- MachineBasicBlock &FirstMBB = MF.front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned TempReg =
- MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
- unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
- if (Opc == ARM::LDRcp)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Fix the GOT address by adding pc.
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
- MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
- .addReg(TempReg)
- .addImm(ARMPCLabelIndex);
- if (Opc == ARM::PICADD)
- AddDefaultPred(MIB);
-
- return true;
- }
-
- const char *getPassName() const override {
- return "ARM PIC Global Base Reg Initialization";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-char ARMCGBR::ID = 0;
-FunctionPass*
-llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 9f5bde3..b9de83b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -59,6 +59,7 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -70,8 +71,11 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
-def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
@@ -163,21 +167,23 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+ SDT_ARMEH_SJLJ_SetupDispatch,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
-def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
-def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
@@ -209,6 +215,8 @@ def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -228,7 +236,9 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16","half-float">;
+ AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16","full half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -236,9 +246,8 @@ def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
-def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
- AssemblerPredicate<"FeatureDSPThumb2",
- "thumb2-dsp">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate<"FeatureDB",
"data-barriers">;
@@ -2322,6 +2331,7 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
}
+def : MnemonicAlias<"smi", "smc">;
// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
@@ -3671,10 +3681,10 @@ def USAT16 : AI<(outs GPRnopc:$Rd),
let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
- (SSAT imm:$pos, GPRnopc:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
- (USAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -4186,7 +4196,7 @@ def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
@@ -4578,6 +4588,19 @@ let usesCustomInserter = 1 in {
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+ // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+ // Copies N registers worth of memory from address %src to address %dst
+ // and returns the incremented addresses. N scratch registers will
+ // be attached for the copy to use.
+ def MEMCPY : PseudoInst<
+ (outs GPR:$newdst, GPR:$newsrc),
+ (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+ NoItinerary,
+ [(set GPR:$newdst, GPR:$newsrc,
+ (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
+
def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
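For reference, a sketch of the contract the MEMCPY pseudo above models (not the actual expansion): copy N words while advancing both pointers, the behaviour a writeback ldm/stm pair gives in two instructions:

    // Word-wise copy returning advanced pointers, like 'ldm r1!' / 'stm r0!'.
    static void memcpy_words(unsigned *&dst, unsigned *&src, unsigned nwords) {
      for (unsigned i = 0; i != nwords; ++i)
        *dst++ = *src++;
    }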
@@ -4705,7 +4728,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[(int_arm_clrex)]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6K]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
@@ -5242,6 +5265,12 @@ def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+ def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+ [(win__dbzchk GPR:$divisor)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -5301,6 +5330,10 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
Requires<[IsARM]>;
}
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+ [(ARMeh_sjlj_setup_dispatch)]>;
+
// eh.sjlj.dispatchsetup pseudo-instruction.
// This pseudo is used for both ARM and Thumb. Any differences are handled when
// the pseudo is expanded (which happens before any passes that need the
@@ -5622,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
- (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
- (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index f035d61..7020ffb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,11 +587,6 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
-def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>;
-def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
-def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
-
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -2465,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as above, but not predicated.
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -3255,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3275,6 +3277,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
}
@@ -4110,6 +4119,12 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
+def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+ v4f16, v4f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+ v8f16, v8f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
@@ -4165,10 +4180,21 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
+def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+ v4f16, v4f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+ v8f16, v8f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
+def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+ v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4277,6 +4303,12 @@ def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+ v8f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4285,6 +4317,12 @@ def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+ v8f16, v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4495,6 +4533,12 @@ def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4503,6 +4547,12 @@ def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+ v8f16, v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4570,6 +4620,13 @@ def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+ v8f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4578,6 +4635,12 @@ def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4602,6 +4665,12 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
+def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+ v4f16, v4f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+ v8f16, v8f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
@@ -4646,6 +4715,12 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4660,6 +4735,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4677,6 +4758,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4686,36 +4773,68 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
"f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
"f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
"f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}
// Vector Bitwise Operations.
@@ -5007,6 +5126,12 @@ def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5014,6 +5139,29 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+def abd_shr :
+ PatFrag<(ops node:$in1, node:$in2, node:$shift),
+ (NEONvshrs (sub (zext node:$in1),
+ (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+ (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+ (zext (v8i8 DPR:$opB))),
+ (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+ (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+ (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+ (zext (v4i16 DPR:$opB))),
+ (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+ (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+ (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+ (zext (v2i32 DPR:$opB))),
+ (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+ (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
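The abd_shr patterns above recognize the branch-free absolute-value idiom applied to a widened difference, |zext(a) - zext(b)| = (d + s) ^ s with s = d >> (bits-1), and select a single vabdl. Scalar sketch of the same identity (names illustrative):

    unsigned absdiff32(unsigned a, unsigned b) {
      long long d = (long long)a - (long long)b; // widened difference
      long long s = d >> 63;                     // sign fill: 0 or -1
      return (unsigned)((d + s) ^ s);            // |d|, fits in 32 bits
    }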
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
@@ -5031,53 +5179,85 @@ defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "s", int_arm_neon_vmaxs, 1>;
+ "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "u", int_arm_neon_vmaxu, 1>;
+ "vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+ v2f32, v2f32, fmaxnan, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+ v4f32, v4f32, fmaxnan, 1>;
+def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f16",
+ v4f16, v4f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f16",
+ v8f16, v8f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+ v2f32, v2f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+ v4f32, v4f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v4f16, v4f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v8f16, v8f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
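The switch from target intrinsics to the generic fmaxnan/fmaxnum nodes tracks two distinct max semantics; a sketch of the difference (an assumption based on the node names: vmax propagates NaNs, vmaxnm is IEEE-754 maxNum):

    #include <cmath>
    float fmax_nan_sketch(float a, float b) {   // vmax.f32: NaN in, NaN out
      return (std::isnan(a) || std::isnan(b)) ? NAN : (a > b ? a : b);
    }
    float fmax_num_sketch(float a, float b) {   // vmaxnm.f32: quiet NaN ignored
      return std::fmax(a, b);                   // IEEE-754 maxNum
    }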
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "s", int_arm_neon_vmins, 1>;
+ "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "u", int_arm_neon_vminu, 1>;
+ "vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, int_arm_neon_vmins, 1>;
+ v2f32, v2f32, fminnan, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, int_arm_neon_vmins, 1>;
+ v4f32, v4f32, fminnan, 1>;
+def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f16",
+ v4f16, v4f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f16",
+ v8f16, v8f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v2f32, v2f32, int_arm_neon_vminnm, 1>,
+ v2f32, v2f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v4f32, v4f32, int_arm_neon_vminnm, 1>,
+ v4f32, v4f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v4f16, v4f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v8f16, v8f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// Vector Pairwise Operations.
@@ -5095,6 +5275,10 @@ def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f16",
+ v4f16, v4f16, int_arm_neon_vpadd, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5123,6 +5307,9 @@ def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5139,6 +5326,9 @@ def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -5155,6 +5345,14 @@ def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
+def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f16",
+ v4f16, v4f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f16",
+ v8f16, v8f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5163,6 +5361,14 @@ def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f16",
+ v4f16, v4f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f16",
+ v8f16, v8f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5177,6 +5383,14 @@ def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5185,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Shifts.
@@ -5336,6 +5558,14 @@ def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v4f16, v4f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v8f16, v8f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
(v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5398,6 +5628,16 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
"vneg", "f32", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5868,18 +6108,56 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
+def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v4i16, v4f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v4i16, v4f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v4f16, v4i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v4f16, v4i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v8i16, v8f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v8i16, v8f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v8f16, v8i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v8f16, v8i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
SDPatternOperator IntU> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
- def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
- def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
- def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v4i16, v4f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v8i16, v8f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v4i16, v4f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v8i16, v8f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
}
@@ -5898,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
let DecoderMethod = "DecodeVCVTQ" in {
@@ -5909,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5929,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+ (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+ (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+ (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+ (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+ (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+ (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+ (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+ (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6182,22 +6498,40 @@ def VTBX4Pseudo
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+ def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
- def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+ def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
+ def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v4f16, v4f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v8f16, v8f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
}
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
- (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+ (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+ }
}
defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -6343,8 +6677,8 @@ def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPat<NEONfmax, VMAXfd>;
-def : N3VSPat<NEONfmin, VMINfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
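The switch from the target-specific NEONfmax/NEONfmin nodes to the generic fmaxnan/fminnan nodes keeps NEON VMAX/VMIN NaN behaviour; a hedged C++ reference of that behaviour (names illustrative):

    #include <cmath>

    // NEON VMAX.F32 propagates NaNs: if either input is NaN, the
    // result is NaN, unlike IEEE maxNum which prefers the number.
    float fmaxnan_ref(float a, float b) {
      if (std::isnan(a) || std::isnan(b))
        return NAN;
      return a > b ? a : b;
    }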
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
@@ -7704,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
(VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
(VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+ (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
(VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7719,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
(VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
(VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+ (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
@@ -7736,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
(VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
(VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+ (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
(VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7751,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
(VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
+ (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 40414da..df6f243 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br,
// Load Store Instructions.
//
+// PC-relative loads need to be matched first, as constant pool accesses must
+// always be PC-relative. We do this using AddedComplexity, since this pattern
+// is simpler than the other load patterns and would otherwise be tried later.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
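A hedged sketch of the ordering rule the comment above relies on (illustrative types, not the actual ISel matcher data structures): patterns are tried in decreasing score, so the +10 bonus lets this structurally simple pattern win.

    // TableGen scores each pattern; the matcher tries higher scores first.
    struct Pattern { int Complexity; int AddedComplexity; };

    bool triedBefore(const Pattern &A, const Pattern &B) {
      return A.Complexity + A.AddedComplexity >
             B.Complexity + B.AddedComplexity;
    }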
+
+// SP-relative loads should be matched before standard immediate-offset loads,
+// since this avoids having to move SP into another register.
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
// Loads: reg/reg and reg/imm5
let canFoldAsLoad = 1, isReMaterializable = 1 in
multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs tGPR:$Rt), (ins AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+  // Immediate-offset loads should be matched before register-offset loads:
+  // when the offset is a constant, it's simpler to first check whether it fits
+  // in the immediate offset field and fall back to register-offset if not.
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 1 /* Load */,
(outs tGPR:$Rt), (ins AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+ // Register-offset loads are matched last.
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
}
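The immediate forms only encode scaled 5-bit offsets, which is the fit check the ordering above exploits; a hedged sketch (scale is 4/2/1 for word/half/byte accesses, helper name illustrative):

    // Thumb1 reg/imm5 loads and stores encode offset/scale in 5 bits.
    bool fitsImm5Scaled(int Offset, int Scale) {
      return Offset >= 0 && Offset % Scale == 0 && Offset / Scale < 32;
    }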
// Stores: reg/reg and reg/imm5
multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 0 /* Store */,
(outs), (ins tGPR:$Rt, AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
}
// A8.6.57 & A8.6.60
-defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
UnOpFrag<(load node:$Src)>>;
// A8.6.64 & A8.6.61
-defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
UnOpFrag<(zextloadi8 node:$Src)>>;
// A8.6.76 & A8.6.73
-defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
UnOpFrag<(zextloadi16 node:$Src)>>;
@@ -659,58 +691,36 @@ def tLDRSH : // A8.6.84
"ldrsh", "\t$Rt, $addr",
[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
-let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
- T1LdStSP<{1,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
- T1Encoding<{0,1,0,0,1,?}> {
- // A6.2 & A8.6.59
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
+ let Inst{7-0} = addr;
}
// A8.6.194 & A8.6.192
-defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
BinOpFrag<(store node:$LHS, node:$RHS)>>;
// A8.6.197 & A8.6.195
-defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// A8.6.207 & A8.6.205
-defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
- "str", "\t$Rt, $addr",
- [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
- T1LdStSP<{0,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -730,6 +740,7 @@ def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
// Writeback version is just a pseudo, as there's no encoding difference.
// Writeback happens iff the base register is not in the destination register
// list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDMIA_UPD :
InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
"$Rn = $wb", IIC_iLoad_mu>,
@@ -1328,16 +1339,16 @@ def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
// Bswap 16 with load/store
-def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
- (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
-def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
- t_addrmode_rrs2:$addr),
- (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
            (tSTRHi (tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rr:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;
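These patterns rest on an identity between a 32-bit byte swap shifted right by 16 and REV16 applied to a halfword value; a hedged C++ check (names illustrative):

    #include <cstdint>

    // REV16 swaps the bytes within each 16-bit half of a word.
    uint32_t rev16(uint32_t v) {
      return ((v & 0x00FF00FFu) << 8) | ((v & 0xFF00FF00u) >> 8);
    }

    // For a halfword v, (bswap32(v) >> 16) equals the low half of
    // rev16(v), which is what the tREV16-based selection produces.
    uint16_t bswap16_via_rev16(uint16_t v) { return (uint16_t)rev16(v); }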
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
@@ -1372,10 +1383,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
- (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBr t_addrmode_rr:$addr)>;
// extload from the stack -> word load from the stack, as it avoids having to
// materialize the base in a separate register. This only works when a word
@@ -1389,61 +1400,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
Requires<[IsThumb, IsThumb1Only, IsLE]>;
// extload -> zextload
-def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tSXTB (tLDRBi t_addrmode_is1:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tSXTB (tLDRBr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tSXTH (tLDRHi t_addrmode_is2:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tSXTH (tLDRHr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>;
def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
(tLDRBi t_addrmode_is1:$src)>;
-def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
- (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rr:$src),
+ (tLDRBr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
(tLDRHi t_addrmode_is2:$src)>;
-def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
- (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rr:$src),
+ (tLDRHr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
(tLDRi t_addrmode_is4:$src)>;
-def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
- (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rr:$src),
+ (tLDRr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
(tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
-def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
- (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
(tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
-def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
- (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
(tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
-def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
- (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>;
// Large immediate handling.
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index aba8a7b..d460d33 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -43,7 +43,7 @@ def t2_shift_imm : Operand<i32> {
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
- ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
@@ -1554,19 +1554,21 @@ def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
-// For disassembly only.
+let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
+let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
@@ -1574,6 +1576,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
+let mayStore = 1 in
def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
@@ -2100,7 +2103,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
let Inst{23} = 0b1;
@@ -2117,7 +2120,7 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
dag iops = (ins rGPR:$Rn, rGPR:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -2215,13 +2218,13 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
"usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
class T2SatI<dag oops, dag iops, InstrItinClass itin,
@@ -2254,7 +2257,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@@ -2278,7 +2281,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;
@@ -2288,8 +2291,8 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
let Inst{5-4} = 0b00;
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
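The tightened imm1_32/imm0_31 operands match the architectural range of the saturate position; a hedged C++ reference of signed saturation (helper name illustrative):

    #include <cstdint>

    // SSAT clamps to an n-bit signed range, n in [1,32].
    int32_t ssat_ref(int64_t x, unsigned n) {
      int64_t hi = (1LL << (n - 1)) - 1;
      int64_t lo = -(1LL << (n - 1));
      return (int32_t)(x > hi ? hi : (x < lo ? lo : x));
    }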
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -2605,7 +2608,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
// Rounding variants of the below included for disassembly only
@@ -2614,7 +2617,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmul", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2624,7 +2627,7 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2636,7 +2639,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2646,7 +2649,7 @@ def t2SMMLA : T2FourReg<
def t2SMMLAR: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2657,7 +2660,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2667,7 +2670,7 @@ def t2SMMLS: T2FourReg<
def t2SMMLSR:T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2679,7 +2682,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2692,7 +2695,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2705,7 +2708,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2718,7 +2721,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2730,7 +2733,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2742,7 +2745,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2763,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2773,7 +2776,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2786,7 +2789,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2799,7 +2802,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2811,7 +2814,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2823,7 +2826,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2839,79 +2842,79 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUADX:T2ThreeReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSD: T2ThreeReg_mac<
0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSDX:T2ThreeReg_mac<
0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMLAD : T2FourReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLADX : T2FourReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
//===----------------------------------------------------------------------===//
// Division Instructions.
@@ -2961,7 +2964,7 @@ def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rbit", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>,
+ [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>,
Sched<[WriteALU]>;
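t2RBIT now selects from the generic bitreverse node rather than a target-specific ARMrbit node; a reference of the node's semantics:

    #include <cstdint>

    // Reverse the order of all 32 bits, as RBIT does in hardware.
    uint32_t bitreverse32(uint32_t x) {
      uint32_t r = 0;
      for (int i = 0; i < 32; ++i)
        r = (r << 1) | ((x >> i) & 1u);
      return r;
    }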
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e83f8c8..050cd1a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -20,7 +20,6 @@ def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
-
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -93,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
- [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -107,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
- [(store SPR:$Sd, addrmode5:$addr)]> {
+ [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -393,8 +392,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
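VMAXNM/VMINNM have the opposite NaN convention to VMAX/VMIN, which is why they now map to the generic fmaxnum/fminnum nodes; a hedged sketch of IEEE-754 maxNum (name illustrative):

    #include <cmath>

    // maxNum drops a single NaN operand in favour of the number.
    float fmaxnum_ref(float a, float b) {
      if (std::isnan(a)) return b;
      if (std::isnan(b)) return a;
      return a > b ? a : b;
    }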
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -541,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -922,6 +925,22 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let isRegSequence = 1;
}
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a loss
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsThumb2]>;
+
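A hedged C++ sketch of the bit manipulation these patterns perform: with VMOVDRR, $Rl holds the low word and $Rh the high word of the double, so the sign bit is bit 31 of $Rh (helper names illustrative):

    #include <cstdint>
    #include <cstring>

    static double fromHalves(uint32_t lo, uint32_t hi) {
      uint64_t bits = ((uint64_t)hi << 32) | lo;
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    double fabs_on_halves(uint32_t lo, uint32_t hi) {
      return fromHalves(lo, hi & 0x7FFFFFFFu);  // the BFC of bit 31
    }

    double fneg_on_halves(uint32_t lo, uint32_t hi) {
      return fromHalves(lo, hi ^ 0x80000000u);  // the EOR of bit 31
    }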
let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
@@ -1003,7 +1022,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (sint_to_fp GPR:$a)),
(VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOD (VLDRS addrmode5:$a))>;
}
@@ -1021,7 +1040,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
(VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOS (VLDRS addrmode5:$a))>;
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1035,7 +1054,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (uint_to_fp GPR:$a)),
(VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOD (VLDRS addrmode5:$a))>;
}
@@ -1053,7 +1072,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
(VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOS (VLDRS addrmode5:$a))>;
// FP -> Int:
@@ -1106,7 +1125,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1124,7 +1143,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1138,7 +1158,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1156,7 +1176,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 265b86f..725b838 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,17 +60,24 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
namespace {
/// Post-register-allocation pass that combines load / store instructions
/// into ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
@@ -84,7 +91,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM load / store optimization pass";
+ return ARM_LOAD_STORE_OPT_NAME;
}
private:
@@ -118,6 +125,7 @@ namespace {
};
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
void moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before);
@@ -140,12 +148,16 @@ namespace {
MachineBasicBlock::iterator &MBBI);
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ bool CombineMovBx(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
if (!MO.isReg())
@@ -619,9 +631,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
unsigned NewBase;
if (isi32Load(Opcode)) {
- // If it is a load, then just use one of the destination register to
- // use as the new base.
+ // If it is a load, then just use one of the destination registers
+    // as the new base. This no longer uses writeback in Thumb1.
NewBase = Regs[NumRegs-1].first;
+ Writeback = false;
} else {
    // Find a free register that we can use as a scratch register.
moveLiveRegsBefore(MBB, InsertBefore);
@@ -725,9 +738,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
if (Writeback) {
- if (Opcode == ARM::tLDMIA)
+    assert(isThumb1 && "expected Writeback only in Thumb1");
+ if (Opcode == ARM::tLDMIA) {
+ assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
+ }
MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
@@ -784,6 +800,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 4> ImpDefs;
DenseSet<unsigned> KilledRegs;
+ DenseSet<unsigned> UsedRegs;
// Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) {
const MachineOperand &MO = getLoadStoreRegOp(*MI);
@@ -792,6 +809,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
if (IsKill)
KilledRegs.insert(Reg);
Regs.push_back(std::make_pair(Reg, IsKill));
+ UsedRegs.insert(Reg);
if (IsLoad) {
// Collect any implicit defs of super-registers, after merging we can't
@@ -881,7 +899,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.isKill())
continue;
- if (KilledRegs.count(MO.getReg()))
+ if (UsedRegs.count(MO.getReg()))
MO.setIsKill(false);
}
}
@@ -995,76 +1013,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
} while (SIndex < EIndex);
}
-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tSUBi8:
- case ARM::t2SUBri:
- case ARM::SUBri:
- CheckCPSRDef = true;
- break;
- case ARM::tSUBspi:
- break;
- }
-
- // Make sure the offset fits in 8 bits.
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tADDi8:
- case ARM::t2ADDri:
- case ARM::ADDri:
- CheckCPSRDef = true;
- break;
- case ARM::tADDspi:
- break;
- }
-
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- // Make sure the offset fits in 8 bits.
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
@@ -1132,6 +1080,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
+/// generated by the instruction are possibly read as well.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@@ -1151,7 +1168,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
@@ -1163,49 +1179,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (MI->getOperand(i).getReg() == Base)
return false;
- bool DoMerge = false;
- ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- } else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::da;
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(PrevMBBI);
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(NextMBBI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
}
-
- if (!DoMerge)
- return false;
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1283,7 +1274,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
@@ -1295,7 +1285,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@@ -1303,55 +1292,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- bool DoMerge = false;
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- unsigned NewOpc = 0;
- // AM2 - 12 bits, thumb2 - 8 bits.
- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (!isAM5 &&
- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(PrevMBBI);
- }
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if (!isAM5 &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(NextMBBI);
- }
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
}
+ MBB.erase(MergeInstr);
- if (!DoMerge)
- return false;
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+ bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -1368,18 +1333,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
}
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@@ -1391,13 +1354,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@@ -1409,46 +1371,75 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return true;
}
-/// Returns true if instruction is a memory operation that this pass is capable
-/// of operating on.
-static bool isMemoryOp(const MachineInstr *MI) {
- // When no memory operands are present, conservatively assume unaligned,
- // volatile, unfoldable.
- if (!MI->hasOneMemOperand())
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
return false;
- const MachineMemOperand *MMO = *MI->memoperands_begin();
-
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
- // str <undef> could probably be eliminated entirely, but for now we just want
- // to avoid making a mess of it.
- // FIXME: Use str <undef> as a wildcard to enable better stm folding.
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
- MI->getOperand(0).isUndef())
- return false;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
- // Likewise don't mess with references to undefined addresses.
- if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
- MI->getOperand(1).isUndef())
- return false;
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- unsigned Opcode = MI->getOpcode();
+ MBB.erase(MBBI);
+ return true;
+}
+
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
+static bool isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
case ARM::VLDRS:
case ARM::VSTRS:
- return MI->getOperand(1).isReg();
case ARM::VLDRD:
case ARM::VSTRD:
- return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
case ARM::tLDRi:
@@ -1459,9 +1450,40 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- return MI->getOperand(1).isReg();
+ break;
+ default:
+ return false;
}
- return false;
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO.isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO.getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
@@ -1616,6 +1638,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned Position = 0;
assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
LiveRegsValid = false;
for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@@ -1626,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
continue;
++Position;
- if (isMemoryOp(MBBI)) {
+ if (isMemoryOp(*MBBI)) {
unsigned Opcode = MBBI->getOpcode();
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
@@ -1694,8 +1717,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI = I;
--Position;
// Fallthrough to look into existing chain.
- } else if (MBBI->isDebugValue())
+ } else if (MBBI->isDebugValue()) {
continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+    // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+    // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
+ }
+
// If we are here then the chain is broken; Extract candidates for a merge.
if (MemOps.size() > 0) {
@@ -1726,7 +1756,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (Merged) {
Changed = true;
unsigned Opcode = Merged->getOpcode();
- if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
@@ -1741,6 +1773,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
return Changed;
}
@@ -1765,7 +1801,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
- MachineInstr *PrevMI = std::prev(MBBI);
+ MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+ // Ignore any DBG_VALUE instructions.
+ while (PrevI->isDebugValue() && PrevI != MBB.begin())
+ --PrevI;
+ MachineInstr *PrevMI = PrevI;
unsigned Opcode = PrevMI->getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1786,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
return false;
}
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+ MBBI->getOpcode() != ARM::tBX_RET)
+ return false;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+ return false;
+
+ for (auto Use : Prev->uses())
+ if (Use.isKill()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ MBB.erase(Prev);
+ return true;
+ }
+
+ llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1793,7 +1857,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- MRI = &Fn.getRegInfo();
+
RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1805,18 +1869,29 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
+ if (isThumb1)
+ Modified |= CombineMovBx(MBB);
}
Allocator.DestroyAll();
return Modified;
}
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+ "ARM pre- register allocation load / store optimization pass"
+
namespace {
/// Pre-register-allocation pass that moves loads / stores from consecutive
/// locations closer together to make it more likely they will be combined
/// later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const DataLayout *TD;
const TargetInstrInfo *TII;
@@ -1828,7 +1903,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM pre- register allocation load / store optimization pass";
+ return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
private:
@@ -1847,8 +1922,11 @@ namespace {
char ARMPreAllocLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
+ TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
@@ -1856,9 +1934,8 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI)
- Modified |= RescheduleLoadStoreInstrs(MFI);
+ for (MachineBasicBlock &MFI : Fn)
+ Modified |= RescheduleLoadStoreInstrs(&MFI);
return Modified;
}
@@ -2187,7 +2264,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!MI->isDebugValue())
MI2LocMap[MI] = ++Loc;
- if (!isMemoryOp(MI))
+ if (!isMemoryOp(*MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
@@ -2275,3 +2352,4 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}
+
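An aside on the MergeBaseUpdateLSMultiple rewrite above: instead of scanning with a DoMerge flag, it now asks findIncDecBefore / findIncDecAfter for a single signed Offset and folds the decision into one sub-mode check. Below is a minimal standalone sketch of that sub-mode table; the enum values mirror ARM_AM::AMSubMode, but mergeSubMode and everything else are invented for illustration and are not the LLVM API.

#include <cassert>
#include <cstdio>

enum AMSubMode { ia, ib, da, db };

// Mirrors the hunk's logic: a matching decrement *before* an ia/ib multiple
// flips the sub-mode (ia -> db, ib -> da); a matching increment/decrement
// *after* keeps the mode and becomes the writeback. Anything else: no merge.
static bool mergeSubMode(AMSubMode &Mode, int Offset, int Bytes,
                         bool BeforeInsn) {
  if (BeforeInsn) {
    if (Mode == ia && Offset == -Bytes) { Mode = db; return true; }
    if (Mode == ib && Offset == -Bytes) { Mode = da; return true; }
    return false;
  }
  if ((Mode == ia || Mode == ib) && Offset == Bytes)
    return true;
  if ((Mode == da || Mode == db) && Offset == -Bytes)
    return true;
  return false;
}

int main() {
  AMSubMode M = ia;
  assert(mergeSubMode(M, -16, 16, /*BeforeInsn=*/true) && M == db);
  M = ia;
  assert(mergeSubMode(M, 16, 16, /*BeforeInsn=*/false) && M == ia);
  std::puts("sub-mode merge checks passed");
  return 0;
}

The same check-before, fall-back-to-after, otherwise-bail shape recurs in MergeBaseUpdateLoadStore and MergeBaseUpdateLSDouble above.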
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index f5250ff..ac0330f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,5 +20,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- GlobalBaseReg(0) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 14dd9ef..d644797 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,7 +52,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned ReturnRegsCount;
/// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
bool HasStackFrame;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
@@ -110,11 +110,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
/// ArgumentStackSize - amount of bytes on stack consumed by the arguments
/// being passed on the stack
unsigned ArgumentStackSize;
@@ -133,7 +128,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -204,9 +199,6 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 45cc9ea..02cbfb1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -266,12 +266,19 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single-precision floating point register class.
-// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
-// avoid partial-write dependencies on D registers (S registers are
-// renamed as portions of D registers).
-def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
- (sequence "S%u", 0, 31), 2),
- (sequence "S%u", 0, 31))>;
+// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
+// to avoid partial-write dependencies on D or Q (depending on platform)
+// registers (S registers are renamed as portions of D/Q registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
+ let AltOrders = [(add (decimate SPR, 2), SPR),
+ (add (decimate SPR, 4),
+ (decimate SPR, 2),
+ (decimate (rotl SPR, 1), 4),
+ (decimate (rotl SPR, 1), 2))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+}
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
@@ -281,25 +288,29 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// class.
// ARM requires only word alignment for double. It's more performant if it
// is double-word aligned, though.
-def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(sequence "D%u", 0, 31)> {
- // Allocate non-VFP2 registers D16-D31 first.
- let AltOrders = [(rotl DPR, 16)];
- let AltOrderSelect = [{ return 1; }];
+ // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
+ // Darwin platforms.
+ let AltOrders = [(rotl DPR, 16),
+ (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
}
// Subset of DPR that are accessible with VFP2 (and that therefore also have
// 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 16)>;
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
-def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 8)>;
// Generic 128-bit vector register class.
-def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
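A note on the AltOrders machinery used in the register-class changes above: decimate keeps every Nth register, rotl rotates the sequence, and add concatenates with earlier entries winning; AltOrderSelect returns an index where 0 is the default order and k selects AltOrders[k-1], so the added `return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);` picks the stride-2 list normally and the stride-4 list when the subtarget prefers it. The helpers in this standalone sketch are invented approximations of the TableGen set operators, not their real implementation.

#include <algorithm>
#include <cstdio>
#include <vector>

using Order = std::vector<int>;

// Keep every Nth register of O (models TableGen 'decimate').
static Order decimate(const Order &O, int N) {
  Order R;
  for (size_t I = 0; I < O.size(); I += N)
    R.push_back(O[I]);
  return R;
}

// Rotate O left by N (models TableGen 'rotl').
static Order rotl(Order O, int N) {
  std::rotate(O.begin(), O.begin() + N, O.end());
  return O;
}

// Concatenate, dropping registers already present: the allocator keeps the
// first occurrence (models TableGen 'add' as an allocation order).
static Order add(const Order &A, const Order &B) {
  Order R = A;
  for (int X : B)
    if (std::find(R.begin(), R.end(), X) == R.end())
      R.push_back(X);
  return R;
}

static void dump(const char *Name, const Order &O) {
  std::printf("%s:", Name);
  for (int R : O)
    std::printf(" S%d", R);
  std::printf("\n");
}

int main() {
  Order SPR;
  for (int I = 0; I < 32; ++I)
    SPR.push_back(I); // S0..S31
  // AltOrders[0]: stride-2 -- S0, S2, ..., S30, then the odd registers.
  dump("stride2", add(decimate(SPR, 2), SPR));
  // AltOrders[1]: stride-4 -- S0, S4, ..., then even stride-2, then the
  // rotated (odd) stride-4 and stride-2 registers.
  Order Stride4 = add(add(decimate(SPR, 4), decimate(SPR, 2)),
                      add(decimate(rotl(SPR, 1), 4),
                          decimate(rotl(SPR, 1), 2)));
  dump("stride4", Stride4);
  return 0;
}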
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
index b03d5ff..3ad7730 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -37,1050 +37,13 @@ def SW_FDIV : FuncUnit;
// FIXME: Add preload instruction when it is documented.
// FIXME: Model non-pipelined nature of FP div / sqrt unit.
-def SwiftItineraries : ProcessorItineraries<
- [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
- //
- // Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
- InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3]>,
- InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_LS]>],
- [5]>,
- //
- // MVN instructions
- InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
- //
- // Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Bitwise Instructions that produce a result
- InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Unary Instructions that produce a result
-
- // CLZ, RBIT, etc.
- InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
-
- // BFC, BFI, UBFX, SBFX
- InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1]>,
-
- //
- // Zero and sign extension instructions
- InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1, 1]>,
- //
- // Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Test instructions
- InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Move instructions, conditional
- // FIXME: Correctly model the extra input dep on the destination.
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
-
- // Integer multiply pipeline
- //
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 3>,
- InstrStage<1, [SW_ALU0]>],
- [5, 5, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 6, 1, 1]>,
- //
- // Integer divide
- InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 0>,
- InstrStage<14, [SW_IDIV]>],
- [14, 1, 1]>,
-
- // Integer load pipeline
- // FIXME: The timings are some rough approximations
- //
- // Immediate offset
- InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- //
- // Load multiple, def is the 5th operand.
- // FIXME: This assumes 3 to 4 registers.
- InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
-
- //
- // Load multiple + update, defs are the 1st and 5th operands.
- InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Load multiple plus branch
- InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop, def is the 3rd operand.
- InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop + branch, def is the 3rd operand.
- InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
-
- //
- // iLoadi + iALUr for t2LDRpci_pic.
- InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [4, 1]>,
-
- // Integer store pipeline
- ///
- // Immediate offset
- InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- //
- // Store multiple
- InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [], [], -1>, // dynamic uops
- //
- // Store multiple + update
- InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [2], [], -1>, // dynamic uops
-
- //
- // Preload
- InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
-
- // Branch
- //
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
-
- // FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // Single-precision FP Unary
- //
- // Most floating-point moves get issued on ALU0.
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
-
- //
- // Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single to Half FP Convert
- InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 4>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1]>,
- //
- // Half to Single FP Convert
- InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 1]>,
- //
- // Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1, 1]>,
- //
- // Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
- //
- // Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1]>,
- //
- // Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
-
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Single-precision FP Load
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Double-precision FP Load
- InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // FP Load Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // FP Load Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // Single-precision FP Store
- InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Double-precision FP Store
- InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // FP Store Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1], [], -1>, // dynamic uops
- //
- // FP Store Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1], [], -1>, // dynamic uops
- // NEON
- //
- // Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer CountQ-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Count
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
-
- //
- // Move
- InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2]>,
- //
- // Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Quad-register Permute Move
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Integer to Lane Move
- // FIXME: I think this is correct, but it is not clear from the tuning guide.
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
-
- //
- // Vector narrow move
- InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Double-register FP Unary
- // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Quad-register FP Unary
- // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-register FP Binary
- // FIXME: We're using this itin for many instructions.
- InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // VPADD, etc.
- InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register FP VMUL
- InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register FP Binary
- InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register FP VMUL
- InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Fused FP Multiple-Accumulate
- InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
-  // Quad-register Fused FP Multiple-Accumulate
- InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
-  // Double-register Reciprocal Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
-  // Quad-register Reciprocal Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute (3 cycle issue on A9)
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
-
- //
- // Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>,
- //
- // VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simple machine model which
-// will replace itineraries.
-
// Swift machine model for scheduling and other instruction cost heuristics.
def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MicroOpBufferSize = 45; // Based on NEON renamed registers.
let LoadLatency = 3;
let MispredictPenalty = 14; // A branch direction mispredict.
-
- let Itineraries = SwiftItineraries;
+ let CompleteModel = 0; // FIXME: Remove if all instructions are covered.
}
// Swift predicates.
@@ -1558,6 +521,13 @@ let SchedModel = SwiftModel in {
(instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
"PUSH", "tPUSH")>;
+  // LDRLIT pseudo instructions; they expand to LDR + PICADD
+ def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+ // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle],
+ (instregex "LDRLIT_ga_pcrel_ldr")>;
+
// 4.2.26 Branch
def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 6cafbbb..6fded9c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -160,41 +160,39 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = 4;
unsigned i = 0;
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
- const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
+ const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
SDValue TFOps[6];
SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, dl, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
+ // VLDM/VSTM and make this code emit it when appropriate. This would reduce
+ // pressure on the general purpose registers. However this seems harder to map
+ // onto the register allocator's view of the world.
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, dl, MVT::i32)),
- DstPtrInfo.getWithOffset(DstOff),
- isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // The number of MEMCPY pseudo-instructions to emit. We use up to
+ // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
+ // later on. This is a lower bound on the number of MEMCPY operations we must
+ // emit.
+ unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
+
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
+
+ for (unsigned I = 0; I != NumMEMCPYs; ++I) {
+ // Evenly distribute registers among MEMCPY operations to reduce register
+ // pressure.
+ unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+ unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+ Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+ DAG.getConstant(NumRegs, dl, MVT::i32));
+ Src = Dst.getValue(1);
+ Chain = Dst.getValue(2);
+
+ DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+ SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
- EmittedNumMemOps += i;
+ EmittedNumMemOps = NextEmittedNumMemOps;
}
if (BytesLeft == 0)
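
The even-distribution arithmetic in the hunk above is easy to check in isolation. A minimal stand-alone sketch (the constants and names here are ours, not LLVM's):

#include <cstdio>

// Split NumMemOps 4-byte copies into ceil(NumMemOps / MaxPerOp) MEMCPY
// pseudos; NumMemOps * (I + 1) / NumOps spreads the registers so the
// per-pseudo counts differ by at most one (e.g. 10 ops -> 5 + 5, not 6 + 4).
int main() {
  const unsigned NumMemOps = 10, MaxPerOp = 6;
  const unsigned NumOps = (NumMemOps + MaxPerOp - 1) / MaxPerOp; // == 2
  unsigned Emitted = 0;
  for (unsigned I = 0; I != NumOps; ++I) {
    unsigned Next = NumMemOps * (I + 1) / NumOps;
    std::printf("MEMCPY %u copies %u registers\n", I, Next - Emitted);
    Emitted = Next;
  }
  return 0;
}
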
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 002c3e9..bb6ae28 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,37 +41,9 @@ using namespace llvm;
#include "ARMGenSubtargetInfo.inc"
static cl::opt<bool>
-ReserveR9("arm-reserve-r9", cl::Hidden,
- cl::desc("Reserve R9, making it unavailable as GPR"));
-
-static cl::opt<bool>
-ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
-namespace {
-enum AlignMode {
- DefaultAlign,
- StrictAlign,
- NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
- cl::Hidden, cl::init(DefaultAlign),
- cl::values(
- clEnumValN(DefaultAlign, "arm-default-align",
- "Generate unaligned accesses only on hardware/OS "
- "combinations that are known to support them"),
- clEnumValN(StrictAlign, "arm-strict-align",
- "Disallow all unaligned memory accesses"),
- clEnumValN(NoStrictAlign, "arm-no-strict-align",
- "Allow unaligned memory accesses"),
- clEnumValEnd));
-
enum ITMode {
DefaultIT,
RestrictedIT,
@@ -88,6 +61,12 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
+/// ForceFastISel - Use fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+ cl::init(false), cl::Hidden);
+
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -110,8 +89,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
+ ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -133,6 +112,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV7Ops = false;
HasV8Ops = false;
HasV8_1aOps = false;
+ HasV8_2aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -147,10 +127,11 @@ void ARMSubtarget::initializeEnvironment() {
UseSoftFloat = false;
HasThumb2 = false;
NoARM = false;
- IsR9Reserved = ReserveR9;
- UseMovt = false;
+ ReserveR9 = false;
+ NoMovt = false;
SupportsTailCall = false;
HasFP16 = false;
+ HasFullFP16 = false;
HasD16 = false;
HasHardwareDivide = false;
HasHardwareDivideInARM = false;
@@ -168,20 +149,36 @@ void ARMSubtarget::initializeEnvironment() {
HasCrypto = false;
HasCRC = false;
HasZeroCycleZeroing = false;
- AllowsUnalignedMem = false;
- Thumb2DSP = false;
+ StrictAlign = false;
+ HasDSP = false;
UseNaClTrap = false;
GenLongCalls = false;
UnsafeFPMath = false;
+
+ // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
+ // directly from it, but we can try to make sure they're consistent when
+ // both are available.
+ UseSjLjEH = isTargetDarwin() && !isTargetWatchOS();
+ assert((!TM.getMCAsmInfo() ||
+ (TM.getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::SjLj) == UseSjLjEH) &&
+ "inconsistent sjlj choice between CodeGen and MC");
}
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
- if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
- // Default to the Swift CPU when targeting armv7s/thumbv7s.
- CPUString = "swift";
- else
- CPUString = "generic";
+ CPUString = "generic";
+
+ if (isTargetDarwin()) {
+ StringRef ArchName = TargetTriple.getArchName();
+ if (ArchName.endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else if (ArchName.endswith("v7k"))
+ // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
+ // ARMv7k does not use SjLj exception handling.
+ CPUString = "cortex-a7";
+ }
}
// Insert the architecture feature derived from the target triple into the
@@ -212,44 +209,31 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetNaCl())
+ if (isTargetNaCl() || isAAPCS16_ABI())
stackAlignment = 16;
- UseMovt = hasV6T2Ops() && ArmUseMOVT;
-
- if (isTargetMachO()) {
- IsR9Reserved = ReserveR9 || !HasV6Ops;
- SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
- } else {
- IsR9Reserved = ReserveR9;
- SupportsTailCall = !isThumb1Only();
- }
-
- if (Align == DefaultAlign) {
- // Assume pre-ARMv6 doesn't support unaligned accesses.
- //
- // ARMv6 may or may not support unaligned accesses depending on the
- // SCTLR.U bit, which is architecture-specific. We assume ARMv6
- // Darwin and NetBSD targets support unaligned accesses, and others don't.
- //
- // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
- // which raises an alignment fault on unaligned accesses. Linux
- // defaults this bit to 0 and handles it as a system-wide (not
- // per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
- //
- // The above behavior is consistent with GCC.
- AllowsUnalignedMem =
- (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
- isTargetNetBSD())) ||
- (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
- } else {
- AllowsUnalignedMem = !(Align == StrictAlign);
- }
-
- // No v6M core supports unaligned memory access (v6M ARM ARM A3.2)
- if (isV6M())
- AllowsUnalignedMem = false;
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+
+ SupportsTailCall = !isThumb1Only();
+
+ if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
+ SupportsTailCall = false;
switch (IT) {
case DefaultIT:
@@ -276,9 +260,15 @@ bool ARMSubtarget::isAPCS_ABI() const {
}
bool ARMSubtarget::isAAPCS_ABI() const {
assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
- return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+ TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+bool ARMSubtarget::isAAPCS16_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
@@ -321,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
}
bool ARMSubtarget::hasSinCos() const {
- return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
+ return isTargetWatchOS() ||
+ (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
+}
+
+bool ARMSubtarget::enableMachineScheduler() const {
+ // Enable the MachineScheduler before register allocation for out-of-order
+ // architectures where we do not use the PostRA scheduler anymore (for now
+ // restricted to swift).
+ return getSchedModel().isOutOfOrder() && isSwift();
}
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
+ // No need for PostRA scheduling on out of order CPUs (for now restricted to
+ // swift).
+ if (getSchedModel().isOutOfOrder() && isSwift())
+ return false;
return (!isThumb() || hasThumb2());
}
@@ -333,15 +335,30 @@ bool ARMSubtarget::enableAtomicExpand() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
+bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+ // For general targets, the prologue can grow when VFPs are allocated with
+ // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
+ // format which it's more important to get right.
+ return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize());
+}
+
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
- return UseMovt && (isTargetWindows() ||
- !MF.getFunction()->hasFnAttribute(Attribute::MinSize));
+ return !NoMovt && hasV6T2Ops() &&
+ (isTargetWindows() || !MF.getFunction()->optForMinSize());
}
bool ARMSubtarget::useFastISel() const {
+ // Enable fast-isel for any target, for testing only.
+ if (ForceFastISel)
+ return true;
+
+ // Limit fast-isel to the targets that are or have been tested.
+ if (!hasV6Ops())
+ return false;
+
// Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
return TM.Options.EnableFastISel &&
((isTargetMachO() && !isThumb1Only()) ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index dd101df..a8b2801 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -43,11 +43,17 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
+ CortexA57, CortexA72, Krait, Swift
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
};
+ enum ARMArchEnum {
+ ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
+ ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a
+ };
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -55,6 +61,9 @@ protected:
/// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
ARMProcClassEnum ARMProcClass;
+ /// ARMArch - ARM architecture
+ ARMArchEnum ARMArch;
+
/// HasV4TOps, HasV5TOps, HasV5TEOps,
/// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
@@ -68,6 +77,7 @@ protected:
bool HasV7Ops;
bool HasV8Ops;
bool HasV8_1aOps;
+ bool HasV8_2aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -109,22 +119,24 @@ protected:
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
- /// IsR9Reserved - True if R9 is a not available as general purpose register.
- bool IsR9Reserved;
+ /// ReserveR9 - True if R9 is not available as a general purpose register.
+ bool ReserveR9;
- /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
- /// imms (including global addresses).
- bool UseMovt;
+ /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
+ /// 32-bit imms (including global addresses).
+ bool NoMovt;
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
bool SupportsTailCall;
- /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
- /// only so far)
+ /// HasFP16 - True if subtarget supports half-precision FP conversions
bool HasFP16;
+ /// HasFullFP16 - True if subtarget supports half-precision FP operations
+ bool HasFullFP16;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16;
@@ -190,18 +202,18 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
- /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// StrictAlign - If true, the subtarget disallows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool AllowsUnalignedMem;
+ bool StrictAlign;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
/// blocks to conform to ARMv8 rule.
bool RestrictIT;
- /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
- /// and such) instructions in Thumb2 code.
- bool Thumb2DSP;
+ /// HasDSP - If true, the subtarget supports the DSP (saturating arith
+ /// and such) instructions.
+ bool HasDSP;
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
@@ -212,6 +224,9 @@ protected:
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath;
+ /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
+ bool UseSjLjEH;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -297,6 +312,7 @@ public:
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -343,17 +359,20 @@ public:
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
- bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
+ bool useSjLjEH() const { return UseSjLjEH; }
bool genLongCalls() const { return GenLongCalls; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
+ bool hasFullFP16() const { return HasFullFP16; }
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -375,6 +394,11 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF) &&
!isTargetDarwin() && !isTargetWindows();
}
+ bool isTargetGNUAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
@@ -383,7 +407,7 @@ public:
TargetTriple.getEnvironment() == Triple::GNUEABI ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
- TargetTriple.getEnvironment() == Triple::Android) &&
+ isTargetAndroid()) &&
!isTargetDarwin() && !isTargetWindows();
}
@@ -391,14 +415,13 @@ public:
// FIXME: this is invalid for WindowsCE
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
- isTargetWindows();
- }
- bool isTargetAndroid() const {
- return TargetTriple.getEnvironment() == Triple::Android;
+ isTargetWindows() || isAAPCS16_ABI();
}
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
+ bool isAAPCS16_ABI() const;
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
@@ -409,17 +432,17 @@ public:
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
- bool isV6M() const {
- return isThumb1Only() && isMClass();
+ bool isR9Reserved() const {
+ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
- bool isR9Reserved() const { return IsR9Reserved; }
+ bool useStride4VFPs(const MachineFunction &MF) const;
bool useMovt(const MachineFunction &MF) const;
bool supportsTailCall() const { return SupportsTailCall; }
- bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool allowsUnalignedMem() const { return !StrictAlign; }
bool restrictIT() const { return RestrictIT; }
@@ -433,6 +456,9 @@ public:
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// Returns true if machine scheduler should be enabled.
+ bool enableMachineScheduler() const override;
+
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 93495d6..fca1901 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName().startswith("aapcs"))
+ if (Options.MCOptions.getABIName() == "aapcs16")
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+ else if (Options.MCOptions.getABIName().startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
else if (Options.MCOptions.getABIName().startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
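
The ordering of the two new tests above matters: "aapcs16" also satisfies startswith("aapcs"), so the exact match has to come first. A tiny check of that premise (plain std::string standing in for StringRef):

#include <cassert>
#include <string>

int main() {
  const std::string ABIName = "aapcs16";
  // The generic prefix test would match too, which is why the hunk above
  // compares against "aapcs16" before falling through to startswith("aapcs").
  assert(ABIName.compare(0, 5, "aapcs") == 0);
  assert(ABIName == "aapcs16");
  return 0;
}
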
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else if (TT.isWatchOS()) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
} else {
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
}
@@ -106,7 +110,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
if (TT.isOSNetBSD())
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
}
}
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// to 64. We always try to give them natural alignment.
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
Ret += "-v64:32:64-v128:32:128";
- else
+ else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-v128:64:128";
// Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -184,6 +188,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType =
Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+
+ // Default to triple-appropriate EABI
+ if (Options.EABIVersion == EABI::Default ||
+ Options.EABIVersion == EABI::Unknown) {
+ if (Subtarget.isTargetGNUAEABI())
+ this->Options.EABIVersion = EABI::GNU;
+ else
+ this->Options.EABIVersion = EABI::EABI5;
+ }
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
@@ -225,12 +238,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(ARMTTIImpl(this, F));
+ });
}
-
-void ARMTargetMachine::anchor() { }
+void ARMTargetMachine::anchor() {}
ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -244,7 +257,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
"support ARM mode execution!");
}
-void ARMLETargetMachine::anchor() { }
+void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -253,7 +266,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ARMBETargetMachine::anchor() { }
+void ARMBETargetMachine::anchor() {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -262,7 +275,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void ThumbTargetMachine::anchor() { }
+void ThumbTargetMachine::anchor() {}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -273,7 +286,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-void ThumbLETargetMachine::anchor() { }
+void ThumbLETargetMachine::anchor() {}
ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -282,7 +295,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ThumbBETargetMachine::anchor() { }
+void ThumbBETargetMachine::anchor() {}
ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -348,7 +361,13 @@ bool ARMPassConfig::addPreISel() {
// tricky when doing code gen per function.
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
(EnableGlobalMerge == cl::BOU_UNSET);
- addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+ // Merging of extern globals is enabled by default on non-Mach-O as we
+ // expect it to be generally either beneficial or harmless. On Mach-O it
+ // is disabled as we emit the .subsections_via_symbols directive which
+ // means that merging extern globals is not safe.
+ bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+ addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
+ MergeExternalByDefault));
}
return false;
@@ -356,9 +375,6 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
-
- if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel)
- addPass(createARMGlobalBaseRegPass());
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8c98e08..8ad1f3d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -26,7 +26,8 @@ public:
enum ARMABI {
ARM_ABI_UNKNOWN,
ARM_ABI_APCS,
- ARM_ABI_AAPCS // ARM EABI
+ ARM_ABI_AAPCS, // ARM EABI
+ ARM_ABI_AAPCS16
} TargetABI;
protected:
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2f194cf..c152011 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
-unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -47,12 +47,12 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 3;
}
-unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
+ static const CostTblEntry NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -61,10 +61,9 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
- if (Idx != -1)
- return LT.first * NEONFltDblTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
EVT SrcTy = TLI->getValueType(DL, Src);
@@ -76,8 +75,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -153,15 +151,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -184,15 +181,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONFloatConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -216,15 +212,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -236,17 +231,17 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger()) {
- int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return ARMIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) {
+int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on Swift.
if (ST->isSwift() &&
@@ -255,28 +250,30 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
- // Cross-class copies are expensive on many microarchitectures,
- // so assume they are expensive by default.
if ((Opcode == Instruction::InsertElement ||
- Opcode == Instruction::ExtractElement) &&
- ValTy->getVectorElementType()->isIntegerTy())
- return 3;
+ Opcode == Instruction::ExtractElement)) {
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if (ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
+ // Even if it's not a cross class copy, this likely leads to mixing
+ // of NEON and VFP code and should therefore be penalized.
+ if (ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
+ }
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
@@ -285,21 +282,20 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
- SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorSelectTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
}
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -314,7 +310,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
-unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+int ARMTTIImpl::getFPOpCost(Type *Ty) {
// Use similar logic that's in ARMISelLowering:
// Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
// to VFP.
@@ -333,14 +329,14 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
return TargetTransformInfo::TCC_Expensive;
}
-unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
@@ -353,16 +349,16 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
if (Kind == TTI::SK_Alternate) {
- static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ static const CostTblEntry NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
@@ -379,27 +375,26 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx =
- CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTIImpl::getArithmeticInstrCost(
+int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
+ static const CostTblEntry CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -440,16 +435,12 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
// Multiplication.
};
- int Idx = -1;
-
if (ST->hasNEON())
- Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
-
- if (Idx != -1)
- return LT.first * CostTbl[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
+ return LT.first * Entry->Cost;
- unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
@@ -465,10 +456,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
return Cost;
}
-unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -479,21 +469,21 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+ bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
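
All of the cost-table hunks in this file migrate from an index-returning lookup to one that returns a pointer to the matching entry (or null), which collapses the hit path into a single if-init. A simplified stand-in for the helper — the real LLVM version takes an ArrayRef and MVT, not these toy types:

#include <algorithm>
#include <iterator>

struct CostTblEntry { int ISD; int Type; int Cost; };

template <unsigned N>
const CostTblEntry *CostTableLookup(const CostTblEntry (&Tbl)[N], int ISD,
                                    int Ty) {
  const CostTblEntry *I =
      std::find_if(std::begin(Tbl), std::end(Tbl),
                   [=](const CostTblEntry &E) {
                     return E.ISD == ISD && E.Type == Ty;
                   });
  return I == std::end(Tbl) ? nullptr : I;
}

// Call sites then read as in the hunks above:
//   if (const auto *Entry = CostTableLookup(Tbl, ISDOpcode, LT.second))
//     return LT.first * Entry->Cost;
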
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 84f256f..7d8d238 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -41,7 +41,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *getTLI() const { return TLI; }
public:
- explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -52,11 +52,13 @@ public:
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
+ bool enableInterleavedAccessVectorization() { return true; }
+
/// \name Scalar TTI Implementations
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
/// @}
@@ -92,34 +94,31 @@ public:
return 1;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, bool IsComplex);
- unsigned getFPOpCost(Type *Ty);
+ int getFPOpCost(Type *Ty);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index cf6b892..c69a741 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -129,7 +129,6 @@ public:
};
class ARMAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
UnwindContext UC;
@@ -247,48 +246,49 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandVector &Operands);
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[ARM::ModeThumb];
+ return getSTI().getFeatureBits()[ARM::ModeThumb];
}
bool isThumbOne() const {
- return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool isThumbTwo() const {
- return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool hasThumb() const {
- return STI.getFeatureBits()[ARM::HasV4TOps];
+ return getSTI().getFeatureBits()[ARM::HasV4TOps];
}
bool hasV6Ops() const {
- return STI.getFeatureBits()[ARM::HasV6Ops];
+ return getSTI().getFeatureBits()[ARM::HasV6Ops];
}
bool hasV6MOps() const {
- return STI.getFeatureBits()[ARM::HasV6MOps];
+ return getSTI().getFeatureBits()[ARM::HasV6MOps];
}
bool hasV7Ops() const {
- return STI.getFeatureBits()[ARM::HasV7Ops];
+ return getSTI().getFeatureBits()[ARM::HasV7Ops];
}
bool hasV8Ops() const {
- return STI.getFeatureBits()[ARM::HasV8Ops];
+ return getSTI().getFeatureBits()[ARM::HasV8Ops];
}
bool hasARM() const {
- return !STI.getFeatureBits()[ARM::FeatureNoARM];
+ return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
}
- bool hasThumb2DSP() const {
- return STI.getFeatureBits()[ARM::FeatureDSPThumb2];
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[ARM::FeatureDSP];
}
bool hasD16() const {
- return STI.getFeatureBits()[ARM::FeatureD16];
+ return getSTI().getFeatureBits()[ARM::FeatureD16];
}
bool hasV8_1aOps() const {
- return STI.getFeatureBits()[ARM::HasV8_1aOps];
+ return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
}
void SwitchMode() {
+ MCSubtargetInfo &STI = copySTI();
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
bool isMClass() const {
- return STI.getFeatureBits()[ARM::FeatureMClass];
+ return getSTI().getFeatureBits()[ARM::FeatureMClass];
}
/// @name Auto-generated Match Functions
@@ -343,14 +343,15 @@ public:
Match_RequiresNotITBlock,
Match_RequiresV6,
Match_RequiresThumb2,
+ Match_RequiresV8,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "ARMGenAsmMatcher.inc"
};
- ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : STI(STI), MII(MII), UC(Parser) {
+ : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) {
MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
@@ -564,87 +565,6 @@ class ARMOperand : public MCParsedAsmOperand {
public:
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
- ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
- Kind = o.Kind;
- StartLoc = o.StartLoc;
- EndLoc = o.EndLoc;
- switch (Kind) {
- case k_CondCode:
- CC = o.CC;
- break;
- case k_ITCondMask:
- ITMask = o.ITMask;
- break;
- case k_Token:
- Tok = o.Tok;
- break;
- case k_CCOut:
- case k_Register:
- Reg = o.Reg;
- break;
- case k_RegisterList:
- case k_DPRRegisterList:
- case k_SPRRegisterList:
- Registers = o.Registers;
- break;
- case k_VectorList:
- case k_VectorListAllLanes:
- case k_VectorListIndexed:
- VectorList = o.VectorList;
- break;
- case k_CoprocNum:
- case k_CoprocReg:
- Cop = o.Cop;
- break;
- case k_CoprocOption:
- CoprocOption = o.CoprocOption;
- break;
- case k_Immediate:
- Imm = o.Imm;
- break;
- case k_MemBarrierOpt:
- MBOpt = o.MBOpt;
- break;
- case k_InstSyncBarrierOpt:
- ISBOpt = o.ISBOpt;
- case k_Memory:
- Memory = o.Memory;
- break;
- case k_PostIndexRegister:
- PostIdxReg = o.PostIdxReg;
- break;
- case k_MSRMask:
- MMask = o.MMask;
- break;
- case k_BankedReg:
- BankedReg = o.BankedReg;
- break;
- case k_ProcIFlags:
- IFlags = o.IFlags;
- break;
- case k_ShifterImmediate:
- ShifterImm = o.ShifterImm;
- break;
- case k_ShiftedRegister:
- RegShiftedReg = o.RegShiftedReg;
- break;
- case k_ShiftedImmediate:
- RegShiftedImm = o.RegShiftedImm;
- break;
- case k_RotateImmediate:
- RotImm = o.RotImm;
- break;
- case k_ModifiedImmediate:
- ModImm = o.ModImm;
- break;
- case k_BitfieldDescriptor:
- Bitfield = o.Bitfield;
- break;
- case k_VectorIndex:
- VectorIndex = o.VectorIndex;
- break;
- }
- }
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
@@ -4054,7 +3974,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasThumb2DSP() && (FlagsVal & 0x400))
+ if (!hasDSP() && (FlagsVal & 0x400))
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
return MatchOperand_NoMatch;
@@ -5202,6 +5122,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// FALLTHROUGH
}
case AsmToken::Colon: {
+ S = Parser.getTok().getLoc();
// ":lower16:" and ":upper16:" expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
@@ -5220,8 +5141,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return false;
}
case AsmToken::Equal: {
+ S = Parser.getTok().getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
- return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return Error(S, "unexpected token in operand");
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
@@ -5229,7 +5151,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, S);
Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
return false;
}
@@ -5682,9 +5605,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+ (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
+ (static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
RegIdx = 4;
if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
@@ -8610,18 +8535,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ } else if (isThumbOne()) {
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ if (Opc == ARM::tADDhirr && !hasV6MOps() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
}
- // Some high-register supporting Thumb1 encodings only allow both registers
- // to be from r0-r7 when in Thumb2.
- else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() &&
- isARMLowRegister(Inst.getOperand(1).getReg()) &&
- isARMLowRegister(Inst.getOperand(2).getReg()))
- return Match_RequiresThumb2;
- // Others only require ARMv6 or later.
- else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
- isARMLowRegister(Inst.getOperand(0).getReg()) &&
- isARMLowRegister(Inst.getOperand(1).getReg()))
- return Match_RequiresV6;
+
+ for (unsigned I = 0; I < MCID.NumOperands; ++I)
+ if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
+ // rGPRRegClass excludes PC, and also excluded SP before ARMv8
+ if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+ return Match_RequiresV8;
+ else if (Inst.getOperand(I).getReg() == ARM::PC)
+ return Match_InvalidOperand;
+ }
+
return Match_Success;
}
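
The new rGPR loop is the interesting part of this hunk: SP is legal in an rGPR slot only from ARMv8 on, and PC never is. A compressed restatement with illustrative enums (not LLVM's types):

#include <cassert>

enum Reg { R0, SP, PC };
enum Result { Success, RequiresV8, InvalidOperand };

// Mirrors the per-operand check added above for operands whose register
// class is rGPR.
Result checkRGPR(Reg R, bool HasV8Ops) {
  if (R == SP && !HasV8Ops)
    return RequiresV8;
  if (R == PC)
    return InvalidOperand;
  return Success;
}

int main() {
  assert(checkRGPR(SP, /*HasV8Ops=*/false) == RequiresV8);
  assert(checkRGPR(SP, /*HasV8Ops=*/true) == Success);
  assert(checkRGPR(PC, /*HasV8Ops=*/true) == InvalidOperand);
  return 0;
}
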
@@ -8680,7 +8616,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
@@ -8720,6 +8656,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_RequiresV8:
+ return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
@@ -8868,7 +8806,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
return false;
}
- getParser().getStreamer().EmitValue(Value, Size);
+ getParser().getStreamer().EmitValue(Value, Size, L);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -9098,7 +9036,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(L, "Unknown arch name");
@@ -9106,7 +9044,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
}
Triple T;
- STI.setDefaultFeatures(T.getARMCPUForArch(Arch));
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
getTargetStreamer().emitArch(ID);
@@ -9233,12 +9172,13 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is using table-gen data, but should be moved to
// ARMTargetParser once that is table-gen'd.
- if (!STI.isCPUStringValid(CPU)) {
+ if (!getSTI().isCPUStringValid(CPU)) {
Error(L, "Unknown CPU name");
return false;
}
- STI.setDefaultFeatures(CPU);
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
@@ -9249,13 +9189,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseFPU(FPU);
+ unsigned ID = ARM::parseFPU(FPU);
std::vector<const char *> Features;
- if (!ARMTargetParser::getFPUFeatures(ID, Features)) {
+ if (!ARM::getFPUFeatures(ID, Features)) {
Error(FPUNameLoc, "Unknown FPU name");
return false;
}
+ MCSubtargetInfo &STI = copySTI();
for (auto Feature : Features)
STI.ApplyFeatureFlag(Feature);
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -9895,7 +9836,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
SMLoc ArchLoc = Parser.getTok().getLoc();
getLexer().Lex();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(ArchLoc, "unknown architecture '" + Arch + "'");
@@ -9976,22 +9917,22 @@ extern "C" void LLVMInitializeARMAsmParser() {
// when we start to table-generate them, and we can use the ARM
// flags below, that were generated by table-gen.
static const struct {
- const ARM::ArchExtKind Kind;
- const unsigned ArchCheck;
+ const unsigned Kind;
+ const uint64_t ArchCheck;
const FeatureBitset Features;
} Extensions[] = {
{ ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass,
+ { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
{ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- // FIXME: Also available in ARMv6-K
- { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} },
+ { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+ { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
// FIXME: Unsupported extensions.
{ ARM::AEK_OS, Feature_None, {} },
{ ARM::AEK_IWMMXT, Feature_None, {} },
@@ -10020,7 +9961,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
EnableFeature = false;
Name = Name.substr(2);
}
- unsigned FeatureKind = ARMTargetParser::parseArchExt(Name);
+ unsigned FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
Error(ExtLoc, "unknown architectural extension: " + Name);
@@ -10037,6 +9978,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset ToggleFeatures = EnableFeature
? (~STI.getFeatureBits() & Extension.Features)
: ( STI.getFeatureBits() & Extension.Features);
@@ -10078,6 +10020,10 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
"expression value must be representable in 32 bits");
}
break;
+ case MCK_rGPR:
+ if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP)
+ return Match_Success;
+ break;
case MCK_GPRPair:
if (Op.isReg() &&
MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 097ec04..e63defe 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -59,7 +59,7 @@ namespace {
}
// Called when decoding an IT instruction. Sets the IT state for the following
- // instructions that for the IT block. Firstcond and Mask correspond to the
+  // instructions that form the IT block. Firstcond and Mask correspond to the
// fields in the IT instruction encoding.
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
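
The "(3 - the number of trailing zeros)" rule is easier to see in isolation. A minimal sketch of the slot count implied by a 4-bit IT mask (numITSlots is a hypothetical helper, not part of the disassembler):

    #include <cstdint>

    // The lowest set bit of the mask terminates the then/else list, so an
    // IT block covers the first instruction plus (3 - trailing zeros) more.
    static unsigned numITSlots(uint8_t Mask) {
      unsigned TZ = 0;
      while (TZ < 4 && !((Mask >> TZ) & 1))
        ++TZ;
      return 4 - TZ; // 1 first instruction + (3 - TZ) then/else slots
    }
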
@@ -459,21 +459,18 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
- MI.clear();
Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -485,7 +482,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
@@ -497,7 +493,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -509,7 +504,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -517,7 +511,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -525,7 +518,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -718,7 +710,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
STI);
if (Result) {
@@ -729,7 +720,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -763,7 +753,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn32 =
(Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -774,7 +763,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -784,7 +772,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result =
decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -794,7 +781,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Result =
decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -803,7 +789,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
STI);
if (Result != MCDisassembler::Fail) {
@@ -814,7 +799,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
- MI.clear();
uint32_t NEONLdStInsn = Insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
@@ -828,7 +812,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
- MI.clear();
uint32_t NEONDataInsn = Insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -841,7 +824,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONCryptoInsn = Insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -853,7 +835,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONv8Insn = Insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
@@ -864,7 +845,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -902,7 +882,7 @@ static DecodeStatus
DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
-
+
if (RegNo == 15)
S = MCDisassembler::SoftFail;
@@ -986,8 +966,13 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 13 || RegNo == 15)
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15)
S = MCDisassembler::SoftFail;
+
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
return S;
}
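
Stated as a standalone predicate, the new rGPR rule is: PC always soft-fails, and SP soft-fails only when the subtarget lacks v8 operations. A sketch mirroring the check above (hypothetical helper; HasV8Ops stands in for the feature-bit query):

    // SoftFail (unpredictable), not a hard decode failure.
    static bool rGPRSoftFails(unsigned RegNo, bool HasV8Ops) {
      return (RegNo == 13 && !HasV8Ops) || RegNo == 15;
    }
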
@@ -1147,7 +1132,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
unsigned imm = fieldFromInstruction(Val, 7, 5);
// Register-immediate
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
@@ -1658,7 +1643,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::STRD_POST:
if (P == 0 && W == 1)
S = MCDisassembler::SoftFail;
-
+
if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
S = MCDisassembler::SoftFail;
if (type && Rm == 15)
@@ -4131,7 +4116,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
// indicates the move for the GE{3:0} bits, the mask{0} bit can be set
// only if the processor includes the DSP extension.
if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
- (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1)))
+ (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1)))
S = MCDisassembler::SoftFail;
}
}
@@ -5065,6 +5050,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5075,10 +5064,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv2f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv2f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv2f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv1i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv8i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
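
The cmode/op cases follow the NEON modified-immediate table; summarised for the D-register form (a reading aid, not decoder code — the Q-register variant below maps to the v4i32/v16i8/v2i64 opcodes instead):

    // Valid only when (imm & 0x38) == 0:
    //   cmode 0xF: op==0 -> VMOVv2f32, op==1 -> decode Fail
    //   cmode 0xE: op==0 -> VMOVv8i8,  op==1 -> VMOVv1i64   (FullFP16 only)
    //   cmode 0xD: op==0 -> VMOVv2i32, op==1 -> VMVNv2i32   (FullFP16 only)
    //   cmode 0xC: op==0 -> VMOVv2i32, op==1 -> VMVNv2i32   (FullFP16 only)
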
@@ -5095,6 +5109,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5105,10 +5123,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv4f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv4f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv4f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv2i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv16i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
@@ -5132,7 +5175,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
unsigned Rm = fieldFromInstruction(Val, 0, 4);
Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
unsigned Cond = fieldFromInstruction(Val, 28, 4);
-
+
if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
S = MCDisassembler::SoftFail;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 0bff521..33fc85a 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -804,7 +805,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned Opcode = MI->getOpcode();
// For writes, handle extended mask bits if the DSP extension is present.
- if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) {
+ if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) {
switch (SYSm) {
case 0x400:
O << "apsr_g";
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3927c9f..52f7115 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1114635..fa52c93 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,13 +25,17 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -180,9 +184,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
// Relaxing tB to t2B. tB has a signed 12-bit displacement with the
@@ -192,7 +195,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 2046 || Offset < -2048;
+ if (Offset > 2046 || Offset < -2048)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_arm_thumb_bcc: {
// Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
@@ -202,23 +207,40 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 254 || Offset < -256;
+ if (Offset > 254 || Offset < -256)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp: {
// If the immediate is negative, greater than 1020, or not a multiple
// of four, the wide version of the instruction must be used.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 1020 || Offset < 0 || Offset & 3;
+ if (Offset & 3)
+ return "misaligned pc-relative fixup value";
+ else if (Offset > 1020 || Offset < 0)
+ return "out of range pc-relative fixup value";
+ break;
}
- case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cb: {
// If we have a Thumb CBZ or CBNZ instruction and its target is the next
    // instruction, it is actually out of range for the instruction.
// It will be changed to a NOP.
int64_t Offset = (Value & ~1);
- return Offset == 2;
+ if (Offset == 2)
+ return "will be converted to nop";
+ break;
}
- llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+ default:
+ llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
+ }
+ return nullptr;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return reasonForFixupRelaxation(Fixup, Value);
}
void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
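
A note on this refactoring: reasonForFixupRelaxation() returns nullptr when no relaxation is needed, so fixupNeedsRelaxation() can forward the pointer and rely on the pointer-to-bool conversion, while other callers reuse the string as a diagnostic. The shared call shape, sketched with the names above:

    if (const char *Reason = reasonForFixupRelaxation(Fixup, Value))
      Ctx->reportError(Fixup.getLoc(), Reason); // or: relax the instruction
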
@@ -317,9 +339,10 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
- bool IsLittleEndian) {
+unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian,
+ bool IsResolved) const {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
@@ -372,8 +395,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 4096) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -383,8 +408,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
- case ARM::fixup_thumb_adr_pcrel_10:
- return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_adr_pcrel_12: {
// ARM PC-relative values are offset by 8.
Value -= 8;
@@ -393,8 +416,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -517,21 +542,44 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
((uint16_t)imm10LBits) << 1);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
+ case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp:
- // Offset by 4, and don't encode the low two bits. Two bytes of that
- // 'off by 4' is implicitly handled by the half-word ordering of the
- // Thumb encoding, so we only need to adjust by 2 here.
- return ((Value - 2) >> 2) & 0xff;
+ // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
+ // could have an error on our hands.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ // Offset by 4, and don't encode the low two bits.
+ return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_thumb_cb: {
// Offset by 4 and don't encode the lower bit, which is always 0.
+ // FIXME: diagnose if no Thumb2
uint32_t Binary = (Value - 4) >> 1;
return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0xff;
case ARM::fixup_arm_pcrel_10_unscaled: {
Value = Value - 8; // ARM fixups offset by an additional word and don't
@@ -542,8 +590,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -561,8 +611,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -582,6 +634,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
const MCValue &Target, uint64_t &Value,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
// Some fixups to thumb function symbols need the low bit (thumb bit)
// twiddled.
if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
@@ -590,18 +643,21 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
- if (A) {
- const MCSymbol &Sym = A->getSymbol();
- if (Asm.isThumbFunc(&Sym))
+ if (Sym) {
+ if (Asm.isThumbFunc(Sym))
Value |= 1;
}
}
- // For Thumb1 BL instruction, it is possible to be a long jump between
- // the basic blocks of the same function. Thus, we would like to resolve
- // the offset when the destination has the same MCFragment.
- if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol();
- IsResolved = (Sym.getFragment() == DF);
+ if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ assert(Sym && "How did we resolve this?");
+
+ // If the symbol is external the linker will handle it.
+ // FIXME: Should we handle it as an optimization?
+
+ // If the symbol is out of range, produce a relocation and hope the
+ // linker can handle it. GNU AS produces an error in this case.
+ if (Sym->isExternal() || Value >= 0x400004)
+ IsResolved = false;
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
@@ -616,7 +672,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the instruction. This allows adjustFixupValue() to issue a diagnostic
 // if the value is invalid.
(void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian);
+ IsLittleEndian, IsResolved);
}
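
For context on the 0x400004 threshold above: the Thumb-1 BL encoding carries a signed 23-bit byte offset (two 11-bit halfword fields), so its raw reach is [-0x400000, +0x3FFFFE]; with the branch's 4-byte PC bias folded in, a value of 0x400004 or more cannot be encoded locally and is deferred to the linker. A sketch of the raw range test (hypothetical helper, not in the patch):

    // Raw Thumb-1 BL reach, before any PC bias is applied.
    static bool thumb1BLOffsetFits(int64_t ByteOffset) {
      return ByteOffset >= -0x400000 && ByteOffset <= 0x3FFFFE;
    }
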
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -719,7 +775,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
+ Value =
+ adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
@@ -743,6 +800,249 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
+namespace CU {
+
+/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings {
+ UNWIND_ARM_MODE_MASK = 0x0F000000,
+ UNWIND_ARM_MODE_FRAME = 0x01000000,
+ UNWIND_ARM_MODE_FRAME_D = 0x02000000,
+ UNWIND_ARM_MODE_DWARF = 0x04000000,
+
+ UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
+
+ UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
+
+ UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
+
+ UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00,
+
+ UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF
+};
+
+} // end CU namespace
+
+/// Generate compact unwind encoding for the function based on the CFI
+/// instructions. If the CFI instructions describe a frame that cannot be
+/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
+/// tells the runtime to fall back and unwind using DWARF.
+uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
+ // Only armv7k uses CFI based unwinding.
+ if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K)
+ return 0;
+ // No .cfi directives means no frame.
+ if (Instrs.empty())
+ return 0;
+ // Start off assuming CFA is at SP+0.
+ int CFARegister = ARM::SP;
+ int CFARegisterOffset = 0;
+ // Mark savable registers as initially unsaved
+ DenseMap<unsigned, int> RegOffsets;
+ int FloatRegCount = 0;
+ // Process each .cfi directive and build up compact unwind info.
+ for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ int Reg;
+ const MCCFIInstruction &Inst = Instrs[i];
+ switch (Inst.getOperation()) {
+ case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
+ CFARegisterOffset = -Inst.getOffset();
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
+ CFARegisterOffset = -Inst.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpOffset: // DW_CFA_offset
+ Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RegOffsets[Reg] = Inst.getOffset();
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ RegOffsets[Reg] = Inst.getOffset();
+ ++FloatRegCount;
+ } else {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << ".cfi_offset on unknown register="
+ << Inst.getRegister() << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ break;
+ case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc
+ // Ignore
+ break;
+ default:
+      // Directive not convertible to compact unwind, bail out.
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "CFI directive not compatiable with comact "
+ "unwind encoding, opcode=" << Inst.getOperation()
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ break;
+ }
+ }
+
+ // If no frame set up, return no unwind info.
+ if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0))
+ return 0;
+
+ // Verify standard frame (lr/r7) was used.
+ if (CFARegister != ARM::R7) {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is "
+ << CFARegister
+ << " instead of r7\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ int StackAdjust = CFARegisterOffset - 8;
+ if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "LR not saved as standard frame, StackAdjust="
+ << StackAdjust
+ << ", CFARegisterOffset=" << CFARegisterOffset
+ << ", lr save at offset=" << RegOffsets[14] << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "r7 not saved as standard frame\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME;
+
+ // If var-args are used, there may be a stack adjust required.
+ switch (StackAdjust) {
+ case 0:
+ break;
+ case 4:
+ CompactUnwindEncoding |= 0x00400000;
+ break;
+ case 8:
+ CompactUnwindEncoding |= 0x00800000;
+ break;
+ case 12:
+ CompactUnwindEncoding |= 0x00C00000;
+ break;
+ default:
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs()
+ << ".cfi_def_cfa stack adjust ("
+ << StackAdjust << ") out of range\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // If r6 is saved, it must be right below r7.
+ static struct {
+ unsigned Reg;
+ unsigned Encoding;
+ } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6},
+ {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5},
+ {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4},
+ {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12},
+ {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11},
+ {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10},
+ {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9},
+ {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}};
+
+ int CurOffset = -8 - StackAdjust;
+ for (auto CSReg : GPRCSRegs) {
+ auto Offset = RegOffsets.find(CSReg.Reg);
+ if (Offset == RegOffsets.end())
+ continue;
+
+ int RegOffset = Offset->second;
+ if (RegOffset != CurOffset - 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at "
+ << RegOffset << " but only supported at "
+ << CurOffset << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CompactUnwindEncoding |= CSReg.Encoding;
+ CurOffset -= 4;
+ }
+
+ // If no floats saved, we are done.
+ if (FloatRegCount == 0)
+ return CompactUnwindEncoding;
+
+ // Switch mode to include D register saving.
+ CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK;
+ CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D;
+
+ // FIXME: supporting more than 4 saved D-registers compactly would be trivial,
+ // but needs coordination with the linker and libunwind.
+ if (FloatRegCount > 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "unsupported number of D registers saved ("
+ << FloatRegCount << ")\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // Floating point registers must either be saved sequentially, or we defer to
+ // DWARF. No gaps allowed here so check that each saved d-register is
+ // precisely where it should be.
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 };
+ for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) {
+ auto Offset = RegOffsets.find(FPRCSRegs[Idx]);
+ if (Offset == RegOffsets.end()) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " not saved\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ } else if (Offset->second != CurOffset - 8) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " saved at " << Offset->second
+ << ", expected at " << CurOffset - 8
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CurOffset -= 8;
+ }
+
+ return CompactUnwindEncoding | ((FloatRegCount - 1) << 8);
+}
+
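
A worked example, assuming the canonical armv7k prologue push {r4, r5, r6, r7, lr}; add r7, sp, #12 with no var-args adjust and no D-register saves: StackAdjust is 0, lr and r7 sit at -4 and -8 from the CFA, and r6/r5/r4 fall at -12/-16/-20, so the logic above produces:

    uint32_t Enc = CU::UNWIND_ARM_MODE_FRAME           // 0x01000000
                 | CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4  // 0x00000001
                 | CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5  // 0x00000002
                 | CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6; // 0x00000004
    // Enc == 0x01000007
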
+static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
+ unsigned AK = ARM::parseArch(Arch);
+ switch (AK) {
+ default:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV4T:
+ return MachO::CPU_SUBTYPE_ARM_V4T;
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV5TEJ:
+ return MachO::CPU_SUBTYPE_ARM_V5;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6K:
+ return MachO::CPU_SUBTYPE_ARM_V6;
+ case ARM::AK_ARMV7A:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV7S:
+ return MachO::CPU_SUBTYPE_ARM_V7S;
+ case ARM::AK_ARMV7K:
+ return MachO::CPU_SUBTYPE_ARM_V7K;
+ case ARM::AK_ARMV6M:
+ return MachO::CPU_SUBTYPE_ARM_V6M;
+ case ARM::AK_ARMV7M:
+ return MachO::CPU_SUBTYPE_ARM_V7M;
+ case ARM::AK_ARMV7EM:
+ return MachO::CPU_SUBTYPE_ARM_V7EM;
+ }
+}
+
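
This helper replaces the StringSwitch removed just below, which matched both the "armvX" and "thumbvX" spellings of each arch; a single switch over AK_* kinds suffices assuming ARM::parseArch canonicalises the thumb prefix, as the removed pairs suggest:

    // Sketch: both spellings should map to the same arch kind.
    // ARM::parseArch("armv7k") == ARM::parseArch("thumbv7k") == ARM::AK_ARMV7K
    // => getMachOSubTypeFromArch("thumbv7k") == MachO::CPU_SUBTYPE_ARM_V7K
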
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
@@ -751,19 +1051,8 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
default:
llvm_unreachable("unsupported object format");
case Triple::MachO: {
- MachO::CPUSubTypeARM CS =
- StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
- .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
- .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
- .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
- .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
- .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
- .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
- .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
- .Default(MachO::CPU_SUBTYPE_ARM_V7);
-
- return new ARMAsmBackendDarwin(T, TheTriple, CS);
+ MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
+ return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 6b4abd5..28a6213 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -45,6 +45,10 @@ public:
const MCValue &Target, uint64_t &Value,
bool &IsResolved) override;
+ unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ MCContext *Ctx, bool IsLittleEndian,
+ bool IsResolved) const;
+
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
@@ -52,6 +56,9 @@ public:
bool mayNeedRelaxation(const MCInst &Inst) const override;
+ const char *reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const;
+
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index a6206e3..995dd0f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -16,11 +16,12 @@ using namespace llvm;
namespace {
class ARMAsmBackendDarwin : public ARMAsmBackend {
+ const MCRegisterInfo &MRI;
public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
- MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
+ const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
HasDataInCodeSupport = true;
}
@@ -28,6 +29,9 @@ public:
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
+
+ uint32_t generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const override;
};
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 804d353..52eba8be 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -95,7 +95,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_ARM_TLS_IE32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
}
@@ -192,7 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_ARM_GOTOFF32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
case MCSymbolRefExpr::VK_ARM_TARGET1:
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index d17fdb9..6084f22 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitAttribute(unsigned Attribute, unsigned Value) override;
void emitTextAttribute(unsigned Attribute, StringRef String) override;
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
- StringRef StrinValue) override;
+ StringRef StringValue) override;
void emitArch(unsigned Arch) override;
void emitArchExtension(unsigned ArchExt) override;
void emitObjectArch(unsigned Arch) override;
@@ -195,16 +195,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
OS << "\n";
}
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
- OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n";
+ OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
- OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n";
+ OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
- OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n';
+ OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
- OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n";
+ OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
void ARMTargetAsmStreamer::finishAttributeSection() {
}
@@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
// This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
+ // their string/numeric value, so we can later emit them
// in declaration order, keeping all in the same vector
struct AttributeItem {
enum {
@@ -254,7 +254,7 @@ private:
} Type;
unsigned Tag;
unsigned IntValue;
- StringRef StringValue;
+ std::string StringValue;
static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
// The conformance tag must be emitted first when serialised
@@ -507,14 +507,15 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
- if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4))
- getContext().reportFatalError(Loc, "relocated expression must be 32-bit");
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
+ getContext().reportError(Loc, "relocated expression must be 32-bit");
+ return;
+ }
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override {
@@ -684,16 +685,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
setAttributeItem(CPU_name,
- ARMTargetParser::getCPUAttr(Arch),
+ ARM::getCPUAttr(Arch),
false);
if (EmittedArch == ARM::AK_INVALID)
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(Arch),
+ ARM::getArchAttr(Arch),
false);
else
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(EmittedArch),
+ ARM::getArchAttr(EmittedArch),
false);
switch (Arch) {
@@ -702,7 +703,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV3:
case ARM::AK_ARMV3M:
case ARM::AK_ARMV4:
- case ARM::AK_ARMV5:
setAttributeItem(ARM_ISA_use, Allowed, false);
break;
@@ -710,7 +710,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV5T:
case ARM::AK_ARMV5TE:
case ARM::AK_ARMV6:
- case ARM::AK_ARMV6J:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
@@ -721,8 +720,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::AK_ARMV6K:
- case ARM::AK_ARMV6Z:
- case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6KZ:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(Virtualization_use, AllowTZ, false);
@@ -732,10 +730,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
- case ARM::AK_ARMV7:
- setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
- break;
-
case ARM::AK_ARMV7A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -755,6 +749,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1084,19 +1079,14 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.extab",
- ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getDataRel(),
- FnStart);
+ SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC,
+ SectionKind::getData(), FnStart);
}
inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.exidx",
- ELF::SHT_ARM_EXIDX,
+ SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX,
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
- SectionKind::getDataRel(),
- FnStart);
+ SectionKind::getData(), FnStart);
}
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1ac0815..bda37f6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -33,7 +33,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::SjLj;
+ ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS()
+ ? ExceptionHandling::SjLj
+ : ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 99a5fff..5e54816 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -19,34 +19,37 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
-
- class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
- };
-
- class ARMELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit ARMELFMCAsmInfo(const Triple &TT);
-
- void setUseIntegratedAssembler(bool Value) override;
- };
-
- class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoMicrosoft();
- };
-
- class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoGNU();
- };
+class Triple;
+
+class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
+};
+
+class ARMELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit ARMELFMCAsmInfo(const Triple &TT);
+
+ void setUseIntegratedAssembler(bool Value) override;
+};
+
+class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoMicrosoft();
+};
+
+class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoGNU();
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 9146d4d..75dde80 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -63,8 +63,8 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS ARMMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 21c9fc1..8c8c249 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -134,101 +135,11 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
bool isThumb =
TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
- bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
- switch (TT.getSubArch()) {
- default:
- llvm_unreachable("invalid sub-architecture for ARM");
- case Triple::ARMSubArch_v8:
- if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8";
- break;
- case Triple::ARMSubArch_v8_1a:
- if (NoCPU)
- // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
- ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8.1a";
- break;
- case Triple::ARMSubArch_v7m:
- isThumb = true;
- if (NoCPU)
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7em:
- if (NoCPU)
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7s:
- if (NoCPU)
- // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
- // Swift
- ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7:
- // v7 CPUs have lots of different feature sets. If no CPU is specified,
- // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
- // the "minimum" feature set and use CPU string to figure out the exact
- // features.
- if (NoCPU)
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v6t2:
- ARMArchFeature = "+v6t2";
- break;
- case Triple::ARMSubArch_v6k:
- ARMArchFeature = "+v6k";
- break;
- case Triple::ARMSubArch_v6m:
- isThumb = true;
- if (NoCPU)
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6m,+noarm,+mclass";
- else
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v6:
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v5te:
- ARMArchFeature = "+v5te";
- break;
- case Triple::ARMSubArch_v5:
- ARMArchFeature = "+v5t";
- break;
- case Triple::ARMSubArch_v4t:
- ARMArchFeature = "+v4t";
- break;
- case Triple::NoSubArch:
- break;
- }
+
+ unsigned ArchID = ARM::parseArch(TT.getArchName());
+ if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic"))
+ ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str();
if (isThumb) {
if (ARMArchFeature.empty())
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index fd30623..c2bbc8e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -86,7 +86,8 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T,
// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll);
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
/// Construct an ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95d7ea7..cfd504e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -150,10 +150,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint32_t Value2 = 0;
@@ -163,10 +165,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_HALF_SECTDIFF;
@@ -251,10 +255,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
@@ -265,10 +271,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_SECTDIFF;
@@ -346,13 +354,15 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = MachO::ARM_RELOC_VANILLA;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
// expected to always be resolvable at assembly time and not have any
// relocations needed.
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation on symbol");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
+ return;
+ }
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index b680db5..dad50f2 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -27,8 +27,8 @@ ARMTargetStreamer::~ARMTargetStreamer() {}
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
-const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
- return ConstantPools->addEntry(Streamer, Expr, 4);
+const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, 4, Loc);
}
void ARMTargetStreamer::emitCurrentConstantPool() {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b993b1b..83fa084 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,11 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll) {
- return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+MCStreamer *llvm::createARMWinCOFFStreamer(
+ MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) {
+ auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 3b4358b..93e0ac4 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -13,6 +13,7 @@
#include "Thumb1FrameLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -84,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -100,7 +100,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
@@ -168,8 +172,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
++MBBI;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
}
// Determine starting offsets of spill areas.
@@ -232,11 +234,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
}
-
  // Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI)
- + GPRCS1Size + ArgRegsSaveSize;
+ FramePtrOffsetInBlock +=
+ MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
@@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
@@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
} else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@@ -388,66 +385,189 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- bool IsV4PopReturn = false;
- for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo())
+ if (needPopSpecialFixUp(MF)) {
+ bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
+ (void)Done;
+ assert(Done && "Emission of the special fixup failed!?");
+ }
+}
+
+bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ if (!needPopSpecialFixUp(*MBB.getParent()))
+ return true;
+
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
+}
+
+bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
+ ARMFunctionInfo *AFI =
+ const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
+ if (AFI->getArgRegsSaveSize())
+ return true;
+
+ // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
if (CSI.getReg() == ARM::LR)
- IsV4PopReturn = true;
- IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps();
-
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. So instead
- // we have to emit:
- // POP {r3}
- // ADD sp, #offset
- // BX r3
- // If this would clobber a return value, then generate this sequence instead:
- // MOV ip, r3
- // POP {r3}
- // ADD sp, #offset
- // MOV lr, r3
- // MOV r3, ip
- // BX lr
- if (ArgRegsSaveSize || IsV4PopReturn) {
- // Get the last instruction, tBX_RET
- MBBI = MBB.getLastNonDebugInstr();
- assert (MBBI->getOpcode() == ARM::tBX_RET);
- DebugLoc dl = MBBI->getDebugLoc();
-
- if (AFI->getReturnRegsCount() <= 3) {
- // Epilogue: pop saved LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ return true;
+
+ return false;
+}
+
+bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
+ bool DoIt) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
+ // If MBBI is a return instruction, or is a tPOP followed by a return
+ // instruction in the successor BB, we may be able to directly restore
+ // LR in the PC.
+ // This is only possible with v5T ops (v4T can't change the Thumb bit via
+ // a POP PC instruction), and only if we do not need to emit any SP update.
+ // Otherwise, we need a temporary register to pop the value
+ // and copy that value into LR.
+ auto MBBI = MBB.getFirstTerminator();
+ bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
+ if (CanRestoreDirectly) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
+ CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET);
+ else {
+ auto MBBI_prev = MBBI;
+ MBBI_prev--;
+ assert(MBBI_prev->getOpcode() == ARM::tPOP);
+ assert(MBB.succ_size() == 1);
+ if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
+ MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
+ else
+ CanRestoreDirectly = false;
+ }
+ }
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ if (CanRestoreDirectly) {
+ if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
+ return true;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ // Copy implicit ops and popped registers, if any.
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
+ MIB.addOperand(MO);
+ MIB.addReg(ARM::PC, RegState::Define);
+ // Erase the old instruction (tBX_RET or tPOP).
+ MBB.erase(MBBI);
+ return true;
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ // Look for a temporary register to use.
+ // First, compute the liveness information.
+ LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ // The semantics of pristine registers changed recently: callee-saved
+ // registers that are touched in the function are no longer part of the
+ // pristine set. Add those callee-saved registers now.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ DebugLoc dl = DebugLoc();
+ if (MBBI != MBB.end()) {
+ dl = MBBI->getDebugLoc();
+ auto InstUpToMBBI = MBB.end();
+ while (InstUpToMBBI != MBBI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before MBBI.
+ UsedRegs.stepBackward(*--InstUpToMBBI);
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R3, RegState::Define)
- .addReg(ARM::R12, RegState::Kill));
- // Keep the tBX_RET instruction
+ // Look for a register that can be directly used in the POP.
+ unsigned PopReg = 0;
+ // And some temporary register, just in case.
+ unsigned TemporaryReg = 0;
+ BitVector PopFriendly =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+ // Rebuild the GPRs from the high registers because they are removed
+ // from the GPR register class for Thumb1.
+ BitVector GPRsNoLRSP =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ GPRsNoLRSP |= PopFriendly;
+ GPRsNoLRSP.reset(ARM::LR);
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+ for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+ Register = GPRsNoLRSP.find_next(Register)) {
+ if (!UsedRegs.contains(Register)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Register)) {
+ PopReg = Register;
+ TemporaryReg = 0;
+ break;
+ }
+ // Otherwise, remember that the register will be available to
+ // save a pop-friendly register.
+ TemporaryReg = Register;
}
}
+
+ if (!DoIt && !PopReg && !TemporaryReg)
+ return false;
+
+ assert((PopReg || TemporaryReg) && "Cannot get LR");
+
+ if (TemporaryReg) {
+ assert(!PopReg && "Unnecessary MOV is about to be inserted");
+ PopReg = PopFriendly.find_first();
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
+ // We couldn't use the direct restoration above, so
+ // perform the opposite conversion: tPOP_RET to tPOP.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ bool Popped = false;
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
+ MO.getReg() != ARM::PC) {
+ MIB.addOperand(MO);
+ if (!MO.isImplicit())
+ Popped = true;
+ }
+ // Is there anything left to pop?
+ if (!Popped)
+ MBB.erase(MIB.getInstr());
+ // Erase the old instruction.
+ MBB.erase(MBBI);
+ MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ }
+
+ assert(PopReg && "Do not know how to get LR");
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(PopReg, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+
+ if (TemporaryReg)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill));
+
+ return true;
}
bool Thumb1FrameLowering::
@@ -461,8 +581,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
const TargetInstrInfo &TII = *STI.getInstrInfo();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -501,31 +619,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *STI.getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
- bool NumRegs = false;
+ bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (STI.hasV4TOps() && !STI.hasV5TOps())
+ if (MBB.succ_empty()) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ } else
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
continue;
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB.copyImplicitOps(&*MI);
- MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
- NumRegs = true;
+ NeedsPop = true;
}
// It's illegal to emit pop instruction without operands.
- if (NumRegs)
+ if (NeedsPop)
MBB.insert(MI, &*MIB);
else
MF.DeleteMachineInstr(MIB);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 31d5732..812f983 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,42 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+
+ /// Check whether or not the given \p MBB can be used as an epilogue
+ /// for the target.
+ /// The epilogue will be inserted before the first terminator of that block.
+ /// This method is used by the shrink-wrapping pass to decide if
+ /// \p MBB will be correctly handled by the target.
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
+private:
+ /// Check if the frame lowering of \p MF needs a special fixup
+ /// code sequence for the epilogue.
+ /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ /// to LR, and we can't pop the value directly to the PC when
+ /// we need to update the SP after popping the value. So instead
+ /// we have to emit:
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// BX r3
+ /// If this would clobber a return value, then generate this sequence instead:
+ /// MOV ip, r3
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// MOV lr, r3
+ /// MOV r3, ip
+ /// BX lr
+ bool needPopSpecialFixUp(const MachineFunction &MF) const;
+
+ /// Emit the special fixup code sequence for the epilogue.
+ /// \see needPopSpecialFixUp for more details.
+ /// \p DoIt tells this method whether or not to actually insert
+ /// the code sequence in \p MBB. I.e., when \p DoIt is false,
+ /// \p MBB is left untouched.
+ /// \returns For \p DoIt == true: true when the emission succeeded,
+ /// false otherwise. For \p DoIt == false: true when the emission
+ /// would have been possible, false otherwise.
+ bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const;
};
} // End llvm namespace
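canUseAsEpilogue and emitPopSpecialFixUp above share one code path: the function runs once as a side-effect-free feasibility query (DoIt == false) when shrink-wrapping picks epilogue blocks, and again to actually rewrite the block (DoIt == true). A stripped-down sketch of the idiom, with hypothetical helpers standing in for the liveness scan and the POP/ADD/MOV/BX rewrite:

    // Hypothetical helpers; the real work is in Thumb1FrameLowering.cpp above.
    bool findScratchForLR();       // can we find a usable pop register?
    void rewriteReturnSequence();  // mutate the block with the fixup code

    bool emitPopSpecialFixUpSketch(bool DoIt) {
      bool Feasible = findScratchForLR();
      if (!DoIt)
        return Feasible;          // pure query: the block is left untouched
      if (!Feasible)
        return false;             // emitEpilogue asserts this cannot happen
      rewriteReturnSequence();
      return true;
    }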
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 216e776..530e1d3 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -84,11 +84,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
@@ -112,11 +110,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 68736bc..bf0498d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -256,8 +256,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
// Finalize the bundle.
- MachineBasicBlock::instr_iterator LI = LastITMI;
- finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));
+ finalizeBundle(MBB, InsertPos.getInstrIterator(),
+ ++LastITMI->getIterator());
Modified = true;
++NumITs;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index dc74f4e..4da769f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -131,11 +131,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
@@ -171,11 +169,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
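MachinePointerInfo::getFixedStack now takes the MachineFunction, so the fixed-stack pseudo source values can be managed per function rather than globally. The updated idiom for building a stack-slot memory operand, condensed from the hunks above:

    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"

    llvm::MachineMemOperand *stackSlotOperand(llvm::MachineFunction &MF, int FI,
                                              bool IsStore) {
      llvm::MachineFrameInfo &MFI = *MF.getFrameInfo();
      return MF.getMachineMemOperand(
          llvm::MachinePointerInfo::getFixedStack(MF, FI),
          IsStore ? llvm::MachineMemOperand::MOStore
                  : llvm::MachineMemOperand::MOLoad,
          MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    }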
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d9ab824..bcd0e57 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -125,7 +125,10 @@ namespace {
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
+ // tSTMIA_UPD changes the semantics, so it can only be used when the base
+ // register is killed. This difference is correctly handled elsewhere.
+ { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
@@ -210,12 +213,12 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
}
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
+ for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -435,6 +438,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
+ case ARM::t2STMIA: {
+ // If the base register is killed, we don't care what its value is after the
+ // instruction, so we can use an updating STMIA.
+ if (!MI->getOperand(0).isKill())
+ return false;
+
+ break;
+ }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -492,6 +503,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
+
+ // tSTMIA_UPD takes a defining register operand. We've already checked that
+ // the register is killed, so mark it as dead here.
+ if (Entry.WideOpc == ARM::t2STMIA)
+ MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
+
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -633,10 +650,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
@@ -660,11 +676,13 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
}
} else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
- unsigned CommOpIdx1, CommOpIdx2;
+ unsigned CommOpIdx1 = 1;
+ unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
- CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ MI->getOperand(CommOpIdx2).getReg() != Reg0)
return false;
- MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ MachineInstr *CommutedMI =
+ TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
if (!CommutedMI)
return false;
}
@@ -750,10 +768,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Limit = ~0U;
@@ -1012,9 +1029,9 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
- // Optimizing / minimizing size?
- OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
- MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ // Optimizing / minimizing size? Minimizing size implies optimizing for size.
+ OptimizeSize = MF.getFunction()->optForSize();
+ MinimizeSize = MF.getFunction()->optForMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
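optForMinSize() implies optForSize(), which is why the MinimizeSize term could be dropped from the two heuristics above without changing behaviour under -Oz. The relationship between the two predicates, as a sketch:

    #include "llvm/IR/Function.h"

    void deriveSizeFlags(const llvm::Function &F, bool &OptimizeSize,
                         bool &MinimizeSize) {
      MinimizeSize = F.optForMinSize(); // minsize attribute (-Oz)
      OptimizeSize = F.optForSize();    // optsize or minsize (-Os or -Oz)
    }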
diff --git a/contrib/llvm/lib/Target/AVR/AVR.td b/contrib/llvm/lib/Target/AVR/AVR.td
new file mode 100644
index 0000000..9e80717
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVR.td
@@ -0,0 +1,563 @@
+//===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+// This is the top level entry point for the AVR target.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===---------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===---------------------------------------------------------------------===//
+// AVR Subtarget Features.
+//===---------------------------------------------------------------------===//
+
+// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details
+// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD.
+// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
+// avr2 (with SRAM) adds the rest of the variants.
+// :TODO: s/AVRTiny/Tiny
+
+
+// A feature set aggregates features, grouping them. We don't want to create a
+// new member in AVRSubtarget (to store a value) for each set because we do not
+// care if the set is supported, only the subfeatures inside the set. We fix
+// this by simply setting the same dummy member for all feature sets, which is
+// then ignored.
+class FeatureSet<string name, string desc, list<SubtargetFeature> i>
+ : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+
+// A family of microcontrollers, defining a set of supported features.
+class Family<string name, list<SubtargetFeature> i>
+ : FeatureSet<name, !strconcat("The device is a part of the ",
+ name, " family"), i>;
+
+// The device has SRAM, and supports the bare minimum of
+// SRAM-relevant instructions.
+//
+// These are:
+// LD - all 9 variants
+// ST - all 9 variants
+// LDD - two variants for Y and Z
+// STD - two variants for Y and Z
+// `LDS Rd, K`
+// `STS k, Rr`
+// `PUSH`/`POP`
+def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+ "The device has random access memory">;
+
+// The device supports the `JMP k` and `CALL k` instructions.
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+ "The device supports the `JMP` and "
+ "`CALL` instructions">;
+
+
+// The device supports the indirect branches `IJMP` and `ICALL`.
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
+ "true",
+ "The device supports `IJMP`/`ICALL`"
+ "instructions">;
+
+// The device supports the extended indirect branches `EIJMP` and `EICALL`.
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
+ "true", "The device supports the "
+ "`EIJMP`/`EICALL` instructions">;
+
+// The device supports `ADIW Rd+1:Rd, K` and `SBIW Rd+1:Rd, K`.
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
+ "true", "Enable 16-bit register-immediate "
+ "addition and subtraction instructions">;
+
+// The device has an 8-bit stack pointer (SP) register.
+def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
+ "true", "The device has an 8-bit "
+ "stack pointer">;
+
+// The device supports the 16-bit GPR pair MOVW instruction.
+def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+ "The device supports the 16-bit MOVW "
+ "instruction">;
+
+// The device supports the `LPM` instruction, with implied destination being r0.
+def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+ "The device supports the `LPM` instruction">;
+
+// The device supports the `LPM Rd, Z[+]` instruction.
+def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+ "The device supports the `LPM Rd, Z[+]` "
+ "instruction">;
+
+// The device supports the `ELPM` instruction.
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+ "The device supports the ELPM instruction">;
+
+// The device supports the `ELPM Rd, Z[+]` instructions.
+def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+ "The device supports the `ELPM Rd, Z[+]` "
+ "instructions">;
+
+// The device supports the `SPM` instruction.
+def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+ "The device supports the `SPM` instruction">;
+
+// The device supports the `SPM Z+` instruction.
+def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+ "The device supports the `SPM Z+` "
+ "instruction">;
+
+// The device supports the `DES k` instruction.
+def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+ "The device supports the `DES k` encryption "
+ "instruction">;
+
+// The device supports the read-modify-write instructions
+// XCH, LAS, LAC, and LAT.
+def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+ "The device supports the read-modify-write "
+ "instructions: XCH, LAS, LAC, LAT">;
+
+// The device supports the `[F]MUL[S][U]` family of instructions.
+def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
+ "true", "The device supports the "
+ "multiplication instructions">;
+
+// The device supports the `BREAK` instruction.
+def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+ "The device supports the `BREAK` debugging "
+ "instruction">;
+
+// The device has instruction encodings specific to the Tiny core.
+def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
+ "m_hasTinyEncoding", "true",
+ "The device has Tiny core specific "
+ "instruction encodings">;
+
+class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
+ !strconcat("ELF::",name), "">;
+
+// ELF e_flags architecture values
+def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
+def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
+def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
+def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
+def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
+def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
+def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
+def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
+def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
+def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
+def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
+def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
+def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
+def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
+def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
+def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
+def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
+
+//===---------------------------------------------------------------------===//
+// AVR Families
+//===---------------------------------------------------------------------===//
+
+// The device has at least the bare minimum that **every** single AVR
+// device should have.
+def FamilyAVR0 : Family<"avr0", []>;
+
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>;
+
+def FamilyAVR2 : Family<"avr2",
+ [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
+ FeatureSRAM]>;
+
+def FamilyAVR25 : Family<"avr25",
+ [FamilyAVR2, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR3 : Family<"avr3",
+ [FamilyAVR2, FeatureJMPCALL]>;
+
+def FamilyAVR31 : Family<"avr31",
+ [FamilyAVR3, FeatureELPM]>;
+
+def FamilyAVR35 : Family<"avr35",
+ [FamilyAVR3, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR4 : Family<"avr4",
+ [FamilyAVR2, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR5 : Family<"avr5",
+ [FamilyAVR3, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR51 : Family<"avr51",
+ [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
+
+def FamilyAVR6 : Family<"avr6",
+ [FamilyAVR51]>;
+
+def FamilyAVRTiny : Family<"avrtiny",
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM,
+ FeatureTinyEncoding]>;
+
+def FamilyXMEGA : Family<"xmega",
+ [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX,
+ FeatureDES]>;
+
+def FamilyXMEGAU : Family<"xmegau",
+ [FamilyXMEGA, FeatureRMW]>;
+
+def FeatureSetSpecial : FeatureSet<"special",
+ "Enable use of the entire instruction "
+ "set - used for debugging",
+ [FeatureSRAM, FeatureJMPCALL,
+ FeatureIJMPCALL, FeatureEIJMPCALL,
+ FeatureADDSUBIW, FeatureMOVW,
+ FeatureLPM, FeatureLPMX, FeatureELPM,
+ FeatureELPMX, FeatureSPM, FeatureSPMX,
+ FeatureDES, FeatureRMW,
+ FeatureMultiplication, FeatureBREAK]>;
+
+//===---------------------------------------------------------------------===//
+// AVR microcontrollers supported.
+//===---------------------------------------------------------------------===//
+
+class Device<string Name, Family Fam, ELFArch Arch,
+ list<SubtargetFeature> ExtraFeatures = []>
+ : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
+
+// Generic MCUs
+// Note that several versions of GCC have strange ELF architecture
+// settings for backwards compatibility - see `gas/config/tc-avr.c`
+// in AVR binutils. We do not replicate this.
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
+def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
+def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
+def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
+def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
+def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
+def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
+def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
+def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
+def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
+def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>;
+
+// Specific MCUs
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
+ [FeatureMOVW, FeatureLPMX]>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
+def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
+def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
+def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
+def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong
+def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
+def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
+def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>;
+
+//===---------------------------------------------------------------------===//
+// Register File Description
+//===---------------------------------------------------------------------===//
+
+include "AVRRegisterInfo.td"
+
+//===---------------------------------------------------------------------===//
+// Instruction Descriptions
+//===---------------------------------------------------------------------===//
+
+//include "AVRInstrInfo.td"
+
+//def AVRInstrInfo : InstrInfo;
+
+//===---------------------------------------------------------------------===//
+// Calling Conventions
+//===---------------------------------------------------------------------===//
+
+include "AVRCallingConv.td"
+
+//===---------------------------------------------------------------------===//
+// Assembly Printers
+//===---------------------------------------------------------------------===//
+
+// def AVRAsmWriter : AsmWriter {
+// string AsmWriterClassName = "InstPrinter";
+// bit isMCAsmWriter = 1;
+// }
+
+//===---------------------------------------------------------------------===//
+// Assembly Parsers
+//===---------------------------------------------------------------------===//
+
+// def AVRAsmParser : AsmParser {
+// let ShouldEmitMatchRegisterName = 1;
+// let ShouldEmitMatchRegisterAltName = 1;
+// }
+
+// def AVRAsmParserVariant : AsmParserVariant {
+// int Variant = 0;
+//
+// // Recognize hard coded registers.
+// string RegisterPrefix = "$";
+// }
+
+//===---------------------------------------------------------------------===//
+// Target Declaration
+//===---------------------------------------------------------------------===//
+
+def AVR : Target {
+// let InstructionSet = AVRInstrInfo;
+// let AssemblyWriters = [AVRAsmWriter];
+//
+// let AssemblyParsers = [AVRAsmParser];
+// let AssemblyParserVariants = [AVRAsmParserVariant];
+}
+
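Each SubtargetFeature's second template argument names the backing member that the TableGen-generated ParseSubtargetFeatures() toggles; an AVRSubtarget class (not part of this patch) would expose accessors over those members. A hypothetical sketch of the pairing:

    // Hypothetical: mirrors the "m_has*" strings used by the features above;
    // the real AVRSubtarget is added elsewhere.
    class AVRSubtargetSketch {
      bool m_hasSRAM = false;         // flipped by "+sram"
      bool m_hasMOVW = false;         // flipped by "+movw"
      bool m_FeatureSetDummy = false; // shared sink for FeatureSet/Family groups

    public:
      bool hasSRAM() const { return m_hasSRAM; }
      bool hasMOVW() const { return m_hasMOVW; }
    };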
diff --git a/contrib/llvm/lib/Target/AVR/AVRCallingConv.td b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
new file mode 100644
index 0000000..d8cb3fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -0,0 +1,65 @@
+//===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for AVR architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AVR Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_AVR : CallingConv
+<[
+ // i8 is returned in R24.
+ CCIfType<[i8], CCAssignToReg<[R24]>>,
+
+ // i16 values are returned in R25:R24, R23:R22, R21:R20 and R19:R18.
+ CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>>
+]>;
+
+// Special return value calling convention for runtime functions.
+def RetCC_AVR_RT : CallingConv
+<[
+ CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+ CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AVR Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// The calling conventions are implemented in custom C++ code
+
+// Calling convention for variadic functions.
+def ArgCC_AVR_Vararg : CallingConv
+<[
+ // i16 values are always passed through the stack with an alignment of 1.
+ CCAssignToStack<2, 1>
+]>;
+
+// Special argument calling convention for
+// multiplication runtime functions.
+def ArgCC_AVR_RT_MUL : CallingConv
+<[
+ CCIfType<[i16], CCAssignToReg<[R27R26,R19R18]>>
+]>;
+
+// Special argument calling convention for
+// division runtime functions.
+def ArgCC_AVR_RT_DIV : CallingConv
+<[
+ CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
+ CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
+def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
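TableGen compiles each CallingConv record into a CCAssignFn that lowering code hands to CCState. A hedged sketch of how RetCC_AVR would be consumed; the declaration below is assumed to match what the generated AVRGenCallingConv.inc will emit:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/CallingConvLower.h"

    // Assumed shape of the generated function (the CCAssignFn signature).
    bool RetCC_AVR(unsigned ValNo, llvm::MVT ValVT, llvm::MVT LocVT,
                   llvm::CCValAssign::LocInfo LocInfo,
                   llvm::ISD::ArgFlagsTy ArgFlags, llvm::CCState &State);

    void analyzeReturnSketch(
        llvm::CCState &CCInfo,
        const llvm::SmallVectorImpl<llvm::ISD::OutputArg> &Outs) {
      // Assigns R24 to an i8 return, or register pairs to i16 returns.
      CCInfo.AnalyzeReturn(Outs, RetCC_AVR);
    }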
diff --git a/contrib/llvm/lib/Target/AVR/AVRConfig.h b/contrib/llvm/lib/Target/AVR/AVRConfig.h
new file mode 100644
index 0000000..65588bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRConfig.h
@@ -0,0 +1,15 @@
+//===-- AVRConfig.h - AVR Backend Configuration Header ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_CONFIG_H
+#define LLVM_AVR_CONFIG_H
+
+#define LLVM_AVR_GCC_COMPAT
+
+#endif // LLVM_AVR_CONFIG_H
diff --git a/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
new file mode 100644
index 0000000..6571d5d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -0,0 +1,73 @@
+//===-- AVRMachineFunctionInfo.h - AVR machine function info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AVR-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MACHINE_FUNCTION_INFO_H
+#define LLVM_AVR_MACHINE_FUNCTION_INFO_H
+
+#include "AVRConfig.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/**
+ * Contains AVR-specific information for each MachineFunction.
+ */
+class AVRMachineFunctionInfo : public MachineFunctionInfo {
+ /// Indicates if a register has been spilled by the register
+ /// allocator.
+ bool HasSpills;
+
+ /// Indicates if there are any fixed size allocas present.
+ /// Note that if there are only variable sized allocas this is set to false.
+ bool HasAllocas;
+
+ /// Indicates if arguments passed using the stack are being
+ /// used inside the function.
+ bool HasStackArgs;
+
+ /// Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ AVRMachineFunctionInfo()
+ : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+ CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+ explicit AVRMachineFunctionInfo(MachineFunction &MF)
+ : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+ CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+ bool getHasSpills() const { return HasSpills; }
+ void setHasSpills(bool B) { HasSpills = B; }
+
+ bool getHasAllocas() const { return HasAllocas; }
+ void setHasAllocas(bool B) { HasAllocas = B; }
+
+ bool getHasStackArgs() const { return HasStackArgs; }
+ void setHasStackArgs(bool B) { HasStackArgs = B; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+};
+
+} // end llvm namespace
+
+#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
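A usage sketch: passes and lowering code reach this object through the standard MachineFunction::getInfo<>() accessor; the call site below is hypothetical:

    #include "AVRMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    void recordSpill(llvm::MachineFunction &MF) {
      auto *AFI = MF.getInfo<llvm::AVRMachineFunctionInfo>();
      AFI->setHasSpills(true); // later queried by frame lowering
    }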
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
new file mode 100644
index 0000000..32650fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -0,0 +1,216 @@
+//===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the AVR register file
+//===----------------------------------------------------------------------===//
+
+// 8-bit General purpose register definition.
+class AVRReg<bits<16> num,
+ string name,
+ list<Register> subregs = [],
+ list<string> altNames = []>
+ : RegisterWithSubRegs<name, subregs>
+{
+ field bits<16> Num = num;
+
+ let HWEncoding = num;
+ let Namespace = "AVR";
+ let SubRegs = subregs;
+ let AltNames = altNames;
+}
+
+// Subregister indices.
+let Namespace = "AVR" in
+{
+ def sub_lo : SubRegIndex<8>;
+ def sub_hi : SubRegIndex<8, 8>;
+}
+
+let Namespace = "AVR" in {
+ def ptr : RegAltNameIndex;
+}
+
+
+//===----------------------------------------------------------------------===//
+// 8-bit general purpose registers
+//===----------------------------------------------------------------------===//
+
+def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
+def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AVRReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AVRReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AVRReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AVRReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AVRReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AVRReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AVRReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AVRReg<20, "r20">, DwarfRegNum<[20]>;
+def R21 : AVRReg<21, "r21">, DwarfRegNum<[21]>;
+def R22 : AVRReg<22, "r22">, DwarfRegNum<[22]>;
+def R23 : AVRReg<23, "r23">, DwarfRegNum<[23]>;
+def R24 : AVRReg<24, "r24">, DwarfRegNum<[24]>;
+def R25 : AVRReg<25, "r25">, DwarfRegNum<[25]>;
+def R26 : AVRReg<26, "r26">, DwarfRegNum<[26]>;
+def R27 : AVRReg<27, "r27">, DwarfRegNum<[27]>;
+def R28 : AVRReg<28, "r28">, DwarfRegNum<[28]>;
+def R29 : AVRReg<29, "r29">, DwarfRegNum<[29]>;
+def R30 : AVRReg<30, "r30">, DwarfRegNum<[30]>;
+def R31 : AVRReg<31, "r31">, DwarfRegNum<[31]>;
+def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>;
+def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>;
+
+let SubRegIndices = [sub_lo, sub_hi],
+CoveredBySubRegs = 1 in
+{
+ // 16-bit GPR pairs.
+ def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
+
+ // The pointer registers (X,Y,Z) are a special case because they
+ // are printed as a `high:low` pair when a DREG is expected,
+ // but printed using `X`, `Y`, `Z` when a pointer register is expected.
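+ // For example, R31R30 prints as "r31:r30" as a DREG but as "Z" through the
+ // "ptr" alt-name index below.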
+ let RegAltNameIndices = [ptr] in {
+ def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
+ def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
+ def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
+ }
+ def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>;
+ def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>;
+ def R21R20 : AVRReg<20, "r21:r20", [R20, R21]>, DwarfRegNum<[20]>;
+ def R19R18 : AVRReg<18, "r19:r18", [R18, R19]>, DwarfRegNum<[18]>;
+ def R17R16 : AVRReg<16, "r17:r16", [R16, R17]>, DwarfRegNum<[16]>;
+ def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>;
+ def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>;
+ def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>;
+ def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
+ def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
+ def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
+ def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
+ def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+//:TODO: use proper set instructions instead of always using "add"
+
+// Main 8-bit register class.
+def GPR8 : RegisterClass<"AVR", [i8], 8,
+ (
+ // Return value and argument registers.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
+ R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
+ )>;
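+// The member order above is the preferred allocation order: call-clobbered
+// registers come first, so callee-saved registers are only touched (and thus
+// spilled around the prologue) under high register pressure.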
+
+// Simple lower registers r0..r15
+def GPR8lo : RegisterClass<"AVR", [i8], 8,
+ (
+ add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
+ )>;
+
+// 8-bit register class for instructions which take immediates.
+def LD8 : RegisterClass<"AVR", [i8], 8,
+ (
+ // Return value and arguments.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16
+ )>;
+
+// Simple lower registers r16..r23 (the lower half of the LD8 class)
+def LD8lo : RegisterClass<"AVR", [i8], 8,
+ (
+ add R23, R22, R21, R20, R19, R18, R17, R16
+ )>;
+
+// Main 16-bit pair register class.
+def DREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0
+ )>;
+
+// 16-bit register class for immediate instructions.
+def DLDREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16
+ )>;
+
+// 16-bit register class for the adiw/sbiw instructions.
+def IWREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28
+ )>;
+
+// 16-bit register class for the ld and st instructions.
+// AKA X, Y, and Z.
+def PTRREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ add R27R26, // X
+ R29R28, // Y
+ R31R30 // Z
+ ), ptr>;
+
+// 16-bit register class for the ldd and std instructions.
+// AKA Y and Z.
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ add R31R30, R29R28
+ ), ptr>;
+
+// We have a bunch of instructions with an explicit Z register argument. We
+// model this using a register class containing only the Z register.
+// :TODO: Rename to 'ZREG'.
+def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
+
+// Register class used for the stack read pseudo instruction.
+def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>;
+
+//:TODO: if we remove this we get an error in tablegen
+//:TODO: this is just a hack, remove it once add16 works!
+// Status register.
+def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>;
+def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)>
+{
+ let CopyCost = -1; // Don't allow copying of status registers
+}
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
new file mode 100644
index 0000000..a91dce8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -0,0 +1,4 @@
+
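+// FIXME: Temporary stub so the AVR target links; TargetMachine construction
+// and pass setup are not implemented yet.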
+extern "C" void LLVMInitializeAVRTarget() {
+
+}
diff --git a/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
new file mode 100644
index 0000000..c0e0d20
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+Target TheAVRTarget;
+}
+
+extern "C" void LLVMInitializeAVRTargetInfo() {
+ llvm::RegisterTarget<llvm::Triple::avr> X(
+ llvm::TheAVRTarget, "avr", "Atmel AVR Microcontroller");
+}
+
+// FIXME: Temporary stub - this function must be defined for linking
+// to succeed. Remove once this function is properly implemented.
+extern "C" void LLVMInitializeAVRTargetMC() {
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPF.td b/contrib/llvm/lib/Target/BPF/BPF.td
index a4ce90a..8493b0f 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.td
+++ b/contrib/llvm/lib/Target/BPF/BPF.td
@@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def BPFAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "BPF";
+ string BreakCharacters = ".";
+}
+
def BPF : Target {
let InstructionSet = BPFInstrInfo;
let AssemblyWriters = [BPFInstPrinter];
+ let AssemblyParserVariants = [BPFAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 7341828..6a5b37e 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -547,8 +547,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// ThisMBB:
// ...
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index adcaff6..4276d08 100644
--- a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -17,8 +17,6 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
-
class BPFInstPrinter : public MCInstPrinter {
public:
BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 36f9926..8c358ca 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -68,16 +68,23 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
- return;
- }
- assert(Fixup.getKind() == FK_PCRel_2);
- Value = (uint16_t)((Value - 8) / 8);
- if (IsLittleEndian) {
- Data[Fixup.getOffset() + 2] = Value & 0xFF;
- Data[Fixup.getOffset() + 3] = Value >> 8;
+ } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
+ unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8;
+
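+ // Write Value out one byte at a time, least significant byte first,
+ // placing each byte in the instruction stream per target endianness.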
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned Idx = IsLittleEndian ? i : Size - i - 1;
+ Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8));
+ }
} else {
- Data[Fixup.getOffset() + 2] = Value >> 8;
- Data[Fixup.getOffset() + 3] = Value & 0xFF;
+ assert(Fixup.getKind() == FK_PCRel_2);
+ Value = (uint16_t)((Value - 8) / 8);
+ if (IsLittleEndian) {
+ Data[Fixup.getOffset() + 2] = Value & 0xFF;
+ Data[Fixup.getOffset() + 3] = Value >> 8;
+ } else {
+ Data[Fixup.getOffset() + 2] = Value >> 8;
+ Data[Fixup.getOffset() + 3] = Value & 0xFF;
+ }
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 05ba618..87cdd5e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -44,6 +44,10 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_X86_64_64;
case FK_SecRel_4:
return ELF::R_X86_64_PC32;
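+ // As in the cases above, the BPF backend reuses x86_64 relocation numbers.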
+ case FK_Data_8:
+ return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ case FK_Data_4:
+ return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index d63bbf4..1f440fe 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -34,6 +34,8 @@ public:
UsesELFSectionDirectiveForBSS = true;
HasSingleParameterDotFile = false;
HasDotTypeDotSizeDirective = false;
+
+ SupportsDebugInformation = true;
}
};
}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index 272688e..5ea6551 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -551,7 +551,8 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
void CppWriter::printType(Type* Ty) {
// We don't print definitions for primitive types
if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() ||
- Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy())
+ Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy() ||
+ Ty->isTokenTy())
return;
// If we already defined this type, we don't need to define it again.
@@ -1355,23 +1356,18 @@ void CppWriter::printInstruction(const Instruction *I,
}
case Instruction::GetElementPtr: {
const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
- if (gep->getNumOperands() <= 2) {
- Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
- << opNames[0];
- if (gep->getNumOperands() == 2)
- Out << ", " << opNames[1];
- } else {
- Out << "std::vector<Value*> " << iName << "_indices;";
- nl(Out);
- for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
- Out << iName << "_indices.push_back("
- << opNames[i] << ");";
- nl(Out);
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << getCppName(gep->getSourceElementType()) << ", " << opNames[0] << ", {";
+ in();
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ if (i != 1) {
+ Out << ", ";
}
- Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices";
+ nl(Out);
+ Out << opNames[i];
}
- Out << ", \"";
+ out();
+ nl(Out) << "}, \"";
printEscapedString(gep->getName());
Out << "\", " << bbname << ");";
break;
@@ -1803,13 +1799,12 @@ void CppWriter::printFunctionBody(const Function *F) {
<< "->arg_begin();";
nl(Out);
}
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << "Value* " << getCppName(AI) << " = args++;";
+ for (const Argument &AI : F->args()) {
+ Out << "Value* " << getCppName(&AI) << " = args++;";
nl(Out);
- if (AI->hasName()) {
- Out << getCppName(AI) << "->setName(\"";
- printEscapedString(AI->getName());
+ if (AI.hasName()) {
+ Out << getCppName(&AI) << "->setName(\"";
+ printEscapedString(AI.getName());
Out << "\");";
nl(Out);
}
@@ -1818,29 +1813,25 @@ void CppWriter::printFunctionBody(const Function *F) {
// Create all the basic blocks
nl(Out);
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
+ for (const BasicBlock &BI : *F) {
+ std::string bbname(getCppName(&BI));
Out << "BasicBlock* " << bbname <<
" = BasicBlock::Create(mod->getContext(), \"";
- if (BI->hasName())
- printEscapedString(BI->getName());
- Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ if (BI.hasName())
+ printEscapedString(BI.getName());
+ Out << "\"," << getCppName(BI.getParent()) << ",0);";
nl(Out);
}
// Output all of its basic blocks... for the function
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ for (const BasicBlock &BI : *F) {
+ std::string bbname(getCppName(&BI));
+ nl(Out) << "// Block " << BI.getName() << " (" << bbname << ")";
nl(Out);
// Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
- I != E; ++I) {
- printInstruction(I,bbname);
- }
+ for (const Instruction &I : BI)
+ printInstruction(&I, bbname);
}
// Loop over the ForwardRefs and resolve them now that all instructions
@@ -1883,7 +1874,7 @@ void CppWriter::printInline(const std::string& fname,
printFunctionUses(F);
printFunctionBody(F);
is_inline = false;
- Out << "return " << getCppName(F->begin()) << ";";
+ Out << "return " << getCppName(&F->front()) << ";";
nl(Out) << "}";
nl(Out);
}
@@ -1896,17 +1887,14 @@ void CppWriter::printModuleBody() {
// Functions can call each other and global variables can reference them so
// define all the functions first before emitting their function bodies.
nl(Out) << "// Function Declarations"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I)
- printFunctionHead(I);
+ for (const Function &I : *TheModule)
+ printFunctionHead(&I);
// Process the global variables declarations. We can't initialze them until
// after the constants are printed so just print a header for each global
nl(Out) << "// Global Variable Declarations\n"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableHead(I);
- }
+ for (const GlobalVariable &I : TheModule->globals())
+ printVariableHead(&I);
// Print out all the constants definitions. Constants don't recurse except
// through GlobalValues. All GlobalValues have been declared at this point
@@ -1918,21 +1906,18 @@ void CppWriter::printModuleBody() {
// been emitted. These definitions just couple the gvars with their constant
// initializers.
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableBody(I);
- }
+ for (const GlobalVariable &I : TheModule->globals())
+ printVariableBody(&I);
// Finally, we can safely put out all of the function bodies.
nl(Out) << "// Function Definitions"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I) {
- if (!I->isDeclaration()) {
- nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ for (const Function &I : *TheModule) {
+ if (!I.isDeclaration()) {
+ nl(Out) << "// Function: " << I.getName() << " (" << getCppName(&I)
<< ")";
nl(Out) << "{";
nl(Out,1);
- printFunctionBody(I);
+ printFunctionBody(&I);
nl(Out,-1) << "}";
nl(Out);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 0000000..a8622a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
+//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcasmparser"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCExpr.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableFutureRegs("mfuture-regs",
+ cl::desc("Enable future registers"));
+
+static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
+cl::desc("Warn for missing parenthesis around predicate registers"),
+cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
+cl::desc("Error for missing parenthesis around predicate registers"),
+cl::init(false));
+static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
+cl::desc("Warn for mismatching a signed and unsigned value"),
+cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
+cl::desc("Warn for register names that arent contigious"),
+cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
+cl::desc("Error for register names that aren't contigious"),
+cl::init(false));
+
+
+namespace {
+struct HexagonOperand;
+
+class HexagonAsmParser : public MCTargetAsmParser {
+
+ HexagonTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+ return static_cast<HexagonTargetStreamer &>(TS);
+ }
+
+ MCAsmParser &Parser;
+ MCAssembler *Assembler;
+ MCInstrInfo const &MCII;
+ MCInst MCB;
+ bool InBrackets;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAssembler *getAssembler() const { return Assembler; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool equalIsAsmAssignment() override { return false; }
+ bool isLabel(AsmToken &Token) override;
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool ParseDirectiveFalign(unsigned Size, SMLoc L);
+
+ virtual bool ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
+ bool ParseDirectiveSubsection(SMLoc L);
+ bool ParseDirectiveValue(unsigned Size, SMLoc L);
+ bool ParseDirectiveComm(bool IsLocal, SMLoc L);
+ bool RegisterMatchesArch(unsigned MatchNum) const;
+
+ bool matchBundleOptions();
+ bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
+ bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
+ void canonicalizeImmediates(MCInst &MCI);
+ bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
+ OperandVector &InstOperands, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm, bool &MustExtend);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+ void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+ int processInstruction(MCInst &Inst, OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend);
+
+ // Check if we have an assembler and, if so, set the ELF e_header flags.
+ void chksetELFHeaderEFlags(unsigned flags) {
+ if (getAssembler())
+ getAssembler()->setELFHeaderEFlags(flags);
+ }
+
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "HexagonGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, _STI), Parser(_Parser),
+ MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+ MCAsmParserExtension::Initialize(_Parser);
+
+ Assembler = nullptr;
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ Assembler = &MES->getAssembler();
+ }
+ }
+
+ bool mustExtend(OperandVector &Operands);
+ bool splitIdentifier(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands);
+ bool parseInstruction(OperandVector &Operands);
+ bool implicitExpressionLocation(OperandVector &Operands);
+ bool parseExpressionOrOperand(OperandVector &Operands);
+ bool parseExpression(MCExpr const *& Expr);
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override
+ {
+ llvm_unreachable("Unimplemented");
+ }
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ AsmToken ID, OperandVector &Operands) override;
+
+ virtual bool ParseDirective(AsmToken DirectiveID) override;
+};
+
+/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
+/// instruction.
+struct HexagonOperand : public MCParsedAsmOperand {
+ enum KindTy { Token, Immediate, Register } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct TokTy {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegTy {
+ unsigned RegNum;
+ };
+
+ struct ImmTy {
+ const MCExpr *Val;
+ bool MustExtend;
+ };
+
+ struct InstTy {
+ OperandVector *SubInsts;
+ };
+
+ union {
+ struct TokTy Tok;
+ struct RegTy Reg;
+ struct ImmTy Imm;
+ };
+
+ HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+public:
+ HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { llvm_unreachable("No isMem"); }
+ bool isReg() const { return Kind == Register; }
+
+ bool CheckImmRange(int immBits, int zeroBits, bool isSigned,
+ bool isRelocatable, bool Extendable) const {
+ if (Kind == Immediate) {
+ const MCExpr *myMCExpr = getImm();
+ if (Imm.MustExtend && !Extendable)
+ return false;
+ int64_t Res;
+ if (myMCExpr->evaluateAsAbsolute(Res)) {
+ int bits = immBits + zeroBits;
+ // The encoded field spans immBits + zeroBits bits in total;
+ // the low zeroBits bits of the value must be zero.
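+ // For example, s4_1Imm has immBits = 4 and zeroBits = 1, accepting
+ // even values in the signed 5-bit range [-16, 14].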
+ if (Res & ((1 << zeroBits) - 1))
+ return false;
+ if (isSigned) {
+ if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1)))
+ return true;
+ } else {
+ if (bits == 64)
+ return true;
+ if (Res >= 0)
+ return (uint64_t)Res < (1ULL << bits);
+ else {
+ const int64_t high_bit_set = 1ULL << 63;
+ const uint64_t mask = (high_bit_set >> (63 - bits));
+ return ((uint64_t)Res & mask) == mask;
+ }
+ }
+ } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable)
+ return true;
+ else if (myMCExpr->getKind() == MCExpr::Binary ||
+ myMCExpr->getKind() == MCExpr::Unary)
+ return true;
+ }
+ return false;
+ }
+
+ bool isf32Ext() const { return false; }
+ bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); }
+ bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); }
+ bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); }
+ bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); }
+ bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
+ bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
+ bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
+ bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
+ bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); }
+
+ bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); }
+ bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
+ bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
+ bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
+ bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
+ bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
+ bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
+ bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
+ bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
+ bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); }
+ bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); }
+ bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+ bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); }
+ bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); }
+ bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); }
+
+ bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+
+ bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
+ bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
+ bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
+ bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
+ bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
+ bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
+ bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
+ bool iss11_0Ext() const {
+ return CheckImmRange(11 + 26, 0, true, true, true);
+ }
+ bool iss11_1Ext() const {
+ return CheckImmRange(11 + 26, 1, true, true, true);
+ }
+ bool iss11_2Ext() const {
+ return CheckImmRange(11 + 26, 2, true, true, true);
+ }
+ bool iss11_3Ext() const {
+ return CheckImmRange(11 + 26, 3, true, true, true);
+ }
+
+ bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
+ bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
+ bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
+ bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
+ bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
+ bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
+ bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
+ bool isu32MustExt() const { return isImm() && Imm.MustExtend; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ }
+
+ void addSignedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ MCExpr const *Expr = getImm();
+ int64_t Value;
+ if (!Expr->evaluateAsAbsolute(Value)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ int64_t Extended = SignExtend64 (Value, 32);
+ if ((Extended < 0) == (Value < 0)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ // Flip bit 33 to signal a signed/unsigned mismatch.
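+ // For example, "#0xffffffff" evaluates to 0xffffffff but sign-extends to -1;
+ // the flipped bit lets canonicalizeImmediates detect the case and warn.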
+ Extended ^= 0x100000000;
+ Inst.addOperand(MCOperand::createImm(Extended));
+ }
+
+ void addf32ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds32ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8Imm64Operands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu64ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu5ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu4ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addm6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addn8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds16ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds12ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds10ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds9ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu6ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
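+ // The parsed value is given in units of 64 (the "_6" suffix denotes six
+ // implicit low zero bits); scale it up to the actual immediate here.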
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
+ HexagonOperand *Op = new HexagonOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->Imm.MustExtend = false;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+};
+
+} // end anonymous namespace.
+
+void HexagonOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS, nullptr);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+static unsigned MatchRegisterName(StringRef Name);
+
+bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
+ DEBUG(dbgs() << "Bundle:");
+ DEBUG(MCB.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "--\n");
+
+ // Check the bundle for errors.
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+
+ bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
+ getContext(), MCB,
+ &Check);
+
+ while (Check.getNextErrInfo()) {
+ unsigned Reg = Check.getErrRegister();
+ Twine R(RI->getName(Reg));
+
+ uint64_t Err = Check.getError();
+ if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
+ Error(IDLoc,
+ "unconditional branch cannot precede another branch in packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
+ HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
+ Error(IDLoc, "register `" + R +
+ "' used with `.new' "
+ "but not validly modified in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
+ Error(IDLoc, "register `" + R + "' modified more than once");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
+ Error(IDLoc, "cannot write to read-only register `" + R + "'");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
+ Error(IDLoc, "loop-setup and some branch instructions "
+ "cannot be in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
+ Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+ Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+ "cannot contain instructions that modify register " +
+ "`" + R + "'");
+ }
+
+ if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
+ Error(IDLoc,
+ "instruction cannot appear in packet with other instructions");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
+ Error(IDLoc, "too many slots used in packet");
+
+ if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
+ uint64_t Erm = Check.getShuffleError();
+
+ if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
+ Error(IDLoc, "invalid instruction packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
+ Error(IDLoc, "invalid instruction packet: too many stores");
+ else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
+ Error(IDLoc, "invalid instruction packet: too many loads");
+ else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
+ Error(IDLoc, "too many branches in packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: slot error");
+ else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
+ Error(IDLoc, "v60 packet violation");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
+ Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+ else
+ Error(IDLoc, "unknown error in instruction packet");
+ }
+ }
+
+ unsigned Warn = Check.getWarning();
+ if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.cur' "
+ "but not used in the same packet");
+ else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.tmp' "
+ "but not used in the same packet");
+ }
+ }
+
+ if (CheckOk) {
+ MCB.setLoc(IDLoc);
+ if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
+ assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
+ assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
+ // Empty packets are valid yet aren't emitted
+ return false;
+ }
+ Out.EmitInstruction(MCB, getSTI());
+ } else {
+ // If compounding and duplexing didn't reduce the size to
+ // 4 instructions or fewer, the packet is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ return true; // Error
+ }
+ }
+
+ return false; // No error
+}
+
+bool HexagonAsmParser::matchBundleOptions() {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ if (!Parser.getTok().is(AsmToken::Colon))
+ return false;
+ Lexer.Lex();
+ StringRef Option = Parser.getTok().getString();
+ if (Option.compare_lower("endloop0") == 0)
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ else if (Option.compare_lower("endloop1") == 0)
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ else if (Option.compare_lower("mem_noshuf") == 0)
+ HexagonMCInstrInfo::setMemReorderDisabled(MCB);
+ else if (Option.compare_lower("mem_shuf") == 0)
+ HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
+ else
+ return true;
+ Lexer.Lex();
+ }
+}
+
+// For instruction aliases, plain immediates are generated rather than
+// MCConstantExpr. Convert them to MCConstantExpr so every immediate is
+// uniformly an MCExpr. Also check for signed/unsigned mismatches and warn.
+void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
+ MCInst NewInst;
+ NewInst.setOpcode(MCI.getOpcode());
+ for (MCOperand &I : MCI)
+ if (I.isImm()) {
+ int64_t Value (I.getImm());
+ if ((Value & 0x100000000) != (Value & 0x80000000)) {
+ // Detect flipped bit 33 wrt bit 32 and signal warning
+ Value ^= 0x100000000;
+ if (WarnSignedMismatch)
+ Warning (MCI.getLoc(), "Signed/Unsigned mismatch");
+ }
+ NewInst.addOperand(MCOperand::createExpr(
+ MCConstantExpr::create(Value, getContext())));
+ }
+ else
+ NewInst.addOperand(I);
+ MCI = NewInst;
+}
+
+bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
+ OperandVector &InstOperands,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm,
+ bool &MustExtend) {
+ // Perform matching with tablegen asmmatcher generated function
+ int result =
+ MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
+ if (result == Match_Success) {
+ MCI.setLoc(IDLoc);
+ MustExtend = mustExtend(InstOperands);
+ canonicalizeImmediates(MCI);
+ result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);
+
+ DEBUG(dbgs() << "Insn:");
+ DEBUG(MCI.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "\n\n");
+
+ MCI.setLoc(IDLoc);
+ }
+
+ // Create instruction operand for bundle instruction.
+ // TODO: Break this into a separate function; the code here is less readable.
+ // Think about how to get an instruction error to report correctly;
+ // SMLoc will return the location of the "{".
+ switch (result) {
+ default:
+ break;
+ case Match_Success:
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "invalid instruction");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction");
+ case Match_InvalidOperand:
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= InstOperands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
+ ->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
+ unsigned Count = 0;
+ for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
+ if (i->isImm())
+ if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
+ ++Count;
+ // Multiple extenders should have been filtered by iss9Ext et al.
+ assert(Count < 2 && "Multiple extenders");
+ return Count == 1;
+}
+
+bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ if (!InBrackets) {
+ MCB.clear();
+ MCB.addOperand(MCOperand::createImm(0));
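+ // Operand 0 of the bundle MCInst holds the packet flags
+ // (endloop markers, memory-reorder bits, etc.).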
+ }
+ HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (InBrackets) {
+ getParser().Error(IDLoc, "Already in a packet");
+ return true;
+ }
+ InBrackets = true;
+ return false;
+ }
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (!InBrackets) {
+ getParser().Error(IDLoc, "Not in a packet");
+ return true;
+ }
+ InBrackets = false;
+ if (matchBundleOptions())
+ return true;
+ return finishBundle(IDLoc, Out);
+ }
+ MCInst *SubInst = new (getParser().getContext()) MCInst;
+ bool MustExtend = false;
+ if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
+ MatchingInlineAsm, MustExtend))
+ return true;
+ HexagonMCInstrInfo::extendIfNeeded(
+ getParser().getContext(), MCII, MCB, *SubInst,
+ HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
+ MCB.addOperand(MCOperand::createInst(SubInst));
+ if (!InBrackets)
+ return finishBundle(IDLoc, Out);
+ return false;
+}
+
+/// ParseDirective parses the Hexagon specific directives
+bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
+ return ParseDirectiveValue(4, DirectiveID.getLoc());
+ if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
+ IDVal.lower() == ".half")
+ return ParseDirectiveValue(2, DirectiveID.getLoc());
+ if (IDVal.lower() == ".falign")
+ return ParseDirectiveFalign(256, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
+ return ParseDirectiveComm(true, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
+ return ParseDirectiveComm(false, DirectiveID.getLoc());
+ if (IDVal.lower() == ".subsection")
+ return ParseDirectiveSubsection(DirectiveID.getLoc());
+
+ return true;
+}
+
+bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
+ const MCExpr *Subsection = nullptr;
+ int64_t Res;
+
+ assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
+ "Invalid subsection directive");
+ getParser().parseExpression(Subsection);
+
+ if (!Subsection->evaluateAsAbsolute(Res))
+ return Error(L, "Cannot evaluate subsection number");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
+ // negative subsections together and in the same order, but at the opposite
+ // end of the section. Only legacy hexagon-gcc-created assembly code
+ // used negative subsections.
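+ // For example, ".subsection -1" is stored as subsection 8191.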
+ if ((Res < 0) && (Res > -8193))
+ Subsection = MCConstantExpr::create(8192 + Res, this->getContext());
+
+ getStreamer().SubSection(Subsection);
+ return false;
+}
+
+/// ::= .falign [expression]
+bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
+
+ int64_t MaxBytesToFill = 15;
+
+ // If there is an argument.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+
+ // Note that parseExpression returns false on success.
+ if (getParser().parseExpression(Value) == false) {
+ // Make sure this is a constant number that is in range.
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(ExprLoc, "not a valid expression for falign directive");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
+ return Error(ExprLoc, "literal value out of range (256) for falign");
+ MaxBytesToFill = IntValue;
+ Lex();
+ } else {
+ return Error(ExprLoc, "not a valid expression for falign directive");
+ }
+ }
+
+ getTargetStreamer().emitFAlign(16, MaxBytesToFill);
+ Lex();
+
+ return false;
+}
+
+/// ::= .word [ expression (, expression)* ]
+bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else
+ getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+// This is largely a copy of AsmParser's ParseDirectiveComm extended to
+// accept a 3rd argument, AccessAlignment which indicates the smallest
+// memory access made to the symbol, expressed in bytes. If no
+// AccessAlignment is specified it defaults to the Alignment Value.
+// Hexagon's .lcomm:
+// .lcomm Symbol, Length, Alignment, AccessAlignment
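+// e.g. ".lcomm buffer, 256, 8, 4" requests a 256-byte local common symbol,
+// 8-byte aligned, accessed at most 4 bytes at a time (illustrative values).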
+bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
+ // FIXME: need better way to detect if AsmStreamer (upstream removed
+ // getKind())
+ if (getStreamer().hasRawTextSupport())
+ return true; // Only object file output requires special treatment.
+
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t ByteAlignment = 1;
+ SMLoc ByteAlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ ByteAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(ByteAlignment))
+ return true;
+ if (!isPowerOf2_64(ByteAlignment))
+ return Error(ByteAlignmentLoc, "alignment must be a power of 2");
+ }
+
+ int64_t AccessAlignment = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ // The optional access argument specifies the size of the smallest memory
+ // access to be made to the symbol, expressed in bytes.
+ SMLoc AccessAlignmentLoc;
+ Lex();
+ AccessAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(AccessAlignment))
+ return true;
+
+ if (!isPowerOf2_64(AccessAlignment))
+ return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+ // NOTE: a size of zero for a .comm should create an undefined symbol,
+ // but a size of zero for a .lcomm creates a bss symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (ByteAlignment < 0)
+ return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(Loc, "invalid symbol redefinition");
+
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ if (IsLocal) {
+ HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+ }
+
+ HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+}
+
+// validate register against architecture
+bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+ return true;
+}
+
+// extern "C" void LLVMInitializeHexagonAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() {
+ RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+}
+
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_REGISTER_MATCHER
+#include "HexagonGenAsmMatcher.inc"
+
+namespace {
+bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+ if (Index >= Operands.size())
+ return false;
+ MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
+ if (!Operand.isToken())
+ return false;
+ return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
+}
+bool previousIsLoop(OperandVector &Operands, size_t Index) {
+ return previousEqual(Operands, Index, "loop0") ||
+ previousEqual(Operands, Index, "loop1") ||
+ previousEqual(Operands, Index, "sp1loop0") ||
+ previousEqual(Operands, Index, "sp2loop0") ||
+ previousEqual(Operands, Index, "sp3loop0");
+}
+}
+
+bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
+ AsmToken const &Token = getParser().getTok();
+ StringRef String = Token.getString();
+ SMLoc Loc = Token.getLoc();
+ getLexer().Lex();
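+ // Split a dotted identifier such as "p0.new" into separate "p0", ".",
+ // and "new" tokens.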
+ do {
+ std::pair<StringRef, StringRef> HeadTail = String.split('.');
+ if (!HeadTail.first.empty())
+ Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc));
+ if (!HeadTail.second.empty())
+ Operands.push_back(HexagonOperand::CreateToken(
+ String.substr(HeadTail.first.size(), 1), Loc));
+ String = HeadTail.second;
+ } while (!String.empty());
+ return false;
+}
+
+bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
+ unsigned Register;
+ SMLoc Begin;
+ SMLoc End;
+ MCAsmLexer &Lexer = getLexer();
+ if (!ParseRegister(Register, Begin, End)) {
+ if (!ErrorMissingParenthesis)
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::P0:
+ case Hexagon::P1:
+ case Hexagon::P2:
+ case Hexagon::P3:
+ if (previousEqual(Operands, 0, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.push_back(HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ if (previousEqual(Operands, 0, "!") &&
+ previousEqual(Operands, 1, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.insert(Operands.end () - 1,
+ HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ break;
+ }
+ Operands.push_back(HexagonOperand::CreateReg(
+ Register, Begin, End));
+ return false;
+ }
+ return splitIdentifier(Operands);
+}
+
+bool HexagonAsmParser::isLabel(AsmToken &Token) {
+ MCAsmLexer &Lexer = getLexer();
+ AsmToken const &Second = Lexer.getTok();
+ AsmToken Third = Lexer.peekTok();
+ StringRef String = Token.getString();
+ if (Token.is(AsmToken::TokenKind::LCurly) ||
+ Token.is(AsmToken::TokenKind::RCurly))
+ return false;
+ if (!Token.is(AsmToken::TokenKind::Identifier))
+ return true;
+ if (!MatchRegisterName(String.lower()))
+ return true;
+ (void)Second;
+ assert(Second.is(AsmToken::Colon));
+ StringRef Raw(String.data(), Third.getString().data() - String.data() +
+ Third.getString().size());
+ std::string Collapsed = Raw;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef Whole = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
+ if (!MatchRegisterName(DotSplit.first.lower()))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) {
+ if (!Contigious && ErrorNoncontigiousRegister) {
+ Error(Loc, "Register name is not contigious");
+ return true;
+ }
+ if (!Contigious && WarnNoncontigiousRegister)
+ Warning(Loc, "Register name is not contigious");
+ return false;
+}
+
+bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmLexer &Lexer = getLexer();
+ StartLoc = getLexer().getLoc();
+ SmallVector<AsmToken, 5> Lookahead;
+ StringRef RawString(Lexer.getTok().getString().data(), 0);
+ bool Again = Lexer.is(AsmToken::Identifier);
+ bool NeededWorkaround = false;
+ while (Again) {
+ AsmToken const &Token = Lexer.getTok();
+ RawString = StringRef(RawString.data(),
+ Token.getString().data() - RawString.data() +
+ Token.getString().size());
+ Lookahead.push_back(Token);
+ Lexer.Lex();
+ bool Contigious = Lexer.getTok().getString().data() ==
+ Lookahead.back().getString().data() +
+ Lookahead.back().getString().size();
+ bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) ||
+ Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) ||
+ Lexer.is(AsmToken::Colon);
+ bool Workaround = Lexer.is(AsmToken::Colon) ||
+ Lookahead.back().is(AsmToken::Colon);
+ Again = (Contigious && Type) || (Workaround && Type);
+ NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
+ }
+ std::string Collapsed = RawString;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef FullString = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
+ unsigned DotReg = MatchRegisterName(DotSplit.first.lower());
+ if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
+ if (DotSplit.second.empty()) {
+ RegNo = DotReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ } else {
+ RegNo = DotReg;
+ size_t First = RawString.find('.');
+ StringRef DotString(RawString.data() + First, RawString.size() - First);
+ Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ }
+ std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
+ unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower());
+ if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(ColonReg)) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ RegNo = ColonReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ while (!Lookahead.empty()) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ }
+ return true;
+}
+
+bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
+ if (previousEqual(Operands, 0, "call"))
+ return true;
+ if (previousEqual(Operands, 0, "jump"))
+ if (!getLexer().getTok().is(AsmToken::Colon))
+ return true;
+ if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1))
+ return true;
+ if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") &&
+ (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t")))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
+ llvm::SmallVector<AsmToken, 4> Tokens;
+ MCAsmLexer &Lexer = getLexer();
+ bool Done = false;
+ static char const *Comma = ",";
+ do {
+ Tokens.emplace_back(Lexer.getTok());
+ Lexer.Lex();
+ switch (Tokens.back().getKind()) {
+ case AsmToken::TokenKind::Hash:
+ if (Tokens.size() > 1)
+ if ((Tokens.end() - 2)->getKind() == AsmToken::TokenKind::Plus) {
+ Tokens.insert(Tokens.end() - 2,
+ AsmToken(AsmToken::TokenKind::Comma, Comma));
+ Done = true;
+ }
+ break;
+ case AsmToken::TokenKind::RCurly:
+ case AsmToken::TokenKind::EndOfStatement:
+ case AsmToken::TokenKind::Eof:
+ Done = true;
+ break;
+ default:
+ break;
+ }
+ } while (!Done);
+ while (!Tokens.empty()) {
+ Lexer.UnLex(Tokens.back());
+ Tokens.pop_back();
+ }
+ return getParser().parseExpression(Expr);
+}
+
+bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) {
+ if (implicitExpressionLocation(Operands)) {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = Parser.getLexer().getLoc();
+ std::unique_ptr<HexagonOperand> Expr =
+ HexagonOperand::CreateImm(nullptr, Loc, Loc);
+ MCExpr const *& Val = Expr->Imm.Val;
+ Operands.push_back(std::move(Expr));
+ return parseExpression(Val);
+ }
+ return parseOperand(Operands);
+}
+
+/// Parse an instruction.
+bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ AsmToken const &Token = Parser.getTok();
+ switch (Token.getKind()) {
+ case AsmToken::EndOfStatement: {
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::LCurly: {
+ if (!Operands.empty())
+ return true;
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::RCurly: {
+ if (Operands.empty()) {
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ }
+ return false;
+ }
+ case AsmToken::Comma: {
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::EqualEqual:
+ case AsmToken::ExclaimEqual:
+ case AsmToken::GreaterEqual:
+ case AsmToken::GreaterGreater:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess: {
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(0, 1), Token.getLoc()));
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(1, 1), Token.getLoc()));
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::Hash: {
+ bool MustNotExtend = false;
+ bool ImplicitExpression = implicitExpressionLocation(Operands);
+ std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm(
+ nullptr, Lexer.getLoc(), Lexer.getLoc());
+ if (!ImplicitExpression)
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ bool MustExtend = false;
+ bool HiOnly = false;
+ bool LoOnly = false;
+ if (Lexer.is(AsmToken::Hash)) {
+ Lexer.Lex();
+ MustExtend = true;
+ } else if (ImplicitExpression)
+ MustNotExtend = true;
+ AsmToken const &Token = Parser.getTok();
+ if (Token.is(AsmToken::Identifier)) {
+ StringRef String = Token.getString();
+ AsmToken IDToken = Token;
+ if (String.lower() == "hi") {
+ HiOnly = true;
+ } else if (String.lower() == "lo") {
+ LoOnly = true;
+ }
+ if (HiOnly || LoOnly) {
+ AsmToken LParen = Lexer.peekTok();
+ if (!LParen.is(AsmToken::LParen)) {
+ HiOnly = false;
+ LoOnly = false;
+ } else {
+ Lexer.Lex();
+ }
+ }
+ }
+ if (parseExpression(Expr->Imm.Val))
+ return true;
+ int64_t Value;
+ MCContext &Context = Parser.getContext();
+ assert(Expr->Imm.Val != nullptr);
+ if (Expr->Imm.Val->evaluateAsAbsolute(Value)) {
+ if (HiOnly)
+ Expr->Imm.Val = MCBinaryExpr::createLShr(
+ Expr->Imm.Val, MCConstantExpr::create(16, Context), Context);
+ if (HiOnly || LoOnly)
+ Expr->Imm.Val = MCBinaryExpr::createAnd(
+ Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context);
+ }
+ if (MustNotExtend)
+ Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context);
+ Expr->Imm.MustExtend = MustExtend;
+ Operands.push_back(std::move(Expr));
+ continue;
+ }
+ default:
+ break;
+ }
+ if (parseExpressionOrOperand(Operands))
+ return true;
+ }
+}
+
+bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ AsmToken ID,
+ OperandVector &Operands) {
+ getLexer().UnLex(ID);
+ return parseInstruction(Operands);
+}
+
+namespace {
+MCInst makeCombineInst(int opCode, MCOperand &Rdd,
+ MCOperand &MO1, MCOperand &MO2) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(opCode);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(MO1);
+ TmpInst.addOperand(MO2);
+
+ return TmpInst;
+}
+}
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp);
+
+ switch (Kind) {
+ case MCK_0: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK__MINUS_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ }
+ if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) {
+ StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length);
+ if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind)
+ return Match_Success;
+ if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind)
+ return Match_Success;
+ }
+
+ DEBUG(dbgs() << "Unmatched Operand:");
+ DEBUG(Op->dump());
+ DEBUG(dbgs() << "\n");
+
+ return Match_InvalidOperand;
+}
+
+void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
+ std::string errStr;
+ raw_string_ostream ES(errStr);
+ ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: ";
+ if (Max >= 0)
+ ES << "0-" << Max;
+ else
+ ES << Max << "-" << (-Max - 1);
+ Error(IDLoc, ES.str().c_str());
+}
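
A self-contained sketch of the diagnostic text OutOfRange builds; a negative Max encodes the signed range [Max, -Max-1], so -128 stands for [-128, 127]. The values below are illustrative only:

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

int main() {
  long long Val = 300, Max = -128;      // example values, not from the patch
  std::string errStr;
  llvm::raw_string_ostream ES(errStr);
  ES << "value " << Val << "(" << llvm::format_hex(Val, 0) << ") out of range: ";
  if (Max >= 0)
    ES << "0-" << Max;                  // unsigned operand: [0, Max]
  else
    ES << Max << "-" << (-Max - 1);     // signed operand: [Max, -Max-1]
  llvm::outs() << ES.str() << "\n";     // value 300(0x12c) out of range: -128-127
}
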
+
+int HexagonAsmParser::processInstruction(MCInst &Inst,
+ OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend) {
+ MCContext &Context = getParser().getContext();
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ std::string r = "r";
+ std::string v = "v";
+ std::string Colon = ":";
+
+ bool is32bit = false; // used to distinguish between CONST32 and CONST64
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+
+ case Hexagon::M4_mpyrr_addr:
+ case Hexagon::S4_addi_asl_ri:
+ case Hexagon::S4_addi_lsr_ri:
+ case Hexagon::S4_andi_asl_ri:
+ case Hexagon::S4_andi_lsr_ri:
+ case Hexagon::S4_ori_asl_ri:
+ case Hexagon::S4_ori_lsr_ri:
+ case Hexagon::S4_or_andix:
+ case Hexagon::S4_subi_asl_ri:
+ case Hexagon::S4_subi_lsr_ri: {
+ MCOperand &Ry = Inst.getOperand(0);
+ MCOperand &src = Inst.getOperand(2);
+ if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg()))
+ return Match_InvalidOperand;
+ break;
+ }
+
+ case Hexagon::C2_cmpgei: {
+ MCOperand &MO = Inst.getOperand(2);
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgti);
+ break;
+ }
+
+ case Hexagon::C2_cmpgeui: {
+ MCOperand &MO = Inst.getOperand(2);
+ int64_t Value;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success && "Assured by matcher");
+ if (Value == 0) {
+ MCInst TmpInst;
+ MCOperand &Pd = Inst.getOperand(0);
+ MCOperand &Rt = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::C2_cmpeq);
+ TmpInst.addOperand(Pd);
+ TmpInst.addOperand(Rt);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ } else {
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgtui);
+ }
+ break;
+ }
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0i: {
+ MCOperand &MO = Inst.getOperand(0);
+ // Loop has different opcodes for extended vs not extended, but we should
+ // not use the other opcode as it is a legacy artifact of TD files.
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ // if the operand can fit within a 7:2 field
+ if (Value < (1 << 8) && Value >= -(1 << 8)) {
+ SMLoc myLoc = Operands[2]->getStartLoc();
+ // A leading '#' remains at startLoc when the operand was written
+ // with '##'; in that case, force extension.
+ if (*myLoc.getPointer() == '#') {
+ MustExtend = true;
+ break;
+ }
+ } else {
+ // If immediate and out of 7:2 range.
+ MustExtend = true;
+ }
+ }
+ break;
+ }
+
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode(Hexagon::A2_combinew);
+ break;
+ }
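
A small sketch of the pair-splitting arithmetic the A2_tfrp family relies on; the encoding value of a double register is assumed here to be its even component number, so encoding 4 names r5:4 and the transfer becomes combine(r5, r4). All values are illustrative:

#include <cstdio>
#include <string>

int main() {
  unsigned RegPairNum = 4;  // assumed encoding of d2 == r5:4
  std::string Hi = "r" + std::to_string(RegPairNum + 1);  // "r5"
  std::string Lo = "r" + std::to_string(RegPairNum);      // "r4"
  std::printf("r%u:%u = combine(%s, %s)\n",
              RegPairNum + 1, RegPairNum, Hi.c_str(), Lo.c_str());
}
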
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ break;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ break;
+ }
+
+ // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) "
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ is32bit = true;
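+ // Falls through into the CONST64 cases below; is32bit selects memw vs. memd.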
+ // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) "
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ MCOperand &MO_1 = Inst.getOperand(1);
+ MCOperand &MO_0 = Inst.getOperand(0);
+
+ // push section onto section stack
+ MES->PushSection();
+
+ std::string myCharStr;
+ MCSectionELF *mySection;
+
+ // check if this is an immediate or a symbol
+ int64_t Value;
+ bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute) {
+ // Create a new section - one for each constant
+ // Some or all of the zeros are replaced with the given immediate.
+ if (is32bit) {
+ std::string myImmStr = utohexstr(static_cast<uint32_t>(Value));
+ myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ } else {
+ std::string myImmStr = utohexstr(Value);
+ myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ }
+
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else if (MO_1.isExpr()) {
+ // .lita - for expressions
+ myCharStr = ".lita";
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->SwitchSection(mySection);
+ unsigned byteSize = is32bit ? 4 : 8;
+ getStreamer().EmitCodeAlignment(byteSize, byteSize);
+
+ MCSymbol *Sym;
+
+ // for symbols, get rid of prepended ".gnu.linkonce.lx."
+
+ // emit symbol if needed
+ if (Absolute) {
+ Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16));
+ if (Sym->isUndefined()) {
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ getStreamer().EmitIntValue(Value, byteSize);
+ }
+ } else if (MO_1.isExpr()) {
+ const char *StringStart = nullptr;
+ const char *StringEnd = nullptr;
+ if (*Operands[4]->getStartLoc().getPointer() == '#') {
+ StringStart = Operands[5]->getStartLoc().getPointer();
+ StringEnd = Operands[6]->getStartLoc().getPointer();
+ } else { // no pound
+ StringStart = Operands[4]->getStartLoc().getPointer();
+ StringEnd = Operands[5]->getStartLoc().getPointer();
+ }
+
+ unsigned size = StringEnd - StringStart;
+ std::string DotConst = ".CONST_";
+ Sym = getContext().getOrCreateSymbol(DotConst +
+ StringRef(StringStart, size));
+
+ if (Sym->isUndefined()) {
+ // case where symbol is not yet defined: emit symbol
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Local);
+ getStreamer().EmitValue(MO_1.getExpr(), 4);
+ }
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->PopSection();
+
+ if (Sym) {
+ MCInst TmpInst;
+ if (is32bit) // 32 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ else // 64 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+
+ TmpInst.addOperand(MO_0);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext())));
+ Inst = TmpInst;
+ }
+ }
+ break;
+
+ // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)"
+ case Hexagon::A2_tfrpi: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? -1 : 0;
+ MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context)));
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO);
+ break;
+ }
+
+ // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)"
+ case Hexagon::TFRI64_V4: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ unsigned long long u64 = Value;
+ signed int s8 = (u64 >> 32) & 0xFFFFFFFF;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(s8, Context))); // upper 32
+ MCOperand imm2(MCOperand::createExpr(
+ MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2);
+ } else {
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(0, Context))); // upper 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO);
+ }
+ break;
+ }
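
A self-contained sketch of the 64-bit split in the TFRI64_V4 case above: the upper half must fit a signed 8-bit combine operand (OutOfRange is raised otherwise) while the lower half passes through unchanged. The immediate below is illustrative:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned long long u64 = 0xFFFFFFFF00000005ULL;  // assumed immediate
  int32_t s8 = static_cast<int32_t>(u64 >> 32);    // -1; must lie in [-128, 127]
  uint32_t lo = u64 & 0xFFFFFFFF;                  // 5
  std::printf("combine(#%d, #%u)\n", s8, lo);      // combine(#-1, #5)
}
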
+
+ // Handle $Rdd = combine(##imm, #imm)"
+ case Hexagon::TFRI64_V2_ext: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ MCOperand &MO2 = Inst.getOperand(2);
+ int64_t Value;
+ if (MO2.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+ // Handle $Rdd = combine(#imm, ##imm)"
+ case Hexagon::A4_combineii: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ int64_t Value;
+ if (MO1.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ MCOperand &MO2 = Inst.getOperand(2);
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+ case Hexagon::S2_tableidxb_goodsyntax: {
+ Inst.setOpcode(Hexagon::S2_tableidxb);
+ break;
+ }
+
+ case Hexagon::S2_tableidxh_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxh);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxw_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(2, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxw);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxd_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(3, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxd);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_mpyui: {
+ Inst.setOpcode(Hexagon::M2_mpyi);
+ break;
+ }
+ case Hexagon::M2_mpysmi: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (!MustExtend) {
+ if (Value < 0 && Value > -256) {
+ Imm.setExpr(MCConstantExpr::create(Value * -1, Context));
+ TmpInst.setOpcode(Hexagon::M2_mpysin);
+ } else if (Value < 256 && Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ } else {
+ if (Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ }
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ MCInst TmpInst;
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rd = $Rs
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ }
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ }
+ break;
+ }
+
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ Inst.setOpcode(Hexagon::A4_boundscheck_hi);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // raw:lo
+ Inst.setOpcode(Hexagon::A4_boundscheck_lo);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi
+ Inst.setOpcode(Hexagon::A2_addsph);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped raw:lo
+ Inst.setOpcode(Hexagon::A2_addspl);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCInst TmpInst;
+ MCOperand &Rxx = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(2);
+ MCOperand &Rt = Inst.getOperand(3);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ // The register operands sit in different positions in the target instruction.
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped rnd:sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0)
+ Inst.setOpcode(Hexagon::S2_vsathub);
+ else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ }
+ break;
+ }
+
+ case Hexagon::S5_vasrhrnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) {
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_vasrhrnd);
+ }
+ break;
+ }
+
+ case Hexagon::A2_not: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::A2_subri);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ TmpInst.addOperand(Rs);
+ Inst = TmpInst;
+ break;
+ }
+ } // switch
+
+ return Match_Success;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
index cb7e633..ea96eb0 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -868,7 +868,7 @@ void BT::visitNonBranch(const MachineInstr *MI) {
continue;
bool Changed = false;
- if (!Eval || !ResMap.has(RD.Reg)) {
+ if (!Eval || ResMap.count(RD.Reg) == 0) {
// Set to "ref" (aka "bottom").
uint16_t DefBW = ME.getRegBitWidth(RD);
RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW);
@@ -951,11 +951,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) {
// be processed.
for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) {
const MachineBasicBlock *SB = *I;
- if (SB->isLandingPad())
+ if (SB->isEHPad())
Targets.insert(SB);
}
if (FallsThrough) {
- MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator BIt = B.getIterator();
MachineFunction::const_iterator Next = std::next(BIt);
if (Next != MF.end())
Targets.insert(&*Next);
@@ -1005,7 +1005,7 @@ void BT::put(RegisterRef RR, const RegisterCell &RC) {
// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
- assert(Map.has(OldRR.Reg) && "OldRR not present in map");
+ assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map");
BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub);
BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub);
uint16_t OMB = OM.first(), OME = OM.last();
@@ -1104,9 +1104,9 @@ void BT::run() {
}
// If block end has been reached, add the fall-through edge to the queue.
if (It == End) {
- MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator BIt = B.getIterator();
MachineFunction::const_iterator Next = std::next(BIt);
- if (Next != MF.end()) {
+ if (Next != MF.end() && B.isSuccessor(&*Next)) {
int ThisN = B.getNumber();
int NextN = Next->getNumber();
FlowQ.push(CFGEdge(ThisN, NextN));
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
index ed002a7..959c831 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
@@ -36,9 +36,7 @@ struct BitTracker {
typedef SetVector<const MachineBasicBlock *> BranchTargetList;
- struct CellMapType : public std::map<unsigned,RegisterCell> {
- bool has(unsigned Reg) const;
- };
+ typedef std::map<unsigned, RegisterCell> CellMapType;
BitTracker(const MachineEvaluator &E, MachineFunction &F);
~BitTracker();
@@ -79,7 +77,6 @@ private:
// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
- BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {}
bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
@@ -146,7 +143,6 @@ struct BitTracker::BitValue {
BitValue(ValueType T = Top) : Type(T) {}
BitValue(bool B) : Type(B ? One : Zero) {}
- BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {}
BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {}
bool operator== (const BitValue &V) const {
@@ -279,11 +275,6 @@ struct BitTracker::RegisterCell {
return !operator==(RC);
}
- const RegisterCell &operator=(const RegisterCell &RC) {
- Bits = RC.Bits;
- return *this;
- }
-
// Generate a "ref" cell for the corresponding register. In the resulting
// cell each bit will be described as being the same as the corresponding
// bit in register Reg (i.e. the cell is "defined" by register Reg).
@@ -344,11 +335,6 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
return RC;
}
-
-inline bool BitTracker::CellMapType::has(unsigned Reg) const {
- return find(Reg) != end();
-}
-
// A class to evaluate target's instructions and update the cell maps.
// This is used internally by the bit tracker. A target that wants to
// utilize this should implement the evaluation functions (noted below)
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 9cc1e94..4a9c341 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -7,42 +7,45 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-disassembler"
+
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
-
-#include "llvm/MC/MCContext.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
-#include <array>
+#include "llvm/Support/TargetRegistry.h"
#include <vector>
using namespace llvm;
using namespace Hexagon;
-#define DEBUG_TYPE "hexagon-disassembler"
-
-// Pull DecodeStatus and its enum values into the global namespace.
-typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
/// \brief Hexagon disassembler for all Hexagon platforms.
class HexagonDisassembler : public MCDisassembler {
public:
+ std::unique_ptr<MCInstrInfo const> const MCII;
std::unique_ptr<MCInst *> CurrentBundle;
- HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx)
- : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {}
+ HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {}
DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -52,23 +55,57 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
+
+ void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
+ void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
};
}
-static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+// Forward declare these because the auto-generated code will reference them.
+// Definitions are further down.
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- void const *Decoder);
+ const void *Decoder);
+
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn);
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder);
static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
raw_ostream &os);
-static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst);
+static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t Address, const void *Decoder);
static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
@@ -95,129 +132,19 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-
-static const uint16_t IntRegDecoderTable[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
- Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
- Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
- Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
- Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
- Hexagon::R30, Hexagon::R31};
-
-static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
- Hexagon::P2, Hexagon::P3};
-
-static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
- const uint16_t Table[], size_t Size) {
- if (RegNo < Size) {
- Inst.addOperand(MCOperand::createReg(Table[RegNo]));
- return MCDisassembler::Success;
- } else
- return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- unsigned Register = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- static const uint16_t CtrlRegDecoderTable[] = {
- Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
- Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7,
- Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
- Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH};
-
- if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0]))
- return MCDisassembler::Fail;
-
- if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
- return MCDisassembler::Fail;
-
- unsigned Register = CtrlRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- static const uint16_t CtrlReg64DecoderTable[] = {
- Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2,
- Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister,
- Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8,
- Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister,
- Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC,
- Hexagon::NoRegister};
-
- if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0]))
- return MCDisassembler::Fail;
-
- if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
- return MCDisassembler::Fail;
-
- unsigned Register = CtrlReg64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- unsigned Register = 0;
- switch (RegNo) {
- case 0:
- Register = Hexagon::M0;
- break;
- case 1:
- Register = Hexagon::M1;
- break;
- default:
- return MCDisassembler::Fail;
- }
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- static const uint16_t DoubleRegDecoderTable[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
- Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
- Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
- Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
-
- return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable,
- sizeof(DoubleRegDecoderTable)));
-}
-
-static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- if (RegNo > 3)
- return MCDisassembler::Fail;
-
- unsigned Register = PredRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
#include "HexagonGenDisassemblerTables.inc"
-static MCDisassembler *createHexagonDisassembler(Target const &T,
- MCSubtargetInfo const &STI,
+static MCDisassembler *createHexagonDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new HexagonDisassembler(STI, Ctx);
+ return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo());
}
extern "C" void LLVMInitializeHexagonDisassembler() {
@@ -235,8 +162,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size = 0;
*CurrentBundle = &MI;
- MI.setOpcode(Hexagon::BUNDLE);
- MI.addOperand(MCOperand::createImm(0));
+ MI = HexagonMCInstrInfo::createBundle();
while (Result == Success && Complete == false) {
if (Bytes.size() < HEXAGON_INSTR_SIZE)
return MCDisassembler::Fail;
@@ -246,7 +172,21 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
}
- return Result;
+ if (Result == MCDisassembler::Fail)
+ return Result;
+ HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+ if (!Checker.check())
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+namespace {
+HexagonDisassembler const &disassembler(void const *Decoder) {
+ return *static_cast<HexagonDisassembler const *>(Decoder);
+}
+MCContext &contextFromDecoder(void const *Decoder) {
+ return disassembler(Decoder).getContext();
+}
}
DecodeStatus HexagonDisassembler::getSingleInstruction(
@@ -255,8 +195,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
uint32_t Instruction =
- llvm::support::endian::read<uint32_t, llvm::support::little,
- llvm::support::unaligned>(Bytes.data());
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
@@ -360,8 +299,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
MILow->setOpcode(opLow);
MCInst *MIHigh = new (getContext()) MCInst;
MIHigh->setOpcode(opHigh);
- AddSubinstOperands(MILow, opLow, instLow);
- AddSubinstOperands(MIHigh, opHigh, instHigh);
+ addSubinstOperands(MILow, opLow, instLow);
+ addSubinstOperands(MIHigh, opHigh, instHigh);
// see ConvertToSubInst() in
// lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -378,102 +317,774 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
// Calling the auto-generated decoder function.
Result =
decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
+
+ // If a "standard" insn isn't found, check the special cases.
+ if (MCDisassembler::Success != Result ||
+ MI.getOpcode() == Hexagon::A4_ext) {
+ Result = decodeImmext(MI, Instruction, this);
+ if (MCDisassembler::Success != Result) {
+ Result = decodeSpecial(MI, Instruction);
+ }
+ } else {
+ // If the instruction is a compound instruction, register values will
+ // follow the duplex model, so the register values in the MCInst are
+ // incorrect. If the instruction is a compound, loop through the
+ // operands and change registers appropriately.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) ==
+ HexagonII::TypeCOMPOUND) {
+ for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
+ if (i->isReg()) {
+ unsigned reg = i->getReg() - Hexagon::R0;
+ i->setReg(getRegFromSubinstEncoding(reg));
+ }
+ }
+ }
+ }
+ }
+
+ if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) {
+ unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI);
+ MCOperand &MCO = MI.getOperand(OpIndex);
+ assert(MCO.isReg() && "New value consumers must be registers");
+ unsigned Register =
+ getContext().getRegisterInfo()->getEncodingValue(MCO.getReg());
+ if ((Register & 0x6) == 0)
+ // HexagonPRM 10.11 Bit 1-2 == 0 is reserved
+ return MCDisassembler::Fail;
+ unsigned Lookback = (Register & 0x6) >> 1;
+ unsigned Offset = 1;
+ bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.end() - 1;
+ for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
+ if (i == n)
+ // Couldn't find producer
+ return MCDisassembler::Fail;
+ if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst()))
+ // Skip scalars when calculating distances for vectors
+ ++Lookback;
+ if (HexagonMCInstrInfo::isImmext(*i->getInst()))
+ ++Lookback;
+ if (Offset == Lookback)
+ break;
+ }
+ auto const &Inst = *i->getInst();
+ bool SubregBit = (Register & 0x1) != 0;
+ if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) {
+ // If subreg bit is set we're selecting the second produced newvalue
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg();
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
+ if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
+ Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
+ else if (SubregBit)
+ // Subreg bit should not be set for non-doublevector newvalue producers
+ return MCDisassembler::Fail;
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else
+ return MCDisassembler::Fail;
}
+ adjustExtendedInstructions(MI, MCB);
+ MCInst const *Extender =
+ HexagonMCInstrInfo::extenderForIndex(MCB,
+ HexagonMCInstrInfo::bundleSize(MCB));
+ if (Extender != nullptr) {
+ MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ?
+ *MI.getOperand(1).getInst() : MI;
+ if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) &&
+ !HexagonMCInstrInfo::isExtended(*MCII, Inst))
+ return MCDisassembler::Fail;
+ }
return Result;
}
+void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
+ MCInst const &MCB) const {
+ if (!HexagonMCInstrInfo::hasExtenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB))) {
+ unsigned opcode;
+ // This code disambiguates between GP-relative and absolute addressing
+ // instructions, which share the same encoding bits. An absolute
+ // addressing instruction must follow an immediate extender, so the
+ // disassembler always selects the absolute form first and uses this
+ // code to change it into the GP-relative instruction when the
+ // corresponding immediate extender is absent.
+ switch (MCI.getOpcode()) {
+ case Hexagon::S2_storerbabs:
+ opcode = Hexagon::S2_storerbgp;
+ break;
+ case Hexagon::S2_storerhabs:
+ opcode = Hexagon::S2_storerhgp;
+ break;
+ case Hexagon::S2_storerfabs:
+ opcode = Hexagon::S2_storerfgp;
+ break;
+ case Hexagon::S2_storeriabs:
+ opcode = Hexagon::S2_storerigp;
+ break;
+ case Hexagon::S2_storerbnewabs:
+ opcode = Hexagon::S2_storerbnewgp;
+ break;
+ case Hexagon::S2_storerhnewabs:
+ opcode = Hexagon::S2_storerhnewgp;
+ break;
+ case Hexagon::S2_storerinewabs:
+ opcode = Hexagon::S2_storerinewgp;
+ break;
+ case Hexagon::S2_storerdabs:
+ opcode = Hexagon::S2_storerdgp;
+ break;
+ case Hexagon::L4_loadrb_abs:
+ opcode = Hexagon::L2_loadrbgp;
+ break;
+ case Hexagon::L4_loadrub_abs:
+ opcode = Hexagon::L2_loadrubgp;
+ break;
+ case Hexagon::L4_loadrh_abs:
+ opcode = Hexagon::L2_loadrhgp;
+ break;
+ case Hexagon::L4_loadruh_abs:
+ opcode = Hexagon::L2_loadruhgp;
+ break;
+ case Hexagon::L4_loadri_abs:
+ opcode = Hexagon::L2_loadrigp;
+ break;
+ case Hexagon::L4_loadrd_abs:
+ opcode = Hexagon::L2_loadrdgp;
+ break;
+ default:
+ opcode = MCI.getOpcode();
+ }
+ MCI.setOpcode(opcode);
+ }
+}
+
+namespace llvm {
+extern const MCInstrDesc HexagonInsts[];
+}
+
+static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
+ ArrayRef<MCPhysReg> Table) {
+ if (RegNo < Table.size()) {
+ Inst.addOperand(MCOperand::createReg(Table[RegNo]));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ static const MCPhysReg IntRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31};
+
+ return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
+}
+
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecRegDecoderTable[] = {
+ Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
+ Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
+ Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19,
+ Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24,
+ Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29,
+ Hexagon::V30, Hexagon::V31};
+
+ return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable);
+}
+
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg DoubleRegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
+ Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
+
+ return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecDblRegDecoderTable[] = {
+ Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7,
+ Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11,
+ Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
+
+ return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable));
+}
+
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
+ Hexagon::P2, Hexagon::P3};
+
+ return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1,
+ Hexagon::Q2, Hexagon::Q3};
+
+ return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable);
+}
+
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlRegDecoderTable[] = {
+ Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
+ Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7,
+ Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
+ Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC
+ };
+
+ if (RegNo >= array_lengthof(CtrlRegDecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlReg64DecoderTable[] = {
+ Hexagon::C1_0, Hexagon::NoRegister,
+ Hexagon::C3_2, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister,
+ Hexagon::C9_8, Hexagon::NoRegister,
+ Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister,
+ Hexagon::UPC, Hexagon::NoRegister
+ };
+
+ if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = Hexagon::M0;
+ break;
+ case 1:
+ Register = Hexagon::M1;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+namespace {
+uint32_t fullValue(MCInstrInfo const &MCII,
+ MCInst &MCB,
+ MCInst &MI,
+ int64_t Value) {
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+ if (!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
+ return Value;
+ unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
+ int64_t Bits;
+ bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
+ assert(Success); (void)Success;
+ uint32_t Upper26 = static_cast<uint32_t>(Bits);
+ uint32_t Operand = Upper26 | Lower6;
+ return Operand;
+}
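+// Worked example for fullValue(): assume the extender carries the payload
+// 0x12345640 (a hypothetical value, already positioned in bits 31:6, see
+// decodeImmext below) and the extended operand is 0x7b with Alignment = 0:
+//   Lower6  = (0x7b >> 0) & 0x3f  = 0x3b
+//   Operand = 0x12345640 | 0x3b   = 0x1234567b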
+template <size_t T>
+void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64<T>(tmp));
+ int64_t Extended = SignExtend64<32>(FullValue);
+ HexagonMCInstrInfo::addConstant(MI, Extended,
+ Disassembler.getContext());
+}
+}
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, tmp);
+ assert(FullValue >= 0 && "Negative in unsigned decoder");
+ HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
+
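+// The sNN_M decoders below handle an NN-bit immediate scaled by 2^M, so
+// most sign-extend NN+M bits before the extender is folded in (e.g. s11_2
+// sign-extends 13 bits).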
static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<16>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<16>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<12>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<12>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<11>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<11>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<12>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp),
+ contextFromDecoder(Decoder));
return MCDisassembler::Success;
}
static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<13>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<13>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<14>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<14>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<10>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<10>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
const void *Decoder) {
- uint64_t imm = SignExtend64<8>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<8>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<6>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<4>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<4>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<5>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<5>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<6>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<7>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<7>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<10>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<19>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+// Custom decoder for the various jump/call immediates.
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
+ // r13_2 is not extendable, so an operand reporting zero extent bits must
+ // be an r13_2 branch target: a 13-bit field scaled by 4, i.e. 15 bits.
+ if (Bits == 0)
+ Bits = 15;
+ uint32_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64(tmp, Bits));
+ int64_t Extended = SignExtend64<32>(FullValue) + Address;
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true,
+ 0, 4))
+ HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
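+// Example: a non-extended r13_2 branch with tmp = 0x100 (assumed here to
+// already include the two implied zero bits from scaling) decodes, at
+// Address = 0x1000, to a target of SignExtend64(0x100, 15) + 0x1000 =
+// 0x1100.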
+
+// Addressing-mode-dependent load/store opcode map.
+// - If an insn is preceded by an extender, the address is absolute.
+//     memw(##symbol) = r0
+// - If an insn is not preceded by an extender, the address is GP-relative.
+//     memw(gp + #symbol) = r0
+// Note that the entries must be listed in descending order of their opcode
+// patterns so that the most specific encoding is matched first.
+// HexagonII::INST_ICLASS_ST
+static const unsigned int StoreConditionalOpcodeData[][2] = {
+ {S4_pstorerdfnew_abs, 0xafc02084},
+ {S4_pstorerdtnew_abs, 0xafc02080},
+ {S4_pstorerdf_abs, 0xafc00084},
+ {S4_pstorerdt_abs, 0xafc00080},
+ {S4_pstorerinewfnew_abs, 0xafa03084},
+ {S4_pstorerinewtnew_abs, 0xafa03080},
+ {S4_pstorerhnewfnew_abs, 0xafa02884},
+ {S4_pstorerhnewtnew_abs, 0xafa02880},
+ {S4_pstorerbnewfnew_abs, 0xafa02084},
+ {S4_pstorerbnewtnew_abs, 0xafa02080},
+ {S4_pstorerinewf_abs, 0xafa01084},
+ {S4_pstorerinewt_abs, 0xafa01080},
+ {S4_pstorerhnewf_abs, 0xafa00884},
+ {S4_pstorerhnewt_abs, 0xafa00880},
+ {S4_pstorerbnewf_abs, 0xafa00084},
+ {S4_pstorerbnewt_abs, 0xafa00080},
+ {S4_pstorerifnew_abs, 0xaf802084},
+ {S4_pstoreritnew_abs, 0xaf802080},
+ {S4_pstorerif_abs, 0xaf800084},
+ {S4_pstorerit_abs, 0xaf800080},
+ {S4_pstorerhfnew_abs, 0xaf402084},
+ {S4_pstorerhtnew_abs, 0xaf402080},
+ {S4_pstorerhf_abs, 0xaf400084},
+ {S4_pstorerht_abs, 0xaf400080},
+ {S4_pstorerbfnew_abs, 0xaf002084},
+ {S4_pstorerbtnew_abs, 0xaf002080},
+ {S4_pstorerbf_abs, 0xaf000084},
+ {S4_pstorerbt_abs, 0xaf000080}};
+// HexagonII::INST_ICLASS_LD: no standalone table; absolute loads are
+// covered by the LD_ST_2 table below.
+
+// HexagonII::INST_ICLASS_LD_ST_2
+static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000},
+ {L4_loadri_abs, 0x49800000},
+ {L4_loadruh_abs, 0x49600000},
+ {L4_loadrh_abs, 0x49400000},
+ {L4_loadrub_abs, 0x49200000},
+ {L4_loadrb_abs, 0x49000000},
+ {S2_storerdabs, 0x48c00000},
+ {S2_storerinewabs, 0x48a01000},
+ {S2_storerhnewabs, 0x48a00800},
+ {S2_storerbnewabs, 0x48a00000},
+ {S2_storeriabs, 0x48800000},
+ {S2_storerfabs, 0x48600000},
+ {S2_storerhabs, 0x48400000},
+ {S2_storerbabs, 0x48000000}};
+static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
+static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
+
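+// The tables are probed with "(insn & pattern) == pattern": every bit set
+// in the pattern must also be set in insn. Since one pattern can be a
+// bit-superset of another (0xafc02084 vs. 0xafc00080), scanning in
+// descending order guarantees the most specific entry is matched first.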
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
+
+ unsigned MachineOpcode = 0;
+ unsigned LLVMOpcode = 0;
+
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) {
+ for (size_t i = 0; i < NumCondS; ++i) {
+ if ((insn & StoreConditionalOpcodeData[i][1]) ==
+ StoreConditionalOpcodeData[i][1]) {
+ MachineOpcode = StoreConditionalOpcodeData[i][1];
+ LLVMOpcode = StoreConditionalOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) {
+ for (size_t i = 0; i < NumLS; ++i) {
+ if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) {
+ MachineOpcode = LoadStoreOpcodeData[i][1];
+ LLVMOpcode = LoadStoreOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+
+ if (MachineOpcode) {
+ unsigned Value = 0;
+ unsigned shift = 0;
+ MI.setOpcode(LLVMOpcode);
+ // Remove the parse bits from the insn.
+ insn &= ~HexagonII::INST_PARSE_MASK;
+
+ switch (LLVMOpcode) {
+ default:
+ return MCDisassembler::Fail;
+
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdfnew_abs:
+ case Hexagon::S4_pstorerdtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbnewf_abs:
+ case Hexagon::S4_pstorerbnewt_abs:
+ case Hexagon::S4_pstorerbnewfnew_abs:
+ case Hexagon::S4_pstorerbnewtnew_abs:
+ case Hexagon::S4_pstorerhnewf_abs:
+ case Hexagon::S4_pstorerhnewt_abs:
+ case Hexagon::S4_pstorerhnewfnew_abs:
+ case Hexagon::S4_pstorerhnewtnew_abs:
+ case Hexagon::S4_pstorerinewf_abs:
+ case Hexagon::S4_pstorerinewt_abs:
+ case Hexagon::S4_pstorerinewfnew_abs:
+ case Hexagon::S4_pstorerinewtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbfnew_abs:
+ case Hexagon::S4_pstorerbtnew_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhfnew_abs:
+ case Hexagon::S4_pstorerhtnew_abs:
+ case Hexagon::S4_pstorerif_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerifnew_abs:
+ case Hexagon::S4_pstoreritnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::L4_ploadrdf_abs:
+ case Hexagon::L4_ploadrdt_abs:
+ case Hexagon::L4_ploadrdfnew_abs:
+ case Hexagon::L4_ploadrdtnew_abs: {
+ // op: Rdd
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = ((insn >> 9) & UINT64_C(3));
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = ((insn >> 15) & UINT64_C(62));
+ Value |= ((insn >> 8) & UINT64_C(1));
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+ }
+
+ case Hexagon::L4_ploadrbf_abs:
+ case Hexagon::L4_ploadrbt_abs:
+ case Hexagon::L4_ploadrbfnew_abs:
+ case Hexagon::L4_ploadrbtnew_abs:
+ case Hexagon::L4_ploadrhf_abs:
+ case Hexagon::L4_ploadrht_abs:
+ case Hexagon::L4_ploadrhfnew_abs:
+ case Hexagon::L4_ploadrhtnew_abs:
+ case Hexagon::L4_ploadrubf_abs:
+ case Hexagon::L4_ploadrubt_abs:
+ case Hexagon::L4_ploadrubfnew_abs:
+ case Hexagon::L4_ploadrubtnew_abs:
+ case Hexagon::L4_ploadruhf_abs:
+ case Hexagon::L4_ploadruht_abs:
+ case Hexagon::L4_ploadruhfnew_abs:
+ case Hexagon::L4_ploadruhtnew_abs:
+ case Hexagon::L4_ploadrif_abs:
+ case Hexagon::L4_ploadrit_abs:
+ case Hexagon::L4_ploadrifnew_abs:
+ case Hexagon::L4_ploadritnew_abs:
+ // op: Rd
+ Value = insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = (insn >> 9) & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 15) & UINT64_C(62);
+ Value |= (insn >> 8) & UINT64_C(1);
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+
+ // op: g16_2
+ case Hexagon::L4_loadri_abs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::L4_loadrh_abs:
+ case Hexagon::L4_loadruh_abs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::L4_loadrb_abs:
+ case Hexagon::L4_loadrub_abs: {
+ // op: Rd
+ Value |= insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ break;
+ }
+
+ case Hexagon::L4_loadrd_abs: {
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ break;
+ }
+
+ case Hexagon::S2_storerdabs: {
+ // op: g16_3
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storerinewabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhnewabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbnewabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storeriabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhabs:
+ case Hexagon::S2_storerfabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+ }
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder) {
+
+ // Instruction class for a constant extender: bits 31:28 must all be zero.
+ if ((~insn & 0xf0000000) == 0xf0000000) {
+ unsigned Value;
+ // 27:16 High 12 bits of 26-bit extender.
+ Value = (insn & 0x0fff0000) << 4;
+ // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6);
+ MI.setOpcode(Hexagon::A4_ext);
+ HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder));
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
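+// Payload layout: insn bits 27:16 land in Value bits 31:20 and insn bits
+// 13:0 land in Value bits 19:6, leaving bits 5:0 zero for the low six bits
+// of the extended instruction (combined later in fullValue()).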
+
// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
enum subInstBinaryValues {
V4_SA1_addi_BITS = 0x0000,
@@ -731,6 +1342,8 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
return Hexagon::R0 + encoded_reg;
else if (encoded_reg < 16)
return Hexagon::R0 + encoded_reg + 8;
+
+ // Out-of-range encoding: the subinstruction GPR subset is {r0-r7,
+ // r16-r23}, so anything else yields the invalid-register sentinel.
return Hexagon::NoRegister;
}
@@ -739,10 +1352,13 @@ static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
return Hexagon::D0 + encoded_dreg;
else if (encoded_dreg < 8)
return Hexagon::D0 + encoded_dreg + 4;
+
+ // Out-of-range encoding: the subinstruction double-register subset is
+ // {d0-d3, d8-d11}, so anything else yields the invalid-register sentinel.
return Hexagon::NoRegister;
}
-static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
+void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
+ unsigned inst) const {
int64_t operand;
MCOperand Op;
switch (opcode) {
@@ -762,8 +1378,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
case Hexagon::V4_SS2_allocframe:
// u 8-4{5_3}
operand = ((inst & 0x1f0) >> 4) << 3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL1_loadri_io:
// Rd 3-0, Rs 7-4, u 11-8{4_2}
@@ -774,8 +1389,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 6;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL1_loadrub_io:
// Rd 3-0, Rs 7-4, u 11-8
@@ -786,8 +1400,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrb_io:
// Rd 3-0, Rs 7-4, u 10-8
@@ -798,8 +1411,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x700) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrh_io:
case Hexagon::V4_SL2_loadruh_io:
@@ -811,8 +1423,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x700) >> 8) << 1;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrd_sp:
// Rdd 2-0, u 7-3{5_3}
@@ -820,8 +1431,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x0f8) >> 3) << 3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadri_sp:
// Rd 3-0, u 8-4{5_2}
@@ -829,8 +1439,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x1f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_addi:
// Rx 3-0 (x2), s7 10-4
@@ -839,8 +1448,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
MI->addOperand(Op);
MI->addOperand(Op);
operand = SignExtend64<7>((inst & 0x7f0) >> 4);
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_addrx:
// Rx 3-0 (x2), Rs 7-4
@@ -873,8 +1481,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x3f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_seti:
// Rd 3-0, u 9-4
@@ -882,8 +1489,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x3f0) >> 4;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_clrf:
case Hexagon::V4_SA1_clrfnew:
@@ -901,8 +1507,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = inst & 0x3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_combine0i:
case Hexagon::V4_SA1_combine1i:
@@ -913,8 +1518,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x060) >> 5;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_combinerz:
case Hexagon::V4_SA1_combinezr:
@@ -932,8 +1536,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -944,8 +1547,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0xf00) >> 8) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -957,8 +1559,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = inst & 0xf;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SS2_storewi0:
case Hexagon::V4_SS2_storewi1:
@@ -967,25 +1568,23 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SS2_stored_sp:
// s 8-3{6_3}, Rtt 2-0
operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getDRegFromSubinstEncoding(inst & 0x7);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
+ break;
case Hexagon::V4_SS2_storeh_io:
// Rs 7-4, u 10-8{3_1}, Rt 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x700) >> 8) << 1;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -993,8 +1592,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
case Hexagon::V4_SS2_storew_sp:
// u 8-4{5_2}, Rd 3-0
operand = ((inst & 0x1f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index d360be2..ed7d957 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -47,15 +47,8 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class MachineInstr;
- class MCInst;
- class MCInstrInfo;
- class HexagonAsmPrinter;
class HexagonTargetMachine;
- void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
- HexagonAsmPrinter &AP);
-
/// \brief Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
} // end namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 53a687c..1189cfd 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -24,14 +24,32 @@ include "llvm/Target/Target.td"
// Hexagon Architectures
def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
+
+// Hexagon ISA Extensions
+def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps",
+ "true", "Hexagon HVX instructions">;
+def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps",
+ "true", "Hexagon HVX Double instructions">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"HST->hasV5TOps()">;
-def NoV5T : Predicate<"!HST->hasV5TOps()">;
-def UseMEMOP : Predicate<"HST->useMemOps()">;
-def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def HasV55T : Predicate<"HST->hasV55TOps()">,
+ AssemblerPredicate<"ArchV55">;
+def HasV60T : Predicate<"HST->hasV60TOps()">,
+ AssemblerPredicate<"ArchV60">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVXDbl">;
+def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
+
+def UseHVX : Predicate<"HST->useHVXSglOps() || HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVX">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -53,6 +71,7 @@ class NewValueRel: PredNewRel;
// NewValueRel - Filter class used to relate load/store instructions having
// different addressing modes with each other.
class AddrModeRel: NewValueRel;
+class IntrinsicsRel;
//===----------------------------------------------------------------------===//
// Generate mapping table to relate non-predicate instructions with their
@@ -62,7 +81,7 @@ class AddrModeRel: NewValueRel;
def getPredOpcode : InstrMapping {
let FilterClass = "PredRel";
// Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"];
// Instructions with the same predicate sense form a column.
let ColFields = ["PredSense"];
// The key column is the unpredicated instructions.
@@ -77,7 +96,7 @@ def getPredOpcode : InstrMapping {
//
def getFalsePredOpcode : InstrMapping {
let FilterClass = "PredRel";
- let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
let ColFields = ["PredSense"];
let KeyCol = ["true"];
let ValueCols = [["false"]];
@@ -89,7 +108,7 @@ def getFalsePredOpcode : InstrMapping {
//
def getTruePredOpcode : InstrMapping {
let FilterClass = "PredRel";
- let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
let ColFields = ["PredSense"];
let KeyCol = ["false"];
let ValueCols = [["true"]];
@@ -125,7 +144,7 @@ def getPredOldOpcode : InstrMapping {
//
def getNewValueOpcode : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
let ColFields = ["NValueST"];
let KeyCol = ["false"];
let ValueCols = [["true"]];
@@ -137,16 +156,16 @@ def getNewValueOpcode : InstrMapping {
//
def getNonNVStore : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
let ColFields = ["NValueST"];
let KeyCol = ["true"];
let ValueCols = [["false"]];
}
-def getBasedWithImmOffset : InstrMapping {
+def getBaseWithImmOffset : InstrMapping {
let FilterClass = "AddrModeRel";
let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
- "isMEMri", "isFloat"];
+ "isFloat"];
let ColFields = ["addrMode"];
let KeyCol = ["Absolute"];
let ValueCols = [["BaseImmOffset"]];
@@ -168,6 +187,37 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
+def getRegShlForm : InstrMapping {
+ let FilterClass = "ImmRegShl";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+def notTakenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+def takenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+def getRealHWInstr : InstrMapping {
+ let FilterClass = "IntrinsicsRel";
+ let RowFields = ["BaseOpcode"];
+ let ColFields = ["InstrType"];
+ let KeyCol = ["Pseudo"];
+ let ValueCols = [["Pseudo"], ["Real"]];
+}
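+// For each InstrMapping above, TableGen emits a relation-lookup routine
+// (e.g. an int getPredOpcode(uint16_t Opcode, ...) keyed by the column
+// value) into the generated instruction-info include; the defs here only
+// describe the row/column structure of those tables.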
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -192,12 +242,22 @@ def : Proc<"hexagonv4", HexagonModelV4,
[ArchV4]>;
def : Proc<"hexagonv5", HexagonModelV4,
[ArchV4, ArchV5]>;
+def : Proc<"hexagonv55", HexagonModelV55,
+ [ArchV4, ArchV5, ArchV55]>;
+def : Proc<"hexagonv60", HexagonModelV60,
+ [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def HexagonAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string TokenizingCharacters = "#()=:.<>!+*";
+}
+
def Hexagon : Target {
// Pull in Instruction Info:
let InstructionSet = HexagonInstrInfo;
+ let AssemblyParserVariants = [HexagonAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 05728d2..e213089 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -40,11 +40,13 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
@@ -56,12 +58,27 @@
using namespace llvm;
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
#define DEBUG_TYPE "asm-printer"
static cl::opt<bool> AlignCalls(
"hexagon-align-calls", cl::Hidden, cl::init(true),
cl::desc("Insert falign after call instruction for Hexagon target"));
+// Given a scalar register return its pair.
+inline static unsigned getHexagonRegisterPair(unsigned Reg,
+ const MCRegisterInfo *RI) {
+ assert(Hexagon::IntRegsRegClass.contains(Reg));
+ MCSuperRegIterator SR(Reg, RI, false);
+ unsigned Pair = *SR;
+ assert(Hexagon::DoubleRegsRegClass.contains(Pair));
+ return Pair;
+}
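+// For example, getHexagonRegisterPair(Hexagon::R3, RI) returns Hexagon::D1,
+// the r3:2 pair.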
+
HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
@@ -102,9 +119,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
//
bool HexagonAsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
- if (MBB->hasAddressTaken()) {
+ if (MBB->hasAddressTaken())
return false;
- }
return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
}
@@ -117,7 +133,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &OS) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
@@ -173,45 +190,407 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+ MCStreamer &OutStreamer,
+ const MCOperand &Imm, int AlignSize) {
+ MCSymbol *Sym;
+ int64_t Value;
+ if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
+ StringRef sectionPrefix;
+ std::string ImmString;
+ StringRef Name;
+ if (AlignSize == 8) {
+ Name = ".CONST_0000000000000000";
+ sectionPrefix = ".gnu.linkonce.l8";
+ ImmString = utohexstr(Value);
+ } else {
+ Name = ".CONST_00000000";
+ sectionPrefix = ".gnu.linkonce.l4";
+ ImmString = utohexstr(static_cast<uint32_t>(Value));
+ }
+
+ std::string symbolName = // Yes, leading zeros are kept.
+ Name.drop_back(ImmString.size()).str() + ImmString;
+ std::string sectionName = sectionPrefix.str() + symbolName;
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer.SwitchSection(Section);
+
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer.EmitIntValue(Value, AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ } else {
+ assert(Imm.isExpr() && "Expected expression and found none");
+ const MachineOperand &MO = MI.getOperand(1);
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = AP.getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = AP.GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = AP.GetJTISymbol(MO.getIndex());
+ else
+ llvm_unreachable("Unknown operand type!");
+
+ StringRef SymbolName = MOSymbol->getName();
+ std::string LitaName = ".CONST_" + SymbolName.str();
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+ OutStreamer.SwitchSection(Section);
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
+ OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ }
+ return Sym;
+}
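+// The callers below rewrite the CONST pseudo into a GP-relative load of the
+// symbol returned here, e.g. CONST64 becomes an L2_loadrdgp of the emitted
+// .CONST_* symbol.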
+
+void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
+ const MachineInstr &MI) {
+ MCInst &MappedInst = static_cast<MCInst &>(Inst);
+ const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+
+ switch (Inst.getOpcode()) {
+ default: return;
+
+ // "$dst = CONST64(#$src1)",
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+ if (!OutStreamer->hasRawTextSupport()) {
+ const MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+
+ }
+ break;
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ if (!OutStreamer->hasRawTextSupport()) {
+ MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+ }
+ break;
+
+ // C2_pxfer_map maps to the C2_or instruction. C2_or could be emitted
+ // during instruction selection itself, but doing so results in
+ // suboptimal code.
+ case Hexagon::C2_pxfer_map: {
+ MCOperand &Ps = Inst.getOperand(1);
+ MappedInst.setOpcode(Hexagon::C2_or);
+ MappedInst.addOperand(Ps);
+ return;
+ }
+
+ // Vector reduce complex multiply by scalar: Rt & 1 selects the :hi form,
+ // otherwise the :lo form. The insn is mapped from the 4-operand pseudo to
+ // the 3-operand raw form, which takes three register pairs.
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCOperand &Rt = Inst.getOperand(3);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ assert (Rs.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rs.getReg());
+ if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_hi);
+ else // raw:lo
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_lo);
+ Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI));
+ return;
+ }
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ int64_t Imm;
+ MCExpr const *Expr = MO.getExpr();
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert(Success && "Expected immediate and none was found"); (void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::S2_vsathub);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ case Hexagon::S5_vasrhrnd_goodsyntax:
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &MO2 = MappedInst.getOperand(2);
+ MCExpr const *Expr = MO2.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert(Success && "Expected immediate and none was found"); (void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ MCOperand &MO1 = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg);
+ // Add a new operand for the second register in the pair.
+ TmpInst.addOperand(MCOperand::createReg(High));
+ TmpInst.addOperand(MCOperand::createReg(Low));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax)
+ TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ else
+ TmpInst.setOpcode(Hexagon::S5_vasrhrnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &MO = Inst.getOperand(2);
+ MCExpr const *Expr = MO.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert(Success && "Expected immediate and none was found"); (void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ case Hexagon::TFRI_f:
+ MappedInst.setOpcode(Hexagon::A2_tfrsi);
+ return;
+ case Hexagon::TFRI_cPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveit);
+ return;
+ case Hexagon::TFRI_cNotPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveif);
+ return;
+ case Hexagon::MUX_ri_f:
+ MappedInst.setOpcode(Hexagon::C2_muxri);
+ return;
+ case Hexagon::MUX_ir_f:
+ MappedInst.setOpcode(Hexagon::C2_muxir);
+ return;
+
+ // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)"
+ case Hexagon::A2_tfrpi: {
+ MCInst TmpInst;
+ MCOperand &Rdd = MappedInst.getOperand(0);
+ MCOperand &MO = MappedInst.getOperand(1);
+
+ TmpInst.setOpcode(Hexagon::A2_combineii);
+ TmpInst.addOperand(Rdd);
+ int64_t Imm;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Imm);
+ if (Success && Imm < 0) {
+ const MCExpr *MOne = MCConstantExpr::create(-1, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(MOne));
+ } else {
+ const MCExpr *Zero = MCConstantExpr::create(0, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Zero));
+ }
+ TmpInst.addOperand(MO);
+ MappedInst = TmpInst;
+ return;
+ }
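+ // For example, "Rdd = #-3" becomes "Rdd = combine(#-1, #-3)" and
+ // "Rdd = #5" becomes "Rdd = combine(#0, #5)".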
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode(Hexagon::A2_combinew);
+ return;
+ }
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ return;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ return;
+ }
+
+ case Hexagon::M2_mpysmi: {
+ MCOperand &Imm = MappedInst.getOperand(2);
+ MCExpr const *Expr = Imm.getExpr();
+ int64_t Value;
+ bool Success = Expr->evaluateAsAbsolute(Value);
+ assert(Success); (void)Success;
+ if (Value < 0 && Value > -256) {
+ MappedInst.setOpcode(Hexagon::M2_mpysin);
+ Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext));
+ }
+ else
+ MappedInst.setOpcode(Hexagon::M2_mpysip);
+ return;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rt = Inst.getOperand(1);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::A2_addsph);
+ else
+ MappedInst.setOpcode(Hexagon::A2_addspl);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::HEXAGON_V6_vd0_pseudo:
+ case Hexagon::HEXAGON_V6_vd0_pseudo_128B: {
+ MCInst TmpInst;
+ assert (Inst.getOperand(0).isReg() &&
+ "Expected register and none was found");
+
+ TmpInst.setOpcode(Hexagon::V6_vxor);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ }
+}
+
/// EmitInstruction -- Print out a single Hexagon MI or bundle to the
/// current output stream.
///
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MCInst MCB;
- MCB.setOpcode(Hexagon::BUNDLE);
- MCB.addOperand(MCOperand::createImm(0));
+ MCInst MCB = HexagonMCInstrInfo::createBundle();
+ const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
const MachineBasicBlock* MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator MII = MI;
+ MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
unsigned IgnoreCount = 0;
- for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) {
+ for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
++IgnoreCount;
- else {
- HexagonLowerToMC(MII, MCB, *this);
- }
- }
+ else
+ HexagonLowerToMC(MCII, &*MII, MCB, *this);
}
- else {
- HexagonLowerToMC(MI, MCB, *this);
- HexagonMCInstrInfo::padEndloop(MCB);
- }
- // Examine the packet and try to find instructions that can be converted
- // to compounds.
- HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(),
- OutStreamer->getContext(), MCB);
- // Examine the packet and convert pairs of instructions to duplex
- // instructions when possible.
- SmallVector<DuplexCandidate, 8> possibleDuplexes;
- possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(
- *Subtarget->getInstrInfo(), MCB);
- HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget,
- OutStreamer->getContext(), MCB, possibleDuplexes);
- EmitToStreamer(*OutStreamer, MCB);
+ else
+ HexagonLowerToMC(MCII, MI, MCB, *this);
+
+ bool Ok = HexagonMCInstrInfo::canonicalizePacket(
+ MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
+ assert(Ok);
+ (void)Ok;
+ if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
+ return;
+ OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
}
extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 792fc8b..a78d97e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -42,6 +42,10 @@ namespace llvm {
void EmitInstruction(const MachineInstr *MI) override;
+ void HexagonProcessInstruction(MCInst &Inst,
+ const MachineInstr &MBB);
+
+
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
new file mode 100644
index 0000000..77907b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -0,0 +1,2778 @@
+//===--- HexagonBitSimplify.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexbit"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
+ FunctionPass *createHexagonBitSimplify();
+}
+
+namespace {
+ // Set of virtual registers, based on BitVector.
+ struct RegisterSet : private BitVector {
+ RegisterSet() : BitVector() {}
+ explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+ using BitVector::clear;
+ using BitVector::count;
+
+ unsigned find_first() const {
+ int First = BitVector::find_first();
+ if (First < 0)
+ return 0;
+ return x2v(First);
+ }
+
+ unsigned find_next(unsigned Prev) const {
+ int Next = BitVector::find_next(v2x(Prev));
+ if (Next < 0)
+ return 0;
+ return x2v(Next);
+ }
+
+ RegisterSet &insert(unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return static_cast<RegisterSet&>(BitVector::set(Idx));
+ }
+ RegisterSet &remove(unsigned R) {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return *this;
+ return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ }
+
+ RegisterSet &insert(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ }
+ RegisterSet &remove(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ }
+
+ reference operator[](unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return BitVector::operator[](Idx);
+ }
+ bool operator[](unsigned R) const {
+ unsigned Idx = v2x(R);
+ assert(Idx < size());
+ return BitVector::operator[](Idx);
+ }
+ bool has(unsigned R) const {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return false;
+ return BitVector::test(Idx);
+ }
+
+ bool empty() const {
+ return !BitVector::any();
+ }
+ bool includes(const RegisterSet &Rs) const {
+ // A.BitVector::test(B) <=> A-B != {}
+ return !Rs.BitVector::test(*this);
+ }
+ bool intersects(const RegisterSet &Rs) const {
+ return BitVector::anyCommon(Rs);
+ }
+
+ private:
+ void ensure(unsigned Idx) {
+ if (size() <= Idx)
+ resize(std::max(Idx+1, 32U));
+ }
+ static inline unsigned v2x(unsigned v) {
+ return TargetRegisterInfo::virtReg2Index(v);
+ }
+ static inline unsigned x2v(unsigned x) {
+ return TargetRegisterInfo::index2VirtReg(x);
+ }
+ };
+
+
+ struct PrintRegSet {
+ PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+ : RS(S), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS,
+ const PrintRegSet &P);
+ private:
+ const RegisterSet &RS;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+ OS << '{';
+ for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+ OS << ' ' << PrintReg(R, P.TRI);
+ OS << " }";
+ return OS;
+ }
+}
+
+
+namespace {
+ class Transformation;
+
+ class HexagonBitSimplify : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonBitSimplify() : MachineFunctionPass(ID), MDT(nullptr) {
+ initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon bit simplification";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
+ static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
+ static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
+ const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
+ static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W, uint64_t &U);
+ static bool replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI);
+ static bool getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
+ static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+
+ static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin);
+ static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
+ uint16_t Begin, const HexagonInstrInfo &HII);
+
+ static const TargetRegisterClass *getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
+ static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
+
+ private:
+ MachineDominatorTree *MDT;
+
+ bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+ };
+
+ char HexagonBitSimplify::ID = 0;
+ typedef HexagonBitSimplify HBS;
+
+
+ // The purpose of this class is to provide a common facility to traverse
+ // the function top-down or bottom-up via the dominator tree, and keep
+ // track of the available registers.
+ class Transformation {
+ public:
+ bool TopDown;
+ Transformation(bool TD) : TopDown(TD) {}
+ virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
+ virtual ~Transformation() {}
+ };
+}
+
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+
+
+bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
+ RegisterSet &AVs) {
+ MachineDomTreeNode *N = MDT->getNode(&B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ bool Changed = false;
+
+ if (T.TopDown)
+ Changed = T.processBlock(B, AVs);
+
+ RegisterSet Defs;
+ for (auto &I : B)
+ getInstrDefs(I, Defs);
+ RegisterSet NewAVs = AVs;
+ NewAVs.insert(Defs);
+
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ Changed |= visitBlock(*SB, T, NewAVs);
+ }
+ if (!T.TopDown)
+ Changed |= T.processBlock(B, AVs);
+
+ return Changed;
+}
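+// Note: a top-down Transformation therefore sees each block before the
+// blocks it dominates, and AVs accumulates every register defined on the
+// dominator-tree path from the entry block.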
+
+//
+// Utility functions:
+//
+void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
+ RegisterSet &Defs) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Defs.insert(R);
+ }
+}
+
+void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
+ RegisterSet &Uses) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.insert(R);
+ }
+}
+
+// Check if the W bits in cell RC1 starting at position B1 are provably
+// equal to the W bits in cell RC2 starting at position B2.
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
+ uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
+ uint16_t W) {
+ for (uint16_t i = 0; i < W; ++i) {
+ // If RC1[B1+i] is "bottom", it cannot be proven equal to RC2[B2+i].
+ if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
+ return false;
+ // Same for RC2[B2+i].
+ if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
+ return false;
+ if (RC1[B1+i] != RC2[B2+i])
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].num())
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].is(0))
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W, uint64_t &U) {
+ assert(B < RC.width() && B+W <= RC.width());
+ int64_t T = 0;
+ for (uint16_t i = B+W; i > B; --i) {
+ const BitTracker::BitValue &BV = RC[i-1];
+ T <<= 1;
+ if (BV.is(1))
+ T |= 1;
+ else if (!BV.is(0))
+ return false;
+ }
+ U = T;
+ return true;
+}
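+
+// Worked example (illustrative): for a cell whose bits [0-3] are the known
+// constants 0,1,0,1 (LSB first), getConst walks from the MSB down and
+// assembles U = 0b1010 = 10:
+//   uint64_t U;
+//   if (HexagonBitSimplify::getConst(RC, 0, 4, U))
+//     ; // U == 10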
+
+
+bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (I->getSubReg() != OldSR)
+ continue;
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
+// of Sub in Reg, and set Width to the size of Sub in bits. Return true
+// if this succeeded, otherwise return false.
+bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
+ const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ assert(RR.Sub == 0);
+ Begin = 0;
+ Width = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (RR.Sub == 0) {
+ Begin = 0;
+ Width = 64;
+ return true;
+ }
+ assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
+ Width = 32;
+ Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
+ return true;
+ }
+ return false;
+}
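+
+// Example (illustrative): for a virtual register vreg1 in DoubleRegs,
+// getSubregMask produces:
+//   vreg1              -> Begin = 0,  Width = 64
+//   vreg1:subreg_loreg -> Begin = 0,  Width = 32
+//   vreg1:subreg_hireg -> Begin = 32, Width = 32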
+
+
+// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
+// subregister.
+bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
+ assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
+ unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
+ assert(Sub1 != Sub2);
+ if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
+ SL = I.getOperand(1);
+ SH = I.getOperand(3);
+ return true;
+ }
+ if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
+ SH = I.getOperand(1);
+ SL = I.getOperand(3);
+ return true;
+ }
+ return false;
+}
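+
+// Example (illustrative): for
+//   vreg3 = REG_SEQUENCE vreg1, subreg_hireg, vreg2, subreg_loreg
+// the subregister indices at operands 2 and 4 are examined, giving
+// SH = vreg1 and SL = vreg2.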
+
+
+// All stores (except 64-bit stores) take a 32-bit register as the source
+// of the value to be stored. If the instruction stores into a location
+// that is shorter than 32 bits, some bits of the source register are not
+// used. For each store instruction, calculate the set of used bits in
+// the source register, and set appropriate bits in Bits. Return true if
+// the bits are calculated, false otherwise.
+bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin) {
+ using namespace Hexagon;
+
+ switch (Opc) {
+ // Store byte
+ case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
+ case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
+ case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
+ case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S4_storerb_ap: // memb(Re32=#U6)=Rt32
+ case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
+ case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
+ case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
+ case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
+ case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
+ case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
+ case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
+ case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerbgp: // memb(gp+#u16:0)=Rt32
+ case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
+ case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
+ case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
+ case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
+ case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
+ Bits.set(Begin, Begin+8);
+ return true;
+
+ // Store low half
+ case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
+ case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
+ case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
+ case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S4_storerh_ap: // memh(Re32=#U6)=Rt32
+ case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
+ case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
+ case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
+ case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
+ case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
+ case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
+ case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
+ case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerhgp: // memh(gp+#u16:1)=Rt32
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
+ case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
+ case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
+ case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
+ case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
+ Bits.set(Begin, Begin+16);
+ return true;
+
+ // Store high half
+ case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
+ case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
+ case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
+ case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
+ case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
+ case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
+ case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
+ case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
+ case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
+ case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+
+ return false;
+}
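+
+// Example (illustrative): a byte store such as S2_storerb_io whose source
+// is the high word of a 64-bit register (Begin = 32) marks Bits[32-39] as
+// used, while a high-half store such as S2_storerf_io with Begin = 0 marks
+// Bits[16-31].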
+
+
+// For an instruction with opcode Opc, calculate the set of bits that it
+// uses in a register in operand OpN. This only handles cases where the
+// set of used bits is a fixed property of the opcode, i.e. it does not
+// depend on the values of other operands (shifts, for example, depend on
+// the shift amount and are handled separately). For concrete instructions
+// from a program, the operand may be a subregister of a larger register,
+// while Bits would correspond to the larger register in its entirety.
+// Because of that, the parameter Begin can be used to indicate which bit
+// of Bits should be considered the LSB of the operand.
+bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
+ BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
+ using namespace Hexagon;
+
+ const MCInstrDesc &D = HII.get(Opc);
+ if (D.mayStore()) {
+ if (OpN == D.getNumOperands()-1)
+ return getUsedBitsInStore(Opc, Bits, Begin);
+ return false;
+ }
+
+ switch (Opc) {
+ // One register source. Used bits: R1[0-7].
+ case A2_sxtb:
+ case A2_zxtb:
+ case A4_cmpbeqi:
+ case A4_cmpbgti:
+ case A4_cmpbgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[0-15].
+ case A2_aslh:
+ case A2_sxth:
+ case A2_zxth:
+ case A4_cmpheqi:
+ case A4_cmphgti:
+ case A4_cmphgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[16-31].
+ case A2_asrh:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-7], R2[0-7].
+ case A4_cmpbeq:
+ case A4_cmpbgt:
+ case A4_cmpbgtu:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[0-15].
+ case A4_cmpheq:
+ case A4_cmphgt:
+ case A4_cmphgtu:
+ case A2_addh_h16_ll:
+ case A2_addh_h16_sat_ll:
+ case A2_addh_l16_ll:
+ case A2_addh_l16_sat_ll:
+ case A2_combine_ll:
+ case A2_subh_h16_ll:
+ case A2_subh_h16_sat_ll:
+ case A2_subh_l16_ll:
+ case A2_subh_l16_sat_ll:
+ case M2_mpy_acc_ll_s0:
+ case M2_mpy_acc_ll_s1:
+ case M2_mpy_acc_sat_ll_s0:
+ case M2_mpy_acc_sat_ll_s1:
+ case M2_mpy_ll_s0:
+ case M2_mpy_ll_s1:
+ case M2_mpy_nac_ll_s0:
+ case M2_mpy_nac_ll_s1:
+ case M2_mpy_nac_sat_ll_s0:
+ case M2_mpy_nac_sat_ll_s1:
+ case M2_mpy_rnd_ll_s0:
+ case M2_mpy_rnd_ll_s1:
+ case M2_mpy_sat_ll_s0:
+ case M2_mpy_sat_ll_s1:
+ case M2_mpy_sat_rnd_ll_s0:
+ case M2_mpy_sat_rnd_ll_s1:
+ case M2_mpyd_acc_ll_s0:
+ case M2_mpyd_acc_ll_s1:
+ case M2_mpyd_ll_s0:
+ case M2_mpyd_ll_s1:
+ case M2_mpyd_nac_ll_s0:
+ case M2_mpyd_nac_ll_s1:
+ case M2_mpyd_rnd_ll_s0:
+ case M2_mpyd_rnd_ll_s1:
+ case M2_mpyu_acc_ll_s0:
+ case M2_mpyu_acc_ll_s1:
+ case M2_mpyu_ll_s0:
+ case M2_mpyu_ll_s1:
+ case M2_mpyu_nac_ll_s0:
+ case M2_mpyu_nac_ll_s1:
+ case M2_mpyud_acc_ll_s0:
+ case M2_mpyud_acc_ll_s1:
+ case M2_mpyud_ll_s0:
+ case M2_mpyud_ll_s1:
+ case M2_mpyud_nac_ll_s0:
+ case M2_mpyud_nac_ll_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[16-31].
+ case A2_addh_h16_lh:
+ case A2_addh_h16_sat_lh:
+ case A2_combine_lh:
+ case A2_subh_h16_lh:
+ case A2_subh_h16_sat_lh:
+ case M2_mpy_acc_lh_s0:
+ case M2_mpy_acc_lh_s1:
+ case M2_mpy_acc_sat_lh_s0:
+ case M2_mpy_acc_sat_lh_s1:
+ case M2_mpy_lh_s0:
+ case M2_mpy_lh_s1:
+ case M2_mpy_nac_lh_s0:
+ case M2_mpy_nac_lh_s1:
+ case M2_mpy_nac_sat_lh_s0:
+ case M2_mpy_nac_sat_lh_s1:
+ case M2_mpy_rnd_lh_s0:
+ case M2_mpy_rnd_lh_s1:
+ case M2_mpy_sat_lh_s0:
+ case M2_mpy_sat_lh_s1:
+ case M2_mpy_sat_rnd_lh_s0:
+ case M2_mpy_sat_rnd_lh_s1:
+ case M2_mpyd_acc_lh_s0:
+ case M2_mpyd_acc_lh_s1:
+ case M2_mpyd_lh_s0:
+ case M2_mpyd_lh_s1:
+ case M2_mpyd_nac_lh_s0:
+ case M2_mpyd_nac_lh_s1:
+ case M2_mpyd_rnd_lh_s0:
+ case M2_mpyd_rnd_lh_s1:
+ case M2_mpyu_acc_lh_s0:
+ case M2_mpyu_acc_lh_s1:
+ case M2_mpyu_lh_s0:
+ case M2_mpyu_lh_s1:
+ case M2_mpyu_nac_lh_s0:
+ case M2_mpyu_nac_lh_s1:
+ case M2_mpyud_acc_lh_s0:
+ case M2_mpyud_acc_lh_s1:
+ case M2_mpyud_lh_s0:
+ case M2_mpyud_lh_s1:
+ case M2_mpyud_nac_lh_s0:
+ case M2_mpyud_nac_lh_s1:
+ // These four are listed under _hl, but they actually behave as LH:
+ // R1 uses bits [0-15] and R2 uses bits [16-31].
+ case A2_addh_l16_hl:
+ case A2_addh_l16_sat_hl:
+ case A2_subh_l16_hl:
+ case A2_subh_l16_sat_hl:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[0-15].
+ case A2_addh_h16_hl:
+ case A2_addh_h16_sat_hl:
+ case A2_combine_hl:
+ case A2_subh_h16_hl:
+ case A2_subh_h16_sat_hl:
+ case M2_mpy_acc_hl_s0:
+ case M2_mpy_acc_hl_s1:
+ case M2_mpy_acc_sat_hl_s0:
+ case M2_mpy_acc_sat_hl_s1:
+ case M2_mpy_hl_s0:
+ case M2_mpy_hl_s1:
+ case M2_mpy_nac_hl_s0:
+ case M2_mpy_nac_hl_s1:
+ case M2_mpy_nac_sat_hl_s0:
+ case M2_mpy_nac_sat_hl_s1:
+ case M2_mpy_rnd_hl_s0:
+ case M2_mpy_rnd_hl_s1:
+ case M2_mpy_sat_hl_s0:
+ case M2_mpy_sat_hl_s1:
+ case M2_mpy_sat_rnd_hl_s0:
+ case M2_mpy_sat_rnd_hl_s1:
+ case M2_mpyd_acc_hl_s0:
+ case M2_mpyd_acc_hl_s1:
+ case M2_mpyd_hl_s0:
+ case M2_mpyd_hl_s1:
+ case M2_mpyd_nac_hl_s0:
+ case M2_mpyd_nac_hl_s1:
+ case M2_mpyd_rnd_hl_s0:
+ case M2_mpyd_rnd_hl_s1:
+ case M2_mpyu_acc_hl_s0:
+ case M2_mpyu_acc_hl_s1:
+ case M2_mpyu_hl_s0:
+ case M2_mpyu_hl_s1:
+ case M2_mpyu_nac_hl_s0:
+ case M2_mpyu_nac_hl_s1:
+ case M2_mpyud_acc_hl_s0:
+ case M2_mpyud_acc_hl_s1:
+ case M2_mpyud_hl_s0:
+ case M2_mpyud_hl_s1:
+ case M2_mpyud_nac_hl_s0:
+ case M2_mpyud_nac_hl_s1:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[16-31].
+ case A2_addh_h16_hh:
+ case A2_addh_h16_sat_hh:
+ case A2_combine_hh:
+ case A2_subh_h16_hh:
+ case A2_subh_h16_sat_hh:
+ case M2_mpy_acc_hh_s0:
+ case M2_mpy_acc_hh_s1:
+ case M2_mpy_acc_sat_hh_s0:
+ case M2_mpy_acc_sat_hh_s1:
+ case M2_mpy_hh_s0:
+ case M2_mpy_hh_s1:
+ case M2_mpy_nac_hh_s0:
+ case M2_mpy_nac_hh_s1:
+ case M2_mpy_nac_sat_hh_s0:
+ case M2_mpy_nac_sat_hh_s1:
+ case M2_mpy_rnd_hh_s0:
+ case M2_mpy_rnd_hh_s1:
+ case M2_mpy_sat_hh_s0:
+ case M2_mpy_sat_hh_s1:
+ case M2_mpy_sat_rnd_hh_s0:
+ case M2_mpy_sat_rnd_hh_s1:
+ case M2_mpyd_acc_hh_s0:
+ case M2_mpyd_acc_hh_s1:
+ case M2_mpyd_hh_s0:
+ case M2_mpyd_hh_s1:
+ case M2_mpyd_nac_hh_s0:
+ case M2_mpyd_nac_hh_s1:
+ case M2_mpyd_rnd_hh_s0:
+ case M2_mpyd_rnd_hh_s1:
+ case M2_mpyu_acc_hh_s0:
+ case M2_mpyu_acc_hh_s1:
+ case M2_mpyu_hh_s0:
+ case M2_mpyu_hh_s1:
+ case M2_mpyu_nac_hh_s0:
+ case M2_mpyu_nac_hh_s1:
+ case M2_mpyud_acc_hh_s0:
+ case M2_mpyud_acc_hh_s1:
+ case M2_mpyud_hh_s0:
+ case M2_mpyud_hh_s1:
+ case M2_mpyud_nac_hh_s0:
+ case M2_mpyud_nac_hh_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
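+
+// Example (illustrative): for A2_sxtb (sign-extend byte) with OpN = 1 and
+// Begin = 0, only Bits[0-7] are marked as used: the upper 24 bits of the
+// source register cannot influence the result.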
+
+
+// Calculate the register class that matches Reg:Sub. For example, if
+// vreg1 is a double register, then vreg1:subreg_hireg would match "int"
+// register class.
+const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return nullptr;
+ auto *RC = MRI.getRegClass(RR.Reg);
+ if (RR.Sub == 0)
+ return RC;
+
+ auto VerifySR = [] (unsigned Sub) -> void {
+ assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
+ };
+
+ switch (RC->getID()) {
+ case Hexagon::DoubleRegsRegClassID:
+ VerifySR(RR.Sub);
+ return &Hexagon::IntRegsRegClass;
+ }
+ return nullptr;
+}
+
+
+// Check if RD could be replaced with RS at any possible use of RD.
+// For example, a predicate register cannot be replaced with an integer
+// register, but a 64-bit register with a subregister can be replaced
+// with a 32-bit register.
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
+ !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+ return false;
+ // Return false if one (or both) classes are nullptr.
+ auto *DRC = getFinalVRegClass(RD, MRI);
+ if (!DRC)
+ return false;
+
+ return DRC == getFinalVRegClass(RS, MRI);
+}
+
+
+//
+// Dead code elimination
+//
+namespace {
+ class DeadCodeElimination {
+ public:
+ DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
+ : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+ MDT(mdt), MRI(mf.getRegInfo()) {}
+
+ bool run() {
+ return runOnNode(MDT.getRootNode());
+ }
+
+ private:
+ bool isDead(unsigned R) const;
+ bool runOnNode(MachineDomTreeNode *N);
+
+ MachineFunction &MF;
+ const HexagonInstrInfo &HII;
+ MachineDominatorTree &MDT;
+ MachineRegisterInfo &MRI;
+ };
+}
+
+
+bool DeadCodeElimination::isDead(unsigned R) const {
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (UseI->isDebugValue())
+ continue;
+ if (UseI->isPHI()) {
+ assert(!UseI->getOperand(0).getSubReg());
+ unsigned DR = UseI->getOperand(0).getReg();
+ if (DR == R)
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
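+
+// Note (illustrative): a register whose only use is a self-referencing
+// PHI, e.g. vreg1 = PHI vreg0, <bb0>, vreg1, <bb1> with no other users,
+// is treated as dead; the self-reference alone does not keep it alive.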
+
+
+bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
+ bool Changed = false;
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ Changed |= runOnNode(*I);
+
+ MachineBasicBlock *B = N->getBlock();
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ for (auto MI : Instrs) {
+ unsigned Opc = MI->getOpcode();
+ // Do not touch lifetime markers. This is why the target-independent DCE
+ // cannot be used.
+ if (Opc == TargetOpcode::LIFETIME_START ||
+ Opc == TargetOpcode::LIFETIME_END)
+ continue;
+ bool Store = false;
+ if (MI->isInlineAsm())
+ continue;
+ // PHIs bypass the isSafeToMove check below, so dead PHIs can be
+ // deleted as well.
+ if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
+ continue;
+
+ bool AllDead = true;
+ SmallVector<unsigned,2> Regs;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+ AllDead = false;
+ break;
+ }
+ Regs.push_back(R);
+ }
+ if (!AllDead)
+ continue;
+
+ B->erase(MI);
+ for (unsigned i = 0, n = Regs.size(); i != n; ++i)
+ MRI.markUsesInDebugValueAsUndef(Regs[i]);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+//
+// Eliminate redundant instructions
+//
+// This transformation will identify instructions where the output register
+// is the same as one of its input registers. This only works on instructions
+// that define a single register (unlike post-increment loads, for example).
+// The equality check is actually more detailed: the code calculates which
+// bits of the output are used, and only compares these bits with the input
+// registers.
+// If the output matches an input, the instruction is replaced with COPY.
+// The copies will be removed by another transformation.
+namespace {
+ class RedundantInstrElimination : public Transformation {
+ public:
+ RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool computeUsedBits(unsigned Reg, BitVector &Bits);
+ bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
+ uint16_t Begin);
+ bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the instruction is a lossy shift left, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
+bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN, Width;
+ switch (Opc) {
+ case S2_asl_i_p:
+ ImN = 2;
+ RegN = 1;
+ Width = 64;
+ break;
+ case S2_asl_i_p_acc:
+ case S2_asl_i_p_and:
+ case S2_asl_i_p_nac:
+ case S2_asl_i_p_or:
+ case S2_asl_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 64;
+ break;
+ case S2_asl_i_r:
+ ImN = 2;
+ RegN = 1;
+ Width = 32;
+ break;
+ case S2_addasl_rrri:
+ case S4_andi_asl_ri:
+ case S4_ori_asl_ri:
+ case S4_addi_asl_ri:
+ case S4_subi_asl_ri:
+ case S2_asl_i_r_acc:
+ case S2_asl_i_r_and:
+ case S2_asl_i_r_nac:
+ case S2_asl_i_r_or:
+ case S2_asl_i_r_sat:
+ case S2_asl_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 32;
+ break;
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
+ if (S == 0)
+ return false;
+ LostB = Width-S;
+ LostE = Width;
+ return true;
+}
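+
+// Example (illustrative): S2_asl_i_r (32-bit shift left) by 8 loses the
+// top 8 bits of the shifted operand, so LostB = 24 and LostE = 32.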
+
+
+// Check if the instruction is a lossy shift right, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
+bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN;
+ switch (Opc) {
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S2_asr_i_p_acc:
+ case S2_asr_i_p_and:
+ case S2_asr_i_p_nac:
+ case S2_asr_i_p_or:
+ case S2_lsr_i_p_acc:
+ case S2_lsr_i_p_and:
+ case S2_lsr_i_p_nac:
+ case S2_lsr_i_p_or:
+ case S2_lsr_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+ case S2_asr_i_r:
+ case S2_lsr_i_r:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S4_andi_lsr_ri:
+ case S4_ori_lsr_ri:
+ case S4_addi_lsr_ri:
+ case S4_subi_lsr_ri:
+ case S2_asr_i_r_acc:
+ case S2_asr_i_r_and:
+ case S2_asr_i_r_nac:
+ case S2_asr_i_r_or:
+ case S2_lsr_i_r_acc:
+ case S2_lsr_i_r_and:
+ case S2_lsr_i_r_nac:
+ case S2_lsr_i_r_or:
+ case S2_lsr_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
+ LostB = 0;
+ LostE = S;
+ return true;
+}
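+
+// Example (illustrative): S2_lsr_i_r by 5 loses the low 5 bits of the
+// shifted operand, so LostB = 0 and LostE = 5. A shift amount of 0 yields
+// the empty range [0, 0), i.e. nothing is lost.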
+
+
+// Calculate the bit vector that corresponds to the used bits of register Reg.
+// The vector Bits has as many entries as Reg has bits. If the calculation
+// fails (i.e. the used bits are unknown), it returns false. Otherwise, it
+// returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+ BitVector Used(Bits.size());
+ RegisterSet Visited;
+ std::vector<unsigned> Pending;
+ Pending.push_back(Reg);
+
+ for (unsigned i = 0; i < Pending.size(); ++i) {
+ unsigned R = Pending[i];
+ if (Visited.has(R))
+ continue;
+ Visited.insert(R);
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ BitTracker::RegisterRef UR = *I;
+ unsigned B, W;
+ if (!HBS::getSubregMask(UR, B, W, MRI))
+ return false;
+ MachineInstr &UseI = *I->getParent();
+ if (UseI.isPHI() || UseI.isCopy()) {
+ unsigned DefR = UseI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ return false;
+ Pending.push_back(DefR);
+ } else {
+ if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+ return false;
+ }
+ }
+ }
+ Bits |= Used;
+ return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the least-significant bit
+// of the register used in operand OpN. For example, in instruction:
+// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// the operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the least-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+ unsigned OpN, BitVector &Bits, uint16_t Begin) {
+ unsigned Opc = MI.getOpcode();
+ BitVector T(Bits.size());
+ bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+ // Even if we don't have bits yet, we could still provide some information
+ // if the instruction is a lossy shift: the lost bits will be marked as
+ // not used.
+ unsigned LB, LE;
+ if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+ assert(MI.getOperand(OpN).isReg());
+ BitTracker::RegisterRef RR = MI.getOperand(OpN);
+ const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+ uint16_t Width = RC->getSize()*8;
+
+ if (!GotBits)
+ T.set(Begin, Begin+Width);
+ assert(LB <= LE && LB < Width && LE <= Width);
+ T.reset(Begin+LB, Begin+LE);
+ GotBits = true;
+ }
+ if (GotBits)
+ Bits |= T;
+ return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
+ BitTracker::RegisterRef RS) {
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+
+ unsigned DB, DW;
+ if (!HBS::getSubregMask(RD, DB, DW, MRI))
+ return false;
+ unsigned SB, SW;
+ if (!HBS::getSubregMask(RS, SB, SW, MRI))
+ return false;
+ if (SW != DW)
+ return false;
+
+ BitVector Used(DC.width());
+ if (!computeUsedBits(RD.Reg, Used))
+ return false;
+
+ for (unsigned i = 0; i != DW; ++i)
+ if (Used[i+DB] && DC[DB+i] != SC[SB+i])
+ return false;
+ return true;
+}
+
+
+bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
+ const RegisterSet&) {
+ bool Changed = false;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+
+ if (MI->getOpcode() == TargetOpcode::COPY)
+ continue;
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ continue;
+ unsigned NumD = MI->getDesc().getNumDefs();
+ if (NumD != 1)
+ continue;
+
+ BitTracker::RegisterRef RD = MI->getOperand(0);
+ if (!BT.has(RD.Reg))
+ continue;
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+
+ // Find a source operand that is equal to the result.
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ continue;
+
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
+ continue;
+
+ // If found, replace the instruction with a COPY.
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), SC);
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
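+
+// Net effect (illustrative): if the bit tracker proves that vreg2 below
+// carries the same bits as vreg1 on every bit that its users consume,
+//   vreg2 = S2_asl_i_r vreg1, 0
+// is rewritten so that all uses of vreg2 refer to a new register:
+//   vreg3 = COPY vreg1
+// after which the original instruction becomes dead.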
+
+
+//
+// Const generation
+//
+// Recognize instructions that produce constant values known at compile-time.
+// Replace them with register definitions that load these constants directly.
+namespace {
+ class ConstGeneration : public Transformation {
+ public:
+ ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isTfrConst(const MachineInstr *MI) const;
+ bool isConst(unsigned R, int64_t &V) const;
+ unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
+ if (!BT.has(R))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ int64_t T = 0;
+ for (unsigned i = RC.width(); i > 0; --i) {
+ const BitTracker::BitValue &V = RC[i-1];
+ T <<= 1;
+ if (V.is(1))
+ T |= 1;
+ else if (!V.is(0))
+ return false;
+ }
+ C = T;
+ return true;
+}
+
+
+bool ConstGeneration::isTfrConst(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::TFR_PdTrue:
+ case Hexagon::TFR_PdFalse:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return true;
+ }
+ return false;
+}
+
+
+// Generate a transfer-immediate instruction that is appropriate for the
+// register class and the actual value being transferred.
+unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
+ unsigned Reg = MRI.createVirtualRegister(RC);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
+ .addImm(int32_t(C));
+ return Reg;
+ }
+
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (isInt<8>(C)) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ unsigned Lo = Lo_32(C), Hi = Hi_32(C);
+ if (isInt<8>(Lo) || isInt<8>(Hi)) {
+ unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
+ : Hexagon::A4_combineii;
+ BuildMI(B, At, DL, HII.get(Opc), Reg)
+ .addImm(int32_t(Hi))
+ .addImm(int32_t(Lo));
+ return Reg;
+ }
+
+ BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ if (RC == &Hexagon::PredRegsRegClass) {
+ unsigned Opc;
+ if (C == 0)
+ Opc = Hexagon::TFR_PdFalse;
+ else if ((C & 0xFF) == 0xFF)
+ Opc = Hexagon::TFR_PdTrue;
+ else
+ return 0;
+ BuildMI(B, At, DL, HII.get(Opc), Reg);
+ return Reg;
+ }
+
+ return 0;
+}
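+
+// Example (illustrative): for a DoubleRegs constant C = 0x0000000500000007,
+// the low word 7 fits in 8 bits, so A2_combineii with Hi = 5 and Lo = 7 is
+// generated; a 64-bit constant whose halves both require extenders falls
+// back to CONST64_Int_Real.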
+
+
+bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I) {
+ if (isTfrConst(I))
+ continue;
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DR))
+ continue;
+ int64_t C;
+ if (isConst(DR, C)) {
+ DebugLoc DL = I->getDebugLoc();
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
+ if (ImmReg) {
+ HBS::replaceReg(DR, ImmReg, MRI);
+ BT.put(ImmReg, BT.lookup(DR));
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+
+//
+// Copy generation
+//
+// Identify pairs of available registers which hold identical values.
+// In such cases, only one of them needs to be calculated; the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+// Eliminate register copies RD = RS by replacing the uses of RD with
+// uses of RS.
+namespace {
+ class CopyGeneration : public Transformation {
+ public:
+ CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+
+ class CopyPropagation : public Transformation {
+ public:
+ CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(false), MRI(mri) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isCopyReg(unsigned Opc);
+ private:
+ bool propagateRegCopy(MachineInstr &MI);
+
+ MachineRegisterInfo &MRI;
+ };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+ if (!BT.has(Inp.Reg))
+ return false;
+ const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ unsigned B, W;
+ if (!HBS::getSubregMask(Inp, B, W, MRI))
+ return false;
+
+ for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+ if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ continue;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ unsigned RW = RC.width();
+ if (W == RW) {
+ if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isEqual(InpRC, B, RC, 0, W))
+ continue;
+ Out.Reg = R;
+ Out.Sub = 0;
+ return true;
+ }
+ // Check if there is a super-register whose part (accessed through a
+ // subregister) is equal to the input.
+ // Only handle double registers for now.
+ if (W*2 != RW)
+ continue;
+ if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+ continue;
+
+ if (HBS::isEqual(InpRC, B, RC, 0, W))
+ Out.Sub = Hexagon::subreg_loreg;
+ else if (HBS::isEqual(InpRC, B, RC, W, W))
+ Out.Sub = Hexagon::subreg_hireg;
+ else
+ continue;
+ Out.Reg = R;
+ return true;
+ }
+ return false;
+}
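+
+// Example (illustrative): if a 32-bit input vreg1 holds the same bits as
+// the low word of an available DoubleRegs register vreg2, findMatch sets
+// Out = vreg2:subreg_loreg, so vreg1 can later be generated as a copy of
+// that subregister.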
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ RegisterSet AVB(AVs);
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+ ++I, AVB.insert(Defs)) {
+ NextI = std::next(I);
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+
+ unsigned Opc = I->getOpcode();
+ if (CopyPropagation::isCopyReg(Opc))
+ continue;
+
+ for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+ BitTracker::RegisterRef MR;
+ if (!findMatch(R, MR, AVB))
+ continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(MR.Reg, 0, MR.Sub);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ Changed = true; // A new COPY was inserted.
+ }
+ }
+
+ return Changed;
+}
+
+
+bool CopyPropagation::isCopyReg(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::REG_SEQUENCE:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+
+bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
+ bool Changed = false;
+ unsigned Opc = MI.getOpcode();
+ BitTracker::RegisterRef RD = MI.getOperand(0);
+ assert(MI.getOperand(0).getSubReg() == 0);
+
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp: {
+ BitTracker::RegisterRef RS = MI.getOperand(1);
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ break;
+ if (RS.Sub != 0)
+ Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
+ else
+ Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ BitTracker::RegisterRef SL, SH;
+ if (HBS::parseRegSequence(MI, SL, SH)) {
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ SL.Reg, SL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ SH.Reg, SH.Sub, MRI);
+ }
+ break;
+ }
+ case Hexagon::A2_combinew: {
+ BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ RL.Reg, RL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ RH.Reg, RH.Sub, MRI);
+ break;
+ }
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri: {
+ unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
+ unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
+ : Hexagon::subreg_hireg;
+ BitTracker::RegisterRef RS = MI.getOperand(SrcX);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
+ break;
+ }
+ }
+ return Changed;
+}
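+
+// Example (illustrative): after
+//   vreg3 = A2_combinew vreg1, vreg2
+// uses of vreg3:subreg_hireg are rewritten to vreg1 and uses of
+// vreg3:subreg_loreg to vreg2, which can leave the combine itself dead.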
+
+
+bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ bool Changed = false;
+ for (auto I : Instrs) {
+ unsigned Opc = I->getOpcode();
+ if (!CopyPropagation::isCopyReg(Opc))
+ continue;
+ Changed |= propagateRegCopy(*I);
+ }
+
+ return Changed;
+}
+
+
+//
+// Bit simplification
+//
+// Recognize patterns that can be simplified and replace them with the
+// simpler forms.
+// This is by no means complete.
+namespace {
+ class BitSimplification : public Transformation {
+ public:
+ BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ struct RegHalf : public BitTracker::RegisterRef {
+ bool Low; // Low/High halfword.
+ };
+
+ bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ unsigned B, RegHalf &RH);
+
+ bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
+ unsigned getCombineOpcode(bool HLow, bool LLow);
+
+ bool genStoreUpperHalf(MachineInstr *MI);
+ bool genStoreImmediate(MachineInstr *MI);
+ bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the bits [B..B+16) in register cell RC form a valid halfword,
+// i.e. [0..16), [16..32), etc. of some register. If so, return true and
+// set the information about the found register in RH.
+bool BitSimplification::matchHalf(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
+ // XXX This could be searching in the set of available registers, in case
+ // the match is not exact.
+
+ // Match 16-bit chunks, where RC[B..B+15] references exactly one
+ // register and all the bits B..B+15 match between RC and the register.
+ // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
+ // and RC = { [0]:0 [1-15]:v1[1-15]... }.
+ bool Low = false;
+ unsigned I = B;
+ while (I < B+16 && RC[I].num())
+ I++;
+ if (I == B+16)
+ return false;
+
+ unsigned Reg = RC[I].RefI.Reg;
+ unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
+ if (P < I-B)
+ return false;
+ unsigned Pos = P - (I-B);
+
+ if (Reg == 0 || Reg == SelfR) // Don't match "self".
+ return false;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!BT.has(Reg))
+ return false;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(Reg);
+ if (Pos+16 > SC.width())
+ return false;
+
+ for (unsigned i = 0; i < 16; ++i) {
+ const BitTracker::BitValue &RV = RC[i+B];
+ if (RV.Type == BitTracker::BitValue::Ref) {
+ if (RV.RefI.Reg != Reg)
+ return false;
+ if (RV.RefI.Pos != i+Pos)
+ return false;
+ continue;
+ }
+ if (RC[i+B] != SC[i+Pos])
+ return false;
+ }
+
+ unsigned Sub = 0;
+ switch (Pos) {
+ case 0:
+ Sub = Hexagon::subreg_loreg;
+ Low = true;
+ break;
+ case 16:
+ Sub = Hexagon::subreg_loreg;
+ Low = false;
+ break;
+ case 32:
+ Sub = Hexagon::subreg_hireg;
+ Low = true;
+ break;
+ case 48:
+ Sub = Hexagon::subreg_hireg;
+ Low = false;
+ break;
+ default:
+ return false;
+ }
+
+ RH.Reg = Reg;
+ RH.Sub = Sub;
+ RH.Low = Low;
+ // If the subregister is not valid for the register class, set it to 0.
+ if (!HBS::getFinalVRegClass(RH, MRI))
+ RH.Sub = 0;
+
+ return true;
+}
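+
+// Example (illustrative): if RC[0-15] are references to bits 48-63 of a
+// 64-bit register vreg2, then Pos = 48 and the result is
+// RH = vreg2:subreg_hireg with RH.Low = false (the high halfword of the
+// high word).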
+
+
+// Check if RC matches the pattern of a S2_packhl. If so, return true and
+// set the inputs Rs and Rt.
+bool BitSimplification::matchPackhl(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+ BitTracker::RegisterRef &Rt) {
+ RegHalf L1, H1, L2, H2;
+
+ if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
+ return false;
+ if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+ return false;
+
+ // Rs = H1.L1, Rt = H2.L2
+ if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+ return false;
+ if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+ return false;
+
+ Rs = H1;
+ Rt = H2;
+ return true;
+}
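+
+// In other words, the matcher accepts exactly the bit layout produced by
+// Rdd = packhl(Rs, Rt):
+//   RC[48-63] = Rs.H, RC[32-47] = Rt.H, RC[16-31] = Rs.L, RC[0-15] = Rt.L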
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+ return HLow ? LLow ? Hexagon::A2_combine_ll
+ : Hexagon::A2_combine_lh
+ : LLow ? Hexagon::A2_combine_hl
+ : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_storerh_io)
+ return false;
+
+ MachineOperand &ValOp = MI->getOperand(2);
+ BitTracker::RegisterRef RS = ValOp;
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ RegHalf H;
+ if (!matchHalf(0, RC, 0, H))
+ return false;
+ if (H.Low)
+ return false;
+ MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+ ValOp.setReg(H.Reg);
+ ValOp.setSubReg(H.Sub);
+ return true;
+}
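+
+// Example (illustrative): given vreg2 = S2_lsr_i_r vreg1, 16, the store
+//   S2_storerh_io %base, #0, vreg2
+// stores vreg1's high halfword, so it is rewritten as
+//   S2_storerf_io %base, #0, vreg1    ; memh(Rs+#0) = Rt.H
+// and the shift may then become dead.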
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ unsigned Align = 0;
+ switch (Opc) {
+ case Hexagon::S2_storeri_io:
+ Align++; // Fall through: word offsets are scaled by 4 (u6:2).
+ case Hexagon::S2_storerh_io:
+ Align++; // Fall through: halfword offsets are scaled by 2 (u6:1).
+ case Hexagon::S2_storerb_io:
+ break; // Byte offsets are unscaled (u6:0).
+ default:
+ return false;
+ }
+
+ // Avoid stores to frame-indices (due to an unknown offset).
+ if (!MI->getOperand(0).isReg())
+ return false;
+ MachineOperand &OffOp = MI->getOperand(1);
+ if (!OffOp.isImm())
+ return false;
+
+ int64_t Off = OffOp.getImm();
+ // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+ if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+ return false;
+ // Source register:
+ BitTracker::RegisterRef RS = MI->getOperand(2);
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ uint64_t U;
+ if (!HBS::getConst(RC, 0, RC.width(), U))
+ return false;
+
+ // Only consider 8-bit values to avoid constant-extenders.
+ int V;
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ V = int8_t(U);
+ break;
+ case Hexagon::S2_storerh_io:
+ V = int16_t(U);
+ break;
+ case Hexagon::S2_storeri_io:
+ V = int32_t(U);
+ break;
+ }
+ if (!isInt<8>(V))
+ return false;
+
+ MI->RemoveOperand(2);
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+ break;
+ case Hexagon::S2_storerh_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+ break;
+ case Hexagon::S2_storeri_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+ break;
+ }
+ MI->addOperand(MachineOperand::CreateImm(V));
+ return true;
+}
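+
+// Example (illustrative): if vreg1 is known to hold the constant 5, then
+//   S2_storeri_io %base, #8, vreg1    ; memw(Rs+#8) = Rt
+// is rewritten as
+//   S4_storeiri_io %base, #8, #5      ; memw(Rs+#8) = #5
+// provided the offset satisfies the u6:2 constraint and the value fits in
+// a signed 8-bit immediate.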
+
+
+// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
+bool BitSimplification::genPackhl(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == Hexagon::S2_packhl)
+ return false;
+ BitTracker::RegisterRef Rs, Rt;
+ if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ .addReg(Rs.Reg, 0, Rs.Sub)
+ .addReg(Rt.Reg, 0, Rt.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI produces halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L;
+ // Check for halfword in low 16 bits, zeros elsewhere.
+ if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Prefer zxth, since zxth can go in any slot, while extractu only in
+ // slots 2 and 3.
+ unsigned NewR = 0;
+ if (L.Low && Opc != Hexagon::A2_zxth) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ .addReg(L.Reg, 0, L.Sub);
+ } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+ .addReg(L.Reg, 0, L.Sub)
+ .addImm(16)
+ .addImm(16);
+ }
+ if (NewR == 0)
+ return false;
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L, H;
+ // Check for combine h/l
+ if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+ return false;
+ // Do nothing if this is just a reg copy.
+ if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ unsigned COpc = getCombineOpcode(H.Low, L.Low);
+ if (COpc == Opc)
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ .addReg(H.Reg, 0, H.Sub)
+ .addReg(L.Reg, 0, L.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
+bool BitSimplification::genExtractLow(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ case Hexagon::S2_extractu:
+ return false;
+ }
+ if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
+ int32_t Imm = MI->getOperand(2).getImm();
+ if (isInt<10>(Imm))
+ return false;
+ }
+
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ return false;
+ unsigned W = RC.width();
+ while (W > 0 && RC[W-1].is(0))
+ W--;
+ if (W == 0 || W == RC.width())
+ return false;
+ unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
+ : (W == 16) ? Hexagon::A2_zxth
+ : (W < 10) ? Hexagon::A2_andir
+ : Hexagon::S2_extractu;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+ if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
+ continue;
+
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ if (NewOpc == Hexagon::A2_andir)
+ MIB.addImm((1 << W) - 1);
+ else if (NewOpc == Hexagon::S2_extractu)
+ MIB.addImm(W).addImm(0);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+ }
+ return false;
+}
+
+
+// Check for tstbit simplification opportunity, where the bit being checked
+// can be tracked back to another register. For example:
+// vreg2 = S2_lsr_i_r vreg1, 5
+// vreg3 = S2_tstbit_i vreg2, 0
+// =>
+// vreg3 = S2_tstbit_i vreg1, 5
+bool BitSimplification::simplifyTstbit(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_tstbit_i)
+ return false;
+
+ unsigned BN = MI->getOperand(2).getImm();
+ BitTracker::RegisterRef RS = MI->getOperand(1);
+ unsigned F, W;
+ DebugLoc DL = MI->getDebugLoc();
+ if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
+ return false;
+ MachineBasicBlock &B = *MI->getParent();
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ const BitTracker::BitValue &V = SC[F+BN];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
+ const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
+ // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
+ // a double register, need to use a subregister and adjust bit
+ // number.
+ unsigned P = UINT_MAX;
+ BitTracker::RegisterRef RR(V.RefI.Reg, 0);
+ if (TC == &Hexagon::DoubleRegsRegClass) {
+ P = V.RefI.Pos;
+ RR.Sub = Hexagon::subreg_loreg;
+ if (P >= 32) {
+ P -= 32;
+ RR.Sub = Hexagon::subreg_hireg;
+ }
+ } else if (TC == &Hexagon::IntRegsRegClass) {
+ P = V.RefI.Pos;
+ }
+ if (P != UINT_MAX) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
+ .addReg(RR.Reg, 0, RR.Sub)
+ .addImm(P);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BT.put(NewR, RC);
+ return true;
+ }
+ } else if (V.is(0) || V.is(1)) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+ BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ bool Changed = false;
+ RegisterSet AVB = AVs;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+ MachineInstr *MI = &*I;
+ Defs.clear();
+ HBS::getInstrDefs(*MI, Defs);
+
+ unsigned Opc = MI->getOpcode();
+ if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+ continue;
+
+ if (MI->mayStore()) {
+ bool T = genStoreUpperHalf(MI);
+ T = T || genStoreImmediate(MI);
+ Changed |= T;
+ continue;
+ }
+
+ if (Defs.count() != 1)
+ continue;
+ const MachineOperand &Op0 = MI->getOperand(0);
+ if (!Op0.isReg() || !Op0.isDef())
+ continue;
+ BitTracker::RegisterRef RD = Op0;
+ if (!BT.has(RD.Reg))
+ continue;
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+ if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+ bool T = genPackhl(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+ bool T = genExtractHalf(MI, RD, RC);
+ T = T || genCombineHalf(MI, RD, RC);
+ T = T || genExtractLow(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+ bool T = simplifyTstbit(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+ }
+ return Changed;
+}
+
+
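+// Run the sequence of bit-level subpasses over the function: dead code
+// elimination, constant generation, redundant instruction elimination,
+// copy generation and propagation, and finally the bit simplification
+// proper, with bit-tracker runs and dead-code cleanups in between.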
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HRI = *HST.getRegisterInfo();
+ auto &HII = *HST.getInstrInfo();
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Changed;
+
+ Changed = DeadCodeElimination(MF, *MDT).run();
+
+ const HexagonEvaluator HE(HRI, MRI, HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+
+ MachineBasicBlock &Entry = MF.front();
+
+ RegisterSet AIG; // Available registers for IG.
+ ConstGeneration ImmG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, ImmG, AIG);
+
+ RegisterSet ARE; // Available registers for RIE.
+ RedundantInstrElimination RIE(BT, HII, MRI);
+ Changed |= visitBlock(Entry, RIE, ARE);
+
+ RegisterSet ACG; // Available registers for CG.
+ CopyGeneration CopyG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, CopyG, ACG);
+
+ RegisterSet ACP; // Available registers for CP.
+ CopyPropagation CopyP(HRI, MRI);
+ Changed |= visitBlock(Entry, CopyP, ACP);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ BT.run();
+ RegisterSet ABS; // Available registers for BS.
+ BitSimplification BitS(BT, HII, MRI);
+ Changed |= visitBlock(Entry, BitS, ABS);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ if (Changed) {
+ for (auto &B : MF)
+ for (auto &I : B)
+ I.clearKillInfo();
+ DeadCodeElimination(MF, *MDT).run();
+ }
+ return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+// before the entry of the loop, and the matching code is such that it can
+// be simplified. This pass relies on the bit simplification above and only
+// prepares code in a way that can be handled by the bit simplification.
+//
+// This is the motivating testcase (and explanation):
+//
+// {
+// loop0(.LBB0_2, r1) // %for.body.preheader
+// r5:4 = memd(r0++#8)
+// }
+// {
+// r3 = lsr(r4, #16)
+// r7:6 = combine(r5, r5)
+// }
+// {
+// r3 = insert(r5, #16, #16)
+// r7:6 = vlsrw(r7:6, #16)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r6 # R6 is really R5.H
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r3 # R3 is really R4.H
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }
+// { # "Shuffling" code that sets up R3 and R6
+// r3 = lsr(r4, #16) # so that their halves can be stored in the
+// r7:6 = combine(r5, r5) # next iteration. This could be folded into
+// } # the stores if the code was at the beginning
+// { # of the loop iteration. Since the same code
+// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
+// r7:6 = vlsrw(r7:6, #16) # there.
+// }:endloop0
+//
+//
+// The outcome:
+//
+// {
+// loop0(.LBB0_2, r1)
+// r5:4 = memd(r0++#8)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r5.h
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r4.h
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }:endloop0
+
+namespace llvm {
+ FunctionPass *createHexagonLoopRescheduling();
+ void initializeHexagonLoopReschedulingPass(PassRegistry&);
+}
+
+namespace {
+ class HexagonLoopRescheduling : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonLoopRescheduling() : MachineFunctionPass(ID),
+ HII(0), HRI(0), MRI(0), BTP(0) {
+ initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+ MachineRegisterInfo *MRI;
+ BitTracker *BTP;
+
+ struct LoopCand {
+ LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
+ MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
+ MachineBasicBlock *LB, *PB, *EB;
+ };
+ typedef std::vector<MachineInstr*> InstrList;
+ struct InstrGroup {
+ BitTracker::RegisterRef Inp, Out;
+ InstrList Ins;
+ };
+ struct PhiInfo {
+ PhiInfo(MachineInstr &P, MachineBasicBlock &B);
+ unsigned DefR;
+ BitTracker::RegisterRef LR, PR;
+ MachineBasicBlock *LB, *PB;
+ };
+
+ static unsigned getDefReg(const MachineInstr *MI);
+ bool isConst(unsigned Reg) const;
+ bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
+ bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
+ bool isShuffleOf(unsigned OutR, unsigned InpR) const;
+ bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
+ unsigned &InpR2) const;
+ void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
+ MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
+ bool processLoop(LoopCand &C);
+ };
+}
+
+char HexagonLoopRescheduling::ID = 0;
+
+INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
+ "Hexagon Loop Rescheduling", false, false)
+
+
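+// For a phi node P in the loop block B, record the defined register and
+// the two incoming values: LR, the loop-carried value coming from B
+// itself, and PR, the value coming from the predecessor block PB.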
+HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
+ MachineBasicBlock &B) {
+ DefR = HexagonLoopRescheduling::getDefReg(&P);
+ LB = &B;
+ PB = nullptr;
+ for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
+ const MachineOperand &OpB = P.getOperand(i+1);
+ if (OpB.getMBB() == &B) {
+ LR = P.getOperand(i);
+ continue;
+ }
+ PB = OpB.getMBB();
+ PR = P.getOperand(i);
+ }
+}
+
+
+unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ if (Defs.count() != 1)
+ return 0;
+ return Defs.find_first();
+}
+
+
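+// Check if Reg holds a compile-time constant, i.e. every bit in its
+// register cell is a known 0 or 1. A hypothetical example: a cell whose
+// bits, starting from the LSB, are {0,1,0,1,0,0,...,0} denotes the
+// constant 10.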
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
+ if (!BTP->has(Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
+ for (unsigned i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (!V.is(0) && !V.is(1))
+ return false;
+ }
+ return true;
+}
+
+
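+// Check if MI only rearranges or selects bits of its inputs, without
+// computing new bit values. For instance, A2_combine_ll forms a 32-bit
+// value from the low halfwords of its two source registers (an
+// illustrative reading; see the opcode definitions for exact semantics).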
+bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
+ unsigned DefR) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::S2_lsr_i_r:
+ case Hexagon::S2_asr_i_r:
+ case Hexagon::S2_asl_i_r:
+ case Hexagon::S2_lsr_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_insert:
+ case Hexagon::A2_or:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_and:
+ case Hexagon::A2_andp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineri:
+ case Hexagon::A4_combineir:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_combine_ll:
+ case Hexagon::A2_combine_lh:
+ case Hexagon::A2_combine_hl:
+ case Hexagon::A2_combine_hh:
+ return true;
+ }
+ return false;
+}
+
+
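+// Check if MI uses InpR as the value being stored; in the Hexagon store
+// instructions of interest the stored value is the last operand, hence
+// the check against n-1 below.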
+bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
+ unsigned InpR) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == InpR)
+ return i == n-1;
+ }
+ return false;
+}
+
+
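+// Check if every bit of OutR that refers to another register refers to
+// InpR, i.e. OutR is a rearrangement of bits of InpR (and constants).
+// A hypothetical example: after vreg3 = S2_lsr_i_r vreg1, 16, vreg3 is a
+// shuffle of vreg1 (low bits reference vreg1, high bits are known zeros).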
+bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
+ if (!BTP->has(OutR) || !BTP->has(InpR))
+ return false;
+ const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
+ for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = OutC[i];
+ if (V.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V.RefI.Reg != InpR)
+ return false;
+ }
+ return true;
+}
+
+
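+// Check if OutR2 is produced from some register by the same bit
+// rearrangement that produces OutR1 from InpR1; if so, return that source
+// register in InpR2. A hypothetical sketch with made-up registers:
+// vreg2 = S2_lsr_i_r vreg1, 16 // OutR1 = vreg2, InpR1 = vreg1
+// vreg4 = S2_lsr_i_r vreg3, 16 // OutR2 = vreg4
+// => returns true with InpR2 = vreg3.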
+bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
+ unsigned OutR2, unsigned &InpR2) const {
+ if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
+ return false;
+ const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
+ const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
+ unsigned W = OutC1.width();
+ unsigned MatchR = 0;
+ if (W != OutC2.width())
+ return false;
+ for (unsigned i = 0; i < W; ++i) {
+ const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
+ if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
+ return false;
+ if (V1.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V1.RefI.Pos != V2.RefI.Pos)
+ return false;
+ if (V1.RefI.Reg != InpR1)
+ return false;
+ if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
+ return false;
+ if (!MatchR)
+ MatchR = V2.RefI.Reg;
+ else if (V2.RefI.Reg != MatchR)
+ return false;
+ }
+ InpR2 = MatchR;
+ return true;
+}
+
+
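+// Move the instruction group G to the front of loop block LB (at At).
+// A new phi node is created to feed the moved group:
+// PhiR = PHI NewPredR, <PB>, G.Inp.Reg, <LB>
+// and each instruction in G is cloned with its uses remapped (the group
+// input through the new phi, intra-group values through their clones);
+// finally, OldPhiR is replaced with the clone of G.Out.Reg.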
+void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
+ MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
+ unsigned NewPredR) {
+ DenseMap<unsigned,unsigned> RegMap;
+
+ const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
+ unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+ BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
+ .addReg(NewPredR)
+ .addMBB(&PB)
+ .addReg(G.Inp.Reg)
+ .addMBB(&LB);
+ RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
+
+ for (unsigned i = G.Ins.size(); i > 0; --i) {
+ const MachineInstr *SI = G.Ins[i-1];
+ unsigned DR = getDefReg(SI);
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ unsigned NewDR = MRI->createVirtualRegister(RC);
+ DebugLoc DL = SI->getDebugLoc();
+
+ auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
+ for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
+ const MachineOperand &Op = SI->getOperand(j);
+ if (!Op.isReg()) {
+ MIB.addOperand(Op);
+ continue;
+ }
+ if (!Op.isUse())
+ continue;
+ unsigned UseR = RegMap[Op.getReg()];
+ MIB.addReg(UseR, 0, Op.getSubReg());
+ }
+ RegMap.insert(std::make_pair(DR, NewDR));
+ }
+
+ HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
+}
+
+
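+// Process a single loop candidate: collect the loop phis whose values are
+// used only inside the loop (with at least one bit-shuffle or store use),
+// gather the movable shuffling instructions, partition them into
+// single-input dependence groups, and move each group whose output feeds
+// a loop phi to the front of the loop.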
+bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
+ DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
+ std::vector<PhiInfo> Phis;
+ for (auto &I : *C.LB) {
+ if (!I.isPHI())
+ break;
+ unsigned PR = getDefReg(&I);
+ if (isConst(PR))
+ continue;
+ bool BadUse = false, GoodUse = false;
+ for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() != C.LB) {
+ BadUse = true;
+ break;
+ }
+ if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
+ GoodUse = true;
+ }
+ if (BadUse || !GoodUse)
+ continue;
+
+ Phis.push_back(PhiInfo(I, *C.LB));
+ }
+
+ DEBUG({
+ dbgs() << "Phis: {";
+ for (auto &I : Phis) {
+ dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
+ << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
+ << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
+ << I.LB->getNumber() << ')';
+ }
+ dbgs() << " }\n";
+ });
+
+ if (Phis.empty())
+ return false;
+
+ bool Changed = false;
+ InstrList ShufIns;
+
+ // Go backwards in the block: for each bit-shuffling instruction, check
+ // if that instruction could potentially be moved to the front of the
+ // loop: the output of the instruction cannot be used in a non-shuffling
+ // instruction in this loop.
+ for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
+ if (I->isTerminator())
+ continue;
+ if (I->isPHI())
+ break;
+
+ RegisterSet Defs;
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DefR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ continue;
+ if (!isBitShuffle(&*I, DefR))
+ continue;
+
+ bool BadUse = false;
+ for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() == C.LB) {
+ if (UseI->isPHI()) {
+ // If the use is in a phi node in this loop, then it should be
+ // the value corresponding to the back edge.
+ unsigned Idx = UI.getOperandNo();
+ if (UseI->getOperand(Idx+1).getMBB() != C.LB)
+ BadUse = true;
+ } else {
+ auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+ if (F == ShufIns.end())
+ BadUse = true;
+ }
+ } else {
+ // The use is outside of the loop; this is acceptable only if there
+ // is an epilog block suitable for a copy-out.
+ if (C.EB == nullptr)
+ BadUse = true;
+ }
+ if (BadUse)
+ break;
+ }
+
+ if (BadUse)
+ continue;
+ ShufIns.push_back(&*I);
+ }
+
+ // Partition the list of shuffling instructions into instruction groups,
+ // where each group has to be moved as a whole (i.e. a group is a chain of
+ // dependent instructions). A group produces a single live output register,
+ // which is meant to be the input of the loop phi node (although this is
+ // not checked here yet). It also uses a single register as its input,
+ // which is some value produced in the loop body. After moving the group
+ // to the beginning of the loop, that input register would need to be
+ // the loop-carried register (through a phi node) instead of the (currently
+ // loop-carried) output register.
+ typedef std::vector<InstrGroup> InstrGroupList;
+ InstrGroupList Groups;
+
+ for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
+ MachineInstr *SI = ShufIns[i];
+ if (SI == nullptr)
+ continue;
+
+ InstrGroup G;
+ G.Ins.push_back(SI);
+ G.Out.Reg = getDefReg(SI);
+ RegisterSet Inputs;
+ HBS::getInstrUses(*SI, Inputs);
+
+ for (unsigned j = i+1; j < n; ++j) {
+ MachineInstr *MI = ShufIns[j];
+ if (MI == nullptr)
+ continue;
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ // If this instruction does not define any pending inputs, skip it.
+ if (!Defs.intersects(Inputs))
+ continue;
+ // Otherwise, add it to the current group and remove the inputs that
+ // are defined by MI.
+ G.Ins.push_back(MI);
+ Inputs.remove(Defs);
+ // Then add all registers used by MI.
+ HBS::getInstrUses(*MI, Inputs);
+ ShufIns[j] = nullptr;
+ }
+
+ // Only add a group if it requires at most one register.
+ if (Inputs.count() > 1)
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+ continue;
+
+ G.Inp.Reg = Inputs.find_first();
+ Groups.push_back(G);
+ }
+
+ DEBUG({
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ dbgs() << "Group[" << i << "] inp: "
+ << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
+ << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
+ for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
+ dbgs() << " " << *G.Ins[j];
+ }
+ });
+
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+ if (F == Phis.end())
+ continue;
+ unsigned PredR = 0;
+ if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
+ const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
+ unsigned Opc = DefPredR->getOpcode();
+ if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
+ continue;
+ if (!DefPredR->getOperand(1).isImm())
+ continue;
+ if (DefPredR->getOperand(1).getImm() != 0)
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
+ if (RC != MRI->getRegClass(F->PR.Reg)) {
+ PredR = MRI->createVirtualRegister(RC);
+ unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
+ : Hexagon::A2_tfrpi;
+ auto T = C.PB->getFirstTerminator();
+ DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
+ BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+ .addImm(0);
+ } else {
+ PredR = F->PR.Reg;
+ }
+ }
+ assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+ moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+ BTP = &BT;
+
+ std::vector<LoopCand> Cand;
+
+ for (auto &B : MF) {
+ if (B.pred_size() != 2 || B.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PB = nullptr;
+ bool IsLoop = false;
+ for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+ if (*PI != &B)
+ PB = *PI;
+ else
+ IsLoop = true;
+ }
+ if (!IsLoop)
+ continue;
+
+ MachineBasicBlock *EB = nullptr;
+ for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+ if (*SI == &B)
+ continue;
+ // Set EB to the epilog block, if it has only one predecessor (i.e. the
+ // edge from B to EB is non-critical).
+ if ((*SI)->pred_size() == 1)
+ EB = *SI;
+ break;
+ }
+
+ Cand.push_back(LoopCand(&B, PB, EB));
+ }
+
+ bool Changed = false;
+ for (auto &C : Cand)
+ Changed |= processLoop(C);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+ return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+ return new HexagonBitSimplify();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 021e58a..d5848dc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -84,6 +84,8 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
switch (ID) {
case DoubleRegsRegClassID:
+ case VecDblRegsRegClassID:
+ case VecDblRegs128BRegClassID:
return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
: BT::BitMask(RW, 2*RW-1);
default:
@@ -95,30 +97,29 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
llvm_unreachable("Unexpected register/subregister");
}
-
namespace {
- struct RegisterRefs : public std::vector<BT::RegisterRef> {
- typedef std::vector<BT::RegisterRef> Base;
- RegisterRefs(const MachineInstr *MI);
- const BT::RegisterRef &operator[](unsigned n) const {
- // The main purpose of this operator is to assert with bad argument.
- assert(n < size());
- return Base::operator[](n);
- }
- };
+class RegisterRefs {
+ std::vector<BT::RegisterRef> Vector;
- RegisterRefs::RegisterRefs(const MachineInstr *MI)
- : Base(MI->getNumOperands()) {
- for (unsigned i = 0, n = size(); i < n; ++i) {
+public:
+ RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+ for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg())
- at(i) = BT::RegisterRef(MO);
+ Vector[i] = BT::RegisterRef(MO);
// For indices that don't correspond to registers, the entry will
// remain constructed via the default constructor.
}
}
-}
+ size_t size() const { return Vector.size(); }
+ const BT::RegisterRef &operator[](unsigned n) const {
+ // The main purpose of this operator is to assert on an invalid argument.
+ assert(n < Vector.size());
+ return Vector[n];
+ }
+};
+}
bool HexagonEvaluator::evaluate(const MachineInstr *MI,
const CellMapType &Inputs, CellMapType &Outputs) const {
@@ -189,7 +190,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI,
return true;
};
// Get the cell corresponding to the N-th operand.
- auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+ auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W)
-> BT::RegisterCell {
const MachineOperand &Op = MI->getOperand(N);
if (Op.isImm())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 3753b745..efafdd0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -102,7 +102,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
@@ -186,13 +186,11 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
if (case1 || case2) {
InvertAndChangeJumpTarget(MI, UncondTarget);
- MBB->removeSuccessor(JumpAroundTarget);
- MBB->addSuccessor(UncondTarget);
+ MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
// Remove the unconditional branch in LayoutSucc.
LayoutSucc->erase(LayoutSucc->begin());
- LayoutSucc->removeSuccessor(UncondTarget);
- LayoutSucc->addSuccessor(JumpAroundTarget);
+ LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget);
// This code performs the conversion for case 2, which moves
// the block to the fall-thru case (BB3 in the code above).
@@ -210,16 +208,15 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// The live-in to LayoutSucc is now all values live-in to
// JumpAroundTarget.
//
- std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
- LayoutSucc->livein_end());
- std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
- JumpAroundTarget->livein_end());
- for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
- LayoutSucc->removeLiveIn(OrigLiveIn[i]);
- }
- for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
- LayoutSucc->addLiveIn(NewLiveIn[i]);
- }
+ std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn(
+ LayoutSucc->livein_begin(), LayoutSucc->livein_end());
+ std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn(
+ JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (const auto &OrigLI : OrigLiveIn)
+ LayoutSucc->removeLiveIn(OrigLI.PhysReg);
+ for (const auto &NewLI : NewLiveIn)
+ LayoutSucc->addLiveIn(NewLI);
}
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f5fac1..931db66 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -59,30 +59,23 @@ namespace {
// Numbering map for gep nodes. Used to keep track of ordering for
// gep nodes.
- struct NodeNumbering : public std::map<const GepNode*,unsigned> {
- };
-
- struct NodeOrdering : public NodeNumbering {
+ struct NodeOrdering {
NodeOrdering() : LastNum(0) {}
-#ifdef _MSC_VER
- void special_insert_for_special_msvc(const GepNode *N)
-#else
- using NodeNumbering::insert;
- void insert(const GepNode* N)
-#endif
- {
- insert(std::make_pair(N, ++LastNum));
- }
- bool operator() (const GepNode* N1, const GepNode *N2) const {
- const_iterator F1 = find(N1), F2 = find(N2);
- assert(F1 != end() && F2 != end());
+
+ void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
+ void clear() { Map.clear(); }
+
+ bool operator()(const GepNode *N1, const GepNode *N2) const {
+ auto F1 = Map.find(N1), F2 = Map.find(N2);
+ assert(F1 != Map.end() && F2 != Map.end());
return F1->second < F2->second;
}
+
private:
+ std::map<const GepNode *, unsigned> Map;
unsigned LastNum;
};
-
class HexagonCommonGEP : public FunctionPass {
public:
static char ID;
@@ -360,11 +353,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
Us.insert(&UI.getUse());
}
Nodes.push_back(N);
-#ifdef _MSC_VER
- NodeOrder.special_insert_for_special_msvc(N);
-#else
NodeOrder.insert(N);
-#endif
// Skip the first index operand, since we only handle 0. This dereferences
// the pointer operand.
@@ -379,11 +368,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
Nx->PTy = PtrTy;
Nx->Idx = Op;
Nodes.push_back(Nx);
-#ifdef _MSC_VER
- NodeOrder.special_insert_for_special_msvc(Nx);
-#else
NodeOrder.insert(Nx);
-#endif
PN = Nx;
PtrTy = next_type(PtrTy, Op);
@@ -404,7 +389,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
void HexagonCommonGEP::collect() {
// Establish depth-first traversal order of the dominator tree.
ValueVect BO;
- getBlockTraversalOrder(Fn->begin(), BO);
+ getBlockTraversalOrder(&Fn->front(), BO);
// The creation of gep nodes requires DT-traversal. When processing a GEP
// instruction that uses another GEP instruction as the base pointer, the
@@ -737,7 +722,7 @@ namespace {
Instruction *In = cast<Instruction>(V);
if (In->getParent() != B)
continue;
- BasicBlock::iterator It = In;
+ BasicBlock::iterator It = In->getIterator();
if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
FirstUse = It;
}
@@ -1135,7 +1120,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
ArrayRef<Value*> A(IdxList, IdxC);
Type *InpTy = Input->getType();
Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
- NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At);
+ NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
Input = NewInst;
} while (nax <= Num);
@@ -1213,7 +1198,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
Last = Child;
} while (true);
- BasicBlock::iterator InsertAt = LastB->getTerminator();
+ BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator();
if (LastUsed || LastCN > 0) {
ValueVect Urs;
getAllUsersForNode(Root, Urs, NCM);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
new file mode 100644
index 0000000..ee0c318
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -0,0 +1,1063 @@
+//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a Hexagon-specific if-conversion pass that runs on the
+// SSA form.
+// In SSA it is not straightforward to represent instructions that
+// conditionally define registers, since a conditionally-defined register
+// may only be used under the same condition on which the definition was
+// based. To avoid complications of this nature, this pass will only
+// generate predicated stores, and will speculate other instructions from
+// the "if-converted" block.
+// The code will recognize CFG patterns where a block with a conditional
+// branch "splits" into a "true block" and a "false block". Either of these
+// could be omitted (in the case of a triangle, for example).
+// If, after conversion of the side block(s), the CFG allows it, the
+// resulting blocks may be merged. If the "join" block contained PHI nodes,
+// they will be replaced with MUX (or MUX-like) instructions to maintain
+// the semantics of the PHI.
+//
+// Example:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead>
+// J2_jump <BB#4>, %PC<imp-def,dead>
+// Successors according to CFG: BB#4(62) BB#5(62)
+//
+// BB#4: derived from LLVM BB %if.then
+// Predecessors according to CFG: BB#3
+// %vreg11<def> = A2_addp %vreg6, %vreg10
+// S2_storerd_io %vreg32, 16, %vreg11
+// Successors according to CFG: BB#5
+//
+// BB#5: derived from LLVM BB %if.end
+// Predecessors according to CFG: BB#3 BB#4
+// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+// %vreg13<def> = A2_addp %vreg7, %vreg12
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6(4) BB#3(124)
+//
+// would become:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10
+// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11
+// %vreg13<def> = A2_addp %vreg7, %vreg46
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6 BB#3
+
+#define DEBUG_TYPE "hexagon-eif"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "HexagonTargetMachine.h"
+
+#include <functional>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonEarlyIfConversion();
+ void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
+}
+
+namespace {
+ cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
+ cl::init(false), cl::desc("Enable branch probability info"));
+ cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
+ cl::desc("Size limit in Hexagon early if-conversion"));
+
+ struct PrintMB {
+ PrintMB(const MachineBasicBlock *B) : MB(B) {}
+ const MachineBasicBlock *MB;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
+ if (!P.MB)
+ return OS << "<none>";
+ return OS << '#' << P.MB->getNumber();
+ }
+
+ struct FlowPattern {
+ FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+ FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
+ MachineBasicBlock *FB, MachineBasicBlock *JB)
+ : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
+
+ MachineBasicBlock *SplitB;
+ MachineBasicBlock *TrueB, *FalseB, *JoinB;
+ unsigned PredR;
+ };
+ struct PrintFP {
+ PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
+ : FP(P), TRI(T) {}
+ const FlowPattern &FP;
+ const TargetRegisterInfo &TRI;
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
+ };
+ raw_ostream &operator<<(raw_ostream &OS,
+ const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
+ OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
+ << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
+ << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
+ << PrintMB(P.FP.FalseB)
+ << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
+ return OS;
+ }
+
+ class HexagonEarlyIfConversion : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonEarlyIfConversion() : MachineFunctionPass(ID),
+ TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+ initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon early if conversion";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ typedef DenseSet<MachineBasicBlock*> BlockSetType;
+
+ bool isPreheader(const MachineBasicBlock *B) const;
+ bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
+ FlowPattern &FP);
+ bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
+ bool visitLoop(MachineLoop *L);
+
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool isValidCandidate(const MachineBasicBlock *B) const;
+ bool usesUndefVReg(const MachineInstr *MI) const;
+ bool isValid(const FlowPattern &FP) const;
+ unsigned countPredicateDefs(const MachineBasicBlock *B) const;
+ unsigned computePhiCost(MachineBasicBlock *B) const;
+ bool isProfitable(const FlowPattern &FP) const;
+ bool isPredicableStore(const MachineInstr *MI) const;
+ bool isSafeToSpeculate(const MachineInstr *MI) const;
+
+ unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
+ void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
+ MachineInstr *MI, unsigned PredR, bool IfTrue);
+ void predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue);
+
+ void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
+ void convert(const FlowPattern &FP);
+
+ void removeBlock(MachineBasicBlock *B);
+ void eliminatePhis(MachineBasicBlock *B);
+ void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
+ void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
+ void simplifyFlowGraph(const FlowPattern &FP);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MFN;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ MachineLoopInfo *MLI;
+ BlockSetType Deleted;
+ const MachineBranchProbabilityInfo *MBPI;
+ };
+
+ char HexagonEarlyIfConversion::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
+ "Hexagon early if conversion", false, false)
+
+bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
+ if (B->succ_size() != 1)
+ return false;
+ MachineBasicBlock *SB = *B->succ_begin();
+ MachineLoop *L = MLI->getLoopFor(SB);
+ return L && SB == L->getHeader();
+}
+
+
+bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
+ MachineLoop *L, FlowPattern &FP) {
+ DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n");
+
+ // Interested only in conditional branches, no .new, no new-value, etc.
+ // Check the terminators directly, it's easier than handling all responses
+ // from AnalyzeBranch.
+ MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
+ if (T1I == B->end())
+ return false;
+ unsigned Opc = T1I->getOpcode();
+ if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf)
+ return false;
+ unsigned PredR = T1I->getOperand(0).getReg();
+
+ // Get the layout successor, or 0 if B does not have one.
+ MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
+ MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0;
+
+ MachineBasicBlock *T1B = T1I->getOperand(1).getMBB();
+ MachineBasicBlock::const_iterator T2I = std::next(T1I);
+ // The second terminator should be an unconditional branch.
+ assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump);
+ MachineBasicBlock *T2B = (T2I == B->end()) ? NextB
+ : T2I->getOperand(0).getMBB();
+ if (T1B == T2B) {
+ // XXX merge if T1B == NextB, or convert branch to unconditional.
+ // mark as diamond with both sides equal?
+ return false;
+ }
+ // Loop could be null for both.
+ if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
+ return false;
+
+ // Record the true/false blocks in such a way that "true" means "if (PredR)",
+ // and "false" means "if (!PredR)".
+ if (Opc == Hexagon::J2_jumpt)
+ TB = T1B, FB = T2B;
+ else
+ TB = T2B, FB = T1B;
+
+ if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB))
+ return false;
+
+ // Detect triangle first. In case of a triangle, one of the blocks TB/FB
+ // can fall through into the other, in other words, it will be executed
+ // in both cases. We only want to predicate the block that is executed
+ // conditionally.
+ unsigned TNP = TB->pred_size(), FNP = FB->pred_size();
+ unsigned TNS = TB->succ_size(), FNS = FB->succ_size();
+
+ // A block is predicable if it has one predecessor (it must be B), and
+ // it has a single successor. In fact, the block has to end either with
+ // an unconditional branch (which can be predicated), or with a fall-
+ // through.
+ bool TOk = (TNP == 1) && (TNS == 1);
+ bool FOk = (FNP == 1) && (FNS == 1);
+
+ // If neither is predicable, there is nothing interesting.
+ if (!TOk && !FOk)
+ return false;
+
+ MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0;
+ MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0;
+ MachineBasicBlock *JB = 0;
+
+ if (TOk) {
+ if (FOk) {
+ if (TSB == FSB)
+ JB = TSB;
+ // Diamond: "if (P) then TB; else FB;".
+ } else {
+ // TOk && !FOk
+ if (TSB == FB) {
+ JB = FB;
+ FB = 0;
+ }
+ }
+ } else {
+ // !TOk && FOk (at least one must be true by now).
+ if (FSB == TB) {
+ JB = TB;
+ TB = 0;
+ }
+ }
+ // Don't try to predicate loop preheaders.
+ if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
+ DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
+ << " is a loop preheader. Skipping.\n");
+ return false;
+ }
+
+ FP = FlowPattern(B, PredR, TB, FB, JB);
+ DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
+ return true;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
+// contains EH_LABEL.
+bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
+// that a block can never fall-through.
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
+ return true;
+ ++I;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
+ const {
+ if (!B)
+ return true;
+ if (B->isEHPad() || B->hasAddressTaken())
+ return false;
+ if (B->succ_size() == 0)
+ return false;
+
+ for (auto &MI : *B) {
+ if (MI.isDebugValue())
+ continue;
+ if (MI.isConditionalBranch())
+ return false;
+ unsigned Opc = MI.getOpcode();
+ bool IsJMP = (Opc == Hexagon::J2_jump);
+ if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI))
+ return false;
+ // Look for predicate registers defined by this instruction. It's ok
+ // to speculate such an instruction, but the predicate register cannot
+ // be used outside of this block (or else it won't be possible to
+ // update the use of it after predication). PHI uses will be updated
+ // to use a result of a MUX, and a MUX cannot be created for predicate
+ // registers.
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
+ continue;
+ for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
+ if (U->getParent()->isPHI())
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isUse())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ const MachineInstr *DefI = MRI->getVRegDef(R);
+ // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
+ assert(DefI && "Expecting a reaching def in MRI");
+ if (DefI->isImplicitDef())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
+ if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition
+ return false;
+ if (FP.TrueB && !isValidCandidate(FP.TrueB))
+ return false;
+ if (FP.FalseB && !isValidCandidate(FP.FalseB))
+ return false;
+ // Check the PHIs in the join block. If any of them use a register
+ // that is defined as IMPLICIT_DEF, do not convert this. This can
+ // legitimately happen if one side of the split never executes, but
+ // the compiler is unable to prove it. That side may then seem to
+ // provide an "undef" value to the join block, however it will never
+ // execute at run-time. If we convert this case, the "undef" will
+ // be used in a MUX instruction, and that may seem like actually
+ // using an undefined value to other optimizations. This could lead
+ // to trouble further down the optimization stream, cause assertions
+ // to fail, etc.
+ if (FP.JoinB) {
+ const MachineBasicBlock &B = *FP.JoinB;
+ for (auto &MI : B) {
+ if (!MI.isPHI())
+ break;
+ if (usesUndefVReg(&MI))
+ return false;
+ unsigned DefR = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ if (RC == &Hexagon::PredRegsRegClass)
+ return false;
+ }
+ }
+ return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+ assert(B->pred_size() <= 2);
+ if (B->pred_size() < 2)
+ return 0;
+
+ unsigned Cost = 0;
+ MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+ for (I = B->begin(); I != E; ++I) {
+ const MachineOperand &RO1 = I->getOperand(1);
+ const MachineOperand &RO3 = I->getOperand(3);
+ assert(RO1.isReg() && RO3.isReg());
+ // Must have a MUX if the phi uses a subregister.
+ if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+ Cost++;
+ continue;
+ }
+ MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+ MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+ if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+ Cost++;
+ }
+ return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+ const MachineBasicBlock *B) const {
+ unsigned PredDefs = 0;
+ for (auto &MI : *B) {
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+ PredDefs++;
+ }
+ }
+ return PredDefs;
+}
+
+
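+// Estimate whether the conversion is worthwhile. A hypothetical sketch of
+// the size check below, assuming 4-instruction packets and the default
+// eif-limit of 6: with TS = 2 and FS = 1, Spare = (4-2) + (4-1) = 5 and
+// TotalIn = 3, so the test TotalIn >= SizeLimit+Spare does not reject it.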
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+ if (FP.TrueB && FP.FalseB) {
+
+ // Do not if-convert if the branch is one-sided.
+ if (MBPI) {
+ BranchProbability Prob(9, 10);
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+ return false;
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+ return false;
+ }
+
+ // If both sides are predicable, convert them if they join, and the
+ // join block has no other predecessors.
+ MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+ MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+ if (TSB != FSB)
+ return false;
+ if (TSB->pred_size() != 2)
+ return false;
+ }
+
+ // Calculate the total size of the predicated blocks.
+ // Use the instruction count, excluding branches, as an approximation of
+ // the code size. If a predicated block is smaller than a packet, count
+ // the spare room in the packet that could be filled with the
+ // predicated/speculated instructions.
+ unsigned TS = 0, FS = 0, Spare = 0;
+ if (FP.TrueB) {
+ TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+ if (TS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-TS;
+ }
+ if (FP.FalseB) {
+ FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+ if (FS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-FS;
+ }
+ unsigned TotalIn = TS+FS;
+ DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+ << TotalIn << ", spare room: " << Spare << "\n");
+ if (TotalIn >= SizeLimit+Spare)
+ return false;
+
+ // Count the number of PHI nodes that will need to be updated (converted
+ // to MUX). Those can be later converted to predicated instructions, so
+ // they aren't always adding extra cost.
+ // KLUDGE: Also, count the number of predicate register definitions in
+ // each block. The scheduler may increase the register pressure from
+ // these and cause expensive spills (e.g. bitmnp01).
+ unsigned TotalPh = 0;
+ unsigned PredDefs = countPredicateDefs(FP.SplitB);
+ if (FP.JoinB) {
+ TotalPh = computePhiCost(FP.JoinB);
+ PredDefs += countPredicateDefs(FP.JoinB);
+ } else {
+ if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.TrueB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.FalseB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ }
+ DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+ << TotalPh << "\n");
+ if (TotalIn+TotalPh >= SizeLimit+Spare)
+ return false;
+
+ DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+ if (PredDefs > 4)
+ return false;
+
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Visit all dominated blocks from the same loop first, then process B.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ // We will change the CFG/DT during this traversal, so take precautions
+ // to avoid problems related to invalidated iterators. In fact, processing
+ // a child C of B cannot cause another child to be removed, but it can
+ // cause a new child to be added (one that was a child of C before C
+ // itself was removed). This new child, however, would have been processed
+ // prior to processing B, so there is no need to process it again.
+ // Simply keep a list of children of B, and traverse that list.
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ if (!Deleted.count(SB))
+ Changed |= visitBlock(SB, L);
+ }
+ // When walking down the dominator tree, we want to traverse through
+ // blocks from nested (other) loops, because they can dominate blocks
+ // that are in L. Skip the non-L blocks only after the tree traversal.
+ if (MLI->getLoopFor(B) != L)
+ return Changed;
+
+ FlowPattern FP;
+ if (!matchFlowPattern(B, L, FP))
+ return Changed;
+
+ if (!isValid(FP)) {
+ DEBUG(dbgs() << "Conversion is not valid\n");
+ return Changed;
+ }
+ if (!isProfitable(FP)) {
+ DEBUG(dbgs() << "Conversion is not profitable\n");
+ return Changed;
+ }
+
+ convert(FP);
+ simplifyFlowGraph(FP);
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
+ MachineBasicBlock *HB = L ? L->getHeader() : 0;
+ DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+ : dbgs() << "Visiting function") << "\n");
+ bool Changed = false;
+ if (L) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ }
+
+ MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
+ Changed |= visitBlock(L ? HB : EntryB, L);
+ return Changed;
+}
+
+
+bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
+ const {
+ // Exclude post-increment stores. Those return a value, so we cannot
+ // predicate them.
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+ switch (Opc) {
+ // Store byte:
+ case S2_storerb_io: case S4_storerb_rr:
+ case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp:
+ // Store halfword:
+ case S2_storerh_io: case S4_storerh_rr:
+ case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp:
+ // Store upper halfword:
+ case S2_storerf_io: case S4_storerf_rr:
+ case S2_storerfabs: case S2_storerfgp:
+ // Store word:
+ case S2_storeri_io: case S4_storeri_rr:
+ case S2_storeriabs: case S4_storeiri_io: case S2_storerigp:
+ // Store doubleword:
+ case S2_storerd_io: case S4_storerd_rr:
+ case S2_storerdabs: case S2_storerdgp:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
+ const {
+ if (MI->mayLoad() || MI->mayStore())
+ return false;
+ if (MI->isCall() || MI->isBarrier() || MI->isBranch())
+ return false;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+
+ return true;
+}
+
+
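+// Map a store opcode to its predicated counterpart. For example, per the
+// table below, a word store S2_storeri_io predicated on a false condition
+// becomes S2_pstorerif_io, i.e. roughly "if (!Pv) memw(...) = Rt"
+// (illustrative assembly syntax).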
+unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
+ bool IfTrue) const {
+ // Exclude post-increment stores.
+ using namespace Hexagon;
+ switch (Opc) {
+ case S2_storerb_io:
+ return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io;
+ case S4_storerb_rr:
+ return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr;
+ case S2_storerbabs:
+ case S2_storerbgp:
+ return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs;
+ case S4_storeirb_io:
+ return IfTrue ? S4_storeirbt_io : S4_storeirbf_io;
+ case S2_storerh_io:
+ return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io;
+ case S4_storerh_rr:
+ return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr;
+ case S2_storerhabs:
+ case S2_storerhgp:
+ return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs;
+ case S2_storerf_io:
+ return IfTrue ? S2_pstorerft_io : S2_pstorerff_io;
+ case S4_storerf_rr:
+ return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr;
+ case S2_storerfabs:
+ case S2_storerfgp:
+ return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs;
+ case S4_storeirh_io:
+ return IfTrue ? S4_storeirht_io : S4_storeirhf_io;
+ case S2_storeri_io:
+ return IfTrue ? S2_pstorerit_io : S2_pstorerif_io;
+ case S4_storeri_rr:
+ return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr;
+ case S2_storeriabs:
+ case S2_storerigp:
+ return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs;
+ case S4_storeiri_io:
+ return IfTrue ? S4_storeirit_io : S4_storeirif_io;
+ case S2_storerd_io:
+ return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io;
+ case S4_storerd_rr:
+ return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr;
+ case S2_storerdabs:
+ case S2_storerdgp:
+ return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs;
+ }
+ llvm_unreachable("Unexpected opcode");
+ return 0;
+}
+
+
+void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineInstr *MI,
+ unsigned PredR, bool IfTrue) {
+ DebugLoc DL;
+ if (At != ToB->end())
+ DL = At->getDebugLoc();
+ else if (!ToB->empty())
+ DL = ToB->back().getDebugLoc();
+
+ unsigned Opc = MI->getOpcode();
+
+ if (isPredicableStore(MI)) {
+ unsigned COpc = getCondStoreOpcode(Opc, IfTrue);
+ assert(COpc);
+ MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc))
+ .addReg(PredR);
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ MIB.addOperand(*MO);
+
+ // Set memory references.
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ MI->eraseFromParent();
+ return;
+ }
+
+ if (Opc == Hexagon::J2_jump) {
+ MachineBasicBlock *TB = MI->getOperand(0).getMBB();
+ const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt
+ : Hexagon::J2_jumpf);
+ BuildMI(*ToB, At, DL, D)
+ .addReg(PredR)
+ .addMBB(TB);
+ MI->eraseFromParent();
+ return;
+ }
+
+ // Print the offending instruction unconditionally as we are about to
+ // abort.
+ dbgs() << *MI;
+ llvm_unreachable("Unexpected instruction");
+}
+
+
+// Predicate/speculate non-branch instructions from FromB into block ToB.
+// Leave the branches alone; they will be handled later. Note that at this
+// point FromB should have at most one branch, and it should be
+// unconditional.
+void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue) {
+ DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
+ MachineBasicBlock::iterator End = FromB->getFirstTerminator();
+ MachineBasicBlock::iterator I, NextI;
+
+ for (I = FromB->begin(); I != End; I = NextI) {
+ assert(!I->isPHI());
+ NextI = std::next(I);
+ if (isSafeToSpeculate(&*I))
+ ToB->splice(At, FromB, I);
+ else
+ predicateInstr(ToB, At, &*I, PredR, IfTrue);
+ }
+}
+
+
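+// A hypothetical sketch of the rewrite done here, with made-up registers:
+// vreg12 = PHI vreg10, <true_b>, vreg11, <false_b>
+// becomes, with a MUX inserted at the end of the split block:
+// vreg46 = C2_mux PredR, vreg10, vreg11
+// vreg12 = PHI vreg46, <split_b>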
+void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
+ const FlowPattern &FP) {
+ // Visit all PHI nodes in the WhereB block and generate MUX instructions
+ // in the split block. Update the PHI nodes with the values of the MUX.
+ auto NonPHI = WhereB->getFirstNonPHI();
+ for (auto I = WhereB->begin(); I != NonPHI; ++I) {
+ MachineInstr *PN = &*I;
+ // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
+ unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0;
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1);
+ if (BO.getMBB() == FP.SplitB)
+ SR = RO.getReg(), SSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.TrueB)
+ TR = RO.getReg(), TSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.FalseB)
+ FR = RO.getReg(), FSR = RO.getSubReg();
+ else
+ continue;
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ if (TR == 0)
+ TR = SR, TSR = SSR;
+ else if (FR == 0)
+ FR = SR, FSR = SSR;
+ assert(TR && FR);
+
+ using namespace Hexagon;
+ unsigned DR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux)
+ : TII->get(MUX64_rr);
+
+ MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
+ DebugLoc DL;
+ if (MuxAt != FP.SplitB->end())
+ DL = MuxAt->getDebugLoc();
+ unsigned MuxR = MRI->createVirtualRegister(RC);
+ BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
+ .addReg(FP.PredR)
+ .addReg(TR, 0, TSR)
+ .addReg(FR, 0, FSR);
+
+ PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+ PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
+ }
+}
+
+
+void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
+ MachineBasicBlock *TSB = 0, *FSB = 0;
+ MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
+ assert(OldTI != FP.SplitB->end());
+ DebugLoc DL = OldTI->getDebugLoc();
+
+ if (FP.TrueB) {
+ TSB = *FP.TrueB->succ_begin();
+ predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true);
+ }
+ if (FP.FalseB) {
+ FSB = *FP.FalseB->succ_begin();
+ MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator();
+ predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false);
+ }
+
+ // Regenerate new terminators in the split block and update the successors.
+ // First, remember any information that may be needed later and remove the
+ // existing terminators/successors from the split block.
+ MachineBasicBlock *SSB = 0;
+ FP.SplitB->erase(OldTI, FP.SplitB->end());
+ while (FP.SplitB->succ_size() > 0) {
+ MachineBasicBlock *T = *FP.SplitB->succ_begin();
+ // It's possible that the split block had a successor that is not a
+ // predicated block. This could only happen if there was only one block
+ // to be predicated. Example:
+ // split_b:
+ // if (p) jump true_b
+ // jump unrelated2_b
+ // unrelated1_b:
+ // ...
+ // unrelated2_b: ; can have other predecessors, so it's not "false_b"
+ // jump other_b
+ // true_b: ; only reachable from split_b, can be predicated
+ // ...
+ //
+ // Find this successor (SSB) if it exists.
+ if (T != FP.TrueB && T != FP.FalseB) {
+ assert(!SSB);
+ SSB = T;
+ }
+ FP.SplitB->removeSuccessor(FP.SplitB->succ_begin());
+ }
+
+ // Insert new branches and update the successors of the split block. This
+ // may create unconditional branches to the layout successor, etc., but
+ // that will be cleaned up later. For now, make sure that correct code is
+ // generated.
+ if (FP.JoinB) {
+ assert(!SSB || SSB == FP.JoinB);
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(FP.JoinB);
+ FP.SplitB->addSuccessor(FP.JoinB);
+ } else {
+ bool HasBranch = false;
+ if (TSB) {
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
+ .addReg(FP.PredR)
+ .addMBB(TSB);
+ FP.SplitB->addSuccessor(TSB);
+ HasBranch = true;
+ }
+ if (FSB) {
+ const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
+ : TII->get(Hexagon::J2_jumpf);
+ MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
+ if (!HasBranch)
+ MIB.addReg(FP.PredR);
+ MIB.addMBB(FSB);
+ FP.SplitB->addSuccessor(FSB);
+ }
+ if (SSB) {
+ // This cannot happen if both TSB and FSB are set. [TF]SB are the
+ // successor blocks of the TrueB and FalseB (or null if the TrueB
+ // or FalseB block is null). SSB is the potential successor block
+ // of the SplitB that is neither TrueB nor FalseB.
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(SSB);
+ FP.SplitB->addSuccessor(SSB);
+ }
+ }
+
+ // What is left to do is to update the PHI nodes that could have entries
+ // referring to predicated blocks.
+ if (FP.JoinB) {
+ updatePhiNodes(FP.JoinB, FP);
+ } else {
+ if (TSB)
+ updatePhiNodes(TSB, FP);
+ if (FSB)
+ updatePhiNodes(FSB, FP);
+ // Nothing to update in SSB, since SSB's predecessors haven't changed.
+ }
+}
+
+
+void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+
+ // Transfer the immediate dominator information from B to its descendants.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ MachineDomTreeNode *IDN = N->getIDom();
+ if (IDN) {
+ MachineBasicBlock *IDB = IDN->getBlock();
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ MDT->changeImmediateDominator(SB, IDB);
+ }
+ }
+
+ while (B->succ_size() > 0)
+ B->removeSuccessor(B->succ_begin());
+
+ for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
+ (*I)->removeSuccessor(B, true);
+
+ Deleted.insert(B);
+ MDT->eraseNode(B);
+ MFN->erase(B->getIterator());
+}
+
+
+void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+ MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
+ for (I = B->begin(); I != NonPHI; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *PN = &*I;
+ assert(PN->getNumOperands() == 3 && "Invalid phi node");
+ MachineOperand &UO = PN->getOperand(1);
+ unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
+ unsigned DefR = PN->getOperand(0).getReg();
+ unsigned NewR = UseR;
+ if (UseSR) {
+ // MRI.replaceVregUsesWith does not allow updating the subregister,
+ // so instead of iterating over the uses here, create a copy into a
+ // "non-subregistered" register.
+ DebugLoc DL = PN->getDebugLoc();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ NewR = MRI->createVirtualRegister(RC);
+ NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+ .addReg(UseR, 0, UseSR);
+ }
+ MRI->replaceRegWith(DefR, NewR);
+ B->erase(I);
+ }
+}
+
+
+void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
+ MachineBasicBlock *NewB) {
+ for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *SB = *I;
+ MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
+ for (P = SB->begin(); P != N; ++P) {
+ MachineInstr *PN = &*P;
+ for (MIOperands MO(PN); MO.isValid(); ++MO)
+ if (MO->isMBB() && MO->getMBB() == OldB)
+ MO->setMBB(NewB);
+ }
+ }
+}
+
+
+void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
+ MachineBasicBlock *SuccB) {
+ DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+ << PrintMB(SuccB) << "\n");
+ bool TermOk = hasUncondBranch(SuccB);
+ eliminatePhis(SuccB);
+ TII->RemoveBranch(*PredB);
+ PredB->removeSuccessor(SuccB);
+ PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+ MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
+ for (I = SuccB->succ_begin(); I != E; ++I)
+ PredB->addSuccessor(*I);
+ PredB->normalizeSuccProbs();
+ replacePhiEdges(SuccB, PredB);
+ removeBlock(SuccB);
+ if (!TermOk)
+ PredB->updateTerminator();
+}
+
+
+void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+ if (FP.TrueB)
+ removeBlock(FP.TrueB);
+ if (FP.FalseB)
+ removeBlock(FP.FalseB);
+
+ FP.SplitB->updateTerminator();
+ if (FP.SplitB->succ_size() != 1)
+ return;
+
+ MachineBasicBlock *SB = *FP.SplitB->succ_begin();
+ if (SB->pred_size() != 1)
+ return;
+
+ // By now, the split block has only one successor (SB), and SB has only
+ // one predecessor. We can try to merge them. We will need to update the
+ // terminators in FP.SplitB and SB, and that requires a working
+ // AnalyzeBranch, which fails on Hexagon for blocks that have EH_LABELs.
+ // However, if SB ends with an unconditional branch, we won't need to
+ // touch the terminators.
+ if (!hasEHLabel(SB) || hasUncondBranch(SB))
+ mergeBlocks(FP.SplitB, SB);
+}
+
+
+bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MFN = &MF;
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+ nullptr;
+
+ Deleted.clear();
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ Changed |= visitLoop(0);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+FunctionPass *llvm::createHexagonEarlyIfConversion() {
+ return new HexagonEarlyIfConversion();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index e4c8d8f..6e2dbc0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -74,7 +74,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
++MII) {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 21a8996..7a52a1c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -147,6 +147,48 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
"shrink-wraps"));
+static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
+ cl::Hidden, cl::desc("Use allocframe more conservatively"));
+
+
+namespace llvm {
+ void initializeHexagonCallFrameInformationPass(PassRegistry&);
+ FunctionPass *createHexagonCallFrameInformation();
+}
+
+namespace {
+ class HexagonCallFrameInformation : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonCallFrameInformation() : MachineFunctionPass(ID) {
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeHexagonCallFrameInformationPass(PR);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ };
+
+ char HexagonCallFrameInformation::ID = 0;
+}
+
+bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
+ auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
+ bool NeedCFI = MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ if (!NeedCFI)
+ return false;
+ HFI.insertCFIInstructions(MF);
+ return true;
+}
+
+INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
+ "Hexagon call frame information", false, false)
+
+FunctionPass *llvm::createHexagonCallFrameInformation() {
+ return new HexagonCallFrameInformation();
+}
+
+
namespace {
/// Map a register pair Reg to the subregister that has the greater "number",
/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
@@ -370,11 +412,11 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
insertEpilogueInBlock(*EpilogB);
} else {
for (auto &B : MF)
- if (!B.empty() && B.back().isReturn())
+ if (B.isReturnBlock())
insertCSRRestoresInBlock(B, CSI, HRI);
for (auto &B : MF)
- if (!B.empty() && B.back().isReturn())
+ if (B.isReturnBlock())
insertEpilogueInBlock(B);
}
}
@@ -383,10 +425,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
DebugLoc dl;
@@ -405,10 +444,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
bool AlignStack = (MaxAlign > getStackAlignment());
- // Check if frame moves are needed for EH.
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- MF.getFunction()->needsUnwindTableEntry();
-
// Get the number of bytes to allocate from the FrameInfo.
unsigned NumBytes = MFI->getStackSize();
unsigned SP = HRI.getStackRegister();
@@ -424,14 +459,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
MI->eraseFromParent();
}
- //
- // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. Think
- // that this shouldn't be required, but doing so now because gcc does and
- // gdb can't break at the start of the function without it. Will remove if
- // this turns out to be a gdb bug.
- //
- bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None);
- if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF))
+ if (!hasFP(MF))
return;
// Check for overflow.
@@ -469,92 +497,11 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
.addReg(SP)
.addImm(-int64_t(MaxAlign));
}
-
- if (needsFrameMoves) {
- std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions();
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
-
- // Advance CFA. DW_CFA_def_cfa
- unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
- unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
-
- // CFA = FP + 8
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
- FrameLabel, DwFPReg, -8));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // R31 (return addr) = CFA - #4
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- FrameLabel, DwRAReg, -4));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // R30 (frame ptr) = CFA - #8)
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- FrameLabel, DwFPReg, -8));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- unsigned int regsToMove[] = {
- Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
- Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
- Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
- Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
- Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10,
- Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister
- };
-
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) {
- for (unsigned I = 0, E = CSI.size(); I < E; ++I) {
- if (CSI[I].getReg() == regsToMove[i]) {
- // Subtract 8 to make room for R30 and R31, which are added above.
- int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8;
-
- if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) {
- unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- DwarfReg, Offset));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else {
- // Split the double regs into subregs, and generate appropriate
- // cfi_offsets.
- // The only reason, we are split double regs is, llvm-mc does not
- // understand paired registers for cfi_offset.
- // Eg .cfi_offset r1:0, -64
- unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI);
- unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false);
- unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
- unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
- unsigned HiCFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- HiDwarfReg, Offset+4));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(HiCFIIndex);
- unsigned LoCFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- LoDwarfReg, Offset));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(LoCFIIndex);
- }
- break;
- }
- } // for CSI.size()
- } // for regsToMove
- } // needsFrameMoves
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
- //
- // Only insert deallocframe if we need to. Also at -O0. See comment
- // in insertPrologueInBlock above.
- //
- if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None)
+ if (!hasFP(MF))
return;
auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
@@ -630,12 +577,172 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
}
+namespace {
+ bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
+ if (!It->isBundle())
+ return It->getOpcode() == Hexagon::S2_allocframe;
+ auto End = It->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
+ while (++I != End && I->isBundled())
+ if (I->getOpcode() == Hexagon::S2_allocframe)
+ return true;
+ return false;
+ }
+
+ MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
+ for (auto &I : B)
+ if (IsAllocFrame(I))
+ return I;
+ return B.end();
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
+ for (auto &B : MF) {
+ auto AF = FindAllocFrame(B);
+ if (AF == B.end())
+ continue;
+ insertCFIInstructionsAt(B, ++AF);
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // If CFI instructions have debug information attached, something goes
+ // wrong with the final assembly generation: the prolog_end is placed
+ // in the wrong location.
+ DebugLoc DL;
+ const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
+
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+
+ if (hasFP(MF)) {
+ unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
+ unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
+
+ // Define CFA via an offset from the value of FP.
+ //
+ // -8 -4 0 (SP)
+ // --+----+----+---------------------
+ // | FP | LR | increasing addresses -->
+ // --+----+----+---------------------
+ // | +-- Old SP (before allocframe)
+ // +-- New FP (after allocframe)
+ //
+ // MCCFIInstruction::createDefCfa subtracts the offset from the register.
+ // MCCFIInstruction::createOffset takes the offset without sign change.
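+ // A sketch of the resulting directives, assuming the usual Hexagon
+ // register names (r30 = FP, r31 = LR) and the layout above:
+ // .cfi_def_cfa r30, 8
+ // .cfi_offset r31, -4
+ // .cfi_offset r30, -8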
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(DefCfa));
+ // R31 (return addr) = CFA - 4
+ auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR31));
+ // R30 (frame ptr) = CFA - 8
+ auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR30));
+ }
+
+ static unsigned int RegsToMove[] = {
+ Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
+ Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
+ Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
+ Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
+ Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9,
+ Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
+ Hexagon::NoRegister
+ };
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
+ unsigned Reg = RegsToMove[i];
+ auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
+ return C.getReg() == Reg;
+ };
+ auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
+ if (F == CSI.end())
+ continue;
+
+ // Subtract 8 to make room for R30 and R31, which are added above.
+ unsigned FrameReg;
+ int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
+
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
+ unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
+ auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffReg));
+ } else {
+ // Split the double regs into subregs and generate the appropriate
+ // cfi_offsets. The only reason we split double regs is that llvm-mc
+ // does not understand paired registers for cfi_offset,
+ // e.g. .cfi_offset r1:0, -64
+
+ unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
+ unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+ unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+ unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+ auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
+ Offset+4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffHi));
+ auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffLo));
+ }
+ }
+}
+
+
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
- return MFI->hasCalls() || MFI->getStackSize() > 0 ||
- FuncInfo->hasClobberLR();
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ bool HasFixed = MFI.getNumFixedObjects();
+ bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
+ .getLocalFrameObjectCount();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+
+ // Always insert ALLOCFRAME at -O0 for the debugger. This shouldn't be
+ // required, but we do it because gcc does and gdb can't break at the
+ // start of the function without it. Will remove if this turns out to be
+ // a gdb bug.
+ //
+ if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ // By default we want to use SP (since it's always there). FP requires
+ // some setup (i.e. ALLOCFRAME).
+ // Fixed and preallocated objects need FP if the distance from them to
+ // the SP is unknown (as is with alloca or aligna).
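+ // E.g. with a variable-sized alloca, the gap between SP and the fixed
+ // objects is only known at run time, so they must be reached from FP.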
+ if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ return true;
+
+ if (MFI.getStackSize() > 0 && UseAllocframe)
+ return true;
+
+ if (MFI.hasCalls() ||
+ MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ return true;
+
+ return false;
}
@@ -718,9 +825,89 @@ static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
}
-int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- return MF.getFrameInfo()->getObjectOffset(FI);
+int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ // Large parts of this code are shared with HRI::eliminateFrameIndex.
+ int Offset = MFI.getObjectOffset(FI);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
+
+ unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
+ unsigned AP = 0;
+ if (const MachineInstr *AI = getAlignaInstr(MF))
+ AP = AI->getOperand(0).getReg();
+ unsigned FrameSize = MFI.getStackSize();
+
+ bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
+ // Use FP at -O0, except when there are objects with extra alignment.
+ // That additional alignment requirement may cause a pad to be inserted,
+ // which will make it impossible to use FP to access objects located
+ // past the pad.
+ if (NoOpt && !HasExtraAlign)
+ UseFP = true;
+ if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+ // Fixed and preallocated objects will be located before any padding
+ // so FP must be used to access them.
+ UseFP |= (HasAlloca || HasExtraAlign);
+ } else {
+ if (HasAlloca) {
+ if (HasExtraAlign)
+ UseAP = true;
+ else
+ UseFP = true;
+ }
+ }
+
+ // If FP was picked as the base register, the function must have an FP.
+ bool HasFP = hasFP(MF);
+ assert((HasFP || !UseFP) && "This function must have frame pointer");
+
+ // Having FP implies allocframe. Allocframe will store extra 8 bytes:
+ // FP/LR. If the base register is used to access an object across these
+ // 8 bytes, then the offset will need to be adjusted by 8.
+ //
+ // After allocframe:
+ // HexagonISelLowering adds 8 to ---+
+ // the offsets of all stack-based |
+ // arguments (*) |
+ // |
+ // getObjectOffset < 0 0 8 getObjectOffset >= 8
+ // ------------------------+-----+------------------------> increasing
+ // <local objects> |FP/LR| <input arguments> addresses
+ // -----------------+------+-----+------------------------>
+ // | |
+ // SP/AP point --+ +-- FP points here (**)
+ // somewhere on
+ // this side of FP/LR
+ //
+ // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
+ // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
+
+ // The lowering assumes that FP/LR is present, and so the offsets of
+ // the formal arguments start at 8. If FP/LR is not there we need to
+ // reduce the offset by 8.
+ if (Offset > 0 && !HasFP)
+ Offset -= 8;
+
+ if (UseFP)
+ FrameReg = FP;
+ else if (UseAP)
+ FrameReg = AP;
+ else
+ FrameReg = SP;
+
+ // Calculate the actual offset in the instruction. If there is no FP
+ // (in other words, no allocframe), then SP will not be adjusted (i.e.
+ // there will be no SP -= FrameSize), so the frame size should not be
+ // added to the calculated offset.
+ int RealOffset = Offset;
+ if (!UseFP && !UseAP && HasFP)
+ RealOffset = FrameSize+Offset;
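+ // E.g. an SP-relative local with getObjectOffset() == -16 in a frame of
+ // 24 bytes yields RealOffset == 8, i.e. an access at SP+8 (numbers are
+ // illustrative only).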
+ return RealOffset;
}
@@ -731,7 +918,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.begin();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
if (useSpillFunction(MF, CSI)) {
unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
@@ -739,7 +926,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
MachineInstr *SaveRegsCall =
- BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
+ BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
.addExternalSymbol(SpillFun);
// Add callee-saved registers as use.
addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
@@ -757,7 +944,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
int FI = CSI[i].getFrameIdx();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+ HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
if (IsKill)
MBB.addLiveIn(Reg);
}
@@ -772,7 +959,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
if (useRestoreFunction(MF, CSI)) {
bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
@@ -787,14 +974,14 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
if (HasTC) {
unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
- DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc))
+ DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
.addExternalSymbol(RestoreFn);
} else {
// The block has a return.
MachineBasicBlock::iterator It = MBB.getFirstTerminator();
assert(It->isReturn() && std::next(It) == MBB.end());
unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
- DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc))
+ DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
.addExternalSymbol(RestoreFn);
// Transfer the function live-out registers.
DeallocCall->copyImplicitOps(MF, It);
@@ -807,7 +994,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
int FI = CSI[i].getFrameIdx();
- TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
+ HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
return true;
}
@@ -832,9 +1019,9 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
// via AP, which may not be available at the particular place in the program.
MachineFrameInfo *MFI = MF.getFrameInfo();
bool HasAlloca = MFI->hasVarSizedObjects();
- bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment());
+ bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
- if (!HasAlloca || !HasAligna)
+ if (!HasAlloca || !NeedsAlign)
return;
unsigned LFS = MFI->getLocalFrameSize();
@@ -864,13 +1051,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
- if (MRI.isPhysRegUsed(FreeReg))
+ if (!MRI.reg_nodbg_empty(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
- if (MRI.isPhysRegUsed(*AI)) {
+ if (!MRI.reg_nodbg_empty(*AI)) {
IsCurrentRegUsed = true;
break;
}
@@ -896,7 +1083,7 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
MachineBasicBlock::iterator NextII;
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
@@ -1210,7 +1397,8 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
}
-MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
+const MachineInstr *HexagonFrameLowering::getAlignaInstr(
+ const MachineFunction &MF) const {
for (auto &B : MF)
for (auto &I : B)
if (I.getOpcode() == Hexagon::ALIGNA)
@@ -1219,6 +1407,7 @@ MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
}
+// FIXME: Use Function::optForSize().
inline static bool isOptSize(const MachineFunction &MF) {
AttributeSet AF = MF.getFunction()->getAttributes();
return AF.hasAttribute(AttributeSet::FunctionIndex,
@@ -1226,8 +1415,7 @@ inline static bool isOptSize(const MachineFunction &MF) {
}
inline static bool isMinSize(const MachineFunction &MF) {
- AttributeSet AF = MF.getFunction()->getAttributes();
- return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ return MF.getFunction()->optForMinSize();
}
@@ -1289,4 +1477,3 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
: SpillFuncThreshold;
return Threshold < NumCSI;
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index d39ee2c..683b303 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -51,7 +51,8 @@ public:
bool targetHandlesStackFrameRounding() const override {
return true;
}
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override;
const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
@@ -73,7 +74,9 @@ public:
const override;
bool needsAligna(const MachineFunction &MF) const;
- MachineInstr *getAlignaInstr(MachineFunction &MF) const;
+ const MachineInstr *getAlignaInstr(const MachineFunction &MF) const;
+
+ void insertCFIInstructions(MachineFunction &MF) const;
private:
typedef std::vector<CalleeSavedInfo> CSIVect;
@@ -86,6 +89,8 @@ private:
const HexagonRegisterInfo &HRI) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI) const;
+ void insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const;
void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const;
bool replacePredRegPseudoSpillCode(MachineFunction &MF) const;
@@ -94,7 +99,7 @@ private:
void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
MachineBasicBlock *&EpilogB) const;
- bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const;
+ bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const;
bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index 4d32208..f26e2ff 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -195,7 +195,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
return false;
}
- IRBuilder<> IRB(BB, In);
+ IRBuilder<> IRB(In);
Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
: Intrinsic::hexagon_S2_extractup;
Module *Mod = BB->getParent()->getParent();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 096da94..64a2b6c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -77,9 +77,8 @@ namespace {
namespace {
// Set of virtual registers, based on BitVector.
struct RegisterSet : private BitVector {
- RegisterSet() : BitVector() {}
+ RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
- RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
using BitVector::clear;
@@ -1496,7 +1495,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
// version of DCE that preserves lifetime markers. Without it, merging
// of stack objects can fail to recognize and merge disjoint objects
// leading to unnecessary stack growth.
- Changed |= removeDeadCode(MDT->getRootNode());
+ Changed = removeDeadCode(MDT->getRootNode());
const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
BitTracker BTLoc(HE, MF);
@@ -1534,7 +1533,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
}
if (IFMap.empty())
- return false;
+ return Changed;
{
NamedRegionTimer _T("pruning", "hexinsert", TimingDetail);
@@ -1547,7 +1546,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
}
if (IFMap.empty())
- return false;
+ return Changed;
{
NamedRegionTimer _T("selection", "hexinsert", TimingDetail);
@@ -1572,13 +1571,15 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, n = Out.size(); i < n; ++i)
IFMap.erase(Out[i]);
}
+ if (IFMap.empty())
+ return Changed;
{
NamedRegionTimer _T("generation", "hexinsert", TimingDetail);
- Changed = generateInserts();
+ generateInserts();
}
- return Changed;
+ return true;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
new file mode 100644
index 0000000..c059d56
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -0,0 +1,319 @@
+//===--- HexagonGenMux.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// During instruction selection, MUX instructions are generated for
+// conditional assignments. Since such assignments often present an
+// opportunity to predicate instructions, HexagonExpandCondsets
+// expands MUXes into pairs of conditional transfers, and then proceeds
+// with predication of the producers/consumers of the registers involved.
+// This happens after exiting from the SSA form, but before the machine
+// instruction scheduler. After the scheduler and after the register
+// allocation there can be cases of pairs of conditional transfers
+// resulting from a MUX where neither of them was further predicated. If
+// these transfers are now placed far enough from the instruction defining
+// the predicate register, they cannot use the .new form. In such cases it
+// is better to collapse them back to a single MUX instruction.
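+//
+// As an illustrative sketch (register numbers made up), the pass turns
+//
+// p0 = cmp.eq(r5, #0)
+// ... (several unrelated instructions) ...
+// if (p0) r2 = r3 ; A2_tfrt
+// if (!p0) r2 = r4 ; A2_tfrf
+//
+// back into
+//
+// r2 = mux(p0, r3, r4) ; C2_mux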
+
+#define DEBUG_TYPE "hexmux"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonGenMux();
+ void initializeHexagonGenMuxPass(PassRegistry& Registry);
+}
+
+namespace {
+ class HexagonGenMux : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) {
+ initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon generate mux instructions";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+ struct CondsetInfo {
+ unsigned PredR;
+ unsigned TrueX, FalseX;
+ CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {}
+ };
+ struct DefUseInfo {
+ BitVector Defs, Uses;
+ DefUseInfo() : Defs(), Uses() {}
+ DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {}
+ };
+ struct MuxInfo {
+ MachineBasicBlock::iterator At;
+ unsigned DefR, PredR;
+ MachineOperand *SrcT, *SrcF;
+ MachineInstr *Def1, *Def2;
+ MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR,
+ MachineOperand *TOp, MachineOperand *FOp,
+ MachineInstr *D1, MachineInstr *D2)
+ : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1),
+ Def2(D2) {}
+ };
+ typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap;
+ typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap;
+ typedef SmallVector<MuxInfo,4> MuxInfoList;
+
+ bool isRegPair(unsigned Reg) const {
+ return Hexagon::DoubleRegsRegClass.contains(Reg);
+ }
+ void getSubRegs(unsigned Reg, BitVector &SRs) const;
+ void expandReg(unsigned Reg, BitVector &Set) const;
+ void getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const;
+ void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM);
+ bool isCondTransfer(unsigned Opc) const;
+ unsigned getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const;
+ bool genMuxInBlock(MachineBasicBlock &B);
+ };
+
+ char HexagonGenMux::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
+ "Hexagon generate mux instructions", false, false)
+
+
+void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
+ for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I)
+ SRs[*I] = true;
+}
+
+
+void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
+ if (isRegPair(Reg))
+ getSubRegs(Reg, Set);
+ else
+ Set[Reg] = true;
+}
+
+
+void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const {
+ // First, get the implicit defs and uses for this instruction.
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &D = HII->get(Opc);
+ if (const MCPhysReg *R = D.ImplicitDefs)
+ while (*R)
+ expandReg(*R++, Defs);
+ if (const MCPhysReg *R = D.ImplicitUses)
+ while (*R)
+ expandReg(*R++, Uses);
+
+ // Look over all operands, and collect explicit defs and uses.
+ for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ if (!Mo->isReg() || Mo->isImplicit())
+ continue;
+ unsigned R = Mo->getReg();
+ BitVector &Set = Mo->isDef() ? Defs : Uses;
+ expandReg(R, Set);
+ }
+}
+
+
+void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM) {
+ unsigned Index = 0;
+ unsigned NR = HRI->getNumRegs();
+ BitVector Defs(NR), Uses(NR);
+
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ I2X.insert(std::make_pair(MI, Index));
+ Defs.reset();
+ Uses.reset();
+ getDefsUses(MI, Defs, Uses);
+ DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses)));
+ Index++;
+ }
+}
+
+
+bool HexagonGenMux::isCondTransfer(unsigned Opc) const {
+ switch (Opc) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ return true;
+ }
+ return false;
+}
+
+
+unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const {
+ bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg();
+ if (IsReg1)
+ return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir;
+ if (IsReg2)
+ return Hexagon::C2_muxri;
+
+ // Neither is a register. The first source is extendable, but the second
+ // is not (s8).
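+ // Informally, C2_muxii is "Rd = mux(Pu, #s8ext, #s8)": the first
+ // immediate may be constant-extended, but the second must fit in 8
+ // signed bits, hence the isInt<8> check on Src2.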
+ if (Src2.isImm() && isInt<8>(Src2.getImm()))
+ return Hexagon::C2_muxii;
+
+ return 0;
+}
+
+
+bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ InstrIndexMap I2X;
+ DefUseInfoMap DUM;
+ buildMaps(B, I2X, DUM);
+
+ typedef DenseMap<unsigned,CondsetInfo> CondsetMap;
+ CondsetMap CM;
+ MuxInfoList ML;
+
+ MachineBasicBlock::iterator NextI, End = B.end();
+ for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
+ MachineInstr *MI = &*I;
+ NextI = std::next(I);
+ unsigned Opc = MI->getOpcode();
+ if (!isCondTransfer(Opc))
+ continue;
+ unsigned DR = MI->getOperand(0).getReg();
+ if (isRegPair(DR))
+ continue;
+
+ unsigned PR = MI->getOperand(1).getReg();
+ unsigned Idx = I2X.lookup(MI);
+ CondsetMap::iterator F = CM.find(DR);
+ bool IfTrue = HII->isPredicatedTrue(Opc);
+
+ // If there is no record of a conditional transfer for this register,
+ // or the predicate register differs, create a new record for it.
+ if (F != CM.end() && F->second.PredR != PR) {
+ CM.erase(F);
+ F = CM.end();
+ }
+ if (F == CM.end()) {
+ auto It = CM.insert(std::make_pair(DR, CondsetInfo()));
+ F = It.first;
+ F->second.PredR = PR;
+ }
+ CondsetInfo &CI = F->second;
+ if (IfTrue)
+ CI.TrueX = Idx;
+ else
+ CI.FalseX = Idx;
+ if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
+ continue;
+
+ // There is now a complete definition of DR, i.e. we have the predicate
+ // register, the definition if-true, and definition if-false.
+
+ // First, check if both definitions are far enough from the definition
+ // of the predicate register.
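+ // (If the predicate is defined only a few instructions earlier, the
+ // transfers can presumably use the dot-new form and are better left
+ // alone; see the comment at the top of this file.)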
+ unsigned MinX = std::min(CI.TrueX, CI.FalseX);
+ unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
+ unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
+ bool NearDef = false;
+ for (unsigned X = SearchX; X < MaxX; ++X) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (!DU.Defs[PR])
+ continue;
+ NearDef = true;
+ break;
+ }
+ if (NearDef)
+ continue;
+
+ // The predicate register is not defined in the last few instructions.
+ // Check if the conversion to MUX is possible (either "up", i.e. at the
+ // place of the earlier partial definition, or "down", where the later
+ // definition is located). Examine all defs and uses between these two
+ // definitions.
+ // SR1, SR2 - source registers from the first and the second definition.
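+ // "Up" would place the MUX at the first definition, so SR2 must not be
+ // defined in between; "down" places it at the second, so SR1 must not
+ // be redefined in between (this mirrors the CanUp/CanDown checks below).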
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
+ std::advance(It1, MinX);
+ std::advance(It2, MaxX);
+ MachineInstr *Def1 = It1, *Def2 = It2;
+ MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
+ unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
+ unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+ bool Failure = false, CanUp = true, CanDown = true;
+ for (unsigned X = MinX+1; X < MaxX; X++) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
+ Failure = true;
+ break;
+ }
+ if (CanDown && DU.Defs[SR1])
+ CanDown = false;
+ if (CanUp && DU.Defs[SR2])
+ CanUp = false;
+ }
+ if (Failure || (!CanUp && !CanDown))
+ continue;
+
+ MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
+ MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
+ // Prefer "down", since this will move the MUX farther away from the
+ // predicate definition.
+ MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+ ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
+ }
+
+ for (unsigned I = 0, N = ML.size(); I < N; ++I) {
+ MuxInfo &MX = ML[I];
+ MachineBasicBlock &B = *MX.At->getParent();
+ DebugLoc DL = MX.At->getDebugLoc();
+ unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
+ if (!MxOpc)
+ continue;
+ BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+ .addReg(MX.PredR)
+ .addOperand(*MX.SrcT)
+ .addOperand(*MX.SrcF);
+ B.erase(MX.Def1);
+ B.erase(MX.Def2);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ bool Changed = false;
+ for (auto &I : MF)
+ Changed |= genMuxInBlock(I);
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonGenMux() {
+ return new HexagonGenMux();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 6905c4f..d9675b5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -250,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
unsigned NewPR = MRI->createVirtualRegister(PredRC);
// For convertible instructions, do not modify them, so that they can
- // be coverted later. Generate a copy from Reg to NewPR.
+ // be converted later. Generate a copy from Reg to NewPR.
if (isConvertibleToPredForm(DefI)) {
MachineBasicBlock::iterator DefIt = DefI;
BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 53b6bf6..d20a809 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -727,9 +727,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// Phis that may feed into the loop.
LoopFeederMap LoopFeederPhi;
- // Check if the inital value may be zero and can be decremented in the first
+ // Check if the initial value may be zero and can be decremented in the first
// iteration. If the value is zero, the endloop instruction will not decrement
- // the loop counter, so we shoudn't generate a hardware loop in this case.
+ // the loop counter, so we shouldn't generate a hardware loop in this case.
if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
LoopFeederPhi))
return nullptr;
@@ -1288,14 +1288,14 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
typedef MachineBasicBlock::instr_iterator instr_iterator;
// Check if things are in order to begin with.
- for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
if (&*I == CmpI)
return true;
// Out of order.
unsigned PredR = CmpI->getOperand(0).getReg();
bool FoundBump = false;
- instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
+ instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
MachineInstr *In = &*I;
for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
@@ -1307,9 +1307,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
}
if (In == BumpI) {
- instr_iterator After = BumpI;
- instr_iterator From = CmpI;
- BB->splice(std::next(After), BB, From);
+ BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
FoundBump = true;
break;
}
@@ -1440,7 +1438,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
if (Comparison::isSigned(Cmp))
return false;
- // Check if there is a comparison of the inital value. If the initial value
+ // Check if there is a comparison of the initial value. If the initial value
// is greater than or not equal to another value, then assume this is a
// range check.
if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
@@ -1850,7 +1848,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
}
MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
- MF->insert(Header, NewPH);
+ MF->insert(Header->getIterator(), NewPH);
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9123057..a0da945 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -50,16 +50,21 @@ namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonTargetMachine& HTM;
const HexagonSubtarget *HST;
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
public:
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), HTM(tm) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+ HRI(nullptr) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ HRI = HST->getRegisterInfo();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -104,7 +109,6 @@ public:
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
SDNode *SelectBitOp(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
// XformMskToBitPosU5Imm - Returns the bit position which
// the single bit 32 bit mask represents.
@@ -139,8 +143,8 @@ public:
// type i32 where the negative literal is transformed into a positive literal
// for use in -= memops.
inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, DL, MVT::i32);
+ assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
}
// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
@@ -203,11 +207,10 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
// Intrinsics that return a predicate.
-static unsigned doesIntrinsicReturnPredicate(unsigned ID)
-{
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
switch (ID) {
default:
- return 0;
+ return false;
case Intrinsic::hexagon_C2_cmpeq:
case Intrinsic::hexagon_C2_cmpgt:
case Intrinsic::hexagon_C2_cmpgtu:
@@ -244,7 +247,7 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
case Intrinsic::hexagon_C2_tfrrp:
case Intrinsic::hexagon_S2_tstbit_i:
case Intrinsic::hexagon_S2_tstbit_r:
- return 1;
+ return true;
}
}
@@ -258,8 +261,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
MVT::Other, Base, TargetConst,
@@ -312,8 +314,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -378,29 +379,46 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
// loads.
ISD::LoadExtType ExtType = LD->getExtensionType();
bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+ bool HasVecOffset = false;
// Figure out the opcode.
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
+ LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::V6_vL32b_pi;
+ else
+ Opcode = Hexagon::V6_vL32b_ai;
+ // 128B (128-byte HVX vectors)
+ } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
+ LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::V6_vL32b_pi_128B;
+ else
+ Opcode = Hexagon::V6_vL32b_ai_128B;
} else
llvm_unreachable("unknown memory type");
@@ -411,7 +429,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
@@ -420,15 +438,25 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result, 0),
- SDValue(Result, 1),
- SDValue(Result, 2)
- };
- ReplaceUses(Froms, Tos, 3);
+ if (HasVecOffset) {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 2);
+ } else {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ }
return Result;
} else {
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
@@ -487,8 +515,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(StoredVT, Val)) {
+ if (HII->isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -496,7 +523,15 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
- else llvm_unreachable("unknown memory type");
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+ Opcode = Hexagon::V6_vS32b_pi;
+ }
+ // 128B (128-byte HVX vectors)
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+ Opcode = Hexagon::V6_vS32b_pi_128B;
+ } else llvm_unreachable("unknown memory type");
if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
@@ -530,6 +565,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+ Opcode = Hexagon::V6_vS32b_ai;
+ // 128B (128-byte HVX vectors)
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+ Opcode = Hexagon::V6_vS32b_ai_128B;
else llvm_unreachable("unknown memory type");
// Build regular store.
@@ -1113,14 +1155,12 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
}
if (Opc == ISD::AND) {
- if (((ValueVT == MVT::i32) &&
- (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
- ((ValueVT == MVT::i64) &&
- (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
- // If it's simple AND, do the normal op.
- return SelectCode(N);
- else
+ // Check if this is a bit-clearing AND, if not select code the usual way.
+ if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+ (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
Val = ~Val;
+ else
+ return SelectCode(N);
}
// If OR or AND is being fed by shl, srl and, sra don't do this change,
@@ -1128,7 +1168,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
// Traverse the DAG to see if there is shl, srl and sra.
if (Opc == ISD::OR || Opc == ISD::AND) {
switch (N->getOperand(0)->getOpcode()) {
- default: break;
+ default:
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -1137,23 +1178,24 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
}
// Make sure it's power of 2.
- unsigned bitpos = 0;
+ unsigned BitPos = 0;
if (Opc != ISD::FABS && Opc != ISD::FNEG) {
- if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
- ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+ (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
return SelectCode(N);
// Get the bit position.
- bitpos = countTrailingZeros(uint64_t(Val));
+ BitPos = countTrailingZeros(uint64_t(Val));
} else {
// For fabs and fneg, it's always the 31st bit.
- bitpos = 31;
+ BitPos = 31;
}
unsigned BitOpc = 0;
// Set the right opcode for bitwise operations.
- switch(Opc) {
- default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ switch (Opc) {
+ default:
+ llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
case ISD::AND:
case ISD::FABS:
BitOpc = Hexagon::S2_clrbit_i;
@@ -1169,7 +1211,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
SDNode *Result;
// Get the right SDVal for the opcode.
- SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+ SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
@@ -1198,7 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
MVT::i32, SDValue(Reg, 0));
// Clear/set/toggle hi or lo registers depending on the bit position.
- if (SubValueVT != MVT::f32 && bitpos < 32) {
+ if (SubValueVT != MVT::f32 && BitPos < 32) {
SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
SubregLO, SDVal);
const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
@@ -1207,7 +1249,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
dl, ValueVT, Ops);
} else {
if (Opc != ISD::FABS && Opc != ISD::FNEG)
- SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+ SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
SubregHI, SDVal);
const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
@@ -1328,25 +1370,12 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- unsigned CallCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- // Ignore call instructions.
- if (I->getOpcode() == ISD::CopyToReg)
- ++CallCount;
- UseCount++;
- }
-
- return (UseCount <= 1) || (CallCount > 1);
-
-}
void HexagonDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG &DAG = *CurDAG;
std::vector<SDNode*> Nodes;
- for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
- Nodes.push_back(I);
+ for (SDNode &Node : DAG.allnodes())
+ Nodes.push_back(&Node);
// Simplify: (or (select c x 0) z) -> (select c (or x z) z)
// (or (select c 0 y) z) -> (select c z (or y z))
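A quick sanity check of the two identities above, with `sel(c, x, y)` standing in for ISD::SELECT over plain integers (not SDNodes):

```cpp
// (or (select c x 0) z) == (select c (or x z) z)
// (or (select c 0 y) z) == (select c z (or y z))
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t sel(bool C, uint32_t X, uint32_t Y) { return C ? X : Y; }

int main() {
  for (int C = 0; C <= 1; ++C)
    for (uint32_t V : {0u, 5u, 0xFF00u})
      for (uint32_t Z : {0u, 3u, 0x0F0Fu}) {
        assert((sel(C, V, 0) | Z) == sel(C, V | Z, Z));
        assert((sel(C, 0, V) | Z) == sel(C, Z, V | Z));
      }
  return 0;
}
```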
@@ -1397,11 +1426,10 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
return;
MachineFrameInfo *MFI = MF->getFrameInfo();
- MachineBasicBlock *EntryBB = MF->begin();
+ MachineBasicBlock *EntryBB = &MF->front();
unsigned AR = FuncInfo->CreateReg(MVT::i32);
unsigned MaxA = MFI->getMaxAlignment();
- auto &HII = *HST.getInstrInfo();
- BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+ BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
.addImm(MaxA);
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c739afb..0167090 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-lowering"
-static cl::opt<bool>
-EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
+ cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
@@ -98,6 +98,9 @@ public:
}
// Implement calling convention for Hexagon.
+
+static bool IsHvxVectorType(MVT ty);
+
static bool
CC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -114,6 +117,11 @@ CC_Hexagon64(unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State);
static bool
+CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
RetCC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
@@ -129,6 +137,11 @@ RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State);
static bool
+RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -169,15 +182,43 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
+ if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
+ LocVT == MVT::v16i8) {
+ ofst = State.AllocateStack(16, 16);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
+ LocVT == MVT::v32i8) {
+ ofst = State.AllocateStack(32, 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
+ ofst = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
+ ofst = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8) {
+ ofst = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+
llvm_unreachable(nullptr);
}
-static bool
-CC_Hexagon (unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
if (ArgFlags.isByVal()) {
// Passed on stack.
unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
@@ -213,6 +254,17 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
return false;
}
+ if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (IsHvxVectorType(LocVT)) {
+ if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
return true; // CC didn't match.
}
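The CC_HexagonVector routine added below follows the usual allocate-register-else-spill shape. A container-backed model of that pattern (MiniCCState and its methods are illustrative, not CCState's real API):

```cpp
// Register-first, stack-fallback argument assignment, in miniature.
#include <array>
#include <cstdio>

struct MiniCCState {
  std::array<bool, 16> VecRegUsed{};   // models V0..V15
  unsigned StackTop = 0;

  int allocateReg() {                  // -1 once all registers are taken
    for (unsigned I = 0; I < VecRegUsed.size(); ++I)
      if (!VecRegUsed[I]) { VecRegUsed[I] = true; return int(I); }
    return -1;
  }
  unsigned allocateStack(unsigned Size, unsigned Align) {
    StackTop = (StackTop + Align - 1) & ~(Align - 1); // align, then carve
    unsigned Off = StackTop;
    StackTop += Size;
    return Off;
  }
};

int main() {
  MiniCCState S;
  for (int Arg = 0; Arg < 18; ++Arg) { // 18 vector args, only 16 registers
    int R = S.allocateReg();
    if (R >= 0)
      std::printf("arg %d -> V%d\n", Arg, R);
    else                               // spill: size == alignment here
      std::printf("arg %d -> stack+%u\n", Arg, S.allocateStack(64, 64));
  }
}
```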
@@ -260,10 +312,82 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
+static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
+ Hexagon::V2, Hexagon::V3,
+ Hexagon::V4, Hexagon::V5,
+ Hexagon::V6, Hexagon::V7,
+ Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11,
+ Hexagon::V12, Hexagon::V13,
+ Hexagon::V14, Hexagon::V15};
+ static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
+ Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5,
+ Hexagon::W6, Hexagon::W7};
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ // 128B Mode
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ return true;
+}
+
static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
if (LocVT == MVT::i1 ||
LocVT == MVT::i8 ||
@@ -282,8 +406,24 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
} else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
LocVT = MVT::i64;
LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
+ LocVT == MVT::v512i1) {
+ LocVT = MVT::v16i32;
+ ValVT = MVT::v16i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
+ (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
+ LocVT = MVT::v32i32;
+ ValVT = MVT::v32i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
+ LocVT = MVT::v64i32;
+ ValVT = MVT::v64i32;
+ LocInfo = CCValAssign::Full;
}
-
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
@@ -293,7 +433,10 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
-
+ if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
+ if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
return true; // CC didn't match.
}
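The return-value canonicalizations above are width-preserving relabelings: every HVX type of a given size folds onto one canonical i32 vector. The arithmetic checks out:

```cpp
// Each group shares the total bit width of its canonical form.
static_assert(64 * 8 == 512 && 32 * 16 == 512 && 8 * 64 == 512,
              "v64i8/v32i16/v8i64 all match v16i32 (512 bits)");
static_assert(128 * 8 == 1024 && 64 * 16 == 1024 && 16 * 64 == 1024,
              "v128i8/v64i16/v16i64 all match v32i32 (1024 bits)");
static_assert(256 * 8 == 2048 && 128 * 16 == 2048 && 32 * 64 == 2048,
              "v256i8/v128i16/v32i64 all match v64i32 (2048 bits)");
int main() { return 0; }
```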
@@ -328,6 +471,52 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
+static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ unsigned OffSiz = 64;
+ if (LocVT == MVT::v16i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ } else if (LocVT == MVT::v32i32) {
+ unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
+ if (unsigned Reg = State.AllocateReg(Req)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 128;
+ } else if (LocVT == MVT::v64i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 256;
+ }
+
+ unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
+
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
@@ -351,6 +540,15 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
MachinePointerInfo(), MachinePointerInfo());
}
+static bool IsHvxVectorType(MVT ty) {
+ return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
+ ty == MVT::v64i8 ||
+ ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
+ ty == MVT::v128i8 ||
+ ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
+ ty == MVT::v256i8 ||
+ ty == MVT::v512i1 || ty == MVT::v1024i1);
+}
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
@@ -463,19 +661,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Check for varargs.
int NumNamedVarArgParams = -1;
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
- {
- const Function* CalleeFn = nullptr;
- Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
- if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
- {
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = GAN->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ if (const Function* F = dyn_cast<Function>(GV)) {
// If a function has zero args and is a vararg function, that's
// disallowed so it must be an undeclared function. Do not assume
// varargs if the callee is undefined.
- if (CalleeFn->isVarArg() &&
- CalleeFn->getFunctionType()->getNumParams() != 0) {
- NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
- }
+ if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+ NumNamedVarArgParams = F->getFunctionType()->getNumParams();
}
}
@@ -519,11 +713,16 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
+ bool NeedsArgAlign = false;
+ unsigned LargestAlignSeen = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Record if we need > 8 byte alignment on an argument.
+ bool ArgAlign = IsHvxVectorType(VA.getValVT());
+ NeedsArgAlign |= ArgAlign;
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -549,13 +748,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
StackPtr.getValueType());
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
- MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
false, 0);
MemOpChains.push_back(S);
@@ -569,6 +772,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ // V6 vectors passed by value have 64- or 128-byte alignment, depending
+ // on whether we are in 64-byte or 128-byte vector mode.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }
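In isolation, the LargestAlignSeen bookkeeping is just a bits-to-bytes conversion plus a running maximum; a compilable sketch:

```cpp
// Each aligned argument contributes StoreSizeInBits >> 3 bytes; the frame
// keeps the maximum, with the HVX mode imposing a 64- or 128-byte floor.
#include <algorithm>
#include <cassert>
#include <initializer_list>

int main() {
  unsigned LargestAlignSeen = 0;
  for (unsigned StoreSizeInBits : {512u, 1024u}) {  // e.g. v16i32, v32i32
    LargestAlignSeen = std::max(LargestAlignSeen, StoreSizeInBits >> 3);
  }
  LargestAlignSeen = std::max(LargestAlignSeen, 128u); // 128B-mode floor
  assert(LargestAlignSeen == 128);
}
```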
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -613,12 +827,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- if (flag_aligned_memcpy) {
- const char *MemcpyName =
- "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
- Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT);
- flag_aligned_memcpy = false;
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
@@ -668,7 +877,19 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
if (Ptr->getOpcode() != ISD::ADD)
return false;
- if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ bool ValidHVXDblType =
+ (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8);
+ bool ValidHVXType =
+ UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8);
+
+ if (ValidHVXDblType || ValidHVXType ||
+ VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
isInc = (Ptr->getOpcode() == ISD::ADD);
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
@@ -679,23 +900,6 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
return false;
}
-// TODO: Put this function along with the other isS* functions in
-// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonOperands.td.
-static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS4 predicate - True if the immediate fits in a 4-bit sign extended.
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- int64_t m = 0;
- if (ShiftAmount > 0) {
- m = v % ShiftAmount;
- v = v >> ShiftAmount;
- }
- return (v <= 7) && (v >= -8) && (m == 0);
-}
-
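For reference, a standalone sketch of what the removed predicate appears to have intended: the offset must be a multiple of the access size, and the scaled value must fit a signed 4-bit field. (Note the removed code computed `v % ShiftAmount` rather than `v % (1 << ShiftAmount)`, which may be part of why it gave way to `isValidAutoIncImm`.)

```cpp
// Presumed intent of the removed Is_PostInc_S4_Offset check.
#include <cassert>
#include <cstdint>

static bool isPostIncS4(int64_t V, int ShiftAmount) {
  if (ShiftAmount > 0) {
    if (V % (int64_t(1) << ShiftAmount) != 0)  // multiple of the scale
      return false;
    V >>= ShiftAmount;
  }
  return V <= 7 && V >= -8;                    // signed 4-bit field
}

int main() {
  assert(isPostIncS4(28, 2));    // word access: 28 = 7*4, in range
  assert(!isPostIncS4(6, 2));    // not a multiple of 4
  assert(!isPostIncS4(32, 2));   // scales to 8, out of range
}
```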
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -724,18 +928,20 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isInc = false;
bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
- // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
- int ShiftAmount = VT.getSizeInBits() / 16;
- if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
- AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
- return true;
+ if (isLegal) {
+ auto &HII = *Subtarget.getInstrInfo();
+ int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ if (HII.isValidAutoIncImm(VT, OffsetVal)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
}
return false;
}
-SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -784,47 +990,6 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
return Op;
}
-
-//
-// Taken from the XCore backend.
-//
-SDValue HexagonTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue Table = Op.getOperand(1);
- SDValue Index = Op.getOperand(2);
- SDLoc dl(Op);
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
- unsigned JTI = JT->getIndex();
- MachineFunction &MF = DAG.getMachineFunction();
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
- SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
-
- // Mark all jump table targets as address taken.
- const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
- const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
- for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
- MachineBasicBlock *MBB = JTBBs[i];
- MBB->setHasAddressTaken();
- // This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object.
- BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
- }
-
- SDValue JumpTableBase = DAG.getNode(
- HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
- SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(2, dl, MVT::i32));
- SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
- ShiftIndex);
- SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
- MachinePointerInfo(), false, false, false,
- 0);
- return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
-}
-
-
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
@@ -850,7 +1015,10 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue AC = DAG.getConstant(A, dl, MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
}
SDValue
@@ -882,7 +1050,8 @@ const {
// equal to) 8 bytes. If not, no address will be passed into the callee and
// the callee returns the result directly through R0/R1.
- SmallVector<SDValue, 4> MemOps;
+ SmallVector<SDValue, 8> MemOps;
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -908,6 +1077,42 @@ const {
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Single Vector
+ } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+ RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Double Vector
+ } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+ RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+ assert(0 && "need to support VecPred regs");
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
} else {
assert (0);
}
@@ -1056,8 +1261,8 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue
+HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue PredOp = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
EVT OpVT = Op1.getValueType();
@@ -1163,16 +1368,33 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
- SDLoc dl(Op);
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- SDValue Res;
- if (CP->isMachineConstantPoolEntry())
- Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
- CP->getAlignment());
+ ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
+ unsigned Align = CPN->getAlignment();
+ Reloc::Model RM = HTM.getRelocationModel();
+ unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0;
+
+ SDValue T;
+ if (CPN->isMachineConstantPoolEntry())
+ T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
else
- Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
- CP->getAlignment());
- return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+ if (RM == Reloc::PIC_)
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
+ return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
+}
+
+SDValue
+HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int Idx = cast<JumpTableSDNode>(Op)->getIndex();
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::PIC_) {
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
+ }
+
+ SDValue T = DAG.getTargetJumpTable(Idx, VT);
+ return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}
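A toy model of the relocation-model split in LowerConstantPool/LowerJumpTable above; the strings merely name the SelectionDAG nodes the real code builds:

```cpp
// PIC references the table PC-relative (AT_PCREL + MO_PCREL); static code
// wraps the target jump table in the plain HexagonISD::JT node.
#include <cstdio>
#include <string>

enum class RelocModel { Static, PIC_ };

static std::string lowerJumpTable(int Idx, RelocModel RM) {
  std::string JT = "jt" + std::to_string(Idx);
  if (RM == RelocModel::PIC_)
    return "AT_PCREL(TargetJumpTable(" + JT + ", MO_PCREL))";
  return "JT(TargetJumpTable(" + JT + "))";
}

int main() {
  std::printf("%s\n", lowerJumpTable(0, RelocModel::PIC_).c_str());
  std::printf("%s\n", lowerJumpTable(0, RelocModel::Static).c_str());
}
```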
SDValue
@@ -1219,52 +1441,70 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG& DAG) const {
+SDValue
+HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
SDLoc dl(Op);
return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
-SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Result;
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+SDValue
+HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
+ auto *GAN = cast<GlobalAddressSDNode>(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+ auto *GV = GAN->getGlobal();
+ int64_t Offset = GAN->getOffset();
+
+ auto &HLOF = *HTM.getObjFileLowering();
+ Reloc::Model RM = HTM.getRelocationModel();
- const HexagonTargetObjectFile *TLOF =
- static_cast<const HexagonTargetObjectFile *>(
- getTargetMachine().getObjFileLowering());
- if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
- return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result);
+ if (RM == Reloc::Static) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+ if (HLOF.IsGlobalInSmallSection(GV, HTM))
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
+ return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
- return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result);
+ bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV));
+ if (UsePCRel) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
+ }
+
+ // Use GOT index.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
+ SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
+ return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
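The same idea, condensed: a toy classifier mirroring the three-way split in LowerGLOBALADDRESS (booleans stand in for the small-section and linkage/visibility tests):

```cpp
// static -> CONST32[_GP]; PIC + locally-resolvable -> AT_PCREL; else GOT.
#include <cstdio>

enum class Reloc { Static, PIC_ };

static const char *classifyGlobal(Reloc RM, bool InSmallSection,
                                  bool LocalOrHidden) {
  if (RM == Reloc::Static)
    return InSmallSection ? "CONST32_GP" : "CONST32";
  return LocalOrHidden ? "AT_PCREL" : "AT_GOT";
}

int main() {
  std::printf("%s\n", classifyGlobal(Reloc::Static, true, false)); // CONST32_GP
  std::printf("%s\n", classifyGlobal(Reloc::PIC_, false, true));   // AT_PCREL
  std::printf("%s\n", classifyGlobal(Reloc::PIC_, false, false));  // AT_GOT
}
```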
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
-void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
- if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDLoc dl(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::Static) {
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
}
+
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}
SDValue
-HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
- SDLoc dl(Op);
- return DAG.getNode(HexagonISD::CONST32_GP, dl,
- getPointerTy(DAG.getDataLayout()), BA_SD);
+HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}
//===----------------------------------------------------------------------===//
@@ -1272,18 +1512,19 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
//===----------------------------------------------------------------------===//
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
- const HexagonSubtarget &STI)
+ const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
- Subtarget(STI) {
+ Subtarget(ST) {
bool IsV4 = !Subtarget.hasV5TOps();
auto &HRI = *Subtarget.getRegisterInfo();
+ bool UseHVX = Subtarget.useHVXOps();
+ bool UseHVXSgl = Subtarget.useHVXSglOps();
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
setPrefLoopAlignment(4);
setPrefFunctionAlignment(4);
setMinFunctionAlignment(2);
setInsertFencesForAtomic(false);
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
if (EnableHexSDNodeSched)
@@ -1320,6 +1561,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
+ if (Subtarget.hasV60TOps()) {
+ if (Subtarget.useHVXSglOps()) {
+ addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
+ } else if (Subtarget.useHVXDblOps()) {
+ addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
+ }
+
+ }
+
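Width sanity for the register classes just registered: single-mode HVX registers hold 512 bits, 128B-mode registers 1024, and a "VecDbl" pair doubles the base width:

```cpp
static_assert(16 * 32 == 512, "v16i32 fills a 64-byte vector register");
static_assert(32 * 32 == 1024, "v32i32 fills a 128B register or a 64B pair");
static_assert(64 * 32 == 2048, "v64i32 fills a 128-byte register pair");
int main() { return 0; }
```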
//
// Handling of scalar operations.
//
@@ -1336,10 +1602,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
@@ -1361,11 +1629,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
if (EmitJumpTables)
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ setMinimumJumpTableEntries(2);
else
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(MinimumJumpTables);
+ setMinimumJumpTableEntries(MinimumJumpTables);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// Hexagon has instructions for add/sub with carry. The problem with
// modeling these instructions is that they produce 2 results: Rdd and Px.
@@ -1420,9 +1687,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::i64, Expand);
for (unsigned IntExpOp :
- {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
- ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
- ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
+ ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
setOperationAction(IntExpOp, MVT::i32, Expand);
setOperationAction(IntExpOp, MVT::i64, Expand);
}
@@ -1475,7 +1743,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
- static unsigned VectExpOps[] = {
+ static const unsigned VectExpOps[] = {
// Integer arithmetic:
ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
@@ -1539,7 +1807,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
-
+ if (UseHVX) {
+ if (UseHVXSgl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+ } else if (UseHVXDbl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+ } else {
+ llvm_unreachable("Unrecognized HVX mode");
+ }
+ }
// Subtarget-specific operation actions.
//
if (Subtarget.hasV5TOps()) {
@@ -1586,7 +1868,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
for (ISD::CondCode FPExpCCV4 :
{ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
- ISD::SETUO, ISD::SETO}) {
+ ISD::SETUO, ISD::SETO}) {
setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
}
@@ -1599,6 +1881,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
}
+ if (UseHVXDbl) {
+ for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+ setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+ setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+ }
+ }
+
computeRegisterProperties(&HRI);
//
@@ -1720,7 +2009,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
case HexagonISD::CALLR: return "HexagonISD::CALLR";
case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
@@ -1737,7 +2025,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
- case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD";
case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
@@ -1754,6 +2041,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB";
@@ -1923,8 +2211,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned Size = VT.getSizeInBits();
- // A vector larger than 64 bits cannot be represented in Hexagon.
- // Expand will split the vector.
+ // Only handle vectors of 64 bits or shorter.
if (Size > 64)
return SDValue();
@@ -2058,58 +2345,61 @@ SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ bool UseHVX = Subtarget.useHVXOps();
EVT VT = Op.getValueType();
unsigned NElts = Op.getNumOperands();
- SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
- SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
- SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
- DAG.getConstant(32, dl, MVT::i64));
- SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
-
- ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
- ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
-
- if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
- if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
- // We are trying to concat two v2i16 to a single v4i16.
- SDValue Vec0 = Op.getOperand(1);
- SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
- return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ SDValue Vec0 = Op.getOperand(0);
+ EVT VecVT = Vec0.getValueType();
+ unsigned Width = VecVT.getSizeInBits();
+
+ if (NElts == 2) {
+ MVT ST = VecVT.getSimpleVT();
+ // We are trying to concat two v2i16 into a single v4i16, or two v4i8
+ // into a single v8i8.
+ if (ST == MVT::v2i16 || ST == MVT::v4i8)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
+
+ if (UseHVX) {
+ assert((Width == 64*8 && Subtarget.useHVXSglOps()) ||
+ (Width == 128*8 && Subtarget.useHVXDblOps()));
+ SDValue Vec1 = Op.getOperand(1);
+ MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
+ MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
+ SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
+ SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
+ SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
+ return DAG.getNode(ISD::BITCAST, dl, VT, VC);
}
}
- if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
- if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
- // We are trying to concat two v4i8 to a single v8i8.
- SDValue Vec0 = Op.getOperand(1);
- SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
- return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
- }
- }
+ if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
+ return SDValue();
+
+ SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
+ SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
+ SDValue W = DAG.getConstant(Width, dl, MVT::i64);
+ // Create the "width" part of the argument to insert_rp/insertp_rp.
+ SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
+ SDValue V = C0;
for (unsigned i = 0, e = NElts; i != e; ++i) {
- unsigned OpIdx = NElts - i - 1;
- SDValue Operand = Op.getOperand(OpIdx);
+ unsigned N = NElts-i-1;
+ SDValue OpN = Op.getOperand(N);
- if (VT.getSizeInBits() == 64 &&
- Operand.getValueType().getSizeInBits() == 32) {
+ if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
SDValue C = DAG.getConstant(0, dl, MVT::i32);
- Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
}
-
- SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
- SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
- SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
- const SDValue Ops[] = {ConstVal, Operand, Combined};
-
+ SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
if (VT.getSizeInBits() == 32)
- ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
else
- ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
}
- return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
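The insert-based concat loop above can be exercised with plain integers: each operand is masked and OR-ed into an accumulator at offset i*Width, much as INSERTRP does. (The real loop walks operands high-to-low, but with disjoint fields the order is immaterial.)

```cpp
// Build a wide value by inserting fixed-width fields at computed offsets.
#include <cassert>
#include <cstdint>

static uint64_t insertField(uint64_t Acc, uint64_t Val, unsigned Off,
                            unsigned Width) {
  uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
  return (Acc & ~(Mask << Off)) | ((Val & Mask) << Off);
}

int main() {
  // Concat four 16-bit elements {0x1111,0x2222,0x3333,0x4444} into one i64.
  uint64_t V = 0;
  const uint16_t Elts[4] = {0x1111, 0x2222, 0x3333, 0x4444};
  for (unsigned I = 0; I < 4; ++I)
    V = insertField(V, Elts[I], I * 16, 16);
  assert(V == 0x4444333322221111ULL);
}
```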
SDValue
@@ -2301,6 +2591,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
// Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -2308,8 +2599,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
// Custom lower some vector loads.
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
@@ -2321,6 +2612,16 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+/// Returns relocation base for the given PIC jumptable.
+SDValue
+HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ int Idx = cast<JumpTableSDNode>(Table)->getIndex();
+ EVT VT = Table.getValueType();
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
+}
+
MachineBasicBlock *
HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB)
@@ -2343,6 +2644,8 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
std::pair<unsigned, const TargetRegisterClass *>
HexagonTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
@@ -2358,6 +2661,42 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
case MVT::f64:
return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
+ case 'q': // q0-q3
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v1024i1:
+ case MVT::v512i1:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ }
+ case 'v': // V0-V31
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
+ case MVT::v32i32:
+ case MVT::v64i16:
+ case MVT::v16i64:
+ case MVT::v128i8:
+ if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
+ return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
+ else
+ return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
+ }
+
default:
llvm_unreachable("Unknown asm register class");
}
@@ -2397,6 +2736,14 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
}
+/// Return true if folding a constant offset with the given GlobalAddress is
+/// legal. It is frequently not legal in PIC relocation models.
+bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
+ const {
+ return HTM.getRelocationModel() == Reloc::Static;
+}
+
+
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
@@ -2428,8 +2775,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
// ***************************************************************************
// If this is a tail call via a function pointer, then don't do it!
- if (!(dyn_cast<GlobalAddressSDNode>(Callee))
- && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ if (!(isa<GlobalAddressSDNode>(Callee)) &&
+ !(isa<ExternalSymbolSDNode>(Callee))) {
return false;
}
@@ -2467,6 +2814,41 @@ bool llvm::isPositiveHalfWord(SDNode *N) {
}
}
+std::pair<const TargetRegisterClass*, uint8_t>
+HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RRC = nullptr;
+
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(TRI, VT);
+ case MVT::v64i8:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ RRC = &Hexagon::VectorRegsRegClass;
+ break;
+ case MVT::v128i8:
+ case MVT::v64i16:
+ case MVT::v32i32:
+ case MVT::v16i64:
+ if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
+ Subtarget.useHVXDblOps())
+ RRC = &Hexagon::VectorRegs128BRegClass;
+ else
+ RRC = &Hexagon::VecDblRegsRegClass;
+ break;
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ RRC = &Hexagon::VecDblRegs128BRegClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
@@ -2498,13 +2880,15 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
return Ext;
}
-bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// Do not expand loads and stores that don't exceed 64 bits.
- return LI->getType()->getPrimitiveSizeInBits() > 64;
+ return LI->getType()->getPrimitiveSizeInBits() > 64
+ ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Do not expand loads and stores that don't exceed 64 bits.
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 2642abf..bf378b9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -35,16 +35,14 @@ bool isPositiveHalfWord(SDNode *N);
ALLOCA,
ARGEXTEND,
- PIC_ADD,
- AT_GOT,
- AT_PCREL,
+ AT_GOT, // Index in GOT.
+ AT_PCREL, // Offset relative to PC.
CALLv3, // A V3+ call instruction.
CALLv3nr, // A V3+ call instruction that doesn't return.
CALLR,
RET_FLAG, // Return with a flag operand.
- BR_JT, // Branch through jump table.
BARRIER, // Memory barrier.
JT, // Jump table.
CP, // Constant pool.
@@ -80,6 +78,7 @@ bool isPositiveHalfWord(SDNode *N);
INSERTRP,
EXTRACTU,
EXTRACTURP,
+ VCOMBINE,
TC_RETURN,
EH_RETURN,
DCFETCH,
@@ -127,7 +126,6 @@ bool isPositiveHalfWord(SDNode *N);
SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
@@ -137,6 +135,7 @@ bool isPositiveHalfWord(SDNode *N);
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -163,8 +162,23 @@ bool isPositiveHalfWord(SDNode *N);
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R1;
+ }
+
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
EVT VT) const override {
if (!VT.isVector())
@@ -200,6 +214,10 @@ bool isPositiveHalfWord(SDNode *N);
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+ /// Return true if folding a constant offset with the given GlobalAddress
+ /// is legal. It is frequently not legal in PIC relocation models.
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -208,20 +226,26 @@ bool isPositiveHalfWord(SDNode *N);
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
+ /// Returns relocation base for the given PIC jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
+ const override;
+
// Handling of atomic RMW instructions.
- bool hasLoadLinkedStoreConditional() const override {
- return true;
- }
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI)
- const override {
- return AtomicRMWExpansionKind::LLSC;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
+ return AtomicExpansionKind::LLSC;
}
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
+ const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
new file mode 100644
index 0000000..5a1a69b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
@@ -0,0 +1,462 @@
+//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Mappings
+//===----------------------------------------------------------------------===//
+
+
+def : InstAlias<"memb({GP}+#$addr) = $Nt.new",
+ (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.new",
+ (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt.new",
+ (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memb({GP}+#$addr) = $Nt",
+ (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt",
+ (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.h",
+ (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt",
+ (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memd({GP}+#$addr) = $Nt",
+ (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>;
+
+def : InstAlias<"$Nt = memb({GP}+#$addr)",
+ (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memub({GP}+#$addr)",
+ (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memh({GP}+#$addr)",
+ (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memuh({GP}+#$addr)",
+ (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memw({GP}+#$addr)",
+ (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>;
+def : InstAlias<"$Nt = memd({GP}+#$addr)",
+ (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>;
+
+// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt
+def : InstAlias<"memb($Rs) = $Rt",
+ (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt",
+ (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.h",
+ (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt",
+ (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = $Rt.new",
+ (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.new",
+ (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt.new",
+ (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = #$S8",
+ (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memh($Rs) = #$S8",
+ (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memw($Rs) = #$S8",
+ (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memd($Rs) = $Rtt",
+ (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"memb($Rs) = setbit(#$U5)",
+ (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = setbit(#$U5)",
+ (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = setbit(#$U5)",
+ (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memb($Rs) = clrbit(#$U5)",
+ (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = clrbit(#$U5)",
+ (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = clrbit(#$U5)",
+ (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
+def : InstAlias<"$Rd = memb($Rs)",
+ (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memub($Rs)",
+ (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memh($Rs)",
+ (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memuh($Rs)",
+ (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memw($Rs)",
+ (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memd($Rs)",
+ (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memubh($Rs)",
+ (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memubh($Rs)",
+ (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = membh($Rs)",
+ (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = membh($Rs)",
+ (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memb_fifo($Rs)",
+ (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memh_fifo($Rs)",
+ (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if ($Pt) $Rd = memXX($Rs)
+def : InstAlias<"if ($Pt) $Rd = memb($Rs)",
+ (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memub($Rs)",
+ (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memh($Rs)",
+ (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memuh($Rs)",
+ (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memw($Rs)",
+ (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if ($Pt) memXX($Rs) = $Rt
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt",
+ (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt",
+ (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt",
+ (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+
+// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if (!$Pt) $Rd = memXX($Rs)
+def : InstAlias<"if (!$Pt) $Rd = memb($Rs)",
+ (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memub($Rs)",
+ (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memh($Rs)",
+ (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)",
+ (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memw($Rs)",
+ (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if (!$Pt) memXX($Rs) = $Rt
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt",
+ (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt",
+ (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt",
+ (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
+ (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
+ (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
+ (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
+ (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
+ (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
+ (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
+ (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
+ (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+// Alias of: memXX($Rs + #$u6_X) |= $Rt, also &=, +=, -=
+// to: memXX($Rs) |= $Rt
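+// For example, "memw(r5) += r6" assembles as "memw(r5+#0) += r6"
+// (L4_add_memopw_io), provided the memops feature is available.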
+def : InstAlias<"memb($Rs) &= $Rt",
+ (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) |= $Rt",
+ (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += $Rt",
+ (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= $Rt",
+ (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += #$U5",
+ (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= #$U5",
+ (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) &= $Rt",
+ (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) |= $Rt",
+ (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += $Rt",
+ (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= $Rt",
+ (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += #$U5",
+ (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= #$U5",
+ (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) &= $Rt",
+ (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) |= $Rt",
+ (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += $Rt",
+ (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= $Rt",
+ (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += #$U5",
+ (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= #$U5",
+ (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+//
+// Alias of: if ($Pv.new) memX($Rs + #$u6_X) = $Rt
+// to: if ($Pv.new) memX($Rs) = $Rt
+def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt",
+ (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdtnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt",
+ (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdfnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+//
+// Alias of: if ($Pt.new) $Rd = memXX($Rs + #$u6_X), and the if (!$Pt.new)
+// forms, to: if ($Pt.new) $Rd = memXX($Rs)
+def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)",
+ (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)",
+ (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"dcfetch($Rs)",
+ (Y2_dcfetchbo IntRegs:$Rs, 0), 0>;
+
+// Assembler-mapped instructions: cmp.lt(a,b) -> cmp.gt(b,a).
+// Other such mappings must be handled by the assembly parser.
+def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
+ (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
+ (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
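+// For example, "p0 = cmp.lt(r1, r2)" is emitted as "p0 = cmp.gt(r2, r1)".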
+
+// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs)
+def : InstAlias<"$Rd = neg($Rs)",
+ (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>;
+
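+// m0 and m1 are the modifier registers; architecturally they occupy
+// control registers c6 and c7, hence the transfers via A2_tfrrcr/A2_tfrcrr.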
+def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
+def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
+
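+// A predicate-to-predicate copy has no dedicated opcode; it is emitted as
+// "$Pd = or($Ps, $Ps)".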
+def : InstAlias<"$Pd = $Ps",
+ (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>;
+
+def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)",
+ (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>;
+
+def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)",
+ (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"$Rd = mpyui($Rs,$Rt)",
+ (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>;
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
new file mode 100644
index 0000000..280832f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
@@ -0,0 +1,1019 @@
+class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src2};
+ let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst};
+}
+
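+// Illustrative packing for this format: V6_vtmpyb_enc below has
+// opc = 0b000110010000000, so Inst{31-16} holds opc{14-4} = 0b00011001000
+// followed by src2, and Inst{13-0} holds opc{3} = 0, src1, opc{2-0} = 0b000
+// and dst.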
+class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>;
+class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>;
+class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>;
+class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>;
+class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>;
+class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>;
+class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>;
+class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>;
+class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>;
+class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>;
+class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>;
+class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>;
+class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>;
+class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>;
+class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>;
+class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>;
+class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>;
+class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>;
+class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>;
+class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>;
+class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>;
+class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>;
+class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>;
+class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>;
+class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>;
+class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>;
+class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>;
+class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>;
+class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>;
+class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>;
+class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>;
+class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>;
+class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>;
+class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>;
+class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>;
+class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>;
+class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>;
+class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>;
+class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>;
+class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>;
+class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>;
+class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>;
+class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>;
+class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>;
+class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>;
+class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>;
+class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>;
+class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>;
+class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>;
+class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>;
+class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>;
+class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>;
+class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>;
+class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>;
+class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>;
+class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>;
+class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>;
+class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>;
+class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>;
+class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>;
+class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>;
+class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>;
+class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>;
+class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>;
+class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>;
+class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>;
+class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>;
+class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>;
+class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>;
+class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>;
+class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>;
+class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>;
+class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>;
+class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>;
+class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>;
+class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>;
+class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>;
+class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>;
+class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>;
+class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>;
+class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>;
+class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>;
+class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>;
+class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>;
+class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>;
+class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>;
+class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>;
+class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>;
+class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>;
+class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>;
+class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>;
+class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>;
+class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>;
+class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>;
+class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>;
+class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>;
+class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>;
+class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>;
+class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>;
+class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>;
+class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>;
+class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>;
+class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>;
+class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>;
+class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>;
+class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>;
+class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>;
+class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>;
+class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>;
+class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>;
+class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>;
+class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>;
+class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>;
+class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>;
+class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>;
+class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>;
+class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>;
+class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>;
+class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>;
+class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>;
+class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>;
+class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>;
+class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>;
+class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>;
+class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>;
+class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>;
+class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>;
+class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>;
+class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>;
+class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>;
+class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>;
+class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>;
+class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>;
+class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>;
+class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>;
+class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>;
+class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>;
+class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>;
+class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>;
+class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>;
+class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>;
+class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>;
+class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>;
+class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>;
+class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>;
+class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>;
+class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>;
+class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>;
+class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>;
+class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>;
+class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>;
+class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>;
+class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>;
+class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>;
+class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>;
+class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>;
+class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>;
+class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>;
+class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>;
+class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>;
+class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>;
+class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>;
+class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>;
+class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>;
+class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>;
+class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>;
+class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>;
+class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>;
+class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>;
+class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>;
+class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>;
+class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>;
+class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>;
+class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>;
+class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>;
+class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>;
+class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>;
+class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>;
+class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>;
+class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>;
+class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>;
+class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>;
+class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>;
+class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>;
+class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>;
+
+class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} };
+ let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} };
+}
+
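+// The 2-bit dst here is a vector predicate register (q0-q3), as these
+// compare and vand-to-predicate operations produce a predicate rather
+// than a full vector register.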
+class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>;
+class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>;
+class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>;
+class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>;
+class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>;
+class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>;
+class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>;
+class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>;
+class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>;
+class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>;
+class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>;
+class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>;
+class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>;
+class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>;
+class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>;
+class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>;
+class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>;
+class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>;
+class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>;
+class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>;
+class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>;
+class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>;
+class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>;
+class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>;
+class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>;
+class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>;
+class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>;
+class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>;
+class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>;
+class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>;
+class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>;
+class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>;
+class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>;
+class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>;
+class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>;
+class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>;
+class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>;
+class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>;
+
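+// Here the 2-bit src1 is a vector predicate (q0-q3) guarding the
+// conditional vector add/subtract forms below.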
+class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} };
+ let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>;
+class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>;
+class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>;
+class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>;
+class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>;
+class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>;
+class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>;
+class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>;
+class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>;
+class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>;
+class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>;
+class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>;
+
+class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b00011110000000, opc{5-4} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>;
+class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>;
+class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>;
+class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>;
+class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>;
+class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>;
+class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>;
+class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>;
+class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>;
+class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>;
+class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>;
+class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>;
+class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>;
+class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>;
+class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>;
+class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>;
+class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>;
+class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>;
+class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>;
+class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>;
+class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>;
+class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>;
+class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>;
+class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>;
+class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>;
+
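+// In the vector load/store formats below, src2 is a byte offset of which
+// only the top four bits are encoded; the low bits are implicitly zero
+// because vector memory accesses are vector-aligned (64 bytes here,
+// 128 bytes in the _128B variants, which keep src2{10-7} instead).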
+class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>;
+class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>;
+class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>;
+class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>;
+class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>;
+class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>;
+class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>;
+class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>;
+class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>;
+class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>;
+class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>;
+class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>;
+class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>;
+
+class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>;
+class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>;
+class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>;
+
+class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>;
+class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>;
+class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>;
+class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>;
+class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>;
+class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>;
+class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>;
+class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>;
+class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>;
+class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>;
+class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>;
+class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>;
+class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>;
+class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>;
+
+class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>;
+class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>;
+class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>;
+class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>;
+class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>;
+class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>;
+class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>;
+class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>;
+class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>;
+class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>;
+class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>;
+class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>;
+class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>;
+class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>;
+class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>;
+class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>;
+
+// TODO: Change script to generate dst, src1, src2 instead of
+// dst, dst2, src1.
+class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>;
+class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>;
+class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>;
+class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>;
+class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>;
+class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>;
+class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>;
+class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>;
+class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>;
+class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>;
+class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>;
+class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>;
+class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>;
+
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>;
+class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>;
+class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>;
+
+class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>;
+class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>;
+class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>;
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>;
+class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>;
+class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>;
+class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>;
+class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>;
+class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>;
+class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>;
+class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>;
+class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>;
+class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>;
+class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>;
+class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>;
+class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>;
+class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>;
+class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>;
+class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>;
+class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>;
+class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>;
+class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>;
+class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>;
+class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>;
+class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>;
+class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>;
+class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>;
+class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>;
+class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>;
+class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>;
+
+class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<1> src2;
+
+ let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>;
+class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>;
+class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>;
+class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>;
+class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>;
+class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>;
+class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>;
+
+class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>;
+class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>;
+class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
+}
+
+class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
+class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<5> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
+class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
+class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
+class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
+class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
+class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
+class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
+class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
+class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
+class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<3> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
+class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
+class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
+class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
+
+
+class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<1> src3;
+
+ let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
+ let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
+}
+
+class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
+class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
+class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
+class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
+class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
+class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
+
+class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
+}
+
+class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
+class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
+
+class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b00011001111, src3{4-0} };
+ let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
+}
+
+class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
+class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
+
+
+class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
+ let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
+class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
+
+class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
+ let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
+}
+
+class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
+class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
+
+class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
+class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
+class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
+class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
+class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
+class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
+class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
+class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
+class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
+class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
+class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
+class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
+class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
+class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
+class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
+
+class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
+ let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
+}
+
+class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
+class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
+class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
+class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
+class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
+class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
+class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
+class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
+
+class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+ bits<2> src2;
+
+ let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
+}
+
+class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
+class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
+class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
+class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
+class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
+
+class V6_pred_not_enc : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b0001111000000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
+}
+
+class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
+ let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
+class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
+
+class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{15-5}, src1{4-0} };
+ let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
+}
+
+class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
+class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
+class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
+
+
+class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
+}
+
+class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
+class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
+
+class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+
+ let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
+}
+
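+// src2 is the 6-bit immediate rotate count for these 64-bit (register-pair)
+// rotate-left instructions and their accumulating forms.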
+class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
+class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
+class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
+class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
+class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
+class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
+
+class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src1{4-0} };
+ let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
+class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
+class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
+class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
+class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
+class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
+class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
+class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
+
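+// These instructions take no register operands; the entire encoding comes
+// from the 25-bit opc spread across the fixed instruction fields.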
+class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
+
+ let Inst{31-16} = { opc{24-10}, 0 };
+ let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
+}
+
+class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
+class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
+class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
+class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
+
+class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
+}
+
+class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
+class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
+
+class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
+ let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
+}
+
+class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
+class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
+
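+// A5_ACS (vacsh) writes both a register pair (dst1) and a predicate
+// register (dst2), hence the two destination fields.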
+class A5_ACS_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010101, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
+
+class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<2> src3;
+
+ let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
+}
+
+class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
+class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
+class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
+
+class V6_vhistq_enc : OpcodeHexagon {
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
+ let Inst{13-0} = { 0b10000010000000 };
+}
+
+// TODO: Change script to generate dst1 instead of dst.
+class A6_vminub_RdP_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010111, src2{4-0} };
+ let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 44bab29..3c5ec17 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -34,6 +34,8 @@ class SubTarget<bits<6> value> {
def HasAnySubT : SubTarget<0x3f>; // 111111
def HasV5SubT : SubTarget<0x3e>; // 111110
+def HasV55SubT : SubTarget<0x3c>; // 111100
+def HasV60SubT : SubTarget<0x38>; // 111000
// Addressing modes for load/store instructions
class AddrModeType<bits<3> value> {
@@ -57,6 +59,8 @@ def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv)
+def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
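
For reference, the MemAccessSize codes translate into access widths roughly as follows. This mapping is inferred from the comments above (and from HVX vectors being 64 or 128 bytes wide), not copied from HexagonBaseInfo.h; values 5 and 6 are simply unused by the definitions shown here.

// Illustrative decoding of MemAccessSize values; names are assumptions.
static unsigned memAccessSizeInBytes(unsigned Code) {
  switch (Code) {
  case 1: return 1;    // ByteAccess (memb)
  case 2: return 2;    // HalfWordAccess (memh)
  case 3: return 4;    // WordAccess (memw)
  case 4: return 8;    // DoubleWordAccess (memd)
  case 7: return 64;   // Vector64Access (memv, 512-bit HVX)
  case 8: return 128;  // Vector128Access (memv, 1024-bit HVX)
  default: return 0;   // no or unknown memory access
  }
}
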
//===----------------------------------------------------------------------===//
@@ -167,14 +171,23 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<1> isFP = 0;
let TSFlags {48} = isFP; // Floating-point.
+ bits<1> hasNewValue2 = 0;
+ let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+ bits<3> opNewValue2 = 0;
+ let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+
+ bits<1> isAccumulator = 0;
+ let TSFlags{54} = isAccumulator;
+
// Fields used for relation models.
+ bit isNonTemporal = 0;
+ string isNT = ""; // set to "true" for non-temporal vector stores.
string BaseOpcode = "";
string CextOpcode = "";
string PredSense = "";
string PNewValue = "";
string NValueST = ""; // Set to "true" for new-value stores.
string InputType = ""; // Input is "imm" or "reg" type.
- string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
string isFloat = "false"; // Set to "true" for the floating-point load/store.
string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
@@ -182,6 +195,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
+ let isNT = !if(isNonTemporal, "true", "false");
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
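
Since the comment insists these TSFlags assignments must match MCTargetDesc/HexagonBaseInfo.h, the C++ side presumably reads them back with the usual shift-and-mask pattern (the same one isPredicated uses later in this patch). A sketch with assumed enumerator names:

#include <cstdint>

namespace HexagonII {
  enum {
    // Positions mirror the TableGen TSFlags bits above; names are assumed.
    HasNewValue2Pos = 50, HasNewValue2Mask = 0x1,
    OpNewValue2Pos  = 51, OpNewValue2Mask  = 0x7,
    AccumulatorPos  = 54, AccumulatorMask  = 0x1,
  };
}

static bool isAccumulatorInsn(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask;
}
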
@@ -217,6 +231,11 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
+let mayLoad = 1 in
+class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -234,6 +253,12 @@ class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+// Same as ST0Inst but doesn't derive from OpcodeHexagon.
+let mayStore = 1 in
+class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -277,6 +302,11 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
OpcodeHexagon;
+// Same as above but doesn't derive from OpcodeHexagon
+class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
@@ -294,6 +324,10 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
OpcodeHexagon;
+class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
@@ -402,3 +436,13 @@ include "HexagonInstrFormatsV4.td"
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index db83ef6..2d1dea5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -21,8 +21,6 @@ def TypeMEMOP : IType<9>;
def TypeNV : IType<10>;
def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypeAG_VX : IType<28>;
-def TypeAG_VM : IType<29>;
def TypePREFIX : IType<30>;
// Duplex Instruction Class Declaration
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
new file mode 100644
index 0000000..f3d43de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -0,0 +1,238 @@
+//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +

+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
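
Per the "Must match BaseInfo.h" warning, the same numbering has to exist as a C++ enum on the MC side; a minimal restatement of just the entries added above (the enum and enumerator spellings are assumed):

enum HexagonCVIType {
  TypeCVI_VA = 13, TypeCVI_VA_DV = 14, TypeCVI_VX = 15, TypeCVI_VX_DV = 16,
  TypeCVI_VP = 17, TypeCVI_VP_VS = 18, TypeCVI_VS = 19,
  TypeCVI_VINLANESAT = 20, TypeCVI_VM_LD = 21, TypeCVI_VM_TMP_LD = 22,
  TypeCVI_VM_CUR_LD = 23, TypeCVI_VM_VP_LDU = 24, TypeCVI_VM_ST = 25,
  TypeCVI_VM_NEW_ST = 26, TypeCVI_VM_STU = 27, TypeCVI_HIST = 28
};
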
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LATE>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_SLOT2>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VINLANESAT>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_CUR_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+}
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ Requires<[HasV60T, UseHVX]>;
+}
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 3cb0823..eb3590c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,9 +23,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
@@ -36,9 +38,41 @@ using namespace llvm;
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
+using namespace llvm;
+
+cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
+ cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
+ "packetization boundary."));
+
+static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
+ cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
+
+static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable schedule adjustment for new value stores."));
+
+static cl::opt<bool> EnableTimingClassLatency(
+ "enable-timing-class-latency", cl::Hidden, cl::init(false),
+ cl::desc("Enable timing class latency"));
+
+static cl::opt<bool> EnableALUForwarding(
+ "enable-alu-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec alu forwarding"));
+
+static cl::opt<bool> EnableACCForwarding(
+ "enable-acc-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec acc forwarding"));
+
+static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+
///
/// Constants for Hexagon instructions.
///
+const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7
+const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7
+const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6
+const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6
const int Hexagon_MEMW_OFFSET_MAX = 4095;
const int Hexagon_MEMW_OFFSET_MIN = -4096;
const int Hexagon_MEMD_OFFSET_MAX = 8191;
@@ -57,71 +91,49 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14;
const int Hexagon_MEMH_AUTOINC_MIN = -16;
const int Hexagon_MEMB_AUTOINC_MAX = 7;
const int Hexagon_MEMB_AUTOINC_MIN = -8;
+const int Hexagon_MEMV_AUTOINC_MAX = 192;
+const int Hexagon_MEMV_AUTOINC_MIN = -256;
+const int Hexagon_MEMV_AUTOINC_MAX_128B = 384;
+const int Hexagon_MEMV_AUTOINC_MIN_128B = -512;
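
A hypothetical range check built only from the new constants; the backend's real offset validation presumably also enforces vector alignment, which is omitted here.

static bool isValidMemVOffset(int Offset, bool Is128B) {
  if (Is128B)
    return Offset >= Hexagon_MEMV_OFFSET_MIN_128B &&
           Offset <= Hexagon_MEMV_OFFSET_MAX_128B;
  return Offset >= Hexagon_MEMV_OFFSET_MIN &&
         Offset <= Hexagon_MEMV_OFFSET_MAX;
}
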
// Pin the vtable to this file.
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(), Subtarget(ST) {}
+ RI() {}
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
+static bool isIntRegForSubInst(unsigned Reg) {
+ return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+ (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
+}
- switch (MI->getOpcode()) {
- default: break;
- case Hexagon::L2_loadri_io:
- case Hexagon::L2_loadrd_io:
- case Hexagon::L2_loadrh_io:
- case Hexagon::L2_loadrb_io:
- case Hexagon::L2_loadrub_io:
- if (MI->getOperand(2).isFI() &&
- MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
- FrameIndex = MI->getOperand(2).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
+
+static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
+ return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
+ isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
}
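
isIntRegForSubInst captures the GPR subset that duplex sub-instructions can name with their narrow register fields (r0-r7 and r16-r23), and isDblRegForSubInst simply requires both halves of a register pair to fall in that set. A standalone restatement over raw register numbers:

// N is a plain register number 0..31; a stand-in for the Hexagon::Rn checks.
static bool isSubInstGPRNumber(unsigned N) {
  return N <= 7 || (N >= 16 && N <= 23);
}
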
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case Hexagon::S2_storeri_io:
- case Hexagon::S2_storerd_io:
- case Hexagon::S2_storerh_io:
- case Hexagon::S2_storerb_io:
- if (MI->getOperand(2).isFI() &&
- MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
- FrameIndex = MI->getOperand(0).getIndex();
- return MI->getOperand(2).getReg();
- }
- break;
+/// Calculate the number of instructions, excluding debug instructions.
+static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
+ MachineBasicBlock::const_instr_iterator MIE) {
+ unsigned Count = 0;
+ for (; MIB != MIE; ++MIB) {
+ if (!MIB->isDebugValue())
+ ++Count;
}
- return 0;
+ return Count;
}
-// Find the hardware loop instruction used to set-up the specified loop.
-// On Hexagon, we have two instructions used to set-up the hardware loop
-// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
-// to indicate the end of a loop.
-static MachineInstr *
-findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
- SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+
+/// Find the hardware loop instruction used to set-up the specified loop.
+/// On Hexagon, we have two instructions used to set-up the hardware loop
+/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+/// to indicate the end of a loop.
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
int LOOPi;
int LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
@@ -157,100 +169,108 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
return 0;
}
-unsigned HexagonInstrInfo::InsertBranch(
- MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
- Opcode_t BOpc = Hexagon::J2_jump;
- Opcode_t BccOpc = Hexagon::J2_jumpt;
+/// Gather register def/uses from MI.
+/// This treats possible (predicated) defs as actually happening ones
+/// (conservatively).
+static inline void parseOperands(const MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
+ Defs.clear();
+ Uses.clear();
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
- // Check if ReverseBranchCondition has asked to reverse this branch
- // If we want to reverse the branch an odd number of times, we want
- // J2_jumpf.
- if (!Cond.empty() && Cond[0].isImm())
- BccOpc = Cond[0].getImm();
+ if (!MO.isReg())
+ continue;
- if (!FBB) {
- if (Cond.empty()) {
- // Due to a bug in TailMerging/CFG Optimization, we need to add a
- // special case handling of a predicated jump followed by an
- // unconditional jump. If not, Tail Merging and CFG Optimization go
- // into an infinite loop.
- MachineBasicBlock *NewTBB, *NewFBB;
- SmallVector<MachineOperand, 4> Cond;
- MachineInstr *Term = MBB.getFirstTerminator();
- if (Term != MBB.end() && isPredicated(Term) &&
- !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(&MBB));
- if (NewTBB == NextBB) {
- ReverseBranchCondition(Cond);
- RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, nullptr, Cond, DL);
- }
- }
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else if (isEndLoopN(Cond[0].getImm())) {
- int EndLoopOp = Cond[0].getImm();
- assert(Cond[1].isMBB());
- // Since we're adding an ENDLOOP, there better be a LOOP instruction.
- // Check for it, and change the BB target if needed.
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
- assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
- Loop->getOperand(0).setMBB(TBB);
- // Add the ENDLOOP after the finding the LOOP0.
- BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
- } else if (isNewValueJump(Cond[0].getImm())) {
- assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
- // New value jump
- // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
- // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
- unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
- DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
- if (Cond[2].isReg()) {
- unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
- addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
- } else if(Cond[2].isImm()) {
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
- addImm(Cond[2].getImm()).addMBB(TBB);
- } else
- llvm_unreachable("Invalid condition for branching");
- } else {
- assert((Cond.size() == 2) && "Malformed cond vector");
- const MachineOperand &RO = Cond[1];
- unsigned Flags = getUndefRegState(RO.isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
- }
- return 1;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (MO.isUse())
+ Uses.push_back(MO.getReg());
+
+ if (MO.isDef())
+ Defs.push_back(MO.getReg());
}
- assert((!Cond.empty()) &&
- "Cond. cannot be empty when multiple branchings are required");
- assert((!isNewValueJump(Cond[0].getImm())) &&
- "NV-jump cannot be inserted with another branch");
- // Special case for hardware loops. The condition is a basic block.
- if (isEndLoopN(Cond[0].getImm())) {
- int EndLoopOp = Cond[0].getImm();
- assert(Cond[1].isMBB());
- // Since we're adding an ENDLOOP, there better be a LOOP instruction.
- // Check for it, and change the BB target if needed.
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
- assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
- Loop->getOperand(0).setMBB(TBB);
- // Add the ENDLOOP after the finding the LOOP0.
- BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
- } else {
- const MachineOperand &RO = Cond[1];
- unsigned Flags = getUndefRegState(RO.isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+}
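
parseOperands is deliberately conservative: a predicated (conditional) def is recorded as a def even though it may never execute. A self-contained model of the same scan over a simplified operand type:

#include <utility>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };  // stand-in for MachineOperand

static std::pair<std::vector<unsigned>, std::vector<unsigned>>
collectDefsUses(const std::vector<Operand> &Ops) {
  std::vector<unsigned> Defs, Uses;
  for (const Operand &O : Ops) {
    if (!O.Reg)                                // skip the "no register" slot
      continue;
    (O.IsDef ? Defs : Uses).push_back(O.Reg);  // each reg goes to one list
  }
  return {Defs, Uses};
}
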
+
+
+// Position dependent, so check twice for swap.
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ return false;
+ case HexagonII::HSIG_L1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_L2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_A:
+ return (Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_Compound:
+ return (Gb == HexagonII::HSIG_Compound);
}
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return false;
+}
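
The "check twice for swap" comment refers to the asymmetry of this table: for example, isDuplexPairMatch(HSIG_L2, HSIG_L1) matches while isDuplexPairMatch(HSIG_L1, HSIG_L2) does not. A caller that doesn't care which group sits in which slot would presumably try both argument orders, roughly:

// Hypothetical order-insensitive wrapper around isDuplexPairMatch.
static bool canFormDuplexEitherOrder(unsigned Ga, unsigned Gb) {
  return isDuplexPairMatch(Ga, Gb) || isDuplexPairMatch(Gb, Ga);
}
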
- return 2;
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::L2_loadri_io:
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ }
+ return 0;
}
@@ -269,9 +289,6 @@ unsigned HexagonInstrInfo::InsertBranch(
/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
/// Cond[1] = R
/// Cond[2] = Imm
-/// @note Related function is \fn findInstrPredicate which fills in
-/// Cond. vector when a predicated instruction is passed to it.
-/// We follow same protocol in that case too.
///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -314,7 +331,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
+
bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
I->getOperand(0).isMBB();
// Delete the J2_jump if it's equivalent to a fall-through.
@@ -327,17 +344,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
- if (!isUnpredicatedTerminator(I))
+ if (!isUnpredicatedTerminator(&*I))
return false;
// Get the last instruction in the block.
- MachineInstr *LastInst = I;
+ MachineInstr *LastInst = &*I;
MachineInstr *SecondLastInst = nullptr;
// Find one more terminator if present.
- do {
- if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+ for (;;) {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
if (!SecondLastInst)
- SecondLastInst = I;
+ SecondLastInst = &*I;
else
// This is a third branch.
return true;
@@ -345,7 +362,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
if (I == MBB.instr_begin())
break;
--I;
- } while(I);
+ }
int LastOpcode = LastInst->getOpcode();
int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
@@ -418,7 +435,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// executed, so remove it.
if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
+ I = LastInst->getIterator();
if (AllowModify)
I->eraseFromParent();
return false;
@@ -438,6 +455,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
+
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
@@ -458,100 +476,127 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-/// \brief For a comparison instruction, return the source registers in
-/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
-/// compares against in CmpValue. Return true if the comparison instruction
-/// can be analyzed.
-bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const {
- unsigned Opc = MI->getOpcode();
- // Set mask and the first source register.
- switch (Opc) {
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtup:
- case Hexagon::C4_cmpneq:
- case Hexagon::C4_cmplte:
- case Hexagon::C4_cmplteu:
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgtui:
- case Hexagon::C4_cmpneqi:
- case Hexagon::C4_cmplteui:
- case Hexagon::C4_cmpltei:
- SrcReg = MI->getOperand(1).getReg();
- Mask = ~0;
- break;
- case Hexagon::A4_cmpbeq:
- case Hexagon::A4_cmpbgt:
- case Hexagon::A4_cmpbgtu:
- case Hexagon::A4_cmpbeqi:
- case Hexagon::A4_cmpbgti:
- case Hexagon::A4_cmpbgtui:
- SrcReg = MI->getOperand(1).getReg();
- Mask = 0xFF;
- break;
- case Hexagon::A4_cmpheq:
- case Hexagon::A4_cmphgt:
- case Hexagon::A4_cmphgtu:
- case Hexagon::A4_cmpheqi:
- case Hexagon::A4_cmphgti:
- case Hexagon::A4_cmphgtui:
- SrcReg = MI->getOperand(1).getReg();
- Mask = 0xFFFF;
- break;
- }
+unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+ unsigned BOpc = Hexagon::J2_jump;
+ unsigned BccOpc = Hexagon::J2_jumpt;
+ assert(validateBranchCond(Cond) && "Invalid branching condition");
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- // Set the value/second source register.
- switch (Opc) {
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtup:
- case Hexagon::A4_cmpbeq:
- case Hexagon::A4_cmpbgt:
- case Hexagon::A4_cmpbgtu:
- case Hexagon::A4_cmpheq:
- case Hexagon::A4_cmphgt:
- case Hexagon::A4_cmphgtu:
- case Hexagon::C4_cmpneq:
- case Hexagon::C4_cmplte:
- case Hexagon::C4_cmplteu:
- SrcReg2 = MI->getOperand(2).getReg();
- return true;
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // J2_jumpf.
+ if (!Cond.empty() && Cond[0].isImm())
+ BccOpc = Cond[0].getImm();
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgtui:
- case Hexagon::C2_cmpgti:
- case Hexagon::C4_cmpneqi:
- case Hexagon::C4_cmplteui:
- case Hexagon::C4_cmpltei:
- case Hexagon::A4_cmpbeqi:
- case Hexagon::A4_cmpbgti:
- case Hexagon::A4_cmpbgtui:
- case Hexagon::A4_cmpheqi:
- case Hexagon::A4_cmphgti:
- case Hexagon::A4_cmphgtui:
- SrcReg2 = 0;
- Value = MI->getOperand(2).getImm();
- return true;
+ if (!FBB) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && isPredicated(Term) &&
+ !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+ MachineBasicBlock *NextBB = &*++MBB.getIterator();
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else if (isNewValueJump(Cond[0].getImm())) {
+ assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+ // New value jump
+ // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+ // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+ unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+ DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+ if (Cond[2].isReg()) {
+ unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+ } else if(Cond[2].isImm()) {
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addImm(Cond[2].getImm()).addMBB(TBB);
+ } else
+ llvm_unreachable("Invalid condition for branching");
+ } else {
+ assert((Cond.size() == 2) && "Malformed cond vector");
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ return 1;
}
+ assert((!Cond.empty()) &&
+ "Cond. cannot be empty when multiple branchings are required");
+ assert((!isNewValueJump(Cond[0].getImm())) &&
+ "NV-jump cannot be inserted with another branch");
+ // Special case for hardware loops. The condition is a basic block.
+ if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else {
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
- return false;
+ return 2;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const {
+ return nonDbgBBSize(&MBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
+ const {
+ return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs, BranchProbability Probability) const {
+ return NumInstrs <= 4;
}
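
The three if-conversion hooks above reduce to two fixed thresholds: predicate a block only if it holds at most three non-debug instructions, and duplicate at most four. Restated on their own (the packet-size rationale is an assumption, not stated in the patch):

// Hexagon packets hold up to four instructions, so these caps plausibly keep
// if-converted code around a single packet (assumed rationale).
static bool profitableToIfCvt(unsigned NonDbgSize) { return NonDbgSize <= 3; }
static bool profitableToDupForIfCvt(unsigned NumInstrs) { return NumInstrs <= 4; }
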
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+ auto &HRI = getRegisterInfo();
if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
return;
@@ -599,28 +644,74 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
addReg(SrcReg, getKillRegState(KillSrc));
return;
}
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg).
+ addReg(SrcReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg) &&
+ Hexagon::VectorRegsRegClass.contains(DestReg)) {
+ llvm_unreachable("Unimplemented pred to vec");
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(DestReg) &&
+ Hexagon::VectorRegsRegClass.contains(SrcReg)) {
+ llvm_unreachable("Unimplemented vec to pred");
+ return;
+ }
+ if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_hireg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_loreg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+#ifndef NDEBUG
+ // Show the invalid registers to ease debugging.
+ dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber()
+ << ": " << PrintReg(DestReg, &HRI)
+ << " = " << PrintReg(SrcReg, &HRI) << '\n';
+#endif
llvm_unreachable("Unimplemented");
}
-void HexagonInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
-
+void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
@@ -640,33 +731,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
-void HexagonInstrInfo::storeRegToAddr(
- MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
- llvm_unreachable("Unimplemented");
-}
-
-
-void HexagonInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned DestReg, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
if (RC == &Hexagon::IntRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
@@ -682,27 +757,136 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
-void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("Unimplemented");
-}
-bool
-HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- const HexagonRegisterInfo &TRI = getRegisterInfo();
+/// expandPostRAPseudo - This function is called for all pseudo instructions
+/// that remain after register allocation. Many pseudo instructions are
+/// created to help register allocation. This is the place to convert them
+/// into real instructions. The target can edit MI in place, or it can insert
+/// new instructions and erase MI. The function should return true if
+/// anything was changed.
+bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI)
+ const {
+ const HexagonRegisterInfo &HRI = getRegisterInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
+ const unsigned VecOffset = 1;
+ bool Is128B = false;
switch (Opc) {
case Hexagon::ALIGNA:
BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
- .addReg(TRI.getFrameRegister())
+ .addReg(HRI.getFrameRegister())
.addImm(-MI->getOperand(1).getImm());
MBB.erase(MI);
return true;
+ case Hexagon::HEXAGON_V6_vassignp_128B:
+ case Hexagon::HEXAGON_V6_vassignp: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (SrcReg != DstReg)
+ copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_lo_128B:
+ case Hexagon::HEXAGON_V6_lo: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubLo);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_hi_128B:
+ case Hexagon::HEXAGON_V6_hi: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubHi);
+ return true;
+ }
+ case Hexagon::STrivv_indexed_128B:
+ Is128B = true;
+ case Hexagon::STrivv_indexed: {
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MI1New->getOperand(0).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ // Vectors are indexed in multiples of the vector size.
+ .addImm(MI->getOperand(1).getImm()+Offset)
+ .addReg(SrcSubHi)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ Is128B = true;
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed: {
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New =
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ .addOperand(MI->getOperand(1))
+ .addImm(MI->getOperand(2).getImm());
+ MI1New->getOperand(1).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ .addOperand(MI->getOperand(1))
+ // Vectors are indexed in multiples of the vector size.
+ .addImm(MI->getOperand(2).getImm() + Offset)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDriv_pseudo_V6_128B:
+ Is128B = true;
+ case Hexagon::LDriv_pseudo_V6: {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ int32_t Off = MI->getOperand(2).getImm();
+ int32_t Idx = Off;
+ BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+ .addOperand(MI->getOperand(1))
+ .addImm(Idx)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::STriv_pseudo_V6_128B:
+ Is128B = true;
+ case Hexagon::STriv_pseudo_V6: {
+ unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ int32_t Off = MI->getOperand(1).getImm();
+ int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6);
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addOperand(MI->getOperand(0))
+ .addImm(Idx)
+ .addOperand(MI->getOperand(2))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
case Hexagon::TFR_PdTrue: {
unsigned Reg = MI->getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
@@ -724,15 +908,15 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
- unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
- TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
- TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
@@ -747,17 +931,17 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
unsigned Src3Reg = MI->getOperand(3).getReg();
- unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
- unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
- unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
- TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi).addReg(Src3SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
- TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo).addReg(Src3SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
@@ -768,104 +952,168 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MRI.clearKillFlags(Src3SubLo);
return true;
}
+ case Hexagon::MUX64_rr: {
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ const MachineOperand &Op2 = MI->getOperand(2);
+ const MachineOperand &Op3 = MI->getOperand(3);
+ unsigned Rd = Op0.getReg();
+ unsigned Pu = Op1.getReg();
+ unsigned Rs = Op2.getReg();
+ unsigned Rt = Op3.getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned K1 = getKillRegState(Op1.isKill());
+ unsigned K2 = getKillRegState(Op2.isKill());
+ unsigned K3 = getKillRegState(Op3.isKill());
+ if (Rd != Rs)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
+ .addReg(Pu, (Rd == Rt) ? K1 : 0)
+ .addReg(Rs, K2);
+ if (Rd != Rt)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
+ .addReg(Pu, K1)
+ .addReg(Rt, K3);
+ MBB.erase(MI);
+ return true;
+ }
case Hexagon::TCRETURNi:
MI->setDesc(get(Hexagon::J2_jump));
return true;
case Hexagon::TCRETURNr:
MI->setDesc(get(Hexagon::J2_jumpr));
return true;
+ case Hexagon::TFRI_f:
+ case Hexagon::TFRI_cPt_f:
+ case Hexagon::TFRI_cNotPt_f: {
+ unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
+ APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF();
+ APInt IVal = FVal.bitcastToAPInt();
+ MI->RemoveOperand(Opx);
+ unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi :
+ (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
+ Hexagon::C2_cmoveif;
+ MI->setDesc(get(NewOpc));
+ MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+ return true;
+ }
}
return false;
}
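
The vector pseudo expansions above do their offset arithmetic in two directions: the STrivv/LDrivv cases add one vector length (1 << 6 or 1 << 7 bytes) to reach the second half of a register pair, while the STriv case divides a byte offset down to a vector index by shifting right by the same amount. A self-contained sketch of both, assuming 64-byte (512-bit) and 128-byte HVX vectors:

#include <cassert>
#include <cstdint>

static int32_t secondHalfOffset(int32_t BaseImm, bool Is128B) {
  const int32_t VecOffset = 1;                  // one vector past the first
  return BaseImm + (Is128B ? VecOffset << 7     // 128-byte vectors
                           : VecOffset << 6);   // 64-byte vectors
}

static int32_t byteOffsetToVecIndex(int32_t Off, bool Is128B) {
  return Is128B ? Off >> 7 : Off >> 6;
}

int main() {
  assert(secondHalfOffset(0, false) == 64);
  assert(secondHalfOffset(0, true) == 128);
  assert(byteOffsetToVecIndex(256, false) == 4);
  assert(byteOffsetToVecIndex(256, true) == 2);
  return 0;
}
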
-MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FI) const {
- // Hexagon_TODO: Implement.
- return nullptr;
+
+// We indicate that we want to reverse the branch by
+// inserting the reversed branching opcode.
+bool HexagonInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return true;
+ assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+ unsigned opcode = Cond[0].getImm();
+ assert(get(opcode).isBranch() && "Should be a branching condition.");
+ if (isEndLoopN(opcode))
+ return true;
+ unsigned NewOpcode = getInvertedPredicatedOpcode(opcode);
+ Cond[0].setImm(NewOpcode);
+ return false;
}
-unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *TRC;
- if (VT == MVT::i1) {
- TRC = &Hexagon::PredRegsRegClass;
- } else if (VT == MVT::i32 || VT == MVT::f32) {
- TRC = &Hexagon::IntRegsRegClass;
- } else if (VT == MVT::i64 || VT == MVT::f64) {
- TRC = &Hexagon::DoubleRegsRegClass;
- } else {
- llvm_unreachable("Cannot handle this register class");
- }
+void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
+}
- unsigned NewReg = RegInfo.createVirtualRegister(TRC);
- return NewReg;
+
+// Returns true if an instruction is predicated irrespective of the predicate
+// sense. For example, all of the following will return true.
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
+// Note: New-value stores are not included here, as in the current
+// implementation we don't need to check their predicate sense.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
}
-bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- const MCInstrDesc &MID = MI->getDesc();
- const uint64_t F = MID.TSFlags;
- if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
- return true;
- // TODO: This is largely obsolete now. Will need to be removed
- // in consecutive patches.
- switch(MI->getOpcode()) {
- // TFR_FI Remains a special case.
- case Hexagon::TFR_FI:
- return true;
- default:
- return false;
+bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
+ isEndLoopN(Cond[0].getImm())) {
+ DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+ return false;
}
- return false;
-}
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = predOpcodeHasNot(Cond);
-// This returns true in two cases:
-// - The OP code itself indicates that this is an extended instruction.
-// - One of MOs has been marked with HMOTF_ConstExtended flag.
-bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
- // First check if this is permanently extended op code.
- const uint64_t F = MI->getDesc().TSFlags;
- if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
- return true;
- // Use MO operand flags to determine if one of MI's operands
- // has HMOTF_ConstExtended flag set.
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
- if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended)
- return true;
+ // We have to predicate MI "in place", i.e. after this function returns,
+ // MI will need to be transformed into a predicated form. To avoid
+ // complicated manipulations with the operands (handling tied operands,
+ // etc.), build a new temporary instruction, then overwrite MI with it.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned PredOpc = getCondOpcode(Opc, invertJump);
+ MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+ unsigned NOp = 0, NumOps = MI->getNumOperands();
+ while (NOp < NumOps) {
+ MachineOperand &Op = MI->getOperand(NOp);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ break;
+ T.addOperand(Op);
+ NOp++;
}
- return false;
-}
-bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const {
- return MI->getDesc().isBranch();
-}
+ unsigned PredReg, PredRegPos, PredRegFlags;
+ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+ (void)GotPredReg;
+ assert(GotPredReg);
+ T.addReg(PredReg, PredRegFlags);
+ while (NOp < NumOps)
+ T.addOperand(MI->getOperand(NOp++));
-bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
- if (isNewValueJump(MI))
- return true;
+ MI->setDesc(get(PredOpc));
+ while (unsigned n = MI->getNumOperands())
+ MI->RemoveOperand(n-1);
+ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+ MI->addOperand(T->getOperand(i));
- if (isNewValueStore(MI))
- return true;
+ MachineBasicBlock::instr_iterator TI = T->getIterator();
+ B.erase(TI);
- return false;
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+ MRI.clearKillFlags(PredReg);
+ return true;
}
-bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
-}
-bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
- const uint64_t F = get(Opcode).TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ // TODO: Fix this
+ return false;
}
-bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
- return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ auto &HRI = getRegisterInfo();
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == &Hexagon::PredRegsRegClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
}
bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
@@ -875,10 +1123,21 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return false;
const int Opc = MI->getOpcode();
+ int NumOperands = MI->getNumOperands();
+
+ // Keep a flag for up to 4 operands of the instruction, to indicate if
+ // that operand has been constant extended.
+ bool OpCExtended[4];
+ if (NumOperands > 4)
+ NumOperands = 4;
+
+ for (int i = 0; i < NumOperands; i++)
+ OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI));
switch(Opc) {
case Hexagon::A2_tfrsi:
- return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm());
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+ isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -926,8 +1185,8 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::S4_storeirb_io:
case Hexagon::S4_storeirh_io:
case Hexagon::S4_storeiri_io:
- return (isUInt<6>(MI->getOperand(1).getImm()) &&
- isInt<6>(MI->getOperand(2).getImm()));
+ return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) &&
+ (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm()));
case Hexagon::A2_addi:
return isInt<8>(MI->getOperand(2).getImm());
@@ -944,269 +1203,1117 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-// This function performs the following inversiones:
-//
-// cPt ---> cNotPt
-// cNotPt ---> cPt
-//
-unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
- int InvPredOpcode;
- InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
- : Hexagon::getTruePredOpcode(Opc);
- if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
- return InvPredOpcode;
- switch(Opc) {
- default: llvm_unreachable("Unexpected predicated instruction");
- case Hexagon::C2_ccombinewt:
- return Hexagon::C2_ccombinewf;
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB, const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Throwing call is a boundary.
+ if (MI->isCall()) {
+ // If any of the block's successors is a landing pad, this could be a
+ // throwing call.
+ for (auto I : MBB->successors())
+ if (I->isEHPad())
+ return true;
+ }
+
+ // Don't mess around with no return calls.
+ if (MI->getOpcode() == Hexagon::CALLv3nr)
+ return true;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isPosition())
+ return true;
+
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
+ return true;
+
+ return false;
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+/// Hexagon counts the number of ##'s and adjusts for that many
+/// constant extenders.
+unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ StringRef AStr(Str);
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ // Add to size number of constant extenders seen * 4.
+ StringRef Occ("##");
+ Length += AStr.count(Occ)*4;
+ return Length;
+}
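
The '##' adjustment at the end can be exercised on its own; a small sketch, assuming a 4-byte constant-extender word and using std::string::find in place of StringRef::count (both count non-overlapping occurrences):

#include <cassert>
#include <string>

// Charge 4 bytes for every "##" constant-extender marker in the asm text.
unsigned extenderBytes(const std::string &Asm) {
  unsigned N = 0;
  for (size_t P = Asm.find("##"); P != std::string::npos;
       P = Asm.find("##", P + 2))
    ++N;
  return N * 4;
}

int main() {
  // One extended immediate -> one extra 4-byte extender word.
  assert(extenderBytes("r1 = add(r2, ##12345)") == 4);
  assert(extenderBytes("r3 = memw(r4)") == 0);
}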
+
+
+ScheduleHazardRecognizer*
+HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
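
The Mask written above records how many low bits the compare actually inspects (byte and halfword compares only look at 8 or 16 bits). A hedged sketch of how a caller, e.g. a redundant-compare elimination, might apply it:

#include <cstdint>

// Two immediates are interchangeable for a given compare when they agree
// on the bits that compare inspects.
bool sameUnderMask(int64_t A, int64_t B, int64_t Mask) {
  return (A & Mask) == (B & Mask);
}
// For A4_cmpbeq (Mask = 0xFF): sameUnderMask(0x1FF, 0x2FF, 0xFF) is true.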
+
+
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost) const {
+ return getInstrTimingClassLatency(ItinData, MI);
+}
+
+
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
+}
+
+
+// Inspired by this pair:
+// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb, AliasAnalysis *AA) const {
+ int OffsetA = 0, OffsetB = 0;
+ unsigned SizeA = 0, SizeB = 0;
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Instructions that are pure loads (not combined load/store memops) are
+ // not dependent on each other.
+ if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+ return true;
+
+ // Get base, offset, and access size in MIa.
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+ if (!BaseRegA || !SizeA)
+ return false;
+
+ // Get base, offset, and access size in MIb.
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+ if (!BaseRegB || !SizeB)
+ return false;
+
+ if (BaseRegA != BaseRegB)
+ return false;
+
+ // This is a mem access with the same base register and known offsets from it.
+ // Reason about it.
+ if (OffsetA > OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB);
+ return (SizeB <= offDiff);
+ } else if (OffsetA < OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA);
+ return (SizeA <= offDiff);
+ }
+
+ return false;
+}
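
The offset reasoning at the end is a plain interval-overlap test on [Offset, Offset+Size); a standalone sketch, checked against the load/store pair quoted in the comment above:

#include <cassert>
#include <cstdint>

// Accesses off the same base are disjoint when one starts at or past the
// end of the other.
bool disjoint(int64_t OffA, uint64_t SizeA, int64_t OffB, uint64_t SizeB) {
  if (OffA > OffB)
    return SizeB <= uint64_t(OffA - OffB);
  if (OffA < OffB)
    return SizeA <= uint64_t(OffB - OffA);
  return false; // Same offset: definitely aliasing.
}

int main() {
  assert(disjoint(136, 4, 132, 4));  // LD4 @ 136 vs ST4 @ 132: no overlap.
  assert(!disjoint(134, 4, 132, 4)); // Partial overlap.
}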
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = &Hexagon::PredRegsRegClass;
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ TRC = &Hexagon::IntRegsRegClass;
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ TRC = &Hexagon::DoubleRegsRegClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = MRI.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+
+bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const {
+ return (getAddrMode(MI) == HexagonII::AbsoluteSet);
+}
+
+
+bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
+}
+
+
+bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (!(isTC1(MI))
+ && !(QII->isTC2Early(MI))
+ && !(MI->getDesc().mayLoad())
+ && !(MI->getDesc().mayStore())
+ && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe)
+ && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe)
+ && !(QII->isMemOp(MI))
+ && !(MI->isBranch())
+ && !(MI->isReturn())
+ && !MI->isCall())
+ return true;
+
+ return false;
+}
+
+
+// Return true if the instruction is a compound branch instruction.
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
+ return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
+}
+
+
+bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const {
+ return (MI->isBranch() && isPredicated(MI)) ||
+ isConditionalTransfer(MI) ||
+ isConditionalALU32(MI) ||
+ isConditionalLoad(MI) ||
+ // Predicated stores which don't have a .new on any operands.
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) &&
+ !isPredicatedNew(MI));
+}
+
+
+bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_paddf:
+ case Hexagon::A2_paddfnew:
+ case Hexagon::A2_paddif:
+ case Hexagon::A2_paddifnew:
+ case Hexagon::A2_paddit:
+ case Hexagon::A2_padditnew:
+ case Hexagon::A2_paddt:
+ case Hexagon::A2_paddtnew:
+ case Hexagon::A2_pandf:
+ case Hexagon::A2_pandfnew:
+ case Hexagon::A2_pandt:
+ case Hexagon::A2_pandtnew:
+ case Hexagon::A2_porf:
+ case Hexagon::A2_porfnew:
+ case Hexagon::A2_port:
+ case Hexagon::A2_portnew:
+ case Hexagon::A2_psubf:
+ case Hexagon::A2_psubfnew:
+ case Hexagon::A2_psubt:
+ case Hexagon::A2_psubtnew:
+ case Hexagon::A2_pxorf:
+ case Hexagon::A2_pxorfnew:
+ case Hexagon::A2_pxort:
+ case Hexagon::A2_pxortnew:
+ case Hexagon::A4_paslhf:
+ case Hexagon::A4_paslhfnew:
+ case Hexagon::A4_paslht:
+ case Hexagon::A4_paslhtnew:
+ case Hexagon::A4_pasrhf:
+ case Hexagon::A4_pasrhfnew:
+ case Hexagon::A4_pasrht:
+ case Hexagon::A4_pasrhtnew:
+ case Hexagon::A4_psxtbf:
+ case Hexagon::A4_psxtbfnew:
+ case Hexagon::A4_psxtbt:
+ case Hexagon::A4_psxtbtnew:
+ case Hexagon::A4_psxthf:
+ case Hexagon::A4_psxthfnew:
+ case Hexagon::A4_psxtht:
+ case Hexagon::A4_psxthtnew:
+ case Hexagon::A4_pzxtbf:
+ case Hexagon::A4_pzxtbfnew:
+ case Hexagon::A4_pzxtbt:
+ case Hexagon::A4_pzxtbtnew:
+ case Hexagon::A4_pzxthf:
+ case Hexagon::A4_pzxthfnew:
+ case Hexagon::A4_pzxtht:
+ case Hexagon::A4_pzxthtnew:
case Hexagon::C2_ccombinewf:
- return Hexagon::C2_ccombinewt;
+ case Hexagon::C2_ccombinewt:
+ return true;
+ }
+ return false;
+}
+
+
+// FIXME: Function name and its functionality don't match.
+// It should be renamed to hasPredNewOpcode().
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
+ if (!MI->getDesc().mayLoad() || !isPredicated(MI))
+ return false;
+
+ int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ // Instruction with valid predicated-new opcode can be promoted to .new.
+ return PNewOpcode >= 0;
+}
+
- // Dealloc_return.
- case Hexagon::L4_return_t:
- return Hexagon::L4_return_f;
- case Hexagon::L4_return_f:
- return Hexagon::L4_return_t;
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores as they can't be
+// converted to .new predicate.
+bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ case Hexagon::S4_pstorerbt_rr:
+ case Hexagon::S4_pstorerbf_rr:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S2_pstorerbt_pi:
+ case Hexagon::S2_pstorerbf_pi:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ case Hexagon::S4_pstorerdt_rr:
+ case Hexagon::S4_pstorerdf_rr:
+ case Hexagon::S2_pstorerdt_pi:
+ case Hexagon::S2_pstorerdf_pi:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ case Hexagon::S4_pstorerht_rr:
+ case Hexagon::S4_pstorerhf_rr:
+ case Hexagon::S2_pstorerht_pi:
+ case Hexagon::S2_pstorerhf_pi:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ case Hexagon::S4_pstorerit_rr:
+ case Hexagon::S4_pstorerif_rr:
+ case Hexagon::S2_pstorerit_pi:
+ case Hexagon::S2_pstorerif_pi:
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerif_abs:
+ return true;
+
+ // Predicated new-value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+ // from the "conditional store" list because a predicated new-value store
+ // would NOT be promoted to a double dot-new store.
+ // This function returns true for those stores that are predicated but not
+ // yet promoted to predicated dot-new instructions.
}
}
-// New Value Store instructions.
-bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+
+bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::A2_tfrtnew:
+ case Hexagon::A2_tfrfnew:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmovenewif:
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
+// isFPImm and later getFPImm as well.
+bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable =
+ (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ if (MI->isCall())
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a machine basic block address and it is not marked as
+ // extended above, it is not constant-extended.
+ if (MO.isMBB())
+ return false;
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // Besides isGlobal(), the check below also covers symbols, block
+ // addresses, and jump-table/constant-pool indices, all of which end up
+ // constant extended.
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
+ MO.isJTI() || MO.isCPI())
+ return true;
- return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
}
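
The closing range test is the crux: an extendable immediate only needs a constant extender when it does not fit the instruction's native field. A sketch with a hypothetical 8-bit signed field standing in for getMinValue()/getMaxValue():

// Hypothetical s8 immediate field: natively encodable range is [-128, 127].
bool needsConstExtender(int Imm, int MinValue = -128, int MaxValue = 127) {
  return Imm < MinValue || Imm > MaxValue;
}
// needsConstExtender(100)  -> false: fits the field as-is.
// needsConstExtender(1000) -> true:  out of range, must be extended.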
-bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ const MCInstrDesc &ProdMCID = ProdMI->getDesc();
+ if (!ProdMCID.getNumDefs())
+ return false;
+
+ auto &HRI = getRegisterInfo();
+
+ SmallVector<unsigned, 4> DefsA;
+ SmallVector<unsigned, 4> DefsB;
+ SmallVector<unsigned, 8> UsesA;
+ SmallVector<unsigned, 8> UsesB;
+
+ parseOperands(ProdMI, DefsA, UsesA);
+ parseOperands(ConsMI, DefsB, UsesB);
+
+ for (auto &RegA : DefsA)
+ for (auto &RegB : UsesB) {
+ // True data dependency.
+ if (RegA == RegB)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegA))
+ for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegB == *SubRegs)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegB))
+ for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegA == *SubRegs)
+ return true;
+ }
+
+ return false;
+}
+
+
+// Returns true if the instruction is already a .cur.
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::V6_vL32b_cur_pi:
+ case Hexagon::V6_vL32b_cur_ai:
+ case Hexagon::V6_vL32b_cur_pi_128B:
+ case Hexagon::V6_vL32b_cur_ai_128B:
+ return true;
+ }
+ return false;
+}
+
+
+// Returns true if the instruction is a dot-new instruction, whether it is
+// a predicated dot-new or a register (new-value) dot-new insn.
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+ if (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)))
+ return true;
+
+ return false;
+}
+
+
+/// Symmetrical. See if these two instructions are fit to form a duplex pair.
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
+ const MachineInstr *MIb) const {
+ HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
+ HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
+ return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
+
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+ return true;
+
+ // Multiply
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
+ return true;
+ return false;
+}
+
+
+bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
+ return (Opcode == Hexagon::ENDLOOP0 ||
+ Opcode == Hexagon::ENDLOOP1);
+}
+
+
+bool HexagonInstrInfo::isExpr(unsigned OpType) const {
+ switch(OpType) {
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
+
+ // TODO: This is largely obsolete now. Will need to be removed
+ // in subsequent patches.
+ switch(MI->getOpcode()) {
+ // TFR_FI Remains a special case.
+ case Hexagon::TFR_FI:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of the MOs has been marked with the HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
- return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const {
+ if (!isV60VectorInstruction(I))
+ return false;
+ if (!I->mayLoad() && !I->mayStore())
+ return false;
+ return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
}
-int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
- enum Hexagon::PredSense inPredSense;
- inPredSense = invertPredicate ? Hexagon::PredSense_false :
- Hexagon::PredSense_true;
- int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
- if (CondOpcode >= 0) // Valid Conditional opcode/instruction
- return CondOpcode;
- // This switch case will be removed once all the instructions have been
- // modified to use relation maps.
- switch(Opc) {
- case Hexagon::TFRI_f:
- return !invertPredicate ? Hexagon::TFRI_cPt_f :
- Hexagon::TFRI_cNotPt_f;
- case Hexagon::A2_combinew:
- return !invertPredicate ? Hexagon::C2_ccombinewt :
- Hexagon::C2_ccombinewf;
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_callr :
+ case Hexagon::J2_callrf :
+ case Hexagon::J2_callrt :
+ return true;
+ }
+ return false;
+}
- // DEALLOC_RETURN.
- case Hexagon::L4_return:
- return !invertPredicate ? Hexagon::L4_return_t:
- Hexagon::L4_return_f;
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_fnew_pt :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ return true;
}
- llvm_unreachable("Unexpected predicable instruction");
+ return false;
}
-bool HexagonInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- ArrayRef<MachineOperand> Cond) const {
- if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
- DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpr :
+ case Hexagon::J2_jumprt :
+ case Hexagon::J2_jumprf :
+ case Hexagon::J2_jumprtnewpt :
+ case Hexagon::J2_jumprfnewpt :
+ case Hexagon::J2_jumprtnew :
+ case Hexagon::J2_jumprfnew :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true if a given MI can accommodate a given offset.
+// Uses an absolute estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+ unsigned offset) const {
+ // This selection of jump instructions matches what AnalyzeBranch
+ // can parse, plus NVJ (new-value jumps).
+ if (isNewValueJump(MI)) // r9:2
+ return isInt<11>(offset);
+
+ switch (MI->getOpcode()) {
+ // Still missing: jump to an address conditional on a register value.
+ default:
return false;
+ case Hexagon::J2_jump: // bits<24> dst; // r22:2
+ case Hexagon::J2_call:
+ case Hexagon::CALLv3nr:
+ return isInt<24>(offset);
+ case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ case Hexagon::J2_callt:
+ case Hexagon::J2_callf:
+ return isInt<17>(offset);
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop0iext:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0rext:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop1iext:
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1rext:
+ return isInt<9>(offset);
+ // TODO: Add all the compound branches here. Can we do this in Relation model?
+ case Hexagon::J4_cmpeqi_tp0_jump_nt:
+ case Hexagon::J4_cmpeqi_tp1_jump_nt:
+ return isInt<11>(offset);
}
- int Opc = MI->getOpcode();
- assert (isPredicable(MI) && "Expected predicable instruction");
- bool invertJump = predOpcodeHasNot(Cond);
+}
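
The isInt<N> tests above ask whether the byte offset fits the branch encoding's signed field (the ':2' in r22:2 etc. means the low bits are implied zero). A sketch of the predicate, mirroring what llvm::isInt<N> computes:

#include <cstdint>

// True if X fits in an N-bit two's-complement field.
template <unsigned N>
bool fitsSigned(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
// fitsSigned<17>(65532) -> true:  J2_jumpt can reach this offset.
// fitsSigned<17>(70000) -> false: beyond the r15:2 range.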
- // We have to predicate MI "in place", i.e. after this function returns,
- // MI will need to be transformed into a predicated form. To avoid com-
- // plicated manipulations with the operands (handling tied operands,
- // etc.), build a new temporary instruction, then overwrite MI with it.
- MachineBasicBlock &B = *MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned PredOpc = getCondOpcode(Opc, invertJump);
- MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
- unsigned NOp = 0, NumOps = MI->getNumOperands();
- while (NOp < NumOps) {
- MachineOperand &Op = MI->getOperand(NOp);
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
- break;
- T.addOperand(Op);
- NOp++;
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const {
+ if (!LRMI || !ESMI)
+ return false;
+
+ bool isLate = isLateResultInstr(LRMI);
+ bool isEarly = isEarlySourceInstr(ESMI);
+
+ DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+ DEBUG(LRMI->dump());
+ DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
+ DEBUG(ESMI->dump());
+
+ if (isLate && isEarly) {
+ DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
+ return true;
}
- unsigned PredReg, PredRegPos, PredRegFlags;
- bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
- (void)GotPredReg;
- assert(GotPredReg);
- T.addReg(PredReg, PredRegFlags);
- while (NOp < NumOps)
- T.addOperand(MI->getOperand(NOp++));
+ return false;
+}
- MI->setDesc(get(PredOpc));
- while (unsigned n = MI->getNumOperands())
- MI->RemoveOperand(n-1);
- for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
- MI->addOperand(T->getOperand(i));
- MachineBasicBlock::instr_iterator TI = &*T;
- B.erase(TI);
+bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
- MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
- MRI.clearKillFlags(PredReg);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::PHI:
+ return false;
+ default:
+ break;
+ }
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ case Hexagon::Sched::V2LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT01:
+ return false;
+ }
return true;
}
-bool
-HexagonInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
- return true;
+bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
+ // resource, but all operands can be received late, like an ALU instruction.
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
}
-bool
-HexagonInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles,
- unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles,
- unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
- return true;
+bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ return Opcode == Hexagon::J2_loop0i ||
+ Opcode == Hexagon::J2_loop0r ||
+ Opcode == Hexagon::J2_loop0iext ||
+ Opcode == Hexagon::J2_loop0rext ||
+ Opcode == Hexagon::J2_loop1i ||
+ Opcode == Hexagon::J2_loop1r ||
+ Opcode == Hexagon::J2_loop1iext ||
+ Opcode == Hexagon::J2_loop1rext;
+}
+
+
+bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
+ case Hexagon::L4_ior_memopb_io:
+ case Hexagon::L4_ior_memoph_io:
+ case Hexagon::L4_ior_memopw_io:
+ case Hexagon::L4_iand_memopb_io:
+ case Hexagon::L4_iand_memoph_io:
+ case Hexagon::L4_iand_memopw_io:
+ return true;
+ }
+ return false;
}
-// Returns true if an instruction is predicated irrespective of the predicate
-// sense. For example, all of the following will return true.
-// if (p0) R1 = add(R2, R3)
-// if (!p0) R1 = add(R2, R3)
-// if (p0.new) R1 = add(R2, R3)
-// if (!p0.new) R1 = add(R2, R3)
-bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
}
-bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
+}
+
- return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ return isNewValueJump(MI) || isNewValueStore(MI);
}
-bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- assert(isPredicated(MI));
- return (!((F >> HexagonII::PredicatedFalsePos) &
- HexagonII::PredicatedFalseMask));
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ return isNewValue(MI) && MI->isBranch();
}
-bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
+ return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
- // Make sure that the instruction is predicated.
- assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
- return (!((F >> HexagonII::PredicatedFalsePos) &
- HexagonII::PredicatedFalseMask));
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned OperandNum) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
}
+
+bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const {
+ return getAddrMode(MI) == HexagonII::PostInc;
+}
+
+
bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
-
assert(isPredicated(MI));
- return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
+
bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
-
assert(isPredicated(Opcode));
- return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
-// Returns true, if a ST insn can be promoted to a new-value store.
-bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
+
+bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
+}
+
- return ((F >> HexagonII::mayNVStorePos) &
- HexagonII::mayNVStoreMask);
+bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ // Make sure that the instruction is predicated.
+ assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
}
-bool
-HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
- MachineOperand MO = MI->getOperand(oper);
- if (MO.isReg() && MO.isDef()) {
- const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
- if (RC == &Hexagon::PredRegsRegClass) {
- Pred.push_back(MO);
- return true;
- }
- }
+
+bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
+}
+
+
+bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ assert(get(Opcode).isBranch() &&
+ (isPredicatedNew(Opcode) || isNewValue(Opcode)));
+ return (F >> HexagonII::TakenPos) & HexagonII::TakenMask;
+}
+
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
+ MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
+}
+
+
+bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
+}
+
+
+bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ default:
+ return false;
}
- return false;
}
-bool
-HexagonInstrInfo::
-SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
- ArrayRef<MachineOperand> Pred2) const {
- // TODO: Fix this
- return false;
+// Returns true when SU has a timing class TC1.
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ //case Hexagon::Sched::M_tc_1_SLOT23:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ return true;
+
+ default:
+ return false;
+ }
}
-//
-// We indicate that we want to reverse the branch by
-// inserting the reversed branching opcode.
-//
-bool HexagonInstrInfo::ReverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const {
- if (Cond.empty())
+bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2_SLOT23:
+ case Hexagon::Sched::CR_tc_2_SLOT3:
+ case Hexagon::Sched::M_tc_2_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2_SLOT23:
return true;
- assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
- Opcode_t opcode = Cond[0].getImm();
- //unsigned temp;
- assert(get(opcode).isBranch() && "Should be a branching condition.");
- if (isEndLoopN(opcode))
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT3:
+ case Hexagon::Sched::J_tc_2early_SLOT0123:
+ case Hexagon::Sched::J_tc_2early_SLOT2:
+ case Hexagon::Sched::J_tc_2early_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2early_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2early_SLOT23:
return true;
- Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
- Cond[0].setImm(NewOpcode);
- return false;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
}
-bool HexagonInstrInfo::
-isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
- const BranchProbability &Probability) const {
- return (NumInstrs <= 4);
+bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ const uint64_t V = getType(MI);
+ return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
}
-bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::L4_return:
- case Hexagon::L4_return_t:
- case Hexagon::L4_return_f:
- case Hexagon::L4_return_tnew_pnt:
- case Hexagon::L4_return_fnew_pnt:
- case Hexagon::L4_return_tnew_pt:
- case Hexagon::L4_return_fnew_pt:
- return true;
+
+// Check if the Offset is a valid auto-increment immediate for this
+// load/store type.
+bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
+ if (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX &&
+ (Offset & 0x3f) == 0);
+ }
+ // 128B
+ if (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX_128B &&
+ (Offset & 0x7f) == 0);
+ }
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
}
+ llvm_unreachable("Not an auto-inc opc!");
}
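
Each case above combines a signed range with an alignment mask. A sketch of the MVT::i64 arm, assuming the Hexagon_MEMD_AUTOINC_* bounds correspond to the usual 4-bit signed increment scaled by 8 (i.e. [-64, 56] in steps of 8):

// Hypothetical bounds for a 4-bit signed auto-increment scaled by 8 bytes.
bool validDoubleWordAutoInc(int Offset) {
  return Offset >= -64 && Offset <= 56 && (Offset & 0x7) == 0;
}
// validDoubleWordAutoInc(16) -> true
// validDoubleWordAutoInc(12) -> false (not 8-byte aligned)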
@@ -1222,6 +2329,40 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// misaligns with respect to load size.
switch (Opcode) {
+ case Hexagon::STriq_pred_V6:
+ case Hexagon::STriq_pred_vec_V6:
+ case Hexagon::STriv_pseudo_V6:
+ case Hexagon::STrivv_pseudo_V6:
+ case Hexagon::LDriq_pred_V6:
+ case Hexagon::LDriq_pred_vec_V6:
+ case Hexagon::LDriv_pseudo_V6:
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed:
+ case Hexagon::STrivv_indexed:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::V6_vS32Ub_ai:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX);
+
+ case Hexagon::STriq_pred_V6_128B:
+ case Hexagon::STriq_pred_vec_V6_128B:
+ case Hexagon::STriv_pseudo_V6_128B:
+ case Hexagon::STrivv_pseudo_V6_128B:
+ case Hexagon::LDriq_pred_V6_128B:
+ case Hexagon::LDriq_pred_vec_V6_128B:
+ case Hexagon::LDriv_pseudo_V6_128B:
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ case Hexagon::STrivv_indexed_128B:
+ case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32Ub_ai_128B:
+ case Hexagon::V6_vS32Ub_ai_128B:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX_128B);
+
case Hexagon::J2_loop0i:
case Hexagon::J2_loop1i:
return isUInt<10>(Offset);
@@ -1248,8 +2389,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
(Offset <= Hexagon_MEMH_OFFSET_MAX);
case Hexagon::L2_loadrb_io:
- case Hexagon::S2_storerb_io:
case Hexagon::L2_loadrub_io:
+ case Hexagon::S2_storerb_io:
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
@@ -1257,28 +2398,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::L4_iadd_memopw_io:
- case Hexagon::L4_isub_memopw_io:
- case Hexagon::L4_add_memopw_io:
- case Hexagon::L4_sub_memopw_io:
- case Hexagon::L4_and_memopw_io:
- case Hexagon::L4_or_memopw_io:
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
return (0 <= Offset && Offset <= 255);
- case Hexagon::L4_iadd_memoph_io:
- case Hexagon::L4_isub_memoph_io:
- case Hexagon::L4_add_memoph_io:
- case Hexagon::L4_sub_memoph_io:
- case Hexagon::L4_and_memoph_io:
- case Hexagon::L4_or_memoph_io:
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
return (0 <= Offset && Offset <= 127);
- case Hexagon::L4_iadd_memopb_io:
- case Hexagon::L4_isub_memopb_io:
- case Hexagon::L4_add_memopb_io:
- case Hexagon::L4_sub_memopb_io:
- case Hexagon::L4_and_memopb_io:
- case Hexagon::L4_or_memopb_io:
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
return (0 <= Offset && Offset <= 63);
// LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -1291,223 +2432,556 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::TFR_FIA:
case Hexagon::INLINEASM:
return true;
- }
+
+ case Hexagon::L2_ploadrbt_io:
+ case Hexagon::L2_ploadrbf_io:
+ case Hexagon::L2_ploadrubt_io:
+ case Hexagon::L2_ploadrubf_io:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ return isUInt<6>(Offset);
+
+ case Hexagon::L2_ploadrht_io:
+ case Hexagon::L2_ploadrhf_io:
+ case Hexagon::L2_ploadruht_io:
+ case Hexagon::L2_ploadruhf_io:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ return isShiftedUInt<6,1>(Offset);
+
+ case Hexagon::L2_ploadrit_io:
+ case Hexagon::L2_ploadrif_io:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeiri_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ return isShiftedUInt<6,2>(Offset);
+
+ case Hexagon::L2_ploadrdt_io:
+ case Hexagon::L2_ploadrdf_io:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ return isShiftedUInt<6,3>(Offset);
+ } // switch
llvm_unreachable("No offset range is defined for this opcode. "
"Please define it in the above switch statement!");
}
-//
-// Check if the Offset is a valid auto-inc imm by Load/Store Type.
-//
-bool HexagonInstrInfo::
-isValidAutoIncImm(const EVT VT, const int Offset) const {
+bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const {
+ return MI && isV60VectorInstruction(MI) && isAccumulator(MI);
+}
- if (VT == MVT::i64) {
- return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
- Offset <= Hexagon_MEMD_AUTOINC_MAX &&
- (Offset & 0x7) == 0);
- }
- if (VT == MVT::i32) {
- return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
- Offset <= Hexagon_MEMW_AUTOINC_MAX &&
- (Offset & 0x3) == 0);
- }
- if (VT == MVT::i16) {
- return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
- Offset <= Hexagon_MEMH_AUTOINC_MAX &&
- (Offset & 0x1) == 0);
- }
- if (VT == MVT::i8) {
- return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
- Offset <= Hexagon_MEMB_AUTOINC_MAX);
+
+bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ const uint64_t F = get(MI->getOpcode()).TSFlags;
+ const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+ return
+ V == HexagonII::TypeCVI_VA ||
+ V == HexagonII::TypeCVI_VA_DV;
+}
+
+
+bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
+ return true;
+
+ if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI)))
+ return true;
+
+ if (mayBeNewStore(ConsMI))
+ return true;
+
+ return false;
+}
+
+
+/// \brief Can these instructions execute at the same time in a bundle.
+bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const {
+ if (DisableNVSchedule)
+ return false;
+ if (mayBeNewStore(Second)) {
+ // Make sure the definition of the first instruction is the value being
+ // stored.
+ const MachineOperand &Stored =
+ Second->getOperand(Second->getNumOperands() - 1);
+ if (!Stored.isReg())
+ return false;
+ for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) {
+ const MachineOperand &Op = First->getOperand(i);
+ if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg())
+ return true;
+ }
}
- llvm_unreachable("Not an auto-inc opc!");
+ return false;
+}
+
+
+bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
}
-bool HexagonInstrInfo::
-isMemOp(const MachineInstr *MI) const {
-// return MI->getDesc().mayLoad() && MI->getDesc().mayStore();
-
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::L4_iadd_memopw_io:
- case Hexagon::L4_isub_memopw_io:
- case Hexagon::L4_add_memopw_io:
- case Hexagon::L4_sub_memopw_io:
- case Hexagon::L4_and_memopw_io:
- case Hexagon::L4_or_memopw_io:
- case Hexagon::L4_iadd_memoph_io:
- case Hexagon::L4_isub_memoph_io:
- case Hexagon::L4_add_memoph_io:
- case Hexagon::L4_sub_memoph_io:
- case Hexagon::L4_and_memoph_io:
- case Hexagon::L4_or_memoph_io:
- case Hexagon::L4_iadd_memopb_io:
- case Hexagon::L4_isub_memopb_io:
- case Hexagon::L4_add_memopb_io:
- case Hexagon::L4_sub_memopb_io:
- case Hexagon::L4_and_memopb_io:
- case Hexagon::L4_or_memopb_io:
- case Hexagon::L4_ior_memopb_io:
- case Hexagon::L4_ior_memoph_io:
- case Hexagon::L4_ior_memopw_io:
- case Hexagon::L4_iand_memopb_io:
- case Hexagon::L4_iand_memoph_io:
- case Hexagon::L4_iand_memopw_io:
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const {
+ short NonExtOpcode;
+ // Check if the instruction has a register form that uses a register in place
+ // of the extended operand; if so, treat that as the non-extended form.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retrieve non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+ // base+register offset addressing mode. However, the left-shift operand
+ // should be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseLongOffset:
+ NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
return true;
}
return false;
}
-bool HexagonInstrInfo::
-isSpillPredRegOp(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::STriw_pred :
- case Hexagon::LDriw_pred :
+bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(),
+ Hexagon::InstrType_Pseudo) >= 0;
+}
+
+
+bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
return true;
+ ++I;
}
+ return false;
}
-bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtui:
+
+// Returns true if a load insn can be promoted to a .cur load.
+bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const {
+ auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>();
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
+ HST.hasV60TOps();
+}
+
+
+// Returns true if a store insn can be promoted to a new-value store.
+bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ // There is no stall when ProdMI is not a V60 vector.
+ if (!isV60VectorInstruction(ProdMI))
+ return false;
+
+ // There is no stall when ProdMI and ConsMI are not dependent.
+ if (!isDependent(ProdMI, ConsMI))
+ return false;
+
+ // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI
+ // are scheduled in consecutive packets.
+ if (isVecUsableNextPacket(ProdMI, ConsMI))
+ return false;
+
+ return true;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator BII) const {
+ // There is no stall when MI is not a V60 vector instruction.
+ if (!isV60VectorInstruction(MI))
+ return false;
+
+ MachineBasicBlock::const_instr_iterator MII = BII;
+ MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end();
+
+ if (!(*MII).isBundle()) {
+ const MachineInstr *J = &*MII;
+ if (!isV60VectorInstruction(J))
+ return false;
+ else if (isVecUsableNextPacket(J, MI))
+ return false;
+ return true;
+ }
+
+ for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) {
+ const MachineInstr *J = &*MII;
+ if (producesStall(J, MI))
return true;
}
+ return false;
+}
+
+
+bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI,
+ unsigned PredReg) const {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
+ return false; // Predicate register must be explicitly defined.
+ }
+
+ // The Hexagon Programmer's Reference says that decbin, memw_locked, and
+ // memd_locked cannot be used as .new either, but we don't seem to have
+ // these instructions defined.
+ return MI->getOpcode() != Hexagon::A4_tlbmatch;
+}
+
+
+bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
+ return (Opcode == Hexagon::J2_jumpt) ||
+ (Opcode == Hexagon::J2_jumpf) ||
+ (Opcode == Hexagon::J2_jumptnew) ||
+ (Opcode == Hexagon::J2_jumpfnew) ||
+ (Opcode == Hexagon::J2_jumptnewpt) ||
+ (Opcode == Hexagon::J2_jumpfnewpt);
+}
+
+
+bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || !isPredicated(Cond[0].getImm()))
+ return false;
+ return !isPredicatedTrue(Cond[0].getImm());
+}
+
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
+}
+
+
+// Returns the base register in a memory access (load/store). The offset is
+// returned in Offset and the access size is returned in AccessSize.
+unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI,
+ int &Offset, unsigned &AccessSize) const {
+  // Return 0 if it is not a base+offset instruction, a MemOp, or a
+  // post-increment.
+ if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
+ getAddrMode(MI) != HexagonII::BaseLongOffset &&
+ !isMemOp(MI) && !isPostIncrement(MI))
+ return 0;
+
+ // Since it is a memory access instruction, getMemAccessSize() should never
+ // return 0.
+ assert (getMemAccessSize(MI) &&
+ "BaseImmOffset or BaseLongOffset or MemOp without accessSize");
+
+  // Return values of getMemAccessSize():
+  // 0 - checked by the assert above;
+  // 1, 2, 3, 4 and 7, 8 - the statement below is correct for all of these.
+  // MemAccessSize is represented as 1+log2(N) where N is the size in bytes.
+ AccessSize = (1U << (getMemAccessSize(MI) - 1));
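+  // Worked example (illustrative): a word access (N = 4 bytes) is encoded as
+  // 1 + log2(4) = 3, so AccessSize = 1U << (3 - 1) = 4; a double-word access
+  // (N = 8) is encoded as 4, giving 1U << 3 = 8.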
+
+ unsigned basePos = 0, offsetPos = 0;
+ if (!getBaseAndOffsetPosition(MI, basePos, offsetPos))
+ return 0;
+
+  // Post-increment addressing updates the EA after the memory access, so we
+  // need to treat its offset as zero.
+ if (isPostIncrement(MI))
+ Offset = 0;
+ else {
+ Offset = MI->getOperand(offsetPos).getImm();
+ }
+
+ return MI->getOperand(basePos).getReg();
+}
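+// A minimal usage sketch (hypothetical caller, not part of this change):
+//   int Offset; unsigned AccessSize;
+//   unsigned BaseReg = HII->getBaseAndOffset(MI, Offset, AccessSize);
+//   if (BaseReg != 0) {
+//     // BaseReg is the base register; Offset is zero for post-increment
+//     // forms and AccessSize is the access width in bytes.
+//   }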
+
+
+/// Return the position of the base and offset operands for this instruction.
+bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
+ unsigned &BasePos, unsigned &OffsetPos) const {
+ // Deal with memops first.
+ if (isMemOp(MI)) {
+ assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Bad Memop.");
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayStore()) {
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayLoad()) {
+ BasePos = 1;
+ OffsetPos = 2;
+ } else
+ return false;
+
+ if (isPredicated(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+ if (isPostIncrement(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+
+ if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm())
+ return false;
+
+ return true;
+}
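+// For example (illustrative): in a predicated store such as
+//   if (p0) memw(Rs+#u6:2) = Rt
+// the machine operands are {predicate, base, offset, value}, so starting from
+// the store defaults (BasePos = 0, OffsetPos = 1), the predicate adjustment
+// above yields BasePos = 1 and OffsetPos = 2.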
+
+
+// Inserts branching instructions in reverse order of their occurrence.
+// e.g. jump_t t1 (i1)
+// jump t2 (i2)
+// Jumpers = {i2, i1}
+SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
+ MachineBasicBlock& MBB) const {
+ SmallVector<MachineInstr*, 2> Jumpers;
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
+ return Jumpers;
+
+  // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+  // It has two successors but does not have a terminator.
+  // We don't know how to handle such a block.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return Jumpers;
+ } while (I != MBB.instr_begin());
+
+ I = MBB.instr_end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.instr_begin())
+ return Jumpers;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(&*I))
+ return Jumpers;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ Jumpers.push_back(LastInst);
+ MachineInstr *SecondLastInst = nullptr;
+ // Find one more terminator if present.
+ do {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+ if (!SecondLastInst) {
+ SecondLastInst = &*I;
+ Jumpers.push_back(SecondLastInst);
+ } else // This is a third branch.
+ return Jumpers;
+ }
+ if (I == MBB.instr_begin())
+ break;
+ --I;
+ } while (true);
+ return Jumpers;
}
-bool HexagonInstrInfo::
-isConditionalTransfer (const MachineInstr *MI) const {
+
+// Returns Operand Index for the constant extended instruction.
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
+}
+
+// See if the instruction could potentially be a compound candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::A2_tfrt:
- case Hexagon::A2_tfrf:
- case Hexagon::C2_cmoveit:
- case Hexagon::C2_cmoveif:
- case Hexagon::A2_tfrtnew:
- case Hexagon::A2_tfrfnew:
- case Hexagon::C2_cmovenewit:
- case Hexagon::C2_cmovenewif:
- return true;
+ default:
+ return HexagonII::HCG_None;
+ //
+ // Compound pairs.
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+ // "Rd16=#U6 ; jump #r9:2"
+ // "Rd16=Rs16 ; jump #r9:2"
+ //
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((isUInt<5>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+    // Do not test for the #u6 size since the constant is extended regardless,
+    // and a compound could still be formed.
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::S2_tstbit_i:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ MI->getOperand(2).isImm() &&
+ isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0))
+ return HexagonII::HCG_A;
+ break;
+  // The fact that the .new form is used pretty much guarantees that the
+  // predicate register will match. Nevertheless,
+ // there could be some false positives without additional
+ // checking.
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnewpt:
+ Src1Reg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(Src1Reg) &&
+ (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg))
+ return HexagonII::HCG_B;
+ break;
+ // Transfer and jump:
+ // Rd=#U6 ; jump #r9:2
+ // Rd=Rs ; jump #r9:2
+ // Do not test for jump range here.
+ case Hexagon::J2_jump:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ return HexagonII::HCG_C;
+ break;
}
+
+ return HexagonII::HCG_None;
+}
+
+
+// Returns -1 when there is no opcode found.
+unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const {
+ assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A);
+ assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B);
+ if ((GA->getOpcode() != Hexagon::C2_cmpeqi) ||
+ (GB->getOpcode() != Hexagon::J2_jumptnew))
+ return -1;
+ unsigned DestReg = GA->getOperand(0).getReg();
+ if (!GB->readsRegister(DestReg))
+ return -1;
+ if (DestReg == Hexagon::P0)
+ return Hexagon::J4_cmpeqi_tp0_jump_nt;
+ if (DestReg == Hexagon::P1)
+ return Hexagon::J4_cmpeqi_tp1_jump_nt;
+ return -1;
}
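+// For example (illustrative), the pair
+//   p0 = cmp.eq(Rs,#u2)          (HCG_A, C2_cmpeqi)
+//   if (p0.new) jump:nt #r9:2    (HCG_B, J2_jumptnew)
+// is combined into J4_cmpeqi_tp0_jump_nt, provided the jump reads p0.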
-bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::A2_paddf:
- case Hexagon::A2_paddfnew:
- case Hexagon::A2_paddt:
- case Hexagon::A2_paddtnew:
- case Hexagon::A2_pandf:
- case Hexagon::A2_pandfnew:
- case Hexagon::A2_pandt:
- case Hexagon::A2_pandtnew:
- case Hexagon::A4_paslhf:
- case Hexagon::A4_paslhfnew:
- case Hexagon::A4_paslht:
- case Hexagon::A4_paslhtnew:
- case Hexagon::A4_pasrhf:
- case Hexagon::A4_pasrhfnew:
- case Hexagon::A4_pasrht:
- case Hexagon::A4_pasrhtnew:
- case Hexagon::A2_porf:
- case Hexagon::A2_porfnew:
- case Hexagon::A2_port:
- case Hexagon::A2_portnew:
- case Hexagon::A2_psubf:
- case Hexagon::A2_psubfnew:
- case Hexagon::A2_psubt:
- case Hexagon::A2_psubtnew:
- case Hexagon::A2_pxorf:
- case Hexagon::A2_pxorfnew:
- case Hexagon::A2_pxort:
- case Hexagon::A2_pxortnew:
- case Hexagon::A4_psxthf:
- case Hexagon::A4_psxthfnew:
- case Hexagon::A4_psxtht:
- case Hexagon::A4_psxthtnew:
- case Hexagon::A4_psxtbf:
- case Hexagon::A4_psxtbfnew:
- case Hexagon::A4_psxtbt:
- case Hexagon::A4_psxtbtnew:
- case Hexagon::A4_pzxtbf:
- case Hexagon::A4_pzxtbfnew:
- case Hexagon::A4_pzxtbt:
- case Hexagon::A4_pzxtbtnew:
- case Hexagon::A4_pzxthf:
- case Hexagon::A4_pzxthfnew:
- case Hexagon::A4_pzxtht:
- case Hexagon::A4_pzxthtnew:
- case Hexagon::A2_paddit:
- case Hexagon::A2_paddif:
- case Hexagon::C2_ccombinewt:
- case Hexagon::C2_ccombinewf:
- return true;
+
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
+ enum Hexagon::PredSense inPredSense;
+ inPredSense = invertPredicate ? Hexagon::PredSense_false :
+ Hexagon::PredSense_true;
+ int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
+ if (CondOpcode >= 0) // Valid Conditional opcode/instruction
+ return CondOpcode;
+
+ // This switch case will be removed once all the instructions have been
+ // modified to use relation maps.
+ switch(Opc) {
+ case Hexagon::TFRI_f:
+ return !invertPredicate ? Hexagon::TFRI_cPt_f :
+ Hexagon::TFRI_cNotPt_f;
}
+
+ llvm_unreachable("Unexpected predicable instruction");
}
-bool HexagonInstrInfo::
-isConditionalLoad (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::L2_ploadrdt_io :
- case Hexagon::L2_ploadrdf_io:
- case Hexagon::L2_ploadrit_io:
- case Hexagon::L2_ploadrif_io:
- case Hexagon::L2_ploadrht_io:
- case Hexagon::L2_ploadrhf_io:
- case Hexagon::L2_ploadrbt_io:
- case Hexagon::L2_ploadrbf_io:
- case Hexagon::L2_ploadruht_io:
- case Hexagon::L2_ploadruhf_io:
- case Hexagon::L2_ploadrubt_io:
- case Hexagon::L2_ploadrubf_io:
- case Hexagon::L2_ploadrdt_pi:
- case Hexagon::L2_ploadrdf_pi:
- case Hexagon::L2_ploadrit_pi:
- case Hexagon::L2_ploadrif_pi:
- case Hexagon::L2_ploadrht_pi:
- case Hexagon::L2_ploadrhf_pi:
- case Hexagon::L2_ploadrbt_pi:
- case Hexagon::L2_ploadrbf_pi:
- case Hexagon::L2_ploadruht_pi:
- case Hexagon::L2_ploadruhf_pi:
- case Hexagon::L2_ploadrubt_pi:
- case Hexagon::L2_ploadrubf_pi:
- case Hexagon::L4_ploadrdt_rr:
- case Hexagon::L4_ploadrdf_rr:
- case Hexagon::L4_ploadrbt_rr:
- case Hexagon::L4_ploadrbf_rr:
- case Hexagon::L4_ploadrubt_rr:
- case Hexagon::L4_ploadrubf_rr:
- case Hexagon::L4_ploadrht_rr:
- case Hexagon::L4_ploadrhf_rr:
- case Hexagon::L4_ploadruht_rr:
- case Hexagon::L4_ploadruhf_rr:
- case Hexagon::L4_ploadrit_rr:
- case Hexagon::L4_ploadrif_rr:
- return true;
+
+// Return the .cur value instruction for a given load.
+int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown .cur type");
+ case Hexagon::V6_vL32b_pi:
+ return Hexagon::V6_vL32b_cur_pi;
+ case Hexagon::V6_vL32b_ai:
+ return Hexagon::V6_vL32b_cur_ai;
+ //128B
+ case Hexagon::V6_vL32b_pi_128B:
+ return Hexagon::V6_vL32b_cur_pi_128B;
+ case Hexagon::V6_vL32b_ai_128B:
+ return Hexagon::V6_vL32b_cur_ai_128B;
}
+ return 0;
}
-// Returns true if an instruction is a conditional store.
-//
-// Note: It doesn't include conditional new-value stores as they can't be
-// converted to .new predicate.
+
+
+// The diagram below shows the steps involved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
//
// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
// ^ ^
@@ -1524,8 +2998,6 @@ isConditionalLoad (const MachineInstr* MI) const {
// p.old store
// [if (p0)memw(R0+#0)=R2]
//
-// The above diagram shows the steps involoved in the conversion of a predicated
-// store instruction to its .new predicated new-value form.
//
// The following set of instructions further explains the scenario where
// conditional new-value store becomes invalid when promoted to .new predicate
@@ -1538,105 +3010,33 @@ isConditionalLoad (const MachineInstr* MI) const {
// the first two instructions because in instr 1, r0 is conditional on old value
// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which
// is not valid for new-value stores.
-bool HexagonInstrInfo::
-isConditionalStore (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::S4_storeirbt_io:
- case Hexagon::S4_storeirbf_io:
- case Hexagon::S4_pstorerbt_rr:
- case Hexagon::S4_pstorerbf_rr:
- case Hexagon::S2_pstorerbt_io:
- case Hexagon::S2_pstorerbf_io:
- case Hexagon::S2_pstorerbt_pi:
- case Hexagon::S2_pstorerbf_pi:
- case Hexagon::S2_pstorerdt_io:
- case Hexagon::S2_pstorerdf_io:
- case Hexagon::S4_pstorerdt_rr:
- case Hexagon::S4_pstorerdf_rr:
- case Hexagon::S2_pstorerdt_pi:
- case Hexagon::S2_pstorerdf_pi:
- case Hexagon::S2_pstorerht_io:
- case Hexagon::S2_pstorerhf_io:
- case Hexagon::S4_storeirht_io:
- case Hexagon::S4_storeirhf_io:
- case Hexagon::S4_pstorerht_rr:
- case Hexagon::S4_pstorerhf_rr:
- case Hexagon::S2_pstorerht_pi:
- case Hexagon::S2_pstorerhf_pi:
- case Hexagon::S2_pstorerit_io:
- case Hexagon::S2_pstorerif_io:
- case Hexagon::S4_storeirit_io:
- case Hexagon::S4_storeirif_io:
- case Hexagon::S4_pstorerit_rr:
- case Hexagon::S4_pstorerif_rr:
- case Hexagon::S2_pstorerit_pi:
- case Hexagon::S2_pstorerif_pi:
-
- // V4 global address store before promoting to dot new.
- case Hexagon::S4_pstorerdt_abs:
- case Hexagon::S4_pstorerdf_abs:
- case Hexagon::S4_pstorerbt_abs:
- case Hexagon::S4_pstorerbf_abs:
- case Hexagon::S4_pstorerht_abs:
- case Hexagon::S4_pstorerhf_abs:
- case Hexagon::S4_pstorerit_abs:
- case Hexagon::S4_pstorerif_abs:
- return true;
-
- // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
- // from the "Conditional Store" list. Because a predicated new value store
- // would NOT be promoted to a double dot new store. See diagram below:
- // This function returns yes for those stores that are predicated but not
- // yet promoted to predicate dot new instructions.
- //
- // +---------------------+
- // /-----| if (p0) memw(..)=r0 |---------\~
- // || +---------------------+ ||
- // promote || /\ /\ || promote
- // || /||\ /||\ ||
- // \||/ demote || \||/
- // \/ || || \/
- // +-------------------------+ || +-------------------------+
- // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
- // +-------------------------+ || +-------------------------+
- // || || ||
- // || demote \||/
- // promote || \/ NOT possible
- // || || /\~
- // \||/ || /||\~
- // \/ || ||
- // +-----------------------------+
- // | if (p0.new) memw(..)=r0.new |
- // +-----------------------------+
- // Double Dot New Store
- //
- }
-}
-
-
-bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
- if (isNewValue(MI) && isBranch(MI))
- return true;
- return false;
-}
-
-bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
- return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
-}
-
-bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
- return (getAddrMode(MI) == HexagonII::PostInc);
-}
-
-// Returns true, if any one of the operands is a dot new
-// insn, whether it is predicated dot new or register dot new.
-bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
- return (isNewValueInst(MI) ||
- (isPredicated(MI) && isPredicatedNew(MI)));
-}
-
+// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+// from the "Conditional Store" list. Because a predicated new value store
+// would NOT be promoted to a double dot new store. See diagram below:
+// This function returns yes for those stores that are predicated but not
+// yet promoted to predicate dot new instructions.
+//
+// +---------------------+
+// /-----| if (p0) memw(..)=r0 |---------\~
+// || +---------------------+ ||
+// promote || /\ /\ || promote
+// || /||\ /||\ ||
+// \||/ demote || \||/
+// \/ || || \/
+// +-------------------------+ || +-------------------------+
+// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+// +-------------------------+ || +-------------------------+
+// || || ||
+// || demote \||/
+// promote || \/ NOT possible
+// || || /\~
+// \||/ || /||\~
+// \/ || ||
+// +-----------------------------+
+// | if (p0.new) memw(..)=r0.new |
+// +-----------------------------+
+// Double Dot New Store
+//
// Returns the most basic instruction for the .new predicated instructions and
// new-value stores.
// For example, all of the following instructions will be converted back to the
@@ -1645,24 +3045,23 @@ bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
// 3) if (p0.new) memw(R0+#0) = R1 --->
//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, if the dependence check fails (due to R5) during the
+// next iteration, the instruction should be converted back to its most basic
+// form.
-int HexagonInstrInfo::GetDotOldOp(const int opc) const {
- int NewOp = opc;
- if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
- NewOp = Hexagon::getPredOldOpcode(NewOp);
- assert(NewOp >= 0 &&
- "Couldn't change predicate new instruction to its old form.");
- }
-
- if (isNewValueStore(NewOp)) { // Convert into non-new-value format
- NewOp = Hexagon::getNonNVStore(NewOp);
- assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
- }
- return NewOp;
-}
// Return the new value instruction for a given store.
-int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
+int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const {
int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode());
if (NVOpcode >= 0) // Valid new-value store instruction.
return NVOpcode;
@@ -1672,12 +3071,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
- case Hexagon::S4_storerh_ur:
- return Hexagon::S4_storerhnew_ur;
-
- case Hexagon::S4_storeri_ur:
- return Hexagon::S4_storerinew_ur;
-
case Hexagon::S2_storerb_pci:
return Hexagon::S2_storerb_pci;
@@ -1692,203 +3085,496 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S2_storerf_pci:
return Hexagon::S2_storerf_pci;
+
+ case Hexagon::V6_vS32b_ai:
+ return Hexagon::V6_vS32b_new_ai;
+
+ case Hexagon::V6_vS32b_pi:
+ return Hexagon::V6_vS32b_new_pi;
+
+ // 128B
+ case Hexagon::V6_vS32b_ai_128B:
+ return Hexagon::V6_vS32b_new_ai_128B;
+
+ case Hexagon::V6_vS32b_pi_128B:
+ return Hexagon::V6_vS32b_new_pi_128B;
}
return 0;
}
-// Return .new predicate version for an instruction.
-int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo
- *MBPI) const {
+// Returns the opcode to use when converting MI, which is a conditional jump,
+// into a conditional instruction which uses the .new value of the predicate.
+// We also use branch probabilities to add a hint to the jump.
+int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
+ // We assume that block can have at most two successors.
+ bool taken = false;
+ const MachineBasicBlock *Src = MI->getParent();
+ const MachineOperand *BrTarget = &MI->getOperand(1);
+ const MachineBasicBlock *Dst = BrTarget->getMBB();
+ const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpt:
+ return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ case Hexagon::J2_jumpf:
+ return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
+
+ default:
+ llvm_unreachable("Unexpected jump instruction.");
+ }
+}
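+// For example (illustrative): a J2_jumpt whose edge probability to the branch
+// target is 3/4 becomes J2_jumptnewpt (hinted taken), while one with
+// probability 1/4 becomes J2_jumptnew (hinted not taken).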
+
+
+// Return .new predicate version for an instruction.
+int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
if (NewOpcode >= 0) // Valid predicate new instruction
return NewOpcode;
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unknown .new type");
  // Conditional Jumps
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
return getDotNewPredJumpOp(MI, MBPI);
- case Hexagon::J2_jumprt:
- return Hexagon::J2_jumptnewpt;
-
- case Hexagon::J2_jumprf:
- return Hexagon::J2_jumprfnewpt;
-
- case Hexagon::JMPrett:
- return Hexagon::J2_jumprtnewpt;
-
- case Hexagon::JMPretf:
- return Hexagon::J2_jumprfnewpt;
-
-
- // Conditional combine
- case Hexagon::C2_ccombinewt:
- return Hexagon::C2_ccombinewnewt;
- case Hexagon::C2_ccombinewf:
- return Hexagon::C2_ccombinewnewf;
+ default:
+ assert(0 && "Unknown .new type");
}
+ return 0;
}
-unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+int HexagonInstrInfo::getDotOldOp(const int opc) const {
+ int NewOp = opc;
+ if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
+ NewOp = Hexagon::getPredOldOpcode(NewOp);
+ assert(NewOp >= 0 &&
+ "Couldn't change predicate new instruction to its old form.");
+ }
- return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+ if (isNewValueStore(NewOp)) { // Convert into non-new-value format
+ NewOp = Hexagon::getNonNVStore(NewOp);
+ assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
+ }
+ return NewOp;
}
-/// immediateExtend - Changes the instruction in place to one using an immediate
-/// extender.
-void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
- assert((isExtendable(MI)||isConstExtended(MI)) &&
- "Instruction must be extendable");
- // Find which operand is extendable.
- short ExtOpNum = getCExtOpNum(MI);
- MachineOperand &MO = MI->getOperand(ExtOpNum);
- // This needs to be something we understand.
- assert((MO.isMBB() || MO.isImm()) &&
- "Branch with unknown extendable field type");
- // Mark given operand as extended.
- MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
-}
-DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
- const TargetSubtargetInfo &STI) const {
- const InstrItineraryData *II = STI.getInstrItineraryData();
- return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
-}
-
-bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const {
- // Debug info is never a scheduling boundary. It's necessary to be explicit
- // due to the special treatment of IT instructions below, otherwise a
- // dbg_value followed by an IT will result in the IT instruction being
- // considered a scheduling hazard, which is wrong. It should be the actual
- // instruction preceding the dbg_value instruction(s), just like it is
- // when debug info is not present.
- if (MI->isDebugValue())
- return false;
+// See if instruction could potentially be a duplex candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+ auto &HRI = getRegisterInfo();
- // Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isPosition() || MI->isInlineAsm())
- return true;
+ switch (MI->getOpcode()) {
+ default:
+ return HexagonII::HSIG_None;
+ //
+ // Group L1:
+ //
+ // Rd = memw(Rs+#u4:2)
+ // Rd = memub(Rs+#u4:0)
+ case Hexagon::L2_loadri_io:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ // Special case this one from Group L2.
+ // Rd = memw(r29+#u5:2)
+ if (isIntRegForSubInst(DstReg)) {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ // Rd = memw(Rs+#u4:2)
+ if (isIntRegForSubInst(SrcReg) &&
+ (MI->getOperand(2).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(2).getImm())))
+ return HexagonII::HSIG_L1;
+ }
+ break;
+ case Hexagon::L2_loadrub_io:
+ // Rd = memub(Rs+#u4:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L1;
+ break;
+ //
+ // Group L2:
+ //
+ // Rd = memh/memuh(Rs+#u3:1)
+ // Rd = memb(Rs+#u3:0)
+ // Rd = memw(r29+#u5:2) - Handled above.
+ // Rdd = memd(r29+#u5:3)
+ // deallocframe
+ // [if ([!]p0[.new])] dealloc_return
+ // [if ([!]p0[.new])] jumpr r31
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ // Rd = memh/memuh(Rs+#u3:1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrb_io:
+ // Rd = memb(Rs+#u3:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isUInt<3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrd_io:
+ // Rdd = memd(r29+#u5:3)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+  // dealloc_return is not documented in the Hexagon Manual, but is marked
+ // with A_SUBINSN attribute in iset_v4classic.py.
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ case Hexagon::L4_return:
+ case Hexagon::L2_deallocframe:
+ return HexagonII::HSIG_L2;
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::JMPret :
+ // jumpr r31
+ // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
+ DstReg = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::JMPrett:
+ case Hexagon::JMPretf:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPretfnewpt :
+ case Hexagon::JMPrettnew :
+ case Hexagon::JMPretfnew :
+ DstReg = MI->getOperand(1).getReg();
+ SrcReg = MI->getOperand(0).getReg();
+ // [if ([!]p0[.new])] jumpr r31
+ if ((Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ (Hexagon::P0 == SrcReg)) &&
+ (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ // [if ([!]p0[.new])] dealloc_return
+ SrcReg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg))
+ return HexagonII::HSIG_L2;
+ break;
+ //
+ // Group S1:
+ //
+ // memw(Rs+#u4:2) = Rt
+ // memb(Rs+#u4:0) = Rt
+ case Hexagon::S2_storeri_io:
+ // Special case this one from Group S2.
+ // memw(r29+#u5:2) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ isIntRegForSubInst(Src2Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ // memw(Rs+#u4:2) = Rt
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerb_io:
+ // memb(Rs+#u4:0) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group S2:
+ //
+ // memh(Rs+#u3:1) = Rt
+ // memw(r29+#u5:2) = Rt
+ // memd(r29+#s6:3) = Rtt
+ // memw(Rs+#u4:2) = #U1
+ // memb(Rs+#u4) = #U1
+ // allocframe(#u5:3)
+ case Hexagon::S2_storerh_io:
+ // memh(Rs+#u3:1) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerd_io:
+ // memd(r29+#s6:3) = Rtt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(Src2Reg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedInt<6,3>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeiri_io:
+ // memw(Rs+#u4:2) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()) &&
+ MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeirb_io:
+ // memb(Rs+#u4) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+        isUInt<4>(MI->getOperand(1).getImm()) &&
+        MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S2_allocframe:
+ if (MI->getOperand(0).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(0).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group A:
+ //
+ // Rx = add(Rx,#s7)
+ // Rd = Rs
+ // Rd = #u6
+ // Rd = #-1
+ // if ([!]P0[.new]) Rd = #0
+ // Rd = add(r29,#u6:2)
+ // Rx = add(Rx,Rs)
+ // P0 = cmp.eq(Rs,#u2)
+ // Rdd = combine(#0,Rs)
+ // Rdd = combine(Rs,#0)
+ // Rdd = combine(#u2,#U2)
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ // Rd = and(Rs,#1)
+ case Hexagon::A2_addi:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg)) {
+ // Rd = add(r29,#u6:2)
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() &&
+ isShiftedUInt<6,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rx = add(Rx,#s7)
+ if ((DstReg == SrcReg) && MI->getOperand(2).isImm() &&
+ isInt<7>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_add:
+ // Rx = add(Rx,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+ isIntRegForSubInst(Src2Reg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_andir:
+ // Same as zxtb.
+ // Rd16=and(Rs16,#255)
+ // Rd16=and(Rs16,#1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == 255)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+    // Do not test for the #u6 size since the constant is extended regardless,
+    // and a compound could still be formed.
+ // Rd = #-1
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::C2_cmovenewif:
+ // if ([!]P0[.new]) Rd = #0
+ // Actual form:
+ // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) &&
+ Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg &&
+ MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ // Rdd = combine(#u2,#U2)
+ DstReg = MI->getOperand(0).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) ||
+ (MI->getOperand(1).isGlobal() &&
+ isUInt<2>(MI->getOperand(1).getOffset()))) &&
+ ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).isGlobal() &&
+ isUInt<2>(MI->getOperand(2).getOffset()))))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineri:
+ // Rdd = combine(Rs,#0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) ||
+ (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineir:
+ // Rdd = combine(#0,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) ||
+ (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ }
- return false;
+ return HexagonII::HSIG_None;
}
-bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
- if (isExtended) // Instruction must be extended.
- return true;
- unsigned isExtendable =
- (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
- if (!isExtendable)
- return false;
-
- short ExtOpNum = getCExtOpNum(MI);
- const MachineOperand &MO = MI->getOperand(ExtOpNum);
- // Use MO operand flags to determine if MO
- // has the HMOTF_ConstExtended flag set.
- if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
- return true;
- // If this is a Machine BB address we are talking about, and it is
- // not marked as extended, say so.
- if (MO.isMBB())
- return false;
-
- // We could be using an instruction with an extendable immediate and shoehorn
- // a global address into it. If it is a global address it will be constant
- // extended. We do this for COMBINE.
- // We currently only handle isGlobal() because it is the only kind of
- // object we are going to end up with here for now.
- // In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
- MO.isJTI() || MO.isCPI())
- return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
+short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real);
+}
- int MinValue = getMinValue(MI);
- int MaxValue = getMaxValue(MI);
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
+// Return the first non-debug instruction in the basic block.
+MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
+ const {
+ for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
+ MachineInstr *MI = &*MII;
+ if (MI->isDebugValue())
+ continue;
+ return MI;
+ }
+ return nullptr;
}
-// Return the number of bytes required to encode the instruction.
-// Hexagon instructions are fixed length, 4 bytes, unless they
-// use a constant extender, which requires another 4 bytes.
-// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
- if (MI->isDebugValue() || MI->isPosition())
- return 0;
+unsigned HexagonInstrInfo::getInstrTimingClassLatency(
+ const InstrItineraryData *ItinData, const MachineInstr *MI) const {
+  // Default to one cycle when there is no itinerary. However, an "empty"
+  // itinerary may still have a MinLatency property, which getStageLatency
+  // checks.
+ if (!ItinData)
+ return getInstrLatency(ItinData, MI);
- unsigned Size = MI->getDesc().getSize();
- if (!Size)
- // Assume the default insn size in case it cannot be determined
- // for whatever reason.
- Size = HEXAGON_INSTR_SIZE;
-
- if (isConstExtended(MI) || isExtended(MI))
- Size += HEXAGON_INSTR_SIZE;
-
- return Size;
+ // Get the latency embedded in the itinerary. If we're not using timing class
+  // latencies or if we are using BSB scheduling, then restrict the maximum
+  // latency to 1 (that is, either 0 or 1).
+ if (MI->isTransient())
+ return 0;
+ unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ if (!EnableTimingClassLatency ||
+ MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+ useBSBScheduling())
+ if (Latency > 1)
+ Latency = 1;
+ return Latency;
}
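+// For example (illustrative): if the itinerary reports a stage latency of 3
+// for an instruction's timing class but BSB scheduling is in use, the value
+// returned above is clamped to 1.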
-// Returns the opcode to use when converting MI, which is a conditional jump,
-// into a conditional instruction which uses the .new value of the predicate.
-// We also use branch probabilities to add a hint to the jump.
-int
-HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
- const
- MachineBranchProbabilityInfo *MBPI) const {
-
- // We assume that block can have at most two successors.
- bool taken = false;
- MachineBasicBlock *Src = MI->getParent();
- MachineOperand *BrTarget = &MI->getOperand(1);
- MachineBasicBlock *Dst = BrTarget->getMBB();
- const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
- if (Prediction >= BranchProbability(1,2))
- taken = true;
+// Inverts the predication logic:
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::getInvertedPredSense(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return false;
+ unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm());
+ Cond[0].setImm(Opc);
+ return true;
+}
- switch (MI->getOpcode()) {
- case Hexagon::J2_jumpt:
- return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
- case Hexagon::J2_jumpf:
- return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
- default:
- llvm_unreachable("Unexpected jump instruction.");
- }
-}
-// Returns true if a particular operand is extendable for an instruction.
-bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
- unsigned short OperandNum) const {
- const uint64_t F = MI->getDesc().TSFlags;
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ int InvPredOpcode;
+ InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+ : Hexagon::getTruePredOpcode(Opc);
+ if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+ return InvPredOpcode;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
- == OperandNum;
+ llvm_unreachable("Unexpected predicated instruction");
}
-// Returns Operand Index for the constant extended instruction.
-unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-// Returns the min value that doesn't need to be extended.
-int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
@@ -1896,13 +3582,20 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1U << (bits - 1);
+ return ~(-1U << (bits - 1));
else
- return 0;
+ return ~(-1U << bits);
}
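+// Worked example (illustrative): for an 8-bit extent, the signed maximum is
+// ~(-1U << 7) = 127 and the unsigned maximum is ~(-1U << 8) = 255; the
+// corresponding minima returned by getMinValue() below are -128 and 0.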
-// Returns the max value that doesn't need to be extended.
-int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
+}
+
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
@@ -1910,49 +3603,14 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
+ return -1U << (bits - 1);
else
- return ~(-1U << bits);
+ return 0;
}
-// Returns true if an instruction can be converted into a non-extended
-// equivalent instruction.
-bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const {
-
- short NonExtOpcode;
- // Check if the instruction has a register form that uses register in place
- // of the extended operand, if so return that as the non-extended form.
- if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
- return true;
-
- if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
- // Check addressing mode and retrieve non-ext equivalent instruction.
-
- switch (getAddrMode(MI)) {
- case HexagonII::Absolute :
- // Load/store with absolute addressing mode can be converted into
- // base+offset mode.
- NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
- break;
- case HexagonII::BaseImmOffset :
- // Load/store with base+offset addressing mode can be converted into
- // base+register offset addressing mode. However left shift operand should
- // be set to 0.
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
- break;
- default:
- return false;
- }
- if (NonExtOpcode < 0)
- return false;
- return true;
- }
- return false;
-}
// Returns opcode of the non-extended equivalent instruction.
-short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
-
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
// Check if the instruction has a register form that uses register in place
// of the extended operand, if so return that as the non-extended form.
short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
@@ -1963,9 +3621,12 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
// Check addressing mode and retrieve non-ext equivalent instruction.
switch (getAddrMode(MI)) {
case HexagonII::Absolute :
- return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ return Hexagon::getBaseWithImmOffset(MI->getOpcode());
case HexagonII::BaseImmOffset :
return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ case HexagonII::BaseLongOffset:
+ return Hexagon::getRegShlForm(MI->getOpcode());
+
default:
return -1;
}
@@ -1973,29 +3634,9 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
return -1;
}
-bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
- return (Opcode == Hexagon::J2_jumpt) ||
- (Opcode == Hexagon::J2_jumpf) ||
- (Opcode == Hexagon::J2_jumptnewpt) ||
- (Opcode == Hexagon::J2_jumpfnewpt) ||
- (Opcode == Hexagon::J2_jumpt) ||
- (Opcode == Hexagon::J2_jumpf);
-}
-
-bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
- if (Cond.empty() || !isPredicated(Cond[0].getImm()))
- return false;
- return !isPredicatedTrue(Cond[0].getImm());
-}
-
-bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
- return (Opcode == Hexagon::ENDLOOP0 ||
- Opcode == Hexagon::ENDLOOP1);
-}
bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
- unsigned &PredReg, unsigned &PredRegPos,
- unsigned &PredRegFlags) const {
+ unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const {
if (Cond.empty())
return false;
assert(Cond.size() == 2);
@@ -2014,3 +3655,174 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return true;
}
+
+short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo);
+}
+
+
+short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const {
+ return Hexagon::getRegForm(MI->getOpcode());
+}
+
+
+// Return the number of bytes required to encode the instruction.
+// Hexagon instructions are fixed length, 4 bytes, unless they
+// use a constant extender, which requires another 4 bytes.
+// For debug instructions and prolog labels, return 0.
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
+ if (MI->isDebugValue() || MI->isPosition())
+ return 0;
+
+ unsigned Size = MI->getDesc().getSize();
+ if (!Size)
+ // Assume the default insn size in case it cannot be determined
+ // for whatever reason.
+ Size = HEXAGON_INSTR_SIZE;
+
+ if (isConstExtended(MI) || isExtended(MI))
+ Size += HEXAGON_INSTR_SIZE;
+
+  // Try to compute the number of instructions in the asm string.
+ if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ // Disassemble the AsmStr and approximate number of instructions.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ Size = getInlineAsmLength(AsmStr, *MAI);
+ }
+
+ return Size;
+}
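+// For example (illustrative): a 4-byte instruction whose immediate operand is
+// constant-extended is reported as 8 bytes (4 for the insn plus 4 for the
+// extender word).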
+
+
+uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
+}
+
+
+unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const {
+ const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget();
+ const InstrItineraryData &II = *ST.getInstrItineraryData();
+ const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass());
+
+ return IS.getUnits();
+}
+
+
+unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
+}
+
+
+// Calculate size of the basic block without debug instructions.
+unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
+ return nonDbgMICount(BB->instr_begin(), BB->instr_end());
+}
+
+
+unsigned HexagonInstrInfo::nonDbgBundleSize(
+ MachineBasicBlock::const_iterator BundleHead) const {
+ assert(BundleHead->isBundle() && "Not a bundle header");
+ auto MII = BundleHead.getInstrIterator();
+ // Skip the bundle header.
+ return nonDbgMICount(++MII, getBundleEnd(BundleHead));
+}
+
+
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+ assert((isExtendable(MI)||isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
+
+
+bool HexagonInstrInfo::invertAndChangeJumpTarget(
+ MachineInstr* MI, MachineBasicBlock* NewTarget) const {
+ DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
+ << NewTarget->getNumber(); MI->dump(););
+ assert(MI->isBranch());
+ unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode());
+ int TargetPos = MI->getNumOperands() - 1;
+  // In general, the branch target is the last operand, but some implicit
+  // defs added at the end might change it.
+ while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB())
+ --TargetPos;
+ assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB());
+ MI->getOperand(TargetPos).setMBB(NewTarget);
+ if (EnableBranchPrediction && isPredicatedNew(MI)) {
+ NewOpcode = reversePrediction(NewOpcode);
+ }
+ MI->setDesc(get(NewOpcode));
+ return true;
+}
+
+
+void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
+  /* +++ The code below is used to generate a complete set of Hexagon Insn +++ */
+ MachineFunction::iterator A = MF.begin();
+ MachineBasicBlock &B = *A;
+ MachineBasicBlock::iterator I = B.begin();
+ MachineInstr *MI = &*I;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ for (unsigned insn = TargetOpcode::GENERIC_OP_END+1;
+ insn < Hexagon::INSTRUCTION_LIST_END; ++insn) {
+ NewMI = BuildMI(B, MI, DL, get(insn));
+ DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) <<
+ " Class: " << NewMI->getDesc().getSchedClass());
+ NewMI->eraseFromParent();
+ }
+  /* --- The code above is used to generate a complete set of Hexagon Insn --- */
+}
+
+
+// Inverts the predication logic:
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const {
+ DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump());
+ MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode())));
+ return true;
+}
+
+
+// Reverse the branch prediction.
+unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
+ int PredRevOpcode = -1;
+ if (isPredictedTaken(Opcode))
+ PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode);
+ else
+ PredRevOpcode = Hexagon::takenBranchPrediction(Opcode);
+ assert(PredRevOpcode > 0);
+ return PredRevOpcode;
+}
+
+
+// TODO: Add more rigorous validation.
+bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
+ const {
+ return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1));
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index d0b8a46..9530d9f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,4 +1,3 @@
-
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
@@ -28,23 +27,18 @@ namespace llvm {
struct EVT;
class HexagonSubtarget;
+
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
- const HexagonSubtarget &Subtarget;
public:
- typedef unsigned Opcode_t;
-
explicit HexagonInstrInfo(HexagonSubtarget &ST);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
+ /// TargetInstrInfo overrides.
///
- const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
@@ -52,7 +46,7 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
- /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
@@ -60,50 +54,118 @@ public:
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
-
+ /// Analyze the branching code at the end of MBB, returning
+ /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+ /// implemented for a target). Upon success, this returns false and returns
+ /// with the following information in various cases:
+ ///
+ /// 1. If this block ends with no branches (it just falls through to its succ)
+ /// just return false, leaving TBB/FBB null.
+ /// 2. If this block ends with only an unconditional branch, it sets TBB to be
+ /// the destination block.
+ /// 3. If this block ends with a conditional branch and it falls through to a
+ /// successor block, it sets TBB to be the branch destination block and a
+ /// list of operands that evaluate the condition. These operands can be
+ /// passed to other TargetInstrInfo methods to create new branches.
+ /// 4. If this block ends with a conditional branch followed by an
+ /// unconditional branch, it returns the 'true' destination in TBB, the
+ /// 'false' destination in FBB, and a list of operands that evaluate the
+ /// condition. These operands can be passed to other TargetInstrInfo
+ /// methods to create new branches.
+ ///
+ /// Note that RemoveBranch and InsertBranch must be implemented to support
+ /// cases where this method returns success.
+ ///
+ /// If AllowModify is true, then this routine is allowed to modify the basic
+ /// block (e.g. delete instructions after the unconditional branch).
+ ///
bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+ /// Remove the branching code at the end of the specific MBB.
+ /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// returns the number of instructions that were removed.
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ /// Insert branch code into the end of the specified MachineBasicBlock.
+ /// The operands to this method are the same as those
+ /// returned by AnalyzeBranch. This is only invoked in cases where
+ /// AnalyzeBranch returns success. It returns the number of instructions
+ /// inserted.
+ ///
+ /// It is also invoked by tail merging to add unconditional branches in
+ /// cases where AnalyzeBranch doesn't apply because there was no original
+ /// branch to analyze. At least this much must be implemented, else tail
+ /// merging needs to be disabled.
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
- bool analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const override;
+ /// Return true if it is profitable to predicate the instructions of the
+ /// specified basic block, whose accumulated instruction latency is
+ /// "NumCycles", where the probability of the instructions being executed
+ /// is given by Probability.
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ BranchProbability Probability) const override;
+
+ /// Second variant of isProfitableToIfCvt. This one
+ /// checks for the case where two basic blocks from the true and false
+ /// paths of an if-then-else (diamond) are predicated on mutually exclusive
+ /// predicates, where the probability of the true path being taken is
+ /// given by Probability.
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ BranchProbability Probability) const override;
+
+ /// Return true if it is profitable for the if-converter to duplicate
+ /// instructions of the specified accumulated instruction latency in the
+ /// specified MBB to enable if-conversion.
+ /// The probability of the instructions being executed is given by
+ /// Probability.
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override;
+ /// Emit instructions to copy a pair of physical registers.
+ ///
+ /// This function should support copies within any legal register class as
+ /// well as any cross-class copies created during instruction selection.
+ ///
+ /// The source and destination registers may overlap, which may require a
+ /// careful implementation when multiple copy instructions are required for
+ /// large registers. See for example the ARM target.
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
+ /// Store the specified register of the given register class to the specified
+ /// stack frame index. The store instruction is to be added to the given
+ /// machine basic block before the specified machine instruction. If isKill
+ /// is true, the register operand is the last use and must be marked kill.
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
+ /// Load the specified register of the given register class from the specified
+ /// stack frame index. The load instruction is to be added to the given
+ /// machine basic block before the specified machine instruction.
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// This function is called for all pseudo instructions
/// that remain after register allocation. Many pseudo instructions are
/// created to help register allocation. This is the place to convert them
/// into real instructions. The target can edit MI in place, or it can insert
@@ -111,122 +173,228 @@ public:
/// anything was changed.
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- int FrameIndex) const override;
+ /// Reverses the branch condition of the specified condition list,
+ /// returning false on success and true if it cannot be reversed.
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- MachineInstr *LoadMI) const override {
- return nullptr;
- }
+ /// Insert a noop into the instruction stream at the specified point.
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
- unsigned createVR(MachineFunction* MF, MVT VT) const;
+ /// Returns true if the instruction is already predicated.
+ bool isPredicated(const MachineInstr *MI) const override;
- bool isBranch(const MachineInstr *MI) const;
- bool isPredicable(MachineInstr *MI) const override;
+ /// Convert the instruction into a predicated instruction.
+ /// It returns true if the operation was successful.
bool PredicateInstruction(MachineInstr *MI,
ArrayRef<MachineOperand> Cond) const override;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
-
- bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const override;
+ /// Returns true if the first specified predicate
+ /// subsumes the second, e.g. GE subsumes GT.
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
- bool isPredicated(const MachineInstr *MI) const override;
- bool isPredicated(unsigned Opcode) const;
- bool isPredicatedTrue(const MachineInstr *MI) const;
- bool isPredicatedTrue(unsigned Opcode) const;
- bool isPredicatedNew(const MachineInstr *MI) const;
- bool isPredicatedNew(unsigned Opcode) const;
+ /// If the specified instruction defines any predicate
+ /// or condition code register(s) used for predication, returns true and
+ /// provides the defining predicate operand(s) by reference.
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
- bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
- ArrayRef<MachineOperand> Pred2) const override;
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ /// Return true if the specified instruction can be predicated.
+ /// By default, this returns true for every instruction with a
+ /// PredicateOperand.
+ bool isPredicable(MachineInstr *MI) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const override;
+ /// Test if the given instruction should be considered a scheduling boundary.
+ /// This primarily includes labels and terminators.
+ bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+ /// Measure the specified inline asm to determine an approximation of its
+ /// length.
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const override;
+
+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions after register allocation.
+ ScheduleHazardRecognizer*
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const override;
+
+ /// For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if it has two register operands, and the mask and
+ /// value it compares against in Mask and Value. Return true if the
+ /// comparison instruction can be analyzed.
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ /// Compute the instruction latency of a given instruction.
+ /// If the instruction has higher cost when predicated, it's returned via
+ /// PredCost.
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
+ /// Create a machine-specific model for scheduling.
DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override;
- bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const override;
- bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
- bool isValidAutoIncImm(const EVT VT, const int Offset) const;
- bool isMemOp(const MachineInstr *MI) const;
- bool isSpillPredRegOp(const MachineInstr *MI) const;
- bool isU6_3Immediate(const int value) const;
- bool isU6_2Immediate(const int value) const;
- bool isU6_1Immediate(const int value) const;
- bool isU6_0Immediate(const int value) const;
- bool isS4_3Immediate(const int value) const;
- bool isS4_2Immediate(const int value) const;
- bool isS4_1Immediate(const int value) const;
- bool isS4_0Immediate(const int value) const;
- bool isS12_Immediate(const int value) const;
- bool isU6_Immediate(const int value) const;
- bool isS8_Immediate(const int value) const;
- bool isS6_Immediate(const int value) const;
-
- bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
- bool isConditionalTransfer(const MachineInstr* MI) const;
+ /// Sometimes, it is possible for the target to tell, even without aliasing
+ /// information, that two MIs access different memory addresses. This
+ /// function returns true if that is the case and false otherwise.
+ bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr)
+ const override;
+
+
+ /// HexagonInstrInfo specifics.
+ ///
+
+ const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ bool isAbsoluteSet(const MachineInstr* MI) const;
+ bool isAccumulator(const MachineInstr *MI) const;
+ bool isComplex(const MachineInstr *MI) const;
+ bool isCompoundBranchInstr(const MachineInstr *MI) const;
+ bool isCondInst(const MachineInstr *MI) const;
bool isConditionalALU32(const MachineInstr* MI) const;
- bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalLoad(const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
- bool isNewValueInst(const MachineInstr* MI) const;
- bool isNewValue(const MachineInstr* MI) const;
- bool isNewValue(Opcode_t Opcode) const;
- bool isDotNewInst(const MachineInstr* MI) const;
- int GetDotOldOp(const int opc) const;
- int GetDotNewOp(const MachineInstr* MI) const;
- int GetDotNewPredOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo
- *MBPI) const;
- bool mayBeNewStore(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConstExtended(const MachineInstr *MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
- unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool isDotCurInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
+ bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const;
+ bool isEarlySourceInstr(const MachineInstr *MI) const;
+ bool isEndLoopN(unsigned Opcode) const;
+ bool isExpr(unsigned OpType) const;
bool isExtendable(const MachineInstr* MI) const;
bool isExtended(const MachineInstr* MI) const;
- bool isPostIncrement(const MachineInstr* MI) const;
+ bool isFloat(const MachineInstr *MI) const;
+ bool isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const;
+ bool isIndirectCall(const MachineInstr *MI) const;
+ bool isIndirectL4Return(const MachineInstr *MI) const;
+ bool isJumpR(const MachineInstr *MI) const;
+ bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const;
+ bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const;
+ bool isLateResultInstr(const MachineInstr *MI) const;
+ bool isLateSourceInstr(const MachineInstr *MI) const;
+ bool isLoopN(const MachineInstr *MI) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isNewValue(const MachineInstr* MI) const;
+ bool isNewValue(unsigned Opcode) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJump(unsigned Opcode) const;
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const;
- bool isNewValueJump(const MachineInstr* MI) const;
- bool isNewValueJump(Opcode_t Opcode) const;
- bool isNewValueJumpCandidate(const MachineInstr *MI) const;
+ bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isPredicatedNew(const MachineInstr *MI) const;
+ bool isPredicatedNew(unsigned Opcode) const;
+ bool isPredicatedTrue(const MachineInstr *MI) const;
+ bool isPredicatedTrue(unsigned Opcode) const;
+ bool isPredicated(unsigned Opcode) const;
+ bool isPredicateLate(unsigned Opcode) const;
+ bool isPredictedTaken(unsigned Opcode) const;
+ bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const;
+ bool isSolo(const MachineInstr* MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isTC1(const MachineInstr *MI) const;
+ bool isTC2(const MachineInstr *MI) const;
+ bool isTC2Early(const MachineInstr *MI) const;
+ bool isTC4x(const MachineInstr *MI) const;
+ bool isV60VectorInstruction(const MachineInstr *MI) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
+ bool isVecAcc(const MachineInstr *MI) const;
+ bool isVecALU(const MachineInstr *MI) const;
+ bool isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+
+
+ bool canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const;
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasNonExtEquivalent(const MachineInstr *MI) const;
+ bool hasPseudoInstrPair(const MachineInstr *MI) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool mayBeCurLoad(const MachineInstr* MI) const;
+ bool mayBeNewStore(const MachineInstr* MI) const;
+ bool producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator MII) const;
+ bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const;
+ bool PredOpcodeHasJMP_c(unsigned Opcode) const;
+ bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
- void immediateExtend(MachineInstr *MI) const;
- bool isConstExtended(const MachineInstr *MI) const;
- unsigned getSize(const MachineInstr *MI) const;
- int getDotNewPredJumpOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;
- bool isOperandExtended(const MachineInstr *MI,
- unsigned short OperandNum) const;
- unsigned short getCExtOpNum(const MachineInstr *MI) const;
- int getMinValue(const MachineInstr *MI) const;
+ unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset,
+ unsigned &AccessSize) const;
+ bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos) const;
+ SmallVector<MachineInstr*, 2> getBranchingInstrs(MachineBasicBlock &MBB) const;
+ unsigned getCExtOpNum(const MachineInstr *MI) const;
+ HexagonII::CompoundGroup
+ getCompoundCandidateGroup(const MachineInstr *MI) const;
+ unsigned getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const;
+ int getCondOpcode(int Opc, bool sense) const;
+ int getDotCurOp(const MachineInstr* MI) const;
+ int getDotNewOp(const MachineInstr* MI) const;
+ int getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotOldOp(const int opc) const;
+ HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI)
+ const;
+ short getEquivalentHWInstr(const MachineInstr *MI) const;
+ MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const;
+ /// Compute the instruction latency of a given instruction using Timing
+ /// Class information, if available.
+ unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+ bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
int getMaxValue(const MachineInstr *MI) const;
- bool NonExtEquivalentExists (const MachineInstr *MI) const;
+ unsigned getMemAccessSize(const MachineInstr* MI) const;
+ int getMinValue(const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
- bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
- bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
- bool isEndLoopN(Opcode_t Opcode) const;
bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
unsigned &PredRegPos, unsigned &PredRegFlags) const;
- int getCondOpcode(int Opc, bool sense) const;
+ short getPseudoInstrPair(const MachineInstr *MI) const;
+ short getRegForm(const MachineInstr *MI) const;
+ unsigned getSize(const MachineInstr *MI) const;
+ uint64_t getType(const MachineInstr* MI) const;
+ unsigned getUnits(const MachineInstr* MI) const;
+ unsigned getValidSubTargets(const unsigned Opcode) const;
+
+ unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
+ unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
+
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool invertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) const;
+ void genAllInsnTimingClasses(MachineFunction &MF) const;
+ bool reversePredSense(MachineInstr* MI) const;
+ unsigned reversePrediction(unsigned Opcode) const;
+ bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const;
};
}
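The branch hooks documented above follow the generic TargetInstrInfo contract, which is easiest to see from the caller's side. Below is a minimal, illustrative C++ sketch, not code from this change: a hypothetical helper (invertTerminator is an ad-hoc name) that flips a block's conditional terminator using only the hooks declared in this header.

// Minimal sketch against LLVM of this vintage (AnalyzeBranch et al. spelled
// with a capital A); illustrative only, not part of the patch.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

static bool invertTerminator(MachineBasicBlock &MBB,
                             const TargetInstrInfo &TII) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  // AnalyzeBranch returns true when the branching code cannot be understood.
  if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;
  // Handle only case 4: a conditional branch with both destinations known.
  if (Cond.empty() || !TBB || !FBB)
    return false;
  // ReverseBranchCondition returns false on success.
  if (TII.ReverseBranchCondition(Cond))
    return false;
  TII.RemoveBranch(MBB);
  TII.InsertBranch(MBB, FBB, TBB, Cond, DebugLoc());
  return true;
}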
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 3b32c10..5cfeba7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -13,7 +13,7 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
-
+include "HexagonInstrEnc.td"
// Pattern fragment that combines the value type and the register class
// into a single parameter.
// The pat frags in the definitions below need to have a named register,
@@ -1426,9 +1426,6 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
@@ -1606,8 +1603,6 @@ def EH_RETURN_JMPR : T_JMPr;
def: Pat<(eh_return),
(EH_RETURN_JMPR (i32 R31))>;
-def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
- (J2_jumpr IntRegs:$dst)>;
def: Pat<(brind (i32 IntRegs:$dst)),
(J2_jumpr IntRegs:$dst)>;
@@ -2825,7 +2820,7 @@ let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
+ (add (add (i32 IntRegs:$src2), s32ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2859,7 +2854,7 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
//===----------------------------------------------------------------------===//
@@ -3303,7 +3298,8 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
/* s4_0Imm */ offset{3-0})));
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
let IClass = 0b1010;
@@ -3322,7 +3318,7 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
//===----------------------------------------------------------------------===//
let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew >
+ bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew>
: STInst <(outs IntRegs:$_dst_),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
!if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -3341,7 +3337,8 @@ class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
/* s4_0Imm */ offset{3-0})));
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
let isPredicatedNew = isPredNew;
let isPredicatedFalse = isPredNot;
@@ -3404,7 +3401,6 @@ def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
//===----------------------------------------------------------------------===//
-let isNVStorable = 1 in
class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
MemAccessSize AccessSz, bit isHalf = 0>
: STInst <(outs IntRegs:$_dst_),
@@ -3416,6 +3412,9 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
bits<5> src3;
let accessSize = AccessSz;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} = 0b1101;
@@ -3430,12 +3429,11 @@ def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
-
def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3>MajOp, bit isH = 0>
+ bits<3> MajOp, bit isH = 0>
: STInst <(outs),
(ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
@@ -3455,6 +3453,8 @@ class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
!if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
/* s11_0Ext */ src2{10-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
let IClass = 0b1010;
let Inst{27} = 0b0;
@@ -3494,7 +3494,10 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
!if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
/* u6_0Ext */ src3{5-0})));
- let IClass = 0b0100;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
+ let IClass = 0b0100;
let Inst{27} = 0b0;
let Inst{26} = PredNot;
@@ -3508,7 +3511,7 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
let Inst{1-0} = src1;
}
-let isExtendable = 1, isNVStorable = 1, hasSideEffects = 0 in
+let isExtendable = 1, hasSideEffects = 0 in
multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
@@ -3665,7 +3668,7 @@ def S2_allocframe: ST0Inst <
// S2_storer[bhwdf]_pci: Store byte/half/word/double.
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci
-let Uses = [CS], isNVStorable = 1 in
+let Uses = [CS] in
class T_store_pci <string mnemonic, RegisterClass RC,
Operand Imm, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
@@ -3679,6 +3682,8 @@ class T_store_pci <string mnemonic, RegisterClass RC,
bits<1> Mu;
bits<5> Rt;
let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
let IClass = 0b1010;
let Inst{27-25} = 0b100;
@@ -3696,15 +3701,15 @@ class T_store_pci <string mnemonic, RegisterClass RC,
}
def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
- ByteAccess>;
+ ByteAccess>;
def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
- HalfWordAccess>;
+ HalfWordAccess>;
def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
- HalfWordAccess, "Rt.h">;
+ HalfWordAccess, "Rt.h">;
def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
- WordAccess>;
+ WordAccess>;
def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
- DoubleWordAccess>;
+ DoubleWordAccess>;
let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in
class T_storenew_pci <string mnemonic, Operand Imm,
@@ -3762,7 +3767,7 @@ def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
// Circular stores with auto-increment register
//===----------------------------------------------------------------------===//
-let Uses = [CS], isNVStorable = 1 in
+let Uses = [CS] in
class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
: STInst <(outs IntRegs:$_dst_),
@@ -3775,6 +3780,8 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
bits<5> Rt;
let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
let IClass = 0b1010;
let Inst{27-25} = 0b100;
@@ -5784,7 +5791,19 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// V60 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
// ALU32/64/Vector +
//===----------------------------------------------------------------------===//
include "HexagonInstrInfoVector.td"
+
+include "HexagonInstrAlias.td"
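One recurring change in the .td diff above deserves a note: every store template now computes isNVStorable instead of inheriting a blanket let, and the computed rule is identical everywhere. There is no new-value form for doubleword stores (memd) or upper-half (".h") stores, because a new-value store forwards a single 32-bit result produced in the same packet. A hedged C++ restatement of the TableGen !if nesting follows; the function name is ad hoc, not from the patch.

#include <string>

// Mirrors: let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
static bool isNewValueStorable(const std::string &Mnemonic, bool IsHalf) {
  if (Mnemonic == "memd") // 64-bit store: no single 32-bit producer to forward
    return false;
  return !IsHalf;         // ".h" stores write only the upper half of a register
}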
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 65b0f49..87d6b35 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -684,7 +684,7 @@ def: Pat<(i64 (zext (i32 IntRegs:$src1))),
// Template class for store instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
let isExtended = 1, opExtendable = 1, opExtentBits = 6,
- addrMode = AbsoluteSet, isNVStorable = 1 in
+ addrMode = AbsoluteSet in
class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs IntRegs:$dst),
@@ -696,6 +696,9 @@ class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
let accessSize = AccessSz;
let BaseOpcode = BaseOp#"_AbsSet";
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} = 0b1011;
@@ -750,7 +753,7 @@ let mayStore = 1, addrMode = AbsoluteSet in {
}
let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
-addrMode = BaseLongOffset, AddedComplexity = 40 in
+ addrMode = BaseLongOffset, AddedComplexity = 40 in
class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs),
@@ -766,6 +769,10 @@ class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
let accessSize = AccessSz;
let CextOpcode = CextOp;
let BaseOpcode = CextOp#"_shl";
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} =0b1101;
@@ -856,6 +863,9 @@ class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
bits<2> u2;
bits<5> Rt;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
let IClass = 0b0011;
let Inst{27-24} = 0b1011;
@@ -888,6 +898,8 @@ class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
let isPredicatedFalse = isNot;
let isPredicatedNew = isPredNew;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
let IClass = 0b0011;
@@ -1826,43 +1838,22 @@ def: LogLogNot_pat<or, or, C4_or_orn>;
// below are needed to support code generation for PIC
//===----------------------------------------------------------------------===//
-def SDT_HexagonPICAdd
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def SDT_HexagonAtPcrel
: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_HexagonGOTAdd
- : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-
-def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
-def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
-def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternal>;
-def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternalJT>;
-def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternalBA>;
-
-// PIC: Map from a block address computation to a PC-relative add
-def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
- (C4_addipc u32ImmPred:$src1)>;
-
-// PIC: Map from the computation to generate a GOT pointer to a PC-relative add
-def: Pat<(Hexagonpic_add texternalsym:$src1),
- (C4_addipc u32ImmPred:$src1)>;
-// PIC: Map from a jump table address computation to a PC-relative add
-def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
- (C4_addipc u32ImmPred:$src1)>;
+// AT_GOT: address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL: address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
-// PIC: Map from a GOT-relative symbol reference to a load
-def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
- (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
-
-// PIC: Map from a static symbol reference to a PC-relative add
-def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
- (C4_addipc u32ImmPred:$src1)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
//===----------------------------------------------------------------------===//
// CR -
@@ -1903,7 +1894,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -3290,27 +3281,33 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
}
// Save registers function call.
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
// Template class for non predicated store instructions with
// GP-Relative or absolute addressing.
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in
+let hasSideEffects = 0, isPredicable = 1 in
class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2>MajOp, Operand AddrOp, bit isAbs, bit isHalf>
- : STInst<(outs), (ins AddrOp:$addr, RC:$src),
- mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""),
+ bits<2>MajOp, bit isAbs, bit isHalf>
+ : STInst<(outs), (ins ImmOp:$addr, RC:$src),
+ mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
[], "", V2LDST_tc_st_SLOT01> {
bits<19> addr;
bits<5> src;
@@ -3321,6 +3318,9 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
!if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
/* u16_0Imm */ addr{15-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b0100;
let Inst{27} = 1;
let Inst{26-25} = offsetBits{15-14};
@@ -3337,11 +3337,10 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
// Template class for predicated store instructions with
// GP-Relative or absolute addressing.
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, isNVStorable = 1, opExtentBits = 6,
- opExtendable = 1 in
+let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
bit isHalf, bit isNot, bit isNew>
- : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2),
+ : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
[], "", ST_tc_st_SLOT01>, AddrModeRel {
@@ -3351,6 +3350,8 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
let isPredicatedNew = isNew;
let isPredicatedFalse = isNot;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
let IClass = 0b1010;
@@ -3371,7 +3372,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3538,7 +3539,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
let isAsmParserOnly = 1 in
class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
// Set BaseOpcode same as absolute addressing instructions so that
// non-predicated GP-Rel instructions can have relate with predicated
// Absolute instruction.
@@ -3553,7 +3554,7 @@ multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
// Absolute instruction.
let BaseOpcode = BaseOp#_abs in {
def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
- globaladdress, 0, isHalf>;
+ 0, isHalf>;
// New-value store
def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ;
}
@@ -3615,9 +3616,9 @@ let AddedComplexity = 100 in {
//===----------------------------------------------------------------------===//
let isPredicable = 1, hasSideEffects = 0 in
class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp, Operand AddrOp, bit isAbs>
- : LDInst <(outs RC:$dst), (ins AddrOp:$addr),
- "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)",
+ bits<3> MajOp>
+ : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
+ "$dst = "#mnemonic# "(#$addr)",
[], "", V2LDST_tc_ld_SLOT01> {
bits<5> dst;
bits<19> addr;
@@ -3642,7 +3643,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3660,10 +3661,11 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
// Template class for predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
+ opExtendable = 2 in
class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isPredNot, bit isPredNew>
- : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr),
+ : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr),
!if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
bits<5> dst;
@@ -3737,7 +3739,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
let isAsmParserOnly = 1 in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
let BaseOpcode = BaseOp#_abs;
}
@@ -3841,26 +3843,6 @@ let AddedComplexity = 100 in {
def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
}
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
-
-// Transfer global address into a register
-let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
-isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
-def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
- "$dst = #$src1",
- [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>;
-
-// Transfer a block address into a register
-def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
- (TFRI_V4 tblockaddress:$src1)>;
-
-let AddedComplexity = 50 in
-def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
- (TFRI_V4 tglobaladdr:$src1)>;
-
// i8/i16/i32 -> i64 loads
// We need a complexity of 120 here to override preceding handling of
// zextload.
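The PIC rework earlier in this file's diff replaces several single-operand AT_GOT/AT_PCREL nodes with one three-operand AT_GOT (GOT pointer, global address, residual offset) and one AT_PCREL, so the patterns can fold a zero offset into the bare GOT load and emit an A2_addi only when an offset remains. Below is a sketch of how the lowering side might construct the new node; lowerPICGlobal is a hypothetical helper, and the HexagonISD enum lives in the target-private HexagonISelLowering.h, which this diff does not show.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/GlobalValue.h"
// #include "HexagonISelLowering.h"  // provides HexagonISD::AT_GOT (not shown)
using namespace llvm;

static SDValue lowerPICGlobal(SelectionDAG &DAG, SDLoc DL,
                              const GlobalValue *GV, int64_t Offset,
                              SDValue GotPtr) {
  EVT PtrVT = MVT::i32; // Hexagon pointers are 32 bits wide.
  SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
  SDValue Off = DAG.getConstant(Offset, DL, MVT::i32);
  // Offset 0 matches the first pattern above (plain L2_loadri_io from the
  // GOT); a nonzero s30_2Imm offset matches the load-plus-A2_addi pattern.
  return DAG.getNode(HexagonISD::AT_GOT, DL, PtrVT, GotPtr, TGA, Off);
}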
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 337f4ea..823961f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -98,21 +98,21 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
// HexagonInstrInfo.td patterns.
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, isPseudo = 1 in
def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
"$dst = #$src1",
[(set F32:$dst, fpimm:$src1)]>,
Requires<[HasV5T]>;
-let isExtended = 1, opExtendable = 2, isPredicated = 1,
- hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in
+let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0,
+ validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in
def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if ($src1) $dst = #$src2", []>,
Requires<[HasV5T]>;
-let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1,
- isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in
+let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
+ hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in
def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if (!$src1) $dst = #$src2", []>,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
new file mode 100644
index 0000000..897ada0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
@@ -0,0 +1,2241 @@
+//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Vector store
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin = CVI_VM_ST,
+              IType type = TypeCVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+ class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_LD,
+ IType type = TypeCVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+//===----------------------------------------------------------------------===//
+// Vector loads with base + immediate offset
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vload_ai<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
+ asmStr>;
+
+let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
+class T_vload_ai_128B<string asmStr>
+ : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
+ asmStr>;
+
+let isCVLoadable = 1, hasNewValue = 1 in {
+ def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_enc;
+ def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_enc;
+ // 128B
+ def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_128B_enc;
+ def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
+ def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_enc;
+ def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
+ hasNewValue = 1 in {
+ def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_enc;
+ def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_enc;
+ // 128B
+ def V6_vL32b_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_128B_enc;
+ def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
+ def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_enc;
+ def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_tmp_ai_enc;
+ // 128B
+ def V6_vL32b_tmp_ai_128B : T_vload_ai_128B
+ <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_128B_enc;
+ def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
+ <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_nt_tmp_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_enc;
+ def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_128B_enc;
+}
+
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_enc;
+ def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_ai_enc;
+ def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_ai_128B_enc;
+}
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
+ Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
+class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_ai_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_ai_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc;
+def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_ai_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_enc;
+ def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1 in
+class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "
+ #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pred_ai_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_ai_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_enc;
+ def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_enc;
+ // 128B
+ def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_128B_enc;
+ def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_enc;
+ def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_128B_enc;
+ def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_enc;
+ def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_enc;
+ // 128B
+ def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_128B_enc;
+ def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset in
+class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC,
+ bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc;
+def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>,
+ V6_vS32b_nqpred_ai_enc;
+def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>,
+ V6_vS32b_nt_qpred_ai_enc;
+def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>,
+ V6_vS32b_nt_nqpred_ai_enc;
+// 128B
+def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc;
+def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>,
+ V6_vS32b_nqpred_ai_128B_enc;
+def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>,
+ V6_vS32b_nt_qpred_ai_128B_enc;
+def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>,
+ V6_vS32b_nt_nqpred_ai_128B_enc;
+
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3,
+ isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in
+class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+
+def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_enc;
+def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_enc;
+// 128B
+def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_128B_enc;
+def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_enc;
+ def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_128B_enc;
+ def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, hasNewValue = 1 in
+class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2), asmStr, [],
+ "$src1 = $_dst_">;
+
+let accessSize = Vector64Access in
+class T_vload_pi_64B <string asmStr>
+ : T_vload_pi <asmStr, s3_6Imm, VectorRegs>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vload_pi_128B <string asmStr>
+ : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>;
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_enc;
+ def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_enc;
+ // 128B
+ def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_128B_enc;
+ def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in {
+ def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_enc;
+ // 128B
+ def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_128B_enc;
+}
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+ def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_enc;
+ def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_enc;
+ // 128B
+ def V6_vL32b_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_128B_enc;
+ def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_enc;
+ def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_enc;
+  // 128B
+ def V6_vL32b_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_128B_enc;
+ def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+              "$src1 = $_dst_">, NewValueRel {
+  let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
+  : T_vstore_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
+ def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pi_128B_enc;
+}
+
+let isNVStorable = 1 , isNonTemporal = 1 in {
+ def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_enc;
+ def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_enc;
+ def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment unconditional .new vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, isNVStore = 1, Itinerary = CVI_VM_NEW_ST,
+    Type = TypeCVI_VM_NEW_ST, isNewValue = 1, opNewValue = 3 in
+class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pi_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pi_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
+
+
+def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pi_enc;
+def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_enc;
+ def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, addrMode = PostInc in
+class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot, bit isNT>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pred_pi_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_pi_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_enc;
+ def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_enc;
+ // 128B
+ def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_128B_enc;
+ def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_enc;
+ def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_128B_enc;
+ def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_enc;
+ def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_enc;
+ // 128B
+ def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_128B_enc;
+ def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0,
+ bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">;
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc;
+def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc;
+// 128B
+def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B,
+ V6_vS32b_qpred_pi_128B_enc;
+def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>,
+ V6_vS32b_nqpred_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>,
+ V6_vS32b_nt_qpred_pi_enc;
+ def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>,
+ V6_vS32b_nt_nqpred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>,
+ V6_vS32b_nt_qpred_pi_128B_enc;
+ def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>,
+ V6_vS32b_nt_nqpred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
+class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_"> , NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_enc;
+def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_enc;
+// 128B
+def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_128B_enc;
+def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_enc;
+ def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_128B_enc;
+ def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with register offset
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_vload_ppu<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
+ "$src1 = $_dst_">, NewValueRel;
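+// ModRegs:$src2 is a modifier register (M0/M1); "$src1++$src2" post-increments
+// the base by the amount held in the modifier register.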
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
+ V6_vL32b_ppu_enc;
+ def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_ppu_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
+def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
+ V6_vL32Ub_ppu_enc;
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+ def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
+ V6_vL32b_cur_ppu_enc;
+ def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_cur_ppu_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
+ V6_vL32b_tmp_ppu_enc;
+ def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_tmp_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+ V6_vS32b_ppu_enc;
+  let isNonTemporal = 1 in
+ def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+ V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+ opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+ def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+ V6_vS32b_nt_pred_ppu_enc;
+ def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+ V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+ Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+ V6_vS32Ub_pred_ppu_enc;
+ def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+ V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel;
+
+def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
+def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
+def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
+ V6_vS32b_nt_qpred_ppu_enc;
+def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
+ V6_vS32b_nt_nqpred_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, isNVStore = 1 in
+class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
+ V6_vS32b_new_pred_ppu_enc;
+ def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
+ V6_vS32b_new_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
+ V6_vS32b_nt_new_pred_ppu_enc;
+def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
+ V6_vS32b_nt_new_npred_ppu_enc;
+}
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
+ VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+      mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+ Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+ Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (STrivv_indexed_128B IntRegs:$addr, #0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned stores
+ def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Aligned stores
+ def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+
+  // Fold an add of base register and immediate offset into the vector store.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+  // Fold an add of base register and immediate offset into the 128B store.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
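+// For example, instantiated at <v64i8, v128i8> below, a plain v64i8 store
+// selects "V6_vS32b_ai $addr, #0, $src1", and a store to (add base, imm)
+// folds the immediate directly when it fits the s4_6Imm range.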
+
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+ "$dst="#mnemonic#"($addr+#$off)",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (LDrivv_indexed IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned loads
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (V6_vL32b_ai IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Load
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+
+  // Fold an add of base register and immediate offset into the vector load.
+ let AddedComplexity = 10 in
+ def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector predicate pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store vector pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STriv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STrivv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STrivv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VA_DV,
+ IType type = TypeCVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+}
+
+def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval), SETEQ)),
+ (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs),
+ (i32 IntRegs:$rhs))),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval)))>;
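+// The pattern above lowers a v16i32 selectcc on scalar operands to a scalar
+// C2_cmpeq feeding VSelectPseudo_V6; the pseudo itself is never emitted
+// directly (hence the ".error" assembly string above).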
+
+
+let hasNewValue = 1 in
+class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ asmString >;
+
+multiclass T_vmpy <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_vmpy <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
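+// Each defm of this multiclass produces both widths: e.g. "defm V6_vtmpyb"
+// below creates V6_vtmpyb for 64-byte vectors plus an isCodeGenOnly
+// V6_vtmpyb_128B variant whose register classes are rewritten via !cast.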
+
+multiclass T_vmpy_VV <string asmString>:
+ T_vmpy <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_vmpy_WW <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VecDblRegs>;
+
+multiclass T_vmpy_VW <string asmString>:
+ T_vmpy <asmString, VectorRegs, VecDblRegs>;
+
+multiclass T_vmpy_WV <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VectorRegs>;
+
+defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc;
+defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc;
+defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc;
+defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc;
+defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc;
+defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc;
+defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc;
+defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc;
+defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc;
+defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc;
+defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc;
+
+let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in
+defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc;
+
+defm V6_vdmpybus_dv :
+ T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc;
+defm V6_vdmpyhsusat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc;
+defm V6_vdmpyhsuisat :
+ T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc;
+defm V6_vdmpyhsat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc;
+defm V6_vdmpyhisat :
+  T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h,#1):sat">, V6_vdmpyhisat_enc;
+defm V6_vdmpyhb_dv :
+ T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc;
+defm V6_vmpyhss :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc;
+defm V6_vmpyhsrs :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in
+defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc;
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc;
+defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc;
+defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc;
+defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc;
+defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc;
+defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc;
+defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc;
+defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc;
+defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc;
+defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc;
+}
+
+let hasNewValue = 1 in
+class T_HVX_alu <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
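+// The Type field is derived from the itinerary name by string paste:
+// itin = CVI_VA yields !cast<IType>("TypeCVI_VA"), i.e. TypeCVI_VA.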
+
+multiclass T_HVX_alu <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_alu_VV <string asmString>:
+ T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_WW <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>;
+
+multiclass T_HVX_alu_WV <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>;
+
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vrmpyubv :
+ T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc;
+defm V6_vrmpybv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc;
+defm V6_vrmpybusv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc;
+defm V6_vabsdiffub :
+ T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc;
+defm V6_vabsdiffh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc;
+defm V6_vabsdiffuh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc;
+defm V6_vabsdiffw :
+ T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc;
+}
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhvsat :
+ T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc;
+defm V6_vmpyhvsrs :
+ T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc;
+defm V6_vmpyih :
+ T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc;
+}
+
+defm V6_vand :
+ T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc;
+defm V6_vor :
+ T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc;
+defm V6_vxor :
+ T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc;
+defm V6_vaddw :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc;
+defm V6_vaddubsat :
+ T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc;
+defm V6_vadduhsat :
+ T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc;
+defm V6_vaddhsat :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc;
+defm V6_vaddwsat :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc;
+defm V6_vsubb :
+ T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc;
+defm V6_vsubh :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc;
+defm V6_vsubw :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc;
+defm V6_vsububsat :
+ T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc;
+defm V6_vsubuhsat :
+ T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc;
+defm V6_vsubhsat :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc;
+defm V6_vsubwsat :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc;
+defm V6_vavgub :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc;
+defm V6_vavguh :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc;
+defm V6_vavgh :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc;
+defm V6_vavgw :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc;
+defm V6_vnavgub :
+ T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc;
+defm V6_vnavgh :
+ T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc;
+defm V6_vnavgw :
+ T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc;
+defm V6_vavgubrnd :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc;
+defm V6_vavguhrnd :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc;
+defm V6_vavghrnd :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc;
+defm V6_vavgwrnd :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc;
+
+defm V6_vmpybv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc;
+defm V6_vmpyubv :
+ T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc;
+defm V6_vmpybusv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc;
+defm V6_vmpyhv :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc;
+defm V6_vmpyuhv :
+ T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc;
+defm V6_vmpyhus :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc;
+defm V6_vaddubh :
+ T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc;
+defm V6_vadduhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc;
+defm V6_vaddhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc;
+defm V6_vsububh :
+ T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc;
+defm V6_vsubuhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc;
+defm V6_vsubhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc;
+
+defm V6_vaddb_dv :
+ T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc;
+defm V6_vaddh_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc;
+defm V6_vaddw_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc;
+defm V6_vaddubsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc;
+defm V6_vadduhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc;
+defm V6_vaddhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc;
+defm V6_vaddwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc;
+defm V6_vsubb_dv :
+ T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc;
+defm V6_vsubh_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc;
+defm V6_vsubw_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc;
+defm V6_vsububsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc;
+defm V6_vsubuhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc;
+defm V6_vsubhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc;
+defm V6_vsubwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc;
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in {
+defm V6_vmpabusv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc;
+defm V6_vmpabuuv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1 in
+class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > {
+ def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin1#"128B"),
+ !cast<RegisterClass>(RCin2#
+ !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>;
+}
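+// The !if/!eq guard above leaves a scalar RCin2 (IntRegs) unsuffixed in the
+// _128B variant; only the vector register classes get "128B" appended.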
+
+multiclass T_HVX_vmpyacc_VVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>;
+
+multiclass T_HVX_vmpyacc_VWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_VVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+
+defm V6_vtmpyb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">,
+ V6_vtmpyb_acc_enc;
+defm V6_vtmpybus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">,
+ V6_vtmpybus_acc_enc;
+defm V6_vtmpyhb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">,
+ V6_vtmpyhb_acc_enc;
+defm V6_vdmpyhb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_acc_enc;
+defm V6_vrmpyub_acc :
+ T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyub_acc_enc;
+defm V6_vrmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybus_acc_enc;
+defm V6_vdmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_acc_enc;
+defm V6_vdmpybus_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_dv_acc_enc;
+defm V6_vdmpyhsuisat_acc :
+ T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">,
+ V6_vdmpyhsuisat_acc_enc;
+defm V6_vdmpyhisat_acc :
+  T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h,#1):sat">,
+                    V6_vdmpyhisat_acc_enc;
+defm V6_vdmpyhb_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_dv_acc_enc;
+defm V6_vmpybus_acc :
+ T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybus_acc_enc;
+defm V6_vmpabus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">,
+ V6_vmpabus_acc_enc;
+defm V6_vmpahb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">,
+ V6_vmpahb_acc_enc;
+defm V6_vmpyhsat_acc :
+ T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">,
+ V6_vmpyhsat_acc_enc;
+defm V6_vmpyuh_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuh_acc_enc;
+defm V6_vmpyiwb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">,
+ V6_vmpyiwb_acc_enc;
+defm V6_vdsaduh_acc :
+ T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">,
+ V6_vdsaduh_acc_enc;
+defm V6_vmpyihb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">,
+ V6_vmpyihb_acc_enc;
+defm V6_vmpyub_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyub_acc_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhsusat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">,
+ V6_vdmpyhsusat_acc_enc;
+defm V6_vdmpyhsat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhsat_acc_enc;
+defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR
+ <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vaslw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc;
+defm V6_vasrw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc;
+}
+
+defm V6_vdmpyhvsat_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhvsat_acc_enc;
+defm V6_vmpybusv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybusv_acc_enc;
+defm V6_vmpybv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc;
+defm V6_vmpyhus_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc;
+defm V6_vmpyhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc;
+defm V6_vmpyiewh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">,
+ V6_vmpyiewh_acc_enc;
+defm V6_vmpyiewuh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">,
+ V6_vmpyiewuh_acc_enc;
+defm V6_vmpyih_acc :
+ T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc;
+defm V6_vmpyowh_rnd_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">,
+ V6_vmpyowh_rnd_sacc_enc;
+defm V6_vmpyowh_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">,
+ V6_vmpyowh_sacc_enc;
+defm V6_vmpyubv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyubv_acc_enc;
+defm V6_vmpyuhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuhv_acc_enc;
+defm V6_vrmpybusv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybusv_acc_enc;
+defm V6_vrmpybv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc;
+defm V6_vrmpyubv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyubv_acc_enc;
+
+
+class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
+
+multiclass T_HVX_vcmp <string asmString> {
+ def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>;
+}
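+// These compares read and modify the vector predicate: the "$dst = $_src_"
+// tie means e.g. "$dst &= vcmp.eq(...)" ands the new compare result into the
+// existing predicate value.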
+
+defm V6_veqb_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc;
+defm V6_veqh_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc;
+defm V6_veqw_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc;
+defm V6_vgtb_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc;
+defm V6_vgth_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc;
+defm V6_vgtw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc;
+defm V6_vgtub_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc;
+defm V6_vgtuh_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc;
+defm V6_vgtuw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc;
+defm V6_veqb_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc;
+defm V6_veqh_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc;
+defm V6_veqw_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc;
+defm V6_vgtb_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc;
+defm V6_vgth_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc;
+defm V6_vgtw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc;
+defm V6_vgtub_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc;
+defm V6_vgtuh_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc;
+defm V6_vgtuw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc;
+defm V6_veqb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc;
+defm V6_veqh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc;
+defm V6_veqw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc;
+defm V6_vgtb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc;
+defm V6_vgth_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc;
+defm V6_vgtw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc;
+defm V6_vgtub_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc;
+defm V6_vgtuh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc;
+defm V6_vgtuw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc;
+
+defm V6_vminub :
+ T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc;
+defm V6_vminuh :
+ T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc;
+defm V6_vminh :
+ T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc;
+defm V6_vminw :
+ T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc;
+defm V6_vmaxub :
+ T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc;
+defm V6_vmaxuh :
+ T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc;
+defm V6_vmaxh :
+ T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc;
+defm V6_vmaxw :
+ T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc;
+defm V6_vshuffeb :
+ T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc;
+defm V6_vshuffob :
+ T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc;
+defm V6_vshufeh :
+ T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc;
+defm V6_vshufoh :
+ T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vmpyowh_rnd :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">,
+ V6_vmpyowh_rnd_enc;
+defm V6_vmpyiewuh :
+ T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc;
+defm V6_vmpyewuh :
+ T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc;
+defm V6_vmpyowh :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc;
+defm V6_vmpyiowh :
+ T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc;
+}
+let Itinerary = CVI_VX, Type = TypeCVI_VX in
+defm V6_vmpyieoh :
+ T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in {
+defm V6_vshufoeh :
+ T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc;
+defm V6_vshufoeb :
+ T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc;
+}
+
+let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+defm V6_vcombine :
+ T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
+
+def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
+ SDTCisSubVecOfVec<1, 0>]>;
+
+def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
+
+def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
+ (v16i32 VectorRegs:$Vt))),
+ (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
+ (v32i32 VecDblRegs:$Vt))),
+ (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
+defm V6_vsathub :
+ T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
+defm V6_vsatwh :
+ T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vroundwh :
+ T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc;
+defm V6_vroundwuh :
+ T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc;
+defm V6_vroundhb :
+ T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc;
+defm V6_vroundhub :
+ T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc;
+defm V6_vasrwv :
+ T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc;
+defm V6_vlsrwv :
+ T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc;
+defm V6_vlsrhv :
+ T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc;
+defm V6_vasrhv :
+ T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc;
+defm V6_vaslwv :
+ T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc;
+defm V6_vaslhv :
+ T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc;
+}
+
+defm V6_vaddb :
+ T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc;
+defm V6_vaddh :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdelta :
+ T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc;
+defm V6_vrdelta :
+ T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc;
+defm V6_vdealb4w :
+ T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc;
+defm V6_vpackeb :
+ T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc;
+defm V6_vpackeh :
+ T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc;
+defm V6_vpackhub_sat :
+ T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc;
+defm V6_vpackhb_sat :
+ T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc;
+defm V6_vpackwuh_sat :
+ T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc;
+defm V6_vpackwh_sat :
+ T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc;
+defm V6_vpackob :
+ T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc;
+defm V6_vpackoh :
+ T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
+ : CVI_VA_Resource1 <(outs RC2:$dst),
+ (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
+
+multiclass T_HVX_condALU <string asmString> {
+ def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">,
+ V6_vaddbq_enc;
+defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">,
+ V6_vaddhq_enc;
+defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">,
+ V6_vaddwq_enc;
+defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">,
+ V6_vsubbq_enc;
+defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">,
+ V6_vsubhq_enc;
+defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">,
+ V6_vsubwq_enc;
+defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">,
+ V6_vaddbnq_enc;
+defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">,
+ V6_vaddhnq_enc;
+defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">,
+ V6_vaddwnq_enc;
+defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">,
+ V6_vsubbnq_enc;
+defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">,
+ V6_vsubhnq_enc;
+defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">,
+ V6_vsubwnq_enc;
+
+let hasNewValue = 1 in
+class T_HVX_alu_2op <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu_2op <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_alu_2op_VV <string asmString>:
+ T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_2op_WV <string asmString>:
+ T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>;
+
+
+defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">,
+ V6_vabsh_enc;
+defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">,
+ V6_vabsw_enc;
+defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">,
+ V6_vabsh_sat_enc;
+defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">,
+ V6_vabsw_sat_enc;
+defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">,
+ V6_vnot_enc;
+defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">,
+ V6_vassign_enc;
+
+defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">,
+ V6_vzb_enc;
+defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">,
+ V6_vzh_enc;
+defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">,
+ V6_vsb_enc;
+defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">,
+ V6_vsh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">,
+ V6_vdealh_enc;
+defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">,
+ V6_vdealb_enc;
+defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">,
+ V6_vshuffh_enc;
+defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">,
+ V6_vshuffb_enc;
+}
+
+let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in {
+defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">,
+ V6_vunpackub_enc;
+defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">,
+ V6_vunpackuh_enc;
+defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">,
+ V6_vunpackb_enc;
+defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">,
+ V6_vunpackh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">,
+ V6_vcl0w_enc;
+defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">,
+ V6_vcl0h_enc;
+defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">,
+ V6_vnormamtw_enc;
+defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">,
+ V6_vnormamth_enc;
+defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">,
+ V6_vpopcounth_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
+ Type = TypeCVI_VX_DV in
+class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst),
+ (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString, [], "$dst = $_src_" > ;
+
+
+multiclass T_HVX_vmpyacc2 <string asmString> {
+ def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>;
+}
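+// u1Imm:$src3 is a one-bit immediate operand, printed as "#$src3" in the
+// assembly strings below, i.e. each of these comes in a #0 and a #1 form.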
+
+defm V6_vrmpybusi_acc :
+ T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">,
+ V6_vrmpybusi_acc_enc;
+defm V6_vrsadubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">,
+ V6_vrsadubi_acc_enc;
+defm V6_vrmpyubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">,
+ V6_vrmpyubi_acc_enc;
+
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
+class T_HVX_vmpy2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString>;
+
+
+multiclass T_HVX_vmpy2 <string asmString> {
+ def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>;
+}
+
+defm V6_vrmpybusi :
+ T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc;
+defm V6_vrsadubi :
+ T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc;
+defm V6_vrmpyubi :
+ T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc;
+
+
+let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS,
+ hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in
+class T_HVX_perm <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_),
+ (ins RC:$src1, RC:$src2, IntRegs:$src3),
+ asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >;
+
+multiclass T_HVX_perm <string asmString> {
+ def NAME : T_HVX_perm <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>;
+}
+
+let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in {
+ defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc;
+ defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc;
+}
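+// Both outputs are tied back to the two vector inputs, so vshuff and vdeal
+// rewrite the operand pair in place, steered by the scalar $src3.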
+
+// Conditional vector move.
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_cmov <bit isPredNot, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_cmov <bit isPredNot = 0> {
+ def NAME : T_HVX_cmov <isPredNot, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>;
+}
+
+defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc;
+defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc;
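+// Illustrative expansion of the asm string above (the register names are
+// only examples): V6_vcmov prints "if (p0) v0 = v1", while V6_vncmov
+// prints "if (!p0) v0 = v1".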
+
+// Conditional vector combine.
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 < (outs RCout:$dst),
+ (ins PredRegs:$src1, RCin:$src2, RCin:$src3),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_ccombine <bit isPredNot = 0> {
+ def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc;
+defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc;
+
+let hasNewValue = 1 in
+class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString >;
+
+multiclass T_HVX_shift <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_shift <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_shift_VV <string asmString>:
+ T_HVX_shift <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_shift_WV <string asmString>:
+ T_HVX_shift <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in {
+defm V6_valignb :
+ T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc;
+defm V6_vlalignb :
+ T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrwh :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc;
+defm V6_vasrwhsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwhsat_enc;
+defm V6_vasrwhrndsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">,
+ V6_vasrwhrndsat_enc;
+defm V6_vasrwuhsat :
+ T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwuhsat_enc;
+defm V6_vasrhubsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">,
+ V6_vasrhubsat_enc;
+defm V6_vasrhubrndsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhubrndsat_enc;
+defm V6_vasrhbrndsat :
+ T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhbrndsat_enc;
+}
+
+// Assembler mapped -- possibly just an alias, hence left commented out:
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc;
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in {
+defm V6_vshuffvdd :
+ T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc;
+defm V6_vdealvdd :
+ T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc;
+}
+
+let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in
+class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_unpack <string asmString> {
+ def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc;
+defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_valign <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3),
+ asmString>;
+
+multiclass T_HVX_valign <string asmString> {
+ def NAME : T_HVX_valign <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>;
+}
+
+defm V6_valignbi :
+ T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc;
+defm V6_vlalignbi :
+ T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+class T_HVX_predAlu <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asmString>;
+
+multiclass T_HVX_predAlu <string asmString> {
+ def NAME : T_HVX_predAlu <asmString, VecPredRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>;
+}
+
+defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc;
+defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc;
+defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc;
+defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc;
+defm V6_pred_and_n :
+ T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_prednot <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1),
+ "$dst = not($src1)">, V6_pred_not_enc;
+
+def V6_pred_not : T_HVX_prednot <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >;
+
+multiclass T_HVX_vcmp2 <string asmString> {
+ def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc;
+defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc;
+defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc;
+defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc;
+defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc;
+defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc;
+defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc;
+defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc;
+defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc;
+
+let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc;
+
+def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>;
+
+let isAccumulator = 1 in
+class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc;
+
+def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)" >, V6_vandqrt_enc;
+
+def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_lvsplatw <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsplat($src1)" >, V6_lvsplatw_enc;
+
+def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>;
+
+
+let hasNewValue = 1 in
+class T_V6_vinsertwr <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1),
+ "$dst.w = vinsert($src1)", [], "$dst = $_src_">,
+ V6_vinsertwr_enc;
+
+def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in
+class T_V6_pred_scalar2 <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsetq($src1)">, V6_pred_scalar2_enc;
+
+def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>;
+
+class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)">, V6_vandvrt_enc;
+
+def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
+ : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
+
+class T_HVX_rol_R <string asmString>
+ : T_HVX_rol <asmString, IntRegs, u5Imm>;
+class T_HVX_rol_P <string asmString>
+ : T_HVX_rol <asmString, DoubleRegs, u6Imm>;
+
+def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
+let hasNewValue = 1, opNewValue = 0 in
+def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
+ : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2),
+ asmString, [], "$dst = $_src_" >;
+
+class T_HVX_rol_acc_P <string asmString>
+ : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>;
+
+class T_HVX_rol_acc_R <string asmString>
+ : T_HVX_rol_acc <asmString, IntRegs, u5Imm>;
+
+def S6_rol_i_p_nac :
+ T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
+def S6_rol_i_p_acc :
+ T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc;
+def S6_rol_i_p_and :
+ T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc;
+def S6_rol_i_p_or :
+ T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc;
+def S6_rol_i_p_xacc :
+ T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc;
+
+let hasNewValue = 1, opNewValue = 0 in {
+def S6_rol_i_r_nac :
+ T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc;
+def S6_rol_i_r_acc :
+ T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc;
+def S6_rol_i_r_and :
+ T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc;
+def S6_rol_i_r_or :
+ T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc;
+def S6_rol_i_r_xacc :
+ T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc;
+}
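+// Taken together: S6_rol_i_r rotates a 32-bit register left by a 5-bit
+// immediate (S6_rol_i_p does the same for 64-bit pairs with a 6-bit
+// immediate), and the _nac/_acc/_and/_or/_xacc variants fold the rotated
+// value into the destination with -=, +=, &=, |= and ^= respectively.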
+
+let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in
+class T_V6_extractw <RegisterClass RC>
+ : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2),
+ "$dst = vextract($src1,$src2)">, V6_extractw_enc;
+
+def V6_extractw : T_V6_extractw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_extractw_128B : T_V6_extractw <VectorRegs128B>;
+
+let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in
+class T_sys0op <string asmString>
+ : ST1Inst <(outs), (ins), asmString>;
+
+let isSolo = 1, validSubTargets = HasV55SubT in {
+def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc;
+def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc;
+def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc;
+}
+
+class T_sys1op <string asmString, RegisterClass RC>
+ : ST1Inst <(outs), (ins RC:$src1), asmString>;
+
+class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>;
+class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>;
+
+let isSoloAX = 1, validSubTargets = HasV55SubT in
+def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc;
+
+let isSolo = 1, validSubTargets = HasV60SubT in {
+def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc;
+def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc;
+}
+
+let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1,
+ validSubTargets = HasV55SubT in
+def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = l2locka($src1)">, Y5_l2locka_enc;
+
+// Not defined on the etc side; the reason is unclear.
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc;
+
+let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1,
+ hasSideEffects = 0,
+validSubTargets = HasV55SubT in
+def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2),
+ (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst1,$dst2 = vacsh($src1,$src2)", [],
+ "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1,
+ RegisterClass RCin2>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>;
+
+multiclass T_HVX_alu2 <string asmString, RegisterClass RC > {
+ def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"),
+ VecPredRegs128B, VectorRegs128B>;
+}
+
+multiclass T_HVX_alu2_V <string asmString> :
+ T_HVX_alu2 <asmString, VectorRegs>;
+
+multiclass T_HVX_alu2_W <string asmString> :
+ T_HVX_alu2 <asmString, VecDblRegs>;
+
+defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1,
+ hasSideEffects = 0 in
+defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc;
+
+class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>;
+
+multiclass T_HVX_vlutb <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_V <string asmString> :
+ T_HVX_vlutb <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_W <string asmString> :
+ T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in
+class T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb_acc<asmString,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_acc_V <string asmString> :
+ T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_acc_W <string asmString> :
+ T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in
+defm V6_vlutvvb:
+ T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in
+defm V6_vlutvwh:
+ T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc;
+
+let hasNewValue = 1 in {
+ defm V6_vlutvvb_oracc:
+ T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">,
+ V6_vlutvvb_oracc_enc;
+ defm V6_vlutvwh_oracc:
+ T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">,
+ V6_vlutvwh_oracc_enc;
+}
+
+// This may be a fake instruction that should not be defined at all.
+def S2_cabacencbin
+ : SInst2<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc;
+
+// Vhist instructions
+def V6_vhistq
+ : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1),
+ "vhist($src1)">, V6_vhistq_enc;
+
+def V6_vhist
+ : CVI_HIST_Resource1 <(outs), (ins),
+ "vhist" >, V6_vhist_enc;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
index f4fb946..96dd531 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -35,6 +35,34 @@ multiclass bitconvert_64<ValueType a, ValueType b> {
(a DoubleRegs:$src)>;
}
+multiclass bitconvert_vec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VectorRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VectorRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs:$src))),
+ (b VecDblRegs:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs:$src))),
+ (a VecDblRegs:$src)>;
+}
+
+multiclass bitconvert_predvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecPredRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VecPredRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec128B<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs128B:$src))),
+ (b VecDblRegs128B:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs128B:$src))),
+ (a VecDblRegs128B:$src)>;
+}
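+// For example, bitconvert_vec<v64i8, v16i32> (instantiated below) expands
+// to two identity-copy patterns: both types occupy the same VectorRegs
+// class, so the bitcast needs no code at the register level.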
+
// Bit convert vector types.
defm : bitconvert_32<v4i8, i32>;
defm : bitconvert_32<v2i16, i32>;
@@ -47,6 +75,21 @@ defm : bitconvert_64<v8i8, v4i16>;
defm : bitconvert_64<v8i8, v2i32>;
defm : bitconvert_64<v4i16, v2i32>;
+defm : bitconvert_vec<v64i8, v16i32>;
+defm : bitconvert_vec<v8i64 , v16i32>;
+defm : bitconvert_vec<v32i16, v16i32>;
+
+defm : bitconvert_dblvec<v16i64, v128i8>;
+defm : bitconvert_dblvec<v32i32, v128i8>;
+defm : bitconvert_dblvec<v64i16, v128i8>;
+
+defm : bitconvert_dblvec128B<v64i32, v128i16>;
+defm : bitconvert_dblvec128B<v256i8, v128i16>;
+defm : bitconvert_dblvec128B<v32i64, v128i16>;
+
+defm : bitconvert_dblvec128B<v64i32, v256i8>;
+defm : bitconvert_dblvec128B<v32i64, v256i8>;
+defm : bitconvert_dblvec128B<v128i16, v256i8>;
// Vector shift support. Vector shifting in Hexagon is rather different
// from LLVM's internal representation.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 1d0d015..b207aaf 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -691,15 +691,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
(i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
// Mux
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -720,17 +720,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>;
-def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>;
-def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
-def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
(i32 (C2_cmpgti (I32:$src1),
- (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
(i32 (C2_cmpgtui (I32:$src1),
- (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
@@ -1289,3 +1289,5 @@ def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
+include "HexagonIntrinsicsV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
new file mode 100644
index 0000000..24a3e4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -0,0 +1,836 @@
+//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>;
+
+def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>;
+}
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=vassignp_W($src1)",
+ [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=vassignp_W_128B($src1)",
+ [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B
+ VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>;
+
+let AddedComplexity = 100 in {
+def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_loreg)) >,
+ Requires<[UseHVXDbl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_hireg)) >,
+ Requires<[UseHVXDbl]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v64i8 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
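+// In the conversions above and below, A2_tfrsi materializes the constant
+// 0x01010101 (bit 0 set in every byte); vandvrt/vandqrt use it as the
+// per-byte mask when moving values between vector and vector-predicate
+// registers.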
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai_128B IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt_128B
+ (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+}
+
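+// Intrinsic-selection multiclasses. The suffix letters encode the operand
+// classes: R = scalar register, V = vector, W = vector pair, Q = vector
+// predicate, I = immediate. Each multiclass supplies a 64-byte pattern
+// guarded by UseHVXSgl and a _128B pattern guarded by UseHVXDbl.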
+multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1),
+ (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1),
+ (MI VectorRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1),
+ (MI VecPredRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2),
+ (MI VecDblRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+ (MI VectorRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2),
+ (MI VecDblRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+ (MI VectorRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+ (MI VecPredRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+ (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+
+multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3),
+ (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>;
+defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>;
+defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>;
+defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>;
+defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>;
+defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>;
+defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>;
+defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>;
+defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>;
+defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>;
+defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>;
+defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>;
+defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>;
+defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>;
+defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>;
+defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>;
+defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>;
+defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>;
+defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>;
+defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>;
+defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>;
+defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>;
+defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>;
+defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>;
+defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>;
+defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>;
+defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>;
+defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>;
+defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>;
+defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>;
+defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>;
+defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>;
+
+defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>;
+defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>;
+defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>;
+defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>;
+defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>;
+defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>;
+defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>;
+defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>;
+defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>;
+defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>;
+defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>;
+defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>;
+defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>;
+defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>;
+defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>;
+defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>;
+defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>;
+defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>;
+defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>;
+defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>;
+defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>;
+defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>;
+defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>;
+defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>;
+defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>;
+defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>;
+defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>;
+defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>;
+defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>;
+defm : T_WW_pat <V6_vaddh_dv, int_hexagon_V6_vaddh_dv>;
+defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>;
+defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>;
+defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>;
+defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>;
+defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>;
+defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>;
+defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>;
+defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>;
+defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>;
+defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>;
+defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>;
+defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>;
+defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>;
+defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>;
+defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>;
+defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>;
+defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>;
+defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>;
+defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>;
+defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>;
+defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>;
+defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>;
+defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>;
+defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>;
+defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>;
+defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>;
+defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>;
+defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>;
+defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>;
+defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>;
+defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>;
+defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>;
+defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>;
+defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>;
+
+defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>;
+defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>;
+defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>;
+defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>;
+defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>;
+defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>;
+defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>;
+defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>;
+defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>;
+
+defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>;
+defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>;
+
+defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>;
+defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>;
+defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>;
+defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>;
+
+defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>;
+defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>;
+defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>;
+defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>;
+defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>;
+defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>;
+defm : T_WWR_pat <V6_vmpahb_acc, int_hexagon_V6_vmpahb_acc>;
+defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>;
+
+defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>;
+defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>;
+defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>;
+defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>;
+defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>;
+defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>;
+defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>;
+defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>;
+defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>;
+defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>;
+defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>;
+defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>;
+defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>;
+defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>;
+defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>;
+
+// Compare instructions
+defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>;
+defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>;
+defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>;
+defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>;
+defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>;
+defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>;
+defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>;
+defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>;
+defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>;
+defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>;
+defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>;
+defm : T_QVV_pat <V6_veqw_or, int_hexagon_V6_veqw_or>;
+defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>;
+defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>;
+defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>;
+defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>;
+defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>;
+defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>;
+defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>;
+defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>;
+defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>;
+defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>;
+defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>;
+defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>;
+defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>;
+defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>;
+defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>;
+
+defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>;
+defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>;
+defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>;
+defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>;
+defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>;
+defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>;
+defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>;
+defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>;
+defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>;
+defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>;
+defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>;
+defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>;
+defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>;
+defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>;
+defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>;
+defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>;
+defm : T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>;
+defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>;
+defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>;
+defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>;
+defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>;
+defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>;
+defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>;
+defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>;
+defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>;
+defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>;
+defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>;
+defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>;
+defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>;
+defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>;
+defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>;
+defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>;
+defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>;
+defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>;
+defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>;
+defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>;
+defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>;
+defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>;
+defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>;
+defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>;
+defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>;
+defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>;
+defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>;
+defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>;
+defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>;
+defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>;
+
+defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>;
+defm : T_QVV_pat <V6_vaddhq, int_hexagon_V6_vaddhq>;
+defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>;
+defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>;
+defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>;
+defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>;
+defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>;
+defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>;
+defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>;
+defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>;
+defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>;
+defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>;
+
+defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>;
+defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>;
+defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>;
+defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>;
+defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>;
+defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>;
+defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>;
+defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>;
+defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>;
+defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>;
+defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>;
+defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>;
+defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>;
+defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>;
+defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>;
+defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>;
+defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>;
+defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>;
+defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>;
+defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>;
+defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>;
+defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>;
+defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>;
+
+defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>;
+defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>;
+defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>;
+
+defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>;
+defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>;
+defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>;
+
+// Assembler mapped.
+//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>;
+// Not present earlier; the corresponding intrinsic still needs to be added.
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>;
+defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>;
+defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>;
+defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>;
+defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>;
+defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>;
+defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>;
+
+defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>;
+defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>;
+
+defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>;
+defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>;
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>;
+
+defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>;
+defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>;
+defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>;
+defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>;
+defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>;
+defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>;
+defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>;
+defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>;
+defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>;
+defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>;
+defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>;
+defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>;
+defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>;
+defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>;
+defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>;
+defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>;
+defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>;
+
+defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>;
+defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>;
+defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>;
+defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>;
+defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>;
+defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>;
+
+defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>;
+defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>;
+defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>;
+defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>;
+
+def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>;
+def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>;
+def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>;
+def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>;
+def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>;
+def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>;
+def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>;
+def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>;
+def : T_RRI_pat <S6_rol_i_r_acc, int_hexagon_S6_rol_i_r_acc>;
+def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>;
+def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>;
+def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>;
+
+defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>;
+defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>;
+
+def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
+
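+// There is no single instruction for a v64i32-to-v64i16 truncation; split the
+// double vector register into its high and low halves and pack them with
+// saturation.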
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat_128B
+ (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)),
+ (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>,
+ Requires<[UseHVXDbl]>;
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 75189b6..624c0f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -26,39 +26,71 @@
using namespace llvm;
-static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
- HexagonAsmPrinter& Printer) {
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ HexagonAsmPrinter &Printer) {
MCContext &MC = Printer.OutContext;
const MCExpr *ME;
- ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC);
+ // Populate the relocation type based on Hexagon target flags
+ // set on an operand
+ MCSymbolRefExpr::VariantKind RelocationType;
+ switch (MO.getTargetFlags()) {
+ default:
+ RelocationType = MCSymbolRefExpr::VK_None;
+ break;
+ case HexagonII::MO_PCREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL;
+ break;
+ case HexagonII::MO_GOT:
+ RelocationType = MCSymbolRefExpr::VK_GOT;
+ break;
+ case HexagonII::MO_LO16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16;
+ break;
+ case HexagonII::MO_HI16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16;
+ break;
+ case HexagonII::MO_GPREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL;
+ break;
+ }
+
+ ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC);
if (!MO.isJTI() && MO.getOffset())
ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC),
MC);
- return (MCOperand::createExpr(ME));
+ return MCOperand::createExpr(ME);
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
- HexagonAsmPrinter& AP) {
- if(MI->getOpcode() == Hexagon::ENDLOOP0){
+void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP) {
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
HexagonMCInstrInfo::setInnerLoop(MCB);
return;
}
- if(MI->getOpcode() == Hexagon::ENDLOOP1){
+ if (MI->getOpcode() == Hexagon::ENDLOOP1) {
HexagonMCInstrInfo::setOuterLoop(MCB);
return;
}
- MCInst* MCI = new (AP.OutContext) MCInst;
+ MCInst *MCI = new (AP.OutContext) MCInst;
MCI->setOpcode(MI->getOpcode());
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
"MCI opcode should have been set on construction");
+ bool MustExtend = false;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCO;
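+    // Record whether any operand carries the constant-extender flag so the
+    // instruction can be extended after all operands are lowered.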
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ MustExtend = true;
switch (MO.getType()) {
default:
@@ -73,11 +105,14 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
APFloat Val = MO.getFPImm()->getValueAPF();
// FP immediates are used only when setting GPRs, so they may be dealt
// with like regular immediates from this point on.
- MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData());
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(),
+ AP.OutContext));
break;
}
case MachineOperand::MO_Immediate:
- MCO = MCOperand::createImm(MO.getImm());
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(MO.getImm(), AP.OutContext));
break;
case MachineOperand::MO_MachineBasicBlock:
MCO = MCOperand::createExpr
@@ -104,5 +139,8 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
MCI->addOperand(MCO);
}
+ AP.HexagonProcessInstruction(*MCI, *MI);
+ HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI,
+ MustExtend);
MCB.addOperand(MCOperand::createInst(MCI));
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 35f732c..7a52d68 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -179,7 +179,11 @@ void VLIWMachineScheduler::schedule() {
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
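+    // pickNode returns a null SUnit once every node has been scheduled.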
+ DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
if (!checkSchedLimit())
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 707bfdb..20c4ab1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -92,6 +92,7 @@ namespace {
/// \brief A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
+ bool isNewValueJumpCandidate(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -280,9 +281,9 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
return true;
}
-// Given a compare operator, return a matching New Value Jump
-// compare operator. Make sure that MI here is included in
-// HexagonInstrInfo.cpp::isNewValueJumpCandidate
+
+// Given a compare operator, return a matching New Value Jump compare operator.
+// Make sure that MI here is included in isNewValueJumpCandidate.
static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
bool secondRegNewified,
MachineBasicBlock *jmpTarget,
@@ -341,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t
: Hexagon::J4_cmpgtui_t_jumpnv_nt;
+ case Hexagon::C4_cmpneq:
+ return taken ? Hexagon::J4_cmpeq_f_jumpnv_t
+ : Hexagon::J4_cmpeq_f_jumpnv_nt;
+
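+  // C4_cmplte and C4_cmplteu are the negated forms of cmpgt/cmpgtu, so they
+  // map onto the ".f" (false-sense) new-value jump opcodes.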
+ case Hexagon::C4_cmplte:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmplt_f_jumpnv_t
+ : Hexagon::J4_cmplt_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgt_f_jumpnv_t
+ : Hexagon::J4_cmpgt_f_jumpnv_nt;
+
+ case Hexagon::C4_cmplteu:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmpltu_f_jumpnv_t
+ : Hexagon::J4_cmpltu_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t
+ : Hexagon::J4_cmpgtu_f_jumpnv_nt;
+
default:
llvm_unreachable("Could not find matching New Value Jump instruction.");
}
@@ -348,6 +367,26 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
return 0;
}
+bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI)
+ const {
+ switch (MI->getOpcode()) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
@@ -372,7 +411,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// Loop through all the bb's of the function
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
DEBUG(dbgs() << "** dumping bb ** "
<< MBB->getNumber() << "\n");
@@ -468,7 +507,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
MI->getOperand(0).getReg() == predReg) {
// Not all compares can be new value compare. Arch Spec: 7.6.1.1
- if (QII->isNewValueJumpCandidate(MI)) {
+ if (isNewValueJumpCandidate(MI)) {
assert((MI->getDesc().isCompare()) &&
"Only compare instruction can be collapsed into New Value Jump");
@@ -591,8 +630,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DebugLoc dl = MI->getDebugLoc();
MachineInstr *NewMI;
- assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
- "This compare is not a New Value Jump candidate.");
+ assert((isNewValueJumpCandidate(cmpInstr)) &&
+ "This compare is not a New Value Jump candidate.");
unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
isSecondOpNewified,
jmpTarget, MBPI);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
index 2bece8f..fbd29cd 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,59 +7,114 @@
//
//===----------------------------------------------------------------------===//
+def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; }
+def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; }
+def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; }
+def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; }
+def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; }
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
-
+def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
+def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
+def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; }
+def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; }
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
+def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; }
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
+def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
+def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
+def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
+def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; }
+def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; }
+def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; }
+def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; }
+def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; }
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
+def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
+def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
+def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; }
+def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; }
+def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; }
+def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; }
+def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; }
+def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; }
// Immediate operands.
-let PrintMethod = "printImmOperand" in {
- def s32Imm : Operand<i32>;
- def s8Imm : Operand<i32>;
- def s8Imm64 : Operand<i64>;
- def s6Imm : Operand<i32>;
+let OperandType = "OPERAND_IMMEDIATE",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand;
+ let DecoderMethod = "s32ImmDecoder"; }
+ def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
def s6_3Imm : Operand<i32>;
- def s4Imm : Operand<i32>;
- def s4_0Imm : Operand<i32> { let DecoderMethod = "s4_0ImmDecoder"; }
- def s4_1Imm : Operand<i32> { let DecoderMethod = "s4_1ImmDecoder"; }
- def s4_2Imm : Operand<i32> { let DecoderMethod = "s4_2ImmDecoder"; }
- def s4_3Imm : Operand<i32> { let DecoderMethod = "s4_3ImmDecoder"; }
- def u64Imm : Operand<i64>;
- def u32Imm : Operand<i32>;
- def u26_6Imm : Operand<i32>;
- def u16Imm : Operand<i32>;
- def u16_0Imm : Operand<i32>;
- def u16_1Imm : Operand<i32>;
- def u16_2Imm : Operand<i32>;
- def u16_3Imm : Operand<i32>;
- def u11_3Imm : Operand<i32>;
- def u10Imm : Operand<i32>;
- def u9Imm : Operand<i32>;
- def u8Imm : Operand<i32>;
- def u7Imm : Operand<i32>;
- def u6Imm : Operand<i32>;
- def u6_0Imm : Operand<i32>;
- def u6_1Imm : Operand<i32>;
- def u6_2Imm : Operand<i32>;
- def u6_3Imm : Operand<i32>;
- def u5Imm : Operand<i32>;
+ def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
+ let DecoderMethod = "s4_1ImmDecoder"; }
+ def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand;
+ let DecoderMethod = "s4_2ImmDecoder"; }
+ def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
+ let DecoderMethod = "s4_3ImmDecoder"; }
+ def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; }
+ def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; }
+ def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
+ def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; }
+ def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
+ def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
+ def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
+ def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
+ def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
+ def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; }
+ def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; }
+ def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; }
+ def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; }
+ def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; }
+ def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
+ def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
+ def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
+ def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
+ def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; }
+ def u5_0Imm : Operand<i32>;
+ def u5_1Imm : Operand<i32>;
def u5_2Imm : Operand<i32>;
def u5_3Imm : Operand<i32>;
- def u4Imm : Operand<i32>;
+ def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; }
def u4_0Imm : Operand<i32>;
+ def u4_1Imm : Operand<i32>;
def u4_2Imm : Operand<i32>;
- def u3Imm : Operand<i32>;
+ def u4_3Imm : Operand<i32>;
+ def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; }
def u3_0Imm : Operand<i32>;
def u3_1Imm : Operand<i32>;
- def u2Imm : Operand<i32>;
- def u1Imm : Operand<i32>;
- def n8Imm : Operand<i32>;
- def m6Imm : Operand<i32>;
+ def u3_2Imm : Operand<i32>;
+ def u3_3Imm : Operand<i32>;
+ def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; }
+ def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; }
+ def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; }
}
-let PrintMethod = "printNOneImmOperand" in
-def nOneImm : Operand<i32>;
+let OperandType = "OPERAND_IMMEDIATE" in {
+ def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand;
+ let PrintMethod = "prints4_6ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand;
+ let PrintMethod = "prints3_6ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+ def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+}
//
// Immediate predicates
@@ -81,32 +136,12 @@ def s31_1ImmPred : PatLeaf<(i32 imm), [{
def s30_2ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
+ return isShiftedInt<30,2>(v);
}]>;
def s29_3ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
-}]>;
-
-def s22_10ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<22,10>(v);
-}]>;
-
-def s8_24ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<8,24>(v);
-}]>;
-
-def s16_16ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<16,16>(v);
-}]>;
-
-def s26_6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<26,6>(v);
+ return isShiftedInt<29,3>(v);
}]>;
def s16ImmPred : PatLeaf<(i32 imm), [{
@@ -114,16 +149,6 @@ def s16ImmPred : PatLeaf<(i32 imm), [{
return isInt<16>(v);
}]>;
-def s13ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<13>(v);
-}]>;
-
-def s12ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<12>(v);
-}]>;
-
def s11_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
@@ -149,16 +174,6 @@ def s10ImmPred : PatLeaf<(i32 imm), [{
return isInt<10>(v);
}]>;
-def s9ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v);
-}]>;
-
-def m9ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v) && (v != -256);
-}]>;
-
def s8ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
@@ -194,7 +209,6 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedInt<4,3>(v);
}]>;
-
def u64ImmPred : PatLeaf<(i64 imm), [{
// Adding "N ||" to suppress gcc unused warning.
return (N || true);
@@ -230,19 +244,19 @@ def u26_6ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<26,6>(v);
}]>;
-def u16ImmPred : PatLeaf<(i32 imm), [{
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
-def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+def u16_1ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,8>(v);
+ return isShiftedUInt<16,1>(v);
}]>;
-def u16_0ImmPred : PatLeaf<(i32 imm), [{
+def u16_2ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
+ return isShiftedUInt<16,2>(v);
}]>;
def u11_3ImmPred : PatLeaf<(i32 imm), [{
@@ -250,6 +264,11 @@ def u11_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<11,3>(v);
}]>;
+def u10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<10>(v);
+}]>;
+
def u9ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
@@ -321,6 +340,11 @@ def u1ImmPred : PatLeaf<(i1 imm), [{
return isUInt<1>(v);
}]>;
+def u1ImmPred32 : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
def m5BImmPred : PatLeaf<(i32 imm), [{
// m5BImmPred predicate - True if the (char) number is in range -1 .. -31
// and will fit in a 5 bit field when made positive, for use in memops.
@@ -379,7 +403,7 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr5ImmPred : PatLeaf<(i32 imm), [{
- // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ // True if the immediate is in range 0..31.
int32_t v = (int32_t)N->getSExtValue();
return (v >= 0 && v <= 31);
}]>;
@@ -404,14 +428,13 @@ def Clr4ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr4ImmPred : PatLeaf<(i32 imm), [{
- // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ // True if the immediate is in the range 0..15.
int16_t v = (int16_t)N->getSExtValue();
return (v >= 0 && v <= 15);
}]>;
def Set3ImmPred : PatLeaf<(i32 imm), [{
- // Set3ImmPred predicate - True if the number is in the series of values:
- // [ 2^0, 2^1, ... 2^7 ].
+ // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ].
// For use in setbit immediate.
uint8_t v = (int8_t)N->getSExtValue();
// Constrain to 8 bits, and then check for single bit.
@@ -419,9 +442,7 @@ def Set3ImmPred : PatLeaf<(i32 imm), [{
}]>;
def Clr3ImmPred : PatLeaf<(i32 imm), [{
- // Clr3ImmPred predicate - True if the number is in the series of
- // bit negated values:
- // [ 2^0, 2^1, ... 2^7 ].
+  // True if the number is in the series of bit negated values:
+  // [ 2^0, 2^1, ... 2^7 ].
// For use in setbit and clrbit immediate.
uint8_t v = ~ (int8_t)N->getSExtValue();
// Constrain to 8 bits, and then check for single bit.
@@ -429,76 +450,109 @@ def Clr3ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr3ImmPred : PatLeaf<(i32 imm), [{
- // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ // True if the immediate is in the range 0..7.
int8_t v = (int8_t)N->getSExtValue();
return (v >= 0 && v <= 7);
}]>;
// Extendable immediate operands.
-
-let PrintMethod = "printExtOperand" in {
- def f32Ext : Operand<f32>;
- def s16Ext : Operand<i32> { let DecoderMethod = "s16ImmDecoder"; }
- def s12Ext : Operand<i32> { let DecoderMethod = "s12ImmDecoder"; }
- def s11_0Ext : Operand<i32> { let DecoderMethod = "s11_0ImmDecoder"; }
- def s11_1Ext : Operand<i32> { let DecoderMethod = "s11_1ImmDecoder"; }
- def s11_2Ext : Operand<i32> { let DecoderMethod = "s11_2ImmDecoder"; }
- def s11_3Ext : Operand<i32> { let DecoderMethod = "s11_3ImmDecoder"; }
- def s10Ext : Operand<i32> { let DecoderMethod = "s10ImmDecoder"; }
- def s9Ext : Operand<i32> { let DecoderMethod = "s90ImmDecoder"; }
- def s8Ext : Operand<i32> { let DecoderMethod = "s8ImmDecoder"; }
- def s7Ext : Operand<i32>;
- def s6Ext : Operand<i32> { let DecoderMethod = "s6_0ImmDecoder"; }
- def u6Ext : Operand<i32>;
- def u7Ext : Operand<i32>;
- def u8Ext : Operand<i32>;
- def u9Ext : Operand<i32>;
- def u10Ext : Operand<i32>;
- def u6_0Ext : Operand<i32>;
- def u6_1Ext : Operand<i32>;
- def u6_2Ext : Operand<i32>;
- def u6_3Ext : Operand<i32>;
+def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
+def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; }
+def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; }
+def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; }
+def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; }
+def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; }
+def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; }
+def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; }
+def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
+def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
+def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
+def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
+def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; }
+def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; }
+def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; }
+def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; }
+def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; }
+def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
+def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
+def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
+def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
+def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; }
+
+let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
+ def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand;
+ let DecoderMethod = "s16ImmDecoder"; }
+ def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand;
+ let DecoderMethod = "s12ImmDecoder"; }
+ def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
+ let DecoderMethod = "s11_0ImmDecoder"; }
+ def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
+ let DecoderMethod = "s11_1ImmDecoder"; }
+ def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
+ let DecoderMethod = "s11_2ImmDecoder"; }
+ def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
+ let DecoderMethod = "s11_3ImmDecoder"; }
+ def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand;
+ let DecoderMethod = "s10ImmDecoder"; }
+ def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand;
+ let DecoderMethod = "s90ImmDecoder"; }
+ def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; }
+ def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
+ def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; }
+ def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; }
+ def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; }
+ def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; }
+ def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; }
+ def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
+ def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
+ def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
+ def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
+ def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; }
}
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<10>(v))
- return true;
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
+def s4_7ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+    // Return true if the immediate can fit in an 11-bit sign extended field
+    // and is 128-byte aligned.
+ return isShiftedInt<4,7>(v);
+ return false;
}]>;
-def s8ExtPred : PatLeaf<(i32 imm), [{
+def s3_7ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
+ if (HST->hasV60TOps())
+    // Return true if the immediate can fit in a 10-bit sign extended field
+    // and is 128-byte aligned.
+ return isShiftedInt<3,7>(v);
+ return false;
}]>;
-def u8ExtPred : PatLeaf<(i32 imm), [{
+def s4_6ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<4,6>(v);
+ return false;
}]>;
-def u9ExtPred : PatLeaf<(i32 imm), [{
+def s3_6ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<3,6>(v);
+ return false;
}]>;
@@ -523,21 +577,21 @@ let PrintMethod = "printGlobalOperand" in {
let PrintMethod = "printJumpTable" in
def jumptablebase : Operand<i32>;
-def brtarget : Operand<OtherVT>;
+def brtarget : Operand<OtherVT> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
def brtargetExt : Operand<OtherVT> {
- let PrintMethod = "printExtBrtarget";
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
+def calltarget : Operand<i32> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
}
-def calltarget : Operand<i32>;
def bblabel : Operand<i32>;
-def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
-
-def symbolHi32 : Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-def symbolLo32 : Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
// Return true if for a 32 to 64-bit sign-extended load.
def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
new file mode 100644
index 0000000..1723771
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -0,0 +1,150 @@
+//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include "Hexagon.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonOptimizeSZextends();
+ void initializeHexagonOptimizeSZextendsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonOptimizeSZextends : public FunctionPass {
+ public:
+ static char ID;
+ HexagonOptimizeSZextends() : FunctionPass(ID) {
+ initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ const char *getPassName() const override {
+ return "Remove sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool intrinsicAlreadySextended(Intrinsic::ID IntID);
+ };
+}
+
+char HexagonOptimizeSZextends::ID = 0;
+
+INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs",
+ "Remove Sign and Zero Extends for Args", false, false)
+
+bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) {
+ switch(IntID) {
+ case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+  // Try to optimize sign extends in formal parameters. This relies on the
+  // caller already sign extending the values, though it is not clear whether
+  // Hexagon's ABI actually requires the caller to do so.
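+  // Each sext of such an argument is replaced with an equivalent sext of the
+  // argument itself, hoisted to the front of the entry block.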
+ for (auto &Arg : F.args()) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ if (!isa<PointerType>(Arg.getType())) {
+ for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(&Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = &F.getEntryBlock().front();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ ++Idx;
+ }
+
+ // Try to remove redundant sext operations on Hexagon. The hardware
+ // already sign extends many 16 bit intrinsic operations to 32 bits.
+ // For example:
+ // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y)
+ // %sext233 = shl i32 %34, 16
+ // %conv52 = ashr exact i32 %sext233, 16
+ for (auto &B : F) {
+ for (auto &I : B) {
+ // Look for arithmetic shift right by 16.
+ BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I);
+ if (!(Ashr && Ashr->getOpcode() == Instruction::AShr))
+ continue;
+ Value *AshrOp1 = Ashr->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1);
+ // Right shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Ashr comes from logical shift left.
+ Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0));
+ if (!(Shl && Shl->getOpcode() == Instruction::Shl))
+ continue;
+ Value *Intr = Shl->getOperand(0);
+ Value *ShlOp1 = Shl->getOperand(1);
+ C = dyn_cast<ConstantInt>(ShlOp1);
+ // Left shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Shl comes from an intrinsic.
+ if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) {
+ if (!intrinsicAlreadySextended(I->getIntrinsicID()))
+ continue;
+ // All is well. Replace all uses of AShr with I.
+ for (auto UI = Ashr->user_begin(), UE = Ashr->user_end();
+ UI != UE; ++UI) {
+ const Use &TheUse = UI.getUse();
+ if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) {
+ J->replaceUsesOfWith(Ashr, I);
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+
+FunctionPass *llvm::createHexagonOptimizeSZextends() {
+ return new HexagonOptimizeSZextends();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 93dcbe2..e68ff85 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -124,7 +124,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
PeepholeMap.clear();
PeepholeDoubleRegsMap.clear();
@@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
unsigned DstReg = Dst.getReg();
unsigned SrcReg = Src1.getReg();
PeepholeDoubleRegsMap[DstReg] =
- std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/);
+ std::make_pair(*&SrcReg, Hexagon::subreg_hireg);
}
// Look for P=NOT(P).
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index f6bb4a0..61c0589 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -66,6 +66,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
switch (HST.getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
return CallerSavedRegsV4;
}
llvm_unreachable(
@@ -84,6 +86,8 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
return CalleeSavedRegsV3;
}
llvm_unreachable("Callee saved registers requested for unknown architecture "
@@ -98,7 +102,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::R29);
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
- Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::PC);
Reserved.set(Hexagon::D15);
Reserved.set(Hexagon::LC0);
Reserved.set(Hexagon::LC1);
@@ -116,62 +120,21 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
-
MachineBasicBlock &MB = *MI.getParent();
MachineFunction &MF = *MB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HFI = *HST.getFrameLowering();
+ unsigned BP = 0;
int FI = MI.getOperand(FIOp).getIndex();
- int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm();
- bool HasAlloca = MFI.hasVarSizedObjects();
- bool HasAlign = needsStackRealignment(MF);
-
- // XXX: Fixed objects cannot be accessed through SP if there are aligned
- // objects in the local frame, or if there are dynamically allocated objects.
- // In such cases, there has to be FP available.
- if (!HFI.hasFP(MF)) {
- assert(!HasAlloca && !HasAlign && "This function must have frame pointer");
- // We will not reserve space on the stack for the lr and fp registers.
- Offset -= 8;
- }
-
- unsigned SP = getStackRegister(), FP = getFrameRegister();
- unsigned AP = 0;
- if (MachineInstr *AI = HFI.getAlignaInstr(MF))
- AP = AI->getOperand(0).getReg();
- unsigned FrameSize = MFI.getStackSize();
-
- // Special handling of dbg_value instructions and INLINEASM.
- if (MI.isDebugValue() || MI.isInlineAsm()) {
- MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/);
- MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize);
- return;
- }
-
- bool UseFP = false, UseAP = false; // Default: use SP.
- if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
- UseFP = HasAlloca || HasAlign;
- } else {
- if (HasAlloca) {
- if (HasAlign)
- UseAP = true;
- else
- UseFP = true;
- }
- }
+ // Select the base pointer (BP) and calculate the actual offset from BP
+ // to the beginning of the object at index FI.
+ int Offset = HFI.getFrameIndexReference(MF, FI, BP);
+ // Add the offset from the instruction.
+ int RealOffset = Offset + MI.getOperand(FIOp+1).getImm();
unsigned Opc = MI.getOpcode();
- bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset);
- bool ValidFP = HII.isValidOffset(Opc, Offset);
-
- // Calculate the actual offset in the instruction.
- int64_t RealOffset = Offset;
- if (!UseFP && !UseAP)
- RealOffset = FrameSize+Offset;
-
switch (Opc) {
case Hexagon::TFR_FIA:
MI.setDesc(HII.get(Hexagon::A2_addi));
@@ -184,20 +147,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
break;
}
- unsigned BP = 0;
- bool Valid = false;
- if (UseFP) {
- BP = FP;
- Valid = ValidFP;
- } else if (UseAP) {
- BP = AP;
- Valid = ValidFP;
- } else {
- BP = SP;
- Valid = ValidSP;
- }
-
- if (Valid) {
+ if (HII.isValidOffset(Opc, RealOffset)) {
MI.getOperand(FIOp).ChangeToRegister(BP, false);
MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
return;
@@ -223,8 +173,8 @@ unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
const HexagonFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
- return Hexagon::R30;
- return Hexagon::R29;
+ return getFrameRegister();
+ return getStackRegister();
}
@@ -238,17 +188,9 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
}
-bool
-HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
- const HexagonFrameLowering *TFI = getFrameLowering(MF);
- return TFI->hasFP(MF);
-}
-
-
-bool
-HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getMaxAlignment() > 8;
+bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
+ const {
+ return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 7edefee..db7e0f2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -63,8 +63,6 @@ public:
return true;
}
- bool needsStackRealignment(const MachineFunction &MF) const override;
-
/// Returns true if the frame pointer is valid.
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index edf1c25..81629dc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -53,6 +53,12 @@ let Namespace = "Hexagon" in {
let Num = num;
}
+
+ // Rq - vector predicate registers
+ class Rq<bits<3> num, string n> : Register<n, []> {
+ let HWEncoding{2-0} = num;
+ }
+
// Rc - control registers
class Rc<bits<5> num, string n,
list<string> alt = [], list<Register> alias = []> :
@@ -131,20 +137,21 @@ let Namespace = "Hexagon" in {
def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
DwarfRegNum<[71]>;
- def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>;
- def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>;
+ def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use
+ def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
+ def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
- def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> {
+ def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> {
let SubRegIndices = [subreg_overflow];
let SubRegs = [USR_OVF];
}
- def PC : Rc<9, "pc">, DwarfRegNum<[75]>;
- def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>;
- def GP : Rc<11, "gp">, DwarfRegNum<[77]>;
- def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>;
- def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>;
- def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>;
- def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>;
+ def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
+ def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP : Rc<11, "gp">, DwarfRegNum<[78]>;
+ def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
}
// Control registers pairs.
@@ -158,6 +165,36 @@ let Namespace = "Hexagon" in {
def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
}
+ foreach i = 0-31 in {
+ def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
+ }
+
+ // Aliases of the V* registers used to hold double vec values.
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
+ def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
+ def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
+ def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>;
+ def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>;
+ def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
+ def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
+ def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
+ def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
+ def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
+ def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
+ def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
+ def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
+ def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
+ def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
+ def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
+ }
+
+ // Vector Predicate registers.
+ def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
+ def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
+ def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
+ def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -169,10 +206,34 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
R10, R11, R29, R30, R31)> {
}
+// Registers are listed in reverse order for allocation preference reasons.
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
+def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "W%u", 0, 15))>;
+
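+// In 128-byte HVX mode each vector register holds 1024 bits, so the "128B"
+// register classes reuse the same physical registers with doubled types.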
+def VectorRegs128B : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs128B : RegisterClass<"Hexagon",
+ [v256i8,v128i16,v64i32,v32i64], 2048,
+ (add (sequence "W%u", 0, 15))>;
+
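+// Each vector predicate register holds one condition bit per vector byte:
+// 512 bits in 64-byte mode and 1024 bits in 128-byte mode.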
+def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512,
+ (add (sequence "Q%u", 0, 3))>;
+
+def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024,
+ (add (sequence "Q%u", 0, 3))>;
def PredRegs : RegisterClass<"Hexagon",
[i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
deleted file mode 100644
index 7069ad3..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends //
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Pass that removes sign extends for function parameters. These parameters
-// are already sign extended by the caller per Hexagon's ABI
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-namespace llvm {
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
-}
-
-namespace {
- struct HexagonRemoveExtendArgs : public FunctionPass {
- public:
- static char ID;
- HexagonRemoveExtendArgs() : FunctionPass(ID) {
- initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- const char *getPassName() const override {
- return "Remove sign extends";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- FunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-char HexagonRemoveExtendArgs::ID = 0;
-
-INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs",
- "Remove Sign and Zero Extends for Args", false, false)
-
-bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
- unsigned Idx = 1;
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
- ++AI, ++Idx) {
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
- Argument* Arg = AI;
- if (!isa<PointerType>(Arg->getType())) {
- for (auto UI = Arg->user_begin(); UI != Arg->user_end();) {
- if (isa<SExtInst>(*UI)) {
- Instruction* I = cast<Instruction>(*UI);
- SExtInst* SI = new SExtInst(Arg, I->getType());
- assert (EVT::getEVT(SI->getType()) ==
- (EVT::getEVT(I->getType())));
- ++UI;
- I->replaceAllUsesWith(SI);
- Instruction* First = F.getEntryBlock().begin();
- SI->insertBefore(First);
- I->eraseFromParent();
- } else {
- ++UI;
- }
- }
- }
- }
- }
- return true;
-}
-
-
-
-FunctionPass*
-llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) {
- return new HexagonRemoveExtendArgs();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index 528cafc..6e4987b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -13,6 +13,12 @@
include "HexagonScheduleV4.td"
+// V55 Machine Info +
+include "HexagonScheduleV55.td"
+
//===----------------------------------------------------------------------===//
-// V4 Machine Info -
+// V60 Machine Info -
//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
index a7d2d47..67af147 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -35,10 +35,11 @@ def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
def PSEUDO : InstrItinClass;
-def PSEUDOM : InstrItinClass;
+def PSEUDOM : InstrItinClass;
 // ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
def DUPLEX : InstrItinClass;
def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
def COMPOUND : InstrItinClass;
def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
@@ -58,6 +59,7 @@ def CR_tc_2early_SLOT3 : InstrItinClass;
def CR_tc_3x_SLOT23 : InstrItinClass;
def CR_tc_3x_SLOT3 : InstrItinClass;
def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
def J_tc_2early_SLOT2 : InstrItinClass;
def LD_tc_ld_SLOT01 : InstrItinClass;
def LD_tc_ld_SLOT0 : InstrItinClass;
@@ -91,6 +93,7 @@ def V4LDST_tc_st_SLOT0 : InstrItinClass;
def V4LDST_tc_st_SLOT01 : InstrItinClass;
def J_tc_2early_SLOT0123 : InstrItinClass;
def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
def HexagonItinerariesV4 :
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 0000000..d9ad25d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
+def HexagonItinerariesV55 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+    InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT,
+                  [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Misc
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+
+ ]>;
+
+def HexagonModelV55 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV55;
+ let LoadLatency = 1;
+}
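+
+// A CPU definition would typically attach this model to a processor record,
+// e.g. (illustrative sketch; the actual Proc records live in Hexagon.td):
+//   def : Proc<"hexagonv55", HexagonModelV55, [ArchV4, ArchV5, ArchV55]>;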
+
+//===----------------------------------------------------------------------===//
+// Hexagon V55 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
new file mode 100644
index 0000000..2ccff82
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -0,0 +1,310 @@
+//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec".
+def CVI_ST : FuncUnit;
+def CVI_XLANE : FuncUnit;
+def CVI_SHIFT : FuncUnit;
+def CVI_MPY0 : FuncUnit;
+def CVI_MPY1 : FuncUnit;
+def CVI_LD : FuncUnit;
+
+// Combined functional units.
+def CVI_XLSHF : FuncUnit;
+def CVI_MPY01 : FuncUnit;
+def CVI_ALL : FuncUnit;
+
+// Combined functional unit data.
+def HexagonComboFuncsV60 :
+ ComboFuncUnits<[
+ ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
+ ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
+ ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1, CVI_LD]>
+ ]>;
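+
+// A combined unit reserves all of its parts at once: an itinerary stage
+// such as InstrStage<1, [CVI_XLSHF]> occupies both CVI_XLANE and CVI_SHIFT
+// for that cycle, and a stage using CVI_ALL blocks the whole CVI cluster.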
+
+// Note: When adding additional vector scheduling classes, add the
+// corresponding methods to the class HexagonInstrInfo.
+def CVI_VA : InstrItinClass;
+def CVI_VA_DV : InstrItinClass;
+def CVI_VX_LONG : InstrItinClass;
+def CVI_VX_LATE : InstrItinClass;
+def CVI_VX : InstrItinClass;
+def CVI_VX_DV_LONG : InstrItinClass;
+def CVI_VX_DV : InstrItinClass;
+def CVI_VX_DV_SLOT2 : InstrItinClass;
+def CVI_VP : InstrItinClass;
+def CVI_VP_LONG : InstrItinClass;
+def CVI_VP_VS_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG : InstrItinClass;
+def CVI_VP_VS : InstrItinClass;
+def CVI_VP_DV : InstrItinClass;
+def CVI_VS : InstrItinClass;
+def CVI_VINLANESAT : InstrItinClass;
+def CVI_VM_LD : InstrItinClass;
+def CVI_VM_TMP_LD : InstrItinClass;
+def CVI_VM_CUR_LD : InstrItinClass;
+def CVI_VM_VP_LDU : InstrItinClass;
+def CVI_VM_ST : InstrItinClass;
+def CVI_VM_NEW_ST : InstrItinClass;
+def CVI_VM_STU : InstrItinClass;
+def CVI_HIST : InstrItinClass;
+def CVI_VA_EXT : InstrItinClass;
+
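+// An instruction definition selects one of these classes as its itinerary;
+// the stage data for each class is supplied by HexagonItinerariesV60 below.
+// For example, an HVX arithmetic instruction defined with itinerary CVI_VA
+// will reserve one of the four slots plus one multiply/shift/xlane pipe.
+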
+// There are four SLOTS (four parallel pipelines) in the Hexagon V60 machine.
+// This file describes that machine information.
+//
+// |===========|==================================================|
+// | PIPELINE  |              Instruction Classes                 |
+// |===========|==================================================|
+// | SLOT0     |  LD    ST    ALU32   MEMOP   NV    SYSTEM        |
+// |-----------|--------------------------------------------------|
+// | SLOT1     |  LD    ST    ALU32                               |
+// |-----------|--------------------------------------------------|
+// | SLOT2     |  XTYPE       ALU32   J       JR                  |
+// |-----------|--------------------------------------------------|
+// | SLOT3     |  XTYPE       ALU32   J       CR                  |
+// |===========|==================================================|
+//
+//
+// In addition to using the above SLOTS, there are also six vector pipelines
+// in the CVI co-processor in the Hexagon V60 machine.
+//
+//      |=========| |=========| |=========| |=========| |=========| |=========|
+// SLOT | CVI_LD  | |CVI_MPY1 | |CVI_MPY0 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST  |
+// ==== |=========| |=========| |=========| |=========| |=========| |=========|
+// S0-3 |         | | CVI_VA  | | CVI_VA  | | CVI_VA  | | CVI_VA  | |         |
+// S2-3 |         | | CVI_VX  | | CVI_VX  | |         | |         | |         |
+// S0-3 |         | |         | |         | |         | | CVI_VP  | |         |
+// S0-3 |         | |         | |         | | CVI_VS  | |         | |         |
+// S0-1 |(CVI_LD) | | CVI_LD  | | CVI_LD  | | CVI_LD  | | CVI_LD  | |         |
+// S0-1 |(C*TMP_LD)| |         | |         | |         | |         | |         |
+// S01  |(C*_LDU) | |         | |         | |         | | C*_LDU  | |         |
+// S0   |         | | CVI_ST  | | CVI_ST  | | CVI_ST  | | CVI_ST  | |(CVI_ST) |
+// S0   |         | |         | |         | |         | |         | |(C*TMP_ST)|
+// S01  |         | |         | |         | |         | |  VSTU   | |(C*_STU) |
+//      |=========| |=========| |=========| |=========| |=========| |=========|
+//      |=====================| |=====================|
+//      | CVI_MPY0 & CVI_MPY1 | |CVI_XLANE & CVI_SHIFT|
+//      |=====================| |=====================|
+// S0-3 | CVI_VA_DV           | | CVI_VA_DV           |
+// S0-3 |                     | | CVI_VP_DV           |
+// S2-3 | CVI_VX_DV           | |                     |
+//      |=====================| |=====================|
+//      |===============================================|
+// S0-3 | CVI_HIST    Histogram                         |
+// S0123| CVI_VA_EXT  Extract                           |
+//      |===============================================|
+
+def HexagonItinerariesV60 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1,
+ [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+    // The S_2op_tc_3or4x_SLOT23 class takes 4 cycles on V60.
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Duplex and Compound
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // Misc
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // Latest CVI spec definitions.
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE,CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VA_DV,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_DV_LONG,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV_SLOT2,
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_VS_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VINLANESAT,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>]>,
+ InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>]>,
+ InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>]>
+ ]>;
+
+def HexagonModelV60 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV60;
+ let LoadLatency = 1;
+}
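+
+// The IssueWidth of 4 mirrors the four packet slots modeled above, and
+// LoadLatency is roughly the default latency assumed for loads when no
+// more specific itinerary entry applies.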
+
+//===----------------------------------------------------------------------===//
+// Hexagon V60 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 276cc69..239dbda 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -12,12 +12,11 @@
//===----------------------------------------------------------------------===//
#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
#define DEBUG_TYPE "hexagon-selectiondag-info"
-bool llvm::flag_aligned_memcpy;
-
SDValue
HexagonSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
@@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- flag_aligned_memcpy = false;
- if ((Align & 0x3) == 0) {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (ConstantSize) {
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if ((SizeVal > 32) && ((SizeVal % 8) == 0))
- flag_aligned_memcpy = true;
- }
- }
-
- return SDValue();
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
+ return SDValue();
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (SizeVal < 32 || (SizeVal % 8) != 0)
+ return SDValue();
+
+  // Special-case aligned memcpys whose size is >= 32 bytes and a multiple
+  // of 8 bytes.
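+  // In effect the memcpy becomes a call equivalent to
+  //   __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes(Dst, Src, Size);
+  // using the standard memcpy libcall calling convention, with the result
+  // discarded.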
+ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+ Entry.Node = Src;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ const char *SpecialMemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getTargetExternalSymbol(
+ SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ return CallResult.second;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index d3eb56f..10fe606 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -81,7 +81,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block
MachineBasicBlock::iterator MII = MBB->begin();
MachineBasicBlock::iterator MIE = MBB->end ();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
new file mode 100644
index 0000000..d4e95b0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -0,0 +1,1209 @@
+//===--- HexagonSplitDouble.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hsdr"
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonSplitDoubleRegs();
+ void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+}
+
+namespace {
+ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
+ cl::desc("Maximum number of split partitions"));
+ static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
+ cl::desc("Do not split loads or stores"));
+
+ class HexagonSplitDoubleRegs : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
+ TII(nullptr) {
+ initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon Split Double Registers";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ static const TargetRegisterClass *const DoubleRC;
+
+ const HexagonRegisterInfo *TRI;
+ const HexagonInstrInfo *TII;
+ const MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ typedef std::set<unsigned> USet;
+ typedef std::map<unsigned,USet> UUSetMap;
+ typedef std::pair<unsigned,unsigned> UUPair;
+ typedef std::map<unsigned,UUPair> UUPairMap;
+ typedef std::map<const MachineLoop*,USet> LoopRegMap;
+
+ bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
+ bool isVolatileInstr(const MachineInstr *MI) const;
+ bool isFixedInstr(const MachineInstr *MI) const;
+ void partitionRegisters(UUSetMap &P2Rs);
+ int32_t profit(const MachineInstr *MI) const;
+ bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
+
+ void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
+ void collectIndRegs(LoopRegMap &IRM);
+
+ void createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR);
+ void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
+ void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
+ void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitPartition(const USet &Part);
+
+ static int Counter;
+ static void dump_partition(raw_ostream&, const USet&,
+ const TargetRegisterInfo&);
+ };
+ char HexagonSplitDoubleRegs::ID;
+ int HexagonSplitDoubleRegs::Counter = 0;
+ const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
+ = &Hexagon::DoubleRegsRegClass;
+}
+
+INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
+ "Hexagon Split Double Registers", false, false)
+
+
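+// Gather all of the RegState flags of a register operand into one mask, so
+// that a rebuilt operand can inherit them, e.g. (usage sketch):
+//   BuildMI(B, MI, DL, TII->get(Opc))
+//       .addReg(Op.getReg(), getRegState(Op), Op.getSubReg());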
+static inline uint32_t getRegState(const MachineOperand &R) {
+ assert(R.isReg());
+ return getDefRegState(R.isDef()) |
+ getImplRegState(R.isImplicit()) |
+ getKillRegState(R.isKill()) |
+ getDeadRegState(R.isDead()) |
+ getUndefRegState(R.isUndef()) |
+ getInternalReadRegState(R.isInternalRead()) |
+ (R.isDebug() ? RegState::Debug : 0);
+}
+
+
+void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+ const USet &Part, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (auto I : Part)
+ dbgs() << ' ' << PrintReg(I, &TRI);
+ dbgs() << " }";
+}
+
+
+bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
+ for (auto I : IRM) {
+ const USet &Rs = I.second;
+ if (Rs.find(Reg) != Rs.end())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
+ for (auto &I : MI->memoperands())
+ if (I->isVolatile())
+ return true;
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
+ if (MI->mayLoad() || MI->mayStore())
+ if (MemRefsFixed || isVolatileInstr(MI))
+ return true;
+ if (MI->isDebugValue())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ return true;
+
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY:
+ break;
+
+ case Hexagon::L2_loadrd_io:
+      // Not handling stack loads (only reg-based addresses).
+ if (MI->getOperand(1).isReg())
+ break;
+ return true;
+ case Hexagon::S2_storerd_io:
+ // Not handling stack stores (only reg-based addresses).
+ if (MI->getOperand(0).isReg())
+ break;
+ return true;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew:
+ case Hexagon::CONST64_Int_Real:
+
+ case Hexagon::A2_sxtw:
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ case Hexagon::S2_asl_i_p_or:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ break;
+ }
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ }
+ return false;
+}
+
+
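+// Group the 64-bit virtual registers into partitions: two registers land in
+// the same partition when they appear together in a non-fixed instruction.
+// The transitive closure is computed with a simple work queue: pop a
+// register, assign it to the current partition, and push every register
+// associated with it. Partition #0 collects the registers that cannot be
+// split.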
+void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
+ typedef std::map<unsigned,unsigned> UUMap;
+ typedef std::vector<unsigned> UVect;
+
+ unsigned NumRegs = MRI->getNumVirtRegs();
+ BitVector DoubleRegs(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->getRegClass(R) == DoubleRC)
+ DoubleRegs.set(i);
+ }
+
+ BitVector FixedRegs(NumRegs);
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ MachineInstr *DefI = MRI->getVRegDef(R);
+ // In some cases a register may exist, but never be defined or used.
+ // It should never appear anywhere, but mark it as "fixed", just to be
+ // safe.
+ if (!DefI || isFixedInstr(DefI))
+ FixedRegs.set(x);
+ }
+
+ UUSetMap AssocMap;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ if (FixedRegs[x])
+ continue;
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ DEBUG(dbgs() << PrintReg(R, TRI) << " ~~");
+ USet &Asc = AssocMap[R];
+ for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
+ U != Z; ++U) {
+ MachineOperand &Op = *U;
+ MachineInstr *UseI = Op.getParent();
+ if (isFixedInstr(UseI))
+ continue;
+ for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = UseI->getOperand(i);
+ // Skip non-registers or registers with subregisters.
+ if (&MO == &Op || !MO.isReg() || MO.getSubReg())
+ continue;
+ unsigned T = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(T)) {
+ FixedRegs.set(x);
+ continue;
+ }
+ if (MRI->getRegClass(T) != DoubleRC)
+ continue;
+ unsigned u = TargetRegisterInfo::virtReg2Index(T);
+ if (FixedRegs[u])
+ continue;
+ DEBUG(dbgs() << ' ' << PrintReg(T, TRI));
+ Asc.insert(T);
+ // Make it symmetric.
+ AssocMap[T].insert(R);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ UUMap R2P;
+ unsigned NextP = 1;
+ USet Visited;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ if (Visited.count(R))
+ continue;
+ // Create a new partition for R.
+ unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
+ UVect WorkQ;
+ WorkQ.push_back(R);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ unsigned T = WorkQ[i];
+ if (Visited.count(T))
+ continue;
+ R2P[T] = ThisP;
+ Visited.insert(T);
+ // Add all registers associated with T.
+ USet &Asc = AssocMap[T];
+ for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
+ WorkQ.push_back(*J);
+ }
+ }
+
+ for (auto I : R2P)
+ P2Rs[I.second].insert(I.first);
+}
+
+
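+// Estimated gain of splitting a 64-bit immediate into two 32-bit transfers.
+// For example, under the rules below profitImm(0, 0) == 20 (both halves
+// become trivial transfers of #0), while profitImm(5, 5) == 3 (the halves
+// are merely equal).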
+static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
+ int32_t P = 0;
+ bool LoZ1 = false, HiZ1 = false;
+ if (Lo == 0 || Lo == 0xFFFFFFFF)
+ P += 10, LoZ1 = true;
+ if (Hi == 0 || Hi == 0xFFFFFFFF)
+ P += 10, HiZ1 = true;
+ if (!LoZ1 && !HiZ1 && Lo == Hi)
+ P += 3;
+ return P;
+}
+
+
+int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
+ unsigned ImmX = 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ for (const auto &Op : MI->operands())
+ if (!Op.getSubReg())
+ return 0;
+ return 10;
+ case TargetOpcode::COPY:
+ if (MI->getOperand(1).getSubReg() != 0)
+ return 10;
+ return 0;
+
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::S2_storerd_io:
+ return -1;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+ return 2;
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST64_Int_Real: {
+ uint64_t D = MI->getOperand(1).getImm();
+ unsigned Lo = D & 0xFFFFFFFFULL;
+ unsigned Hi = D >> 32;
+ return profitImm(Lo, Hi);
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ return profitImm(MI->getOperand(1).getImm(),
+ MI->getOperand(2).getImm());
+ case Hexagon::A4_combineri:
+ ImmX++;
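+    // Fall through into A4_combineir.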
+ case Hexagon::A4_combineir: {
+ ImmX++;
+ int64_t V = MI->getOperand(ImmX).getImm();
+ if (V == 0 || V == -1)
+ return 10;
+ // Fall through into A2_combinew.
+ }
+ case Hexagon::A2_combinew:
+ return 2;
+
+ case Hexagon::A2_sxtw:
+ return 3;
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ return 1;
+
+ case Hexagon::S2_asl_i_p_or: {
+ unsigned S = MI->getOperand(3).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ return -1;
+ }
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ unsigned S = MI->getOperand(2).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ if (S == 16)
+ return 5;
+ if (S == 48)
+ return 7;
+ return -10;
+ }
+
+ return 0;
+}
+
+
+bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
+ const {
+ unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ int32_t TotalP = 0;
+
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ int32_t P = profit(DefI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ // Reduce the profitability of splitting induction registers.
+ if (isInduction(DR, IRM))
+ TotalP -= 30;
+
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U) {
+ MachineInstr *UseI = U->getParent();
+ if (isFixedInstr(UseI)) {
+ FixedNum++;
+ // Calculate the cost of generating REG_SEQUENCE instructions.
+ for (auto &Op : UseI->operands()) {
+ if (Op.isReg() && Part.count(Op.getReg()))
+ if (Op.getSubReg())
+ TotalP -= 2;
+ }
+ continue;
+ }
+ // If a register from this partition is used in a fixed instruction,
+ // and there is also a register in this partition that is used in
+ // a loop phi node, then decrease the splitting profit as this can
+ // confuse the modulo scheduler.
+ if (UseI->isPHI()) {
+ const MachineBasicBlock *PB = UseI->getParent();
+ const MachineLoop *L = MLI->getLoopFor(PB);
+ if (L && L->getHeader() == PB)
+ LoopPhiNum++;
+ }
+ // Splittable instruction.
+ SplitNum++;
+ int32_t P = profit(UseI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ }
+ }
+
+ if (FixedNum > 0 && LoopPhiNum > 0)
+ TotalP -= 20*LoopPhiNum;
+
+ DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ return TotalP > 0;
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
+ USet &Rs) {
+ const MachineBasicBlock *HB = L->getHeader();
+ const MachineBasicBlock *LB = L->getLoopLatch();
+ if (!HB || !LB)
+ return;
+
+ // Examine the latch branch. Expect it to be a conditional branch to
+ // the header (either "br-cond header" or "br-cond exit; br header").
+  MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
+ SmallVector<MachineOperand,2> Cond;
+ bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false);
+ // Only analyzable conditional branches. HII::AnalyzeBranch will put
+ // the branch opcode as the first element of Cond, and the predicate
+ // operand as the second.
+ if (BadLB || Cond.size() != 2)
+ return;
+ // Only simple jump-conditional (with or without negation).
+ if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
+ return;
+ // Must go to the header.
+ if (TB != HB && FB != HB)
+ return;
+ assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch");
+ // Expect a predicate register.
+ unsigned PR = Cond[1].getReg();
+ assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
+
+ // Get the registers on which the loop controlling compare instruction
+ // depends.
+ unsigned CmpR1 = 0, CmpR2 = 0;
+ const MachineInstr *CmpI = MRI->getVRegDef(PR);
+ while (CmpI->getOpcode() == Hexagon::C2_not)
+ CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
+
+ int Mask = 0, Val = 0;
+ bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val);
+ if (!OkCI)
+ return;
+ // Eliminate non-double input registers.
+ if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
+ CmpR1 = 0;
+ if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
+ CmpR2 = 0;
+ if (!CmpR1 && !CmpR2)
+ return;
+
+  // Now examine the top of the loop: the phi nodes that could potentially
+  // define loop induction registers. The registers defined by
+ // such a phi node would be used in a 64-bit add, which then would
+ // be used in the loop compare instruction.
+
+ // Get the set of all double registers defined by phi nodes in the
+ // loop header.
+ typedef std::vector<unsigned> UVect;
+ UVect DP;
+ for (auto &MI : *HB) {
+ if (!MI.isPHI())
+ break;
+ const MachineOperand &MD = MI.getOperand(0);
+ unsigned R = MD.getReg();
+ if (MRI->getRegClass(R) == DoubleRC)
+ DP.push_back(R);
+ }
+ if (DP.empty())
+ return;
+
+ auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
+ for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ const MachineInstr *UseI = I->getParent();
+ if (UseI->getOpcode() != Hexagon::A2_addp)
+ continue;
+ // Get the output from the add. If it is one of the inputs to the
+      // loop-controlling compare instruction, then R is likely an
+      // induction register.
+ unsigned T = UseI->getOperand(0).getReg();
+ if (T == CmpR1 || T == CmpR2)
+ return false;
+ }
+ return true;
+ };
+ UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp);
+ Rs.insert(DP.begin(), End);
+ Rs.insert(CmpR1);
+ Rs.insert(CmpR2);
+
+ DEBUG({
+ dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: ";
+ dump_partition(dbgs(), Rs, *TRI);
+ dbgs() << '\n';
+ });
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
+ typedef std::vector<MachineLoop*> LoopVector;
+ LoopVector WorkQ;
+
+ for (auto I : *MLI)
+ WorkQ.push_back(I);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ for (auto I : *WorkQ[i])
+ WorkQ.push_back(I);
+ }
+
+ USet Rs;
+ for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
+ MachineLoop *L = WorkQ[i];
+ Rs.clear();
+ collectIndRegsForLoop(L, Rs);
+ if (!Rs.empty())
+ IRM.insert(std::make_pair(L, Rs));
+ }
+}
+
+
+void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg()) {
+ NewI->addOperand(Op);
+ continue;
+ }
+ // For register operands, set the subregister.
+ unsigned R = Op.getReg();
+ unsigned SR = Op.getSubReg();
+ bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
+ bool isKill = Op.isKill();
+ if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
+ isKill = false;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end()) {
+ SR = SubR;
+ } else {
+ const UUPair &P = F->second;
+ R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second;
+ SR = 0;
+ }
+ }
+ auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
+ Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
+ Op.isInternalRead());
+ NewI->addOperand(CO);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ bool Load = MI->mayLoad();
+ unsigned OrigOpc = MI->getOpcode();
+ bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
+ OrigOpc == Hexagon::S2_storerd_pi);
+ MachineInstr *LowI, *HighI;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Index of the base-address-register operand.
+ unsigned AdrX = PostInc ? (Load ? 2 : 1)
+ : (Load ? 1 : 0);
+ MachineOperand &AdrOp = MI->getOperand(AdrX);
+ unsigned RSA = getRegState(AdrOp);
+ MachineOperand &ValOp = Load ? MI->getOperand(0)
+ : (PostInc ? MI->getOperand(3)
+ : MI->getOperand(2));
+ UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
+ assert(F != PairMap.end());
+
+ if (Load) {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4);
+ } else {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off)
+ .addReg(P.first);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4)
+ .addReg(P.second);
+ }
+
+ if (PostInc) {
+ // Create the increment of the address register.
+ int64_t Inc = Load ? MI->getOperand(3).getImm()
+ : MI->getOperand(2).getImm();
+ MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
+ const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ assert(!UpdOp.getSubReg() && "Def operand with subreg");
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
+ .addReg(AdrOp.getReg(), RSA)
+ .addImm(Inc);
+ MRI->replaceRegWith(UpdOp.getReg(), NewR);
+ // The original instruction will be deleted later.
+ }
+
+ // Generate a new pair of memory-operands.
+ MachineFunction &MF = *B.getParent();
+ for (auto &MO : MI->memoperands()) {
+ const MachinePointerInfo &Ptr = MO->getPointerInfo();
+ unsigned F = MO->getFlags();
+ int A = MO->getAlignment();
+
+ auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
+ LowI->addMemOperand(MF, Tmp1);
+ auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
+ HighI->addMemOperand(MF, Tmp2);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isImm());
+ uint64_t V = Op1.getImm();
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ // The operand to A2_tfrsi can only have 32 significant bits. Immediate
+ // values in MachineOperand are stored as 64-bit integers, and so the
+ // value -1 may be represented either as 64-bit -1, or 4294967295. Both
+ // will have the 32 higher bits truncated in the end, but -1 will remain
+ // as -1, while the latter may appear to be a large unsigned value
+ // requiring a constant extender. The casting to int32_t will select the
+ // former representation. (The same reasoning applies to all 32-bit
+ // values.)
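+  // For example, V = 0x00000001FFFFFFFF is split into A2_tfrsi #-1 for the
+  // low register and A2_tfrsi #1 for the high register.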
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(int32_t(V & 0xFFFFFFFFULL));
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(int32_t(V >> 32));
+}
+
+
+void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ if (Op1.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(Op1.getImm());
+ } else if (Op1.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
+ .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+
+ if (Op2.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(Op2.getImm());
+ } else if (Op2.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+}
+
+
+void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned RS = getRegState(Op1);
+
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
+ BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
+ .addReg(Op1.getReg(), RS, Op1.getSubReg())
+ .addImm(31);
+}
+
+
+void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
+ int64_t Sh64 = Op2.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ unsigned Opc = MI->getOpcode();
+ bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
+ bool Left = !Right;
+ bool Signed = (Opc == S2_asr_i_p);
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS = getRegState(Op1);
+ unsigned ShiftOpc = Left ? S2_asl_i_r
+ : (Signed ? S2_asr_i_r : S2_lsr_i_r);
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ if (S == 0) {
+ // No shift, subregister copy.
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
+ .addReg(Op1.getReg(), RS, HiSR);
+ } else if (S < 32) {
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+ unsigned TmpR = MRI->createVirtualRegister(IntRC);
+ // Expansion:
+ // Shift left: DR = shl R, #s
+ // LoR = shl R.lo, #s
+ // TmpR = extractu R.lo, #s, #32-s
+ // HiR = or (TmpR, asl(R.hi, #s))
+ // Shift right: DR = shr R, #s
+ // HiR = shr R.hi, #s
+ // TmpR = shr R.lo, #s
+ // LoR = insert TmpR, R.hi, #s, #32-s
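+    // For example, DR = shl R, #8 expands to:
+    //   LoR  = asl(R.lo, #8)
+    //   TmpR = extractu(R.lo, #8, #24)   (the bits shifted out of R.lo)
+    //   HiR  = or(TmpR, asl(R.hi, #8))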
+
+ // Shift left:
+ // LoR = shl R.lo, #s
+ // Shift right:
+ // TmpR = shr R.lo, #s
+
+ // Make a special case for A2_aslh and A2_asrh (they are predicable as
+ // opposed to S2_asl_i_r/S2_asr_i_r).
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S);
+
+ if (Left) {
+ // TmpR = extractu R.lo, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ // HiR = or (TmpR, asl(R.hi, #s))
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S);
+ } else {
+ // HiR = shr R.hi, #s
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
+ .addImm(S);
+ // LoR = insert TmpR, R.hi, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S)
+ .addImm(32-S);
+ }
+ } else if (S == 32) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
+ if (!Signed)
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ else // Must be right shift.
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ } else if (S < 64) {
+ S -= 32;
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
+ .addImm(S);
+
+ if (Signed)
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ else
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
+ int64_t Sh64 = Op3.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS1 = getRegState(Op1);
+ unsigned RS2 = getRegState(Op2);
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ // Op0 = S2_asl_i_p_or Op1, Op2, Op3
+ // means: Op0 = or (Op1, asl(Op2, Op3))
+
+ // Expansion of
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = or (R1.lo, asl(R2.lo, #s))
+ // Tmp1 = extractu R2.lo, #s, #32-s
+ // Tmp2 = or R1.hi, Tmp1
+ // HiR = or (Tmp2, asl(R2.hi, #s))
+
+ if (S == 0) {
+ // DR = or (R1, asl(R2, #0))
+ // -> or (R1, R2)
+ // i.e. LoR = or R1.lo, R2.lo
+ // HiR = or R1.hi, R2.hi
+ BuildMI(B, MI, DL, TII->get(A2_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, HiSR);
+ } else if (S < 32) {
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S);
+ unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(TmpR1);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR2)
+ .addReg(Op2.getReg(), RS2, HiSR)
+ .addImm(S);
+ } else if (S == 32) {
+ // DR = or (R1, asl(R2, #32))
+ // -> or R1, R2.lo
+ // LoR = R1.lo
+ // HiR = or R1.hi, R2.lo
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR);
+ } else if (S < 64) {
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = R1:lo
+ // HiR = or (R1:hi, asl(R2:lo, #s-32))
+ S -= 32;
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR)
+ .addImm(S);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ DEBUG(dbgs() << "Splitting: " << *MI);
+ bool Split = false;
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY: {
+ unsigned DstR = MI->getOperand(0).getReg();
+ if (MRI->getRegClass(DstR) == DoubleRC) {
+ createHalfInstr(Opc, MI, PairMap, subreg_loreg);
+ createHalfInstr(Opc, MI, PairMap, subreg_hireg);
+ Split = true;
+ }
+ break;
+ }
+ case A2_andp:
+ createHalfInstr(A2_and, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_and, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_orp:
+ createHalfInstr(A2_or, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_or, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_xorp:
+ createHalfInstr(A2_xor, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_xor, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+
+ case L2_loadrd_io:
+ case L2_loadrd_pi:
+ case S2_storerd_io:
+ case S2_storerd_pi:
+ splitMemRef(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_tfrpi:
+ case CONST64_Int_Real:
+ splitImmediate(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_combineii:
+ case A4_combineir:
+ case A4_combineii:
+ case A4_combineri:
+ case A2_combinew:
+ splitCombine(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_sxtw:
+ splitExt(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p:
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ splitShift(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p_or:
+ splitAslOr(MI, PairMap);
+ Split = true;
+ break;
+
+ default:
+    llvm_unreachable("Instruction not splittable");
+ return false;
+ }
+
+ return Split;
+}
+
+
+void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
+ continue;
+ unsigned R = Op.getReg();
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &P = F->second;
+ switch (Op.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Op.setReg(P.first);
+ break;
+ case Hexagon::subreg_hireg:
+ Op.setReg(P.second);
+ break;
+ }
+ Op.setSubReg(0);
+ }
+}
+
+
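+// Fixed instructions keep using the original 64-bit register. When its
+// partition is being split, reassemble the pair right before each such use,
+// roughly:
+//   %new:DoubleRegs = REG_SEQUENCE %lo, subreg_loreg, %hi, subreg_hireg
+// and rewrite the fixed use to read the reassembled register.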
+void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
+ continue;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &Pr = F->second;
+ unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
+ .addReg(Pr.first)
+ .addImm(Hexagon::subreg_loreg)
+ .addReg(Pr.second)
+ .addImm(Hexagon::subreg_hireg);
+ Op.setReg(NewDR);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+ typedef std::set<MachineInstr*> MISet;
+ bool Changed = false;
+
+ DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
+ dbgs() << '\n');
+
+ UUPairMap PairMap;
+
+ MISet SplitIns;
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ SplitIns.insert(DefI);
+
+ // Collect all instructions, including fixed ones. We won't split them,
+ // but we need to visit them again to insert the REG_SEQUENCE instructions.
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ SplitIns.insert(U->getParent());
+
+ unsigned LoR = MRI->createVirtualRegister(IntRC);
+ unsigned HiR = MRI->createVirtualRegister(IntRC);
+ DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
+ << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
+ PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
+ }
+
+ MISet Erase;
+ for (auto MI : SplitIns) {
+ if (isFixedInstr(MI)) {
+ collapseRegPairs(MI, PairMap);
+ } else {
+ bool Done = splitInstr(MI, PairMap);
+ if (Done)
+ Erase.insert(MI);
+ Changed |= Done;
+ }
+ }
+
+ for (unsigned DR : Part) {
+    // Before erasing the "double" instructions, revisit all uses of the
+    // double registers in this partition and replace each subregister use
+    // with the corresponding single register.
+ MISet Uses;
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ Uses.insert(U->getParent());
+ for (auto M : Uses)
+ replaceSubregUses(M, PairMap);
+ }
+
+ for (auto MI : Erase) {
+ MachineBasicBlock *B = MI->getParent();
+ B->erase(MI);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Splitting double registers in function: "
+ << MF.getName() << '\n');
+
+ auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+
+ UUSetMap P2Rs;
+ LoopRegMap IRM;
+
+ collectIndRegs(IRM);
+ partitionRegisters(P2Rs);
+
+ DEBUG({
+ dbgs() << "Register partitioning: (partition #0 is fixed)\n";
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ dbgs() << '#' << I->first << " -> ";
+ dump_partition(dbgs(), I->second, *TRI);
+ dbgs() << '\n';
+ }
+ });
+
+ bool Changed = false;
+ int Limit = MaxHSDR;
+
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ if (I->first == 0)
+ continue;
+ if (Limit >= 0 && Counter >= Limit)
+ break;
+ USet &Part = I->second;
+ DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+ if (!isProfitable(Part, IRM))
+ continue;
+ Counter++;
+ Changed |= splitPartition(Part);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonSplitDoubleRegs() {
+ return new HexagonSplitDoubleRegs();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
new file mode 100644
index 0000000..b5339ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -0,0 +1,616 @@
+//===--- HexagonStoreWidening.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte
+// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte
+// with
+// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword
+// The above is the general idea. The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores,
+// or as one could say, "reduce store queue pressure". Also, wide stores
+// mean fewer stores, and since there are only two memory instructions allowed
+// per packet, it also means fewer packets, and ultimately fewer cycles.
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-widen-stores"
+
+#include "HexagonTargetMachine.h"
+
+#include "llvm/PassSupport.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <algorithm>
+
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonStoreWidening();
+ void initializeHexagonStoreWideningPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonStoreWidening : public MachineFunctionPass {
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ AliasAnalysis *AA;
+ MachineFunction *MF;
+
+ public:
+ static char ID;
+ HexagonStoreWidening() : MachineFunctionPass(ID) {
+ initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "Hexagon Store Widening";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static bool handledStoreType(const MachineInstr *MI);
+
+ private:
+ static const int MaxWideSize = 4;
+
+ typedef std::vector<MachineInstr*> InstrGroup;
+ typedef std::vector<InstrGroup> InstrGroupList;
+
+ bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
+ bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
+ void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &Group);
+ void createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups);
+ bool processBasicBlock(MachineBasicBlock &MBB);
+ bool processStoreGroup(InstrGroup &Group);
+ bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
+ InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
+ bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+ bool replaceStores(InstrGroup &OG, InstrGroup &NG);
+ bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
+ };
+
+} // namespace
+
+
+namespace {
+
+// Some local helper functions...
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isReg() && "Expecting register operand");
+ return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+ unsigned OpC = MI->getOpcode();
+ assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+ switch (OpC) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io: {
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isImm() && "Expecting immediate offset");
+ return MO.getImm();
+ }
+ }
+ dbgs() << *MI;
+ llvm_unreachable("Store offset calculation missing for a handled opcode");
+ return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+ assert(!MI->memoperands_empty() && "Expecting memory operands");
+ return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any store whose opcode is not "approved" by this
+// function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+ // For now, only handle stores of immediate values.
+ // Also, reject stores to stack slots.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
+ // Base address must be a register. (Implement FI later.)
+ return MI->getOperand(0).isReg();
+ default:
+ return false;
+ }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineMemOperand &MMO) {
+ if (!MMO.getValue())
+ return true;
+
+ MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+ for (auto SI : Stores) {
+ const MachineMemOperand &SMO = getStoreTarget(SI);
+ if (!SMO.getValue())
+ return true;
+
+ MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+ if (AA->alias(L, SL))
+ return true;
+ }
+
+ return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineInstr *MI) {
+ for (auto &I : MI->memoperands())
+ if (instrAliased(Stores, *I))
+ return true;
+ return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
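+//
+// E.g. (illustrative): byte stores to [R+0] and [R+1] with a load from an
+// unrelated location in between still form one group, as long as the load
+// does not alias the stored bytes; any aliasing access terminates the
+// group at that point.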
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups) {
+ InstrGroup AllInsns;
+
+ // Copy all instruction pointers from the basic block to a temporary
+ // list. This will allow operating on the list, and modifying its
+ // elements without affecting the basic block.
+ for (auto &I : MBB)
+ AllInsns.push_back(&I);
+
+ // Traverse all instructions in the AllInsns list, and if we encounter
+ // a store, then try to create a store group starting at that instruction
+ // i.e. a sequence of independent stores that can be widened.
+ for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ // Skip null pointers (processed instructions).
+ if (!MI || !handledStoreType(MI))
+ continue;
+
+ // Found a store. Try to create a store group.
+ InstrGroup G;
+ createStoreGroup(MI, I+1, E, G);
+ if (G.size() > 1)
+ StoreGroups.push_back(G);
+ }
+}
+
+
+// Create a single store group. The stores must be independent of each
+// other, and there cannot be other instructions between them that could
+// read or modify the storage being stored into.
+void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
+ InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
+ assert(handledStoreType(BaseStore) && "Unexpected instruction");
+ unsigned BaseReg = getBaseAddressRegister(BaseStore);
+ InstrGroup Other;
+
+ Group.push_back(BaseStore);
+
+ for (auto I = Begin; I != End; ++I) {
+ MachineInstr *MI = *I;
+ if (!MI)
+ continue;
+
+ if (handledStoreType(MI)) {
+ // If this store instruction is aliased with anything already in the
+ // group, terminate the group now.
+ if (instrAliased(Group, getStoreTarget(MI)))
+ return;
+ // If this store is aliased to any of the memory instructions we have
+ // seen so far (that are not a part of this group), terminate the group.
+ if (instrAliased(Other, getStoreTarget(MI)))
+ return;
+
+ unsigned BR = getBaseAddressRegister(MI);
+ if (BR == BaseReg) {
+ Group.push_back(MI);
+ *I = nullptr;
+ continue;
+ }
+ }
+
+ // Assume calls are aliased to everything.
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
+ return;
+
+ if (MI->mayLoad() || MI->mayStore()) {
+ if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
+ return;
+ Other.push_back(MI);
+ }
+ } // for
+}
+
+
+// Check if store instructions S1 and S2 are adjacent. More precisely,
+// S2 has to access memory immediately following that accessed by S1.
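+// E.g. a byte store at offset 7 (size 1) is adjacent to a store at offset 8.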
+bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
+ const MachineInstr *S2) {
+ if (!handledStoreType(S1) || !handledStoreType(S2))
+ return false;
+
+ const MachineMemOperand &S1MO = getStoreTarget(S1);
+
+ // Currently only handling immediate stores.
+ int Off1 = S1->getOperand(1).getImm();
+ int Off2 = S2->getOperand(1).getImm();
+
+ return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
+ : int(Off1+S1MO.getSize()) == Off2;
+}
+
+
+/// Given a sequence of adjacent stores, and a maximum size of a single wide
+/// store, pick a group of stores that can be replaced by a single store
+/// of size not exceeding MaxSize. The selected sequence will be recorded
+/// in OG ("old group" of instructions).
+/// OG should be empty on entry, and should be left empty if the function
+/// fails.
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+ unsigned MaxSize) {
+ assert(Begin != End && "No instructions to analyze");
+ assert(OG.empty() && "Old group not empty on entry");
+
+ if (std::distance(Begin, End) <= 1)
+ return false;
+
+ MachineInstr *FirstMI = *Begin;
+ assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+ const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+ unsigned Alignment = FirstMMO.getAlignment();
+ unsigned SizeAccum = FirstMMO.getSize();
+ unsigned FirstOffset = getStoreOffset(FirstMI);
+
+ // The initial value of SizeAccum should always be a power of 2.
+ assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+ // If the size of the first store equals or exceeds the limit, do nothing.
+ if (SizeAccum >= MaxSize)
+ return false;
+
+ // If the size of the first store is greater than or equal to the alignment
+ // of the address stored to, then the store cannot be made any wider.
+ if (SizeAccum >= Alignment)
+ return false;
+
+ // The offset of a store will put restrictions on how wide the store can be.
+ // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+ // If the first store already exhausts the offset limits, quit. Test this
+ // by checking if the next wider size would exceed the limit.
+ if ((2*SizeAccum-1) & FirstOffset)
+ return false;
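+ // E.g. (illustrative) a byte store at offset 3 is rejected here, since
+ // even a halfword store would need an even offset ((2*1-1) & 3 != 0),
+ // whereas a halfword store at offset 4 may still grow into a word store
+ // ((2*2-1) & 4 == 0).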
+
+ OG.push_back(FirstMI);
+ MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+ InstrGroup::iterator I = Begin+1;
+
+ // Pow2Num will be the largest number of elements in OG such that the sum
+ // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+ unsigned Pow2Num = 1;
+ unsigned Pow2Size = SizeAccum;
+
+ // Be greedy: keep accumulating stores as long as they are to adjacent
+ // memory locations, and as long as the total number of bytes stored
+ // does not exceed the limit (MaxSize).
+ // Keep track of when the total size covered is a power of 2, since
+ // this is a size a single store can cover.
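+ // Worked example (assuming offset 0 and alignment 4): four byte stores
+ // accumulate SizeAccum = 1, 2, 3, 4; the power-of-2 checkpoints occur at
+ // 1, 2 and 4, so Pow2Num ends up 4 and all four stores become one word
+ // store. With only three byte stores, Pow2Num stays 2 and the group is
+ // trimmed back to the first two stores below.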
+ while (I != End) {
+ S2 = *I;
+ // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+ // any other store to fill the "hole".
+ if (!storesAreAdjacent(S1, S2))
+ break;
+
+ unsigned S2Size = getStoreTarget(S2).getSize();
+ if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+ break;
+
+ OG.push_back(S2);
+ SizeAccum += S2Size;
+ if (isPowerOf2_32(SizeAccum)) {
+ Pow2Num = OG.size();
+ Pow2Size = SizeAccum;
+ }
+ if ((2*Pow2Size-1) & FirstOffset)
+ break;
+
+ S1 = S2;
+ ++I;
+ }
+
+ // The stores don't add up to anything that can be widened. Clean up.
+ if (Pow2Num <= 1) {
+ OG.clear();
+ return false;
+ }
+
+ // Only leave the stores being widened.
+ OG.resize(Pow2Num);
+ TotalSize = Pow2Size;
+ return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+ unsigned TotalSize) {
+ // XXX Current limitations:
+ // - only expect stores of immediate values in OG,
+ // - only handle a TotalSize of up to 4.
+
+ if (TotalSize > 4)
+ return false;
+
+ unsigned Acc = 0; // Value accumulator.
+ unsigned Shift = 0;
+
+ for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ const MachineMemOperand &MMO = getStoreTarget(MI);
+ MachineOperand &SO = MI->getOperand(2); // Source.
+ assert(SO.isImm() && "Expecting an immediate operand");
+
+ unsigned NBits = MMO.getSize()*8;
+ unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
+ unsigned Val = (SO.getImm() & Mask) << Shift;
+ Acc |= Val;
+ Shift += NBits;
+ }
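+ // E.g. (illustrative) byte stores of 0x12 at offset 0 and 0x34 at offset 1
+ // (OG is in offset order) yield Acc = 0x12 | (0x34 << 8) = 0x3412, exactly
+ // the halfword a single wide store must write on this little-endian target.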
+
+
+ MachineInstr *FirstSt = OG.front();
+ DebugLoc DL = OG.back()->getDebugLoc();
+ const MachineMemOperand &OldM = getStoreTarget(FirstSt);
+ MachineMemOperand *NewM =
+ MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
+ TotalSize, OldM.getAlignment(),
+ OldM.getAAInfo());
+
+ if (Acc < 0x10000) {
+ // Create mem[hw] = #Acc
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
+ (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addImm(Val);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ } else {
+ // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
+ const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
+ unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
+ .addImm(int(Acc));
+ NG.push_back(TfrI);
+
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
+ (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addReg(VReg, RegState::Kill);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ }
+
+ return true;
+}
+
+
+// Replace instructions from the old group OG with instructions from the
+// new group NG. Conceptually, remove all instructions in OG, and then
+// insert all instructions in NG, starting at where the first instruction
+// from OG was (in the order in which they appeared in the basic block).
+// (The ordering in OG does not have to match the order in the basic block.)
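+//
+// E.g. (illustrative) if OG holds stores at offsets {0, 4} but the offset-4
+// store appears first in the block, the new instructions are inserted where
+// the offset-4 store used to be.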
+bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
+ DEBUG({
+ dbgs() << "Replacing:\n";
+ for (auto I : OG)
+ dbgs() << " " << *I;
+ dbgs() << "with\n";
+ for (auto I : NG)
+ dbgs() << " " << *I;
+ });
+
+ MachineBasicBlock *MBB = OG.back()->getParent();
+ MachineBasicBlock::iterator InsertAt = MBB->end();
+
+ // Need to establish the insertion point. The best one is right before
+ // the first store in the OG, but in the order in which the stores occur
+ // in the program list. Since the ordering in OG does not correspond
+ // to the order in the program list, we need to do some work to find
+ // the insertion point.
+
+ // Create a set of all instructions in OG (for quick lookup).
+ SmallPtrSet<MachineInstr*, 4> InstrSet;
+ for (auto I : OG)
+ InstrSet.insert(I);
+
+ // Traverse the block, until we hit an instruction from OG.
+ for (auto &I : *MBB) {
+ if (InstrSet.count(&I)) {
+ InsertAt = I;
+ break;
+ }
+ }
+
+ assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
+
+ bool AtBBStart = false;
+
+ // InsertAt points at the first instruction that will be removed. We need
+ // to move it out of the way, so it remains valid after removing all the
+ // old stores, and so we can restore it to the proper insertion position
+ // afterwards.
+ if (InsertAt != MBB->begin())
+ --InsertAt;
+ else
+ AtBBStart = true;
+
+ for (auto I : OG)
+ I->eraseFromParent();
+
+ if (!AtBBStart)
+ ++InsertAt;
+ else
+ InsertAt = MBB->begin();
+
+ for (auto I : NG)
+ MBB->insert(InsertAt, I);
+
+ return true;
+}
+
+
+// Break up the group into smaller groups, each of which can be replaced by
+// a single wide store. Widen each such smaller group and replace the old
+// instructions with the widened ones.
+bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
+ bool Changed = false;
+ InstrGroup::iterator I = Group.begin(), E = Group.end();
+ InstrGroup OG, NG; // Old and new groups.
+ unsigned CollectedSize;
+
+ while (I != E) {
+ OG.clear();
+ NG.clear();
+
+ bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
+ createWideStores(OG, NG, CollectedSize) &&
+ replaceStores(OG, NG);
+ if (!Succ)
+ continue;
+
+ assert(OG.size() > 1 && "Created invalid group");
+ assert(std::distance(I, E)+1 >= int(OG.size()) && "Too many elements");
+ I += OG.size()-1;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+// Process a single basic block: create the store groups, and replace them
+// with the widened stores, if possible. Processing of each basic block
+// is independent from processing of any other basic block. This
+// transformation could be stopped after having processed any basic block
+// without any ill effects (other than not having performed widening in the
+// unprocessed blocks). Also, the basic blocks can be processed in any order.
+bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
+ InstrGroupList SGs;
+ bool Changed = false;
+
+ createStoreGroups(MBB, SGs);
+
+ auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
+ return getStoreOffset(A) < getStoreOffset(B);
+ };
+ for (auto &G : SGs) {
+ assert(G.size() > 1 && "Store group with fewer than 2 elements");
+ std::sort(G.begin(), G.end(), Less);
+
+ Changed |= processStoreGroup(G);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
+ MF = &MFn;
+ auto &ST = MFn.getSubtarget<HexagonSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MRI = &MFn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ bool Changed = false;
+
+ for (auto &B : MFn)
+ Changed |= processBasicBlock(B);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonStoreWidening() {
+ return new HexagonStoreWidening();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index cd482b3..aa0efd4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -16,6 +16,8 @@
#include "HexagonRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include <map>
+
using namespace llvm;
#define DEBUG_TYPE "hexagon-subtarget"
@@ -24,49 +26,65 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
-static cl::opt<bool>
-EnableV3("enable-hexagon-v3", cl::Hidden,
- cl::desc("Enable Hexagon V3 instructions."));
-
-static cl::opt<bool>
-EnableMemOps(
- "enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
- cl::desc(
- "Generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool>
-DisableMemOps(
- "disable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
- cl::desc(
- "Do not generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool>
-EnableIEEERndNear(
- "enable-hexagon-ieee-rnd-near",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> EnableMemOps("enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> DisableMemOps("disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate non-chopped conversion from fp to int."));
+
+static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Double Vector eXtensions"));
+
+static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Vector eXtensions"));
static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
+void HexagonSubtarget::initializeEnvironment() {
+ UseMemOps = false;
+ ModeIEEERndNear = false;
+ UseBSBScheduling = false;
+}
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- // If the programmer has not specified a Hexagon version, default to -mv4.
- if (CPUString.empty())
- CPUString = "hexagonv4";
-
- if (CPUString == "hexagonv4") {
- HexagonArchVersion = V4;
- } else if (CPUString == "hexagonv5") {
- HexagonArchVersion = V5;
- } else {
+ CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU);
+
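+ // Map recognized CPU names to architecture versions; any other name is
+ // rejected below.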
+ static std::map<StringRef, HexagonArchEnum> CpuTable {
+ { "hexagonv4", V4 },
+ { "hexagonv5", V5 },
+ { "hexagonv55", V55 },
+ { "hexagonv60", V60 },
+ };
+
+ auto foundIt = CpuTable.find(CPUString);
+ if (foundIt != CpuTable.end())
+ HexagonArchVersion = foundIt->second;
+ else
llvm_unreachable("Unrecognized Hexagon processor version");
- }
+ UseHVXOps = false;
+ UseHVXDblOps = false;
ParseSubtargetFeatures(CPUString, FS);
+
+ if (EnableHexagonHVX.getPosition())
+ UseHVXOps = EnableHexagonHVX;
+ if (EnableHexagonHVXDouble.getPosition())
+ UseHVXDblOps = EnableHexagonHVXDouble;
+
return *this;
}
@@ -76,6 +94,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
FrameLowering() {
+ initializeEnvironment();
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -91,6 +111,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
ModeIEEERndNear = true;
else
ModeIEEERndNear = false;
+
+ UseBSBScheduling = hasV60TOps() && EnableBSBSched;
}
// Pin the vtable to this file.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 34cdad7..c7ae139 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -34,15 +34,19 @@ namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
- bool UseMemOps;
+ bool UseMemOps, UseHVXOps, UseHVXDblOps;
bool ModeIEEERndNear;
public:
enum HexagonArchEnum {
- V4, V5
+ V4, V5, V55, V60
};
HexagonArchEnum HexagonArchVersion;
+ /// True if the target should use Back-Skip-Back scheduling. This is the
+ /// default for V60.
+ bool UseBSBScheduling;
+
private:
std::string CPUString;
HexagonInstrInfo InstrInfo;
@@ -50,6 +54,7 @@ private:
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
InstrItineraryData InstrItins;
+ void initializeEnvironment();
public:
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -84,7 +89,16 @@ public:
bool useMemOps() const { return UseMemOps; }
bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+ bool hasV55TOps() const { return getHexagonArchVersion() >= V55; }
+ bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; }
+ bool hasV60TOps() const { return getHexagonArchVersion() >= V60; }
+ bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; }
bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool useHVXOps() const { return UseHVXOps; }
+ bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
+ bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; }
+
+ bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
@@ -98,7 +112,7 @@ public:
return Hexagon_SMALL_DATA_THRESHOLD;
}
const HexagonArchEnum &getHexagonArchVersion() const {
- return HexagonArchVersion;
+ return HexagonArchVersion;
}
};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b504429..9dccd69 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -16,12 +16,12 @@
#include "HexagonISelLowering.h"
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -33,10 +33,16 @@ static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon CFG Optimization"));
+static cl::opt<bool> DisableStoreWidening("disable-store-widen",
+ cl::Hidden, cl::init(false), cl::desc("Disable store widening"));
+
static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Early expansion of MUX"));
+static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable early if-conversion"));
+
static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
cl::Hidden, cl::desc("Generate \"insert\" instructions"));
@@ -46,10 +52,22 @@ static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
cl::Hidden, cl::desc("Generate \"extract\" instructions"));
+static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden,
+ cl::desc("Enable converting conditional transfers into MUX instructions"));
+
static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
"predicate instructions"));
+static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
+ cl::desc("Disable splitting double registers"));
+
+static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
+ cl::Hidden, cl::desc("Bit simplification"));
+
+static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true),
+ cl::Hidden, cl::desc("Loop rescheduling"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -72,23 +90,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
namespace llvm {
+ FunctionPass *createHexagonBitSimplify();
+ FunctionPass *createHexagonCallFrameInformation();
FunctionPass *createHexagonCFGOptimizer();
FunctionPass *createHexagonCommonGEP();
FunctionPass *createHexagonCopyToCombine();
+ FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonExpandCondsets();
FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
+ FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createHexagonLoopRescheduling();
FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPeephole();
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonSplitDoubleRegs();
+ FunctionPass *createHexagonStoreWidening();
} // end namespace llvm;
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
@@ -101,13 +126,46 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
- Options, RM, CM, OL),
- TLOF(make_unique<HexagonTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
- initAsmInfo();
+ : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-"
+ "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-"
+ "n16:32", TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<HexagonTargetObjectFile>()) {
+ initAsmInfo();
+}
+
+const HexagonSubtarget *
+HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
+
+TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(HexagonTTIImpl(this, F));
+ });
}
+
HexagonTargetMachine::~HexagonTargetMachine() {}
namespace {
@@ -166,7 +224,7 @@ bool HexagonPassConfig::addInstSelector() {
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
- addPass(createHexagonRemoveExtendArgs(TM));
+ addPass(createHexagonOptimizeSZextends());
addPass(createHexagonISelDag(TM, getOptLevel()));
@@ -174,19 +232,33 @@ bool HexagonPassConfig::addInstSelector() {
// Create logical operations on predicate registers.
if (EnableGenPred)
addPass(createHexagonGenPredicate(), false);
+ // Rotate loops to expose bit-simplification opportunities.
+ if (EnableLoopResched)
+ addPass(createHexagonLoopRescheduling(), false);
+ // Split double registers.
+ if (!DisableHSDR)
+ addPass(createHexagonSplitDoubleRegs());
+ // Bit simplification.
+ if (EnableBitSimplify)
+ addPass(createHexagonBitSimplify(), false);
addPass(createHexagonPeephole());
printAndVerify("After hexagon peephole pass");
if (EnableGenInsert)
addPass(createHexagonGenInsert(), false);
+ if (EnableEarlyIf)
+ addPass(createHexagonEarlyIfConversion(), false);
}
return false;
}
void HexagonPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!DisableStoreWidening)
+ addPass(createHexagonStoreWidening(), false);
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops(), false);
+ }
}
void HexagonPassConfig::addPostRegAlloc() {
@@ -215,6 +287,13 @@ void HexagonPassConfig::addPreEmitPass() {
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops(), false);
+ // Generate MUX from pairs of conditional transfers.
+ if (EnableGenMux)
+ addPass(createHexagonGenMux(), false);
+
addPass(createHexagonPacketizer(), false);
}
+
+ // Add CFI instructions if necessary.
+ addPass(createHexagonCallFrameInformation(), false);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 115eadb..968814b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -16,6 +16,7 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -24,7 +25,7 @@ class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- HexagonSubtarget Subtarget;
+ mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap;
public:
HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -32,20 +33,18 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
- const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
+
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF.get();
+ HexagonTargetObjectFile *getObjFileLowering() const override {
+ return static_cast<HexagonTargetObjectFile*>(TLOF.get());
}
};
-extern bool flag_aligned_memcpy;
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 4ea0e0d..ccca620 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -73,9 +73,10 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (!GVA)
return false;
- if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
return false;
@@ -89,7 +90,7 @@ HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
new file mode 100644
index 0000000..a05443e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -0,0 +1,38 @@
+//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagontti"
+
+TargetTransformInfo::PopcntSupportKind
+HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ // Return Fast Hardware support as every input < 64 bits will be promoted
+ // to 64 bits.
+ return TargetTransformInfo::PSK_FastHardware;
+}
+
+// The Hexagon target can unroll loops with run-time trip counts.
+void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ UP.Runtime = UP.Partial = true;
+}
+
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
+ return vector ? 0 : 32;
+}
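+
+// Note: vector registers are reported as 0 above, presumably because the
+// HVX vector units are not yet modeled by this TTI implementation.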
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
new file mode 100644
index 0000000..71ae17a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -0,0 +1,70 @@
+//===-- HexagonTargetTransformInfo.h - Hexagon specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
+ typedef BasicTTIImplBase<HexagonTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const HexagonSubtarget *ST;
+ const HexagonTargetLowering *TLI;
+
+ const HexagonSubtarget *getST() const { return ST; }
+ const HexagonTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ HexagonTTIImpl(const HexagonTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ HexagonTTIImpl(HexagonTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
+
+ // The Hexagon target can unroll loops with run-time trip counts.
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool vector) const;
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index b91a3f6..8185054 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -16,35 +16,19 @@
// prune the dependence.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/DFAPacketizer.h"
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "HexagonVLIWPacketizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <map>
#include <vector>
@@ -52,9 +36,22 @@ using namespace llvm;
#define DEBUG_TYPE "packets"
+static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon packetizer pass"));
+
static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
- cl::ZeroOrMore, cl::Hidden, cl::init(true),
- cl::desc("Allow non-solo packetization of volatile memory references"));
+ cl::ZeroOrMore, cl::Hidden, cl::init(true),
+ cl::desc("Allow non-solo packetization of volatile memory references"));
+
+static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
+
+static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
+ cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Disable vector double new-value-stores"));
+
+extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
FunctionPass *createHexagonPacketizer();
@@ -64,7 +61,6 @@ namespace llvm {
namespace {
class HexagonPacketizer : public MachineFunctionPass {
-
public:
static char ID;
HexagonPacketizer() : MachineFunctionPass(ID) {
@@ -73,103 +69,25 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
const char *getPassName() const override {
return "Hexagon Packetizer";
}
-
bool runOnMachineFunction(MachineFunction &Fn) override;
- };
- char HexagonPacketizer::ID = 0;
-
- class HexagonPacketizerList : public VLIWPacketizerList {
private:
-
- // Has the instruction been promoted to a dot-new instruction.
- bool PromotedToDotNew;
-
- // Has the instruction been glued to allocframe.
- bool GlueAllocframeStore;
-
- // Has the feeder instruction been glued to new value jump.
- bool GlueToNewValueJump;
-
- // Check if there is a dependence between some instruction already in this
- // packet and this instruction.
- bool Dependence;
-
- // Only check for dependence if there are resources available to
- // schedule this instruction.
- bool FoundSequentialDependence;
-
- /// \brief A handle to the branch probability pass.
- const MachineBranchProbabilityInfo *MBPI;
-
- // Track MIs with ignored dependece.
- std::vector<MachineInstr*> IgnoreDepMIs;
-
- public:
- // Ctor.
- HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- const MachineBranchProbabilityInfo *MBPI);
-
- // initPacketizerState - initialize some internal flags.
- void initPacketizerState() override;
-
- // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) override;
-
- // isSoloInstruction - return true if instruction MI can not be packetized
- // with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) override;
-
- // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
- // together.
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
-
- // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
- // and SUJ.
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
-
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
- private:
- bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
- bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass *RC);
- bool
- CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII);
- bool CanPromoteToNewValueStore(
- MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit);
- bool DemoteToDotOld(MachineInstr *MI);
- bool ArePredicatesComplements(
- MachineInstr *MI1, MachineInstr *MI2,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit);
- bool RestrictingDepExistInPacket(MachineInstr *, unsigned,
- const std::map<MachineInstr *, SUnit *> &);
- bool isNewifiable(MachineInstr* MI);
- bool isCondInst(MachineInstr* MI);
- bool tryAllocateResourcesForConstExt(MachineInstr* MI);
- bool canReserveResourcesForConstExt(MachineInstr *MI);
- void reserveResourcesForConstExt(MachineInstr* MI);
- bool isNewValueInst(MachineInstr* MI);
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
};
+
+ char HexagonPacketizer::ID = 0;
}
INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
@@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
-// HexagonPacketizerList Ctor.
-HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, true) {
- this->MBPI = MBPI;
+HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
}
-bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- const MachineBranchProbabilityInfo *MBPI =
- &getAnalysis<MachineBranchProbabilityInfo>();
+// Check if FirstI modifies a register that SecondI reads.
+static bool hasWriteToReadDep(const MachineInstr *FirstI,
+ const MachineInstr *SecondI, const TargetRegisterInfo *TRI) {
+ for (auto &MO : FirstI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (SecondI->readsRegister(R, TRI))
+ return true;
+ }
+ return false;
+}
+
+
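+// Move MI out of the bundle that BundleIt points at, reinserting it
+// immediately before (Before == true) or immediately after that bundle.
+// If this leaves only one instruction bundled, the remaining bundle is
+// dissolved as well. Returns an iterator from which traversal can continue.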
+static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
+ MachineBasicBlock::iterator BundleIt, bool Before) {
+ MachineBasicBlock::instr_iterator InsertPt;
+ if (Before)
+ InsertPt = BundleIt.getInstrIterator();
+ else
+ InsertPt = std::next(BundleIt).getInstrIterator();
+
+ MachineBasicBlock &B = *MI->getParent();
+ // The instruction should at least be bundled with the preceding instruction
+ // (there will always be one, i.e. BUNDLE, if nothing else).
+ assert(MI->isBundledWithPred());
+ if (MI->isBundledWithSucc()) {
+ MI->clearFlag(MachineInstr::BundledSucc);
+ MI->clearFlag(MachineInstr::BundledPred);
+ } else {
+ // If it's not bundled with the successor (i.e. it is the last one
+ // in the bundle), then we can simply unbundle it from the predecessor,
+ // which will take care of updating the predecessor's flag.
+ MI->unbundleFromPred();
+ }
+ B.splice(InsertPt, &B, MI);
+
+ // Get the size of the bundle without asserting.
+ MachineBasicBlock::const_instr_iterator I(BundleIt);
+ MachineBasicBlock::const_instr_iterator E = B.instr_end();
+ unsigned Size = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I)
+ ++Size;
+
+ // If there are still two or more instructions, then there is nothing
+ // else to be done.
+ if (Size > 1)
+ return BundleIt;
+
+ // Otherwise, extract the single instruction out and delete the bundle.
+ MachineBasicBlock::iterator NextIt = std::next(BundleIt);
+ MachineInstr *SingleI = BundleIt->getNextNode();
+ SingleI->unbundleFromPred();
+ assert(!SingleI->isBundledWithSucc());
+ BundleIt->eraseFromParent();
+ return NextIt;
+}
+
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePacketizer)
+ return false;
+
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ if (EnableGenAllInsnClass)
+ HII->genAllInsnTimingClasses(MF);
+
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MBPI);
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
//
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock::iterator End = MBB->end();
- MachineBasicBlock::iterator MI = MBB->begin();
+ for (auto &MB : MF) {
+ auto End = MB.end();
+ auto MI = MB.begin();
while (MI != End) {
+ auto NextI = std::next(MI);
if (MI->isKill()) {
- MachineBasicBlock::iterator DeleteMI = MI;
- ++MI;
- MBB->erase(DeleteMI);
- End = MBB->end();
- continue;
+ MB.erase(MI);
+ End = MB.end();
}
- ++MI;
+ MI = NextI;
}
}
// Loop over all of the basic blocks.
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- // Find scheduling regions and schedule / packetize each region.
- unsigned RemainingCount = MBB->size();
- for(MachineBasicBlock::iterator RegionEnd = MBB->end();
- RegionEnd != MBB->begin();) {
- // The next region starts above the previous region. Look backward in the
- // instruction stream until we find the nearest boundary.
- MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
- break;
- }
- I = MBB->begin();
-
- // Skip empty scheduling regions.
- if (I == RegionEnd) {
- RegionEnd = std::prev(RegionEnd);
- --RemainingCount;
- continue;
- }
- // Skip regions with one instruction.
- if (I == std::prev(RegionEnd)) {
- RegionEnd = std::prev(RegionEnd);
- continue;
- }
-
- Packetizer.PacketizeMIs(MBB, I, RegionEnd);
- RegionEnd = I;
+ for (auto &MB : MF) {
+ auto Begin = MB.begin(), End = MB.end();
+ while (Begin != End) {
+ // Find the first non-boundary instruction, starting from the end of the
+ // last scheduling region.
+ MachineBasicBlock::iterator RB = Begin;
+ while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+ ++RB;
+ // Find the first boundary instruction, starting from the beginning of the
+ // new region.
+ MachineBasicBlock::iterator RE = RB;
+ while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+ ++RE;
+ // Add the scheduling boundary if it's not block end.
+ if (RE != End)
+ ++RE;
+ // If RB == End, then RE == End.
+ if (RB != End)
+ Packetizer.PacketizeMIs(&MB, RB, RE);
+
+ Begin = RE;
}
}
+ Packetizer.unpacketizeSoloInstrs(MF);
return true;
}
-static bool IsIndirectCall(MachineInstr* MI) {
- return MI->getOpcode() == Hexagon::J2_callr;
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+ if (!tryAllocateResourcesForConstExt(true))
+ llvm_unreachable("Resources not available");
}
-// Reserve resources for constant extender. Trigure an assertion if
-// reservation fail.
-void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
-
- if (ResourceTracker->canReserveResources(PseudoMI)) {
- ResourceTracker->reserveResources(PseudoMI);
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- } else {
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- llvm_unreachable("can not reserve resources for constant extender.");
- }
- return;
+bool HexagonPacketizerList::canReserveResourcesForConstExt() {
+ return tryAllocateResourcesForConstExt(false);
}
-bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
- "Should only be called for constant extended instructions");
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
- bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
- MF.DeleteMachineInstr(PseudoMI);
- return CanReserve;
+// Allocate resources (i.e. 4 bytes) for a constant extender. If successful,
+// return true; otherwise return false.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
+ auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
+ bool Avail = ResourceTracker->canReserveResources(ExtMI);
+ if (Reserve && Avail)
+ ResourceTracker->reserveResources(ExtMI);
+ MF.DeleteMachineInstr(ExtMI);
+ return Avail;
}
-// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return
-// true, otherwise, return false.
-bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
- if (ResourceTracker->canReserveResources(PseudoMI)) {
- ResourceTracker->reserveResources(PseudoMI);
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
+ SDep::Kind DepType, unsigned DepReg) {
+ // Check for LR dependence.
+ if (DepReg == HRI->getRARegister())
return true;
- } else {
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- return false;
- }
-}
-
-
-bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
- SDep::Kind DepType,
- unsigned DepReg) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
-
- // Check for lr dependence
- if (DepReg == QRI->getRARegister()) {
- return true;
- }
- if (QII->isDeallocRet(MI)) {
- if (DepReg == QRI->getFrameRegister() ||
- DepReg == QRI->getStackRegister())
+ if (HII->isDeallocRet(MI))
+ if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
return true;
- }
- // Check if this is a predicate dependence
- const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
- if (RC == &Hexagon::PredRegsRegClass) {
+ // Check if this is a predicate dependence.
+ const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
+ if (RC == &Hexagon::PredRegsRegClass)
return true;
- }
- //
- // Lastly check for an operand used in an indirect call
- // If we had an attribute for checking if an instruction is an indirect call,
- // then we could have avoided this relatively brittle implementation of
- // IsIndirectCall()
- //
- // Assumes that the first operand of the CALLr is the function address
- //
- if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ // Assumes that the first operand of the CALLr is the function address.
+ if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
MachineOperand MO = MI->getOperand(0);
- if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
return true;
- }
}
return false;
}
-static bool IsRegDependence(const SDep::Kind DepType) {
- return (DepType == SDep::Data || DepType == SDep::Anti ||
- DepType == SDep::Output);
+static bool isRegDependence(const SDep::Kind DepType) {
+ return DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output;
}
-static bool IsDirectJump(MachineInstr* MI) {
- return (MI->getOpcode() == Hexagon::J2_jump);
+static bool isDirectJump(const MachineInstr* MI) {
+ return MI->getOpcode() == Hexagon::J2_jump;
}
-static bool IsSchedBarrier(MachineInstr* MI) {
+static bool isSchedBarrier(const MachineInstr* MI) {
switch (MI->getOpcode()) {
case Hexagon::Y2_barrier:
return true;
@@ -374,76 +304,127 @@ static bool IsSchedBarrier(MachineInstr* MI) {
return false;
}
-static bool IsControlFlow(MachineInstr* MI) {
+static bool isControlFlow(const MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
-static bool IsLoopN(MachineInstr *MI) {
- return (MI->getOpcode() == Hexagon::J2_loop0i ||
- MI->getOpcode() == Hexagon::J2_loop0r);
-}
-/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
-/// callee-saved register.
-static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+/// Returns true if the instruction modifies a callee-saved register.
+static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR =
- TRI->getCalleeSavedRegs(MI->getParent()->getParent());
- *CSR; ++CSR) {
- unsigned CalleeSavedReg = *CSR;
- if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ if (MI->modifiesRegister(*CSR, TRI))
return true;
- }
return false;
}
-// Returns true if an instruction can be promoted to .new predicate
-// or new-value store.
-bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- return isCondInst(MI) || QII->mayBeNewStore(MI);
+// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+// Returns true if an instruction can be promoted to .new predicate or
+// new-value store.
+bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
+ return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
}
-bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const MCInstrDesc& TID = MI->getDesc();
- // bug 5670: until that is fixed,
- // this portion is disabled.
- if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
- || QII->isConditionalTransfer(MI)
- || QII->isConditionalALU32(MI)
- || QII->isConditionalLoad(MI)
- || QII->isConditionalStore(MI)) {
- return true;
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert(DepType == SDep::Data);
+ int CurOpcode = HII->getDotCurOp(MI);
+ MI->setDesc(HII->get(CurOpcode));
+ return true;
+}
+
+void HexagonPacketizerList::cleanUpDotCur() {
+ MachineInstr *MI = nullptr;
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+ if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+ MI = BI;
+ continue;
+ }
+ if (MI) {
+ for (auto &MO : BI->operands())
+ if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
}
- return false;
+ if (!MI)
+ return;
+ // We did not find a use of the CUR, so de-cur it.
+ MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+ DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
}
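
For readers skimming the diff: the cleanup above promotes nothing; it only demotes a .cur load whose result turns out to be unused inside the packet. A toy restatement with invented opcodes and a flat instruction model (nothing here is LLVM API):

#include <vector>

struct ToyInstr {
  int Opcode;
  int DefReg;
  std::vector<int> UseRegs;
};

enum { VLOAD_CUR = 1, VLOAD = 2 }; // hypothetical opcodes

// If the packet holds a .cur load whose destination is never read by a
// later packet member, rewrite it back to the plain load form.
void cleanUpDotCurToy(std::vector<ToyInstr> &Packet) {
  ToyInstr *Cur = nullptr;
  for (auto &I : Packet) {
    if (I.Opcode == VLOAD_CUR) {
      Cur = &I;
      continue;
    }
    if (Cur)
      for (int U : I.UseRegs)
        if (U == Cur->DefReg)
          return; // a consumer exists: keep the .cur form
  }
  if (Cur)
    Cur->Opcode = VLOAD; // no consumer found: demote
}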
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass *RC) {
+ if (!HII->isV60VectorInstruction(MI))
+ return false;
+ if (!HII->isV60VectorInstruction(MII))
+ return false;
-// Promote an instructiont to its .new form.
-// At this time, we have already made a call to CanPromoteToDotNew
-// and made sure that it can *indeed* be promoted.
-bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
- SDep::Kind DepType, MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC) {
+ // Already a dot cur instruction.
+ if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+ return false;
- assert (DepType == SDep::Data);
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (!HII->mayBeCurLoad(MI))
+ return false;
+
+ // The "cur value" cannot come from inline asm.
+ if (PacketSU->getInstr()->isInlineAsm())
+ return false;
+
+ // Make sure the candidate instruction uses the .cur value.
+ DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+ MI->dump();
+ dbgs() << "in packet\n";);
+ MachineInstr *MJ = MII;
+ DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+ unsigned DestReg = MI->getOperand(0).getReg();
+ bool FoundMatch = false;
+ for (auto &MO : MJ->operands())
+ if (MO.isReg() && MO.getReg() == DestReg)
+ FoundMatch = true;
+ if (!FoundMatch)
+ return false;
+
+ // Check for existing uses of a vector register within the packet which
+ // would be affected by converting a vector load into .cur format.
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "packet has "; BI->dump(););
+ if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+ // We can convert the opcode into a .cur.
+ return true;
+}
+// Promote an instruction to its .new form. At this time, we have already
+// made a call to canPromoteToDotNew and made sure that it can *indeed* be
+// promoted.
+bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert(DepType == SDep::Data);
int NewOpcode;
if (RC == &Hexagon::PredRegsRegClass)
- NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
+ NewOpcode = HII->getDotNewPredOp(MI, MBPI);
else
- NewOpcode = QII->GetDotNewOp(MI);
- MI->setDesc(QII->get(NewOpcode));
-
+ NewOpcode = HII->getDotNewOp(MI);
+ MI->setDesc(HII->get(NewOpcode));
return true;
}
-bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
- MI->setDesc(QII->get(NewOpcode));
+bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
+ int NewOpcode = HII->getDotOldOp(MI->getOpcode());
+ MI->setDesc(HII->get(NewOpcode));
return true;
}
@@ -455,175 +436,173 @@ enum PredicateKind {
/// Returns true if an instruction is predicated on p0 and false if it's
/// predicated on !p0.
-static PredicateKind getPredicateSense(MachineInstr* MI,
- const HexagonInstrInfo *QII) {
- if (!QII->isPredicated(MI))
+static PredicateKind getPredicateSense(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ if (!HII->isPredicated(MI))
return PK_Unknown;
-
- if (QII->isPredicatedTrue(MI))
+ if (HII->isPredicatedTrue(MI))
return PK_True;
-
return PK_False;
}
-static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
- const HexagonInstrInfo *QII) {
- assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
#ifndef NDEBUG
// Post Increment means duplicates. Use a dense set to find duplicates in
// the list. Caution: DenseSet initializes with a minimum of 64 buckets,
// whereas there are at most 5 operands in the post increment.
- DenseMap<unsigned, unsigned> DefRegsSet;
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).isDef()) {
- DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
- }
-
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).isUse()) {
- if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
- return MI->getOperand(opNum);
- }
- }
+ DenseSet<unsigned> DefRegsSet;
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isDef())
+ DefRegsSet.insert(MO.getReg());
+
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
+ return MO;
#else
- if (MI->getDesc().mayLoad()) {
+ if (MI->mayLoad()) {
+ const MachineOperand &Op1 = MI->getOperand(1);
// The 2nd operand is always the post increment operand in load.
- assert(MI->getOperand(1).isReg() &&
- "Post increment operand has be to a register.");
- return (MI->getOperand(1));
+ assert(Op1.isReg() && "Post increment operand has to be a register.");
+ return Op1;
}
if (MI->getDesc().mayStore()) {
+ const MachineOperand &Op0 = MI->getOperand(0);
// The 1st operand is always the post increment operand in store.
- assert(MI->getOperand(0).isReg() &&
- "Post increment operand has be to a register.");
- return (MI->getOperand(0));
+ assert(Op0.isReg() && "Post increment operand has to be a register.");
+ return Op0;
}
#endif
// We should never get here.
llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
}
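
In release builds the operand position is hard-coded (operand 1 for loads, operand 0 for stores), but the debug-build scan above recovers the post-increment operand structurally: it is the register that appears both as a def (the updated address) and as a use (the base address) on the same instruction. A standalone sketch of that def/use intersection, over a toy operand list:

#include <unordered_set>
#include <vector>

struct ToyOperand {
  int Reg;
  bool IsDef;
};

// Collect the defined registers, then return the first used register that
// was also defined; that register is the post-increment operand.
int findPostIncrementReg(const std::vector<ToyOperand> &Ops) {
  std::unordered_set<int> Defs;
  for (const ToyOperand &Op : Ops)
    if (Op.IsDef)
      Defs.insert(Op.Reg);
  for (const ToyOperand &Op : Ops)
    if (!Op.IsDef && Defs.count(Op.Reg))
      return Op.Reg;
  return -1; // no post-increment operand
}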
-// get the value being stored
-static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+// Get the value being stored.
+static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
// The value being stored is always the last operand.
- return (MI->getOperand(MI->getNumOperands()-1));
+ return MI->getOperand(MI->getNumOperands()-1);
+}
+
+static bool isLoadAbsSet(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::L4_loadrd_ap:
+ case Hexagon::L4_loadrb_ap:
+ case Hexagon::L4_loadrh_ap:
+ case Hexagon::L4_loadrub_ap:
+ case Hexagon::L4_loadruh_ap:
+ case Hexagon::L4_loadri_ap:
+ return true;
+ }
+ return false;
}
-// can be new value store?
+static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
+ assert(isLoadAbsSet(MI));
+ return MI->getOperand(1);
+}
+
+
+// Can be new value store?
// Following restrictions are to be respected in convert a store into
// a new value store.
// 1. If an instruction uses auto-increment, its address register cannot
// be a new-value register. Arch Spec 5.4.2.1
-// 2. If an instruction uses absolute-set addressing mode,
-// its address register cannot be a new-value register.
-// Arch Spec 5.4.2.1.TODO: This is not enabled as
-// as absolute-set address mode patters are not implemented.
+// 2. If an instruction uses absolute-set addressing mode, its address
+// register cannot be a new-value register. Arch Spec 5.4.2.1.
// 3. If an instruction produces a 64-bit result, its registers cannot be used
// as new-value registers. Arch Spec 5.4.2.2.
-// 4. If the instruction that sets a new-value register is conditional, then
+// 4. If the instruction that sets the new-value register is conditional, then
// the instruction that uses the new-value register must also be conditional,
// and both must always have their predicates evaluate identically.
// Arch Spec 5.4.2.3.
-// 5. There is an implied restriction of a packet can not have another store,
-// if there is a new value store in the packet. Corollary, if there is
+// 5. There is an implied restriction that a packet cannot have another store,
+// if there is a new value store in the packet. Corollary: if there is
// already a store in a packet, there can not be a new value store.
// Arch Spec: 3.4.4.2
-bool HexagonPacketizerList::CanPromoteToNewValueStore(
- MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
+ const MachineInstr *PacketMI, unsigned DepReg) {
// Make sure we are looking at a store that can be promoted.
- if (!QII->mayBeNewStore(MI))
+ if (!HII->mayBeNewStore(MI))
return false;
- // Make sure there is dependency and can be new value'ed
- if (GetStoreValueOperand(MI).isReg() &&
- GetStoreValueOperand(MI).getReg() != DepReg)
+ // Make sure there is a dependency, and that it can be new-value'd.
+ const MachineOperand &Val = getStoreValueOperand(MI);
+ if (Val.isReg() && Val.getReg() != DepReg)
return false;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const MCInstrDesc& MCID = PacketMI->getDesc();
- // first operand is always the result
-
- const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
-
- // if there is already an store in the packet, no can do new value store
- // Arch Spec 3.4.4.2.
- for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
- // then we don't need this
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe)
- return false;
- }
- if (PacketRC == &Hexagon::DoubleRegsRegClass) {
- // new value store constraint: double regs can not feed into new value store
- // arch spec section: 5.4.2.2
+ // First operand is always the result.
+ const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
+ // Double regs can not feed into new value store: PRM section: 5.4.2.2.
+ if (PacketRC == &Hexagon::DoubleRegsRegClass)
return false;
+
+ // New-value stores are of class NV (slot 0), dual stores require class ST
+ // in slot 0 (PRM 5.5).
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ if (PacketSU->getInstr()->mayStore())
+ return false;
}
// Make sure it's NOT the post increment register that we are going to
// new value.
- if (QII->isPostIncrement(MI) &&
- MI->getDesc().mayStore() &&
- GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ if (HII->isPostIncrement(MI) &&
+ getPostIncrementOperand(MI, HII).getReg() == DepReg) {
return false;
}
- if (QII->isPostIncrement(PacketMI) &&
- PacketMI->getDesc().mayLoad() &&
- GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
- // if source is post_inc, or absolute-set addressing,
- // it can not feed into new value store
- // r3 = memw(r2++#4)
- // memw(r30 + #-1404) = r2.new -> can not be new value store
- // arch spec section: 5.4.2.1
+ if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
+ getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
+ // If source is post_inc, or absolute-set addressing, it can not feed
+ // into new value store
+ // r3 = memw(r2++#4)
+ // memw(r30 + #-1404) = r2.new -> can not be new value store
+ // arch spec section: 5.4.2.1.
return false;
}
+ if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
+ return false;
+
// If the source that feeds the store is predicated, new value store must
// also be predicated.
- if (QII->isPredicated(PacketMI)) {
- if (!QII->isPredicated(MI))
+ if (HII->isPredicated(PacketMI)) {
+ if (!HII->isPredicated(MI))
return false;
// Check to make sure that they both will have their predicates
- // evaluate identically
+ // evaluate identically.
unsigned predRegNumSrc = 0;
unsigned predRegNumDst = 0;
const TargetRegisterClass* predRegClass = nullptr;
- // Get predicate register used in the source instruction
- for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
- if ( PacketMI->getOperand(opNum).isReg())
- predRegNumSrc = PacketMI->getOperand(opNum).getReg();
- predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
- if (predRegClass == &Hexagon::PredRegsRegClass) {
+ // Get predicate register used in the source instruction.
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumSrc = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
break;
- }
}
- assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
- ("predicate register not found in a predicated PacketMI instruction"));
-
- // Get predicate register used in new-value store instruction
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
- if ( MI->getOperand(opNum).isReg())
- predRegNumDst = MI->getOperand(opNum).getReg();
- predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
- if (predRegClass == &Hexagon::PredRegsRegClass) {
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated PacketMI instruction");
+
+ // Get predicate register used in new-value store instruction.
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumDst = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
break;
- }
}
- assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
- ("predicate register not found in a predicated MI instruction"));
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated MI instruction");
// New-value register producer and user (store) need to satisfy these
// constraints:
@@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore(
// should also be .new predicated and if producer is not .new predicated
// then store should not be .new predicated.
// 3) Both new-value register producer and user should have same predicate
- // sense, i.e, either both should be negated or both should be none negated.
-
- if (( predRegNumDst != predRegNumSrc) ||
- QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
- getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) {
+ // sense, i.e, either both should be negated or both should be non-negated.
+ if (predRegNumDst != predRegNumSrc ||
+ HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
+ getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
return false;
- }
}
// Make sure that other than the new-value register no other store instruction
@@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore(
// including PacketMI. However, we need to perform the check for the
// remaining instructions in the packet.
- std::vector<MachineInstr*>::iterator VI;
- std::vector<MachineInstr*>::iterator VE;
unsigned StartCheck = 0;
- for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
- (VI != VE); ++VI) {
- SUnit *TempSU = MIToSUnit.find(*VI)->second;
+ for (auto I : CurrentPacketMIs) {
+ SUnit *TempSU = MIToSUnit.find(I)->second;
MachineInstr* TempMI = TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
// reached (StartCheck is set to 0 before the for loop).
// StartCheck flag is 1 for all the instructions after PacketMI.
- if (TempMI != PacketMI && !StartCheck) // start processing only after
- continue; // encountering PacketMI
+ if (TempMI != PacketMI && !StartCheck) // Start processing only after
+ continue; // encountering PacketMI.
StartCheck = 1;
- if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
continue;
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
- if (MI->getOperand(opNum).isReg() &&
- TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
- QRI))
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
return false;
- }
}
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
// This handles V4 base+index registers.
// The following store can not be dot new.
- // Eg. r0 = add(r0, #3)a
+ // Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
- if (!QII->isPostIncrement(MI) &&
- GetStoreValueOperand(MI).isReg() &&
- GetStoreValueOperand(MI).getReg() == DepReg) {
- for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).getReg() == DepReg) {
- return false;
- }
- }
- // 2. If data definition is because of implicit definition of the register,
- // do not newify the store. Eg.
- // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
- // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
- for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
- if (PacketMI->getOperand(opNum).isReg() &&
- PacketMI->getOperand(opNum).getReg() == DepReg &&
- PacketMI->getOperand(opNum).isDef() &&
- PacketMI->getOperand(opNum).isImplicit()) {
+ if (!HII->isPostIncrement(MI)) {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.getReg() == DepReg)
return false;
- }
}
}
+ // If data definition is because of implicit definition of the register,
+ // do not newify the store. Eg.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == DepReg || HRI->isSuperRegister(DepReg, R))
+ return false;
+ }
+
+ // Handle the case of an implicit use of a super-register. A target
+ // independent change should prevent this situation, but handle it here
+ // just in case. For example, we cannot newify R2 in the following case:
+ // %R3<def> = A2_tfrsi 0;
+ // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
+ return false;
+ }
+
// Can be dot new store.
return true;
}
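
Reduced to a checklist, the function above is a conjunction of the five architectural restrictions listed before it. A deliberately coarse sketch, assuming the caller has already computed each fact about the producer/store pair (the struct and its field names are invented for illustration):

// One flag per restriction; true means the restriction is violated.
struct NVStoreFacts {
  bool AddrRegIsNewValueReg; // 1 & 2: auto-increment or absolute-set
                             // address register equals the new value
  bool ProducerIs64Bit;      // 3: producer writes a 64-bit register pair
  bool PredicatesDisagree;   // 4: producer/store predication mismatch
  bool PacketHasOtherStore;  // 5: another store is already in the packet
};

bool mayFormNewValueStore(const NVStoreFacts &F) {
  return !F.AddrRegIsNewValueReg && !F.ProducerIs64Bit &&
         !F.PredicatesDisagree && !F.PacketHasOtherStore;
}

The real function also rejects implicit defs and implicit uses of the dependent register, which have no clean analogue in this flattened model.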
-// can this MI to promoted to either
-// new value store or new value jump
-bool HexagonPacketizerList::CanPromoteToNewValue(
- MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (!QII->mayBeNewStore(MI))
+// Can this MI be promoted to either a new-value store or a new-value jump?
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg,
+ MachineBasicBlock::iterator &MII) {
+ if (!HII->mayBeNewStore(MI))
return false;
- MachineInstr *PacketMI = PacketSU->getInstr();
-
// Check to see the store can be new value'ed.
- if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+ MachineInstr *PacketMI = PacketSU->getInstr();
+ if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
return true;
// Check to see the compare/jump can be new value'ed.
@@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
return false;
}
+static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+ return true;
+ return false;
+}
+
// Check to see if an instruction can be dot new
// There are three kinds.
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew(
- MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
// Already a dot new instruction.
- if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
+ if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
return false;
if (!isNewifiable(MI))
return false;
+ const MachineInstr *PI = PacketSU->getInstr();
+
+ // The "new value" cannot come from inline asm.
+ if (PI->isInlineAsm())
+ return false;
+
+ // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
+ // sense.
+ if (PI->isImplicitDef())
+ return false;
+
+ // If the dependency is through an implicitly defined register, we should
+ // not newify the use.
+ if (isImplicitDependency(PI, DepReg))
+ return false;
+
+ const MCInstrDesc& MCID = PI->getDesc();
+ const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+ if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
+ return false;
+
// predicate .new
- if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
- return true;
- else if (RC != &Hexagon::PredRegsRegClass &&
- !QII->mayBeNewStore(MI)) // MI is not a new-value store
+ // bug 5670: until that is fixed
+ // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+ if (RC == &Hexagon::PredRegsRegClass)
+ if (HII->isCondInst(MI) || MI->isReturn())
+ return HII->predCanBeUsedAsDotNew(PI, DepReg);
+
+ if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
+ return false;
+
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ int NewOpcode = HII->getDotNewOp(MI);
+ const MCInstrDesc &D = HII->get(NewOpcode);
+ MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MF.DeleteMachineInstr(NewMI);
+ if (!ResourcesAvailable)
+ return false;
+
+ // New Value Store only. New Value Jump generated as a separate pass.
+ if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
return false;
- else {
- // Create a dot new machine instruction to see if resources can be
- // allocated. If not, bail out now.
- int NewOpcode = QII->GetDotNewOp(MI);
- const MCInstrDesc &desc = QII->get(NewOpcode);
- DebugLoc dl;
- MachineInstr *NewMI =
- MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
- bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
- MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
-
- if (!ResourcesAvailable)
- return false;
- // new value store only
- // new new value jump generated as a passes
- if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
- return false;
- }
- }
return true;
}
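
Note the speculate-query-discard shape of the resource check above: a trial .new instruction is created solely to ask the resource tracker a question, then deleted before any packet state changes. A sketch of the same shape against a hypothetical slot-counting tracker (not the DFAPacketizer API):

#include <memory>

struct ToyInstrDesc { int Slots; }; // how many packet slots the form needs

struct ToySlotTracker {
  int FreeSlots;
  bool canReserve(const ToyInstrDesc &D) const { return D.Slots <= FreeSlots; }
};

// Create the trial descriptor, query, and let it die; because the trial
// never touches the packet, a "no" answer leaves nothing to undo.
bool newFormFits(const ToySlotTracker &RT, int SlotsForNewForm) {
  auto Trial = std::make_unique<ToyInstrDesc>(ToyInstrDesc{SlotsForNewForm});
  return RT.canReserve(*Trial); // Trial destroyed on return
}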
-// Go through the packet instructions and search for anti dependency
-// between them and DepReg from MI
-// Consider this case:
+// Go through the packet instructions and search for an anti dependency between
+// them and DepReg from MI. Consider this case:
// Trying to add
// a) %R1<def> = TFRI_cdNotPt %P3, 2
// to this packet:
// {
-// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
-// c) %P3<def> = TFR_PdRs %R23
-// d) %R1<def> = TFRI_cdnPt %P3, 4
+// b) %P0<def> = C2_or %P3<kill>, %P0<kill>
+// c) %P3<def> = C2_tfrrp %R23
+// d) %R1<def> = C2_cmovenewit %P3, 4
// }
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
-// Anti Dep between c) and b) is irrelevant for this case
-bool HexagonPacketizerList::RestrictingDepExistInPacket(
- MachineInstr *MI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+// Anti-dep between c) and b) is irrelevant for this case
+bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
+ unsigned DepReg) {
SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
- for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
- VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
-
+ for (auto I : CurrentPacketMIs) {
// We only care about dependencies on predicated instructions.
- if(!QII->isPredicated(*VIN)) continue;
+ if (!HII->isPredicated(I))
+ continue;
// Scheduling Unit for current insn in the packet
- SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
- // Look at dependencies between current members of the packet
- // and predicate defining instruction MI.
- // Make sure that dependency is on the exact register
- // we care about.
+ // Look at dependencies between current members of the packet and
+ // predicate defining instruction MI. Make sure that dependency is
+ // on the exact register we care about.
if (PacketSU->isSucc(PacketSUDep)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
- if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
- (PacketSU->Succs[i].getKind() == SDep::Anti) &&
- (PacketSU->Succs[i].getReg() == DepReg)) {
+ auto &Dep = PacketSU->Succs[i];
+ if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
+ Dep.getReg() == DepReg)
return true;
- }
}
}
}
@@ -831,276 +821,362 @@ static unsigned getPredicatedRegister(MachineInstr *MI,
const HexagonInstrInfo *QII) {
/// We use the following rule: The first predicate register that is a use is
/// the predicate register of a predicated instruction.
-
assert(QII->isPredicated(MI) && "Must be predicated instruction");
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- MachineOperand &Op = *OI;
+ for (auto &Op : MI->operands()) {
if (Op.isReg() && Op.getReg() && Op.isUse() &&
Hexagon::PredRegsRegClass.contains(Op.getReg()))
return Op.getReg();
}
llvm_unreachable("Unknown instruction operand layout");
-
return 0;
}
// Given two predicated instructions, this function detects whether
-// the predicates are complements
-bool HexagonPacketizerList::ArePredicatesComplements(
- MachineInstr *MI1, MachineInstr *MI2,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
+// the predicates are complements.
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
+ MachineInstr *MI2) {
// If we don't know the predicate sense of the instructions, bail out early;
// we need it later.
- if (getPredicateSense(MI1, QII) == PK_Unknown ||
- getPredicateSense(MI2, QII) == PK_Unknown)
+ if (getPredicateSense(MI1, HII) == PK_Unknown ||
+ getPredicateSense(MI2, HII) == PK_Unknown)
return false;
- // Scheduling unit for candidate
- SUnit *SU = MIToSUnit.find(MI1)->second;
+ // Scheduling unit for candidate.
+ SUnit *SU = MIToSUnit[MI1];
// One corner case deals with the following scenario:
// Trying to add
- // a) %R24<def> = TFR_cPt %P0, %R25
+ // a) %R24<def> = A2_tfrt %P0, %R25
// to this packet:
- //
// {
- // b) %R25<def> = TFR_cNotPt %P0, %R24
- // c) %P0<def> = CMPEQri %R26, 1
+ // b) %R25<def> = A2_tfrf %P0, %R24
+ // c) %P0<def> = C2_cmpeqi %R26, 1
// }
//
- // On general check a) and b) are complements, but
- // presence of c) will convert a) to .new form, and
- // then it is not a complement
- // We attempt to detect it by analyzing existing
- // dependencies in the packet
+ // On general check a) and b) are complements, but presence of c) will
+ // convert a) to .new form, and then it is not a complement.
+ // We attempt to detect it by analyzing existing dependencies in the packet.
// Analyze relationships between all existing members of the packet.
- // Look for Anti dependecy on the same predicate reg
- // as used in the candidate
- for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
- VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
-
- // Scheduling Unit for current insn in the packet
- SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
+ // Look for an anti dependency on the same predicate reg as used in the
+ // candidate.
+ for (auto I : CurrentPacketMIs) {
+ // Scheduling Unit for current insn in the packet.
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
// If this instruction in the packet is succeeded by the candidate...
if (PacketSU->isSucc(SU)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
- // The corner case exist when there is true data
- // dependency between candidate and one of current
- // packet members, this dep is on predicate reg, and
- // there already exist anti dep on the same pred in
+ auto Dep = PacketSU->Succs[i];
+ // The corner case exists when there is a true data dependency between
+ // the candidate and one of the current packet members, this dep is on a
+ // predicate reg, and there already exists an anti dep on the same pred in
// the packet.
- if (PacketSU->Succs[i].getSUnit() == SU &&
- PacketSU->Succs[i].getKind() == SDep::Data &&
- Hexagon::PredRegsRegClass.contains(
- PacketSU->Succs[i].getReg()) &&
- // Here I know that *VIN is predicate setting instruction
- // with true data dep to candidate on the register
- // we care about - c) in the above example.
- // Now I need to see if there is an anti dependency
- // from c) to any other instruction in the
- // same packet on the pred reg of interest
- RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
- MIToSUnit)) {
- return false;
+ if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
+ Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
+ // Here I know that I is predicate setting instruction with true
+ // data dep to candidate on the register we care about - c) in the
+ // above example. Now I need to see if there is an anti dependency
+ // from c) to any other instruction in the same packet on the pred
+ // reg of interest.
+ if (restrictingDepExistInPacket(I, Dep.getReg()))
+ return false;
}
}
}
}
- // If the above case does not apply, check regular
- // complement condition.
- // Check that the predicate register is the same and
- // that the predicate sense is different
- // We also need to differentiate .old vs. .new:
- // !p0 is not complimentary to p0.new
- unsigned PReg1 = getPredicatedRegister(MI1, QII);
- unsigned PReg2 = getPredicatedRegister(MI2, QII);
- return ((PReg1 == PReg2) &&
- Hexagon::PredRegsRegClass.contains(PReg1) &&
- Hexagon::PredRegsRegClass.contains(PReg2) &&
- (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) &&
- (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
+ // If the above case does not apply, check regular complement condition.
+ // Check that the predicate register is the same and that the predicate
+ // sense is different. We also need to differentiate .old vs. .new: !p0
+ // is not complementary to p0.new.
+ unsigned PReg1 = getPredicatedRegister(MI1, HII);
+ unsigned PReg2 = getPredicatedRegister(MI2, HII);
+ return PReg1 == PReg2 &&
+ Hexagon::PredRegsRegClass.contains(PReg1) &&
+ Hexagon::PredRegsRegClass.contains(PReg2) &&
+ getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
+ HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
}
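
The closing condition above can be read as a three-part predicate over the two instructions' predication: same predicate register, opposite sense, matching .old/.new form. A standalone restatement over a toy descriptor (field names invented):

struct ToyPredInfo {
  unsigned PredReg; // which predicate register guards the instruction
  bool SenseTrue;   // predicated on p (true) vs. !p (false)
  bool IsDotNew;    // whether the .new form of the predicate is read
};

bool areComplementsToy(const ToyPredInfo &A, const ToyPredInfo &B) {
  return A.PredReg == B.PredReg &&
         A.SenseTrue != B.SenseTrue &&
         A.IsDotNew == B.IsDotNew;
}

So p0 against !p0 complements, but p0 against !p0.new does not, which is exactly the .old vs. .new distinction the comment calls out.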
-// initPacketizerState - Initialize packetizer flags
+// Initialize packetizer flags.
void HexagonPacketizerList::initPacketizerState() {
-
Dependence = false;
PromotedToDotNew = false;
GlueToNewValueJump = false;
GlueAllocframeStore = false;
FoundSequentialDependence = false;
-
- return;
}
-// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
-bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+// Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock*) {
if (MI->isDebugValue())
return true;
if (MI->isCFIInstruction())
return false;
- // We must print out inline assembly
+ // We must print out inline assembly.
if (MI->isInlineAsm())
return false;
- // We check if MI has any functional units mapped to it.
- // If it doesn't, we ignore the instruction.
+ if (MI->isImplicitDef())
+ return false;
+
+ // We check if MI has any functional units mapped to it. If it doesn't,
+ // we ignore the instruction.
const MCInstrDesc& TID = MI->getDesc();
- unsigned SchedClass = TID.getSchedClass();
- const InstrStage* IS =
- ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
unsigned FuncUnits = IS->getUnits();
return !FuncUnits;
}
-// isSoloInstruction: - Returns true for instructions that must be
-// scheduled in their own packet.
-bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) {
if (MI->isEHLabel() || MI->isCFIInstruction())
return true;
- if (MI->isInlineAsm())
+ // Consider inline asm to not be a solo instruction by default.
+ // Inline asm will be put in a packet temporarily, but then it will be
+ // removed, and placed outside of the packet (before or after, depending
+ // on dependencies). This is to reduce the impact of inline asm as a
+ // "packet splitting" instruction.
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
// trap, pause, barrier, icinva, isync, and syncht are solo instructions.
// They must not be grouped with other instructions in a packet.
- if (IsSchedBarrier(MI))
+ if (isSchedBarrier(MI))
+ return true;
+
+ if (HII->isSolo(MI))
+ return true;
+
+ if (MI->getOpcode() == Hexagon::A2_nop)
return true;
return false;
}
-// isLegalToPacketizeTogether:
-// SUI is the current instruction that is out side of the current packet.
-// SUJ is the current instruction inside the current packet against which that
-// SUI will be packetized.
-bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
- MachineInstr *I = SUI->getInstr();
- MachineInstr *J = SUJ->getInstr();
- assert(I && J && "Unable to packetize null instruction!");
- const MCInstrDesc &MCIDI = I->getDesc();
- const MCInstrDesc &MCIDJ = J->getDesc();
+// Quick check if instructions MI and MJ cannot coexist in the same packet.
+// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
+// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
+// For full test call this function twice:
+// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
+// Doing the test only one way saves the amount of code in this function,
+// since every test would need to be repeated with the MI and MJ reversed.
+static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
+ const HexagonInstrInfo &HII) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
+ HII.isHVXMemWithAIndirect(MI, MJ))
+ return true;
- MachineBasicBlock::iterator II = I;
+ // An inline asm cannot be together with a branch, because we may not be
+ // able to move the asm out after packetizing (i.e. if the asm must be
+ // moved past the bundle). Similarly, two asms cannot be together to avoid
+ // complications when determining their relative order outside of a bundle.
+ if (MI->isInlineAsm())
+ return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
+ MJ->isCall() || MJ->isTerminator();
- const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // "False" really means that the quick check failed to determine if
+ // I and J cannot coexist.
+ return false;
+}
- // Inline asm cannot go in the packet.
- if (I->getOpcode() == Hexagon::INLINEASM)
- llvm_unreachable("Should not meet inline asm here!");
- if (isSoloInstruction(I))
- llvm_unreachable("Should not meet solo instr here!");
+// Full, symmetric check.
+bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
+ const MachineInstr *MJ) {
+ return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
+}
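
The wrapper is worth imitating: each pairwise rule is written once for an ordered pair, and the full check tries both orders, halving the case analysis. A generic sketch of the idiom:

// CannotCoexistAsymm is any ordered-pair rule; the wrapper makes the
// relation symmetric by testing both orders.
template <typename T, typename AsymmFn>
bool symmetricConflict(const T &A, const T &B, AsymmFn CannotCoexistAsymm) {
  return CannotCoexistAsymm(A, B) || CannotCoexistAsymm(B, A);
}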
- // A save callee-save register function call can only be in a packet
- // with instructions that don't write to the callee-save registers.
- if ((QII->isSaveCalleeSavedRegsCall(I) &&
- DoesModifyCalleeSavedReg(J, QRI)) ||
- (QII->isSaveCalleeSavedRegsCall(J) &&
- DoesModifyCalleeSavedReg(I, QRI))) {
- Dependence = true;
- return false;
+void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
+ for (auto &B : MF) {
+ MachineBasicBlock::iterator BundleIt;
+ MachineBasicBlock::instr_iterator NextI;
+ for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+ if (MI->isBundle())
+ BundleIt = I;
+ if (!MI->isInsideBundle())
+ continue;
+
+ // Decide on where to insert the instruction that we are pulling out.
+ // Debug instructions always go before the bundle, but the placement of
+ // INLINE_ASM depends on potential dependencies. By default, try to
+ // put it before the bundle, but if the asm writes to a register that
+ // other instructions in the bundle read, then we need to place it
+ // after the bundle (to preserve the bundle semantics).
+ bool InsertBeforeBundle;
+ if (MI->isInlineAsm())
+ InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
+ else if (MI->isDebugValue())
+ InsertBeforeBundle = true;
+ else
+ continue;
+
+ BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
+ }
}
+}
- // Two control flow instructions cannot go in the same packet.
- if (IsControlFlow(I) && IsControlFlow(J)) {
- Dependence = true;
- return false;
+// Check if a given instruction is of class "system".
+static bool isSystemInstr(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::Y2_barrier:
+ case Hexagon::Y2_dcfetchbo:
+ return true;
}
+ return false;
+}
- // A LoopN instruction cannot appear in the same packet as a jump or call.
- if (IsLoopN(I) &&
- (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) {
- Dependence = true;
+bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // The dependence graph may not include edges between dead definitions,
+ // so without extra checks, we could end up packetizing two instructions
+ // defining the same (dead) register.
+ if (I->isCall() || J->isCall())
return false;
- }
- if (IsLoopN(J) &&
- (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) {
- Dependence = true;
+ if (HII->isPredicated(I) || HII->isPredicated(J))
return false;
+
+ BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
+ for (auto &MO : I->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ DeadDefs[MO.getReg()] = true;
}
+ for (auto &MO : J->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ unsigned R = MO.getReg();
+ if (R != Hexagon::USR_OVF && DeadDefs[R])
+ return true;
+ }
+ return false;
+}
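
The check above materializes I's dead defs in a bit vector sized by the register file, then probes it with J's dead defs. The same shape in standalone form, using std::bitset and ignoring the USR_OVF exemption the real code makes (register numbers are illustrative):

#include <bitset>
#include <cstddef>
#include <vector>

constexpr std::size_t NumToyRegs = 256; // stand-in for Hexagon::NUM_TARGET_REGS

bool haveCommonDeadDef(const std::vector<unsigned> &DeadDefsI,
                       const std::vector<unsigned> &DeadDefsJ) {
  std::bitset<NumToyRegs> Seen;
  for (unsigned R : DeadDefsI)
    Seen.set(R); // record I's dead definitions
  for (unsigned R : DeadDefsJ)
    if (Seen.test(R)) // J also dead-defines the same register
      return true;
  return false;
}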
+
+bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // A save callee-save register function call can only be in a packet
+ // with instructions that don't write to the callee-save registers.
+ if ((HII->isSaveCalleeSavedRegsCall(I) &&
+ doesModifyCalleeSavedReg(J, HRI)) ||
+ (HII->isSaveCalleeSavedRegsCall(J) &&
+ doesModifyCalleeSavedReg(I, HRI)))
+ return true;
+
+ // Two control flow instructions cannot go in the same packet.
+ if (isControlFlow(I) && isControlFlow(J))
+ return true;
+
+ // Reference manual 7.3.4: a loop setup packet in loopN or spNloop0 cannot
+ // contain a speculative indirect jump, a new-value compare jump, or a
+ // dealloc_return.
+ auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
+ if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
+ return true;
+ if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
+ return true;
+ return false;
+ };
+
+ if (HII->isLoopN(I) && isBadForLoopN(J))
+ return true;
+ if (HII->isLoopN(J) && isBadForLoopN(I))
+ return true;
+
// dealloc_return cannot appear in the same packet as a conditional or
// unconditional jump.
- if (QII->isDeallocRet(I) &&
- (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) {
- Dependence = true;
- return false;
+ return HII->isDeallocRet(I) &&
+ (J->isBranch() || J->isCall() || J->isBarrier());
+}
+
+bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
+ bool StoreI = I->mayStore(), StoreJ = J->mayStore();
+ if ((SysI && StoreJ) || (SysJ && StoreI))
+ return true;
+
+ if (StoreI && StoreJ) {
+ if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
+ return true;
+ } else {
+ // A memop cannot be in the same packet with another memop or a store.
+ // Two stores can be together, but here I and J cannot both be stores.
+ bool MopStI = HII->isMemOp(I) || StoreI;
+ bool MopStJ = HII->isMemOp(J) || StoreJ;
+ if (MopStI && MopStJ)
+ return true;
}
+ return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
+}
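
The branch structure above encodes a small truth table over store/memop/system properties. A flattened sketch with the same structure, over an abstract property record (fields invented; dealloc_return is modeled as a flag rather than an opcode):

struct MemProps {
  bool IsStore, IsMemOp, IsSystem, IsNewValue, IsDeallocRet;
};

bool v4MemConflict(const MemProps &I, const MemProps &J) {
  if ((I.IsSystem && J.IsStore) || (J.IsSystem && I.IsStore))
    return true; // system instructions do not pack with stores
  if (I.IsStore && J.IsStore) {
    // Dual stores are fine unless one needs slot 0 as NV or memop.
    if (J.IsNewValue || J.IsMemOp || I.IsMemOp)
      return true;
  } else {
    // A memop cannot pack with another memop or with a store.
    if ((I.IsMemOp || I.IsStore) && (J.IsMemOp || J.IsStore))
      return true;
  }
  return (J.IsStore && I.IsDeallocRet) || (I.IsStore && J.IsDeallocRet);
}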
- // V4 allows dual store. But does not allow second store, if the
- // first store is not in SLOT0. New value store, new value jump,
- // dealloc_return and memop always take SLOT0.
- // Arch spec 3.4.4.2
- if (MCIDI.mayStore() && MCIDJ.mayStore() &&
- (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
- Dependence = true;
+// SUI is the current instruction that is out side of the current packet.
+// SUJ is the current instruction inside the current packet against which that
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ // Clear IgnoreDepMIs when Packet starts.
+ if (CurrentPacketMIs.size() == 1)
+ IgnoreDepMIs.clear();
+
+ MachineBasicBlock::iterator II = I;
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ // Solo instructions cannot go in the packet.
+ assert(!isSoloInstruction(I) && "Unexpected solo instr!");
+
+ if (cannotCoexist(I, J))
return false;
- }
- if ((QII->isMemOp(J) && MCIDI.mayStore())
- || (MCIDJ.mayStore() && QII->isMemOp(I))
- || (QII->isMemOp(J) && QII->isMemOp(I))) {
- Dependence = true;
+ Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
+ if (Dependence)
return false;
- }
- //if dealloc_return
- if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
- Dependence = true;
+ // V4 allows dual stores. It does not allow second store, if the first
+ // store is not in SLOT0. New value store, new value jump, dealloc_return
+ // and memop always take SLOT0. Arch spec 3.4.4.2.
+ Dependence = hasV4SpecificDependence(I, J);
+ if (Dependence)
return false;
- }
// If an instruction feeds new value jump, glue it.
MachineBasicBlock::iterator NextMII = I;
++NextMII;
- if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+ if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
MachineInstr *NextMI = NextMII;
bool secondRegMatch = false;
- bool maintainNewValueJump = false;
+ const MachineOperand &NOp0 = NextMI->getOperand(0);
+ const MachineOperand &NOp1 = NextMI->getOperand(1);
- if (NextMI->getOperand(1).isReg() &&
- I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
secondRegMatch = true;
- maintainNewValueJump = true;
- }
-
- if (!secondRegMatch &&
- I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
- maintainNewValueJump = true;
- }
- for (std::vector<MachineInstr*>::iterator
- VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE && maintainNewValueJump); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
-
- // NVJ can not be part of the dual jump - Arch Spec: section 7.8
- if (PacketSU->getInstr()->getDesc().isCall()) {
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ MachineInstr *PI = PacketSU->getInstr();
+ // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
+ if (PI->isCall()) {
Dependence = true;
break;
}
- // Validate
+ // Validate:
// 1. Packet does not have a store in it.
// 2. If the first operand of the nvj is newified, and the second
// operand is also a reg, it (second reg) is not defined in
@@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// 3. If the second operand of the nvj is newified, (which means
// first operand is also a reg), first reg is not defined in
// the same packet.
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- // Check #2.
- (!secondRegMatch && NextMI->getOperand(1).isReg() &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(1).getReg(), QRI)) ||
- // Check #3.
- (secondRegMatch &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(0).getReg(), QRI))) {
+ if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+ HII->isLoopN(PI)) {
+ Dependence = true;
+ break;
+ }
+ // Check #2/#3.
+ const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+ if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
Dependence = true;
break;
}
}
- if (!Dependence)
- GlueToNewValueJump = true;
- else
+
+ if (Dependence)
return false;
+ GlueToNewValueJump = true;
}
- if (SUJ->isSucc(SUI)) {
- for (unsigned i = 0;
- (i < SUJ->Succs.size()) && !FoundSequentialDependence;
- ++i) {
+ // There is no dependency between a prolog instruction and its successor.
+ if (!SUJ->isSucc(SUI))
+ return true;
- if (SUJ->Succs[i].getSUnit() != SUI) {
- continue;
- }
+ for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+ if (FoundSequentialDependence)
+ break;
- SDep::Kind DepType = SUJ->Succs[i].getKind();
+ if (SUJ->Succs[i].getSUnit() != SUI)
+ continue;
- // For direct calls:
- // Ignore register dependences for call instructions for
- // packetization purposes except for those due to r31 and
- // predicate registers.
- //
- // For indirect calls:
- // Same as direct calls + check for true dependences to the register
- // used in the indirect call.
- //
- // We completely ignore Order dependences for call instructions
- //
- // For returns:
- // Ignore register dependences for return instructions like jumpr,
- // dealloc return unless we have dependencies on the explicit uses
- // of the registers used by jumpr (like r31) or dealloc return
- // (like r29 or r30).
- //
- // TODO: Currently, jumpr is handling only return of r31. So, the
- // following logic (specificaly IsCallDependent) is working fine.
- // We need to enable jumpr for register other than r31 and then,
- // we need to rework the last part, where it handles indirect call
- // of that (IsCallDependent) function. Bug 6216 is opened for this.
- //
- unsigned DepReg = 0;
- const TargetRegisterClass* RC = nullptr;
- if (DepType == SDep::Data) {
- DepReg = SUJ->Succs[i].getReg();
- RC = QRI->getMinimalPhysRegClass(DepReg);
- }
- if ((MCIDI.isCall() || MCIDI.isReturn()) &&
- (!IsRegDependence(DepType) ||
- !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
- /* do nothing */
- }
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+ // For direct calls:
+ // Ignore register dependences for call instructions for packetization
+ // purposes except for those due to r31 and predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions.
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+ // TODO: Currently, jumpr is handling only return of r31. So, the
+ // following logic (specifically isCallDependent) is working fine.
+ // We need to enable jumpr for registers other than r31 and then
+ // we need to rework the last part, where it handles indirect call
+ // of that (isCallDependent) function. Bug 6216 is opened for this.
+ unsigned DepReg = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = HRI->getMinimalPhysRegClass(DepReg);
+ }
- // For instructions that can be promoted to dot-new, try to promote.
- else if ((DepType == SDep::Data) &&
- CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
- PromoteToDotNew(I, DepType, II, RC)) {
- PromotedToDotNew = true;
- /* do nothing */
- }
+ if (I->isCall() || I->isReturn()) {
+ if (!isRegDependence(DepType))
+ continue;
+ if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+ continue;
+ }
- else if ((DepType == SDep::Data) &&
- (QII->isNewValueJump(I))) {
- /* do nothing */
- }
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+ if (promoteToDotCur(J, DepType, II, RC))
+ continue;
+ }
- // For predicated instructions, if the predicates are complements
- // then there can be no dependence.
- else if (QII->isPredicated(I) &&
- QII->isPredicated(J) &&
- ArePredicatesComplements(I, J, MIToSUnit)) {
- /* do nothing */
+ // A data dependence is OK if we have a load.cur.
+ if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+ if (HII->isV60VectorInstruction(I))
+ continue;
+ }
+ // For instructions that can be promoted to dot-new, try to promote.
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+ if (promoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ continue;
+ }
}
- else if (IsDirectJump(I) &&
- !MCIDJ.isBranch() &&
- !MCIDJ.isCall() &&
- (DepType == SDep::Order)) {
- // Ignore Order dependences between unconditional direct branches
- // and non-control-flow instructions
- /* do nothing */
- }
- else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
- (DepType != SDep::Output)) {
- // Ignore all dependences for jumps except for true and output
- // dependences
- /* do nothing */
- }
-
- // Ignore output dependences due to superregs. We can
- // write to two different subregisters of R1:0 for instance
- // in the same cycle
- //
+ if (HII->isNewValueJump(I))
+ continue;
+ }
+ // For predicated instructions, if the predicates are complements then
+ // there can be no dependence.
+ if (HII->isPredicated(I) && HII->isPredicated(J) &&
+ arePredicatesComplements(I, J)) {
+ // Not always safe to do this translation.
+ // DAG Builder attempts to reduce dependence edges using transitive
+ // nature of dependencies. Here is an example:
//
- // Let the
- // If neither I nor J defines DepReg, then this is a
- // superfluous output dependence. The dependence must be of the
- // form:
- // R0 = ...
- // R1 = ...
- // and there is an output dependence between the two instructions
- // with
- // DepReg = D0
- // We want to ignore these dependences.
- // Ideally, the dependence constructor should annotate such
- // dependences. We can then avoid this relatively expensive check.
+ // r0 = tfr_pt ... (1)
+ // r0 = tfr_pf ... (2)
+ // r0 = tfr_pt ... (3)
//
- else if (DepType == SDep::Output) {
- // DepReg is the register that's responsible for the dependence.
- unsigned DepReg = SUJ->Succs[i].getReg();
+ // There will be an output dependence between (1)->(2) and (2)->(3).
+ // However, there is no dependence edge between (1)->(3). This results
+ // in all 3 instructions going in the same packet. We ignore the
+ // dependence only once to avoid this situation.
+ auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
+ if (Itr != IgnoreDepMIs.end()) {
+ Dependence = true;
+ return false;
+ }
+ IgnoreDepMIs.push_back(I);
+ continue;
+ }
+
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions.
+ if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
+ DepType == SDep::Order)
+ continue;
+
+ // Ignore all dependences for jumps except for true and output
+ // dependences.
+ if (I->isConditionalBranch() && DepType != SDep::Data &&
+ DepType != SDep::Output)
+ continue;
+
+ // Ignore output dependences due to superregs. We can write to two
+ // different subregisters of R1:0 for instance in the same cycle.
+
+ // If neither I nor J defines DepReg, then this is a superfluous output
+ // dependence. The dependence must be of the form:
+ // R0 = ...
+ // R1 = ...
+ // and there is an output dependence between the two instructions with
+ // DepReg = D0.
+ // We want to ignore these dependences. Ideally, the dependence
+ // constructor should annotate such dependences. We can then avoid this
+ // relatively expensive check.
+ //
+ if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check if I and J really defines DepReg.
+ if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
+ continue;
+ FoundSequentialDependence = true;
+ break;
+ }
- // Check if I and J really defines DepReg.
- if (I->definesRegister(DepReg) ||
- J->definesRegister(DepReg)) {
+ // For Order dependences:
+ // 1. On V4 or later, volatile loads/stores can be packetized together,
+ // unless other rules prevent it.
+ // 2. Store followed by a load is not allowed.
+ // 3. Store followed by a store is only valid on V4 or later.
+ // 4. Load followed by any memory operation is allowed.
+ if (DepType == SDep::Order) {
+ if (!PacketizeVolatiles) {
+ bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
+ if (OrdRefs) {
FoundSequentialDependence = true;
break;
}
}
-
- // We ignore Order dependences for
- // 1. Two loads unless they are volatile.
- // 2. Two stores in V4 unless they are volatile.
- else if ((DepType == SDep::Order) &&
- !I->hasOrderedMemoryRef() &&
- !J->hasOrderedMemoryRef()) {
- if (MCIDI.mayStore() && MCIDJ.mayStore()) {
- /* do nothing */
- }
- // store followed by store-- not OK on V2
- // store followed by load -- not OK on all (OK if addresses
- // are not aliased)
- // load followed by store -- OK on all
- // load followed by load -- OK on all
- else if ( !MCIDJ.mayStore()) {
- /* do nothing */
- }
- else {
+ // J is first, I is second.
+ bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
+ bool LoadI = I->mayLoad(), StoreI = I->mayStore();
+ if (StoreJ) {
+ // Two stores are only allowed on V4+. A load following a store is never
+ // allowed.
+ if (LoadI) {
FoundSequentialDependence = true;
break;
}
- }
-
- // For V4, special case ALLOCFRAME. Even though there is dependency
- // between ALLOCFRAME and subsequent store, allow it to be
- // packetized in a same packet. This implies that the store is using
- // caller's SP. Hence, offset needs to be updated accordingly.
- else if (DepType == SDep::Data
- && J->getOpcode() == Hexagon::S2_allocframe
- && (I->getOpcode() == Hexagon::S2_storerd_io
- || I->getOpcode() == Hexagon::S2_storeri_io
- || I->getOpcode() == Hexagon::S2_storerb_io)
- && I->getOperand(0).getReg() == QRI->getStackRegister()
- && QII->isValidOffset(I->getOpcode(),
- I->getOperand(1).getImm() -
- (FrameSize + HEXAGON_LRFP_SIZE)))
- {
- GlueAllocframeStore = true;
- // Since this store is to be glued with allocframe in the same
- // packet, it will use SP of the previous stack frame, i.e
- // caller's SP. Therefore, we need to recalculate offset according
- // to this change.
- I->getOperand(1).setImm(I->getOperand(1).getImm() -
- (FrameSize + HEXAGON_LRFP_SIZE));
- }
-
- //
- // Skip over anti-dependences. Two instructions that are
- // anti-dependent can share a packet
- //
- else if (DepType != SDep::Anti) {
+ } else if (!LoadJ || (!LoadI && !StoreI)) {
+ // If J is neither load nor store, assume a dependency.
+ // If J is a load, but I is neither, also assume a dependency.
FoundSequentialDependence = true;
break;
}
+ // Store followed by store: not OK on V2.
+ // Store followed by load: not OK on all.
+ // Load followed by store: OK on all.
+ // Load followed by load: OK on all.
+ continue;
}
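The four summary comments above compress into a small decision table. A minimal standalone sketch of that table, assuming the volatile/ordered case has already been rejected; mayShareMemOrder is an illustrative name, not part of the packetizer:

#include <cassert>

// J is the earlier instruction in the packet, I the later one.
static bool mayShareMemOrder(bool LoadJ, bool StoreJ, bool LoadI, bool StoreI) {
  if (StoreJ)
    return !LoadI;          // store followed by load is rejected everywhere
  if (!LoadJ)
    return false;           // J is neither load nor store: assume dependence
  return LoadI || StoreI;   // load->load and load->store are fine
}

int main() {
  assert(!mayShareMemOrder(false, true, true, false));  // store then load
  assert(mayShareMemOrder(true, false, false, true));   // load then store
}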
- if (FoundSequentialDependence) {
- Dependence = true;
- return false;
+ // For V4, special-case ALLOCFRAME. Even though there is a dependency
+ // between ALLOCFRAME and the subsequent store, allow them to be
+ // packetized in the same packet. This implies that the store is using
+ // the caller's SP. Hence, the offset needs to be updated accordingly.
+ if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
+ int64_t Imm = I->getOperand(1).getImm();
+ int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
+ if (HII->isValidOffset(Opc, NewOff)) {
+ GlueAllocframeStore = true;
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use SP of the previous stack frame, i.e.
+ // caller's SP. Therefore, we need to recalculate offset
+ // according to this change.
+ I->getOperand(1).setImm(NewOff);
+ continue;
+ }
+ }
+ default:
+ break;
+ }
+ }
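A worked example of the offset rewrite above, with assumed numbers: the LR:FP pair that allocframe pushes is 8 bytes (what HEXAGON_LRFP_SIZE denotes), and the frame size here is hypothetical.

#include <cassert>
#include <cstdint>

int main() {
  const int64_t FrameSize = 24;  // assumed size of the new stack frame
  const int64_t LRFPSize  = 8;   // LR:FP pair pushed by allocframe
  int64_t Imm = 40;              // store offset relative to the new SP
  // Glued with allocframe, the store still sees the caller's SP, which sits
  // FrameSize + LRFPSize above the new SP, so the offset shrinks by that much.
  int64_t NewOff = Imm - (FrameSize + LRFPSize);
  assert(NewOff == 8);
}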
+
+ // Skip over anti-dependences. Two instructions that are anti-dependent
+ // can share a packet.
+ if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
}
}
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+
return true;
}
-// isLegalToPruneDependencies
bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
MachineInstr *I = SUI->getInstr();
- assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
-
- const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
-
- if (Dependence) {
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
- // Check if the instruction was promoted to a dot-new. If so, demote it
- // back into a dot-old.
- if (PromotedToDotNew) {
- DemoteToDotOld(I);
- }
+ if (cannotCoexist(I, J))
+ return false;
- // Check if the instruction (must be a store) was glued with an Allocframe
- // instruction. If so, restore its offset to its original value, i.e. use
- // curent SP instead of caller's SP.
- if (GlueAllocframeStore) {
- I->getOperand(1).setImm(I->getOperand(1).getImm() +
- FrameSize + HEXAGON_LRFP_SIZE);
- }
+ if (!Dependence)
+ return true;
- return false;
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew)
+ demoteToDotOld(I);
+
+ cleanUpDotCur();
+ // Check if the instruction (must be a store) was glued with an allocframe
+ // instruction. If so, restore its offset to its original value, i.e. use
+ // current SP instead of caller's SP.
+ if (GlueAllocframeStore) {
+ unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ MachineOperand &MOff = I->getOperand(1);
+ MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
}
- return true;
+ return false;
}
+
MachineBasicBlock::iterator
HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (MI->isImplicitDef()) {
+ unsigned R = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(R)) {
+ MCSuperRegIterator S(R, HRI, false);
+ MI->addOperand(MachineOperand::CreateReg(*S, true, true));
+ }
+ return MII;
+ }
+ assert(ResourceTracker->canReserveResources(MI));
+
+ bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
+ bool Good = true;
+
+ if (GlueToNewValueJump) {
+ MachineInstr *NvjMI = ++MII;
+ // We need to put both instructions in the same packet: MI and NvjMI.
+ // Either of them can require a constant extender. Try to add both to
+ // the current packet, and if that fails, end the packet and start a
+ // new one.
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI)
+ Good = tryAllocateResourcesForConstExt(true);
+
+ bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
+ if (Good) {
+ if (ResourceTracker->canReserveResources(NvjMI))
+ ResourceTracker->reserveResources(NvjMI);
+ else
+ Good = false;
+ }
+ if (Good && ExtNvjMI)
+ Good = tryAllocateResourcesForConstExt(true);
- MachineBasicBlock::iterator MII = MI;
- MachineBasicBlock *MBB = MI->getParent();
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
- if (GlueToNewValueJump) {
-
- ++MII;
- MachineInstr *nvjMI = MII;
+ if (!Good) {
+ endPacket(MBB, MI);
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
- if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
- !tryAllocateResourcesForConstExt(MI)) {
- endPacket(MBB, MI);
- ResourceTracker->reserveResources(MI);
- assert(canReserveResourcesForConstExt(MI) &&
- "Ensure that there is a slot");
- reserveResourcesForConstExt(MI);
- // Reserve resources for new value jump constant extender.
- assert(canReserveResourcesForConstExt(MI) &&
- "Ensure that there is a slot");
- reserveResourcesForConstExt(nvjMI);
- assert(ResourceTracker->canReserveResources(nvjMI) &&
- "Ensure that there is a slot");
-
- } else if ( // Extended instruction takes two slots in the packet.
- // Try reserve and allocate 4-byte in the current packet first.
- (QII->isExtended(nvjMI)
- && (!tryAllocateResourcesForConstExt(nvjMI)
- || !ResourceTracker->canReserveResources(nvjMI)))
- || // For non-extended instruction, no need to allocate extra 4 bytes.
- (!QII->isExtended(nvjMI) &&
- !ResourceTracker->canReserveResources(nvjMI)))
- {
- endPacket(MBB, MI);
- // A new and empty packet starts.
- // We are sure that the resources requirements can be satisfied.
- // Therefore, do not need to call "canReserveResources" anymore.
- ResourceTracker->reserveResources(MI);
- if (QII->isExtended(nvjMI))
- reserveResourcesForConstExt(nvjMI);
+ if (ExtMI) {
+ assert(canReserveResourcesForConstExt());
+ tryAllocateResourcesForConstExt(true);
}
- // Here, we are sure that "reserveResources" would succeed.
- ResourceTracker->reserveResources(nvjMI);
- CurrentPacketMIs.push_back(MI);
- CurrentPacketMIs.push_back(nvjMI);
- } else {
- if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
- && ( !tryAllocateResourcesForConstExt(MI)
- || !ResourceTracker->canReserveResources(MI)))
- {
- endPacket(MBB, MI);
- // Check if the instruction was promoted to a dot-new. If so, demote it
- // back into a dot-old
- if (PromotedToDotNew) {
- DemoteToDotOld(MI);
- }
- reserveResourcesForConstExt(MI);
+ assert(ResourceTracker->canReserveResources(NvjMI));
+ ResourceTracker->reserveResources(NvjMI);
+ if (ExtNvjMI) {
+ assert(canReserveResourcesForConstExt());
+ reserveResourcesForConstExt();
}
- // In case that "MI" is not an extended insn,
- // the resource availability has already been checked.
- ResourceTracker->reserveResources(MI);
- CurrentPacketMIs.push_back(MI);
}
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(NvjMI);
return MII;
+ }
+
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
+ endPacket(MBB, MI);
+ if (PromotedToDotNew)
+ demoteToDotOld(MI);
+ ResourceTracker->reserveResources(MI);
+ reserveResourcesForConstExt();
+ }
+
+ CurrentPacketMIs.push_back(MI);
+ return MII;
+}
+
+void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ OldPacketMIs = CurrentPacketMIs;
+ VLIWPacketizerList::endPacket(MBB, MI);
+}
+
+bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) {
+ return !producesStall(MI);
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+static bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) {
+ if (!ProdMI->getOperand(0).isReg())
+ return false;
+ unsigned DstReg = ProdMI->getOperand(0).getReg();
+
+ for (auto &Op : ConsMI->operands())
+ if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
+ // The MIs depend on each other.
+ return true;
+
+ return false;
}
+// V60 forward scheduling.
+bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
+ // Check whether the previous packet is in a different loop. If this is the
+ // case, there is little point in trying to avoid a stall because that would
+ // favor the rare case (loop entry) over the common case (loop iteration).
+ //
+ // TODO: We should really be able to check all the incoming edges if this is
+ // the first packet in a basic block, so we can avoid stalls from the loop
+ // backedge.
+ if (!OldPacketMIs.empty()) {
+ auto *OldBB = OldPacketMIs.front()->getParent();
+ auto *ThisBB = I->getParent();
+ if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
+ return false;
+ }
+
+ // Check for stall between two vector instructions.
+ if (HII->isV60VectorInstruction(I)) {
+ for (auto J : OldPacketMIs) {
+ if (!HII->isV60VectorInstruction(J))
+ continue;
+ if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
+ return true;
+ }
+ return false;
+ }
+
+ // Check for stall between two scalar instructions. First, check that
+ // there is no definition of a use in the current packet, because it
+ // may be a candidate for .new.
+ for (auto J : CurrentPacketMIs)
+ if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
+ return false;
+
+ // Check for stall between I and instructions in the previous packet.
+ if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+ for (auto J : OldPacketMIs) {
+ if (HII->isV60VectorInstruction(J))
+ continue;
+ if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+ continue;
+ if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+ return true;
+ }
+ }
+
+ return false;
+}
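A toy model of the scalar leg of this check, assuming an instruction is just a (def, uses) record; the names are illustrative and LateResult stands in for isLateInstrFeedsEarlyInstr:

#include <cassert>
#include <vector>

struct Insn { int Def; std::vector<int> Uses; bool LateResult; };

// True if In consumes a value that an instruction in the previous packet
// produces too late for back-to-back issue.
static bool producesStall(const Insn &In, const std::vector<Insn> &Prev) {
  for (const Insn &J : Prev) {
    if (!J.LateResult)
      continue;
    for (int U : In.Uses)
      if (U == J.Def)
        return true;
  }
  return false;
}

int main() {
  std::vector<Insn> Prev = {{1, {}, true}, {2, {}, false}};
  assert(producesStall({3, {1}, false}, Prev));   // reads the late def of r1
  assert(!producesStall({3, {2}, false}, Prev));  // r2 was produced early
}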
+
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 0000000..960cf6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+ // Vector of instructions assigned to the packet that has just been created.
+ std::vector<MachineInstr*> OldPacketMIs;
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ // Track MIs with ignored dependence.
+ std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+ const MachineLoopInfo *MLI;
+
+private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState() override;
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock *MBB) override;
+
+ // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(const MachineInstr *MI) override;
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+ // isLegalToPruneDependencies - Is it legal to prune a dependence between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
+ void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override;
+ bool shouldAddToPacket(const MachineInstr *MI) override;
+
+ void unpacketizeSoloInstrs(MachineFunction &MF);
+
+protected:
+ bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType,
+ unsigned DepReg);
+ bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ void cleanUpDotCur();
+
+ bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII);
+ bool canPromoteToNewValueStore(const MachineInstr* MI,
+ const MachineInstr* PacketMI, unsigned DepReg);
+ bool demoteToDotOld(MachineInstr* MI);
+ bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2);
+ bool restrictingDepExistInPacket(MachineInstr*, unsigned);
+ bool isNewifiable(const MachineInstr *MI);
+ bool isCurifiable(MachineInstr* MI);
+ bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ);
+ inline bool isPromotedToDotNew() const {
+ return PromotedToDotNew;
+ }
+ bool tryAllocateResourcesForConstExt(bool Reserve);
+ bool canReserveResourcesForConstExt();
+ void reserveResourcesForConstExt();
+ bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasControlDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J);
+ bool producesStall(const MachineInstr *MI);
+};
+} // namespace llvm
+#endif // HEXAGONVLIWPACKETIZER_H
+
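The header only declares callbacks; the driving loop lives in LLVM's VLIWPacketizerList. A rough standalone imitation of the callback order (all names here are hypothetical, and the 4-slot limit stands in for the real resource tracker):

#include <cstdio>
#include <vector>

struct Insn { int Id; bool Solo; };

struct ToyPacketizer {
  std::vector<Insn> Packet;
  void endPacket() {
    std::printf("packet with %zu insns\n", Packet.size());
    Packet.clear();
  }
  // Mirrors isSoloInstruction / shouldAddToPacket / addToPacket.
  void add(const Insn &I) {
    if (I.Solo) {            // solo insns form a packet by themselves
      endPacket();
      Packet.push_back(I);
      endPacket();
      return;
    }
    if (Packet.size() == 4)  // out of slots: close the packet first
      endPacket();
    Packet.push_back(I);
  }
};

int main() {
  ToyPacketizer P;
  for (int i = 0; i < 5; ++i)
    P.add({i, i == 2});      // insn 2 is solo
  P.endPacket();
}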
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 99ea2fa..b73af82 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -13,7 +13,9 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,14 +35,28 @@ class HexagonAsmBackend : public MCAsmBackend {
mutable uint64_t relaxedCnt;
std::unique_ptr <MCInstrInfo> MCII;
std::unique_ptr <MCInst *> RelaxTarget;
+ MCInst * Extender;
public:
HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
- OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){}
+ OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+ Extender(nullptr) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
+ void setExtender(MCContext &Context) const {
+ if (Extender == nullptr)
+ const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+ }
+
+ MCInst *takeExtender() const {
+ assert(Extender != nullptr);
+ MCInst * Result = Extender;
+ const_cast<HexagonAsmBackend *>(this)->Extender = nullptr;
+ return Result;
+ }
+
unsigned getNumFixupKinds() const override {
return Hexagon::NumTargetFixupKinds;
}
@@ -222,6 +238,7 @@ public:
if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
++relaxedCnt;
*RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
return true;
} else {
return false;
@@ -262,6 +279,7 @@ public:
if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
++relaxedCnt;
*RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
return true;
}
}
@@ -276,9 +294,35 @@ public:
llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
}
- void relaxInstruction(MCInst const & /*Inst*/,
- MCInst & /*Res*/) const override {
- llvm_unreachable("relaxInstruction() unimplemented");
+ void relaxInstruction(MCInst const & Inst,
+ MCInst & Res) const override {
+ assert(HexagonMCInstrInfo::isBundle(Inst) &&
+ "Hexagon relaxInstruction only works on bundles");
+
+ Res = HexagonMCInstrInfo::createBundle();
+ // Copy the results into the bundle.
+ bool Update = false;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst());
+
+ // If an immediate extender is needed, add it in.
+ if (*RelaxTarget == &CrntHMI) {
+ Update = true;
+ assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) &&
+ "No room to insert extender for relaxation");
+
+ MCInst *HMIx = takeExtender();
+ *HMIx = HexagonMCInstrInfo::deriveExtender(
+ *MCII, CrntHMI,
+ HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI));
+ Res.addOperand(MCOperand::createInst(HMIx));
+ *RelaxTarget = nullptr;
+ }
+ // Now copy over the original instruction (the one we may have extended).
+ Res.addOperand(MCOperand::createInst(I.getInst()));
+ }
+ (void)Update;
+ assert(Update && "Didn't find relaxation target");
}
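In outline, relaxInstruction rebuilds the bundle and slips an immediate extender in front of the one instruction marked by RelaxTarget. A toy rendition with ints standing in for MCInsts (purely illustrative):

#include <cassert>
#include <vector>

static std::vector<int> relaxBundle(const std::vector<int> &Bundle,
                                    int Target, unsigned PacketSize) {
  std::vector<int> Res;
  for (int I : Bundle) {
    if (I == Target) {
      assert(Bundle.size() < PacketSize && "no room for the extender");
      Res.push_back(-1);  // -1 stands for the inserted extender
    }
    Res.push_back(I);
  }
  return Res;
}

int main() {
  auto R = relaxBundle({10, 20, 30}, 20, 4);
  assert((R == std::vector<int>{10, -1, 20, 30}));
}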
bool writeNopData(uint64_t Count,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f4d162c..47a6f86 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -44,6 +44,25 @@ namespace HexagonII {
TypeMEMOP = 9,
TypeNV = 10,
TypeDUPLEX = 11,
+ TypeCOMPOUND = 12,
+ TypeCVI_FIRST = 13,
+ TypeCVI_VA = TypeCVI_FIRST,
+ TypeCVI_VA_DV = 14,
+ TypeCVI_VX = 15,
+ TypeCVI_VX_DV = 16,
+ TypeCVI_VP = 17,
+ TypeCVI_VP_VS = 18,
+ TypeCVI_VS = 19,
+ TypeCVI_VINLANESAT= 20,
+ TypeCVI_VM_LD = 21,
+ TypeCVI_VM_TMP_LD = 22,
+ TypeCVI_VM_CUR_LD = 23,
+ TypeCVI_VM_VP_LDU = 24,
+ TypeCVI_VM_ST = 25,
+ TypeCVI_VM_NEW_ST = 26,
+ TypeCVI_VM_STU = 27,
+ TypeCVI_HIST = 28,
+ TypeCVI_LAST = TypeCVI_HIST,
TypePREFIX = 30, // Such as extenders.
TypeENDLOOP = 31 // Such as end of a HW loop.
};
@@ -71,12 +90,16 @@ namespace HexagonII {
PostInc = 6 // Post increment addressing mode
};
+ // MemAccessSize is encoded as 1+log2(N), where N is the access size in
+ // bytes (e.g., ByteAccess = 1+log2(1) = 1, Vector64Access = 1+log2(64) = 7).
enum class MemAccessSize {
NoMemAccess = 0, // Not a memory acces instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
WordAccess = 3, // Word access instruction (memw).
- DoubleWordAccess = 4 // Double word access instruction (memd)
+ DoubleWordAccess = 4, // Double word access instruction (memd)
+ // 5, // We do not have a 16 byte vector access.
+ Vector64Access = 7, // 64 Byte vector access instruction (vmem).
+ Vector128Access = 8 // 128 Byte vector access instruction (vmem).
};
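Decoding the 1+log2(bytes) scheme back to a byte count doubles as a quick sanity check of the enum values; the helper below is hypothetical, not part of the header.

#include <cassert>

static unsigned accessSizeInBytes(unsigned Code) {
  return Code == 0 ? 0 : 1u << (Code - 1);  // inverse of 1 + log2(bytes)
}

int main() {
  assert(accessSizeInBytes(1) == 1);    // ByteAccess  (memb)
  assert(accessSizeInBytes(3) == 4);    // WordAccess  (memw)
  assert(accessSizeInBytes(7) == 64);   // Vector64Access
  assert(accessSizeInBytes(8) == 128);  // Vector128Access
}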
// MCInstrDesc TSFlags
@@ -156,7 +179,7 @@ namespace HexagonII {
AddrModeMask = 0x7,
// Access size for load/store instructions.
MemAccessSizePos = 43,
- MemAccesSizeMask = 0x7,
+ MemAccesSizeMask = 0xf,
// Branch predicted taken.
TakenPos = 47,
@@ -164,7 +187,23 @@ namespace HexagonII {
// Floating-point instructions.
FPPos = 48,
- FPMask = 0x1
+ FPMask = 0x1,
+
+ // New-Value producer-2 instructions.
+ hasNewValuePos2 = 50,
+ hasNewValueMask2 = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos2 = 51,
+ NewValueOpMask2 = 0x7,
+
+ // Accumulator instructions.
+ AccumulatorPos = 54,
+ AccumulatorMask = 0x1,
+
+ // Complex XU, prevent XU competition by preferring slot 3.
+ PrefersSlot3Pos = 55,
+ PrefersSlot3Mask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
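Each Pos/Mask pair above is consumed with the usual shift-and-mask idiom. A minimal sketch, with the constants copied from the enum and a made-up helper name:

#include <cassert>
#include <cstdint>

constexpr unsigned AccumulatorPos = 54, AccumulatorMask = 0x1;

static bool isAccumulator(uint64_t TSFlags) {
  return (TSFlags >> AccumulatorPos) & AccumulatorMask;
}

int main() {
  assert(isAccumulator(uint64_t(1) << 54));
  assert(!isAccumulator(0));
}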
@@ -219,6 +258,26 @@ namespace HexagonII {
INST_PARSE_EXTENDER = 0x00000000
};
+ enum InstIClassBits : unsigned {
+ INST_ICLASS_MASK = 0xf0000000,
+ INST_ICLASS_EXTENDER = 0x00000000,
+ INST_ICLASS_J_1 = 0x10000000,
+ INST_ICLASS_J_2 = 0x20000000,
+ INST_ICLASS_LD_ST_1 = 0x30000000,
+ INST_ICLASS_LD_ST_2 = 0x40000000,
+ INST_ICLASS_J_3 = 0x50000000,
+ INST_ICLASS_CR = 0x60000000,
+ INST_ICLASS_ALU32_1 = 0x70000000,
+ INST_ICLASS_XTYPE_1 = 0x80000000,
+ INST_ICLASS_LD = 0x90000000,
+ INST_ICLASS_ST = 0xa0000000,
+ INST_ICLASS_ALU32_2 = 0xb0000000,
+ INST_ICLASS_XTYPE_2 = 0xc0000000,
+ INST_ICLASS_XTYPE_3 = 0xd0000000,
+ INST_ICLASS_XTYPE_4 = 0xe0000000,
+ INST_ICLASS_ALU32_3 = 0xf0000000
+ };
+
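The iclass is the top nibble of the 32-bit instruction word, so these constants classify a raw word with a single mask-and-compare. A sketch (the sample encodings are made up):

#include <cassert>
#include <cstdint>

constexpr uint32_t INST_ICLASS_MASK = 0xf0000000;
constexpr uint32_t INST_ICLASS_LD   = 0x90000000;

static bool isLoadIClass(uint32_t Word) {
  return (Word & INST_ICLASS_MASK) == INST_ICLASS_LD;
}

int main() {
  assert(isLoadIClass(0x91400000));   // some word with iclass 9 (LD)
  assert(!isLoadIClass(0x71400000));  // iclass 7 (ALU32_1)
}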
} // End namespace HexagonII.
} // End namespace llvm.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 36f8146..06ccec5 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "HexagonAsmPrinter.h"
-#include "Hexagon.h"
#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -28,104 +28,33 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
-HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter)
- : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {}
-
-void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
- StringRef Annot,
- MCSubtargetInfo const &STI) {
- assert(HexagonMCInstrInfo::isBundle(*MI));
- assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
- std::string Buffer;
- {
- raw_string_ostream TempStream(Buffer);
- RawPrinter->printInst(MI, TempStream, "", STI);
- }
- StringRef Contents(Buffer);
- auto PacketBundle = Contents.rsplit('\n');
- auto HeadTail = PacketBundle.first.split('\n');
- auto Preamble = "\t{\n\t\t";
- auto Separator = "";
- while(!HeadTail.first.empty()) {
- O << Separator;
- StringRef Inst;
- auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
- O << Duplex.first << "\n";
- Inst = Duplex.second;
- }
- else
- Inst = Duplex.first;
- O << Preamble;
- O << Inst;
- HeadTail = HeadTail.second.split('\n');
- Preamble = "";
- Separator = "\n\t\t";
- }
- O << "\n\t}" << PacketBundle.second;
-}
-
-void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- RawPrinter->printRegName(O, RegNo);
-}
-
-// Return the minimum value that a constant extendable operand can have
-// without being extended.
-static int getMinValue(uint64_t TSFlags) {
- unsigned isSigned =
- (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits =
- (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned)
- return -1U << (bits - 1);
-
- return 0;
-}
-
-// Return the maximum value that a constant extendable operand can have
-// without being extended.
-static int getMaxValue(uint64_t TSFlags) {
- unsigned isSigned =
- (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits =
- (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned)
- return ~(-1U << (bits - 1));
-
- return ~(-1U << bits);
-}
-
-// Return true if the instruction must be extended.
-static bool isExtended(uint64_t TSFlags) {
- return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
-}
-
-// Currently just used in an assert statement
-static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED;
-// Return true if the instruction may be extended based on the operand value.
-static bool isExtendable(uint64_t TSFlags) {
- return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
+ MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
}
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
-void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << getRegName(RegNo);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
}
void HexagonInstPrinter::setExtender(MCInst const &MCI) {
HasExtender = HexagonMCInstrInfo::isImmext(MCI);
}
-void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS,
- StringRef Annot,
- MCSubtargetInfo const &STI) {
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot, const MCSubtargetInfo &STI) {
assert(HexagonMCInstrInfo::isBundle(*MI));
assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
+ assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
HasExtender = false;
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
MCInst const &MCI = *I.getInst();
@@ -157,145 +86,148 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS,
}
}
-void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- const MCOperand& MO = MI->getOperand(OpNo);
-
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo &&
+ (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)))
+ O << "#";
+ MCOperand const &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- printRegName(O, MO.getReg());
- } else if(MO.isExpr()) {
- MO.getExpr()->print(O, &MAI);
- } else if(MO.isImm()) {
- printImmOperand(MI, OpNo, O);
- } else {
- llvm_unreachable("Unknown operand");
- }
-}
-
-void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO = MI->getOperand(OpNo);
-
- if(MO.isExpr()) {
- MO.getExpr()->print(O, &MAI);
- } else if(MO.isImm()) {
- O << MI->getOperand(OpNo).getImm();
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isExpr()) {
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value))
+ O << formatImm(Value);
+ else
+ O << *MO.getExpr();
} else {
llvm_unreachable("Unknown operand");
}
}
-void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- const MCOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MII = getMII().get(MI->getOpcode());
-
- assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
- "Expecting an extendable operand");
-
- if (MO.isExpr() || isExtended(MII.TSFlags)) {
- O << "#";
- } else if (MO.isImm()) {
- int ImmValue = MO.getImm();
- if (ImmValue < getMinValue(MII.TSFlags) ||
- ImmValue > getMaxValue(MII.TSFlags))
- O << "#";
- }
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
- unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
+ unsigned OpNo,
+ raw_ostream &O) const {
O << MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
O << -MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
O << -1;
}
-void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO0 = MI->getOperand(OpNo);
- const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ assert(Success); (void)Success;
+ Imm = SignExtend64<9>(Imm);
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
- printRegName(O, MO0.getReg());
- O << " + #" << MO1.getImm();
+void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ assert(Success); (void)Success;
+ Imm = SignExtend64<10>(Imm);
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
}
-void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO0 = MI->getOperand(OpNo);
- const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ assert(Success); (void)Success;
+ Imm = SignExtend64<10>(Imm);
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
- printRegName(O, MO0.getReg());
- O << ", #" << MO1.getImm();
+void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ assert(Success); (void)Success;
+ Imm = SignExtend64<11>(Imm);
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
}
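All four printers share one pattern: sign-extend the raw value to Bits, check that the low Shift bits are clear, then print the value divided by the scale. A standalone version with SignExtend64 re-derived locally (names hypothetical):

#include <cassert>
#include <cstdint>

static int64_t signExtend(uint64_t V, unsigned Bits) {
  uint64_t M = 1ull << (Bits - 1);
  return int64_t(((V & ((1ull << Bits) - 1)) ^ M) - M);
}

static int64_t scaledImm(uint64_t Raw, unsigned Bits, unsigned Shift) {
  int64_t Imm = signExtend(Raw, Bits);
  assert((Imm & ((int64_t(1) << Shift) - 1)) == 0 && "low bits must be zero");
  return Imm / (int64_t(1) << Shift);
}

int main() {
  assert(scaledImm(192, 9, 6) == 3);     // s3_6 operand #192 prints as 3
  assert(scaledImm(0x1C0, 9, 6) == -1);  // 0x1C0 sign-extends to -64
}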
-void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
// Branches can take an immediate operand. This is used by the branch
// selection pass to print $+8, an eight byte displacement from the PC.
llvm_unreachable("Unknown branch operand.");
}
-void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
raw_ostream &O, bool hi) const {
- assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand");
+ MCOperand const &MO = MI->getOperand(OpNo);
- O << '#' << (hi ? "HI" : "LO") << "(#";
- printOperand(MI, OpNo, O);
+ O << '#' << (hi ? "HI" : "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ assert(MO.isExpr() && "Unknown symbol operand");
+ printOperand(MI, OpNo, O);
+ }
O << ')';
}
-void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MII = getMII().get(MI->getOpcode());
-
- assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
- "Expecting an extendable operand");
-
- if (MO.isExpr() || isExtended(MII.TSFlags)) {
- O << "##";
+void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ MCOperand const &MO = MI->getOperand(OpNo);
+ assert (MO.isExpr());
+ MCExpr const &Expr = *MO.getExpr();
+ int64_t Value;
+ if (Expr.evaluateAsAbsolute(Value))
+ O << format("0x%" PRIx64, Value);
+ else {
+ if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo)
+ O << "##";
+ O << Expr;
}
- printOperand(MI, OpNo, O);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 534ac23..5f42118 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
//
-// This class prints an Hexagon MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
@@ -15,17 +14,8 @@
#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
-class HexagonAsmInstPrinter : public MCInstPrinter {
-public:
- HexagonAsmInstPrinter(MCInstPrinter *RawPrinter);
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
- MCSubtargetInfo const &STI) override;
- void printRegName(raw_ostream &O, unsigned RegNo) const override;
- std::unique_ptr<MCInstPrinter> RawPrinter;
-};
/// Prints bundles as a newline separated list of individual instructions
/// Duplexes are separated by a vertical tab \v character
/// A trailing line includes bundle properties such as endloop0/1
@@ -33,68 +23,69 @@ public:
/// r0 = add(r1, r2)
/// r0 = #0 \v jump 0x0
/// :endloop0 :endloop1
- class HexagonInstPrinter : public MCInstPrinter {
- public:
- explicit HexagonInstPrinter(MCAsmInfo const &MAI,
- MCInstrInfo const &MII,
- MCRegisterInfo const &MRI)
- : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
-
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
- virtual StringRef getOpcodeName(unsigned Opcode) const;
- void printInstruction(const MCInst *MI, raw_ostream &O);
- void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- static const char *getRegisterName(unsigned RegNo);
+class HexagonInstPrinter : public MCInstPrinter {
+public:
+ explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI);
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ StringRef getRegName(unsigned RegNo) const;
+ static char const *getRegisterName(unsigned RegNo);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printConstantPool(const MCInst *MI, unsigned OpNo,
+ void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNegImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
+ void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printBranchOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printGlobalOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
- { printSymbol(MI, OpNo, O, true); }
- void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
- { printSymbol(MI, OpNo, O, false); }
+ void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, true);
+ }
+ void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, false);
+ }
- const MCInstrInfo &getMII() const {
- return MII;
- }
+ MCAsmInfo const &getMAI() const { return MAI; }
+ MCInstrInfo const &getMII() const { return MII; }
- protected:
- void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
- const;
+protected:
+ void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
+ bool hi) const;
- private:
- const MCInstrInfo &MII;
+private:
+ MCInstrInfo const &MII;
- bool HasExtender;
- void setExtender(MCInst const &MCI);
- };
+ bool HasExtender;
+ void setExtender(MCInst const &MCI);
+};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index dc07069..a8456b4 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -18,13 +18,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class HexagonMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit HexagonMCAsmInfo(const Triple &TT);
- };
+class HexagonMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit HexagonMCAsmInfo(const Triple &TT);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
new file mode 100644
index 0000000..46b7b41
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -0,0 +1,581 @@
+//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCChecker.h"
+
+#include "HexagonBaseInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
+ cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+
+const HexagonMCChecker::PredSense
+ HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+
+void HexagonMCChecker::init() {
+ // Initialize read-only registers set.
+ ReadOnly.insert(Hexagon::PC);
+
+ // Figure out the loop-registers definitions.
+ if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+ Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0?
+ Defs[Hexagon::LC0].insert(Unconditional);
+ }
+ if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA1?
+ Defs[Hexagon::LC1].insert(Unconditional);
+ }
+
+ if (HexagonMCInstrInfo::isBundle(MCB))
+ // Unfurl a bundle.
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ init(*I.getInst());
+ }
+ else
+ init(MCB);
+}
+
+void HexagonMCChecker::init(MCInst const& MCI) {
+ const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+ unsigned PredReg = Hexagon::NoRegister;
+ bool isTrue = false;
+
+ // Get used registers.
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned R = MCI.getOperand(i).getReg();
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ // Note a used predicate register.
+ PredReg = R;
+ isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+ // Note use of new predicate register.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ NewPreds.insert(PredReg);
+ }
+ else
+ // Note register use. Super-registers are not tracked directly;
+ // only their components are.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers used indirectly.
+ Uses.insert(*SRI);
+ }
+
+ // Get implicit register definitions.
+ if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
+ for (; *ImpDef; ++ImpDef) {
+ unsigned R = *ImpDef;
+
+ if (Hexagon::R31 != R && MCID.isCall())
+ // Any register other than LR and PC is actually volatile, as
+ // defined by the ABI, and is not modified implicitly by the call insn.
+ continue;
+ if (Hexagon::PC == R)
+ // Branches are the only insns that can change the PC,
+ // otherwise a read-only register.
+ continue;
+
+ if (Hexagon::USR_OVF == R)
+ // Many insns change the USR implicitly, but only one or another flag.
+ // The instruction table models the USR.OVF flag, which can be implicitly
+ // modified more than once, but cannot be modified in the same packet
+ // with an instruction that modifies it explicitly. Deal with such
+ // situations individually.
+ SoftDefs.insert(R);
+ else if (isPredicateRegister(R) &&
+ HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
+ // Include implicit late predicates.
+ LatePreds.insert(R);
+ else
+ Defs[R].insert(PredSense(PredReg, isTrue));
+ }
+
+ // Figure out explicit register definitions.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i) {
+ unsigned R = MCI.getOperand(i).getReg(),
+ S = Hexagon::NoRegister;
+
+ // Note register definitions, direct ones as well as indirect side-effects.
+ // Super-registers are not tracked directly, but their components.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI) {
+ if (MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers defined indirectly.
+ continue;
+
+ if (R == *SRI) {
+ if (S == R)
+ // Avoid scoring the defined register multiple times.
+ continue;
+ else
+ // Note that the defined register has already been scored.
+ S = R;
+ }
+
+ if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI)
+ // P3:0 is a special case, since multiple predicate register definitions
+ // in a packet are allowed as the equivalent of their logical "and".
+ // Only an explicit definition of P3:0 is noted as such; if a
+ // side-effect, then note as a soft definition.
+ SoftDefs.insert(*SRI);
+ else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI))
+ // Some insns produce predicates too late to be used in the same packet.
+ LatePreds.insert(*SRI);
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD)
+ // Current loads should be used in the same packet.
+ // TODO: relies on the impossibility of current and temporary loads
+ // in the same packet.
+ CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD)
+ // Temporary loads should be used in the same packet, but don't commit
+ // results, so it should be disregarded if another insn changes the same
+ // register.
+ // TODO: relies on the impossibility of current and temporary loads
+ // in the same packet.
+ TmpDefs.insert(*SRI);
+ else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) )
+ // vshuff(Vx, Vy, Rx): Vx(0) and Vy(1) are both source and
+ // destination registers with this instruction. Same for vdeal(Vx, Vy, Rx).
+ Uses.insert(*SRI);
+ else
+ Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ }
+ }
+
+ // Figure out register definitions that produce new values.
+ if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) {
+ unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (HexagonMCInstrInfo::isCompound(MCII, MCI))
+ compoundRegisterMap(R); // Compound insns have a limited register range.
+
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // No super-registers defined indirectly.
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+
+ // For fairly unique 2-dot-new producers, example:
+ // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers.
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) {
+ unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg();
+
+ for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+ }
+ }
+
+ // Figure out definitions of new predicate registers.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned P = MCI.getOperand(i).getReg();
+
+ if (isPredicateRegister(P))
+ NewPreds.insert(P);
+ }
+
+ // Figure out uses of new values.
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) {
+ unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (!MCSubRegIterator(N, &RI).isValid()) {
+ // Super-registers cannot use new values.
+ if (MCID.isBranch())
+ NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV);
+ else
+ NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+ }
+ }
+}
+
+HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx,
+ MCRegisterInfo const &ri)
+ : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI),
+ bLoadErrInfo(false) {
+ init();
+}
+
+bool HexagonMCChecker::check() {
+ bool chkB = checkBranches();
+ bool chkP = checkPredicates();
+ bool chkNV = checkNewValues();
+ bool chkR = checkRegisters();
+ bool chkS = checkSolo();
+ bool chkSh = checkShuffle();
+ bool chkSl = checkSlots();
+ bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
+
+ return chk;
+}
+
+bool HexagonMCChecker::checkSlots() {
+ unsigned slotsUsed = 0;
+ for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) {
+ MCInst const& MCI = *HMI.getInst();
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::isDuplex(MCII, MCI))
+ slotsUsed += 2;
+ else
+ ++slotsUsed;
+ }
+
+ if (slotsUsed > HEXAGON_PACKET_SIZE) {
+ HexagonMCErrInfo errInfo;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS);
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
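The slot accounting in isolation, assuming a 4-slot packet: a duplex costs two slots, an immediate extender none, everything else one (toy types, not the MC classes):

#include <cassert>
#include <vector>

enum Kind { Normal, Duplex, Extender };

static bool fitsInPacket(const std::vector<Kind> &Bundle) {
  unsigned Slots = 0;
  for (Kind K : Bundle)
    Slots += (K == Duplex) ? 2 : (K == Extender) ? 0 : 1;
  return Slots <= 4;  // HEXAGON_PACKET_SIZE
}

int main() {
  assert(fitsInPacket({Normal, Duplex, Normal}));            // 4 slots
  assert(!fitsInPacket({Duplex, Duplex, Normal}));           // 5 slots
  assert(fitsInPacket({Extender, Normal, Normal, Normal}));  // extender is free
}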
+
+// Check legal use of branches.
+bool HexagonMCChecker::checkBranches() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ bool hasConditional = false;
+ unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0,
+ NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+ Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
+
+ for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
+ i < MCB.size(); ++i) {
+ MCInst const &MCI = *MCB.begin()[i].getInst();
+
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() ||
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) {
+ ++Branches;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() &&
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ ++NewIndirectBranches;
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI))
+ ++NewValueBranches;
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) ||
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) {
+ hasConditional = true;
+ Conditional = i; // Record the position of the conditional branch.
+ } else {
+ Unconditional = i; // Record the position of the unconditional branch.
+ }
+ }
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
+ ++Returns;
+ }
+
+ if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
+ if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ // Error out if there's any branch in a loop-end packet.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (Branches > 1)
+ if (!hasConditional || Conditional > Unconditional) {
+ // Error out if more than one unconditional branch or
+ // the conditional branch appears after the unconditional one.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
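The two-branch rule above reduces to: at most one unconditional branch per packet, and a conditional branch must precede it. A minimal sketch of just that predicate, where positions are bundle indices (illustrative only):

#include <cassert>

static bool branchOrderOK(unsigned Branches, bool HasConditional,
                          unsigned CondPos, unsigned UncondPos) {
  if (Branches <= 1)
    return true;
  return HasConditional && CondPos < UncondPos;
}

int main() {
  assert(branchOrderOK(2, true, 0, 1));    // conditional first: legal
  assert(!branchOrderOK(2, true, 1, 0));   // unconditional first: rejected
  assert(!branchOrderOK(2, false, 0, 0));  // two unconditionals: rejected
}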
+
+// Check legal use of predicate registers.
+bool HexagonMCChecker::checkPredicates() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper use of new predicate registers.
+ for (const auto& I : NewPreds) {
+ unsigned P = I;
+
+ if (!Defs.count(P) || LatePreds.count(P)) {
+ // Error out if the new predicate register is not defined,
+ // or defined "late"
+ // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ // Check for proper use of auto-anded predicate registers.
+ for (const auto& I : LatePreds) {
+ unsigned P = I;
+
+ if (LatePreds.count(P) > 1 || Defs.count(P)) {
+ // Error out if predicate register defined "late" multiple times or
+ // defined late and regularly defined
+ // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }".
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check legal use of new values.
+bool HexagonMCChecker::checkNewValues() {
+ HexagonMCErrInfo errInfo;
+ for (auto& I : NewUses) {
+ unsigned R = I.first;
+ NewSense &US = I.second;
+
+ if (!hasValidNewValueDef(US, NewDefs[R])) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check for legal register uses and definitions.
+bool HexagonMCChecker::checkRegisters() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper register definitions.
+ for (const auto& I : Defs) {
+ unsigned R = I.first;
+
+ if (ReadOnly.count(R)) {
+ // Error out for definitions of read-only registers.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (isLoopRegister(R) && Defs.count(R) > 1 &&
+ (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB))) {
+ // Error out for definitions of loop registers at the end of a loop.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (SoftDefs.count(R)) {
+ // Error out for explicit changes to registers also weakly defined
+ // (e.g., "{ usr = r0; r0 = sfadd(...) }").
+ unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+ // Check for multiple register definitions.
+ PredSet &PM = Defs[R];
+
+ // Check for multiple unconditional register definitions.
+ if (PM.count(Unconditional)) {
+ // Error out on an unconditional change when there are any other
+ // changes, conditional or not.
+ unsigned UsrR = Hexagon::USR;
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for multiple conditional register definitions.
+ for (const auto& J : PM) {
+ PredSense P = J;
+
+ // Check for multiple uses of the same condition.
+ if (PM.count(P) > 1) {
+ // Error out on conditional changes based on the same predicate
+ // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for the use of the complementary condition.
+ P.second = !P.second;
+ if (PM.count(P) && PM.size() > 2) {
+ // Error out on conditional changes based on the same predicate
+ // multiple times
+ // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+ }
+
+ // Check for use of current definitions.
+ for (const auto& I : CurDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // Warn on an unused current definition.
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+
+ // Check for use of temporary definitions.
+ for (const auto& I : TmpDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // Special case for vhist.
+ bool vHistFound = false;
+ for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
+ vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
+ break;
+ }
+ }
+ // Warn on an unused temporary definition.
+ if (!vHistFound) {
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+ }
+
+ return true;
+}
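The multiset walk in checkRegisters enforces one rule per general register: at most one writer per predicate sense, and a p/!p pair may not coexist with a third writer. A minimal standalone sketch of that rule, with the checker machinery stripped away (the names here are illustrative, not LLVM API):

#include <set>
#include <utility>

using PredSense = std::pair<unsigned, bool>; // predicate register #, sense
static const PredSense Unconditional{0, false};

bool defsAreLegal(const std::multiset<PredSense> &PM) {
  if (PM.size() <= 1)
    return true;                  // a single definition is always fine
  if (PM.count(Unconditional))
    return false;                 // unconditional write plus anything else
  for (PredSense P : PM) {
    if (PM.count(P) > 1)
      return false;               // same predicate and sense written twice
    P.second = !P.second;         // flip to the complementary sense
    if (PM.count(P) && PM.size() > 2)
      return false;               // p/!p pair plus an extra writer
  }
  return true;
}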
+
+// Check for legal use of solo insns.
+bool HexagonMCChecker::checkSolo() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB) &&
+ HexagonMCInstrInfo::bundleSize(MCB) > 1) {
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool HexagonMCChecker::checkShuffle() {
+ HexagonMCErrInfo errInfo;
+  // Branch info is lost when duplexing, so check the unduplexed insns;
+  // only branch errors matter in that case.
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ if (!MCS.check()) {
+ if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCS.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+ if (!MCSDX.check()) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCSDX.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
+
+void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::R15:
+ Register = Hexagon::R23;
+ break;
+ case Hexagon::R14:
+ Register = Hexagon::R22;
+ break;
+ case Hexagon::R13:
+ Register = Hexagon::R21;
+ break;
+ case Hexagon::R12:
+ Register = Hexagon::R20;
+ break;
+ case Hexagon::R11:
+ Register = Hexagon::R19;
+ break;
+ case Hexagon::R10:
+ Register = Hexagon::R18;
+ break;
+ case Hexagon::R9:
+ Register = Hexagon::R17;
+ break;
+ case Hexagon::R8:
+ Register = Hexagon::R16;
+ break;
+ }
+}
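compoundRegisterMap widens the 4-bit compound encoding back to the architectural register: encoded values 8-15 stand for r16-r23, and the code emitter below performs the inverse narrowing. A sketch of the same mapping over raw encoding values (a hypothetical helper, assuming that layout):

unsigned widenCompoundReg(unsigned Enc4) {
  // Compound insns encode r0-r7 directly and r16-r23 as 8-15.
  return (Enc4 >= 8 && Enc4 <= 15) ? Enc4 + 8 : Enc4;
}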
+
+bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const {
+ bool Strict = !RelaxNVChecks;
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ const NewSense &Def = Defs[i];
+ // NVJ cannot use a new FP value [7.6.1]
+ if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0))
+ continue;
+ // If the definition was not predicated, then it does not matter if
+ // the use is.
+ if (Def.PredReg == 0)
+ return true;
+ // With the strict checks, both the definition and the use must be
+ // predicated on the same register and condition.
+ if (Strict) {
+ if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond)
+ return true;
+ } else {
+ // With the relaxed checks, if the definition was predicated, the only
+ // detectable violation is if the use is predicated on the opposing
+ // condition, otherwise, it's ok.
+ if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond)
+ return true;
+ }
+ }
+ return false;
+}
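In short, ignoring the new-value-jump screen at the top of the loop: an unpredicated producer satisfies any use; under strict checks a predicated use must match the producer's predicate register and sense exactly; under relaxed checks only a provable mismatch (same register, opposite sense) is rejected. A condensed restatement of that test, with semantics assumed from the loop above:

struct Sense { unsigned PredReg; bool Cond; };

bool accepts(const Sense &Def, const Sense &Use, bool Strict) {
  if (Def.PredReg == 0)
    return true;                 // unpredicated producer always feeds the use
  if (Strict)
    return Def.PredReg == Use.PredReg && Def.Cond == Use.Cond;
  return Def.PredReg != Use.PredReg || Def.Cond == Use.Cond;
}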
+
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
new file mode 100644
index 0000000..5fc0bde
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -0,0 +1,218 @@
+//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCHECKER_H
+#define HEXAGONMCCHECKER_H
+
+#include <map>
+#include <set>
+#include <queue>
+#include "MCTargetDesc/HexagonMCShuffler.h"
+
+using namespace llvm;
+
+namespace llvm {
+class MCOperandInfo;
+
+typedef struct {
+ unsigned Error, Warning, ShuffleError;
+ unsigned Register;
+} ErrInfo_T;
+
+class HexagonMCErrInfo {
+public:
+ enum {
+ CHECK_SUCCESS = 0,
+ // Errors.
+ CHECK_ERROR_BRANCHES = 0x00001,
+ CHECK_ERROR_NEWP = 0x00002,
+ CHECK_ERROR_NEWV = 0x00004,
+ CHECK_ERROR_REGISTERS = 0x00008,
+ CHECK_ERROR_READONLY = 0x00010,
+ CHECK_ERROR_LOOP = 0x00020,
+ CHECK_ERROR_ENDLOOP = 0x00040,
+ CHECK_ERROR_SOLO = 0x00080,
+ CHECK_ERROR_SHUFFLE = 0x00100,
+ CHECK_ERROR_NOSLOTS = 0x00200,
+ CHECK_ERROR_UNKNOWN = 0x00400,
+ // Warnings.
+ CHECK_WARN_CURRENT = 0x10000,
+ CHECK_WARN_TEMPORARY = 0x20000
+ };
+ ErrInfo_T s;
+
+ void reset() {
+ s.Error = CHECK_SUCCESS;
+ s.Warning = CHECK_SUCCESS;
+ s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
+ s.Register = Hexagon::NoRegister;
+ };
+ HexagonMCErrInfo() {
+ reset();
+ };
+
+ void setError(unsigned e, unsigned r = Hexagon::NoRegister)
+ { s.Error = e; s.Register = r; };
+ void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
+ { s.Warning = w; s.Register = r; };
+ void setShuffleError(unsigned e) { s.ShuffleError = e; };
+};
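Because each error and warning code occupies its own bit, a consumer can test whole categories with one mask. A usage sketch (the checker object and the reporting code are assumed):

unsigned Err = Checker.getError();
if (Err & (HexagonMCErrInfo::CHECK_ERROR_REGISTERS |
           HexagonMCErrInfo::CHECK_ERROR_READONLY |
           HexagonMCErrInfo::CHECK_ERROR_LOOP)) {
  // report a register-constraint diagnostic
}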
+
+/// Check for a valid bundle.
+class HexagonMCChecker {
+ /// Insn bundle.
+ MCInst& MCB;
+ MCInst& MCBDX;
+ const MCRegisterInfo& RI;
+ MCInstrInfo const &MCII;
+ MCSubtargetInfo const &STI;
+ bool bLoadErrInfo;
+
+ /// Set of definitions: register #, if predicated, if predicated true.
+ typedef std::pair<unsigned, bool> PredSense;
+ static const PredSense Unconditional;
+ typedef std::multiset<PredSense> PredSet;
+ typedef std::multiset<PredSense>::iterator PredSetIterator;
+
+ typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator;
+ llvm::DenseMap<unsigned, PredSet> Defs;
+
+ /// Information about how a new-value register is defined or used:
+ /// PredReg = predicate register, 0 if use/def not predicated,
+ /// Cond = true/false for if(PredReg)/if(!PredReg) respectively,
+ /// IsFloat = true if definition produces a floating point value
+ /// (not valid for uses),
+ /// IsNVJ = true if the use is a new-value branch (not valid for
+ /// definitions).
+ struct NewSense {
+ unsigned PredReg;
+ bool IsFloat, IsNVJ, Cond;
+ // The special-case "constructors":
+ static NewSense Jmp(bool isNVJ) {
+ NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
+ /*Cond=*/ false };
+ return NS;
+ }
+ static NewSense Use(unsigned PR, bool True) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ static NewSense Def(unsigned PR, bool True, bool Float) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ };
+ /// Set of definitions that produce new register:
+ typedef llvm::SmallVector<NewSense,2> NewSenseList;
+ typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
+ llvm::DenseMap<unsigned, NewSenseList> NewDefs;
+
+ /// Set of weak definitions whose clashes should be enforced selectively.
+ typedef std::set<unsigned>::iterator SoftDefsIterator;
+ std::set<unsigned> SoftDefs;
+
+ /// Set of current definitions committed to the register file.
+ typedef std::set<unsigned>::iterator CurDefsIterator;
+ std::set<unsigned> CurDefs;
+
+ /// Set of temporary definitions not committed to the register file.
+ typedef std::set<unsigned>::iterator TmpDefsIterator;
+ std::set<unsigned> TmpDefs;
+
+ /// Set of new predicates used.
+ typedef std::set<unsigned>::iterator NewPredsIterator;
+ std::set<unsigned> NewPreds;
+
+ /// Set of predicates defined late.
+ typedef std::multiset<unsigned>::iterator LatePredsIterator;
+ std::multiset<unsigned> LatePreds;
+
+ /// Set of uses.
+ typedef std::set<unsigned>::iterator UsesIterator;
+ std::set<unsigned> Uses;
+
+ /// Set of new values used: new register, if new-value jump.
+ typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator;
+ llvm::DenseMap<unsigned, NewSense> NewUses;
+
+ /// Pre-defined set of read-only registers.
+ typedef std::set<unsigned>::iterator ReadOnlyIterator;
+ std::set<unsigned> ReadOnly;
+
+ std::queue<ErrInfo_T> ErrInfoQ;
+ HexagonMCErrInfo CrntErrInfo;
+
+ void getErrInfo() {
+    if (bLoadErrInfo) {
+ if (ErrInfoQ.empty()) {
+ CrntErrInfo.reset();
+ } else {
+ CrntErrInfo.s = ErrInfoQ.front();
+ ErrInfoQ.pop();
+ }
+ }
+ bLoadErrInfo = false;
+ }
+
+ void init();
+ void init(MCInst const&);
+
+ // Checks performed.
+ bool checkBranches();
+ bool checkPredicates();
+ bool checkNewValues();
+ bool checkRegisters();
+ bool checkSolo();
+ bool checkShuffle();
+ bool checkSlots();
+
+ static void compoundRegisterMap(unsigned&);
+
+ bool isPredicateRegister(unsigned R) const {
+ return (Hexagon::P0 == R || Hexagon::P1 == R ||
+ Hexagon::P2 == R || Hexagon::P3 == R);
+ };
+ bool isLoopRegister(unsigned R) const {
+ return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
+ Hexagon::SA1 == R || Hexagon::LC1 == R);
+ };
+
+ bool hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const;
+
+ public:
+  explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                            MCInst &mcb, MCInst &mcbdx, const MCRegisterInfo &ri);
+
+ bool check();
+
+ /// add a new error/warning
+ void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
+
+ /// Return the error code for the last operation in the insn bundle.
+ unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
+ unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
+ unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
+ unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
+ bool getNextErrInfo() {
+ bLoadErrInfo = true;
+ return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
+ }
+};
+
+}
+
+#endif // HEXAGONMCCHECKER_H
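Tying the public interface together, a hypothetical driver runs check() once and then drains the queued diagnostics through getNextErrInfo(); only the accessors come from this header, the setup is assumed:

HexagonMCChecker Checker(MCII, STI, Bundle, BundleDX, RI);
if (!Checker.check()) {
  while (Checker.getNextErrInfo()) {
    unsigned E = Checker.getError();       // a CHECK_ERROR_* bit
    unsigned R = Checker.getErrRegister(); // offending register, if any
    // emit a diagnostic for (E, R)
  }
}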
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 9fc4e2a..c2c6275 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -96,6 +96,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
assert(!HexagonMCInstrInfo::isBundle(HMB));
uint64_t Binary;
+  // Compound instructions are limited to using registers 0-7 and 16-23;
+  // map registers 16-23 to 8-15 here so they can be correctly encoded.
+ static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
+ Hexagon::R11, Hexagon::R12, Hexagon::R13,
+ Hexagon::R14, Hexagon::R15};
+
// Pseudo instructions don't get encoded and shouldn't be here
// in the first place!
assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
@@ -104,6 +110,16 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
" `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
"\n");
+ if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
+ for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
+ if (HMB.getOperand(i).isReg()) {
+ unsigned Reg =
+ MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
+ if ((Reg <= 23) && (Reg >= 16))
+ HMB.getOperand(i).setReg(RegMap[Reg - 16]);
+ }
+ }
+
if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
// Calculate the new value distance to the associated producer
MCOperand &MCO =
@@ -318,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
// The only relocs left should be GP relative:
default:
if (MCID.mayStore() || MCID.mayLoad()) {
- for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
++ImpUses) {
if (*ImpUses == Hexagon::GP) {
switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
@@ -389,10 +405,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
return cast<MCConstantExpr>(ME)->getValue();
}
if (MK == MCExpr::Binary) {
- unsigned Res;
- Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
- Res +=
- getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
return 0;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 886f8db..d194bea 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -115,8 +115,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
SrcReg = MI.getOperand(1).getReg();
if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) ||
- (MI.getOperand(2).getImm() == -1)))
+ (HexagonMCInstrInfo::inRange<5>(MI, 2) ||
+ HexagonMCInstrInfo::minConstant(MI, 2) == -1))
return HexagonII::HCG_A;
break;
case Hexagon::A2_tfr:
@@ -134,8 +134,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return false;
// Rd = #u6
DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 &&
- MI.getOperand(1).getImm() >= 0 &&
+ if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
+ HexagonMCInstrInfo::minConstant(MI, 1) >= 0 &&
HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
return HexagonII::HCG_A;
break;
@@ -145,9 +145,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
DstReg = MI.getOperand(0).getReg();
Src1Reg = MI.getOperand(1).getReg();
if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
- MI.getOperand(2).isImm() &&
HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- (MI.getOperand(2).getImm() == 0))
+ HexagonMCInstrInfo::minConstant(MI, 2) == 0)
return HexagonII::HCG_A;
break;
// The fact that .new form is used pretty much guarantees
@@ -206,6 +205,8 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
MCInst *CompoundInsn = 0;
unsigned compoundOpcode;
MCOperand Rs, Rt;
+ int64_t Value;
+ bool Success;
switch (L.getOpcode()) {
default:
@@ -277,7 +278,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
case Hexagon::C2_cmpeqi:
DEBUG(dbgs() << "CX: C2_cmpeqi\n");
- if (L.getOperand(2).getImm() == -1)
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)];
else
compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
@@ -286,14 +290,17 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (L.getOperand(2).getImm() != -1)
+ if (Value != -1)
CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
case Hexagon::C2_cmpgti:
DEBUG(dbgs() << "CX: C2_cmpgti\n");
- if (L.getOperand(2).getImm() == -1)
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)];
else
compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
@@ -302,7 +309,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (L.getOperand(2).getImm() != -1)
+ if (Value != -1)
CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
@@ -404,7 +411,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
/// additional slot.
void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
MCContext &Context, MCInst &MCI) {
- assert(MCI.getOpcode() == Hexagon::BUNDLE &&
+ assert(HexagonMCInstrInfo::isBundle(MCI) &&
"Non-Bundle where Bundle expected");
// By definition a compound must have 2 insn.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 7e9247c..e6194f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -26,7 +26,7 @@ using namespace Hexagon;
#define DEBUG_TYPE "hexagon-mcduplex-info"
// pair table of subInstructions with opcodes
-static std::pair<unsigned, unsigned> opcodeData[] = {
+static const std::pair<unsigned, unsigned> opcodeData[] = {
std::make_pair((unsigned)V4_SA1_addi, 0),
std::make_pair((unsigned)V4_SA1_addrx, 6144),
std::make_pair((unsigned)V4_SA1_addsp, 3072),
@@ -81,8 +81,7 @@ static std::pair<unsigned, unsigned> opcodeData[] = {
std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
static std::map<unsigned, unsigned>
- subinstOpcodeMap(opcodeData,
- opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0]));
+ subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData));
bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
switch (Ga) {
@@ -195,15 +194,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Special case this one from Group L2.
// Rd = memw(r29+#u5:2)
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) {
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) &&
+ Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
// Rd = memw(Rs+#u4:2)
if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(2).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) {
+ inRange<4, 2>(MCI, 2)) {
return HexagonII::HSIG_L1;
}
}
@@ -214,7 +211,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) {
+ inRange<4>(MCI, 2)) {
return HexagonII::HSIG_L1;
}
break;
@@ -235,8 +232,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) {
+ inRange<3, 1>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -246,7 +242,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) {
+ inRange<3>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -256,8 +252,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) {
+ inRange<5, 3>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -326,15 +321,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntReg(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() &&
- isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) {
+ Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
// memw(Rs+#u4:2) = Rt
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) {
+ inRange<4, 2>(MCI, 1)) {
return HexagonII::HSIG_S1;
}
break;
@@ -344,7 +337,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) {
+ inRange<4>(MCI, 1)) {
return HexagonII::HSIG_S1;
}
break;
@@ -363,8 +356,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) {
+ inRange<3, 1>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
break;
@@ -374,8 +366,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) &&
HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg &&
- MCI.getOperand(1).isImm() &&
- isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) {
+ inSRange<6, 3>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
break;
@@ -383,9 +374,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// memw(Rs+#u4:2) = #U1
Src1Reg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) &&
- MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+ inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) {
return HexagonII::HSIG_S2;
}
break;
@@ -393,16 +382,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// memb(Rs+#u4) = #U1
Src1Reg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) &&
- MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+ inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) {
return HexagonII::HSIG_S2;
}
break;
case Hexagon::S2_allocframe:
- if (MCI.getOperand(0).isImm() &&
- isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) {
+ if (inRange<5, 3>(MCI, 0))
return HexagonII::HSIG_S2;
- }
break;
//
// Group A:
@@ -428,8 +414,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
// Rd = add(r29,#u6:2)
if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) {
+ inRange<6, 2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
// Rx = add(Rx,#s7)
@@ -439,8 +424,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Rd = add(Rs,#1)
// Rd = add(Rs,#-1)
if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
- (MCI.getOperand(2).getImm() == -1))) {
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) {
return HexagonII::HSIG_A;
}
}
@@ -460,8 +444,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
- (MCI.getOperand(2).getImm() == 255))) {
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) {
return HexagonII::HSIG_A;
}
break;
@@ -491,8 +474,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
DstReg = MCI.getOperand(0).getReg(); // Rd
PredReg = MCI.getOperand(1).getReg(); // P0
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
- Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() &&
- MCI.getOperand(2).getImm() == 0) {
+ Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -502,7 +484,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (Hexagon::P0 == DstReg &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) {
+ inRange<2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
break;
@@ -511,10 +493,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Rdd = combine(#u2,#U2)
DstReg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
- // TODO: Handle Globals/Symbols
- (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) &&
- ((MCI.getOperand(2).isImm() &&
- isUInt<2>(MCI.getOperand(2).getImm())))) {
+ inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
break;
@@ -524,7 +503,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) {
+ minConstant(MCI, 2) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -534,7 +513,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) {
+ minConstant(MCI, 1) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -556,19 +535,17 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
}
bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
-
unsigned DstReg, SrcReg;
-
switch (potentialDuplex.getOpcode()) {
case Hexagon::A2_addi:
// testing for case of: Rx = add(Rx,#s7)
DstReg = potentialDuplex.getOperand(0).getReg();
SrcReg = potentialDuplex.getOperand(1).getReg();
if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (potentialDuplex.getOperand(2).isExpr())
+ int64_t Value;
+ if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value))
return true;
- if (potentialDuplex.getOperand(2).isImm() &&
- !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm())))
+ if (!isShiftedInt<7, 0>(Value))
return true;
}
break;
@@ -576,15 +553,14 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
DstReg = potentialDuplex.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (potentialDuplex.getOperand(1).isExpr())
+ int64_t Value;
+ if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value))
return true;
// Check for case of Rd = #-1.
- if (potentialDuplex.getOperand(1).isImm() &&
- (potentialDuplex.getOperand(1).getImm() == -1))
+ if (Value == -1)
return false;
// Check for case of Rd = #u6.
- if (potentialDuplex.getOperand(1).isImm() &&
- !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm()))
+ if (!isShiftedUInt<6, 0>(Value))
return true;
}
break;
@@ -712,19 +688,23 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
MCInst Result;
+ bool Absolute;
+ int64_t Value;
switch (Inst.getOpcode()) {
default:
// dbgs() << "opcode: "<< Inst->getOpcode() << "\n";
llvm_unreachable("Unimplemented subinstruction \n");
break;
case Hexagon::A2_addi:
- if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+      assert(Absolute); (void)Absolute;
+ if (Value == 1) {
Result.setOpcode(Hexagon::V4_SA1_inc);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break;
} // 1,2 SUBInst $Rd = add($Rs, #1)
- else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) {
+ else if (Value == -1) {
Result.setOpcode(Hexagon::V4_SA1_dec);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -754,7 +734,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 0);
break; // 1 SUBInst allocframe(#$u5_3)
case Hexagon::A2_andir:
- if (Inst.getOperand(2).getImm() == 255) {
+ if (minConstant(Inst, 2) == 255) {
Result.setOpcode(Hexagon::V4_SA1_zxtb);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -772,26 +752,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2)
case Hexagon::A4_combineii:
case Hexagon::A2_combineii:
- if (Inst.getOperand(1).getImm() == 1) {
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+    assert(Absolute); (void)Absolute;
+ if (Value == 1) {
Result.setOpcode(Hexagon::V4_SA1_combine1i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#1, #$u2)
}
-
- if (Inst.getOperand(1).getImm() == 3) {
+ if (Value == 3) {
Result.setOpcode(Hexagon::V4_SA1_combine3i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#3, #$u2)
}
- if (Inst.getOperand(1).getImm() == 0) {
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SA1_combine0i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#0, #$u2)
}
- if (Inst.getOperand(1).getImm() == 2) {
+ if (Value == 2) {
Result.setOpcode(Hexagon::V4_SA1_combine2i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
@@ -894,12 +875,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2)
}
case Hexagon::S4_storeirb_io:
- if (Inst.getOperand(2).getImm() == 0) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    assert(Absolute); (void)Absolute;
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SS2_storebi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
- } else if (Inst.getOperand(2).getImm() == 1) {
+ } else if (Value == 1) {
Result.setOpcode(Hexagon::V4_SS2_storebi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -923,12 +906,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
case Hexagon::S4_storeiri_io:
- if (Inst.getOperand(2).getImm() == 0) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    assert(Absolute); (void)Absolute;
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SS2_storewi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
- } else if (Inst.getOperand(2).getImm() == 1) {
+ } else if (Value == 1) {
Result.setOpcode(Hexagon::V4_SS2_storewi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -983,7 +968,8 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 0);
break; // 2 SUBInst if (p0) $Rd = #0
case Hexagon::A2_tfrsi:
- if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute && Value == -1) {
Result.setOpcode(Hexagon::V4_SA1_setin1);
addOps(Result, Inst, 0);
break; // 2 1 SUBInst $Rd = #-1
@@ -1044,6 +1030,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
<< "\n");
bisReversable = false;
}
+ if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
+ bisReversable = false;
// Try in order.
if (isOrderedDuplexPair(
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index bf51c35..eaa3550 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,9 +37,7 @@ static cl::opt<unsigned>
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
const MCSubtargetInfo &STI) {
- MCInst HMI;
- HMI.setOpcode(Hexagon::BUNDLE);
- HMI.addOperand(MCOperand::createImm(0));
+ MCInst HMI = HexagonMCInstrInfo::createBundle();
MCInst *MCB;
if (MCK.getOpcode() != Hexagon::BUNDLE) {
@@ -50,7 +48,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
// Examines packet and pad the packet, if needed, when an
// end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(*MCB);
+ HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
HexagonMCShuffle(*MCII, STI, *MCB);
assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
@@ -60,9 +58,9 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
if (Extended) {
if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
- HexagonMCInstrInfo::clampExtended(*MCII, *SubInst);
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
} else {
- HexagonMCInstrInfo::clampExtended(*MCII, *MCI);
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
}
Extended = false;
} else {
@@ -114,7 +112,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSection *Section = getAssembler().getContext().getELFSection(
SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
SwitchSection(Section);
- AssignSection(Symbol, Section);
+ AssignFragment(Symbol, getCurrentFragment());
MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
SwitchSection(CrntSection);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
new file mode 100644
index 0000000..fc62626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-mcexpr"
+
+HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) HexagonNoExtendOperand(Expr);
+}
+
+bool HexagonNoExtendOperand::evaluateAsRelocatableImpl(
+ MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const {
+ return Expr->evaluateAsRelocatable(Res, Layout, Fixup);
+}
+
+void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {}
+
+MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const {
+ return Expr->findAssociatedFragment();
+}
+
+void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; }
+
+bool HexagonNoExtendOperand::classof(MCExpr const *E) {
+ return E->getKind() == MCExpr::Target;
+}
+
+HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr)
+ : Expr(Expr) {}
+
+void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ Expr->print(OS, MAI);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
new file mode 100644
index 0000000..60f180f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -0,0 +1,35 @@
+//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCInst;
+class HexagonNoExtendOperand : public MCTargetExpr {
+public:
+ static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx);
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override;
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+ static bool classof(MCExpr const *E);
+ MCExpr const *getExpr() const;
+
+private:
+ HexagonNoExtendOperand(MCExpr const *Expr);
+ MCExpr const *Expr;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 48b15f8..e684207 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -15,17 +15,37 @@
#include "Hexagon.h"
#include "HexagonBaseInfo.h"
+#include "HexagonMCChecker.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
+void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
+ MCContext &Context) {
+ MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
+}
+
+void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI) {
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+ MCOperand const &exOp =
+ MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+
+ // Create the extender.
+ MCInst *XMCI =
+ new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp));
+
+ MCB.addOperand(MCOperand::createInst(XMCI));
+}
+
iterator_range<MCInst::const_iterator>
HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
assert(isBundle(MCI));
- return iterator_range<MCInst::const_iterator>(
- MCI.begin() + bundleInstructionsOffset, MCI.end());
+ return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
}
size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
@@ -35,7 +55,40 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
return (1);
}
-void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Check) {
+ // Examine the packet and convert pairs of instructions to compound
+ // instructions when possible.
+ if (!HexagonDisableCompound)
+ HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
+ // Check the bundle for errors.
+ bool CheckOk = Check ? Check->check() : true;
+ if (!CheckOk)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ // Examine the packet and convert pairs of instructions to duplex
+ // instructions when possible.
+ MCInst InstBundlePreDuplex = MCInst(MCB);
+ if (!HexagonDisableDuplex) {
+ SmallVector<DuplexCandidate, 8> possibleDuplexes;
+ possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+ HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+ }
+ // Examines packet and pad the packet, if needed, when an
+ // end-loop is in the bundle.
+ HexagonMCInstrInfo::padEndloop(Context, MCB);
+  // If compounding and duplexing didn't reduce the packet to 4 or fewer
+  // instructions, it is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ return true;
+}
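A sketch of a call site for the new entry point; constructing the checker and context is assumed, and the error handling is illustrative:

HexagonMCChecker Checker(MCII, STI, Bundle, BundleDX,
                         *Context.getRegisterInfo());
if (!HexagonMCInstrInfo::canonicalizePacket(MCII, STI, Context, Bundle,
                                            &Checker))
  report_fatal_error("invalid Hexagon packet");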
+
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
+ MCContext &Context, MCInst &MCI) {
assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
HexagonMCInstrInfo::isExtended(MCII, MCI));
MCOperand &exOp =
@@ -43,13 +96,20 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
// If the extended value is a constant, then use it for the extended and
// for the extender instructions, masking off the lower 6 bits and
// including the assumed bits.
- if (exOp.isImm()) {
+ int64_t Value;
+ if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
- int64_t Bits = exOp.getImm();
- exOp.setImm((Bits & 0x3f) << Shift);
+ exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context));
}
}
+MCInst HexagonMCInstrInfo::createBundle() {
+ MCInst Result;
+ Result.setOpcode(Hexagon::BUNDLE);
+ Result.addOperand(MCOperand::createImm(0));
+ return Result;
+}
+
MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
MCInst const &inst0,
MCInst const &inst1) {
@@ -64,6 +124,27 @@ MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
return duplexInst;
}
+MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
+ MCInst const &Inst,
+ MCOperand const &MO) {
+ assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
+ HexagonMCInstrInfo::isExtended(MCII, Inst));
+
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+ MCInst XMI;
+ XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
+ ? Hexagon::A4_ext_b
+ : Hexagon::A4_ext);
+ if (MO.isImm())
+ XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
+ else if (MO.isExpr())
+ XMI.addOperand(MCOperand::createExpr(MO.getExpr()));
+ else
+ llvm_unreachable("invalid extendable operand");
+ return XMI;
+}
+
MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
size_t Index) {
assert(Index <= bundleSize(MCB));
@@ -76,6 +157,13 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
return nullptr;
}
+void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend) {
+ if (isConstExtended(MCII, MCI) || MustExtend)
+ addConstExtender(Context, MCII, MCB, MCI);
+}
+
HexagonII::MemAccessSize
HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -186,6 +274,25 @@ MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
return (MCO);
}
+/// Return the new value or the newly produced value.
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2);
+}
+
+MCOperand const &
+HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI);
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -242,6 +349,13 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
}
+/// Return whether the insn produces a second value.
+bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2);
+}
+
MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
assert(isBundle(MCB));
assert(Index < HEXAGON_PACKET_SIZE);
@@ -261,6 +375,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
}
+bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
+}
+
bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
(Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
@@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
MCInst const &MCI) {
if (HexagonMCInstrInfo::isExtended(MCII, MCI))
return true;
-
- if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+ // Branch insns are handled as necessary by relaxation.
+ if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
+ return false;
+ // Otherwise loop instructions and other CR insts are handled by relaxation
+ else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) &&
+ (MCI.getOpcode() != Hexagon::C4_addipc))
+ return false;
+ else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
return false;
- short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI);
- int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
- int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
- MCOperand const &MO = MCI.getOperand(ExtOpNum);
+ MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI);
// We could be using an instruction with an extendable immediate and shoehorn
// a global address into it. If it is a global address it will be constant
@@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isExpr())
+ assert(!MO.isImm());
+ int64_t Value;
+ if (!MO.getExpr()->evaluateAsAbsolute(Value))
return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
-
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
+ int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ return (MinValue > Value || Value > MaxValue);
}
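The rewritten predicate decides in a fixed order; a condensed restatement of that logic (a sketch, not an LLVM API):

bool needsExtender(bool Extended, bool BranchLike, bool RelaxedCR,
                   bool Extendable, bool Resolvable, int64_t V,
                   int Min, int Max) {
  if (Extended)    return true;  // explicitly extended in the stream
  if (BranchLike)  return false; // J/NV/compound branches: relaxation
  if (RelaxedCR)   return false; // CR insns except C4_addipc: relaxation
  if (!Extendable) return false;
  if (!Resolvable) return true;  // symbolic operand: must assume extension
  return V < Min || V > Max;     // constant outside the encodable range
}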
bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
@@ -374,6 +498,19 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -394,6 +531,18 @@ bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
}
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
@@ -405,7 +554,28 @@ bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
}
-void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+ if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+ (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+ return true;
+ return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+  // Sentinel far outside any encodable immediate range, returned when the
+  // operand is missing or not a compile-time constant.
+  auto Sentinel = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+                  << 8;
+  if (MCI.size() <= Index)
+    return Sentinel;
+  MCOperand const &MCO = MCI.getOperand(Index);
+  if (!MCO.isExpr())
+    return Sentinel;
+  int64_t Value;
+  if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+    return Sentinel;
+  return Value;
+}
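The sentinel (0xFFFFFFFF << 8) lies outside every immediate range the Hexagon encodings accept, so callers can compare or range-check the result directly without first asking whether the operand exists or is a constant. For example:

// Both tests simply fail when operand 2 is absent or symbolic.
if (HexagonMCInstrInfo::minConstant(MCI, 2) == 0) { /* the #0 case */ }
if (HexagonMCInstrInfo::inRange<4>(MCI, 2)) { /* the #u4 case */ }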
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
assert(isBundle(MCB));
@@ -413,7 +583,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
(HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
((HexagonMCInstrInfo::isOuterLoop(MCB) &&
(HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
- MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+ MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
}
bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
@@ -456,6 +626,20 @@ void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
Operand.setImm(Operand.getImm() | innerLoopMask);
}
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+ assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+ assert(isMemStoreReorderEnabled(MCI));
+}
+
void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
assert(isBundle(MCI));
MCOperand &Operand = MCI.getOperand(0);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 32d61a4..0237b28 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -14,9 +14,11 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#include "HexagonMCExpr.h"
#include "llvm/MC/MCInst.h"
namespace llvm {
+class HexagonMCChecker;
class MCContext;
class MCInstrDesc;
class MCInstrInfo;
@@ -39,20 +41,47 @@ int64_t const innerLoopMask = 1 << innerLoopOffset;
size_t const outerLoopOffset = 1;
int64_t const outerLoopMask = 1 << outerLoopOffset;
+// Disables reordering of memory loads/stores within a packet; by default,
+// loads may be re-ordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// Allows re-ordering of memory stores; by default, stores cannot be re-ordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
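Together with the loop bits above, operand 0 of a Hexagon::BUNDLE now carries a small flag word: bit 0 inner loop, bit 1 outer loop, bit 2 mem_noshuf, bit 3 store-reorder enabled. A decoding sketch over a bundle MCInst:

int64_t Flags = MCB.getOperand(0).getImm();
bool InnerLoop = (Flags & innerLoopMask) != 0;
bool MemNoShuf = (Flags & memReorderDisabledMask) != 0;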
+
size_t const bundleInstructionsOffset = 1;
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI);
+
// Returns a iterator range of instructions in this bundle
iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
// Returns the number of instructions in the bundle
size_t bundleSize(MCInst const &MCI);
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Checker);
+
// Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend);
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+ MCOperand const &MO);
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
@@ -108,6 +137,9 @@ unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the operand that consumes or produces a new value.
MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI);
int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -125,6 +157,7 @@ bool hasImmExt(MCInst const &MCI);
// Return whether the instruction is a legal new-value producer.
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the instruction at Index
MCInst const &instruction(MCInst const &MCB, size_t Index);
@@ -134,10 +167,24 @@ bool isBundle(MCInst const &MCI);
// Return whether the insn is an actual insn.
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the duplex iclass given the two duplex classes
unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+ return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+ return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+ return isUInt<N>(minConstant(MCI, Index));
+}
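A worked example of the range templates: inRange<5, 2> accepts exactly the #u5:2 immediates, i.e. multiples of 4 in [0, 124] (isShiftedUInt comes from llvm/Support/MathExtras.h; the values are illustrative):

assert(llvm::isShiftedUInt<5, 2>(100));  // 100 = 25 << 2, in range
assert(!llvm::isShiftedUInt<5, 2>(102)); // rejected: not 4-aligned
assert(!llvm::isShiftedUInt<5, 2>(128)); // rejected: 128 = 32 << 2, needs 6 bits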
+
// Return whether the instruction needs to be constant extended.
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -173,6 +220,8 @@ bool isIntReg(unsigned Reg);
// Is this register suitable for use in a duplex subinst
bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -191,6 +240,8 @@ bool isOuterLoop(MCInst const &MCI);
// Return whether this instruction is predicated
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
// Return whether the predicate sense is true
bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -209,9 +260,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
/// Return whether the insn can be packaged only with an A-type insn in slot #1.
bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
// Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCInst &MCI);
+void padEndloop(MCContext &Context, MCInst &MCI);
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -220,6 +272,8 @@ void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
// Marks a bundle as endloop0
void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
// Marks a bundle as endloop1
void setOuterLoop(MCInst &MCI);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 53305d8..9a29257 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "HexagonGenRegisterInfo.inc"
+cl::opt<bool> llvm::HexagonDisableCompound
+ ("mno-compound",
+ cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+ ("mno-pairing",
+ cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+ if (CPU.empty())
+ CPU = "hexagonv60";
+ return CPU;
+}
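A quick check of the fallback just added (a sketch; the Triple argument is currently unused):

StringRef CPU = HEXAGON_MC::selectHexagonCPU(TT, "");
assert(CPU == "hexagonv60" && "empty CPU string selects the v60 default");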
+
MCInstrInfo *llvm::createHexagonMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitHexagonMCInstrInfo(X);
@@ -54,6 +68,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
}
@@ -76,28 +91,23 @@ public:
StringRef Contents(Buffer);
auto PacketBundle = Contents.rsplit('\n');
auto HeadTail = PacketBundle.first.split('\n');
- auto Preamble = "\t{\n\t\t";
- auto Separator = "";
- while(!HeadTail.first.empty()) {
- OS << Separator;
- StringRef Inst;
+ StringRef Separator = "\n";
+ StringRef Indent = "\t\t";
+ OS << "\t{\n";
+ while (!HeadTail.first.empty()) {
+ StringRef InstTxt;
auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
- OS << Duplex.first << "\n";
- Inst = Duplex.second;
- }
- else {
- if(!HeadTail.first.startswith("immext"))
- Inst = Duplex.first;
+ if (!Duplex.second.empty()) {
+ OS << Indent << Duplex.first << Separator;
+ InstTxt = Duplex.second;
+ } else if (!HeadTail.first.trim().startswith("immext")) {
+ InstTxt = Duplex.first;
}
- OS << Preamble;
- OS << Inst;
+ if (!InstTxt.empty())
+ OS << Indent << InstTxt << Separator;
HeadTail = HeadTail.second.split('\n');
- Preamble = "";
- Separator = "\n\t\t";
}
- if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
- OS << "\n\t}" << PacketBundle.second;
+ OS << "\t}" << PacketBundle.second;
}
};
}
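The rewritten printer above consumes a buffered packet one line at a time: instructions are separated by '\n' and the two halves of a duplex are joined by '\v'. A standalone sketch of the same formatting idea (the immext filtering and the trailing packet text are omitted; the input convention is an assumption drawn from the hunk):

#include <iostream>
#include <sstream>
#include <string>

// Wrap a packet's instructions in braces, splitting any duplex pair
// ("a\vb") onto two indented lines. Illustrative only.
void printPacket(const std::string &Packet, std::ostream &OS) {
  OS << "\t{\n";
  std::istringstream Lines(Packet);
  for (std::string Line; std::getline(Lines, Line);) {
    std::size_t VPos = Line.find('\v');
    if (VPos != std::string::npos) {
      OS << "\t\t" << Line.substr(0, VPos) << "\n";
      Line = Line.substr(VPos + 1);
    }
    if (!Line.empty())
      OS << "\t\t" << Line << "\n";
  }
  OS << "\t}\n";
}

int main() {
  printPacket("r0 = add(r1, r2)\nr2 = r3\vmemw(r4) = r2", std::cout);
}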
@@ -154,9 +164,9 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- X->initMCCodeGenInfo(Reloc::Static, CM, OL);
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index cb62650..a005a01 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -16,6 +16,8 @@
#include <cstdint>
+#include "llvm/Support/CommandLine.h"
+
namespace llvm {
struct InstrItinerary;
struct InstrStage;
@@ -33,22 +35,27 @@ class raw_ostream;
class raw_pwrite_stream;
extern Target TheHexagonTarget;
-
+extern cl::opt<bool> HexagonDisableCompound;
+extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
MCInstrInfo *createHexagonMCInstrInfo();
-MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
- MCRegisterInfo const &MRI,
+MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
MCContext &MCT);
-MCAsmBackend *createHexagonAsmBackend(Target const &T,
- MCRegisterInfo const &MRI,
+MCAsmBackend *createHexagonAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
+namespace HEXAGON_MC {
+ StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+}
+
} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 41112ac..6ceb848 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -27,6 +27,7 @@
using namespace llvm;
+namespace {
// Insn shuffling priority.
class HexagonBid {
// The priority is directly proportional to how restricted the insn is based
@@ -75,6 +76,7 @@ public:
return false;
};
};
+} // end anonymous namespace
unsigned HexagonResource::setWeight(unsigned s) {
const unsigned SlotWeight = 8;
@@ -93,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) {
return (Weight);
}
+HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
+
+bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
+
+bool HexagonCVIResource::setup() {
+ assert(!TUL);
+ TUL = new (TypeUnitsAndLanes);
+
+ (*TUL)[HexagonII::TypeCVI_VA] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
+ (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_ST] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
+
+ return true;
+}
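Each table entry pairs a bitmask of acceptable HVX units with the number of adjacent lanes the insn occupies; the shuffler later bids that mask shifted once per lane. A small illustration of how one entry expands into bids (the enum values mirror the header further below; the rest is a sketch):

#include <cstdio>
#include <utility>

int main() {
  const unsigned CVI_XLANE = 1 << 0, CVI_MPY0 = 1 << 2;
  // TypeCVI_VA_DV: may start on XLANE or MPY0 and needs 2 adjacent lanes.
  std::pair<unsigned, unsigned> VA_DV{CVI_XLANE | CVI_MPY0, 2};
  for (unsigned i = 0; i < VA_DV.second; ++i)
    std::printf("bid mask %u: 0x%x\n", i, VA_DV.first << i); // 0x5, 0xa
}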
+
+HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+ MCInst const *id)
+ : HexagonResource(s) {
+ unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
+
+ if (TUL->count(T)) {
+ // For an HVX insn.
+ Valid = true;
+ setUnits((*TUL)[T].first);
+ setLanes((*TUL)[T].second);
+ setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
+ setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
+ } else {
+ // For core insns.
+ Valid = false;
+ setUnits(0);
+ setLanes(0);
+ setLoad(false);
+ setStore(false);
+ }
+}
+
HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
@@ -107,7 +163,7 @@ void HexagonShuffler::reset() {
void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
unsigned S, bool X) {
- HexagonInstr PI(ID, Extender, S, X);
+ HexagonInstr PI(MCII, ID, Extender, S, X);
Packet.push_back(PI);
}
@@ -126,6 +182,8 @@ bool HexagonShuffler::check() {
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ // Number of HVX loads, HVX stores.
+ unsigned CVIloads = 0, CVIstores = 0;
// Number of duplex insns, solo insns.
unsigned duplex = 0, solo = 0;
// Number of insns restricting other insns in the packet to A and X types,
@@ -168,6 +226,12 @@ bool HexagonShuffler::check() {
case HexagonII::TypeJ:
++jumps;
break;
+ case HexagonII::TypeCVI_VM_VP_LDU:
+ ++onlyNo1;
+ case HexagonII::TypeCVI_VM_LD:
+ case HexagonII::TypeCVI_VM_TMP_LD:
+ case HexagonII::TypeCVI_VM_CUR_LD:
+ ++CVIloads;
case HexagonII::TypeLD:
++loads;
++memory;
@@ -176,6 +240,11 @@ bool HexagonShuffler::check() {
if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
break;
+ case HexagonII::TypeCVI_VM_STU:
+ ++onlyNo1;
+ case HexagonII::TypeCVI_VM_ST:
+ case HexagonII::TypeCVI_VM_NEW_ST:
+ ++CVIstores;
case HexagonII::TypeST:
++stores;
++memory;
@@ -203,9 +272,9 @@ bool HexagonShuffler::check() {
}
// Check if the packet is legal.
- if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
- (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
- (onlyAX && xtypeFloat)) {
+ if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+ (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+ (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
Error = SHUFFLE_ERROR_INVALID;
return false;
}
@@ -336,6 +405,19 @@ bool HexagonShuffler::check() {
return false;
}
}
+ // Verify the CVI slot subscriptions.
+ {
+ HexagonUnitAuction AuctionCVI;
+
+ std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+ for (iterator I = begin(); I != end(); ++I)
+ for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+ if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
Error = SHUFFLE_SUCCESS;
return true;
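The auction in the block above hands each lane of an insn a shifted copy of its unit mask; a bid succeeds only while a candidate unit remains free. HexagonUnitAuction's implementation is not shown in this hunk, so the sketch below is an assumed model of its behavior, not a copy:

#include <cassert>

// Assumed auction semantics: a bid offers a mask of acceptable units
// and wins the lowest one still unsold.
class UnitAuction {
  unsigned Sold = 0;

public:
  bool bid(unsigned Candidates) {
    unsigned Free = Candidates & ~Sold;
    if (!Free)
      return false;       // every acceptable unit is already taken
    Sold |= Free & -Free; // claim the lowest free candidate
    return true;
  }
};

int main() {
  UnitAuction A;
  assert(A.bid(0b0101));  // wins unit 0
  assert(A.bid(0b0101));  // wins unit 2
  assert(!A.bid(0b0101)); // both candidates sold -> SHUFFLE_ERROR_SLOTS
}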
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 8b6c72e..174f10f 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -51,6 +51,44 @@ public:
};
};
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+ typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+ typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+ // Available HVX slots.
+ enum {
+ CVI_NONE = 0,
+ CVI_XLANE = 1 << 0,
+ CVI_SHIFT = 1 << 1,
+ CVI_MPY0 = 1 << 2,
+ CVI_MPY1 = 1 << 3
+ };
+
+ static bool SetUp;
+ static bool setup();
+ static TypeUnitsAndLanes *TUL;
+
+ // Count of adjacent slots that the insn requires to be executed.
+ unsigned Lanes;
+ // Flag whether the insn is a load or a store.
+ bool Load, Store;
+ // Flag whether the HVX resources are valid.
+ bool Valid;
+
+ void setLanes(unsigned l) { Lanes = l; };
+ void setLoad(bool f = true) { Load = f; };
+ void setStore(bool f = true) { Store = f; };
+
+public:
+ HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+ bool isValid() const { return (Valid); };
+ unsigned getLanes() const { return (Lanes); };
+ bool mayLoad() const { return (Load); };
+ bool mayStore() const { return (Store); };
+};
+
// Handle to an insn used by the shuffling algorithm.
class HexagonInstr {
friend class HexagonShuffler;
@@ -58,12 +96,14 @@ class HexagonInstr {
MCInst const *ID;
MCInst const *Extender;
HexagonResource Core;
+ HexagonCVIResource CVI;
bool SoloException;
public:
- HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s,
- bool x = false)
- : ID(id), Extender(Extender), Core(s), SoloException(x){};
+ HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+ MCInst const *Extender, unsigned s, bool x = false)
+ : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
+ SoloException(x){};
MCInst const *getDesc() const { return (ID); };
@@ -79,6 +119,10 @@ public:
static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
return (HexagonResource::lessUnits(A.Core, B.Core));
};
+ // Check if the handles are in ascending order by HVX slots.
+ static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) {
+ return (HexagonResource::lessUnits(A.CVI, B.CVI));
+ };
};
// Bundle shuffler.
@@ -108,6 +152,8 @@ public:
SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns.
SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots.
+ SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60).
+ SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< Store/load conflict.
SHUFFLE_ERROR_UNKNOWN ///< Unknown error.
};
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 70141a9..72afec1 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -17,8 +17,6 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
-
class MSP430InstPrinter : public MCInstPrinter {
public:
MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index ff5b0b6..183dee3 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -17,13 +17,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class MSP430MCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit MSP430MCAsmInfo(const Triple &TT);
- };
+class MSP430MCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit MSP430MCAsmInfo(const Triple &TT);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
index ffcf222..606abc2 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -64,7 +64,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned FuncSize = 0;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
unsigned BlockSize = 0;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 29bc8b3..18f38b7 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -69,10 +69,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
computeRegisterProperties(STI.getRegisterInfo());
// Provide all sorts of operation actions
-
- // Division is expensive
- setIntDivIsCheap(false);
-
setStackPointerRegisterToSaveRestore(MSP430::SP);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -508,9 +504,10 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ InVal = DAG.getLoad(
+ VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
}
InVals.push_back(InVal);
@@ -1231,8 +1228,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// Create loop block
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -1320,8 +1316,7 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// thisMBB:
// ...
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 72b1780..d4f82bd 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -44,11 +44,10 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
@@ -72,11 +71,10 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index 54154a8..47b0e27 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -50,9 +50,9 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetJumpTableSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout &DL = Printer.getDataLayout();
SmallString<256> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_'
<< MO.getIndex();
@@ -67,9 +67,9 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout &DL = Printer.getDataLayout();
SmallString<256> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_'
<< MO.getIndex();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index 0f75399..b442fc0 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MachineFuctionInfo.cpp - MSP430 machine function info -------===//
+//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index fcc5f5b..2d93731 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//=== MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 5107d2a..d4e061f 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsRegisterInfo.h"
+#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
@@ -106,7 +107,6 @@ class MipsAsmParser : public MCTargetAsmParser {
return static_cast<MipsTargetStreamer &>(TS);
}
- MCSubtargetInfo &STI;
MipsABIInfo ABI;
SmallVector<std::unique_ptr<MipsAssemblerOptions>, 2> AssemblerOptions;
MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
@@ -114,6 +114,12 @@ class MipsAsmParser : public MCTargetAsmParser {
// selected. This usually happens after an '.end func'
// directive.
bool IsLittleEndian;
+ bool IsPicEnabled;
+ bool IsCpRestoreSet;
+ int CpRestoreOffset;
+ unsigned CpSaveLocation;
+ /// If true, then CpSaveLocation is a register, otherwise it's an offset.
+ bool CpSaveLocationIsRegister;
// Print a warning along with its fix-it message at the given range.
void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
@@ -141,50 +147,41 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID) override;
- MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
+ OperandMatchResultTy parseMemOperand(OperandVector &Operands);
+ OperandMatchResultTy
matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier, SMLoc S);
-
- MipsAsmParser::OperandMatchResultTy
- matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
-
- MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseRegisterPair (OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseMovePRegPair(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseRegisterList (OperandVector &Operands);
+ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+ SMLoc S);
+ OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+ OperandMatchResultTy parseImm(OperandVector &Operands);
+ OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
+ OperandMatchResultTy parseInvNum(OperandVector &Operands);
+ OperandMatchResultTy parseLSAImm(OperandVector &Operands);
+ OperandMatchResultTy parseRegisterPair(OperandVector &Operands);
+ OperandMatchResultTy parseMovePRegPair(OperandVector &Operands);
+ OperandMatchResultTy parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
bool parseOperand(OperandVector &, StringRef Mnemonic);
- bool needsExpansion(MCInst &Inst);
+ enum MacroExpanderResultTy {
+ MER_NotAMacro,
+ MER_Success,
+ MER_Fail,
+ };
// Expands assembly pseudo instructions.
- // Returns false on success, true otherwise.
- bool expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ MacroExpanderResultTy
+ tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
bool expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool loadImmediate(int64_t ImmValue, unsigned DstReg, unsigned SrcReg,
- bool Is32BitImm, SMLoc IDLoc,
+ bool Is32BitImm, bool IsAddress, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg,
@@ -194,11 +191,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ bool expandLoadAddress(unsigned DstReg, unsigned BaseReg,
+ const MCOperand &Offset, bool Is32BitAddress,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -209,24 +205,43 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool expandBranchImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool expandCondBranches(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandUlhu(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ bool expandDiv(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions, const bool IsMips64,
+ const bool Signed);
+
+ bool expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
bool expandUlw(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandDRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg,
bool Is64Bit, SmallVectorImpl<MCInst> &Instructions);
+ void createCpRestoreMemOp(bool IsLoad, int StackOffset, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -239,8 +254,11 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetMips0Directive();
bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
+ bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
bool parseDirectiveCpLoad(SMLoc Loc);
+ bool parseDirectiveCpRestore(SMLoc Loc);
bool parseDirectiveCPSetup();
+ bool parseDirectiveCPReturn();
bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
@@ -337,6 +355,7 @@ class MipsAsmParser : public MCTargetAsmParser {
// FeatureMipsGP64 | FeatureMips1)
// Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4).
void selectArch(StringRef ArchFeature) {
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset FeatureBits = STI.getFeatureBits();
FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask;
STI.setFeatureBits(FeatureBits);
@@ -346,7 +365,8 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (!(STI.getFeatureBits()[Feature])) {
+ if (!(getSTI().getFeatureBits()[Feature])) {
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
@@ -354,7 +374,8 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (STI.getFeatureBits()[Feature]) {
+ if (getSTI().getFeatureBits()[Feature]) {
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
@@ -363,26 +384,25 @@ class MipsAsmParser : public MCTargetAsmParser {
void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
setFeatureBits(Feature, FeatureString);
- AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
}
void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
clearFeatureBits(Feature, FeatureString);
- AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
}
public:
enum MipsMatchResultTy {
- Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
+ Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "MipsGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
-
};
- MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti),
+ : MCTargetAsmParser(Options, sti),
ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()),
sti.getCPU(), Options)) {
MCAsmParserExtension::Initialize(parser);
@@ -390,15 +410,15 @@ public:
parser.addAliasForDirective(".asciiz", ".asciz");
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
-
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
// Remember the initial assembler options. The user cannot modify these.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
-
+ llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
+
// Create an assembler options environment for the user to modify.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
+ llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
getTargetStreamer().updateABIInfo(*this);
@@ -407,6 +427,12 @@ public:
CurrentFn = nullptr;
+ IsPicEnabled =
+ (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_);
+
+ IsCpRestoreSet = false;
+ CpRestoreOffset = -1;
+
Triple TheTriple(sti.getTargetTriple());
if ((TheTriple.getArch() == Triple::mips) ||
(TheTriple.getArch() == Triple::mips64))
@@ -418,70 +444,103 @@ public:
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
- bool isGP64bit() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
- bool isFP64bit() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
+ bool isGP64bit() const {
+ return getSTI().getFeatureBits()[Mips::FeatureGP64Bit];
+ }
+ bool isFP64bit() const {
+ return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
+ }
const MipsABIInfo &getABI() const { return ABI; }
bool isABI_N32() const { return ABI.IsN32(); }
bool isABI_N64() const { return ABI.IsN64(); }
bool isABI_O32() const { return ABI.IsO32(); }
- bool isABI_FPXX() const { return STI.getFeatureBits()[Mips::FeatureFPXX]; }
+ bool isABI_FPXX() const {
+ return getSTI().getFeatureBits()[Mips::FeatureFPXX];
+ }
bool useOddSPReg() const {
- return !(STI.getFeatureBits()[Mips::FeatureNoOddSPReg]);
+ return !(getSTI().getFeatureBits()[Mips::FeatureNoOddSPReg]);
}
bool inMicroMipsMode() const {
- return STI.getFeatureBits()[Mips::FeatureMicroMips];
+ return getSTI().getFeatureBits()[Mips::FeatureMicroMips];
+ }
+ bool hasMips1() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips1];
+ }
+ bool hasMips2() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips2];
+ }
+ bool hasMips3() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips3];
+ }
+ bool hasMips4() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips4];
+ }
+ bool hasMips5() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips5];
}
- bool hasMips1() const { return STI.getFeatureBits()[Mips::FeatureMips1]; }
- bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
- bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
- bool hasMips4() const { return STI.getFeatureBits()[Mips::FeatureMips4]; }
- bool hasMips5() const { return STI.getFeatureBits()[Mips::FeatureMips5]; }
bool hasMips32() const {
- return STI.getFeatureBits()[Mips::FeatureMips32];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32];
}
bool hasMips64() const {
- return STI.getFeatureBits()[Mips::FeatureMips64];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64];
}
bool hasMips32r2() const {
- return STI.getFeatureBits()[Mips::FeatureMips32r2];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32r2];
}
bool hasMips64r2() const {
- return STI.getFeatureBits()[Mips::FeatureMips64r2];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64r2];
}
bool hasMips32r3() const {
- return (STI.getFeatureBits()[Mips::FeatureMips32r3]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]);
}
bool hasMips64r3() const {
- return (STI.getFeatureBits()[Mips::FeatureMips64r3]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]);
}
bool hasMips32r5() const {
- return (STI.getFeatureBits()[Mips::FeatureMips32r5]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]);
}
bool hasMips64r5() const {
- return (STI.getFeatureBits()[Mips::FeatureMips64r5]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]);
}
bool hasMips32r6() const {
- return STI.getFeatureBits()[Mips::FeatureMips32r6];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32r6];
}
bool hasMips64r6() const {
- return STI.getFeatureBits()[Mips::FeatureMips64r6];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64r6];
}
- bool hasDSP() const { return STI.getFeatureBits()[Mips::FeatureDSP]; }
- bool hasDSPR2() const { return STI.getFeatureBits()[Mips::FeatureDSPR2]; }
- bool hasMSA() const { return STI.getFeatureBits()[Mips::FeatureMSA]; }
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSP];
+ }
+ bool hasDSPR2() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
+ }
+ bool hasDSPR3() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
+ }
+ bool hasMSA() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMSA];
+ }
bool hasCnMips() const {
- return (STI.getFeatureBits()[Mips::FeatureCnMips]);
+ return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
+ }
+
+ bool inPicMode() {
+ return IsPicEnabled;
}
bool inMips16Mode() const {
- return STI.getFeatureBits()[Mips::FeatureMips16];
+ return getSTI().getFeatureBits()[Mips::FeatureMips16];
+ }
+
+ bool useTraps() const {
+ return getSTI().getFeatureBits()[Mips::FeatureUseTCCInDIV];
}
bool useSoftFloat() const {
- return STI.getFeatureBits()[Mips::FeatureSoftFloat];
+ return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
}
/// Warn if RegIndex is the same as the current AT.
@@ -869,6 +928,16 @@ public:
Inst.addOperand(MCOperand::createReg(getHWRegsReg()));
}
+ template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+ void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ uint64_t Imm = getConstantImm() - Offset;
+ Imm &= (1 << Bits) - 1;
+ Imm += Offset;
+ Imm += AdjustOffset;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
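addConstantUImmOperands folds an out-of-window constant into the Bits-wide window anchored at Offset before handing it to the encoder. A pure restatement (demo name; assumes Bits < 64):

#include <cassert>
#include <cstdint>

template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
uint64_t normalizeUImm(uint64_t Imm) {
  Imm -= Offset;                      // rebase the window at zero
  Imm &= (uint64_t(1) << Bits) - 1;   // keep the low Bits bits
  return Imm + Offset + AdjustOffset; // restore base, apply encoder tweak
}

int main() {
  // A 2-bit field anchored at 1 represents 1..4: the value 4 wraps to
  // bit pattern 3 and is restored here.
  assert((normalizeUImm<2, 1>(4)) == 4);
  assert((normalizeUImm<2, 1>(1)) == 1);
}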
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCExpr *Expr = getImm();
@@ -878,7 +947,9 @@ public:
void addMemOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR32Reg()));
+ Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit()
+ ? getMemBase()->getGPR64Reg()
+ : getMemBase()->getGPR32Reg()));
const MCExpr *Expr = getMemOff();
addExpr(Inst, Expr);
@@ -924,10 +995,16 @@ public:
bool isRegIdx() const { return Kind == k_RegisterIndex; }
bool isImm() const override { return Kind == k_Immediate; }
bool isConstantImm() const {
- return isImm() && dyn_cast<MCConstantExpr>(getImm());
+ return isImm() && isa<MCConstantExpr>(getImm());
+ }
+ bool isConstantImmz() const {
+ return isConstantImm() && getConstantImm() == 0;
}
- template <unsigned Bits> bool isUImm() const {
- return isImm() && isConstantImm() && isUInt<Bits>(getConstantImm());
+ template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
+ return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
+ }
+ template <unsigned Bits> bool isConstantSImm() const {
+ return isConstantImm() && isInt<Bits>(getConstantImm());
}
bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
@@ -936,10 +1013,15 @@ public:
}
bool isMem() const override { return Kind == k_Memory; }
bool isConstantMemOff() const {
- return isMem() && dyn_cast<MCConstantExpr>(getMemOff());
+ return isMem() && isa<MCConstantExpr>(getMemOff());
}
template <unsigned Bits> bool isMemWithSimmOffset() const {
- return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff());
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff())
+ && getMemBase()->isGPRAsmReg();
+ }
+ template <unsigned Bits> bool isMemWithSimmOffsetGPR() const {
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff()) &&
+ getMemBase()->isGPRAsmReg();
}
bool isMemWithGRPMM16Base() const {
return isMem() && getMemBase()->isMM16AsmReg();
@@ -953,13 +1035,23 @@ public:
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::SP);
}
+ template <unsigned Bits, unsigned ShiftLeftAmount>
+ bool isScaledUImm() const {
+ return isConstantImm() &&
+ isShiftedUInt<Bits, ShiftLeftAmount>(getConstantImm());
+ }
bool isRegList16() const {
if (!isRegList())
return false;
int Size = RegList.List->size();
- if (Size < 2 || Size > 5 || *RegList.List->begin() != Mips::S0 ||
- RegList.List->back() != Mips::RA)
+ if (Size < 2 || Size > 5)
+ return false;
+
+ unsigned R0 = RegList.List->front();
+ unsigned R1 = RegList.List->back();
+ if (!((R0 == Mips::S0 && R1 == Mips::RA) ||
+ (R0 == Mips::S0_64 && R1 == Mips::RA_64)))
return false;
int PrevReg = *RegList.List->begin();
@@ -1304,9 +1396,123 @@ static bool hasShortDelaySlot(unsigned Opcode) {
}
}
+static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) {
+ if (const MCSymbolRefExpr *SRExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ return &SRExpr->getSymbol();
+ }
+
+ if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS());
+ const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS());
+
+ if (LHSSym)
+ return LHSSym;
+
+ if (RHSSym)
+ return RHSSym;
+
+ return nullptr;
+ }
+
+ if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+ return getSingleMCSymbol(UExpr->getSubExpr());
+
+ return nullptr;
+}
+
+static unsigned countMCSymbolRefExpr(const MCExpr *Expr) {
+ if (isa<MCSymbolRefExpr>(Expr))
+ return 1;
+
+ if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr))
+ return countMCSymbolRefExpr(BExpr->getLHS()) +
+ countMCSymbolRefExpr(BExpr->getRHS());
+
+ if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+ return countMCSymbolRefExpr(UExpr->getSubExpr());
+
+ return 0;
+}
+
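Both helpers above are plain recursive walks over the MCExpr tree: symbol references are leaves, binary and unary expressions recurse. The generic shape, with a stand-in node type instead of MCExpr:

#include <cassert>

// Stand-in for MCExpr: a symbol leaf, a binary node, or a unary node
// (which uses only L). Illustrative only.
struct Expr {
  enum Kind { Sym, Bin, Una } K;
  const Expr *L;
  const Expr *R;
};

unsigned countSymbols(const Expr *E) {
  switch (E->K) {
  case Expr::Sym:
    return 1;
  case Expr::Bin:
    return countSymbols(E->L) + countSymbols(E->R);
  case Expr::Una:
    return countSymbols(E->L);
  }
  return 0;
}

int main() {
  Expr S1{Expr::Sym, nullptr, nullptr}, S2{Expr::Sym, nullptr, nullptr};
  Expr Neg{Expr::Una, &S1, nullptr};
  Expr Sum{Expr::Bin, &Neg, &S2};
  assert(countSymbols(&Sum) == 2); // >1 symbol makes the jal expansion error out
}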
+namespace {
+void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.addOperand(Op1);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, Instructions);
+}
+
+void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, Instructions);
+}
+
+void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createImm(Imm1));
+ tmpInst.addOperand(MCOperand::createImm(Imm2));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.addOperand(MCOperand::createReg(Reg1));
+ tmpInst.addOperand(Op2);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc,
+ Instructions);
+}
+
+void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc,
+ Instructions);
+}
+
+void emitAppropriateDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ if (ShiftAmount >= 32) {
+ emitRRI(Mips::DSLL32, DstReg, SrcReg, ShiftAmount - 32, IDLoc,
+ Instructions);
+ return;
+ }
+
+ emitRRI(Mips::DSLL, DstReg, SrcReg, ShiftAmount, IDLoc, Instructions);
+}
+} // end anonymous namespace.
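emitAppropriateDSLL above encodes a real ISA quirk: the MIPS shamt field is 5 bits wide, so 64-bit shift amounts of 32..63 use the separate DSLL32 opcode with the amount reduced by 32. The selection logic in isolation:

#include <cassert>
#include <cstdio>
#include <cstring>

struct Shift {
  const char *Opcode;
  int Amount;
};

// Pick the opcode/amount pair for a 64-bit left shift (sketch of the
// branch in emitAppropriateDSLL).
Shift selectDSLL(int ShiftAmount) {
  assert(ShiftAmount >= 0 && ShiftAmount < 64);
  if (ShiftAmount >= 32)
    return {"dsll32", ShiftAmount - 32};
  return {"dsll", ShiftAmount};
}

int main() {
  Shift S = selectDSLL(36);
  assert(std::strcmp(S.Opcode, "dsll32") == 0 && S.Amount == 4);
  std::printf("%s $1, $2, %d\n", S.Opcode, S.Amount);
}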
+
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ bool ExpandedJalSym = false;
Inst.setLoc(IDLoc);
@@ -1365,12 +1571,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BEQZ16_MM:
+ case Mips::BEQZC16_MMR6:
case Mips::BNEZ16_MM:
+ case Mips::BNEZC16_MMR6:
assert(MCID.getNumOperands() == 2 && "unexpected number of operands");
Offset = Inst.getOperand(1);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(8, Offset.getImm()))
+ if (!isInt<8>(Offset.getImm()))
return Error(IDLoc, "branch target out of range");
if (OffsetToAlignment(Offset.getImm(), 2LL))
return Error(IDLoc, "branch to misaligned address");
@@ -1415,32 +1623,6 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
break;
- case Mips::CINS:
- case Mips::CINS32:
- case Mips::EXTS:
- case Mips::EXTS32:
- assert(MCID.getNumOperands() == 4 && "unexpected number of operands");
- // Check length
- Opnd = Inst.getOperand(3);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (Imm < 0 || Imm > 31)
- return Error(IDLoc, "immediate operand value out of range");
- // Check position
- Opnd = Inst.getOperand(2);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (Imm < 0 || Imm > (Opcode == Mips::CINS ||
- Opcode == Mips::EXTS ? 63 : 31))
- return Error(IDLoc, "immediate operand value out of range");
- if (Imm > 31) {
- Inst.setOpcode(Opcode == Mips::CINS ? Mips::CINS32 : Mips::EXTS32);
- Inst.getOperand(2).setImm(Imm - 32);
- }
- break;
-
case Mips::SEQi:
case Mips::SNEi:
assert(MCID.getNumOperands() == 3 && "unexpected number of operands");
@@ -1454,6 +1636,81 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // This expansion is not in a function called by tryExpandInstruction()
+ // because the pseudo-instruction doesn't have a distinct opcode.
+ if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) &&
+ inPicMode()) {
+ warnIfNoMacro(IDLoc);
+
+ const MCExpr *JalExpr = Inst.getOperand(0).getExpr();
+
+ // We can do this expansion if there's only 1 symbol in the argument
+ // expression.
+ if (countMCSymbolRefExpr(JalExpr) > 1)
+ return Error(IDLoc, "jal doesn't support multiple symbols in PIC mode");
+
+ // FIXME: This is checking the expression can be handled by the later stages
+ // of the assembler. We ought to leave it to those later stages but
+ // we can't do that until we stop evaluateRelocExpr() rewriting the
+ // expressions into non-equivalent forms.
+ const MCSymbol *JalSym = getSingleMCSymbol(JalExpr);
+
+ // FIXME: Add support for label+offset operands (currently causes an error).
+ // FIXME: Add support for forward-declared local symbols.
+ // FIXME: Add expansion for when the LargeGOT option is enabled.
+ if (JalSym->isInSection() || JalSym->isTemporary()) {
+ if (isABI_O32()) {
+ // If it's a local symbol and the O32 ABI is being used, we expand to:
+ // lw $25, 0($gp)
+ // R_(MICRO)MIPS_GOT16 label
+ // addiu $25, $25, 0
+ // R_(MICRO)MIPS_LO16 label
+ // jalr $25
+ const MCExpr *Got16RelocExpr = evaluateRelocExpr(JalExpr, "got");
+ const MCExpr *Lo16RelocExpr = evaluateRelocExpr(JalExpr, "lo");
+
+ emitRRX(Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(Got16RelocExpr), IDLoc, Instructions);
+ emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
+ MCOperand::createExpr(Lo16RelocExpr), IDLoc, Instructions);
+ } else if (isABI_N32() || isABI_N64()) {
+ // If it's a local symbol and the N32/N64 ABIs are being used,
+ // we expand to:
+ // lw/ld $25, 0($gp)
+ // R_(MICRO)MIPS_GOT_DISP label
+ // jalr $25
+ const MCExpr *GotDispRelocExpr = evaluateRelocExpr(JalExpr, "got_disp");
+
+ emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(GotDispRelocExpr), IDLoc, Instructions);
+ }
+ } else {
+ // If it's an external/weak symbol, we expand to:
+ // lw/ld $25, 0($gp)
+ // R_(MICRO)MIPS_CALL16 label
+ // jalr $25
+ const MCExpr *Call16RelocExpr = evaluateRelocExpr(JalExpr, "call16");
+
+ emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(Call16RelocExpr), IDLoc, Instructions);
+ }
+
+ MCInst JalrInst;
+ if (IsCpRestoreSet && inMicroMipsMode())
+ JalrInst.setOpcode(Mips::JALRS_MM);
+ else
+ JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ JalrInst.addOperand(MCOperand::createReg(Mips::RA));
+ JalrInst.addOperand(MCOperand::createReg(Mips::T9));
+
+ // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR.
+ // This relocation is supposed to be an optimization hint for the linker
+ // and is not necessary for correctness.
+
+ Inst = JalrInst;
+ ExpandedJalSym = true;
+ }
+
if (MCID.mayLoad() || MCID.mayStore()) {
// Check the offset of memory operand, if it is a symbol
// reference or immediate we may have to expand instructions.
@@ -1500,17 +1757,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
int MemOffset = Op.getImm();
MCOperand &DstReg = Inst.getOperand(0);
MCOperand &BaseReg = Inst.getOperand(1);
- if (isIntN(9, MemOffset) && (MemOffset % 4 == 0) &&
+ if (isInt<9>(MemOffset) && (MemOffset % 4 == 0) &&
getContext().getRegisterInfo()->getRegClass(
Mips::GPRMM16RegClassID).contains(DstReg.getReg()) &&
- BaseReg.getReg() == Mips::GP) {
- MCInst TmpInst;
- TmpInst.setLoc(IDLoc);
- TmpInst.setOpcode(Mips::LWGP_MM);
- TmpInst.addOperand(MCOperand::createReg(DstReg.getReg()));
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
- TmpInst.addOperand(MCOperand::createImm(MemOffset));
- Instructions.push_back(TmpInst);
+ (BaseReg.getReg() == Mips::GP ||
+ BaseReg.getReg() == Mips::GP_64)) {
+
+ emitRRI(Mips::LWGP_MM, DstReg.getReg(), Mips::GP, MemOffset,
+ IDLoc, Instructions);
return false;
}
}
@@ -1597,7 +1851,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Imm < -1 || Imm > 14)
return Error(IDLoc, "immediate operand value out of range");
break;
+ case Mips::TEQ_MM:
+ case Mips::TGE_MM:
+ case Mips::TGEU_MM:
+ case Mips::TLT_MM:
+ case Mips::TLTU_MM:
+ case Mips::TNE_MM:
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1607,6 +1868,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break;
case Mips::LHU16_MM:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1616,6 +1878,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break;
case Mips::LW16_MM:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1623,93 +1886,111 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Imm < 0 || Imm > 60 || (Imm % 4 != 0))
return Error(IDLoc, "immediate operand value out of range");
break;
- case Mips::CACHE:
- case Mips::PREF:
- Opnd = Inst.getOperand(2);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (!isUInt<5>(Imm))
- return Error(IDLoc, "immediate operand value out of range");
- break;
case Mips::ADDIUPC_MM:
MCOperand Opnd = Inst.getOperand(1);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
int Imm = Opnd.getImm();
- if ((Imm % 4 != 0) || !isIntN(25, Imm))
+ if ((Imm % 4 != 0) || !isInt<25>(Imm))
return Error(IDLoc, "immediate operand value out of range");
break;
}
}
- if (needsExpansion(Inst)) {
- if (expandInstruction(Inst, IDLoc, Instructions))
- return true;
- } else
+ MacroExpanderResultTy ExpandResult =
+ tryExpandInstruction(Inst, IDLoc, Instructions);
+ switch (ExpandResult) {
+ case MER_NotAMacro:
Instructions.push_back(Inst);
+ break;
+ case MER_Success:
+ break;
+ case MER_Fail:
+ return true;
+ }
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
- return false;
-}
+ if ((Inst.getOpcode() == Mips::JalOneReg ||
+ Inst.getOpcode() == Mips::JalTwoReg || ExpandedJalSym) &&
+ isPicAndNotNxxAbi()) {
+ if (IsCpRestoreSet) {
+ // We need a NOP between the JALR and the LW:
+ // If .set reorder has been used, we've already emitted a NOP.
+ // If .set noreorder has been used, we need to emit a NOP at this point.
+ if (!AssemblerOptions.back()->isReorder())
+ createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
-bool MipsAsmParser::needsExpansion(MCInst &Inst) {
+ // Load the $gp from the stack.
+ SmallVector<MCInst, 3> LoadInsts;
+ createCpRestoreMemOp(true /*IsLoad*/, CpRestoreOffset /*StackOffset*/,
+ IDLoc, LoadInsts);
- switch (Inst.getOpcode()) {
- case Mips::LoadImm32:
- case Mips::LoadImm64:
- case Mips::LoadAddrImm32:
- case Mips::LoadAddrReg32:
- case Mips::B_MM_Pseudo:
- case Mips::LWM_MM:
- case Mips::SWM_MM:
- case Mips::JalOneReg:
- case Mips::JalTwoReg:
- case Mips::BneImm:
- case Mips::BeqImm:
- case Mips::BLT:
- case Mips::BLE:
- case Mips::BGE:
- case Mips::BGT:
- case Mips::BLTU:
- case Mips::BLEU:
- case Mips::BGEU:
- case Mips::BGTU:
- case Mips::Ulhu:
- case Mips::Ulw:
- return true;
- default:
- return false;
+ for (const MCInst &Inst : LoadInsts)
+ Instructions.push_back(Inst);
+
+ } else
+ Warning(IDLoc, "no .cprestore used in PIC mode");
}
+
+ return false;
}
-bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+MipsAsmParser::MacroExpanderResultTy
+MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
- default: llvm_unreachable("unimplemented expansion");
+ default:
+ return MER_NotAMacro;
case Mips::LoadImm32:
- return expandLoadImm(Inst, true, IDLoc, Instructions);
+ return expandLoadImm(Inst, true, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::LoadImm64:
- return expandLoadImm(Inst, false, IDLoc, Instructions);
+ return expandLoadImm(Inst, false, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::LoadAddrImm32:
- return expandLoadAddressImm(Inst, true, IDLoc, Instructions);
+ case Mips::LoadAddrImm64:
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ assert((Inst.getOperand(1).isImm() || Inst.getOperand(1).isExpr()) &&
+ "expected immediate operand kind");
+
+ return expandLoadAddress(Inst.getOperand(0).getReg(), Mips::NoRegister,
+ Inst.getOperand(1),
+ Inst.getOpcode() == Mips::LoadAddrImm32, IDLoc,
+ Instructions)
+ ? MER_Fail
+ : MER_Success;
case Mips::LoadAddrReg32:
- return expandLoadAddressReg(Inst, true, IDLoc, Instructions);
+ case Mips::LoadAddrReg64:
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ assert((Inst.getOperand(2).isImm() || Inst.getOperand(2).isExpr()) &&
+ "expected immediate operand kind");
+
+ return expandLoadAddress(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg(), Inst.getOperand(2),
+ Inst.getOpcode() == Mips::LoadAddrReg32, IDLoc,
+ Instructions)
+ ? MER_Fail
+ : MER_Success;
case Mips::B_MM_Pseudo:
- return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions);
+ case Mips::B_MMR6_Pseudo:
+ return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::SWM_MM:
case Mips::LWM_MM:
- return expandLoadStoreMultiple(Inst, IDLoc, Instructions);
+ return expandLoadStoreMultiple(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::JalOneReg:
case Mips::JalTwoReg:
- return expandJalWithRegs(Inst, IDLoc, Instructions);
+ return expandJalWithRegs(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::BneImm:
case Mips::BeqImm:
- return expandBranchImm(Inst, IDLoc, Instructions);
+ return expandBranchImm(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::BLT:
case Mips::BLE:
case Mips::BGE:
@@ -1718,78 +1999,97 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::BLEU:
case Mips::BGEU:
case Mips::BGTU:
- return expandCondBranches(Inst, IDLoc, Instructions);
+ case Mips::BLTL:
+ case Mips::BLEL:
+ case Mips::BGEL:
+ case Mips::BGTL:
+ case Mips::BLTUL:
+ case Mips::BLEUL:
+ case Mips::BGEUL:
+ case Mips::BGTUL:
+ case Mips::BLTImmMacro:
+ case Mips::BLEImmMacro:
+ case Mips::BGEImmMacro:
+ case Mips::BGTImmMacro:
+ case Mips::BLTUImmMacro:
+ case Mips::BLEUImmMacro:
+ case Mips::BGEUImmMacro:
+ case Mips::BGTUImmMacro:
+ case Mips::BLTLImmMacro:
+ case Mips::BLELImmMacro:
+ case Mips::BGELImmMacro:
+ case Mips::BGTLImmMacro:
+ case Mips::BLTULImmMacro:
+ case Mips::BLEULImmMacro:
+ case Mips::BGEULImmMacro:
+ case Mips::BGTULImmMacro:
+ return expandCondBranches(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::SDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, false, true) ? MER_Fail
+ : MER_Success;
+ case Mips::DSDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, true, true) ? MER_Fail
+ : MER_Success;
+ case Mips::UDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, false, false) ? MER_Fail
+ : MER_Success;
+ case Mips::DUDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, true, false) ? MER_Fail
+ : MER_Success;
+ case Mips::Ulh:
+ return expandUlh(Inst, true, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::Ulhu:
- return expandUlhu(Inst, IDLoc, Instructions);
+ return expandUlh(Inst, false, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::Ulw:
- return expandUlw(Inst, IDLoc, Instructions);
+ return expandUlw(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success;
+ case Mips::NORImm:
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::ADDi:
+ case Mips::ADDiu:
+ case Mips::SLTi:
+ case Mips::SLTiu:
+ if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ if (isInt<16>(ImmValue))
+ return MER_NotAMacro;
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ }
+ return MER_NotAMacro;
+ case Mips::ANDi:
+ case Mips::ORi:
+ case Mips::XORi:
+ if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ if (isUInt<16>(ImmValue))
+ return MER_NotAMacro;
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ }
+ return MER_NotAMacro;
+ case Mips::ROL:
+ case Mips::ROR:
+ return expandRotation(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::ROLImm:
+ case Mips::RORImm:
+ return expandRotationImm(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::DROL:
+ case Mips::DROR:
+ return expandDRotation(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::DROLImm:
+ case Mips::DRORImm:
+ return expandDRotationImm(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
}
}
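For ADDi/ADDiu/SLTi/SLTiu the dispatch above keeps the real instruction when the immediate fits the signed 16-bit field (ANDi/ORi/XORi use the unsigned check) and otherwise routes to expandAliasImmediate. The conventional expansion materializes the immediate in $at and switches to the register form; a hedged sketch of that classic MIPS assembler behavior (whether this patch emits exactly this sequence is an assumption):

#include <cstdint>
#include <cstdio>

// Expand "andi $rd, $rs, imm" when imm no longer fits 16 bits:
// build imm in $1 ($at) with lui/ori, then use the register form.
void expandAndi(unsigned Rd, unsigned Rs, uint32_t Imm) {
  if (Imm <= 0xFFFF) { // isUInt<16>: a real ANDi, no macro needed
    std::printf("andi $%u, $%u, %u\n", Rd, Rs, Imm);
    return;
  }
  std::printf("lui $1, %u\n", Imm >> 16);
  if (Imm & 0xFFFF)
    std::printf("ori $1, $1, %u\n", Imm & 0xFFFF);
  std::printf("and $%u, $%u, $1\n", Rd, Rs);
}

int main() { expandAndi(4, 5, 0x12340000); } // lui + and, no ori needed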
-namespace {
-void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
- tmpInst.setOpcode(Opcode);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(Imm);
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
-}
-
-void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions);
-}
-
-
-void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm,
- SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
- tmpInst.setOpcode(Opcode);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(MCOperand::createReg(SrcReg));
- tmpInst.addOperand(Imm);
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
-}
-
-void emitRRR(unsigned Opcode, unsigned DstReg, unsigned SrcReg,
- unsigned SrcReg2, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc,
- Instructions);
-}
-
-void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm,
- SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
- emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc,
- Instructions);
-}
-
-template <int16_t ShiftAmount>
-void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- if (ShiftAmount >= 32)
- emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions);
- else if (ShiftAmount > 0)
- emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions);
-
- // There's no need for an ORi if the immediate is 0.
- if (Operand.isImm() && Operand.getImm() == 0)
- return;
-
- emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions);
-}
-
-template <unsigned ShiftAmount>
-void createLShiftOri(int64_t Value, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- createLShiftOri<ShiftAmount>(MCOperand::createImm(Value), RegNo, IDLoc,
- Instructions);
-}
-}
-
bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
// Create a JALR instruction which is going to replace the pseudo-JAL.
@@ -1800,8 +2100,11 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
if (Opcode == Mips::JalOneReg) {
// jal $rs => jalr $rs
- if (inMicroMipsMode()) {
- JalrInst.setOpcode(Mips::JALR16_MM);
+ if (IsCpRestoreSet && inMicroMipsMode()) {
+ JalrInst.setOpcode(Mips::JALRS16_MM);
+ JalrInst.addOperand(FirstRegOp);
+ } else if (inMicroMipsMode()) {
+ JalrInst.setOpcode(hasMips32r6() ? Mips::JALRC16_MMR6 : Mips::JALR16_MM);
JalrInst.addOperand(FirstRegOp);
} else {
JalrInst.setOpcode(Mips::JALR);
@@ -1810,30 +2113,47 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
}
} else if (Opcode == Mips::JalTwoReg) {
// jal $rd, $rs => jalr $rd, $rs
- JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ if (IsCpRestoreSet && inMicroMipsMode())
+ JalrInst.setOpcode(Mips::JALRS_MM);
+ else
+ JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
JalrInst.addOperand(FirstRegOp);
const MCOperand SecondRegOp = Inst.getOperand(1);
JalrInst.addOperand(SecondRegOp);
}
Instructions.push_back(JalrInst);
- // If .set reorder is active, emit a NOP after it.
- if (AssemblerOptions.back()->isReorder()) {
- // This is a 32-bit NOP because these 2 pseudo-instructions
- // do not have a short delay slot.
- MCInst NopInst;
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createImm(0));
- Instructions.push_back(NopInst);
+ // If .set reorder is active and branch instruction has a delay slot,
+ // emit a NOP after it.
+ const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
+ createNop(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc, Instructions);
}
return false;
}
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+ unsigned BitNum = findFirstSet(x);
+
+ return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
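+
+// A hedged standalone sketch of the predicate above: strip the trailing
+// zeros, then check that what remains fits in N bits. A plain loop stands
+// in for LLVM's findFirstSet so the sketch needs nothing beyond <cstdint>;
+// the name isShiftedUIntSketch is local to this illustration.
+template <unsigned N> static bool isShiftedUIntSketch(uint64_t x) {
+  if (x == 0)
+    return true; // zero trivially fits in any width
+  unsigned BitNum = 0;
+  while (((x >> BitNum) & 1) == 0)
+    ++BitNum;
+  return (x >> BitNum) <= ((UINT64_C(1) << N) - 1);
+}
+// e.g. isShiftedUIntSketch<16>(0xffff000000000000) is true, while
+// isShiftedUIntSketch<16>(0x1ffff) is false.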
+
+/// Load (or add) an immediate into a register.
+///
+/// @param ImmValue The immediate to load.
+/// @param DstReg The register that will hold the immediate.
+/// @param SrcReg A register to add to the immediate or Mips::NoRegister
+/// for a simple initialization.
+/// @param Is32BitImm Is ImmValue 32-bit or 64-bit?
+/// @param IsAddress True if the immediate represents an address. False if it
+/// is an integer.
+/// @param IDLoc Location of the immediate in the source file.
+/// @param Instructions The instructions emitted by this expansion.
bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
- unsigned SrcReg, bool Is32BitImm, SMLoc IDLoc,
+ unsigned SrcReg, bool Is32BitImm,
+ bool IsAddress, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
if (!Is32BitImm && !isGP64bit()) {
Error(IDLoc, "instruction requires a 64-bit architecture");
@@ -1852,6 +2172,9 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
}
}
+ unsigned ZeroReg = IsAddress ? ABI.GetNullPtr() : ABI.GetZeroReg();
+ unsigned AdduOp = !Is32BitImm ? Mips::DADDu : Mips::ADDu;
+
bool UseSrcReg = false;
if (SrcReg != Mips::NoRegister)
UseSrcReg = true;
@@ -1866,111 +2189,129 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
TmpReg = ATReg;
}
- // FIXME: gas has a special case for values that are 000...1111, which
- // becomes a li -1 and then a dsrl
if (isInt<16>(ImmValue)) {
- // li d,j => addiu d,$zero,j
if (!UseSrcReg)
- SrcReg = Mips::ZERO;
+ SrcReg = ZeroReg;
+
+ // This doesn't quite follow the usual ABI expectations for N32 but matches
+ // traditional assembler behaviour. N32 would normally use addiu for both
+ // integers and addresses.
+ if (IsAddress && !Is32BitImm) {
+ emitRRI(Mips::DADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
+ return false;
+ }
+
emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
- } else if (isUInt<16>(ImmValue)) {
- // li d,j => ori d,$zero,j
+ return false;
+ }
+
+ if (isUInt<16>(ImmValue)) {
unsigned TmpReg = DstReg;
if (SrcReg == DstReg) {
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
return true;
- TmpReg = ATReg;
}
- emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions);
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, ImmValue, IDLoc, Instructions);
if (UseSrcReg)
- emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
- } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
+ emitRRR(ABI.GetPtrAdduOp(), DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+
+ if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
warnIfNoMacro(IDLoc);
- // For all other values which are representable as a 32-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,lo16(j)
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
if (!Is32BitImm && !isInt<32>(ImmValue)) {
- // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
+ // Traditional behaviour seems to special case this particular value. It's
+ // not clear why other masks are handled differently.
+ if (ImmValue == 0xffffffff) {
+ emitRI(Mips::LUi, TmpReg, 0xffff, IDLoc, Instructions);
+ emitRRI(Mips::DSRL32, TmpReg, TmpReg, 0, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+
+ // Expand to an ORi instead of a LUi to avoid sign-extending into the
// upper 32 bits.
- emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions);
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits31To16, IDLoc, Instructions);
emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions);
- } else
- emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions);
- createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions);
+ if (Bits15To0)
+ emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+ emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions);
+ if (Bits15To0)
+ emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
-
- } else if ((ImmValue & (0xffffLL << 48)) == 0) {
- warnIfNoMacro(IDLoc);
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
- // <------- lo32 ------>
- // <------- hi32 ------>
- // <- hi16 -> <- lo16 ->
- // _________________________________
- // | | | |
- // | 16-bits | 16-bits | 16-bits |
- // |__________|__________|__________|
- //
- // For any 64-bit value that is representable as a 48-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,hi16(lo32(j))
- // dsll d,d,16
- // ori d,d,lo16(lo32(j))
- uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
- uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
- uint16_t Bits15To0 = ImmValue & 0xffff;
+ if (isShiftedUIntAtAnyPosition<16>(ImmValue)) {
+ if (Is32BitImm) {
+ Error(IDLoc, "instruction requires a 32-bit immediate");
+ return true;
+ }
- emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions);
- createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
+ // Traditionally, these immediates are shifted as little as possible and as
+ // such we align the most significant bit to bit 15 of our temporary.
+ unsigned FirstSet = findFirstSet((uint64_t)ImmValue);
+ unsigned LastSet = findLastSet((uint64_t)ImmValue);
+ unsigned ShiftAmount = FirstSet - (15 - (LastSet - FirstSet));
+ uint16_t Bits = (ImmValue >> ShiftAmount) & 0xffff;
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits, IDLoc, Instructions);
+ emitRRI(Mips::DSLL, TmpReg, TmpReg, ShiftAmount, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
- } else {
- warnIfNoMacro(IDLoc);
+ return false;
+ }
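+
+  // A worked example of the arithmetic above (illustration only): with
+  // ImmValue = 0x00000003fffc0000, FirstSet = 18 and LastSet = 33, so
+  // ShiftAmount = 18 - (15 - 15) = 18 and Bits = 0xffff, giving:
+  //   ori  $tmp, $zero, 0xffff
+  //   dsll $tmp, $tmp, 18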
- // <------- hi32 ------> <------- lo32 ------>
- // <- hi16 -> <- lo16 ->
- // ___________________________________________
- // | | | | |
- // | 16-bits | 16-bits | 16-bits | 16-bits |
- // |__________|__________|__________|__________|
- //
- // For all other values which are representable as a 64-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,lo16(hi32(j))
- // dsll d,d,16
- // ori d,d,hi16(lo32(j))
- // dsll d,d,16
- // ori d,d,lo16(lo32(j))
- uint16_t Bits63To48 = (ImmValue >> 48) & 0xffff;
- uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
- uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
- uint16_t Bits15To0 = ImmValue & 0xffff;
+ warnIfNoMacro(IDLoc);
- emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions);
- createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions);
+  // The remaining case is emitted as a sequence of dsll and ori, with zero
+  // chunks omitted and neighbouring dslls coalesced into a single shift.
+  // The highest 32 bits are loaded in the same way as a 32-bit immediate.
- // When Bits31To16 is 0, do a left shift of 32 bits instead of doing
- // two left shifts of 16 bits.
- if (Bits31To16 == 0) {
- createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions);
- } else {
- createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
+ // Load bits 32-63 of ImmValue into bits 0-31 of the temporary register.
+ if (loadImmediate(ImmValue >> 32, TmpReg, Mips::NoRegister, true, false,
+ IDLoc, Instructions))
+    return true;
+
+ // Shift and accumulate into the register. If a 16-bit chunk is zero, then
+ // skip it and defer the shift to the next chunk.
+ unsigned ShiftCarriedForwards = 16;
+ for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
+ uint16_t ImmChunk = (ImmValue >> BitNum) & 0xffff;
+
+ if (ImmChunk != 0) {
+ emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc,
+ Instructions);
+ emitRRI(Mips::ORi, TmpReg, TmpReg, ImmChunk, IDLoc, Instructions);
+ ShiftCarriedForwards = 0;
}
- if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
+ ShiftCarriedForwards += 16;
}
+ ShiftCarriedForwards -= 16;
+
+  // Emit any shift still pending because of trailing zero chunks.
+ if (ShiftCarriedForwards)
+ emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc,
+ Instructions);
+
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+
return false;
}
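
// A minimal host-side sketch of the chunking loop above: walk the 16-bit
// chunks below the top 32 bits, skip zero chunks, and merge consecutive
// shifts into one. The opcode strings and the sketchChunks name are
// illustrative only.
#include <cstdint>
#include <cstdio>
static void sketchChunks(uint64_t Imm) {
  printf("li32 0x%llx\n", (unsigned long long)(Imm >> 32)); // upper half
  unsigned Carried = 16;
  for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
    uint16_t Chunk = (Imm >> BitNum) & 0xffff;
    if (Chunk != 0) {
      printf("dsll %u\nori 0x%x\n", Carried, Chunk);
      Carried = 0;
    }
    Carried += 16;
  }
  Carried -= 16;
  if (Carried) // trailing zero chunks still owe a shift
    printf("dsll %u\n", Carried);
}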
@@ -1982,63 +2323,38 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
assert(DstRegOp.isReg() && "expected register operand kind");
if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
- Is32BitImm, IDLoc, Instructions))
+ Is32BitImm, false, IDLoc, Instructions))
return true;
return false;
}
-bool
-MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- const MCOperand &DstRegOp = Inst.getOperand(0);
- assert(DstRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &SrcRegOp = Inst.getOperand(1);
- assert(SrcRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &ImmOp = Inst.getOperand(2);
- assert((ImmOp.isImm() || ImmOp.isExpr()) &&
- "expected immediate operand kind");
- if (!ImmOp.isImm()) {
- if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
- SrcRegOp.getReg(), Is32BitImm, IDLoc,
- Instructions))
- return true;
-
- return false;
- }
-
- if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(),
- Is32BitImm, IDLoc, Instructions))
+bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg,
+ const MCOperand &Offset,
+ bool Is32BitAddress, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // la can't produce a usable address when addresses are 64-bit.
+ if (Is32BitAddress && ABI.ArePtrs64bit()) {
+ // FIXME: Demote this to a warning and continue as if we had 'dla' instead.
+ // We currently can't do this because we depend on the equality
+ // operator and N64 can end up with a GPR32/GPR64 mismatch.
+ Error(IDLoc, "la used to load 64-bit address");
+ // Continue as if we had 'dla' instead.
+ Is32BitAddress = false;
+ }
+
+ // dla requires 64-bit addresses.
+ if (!Is32BitAddress && !ABI.ArePtrs64bit()) {
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
-
- return false;
-}
-
-bool
-MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- const MCOperand &DstRegOp = Inst.getOperand(0);
- assert(DstRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &ImmOp = Inst.getOperand(1);
- assert((ImmOp.isImm() || ImmOp.isExpr()) &&
- "expected immediate operand kind");
- if (!ImmOp.isImm()) {
- if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
- Mips::NoRegister, Is32BitImm, IDLoc,
- Instructions))
- return true;
-
- return false;
}
- if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
- Is32BitImm, IDLoc, Instructions))
- return true;
+ if (!Offset.isImm())
+ return loadAndAddSymbolAddress(Offset.getExpr(), DstReg, BaseReg,
+ Is32BitAddress, IDLoc, Instructions);
- return false;
+ return loadImmediate(Offset.getImm(), DstReg, BaseReg, Is32BitAddress, true,
+ IDLoc, Instructions);
}
bool MipsAsmParser::loadAndAddSymbolAddress(
@@ -2046,67 +2362,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
warnIfNoMacro(IDLoc);
- if (Is32BitSym && isABI_N64())
- Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
-
- MCInst tmpInst;
- const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ const MCExpr *Symbol = cast<MCExpr>(SymExpr);
+ const MipsMCExpr *HiExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_ABS_HI, Symbol, getContext());
+ const MipsMCExpr *LoExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_ABS_LO, Symbol, getContext());
bool UseSrcReg = SrcReg != Mips::NoRegister;
+ // This is the 64-bit symbol address expansion.
+ if (ABI.ArePtrs64bit() && isGP64bit()) {
+ // We always need AT for the 64-bit expansion.
+ // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ const MipsMCExpr *HighestExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_HIGHEST, Symbol, getContext());
+ const MipsMCExpr *HigherExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_HIGHER, Symbol, getContext());
+
+ if (UseSrcReg && (DstReg == SrcReg)) {
+ // If $rs is the same as $rd:
+ // (d)la $rd, sym($rd) => lui $at, %highest(sym)
+ // daddiu $at, $at, %higher(sym)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %hi(sym)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %lo(sym)
+ // daddu $rd, $at, $rd
+ emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HigherExpr),
+ IDLoc, Instructions);
+ emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), IDLoc,
+ Instructions);
+ emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
+ emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, Instructions);
+
+ return false;
+ }
+
+ // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // lui $at, %hi(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // daddiu $at, $at, %lo(sym)
+ // dsll32 $rd, $rd, 0
+ // daddu $rd, $rd, $at
+ // (daddu $rd, $rd, $rs)
+ emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ Instructions);
+ emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc,
+ Instructions);
+ emitRRX(Mips::DADDiu, DstReg, DstReg, MCOperand::createExpr(HigherExpr),
+ IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
+ emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, Instructions);
+ emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, Instructions);
+
+ return false;
+ }
+
+ // And now, the 32-bit symbol address expansion:
+ // If $rs is the same as $rd:
+ // (d)la $rd, sym($rd) => lui $at, %hi(sym)
+ // ori $at, $at, %lo(sym)
+ // addu $rd, $at, $rd
+ // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %hi(sym)
+ // ori $rd, $rd, %lo(sym)
+ // (addu $rd, $rd, $rs)
unsigned TmpReg = DstReg;
if (UseSrcReg && (DstReg == SrcReg)) {
- // At this point we need AT to perform the expansions and we exit if it is
- // not available.
+ // If $rs is the same as $rd, we need to use AT.
+ // If it is not available we exit.
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
return true;
TmpReg = ATReg;
}
- if (!Is32BitSym) {
- // If it's a 64-bit architecture, expand to:
- // la d,sym => lui d,highest(sym)
- // ori d,d,higher(sym)
- // dsll d,d,16
- // ori d,d,hi16(sym)
- // dsll d,d,16
- // ori d,d,lo16(sym)
- const MCSymbolRefExpr *HighestExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
- const MCSymbolRefExpr *HigherExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
-
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
- Instructions.push_back(tmpInst);
-
- createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(),
- Instructions);
- createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(),
- Instructions);
- createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
- Instructions);
- } else {
- // Otherwise, expand to:
- // la d,sym => lui d,hi16(sym)
- // ori d,d,lo16(sym)
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createExpr(HiExpr));
- Instructions.push_back(tmpInst);
-
- emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(),
- Instructions);
- }
+ emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(HiExpr), IDLoc, Instructions);
+ emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions);
+ emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ else
+ assert(DstReg == TmpReg);
return false;
}
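
// A host-side check of the 64-bit expansion commented above (a sketch,
// not part of the parser): derive carry-adjusted halfwords bottom-up,
// then replay the lui/daddiu/dsll sequence and confirm it reassembles
// the original address. All names here are local to the illustration.
#include <cassert>
#include <cstdint>
static void checkLa64(uint64_t A) {
  uint64_t a = A;
  int16_t lo = (int16_t)a;      a = (a - (uint64_t)(int64_t)lo) >> 16;
  int16_t hi = (int16_t)a;      a = (a - (uint64_t)(int64_t)hi) >> 16;
  int16_t higher = (int16_t)a;  a = (a - (uint64_t)(int64_t)higher) >> 16;
  int16_t highest = (int16_t)a; // the %highest piece, loaded with lui
  uint64_t r = (uint64_t)(int64_t)(int32_t)((uint32_t)(uint16_t)highest << 16);
  r += (uint64_t)(int64_t)higher; // daddiu
  r <<= 16;                       // dsll
  r += (uint64_t)(int64_t)hi;     // daddiu
  r <<= 16;                       // dsll
  r += (uint64_t)(int64_t)lo;     // daddiu
  assert(r == A && "the emitted sequence reproduces the address");
}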
@@ -2125,12 +2476,13 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
Inst.addOperand(MCOperand::createExpr(Offset.getExpr()));
} else {
assert(Offset.isImm() && "expected immediate operand kind");
- if (isIntN(11, Offset.getImm())) {
+ if (isInt<11>(Offset.getImm())) {
// If the offset fits into 11 bits then this instruction becomes the
// microMIPS 16-bit unconditional branch instruction.
- Inst.setOpcode(Mips::B16_MM);
+ if (inMicroMipsMode())
+ Inst.setOpcode(hasMips32r6() ? Mips::BC16_MMR6 : Mips::B16_MM);
} else {
- if (!isIntN(17, Offset.getImm()))
+ if (!isInt<17>(Offset.getImm()))
Error(IDLoc, "branch target out of range");
if (OffsetToAlignment(Offset.getImm(), 1LL << 1))
Error(IDLoc, "branch to misaligned address");
@@ -2143,8 +2495,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
}
Instructions.push_back(Inst);
- // If .set reorder is active, emit a NOP after the branch instruction.
- if (AssemblerOptions.back()->isReorder())
+ // If .set reorder is active and branch instruction has a delay slot,
+ // emit a NOP after it.
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
createNop(true, IDLoc, Instructions);
return false;
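
// A rough standalone sketch of the two range checks used above: the
// 16-bit encoding takes an 11-bit signed, halfword-aligned offset and
// the 32-bit encoding a 17-bit one. Names are local to the sketch.
#include <cstdint>
template <unsigned N> static bool fitsSignedBits(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
static bool fitsB16(int64_t Off) { // B16_MM / BC16_MMR6 form
  return fitsSignedBits<11>(Off) && (Off & 1) == 0;
}
static bool fitsB32(int64_t Off) { // 32-bit branch form
  return fitsSignedBits<17>(Off) && (Off & 1) == 0;
}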
@@ -2175,30 +2529,21 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc,
}
int64_t ImmValue = ImmOp.getImm();
- if (ImmValue == 0) {
- MCInst BranchInst;
- BranchInst.setOpcode(OpCode);
- BranchInst.addOperand(DstRegOp);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MemOffsetOp);
- Instructions.push_back(BranchInst);
- } else {
+ if (ImmValue == 0)
+ emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
+ Instructions);
+ else {
warnIfNoMacro(IDLoc);
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
return true;
- if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), IDLoc,
- Instructions))
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), true,
+ IDLoc, Instructions))
return true;
- MCInst BranchInst;
- BranchInst.setOpcode(OpCode);
- BranchInst.addOperand(DstRegOp);
- BranchInst.addOperand(MCOperand::createReg(ATReg));
- BranchInst.addOperand(MemOffsetOp);
- Instructions.push_back(BranchInst);
+ emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, Instructions);
}
return false;
}
@@ -2206,7 +2551,6 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc,
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions,
bool isLoad, bool isImmOpnd) {
- MCInst TempInst;
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
@@ -2227,8 +2571,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
HiOffset++;
} else
ExprOffset = Inst.getOperand(2).getExpr();
- // All instructions will have the same location.
- TempInst.setLoc(IDLoc);
// These are some of the types of expansions we perform here:
// 1) lw $8, sym => lui $8, %hi(sym)
// lw $8, %lo(sym)($8)
@@ -2267,40 +2609,20 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
return;
}
- TempInst.setOpcode(Mips::LUi);
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- if (isImmOpnd)
- TempInst.addOperand(MCOperand::createImm(HiOffset));
- else {
- const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
- TempInst.addOperand(MCOperand::createExpr(HiExpr));
- }
- // Add the instruction to the list.
- Instructions.push_back(TempInst);
- // Prepare TempInst for next instruction.
- TempInst.clear();
+ emitRX(Mips::LUi, TmpRegNum,
+ isImmOpnd ? MCOperand::createImm(HiOffset)
+ : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "hi")),
+ IDLoc, Instructions);
// Add temp register to base.
- if (BaseRegNum != Mips::ZERO) {
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- TempInst.addOperand(MCOperand::createReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
- }
+ if (BaseRegNum != Mips::ZERO)
+ emitRRR(Mips::ADDu, TmpRegNum, TmpRegNum, BaseRegNum, IDLoc, Instructions);
// And finally, create the original instruction with the low part of the
// offset and the new base.
- TempInst.setOpcode(Inst.getOpcode());
- TempInst.addOperand(MCOperand::createReg(RegOpNum));
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- if (isImmOpnd)
- TempInst.addOperand(MCOperand::createImm(LoOffset));
- else {
- const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
- TempInst.addOperand(MCOperand::createExpr(LoExpr));
- }
- Instructions.push_back(TempInst);
- TempInst.clear();
+ emitRRX(Inst.getOpcode(), RegOpNum, TmpRegNum,
+ isImmOpnd
+ ? MCOperand::createImm(LoOffset)
+ : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "lo")),
+ IDLoc, Instructions);
}
bool
@@ -2316,10 +2638,16 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 &&
Inst.getOperand(OpNum - 1).getImm() >= 0 &&
- Inst.getOperand(OpNum - 2).getReg() == Mips::SP &&
- Inst.getOperand(OpNum - 3).getReg() == Mips::RA)
+ (Inst.getOperand(OpNum - 2).getReg() == Mips::SP ||
+ Inst.getOperand(OpNum - 2).getReg() == Mips::SP_64) &&
+ (Inst.getOperand(OpNum - 3).getReg() == Mips::RA ||
+ Inst.getOperand(OpNum - 3).getReg() == Mips::RA_64)) {
// It can be implemented as a SWM16 or LWM16 instruction.
- NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM;
+ if (inMicroMipsMode() && hasMips32r6())
+ NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MMR6 : Mips::LWM16_MMR6;
+ else
+ NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM;
+ }
Inst.setOpcode(NewOpcode);
Instructions.push_back(Inst);
@@ -2328,44 +2656,126 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
+ bool EmittedNoMacroWarning = false;
unsigned PseudoOpcode = Inst.getOpcode();
unsigned SrcReg = Inst.getOperand(0).getReg();
- unsigned TrgReg = Inst.getOperand(1).getReg();
+ const MCOperand &TrgOp = Inst.getOperand(1);
const MCExpr *OffsetExpr = Inst.getOperand(2).getExpr();
unsigned ZeroSrcOpcode, ZeroTrgOpcode;
- bool ReverseOrderSLT, IsUnsigned, AcceptsEquality;
+ bool ReverseOrderSLT, IsUnsigned, IsLikely, AcceptsEquality;
+
+ unsigned TrgReg;
+ if (TrgOp.isReg())
+ TrgReg = TrgOp.getReg();
+ else if (TrgOp.isImm()) {
+ warnIfNoMacro(IDLoc);
+ EmittedNoMacroWarning = true;
+
+ TrgReg = getATReg(IDLoc);
+ if (!TrgReg)
+ return true;
+
+    switch (PseudoOpcode) {
+ default:
+ llvm_unreachable("unknown opcode for branch pseudo-instruction");
+ case Mips::BLTImmMacro:
+ PseudoOpcode = Mips::BLT;
+ break;
+ case Mips::BLEImmMacro:
+ PseudoOpcode = Mips::BLE;
+ break;
+ case Mips::BGEImmMacro:
+ PseudoOpcode = Mips::BGE;
+ break;
+ case Mips::BGTImmMacro:
+ PseudoOpcode = Mips::BGT;
+ break;
+ case Mips::BLTUImmMacro:
+ PseudoOpcode = Mips::BLTU;
+ break;
+ case Mips::BLEUImmMacro:
+ PseudoOpcode = Mips::BLEU;
+ break;
+ case Mips::BGEUImmMacro:
+ PseudoOpcode = Mips::BGEU;
+ break;
+ case Mips::BGTUImmMacro:
+ PseudoOpcode = Mips::BGTU;
+ break;
+ case Mips::BLTLImmMacro:
+ PseudoOpcode = Mips::BLTL;
+ break;
+ case Mips::BLELImmMacro:
+ PseudoOpcode = Mips::BLEL;
+ break;
+ case Mips::BGELImmMacro:
+ PseudoOpcode = Mips::BGEL;
+ break;
+ case Mips::BGTLImmMacro:
+ PseudoOpcode = Mips::BGTL;
+ break;
+ case Mips::BLTULImmMacro:
+ PseudoOpcode = Mips::BLTUL;
+ break;
+ case Mips::BLEULImmMacro:
+ PseudoOpcode = Mips::BLEUL;
+ break;
+ case Mips::BGEULImmMacro:
+ PseudoOpcode = Mips::BGEUL;
+ break;
+ case Mips::BGTULImmMacro:
+ PseudoOpcode = Mips::BGTUL;
+ break;
+ }
+
+ if (loadImmediate(TrgOp.getImm(), TrgReg, Mips::NoRegister, !isGP64bit(),
+ false, IDLoc, Instructions))
+ return true;
+ }
switch (PseudoOpcode) {
case Mips::BLT:
case Mips::BLTU:
+ case Mips::BLTL:
+ case Mips::BLTUL:
AcceptsEquality = false;
ReverseOrderSLT = false;
- IsUnsigned = (PseudoOpcode == Mips::BLTU);
+ IsUnsigned = ((PseudoOpcode == Mips::BLTU) || (PseudoOpcode == Mips::BLTUL));
+ IsLikely = ((PseudoOpcode == Mips::BLTL) || (PseudoOpcode == Mips::BLTUL));
ZeroSrcOpcode = Mips::BGTZ;
ZeroTrgOpcode = Mips::BLTZ;
break;
case Mips::BLE:
case Mips::BLEU:
+ case Mips::BLEL:
+ case Mips::BLEUL:
AcceptsEquality = true;
ReverseOrderSLT = true;
- IsUnsigned = (PseudoOpcode == Mips::BLEU);
+ IsUnsigned = ((PseudoOpcode == Mips::BLEU) || (PseudoOpcode == Mips::BLEUL));
+ IsLikely = ((PseudoOpcode == Mips::BLEL) || (PseudoOpcode == Mips::BLEUL));
ZeroSrcOpcode = Mips::BGEZ;
ZeroTrgOpcode = Mips::BLEZ;
break;
case Mips::BGE:
case Mips::BGEU:
+ case Mips::BGEL:
+ case Mips::BGEUL:
AcceptsEquality = true;
ReverseOrderSLT = false;
- IsUnsigned = (PseudoOpcode == Mips::BGEU);
+ IsUnsigned = ((PseudoOpcode == Mips::BGEU) || (PseudoOpcode == Mips::BGEUL));
+ IsLikely = ((PseudoOpcode == Mips::BGEL) || (PseudoOpcode == Mips::BGEUL));
ZeroSrcOpcode = Mips::BLEZ;
ZeroTrgOpcode = Mips::BGEZ;
break;
case Mips::BGT:
case Mips::BGTU:
+ case Mips::BGTL:
+ case Mips::BGTUL:
AcceptsEquality = false;
ReverseOrderSLT = true;
- IsUnsigned = (PseudoOpcode == Mips::BGTU);
+ IsUnsigned = ((PseudoOpcode == Mips::BGTU) || (PseudoOpcode == Mips::BGTUL));
+ IsLikely = ((PseudoOpcode == Mips::BGTL) || (PseudoOpcode == Mips::BGTUL));
ZeroSrcOpcode = Mips::BLTZ;
ZeroTrgOpcode = Mips::BGTZ;
break;
@@ -2373,7 +2783,6 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
llvm_unreachable("unknown opcode for branch pseudo-instruction");
}
- MCInst BranchInst;
bool IsTrgRegZero = (TrgReg == Mips::ZERO);
bool IsSrcRegZero = (SrcReg == Mips::ZERO);
if (IsSrcRegZero && IsTrgRegZero) {
@@ -2381,51 +2790,37 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
// with GAS' behaviour. However, they may not generate the most efficient
// code in some circumstances.
if (PseudoOpcode == Mips::BLT) {
- BranchInst.setOpcode(Mips::BLTZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BLTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
if (PseudoOpcode == Mips::BLE) {
- BranchInst.setOpcode(Mips::BLEZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BLEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
if (PseudoOpcode == Mips::BGE) {
- BranchInst.setOpcode(Mips::BGEZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BGEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
if (PseudoOpcode == Mips::BGT) {
- BranchInst.setOpcode(Mips::BGTZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BGTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
if (PseudoOpcode == Mips::BGTU) {
- BranchInst.setOpcode(Mips::BNE);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BNE, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
return false;
}
if (AcceptsEquality) {
// If both registers are $0 and the pseudo-branch accepts equality, it
// will always be taken, so we emit an unconditional branch.
- BranchInst.setOpcode(Mips::BEQ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
@@ -2449,11 +2844,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
// the pseudo-branch will always be taken, so we emit an unconditional
// branch.
// This only applies to unsigned pseudo-branches.
- BranchInst.setOpcode(Mips::BEQ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
@@ -2470,21 +2862,17 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
//
// Because only BLEU and BGEU branch on equality, we can use the
// AcceptsEquality variable to decide when to emit the BEQZ.
- BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
- BranchInst.addOperand(
- MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(AcceptsEquality ? Mips::BEQ : Mips::BNE,
+ IsSrcRegZero ? TrgReg : SrcReg, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
return false;
}
// If we have a signed pseudo-branch and one of the registers is $0,
// we can use an appropriate compare-to-zero branch. We select which one
// to use in the switch statement above.
- BranchInst.setOpcode(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode);
- BranchInst.addOperand(MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode,
+ IsSrcRegZero ? TrgReg : SrcReg, MCOperand::createExpr(OffsetExpr),
+ IDLoc, Instructions);
return false;
}
@@ -2494,7 +2882,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
if (!ATRegNum)
return true;
- warnIfNoMacro(IDLoc);
+ if (!EmittedNoMacroWarning)
+ warnIfNoMacro(IDLoc);
// SLT fits well with 2 of our 4 pseudo-branches:
// BLT, where $rs < $rt, translates into "slt $at, $rs, $rt" and
@@ -2511,23 +2900,135 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
//
// The same applies to the unsigned variants, except that SLTu is used
// instead of SLT.
- MCInst SetInst;
- SetInst.setOpcode(IsUnsigned ? Mips::SLTu : Mips::SLT);
- SetInst.addOperand(MCOperand::createReg(ATRegNum));
- SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? TrgReg : SrcReg));
- SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? SrcReg : TrgReg));
- Instructions.push_back(SetInst);
-
- BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
- BranchInst.addOperand(MCOperand::createReg(ATRegNum));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRR(IsUnsigned ? Mips::SLTu : Mips::SLT, ATRegNum,
+ ReverseOrderSLT ? TrgReg : SrcReg, ReverseOrderSLT ? SrcReg : TrgReg,
+ IDLoc, Instructions);
+
+ emitRRX(IsLikely ? (AcceptsEquality ? Mips::BEQL : Mips::BNEL)
+ : (AcceptsEquality ? Mips::BEQ : Mips::BNE),
+ ATRegNum, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
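
// A condensed restatement of the slt-based lowering above (sketch): the
// three flags computed in the switch pick the operand order and the
// final compare. printf stands in for the MCInst emission.
#include <cstdio>
static void sketchBranch(const char *Cond, bool ReverseOrderSLT,
                         bool AcceptsEquality, bool IsUnsigned) {
  // blt: slt $at, $rs, $rt ; bne $at, $zero, offset
  // ble: slt $at, $rt, $rs ; beq $at, $zero, offset
  // bge: slt $at, $rs, $rt ; beq $at, $zero, offset
  // bgt: slt $at, $rt, $rs ; bne $at, $zero, offset
  printf("%s: %s $at, %s ; %s $at, $zero, offset\n", Cond,
         IsUnsigned ? "sltu" : "slt",
         ReverseOrderSLT ? "$rt, $rs" : "$rs, $rt",
         AcceptsEquality ? "beq" : "bne");
}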
-bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ const bool IsMips64, const bool Signed) {
+ if (hasMips32r6()) {
+ Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+ return false;
+ }
+
+ warnIfNoMacro(IDLoc);
+
+ const MCOperand &RsRegOp = Inst.getOperand(0);
+ assert(RsRegOp.isReg() && "expected register operand kind");
+ unsigned RsReg = RsRegOp.getReg();
+
+ const MCOperand &RtRegOp = Inst.getOperand(1);
+ assert(RtRegOp.isReg() && "expected register operand kind");
+ unsigned RtReg = RtRegOp.getReg();
+ unsigned DivOp;
+ unsigned ZeroReg;
+
+ if (IsMips64) {
+ DivOp = Signed ? Mips::DSDIV : Mips::DUDIV;
+ ZeroReg = Mips::ZERO_64;
+ } else {
+ DivOp = Signed ? Mips::SDIV : Mips::UDIV;
+ ZeroReg = Mips::ZERO;
+ }
+
+ bool UseTraps = useTraps();
+
+ if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) {
+ if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ if (IsMips64) {
+ if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) {
+ if (UseTraps) {
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ return false;
+ }
+
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+ return false;
+ }
+ } else {
+ emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions);
+ return false;
+ }
+ }
+
+ if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) {
+ Warning(IDLoc, "division by zero");
+ if (Signed) {
+ if (UseTraps) {
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ return false;
+ }
+
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+ return false;
+ }
+ }
+
+  // FIXME: The values for these two BranchTarget variables may be different in
+  // microMIPS. These magic numbers need to be removed.
+ unsigned BranchTargetNoTraps;
+ unsigned BranchTarget;
+
+ if (UseTraps) {
+ BranchTarget = IsMips64 ? 12 : 8;
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ } else {
+ BranchTarget = IsMips64 ? 20 : 16;
+ BranchTargetNoTraps = 8;
+ // Branch to the li instruction.
+ emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc,
+ Instructions);
+ }
+
+ emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions);
+
+ if (!UseTraps)
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+
+ if (!Signed) {
+ emitR(Mips::MFLO, RsReg, IDLoc, Instructions);
+ return false;
+ }
+
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, Instructions);
+ if (IsMips64) {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions);
+ emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, Instructions);
+ emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, Instructions);
+ } else {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions);
+ emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, Instructions);
+ }
+
+ if (UseTraps)
+ emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, Instructions);
+ else {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, Instructions);
+ emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, Instructions);
+ emitII(Mips::BREAK, 0x6, 0, IDLoc, Instructions);
+ }
+ emitR(Mips::MFLO, RsReg, IDLoc, Instructions);
+ return false;
+}
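+
+// The two guards inserted by the expansion above correspond to the cases
+// a MIPS divide leaves undefined. A small host-side restatement (sketch;
+// assumes <cstdint> for INT32_MIN):
+static bool divNeedsTrap(int32_t Rs, int32_t Rt, bool Signed) {
+  if (Rt == 0)
+    return true; // guarded by the teq/break 0x7 sequence
+  if (Signed && Rs == INT32_MIN && Rt == -1)
+    return true; // overflow, guarded by the teq/break 0x6 sequence
+  return false;
+}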
+
+bool MipsAsmParser::expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
if (hasMips32r6() || hasMips64r6()) {
Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
return false;
@@ -2562,7 +3063,7 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
LoadedOffsetInAT = true;
if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- IDLoc, Instructions))
+ true, IDLoc, Instructions))
return true;
// NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
@@ -2590,33 +3091,15 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg;
- MCInst TmpInst;
- TmpInst.setOpcode(Mips::LBu);
- TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg));
- TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
- TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::LBu);
- TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg));
- TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
- TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::SLL);
- TmpInst.addOperand(MCOperand::createReg(SllReg));
- TmpInst.addOperand(MCOperand::createReg(SllReg));
- TmpInst.addOperand(MCOperand::createImm(8));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::OR);
- TmpInst.addOperand(MCOperand::createReg(DstReg));
- TmpInst.addOperand(MCOperand::createReg(DstReg));
- TmpInst.addOperand(MCOperand::createReg(ATReg));
- Instructions.push_back(TmpInst);
+ emitRRI(Signed ? Mips::LB : Mips::LBu, FirstLbuDstReg, LbuSrcReg,
+ FirstLbuOffset, IDLoc, Instructions);
+
+ emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondLbuOffset, IDLoc,
+ Instructions);
+
+ emitRRI(Mips::SLL, SllReg, SllReg, 8, IDLoc, Instructions);
+
+ emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, Instructions);
return false;
}
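
// A host-side equivalent of the byte-wise sequence above (sketch): two
// byte loads, a shift, and an or. MostSignificantByteFirst stands in for
// the endianness decision that picks FirstLbuOffset/SecondLbuOffset.
#include <cstdint>
static int32_t ulhSketch(const uint8_t *P, bool Signed,
                         bool MostSignificantByteFirst) {
  uint8_t HiByte = MostSignificantByteFirst ? P[0] : P[1];
  uint8_t LoByte = MostSignificantByteFirst ? P[1] : P[0];
  int32_t Hi = Signed ? (int32_t)(int8_t)HiByte : (int32_t)HiByte; // lb/lbu
  return (int32_t)(((uint32_t)Hi << 8) | LoByte);                  // sll, or
}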
@@ -2654,7 +3137,7 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
warnIfNoMacro(IDLoc);
if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- IDLoc, Instructions))
+ true, IDLoc, Instructions))
return true;
// NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
@@ -2677,37 +3160,373 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
}
- MCInst LeftLoadInst;
- LeftLoadInst.setOpcode(Mips::LWL);
- LeftLoadInst.addOperand(DstRegOp);
- LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
- LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset));
- Instructions.push_back(LeftLoadInst);
+ emitRRI(Mips::LWL, DstRegOp.getReg(), FinalSrcReg, LeftLoadOffset, IDLoc,
+ Instructions);
- MCInst RightLoadInst;
- RightLoadInst.setOpcode(Mips::LWR);
- RightLoadInst.addOperand(DstRegOp);
- RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
- RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset ));
- Instructions.push_back(RightLoadInst);
+ emitRRI(Mips::LWR, DstRegOp.getReg(), FinalSrcReg, RightLoadOffset, IDLoc,
+ Instructions);
return false;
}
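
// LWL and LWR jointly read the four bytes starting at an arbitrary
// address; on a host, memcpy expresses the same unaligned load (sketch,
// byte order following the host):
#include <cstdint>
#include <cstring>
static uint32_t ulwSketch(const uint8_t *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));
  return V;
}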
+bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned FinalDstReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+
+ bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue);
+
+ unsigned FinalOpcode = Inst.getOpcode();
+
+ if (DstReg == SrcReg) {
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+ FinalDstReg = DstReg;
+ DstReg = ATReg;
+ }
+
+  if (!loadImmediate(ImmValue, DstReg, Mips::NoRegister, Is32Bit, false,
+                     Inst.getLoc(), Instructions)) {
+ switch (FinalOpcode) {
+ default:
+ llvm_unreachable("unimplemented expansion");
+ case (Mips::ADDi):
+ FinalOpcode = Mips::ADD;
+ break;
+ case (Mips::ADDiu):
+ FinalOpcode = Mips::ADDu;
+ break;
+ case (Mips::ANDi):
+ FinalOpcode = Mips::AND;
+ break;
+ case (Mips::NORImm):
+ FinalOpcode = Mips::NOR;
+ break;
+ case (Mips::ORi):
+ FinalOpcode = Mips::OR;
+ break;
+ case (Mips::SLTi):
+ FinalOpcode = Mips::SLT;
+ break;
+ case (Mips::SLTiu):
+ FinalOpcode = Mips::SLTu;
+ break;
+ case (Mips::XORi):
+ FinalOpcode = Mips::XOR;
+ break;
+ }
+
+ if (FinalDstReg == Mips::NoRegister)
+ emitRRR(FinalOpcode, DstReg, DstReg, SrcReg, IDLoc, Instructions);
+ else
+ emitRRR(FinalOpcode, FinalDstReg, FinalDstReg, DstReg, IDLoc,
+ Instructions);
+ return false;
+ }
+ return true;
+}
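+
+// Net effect of the expansion above, restated for illustration:
+//   andi $d, $s, imm         # imm does not fit in 16 bits
+// becomes
+//   li   $at, imm            # materialized via loadImmediate()
+//   and  $d, $s, $at
+// where $at is only needed when $d and $s are the same register;
+// otherwise $d itself holds the loaded immediate.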
+
+bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ unsigned TReg = Inst.getOperand(2).getReg();
+ unsigned TmpReg = DReg;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips32r2()) {
+
+ if (DReg == SReg) {
+ TmpReg = getATReg(Inst.getLoc());
+ if (!TmpReg)
+ return true;
+ }
+
+ if (Inst.getOpcode() == Mips::ROL) {
+ emitRRR(Mips::SUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::ROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::ROR) {
+ emitRRR(Mips::ROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips32()) {
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::ROL:
+ FirstShift = Mips::SRLV;
+ SecondShift = Mips::SLLV;
+ break;
+ case Mips::ROR:
+ FirstShift = Mips::SLLV;
+ SecondShift = Mips::SRLV;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRR(Mips::SUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions);
+ emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
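+
+// A host-side restatement of the pre-R2 path above (sketch): rotate left
+// is a right shift and a left shift OR'd together, with the negated
+// amount reduced mod 32 the way SRLV does in hardware.
+static uint32_t rolSketch(uint32_t X, uint32_t N) {
+  N &= 31; // hardware masks variable shift amounts to 5 bits
+  return (X >> ((32 - N) & 31)) | (X << N);
+}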
+
+bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips32r2()) {
+
+ if (Inst.getOpcode() == Mips::ROLImm) {
+ uint64_t MaxShift = 32;
+ uint64_t ShiftValue = ImmValue;
+ if (ImmValue != 0)
+ ShiftValue = MaxShift - ImmValue;
+ emitRRI(Mips::ROTR, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::RORImm) {
+ emitRRI(Mips::ROTR, DReg, SReg, ImmValue, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips32()) {
+
+ if (ImmValue == 0) {
+ emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::ROLImm:
+ FirstShift = Mips::SLL;
+ SecondShift = Mips::SRL;
+ break;
+ case Mips::RORImm:
+ FirstShift = Mips::SRL;
+ SecondShift = Mips::SLL;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRI(FirstShift, ATReg, SReg, ImmValue, Inst.getLoc(), Instructions);
+ emitRRI(SecondShift, DReg, SReg, 32 - ImmValue, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
+
+bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ unsigned TReg = Inst.getOperand(2).getReg();
+ unsigned TmpReg = DReg;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips64r2()) {
+
+ if (TmpReg == SReg) {
+ TmpReg = getATReg(Inst.getLoc());
+ if (!TmpReg)
+ return true;
+ }
+
+ if (Inst.getOpcode() == Mips::DROL) {
+ emitRRR(Mips::DSUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::DROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::DROR) {
+ emitRRR(Mips::DROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips64()) {
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::DROL:
+ FirstShift = Mips::DSRLV;
+ SecondShift = Mips::DSLLV;
+ break;
+ case Mips::DROR:
+ FirstShift = Mips::DSLLV;
+ SecondShift = Mips::DSRLV;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRR(Mips::DSUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions);
+ emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
+
+bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm() % 64;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips64r2()) {
+
+ unsigned FinalOpcode = Mips::NOP;
+ if (ImmValue == 0)
+ FinalOpcode = Mips::DROTR;
+ else if (ImmValue % 32 == 0)
+ FinalOpcode = Mips::DROTR32;
+ else if ((ImmValue >= 1) && (ImmValue <= 32)) {
+ if (Inst.getOpcode() == Mips::DROLImm)
+ FinalOpcode = Mips::DROTR32;
+ else
+ FinalOpcode = Mips::DROTR;
+ } else if (ImmValue >= 33) {
+ if (Inst.getOpcode() == Mips::DROLImm)
+ FinalOpcode = Mips::DROTR;
+ else
+ FinalOpcode = Mips::DROTR32;
+ }
+
+ uint64_t ShiftValue = ImmValue % 32;
+ if (Inst.getOpcode() == Mips::DROLImm)
+ ShiftValue = (32 - ImmValue % 32) % 32;
+
+ emitRRI(FinalOpcode, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ if (hasMips64()) {
+
+ if (ImmValue == 0) {
+ emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::DROLImm:
+ if ((ImmValue >= 1) && (ImmValue <= 31)) {
+ FirstShift = Mips::DSLL;
+ SecondShift = Mips::DSRL32;
+ }
+ if (ImmValue == 32) {
+ FirstShift = Mips::DSLL32;
+ SecondShift = Mips::DSRL32;
+ }
+ if ((ImmValue >= 33) && (ImmValue <= 63)) {
+ FirstShift = Mips::DSLL32;
+ SecondShift = Mips::DSRL;
+ }
+ break;
+ case Mips::DRORImm:
+ if ((ImmValue >= 1) && (ImmValue <= 31)) {
+ FirstShift = Mips::DSRL;
+ SecondShift = Mips::DSLL32;
+ }
+ if (ImmValue == 32) {
+ FirstShift = Mips::DSRL32;
+ SecondShift = Mips::DSLL32;
+ }
+ if ((ImmValue >= 33) && (ImmValue <= 63)) {
+ FirstShift = Mips::DSRL32;
+ SecondShift = Mips::DSLL;
+ }
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+    emitRRI(FirstShift, ATReg, SReg, ImmValue % 32, Inst.getLoc(),
+            Instructions);
+    emitRRI(SecondShift, DReg, SReg, (32 - ImmValue % 32) % 32,
+            Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
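+
+// The opcode/amount selection above, condensed (sketch): DROTR encodes
+// rotate amounts 0-31 and DROTR32 amounts 32-63, and a left rotate by n
+// is a right rotate by (64 - n) % 64. Names are local to the sketch.
+static void encodeDRotr(unsigned N, bool IsLeft, bool &Plus32,
+                        unsigned &Amt) {
+  N %= 64;
+  if (IsLeft)
+    N = (64 - N) % 64;
+  Plus32 = N >= 32;  // use DROTR32
+  Amt = N % 32;      // 5-bit encoded amount
+}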
+
void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst NopInst;
- if (hasShortDelaySlot) {
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- } else {
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createImm(0));
- }
- Instructions.push_back(NopInst);
+ if (hasShortDelaySlot)
+ emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, Instructions);
+ else
+ emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, Instructions);
}
void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
@@ -2717,6 +3536,24 @@ void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
Instructions);
}
+void MipsAsmParser::createCpRestoreMemOp(
+ bool IsLoad, int StackOffset, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+  // If the offset cannot fit into 16 bits, we need to expand.
+ if (!isInt<16>(StackOffset)) {
+ MCInst MemInst;
+ MemInst.setOpcode(IsLoad ? Mips::LW : Mips::SW);
+ MemInst.addOperand(MCOperand::createReg(Mips::GP));
+ MemInst.addOperand(MCOperand::createReg(Mips::SP));
+ MemInst.addOperand(MCOperand::createImm(StackOffset));
+    expandMemInst(MemInst, IDLoc, Instructions, IsLoad, true /*isImmOpnd*/);
+ return;
+ }
+
+ emitRRI(IsLoad ? Mips::LW : Mips::SW, Mips::GP, Mips::SP, StackOffset, IDLoc,
+ Instructions);
+}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
@@ -2729,6 +3566,17 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
+static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
+ uint64_t ErrorInfo) {
+ if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) {
+ SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ return Loc;
+ return ErrorLoc;
+ }
+ return Loc;
+}
+
bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2745,7 +3593,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (processInstruction(Inst, IDLoc, Instructions))
return true;
for (unsigned i = 0; i < Instructions.size(); i++)
- Out.EmitInstruction(Instructions[i], STI);
+ Out.EmitInstruction(Instructions[i], getSTI());
return false;
}
case Match_MissingFeature:
@@ -2757,7 +3605,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc();
+ ErrorLoc = Operands[ErrorInfo]->getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -2768,6 +3616,58 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "invalid instruction");
case Match_RequiresDifferentSrcAndDst:
return Error(IDLoc, "source and destination must be different");
+ case Match_Immz:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
+ case Match_UImm1_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 1-bit unsigned immediate");
+ case Match_UImm2_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 2-bit unsigned immediate");
+ case Match_UImm2_1:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 1 .. 4");
+ case Match_UImm3_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 3-bit unsigned immediate");
+ case Match_UImm4_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 4-bit unsigned immediate");
+ case Match_UImm5_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 5-bit unsigned immediate");
+ case Match_UImm5_1:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 1 .. 32");
+ case Match_UImm5_32:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 32 .. 63");
+ case Match_UImm5_33:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 33 .. 64");
+ case Match_UImm5_0_Report_UImm6:
+ // This is used on UImm5 operands that have a corresponding UImm5_32
+ // operand to avoid confusing the user.
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit unsigned immediate");
+ case Match_UImm5_Lsl2:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected both 7-bit unsigned immediate and multiple of 4");
+ case Match_UImm6_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit unsigned immediate");
+ case Match_SImm6:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit signed immediate");
+ case Match_UImm7_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 7-bit unsigned immediate");
+ case Match_UImm8_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 8-bit unsigned immediate");
+ case Match_UImm10_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 10-bit unsigned immediate");
}
llvm_unreachable("Implement any new match types added!");
@@ -3264,7 +4164,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand &Mnemonic = static_cast<MipsOperand &>(*Operands[0]);
- if (Mnemonic.getToken() == "la") {
+ if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") {
SMLoc E =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
@@ -3598,12 +4498,15 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
if (RegRange) {
// Remove last register operand because registers from register range
// should be inserted first.
- if (RegNo == Mips::RA) {
+ if ((isGP64bit() && RegNo == Mips::RA_64) ||
+ (!isGP64bit() && RegNo == Mips::RA)) {
Regs.push_back(RegNo);
} else {
unsigned TmpReg = PrevReg + 1;
while (TmpReg <= RegNo) {
- if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) {
+ if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) ||
+ (((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) &&
+ isGP64bit())) {
Error(E, "invalid register operand");
return MatchOperand_ParseFail;
}
@@ -3615,16 +4518,23 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
RegRange = false;
} else {
- if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) &&
- (RegNo != Mips::RA)) {
+ if ((PrevReg == Mips::NoRegister) &&
+ ((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) ||
+ (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) {
Error(E, "$16 or $31 expected");
return MatchOperand_ParseFail;
- } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) &&
- (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ } else if (!(((RegNo == Mips::FP || RegNo == Mips::RA ||
+ (RegNo >= Mips::S0 && RegNo <= Mips::S7)) &&
+ !isGP64bit()) ||
+ ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 ||
+ (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) &&
+ isGP64bit()))) {
Error(E, "invalid register operand");
return MatchOperand_ParseFail;
} else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
- (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) ||
+ (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 &&
+ isGP64bit()))) {
Error(E, "consecutive register numbers expected");
return MatchOperand_ParseFail;
}
@@ -4152,6 +5062,7 @@ bool MipsAsmParser::parseSetPopDirective() {
if (AssemblerOptions.size() == 2)
return reportParseError(Loc, ".set pop with no .set push");
+ MCSubtargetInfo &STI = copySTI();
AssemblerOptions.pop_back();
setAvailableFeatures(
ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures()));
@@ -4225,6 +5136,7 @@ bool MipsAsmParser::parseSetMips0Directive() {
return reportParseError("unexpected token, expected end of statement");
// Reset assembler options to their initial values.
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures()));
STI.setFeatureBits(AssemblerOptions.front()->getFeatures());
@@ -4366,6 +5278,14 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
return true;
}
+// Used to determine if .cpload, .cprestore, and .cpsetup have any effect.
+// In this class, it is only used for .cprestore.
+// FIXME: Only keep track of IsPicEnabled in one place, instead of in both
+// MipsTargetELFStreamer and MipsAsmParser.
+bool MipsAsmParser::isPicAndNotNxxAbi() {
+ return inPicMode() && !(isABI_N32() || isABI_N64());
+}
+
bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
if (AssemblerOptions.back()->isReorder())
Warning(Loc, ".cpload should be inside a noreorder section");
@@ -4398,6 +5318,54 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
return false;
}
+bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) {
+ MCAsmParser &Parser = getParser();
+
+ // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it
+ // is used in non-PIC mode.
+
+ if (inMips16Mode()) {
+ reportParseError(".cprestore is not supported in Mips16 mode");
+ return false;
+ }
+
+ // Get the stack offset value.
+ const MCExpr *StackOffset;
+ int64_t StackOffsetVal;
+ if (Parser.parseExpression(StackOffset)) {
+ reportParseError("expected stack offset value");
+ return false;
+ }
+
+ if (!StackOffset->evaluateAsAbsolute(StackOffsetVal)) {
+ reportParseError("stack offset is not an absolute expression");
+ return false;
+ }
+
+ if (StackOffsetVal < 0) {
+ Warning(Loc, ".cprestore with negative stack offset has no effect");
+ IsCpRestoreSet = false;
+ } else {
+ IsCpRestoreSet = true;
+ CpRestoreOffset = StackOffsetVal;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ // Store the $gp on the stack.
+ SmallVector<MCInst, 3> StoreInsts;
+ createCpRestoreMemOp(false /*IsLoad*/, CpRestoreOffset /*StackOffset*/, Loc,
+ StoreInsts);
+
+ getTargetStreamer().emitDirectiveCpRestore(StoreInsts, CpRestoreOffset);
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveCPSetup() {
MCAsmParser &Parser = getParser();
unsigned FuncReg;
@@ -4427,16 +5395,19 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Integer)) {
- Save = Tok.getIntVal();
- SaveIsReg = false;
- Parser.Lex();
- } else {
- reportParseError("expected save register or stack offset");
+ const MCExpr *OffsetExpr;
+ int64_t OffsetVal;
+ SMLoc ExprLoc = getLexer().getLoc();
+
+ if (Parser.parseExpression(OffsetExpr) ||
+ !OffsetExpr->evaluateAsAbsolute(OffsetVal)) {
+ reportParseError(ExprLoc, "expected save register or stack offset");
Parser.eatToEndOfStatement();
return false;
}
+
+ Save = OffsetVal;
+ SaveIsReg = false;
} else {
MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
if (!SaveOpnd.isGPRAsmReg()) {
@@ -4462,11 +5433,20 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
}
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
+ CpSaveLocation = Save;
+ CpSaveLocationIsRegister = SaveIsReg;
+
getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, Ref->getSymbol(),
SaveIsReg);
return false;
}
+bool MipsAsmParser::parseDirectiveCPReturn() {
+ getTargetStreamer().emitDirectiveCpreturn(CpSaveLocation,
+ CpSaveLocationIsRegister);
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveNaN() {
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4655,6 +5635,9 @@ bool MipsAsmParser::parseDirectiveOption() {
StringRef Option = Tok.getIdentifier();
if (Option == "pic0") {
+ // MipsAsmParser needs to know if the current PIC mode changes.
+ IsPicEnabled = false;
+
getTargetStreamer().emitDirectiveOptionPic0();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
@@ -4666,6 +5649,9 @@ bool MipsAsmParser::parseDirectiveOption() {
}
if (Option == "pic2") {
+ // MipsAsmParser needs to know if the current PIC mode changes.
+ IsPicEnabled = true;
+
getTargetStreamer().emitDirectiveOptionPic2();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
@@ -4924,6 +5910,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpload")
return parseDirectiveCpLoad(DirectiveID.getLoc());
+ if (IDVal == ".cprestore")
+ return parseDirectiveCpRestore(DirectiveID.getLoc());
if (IDVal == ".dword") {
parseDataDirective(8, DirectiveID.getLoc());
return false;
@@ -4974,6 +5962,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitDirectiveEnt(*Sym);
CurrentFn = Sym;
+ IsCpRestoreSet = false;
return false;
}
@@ -5002,6 +5991,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitDirectiveEnd(SymbolName);
CurrentFn = nullptr;
+ IsCpRestoreSet = false;
return false;
}
@@ -5073,6 +6063,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitFrame(StackReg, FrameSizeVal,
ReturnRegOpnd.getGPR32Reg());
+ IsCpRestoreSet = false;
return false;
}
@@ -5173,6 +6164,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpsetup")
return parseDirectiveCPSetup();
+ if (IDVal == ".cpreturn")
+ return parseDirectiveCPReturn();
+
if (IDVal == ".module")
return parseDirectiveModule();
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index a34ba3b..3c1a771 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -229,6 +229,13 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTarget26MM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
// DecodeJumpTargetMM - Decode microMIPS jump target, which is
// shifted left by 1 bit.
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
@@ -241,17 +248,42 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMemEVA(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadByte9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadByte15(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCacheOp(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
+static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -261,6 +293,11 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSynciR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -284,6 +321,11 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -330,6 +372,11 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSimm4(MCInst &Inst,
unsigned Value,
uint64_t Address,
@@ -340,23 +387,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-// Decode the immediate field of an LSA instruction which
-// is off by one.
-static DecodeStatus DecodeLSAImm(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+template <unsigned Bits, int Offset>
+static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeExtSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
-
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -830,9 +869,24 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (IsMicroMips) {
Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ if (hasMips32r6()) {
+ DEBUG(dbgs() << "Trying MicroMipsR616 table (16-bit instructions):\n");
+ // Calling the auto-generated decoder function for microMIPS32R6
+ // (and microMIPS64R6) 16-bit instructions.
+ Result = decodeInstruction(DecoderTableMicroMipsR616, Instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+ return Result;
+ }
+ }
DEBUG(dbgs() << "Trying MicroMips16 table (16-bit instructions):\n");
- // Calling the auto-generated decoder function.
+ // Calling the auto-generated decoder function for microMIPS 16-bit
+ // instructions.
Result = decodeInstruction(DecoderTableMicroMips16, Instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
@@ -847,24 +901,33 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32r632, Instr, Insn, Address,
- this, STI);
- } else {
- DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
+ Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn, Address,
this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
}
+
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
+ // This is an invalid instruction. Let the disassembler move forward by the
+ // minimum instruction size.
+ Size = 2;
return MCDisassembler::Fail;
}
Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
- if (Result == MCDisassembler::Fail)
+ if (Result == MCDisassembler::Fail) {
+ Size = 4;
return MCDisassembler::Fail;
+ }
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
@@ -925,6 +988,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return Result;
}
+ Size = 4;
return MCDisassembler::Fail;
}
@@ -1079,10 +1143,66 @@ static DecodeStatus DecodeMem(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- if(Inst.getOpcode() == Mips::SC ||
- Inst.getOpcode() == Mips::SCD){
+ if (Inst.getOpcode() == Mips::SC ||
+ Inst.getOpcode() == Mips::SCD)
Inst.addOperand(MCOperand::createReg(Reg));
- }
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemEVA(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn >> 7);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ if (Inst.getOpcode() == Mips::SCE)
+ Inst.addOperand(MCOperand::createReg(Reg));
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
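The EVA decoders added here all pull a signed 9-bit offset out of the instruction word, just from different positions. A standalone sketch of the bit arithmetic, with SignExtend32 and fieldFromInstruction reimplemented for illustration (the real helpers live in llvm/Support/MathExtras.h and the decoder support code):

    #include <cstdint>

    template <unsigned B> int32_t signExtend32(uint32_t X) {
      return int32_t(X << (32 - B)) >> (32 - B); // sign-extend the low B bits
    }
    uint32_t field(uint32_t Insn, unsigned Start, unsigned Size) {
      return (Insn >> Start) & ((1u << Size) - 1);
    }

    // DecodeMemEVA layout: signed offset in bits 15-7, reg 20-16, base 25-21.
    void decodeMemEVAFields(uint32_t Insn, int32_t &Offset, unsigned &Reg,
                            unsigned &Base) {
      Offset = signExtend32<9>(field(Insn, 7, 9));
      Reg = field(Insn, 16, 5);
      Base = field(Insn, 21, 5);
    }
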
+static DecodeStatus DecodeLoadByte9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLoadByte15(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Inst.addOperand(MCOperand::createReg(Reg));
Inst.addOperand(MCOperand::createReg(Base));
@@ -1125,11 +1245,28 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
- int Offset = fieldFromInstruction(Insn, 7, 9);
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Hint = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn >> 7);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1142,6 +1279,24 @@ static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1157,6 +1312,21 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSynciR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Immediate = SignExtend32<16>(Insn & 0xffff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Immediate));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@@ -1220,8 +1390,11 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
return MCDisassembler::Fail;
break;
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
if (DecodeGPRMM16ZeroRegisterClass(Inst, Reg, Address, Decoder)
== MCDisassembler::Fail)
return MCDisassembler::Fail;
@@ -1240,14 +1413,17 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::LHU16_MM:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset << 1));
break;
case Mips::LW16_MM:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset << 2));
break;
}
@@ -1291,7 +1467,16 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
- int Offset = SignExtend32<4>(Insn & 0xf);
+ int Offset;
+ switch (Inst.getOpcode()) {
+ case Mips::LWM16_MMR6:
+ case Mips::SWM16_MMR6:
+ Offset = fieldFromInstruction(Insn, 4, 4);
+ break;
+ default:
+ Offset = SignExtend32<4>(Insn & 0xf);
+ break;
+ }
if (DecodeRegListOperand16(Inst, Insn, Address, Decoder)
== MCDisassembler::Fail)
@@ -1303,6 +1488,27 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ if (Inst.getOpcode() == Mips::SCE_MM)
+ Inst.addOperand(MCOperand::createReg(Reg));
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1659,6 +1865,16 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<26>(Offset) << 1;
+
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
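microMIPS instructions are halfword-aligned, so the 26-bit field stores the byte offset divided by 2; the decoder sign-extends and scales back, and the matching encoder (getBranchTarget26OpValueMM, further down in this diff) divides by 2 again. A tiny sketch under those assumptions:

    #include <cstdint>

    int32_t decodeBranch26MM(uint32_t Field) {
      // Sign-extend the 26-bit field, then scale to bytes.
      return (int32_t(Field << 6) >> 6) * 2;
    }
    uint32_t encodeBranch26MM(int32_t ByteOffset) {
      return uint32_t(ByteOffset >> 1) & 0x3ffffff;
    }
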
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1700,6 +1916,14 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 8 : Value));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSimm4(MCInst &Inst,
unsigned Value,
uint64_t Address,
@@ -1716,12 +1940,12 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLSAImm(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- // We add one to the immediate field as it was encoded as 'imm - 1'.
- Inst.addOperand(MCOperand::createImm(Insn + 1));
+template <unsigned Bits, int Offset>
+static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Value &= ((1 << Bits) - 1);
+ Inst.addOperand(MCOperand::createImm(Value + Offset));
return MCDisassembler::Success;
}
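The template that replaces DecodeLSAImm makes the mask-then-bias pattern reusable; instantiated with Bits = 2 and Offset = 1 it reproduces the old LSA behavior, where the field encodes 'imm - 1'. A standalone sketch of the same arithmetic:

    // decodeUImmWithOffset<2, 1>(0) == 1 and decodeUImmWithOffset<2, 1>(3) == 4,
    // matching the removed "Inst.addOperand(MCOperand::createImm(Insn + 1))".
    template <unsigned Bits, int Offset>
    unsigned decodeUImmWithOffset(unsigned Value) {
      Value &= (1u << Bits) - 1;
      return Value + Offset;
    }
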
@@ -1736,15 +1960,6 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeExtSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- int Size = (int) Insn + 1;
- Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4));
@@ -1792,15 +2007,21 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
- Mips::S6, Mips::FP};
+ Mips::S6, Mips::S7, Mips::FP};
unsigned RegNum;
unsigned RegLst = fieldFromInstruction(Insn, 21, 5);
+
// Empty register lists are not allowed.
if (RegLst == 0)
return MCDisassembler::Fail;
RegNum = RegLst & 0xf;
+
+ // RegLst values 10-15 and 26-31 are reserved.
+ if (RegNum > 9)
+ return MCDisassembler::Fail;
+
for (unsigned i = 0; i < RegNum; i++)
Inst.addOperand(MCOperand::createReg(Regs[i]));
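With S7 added, the table above has nine entries, so counts of 10-15 (and their mirrored 26-31 forms) would index past it; the new guard rejects them as reserved. A standalone sketch of the guarded expansion, assuming raw register numbers in place of the Mips::* enums:

    #include <vector>

    bool expandRegList(unsigned RegLst, std::vector<unsigned> &Out) {
      static const unsigned Regs[] = {16, 17, 18, 19, 20, 21,
                                      22, 23, 30}; // $s0-$s7, $fp
      if (RegLst == 0)
        return false;             // empty register lists are not allowed
      unsigned RegNum = RegLst & 0xf;
      if (RegNum > 9)
        return false;             // reserved encodings: 10-15 and 26-31
      for (unsigned I = 0; I < RegNum; ++I)
        Out.push_back(Regs[I]);
      return true;
    }
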
@@ -1814,7 +2035,16 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3};
- unsigned RegLst = fieldFromInstruction(Insn, 4, 2);
+ unsigned RegLst;
+ switch(Inst.getOpcode()) {
+ default:
+ RegLst = fieldFromInstruction(Insn, 4, 2);
+ break;
+ case Mips::LWM16_MMR6:
+ case Mips::SWM16_MMR6:
+ RegLst = fieldFromInstruction(Insn, 8, 2);
+ break;
+ }
unsigned RegNum = RegLst & 0x3;
for (unsigned i = 0; i <= RegNum; i++)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index a5637b1..a7b7d2e 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -235,7 +235,9 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
case Mips::SWM32_MM:
case Mips::LWM32_MM:
case Mips::SWM16_MM:
+ case Mips::SWM16_MMR6:
case Mips::LWM16_MM:
+ case Mips::LWM16_MMR6:
opNum = MI->getNumOperands() - 2;
break;
}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 713f35c..0e61ea6 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -73,8 +73,6 @@ enum CondCode {
const char *MipsFCCToString(Mips::CondCode CC);
} // end namespace Mips
-class TargetMachine;
-
class MipsInstPrinter : public MCInstPrinter {
public:
MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 8e6c9e6..cdcc392 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -23,7 +23,7 @@ static const MCPhysReg Mips64IntRegs[8] = {
Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
}
-const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
+ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
if (IsO32())
return makeArrayRef(O32IntRegs);
if (IsN32() || IsN64())
@@ -31,7 +31,7 @@ const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
llvm_unreachable("Unhandled ABI");
}
-const ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
+ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
if (IsO32())
return makeArrayRef(O32IntRegs);
if (IsN32() || IsN64())
@@ -78,7 +78,6 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
.Case("mips32r3", MipsABIInfo::O32())
.Case("mips32r5", MipsABIInfo::O32())
.Case("mips32r6", MipsABIInfo::O32())
- .Case("mips16", MipsABIInfo::O32())
.Case("mips3", MipsABIInfo::N64())
.Case("mips4", MipsABIInfo::N64())
.Case("mips5", MipsABIInfo::N64())
@@ -107,6 +106,10 @@ unsigned MipsABIInfo::GetNullPtr() const {
return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO;
}
+unsigned MipsABIInfo::GetZeroReg() const {
+ return AreGprs64bit() ? Mips::ZERO_64 : Mips::ZERO;
+}
+
unsigned MipsABIInfo::GetPtrAdduOp() const {
return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu;
}
@@ -115,6 +118,10 @@ unsigned MipsABIInfo::GetPtrAddiuOp() const {
return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu;
}
+unsigned MipsABIInfo::GetGPRMoveOp() const {
+ return ArePtrs64bit() ? Mips::OR64 : Mips::OR;
+}
+
unsigned MipsABIInfo::GetEhDataReg(unsigned I) const {
static const unsigned EhDataReg[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 40c5681..ffa2c76 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -47,10 +47,10 @@ public:
ABI GetEnumValue() const { return ThisABI; }
/// The registers to use for byval arguments.
- const ArrayRef<MCPhysReg> GetByValArgRegs() const;
+ ArrayRef<MCPhysReg> GetByValArgRegs() const;
/// The registers to use for the variable argument list.
- const ArrayRef<MCPhysReg> GetVarArgRegs() const;
+ ArrayRef<MCPhysReg> GetVarArgRegs() const;
/// Obtain the size of the area allocated by the callee for arguments.
/// CallingConv::FastCall affects the value for O32.
@@ -67,9 +67,12 @@ public:
unsigned GetFramePtr() const;
unsigned GetBasePtr() const;
unsigned GetNullPtr() const;
+ unsigned GetZeroReg() const;
unsigned GetPtrAdduOp() const;
unsigned GetPtrAddiuOp() const;
+ unsigned GetGPRMoveOp() const;
inline bool ArePtrs64bit() const { return IsN64(); }
+ inline bool AreGprs64bit() const { return IsN32() || IsN64(); }
unsigned GetEhDataReg(unsigned I) const;
};
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 328e717..e4865e2 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -63,15 +63,19 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// address range. Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isIntN(16, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC19_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 19-bit signed immediate.
- if (!isIntN(19, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC19 fixup");
+ if (!isInt<19>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup");
+ return 0;
+ }
break;
case Mips::fixup_Mips_26:
// So far we are only using this type for jumps.
@@ -104,45 +108,57 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 7-bit signed immediate.
- if (!isIntN(7, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC7 fixup");
+ if (!isInt<7>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MICROMIPS_PC10_S1:
Value -= 2;
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 10-bit signed immediate.
- if (!isIntN(10, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC10 fixup");
+ if (!isInt<10>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MICROMIPS_PC16_S1:
Value -= 4;
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isIntN(16, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC18_S3:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as an 18-bit signed immediate.
- if (!isIntN(18, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC21_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isIntN(21, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC26_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isIntN(26, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup");
+ return 0;
+ }
break;
}
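Every PC-relative case above follows the same recipe: scale the byte offset by the instruction alignment with a signed division, check that the result fits in an N-bit signed immediate, and, with this change, report a recoverable error and encode 0 instead of calling reportFatalError. A standalone sketch of that recipe:

    #include <cstdint>

    // isInt<N> analogue: does V fit in an N-bit two's-complement immediate?
    template <unsigned N> bool fitsSigned(int64_t V) {
      return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
    }

    // e.g. the PC16 case: scale by 4, then require a 16-bit signed result.
    // Returns false to signal "report error, encode 0" in the caller.
    bool adjustPC16(int64_t Value, int64_t &Scaled) {
      Scaled = Value / 4; // signed division: Value can be negative
      return fitsSigned<16>(Scaled);
    }
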
@@ -232,6 +248,18 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
+bool MipsAsmBackend::getFixupKind(StringRef Name,
+                                  MCFixupKind &MappedKind) const {
+ if (Name == "R_MIPS_NONE") {
+ MappedKind = (MCFixupKind)Mips::fixup_Mips_NONE;
+ return true;
+ }
+ if (Name == "R_MIPS_32") {
+ MappedKind = FK_Data_4;
+ return true;
+ }
+ return MCAsmBackend::getFixupKind(Name, MappedKind);
+}
+
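This override is what lets a .reloc directive name MIPS relocations directly. A hypothetical standalone analogue of the lookup chain, for illustration only:

    #include <string>

    enum class FixupName { MipsNone, Data4, Unknown };

    // Map relocation names (as written after ".reloc") to fixup kinds,
    // deferring unknown names to the generic backend.
    FixupName lookupMipsFixup(const std::string &Name) {
      if (Name == "R_MIPS_NONE")
        return FixupName::MipsNone; // becomes Mips::fixup_Mips_NONE
      if (Name == "R_MIPS_32")
        return FixupName::Data4;    // becomes FK_Data_4
      return FixupName::Unknown;    // fall back to MCAsmBackend::getFixupKind
    }
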
const MCFixupKindInfo &MipsAsmBackend::
getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = {
@@ -239,6 +267,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
+ { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -304,6 +333,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
+ { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 16, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index b3d5a49..1c9af92 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -41,6 +41,7 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
+ bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
unsigned getNumFixupKinds() const override {
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 9b29527..5b9f02b 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -68,6 +68,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
+ case Mips::fixup_Mips_NONE:
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
@@ -325,13 +327,24 @@ static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
// matching LO;
// - prefer LOs without a pair;
// - prefer LOs with higher offset;
+
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
+ const ELFRelocationEntry &A = *AP;
+ const ELFRelocationEntry &B = *BP;
+ if (A.Offset != B.Offset)
+ return B.Offset - A.Offset;
+ if (B.Type != A.Type)
+ return A.Type - B.Type;
+ return 0;
+}
+
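array_pod_sort drives qsort-style three-way comparators, and cmpRel orders entries by descending Offset and then ascending Type, which the HI/LO matching below relies on. The same ordering expressed as a strict-weak-ordering predicate, for illustration:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Reloc { uint64_t Offset; unsigned Type; };

    // Equivalent ordering to cmpRel: descending Offset, then ascending Type.
    bool relocBefore(const Reloc &A, const Reloc &B) {
      if (A.Offset != B.Offset)
        return A.Offset > B.Offset;
      return A.Type < B.Type;
    }

    void sortRelocs(std::vector<Reloc> &V) {
      std::stable_sort(V.begin(), V.end(), relocBefore);
    }
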
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
if (Relocs.size() < 2)
return;
- // The default function sorts entries by Offset in descending order.
- MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+ // Sorts entries by Offset in descending order.
+ array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
// Init MipsRelocs from Relocs.
std::vector<MipsRelocationEntry> MipsRelocs;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index b45d9cf..e7d687e 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -63,7 +63,7 @@ void MipsELFStreamer::SwitchSection(MCSection *Section,
}
void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+ SMLoc Loc) {
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
Labels.clear();
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index af9311f..a241cde 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -60,8 +60,7 @@ public:
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .word directive is emitted.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override;
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index e601963..3652f4b 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -23,8 +23,11 @@ namespace Mips {
// in MipsAsmBackend.cpp.
//
enum Fixups {
+ // Branch fixups resulting in R_MIPS_NONE.
+ fixup_Mips_NONE = FirstTargetFixupKind,
+
// Branch fixups resulting in R_MIPS_16.
- fixup_Mips_16 = FirstTargetFixupKind,
+ fixup_Mips_16,
// Pure 32 bit data fixup resulting in - R_MIPS_32.
fixup_Mips_32,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 5d23fcb..d4ccf03 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -17,13 +17,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class MipsMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit MipsMCAsmInfo(const Triple &TheTriple);
- };
+class MipsMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit MipsMCAsmInfo(const Triple &TheTriple);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index e36263d..4b030eb 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -190,6 +190,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
else
NewOpcode = Mips::Std2MicroMips(Opcode, Mips::Arch_micromips);
+ // Check whether it is a DSP instruction.
+ if (NewOpcode == -1)
+ NewOpcode = Mips::Dsp2MicroMips(Opcode, Mips::Arch_mmdsp);
+
if (NewOpcode != -1) {
if (Fixups.size() > N)
Fixups.pop_back();
@@ -346,6 +350,23 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTarget26OpValueMM - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM(
+ const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm())
+ return MO.getImm() >> 1;
+
+ // TODO: Push 26 PC fixup.
+ return 0;
+}
+
/// getJumpOffset16OpValue - Return binary encoding of the jump
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -745,7 +766,8 @@ getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
// Register is encoded in bits 9-5, offset is encoded in bits 4-0.
assert(MI.getOperand(OpNo).isReg() &&
- MI.getOperand(OpNo).getReg() == Mips::SP &&
+ (MI.getOperand(OpNo).getReg() == Mips::SP ||
+ MI.getOperand(OpNo).getReg() == Mips::SP_64) &&
"Unexpected base register!");
unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
Fixups, STI) >> 2;
@@ -769,6 +791,19 @@ getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 8-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups,
+ STI) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+ return (OffBits & 0x1FF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -792,6 +827,19 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups,
+ STI) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+ return (OffBits & 0xFFFF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -801,7 +849,9 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
default:
break;
case Mips::SWM16_MM:
+ case Mips::SWM16_MMR6:
case Mips::LWM16_MM:
+ case Mips::LWM16_MMR6:
OpNo = MI.getNumOperands() - 2;
break;
}
@@ -815,15 +865,6 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
return ((OffBits >> 2) & 0x0F);
}
-unsigned
-MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- assert(MI.getOperand(OpNo).isImm());
- unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
- return SizeEncoding - 1;
-}
-
// FIXME: should be called getMSBEncoding
//
unsigned
@@ -838,13 +879,15 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
return Position + Size - 1;
}
+template <unsigned Bits, int Offset>
unsigned
-MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+MipsMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
assert(MI.getOperand(OpNo).isImm());
- // The immediate is encoded as 'immediate - 1'.
- return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1;
+ unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
+ Value -= Offset;
+ return Value;
}
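getUImmWithOffsetEncoding is the inverse of the disassembler's DecodeUImmWithOffset earlier in this diff: the assembler subtracts Offset before packing the field, the disassembler masks and adds it back. A round-trip sketch under that pairing (the mask on encode is added here for self-containment; the real encoder relies on the operand already being in range):

    #include <cassert>

    template <unsigned Bits, int Offset>
    unsigned encodeUImmWithOffset(unsigned Imm) {
      return (Imm - Offset) & ((1u << Bits) - 1);
    }

    template <unsigned Bits, int Offset>
    unsigned decodeUImmWithOffset(unsigned Field) {
      return (Field & ((1u << Bits) - 1)) + Offset;
    }

    int main() {
      // LSA-style operand: Bits = 2, Offset = 1, so assembly 1..4 <-> field 0..3.
      for (unsigned Imm = 1; Imm <= 4; ++Imm)
        assert(decodeUImmWithOffset<2, 1>(encodeUImmWithOffset<2, 1>(Imm)) == Imm);
    }
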
unsigned
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 911cc2f..fdacd17 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -137,6 +137,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getBranchTarget26OpValueMM - Return binary encoding of the branch
+ // offset operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTarget26OpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getJumpOffset16OpValue - Return binary encoding of the jump
// offset operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -172,23 +179,27 @@ public:
unsigned getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- // getLSAImmEncoding - Return binary encoding of LSA immediate.
- unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// Subtract Offset, then encode as an N-bit unsigned integer.
+ template <unsigned Bits, int Offset>
+ unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index fd2ed17..e889972 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -51,8 +51,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS MipsMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index e4da2df..e5fa755 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -89,9 +89,15 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() {
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ forbidModuleDirective();
+}
void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) {
}
+void MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {}
void MipsTargetStreamer::emitDirectiveModuleFP() {}
@@ -358,6 +364,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
forbidModuleDirective();
}
+void MipsTargetAsmStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset);
+ OS << "\t.cprestore\t" << Offset << "\n";
+}
+
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
int RegOrOffset,
const MCSymbol &Sym,
@@ -373,7 +385,13 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
- OS << Sym.getName() << "\n";
+ OS << Sym.getName();
+ forbidModuleDirective();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {
+ OS << "\t.cpreturn";
forbidModuleDirective();
}
@@ -595,8 +613,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHT_REL);
+ MCSymbol *Sym = Context.getOrCreateSymbol(Name);
const MCSymbolRefExpr *ExprRef =
- MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context);
MCA.registerSection(*Sec);
Sec->setAlignment(4);
@@ -622,10 +641,25 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
OS.PopSection();
+
+ // .end also implicitly sets the size.
+ MCSymbol *CurPCSym = Context.createTempSymbol();
+ OS.EmitLabel(CurPCSym);
+ const MCExpr *Size = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context),
+ ExprRef, Context);
+ int64_t AbsSize;
+ if (!Size->evaluateAsAbsolute(AbsSize, MCA))
+ llvm_unreachable("Function size must be evaluatable as absolute");
+ Size = MCConstantExpr::create(AbsSize, Context);
+ static_cast<MCSymbolELF *>(Sym)->setSize(Size);
}
void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+
+ // .ent also acts like an implicit '.type symbol, STT_FUNC'
+ static_cast<const MCSymbolELF &>(Symbol).setType(ELF::STT_FUNC);
}
void MipsTargetELFStreamer::emitDirectiveAbiCalls() {
@@ -752,6 +786,24 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
forbidModuleDirective();
}
+void MipsTargetELFStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset);
+ // .cprestore offset
+ // When PIC mode is enabled and the O32 ABI is used, this directive expands
+ // to:
+ // sw $gp, offset($sp)
+ // and adds a corresponding LW after every JAL.
+
+ // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it
+ // is used in non-PIC mode.
+ if (!Pic || (getABI().IsN32() || getABI().IsN64()))
+ return;
+
+ for (const MCInst &Inst : StoreInsts)
+ getStreamer().EmitInstruction(Inst, STI);
+}
+
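As a usage note, hedged since the offsets here are illustrative: under O32 PIC, a prologue containing ".cprestore 16" should expand to "sw $gp, 16($sp)", with a matching "lw $gp, 16($sp)" materialized after each "jal"; under N32/N64 or with PIC disabled the directive is accepted but, per the early return above, emits nothing.
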
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
int RegOrOffset,
const MCSymbol &Sym,
@@ -766,7 +818,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
// Either store the old $gp in a register or on the stack
if (IsReg) {
// move $save, $gpreg
- Inst.setOpcode(Mips::DADDu);
+ Inst.setOpcode(Mips::OR64);
Inst.addOperand(MCOperand::createReg(RegOrOffset));
Inst.addOperand(MCOperand::createReg(Mips::GP));
Inst.addOperand(MCOperand::createReg(Mips::ZERO));
@@ -810,6 +862,30 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
forbidModuleDirective();
}
+void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {
+ // Only the N32 and N64 ABIs emit anything for .cpreturn, and only when
+ // PIC is enabled.
+ if (!Pic || !(getABI().IsN32() || getABI().IsN64()))
+ return;
+
+ MCInst Inst;
+ // Either restore the old $gp from a register or on the stack
+ if (SaveLocationIsRegister) {
+ Inst.setOpcode(Mips::OR);
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(SaveLocation));
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ } else {
+ Inst.setOpcode(Mips::LD);
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::SP));
+ Inst.addOperand(MCOperand::createImm(SaveLocation));
+ }
+ getStreamer().EmitInstruction(Inst, STI);
+
+ forbidModuleDirective();
+}
+
void MipsTargetELFStreamer::emitMipsAbiFlags() {
MCAssembler &MCA = getStreamer().getAssembler();
MCContext &Context = MCA.getContext();
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 187a022..400f6eef 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -16,6 +16,64 @@ class MMR6Arch<string opstr> {
string BaseOpcode = opstr;
}
+// Class used for microMIPS32r6 and microMIPS64r6 instructions.
+class MicroMipsR6Inst16 : PredicateControl {
+ string DecoderNamespace = "MicroMipsR6";
+ let InsnPredicates = [HasMicroMips32r6];
+}
+
+class BC16_FM_MM16R6 {
+ bits<10> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x33;
+ let Inst{9-0} = offset;
+}
+
+class BEQZC_BNEZC_FM_MM16R6<bits<6> op> : MicroMipsR6Inst16 {
+ bits<3> rs;
+ bits<7> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-7} = rs;
+ let Inst{6-0} = offset;
+}
+
+class POOL16C_JALRC_FM_MM16R6<bits<5> op> {
+ bits<5> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = rs;
+ let Inst{4-0} = op;
+}
+
+class POOL16C_JRCADDIUSP_FM_MM16R6<bits<5> op> {
+ bits<5> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = imm;
+ let Inst{4-0} = op;
+}
+
+class POOL16C_LWM_SWM_FM_MM16R6<bits<4> funct> {
+ bits<2> rt;
+ bits<4> addr;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-8} = rt;
+ let Inst{7-4} = addr;
+ let Inst{3-0} = funct;
+}
+
class POOL32A_BITSWAP_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rd;
bits<5> rt;
@@ -71,6 +129,64 @@ class ADDI_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
let Inst{15-0} = imm16;
}
+class POOL32C_ST_EVA_FM_MMR6<bits<6> op, bits<3> funct> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b1010;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LB32_FM_MMR6 : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000111;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class LBU32_FM_MMR6 : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000101;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class POOL32C_LB_LBU_FM_MMR6<bits<3> funct> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = 0b0110;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = addr{8-0};
+}
+
class SIGN_EXTEND_FM_MMR6<string instr_asm, bits<10> funct>
: MMR6Arch<instr_asm> {
bits<5> rd;
@@ -124,6 +240,69 @@ class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
let Inst{9-0} = funct;
}
+class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = op;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0;
+}
+
+class POOL32A_RDPGPR_FM_MMR6<bits<10> funct> {
+ bits<5> rt;
+ bits<5> rd;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_RDHWR_FM_MMR6 {
+ bits<5> rt;
+ bits<5> rs;
+ bits<3> sel;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = 0;
+ let Inst{13-11} = sel;
+ let Inst{10} = 0;
+ let Inst{9-0} = 0b0111000000;
+}
+
+class POOL32A_SYNC_FM_MMR6 {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = stype;
+ let Inst{15-6} = 0b0110101101;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32I_SYNCI_FM_MMR6 {
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> immediate = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010000;
+ let Inst{25-21} = 0b01100;
+ let Inst{20-16} = base;
+ let Inst{15-0} = immediate;
+}
+
class POOL32A_2R_FM_MMR6<bits<10> funct> : MipsR6Inst {
bits<5> rs;
bits<5> rt;
@@ -198,6 +377,78 @@ class POOL32A_LSA_FM<bits<6> funct> : MipsR6Inst {
let Inst{5-0} = funct;
}
+class SB32_SH32_STORE_FM_MMR6<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class POOL32C_STORE_EVA_FM_MMR6<bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b1010;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_EVA_FM_MMR6<bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b0110;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_FM_MMR6 {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b111111;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class LOAD_UPPER_IMM_FM_MMR6 {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000100;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = imm16;
+}
+
class CMP_BRANCH_1R_RT_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rt;
bits<16> offset;
@@ -222,12 +473,13 @@ class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
let Inst{15-0} = offset;
}
-class ERET_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+class POOL32A_ERET_FM_MMR6<string instr_asm, bits<10> funct>
+ : MMR6Arch<instr_asm> {
bits<32> Inst;
let Inst{31-26} = 0x00;
let Inst{25-16} = 0x00;
- let Inst{15-6} = 0x3cd;
+ let Inst{15-6} = funct;
let Inst{5-0} = 0x3c;
}
@@ -262,7 +514,8 @@ class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
let Inst{5-0} = 0x0;
}
-class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
+ : MMR6Arch<instr_asm> {
bits<32> Inst;
bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
@@ -287,3 +540,323 @@ class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<in
let Inst{10} = rotate;
let Inst{9-0} = funct;
}
+
+class SW32_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-0} = addr{15-0};
+}
+
+class POOL32C_SWE_FM_MMR6<string instr_asm, bits<6> op, bits<4> fmt,
+ bits<3> funct> : MMR6Arch<instr_asm> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = fmt;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class POOL32F_ARITH_FM_MMR6<string instr_asm, bits<2> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10} = 0;
+ let Inst{9-8} = fmt;
+ let Inst{7-0} = funct;
+}
+
+class POOL32F_ARITHF_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_MOV_NEG_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MINMAX_FM<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_CMP_FM<string instr_asm, bits<6> format, FIELD_CMP_COND Cond>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-6} = Cond.Value;
+ let Inst{5-0} = format;
+}
+
+class POOL32F_CVT_LW_FM<string instr_asm, bit fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_CVT_DS_FM<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_ABS_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MATH_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL16A_ADDU16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rs;
+ bits<3> rt;
+ bits<3> rd;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b000001;
+ let Inst{9-7} = rs;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rd;
+ let Inst{0} = 0;
+}
+
+class POOL16C_AND16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = 0b0001;
+}
+
+class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = 0b0000;
+}
+
+class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = op;
+}
+
+class POOL16C_BREAKPOINT_FM_MMR6<bits<6> op> {
+ bits<4> code_;
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = code_;
+ let Inst{5-0} = op;
+}
+
+class POOL16A_SUBU16_FM_MMR6 {
+ bits<3> rs;
+ bits<3> rt;
+ bits<3> rd;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b000001;
+ let Inst{9-7} = rs;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rd;
+ let Inst{0} = 0b1;
+}
+
+class POOL32A_WRPGPR_WSBH_FM_MMR6<bits<10> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class POOL32F_RECIP_ROUND_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = fs;
+ let Inst{20-16} = fd;
+ let Inst{15-11} = 0;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = 0b000100000;
+}
+
+class POOL32F_SEL_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_CLASS_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = fs;
+ let Inst{20-16} = fd;
+ let Inst{15-11} = 0b00000;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 53bde13..31b5db0 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -11,6 +11,13 @@
//
//===----------------------------------------------------------------------===//
+def brtarget26_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget26OpValueMM";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget26MM";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Encodings
@@ -28,6 +35,9 @@ class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>;
class AUI_MMR6_ENC : AUI_FM_MMR6;
class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
+class BC16_MMR6_ENC : BC16_FM_MM16R6;
+class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
+class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
@@ -42,13 +52,19 @@ class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
-class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
-class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
+class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>;
+class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>;
+class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>;
+class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"deret", 0b1110001101>;
class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
+class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>;
class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
+class JRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0x3>;
+class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>;
class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>;
+class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>;
class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>;
class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>;
class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>;
@@ -59,15 +75,99 @@ class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>;
class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>;
class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>;
class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>;
+class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>;
class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>;
class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>;
class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>;
class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>;
+class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>;
class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>;
class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>;
class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>;
+class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>;
+class SWE_MMR6_ENC : POOL32C_SWE_FM_MMR6<"swe", 0x18, 0xa, 0x7>;
+class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>;
+class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>;
+class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>;
+class PREFE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b010>;
+class CACHEE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b011>;
+class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x3c5>;
+class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x1ec>;
+class LB_MMR6_ENC : LB32_FM_MMR6;
+class LBU_MMR6_ENC : LBU32_FM_MMR6;
+class LBE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b100>;
+class LBUE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b000>;
+class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>;
+class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6;
+class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">;
+class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">;
+class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6;
+class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">;
+class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>;
+class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
+class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>;
+class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>;
+class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>;
+class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>;
+class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>;
+class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>;
+class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>;
+class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>;
+class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>;
+class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>;
+class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>;
+class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>;
+class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>;
+class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>;
+class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>;
+class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>;
+class RSQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.s", 0, 0b00001000>;
+class RSQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.d", 1, 0b00001000>;
+class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>;
+class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>;
+class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>;
+class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>;
+class SHE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b101>;
+class LLE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b110>;
+class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>;
+class LW_MMR6_ENC : LOAD_WORD_FM_MMR6;
+class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6;
+class RECIP_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.s", 0, 0b01001000>;
+class RECIP_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.d", 1, 0b01001000>;
+class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>;
+class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>;
+class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0,
+ 0b11001100>;
+class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1,
+ 0b11001100>;
+class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0,
+ 0b11101100>;
+class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1,
+ 0b11101100>;
+class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>;
+class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>;
+class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>;
+class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>;
+class SELENZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.s", 0, 0b001111000>;
+class SELENZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.d", 1, 0b001111000>;
+class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>;
+class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>;
+
+class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6;
+class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6;
+class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>, MicroMipsR6Inst16;
+class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6;
+class OR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1001>;
+class SLL16_MMR6_ENC : SHIFT_FM_MM16<0>, MicroMipsR6Inst16;
+class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16;
+class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>;
+class LI16_MMR6_ENC : LI_FM_MM16;
+class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>;
+class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>;
+class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6;
+class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>;
class CMP_CBR_RT_Z_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
RegisterOperand GPROpnd>
@@ -108,6 +208,43 @@ class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm,
list<Register> Defs = [RA];
}
+/// Floating Point Instructions
+class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>;
+class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>;
+class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>;
+class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>;
+class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>;
+class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>;
+class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>;
+class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>;
+class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>;
+class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
+class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
+class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
+class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
+class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
+class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
+class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>;
+class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
+class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
+class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>;
+class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>;
+class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>;
+class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>;
+class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>;
+class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>;
+
+class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>;
+class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>;
+class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>;
+class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>;
+class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>;
+class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>;
+class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>;
+class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>;
+class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>;
+class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>;
+
//===----------------------------------------------------------------------===//
//
// Instruction Descriptions
@@ -130,11 +267,34 @@ class BC_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd>
bit isBarrier = 1;
}
-class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26> {
+class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm> {
bit isCall = 1;
list<Register> Defs = [RA];
}
-class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26>;
+class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm>;
+
+class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
+ !strconcat("bc16", "\t$offset"), [],
+ II_BC, FrmI>,
+ MMR6Arch<"bc16">, MicroMipsR6Inst16 {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 0;
+ let AdditionalPredicates = [RelocPIC];
+ let Defs = [AT];
+}
+
+class BEQZC_BNEZC_MM16R6_DESC_BASE<string instr_asm>
+ : CBranchZeroMM<instr_asm, brtarget7_mm, GPRMM16Opnd>, MMR6Arch<instr_asm> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 0;
+ let Defs = [AT];
+}
+class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">;
+class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">;
+
class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>;
class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>;
@@ -162,6 +322,35 @@ class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd>;
class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>;
+class PREFE_CACHEE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> :
+ CACHE_HINT_MMR6_DESC<instr_asm, MemOpnd,
+ GPROpnd> {
+ string DecoderMethod = "DecodePrefeOpMM";
+}
+
+class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, GPR32Opnd>;
+class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, GPR32Opnd>;
+
+class LB_LBU_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeLoadByte15";
+ bit mayLoad = 1;
+}
+class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd>;
+class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd>;
+
+class LBE_LBUE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd>
+ : LB_LBU_MMR6_DESC_BASE<instr_asm, MemOpnd, GPROpnd> {
+ let DecoderMethod = "DecodeLoadByte9";
+}
+class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd>;
+class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd>;
+
class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
: MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
@@ -174,10 +363,22 @@ class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
class EHB_MMR6_DESC : Barrier<"ehb">;
class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>;
+class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd>;
class ERET_MMR6_DESC : ER_FT<"eret">;
+class DERET_MMR6_DESC : ER_FT<"deret">;
class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
+class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
+ : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [(MipsJmpLink RO:$rs)], II_JALR, FrmR>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let isCall = 1;
+ let hasDelaySlot = 0;
+ let Defs = [RA];
+}
+class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
+
class JMP_MMR6_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
RegisterOperand GPROpnd>
: MMR6Arch<opstr> {
@@ -200,6 +401,27 @@ class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16,
list<Register> Defs = [AT];
}
+class JRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
+ : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], II_JR, FrmR>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let hasDelaySlot = 0;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>;
+
+class JRCADDIUSP_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm",
+ [], II_JRADDIUSP, FrmR>,
+ MMR6Arch<"jrcaddiusp">, MicroMipsR6Inst16 {
+ let hasDelaySlot = 0;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
class ALIGN_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
@@ -241,7 +463,7 @@ class LSA_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
list<dag> Pattern = [];
}
-class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>;
class PCREL_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> : MMR6Arch<instr_asm> {
@@ -264,6 +486,18 @@ class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
+class PAUSE_MMR6_DESC : Barrier<"pause">;
+class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel);
+ string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_RDHWR;
+ Format Form = FrmR;
+}
+
+class WAIT_MMR6_DESC : WaitMM<"wait">;
+class SSNOP_MMR6_DESC : Barrier<"ssnop">;
class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>;
class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>;
class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>;
@@ -277,13 +511,426 @@ class ORI_MMR6_DESC : ArithLogicI<"ori", simm16, GPR32Opnd>;
class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd>;
class XORI_MMR6_DESC : ArithLogicI<"xori", simm16, GPR32Opnd>;
+class SWE_MMR6_DESC_BASE<string opstr, DAGOperand RO, DAGOperand MO,
+ SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
+}
+class SW_MMR6_DESC : Store<"sw", GPR32Opnd>;
+class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9>;
+
+class WRPGPR_WSBH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
+ : MMR6Arch<instr_asm> {
+ dag InOperandList = (ins RO:$rs);
+ dag OutOperandList = (outs RO:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ list<dag> Pattern = [];
+ Format f = FrmR;
+ string BaseOpcode = instr_asm;
+ bit hasSideEffects = 0;
+}
+class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd>;
+class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd>;
+
+/// Floating Point Instructions
+class FARITH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RC,
+ InstrItinClass Itin, bit isComm,
+ SDPatternOperator OpNode = null_frag> : HARDFLOAT {
+ dag OutOperandList = (outs RC:$fd);
+ dag InOperandList = (ins RC:$ft, RC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))];
+ InstrItinClass Itinerary = Itin;
+ bit isCommutable = isComm;
+}
+class FADD_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>;
+class FADD_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"add.d", AFGR64Opnd, II_ADD_D, 1, fadd>;
+class FSUB_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>;
+class FSUB_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"sub.d", AFGR64Opnd, II_SUB_D, 0, fsub>;
+class FMUL_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>;
+class FMUL_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"mul.d", AFGR64Opnd, II_MUL_D, 1, fmul>;
+class FDIV_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>;
+class FDIV_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>;
+class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>, HARDFLOAT;
+class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>, HARDFLOAT;
+class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>, HARDFLOAT;
+class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>, HARDFLOAT;
+
+class FMOV_FNEG_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
+ RegisterOperand SrcRC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+}
+class FMOV_S_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>;
+class FMOV_D_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>;
+class FNEG_S_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>;
+class FNEG_D_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>;
+
+class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>, HARDFLOAT;
+class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>, HARDFLOAT;
+class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>, HARDFLOAT;
+class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>, HARDFLOAT;
+
+class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>, HARDFLOAT;
+class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>, HARDFLOAT;
+class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>, HARDFLOAT;
+class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>, HARDFLOAT;
+
+class CVT_MMR6_DESC_BASE<
+ string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+}
+
+class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd,
+ II_CVT>;
+class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_W_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.d", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.s", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.w", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd,
+ II_CVT>, FGR_64;
+class CVT_S_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.d", AFGR64Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd,
+ II_CVT>, FGR_64;
+
+multiclass CMP_CC_MMR6<bits<6> format, string Typestr,
+ RegisterOperand FGROpnd> {
+ def CMP_AF_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_UN_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_EQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_UEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_LT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_ULT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_LE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_ULE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SAF_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SUN_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SUEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SLT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SULT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SLE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SULE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+}
+
+class ABSS_FT_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
+ RegisterOperand SrcRC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+ list<Predicate> EncodingPredicates = [HasStdEnc];
+}
+
+class ABS_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.s", FGR32Opnd, FGR32Opnd,
+ II_ABS, fabs>;
+class ABS_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.d", AFGR64Opnd, AFGR64Opnd,
+ II_ABS, fabs>;
+class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd,
+ FGR32Opnd, II_FLOOR>;
+class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd,
+ FGR64Opnd, II_FLOOR>;
+class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd,
+ FGR32Opnd, II_FLOOR>;
+class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd,
+ AFGR64Opnd, II_FLOOR>;
+class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd,
+ FGR32Opnd, II_CEIL>;
+class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd,
+ FGR64Opnd, II_CEIL>;
+class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd,
+ FGR32Opnd, II_CEIL>;
+class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd,
+ AFGR64Opnd, II_CEIL>;
+class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd,
+ FGR32Opnd, II_TRUNC>;
+class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
+ FGR64Opnd, II_TRUNC>;
+class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>;
+class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
+ AFGR64Opnd, II_TRUNC>;
+class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
+ II_SQRT_S, fsqrt>;
+class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
+ II_SQRT_D, fsqrt>;
+class RSQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>;
+class RSQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.d", FGR32Opnd,
+ AFGR64Opnd, II_TRUNC>;
+class RECIP_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.s", FGR32Opnd,
+ FGR32Opnd, II_ROUND>;
+class RECIP_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.d", FGR32Opnd, FGR32Opnd,
+ II_ROUND>;
+class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd,
+ FGR32Opnd, II_ROUND>;
+class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd,
+ FGR64Opnd, II_ROUND>;
+class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd,
+ FGR32Opnd, II_ROUND>;
+class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd,
+ FGR64Opnd, II_ROUND>;
+
+class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
+class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
+ // We must insert a SUBREG_TO_REG around $fd_in
+ bit usesCustomInserter = 1;
+}
+
+class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>;
+class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>;
+class SELENZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>;
+class SELENZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>;
+class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
+class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
+class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
+class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+
+class STORE_MMR6_DESC_BASE<string opstr, DAGOperand RO>
+ : Store<opstr, RO>, MMR6Arch<opstr> {
+ let DecoderMethod = "DecodeMemMMImm16";
+}
+class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd>;
+
+class STORE_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins RO:$rt, mem_mm_9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeStoreEvaOpMM";
+ bit mayStore = 1;
+}
+class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd>;
+class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd>;
+class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd>;
+class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd>;
+class LOAD_WORD_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO> :
+ MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins mem_mm_12:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeMemMMImm9";
+ bit mayLoad = 1;
+}
+class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd>;
+class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd>;
+class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
+ MMR6Arch<"addu16">;
+class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
+ MMR6Arch<"and16">;
+class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>,
+ MMR6Arch<"andi16">;
+class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16">;
+class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
+ MMR6Arch<"or16">;
+class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
+ MMR6Arch<"sll16">;
+class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
+ MMR6Arch<"srl16">;
+class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16">, MMR6Arch<"break16">,
+ MicroMipsR6Inst16;
+class LI16_MMR6_DESC : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>,
+ MMR6Arch<"srl16">, MicroMipsR6Inst16, IsAsCheapAsAMove;
+class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">,
+ MicroMipsR6Inst16;
+class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16">, MMR6Arch<"sdbbp16">,
+ MicroMipsR6Inst16;
+class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ MMR6Arch<"sdbbp16">, MicroMipsR6Inst16;
+class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ MMR6Arch<"sdbbp16">, MicroMipsR6Inst16;
+
+class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins mem:$addr);
+ string AsmString = "lw\t$rt, $addr";
+ let DecoderMethod = "DecodeMemMMImm16";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))];
+ InstrItinClass Itinerary = II_LW;
+}
+
+class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins uimm16:$imm16);
+ string AsmString = "lui\t$rt, $imm16";
+ list<dag> Pattern = [];
+ bit hasSideEffects = 0;
+ bit isReMaterializable = 1;
+ InstrItinClass Itinerary = II_LUI;
+ Format Form = FrmI;
+}
+
+class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins i32imm:$stype);
+ string AsmString = !strconcat("sync", "\t$stype");
+ list<dag> Pattern = [(MipsSync imm:$stype)];
+ InstrItinClass Itinerary = NoItinerary;
+ bit HasSideEffects = 1;
+}
+
+class SYNCI_MMR6_DESC : SYNCI_FT<"synci"> {
+ let DecoderMethod = "DecodeSynciR6";
+}
+
+class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$rd);
+ string AsmString = !strconcat("rdpgpr", "\t$rt, $rd");
+}
+
+class SDBBP_MMR6_DESC : MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm20:$code_);
+ string AsmString = !strconcat("sdbbp", "\t$code_");
+ list<dag> Pattern = [];
+}
+
+class LWM16_MMR6_DESC
+ : MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
+ !strconcat("lwm16", "\t$rt, $addr"), [],
+ NoItinerary, FrmI>,
+ MMR6Arch<"lwm16">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
+ let mayLoad = 1;
+ InstrItinClass Itin = NoItinerary;
+ ComplexPattern Addr = addr;
+}
+
+class SWM16_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
+ !strconcat("swm16", "\t$rt, $addr"), [],
+ NoItinerary, FrmI>,
+ MMR6Arch<"swm16">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
+ let mayStore = 1;
+ InstrItinClass Itin = NoItinerary;
+ ComplexPattern Addr = addr;
+}
+
+class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd, DAGOperand RO,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ Operand MemOpnd>
+ : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMImm4";
+ let mayStore = 1;
+}
+class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd,
+ truncstorei8, II_SB, mem_mm_4>;
+class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd,
+ truncstorei16, II_SH, mem_mm_4_lsl1>;
+class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
+ store, II_SW, mem_mm_4_lsl2>;
+
+class SWSP_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
+ !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>,
+ MMR6Arch<"sw">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
+ let mayStore = 1;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
//
//===----------------------------------------------------------------------===//
-let DecoderNamespace = "MicroMips32r6" in {
+let DecoderNamespace = "MicroMipsR6" in {
def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6;
def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6;
def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6;
@@ -298,6 +945,11 @@ def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6;
def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6;
def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6;
def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6;
+def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC,
ISA_MICROMIPS32R6;
def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC,
@@ -320,13 +972,21 @@ def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6;
def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6;
-def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
+def DI_MMR6 : StdMMR6Rel, DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6;
+def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
+def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6;
def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC,
ISA_MICROMIPS32R6;
+def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6;
def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6;
+def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6;
+def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6;
def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6;
+def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6;
def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6;
def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6;
def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6;
@@ -337,17 +997,211 @@ def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6;
def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6;
def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6;
def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6;
+def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6;
def SEB_MMR6 : StdMMR6Rel, SEB_MMR6_DESC, SEB_MMR6_ENC, ISA_MICROMIPS32R6;
def SEH_MMR6 : StdMMR6Rel, SEH_MMR6_DESC, SEH_MMR6_ENC, ISA_MICROMIPS32R6;
def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC,
ISA_MICROMIPS32R6;
def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC,
ISA_MICROMIPS32R6;
+def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6;
def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6;
def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6;
def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6;
+def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6;
+def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6;
+def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6;
+def PREFE_MMR6 : StdMMR6Rel, PREFE_MMR6_ENC, PREFE_MMR6_DESC, ISA_MICROMIPS32R6;
+def CACHEE_MMR6 : StdMMR6Rel, CACHEE_MMR6_ENC, CACHEE_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6;
+def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBE_MMR6 : R6MMR6Rel, LBE_MMR6_ENC, LBE_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBUE_MMR6 : R6MMR6Rel, LBUE_MMR6_ENC, LBUE_MMR6_DESC, ISA_MICROMIPS32R6;
+def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6;
+def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6;
+def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6;
+def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6;
+def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6;
+def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
+def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
+let DecoderMethod = "DecodeMemMMImm16" in {
+ def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6;
+}
+let DecoderMethod = "DecodeMemMMImm9" in {
+ def SWE_MMR6 : StdMMR6Rel, SWE_MMR6_DESC, SWE_MMR6_ENC, ISA_MICROMIPS32R6;
+}
+/// Floating Point Instructions
+def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FADD_D_MMR6 : StdMMR6Rel, FADD_D_MMR6_ENC, FADD_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FSUB_D_MMR6 : StdMMR6Rel, FSUB_D_MMR6_ENC, FSUB_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMUL_D_MMR6 : StdMMR6Rel, FMUL_D_MMR6_ENC, FMUL_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FDIV_D_MMR6 : StdMMR6Rel, FDIV_D_MMR6_ENC, FDIV_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FNEG_D_MMR6 : StdMMR6Rel, FNEG_D_MMR6_ENC, FNEG_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_W_D_MMR6 : StdMMR6Rel, CVT_W_D_MMR6_ENC, CVT_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_S_MMR6 : StdMMR6Rel, CVT_D_S_MMR6_ENC, CVT_D_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_W_MMR6 : StdMMR6Rel, CVT_D_W_MMR6_ENC, CVT_D_W_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_D_MMR6 : StdMMR6Rel, CVT_S_D_MMR6_ENC, CVT_S_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd>;
+defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd>;
+def ABS_S_MMR6 : StdMMR6Rel, ABS_S_MMR6_ENC, ABS_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def ABS_D_MMR6 : StdMMR6Rel, ABS_D_MMR6_ENC, ABS_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_L_D_MMR6 : StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RSQRT_S_MMR6 : StdMMR6Rel, RSQRT_S_MMR6_ENC, RSQRT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RSQRT_D_MMR6 : StdMMR6Rel, RSQRT_D_MMR6_ENC, RSQRT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6;
+def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6;
+def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6;
+def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6;
+def SHE_MMR6 : StdMMR6Rel, SHE_MMR6_DESC, SHE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LLE_MMR6 : StdMMR6Rel, LLE_MMR6_DESC, LLE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LWE_MMR6 : StdMMR6Rel, LWE_MMR6_DESC, LWE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6;
+def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6;
+def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def RECIP_S_MMR6 : StdMMR6Rel, RECIP_S_MMR6_ENC, RECIP_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RECIP_D_MMR6 : StdMMR6Rel, RECIP_D_MMR6_ENC, RECIP_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SEL_S_MMR6 : StdMMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def SEL_D_MMR6 : StdMMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def SELEQZ_S_MMR6 : StdMMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELEQZ_D_MMR6 : StdMMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELENZ_S_MMR6 : StdMMR6Rel, SELENZ_S_MMR6_ENC, SELENZ_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELENZ_D_MMR6 : StdMMR6Rel, SELENZ_D_MMR6_ENC, SELENZ_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
}
//===----------------------------------------------------------------------===//
@@ -357,4 +1211,23 @@ def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6;
+def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
+ !strconcat("b", "\t$offset")> {
+ string DecoderNamespace = "MicroMipsR6";
+}
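+// Background note: MipsAsmPseudoInst definitions such as B_MMR6_Pseudo above
+// exist only for the assembler. The parser accepts the bare "b $offset"
+// spelling and expands it to a real branch encoding, so the pseudo itself
+// never reaches code generation.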
+def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"rdhwr $rt, $rs",
+ (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>,
+ ISA_MICROMIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// MicroMips arbitrary patterns that map to one or more instructions
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr),
+ (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, ISA_MICROMIPS32R6;
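+// For illustration: addrimm4lsl2 only matches word-aligned offsets that fit
+// a 4-bit immediate scaled by 4 (0, 4, ..., 60), so instruction selection
+// can pick the 16-bit SW16_MMR6 form for such stores and falls back to the
+// 32-bit SW encoding otherwise.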
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
new file mode 100644
index 0000000..da305a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
@@ -0,0 +1,86 @@
+//===- MicroMips64r6InstrFormats.td - Instruction Formats -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes microMIPS64r6 instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_FM_MMR6 {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b111100;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm;
+}
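+
+// Worked example (values are illustrative only): with the field layout
+// above, "daui $5, $6, 0x1234" (rt = 5, rs = 6) assembles to
+// 0b111100 << 26 | 5 << 21 | 6 << 16 | 0x1234 == 0xf0a61234.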
+
+class POOL32I_ADD_IMM_FM_MMR6<bits<5> funct> {
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010000;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm;
+}
+
+class POOL32S_EXTBITS_FM_MMR6<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> size;
+ bits<5> pos;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class POOL32S_DALIGN_FM_MMR6 {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+ bits<3> bp;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = bp;
+ let Inst{7-6} = 0b00;
+ let Inst{5-0} = 0b011100;
+}
+
+class POOL32A_DIVMOD_FM_MMR6<string instr_asm, bits<9> funct>
+ : MMR6Arch<instr_asm> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rt;
+ let Inst{10-9} = 0b00;
+ let Inst{8-0} = funct;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
new file mode 100644
index 0000000..ec1aef8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -0,0 +1,119 @@
+//=- MicroMips64r6InstrInfo.td - Instruction Information -*- tablegen -*- -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips64r6 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_MMR6_ENC : DAUI_FM_MMR6;
+class DAHI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10001>;
+class DATI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10000>;
+class DEXT_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b101100>;
+class DEXTM_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b100100>;
+class DEXTU_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b010100>;
+class DALIGN_MMR6_ENC : POOL32S_DALIGN_FM_MMR6;
+class DDIV_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddiv", 0b100011000>;
+class DMOD_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmod", 0b101011000>;
+class DDIVU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddivu", 0b110011000>;
+class DMODU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmodu", 0b111011000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins GPROpnd:$rs, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
+ list<dag> Pattern = [];
+}
+class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd>;
+
+class DAHI_DATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ string Constraints = "$rs = $rt";
+}
+class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd>;
+class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd>;
+
+class EXTBITS_DESC_BASE<string instr_asm, RegisterOperand RO, Operand PosOpnd,
+ Operand SizeOpnd, SDPatternOperator Op = null_frag>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $pos, $size");
+ list<dag> Pattern = [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))];
+ InstrItinClass Itinerary = II_EXT;
+ Format Form = FrmR;
+ string BaseOpcode = instr_asm;
+}
+// TODO: Add 'pos + size' constraint check to dext* instructions
+// DEXT: 0 < pos + size <= 63
+// DEXTM, DEXTU: 32 < pos + size <= 64
+class DEXT_MMR6_DESC : EXTBITS_DESC_BASE<"dext", GPR64Opnd, uimm5,
+ uimm5_plus1, MipsExt>;
+class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5,
+ uimm5_plus33, MipsExt>;
+class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32,
+ uimm5_plus1, MipsExt>;
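+// For illustration of the missing check: every field of
+// "dextm $2, $3, 31, 64" is individually encodable (pos <= 31, size <= 64),
+// yet pos + size = 95 exceeds the documented limit of 64, so the TODO above
+// still needs an explicit constraint.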
+
+class DALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
+ list<dag> Pattern = [];
+}
+
+class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>;
+
+class DDIV_MM64R6_DESC : ArithLogicR<"ddiv", GPR32Opnd>;
+class DMOD_MM64R6_DESC : ArithLogicR<"dmod", GPR32Opnd>;
+class DDIVU_MM64R6_DESC : ArithLogicR<"ddivu", GPR32Opnd>;
+class DMODU_MM64R6_DESC : ArithLogicR<"dmodu", GPR32Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let DecoderNamespace = "MicroMipsR6" in {
+ def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DEXTU_MM64R6 : StdMMR6Rel, DEXTU_MMR6_DESC, DEXTU_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DALIGN_MM64R6 : StdMMR6Rel, DALIGN_MMR6_DESC, DALIGN_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DDIV_MM64R6 : R6MMR6Rel, DDIV_MM64R6_DESC, DDIV_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DMOD_MM64R6 : R6MMR6Rel, DMOD_MM64R6_DESC, DMOD_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DDIVU_MM64R6 : R6MMR6Rel, DDIVU_MM64R6_DESC, DDIVU_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DMODU_MM64R6 : R6MMR6Rel, DMODU_MM64R6_DESC, DMODU_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td
new file mode 100644
index 0000000..f11c09a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td
@@ -0,0 +1,244 @@
+//===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class MMDSPInst<string opstr = "">
+ : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ let AdditionalPredicates = [InMicroMips];
+ string BaseOpcode = opstr;
+ string Arch = "mmdsp";
+ let DecoderNamespace = "MicroMips";
+}
+
+class MMDSPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ let AdditionalPredicates = [InMicroMips];
+}
+
+class POOL32A_3R_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_2R_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RAC_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
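+
+// Note: in POOL32A_2RAC_FMT and the other accumulator formats below, the
+// 2-bit ac field selects one of the four DSP ASE accumulators ac0-ac3,
+// with ac0 aliasing the original HI/LO pair.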
+
+class POOL32A_3RB0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_2RSA4_FMT<string opstr, bits<12> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11-0} = op;
+}
+
+class POOL32A_2RSA3_FMT<string opstr, bits<7> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<3> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-13} = sa;
+ let Inst{12-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RSA5B0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = sa;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_2RSA4B0_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11} = 0b0;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_2RSA4OP6_FMT<string opstr, bits<6> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RIMM5AC_FMT<string opstr, bits<8> funct> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> imm;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = imm;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RSA5_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = sa;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_1RMEMB0_FMT<string opstr, bits<10> funct> : MMDSPInst<opstr> {
+ bits<5> index;
+ bits<5> base;
+ bits<5> rd;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = funct;
+}
+
+class POOL32A_1RAC_FMT<string instr_asm, bits<8> funct> : MMDSPInst<instr_asm> {
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RMASK7_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<7> mask;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-14} = mask;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RIMM10_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<10> imm;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = imm;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_1RIMM8_FMT<string opstr, bits<6> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<8> imm;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-13} = imm;
+ let Inst{12} = 0;
+ let Inst{11-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_4B0SHIFT6AC4B0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<6> shift;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-22} = 0b0000;
+ let Inst{21-16} = shift;
+ let Inst{15-14} = ac;
+ let Inst{13-10} = 0b0000;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_5B01RAC_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
new file mode 100644
index 0000000..b342e23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -0,0 +1,528 @@
+//===- MicroMipsDSPInstrInfo.td - MicroMips DSP instructions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips DSP instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// Instruction encoding.
+class ADDQ_PH_MM_ENC : POOL32A_3R_FMT<"addq.ph", 0b00000001101>;
+class ADDQ_S_PH_MM_ENC : POOL32A_3R_FMT<"addq_s.ph", 0b10000001101>;
+class ADDQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"addq_s.w", 0b1100000101>;
+class ADDQH_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh.ph", 0b00001001101>;
+class ADDQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.ph", 0b10001001101>;
+class ADDQH_W_MMR2_ENC : POOL32A_3R_FMT<"addqh.w", 0b00010001101>;
+class ADDQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.w", 0b10010001101>;
+class ADDU_PH_MMR2_ENC : POOL32A_3R_FMT<"addu.ph", 0b00100001101>;
+class ADDU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"addu_s.ph", 0b10100001101>;
+class ADDU_QB_MM_ENC : POOL32A_3R_FMT<"addu.qb", 0b00011001101>;
+class ADDU_S_QB_MM_ENC : POOL32A_3R_FMT<"addu_s.qb", 0b10011001101>;
+class ADDUH_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh.qb", 0b00101001101>;
+class ADDUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh_r.qb", 0b10101001101>;
+class ADDSC_MM_ENC : POOL32A_3RB0_FMT<"addsc", 0b1110000101>;
+class ADDWC_MM_ENC : POOL32A_3RB0_FMT<"addwc", 0b1111000101>;
+class DPA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpa.w.ph", 0b00000010>;
+class DPAQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpaq_s.w.ph", 0b00001010>;
+class DPAQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpaq_sa.l.w", 0b01001010>;
+class DPAQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_s.w.ph", 0b10001010>;
+class DPAQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_sa.w.ph", 0b11001010>;
+class DPAU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbl", 0b10000010>;
+class DPAU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbr", 0b11000010>;
+class DPAX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpax.w.ph", 0b01000010>;
+class ABSQ_S_PH_MM_ENC : POOL32A_2R_FMT<"absq_s.ph", 0b0001000100>;
+class ABSQ_S_W_MM_ENC : POOL32A_2R_FMT<"absq_s.w", 0b0010000100>;
+class ABSQ_S_QB_MMR2_ENC : POOL32A_2R_FMT<"absq_s.qb", 0b0000000100>;
+class INSV_MM_ENC : POOL32A_2R_FMT<"insv", 0b0100000100>;
+class MADD_DSP_MM_ENC : POOL32A_2RAC_FMT<"madd", 0b00101010>;
+class MADDU_DSP_MM_ENC : POOL32A_2RAC_FMT<"maddu", 0b01101010>;
+class MSUB_DSP_MM_ENC : POOL32A_2RAC_FMT<"msub", 0b10101010>;
+class MSUBU_DSP_MM_ENC : POOL32A_2RAC_FMT<"msubu", 0b11101010>;
+class MULT_DSP_MM_ENC : POOL32A_2RAC_FMT<"mult", 0b00110010>;
+class MULTU_DSP_MM_ENC : POOL32A_2RAC_FMT<"multu", 0b01110010>;
+class SHLL_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll.ph", 0b001110110101>;
+class SHLL_S_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll_s.ph", 0b101110110101>;
+class SHLL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shll.qb", 0b0100001>;
+class SHLLV_PH_MM_ENC : POOL32A_3R_FMT<"shllv.ph", 0b00000001110>;
+class SHLLV_S_PH_MM_ENC : POOL32A_3R_FMT<"shllv_s.ph", 0b10000001110>;
+class SHLLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shllv.qb", 0b1110010101>;
+class SHLLV_S_W_MM_ENC : POOL32A_3RB0_FMT<"shllv_s.w", 0b1111010101>;
+class SHLL_S_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shll_s.w", 0b1111110101>;
+class SHRA_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra.qb", 0b0000111>;
+class SHRA_R_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra_r.qb", 0b1000111>;
+class SHRA_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra.ph", 0b01100110101>;
+class SHRA_R_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra_r.ph", 0b11100110101>;
+class SHRAV_PH_MM_ENC : POOL32A_3R_FMT<"shrav.ph", 0b00110001101>;
+class SHRAV_R_PH_MM_ENC : POOL32A_3R_FMT<"shrav_r.ph", 0b10110001101>;
+class SHRAV_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav.qb", 0b00111001101>;
+class SHRAV_R_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav_r.qb", 0b10111001101>;
+class SHRAV_R_W_MM_ENC : POOL32A_3RB0_FMT<"shrav_r.w", 0b1011010101>;
+class SHRA_R_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shra_r.w", 0b1011110101>;
+class SHRL_PH_MMR2_ENC : POOL32A_2RSA4OP6_FMT<"shrl.ph", 0b001111>;
+class SHRL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shrl.qb", 0b1100001>;
+class SHRLV_PH_MMR2_ENC : POOL32A_3RB0_FMT<"shrlv.ph", 0b1100010101>;
+class SHRLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shrlv.qb", 0b1101010101>;
+class PRECEQ_W_PHL_MM_ENC : POOL32A_2R_FMT<"preceq.w.phl", 0b0101000100>;
+class PRECEQ_W_PHR_MM_ENC : POOL32A_2R_FMT<"preceq.w.phr", 0b0110000100>;
+class PRECEQU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbl", 0b0111000100>;
+class PRECEQU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbla", 0b0111001100>;
+class PRECEQU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbr", 0b1001000100>;
+class PRECEQU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbra", 0b1001001100>;
+class PRECEU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbl", 0b1011000100>;
+class PRECEU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbla", 0b1011001100>;
+class PRECEU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbr", 0b1101000100>;
+class PRECEU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbra", 0b1101001100>;
+class SUBQ_PH_MM_ENC : POOL32A_3R_FMT<"subq.ph", 0b01000001101>;
+class SUBQ_S_PH_MM_ENC : POOL32A_3R_FMT<"subq_s.ph", 0b11000001101>;
+class SUBQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"subq_s.w", 0b1101000101>;
+class SUBQH_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh.ph", 0b01001001101>;
+class SUBQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.ph", 0b11001001101>;
+class SUBQH_W_MMR2_ENC : POOL32A_3R_FMT<"subqh.w", 0b01010001101>;
+class SUBQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.w", 0b11010001101>;
+class SUBU_PH_MMR2_ENC : POOL32A_3R_FMT<"subu.ph", 0b01100001101>;
+class SUBU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"subu_s.ph", 0b11100001101>;
+class SUBU_QB_MM_ENC : POOL32A_3R_FMT<"subu.qb", 0b01011001101>;
+class SUBU_S_QB_MM_ENC : POOL32A_3R_FMT<"subu_s.qb", 0b11011001101>;
+class SUBUH_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh.qb", 0b01101001101>;
+class SUBUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh_r.qb", 0b11101001101>;
+class EXTP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extp", 0b10011001>;
+class EXTPDP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extpdp", 0b11011001>;
+class EXTPDPV_MM_ENC : POOL32A_2RAC_FMT<"extpdpv", 0b11100010>;
+class EXTPV_MM_ENC : POOL32A_2RAC_FMT<"extpv", 0b10100010>;
+class EXTR_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr.w", 0b00111001>;
+class EXTR_R_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_r.w", 0b01111001>;
+class EXTR_RS_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_rs.w", 0b10111001>;
+class EXTR_S_H_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_s.h", 0b11111001>;
+class EXTRV_W_MM_ENC : POOL32A_2RAC_FMT<"extrv.w", 0b00111010>;
+class EXTRV_R_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_r.w", 0b01111010>;
+class EXTRV_RS_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_rs.w", 0b10111010>;
+class EXTRV_S_H_MM_ENC : POOL32A_2RAC_FMT<"extrv_s.h", 0b11111010>;
+class DPS_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dps.w.ph", 0b00010010>;
+class DPSQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpsq_s.w.ph", 0b00011010>;
+class DPSQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpsq_sa.l.w", 0b01011010>;
+class DPSQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_s.w.ph", 0b10011010>;
+class DPSQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_sa.w.ph", 0b11011010>;
+class DPSU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbl", 0b10010010>;
+class DPSU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbr", 0b11010010>;
+class DPSX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsx.w.ph", 0b01010010>;
+class MUL_PH_MMR2_ENC : POOL32A_3R_FMT<"mul.ph", 0b00000101101>;
+class MUL_S_PH_MMR2_ENC : POOL32A_3R_FMT<"mul_s.ph", 0b10000101101>;
+class MULEQ_S_W_PHL_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phl", 0b0000100101>;
+class MULEQ_S_W_PHR_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phr", 0b0001100101>;
+class MULEU_S_PH_QBL_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbl", 0b0010010101>;
+class MULEU_S_PH_QBR_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbr", 0b0011010101>;
+class MULQ_RS_PH_MM_ENC : POOL32A_3RB0_FMT<"mulq_rs.ph", 0b0100010101>;
+class MULQ_RS_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_rs.w", 0b0110010101>;
+class MULQ_S_PH_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.ph", 0b0101010101>;
+class MULQ_S_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.w", 0b0111010101>;
+class PRECR_QB_PH_MMR2_ENC : POOL32A_3RB0_FMT<"precr.qb.ph", 0b0001101101>;
+class PRECR_SRA_PH_W_MMR2_ENC
+ : POOL32A_2RSA5_FMT<"precr_sra.ph.w", 0b01111001101>;
+class PRECR_SRA_R_PH_W_MMR2_ENC
+ : POOL32A_2RSA5_FMT<"precr_sra_r.ph.w", 0b11111001101>;
+class PRECRQ_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq.ph.w", 0b0011101101>;
+class PRECRQ_QB_PH_MM_ENC : POOL32A_3RB0_FMT<"precrq.qb.ph", 0b0010101101>;
+class PRECRQU_S_QB_PH_MM_ENC
+ : POOL32A_3RB0_FMT<"precrqu_s.qb.ph", 0b0101101101>;
+class PRECRQ_RS_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq_rs.ph.w", 0b0100101101>;
+class LBUX_MM_ENC : POOL32A_1RMEMB0_FMT<"lbux", 0b1000100101>;
+class LHX_MM_ENC : POOL32A_1RMEMB0_FMT<"lhx", 0b0101100101>;
+class LWX_MM_ENC : POOL32A_1RMEMB0_FMT<"lwx", 0b0110100101>;
+class MAQ_S_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phl", 0b01101001>;
+class MAQ_SA_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phl", 0b11101001>;
+class MAQ_S_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phr", 0b00101001>;
+class MAQ_SA_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phr", 0b10101001>;
+class MFHI_MM_ENC : POOL32A_1RAC_FMT<"mfhi", 0b00000001>;
+class MFLO_MM_ENC : POOL32A_1RAC_FMT<"mflo", 0b01000001>;
+class MTHI_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b10000001>;
+class MTLO_MM_ENC : POOL32A_1RAC_FMT<"mtlo", 0b11000001>;
+class PREPEND_MMR2_ENC : POOL32A_2RSA5B0_FMT<"prepend", 0b1001010101>;
+class RADDU_W_QB_MM_ENC : POOL32A_2R_FMT<"raddu.w.qb", 0b1111000100>;
+class RDDSP_MM_ENC : POOL32A_1RMASK7_FMT<"rddsp", 0b00011001>;
+class REPL_PH_MM_ENC : POOL32A_1RIMM10_FMT<"repl.ph", 0b0000111101>;
+class REPL_QB_MM_ENC : POOL32A_1RIMM8_FMT<"repl.qb", 0b010111>;
+class REPLV_PH_MM_ENC : POOL32A_2R_FMT<"replv.ph", 0b0000001100>;
+class REPLV_QB_MM_ENC : POOL32A_2R_FMT<"replv.qb", 0b0001001100>;
+class MTHLIP_MM_ENC : POOL32A_1RAC_FMT<"mthlip", 0b00001001>;
+class PACKRL_PH_MM_ENC : POOL32A_3RB0_FMT<"packrl.ph", 0b0110101101>;
+class PICK_PH_MM_ENC : POOL32A_3RB0_FMT<"pick.ph", 0b1000101101>;
+class PICK_QB_MM_ENC : POOL32A_3RB0_FMT<"pick.qb", 0b0111101101>;
+class SHILO_MM_ENC : POOL32A_4B0SHIFT6AC4B0_FMT<"shilo", 0b0000011101>;
+class SHILOV_MM_ENC : POOL32A_5B01RAC_FMT<"shilov", 0b01001001>;
+class WRDSP_MM_ENC : POOL32A_1RMASK7_FMT<"wrdsp", 0b01011001>;
+
+// Instruction desc.
+class ABSQ_S_PH_MM_R2_DESC_BASE<string opstr, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD> {
+ dag OutOperandList = (outs ROD:$rt);
+ dag InOperandList = (ins ROS:$rs);
+ string AsmString = !strconcat(opstr, "\t$rt, $rs");
+ list<dag> Pattern = [(set ROD:$rt, (OpNode ROS:$rs))];
+ InstrItinClass Itinerary = itin;
+}
+class ABSQ_S_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.ph", int_mips_absq_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_W_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.w", int_mips_absq_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_QB_MMR2_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class PRECEQ_W_PHL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceq.w.phl", int_mips_preceq_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQ_W_PHR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceq.w.phr", int_mips_preceq_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbl", int_mips_precequ_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbla", int_mips_precequ_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbr", int_mips_precequ_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbra", int_mips_precequ_ph_qbra, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbl", int_mips_preceu_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbla", int_mips_preceu_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbr", int_mips_preceu_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbra", int_mips_preceu_ph_qbra, NoItinerary, DSPROpnd>;
+
+class SHLL_R2_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmPat, InstrItinClass itin,
+ RegisterOperand RO, Operand ImmOpnd> {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins RO:$rs, ImmOpnd:$sa);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set RO:$rt, (OpNode RO:$rs, ImmPat:$sa))];
+ InstrItinClass Itinerary = itin;
+ bit hasSideEffects = 1;
+}
+class SHLL_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_S_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll_s.ph", int_mips_shll_s_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_S_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>,
+ Defs<[DSPOutFlag22]>;
+class SHRA_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_R_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.qb", int_mips_shra_r_qb, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.ph", int_mips_shra_r_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.w", int_mips_shra_r_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>;
+class SHRL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shrl.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRL_PH_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shrl.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+
+class SHLLV_R3_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs");
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))];
+ InstrItinClass Itinerary = itin;
+}
+class SHLLV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv.ph", int_mips_shll_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPROpnd>,
+ Defs<[DSPOutFlag22]>;
+class SHLLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv.qb", int_mips_shll_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv_s.w", int_mips_shll_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>;
+class SHRAV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav.ph", int_mips_shra_ph, NoItinerary, DSPROpnd>;
+class SHRAV_R_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>;
+class SHRAV_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>;
+class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>;
+class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
+
+class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
+ InstrItinClass Itinerary = itin;
+}
+class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
+ InstrItinClass Itinerary = itin;
+}
+
+class EXTP_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+
+class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rs);
+ dag InOperandList = (ins RO:$ac);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set GPR32Opnd:$rs, (OpNode RO:$ac))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MFHI_MM_DESC : MFHI_MM_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI,
+ NoItinerary>;
+class MFLO_MM_DESC : MFHI_MM_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO,
+ NoItinerary>;
+
+class RADDU_W_QB_MM_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins DSPROpnd:$rs);
+ string AsmString = !strconcat("raddu.w.qb", "\t$rt, $rs");
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_raddu_w_qb DSPROpnd:$rs))];
+ InstrItinClass Itinerary = NoItinerary;
+ string BaseOpcode = "raddu.w.qb";
+}
+
+class RDDSP_MM_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins uimm16:$mask);
+ string AsmString = !strconcat("rddsp", "\t$rt, $mask");
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt10:$mask))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPL_QB_MM_DESC {
+ dag OutOperandList = (outs DSPROpnd:$rt);
+ dag InOperandList = (ins uimm16:$imm);
+ string AsmString = !strconcat("repl.qb", "\t$rt, $imm");
+ list<dag> Pattern = [(set DSPROpnd:$rt, (int_mips_repl_qb immZExt8:$imm))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPLV_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
+class REPLV_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
+
+class WRDSP_MM_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
+ string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
+ list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+// Instruction defs.
+// microMIPS DSP Rev 1
+def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH_MM : DspMMRel, ADDQ_S_PH_MM_ENC, ADDQ_S_PH_DESC;
+def ADDQ_S_W_MM : DspMMRel, ADDQ_S_W_MM_ENC, ADDQ_S_W_DESC;
+def ADDU_QB_MM : DspMMRel, ADDU_QB_MM_ENC, ADDU_QB_DESC;
+def ADDU_S_QB_MM : DspMMRel, ADDU_S_QB_MM_ENC, ADDU_S_QB_DESC;
+def ADDSC_MM : DspMMRel, ADDSC_MM_ENC, ADDSC_DESC;
+def ADDWC_MM : DspMMRel, ADDWC_MM_ENC, ADDWC_DESC;
+def DPAQ_S_W_PH_MM : DspMMRel, DPAQ_S_W_PH_MM_ENC, DPAQ_S_W_PH_DESC;
+def DPAQ_SA_L_W_MM : DspMMRel, DPAQ_SA_L_W_MM_ENC, DPAQ_SA_L_W_DESC;
+def DPAU_H_QBL_MM : DspMMRel, DPAU_H_QBL_MM_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR_MM : DspMMRel, DPAU_H_QBR_MM_ENC, DPAU_H_QBR_DESC;
+def ABSQ_S_PH_MM : DspMMRel, ABSQ_S_PH_MM_ENC, ABSQ_S_PH_MM_DESC;
+def ABSQ_S_W_MM : DspMMRel, ABSQ_S_W_MM_ENC, ABSQ_S_W_MM_DESC;
+def INSV_MM : DspMMRel, INSV_MM_ENC, INSV_DESC;
+def MADD_DSP_MM : DspMMRel, MADD_DSP_MM_ENC, MADD_DSP_DESC;
+def MADDU_DSP_MM : DspMMRel, MADDU_DSP_MM_ENC, MADDU_DSP_DESC;
+def MSUB_DSP_MM : DspMMRel, MSUB_DSP_MM_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP_MM : DspMMRel, MSUBU_DSP_MM_ENC, MSUBU_DSP_DESC;
+def MULT_DSP_MM : DspMMRel, MULT_DSP_MM_ENC, MULT_DSP_DESC;
+def MULTU_DSP_MM : DspMMRel, MULTU_DSP_MM_ENC, MULTU_DSP_DESC;
+def SHLL_PH_MM : DspMMRel, SHLL_PH_MM_ENC, SHLL_PH_MM_DESC;
+def SHLL_S_PH_MM : DspMMRel, SHLL_S_PH_MM_ENC, SHLL_S_PH_MM_DESC;
+def SHLL_QB_MM : DspMMRel, SHLL_QB_MM_ENC, SHLL_QB_MM_DESC;
+def SHLLV_PH_MM : DspMMRel, SHLLV_PH_MM_ENC, SHLLV_PH_MM_DESC;
+def SHLLV_S_PH_MM : DspMMRel, SHLLV_S_PH_MM_ENC, SHLLV_S_PH_MM_DESC;
+def SHLLV_QB_MM : DspMMRel, SHLLV_QB_MM_ENC, SHLLV_QB_MM_DESC;
+def SHLLV_S_W_MM : DspMMRel, SHLLV_S_W_MM_ENC, SHLLV_S_W_MM_DESC;
+def SHLL_S_W_MM : DspMMRel, SHLL_S_W_MM_ENC, SHLL_S_W_MM_DESC;
+def SHRA_PH_MM : DspMMRel, SHRA_PH_MM_ENC, SHRA_PH_MM_DESC;
+def SHRA_R_PH_MM : DspMMRel, SHRA_R_PH_MM_ENC, SHRA_R_PH_MM_DESC;
+def SHRAV_PH_MM : DspMMRel, SHRAV_PH_MM_ENC, SHRAV_PH_MM_DESC;
+def SHRAV_R_PH_MM : DspMMRel, SHRAV_R_PH_MM_ENC, SHRAV_R_PH_MM_DESC;
+def SHRAV_R_W_MM : DspMMRel, SHRAV_R_W_MM_ENC, SHRAV_R_W_MM_DESC;
+def SHRA_R_W_MM : DspMMRel, SHRA_R_W_MM_ENC, SHRA_R_W_MM_DESC;
+def SHRL_QB_MM : DspMMRel, SHRL_QB_MM_ENC, SHRL_QB_MM_DESC;
+def SHRLV_QB_MM : DspMMRel, SHRLV_QB_MM_ENC, SHRLV_QB_MM_DESC;
+def PRECEQ_W_PHL_MM : DspMMRel, PRECEQ_W_PHL_MM_ENC, PRECEQ_W_PHL_MM_DESC;
+def PRECEQ_W_PHR_MM : DspMMRel, PRECEQ_W_PHR_MM_ENC, PRECEQ_W_PHR_MM_DESC;
+def PRECEQU_PH_QBL_MM : DspMMRel, PRECEQU_PH_QBL_MM_ENC, PRECEQU_PH_QBL_MM_DESC;
+def PRECEQU_PH_QBLA_MM : DspMMRel, PRECEQU_PH_QBLA_MM_ENC,
+ PRECEQU_PH_QBLA_MM_DESC;
+def PRECEQU_PH_QBR_MM : DspMMRel, PRECEQU_PH_QBR_MM_ENC, PRECEQU_PH_QBR_MM_DESC;
+def PRECEQU_PH_QBRA_MM : DspMMRel, PRECEQU_PH_QBRA_MM_ENC,
+ PRECEQU_PH_QBRA_MM_DESC;
+def PRECEU_PH_QBL_MM : DspMMRel, PRECEU_PH_QBL_MM_ENC, PRECEU_PH_QBL_MM_DESC;
+def PRECEU_PH_QBLA_MM : DspMMRel, PRECEU_PH_QBLA_MM_ENC, PRECEU_PH_QBLA_MM_DESC;
+def PRECEU_PH_QBR_MM : DspMMRel, PRECEU_PH_QBR_MM_ENC, PRECEU_PH_QBR_MM_DESC;
+def PRECEU_PH_QBRA_MM : DspMMRel, PRECEU_PH_QBRA_MM_ENC, PRECEU_PH_QBRA_MM_DESC;
+def SUBQ_PH_MM : DspMMRel, SUBQ_PH_MM_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH_MM : DspMMRel, SUBQ_S_PH_MM_ENC, SUBQ_S_PH_DESC;
+def SUBQ_S_W_MM : DspMMRel, SUBQ_S_W_MM_ENC, SUBQ_S_W_DESC;
+def SUBU_QB_MM : DspMMRel, SUBU_QB_MM_ENC, SUBU_QB_DESC;
+def SUBU_S_QB_MM : DspMMRel, SUBU_S_QB_MM_ENC, SUBU_S_QB_DESC;
+def EXTP_MM : DspMMRel, EXTP_MM_ENC, EXTP_MM_DESC;
+def EXTPDP_MM : DspMMRel, EXTPDP_MM_ENC, EXTPDP_MM_DESC;
+def EXTPDPV_MM : DspMMRel, EXTPDPV_MM_ENC, EXTPDPV_MM_DESC;
+def EXTPV_MM : DspMMRel, EXTPV_MM_ENC, EXTPV_MM_DESC;
+def EXTR_W_MM : DspMMRel, EXTR_W_MM_ENC, EXTR_W_MM_DESC;
+def EXTR_R_W_MM : DspMMRel, EXTR_R_W_MM_ENC, EXTR_R_W_MM_DESC;
+def EXTR_RS_W_MM : DspMMRel, EXTR_RS_W_MM_ENC, EXTR_RS_W_MM_DESC;
+def EXTR_S_H_MM : DspMMRel, EXTR_S_H_MM_ENC, EXTR_S_H_MM_DESC;
+def EXTRV_W_MM : DspMMRel, EXTRV_W_MM_ENC, EXTRV_W_MM_DESC;
+def EXTRV_R_W_MM : DspMMRel, EXTRV_R_W_MM_ENC, EXTRV_R_W_MM_DESC;
+def EXTRV_RS_W_MM : DspMMRel, EXTRV_RS_W_MM_ENC, EXTRV_RS_W_MM_DESC;
+def EXTRV_S_H_MM : DspMMRel, EXTRV_S_H_MM_ENC, EXTRV_S_H_MM_DESC;
+def DPSQ_S_W_PH_MM : DspMMRel, DPSQ_S_W_PH_MM_ENC, DPSQ_S_W_PH_DESC;
+def DPSQ_SA_L_W_MM : DspMMRel, DPSQ_SA_L_W_MM_ENC, DPSQ_SA_L_W_DESC;
+def DPSU_H_QBL_MM : DspMMRel, DPSU_H_QBL_MM_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR_MM : DspMMRel, DPSU_H_QBR_MM_ENC, DPSU_H_QBR_DESC;
+def MULEQ_S_W_PHL_MM : DspMMRel, MULEQ_S_W_PHL_MM_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR_MM : DspMMRel, MULEQ_S_W_PHR_MM_ENC, MULEQ_S_W_PHR_DESC;
+def MULEU_S_PH_QBL_MM : DspMMRel, MULEU_S_PH_QBL_MM_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR_MM : DspMMRel, MULEU_S_PH_QBR_MM_ENC, MULEU_S_PH_QBR_DESC;
+def MULQ_RS_PH_MM : DspMMRel, MULQ_RS_PH_MM_ENC, MULQ_RS_PH_DESC;
+def PRECRQ_PH_W_MM : DspMMRel, PRECRQ_PH_W_MM_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_QB_PH_MM : DspMMRel, PRECRQ_QB_PH_MM_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQU_S_QB_PH_MM : DspMMRel, PRECRQU_S_QB_PH_MM_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECRQ_RS_PH_W_MM : DspMMRel, PRECRQ_RS_PH_W_MM_ENC, PRECRQ_RS_PH_W_DESC;
+def LBUX_MM : DspMMRel, LBUX_MM_ENC, LBUX_DESC;
+def LHX_MM : DspMMRel, LHX_MM_ENC, LHX_DESC;
+def LWX_MM : DspMMRel, LWX_MM_ENC, LWX_DESC;
+def MAQ_S_W_PHL_MM : DspMMRel, MAQ_S_W_PHL_MM_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_SA_W_PHL_MM : DspMMRel, MAQ_SA_W_PHL_MM_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_S_W_PHR_MM : DspMMRel, MAQ_S_W_PHR_MM_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHR_MM : DspMMRel, MAQ_SA_W_PHR_MM_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP_MM : DspMMRel, MFHI_MM_ENC, MFHI_MM_DESC;
+def MFLO_DSP_MM : DspMMRel, MFLO_MM_ENC, MFLO_MM_DESC;
+def MTHI_DSP_MM : DspMMRel, MTHI_MM_ENC, MTHI_DESC;
+def MTLO_DSP_MM : DspMMRel, MTLO_MM_ENC, MTLO_DESC;
+def RADDU_W_QB_MM : DspMMRel, RADDU_W_QB_MM_ENC, RADDU_W_QB_MM_DESC;
+def RDDSP_MM : DspMMRel, RDDSP_MM_ENC, RDDSP_MM_DESC;
+def REPL_PH_MM : DspMMRel, REPL_PH_MM_ENC, REPL_PH_DESC;
+def REPL_QB_MM : DspMMRel, REPL_QB_MM_ENC, REPL_QB_MM_DESC;
+def REPLV_PH_MM : DspMMRel, REPLV_PH_MM_ENC, REPLV_PH_MM_DESC;
+def REPLV_QB_MM : DspMMRel, REPLV_QB_MM_ENC, REPLV_QB_MM_DESC;
+def MTHLIP_MM : DspMMRel, MTHLIP_MM_ENC, MTHLIP_DESC;
+def PACKRL_PH_MM : DspMMRel, PACKRL_PH_MM_ENC, PACKRL_PH_DESC;
+def PICK_PH_MM : DspMMRel, PICK_PH_MM_ENC, PICK_PH_DESC;
+def PICK_QB_MM : DspMMRel, PICK_QB_MM_ENC, PICK_QB_DESC;
+def SHILO_MM : DspMMRel, SHILO_MM_ENC, SHILO_DESC;
+def SHILOV_MM : DspMMRel, SHILOV_MM_ENC, SHILOV_DESC;
+def WRDSP_MM : DspMMRel, WRDSP_MM_ENC, WRDSP_MM_DESC;
+// microMIPS DSP Rev 2
+def ABSQ_S_QB_MMR2 : DspMMRel, ABSQ_S_QB_MMR2_ENC, ABSQ_S_QB_MMR2_DESC,
+ ISA_DSPR2;
+def ADDQH_PH_MMR2 : DspMMRel, ADDQH_PH_MMR2_ENC, ADDQH_PH_DESC, ISA_DSPR2;
+def ADDQH_R_PH_MMR2 : DspMMRel, ADDQH_R_PH_MMR2_ENC, ADDQH_R_PH_DESC, ISA_DSPR2;
+def ADDQH_W_MMR2 : DspMMRel, ADDQH_W_MMR2_ENC, ADDQH_W_DESC, ISA_DSPR2;
+def ADDQH_R_W_MMR2 : DspMMRel, ADDQH_R_W_MMR2_ENC, ADDQH_R_W_DESC, ISA_DSPR2;
+def ADDU_PH_MMR2 : DspMMRel, ADDU_PH_MMR2_ENC, ADDU_PH_DESC, ISA_DSPR2;
+def ADDU_S_PH_MMR2 : DspMMRel, ADDU_S_PH_MMR2_ENC, ADDU_S_PH_DESC, ISA_DSPR2;
+def ADDUH_QB_MMR2 : DspMMRel, ADDUH_QB_MMR2_ENC, ADDUH_QB_DESC, ISA_DSPR2;
+def ADDUH_R_QB_MMR2 : DspMMRel, ADDUH_R_QB_MMR2_ENC, ADDUH_R_QB_DESC, ISA_DSPR2;
+def DPA_W_PH_MMR2 : DspMMRel, DPA_W_PH_MMR2_ENC, DPA_W_PH_DESC, ISA_DSPR2;
+def DPAQX_S_W_PH_MMR2 : DspMMRel, DPAQX_S_W_PH_MMR2_ENC, DPAQX_S_W_PH_DESC,
+ ISA_DSPR2;
+def DPAQX_SA_W_PH_MMR2 : DspMMRel, DPAQX_SA_W_PH_MMR2_ENC, DPAQX_SA_W_PH_DESC,
+ ISA_DSPR2;
+def DPAX_W_PH_MMR2 : DspMMRel, DPAX_W_PH_MMR2_ENC, DPAX_W_PH_DESC, ISA_DSPR2;
+def SHRA_QB_MMR2 : DspMMRel, SHRA_QB_MMR2_ENC, SHRA_QB_MMR2_DESC, ISA_DSPR2;
+def SHRA_R_QB_MMR2 : DspMMRel, SHRA_R_QB_MMR2_ENC, SHRA_R_QB_MMR2_DESC,
+ ISA_DSPR2;
+def SHRAV_QB_MMR2 : DspMMRel, SHRAV_QB_MMR2_ENC, SHRAV_QB_MMR2_DESC, ISA_DSPR2;
+def SHRAV_R_QB_MMR2 : DspMMRel, SHRAV_R_QB_MMR2_ENC, SHRAV_R_QB_MMR2_DESC,
+ ISA_DSPR2;
+def SHRL_PH_MMR2 : DspMMRel, SHRL_PH_MMR2_ENC, SHRL_PH_MMR2_DESC, ISA_DSPR2;
+def SHRLV_PH_MMR2 : DspMMRel, SHRLV_PH_MMR2_ENC, SHRLV_PH_MMR2_DESC, ISA_DSPR2;
+def SUBQH_PH_MMR2 : DspMMRel, SUBQH_PH_MMR2_ENC, SUBQH_PH_DESC, ISA_DSPR2;
+def SUBQH_R_PH_MMR2 : DspMMRel, SUBQH_R_PH_MMR2_ENC, SUBQH_R_PH_DESC, ISA_DSPR2;
+def SUBQH_W_MMR2 : DspMMRel, SUBQH_W_MMR2_ENC, SUBQH_W_DESC, ISA_DSPR2;
+def SUBQH_R_W_MMR2 : DspMMRel, SUBQH_R_W_MMR2_ENC, SUBQH_R_W_DESC, ISA_DSPR2;
+def SUBU_PH_MMR2 : DspMMRel, SUBU_PH_MMR2_ENC, SUBU_PH_DESC, ISA_DSPR2;
+def SUBU_S_PH_MMR2 : DspMMRel, SUBU_S_PH_MMR2_ENC, SUBU_S_PH_DESC, ISA_DSPR2;
+def SUBUH_QB_MMR2 : DspMMRel, SUBUH_QB_MMR2_ENC, SUBUH_QB_DESC, ISA_DSPR2;
+def SUBUH_R_QB_MMR2 : DspMMRel, SUBUH_R_QB_MMR2_ENC, SUBUH_R_QB_DESC, ISA_DSPR2;
+def DPS_W_PH_MMR2 : DspMMRel, DPS_W_PH_MMR2_ENC, DPS_W_PH_DESC, ISA_DSPR2;
+def DPSQX_S_W_PH_MMR2 : DspMMRel, DPSQX_S_W_PH_MMR2_ENC, DPSQX_S_W_PH_DESC,
+ ISA_DSPR2;
+def DPSQX_SA_W_PH_MMR2 : DspMMRel, DPSQX_SA_W_PH_MMR2_ENC, DPSQX_SA_W_PH_DESC,
+ ISA_DSPR2;
+def DPSX_W_PH_MMR2 : DspMMRel, DPSX_W_PH_MMR2_ENC, DPSX_W_PH_DESC, ISA_DSPR2;
+def MUL_PH_MMR2 : DspMMRel, MUL_PH_MMR2_ENC, MUL_PH_DESC, ISA_DSPR2;
+def MUL_S_PH_MMR2 : DspMMRel, MUL_S_PH_MMR2_ENC, MUL_S_PH_DESC, ISA_DSPR2;
+def MULQ_RS_W_MMR2 : DspMMRel, MULQ_RS_W_MMR2_ENC, MULQ_RS_W_DESC, ISA_DSPR2;
+def MULQ_S_PH_MMR2 : DspMMRel, MULQ_S_PH_MMR2_ENC, MULQ_S_PH_DESC, ISA_DSPR2;
+def MULQ_S_W_MMR2 : DspMMRel, MULQ_S_W_MMR2_ENC, MULQ_S_W_DESC, ISA_DSPR2;
+def PRECR_QB_PH_MMR2 : DspMMRel, PRECR_QB_PH_MMR2_ENC, PRECR_QB_PH_DESC,
+ ISA_DSPR2;
+def PRECR_SRA_PH_W_MMR2 : DspMMRel, PRECR_SRA_PH_W_MMR2_ENC,
+ PRECR_SRA_PH_W_DESC, ISA_DSPR2;
+def PRECR_SRA_R_PH_W_MMR2 : DspMMRel, PRECR_SRA_R_PH_W_MMR2_ENC,
+ PRECR_SRA_R_PH_W_DESC, ISA_DSPR2;
+def PREPEND_MMR2 : DspMMRel, PREPEND_MMR2_ENC, PREPEND_DESC, ISA_DSPR2;
+
+// Instruction alias.
+def : MMDSPInstAlias<"wrdsp $rt", (WRDSP_MM GPR32Opnd:$rt, 0x1F), 1>;
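+// For illustration: the alias defaults the mask operand, so "wrdsp $4"
+// assembles exactly like "wrdsp $4, 0x1f".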
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 004b0d5..756e6c9 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -37,23 +37,14 @@ def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
CEQS_FM_MM<1>;
-def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>,
+def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>,
+def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>,
BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6;
-
-def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
- ROUND_W_FM_MM<0, 0x6c>;
def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
ROUND_W_FM_MM<0, 0x24>;
-def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
- ROUND_W_FM_MM<0, 0x2c>;
-def ROUND_W_S_MM : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
+def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
ROUND_W_FM_MM<0, 0xec>;
-def TRUNC_W_S_MM : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
- ROUND_W_FM_MM<0, 0xac>;
-def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S,
- fsqrt>, ROUND_W_FM_MM<0, 0x28>;
def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>,
ROUND_W_FM_MM<1, 0x6c>;
@@ -61,7 +52,7 @@ def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
ROUND_W_FM_MM<1, 0x24>;
def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>,
ROUND_W_FM_MM<1, 0x2c>;
-def ROUND_W_MM : MMRel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>,
+def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>,
ROUND_W_FM_MM<1, 0xec>;
def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>,
ROUND_W_FM_MM<1, 0xac>;
@@ -146,3 +137,14 @@ def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM_MM<0x2a>;
}
+
+let AdditionalPredicates = [InMicroMips] in {
+ def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd,
+ II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>;
+ def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>;
+ def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
+ ROUND_W_FM_MM<0, 0x6c>;
+ def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S,
+ fsqrt>, ROUND_W_FM_MM<0, 0x28>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 560afa4..b736367 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -389,6 +389,22 @@ class LW_FM_MM<bits<6> op> : MMArch {
let Inst{15-0} = addr{15-0};
}
+class POOL32C_LHUE_FM_MM<bits<6> op, bits<4> fmt, bits<3> funct> : MMArch {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = fmt;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
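+// For illustration: the 21-bit $addr operand carries the base register in
+// addr{20-16} and a 9-bit signed offset in addr{8-0} (addr{15-9} is never
+// encoded), so an EVA load from 8($5) fills base = 5 and offset = 8.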
+
class LWL_FM_MM<bits<4> funct> {
bits<5> rt;
bits<21> addr;
@@ -402,6 +418,22 @@ class LWL_FM_MM<bits<4> funct> {
let Inst{11-0} = addr{11-0};
}
+class POOL32C_STEVA_LDEVA_FM_MM<bits<4> type, bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = type;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
class CMov_F_I_FM_MM<bits<7> func> : MMArch {
bits<5> rd;
bits<5> rs;
@@ -655,6 +687,22 @@ class LL_FM_MM<bits<4> funct> {
let Inst{11-0} = addr{11-0};
}
+class LLE_FM_MM<bits<4> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = funct;
+ let Inst{11-9} = 0x6;
+ let Inst{8-0} = offset;
+}
+
class ADDS_FM_MM<bits<2> fmt, bits<8> funct> : MMArch {
bits<5> ft;
bits<5> fs;
@@ -895,7 +943,7 @@ class LWM_FM_MM<bits<4> funct> : MMArch {
let Inst{11-0} = addr{11-0};
}
-class LWM_FM_MM16<bits<4> funct> : MMArch {
+class LWM_FM_MM16<bits<4> funct> : MMArch, PredicateControl {
bits<2> rt;
bits<4> addr;
@@ -922,6 +970,37 @@ class CACHE_PREF_FM_MM<bits<6> op, bits<4> funct> : MMArch {
let Inst{11-0} = offset;
}
+class CACHE_PREFE_FM_MM<bits<6> op, bits<3> funct> : MMArch {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0xA;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class POOL32F_PREFX_FM_MM<bits<6> op, bits<9> funct> : MMArch {
+ bits<5> index;
+ bits<5> base;
+ bits<5> hint;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = hint;
+ let Inst{10-9} = 0x0;
+ let Inst{8-0} = funct;
+}
+
class BARRIER_FM_MM<bits<5> op> : MMArch {
bits<32> Inst;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 3939384..99f0f44 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -13,11 +13,6 @@ def simm12 : Operand<i32> {
let DecoderMethod = "DecodeSimm12";
}
-def uimm5_lsl2 : Operand<OtherVT> {
- let EncoderMethod = "getUImm5Lsl2Encoding";
- let DecoderMethod = "DecodeUImm5lsl2";
-}
-
def uimm6_lsl2 : Operand<i32> {
let EncoderMethod = "getUImm6Lsl2Encoding";
let DecoderMethod = "DecodeUImm6Lsl2";
@@ -30,6 +25,7 @@ def simm9_addiusp : Operand<i32> {
def uimm3_shift : Operand<i32> {
let EncoderMethod = "getUImm3Mod8Encoding";
+ let DecoderMethod = "DecodePOOL16BEncodedField";
}
def simm3_lsa2 : Operand<i32> {
@@ -105,6 +101,14 @@ def mem_mm_gp_imm7_lsl2 : Operand<i32> {
let EncoderMethod = "getMemEncodingMMGPImm7Lsl2";
}
+def mem_mm_9 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm9);
+ let EncoderMethod = "getMemEncodingMMImm9";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
def mem_mm_12 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm12);
@@ -113,6 +117,14 @@ def mem_mm_12 : Operand<i32> {
let OperandType = "OPERAND_MEMORY";
}
+def mem_mm_16 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm16);
+ let EncoderMethod = "getMemEncodingMMImm16";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
def MipsMemUimm4AsmOperand : AsmOperandClass {
let Name = "MemOffsetUimm4";
let SuperClasses = [MipsMemAsmOperand];
@@ -166,7 +178,7 @@ def simm23_lsl2 : Operand<i32> {
class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 0;
@@ -251,6 +263,13 @@ class LLBaseMM<string opstr, RegisterOperand RO> :
let mayLoad = 1;
}
+class LLEBaseMM<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm9";
+ let mayLoad = 1;
+}
+
class SCBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
@@ -259,6 +278,14 @@ class SCBaseMM<string opstr, RegisterOperand RO> :
let Constraints = "$rt = $dst";
}
+class SCEBaseMM<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm9";
+ let mayStore = 1;
+ let Constraints = "$rt = $dst";
+}
+
class LoadMM<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
InstrItinClass Itin = NoItinerary> :
InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
@@ -392,7 +419,7 @@ class LoadImmMM16<string opstr, Operand Od, RegisterOperand RO> :
// 16-bit Jump and Link (Call)
class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [(MipsJmpLink RO:$rs)], IIBranch, FrmR> {
+ [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, PredicateControl {
let isCall = 1;
let hasDelaySlot = 1;
let Defs = [RA];
@@ -401,7 +428,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
// 16-bit Jump Reg
class JumpRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JR, FrmR> {
let hasDelaySlot = 1;
let isBranch = 1;
let isIndirectBranch = 1;
@@ -410,7 +437,7 @@ class JumpRegMM16<string opstr, RegisterOperand RO> :
// Base class for JRADDIUSP instruction.
class JumpRAddiuStackMM16 :
MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm",
- [], IIBranch, FrmR> {
+ [], II_JRADDIUSP, FrmR> {
let isTerminator = 1;
let isBarrier = 1;
let isBranch = 1;
@@ -420,7 +447,7 @@ class JumpRAddiuStackMM16 :
// 16-bit Jump and Link (Call) - Short Delay Slot
class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JALRS, FrmR> {
let isCall = 1;
let hasDelaySlot = 1;
let Defs = [RA];
@@ -429,7 +456,7 @@ class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
// 16-bit Jump Register Compact - No delay slot
class JumpRegCMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JRC, FrmR> {
let isTerminator = 1;
let isBarrier = 1;
let isBranch = 1;
@@ -444,7 +471,7 @@ class BrkSdbbp16MM<string opstr> :
class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZ, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -455,18 +482,18 @@ class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
class JumpLinkMM<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [], IIBranch, FrmJ, opstr> {
+ [], II_JALS, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTargetMM";
}
class JumpLinkRegMM<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], II_JALRS, FrmR>;
class BranchCompareToZeroLinkMM<string opstr, DAGOperand opnd,
RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>;
}
class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
@@ -475,6 +502,10 @@ class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+class PrefetchIndexed<string opstr> :
+ InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint),
+ !strconcat(opstr, "\t$hint, ${index}(${base})"), [], NoItinerary, FrmOther>;
+
class AddImmUPC<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm),
!strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>;
@@ -543,7 +574,7 @@ class LoadMultMM16<string opstr,
class UncondBranchMM16<string opstr> :
MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
!strconcat(opstr, "\t$offset"),
- [], IIBranch, FrmI> {
+ [], II_B, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
@@ -553,21 +584,24 @@ class UncondBranchMM16<string opstr> :
}
def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
- ARITH_FM_MM16<0>;
-def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
- ARITH_FM_MM16<1>;
-def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>;
+ ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6;
def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
- LOGIC_FM_MM16<0x2>;
-def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
- LOGIC_FM_MM16<0x3>;
-def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
- LOGIC_FM_MM16<0x1>;
-def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>;
+ LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6_64R6;
+def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, LOGIC_FM_MM16<0x3>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
- SHIFT_FM_MM16<0>;
+ SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6;
def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
- SHIFT_FM_MM16<1>;
+ SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6;
+
+def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6;
+def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6_64R6;
def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU,
mem_mm_4>, LOAD_STORE_FM_MM16<0x02>;
def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU,
@@ -597,7 +631,8 @@ def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16;
def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove;
-def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>;
+def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>;
def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>;
def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>;
@@ -607,8 +642,18 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>,
def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>,
BEQNEZ_FM_MM16<0x2b>;
def B16_MM : UncondBranchMM16<"b16">, B16_FM;
-def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>;
-def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>;
+def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+
+let DecoderNamespace = "MicroMips" in {
+ /// Load and Store Instructions - multiple
+ def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+}
class WaitMM<string opstr> :
InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
@@ -701,6 +746,18 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
}
+ let DecoderMethod = "DecodeMemMMImm9" in {
+ def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>;
+ def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>;
+ def LHE_MM : Load<"lhe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>;
+ def LHuE_MM : Load<"lhue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>;
+ def LWE_MM : Load<"lwe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>;
+ def SBE_MM : Store<"sbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>;
+ def SHE_MM : Store<"she", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>;
+ def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9gpr>,
+ POOL32C_LHUE_FM_MM<0x18, 0xa, 0x7>;
+ }
+
def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>;
def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>;
@@ -714,12 +771,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
LWL_FM_MM<0x8>;
def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
LWL_FM_MM<0x9>;
+ let DecoderMethod = "DecodeMemMMImm9" in {
+ def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>;
+ def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>;
+ def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x0>;
+ def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x1>, ISA_MIPS1_NOT_32R6_64R6;
+ }
/// Load and Store Instructions - multiple
def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>;
def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>;
- def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>;
- def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>;
/// Load and Store Pair Instructions
def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>;
@@ -777,11 +842,11 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
SEB_FM_MM<0x0ec>, ISA_MIPS32R2;
/// Word Swap Bytes Within Halfwords
- def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>,
- ISA_MIPS32R2;
-
- def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>,
- EXT_FM_MM<0x2c>;
+ def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>,
+ SEB_FM_MM<0x1ec>, ISA_MIPS32R2;
+ // TODO: Add '0 < pos+size <= 32' constraint check to ext instruction
+ def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1,
+ MipsExt>, EXT_FM_MM<0x2c>;
def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>,
EXT_FM_MM<0x0c>;
@@ -854,12 +919,22 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>;
def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>;
+ def LLE_MM : LLEBaseMM<"lle", GPR32Opnd>, LLE_FM_MM<0x6>;
+ def SCE_MM : SCEBaseMM<"sce", GPR32Opnd>, LLE_FM_MM<0xA>;
+
let DecoderMethod = "DecodeCacheOpMM" in {
def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>,
CACHE_PREF_FM_MM<0x08, 0x6>;
def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>,
CACHE_PREF_FM_MM<0x18, 0x2>;
}
+
+ let DecoderMethod = "DecodePrefeOpMM" in {
+ def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9>,
+ CACHE_PREFE_FM_MM<0x18, 0x2>;
+ def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9>,
+ CACHE_PREFE_FM_MM<0x18, 0x3>;
+ }
def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>;
def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>;
def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>;
@@ -870,7 +945,13 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>;
def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM;
- def RDHWR_MM : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM;
+
+ def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>;
+}
+
+let DecoderNamespace = "MicroMips" in {
+ def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
+ RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
}
let Predicates = [InMicroMips] in {
@@ -928,7 +1009,7 @@ class UncondBranchMMPseudo<string opstr> :
MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
!strconcat(opstr, "\t$offset")>;
- def B_MM_Pseudo : UncondBranchMMPseudo<"b">;
+def B_MM_Pseudo : UncondBranchMMPseudo<"b">, ISA_MICROMIPS;
def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
@@ -937,4 +1018,17 @@ class UncondBranchMMPseudo<string opstr> :
let Predicates = [InMicroMips] in {
def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"di", (DI_MM ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tge $rs, $rt",
+ (TGE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tne $rs, $rt",
+ (TNE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index dbb5f7b..35352b6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -154,9 +154,14 @@ def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureDSPR3
+ : SubtargetFeature<"dspr3", "HasDSPR3", "true", "Mips DSP-R3 ASE",
+ [ FeatureDSP, FeatureDSPR2 ]>;
def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
+def FeatureEVA : SubtargetFeature<"eva", "HasEVA", "true", "Mips EVA ASE">;
+
def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
"microMips mode">;
@@ -164,10 +169,19 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
"true", "Octeon cnMIPS Support",
[FeatureMips64r2]>;
+def FeatureUseTCCInDIV : SubtargetFeature<
+ "use-tcc-in-div",
+ "UseTCCInDIV", "false",
+ "Force the assembler to use trapping">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
+def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl",
+ "MipsSubtarget::CPU::P5600",
+ "The P5600 Processor", [FeatureMips32r5]>;
+
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
@@ -187,12 +201,11 @@ def : Proc<"mips64r2", [FeatureMips64r2]>;
def : Proc<"mips64r3", [FeatureMips64r3]>;
def : Proc<"mips64r5", [FeatureMips64r5]>;
def : Proc<"mips64r6", [FeatureMips64r6]>;
-def : Proc<"mips16", [FeatureMips16]>;
def : Proc<"octeon", [FeatureMips64r2, FeatureCnMips]>;
+def : ProcessorModel<"p5600", MipsP5600Model, [ImplP5600]>;
def MipsAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
- let MnemonicContainsDot = 1;
}
def MipsAsmParserVariant : AsmParserVariant {
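[annotation] Each SubtargetFeature added above names the MipsSubtarget field it toggles ("HasEVA", "HasDSPR3", "UseTCCInDIV"). A minimal C++ sketch of that mapping; the accessor names are assumptions modeled on the field names, not quoted from the patch:

    // Illustrative sketch only; field names come from the defs above,
    // accessor names are assumed.
    class MipsSubtargetSketch {
      bool HasEVA = false;    // set by -mattr=+eva
      bool HasDSPR3 = false;  // set by -mattr=+dspr3 (implies dsp, dspr2)
    public:
      bool hasEVA() const { return HasEVA; }
      bool hasDSPR3() const { return HasDSPR3; }
    };
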
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 46cc99c..26426c0 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -39,7 +39,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
const Mips16InstrInfo &TII =
*static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
uint64_t StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
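[annotation] The new comment states the rule this hunk enforces: prologue instructions are built with an unknown DebugLoc, so the first instruction that carries a real source location marks the end of the prologue for debuggers. A hedged sketch of the idiom (Opcode is a hypothetical placeholder):

    // Prologue instructions deliberately get an unknown location.
    DebugLoc dl;                            // default-constructed == unknown
    BuildMI(MBB, MBBI, dl, TII.get(Opcode)); // Opcode: hypothetical
    // Copying MBBI->getDebugLoc() here, as the old code did, could make the
    // prologue appear to end at the wrong instruction.
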
@@ -107,7 +111,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
+ MachineBasicBlock *EntryBlock = &MF->front();
//
// Registers RA, S0,S1 are the callee saved registers and they
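[annotation] This hunk reflects the ilist iterator changes elsewhere in this import: a MachineFunction::iterator no longer converts implicitly to a MachineBasicBlock*, so the entry block is now taken by reference. A small sketch of the two spellings:

    MachineFunction *MF = MBB.getParent();
    // Old (relied on implicit iterator-to-pointer conversion):
    //   MachineBasicBlock *EntryBlock = MF->begin();
    // New:
    MachineBasicBlock *EntryBlock = &MF->front();
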
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 893fc7c..b2bc7e7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -40,26 +40,17 @@ namespace {
const MipsTargetMachine &TM;
};
- class InlineAsmHelper {
- LLVMContext &C;
- BasicBlock *BB;
- public:
- InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
- C(C_), BB(BB_) {
- }
-
- void Out(StringRef AsmString) {
- std::vector<llvm::Type *> AsmArgTypes;
- std::vector<llvm::Value*> AsmArgs;
-
- llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C),
- AsmArgTypes, false);
- llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
- /* IsAlignStack */ false,
- llvm::InlineAsm::AD_ATT);
- CallInst::Create(IA, AsmArgs, "", BB);
- }
- };
+ static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) {
+ std::vector<llvm::Type *> AsmArgTypes;
+ std::vector<llvm::Value *> AsmArgs;
+
+ llvm::FunctionType *AsmFTy =
+ llvm::FunctionType::get(Type::getVoidTy(C), AsmArgTypes, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(AsmFTy, AsmText, "", true,
+ /* IsAlignStack */ false, llvm::InlineAsm::AD_ATT);
+ CallInst::Create(IA, AsmArgs, "", BB);
+ }
char Mips16HardFloat::ID = 0;
}
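[annotation] EmitInlineAsm replaces the per-line InlineAsmHelper: callers now accumulate the whole stub body in one string and emit a single inline-asm CallInst. A usage sketch mirroring the calling pattern in the stubs below (Context and BB as in those functions):

    std::string AsmText;
    AsmText += ".set reorder\n";
    AsmText += "jr $$25\n";
    EmitInlineAsm(Context, BB, AsmText); // one CallInst for the whole body
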
@@ -182,7 +173,7 @@ static bool needsFPReturnHelper(Function &F) {
return whichFPReturnVariant(RetType) != NoFPRet;
}
-static bool needsFPReturnHelper(const FunctionType &FT) {
+static bool needsFPReturnHelper(FunctionType &FT) {
Type* RetType = FT.getReturnType();
return whichFPReturnVariant(RetType) != NoFPRet;
}
@@ -195,63 +186,72 @@ static bool needsFPHelperFromSig(Function &F) {
// We swap between FP and Integer registers to allow Mips16 and Mips32 to
// interoperate
//
-static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
- bool LE, bool ToFP) {
- //LLVMContext &Context = M->getContext();
- std::string MI = ToFP? "mtc1 ": "mfc1 ";
+static std::string swapFPIntParams(FPParamVariant PV, Module *M, bool LE,
+ bool ToFP) {
+ std::string MI = ToFP ? "mtc1 ": "mfc1 ";
+ std::string AsmText;
+
switch (PV) {
case FSig:
- IAH.Out(MI + "$$4,$$f12");
+ AsmText += MI + "$$4, $$f12\n";
break;
+
case FFSig:
- IAH.Out(MI +"$$4,$$f12");
- IAH.Out(MI + "$$5,$$f14");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f14\n";
break;
+
case FDSig:
- IAH.Out(MI + "$$4,$$f12");
+ AsmText += MI + "$$4, $$f12\n";
if (LE) {
- IAH.Out(MI + "$$6,$$f14");
- IAH.Out(MI + "$$7,$$f15");
+ AsmText += MI + "$$6, $$f14\n";
+ AsmText += MI + "$$7, $$f15\n";
} else {
- IAH.Out(MI + "$$7,$$f14");
- IAH.Out(MI + "$$6,$$f15");
+ AsmText += MI + "$$7, $$f14\n";
+ AsmText += MI + "$$6, $$f15\n";
}
break;
+
case DSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
}
break;
+
case DDSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
- IAH.Out(MI + "$$6,$$f14");
- IAH.Out(MI + "$$7,$$f15");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
+ AsmText += MI + "$$6, $$f14\n";
+ AsmText += MI + "$$7, $$f15\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
- IAH.Out(MI + "$$7,$$f14");
- IAH.Out(MI + "$$6,$$f15");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
+ AsmText += MI + "$$7, $$f14\n";
+ AsmText += MI + "$$6, $$f15\n";
}
break;
+
case DFSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
}
- IAH.Out(MI + "$$6,$$f14");
+ AsmText += MI + "$$6, $$f14\n";
break;
+
case NoSig:
- return;
+ break;
}
+
+ return AsmText;
}
//
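[annotation] With swapFPIntParams returning text instead of emitting line by line, the FP/integer move sequence for a given signature can be inspected directly. For example, per the FSig case above, a single-float signature yields exactly one move of $4 into the FP argument register:

    // ToFP=true selects "mtc1 "; FSig appends one move.
    std::string Moves = swapFPIntParams(FSig, M, /*LE=*/true, /*ToFP=*/true);
    // Moves == "mtc1 $$4, $$f12\n"
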
@@ -282,68 +282,77 @@ static void assureFPCallStub(Function &F, Module *M,
FStub->addFnAttr("nomips16");
FStub->setSection(SectionName);
BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub);
- InlineAsmHelper IAH(Context, BB);
- IAH.Out(".set reorder");
FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType());
FPParamVariant PV = whichFPParamVariantNeeded(F);
- swapFPIntParams(PV, M, IAH, LE, true);
+
+ std::string AsmText;
+ AsmText += ".set reorder\n";
+ AsmText += swapFPIntParams(PV, M, LE, true);
if (RV != NoFPRet) {
- IAH.Out("move $$18, $$31");
- IAH.Out("jal " + Name);
+ AsmText += "move $$18, $$31\n";
+ AsmText += "jal " + Name + "\n";
} else {
- IAH.Out("lui $$25,%hi(" + Name + ")");
- IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" );
+ AsmText += "lui $$25, %hi(" + Name + ")\n";
+ AsmText += "addiu $$25, $$25, %lo(" + Name + ")\n";
}
+
switch (RV) {
case FRet:
- IAH.Out("mfc1 $$2,$$f0");
+ AsmText += "mfc1 $$2, $$f0\n";
break;
+
case DRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f1");
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f1\n";
} else {
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$2,$$f1");
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$2, $$f1\n";
}
break;
+
case CFRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f2\n";
} else {
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$3, $$f2\n";
}
break;
+
case CDRet:
if (LE) {
- IAH.Out("mfc1 $$4,$$f2");
- IAH.Out("mfc1 $$5,$$f3");
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f1");
+ AsmText += "mfc1 $$4, $$f2\n";
+ AsmText += "mfc1 $$5, $$f3\n";
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f1\n";
} else {
- IAH.Out("mfc1 $$5,$$f2");
- IAH.Out("mfc1 $$4,$$f3");
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$2,$$f1");
+ AsmText += "mfc1 $$5, $$f2\n";
+ AsmText += "mfc1 $$4, $$f3\n";
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$2, $$f1\n";
}
break;
+
case NoFPRet:
break;
}
+
if (RV != NoFPRet)
- IAH.Out("jr $$18");
+ AsmText += "jr $$18\n";
else
- IAH.Out("jr $$25");
+ AsmText += "jr $$25\n";
+ EmitInlineAsm(Context, BB, AsmText);
+
new UnreachableInst(Context, BB);
}
//
// Functions that are llvm intrinsics and don't need helpers.
//
-static const char *IntrinsicInline[] = {
+static const char *const IntrinsicInline[] = {
"fabs", "fabsf",
"llvm.ceil.f32", "llvm.ceil.f64",
"llvm.copysign.f32", "llvm.copysign.f64",
@@ -395,7 +404,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Type *T = RVal->getType();
FPReturnVariant RV = whichFPReturnVariant(T);
if (RV == NoFPRet) continue;
- static const char* Helper[NoFPRet] = {
+ static const char *const Helper[NoFPRet] = {
"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
"__mips16_ret_dc"
};
@@ -419,11 +428,11 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
const Value* V = CI->getCalledValue();
- const Type* T = nullptr;
+ Type* T = nullptr;
if (V) T = V->getType();
- const PointerType *PFT=nullptr;
+ PointerType *PFT = nullptr;
if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=nullptr;
+ FunctionType *FT = nullptr;
if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
Function *F_ = CI->getCalledFunction();
if (FT && needsFPReturnHelper(*FT) &&
@@ -469,20 +478,21 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
FStub->addFnAttr("nomips16");
FStub->setSection(SectionName);
BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub);
- InlineAsmHelper IAH(Context, BB);
+
+ std::string AsmText;
if (PicMode) {
- IAH.Out(".set noreorder");
- IAH.Out(".cpload $$25");
- IAH.Out(".set reorder");
- IAH.Out(".reloc 0,R_MIPS_NONE," + Name);
- IAH.Out("la $$25," + LocalName);
- }
- else {
- IAH.Out("la $$25," + Name);
- }
- swapFPIntParams(PV, M, IAH, LE, false);
- IAH.Out("jr $$25");
- IAH.Out(LocalName + " = " + Name);
+ AsmText += ".set noreorder\n";
+ AsmText += ".cpload $$25\n";
+ AsmText += ".set reorder\n";
+ AsmText += ".reloc 0, R_MIPS_NONE, " + Name + "\n";
+ AsmText += "la $$25, " + LocalName + "\n";
+ } else
+ AsmText += "la $$25, " + Name + "\n";
+ AsmText += swapFPIntParams(PV, M, LE, false);
+ AsmText += "jr $$25\n";
+ AsmText += LocalName + " = " + Name + "\n";
+ EmitInlineAsm(Context, BB, AsmText);
+
new UnreachableInst(FStub->getContext(), BB);
}
@@ -535,7 +545,7 @@ bool Mips16HardFloat::runOnModule(Module &M) {
FPParamVariant V = whichFPParamVariantNeeded(*F);
if (V != NoSig) {
Modified = true;
- createFPFnStub(F, &M, V, TM);
+ createFPFnStub(&*F, &M, V, TM);
}
}
return Modified;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index bce2c1e..5a1c2c67 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -73,7 +73,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 3522cbb..e748325 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -530,8 +530,7 @@ emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -592,8 +591,7 @@ Mips16TargetLowering::emitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -657,8 +655,7 @@ Mips16TargetLowering::emitSeliT16(unsigned Opc1, unsigned Opc2,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
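[annotation] The emitSel* helpers all switch to getIterator() for the same ilist reason noted earlier: a block is no longer implicitly convertible to its own iterator. Sketch:

    // Old:  MachineFunction::iterator It = BB; ++It;
    // New: obtain the iterator explicitly, then advance past BB.
    MachineFunction::iterator It = ++BB->getIterator();
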
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index a49572e..da8ada4 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -196,7 +196,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
@@ -263,7 +263,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Reg1, unsigned Reg2) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
//
// li reg1, constant
// move reg2, sp
@@ -446,7 +446,7 @@ const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
void Mips16InstrInfo::BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 10fff03..dad6ea4 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -530,19 +530,19 @@ class MayStore {
// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
// To add a constant to a 32-bit integer.
//
-def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIM16Alu>;
-def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIM16Alu>,
ArithLogic16Defs<0> {
let AddedComplexity = 5;
}
-def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIM16Alu>,
ArithLogic16Defs<0> {
let isCodeGenOnly = 1;
}
def AddiuRxRyOffMemX16:
- FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
+ FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIM16Alu>;
//
@@ -550,7 +550,7 @@ def AddiuRxRyOffMemX16:
// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
// To add a constant to the program counter.
//
-def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIM16Alu>;
//
// Format: ADDIU sp, immediate MIPS16e
@@ -558,14 +558,14 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
// To add a constant to the stack pointer.
//
def AddiuSpImm16
- : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ : FI816_SP_ins<0b011, "addiu", IIM16Alu> {
let Defs = [SP];
let Uses = [SP];
let AddedComplexity = 5;
}
def AddiuSpImmX16
- : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ : FEXT_I816_SP_ins<0b011, "addiu", IIM16Alu> {
let Defs = [SP];
let Uses = [SP];
}
@@ -576,14 +576,14 @@ def AddiuSpImmX16
// To add 32-bit integers.
//
-def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIM16Alu>, ArithLogic16Defs<1>;
//
// Format: AND rx, ry MIPS16e
// Purpose: AND
// To do a bitwise logical AND.
-def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIM16Alu>, ArithLogic16Defs<1>;
//
@@ -591,7 +591,7 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
// Purpose: Branch on Equal to Zero
// To test a GPR then do a PC-relative conditional branch.
//
-def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16;
//
@@ -599,7 +599,7 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
// Purpose: Branch on Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
-def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16;
//
// Format: B offset MIPS16e
@@ -607,27 +607,27 @@ def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
// To do an unconditional PC-relative branch.
//
-def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16;
+def Bimm16: FI16_ins<0b00010, "b", IIM16Alu>, branch16;
// Format: B offset MIPS16e
// Purpose: Unconditional Branch
// To do an unconditional PC-relative branch.
//
-def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIM16Alu>, branch16;
//
// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero
// To test a GPR then do a PC-relative conditional branch.
//
-def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16;
//
// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
-def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16;
//
@@ -641,11 +641,11 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+def Bteqz16: FI816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
-def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
@@ -669,11 +669,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
// To test special register T then do a PC-relative conditional branch.
//
-def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 {
+def Btnez16: FI816_ins<0b001, "btnez", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
-def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIM16Alu> ,cbranch16 {
let Uses = [T8];
}
@@ -695,7 +695,7 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
// Purpose: Compare
// To compare the contents of two GPRs.
//
-def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIM16Alu> {
let Defs = [T8];
}
@@ -704,7 +704,7 @@ def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
// Purpose: Compare Immediate
// To compare a constant with the contents of a GPR.
//
-def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIM16Alu> {
let Defs = [T8];
}
@@ -713,7 +713,7 @@ def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
// Purpose: Compare Immediate (Extended)
// To compare a constant with the contents of a GPR.
//
-def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIM16Alu> {
let Defs = [T8];
}
@@ -723,7 +723,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
// Purpose: Divide Word
// To divide 32-bit signed integers.
//
-def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIM16Alu> {
let Defs = [HI0, LO0];
}
@@ -732,7 +732,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
// Purpose: Divide Unsigned Word
// To divide 32-bit unsigned integers.
//
-def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIM16Alu> {
let Defs = [HI0, LO0];
}
//
@@ -742,13 +742,13 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
// region and preserve the current ISA.
//
-def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+def Jal16 : FJAL16_ins<0b0, "jal", IIM16Alu> {
let hasDelaySlot = 0; // not true, but we add the nop for now
let isCall=1;
let Defs = [RA];
}
-def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 {
+def JalB16 : FJALB16_ins<0b0, "jal", IIM16Alu>, branch16 {
let hasDelaySlot = 0; // not true, but we add the nop for now
let isBranch=1;
let Defs = [RA];
@@ -761,7 +761,7 @@ def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 {
// address register.
//
-def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let hasDelaySlot = 1;
@@ -769,14 +769,14 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
let isBarrier=1;
}
-def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
let isBarrier=1;
}
-def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
+def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
@@ -825,16 +825,16 @@ def LhuRxRyOffMemX16:
// Purpose: Load Immediate
// To load a constant into a GPR.
//
-def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
+def LiRxImm16: FRI16_ins<0b01101, "li", IIM16Alu>;
//
// Format: LI rx, immediate MIPS16e
// Purpose: Load Immediate (Extended)
// To load a constant into a GPR.
//
-def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIM16Alu>;
-def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> {
+def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIM16Alu> {
let isCodeGenOnly = 1;
}
@@ -863,21 +863,21 @@ def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad;
// Purpose: Move
// To move the contents of a GPR to a GPR.
//
-def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+def Move32R16: FI8_MOV32R16_ins<"move", IIM16Alu>;
//
// Format: MOVE ry, r32 MIPS16e
//Purpose: Move
// To move the contents of a GPR to a GPR.
//
-def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
+def MoveR3216: FI8_MOVR3216_ins<"move", IIM16Alu>;
//
// Format: MFHI rx MIPS16e
// Purpose: Move From HI Register
// To copy the special purpose HI register to a GPR.
//
-def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
+def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIM16Alu> {
let Uses = [HI0];
let hasSideEffects = 0;
}
@@ -887,7 +887,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
// Purpose: Move From LO Register
// To copy the special purpose LO register to a GPR.
//
-def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
+def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> {
let Uses = [LO0];
let hasSideEffects = 0;
}
@@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
//
// Pseudo Instruction for mult
//
-def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
+def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
-def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
+def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
// Purpose: Multiply Word
// To multiply 32-bit signed integers.
//
-def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
+def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
// Purpose: Multiply Unsigned Word
// To multiply 32-bit unsigned integers.
//
-def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
+def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -934,21 +934,21 @@ def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
// Purpose: Negate
// To negate an integer value.
//
-def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>;
+def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIM16Alu>;
//
// Format: NOT rx, ry MIPS16e
// Purpose: Not
// To complement an integer value
//
-def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>;
+def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIM16Alu>;
//
// Format: OR rx, ry MIPS16e
// Purpose: Or
// To do a bitwise logical OR.
//
-def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIM16Alu>, ArithLogic16Defs<1>;
//
// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
@@ -1012,7 +1012,7 @@ def SbRxRyOffMemX16:
// Sign-extend least significant byte in register rx.
//
def SebRx16
- : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>;
+ : FRR_SF16_ins<0b10001, 0b100, "seb", IIM16Alu>;
//
// Format: SEH rx MIPS16e
@@ -1020,7 +1020,7 @@ def SebRx16
// Sign-extend least significant word in register rx.
//
def SehRx16
- : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>;
+ : FRR_SF16_ins<0b10001, 0b101, "seh", IIM16Alu>;
//
// The Sel(T) instructions are pseudos
@@ -1149,21 +1149,21 @@ def ShRxRyOffMemX16:
// Purpose: Shift Word Left Logical (Extended)
// To execute a left-shift of a word by a fixed number of bits-0 to 31 bits.
//
-def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
+def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIM16Alu>;
//
// Format: SLLV ry, rx MIPS16e
// Purpose: Shift Word Left Logical Variable
// To execute a left-shift of a word by a variable number of bits.
//
-def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIM16Alu>;
// Format: SLTI rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate
// To record the result of a less-than comparison with a constant.
//
//
-def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIM16Alu> {
let Defs = [T8];
}
@@ -1173,7 +1173,7 @@ def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
// To record the result of a less-than comparison with a constant.
//
//
-def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIM16Alu> {
let Defs = [T8];
}
@@ -1184,7 +1184,7 @@ def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
// To record the result of a less-than comparison with a constant.
//
//
-def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIM16Alu> {
let Defs = [T8];
}
@@ -1194,7 +1194,7 @@ def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
// To record the result of a less-than comparison with a constant.
//
//
-def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIM16Alu> {
let Defs = [T8];
}
//
@@ -1209,7 +1209,7 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
// Purpose: Set on Less Than
// To record the result of a less-than comparison.
//
-def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIM16Alu>{
let Defs = [T8];
}
@@ -1219,7 +1219,7 @@ def SltCCRxRy16: FCCRR16_ins<"slt">;
// Purpose: Set on Less Than Unsigned
// To record the result of an unsigned less-than comparison.
//
-def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIM16Alu>{
let Defs = [T8];
}
@@ -1236,7 +1236,7 @@ def SltuCCRxRy16: FCCRR16_ins<"sltu">;
// To execute an arithmetic right-shift of a word by a variable
// number of bits.
//
-def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIM16Alu>;
//
@@ -1245,7 +1245,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
// To execute an arithmetic right-shift of a word by a fixed
// number of bits-1 to 8 bits.
//
-def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIM16Alu>;
//
@@ -1254,7 +1254,7 @@ def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
// To execute a logical right-shift of a word by a variable
// number of bits.
//
-def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIM16Alu>;
//
@@ -1263,14 +1263,14 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
// To execute a logical right-shift of a word by a fixed
// number of bits-1 to 31 bits.
//
-def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIM16Alu>;
//
// Format: SUBU rz, rx, ry MIPS16e
// Purpose: Subtract Unsigned Word
// To subtract 32-bit integers
//
-def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIM16Alu>, ArithLogic16Defs<0>;
//
// Format: SW ry, offset(rx) MIPS16e
@@ -1294,7 +1294,7 @@ def SwRxSpImmX16: FEXT_RI16_SP_Store_explicit_ins
// Purpose: Xor
// To do a bitwise logical XOR.
//
-def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIM16Alu>, ArithLogic16Defs<1>;
class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [InMips16Mode];
@@ -1380,7 +1380,7 @@ def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> {
let isCall=1, hasDelaySlot=0 in
def JumpLinkReg16:
FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
- "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch> {
+ "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], II_JALRC> {
let Defs = [RA];
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index d6ab8a6..82d2c8e 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -186,54 +186,56 @@ class CMP_CONDN_DESC_BASE<string CondStr, string Typestr,
multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
RegisterOperand FGROpnd>{
- def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
- CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
- CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
- CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
- CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
- CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
- CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
- CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
- CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
- CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
ISA_MIPS32R6, HARDFLOAT;
+ def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ }
}
//===----------------------------------------------------------------------===//
@@ -557,7 +559,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
- string DecoderMethod = "DecodeCacheOpR6";
+ string DecoderMethod = "DecodeCacheeOp_CacheOpR6";
}
class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@@ -595,7 +597,7 @@ class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
list<dag> Pattern = [];
}
-class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>;
class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rt);
@@ -685,8 +687,10 @@ def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
def CACHE_R6 : R6MMR6Rel, CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
-def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
def CLZ_R6 : R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
@@ -702,39 +706,51 @@ def LSA_R6 : R6MMR6Rel, LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
-def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def MOD : R6MMR6Rel, MOD_ENC, MOD_DESC, ISA_MIPS32R6;
def MODU : R6MMR6Rel, MODU_ENC, MODU_DESC, ISA_MIPS32R6;
-def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def MUH : R6MMR6Rel, MUH_ENC, MUH_DESC, ISA_MIPS32R6;
def MUHU : R6MMR6Rel, MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
def MULU : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6;
def NAL; // BAL with rd=0
def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6;
-def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6;
+let AdditionalPredicates = [NotInMicroMips] in {
def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6;
+}
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
def SELEQZ : R6MMR6Rel, SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SELNEZ : R6MMR6Rel, SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
//===----------------------------------------------------------------------===//
@@ -743,7 +759,9 @@ def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
//
//===----------------------------------------------------------------------===//
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+}
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
//===----------------------------------------------------------------------===//
@@ -752,84 +770,78 @@ def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
//
//===----------------------------------------------------------------------===//
-// f32 comparisons supported via another comparison
-def : MipsPat<(setone f32:$lhs, f32:$rhs),
- (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seto f32:$lhs, f32:$rhs),
- (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(setune f32:$lhs, f32:$rhs),
- (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setne f32:$lhs, f32:$rhs),
- (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-
-// f64 comparisons supported via another comparison
-def : MipsPat<(setone f64:$lhs, f64:$rhs),
- (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seto f64:$lhs, f64:$rhs),
- (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(setune f64:$lhs, f64:$rhs),
- (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setne f64:$lhs, f64:$rhs),
- (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+// comparisons supported via another comparison
+multiclass Cmp_Pats<ValueType VT, Instruction NOROp, Register ZEROReg> {
+def : MipsPat<(setone VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_UEQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(seto VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_UN_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(setune VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(seteq VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setgt VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LE_"#NAME) VT:$rhs, VT:$lhs)>;
+def : MipsPat<(setge VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LT_"#NAME) VT:$rhs, VT:$lhs)>;
+def : MipsPat<(setlt VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LT_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setle VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LE_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setne VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+}
+
+defm S : Cmp_Pats<f32, NOR, ZERO>, ISA_MIPS32R6;
+defm D : Cmp_Pats<f64, NOR, ZERO>, ISA_MIPS32R6;
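[annotation] The Cmp_Pats identities are ordinary IEEE-754 predicate algebra: cmp.cond.s/d produces an all-ones or all-zeros mask, NOR against $zero inverts it, and the ordered comparisons are reused with swapped operands. A hedged C++ model of the inversion used for setone (not the patch's code, just its semantics):

    #include <cmath>
    // ueq = "unordered or equal"; one = "ordered and not equal" = NOT ueq,
    // hence the NOR(CMP_UEQ, ZERO) pattern above.
    bool ueq(float a, float b) { return std::isnan(a) || std::isnan(b) || a == b; }
    bool one(float a, float b) { return !ueq(a, b); }
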
// i32 selects
+multiclass SelectInt_Pats<ValueType RC, Instruction OROp, Instruction XORiOp,
+ Instruction SLTiOp, Instruction SLTiuOp,
+ Instruction SELEQZOp, Instruction SELNEZOp,
+ SDPatternOperator imm_type, ValueType Opg> {
+// reg, immz
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, RC:$cond), (SELNEZOp RC:$f, RC:$cond))>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f),
+ (OROp (SELNEZOp RC:$t, RC:$cond), (SELEQZOp RC:$f, RC:$cond))>;
+
+// reg, immZExt16[_64]
+def : MipsPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+ (SELNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+def : MipsPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+ (OROp (SELNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+ (SELEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+
+// reg, immSExt16Plus1
+def : MipsPat<(select (Opg (setgt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))),
+ (SELNEZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>;
+def : MipsPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))),
+ (SELNEZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>;
+
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz),
+ (SELEQZOp RC:$t, RC:$cond)>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz),
+ (SELNEZOp RC:$t, RC:$cond)>;
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f),
+ (SELNEZOp RC:$f, RC:$cond)>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f),
+ (SELEQZOp RC:$f, RC:$cond)>;
+}
+
+defm : SelectInt_Pats<i32, OR, XORi, SLTi, SLTiu, SELEQZ, SELNEZ,
+ immZExt16, i32>, ISA_MIPS32R6;
+
def : MipsPat<(select i32:$cond, i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
- (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
+ (OR (SELNEZ i32:$t, i32:$cond),
+ (SELEQZ i32:$f, i32:$cond))>,
ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
- (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t,
- i32:$f),
- (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))),
- (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)),
- i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))),
- (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>,
- ISA_MIPS32R6;
-
def : MipsPat<(select i32:$cond, i32:$t, immz),
- (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz),
- (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz),
- (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+ (SELNEZ i32:$t, i32:$cond)>,
+ ISA_MIPS32R6;
def : MipsPat<(select i32:$cond, immz, i32:$f),
- (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f),
- (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f),
- (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+ (SELEQZ i32:$f, i32:$cond)>,
+ ISA_MIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index f917eca..cbdcdd7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -16,10 +16,6 @@
//===----------------------------------------------------------------------===//
// Unsigned Operand
-def uimm5_64 : Operand<i64> {
- let PrintMethod = "printUnsignedImm";
-}
-
def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
}
@@ -276,12 +272,20 @@ def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
let isCodeGenOnly = 1 in
def RDHWR64 : ReadHardware<GPR64Opnd, HWRegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>;
-def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>;
-def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ // TODO: Add 'pos + size' constraint check to dext* instructions
+ // DEXT: 0 < pos + size <= 63
+ // DEXTM, DEXTU: 32 < pos + size <= 64
+ def DEXT : ExtBase<"dext", GPR64Opnd, uimm5, uimm5_plus1, MipsExt>,
+ EXT_FM<3>;
+ def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5, uimm5_plus33, MipsExt>,
+ EXT_FM<1>;
+ def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32, uimm5_plus1,
+ MipsExt>, EXT_FM<2>;
+}
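
The architectural semantics these dext variants share can be modeled as a shift-and-mask; DEXTM and DEXTU merely re-encode pos or size values above 31. A reference sketch (not compiler code; the TODO constraint above is left to the caller):

    #include <cstdint>
    // Extract `size` bits of `rs` starting at bit `pos`.
    static uint64_t dextRef(uint64_t rs, unsigned pos, unsigned size) {
      uint64_t mask = (size >= 64) ? ~0ULL : ((1ULL << size) - 1);
      return (rs >> pos) & mask;
    }
    int main() { return dextRef(0xFF00, 8, 8) == 0xFF ? 0 : 1; }
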
def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>;
-def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>;
+def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>;
def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>;
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -341,11 +345,11 @@ class SetCC64_I<string opstr, PatFrag cond_op>:
}
class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO, bits<64> shift = 1> :
- InstSE<(outs), (ins RO:$rs, uimm5_64:$p, opnd:$offset),
+ RegisterOperand RO, Operand ImmOp, bits<64> shift = 1> :
+ InstSE<(outs), (ins RO:$rs, ImmOp:$p, opnd:$offset),
!strconcat(opstr, "\t$rs, $p, $offset"),
[(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)),
- bb:$offset)], IIBranch, FrmI, opstr> {
+ bb:$offset)], II_BBIT, FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -363,14 +367,17 @@ def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>,
ADD_FM<0x1c, 0x28>;
// Branch on Bit Clear /+32
-def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd>, BBIT_FM<0x32>;
-def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, 0x100000000>,
+def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd,
+ uimm5_64_report_uimm6>, BBIT_FM<0x32>;
+def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, uimm5_64,
+ 0x100000000>,
BBIT_FM<0x36>;
// Branch on Bit Set /+32
-def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd>, BBIT_FM<0x3a>;
-def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, 0x100000000>,
- BBIT_FM<0x3e>;
+def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd,
+ uimm5_64_report_uimm6>, BBIT_FM<0x3a>;
+def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, uimm5_64,
+ 0x100000000>, BBIT_FM<0x3e>;
// Multiply Doubleword to GPR
let Defs = [HI0, LO0, P0, P1, P2] in
@@ -544,10 +551,25 @@ def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst
(BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>;
}
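
The PowerOf2HI patterns above let a test of a single bit in the upper word feed bbit032/bbit132 directly, with Log2HI recovering the bit index. Source code of the shape these patterns target (hypothetical example; cnMIPS only):

    #include <cstdint>
    int classify(uint64_t x) {
      if (x & (1ULL << 40)) // single-bit test; a candidate for bbit132 $x, 8
        return 1;
      return 0;
    }
    int main() { return classify(1ULL << 40) == 1 ? 0 : 1; }
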
+// Atomic load patterns.
+def : MipsPat<(atomic_load_8 addr:$a), (LB64 addr:$a)>;
+def : MipsPat<(atomic_load_16 addr:$a), (LH64 addr:$a)>;
+def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>;
+def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>;
+
+// Atomic store patterns.
+def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>;
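
These patterns rely on naturally aligned loads and stores being single-copy atomic on MIPS64, so a relaxed atomic access needs no ll/sc loop; any fences required by stronger orderings are emitted separately. A sketch of source that exercises the 64-bit case:

    #include <atomic>
    #include <cstdint>
    int64_t loadRelaxed(const std::atomic<int64_t> &v) {
      return v.load(std::memory_order_relaxed); // expected to select a plain ld
    }
    int main() {
      std::atomic<int64_t> v{42};
      return loadRelaxed(v) == 42 ? 0 : 1;
    }
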
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"move $dst, $src",
+ (OR64 GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
+ GPR_64;
+def : MipsInstAlias<"move $dst, $src",
(DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
GPR_64;
def : MipsInstAlias<"daddu $rs, $rt, $imm",
@@ -617,6 +639,38 @@ def : MipsInstAlias<"syncw", (SYNC 0x4), 0>;
def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
}
+// cnMIPS Aliases.
+
+// bbit* with $p 32-63 converted to bbit*32 with $p 0-31
+def : MipsInstAlias<"bbit0 $rs, $p, $offset",
+ (BBIT032 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p,
+ brtarget:$offset), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"bbit1 $rs, $p, $offset",
+ (BBIT132 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p,
+ brtarget:$offset), 0>,
+ ASE_CNMIPS;
+
+// exts with $pos 32-63 is converted to exts32 with $pos 0-31
+def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1",
+ (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"exts $rt, $pos, $lenm1",
+ (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+
+// cins with $pos 32-63 is converted to cins32 with $pos 0-31
+def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1",
+ (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"cins $rt, $pos, $lenm1",
+ (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -625,3 +679,8 @@ class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
!strconcat(instr_asm, "\t$rt, $imm64")> ;
def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>;
+
+def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr),
+ "dla\t$rt, $addr">;
+def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64),
+ "dla\t$rt, $imm64">;
diff --git a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index 6b546e8..6f34dbe 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -62,7 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>;
class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>;
class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>;
class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>;
-class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>;
+class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2_plus1>;
class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>;
class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>;
class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>;
@@ -81,10 +81,12 @@ class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
//
//===----------------------------------------------------------------------===//
-def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
-def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
-def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
-def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
+ def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+ def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+ def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
+}
def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6;
def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6;
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index fdba064..9575293 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -169,12 +169,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MCPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MCPE.Val.ConstVal);
+ EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal);
return;
}
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
@@ -202,7 +202,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
llvm_unreachable("Pseudo opcode found in EmitInstruction()");
MCInst TmpInst0;
- MCInstLowering.Lower(I, TmpInst0);
+ MCInstLowering.Lower(&*I, TmpInst0);
EmitToStreamer(*OutStreamer, TmpInst0);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
@@ -405,7 +405,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
- if (MBB->isLandingPad() || MBB->pred_empty())
+ if (MBB->isEHPad() || MBB->pred_empty())
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
@@ -559,7 +559,6 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
bool closeP = false;
@@ -608,7 +607,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI"
+ O << getDataLayout().getPrivateGlobalPrefix() << "CPI"
<< getFunctionNumber() << "_" << MO.getIndex();
if (MO.getOffset())
O << "+" << MO.getOffset();
@@ -1009,7 +1008,7 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// Mov $18, $31
- EmitInstrRegRegReg(*STI, Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO);
+ EmitInstrRegRegReg(*STI, Mips::OR, Mips::S2, Mips::RA, Mips::ZERO);
EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true);
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
index b808129..d82063e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -29,22 +29,16 @@ static bool isF128SoftLibCall(const char *CallSym) {
"powl", "rintl", "sinl", "sqrtl",
"truncl"};
- const char *const *End = LibCalls + array_lengthof(LibCalls);
-
// Check that LibCalls is sorted alphabetically.
- MipsTargetLowering::LTStr Comp;
-
-#ifndef NDEBUG
- for (const char *const *I = LibCalls; I < End - 1; ++I)
- assert(Comp(*I, *(I + 1)));
-#endif
-
- return std::binary_search(LibCalls, End, CallSym, Comp);
+ auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
+ assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
+ return std::binary_search(std::begin(LibCalls), std::end(LibCalls),
+ CallSym, Comp);
}
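
The rewrite keeps the binary-search lookup but swaps the hand-rolled end pointer and comparator class for std::begin/std::end and a lambda, and folds the sortedness check into a single assert. The idiom in isolation (table contents and names are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cstring>
    #include <iterator>
    static bool isKnownCall(const char *Sym) {
      static const char *const Calls[] = {"ceill", "fmodl", "powl", "sqrtl"};
      auto Comp = [](const char *A, const char *B) { return strcmp(A, B) < 0; };
      assert(std::is_sorted(std::begin(Calls), std::end(Calls), Comp));
      return std::binary_search(std::begin(Calls), std::end(Calls), Sym, Comp);
    }
    int main() { return isKnownCall("powl") ? 0 : 1; }
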
/// This function returns true if Ty is fp128, {f128} or i128 which was
/// originally a fp128.
-static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) {
if (Ty->isFP128Ty())
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
index 93e1908..0b4b778 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -427,3 +427,28 @@ def CSR_Mips16RetHelper :
CalleeSavedRegs<(add V0, V1, FP,
(sequence "A%u", 3, 0), (sequence "S%u", 7, 0),
(sequence "D%u", 15, 10))>;
+
+def CSR_Interrupt_32R6 : CalleeSavedRegs<(add (sequence "A%u", 3, 0),
+ (sequence "S%u", 7, 0),
+ (sequence "V%u", 1, 0),
+ (sequence "T%u", 9, 0),
+ RA, FP, GP, AT)>;
+
+def CSR_Interrupt_32 : CalleeSavedRegs<(add (sequence "A%u", 3, 0),
+ (sequence "S%u", 7, 0),
+ (sequence "V%u", 1, 0),
+ (sequence "T%u", 9, 0),
+ RA, FP, GP, AT, LO0, HI0)>;
+
+def CSR_Interrupt_64R6 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0),
+ (sequence "V%u_64", 1, 0),
+ (sequence "S%u_64", 7, 0),
+ (sequence "T%u_64", 9, 0),
+ RA_64, FP_64, GP_64, AT_64)>;
+
+def CSR_Interrupt_64 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0),
+ (sequence "S%u_64", 7, 0),
+ (sequence "T%u_64", 9, 0),
+ (sequence "V%u_64", 1, 0),
+ RA_64, FP_64, GP_64, AT_64,
+ LO0_64, HI0_64)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 96553d2..ea8c587 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -560,7 +560,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -598,12 +598,12 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI = MBB->getIterator();
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
@@ -656,11 +656,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ computeBlockSize(&*I);
// Compute block offsets.
- adjustBBOffsetsAfter(MF->begin());
+ adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -879,7 +879,7 @@ MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
@@ -967,8 +967,8 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
- MachineFunction::const_iterator NextBlock = Water;
- if (++NextBlock == MF->end()) {
+ MachineFunction::const_iterator NextBlock = ++Water->getIterator();
+ if (NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
NextBlockAlignment = 0;
} else {
@@ -1261,7 +1261,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = std::next(MachineFunction::iterator(UserMBB));
+ NewMBB = &*++UserMBB->getIterator();
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
@@ -1371,8 +1371,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
NewWaterList.insert(NewIsland);
// The new CPE goes before the following block (NewMBB).
- NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+ NewMBB = &*++WaterBB->getIterator();
} else {
// No water found.
// we first see if a longer form of the instruction could have reached
@@ -1389,7 +1388,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1406,7 +1405,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF->insert(NewMBB, NewIsland);
+ MF->insert(NewMBB->getIterator(), NewIsland);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
@@ -1431,9 +1430,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
-
-
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1645,7 +1642,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
index b5d52ce..f959bd4 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -7,10 +7,30 @@
//
//===----------------------------------------------------------------------===//
+class DspMMRel;
+
+def Dsp2MicroMips : InstrMapping {
+ let FilterClass = "DspMMRel";
+ // Instructions with the same BaseOpcode form a row.
+ let RowFields = ["BaseOpcode"];
+ // Instructions with the same Arch value form a column.
+ let ColFields = ["Arch"];
+ // The key column holds the plain DSP variant of each instruction.
+ let KeyCol = ["dsp"];
+ // The value columns are the "dsp" and "mmdsp" (microMIPS) variants.
+ let ValueCols = [["dsp"], ["mmdsp"]];
+}
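
Conceptually, TableGen lowers an InstrMapping to a table keyed by the row fields (BaseOpcode) and column fields (Arch), from which the microMIPS twin of a DSP opcode can be looked up. A toy model of that relation (all names and values invented for illustration):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>
    int main() {
      // (BaseOpcode row, Arch column) -> opcode value
      std::map<std::pair<std::string, std::string>, uint16_t> table = {
          {{"addu.qb", "dsp"}, 1},   // key column: the DSP encoding
          {{"addu.qb", "mmdsp"}, 2}, // value column: the microMIPS DSP encoding
      };
      return table.at({"addu.qb", "mmdsp"}) == 2 ? 0 : 1;
    }
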
+
def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP">;
def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">,
AssemblerPredicate<"FeatureDSPR2">;
+def HasDSPR3 : Predicate<"Subtarget->hasDSPR3()">,
+ AssemblerPredicate<"FeatureDSPR3">;
+
+class ISA_DSPR2 {
+ list<Predicate> InsnPredicates = [HasDSPR2];
+}
// Fields.
class Field6<bits<6> val> {
@@ -20,14 +40,22 @@ class Field6<bits<6> val> {
def SPECIAL3_OPCODE : Field6<0b011111>;
def REGIMM_OPCODE : Field6<0b000001>;
-class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
- let Predicates = [HasDSP];
+class DSPInst<string opstr = "">
+ : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ string BaseOpcode = opstr;
+ string Arch = "dsp";
}
class PseudoDSP<dag outs, dag ins, list<dag> pattern,
- InstrItinClass itin = IIPseudo>:
- MipsPseudo<outs, ins, pattern, itin> {
- let Predicates = [HasDSP];
+ InstrItinClass itin = IIPseudo>
+ : MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+}
+
+class DSPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, PredicateControl {
+ let InsnPredicates = [HasDSP];
}
// ADDU.QB sub-class format.
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index d268384..da6f174 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
// ImmLeaf
+def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
+def immZExt7 : ImmLeaf<i32, [{return isUInt<7>(Imm);}]>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
@@ -263,6 +265,7 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -273,6 +276,7 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -293,6 +297,7 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -304,6 +309,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -314,6 +320,7 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -323,6 +330,7 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
list<dag> Pattern = [(set RO:$rd, (OpNode immPat:$imm))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -332,17 +340,19 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmPat, InstrItinClass itin,
- RegisterOperand RO> {
+ RegisterOperand RO, Operand ImmOpnd> {
dag OutOperandList = (outs RO:$rd);
- dag InOperandList = (ins RO:$rt, uimm16:$rs_sa);
+ dag InOperandList = (ins RO:$rt, ImmOpnd:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))];
InstrItinClass Itinerary = itin;
bit hasSideEffects = 1;
+ string BaseOpcode = instr_asm;
}
class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -353,6 +363,7 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))];
InstrItinClass Itinerary = itin;
bit mayLoad = 1;
+ string BaseOpcode = instr_asm;
}
class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -363,17 +374,19 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- SDPatternOperator ImmOp, InstrItinClass itin> {
+ Operand ImmOp, SDPatternOperator Imm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
- dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src);
+ dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
list<dag> Pattern = [(set GPR32Opnd:$rt,
- (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))];
+ (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, Imm:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -382,6 +395,7 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -390,15 +404,17 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
dag OutOperandList = (outs ACC64DSPOpnd:$ac);
- dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin);
+ dag InOperandList = (ins simm6:$shift, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -408,6 +424,7 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -417,6 +434,7 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -426,15 +444,17 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs);
- dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -444,6 +464,7 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -454,6 +475,7 @@ class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))];
InstrItinClass Itinerary = itin;
bit isCommutable = 1;
+ string BaseOpcode = instr_asm;
}
class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -465,6 +487,7 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
(OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
InstrItinClass Itinerary = itin;
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
@@ -474,6 +497,7 @@ class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin> {
@@ -481,6 +505,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin>
dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
@@ -506,6 +531,7 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
//===----------------------------------------------------------------------===//
@@ -639,7 +665,7 @@ class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
// Shift
class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>,
+ NoItinerary, DSPROpnd, uimm3>,
Defs<[DSPOutFlag22]>;
class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
@@ -647,13 +673,13 @@ class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
Defs<[DSPOutFlag22]>;
class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm3>;
class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
NoItinerary, DSPROpnd>;
class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>,
+ NoItinerary, DSPROpnd, uimm4>,
Defs<[DSPOutFlag22]>;
class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
@@ -661,7 +687,8 @@ class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
Defs<[DSPOutFlag22]>;
class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
- immZExt4, NoItinerary, DSPROpnd>,
+ immZExt4, NoItinerary, DSPROpnd,
+ uimm4>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
@@ -669,19 +696,21 @@ class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
Defs<[DSPOutFlag22]>;
class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm4>;
class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
NoItinerary, DSPROpnd>;
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
- immZExt4, NoItinerary, DSPROpnd>;
+ immZExt4, NoItinerary, DSPROpnd,
+ uimm4>;
class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
NoItinerary, DSPROpnd>;
class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
- immZExt5, NoItinerary, GPR32Opnd>,
+ immZExt5, NoItinerary, GPR32Opnd,
+ uimm5>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
@@ -689,7 +718,8 @@ class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
Defs<[DSPOutFlag22]>;
class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
- immZExt5, NoItinerary, GPR32Opnd>;
+ immZExt5, NoItinerary, GPR32Opnd,
+ uimm5>;
class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
NoItinerary, GPR32Opnd>;
@@ -1039,32 +1069,33 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
// Shift
class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm3>;
class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
NoItinerary, DSPROpnd>;
class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
- immZExt3, NoItinerary, DSPROpnd>;
+ immZExt3, NoItinerary, DSPROpnd,
+ uimm3>;
class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
NoItinerary, DSPROpnd>;
class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm4>;
class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>;
// Misc
-class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
NoItinerary>;
-class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
NoItinerary>;
-class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
- NoItinerary>;
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
+ immZExt5, NoItinerary>;
// Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
@@ -1072,80 +1103,80 @@ def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
// Instruction defs.
// MIPS DSP Rev 1
-def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC;
-def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC;
-def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC;
-def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC;
-def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC;
-def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
-def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC;
-def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
-def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC;
-def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC;
-def ADDSC : ADDSC_ENC, ADDSC_DESC;
-def ADDWC : ADDWC_ENC, ADDWC_DESC;
+def ADDU_QB : DspMMRel, ADDU_QB_ENC, ADDU_QB_DESC;
+def ADDU_S_QB : DspMMRel, ADDU_S_QB_ENC, ADDU_S_QB_DESC;
+def SUBU_QB : DspMMRel, SUBU_QB_ENC, SUBU_QB_DESC;
+def SUBU_S_QB : DspMMRel, SUBU_S_QB_ENC, SUBU_S_QB_DESC;
+def ADDQ_PH : DspMMRel, ADDQ_PH_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH : DspMMRel, ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
+def SUBQ_PH : DspMMRel, SUBQ_PH_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH : DspMMRel, SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
+def ADDQ_S_W : DspMMRel, ADDQ_S_W_ENC, ADDQ_S_W_DESC;
+def SUBQ_S_W : DspMMRel, SUBQ_S_W_ENC, SUBQ_S_W_DESC;
+def ADDSC : DspMMRel, ADDSC_ENC, ADDSC_DESC;
+def ADDWC : DspMMRel, ADDWC_ENC, ADDWC_DESC;
def MODSUB : MODSUB_ENC, MODSUB_DESC;
-def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC;
-def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
-def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC;
-def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
-def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
-def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
-def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
-def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
-def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
-def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
-def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
-def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
-def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
-def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
-def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
-def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
-def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
-def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC;
-def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC;
-def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC;
-def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC;
-def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC;
-def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC;
-def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC;
-def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
-def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC;
-def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC;
-def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC;
-def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
-def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC;
-def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC;
-def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC;
-def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC;
-def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
-def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
-def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
-def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
-def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
+def RADDU_W_QB : DspMMRel, RADDU_W_QB_ENC, RADDU_W_QB_DESC;
+def ABSQ_S_PH : DspMMRel, ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
+def ABSQ_S_W : DspMMRel, ABSQ_S_W_ENC, ABSQ_S_W_DESC;
+def PRECRQ_QB_PH : DspMMRel, PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQ_PH_W : DspMMRel, PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_RS_PH_W : DspMMRel, PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
+def PRECRQU_S_QB_PH : DspMMRel, PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECEQ_W_PHL : DspMMRel, PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
+def PRECEQ_W_PHR : DspMMRel, PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
+def PRECEQU_PH_QBL : DspMMRel, PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
+def PRECEQU_PH_QBR : DspMMRel, PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
+def PRECEQU_PH_QBLA : DspMMRel, PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
+def PRECEQU_PH_QBRA : DspMMRel, PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
+def PRECEU_PH_QBL : DspMMRel, PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
+def PRECEU_PH_QBR : DspMMRel, PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
+def PRECEU_PH_QBLA : DspMMRel, PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
+def PRECEU_PH_QBRA : DspMMRel, PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
+def SHLL_QB : DspMMRel, SHLL_QB_ENC, SHLL_QB_DESC;
+def SHLLV_QB : DspMMRel, SHLLV_QB_ENC, SHLLV_QB_DESC;
+def SHRL_QB : DspMMRel, SHRL_QB_ENC, SHRL_QB_DESC;
+def SHRLV_QB : DspMMRel, SHRLV_QB_ENC, SHRLV_QB_DESC;
+def SHLL_PH : DspMMRel, SHLL_PH_ENC, SHLL_PH_DESC;
+def SHLLV_PH : DspMMRel, SHLLV_PH_ENC, SHLLV_PH_DESC;
+def SHLL_S_PH : DspMMRel, SHLL_S_PH_ENC, SHLL_S_PH_DESC;
+def SHLLV_S_PH : DspMMRel, SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
+def SHRA_PH : DspMMRel, SHRA_PH_ENC, SHRA_PH_DESC;
+def SHRAV_PH : DspMMRel, SHRAV_PH_ENC, SHRAV_PH_DESC;
+def SHRA_R_PH : DspMMRel, SHRA_R_PH_ENC, SHRA_R_PH_DESC;
+def SHRAV_R_PH : DspMMRel, SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
+def SHLL_S_W : DspMMRel, SHLL_S_W_ENC, SHLL_S_W_DESC;
+def SHLLV_S_W : DspMMRel, SHLLV_S_W_ENC, SHLLV_S_W_DESC;
+def SHRA_R_W : DspMMRel, SHRA_R_W_ENC, SHRA_R_W_DESC;
+def SHRAV_R_W : DspMMRel, SHRAV_R_W_ENC, SHRAV_R_W_DESC;
+def MULEU_S_PH_QBL : DspMMRel, MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR : DspMMRel, MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
+def MULEQ_S_W_PHL : DspMMRel, MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR : DspMMRel, MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
+def MULQ_RS_PH : DspMMRel, MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC;
-def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
-def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
-def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
-def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
-def MFHI_DSP : MFHI_ENC, MFHI_DESC;
-def MFLO_DSP : MFLO_ENC, MFLO_DESC;
-def MTHI_DSP : MTHI_ENC, MTHI_DESC;
-def MTLO_DSP : MTLO_ENC, MTLO_DESC;
-def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
-def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
-def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
-def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
-def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
-def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
-def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
-def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
-def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC;
-def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC;
-def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC;
-def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC;
-def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC;
-def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC;
+def MAQ_S_W_PHL : DspMMRel, MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_S_W_PHR : DspMMRel, MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHL : DspMMRel, MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_SA_W_PHR : DspMMRel, MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP : DspMMRel, MFHI_ENC, MFHI_DESC;
+def MFLO_DSP : DspMMRel, MFLO_ENC, MFLO_DESC;
+def MTHI_DSP : DspMMRel, MTHI_ENC, MTHI_DESC;
+def MTLO_DSP : DspMMRel, MTLO_ENC, MTLO_DESC;
+def DPAU_H_QBL : DspMMRel, DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR : DspMMRel, DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
+def DPSU_H_QBL : DspMMRel, DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR : DspMMRel, DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
+def DPAQ_S_W_PH : DspMMRel, DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
+def DPSQ_S_W_PH : DspMMRel, DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
+def DPAQ_SA_L_W : DspMMRel, DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
+def DPSQ_SA_L_W : DspMMRel, DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
+def MULT_DSP : DspMMRel, MULT_DSP_ENC, MULT_DSP_DESC;
+def MULTU_DSP : DspMMRel, MULTU_DSP_ENC, MULTU_DSP_DESC;
+def MADD_DSP : DspMMRel, MADD_DSP_ENC, MADD_DSP_DESC;
+def MADDU_DSP : DspMMRel, MADDU_DSP_ENC, MADDU_DSP_DESC;
+def MSUB_DSP : DspMMRel, MSUB_DSP_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP : DspMMRel, MSUBU_DSP_ENC, MSUBU_DSP_DESC;
def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC;
def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC;
def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC;
@@ -1156,87 +1187,85 @@ def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC;
def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC;
def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC;
def BITREV : BITREV_ENC, BITREV_DESC;
-def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC;
-def REPL_QB : REPL_QB_ENC, REPL_QB_DESC;
-def REPL_PH : REPL_PH_ENC, REPL_PH_DESC;
-def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC;
-def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC;
-def PICK_QB : PICK_QB_ENC, PICK_QB_DESC;
-def PICK_PH : PICK_PH_ENC, PICK_PH_DESC;
-def LWX : LWX_ENC, LWX_DESC;
-def LHX : LHX_ENC, LHX_DESC;
-def LBUX : LBUX_ENC, LBUX_DESC;
+def PACKRL_PH : DspMMRel, PACKRL_PH_ENC, PACKRL_PH_DESC;
+def REPL_QB : DspMMRel, REPL_QB_ENC, REPL_QB_DESC;
+def REPL_PH : DspMMRel, REPL_PH_ENC, REPL_PH_DESC;
+def REPLV_QB : DspMMRel, REPLV_QB_ENC, REPLV_QB_DESC;
+def REPLV_PH : DspMMRel, REPLV_PH_ENC, REPLV_PH_DESC;
+def PICK_QB : DspMMRel, PICK_QB_ENC, PICK_QB_DESC;
+def PICK_PH : DspMMRel, PICK_PH_ENC, PICK_PH_DESC;
+def LWX : DspMMRel, LWX_ENC, LWX_DESC;
+def LHX : DspMMRel, LHX_ENC, LHX_DESC;
+def LBUX : DspMMRel, LBUX_ENC, LBUX_DESC;
def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC;
-def INSV : INSV_ENC, INSV_DESC;
-def EXTP : EXTP_ENC, EXTP_DESC;
-def EXTPV : EXTPV_ENC, EXTPV_DESC;
-def EXTPDP : EXTPDP_ENC, EXTPDP_DESC;
-def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC;
-def EXTR_W : EXTR_W_ENC, EXTR_W_DESC;
-def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC;
-def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC;
-def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC;
-def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC;
-def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
-def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC;
-def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC;
-def SHILO : SHILO_ENC, SHILO_DESC;
-def SHILOV : SHILOV_ENC, SHILOV_DESC;
-def MTHLIP : MTHLIP_ENC, MTHLIP_DESC;
-def RDDSP : RDDSP_ENC, RDDSP_DESC;
-def WRDSP : WRDSP_ENC, WRDSP_DESC;
+def INSV : DspMMRel, INSV_ENC, INSV_DESC;
+def EXTP : DspMMRel, EXTP_ENC, EXTP_DESC;
+def EXTPV : DspMMRel, EXTPV_ENC, EXTPV_DESC;
+def EXTPDP : DspMMRel, EXTPDP_ENC, EXTPDP_DESC;
+def EXTPDPV : DspMMRel, EXTPDPV_ENC, EXTPDPV_DESC;
+def EXTR_W : DspMMRel, EXTR_W_ENC, EXTR_W_DESC;
+def EXTRV_W : DspMMRel, EXTRV_W_ENC, EXTRV_W_DESC;
+def EXTR_R_W : DspMMRel, EXTR_R_W_ENC, EXTR_R_W_DESC;
+def EXTRV_R_W : DspMMRel, EXTRV_R_W_ENC, EXTRV_R_W_DESC;
+def EXTR_RS_W : DspMMRel, EXTR_RS_W_ENC, EXTR_RS_W_DESC;
+def EXTRV_RS_W : DspMMRel, EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
+def EXTR_S_H : DspMMRel, EXTR_S_H_ENC, EXTR_S_H_DESC;
+def EXTRV_S_H : DspMMRel, EXTRV_S_H_ENC, EXTRV_S_H_DESC;
+def SHILO : DspMMRel, SHILO_ENC, SHILO_DESC;
+def SHILOV : DspMMRel, SHILOV_ENC, SHILOV_DESC;
+def MTHLIP : DspMMRel, MTHLIP_ENC, MTHLIP_DESC;
+def RDDSP : DspMMRel, RDDSP_ENC, RDDSP_DESC;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def WRDSP : WRDSP_ENC, WRDSP_DESC;
+}
// MIPS DSP Rev 2
-let Predicates = [HasDSPR2] in {
-
-def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC;
-def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC;
-def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC;
-def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC;
-def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC;
-def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC;
-def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC;
-def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC;
-def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC;
-def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC;
-def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC;
-def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC;
-def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC;
-def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC;
-def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC;
-def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC;
-def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC;
-def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC;
-def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC;
-def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC;
-def MUL_PH : MUL_PH_ENC, MUL_PH_DESC;
-def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC;
-def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC;
-def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC;
-def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC;
-def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC;
-def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC;
-def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC;
-def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC;
-def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC;
-def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC;
-def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC;
-def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC;
-def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC;
-def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC;
-def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC;
-def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC;
-def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC;
-def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC;
-def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC;
-def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC;
-def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC;
-def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC;
-def APPEND : APPEND_ENC, APPEND_DESC;
-def BALIGN : BALIGN_ENC, BALIGN_DESC;
-def PREPEND : PREPEND_ENC, PREPEND_DESC;
-
-}
+def ADDU_PH : DspMMRel, ADDU_PH_ENC, ADDU_PH_DESC, ISA_DSPR2;
+def ADDU_S_PH : DspMMRel, ADDU_S_PH_ENC, ADDU_S_PH_DESC, ISA_DSPR2;
+def SUBU_PH : DspMMRel, SUBU_PH_ENC, SUBU_PH_DESC, ISA_DSPR2;
+def SUBU_S_PH : DspMMRel, SUBU_S_PH_ENC, SUBU_S_PH_DESC, ISA_DSPR2;
+def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC, ISA_DSPR2;
+def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC, ISA_DSPR2;
+def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC, ISA_DSPR2;
+def ABSQ_S_QB : DspMMRel, ABSQ_S_QB_ENC, ABSQ_S_QB_DESC, ISA_DSPR2;
+def ADDUH_QB : DspMMRel, ADDUH_QB_ENC, ADDUH_QB_DESC, ISA_DSPR2;
+def ADDUH_R_QB : DspMMRel, ADDUH_R_QB_ENC, ADDUH_R_QB_DESC, ISA_DSPR2;
+def SUBUH_QB : DspMMRel, SUBUH_QB_ENC, SUBUH_QB_DESC, ISA_DSPR2;
+def SUBUH_R_QB : DspMMRel, SUBUH_R_QB_ENC, SUBUH_R_QB_DESC, ISA_DSPR2;
+def ADDQH_PH : DspMMRel, ADDQH_PH_ENC, ADDQH_PH_DESC, ISA_DSPR2;
+def ADDQH_R_PH : DspMMRel, ADDQH_R_PH_ENC, ADDQH_R_PH_DESC, ISA_DSPR2;
+def SUBQH_PH : DspMMRel, SUBQH_PH_ENC, SUBQH_PH_DESC, ISA_DSPR2;
+def SUBQH_R_PH : DspMMRel, SUBQH_R_PH_ENC, SUBQH_R_PH_DESC, ISA_DSPR2;
+def ADDQH_W : DspMMRel, ADDQH_W_ENC, ADDQH_W_DESC, ISA_DSPR2;
+def ADDQH_R_W : DspMMRel, ADDQH_R_W_ENC, ADDQH_R_W_DESC, ISA_DSPR2;
+def SUBQH_W : DspMMRel, SUBQH_W_ENC, SUBQH_W_DESC, ISA_DSPR2;
+def SUBQH_R_W : DspMMRel, SUBQH_R_W_ENC, SUBQH_R_W_DESC, ISA_DSPR2;
+def MUL_PH : DspMMRel, MUL_PH_ENC, MUL_PH_DESC, ISA_DSPR2;
+def MUL_S_PH : DspMMRel, MUL_S_PH_ENC, MUL_S_PH_DESC, ISA_DSPR2;
+def MULQ_S_W : DspMMRel, MULQ_S_W_ENC, MULQ_S_W_DESC, ISA_DSPR2;
+def MULQ_RS_W : DspMMRel, MULQ_RS_W_ENC, MULQ_RS_W_DESC, ISA_DSPR2;
+def MULQ_S_PH : DspMMRel, MULQ_S_PH_ENC, MULQ_S_PH_DESC, ISA_DSPR2;
+def DPA_W_PH : DspMMRel, DPA_W_PH_ENC, DPA_W_PH_DESC, ISA_DSPR2;
+def DPS_W_PH : DspMMRel, DPS_W_PH_ENC, DPS_W_PH_DESC, ISA_DSPR2;
+def DPAQX_S_W_PH : DspMMRel, DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC, ISA_DSPR2;
+def DPAQX_SA_W_PH : DspMMRel, DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC, ISA_DSPR2;
+def DPAX_W_PH : DspMMRel, DPAX_W_PH_ENC, DPAX_W_PH_DESC, ISA_DSPR2;
+def DPSX_W_PH : DspMMRel, DPSX_W_PH_ENC, DPSX_W_PH_DESC, ISA_DSPR2;
+def DPSQX_S_W_PH : DspMMRel, DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC, ISA_DSPR2;
+def DPSQX_SA_W_PH : DspMMRel, DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC, ISA_DSPR2;
+def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC, ISA_DSPR2;
+def PRECR_QB_PH : DspMMRel, PRECR_QB_PH_ENC, PRECR_QB_PH_DESC, ISA_DSPR2;
+def PRECR_SRA_PH_W : DspMMRel, PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC, ISA_DSPR2;
+def PRECR_SRA_R_PH_W : DspMMRel, PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC, ISA_DSPR2;
+def SHRA_QB : DspMMRel, SHRA_QB_ENC, SHRA_QB_DESC, ISA_DSPR2;
+def SHRAV_QB : DspMMRel, SHRAV_QB_ENC, SHRAV_QB_DESC, ISA_DSPR2;
+def SHRA_R_QB : DspMMRel, SHRA_R_QB_ENC, SHRA_R_QB_DESC, ISA_DSPR2;
+def SHRAV_R_QB : DspMMRel, SHRAV_R_QB_ENC, SHRAV_R_QB_DESC, ISA_DSPR2;
+def SHRL_PH : DspMMRel, SHRL_PH_ENC, SHRL_PH_DESC, ISA_DSPR2;
+def SHRLV_PH : DspMMRel, SHRLV_PH_ENC, SHRLV_PH_DESC, ISA_DSPR2;
+def APPEND : APPEND_ENC, APPEND_DESC, ISA_DSPR2;
+def BALIGN : BALIGN_ENC, BALIGN_DESC, ISA_DSPR2;
+def PREPEND : DspMMRel, PREPEND_ENC, PREPEND_DESC, ISA_DSPR2;
// Pseudos.
let isPseudo = 1, isCodeGenOnly = 1 in {
@@ -1415,3 +1444,8 @@ let AddedComplexity = 20 in {
def : IndexedLoadPat<sextloadi16, LHX>;
def : IndexedLoadPat<load, LWX>;
}
+
+// Instruction alias.
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : DSPInstAlias<"wrdsp $rt", (WRDSP GPR32Opnd:$rt, 0x1F), 1>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 4faeb33..8313d90 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -355,9 +355,8 @@ void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
SE = MBB.succ_end(); SI != SE; ++SI)
if (*SI != &SuccBB)
- for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
- LE = (*SI)->livein_end(); LI != LE; ++LI)
- Uses.set(*LI);
+ for (const auto &LI : (*SI)->liveins())
+ Uses.set(LI.PhysReg);
}
bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
@@ -431,7 +430,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
(*MI.memoperands_begin())->getPseudoValue()) {
if (isa<FixedStackPseudoSourceValue>(PSV))
return false;
- return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack();
+ return !PSV->isConstant(nullptr) && !PSV->isStack();
}
return true;
@@ -598,7 +597,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Get instruction with delay slot.
MachineBasicBlock::instr_iterator DSI(I);
- if (InMicroMipsMode && TII->GetInstSizeInBytes(std::next(DSI)) == 2 &&
+ if (InMicroMipsMode && TII->GetInstSizeInBytes(&*std::next(DSI)) == 2 &&
DSI->isCall()) {
// If instruction in delay slot is 16b change opcode to
// corresponding instruction with short delay slot.
@@ -713,8 +712,9 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
if (DisableBackwardSearch)
return false;
- RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
+ auto *Fn = MBB.getParent();
+ RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
+ MemDefsUses MemDU(Fn->getDataLayout(), Fn->getFrameInfo());
ReverseIter Filler;
RegDU.init(*Slot);
@@ -763,6 +763,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
BB2BrMap BrMap;
std::unique_ptr<InspectMemInstr> IM;
Iter Filler;
+ auto *Fn = MBB.getParent();
// Iterate over SuccBB's predecessor list.
for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
@@ -772,15 +773,15 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
// Do not allow moving instructions which have unallocatable register operands
// across basic block boundaries.
- RegDU.setUnallocatableRegs(*MBB.getParent());
+ RegDU.setUnallocatableRegs(*Fn);
// Only allow moving loads from stack or constants if any of the SuccBB's
// predecessors have multiple successors.
if (HasMultipleSuccs) {
IM.reset(new LoadFromStackOrConst());
} else {
- const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
- IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
+ const MachineFrameInfo *MFI = Fn->getFrameInfo();
+ IM.reset(new MemDefsUses(Fn->getDataLayout(), MFI));
}
if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
@@ -800,12 +801,13 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
// Select the successor with the largest edge probability.
auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
- MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(),
- [&](const MachineBasicBlock *Dst0,
- const MachineBasicBlock *Dst1) {
- return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1);
- });
- return S->isLandingPad() ? nullptr : S;
+ MachineBasicBlock *S = *std::max_element(
+ B.succ_begin(), B.succ_end(),
+ [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
+ return Prob.getEdgeProbability(&B, Dst0) <
+ Prob.getEdgeProbability(&B, Dst1);
+ });
+ return S->isEHPad() ? nullptr : S;
}
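
The updated selectSuccBB compares BranchProbability objects rather than raw edge weights; the selection itself remains a std::max_element scan under a comparator. The idiom with toy data:

    #include <algorithm>
    #include <vector>
    int main() {
      std::vector<double> prob = {0.2, 0.5, 0.3}; // per-successor probabilities
      auto it = std::max_element(prob.begin(), prob.end());
      return static_cast<int>(it - prob.begin()); // index 1, the likeliest
    }
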
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
diff --git a/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
new file mode 100644
index 0000000..11e191a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -0,0 +1,84 @@
+//===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips EVA ASE instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsEVAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl, StdArch {
+ let DecoderNamespace = "Mips";
+ let EncodingPredicates = [HasStdEnc];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Field Values
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA
+def OPCODE6_LBE : OPCODE6<0b101100>;
+def OPCODE6_LBuE : OPCODE6<0b101000>;
+def OPCODE6_LHE : OPCODE6<0b101101>;
+def OPCODE6_LHuE : OPCODE6<0b101001>;
+def OPCODE6_LWE : OPCODE6<0b101111>;
+
+def OPCODE6_SBE : OPCODE6<0b011100>;
+def OPCODE6_SHE : OPCODE6<0b011101>;
+def OPCODE6_SWE : OPCODE6<0b011111>;
+
+// Load/Store Left/Right EVA
+def OPCODE6_LWLE : OPCODE6<0b011001>;
+def OPCODE6_LWRE : OPCODE6<0b011010>;
+def OPCODE6_SWLE : OPCODE6<0b100001>;
+def OPCODE6_SWRE : OPCODE6<0b100010>;
+
+// Load-linked EVA, Store-conditional EVA
+def OPCODE6_LLE : OPCODE6<0b101110>;
+def OPCODE6_SCE : OPCODE6<0b011110>;
+
+def OPCODE6_TLBINV : OPCODE6<0b000011>;
+def OPCODE6_TLBINVF : OPCODE6<0b000100>;
+
+def OPCODE6_CACHEE : OPCODE6<0b011011>;
+def OPCODE6_PREFE : OPCODE6<0b100011>;
+
+def OPGROUP_COP0 : OPGROUP<0b010000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Encoding Formats
+//
+//===----------------------------------------------------------------------===//
+
+class SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6 Operation> : MipsEVAInst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-7} = offset;
+ let Inst{6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class TLB_FM<OPCODE6 Operation> : MipsEVAInst {
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP0.Value;
+ let Inst{25} = 1; // CO
+ let Inst{24-6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
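As a sanity check on the SPECIAL3_EVA_LOAD_STORE_FM layout above, here is a sketch that packs the same fields into a 32-bit word. The SPECIAL3 major opcode (0b011111) and the field positions come from the record; the helper itself is hypothetical:

#include <cstdint>

// Inst{31-26}=SPECIAL3, Inst{25-21}=base, Inst{20-16}=rt/hint,
// Inst{15-7}=offset (signed 9-bit), Inst{6}=0, Inst{5-0}=funct.
uint32_t encodeEvaLoadStore(uint8_t funct, uint8_t base, uint8_t rt,
                            int16_t offset9) {
  const uint32_t Special3 = 0b011111; // OPGROUP_SPECIAL3 in the MIPS ISA
  return (Special3 << 26) | (uint32_t(base & 0x1F) << 21) |
         (uint32_t(rt & 0x1F) << 16) |
         ((uint32_t(offset9) & 0x1FF) << 7) | (funct & 0x3F);
}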
diff --git a/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
new file mode 100644
index 0000000..36c9694
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -0,0 +1,192 @@
+//===- MipsEVAInstrInfo.td - EVA ASE instructions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction encodings
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA encodings
+class LBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LBE>;
+class LBuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LBuE>;
+class LHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LHE>;
+class LHuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LHuE>;
+class LWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWE>;
+
+class SBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SBE>;
+class SHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SHE>;
+class SWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWE>;
+
+// Load/Store Left/Right EVA encodings
+class LWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWLE>;
+class LWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWRE>;
+class SWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWLE>;
+class SWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWRE>;
+
+// Load-linked EVA, Store-conditional EVA encodings
+class LLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LLE>;
+class SCE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SCE>;
+
+class TLBINV_ENC : TLB_FM<OPCODE6_TLBINV>;
+class TLBINVF_ENC : TLB_FM<OPCODE6_TLBINVF>;
+
+class CACHEE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_CACHEE>;
+class PREFE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_PREFE>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction descriptions
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA descriptions
+class LOAD_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ bit canFoldAsLoad = 1;
+ bit mayLoad = 1;
+}
+
+class LBE_DESC : LOAD_EVA_DESC_BASE<"lbe", GPR32Opnd>;
+class LBuE_DESC : LOAD_EVA_DESC_BASE<"lbue", GPR32Opnd>;
+class LHE_DESC : LOAD_EVA_DESC_BASE<"lhe", GPR32Opnd>;
+class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd>;
+class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd>;
+
+class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator OpNode = null_frag> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ bit mayStore = 1;
+}
+
+class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd>;
+class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd>;
+class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd>;
+
+// Load/Store Left/Right EVA descriptions
+class LOAD_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr, GPROpnd:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ string Constraints = "$src = $rt";
+ bit canFoldAsLoad = 1;
+}
+
+class LWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwle", GPR32Opnd>;
+class LWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwre", GPR32Opnd>;
+
+class STORE_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ bit mayStore = 1;
+}
+
+class SWLE_DESC : STORE_LEFT_RIGHT_EVA_DESC_BASE<"swle", GPR32Opnd>;
+class SWRE_DESC : STORE_LEFT_RIGHT_EVA_DESC_BASE<"swre", GPR32Opnd>;
+
+// Load-linked EVA, Store-conditional EVA descriptions
+class LLE_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+ string DecoderMethod = "DecodeMemEVA";
+}
+
+class LLE_DESC : LLE_DESC_BASE<"lle", GPR32Opnd>;
+
+class SCE_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$dst);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+ string Constraints = "$rt = $dst";
+ string DecoderMethod = "DecodeMemEVA";
+}
+
+class SCE_DESC : SCE_DESC_BASE<"sce", GPR32Opnd>;
+
+class TLB_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ string AsmString = instr_asm;
+ list<dag> Pattern = [];
+}
+
+class TLBINV_DESC : TLB_DESC_BASE<"tlbinv">;
+class TLBINVF_DESC : TLB_DESC_BASE<"tlbinvf">;
+
+class CACHEE_DESC_BASE<string instr_asm, Operand MemOpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
+ string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeCacheeOp_CacheOpR6";
+}
+
+class CACHEE_DESC : CACHEE_DESC_BASE<"cachee", mem>;
+class PREFE_DESC : CACHEE_DESC_BASE<"prefe", mem>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+/// Load and Store EVA Instructions
+def LBE : LBE_ENC, LBE_DESC, INSN_EVA;
+def LBuE : LBuE_ENC, LBuE_DESC, INSN_EVA;
+def LHE : LHE_ENC, LHE_DESC, INSN_EVA;
+def LHuE : LHuE_ENC, LHuE_DESC, INSN_EVA;
+let AdditionalPredicates = [NotInMicroMips] in {
+def LWE : LWE_ENC, LWE_DESC, INSN_EVA;
+}
+def SBE : SBE_ENC, SBE_DESC, INSN_EVA;
+def SHE : SHE_ENC, SHE_DESC, INSN_EVA;
+let AdditionalPredicates = [NotInMicroMips] in {
+def SWE : SWE_ENC, SWE_DESC, INSN_EVA;
+}
+
+/// Load/Store Left/Right EVA
+let AdditionalPredicates = [NotInMicroMips] in {
+def LWLE : LWLE_ENC, LWLE_DESC, INSN_EVA_NOT_32R6_64R6;
+def LWRE : LWRE_ENC, LWRE_DESC, INSN_EVA_NOT_32R6_64R6;
+def SWLE : SWLE_ENC, SWLE_DESC, INSN_EVA_NOT_32R6_64R6;
+def SWRE : SWRE_ENC, SWRE_DESC, INSN_EVA_NOT_32R6_64R6;
+}
+
+/// Load-linked EVA, Store-conditional EVA
+let AdditionalPredicates = [NotInMicroMips] in {
+def LLE : LLE_ENC, LLE_DESC, INSN_EVA;
+def SCE : SCE_ENC, SCE_DESC, INSN_EVA;
+}
+
+def TLBINV : TLBINV_ENC, TLBINV_DESC, INSN_EVA;
+def TLBINVF : TLBINVF_ENC, TLBINVF_DESC, INSN_EVA;
+
+def CACHEE : CACHEE_ENC, CACHEE_DESC, INSN_EVA;
+def PREFE : PREFE_ENC, PREFE_DESC, INSN_EVA;
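All of the EVA memory definitions above take mem_simm9 addresses, so the reachable offset is the signed 9-bit range [-256, 255]. The corresponding range check, as a trivial hypothetical helper:

#include <cstdint>

// True when Offset fits the signed 9-bit immediate used by EVA memory ops.
bool isSImm9(int64_t Offset) { return Offset >= -256 && Offset <= 255; }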
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index 5152a07..e9eaf81 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -192,10 +192,10 @@ public:
TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
+ bool ISASupported = !Subtarget->hasMips32r6() && Subtarget->hasMips32();
TargetSupported =
- ((TM.getRelocationModel() == Reloc::PIC_) &&
- ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
- (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32())));
+ ISASupported && (TM.getRelocationModel() == Reloc::PIC_) &&
+ (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32());
UnsupportedFPMode = Subtarget->isFP64bit();
}
@@ -236,32 +236,36 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
std::swap(LHS, RHS);
unsigned Opc;
- if (ISDOpc == ISD::AND) {
+ switch (ISDOpc) {
+ case ISD::AND:
Opc = Mips::AND;
- } else if (ISDOpc == ISD::OR) {
+ break;
+ case ISD::OR:
Opc = Mips::OR;
- } else if (ISDOpc == ISD::XOR) {
+ break;
+ case ISD::XOR:
Opc = Mips::XOR;
- } else
+ break;
+ default:
llvm_unreachable("unexpected opcode");
+ }
unsigned LHSReg = getRegForValue(LHS);
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
- if (!ResultReg)
- return 0;
-
- unsigned RHSReg;
if (!LHSReg)
return 0;
+ unsigned RHSReg;
if (const auto *C = dyn_cast<ConstantInt>(RHS))
RHSReg = materializeInt(C, MVT::i32);
else
RHSReg = getRegForValue(RHS);
-
if (!RHSReg)
return 0;
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return 0;
+
emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg);
return ResultReg;
}
@@ -747,7 +751,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Offset = Addr.getOffset();
MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(FI)
@@ -798,7 +802,7 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Offset = Addr.getOffset();
MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
@@ -912,8 +916,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
.addReg(CondReg)
.addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
return false;
@@ -1057,22 +1060,16 @@ bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) {
// entirely within FPRs.
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
unsigned TempReg = createResultReg(&Mips::FGR32RegClass);
- unsigned Opc;
-
- if (SrcVT == MVT::f32)
- Opc = Mips::TRUNC_W_S;
- else
- Opc = Mips::TRUNC_W_D32;
+ unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32;
// Generate the convert.
emitInst(Opc, TempReg).addReg(SrcReg);
-
emitInst(Mips::MFC1, DestReg).addReg(TempReg);
updateValueMap(I, DestReg);
return true;
}
-//
+
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
SmallVectorImpl<MVT> &OutVTs,
unsigned &NumBytes) {
@@ -1196,7 +1193,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(Addr.getOffset()),
+ MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
(void)(MMO);
// if (!emitStore(ArgVT, ArgReg, Addr, MMO))
@@ -1607,19 +1604,23 @@ bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg) {
+ int64_t Imm;
+
switch (SrcVT.SimpleTy) {
default:
return false;
case MVT::i1:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1);
+ Imm = 1;
break;
case MVT::i8:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff);
+ Imm = 0xff;
break;
case MVT::i16:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff);
+ Imm = 0xffff;
break;
}
+
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm);
return true;
}
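The emitIntZExt rewrite above folds three ANDi emissions into one by selecting the mask first. The mask-per-width logic in isolation, as a sketch (the widths mirror MVT::i1/i8/i16):

#include <cstdint>

// Returns the ANDi mask that zero-extends a value of the given bit width,
// or 0 for unsupported widths (the fast-isel path bails out in that case).
uint32_t zextMask(unsigned SrcBits) {
  switch (SrcBits) {
  case 1:  return 0x1;
  case 8:  return 0xff;
  case 16: return 0xffff;
  default: return 0; // caller must treat this as "cannot handle"
  }
}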
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index fab2fdf..6756c17 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -117,6 +117,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::ERet: return "MipsISD::ERet";
case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
@@ -390,10 +391,10 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ if (!Subtarget.isGP64bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
setInsertFencesForAtomic(true);
@@ -437,9 +438,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP);
- setExceptionPointerRegister(ABI.IsN64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(ABI.IsN64() ? Mips::A1_64 : Mips::A1);
-
MaxStoresPerMemcpy = 16;
isMicroMips = Subtarget.inMicroMipsMode();
@@ -836,6 +834,14 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return SDValue();
}
+bool MipsTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget.hasMips32();
+}
+
+bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget.hasMips32();
+}
+
void
MipsTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
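The new isCheapToSpeculateCttz/Ctlz hooks return true from MIPS32 on, where clz is a single instruction that is fully defined for a zero input (it yields 32). In C terms, speculation deletes the zero-guard below; the guard appears here only because __builtin_clz is undefined at 0 in C:

#include <cstdint>

// Guarded form: roughly what the IR looks like before the backend
// speculates the count.
unsigned clzGuarded(uint32_t X) { return X ? __builtin_clz(X) : 32; }
// With isCheapToSpeculateCtlz() returning true, the select/branch is
// dropped and the count is emitted unconditionally as one MIPS32 clz.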
@@ -1092,8 +1098,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
@@ -1204,8 +1209,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loopMBB);
MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
@@ -1330,15 +1334,20 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
- if (Size == 4) {
- LL = isMicroMips ? Mips::LL_MM : Mips::LL;
- SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ if (Size == 4) {
+ if (isMicroMips) {
+ LL = Mips::LL_MM;
+ SC = Mips::SC_MM;
+ } else {
+ LL = Subtarget.hasMips32r6() ? Mips::LL_R6 : Mips::LL;
+ SC = Subtarget.hasMips32r6() ? Mips::SC_R6 : Mips::SC;
+ }
ZERO = Mips::ZERO;
BNE = Mips::BNE;
BEQ = Mips::BEQ;
} else {
- LL = Mips::LLD;
- SC = Mips::SCD;
+ LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+ SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
ZERO = Mips::ZERO_64;
BNE = Mips::BNE64;
BEQ = Mips::BEQ64;
@@ -1356,8 +1365,7 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
@@ -1440,8 +1448,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, sinkMBB);
@@ -1586,9 +1593,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(), MemVT, false, false,
- false, 0);
+ Addr =
+ DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ MemVT, false, false, false, 0);
Chain = Addr.getValue(1);
if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || ABI.IsN64()) {
@@ -1690,14 +1698,15 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
if (LargeGOT)
- return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
- MipsII::MO_GOT_LO16, DAG.getEntryNode(),
- MachinePointerInfo::getGOT());
+ return getAddrGlobalLargeGOT(
+ N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16,
+ DAG.getEntryNode(),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
- return getAddrGlobal(N, SDLoc(N), Ty, DAG,
- (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP
- : MipsII::MO_GOT16,
- DAG.getEntryNode(), MachinePointerInfo::getGOT());
+ return getAddrGlobal(
+ N, SDLoc(N), Ty, DAG,
+ (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16,
+ DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
@@ -1719,6 +1728,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
// Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -1813,7 +1825,8 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
- if (TLOF->IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
+ if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(),
+ getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
@@ -2946,8 +2959,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
- Function::const_arg_iterator FuncArg =
- DAG.getMachineFunction().getFunction()->arg_begin();
+ const Function *Func = DAG.getMachineFunction().getFunction();
+ Function::const_arg_iterator FuncArg = Func->arg_begin();
+
+ if (Func->hasFnAttribute("interrupt") && !Func->arg_empty())
+ report_fatal_error(
+ "Functions with the interrupt attribute cannot have arguments!");
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
@@ -3019,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// We ought to be able to use LocVT directly but O32 sets it to i32
// when allocating floating point values to integer registers.
// This shouldn't influence how we load the value into registers unless
- // we are targetting softfloat.
+ // we are targeting softfloat.
if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat())
LocVT = VA.getValVT();
}
@@ -3033,9 +3050,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ SDValue ArgValue = DAG.getLoad(
+ LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
OutChains.push_back(ArgValue.getValue(1));
ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
@@ -3098,8 +3116,20 @@ MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
}
SDValue
-MipsTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool IsVarArg,
+MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setISR();
+
+ return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps);
+}
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc DL, SelectionDAG &DAG) const {
@@ -3192,7 +3222,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- // Return on Mips is always a "jr $ra"
+ // ISRs must use "eret".
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt"))
+ return LowerInterruptReturn(RetOps, DL, DAG);
+
+ // Standard return on Mips is a "jr $ra"
return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps);
}
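The LowerReturn change keys off the IR-level "interrupt" string attribute. Outside of lowering, the same test is a one-line attribute query; a minimal sketch against the LLVM C++ API:

#include "llvm/IR/Function.h"

// True when F must be lowered as an interrupt service routine, i.e. its
// return becomes MipsISD::ERet ("eret") rather than MipsISD::Ret ("jr $ra").
bool isISR(const llvm::Function &F) {
  return F.hasFnAttribute("interrupt");
}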
@@ -3300,7 +3334,7 @@ static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix,
// Search for the first numeric character.
StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
- I = std::find_if(B, E, std::ptr_fun(isdigit));
+ I = std::find_if(B, E, isdigit);
Prefix = StringRef(B, I - B);
@@ -3669,7 +3703,7 @@ void MipsTargetLowering::passByValArg(
unsigned NumRegs = LastReg - FirstReg;
if (NumRegs) {
- const ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
+ ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
@@ -3755,7 +3789,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
SDValue Chain, SDLoc DL,
SelectionDAG &DAG,
CCState &State) const {
- const ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
+ ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
unsigned Idx = State.getFirstUnallocated(ArgRegs);
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
@@ -3812,7 +3846,7 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
if (State->getCallingConv() != CallingConv::Fast) {
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
- const ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
+ ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
// FIXME: The O32 case actually describes no shadow registers.
const MCPhysReg *ShadowRegs =
ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs;
@@ -3860,8 +3894,7 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
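Several hunks above replace the two-step iterator bump (MachineFunction::iterator It = BB; ++It;) with ++BB->getIterator(), relying on ilist nodes knowing their own position. The closest standard-library analogue of the resulting insert-after-this-block pattern, as a generic sketch:

#include <iterator>
#include <list>

// Insert NewVal immediately after the element It points at, mirroring how
// the patch inserts freshly created machine basic blocks after the current
// one (loopMBB, exitMBB, and so on).
template <typename T>
void insertAfter(std::list<T> &L, typename std::list<T>::iterator It,
                 T NewVal) {
  L.insert(std::next(It), std::move(NewVal));
}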
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index b3d861d..b33e125 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -67,6 +67,10 @@ namespace llvm {
// Return
Ret,
+ // Interrupt, exception, or error trap return.
+ ERet,
+
+ // Software Exception Return.
EH_RETURN,
// Node used to extract integer from accumulator.
@@ -231,6 +235,9 @@ namespace llvm {
return MVT::i32;
}
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -258,17 +265,25 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
- struct LTStr {
- bool operator()(const char *S1, const char *S2) const {
- return strcmp(S1, S2) < 0;
- }
- };
-
void HandleByVal(CCState *, unsigned &, unsigned) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return ABI.IsN64() ? Mips::A0_64 : Mips::A0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return ABI.IsN64() ? Mips::A1_64 : Mips::A1;
+ }
+
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Mips doesn't have any special address spaces so we just reserve
@@ -290,9 +305,10 @@ namespace llvm {
unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(N, Ty, DAG, GOTFlag));
- SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
- MachinePointerInfo::getGOT(), false, false,
- false, 0);
+ SDValue Load =
+ DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
unsigned LoFlag = IsN32OrN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty,
getTargetNode(N, Ty, DAG, LoFlag));
@@ -487,6 +503,9 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, SDLoc DL,
+ SelectionDAG &DAG) const;
+
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
// Inline asm support
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index cb91225..377260f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -136,7 +136,7 @@ multiclass ABSS_M<string opstr, InstrItinClass Itin,
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>, FGR_32;
- def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>, FGR_64 {
+ def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>, FGR_64 {
let DecoderNamespace = "Mips64";
}
}
@@ -267,24 +267,25 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
+def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
ABSS_FM<0xc, 16>, ISA_MIPS2;
-def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
+defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
+def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
ABSS_FM<0xd, 16>, ISA_MIPS2;
-def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
+def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
ABSS_FM<0xe, 16>, ISA_MIPS2;
-def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
+def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
ABSS_FM<0xf, 16>, ISA_MIPS2;
def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x24, 16>;
-defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2;
defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2;
defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2;
defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>;
let DecoderNamespace = "Mips64" in {
+ let AdditionalPredicates = [NotInMicroMips] in {
def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>,
ABSS_FM<0x8, 16>, FGR_64;
def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>,
@@ -301,14 +302,17 @@ let DecoderNamespace = "Mips64" in {
ABSS_FM<0xb, 16>, FGR_64;
def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>,
ABSS_FM<0xb, 17>, FGR_64;
+ }
}
def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x20, 20>;
-def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x25, 16>, INSN_MIPS3_32R2;
-def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x25, 17>, INSN_MIPS3_32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
+ ABSS_FM<0x25, 16>, INSN_MIPS3_32R2;
+ def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x25, 17>, INSN_MIPS3_32R2;
+}
def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
ABSS_FM<0x20, 17>, FGR_32;
@@ -320,8 +324,10 @@ def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>,
let DecoderNamespace = "Mips64" in {
def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>,
ABSS_FM<0x20, 17>, FGR_64;
- def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x20, 21>, FGR_64;
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 21>, FGR_64;
+ }
def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x21, 20>, FGR_64;
def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>,
@@ -345,8 +351,8 @@ def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>,
defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>;
defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>;
-def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>,
- ABSS_FM<0x4, 16>, ISA_MIPS2;
+def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd,
+ II_SQRT_S, fsqrt>, ABSS_FM<0x4, 16>, ISA_MIPS2;
defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2;
// The odd-numbered registers are only referenced when doing loads,
@@ -503,13 +509,13 @@ let AdditionalPredicates = [NoNaNsFPMath],
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
+def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>,
+def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, II_BC1FL, MIPS_BRANCH_F, 0>,
BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6;
-def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
+def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>,
BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>,
+def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6;
/// Floating Point Compare
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index 5f4fcc3..45baf27 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -132,7 +132,7 @@ class PseudoSE<dag outs, dag ins, list<dag> pattern,
// These are aliases that require C++ handling to convert to the target
// instruction, while InstAliases can be handled directly by tblgen.
class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
- MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
+ MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo>, PredicateControl {
let isPseudo = 1;
let Pattern = [];
}
@@ -644,16 +644,16 @@ class BRK_FM<bits<6> funct> : StdArch
// Exception return format <Cop0|1|0|LLBit|funct>
//===----------------------------------------------------------------------===//
-class ER_FM<bits<6> funct> : StdArch
+class ER_FM<bits<6> funct, bit LLBit> : StdArch
{
bits<32> Inst;
let Inst{31-26} = 0x10;
let Inst{25} = 1;
- let Inst{24-6} = 0;
+ let Inst{24-7} = 0;
+ let Inst{6} = LLBit;
let Inst{5-0} = funct;
}
-
//===----------------------------------------------------------------------===//
// Enable/disable interrupt instruction format <Cop0|MFMC0|rt|12|0|sc|0|0>
//===----------------------------------------------------------------------===//
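With the new LLBit parameter, eret and eretnc share one format and differ only in Inst{6}; see the ERET/ERETNC definitions later in this diff (ER_FM<0x18, 0x0> and ER_FM<0x18, 0x1>). Plugging the layout into a sketch:

#include <cstdint>

// Mirrors ER_FM: Inst{31-26}=0x10 (COP0), Inst{25}=1 (CO), Inst{24-7}=0,
// Inst{6}=LLBit, Inst{5-0}=funct.
uint32_t encodeER(uint8_t funct, bool LLBit) {
  return (0x10u << 26) | (1u << 25) | (uint32_t(LLBit) << 6) | (funct & 0x3Fu);
}
// encodeER(0x18, false) == 0x42000018 (eret)
// encodeER(0x18, true)  == 0x42000058 (eretnc)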
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index bb23cc0..b1d6950 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -60,8 +60,8 @@ MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag,
- MFI.getObjectSize(FI), Align);
+ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+ Flag, MFI.getObjectSize(FI), Align);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index ab98c90..d9fb8c8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -77,6 +77,9 @@ def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def MipsERet : SDNode<"MipsISD::ERet", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>;
+
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
@@ -157,7 +160,7 @@ def HasMips3 : Predicate<"Subtarget->hasMips3()">,
def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">,
- AssemblerPredicate<"FeatureMips4_32">;
+ AssemblerPredicate<"!FeatureMips4_32">;
def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
AssemblerPredicate<"FeatureMips4_32r2">;
def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
@@ -166,6 +169,8 @@ def HasMips32 : Predicate<"Subtarget->hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
+def HasMips32r5 : Predicate<"Subtarget->hasMips32r5()">,
+ AssemblerPredicate<"FeatureMips32r5">;
def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">,
AssemblerPredicate<"FeatureMips32r6">;
def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">,
@@ -176,6 +181,8 @@ def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">,
AssemblerPredicate<"!FeatureGP64Bit">;
def HasMips64 : Predicate<"Subtarget->hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
+def NotMips64 : Predicate<"!Subtarget->hasMips64()">,
+ AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">,
AssemblerPredicate<"FeatureMips64r2">;
def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
@@ -184,6 +191,8 @@ def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">,
AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">;
+def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">,
+ AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
@@ -201,6 +210,12 @@ def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">,
def IsLE : Predicate<"Subtarget->isLittle()">;
def IsBE : Predicate<"!Subtarget->isLittle()">;
def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
+def UseTCCInDIV : AssemblerPredicate<"FeatureUseTCCInDIV">;
+def HasEVA : Predicate<"Subtarget->hasEVA()">,
+ AssemblerPredicate<"FeatureEVA,FeatureMips32r2">;
+def HasMSA : Predicate<"Subtarget->hasMSA()">,
+ AssemblerPredicate<"FeatureMSA">;
+
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
@@ -242,6 +257,7 @@ class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
class ISA_MIPS32R2_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
}
+class ISA_MIPS32R5 { list<Predicate> InsnPredicates = [HasMips32r5]; }
class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
class ISA_MIPS64_NOT_64R6 {
list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
@@ -249,9 +265,21 @@ class ISA_MIPS64_NOT_64R6 {
class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+class ISA_MICROMIPS { list<Predicate> InsnPredicates = [InMicroMips]; }
class ISA_MICROMIPS32R6 {
list<Predicate> InsnPredicates = [HasMicroMips32r6];
}
+class ISA_MICROMIPS64R6 {
+ list<Predicate> InsnPredicates = [HasMicroMips64r6];
+}
+class ISA_MICROMIPS32_NOT_MIPS32R6 {
+ list<Predicate> InsnPredicates = [InMicroMips, NotMips32r6];
+}
+
+class INSN_EVA { list<Predicate> InsnPredicates = [HasEVA]; }
+class INSN_EVA_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6, HasEVA];
+}
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
@@ -283,6 +311,28 @@ class INSN_MIPS5_32R2_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
}
+class ASE_CNMIPS {
+ list<Predicate> InsnPredicates = [HasCnMips];
+}
+
+class ASE_MSA {
+ list<Predicate> InsnPredicates = [HasMSA];
+}
+
+class ASE_MSA_NOT_MSA64 {
+ list<Predicate> InsnPredicates = [HasMSA, NotMips64];
+}
+
+class ASE_MSA64 {
+ list<Predicate> InsnPredicates = [HasMSA, HasMips64];
+}
+
+// Class used for separating microMIPSr6 and microMIPS (r3) instructions.
+// It can only be used on instructions that don't inherit PredicateControl.
+class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl {
+ let InsnPredicates = [InMicroMips, NotMips32r6, NotMips64r6];
+}
+
//===----------------------------------------------------------------------===//
class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
@@ -335,6 +385,81 @@ include "MipsInstrFormats.td"
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
+class ConstantSImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+ : AsmOperandClass {
+ let Name = "ConstantSImm" # Bits;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isConstantSImm<" # Bits # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "SImm" # Bits;
+}
+
+class ConstantUImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+ int Offset = 0> : AsmOperandClass {
+ let Name = "ConstantUImm" # Bits # "_" # Offset;
+ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">";
+ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "UImm" # Bits # "_" # Offset;
+}
+
+def ConstantUImm10AsmOperandClass
+ : ConstantUImmAsmOperandClass<10, []>;
+def ConstantUImm8AsmOperandClass
+ : ConstantUImmAsmOperandClass<8, [ConstantUImm10AsmOperandClass]>;
+def ConstantUImm7AsmOperandClass
+ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass]>;
+def ConstantUImm6AsmOperandClass
+ : ConstantUImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>;
+def ConstantSImm6AsmOperandClass
+ : ConstantSImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>;
+def ConstantUImm5Plus1AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 1>;
+def ConstantUImm5Plus32AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>;
+def ConstantUImm5Plus33AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 33>;
+def ConstantUImm5Plus32NormalizeAsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> {
+ let Name = "ConstantUImm5_32_Norm";
+ // We must also subtract 32 when we render the operand.
+ let RenderMethod = "addConstantUImmOperands<5, 32, -32>";
+}
+def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass {
+ let Name = "UImm5Lsl2";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isScaledUImm<5, 2>";
+ let SuperClasses = [ConstantUImm6AsmOperandClass];
+ let DiagnosticType = "UImm5_Lsl2";
+}
+def ConstantUImm5ReportUImm6AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> {
+ let Name = "ConstantUImm5_0_Report_UImm6";
+ let DiagnosticType = "UImm5_0_Report_UImm6";
+}
+def ConstantUImm5AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>;
+def ConstantUImm4AsmOperandClass
+ : ConstantUImmAsmOperandClass<
+ 4, [ConstantUImm5AsmOperandClass,
+ ConstantUImm5Plus32AsmOperandClass,
+ ConstantUImm5Plus32NormalizeAsmOperandClass]>;
+def ConstantUImm3AsmOperandClass
+ : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>;
+def ConstantUImm2Plus1AsmOperandClass
+ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>;
+def ConstantUImm2AsmOperandClass
+ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>;
+def ConstantUImm1AsmOperandClass
+ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>;
+def ConstantImmzAsmOperandClass : AsmOperandClass {
+ let Name = "ConstantImmz";
+ let RenderMethod = "addConstantUImmOperands<1>";
+ let PredicateMethod = "isConstantImmz";
+ let SuperClasses = [ConstantUImm1AsmOperandClass];
+ let DiagnosticType = "Immz";
+}
+
def MipsJumpTargetAsmOperand : AsmOperandClass {
let Name = "JumpTarget";
let ParserMethod = "parseJumpTarget";
@@ -360,6 +485,10 @@ def calltarget : Operand<iPTR> {
def imm64: Operand<i64>;
+def simm6 : Operand<i32> {
+ let ParserMatchClass = ConstantSImm6AsmOperandClass;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
def simm9 : Operand<i32>;
def simm10 : Operand<i32>;
def simm11 : Operand<i32>;
@@ -380,23 +509,12 @@ def simm18_lsl3 : Operand<i32> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
-def simm20 : Operand<i32> {
-}
+def simm20 : Operand<i32>;
+def simm32 : Operand<i32>;
def uimm20 : Operand<i32> {
}
-def MipsUImm10AsmOperand : AsmOperandClass {
- let Name = "UImm10";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseImm";
- let PredicateMethod = "isUImm<10>";
-}
-
-def uimm10 : Operand<i32> {
- let ParserMatchClass = MipsUImm10AsmOperand;
-}
-
def simm16_64 : Operand<i64> {
let DecoderMethod = "DecodeSimm16";
}
@@ -404,23 +522,71 @@ def simm16_64 : Operand<i64> {
// Zero
def uimmz : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantImmzAsmOperandClass;
+}
+
+// Unsigned Operands
+foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10} in
+ def uimm # I : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+ }
+
+def uimm2_plus1 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<2, 1>";
+ let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass;
}
-// Unsigned Operand
-def uimm2 : Operand<i32> {
+def uimm5_plus1 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<5, 1>";
+ let ParserMatchClass = ConstantUImm5Plus1AsmOperandClass;
}
-def uimm3 : Operand<i32> {
+def uimm5_plus32 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass;
}
-def uimm5 : Operand<i32> {
+def uimm5_plus33 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<5, 1>";
+ let ParserMatchClass = ConstantUImm5Plus33AsmOperandClass;
}
-def uimm6 : Operand<i32> {
+def uimm5_plus32_normalize : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass;
+}
+
+def uimm5_lsl2 : Operand<OtherVT> {
+ let EncoderMethod = "getUImm5Lsl2Encoding";
+ let DecoderMethod = "DecodeUImm5lsl2";
+ let ParserMatchClass = ConstantUImm5Lsl2AsmOperandClass;
+}
+
+def uimm5_plus32_normalize_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass;
+}
+
+foreach I = {5} in
+ def uimm # I # _64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+ }
+
+// Like uimm5_64 but reports a less confusing error for 32-63 when
+// an instruction alias permits that.
+def uimm5_64_report_uimm6 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass;
}
def uimm16 : Operand<i32> {
@@ -435,6 +601,22 @@ def MipsMemAsmOperand : AsmOperandClass {
let ParserMethod = "parseMemOperand";
}
+def MipsMemSimm9AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm9";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffset<9>";
+}
+
+def MipsMemSimm9GPRAsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm9GPR";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffsetGPR<9>";
+}
+
def MipsMemSimm11AsmOperand : AsmOperandClass {
let Name = "MemOffsetSimm11";
let SuperClasses = [MipsMemAsmOperand];
@@ -485,6 +667,13 @@ def mem_msa : mem_generic {
def mem_simm9 : mem_generic {
let MIOperandInfo = (ops ptr_rc, simm9);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm9AsmOperand;
+}
+
+def mem_simm9gpr : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm9);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm9GPRAsmOperand;
}
def mem_simm11 : mem_generic {
@@ -512,12 +701,6 @@ def PtrRC : Operand<iPTR> {
let ParserMatchClass = GPR32AsmOperand;
}
-// size operand of ext instruction
-def size_ext : Operand<i32> {
- let EncoderMethod = "getSizeExtEncoding";
- let DecoderMethod = "DecodeExtSize";
-}
-
// size operand of ins instruction
def size_ins : Operand<i32> {
let EncoderMethod = "getSizeInsEncoding";
@@ -657,7 +840,7 @@ class shift_rotate_reg<string opstr, RegisterOperand RO, InstrItinClass itin,
[(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))], itin, FrmR,
opstr>;
-// Load Upper Imediate
+// Load Upper Immediate
class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
[], II_LUI, FrmI, opstr>, IsAsCheapAsAMove {
@@ -675,14 +858,19 @@ class Load<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
let mayLoad = 1;
}
-class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+class StoreMemory<string opstr, DAGOperand RO, DAGOperand MO,
+ SDPatternOperator OpNode = null_frag,
InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
- InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
+class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ StoreMemory<opstr, RO, mem, OpNode, Itin, Addr>;
+
// Load/Store Left/Right
let canFoldAsLoad = 1 in
class LoadLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
@@ -740,7 +928,7 @@ class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
- [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
+ [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], II_BCC,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
@@ -752,7 +940,7 @@ class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
- [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
+ [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], II_BCCZ,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
@@ -778,7 +966,7 @@ class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
SDPatternOperator targetoperator, string bopstr> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> {
+ [(operator targetoperator:$target)], II_J, FrmJ, bopstr> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -788,7 +976,7 @@ class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
// Unconditional branch
class UncondBranch<Instruction BEQInst> :
- PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>,
+ PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], II_B>,
PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
@@ -802,7 +990,7 @@ class UncondBranch<Instruction BEQInst> :
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFR<string opstr, RegisterOperand RO,
SDPatternOperator operator = null_frag>:
- InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch,
+ InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], II_JR,
FrmR, opstr>;
// Indirect branch
@@ -815,23 +1003,23 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> {
+ [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
Register RetReg, RegisterOperand ResRO = RO>:
- PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], IIBranch>,
+ PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
class JumpLinkReg<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], II_JALR, FrmR, opstr>;
class BGEZAL_FT<string opstr, DAGOperand opnd,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZAL, FrmI, opstr> {
let hasDelaySlot = DelaySlot;
}
@@ -840,17 +1028,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
hasExtraSrcRegAllocReq = 1, Defs = [AT] in {
class TailCall<Instruction JumpInst> :
- PseudoSE<(outs), (ins calltarget:$target), [], IIBranch>,
+ PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
PseudoInstExpansion<(JumpInst jmptarget:$target)>;
class TailCallReg<RegisterOperand RO, Instruction JRInst,
RegisterOperand ResRO = RO> :
- PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], IIBranch>,
+ PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
PseudoInstExpansion<(JRInst ResRO:$rs)>;
}
class BAL_BR_Pseudo<Instruction RealInst> :
- PseudoSE<(outs), (ins brtarget:$offset), [], IIBranch>,
+ PseudoSE<(outs), (ins brtarget:$offset), [], II_BCCZAL>,
PseudoInstExpansion<(RealInst ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
@@ -997,9 +1185,10 @@ class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
[(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr>;
// Subword Swap
-class SubwordSwap<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
- NoItinerary, FrmR, opstr> {
+class SubwordSwap<string opstr, RegisterOperand RO,
+ InstrItinClass itin = NoItinerary>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], itin,
+ FrmR, opstr> {
let hasSideEffects = 0;
}
@@ -1010,8 +1199,8 @@ class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
// Ext and Ins
class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
- SDPatternOperator Op = null_frag>:
- InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
+ Operand SizeOpnd, SDPatternOperator Op = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
@@ -1074,6 +1263,9 @@ class TrapBase<Instruction RealInst>
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
+let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, hasSideEffects=1 in
+def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>;
+
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
[(callseq_start timm:$amt)]>;
@@ -1215,10 +1407,11 @@ def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel,
LW_FM<0x21>;
def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>;
let AdditionalPredicates = [NotInMicroMips] in {
-def LW : Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel,
+def LW : StdMMR6Rel, Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel,
LW_FM<0x23>;
}
-def SB : Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, LW_FM<0x28>;
+def SB : StdMMR6Rel, Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel,
+ LW_FM<0x28>;
def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>;
let AdditionalPredicates = [NotInMicroMips] in {
def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>;
@@ -1259,15 +1452,17 @@ let DecoderNamespace = "COP3_" in {
}
}
-def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
-def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
+def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
+def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
-def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
-def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
-def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
-def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
-def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
-def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
+ def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
+ def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
+ def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
+ def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
+ def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
+}
def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>,
ISA_MIPS2_NOT_32R6_64R6;
@@ -1290,14 +1485,15 @@ def TRAP : TrapBase<BREAK>;
def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
let AdditionalPredicates = [NotInMicroMips] in {
-def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
+ def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18, 0x0>, INSN_MIPS3_32;
+ def ERETNC : MMRel, ER_FT<"eretnc">, ER_FM<0x18, 0x1>, ISA_MIPS32R5;
+ def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f, 0x0>, ISA_MIPS32;
}
-def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
let AdditionalPredicates = [NotInMicroMips] in {
-def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+ def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+ def DI : MMRel, StdMMR6Rel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
}
-def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
AdditionalPredicates = [NotInMicroMips] in {
@@ -1359,7 +1555,8 @@ def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
class PseudoIndirectBranchBase<RegisterOperand RO> :
- MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> {
+ MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
+ II_IndirectBranchPseudo> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -1369,12 +1566,12 @@ class PseudoIndirectBranchBase<RegisterOperand RO> :
def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
-// Return instructions are matched as a RetRA instruction, then ar expanded
+// Return instructions are matched as a RetRA instruction, then are expanded
// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the
// ISA.
class PseudoReturnBase<RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs),
- [], IIBranch> {
+ [], II_ReturnPseudo> {
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
@@ -1441,8 +1638,11 @@ def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>,
def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>,
ISA_MIPS32_NOT_32R6_64R6;
-/// Word Swap Bytes Within Halfwords
-def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ /// Word Swap Bytes Within Halfwords
+ def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, SEB_FM<2, 0x20>,
+ ISA_MIPS32R2;
+}
/// No operation.
def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
@@ -1485,10 +1685,12 @@ def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
-
+let AdditionalPredicates = [NotInMicroMips] in {
def RDHWR : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
-
-def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
+}
+// TODO: Add '0 < pos+size <= 32' constraint check to ext instruction
+def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, MipsExt>,
+ EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
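The constraint the TODO above refers to comes from the MIPS32R2 definition of EXT: the extracted field must lie entirely within the 32-bit source, i.e. 0 < pos + size <= 32 with pos in [0,31] and size in [1,32]. A minimal C++ sketch of that arithmetic (illustrative only; the in-tree check does not exist yet, per the TODO):

    // Sketch only: validity of an EXT field per the MIPS32R2 manual.
    bool isValidExtField(unsigned Pos, unsigned Size) {
      return Pos < 32 && Size >= 1 && Size <= 32 && Pos + Size <= 32;
    }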
/// Move Control Registers From/To CPU Registers
@@ -1499,9 +1701,9 @@ def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>;
class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther, asmstr>;
-def SSNOP : MMRel, Barrier<"ssnop">, BARRIER_FM<1>;
+def SSNOP : MMRel, StdMMR6Rel, Barrier<"ssnop">, BARRIER_FM<1>;
def EHB : MMRel, Barrier<"ehb">, BARRIER_FM<3>;
-def PAUSE : MMRel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+def PAUSE : MMRel, StdMMR6Rel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
// JR_HB and JALR_HB are defined here using the new style naming
// scheme because some of this code is shared with Mips32r6InstrInfo.td
@@ -1562,11 +1764,60 @@ def CACHE : MMRel, CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
def PREF : MMRel, CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
INSN_MIPS3_32_NOT_32R6_64R6;
+def ROL : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "rol\t$rs, $rt, $rd">;
+def ROLImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "rol\t$rs, $rt, $imm">;
+def : MipsInstAlias<"rol $rd, $rs",
+ (ROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"rol $rd, $imm",
+ (ROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>;
+
+def ROR : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "ror\t$rs, $rt, $rd">;
+def RORImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "ror\t$rs, $rt, $imm">;
+def : MipsInstAlias<"ror $rd, $rs",
+ (ROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"ror $rd, $imm",
+ (RORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>;
+
+def DROL : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "drol\t$rs, $rt, $rd">, ISA_MIPS64;
+def DROLImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "drol\t$rs, $rt, $imm">, ISA_MIPS64;
+def : MipsInstAlias<"drol $rd, $rs",
+ (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+def : MipsInstAlias<"drol $rd, $imm",
+ (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+
+def DROR : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "dror\t$rs, $rt, $rd">, ISA_MIPS64;
+def DRORImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "dror\t$rs, $rt, $imm">, ISA_MIPS64;
+def : MipsInstAlias<"dror $rd, $rs",
+ (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+def : MipsInstAlias<"dror $rd, $imm",
+ (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+
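ROL/ROR (and their 64-bit DROL/DROR counterparts) are assembler macros rather than hardware opcodes: pre-R2 assemblers conventionally expand them into a subu/sllv/srlv/or sequence, while R2+ cores have ROTR/ROTRV. The value they compute is ordinary rotation, sketched in C++ below (the expansion itself lives in MipsAsmParser and is not shown by this hunk):

    #include <cstdint>

    // 32-bit rotate-left / rotate-right: the semantics the rol/ror macros give.
    uint32_t rotl32(uint32_t X, unsigned N) {
      N &= 31;
      return (X << N) | (X >> ((32 - N) & 31));
    }
    uint32_t rotr32(uint32_t X, unsigned N) {
      N &= 31;
      return (X >> N) | (X << ((32 - N) & 31));
    }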
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"move $dst, $src",
- (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>,
+ (OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
+ GPR_32 {
+ let AdditionalPredicates = [NotInMicroMips];
+}
+def : MipsInstAlias<"move $dst, $src",
+ (ADDu GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
GPR_32 {
let AdditionalPredicates = [NotInMicroMips];
}
@@ -1630,27 +1881,27 @@ def : MipsInstAlias<"beqz $rs,$offset",
def : MipsInstAlias<"beqzl $rs,$offset",
(BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
-
+
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
let AdditionalPredicates = [NotInMicroMips] in {
-def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
-}
-def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
-
-def : MipsInstAlias<"teq $rs, $rt",
- (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tge $rs, $rt",
- (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tgeu $rs, $rt",
- (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tlt $rs, $rt",
- (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tltu $rs, $rt",
- (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tne $rs, $rt",
- (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-
+ def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+ def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
+}
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tge $rs, $rt",
+ (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tne $rs, $rt",
+ (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+}
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sub, $rd, $rs, $imm",
@@ -1678,7 +1929,7 @@ def : MipsInstAlias<"sync",
class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", simm32, GPR32Opnd>;
class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
RegisterOperand RO> :
@@ -1689,13 +1940,16 @@ def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>;
class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>;
+def LoadAddrImm32 : LoadAddressFromImm32<"la", simm32, GPR32Opnd>;
def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"jal\t$rd, $rs"> ;
def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
"jal\t$rs"> ;
+def NORImm : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "nor\t$rs, $rt, $imm"> ;
+
let hasDelaySlot = 1 in {
def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
(ins imm64:$imm64, brtarget:$offset),
@@ -1718,12 +1972,62 @@ def BLTU : CondBranchPseudo<"bltu">;
def BLEU : CondBranchPseudo<"bleu">;
def BGEU : CondBranchPseudo<"bgeu">;
def BGTU : CondBranchPseudo<"bgtu">;
+def BLTL : CondBranchPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEL : CondBranchPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEL : CondBranchPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTL : CondBranchPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTUL: CondBranchPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEUL: CondBranchPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEUL: CondBranchPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTUL: CondBranchPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+class CondBranchImmPseudo<string instr_asm> :
+ MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+
+def BLTImmMacro : CondBranchImmPseudo<"blt">;
+def BLEImmMacro : CondBranchImmPseudo<"ble">;
+def BGEImmMacro : CondBranchImmPseudo<"bge">;
+def BGTImmMacro : CondBranchImmPseudo<"bgt">;
+def BLTUImmMacro : CondBranchImmPseudo<"bltu">;
+def BLEUImmMacro : CondBranchImmPseudo<"bleu">;
+def BGEUImmMacro : CondBranchImmPseudo<"bgeu">;
+def BGTUImmMacro : CondBranchImmPseudo<"bgtu">;
+def BLTLImmMacro : CondBranchImmPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLELImmMacro : CondBranchImmPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGELImmMacro : CondBranchImmPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTLImmMacro : CondBranchImmPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTULImmMacro : CondBranchImmPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEULImmMacro : CondBranchImmPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEULImmMacro : CondBranchImmPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
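CondBranchImmPseudo gives the assembler a three-operand compare-and-branch macro taking an immediate. A hedged sketch of the conventional expansion for "blt $rs, imm, offset" — materialize the immediate, set-on-less-than into $at, branch on the result; expandBltImm is a hypothetical name, the real expansion lives in MipsAsmParser:

    #include <cstdint>
    #include <string>
    #include <vector>

    // Illustrative only: the conventional three-instruction macro expansion.
    std::vector<std::string> expandBltImm(const std::string &Rs, int64_t Imm,
                                          const std::string &Offset) {
      return {"li   $at, " + std::to_string(Imm), // materialize the immediate
              "slt  $at, " + Rs + ", $at",        // $at = (Rs < Imm)
              "bne  $at, $zero, " + Offset};      // branch if comparison true
    }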
+// FIXME: The predicates were removed because these instructions are matched
+// regardless of predicates (PredicateControl was not in the hierarchy), which
+// lets the expansion function emit a more precise error message.
+// Once the tablegen-erated errors are made better, this needs to be fixed and
+// the predicates restored.
+
+def SDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "div\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def UDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "divu\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def DSDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "ddiv\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6;
+
+def DUDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "ddivu\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6;
+
+def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
- "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+ "ulhu\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
- "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+ "ulw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
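The Ulh/Ulhu/Ulw macros load naturally unaligned data: the assembler expands them into byte (or LWL/LWR) accesses so no alignment trap can fire. What ulh computes, sketched for a little-endian target — an assumption for illustration; the real expansion is in MipsAsmParser and may pick different instructions:

    #include <cstdint>

    // Sign-extending unaligned halfword load, little-endian byte order.
    int16_t ulh_le(const uint8_t *P) {
      return (int16_t)(((uint16_t)P[1] << 8) | P[0]);
    }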
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1939,6 +2243,16 @@ let AddedComplexity = 40 in {
}
}
+// Atomic load patterns.
+def : MipsPat<(atomic_load_8 addr:$a), (LB addr:$a)>;
+def : MipsPat<(atomic_load_16 addr:$a), (LH addr:$a)>;
+def : MipsPat<(atomic_load_32 addr:$a), (LW addr:$a)>;
+
+// Atomic store patterns.
+def : MipsPat<(atomic_store_8 addr:$a, GPR32:$v), (SB GPR32:$v, addr:$a)>;
+def : MipsPat<(atomic_store_16 addr:$a, GPR32:$v), (SH GPR32:$v, addr:$a)>;
+def : MipsPat<(atomic_store_32 addr:$a, GPR32:$v), (SW GPR32:$v, addr:$a)>;
+
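These patterns are sound because naturally aligned MIPS loads and stores of up to 32 bits are single-copy atomic; any ordering stronger than monotonic is enforced separately (with SYNC) rather than by the memory instruction itself, which is an assumption about the surrounding lowering, not shown in this hunk. The class of source-level accesses they end up covering, in C++:

    #include <atomic>
    #include <cstdint>

    std::atomic<int32_t> Flag{0};

    // Relaxed atomic load/store of an aligned 32-bit object: one LW / one SW.
    int32_t readFlag() { return Flag.load(std::memory_order_relaxed); }
    void writeFlag(int32_t V) { Flag.store(V, std::memory_order_relaxed); }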
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
@@ -1964,6 +2278,10 @@ include "MipsDSPInstrInfo.td"
include "MipsMSAInstrFormats.td"
include "MipsMSAInstrInfo.td"
+// EVA
+include "MipsEVAInstrFormats.td"
+include "MipsEVAInstrInfo.td"
+
// Micromips
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
@@ -1972,3 +2290,11 @@ include "MicroMipsInstrFPU.td"
// Micromips r6
include "MicroMips32r6InstrFormats.td"
include "MicroMips32r6InstrInfo.td"
+
+// Micromips64 r6
+include "MicroMips64r6InstrFormats.td"
+include "MicroMips64r6InstrInfo.td"
+
+// Micromips DSP
+include "MicroMipsDSPInstrFormats.td"
+include "MicroMipsDSPInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
index 90f8cc0..49fb99a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -148,7 +148,7 @@ void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
// Insert NewMBB and fix control flow.
MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
NewMBB->transferSuccessors(MBB);
- NewMBB->removeSuccessor(Tgt);
+ NewMBB->removeSuccessor(Tgt, true);
MBB->addSuccessor(NewMBB);
MBB->addSuccessor(Tgt);
MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
@@ -161,7 +161,7 @@ void MipsLongBranch::initMBBInfo() {
// Split the MBBs if they have two branches. Each basic block should have at
// most one branch after this loop is executed.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;)
- splitMBB(I++);
+ splitMBB(&*I++);
MF->RenumberBlocks();
MBBInfos.clear();
@@ -262,8 +262,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
static_cast<const MipsInstrInfo *>(Subtarget.getInstrInfo());
MF->insert(FallThroughMBB, LongBrMBB);
- MBB->removeSuccessor(TgtMBB);
- MBB->addSuccessor(LongBrMBB);
+ MBB->replaceSuccessor(TgtMBB, LongBrMBB);
if (IsPIC) {
MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
@@ -434,7 +433,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB));
} else
// Change branch destination and reverse condition.
- replaceBranch(*MBB, I.Br, DL, FallThroughMBB);
+ replaceBranch(*MBB, I.Br, DL, &*FallThroughMBB);
}
static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
index bff2d0f..7d25ea5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -7,18 +7,12 @@
//
//===----------------------------------------------------------------------===//
-def HasMSA : Predicate<"Subtarget->hasMSA()">,
- AssemblerPredicate<"FeatureMSA">;
-
-class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
- let Predicates = [HasMSA];
+class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl, ASE_MSA {
+ let EncodingPredicates = [HasStdEnc];
let Inst{31-26} = 0b011110;
}
-class MSA64Inst : MSAInst {
- let Predicates = [HasMSA, HasMips64];
-}
-
class MSACBranch : MSAInst {
let Inst{31-26} = 0b010001;
}
@@ -27,10 +21,6 @@ class MSASpecial : MSAInst {
let Inst{31-26} = 0b000000;
}
-class MSA64Special : MSA64Inst {
- let Inst{31-26} = 0b000000;
-}
-
class MSAPseudo<dag outs, dag ins, list<dag> pattern,
InstrItinClass itin = IIPseudo>:
MipsPseudo<outs, ins, pattern, itin> {
@@ -100,7 +90,7 @@ class MSA_2R_FILL_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_2R_FILL_D_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSA64Inst {
+class MSA_2R_FILL_D_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
bits<5> rs;
bits<5> wd;
@@ -293,7 +283,7 @@ class MSA_ELM_COPY_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_ELM_COPY_D_FMT<bits<4> major, bits<6> minor>: MSA64Inst {
+class MSA_ELM_COPY_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
bits<4> n;
bits<5> ws;
bits<5> rd;
@@ -345,7 +335,7 @@ class MSA_ELM_INSERT_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_ELM_INSERT_D_FMT<bits<4> major, bits<6> minor>: MSA64Inst {
+class MSA_ELM_INSERT_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
bits<6> n;
bits<5> rs;
bits<5> wd;
@@ -450,7 +440,7 @@ class SPECIAL_LSA_FMT<bits<6> minor>: MSASpecial {
let Inst{5-0} = minor;
}
-class SPECIAL_DLSA_FMT<bits<6> minor>: MSA64Special {
+class SPECIAL_DLSA_FMT<bits<6> minor>: MSASpecial {
bits<5> rs;
bits<5> rt;
bits<5> rd;
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 970e98e..eacfcec 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -63,30 +63,13 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT",
def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def immZExt1Ptr : ImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
+def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
def immZExt6Ptr : ImmLeaf<iPTR, [{return isUInt<6>(Imm);}]>;
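The new immZExt1Ptr/immZExt2Ptr leaves accept pointer-typed immediates of 1 and 2 bits, matching the valid lane indices of v2i64 and v4i32 vectors used further down. isUInt<N> is LLVM's predicate from Support/MathExtras.h; a simplified sketch of what it checks:

    #include <cstdint>

    // Simplified: true if X fits in an N-bit unsigned field (N < 63 here).
    template <unsigned N> bool isUIntSketch(int64_t X) {
      return X >= 0 && X < (INT64_C(1) << N);
    }
    // isUIntSketch<2>(3) is true, so immZExt2Ptr accepts lane indices 0..3.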
// Operands
-// The immediate of an LSA instruction needs special handling
-// as the encoded value should be subtracted by one.
-def uimm2LSAAsmOperand : AsmOperandClass {
- let Name = "LSAImm";
- let ParserMethod = "parseLSAImm";
- let RenderMethod = "addImmOperands";
-}
-
-def LSAImm : Operand<i32> {
- let PrintMethod = "printUnsignedImm";
- let EncoderMethod = "getLSAImmEncoding";
- let DecoderMethod = "DecodeLSAImm";
- let ParserMatchClass = uimm2LSAAsmOperand;
-}
-
-def uimm4 : Operand<i32> {
- let PrintMethod = "printUnsignedImm8";
-}
-
def uimm4_ptr : Operand<iPTR> {
let PrintMethod = "printUnsignedImm8";
}
@@ -95,10 +78,6 @@ def uimm6_ptr : Operand<iPTR> {
let PrintMethod = "printUnsignedImm8";
}
-def uimm8 : Operand<i32> {
- let PrintMethod = "printUnsignedImm8";
-}
-
def simm5 : Operand<i32>;
def vsplat_uimm1 : Operand<vAny> {
@@ -639,7 +618,6 @@ class COPY_S_D_ENC : MSA_ELM_COPY_D_FMT<0b0010, 0b011001>;
class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>;
class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>;
class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>;
-class COPY_U_D_ENC : MSA_ELM_COPY_D_FMT<0b0011, 0b011001>;
class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>;
@@ -1195,47 +1173,14 @@ class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass Itinerary = itin;
}
-// This class is deprecated and will be removed soon.
-class MSA_BIT_B_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm3:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_H_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm4:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_W_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm5:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_D_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
+class MSA_BIT_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ Operand ImmOp, ImmLeaf Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm6:$m);
+ dag InOperandList = (ins ROWS:$ws, ImmOp:$m);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))];
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
InstrItinClass Itinerary = itin;
}
@@ -1291,13 +1236,14 @@ class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
}
class MSA_ELM_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWD, RegisterOperand ROWS,
+ Operand ImmOp, ImmLeaf Imm,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ImmOp:$n);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws,
- immZExt4:$n))];
+ Imm:$n))];
string Constraints = "$wd = $wd_in";
InstrItinClass Itinerary = itin;
}
@@ -1479,7 +1425,7 @@ class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterOperand ROWD> {
dag InOperandList = (ins ROWD:$wt, brtarget:$offset);
string AsmString = !strconcat(instr_asm, "\t$wt, $offset");
list<dag> Pattern = [];
- InstrItinClass Itinerary = IIBranch;
+ InstrItinClass Itinerary = NoItinerary;
bit isBranch = 1;
bit isTerminator = 1;
bit hasDelaySlot = 1;
@@ -1519,13 +1465,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
}
class MSA_INSVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ Operand ImmOp, ImmLeaf Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2);
+ dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2);
string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]");
list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
- immZExt6:$n,
+ Imm:$n,
ROWS:$ws,
immz:$n2))];
InstrItinClass Itinerary = itin;
@@ -1934,8 +1881,6 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16,
GPR32Opnd, MSA128HOpnd>;
class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32,
GPR32Opnd, MSA128WOpnd>;
-class COPY_U_D_DESC : MSA_COPY_DESC_BASE<"copy_u.d", vextract_zext_i64, v2i64,
- GPR64Opnd, MSA128DOpnd>;
class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v4f32, FGR32,
MSA128W>;
@@ -2346,13 +2291,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
-class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8,
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
MSA128BOpnd>;
-class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16,
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
MSA128HOpnd>;
-class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32,
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
MSA128WOpnd>;
-class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64,
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
MSA128DOpnd>;
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -2381,7 +2326,7 @@ class LSA_DESC_BASE<string instr_asm, RegisterOperand RORD,
RegisterOperand RORS = RORD, RegisterOperand RORT = RORD,
InstrItinClass itin = NoItinerary > {
dag OutOperandList = (outs RORD:$rd);
- dag InOperandList = (ins RORS:$rs, RORT:$rt, LSAImm:$sa);
+ dag InOperandList = (ins RORS:$rs, RORT:$rt, uimm2_plus1:$sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $sa");
list<dag> Pattern = [(set RORD:$rd, (add RORT:$rt,
(shl RORS:$rs,
@@ -2561,23 +2506,23 @@ class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>;
class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
-class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b,
- MSA128BOpnd>;
-class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h,
- MSA128HOpnd>;
-class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w,
- MSA128WOpnd>;
-class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d,
- MSA128DOpnd>;
-
-class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b,
- MSA128BOpnd>;
-class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h,
- MSA128HOpnd>;
-class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w,
- MSA128WOpnd>;
-class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d,
- MSA128DOpnd>;
+class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
+ immZExt6, MSA128DOpnd>;
+
+class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@@ -2589,13 +2534,17 @@ class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>;
class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
- MSA128BOpnd>;
+ MSA128BOpnd, MSA128BOpnd, uimm4,
+ immZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
- MSA128HOpnd>;
+ MSA128HOpnd, MSA128HOpnd, uimm3,
+ immZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
- MSA128WOpnd>;
+ MSA128WOpnd, MSA128WOpnd, uimm2,
+ immZExt2>;
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
- MSA128DOpnd>;
+ MSA128DOpnd, MSA128DOpnd, uimm1,
+ immZExt1>;
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
@@ -2648,14 +2597,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>;
class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
-class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b,
- MSA128BOpnd>;
-class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h,
- MSA128HOpnd>;
-class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w,
- MSA128WOpnd>;
-class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d,
- MSA128DOpnd>;
+class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
@@ -2676,14 +2625,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>;
class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
-class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b,
- MSA128BOpnd>;
-class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h,
- MSA128HOpnd>;
-class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w,
- MSA128WOpnd>;
-class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d,
- MSA128DOpnd>;
+class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
ValueType TyNode, RegisterOperand ROWD,
@@ -2991,12 +2940,11 @@ def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC;
def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC;
def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC;
def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC;
-def COPY_S_D : COPY_S_D_ENC, COPY_S_D_DESC;
+def COPY_S_D : COPY_S_D_ENC, COPY_S_D_DESC, ASE_MSA64;
def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC;
def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC;
-def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC;
-def COPY_U_D : COPY_U_D_ENC, COPY_U_D_DESC;
+def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC, ASE_MSA64;
def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
@@ -3108,7 +3056,7 @@ def FFQR_D : FFQR_D_ENC, FFQR_D_DESC;
def FILL_B : FILL_B_ENC, FILL_B_DESC;
def FILL_H : FILL_H_ENC, FILL_H_DESC;
def FILL_W : FILL_W_ENC, FILL_W_DESC;
-def FILL_D : FILL_D_ENC, FILL_D_DESC;
+def FILL_D : FILL_D_ENC, FILL_D_DESC, ASE_MSA64;
def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC;
def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC;
@@ -3238,7 +3186,7 @@ def ILVR_D : ILVR_D_ENC, ILVR_D_DESC;
def INSERT_B : INSERT_B_ENC, INSERT_B_DESC;
def INSERT_H : INSERT_H_ENC, INSERT_H_DESC;
def INSERT_W : INSERT_W_ENC, INSERT_W_DESC;
-def INSERT_D : INSERT_D_ENC, INSERT_D_DESC;
+def INSERT_D : INSERT_D_ENC, INSERT_D_DESC, ASE_MSA64;
// INSERT_FW_PSEUDO defined after INSVE_W
// INSERT_FD_PSEUDO defined after INSVE_D
@@ -3280,7 +3228,7 @@ def LDI_W : LDI_W_ENC, LDI_W_DESC;
def LDI_D : LDI_D_ENC, LDI_D_DESC;
def LSA : LSA_ENC, LSA_DESC;
-def DLSA : DLSA_ENC, DLSA_DESC;
+def DLSA : DLSA_ENC, DLSA_DESC, ASE_MSA64;
def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC;
def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC;
@@ -3787,6 +3735,28 @@ def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
MSA128B, NoItinerary>;
+// Vector extraction with fixed index.
+//
+// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than
+// COPY_U_W, even for the zero-extended case. This is because our forward
+// compatibility strategy is to consider registers to be infinitely
+// sign-extended so that a MIPS64 can execute MIPS32 code without getting
+// different register values.
+def : MSAPat<(vextract_zext_i32 (v4i32 MSA128W:$ws), immZExt2Ptr:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64;
+def : MSAPat<(vextract_zext_i32 (v4f32 MSA128W:$ws), immZExt2Ptr:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64;
+
+// Extracting 64-bit values on MSA64 should always use COPY_S_D rather than
+// COPY_U_D, even for the zero-extended case. This is because our forward
+// compatibility strategy is to consider registers to be infinitely
+// sign-extended so that a hypothetical MIPS128 would be able to execute MIPS64
+// code without getting different register values.
+def : MSAPat<(vextract_zext_i64 (v2i64 MSA128D:$ws), immZExt1Ptr:$idx),
+ (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64;
+def : MSAPat<(vextract_zext_i64 (v2f64 MSA128D:$ws), immZExt1Ptr:$idx),
+ (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64;
+
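The invariant both comments rely on: MIPS64 keeps 32-bit values sign-extended across the full 64-bit register, so COPY_S_W yields the canonical in-register form even when the IR requested a zero-extended extract, and a 32-bit consumer reads only the low word anyway. Sketched in C++:

    #include <cstdint>

    // Canonical MIPS64 register image of a 32-bit value: its sign-extension.
    int64_t canonicalMips64Form(int32_t V) {
      return (int64_t)V; // e.g. 0x80000000 -> 0xFFFFFFFF80000000
    }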
// Vector extraction with variable index
def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)),
(SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws,
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 0d1ee04..c7d2738 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -24,42 +24,6 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
-// class MipsCallEntry.
-MipsCallEntry::MipsCallEntry(StringRef N) {
-#ifndef NDEBUG
- Name = N;
- Val = nullptr;
-#endif
-}
-
-MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
-#ifndef NDEBUG
- Val = V;
-#endif
-}
-
-bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
- return false;
-}
-
-bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
- return false;
-}
-
-bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
- return false;
-}
-
-void MipsCallEntry::printCustom(raw_ostream &O) const {
- O << "MipsCallEntry: ";
-#ifndef NDEBUG
- if (Val)
- O << Val->getName();
- else
- O << Name;
-#endif
-}
-
MipsFunctionInfo::~MipsFunctionInfo() {}
bool MipsFunctionInfo::globalBaseRegSet() const {
@@ -111,27 +75,32 @@ void MipsFunctionInfo::createEhDataRegsFI() {
}
}
+void MipsFunctionInfo::createISRRegFI() {
+  // ISRs require spill slots for the Status & ErrorPC Coprocessor 0 registers.
+  // The current implementation only supports Mips32r2+, not Mips64rX. Status
+  // is always 32 bits, while ErrorPC is 32 or 64 bits depending on the
+  // architecture; as only Mips32r2+ is supported, 32-bit slots suffice.
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+
+ for (int I = 0; I < 2; ++I)
+ ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
+ RC->getSize(), RC->getAlignment(), false);
+}
+
bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
- std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
-
- if (!E)
- E = llvm::make_unique<MipsCallEntry>(Name);
-
- return MachinePointerInfo(E.get());
+bool MipsFunctionInfo::isISRRegFI(int FI) const {
+ return IsISR && (FI == ISRDataRegFI[0] || FI == ISRDataRegFI[1]);
+}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const char *ES) {
+ return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
- std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
-
- if (!E)
- E = llvm::make_unique<MipsCallEntry>(Val);
-
- return MachinePointerInfo(E.get());
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+ return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
}
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
index 32436ef..a2f6ee0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -15,12 +15,10 @@
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,31 +28,13 @@
namespace llvm {
-/// \brief A class derived from PseudoSourceValue that represents a GOT entry
-/// resolved by lazy-binding.
-class MipsCallEntry : public PseudoSourceValue {
-public:
- explicit MipsCallEntry(StringRef N);
- explicit MipsCallEntry(const GlobalValue *V);
- bool isConstant(const MachineFrameInfo *) const override;
- bool isAliased(const MachineFrameInfo *) const override;
- bool mayAlias(const MachineFrameInfo *) const override;
-
-private:
- void printCustom(raw_ostream &O) const override;
-#ifndef NDEBUG
- std::string Name;
- const GlobalValue *Val;
-#endif
-};
-
/// MipsFunctionInfo - This class is derived from MachineFunction private
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
public:
MipsFunctionInfo(MachineFunction &MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false),
+ VarArgsFrameIndex(0), CallsEhReturn(false), IsISR(false), SaveS2(false),
MoveF64ViaSpillFI(-1) {}
~MipsFunctionInfo();
@@ -86,13 +66,21 @@ public:
int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
bool isEhDataRegFI(int FI) const;
- /// \brief Create a MachinePointerInfo that has a MipsCallEntr object
- /// representing a GOT entry for an external function.
- MachinePointerInfo callPtrInfo(StringRef Name);
+ /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue
+ /// object representing a GOT entry for an external function.
+ MachinePointerInfo callPtrInfo(const char *ES);
+
+ // Functions with the "interrupt" attribute require special prologues,
+ // epilogues and additional spill slots.
+ bool isISR() const { return IsISR; }
+ void setISR() { IsISR = true; }
+ void createISRRegFI();
+ int getISRRegFI(unsigned Reg) const { return ISRDataRegFI[Reg]; }
+ bool isISRRegFI(int FI) const;
- /// \brief Create a MachinePointerInfo that has a MipsCallEntr object
+ /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object
/// representing a GOT entry for a global function.
- MachinePointerInfo callPtrInfo(const GlobalValue *Val);
+ MachinePointerInfo callPtrInfo(const GlobalValue *GV);
void setSaveS2() { SaveS2 = true; }
bool hasSaveS2() const { return SaveS2; }
@@ -136,17 +124,18 @@ private:
/// Frame objects for spilling eh data registers.
int EhDataRegFI[4];
+ /// ISR - Whether the function is an Interrupt Service Routine.
+ bool IsISR;
+
+ /// Frame objects for spilling C0_STATUS, C0_EPC
+ int ISRDataRegFI[2];
+
// saveS2
bool SaveS2;
/// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
/// O32 FPXX ABI is enabled. -1 is used to denote invalid index.
int MoveF64ViaSpillFI;
-
- /// MipsCallEntry maps.
- StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries;
- ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>>
- GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index f6647e6..28e5a42 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -84,6 +84,16 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>();
+ const Function *F = MF->getFunction();
+ if (F->hasFnAttribute("interrupt")) {
+ if (Subtarget.hasMips64())
+ return Subtarget.hasMips64r6() ? CSR_Interrupt_64R6_SaveList
+ : CSR_Interrupt_64_SaveList;
+ else
+ return Subtarget.hasMips32r6() ? CSR_Interrupt_32R6_SaveList
+ : CSR_Interrupt_32_SaveList;
+ }
+
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -284,6 +294,16 @@ getFrameRegister(const MachineFunction &MF) const {
}
bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ // Avoid realigning functions that explicitly do not want to be realigned.
+ // Normally, we should report an error when a function should be dynamically
+ // realigned but also has the attribute no-realign-stack. Unfortunately,
+ // with this attribute, MachineFrameInfo clamps each new object's alignment
+ // to that of the stack's alignment as specified by the ABI. As a result,
+ // the information of whether we have objects with larger alignment
+ // requirement than the stack's alignment is already lost at this point.
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+
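A function that would reach the realignment path, assuming the usual Clang/GCC handling of over-aligned locals (the names below are illustrative only):

    extern void use(void *);

    void overAligned() {
      alignas(32) char Buf[64]; // exceeds the 8-byte O32 stack alignment
      use(Buf);                 // forces Buf to actually live on the stack
    }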
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64;
unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64;
@@ -306,42 +326,3 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// sized objects.
return MF.getRegInfo().canReserveReg(BP);
}
-
-bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- bool CanRealign = canRealignStack(MF);
-
- // Avoid realigning functions that explicitly do not want to be realigned.
- // Normally, we should report an error when a function should be dynamically
- // realigned but also has the attribute no-realign-stack. Unfortunately,
- // with this attribute, MachineFrameInfo clamps each new object's alignment
- // to that of the stack's alignment as specified by the ABI. As a result,
- // the information of whether we have objects with larger alignment
- // requirement than the stack's alignment is already lost at this point.
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- const Function *F = MF.getFunction();
- if (F->hasFnAttribute(Attribute::StackAlignment)) {
-#ifdef DEBUG
- if (!CanRealign)
- DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
- << F->getName() << "\n");
-#endif
- return CanRealign;
- }
-
- unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment();
- if (MFI->getMaxAlignment() > StackAlignment) {
-#ifdef DEBUG
- if (!CanRealign)
- DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
- << F->getName() << "\n");
-#endif
- return CanRealign;
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index ee1f6bc..5de68a2 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -61,9 +61,7 @@ public:
RegScavenger *RS = nullptr) const;
// Stack realignment queries.
- bool canRealignStack(const MachineFunction &MF) const;
-
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 096b3be..a4abd62 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -319,6 +320,15 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
bool FP64) const {
+ const MachineOperand &Op1 = I->getOperand(1);
+ const MachineOperand &Op2 = I->getOperand(2);
+
+ if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg);
+ return true;
+ }
+
// For fpxx and when mfhc1 is not available, use:
// spill + reload via ldc1
//
@@ -335,8 +345,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
- unsigned N = I->getOperand(2).getImm();
+ unsigned SrcReg = Op1.getReg();
+ unsigned N = Op2.getImm();
int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));
// It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
@@ -352,8 +362,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
- TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC,
- &RegInfo, 0);
+ TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0);
TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset);
return true;
}
@@ -376,12 +385,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
*static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc dl;
MipsABIInfo ABI = STI.getABI();
unsigned SP = ABI.GetStackPtr();
unsigned FP = ABI.GetFramePtr();
unsigned ZERO = ABI.GetNullPtr();
- unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned MOVE = ABI.GetGPRMoveOp();
unsigned ADDiu = ABI.GetPtrAddiuOp();
unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND;
@@ -407,6 +416,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+ if (MF.getFunction()->hasFnAttribute("interrupt"))
+ emitInterruptPrologueStub(MF, MBB);
+
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.size()) {
@@ -491,7 +503,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
+ BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO)
.setMIFlag(MachineInstr::FrameSetup);
// emit ".cfi_def_cfa_register $fp"
@@ -514,7 +526,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (hasBP(MF)) {
// move $s7, $sp
unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
+ BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP)
.addReg(SP)
.addReg(ZERO);
}
@@ -522,6 +534,135 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void MipsSEFrameLowering::emitInterruptPrologueStub(
+ MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Report an error if the target doesn't support Mips32r2 or later.
+  // The epilogue relies on the use of "ehb" to clear execution
+  // hazards. Pre-R2 MIPS relies on an implementation-defined number
+  // of "ssnop"s to clear the execution hazard. Support for ssnop hazard
+  // clearing is not provided, so reject that configuration.
+ if (!STI.hasMips32r2())
+ report_fatal_error(
+ "\"interrupt\" attribute is not supported on pre-MIPS32R2 or "
+ "MIPS16 targets.");
+
+  // The GP register contains the "user" value, so we cannot perform
+  // any gp-relative loads until we restore the "kernel" or "system" gp
+  // value. Until such support is written, only the static relocation
+  // model is accepted.
+ if ((STI.getRelocationModel() != Reloc::Static))
+ report_fatal_error("\"interrupt\" attribute is only supported for the "
+ "static relocation model on MIPS at the present time.");
+
+ if (!STI.isABI_O32() || STI.hasMips64())
+ report_fatal_error("\"interrupt\" attribute is only supported for the "
+ "O32 ABI on MIPS32R2+ at the present time.");
+
+ // Perform ISR handling like GCC
+ StringRef IntKind =
+ MF.getFunction()->getFnAttribute("interrupt").getValueAsString();
+ const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+ // EIC interrupt handling needs to read the Cause register to disable
+ // interrupts.
+ if (IntKind == "eic") {
+ // Coprocessor registers are always live per se.
+ MBB.addLiveIn(Mips::COP013);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0)
+ .addReg(Mips::COP013)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0)
+ .addReg(Mips::K0)
+ .addImm(10)
+ .addImm(6)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Fetch and spill EPC
+ MBB.addLiveIn(Mips::COP014);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+ .addReg(Mips::COP014)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+ MipsFI->getISRRegFI(0), PtrRC,
+ STI.getRegisterInfo(), 0);
+
+ // Fetch and Spill Status
+ MBB.addLiveIn(Mips::COP012);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+ .addReg(Mips::COP012)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+ MipsFI->getISRRegFI(1), PtrRC,
+ STI.getRegisterInfo(), 0);
+
+ // Build the configuration for disabling lower priority interrupts. Non EIC
+ // interrupts need to be masked off with zero, EIC from the Cause register.
+ unsigned InsPosition = 8;
+ unsigned InsSize = 0;
+ unsigned SrcReg = Mips::ZERO;
+
+ // If the interrupt we're tied to is the EIC, switch the source for the
+ // masking off interrupts to the cause register.
+ if (IntKind == "eic") {
+ SrcReg = Mips::K0;
+ InsPosition = 10;
+ InsSize = 6;
+ } else
+ InsSize = StringSwitch<unsigned>(IntKind)
+ .Case("sw0", 1)
+ .Case("sw1", 2)
+ .Case("hw0", 3)
+ .Case("hw1", 4)
+ .Case("hw2", 5)
+ .Case("hw3", 6)
+ .Case("hw4", 7)
+ .Case("hw5", 8)
+ .Default(0);
+ assert(InsSize != 0 && "Unknown interrupt type!");
+
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(SrcReg)
+ .addImm(InsPosition)
+ .addImm(InsSize)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Mask off KSU, ERL, EXL
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(Mips::ZERO)
+ .addImm(1)
+ .addImm(4)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Disable the FPU as we are not spilling those register sets.
+ if (!STI.useSoftFloat())
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(Mips::ZERO)
+ .addImm(29)
+ .addImm(1)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Set the new status
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012)
+ .addReg(Mips::K1)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+}
+
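How a front end reaches this prologue stub, assuming a GNU-style spelling of the attribute; the exact strings Clang accepts are an assumption here (GCC also takes forms like "vector=sw0"), but the back end switches on the IR attribute values "eic", "sw0", "sw1", "hw0".."hw5" seen above:

    // Hypothetical handler; the string selects the IntKind consumed above.
    __attribute__((interrupt("eic"))) void externalInterruptHandler(void) {
      // Body runs with lower-priority interrupts masked by the emitted
      // prologue; EPC/Status are spilled and restored around it.
    }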
void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -533,12 +674,12 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
const MipsRegisterInfo &RegInfo =
*static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
- DebugLoc dl = MBBI->getDebugLoc();
+ DebugLoc DL = MBBI->getDebugLoc();
MipsABIInfo ABI = STI.getABI();
unsigned SP = ABI.GetStackPtr();
unsigned FP = ABI.GetFramePtr();
unsigned ZERO = ABI.GetNullPtr();
- unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned MOVE = ABI.GetGPRMoveOp();
// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
@@ -549,7 +690,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
--I;
// Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ BuildMI(MBB, I, DL, TII.get(MOVE), SP).addReg(FP).addReg(ZERO);
}
if (MipsFI->callsEhReturn()) {
@@ -568,6 +709,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+ if (MF.getFunction()->hasFnAttribute("interrupt"))
+ emitInterruptEpilogueStub(MF, MBB);
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI->getStackSize();
@@ -578,13 +722,59 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
}
+void MipsSEFrameLowering::emitInterruptEpilogueStub(
+ MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Perform ISR handling like GCC
+ const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+ // Disable Interrupts.
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::DI), Mips::ZERO);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB));
+
+ // Restore EPC
+ STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1,
+ MipsFI->getISRRegFI(0), PtrRC,
+ STI.getRegisterInfo());
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014)
+ .addReg(Mips::K1)
+ .addImm(0);
+
+ // Restore Status
+ STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1,
+ MipsFI->getISRRegFI(1), PtrRC,
+ STI.getRegisterInfo());
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012)
+ .addReg(Mips::K1)
+ .addImm(0);
+}
+
+int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsABIInfo ABI = STI.getABI();
+
+ if (MFI->isFixedObjectIndex(FI))
+ FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr();
+ else
+ FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr();
+
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
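
The new getFrameIndexReference override pairs a register choice with a fixed arithmetic rule. The offset computation in isolation, as a sketch with the register selection omitted and invented numbers in the usage comment:

#include <cstdint>

static int64_t frameIndexOffset(int64_t ObjectOffset, uint64_t StackSize,
                                int64_t LocalAreaOffset,
                                int64_t OffsetAdjustment) {
  return ObjectOffset + (int64_t)StackSize - LocalAreaOffset + OffsetAdjustment;
}
// e.g. an object at offset -8 in a 32-byte frame with zero adjustments:
// frameIndexOffset(-8, 32, 0, 0) == 24 bytes above the chosen frame register.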
+
bool MipsSEFrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
+ MachineBasicBlock *EntryBlock = &MF->front();
const TargetInstrInfo &TII = *STI.getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
@@ -599,6 +789,26 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!IsRAAndRetAddrIsTaken)
EntryBlock->addLiveIn(Reg);
+ // ISRs require HI/LO to be copied into kernel registers and then
+ // spilled to the stack frame.
+ bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 ||
+ Reg == Mips::HI0 || Reg == Mips::HI0_64);
+ const Function *Func = MBB.getParent()->getFunction();
+ if (IsLOHI && Func->hasFnAttribute("interrupt")) {
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned Op = 0;
+ if (!STI.getABI().ArePtrs64bit()) {
+ Op = (Reg == Mips::HI0) ? Mips::MFHI : Mips::MFLO;
+ Reg = Mips::K0;
+ } else {
+ Op = (Reg == Mips::HI0) ? Mips::MFHI64 : Mips::MFLO64;
+ Reg = Mips::K0_64;
+ }
+ BuildMI(MBB, MI, DL, TII.get(Op), Mips::K0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Insert the spill to the stack frame.
bool IsKill = !IsRAAndRetAddrIsTaken;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
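
For HI/LO the spill has to be staged through a kernel scratch register, since the accumulator cannot be stored directly. A self-contained sketch of the opcode/scratch selection, with enums standing in for the real Mips::* IDs; on the 64-bit path the incoming register is assumed to be the 64-bit HI/LO:

enum Reg { HI0, LO0, HI0_64, LO0_64, K0, K0_64 };
enum Opcode { MFHI, MFLO, MFHI64, MFLO64 };

struct AccSpill {
  Opcode CopyOp; // moves the accumulator half into the scratch register
  Reg Scratch;   // the register that is then stored to the ISR frame slot
};

static AccSpill selectAccSpill(Reg R, bool Ptrs64Bit) {
  if (!Ptrs64Bit)
    return {R == HI0 ? MFHI : MFLO, K0};         // 32-bit: hi/lo -> $k0
  return {R == HI0_64 ? MFHI64 : MFLO64, K0_64}; // 64-bit: hi/lo -> $k0
}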
@@ -622,7 +832,8 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
}
/// Mark \p Reg and all registers aliasing it in the bitset.
-void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) {
+static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs,
+ unsigned Reg) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
SavedRegs.set(*AI);
@@ -648,6 +859,10 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MipsFI->callsEhReturn())
MipsFI->createEhDataRegsFI();
+ // Create spill slots for Coprocessor 0 registers if function is an ISR.
+ if (MipsFI->isISR())
+ MipsFI->createISRRegFI();
+
// Expand pseudo instructions which load, store or copy accumulators.
// Add an emergency spill slot if a pseudo was expanded.
if (ExpandPseudo(MF).expand()) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index 9cb32e6..63cd3ce 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -27,6 +27,9 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -37,8 +40,13 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
unsigned ehDataReg(unsigned I) const;
-};
+private:
+ void emitInterruptEpilogueStub(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+ void emitInterruptPrologueStub(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+};
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 2ebfbd1..6f001ea 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -136,7 +136,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index b319fd0..efe22fb 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1181,6 +1181,10 @@ bool MipsSETargetLowering::isEligibleForTailCallOptimization(
if (!EnableMipsTailCalls)
return false;
+ // ISRs must return via eret, which clears the exception state, so they
+ // cannot tail call.
+ if (FI.isISR())
+ return false;
+
// Return false if either the callee or caller has a byval argument.
if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
return false;
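
Restating the gate order this hunk establishes: the ISR check sits between the global enable and the byval test. A sketch with booleans standing in for the option, function-info, and calling-convention queries:

static bool eligibleForTailCall(bool EnableTailCalls, bool IsISR,
                                bool HasByvalArg) {
  if (!EnableTailCalls)
    return false;
  if (IsISR)
    return false; // interrupt handlers must return through eret
  if (HasByvalArg)
    return false;
  return true;
}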
@@ -1786,9 +1790,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
case Intrinsic::mips_fadd_w:
- case Intrinsic::mips_fadd_d:
+ case Intrinsic::mips_fadd_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
// Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
case Intrinsic::mips_fceq_w:
case Intrinsic::mips_fceq_d:
@@ -1831,9 +1837,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2), ISD::SETUNE);
case Intrinsic::mips_fdiv_w:
- case Intrinsic::mips_fdiv_d:
+ case Intrinsic::mips_fdiv_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ffint_u_w:
case Intrinsic::mips_ffint_u_d:
return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
@@ -1856,6 +1864,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::mips_fexp2_w:
case Intrinsic::mips_fexp2_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(
ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
@@ -1869,11 +1878,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
case Intrinsic::mips_fmul_w:
- case Intrinsic::mips_fmul_d:
+ case Intrinsic::mips_fmul_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_fmsub_w:
case Intrinsic::mips_fmsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
@@ -1886,9 +1898,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_fsqrt_d:
return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
case Intrinsic::mips_fsub_w:
- case Intrinsic::mips_fsub_d:
+ case Intrinsic::mips_fsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ftrunc_u_w:
case Intrinsic::mips_ftrunc_u_d:
return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 786307b..e6f7fe9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -88,7 +88,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (isMicroMips)
Opc = Mips::MOVE16_MM;
else
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
} else if (Mips::CCRRegClass.contains(SrcReg))
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
@@ -141,7 +141,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::FMOV_D64;
else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::GPR64RegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
else if (Mips::HI64RegClass.contains(SrcReg))
Opc = Mips::MFHI64, SrcReg = 0;
else if (Mips::LO64RegClass.contains(SrcReg))
@@ -182,7 +182,6 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
int64_t Offset) const {
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
unsigned Opc = 0;
@@ -213,6 +212,33 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::ST_W;
else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
Opc = Mips::ST_D;
+ else if (Mips::LO32RegClass.hasSubClassEq(RC))
+ Opc = Mips::SW;
+ else if (Mips::LO64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SD;
+ else if (Mips::HI32RegClass.hasSubClassEq(RC))
+ Opc = Mips::SW;
+ else if (Mips::HI64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SD;
+
+ // Hi, Lo are normally caller save but they are callee save
+ // for interrupt handling.
+ const Function *Func = MBB.getParent()->getFunction();
+ if (Func->hasFnAttribute("interrupt")) {
+ if (Mips::HI32RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0);
+ SrcReg = Mips::K0;
+ } else if (Mips::HI64RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64);
+ SrcReg = Mips::K0_64;
+ } else if (Mips::LO32RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0);
+ SrcReg = Mips::K0;
+ } else if (Mips::LO64RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64);
+ SrcReg = Mips::K0_64;
+ }
+ }
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
@@ -228,6 +254,11 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
unsigned Opc = 0;
+ const Function *Func = MBB.getParent()->getFunction();
+ bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") &&
+ (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 ||
+ DestReg == Mips::HI0 || DestReg == Mips::HI0_64);
+
if (Mips::GPR32RegClass.hasSubClassEq(RC))
Opc = Mips::LW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
@@ -254,10 +285,44 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::LD_W;
else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
Opc = Mips::LD_D;
+ else if (Mips::HI32RegClass.hasSubClassEq(RC))
+ Opc = Mips::LW;
+ else if (Mips::HI64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LD;
+ else if (Mips::LO32RegClass.hasSubClassEq(RC))
+ Opc = Mips::LW;
+ else if (Mips::LO64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LD;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
- .addMemOperand(MMO);
+
+ if (!ReqIndirectLoad)
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ else {
+ // Load HI/LO through K0. Note that the HI/LO destination is implied by the
+ // MTHI/MTLO opcode itself rather than encoded as an operand.
+ unsigned Reg = Mips::K0;
+ unsigned LdOp = Mips::MTLO;
+ if (DestReg == Mips::HI0)
+ LdOp = Mips::MTHI;
+
+ if (Subtarget.getABI().ArePtrs64bit()) {
+ Reg = Mips::K0_64;
+ if (DestReg == Mips::HI0_64)
+ LdOp = Mips::MTHI64;
+ else
+ LdOp = Mips::MTLO64;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc), Reg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg);
+ }
}
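
Because MTHI/MTLO name no memory operand, restoring HI/LO is a two-instruction sequence: load into $k0, then move into the accumulator. A runnable sketch that prints the sequence the code above would emit; the offset and mnemonic spellings are illustrative only:

#include <cstdio>

static void printAccRestore(bool IsHi, bool Ptrs64Bit, int FrameOff) {
  const char *Load = Ptrs64Bit ? "ld" : "lw"; // matches the LD/LW choice above
  std::printf("%s $k0, %d($sp)\n", Load, FrameOff);
  std::printf("%s $k0\n", IsHi ? "mthi" : "mtlo");
}
// printAccRestore(true, false, 16) prints:
//   lw $k0, 16($sp)
//   mthi $k0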
bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
@@ -271,6 +336,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::RetRA:
expandRetRA(MBB, MI);
break;
+ case Mips::ERet:
+ expandERet(MBB, MI);
+ break;
case Mips::PseudoMFHI:
Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
expandPseudoMFHiLo(MBB, MI, Opc);
@@ -360,7 +428,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
MipsABIInfo ABI = Subtarget.getABI();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned ADDu = ABI.GetPtrAdduOp();
unsigned ADDiu = ABI.GetPtrAddiuOp();
@@ -438,6 +506,11 @@ void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
}
+void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET));
+}
+
std::pair<bool, bool>
MipsSEInstrInfo::compareOpndSize(unsigned Opc,
const MachineFunction &MF) const {
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index bebbabf..5d73545 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -82,6 +82,8 @@ private:
void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+ void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index 132c3a1..b1e2885 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -126,17 +126,19 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
}
bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
-
+ bool IsISRRegFI = MipsFI->isISRRegFI(FrameIndex);
// The following stack frame objects are always referenced relative to $sp:
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
// 4. Locations for eh data registers.
+ // 5. Locations for ISR saved Coprocessor 0 registers 12 & 14.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
- if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI ||
+ IsISRRegFI)
FrameReg = ABI.GetStackPtr();
else if (RegInfo->needsStackRealignment(MF)) {
if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex))
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 54b5d28..37f9e49 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -16,8 +16,8 @@ def IMULDIV : FuncUnit;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for Mips
//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IIBranch : InstrItinClass;
+// IIM16Alu is a placeholder class for most MIPS16 instructions.
+def IIM16Alu : InstrItinClass;
def IIPseudo : InstrItinClass;
def II_ABS : InstrItinClass;
@@ -28,7 +28,19 @@ def II_ADD_D : InstrItinClass;
def II_ADD_S : InstrItinClass;
def II_AND : InstrItinClass;
def II_ANDI : InstrItinClass;
+def II_B : InstrItinClass;
def II_BADDU : InstrItinClass;
+def II_BBIT : InstrItinClass; // bbit[01], bbit[01]32
+def II_BC : InstrItinClass;
+def II_BC1F : InstrItinClass;
+def II_BC1FL : InstrItinClass;
+def II_BC1T : InstrItinClass;
+def II_BC1TL : InstrItinClass;
+def II_BCC : InstrItinClass; // beq and bne
+def II_BCCZ : InstrItinClass; // b[gl][et]z
+def II_BCCZAL : InstrItinClass; // bgezal and bltzal
+def II_BCCZALS : InstrItinClass; // bgezals and bltzals
+def II_BCCZC : InstrItinClass; // beqzc, bnezc
def II_CEIL : InstrItinClass;
def II_CFC1 : InstrItinClass;
def II_CLO : InstrItinClass;
@@ -68,21 +80,39 @@ def II_DSUB : InstrItinClass;
def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
def II_INS : InstrItinClass; // Any INS instruction
+def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo.
+def II_J : InstrItinClass;
+def II_JAL : InstrItinClass;
+def II_JALR : InstrItinClass;
+def II_JALRC : InstrItinClass;
+def II_JALRS : InstrItinClass;
+def II_JALS : InstrItinClass;
+def II_JR : InstrItinClass;
+def II_JRADDIUSP : InstrItinClass;
+def II_JRC : InstrItinClass;
+def II_ReturnPseudo : InstrItinClass; // Return pseudo.
def II_LB : InstrItinClass;
+def II_LBE : InstrItinClass;
def II_LBU : InstrItinClass;
+def II_LBUE : InstrItinClass;
def II_LD : InstrItinClass;
def II_LDC1 : InstrItinClass;
def II_LDL : InstrItinClass;
def II_LDR : InstrItinClass;
def II_LDXC1 : InstrItinClass;
def II_LH : InstrItinClass;
+def II_LHE : InstrItinClass;
def II_LHU : InstrItinClass;
+def II_LHUE : InstrItinClass;
def II_LUI : InstrItinClass;
def II_LUXC1 : InstrItinClass;
def II_LW : InstrItinClass;
+def II_LWE : InstrItinClass;
def II_LWC1 : InstrItinClass;
def II_LWL : InstrItinClass;
+def II_LWLE : InstrItinClass;
def II_LWR : InstrItinClass;
+def II_LWRE : InstrItinClass;
def II_LWU : InstrItinClass;
def II_LWXC1 : InstrItinClass;
def II_MADD : InstrItinClass;
@@ -134,6 +164,7 @@ def II_ROTRV : InstrItinClass;
def II_ROUND : InstrItinClass;
def II_SAVE : InstrItinClass;
def II_SB : InstrItinClass;
+def II_SBE : InstrItinClass;
def II_SD : InstrItinClass;
def II_SDC1 : InstrItinClass;
def II_SDL : InstrItinClass;
@@ -144,6 +175,7 @@ def II_SEH : InstrItinClass;
def II_SEQ_SNE : InstrItinClass; // seq and sne
def II_SEQI_SNEI : InstrItinClass; // seqi and snei
def II_SH : InstrItinClass;
+def II_SHE : InstrItinClass;
def II_SLL : InstrItinClass;
def II_SLLV : InstrItinClass;
def II_SLTI_SLTIU : InstrItinClass; // slti and sltiu
@@ -159,11 +191,15 @@ def II_SUB_D : InstrItinClass;
def II_SUB_S : InstrItinClass;
def II_SUXC1 : InstrItinClass;
def II_SW : InstrItinClass;
+def II_SWE : InstrItinClass;
def II_SWC1 : InstrItinClass;
def II_SWL : InstrItinClass;
+def II_SWLE : InstrItinClass;
def II_SWR : InstrItinClass;
+def II_SWRE : InstrItinClass;
def II_SWXC1 : InstrItinClass;
def II_TRUNC : InstrItinClass;
+def II_WSBH : InstrItinClass;
def II_XOR : InstrItinClass;
def II_XORI : InstrItinClass;
@@ -171,7 +207,7 @@ def II_XORI : InstrItinClass;
// Mips Generic instruction itineraries.
//===----------------------------------------------------------------------===//
def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIM16Alu , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDIU , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDU , [InstrStage<1, [ALU]>]>,
@@ -240,7 +276,29 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_SAVE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQ_SNE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQI_SNEI , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_B , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BBIT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1F , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1FL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1T , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1TL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZ , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZAL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZALS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_IndirectBranchPseudo, [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_J , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JAL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALRC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALRS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JRADDIUSP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JRC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_ReturnPseudo , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DMUL , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_DMULT , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_DMULTU , [InstrStage<17, [IMULDIV]>]>,
@@ -313,3 +371,5 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>
]>;
+
+include "MipsScheduleP5600.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
new file mode 100644
index 0000000..d32ae4f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -0,0 +1,392 @@
+//==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def MipsP5600Model : SchedMachineModel {
+ int IssueWidth = 2; // 2x dispatched per cycle
+ int MicroOpBufferSize = 48; // min(48, 48, 64)
+ int LoadLatency = 4;
+ int MispredictPenalty = 8; // TODO: Estimated
+
+ let CompleteModel = 1;
+}
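
The model's IssueWidth and LoadLatency already bound any block's runtime from below. A back-of-envelope sketch of that bound; this is not how LLVM's machine scheduler actually consumes a SchedMachineModel:

#include <algorithm>
#include <cstdint>

// Lower bound on cycles: issue-limited by IssueWidth, latency-limited by the
// longest chain of dependent loads. Defaults are the P5600 values above.
static uint64_t lowerBoundCycles(uint64_t NumUOps, uint64_t ChainedLoads,
                                 unsigned IssueWidth = 2,
                                 unsigned LoadLatency = 4) {
  uint64_t IssueBound = (NumUOps + IssueWidth - 1) / IssueWidth;
  uint64_t LatencyBound = ChainedLoads * LoadLatency;
  return std::max(IssueBound, LatencyBound);
}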
+
+let SchedModel = MipsP5600Model in {
+
+// ALQ Pipelines
+// =============
+
+def P5600ALQ : ProcResource<1> { let BufferSize = 16; }
+def P5600IssueALU : ProcResource<1> { let Super = P5600ALQ; }
+
+// ALU Pipeline
+// ------------
+
+def P5600WriteALU : SchedWriteRes<[P5600IssueALU]>;
+
+// and, lui, nor, or, slti, sltiu, sub, subu, xor
+def : ItinRW<[P5600WriteALU],
+ [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUBU, II_XOR]>;
+
+// AGQ Pipelines
+// =============
+
+def P5600AGQ : ProcResource<3> { let BufferSize = 16; }
+def P5600IssueAL2 : ProcResource<1> { let Super = P5600AGQ; }
+def P5600IssueCTISTD : ProcResource<1> { let Super = P5600AGQ; }
+def P5600IssueLDST : ProcResource<1> { let Super = P5600AGQ; }
+
+def P5600AL2Div : ProcResource<1>;
+// Pseudo-resource used to block CTISTD when handling multi-pipeline splits.
+def P5600CTISTD : ProcResource<1>;
+
+// CTISTD Pipeline
+// ---------------
+
+def P5600WriteJump : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]>;
+def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> {
+ let Latency = 2;
+}
+
+// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
+// jalr, jr.hb, jr
+def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR]>;
+def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR]>;
+
+// LDST Pipeline
+// -------------
+
+def P5600WriteLoad : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 4;
+}
+
+def P5600WriteLoadShifted : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
+ let Latency = 4;
+}
+
+def P5600WritePref : SchedWriteRes<[P5600IssueLDST]>;
+
+def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
+ // FIXME: This is a bit pessimistic. P5600CTISTD is only used during cycle 2,
+ // not during all of cycles 0, 1, and 2.
+ let ResourceCycles = [ 1, 3 ];
+}
+
+def P5600WriteGPRFromBypass : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 2;
+}
+
+def P5600WriteStoreFromOtherUnits : SchedWriteRes<[P5600IssueLDST]>;
+def P5600WriteLoadToOtherUnits : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 0;
+}
+
+// l[bhw], l[bh]u, ll
+def : ItinRW<[P5600WriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LWU]>;
+
+// lw[lr]
+def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWR]>;
+
+// s[bhw], sw[lr]
+def : ItinRW<[P5600WriteStore], [II_SB, II_SH, II_SW, II_SWL, II_SWR]>;
+
+// pref
+// (this instruction does not exist in the backend yet)
+def : ItinRW<[P5600WritePref], []>;
+
+// sc
+// (this instruction does not exist in the backend yet)
+def : ItinRW<[P5600WriteStore], []>;
+
+// LDST is also used in moves from general purpose registers to floating point
+// and MSA.
+def P5600WriteMoveGPRToOtherUnits : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 0;
+}
+
+// AL2 Pipeline
+// ------------
+
+def P5600WriteAL2 : SchedWriteRes<[P5600IssueAL2]>;
+def P5600WriteAL2BitExt : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; }
+def P5600WriteAL2ShadowMov : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; }
+def P5600WriteAL2CondMov : SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
+ let Latency = 2;
+}
+def P5600WriteAL2Div : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
+ // Estimated worst case
+ let Latency = 34;
+ let ResourceCycles = [1, 34];
+}
+def P5600WriteAL2DivU : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
+ // Estimated worst case
+ let Latency = 34;
+ let ResourceCycles = [1, 34];
+}
+def P5600WriteAL2Mul : SchedWriteRes<[P5600IssueAL2]> { let Latency = 3; }
+def P5600WriteAL2Mult: SchedWriteRes<[P5600IssueAL2]> { let Latency = 5; }
+def P5600WriteAL2MAdd: SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
+ let Latency = 5;
+}
+
+// clo, clz, di, mfhi, mflo
+def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_MFHI_MFLO]>;
+
+// ehb, rdhwr, rdpgpr, wrpgpr, wsbh
+def : ItinRW<[P5600WriteAL2ShadowMov], [II_RDHWR]>;
+
+// mov[nz]
+def : ItinRW<[P5600WriteAL2CondMov], [II_MOVN, II_MOVZ]>;
+
+// divu?
+def : ItinRW<[P5600WriteAL2Div], [II_DIV]>;
+def : ItinRW<[P5600WriteAL2DivU], [II_DIVU]>;
+
+// mul
+def : ItinRW<[P5600WriteAL2Mul], [II_MUL]>;
+// multu?
+def : ItinRW<[P5600WriteAL2Mult], [II_MULT, II_MULTU]>;
+// maddu?, msubu?, mthi, mtlo
+def : ItinRW<[P5600WriteAL2MAdd],
+ [II_MADD, II_MADDU, II_MSUB, II_MSUBU, II_MTHI_MTLO]>;
+
+// ext, ins
+def : ItinRW<[P5600WriteAL2BitExt],
+ [II_EXT, II_INS]>;
+
+// Either ALU or AL2 Pipelines
+// ---------------------------
+//
+// Some instructions can choose between ALU and AL2, but once dispatched to
+// ALQ or AGQ respectively they are committed to that path.
+// The decision is based on the outcome of the most recent selection when the
+// choice was last available. For now, we assume ALU is always chosen.
+
+def P5600WriteEitherALU : SchedWriteVariant<
+ // FIXME: Implement selection predicate
+ [SchedVar<SchedPredicate<[{1}]>, [P5600WriteALU]>,
+ SchedVar<SchedPredicate<[{0}]>, [P5600WriteAL2]>
+ ]>;
+
+// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
+// xori
+def : ItinRW<[P5600WriteEitherALU],
+ [II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH,
+ II_SLT_SLTU, II_SLL, II_SRA, II_SRL, II_XORI, II_ADDU, II_SLLV,
+ II_SRAV, II_SRLV]>;
+
+// FPU Pipelines
+// =============
+
+def P5600FPQ : ProcResource<3> { let BufferSize = 16; }
+def P5600IssueFPUS : ProcResource<1> { let Super = P5600FPQ; }
+def P5600IssueFPUL : ProcResource<1> { let Super = P5600FPQ; }
+def P5600IssueFPULoad : ProcResource<1> { let Super = P5600FPQ; }
+
+def P5600FPUDivSqrt : ProcResource<2>;
+
+def P5600WriteFPUS : SchedWriteRes<[P5600IssueFPUS]>;
+def P5600WriteFPUL : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 4; }
+def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6; }
+def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 23 / 27
+ let Latency = 23; // Using common case
+ let ResourceCycles = [ 1, 23 ];
+}
+def P5600WriteFPUDivD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 31 / 35
+ let Latency = 31; // Using common case
+ let ResourceCycles = [ 1, 31 ];
+}
+def P5600WriteFPURcpS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 19 / 23
+ let Latency = 19; // Using common case
+ let ResourceCycles = [ 1, 19 ];
+}
+def P5600WriteFPURcpD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPURsqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 27
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPURsqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPUSqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 35 / 39
+ let Latency = 35; // Using common case
+ let ResourceCycles = [ 1, 35 ];
+}
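
The best/common/worst figures scattered through the comments above, collected into one lookup; the scheduling model always binds the common case. The mnemonic spellings are assumptions for illustration:

#include <cstring>

struct FPLat { unsigned Best, Common, Worst; };

static FPLat p5600DivSqrtLatency(const char *Op) {
  if (!std::strcmp(Op, "div.s"))   return {7, 23, 27};
  if (!std::strcmp(Op, "div.d"))   return {7, 31, 35};
  if (!std::strcmp(Op, "recip.s")) return {7, 19, 23};
  if (!std::strcmp(Op, "recip.d")) return {7, 27, 31};
  if (!std::strcmp(Op, "rsqrt.s")) return {7, 27, 27};
  if (!std::strcmp(Op, "rsqrt.d")) return {7, 27, 31};
  if (!std::strcmp(Op, "sqrt.s"))  return {7, 27, 31};
  return {7, 35, 39};              // sqrt.d, the worst of the family
}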
+def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>;
+def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; }
+def P5600WriteMoveOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPUS]>;
+
+// FPUS is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def P5600WriteMoveFPUSToOtherUnits : SchedWriteRes<[P5600IssueFPUS]> {
+ let Latency = 0;
+}
+
+// FPUL is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def P5600WriteMoveFPULToOtherUnits : SchedWriteRes<[P5600IssueFPUL]>;
+
+// Short Pipe
+// ----------
+//
+// abs.[ds], abs.ps, bc1[tf]l?, mov[tf].[ds], mov[tf], mov.[ds], [cm][ft]c1,
+// m[ft]hc1, neg.[ds], neg.ps, nor.v, nori.b, or.v, ori.b, xor.v, xori.b,
+// sdxc1, sdc1, st.[bhwd], swc1, swxc1
+def : ItinRW<[P5600WriteFPUS], [II_ABS, II_MOVF_D, II_MOVF_S, II_MOVT_D,
+ II_MOVT_S, II_MOV_D, II_MOV_S, II_NEG]>;
+
+// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd],
+// aver?_[us].[bhwd]
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>;
+// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it.
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
+
+// and.v, andi.b, move.v, ldi.[bhwd]
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+
+// Long Pipe
+// ----------
+//
+// add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps,
+// cvt.ps.[sw], c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps, sub.[ds], sub.ps,
+// trunc.w.[ds], trunc.w.ps
+def : ItinRW<[P5600WriteFPUL],
+ [II_ADD_D, II_ADD_S, II_CVT, II_C_CC_D, II_C_CC_S, II_MUL_D,
+ II_MUL_S, II_SUB_D, II_SUB_S, II_TRUNC]>;
+
+// div.[ds], div.ps
+def : ItinRW<[P5600WriteFPUDivS], [II_DIV_S]>;
+def : ItinRW<[P5600WriteFPUDivD], [II_DIV_D]>;
+
+// sqrt.[ds], sqrt.ps
+def : ItinRW<[P5600WriteFPUSqrtS], [II_SQRT_S]>;
+def : ItinRW<[P5600WriteFPUSqrtD], [II_SQRT_D]>;
+
+// madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds],
+// Operand 0 is read on cycle 5. All other operands are read on cycle 0.
+def : ItinRW<[SchedReadAdvance<5>, P5600WriteFPUL_MADDSUB],
+ [II_MADD_D, II_MADD_S, II_MSUB_D, II_MSUB_S, II_NMADD_D,
+ II_NMADD_S, II_NMSUB_D, II_NMSUB_S]>;
+
+// madd.ps, msub.ps, nmadd.ps, nmsub.ps
+// Operands 0 and 1 are read on cycle 5. All others are read on cycle 0.
+// (none of these instructions exist in the backend yet)
+
+// Load Pipe
+// ---------
+//
+// This is typically used in conjunction with the load pipeline under the AGQ.
+// All the instructions are in the 'Tricky Instructions' section.
+
+def P5600WriteLoadOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPULoad]> {
+ let Latency = 4;
+}
+
+// Tricky Instructions
+// ===================
+//
+// These instructions are split across multiple uops (in different pipelines)
+// that must cooperate to complete the operation.
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteMoveGPRToFPU : WriteSequence<[P5600WriteMoveGPRToOtherUnits,
+ P5600WriteMoveOtherUnitsToFPU]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteMoveFPUToGPR : WriteSequence<[P5600WriteMoveFPUSToOtherUnits,
+ P5600WriteGPRFromBypass]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteStoreFPUS : WriteSequence<[P5600WriteMoveFPUSToOtherUnits,
+ P5600WriteStoreFromOtherUnits]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteStoreFPUL : WriteSequence<[P5600WriteMoveFPULToOtherUnits,
+ P5600WriteStoreFromOtherUnits]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteLoadFPU : WriteSequence<[P5600WriteLoadToOtherUnits,
+ P5600WriteLoadOtherUnitsToFPU]>;
+
+// ctc1, mtc1, mthc1
+def : ItinRW<[P5600WriteMoveGPRToFPU], [II_CTC1, II_MTC1, II_MTHC1]>;
+
+// bc1[ft], cfc1, mfc1, mfhc1, movf, movt
+def : ItinRW<[P5600WriteMoveFPUToGPR],
+ [II_BC1F, II_BC1T, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>;
+
+// swc1, swxc1, st.[bhwd]
+def : ItinRW<[P5600WriteStoreFPUS], [II_SWC1, II_SWXC1]>;
+def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>;
+
+// movn.[ds], movz.[ds]
+def : ItinRW<[P5600WriteStoreFPUL], [II_MOVN_D, II_MOVN_S, II_MOVZ_D, II_MOVZ_S]>;
+
+// l[dw]x?c1, ld.[bhwd]
+def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1]>;
+def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
+
+// Unsupported Instructions
+// ========================
+//
+// The following instruction classes are never valid on P5600.
+// II_DADDIU, II_DADDU, II_DMFC1, II_DMTC1, II_DMULT, II_DMULTU, II_DROTR,
+// II_DROTR32, II_DROTRV, II_DDIV, II_DSLL, II_DSLL32, II_DSLLV, II_DSRA,
+// II_DSRA32, II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV, II_DSUBU, II_DDIVU,
+// II_JALRC, II_LD, II_LD[LR], II_LUXC1, II_RESTORE, II_SAVE, II_SD, II_SDC1,
+// II_SDL, II_SDR, II_SDXC1
+//
+// The following instructions are never valid on P5600.
+// addq.ph, rdhwr, repl.ph, repl.qb, subq.ph, subu_s.qb
+//
+// Guesswork
+// =========
+//
+// This section is largely temporary guesswork.
+
+// ceil.[lw].[ds], floor.[lw].[ds]
+// Reason behind guess: trunc.[lw].[ds] and the various cvt's are in FPUL
+def : ItinRW<[P5600WriteFPUL], [II_CEIL, II_FLOOR, II_ROUND]>;
+
+// rotrv
+// Reason behind guess: rotr is in the same category and the two register forms
+// generally follow the immediate forms in this category.
+def : ItinRW<[P5600WriteEitherALU], [II_ROTRV]>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 471b6e1..8a18b51 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -69,8 +69,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(),
+ HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
+ Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM),
+ TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 1db8881..fbb01fe 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -42,9 +42,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
Mips3, Mips4, Mips5, Mips64, Mips64r2, Mips64r3, Mips64r5, Mips64r6
};
+ enum class CPU { P5600 };
+
// Mips architecture version
MipsArchEnum MipsArchVersion;
+ // Processor implementation (unused but required to exist by
+ // tablegen-erated code).
+ CPU ProcImpl;
+
// IsLittle - The target is Little Endian
bool IsLittle;
@@ -116,8 +122,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// InMicroMips -- can process MicroMips instructions
bool InMicroMipsMode;
- // HasDSP, HasDSPR2 -- supports DSP ASE.
- bool HasDSP, HasDSPR2;
+ // HasDSP, HasDSPR2, HasDSPR3 -- supports DSP ASE.
+ bool HasDSP, HasDSPR2, HasDSPR3;
// Allow mixed Mips16 and Mips32 in one source file
bool AllowMixed16_32;
@@ -130,6 +136,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasMSA -- supports MSA ASE.
bool HasMSA;
+ // UseTCCInDIV -- Enables the use of trapping in the assembler.
+ bool UseTCCInDIV;
+
+ // HasEVA -- supports EVA ASE.
+ bool HasEVA;
+
InstrItineraryData InstrItins;
// We can override the determination of whether we are in mips16 mode
@@ -189,7 +201,7 @@ public:
}
bool hasMips32r5() const {
return (MipsArchVersion >= Mips32r5 && MipsArchVersion < Mips32Max) ||
- hasMips64r2();
+ hasMips64r5();
}
bool hasMips32r6() const {
return (MipsArchVersion >= Mips32r6 && MipsArchVersion < Mips32Max) ||
@@ -228,9 +240,12 @@ public:
}
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
+ bool inMicroMips64r6Mode() const { return InMicroMipsMode && hasMips64r6(); }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
+ bool hasDSPR3() const { return HasDSPR3; }
bool hasMSA() const { return HasMSA; }
+ bool hasEVA() const { return HasEVA; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
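
The hasMips32r5 hunk above fixes an implication bug: 32r5 support was previously implied by 64r2 rather than 64r5. A toy sketch of the corrected chain, using a hypothetical flattened enum (the real MipsArchEnum interleaves the 32- and 64-bit entries):

enum Arch { Mips32, Mips32r2, Mips32r5, Mips32r6, Mips32Max,
            Mips64, Mips64r2, Mips64r5, Mips64r6 };

static bool hasMips64r5(Arch A) { return A >= Mips64r5; }

static bool hasMips32r5(Arch A) {
  // A 64-bit r5 core implies 32r5 support; a 64r2 core (the old test) did not.
  return (A >= Mips32r5 && A < Mips32Max) || hasMips64r5(A);
}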
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 1c77745..3e63872 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -233,7 +233,7 @@ void MipsPassConfig::addPreRegAlloc() {
}
TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
if (Subtarget->allowMixed16_32()) {
DEBUG(errs() << "No Target Transform Info Pass Added\n");
// FIXME: This is no longer necessary as the TTI returned is per-function.
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index 0f2db60..146f33b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -76,7 +76,7 @@ bool MipsTargetObjectFile::
IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
SectionKind Kind) const {
return (IsGlobalInSmallSectionImpl(GV, TM) &&
- (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon()));
+ (Kind.isData() || Kind.isBSS() || Kind.isCommon()));
}
/// Return true if this global address should be placed into small data/bss
@@ -107,7 +107,8 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV,
return false;
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
MCSection *
@@ -120,7 +121,7 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
@@ -128,21 +129,20 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
}
/// Return true if this constant should be placed into small data section.
-bool MipsTargetObjectFile::
-IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
+bool MipsTargetObjectFile::IsConstantInSmallSection(
+ const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const {
return (static_cast<const MipsTargetMachine &>(TM)
.getSubtargetImpl()
->useSmallSection() &&
- LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(
- CN->getType())));
+ LocalSData && IsInSmallSection(DL.getTypeAllocSize(CN->getType())));
}
-MCSection *
-MipsTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
- if (IsConstantInSmallSection(C, *TM))
+/// Return the MCSection for this constant, using the small data section when eligible.
+MCSection *MipsTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
+ if (IsConstantInSmallSection(DL, C, *TM))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C);
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C);
}
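
IsInSmallSection, which is not shown in this hunk, reduces to a size test against a threshold. A sketch under the assumption that the knob behaves like LLVM's usual small-section threshold option; the variable name and the 8-byte default are assumptions, not lifted from this diff:

#include <cstdint>

static unsigned SSThreshold = 8; // assumed default, in bytes

static bool isInSmallSectionSketch(uint64_t AllocSize) {
  return AllocSize > 0 && AllocSize <= SSThreshold;
}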
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index 725f2ff..ba04343 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -36,10 +36,10 @@ class MipsTargetMachine;
const TargetMachine &TM) const override;
/// Return true if this constant should be placed into small data section.
- bool IsConstantInSmallSection(const Constant *CN,
+ bool IsConstantInSmallSection(const DataLayout &DL, const Constant *CN,
const TargetMachine &TM) const;
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 6ce1be7..b3222f5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -12,6 +12,7 @@
#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "MCTargetDesc/MipsABIInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -77,8 +78,12 @@ public:
// PIC support
virtual void emitDirectiveCpLoad(unsigned RegNo);
+ virtual void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg);
+ virtual void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister);
// FP abiflags directives
virtual void emitDirectiveModuleFP();
@@ -97,18 +102,18 @@ public:
// structure values.
template <class PredicateLibrary>
void updateABIInfo(const PredicateLibrary &P) {
- ABI = &P.getABI();
+ ABI = P.getABI();
ABIFlagsSection.setAllFromPredicates(P);
}
MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
const MipsABIInfo &getABI() const {
- assert(ABI && "ABI hasn't been set!");
+ assert(ABI.hasValue() && "ABI hasn't been set!");
return *ABI;
}
protected:
- const MipsABIInfo *ABI;
+ llvm::Optional<MipsABIInfo> ABI;
MipsABIFlagsSection ABIFlagsSection;
bool GPRInfoSet;
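
The switch from a raw MipsABIInfo pointer to llvm::Optional<MipsABIInfo> lets the streamer own a copy while keeping the "unset until updateABIInfo" assert. The pattern in miniature, with int standing in for MipsABIInfo:

#include "llvm/ADT/Optional.h"
#include <cassert>

struct StreamerSketch {
  llvm::Optional<int> ABI; // empty until the first updateABI()
  void updateABI(int V) { ABI = V; }
  int getABI() const {
    assert(ABI.hasValue() && "ABI hasn't been set!");
    return *ABI;
  }
};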
@@ -188,8 +193,12 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) override;
// FP abiflags directives
void emitDirectiveModuleFP() override;
@@ -237,8 +246,12 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) override;
void emitMipsAbiFlags();
};
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a21..f0f223a 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -15,11 +15,9 @@
#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
-class MCOperand;
class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index b432e06..9ac3c88 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -22,6 +22,7 @@ class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
+
public:
explicit NVPTXMCAsmInfo(const Triple &TheTriple);
};
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index fe28214..e5fae85 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -41,24 +41,6 @@ enum CondCodes {
};
}
-inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
- switch (CC) {
- case NVPTXCC::NE:
- return "ne";
- case NVPTXCC::EQ:
- return "eq";
- case NVPTXCC::LT:
- return "lt";
- case NVPTXCC::LE:
- return "le";
- case NVPTXCC::GT:
- return "gt";
- case NVPTXCC::GE:
- return "ge";
- }
- llvm_unreachable("Unknown condition code");
-}
-
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ecb0f0a..e8c3608 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
if (isABI) {
if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName() == "llvm.used")
- return false;
- return true;
+ return GV->getName() != "llvm.used";
}
for (const User *U : C->users())
@@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
if (!gv->hasInternalLinkage())
return false;
- const PointerType *Pty = gv->getType();
+ PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
return false;
@@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C,
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- const Function *F = FI;
+ const Function *F = &*FI;
if (F->isDeclaration()) {
if (F->use_empty())
@@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+ for (const GlobalVariable &I : M.globals())
+ VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
@@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
if (GVar->hasExternalLinkage()) {
@@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
printScalarConstant(Initializer, O);
}
} else {
- // The frontend adds zero-initializer to variables that don't have an
- // initial value, so skip warning for this case.
- if (!GVar->getInitializer()->isNullValue()) {
+ // The frontend adds zero-initializer to device and constant variables
+ // that don't have an initial value, and UndefValue to shared
+ // variables, so skip warning for this case.
+ if (!GVar->getInitializer()->isNullValue() &&
+ !isa<UndefValue>(GVar->getInitializer())) {
report_fatal_error("initial value of '" + GVar->getName() +
"' is not allowed in addrspace(" +
Twine(PTy->getAddressSpace()) + ")");
@@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
// PTX allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
@@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
}
std::string
-NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
O << ".";
emitPTXAddressSpace(PTy->getAddressSpace(), O);
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
O << "[";
@@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
return;
}
-static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPrefTypeAlignment(Ty);
- const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ auto *ATy = dyn_cast<ArrayType>(Ty);
if (ATy)
- return getOpenCLAlignment(TD, ATy->getElementType());
+ return getOpenCLAlignment(DL, ATy->getElementType());
- const StructType *STy = dyn_cast<StructType>(Ty);
+ auto *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(TD, ETy);
+ unsigned int align = getOpenCLAlignment(DL, ETy);
if (align > alignStruct)
alignStruct = align;
}
return alignStruct;
}
- const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ auto *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return TD->getPointerPrefAlignment();
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPointerPrefAlignment();
+ return DL.getPrefTypeAlignment(Ty);
}
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
@@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
O << "_param_" << paramIndex;
}
-void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
-}
-
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
@@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
- MVT thePointerTy = TLI->getPointerTy(*TD);
+ MVT thePointerTy = TLI->getPointerTy(DL);
O << "(\n";
@@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(Ty);
+ align = DL.getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
continue;
}
// Just a scalar
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
if (PTy) {
// Special handling for pointer arguments to kernel
@@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
@@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// param has byVal attribute. So should be a pointer
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(ETy);
+ align = DL.getABITypeAlignment(ETy);
- unsigned sz = TD->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Further, if a part is vector, print the above for
// each vector element.
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts);
+ ComputeValueVTs(*TLI, DL, ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
@@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- int s = TD->getTypeAllocSize(CPV->getType());
+ int s = DL.getTypeAllocSize(CPV->getType());
if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
@@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
switch (CPV->getType()->getTypeID()) {
case Type::IntegerTyID: {
- const Type *ETy = CPV->getType();
+ Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ConvertIntToBytes<>(ptr, c);
@@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type *Ty = CFP->getType();
+ Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
ConvertFloatToBytes(ptr, float32);
@@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v, Cexpr);
}
- unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ unsigned int s = DL.getTypeAllocSize(CPV->getType());
aggBuffer->addZeros(s);
break;
}
@@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::StructTyID: {
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
- int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ int ElementSize = DL.getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
if (Bytes > ElementSize)
aggBuffer->addZeros(Bytes - ElementSize);
@@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
int Bytes;
// Old constants
@@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
if (i == (e - 1))
- Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
+ DL.getTypeAllocSize(ST) -
+ DL.getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
+ DL.getStructLayout(ST)->getElementOffset(i);
bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
@@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
-
- std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
-
- if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
- return true;
-
- return false;
-}
-
bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
if (C != CE)
return lowerConstantForGV(C, ProcessingGeneric);
@@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
@@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
@@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
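
[Editor's note.] A recurring pattern across this file's hunks: the DataLayout is now obtained once as a reference via getDataLayout() rather than as a pointer from the TargetMachine. A minimal sketch of why the reference form reads better (toy type, not the LLVM class):

struct LayoutLike {
  unsigned allocSize(unsigned TypeId) const { return 4u * TypeId; }
};

// Before: a pointer invites null checks and `->` at every use.
unsigned sizeViaPtr(const LayoutLike *DL, unsigned Id) {
  return DL->allocSize(Id);  // caller must guarantee DL != nullptr
}

// After: a reference is non-null by construction.
unsigned sizeViaRef(const LayoutLike &DL, unsigned Id) {
  return DL.allocSize(Id);
}
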
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index f6f7685..76bf179 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -212,28 +212,21 @@ private:
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}
-
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
- void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
- void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
- void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
- bool isImageType(const Type *Ty);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -271,7 +264,7 @@ private:
// Build the map between type name and ID based on module's type
// symbol table.
- std::map<const Type *, std::string> TypeNameMap;
+ std::map<Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
@@ -282,19 +275,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
- void printOperandProper(const MachineOperand &MO);
-
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
-
- static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 69a229e..95813c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -98,7 +98,7 @@ private:
/// This reordering exposes to optimizeMemoryInstruction more
/// optimization opportunities on loads and stores.
///
- /// If this function succesfully hoists an eliminable addrspacecast or V is
+ /// If this function successfully hoists an eliminable addrspacecast or V is
/// already such an addrspacecast, it returns the transformed value (which is
/// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
@@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
return false;
bool Changed = false;
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
+ for (BasicBlock &B : F) {
+ for (Instruction &I : B) {
if (isa<LoadInst>(I)) {
// V = load P
- Changed |= optimizeMemoryInstruction(I, 0);
+ Changed |= optimizeMemoryInstruction(&I, 0);
} else if (isa<StoreInst>(I)) {
// store V, P
- Changed |= optimizeMemoryInstruction(I, 1);
+ Changed |= optimizeMemoryInstruction(&I, 1);
}
}
}
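
[Editor's note: a toy rendition under stand-in container types.] The rewrite above replaces explicit iterator loops with range-based for over blocks and instructions; since the loop variable is now a reference, call sites that previously passed the iterator pass &I instead:

#include <list>

struct Instr { bool IsLoad = false; };
struct Block { std::list<Instr> Insts; };
struct Func  { std::list<Block> Blocks; };

bool visit(Instr *I) { return I->IsLoad; } // still wants a pointer

bool runOnFunc(Func &F) {
  bool Changed = false;
  for (Block &B : F.Blocks)    // was: Function::iterator B = ...
    for (Instr &I : B.Insts)   // was: BasicBlock::iterator I = ...
      Changed |= visit(&I);    // take the address of the reference
  return Changed;
}
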
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 6fd09c4..62ca5e9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E;) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
!llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
@@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
Value *Operand = II->getOperand(i);
if (isa<Constant>(Operand)) {
II->setOperand(
- i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder));
}
}
}
@@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) {
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; I++) {
- remapNamedMDNode(VM, I);
+ for (NamedMDNode &I : M.named_metadata()) {
+ remapNamedMDNode(VM, &I);
}
// Walk through the global variable initializers, and replace any use of
@@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0], NewOperands[1]);
case Instruction::FCmp:
// CompareConstantExpr (fcmp)
- assert(false && "Address space conversion should have no effect "
- "on float point CompareConstantExpr (fcmp)!");
- return C;
+ llvm_unreachable("Address space conversion should have no effect "
+ "on floating point CompareConstantExpr (fcmp)!");
case Instruction::ExtractElement:
// ExtractElementConstantExpr
return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
@@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
NewOperands[0], C->getType());
}
- assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
- return C;
+ llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
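
[Editor's note.] The two hunks above replace assert(false) followed by a dummy return with llvm_unreachable. Besides documenting intent, a noreturn marker lets the compiler drop the unreachable fallthrough path, so no placeholder return value is needed. A self-contained sketch using a hypothetical stand-in for the macro:

#include <cstdlib>

[[noreturn]] inline void unreachableStub(const char *Msg) {
  (void)Msg;     // a real build would print Msg in asserts mode
  std::abort();  // assumption: abort() as the release-mode behavior
}

int pick(int Kind) {
  switch (Kind) {
  case 0: return 10;
  case 1: return 20;
  }
  unreachableStub("unsupported Kind"); // no dummy return required
}
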
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 232a611..2d0098b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
@@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
+ SmallVector<Value *, 8> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ Objs, F->getDataLayout());
+ for (Value *Obj : Objs) {
+ auto *A = dyn_cast<const Argument>(Obj);
+ if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
+ }
+
+ return true;
+}
+
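
[Editor's note.] The logic of canLowerToLDG, restated over hypothetical argument descriptors: the non-coherent load path is legal only when the subtarget has LDG, the load is from global memory inside a kernel, and every underlying object of the address is a noalias kernel parameter that is never written:

#include <vector>

struct ObjInfo {
  bool IsKernelParam = false;
  bool NoAlias = false;         // __restrict on the parameter
  bool OnlyReadsMemory = false; // never used for a write
};

bool allowNonCoherentLoad(bool HasLDG, bool IsGlobalSpace, bool IsKernel,
                          const std::vector<ObjInfo> &Objs) {
  if (!HasLDG || !IsGlobalSpace || !IsKernel)
    return false;
  for (const ObjInfo &O : Objs)
    if (!O.IsKernelParam || !O.NoAlias || !O.OnlyReadsMemory)
      return false;
  return true;
}
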
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
@@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD);
+ if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only available for .global and .shared
bool isVolatile = LD->isVolatile();
@@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
+ if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only available for .global and .shared
bool IsVolatile = MemSD->isVolatile();
@@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -5039,7 +5094,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
}
if (!Src)
return false;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (auto *PT = dyn_cast<PointerType>(Src->getType()))
return (PT->getAddressSpace() == spN);
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b75cf40..7663696 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// condition branches.
setJumpIsExpensive(true);
+ // Wide divides are _very_ slow. Try to reduce the width of the divide if
+ // possible.
+ addBypassSlowDiv(64, 32);
+
// By default, use the Source scheduling
if (sched4reg)
setSchedulingPreference(Sched::RegPressure);
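
[Editor's note.] addBypassSlowDiv(64, 32) asks the generic bypass-slow-division transform to guard wide divides with a cheap width check. Roughly the code shape it produces, written out by hand (an assumption about the transform's output, not actual NVPTX codegen):

unsigned long long divideWithBypass(unsigned long long A,
                                    unsigned long long B) {
  if (((A | B) >> 32) == 0)           // both operands fit in 32 bits
    return (unsigned)A / (unsigned)B; // fast narrow divide
  return A / B;                       // slow 64-bit divide
}
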
@@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SELECT);
// Now deduce the information based on the above mentioned
// actions
@@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "(";
if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "_";
continue;
}
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
vtparts, &Offsets, 0);
@@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
-SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
- int idx, EVT v) const {
- std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
- std::stringstream suffix;
- suffix << idx;
- *name += suffix.str();
- return DAG.getTargetExternalSymbol(name->c_str(), v);
-}
-
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
std::string ParamSym;
@@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}
-SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
- return getExtSymb(DAG, ".HLPPARAM", idx);
-}
-
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
@@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
"struct._image3d_t",
"struct._sampler_t" };
- const Type *Ty = arg->getType();
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ Type *Ty = arg->getType();
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return false;
@@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!context)
return false;
- const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ auto *STy = dyn_cast<StructType>(PTy->getElementType());
const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
- for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
- if (TypeName == specialTypes[i])
- return true;
-
- return false;
+ return std::find(std::begin(specialTypes), std::end(specialTypes),
+ TypeName) != std::end(specialTypes);
}
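
[Editor's note.] The rewritten tail of isImageOrSamplerVal reduces the hand-written search loop to std::find over the name table. The same idiom in isolation:

#include <algorithm>
#include <iterator>
#include <string>

bool isSpecialTypeName(const std::string &TypeName) {
  static const char *const SpecialTypes[] = {
      "struct._image1d_t", "struct._image2d_t",
      "struct._image3d_t", "struct._sampler_t"};
  return std::find(std::begin(SpecialTypes), std::end(SpecialTypes),
                   TypeName) != std::end(SpecialTypes);
}
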
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
- theArgs.push_back(I);
- argTypes.push_back(I->getType());
+ for (const Argument &I : F->args()) {
+ theArgs.push_back(&I);
+ argTypes.push_back(I.getType());
}
// argTypes.size() (or theArgs.size()) and Ins.size() need not match.
// Ins.size() will be larger
@@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint(
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-// NVPTX suuport vector of legal types of any length in Intrinsics because the
-// NVPTX specific type legalizer
-// will legalize them to the PTX supported length.
-bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
- if (isTypeLegal(VT))
- return true;
- if (VT.isVector()) {
- MVT eVT = VT.getVectorElementType();
- if (isTypeLegal(eVT))
- return true;
- }
- return false;
-}
-
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
switch (Intrinsic) {
default:
@@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// - [immAddr]
if (AM.BaseGV) {
- if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
- return false;
- return true;
+ return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
}
switch (AM.Scale) {
@@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
- return 4;
-}
-
//===----------------------------------------------------------------------===//
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
@@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Currently this detects patterns for integer min and max and
+ // lowers them to PTX-specific intrinsics that enable hardware
+ // support.
+
+ const SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC) return SDValue();
+
+ const SDValue LHS = Cond.getOperand(0);
+ const SDValue RHS = Cond.getOperand(1);
+ const SDValue True = N->getOperand(1);
+ const SDValue False = N->getOperand(2);
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ const EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
+
+ const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue Larger; // The larger of LHS and RHS when condition is true.
+ switch (CC) {
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Larger = RHS;
+ break;
+
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ Larger = LHS;
+ break;
+
+ default:
+ return SDValue();
+ }
+ const bool IsMax = (Larger == True);
+ const bool IsSigned = ISD::isSignedIntSetCC(CC);
+
+ unsigned IntrinsicId;
+ if (VT == MVT::i32) {
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
+ } else {
+ assert(VT == MVT::i64);
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
+}
+
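
[Editor's note: a scalar restatement with toy enums, not ISD condition codes.] PerformSELECTCombine pattern-matches select(setcc(a, b, cc), t, f) where {t, f} is exactly {a, b}, then decides min versus max from which compare operand the condition marks as larger:

enum class Cond { LT, LE, GT, GE, ULT, ULE, UGT, UGE };

// Returns true and sets IsMax when select(cmp(L,R), T, F) is a min/max.
bool classifyMinMax(Cond CC, int L, int R, int T, int F, bool &IsMax) {
  if (!((L == T && R == F) || (L == F && R == T)))
    return false;                      // operands don't line up
  bool LargerIsLHS = (CC == Cond::GT || CC == Cond::GE ||
                      CC == Cond::UGT || CC == Cond::UGE);
  int Larger = LargerIsLHS ? L : R;    // picked when the cond is true
  IsMax = (Larger == T);               // selecting the larger => max
  return true;
}
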
enum OperandSignedness {
Signed = 0,
Unsigned,
@@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
APInt Val = CI->getAPIntValue();
if (LHSSign == Unsigned) {
- if (Val.isIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isIntN(OptSize);
} else {
- if (Val.isSignedIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isSignedIntN(OptSize);
}
} else {
OperandSignedness RHSSign;
if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
return false;
- if (LHSSign != RHSSign)
- return false;
-
- return true;
+ return LHSSign == RHSSign;
}
}
@@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DCI);
}
return SDValue();
}
@@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults(
void NVPTXSection::anchor() {}
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
- delete TextSection;
- delete DataSection;
- delete BSSSection;
- delete ReadOnlySection;
-
- delete StaticCtorSection;
- delete StaticDtorSection;
- delete LSDASection;
- delete EHFrameSection;
- delete DwarfAbbrevSection;
- delete DwarfInfoSection;
- delete DwarfLineSection;
- delete DwarfFrameSection;
- delete DwarfPubTypesSection;
- delete DwarfDebugInlineSection;
- delete DwarfStrSection;
- delete DwarfLocSection;
- delete DwarfARangesSection;
- delete DwarfRangesSection;
+ delete static_cast<NVPTXSection *>(TextSection);
+ delete static_cast<NVPTXSection *>(DataSection);
+ delete static_cast<NVPTXSection *>(BSSSection);
+ delete static_cast<NVPTXSection *>(ReadOnlySection);
+
+ delete static_cast<NVPTXSection *>(StaticCtorSection);
+ delete static_cast<NVPTXSection *>(StaticDtorSection);
+ delete static_cast<NVPTXSection *>(LSDASection);
+ delete static_cast<NVPTXSection *>(EHFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
+ delete static_cast<NVPTXSection *>(DwarfInfoSection);
+ delete static_cast<NVPTXSection *>(DwarfLineSection);
+ delete static_cast<NVPTXSection *>(DwarfFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
+ delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
+ delete static_cast<NVPTXSection *>(DwarfStrSection);
+ delete static_cast<NVPTXSection *>(DwarfLocSection);
+ delete static_cast<NVPTXSection *>(DwarfARangesSection);
+ delete static_cast<NVPTXSection *>(DwarfRangesSection);
}
MCSection *
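
[Editor's note.] The static_casts in the destructor above matter because deleting through a base pointer whose class has a non-virtual destructor is undefined behavior (the assumption here being that MCSection's destructor is non-virtual in this tree). Casting to the concrete type first makes the deletes well defined:

struct Base { ~Base() {} };         // non-virtual destructor
struct Derived : Base { int Payload = 0; };

void destroy(Base *B) {
  // delete B;                      // UB if *B is really a Derived
  delete static_cast<Derived *>(B); // OK when B points to a Derived
}
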
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index e5c3732..60914c1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -441,13 +441,9 @@ public:
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
- SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;
- bool isTypeSupportedInIntrinsic(MVT VT) const;
-
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const override;
@@ -459,8 +455,13 @@ public:
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
+ bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
+ // Truncating 64-bit to 32-bit is free in SASS.
+ if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
+ return false;
+ return SrcTy->getPrimitiveSizeInBits() == 64 &&
+ DstTy->getPrimitiveSizeInBits() == 32;
+ }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const override {
@@ -515,11 +516,7 @@ public:
private:
const NVPTXSubtarget &STI; // cache the subtarget here
-
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
- EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
- SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 76d6597..9f3cf45 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg(
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
- if (DestRC != SrcRC)
- report_fatal_error("Attempted to created cross-class register copy");
-
- if (DestRC == &NVPTX::Int32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int1RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int16RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else {
+ if (DestRC->getSize() != SrcRC->getSize())
+ report_fatal_error("Copy one register into another with a different width");
+
+ unsigned Op;
+ if (DestRC == &NVPTX::Int1RegsRegClass) {
+ Op = NVPTX::IMOV1rr;
+ } else if (DestRC == &NVPTX::Int16RegsRegClass) {
+ Op = NVPTX::IMOV16rr;
+ } else if (DestRC == &NVPTX::Int32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
+ : NVPTX::BITCONVERT_32_F2I);
+ } else if (DestRC == &NVPTX::Int64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
+ : NVPTX::BITCONVERT_64_F2I);
+ } else if (DestRC == &NVPTX::Float32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
+ : NVPTX::BITCONVERT_32_I2F);
+ } else if (DestRC == &NVPTX::Float64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
+ : NVPTX::BITCONVERT_64_I2F);
+ } else {
llvm_unreachable("Bad register copy");
}
+ BuildMI(MBB, I, DL, get(Op), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
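
[Editor's note: illustrative names, not the NVPTX opcode enum.] The rewritten copyPhysReg accepts cross-class copies as long as the register classes have the same width, inserting a bit-preserving convert instead of a plain move. The selection logic in miniature:

enum RegClass { Int32, Float32 };
enum CopyOpc { IMOV32rr, FMOV32rr, BITCONV_32_F2I, BITCONV_32_I2F };

CopyOpc pickCopyOpcode(RegClass Dest, RegClass Src) {
  if (Dest == Int32)
    return Src == Int32 ? IMOV32rr
                        : BITCONV_32_F2I;  // float bits -> int reg
  return Src == Float32 ? FMOV32rr
                        : BITCONV_32_I2F;  // int bits -> float reg
}
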
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
return false;
}
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case NVPTX::INT_PTX_SREG_NTID_X:
- case NVPTX::INT_PTX_SREG_NTID_Y:
- case NVPTX::INT_PTX_SREG_NTID_Z:
- case NVPTX::INT_PTX_SREG_TID_X:
- case NVPTX::INT_PTX_SREG_TID_Y:
- case NVPTX::INT_PTX_SREG_TID_Z:
- case NVPTX::INT_PTX_SREG_CTAID_X:
- case NVPTX::INT_PTX_SREG_CTAID_Y:
- case NVPTX::INT_PTX_SREG_CTAID_Z:
- case NVPTX::INT_PTX_SREG_NCTAID_X:
- case NVPTX::INT_PTX_SREG_NCTAID_Y:
- case NVPTX::INT_PTX_SREG_NCTAID_Z:
- case NVPTX::INT_PTX_SREG_WARPSIZE:
- return true;
- }
-}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 179c068..3e40722 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -56,7 +56,6 @@ public:
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
- bool isReadSpecialReg(MachineInstr &MI) const;
virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 0bf72fe..f770c2a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -6,6 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
+// \file
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
// the size is large or is not a compile-time constant.
//
@@ -18,19 +20,20 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "nvptx"
using namespace llvm;
namespace {
+
// actual analysis class, which is a functionpass
struct NVPTXLowerAggrCopies : public FunctionPass {
static char ID;
@@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
return "Lower aggregate copies/intrinsics into loops";
}
};
-} // namespace
char NVPTXLowerAggrCopies::ID = 0;
-// Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(
- Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
- Type *indType = len->getType();
+// Lower memcpy to loop.
+void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
- // srcAddr and dstAddr are expected to be pointer types,
+ // SrcAddr and DstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
- srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
- dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
- IRBuilder<> loop(loopBB);
- // The loop index (ind) is a phi node.
- PHINode *ind = loop.CreatePHI(indType, 0);
- // Incoming value for ind is 0
- ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
- // load from srcAddr+ind
+ // load from SrcAddr+LoopIndex
// TODO: we can leverage the align parameter of llvm.memcpy for more efficient
// word-sized loads and stores.
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
- srcVolatile);
- // store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
- dstVolatile);
-
- // The value for ind coming from backedge is (ind + 1)
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
- ind->addIncoming(newind, loopBB);
-
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from backedge is (LoopIndex + 1)
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
-// Lower MemSetInst to loop
-static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
- Value *len, Value *val, LLVMContext &Context,
- Function &F) {
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+
+ // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = ConvertedInst->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+// Lower memset to loop.
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr,
+ Value *CopyLen, Value *SetValue, LLVMContext &Context,
+ Function &F) {
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
// Cast pointer to the type of value getting stored
- dstAddr =
- builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
- IRBuilder<> loop(loopBB);
- PHINode *ind = loop.CreatePHI(len->getType(), 0);
- ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
+ LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
- loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ false);
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
- ind->addIncoming(newind, loopBB);
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
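
[Editor's note.] Behaviorally, the loop convertMemSetToLoop emits is a do-while: OrigBB branches into loadstoreloop unconditionally, so the body runs at least once, and the back-edge test is NewIndex < CopyLen (callers reach this only for lengths that are large or unknown, so CopyLen > 0 is assumed). In plain C++:

void memsetAsLoop(unsigned char *Dst, unsigned char SetValue,
                  unsigned long CopyLen) {
  unsigned long Index = 0;
  do {                       // loadstoreloop:
    Dst[Index] = SetValue;   //   store SetValue at Dst+Index
    ++Index;                 //   NewIndex = Index + 1
  } while (Index < CopyLen); //   CondBr(NewIndex < CopyLen)
}                            // the "split" block continues after
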
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
- SmallVector<LoadInst *, 4> aggrLoads;
- SmallVector<MemTransferInst *, 4> aggrMemcpys;
- SmallVector<MemSetInst *, 4> aggrMemsets;
+ SmallVector<LoadInst *, 4> AggrLoads;
+ SmallVector<MemIntrinsic *, 4> MemCalls;
const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
- //
- // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
- //
+ // Collect all aggregate loads and mem* calls.
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
++II) {
- if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (!load->hasOneUse())
+ if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+ if (!LI->hasOneUse())
continue;
- if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
continue;
- User *use = load->user_back();
- if (StoreInst *store = dyn_cast<StoreInst>(use)) {
- if (store->getOperand(0) != load)
+ if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
+ if (SI->getOperand(0) != LI)
continue;
- aggrLoads.push_back(load);
- }
- } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
- Value *len = intr->getLength();
- // If the number of elements being copied is greater
- // than MaxAggrCopySize, lower it to a loop
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemcpys.push_back(intr);
- }
- } else {
- // turn variable length memcpy/memmov into loop
- aggrMemcpys.push_back(intr);
+ AggrLoads.push_back(LI);
}
- } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
- Value *len = memsetintr->getLength();
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemsets.push_back(memsetintr);
+ } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+ // Convert intrinsic calls with variable size or with constant size
+ // larger than the MaxAggrCopySize threshold.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
+ if (LenCI->getZExtValue() >= MaxAggrCopySize) {
+ MemCalls.push_back(IntrCall);
}
} else {
- // turn variable length memset into loop
- aggrMemsets.push_back(memsetintr);
+ MemCalls.push_back(IntrCall);
}
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
- (aggrMemsets.size() == 0))
+
+ if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
return false;
+ }
//
// Do the transformation of an aggr load/copy/set to a loop
//
- for (LoadInst *load : aggrLoads) {
- StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
- Value *srcAddr = load->getOperand(0);
- Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL.getTypeStoreSize(load->getType());
- Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
-
- convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
- store->isVolatile(), Context, F);
-
- store->eraseFromParent();
- load->eraseFromParent();
+ for (LoadInst *LI : AggrLoads) {
+ StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
+ Value *SrcAddr = LI->getOperand(0);
+ Value *DstAddr = SI->getOperand(1);
+ unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
+ Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
+
+ convertMemCpyToLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ SI->eraseFromParent();
+ LI->eraseFromParent();
}
- for (MemTransferInst *cpy : aggrMemcpys) {
- convertTransferToLoop(/* splitAt */ cpy,
- /* srcAddr */ cpy->getSource(),
- /* dstAddr */ cpy->getDest(),
- /* len */ cpy->getLength(),
- /* srcVolatile */ cpy->isVolatile(),
- /* dstVolatile */ cpy->isVolatile(),
+ // Transform mem* intrinsic calls.
+ for (MemIntrinsic *MemCall : MemCalls) {
+ if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+ convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
/* Context */ Context,
/* Function F */ F);
- cpy->eraseFromParent();
- }
-
- for (MemSetInst *memsetinst : aggrMemsets) {
- Value *len = memsetinst->getLength();
- Value *val = memsetinst->getValue();
- convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
- F);
- memsetinst->eraseFromParent();
+ } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+ convertMemMoveToLoop(/* ConvertedInst */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+ convertMemSetToLoop(/* ConvertedInst */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Context */ Context,
+ /* Function F */ F);
+ }
+ MemCall->eraseFromParent();
}
return true;
}
+} // namespace
+
+namespace llvm {
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
+ "Lower aggregate copies, and llvm.mem* intrinsics into loops",
+ false, false)
+
FunctionPass *llvm::createLowerAggrCopies() {
return new NVPTXLowerAggrCopies();
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 93d0025..624052e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
- // of instructions this is unecessary and may introduce redudant
+ // of instructions this is unnecessary and may introduce redundant
// address cast.
const auto &AllocaUse = *UI++;
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index b533f31..6656077 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -47,6 +47,36 @@
// ...
// }
//
+// 3. Convert pointers in a byval kernel parameter to pointers in the global
+// address space. As #2, it allows NVPTX to emit more ld/st.global. E.g.,
+//
+// struct S {
+// int *x;
+// int *y;
+// };
+// __global__ void foo(S s) {
+// int *b = s.y;
+// // use b
+// }
+//
+// "b" points to the global address space. In the IR level,
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// ; use %b
+// }
+//
+// becomes
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// %b_global = addrspacecast i32* %b to i32 addrspace(1)*
+// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32*
+// ; use %b_generic
+// }
+//
// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
// don't cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//
@@ -54,6 +84,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
// handle byval parameters
- void handleByValParam(Argument *);
- // handle non-byval pointer parameters
- void handlePointerParam(Argument *);
+ void handleByValParam(Argument *Arg);
+ // Knowing Ptr must point to the global address space, this function
+ // addrspacecasts Ptr to global and then back to generic. This allows
+ // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
+ // loads/stores that appear later.
+ void markPointerAsGlobal(Value *Ptr);
public:
static char ID; // Pass identification, replacement for typeid
@@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
//
// The above code allocates some space in the stack and copies the incoming
// struct from param space to local space.
-// Then replace all occurences of %d by %temp.
+// Then replace all occurrences of %d by %temp.
// =============================================================================
void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
Function *Func = Arg->getParent();
@@ -128,27 +162,33 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
new StoreInst(LI, AllocA, FirstInst);
}
-void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
- assert(!Arg->hasByValAttr() &&
- "byval params should be handled by handleByValParam");
-
- // Do nothing if the argument already points to the global address space.
- if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+ if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
return;
- Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
- Instruction *ArgInGlobal = new AddrSpaceCastInst(
- Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+ // Decide where to emit the addrspacecast pair.
+ BasicBlock::iterator InsertPt;
+ if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ // Insert at the function entry if Ptr is an argument.
+ InsertPt = Arg->getParent()->getEntryBlock().begin();
+ } else {
+ // Insert right after Ptr if Ptr is an instruction.
+ InsertPt = ++cast<Instruction>(Ptr)->getIterator();
+ assert(InsertPt != InsertPt->getParent()->end() &&
+ "We don't call this function with Ptr being a terminator.");
+ }
+
+ Instruction *PtrInGlobal = new AddrSpaceCastInst(
+ Ptr, PointerType::get(Ptr->getType()->getPointerElementType(),
ADDRESS_SPACE_GLOBAL),
- Arg->getName(), FirstInst);
- Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
- Arg->getName(), FirstInst);
- // Replace with ArgInGeneric all uses of Args except ArgInGlobal.
- Arg->replaceAllUsesWith(ArgInGeneric);
- ArgInGlobal->setOperand(0, Arg);
+ Ptr->getName(), &*InsertPt);
+ Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
+ Ptr->getName(), &*InsertPt);
+ // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal.
+ Ptr->replaceAllUsesWith(PtrInGeneric);
+ PtrInGlobal->setOperand(0, Ptr);
}
-
// =============================================================================
// Main function for this pass.
// =============================================================================
@@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
if (!isKernelFunction(F))
return false;
+ if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
+ // Mark pointers in byval structs as global.
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->getType()->isPointerTy()) {
+ Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
+ F.getParent()->getDataLayout());
+ if (Argument *Arg = dyn_cast<Argument>(UO)) {
+ if (Arg->hasByValAttr()) {
+ // LI is a load from a pointer within a byval kernel parameter.
+ markPointerAsGlobal(LI);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
for (Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
if (Arg.hasByValAttr())
handleByValParam(&Arg);
else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
- handlePointerParam(&Arg);
+ markPointerAsGlobal(&Arg);
}
}
return true;
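
For readers of this hunk, a minimal standalone sketch (not part of the patch; the pass uses AddrSpaceCastInst directly, and castToGlobalAndBack is a hypothetical name) of the cast pair markPointerAsGlobal emits, assuming NVPTX's global address space is 1:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: cast a generic pointer to global and immediately back
// to generic. Existing users keep their type, and
// NVPTXFavorNonGenericAddrSpace can later fold the global-to-generic cast
// into loads/stores (assumption: address space 1 == global on NVPTX).
static Value *castToGlobalAndBack(IRBuilder<> &B, Value *Ptr) {
  auto *PT = cast<PointerType>(Ptr->getType());
  auto *GlobalPT = PointerType::get(PT->getElementType(), /*AddrSpace=*/1);
  Value *InGlobal = B.CreateAddrSpaceCast(Ptr, GlobalPT, Ptr->getName());
  return B.CreateAddrSpaceCast(InGlobal, PT, Ptr->getName());
}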
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 46b4b33..81a606d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -68,7 +68,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -110,7 +110,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5fd69a6..17019d7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn())
+ if (I->isReturnBlock())
TFI.emitEpilogue(MF, *I);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index 0d2627d..45a7309 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -19,15 +19,14 @@
#include <vector>
namespace llvm {
-/// NVPTXSection - Represents a section in PTX
-/// PTX does not have sections. We create this class in order to use
-/// the ASMPrint interface.
+/// Represents a section in PTX. PTX does not have sections; we create this
+/// class in order to use the ASMPrint interface.
///
-class NVPTXSection : public MCSection {
+class NVPTXSection final : public MCSection {
virtual void anchor();
public:
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
- virtual ~NVPTXSection() {}
+ ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
/// to a section.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 248f9e1..aa931b1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
@@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
- initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
- initializeNVPTXFavorNonGenericAddrSpacesPass(
- *PassRegistry::getPassRegistry());
- initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
- initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeNVVMReflectPass(PR);
+ initializeGenericToNVVMPass(PR);
+ initializeNVPTXAllocaHoistingPass(PR);
+ initializeNVPTXAssignValidGlobalNamesPass(PR);
+ initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerAggrCopiesPass(PR);
}
static std::string computeDataLayout(bool is64Bit) {
@@ -139,6 +141,10 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+ // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ void addEarlyCSEOrGVNPass();
};
} // end anonymous namespace
@@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
});
}
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
- TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+
+ // === Propagate special address spaces ===
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
+
+ // === Straight-line scalar optimizations ===
addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
// EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
// for some of our benchmarks.
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(createGVNPass());
- else
- addPass(createEarlyCSEPass());
+ addEarlyCSEOrGVNPass();
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
addPass(createNaryReassociatePass());
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+
+ // === LSR and other generic IR passes ===
+ TargetPassConfig::addIRPasses();
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
+ //
+ // %0 = add %a, %b
+ // %1 = add %b, %a
+ //
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 5ecdc87..0f88ddf 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -48,8 +48,7 @@ public:
void Initialize(MCContext &ctx, const TargetMachine &TM) override {
TargetLoweringObjectFile::Initialize(ctx, TM);
TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
- DataSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData());
BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
ReadOnlySection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
@@ -84,7 +83,7 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override {
return ReadOnlySection;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e7250cd..6e679dd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}
-unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 5bcd1e2..0946a32 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -52,7 +52,7 @@ public:
bool isSourceOfDivergence(const Value *V);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1f178af..578b466 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
return false;
}
-bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
- if ((id == Intrinsic::nvvm_barrier0) ||
- (id == Intrinsic::nvvm_barrier0_popc) ||
- (id == Intrinsic::nvvm_barrier0_and) ||
- (id == Intrinsic::nvvm_barrier0_or) ||
- (id == Intrinsic::cuda_syncthreads))
- return true;
- return false;
-}
-
-// Interface for checking all memory space transfer related intrinsics
-bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
- if (id == Intrinsic::nvvm_ptr_local_to_gen ||
- id == Intrinsic::nvvm_ptr_shared_to_gen ||
- id == Intrinsic::nvvm_ptr_global_to_gen ||
- id == Intrinsic::nvvm_ptr_constant_to_gen ||
- id == Intrinsic::nvvm_ptr_gen_to_global ||
- id == Intrinsic::nvvm_ptr_gen_to_shared ||
- id == Intrinsic::nvvm_ptr_gen_to_local ||
- id == Intrinsic::nvvm_ptr_gen_to_constant ||
- id == Intrinsic::nvvm_ptr_gen_to_param) {
- return true;
- }
-
- return false;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// provide an option to ignore GEP indicies for find out the base address only
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
- V = V->stripPointerCasts();
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (ignore_GEP_indices)
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- }
- break;
- }
- return V;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// - ignore GEP indicies for find out the base address only, and
-// - tracking PHINode
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
- if (processed.find(V) != processed.end())
- return nullptr;
- processed.insert(V);
-
- const Value *V2 = V->stripPointerCasts();
- if (V2 != V && processed.find(V2) != processed.end())
- return nullptr;
- processed.insert(V2);
-
- V = V2;
-
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
- if (V != V2 && processed.find(V) != processed.end())
- return nullptr;
- processed.insert(PN);
- const Value *common = nullptr;
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- const Value *pv = PN->getIncomingValue(i);
- const Value *base = skipPointerTransfer(pv, processed);
- if (base) {
- if (!common)
- common = base;
- else if (common != base)
- return PN;
- }
- }
- if (!common)
- return PN;
- V = common;
- }
- break;
- }
- return V;
-}
-
-// The following are some useful utilities for debuggung
+// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
@@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) {
return;
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
- BasicBlock *B = it;
+ BasicBlock *B = &*it;
if (strcmp(B->getName().data(), blockName) == 0) {
B->dump();
return;
@@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
return nullptr;
}
-// Dump an instruction by nane
+// Dump an instruction by name
void llvm::dumpInst(Value *base, char *instName) {
Instruction *I = getInst(base, instName);
if (I)
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7e2ce73..a5262cb 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &);
bool getAlign(const llvm::Function &, unsigned index, unsigned &);
bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
-bool isBarrierIntrinsic(llvm::Intrinsic::ID);
-
-/// make_vector - Helper function which is useful for building temporary vectors
-/// to pass into type construction of CallInst ctors. This turns a null
-/// terminated list of pointers (or other value types) into a real live vector.
-///
-template <typename T> inline std::vector<T> make_vector(T A, ...) {
- va_list Args;
- va_start(Args, A);
- std::vector<T> Result;
- Result.push_back(A);
- while (T Val = va_arg(Args, T))
- Result.push_back(Val);
- va_end(Args);
- return Result;
-}
-
-bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
-const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *
-skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index a237247..e69bbba 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V2I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v2i16 V2I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V4I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v4i16 V4I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V2I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v2i8 V2I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V4I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v4i8 V4I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V2I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v2i32 V2I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V2F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v2f32 V2F32Regs:$src), imm:$c))],
FMOV32rr>;
@@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
(ins V2I64Regs:$src, i8imm:$c),
"mov.u64 \t$dst, $src${c:vecelem};",
- [(set Int64Regs:$dst, (vector_extract
+ [(set Int64Regs:$dst, (extractelt
(v2i64 V2I64Regs:$src), imm:$c))],
IMOV64rr>;
@@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
(ins V2F64Regs:$src, i8imm:$c),
"mov.f64 \t$dst, $src${c:vecelem};",
- [(set Float64Regs:$dst, (vector_extract
+ [(set Float64Regs:$dst, (extractelt
(v2f64 V2F64Regs:$src), imm:$c))],
FMOV64rr>;
@@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V4I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v4i32 V4I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V4F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v4f32 V4F32Regs:$src), imm:$c))],
FMOV32rr>;
}
@@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I8Regs:$dst,
- (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v4i8
def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
@@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I8Regs:$dst,
- (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v2i16
def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
@@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I16Regs:$dst,
- (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v4i16
def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
@@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I16Regs:$dst,
- (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v2i32
def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
@@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
"mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V2I32Regs:$dst,
- (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v2f32
def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
@@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
"mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V2F32Regs:$dst,
- (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
// Insert v2i64
def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
@@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
"mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u64 \t$dst${c:vecelem}, $val;",
[(set V2I64Regs:$dst,
- (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
- IMOV64rr>;
+ (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+ IMOV64rr>;
// Insert v2f64
def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
@@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
"mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f64 \t$dst${c:vecelem}, $val;",
[(set V2F64Regs:$dst,
- (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
- FMOV64rr>;
+ (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+ FMOV64rr>;
// Insert v4i32
def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
@@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
"mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V4I32Regs:$dst,
- (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v4f32
def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
@@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
"mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V4F32Regs:$dst,
- (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
}
class BinOpAsmString<string c> {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5e375b7..20ab5db 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() {
for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ",");
+ StringRef(ReflectList[i]).split(NameValList, ',');
for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, "=");
+ NameValList[j].split(NameValPair, '=');
assert(NameValPair.size() == 2 && "name=val expected");
std::stringstream ValStream(NameValPair[1]);
int Val;
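
As context for the split() change above, a minimal sketch (hypothetical helper, not from the patch) of the "name=val" parsing using the single-character overload this change switches to, which avoids constructing a StringRef per delimiter:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Hypothetical: validate a comma-separated list of name=val reflect options.
static bool parseReflectOption(StringRef Option) {
  SmallVector<StringRef, 4> NameValList;
  Option.split(NameValList, ',');          // split on a char, not a string
  for (StringRef NameVal : NameValList) {
    SmallVector<StringRef, 2> NameValPair;
    NameVal.split(NameValPair, '=');
    if (NameValPair.size() != 2)
      return false;                        // expected name=val
  }
  return true;
}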
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index a699a55..220c70a 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -243,7 +243,6 @@ namespace {
struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
bool IsPPC64;
bool IsDarwin;
@@ -291,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), MII(MII) {
+ PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -1185,7 +1184,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::MFTB: {
- if (STI.getFeatureBits()[PPC::FeatureMFTB]) {
+ if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) {
assert(Inst.getNumOperands() == 2 && "Expecting two operands");
Inst.setOpcode(PPC::MFSPR);
}
@@ -1205,7 +1204,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -1690,7 +1689,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// where th can be omitted when it is 0. dcbtst is the same. We take the
// server form to be the default, so swap the operands if we're parsing for
// an embedded core (they'll be swapped again upon printing).
- if (STI.getFeatureBits()[PPC::FeatureBookE] &&
+ if (getSTI().getFeatureBits()[PPC::FeatureBookE] &&
Operands.size() == 4 &&
(Name == "dcbt" || Name == "dcbtst")) {
std::swap(Operands[1], Operands[3]);
@@ -1730,10 +1729,19 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
if (getParser().parseExpression(Value))
return false;
- getParser().getStreamer().EmitValue(Value, Size);
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ }
if (getLexer().is(AsmToken::EndOfStatement))
break;
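
The new bounds check accepts a literal if it fits the directive's width as either a signed or an unsigned value. A standalone sketch of that predicate (fitsDirective is a hypothetical name; isIntN/isUIntN come from llvm/Support/MathExtras.h):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// A literal fits an N-byte directive if it is representable as either an
// unsigned or a signed 8*N-bit integer.
static bool fitsDirective(uint64_t IntValue, unsigned Size) {
  return llvm::isUIntN(8 * Size, IntValue) ||
         llvm::isIntN(8 * Size, static_cast<int64_t>(IntValue));
}
// e.g. fitsDirective(0xFFFF, 2) and fitsDirective(uint64_t(-1), 2) hold,
// while fitsDirective(0x1FFFF, 2) does not.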
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 93a503c..1fc84fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -401,8 +401,6 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
-
- MI.clear();
}
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8e18783..53eb727 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -18,8 +18,6 @@
namespace llvm {
-class MCOperand;
-
class PPCInstPrinter : public MCInstPrinter {
bool IsDarwin;
public:
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 992be5b..dd99495 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -113,6 +113,10 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
}
break;
+ case PPC::fixup_ppc_half16ds:
+ Target.print(errs());
+ errs() << '\n';
+ report_fatal_error("Invalid PC-relative half16ds relocation");
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -305,13 +309,13 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_GOT:
Type = ELF::R_PPC64_GOT16_DS;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_GOT_LO:
Type = ELF::R_PPC64_GOT16_LO_DS;
break;
case MCSymbolRefExpr::VK_PPC_TOC:
Type = ELF::R_PPC64_TOC16_DS;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_TOC_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
@@ -372,16 +376,16 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC64_ADDR64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_DTPMOD:
Type = ELF::R_PPC64_DTPMOD64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_TPREL:
Type = ELF::R_PPC64_TPREL64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_DTPREL:
Type = ELF::R_PPC64_DTPREL64;
- break;
+ break;
}
break;
case FK_Data_4:
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 86ad385..e252ac9 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -20,18 +20,19 @@
namespace llvm {
class Triple;
- class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
- };
-
- class PPCELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
- };
+class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &);
+};
+
+class PPCELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index a641780..d42a111 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -82,8 +82,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS PPCMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 9d72896..b54a0e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -241,12 +241,12 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") +
Buffer + ") into 24 bits of scattered "
"relocation entry.");
- llvm_unreachable("fatal error returned?!");
+ return false;
}
// Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 6075631..acea600 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -56,6 +56,14 @@ namespace PPC {
PRED_BIT_UNSET = 1025
};
+ // Bit for branch taken (plus) or not-taken (minus) hint
+ enum BranchHintBit {
+ BR_NO_HINT = 0x0,
+ BR_NONTAKEN_HINT = 0x2,
+ BR_TAKEN_HINT = 0x3,
+ BR_HINT_MASK = 0x3
+ };
+
/// Invert the specified predicate. != -> ==, < -> >=.
Predicate InvertPredicate(Predicate Opcode);
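
A small hypothetical illustration (not in the patch) of how the new hint bits are meant to be read back out of an encoding with BR_HINT_MASK:

#include "MCTargetDesc/PPCPredicates.h"

// Hypothetical accessor: the low two bits of a branch encoding carry the
// taken/not-taken hint introduced above (assumption: encodings OR the hint
// into their low bits).
static unsigned getBranchHint(unsigned Encoding) {
  return Encoding & llvm::PPC::BR_HINT_MASK; // BR_NO_HINT, BR_NONTAKEN_HINT,
                                             // or BR_TAKEN_HINT
}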
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index ae8d8b4..a259ed3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -41,13 +41,16 @@ namespace llvm {
FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
+ FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
+ FunctionPass *createPPCBoolRetToIntPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
+ void initializePPCBoolRetToIntPass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 641b237..b03be12 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -50,6 +50,8 @@ def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software emulation for floating point">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
@@ -137,6 +139,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports add/load integer fusion.">;
+def FeatureFloat128 :
+ SubtargetFeature<"float128", "HasFloat128", "true",
+ "Enable the __float128 data type for IEEE-754R Binary128.",
+ [FeatureVSX]>;
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
@@ -168,7 +176,8 @@ def ProcessorFeatures {
FeatureMFTB, DeprecatedDST];
list<SubtargetFeature> Power8SpecificFeatures =
[DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
+ FeatureFusion];
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
}
@@ -309,7 +318,7 @@ def : ProcessorModel<"g5", G5Model,
Feature64Bit /*, Feature64BitRegs */,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
- [DirectiveE500mc, FeatureMFOCRF,
+ [DirectiveE500mc,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
@@ -403,6 +412,7 @@ def PPCAsmParserVariant : AsmParserVariant {
// InstAlias definitions use immediate literals. Set RegisterPrefix
// so that those are not misinterpreted as registers.
string RegisterPrefix = "%";
+ string BreakCharacters = ".";
}
def PPC : Target {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8e118ec..9a63c14 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -65,19 +65,20 @@ using namespace llvm;
#define DEBUG_TYPE "asmprinter"
namespace {
- class PPCAsmPrinter : public AsmPrinter {
- protected:
- MapVector<MCSymbol*, MCSymbol*> TOC;
- const PPCSubtarget *Subtarget;
- StackMaps SM;
- public:
- explicit PPCAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
-
- const char *getPassName() const override {
- return "PowerPC Assembly Printer";
- }
+class PPCAsmPrinter : public AsmPrinter {
+protected:
+ MapVector<MCSymbol *, MCSymbol *> TOC;
+ const PPCSubtarget *Subtarget;
+ StackMaps SM;
+
+public:
+ explicit PPCAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
+
+ const char *getPassName() const override {
+ return "PowerPC Assembly Printer";
+ }
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
@@ -94,10 +95,8 @@ namespace {
void EmitEndOfAsmFile(Module &M) override;
- void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
- void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
+ void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
@@ -157,15 +156,15 @@ static const char *stripRegisterPrefix(const char *RegName) {
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
-
+
return RegName;
}
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
-
+
switch (MO.getType()) {
case MachineOperand::MO_Register: {
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
@@ -184,8 +183,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
MO.getMBB()->getSymbol()->print(O, MAI);
return;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
@@ -200,19 +199,19 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
!GV->isStrongDefinitionForLinker()) {
if (!GV->hasHiddenVisibility()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>()
- .getGVStubEntry(SymToPrint);
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+ SymToPrint);
if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().
- getHiddenGVStubEntry(SymToPrint);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(
+ SymToPrint);
if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
@@ -295,16 +294,16 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
}
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
- {
- // FIXME: Currently for PowerPC memory operands are always loaded
- // into a register, so we never get an update or indexed form.
- // This is bad even for offset forms, since even if we know we
- // have a value in -16(r1), we will generate a load into r<n>
- // and then load from 0(r<n>). Until that issue is fixed,
- // tolerate 'U' and 'X' but don't output anything.
- assert(MI->getOperand(OpNo).isReg());
- return false;
- }
+ {
+ // FIXME: Currently for PowerPC memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
}
}
@@ -315,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
@@ -330,8 +328,7 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
SM.serializeToStackMapSection();
}
-void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
+void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
SM.recordStackMap(MI);
@@ -353,13 +350,12 @@ void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
// Emit nops.
for (unsigned i = 0; i < NumNOPBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
}
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
-void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
+void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);
@@ -375,60 +371,59 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
-
// If we're on ELFv1, then we need to load the actual function pointer
// from the function descriptor.
if (!Subtarget->isELFv2ABI()) {
- // Load the new TOC pointer and the function address, but not r11
- // (needing this is rare, and loading it here would prevent passing it
- // via a 'nest' parameter.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ // Load the new TOC pointer and the function address, but not r11
+ // (needing this is rare, and loading it here would prevent passing it
+ // via a 'nest' parameter.)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
++EncodedBytes;
}
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8)
.addReg(ScratchReg));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;
// Restore the TOC pointer after the call.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
@@ -439,7 +434,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP)
.addExpr(SymVar));
EncodedBytes += 2;
}
@@ -454,7 +449,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
assert((NumBytes - EncodedBytes) % 4 == 0 &&
"Invalid number of NOP bytes requested!");
for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
}
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
@@ -499,16 +494,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
bool isDarwin = TM.getTargetTriple().isOSDarwin();
const Module *M = MF->getFunction()->getParent();
PICLevel::Level PL = M->getPICLevel();
-
+
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(*OutStreamer, SM, *MI);
+ return LowerSTACKMAP(SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(SM, *MI);
case PPC::MoveGOTtoLR: {
// Transform %LR = MoveGOTtoLR
@@ -533,17 +528,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
- // Into this, where the label is the PIC base:
+ // Into this, where the label is the PIC base:
// bl L1$pb
// L1$pb:
MCSymbol *PICBase = MF->getPICBaseSymbol();
-
+
// Emit the 'bl'.
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
-
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we
+ // don't have to do a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+
// Emit the label.
OutStreamer->EmitLabel(PICBase);
return;
@@ -654,7 +650,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
-
+
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
@@ -669,28 +665,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
- bool IsExternal = false;
- bool IsNonLocalFunction = false;
- bool IsCommon = false;
- bool IsAvailExt = false;
+ bool GlobalToc = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
MOSymbol = getSymbol(GV);
- IsExternal = GV->isDeclaration();
- IsCommon = GV->hasCommonLinkage();
- IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker();
- IsAvailExt = GV->hasAvailableExternallyLinkage();
- } else if (MO.isCPI())
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
+ } else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
+ } else if (MO.isJTI()) {
MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
+ } else if (MO.isBlockAddress()) {
MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ }
- if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
- MO.isJTI() || MO.isBlockAddress() ||
+ if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -727,13 +717,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
else if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- MOSymbol = getSymbol(GValue);
- if (GValue->getType()->getElementType()->isFunctionTy() ||
- GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage() ||
- TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert((GVFlags & PPCII::MO_NLP_FLAG) &&
+ "LDtocL used on symbol that could be accessed directly is "
+ "invalid. Must match ADDIStocHA."));
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
const MCExpr *Exp =
@@ -754,21 +745,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = nullptr;
- bool IsExternal = false;
- bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert(
+ !(GVFlags & PPCII::MO_NLP_FLAG) &&
+ "Interposable definitions must use indirect access."));
MOSymbol = getSymbol(GV);
- IsExternal = GV->isDeclaration();
- IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker();
- } else if (MO.isCPI())
+ } else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
-
- if (IsNonLocalFunction || IsExternal ||
- TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
@@ -840,13 +828,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::PPC32GOT: {
- MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
- const MCExpr *SymGotTlsL =
- MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
- OutContext);
- const MCExpr *SymGotTlsHA =
- MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
- OutContext);
+ MCSymbol *GOTSymbol =
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext);
+ const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
.addReg(MI->getOperand(0).getReg())
.addExpr(SymGotTlsL));
@@ -1079,14 +1066,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
- if (!Subtarget->isPPC64() &&
- (TM.getRelocationModel() != Reloc::PIC_ ||
+ if (!Subtarget->isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
- if (PPCFI->usesPICBase()) {
+ if (PPCFI->usesPICBase()) {
MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
MCSymbol *PICBase = MF->getPICBaseSymbol();
OutStreamer->EmitLabel(RelocSymbol);
@@ -1130,11 +1117,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer->SwitchSection(Current.first, Current.second);
}
-
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
- bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ bool isPPC64 = DL.getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
@@ -1293,8 +1279,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer->SwitchSection(
@@ -1325,7 +1311,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
// Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
// This is because the MachineFunction won't exist (but have not yet been
@@ -1338,8 +1324,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
S.EmitInstruction(Inst, *STI);
};
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
@@ -1353,12 +1339,12 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
-
+
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
-
+
OutStreamer->EmitLabel(Stub);
OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
@@ -1463,20 +1449,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
-
+
OutStreamer->AddBlankLine();
}
-
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty())
EmitFunctionStubs(Stubs);
@@ -1484,27 +1469,27 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MAI->doesSupportExceptionHandling() && MMI) {
// Add the (possibly multiple) personalities to the set of global values.
// Only referenced functions get into the Personalities list.
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
- for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
- E = Personalities.end(); I != E; ++I) {
- if (*I) {
- MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (Personality) {
+ MCSymbol *NLPSym =
+ getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
- MMIMacho.getGVStubEntry(NLPSym);
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym =
+ MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
}
}
}
// Output stubs for dynamically-linked functions.
Stubs = MMIMacho.GetGVStubList();
-
+
// Output macho stubs for external and common global variables.
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(isPPC64 ? 3 : 2);
-
+
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer->EmitLabel(Stubs[i].first);
@@ -1535,7 +1520,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (!Stubs.empty()) {
OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(isPPC64 ? 3 : 2);
-
+
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer->EmitLabel(Stubs[i].first);
@@ -1573,7 +1558,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
}
// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() {
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
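
The PPC32GOT case above splits the address of _GLOBAL_OFFSET_TABLE_ into a VK_PPC_HA/VK_PPC_LO pair. As a hedged illustration in plain C++ (not the MC API): @ha is pre-adjusted by 0x8000 so that adding the sign-extended @l half reconstructs the exact 32-bit value, which is what lets an li/addis (or lis/addi) pair materialize any address.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // @ha: upper 16 bits, pre-adjusted for the sign of the low half.
    static uint16_t ha(uint32_t x) { return (x + 0x8000) >> 16; }
    // @l: lower 16 bits, later sign-extended by li/addi.
    static uint16_t lo(uint32_t x) { return x & 0xFFFF; }

    int main() {
      for (uint32_t addr : {0x10008000u, 0xDEADBEEFu, 0x0000FFFFu}) {
        int32_t low = (int16_t)lo(addr); // sign-extend, as addi does
        uint32_t rebuilt = ((uint32_t)ha(addr) << 16) + (uint32_t)low;
        assert(rebuilt == addr); // the pair round-trips exactly
        printf("%#010x -> ha=%#06x lo=%#06x\n", addr, (unsigned)ha(addr),
               (unsigned)lo(addr));
      }
    }
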
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
new file mode 100644
index 0000000..7920240
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -0,0 +1,253 @@
+//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they could be more
+// profitably allocated as GPRs rather than CRs. This pass will become totally
+// unnecessary if Register Bank Allocation and Global Instruction Selection ever
+// go upstream.
+//
+// Presently, the pass converts i1 Constants and Arguments to i32 if the
+// transitive closure of their uses includes only PHINodes, CallInsts, and
+// ReturnInsts. The rationale is that arguments are generally passed and returned
+// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// actually save casts at the Machine Instruction level.
+//
+// It might be useful to expand this pass to add bit-wise operations to the list
+// of safe transitive closure types. Also, we miss some opportunities when LLVM
+// represents logical AND and OR operations with control flow rather than data
+// flow. For example, by lowering the expression: return (A && B && C)
+//
+// as: return A ? (B && C) : false.
+//
+// There's code in SimplifyCFG that could be used to turn control flow into data
+// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so
+// this probably isn't good in general, but for the special case of i1, the
+// Selects could be further lowered to bit operations that are fast everywhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+#define DEBUG_TYPE "bool-ret-to-int"
+
+STATISTIC(NumBoolRetPromotion,
+ "Number of times a bool feeding a RetInst was promoted to an int");
+STATISTIC(NumBoolCallPromotion,
+ "Number of times a bool feeding a CallInst was promoted to an int");
+STATISTIC(NumBoolToIntPromotion,
+ "Total number of times a bool was promoted to an int");
+
+class PPCBoolRetToInt : public FunctionPass {
+
+ static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+ SmallPtrSet<Value *, 8> Defs;
+ SmallVector<Value *, 8> WorkList;
+ WorkList.push_back(V);
+ Defs.insert(V);
+ while (!WorkList.empty()) {
+ Value *Curr = WorkList.back();
+ WorkList.pop_back();
+ if (User *CurrUser = dyn_cast<User>(Curr))
+ for (auto &Op : CurrUser->operands())
+ if (Defs.insert(Op).second)
+ WorkList.push_back(Op);
+ }
+ return Defs;
+ }
+
+ // Translate an i1 value to an equivalent i32 value:
+ static Value *translate(Value *V) {
+ Type *Int32Ty = Type::getInt32Ty(V->getContext());
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getZExt(C, Int32Ty);
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ // Temporarily set the operands to 0. We'll fix this later in
+ // runOnUse.
+ Value *Zero = Constant::getNullValue(Int32Ty);
+ PHINode *Q =
+ PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+ for (unsigned i = 0; i < P->getNumOperands(); ++i)
+ Q->addIncoming(Zero, P->getIncomingBlock(i));
+ return Q;
+ }
+
+ Argument *A = dyn_cast<Argument>(V);
+ Instruction *I = dyn_cast<Instruction>(V);
+ assert((A || I) && "Unknown value type");
+
+ auto InstPt =
+ A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
+ return new ZExtInst(V, Int32Ty, "", InstPt);
+ }
+
+ typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+ // A PHINode is Promotable if:
+ // 1. Its type is i1 AND
+ // 2. All of its uses are ReturnInst, CallInst, PHINode, or DbgInfoIntrinsic
+ // AND
+ // 3. All of its operands are Constant or Argument or
+ // CallInst or PHINode AND
+ // 4. All of its PHINode uses are Promotable AND
+ // 5. All of its PHINode operands are Promotable
+ static PHINodeSet getPromotablePHINodes(const Function &F) {
+ PHINodeSet Promotable;
+ // Condition 1
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (const PHINode *P = dyn_cast<PHINode>(&I))
+ if (P->getType()->isIntegerTy(1))
+ Promotable.insert(P);
+
+ SmallVector<const PHINode *, 8> ToRemove;
+ for (const auto &P : Promotable) {
+ // Condition 2 and 3
+ auto IsValidUser = [] (const Value *V) -> bool {
+ return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
+ isa<DbgInfoIntrinsic>(V);
+ };
+ auto IsValidOperand = [] (const Value *V) -> bool {
+ return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
+ isa<PHINode>(V);
+ };
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+ ToRemove.push_back(P);
+ }
+
+ // Iterate to convergence
+ auto IsPromotable = [&Promotable] (const Value *V) -> bool {
+ const PHINode *Phi = dyn_cast<PHINode>(V);
+ return !Phi || Promotable.count(Phi);
+ };
+ while (!ToRemove.empty()) {
+ for (auto &User : ToRemove)
+ Promotable.erase(User);
+ ToRemove.clear();
+
+ for (const auto &P : Promotable) {
+ // Condition 4 and 5
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+ ToRemove.push_back(P);
+ }
+ }
+
+ return Promotable;
+ }
+
+ typedef DenseMap<Value *, Value *> B2IMap;
+
+ public:
+ static char ID;
+ PPCBoolRetToInt() : FunctionPass(ID) {
+ initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) {
+ PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+ B2IMap Bool2IntMap;
+ bool Changed = false;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+ if (F.getReturnType()->isIntegerTy(1))
+ Changed |=
+ runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
+
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ for (auto &U : CI->operands())
+ if (U->getType()->isIntegerTy(1))
+ Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
+ }
+ }
+
+ return Changed;
+ }
+
+ static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+ B2IMap &BoolToIntMap) {
+ auto Defs = findAllDefs(U);
+
+ // If the values are all Constants or Arguments, don't bother
+ if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+ return false;
+
+ // Presently, we only know how to handle PHINode, Constant, and Arguments.
+ // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
+ // could also be handled in the future.
+ for (const auto &V : Defs)
+ if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+ return false;
+
+ for (const auto &V : Defs)
+ if (const PHINode *P = dyn_cast<PHINode>(V))
+ if (!PromotablePHINodes.count(P))
+ return false;
+
+ if (isa<ReturnInst>(U.getUser()))
+ ++NumBoolRetPromotion;
+ if (isa<CallInst>(U.getUser()))
+ ++NumBoolCallPromotion;
+ ++NumBoolToIntPromotion;
+
+ for (const auto &V : Defs)
+ if (!BoolToIntMap.count(V))
+ BoolToIntMap[V] = translate(V);
+
+ // Replace the operands of the translated instructions. They were set to
+ // zero in the translate function.
+ for (auto &Pair : BoolToIntMap) {
+ User *First = dyn_cast<User>(Pair.first);
+ User *Second = dyn_cast<User>(Pair.second);
+ assert((!First || Second) && "translated from user to non-user!?");
+ if (First)
+ for (unsigned i = 0; i < First->getNumOperands(); ++i)
+ Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
+ }
+
+ Value *IntRetVal = BoolToIntMap[U];
+ Type *Int1Ty = Type::getInt1Ty(U->getContext());
+ Instruction *I = cast<Instruction>(U.getUser());
+ Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
+ U.set(BackToBool);
+
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char PPCBoolRetToInt::ID = 0;
+INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
+ "Convert i1 constants to i32 if they are returned",
+ false, false)
+
+FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
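
findAllDefs in the new pass is a standard worklist transitive closure over operand edges; promotion only proceeds if every value it reaches is a PHINode, Constant, or Argument. A minimal standalone sketch of the same closure, using a hypothetical Node type rather than the LLVM Value/User classes:

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Node {
      const char *Name;
      std::vector<Node *> Operands;
    };

    static std::set<Node *> findAllDefs(Node *V) {
      std::set<Node *> Defs;
      std::vector<Node *> WorkList{V};
      Defs.insert(V);
      while (!WorkList.empty()) {
        Node *Curr = WorkList.back();
        WorkList.pop_back();
        for (Node *Op : Curr->Operands)
          if (Defs.insert(Op).second) // only queue defs seen for the first time
            WorkList.push_back(Op);
      }
      return Defs;
    }

    int main() {
      Node A{"arg", {}}, C{"const", {}};
      Node Phi{"phi", {&A, &C}};
      Node Ret{"ret", {&Phi}};
      for (Node *N : findAllDefs(&Ret))
        printf("%s\n", N->Name); // arg, const, phi, ret (in set order)
    }

Because insertion into the set guards the worklist push, the closure terminates even when PHI operand edges form cycles.
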
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 940d55a..73a5305 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -91,7 +91,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned FuncSize = 0;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
// The end of the previous block may have extra nops if this block has an
// alignment requirement.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index fd150be..b6ac4d5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -98,7 +98,7 @@ namespace {
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
private:
@@ -112,6 +112,7 @@ namespace {
const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
+ bool PreserveLCSSA;
};
char PPCCTRLoops::ID = 0;
@@ -147,7 +148,7 @@ INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
@@ -169,11 +170,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
bool MadeChange = false;
@@ -250,8 +252,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
// we're definitely using CTR.
case Intrinsic::ppc_is_decremented_ctr_nonzero:
- case Intrinsic::ppc_mtctr:
- return true;
+ case Intrinsic::ppc_mtctr:
+ return true;
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
@@ -369,7 +371,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
true);
if (VTy == MVT::Other)
return true;
-
+
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
continue;
else if (VTy.isVector() &&
@@ -537,7 +539,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
if (!Preheader || mightUseCTR(TT, Preheader))
- Preheader = InsertPreheaderForLoop(L, this);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
@@ -554,10 +556,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(CountType, 1));
- Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
- Preheader->getTerminator());
+ ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
+ Value *ECValue =
+ SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
IRBuilder<> CountBuilder(Preheader->getTerminator());
Module *M = Preheader->getParent()->getParent();
@@ -677,7 +678,7 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
// any other instructions that might clobber the ctr register.
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
I != IE; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
if (!MDT->isReachableFromEntry(MBB))
continue;
@@ -694,4 +695,3 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
return false;
}
#endif // NDEBUG
-
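
The getOne change above feeds the usual CTR loop shape: SCEV's exit count is the back-edge-taken count, so the preheader loads ExitCount + 1 into CTR and the latch becomes a decrement-and-branch-if-nonzero (bdnz). A plain C++ model of that trip-count arithmetic, not the generated machine code:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t ExitCount = 9; // back-edge-taken count from SCEV
      uint64_t CTR = ExitCount + 1; // value the preheader moves via mtctr
      uint64_t BodyRuns = 0;
      do {
        ++BodyRuns;                 // loop body
      } while (--CTR != 0);         // bdnz: decrement CTR, branch if nonzero
      assert(BodyRuns == ExitCount + 1);
    }
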
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index fc89753..7cb1bb5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -71,15 +71,20 @@ protected:
for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
bool OtherReference = false, BlockChanged = false;
+
+ if ((*PI)->empty())
+ continue;
+
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- MachineInstrBuilder MIB;
+ if (J == (*PI)->end())
+ break;
+
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
- MIB =
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
- MIB.copyImplicitOps(I);
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()))
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -90,10 +95,10 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg());
- MIB.copyImplicitOps(I);
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg())
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -104,11 +109,11 @@ protected:
if (J->getOperand(1).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- MIB = BuildMI(**PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ?
- PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg());
- MIB.copyImplicitOps(I);
+ BuildMI(
+ **PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg())
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -146,7 +151,7 @@ protected:
}
for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
- PredToRemove[i]->removeSuccessor(&ReturnMBB);
+ PredToRemove[i]->removeSuccessor(&ReturnMBB, true);
if (Changed && !ReturnMBB.hasAddressTaken()) {
// We now might be able to merge this blr-only block into its
@@ -156,7 +161,7 @@ protected:
if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
// Move the blr into the preceding block.
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
- PrevMBB.removeSuccessor(&ReturnMBB);
+ PrevMBB.removeSuccessor(&ReturnMBB, true);
}
}
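
The rewrite above folds branches that target a return-only block into return instructions at the branch site (blr/bclr), then tries to remove the block as a successor. A toy model over a hypothetical string-based instruction list, just to show the shape of the transformation:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // Predecessor block ending in an unconditional branch to a blr-only block.
      std::vector<std::string> Pred = {"cmpwi cr0, r3, 0", "b .ret_block"};
      for (std::string &I : Pred)
        if (I == "b .ret_block")
          I = "blr"; // the branch itself becomes the return
      // A conditional branch would become bclr with the same predicate instead.
      for (const std::string &I : Pred)
        printf("%s\n", I);
    }
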
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 5f236f7..b451ebf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -164,7 +164,8 @@ class PPCFastISel final : public FastISel {
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
+ unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -292,10 +293,7 @@ bool PPCFastISel::isValueAvailable(const Value *V) const {
return true;
const auto *I = cast<Instruction>(V);
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- return true;
-
- return false;
+ return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}
// Given a value Obj, create an Address object Addr that represents its
@@ -527,9 +525,9 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// VSX only provides an indexed load.
if (Is32VSXLoad || Is64VSXLoad) return false;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
@@ -660,9 +658,9 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// VSX only provides an indexed store.
if (Is32VSXStore || Is64VSXStore) return false;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
@@ -774,8 +772,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
@@ -1607,21 +1604,18 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (ValLocs.size() > 1)
return false;
- // Special case for returning a constant integer of any size.
- // Materialize the constant as an i64 and copy it to the return
- // register. We still need to worry about properly extending the sign. E.g:
- // If the constant has only one bit, it means it is a boolean. Therefore
- // we can't use PPCMaterializeInt because it extends the sign which will
- // cause negations of the returned value to be incorrect as they are
- // implemented as the flip of the least significant bit.
- if (isa<ConstantInt>(*RV)) {
- const Constant *C = cast<Constant>(RV);
-
+ // Special case for returning a constant integer of any size - materialize
+ // the constant as an i64 and copy it to the return register.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
CCValAssign &VA = ValLocs[0];
unsigned RetReg = VA.getLocReg();
- unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
- VA.getLocInfo() == CCValAssign::SExt);
+ // We still need to worry about properly extending the sign. For example,
+ // we could have only a single bit or a constant that needs zero
+ // extension rather than sign extension. Make sure we pass the return
+ // value extension property to integer materialization.
+ unsigned SrcReg =
+ PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
@@ -1761,8 +1755,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
- for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
@@ -1898,10 +1892,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
CodeModel::Model CModel = TM.getCodeModel();
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- (VT == MVT::f32) ? 4 : 8, Align);
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
@@ -1976,19 +1969,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // If/when switches are implemented, jump tables should be handled
- // on the "if" path here.
- if (CModel == CodeModel::Large ||
- (GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker()) ||
- GV->isDeclaration() || GV->hasCommonLinkage() ||
- GV->hasAvailableExternallyLinkage())
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
- else
+ } else {
// Otherwise generate the ADDItocL.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+ }
}
return DestReg;
@@ -2085,12 +2074,11 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
- bool UseSExt) {
+unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
- const ConstantInt *CI = cast<ConstantInt>(C);
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
@@ -2105,12 +2093,17 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
&PPC::GPRCRegClass);
// If the constant is in range, use a load-immediate.
- const ConstantInt *CI = cast<ConstantInt>(C);
- if (isInt<16>(CI->getSExtValue())) {
+ if (UseSExt && isInt<16>(CI->getSExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
+ .addImm(CI->getSExtValue());
+ return ImmReg;
+ } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
+ .addImm(CI->getZExtValue());
return ImmReg;
}
@@ -2138,8 +2131,8 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
return PPCMaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
- else if (isa<ConstantInt>(C))
- return PPCMaterializeInt(C, VT, VT != MVT::i1);
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return PPCMaterializeInt(CI, VT, VT != MVT::i1);
return 0;
}
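
The sign/zero-extension split above matters because LI/LI8 sign-extend their 16-bit immediate: a sign-extending return value needs isInt<16>, a zero-extending one needs isUInt<16>. A self-contained sketch of those range checks (mirroring, not using, LLVM's isInt/isUInt templates):

    #include <cassert>
    #include <cstdint>

    template <unsigned N> bool isIntN(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }
    template <unsigned N> bool isUIntN(uint64_t x) {
      return x < (UINT64_C(1) << N);
    }

    int main() {
      assert(isIntN<16>(-1) && !isUIntN<16>(uint64_t(-1))); // -1 needs SExt
      assert(isUIntN<16>(0xFFFF) && !isIntN<16>(0xFFFF));   // 65535 needs ZExt
      assert(isIntN<16>(42) && isUIntN<16>(42));            // either works
    }
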
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 08ae717..beab844 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const uint16_t VRRegNo[] = {
+static const MCPhysReg VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
@@ -270,7 +270,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn()) {
+ if (I->isReturnBlock()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
- if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ if (MRI.isPhysRegModified(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);
// Live in and live out values already must be in the mask, so don't bother
@@ -325,7 +326,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
UsedRegMask != 0 && BI != BE; ++BI) {
const MachineBasicBlock &MBB = *BI;
- if (MBB.empty() || !MBB.back().isReturn())
+ if (!MBB.isReturnBlock())
continue;
const MachineInstr &Ret = MBB.back();
for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
@@ -555,9 +556,67 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
}
}
+bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const {
+ RegScavenger RS;
+ unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+
+ if (ScratchRegister)
+ *ScratchRegister = R0;
+
+ // If MBB is an entry or exit block, use R0 as the scratch register
+ if ((UseAtEnd && MBB->isReturnBlock()) ||
+ (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
+ return true;
+
+ RS.enterBasicBlock(MBB);
+
+ if (UseAtEnd && !MBB->empty()) {
+ // The scratch register will be used at the end of the block, so we must
+ // consider all registers used within the block.
+
+ MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
+ // If no terminator, back iterator up to previous instruction.
+ if (MBBI == MBB->end())
+ MBBI = std::prev(MBBI);
+
+ if (MBBI != MBB->begin())
+ RS.forward(MBBI);
+ }
+
+ if (!RS.isRegUsed(R0))
+ return true;
+
+ unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass);
+
+ // Make sure the register scavenger was able to find an available register
+ // If not, use R0 but return false to indicate no register was available and
+ // R0 must be used (as recommended by the ABI)
+ if (Reg == 0)
+ return false;
+
+ if (ScratchRegister)
+ *ScratchRegister = Reg;
+
+ return true;
+}
+
+bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, false, nullptr);
+}
+
+bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, true, nullptr);
+}
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
@@ -589,7 +648,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move MBBI back to the beginning of the function.
+ // Move MBBI back to the beginning of the prologue block.
MBBI = MBB.begin();
// Work out frame sizes.
@@ -613,7 +672,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
- unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
@@ -642,6 +701,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+ findScratchRegister(&MBB, false, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@@ -916,27 +978,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI != MBB.end() && "Returning block has no terminator");
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl;
+
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl;
-
- assert((RetOpcode == PPC::BLR ||
- RetOpcode == PPC::BLR8 ||
- RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8) &&
- "Can only insert epilog into returning blocks");
-
// Get alignment info so we know how to restore the SP.
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -959,7 +1012,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
: PPC::MTLR );
@@ -973,10 +1026,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
-
+
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
+
+ findScratchRegister(&MBB, true, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
if (HasFP) {
if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -1008,25 +1065,30 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
PBPOffset = FFI->getObjectOffset(PBPIndex);
}
- bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8;
-
- if (UsesTCRet) {
- int MaxTCRetDelta = FI->getTailCallSPDelta();
- MachineOperand &StackAdjust = MBBI->getOperand(1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int Delta = StackAdj - MaxTCRetDelta;
- assert((Delta >= 0) && "Delta must be positive");
- if (MaxTCRetDelta>0)
- FrameSize += (StackAdj +Delta);
- else
- FrameSize += StackAdj;
+ bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
+
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
}
// Frames of 32KB & larger require special handling because they cannot be
@@ -1066,7 +1128,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addImm(0)
.addReg(SPReg);
}
-
}
if (MustSaveLR)
@@ -1109,52 +1170,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
- MF.getFunction()->getCallingConv() == CallingConv::Fast) {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- unsigned CallerAllocatedAmt = FI->getMinReservedArea();
-
- if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
- .addReg(SPReg).addImm(CallerAllocatedAmt);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
.addReg(ScratchReg, RegState::Kill)
.addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, AddInst)
+ BuildMI(MBB, MBBI, dl, AddInst)
.addReg(SPReg)
.addReg(FPReg)
.addReg(ScratchReg);
- }
- } else if (RetOpcode == PPC::TCRETURNdi) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri) {
- MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
- } else if (RetOpcode == PPC::TCRETURNai) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
- } else if (RetOpcode == PPC::TCRETURNdi8) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri8) {
- MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
- } else if (RetOpcode == PPC::TCRETURNai8) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
}
}
@@ -1200,8 +1264,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Reserve stack space for the PIC Base register (R30).
// Only used in SVR4 32-bit.
if (FI->usesPICBase()) {
- int PBPSI = FI->getPICBasePointerSaveIndex();
- PBPSI = MFI->CreateFixedObject(4, -8, true);
+ int PBPSI = MFI->CreateFixedObject(4, -8, true);
FI->setPICBasePointerSaveIndex(PBPSI);
}
@@ -1710,3 +1773,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+
+bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
+ MF.getSubtarget<PPCSubtarget>().isPPC64());
+}
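
findScratchRegister above encodes a simple policy: use R0 when the block lets us, otherwise scavenge any free GPR, and if nothing is free still hand back R0 while returning false. A hedged stand-in using strings for registers and a liveness set in place of the RegScavenger:

    #include <cstdio>
    #include <set>
    #include <string>

    static bool findScratchRegister(const std::set<std::string> &LiveRegs,
                                    const std::set<std::string> &AllRegs,
                                    std::string *Scratch) {
      if (Scratch)
        *Scratch = "R0";          // default, as the ABI recommends
      if (!LiveRegs.count("R0"))
        return true;              // R0 itself is free
      for (const std::string &R : AllRegs)
        if (!LiveRegs.count(R)) { // first free register the "scavenger" finds
          if (Scratch)
            *Scratch = R;
          return true;
        }
      return false;               // nothing free: caller must make do with R0
    }

    int main() {
      std::set<std::string> All = {"R0", "R11", "R12"};
      std::string S;
      bool OK = findScratchRegister({"R0"}, All, &S);
      printf("%s (found=%d)\n", S.c_str(), OK); // R11 (found=1)
    }
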
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index d6a389b..bbe1329 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -29,6 +29,30 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned LinkageSize;
const unsigned BasePointerSaveOffset;
+ /**
+ * \brief Find a register that can be used in function prologue and epilogue
+ *
+ * Find a register that can be used as the scratch register in function
+ * prologue and epilogue to save various registers (Link Register, Base
+ * Pointer, etc.). Prefer R0, if it is available. If it is not available,
+ * then choose a different register.
+ *
+ * This method will return true if an available register was found (including
+ * R0). If no available registers are found, the method returns false and sets
+ * ScratchRegister to R0, as per the recommendation in the ABI.
+ *
+ * \param[in] MBB The machine basic block to find an available register for
+ * \param[in] UseAtEnd Specify whether the scratch register will be used at
+ * the end of the basic block (i.e., will the scratch
+ * register kill a register defined in the basic block)
+ * \param[out] ScratchRegister The scratch register to use
+ * \return true if a scratch register was found. false if a scratch register
+ * was not found and R0 is being used as the default.
+ */
+ bool findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const;
+
public:
PPCFrameLowering(const PPCSubtarget &STI);
@@ -92,6 +116,13 @@ public:
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
+ /// Methods used by shrink wrapping to determine if MBB can be used for the
+ /// function prologue/epilogue.
+ bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9322268..1eaa811 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,8 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking(
"bit permutations"),
cl::Hidden);
+static cl::opt<bool> EnableBranchHint(
+ "ppc-use-branch-hint", cl::init(true),
+ cl::desc("Enable static hinting of branches on ppc"),
+ cl::Hidden);
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -286,7 +293,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().isReturn()) {
+ if (BB->isReturnBlock()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
@@ -393,6 +400,55 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+ const SDValue &DestMBB) {
+ assert(isa<BasicBlockSDNode>(DestMBB));
+
+ if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+
+ const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const TerminatorInst *BBTerm = BB->getTerminator();
+
+ if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
+
+ const BasicBlock *TBB = BBTerm->getSuccessor(0);
+ const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+ auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
+ auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+
+ // We only want to handle cases that are easy to predict statically, e.g. a
+ // C++ throw statement, which is very likely not taken, or a call to a
+ // function that never returns, e.g. stdlib exit(). So we set Threshold to
+ // filter out the unwanted cases.
+ //
+ // Below is the LLVM branch weight table; we only want to handle cases 1 and 2
+ //
+ // Case Taken:Nontaken Example
+ // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
+ // 2. Invoke-terminating 1:1048575
+ // 3. Coldblock 4:64 __builtin_expect
+ // 4. Loop Branch 124:4 For loop
+ // 5. PH/ZH/FPH 20:12
+ const uint32_t Threshold = 10000;
+
+ if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
+ return PPC::BR_NO_HINT;
+
+ DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+ << BB->getName() << "'\n"
+ << " -> " << TBB->getName() << ": " << TProb << "\n"
+ << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+ const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
+
+ // If the Dest BasicBlock is the False-BasicBlock (FBB), swap the branch
+ // probabilities, because we want 'TProb' to stand for the probability of
+ // branching to the Dest BasicBlock.
+ if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+ std::swap(TProb, FProb);
+
+ return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
+}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
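
The Threshold filter deserves a worked check: with the weight table above, only the roughly 1048575:1 "unreachable" and "invoke-terminating" edges survive, while loop branches and __builtin_expect cold blocks stay unhinted. A plain-integer model (the real code compares BranchProbability values; worthHinting is a hypothetical name):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static bool worthHinting(uint64_t TProb, uint64_t FProb) {
      const uint64_t Threshold = 10000;
      // Mirrors the code above: max/Threshold < min means the edges are too
      // balanced, so no hint is emitted.
      return !(std::max(TProb, FProb) / Threshold < std::min(TProb, FProb));
    }

    int main() {
      assert(worthHinting(1048575, 1)); // case 1/2: C++ throw, stdlib exit()
      assert(!worthHinting(124, 4));    // case 4: ordinary loop branch
      assert(!worthHinting(4, 64));     // case 3: __builtin_expect cold block
    }
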
@@ -564,7 +620,6 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
if (isInt<16>(Imm)) {
@@ -586,9 +641,9 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
++Result;
// Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF))
+ if ((Remainder >> 16) & 0xFFFF)
++Result;
- if ((Lo = Remainder & 0xFFFF))
+ if (Remainder & 0xFFFF)
++Result;
return Result;
@@ -1028,7 +1083,7 @@ class BitPermutationSelector {
BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
- DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+ DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
BitGroups.erase(BitGroups.begin());
}
@@ -1557,10 +1612,7 @@ class BitPermutationSelector {
return false;
}
- if (VRI.RLAmt != EffRLAmt)
- return false;
-
- return true;
+ return VRI.RLAmt == EffRLAmt;
};
for (auto &BG : BitGroups) {
@@ -2781,7 +2833,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
-
+
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
unsigned DM[2];
@@ -2798,7 +2850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
SDValue Base, Offset;
- if (LD->isUnindexed() &&
+ if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
(LD->getMemoryVT() == MVT::f64 ||
LD->getMemoryVT() == MVT::i64) &&
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
@@ -2841,8 +2893,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
- SDValue Pred =
- getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
+ unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+
+ SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
@@ -2871,6 +2926,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
BitComp, N->getOperand(4), N->getOperand(0));
}
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
@@ -2903,9 +2961,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
// The first source operand is a TargetGlobalAddress or a TargetJumpTable.
- // If it is an externally defined symbol, a symbol with common linkage,
- // a non-local function address, or a jump table address, or if we are
- // generating code for large code model, we generate:
+ // If it must be toc-referenced according to PPCSubTarget, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -2920,13 +2976,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GValue = G->getGlobal();
- if ((GValue->getType()->getElementType()->isFunctionTy() &&
- !GValue->isStrongDefinitionForLinker()) ||
- GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
MVT::i64, GA, SDValue(Tmp, 0)));
+ }
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
@@ -3110,7 +3165,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
if (!CurDAG->MaskedValueIsZero(Op0,
APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
return false;
-
+
LHS = Op0.getOperand(0);
RHS = Op0.getOperand(1);
return true;
@@ -3305,7 +3360,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
if (N->use_empty())
continue;
@@ -3989,7 +4044,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4145,7 +4200,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
++Position;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4184,16 +4239,24 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
- // If this is a load or store with a zero offset, we may be able to
- // fold an add-immediate into the memory operation.
- if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
- N->getConstantOperandVal(FirstOp) != 0)
+ // If this is a load or store with a zero offset, or with an offset that
+ // stays within the alignment of the global, we may be able to fold an
+ // add-immediate into the memory operation. The check against the alignment
+ // is done below, as it can't occur until we examine the operands of N.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
continue;
SDValue Base = N->getOperand(FirstOp + 1);
if (!Base.isMachineOpcode())
continue;
+ // On targets with fusion, we don't want this to fire and remove a fusion
+ // opportunity, unless a) it results in another fusion opportunity or
+ // b) we are optimizing for size.
+ if (PPCSubTarget->hasFusion() &&
+ (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
+ continue;
+
unsigned Flags = 0;
bool ReplaceFlags = true;
@@ -4237,6 +4300,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
+ SDValue ImmOpnd = Base.getOperand(1);
+ int MaxDisplacement = 0;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ MaxDisplacement = GV->getAlignment() - 1;
+ }
+
+ int Offset = N->getConstantOperandVal(FirstOp);
+ if (Offset < 0 || Offset > MaxDisplacement)
+ continue;
+
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
// needed, update the target flags for the immediate operand to
@@ -4247,8 +4321,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
DEBUG(N->dump(CurDAG));
DEBUG(dbgs() << "\n");
- SDValue ImmOpnd = Base.getOperand(1);
-
// If the relocation information isn't already present on the
// immediate operand, add it now.
if (ReplaceFlags) {
@@ -4259,17 +4331,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
// is insufficient for the instruction encoding.
if (GV->getAlignment() < 4 &&
(StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
- StorageOpcode == PPC::LWA)) {
+ StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
- ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
CP->getAlignment(),
- 0, Flags);
+ Offset, Flags);
}
}
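
The new displacement guard in PeepholePPC64 only folds a nonnegative offset smaller than the global's alignment, so the relocated low bits cannot carry into the already-materialized high half, and under-aligned globals keep the existing DS-form (LD/STD/LWA) restriction. A compact model of the two checks (offsetFoldable is a hypothetical name for the inline logic above):

    #include <cassert>

    // Guard mirroring the peephole: the folded offset must lie in
    // [0, align - 1], and an under-aligned global additionally rejects
    // DS-form ops and any offset that is not a multiple of 4.
    static bool offsetFoldable(int Offset, unsigned GVAlign, bool DSForm) {
      int MaxDisplacement = (int)GVAlign - 1; // GV->getAlignment() - 1 above
      if (Offset < 0 || Offset > MaxDisplacement)
        return false;
      if (GVAlign < 4 && (DSForm || (Offset % 4) != 0))
        return false;
      return true;
    }

    int main() {
      assert(offsetFoldable(0, 8, false));  // zero offset, ample alignment
      assert(offsetFoldable(4, 8, true));   // DS-form, 8-byte-aligned global
      assert(!offsetFoldable(8, 8, false)); // offset == alignment: may carry
      assert(!offsetFoldable(1, 2, false)); // under-aligned, offset % 4 != 0
    }
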
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1b8f8fb..af9ad07 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -42,10 +42,6 @@
using namespace llvm;
-// FIXME: Remove this once soft-float is supported.
-static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
-cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
-
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -72,8 +68,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
- addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
- addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ if (!Subtarget.useSoftFloat()) {
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ }
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
for (MVT VT : MVT::integer_valuetypes()) {
@@ -107,8 +105,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
- isPPC64 ? MVT::i64 : MVT::i32);
+ AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
} else {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
@@ -257,10 +255,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i64, Expand);
- setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ } else {
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+ }
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
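
Not part of the diff, illustration only: with POWER8 direct moves (mfvsrd/mtvsrd), an i64<->f64 ISD::BITCAST is a single register-file move, which is why it can be marked Legal above; without hasDirectMove() it still expands to a store/reload through memory.

  #include <cstdint>
  #include <cstring>

  uint64_t bitsOf(double D) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits)); // becomes an ISD::BITCAST node
    return Bits;                          // mfvsrd on P8; stfd+ld otherwise
  }
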
@@ -329,6 +334,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -403,9 +410,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD , VT, Legal);
- setOperationAction(ISD::SUB , VT, Legal);
-
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
@@ -477,6 +484,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
@@ -519,12 +528,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
-
- if (Subtarget.hasP8Altivec())
+ if (Subtarget.hasP8Altivec())
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
-
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -545,6 +553,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ }
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -813,15 +836,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
}
- if (isPPC64) {
- setStackPointerRegisterToSaveRestore(PPC::X1);
- setExceptionPointerRegister(PPC::X3);
- setExceptionSelectorRegister(PPC::X4);
- } else {
- setStackPointerRegisterToSaveRestore(PPC::R1);
- setExceptionPointerRegister(PPC::R3);
- setExceptionSelectorRegister(PPC::R4);
- }
+ setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
@@ -942,9 +957,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
- getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
@@ -969,6 +984,10 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
+bool PPCTargetLowering::useSoftFloat() const {
+ return Subtarget.useSoftFloat();
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -992,6 +1011,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
@@ -1236,7 +1256,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
-/// The ShuffleKind distinguishes between big-endian merges with two
+/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
@@ -1261,7 +1281,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
-/// The ShuffleKind distinguishes between big-endian merges with two
+/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
@@ -1353,7 +1373,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
- * \return true iff this shuffle mask
+ * \return true iff this shuffle mask
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
@@ -1380,7 +1400,7 @@ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-/// The ShuffleKind distinguishes between big-endian operations with two
+/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
@@ -1513,8 +1533,8 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
for (unsigned i = 0; i != Multiple-1; ++i) {
if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
- LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
- LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ LeadingZero &= isNullConstant(UniquedVals[i]);
+ LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
}
// Finally, check the least significant entry.
if (LeadingZero) {
@@ -1629,7 +1649,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
-
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
@@ -1998,10 +2017,10 @@ static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
SDValue Ops[] = { GA, Reg };
- return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
- DAG.getVTList(VT, MVT::Other), Ops, VT,
- MachinePointerInfo::getGOT(), 0, false, true,
- false, 0);
+ return DAG.getMemIntrinsicNode(
+ PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
+ false, 0);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
@@ -2092,6 +2111,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
// large models could be added if users need it, at the cost of
// additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
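
Not part of the diff: when EmulatedTLS is set, the native TLS models handled below are bypassed entirely; the access is lowered to a runtime call (conventionally __emutls_get_address) that returns the variable's address.

  // Illustration only.
  thread_local int Counter;

  int bump() {
    return ++Counter; // with -femulated-tls: address fetched via runtime call
  }
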
@@ -2480,7 +2502,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// */
// } va_list[1];
-
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
@@ -2536,7 +2557,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-// Function whose sole purpose is to kill compiler warnings
+// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
@@ -2933,8 +2954,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
PPC::F8
};
unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
- if (DisablePPCFloatInVariadic)
- NumFPArgRegs = 0;
+
+ if (Subtarget.useSoftFloat())
+ NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
@@ -3177,15 +3199,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
- MachinePointerInfo(FuncArg),
- ObjType, false, false, 0);
+ MachinePointerInfo(&*FuncArg), ObjType,
+ false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
// slot.
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg),
- false, false, 0);
+ Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg), false, false, 0);
}
MemOps.push_back(Store);
@@ -3212,9 +3234,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
@@ -3592,7 +3614,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg),
+ MachinePointerInfo(&*FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -3615,9 +3637,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
@@ -3880,7 +3902,6 @@ struct TailCallArgumentInfo {
TailCallArgumentInfo() : FrameIdx(0) {}
};
-
}
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
@@ -3895,9 +3916,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue FIN = TailCallArgs[i].FrameIdxOp;
int FI = TailCallArgs[i].FrameIdx;
// Store relative to framepointer.
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, 0));
}
}
@@ -3922,9 +3944,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
- Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
- MachinePointerInfo::getFixedStack(NewRetAddr),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr),
+ false, false, 0);
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
@@ -3933,9 +3956,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
- Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- MachinePointerInfo::getFixedStack(NewFPIdx),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
+ false, false, 0);
}
}
return Chain;
@@ -4812,8 +4836,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
break;
case MVT::v4f32:
- // When using QPX, this is handled like a FP register, otherwise, it
- // is an Altivec register.
+      // When using QPX, this is handled like an FP register; otherwise, it
+      // is an Altivec register.
if (Subtarget.hasQPX()) {
if (++NumFPRsUsed <= NumFPRs)
continue;
@@ -5318,9 +5342,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
- MachinePointerInfo::getStack(TOCSaveOffset),
- false, false, 0);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
+ false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
@@ -5341,9 +5366,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
- hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
- Callee, SPDiff, NumBytes, Ins, InVals, CS);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest,
+ DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
+ SPDiff, NumBytes, Ins, InVals, CS);
}
SDValue
@@ -5798,6 +5823,22 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
+SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(
+ SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const {
+ SDLoc dl(Op);
+
+  // Get the correct type for integers.
+ EVT IntVT = Op.getValueType();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNAREAOFFSET node.
+ SDValue Ops[2] = {Chain, FPSIdx};
+ SDVTList VTs = DAG.getVTList(IntVT);
+ return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
+}
+
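+
Not part of the diff: a model of the value the new node produces, following the DYNAREAOFFSET description added to PPCISelLowering.h later in this patch. The @llvm.get.dynamic.area.offset intrinsic yields the byte offset from the native stack pointer to the most recent dynamic alloca, so a consumer can recover that address as SP plus the offset.

  #include <cstdint>

  char *dynamicAllocaAddr(char *NativeSP, int64_t DynAreaOffset) {
    return NativeSP + DynAreaOffset; // what consumers of the node compute
  }
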
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
@@ -5828,10 +5869,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
false, false, 0);
}
-
-
-SDValue
-PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
@@ -5983,6 +6021,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
if (!DAG.getTarget().Options.NoInfsFPMath ||
!DAG.getTarget().Options.NoNaNsFPMath)
return Op;
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -6033,7 +6075,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETNE:
std::swap(TV, FV);
case ISD::SETEQ:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6043,25 +6085,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
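
Not part of the diff: a sketch of the fsel-based lowering this hunk now tags with no-NaNs/no-infs flags. fsel(X, A, B) returns A when X >= 0.0 and B otherwise, so "a < b ? TV : FV" becomes fsel(a - b, FV, TV); the subtraction is a faithful comparison only when NaNs and infinities are excluded, which is why the flags (or the global fast-math options) are required.

  double fsel(double X, double A, double B) { return X >= 0.0 ? A : B; }

  double selectLT(double A, double B, double TV, double FV) {
    return fsel(A - B, FV, TV); // matches the ISD::SETLT case above
  }
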
@@ -6101,7 +6143,8 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
(Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
- MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store to the stack slot.
SDValue Chain;
@@ -6291,11 +6334,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
// This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
+
SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
- FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
- FPHalfs, FPHalfs, FPHalfs, FPHalfs);
-
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs,
+ FPHalfs, FPHalfs);
+
Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
if (Op.getValueType() != MVT::v4f64)
@@ -6421,17 +6464,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Store;
- RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = 4;
MachineMemOperand *MMO =
@@ -6472,16 +6516,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Store;
- RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = 4;
}
@@ -6506,14 +6552,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Op.getOperand(0));
// STD the extended value into the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
// Load the value as a double.
- Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, false, 0);
+ Ld = DAG.getLoad(
+ MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, false, 0);
}
// FCFID it and return it.
@@ -6735,7 +6783,6 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
-
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
@@ -6768,7 +6815,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// to a zero vector to get the boolean result.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -6794,8 +6842,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < 4; ++i) {
if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
- else if (cast<ConstantSDNode>(BVN->getOperand(i))->
- getConstantIntValue()->isZero())
+ else if (isNullConstant(BVN->getOperand(i)))
continue;
else
CV[i] = One;
@@ -6814,9 +6861,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
ValueVTs.push_back(MVT::Other); // chain
SDVTList VTs = DAG.getVTList(ValueVTs);
- return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
- dl, VTs, Ops, MVT::v4f32,
- MachinePointerInfo::getConstantPool());
+ return DAG.getMemIntrinsicNode(
+ PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
SmallVector<SDValue, 4> Stores;
@@ -6915,7 +6962,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (SextVal >= -16 && SextVal <= 15)
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
-
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
@@ -7304,11 +7350,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
V1, V2, VPermMask);
}
-/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
-/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
+/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
-static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot, const PPCSubtarget &Subtarget) {
+static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7321,12 +7367,11 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequd_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 199;
- isDot = 1;
- }
- else
+ CompareOpc = 199;
+ isDot = 1;
+ } else
return false;
break;
@@ -7335,28 +7380,48 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 967;
- isDot = 1;
- }
- else
+ CompareOpc = 967;
+ isDot = 1;
+ } else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtud_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 711;
- isDot = 1;
+ CompareOpc = 711;
+ isDot = 1;
+ } else
+ return false;
+
+ break;
+ // VSX predicate comparisons use the same infrastructure
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ if (Subtarget.hasVSX()) {
+ switch (IntrinsicID) {
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ }
+ isDot = 1;
}
- else
+ else
return false;
break;
-
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
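
Not part of the diff: the new "_p" cases above are the dot-form (record) comparisons — the instruction sets CR6 and the lowering materializes the requested predicate as an i32 — unlike the plain forms just below, which return the per-element mask. A scalar model of the "all equal" predicate that xvcmpeqdp. can answer:

  bool allEqual(const double *A, const double *B, int N) {
    for (int I = 0; I != N; ++I)
      if (A[I] != B[I])
        return false; // any miscompare clears CR6's "all true" bit
    return true;
  }
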
@@ -7365,10 +7430,9 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequd:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 199;
- isDot = 0;
- }
- else
+ CompareOpc = 199;
+ isDot = 0;
+ } else
return false;
break;
@@ -7377,24 +7441,22 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsd:
+ case Intrinsic::ppc_altivec_vcmpgtsd:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 967;
- isDot = 0;
- }
- else
+ CompareOpc = 967;
+ isDot = 0;
+ } else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtud:
+ case Intrinsic::ppc_altivec_vcmpgtud:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 711;
- isDot = 0;
- }
- else
+ CompareOpc = 711;
+ isDot = 0;
+ } else
return false;
break;
@@ -7411,7 +7473,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
+ if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7536,7 +7598,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
FPHalfs, FPHalfs, FPHalfs, FPHalfs);
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
// Now convert to an integer and store.
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
@@ -7545,7 +7607,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7752,7 +7815,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
FPHalfs, FPHalfs, FPHalfs, FPHalfs);
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
// Now convert to an integer and store.
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
@@ -7761,7 +7824,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7798,11 +7862,10 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
- Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
- SN->getPointerInfo().getWithOffset(i),
- MVT::i8 /* memory type */,
- SN->isNonTemporal(), SN->isVolatile(),
- 1 /* alignment */, SN->getAAInfo()));
+ Stores.push_back(DAG.getTruncStore(
+ StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(),
+ 1 /* alignment */, SN->getAAInfo()));
}
StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
@@ -7906,6 +7969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
+  case ISD::GET_DYNAMIC_AREA_OFFSET:
+    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -7971,7 +8035,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
- N->getOperand(1));
+ N->getOperand(1));
Results.push_back(NewInt);
Results.push_back(NewInt.getValue(1));
@@ -8020,7 +8084,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
@@ -8089,8 +8152,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
@@ -8160,8 +8222,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
@@ -8283,8 +8344,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -8384,8 +8444,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addMBB(mainMBB);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
- thisMBB->addSuccessor(mainMBB, /* weight */ 0);
- thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+ thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
+ thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
// mainMBB:
// mainDstReg = 0
@@ -8562,8 +8622,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
@@ -8675,7 +8734,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// mfspr Rx,TBU # load from TBU
// mfspr Ry,TB # load from TB
// mfspr Rz,TBU # load from TBU
- // cmpw crX,Rx,Rz # check if ‘old’=’new’
+ // cmpw crX,Rx,Rz # check if 'old'='new'
// bne readLoop # branch if they're not equal
// ...
@@ -9137,7 +9196,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
return SDValue();
}
-bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Note: This functionality is used only when unsafe-fp-math is enabled, and
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
// enabled for division), this functionality is redundant with the default
@@ -9150,12 +9209,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
switch (Subtarget.getDarwinDirective()) {
default:
- return NumUsers > 2;
+ return 3;
case PPC::DIR_440:
case PPC::DIR_A2:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
- return NumUsers > 1;
+ return 2;
+ }
+}
+
+// isConsecutiveLSLoc needs to work even if all adds have not yet been
+// collapsed, and so we need to look through chains of them.
+static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
+ int64_t& Offset, SelectionDAG &DAG) {
+ if (DAG.isBaseWithConstantOffset(Loc)) {
+ Base = Loc.getOperand(0);
+ Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+
+ // The base might itself be a base plus an offset, and if so, accumulate
+ // that as well.
+ getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
}
}
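
Not part of the diff: a plain model of the accumulation above, using a hypothetical node type. An address built as ((Base + 8) + 16) must have every constant layer summed before two locations can be compared for consecutiveness.

  struct AddrNode {
    const AddrNode *Inner; // nullptr for the underlying base
    long long Add;         // constant added at this layer
  };

  const AddrNode *baseWithOffset(const AddrNode *Loc, long long &Offset) {
    while (Loc->Inner) { // peel (X + C) layers, accumulating each C
      Offset += Loc->Add;
      Loc = Loc->Inner;
    }
    return Loc;
  }
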
@@ -9178,16 +9251,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ SDValue Base1 = Loc, Base2 = BaseLoc;
+ int64_t Offset1 = 0, Offset2 = 0;
+ getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
+ getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
+ if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
+ Offset1 = 0;
+ Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
@@ -9343,7 +9418,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
IE = LoadRoots.end(); I != IE; ++I) {
Queue.push_back(*I);
-
+
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
if (!Visited.insert(LoadRoot).second)
@@ -9470,7 +9545,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
}
// Visit all inputs, collect all binary operations (and, or, xor and
- // select) that are all fed by extensions.
+ // select) that are all fed by extensions.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.back();
BinOps.pop_back();
@@ -9492,7 +9567,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
- Inputs.push_back(BinOp.getOperand(i));
+ Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
@@ -9572,7 +9647,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
else
- DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
}
// Replace all operations (these are all the same, but have a different
@@ -9682,7 +9757,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
SmallPtrSet<SDNode *, 16> Visited;
// Visit all inputs, collect all binary operations (and, or, xor and
- // select) that are all fed by truncations.
+ // select) that are all fed by truncations.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.back();
BinOps.pop_back();
@@ -9701,7 +9776,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
- Inputs.push_back(BinOp.getOperand(i));
+ Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
@@ -9915,10 +9990,11 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
"Invalid extension type");
EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
SDValue ShiftCst =
- DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
- return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
- DAG.getNode(ISD::SHL, dl, N->getValueType(0),
- N->getOperand(0), ShiftCst), ShiftCst);
+ DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
+ return DAG.getNode(
+ ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
+ ShiftCst);
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
@@ -10102,16 +10178,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case PPCISD::SHL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue()) // 0 << V -> 0.
+ if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
- }
break;
case PPCISD::SRL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue()) // 0 >>u V -> 0.
+ if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
return N->getOperand(0);
- }
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
@@ -10122,7 +10194,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
case ISD::SETCC:
@@ -10277,7 +10349,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
- MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -(long)MemVT.getStoreSize()+1,
2*MemVT.getStoreSize()-1);
// Create the new base load.
@@ -10527,7 +10600,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
-
+
if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero) {
@@ -10558,8 +10631,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
- isZero())
+ !isNullConstant(LHS.getOperand(1)))
LHS = LHS.getOperand(0);
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -10588,7 +10660,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
+ getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10739,8 +10811,11 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
// boundary so that the entire loop fits in one instruction-cache line.
uint64_t LoopSize = 0;
for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
- for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+ for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
LoopSize += TII->GetInstSizeInBytes(J);
+ if (LoopSize > 32)
+ break;
+ }
if (LoopSize > 16 && LoopSize <= 32)
return 5;
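
Not part of the diff: the early break added above is safe because the alignment decision only distinguishes sizes up to 32 bytes; once the running total passes 32, the answer is already fixed. A sketch with a hypothetical default:

  #include <cstdint>

  unsigned prefAlignLog2(const unsigned *InstSizes, unsigned NumInsts,
                         unsigned DefaultLog2) {
    uint64_t LoopSize = 0;
    for (unsigned I = 0; I != NumInsts; ++I) {
      LoopSize += InstSizes[I];
      if (LoopSize > 32)
        break; // cannot qualify for the 32-byte bucket any more
    }
    if (LoopSize > 16 && LoopSize <= 32)
      return 5; // align to 2^5 = 32 bytes, one instruction-cache line
    return DefaultLog2;
  }
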
@@ -10868,17 +10943,19 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::QFRCRegClass);
if (VT == MVT::v4f32 && Subtarget.hasQPX())
return std::make_pair(0U, &PPC::QSRCRegClass);
- return std::make_pair(0U, &PPC::VRRCRegClass);
+ if (Subtarget.hasAltivec())
+ return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
- } else if (Constraint == "wc") { // an individual CR bit.
+ } else if (Constraint == "wc" && Subtarget.useCRBits()) {
+ // An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
- } else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") {
+ } else if ((Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") && Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
- } else if (Constraint == "ws") {
- if (VT == MVT::f32)
+ } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+ if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
@@ -10908,7 +10985,6 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return R;
}
-
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -11358,9 +11434,7 @@ bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
- if (BitSize == 0 || BitSize > 64)
- return false;
- return true;
+ return !(BitSize == 0 || BitSize > 64);
}
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -11477,11 +11551,21 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
+unsigned PPCTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
+}
+
+unsigned PPCTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
+}
+
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
if (VT == MVT::v2i64)
- return false;
+ return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
if (Subtarget.hasQPX()) {
if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6e13533..44bcb89 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -79,6 +79,11 @@ namespace llvm {
/// compute an allocation on the stack.
DYNALLOC,
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an offset from native SP to the address of the most recent
+ /// dynamic alloca.
+ DYNAREAOFFSET,
+
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
/// at function entry, used for PIC code.
GlobalBaseReg,
@@ -423,6 +428,8 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool useSoftFloat() const override;
+
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
@@ -655,8 +662,17 @@ namespace llvm {
return Ty->isArrayTy();
}
- private:
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ private:
struct ReuseLoadInfo {
SDValue Ptr;
SDValue Chain;
@@ -719,6 +735,8 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
+ SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -853,7 +871,7 @@ namespace llvm {
bool &UseOneConstNR) const override;
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index d628330..075e093 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -369,6 +369,8 @@ let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+ [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d4e666c..c17603a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
UseMI, UseIdx);
+ if (!DefMI->getParent())
+ return Latency;
+
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
@@ -186,6 +189,60 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+// This function does not list all associative and commutative operations, but
+// only those worth feeding through the machine combiner in an attempt to
+// reduce the critical path. Mostly, this means floating-point operations,
+// because they have high latencies (compared to other operations, such as
+// and/or, which are also associative and commutative, but have low latencies).
+bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ // FP Add:
+ case PPC::FADD:
+ case PPC::FADDS:
+ // FP Multiply:
+ case PPC::FMUL:
+ case PPC::FMULS:
+ // Altivec Add:
+ case PPC::VADDFP:
+ // VSX Add:
+ case PPC::XSADDDP:
+ case PPC::XVADDDP:
+ case PPC::XVADDSP:
+ case PPC::XSADDSP:
+ // VSX Multiply:
+ case PPC::XSMULDP:
+ case PPC::XVMULDP:
+ case PPC::XVMULSP:
+ case PPC::XSMULSP:
+ // QPX Add:
+ case PPC::QVFADD:
+ case PPC::QVFADDS:
+ case PPC::QVFADDSs:
+ // QPX Multiply:
+ case PPC::QVFMUL:
+ case PPC::QVFMULS:
+ case PPC::QVFMULSs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool PPCInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ // Using the machine combiner in this way is potentially expensive, so
+ // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
+ return false;
+
+ // FP reassociation is only legal when we don't need strict IEEE semantics.
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+}
+
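+
Not part of the diff: what the machine combiner exploits for the opcodes listed above. Under UnsafeFPMath, a serial floating-point reduction may be reassociated into independent partial sums, cutting the dependent-latency chain.

  double sum4(double A, double B, double C, double D) {
    // Serial form: ((A + B) + C) + D -- three dependent FADDs.
    return (A + B) + (C + D); // reassociated: the two inner adds run in parallel
  }
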
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -259,16 +316,16 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-// commuteInstruction - We can commute rlwimi instructions, but only if the
-// rotate amt is zero. We also have to munge the immediates a bit.
-MachineInstr *
-PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
MachineFunction &MF = *MI->getParent()->getParent();
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
MI->getOpcode() != PPC::RLWIMIo)
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
// changing the relative order of the mask operands might change what happens
@@ -286,6 +343,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Op0 = (Op2 & ~M) | (Op1 & M)
// Swap op1/op2
+ assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
+ "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
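
Not part of the diff: a scalar model of why rlwimi commutes only with a zero rotate amount and a "munged" (complemented) mask, per the comment above. With rotate 0, rlwimi computes (Op1 & M) | (Op0 & ~M):

  #include <cstdint>

  uint32_t rlwimi0(uint32_t Op0, uint32_t Op1, uint32_t M) {
    return (Op1 & M) | (Op0 & ~M);
  }
  // Identity used by the commute: rlwimi0(A, B, M) == rlwimi0(B, A, ~M).
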
@@ -353,9 +412,9 @@ bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
if (AltOpc == -1)
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
- SrcOpIdx1 = 2;
- SrcOpIdx2 = 3;
- return true;
+ // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
+ // and SrcOpIdx2.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -996,11 +1055,10 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
@@ -1109,11 +1167,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
@@ -1214,7 +1271,7 @@ bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
}
@@ -1691,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
MI->setDesc(NewDesc);
if (NewDesc.ImplicitDefs)
- for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+ for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs();
*ImpDefs; ++ImpDefs)
if (!MI->definesRegister(*ImpDefs))
MI->addOperand(*MI->getParent()->getParent(),
MachineOperand::CreateReg(*ImpDefs, true, true));
if (NewDesc.ImplicitUses)
- for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+ for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses();
*ImpUses; ++ImpUses)
if (!MI->readsRegister(*ImpUses))
MI->addOperand(*MI->getParent()->getParent(),
@@ -1737,3 +1794,35 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
+std::pair<unsigned, unsigned>
+PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = PPCII::MO_ACCESS_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO, "ppc-lo"},
+ {MO_HA, "ppc-ha"},
+ {MO_TPREL_LO, "ppc-tprel-lo"},
+ {MO_TPREL_HA, "ppc-tprel-ha"},
+ {MO_DTPREL_LO, "ppc-dtprel-lo"},
+ {MO_TLSLD_LO, "ppc-tlsld-lo"},
+ {MO_TOC_LO, "ppc-toc-lo"},
+ {MO_TLS, "ppc-tls"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_PLT_OR_STUB, "ppc-plt-or-stub"},
+ {MO_PIC_FLAG, "ppc-pic"},
+ {MO_NLP_FLAG, "ppc-nlp"},
+ {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}};
+ return makeArrayRef(TargetFlags);
+}
+
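Taken together, these hooks let the MIR printer and parser round-trip operand target flags: the direct part (one access kind, within MO_ACCESS_MASK) and the bitmask part are split and named separately. A standalone model of the split, with illustrative flag values rather than PPCII's real ones:

  #include <utility>

  constexpr unsigned Mask     = 0x7; // stand-in for PPCII::MO_ACCESS_MASK
  constexpr unsigned TPREL_LO = 0x4; // illustrative direct flag
  constexpr unsigned PIC_FLAG = 0x8; // illustrative bitmask flag

  // Same expression as decomposeMachineOperandsTargetFlags above.
  std::pair<unsigned, unsigned> decompose(unsigned TF) {
    return {TF & Mask, TF & ~Mask};
  }
  // decompose(TPREL_LO | PIC_FLAG) == {TPREL_LO, PIC_FLAG}: the first half
  // is printed via the "direct" table, the second via the "bitmask" table.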
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 40badae..c3c3a48 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -79,6 +79,23 @@ class PPCInstrInfo : public PPCGenInstrInfo {
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
+
+protected:
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+ /// Do not call this method for a non-commutable instruction or for a
+ /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+ /// Even if the instruction is commutable, the method may still fail to
+ /// commute the operands; a null pointer is returned in such cases.
+ ///
+ /// For example, we can commute rlwimi instructions, but only if the
+ /// rotate amt is zero. We also have to munge the immediates a bit.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
public:
explicit PPCInstrInfo(PPCSubtarget &STI);
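Caller-side, the protected hook sits behind the public TargetInstrInfo entry points. A hedged usage sketch, with signatures as they appear in this tree and TII/MI assumed in scope:

  // Ask the target for a commutable pair, then attempt the commute; a null
  // result is legal even for commutable opcodes (e.g. rlwimi with a
  // non-zero rotate amount).
  unsigned OpIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
  unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
  if (TII->findCommutedOpIndices(MI, OpIdx1, OpIdx2))
    if (MachineInstr *Commuted =
            TII->commuteInstruction(MI, /*NewMI=*/false, OpIdx1, OpIdx2))
      MI = Commuted; // NewMI == false rewrites MI in place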
@@ -119,6 +136,19 @@ public:
return false;
}
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const override;
+
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const override;
@@ -127,10 +157,6 @@ public:
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
- // commuteInstruction - We can commute rlwimi instructions, but only if the
- // rotate amt is zero. We also have to munge the immediates a bit.
- MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
@@ -183,7 +209,7 @@ public:
// profitable to use the predicated branches.
bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override {
+ BranchProbability Probability) const override {
return true;
}
@@ -191,12 +217,10 @@ public:
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- const BranchProbability
- &Probability) const override {
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override {
return true;
}
@@ -239,6 +263,15 @@ public:
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 24fd9bd..6c4364a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -226,7 +226,9 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
+def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
@@ -1029,6 +1031,8 @@ let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+ [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
@@ -3883,8 +3887,11 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0,
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
-def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
-def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+// The POWER variant
+def : MnemonicAlias<"cntlz", "cntlzw">;
+def : MnemonicAlias<"cntlz.", "cntlzw.">;
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index 0a044c5..4312007 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -839,31 +839,31 @@ def : Pat<(v4f64 (scalar_to_vector f64:$A)),
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+def : Pat<(f64 (extractelt v4f64:$S, 0)),
(EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+def : Pat<(f32 (extractelt v4f32:$S, 0)),
(EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+def : Pat<(f64 (extractelt v4f64:$S, 1)),
(EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+def : Pat<(f64 (extractelt v4f64:$S, 2)),
(EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+def : Pat<(f64 (extractelt v4f64:$S, 3)),
(EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+def : Pat<(f32 (extractelt v4f32:$S, 1)),
(EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+def : Pat<(f32 (extractelt v4f32:$S, 2)),
(EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+def : Pat<(f32 (extractelt v4f32:$S, 3)),
(EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
(EXTRACT_SUBREG (QVFPERM $S, $S,
(QVLPCLSXint (RLDICR $F, 2,
/* 63-2 = */ 61))),
sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
(EXTRACT_SUBREG (QVFPERMs $S, $S,
(QVLPCLSXint (RLDICR $F, 2,
/* 63-2 = */ 61))),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index ce63c22..df1142c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,17 +67,19 @@ def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
-multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
- string asmbase, string asmstr, InstrItinClass itin,
- list<dag> pattern> {
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
+ string asmstr, InstrItinClass itin, Intrinsic Int,
+ ValueType OutTy, ValueType InTy> {
let BaseName = asmbase in {
- def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
- pattern>;
+ [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
let Defs = [CR6] in
- def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT;
+ [(set InTy:$XT,
+ (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
+ isDOT;
}
}
@@ -456,35 +458,23 @@ let Uses = [RM] in {
"xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
defm XVCMPEQSP : XX3Form_Rcr<60, 67,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>;
defm XVCMPGEDP : XX3Form_Rcr<60, 115,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpgedp, v2i64, v2f64>;
defm XVCMPGESP : XX3Form_Rcr<60, 83,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpgesp, v4i32, v4f32>;
defm XVCMPGTDP : XX3Form_Rcr<60, 107,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>;
defm XVCMPGTSP : XX3Form_Rcr<60, 75,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
// Move Instructions
def XSABSDP : XX2Form<60, 345,
@@ -845,9 +835,9 @@ let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
}
@@ -856,9 +846,9 @@ def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
(SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
}
@@ -1206,6 +1196,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
}
+
+ // Single Precision Conversions (FP <-> INT)
+ def XSCVSXDSP : XX2Form<60, 312,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvsxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ def XSCVUXDSP : XX2Form<60, 296,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvuxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+
+ // Conversions between vector and scalar single precision
+ def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
+ "xscvdpspn $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
+ "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+
} // AddedComplexity = 400
} // HasP8Vector
@@ -1229,3 +1236,550 @@ let Predicates = [HasDirectMove, HasVSX] in {
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
} // HasDirectMove, HasVSX
+
+/* Direct moves of various widths from GPRs into VSRs. Each move lines
+ the value up into element 0 (both BE and LE). Specifically, entities
+ smaller than a doubleword are shifted left and moved for BE. For LE,
+ they're moved, then swapped to go into the least significant element of
+ the VSR.
+*/
+def MovesToVSR {
+ dag BE_BYTE_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+ dag BE_DWORD_0 = (MTVSRD $A);
+
+ dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+ dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ LE_MTVSRW, sub_64));
+ dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
+ dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ BE_DWORD_0, sub_64));
+ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
+}
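As a scalar cross-check of one of these DAGs (an illustration, not generated code): RLDICR(x, 32, 31) rotates left by 32 and keeps only the high word, so BE_WORD_0 amounts to:

  #include <cstdint>

  // MovesToVSR.BE_WORD_0: the 32-bit value lands in the high word of the
  // doubleword that MTVSRD places into VSR element 0 (big endian); the
  // undefined high bits from INSERT_SUBREG are masked away by RLDICR.
  uint64_t be_word_0(uint32_t A) {
    return static_cast<uint64_t>(A) << 32;
  }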
+
+/* Patterns for extracting elements out of vectors. Integer elements are
+ extracted using direct move operations. Patterns for extracting elements
+ whose indices are not available at compile time are also provided with
+ various _VARIABLE_ patterns.
+ The numbering for the DAGs is for LE, but when used on BE, the correct
+ LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def VectorExtractions {
+ // Doubleword extraction
+ dag LE_DWORD_0 =
+ (MFVSRD
+ (EXTRACT_SUBREG
+ (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+ (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+ dag LE_DWORD_1 = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+ // Word extraction
+ dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
+ dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+ dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+ dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
+
+ // Halfword extraction
+ dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+ dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+ dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+ dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+ dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+ dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+ dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+ dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+ // Byte extraction
+ dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+ dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+ dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+ dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+ dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+ dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+ dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+ dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+ dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+ dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+ dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+ dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+ dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+ dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+ dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
+ dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
+
+ /* Variable element number (BE and LE patterns must be specified separately)
+ This is a rather involved process.
+
+ Conceptually, this is how the move is accomplished:
+ 1. Identify which doubleword contains the element
+ 2. Shift in the VMX register so that the correct doubleword is correctly
+ lined up for the MFVSRD
+ 3. Perform the move so that the element (along with some extra stuff)
+ is in the GPR
+ 4. Right shift within the GPR so that the element is right-justified
+
+ Of course, the index is an element number which has a different meaning
+ on LE/BE so the patterns have to be specified separately.
+
+ Note: The final result will be the element right-justified with high
+ order bits being arbitrarily defined (namely, whatever was in the
+ vector register to the left of the value originally).
+ */
+
+ /* LE variable byte
+ Number 1. above:
+ - For elements 0-7, we shift left by 8 bytes since they're on the right
+ - For elements 8-15, we need not shift (shift left by zero bytes)
+ This is accomplished by inverting the bits of the index and AND-ing
+ with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
+ */
+ dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-7 (8-15 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 8 as we need to shift right by the number of bits, not bytes
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
+ sub_32);
+
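The four numbered steps can be checked against a plain scalar model (assumes a little-endian host so the memcpy mirrors the LE register layout; the real sequence leaves the high GPR bits undefined where this model truncates). The halfword and word variants below differ only in the AND mask and the shift multiplier:

  #include <cstdint>
  #include <cstring>

  // v[i] holds vector element i, in LE memory order.
  uint8_t extract_le_vbyte(const uint8_t v[16], unsigned Idx) {
    uint64_t DW;
    std::memcpy(&DW, v + (Idx & 8), 8);    // steps 1-3: select and move the
                                           // doubleword holding element Idx
    return uint8_t(DW >> ((Idx & 7) * 8)); // step 4: shift by (Idx & 7) * 8
  }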
+ /* LE variable halfword
+ Number 1. above:
+ - For elements 0-3, we shift left by 8 since they're on the right
+ - For elements 4-7, we need not shift (shift left by zero bytes)
+ Similarly to the byte pattern, we invert the bits of the index, but we
+ AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
+ Of course, the shift is still by 8 bytes, so we must multiply by 2.
+ */
+ dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-3 (4-7 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 16 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
+ sub_32);
+
+ /* LE variable word
+ Number 1. above:
+ - For elements 0-1, we shift left by 8 since they're on the right
+ - For elements 2-3, we need not shift
+ */
+ dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-1 (2-3 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 32 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
+ sub_32);
+
+ /* LE variable doubleword
+ Number 1. above:
+ - For element 0, we shift left by 8 since it's on the right
+ - For element 1, we need not shift
+ */
+ dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ // - Number 4. is not needed for the doubleword as the value is 64-bits
+ dag LE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* LE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61));
+ dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
+ dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
+
+ /* LE variable double
+ Same as the LE doubleword except there is no move.
+ */
+ dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ LE_VDWORD_PERM_VEC);
+ dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
+
+ /* BE variable byte
+ The algorithm here is the same as the LE variable byte except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x8
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-7
+ */
+ dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8));
+ dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC);
+ dag BE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
+ sub_32);
+
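The BE flavor differs exactly as the comment says: doubleword selection keys off the index bit directly, while the in-doubleword shift uses the complemented bits (cf. the ANDIo8/ANDC8 pair). The same scalar model, assembled by hand so it is host-endian independent; illustration only:

  #include <cstdint>

  // v[i] holds vector element i, most significant first in each doubleword.
  uint8_t extract_be_vbyte(const uint8_t v[16], unsigned Idx) {
    uint64_t DW = 0;
    for (int B = 0; B < 8; ++B)               // assemble selected doubleword
      DW = (DW << 8) | v[(Idx & 8) + B];
    return uint8_t(DW >> (((~Idx) & 7) * 8)); // complemented index bits
  }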
+ /* BE variable halfword
+ The algorithm here is the same as the LE variable halfword except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x4 and multiply by 2
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-3
+ */
+ dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62));
+ dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC);
+ dag BE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
+ sub_32);
+
+ /* BE variable word
+ The algorithm is the same as the LE variable word except:
+ - The shift in the VMX register happens for opposite element numbers
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-1
+ */
+ dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61));
+ dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC);
+ dag BE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
+ sub_32);
+
+ /* BE variable doubleword
+ Same as the LE doubleword except we shift in the VMX register for opposite
+ element indices.
+ */
+ dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60));
+ dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* BE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61));
+ dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
+ dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
+
+ /* BE variable double
+ Same as the BE doubleword except there is no move.
+ */
+ dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
+}
+
+// v4f32 scalar <-> vector conversions (BE)
+let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+} // IsBigEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsBigEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsBigEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (BE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (BE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // IsBigEndian, HasDirectMove
+
+// v4f32 scalar <-> vector conversions (LE)
+let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+} // IsLittleEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsLittleEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsLittleEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (LE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 MovesToVSR.LE_WORD_0)>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 MovesToVSR.LE_DWORD_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // IsLittleEndian, HasDirectMove
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
+
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
+
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
+}
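Semantically these four patterns are pure bit-pattern moves; the f32 cases need the XSCVDPSPN/XXSLDWI shuffling only because scalar floats occupy the doubleword-0 position of a VSR. As a portable reference (what the patterns compute, not how PPC computes it):

  #include <cstdint>
  #include <cstring>

  uint32_t bits_of_f32(float F)    { uint32_t U; std::memcpy(&U, &F, 4); return U; }
  float    f32_of_bits(uint32_t U) { float F;    std::memcpy(&F, &U, 4); return F; }
  uint64_t bits_of_f64(double D)   { uint64_t U; std::memcpy(&U, &D, 8); return U; }
  double   f64_of_bits(uint64_t U) { double D;   std::memcpy(&D, &U, 8); return D; }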
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index b4e1c09..e3a35d5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -71,10 +72,10 @@ namespace {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
// FIXME: For some reason, preserving SE here breaks LSR (even if
// this pass changes nothing).
- // AU.addPreserved<ScalarEvolution>();
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -96,7 +97,7 @@ INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
"PPC Loop Data Prefetch", false, false)
@@ -104,7 +105,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref
bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DL = &F.getParent()->getDataLayout();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index b6e7799..5e18826 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -73,7 +73,7 @@ namespace {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
bool runOnFunction(Function &F) override;
@@ -84,8 +84,10 @@ namespace {
private:
PPCTargetMachine *TM;
+ DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
+ bool PreserveLCSSA;
};
}
@@ -93,7 +95,7 @@ char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
@@ -101,17 +103,20 @@ FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
}
namespace {
- struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool>
- {
- SCEVLess(ScalarEvolution *SE) : SE(SE) {}
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
- bool operator() (const SCEV *X, const SCEV *Y) const {
- const SCEV *Diff = SE->getMinusSCEV(X, Y);
- return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0;
- }
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
- protected:
- ScalarEvolution *SE;
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
};
}
@@ -140,7 +145,10 @@ static Value *GetPointerOperand(Value *MemI) {
bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
bool MadeChange = false;
@@ -169,7 +177,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
- typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
SmallVector<Bucket, 16> Buckets;
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I) {
@@ -212,25 +219,24 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
bool FoundBucket = false;
- for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
- for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
- K != KE; ++K) {
- const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
- if (isa<SCEVConstant>(Diff)) {
- Buckets[i].insert(std::make_pair(LSCEV, MemI));
- FoundBucket = true;
- break;
- }
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
}
+ }
if (!FoundBucket) {
- Buckets.push_back(Bucket(SCEVLess(SE)));
- Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
+ if (Buckets.size() == MaxVars)
+ return MadeChange;
+ Buckets.push_back(Bucket(LSCEV, MemI));
}
}
}
- if (Buckets.empty() || Buckets.size() > MaxVars)
+ if (Buckets.empty())
return MadeChange;
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
@@ -239,7 +245,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
!LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
- LoopPredecessor = InsertPreheaderForLoop(L, this);
+ LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (LoopPredecessor)
MadeChange = true;
}
@@ -253,8 +259,45 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// The base address of each bucket is transformed into a phi and the others
// are rewritten as offsets of that variable.
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!Buckets[i].Elements[j].Offset ||
+ Buckets[i].Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = Buckets[i].Elements[j].Offset;
+ Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
+ for (auto &E : Buckets[i].Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
+ break;
+ }
+
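The rebasing performed by the loop above is plain offset arithmetic; a standalone sketch of the invariant it maintains (hypothetical names):

  // If element J (offset Oj from the old base) becomes the new base:
  //   NewBase      = OldBase + Oj
  //   NewOffset(E) = OldOffset(E) - Oj   // NewBase + NewOffset(E) unchanged
  // The old base element had offset 0 (nullptr above), so it ends up at
  // -Oj: the getNegativeSCEV case.
  struct Elem { long Offset; };
  void rebase(long &Base, Elem *Elems, unsigned N, unsigned J) {
    long Oj = Elems[J].Offset;
    Base += Oj;
    for (unsigned I = 0; I != N; ++I)
      Elems[I].Offset -= Oj;
  }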
const SCEVAddRecExpr *BasePtrSCEV =
- cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
+ cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
if (!BasePtrSCEV->isAffine())
continue;
@@ -262,7 +305,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
assert(BasePtrSCEV->getLoop() == L &&
"AddRec for the wrong loop?");
- Instruction *MemI = Buckets[i].begin()->second;
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = Buckets[i].Elements.begin()->Instr;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
@@ -302,7 +347,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
}
- Instruction *InsPoint = Header->getFirstInsertionPt();
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
@@ -327,18 +372,20 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
BasePtr->replaceAllUsesWith(NewBasePtr);
RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
- Value *LastNewPtr = NewBasePtr;
- for (Bucket::iterator I = std::next(Buckets[i].begin()),
- IE = Buckets[i].end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->second);
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(NewBasePtr);
+
+ for (auto I = std::next(Buckets[i].Elements.begin()),
+ IE = Buckets[i].Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
assert(Ptr && "No pointer operand");
- if (Ptr == LastNewPtr)
+ if (NewPtrs.count(Ptr))
continue;
Instruction *RealNewPtr;
- const SCEVConstant *Diff =
- cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
- if (Diff->isZero()) {
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
RealNewPtr = NewBasePtr;
} else {
Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
@@ -346,13 +393,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
PtrIP = 0;
else if (isa<PHINode>(PtrIP))
- PtrIP = PtrIP->getParent()->getFirstInsertionPt();
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
- PtrIP = I->second;
+ PtrIP = I->Instr;
GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, Diff->getValue(),
- I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
+ I8Ty, PtrInc, I->Offset->getValue(),
+ I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
@@ -373,7 +420,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Ptr->replaceAllUsesWith(ReplNewPtr);
RecursivelyDeleteTriviallyDeadInstructions(Ptr);
- LastNewPtr = RealNewPtr;
+ NewPtrs.insert(RealNewPtr);
}
MadeChange = true;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 76837ec..44a692d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
MCContext &Ctx = AP.OutContext;
bool isDarwin = TM.getTargetTriple().isOSDarwin();
@@ -51,13 +51,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
Suffix = "$non_lazy_ptr";
if (!Suffix.empty())
- Name += DL->getPrivateGlobalPrefix();
+ Name += DL.getPrivateGlobalPrefix();
unsigned PrefixLen = Name.size();
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
- Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else {
const GlobalValue *GV = MO.getGlobal();
TM.getNameWithPrefix(Name, GV, *Mang);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
new file mode 100644
index 0000000..fe339d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer. It runs at the end of
+// the SSA phases, following VSX swap removal. A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here. Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects: it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+ void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ PPCMIPeephole() : MachineFunctionPass(ID) {
+ initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ // Perform peepholes.
+ bool simplifyCode(void);
+
+ // Find the "true" register represented by SrcReg (following chains
+ // of copies and subreg_to_reg operations).
+ unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+ return simplifyCode();
+ }
+};
+
+// Initialize class variables.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+ DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.
+bool PPCMIPeephole::simplifyCode(void) {
+ bool Simplified = false;
+ MachineInstr* ToErase = nullptr;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // If the previous instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // Ignore debug instructions.
+ if (MI.isDebugValue())
+ continue;
+
+ // Per-opcode peepholes.
+ switch (MI.getOpcode()) {
+
+ default:
+ break;
+
+ case PPC::XXPERMDI: {
+ // Perform simplifications of 2x64 vector swaps and splats.
+ // A swap is identified by an immediate value of 2, and a splat
+ // is identified by an immediate value of 0 or 3.
+ int Immed = MI.getOperand(3).getImm();
+
+ if (Immed != 1) {
+
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+ unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+ if (TrueReg1 == TrueReg2
+ && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ unsigned FeedImmed = DefMI->getOperand(3).getImm();
+ unsigned FeedReg1
+ = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ unsigned FeedReg2
+ = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+ if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs()
+ << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat fed by a swap, we can simply modify
+ // the splat to splat the other value from the swap's input
+ // parameter.
+ else if ((Immed == 0 || Immed == 3)
+ && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+ MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // If the last instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+ }
+
+ return Simplified;
+}
+
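For reference, a scalar model of what XXPERMDI's two-bit immediate selects (standard xxpermdi semantics); the splat/swap identities the peepholes rely on fall out directly:

  #include <cstdint>

  // Bit 1 of DM picks A's doubleword for the first result element,
  // bit 0 picks B's doubleword for the second.
  void xxpermdi(const uint64_t A[2], const uint64_t B[2], unsigned DM,
                uint64_t Out[2]) {
    Out[0] = A[(DM >> 1) & 1];
    Out[1] = B[DM & 1];
  }
  // With A == B == S: DM 0 splats S[0], DM 3 splats S[1], DM 2 swaps.
  // So a splat (DM) fed by a swap equals a splat (3 - DM) of the swap's
  // input -- exactly the setImm(3 - Immed) rewrite above.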
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+ while (true) {
+
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index ec4e0a5..95f1631 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -18,8 +18,8 @@ using namespace llvm;
void PPCFunctionInfo::anchor() { }
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
- const DataLayout *DL = MF.getTarget().getDataLayout();
- return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
Twine(MF.getFunctionNumber()) +
"$poff");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2b09b2f..934bdf6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -200,7 +200,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
-
+
// On PPC64, r13 is the thread pointer. Never allocate this register.
if (TM.isPPC64()) {
Reserved.set(PPC::R13);
@@ -262,7 +262,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
default:
return 0;
case PPC::G8RC_NOX0RegClassID:
- case PPC::GPRC_NOR0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -311,7 +311,7 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
//===----------------------------------------------------------------------===//
/// lowerDynamicAlloc - Generate the code for allocating an object in the
-/// current frame. The sequence of code with be in the general form
+/// current frame. The sequence of code will be in the general form
///
/// addi R0, SP, \#frameSize ; get the address of the previous frame
/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
@@ -337,7 +337,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// Get the total frame size.
unsigned FrameSize = MFI->getStackSize();
-
+
// Get stack alignments.
const PPCFrameLowering *TFI = getFrameLowering(MF);
unsigned TargetAlign = TFI->getStackAlignment();
@@ -347,14 +347,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
- // previous frame's address from 0(SP). Why not do an addis of the hi?
- // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
- // Constructing the constant and adding would take 3 instructions.
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
-
+
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
@@ -425,11 +425,32 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
.addReg(PPC::R1)
.addImm(maxCallFrameSize);
}
-
+
// Discard the DYNALLOC instruction.
MBB.erase(II);
}
+void PPCRegisterInfo::lowerDynamicAreaOffset(
+ MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ DebugLoc dl = MI.getDebugLoc();
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
+ .addImm(maxCallFrameSize);
+ MBB.erase(II);
+}
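+
In effect, the lowering above rewrites the DYNAREAOFFSET pseudo into a single load-immediate of the maximum call-frame size; roughly (pseudo machine IR, hypothetical vreg):

    // Before:  %vreg5 = DYNAREAOFFSET <fi#N>
    // After:   %vreg5 = LI <maxCallFrameSize>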
+
/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
/// reserving a whole register (R0), we scrounge for one here. This generates
/// code like this:
@@ -459,8 +480,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0) {
@@ -549,8 +570,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
-
+ .addReg(getCRFromCRBit(SrcReg));
+
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
unsigned Reg1 = Reg;
@@ -602,17 +623,19 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
unsigned ShiftBits = getEncodingValue(DestReg);
// rlwimi r11, r10, 32-ShiftBits, ..., ...
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
- .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill)
- .addImm(ShiftBits ? 32-ShiftBits : 0)
- .addImm(ShiftBits).addImm(ShiftBits);
-
+ .addReg(RegO, RegState::Kill)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32 - ShiftBits : 0)
+ .addImm(ShiftBits)
+ .addImm(ShiftBits);
+
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
getCRFromCRBit(DestReg))
- .addReg(RegO, RegState::Kill)
- // Make sure we have a use dependency all the way through this
- // sequence of instructions. We can't have the other bits in the CR
- // modified in between the mfocrf and the mtocrf.
- .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -634,11 +657,11 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned SrcReg = MI.getOperand(0).getReg();
BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
- .addReg(Reg, RegState::Kill),
- FrameIndex);
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(
+ BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill),
+ FrameIndex);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -671,9 +694,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-bool
-PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- unsigned Reg, int &FrameIdx) const {
+bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
@@ -752,7 +774,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FPSI = FI->getFramePointerSaveIndex();
// Get the instruction opcode.
unsigned OpC = MI.getOpcode();
-
+
+  if (OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8) {
+ lowerDynamicAreaOffset(II);
+ return;
+ }
+
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
@@ -800,8 +827,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we're not using a Frame Pointer that has been set to the value of the
// SP before having the stack size subtracted from it, then add the stack size
// to Offset to get the correct offset.
- // Naked functions have stack size 0, although getStackSize may not reflect that
- // because we didn't call all the pieces that compute it for naked functions.
+ // Naked functions have stack size 0, although getStackSize may not reflect
+ // that because we didn't call all the pieces that compute it for naked
+ // functions.
if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
if (!(hasBasePointer(MF) && FrameIndex < 0))
Offset += MFI->getStackSize();
@@ -840,7 +868,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addImm(Offset);
// Convert into indexed form of the instruction:
- //
+ //
// sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
@@ -898,24 +926,6 @@ bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return needsStackRealignment(MF);
}
-bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- return true;
-}
-
-bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const PPCFrameLowering *TFI = getFrameLowering(MF);
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
/// Returns true if the instruction's frame index
/// reference would be better served by a base register other than FP
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index d304e1d..b15fde8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -54,13 +54,13 @@ inline static unsigned getCRFromCRBit(unsigned SrcReg) {
return Reg;
}
-
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCTargetMachine &TM;
+
public:
PPCRegisterInfo(const PPCTargetMachine &TM);
-
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
@@ -77,7 +77,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -101,6 +101,7 @@ public:
}
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
void lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
@@ -115,9 +116,9 @@ public:
unsigned FrameIndex) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
+ int &FrameIdx) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
// Support for virtual base registers.
@@ -136,8 +137,6 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 58dacca..c0fcb6c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -62,6 +62,7 @@ void PPCSubtarget::initializeEnvironment() {
Has64BitSupport = false;
Use64BitRegs = false;
UseCRBits = false;
+ UseSoftFloat = false;
HasAltivec = false;
HasSPE = false;
HasQPX = false;
@@ -100,6 +101,8 @@ void PPCSubtarget::initializeEnvironment() {
HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
+ HasFusion = false;
+ HasFloat128 = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -210,5 +213,33 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
+unsigned char PPCSubtarget::classifyGlobalReference(
+ const GlobalValue *GV) const {
+ // Note that currently we don't generate non-pic references.
+ // If a caller wants that, this will have to be updated.
+
+ // Large code model always uses the TOC even for local symbols.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+
+ unsigned char flags = PPCII::MO_PIC_FLAG;
+
+ // Only if the relocation mode is PIC do we have to worry about
+ // interposition. In all other cases we can use a slightly looser standard to
+ // decide how to access the symbol.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // If it's local, or it's non-default, it can't be interposed.
+ if (!GV->hasLocalLinkage() &&
+ GV->hasDefaultVisibility()) {
+ flags |= PPCII::MO_NLP_FLAG;
+ }
+ return flags;
+ }
+
+ if (GV->isStrongDefinitionForLinker())
+ return flags;
+ return flags | PPCII::MO_NLP_FLAG;
+}
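+
A sketch (mine, not from the patch) of how a caller might interpret the returned flags when deciding how to materialize a global's address:

    unsigned char Flags = Subtarget.classifyGlobalReference(GV);
    // MO_NLP_FLAG: the address must be loaded indirectly (e.g. via the TOC)
    // because the definition is non-local or may be interposed.
    bool Indirect = (Flags & PPCII::MO_NLP_FLAG) != 0;
    // MO_PIC_FLAG: the reference is position-independent.
    bool IsPIC = (Flags & PPCII::MO_PIC_FLAG) != 0;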
+
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 0616c1f..4f5c95c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -83,6 +83,7 @@ protected:
bool Has64BitSupport;
bool Use64BitRegs;
bool UseCRBits;
+ bool UseSoftFloat;
bool IsPPC64;
bool HasAltivec;
bool HasSPE;
@@ -119,6 +120,8 @@ protected:
bool HasPartwordAtomics;
bool HasDirectMove;
bool HasHTM;
+ bool HasFusion;
+ bool HasFloat128;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@@ -188,6 +191,8 @@ public:
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
+ // useSoftFloat - Return true if soft-float option is turned on.
+ bool useSoftFloat() const { return UseSoftFloat; }
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
   /// registers in 32-bit mode when possible. This can only be true if
@@ -254,6 +259,8 @@ public:
return 16;
}
bool hasHTM() const { return HasHTM; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasFloat128() const { return HasFloat128; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -285,6 +292,10 @@ public:
bool useAA() const override;
bool enableSubRegLiveness() const override;
+
+ /// classifyGlobalReference - Classify a global variable reference for the
+  /// current subtarget according to how we should reference it.
+ unsigned char classifyGlobalReference(const GlobalValue *GV) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1daf244..d24b590 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+ cl::desc("Disable machine peepholes for PPC"));
+
static cl::opt<bool>
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
@@ -57,11 +61,19 @@ EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
cl::desc("Add extra TOC register dependencies"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializePPCBoolRetToIntPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -118,7 +130,7 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
}
if (OL != CodeGenOpt::None) {
- if (!FullFS.empty())
+ if (!FullFS.empty())
FullFS = "+invariant-function-descriptors," + FullFS;
else
FullFS = "+invariant-function-descriptors";
@@ -144,7 +156,7 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
return PPCTargetMachine::PPC_ABI_ELFv2;
assert(Options.MCOptions.getABIName().empty() &&
- "Unknown target-abi option!");
+ "Unknown target-abi option!");
if (!TT.isMacOSX()) {
switch (TT.getArch()) {
@@ -160,9 +172,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
-// The FeatureString here is a little subtle. We are modifying the feature string
-// with what are (currently) non-function specific overrides as it goes into the
-// LLVMTargetMachine constructor and then using the stored value in the
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in the
// Subtarget constructor below it.
PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -227,6 +239,19 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
? FSAttr.getValueAsString().str()
: TargetFS;
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
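For reference, the attribute tested above is the standard per-function soft-float marker; an assumed IR-level shape, shown as comments:

    // define float @f(float %a, float %b) #0 { ... }
    // attributes #0 = { "use-soft-float"="true" }
    // With the code above, such a function is keyed to a subtarget whose
    // feature string gains "+soft-float".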
auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -277,6 +302,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void PPCPassConfig::addIRPasses() {
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
// For the BG/Q (or if explicitly requested), add explicit data prefetch
@@ -316,6 +343,10 @@ bool PPCPassConfig::addPreISel() {
bool PPCPassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
+
return true;
}
@@ -339,6 +370,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
!DisableVSXSwapRemoval)
addPass(createPPCVSXSwapRemovalPass());
+ // Target-specific peephole cleanups performed after instruction
+ // selection.
+ if (!DisableMIPeephole) {
+ addPass(createPPCMIPeepholePass());
+ addPass(&DeadMachineInstructionElimID);
+ }
}
void PPCPassConfig::addPreRegAlloc() {
@@ -364,6 +401,7 @@ void PPCPassConfig::addPreEmitPass() {
}
TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(PPCTTIImpl(this, F));
+ });
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 9ee5db9..798bb9d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -42,9 +42,7 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
if (Kind.isReadOnly()) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (GVar && GVar->isConstant() &&
- (GVar->getInitializer()->getRelocationInfo() ==
- Constant::GlobalRelocations))
+ if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
Kind = SectionKind::getReadOnlyWithRel();
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e21c2b7..cd86dab 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -35,7 +35,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Imm, Ty);
@@ -64,8 +64,8 @@ unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
@@ -98,8 +98,8 @@ unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
@@ -197,9 +197,20 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+ // on combining the loads generated for consecutive accesses, and failure to
+  // do so is particularly expensive. Aggressive unrolling makes that
+  // combining much more likely (compared to only using concatenation
+  // unrolling).
+ if (ST->getDarwinDirective() == PPC::DIR_A2)
+ return true;
+
return LoopHasReductions;
}
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+ return true;
+}
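+
To illustrate what this hook opts into, a minimal stride-2 loop (my example, not from the patch) of the kind the loop vectorizer can now turn into wide loads plus shuffles on PPC:

    // Interleaved (stride-2) access: a[2*i] and a[2*i+1] are used together,
    // so the vectorizer can emit one wide load per pair of lanes plus
    // shuffles instead of many scalar loads.
    void sumPairs(const float *a, float *out, int n) {
      for (int i = 0; i < n; ++i)
        out[i] = a[2 * i] + a[2 * i + 1];
    }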
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
@@ -246,7 +257,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-unsigned PPCTTIImpl::getArithmeticInstrCost(
+int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
@@ -257,24 +268,30 @@ unsigned PPCTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}
-unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). We need one such shuffle instruction for each actual
+ // register (this is not true for arbitrary shuffles, but is true for the
+ // structured types of shuffles covered by TTI::ShuffleKind).
+ return LT.first;
}
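A worked example of the new cost (types assumed): shuffling a v8i32 on a subtarget with 128-bit vector registers legalizes to two v4i32 registers, so the returned cost is 2, one permute per result register.

    // Tp = v8i32, 128-bit vectors: legalized as 2 x v4i32.
    // getShuffleCost(...) == LT.first == 2.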
-unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -313,41 +330,83 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
-
- // VSX loads/stores support unaligned access.
- if (ST->hasVSX()) {
- if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
- return Cost;
- }
+ int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- bool UnalignedAltivec =
- Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
- LT.second.getSizeInBits() == 128 &&
- Opcode == Instruction::Load;
+ // Aligned loads and stores are easy.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ return Cost;
+
+ bool IsAltivecType = ST->hasAltivec() &&
+ (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
+ LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
+ bool IsVSXType = ST->hasVSX() &&
+ (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+ bool IsQPXType = ST->hasQPX() &&
+ (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+
+ // If we can use the permutation-based load sequence, then this is also
+ // relatively cheap (not counting loop-invariant instructions): one load plus
+ // one permute (the last load in a series has extra cost, but we're
+ // neglecting that here). Note that on the P7, we should do unaligned loads
+ // for Altivec types using the VSX instructions, but that's more expensive
+ // than using the permutation-based load sequence. On the P8, that's no
+ // longer true.
+ if (Opcode == Instruction::Load &&
+ ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+ Alignment >= LT.second.getScalarType().getStoreSize())
+ return Cost + LT.first; // Add the cost of the permutations.
+
+ // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
+ // P7, unaligned vector loads are more expensive than the permutation-based
+ // load sequence, so that might be used instead, but regardless, the net cost
+ // is about the same (not counting loop-invariant instructions).
+ if (IsVSXType || (ST->hasVSX() && IsAltivecType))
+ return Cost;
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
- unsigned SrcBytes = LT.second.getStoreSize();
- if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
- Cost += LT.first*(SrcBytes/Alignment-1);
-
- // For a vector type, there is also scalarization overhead (only for
- // stores, loads are expanded using the vector-load + permutation sequence,
- // which is much less expensive).
- if (Src->isVectorTy() && Opcode == Instruction::Store)
- for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
- }
+
+ // Add the cost of each scalar load or store.
+ Cost += LT.first*(SrcBytes/Alignment-1);
+
+ // For a vector type, there is also scalarization overhead (only for
+ // stores, loads are expanded using the vector-load + permutation sequence,
+ // which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+
+ return Cost;
+}
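+
A worked pass through the model above (values assumed for illustration): storing a v4i32 with 4-byte alignment on an Altivec subtarget without VSX.

    // Src = v4i32, Alignment = 4, Opcode = Store, no VSX:
    //   SrcBytes = 16, Alignment < SrcBytes  -> not the aligned early-out
    //   not a load                           -> no permutation-based path
    //   Cost += LT.first * (16/4 - 1) = 3    -> three additional scalar stores
    //   plus one ExtractElement per lane (4) -> scalarization overhead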
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+ // Firstly, the cost of load/store operation.
+ int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+ // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). For each result vector, we need one shuffle per incoming
+ // vector (except that the first shuffle can take two incoming vectors
+ // because it does not need to take itself).
+ Cost += Factor*(LT.first-1);
return Cost;
}
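Similarly (types assumed): an interleaved load with Factor == 2 of a v8i32 that legalizes to two registers (LT.first == 2) pays the plain memory-op cost plus Factor*(LT.first-1) == 2 extra shuffles.

    // VecTy = v8i32 (2 x v4i32 after legalization), Factor = 2:
    //   Cost = getMemoryOpCost(Load, v8i32, Alignment, AS) + 2 * (2 - 1)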
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 368bef9..04c1b02 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -37,7 +37,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCTargetLowering *getTLI() const { return TLI; }
public:
- explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+ explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -52,12 +52,11 @@ public:
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -68,22 +67,27 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 5e3ae2a..782583c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -77,6 +77,14 @@ namespace {
return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
}
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+ }
+
+ bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+ }
+
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -100,7 +108,9 @@ protected:
IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI)) &&
+ IsVRReg(SrcMO.getReg(), MRI) ||
+ IsVSSReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
@@ -123,6 +133,8 @@ protected:
IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVSFReg(DstMO.getReg(), MRI) ||
+ IsVSSReg(DstMO.getReg(), MRI) ||
IsVRReg(DstMO.getReg(), MRI)) &&
"Unknown destination for a VSX copy");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 46b8d13..6b19a2f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -103,10 +103,10 @@ protected:
VNInfo *AddendValNo =
LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
- if (!AddendValNo) {
- // This can be null if the register is undef.
+
+ // This can be null if the register is undef.
+ if (!AddendValNo)
continue;
- }
MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
@@ -186,18 +186,17 @@ protected:
if (!KilledProdOp)
continue;
- // If the addend copy is used only by this MI, then the addend source
- // register is likely not live here. This could be fixed (based on the
- // legality checks above, the live range for the addend source register
- // could be extended), but it seems likely that such a trivial copy can
- // be coalesced away later, and thus is not worth the effort.
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+ // If the addend copy is used only by this MI, then the addend source
+ // register is likely not live here. This could be fixed (based on the
+ // legality checks above, the live range for the addend source register
+ // could be extended), but it seems likely that such a trivial copy can
+ // be coalesced away later, and thus is not worth the effort.
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
!LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
continue;
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
- unsigned AddReg = AddendMI->getOperand(1).getReg();
unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
@@ -221,6 +220,14 @@ protected:
if (OldFMAReg == KilledProdReg)
continue;
+ // If there isn't a class that fits, we can't perform the transform.
+ // This is needed for correctness with a mixture of VSX and Altivec
+ // instructions to make sure that a low VSX register is not assigned to
+ // the Altivec instruction.
+ if (!MRI.constrainRegClass(KilledProdReg,
+ MRI.getRegClass(OldFMAReg)))
+ continue;
+
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
@@ -228,7 +235,7 @@ protected:
MI->getOperand(0).setReg(KilledProdReg);
MI->getOperand(1).setReg(KilledProdReg);
- MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(3).setReg(AddendSrcReg);
MI->getOperand(2).setReg(OtherProdReg);
MI->getOperand(0).setSubReg(KilledProdSubReg);
@@ -263,8 +270,7 @@ protected:
if (UseMI == AddendMI)
continue;
- UseMO.setReg(KilledProdReg);
- UseMO.setSubReg(KilledProdSubReg);
+ UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
}
// Extend the live intervals of the killed product operand to hold the
@@ -286,6 +292,20 @@ protected:
}
DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+ // Extend the live interval of the addend source (it might end at the
+ // copy to be removed, or somewhere in between there and here). This
+ // is necessary only if it is a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+ for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
+ ++Units) {
+ unsigned Unit = *Units;
+
+ LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
+ AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
+ FMAIdx.getRegSlot());
+ DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
+ }
+
FMAInt.removeValNo(FMAValNo);
DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
@@ -347,7 +367,6 @@ INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-
+FunctionPass *llvm::createPPCVSXFMAMutatePass() {
+ return new PPCVSXFMAMutate();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d7132d5..27c540f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -94,7 +94,7 @@ enum SHValues {
SH_NOSWAP_ST,
SH_SPLAT,
SH_XXPERMDI,
- SH_COPYSCALAR
+ SH_COPYWIDEN
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -149,6 +149,11 @@ private:
// handling. Return true iff any changes are made.
bool removeSwaps();
+ // Insert a swap instruction from SrcReg to DstReg at the given
+ // InsertPoint.
+ void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg);
+
// Update instructions requiring special handling.
void handleSpecialSwappables(int EntryIdx);
@@ -159,9 +164,7 @@ private:
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
return RC->hasSubClassEq(MRI->getRegClass(Reg));
- if (RC->contains(Reg))
- return true;
- return false;
+ return RC->contains(Reg);
}
// Return true iff the given register is a full vector register.
@@ -215,7 +218,7 @@ public:
void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
- TII = static_cast<const PPCInstrInfo*>(MF->getSubtarget().getInstrInfo());
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
// An initial vector size of 256 appears to work well in practice.
// Small/medium functions with vector content tend not to incur a
@@ -343,6 +346,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
SwapVector[VecIdx].IsLoad = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
+ case PPC::LXSDX:
+ case PPC::LXSSPX:
+ // A load of a floating-point value into the high-order half of
+ // a vector register is safe, provided that we introduce a swap
+ // following the load, which will be done by the SUBREG_TO_REG
+ // support. So just mark these as safe.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
case PPC::STVX:
// Non-permuting stores are currently unsafe. We can use special
// handling for this in the future. By not marking these as
@@ -385,7 +397,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
else if (isVecReg(MI.getOperand(0).getReg()) &&
isScalarVecReg(MI.getOperand(2).getReg())) {
SwapVector[VecIdx].IsSwappable = 1;
- SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
}
break;
}
@@ -420,7 +432,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::STVEHX:
case PPC::STVEWX:
case PPC::STVXL:
+ // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+ // by adding special handling for narrowing copies as well as
+ // widening ones. However, I've experimented with this, and in
+ // practice we currently do not appear to use STXSDX fed by
+ // a narrowing copy from a full vector register. Since I can't
+ // generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
+ case PPC::STXSSPX:
case PPC::VCIPHER:
case PPC::VCIPHERLAST:
case PPC::VMRGHB:
@@ -543,7 +562,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
- SwapVector[VecIdx].MentionsPhysVR = 1;
+ if (!isScalarVecReg(CopySrcReg))
+ SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
}
@@ -629,8 +649,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
- Repr));
+ format("Web %d rejected for physreg, partial reg, or not "
+ "swap[pable]\n", Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
DEBUG(dbgs() << "\n");
@@ -743,6 +763,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
}
}
+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg) {
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(2);
+}
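+
For reference, the helper emits the canonical xxswapd idiom, an XXPERMDI of a register with itself; with hypothetical registers:

    // insertSwap(MI, IP, DstReg, SrcReg) emits:
    //   xxpermdi DstReg, SrcReg, SrcReg, 2   ; i.e. xxswapd DstReg, SrcReg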
+
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
@@ -752,8 +787,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (SwapVector[EntryIdx].SpecialHandling) {
default:
- assert(false && "Unexpected special handling type");
- break;
+ llvm_unreachable("Unexpected special handling type");
// For splats based on an index into a vector, add N/2 modulo N
// to the index, where N is the number of vector elements.
@@ -766,7 +800,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (MI->getOpcode()) {
default:
- assert(false && "Unexpected splat opcode");
+ llvm_unreachable("Unexpected splat opcode");
case PPC::VSPLTB: NElts = 16; break;
case PPC::VSPLTH: NElts = 8; break;
case PPC::VSPLTW: NElts = 4; break;
@@ -811,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// For a copy from a scalar floating-point register to a vector
// register, removing swaps will leave the copied value in the
// wrong lane. Insert a swap following the copy to fix this.
- case SHValues::SH_COPYSCALAR: {
+ case SHValues::SH_COPYWIDEN: {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
@@ -825,14 +859,13 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
DEBUG(dbgs() << " Into: ");
DEBUG(MI->dump());
- MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+ auto InsertPoint = ++MachineBasicBlock::iterator(MI);
// Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
// is copying to a VRRC, we need to be careful to avoid a register
// assignment problem. In this case we must copy from VRRC to VSRC
// prior to the swap, and from VSRC to VRRC following the swap.
// Coalescing will usually remove all this mess.
-
if (DstRC == &PPC::VRRCRegClass) {
unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
@@ -840,29 +873,19 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), VSRCTmp1)
.addReg(NewVReg);
- DEBUG(MI->getNextNode()->dump());
+ DEBUG(std::prev(InsertPoint)->dump());
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), VSRCTmp2)
- .addReg(VSRCTmp1)
- .addReg(VSRCTmp1)
- .addImm(2);
- DEBUG(MI->getNextNode()->getNextNode()->dump());
+ insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+ DEBUG(std::prev(InsertPoint)->dump());
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), DstReg)
.addReg(VSRCTmp2);
- DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+ DEBUG(std::prev(InsertPoint)->dump());
} else {
-
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), DstReg)
- .addReg(NewVReg)
- .addReg(NewVReg)
- .addImm(2);
-
- DEBUG(MI->getNextNode()->dump());
+ insertSwap(MI, InsertPoint, DstReg, NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
}
break;
}
@@ -947,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
case SH_XXPERMDI:
DEBUG(dbgs() << "special:xxpermdi ");
break;
- case SH_COPYSCALAR:
- DEBUG(dbgs() << "special:copyscalar ");
+ case SH_COPYWIDEN:
+ DEBUG(dbgs() << "special:copywiden ");
break;
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 1c4e486..a552747 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -34,7 +35,6 @@ namespace {
class SparcOperand;
class SparcAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
MCAsmParser &Parser;
/// @name Auto-generated Match Functions
@@ -69,6 +69,10 @@ class SparcAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
+ // Helper function for dealing with %lo / %hi in PIC mode.
+ const SparcMCExpr *adjustPICRelocation(SparcMCExpr::VariantKind VK,
+ const MCExpr *subExpr);
+
// returns true if Tok is matched to a register and returns register in RegNo.
bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
unsigned &RegKind);
@@ -77,24 +81,24 @@ class SparcAsmParser : public MCTargetAsmParser {
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool is64Bit() const {
- return STI.getTargetTriple().getArch() == Triple::sparcv9;
+ return getSTI().getTargetTriple().getArch() == Triple::sparcv9;
}
void expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
public:
- SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(Options, sti), Parser(parser) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
};
- static unsigned IntRegs[32] = {
+ static const MCPhysReg IntRegs[32] = {
Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
Sparc::O0, Sparc::O1, Sparc::O2, Sparc::O3,
@@ -104,7 +108,7 @@ public:
Sparc::I0, Sparc::I1, Sparc::I2, Sparc::I3,
Sparc::I4, Sparc::I5, Sparc::I6, Sparc::I7 };
- static unsigned FloatRegs[32] = {
+ static const MCPhysReg FloatRegs[32] = {
Sparc::F0, Sparc::F1, Sparc::F2, Sparc::F3,
Sparc::F4, Sparc::F5, Sparc::F6, Sparc::F7,
Sparc::F8, Sparc::F9, Sparc::F10, Sparc::F11,
@@ -114,7 +118,7 @@ public:
Sparc::F24, Sparc::F25, Sparc::F26, Sparc::F27,
Sparc::F28, Sparc::F29, Sparc::F30, Sparc::F31 };
- static unsigned DoubleRegs[32] = {
+ static const MCPhysReg DoubleRegs[32] = {
Sparc::D0, Sparc::D1, Sparc::D2, Sparc::D3,
Sparc::D4, Sparc::D5, Sparc::D6, Sparc::D7,
     Sparc::D8,  Sparc::D9,  Sparc::D10, Sparc::D11,
@@ -124,13 +128,13 @@ public:
Sparc::D24, Sparc::D25, Sparc::D26, Sparc::D27,
Sparc::D28, Sparc::D29, Sparc::D30, Sparc::D31 };
- static unsigned QuadFPRegs[32] = {
+ static const MCPhysReg QuadFPRegs[32] = {
Sparc::Q0, Sparc::Q1, Sparc::Q2, Sparc::Q3,
Sparc::Q4, Sparc::Q5, Sparc::Q6, Sparc::Q7,
Sparc::Q8, Sparc::Q9, Sparc::Q10, Sparc::Q11,
Sparc::Q12, Sparc::Q13, Sparc::Q14, Sparc::Q15 };
- static unsigned ASRRegs[32] = {
+ static const MCPhysReg ASRRegs[32] = {
SP::Y, SP::ASR1, SP::ASR2, SP::ASR3,
SP::ASR4, SP::ASR5, SP::ASR6, SP::ASR7,
SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11,
@@ -140,6 +144,12 @@ public:
SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+ static const MCPhysReg IntPairRegs[] = {
+ Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
+ Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
+ Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
+ Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
+
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
class SparcOperand : public MCParsedAsmOperand {
@@ -147,6 +157,7 @@ public:
enum RegisterKind {
rk_None,
rk_IntReg,
+ rk_IntPairReg,
rk_FloatReg,
rk_DoubleReg,
rk_QuadReg,
@@ -200,6 +211,10 @@ public:
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isIntReg() const {
+ return (Kind == k_Register && Reg.Kind == rk_IntReg);
+ }
+
bool isFloatReg() const {
return (Kind == k_Register && Reg.Kind == rk_FloatReg);
}
@@ -330,6 +345,25 @@ public:
return Op;
}
+ static bool MorphToIntPairReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
+ assert(Op.Reg.Kind == rk_IntReg);
+ unsigned regIdx = 32;
+ if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
+ regIdx = Reg - Sparc::G0;
+ else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
+ regIdx = Reg - Sparc::O0 + 8;
+ else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
+ regIdx = Reg - Sparc::L0 + 16;
+ else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
+ regIdx = Reg - Sparc::I0 + 24;
+ if (regIdx % 2 || regIdx > 31)
+ return false;
+ Op.Reg.RegNum = IntPairRegs[regIdx / 2];
+ Op.Reg.Kind = rk_IntPairReg;
+ return true;
+ }
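+
A worked trace (hypothetical operands) through MorphToIntPairReg:

    // Reg = Sparc::O2 -> regIdx = (O2 - O0) + 8 = 10; even and <= 31,
    //                    so Op becomes IntPairRegs[10/2] == Sparc::O2_O3.
    // Reg = Sparc::O3 -> regIdx = 11 (odd); the morph is rejected and the
    //                    operand does not match the pair register class.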
+
static bool MorphToDoubleReg(SparcOperand &Op) {
unsigned Reg = Op.getReg();
assert(Op.Reg.Kind == rk_FloatReg);
@@ -407,7 +441,22 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
// the imm operand can be either an expression or an immediate.
bool IsImm = Inst.getOperand(1).isImm();
- uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+ int64_t RawImmValue = IsImm ? MCValOp.getImm() : 0;
+
+ // Allow either a signed or unsigned 32-bit immediate.
+ if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL) {
+ Error(IDLoc, "set: argument must be between -2147483648 and 4294967295");
+ return;
+ }
+
+ // If the value was expressed as a large unsigned number, that's ok.
+ // We want to see if it "looks like" a small signed number.
+ int32_t ImmValue = RawImmValue;
+ // For 'set' you can't use 'or' with a negative operand on V9 because
+ // that would splat the sign bit across the upper half of the destination
+ // register, whereas 'set' is defined to zero the high 32 bits.
+ bool IsEffectivelyImm13 =
+ IsImm && ((is64Bit() ? 0 : -4096) <= ImmValue && ImmValue < 4096);
const MCExpr *ValExpr;
if (IsImm)
ValExpr = MCConstantExpr::create(ImmValue, getContext());
@@ -416,10 +465,12 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
MCOperand PrevReg = MCOperand::createReg(Sparc::G0);
- if (!IsImm || (ImmValue & ~0x1fff)) {
+ // If not just a signed imm13 value, then either we use a 'sethi' with a
+ // following 'or', or a 'sethi' by itself if there are no more 1 bits.
+ // In either case, start with the 'sethi'.
+ if (!IsEffectivelyImm13) {
MCInst TmpInst;
- const MCExpr *Expr =
- SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+ const MCExpr *Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr);
TmpInst.setLoc(IDLoc);
TmpInst.setOpcode(SP::SETHIi);
TmpInst.addOperand(MCRegOp);
@@ -428,10 +479,23 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
PrevReg = MCRegOp;
}
- if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
+ // The low bits require touching in 3 cases:
+ // * A non-immediate value will always require both instructions.
+ // * An effectively imm13 value needs only an 'or' instruction.
+ // * Otherwise, an immediate that is not effectively imm13 requires the
+ // 'or' only if bits remain after clearing the 22 bits that 'sethi' set.
+ // If the low bits are known zeros, there's nothing to do.
+ // In the second case, and only in that case, must we NOT clear
+ // bits of the immediate value via the %lo() assembler function.
+ // Note also, the 'or' instruction doesn't mind a large value in the case
+ // where the operand to 'set' was 0xFFFFFzzz - it does exactly what you mean.
+ if (!IsImm || IsEffectivelyImm13 || (ImmValue & 0x3ff)) {
MCInst TmpInst;
- const MCExpr *Expr =
- SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+ const MCExpr *Expr;
+ if (IsEffectivelyImm13)
+ Expr = ValExpr;
+ else
+ Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr);
TmpInst.setLoc(IDLoc);
TmpInst.setOpcode(SP::ORri);
TmpInst.addOperand(MCRegOp);
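To make the cases concrete, assumed expansions of 'set' under the logic above (V8, non-PIC; registers hypothetical):

    // set 10, %o1          -> or %g0, 10, %o1   (effectively imm13: 'or' only)
    // set 0x12345678, %o1  -> sethi %hi(0x12345678), %o1
    //                         or %o1, %lo(0x12345678), %o1
    // set 0x12345000, %o1  -> sethi %hi(0x12345000), %o1
    //                         (low 10 bits zero: no 'or' needed)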
@@ -463,7 +527,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
for (const MCInst &I : Instructions) {
- Out.EmitInstruction(I, STI);
+ Out.EmitInstruction(I, getSTI());
}
return false;
}
@@ -742,6 +806,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case Sparc::PSR:
Op = SparcOperand::CreateToken("%psr", S);
break;
+ case Sparc::FSR:
+ Op = SparcOperand::CreateToken("%fsr", S);
+ break;
case Sparc::WIM:
Op = SparcOperand::CreateToken("%wim", S);
break;
@@ -766,6 +833,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::LParen:
+ case AsmToken::Dot:
if (!getParser().parseExpression(EVal, E))
Op = SparcOperand::CreateImm(EVal, S, E);
break;
@@ -848,6 +916,13 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
return true;
}
+ // %fprs is an alias of %asr6.
+ if (name.equals("fprs")) {
+ RegNo = ASRRegs[6];
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
if (name.equals("icc")) {
RegNo = Sparc::ICC;
RegKind = SparcOperand::rk_Special;
@@ -860,6 +935,12 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
return true;
}
+ if (name.equals("fsr")) {
+ RegNo = Sparc::FSR;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
if (name.equals("wim")) {
RegNo = Sparc::WIM;
RegKind = SparcOperand::rk_Special;
@@ -943,6 +1024,82 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
RegKind = SparcOperand::rk_IntReg;
return true;
}
+
+ if (name.equals("tpc")) {
+ RegNo = Sparc::TPC;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tnpc")) {
+ RegNo = Sparc::TNPC;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tstate")) {
+ RegNo = Sparc::TSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tt")) {
+ RegNo = Sparc::TT;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tick")) {
+ RegNo = Sparc::TICK;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tba")) {
+ RegNo = Sparc::TBA;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("pstate")) {
+ RegNo = Sparc::PSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tl")) {
+ RegNo = Sparc::TL;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("pil")) {
+ RegNo = Sparc::PIL;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cwp")) {
+ RegNo = Sparc::CWP;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cansave")) {
+ RegNo = Sparc::CANSAVE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("canrestore")) {
+ RegNo = Sparc::CANRESTORE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cleanwin")) {
+ RegNo = Sparc::CLEANWIN;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("otherwin")) {
+ RegNo = Sparc::OTHERWIN;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("wstate")) {
+ RegNo = Sparc::WSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
}
return false;
}
@@ -975,6 +1132,32 @@ static bool hasGOTReference(const MCExpr *Expr) {
return false;
}
+const SparcMCExpr *
+SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
+ const MCExpr *subExpr)
+{
+ // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+  // If the expression contains _GLOBAL_OFFSET_TABLE_, the modifier is
+  // actually a %pc10 or %pc22 relocation. Otherwise, it is interpreted
+  // as a %got10 or %got22 relocation.
+
+ if (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) {
+ switch(VK) {
+ default: break;
+ case SparcMCExpr::VK_Sparc_LO:
+ VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC10
+ : SparcMCExpr::VK_Sparc_GOT10);
+ break;
+ case SparcMCExpr::VK_Sparc_HI:
+ VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC22
+ : SparcMCExpr::VK_Sparc_GOT22);
+ break;
+ }
+ }
+
+ return SparcMCExpr::create(VK, subExpr, getContext());
+}
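+
Concretely (assumed symbols), under -KPIC the helper rewrites the modifiers as follows, while in non-PIC mode they pass through unchanged:

    // %hi(sym)                      -> %got22(sym)
    // %lo(sym)                      -> %got10(sym)
    // %hi(_GLOBAL_OFFSET_TABLE_-.)  -> %pc22(_GLOBAL_OFFSET_TABLE_-.)
    // %lo(_GLOBAL_OFFSET_TABLE_-.)  -> %pc10(_GLOBAL_OFFSET_TABLE_-.)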
+
bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
SMLoc &EndLoc)
{
@@ -998,30 +1181,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
if (Parser.parseParenExpression(subExpr, EndLoc))
return false;
- bool isPIC = getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
-
- // Ugly: if a sparc assembly expression says "%hi(...)" but the
- // expression within contains _GLOBAL_OFFSET_TABLE_, it REALLY means
- // %pc22. Same with %lo -> %pc10. Worse, if it doesn't contain that,
- // the meaning depends on whether the assembler was invoked with
- // -KPIC or not: if so, it really means %got22/%got10; if not, it
- // actually means what it said! Sigh, historical mistakes...
-
- switch(VK) {
- default: break;
- case SparcMCExpr::VK_Sparc_LO:
- VK = (hasGOTReference(subExpr)
- ? SparcMCExpr::VK_Sparc_PC10
- : (isPIC ? SparcMCExpr::VK_Sparc_GOT10 : VK));
- break;
- case SparcMCExpr::VK_Sparc_HI:
- VK = (hasGOTReference(subExpr)
- ? SparcMCExpr::VK_Sparc_PC22
- : (isPIC ? SparcMCExpr::VK_Sparc_GOT22 : VK));
- break;
- }
-
- EVal = SparcMCExpr::create(VK, subExpr, getContext());
+ EVal = adjustPICRelocation(VK, subExpr);
return true;
}
@@ -1051,5 +1211,9 @@ unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
break;
}
}
+ if (Op.isIntReg() && Kind == MCK_IntPair) {
+ if (SparcOperand::MorphToIntPairReg(Op))
+ return MCTargetAsmParser::Match_Success;
+ }
return Match_InvalidOperand;
}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 38bff44..c689b7f 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -122,6 +122,8 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
}
+ // TODO: If we ever want to support v7, this needs to be extended
+ // to cover all floating point operations.
if (!Subtarget->isV9() &&
(MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
|| MI->getOpcode() == SP::FCMPQ)) {
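Background for the check above, stated with some hedging: on pre-v9 SPARC a floating-point branch may not immediately read the condition codes set by a preceding fcmp, so these compares get special delay-slot treatment. The emitted sequence then has roughly this shape (illustrative):

    //   fcmps %f0, %f1      ! sets the FP condition codes
    //   nop                 ! pre-v9: an intervening instruction is required
    //   fbe   .LBB0_2       ! the branch may now read the codes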
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e56b9e..51751ec 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -117,6 +117,19 @@ static const unsigned ASRRegDecoderTable[] = {
SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+static const unsigned PRRegDecoderTable[] = {
+ SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK, SP::TBA, SP::PSTATE,
+ SP::TL, SP::PIL, SP::CWP, SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN,
+ SP::OTHERWIN, SP::WSTATE
+};
+
+static const uint16_t IntPairDecoderTable[] = {
+ SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
+ SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
+ SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
+ SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
+};
+
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -196,9 +209,34 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= array_lengthof(PRRegDecoderTable))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1))
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
+ return S;
+}
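The even/odd rule above, restated as a tiny standalone helper (plain unsigned values stand in for the MC types; a sketch, not the decoder itself):

    #include <utility>

    // Maps a 5-bit register number to {pair-table index, soft-fail flag};
    // callers reject RegNo > 31 outright, as the decoder above does.
    std::pair<unsigned, bool> decodePairIndex(unsigned RegNo) {
      // e.g. 4 -> {2, false} (G4_G5); 5 -> {2, true} (G4_G5, but flagged)
      return { RegNo / 2, (RegNo & 1) != 0 };
    }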
static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
@@ -207,6 +245,8 @@ static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
@@ -326,6 +366,12 @@ static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
DecodeIntRegsRegisterClass);
}
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
@@ -350,6 +396,12 @@ static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
DecodeIntRegsRegisterClass);
}
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 0b01b88..6f06d1d 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class SparcInstPrinter : public MCInstPrinter {
public:
SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 12386f1..ad44122 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -21,6 +21,7 @@ class Triple;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
+
public:
explicit SparcELFMCAsmInfo(const Triple &TheTriple);
const MCExpr*
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index d08ad86..13f0819 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -90,8 +90,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index c5f046b..e3b0f52 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -267,11 +267,11 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
return;
}
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
MCInst TmpInst;
- LowerSparcMachineInstrToMCInst(I, TmpInst, *this);
+ LowerSparcMachineInstrToMCInst(&*I, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
@@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand (opNum);
SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
@@ -373,7 +373,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << MO.getSymbolName();
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getIndex();
break;
default:
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
index dfaaabf..0aa29d1 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -21,7 +21,11 @@ def CC_Sparc32 : CallingConv<[
// i32 f32 arguments get passed in integer registers if there is space.
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
// f64 arguments are split and passed through registers or through stack.
- CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
+ // As are v2i32 arguments (this would be the default behavior for
+ // v2i32 if it weren't allocated to the IntPair register class).
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
+
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
@@ -30,7 +34,8 @@ def CC_Sparc32 : CallingConv<[
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index c0279da..39b5e80 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -44,7 +44,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
unsigned ADDrr,
unsigned ADDri) const {
- DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc dl;
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -90,8 +90,23 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SparcRegisterInfo &RegInfo =
+ *static_cast<const SparcRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+ bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+
+ // FIXME: unfortunately, returning false from canRealignStack
+ // actually just causes needsStackRealignment to return false,
+ // rather than reporting an error, as would be sensible. This is
+ // poor, but fixing that bogosity is going to be a large project.
+ // For now, just check whether it has lied, and report an error here.
+ if (!NeedsStackRealignment && MFI->getMaxAlignment() > getStackAlignment())
+ report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
+ "stack re-alignment, but LLVM couldn't handle it "
+ "(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int) MFI->getStackSize();
@@ -104,12 +119,43 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
SAVEri = SP::ADDri;
SAVErr = SP::ADDrr;
}
- NumBytes = -MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
- emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
+ // The SPARC ABI is a bit odd in that it requires a reserved 92-byte
+ // (128 in v9) area in the user's stack, starting at %sp. Thus, the
+ // first part of the stack that can actually be used is located at
+ // %sp + 92.
+ //
+ // We therefore need to add that offset to the total stack size
+ // after all the stack objects are placed by PrologEpilogInserter's
+ // calculateFrameObjectOffsets. However, since the stack needs to be
+ // aligned *after* the extra size is added, we need to disable
+ // calculateFrameObjectOffsets's built-in stack alignment, by having
+ // targetHandlesStackFrameRounding return true.
+
+ // Add the extra call frame stack size, if needed. (This is the same
+ // code as in PrologEpilogInserter, but also gets disabled by
+ // targetHandlesStackFrameRounding)
+ if (MFI->adjustsStack() && hasReservedCallFrame(MF))
+ NumBytes += MFI->getMaxCallFrameSize();
+
+ // Adds the SPARC subtarget-specific spill area to the stack
+ // size. Also ensures target-required alignment.
+ NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
+
+ // Finally, ensure that the size is sufficiently aligned for the
+ // data on the stack.
+ if (MFI->getMaxAlignment() > 0) {
+ NumBytes = RoundUpToAlignment(NumBytes, MFI->getMaxAlignment());
+ }
+
+ // Update stack size with corrected value.
+ MFI->setStackSize(NumBytes);
+
+ emitSPAdjustment(MF, MBB, MBBI, -NumBytes, SAVErr, SAVEri);
MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- unsigned regFP = MRI->getDwarfRegNum(SP::I6, true);
+ unsigned regFP = RegInfo.getDwarfRegNum(SP::I6, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
@@ -122,13 +168,19 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true);
- unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true);
+ unsigned regInRA = RegInfo.getDwarfRegNum(SP::I7, true);
+ unsigned regOutRA = RegInfo.getDwarfRegNum(SP::O7, true);
// Emit ".cfi_register 15, 31".
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+
+ if (NeedsStackRealignment) {
+ // andn %o6, MaxAlign-1, %o6
+ int MaxAlign = MFI->getMaxAlignment();
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6)
+     .addReg(SP::O6).addImm(MaxAlign - 1);
+ }
}
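As a rough worked model of the sizing logic above (all numbers hypothetical; getAdjustedFrameSize is approximated for v8 as "add the 92-byte reserved area and round up to 8"):

    unsigned sparcV8FrameSize(unsigned Locals, unsigned MaxCallFrame,
                              unsigned MaxAlign) {
      unsigned N = Locals + MaxCallFrame;         // call-frame space, if reserved
      N = (N + 92 + 7) & ~7u;                     // approx. getAdjustedFrameSize
      if (MaxAlign > 1)                           // RoundUpToAlignment
        N = (N + MaxAlign - 1) & ~(MaxAlign - 1);
      return N;                                   // prologue: save %sp, -N, %sp
    }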
void SparcFrameLowering::
@@ -167,7 +219,6 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes == 0)
return;
- NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
@@ -180,21 +231,69 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken();
}
+int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ // Addressable stack objects are accessed using neg. offsets from
+ // %fp, or positive offsets from %sp.
+ bool UseFP;
+
+ // Sparc uses FP-based references in general, even when "hasFP" is
+ // false. That function is rather a misnomer, because %fp is
+ // actually always available, unless isLeafProc.
+ if (FuncInfo->isLeafProc()) {
+ // If there's a leaf proc, all offsets need to be %sp-based,
+ // because we haven't caused %fp to actually point to our frame.
+ UseFP = false;
+ } else if (isFixed) {
+ // Otherwise, argument access should always use %fp.
+ UseFP = true;
+ } else if (RegInfo->needsStackRealignment(MF)) {
+ // If there is dynamic stack realignment, all local object
+ // references need to be via %sp, to take account of the
+ // re-alignment.
+ UseFP = false;
+ } else {
+ // Finally, default to using %fp.
+ UseFP = true;
+ }
+
+ int64_t FrameOffset = MF.getFrameInfo()->getObjectOffset(FI) +
+ Subtarget.getStackPointerBias();
+
+ if (UseFP) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FrameOffset;
+ } else {
+ FrameReg = SP::O6; // %sp
+ return FrameOffset + MF.getFrameInfo()->getStackSize();
+ }
+}
+
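The %fp-versus-%sp choice above, compressed into a small decision function (plain bools stand in for the MachineFunction queries; a sketch, not LLVM API):

    bool useFramePointer(bool IsLeafProc, bool IsFixedObject,
                         bool NeedsRealignment) {
      if (IsLeafProc)       return false; // %fp was never set up for this frame
      if (IsFixedObject)    return true;  // incoming arguments live off %fp
      if (NeedsRealignment) return false; // locals must be %sp-relative
      return true;                        // default: %fp
    }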
static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
return true;
@@ -206,33 +305,42 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- return !(MFI->hasCalls() // has calls
- || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
- || MRI.isPhysRegUsed(SP::O6) // %SP is used
- || hasFP(MF)); // need %FP
+ return !(MFI->hasCalls() // has calls
+ || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
+ || !MRI.reg_nodbg_empty(SP::O6) // %SP is used
+ || hasFP(MF)); // need %FP
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
-
MachineRegisterInfo &MRI = MF.getRegInfo();
-
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (!MRI.isPhysRegUsed(reg))
+ if (MRI.reg_nodbg_empty(reg))
continue;
- unsigned mapped_reg = (reg - SP::I0 + SP::O0);
- assert(!MRI.isPhysRegUsed(mapped_reg));
+
+ unsigned mapped_reg = reg - SP::I0 + SP::O0;
+ assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
- // Mark the reg unused.
- MRI.setPhysRegUnused(reg);
+ // Also replace register pair super-registers.
+ if ((reg - SP::I0) % 2 == 0) {
+ unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1;
+ unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1;
+ MRI.replaceRegWith(preg, mapped_preg);
+ }
}
// Rewrite MBB's Live-ins.
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
+ for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
+ if (!MBB->isLiveIn(reg))
+ continue;
+ MBB->removeLiveIn(reg);
+ MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
+ }
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (!MBB->isLiveIn(reg))
continue;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 29fc7b7..cbb4dc0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -39,6 +39,14 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const override { return true; }
+
private:
// Remap input registers to output registers for leaf procedure.
void remapRegsForLeafProc(MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 340b72e..c4c6416 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@@ -62,6 +63,7 @@ public:
private:
SDNode* getGlobalBaseReg();
+ SDNode *SelectInlineAsm(SDNode *N);
};
} // end anonymous namespace
@@ -141,6 +143,181 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
return true;
}
+
+// Re-assemble i64 arguments split up in SelectionDAGBuilder's
+// visitInlineAsm / GetRegistersForValue functions.
+//
+// Note: This function was copied from, and is essentially identical
+// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that
+// such hacking-up is necessary; a rethink of how inline asm operands
+// are handled may be in order to make doing this more sane.
+//
+// TODO: fix inline asm support so I can simply tell it that 'i64'
+// inputs to asm need to be allocated to the IntPair register type,
+// and have that work. Then, delete this function.
+SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
+ // constraint. However, some instructions (e.g. ldd/std) require
+ // (even/even+1) GPRs.
+
+ // So, here, we check for this case, and mutate the inlineasm to use
+ // a single IntPair register instead, which guarantees such even/odd
+ // placement.
+
+ SDLoc dl(N);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
+
+ SmallVector<bool, 8> OpChanged;
+ // Glue node will be appended late.
+ for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
+ || NumRegs != 2)
+ continue;
+
+ assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, Ops);
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(
+ CurDAG->getMachineNode(
+ TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32,
+ {
+ CurDAG->getTargetConstant(SP::IntPairRegClassID, dl,
+ MVT::i32),
+ T0,
+ CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32),
+ T1,
+ CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32),
+ }),
+ 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if(PairedReg.getNode()) {
+ OpChanged[OpChanged.size() - 1] = true;
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
+ Flag, dl, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return nullptr;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
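At the source level, the case this rewrite exists for looks roughly like the following (a hedged example in GCC-style inline asm; the variable is made up):

    long long v = 0x0011223344556677LL;
    // An "r" constraint splits a 64-bit value across two arbitrary i32
    // GPRs; std requires an even/odd pair, which the IntPair rewrite
    // above guarantees.
    asm volatile("std %0, [%1]" : : "r"(v), "r"(&v) : "memory");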
SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
@@ -150,6 +327,12 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case SPISD::GLOBAL_BASE_REG:
return getGlobalBaseReg();
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4879d4e..5e70ffe 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -49,9 +49,9 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
return true;
}
-static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State)
+static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
@@ -77,6 +77,29 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
return true;
}
+static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const MCPhysReg RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ // Try to get first reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ // Try to get second reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ return true;
+}
+
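Concretely, a single v2i32 return value allocates two consecutive registers from RegList, so the resulting location list is roughly (hypothetical trace):

    // ValNo 0 (v2i32) -> custom reg I0   (first 32-bit half)
    //                 -> custom reg I1   (second 32-bit half)
    // If either AllocateReg call returns 0 (RegList exhausted), the
    // assignment as a whole fails.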
// Allocate a full-sized argument for the 64-bit ABI.
static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
@@ -202,12 +225,34 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
RetOps.push_back(SDValue());
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
- OutVals[i], Flag);
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::v2i32);
+ // Legalize ret v2i32 -> ret 2 x i32 (basically: do what would
+ // happen by default if this weren't a legal type).
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -355,6 +400,7 @@ LowerFormalArguments_32(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
unsigned InIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
@@ -375,7 +421,8 @@ LowerFormalArguments_32(SDValue Chain,
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
+
unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
@@ -396,9 +443,13 @@ LowerFormalArguments_32(SDValue Chain,
&SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
+
+ if (IsLittleEndian)
+ std::swap(LoVal, HiVal);
+
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -422,7 +473,7 @@ LowerFormalArguments_32(SDValue Chain,
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::f64);
+ assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo()->CreateFixedObject(8,
@@ -452,9 +503,12 @@ LowerFormalArguments_32(SDValue Chain,
MachinePointerInfo(),
false, false, false, 0);
+ if (IsLittleEndian)
+ std::swap(LoVal, HiVal);
+
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -468,16 +522,12 @@ LowerFormalArguments_32(SDValue Chain,
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
false, false, false, 0);
+ } else if (VA.getValVT() == MVT::f128) {
+ report_fatal_error("SPARCv8 does not handle f128 in calls; "
+ "pass indirectly");
} else {
- ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
- // Sparc is big endian, so add an offset based on the ObjectVT.
- unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
- FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
- DAG.getConstant(Offset, dl, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
- MachinePointerInfo(),
- VA.getValVT(), false, false, false,0);
- Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ // We shouldn't see any other value types here.
+ llvm_unreachable("Unexpected ValVT encountered in frame lowering.");
}
InVals.push_back(Load);
}
@@ -612,7 +662,7 @@ LowerFormalArguments_64(SDValue Chain,
InVals.push_back(DAG.getLoad(
VA.getValVT(), DL, Chain,
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
- MachinePointerInfo::getFixedStack(FI), false, false, false, 0));
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0));
}
if (!IsVarArg)
@@ -640,9 +690,9 @@ LowerFormalArguments_64(SDValue Chain,
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
- OutChains.push_back(
- DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
- MachinePointerInfo::getFixedStack(FI), false, false, 0));
+ OutChains.push_back(DAG.getStore(
+ Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
}
if (!OutChains.empty())
@@ -788,7 +838,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
@@ -804,49 +854,53 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
}
- SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Arg, StackPtr, MachinePointerInfo(),
- false, false, 0);
- // Sparc is big-endian, so the high part comes first.
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
- // Increment the pointer to the other half.
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(4, dl));
- // Load the low part.
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
+ if (VA.getLocVT() == MVT::f64) {
+ // Move the float value from the float registers into the
+ // integer registers.
+
+ // TODO: The f64 -> v2i32 conversion is super-inefficient for
+ // constants: it sticks them in the constant pool, then loads
+ // to a fp register, then stores to temp memory, then loads to
+ // integer registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ }
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
- // Store the low part in stack.
+ // Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
- // Store the high part.
+ // Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff,
MachinePointerInfo(),
false, false, 0));
- // Store the low part.
+ // Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
@@ -990,8 +1044,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
if (!CalleeFn)
return 0;
- assert(CalleeFn->hasStructRetAttr() &&
- "Callee does not have the StructRet attribute.");
+ // It would be nice to check for the sret attribute on CalleeFn here,
+ // but since it is not part of the function type, any check will misfire.
PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
Type *ElementTy = Ty->getElementType();
@@ -1370,15 +1424,60 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- auto &DL = *TM.getDataLayout();
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
+
+ // Instructions which use registers as conditionals examine all the
+ // bits (as does the pseudo SELECT_CC expansion). I don't think it
+ // matters much whether it's ZeroOrOneBooleanContent, or
+ // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
+ // former.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
+ } else {
+ // On 32-bit SPARC, we also define a register class covering pairs
+ // of 32-bit registers; it is modeled in LLVM as a 2-vector of i32.
+ addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
+
+ // ...but almost all operations must be expanded, so set that as
+ // the default.
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ setOperationAction(Op, MVT::v2i32, Expand);
+ }
+ // Truncating/extending stores/loads are also not supported.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
+
+ setTruncStoreAction(VT, MVT::v2i32, Expand);
+ setTruncStoreAction(MVT::v2i32, VT, Expand);
+ }
+ // However, load and store *are* legal.
+ setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
+ setOperationAction(ISD::STORE, MVT::v2i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
+
+ // And we need to promote i64 loads/stores into vector load/store
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+
+ // Sadly, this doesn't work:
+ // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+ // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+ }
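The net effect of the table above on a 64-bit integer access, sketched as pseudo-SelectionDAG (an illustrative dump, not real output):

    //   t1: v2i32,ch = load t0    ; legal: lands in an IntPair, emitted as ldd
    //   t2: i64      = bitcast t1 ; reassembles the original i64 value
    // Arithmetic on v2i32 (add, and, ...) stays Expand and is split into
    // ordinary i32 operations.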
// Turn FP extload into load/fextend
for (MVT VT : MVT::fp_valuetypes()) {
@@ -1396,10 +1495,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
- setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom);
- setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom);
- setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom);
- setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -1579,9 +1678,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- setExceptionPointerRegister(SP::I0);
- setExceptionSelectorRegister(SP::I1);
-
setStackPointerRegisterToSaveRestore(SP::O6);
setOperationAction(ISD::CTPOP, MVT::i32,
@@ -1744,18 +1840,15 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition.
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
- if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isNullValue() &&
+ if (isNullConstant(RHS) &&
CC == ISD::SETNE &&
(((LHS.getOpcode() == SPISD::SELECT_ICC ||
LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
- isa<ConstantSDNode>(LHS.getOperand(0)) &&
- isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
- cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ isOneConstant(LHS.getOperand(0)) &&
+ isNullConstant(LHS.getOperand(1))) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -1821,7 +1914,8 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// This is one of the absolute code models.
@@ -1872,6 +1966,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2601,6 +2698,17 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
return DAG.getMergeValues(Ops, dl);
}
+static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
+{
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Load(Op, DAG);
+
+ return Op;
+}
+
// Lower a f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -2645,6 +2753,29 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
+{
+ SDLoc dl(Op);
+ StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
+
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Store(Op, DAG);
+
+ if (MemVT == MVT::i64) {
+ // Custom handling for i64 stores: turn it into a bitcast and a
+ // v2i32 store.
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
+ SDValue Chain = DAG.getStore(
+ St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ return Chain;
+ }
+
+ return SDValue();
+}
+
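This i64 branch is the store-side mirror of the i64 LOAD case added to ReplaceNodeResults below: the value is bitcast to v2i32 and stored through the legal v2i32 path, which selects to std. Schematically (illustrative):

    //   store i64 %v, %ptr
    //     ==>  t1: v2i32 = bitcast %v
    //          store v2i32 t1, %ptr    ; legal, selects to "std"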
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
@@ -2752,7 +2883,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
- Args, 4, isSigned, dl).first;
+ Args, isSigned, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
@@ -2783,7 +2914,6 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -2818,8 +2948,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);
- case ISD::LOAD: return LowerF128Load(Op, DAG);
- case ISD::STORE: return LowerF128Store(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
@@ -2921,8 +3051,7 @@ SparcTargetLowering::expandSelectCC(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -3007,7 +3136,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
.addReg(AddrReg).addImm(0);
// Split the basic block MBB before MI and insert the loop block in the hole.
- MachineFunction::iterator MFI = MBB;
+ MachineFunction::iterator MFI = MBB->getIterator();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -3149,9 +3278,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, &SP::IntRegsRegClass);
+ if (VT == MVT::v2i32)
+ return std::make_pair(0U, &SP::IntPairRegClass);
+ else
+ return std::make_pair(0U, &SP::IntRegsRegClass);
}
- } else if (!Constraint.empty() && Constraint.size() <= 5
+ } else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
// constraint = '{r<d>}'
// Remove the braces from around the name.
@@ -3227,5 +3359,24 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
+ case ISD::LOAD: {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ // Custom handling only for i64: turn i64 load into a v2i32 load,
+ // and a bitcast.
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
+ return;
+
+ SDLoc dl(N);
+ SDValue LoadRes = DAG.getExtLoad(
+ Ld->getExtensionType(), dl, MVT::v2i32,
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo());
+
+ SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
+ Results.push_back(Res);
+ Results.push_back(LoadRes.getValue(1));
+ return;
+ }
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index bbc91a4..4e46709 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -89,6 +89,20 @@ namespace llvm {
return MVT::i32;
}
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return SP::I0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return SP::I1;
+ }
+
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -167,8 +181,8 @@ namespace llvm {
}
void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) const override;
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 25cc652..d51e2cc 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -250,6 +250,7 @@ defm : int_cond_alias<"n", 0b0000>;
defm : int_cond_alias<"ne", 0b1001>;
defm : int_cond_alias<"nz", 0b1001>; // same as ne
defm : int_cond_alias<"e", 0b0001>;
+defm : int_cond_alias<"eq", 0b0001>; // same as e
defm : int_cond_alias<"z", 0b0001>; // same as e
defm : int_cond_alias<"g", 0b1010>;
defm : int_cond_alias<"le", 0b0010>;
@@ -429,6 +430,9 @@ def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
def : InstAlias<"flush", (FLUSH), 0>;
+def : MnemonicAlias<"lduw", "ld">, Requires<[HasV9]>;
+def : MnemonicAlias<"lduwa", "lda">, Requires<[HasV9]>;
+
def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>;
def : MnemonicAlias<"addc", "addx">, Requires<[HasV9]>;
@@ -450,3 +454,8 @@ def : InstAlias<"fcmpeq $rs1, $rs2", (V9FCMPEQ FCC0, QFPRegs:$rs1,
QFPRegs:$rs2)>,
Requires<[HasHardQuad]>;
+// signx rd -> sra rd, %g0, rd
+def : InstAlias<"signx $rd", (SRArr IntRegs:$rd, IntRegs:$rd, G0), 0>, Requires<[HasV9]>;
+
+// signx reg, rd -> sra reg, %g0, rd
+def : InstAlias<"signx $rs1, $rd", (SRArr IntRegs:$rd, IntRegs:$rs1, G0), 0>, Requires<[HasV9]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 6167c53..733027a 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -284,7 +284,9 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned numSubRegs = 0;
unsigned movOpc = 0;
const unsigned *subRegIdx = nullptr;
+ bool ExtraG0 = false;
+ const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
@@ -294,7 +296,12 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
+ subRegIdx = DW_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::ORrr;
+ ExtraG0 = true;
+ } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
@@ -347,7 +354,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
assert(Dst && Src && "Bad sub-register");
- MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
+ if (ExtraG0)
+ MIB.addReg(SP::G0);
+ MIB.addReg(Src);
+ MovMI = MIB.getInstr();
}
// Add implicit super-register defs and kills to the last MovMI.
MovMI->addRegisterDefined(DestReg, TRI);
@@ -365,19 +376,20 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == &SP::I64RegsRegClass)
+ if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
@@ -403,11 +415,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0)
@@ -415,6 +425,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 3b9e048..ec37c22 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -283,17 +283,32 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(set Ty:$dst, (OpNode ADDRri:$addr))]>;
}
+// TODO: Instructions of the LoadASI class are currently asm only; hooking up
+// CodeGen's address spaces to use these is a future task.
+class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
+ RegisterClass RC, ValueType Ty> :
+ F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
+ !strconcat(OpcStr, "a [$addr] $asi, $dst"),
+ []>;
+
// LoadA multiclass - As above, but also define alternate address space variant
multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
Load<OpcStr, Op3Val, OpNode, RC, Ty> {
- // TODO: The LD*Arr instructions are currently asm only; hooking up
- // CodeGen's address spaces to use these is a future task.
- def Arr : F3_1_asi<3, LoadAOp3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
- !strconcat(OpcStr, "a [$addr] $asi, $dst"),
- []>;
+ def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>;
}
+// The LDSTUB instruction is supported for asm only.
+// It is unlikely that general-purpose code could make use of it.
+// CAS is preferred for sparc v9.
+def LDSTUBrr : F3_1<3, 0b001101, (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldstub [$addr], $dst", []>;
+def LDSTUBri : F3_2<3, 0b001101, (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldstub [$addr], $dst", []>;
+def LDSTUBArr : F3_1_asi<3, 0b011101, (outs IntRegs:$dst),
+ (ins MEMrr:$addr, i8imm:$asi),
+ "ldstuba [$addr] $asi, $dst", []>;
+
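Background, as a sketch rather than part of the change: LDSTUB atomically
loads an unsigned byte and writes 0xFF back to the same location, which is
the classic SPARC test-and-set primitive. Assuming a GCC-style toolchain
targeting SPARC, a minimal byte spinlock built on it could look like:

    // Sketch only: acquire a byte lock with ldstub.
    // A loaded value of 0 means the lock was free and is now held;
    // 0xFF means another owner already holds it.
    static inline void spin_lock(volatile unsigned char *lock) {
      unsigned char old;
      do {
        __asm__ __volatile__("ldstub [%1], %0"
                             : "=r"(old)
                             : "r"(lock)
                             : "memory");
      } while (old != 0);
    }

As the comment above says, compare-and-swap subsumes this on SPARC V9, so
these definitions exist for the assembler only.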
// Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
RegisterClass RC, ValueType Ty> {
@@ -307,14 +322,18 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(OpNode Ty:$rd, ADDRri:$addr)]>;
}
-multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
+// TODO: Instructions of the StoreASI class are currently asm only; hooking up
+// CodeGen's address spaces to use these is a future task.
+class StoreASI<string OpcStr, bits<6> Op3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
- Store<OpcStr, Op3Val, OpNode, RC, Ty> {
- // TODO: The ST*Arr instructions are currently asm only; hooking up
- // CodeGen's address spaces to use these is a future task.
- def Arr : F3_1_asi<3, StoreAOp3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
+ F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
!strconcat(OpcStr, "a $rd, [$addr] $asi"),
[]>;
+
+multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
+ Store<OpcStr, Op3Val, OpNode, RC, Ty> {
+ def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty>;
}
//===----------------------------------------------------------------------===//
@@ -408,15 +427,40 @@ let DecoderMethod = "DecodeLoadInt" in {
defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
}
+let DecoderMethod = "DecodeLoadIntPair" in
+ defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>;
+
// Section B.2 - Load Floating-point Instructions, p. 92
-let DecoderMethod = "DecodeLoadFP" in
- defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
-let DecoderMethod = "DecodeLoadDFP" in
- defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>;
+let DecoderMethod = "DecodeLoadFP" in {
+ defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
+ def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32>,
+ Requires<[HasV9]>;
+}
+let DecoderMethod = "DecodeLoadDFP" in {
+ defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>;
+ def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>,
+ Requires<[HasV9]>;
+}
let DecoderMethod = "DecodeLoadQFP" in
- defm LDQF : Load<"ldq", 0b100010, load, QFPRegs, f128>,
+ defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
+let DecoderMethod = "DecodeLoadFP" in
+ let Defs = [FSR] in {
+ let rd = 0 in {
+ def LDFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr),
+ "ld [$addr], %fsr", []>;
+ def LDFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr),
+ "ld [$addr], %fsr", []>;
+ }
+ let rd = 1 in {
+ def LDXFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr),
+ "ldx [$addr], %fsr", []>, Requires<[HasV9]>;
+ def LDXFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr),
+ "ldx [$addr], %fsr", []>, Requires<[HasV9]>;
+ }
+ }
+
// Section B.4 - Store Integer Instructions, p. 95
let DecoderMethod = "DecodeStoreInt" in {
defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>;
@@ -424,15 +468,40 @@ let DecoderMethod = "DecodeStoreInt" in {
defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
}
+let DecoderMethod = "DecodeStoreIntPair" in
+ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
+
// Section B.5 - Store Floating-point Instructions, p. 97
-let DecoderMethod = "DecodeStoreFP" in
+let DecoderMethod = "DecodeStoreFP" in {
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
-let DecoderMethod = "DecodeStoreDFP" in
- defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>;
+ def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>,
+ Requires<[HasV9]>;
+}
+let DecoderMethod = "DecodeStoreDFP" in {
+ defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>;
+ def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>,
+ Requires<[HasV9]>;
+}
let DecoderMethod = "DecodeStoreQFP" in
- defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>,
+ defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
+let DecoderMethod = "DecodeStoreFP" in
+ let Defs = [FSR] in {
+ let rd = 0 in {
+ def STFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins),
+ "st %fsr, [$addr]", []>;
+ def STFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins),
+ "st %fsr, [$addr]", []>;
+ }
+ let rd = 1 in {
+ def STXFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins),
+ "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
+ def STXFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins),
+ "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
+ }
+ }
+
// Section B.8 - SWAP Register with Memory Instruction
// (Atomic swap)
let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
@@ -559,6 +628,10 @@ let Defs = [Y, ICC] in {
defm SMULCC : F3_12np<"smulcc", 0b011011>;
}
+let Defs = [Y, ICC], Uses = [Y, ICC] in {
+ defm MULSCC : F3_12np<"mulscc", 0b100100>;
+}
+
// Section B.19 - Divide Instructions, p. 115
let Uses = [Y], Defs = [Y] in {
defm UDIV : F3_12np<"udiv", 0b001110>;
@@ -1221,8 +1294,8 @@ let Predicates = [HasV9] in {
// the top 32-bits before using it. To do this clearing, we use a SRLri X,0.
let rs1 = 0 in
def POPCrr : F3_1<2, 0b101110,
- (outs IntRegs:$dst), (ins IntRegs:$src),
- "popc $src, $dst", []>, Requires<[HasV9]>;
+ (outs IntRegs:$rd), (ins IntRegs:$rs2),
+ "popc $rs2, $rd", []>, Requires<[HasV9]>;
def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
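The pattern is correct because, on V9, "srl %reg, 0" zero-extends the low
32 bits into the full 64-bit register, after which popc counts exactly the
bits of the original i32 value. A C++ sketch of the same computation, for
illustration only:

    #include <cstdint>
    // What (POPCrr (SRLri $src, 0)) computes for an i32 value living in a
    // 64-bit register whose upper half is unspecified:
    uint32_t ctpop32(uint64_t reg) {
      uint64_t cleared = reg & 0xffffffffULL; // effect of "srl %reg, 0"
      return __builtin_popcountll(cleared);   // effect of "popc"
    }

__builtin_popcountll is a GCC/Clang builtin, used here only to stand in
for the hardware instruction.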
@@ -1254,6 +1327,25 @@ let hasSideEffects = 1 in {
}
}
+
+// Section A.43 - Read Privileged Register Instructions
+let Predicates = [HasV9] in {
+let rs2 = 0 in
+ def RDPR : F3_1<2, 0b101010,
+ (outs IntRegs:$rd), (ins PRRegs:$rs1),
+ "rdpr $rs1, $rd", []>;
+}
+
+// Section A.62 - Write Privileged Register Instructions
+let Predicates = [HasV9] in {
+ def WRPRrr : F3_1<2, 0b110010,
+ (outs PRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wrpr $rs1, $rs2, $rd", []>;
+ def WRPRri : F3_2<2, 0b110010,
+ (outs PRRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wrpr $rs1, $simm13, $rd", []>;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -1327,6 +1419,18 @@ def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>;
def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;
+// extract_vector
+def : Pat<(extractelt (v2i32 IntPair:$Rn), 0),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>;
+def : Pat<(extractelt (v2i32 IntPair:$Rn), 1),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>;
+
+// build_vector
+def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even),
+ (i32 IntRegs:$a2), sub_odd)>;
+
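These patterns give v2i32 values a direct register-pair lowering: element 0
lives in the even subregister and element 1 in the odd one, which is the
layout LDD/STD operate on. A plain-C++ picture of that layout, illustrative
only:

    #include <cstdint>
    // How a v2i32 sits in an even/odd pair such as G2_G3: the even
    // register holds element 0 and the odd register element 1.
    struct IntPairLayout {
      uint32_t even; // element 0, e.g. %g2
      uint32_t odd;  // element 1, e.g. %g3
    };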
include "SparcInstr64Bit.td"
include "SparcInstrVIS.td"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9667bc0..da31783 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -75,6 +75,18 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SP::G6);
Reserved.set(SP::G7);
+ // Also reserve the register pair aliases covering the above
+ // registers, with the same conditions.
+ Reserved.set(SP::G0_G1);
+ if (ReserveAppRegisters)
+ Reserved.set(SP::G2_G3);
+ if (ReserveAppRegisters || !Subtarget.is64Bit())
+ Reserved.set(SP::G4_G5);
+
+ Reserved.set(SP::O6_O7);
+ Reserved.set(SP::I6_I7);
+ Reserved.set(SP::G6_G7);
+
// Unaliased double registers are not available in non-V9 targets.
if (!Subtarget.isV9()) {
for (unsigned n = 0; n != 16; ++n) {
@@ -158,21 +170,15 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-
- // Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
- int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum + 1).getImm() +
- Subtarget.getStackPointerBias();
- SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
- unsigned FramePtr = SP::I6;
- if (FuncInfo->isLeafProc()) {
- // Use %sp and adjust offset if needed.
- FramePtr = SP::O6;
- int stackSize = MF.getFrameInfo()->getStackSize();
- Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
- }
+ const SparcFrameLowering *TFI = getFrameLowering(MF);
+
+ unsigned FrameReg;
+ int Offset;
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
@@ -182,8 +188,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
- .addReg(FramePtr).addImm(0).addReg(SrcEvenReg);
- replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr);
+ .addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
MI.setDesc(TII.get(SP::STDFri));
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
@@ -194,8 +200,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
- .addReg(FramePtr).addImm(0);
- replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr);
+ .addReg(FrameReg).addImm(0);
+ replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
MI.setDesc(TII.get(SP::LDDFri));
MI.getOperand(0).setReg(DestOddReg);
@@ -203,7 +209,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
- replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr);
+ replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
}
@@ -211,3 +217,25 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
+// Sparc has no architectural need for stack realignment support,
+// except that LLVM currently implements overaligned stack objects
+// by depending on stack realignment support.
+// If that ever changes, this can probably be deleted.
+bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+
+ // Sparc always has a fixed frame pointer register, so we don't need
+ // to worry about reserving it. [Even if the current frame has no
+ // frame pointer, the register still cannot be used for anything
+ // else, or register window traps will misbehave.]
+
+ // If there's a reserved call frame, we can use SP to access locals.
+ if (getFrameLowering(MF)->hasReservedCallFrame(MF))
+ return true;
+
+ // Otherwise, we'd need a base pointer, but those aren't implemented
+ // for SPARC at the moment.
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 764a894..32075b1 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -42,8 +42,10 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const;
- // Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ bool canRealignStack(const MachineFunction &MF) const override;
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index db8a7e8..cca9463 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -32,6 +32,12 @@ def sub_odd64 : SubRegIndex<64, 64>;
// Ri - 32-bit integer registers
class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
+// Rdi - pairs of 32-bit integer registers
+class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
+}
// Rf - 32-bit floating-point registers
class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;
@@ -54,6 +60,8 @@ def ICC : SparcCtrlReg<0, "ICC">; // This represents icc and xcc in 64-bit code.
foreach I = 0-3 in
def FCC#I : SparcCtrlReg<I, "FCC"#I>;
+def FSR : SparcCtrlReg<0, "FSR">; // Floating-point state register.
+
// Y register
def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>;
// Ancillary state registers (implementation defined)
@@ -94,6 +102,22 @@ def PSR : SparcCtrlReg<0, "PSR">;
def WIM : SparcCtrlReg<0, "WIM">;
def TBR : SparcCtrlReg<0, "TBR">;
+def TPC : SparcCtrlReg<0, "TPC">;
+def TNPC : SparcCtrlReg<1, "TNPC">;
+def TSTATE : SparcCtrlReg<2, "TSTATE">;
+def TT : SparcCtrlReg<3, "TT">;
+def TICK : SparcCtrlReg<4, "TICK">;
+def TBA : SparcCtrlReg<5, "TBA">;
+def PSTATE : SparcCtrlReg<6, "PSTATE">;
+def TL : SparcCtrlReg<7, "TL">;
+def PIL : SparcCtrlReg<8, "PIL">;
+def CWP : SparcCtrlReg<9, "CWP">;
+def CANSAVE : SparcCtrlReg<10, "CANSAVE">;
+def CANRESTORE : SparcCtrlReg<11, "CANRESTORE">;
+def CLEANWIN : SparcCtrlReg<12, "CLEANWIN">;
+def OTHERWIN : SparcCtrlReg<13, "OTHERWIN">;
+def WSTATE : SparcCtrlReg<14, "WSTATE">;
+
// Integer registers
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
@@ -217,6 +241,24 @@ def Q13 : Rq<21, "F52", [D26, D27]>;
def Q14 : Rq<25, "F56", [D28, D29]>;
def Q15 : Rq<29, "F60", [D30, D31]>;
+// Aliases of the integer registers used for LDD/STD double-word operations
+def G0_G1 : Rdi<0, "G0", [G0, G1]>;
+def G2_G3 : Rdi<2, "G2", [G2, G3]>;
+def G4_G5 : Rdi<4, "G4", [G4, G5]>;
+def G6_G7 : Rdi<6, "G6", [G6, G7]>;
+def O0_O1 : Rdi<8, "O0", [O0, O1]>;
+def O2_O3 : Rdi<10, "O2", [O2, O3]>;
+def O4_O5 : Rdi<12, "O4", [O4, O5]>;
+def O6_O7 : Rdi<14, "O6", [O6, O7]>;
+def L0_L1 : Rdi<16, "L0", [L0, L1]>;
+def L2_L3 : Rdi<18, "L2", [L2, L3]>;
+def L4_L5 : Rdi<20, "L4", [L4, L5]>;
+def L6_L7 : Rdi<22, "L6", [L6, L7]>;
+def I0_I1 : Rdi<24, "I0", [I0, I1]>;
+def I2_I3 : Rdi<26, "I2", [I2, I3]>;
+def I4_I5 : Rdi<28, "I4", [I4, I5]>;
+def I6_I7 : Rdi<30, "I6", [I6, I7]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -231,6 +273,13 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(sequence "L%u", 0, 7),
(sequence "O%u", 0, 7))>;
+// Should be in the same order as IntRegs.
+def IntPair : RegisterClass<"SP", [v2i32], 64,
+ (add I0_I1, I2_I3, I4_I5, I6_I7,
+ G0_G1, G2_G3, G4_G5, G6_G7,
+ L0_L1, L2_L3, L4_L5, L6_L7,
+ O0_O1, O2_O3, O4_O5, O6_O7)>;
+
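Each pair register reuses the encoding of its even member (G2_G3 has Enc 2,
O0_O1 has Enc 8, and so on), matching the hardware convention that LDD/STD
name only the even register and implicitly touch the following odd one. A
one-line illustration, not a definition from the patch:

    // A 32-bit register with encoding N belongs to the pair encoded N & ~1;
    // e.g. G3 (enc 3) is the odd half of G2_G3 (enc 2).
    unsigned pairEncoding(unsigned regEnc) { return regEnc & ~1u; }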
// Register class for 64-bit mode, with a 64-bit spill slot size.
// These are the same as the 32-bit registers, so TableGen will consider this
// to be a sub-class of IntRegs. That works out because requiring a 64-bit
@@ -252,3 +301,8 @@ def ASRRegs : RegisterClass<"SP", [i32], 32,
(add Y, (sequence "ASR%u", 1, 31))> {
let isAllocatable = 0;
}
+
+// Privileged Registers
+def PRRegs : RegisterClass<"SP", [i64], 64,
+ (add TPC, TNPC, TSTATE, TT, TICK, TBA, PSTATE, TL, PIL, CWP,
+ CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index d69da40..d701594 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -64,7 +64,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
frameSize += 128;
// Frames with calls must also reserve space for 6 outgoing arguments
// whether they are used or not. LowerCall_64 takes care of that.
- assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned");
+ frameSize = RoundUpToAlignment(frameSize, 16);
} else {
// Emit the correct save instruction based on the number of bytes in
// the frame. Minimum stack frame size according to V8 ABI is:
@@ -81,3 +81,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
}
return frameSize;
}
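Replacing the assertion with explicit rounding lets the 64-bit path accept
frame sizes that are not already multiples of 16. RoundUpToAlignment is
LLVM's round-up-to-a-multiple helper; a minimal equivalent, shown only for
illustration:

    #include <cstdint>
    // Round Value up to the next multiple of Align.
    static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }
    // roundUpToAlignment(104, 16) == 112; roundUpToAlignment(112, 16) == 112.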
+
+bool SparcSubtarget::enableMachineScheduler() const {
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 9d21911..e2fd2f0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -60,6 +60,8 @@ public:
return &TSInfo;
}
+ bool enableMachineScheduler() const override;
+
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
bool isVIS2() const { return IsVIS2; }
@@ -85,7 +87,6 @@ public:
/// returns adjusted framesize which includes space for register window
/// spills and arguments.
int getAdjustedFrameSize(int stackSize) const;
-
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 3aa4c6b..9c995bf 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -349,7 +349,6 @@ class SystemZAsmParser : public MCTargetAsmParser {
#include "SystemZGenAsmMatcher.inc"
private:
- MCSubtargetInfo &STI;
MCAsmParser &Parser;
enum RegisterGroup {
RegGR,
@@ -386,14 +385,14 @@ private:
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
- SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(Options, sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
// Override MCTargetAsmParser.
@@ -533,14 +532,16 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
}
// Parse a register of group Group. If Regs is nonnull, use it to map
-// the raw register number to LLVM numbering, with zero entries indicating
-// an invalid register. IsAddress says whether the register appears in an
-// address context.
+// the raw register number to LLVM numbering, with zero entries
+// indicating an invalid register. IsAddress says whether the
+// register appears in an address context. Allow the FP group when the
+// RegV group is expected, since the 'f' prefix yields the FP group even
+// when the register is used with vector instructions.
bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
const unsigned *Regs, bool IsAddress) {
if (parseRegister(Reg))
return true;
- if (Reg.Group != Group)
+ if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
return Error(Reg.StartLoc, "invalid operand for instruction");
if (Regs && Regs[Reg.Num] == 0)
return Error(Reg.StartLoc, "invalid register pair");
@@ -791,7 +792,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 059ae3f..6444cf8 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -60,15 +60,15 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << '%' << getRegisterName(RegNo);
}
-template<unsigned N>
-void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+template <unsigned N>
+static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isUInt<N>(Value) && "Invalid uimm argument");
O << Value;
}
-template<unsigned N>
-void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+template <unsigned N>
+static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isInt<N>(Value) && "Invalid simm argument");
O << Value;
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index ba55e68..7ca386f 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -15,7 +15,6 @@
#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/Compiler.h"
namespace llvm {
class MCOperand;
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 5fefa31..2115d44 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -226,7 +226,7 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCCodeEmitter.
TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
- createSystemZMCCodeEmitter);
+ createSystemZMCCodeEmitter);
// Register the MCInstrInfo.
TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt
index e089047..cd367d6 100644
--- a/contrib/llvm/lib/Target/SystemZ/README.txt
+++ b/contrib/llvm/lib/Target/SystemZ/README.txt
@@ -52,12 +52,6 @@ We don't use the TEST DATA CLASS instructions.
--
-We could use the generic floating-point forms of LOAD COMPLEMENT,
-LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the
-condition codes. For example, we could use LCDFR instead of LCDBR.
-
---
-
We only use MVC, XC and CLC for constant-length block operations.
We could extend them to variable-length operations too,
using EXECUTE RELATIVE LONG.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3dca7bd..7527311 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -288,7 +288,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
OutStreamer->EmitValue(Expr, Size);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 44ea1d2..4a6beb6 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -26,21 +26,6 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
return new SystemZConstantPoolValue(GV, Modifier);
}
-unsigned SystemZConstantPoolValue::getRelocationInfo() const {
- switch (Modifier) {
- case SystemZCP::TLSGD:
- case SystemZCP::TLSLDM:
- case SystemZCP::DTPOFF:
- // May require a dynamic relocation.
- return 2;
- case SystemZCP::NTPOFF:
- // May require a relocation, but the relocations are always resolved
- // by the static linker.
- return 1;
- }
- llvm_unreachable("Unknown modifier");
-}
-
int SystemZConstantPoolValue::
getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index e5f1bb1..a71b595 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -43,7 +43,6 @@ public:
Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier);
// Override MachineConstantPoolValue.
- unsigned getRelocationInfo() const override;
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 16f9adc..4818ed0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -37,13 +37,11 @@ namespace {
// instructions.
struct Reference {
Reference()
- : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
+ : Def(false), Use(false) {}
Reference &operator|=(const Reference &Other) {
Def |= Other.Def;
- IndirectDef |= Other.IndirectDef;
Use |= Other.Use;
- IndirectUse |= Other.IndirectUse;
return *this;
}
@@ -53,11 +51,6 @@ struct Reference {
// via a sub- or super-register.
bool Def;
bool Use;
-
- // True if the register is defined or used indirectly, by a sub- or
- // super-register.
- bool IndirectDef;
- bool IndirectUse;
};
class SystemZElimCompare : public MachineFunctionPass {
@@ -104,14 +97,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) {
return false;
}
-// Return true if any CC result of MI would reflect the value of subreg
-// SubReg of Reg.
-static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
+// Return true if any CC result of MI would reflect the value of Reg.
+static bool resultTests(MachineInstr *MI, unsigned Reg) {
if (MI->getNumOperands() > 0 &&
MI->getOperand(0).isReg() &&
MI->getOperand(0).isDef() &&
- MI->getOperand(0).getReg() == Reg &&
- MI->getOperand(0).getSubReg() == SubReg)
+ MI->getOperand(0).getReg() == Reg)
return true;
switch (MI->getOpcode()) {
@@ -127,30 +118,25 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
case SystemZ::LTEBR:
case SystemZ::LTDBR:
case SystemZ::LTXBR:
- if (MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(1).getSubReg() == SubReg)
+ if (MI->getOperand(1).getReg() == Reg)
return true;
}
return false;
}
-// Describe the references to Reg in MI, including sub- and super-registers.
+// Describe the references to Reg or any of its aliases in MI.
Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
Reference Ref;
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
if (MO.isReg()) {
if (unsigned MOReg = MO.getReg()) {
- if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
- if (MO.isUse()) {
+ if (TRI->regsOverlap(MOReg, Reg)) {
+ if (MO.isUse())
Ref.Use = true;
- Ref.IndirectUse |= (MOReg != Reg);
- }
- if (MO.isDef()) {
+ else if (MO.isDef())
Ref.Def = true;
- Ref.IndirectDef |= (MOReg != Reg);
- }
}
}
}
@@ -158,6 +144,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
return Ref;
}
+// Return true if this is a load-and-test that can be optimized in the
+// same way as a compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+ // If isel used a load-and-test as a compare with zero, the def
+ // operand is dead.
+ return ((MI->getOpcode() == SystemZ::LTEBR ||
+ MI->getOpcode() == SystemZ::LTDBR ||
+ MI->getOpcode() == SystemZ::LTXBR) &&
+ MI->getOperand(0).isDead());
+}
+
+// Return the source register of Compare, which is the unknown value
+// being tested.
+static unsigned getCompareSourceReg(MachineInstr *Compare) {
+ unsigned reg = 0;
+ if (Compare->isCompare())
+ reg = Compare->getOperand(0).getReg();
+ else if (isLoadAndTestAsCmp(Compare))
+ reg = Compare->getOperand(1).getReg();
+ assert(reg);
+
+ return reg;
+}
+
// Compare compares the result of MI against zero. If MI is an addition
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
// and convert the branch to a BRCT(G). Return true on success.
@@ -188,7 +198,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
// We already know that there are no references to the register between
// MI and Compare. Make sure that there are also no references between
// Compare and Branch.
- unsigned SrcReg = Compare->getOperand(0).getReg();
+ unsigned SrcReg = getCompareSourceReg(Compare);
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (getRegReferences(MBBI, SrcReg))
@@ -196,16 +206,15 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
// The transformation is OK. Rebuild Branch as a BRCT(G).
MachineOperand Target(Branch->getOperand(2));
- Branch->RemoveOperand(2);
- Branch->RemoveOperand(1);
- Branch->RemoveOperand(0);
+ while (Branch->getNumOperands())
+ Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(Target)
.addReg(SystemZ::CC, RegState::ImplicitDefine);
- MI->removeFromParent();
+ MI->eraseFromParent();
return true;
}
@@ -308,6 +317,10 @@ static bool isCompareZero(MachineInstr *Compare) {
return true;
default:
+
+ if (isLoadAndTestAsCmp(Compare))
+ return true;
+
return (Compare->getNumExplicitOperands() == 2 &&
Compare->getOperand(1).isImm() &&
Compare->getOperand(1).getImm() == 0);
@@ -325,8 +338,7 @@ optimizeCompareZero(MachineInstr *Compare,
return false;
// Search back for CC results that are based on the first operand.
- unsigned SrcReg = Compare->getOperand(0).getReg();
- unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+ unsigned SrcReg = getCompareSourceReg(Compare);
MachineBasicBlock &MBB = *Compare->getParent();
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
Reference CCRefs;
@@ -334,7 +346,7 @@ optimizeCompareZero(MachineInstr *Compare,
while (MBBI != MBBE) {
--MBBI;
MachineInstr *MI = MBBI;
- if (resultTests(MI, SrcReg, SrcSubReg)) {
+ if (resultTests(MI, SrcReg)) {
// Try to remove both MI and Compare by converting a branch to BRCT(G).
// We don't care in this case whether CC is modified between MI and
// Compare.
@@ -435,23 +447,21 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
while (MBBI != MBB.begin()) {
MachineInstr *MI = --MBBI;
if (CompleteCCUsers &&
- MI->isCompare() &&
+ (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
(optimizeCompareZero(MI, CCUsers) ||
fuseCompareAndBranch(MI, CCUsers))) {
++MBBI;
- MI->removeFromParent();
+ MI->eraseFromParent();
Changed = true;
CCUsers.clear();
- CompleteCCUsers = true;
continue;
}
- Reference CCRefs(getRegReferences(MI, SystemZ::CC));
- if (CCRefs.Def) {
+ if (MI->definesRegister(SystemZ::CC)) {
CCUsers.clear();
- CompleteCCUsers = !CCRefs.IndirectDef;
+ CompleteCCUsers = true;
}
- if (CompleteCCUsers && CCRefs.Use)
+ if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers)
CCUsers.push_back(MI);
}
return Changed;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 397de47..e1b20d0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -48,7 +48,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
SystemZFrameLowering::SystemZFrameLowering()
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize, 8) {
+ -SystemZMC::CallFrameSize, 8,
+ false /* StackRealignable */) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
@@ -133,7 +134,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
// Scan the call-saved GPRs and find the bounds of the register spill area.
unsigned LowGPR = 0;
@@ -322,7 +323,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
bool HasFP = hasFP(MF);
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
// The current offset of the stack pointer from the CFA.
int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
@@ -394,7 +398,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for the this save.
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- int64_t Offset = getFrameIndexOffset(MF, Save.getFrameIdx());
+ unsigned IgnoredFrameReg;
+ int64_t Offset =
+ getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
+
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, SPOffsetFromCFA + Offset));
CFIIndexes.push_back(CFIIndex);
@@ -455,9 +462,14 @@ bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
}
-int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
// Start with the offset of FI from the top of the caller-allocated frame
// (i.e. the top of the 160 bytes allocated by the caller). This initial
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 5ade757..46bb6b7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -43,7 +43,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 75fd37f..a909309 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -585,7 +585,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
if (N.getNode()->getNodeId() == -1 ||
N.getNode()->getNodeId() > Pos->getNodeId()) {
- DAG->RepositionNode(Pos, N.getNode());
+ DAG->RepositionNode(Pos->getIterator(), N.getNode());
N.getNode()->setNodeId(Pos->getNodeId());
}
}
@@ -801,7 +801,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
RxSBG.Input = N.getOperand(0);
return true;
}
-
+
case ISD::ANY_EXTEND:
// Bits above the extended operand are don't-care.
RxSBG.Input = N.getOperand(0);
@@ -818,7 +818,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
return true;
}
// Fall through.
-
+
case ISD::SIGN_EXTEND: {
// Check that the extension bits are don't-care (i.e. are masked out
// by the final mask).
@@ -938,7 +938,23 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
return nullptr;
}
- }
+ }
+
+ // If the RISBG operands require no rotation and just masks the bottom
+ // 8/16 bits, attempt to convert this to a LLC zero extension.
+ if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) {
+ unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR);
+ if (VT == MVT::i32) {
+ if (Subtarget->hasHighWord())
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux);
+ else
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR);
+ }
+
+ SDValue In = convertTo(DL, VT, RISBG.Input);
+ N = CurDAG->getMachineNode(OpCode, DL, VT, In);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ }
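The rewrite is sound because a RISBG with no rotation whose mask keeps only
the low 8 or 16 bits computes exactly a zero-extension, which the LLC/LLH
family performs directly and, unlike RISBG, without clobbering CC. In plain
C++ terms, a sketch of the 8-bit case:

    #include <cstdint>
    // With Rotate == 0 and Mask == 0xff, RISBG computes:
    uint64_t risbgByte(uint64_t x) { return x & 0xff; }
    // ...which agrees with the zero-extension LLGCR performs:
    uint64_t llgcByte(uint64_t x) { return (uint8_t)x; }
    // The two are equal for every x, so the node can be replaced.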
unsigned Opcode = SystemZ::RISBG;
// Prefer RISBGN if available, since it does not clobber CC.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 9a753c8..ee73267 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -84,8 +84,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- auto &DL = *TM.getDataLayout();
- MVT PtrVT = getPointerTy(DL);
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the register classes.
if (Subtarget.hasHighWord())
@@ -115,8 +114,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
- setExceptionPointerRegister(SystemZ::R6D);
- setExceptionSelectorRegister(SystemZ::R7D);
setStackPointerRegisterToSaveRestore(SystemZ::R15D);
// TODO: It may be better to default to latency-oriented scheduling, however
@@ -370,7 +367,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
}
}
@@ -776,9 +775,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
}
bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall())
- return false;
- return true;
+ return CI->isTailCall();
}
// We do not yet support 128-bit single-element vector types. If the user
@@ -939,8 +936,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false,
+ false, false, 0);
}
// Convert the value of the argument register into the value that's
@@ -976,9 +973,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ MachinePointerInfo::getFixedStack(MF, FI),
false, false, 0);
-
}
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -1060,9 +1056,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
ArgValue = SpillSlot;
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@@ -1607,8 +1603,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
} else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
if (Value > Mask)
return;
- assert(C.ICmpType == SystemZICMP::Any &&
- "Signedness shouldn't matter here.");
+ // If the constant is in range, we can use any comparison.
+ C.ICmpType = SystemZICMP::Any;
} else
return;
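Forcing SystemZICMP::Any here is justified rather than merely asserted:
once the zero-extending load guarantees both comparison operands lie in
[0, Mask], signed and unsigned orderings agree, so any comparison flavor
works. A small standalone illustration:

    #include <cstdint>
    // For operands that fit the loaded width (8 bits here), signed and
    // unsigned comparison always give the same answer:
    bool ordersAgree(uint8_t a, uint8_t b) {
      return ((int32_t)a < (int32_t)b) == (a < b); // true for all a, b
    }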
@@ -2439,7 +2435,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// If there was a non-zero offset that we didn't fold, create an explicit
@@ -2499,7 +2496,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
}
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(Node, DAG);
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2529,9 +2528,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
@@ -2544,9 +2544,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the module base offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
@@ -2562,9 +2563,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
- DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- DTPOffset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ DTPOffset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), DTPOffset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
break;
@@ -2575,8 +2577,8 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
SystemZII::MO_INDNTPOFF);
Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getGOT(),
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
break;
}
@@ -2587,9 +2589,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
break;
}
}
@@ -2628,10 +2631,10 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue Result;
if (CP->isMachineConstantPoolEntry())
Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ CP->getAlignment());
else
Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ CP->getAlignment(), CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -2736,17 +2739,37 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ bool RealignOpt = !DAG.getMachineFunction().getFunction()->
+ hasFnAttribute("no-realign-stack");
+
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
SDLoc DL(Op);
+ // If the user has set the "no-realign-stack" function attribute,
+ // ignore alloca alignments.
+ uint64_t AlignVal = (RealignOpt ?
+ dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+
+ uint64_t StackAlign = TFI->getStackAlignment();
+ uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
+ uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
+
unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ SDValue NeededSpace = Size;
// Get a reference to the stack pointer.
SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+ // Add extra space for alignment if needed.
+ if (ExtraAlignSpace)
+ NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
+ SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
// Copy the new stack pointer back.
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
@@ -2757,6 +2780,16 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
+ // Dynamically realign if needed.
+ if (RequiredAlign > StackAlign) {
+ Result =
+ DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+ Result =
+ DAG.getNode(ISD::AND, DL, MVT::i64, Result,
+ DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
+ }
+
SDValue Ops[2] = { Result, Chain };
return DAG.getMergeValues(Ops, DL);
}
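The logic above over-allocates by ExtraAlignSpace = RequiredAlign -
StackAlign bytes and then rounds the result pointer down to RequiredAlign;
because the pointer was first bumped by ExtraAlignSpace, the rounded value
always stays inside the allocation. A standalone sketch of the pointer
math, ignoring the ADJDYNALLOC call-frame adjustment and assuming the
stack pointer stays 8-byte aligned:

    #include <cstdint>
    // Carve Size bytes aligned to RequiredAlign (a power of two >= 8)
    // out of a downward-growing stack with 8-byte natural alignment.
    uint64_t alignedAlloca(uint64_t OldSP, uint64_t Size,
                           uint64_t RequiredAlign) {
      const uint64_t StackAlign = 8;
      uint64_t Extra = RequiredAlign - StackAlign;       // ExtraAlignSpace
      uint64_t NewSP = OldSP - (Size + Extra);           // over-allocate
      return (NewSP + Extra) & ~(RequiredAlign - 1);     // realign down
    }
    // E.g. OldSP = 1000, Size = 40, RequiredAlign = 32:
    // Extra = 24, NewSP = 936, result = 960, result + Size == OldSP.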
@@ -2837,7 +2870,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
} else if (DAG.ComputeNumSignBits(Op1) > 32) {
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
Opcode = SystemZISD::SDIVREM32;
- } else
+ } else
Opcode = SystemZISD::SDIVREM64;
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
@@ -3247,8 +3280,8 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getNumValues() == 1)
return CC;
assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
- Glued, CC);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
+ CC);
}
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3890,7 +3923,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
GS.addUndef();
} else {
GS.add(SDValue(), ResidueOps.size());
- ResidueOps.push_back(Op);
+ ResidueOps.push_back(BVN->getOperand(I));
}
}
@@ -3901,7 +3934,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
// Create the BUILD_VECTOR for the remaining elements, if any.
if (!ResidueOps.empty()) {
while (ResidueOps.size() < NumElements)
- ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
+ ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
for (auto &Op : GS.Ops) {
if (!Op.getNode()) {
Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
@@ -4204,7 +4237,7 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const {
+ unsigned UnpackHigh) const {
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
@@ -4566,9 +4599,9 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
}
return Op;
} else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
- canTreatAsByteVector(Op.getValueType()) &&
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
canTreatAsByteVector(Op.getOperand(0).getValueType())) {
// Make sure that only the unextended bits are significant.
EVT ExtVT = Op.getValueType();
@@ -4579,14 +4612,14 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
unsigned SubByte = Byte % ExtBytesPerElement;
unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
if (SubByte < MinSubByte ||
- SubByte + BytesPerElement > ExtBytesPerElement)
- break;
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
// Get the byte offset of the unextended element
Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
// ...then add the byte offset relative to that element.
Byte += SubByte - MinSubByte;
if (Byte % BytesPerElement != 0)
- break;
+ break;
Op = Op.getOperand(0);
Index = Byte / BytesPerElement;
Force = true;
@@ -5611,6 +5644,31 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
return MBB;
}
+MachineBasicBlock *
+SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned SrcReg = MI->getOperand(0).getReg();
+
+ // Create a new virtual register of the same class as the source.
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ unsigned DstReg = MRI->createVirtualRegister(RC);
+
+ // Replace pseudo with a normal load-and-test that models the def as
+ // well.
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
+ .addReg(SrcReg);
+ MI->eraseFromParent();
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -5858,6 +5916,13 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
case SystemZ::TBEGINC:
return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
+ case SystemZ::LTEBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
+ case SystemZ::LTDBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
+ case SystemZ::LTXBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 07ff251..391636e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -409,6 +409,20 @@ public:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R6D;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R7D;
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
@@ -481,7 +495,7 @@ private:
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const;
+ unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
@@ -530,6 +544,10 @@ private:
MachineBasicBlock *MBB,
unsigned Opcode,
bool NoFloat) const;
+ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const;
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index 464f79a..5a1c874 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -35,11 +35,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
int64_t Offset = 0;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI), Offset),
- Flags, MFFrame->getObjectSize(FI),
- MFFrame->getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
+ MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI));
return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 27fbd7d..0cb2672 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -46,15 +46,28 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
-// Note that the comparison against zero operation is not available if we
-// have vector support, since load-and-test instructions will partially
-// clobber the target (vector) register.
+// Note that LTxBRCompare is not available if we have vector support,
+// since load-and-test instructions will partially clobber the target
+// (vector) register.
let Predicates = [FeatureNoVector] in {
defm : CompareZeroFP<LTEBRCompare, FP32>;
defm : CompareZeroFP<LTDBRCompare, FP64>;
defm : CompareZeroFP<LTXBRCompare, FP128>;
}
+// When vector support is available, use a normal load-and-test for
+// compares against zero (via a pseudo to simplify instruction selection).
+let Defs = [CC], usesCustomInserter = 1 in {
+ def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
+ def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
+ def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
+}
+let Predicates = [FeatureVector] in {
+ defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
+ defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
+ defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
+}
+
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>;
@@ -238,26 +251,46 @@ let Predicates = [FeatureFPExtension] in {
// Unary arithmetic
//===----------------------------------------------------------------------===//
+// We prefer generic instructions during isel, because they do not
+// clobber CC and therefore give the scheduler more freedom. In cases
+// where CC is actually useful, the SystemZElimCompare pass will try to
+// convert generic instructions into opcodes that also set CC. Note
+// that lcdf / lpdf / lndf only affect the sign bit, and can therefore
+// be used with fp32 as well. This could be done for fp128, in which
+// case the operands would have to be tied.
+
// Negation (Load Complement).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>;
- def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>;
+ def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>;
def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>;
// Absolute value (Load Positive).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>;
- def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>;
+ def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>;
def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>;
// Negative absolute value (Load Negative).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>;
- def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>;
+ def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>;
def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>;
// Square root.
def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>;
@@ -414,6 +447,6 @@ let Defs = [CC], CCValues = 0xF in {
// Peepholes
//===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>;
-def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>;
+def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>;
+def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>;
def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 71eb998..01f4cde 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2381,6 +2381,7 @@ multiclass StringRRE<string mnemonic, bits<16> opcode,
def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
(ins GR64:$R1src, GR64:$R2src),
mnemonic#"\t$R1, $R2", []> {
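+ // All string instructions read a special character from R0L (the
+ // terminator for mvst/clst, the search character for srst), so the
+ // implicit use is modeled here rather than at each defm site.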
+ let Uses = [R0L];
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5d4a34f..e6b5fc8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -69,6 +69,11 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineOperand &LowOffsetOp = MI->getOperand(2);
LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+ // Clear the kill flags for the base and index registers in the first
+ // instruction.
+ EarlierMI->getOperand(1).setIsKill(false);
+ EarlierMI->getOperand(3).setIsKill(false);
+
// Set the opcodes.
unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
@@ -111,7 +116,7 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
}
// MI is a three-operand RIE-style pseudo instruction. Replace it with
-// LowOpcode3 if the registers are both low GR32s, otherwise use a move
+// LowOpcodeK if the registers are both low GR32s, otherwise use a move
// followed by HighOpcode or LowOpcode, depending on whether the target
// is a high or low GR32.
void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
@@ -129,6 +134,7 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
MI->getOperand(1).isKill());
MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
MI->getOperand(1).setReg(DestReg);
+ MI->tieOperands(0, 1);
}
}
@@ -486,11 +492,8 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
const MachineRegisterInfo *MRI) const {
assert(!SrcReg2 && "Only optimizing constant comparisons so far");
bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
- if (Value == 0 &&
- !IsLogical &&
- removeIPMBasedCompare(Compare, SrcReg, MRI, &RI))
- return true;
- return false;
+ return Value == 0 && !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
// If Opcode is a move that has a conditional variant, return that variant,
@@ -505,16 +508,13 @@ static unsigned getConditionalMove(unsigned Opcode) {
bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned Opcode = MI->getOpcode();
- if (STI.hasLoadStoreOnCond() &&
- getConditionalMove(Opcode))
- return true;
- return false;
+ return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode);
}
bool SystemZInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
// For now only convert single instructions.
return NumCycles == 1;
}
@@ -524,7 +524,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumCyclesT, unsigned ExtraPredCyclesT,
MachineBasicBlock &FMBB,
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
// For now avoid converting mutually-exclusive cases.
return false;
}
@@ -548,11 +548,10 @@ PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
return false;
}
-void
-SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
@@ -590,13 +589,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-void
-SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void SystemZInstrInfo::storeRegToStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
@@ -604,15 +600,14 @@ SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned LoadOpcode, StoreOpcode;
getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode))
- .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+ .addReg(SrcReg, getKillRegState(isKill)),
+ FrameIdx);
}
-void
-SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void SystemZInstrInfo::loadRegFromStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
@@ -681,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LiveVariables *LV) const {
MachineInstr *MI = MBBI;
MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned Opcode = MI->getOpcode();
unsigned NumOps = MI->getNumOperands();
@@ -708,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
if (ThreeOperandOpcode >= 0) {
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
- .addOperand(Dest);
+ // Create the three-address instruction without adding the implicit
+ // operands. Those will instead be copied over from the original
+ // instruction by the loop below.
+ MachineInstrBuilder MIB(*MF,
+ MF->CreateMachineInstr(get(ThreeOperandOpcode),
+ MI->getDebugLoc(), /*NoImplicit=*/true));
+ MIB.addOperand(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
MIB.addOperand(MI->getOperand(I));
+ MBB->insert(MI, MIB);
return finishConvertToThreeAddress(MI, MIB, LV);
}
}
@@ -1191,6 +1192,12 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
case SystemZ::LER: return SystemZ::LTEBR;
case SystemZ::LDR: return SystemZ::LTDBR;
case SystemZ::LXR: return SystemZ::LTXBR;
+ case SystemZ::LCDFR: return SystemZ::LCDBR;
+ case SystemZ::LPDFR: return SystemZ::LPDBR;
+ case SystemZ::LNDFR: return SystemZ::LNDBR;
+ case SystemZ::LCDFR_32: return SystemZ::LCEBR;
+ case SystemZ::LPDFR_32: return SystemZ::LPEBR;
+ case SystemZ::LNDFR_32: return SystemZ::LNEBR;
// On zEC12 we prefer to use RISBGN. But if there is a chance to
// actually use the condition code, we may turn it back into RISBG.
// Note that RISBG is not really a "load-and-test" instruction,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 31c9db2..d9094ba 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -159,12 +159,12 @@ public:
bool isPredicable(MachineInstr *MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumCyclesT, unsigned ExtraPredCyclesT,
MachineBasicBlock &FMBB,
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool PredicateInstruction(MachineInstr *MI,
ArrayRef<MachineOperand> Pred) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 820f30b..b9f2eb5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -397,7 +397,7 @@ let mayLoad = 1, mayStore = 1 in
defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
// String moves.
-let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
//===----------------------------------------------------------------------===//
@@ -424,7 +424,7 @@ let hasSideEffects = 0 in {
def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
}
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>;
+ def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>;
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
@@ -490,7 +490,7 @@ def : Pat<(and GR64:$src, 0xffffffff),
def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
Requires<[FeatureHighWord]>;
def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
-def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>,
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
Requires<[FeatureHighWord]>;
// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
@@ -498,7 +498,7 @@ def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>,
def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
Requires<[FeatureHighWord]>;
def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
-def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>,
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
Requires<[FeatureHighWord]>;
def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
@@ -1147,7 +1147,7 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
Requires<[FeatureHighWord]>;
def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
- def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>,
+ def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>,
Requires<[FeatureHighWord]>;
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
@@ -1185,7 +1185,7 @@ let mayLoad = 1, Defs = [CC] in
defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
// String comparison.
-let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, Defs = [CC] in
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
// Test under mask.
@@ -1459,9 +1459,29 @@ let usesCustomInserter = 1 in {
}
// Search a block of memory for a character.
-let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
+// Other instructions, provided for use from inline assembly.
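+// They carry no selection patterns; e.g. (an illustrative use, not taken
+// from this patch):
+//   asm volatile("stck %0" : "=Q"(tod) : : "cc");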
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2),
+ "stck\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2),
+ "stckf\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2),
+ "stcke\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2),
+ "stfle\t$BD2",
+ []>;
+
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 00572d0..1a7c0d7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//== SystemZMachineFuctionInfo.cpp - SystemZ machine function info-*- C++ -*-=//
+//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 34fc36d..f4a517b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index dc7bd25..6fd24e3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -69,8 +69,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Decompose the frame index into a base and offset.
int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
- unsigned BasePtr = getFrameRegister(MF);
- int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) +
+ unsigned BasePtr;
+ int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
MI->getOperand(FIOperandNum + 1).getImm());
// Special handling of dbg_value instructions.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 85aa0a6..0d8b08b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -282,4 +282,5 @@ def v128any : TypedReg<untyped, VR128>;
// The 2-bit condition code field of the PSW. Every register named in an
// inline asm needs a class associated with it.
def CC : SystemZReg<"cc">;
-def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
+let isAllocatable = 0 in
+ def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index d1a17c5..846edd5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -16,6 +16,8 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -35,19 +37,16 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
private:
- bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
- unsigned LLIxL, unsigned LLIxH);
+ bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH);
bool shortenOn0(MachineInstr &MI, unsigned Opcode);
bool shortenOn01(MachineInstr &MI, unsigned Opcode);
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
-
- // LowGPRs[I] has bit N set if LLVM register I includes the low
- // word of GPR N. HighGPRs is the same for the high word.
- unsigned LowGPRs[SystemZ::NUM_TARGET_REGS];
- unsigned HighGPRs[SystemZ::NUM_TARGET_REGS];
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
};
char SystemZShortenInst::ID = 0;
@@ -58,33 +57,31 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
}
SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() {
- // Set up LowGPRs and HighGPRs.
- for (unsigned I = 0; I < 16; ++I) {
- LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
- LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
- HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I;
- HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
- if (unsigned GR128 = SystemZMC::GR128Regs[I]) {
- LowGPRs[GR128] |= 3 << I;
- HighGPRs[GR128] |= 3 << I;
- }
- }
+ : MachineFunctionPass(ID), TII(nullptr) {}
+
+// Tie operands if MI has become a two-address instruction.
+static void tieOpsIfNeeded(MachineInstr &MI) {
+ if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ !MI.getOperand(0).isTied())
+ MI.tieOperands(0, 1);
}
// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
// are the halfword immediate loads for the same word. Try to use one of them
-// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high
-// words referenced by LLVM register X while LiveOther is the mask of low
-// words that are currently live, and vice versa.
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
- unsigned LiveOther, unsigned LLIxL,
- unsigned LLIxH) {
+// instead of IIxF.
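+// For example (illustrative): "iilf %r2, 1" can become "llill %r2, 1"
+// when the high word of %r2 is dead, since LLILL also zeroes the rest of
+// the 64-bit register.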
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI,
+ unsigned LLIxL, unsigned LLIxH) {
unsigned Reg = MI.getOperand(0).getReg();
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- unsigned GPRs = GPRMap[Reg];
- assert(GPRs != 0 && "Register must be a GPR");
- if (GPRs & LiveOther)
+ // The new opcode will clear the other half of the GR64 register, so
+ // bail out if that half is live.
+ unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ?
+ SystemZ::subreg_h32 : SystemZ::subreg_l32);
+ unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ?
+ SystemZ::subreg_h32 : SystemZ::subreg_l32);
+ unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx,
+ &SystemZ::GR64BitRegClass);
+ unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
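+ // For example (illustrative): Reg == SystemZ::R2L gives
+ // GR64BitReg == SystemZ::R2D and OtherReg == SystemZ::R2H.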
+ if (LiveRegs.contains(OtherReg))
return false;
uint64_t Imm = MI.getOperand(1).getImm();
@@ -123,12 +120,26 @@ bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
}
// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
-// 4-bit encoding and if operands 0 and 1 are tied.
+// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0
+// with op 1, if MI becomes 2-address.
bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
MI.setDesc(TII->get(Opcode));
+ tieOpsIfNeeded(MI);
+ return true;
+ }
+ return false;
+}
+
+// Calls shortenOn001 if CC is not live. On success, an implicit CC def
+// operand is added, since the replacement opcode also sets CC.
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI,
+ unsigned Opcode) {
+ if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) {
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
return true;
}
return false;
@@ -164,35 +175,24 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- // Work out which words are live on exit from the block.
- unsigned LiveLow = 0;
- unsigned LiveHigh = 0;
- for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) {
- for (auto LI = (*SI)->livein_begin(), LE = (*SI)->livein_end();
- LI != LE; ++LI) {
- unsigned Reg = *LI;
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- LiveLow |= LowGPRs[Reg];
- LiveHigh |= HighGPRs[Reg];
- }
- }
+ // Set up the set of registers live on exit from MBB (the live-outs).
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(&MBB);
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
switch (MI.getOpcode()) {
case SystemZ::IILF:
- Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
- SystemZ::LLILH);
+ Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH);
break;
case SystemZ::IIHF:
- Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
- SystemZ::LLIHH);
+ Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
case SystemZ::WFADB:
- Changed |= shortenOn001(MI, SystemZ::ADBR);
+ Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
case SystemZ::WFDDB:
@@ -216,15 +216,15 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
case SystemZ::WFLCDB:
- Changed |= shortenOn01(MI, SystemZ::LCDBR);
+ Changed |= shortenOn01(MI, SystemZ::LCDFR);
break;
case SystemZ::WFLNDB:
- Changed |= shortenOn01(MI, SystemZ::LNDBR);
+ Changed |= shortenOn01(MI, SystemZ::LNDFR);
break;
case SystemZ::WFLPDB:
- Changed |= shortenOn01(MI, SystemZ::LPDBR);
+ Changed |= shortenOn01(MI, SystemZ::LPDFR);
break;
case SystemZ::WFSQDB:
@@ -232,7 +232,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
case SystemZ::WFSDB:
- Changed |= shortenOn001(MI, SystemZ::SDBR);
+ Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
break;
case SystemZ::WFCDB:
@@ -257,33 +257,17 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
}
- unsigned UsedLow = 0;
- unsigned UsedHigh = 0;
- for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
- if (MO.isReg()) {
- if (unsigned Reg = MO.getReg()) {
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- if (MO.isDef()) {
- LiveLow &= ~LowGPRs[Reg];
- LiveHigh &= ~HighGPRs[Reg];
- } else if (!MO.isUndef()) {
- UsedLow |= LowGPRs[Reg];
- UsedHigh |= HighGPRs[Reg];
- }
- }
- }
- }
- LiveLow |= UsedLow;
- LiveHigh |= UsedHigh;
+ LiveRegs.stepBackward(MI);
}
return Changed;
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ LiveRegs.init(TRI);
bool Changed = false;
for (auto &MBB : F)
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 00cbbd1..f305e85 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -16,6 +16,7 @@
using namespace llvm;
+extern cl::opt<bool> MISchedPostRA;
extern "C" void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
@@ -32,7 +33,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
VectorABI = false;
SmallVector<StringRef, 3> Features;
- FS.split(Features, ",", -1, false /* KeepEmpty */);
+ FS.split(Features, ',', -1, false /* KeepEmpty */);
for (auto &Feature : Features) {
if (Feature == "vector" || Feature == "+vector")
VectorABI = true;
@@ -130,6 +131,13 @@ void SystemZPassConfig::addPreSched2() {
}
void SystemZPassConfig::addPreEmitPass() {
+
+ // Do instruction shortening before compare elimination because some
+ // vector instructions will be shortened into opcodes that compare
+ // elimination recognizes.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
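+ // (For instance, WFADB is shortened to ADBR by that pass, and ADBR's
+ // CC def can then be exploited by SystemZElimCompare below.)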
+
// We eliminate comparisons here rather than earlier because some
// transformations can change the set of available CC values and we
// generally want those transformations to have priority. This is
@@ -155,9 +163,17 @@ void SystemZPassConfig::addPreEmitPass() {
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
+
+ // Do final scheduling after all other optimizations, to get an
+ // optimal input for the decoder (branch relaxation must happen
+ // after block placement).
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (MISchedPostRA)
+ addPass(&PostMachineSchedulerID);
+ else
+ addPass(&PostRASchedulerID);
+ }
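+ // The resulting pre-emit order at -O1 and above: instruction shortening,
+ // compare elimination, long-branch relaxation, then post-RA scheduling.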
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -165,7 +181,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(SystemZTTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 0a81e1f..1a8f1f7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -43,6 +43,9 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool targetSchedulesPostRAScheduling() const override { return true; }
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5a87df1..5ff5b21 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,8 +63,8 @@ unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -181,8 +181,8 @@ unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 4b80973..9ae736d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -28,7 +28,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
const SystemZTargetLowering *getTLI() const { return TLI; }
public:
- explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -42,12 +42,11 @@ public:
/// \name Scalar TTI Implementations
/// @{
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 19b5e2a..a0b0d8f 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,7 +43,6 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
- DL = TM.getDataLayout();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(),
TM.getCodeModel(), *Ctx);
}
@@ -107,7 +106,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
assert(!Suffix.empty());
SmallString<60> NameStr;
- NameStr += DL->getPrivateGlobalPrefix();
+ NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, Mang);
NameStr.append(Suffix.begin(), Suffix.end());
return Ctx->getOrCreateSymbol(NameStr);
@@ -120,7 +119,7 @@ MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
}
void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
- const TargetMachine &TM,
+ const DataLayout &,
const MCSymbol *Sym) const {
}
@@ -170,14 +169,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// If the initializer for the global contains something that requires a
// relocation, then we may have to drop this into a writable data section
// even though it is marked const.
- switch (C->getRelocationInfo()) {
- case Constant::NoRelocation:
+ if (!C->needsRelocation()) {
// If the global is required to have a unique address, it can't be put
// into a mergable section: just drop it into the general read-only
// section instead.
if (!GVar->hasUnnamedAddr())
return SectionKind::getReadOnly();
-
+
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
@@ -200,7 +198,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
+ switch (GV->getParent()->getDataLayout().getTypeAllocSize(C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
@@ -208,20 +206,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getReadOnly();
}
- case Constant::LocalRelocation:
- // In static relocation model, the linker will resolve all addresses, so
- // the relocation entries will actually be constants by the time the app
- // starts up. However, we can't put this into a mergable section, because
- // the linker doesn't take relocations into consideration when it tries to
- // merge entries in the section.
- if (ReloModel == Reloc::Static)
- return SectionKind::getReadOnly();
-
- // Otherwise, the dynamic linker needs to fix it up, put it in the
- // writable data.rel.local section.
- return SectionKind::getReadOnlyWithRelLocal();
-
- case Constant::GlobalRelocations:
+ } else {
// In static relocation model, the linker will resolve all addresses, so
// the relocation entries will actually be constants by the time the app
// starts up. However, we can't put this into a mergable section, because
@@ -242,17 +227,11 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// globals together onto fewer pages, improving the locality of the dynamic
// linker.
if (ReloModel == Reloc::Static)
- return SectionKind::getDataNoRel();
-
- switch (C->getRelocationInfo()) {
- case Constant::NoRelocation:
- return SectionKind::getDataNoRel();
- case Constant::LocalRelocation:
- return SectionKind::getDataRelLocal();
- case Constant::GlobalRelocations:
- return SectionKind::getDataRel();
- }
- llvm_unreachable("Invalid relocation");
+ return SectionKind::getData();
+
+ if (C->needsRelocation())
+ return SectionKind::getData();
+ return SectionKind::getData();
}
/// This method computes the appropriate section to emit the specified global
@@ -273,7 +252,8 @@ TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV,
MCSection *TargetLoweringObjectFile::getSectionForJumpTable(
const Function &F, Mangler &Mang, const TargetMachine &TM) const {
- return getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr);
+ return getSectionForConstant(F.getParent()->getDataLayout(),
+ SectionKind::getReadOnly(), /*C=*/nullptr);
}
bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
@@ -296,9 +276,8 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
/// Given a mergable constant with the specified size and relocation
/// information, return a section that it should be placed in.
-MCSection *
-TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
@@ -345,7 +324,7 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
}
void TargetLoweringObjectFile::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ Mang.getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false);
}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 83174c2..850c93c 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -150,24 +150,11 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
}
TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(F.getParent()->getDataLayout());
});
}
-static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
- const MCSection &Section) {
- if (!AsmInfo.isSectionAtomizableBySymbols(Section))
- return true;
-
- // If it is not dead stripped, it is safe to use private labels.
- const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
- if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
- return true;
-
- return false;
-}
-
void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
const GlobalValue *GV, Mangler &Mang,
bool MayAlwaysUsePrivate) const {
@@ -177,11 +164,8 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
Mang.getNameWithPrefix(Name, GV, false);
return;
}
- SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this);
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
- const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this);
- bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
- TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this);
+ TLOF->getNameWithPrefix(Name, GV, Mang, *this);
}
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 7199235..f82566c 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -32,17 +32,25 @@
using namespace llvm;
-inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
- return reinterpret_cast<TargetMachine*>(P);
+namespace llvm {
+// A friend of TargetMachine, used to access legacy APIs that are now
+// private in the C++ interface.
+struct C_API_PRIVATE_ACCESS {
+ static const DataLayout &getDataLayout(const TargetMachine &T) {
+ return T.getDataLayout();
+ }
+};
+}
+
+static TargetMachine *unwrap(LLVMTargetMachineRef P) {
+ return reinterpret_cast<TargetMachine *>(P);
}
-inline Target *unwrap(LLVMTargetRef P) {
+static Target *unwrap(LLVMTargetRef P) {
return reinterpret_cast<Target*>(P);
}
-inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
- return
- reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P));
+static LLVMTargetMachineRef wrap(const TargetMachine *P) {
+ return reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P));
}
-inline LLVMTargetRef wrap(const Target * P) {
+static LLVMTargetRef wrap(const Target * P) {
return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
}
@@ -69,16 +77,16 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
char **ErrorMessage) {
std::string Error;
-
+
*T = wrap(TargetRegistry::lookupTarget(TripleStr, Error));
-
+
if (!*T) {
if (ErrorMessage)
*ErrorMessage = strdup(Error.c_str());
return 1;
}
-
+
return 0;
}
@@ -145,10 +153,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
CM, OL));
}
-
-void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) {
- delete unwrap(T);
-}
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }
LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
const Target* target = &(unwrap(T)->getTarget());
@@ -170,8 +175,9 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
return strdup(StringRep.c_str());
}
+/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getDataLayout());
+ return wrap(&C_API_PRIVATE_ACCESS::getDataLayout(*unwrap(T)));
}
void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
@@ -190,14 +196,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
std::string error;
- const DataLayout *td = TM->getDataLayout();
-
- if (!td) {
- error = "No DataLayout in TargetMachine";
- *ErrorMessage = strdup(error.c_str());
- return true;
- }
- Mod->setDataLayout(*td);
+ Mod->setDataLayout(TM->createDataLayout());
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -239,7 +238,6 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
SmallString<0> CodeString;
raw_svector_ostream OStream(CodeString);
bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage);
- OStream.flush();
StringRef Data = OStream.str();
*OutMemBuf =
diff --git a/contrib/llvm/lib/Target/TargetRecip.cpp b/contrib/llvm/lib/Target/TargetRecip.cpp
index 42bc487..d41b643 100644
--- a/contrib/llvm/lib/Target/TargetRecip.cpp
+++ b/contrib/llvm/lib/Target/TargetRecip.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
// the key strings for queries and command-line inputs.
// In addition, the command-line interface recognizes the global parameters
// "all", "none", and "default".
-static const char *RecipOps[] = {
+static const char *const RecipOps[] = {
"divd",
"divf",
"vec-divd",
@@ -46,7 +46,7 @@ TargetRecip::TargetRecip() {
RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
}
-static bool parseRefinementStep(const StringRef &In, size_t &Position,
+static bool parseRefinementStep(StringRef In, size_t &Position,
uint8_t &Value) {
const char RefStepToken = ':';
Position = In.find(RefStepToken);
@@ -175,7 +175,7 @@ TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
parseIndividualParams(Args);
}
-bool TargetRecip::isEnabled(const StringRef &Key) const {
+bool TargetRecip::isEnabled(StringRef Key) const {
ConstRecipIter Iter = RecipMap.find(Key);
assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
assert(Iter->second.Enabled != Uninitialized &&
@@ -183,7 +183,7 @@ bool TargetRecip::isEnabled(const StringRef &Key) const {
return Iter->second.Enabled;
}
-unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
+unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
ConstRecipIter Iter = RecipMap.find(Key);
assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
assert(Iter->second.RefinementSteps != Uninitialized &&
@@ -192,7 +192,7 @@ unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
}
/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
+void TargetRecip::setDefaults(StringRef Key, bool Enable,
unsigned RefSteps) {
if (Key == "all") {
for (auto &KV : RecipMap) {
@@ -213,7 +213,7 @@ void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
bool TargetRecip::operator==(const TargetRecip &Other) const {
for (const auto &KV : RecipMap) {
- const StringRef &Op = KV.first;
+ StringRef Op = KV.first;
const RecipParams &RP = KV.second;
const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
if (RP.RefinementSteps != OtherRP.RefinementSteps)
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index fbb985a..7ce3a00 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -21,11 +23,13 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include <cctype>
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+#include "WebAssemblyGenAsmWriter.inc"
+
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
@@ -33,11 +37,93 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
- llvm_unreachable("TODO: implement printRegName");
+ assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
+ // Note that there's an implicit get_local/set_local here!
+ OS << "$" << RegNo;
}
void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot,
- const MCSubtargetInfo &STI) {
- llvm_unreachable("TODO: implement printInst");
+ const MCSubtargetInfo & /*STI*/) {
+ // Print the instruction (this uses the AsmStrings from the .td files).
+ printInstruction(MI, OS);
+
+ // Print any additional variadic operands.
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ if (Desc.isVariadic())
+ for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) {
+ if (i != 0)
+ OS << ", ";
+ printOperand(MI, i, OS);
+ }
+
+ // Print any added annotation.
+ printAnnotation(OS, Annot);
+}
+
+static std::string toString(const APFloat &FP) {
+ static const size_t BufBytes = 128;
+ char buf[BufBytes];
+ if (FP.isNaN())
+ assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) ||
+ FP.bitwiseIsEqual(
+ APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) &&
+ "convertToHexString handles neither SNaN nor NaN payloads");
+ // Use C99's hexadecimal floating-point representation.
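+ // For instance, 1.5 would come out roughly as "0x1.8p0" (an
+ // illustrative value, not taken from this patch).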
+ auto Written = FP.convertToHexString(
+ buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven);
+ (void)Written;
+ assert(Written != 0);
+ assert(Written < BufBytes);
+ return buf;
+}
+
+void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned WAReg = Op.getReg();
+ if (int(WAReg) >= 0)
+ printRegName(O, WAReg);
+ else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
+ O << "$pop" << (WAReg & INT32_MAX);
+ else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
+ O << "$push" << (WAReg & INT32_MAX);
+ else
+ O << "$discard";
+ // Add a '=' suffix if this is a def.
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
+ O << '=';
+ } else if (Op.isImm()) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::PARAM:
+ case WebAssembly::RESULT:
+ case WebAssembly::LOCAL:
+ O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm()));
+ break;
+ default:
+ O << Op.getImm();
+ break;
+ }
+ } else if (Op.isFPImm())
+ O << toString(APFloat(Op.getFPImm()));
+ else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
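+// Illustrative output (not from this patch): a stackified def prints as
+// "$push5=", the matching use as "$pop5", and an ordinary register as "$3".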
+
+const char *llvm::WebAssembly::TypeToString(MVT Ty) {
+ switch (Ty.SimpleTy) {
+ case MVT::i32:
+ return "i32";
+ case MVT::i64:
+ return "i64";
+ case MVT::f32:
+ return "f32";
+ case MVT::f64:
+ return "f64";
+ default:
+ llvm_unreachable("unsupported type");
+ }
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index 70fcef2..39a16f5 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -16,14 +16,13 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineValueType.h"
namespace llvm {
-class MCOperand;
class MCSubtargetInfo;
-class WebAssemblyInstPrinter : public MCInstPrinter {
+class WebAssemblyInstPrinter final : public MCInstPrinter {
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
@@ -31,8 +30,21 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
const MCSubtargetInfo &STI) override;
+
+ // Used by tblgen code.
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
};
+namespace WebAssembly {
+
+const char *TypeToString(MVT Ty);
+
+} // end namespace WebAssembly
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
new file mode 100644
index 0000000..b158ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -0,0 +1,103 @@
+//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyAsmBackend class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyAsmBackend final : public MCAsmBackend {
+ bool Is64Bit;
+
+public:
+ explicit WebAssemblyAsmBackend(bool Is64Bit)
+ : MCAsmBackend(), Is64Bit(Is64Bit) {}
+ ~WebAssemblyAsmBackend() override {}
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ // No instruction requires relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ // We currently just use the generic fixups in MCFixup.h and don't have any
+ // target-specific fixups.
+ return 0;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
+ if (Count == 0)
+ return true;
+
+ // FIXME: Do something.
+ return false;
+}
+
+void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
+ unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8) / 8;
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
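+ // For example (illustrative): a 16-bit fixup (TargetOffset 0) with
+ // Value 0x1234 at Offset 6 ORs 0x34 into Data[6] and 0x12 into Data[7].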
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+MCObjectWriter *
+WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+}
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT,
+ StringRef CPU) {
+ return new WebAssemblyAsmBackend(TT.isArch64Bit());
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
new file mode 100644
index 0000000..c47a3d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
@@ -0,0 +1,54 @@
+//===-- WebAssemblyELFObjectWriter.cpp - WebAssembly ELF Writer -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file handles ELF-specific object emission, converting LLVM's
+/// internal fixups into the appropriate relocations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyELFObjectWriter final : public MCELFObjectTargetWriter {
+public:
+ WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+protected:
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+};
+} // end anonymous namespace
+
+// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF
+// writing seriously, we should email generic-abi@googlegroups.com and ask
+// for our own ELF code.
+WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit,
+ uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE,
+ /*HasRelocationAddend=*/true) {}
+
+unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // FIXME: Do we need our own relocs?
+ return Fixup.getKind();
+}
+
+MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW =
+ new WebAssemblyELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 55346f7..d261779 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit();
+ PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -41,9 +41,6 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
- HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
-
SupportsDebugInformation = true;
// For now, WebAssembly does not support exceptions.
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index d2b8fb7..2dcf2cd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -15,13 +15,13 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Triple;
-class WebAssemblyMCAsmInfo final : public MCAsmInfo {
+class WebAssemblyMCAsmInfo final : public MCAsmInfoELF {
public:
explicit WebAssemblyMCAsmInfo(const Triple &T);
~WebAssemblyMCAsmInfo() override;
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
new file mode 100644
index 0000000..7c6c79e
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -0,0 +1,100 @@
+//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyMCCodeEmitter class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
+ const MCRegisterInfo &MRI;
+
+public:
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri,
+ MCContext &)
+ : MRI(mri) {}
+
+ ~WebAssemblyMCCodeEmitter() override {}
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of operand. If the machine operand requires
+ /// relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx);
+}
+
+unsigned WebAssemblyMCCodeEmitter::getMachineOpValue(
+ const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ assert(MO.getExpr()->getKind() == MCExpr::SymbolRef);
+
+ assert(false && "FIXME: not implemented yet");
+
+ return 0;
+}
+
+void WebAssemblyMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(false && "FIXME: not implemented yet");
+}
+
+// Encode WebAssembly Memory Operand
+uint64_t
+WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(false && "FIXME: not implemented yet");
+ return 0;
+}
+
+#include "WebAssemblyGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 224aa77..14cd295 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -26,25 +26,40 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-target-desc"
+#define GET_INSTRINFO_MC_DESC
+#include "WebAssemblyGenInstrInfo.inc"
+
#define GET_SUBTARGETINFO_MC_DESC
#include "WebAssemblyGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "WebAssemblyGenRegisterInfo.inc"
-static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI,
+static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT) {
- MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT);
- return MAI;
+ return new WebAssemblyMCAsmInfo(TT);
+}
+
+static MCInstrInfo *createWebAssemblyMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitWebAssemblyMCInstrInfo(X);
+ return X;
+}
+
+static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
static MCInstPrinter *
-createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant,
+createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant,
const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
- if (SyntaxVariant == 0 || SyntaxVariant == 1)
- return new WebAssemblyInstPrinter(MAI, MII, MRI);
- return nullptr;
+ assert(SyntaxVariant == 0);
+ return new WebAssemblyInstPrinter(MAI, MII, MRI);
}
// Force static initialization.
@@ -53,7 +68,19 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+
+ // Register the MC code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter);
+
+ // Register the MC asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend);
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index eebf5b7..e78f73e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -16,7 +16,6 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
-#include <string>
namespace llvm {
@@ -34,13 +33,21 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheWebAssemblyTarget32;
extern Target TheWebAssemblyTarget64;
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
+
+MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint8_t OSABI);
} // end namespace llvm
@@ -50,6 +57,11 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
#define GET_REGINFO_ENUM
#include "WebAssemblyGenRegisterInfo.inc"
+// Defines symbolic names for the WebAssembly instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
#define GET_SUBTARGETINFO_ENUM
#include "WebAssemblyGenSubtargetInfo.inc"
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index 63e02c4..b97ea45 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -12,6 +12,16 @@ binary encoding of WebAssembly itself:
* https://github.com/WebAssembly/design/blob/master/AstSemantics.md
* https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+The backend is built, tested and archived on the following waterfall:
+ https://build.chromium.org/p/client.wasm.llvm/console
+
+The backend's bringup is done using the GCC torture test suite first, since it
+doesn't require C library support. Current known failures are listed in
+known_gcc_test_failures.txt; all other tests should pass, and the waterfall
+turns red if they do not. Once most of these pass, further testing will use
+LLVM's own test suite. The tests can be run locally using:
+ github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py
+
Interesting work that remains to be done:
* Write a pass to restructurize irreducible control flow. This needs to be done
before register allocation to be efficient, because it may duplicate basic
@@ -19,8 +29,60 @@ Interesting work that remains to be done:
level. Note that LLVM's GPU code has such a pass, but it linearizes control
flow (e.g. both sides of branches execute and are masked) which is undesirable
for WebAssembly.
-* Basic relooper to expose control flow as an AST.
-* Figure out how to properly use MC for virtual ISAs. This may require some
- refactoring of MC.
+
+//===---------------------------------------------------------------------===//
+
+set_local instructions have a return value. We should (a) model this,
+and (b) write optimizations which take advantage of it. Keep in mind that
+many set_local instructions are implicit!
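+
+Illustrative sketch (syntax approximate; the example is ours, not from the
+design docs): in
+  (i32.add (set_local $x (i32.const 1)) (get_local $y))
+the set_local both stores 1 into $x and yields 1 to the add, so a separate
+get_local of $x can be avoided.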
+
+//===---------------------------------------------------------------------===//
+
+Br, br_if, and tableswitch instructions can sometimes support having a value
+on the expression stack across the jump. We should (a) model this, and
+(b) extend the stackifier to utilize it.
+
+//===---------------------------------------------------------------------===//
+
+The min/max operators aren't exactly a<b?a:b because of NaN and negative zero
+behavior. The ARM target has the same kind of min/max instructions and has
+implemented optimizations for them; we should do similar optimizations for
+WebAssembly.
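+
+For illustration (our sketch of the semantics, not text from the design
+repo): computing min(a, b) as a < b ? a : b returns b when a is NaN, because
+the comparison is false, whereas the min instruction returns NaN; likewise,
+with a = -0.0 and b = +0.0 the select yields +0.0 while min yields -0.0.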
+
+//===---------------------------------------------------------------------===//
+
+AArch64 runs SeparateConstOffsetFromGEPPass, followed by EarlyCSE and LICM.
+Would these be useful to run for WebAssembly too? Also, it has an option to
+run SimplifyCFG after running the AtomicExpand pass. Would this be useful for
+us too?
+
+//===---------------------------------------------------------------------===//
+
+When is it profitable to set isAsCheapAsAMove on instructions in WebAssembly?
+
+//===---------------------------------------------------------------------===//
+
+Register stackification uses the EXPR_STACK physical register to impose
+ordering dependencies on instructions with stack operands. This is pessimistic;
+we should consider alternate ways to model stack dependencies.
+
+//===---------------------------------------------------------------------===//
+
+Lots of things could be done in WebAssemblyTargetTransformInfo.cpp. Similarly,
+there are numerous optimization-related hooks that can be overridden in
+WebAssemblyTargetLowering.
+
+//===---------------------------------------------------------------------===//
+
+Instead of the OptimizeReturned pass, consider preserving the "returned"
+attribute through to MachineInstrs and extending the StoreResults pass to do
+this optimization on calls too. That would also let the WebAssemblyPeephole
+pass clean up dead defs for such calls, as it does for stores.
+
+//===---------------------------------------------------------------------===//
+
+Memset/memcpy/memmove should be marked with the "returned" attribute somehow,
+even when they are translated through intrinsics.
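+
+As an illustrative sketch (the "returned" attribute is real LLVM IR; the
+example call is ours): given
+  %r = call i8* @foo(i8* returned %p)
+all uses of %r may be replaced by %p, which on wasm lets consumers reuse the
+value already in a local instead of copying the call result.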
//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp
new file mode 100644
index 0000000..9b718ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp
@@ -0,0 +1,984 @@
+//===-- Relooper.cpp - Implementation of the Relooper algorithm -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the Relooper algorithm, including
+/// optimizations added since the original academic paper [1] was published.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object
+/// oriented programming systems languages and applications companion
+/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
+///
+//===---------------------------------------------------------------------===//
+
+#include "Relooper.h"
+#include "WebAssembly.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <functional>
+#include <list>
+#include <stack>
+#include <string>
+
+#define DEBUG_TYPE "relooper"
+
+using namespace llvm;
+using namespace Relooper;
+
+static cl::opt<int> RelooperSplittingFactor(
+ "relooper-splitting-factor",
+ cl::desc(
+ "How much to discount code size when deciding whether to split a node"),
+ cl::init(5));
+
+static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
+ "relooper-multiple-switch-threshold",
+ cl::desc(
+ "How many entries to allow in a multiple before we use a switch"),
+ cl::init(10));
+
+static cl::opt<unsigned> RelooperNestingLimit(
+ "relooper-nesting-limit",
+ cl::desc(
+ "How much nesting is acceptable"),
+ cl::init(20));
+
+namespace {
+///
+/// Implements the relooper algorithm for a function's blocks.
+///
+/// Implementation details: The Relooper instance has
+/// ownership of the blocks and shapes, and frees them when done.
+///
+struct RelooperAlgorithm {
+ std::deque<Block *> Blocks;
+ std::deque<Shape *> Shapes;
+ Shape *Root;
+ bool MinSize;
+ int BlockIdCounter;
+ int ShapeIdCounter;
+
+ RelooperAlgorithm();
+ ~RelooperAlgorithm();
+
+ void AddBlock(Block *New, int Id = -1);
+
+ // Calculates the shapes
+ void Calculate(Block *Entry);
+
+ // Sets us to try to minimize size
+ void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
+};
+
+struct RelooperAnalysis final : public FunctionPass {
+ static char ID;
+ RelooperAnalysis() : FunctionPass(ID) {}
+ const char *getPassName() const override { return "relooper"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+};
+}
+
+// RelooperAnalysis
+
+char RelooperAnalysis::ID = 0;
+FunctionPass *llvm::createWebAssemblyRelooper() {
+ return new RelooperAnalysis();
+}
+
+bool RelooperAnalysis::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
+ RelooperAlgorithm R;
+ // FIXME: remove duplication between relooper's and LLVM's BBs.
+ std::map<const BasicBlock *, Block *> BB2B;
+ std::map<const Block *, const BasicBlock *> B2BB;
+ for (const BasicBlock &BB : F) {
+ // FIXME: getName is wrong here; Code is meant to represent the amount of code.
+ // FIXME: use BranchVarInit for switch.
+ Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
+ R.AddBlock(B);
+ assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
+ assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
+ BB2B[&BB] = B;
+ B2BB[B] = &BB;
+ }
+ for (Block *B : R.Blocks) {
+ const BasicBlock *BB = B2BB[B];
+ for (const BasicBlock *Successor : successors(BB))
+ // FIXME: add branch's Condition and Code below.
+ B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
+ }
+ R.Calculate(BB2B[&F.getEntryBlock()]);
+ return false; // Analysis passes don't modify anything.
+}
+
+// Helpers
+
+typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
+typedef std::list<Block *> BlockList;
+
+template <class T, class U>
+static bool contains(const T &container, const U &contained) {
+ return container.count(contained);
+}
+
+// Branch
+
+Branch::Branch(const char *ConditionInit, const char *CodeInit)
+ : Ancestor(nullptr), Labeled(true) {
+ // FIXME: move from char* to LLVM data structures
+ Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
+ Code = CodeInit ? strdup(CodeInit) : nullptr;
+}
+
+Branch::~Branch() {
+ // FIXME: move from char* to LLVM data structures
+ free(static_cast<void *>(const_cast<char *>(Condition)));
+ free(static_cast<void *>(const_cast<char *>(Code)));
+}
+
+// Block
+
+Block::Block(const char *CodeInit, const char *BranchVarInit)
+ : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
+ // FIXME: move from char* to LLVM data structures
+ Code = strdup(CodeInit);
+ BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
+}
+
+Block::~Block() {
+ // FIXME: move from char* to LLVM data structures
+ free(static_cast<void *>(const_cast<char *>(Code)));
+ free(static_cast<void *>(const_cast<char *>(BranchVar)));
+}
+
+void Block::AddBranchTo(Block *Target, const char *Condition,
+ const char *Code) {
+ assert(!contains(BranchesOut, Target) &&
+ "cannot add more than one branch to the same target");
+ BranchesOut[Target] = make_unique<Branch>(Condition, Code);
+}
+
+// Relooper
+
+RelooperAlgorithm::RelooperAlgorithm()
+ : Root(nullptr), MinSize(false), BlockIdCounter(1),
+ ShapeIdCounter(0) { // block ID 0 is reserved for clearings
+}
+
+RelooperAlgorithm::~RelooperAlgorithm() {
+ for (auto Curr : Blocks)
+ delete Curr;
+ for (auto Curr : Shapes)
+ delete Curr;
+}
+
+void RelooperAlgorithm::AddBlock(Block *New, int Id) {
+ New->Id = Id == -1 ? BlockIdCounter++ : Id;
+ Blocks.push_back(New);
+}
+
+struct RelooperRecursor {
+ RelooperAlgorithm *Parent;
+ RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
+};
+
+void RelooperAlgorithm::Calculate(Block *Entry) {
+ // Scan and optimize the input
+ struct PreOptimizer : public RelooperRecursor {
+ PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
+ BlockSet Live;
+
+ void FindLive(Block *Root) {
+ BlockList ToInvestigate;
+ ToInvestigate.push_back(Root);
+ while (!ToInvestigate.empty()) {
+ Block *Curr = ToInvestigate.front();
+ ToInvestigate.pop_front();
+ if (contains(Live, Curr))
+ continue;
+ Live.insert(Curr);
+ for (const auto &iter : Curr->BranchesOut)
+ ToInvestigate.push_back(iter.first);
+ }
+ }
+
+ // If a block has multiple entries but no exits, and it is small enough, it
+ // is useful to split it. A common example is a C++ function where
+ // everything ends up at a final exit block and does some RAII cleanup.
+ // Without splitting, we will be forced to introduce labeled loops to
+ // allow reaching the final block.
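+ // For example (illustrative): if blocks B and C both branch to a small
+ // return block R, we clone R into R' so that B reaches R and C reaches R',
+ // and neither needs a labeled loop to get there.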
+ void SplitDeadEnds() {
+ unsigned TotalCodeSize = 0;
+ for (const auto &Curr : Live) {
+ TotalCodeSize += strlen(Curr->Code);
+ }
+ BlockSet Splits;
+ BlockSet Removed;
+ for (const auto &Original : Live) {
+ if (Original->BranchesIn.size() <= 1 ||
+ !Original->BranchesOut.empty())
+ continue; // only dead ends, for now
+ if (contains(Original->BranchesOut, Original))
+ continue; // cannot split a looping node
+ if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
+ TotalCodeSize / RelooperSplittingFactor)
+ continue; // if splitting increases raw code size by a significant
+ // amount, abort
+ // Split the node (for simplicity, we replace all the blocks, even
+ // though we could have reused the original)
+ DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
+ for (const auto &Prior : Original->BranchesIn) {
+ Block *Split = new Block(Original->Code, Original->BranchVar);
+ Parent->AddBlock(Split, Original->Id);
+ Split->BranchesIn.insert(Prior);
+ std::unique_ptr<Branch> Details;
+ Details.swap(Prior->BranchesOut[Original]);
+ Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
+ Details->Code);
+ for (const auto &iter : Original->BranchesOut) {
+ Block *Post = iter.first;
+ Branch *Details = iter.second.get();
+ Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
+ Details->Code);
+ Post->BranchesIn.insert(Split);
+ }
+ Splits.insert(Split);
+ Removed.insert(Original);
+ }
+ for (const auto &iter : Original->BranchesOut) {
+ Block *Post = iter.first;
+ Post->BranchesIn.remove(Original);
+ }
+ }
+ for (const auto &iter : Splits)
+ Live.insert(iter);
+ for (const auto &iter : Removed)
+ Live.remove(iter);
+ }
+ };
+ PreOptimizer Pre(this);
+ Pre.FindLive(Entry);
+
+ // Add incoming branches from live blocks, ignoring dead code
+ for (unsigned i = 0; i < Blocks.size(); i++) {
+ Block *Curr = Blocks[i];
+ if (!contains(Pre.Live, Curr))
+ continue;
+ for (const auto &iter : Curr->BranchesOut)
+ iter.first->BranchesIn.insert(Curr);
+ }
+
+ if (!MinSize)
+ Pre.SplitDeadEnds();
+
+ // Recursively process the graph
+
+ struct Analyzer : public RelooperRecursor {
+ Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
+
+ // Add a shape to the list of shapes in this Relooper calculation
+ void Notice(Shape *New) {
+ New->Id = Parent->ShapeIdCounter++;
+ Parent->Shapes.push_back(New);
+ }
+
+ // Create a list of entries from a block. If LimitTo is provided, only
+ // results in that set will appear
+ void GetBlocksOut(Block *Source, BlockSet &Entries,
+ BlockSet *LimitTo = nullptr) {
+ for (const auto &iter : Source->BranchesOut)
+ if (!LimitTo || contains(*LimitTo, iter.first))
+ Entries.insert(iter.first);
+ }
+
+ // Converts/processes all branchings to a specific target
+ void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
+ BlockSet &From) {
+ DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
+ << "\n");
+ for (auto iter = Target->BranchesIn.begin();
+ iter != Target->BranchesIn.end();) {
+ Block *Prior = *iter;
+ if (!contains(From, Prior)) {
+ iter++;
+ continue;
+ }
+ std::unique_ptr<Branch> PriorOut;
+ PriorOut.swap(Prior->BranchesOut[Target]);
+ PriorOut->Ancestor = Ancestor;
+ PriorOut->Type = Type;
+ if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
+ Multiple->Breaks++; // We are breaking out of this Multiple, so need a
+ // loop
+ iter++; // carefully increment iter before erasing
+ Target->BranchesIn.remove(Prior);
+ Target->ProcessedBranchesIn.insert(Prior);
+ Prior->ProcessedBranchesOut[Target].swap(PriorOut);
+ }
+ }
+
+ Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
+ DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
+ SimpleShape *Simple = new SimpleShape;
+ Notice(Simple);
+ Simple->Inner = Inner;
+ Inner->Parent = Simple;
+ if (Blocks.size() > 1) {
+ Blocks.remove(Inner);
+ GetBlocksOut(Inner, NextEntries, &Blocks);
+ BlockSet JustInner;
+ JustInner.insert(Inner);
+ for (const auto &iter : NextEntries)
+ Solipsize(iter, Branch::Direct, Simple, JustInner);
+ }
+ return Simple;
+ }
+
+ Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
+ BlockSet &NextEntries) {
+ // Find the inner blocks in this loop. Proceed backwards from the entries
+ // until you reach a seen block, collecting as you go.
+ BlockSet InnerBlocks;
+ BlockSet Queue = Entries;
+ while (!Queue.empty()) {
+ Block *Curr = *(Queue.begin());
+ Queue.remove(*Queue.begin());
+ if (!contains(InnerBlocks, Curr)) {
+ // This element is new, mark it as inner and remove from outer
+ InnerBlocks.insert(Curr);
+ Blocks.remove(Curr);
+ // Add the elements prior to it
+ for (const auto &iter : Curr->BranchesIn)
+ Queue.insert(iter);
+ }
+ }
+ assert(!InnerBlocks.empty());
+
+ for (const auto &Curr : InnerBlocks) {
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *Possible = iter.first;
+ if (!contains(InnerBlocks, Possible))
+ NextEntries.insert(Possible);
+ }
+ }
+
+ LoopShape *Loop = new LoopShape();
+ Notice(Loop);
+
+ // Solipsize the loop, replacing with break/continue and marking branches
+ // as Processed (will not affect later calculations)
+ // A. Branches to the loop entries become a continue to this shape
+ for (const auto &iter : Entries)
+ Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
+ // B. Branches to outside the loop (a next entry) become breaks on this
+ // shape
+ for (const auto &iter : NextEntries)
+ Solipsize(iter, Branch::Break, Loop, InnerBlocks);
+ // Finish up
+ Shape *Inner = Process(InnerBlocks, Entries, nullptr);
+ Loop->Inner = Inner;
+ return Loop;
+ }
+
+ // For each entry, find the independent group reachable by it. The
+ // independent group is the entry itself, plus all the blocks it can
+ // reach that cannot be directly reached by another entry. Note that we
+ // ignore directly reaching the entry itself by another entry.
+ // @param Ignore - previous blocks that are irrelevant
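+ // For example (illustrative): with entries E1 and E2, where E1 branches to
+ // A, E2 branches to B, and both A and B branch to C, the groups are
+ // {E1, A} and {E2, B}; C is reachable from both entries, so it belongs to
+ // neither group.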
+ void FindIndependentGroups(BlockSet &Entries,
+ BlockBlockSetMap &IndependentGroups,
+ BlockSet *Ignore = nullptr) {
+ typedef std::map<Block *, Block *> BlockBlockMap;
+
+ struct HelperClass {
+ BlockBlockSetMap &IndependentGroups;
+ BlockBlockMap Ownership; // For each block, which entry it belongs to.
+ // We have reached it from there.
+
+ HelperClass(BlockBlockSetMap &IndependentGroupsInit)
+ : IndependentGroups(IndependentGroupsInit) {}
+ void InvalidateWithChildren(Block *New) {
+ // Being in the list means you need to be invalidated
+ BlockList ToInvalidate;
+ ToInvalidate.push_back(New);
+ while (!ToInvalidate.empty()) {
+ Block *Invalidatee = ToInvalidate.front();
+ ToInvalidate.pop_front();
+ Block *Owner = Ownership[Invalidatee];
+ // Owner may have been invalidated, do not add to
+ // IndependentGroups!
+ if (contains(IndependentGroups, Owner))
+ IndependentGroups[Owner].remove(Invalidatee);
+ if (Ownership[Invalidatee]) { // may have been seen before and
+ // invalidated already
+ Ownership[Invalidatee] = nullptr;
+ for (const auto &iter : Invalidatee->BranchesOut) {
+ Block *Target = iter.first;
+ BlockBlockMap::iterator Known = Ownership.find(Target);
+ if (Known != Ownership.end()) {
+ Block *TargetOwner = Known->second;
+ if (TargetOwner)
+ ToInvalidate.push_back(Target);
+ }
+ }
+ }
+ }
+ }
+ };
+ HelperClass Helper(IndependentGroups);
+
+ // We flow out from each of the entries, simultaneously.
+ // When we reach a new block, we add it as belonging to the one we got to
+ // it from.
+ // If we reach a new block that is already marked as belonging to someone,
+ // it is reachable by two entries and is not valid for any of them.
+ // Remove it and all it can reach that have been visited.
+
+ // Being in the queue means we just added this item, and
+ // we need to add its children
+ BlockList Queue;
+ for (const auto &Entry : Entries) {
+ Helper.Ownership[Entry] = Entry;
+ IndependentGroups[Entry].insert(Entry);
+ Queue.push_back(Entry);
+ }
+ while (!Queue.empty()) {
+ Block *Curr = Queue.front();
+ Queue.pop_front();
+ Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
+ // map if we are in the queue
+ if (!Owner)
+ continue; // we have been invalidated meanwhile after being reached
+ // from two entries
+ // Add all children
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *New = iter.first;
+ BlockBlockMap::iterator Known = Helper.Ownership.find(New);
+ if (Known == Helper.Ownership.end()) {
+ // New node. Add it, and put it in the queue
+ Helper.Ownership[New] = Owner;
+ IndependentGroups[Owner].insert(New);
+ Queue.push_back(New);
+ continue;
+ }
+ Block *NewOwner = Known->second;
+ if (!NewOwner)
+ continue; // We reached an invalidated node
+ if (NewOwner != Owner)
+ // Invalidate this and all reachable that we have seen - we reached
+ // this from two locations
+ Helper.InvalidateWithChildren(New);
+ // otherwise, we have the same owner, so do nothing
+ }
+ }
+
+ // Having processed all the interesting blocks, we remain with just one
+ // potential issue: If a->b, and a was invalidated, but then b was later
+ // reached by someone else, we must invalidate b. To check for this, we go
+ // over all elements in the independent groups; if an element has a parent
+ // which does *not* have the same owner, we must remove it and all its
+ // children.
+
+ for (const auto &iter : Entries) {
+ BlockSet &CurrGroup = IndependentGroups[iter];
+ BlockList ToInvalidate;
+ for (const auto &iter : CurrGroup) {
+ Block *Child = iter;
+ for (const auto &iter : Child->BranchesIn) {
+ Block *Parent = iter;
+ if (Ignore && contains(*Ignore, Parent))
+ continue;
+ if (Helper.Ownership[Parent] != Helper.Ownership[Child])
+ ToInvalidate.push_back(Child);
+ }
+ }
+ while (!ToInvalidate.empty()) {
+ Block *Invalidatee = ToInvalidate.front();
+ ToInvalidate.pop_front();
+ Helper.InvalidateWithChildren(Invalidatee);
+ }
+ }
+
+ // Remove empty groups
+ for (const auto &iter : Entries)
+ if (IndependentGroups[iter].empty())
+ IndependentGroups.erase(iter);
+ }
+
+ Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
+ BlockBlockSetMap &IndependentGroups, Shape *Prev,
+ BlockSet &NextEntries) {
+ bool Fused = isa<SimpleShape>(Prev);
+ MultipleShape *Multiple = new MultipleShape();
+ Notice(Multiple);
+ BlockSet CurrEntries;
+ for (auto &iter : IndependentGroups) {
+ Block *CurrEntry = iter.first;
+ BlockSet &CurrBlocks = iter.second;
+ // Create inner block
+ CurrEntries.clear();
+ CurrEntries.insert(CurrEntry);
+ for (const auto &CurrInner : CurrBlocks) {
+ // Remove the block from the remaining blocks
+ Blocks.remove(CurrInner);
+ // Find new next entries and fix branches to them
+ for (auto iter = CurrInner->BranchesOut.begin();
+ iter != CurrInner->BranchesOut.end();) {
+ Block *CurrTarget = iter->first;
+ auto Next = iter;
+ Next++;
+ if (!contains(CurrBlocks, CurrTarget)) {
+ NextEntries.insert(CurrTarget);
+ Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
+ }
+ iter = Next; // increment carefully because Solipsize can remove us
+ }
+ }
+ Multiple->InnerMap[CurrEntry->Id] =
+ Process(CurrBlocks, CurrEntries, nullptr);
+ // If we are not fused, then our entries will actually be checked
+ if (!Fused)
+ CurrEntry->IsCheckedMultipleEntry = true;
+ }
+ // Add entries not handled as next entries, they are deferred
+ for (const auto &Entry : Entries)
+ if (!contains(IndependentGroups, Entry))
+ NextEntries.insert(Entry);
+ // The multiple has been created, we can decide how to implement it
+ if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
+ Multiple->UseSwitch = true;
+ Multiple->Breaks++; // switch captures breaks
+ }
+ return Multiple;
+ }
+
+ // Main function.
+ // Process a set of blocks with specified entries; returns a shape.
+ // The Make* functions receive a NextEntries. If they fill it with data,
+ // those are the entries for the ->Next block on them, and the blocks
+ // are what remains in Blocks (which Make* modify). In this way
+ // we avoid recursing on Next (imagine a long chain of Simples, if we
+ // recursed we could blow the stack).
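+ // For example (illustrative), a straight-line chain A -> B -> C becomes
+ // Simple(A) -> Simple(B) -> Simple(C) by following Next pointers in this
+ // loop rather than by recursive calls.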
+ Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
+ BlockSet *Entries = &InitialEntries;
+ BlockSet TempEntries[2];
+ int CurrTempIndex = 0;
+ BlockSet *NextEntries;
+ Shape *Ret = nullptr;
+
+ auto Make = [&](Shape *Temp) {
+ if (Prev)
+ Prev->Next = Temp;
+ if (!Ret)
+ Ret = Temp;
+ Prev = Temp;
+ Entries = NextEntries;
+ };
+
+ while (1) {
+ CurrTempIndex = 1 - CurrTempIndex;
+ NextEntries = &TempEntries[CurrTempIndex];
+ NextEntries->clear();
+
+ if (Entries->empty())
+ return Ret;
+ if (Entries->size() == 1) {
+ Block *Curr = *(Entries->begin());
+ if (Curr->BranchesIn.empty()) {
+ // One entry, no looping ==> Simple
+ Make(MakeSimple(Blocks, Curr, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ // One entry, looping ==> Loop
+ Make(MakeLoop(Blocks, *Entries, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+
+ // More than one entry. Try to use a Multiple to eliminate groups of
+ // independent blocks reachable from the entries. It is important to
+ // remove blocks through Multiples as opposed to Loops since the former
+ // is more performant.
+ BlockBlockSetMap IndependentGroups;
+ FindIndependentGroups(*Entries, IndependentGroups);
+
+ if (!IndependentGroups.empty()) {
+ // We can handle a group in a multiple if its entry cannot be reached
+ // by another group.
+ // Note that it might be reachable by itself - a loop. But that is
+ // fine, we will create a loop inside the multiple block (which
+ // is the performant order to do it).
+ for (auto iter = IndependentGroups.begin();
+ iter != IndependentGroups.end();) {
+ Block *Entry = iter->first;
+ BlockSet &Group = iter->second;
+ auto curr = iter++; // iterate carefully, we may delete
+ for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
+ iterBranch != Entry->BranchesIn.end(); iterBranch++) {
+ Block *Origin = *iterBranch;
+ if (!contains(Group, Origin)) {
+ // Reached from outside the group, so we cannot handle this
+ IndependentGroups.erase(curr);
+ break;
+ }
+ }
+ }
+
+ // As an optimization, if we have 2 independent groups, and one is a
+ // small dead end, we can handle only that dead end.
+ // The other then becomes a Next - without nesting in the code and
+ // recursion in the analysis.
+ // TODO: if the larger is the only dead end, handle that too
+ // TODO: handle >2 groups
+ // TODO: handle not just dead ends, but also that do not branch to the
+ // NextEntries. However, must be careful there since we create a
+ // Next, and that Next can prevent eliminating a break (since we no
+ // longer naturally reach the same place), which may necessitate a
+ // one-time loop, which makes the unnesting pointless.
+ if (IndependentGroups.size() == 2) {
+ // Find the smaller one
+ auto iter = IndependentGroups.begin();
+ Block *SmallEntry = iter->first;
+ auto SmallSize = iter->second.size();
+ iter++;
+ Block *LargeEntry = iter->first;
+ auto LargeSize = iter->second.size();
+ if (SmallSize != LargeSize) { // ignore the case where they are
+ // identical - keep things symmetrical there
+ if (SmallSize > LargeSize) {
+ Block *Temp = SmallEntry;
+ SmallEntry = LargeEntry;
+ LargeEntry = Temp; // Note: we did not flip the Sizes too, they
+ // are now invalid. TODO: use the smaller
+ // size as a limit?
+ }
+ // Check if dead end
+ bool DeadEnd = true;
+ BlockSet &SmallGroup = IndependentGroups[SmallEntry];
+ for (const auto &Curr : SmallGroup) {
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *Target = iter.first;
+ if (!contains(SmallGroup, Target)) {
+ DeadEnd = false;
+ break;
+ }
+ }
+ if (!DeadEnd)
+ break;
+ }
+ if (DeadEnd)
+ IndependentGroups.erase(LargeEntry);
+ }
+ }
+
+ if (!IndependentGroups.empty())
+ // Some groups removable ==> Multiple
+ Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
+ *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ // No independent groups, must be loopable ==> Loop
+ Make(MakeLoop(Blocks, *Entries, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ }
+ };
+
+ // Main
+
+ BlockSet AllBlocks;
+ for (const auto &Curr : Pre.Live) {
+ AllBlocks.insert(Curr);
+ }
+
+ BlockSet Entries;
+ Entries.insert(Entry);
+ Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
+ assert(Root);
+
+ ///
+ /// Relooper post-optimizer
+ ///
+ struct PostOptimizer {
+ RelooperAlgorithm *Parent;
+ std::stack<Shape *> LoopStack;
+
+ PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
+
+ void ShapeSwitch(Shape* var,
+ std::function<void (SimpleShape*)> simple,
+ std::function<void (MultipleShape*)> multiple,
+ std::function<void (LoopShape*)> loop) {
+ switch (var->getKind()) {
+ case Shape::SK_Simple: {
+ simple(cast<SimpleShape>(var));
+ break;
+ }
+ case Shape::SK_Multiple: {
+ multiple(cast<MultipleShape>(var));
+ break;
+ }
+ case Shape::SK_Loop: {
+ loop(cast<LoopShape>(var));
+ break;
+ }
+ }
+ }
+
+ // Find the blocks that natural control flow can get us directly to, or
+ // through a multiple that we ignore
+ void FollowNaturalFlow(Shape *S, BlockSet &Out) {
+ ShapeSwitch(S, [&](SimpleShape* Simple) {
+ Out.insert(Simple->Inner);
+ }, [&](MultipleShape* Multiple) {
+ for (const auto &iter : Multiple->InnerMap) {
+ FollowNaturalFlow(iter.second, Out);
+ }
+ FollowNaturalFlow(Multiple->Next, Out);
+ }, [&](LoopShape* Loop) {
+ FollowNaturalFlow(Loop->Inner, Out);
+ });
+ }
+
+ void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
+ if (Root->Next) {
+ Root->Natural = Root->Next;
+ FindNaturals(Root->Next, Otherwise);
+ } else {
+ Root->Natural = Otherwise;
+ }
+
+ ShapeSwitch(Root, [](SimpleShape* Simple) {
+ }, [&](MultipleShape* Multiple) {
+ for (const auto &iter : Multiple->InnerMap) {
+ FindNaturals(iter.second, Root->Natural);
+ }
+ }, [&](LoopShape* Loop){
+ FindNaturals(Loop->Inner, Loop->Inner);
+ });
+ }
+
+ // Remove unneeded breaks and continues.
+ // A flow operation is trivially unneeded if the shape we naturally get to
+ // by normal code execution is the same as the flow forces us to.
+ void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
+ LoopShape *LastLoop = nullptr,
+ unsigned Depth = 0) {
+ BlockSet NaturalBlocks;
+ FollowNaturalFlow(Natural, NaturalBlocks);
+ Shape *Next = Root;
+ while (Next) {
+ Root = Next;
+ Next = nullptr;
+ ShapeSwitch(
+ Root,
+ [&](SimpleShape* Simple) {
+ if (Simple->Inner->BranchVar)
+ LastLoop =
+ nullptr; // a switch clears out the loop (TODO: only for
+ // breaks, not continue)
+
+ if (Simple->Next) {
+ if (!Simple->Inner->BranchVar &&
+ Simple->Inner->ProcessedBranchesOut.size() == 2 &&
+ Depth < RelooperNestingLimit) {
+ // If there is a next block, we already know at Simple
+ // creation time to make direct branches, and we can do
+ // nothing more in general. But, we try to optimize the
+ // case of a break and a direct: This would normally be
+ // if (break?) { break; } ..
+ // but if we make sure to nest the else, we can save the
+ // break,
+ // if (!break?) { .. }
+ // This is also better because the more canonical nested
+ // form is easier to further optimize later. The downside is
+ // more nesting, which adds to code size in unminified builds.
+ // Note that we avoid switches, as it complicates control flow
+ // and is not relevant for the common case we optimize here.
+ bool Found = false;
+ bool Abort = false;
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Block *Target = iter.first;
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break) {
+ Found = true;
+ if (!contains(NaturalBlocks, Target))
+ Abort = true;
+ } else if (Details->Type != Branch::Direct)
+ Abort = true;
+ }
+ if (Found && !Abort) {
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break) {
+ Details->Type = Branch::Direct;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ } else {
+ assert(Details->Type == Branch::Direct);
+ Details->Type = Branch::Nested;
+ }
+ }
+ }
+ Depth++; // this optimization increases depth, for us and all
+ // our next chain (i.e., until this call returns)
+ }
+ Next = Simple->Next;
+ } else {
+ // If there is no next then Natural is where we will
+ // go to by doing nothing, so we can potentially optimize some
+ // branches to direct.
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Block *Target = iter.first;
+ Branch *Details = iter.second.get();
+ if (Details->Type != Branch::Direct &&
+ contains(NaturalBlocks,
+ Target)) { // note: cannot handle split blocks
+ Details->Type = Branch::Direct;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ } else if (Details->Type == Branch::Break && LastLoop &&
+ LastLoop->Natural == Details->Ancestor->Natural) {
+ // it is important to simplify breaks, as simpler breaks
+ // enable other optimizations
+ Details->Labeled = false;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ }
+ }
+ }
+ }, [&](MultipleShape* Multiple)
+ {
+ for (const auto &iter : Multiple->InnerMap) {
+ RemoveUnneededFlows(iter.second, Multiple->Next,
+ Multiple->Breaks ? nullptr : LastLoop,
+ Depth + 1);
+ }
+ Next = Multiple->Next;
+ }, [&](LoopShape* Loop)
+ {
+ RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
+ Next = Loop->Next;
+ });
+ }
+ }
+
+ // After we know which loops exist, we can calculate which need to be
+ // labeled
+ void FindLabeledLoops(Shape *Root) {
+ Shape *Next = Root;
+ while (Next) {
+ Root = Next;
+ Next = nullptr;
+
+ ShapeSwitch(
+ Root,
+ [&](SimpleShape *Simple) {
+ MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
+ // If we are fusing a Multiple with a loop into this Simple, then
+ // visit it now
+ if (Fused && Fused->Breaks)
+ LoopStack.push(Fused);
+ if (Simple->Inner->BranchVar)
+ LoopStack.push(nullptr); // a switch means breaks are now useless,
+ // push a dummy
+ if (Fused) {
+ if (Fused->UseSwitch)
+ LoopStack.push(nullptr); // a switch means breaks are now
+ // useless, push a dummy
+ for (const auto &iter : Fused->InnerMap) {
+ FindLabeledLoops(iter.second);
+ }
+ }
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break ||
+ Details->Type == Branch::Continue) {
+ assert(!LoopStack.empty());
+ if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor)) {
+ Multiple->Labeled = true;
+ } else {
+ LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
+ Loop->Labeled = true;
+ }
+ } else {
+ Details->Labeled = false;
+ }
+ }
+ if (Fused && Fused->UseSwitch)
+ LoopStack.pop();
+ if (Simple->Inner->BranchVar)
+ LoopStack.pop();
+ if (Fused && Fused->Breaks)
+ LoopStack.pop();
+ if (Fused)
+ Next = Fused->Next;
+ else
+ Next = Root->Next;
+ }
+ }
+ , [&](MultipleShape* Multiple) {
+ if (Multiple->Breaks)
+ LoopStack.push(Multiple);
+ for (const auto &iter : Multiple->InnerMap)
+ FindLabeledLoops(iter.second);
+ if (Multiple->Breaks)
+ LoopStack.pop();
+ Next = Root->Next;
+ }
+ , [&](LoopShape* Loop) {
+ LoopStack.push(Loop);
+ FindLabeledLoops(Loop->Inner);
+ LoopStack.pop();
+ Next = Root->Next;
+ });
+ }
+ }
+
+ void Process(Shape * Root) {
+ FindNaturals(Root);
+ RemoveUnneededFlows(Root);
+ FindLabeledLoops(Root);
+ }
+ };
+
+ PostOptimizer(this).Process(Root);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.h b/contrib/llvm/lib/Target/WebAssembly/Relooper.h
new file mode 100644
index 0000000..7c564de
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.h
@@ -0,0 +1,186 @@
+//===-- Relooper.h - Interface for the Relooper algorithm ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This defines an optimized C++ implementation of the Relooper
+/// algorithm, originally developed as part of Emscripten, which
+/// generates a structured AST from arbitrary control flow.
+///
+//===-------------------------------------------------------------------===//
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Casting.h"
+
+#include <cassert>
+#include <cstdarg>
+#include <cstdio>
+#include <deque>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+
+namespace llvm {
+
+namespace Relooper {
+
+struct Block;
+struct Shape;
+
+///
+/// Info about a branching from one block to another
+///
+struct Branch {
+ enum FlowType {
+ Direct = 0, // We will directly reach the right location through other
+ // means, no need for continue or break
+ Break = 1,
+ Continue = 2,
+ Nested = 3 // This code is directly reached, but we must be careful to
+ // ensure it is nested in an if - it is not reached
+ // unconditionally; other code paths exist alongside it that
+ // we need to make sure do not intertwine
+ };
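+ // For example (illustrative): a branch that RemoveUnneededFlows rewrites
+ // from Direct to Nested is emitted inside "if (!c) { ... }" rather than
+ // falling through unconditionally, so the paired Break branch can be
+ // dropped.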
+ Shape *Ancestor; // If not nullptr, this shape is the relevant one for
+ // purposes of getting to the target block. We break or
+ // continue on it
+ Branch::FlowType Type; // If Ancestor is not nullptr, this says whether to
+ // break or continue
+ bool Labeled; // If a break or continue, whether we need to use a label
+ const char *Condition; // The condition for which we branch. For example,
+ // "my_var == 1". Conditions are checked one by one.
+ // One of the conditions should have nullptr as the
+ // condition, in which case it is the default
+ // FIXME: move from char* to LLVM data structures
+ const char *Code; // If provided, code that is run right before the branch is
+ // taken. This is useful for phis
+ // FIXME: move from char* to LLVM data structures
+
+ Branch(const char *ConditionInit, const char *CodeInit = nullptr);
+ ~Branch();
+};
+
+typedef SetVector<Block *> BlockSet;
+typedef MapVector<Block *, Branch *> BlockBranchMap;
+typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap;
+
+///
+/// Represents a basic block of code - some instructions that end with a
+/// control flow modifier (a branch, return or throw).
+///
+struct Block {
+ // Branches become processed after we finish the shape relevant to them. For
+ // example, when we recreate a loop, branches to the loop start become
+ // continues and are now processed. When we calculate what shape to generate
+ // from a set of blocks, we ignore processed branches. Blocks own the Branch
+ // objects they use, and destroy them when done.
+ OwningBlockBranchMap BranchesOut;
+ BlockSet BranchesIn;
+ OwningBlockBranchMap ProcessedBranchesOut;
+ BlockSet ProcessedBranchesIn;
+ Shape *Parent; // The shape we are directly inside
+ int Id; // A unique identifier, defined when added to relooper. Note that this
+ // uniquely identifies a *logical* block - if we split it, the two
+ // instances have the same content *and* the same Id
+ const char *Code; // The string representation of the code in this block.
+ // Owning pointer (we copy the input)
+ // FIXME: move from char* to LLVM data structures
+ const char *BranchVar; // A variable whose value determines where we go; if
+ // this is not nullptr, emit a switch on that variable
+ // FIXME: move from char* to LLVM data structures
+ bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching
+ // us requires setting the label variable
+
+ Block(const char *CodeInit, const char *BranchVarInit);
+ ~Block();
+
+ void AddBranchTo(Block *Target, const char *Condition,
+ const char *Code = nullptr);
+};
+
+///
+/// Represents a structured control flow shape
+///
+struct Shape {
+ int Id; // A unique identifier. Used to identify loops, labels are Lx where x
+ // is the Id. Defined when added to relooper
+ Shape *Next; // The shape that will appear in the code right after this one
+ Shape *Natural; // The shape that control flow gets to naturally (if there is
+ // Next, then this is Next)
+
+ /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+ enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop };
+
+private:
+ ShapeKind Kind;
+
+public:
+ ShapeKind getKind() const { return Kind; }
+
+ Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {}
+};
+
+///
+/// Simple: No control flow at all, just instructions.
+///
+struct SimpleShape : public Shape {
+ Block *Inner;
+
+ SimpleShape() : Shape(SK_Simple), Inner(nullptr) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Simple; }
+};
+
+///
+/// A shape that may be implemented with a labeled loop.
+///
+struct LabeledShape : public Shape {
+ bool Labeled; // If we have a loop, whether it needs to be labeled
+
+ LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {}
+};
+
+// Blocks with the same id were split and are identical, so we just care about
+// ids in Multiple entries
+typedef std::map<int, Shape *> IdShapeMap;
+
+///
+/// Multiple: A shape with more than one entry. If the next block to
+/// be entered is among them, we run it and continue to
+/// the next shape; otherwise we continue immediately to the
+/// next shape.
+///
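+/// For example (illustrative), a Multiple with entries A and B may be
+/// emitted roughly as:
+///   if (label == A) { ...A's shape... } else if (label == B) { ...B's... }
+/// followed by the Next shape.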
+struct MultipleShape : public LabeledShape {
+ IdShapeMap InnerMap; // entry block ID -> shape
+ int Breaks; // If we have branches on us, we need a loop (or a switch).
+ // This is a counter of requirements; if we optimize it to 0,
+ // the loop is unneeded
+ bool UseSwitch; // Whether to switch on label as opposed to an if-else chain
+
+ MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; }
+};
+
+///
+/// Loop: An infinite loop.
+///
+struct LoopShape : public LabeledShape {
+ Shape *Inner;
+
+ LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Loop; }
+};
+
+} // namespace Relooper
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 3ff19d4..e972da5 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -23,8 +23,22 @@ namespace llvm {
class WebAssemblyTargetMachine;
class FunctionPass;
+FunctionPass *createWebAssemblyOptimizeReturned();
+
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+FunctionPass *createWebAssemblyArgumentMove();
+
+FunctionPass *createWebAssemblyStoreResults();
+FunctionPass *createWebAssemblyRegStackify();
+FunctionPass *createWebAssemblyRegColoring();
+FunctionPass *createWebAssemblyPEI();
+FunctionPass *createWebAssemblyCFGStackify();
+FunctionPass *createWebAssemblyLowerBrUnless();
+FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyPeephole();
+
+FunctionPass *createWebAssemblyRelooper();
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index a123bf6..551ad93 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -6,10 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This is a target description file for the WebAssembly architecture, which is
-// also known as "wasm".
-//
+///
+/// \file
+/// \brief This is a target description file for the WebAssembly architecture,
+/// which is also known as "wasm".
+///
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -50,6 +51,9 @@ def WebAssemblyInstrInfo : InstrInfo;
// Minimal Viable Product.
def : ProcessorModel<"mvp", NoSchedModel, []>;
+// Generic processor: latest stable version.
+def : ProcessorModel<"generic", NoSchedModel, []>;
+
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
new file mode 100644
index 0000000..3893c40
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -0,0 +1,110 @@
+//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file moves ARGUMENT instructions after ScheduleDAG scheduling.
+///
+/// Arguments are really live-in registers, however, since we use virtual
+/// registers and LLVM doesn't support live-in virtual registers, we're
+/// currently making do with ARGUMENT instructions which are placed at the top
+/// of the entry block. The trick is to get them to *stay* at the top of the
+/// entry block.
+///
+/// The ARGUMENTS physical register keeps these instructions pinned in place
+/// during liveness-aware CodeGen passes, however one thing which does not
+/// respect this is the ScheduleDAG scheduler. This pass is therefore run
+/// immediately after that.
+///
+/// This is all hopefully a temporary solution until we find a better solution
+/// for describing the live-in nature of arguments.
+///
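+/// For example (with illustrative opcodes; ARGUMENT_I32 is real, the CONST
+/// is just a stand-in), if scheduling produces
+///   %a = ARGUMENT_I32 0
+///   %t = CONST_I32 0
+///   %b = ARGUMENT_I32 1
+/// this pass moves the second ARGUMENT back above the CONST so that all
+/// ARGUMENTs remain contiguous at the top of the entry block.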
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-argument-move"
+
+namespace {
+class WebAssemblyArgumentMove final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyArgumentMove() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Argument Move";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyArgumentMove::ID = 0;
+FunctionPass *llvm::createWebAssemblyArgumentMove() {
+ return new WebAssemblyArgumentMove();
+}
+
+/// Test whether the given instruction is an ARGUMENT.
+static bool IsArgument(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Argument Move **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ bool Changed = false;
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineBasicBlock::iterator InsertPt = EntryMBB.end();
+
+ // Look for the first NonArg instruction.
+ for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) {
+ MachineInstr *MI = MII;
+ if (!IsArgument(MI)) {
+ InsertPt = MII;
+ break;
+ }
+ }
+
+ // Now move any argument instructions later in the block
+ // to before our first NonArg instruction.
+ for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ if (IsArgument(MI)) {
+ EntryMBB.insert(InsertPt, MI->removeFromParent());
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
new file mode 100644
index 0000000..0d2b4d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -0,0 +1,285 @@
+//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains a printer that converts from our internal
+/// representation of machine-dependent LLVM code to the WebAssembly assembly
+/// language.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyRegisterInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+
+class WebAssemblyAsmPrinter final : public AsmPrinter {
+ const MachineRegisterInfo *MRI;
+ const WebAssemblyFunctionInfo *MFI;
+
+public:
+ WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {}
+
+private:
+ const char *getPassName() const override {
+ return "WebAssembly Assembly Printer";
+ }
+
+ //===------------------------------------------------------------------===//
+ // MachineFunctionPass Implementation.
+ //===------------------------------------------------------------------===//
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ MRI = &MF.getRegInfo();
+ MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
+
+ //===------------------------------------------------------------------===//
+ // AsmPrinter Implementation.
+ //===------------------------------------------------------------------===//
+
+ void EmitJumpTableInfo() override;
+ void EmitConstantPool() override;
+ void EmitFunctionBodyStart() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+
+ MVT getRegType(unsigned RegNo) const;
+ const char *toString(MVT VT) const;
+ std::string regToString(const MachineOperand &MO);
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Helpers.
+//===----------------------------------------------------------------------===//
+
+MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
+ const TargetRegisterClass *TRC =
+ TargetRegisterInfo::isVirtualRegister(RegNo) ?
+ MRI->getRegClass(RegNo) :
+ MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+ for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+ if (TRC->hasType(T))
+ return T;
+ DEBUG(errs() << "Unknown type for register number: " << RegNo);
+ llvm_unreachable("Unknown register type");
+ return MVT::Other;
+}
+
+std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
+ "Unlowered physical register encountered during assembly printing");
+ assert(!MFI->isVRegStackified(RegNo));
+ unsigned WAReg = MFI->getWAReg(RegNo);
+ assert(WAReg != WebAssemblyFunctionInfo::UnusedReg);
+ return '$' + utostr(WAReg);
+}
+
+const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
+ return WebAssembly::TypeToString(VT);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssemblyAsmPrinter Implementation.
+//===----------------------------------------------------------------------===//
+
+void WebAssemblyAsmPrinter::EmitConstantPool() {
+ assert(MF->getConstantPool()->getConstants().empty() &&
+ "WebAssembly disables constant pools");
+}
+
+void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
+ // Nothing to do; jump tables are incorporated into the instruction stream.
+}
+
+static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+ const DataLayout &DL(F.getParent()->getDataLayout());
+ const WebAssemblyTargetLowering &TLI =
+ *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+ SmallVector<EVT, 4> VTs;
+ ComputeValueVTs(TLI, DL, Ty, VTs);
+
+ for (EVT VT : VTs) {
+ unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ ValueVTs.push_back(RegisterVT);
+ }
+}
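+
+// For example (illustrative): on wasm32, an i128 return type legalizes to two
+// i64 registers, so the loop above would append {MVT::i64, MVT::i64} to
+// ValueVTs.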
+
+void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
+ if (!MFI->getParams().empty()) {
+ MCInst Param;
+ Param.setOpcode(WebAssembly::PARAM);
+ for (MVT VT : MFI->getParams())
+ Param.addOperand(MCOperand::createImm(VT.SimpleTy));
+ EmitToStreamer(*OutStreamer, Param);
+ }
+
+ SmallVector<MVT, 4> ResultVTs;
+ const Function &F(*MF->getFunction());
+ ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
+ // If the return type needs to be legalized it will get converted into
+ // passing a pointer.
+ if (ResultVTs.size() == 1) {
+ MCInst Result;
+ Result.setOpcode(WebAssembly::RESULT);
+ Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy));
+ EmitToStreamer(*OutStreamer, Result);
+ }
+
+ bool AnyWARegs = false;
+ MCInst Local;
+ Local.setOpcode(WebAssembly::LOCAL);
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
+ unsigned WAReg = MFI->getWAReg(VReg);
+ // Don't declare unused registers.
+ if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
+ continue;
+ // Don't redeclare parameters.
+ if (WAReg < MFI->getParams().size())
+ continue;
+ // Don't declare stackified registers.
+ if (int(WAReg) < 0)
+ continue;
+ Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy));
+ AnyWARegs = true;
+ }
+ auto &PhysRegs = MFI->getPhysRegs();
+ for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
+ if (PhysRegs[PReg] == -1U)
+ continue;
+ Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy));
+ AnyWARegs = true;
+ }
+ if (AnyWARegs)
+ EmitToStreamer(*OutStreamer, Local);
+
+ AsmPrinter::EmitFunctionBodyStart();
+}
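+
+// Sketch of the output of EmitFunctionBodyStart for a function such as
+// `int add(int, int)` (textual directive syntax assumed, not verified):
+//   .param i32, i32
+//   .result i32
+// with a .local list following if any non-stackified registers are in use.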
+
+void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
+
+ switch (MI->getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ // These represent values which are live into the function entry, so there's
+ // no instruction to emit.
+ break;
+ case WebAssembly::LOOP_END:
+ // This is a no-op; it exists only to tell AsmPrinter.cpp that there's a
+ // fallthrough which nevertheless requires a label for its destination here.
+ break;
+ default: {
+ WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ break;
+ }
+ }
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (AsmVariant != 0)
+ report_fatal_error("There are no defined alternate asm variants");
+
+ // First try the generic code, which knows about modifiers like 'c' and 'n'.
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+ return false;
+
+ if (!ExtraCode) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ OS << MO.getImm();
+ return false;
+ case MachineOperand::MO_Register:
+ OS << regToString(MO);
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ getSymbol(MO.getGlobal())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+ return false;
+ case MachineOperand::MO_ExternalSymbol:
+ GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+ return false;
+ case MachineOperand::MO_MachineBasicBlock:
+ MO.getMBB()->getSymbol()->print(OS, MAI);
+ return false;
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (AsmVariant != 0)
+ report_fatal_error("There are no defined alternate asm variants");
+
+ if (!ExtraCode) {
+ // TODO: For now, we just hard-code 0 as the constant offset; teach
+ // SelectInlineAsmMemoryOperand how to do address mode matching.
+ OS << "0(" + regToString(MI->getOperand(OpNo)) + ')';
+ return false;
+ }
+
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() {
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32);
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
new file mode 100644
index 0000000..e9671ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -0,0 +1,468 @@
+//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a CFG stacking pass.
+///
+/// This pass reorders the blocks in a function to put them into a reverse
+/// post-order [0], with special care to keep the order as similar as possible
+/// to the original order, and to keep loops contiguous even in the case of
+/// split backedges.
+///
+/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
+/// scope boundaries serve as the labels for WebAssembly's control transfers.
+///
+/// This is sufficient to convert arbitrary CFGs into a form that works on
+/// WebAssembly, provided that all loops are single-entry.
+///
+/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings
+///
+//===----------------------------------------------------------------------===//
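+//
+// Illustrative example (not from the original source): for a diamond CFG
+//
+//   A -> {B, C},  B -> D,  C -> D
+//
+// one valid output order is A, B, C, D, with a BLOCK marker placed in A whose
+// scope ends at D; the explicit branch from B to D is then expressed as a
+// branch to the end of that BLOCK.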
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-cfg-stackify"
+
+namespace {
+class WebAssemblyCFGStackify final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly CFG Stackify";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyCFGStackify() {
+ return new WebAssemblyCFGStackify();
+}
+
+static void EliminateMultipleEntryLoops(MachineFunction &MF,
+ const MachineLoopInfo &MLI) {
+ SmallPtrSet<MachineBasicBlock *, 8> InSet;
+ for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF);
+ I != E; ++I) {
+ const std::vector<MachineBasicBlock *> &CurrentSCC = *I;
+
+ // Skip trivial SCCs.
+ if (CurrentSCC.size() == 1)
+ continue;
+
+ InSet.insert(CurrentSCC.begin(), CurrentSCC.end());
+ MachineBasicBlock *Header = nullptr;
+ for (MachineBasicBlock *MBB : CurrentSCC) {
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (InSet.count(Pred))
+ continue;
+ if (!Header) {
+ Header = MBB;
+ break;
+ }
+ // TODO: Implement multiple-entry loops.
+ report_fatal_error("multiple-entry loops are not supported yet");
+ }
+ }
+ assert(MLI.isLoopHeader(Header));
+
+ InSet.clear();
+ }
+}
+
+namespace {
+/// Post-order traversal stack entry.
+struct POStackEntry {
+ MachineBasicBlock *MBB;
+ SmallVector<MachineBasicBlock *, 0> Succs;
+
+ POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
+ const MachineLoopInfo &MLI);
+};
+} // end anonymous namespace
+
+static bool LoopContains(const MachineLoop *Loop,
+ const MachineBasicBlock *MBB) {
+ return Loop ? Loop->contains(MBB) : true;
+}
+
+POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
+ const MachineLoopInfo &MLI)
+ : MBB(MBB), Succs(MBB->successors()) {
+ // RPO is not a unique form, since at every basic block with multiple
+ // successors, the DFS has to pick which order to visit the successors in.
+ // Sort them strategically (see below).
+ MachineLoop *Loop = MLI.getLoopFor(MBB);
+ MachineFunction::iterator Next = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next;
+ std::stable_sort(
+ Succs.begin(), Succs.end(),
+ [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ if (A == B)
+ return false;
+
+ // Keep loops contiguous by preferring the block that's in the same
+ // loop.
+ bool LoopContainsA = LoopContains(Loop, A);
+ bool LoopContainsB = LoopContains(Loop, B);
+ if (LoopContainsA && !LoopContainsB)
+ return true;
+ if (!LoopContainsA && LoopContainsB)
+ return false;
+
+ // Minimize perturbation by preferring the block which is the immediate
+ // layout successor.
+ if (A == LayoutSucc)
+ return true;
+ if (B == LayoutSucc)
+ return false;
+
+ // TODO: More sophisticated orderings may be profitable here.
+
+ return false;
+ });
+}
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
+static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
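+
+// For example (illustrative): if a discontiguous loop consists of blocks
+// numbered {3, 7, 5}, LoopBottom returns block 7, the highest-numbered
+// member, regardless of whether the blocks are laid out contiguously.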
+
+/// Sort the blocks in RPO, taking special care to make sure that loops are
+/// contiguous even in the case of split backedges.
+///
+/// TODO: Determine whether RPO is actually worthwhile, or whether we should
+/// move to just a stable-topological-sort-based approach that would preserve
+/// more of the original order.
+static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
+ // Note that we do our own RPO rather than using
+ // "llvm/ADT/PostOrderIterator.h" because we want control over the order that
+ // successors are visited in (see above). Also, we can sort the blocks in the
+ // MachineFunction as we go.
+ SmallPtrSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<POStackEntry, 16> Stack;
+
+ MachineBasicBlock *EntryBlock = &*MF.begin();
+ Visited.insert(EntryBlock);
+ Stack.push_back(POStackEntry(EntryBlock, MF, MLI));
+
+ for (;;) {
+ POStackEntry &Entry = Stack.back();
+ SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs;
+ if (!Succs.empty()) {
+ MachineBasicBlock *Succ = Succs.pop_back_val();
+ if (Visited.insert(Succ).second)
+ Stack.push_back(POStackEntry(Succ, MF, MLI));
+ continue;
+ }
+
+ // Put the block in its position in the MachineFunction.
+ MachineBasicBlock &MBB = *Entry.MBB;
+ MBB.moveBefore(&*MF.begin());
+
+ // Branch instructions may utilize a fallthrough, so update them if a
+ // fallthrough has been added or removed.
+ if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() &&
+ !MBB.back().isBarrier())
+ report_fatal_error(
+ "Non-branch terminator with fallthrough cannot yet be rewritten");
+ if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch())
+ MBB.updateTerminator();
+
+ Stack.pop_back();
+ if (Stack.empty())
+ break;
+ }
+
+ // Now that we've sorted the blocks in RPO, renumber them.
+ MF.RenumberBlocks();
+
+#ifndef NDEBUG
+ SmallSetVector<MachineLoop *, 8> OnStack;
+
+ // Insert a sentinel representing the degenerate loop that starts at the
+ // function entry block and includes the entire function as a "loop" that
+ // executes once.
+ OnStack.insert(nullptr);
+
+ for (auto &MBB : MF) {
+ assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (Loop && &MBB == Loop->getHeader()) {
+ // Loop header. The loop predecessor should be sorted above, and the other
+ // predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or backedges");
+ assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
+ "Blocks must be nested in their loops");
+ }
+ while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
+ OnStack.pop_back();
+ }
+ assert(OnStack.pop_back_val() == nullptr &&
+ "The function entry block shouldn't actually be a loop header");
+ assert(OnStack.empty() &&
+ "Control flow stack pushes and pops should be balanced.");
+#endif
+}
+
+/// Test whether Pred has any terminators explicitly branching to MBB, as
+/// opposed to falling through. Note that it's possible (e.g. in unoptimized
+/// code) for a branch instruction to both branch to a block and fall through
+/// to it, so we check the actual branch operands to see if there are any
+/// explicit mentions.
+static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
+                                 MachineBasicBlock *MBB) {
+ for (MachineInstr &MI : Pred->terminators())
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
+ return false;
+}
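+
+// E.g. (illustrative): in unoptimized code a conditional branch to %bb.3 may
+// be immediately followed by a fallthrough into %bb.3 itself; only the
+// explicit MBB operand on the terminator counts as "explicitly branching".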
+
+/// Insert a BLOCK marker for branches to MBB (if needed).
+static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ const WebAssemblyInstrInfo &TII,
+ const MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT) {
+ // First compute the nearest common dominator of all forward non-fallthrough
+ // predecessors so that we minimize the time that the BLOCK is on the stack,
+ // which reduces overall stack height.
+ MachineBasicBlock *Header = nullptr;
+ bool IsBranchedTo = false;
+ int MBBNumber = MBB.getNumber();
+ for (MachineBasicBlock *Pred : MBB.predecessors())
+ if (Pred->getNumber() < MBBNumber) {
+ Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+ if (ExplicitlyBranchesTo(Pred, &MBB))
+ IsBranchedTo = true;
+ }
+ if (!Header)
+ return;
+ if (!IsBranchedTo)
+ return;
+
+ assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
+ MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB));
+
+ // If the nearest common dominator is inside a more deeply nested context,
+ // walk out to the nearest scope which isn't more deeply nested.
+ for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+ if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+ if (ScopeTop->getNumber() > Header->getNumber()) {
+ // Skip over an intervening scope.
+ I = next(MachineFunction::iterator(ScopeTop));
+ } else {
+ // We found a scope level at an appropriate depth.
+ Header = ScopeTop;
+ break;
+ }
+ }
+ }
+
+ // If there's a loop which ends just before MBB and which contains Header,
+ // we can reuse its label instead of inserting a new BLOCK.
+ for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
+ Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
+ if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
+ return;
+
+ // Decide where in Header to put the BLOCK.
+ MachineBasicBlock::iterator InsertPos;
+ MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
+ if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) {
+ // Header is the header of a loop that does not lexically contain MBB, so
+ // the BLOCK needs to be above the LOOP.
+ InsertPos = Header->begin();
+ } else {
+ // Otherwise, insert the BLOCK as late in Header as we can, but before the
+ // beginning of the local expression tree and any nested BLOCKs.
+ InsertPos = Header->getFirstTerminator();
+ while (InsertPos != Header->begin() &&
+ prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) &&
+ prev(InsertPos)->getOpcode() != WebAssembly::LOOP)
+ --InsertPos;
+ }
+
+ // Add the BLOCK.
+ BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
+ .addMBB(&MBB);
+
+ // Track the farthest-spanning scope that ends at this point.
+ int Number = MBB.getNumber();
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+}
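+
+// Worked example (illustrative): if blocks 1 and 2 both branch forward to
+// block 4, Header becomes their nearest common dominator (say block 1), a
+// BLOCK is inserted there, and ScopeTops[4] records block 1 so later walks
+// can skip the whole scope in one step.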
+
+/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
+static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ const WebAssemblyInstrInfo &TII,
+ const MachineLoopInfo &MLI) {
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (!Loop || Loop->getHeader() != &MBB)
+ return;
+
+ // The operand of a LOOP is the first block after the loop. If the loop is the
+ // bottom of the function, insert a dummy block at the end.
+ MachineBasicBlock *Bottom = LoopBottom(Loop);
+ auto Iter = next(MachineFunction::iterator(Bottom));
+ if (Iter == MF.end()) {
+ MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ Label->addSuccessor(Label);
+ MF.push_back(Label);
+ Iter = next(MachineFunction::iterator(Bottom));
+ }
+ MachineBasicBlock *AfterLoop = &*Iter;
+ BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
+ .addMBB(AfterLoop);
+
+ // Emit a special no-op telling the asm printer that we need a label to close
+ // the loop scope, even though the destination is only reachable by
+ // fallthrough.
+ if (!Bottom->back().isBarrier())
+ BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+
+ assert((!ScopeTops[AfterLoop->getNumber()] ||
+ ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
+ "With RPO we should visit the outer-most loop for a block first.");
+ if (!ScopeTops[AfterLoop->getNumber()])
+ ScopeTops[AfterLoop->getNumber()] = &MBB;
+}
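+
+// Illustrative example: for a loop with header bb2 and bottom bb4, a LOOP
+// marker is inserted at the top of bb2 with operand bb5, the first block
+// after the loop; if bb4 is the last block in the function, the fake block
+// appended above supplies that label.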
+
+/// Insert LOOP and BLOCK markers at appropriate places.
+static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const WebAssemblyInstrInfo &TII,
+ MachineDominatorTree &MDT) {
+ // For each block whose label represents the end of a scope, record the block
+ // which holds the beginning of the scope. This will allow us to quickly skip
+ // over scoped regions when walking blocks. We allocate one more than the
+ // number of blocks in the function to accommodate the possible fake block
+ // we may insert at the end.
+ SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
+
+ for (auto &MBB : MF) {
+ // Place the LOOP for MBB if MBB is the header of a loop.
+ PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
+ }
+}
+
+#ifndef NDEBUG
+static bool
+IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
+ const MachineBasicBlock *MBB) {
+ for (const auto &Pair : Stack)
+ if (Pair.first == MBB)
+ return true;
+ return false;
+}
+#endif
+
+bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** CFG Stackifying **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ // RPO sorting needs all loops to be single-entry.
+ EliminateMultipleEntryLoops(MF, MLI);
+
+ // Sort the blocks in RPO, with contiguous loops.
+ SortBlocks(MF, MLI);
+
+ // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
+ PlaceMarkers(MF, MLI, TII, MDT);
+
+#ifndef NDEBUG
+ // Verify that block and loop beginnings and endings are in LIFO order, and
+ // that all references to blocks are to blocks on the stack at the point of
+ // the reference.
+ SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack;
+ for (auto &MBB : MF) {
+ while (!Stack.empty() && Stack.back().first == &MBB)
+ if (Stack.back().second) {
+ assert(Stack.size() >= 2);
+ Stack.pop_back();
+ Stack.pop_back();
+ } else {
+ assert(Stack.size() >= 1);
+ Stack.pop_back();
+ }
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ case WebAssembly::LOOP:
+ Stack.push_back(std::make_pair(&MBB, false));
+ Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true));
+ break;
+ case WebAssembly::BLOCK:
+ Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false));
+ break;
+ default:
+ // Verify that all referenced blocks are in scope. A reference to a
+ // block with a negative number is invalid, but can happen with inline
+ // asm, so rather than asserting we let CodeGen fail on it properly.
+ for (const MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
+ assert(IsOnStack(Stack, MO.getMBB()));
+ break;
+ }
+ }
+ assert(Stack.empty());
+#endif
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
new file mode 100644
index 0000000..1b761b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -0,0 +1,81 @@
+//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific support for the FastISel
+/// class. Some of the target-specific code is generated by tablegen in the file
+/// WebAssemblyGenFastISel.inc, which is #included here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fastisel"
+
+namespace {
+
+class WebAssemblyFastISel final : public FastISel {
+ /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+ /// right decision when generating code for different targets.
+ const WebAssemblySubtarget *Subtarget;
+ LLVMContext *Context;
+
+ // Call handling routines.
+private:
+public:
+ // Backend specific FastISel code.
+ WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
+ Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
+ Context = &FuncInfo.Fn->getContext();
+ }
+
+ bool fastSelectInstruction(const Instruction *I) override;
+
+#include "WebAssemblyGenFastISel.inc"
+};
+
+} // end anonymous namespace
+
+bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default:
+ break;
+ // TODO: add fast-isel selection cases here...
+ }
+
+ // Fall back to target-independent instruction selection.
+ return selectOperator(I, I->getOpcode());
+}
+
+FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ return new WebAssemblyFastISel(FuncInfo, LibInfo);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e4ca82e..0eefd57 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -35,11 +35,20 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-frame-info"
// TODO: Implement a red zone?
+// TODO: wasm64
+// TODO: Prolog/epilog should be stackified too. This pass runs after register
+// stackification, so we'll have to do it manually.
+// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
/// Return true if the specified function should have a dedicated frame pointer
/// register.
bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
- llvm_unreachable("TODO: implement hasFP");
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const auto *RegInfo =
+ MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ RegInfo->needsStackRealignment(MF);
}
/// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -52,23 +61,115 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+
+/// Adjust the stack pointer by a constant amount.
+static void adjustStackPointer(unsigned StackSize, bool AdjustUp,
+                               MachineFunction &MF, MachineBasicBlock &MBB,
+                               const TargetInstrInfo *TII,
+                               MachineBasicBlock::iterator InsertPt,
+                               const DebugLoc &DL) {
+ auto &MRI = MF.getRegInfo();
+ unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg)
+ .addExternalSymbol(SPSymbol);
+ // This MachinePointerInfo should reference __stack_pointer as well but
+ // doesn't because MachinePointerInfo() takes a GV which we don't have for
+ // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry
+ // is appropriate instead. (Likewise for emitEpilogue below.)
+ auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOLoad, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+ .addImm(0)
+ .addReg(SPReg)
+ .addMemOperand(LoadMMO);
+ // Add/Subtract the frame size
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
+ BuildMI(MBB, InsertPt, DL,
+ TII->get(AdjustUp ? WebAssembly::ADD_I32 : WebAssembly::SUB_I32),
+ WebAssembly::SP32)
+ .addReg(SPReg)
+ .addReg(OffsetReg);
+ // The SP32 register now has the new stacktop. Also write it back to memory.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addExternalSymbol(SPSymbol);
+ auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addMemOperand(MMO);
+}
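+
+// Sketch of the sequence built above (assumed textual forms, for StackSize
+// == 16 and AdjustUp == false):
+//   i32.const $tmp, __stack_pointer
+//   i32.load  $tmp, 0($tmp)      ;; read the current stack top
+//   i32.const $off, 16
+//   i32.sub   SP32, $tmp, $off   ;; allocate 16 bytes
+//   i32.const $off, __stack_pointer
+//   i32.store SP32, 0($off)      ;; publish the new stack top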
+
void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+ const auto *TII =
+ static_cast<const WebAssemblyInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL = I->getDebugLoc();
+ unsigned Opc = I->getOpcode();
+ bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+ unsigned Amount = I->getOperand(0).getImm();
+ if (Amount)
+ adjustStackPointer(Amount, IsDestroy, MF, MBB, TII, I, DL);
+ MBB.erase(I);
}
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- llvm_unreachable("TODO: implement emitPrologue");
+ // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
+ auto *MFI = MF.getFrameInfo();
+ assert(MFI->getCalleeSavedInfo().empty() &&
+ "WebAssembly should not have callee-saved registers");
+ assert(!hasFP(MF) && "Functions needing frame pointers not yet supported");
+ uint64_t StackSize = MFI->getStackSize();
+ if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0))
+ return;
+
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+
+ auto InsertPt = MBB.begin();
+ DebugLoc DL;
+
+ adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL);
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- llvm_unreachable("TODO: implement emitEpilogue");
-}
+ uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+ if (!StackSize)
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ auto &MRI = MF.getRegInfo();
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ auto InsertPt = MBB.getFirstTerminator();
+ DebugLoc DL;
+
+ if (InsertPt != MBB.end()) {
+ DL = InsertPt->getDebugLoc();
+ }
-void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger *RS) const {
- llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+ // Restore the stack pointer. Without FP, the current SP32 is the value on
+ // entry minus StackSize, so just add StackSize back.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
+ auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32)
+ .addReg(WebAssembly::SP32)
+ .addReg(OffsetReg);
+ // Re-use OffsetReg to hold the address of the stacktop
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addExternalSymbol(SPSymbol);
+ auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index 0b112d0..5f4708f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -38,9 +38,6 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
new file mode 100644
index 0000000..3a03fa5
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -0,0 +1,25 @@
+//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file describes the various WebAssembly ISD node types.
+///
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+HANDLE_NODETYPE(CALL1)
+HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RETURN)
+HANDLE_NODETYPE(ARGUMENT)
+HANDLE_NODETYPE(Wrapper)
+HANDLE_NODETYPE(BR_IF)
+HANDLE_NODETYPE(TABLESWITCH)
+
+// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 518ef33..8390f79 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -56,13 +56,68 @@ public:
SDNode *Select(SDNode *Node) override;
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "WebAssemblyGenDAGISel.inc"
+
private:
// add select functions here...
};
} // end anonymous namespace
SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
- llvm_unreachable("TODO: implement Select");
+ // Dump information about the Node being selected.
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
+ return nullptr;
+ }
+
+ // Handle a few custom selection cases.
+ SDNode *ResNode = nullptr;
+ EVT VT = Node->getValueType(0);
+
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ // Any WebAssembly-specific selection would go here.
+ (void)VT;
+ }
+
+ // Select the default instruction.
+ ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == nullptr || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch (ConstraintID) {
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ // We only support simple memory operands that have a single address
+ // operand and need no special handling.
+ OutOps.push_back(Op);
+ return false;
+ default:
+ break;
+ }
+
+ return true;
}
/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4184eb6..7a89f78 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -17,10 +17,13 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
-#include "WebAssemblyTargetObjectFile.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
@@ -32,14 +35,254 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-lower"
+namespace {
+// Diagnostic information for unimplemented or unsupported feature reporting.
+// TODO: This code is copied from BPF and AMDGPU; consider factoring it out
+// and sharing code.
+class DiagnosticInfoUnsupported final : public DiagnosticInfo {
+private:
+ // Debug location where this diagnostic is triggered.
+ DebugLoc DLoc;
+ const Twine &Description;
+ const Function &Fn;
+ SDValue Value;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc,
+ SDValue Value)
+ : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()),
+ Description(Desc), Fn(Fn), Value(Value) {}
+
+ void print(DiagnosticPrinter &DP) const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+
+ if (DLoc) {
+ auto DIL = DLoc.get();
+ StringRef Filename = DIL->getFilename();
+ unsigned Line = DIL->getLine();
+ unsigned Column = DIL->getColumn();
+ OS << Filename << ':' << Line << ':' << Column << ' ';
+ }
+
+ OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n'
+ << Description;
+ if (Value)
+ Value->print(OS);
+ OS << '\n';
+ OS.flush();
+ DP << Str;
+ }
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+} // end anonymous namespace
+
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
const TargetMachine &TM, const WebAssemblySubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+ // Booleans always contain 0 or 1.
+ setBooleanContents(ZeroOrOneBooleanContent);
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
// We don't know the microarchitecture here, so just reduce register pressure.
setSchedulingPreference(Sched::RegPressure);
+ // Tell ISel that we have a stack pointer.
+ setStackPointerRegisterToSaveRestore(
+ Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
+ addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
+ addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
+ addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(Subtarget->getRegisterInfo());
+
+ setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
+ setOperationAction(ISD::JumpTable, MVTPtr, Custom);
+
+ // Take the default expansion for va_arg, va_copy, and va_end. There is no
+ // default action for va_start, so we handle it with custom lowering.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ for (auto T : {MVT::f32, MVT::f64}) {
+ // Don't expand the floating-point types to constant pools.
+ setOperationAction(ISD::ConstantFP, T, Legal);
+ // Expand floating-point comparisons.
+ for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
+ ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
+ setCondCodeAction(CC, T, Expand);
+ // Expand floating-point library function operators.
+ for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
+ ISD::FREM, ISD::FMA})
+ setOperationAction(Op, T, Expand);
+ // Mark as legal the supported floating-point library function operators
+ // that would otherwise default to expand.
+ for (auto Op :
+ {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+ setOperationAction(Op, T, Legal);
+ // Support minnan and maxnan, which otherwise default to expand.
+ setOperationAction(ISD::FMINNAN, T, Legal);
+ setOperationAction(ISD::FMAXNAN, T, Legal);
+ }
+
+ for (auto T : {MVT::i32, MVT::i64}) {
+ // Expand unavailable integer operations.
+ for (auto Op :
+ {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
+ ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
+ ISD::SUBE}) {
+ setOperationAction(Op, T, Expand);
+ }
+ }
+
+ // As a special case, these operators use the type to mean the type to
+ // sign-extend from.
+ for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+
+ // Dynamic stack allocation: use the default expansion.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
+
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ // Expand these forms; we pattern-match the forms that we can handle in isel.
+ for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+ for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
+ setOperationAction(Op, T, Expand);
+
+ // We have custom switch handling.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // WebAssembly doesn't have:
+ // - Floating-point extending loads.
+ // - Floating-point truncating stores.
+ // - i1 extending loads.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ for (auto T : MVT::integer_valuetypes())
+ for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+ setLoadExtAction(Ext, T, MVT::i1, Promote);
+
+ // Trap lowers to wasm unreachable
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+}
+
+FastISel *WebAssemblyTargetLowering::createFastISel(
+ FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
+ return WebAssembly::createFastISel(FuncInfo, LibInfo);
+}
+
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode * /*GA*/) const {
+ // All offsets can be folded.
+ return true;
+}
+
+MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
+ EVT VT) const {
+ unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
+ if (BitWidth > 1 && BitWidth < 8)
+ BitWidth = 8;
+
+ if (BitWidth > 64) {
+ BitWidth = 64;
+ assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
+ "64-bit shift counts ought to be enough for anyone");
+ }
+
+ MVT Result = MVT::getIntegerVT(BitWidth);
+ assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Unable to represent scalar shift amount type");
+ return Result;
+}
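+
+// Worked example (illustrative): for VT == MVT::i32, NextPowerOf2(31) == 32,
+// so shifts are counted in an i32; for MVT::i128 the width clamps to 64,
+// which is still enough since only log2(128) == 7 bits are needed.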
+
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
+ case WebAssemblyISD::FIRST_NUMBER:
+ break;
+#define HANDLE_NODETYPE(NODE) \
+ case WebAssemblyISD::NODE: \
+ return "WebAssemblyISD::" #NODE;
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
+ }
+ return nullptr;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ // First, see if this is a constraint that directly corresponds to a
+ // WebAssembly register class.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (VT.isInteger() && !VT.isVector()) {
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &WebAssembly::I32RegClass);
+ if (VT.getSizeInBits() <= 64)
+ return std::make_pair(0U, &WebAssembly::I64RegClass);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
+ // Assume ctz is a relatively cheap operation.
+ return true;
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
+ // Assume clz is a relatively cheap operation.
+ return true;
+}
+
+bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM,
+ Type *Ty,
+ unsigned AS) const {
+ // WebAssembly offsets are added as unsigned without wrapping. The
+ // isLegalAddressingMode interface gives us no way to determine whether
+ // wrapping could occur, so we approximate this by accepting only
+ // non-negative offsets.
+ if (AM.BaseOffs < 0)
+ return false;
+
+ // WebAssembly has no scale register operands.
+ if (AM.Scale != 0)
+ return false;
+
+ // Everything else is legal.
+ return true;
}
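+
+// Examples (illustrative): under this rule `base + 16` is accepted, while
+// `base - 8` (negative offset) and `base + 4*index` (scaled index) are
+// both rejected.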
//===----------------------------------------------------------------------===//
@@ -50,16 +293,359 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Lowering Code
//===----------------------------------------------------------------------===//
+static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DAG.getContext()->diagnose(
+ DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue()));
+}
+
+// Test whether the given calling convention is supported.
+static bool CallingConvSupported(CallingConv::ID CallConv) {
+ // We currently support the language-independent, target-independent
+ // conventions. We don't yet have a way to annotate calls with properties like
+ // "cold", and we don't have any call-clobbered registers, so these are mostly
+ // all handled the same.
+ return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
+ CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::PreserveMost ||
+ CallConv == CallingConv::PreserveAll ||
+ CallConv == CallingConv::CXX_FAST_TLS;
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc DL = CLI.DL;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ CallingConv::ID CallConv = CLI.CallConv;
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG,
+ "WebAssembly doesn't support language-specific or target-specific "
+ "calling conventions yet");
+ if (CLI.IsPatchPoint)
+ fail(DL, DAG, "WebAssembly doesn't support patch point yet");
+
+ // WebAssembly doesn't currently support explicit tail calls. If they are
+ // required, fail. Otherwise, just disable them.
+ if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+ MF.getTarget().Options.GuaranteedTailCallOpt) ||
+ (CLI.CS && CLI.CS->isMustTailCall()))
+ fail(DL, DAG, "WebAssembly doesn't support tail call yet");
+ CLI.IsTailCall = false;
+
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ if (Ins.size() > 1)
+ fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
+
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ for (const ISD::OutputArg &Out : Outs) {
+ if (Out.Flags.isByVal())
+ fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+ if (Out.Flags.isNest())
+ fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+ if (Out.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+ if (Out.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+ if (Out.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+ }
+
+ bool IsVarArg = CLI.IsVarArg;
+ unsigned NumFixedArgs = CLI.NumFixedArgs;
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ if (IsVarArg) {
+ // Outgoing non-fixed arguments are placed at the top of the stack. First
+ // compute their offsets and the total amount of argument stack space
+ // needed.
+ for (SDValue Arg :
+ make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ EVT VT = Arg.getValueType();
+ assert(VT != MVT::iPTR && "Legalized args should be concrete");
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned Offset =
+ CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty),
+ MF.getDataLayout().getABITypeAlignment(Ty));
+ CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
+ Offset, VT.getSimpleVT(),
+ CCValAssign::Full));
+ }
+ }
+
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
+
+ SDValue NB;
+ if (NumBytes) {
+ NB = DAG.getConstant(NumBytes, DL, PtrVT, true);
+ Chain = DAG.getCALLSEQ_START(Chain, NB, DL);
+ }
+
+ if (IsVarArg) {
+ // For non-fixed arguments, emit the stores that place the argument values
+ // on the stack at the offsets computed above.
+ SDValue SP = DAG.getCopyFromReg(
+ Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT);
+ unsigned ValNo = 0;
+ SmallVector<SDValue, 8> Chains;
+ for (SDValue Arg :
+ make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ assert(ArgLocs[ValNo].getValNo() == ValNo &&
+ "ArgLocs should remain in order and only hold varargs args");
+ unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP,
+ DAG.getConstant(Offset, DL, PtrVT));
+ Chains.push_back(DAG.getStore(Chain, DL, Arg, Add,
+ MachinePointerInfo::getStack(MF, Offset),
+ false, false, 0));
+ }
+ if (!Chains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ // Compute the operands for the CALLn node.
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
+ // isn't reliable.
+ Ops.append(OutVals.begin(),
+ IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
+
+ SmallVector<EVT, 8> Tys;
+ for (const auto &In : Ins) {
+ assert(!In.Flags.isByVal() && "byval is not valid for return values");
+ assert(!In.Flags.isNest() && "nest is not valid for return values");
+ if (In.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
+ if (In.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
+ if (In.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG,
+ "WebAssembly hasn't implemented cons regs last return values");
+ // Ignore In.getOrigAlign() because all our arguments are passed in
+ // registers.
+ Tys.push_back(In.VT);
+ }
+ Tys.push_back(MVT::Other);
+ SDVTList TyList = DAG.getVTList(Tys);
+ SDValue Res =
+ DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
+ DL, TyList, Ops);
+ if (Ins.empty()) {
+ Chain = Res;
+ } else {
+ InVals.push_back(Res);
+ Chain = Res.getValue(1);
+ }
+
+ if (NumBytes) {
+ SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT);
+ Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL);
+ }
+
+ return Chain;
+}
+
+bool WebAssemblyTargetLowering::CanLowerReturn(
+ CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext & /*Context*/) const {
+ // WebAssembly can't currently handle returning tuples.
+ return Outs.size() <= 1;
+}
+
+SDValue WebAssemblyTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+ SelectionDAG &DAG) const {
+ assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ RetOps.append(OutVals.begin(), OutVals.end());
+ Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
+
+ // Record the number and types of the return values.
+ for (const ISD::OutputArg &Out : Outs) {
+ assert(!Out.Flags.isByVal() && "byval is not valid for return values");
+ assert(!Out.Flags.isNest() && "nest is not valid for return values");
+ assert(Out.IsFixed && "non-fixed return value is not valid");
+ if (Out.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
+ if (Out.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
+ if (Out.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
+ }
+
+ return Chain;
+}
+
+SDValue WebAssemblyTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+ // Set up the incoming ARGUMENTS value, which serves to represent the liveness
+ // of the incoming values before they're represented by virtual registers.
+ MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
+
+ for (const ISD::InputArg &In : Ins) {
+ if (In.Flags.isByVal())
+ fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+ if (In.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+ if (In.Flags.isNest())
+ fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+ if (In.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+ if (In.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+ // Ignore In.getOrigAlign() because all our arguments are passed in
+ // registers.
+ InVals.push_back(
+ In.Used
+ ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+ DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
+ : DAG.getUNDEF(In.VT));
+
+ // Record the number and types of arguments.
+ MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT);
+ }
+
+ // Incoming varargs arguments are on the stack and will be accessed through
+ // va_arg, so we don't need to do anything for them here.
+
+ return Chain;
+}
+
//===----------------------------------------------------------------------===//
-// Other Lowering Code
+// Custom lowering hooks.
//===----------------------------------------------------------------------===//
+SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
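+  // These opcodes reach this hook because they are presumably registered as
+  // Custom via setOperationAction when the target lowering is constructed.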
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unimplemented operation lowering");
+ return SDValue();
+ case ISD::FrameIndex:
+ return LowerFrameIndex(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ }
+}
+
+SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
+ int FI = cast<FrameIndexSDNode>(Op)->getIndex();
+ return DAG.getTargetFrameIndex(FI, Op.getValueType());
+}
+
+SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const auto *GA = cast<GlobalAddressSDNode>(Op);
+ EVT VT = Op.getValueType();
+ assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ if (GA->getAddressSpace() != 0)
+ fail(DL, DAG, "WebAssembly only expects the 0 address space");
+ return DAG.getNode(
+ WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const auto *ES = cast<ExternalSymbolSDNode>(Op);
+ EVT VT = Op.getValueType();
+ assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+}
+
+SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ // There's no need for a Wrapper node because we always incorporate a jump
+ // table operand into a TABLESWITCH instruction, rather than ever
+ // materializing it in a register.
+ const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
+ JT->getTargetFlags());
+}
+
+SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+ SDValue Index = Op.getOperand(2);
+ assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Index);
+
+ MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
+ const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
+
+  // TODO: For now, we just pick something arbitrary as the default case.
+  // We really want to sniff out the guard and put in the real default case
+  // (and delete the guard).
+ Ops.push_back(DAG.getBasicBlock(MBBs[0]));
+
+ // Add an operand for each case.
+ for (auto MBB : MBBs)
+ Ops.push_back(DAG.getBasicBlock(MBB));
+
+ return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops);
+}
+
+SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
+
+ // The incoming non-fixed arguments are placed on the top of the stack, with
+ // natural alignment, at the point of the call, so the base pointer is just
+ // the current frame pointer.
+ DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+ unsigned FP =
+ Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
-
-MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- return getDataSection();
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index efd60a7..e7232a0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -22,10 +22,11 @@ namespace llvm {
namespace WebAssemblyISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+#define HANDLE_NODETYPE(NODE) NODE,
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
};
} // end namespace WebAssemblyISD
@@ -42,8 +43,51 @@ private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
+
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ // Custom lowering hooks.
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
};
+namespace WebAssembly {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+} // end namespace WebAssembly
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 6b5b6cd..cfa1519 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -12,10 +12,63 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * call_direct: call function directly
- * call_indirect: call function indirectly
- * addressof: obtain a function pointer value for a given function
- */
+// TODO: addr64: These currently assume the callee address is 32-bit.
+
+let Defs = [ARGUMENTS] in {
+
+// Call sequence markers. Their immediate operands represent the amount of
+// stack space to allocate or free, which is used for varargs lowering.
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
+def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt),
+ [(WebAssemblycallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2),
+ [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
+} // isCodeGenOnly = 1
+
+multiclass CALL<WebAssemblyRegClass vt, string prefix> {
+ def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops),
+ [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee")>;
+ def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+ [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+ !strconcat(prefix, "call_indirect\t$dst, $callee")>;
+}
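+// For example, CALL<I32, "i32."> expands to CALL_I32 and CALL_INDIRECT_I32,
+// with asm strings "i32.call" and "i32.call_indirect" respectively.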
+let Uses = [SP32, SP64], isCall = 1 in {
+ defm : CALL<I32, "i32.">;
+ defm : CALL<I64, "i64.">;
+ defm : CALL<F32, "f32.">;
+ defm : CALL<F64, "f64.">;
+
+ def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops),
+ [(WebAssemblycall0 (i32 imm:$callee))],
+ "call \t$callee">;
+ def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+ [(WebAssemblycall0 I32:$callee)],
+ "call_indirect\t$callee">;
+} // Uses = [SP32,SP64], isCall = 1
+
+} // Defs = [ARGUMENTS]
+
+// Patterns for matching a direct call to a global address.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_I32 tglobaladdr:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_I64 tglobaladdr:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_F32 tglobaladdr:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_F64 tglobaladdr:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
+ (CALL_VOID tglobaladdr:$callee)>;
+
+// Patterns for matching a direct call to an external symbol.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_I32 texternalsym:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_I64 texternalsym:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_F32 texternalsym:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_F64 texternalsym:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
+ (CALL_VOID texternalsym:$callee)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
new file mode 100644
index 0000000..05efe89
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -0,0 +1,82 @@
+//===- WebAssemblyInstrControl.td - WebAssembly control flow -*- tablegen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly control-flow code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+let Defs = [ARGUMENTS] in {
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+// The condition operand is a boolean value which WebAssembly represents as i32.
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst),
+ [(brcond I32:$cond, bb:$dst)],
+ "br_if \t$cond, $dst">;
+let isCodeGenOnly = 1 in
+def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [],
+ "br_unless\t$cond, $dst">;
+let isBarrier = 1 in {
+def BR : I<(outs), (ins bb_op:$dst),
+ [(br bb:$dst)],
+ "br \t$dst">;
+} // isBarrier = 1
+} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
+ (BR_IF I32:$cond, bb_op:$dst)>;
+def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
+ (BR_UNLESS I32:$cond, bb_op:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we never use TABLESWITCH_I64 in wasm32 mode.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
+ [(WebAssemblytableswitch I32:$index, bb:$default)],
+ "tableswitch\t$index, $default">;
+def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
+ [(WebAssemblytableswitch I64:$index, bb:$default)],
+ "tableswitch\t$index, $default">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+// Placemarkers to indicate the start of a block or loop scope. These
+// use/clobber EXPR_STACK to prevent them from being moved into the middle of
+// an expression tree.
+let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
+def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
+def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
+
+// No-op to indicate to the AsmPrinter that a loop ends here, so a basic block
+// label is needed even if it wouldn't otherwise be emitted.
+let isTerminator = 1, hasCtrlDep = 1 in
+def LOOP_END : I<(outs), (ins), []>;
+
+multiclass RETURN<WebAssemblyRegClass vt> {
+ def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
+ "return \t$val">;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+let isReturn = 1 in {
+ defm : RETURN<I32>;
+ defm : RETURN<I64>;
+ defm : RETURN<F32>;
+ defm : RETURN<F64>;
+ def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">;
+} // isReturn = 1
+ def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 3fa2906..931f4a9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,32 +13,99 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer
- * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer
- * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer
- * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer
- * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer
- * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer
- * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer
- * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer
- * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer
- * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer
- * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer
- * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer
- * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer
- * float32.demote[float64]: demote a 64-bit float to a 32-bit float
- * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float
- * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float
- * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float
- * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float
- * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float
- * float64.promote[float32]: promote a 32-bit float to a 64-bit float
- * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float
- * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float
- * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float
- * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float
- * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float
- */
+let Defs = [ARGUMENTS] in {
+
+def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src),
+ [(set I32:$dst, (trunc I64:$src))],
+ "i32.wrap/i64\t$dst, $src">;
+
+def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src),
+ [(set I64:$dst, (sext I32:$src))],
+ "i64.extend_s/i32\t$dst, $src">;
+def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src),
+ [(set I64:$dst, (zext I32:$src))],
+ "i64.extend_u/i32\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand a "don't care" extend into zero-extend (chosen over sign-extend
+// somewhat arbitrarily, although it favors popular hardware architectures
+// and is conceptually a simpler operation).
+def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Conversion from floating point to integer traps on overflow and on
+// invalid (NaN) inputs.
+let hasSideEffects = 1 in {
+def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (fp_to_sint F32:$src))],
+ "i32.trunc_s/f32\t$dst, $src">;
+def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (fp_to_uint F32:$src))],
+ "i32.trunc_u/f32\t$dst, $src">;
+def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src),
+ [(set I64:$dst, (fp_to_sint F32:$src))],
+ "i64.trunc_s/f32\t$dst, $src">;
+def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src),
+ [(set I64:$dst, (fp_to_uint F32:$src))],
+ "i64.trunc_u/f32\t$dst, $src">;
+def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src),
+ [(set I32:$dst, (fp_to_sint F64:$src))],
+ "i32.trunc_s/f64\t$dst, $src">;
+def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src),
+ [(set I32:$dst, (fp_to_uint F64:$src))],
+ "i32.trunc_u/f64\t$dst, $src">;
+def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (fp_to_sint F64:$src))],
+ "i64.trunc_s/f64\t$dst, $src">;
+def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (fp_to_uint F64:$src))],
+ "i64.trunc_u/f64\t$dst, $src">;
+} // hasSideEffects = 1
+
+def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (sint_to_fp I32:$src))],
+ "f32.convert_s/i32\t$dst, $src">;
+def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (uint_to_fp I32:$src))],
+ "f32.convert_u/i32\t$dst, $src">;
+def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src),
+ [(set F64:$dst, (sint_to_fp I32:$src))],
+ "f64.convert_s/i32\t$dst, $src">;
+def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src),
+ [(set F64:$dst, (uint_to_fp I32:$src))],
+ "f64.convert_u/i32\t$dst, $src">;
+def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src),
+ [(set F32:$dst, (sint_to_fp I64:$src))],
+ "f32.convert_s/i64\t$dst, $src">;
+def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src),
+ [(set F32:$dst, (uint_to_fp I64:$src))],
+ "f32.convert_u/i64\t$dst, $src">;
+def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (sint_to_fp I64:$src))],
+ "f64.convert_s/i64\t$dst, $src">;
+def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (uint_to_fp I64:$src))],
+ "f64.convert_u/i64\t$dst, $src">;
+
+def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src),
+ [(set F64:$dst, (fextend F32:$src))],
+ "f64.promote/f32\t$dst, $src">;
+def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src),
+ [(set F32:$dst, (fround F64:$src))],
+ "f32.demote/f64\t$dst, $src">;
+
+def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (bitconvert F32:$src))],
+ "i32.reinterpret/f32\t$dst, $src">;
+def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (bitconvert I32:$src))],
+ "f32.reinterpret/i32\t$dst, $src">;
+def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (bitconvert F64:$src))],
+ "i64.reinterpret/f64\t$dst, $src">;
+def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (bitconvert I64:$src))],
+ "f64.reinterpret/i64\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 30ef633..5520c6d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,33 +12,90 @@
///
//===----------------------------------------------------------------------===//
-defm FADD : BinaryFP<fadd>;
-defm FSUB : BinaryFP<fsub>;
-defm FMUL : BinaryFP<fmul>;
-defm FDIV : BinaryFP<fdiv>;
-defm FABS : UnaryFP<fabs>;
-defm FNEG : UnaryFP<fneg>;
-defm COPYSIGN : BinaryFP<fcopysign>;
-defm CEIL : UnaryFP<fceil>;
-defm FLOOR : UnaryFP<ffloor>;
-defm TRUNC : UnaryFP<ftrunc>;
-defm NEARESTINT : UnaryFP<fnearbyint>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.eq: compare equal
- * float32.lt: less than
- * float32.le: less than or equal
- * float32.gt: greater than
- * float32.ge: greater than or equal
- */
-
-defm SQRT : UnaryFP<fsqrt>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.min: minimum (binary operator); if either operand is NaN, returns NaN
- * float32.max: maximum (binary operator); if either operand is NaN, returns NaN
- */
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in
+defm ADD : BinaryFP<fadd, "add ">;
+defm SUB : BinaryFP<fsub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryFP<fmul, "mul ">;
+defm DIV : BinaryFP<fdiv, "div ">;
+defm SQRT : UnaryFP<fsqrt, "sqrt">;
+
+defm ABS : UnaryFP<fabs, "abs ">;
+defm NEG : UnaryFP<fneg, "neg ">;
+defm COPYSIGN : BinaryFP<fcopysign, "copysign">;
+
+let isCommutable = 1 in {
+defm MIN : BinaryFP<fminnan, "min ">;
+defm MAX : BinaryFP<fmaxnan, "max ">;
+} // isCommutable = 1
+
+defm CEIL : UnaryFP<fceil, "ceil">;
+defm FLOOR : UnaryFP<ffloor, "floor">;
+defm TRUNC : UnaryFP<ftrunc, "trunc">;
+defm NEAREST : UnaryFP<fnearbyint, "nearest">;
+
+} // Defs = [ARGUMENTS]
+
+// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
+def : Pat<(fcopysign F64:$lhs, F32:$rhs),
+ (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
+def : Pat<(fcopysign F32:$lhs, F64:$rhs),
+ (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
+
+// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
+def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
+def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in {
+defm EQ : ComparisonFP<SETOEQ, "eq ">;
+defm NE : ComparisonFP<SETUNE, "ne ">;
+} // isCommutable = 1
+defm LT : ComparisonFP<SETOLT, "lt ">;
+defm LE : ComparisonFP<SETOLE, "le ">;
+defm GT : ComparisonFP<SETOGT, "gt ">;
+defm GE : ComparisonFP<SETOGE, "ge ">;
+
+} // Defs = [ARGUMENTS]
+
+// "Don't care" floating-point comparisons, supported via other comparisons.
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
+ "f32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
+ "f64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its condition operand to conform to getBooleanContents,
+// but WebAssembly's select interprets any non-zero value as true, so we can
+// fold a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 513c36f..8008dd3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,4 +1,4 @@
-// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -12,44 +12,68 @@
///
//===----------------------------------------------------------------------===//
-// WebAssembly Instruction Format
-class WebAssemblyInst<string cstr> : Instruction {
+// WebAssembly Instruction Format.
+class WebAssemblyInst<string asmstr> : Instruction {
field bits<0> Inst; // Instruction encoding.
let Namespace = "WebAssembly";
let Pattern = [];
- let Constraints = cstr;
+ let AsmString = asmstr;
}
-// Normal instructions
-class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
- : WebAssemblyInst<cstr> {
+// Normal instructions.
+class I<dag oops, dag iops, list<dag> pattern, string asmstr = "">
+ : WebAssemblyInst<asmstr> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
}
// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$src),
- [(set Int32:$dst, (node Int32:$src))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$src),
- [(set Int64:$dst, (node Int64:$src))]>;
-}
-multiclass BinaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs),
- [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs),
- [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>;
-}
-multiclass UnaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$src),
- [(set Float32:$dst, (node Float32:$src))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$src),
- [(set Float64:$dst, (node Float64:$src))]>;
-}
-multiclass BinaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs),
- [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs),
- [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>;
+multiclass UnaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$src),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$src),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>;
+}
+multiclass BinaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass UnaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$src),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$src),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>;
+}
+multiclass BinaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
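+// Comparisons always produce an I32 result regardless of the operand types,
+// since WebAssembly comparisons return an i32 boolean value.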
+multiclass ComparisonInt<CondCode cond, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonFP<CondCode cond, string name> {
+ def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index ea8937c..5e7663c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -24,5 +24,136 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-instr-info"
+#define GET_INSTRINFO_CTOR_DTOR
+#include "WebAssemblyGenInstrInfo.inc"
+
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
- : RI(STI.getTargetTriple()) {}
+ : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
+ WebAssembly::ADJCALLSTACKUP),
+ RI(STI.getTargetTriple()) {}
+
+void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+  // This method is normally called by post-RA expansion, which expects only
+  // physical registers to exist. However, we need to handle both physical and
+  // virtual registers here.
+  auto &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+      TargetRegisterInfo::isVirtualRegister(DestReg)
+          ? MRI.getRegClass(DestReg)
+          : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg);
+
+ unsigned CopyLocalOpcode;
+ if (RC == &WebAssembly::I32RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32;
+ else if (RC == &WebAssembly::I64RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64;
+ else if (RC == &WebAssembly::F32RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32;
+ else if (RC == &WebAssembly::F64RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64;
+ else
+ llvm_unreachable("Unexpected register class");
+
+ BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg)
+ .addReg(SrcReg, KillSrc ? RegState::Kill : 0);
+}
+
+// Branch analysis.
+bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool /*AllowModify*/) const {
+ bool HaveCond = false;
+ for (MachineInstr &MI : MBB.terminators()) {
+ switch (MI.getOpcode()) {
+ default:
+ // Unhandled instruction; bail out.
+ return true;
+ case WebAssembly::BR_IF:
+ if (HaveCond)
+ return true;
+ Cond.push_back(MachineOperand::CreateImm(true));
+ Cond.push_back(MI.getOperand(0));
+ TBB = MI.getOperand(1).getMBB();
+ HaveCond = true;
+ break;
+ case WebAssembly::BR_UNLESS:
+ if (HaveCond)
+ return true;
+ Cond.push_back(MachineOperand::CreateImm(false));
+ Cond.push_back(MI.getOperand(0));
+ TBB = MI.getOperand(1).getMBB();
+ HaveCond = true;
+ break;
+ case WebAssembly::BR:
+ if (!HaveCond)
+ TBB = MI.getOperand(0).getMBB();
+ else
+ FBB = MI.getOperand(0).getMBB();
+ break;
+ }
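+    // Stop scanning at the first barrier (e.g. an unconditional BR); any
+    // terminators after it are unreachable.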
+ if (MI.isBarrier())
+ break;
+ }
+
+ return false;
+}
+
+unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ unsigned Count = 0;
+
+ while (I != MBB.instr_begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isTerminator())
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.instr_end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const {
+ if (Cond.empty()) {
+ if (!TBB)
+ return 0;
+
+ BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB);
+ return 1;
+ }
+
+ assert(Cond.size() == 2 && "Expected a flag and a successor block");
+
+ if (Cond[0].getImm()) {
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF))
+ .addOperand(Cond[1])
+ .addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS))
+ .addOperand(Cond[1])
+ .addMBB(TBB);
+ }
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB);
+ return 2;
+}
+
+bool WebAssemblyInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Expected a flag and a successor block");
+ Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index 1c4ae22..5ddd9b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -19,17 +19,35 @@
#include "WebAssemblyRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "WebAssemblyGenInstrInfo.inc"
+
namespace llvm {
class WebAssemblySubtarget;
-class WebAssemblyInstrInfo final {
+class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
const WebAssemblyRegisterInfo RI;
public:
explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI);
const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index fe3ca76..f0b4ce7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -25,20 +25,48 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
// WebAssembly-specific DAG Node Types.
//===----------------------------------------------------------------------===//
+def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>;
+def SDT_WebAssemblyCallSeqEnd :
+ SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
+def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
+def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def WebAssemblycallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def WebAssemblycallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
+ SDT_WebAssemblyCall0,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
+ SDT_WebAssemblyCall1,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH",
+ SDT_WebAssemblyTableswitch,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
+ SDT_WebAssemblyArgument>;
+def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
+ SDT_WebAssemblyReturn, [SDNPHasChain]>;
+def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+ SDT_WebAssemblyWrapper>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * get_local: read the current value of a local variable
- * set_local: set the current value of a local variable
-*/
+def bb_op : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
@@ -47,13 +75,86 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
include "WebAssemblyInstrFormats.td"
//===----------------------------------------------------------------------===//
+// Additional instructions.
+//===----------------------------------------------------------------------===//
+
+multiclass ARGUMENT<WebAssemblyRegClass vt> {
+ let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
+ def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
+ [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+}
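+// ARGUMENT has hasSideEffects and uses the ARGUMENTS pseudo-register, while
+// most other instructions clobber ARGUMENTS (note the Defs = [ARGUMENTS]
+// blocks throughout these files); this dependence keeps argument reads
+// anchored at the top of the function.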
+defm : ARGUMENT<I32>;
+defm : ARGUMENT<I64>;
+defm : ARGUMENT<F32>;
+defm : ARGUMENT<F64>;
+
+let Defs = [ARGUMENTS] in {
+
+// get_local and set_local are not generated by instruction selection; they
+// are implied by virtual register uses and defs in most contexts. However,
+// they are explicitly emitted for special purposes.
+multiclass LOCAL<WebAssemblyRegClass vt> {
+ def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [],
+ "get_local\t$res, $regno">;
+ // TODO: set_local returns its operand value
+ def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [],
+ "set_local\t$regno, $src">;
+
+  // COPY_LOCAL is not an actual instruction in wasm, but since we allow
+  // get_local and set_local to be implicit, a COPY_LOCAL is effectively a
+  // no-op: all the work is done by the implied get_local and set_local.
+ let isAsCheapAsAMove = 1 in
+ def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [],
+ "copy_local\t$res, $src">;
+}
+defm : LOCAL<I32>;
+defm : LOCAL<I64>;
+defm : LOCAL<F32>;
+defm : LOCAL<F64>;
+
+let isMoveImm = 1 in {
+def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
+ [(set I32:$res, imm:$imm)],
+ "i32.const\t$res, $imm">;
+def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
+ [(set I64:$res, imm:$imm)],
+ "i64.const\t$res, $imm">;
+def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+ [(set F32:$res, fpimm:$imm)],
+ "f32.const\t$res, $imm">;
+def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+ [(set F64:$res, fpimm:$imm)],
+ "f64.const\t$res, $imm">;
+} // isMoveImm = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
+ (CONST_I32 tglobaladdr:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
+ (CONST_I32 texternalsym:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
+ (CONST_I32 tjumptable:$dst)>;
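+// These patterns materialize symbolic addresses as 32-bit constants, matching
+// wasm32's 32-bit address space.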
+
+let Defs = [ARGUMENTS] in {
+
+// Function signature and local variable declaration "instructions".
+def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
+def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
+def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
+
+} // Defs = [ARGUMENTS]
+
+//===----------------------------------------------------------------------===//
// Additional sets of instructions.
//===----------------------------------------------------------------------===//
include "WebAssemblyInstrMemory.td"
include "WebAssemblyInstrCall.td"
+include "WebAssemblyInstrControl.td"
include "WebAssemblyInstrInteger.td"
-include "WebAssemblyInstrFloat.td"
include "WebAssemblyInstrConv.td"
+include "WebAssemblyInstrFloat.td"
include "WebAssemblyInstrAtomics.td"
include "WebAssemblyInstrSIMD.td"
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 5f60fe8..09e5eaf 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,34 +12,77 @@
///
//===----------------------------------------------------------------------===//
-defm ADD : BinaryInt<add>;
-defm SUB : BinaryInt<sub>;
-defm MUL : BinaryInt<mul>;
-defm SDIV : BinaryInt<sdiv>;
-defm UDIV : BinaryInt<udiv>;
-defm SREM : BinaryInt<srem>;
-defm UREM : BinaryInt<urem>;
-defm AND : BinaryInt<and>;
-defm IOR : BinaryInt<or>;
-defm XOR : BinaryInt<xor>;
-defm SHL : BinaryInt<shl>;
-defm SHR : BinaryInt<srl>;
-defm SAR : BinaryInt<sra>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * int32.eq: signed-less compare equal
- * int32.slt: signed less than
- * int32.sle: signed less than or equal
- * int32.ult: unsigned less than
- * int32.ule: unsigned less than or equal
- * int32.sgt: signed greater than
- * int32.sge: signed greater than or equal
- * int32.ugt: unsigned greater than
- * int32.uge: unsigned greater than or equal
- */
-
-defm CLZ : UnaryInt<ctlz>;
-defm CTZ : UnaryInt<cttz>;
-defm POPCNT : UnaryInt<ctpop>;
+let Defs = [ARGUMENTS] in {
+
+// The spaces after the names are for aesthetic purposes only, to make
+// operands line up vertically after tab expansion.
+let isCommutable = 1 in
+defm ADD : BinaryInt<add, "add ">;
+defm SUB : BinaryInt<sub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryInt<mul, "mul ">;
+// Divide and remainder trap on a zero denominator.
+let hasSideEffects = 1 in {
+defm DIV_S : BinaryInt<sdiv, "div_s">;
+defm DIV_U : BinaryInt<udiv, "div_u">;
+defm REM_S : BinaryInt<srem, "rem_s">;
+defm REM_U : BinaryInt<urem, "rem_u">;
+} // hasSideEffects = 1
+let isCommutable = 1 in {
+defm AND : BinaryInt<and, "and ">;
+defm OR : BinaryInt<or, "or ">;
+defm XOR : BinaryInt<xor, "xor ">;
+} // isCommutable = 1
+defm SHL : BinaryInt<shl, "shl ">;
+defm SHR_U : BinaryInt<srl, "shr_u">;
+defm SHR_S : BinaryInt<sra, "shr_s">;
+
+let isCommutable = 1 in {
+defm EQ : ComparisonInt<SETEQ, "eq ">;
+defm NE : ComparisonInt<SETNE, "ne ">;
+} // isCommutable = 1
+defm LT_S : ComparisonInt<SETLT, "lt_s">;
+defm LE_S : ComparisonInt<SETLE, "le_s">;
+defm LT_U : ComparisonInt<SETULT, "lt_u">;
+defm LE_U : ComparisonInt<SETULE, "le_u">;
+defm GT_S : ComparisonInt<SETGT, "gt_s">;
+defm GE_S : ComparisonInt<SETGE, "ge_s">;
+defm GT_U : ComparisonInt<SETUGT, "gt_u">;
+defm GE_U : ComparisonInt<SETUGE, "ge_u">;
+
+defm CLZ : UnaryInt<ctlz, "clz ">;
+defm CTZ : UnaryInt<cttz, "ctz ">;
+defm POPCNT : UnaryInt<ctpop, "popcnt">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand the "don't care" operations to supported operations.
+def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>;
+def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>;
+def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>;
+def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
+ "i32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
+ "i64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its condition operand to conform to getBooleanContents,
+// but WebAssembly's select interprets any non-zero value as true, so we can
+// fold a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 5ab40e8..74ec45d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -12,35 +12,500 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- * Each has optional alignment and immediate byte offset.
- *
- * int32.load_sx[int8]: sign-extend to int32
- * int32.load_sx[int16]: sign-extend to int32
- * int32.load_zx[int8]: zero-extend to int32
- * int32.load_zx[int16]: zero-extend to int32
- * int32.load[int32]: (no conversion)
- * int64.load_sx[int8]: sign-extend to int64
- * int64.load_sx[int16]: sign-extend to int64
- * int64.load_sx[int32]: sign-extend to int64
- * int64.load_zx[int8]: zero-extend to int64
- * int64.load_zx[int16]: zero-extend to int64
- * int64.load_zx[int32]: zero-extend to int64
- * int64.load[int64]: (no conversion)
- * float32.load[float32]: (no conversion)
- * float64.load[float64]: (no conversion)
- *
- * int32.store[int8]: wrap int32 to int8
- * int32.store[int16]: wrap int32 to int16
- * int32.store[int32]: (no conversion)
- * int64.store[int8]: wrap int64 to int8
- * int64.store[int16]: wrap int64 to int16
- * int64.store[int32]: wrap int64 to int32
- * int64.store[int64]: (no conversion)
- * float32.store[float32]: (no conversion)
- * float64.store[float64]: (no conversion)
- *
- * load_global: load the value of a given global variable
- * store_global: store a given value to a given global variable
- */
+// TODO:
+// - HasAddr64
+// - WebAssemblyTargetLowering having to do with atomics
+// - Optional alignment on each load and store.
+
+// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16
+// local types. These memory-only types instead zero- or sign-extend into local
+// types when loading, and truncate when storing.
+
+// WebAssembly constant offsets are performed as unsigned with infinite
+// precision, so we need to check for NoUnsignedWrap so that we don't fold an
+// offset for an add that needs wrapping.
+def regPlusImm : PatFrag<(ops node:$off, node:$addr),
+ (add node:$addr, node:$off),
+ [{ return N->getFlags()->hasNoUnsignedWrap(); }]>;
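+// For example, (add $addr, 16) with the nuw flag can fold the 16 into a
+// load's offset field, while an add that may wrap must not be folded.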
+
+let Defs = [ARGUMENTS] in {
+
+// Basic load.
+def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load\t$dst, ${off}(${addr})">;
+def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load\t$dst, ${off}(${addr})">;
+def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "f32.load\t$dst, ${off}(${addr})">;
+def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "f64.load\t$dst, ${off}(${addr})">;
+
+} // Defs = [ARGUMENTS]
+
+// Select loads with no constant offset.
+def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>;
+def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>;
+def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>;
+def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>;
+
+// Select loads with a constant offset.
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_I32 imm:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_I64 imm:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_F32 imm:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_F64 imm:$off, $addr)>;
+def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_F32 tglobaladdr:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_F64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_I64 texternalsym:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_F32 texternalsym:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_F64 texternalsym:$off, $addr)>;
+
+// Select loads with just a constant offset.
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_F32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_F64 texternalsym:$off, (CONST_I32 0))>;
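+// The patterns above use (CONST_I32 0) as the base address, so a load from a
+// bare constant or symbolic address is selected as that offset from address
+// zero.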
+
+let Defs = [ARGUMENTS] in {
+
+// Extending load.
+def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load8_s\t$dst, ${off}(${addr})">;
+def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load8_u\t$dst, ${off}(${addr})">;
+def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load16_s\t$dst, ${off}(${addr})">;
+def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load16_u\t$dst, ${off}(${addr})">;
+def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load8_s\t$dst, ${off}(${addr})">;
+def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load8_u\t$dst, ${off}(${addr})">;
+def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load16_s\t$dst, ${off}(${addr})">;
+def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load16_u\t$dst, ${off}(${addr})">;
+def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load32_s\t$dst, ${off}(${addr})">;
+def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load32_u\t$dst, ${off}(${addr})">;
+
+} // Defs = [ARGUMENTS]
+
+// Select extending loads with no constant offset.
+def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>;
+def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>;
+def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>;
+def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
+def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>;
+def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
+def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+
+// Select extending loads with a constant offset.
+def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_S_I32 imm:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I32 imm:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_S_I32 imm:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I32 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_U_I64 imm:$off, $addr)>;
+def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_S_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_S_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_S_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_S_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_U_I64 texternalsym:$off, $addr)>;
+
+// Select extending loads with just a constant offset.
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+// Resolve "don't care" extending loads to zero-extending loads. This is
+// somewhat arbitrary, but zero-extending is conceptually simpler.
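+// For example, (i32 (extloadi8 I32:$addr)) may legally extend with either
+// sign or zero bits; the patterns below resolve it to the _U (zero) forms.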
+
+// Select "don't care" extending loads with no constant offset.
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>;
+def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
+def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>;
+def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
+def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+
+// Select "don't care" extending loads with a constant offset.
+def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I32 imm:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I32 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_U_I64 imm:$off, $addr)>;
+def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_U_I64 texternalsym:$off, $addr)>;
+
+// Select "don't care" extending loads with just a constant offset.
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic store.
+// Note that we split the patterns out of the instruction definitions because
+// WebAssembly's stores return their operand value, and tablegen doesn't like
+// instruction definition patterns that don't reference all of the output
+// operands.
+// Note: WebAssembly inverts SelectionDAG's usual operand order.
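+// For example, STORE_I32 below carries an empty pattern list; the standalone
+// `def : Pat<...>` rules that follow do the matching, so the returned $dst
+// result never needs to appear in a source pattern.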
+def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
+ "f32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
+ "f64.store\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select stores with no constant offset.
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+
+// Select stores with a constant offset.
+def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_F32 imm:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_F64 imm:$off, I32:$addr, F64:$val)>;
+def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>;
+def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>;
+
+// Select stores with just a constant offset.
+def : Pat<(store I32:$val, imm:$off),
+ (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, imm:$off),
+ (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, imm:$off),
+ (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, imm:$off),
+ (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>;
+def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>;
+def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Truncating store.
+def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store8\t$dst, ${off}(${addr}), $val">;
+def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store16\t$dst, ${off}(${addr}), $val">;
+def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store8\t$dst, ${off}(${addr}), $val">;
+def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store16\t$dst, ${off}(${addr}), $val">;
+def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store32\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select truncating stores with no constant offset.
+def : Pat<(truncstorei8 I32:$val, I32:$addr),
+ (STORE8_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, I32:$addr),
+ (STORE16_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, I32:$addr),
+ (STORE8_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, I32:$addr),
+ (STORE16_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, I32:$addr),
+ (STORE32_I64 0, I32:$addr, I64:$val)>;
+
+// Select truncating stores with a constant offset.
+def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE8_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE16_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE8_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE16_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE32_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+
+// Select truncating stores with just a constant offset.
+def : Pat<(truncstorei8 I32:$val, imm:$off),
+ (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, imm:$off),
+ (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, imm:$off),
+ (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, imm:$off),
+ (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, imm:$off),
+ (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Memory size.
+def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins),
+ [(set I32:$dst, (int_wasm_memory_size))],
+ "memory_size\t$dst">,
+ Requires<[HasAddr32]>;
+def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins),
+ [(set I64:$dst, (int_wasm_memory_size))],
+ "memory_size\t$dst">,
+ Requires<[HasAddr64]>;
+
+// Grow memory.
+def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta),
+ [(int_wasm_grow_memory I32:$delta)],
+ "grow_memory\t$delta">,
+ Requires<[HasAddr32]>;
+def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta),
+ [(int_wasm_grow_memory I64:$delta)],
+ "grow_memory\t$delta">,
+ Requires<[HasAddr64]>;
+
+} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
new file mode 100644
index 0000000..b009a4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -0,0 +1,133 @@
+//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file lowers br_unless into br_if with an inverted condition.
+///
+/// br_unless is not currently in the spec, but it's very convenient for LLVM
+/// to use. This pass allows LLVM to use it, for now.
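+///
+/// For example (illustrative pseudo-assembly, since br_unless is not in the
+/// spec): when the condition's producer cannot simply be inverted,
+///   br_unless $cond, $dst
+/// is rewritten as
+///   $inv = i32.eq $cond, 0
+///   br_if $inv, $dst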
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-br_unless"
+
+namespace {
+class WebAssemblyLowerBrUnless final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Lower br_unless";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyLowerBrUnless::ID = 0;
+FunctionPass *llvm::createWebAssemblyLowerBrUnless() {
+ return new WebAssemblyLowerBrUnless();
+}
+
+bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Lowering br_unless **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &MRI = MF.getRegInfo();
+
+ for (auto &MBB : MF) {
+ for (auto MII = MBB.begin(); MII != MBB.end(); ) {
+ MachineInstr *MI = &*MII++;
+ if (MI->getOpcode() != WebAssembly::BR_UNLESS)
+ continue;
+
+ unsigned Cond = MI->getOperand(0).getReg();
+ bool Inverted = false;
+
+ // Attempt to invert the condition in place.
+ if (MFI.isVRegStackified(Cond)) {
+ assert(MRI.hasOneDef(Cond));
+ MachineInstr *Def = MRI.getVRegDef(Cond);
+ switch (Def->getOpcode()) {
+ using namespace WebAssembly;
+ case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
+ case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
+ case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
+ case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
+ case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
+ case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
+ case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
+ case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
+ case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
+ case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
+ case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
+ case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
+ case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
+ case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
+ case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
+ case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
+ case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
+ case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
+ case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
+ case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
+ case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
+ case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
+ case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
+ case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
+ default: break;
+ }
+ }
+
+ // If we weren't able to invert the condition in place, insert an
+ // expression to invert it.
+ if (!Inverted) {
+ unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MFI.stackifyVReg(ZeroReg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg)
+ .addImm(0);
+ unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MFI.stackifyVReg(Tmp);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp)
+ .addReg(Cond)
+ .addReg(ZeroReg);
+ Cond = Tmp;
+ Inverted = true;
+ }
+
+ // The br_unless condition has now been inverted. Insert a br_if and
+ // delete the br_unless.
+ assert(Inverted);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
+ .addReg(Cond)
+ .addOperand(MI->getOperand(1));
+ MBB.erase(MI);
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
new file mode 100644
index 0000000..a953f82
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -0,0 +1,106 @@
+// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst //
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower WebAssembly MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSymbol *
+WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ return Printer.getSymbol(MO.getGlobal());
+}
+
+MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
+ const MachineOperand &MO) const {
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags");
+
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+
+ int64_t Offset = MO.getOffset();
+ if (Offset != 0) {
+ assert(!MO.isJTI() && "Unexpected offset with jump table index");
+ Expr =
+ MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
+ }
+
+ return MCOperand::createExpr(Expr);
+}
+
+void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
+ MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register: {
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ continue;
+ const WebAssemblyFunctionInfo &MFI =
+ *MI->getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
+ unsigned WAReg = MFI.getWAReg(MO.getReg());
+ MCOp = MCOperand::createReg(WAReg);
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ // TODO: MC converts all floating point immediate operands to double.
+ // This is fine for numeric values, but may cause NaNs to change bits.
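+ // For instance, an f32 signaling NaN widened to f64 by the host FPU may
+ // be quieted, so the emitted bits need not match the original payload.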
+ const ConstantFP *Imm = MO.getFPImm();
+ if (Imm->getType()->isFloatTy())
+ MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat());
+ else if (Imm->getType()->isDoubleTy())
+ MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble());
+ else
+ llvm_unreachable("unknown floating point immediate type");
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
new file mode 100644
index 0000000..6d70470
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -0,0 +1,45 @@
+//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the class to lower WebAssembly MachineInstrs to
+/// their corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+
+/// This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
+ MCContext &Ctx;
+ AsmPrinter &Printer;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+
+public:
+ WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 542d984..225c5d3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -17,3 +17,9 @@
using namespace llvm;
WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
+
+void WebAssemblyFunctionInfo::initWARegs() {
+ assert(WARegs.empty());
+ unsigned Reg = UnusedReg;
+ WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index fc5e910..6a60280 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*-
+// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*-
//
// The LLVM Compiler Infrastructure
//
@@ -16,8 +16,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
-#include "WebAssemblyRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -27,9 +26,70 @@ namespace llvm {
class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
MachineFunction &MF;
+ std::vector<MVT> Params;
+
+ /// A mapping from CodeGen vreg index to WebAssembly register number.
+ std::vector<unsigned> WARegs;
+
+ /// A mapping from CodeGen vreg index to a boolean value indicating whether
+ /// the given register is considered to be "stackified", meaning it has been
+ /// determined or made to meet the stack requirements:
+ /// - single use (per path)
+ /// - single def (per path)
+ /// - defined and used in LIFO order with other stack registers
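+ /// For example, in a sequence like x = A(); y = B(); z = C(x, y), the
+ /// values x and y are each defined once, used once, and consumed in
+ /// LIFO order, so both registers can be stackified.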
+ BitVector VRegStackified;
+
+ // One entry for each possible target reg; we expect it to be small.
+ std::vector<unsigned> PhysRegs;
+
public:
- explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
+ PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
+ }
~WebAssemblyFunctionInfo() override;
+
+ void addParam(MVT VT) { Params.push_back(VT); }
+ const std::vector<MVT> &getParams() const { return Params; }
+
+ static const unsigned UnusedReg = -1u;
+
+ void stackifyVReg(unsigned VReg) {
+ if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+ VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
+ VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
+ }
+ bool isVRegStackified(unsigned VReg) const {
+ if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+ return false;
+ return VRegStackified.test(TargetRegisterInfo::virtReg2Index(VReg));
+ }
+
+ void initWARegs();
+ void setWAReg(unsigned VReg, unsigned WAReg) {
+ assert(WAReg != UnusedReg);
+ assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size());
+ WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
+ }
+ unsigned getWAReg(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+ return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
+ }
+ return PhysRegs[Reg];
+ }
+ // If new virtual registers are created after initWARegs has been called,
+ // this function can be used to add WebAssembly register mappings for them.
+ void addWAReg(unsigned VReg, unsigned WAReg) {
+ assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
+ WARegs.push_back(WAReg);
+ }
+
+ void addPReg(unsigned PReg, unsigned WAReg) {
+ assert(PReg < WebAssembly::NUM_TARGET_REGS);
+ assert(WAReg != UnusedReg);
+ PhysRegs[PReg] = WAReg;
+ }
+ const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 0000000..4dc401a
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,76 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
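+///
+/// For example, with a hypothetical callee declared as
+///   declare i8* @copy(i8* returned %dst, i8* %src)
+/// any use of %dst that is dominated by a call %r = call i8* @copy(...) can
+/// be rewritten to use %r instead.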
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+class OptimizeReturned final : public FunctionPass,
+ public InstVisitor<OptimizeReturned> {
+ const char *getPassName() const override {
+ return "WebAssembly Optimize Returned";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ DominatorTree *DT;
+
+public:
+ static char ID;
+ OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+ void visitCallSite(CallSite CS);
+};
+} // end anonymous namespace
+
+char OptimizeReturned::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+ return new OptimizeReturned();
+}
+
+void OptimizeReturned::visitCallSite(CallSite CS) {
+ for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
+ if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+ Instruction *Inst = CS.getInstruction();
+ Value *Arg = CS.getArgOperand(i);
+ // Ignore constants, globals, undef, etc.
+ if (isa<Constant>(Arg))
+ continue;
+ // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+ for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+ Use &U = *UI++;
+ if (DT->dominates(Inst, U))
+ U.set(Inst);
+ }
+ }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ visit(F);
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp
new file mode 100644
index 0000000..d570d42
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp
@@ -0,0 +1,1066 @@
+//===-- WebAssemblyPEI.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This is a copy of lib/CodeGen/PrologEpilogInserter.cpp except that it does
+// not assert that all virtual registers are gone (because WebAssembly currently
+// uses virtual rather than physical registers), and only runs
+// MRI.clearVirtRegs() if scavenging happened (which it never does). It also
+// uses a different class name so it can be registered via INITIALIZE_PASS.
+// It is otherwise unmodified, so any changes to the target-independent PEI
+// can be easily applied.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pei"
+namespace llvm {
+void initializeWasmPEIPass(PassRegistry&);
+}
+namespace {
+class WasmPEI : public MachineFunctionPass {
+public:
+ static char ID;
+ WasmPEI() : MachineFunctionPass(ID) {
+ initializeWasmPEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ SmallVector<MachineBasicBlock *, 1> SaveBlocks;
+ SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateSets(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void assignCalleeSavedSpillSlots(MachineFunction &Fn,
+ const BitVector &SavedRegs);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+};
+} // namespace
+
+char WasmPEI::ID = 0;
+
+namespace llvm {
+FunctionPass *createWebAssemblyPEI() {
+ return new WasmPEI();
+}
+}
+
+static cl::opt<unsigned>
+WarnStackSize("wasm-warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+ cl::desc("Warn for stack size bigger than the given"
+ " number"));
+
+INITIALIZE_PASS_BEGIN(WasmPEI, "wasmprologepilog",
+ "Wasm Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(WasmPEI, "wasmprologepilog",
+ "Wasm Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+void WasmPEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<StackProtector>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Compute the save and restore blocks for the current function.
+void WasmPEI::calculateSets(MachineFunction &Fn) {
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) {
+ SaveBlocks.push_back(MFI->getSavePoint());
+ assert(MFI->getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool WasmPEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+
+ // LOCALMOD: assert removed from target-independent PEI
+ //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+ // Insert spill code for any callee saved registers that are modified.
+ assignCalleeSavedSpillSlots(Fn, SavedRegs);
+
+ // Determine placement of CSR spill/restore code:
+ // place all spills in the entry block, all restores in return blocks.
+ calculateSets(Fn);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+ scavengeFrameVirtualRegs(Fn);
+ // Clear any vregs created by virtual scavenging.
+ // LOCALMOD: made this call conditional on scavengeFrameVirtualRegs()
+ Fn.getRegInfo().clearVirtRegs();
+ }
+
+ // Warn on stack size when it exceeds the given limit.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ uint64_t StackSize = MFI->getStackSize();
+ if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+ DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+ F->getContext().diagnose(DiagStackSize);
+ }
+
+ delete RS;
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void WasmPEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+void WasmPEI::assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs) {
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ if (SavedRegs.empty())
+ return;
+
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ // If target doesn't implement this, use generic code.
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+ I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Visited will contain all the basic blocks that are in the region
+ // where the callee saved registers are alive:
+ // - Anything that is not Save or Restore -> LiveThrough.
+ // - Save -> LiveIn.
+ // - Restore -> LiveOut.
+ // The live-out is not attached to the block, so no need to keep
+ // Restore in this set.
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ SmallVector<MachineBasicBlock *, 8> WorkList;
+ MachineBasicBlock *Entry = &MF.front();
+ MachineBasicBlock *Save = MFI->getSavePoint();
+
+ if (!Save)
+ Save = Entry;
+
+ if (Entry != Save) {
+ WorkList.push_back(Entry);
+ Visited.insert(Entry);
+ }
+ Visited.insert(Save);
+
+ MachineBasicBlock *Restore = MFI->getRestorePoint();
+ if (Restore)
+ // By construction Restore cannot be visited, otherwise it
+ // means there exists a path to Restore that does not go
+ // through Save.
+ WorkList.push_back(Restore);
+
+ while (!WorkList.empty()) {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ if (CurBB == Save && Save != Restore)
+ continue;
+ // Enqueue all the successors not already visited.
+ // Those are by construction either before Save or after Restore.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ }
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
+ }
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function.
+///
+void WasmPEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ // Spill using target interface.
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
+ }
+
+ // Restore using target interface.
+ for (MachineBasicBlock *MBB : RestoreBlocks) {
+ I = MBB->end();
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign, unsigned Skew) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo *MFI, bool StackGrowsDown,
+ int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+
+ for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+ E = UnassignedObjs.end(); I != E; ++I) {
+ int i = *I;
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void WasmPEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ StackProtector *SP = &getAnalysis<StackProtector>();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // Skew to be applied to alignment.
+ unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address of
+ // the object -- which is given by the offset. For a downward-growing
+ // stack, the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // incoming stack pointer if a frame pointer is required and the FP is
+ // closer to the incoming than to the final stack pointer.
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
+ bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+ TFI.isFPCloseToIncomingSP() &&
+ RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn));
+ if (RS && EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign, Skew);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ case StackProtector::SSPLK_None:
+ continue;
+ case StackProtector::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites immediately on entry
+ // to the current function, count it as part of the overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void WasmPEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+
+ // Add prologue to the function...
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(Fn, *SaveBlock);
+
+ // Add epilogue to restore the callee-save registers in each exiting block.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ TFI.emitEpilogue(Fn, *RestoreBlock);
+
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(Fn, *SaveBlock);
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (Fn.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ }
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void WasmPEI::replaceFrameIndices(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+ // Store SPAdj at exit of a basic block.
+ SmallVector<int, 8> SPState;
+ SPState.resize(Fn.getNumBlockIDs());
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ // Iterate over the reachable blocks in DFS order.
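+ // DFS order guarantees that a block's DFS-stack predecessor has already
+ // been visited, so its exit SP adjustment is available in SPState below.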
+ for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ DFI != DFE; ++DFI) {
+ int SPAdj = 0;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ SPAdj = SPState[StackPred->getNumber()];
+ }
+ MachineBasicBlock *BB = *DFI;
+ replaceFrameIndices(BB, Fn, SPAdj);
+ SPState[BB->getNumber()] = SPAdj;
+ }
+
+ // Handle the unreachable blocks.
+ for (auto &BB : Fn) {
+ if (Reachable.count(&BB))
+ // Already handled in DFS traversal.
+ continue;
+ int SPAdj = 0;
+ replaceFrameIndices(&BB, Fn, SPAdj);
+ }
+}
+
+void WasmPEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj) {
+ assert(Fn.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ bool InsideCallSequence = false;
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ SPAdj += TII.getSPAdjust(I);
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = std::prev(I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = std::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
+
+ // Frame indices in debug values are encoded in a target independent
+ // way with simply the frame index and offset rather than any
+ // target-specific addressing mode.
+ if (MI->isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI->getOperand(1);
+ Offset.setImm(Offset.getImm() +
+ TFI->getFrameIndexReference(
+ Fn, MI->getOperand(0).getIndex(), Reg));
+ MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI->isDebugValue() || i == 0) &&
+ "Frame indicies can only appear as the first operand of a "
+ "DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI->getOperand(i + 1);
+ const unsigned refOffset =
+ TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
+ Reg);
+
+ Offset.setImm(Offset.getImm() + refOffset);
+ MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? nullptr : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = nullptr;
+ break;
+ }
+
+ // If we are looking at a call sequence, we need to keep track of
+ // the SP adjustment made by each instruction in the sequence.
+ // This includes both the frame setup/destroy pseudos (handled above),
+ // as well as other instructions that have side effects w.r.t the SP.
+ // Note that this must come after eliminateFrameIndex, because
+ // if I itself referred to a frame index, we shouldn't count its own
+ // adjustment.
+ if (MI && InsideCallSequence)
+ SPAdj += TII.getSPAdjust(MI);
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+void
+WasmPEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(&*BB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for which we inserted spill code for a definition by what was
+ // originally the first instruction in BB.
+ if (I == MachineBasicBlock::iterator(nullptr))
+ I = BB->begin();
+
+ MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator P =
+ I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert(ScratchReg && "Missing scratch register!");
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setRegUsed(ScratchReg);
+ }
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != std::prev(J)) {
+ BB->splice(J, &*BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+ } else
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 0000000..4ad6eed
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,86 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly late peephole optimizer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID;
+ WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+FunctionPass *llvm::createWebAssemblyPeephole() {
+ return new WebAssemblyPeephole();
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case WebAssembly::STORE8_I32:
+ case WebAssembly::STORE16_I32:
+ case WebAssembly::STORE8_I64:
+ case WebAssembly::STORE16_I64:
+ case WebAssembly::STORE32_I64:
+ case WebAssembly::STORE_F32:
+ case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_I32:
+ case WebAssembly::STORE_I64: {
+ // Store instructions return their value operand. If we ended up using
+ // the same register for both the stored value and the result, replace
+ // the result with a dead def so that it can use $discard instead.
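+ // Here, operand 0 is the store's result and operand 3 is the stored
+ // value (operands 1 and 2 are the offset immediate and the address).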
+ MachineOperand &MO = MI.getOperand(0);
+ unsigned OldReg = MO.getReg();
+ // TODO: Handle SP/physregs
+ if (OldReg == MI.getOperand(3).getReg()
+ && TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) {
+ Changed = true;
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ MO.setReg(NewReg);
+ MO.setIsDead();
+ MFI.stackifyVReg(NewReg);
+ MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
new file mode 100644
index 0000000..9ec6659
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -0,0 +1,175 @@
+//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a virtual register coloring pass.
+///
+/// WebAssembly doesn't have a fixed number of registers, but it is still
+/// desirable to minimize the total number of registers used in each function.
+///
+/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-coloring"
+
+namespace {
+class WebAssemblyRegColoring final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegColoring() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Register Coloring";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyRegColoring::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegColoring() {
+ return new WebAssemblyRegColoring();
+}
+
+// Compute the total spill weight for VReg.
+static float computeWeight(const MachineRegisterInfo *MRI,
+ const MachineBlockFrequencyInfo *MBFI,
+ unsigned VReg) {
+ float weight = 0.0f;
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
+ weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+ MO.getParent());
+ return weight;
+}
+
+bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Register Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual
+ // registers could be modified before the longjmp is executed, resulting in
+ // the wrong value being used afterwards. (See <rdar://problem/8007500>.)
+ // TODO: Does WebAssembly need to care about setjmp for register coloring?
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ LiveIntervals *Liveness = &getAnalysis<LiveIntervals>();
+ const MachineBlockFrequencyInfo *MBFI =
+ &getAnalysis<MachineBlockFrequencyInfo>();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ // Gather all register intervals into a list and sort them.
+ unsigned NumVRegs = MRI->getNumVirtRegs();
+ SmallVector<LiveInterval *, 0> SortedIntervals;
+ SortedIntervals.reserve(NumVRegs);
+
+ DEBUG(dbgs() << "Interesting register intervals:\n");
+ for (unsigned i = 0; i < NumVRegs; ++i) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+ if (MFI.isVRegStackified(VReg))
+ continue;
+ // Skip unused registers, which can use $discard.
+ if (MRI->use_empty(VReg))
+ continue;
+
+ LiveInterval *LI = &Liveness->getInterval(VReg);
+ assert(LI->weight == 0.0f);
+ LI->weight = computeWeight(MRI, MBFI, VReg);
+ DEBUG(LI->dump());
+ SortedIntervals.push_back(LI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them to put arguments first (since we don't want to rename live-in
+ // registers), by weight next, and then by position.
+ // TODO: Investigate more intelligent sorting heuristics. For starters, we
+ // should try to coalesce adjacent live intervals before non-adjacent ones.
+ std::sort(SortedIntervals.begin(), SortedIntervals.end(),
+ [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+ if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+ return MRI->isLiveIn(LHS->reg);
+ if (LHS->weight != RHS->weight)
+ return LHS->weight > RHS->weight;
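+ // Order non-empty intervals before empty ones; empty intervals
+ // impose no overlap constraints.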
+ if (LHS->empty() || RHS->empty())
+ return !LHS->empty() && RHS->empty();
+ return *LHS < *RHS;
+ });
+
+ DEBUG(dbgs() << "Coloring register intervals:\n");
+ SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
+ SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments(
+ SortedIntervals.size());
+ BitVector UsedColors(SortedIntervals.size());
+ bool Changed = false;
+ for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+ LiveInterval *LI = SortedIntervals[i];
+ unsigned Old = LI->reg;
+ size_t Color = i;
+ const TargetRegisterClass *RC = MRI->getRegClass(Old);
+
+ // Check if it's possible to reuse any of the used colors.
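+ // A color is reusable only if its register class matches and none of the
+ // intervals already assigned to it overlap LI. Live-in registers always
+ // keep their own color so arguments aren't renamed.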
+ if (!MRI->isLiveIn(Old))
+ for (int C(UsedColors.find_first()); C != -1;
+ C = UsedColors.find_next(C)) {
+ if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
+ continue;
+ for (LiveInterval *OtherLI : Assignments[C])
+ if (!OtherLI->empty() && OtherLI->overlaps(*LI))
+ goto continue_outer;
+ Color = C;
+ break;
+ continue_outer:;
+ }
+
+ unsigned New = SortedIntervals[Color]->reg;
+ SlotMapping[i] = New;
+ Changed |= Old != New;
+ UsedColors.set(Color);
+ Assignments[Color].push_back(LI);
+ DEBUG(dbgs() << "Assigning vreg"
+ << TargetRegisterInfo::virtReg2Index(LI->reg) << " to vreg"
+ << TargetRegisterInfo::virtReg2Index(New) << "\n");
+ }
+ if (!Changed)
+ return false;
+
+ // Rewrite register operands.
+ for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+ unsigned Old = SortedIntervals[i]->reg;
+ unsigned New = SlotMapping[i];
+ if (Old != New)
+ MRI->replaceRegWith(Old, New);
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
new file mode 100644
index 0000000..f621db0
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -0,0 +1,109 @@
+//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass which assigns WebAssembly register
+/// numbers for CodeGen virtual registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-numbering"
+
+namespace {
+class WebAssemblyRegNumbering final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Register Numbering";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegNumbering() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegNumbering::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegNumbering() {
+ return new WebAssemblyRegNumbering();
+}
+
+bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Register Numbering **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
+
+ MFI.initWARegs();
+
+ // WebAssembly argument registers are in the same index space as local
+ // variables. Assign the numbers for them first.
+ MachineBasicBlock &EntryMBB = MF.front();
+ for (MachineInstr &MI : EntryMBB) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Then assign regular WebAssembly registers for all remaining used
+ // virtual registers. TODO: Consider sorting the registers by frequency of
+ // use, to maximize usage of small immediate fields.
+ unsigned NumArgRegs = MFI.getParams().size();
+ unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs();
+ unsigned NumStackRegs = 0;
+ unsigned CurReg = 0;
+ for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx);
+ // Handle stackified registers.
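+ // Stackified registers get no local number; setting the high bit
+ // (INT32_MIN) tags them in a separate number space.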
+ if (MFI.isVRegStackified(VReg)) {
+ MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++);
+ continue;
+ }
+ // Skip unused registers.
+ if (MRI.use_empty(VReg))
+ continue;
+ if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg)
+ MFI.setWAReg(VReg, NumArgRegs + CurReg++);
+ }
+ // Allocate locals for used physical registers
+ if (FrameInfo.getStackSize() > 0)
+ MFI.addPReg(WebAssembly::SP32, CurReg++);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 0000000..89ef5cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this form
+/// are then marked as "stackified", meaning references to them are replaced by
+/// "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Register Stackify";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(LiveVariablesID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+ return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+ // Write the opaque EXPR_STACK register.
+ if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ /*isDef=*/true,
+ /*isImp=*/true));
+
+ // Also read the opaque EXPR_STACK register.
+ if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ /*isDef=*/false,
+ /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+ AliasAnalysis &AA, LiveIntervals &LIS,
+ MachineRegisterInfo &MRI) {
+ assert(Def->getParent() == Insert->getParent());
+ bool SawStore = false, SawSideEffects = false;
+ MachineBasicBlock::const_iterator D(Def), I(Insert);
+
+ // Check for register dependencies.
+ for (const MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ // If the register is dead here and at Insert, ignore it.
+ if (MO.isDead() && Insert->definesRegister(Reg) &&
+ !Insert->readsRegister(Reg))
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // If the physical register is never modified, ignore it.
+ if (!MRI.isPhysRegModified(Reg))
+ continue;
+ // Otherwise, it's a physical register with unknown liveness.
+ return false;
+ }
+
+ // Ask LiveIntervals whether moving this virtual register use or def to
+ // Insert will change which value numbers are seen.
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ VNInfo *DefVNI = MO.isDef() ?
+ LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) :
+ LI.getVNInfoBefore(LIS.getInstructionIndex(Def));
+ assert(DefVNI && "Instruction input missing value number");
+ VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert));
+ if (InsVNI && DefVNI != InsVNI)
+ return false;
+ }
+
+ // Check for memory dependencies and side effects.
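+ // Walk backwards from Insert to Def, recording whether any intervening
+ // instruction may store (SawStore) or is otherwise unsafe to move past
+ // (SawSideEffects).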
+ for (--I; I != D; --I)
+ SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
+ return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+ !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
+bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Register Stackifying **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+ // Walk the instructions from the bottom up. Currently we don't look past
+ // block boundaries, and the blocks aren't ordered so the block visitation
+ // order isn't significant, but we may want to change this in the future.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : reverse(MBB)) {
+ MachineInstr *Insert = &MI;
+ // Don't nest anything inside a phi.
+ if (Insert->getOpcode() == TargetOpcode::PHI)
+ break;
+
+ // Don't nest anything inside an inline asm, because we don't have
+ // constraints for $push inputs.
+ if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+ break;
+
+ // Iterate through the inputs in reverse order, since we'll be pulling
+ // operands off the stack in LIFO order.
+ bool AnyStackified = false;
+ for (MachineOperand &Op : reverse(Insert->uses())) {
+ // We're only interested in explicit virtual register operands.
+ if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+ continue;
+
+ unsigned Reg = Op.getReg();
+
+ // Only consider registers with a single definition.
+ // TODO: Eventually we may relax this, to stackify phi transfers.
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (!Def)
+ continue;
+
+ // There's no use in nesting implicit defs inside anything.
+ if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ continue;
+
+ // Don't nest an INLINE_ASM def into anything, because we don't have
+ // constraints for $pop outputs.
+ if (Def->getOpcode() == TargetOpcode::INLINEASM)
+ continue;
+
+ // Don't nest PHIs inside of anything.
+ if (Def->getOpcode() == TargetOpcode::PHI)
+ continue;
+
+ // Argument instructions represent live-in registers and not real
+ // instructions.
+ if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+ continue;
+
+ // Single-use expression trees require defs that have one use.
+ // TODO: Eventually we'll relax this, to take advantage of set_local
+ // returning its result.
+ if (!MRI.hasOneUse(Reg))
+ continue;
+
+ // For now, be conservative and don't look across block boundaries.
+ // TODO: Be more aggressive?
+ if (Def->getParent() != &MBB)
+ continue;
+
+ // Don't move instructions that have side effects or memory dependencies
+ // or other complications.
+ if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
+ continue;
+
+ Changed = true;
+ AnyStackified = true;
+ // Move the def down and nest it in the current instruction.
+ MBB.splice(Insert, &MBB, Def);
+ LIS.handleMove(Def);
+ MFI.stackifyVReg(Reg);
+ ImposeStackOrdering(Def);
+ Insert = Def;
+ }
+ if (AnyStackified)
+ ImposeStackOrdering(&MI);
+ }
+ }
+
+ // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
+ // so that it never looks like a use-before-def.
+ if (Changed) {
+ MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+ for (MachineBasicBlock &MBB : MF)
+ MBB.addLiveIn(WebAssembly::EXPR_STACK);
+ }
+
+#ifndef NDEBUG
+ // Verify that pushes and pops are performed in LIFO order.
+ SmallVector<unsigned, 0> Stack;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+ if (!MO.isReg())
+ continue;
+ unsigned VReg = MO.getReg();
+
+ // Don't stackify physregs like SP or FP.
+ if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ continue;
+
+ if (MFI.isVRegStackified(VReg)) {
+ if (MO.isDef())
+ Stack.push_back(VReg);
+ else
+ assert(Stack.pop_back_val() == VReg);
+ }
+ }
+ }
+ // TODO: Generalize this code to support keeping values on the stack across
+ // basic block boundaries.
+ assert(Stack.empty());
+ }
+#endif
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 385c40b..dcada45 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -43,7 +43,7 @@ WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
}
BitVector
-WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
BitVector Reserved(getNumRegs());
for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32,
WebAssembly::FP64})
@@ -52,9 +52,37 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
void WebAssemblyRegisterInfo::eliminateFrameIndex(
- MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME
+ MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger * /*RS*/) const {
+ assert(SPAdj == 0);
+ MachineInstr &MI = *II;
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
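+ // Frame objects were laid out at negative offsets from the top of the
+ // frame (wasm's stack grows down), so adding the stack size should yield
+ // a non-negative offset from SP after the prologue.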
+ int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+
+ if (MI.mayLoadOrStore()) {
+ // If this is a load or store, make it relative to SP and fold the frame
+ // offset directly in.
+ assert(MI.getOperand(1).getImm() == 0 &&
+ "Can't eliminate FI yet if offset is already set");
+ MI.getOperand(1).setImm(FrameOffset);
+ MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ } else {
+ // Otherwise, create an 'i32.add SP, offset' and make it the operand.
+ auto &MRI = MF.getRegInfo();
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(FrameOffset);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addReg(OffsetReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false);
+ }
}
unsigned
@@ -67,21 +95,11 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
}
-bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- return !MF.getFunction()->hasFnAttribute("no-realign-stack");
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool WebAssemblyRegisterInfo::needsStackRealignment(
- const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
+const TargetRegisterClass *
+WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
+ if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+ return &WebAssembly::I64RegClass;
+ return &WebAssembly::I32RegClass;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index dbdb9d0..ad1d71e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -42,9 +42,9 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
- // Base pointer (stack realignment) support.
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 2ba42eb..80a83fa 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -33,22 +33,26 @@ def FP64 : WebAssemblyReg<"%FP64">;
def SP32 : WebAssemblyReg<"%SP32">;
def SP64 : WebAssemblyReg<"%SP64">;
-// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do
-// away with it? Try deleting once the backend works.
-// WebAssembly uses virtual registers, but the backend defines a few physical
-// registers here to keep SDAG and the MachineInstr layers happy.
-foreach i = 0-4 in {
- def I#i : WebAssemblyReg<"%i."#i>; // i32
- def L#i : WebAssemblyReg<"%l."#i>; // i64
- def F#i : WebAssemblyReg<"%f."#i>; // f32
- def D#i : WebAssemblyReg<"%d."#i>; // f64
-}
+// The register allocation framework requires register classes have at least
+// one register, so we define a few for the floating point register classes
+// since we otherwise don't need a physical register in those classes.
+def F32_0 : WebAssemblyReg<"%f32.0">;
+def F64_0 : WebAssemblyReg<"%f64.0">;
+
+// The expression stack "register". This is an opaque entity which serves to
+// order uses and defs that must remain in LIFO order.
+def EXPR_STACK : WebAssemblyReg<"STACK">;
+
+// The incoming arguments "register". This is an opaque entity which serves to
+// order the ARGUMENT instructions that are emulating live-in registers and
+// must not be scheduled below other instructions.
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>;
-def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>;
-def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
-def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>;
+def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
+def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
+def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
+def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
new file mode 100644
index 0000000..4e08b2b
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -0,0 +1,124 @@
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value. This is to enable
+/// an optimization wherein uses of the stored value can be replaced by uses of
+/// the store's result value, making the stored value register more likely to
+/// be single-use, thus more likely to be useful to register stackifying, and
+/// potentially also exposing the store to register stackifying. These both can
+/// reduce get_local/set_local traffic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Store Results";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyStoreResults::ID = 0;
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+ return new WebAssemblyStoreResults();
+}
+
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Store Results **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ bool Changed = false;
+
+ assert(MRI.isSSA() && "StoreResults depends on SSA form");
+
+ for (auto &MBB : MF) {
+ DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case WebAssembly::STORE8_I32:
+ case WebAssembly::STORE16_I32:
+ case WebAssembly::STORE8_I64:
+ case WebAssembly::STORE16_I64:
+ case WebAssembly::STORE32_I64:
+ case WebAssembly::STORE_F32:
+ case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_I32:
+ case WebAssembly::STORE_I64:
+ unsigned ToReg = MI.getOperand(0).getReg();
+ unsigned FromReg = MI.getOperand(3).getReg();
+ for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+ MachineOperand &O = *I++;
+ MachineInstr *Where = O.getParent();
+ if (Where->getOpcode() == TargetOpcode::PHI) {
+ // PHIs use their operands on their incoming CFG edges rather than
+ // in their parent blocks. Get the basic block paired with this use
+ // of FromReg and check that MI's block dominates it.
+ MachineBasicBlock *Pred =
+ Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+ if (!MDT.dominates(&MBB, Pred))
+ continue;
+ } else {
+ // For a non-PHI, check that MI dominates the instruction in the
+ // normal way.
+ if (&MI == Where || !MDT.dominates(&MI, Where))
+ continue;
+ }
+ Changed = true;
+ DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
+ << " from " << MI << "\n");
+ O.setReg(ToReg);
+ // If the store's def was previously dead, it is no longer. But the
+ // dead flag shouldn't be set yet.
+ assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 3d9e7aa..cb2d5a6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
+bool WebAssemblySubtarget::useAA() const { return true; }
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 6f17619..f530a29 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -61,9 +61,15 @@ public:
const WebAssemblyTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+ const WebAssemblyInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
+ }
+ const WebAssemblyRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override;
- bool useAA() const override { return true; }
+ bool useAA() const override;
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 6f93248..e31ea46 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,11 +45,16 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT.isArch64Bit()
- ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
- : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+ : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
+ : "e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+ // WebAssembly type-checks expressions, but a noreturn function with a return
+ // type that doesn't match the context will cause a check failure. So we lower
+ // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
+ // 'unreachable' expression, which is meant for that case.
+ this->Options.TrapUnreachable = true;
+
initAsmInfo();
// We need a reducible CFG, so disable some optimizations which tend to
@@ -77,7 +82,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
@@ -94,23 +99,18 @@ public:
}
FunctionPass *createTargetRegisterAllocator(bool) override;
- void addFastRegAlloc(FunctionPass *RegAllocPass) override;
- void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addIRPasses() override;
- bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
void addPreRegAlloc() override;
- void addRegAllocPasses(bool Optimized);
void addPostRegAlloc() override;
- void addPreSched2() override;
void addPreEmitPass() override;
};
} // end anonymous namespace
TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
});
}
@@ -124,50 +124,86 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "WebAssembly uses no regalloc!");
- addRegAllocPasses(false);
-}
-
-void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "WebAssembly uses no regalloc!");
- addRegAllocPasses(true);
-}
-
//===----------------------------------------------------------------------===//
// The following functions are called from lib/CodeGen/Passes.cpp to modify
// the CodeGen pass sequence.
//===----------------------------------------------------------------------===//
void WebAssemblyPassConfig::addIRPasses() {
- // FIXME: the default for this option is currently POSIX, whereas
- // WebAssembly's MVP should default to Single.
if (TM->Options.ThreadModel == ThreadModel::Single)
+ // In "single" mode, atomics get lowered to non-atomics.
addPass(createLowerAtomicPass());
else
// Expand some atomic operations. WebAssemblyTargetLowering has hooks which
// control specifically what gets lowered.
addPass(createAtomicExpandPass(TM));
+ // Optimize "returned" function attributes.
+ addPass(createWebAssemblyOptimizeReturned());
+
TargetPassConfig::addIRPasses();
}
-bool WebAssemblyPassConfig::addPreISel() { return false; }
-
bool WebAssemblyPassConfig::addInstSelector() {
+ (void)TargetPassConfig::addInstSelector();
addPass(
createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+ // Run the argument-move pass immediately after the ScheduleDAG scheduler
+ // so that we can fix up the ARGUMENT instructions before anything else
+ // sees them in the wrong place.
+ addPass(createWebAssemblyArgumentMove());
return false;
}
-bool WebAssemblyPassConfig::addILPOpts() { return true; }
+bool WebAssemblyPassConfig::addILPOpts() {
+ (void)TargetPassConfig::addILPOpts();
+ return true;
+}
+
+void WebAssemblyPassConfig::addPreRegAlloc() {
+ TargetPassConfig::addPreRegAlloc();
-void WebAssemblyPassConfig::addPreRegAlloc() {}
+ // Prepare store instructions for register stackifying.
+ addPass(createWebAssemblyStoreResults());
+}
-void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+void WebAssemblyPassConfig::addPostRegAlloc() {
+ // TODO: The following CodeGen passes don't currently support code containing
+ // virtual registers. Consider removing their restrictions and re-enabling
+ // them.
+ //
+ // We use our own PrologEpilogInserter which is very slightly modified to
+ // tolerate virtual registers.
+ disablePass(&PrologEpilogCodeInserterID);
+ // Fails with: should be run after register allocation.
+ disablePass(&MachineCopyPropagationID);
+
+ // Mark registers as representing wasm's expression stack.
+ addPass(createWebAssemblyRegStackify());
+
+ // Run the register coloring pass to reduce the total number of registers.
+ addPass(createWebAssemblyRegColoring());
+
+ TargetPassConfig::addPostRegAlloc();
+
+ // Run WebAssembly's version of the PrologEpilogInserter. Target-independent
+ // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run
+ // PEI before ShrinkWrap but otherwise in the same position in the order.
+ addPass(createWebAssemblyPEI());
+}
-void WebAssemblyPassConfig::addPostRegAlloc() {}
+void WebAssemblyPassConfig::addPreEmitPass() {
+ TargetPassConfig::addPreEmitPass();
-void WebAssemblyPassConfig::addPreSched2() {}
+ // Put the CFG in structured form; insert BLOCK and LOOP markers.
+ addPass(createWebAssemblyCFGStackify());
-void WebAssemblyPassConfig::addPreEmitPass() {}
+ // Lower br_unless into br_if.
+ addPass(createWebAssemblyLowerBrUnless());
+
+ // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
+ addPass(createWebAssemblyRegNumbering());
+
+ // Perform the very last peephole optimizations on the code.
+ addPass(createWebAssemblyPeephole());
+}
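
The hooks above follow a standard TargetPassConfig idiom: disable a target-independent pass by its ID, then schedule a target-specific replacement at the equivalent point in the pipeline. A minimal sketch of that pattern against the LLVM 3.8-era legacy pass manager follows; createMyPrologEpilogPass() is a hypothetical stand-in for createWebAssemblyPEI(), not a real API.

#include "llvm/CodeGen/Passes.h"
using namespace llvm;

FunctionPass *createMyPrologEpilogPass(); // hypothetical replacement pass

namespace {
class MyPassConfig : public TargetPassConfig {
public:
  MyPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  void addPostRegAlloc() override {
    // Drop the generic PrologEpilogInserter, which cannot cope with
    // virtual registers surviving this late.
    disablePass(&PrologEpilogCodeInserterID);
    TargetPassConfig::addPostRegAlloc();
    // Insert the tolerant replacement at roughly the same position.
    addPass(createMyPrologEpilogPass());
  }
};
} // end anonymous namespace
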
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
new file mode 100644
index 0000000..74e33b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the functions of the WebAssembly-specific subclass
+/// of TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
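
For context, InitializeELF() keys off TargetOptions::UseInitArray to decide where static constructors are emitted. The effect, in terms of standard ELF conventions (illustrative helper, not code from this patch):

// Which ELF section receives static constructors under each setting.
const char *ctorSection(bool UseInitArray) {
  // .init_array is the modern SHT_INIT_ARRAY mechanism walked by the C
  // runtime at startup; .ctors is the legacy GNU scheme handled by
  // crtbegin/crtend.
  return UseInitArray ? ".init_array" : ".ctors";
}
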
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ee78b94..39e50c9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -16,50 +16,13 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-class GlobalVariable;
-
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
public:
- WebAssemblyTargetObjectFile() {
- TextSection = nullptr;
- DataSection = nullptr;
- BSSSection = nullptr;
- ReadOnlySection = nullptr;
-
- StaticCtorSection = nullptr;
- StaticDtorSection = nullptr;
- LSDASection = nullptr;
- EHFrameSection = nullptr;
- DwarfAbbrevSection = nullptr;
- DwarfInfoSection = nullptr;
- DwarfLineSection = nullptr;
- DwarfFrameSection = nullptr;
- DwarfPubTypesSection = nullptr;
- DwarfDebugInlineSection = nullptr;
- DwarfStrSection = nullptr;
- DwarfLocSection = nullptr;
- DwarfARangesSection = nullptr;
- DwarfRangesSection = nullptr;
- }
-
- MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override {
- return ReadOnlySection;
- }
-
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override {
- return DataSection;
- }
-
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override;
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index fa88ed5..3566317 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -21,8 +21,7 @@ using namespace llvm;
#define DEBUG_TYPE "wasmtti"
TargetTransformInfo::PopcntSupportKind
-WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- // TODO: Make Math.popcount32 happen in WebAssembly.
- return TTI::PSK_Software;
+ return TargetTransformInfo::PSK_FastHardware;
}
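
Advertising PSK_FastHardware changes mid-end cost decisions: idiom recognizers ask TTI before rewriting a bit-counting loop into @llvm.ctpop. A hedged sketch of that query, loosely modeled on LoopIdiomRecognize (shouldFormCtpop() is illustrative, not an actual LLVM function):

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Only form the intrinsic when the target claims fast hardware popcount;
// under PSK_Software it would expand to a libcall or bit tricks and the
// rewrite may not pay off.
static bool shouldFormCtpop(const TargetTransformInfo &TTI) {
  return TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware;
}
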
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 7ffb604..26dc388 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -38,7 +38,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
const WebAssemblyTargetLowering *getTLI() const { return TLI; }
public:
- WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+ WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -54,7 +54,7 @@ public:
// TODO: Implement more Scalar TTI for WebAssembly
- TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
/// @}
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
new file mode 100644
index 0000000..ee9d060
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -0,0 +1,311 @@
+# Tests which are known to fail from the GCC torture test suite.
+
+# Core dump.
+920908-1.c
+pr38151.c
+va-arg-22.c
+
+# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
+struct-ret-1.c
+va-arg-11.c
+va-arg-21.c
+va-arg-24.c
+va-arg-trap-1.c
+
+# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed.
+20000815-1.c
+20010129-1.c
+930628-1.c
+980707-1.c
+
+# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed.
+20030914-2.c
+20040703-1.c
+20081117-1.c
+920625-1.c
+931004-11.c
+931004-13.c
+980223.c
+bitfld-5.c
+complex-7.c
+pr38969.c
+pr51323.c
+pr52129.c
+pr57130.c
+
+# These were previously "Cannot select FrameIndex." Now most of them fail
+# because they contain call frame pseudos (e.g., calls to vararg functions),
+# frame pointers, or similar. This list will be updated again soon.
+20000519-1.c
+20000706-4.c
+20000706-5.c
+20000801-2.c
+20000801-4.c
+20011126-2.c
+
+20020529-1.c
+20021024-1.c
+
+20030828-1.c
+20030914-1.c
+
+20040302-1.c
+20040625-1.c
+20040823-1.c
+
+20041113-1.c
+
+20041214-1.c
+
+20050826-2.c
+
+20071213-1.c
+
+20080506-2.c
+20080519-1.c
+
+20081103-1.c
+20090113-1.c
+20090113-2.c
+20090113-3.c
+
+20090623-1.c
+
+920501-6.c
+920501-8.c
+920726-1.c
+930518-1.c
+
+931004-10.c
+931004-12.c
+931004-14.c
+931004-2.c
+931004-4.c
+931004-6.c
+931004-8.c
+
+980205.c
+980608-1.c
+980709-1.c
+980716-1.c
+990127-1.c
+
+991216-2.c
+
+#cbrt.c
+complex-5.c
+complex-6.c
+
+enum-3.c
+fprintf-chk-1.c
+frame-address.c
+loop-15.c
+loop-ivopts-2.c
+mayalias-3.c
+
+multi-ix.c
+
+pr20466-1.c
+
+
+pr28778.c
+pr28982b.c
+
+pr30778.c
+pr31448-2.c
+pr31448.c
+
+pr33870-1.c
+pr33870.c
+
+pr38051.c
+
+pr39100.c
+
+pr39339.c
+pr40022.c
+pr40657.c
+
+pr43987.c
+
+pr44575.c
+
+pr44942.c
+pr46309.c
+pr47538.c
+pr47925.c
+
+pr49390.c
+pr49419.c
+
+#pr51877.c
+
+#pr52979-1.c
+#pr52979-2.c
+pr53645-2.c
+pr53645.c
+
+pr56205.c
+
+pr56866.c
+
+pr57876.c
+pr58277-1.c
+
+pr59643.c
+
+printf-chk-1.c
+pta-field-1.c
+pta-field-2.c
+
+stdarg-1.c
+stdarg-2.c
+stdarg-3.c
+stdarg-4.c
+strct-stdarg-1.c
+strct-varg-1.c
+
+va-arg-1.c
+va-arg-10.c
+va-arg-12.c
+va-arg-13.c
+va-arg-14.c
+va-arg-15.c
+va-arg-16.c
+va-arg-17.c
+va-arg-18.c
+va-arg-19.c
+va-arg-2.c
+va-arg-20.c
+va-arg-23.c
+va-arg-26.c
+va-arg-4.c
+va-arg-5.c
+va-arg-6.c
+va-arg-7.c
+va-arg-8.c
+va-arg-9.c
+va-arg-pack-1.c
+vfprintf-1.c
+vfprintf-chk-1.c
+vprintf-1.c
+vprintf-chk-1.c
+
+# Cannot select callseq_end.
+20040811-1.c
+pr43220.c
+vla-dealloc-1.c
+
+# Cannot select brind.
+20071210-1.c
+920501-4.c
+920501-5.c
+
+# Cannot select BlockAddress.
+comp-goto-1.c
+980526-1.c
+990208-1.c
+
+# WebAssembly hasn't implemented byval arguments.
+20000412-3.c
+20000419-1.c
+20000706-1.c
+20000706-2.c
+20000707-1.c
+20000717-1.c
+20000717-5.c
+20000808-1.c
+20010605-2.c
+20011113-1.c
+20020215-1.c
+20020810-1.c
+20021118-1.c
+20040707-1.c
+20040709-1.c
+20040709-2.c
+20041201-1.c
+20050713-1.c
+20070614-1.c
+920908-2.c
+921112-1.c
+921117-1.c
+921123-2.c
+921204-1.c
+930126-1.c
+930208-1.c
+931004-5.c
+931004-9.c
+931031-1.c
+950607-2.c
+960416-1.c
+990525-1.c
+991118-1.c
+bf64-1.c
+complex-1.c
+complex-2.c
+pr15262-2.c
+pr20621-1.c
+pr23135.c
+pr30185.c
+pr42248.c
+
+# Unimplemented operation lowering.
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+990826-0.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+scal-to-vec1.c
+scal-to-vec2.c
+scal-to-vec3.c
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+20050107-1.c
+20050119-1.c
+20050119-2.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c
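
The file format is line-oriented: one test name per line, '#' starts a comment, blank lines are ignored. A reader for a list of this shape could look like the following sketch (illustrative only; the harness that actually consumes this file is not part of this tree):

#include <fstream>
#include <set>
#include <string>

std::set<std::string> readSkipList(const std::string &Path) {
  std::set<std::string> Skip;
  std::ifstream In(Path);
  std::string Line;
  while (std::getline(In, Line)) {
    // Strip comments introduced by '#'.
    std::string::size_type Hash = Line.find('#');
    if (Hash != std::string::npos)
      Line.erase(Hash);
    // Trim trailing whitespace (including CR from CRLF files).
    while (!Line.empty() &&
           (Line.back() == ' ' || Line.back() == '\t' || Line.back() == '\r'))
      Line.pop_back();
    if (!Line.empty())
      Skip.insert(Line);
  }
  return Skip;
}
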
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0..09cc53a 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,10 +10,8 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86AsmInstrumentation.h"
#include "X86Operand.h"
-#include "X86RegisterInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -118,11 +116,6 @@ bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
-std::string FuncName(unsigned AccessSize, bool IsWrite) {
- return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
- utostr(AccessSize);
-}
-
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
struct RegisterContext {
@@ -136,26 +129,26 @@ public:
public:
RegisterContext(unsigned AddressReg, unsigned ShadowReg,
unsigned ScratchReg) {
- BusyRegs.push_back(convReg(AddressReg, MVT::i64));
- BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
- BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+ BusyRegs.push_back(convReg(AddressReg, 64));
+ BusyRegs.push_back(convReg(ShadowReg, 64));
+ BusyRegs.push_back(convReg(ScratchReg, 64));
}
- unsigned AddressReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+ unsigned AddressReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
}
- unsigned ShadowReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+ unsigned ShadowReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
}
- unsigned ScratchReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+ unsigned ScratchReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
}
void AddBusyReg(unsigned Reg) {
if (Reg != X86::NoRegister)
- BusyRegs.push_back(convReg(Reg, MVT::i64));
+ BusyRegs.push_back(convReg(Reg, 64));
}
void AddBusyRegs(const X86Operand &Op) {
@@ -163,36 +156,36 @@ public:
AddBusyReg(Op.getMemIndexReg());
}
- unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+ unsigned ChooseFrameReg(unsigned Size) const {
static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
X86::RCX, X86::RDX, X86::RDI,
X86::RSI };
for (unsigned Reg : Candidates) {
if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
- return convReg(Reg, VT);
+ return convReg(Reg, Size);
}
return X86::NoRegister;
}
private:
- unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
- return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+ unsigned convReg(unsigned Reg, unsigned Size) const {
+ return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
}
std::vector<unsigned> BusyRegs;
};
- X86AddressSanitizer(const MCSubtargetInfo &STI)
+ X86AddressSanitizer(const MCSubtargetInfo *&STI)
: X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
- virtual ~X86AddressSanitizer() {}
+ ~X86AddressSanitizer() override {}
// X86AsmInstrumentation implementation:
- virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
- OperandVector &Operands,
- MCContext &Ctx,
- const MCInstrInfo &MII,
- MCStreamer &Out) override {
+ void InstrumentAndEmitInstruction(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) override {
InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
if (RepPrefix)
EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
@@ -240,17 +233,16 @@ public:
protected:
void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
- void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
- MCStreamer &Out) {
- assert(VT == MVT::i32 || VT == MVT::i64);
+ void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
+ assert(Size == 32 || Size == 64);
MCInst Inst;
- Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
- Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, VT)));
+ Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
+ Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
Op.addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+ void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
unsigned Reg, MCContext &Ctx, MCStreamer &Out);
// Creates new memory operand with Displacement added to an original
@@ -261,13 +253,13 @@ protected:
MCContext &Ctx, int64_t *Residue);
bool is64BitMode() const {
- return STI.getFeatureBits()[X86::Mode64Bit];
+ return STI->getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
- return STI.getFeatureBits()[X86::Mode32Bit];
+ return STI->getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
- return STI.getFeatureBits()[X86::Mode16Bit];
+ return STI->getFeatureBits()[X86::Mode16Bit];
}
unsigned getPointerWidth() {
@@ -437,7 +429,7 @@ void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
}
void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
- MVT::SimpleValueType VT,
+ unsigned Size,
unsigned Reg, MCContext &Ctx,
MCStreamer &Out) {
int64_t Displacement = 0;
@@ -450,14 +442,14 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
// Emit Op as is.
if (Displacement == 0) {
- EmitLEA(Op, VT, Reg, Out);
+ EmitLEA(Op, Size, Reg, Out);
return;
}
int64_t Residue;
std::unique_ptr<X86Operand> NewOp =
AddDisplacement(Op, Displacement, Ctx, &Residue);
- EmitLEA(*NewOp, VT, Reg, Out);
+ EmitLEA(*NewOp, Size, Reg, Out);
while (Residue != 0) {
const MCConstantExpr *Disp =
@@ -465,7 +457,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
std::unique_ptr<X86Operand> DispOp =
X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
SMLoc());
- EmitLEA(*DispOp, VT, Reg, Out);
+ EmitLEA(*DispOp, Size, Reg, Out);
Residue -= Disp->getValue();
}
}
@@ -503,16 +495,16 @@ class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x20000000;
- X86AddressSanitizer32(const MCSubtargetInfo &STI)
+ X86AddressSanitizer32(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- virtual ~X86AddressSanitizer32() {}
+ ~X86AddressSanitizer32() override {}
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
if (FrameReg == X86::NoRegister)
return FrameReg;
- return getX86SubSuperRegister(FrameReg, MVT::i32);
+ return getX86SubSuperRegister(FrameReg, 32);
}
void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -535,10 +527,10 @@ public:
OrigSPOffset += 4;
}
- virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
assert(LocalFrameReg != X86::NoRegister);
const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -558,24 +550,24 @@ public:
MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
}
- SpillReg(Out, RegCtx.AddressReg(MVT::i32));
- SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
- if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+ SpillReg(Out, RegCtx.AddressReg(32));
+ SpillReg(Out, RegCtx.ShadowReg(32));
+ if (RegCtx.ScratchReg(32) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(32));
StoreFlags(Out);
}
- virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
assert(LocalFrameReg != X86::NoRegister);
RestoreFlags(Out);
- if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
- RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
- RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+ if (RegCtx.ScratchReg(32) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(32));
+ RestoreReg(Out, RegCtx.ShadowReg(32));
+ RestoreReg(Out, RegCtx.AddressReg(32));
unsigned FrameReg = GetFrameReg(Ctx, Out);
if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
@@ -586,18 +578,18 @@ public:
}
}
- virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
+ void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
@@ -610,10 +602,11 @@ private:
.addReg(X86::ESP)
.addImm(-16));
EmitInstruction(
- Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
+ Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
- const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ (IsWrite ? "store" : "load") +
+ llvm::Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -623,14 +616,14 @@ private:
void X86AddressSanitizer32::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
- assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+ assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
- ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+ ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
AddressRegI32));
@@ -673,7 +666,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+ EmitLEA(*Op, 32, ScratchRegI32, Out);
break;
}
case 4:
@@ -698,10 +691,10 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
void X86AddressSanitizer32::InstrumentMemOperandLarge(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
- ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+ ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
AddressRegI32));
@@ -760,16 +753,16 @@ class X86AddressSanitizer64 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x7fff8000;
- X86AddressSanitizer64(const MCSubtargetInfo &STI)
+ X86AddressSanitizer64(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- virtual ~X86AddressSanitizer64() {}
+ ~X86AddressSanitizer64() override {}
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
if (FrameReg == X86::NoRegister)
return FrameReg;
- return getX86SubSuperRegister(FrameReg, MVT::i64);
+ return getX86SubSuperRegister(FrameReg, 64);
}
void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -792,10 +785,10 @@ public:
OrigSPOffset += 8;
}
- virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
assert(LocalFrameReg != X86::NoRegister);
const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -816,24 +809,24 @@ public:
}
EmitAdjustRSP(Ctx, Out, -128);
- SpillReg(Out, RegCtx.ShadowReg(MVT::i64));
- SpillReg(Out, RegCtx.AddressReg(MVT::i64));
- if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(MVT::i64));
+ SpillReg(Out, RegCtx.ShadowReg(64));
+ SpillReg(Out, RegCtx.AddressReg(64));
+ if (RegCtx.ScratchReg(64) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(64));
StoreFlags(Out);
}
- virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
assert(LocalFrameReg != X86::NoRegister);
RestoreFlags(Out);
- if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(MVT::i64));
- RestoreReg(Out, RegCtx.AddressReg(MVT::i64));
- RestoreReg(Out, RegCtx.ShadowReg(MVT::i64));
+ if (RegCtx.ScratchReg(64) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(64));
+ RestoreReg(Out, RegCtx.AddressReg(64));
+ RestoreReg(Out, RegCtx.ShadowReg(64));
EmitAdjustRSP(Ctx, Out, 128);
unsigned FrameReg = GetFrameReg(Ctx, Out);
@@ -845,18 +838,18 @@ public:
}
}
- virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
+ void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
@@ -864,7 +857,7 @@ private:
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i64, X86::RSP, Out);
+ EmitLEA(*Op, 64, X86::RSP, Out);
OrigSPOffset += Offset;
}
@@ -878,12 +871,13 @@ private:
.addReg(X86::RSP)
.addImm(-16));
- if (RegCtx.AddressReg(MVT::i64) != X86::RDI) {
+ if (RegCtx.AddressReg(64) != X86::RDI) {
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
- RegCtx.AddressReg(MVT::i64)));
+ RegCtx.AddressReg(64)));
}
- const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ (IsWrite ? "store" : "load") +
+ llvm::Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
@@ -893,16 +887,16 @@ private:
void X86AddressSanitizer64::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+ unsigned AddressRegI64 = RegCtx.AddressReg(64);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
- assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+ assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
- ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+ ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
AddressRegI64));
@@ -944,7 +938,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+ EmitLEA(*Op, 32, ScratchRegI32, Out);
break;
}
case 4:
@@ -969,10 +963,10 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
void X86AddressSanitizer64::InstrumentMemOperandLarge(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
+ unsigned AddressRegI64 = RegCtx.AddressReg(64);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
- ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+ ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
AddressRegI64));
@@ -1030,7 +1024,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
} // End anonymous namespace
-X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI)
+X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
: STI(STI), InitialFrameReg(0) {}
X86AsmInstrumentation::~X86AsmInstrumentation() {}
@@ -1043,7 +1037,7 @@ void X86AsmInstrumentation::InstrumentAndEmitInstruction(
void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
const MCInst &Inst) {
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, *STI);
}
unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
@@ -1067,17 +1061,17 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI) {
- Triple T(STI.getTargetTriple());
+ const MCContext &Ctx, const MCSubtargetInfo *&STI) {
+ Triple T(STI->getTargetTriple());
const bool hasCompilerRTSupport = T.isOSLinux();
if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
MCOptions.SanitizeAddress) {
- if (STI.getFeatureBits()[X86::Mode32Bit] != 0)
+ if (STI->getFeatureBits()[X86::Mode32Bit] != 0)
return new X86AddressSanitizer32(STI);
- if (STI.getFeatureBits()[X86::Mode64Bit] != 0)
+ if (STI->getFeatureBits()[X86::Mode64Bit] != 0)
return new X86AddressSanitizer64(STI);
}
return new X86AsmInstrumentation(STI);
}
-} // End llvm namespace
+} // end llvm namespace
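
The switch from 'const MCSubtargetInfo &' to 'const MCSubtargetInfo *&' is the key change in this file: the instrumentation now aliases the parser's own pointer, so when X86AsmParser::SwitchMode() reseats it through copySTI(), the instrumentation sees the new subtarget rather than a stale reference. A standalone sketch of that aliasing behavior, with hypothetical types in place of the MC classes:

#include <cstdio>

struct SubtargetInfo { int Mode; };

struct Instrumentation {
  // Reference to the owner's pointer, not to the object it points at.
  const SubtargetInfo *&STI;
  explicit Instrumentation(const SubtargetInfo *&S) : STI(S) {}
  int mode() const { return STI->Mode; }
};

int main() {
  SubtargetInfo A{16}, B{64};
  const SubtargetInfo *Cur = &A;
  Instrumentation I(Cur);
  std::printf("%d\n", I.mode()); // 16
  Cur = &B;                      // the owner switches subtargets
  std::printf("%d\n", I.mode()); // 64: no stale binding
  return 0;
}
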
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 19ebcc4..470cead 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -28,7 +28,8 @@ class X86AsmInstrumentation;
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI);
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI);
class X86AsmInstrumentation {
public:
@@ -48,15 +49,16 @@ public:
protected:
friend X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI);
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI);
- X86AsmInstrumentation(const MCSubtargetInfo &STI);
+ X86AsmInstrumentation(const MCSubtargetInfo *&STI);
unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
- const MCSubtargetInfo &STI;
+ const MCSubtargetInfo *&STI;
unsigned InitialFrameReg;
};
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bca059d..4d8ffac 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,7 +11,6 @@
#include "X86AsmInstrumentation.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
-#include "X86ISelLowering.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -26,6 +25,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -57,10 +57,10 @@ static const char OpPrecedence[] = {
};
class X86AsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
+
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
@@ -154,6 +154,7 @@ private:
// Push the new operator.
InfixOperatorStack.push_back(Op);
}
+
int64_t execute() {
// Push any remaining operators onto the postfix stack.
while (!InfixOperatorStack.empty()) {
@@ -268,6 +269,7 @@ private:
bool StopOnLBrac, AddImmPrefix;
InfixCalculator IC;
InlineAsmIdentifierInfo Info;
+
public:
IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
@@ -712,10 +714,10 @@ private:
SMLoc End, unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info);
+ bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
@@ -758,23 +760,24 @@ private:
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode64Bit];
+ return getSTI().getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode32Bit];
+ return getSTI().getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode16Bit];
+ return getSTI().getFeatureBits()[X86::Mode16Bit];
}
void SwitchMode(unsigned mode) {
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
unsigned FB = ComputeAvailableFeatures(
STI.ToggleFeature(OldMode.flip(mode)));
setAvailableFeatures(FB);
-
+
assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
}
@@ -798,12 +801,12 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
+ X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
+ : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
Instrumentation.reset(
CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
}
@@ -912,6 +915,11 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
+ // The "flags" register cannot be referenced directly.
+ // Treat it as an identifier instead.
+ if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
+ RegNo = 0;
+
if (!is64BitMode()) {
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
@@ -1042,8 +1050,11 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
.Cases("DWORD", "dword", 32)
+ .Cases("FWORD", "fword", 48)
.Cases("QWORD", "qword", 64)
+ .Cases("MMWORD","mmword", 64)
.Cases("XWORD", "xword", 80)
+ .Cases("TBYTE", "tbyte", 80)
.Cases("XMMWORD", "xmmword", 128)
.Cases("YMMWORD", "ymmword", 256)
.Cases("ZMMWORD", "zmmword", 512)
@@ -1062,8 +1073,8 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
// Insert an explicit size if the user didn't have one.
if (!Size) {
Size = getPointerWidth();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
- /*Len=*/0, Size));
+ InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
+ /*Len=*/0, Size);
}
// Create an absolute memory reference in order to match against
@@ -1082,8 +1093,8 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
if (!Size) {
Size = Info.Type * 8; // Size is in terms of bits in this context.
if (Size)
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
- /*Len=*/0, Size));
+ InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
+ /*Len=*/0, Size);
}
}
@@ -1097,13 +1108,13 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
}
static void
-RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
+RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
StringRef SymName, int64_t ImmDisp,
int64_t FinalImmDisp, SMLoc &BracLoc,
SMLoc &StartInBrac, SMLoc &End) {
// Remove the '[' and ']' from the IR string.
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
+ AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
+ AsmRewrites.emplace_back(AOK_Skip, End, 1);
// If ImmDisp is non-zero, then we parsed a displacement before the
// bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
@@ -1114,15 +1125,14 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
// We have an immediate displacement before the bracketed expression.
// Adjust this to match the final immediate displacement.
bool Found = false;
- for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
- E = AsmRewrites->end(); I != E; ++I) {
- if ((*I).Loc.getPointer() > BracLoc.getPointer())
+ for (AsmRewrite &AR : AsmRewrites) {
+ if (AR.Loc.getPointer() > BracLoc.getPointer())
continue;
- if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
+ if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
assert (!Found && "ImmDisp already rewritten.");
- (*I).Kind = AOK_Imm;
- (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
- (*I).Val = FinalImmDisp;
+ AR.Kind = AOK_Imm;
+ AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
+ AR.Val = FinalImmDisp;
Found = true;
break;
}
@@ -1133,28 +1143,27 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
// We have a symbolic and an immediate displacement, but no displacement
// before the bracketed expression. Put the immediate displacement
// before the bracketed expression.
- AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
+ AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
}
}
// Remove all the ImmPrefix rewrites within the brackets.
- for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
- E = AsmRewrites->end(); I != E; ++I) {
- if ((*I).Loc.getPointer() < StartInBrac.getPointer())
+ for (AsmRewrite &AR : AsmRewrites) {
+ if (AR.Loc.getPointer() < StartInBrac.getPointer())
continue;
- if ((*I).Kind == AOK_ImmPrefix)
- (*I).Kind = AOK_Delete;
+ if (AR.Kind == AOK_ImmPrefix)
+ AR.Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
// Skip everything before the symbol.
if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
assert(Len > 0 && "Expected a non-negative length.");
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
+ AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
}
// Skip everything after the symbol.
if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
assert(Len > 0 && "Expected a non-negative length.");
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
+ AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
}
}
@@ -1162,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
+ AsmToken::TokenKind PrevTK = AsmToken::Error;
bool Done = false;
while (!Done) {
bool UpdateLocLex = true;
@@ -1205,7 +1215,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
// This is a dot operator, not an adjacent identifier.
- if (Identifier.find('.') != StringRef::npos) {
+ if (Identifier.find('.') != StringRef::npos &&
+ PrevTK == AsmToken::RBrac) {
return false;
} else {
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
@@ -1223,8 +1234,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::Integer: {
StringRef ErrMsg;
if (isParsingInlineAsm() && SM.getAddImmPrefix())
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
- Tok.getLoc()));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
// Look for 'b' or 'f' following an Integer as a directional label
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
@@ -1237,7 +1247,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
- MCSymbolRefExpr::create(Sym, Variant, getContext());
+ MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
@@ -1275,6 +1285,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (!Done && UpdateLocLex)
End = consumeToken();
+
+ PrevTK = TK;
}
return false;
}
@@ -1302,7 +1314,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// A symbolic displacement.
Disp = Sym;
if (isParsingInlineAsm())
- RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
+ RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
ImmDisp, SM.getImm(), BracLoc, StartInBrac,
End);
}
@@ -1359,7 +1371,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
MCAsmParser &Parser = getParser();
- assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+ assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
@@ -1372,15 +1384,17 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
- while (true) {
+ do {
End = Tok.getEndLoc();
getLexer().Lex();
-
- assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
- if (End.getPointer() == EndPtr) break;
- }
+ } while (End.getPointer() < EndPtr);
Identifier = LineBuf;
+ // The frontend should end parsing on an assembler token boundary, unless it
+ // failed parsing.
+ assert((End.getPointer() == EndPtr || !Result) &&
+ "frontend claimed part of a token?");
+
// If the identifier lookup was unsuccessful, assume that we are dealing with
// a label.
if (!Result) {
@@ -1389,9 +1403,8 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
Loc, false);
assert(InternalName.size() && "We should have an internal name here.");
// Push a rewrite for replacing the identifier name with the internal name.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
- Identifier.size(),
- InternalName));
+ InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
+ InternalName);
}
// Create the symbol reference.
@@ -1418,8 +1431,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
if (isParsingInlineAsm())
- InstInfo->AsmRewrites->push_back(
- AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
if (getLexer().isNot(AsmToken::LBrac)) {
// An immediate following a 'segment register', 'colon' token sequence can
@@ -1588,8 +1600,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
unsigned Len = DotDispStr.size();
unsigned Val = OrigDispVal + DotDispVal;
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
- Val));
+ InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
}
NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
@@ -1613,7 +1624,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
return nullptr;
// Don't emit the offset operator.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
+ InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
// The offset operator will have an 'r' constraint, thus we need to create
// register operand to ensure proper matching. Just pick a GPR based on
@@ -1664,7 +1675,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
// Rewrite the type operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
+ InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End);
@@ -1688,12 +1699,14 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return ParseIntelOperator(IOK_TYPE);
}
+ bool PtrInOperand = false;
unsigned Size = getIntelMemOperandSize(Tok.getString());
if (Size) {
Parser.Lex(); // Eat operand size (e.g., byte, word).
if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Parser.Lex(); // Eat ptr.
+ PtrInOperand = true;
}
Start = Tok.getLoc();
@@ -1711,10 +1724,10 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
if (StartTok.getString().size() == Len)
// Just add a prefix if this wasn't a complex immediate expression.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
else
// Otherwise, rewrite the complex expression as a single immediate.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
+ InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
}
if (getLexer().isNot(AsmToken::LBrac)) {
@@ -1740,7 +1753,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
}
// rounding mode token
- if (STI.getFeatureBits()[X86::FeatureAVX512] &&
+ if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
getLexer().is(AsmToken::LCurly))
return ParseRoundingModeOp(Start, End);
@@ -1749,9 +1762,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a segment override, otherwise this is a normal register reference.
- if (getLexer().isNot(AsmToken::Colon))
+ // If this is a normal register and the operand carried a 'ptr' size
+ // directive, report an error.
+ if (getLexer().isNot(AsmToken::Colon)) {
+ if (PtrInOperand) {
+ return ErrorOperand(Start, "expected memory operand after "
+ "'ptr', found register operand instead");
+ }
return X86Operand::CreateReg(RegNo, Start, End);
-
+ }
+
return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
@@ -1798,7 +1818,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
}
case AsmToken::LCurly:{
SMLoc Start = Parser.getTok().getLoc(), End;
- if (STI.getFeatureBits()[X86::FeatureAVX512])
+ if (getSTI().getFeatureBits()[X86::FeatureAVX512])
return ParseRoundingModeOp(Start, End);
return ErrorOperand(Start, "unknown token in expression");
}
@@ -1808,7 +1828,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
MCAsmParser &Parser = getParser();
- if(STI.getFeatureBits()[X86::FeatureAVX512]) {
+ if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
@@ -1983,12 +2003,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
}
// Validate the scale amount.
- if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
+ if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
ScaleVal != 1) {
Error(Loc, "scale factor in 16-bit address must be 1");
return nullptr;
- }
- if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ }
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
+ ScaleVal != 8) {
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
return nullptr;
}
@@ -2175,7 +2196,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Name == "repne" || Name == "repnz" ||
Name == "rex64" || Name == "data16";
-
// This does the actual operand parsing. Don't parse any more if we have a
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
@@ -2213,6 +2233,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(isPrefix && getLexer().is(AsmToken::Slash)))
Parser.Lex();
+ // For gas compatibility (this cannot be expressed in the .td files):
+ // append "p" to certain zero-operand floating-point mnemonics.
+ // For example: fsub --> fsubp
+ bool IsFp =
+ Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
+ if (IsFp && Operands.size() == 1) {
+ const char *Repl = StringSwitch<const char *>(Name)
+ .Case("fsub", "fsubp")
+ .Case("fdiv", "fdivp")
+ .Case("fsubr", "fsubrp")
+ .Case("fdivr", "fdivrp");
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
+ }
+
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -2242,9 +2276,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Append default arguments to "ins[bwld]"
if (Name.startswith("ins") && Operands.size() == 1 &&
- (Name == "insb" || Name == "insw" || Name == "insl" ||
- Name == "insd" )) {
- AddDefaultSrcDestOperands(Operands,
+ (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
+ AddDefaultSrcDestOperands(Operands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
}
@@ -2346,98 +2379,21 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
- if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
- cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
- Operands.erase(Operands.begin() + 1);
- static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
- }
+ if (Op1.isImm())
+ if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
+ if (CE->getValue() == 3) {
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
+ }
}
return false;
}
-static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
- bool isCmp) {
- MCInst TmpInst;
- TmpInst.setOpcode(Opcode);
- if (!isCmp)
- TmpInst.addOperand(MCOperand::createReg(Reg));
- TmpInst.addOperand(MCOperand::createReg(Reg));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
-}
-
-static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
-}
-
-static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
-}
-
-static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
-}
-
-bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
- switch (Inst.getOpcode()) {
- default: return true;
- case X86::INT:
- X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
- assert(Op.isImm() && "expected immediate");
- int64_t Res;
- if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
- Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
- return false;
- }
- return true;
- }
- llvm_unreachable("handle the instruction appropriately");
-}
-
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
- case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
- case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
- case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
- case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
- case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
- case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
- case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
- case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
- case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
- case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
- case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
- case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
- case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
- case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
- case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
- case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
- case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
- case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
- case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
- case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
- case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
- case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
- case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
- case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+ case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
@@ -2457,18 +2413,19 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
@@ -2573,9 +2530,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
isParsingIntelSyntax())) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
- if (!validateInstruction(Inst, Operands))
- return true;
-
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
@@ -2819,9 +2773,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
- if (!validateInstruction(Inst, Operands))
- return true;
-
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
@@ -2898,10 +2849,29 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
"a '%' prefix in .intel_syntax");
}
return false;
- }
+ } else if (IDVal == ".even")
+ return parseDirectiveEven(DirectiveID.getLoc());
return true;
}
+/// parseDirectiveEven
+/// ::= .even
+bool X86AsmParser::parseDirectiveEven(SMLoc L) {
+ const MCSection *Section = getStreamer().getCurrentSection().first;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ TokError("unexpected token in directive");
+ return false;
+ }
+ if (!Section) {
+ getStreamer().InitSections(false);
+ Section = getStreamer().getCurrentSection().first;
+ }
+ if (Section->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(2, 0);
+ else
+ getStreamer().EmitValueToAlignment(2, 0, 1, 0);
+ return false;
+}
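For illustration, a minimal sketch of the new directive's effect (the input is an assumption, not part of the patch):

    // Input (AT&T syntax):     // Output bytes in a .text section:
    //   .byte 1                //   0x01
    //   .even                  //   0x90  (one nop; data sections pad 0x00)
    //   .byte 2                //   0x02  now lands at the even offset 2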
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
@@ -2909,10 +2879,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
if (getParser().parseExpression(Value))
return false;
- getParser().getStreamer().EmitValue(Value, Size);
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ }
if (getLexer().is(AsmToken::EndOfStatement))
break;
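A self-contained sketch of the acceptance test added above; the helpers re-derive isUIntN/isIntN from llvm/Support/MathExtras.h so the example stands alone (assumes 0 < N < 64):

    #include <cstdint>

    static bool fitsDirective(int64_t V, unsigned N) { // N = 8 * Size bits
      bool FitsUnsigned = static_cast<uint64_t>(V) < (UINT64_C(1) << N); // isUIntN
      bool FitsSigned   = V >= -(INT64_C(1) << (N - 1)) &&
                          V <   (INT64_C(1) << (N - 1));                 // isIntN
      return FitsUnsigned || FitsSigned;
    }
    // fitsDirective(0xFFFF, 16) and fitsDirective(-1, 16) hold, while
    // fitsDirective(0x10000, 16) triggers "literal value out of range".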
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index 7610806..54538c8 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -13,30 +13,25 @@
namespace llvm {
inline bool isImmSExti16i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value) ||
+ (isUInt<16>(Value) && isInt<8>(static_cast<int16_t>(Value)));
}
inline bool isImmSExti32i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value) ||
+ (isUInt<32>(Value) && isInt<8>(static_cast<int32_t>(Value)));
}
inline bool isImmSExti64i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value);
}
inline bool isImmSExti64i32Value(uint64_t Value) {
- return (( Value <= 0x000000007FFFFFFFULL)||
- (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<32>(Value);
}
inline bool isImmUnsignedi8Value(uint64_t Value) {
- return (( Value <= 0x00000000000000FFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isUInt<8>(Value) || isInt<8>(Value);
}
} // End of namespace llvm
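To see why the rewrite is behavior-preserving, a minimal self-contained check for the 16i8 case; the scan covers the two low bands of the old formula, and the old third band (values with all upper bits set) is exactly what isInt<8> accepts on the raw 64-bit value:

    #include <cassert>
    #include <cstdint>

    static bool isInt8(int64_t x)    { return x >= -128 && x <= 127; }
    static bool isUInt16(uint64_t x) { return x <= 0xFFFF; }

    int main() {
      for (uint64_t V = 0; V <= 0x1FFFF; ++V) {
        bool Old = V <= 0x7F || (0xFF80 <= V && V <= 0xFFFF);
        bool New = isInt8(static_cast<int64_t>(V)) ||
                   (isUInt16(V) && isInt8(static_cast<int16_t>(V)));
        assert(Old == New);
      }
    }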
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index cfc3ee2..ce8fcf1 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -95,11 +95,13 @@ X86GenericDisassembler::X86GenericDisassembler(
llvm_unreachable("Invalid CPU mode");
}
+namespace {
struct Region {
ArrayRef<uint8_t> Bytes;
uint64_t Base;
Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
};
+} // end anonymous namespace
/// A callback function that wraps the readByte method from Region.
///
@@ -831,8 +833,12 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM256:
case TYPE_XMM512:
case TYPE_VK1:
+ case TYPE_VK2:
+ case TYPE_VK4:
case TYPE_VK8:
case TYPE_VK16:
+ case TYPE_VK32:
+ case TYPE_VK64:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
case TYPE_BNDR:
@@ -962,6 +968,7 @@ static bool translateInstruction(MCInst &mcInst,
return true;
}
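+  // Clear operands left over from a previous decode; callers may reuse the
+  // MCInst across attempts (assumed rationale, not stated in the patch).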
+ mcInst.clear();
mcInst.setOpcode(insn.instructionID);
// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
// prefix bytes should be disassembled as xrelease and xacquire then set the
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index f73fa75..040143b 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -361,7 +361,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
 * then it should be disassembled as xacquire/xrelease, not repne/rep.
*/
if ((byte == 0xf2 || byte == 0xf3) &&
- ((nextByte == 0xf0) |
+ ((nextByte == 0xf0) ||
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
insn->xAcquireRelease = true;
/*
@@ -980,6 +980,47 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
insn->opcode == 0xE3)
attrMask ^= ATTR_ADSIZE;
+ /*
+   * In 64-bit mode, all "f64"-superscripted opcodes ignore the operand-size
+   * prefix; CALL/JMP/Jcc instructions must ignore 0x66 and consume a 4-byte
+   * immediate/displacement.
+ */
+
+ if (insn->mode == MODE_64BIT &&
+ isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
+ switch (insn->opcode) {
+ case 0xE8:
+ case 0xE9:
+        // Restrict to one-byte opcodes so MMX instructions such as
+        // psubsb (0F E8) are unaffected.
+ if (insn->opcodeType == ONEBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ case 0x82:
+ case 0x83:
+ case 0x84:
+ case 0x85:
+ case 0x86:
+ case 0x87:
+ case 0x88:
+ case 0x89:
+ case 0x8A:
+ case 0x8B:
+ case 0x8C:
+ case 0x8D:
+ case 0x8E:
+ case 0x8F:
+        // Restrict to two-byte opcodes so lea (one-byte 8D) and
+        // three-byte ops are unaffected.
+ if (insn->opcodeType == TWOBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ }
+ }
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
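A hedged decoding example of what the new special case buys (byte values chosen for illustration):

    // In 64-bit mode, {0x66, 0xE8, 0x01, 0x02, 0x03, 0x04} is CALL rel32:
    // the 0x66 prefix is ignored for sizing, so the code above clears
    // ATTR_OPSIZE and reads a 4-byte immediate (0x04030201), not 2 bytes.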
@@ -1447,8 +1488,12 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_XMM: \
return prefix##_XMM0 + index; \
case TYPE_VK1: \
+ case TYPE_VK2: \
+ case TYPE_VK4: \
case TYPE_VK8: \
case TYPE_VK16: \
+ case TYPE_VK32: \
+ case TYPE_VK64: \
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index a79a923..28a628e 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -572,8 +572,6 @@ struct InternalInstruction {
// The last byte of the opcode, not counting any ModR/M extension
uint8_t opcode;
- // The ModR/M byte of the instruction, if it is an opcode extension
- uint8_t modRMExtension;
// decode state
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index ea727e6..b4c0bc4 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 62b6b73..bbb3090 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 91b144a..82f0ee5 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -21,6 +21,27 @@
using namespace llvm;
+static unsigned getVectorRegSize(unsigned RegNo) {
+ if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
+ return 512;
+ if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
+ return 256;
+ if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
+ return 128;
+ if (X86::MM0 <= RegNo && RegNo <= X86::MM7)
+ return 64;
+
+ llvm_unreachable("Unknown vector reg!");
+ return 0;
+}
+
+static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,
+ unsigned OperandIndex) {
+ unsigned OpReg = MI->getOperand(OperandIndex).getReg();
+ return MVT::getVectorVT(ScalarVT,
+ getVectorRegSize(OpReg)/ScalarVT.getSizeInBits());
+}
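A minimal sketch of the derivation, assuming the MVT API from llvm/CodeGen/MachineValueType.h:

    // A 256-bit YMM operand with an f32 scalar yields an 8-element vector;
    // a 512-bit ZMM operand would yield MVT::v16f32 by the same arithmetic.
    MVT VT = MVT::getVectorVT(MVT::f32, 256 / 32); // MVT::v8f32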
+
/// \brief Extracts the src/dst types for a given zero extension instruction.
/// \note While the number of elements in the DstVT type is correct, the
/// number in the SrcVT type is expanded to fill the src xmm register and the
@@ -107,6 +128,75 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
}
}
+#define CASE_MASK_INS_COMMON(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src: \
+ case X86::V##Inst##Suffix##src##k: \
+ case X86::V##Inst##Suffix##src##kz:
+
+#define CASE_SSE_INS_COMMON(Inst, src) \
+ case X86::Inst##src:
+
+#define CASE_AVX_INS_COMMON(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src:
+
+#define CASE_MOVDUP(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
+ CASE_AVX_INS_COMMON(Inst, , r##src) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src) \
+ CASE_SSE_INS_COMMON(Inst, r##src) \
+
+#define CASE_UNPCK(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
+ CASE_AVX_INS_COMMON(Inst, , r##src) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src) \
+ CASE_SSE_INS_COMMON(Inst, r##src) \
+
+#define CASE_SHUF(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, , r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \
+ CASE_SSE_INS_COMMON(Inst, r##src##i) \
+
+#define CASE_VPERM(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, src##i) \
+ CASE_AVX_INS_COMMON(Inst, , src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, src##i) \
+
+#define CASE_VSHUF(Inst, src) \
+ CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i) \
+
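For reference, a hedged expansion of CASE_MOVDUP(MOVSLDUP, r): one macro now covers the SSE form, both VEX forms, and the EVEX forms including their masked (k) and zero-masked (kz) variants:

    // case X86::VMOVSLDUPZrr:    case X86::VMOVSLDUPZrrk:    case X86::VMOVSLDUPZrrkz:
    // case X86::VMOVSLDUPZ256rr: case X86::VMOVSLDUPZ256rrk: case X86::VMOVSLDUPZ256rrkz:
    // case X86::VMOVSLDUPZ128rr: case X86::VMOVSLDUPZ128rrk: case X86::VMOVSLDUPZ128rrkz:
    // case X86::VMOVSLDUPrr:     case X86::VMOVSLDUPYrr:     case X86::MOVSLDUPrr: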
+/// \brief Extracts the type, and whether there is a memory operand, for a given
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) instruction.
+static void getVSHUF64x2FamilyInfo(const MCInst *MI, MVT &VT, bool &HasMemOp) {
+ HasMemOp = false;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown VSHUF64x2 family instructions.");
+ break;
+ CASE_VSHUF(64X2, m)
+ HasMemOp = true; // FALL THROUGH.
+ CASE_VSHUF(64X2, r)
+ VT = getRegOperandVectorVT(MI, MVT::i64, 0);
+ break;
+ CASE_VSHUF(32X4, m)
+ HasMemOp = true; // FALL THROUGH.
+ CASE_VSHUF(32X4, r)
+ VT = getRegOperandVectorVT(MI, MVT::i32, 0);
+ break;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Top Level Entrypoint
//===----------------------------------------------------------------------===//
@@ -127,23 +217,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::BLENDPDrri:
case X86::VBLENDPDrri:
+ case X86::VBLENDPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::BLENDPDrmi:
case X86::VBLENDPDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VBLENDPDYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VBLENDPDYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4f64,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -152,23 +233,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::BLENDPSrri:
case X86::VBLENDPSrri:
+ case X86::VBLENDPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::BLENDPSrmi:
case X86::VBLENDPSrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VBLENDPSYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VBLENDPSYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8f32,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -177,23 +249,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PBLENDWrri:
case X86::VPBLENDWrri:
+ case X86::VPBLENDWYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PBLENDWrmi:
case X86::VPBLENDWrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPBLENDWYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VPBLENDWYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v16i16,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -201,23 +264,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::VPBLENDDrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPBLENDDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
-
case X86::VPBLENDDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::VPBLENDDrmi:
case X86::VPBLENDDYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8i32,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -239,6 +292,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVLHPSrr:
case X86::VMOVLHPSrr:
+ case X86::VMOVLHPSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -247,569 +301,327 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVHLPSrr:
case X86::VMOVHLPSrr:
+ case X86::VMOVHLPSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVHLPSMask(2, ShuffleMask);
break;
- case X86::MOVSLDUPrr:
- case X86::VMOVSLDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::MOVSLDUPrm:
- case X86::VMOVSLDUPrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
- break;
-
- case X86::VMOVSHDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VMOVSHDUPYrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
- break;
-
- case X86::VMOVSLDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVSLDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::VMOVSLDUPYrm:
+ CASE_MOVDUP(MOVSLDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
break;
- case X86::MOVSHDUPrr:
- case X86::VMOVSHDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVSHDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::MOVSHDUPrm:
- case X86::VMOVSHDUPrm:
+ CASE_MOVDUP(MOVSHDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
break;
- case X86::VMOVDDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVDDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::VMOVDDUPYrm:
+ CASE_MOVDUP(MOVDDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
- break;
-
- case X86::MOVDDUPrr:
- case X86::VMOVDDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::MOVDDUPrm:
- case X86::VMOVDDUPrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
+ DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
break;
case X86::PSLLDQri:
case X86::VPSLLDQri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSLLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
-
case X86::VPSLLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSLLDQMask(MVT::v32i8,
+ DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSRLDQri:
case X86::VPSRLDQri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSRLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
-
case X86::VPSRLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSRLDQMask(MVT::v32i8,
+ DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
+ case X86::VPALIGNR256rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PALIGNR128rm:
case X86::VPALIGNR128rm:
- Src2Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePALIGNRMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPALIGNR256rr:
- Src1Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VPALIGNR256rm:
Src2Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePALIGNRMask(MVT::v32i8,
+ DecodePALIGNRMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSHUFDri:
case X86::VPSHUFDri:
+ case X86::VPSHUFDYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFDmi:
case X86::VPSHUFDmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFDYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFDYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v8i32,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::i32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSHUFHWri:
case X86::VPSHUFHWri:
+ case X86::VPSHUFHWYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFHWmi:
case X86::VPSHUFHWmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFHWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFHWYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFHWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFHWMask(MVT::v16i16,
+ DecodePSHUFHWMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
+
case X86::PSHUFLWri:
case X86::VPSHUFLWri:
+ case X86::VPSHUFLWYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFLWmi:
case X86::VPSHUFLWmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFLWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFLWYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFLWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFLWMask(MVT::v16i16,
+ DecodePSHUFLWMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
- case X86::PUNPCKHBWrr:
- case X86::VPUNPCKHBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHBWrm:
- case X86::VPUNPCKHBWrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
- break;
- case X86::VPUNPCKHBWYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHBWYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
- break;
- case X86::PUNPCKHWDrr:
- case X86::VPUNPCKHWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHWDrm:
- case X86::VPUNPCKHWDrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
- break;
- case X86::VPUNPCKHWDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHWDYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
- break;
- case X86::PUNPCKHDQrr:
- case X86::VPUNPCKHDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHDQrm:
- case X86::VPUNPCKHDQrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
- break;
- case X86::VPUNPCKHDQYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHDQYrm:
+ case X86::MMX_PSHUFWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
- break;
- case X86::VPUNPCKHDQZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKHDQZrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i32, ShuffleMask);
- break;
- case X86::PUNPCKHQDQrr:
- case X86::VPUNPCKHQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- case X86::VPUNPCKHQDQrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::MMX_PSHUFWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
- break;
- case X86::VPUNPCKHQDQYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHQDQYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
- break;
- case X86::VPUNPCKHQDQZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHQDQZrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i64, ShuffleMask);
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
+ DecodePSHUFMask(MVT::v4i16,
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
+ ShuffleMask);
break;
- case X86::PUNPCKLBWrr:
- case X86::VPUNPCKLBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLBWrm:
- case X86::VPUNPCKLBWrm:
+ case X86::PSWAPDrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
- break;
- case X86::VPUNPCKLBWYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKLBWYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
- break;
- case X86::PUNPCKLWDrr:
- case X86::VPUNPCKLWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLWDrm:
- case X86::VPUNPCKLWDrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::PSWAPDrm:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ DecodePSWAPMask(MVT::v2i32, ShuffleMask);
break;
- case X86::VPUNPCKLWDYrr:
+
+ CASE_UNPCK(PUNPCKHBW, r)
+ case X86::MMX_PUNPCKHBWirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLWDYrm:
+ CASE_UNPCK(PUNPCKHBW, m)
+ case X86::MMX_PUNPCKHBWirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
- case X86::PUNPCKLDQrr:
- case X86::VPUNPCKLDQrr:
+
+ CASE_UNPCK(PUNPCKHWD, r)
+ case X86::MMX_PUNPCKHWDirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLDQrm:
- case X86::VPUNPCKLDQrm:
+ CASE_UNPCK(PUNPCKHWD, m)
+ case X86::MMX_PUNPCKHWDirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
- case X86::VPUNPCKLDQYrr:
+
+ CASE_UNPCK(PUNPCKHDQ, r)
+ case X86::MMX_PUNPCKHDQirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLDQYrm:
+ CASE_UNPCK(PUNPCKHDQ, m)
+ case X86::MMX_PUNPCKHDQirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::VPUNPCKLDQZrr:
+
+ CASE_UNPCK(PUNPCKHQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLDQZrm:
+ CASE_UNPCK(PUNPCKHQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
- case X86::PUNPCKLQDQrr:
- case X86::VPUNPCKLQDQrr:
+
+ CASE_UNPCK(PUNPCKLBW, r)
+ case X86::MMX_PUNPCKLBWirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- case X86::VPUNPCKLQDQrm:
+ CASE_UNPCK(PUNPCKLBW, m)
+ case X86::MMX_PUNPCKLBWirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
- case X86::VPUNPCKLQDQYrr:
+
+ CASE_UNPCK(PUNPCKLWD, r)
+ case X86::MMX_PUNPCKLWDirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLQDQYrm:
+ CASE_UNPCK(PUNPCKLWD, m)
+ case X86::MMX_PUNPCKLWDirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
- case X86::VPUNPCKLQDQZrr:
+
+ CASE_UNPCK(PUNPCKLDQ, r)
+ case X86::MMX_PUNPCKLDQirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLQDQZrm:
+ CASE_UNPCK(PUNPCKLDQ, m)
+ case X86::MMX_PUNPCKLDQirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::SHUFPDrri:
- case X86::VSHUFPDrri:
+ CASE_UNPCK(PUNPCKLQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPDrmi:
- case X86::VSHUFPDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VSHUFPDYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VSHUFPDYrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v4f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
+ CASE_UNPCK(PUNPCKLQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
- case X86::SHUFPSrri:
- case X86::VSHUFPSrri:
+ CASE_SHUF(SHUFPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPSrmi:
- case X86::VSHUFPSrmi:
+ CASE_SHUF(SHUFPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v4f32,
+ DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VSHUFPSYrri:
+
+ CASE_SHUF(SHUFPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VSHUFPSYrmi:
+ CASE_SHUF(SHUFPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v8f32,
+ DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKLPDrr:
- case X86::VUNPCKLPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPDrm:
- case X86::VUNPCKLPDrm:
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPDYrm:
- DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPDZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPDZrm:
- DecodeUNPCKLMask(MVT::v8f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::UNPCKLPSrr:
- case X86::VUNPCKLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPSrm:
- case X86::VUNPCKLPSrm:
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPSYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPSYrm:
- DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPSZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPSZrm:
- DecodeUNPCKLMask(MVT::v16f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::UNPCKHPDrr:
- case X86::VUNPCKHPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPDrm:
- case X86::VUNPCKHPDrm:
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKHPDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKHPDYrm:
- DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_VSHUF(64X2, r)
+ CASE_VSHUF(64X2, m)
+ CASE_VSHUF(32X4, r)
+ CASE_VSHUF(32X4, m) {
+ MVT VT;
+ bool HasMemOp;
+ unsigned NumOp = MI->getNumOperands();
+ getVSHUF64x2FamilyInfo(MI, VT, HasMemOp);
+ decodeVSHUF64x2FamilyMask(VT, MI->getOperand(NumOp - 1).getImm(),
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
+ if (HasMemOp) {
+ assert((NumOp >= 8) && "Expected at least 8 operands!");
+ Src1Name = getRegName(MI->getOperand(NumOp - 7).getReg());
+ } else {
+ assert((NumOp >= 4) && "Expected at least 4 operands!");
+ Src2Name = getRegName(MI->getOperand(NumOp - 2).getReg());
+ Src1Name = getRegName(MI->getOperand(NumOp - 3).getReg());
+ }
break;
- case X86::VUNPCKHPDZrr:
+ }
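The operand arithmetic above assumes the following layout (unmasked forms shown; masked variants add operands, which is why indexing is relative to NumOp):

    // register form: 0: dst, 1: src1, 2: src2, 3: imm
    //                => Src2 = NumOp - 2, Src1 = NumOp - 3
    // memory form:   0: dst, 1: src1, 2..6: mem (5 operands), 7: imm
    //                => Src1 = NumOp - 7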
+
+ CASE_UNPCK(UNPCKLPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPDZrm:
- DecodeUNPCKHMask(MVT::v8f64, ShuffleMask);
+ CASE_UNPCK(UNPCKLPD, m)
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKHPSrr:
- case X86::VUNPCKHPSrr:
+
+ CASE_UNPCK(UNPCKLPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKHPSrm:
- case X86::VUNPCKHPSrm:
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ CASE_UNPCK(UNPCKLPS, m)
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VUNPCKHPSYrr:
+
+ CASE_UNPCK(UNPCKHPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPSYrm:
- DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ CASE_UNPCK(UNPCKHPD, m)
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VUNPCKHPSZrr:
+
+ CASE_UNPCK(UNPCKHPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPSZrm:
- DecodeUNPCKHMask(MVT::v16f32, ShuffleMask);
+ CASE_UNPCK(UNPCKHPS, m)
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPSri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VPERMILPSmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPERMILPSYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VPERMILPSYmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v8f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPERMILPDri:
+
+ CASE_VPERM(PERMILPS, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPDmi:
+ CASE_VPERM(PERMILPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v2f64,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPDYri:
+
+ CASE_VPERM(PERMILPD, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPDYmi:
+ CASE_VPERM(PERMILPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4f64,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::VPERM2F128rr:
case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -824,6 +636,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::VPERMQYri:
case X86::VPERMPDYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -846,6 +659,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::MOVSSrr:
case X86::VMOVSSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -861,6 +675,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVZPQILo2PQIrr:
case X86::VMOVPQI2QIrr:
case X86::VMOVZPQILo2PQIrr:
+ case X86::VMOVZPQILo2PQIZrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVQI2PQIrm:
@@ -869,9 +684,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVQI2PQIrm:
case X86::VMOVZQI2PQIrm:
case X86::VMOVZPQILo2PQIrm:
+ case X86::VMOVZPQILo2PQIZrm:
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::MOVDI2PDIrm:
case X86::VMOVDI2PDIrm:
DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da..20cd7ff 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -19,8 +19,6 @@
namespace llvm {
-class MCOperand;
-
class X86IntelInstPrinter final : public MCInstPrinter {
public:
X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 629802f..133bd0e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -69,15 +69,19 @@ public:
class X86AsmBackend : public MCAsmBackend {
const StringRef CPU;
bool HasNopl;
- const uint64_t MaxNopLength;
+ uint64_t MaxNopLength;
public:
- X86AsmBackend(const Target &T, StringRef CPU)
- : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
+ X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
CPU != "c3" && CPU != "c3-2";
+    // The max length of a true long nop instruction is 15 bytes.
+    // The max length of a long nop replacement instruction is 7 bytes.
+    // Taking Silvermont architecture features into account, the max nop
+    // length is reduced there to achieve better performance.
+ MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
}
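The resulting cap, summarized (CPU names are illustrative):

    // HasNopl == false (e.g. "i486", "k6")   -> MaxNopLength = 7  (AltNops)
    // CPU == "slm" (Silvermont)              -> MaxNopLength = 7
    // any other CPU with nopl (e.g. "core2") -> MaxNopLength = 15 (TrueNops)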
unsigned getNumFixupKinds() const override {
@@ -200,6 +204,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::ADD64ri8: return X86::ADD64ri32;
case X86::ADD64mi8: return X86::ADD64mi32;
+ // ADC
+ case X86::ADC16ri8: return X86::ADC16ri;
+ case X86::ADC16mi8: return X86::ADC16mi;
+ case X86::ADC32ri8: return X86::ADC32ri;
+ case X86::ADC32mi8: return X86::ADC32mi;
+ case X86::ADC64ri8: return X86::ADC64ri32;
+ case X86::ADC64mi8: return X86::ADC64mi32;
+
// SUB
case X86::SUB16ri8: return X86::SUB16ri;
case X86::SUB16mi8: return X86::SUB16mi;
@@ -208,6 +220,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::SUB64ri8: return X86::SUB64ri32;
case X86::SUB64mi8: return X86::SUB64mi32;
+ // SBB
+ case X86::SBB16ri8: return X86::SBB16ri;
+ case X86::SBB16mi8: return X86::SBB16mi;
+ case X86::SBB32ri8: return X86::SBB32ri;
+ case X86::SBB32mi8: return X86::SBB32mi;
+ case X86::SBB64ri8: return X86::SBB64ri32;
+ case X86::SBB64mi8: return X86::SBB64mi32;
+
// CMP
case X86::CMP16ri8: return X86::CMP16ri;
case X86::CMP16mi8: return X86::CMP16mi;
@@ -279,7 +299,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- static const uint8_t Nops[10][10] = {
+ static const uint8_t TrueNops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@@ -302,17 +322,31 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
- // This CPU doesn't support long nops. If needed add more.
- // FIXME: Can we get this from the subtarget somehow?
- // FIXME: We could generated something better than plain 0x90.
- if (!HasNopl) {
- for (uint64_t i = 0; i < Count; ++i)
- OW->write8(0x90);
- return true;
- }
+ // Alternative nop instructions for CPUs which don't support long nops.
+ static const uint8_t AltNops[7][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0x76, 0x00},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0x74, 0x26, 0x00},
+ // nop + lea 0x0(%esi),%esi
+ {0x90, 0x8d, 0x74, 0x26, 0x00},
+ // lea 0x0(%esi),%esi
+    {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // Select the right NOP table.
+  // FIXME: Can we query whether the CPU supports long nops from the subtarget?
+ const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
+ assert(HasNopl || MaxNopLength <= 7);
- // 15 is the longest single nop instruction. Emit as many 15-byte nops as
- // needed, then emit a nop of the remaining length.
+  // Emit as many maximum-length nops as needed, then one nop of the
+  // remaining length.
do {
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
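A worked example of the loop, assuming Count = 25 and MaxNopLength = 15:

    // iteration 1: ThisNopLength = 15 -> Prefixes = 5; emits five 0x66
    //              bytes plus the 10-byte table nop; Count drops to 10
    // iteration 2: ThisNopLength = 10 -> Prefixes = 0; emits the plain
    //              10-byte nop; Count reaches 0 and the loop exits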
@@ -359,6 +393,17 @@ public:
}
};
+class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+ ELF::EM_IAMCU);
+ }
+};
+
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -610,13 +655,13 @@ private:
/// \brief Get the compact unwind number for a given register. The number
/// corresponds to the enum lists in compact_unwind_encoding.h.
int getCompactUnwindRegNum(unsigned Reg) const {
- static const uint16_t CU32BitRegs[7] = {
+ static const MCPhysReg CU32BitRegs[7] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
- static const uint16_t CU64BitRegs[] = {
+ static const MCPhysReg CU64BitRegs[] = {
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
};
- const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
@@ -780,6 +825,10 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
return new WindowsX86AsmBackend(T, false, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+
+ if (TheTriple.isOSIAMCU())
+ return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU);
+
return new ELFX86_32AsmBackend(T, OSABI, CPU);
}
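A hedged sketch of the selection, assuming the IAMCU triple spelling used elsewhere in LLVM:

    #include <cassert>
    #include "llvm/ADT/Triple.h"

    llvm::Triple T("i586-intel-elfiamcu");
    assert(T.isOSIAMCU() && T.isOSBinFormatELF());
    // createX86_32AsmBackend(T, ...) now returns ELFX86_IAMCUAsmBackend,
    // which writes EM_IAMCU objects using Rel (not RelA) relocations.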
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index f0d00b0..9ff85b9 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,6 +41,16 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
+
+ /// AVX512 static rounding constants. These need to match the values in
+ /// avx512fintrin.h.
+ enum STATIC_ROUNDING {
+ TO_NEAREST_INT = 0,
+ TO_NEG_INF = 1,
+ TO_POS_INF = 2,
+ TO_ZERO = 3,
+ CUR_DIRECTION = 4
+ };
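A hedged cross-check; the right-hand names are the _MM_FROUND_* constants as spelled in Clang's avx512fintrin.h (an assumption of this note, since that header is not part of this diff):

    static_assert(llvm::X86::TO_NEAREST_INT == 0, "_MM_FROUND_TO_NEAREST_INT");
    static_assert(llvm::X86::TO_NEG_INF     == 1, "_MM_FROUND_TO_NEG_INF");
    static_assert(llvm::X86::TO_POS_INF     == 2, "_MM_FROUND_TO_POS_INF");
    static_assert(llvm::X86::TO_ZERO        == 3, "_MM_FROUND_TO_ZERO");
    static_assert(llvm::X86::CUR_DIRECTION  == 4, "_MM_FROUND_CUR_DIRECTION");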
} // end namespace X86;
/// X86II - This namespace holds all of the target specific flags that
@@ -675,7 +685,7 @@ namespace X86II {
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
case X86II::RawFrmDstSrc:
- return -1;
+ return -1;
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem:
@@ -696,23 +706,27 @@ namespace X86II {
// Start from 0, skip registers encoded in VEX_VVVV or a mask register.
return 0 + HasVEX_4V + HasEVEX_K;
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
+ case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
+ case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
+ case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE:
case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
- case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
- case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
- case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
- case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
- case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
- case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
- case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
- case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
- case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
- case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
- case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0:
+ case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3:
+ case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6:
+ case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9:
+ case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
+ case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF:
+ case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2:
+ case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5:
+ case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB:
+ case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE:
+ case X86II::MRM_FF:
return -1;
}
}
@@ -740,7 +754,7 @@ namespace X86II {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
- return true;
+ return true;
}
return false;
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index a33468d..736c39d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -32,9 +32,11 @@ namespace {
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
- : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
- // Only i386 uses Rel instead of RelA.
- /*HasRelocationAddend*/ EMachine != ELF::EM_386) {}
+ : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
+ // Only i386 and IAMCU use Rel instead of RelA.
+ /*HasRelocationAddend*/
+ (EMachine != ELF::EM_386) &&
+ (EMachine != ELF::EM_IAMCU)) {}
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
@@ -246,7 +248,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
if (getEMachine() == ELF::EM_X86_64)
return getRelocType64(Modifier, Type, IsPCRel);
- assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type.");
+ assert((getEMachine() == ELF::EM_386 || getEMachine() == ELF::EM_IAMCU) &&
+ "Unsupported ELF machine type.");
return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index deaad2a..30d5c80 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -20,39 +20,42 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
-
- class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit X86MCAsmInfoDarwin(const Triple &Triple);
- };
-
- struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
- explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
- const MCExpr *
- getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const override;
- };
-
- class X86ELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit X86ELFMCAsmInfo(const Triple &Triple);
- };
-
- class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor() override;
- public:
- explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
- };
-
- class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
- void anchor() override;
- public:
- explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
- };
+class Triple;
+
+class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+};
+
+struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const override;
+};
+
+class X86ELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+};
+
+class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+};
+
+class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
+};
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 10c434c..dfab6ec 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -510,8 +510,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
- Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ CurByte, OS, Fixups);
return;
}
@@ -988,6 +988,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCInstrDesc &Desc) {
unsigned REX = 0;
+ bool UsesHighByteReg = false;
+
if (TSFlags & X86II::REX_W)
REX |= 1 << 3; // set REX.W
@@ -1004,6 +1006,8 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ UsesHighByteReg = true;
if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
@@ -1073,6 +1077,9 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
break;
}
+ if (REX && UsesHighByteReg)
+ report_fatal_error("Cannot encode high byte register in REX-prefixed instruction");
+
return REX;
}
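The UsesHighByteReg check added above guards a real encoding conflict: once any REX prefix is present, byte-register encodings 4 through 7 select SPL/BPL/SIL/DIL instead of the legacy AH/CH/DH/BH, so a single instruction cannot name a high-byte register and carry a REX prefix. A rough illustration (hypothetical helper, not part of the emitter):

    // With a REX prefix, ModRM byte-register codes 4..7 change meaning.
    const char *byteRegName(unsigned Enc, bool HasREX) {
      static const char *Legacy[8] = {"al", "cl", "dl", "bl",
                                      "ah", "ch", "dh", "bh"};
      static const char *Rex[8]    = {"al",  "cl",  "dl",  "bl",
                                      "spl", "bpl", "sil", "dil"};
      return HasREX ? Rex[Enc & 7] : Legacy[Enc & 7];
    }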
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 83b4091..53a6550 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -122,7 +122,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
} else if (TheTriple.isOSBinFormatELF()) {
// Force the use of an ELF container.
MAI = new X86ELFMCAsmInfo(TheTriple);
- } else if (TheTriple.isWindowsMSVCEnvironment()) {
+ } else if (TheTriple.isWindowsMSVCEnvironment() ||
+ TheTriple.isWindowsCoreCLREnvironment()) {
MAI = new X86MCAsmInfoMicrosoft(TheTriple);
} else if (TheTriple.isOSCygMing() ||
TheTriple.isWindowsItaniumEnvironment()) {
@@ -267,3 +268,184 @@ extern "C" void LLVMInitializeX86TargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
}
+
+unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
+ bool High) {
+ switch (Size) {
+ default: return 0;
+ case 8:
+ if (High) {
+ switch (Reg) {
+ default: return getX86SubSuperRegisterOrZero(Reg, 64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case 16:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case 32:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case 64:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+}
+
+unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, bool High) {
+ unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High);
+ assert(Res != 0 && "Unexpected register or VT");
+ return Res;
+}
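A few sample queries against the mapping implemented above; each result follows directly from the switch cases (assuming the usual X86 register enums are in scope):

    unsigned A = getX86SubSuperRegister(X86::EAX, 16);               // X86::AX
    unsigned B = getX86SubSuperRegister(X86::BL, 64);                // X86::RBX
    unsigned C = getX86SubSuperRegister(X86::RCX, 8, /*High=*/true); // X86::CH
    unsigned D = getX86SubSuperRegisterOrZero(X86::R8, 8);           // X86::R8B
    unsigned E = getX86SubSuperRegisterOrZero(X86::XMM0, 32);        // 0: no match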
+
+
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 6221bab..2d2836f 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -79,7 +79,7 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS, MCCodeEmitter *CE,
- bool RelaxAll);
+ bool RelaxAll, bool IncrementalLinkerCompatible);
/// Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
@@ -98,6 +98,17 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
+
+/// Returns the sub or super register of a specific X86 register.
+/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX.
+/// Aborts on error.
+unsigned getX86SubSuperRegister(unsigned, unsigned, bool High = false);
+
+/// Returns the sub or super register of a specific X86 register.
+/// Like getX86SubSuperRegister() but returns 0 on error.
+unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
+ bool High = false);
+
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 9e801fc..191ebea 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported relocation of modified symbol", false);
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of modified symbol");
+ return;
+ }
// We don't support PCrel relocations of differences. Darwin 'as' doesn't
// implement most of these correctly.
- if (IsPCRel)
- report_fatal_error("unsupported pc-relative relocation of difference",
- false);
+ if (IsPCRel) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported pc-relative relocation of difference");
+ return;
+ }
// The support for the situation where one or both of the symbols would
// require a local relocation is handled just like if the symbols were
@@ -168,16 +173,20 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// Darwin 'as' doesn't emit correct relocations for this (it ends up with a
// single SIGNED relocation); reject it for now. Except the case where both
// symbols don't have a base, equal but both NULL.
- if (A_Base == B_Base && A_Base)
- report_fatal_error("unsupported relocation with identical base", false);
+ if (A_Base == B_Base && A_Base) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation with identical base");
+ return;
+ }
// A subtraction expression where either symbol is undefined is a
// non-relocatable expression.
if (A->isUndefined() || B->isUndefined()) {
StringRef Name = A->isUndefined() ? A->getName() : B->getName();
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
"unsupported relocation with subtraction expression, symbol '" +
Name + "' can not be undefined in a subtraction expression");
+ return;
}
Value += Writer->getSymbolAddress(*A, Layout) -
@@ -244,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation(
FixedValue = Res;
return;
} else {
- report_fatal_error("unsupported relocation of variable '" +
- Symbol->getName() + "'", false);
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ return;
}
} else {
- report_fatal_error("unsupported relocation of undefined symbol '" +
- Symbol->getName() + "'", false);
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ return;
}
MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -266,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
Type = MachO::X86_64_RELOC_TLV;
} else if (Modifier != MCSymbolRefExpr::VK_None) {
- report_fatal_error("unsupported symbol modifier in relocation",
- false);
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported symbol modifier in relocation");
+ return;
} else {
Type = MachO::X86_64_RELOC_SIGNED;
@@ -292,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation(
}
}
} else {
- if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in branch "
- "relocation", false);
+ if (Modifier != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "unsupported symbol modifier in branch relocation");
+ return;
+ }
Type = MachO::X86_64_RELOC_BRANCH;
}
@@ -309,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Type = MachO::X86_64_RELOC_GOT;
IsPCRel = 1;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- report_fatal_error("TLVP symbol modifier should have been rip-rel",
- false);
- } else if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in relocation", false);
- else {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel");
+ return;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported symbol modifier in relocation");
+ return;
+ } else {
Type = MachO::X86_64_RELOC_UNSIGNED;
unsigned Kind = Fixup.getKind();
- if (Kind == X86::reloc_signed_4byte)
- report_fatal_error("32-bit absolute addressing is not supported in "
- "64-bit mode", false);
+ if (Kind == X86::reloc_signed_4byte) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "32-bit absolute addressing is not supported in 64-bit mode");
+ return;
+ }
}
}
}
@@ -350,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression",
- false);
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+ return false;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
@@ -363,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression",
- false);
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+ return false;
+ }
// Select the appropriate difference relocation type.
//
@@ -387,12 +416,12 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") + Buffer +
") into 24 bits of scattered "
"relocation entry.");
- llvm_unreachable("fatal error returned?!");
+ return false;
}
MachO::any_relocation_info MRE;
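Every hunk in this file applies the same transformation: a process-killing report_fatal_error becomes an MCContext::reportError carrying the fixup's source location, followed by an early return so the caller can keep assembling and collect further diagnostics. Schematically (a sketch of the pattern, not this file's literal code):

    // Before: aborts the whole process, with no source location.
    //   report_fatal_error("unsupported relocation ...", false);

    // After: emits a located, recoverable diagnostic and bails out of the
    // current record only.
    //   Asm.getContext().reportError(Fixup.getLoc(),
    //                                "unsupported relocation ...");
    //   return;   // or `return false;` where the function reports success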
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 92f42b6..d045118 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -50,9 +50,11 @@ void X86WinCOFFStreamer::FinishImpl() {
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
- MCCodeEmitter *CE, bool RelaxAll) {
+ MCCodeEmitter *CE, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
return S;
}
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cae865a..4fdd527 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -140,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm,
}
}
-/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
unsigned NewImm = Imm;
@@ -191,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm,
}
}
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumHalfElts = NumElts / 2;
+
+ for (unsigned l = 0; l != NumHalfElts; ++l)
+ ShuffleMask.push_back(l + NumHalfElts);
+ for (unsigned h = 0; h != NumHalfElts; ++h)
+ ShuffleMask.push_back(h);
+}
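PSWAPD only ever sees v2i32, so the two loops above reduce to a single element each and the decoded mask is the half swap {1, 0}. For example (assuming the SmallVector and MVT headers are available):

    SmallVector<int, 2> Mask;
    DecodePSWAPMask(MVT::v2i32, Mask);
    // NumElts = 2, NumHalfElts = 1, so Mask == {1, 0}: the two dwords swap.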
+
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
@@ -222,7 +233,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
unsigned NumLanes = VT.getSizeInBits() / 128;
- if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
@@ -253,6 +264,26 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
+/// \brief Decode a shuffle of packed values at 128-bit granularity
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
+/// from an immediate mask into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
+ unsigned ControlBitsMask = NumLanes - 1;
+ unsigned NumControlBits = NumLanes / 2;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+ // We actually need the other source.
+ if (l >= NumLanes / 2)
+ LaneMask += NumLanes;
+ for (unsigned i = 0; i != NumElementsInLane; ++i)
+ ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+ }
+}
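Worked through for a 512-bit VSHUFI64x2 on v8i64 with Imm = 0xE4: NumLanes = 4, NumElementsInLane = 2, and the four 2-bit selectors decode to lanes 0, 1, 2, 3; the upper two destination lanes read the second source, so their indices are offset by NumLanes. For instance:

    SmallVector<int, 8> Mask;
    decodeVSHUF64x2FamilyMask(MVT::v8i64, 0xE4, Mask);
    // Selectors 0,1 pick lanes from source 1; selectors 2,3 (offset by 4)
    // pick from source 2, so Mask == {0, 1, 2, 3, 12, 13, 14, 15}.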
+
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements() / 2;
@@ -277,10 +308,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
+#ifndef NDEBUG
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
-
- if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
- return;
+ assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
+#endif
// This is a straightforward byte vector.
if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
@@ -290,7 +321,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0; i < NumElements; ++i) {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
- int Base = i < 16 ? 0 : 16;
+ int Base = i & ~0xf;
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
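The new Base expression in the hunk above generalizes the old two-lane ternary: i & ~0xf rounds a byte index down to the start of its 16-byte lane, which also covers the four lanes of a 512-bit mask. A couple of spot checks:

    static_assert((5  & ~0xf) ==  0, "first lane");
    static_assert((23 & ~0xf) == 16, "second lane");
    static_assert((40 & ~0xf) == 32, "third lane of a 512-bit mask");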
@@ -357,44 +388,66 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
- assert(MaskTy->getVectorElementType()->isIntegerTy() &&
- "Expected integer constant mask elements!");
- int ElementBits = MaskTy->getScalarSizeInBits();
- int NumElements = MaskTy->getVectorNumElements();
+ // It is not an error for the VPERMILP mask to not be a vector of the
+ // expected integer width, because the constant pool uniques constants by
+ // their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+ if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ return;
+
+ // Only support vector types.
+ if (!MaskTy->isVectorTy())
+ return;
+
+ // Make sure its an integer type.
+ Type *VecEltTy = MaskTy->getVectorElementType();
+ if (!VecEltTy->isIntegerTy())
+ return;
+
+ // Support any element type from byte up to element size.
+ // This is necessary primarily because 64-bit elements get split to 32-bit
+ // in the constant pool on 32-bit targets.
+ unsigned EltTySize = VecEltTy->getIntegerBitWidth();
+ if (EltTySize < 8 || EltTySize > ElSize)
+ return;
+
+ unsigned NumElements = MaskTySize / ElSize;
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
"Unexpected number of vector elements.");
ShuffleMask.reserve(NumElements);
- if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
- assert((unsigned)NumElements == CDS->getNumElements() &&
- "Constant mask has a different number of elements!");
-
- for (int i = 0; i < NumElements; ++i) {
- int Base = (i * ElementBits / 128) * (128 / ElementBits);
- uint64_t Element = CDS->getElementAsInteger(i);
- // Only the least significant 2 bits of the integer are used.
- int Index = Base + (Element & 0x3);
- ShuffleMask.push_back(Index);
- }
- } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
- assert((unsigned)NumElements == C->getNumOperands() &&
- "Constant mask has a different number of elements!");
-
- for (int i = 0; i < NumElements; ++i) {
- int Base = (i * ElementBits / 128) * (128 / ElementBits);
- Constant *COp = CV->getOperand(i);
- if (isa<UndefValue>(COp)) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- // Only the least significant 2 bits of the integer are used.
- int Index = Base + (Element & 0x3);
- ShuffleMask.push_back(Index);
+ unsigned NumElementsPerLane = 128 / ElSize;
+ unsigned Factor = ElSize / EltTySize;
+
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i * Factor);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
}
+ int Index = i & ~(NumElementsPerLane - 1);
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ if (ElSize == 64)
+ Index += (Element >> 1) & 0x1;
+ else
+ Index += Element & 0x3;
+ ShuffleMask.push_back(Index);
}
+
+ // TODO: Handle funny-looking vectors too.
}
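Tracing the rewritten decode for a 256-bit VPERMILPD whose IR mask constant C is <4 x i64> <i64 2, i64 0, i64 2, i64 0> (bit 1 of each element is the hardware selector): NumElements = 4, Factor = 1, NumElementsPerLane = 2, so each Index is the lane base i & ~1 plus (Element >> 1) & 1. A sketch, assuming C has been built elsewhere:

    SmallVector<int, 4> Mask;
    DecodeVPERMILPMask(C, /*ElSize=*/64, Mask);
    // i = 0: base 0 + ((2 >> 1) & 1) = 1
    // i = 1: base 0 + ((0 >> 1) & 1) = 0
    // i = 2: base 2 + ((2 >> 1) & 1) = 3
    // i = 3: base 2 + ((0 >> 1) & 1) = 2
    // Mask == {1, 0, 3, 2}: a swap within each 128-bit lane.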
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
@@ -503,4 +556,74 @@ void DecodeINSERTQIMask(int Len, int Idx,
ShuffleMask.push_back(SM_SentinelUndef);
}
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ if (MaskTy->isVectorTy()) {
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+ return;
+ }
+ // Scalar value; just broadcast it
+ if (!isa<ConstantInt>(C))
+ return;
+ uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
+ int NumElements = VT.getVectorNumElements();
+ Element &= (1 << NumElements) - 1;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask.push_back(Element);
+}
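The scalar fall-through above handles a splatted mask: the single constant is masked and broadcast to every element. For a v8i32 VPERMD whose mask constant folded to the scalar i32 5 (with C standing for that scalar constant):

    SmallVector<int, 8> Mask;
    DecodeVPERMVMask(C, MVT::v8i32, Mask);
    // Element = 5; 5 & ((1 << 8) - 1) == 5, broadcast 8 times:
    // Mask == {5, 5, 5, 5, 5, 5, 5, 5}.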
+
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements*2) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+}
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 3d10d18..ab18e64 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -54,6 +54,9 @@ void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decodes a PSWAPD 3DNow! instruction.
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
@@ -83,12 +86,18 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a shuffle of packed values at 128-bit granularity
+/// from an immediate mask into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
@@ -108,6 +117,22 @@ void DecodeEXTRQIMask(int Len, int Idx,
/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 8403ae6..fbec662 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -23,56 +23,47 @@ class FunctionPass;
class ImmutablePass;
class X86TargetMachine;
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into a X86-specific DAG, ready for
+/// instruction scheduling.
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
-/// createX86GlobalBaseRegPass - This pass initializes a global base
-/// register for PIC on x86-32.
+/// This pass initializes a global base register for PIC on x86-32.
FunctionPass* createX86GlobalBaseRegPass();
-/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
-/// to local-dynamic TLS variables so that the TLS base address for the module
-/// is only fetched once per execution path through the function.
+/// This pass combines multiple accesses to local-dynamic TLS variables so that
+/// the TLS base address for the module is only fetched once per execution path
+/// through the function.
FunctionPass *createCleanupLocalDynamicTLSPass();
-/// createX86FloatingPointStackifierPass - This function returns a pass which
-/// converts floating point register references and pseudo instructions into
-/// floating point stack references and physical instructions.
-///
+/// This function returns a pass which converts floating-point register
+/// references and pseudo instructions into floating-point stack references and
+/// physical instructions.
FunctionPass *createX86FloatingPointStackifierPass();
-/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
-/// before each call to avoid transition penalty between functions encoded with
-/// AVX and SSE.
+/// This pass inserts AVX vzeroupper instructions before each call to avoid
+/// transition penalty between functions encoded with AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
-/// createX86EmitCodeToMemory - Returns a pass that converts a register
-/// allocated function into raw machine code in a dynamically
-/// allocated chunk of memory.
-///
-FunctionPass *createEmitX86CodeToMemory();
-
-/// createX86PadShortFunctions - Return a pass that pads short functions
-/// with NOOPs. This will prevent a stall when returning on the Atom.
+/// Return a pass that pads short functions with NOOPs.
+/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
-/// createX86FixupLEAs - Return a a pass that selectively replaces
-/// certain instructions (like add, sub, inc, dec, some shifts,
-/// and some multiplies) by equivalent LEA instructions, in order
-/// to eliminate execution delays in some Atom processors.
+
+/// Return a pass that selectively replaces certain instructions (like add,
+/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
+/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
-/// createX86CallFrameOptimization - Return a pass that optimizes
-/// the code-size of x86 call sequences. This is done by replacing
-/// esp-relative movs with pushes.
+/// Return a pass that removes redundant address recalculations.
+FunctionPass *createX86OptimizeLEAs();
+
+/// Return a pass that optimizes the code-size of x86 call sequences. This is
+/// done by replacing esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
-/// createX86WinEHStatePass - Return an IR pass that inserts EH registration
-/// stack objects and explicit EH state updates. This pass must run after EH
-/// preparation, which does Windows-specific but architecture-neutral
-/// preparation.
+/// Return an IR pass that inserts EH registration stack objects and explicit
+/// EH state updates. This pass must run after EH preparation, which does
+/// Windows-specific but architecture-neutral preparation.
FunctionPass *createX86WinEHStatePass();
/// Return a Machine IR pass that expands X86-specific pseudo
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 8522674..8902a85 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -37,14 +37,26 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
+def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
+ "Support fxsave/fxrestore instructions">;
+
+def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
+ "Support xsave instructions">;
+
+def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
+ "Support xsaveopt instructions">;
+
+def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
+ "Support xsavec instructions">;
+
+def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
+ "Support xsaves instructions">;
-def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
- "Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
- [FeatureMMX, FeatureCMOV]>;
+ [FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -60,6 +72,11 @@ def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
+ "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
@@ -79,16 +96,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
-// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that
-// explicit. Also, it seems this would be the default state for most chips
-// going forward, so it would probably be better to negate the logic and
-// match the 32-byte "slow mem" feature below.
-def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
- "IsUAMemFast", "true",
- "Fast unaligned memory access">;
+// FIXME: This should not apply to CPUs that do not have SSE.
+def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
- "Slow unaligned 32-byte memory access">;
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -120,6 +134,8 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
+def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
+ "Enable protection keys">;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -168,9 +184,11 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
+ "Support LAHF and SAHF instructions">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
-def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
@@ -181,6 +199,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
+// TODO: This feature ought to be renamed.
+// What it really refers to are CPUs for which certain instructions
+// (which ones besides the example below?) are microcoded.
+// The best examples of this are the memory forms of CALL and PUSH
+// instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;
@@ -208,278 +231,473 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-def : Proc<"generic", []>;
-def : Proc<"i386", []>;
-def : Proc<"i486", []>;
-def : Proc<"i586", []>;
-def : Proc<"pentium", []>;
-def : Proc<"pentium-mmx", [FeatureMMX]>;
-def : Proc<"i686", []>;
-def : Proc<"pentiumpro", [FeatureCMOV]>;
-def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
-def : Proc<"pentium3", [FeatureSSE1]>;
-def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
-def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"pentium4", [FeatureSSE2]>;
-def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"generic", [FeatureSlowUAMem16]>;
+def : Proc<"i386", [FeatureSlowUAMem16]>;
+def : Proc<"i486", [FeatureSlowUAMem16]>;
+def : Proc<"i586", [FeatureSlowUAMem16]>;
+def : Proc<"pentium", [FeatureSlowUAMem16]>;
+def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"i686", [FeatureSlowUAMem16]>;
+def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV,
+ FeatureFXSR]>;
+def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
+ FeatureFXSR]>;
+def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
+ FeatureFXSR, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR]>;
+def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureSlowBTMem]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureSSE3, FeatureSlowBTMem]>;
+ [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
+ FeatureSlowBTMem]>;
// NetBurst.
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : Proc<"prescott",
+ [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
+ FeatureSlowBTMem]>;
+def : Proc<"nocona", [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem
+]>;
// Intel Core 2 Solo/Duo.
-def : ProcessorModel<"core2", SandyBridgeModel,
- [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
-def : ProcessorModel<"penryn", SandyBridgeModel,
- [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"core2", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
+def : ProcessorModel<"penryn", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE41,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
- ProcIntelAtom,
- FeatureSSSE3,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeatureSlowBTMem,
- FeatureLeaForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions
- ]>;
+ ProcIntelAtom,
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureSlowBTMem,
+ FeatureLEAForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions,
+ FeatureLAHFSAHF
+]>;
def : BonnellProc<"bonnell">;
def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
- ProcIntelSLM,
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureAES,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeaturePRFCHW,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowBTMem,
- FeatureFastUAMem
- ]>;
+ ProcIntelSLM,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureLAHFSAHF
+]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureLAHFSAHF
+]>;
def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF
+]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
+]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureADX,
- FeatureRDSEED,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
+]>;
def : BroadwellProc<"broadwell">;
// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
- FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureMPX]>;
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureFXSR,
+ FeatureERI,
+ FeatureCDI,
+ FeaturePFI,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec,
+ FeatureMPX,
+ FeatureLAHFSAHF
+]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureCDI,
- FeatureDQI, FeatureBWI, FeatureVLX,
- FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureMPX]>;
+class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureFXSR,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec,
+ FeatureMPX,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureLAHFSAHF
+]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
// AMD CPUs.
-def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [Feature3DNow]>;
-def : Proc<"k6-3", [Feature3DNow]>;
-def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
+ FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
+ FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem,
+def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"barcelona", [FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem,
+def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
+def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
+ FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
+ FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
+def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
+ FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
+ FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
+
// Bobcat
-def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD]>;
+def : Proc<"btver1", [
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureSSE4A,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Jaguar
-def : ProcessorModel<"btver2", BtVer2Model,
- [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
- FeatureBMI, FeatureF16C, FeatureMOVBE,
- FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
- FeatureSlowSHLD]>;
-
-// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
+def : ProcessorModel<"btver2", BtVer2Model, [
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureBMI,
+ FeatureF16C,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Bulldozer
-def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD]>;
+def : Proc<"bdver1", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Piledriver
-def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
+def : Proc<"bdver2", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Steamroller
-def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureFSGSBase]>;
+def : Proc<"bdver3", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureXSAVEOPT,
+ FeatureSlowSHLD,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
// Excavator
-def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
- FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
- FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureBMI2,
- FeatureTBM, FeatureFMA, FeatureSSE4A,
- FeatureFSGSBase]>;
-
-def : Proc<"geode", [Feature3DNowA]>;
-
-def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [Feature3DNow]>;
-def : Proc<"c3", [Feature3DNow]>;
-def : Proc<"c3-2", [FeatureSSE1]>;
+def : Proc<"bdver4", [
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureXSAVEOPT,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
+
+def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -492,8 +710,8 @@ def : Proc<"c3-2", [FeatureSSE1]>;
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel,
- [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem]>;
+ [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit,
+ FeatureSlowBTMem]>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -520,10 +738,6 @@ include "X86CallingConv.td"
// Assembly Parser
//===----------------------------------------------------------------------===//
-def ATTAsmParser : AsmParser {
- string AsmParserClassName = "AsmParser";
-}
-
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
@@ -568,7 +782,6 @@ def IntelAsmWriter : AsmWriter {
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
- let AssemblyParsers = [ATTAsmParser];
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index ba33248..2170e62 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -217,10 +217,10 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
- MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
- ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
- Reg = getX86SubSuperRegister(Reg, VT);
+ unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
+ (strcmp(Modifier+6,"32") == 0) ? 32 :
+ (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
+ Reg = getX86SubSuperRegister(Reg, Size);
}
O << X86ATTInstPrinter::getRegisterName(Reg);
return;
@@ -361,22 +361,21 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ Reg = getX86SubSuperRegister(Reg, 8);
break;
case 'h': // Print QImode high register
- Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ Reg = getX86SubSuperRegister(Reg, 8, true);
break;
case 'w': // Print HImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ Reg = getX86SubSuperRegister(Reg, 16);
break;
case 'k': // Print SImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ Reg = getX86SubSuperRegister(Reg, 32);
break;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
- MVT::SimpleValueType Ty = P.getSubtarget().is64Bit() ? MVT::i64 : MVT::i32;
- Reg = getX86SubSuperRegister(Reg, Ty);
+ Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 64 : 32);
break;
}
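
getX86SubSuperRegister now takes the register width in bits rather than an MVT, which removes the nested MVT::SimpleValueType selection above. A usage sketch with illustrative values:

    // New signature: pass the width directly (optionally the high-8 part).
    unsigned R32 = getX86SubSuperRegister(X86::RAX, 32);      // X86::EAX
    unsigned R8H = getX86SubSuperRegister(X86::RAX, 8, true); // X86::AH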
@@ -535,6 +534,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
}
}
+ OutStreamer->EmitSyntaxDirective();
}
static void
@@ -565,10 +565,11 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
- SectionKind Kind = CPE.getSectionKind(TM.getDataLayout());
+ const DataLayout &DL = MF->getDataLayout();
+ SectionKind Kind = CPE.getSectionKind(&DL);
const Constant *C = CPE.Val.ConstVal;
if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(Kind, C))) {
+ getObjFileLowering().getSectionForConstant(DL, Kind, C))) {
if (MCSymbol *Sym = S->getCOMDATSymbol()) {
if (Sym->isUndefined())
OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index 7f5d127..9c8bd98 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -78,8 +78,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// outputting it to the OutStream. This allows the shadow tracker to minimise
// the number of NOPs used for stackmap padding.
void EmitAndCountInstruction(MCInst &Inst);
-
- void InsertStackMapShadows(MachineFunction &MF);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 031ba4b..fc6ee17 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
@@ -53,10 +54,13 @@ private:
// Information we know about a particular call site
struct CallContext {
CallContext()
- : Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
- MovVector(4, nullptr), NoStackParams(false), UsePush(false){};
+ : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
+ MovVector(4, nullptr), NoStackParams(false), UsePush(false){}
- // Actuall call instruction
+ // Iterator referring to the frame setup instruction
+ MachineBasicBlock::iterator FrameSetup;
+
+ // Actual call instruction
MachineInstr *Call;
// A copy of the stack pointer
@@ -75,17 +79,16 @@ private:
bool UsePush;
};
- typedef DenseMap<MachineInstr *, CallContext> ContextMap;
+ typedef SmallVector<CallContext, 8> ContextVector;
bool isLegal(MachineFunction &MF);
- bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
+ bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);
void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, CallContext &Context);
- bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock::iterator I,
- const CallContext &Context);
+ bool adjustCallSequence(MachineFunction &MF, const CallContext &Context);
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
unsigned Reg);
@@ -100,7 +103,8 @@ private:
const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
- const TargetFrameLowering *TFL;
+ const X86FrameLowering *TFL;
+ const X86Subtarget *STI;
const MachineRegisterInfo *MRI;
static char ID;
};
@@ -124,8 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// No point in running this in 64-bit mode, since some arguments are
// passed in-register in all common calling conventions, so the pattern
// we're looking for will never match.
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- if (STI.is64Bit())
+ if (STI->is64Bit())
+ return false;
+
+ // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
+ // in the compact unwind encoding that Darwin uses. So, bail if there
+ // is a danger of that being generated.
+ if (STI->isTargetDarwin() &&
+ (!MF.getMMI().getLandingPads().empty() ||
+ (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
return false;
// You would expect straight-line code between call-frame setup and
@@ -161,7 +172,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// Check whether this transformation is profitable for a particular
// function - in terms of code size.
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
- ContextMap &CallSeqMap) {
+ ContextVector &CallSeqVector) {
// This transformation is always a win when we do not expect to have
// a reserved call frame. Under other circumstances, it may be either
// a win or a loss, and requires a heuristic.
@@ -170,24 +181,20 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
return true;
// Don't do this when not optimizing for size.
- bool OptForSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize);
-
- if (!OptForSize)
+ if (!MF.getFunction()->optForSize())
return false;
unsigned StackAlign = TFL->getStackAlignment();
int64_t Advantage = 0;
- for (auto CC : CallSeqMap) {
+ for (auto CC : CallSeqVector) {
// Call sites where no parameters are passed on the stack
// do not affect the cost, since there needs to be no
// stack adjustment.
- if (CC.second.NoStackParams)
+ if (CC.NoStackParams)
continue;
- if (!CC.second.UsePush) {
+ if (!CC.UsePush) {
// If we don't use pushes for a particular call site,
// we pay for not having a reserved call frame with an
// additional sub/add esp pair. The cost is ~3 bytes per instruction,
@@ -200,11 +207,11 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
// We'll need a add after the call.
Advantage -= 3;
// If we have to realign the stack, we'll also need a sub before
- if (CC.second.ExpectedDist % StackAlign)
+ if (CC.ExpectedDist % StackAlign)
Advantage -= 3;
// Now, for each push, we save ~3 bytes. For small constants, we actually
// save more (up to 5 bytes), but 3 should be a good approximation.
- Advantage += (CC.second.ExpectedDist / 4) * 3;
+ Advantage += (CC.ExpectedDist / 4) * 3;
}
}
@@ -212,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
}
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getSubtarget().getInstrInfo();
- TFL = MF.getSubtarget().getFrameLowering();
+ STI = &MF.getSubtarget<X86Subtarget>();
+ TII = STI->getInstrInfo();
+ TFL = STI->getFrameLowering();
MRI = &MF.getRegInfo();
if (!isLegal(MF))
@@ -223,21 +231,22 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- ContextMap CallSeqMap;
+ ContextVector CallSeqVector;
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
if (I->getOpcode() == FrameSetupOpcode) {
- CallContext &Context = CallSeqMap[I];
+ CallContext Context;
collectCallInfo(MF, *BB, I, Context);
+ CallSeqVector.push_back(Context);
}
- if (!isProfitable(MF, CallSeqMap))
+ if (!isProfitable(MF, CallSeqVector))
return false;
- for (auto CC : CallSeqMap)
- if (CC.second.UsePush)
- Changed |= adjustCallSequence(MF, CC.first, CC.second);
+ for (auto CC : CallSeqVector)
+ if (CC.UsePush)
+ Changed |= adjustCallSequence(MF, CC);
return Changed;
}
@@ -307,13 +316,13 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// Check that this particular call sequence is amenable to the
// transformation.
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- unsigned StackPtr = RegInfo.getStackRegister();
+ STI->getRegisterInfo());
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
MachineBasicBlock::iterator FrameSetup = I++;
+ Context.FrameSetup = FrameSetup;
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
@@ -338,7 +347,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
if (!I->isCopy() || !I->getOperand(0).isReg())
return;
Context.SPCopy = I++;
- StackPtr = Context.SPCopy->getOperand(0).getReg();
+
+ unsigned StackPtr = Context.SPCopy->getOperand(0).getReg();
// Scan the call setup sequence for the pattern we're looking for.
// We only handle a simple case - a sequence of MOV32mi or MOV32mr
@@ -434,22 +444,22 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
}
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
- MachineBasicBlock::iterator I,
const CallContext &Context) {
// Ok, we can in fact do the transformation for this call.
// Do not remove the FrameSetup instruction, but adjust the parameters.
// PEI will end up finalizing the handling of this.
- MachineBasicBlock::iterator FrameSetup = I;
- MachineBasicBlock &MBB = *(I->getParent());
+ MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
+ MachineBasicBlock &MBB = *(FrameSetup->getParent());
FrameSetup->getOperand(1).setImm(Context.ExpectedDist);
- DebugLoc DL = I->getDebugLoc();
+ DebugLoc DL = FrameSetup->getDebugLoc();
// Now, iterate through the vector in reverse order, and replace the movs
// with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
// replace uses.
for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+ MachineBasicBlock::iterator Push = nullptr;
if (MOV->getOpcode() == X86::MOV32mi) {
unsigned PushOpcode = X86::PUSHi32;
// If the operand is a small (8-bit) immediate, we can use a
@@ -461,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val))
PushOpcode = X86::PUSH32i8;
}
- BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
+ .addOperand(PushOp);
} else {
unsigned int Reg = PushOp.getReg();
// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
- const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
+ bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
MachineInstr *DefMov = nullptr;
if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
- MachineInstr *Push =
- BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -483,12 +492,19 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
DefMov->eraseFromParent();
} else {
- BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
.addReg(Reg)
.getInstr();
}
}
+ // For debugging, when using SP-based CFA, we need to adjust the CFA
+ // offset after each push.
+ // TODO: This is needed only if we require precise CFA.
+ if (!TFL->hasFP(MF))
+ TFL->BuildCFI(MBB, std::next(Push), DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
+
MBB.erase(MOV);
}
@@ -532,13 +548,10 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
DefMI->getParent() != FrameSetup->getParent())
return nullptr;
- // Now, make sure everything else up until the ADJCALLSTACK is a sequence
- // of MOVs. To be less conservative would require duplicating a lot of the
- // logic from PeepholeOptimizer.
- // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
- // to be smarter about folding into pushes.
+ // Make sure we don't have any instructions between DefMI and the
+ // push that make folding the load illegal.
for (auto I = DefMI; I != FrameSetup; ++I)
- if (I->getOpcode() != X86::MOV32rm)
+ if (I->isLoadFoldBarrier())
return nullptr;
return DefMI;
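
This rework tracks call sites in a SmallVector keyed by their saved FrameSetup iterator instead of a DenseMap, and emits a .cfi_adjust_cfa_offset after every generated push when no frame pointer exists, keeping SP-based CFA debugging accurate. The size heuristic in isProfitable is unchanged in substance; restated as a self-contained sketch (the -6 penalty for the non-push case and the >= 0 threshold are not visible in the hunks above and are assumptions):

    // Sketch of the isProfitable() byte-count heuristic.
    struct CallSite { bool NoStackParams, UsePush; int64_t ExpectedDist; };

    static bool pushesAreProfitable(const CallSite *Sites, unsigned N,
                                    unsigned StackAlign) {
      int64_t Advantage = 0;
      for (unsigned i = 0; i != N; ++i) {
        const CallSite &CC = Sites[i];
        if (CC.NoStackParams)
          continue;                        // no stack adjustment either way
        if (!CC.UsePush) {
          Advantage -= 6;                  // unreserved frame: sub/add esp pair
        } else {
          Advantage -= 3;                  // add after the call
          if (CC.ExpectedDist % StackAlign)
            Advantage -= 3;                // extra sub when realigning
          Advantage += (CC.ExpectedDist / 4) * 3; // ~3 bytes saved per push
        }
      }
      return Advantage >= 0;
    }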
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h
index 0eb2494..a08160f 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.h
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
#define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
@@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
+inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+ // not to split i64 and double between a register and stack
+ static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+ static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // If this is the first part of a double/i64/i128, or if we're already
+ // in the middle of a split, add to the pending list. If this is not
+ // the end of the split, return; otherwise go on to process the pending
+ // list.
+ if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+ if (!ArgFlags.isSplitEnd())
+ return true;
+ }
+
+ // If there are no pending members, we are not in the middle of a split,
+ // so do the usual inreg stuff.
+ if (PendingMembers.empty()) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ return false;
+ }
+
+ assert(ArgFlags.isSplitEnd());
+
+ // We now have the entire original argument in PendingMembers, so decide
+ // whether to use registers or the stack.
+ // Per the MCU ABI:
+ // a) To use registers, we need to have enough of them free to contain
+ // the entire argument.
+ // b) We never want to use more than 2 registers for a single argument.
+
+ unsigned FirstFree = State.getFirstUnallocated(RegList);
+ bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+ for (auto &It : PendingMembers) {
+ if (UseRegs)
+ It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+ else
+ It.convertToMem(State.AllocateStack(4, 4));
+ State.addLoc(It);
+ }
+
+ PendingMembers.clear();
+
+ return true;
+}
+
} // End llvm namespace
#endif
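
CC_X86_32_MCUInReg encodes the MCU rule that a split argument either lands entirely in registers or entirely on the stack, never straddling the boundary, and never spans more than two registers. A worked example for one i64 argument split into two i32 pieces, with the EAX/EDX/ECX pool above:

    // PendingMembers.size() == 2 for an i64:
    //   FirstFree == 0 -> 2 <= min(2, 3-0) -> halves in EAX:EDX
    //   FirstFree == 1 -> 2 <= min(2, 3-1) -> halves in EDX:ECX
    //   FirstFree == 2 -> 2 >  min(2, 3-2) -> both halves on the stack
    // An i128 (four i32 pieces) always goes to the stack, since 4 > 2.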
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index 8f88888..54d88cb 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
// MMX vector types are always returned in XMM0.
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
@@ -202,6 +203,16 @@ def RetCC_X86_64_AnyReg : CallingConv<[
CCCustom<"CC_X86_AnyReg_Error">
]>;
+// X86-64 HHVM return-value convention.
+def RetCC_X86_64_HHVM: CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: could return in any GP register save RSP and R12.
+ CCIfType<[i64], CCAssignToReg<[RBX, RBP, RDI, RSI, RDX, RCX, R8, R9,
+ RAX, R10, R11, R13, R14, R15]>>
+]>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
@@ -227,6 +238,9 @@ def RetCC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
+ // Handle HHVM calls.
+ CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
+
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -280,7 +294,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
@@ -305,7 +319,7 @@ def CC_X86_64_C : CallingConv<[
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
- CCIfType<[f80], CCAssignToStack<0, 0>>,
+ CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
@@ -319,6 +333,23 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<64, 64>>
]>;
+// Calling convention for X86-64 HHVM.
+def CC_X86_64_HHVM : CallingConv<[
+ // Use all/any GP registers for args, except RSP.
+ CCIfType<[i64], CCAssignToReg<[RBX, R12, RBP, R15,
+ RDI, RSI, RDX, RCX, R8, R9,
+ RAX, R10, R11, R13, R14]>>
+]>;
+
+// Calling convention for helper functions in HHVM.
+def CC_X86_64_HHVM_C : CallingConv<[
+ // Pass the first argument in RBP.
+ CCIfType<[i64], CCAssignToReg<[RBP]>>,
+
+ // Otherwise it's the same as the regular C calling convention.
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
// FIXME: Handle byval stuff.
@@ -561,6 +592,23 @@ def CC_X86_32_C : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_MCU : CallingConv<[
+ // Handles byval parameters. Note that, like FastCC, we can't rely on
+ // the delegation to CC_X86_32_Common because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // If the call is not a vararg call, some arguments may be passed
+ // in integer registers.
+ CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
def CC_X86_32_FastCall : CallingConv<[
// Promote i1/i8/i16 arguments to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -708,18 +756,28 @@ def CC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<CC_X86_32_C>
]>;
+def CC_X86_32_Intr : CallingConv<[
+ CCAssignToStack<4, 4>
+]>;
+
+def CC_X86_64_Intr : CallingConv<[
+ CCAssignToStack<8, 8>
+]>;
+
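
CC_X86_32_Intr and CC_X86_64_Intr put everything on the stack because an interrupt handler's "arguments" are the frame the CPU pushed. Together with the CSR_32/64_AllRegs* sets below and the X86::IRET pseudo later in this commit, they back the new X86_INTR convention. A hedged usage sketch; the attribute spelling is Clang's, the frame type is only illustrative:

    // Hypothetical interrupt handler lowered through X86_INTR:
    struct InterruptFrame;  // CPU-pushed frame; layout defined elsewhere
    __attribute__((interrupt))
    void PageFaultISR(InterruptFrame *Frame, unsigned long ErrorCode) {
      // All registers are treated as callee-saved; the epilogue ends in
      // an IRET rather than a RET.
    }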
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
+ CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -734,6 +792,9 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
+ CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
+ CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+ CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -764,6 +825,12 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+// The function used by Darwin to obtain the address of a thread-local variable
+// uses rdi to pass a single parameter and rax for the return value. All other
+// GPRs are preserved.
+def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
+ R8, R9, R10, R11)>;
+
// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
R8, R9, R10, RSP)>;
@@ -778,6 +845,11 @@ def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
R11, R12, R13, R14, R15, RBP,
(sequence "XMM%u", 0, 15))>;
+def CSR_32_AllRegs : CalleeSavedRegs<(add EAX, EBX, ECX, EDX, EBP, ESI,
+ EDI, ESP)>;
+def CSR_32_AllRegs_SSE : CalleeSavedRegs<(add CSR_32_AllRegs,
+ (sequence "XMM%u", 0, 7))>;
+
def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP,
(sequence "XMM%u", 16, 31))>;
def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP,
@@ -804,3 +876,6 @@ def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
(sequence "ZMM%u", 16, 31),
K4, K5, K6, K7)>;
+
+// Only R12 is preserved for PHP calls in HHVM.
+def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
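
Taken together, the HHVM pieces give PHP-to-PHP calls fifteen GP argument registers with only R12 callee-saved, while the helper convention CC_X86_64_HHVM_C pins its first argument to RBP and otherwise defers to the SysV C convention. The argument order, restated as a sketch (an illustrative array, not an exported constant):

    // Argument registers for CallingConv::HHVM, in CC_X86_64_HHVM order:
    static const MCPhysReg HHVMArgRegs[] = {
        X86::RBX, X86::R12, X86::RBP, X86::R15, X86::RDI, X86::RSI,
        X86::RDX, X86::RCX, X86::R8,  X86::R9,  X86::RAX, X86::R10,
        X86::R11, X86::R13, X86::R14};
    // A three-argument HHVM call therefore uses RBX, R12 and RBP.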
diff --git a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 6a5a28e..a09d065 100644
--- a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -19,9 +19,10 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
-#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/GlobalValue.h"
using namespace llvm;
@@ -141,6 +142,24 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// The EH_RETURN pseudo is really removed during the MC Lowering.
return true;
}
+ case X86::IRET: {
+ // Adjust stack to erase error code
+ int64_t StackAdj = MBBI->getOperand(0).getImm();
+ X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true);
+ // Replace pseudo with machine iret
+ BuildMI(MBB, MBBI, DL,
+ TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
+ MBB.erase(MBBI);
+ return true;
+ }
+ case X86::EH_RESTORE: {
+ // Restore ESP and EBP, and optionally ESI if required.
+ bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
+ MBB.getParent()->getFunction()->getPersonalityFn()));
+ X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
+ MBBI->eraseFromParent();
+ return true;
+ }
}
llvm_unreachable("Previous switch has a fallthrough?");
}
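
The X86::IRET pseudo makes an X86_INTR epilogue explicit: first drop the CPU-pushed error code, if any, then issue the real instruction. An illustrative 64-bit expansion, assuming an 8-byte error code (i.e. StackAdj == 8):

    //   IRET pseudo, operand 0 == 8
    // expands to:
    //   addq $8, %rsp    ; emitSPUpdate(..., StackAdj, ...)
    //   iretq            ; X86::IRET64
    // With no error code StackAdj is 0 and only the iretq is emitted.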
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index b4319c8..de94a13 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -298,8 +298,8 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
return false;
// Make sure nothing is in the way
- BasicBlock::const_iterator Start = I;
- BasicBlock::const_iterator End = II;
+ BasicBlock::const_iterator Start(I);
+ BasicBlock::const_iterator End(II);
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
// We only expect extractvalue instructions between the intrinsic and the
// instruction to be selected.
@@ -433,6 +433,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasSSE4A = Subtarget->hasSSE4A();
+ bool HasAVX = Subtarget->hasAVX();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -449,35 +454,59 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
- case MVT::i32: Opc = X86::MOV32mr; break;
- case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::i32:
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
+ break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSS;
+ else
+ Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ } else
+ Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = X86ScalarSSEf64 ?
- (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ if (X86ScalarSSEf64) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSD;
+ else
+ Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ } else
+ Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
- else
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ else
+ Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ } else
Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
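
Fast-isel now honors non-temporal stores whenever a streaming opcode exists: MOVNTI for plain i32/i64 (SSE2), MOVNTSS/MOVNTSD for scalar floats (SSE4A), and MOVNT{PS,PD,DQ} for aligned vectors. Unaligned non-temporal vector stores quietly fall back to ordinary unaligned moves. The 128-bit float case, condensed into a sketch (not an exported helper):

    static unsigned selectV4F32StoreOpc(bool HasAVX, bool Aligned,
                                        bool NonTemporal) {
      if (Aligned)
        return NonTemporal ? (HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr)
                           : (HasAVX ? X86::VMOVAPSmr  : X86::MOVAPSmr);
      // No unaligned non-temporal form exists; use movups.
      return HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    }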
@@ -1069,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax. We also do the same with %eax for Win32.
- if (F.hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+ // All x86 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -1431,17 +1459,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addMBB(TrueMBB);
}
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
-
- // Emits an unconditional branch to the FalseBB, obtains the branch
- // weight, and adds it to the successor list.
- fastEmitBranch(FalseMBB, DbgLoc);
-
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1472,12 +1490,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
}
@@ -1492,12 +1506,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1511,12 +1520,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1945,6 +1949,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned ResultReg;
if (Subtarget->hasAVX()) {
+ const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
+
// If we have AVX, create 1 blendv instead of 3 logic instructions.
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly
// uses XMM0 as the selection register. That may need just as many
@@ -1955,10 +1962,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned BlendOpcode =
(RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
- LHSReg, LHSIsKill, CmpReg, true);
+ unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
@@ -2806,10 +2816,12 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE)
return 0;
- if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
- return 0;
- if (CS && CS->paramHasAttr(1, Attribute::InReg))
- return 0;
+
+ if (CS)
+ if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
+ CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ return 0;
+
return 4;
}
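
The rewritten check guards against calls with no arguments before querying attribute 1, and exempts MCU targets, where the callee does not pop the hidden sret pointer. The call-site path, restated as a stand-alone sketch (an illustrative helper, not the LLVM function):

    // Callee pops the 4-byte hidden sret pointer only when a first argument
    // exists, is sret, is not inreg, and the target is not an MCU.
    static unsigned sretBytesPopped(bool HasArgs, bool IsSRet, bool IsInReg,
                                    bool IsMCU) {
      return (HasArgs && IsSRet && !IsInReg && !IsMCU) ? 4 : 0;
    }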
@@ -2924,7 +2936,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -3020,8 +3032,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
- ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (Flags.isByVal()) {
X86AddressMode SrcAM;
SrcAM.Base.Reg = ArgReg;
@@ -3252,6 +3264,30 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
updateValueMap(I, Reg);
return true;
}
+ case Instruction::BitCast: {
+ // Select SSE2/AVX bitcasts between 128/256 bit vector types.
+ if (!Subtarget->hasSSE2())
+ return false;
+
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (!SrcVT.isSimple() || !DstVT.isSimple())
+ return false;
+
+ if (!SrcVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ return false;
+
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+
+ // No instruction is needed for conversion. Reuse the register used by
+ // the first operand.
+ updateValueMap(I, Reg);
+ return true;
+ }
}
return false;
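
The BitCast case relies on same-width vector bitcasts being pure reinterpretations: nothing is emitted and the result is simply mapped to the operand's register. For instance (IR shown as a comment):

    //   %v = load <4 x i32>, <4 x i32>* %p
    //   %c = bitcast <4 x i32> %v to <2 x i64>  ; no code: %c reuses %v's XMM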
@@ -3384,8 +3420,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- DL.getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 5eb4fae..1dd69e8 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -9,6 +9,7 @@
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
+// When optimizing for size it replaces suitable LEAs with INC or DEC.
//
//===----------------------------------------------------------------------===//
@@ -61,6 +62,11 @@ class FixupLEAPass : public MachineFunctionPass {
void processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
+ /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg
+ /// and convert them to INC or DEC respectively.
+ bool fixupIncDec(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) const;
+
/// \brief Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
@@ -89,6 +95,8 @@ public:
private:
MachineFunction *MF;
const X86InstrInfo *TII; // Machine instruction info.
+ bool OptIncDec;
+ bool OptLEA;
};
char FixupLEAPass::ID = 0;
}
@@ -150,7 +158,10 @@ FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
- if (!ST.LEAusesAG() && !ST.slowLEA())
+ OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize();
+ OptLEA = ST.LEAusesAG() || ST.slowLEA();
+
+ if (!OptLEA && !OptIncDec)
return false;
TII = ST.getInstrInfo();
@@ -187,7 +198,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
if (I == MFI->begin()) {
- if (MFI->isPredecessor(MFI)) {
+ if (MFI->isPredecessor(&*MFI)) {
I = --MFI->end();
return true;
} else
@@ -222,6 +233,60 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return nullptr;
}
+static inline bool isLEA(const int opcode) {
+ return opcode == X86::LEA16r || opcode == X86::LEA32r ||
+ opcode == X86::LEA64r || opcode == X86::LEA64_32r;
+}
+
+/// isLEASimpleIncOrDec - Does this LEA have one of these forms:
+/// lea %reg, 1(%reg)
+/// lea %reg, -1(%reg)
+static inline bool isLEASimpleIncOrDec(MachineInstr *LEA) {
+ unsigned SrcReg = LEA->getOperand(1 + X86::AddrBaseReg).getReg();
+ unsigned DstReg = LEA->getOperand(0).getReg();
+ unsigned AddrDispOp = 1 + X86::AddrDisp;
+ return SrcReg == DstReg &&
+ LEA->getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
+ LEA->getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
+ LEA->getOperand(AddrDispOp).isImm() &&
+ (LEA->getOperand(AddrDispOp).getImm() == 1 ||
+ LEA->getOperand(AddrDispOp).getImm() == -1);
+}
+
+bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) const {
+ MachineInstr *MI = I;
+ int Opcode = MI->getOpcode();
+ if (!isLEA(Opcode))
+ return false;
+
+ if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
+ int NewOpcode;
+ bool isINC = MI->getOperand(4).getImm() == 1;
+ switch (Opcode) {
+ case X86::LEA16r:
+ NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
+ break;
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
+ break;
+ case X86::LEA64r:
+ NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
+ break;
+ }
+
+ MachineInstr *NewMI =
+ BuildMI(*MFI, I, MI->getDebugLoc(), TII->get(NewOpcode))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MFI->erase(I);
+ I = static_cast<MachineBasicBlock::iterator>(NewMI);
+ return true;
+ }
+ return false;
+}
+
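
fixupIncDec rewrites the two trivial LEA shapes into INC/DEC when that is a size win and isSafeToClobberEFLAGS holds; the guard matters because INC/DEC write flags while LEA does not. Roughly (byte counts are for the common encodings and approximate):

    //   leal  1(%eax), %eax  ->  incl %eax   (3 bytes -> 1 byte in 32-bit mode)
    //   leaq -1(%rbx), %rbx  ->  decq %rbx   (4 bytes -> 3 bytes)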
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
// Process a load, store, or LEA instruction.
@@ -265,8 +330,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineInstr *MI = I;
const int opcode = MI->getOpcode();
- if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r &&
- opcode != X86::LEA64_32r)
+ if (!isLEA(opcode))
return;
if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
@@ -280,7 +344,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
return;
int addrr_opcode, addri_opcode;
switch (opcode) {
- default: llvm_unreachable("Unexpected LEA instruction");
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
case X86::LEA16r:
addrr_opcode = X86::ADD16rr;
addri_opcode = X86::ADD16ri;
@@ -330,10 +395,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
MachineFunction::iterator MFI) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
- if (MF.getSubtarget<X86Subtarget>().isSLM())
- processInstructionForSLM(I, MFI);
- else
- processInstruction(I, MFI);
+ if (OptIncDec)
+ if (fixupIncDec(I, MFI))
+ continue;
+
+ if (OptLEA) {
+ if (MF.getSubtarget<X86Subtarget>().isSLM())
+ processInstructionForSLM(I, MFI);
+ else
+ processInstruction(I, MFI);
+ }
}
return false;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 40b9c8a..97bb8ab 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -120,12 +120,10 @@ namespace {
// Return a bitmask of FP registers in block's live-in list.
static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- if (Reg < X86::FP0 || Reg > X86::FP6)
+ for (const auto &LI : MBB->liveins()) {
+ if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6)
continue;
- Mask |= 1 << (Reg - X86::FP0);
+ Mask |= 1 << (LI.PhysReg - X86::FP0);
}
return Mask;
}
@@ -301,8 +299,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;
static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
- if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
FPIsUsed = true;
break;
}
@@ -321,7 +320,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Process the function in depth first order so that we process at least one
// of the predecessors for every reachable block in the function.
SmallPtrSet<MachineBasicBlock*, 8> Processed;
- MachineBasicBlock *Entry = MF.begin();
+ MachineBasicBlock *Entry = &MF.front();
bool Changed = false;
for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
@@ -329,9 +328,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Process any unreachable blocks in arbitrary order now.
if (MF.size() != Processed.size())
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB).second)
- Changed |= processBasicBlock(MF, *BB);
+ for (MachineBasicBlock &BB : MF)
+ if (Processed.insert(&BB).second)
+ Changed |= processBasicBlock(MF, BB);
LiveBundles.clear();
@@ -348,13 +347,12 @@ void FPS::bundleCFG(MachineFunction &MF) {
LiveBundles.resize(Bundles->getNumBundles());
// Gather the actual live-in masks for all MBBs.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
- const unsigned Mask = calcLiveInMask(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ const unsigned Mask = calcLiveInMask(&MBB);
if (!Mask)
continue;
// Update MBB ingoing bundle mask.
- LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
}
}
@@ -546,17 +544,9 @@ namespace {
};
}
-#ifndef NDEBUG
-static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
- for (unsigned i = 0; i != NumEntries-1; ++i)
- if (!(Table[i] < Table[i+1])) return false;
- return true;
-}
-#endif
-
-static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
- const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
- if (I != Table+N && I->from == Opcode)
+static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode);
+ if (I != Table.end() && I->from == Opcode)
return I->to;
return -1;
}
@@ -567,7 +557,7 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
#define ASSERT_SORTED(TABLE) \
{ static bool TABLE##Checked = false; \
if (!TABLE##Checked) { \
- assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \
"All lookup tables must be sorted for efficient access!"); \
TABLE##Checked = true; \
} \
@@ -746,7 +736,7 @@ static const TableEntry OpcodeTable[] = {
static unsigned getConcreteOpcode(unsigned Opcode) {
ASSERT_SORTED(OpcodeTable);
- int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ int Opc = Lookup(OpcodeTable, Opcode);
assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
return Opc;
}
@@ -797,7 +787,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
RegMap[Stack[--StackTop]] = ~0; // Update state
// Check to see if there is a popping version of this instruction...
- int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ int Opcode = Lookup(PopTable, I->getOpcode());
if (Opcode != -1) {
I->setDesc(TII->get(Opcode));
if (Opcode == X86::UCOM_FPPr)
@@ -1193,7 +1183,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
// We decide which form to use based on what is on the top of the stack, and
// which operand is killed by this instruction.
- const TableEntry *InstTable;
+ ArrayRef<TableEntry> InstTable;
bool isForward = TOS == Op0;
bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
if (updateST0) {
@@ -1208,8 +1198,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
InstTable = ReverseSTiTable;
}
- int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
- MI->getOpcode());
+ int Opcode = Lookup(InstTable, MI->getOpcode());
assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
// NotTOS - The register which is not on the top of stack...
@@ -1520,31 +1509,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
return;
}
- case X86::WIN_FTOL_32:
- case X86::WIN_FTOL_64: {
- // Push the operand into ST0.
- MachineOperand &Op = MI->getOperand(0);
- assert(Op.isUse() && Op.isReg() &&
- Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
- unsigned FPReg = getFPReg(Op);
- if (Op.isKill())
- moveToTop(FPReg, Inst);
- else
- duplicateToTop(FPReg, ScratchFPReg, Inst);
-
- // Emit the call. This will pop the operand.
- BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_ftol2")
- .addReg(X86::ST0, RegState::ImplicitKill)
- .addReg(X86::ECX, RegState::ImplicitDefine)
- .addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::EDX, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- --StackTop;
-
- break;
- }
-
case X86::RETQ:
case X86::RETL:
case X86::RETIL:
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3a21b57..242d0333 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -18,25 +18,23 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include <cstdlib>
using namespace llvm;
-// FIXME: completely move here.
-extern cl::opt<bool> ForceStackAlign;
-
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
unsigned StackAlignOverride)
: TargetFrameLowering(StackGrowsDown, StackAlignOverride,
@@ -80,6 +78,27 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
+/// usesTheStack - This function checks if any of the users of EFLAGS
+/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Conservatively assume that inline assembly might use the stack.
+ if (MF.hasInlineAsm())
+ return true;
+
+ return any_of(MRI.reg_instructions(X86::EFLAGS),
+ [](const MachineInstr &RI) { return RI.isCopy(); });
+}
+
+static bool doesStackUseImplyFP(const MachineFunction &MF) {
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ return IsWin64Prologue && usesTheStack(MF);
+}
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -92,8 +111,9 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.callsEHReturn() ||
- MFI->hasStackMap() || MFI->hasPatchPoint());
+ MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ doesStackUseImplyFP(MF));
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -148,21 +168,14 @@ static unsigned getLEArOpcode(unsigned IsLP64) {
/// to this register without worry about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const TargetRegisterInfo *TRI,
+ const X86RegisterInfo *TRI,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
if (!F || MF->getMMI().callsEHReturn())
return 0;
- static const uint16_t CallerSavedRegs32Bit[] = {
- X86::EAX, X86::EDX, X86::ECX, 0
- };
-
- static const uint16_t CallerSavedRegs64Bit[] = {
- X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
- X86::R8, X86::R9, X86::R10, X86::R11, 0
- };
+ const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
unsigned Opc = MBBI->getOpcode();
switch (Opc) {
@@ -191,10 +204,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
Uses.insert(*AI);
}
- const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
- for (; *CS; ++CS)
- if (!Uses.count(*CS))
- return *CS;
+ for (auto CS : AvailableRegs)
+ if (!Uses.count(CS) && CS != X86::RIP)
+ return CS;
}
}
@@ -214,8 +226,12 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-/// Check whether or not the terminators of \p MBB needs to read EFLAGS.
-static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
+/// Check if the flags need to be preserved before the terminators.
+/// This would be the case, if the eflags is live-in of the region
+/// composed by the terminators or live-out of that region, without
+/// being defined by a terminator.
+static bool
+flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
for (const MachineInstr &MI : MBB.terminators()) {
bool BreakNext = false;
for (const MachineOperand &MO : MI.operands()) {
@@ -225,15 +241,27 @@ static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
if (Reg != X86::EFLAGS)
continue;
- // This terminator needs an eflag that is not defined
- // by a previous terminator.
+ // This terminator needs an eflags that is not defined
+ // by a previous terminator:
+ // EFLAGS is live-in of the region composed by the terminators.
if (!MO.isDef())
return true;
+ // This terminator defines the eflags, i.e., we don't need to preserve it.
+ // However, we still need to check that this specific terminator does not
+ // read a live-in value.
BreakNext = true;
}
+ // We found a definition of the eflags, no need to preserve them.
if (BreakNext)
- break;
+ return false;
}
+
+ // None of the terminators use or define the eflags.
+ // Check if they are live-out, which would imply we need to preserve them.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
+ return true;
+
return false;
}
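
In practice this decides whether epilogue stack adjustments may use ADD/SUB (which clobber flags) or must use LEA. A sketch of the two situations:

    // Terminators "TEST32rr ..., implicit-def %eflags; JNE_1 ..." define
    // EFLAGS before any terminator reads it: nothing to preserve.
    // A bare "JNE_1 ..., implicit %eflags" (flags produced earlier), or a
    // successor listing EFLAGS as live-in, forces the flag-preserving LEA.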
@@ -289,6 +317,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
+ else
+ MI->setFlag(MachineInstr::FrameDestroy);
Offset -= ThisVal;
continue;
}
@@ -298,6 +328,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
if (isSub)
MI.setMIFlag(MachineInstr::FrameSetup);
+ else
+ MI.setMIFlag(MachineInstr::FrameDestroy);
Offset -= ThisVal;
}
@@ -312,7 +344,11 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
// is tricky.
bool UseLEA;
if (!InEpilogue) {
- UseLEA = STI.useLeaForSP();
+ // Check if inserting the prologue at the beginning
+ // of MBB would require to use LEA operations.
+ // We need to use LEA operations if EFLAGS is live in, because
+ // it means an instruction will read it before it gets defined.
+ UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
} else {
// If we can use LEA for SP but we shouldn't, check that none
// of the terminators uses the eflags. Otherwise we will insert
@@ -321,10 +357,10 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
// and is an optimization anyway.
UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
if (UseLEA && !STI.useLeaForSP())
- UseLEA = terminatorsNeedFlagsAsInput(MBB);
+ UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
// If that assert breaks, that means we do not do the right thing
// in canUseAsEpilogue.
- assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) &&
+ assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
"We shouldn't have allowed this insertion point");
}
@@ -347,30 +383,6 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
return MI;
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = nullptr) {
- if (MBBI == MBB.begin()) return;
-
- MachineBasicBlock::iterator PI = std::prev(MBBI);
- unsigned Opc = PI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
- Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += PI->getOperand(2).getImm();
- MBB.erase(PI);
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- }
-}
-
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
bool doMergeWithPrevious) const {
@@ -436,27 +448,265 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
}
}
-/// usesTheStack - This function checks if any of the users of EFLAGS
-/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
-/// to use the stack, and if we don't adjust the stack we clobber the first
-/// frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
+MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ bool InProlog) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ if (STI.isTargetWindowsCoreCLR()) {
+ if (InProlog) {
+ return emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
+ } else {
+ return emitStackProbeInline(MF, MBB, MBBI, DL, false);
+ }
+ } else {
+ return emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
+ }
+}
- for (MachineRegisterInfo::reg_instr_iterator
- ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
- ri != re; ++ri)
- if (ri->isCopy())
- return true;
+void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ const StringRef ChkStkStubSymbol = "__chkstk_stub";
+ MachineInstr *ChkStkStub = nullptr;
- return false;
+ for (MachineInstr &MI : PrologMBB) {
+ if (MI.isCall() && MI.getOperand(0).isSymbol() &&
+ ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
+ ChkStkStub = &MI;
+ break;
+ }
+ }
+
+ if (ChkStkStub != nullptr) {
+ MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
+ assert(std::prev(MBBI).operator==(ChkStkStub) &&
+ "MBBI expected after __chkstk_stub.");
+ DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
+ emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
+ ChkStkStub->eraseFromParent();
+ }
}
-void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL) const {
+MachineInstr *X86FrameLowering::emitStackProbeInline(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ assert(STI.is64Bit() && "different expansion needed for 32 bit");
+ assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+
+ // RAX contains the number of bytes of desired stack adjustment.
+ // The handling here assumes this value has already been updated so as to
+ // maintain stack alignment.
+ //
+ // We need to exit with RSP modified by this amount and execute suitable
+ // page touches to notify the OS that we're growing the stack responsibly.
+ // All stack probing must be done without modifying RSP.
+ //
+ // MBB:
+ // SizeReg = RAX;
+ // ZeroReg = 0
+ // CopyReg = RSP
+ // Flags, TestReg = CopyReg - SizeReg
+ // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
+ // LimitReg = gs magic thread env access
+ // if FinalReg >= LimitReg goto ContinueMBB
+ // RoundBB:
+ // RoundReg = page address of FinalReg
+ // LoopMBB:
+ // LoopReg = PHI(LimitReg, ProbeReg)
+ // ProbeReg = LoopReg - PageSize
+ // [ProbeReg] = 0
+ // if (ProbeReg > RoundReg) goto LoopMBB
+ // ContinueMBB:
+ // RSP = RSP - RAX
+ // [rest of original MBB]
+
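+ // For example (hypothetical values): with RSP = 0x10000, RAX = 0x4000 and a
+ // thread stack limit of 0xF000, FinalReg = 0xC000 is below the limit, so we
+ // touch one byte in each of the pages at 0xE000, 0xD000 and 0xC000 before
+ // finally executing RSP = RSP - RAX.
+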
+ // Set up the new basic blocks
+ MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
+ MF.insert(MBBIter, RoundMBB);
+ MF.insert(MBBIter, LoopMBB);
+ MF.insert(MBBIter, ContinueMBB);
+
+ // Split MBB and move the tail portion down to ContinueMBB.
+ MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
+ ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
+ ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+ // Some useful constants
+ const int64_t ThreadEnvironmentStackLimit = 0x10;
+ const int64_t PageSize = 0x1000;
+ const int64_t PageMask = ~(PageSize - 1);
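+ // With PageSize = 0x1000, PageMask is ~0xFFF, so ANDing an address with it
+ // below rounds that address down to its page boundary.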
+
+ // Registers we need. For the normal case we use virtual
+ // registers. For the prolog expansion we use RAX, RCX and RDX.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *RegClass = &X86::GR64RegClass;
+ const unsigned SizeReg = InProlog ? (unsigned)X86::RAX
+ : MRI.createVirtualRegister(RegClass),
+ ZeroReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ CopyReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ TestReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ FinalReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ RoundedReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ LimitReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ JoinReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ ProbeReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass);
+
+ // SP-relative offsets where we can save RCX and RDX.
+ int64_t RCXShadowSlot = 0;
+ int64_t RDXShadowSlot = 0;
+
+ // If inlining in the prolog, save RCX and RDX.
+ // Future optimization: don't save or restore if not live in.
+ if (InProlog) {
+ // Compute the offsets. We need to account for things already
+ // pushed onto the stack at this point: return address, frame
+ // pointer (if used), and callee saves.
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
+ const bool HasFP = hasFP(MF);
+ RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ RDXShadowSlot = RCXShadowSlot + 8;
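+ // E.g. (hypothetical layout): with a frame pointer pushed and no other
+ // callee saves, RCXShadowSlot = 16 and RDXShadowSlot = 24, the first two
+ // slots above the return address at [RSP + 8].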
+ // Emit the saves.
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
+ } else {
+ // Not in the prolog. Copy RAX to a virtual reg.
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
+ }
+
+ // Add code to MBB to check for overflow and set the new target stack pointer
+ // to zero if so.
+ BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
+ .addReg(ZeroReg, RegState::Undef)
+ .addReg(ZeroReg, RegState::Undef);
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
+ BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
+ .addReg(CopyReg)
+ .addReg(SizeReg);
+ BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg)
+ .addReg(TestReg)
+ .addReg(ZeroReg);
+
+ // FinalReg now holds the final stack pointer value, or zero if the
+ // allocation would overflow. Compare against the current stack
+ // limit from the thread environment block. Note this limit is the
+ // lowest touched page on the stack, not the point at which the OS
+ // will cause an overflow exception, so this is just an optimization
+ // to avoid unnecessarily touching pages that are below the current
+ // SP but already committed to the stack by the OS.
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
+ .addReg(0)
+ .addImm(1)
+ .addReg(0)
+ .addImm(ThreadEnvironmentStackLimit)
+ .addReg(X86::GS);
+ BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
+ // Jump if the desired stack pointer is at or above the stack limit.
+ BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
+
+ // Add code to RoundMBB to round the final stack pointer to a page boundary.
+ BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
+ .addReg(FinalReg)
+ .addImm(PageMask);
+ BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
+
+ // LimitReg now holds the current stack limit and RoundedReg the page-rounded
+ // final RSP value. Add code to LoopMBB to walk down from LimitReg
+ // page-by-page, probing each page, until we reach RoundedReg.
+ if (!InProlog) {
+ BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
+ .addReg(LimitReg)
+ .addMBB(RoundMBB)
+ .addReg(ProbeReg)
+ .addMBB(LoopMBB);
+ }
+
+ addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
+ false, -PageSize);
+
+ // Probe by storing a byte onto the stack.
+ BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
+ .addReg(ProbeReg)
+ .addImm(1)
+ .addReg(0)
+ .addImm(0)
+ .addReg(0)
+ .addImm(0);
+ BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
+ .addReg(RoundedReg)
+ .addReg(ProbeReg);
+ BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB);
+
+ MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
+
+ // If in prolog, restore RDX and RCX.
+ if (InProlog) {
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
+ X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
+ X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
+ }
+
+ // Now that the probing is done, add code to ContinueMBB to update
+ // the stack pointer for real.
+ BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(SizeReg);
+
+ // Add the control flow edges we need.
+ MBB.addSuccessor(ContinueMBB);
+ MBB.addSuccessor(RoundMBB);
+ RoundMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(ContinueMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+
+ // Mark all the instructions added to the prolog as frame setup.
+ if (InProlog) {
+ for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
+ BeforeMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineInstr &MI : *RoundMBB) {
+ MI.setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineInstr &MI : *LoopMBB) {
+ MI.setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
+ CMBBI != ContinueMBBI; ++CMBBI) {
+ CMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ // Possible TODO: physreg liveness for InProlog case.
+
+ return ContinueMBBI;
+}
+
+MachineInstr *X86FrameLowering::emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
unsigned CallOp;
@@ -478,6 +728,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
Symbol = "_chkstk";
MachineInstrBuilder CI;
+ MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
// All current stack probes take AX and SP as input, clobber flags, and
// preserve all registers. x86_64 probes leave RSP unmodified.
@@ -507,6 +758,26 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addReg(X86::RSP)
.addReg(X86::RAX);
}
+
+ if (InProlog) {
+ // Apply the frame setup flag to all inserted instrs.
+ for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
+ ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+
+ return MBBI;
+}
+
+MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
+
+ assert(InProlog && "ChkStkStub called outside prolog!");
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__chkstk_stub");
+
+ return MBBI;
}
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
@@ -526,7 +797,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
const MachineFrameInfo *MFI = MF.getFrameInfo();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
unsigned StackAlign = getStackAlignment();
- if (ForceStackAlign) {
+ if (MF.getFunction()->hasFnAttribute("stackrealign")) {
if (MFI->hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
@@ -537,15 +808,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL,
+ DebugLoc DL, unsigned Reg,
uint64_t MaxAlign) const {
uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
+ unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+ .addReg(Reg)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
@@ -646,6 +916,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool IsFunclet = MBB.isEHFuncletEntry();
+ EHPersonality Personality = EHPersonality::Unknown;
+ if (Fn->hasPersonalityFn())
+ Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ bool FnHasClrFunclet =
+ MMI.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
+ bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
bool HasFP = hasFP(MF);
bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -655,9 +932,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned FramePtr = TRI->getFrameRegister(MF);
const unsigned MachineFramePtr =
STI.isTarget64BitILP32()
- ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
- : FramePtr;
+ ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
unsigned BasePtr = TRI->getBaseRegister();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
DebugLoc DL;
// Add RETADDR move area to callee saved frame size.
@@ -723,6 +1002,24 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
uint64_t NumBytes = 0;
int stackGrowth = -SlotSize;
+ // Find the funclet establisher parameter
+ unsigned Establisher = X86::NoRegister;
+ if (IsClrFunclet)
+ Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
+ else if (IsFunclet)
+ Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
+
+ if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
+ // Immediately spill establisher into the home slot.
+ // The runtime cares about this.
+ // MOV64mr %rdx, 16(%rsp)
+ unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
+ .addReg(Establisher)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(Establisher);
+ }
+
if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -739,7 +1036,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
+ if (!IsFunclet)
+ MFI->setOffsetAdjustment(-NumBytes);
+ else
+ assert(MFI->getOffsetAdjustment() == -(int)NumBytes &&
+ "should calculate same local variable offset for funclets");
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
@@ -765,35 +1066,46 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- if (!IsWin64Prologue) {
+ if (!IsWin64Prologue && !IsFunclet) {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
- }
- if (NeedsDwarfCFI) {
- // Mark effective beginning of when frame pointer becomes valid.
- // Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
- BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+ if (NeedsDwarfCFI) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ // Define the current CFA to use the EBP/RBP register.
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
+ nullptr, DwarfFramePtr));
+ }
}
- // Mark the FramePtr as live-in in every block.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- I->addLiveIn(MachineFramePtr);
+ // Mark the FramePtr as live-in in every block. Don't do this again for
+ // funclet prologues.
+ if (!IsFunclet) {
+ for (MachineBasicBlock &EveryMBB : MF)
+ EveryMBB.addLiveIn(MachineFramePtr);
+ }
} else {
+ assert(!IsFunclet && "funclets without FPs not yet implemented");
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ // For EH funclets, only allocate enough space for outgoing calls. Save the
+ // NumBytes value that we would've used for the parent frame.
+ unsigned ParentFrameNumBytes = NumBytes;
+ if (IsFunclet)
+ NumBytes = getWinEHFuncletFrameSize(MF);
+
// Skip the callee-saved push instructions.
bool PushedRegs = false;
int StackOffset = 2 * stackGrowth;
while (MBBI != MBB.end() &&
+ MBBI->getFlag(MachineInstr::FrameSetup) &&
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
@@ -818,9 +1130,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
+ if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
+ BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -839,7 +1151,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
uint64_t AlignedNumBytes = NumBytes;
- if (IsWin64Prologue && TRI->needsStackRealignment(MF))
+ if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
@@ -876,26 +1188,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
}
- // Save a pointer to the MI where we set AX.
- MachineBasicBlock::iterator SetRAX = MBBI;
- --SetRAX;
-
// Call __chkstk, __chkstk_ms, or __alloca.
- emitStackProbeCall(MF, MBB, MBBI, DL);
-
- // Apply the frame setup flag to all inserted instrs.
- for (; SetRAX != MBBI; ++SetRAX)
- SetRAX->setFlag(MachineInstr::FrameSetup);
+ emitStackProbe(MF, MBB, MBBI, DL, true);
if (isEAXAlive) {
// Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
+ StackPtr, false, NumBytes - 4);
MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
@@ -909,19 +1213,72 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
int SEHFrameOffset = 0;
+ unsigned SPOrEstablisher;
+ if (IsFunclet) {
+ if (IsClrFunclet) {
+ // The establisher parameter passed to a CLR funclet is actually a pointer
+ // to the (mostly empty) frame of its nearest enclosing funclet; we have
+ // to find the root function establisher frame by loading the PSPSym from
+ // the intermediate frame.
+ unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
+ MachinePointerInfo NoInfo;
+ MBB.addLiveIn(Establisher);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
+ Establisher, false, PSPSlotOffset)
+ .addMemOperand(MF.getMachineMemOperand(
+ NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
+ // Save the root establisher back into the current funclet's (mostly
+ // empty) frame, in case a sub-funclet or the GC needs it.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
+ false, PSPSlotOffset)
+ .addReg(Establisher)
+ .addMemOperand(
+ MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ SlotSize, SlotSize));
+ }
+ SPOrEstablisher = Establisher;
+ } else {
+ SPOrEstablisher = StackPtr;
+ }
+
if (IsWin64Prologue && HasFP) {
- SEHFrameOffset = calculateSetFPREG(NumBytes);
+ // Set RBP to a small fixed offset from RSP. In the funclet case, we base
+ // this calculation on the incoming establisher, which holds the value of
+ // RSP from the parent frame at the end of the prologue.
+ SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
if (SEHFrameOffset)
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
- StackPtr, false, SEHFrameOffset);
+ SPOrEstablisher, false, SEHFrameOffset);
else
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
+ .addReg(SPOrEstablisher);
- if (NeedsWinCFI)
+ // If this is not a funclet, emit the CFI describing our frame pointer.
+ if (NeedsWinCFI && !IsFunclet) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
.setMIFlag(MachineInstr::FrameSetup);
+ if (isAsynchronousEHPersonality(Personality))
+ MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
+ }
+ } else if (IsFunclet && STI.is32Bit()) {
+ // Reset EBP / ESI to something good for funclets.
+ MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
+ // If we're a catch funclet, we can be returned to via catchret. Save ESP
+ // into the registration node so that the runtime will restore it for us.
+ if (!MBB.isCleanupFuncletEntry()) {
+ assert(Personality == EHPersonality::MSVC_CXX);
+ unsigned FrameReg;
+ int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
+ int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
+ // ESP is the first field, so no extra displacement is needed.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
+ false, EHRegOffset)
+ .addReg(X86::ESP);
+ }
}
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
@@ -932,7 +1289,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
int FI;
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
- int Offset = getFrameIndexOffset(MF, FI);
+ unsigned IgnoredFrameReg;
+ int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
Offset += SEHFrameOffset;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
@@ -948,14 +1306,33 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
+ if (FnHasClrFunclet && !IsFunclet) {
+ // Save the so-called Initial-SP (i.e. the value of the stack pointer
+ // immediately after the prolog) into the PSPSlot so that funclets
+ // and the GC can recover it.
+ unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
+ auto PSPInfo = MachinePointerInfo::getFixedStack(
+ MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
+ PSPSlotOffset)
+ .addReg(StackPtr)
+ .addMemOperand(MF.getMachineMemOperand(
+ PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+ SlotSize, SlotSize));
+ }
+
// Realign stack after we spilled callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Win64 requires aligning the stack after the prologue.
if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
+ BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
}
+ // We already dealt with stack realignment and funclets above.
+ if (IsFunclet && STI.is32Bit())
+ return;
+
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
@@ -964,7 +1341,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(StackPtr)
+ .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) {
// Stash value of base pointer. Saving RSP instead of EBP shortens
@@ -972,18 +1349,21 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .addReg(StackPtr)
+ .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (X86FI->getHasSEHFramePtrSave()) {
+ if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
// Stash the value of the frame pointer relative to the base pointer for
// Win32 EH. This supports Win32 EH, which does the inverse of the above:
// it recovers the frame pointer from the base pointer rather than the
// other way around.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
- getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex()))
+ unsigned UsedReg;
+ int Offset =
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ assert(UsedReg == BasePtr);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
.addReg(FramePtr)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -1015,6 +1395,69 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue(
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}
+static bool isFuncletReturnInstr(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case X86::CATCHRET:
+ case X86::CLEANUPRET:
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("impossible");
+}
+
+// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
+// stack. It holds a pointer to the bottom of the root function frame. The
+// establisher frame pointer passed to a nested funclet may point to the
+// (mostly empty) frame of its parent funclet, but it will need to find
+// the frame of the root function to access locals. To facilitate this,
+// every funclet copies the pointer to the bottom of the root function
+// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
+// same offset for the PSPSym in the root function frame that's used in the
+// funclets' frames allows each funclet to dynamically accept any ancestor
+// frame as its establisher argument (the runtime doesn't guarantee the
+// immediate parent for some reason lost to history), and also allows the GC,
+// which uses the PSPSym for some bookkeeping, to find it in any funclet's
+// frame with only a single offset reported for the entire method.
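+// For example (hypothetical offset): if the root function stores its
+// Initial-SP at [RSP + 40] right after the prologue, every funclet reserves
+// the slot at [RSP + 40] for the same pointer, so any ancestor frame can
+// serve as the establisher.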
+unsigned
+X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
+ const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
+ // getFrameIndexReferenceFromSP has an out ref parameter for the stack
+ // pointer register; pass a dummy that we ignore.
+ unsigned SPReg;
+ int Offset = getFrameIndexReferenceFromSP(MF, Info.PSPSymFrameIdx, SPReg);
+ assert(Offset >= 0);
+ return static_cast<unsigned>(Offset);
+}
+
+unsigned
+X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
+ // This is the amount of stack a funclet needs to allocate.
+ unsigned UsedSize;
+ EHPersonality Personality =
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn());
+ if (Personality == EHPersonality::CoreCLR) {
+ // CLR funclets need to hold enough space to include the PSPSym, at the
+ // same offset from the stack pointer (immediately after the prolog) as it
+ // resides at in the main function.
+ UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
+ } else {
+ // Other funclets just need enough stack for outgoing call arguments.
+ UsedSize = MF.getFrameInfo()->getMaxCallFrameSize();
+ }
+ // RBP is not included in the callee saved register block. After pushing
+ // RBP, everything is 16-byte aligned. Everything we allocate before an
+ // outgoing call must also be 16-byte aligned.
+ unsigned FrameSizeMinusRBP =
+ RoundUpToAlignment(CSSize + UsedSize, getStackAlignment());
+ // Subtract out the size of the callee saved registers. This is how much stack
+ // each funclet will allocate.
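+ // For example (hypothetical sizes): CSSize = 16 and UsedSize = 40 give
+ // FrameSizeMinusRBP = RoundUpToAlignment(56, 16) = 64, so the funclet
+ // allocates 64 - 16 = 48 bytes.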
+ return FrameSizeMinusRBP - CSSize;
+}
+
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1027,12 +1470,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
const bool Is64BitILP32 = STI.isTarget64BitILP32();
unsigned FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr =
- Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
- : FramePtr;
+ Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinCFI =
IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
+ bool IsFunclet = isFuncletReturnInstr(MBBI);
+ MachineBasicBlock *TargetMBB = nullptr;
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
@@ -1040,7 +1484,27 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
- if (hasFP(MF)) {
+ if (MBBI->getOpcode() == X86::CATCHRET) {
+ // SEH shouldn't use catchret.
+ assert(!isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn())) &&
+ "SEH should not use CATCHRET");
+
+ NumBytes = getWinEHFuncletFrameSize(MF);
+ assert(hasFP(MF) && "EH funclets without FP not yet implemented");
+ TargetMBB = MBBI->getOperand(0).getMBB();
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else if (MBBI->getOpcode() == X86::CLEANUPRET) {
+ NumBytes = getWinEHFuncletFrameSize(MF);
+ assert(hasFP(MF) && "EH funclets without FP not yet implemented");
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
NumBytes = FrameSize - CSSize;
@@ -1052,7 +1516,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Pop EBP.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
} else {
NumBytes = StackSize - CSSize;
}
@@ -1063,26 +1528,50 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PI = std::prev(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->isTerminator())
+ if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ Opc != X86::DBG_VALUE && !PI->isTerminator())
break;
--MBBI;
}
MachineBasicBlock::iterator FirstCSPop = MBBI;
+ if (TargetMBB) {
+ // Fill EAX/RAX with the address of the target block.
+ unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX;
+ if (STI.is64Bit()) {
+ // LEA64r TargetMBB(%rip), %rax
+ BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(TargetMBB)
+ .addReg(0);
+ } else {
+ // MOV32ri $TargetMBB, %eax
+ BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg)
+ .addMBB(TargetMBB);
+ }
+ // Record that we've taken the address of TargetMBB and no longer just
+ // reference it in a terminator.
+ TargetMBB->setHasAddressTaken();
+ }
+
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
if (NumBytes || MFI->hasVarSizedObjects())
- mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+ NumBytes += mergeSPUpdates(MBB, MBBI, true);
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
- // realigned.
- if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ // realigned. Don't do this if this was a funclet epilogue, since the funclets
+ // will not do realignment or dynamic stack allocation.
+ if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) &&
+ !IsFunclet) {
if (TRI->needsStackRealignment(MF))
MBBI = FirstCSPop;
unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
@@ -1134,9 +1623,24 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+// NOTE: this only has a subset of the full frame index logic. In
+// particular, the FI < 0 and AfterFPPop logic is handled in
+// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
+// (probably?) it should be moved into here.
+int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // We can't calculate offset from frame pointer if the stack is realigned,
+ // so enforce usage of stack/base pointer. The base pointer is used when we
+ // have dynamic allocas in addition to dynamic realignment.
+ if (TRI->hasBasePointer(MF))
+ FrameReg = TRI->getBaseRegister();
+ else if (TRI->needsStackRealignment(MF))
+ FrameReg = TRI->getStackRegister();
+ else
+ FrameReg = TRI->getFrameRegister(MF);
+
// Offset will hold the offset from the stack pointer at function entry to the
// object.
// We need to factor in additional offsets applied during the prologue to the
@@ -1207,48 +1711,62 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
return Offset + FPDelta;
}
-int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- // We can't calculate offset from frame pointer if the stack is realigned,
- // so enforce usage of stack/base pointer. The base pointer is used when we
- // have dynamic allocas in addition to dynamic realignment.
- if (TRI->hasBasePointer(MF))
- FrameReg = TRI->getBaseRegister();
- else if (TRI->needsStackRealignment(MF))
- FrameReg = TRI->getStackRegister();
- else
- FrameReg = TRI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
-}
-
-// Simplified from getFrameIndexOffset keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
+// Simplified from getFrameIndexReference keeping only StackPointer cases
+int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
const uint64_t StackSize = MFI->getStackSize();
{
#ifndef NDEBUG
- // Note: LLVM arranges the stack as:
- // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
- // > "Stack Slots" (<--SP)
- // We can always address StackSlots from RSP. We can usually (unless
- // needsStackRealignment) address CSRs from RSP, but sometimes need to
- // address them from RBP. FixedObjects can be placed anywhere in the stack
- // frame depending on their specific requirements (i.e. we can actually
- // refer to arguments to the function which are stored in the *callers*
- // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
- // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
-
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
-
- // We don't handle tail calls, and shouldn't be seeing them
- // either.
+ // LLVM arranges the stack as follows:
+ // ...
+ // ARG2
+ // ARG1
+ // RETADDR
+ // PUSH RBP <-- RBP points here
+ // PUSH CSRs
+ // ~~~~~~~ <-- possible stack realignment (non-win64)
+ // ...
+ // STACK OBJECTS
+ // ... <-- RSP after prologue points here
+ // ~~~~~~~ <-- possible stack realignment (win64)
+ //
+ // if (hasVarSizedObjects()):
+ // ... <-- "base pointer" (ESI/RBX) points here
+ // DYNAMIC ALLOCAS
+ // ... <-- RSP points here
+ //
+ // Case 1: In the simple case of no stack realignment and no dynamic
+ // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
+ // with fixed offsets from RSP.
+ //
+ // Case 2: In the case of stack realignment with no dynamic allocas, fixed
+ // stack objects are addressed with RBP and regular stack objects with RSP.
+ //
+ // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
+ // to address stack arguments for outgoing calls and nothing else. The "base
+ // pointer" points to local variables, and RBP points to fixed objects.
+ //
+ // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
+ // answer we give is relative to the SP after the prologue, and not the
+ // SP in the middle of the function.
+
+ assert((!MFI->isFixedObjectIndex(FI) || !TRI->needsStackRealignment(MF) ||
+ STI.isTargetWin64()) &&
+ "offset from fixed object to SP is not static");
+
+ // We don't handle tail calls, and shouldn't be seeing them either.
int TailCallReturnAddrDelta =
MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
}
+ // Fill in FrameReg output argument.
+ FrameReg = TRI->getStackRegister();
+
// This is how the math works out:
//
// %rsp grows (i.e. gets lower) left to right. Each box below is
@@ -1280,15 +1798,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
return Offset + StackSize;
}
-// Simplified from getFrameIndexReference keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
-
- FrameReg = TRI->getStackRegister();
- return getFrameIndexOffsetFromSP(MF, FI);
-}
bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
@@ -1358,6 +1867,11 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
+ // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
+ // for us, and there are no XMM CSRs on Win32.
+ if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
+ return true;
+
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -1399,6 +1913,22 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (CSI.empty())
return false;
+ if (isFuncletReturnInstr(MI) && STI.isOSWindows()) {
+ // Don't restore CSRs in 32-bit EH funclets. Matches
+ // spillCalleeSavedRegisters.
+ if (STI.is32Bit())
+ return true;
+ // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
+ // funclets. emitEpilogue transforms these to normal jumps.
+ if (MI->getOpcode() == X86::CATCHRET) {
+ const Function *Func = MBB.getParent()->getFunction();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(Func->getPersonalityFn()));
+ if (IsSEH)
+ return true;
+ }
+ }
+
DebugLoc DL = MBB.findDebugLoc(MI);
// Reload XMMs from stack frame.
@@ -1420,7 +1950,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
!X86::GR32RegClass.contains(Reg))
continue;
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
+ .setMIFlag(MachineInstr::FrameDestroy);
}
return true;
}
@@ -1450,8 +1981,16 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// Spill the BasePtr if it's used.
- if (TRI->hasBasePointer(MF))
+ if (TRI->hasBasePointer(MF)) {
SavedRegs.set(TRI->getBaseRegister());
+
+ // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
+ if (MF.getMMI().hasEHFunclets()) {
+ int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize);
+ X86FI->setHasSEHFramePtrSave(true);
+ X86FI->setSEHFramePtrSaveIndex(FI);
+ }
+ }
}
static bool
@@ -1545,11 +2084,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
// allocMBB needs to be last (terminating) instruction.
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
- e = PrologueMBB.livein_end();
- i != e; i++) {
- allocMBB->addLiveIn(*i);
- checkMBB->addLiveIn(*i);
+ for (const auto &LI : PrologueMBB.liveins()) {
+ allocMBB->addLiveIn(LI);
+ checkMBB->addLiveIn(LI);
}
if (IsNested)
@@ -1682,8 +2219,6 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(Reg10);
- MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
@@ -1821,11 +2356,9 @@ void X86FrameLowering::adjustForHiPEPrologue(
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
- E = PrologueMBB.livein_end();
- I != E; I++) {
- stackCheckMBB->addLiveIn(*I);
- incStackMBB->addLiveIn(*I);
+ for (const auto &LI : PrologueMBB.liveins()) {
+ stackCheckMBB->addLiveIn(LI);
+ incStackMBB->addLiveIn(LI);
}
MF.push_front(incStackMBB);
@@ -1870,16 +2403,84 @@ void X86FrameLowering::adjustForHiPEPrologue(
.addReg(ScratchReg), PReg, false, SPLimitOffset);
BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
- stackCheckMBB->addSuccessor(&PrologueMBB, 99);
- stackCheckMBB->addSuccessor(incStackMBB, 1);
- incStackMBB->addSuccessor(&PrologueMBB, 99);
- incStackMBB->addSuccessor(incStackMBB, 1);
+ stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
+ stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
+ incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
+ incStackMBB->addSuccessor(incStackMBB, {1, 100});
}
#ifdef XDEBUG
MF.verify();
#endif
}
+bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
+
+ if (Offset <= 0)
+ return false;
+
+ if (Offset % SlotSize)
+ return false;
+
+ int NumPops = Offset / SlotSize;
+ // This is only worth it if we have at most 2 pops.
+ if (NumPops != 1 && NumPops != 2)
+ return false;
+
+ // Handle only the trivial case where the adjustment directly follows
+ // a call. This is the most common one, anyway.
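+ // E.g., on 32-bit, "call f; addl $8, %esp" can become "call f; popl %ecx;
+ // popl %edx" when ECX and EDX are clobbered by (and dead after) the call,
+ // which is two bytes of pops instead of a three-byte add.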
+ if (MBBI == MBB.begin())
+ return false;
+ MachineBasicBlock::iterator Prev = std::prev(MBBI);
+ if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
+ return false;
+
+ unsigned Regs[2];
+ unsigned FoundRegs = 0;
+
+ auto RegMask = Prev->getOperand(1);
+
+ auto &RegClass =
+ Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
+ // Try to find up to NumPops free registers.
+ for (auto Candidate : RegClass) {
+
+ // Poor man's liveness:
+ // Since we're immediately after a call, any register that is clobbered
+ // by the call and not defined by it can be considered dead.
+ if (!RegMask.clobbersPhysReg(Candidate))
+ continue;
+
+ bool IsDef = false;
+ for (const MachineOperand &MO : Prev->implicit_operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) {
+ IsDef = true;
+ break;
+ }
+ }
+
+ if (IsDef)
+ continue;
+
+ Regs[FoundRegs++] = Candidate;
+ if (FoundRegs == (unsigned)NumPops)
+ break;
+ }
+
+ if (FoundRegs == 0)
+ return false;
+
+ // If we found only one free register, but need two, reuse the same one twice.
+ while (FoundRegs < (unsigned)NumPops)
+ Regs[FoundRegs++] = Regs[0];
+
+ for (int i = 0; i < NumPops; ++i)
+ BuildMI(MBB, MBBI, DL,
+ TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
+
+ return true;
+}
+
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -1895,8 +2496,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
- if (Amount == 0)
- return;
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1904,15 +2503,68 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment();
Amount = RoundUpToAlignment(Amount, StackAlign);
+ MachineModuleInfo &MMI = MF.getMMI();
+ const Function *Fn = MF.getFunction();
+ bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool DwarfCFI = !WindowsCFI &&
+ (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
+ // If we have any exception handlers in this function, and we adjust
+ // the SP before calls, we may need to indicate this to the unwinder
+ // using GNU_ARGS_SIZE. Note that this may be necessary even when
+ // Amount == 0, because the preceding function may have set a non-0
+ // GNU_ARGS_SIZE.
+ // TODO: We don't need to reset this between subsequent functions,
+ // if it didn't change.
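+ // E.g., a call site with 8 bytes of pushed arguments would be annotated
+ // with DW_CFA_GNU_args_size 8 so the unwinder can restore SP correctly
+ // when unwinding through the call into a landing pad.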
+ bool HasDwarfEHHandlers = !WindowsCFI &&
+ !MF.getMMI().getLandingPads().empty();
+
+ if (HasDwarfEHHandlers && !isDestroy &&
+ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
+
+ if (Amount == 0)
+ return;
+
// Factor out the amount that gets handled inside the sequence
// (Pushes of argument for frame setup, callee pops for frame destroy)
Amount -= InternalAmt;
+ // TODO: This is needed only if we require precise CFA.
+ // If this is a callee-pop calling convention, emit a CFA adjust for
+ // the amount the callee popped.
+ if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
if (Amount) {
// Add Amount to SP to destroy a frame, and subtract to setup.
int Offset = isDestroy ? Amount : -Amount;
- BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+ if (!(Fn->optForMinSize() &&
+ adjustStackWithPops(MBB, I, DL, Offset)))
+ BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+ }
+
+ if (DwarfCFI && !hasFP(MF)) {
+ // If we don't have FP, but need to generate unwind information,
+ // we need to set the correct CFA offset after the stack adjustment.
+ // How much we adjust the CFA offset depends on whether we're emitting
+ // CFI only for EH purposes or for debugging. EH only requires the CFA
+ // offset to be correct at each call site, while for debugging we want
+ // it to be more precise.
+ int CFAOffset = Amount;
+ // TODO: When not using precise CFA, we also need to adjust for the
+ // InternalAmt here.
+
+ if (CFAOffset) {
+ CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+ }
}
+
return;
}
@@ -1933,12 +2585,136 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
assert(MBB.getParent() && "Block is not attached to a function!");
+ // Win64 has strict requirements on epilogues, and we are not taking any
+ // chances messing with them. I.e., unless this block is already an exit
+ // block, we can't use it as an epilogue.
+ if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
+ return false;
+
if (canUseLEAForSPInEpilogue(*MBB.getParent()))
return true;
// If we cannot use LEA to adjust SP, we may need to use ADD, which
- // clobbers the EFLAGS. Check that none of the terminators reads the
- // EFLAGS, and if one uses it, conservatively assume this is not
+ // clobbers the EFLAGS. Check that we do not need to preserve it;
+ // otherwise, conservatively assume this is not
// safe to insert the epilogue here.
- return !terminatorsNeedFlagsAsInput(MBB);
+ return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+}
+
+bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // If we may need to emit frameless compact unwind information, give
+ // up as this is currently broken: PR25614.
+ return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF);
+}
+
+MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool RestoreSP) const {
+ assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
+ assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
+ assert(STI.is32Bit() && !Uses64BitFramePtr &&
+ "restoring EBP/ESI on non-32-bit target");
+
+ MachineFunction &MF = *MBB.getParent();
+ unsigned FramePtr = TRI->getFrameRegister(MF);
+ unsigned BasePtr = TRI->getBaseRegister();
+ WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // FIXME: Don't set FrameSetup flag in catchret case.
+
+ int FI = FuncInfo.EHRegNodeFrameIndex;
+ int EHRegSize = MFI->getObjectSize(FI);
+
+ if (RestoreSP) {
+ // MOV32rm -EHRegSize(%ebp), %esp
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
+ X86::EBP, true, -EHRegSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned UsedReg;
+ int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
+ int EndOffset = -EHRegOffset - EHRegSize;
+ FuncInfo.EHRegNodeEndOffset = EndOffset;
+
+ if (UsedReg == FramePtr) {
+ // ADD $offset, %ebp
+ unsigned ADDri = getADDriOpcode(false, EndOffset);
+ BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
+ .addReg(FramePtr)
+ .addImm(EndOffset)
+ .setMIFlag(MachineInstr::FrameSetup)
+ ->getOperand(3)
+ .setIsDead();
+ assert(EndOffset >= 0 &&
+ "end of registration object above normal EBP position!");
+ } else if (UsedReg == BasePtr) {
+ // LEA offset(%ebp), %esi
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
+ FramePtr, false, EndOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ // MOV32rm SavedEBPOffset(%esi), %ebp
+ assert(X86FI->getHasSEHFramePtrSave());
+ int Offset =
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ assert(UsedReg == BasePtr);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
+ UsedReg, true, Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
+ }
+ return MBBI;
+}
+
+unsigned
+X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
+ // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
+ unsigned Offset = 16;
+ // RBP is immediately pushed.
+ Offset += SlotSize;
+ // All callee-saved registers are then pushed.
+ Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
+ // Every funclet allocates enough stack space for the largest outgoing call.
+ Offset += getWinEHFuncletFrameSize(MF);
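+ // E.g. (hypothetical sizes): with SlotSize = 8, 16 bytes of pushed CSRs and
+ // a 32-byte funclet frame, the parent frame offset is 16 + 8 + 16 + 32 = 72.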
+ return Offset;
+}
+
+void X86FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ const Function *Fn = MF.getFunction();
+ if (!STI.is64Bit() || !MF.getMMI().hasEHFunclets() ||
+ classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX)
+ return;
+
+ // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
+ // relative to RSP after the prologue. Find the offset of the last fixed
+ // object, so that we can allocate a slot immediately following it. If there
+ // were no fixed objects, use offset -SlotSize, which is immediately after the
+ // return address. Fixed objects have negative frame indices.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int64_t MinFixedObjOffset = -SlotSize;
+ for (int I = MFI->getObjectIndexBegin(); I < 0; ++I)
+ MinFixedObjOffset = std::min(MinFixedObjOffset, MFI->getObjectOffset(I));
+
+ int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
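+ // E.g., if the lowest fixed object is at offset -16, UnwindHelp is placed
+ // at offset -24.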
+ int UnwindHelpFI =
+ MFI->CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+ MF.getWinEHFuncInfo()->UnwindHelpFrameIdx = UnwindHelpFI;
+
+ // Store -2 into UnwindHelp on function entry. We have to scan forwards past
+ // other frame setup instructions.
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
+ UnwindHelpFI)
+ .addImm(-2);
}
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 495cfcd..3ab41b4 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -47,11 +47,17 @@ public:
unsigned StackPtr;
- /// Emit a call to the target's stack probe function. This is required for all
+ /// Emit target stack probe code. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
- /// the number of bytes to probe in RAX/EAX.
- void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL) const;
+ /// the number of bytes to probe in RAX/EAX. Returns the instruction just
+ /// after the expansion.
+ MachineInstr *emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ bool InProlog) const;
+
+ /// Replace a StackProbe inline-stub with the actual probe code inline.
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -91,11 +97,9 @@ public:
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
bool needsFrameIndexResolution(const MachineFunction &MF) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
- int getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const;
int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
@@ -103,6 +107,11 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
/// Check the instruction before/after the passed instruction. If
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
/// stack adjustment is returned as a positive value for ADD/LEA and
@@ -125,7 +134,9 @@ public:
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
-private:
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
/// convertArgMovsToPushes - This method tries to convert a call sequence
/// that uses sub and mov instructions to put the argument onto the stack
/// into a series of pushes.
@@ -135,22 +146,56 @@ private:
MachineBasicBlock::iterator I,
uint64_t Amount) const;
- uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
-
/// Wraps up getting a CFI index and building a MachineInstr for it.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, MCCFIInstruction CFIInst) const;
+ /// Sets up EBP and optionally ESI based on the incoming EBP value. Only
+ /// needed for 32-bit. Used in funclet prologues and at catchret destinations.
+ MachineBasicBlock::iterator
+ restoreWin32EHStackPointers(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ bool RestoreSP = false) const;
+
+private:
+ uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
+  /// Emit target stack probe as a call to a helper function.
+ MachineInstr *emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
+ /// Emit target stack probe as an inline sequence.
+ MachineInstr *emitStackProbeInline(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
+ /// Emit a stub to later inline the target stack probe.
+ MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
- uint64_t MaxAlign) const;
+ unsigned Reg, uint64_t MaxAlign) const;
+
+ /// Make small positive stack adjustments using POPs.
+ bool adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ int Offset) const;
/// Adjusts the stack pointer using LEA, SUB, or ADD.
MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL, int64_t Offset,
bool InEpilogue) const;
+
+ unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const;
+
+ unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d5351d2..4414e47 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -46,9 +46,8 @@ STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
namespace {
- /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
- /// SDValue's instead of register numbers for the leaves of the matched
- /// tree.
+  /// This corresponds to X86AddressMode, but uses SDValues instead of register
+ /// numbers for the leaves of the matched tree.
struct X86ISelAddressMode {
enum {
RegBase,
@@ -87,8 +86,7 @@ namespace {
IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
}
- /// isRIPRelative - Return true if this addressing mode is already RIP
- /// relative.
+ /// Return true if this addressing mode is already RIP-relative.
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
if (RegisterSDNode *RegNode =
@@ -147,21 +145,25 @@ namespace {
namespace {
//===--------------------------------------------------------------------===//
- /// ISel - X86 specific code to select X86 machine instructions for
+ /// ISel - X86-specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class X86DAGToDAGISel final : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// OptForSize - If true, selector should try to optimize for code size
- /// instead of performance.
+ /// If true, selector should try to optimize for code size instead of
+ /// performance.
bool OptForSize;
+ /// If true, selector should try to optimize for minimum code size.
+ bool OptForMinSize;
+
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), OptForSize(false),
+ OptForMinSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -184,8 +186,7 @@ namespace {
return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
}
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
+ // True if the 64-bit immediate fits in a 32-bit sign-extended field.
inline bool i64immSExt32(SDNode *N) const {
uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
return (int64_t)v == (int32_t)v;
@@ -196,50 +197,50 @@ namespace {
private:
SDNode *Select(SDNode *N) override;
- SDNode *SelectGather(SDNode *N, unsigned Opc);
- SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
-
- bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
- bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
- bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ SDNode *selectGather(SDNode *N, unsigned Opc);
+ SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);
+
+ bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+ bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
- bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
- bool SelectLEAAddr(SDValue N, SDValue &Base,
+ bool selectMOV64Imm32(SDValue N, SDValue &Imm);
+ bool selectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+ bool selectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ bool selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ bool selectScalarSSELoad(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
- bool TryFoldLoad(SDNode *P, SDValue N,
+ bool tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
+ /// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- void EmitSpecialCodeForMain();
+ void emitSpecialCodeForMain();
inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
SDValue &Base, SDValue &Scale,
@@ -252,7 +253,7 @@ namespace {
: AM.Base_Reg;
Scale = getI8Imm(AM.Scale, DL);
Index = AM.IndexReg;
- // These are 32-bit even in 64-bit mode since RIP relative offset
+ // These are 32-bit even in 64-bit mode since RIP-relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
@@ -283,32 +284,105 @@ namespace {
Segment = CurDAG->getRegister(0, MVT::i32);
}
- /// getI8Imm - Return a target constant with the specified value, of type
- /// i8.
+    // Determine whether we should avoid selecting immediate forms of an
+    // instruction for better code size. At a high level, we'd like to avoid
+    // such instructions when the same constant is used more than once within
+    // a basic block and could be kept in a register instead.
+ bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
+ uint32_t UseCount = 0;
+
+      // Don't hoist if we're not optimizing for size.
+ // TODO: We'd like to remove this restriction.
+ // See the comment in X86InstrInfo.td for more info.
+ if (!OptForSize)
+ return false;
+
+ // Walk all the users of the immediate.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
+
+ SDNode *User = *UI;
+
+ // This user is already selected. Count it as a legitimate use and
+ // move on.
+ if (User->isMachineOpcode()) {
+ UseCount++;
+ continue;
+ }
+
+ // We want to count stores of immediates as real uses.
+ if (User->getOpcode() == ISD::STORE &&
+ User->getOperand(1).getNode() == N) {
+ UseCount++;
+ continue;
+ }
+
+ // We don't currently match users that have > 2 operands (except
+      // for stores, which are handled above).
+      // Those instructions won't match in isel, for now, and would
+      // be counted incorrectly.
+ // This may change in the future as we add additional instruction
+ // types.
+ if (User->getNumOperands() != 2)
+ continue;
+
+ // Immediates that are used for offsets as part of stack
+ // manipulation should be left alone. These are typically
+ // used to indicate SP offsets for argument passing and
+ // will get pulled into stores/pushes (implicitly).
+ if (User->getOpcode() == X86ISD::ADD ||
+ User->getOpcode() == ISD::ADD ||
+ User->getOpcode() == X86ISD::SUB ||
+ User->getOpcode() == ISD::SUB) {
+
+ // Find the other operand of the add/sub.
+ SDValue OtherOp = User->getOperand(0);
+ if (OtherOp.getNode() == N)
+ OtherOp = User->getOperand(1);
+
+ // Don't count if the other operand is SP.
+ RegisterSDNode *RegNode;
+ if (OtherOp->getOpcode() == ISD::CopyFromReg &&
+ (RegNode = dyn_cast_or_null<RegisterSDNode>(
+ OtherOp->getOperand(1).getNode())))
+ if ((RegNode->getReg() == X86::ESP) ||
+ (RegNode->getReg() == X86::RSP))
+ continue;
+ }
+
+ // ... otherwise, count this and move on.
+ UseCount++;
+ }
+
+      // If the immediate has more than one use, recommend hoisting it.
+ return (UseCount > 1);
+ }
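
As a rough source-level illustration of the heuristic above (a hypothetical example, not from this patch): when the same 32-bit immediate feeds two instructions in one block and we are optimizing for size, materializing it once in a register avoids encoding the imm32 twice.

    // Hypothetical C++ snippet; under -Os the selector now prefers keeping
    // 0x12345678 in a register rather than embedding it in both instructions.
    unsigned f(unsigned a, unsigned b) {
      unsigned x = a + 0x12345678;  // first use of the immediate
      unsigned y = b ^ 0x12345678;  // second use of the immediate
      return x + y;
    }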
+
+ /// Return a target constant with the specified value of type i8.
inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
}
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
+ /// Return a target constant with the specified value, of type i32.
inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
- /// getGlobalBaseReg - Return an SDNode that returns the value of
- /// the global base register. Output instructions required to
- /// initialize the global base register, if necessary.
- ///
+ /// Return an SDNode that returns the value of the global base register.
+ /// Output instructions required to initialize the global base register,
+ /// if necessary.
SDNode *getGlobalBaseReg();
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
+ /// Return a reference to the TargetMachine, casted to the target-specific
+ /// type.
const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
+ /// Return a reference to the TargetInstrInfo, casted to the target-specific
+ /// type.
const X86InstrInfo *getInstrInfo() const {
return Subtarget->getInstrInfo();
}
@@ -386,9 +460,9 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
return true;
}
-/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
-static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = OrigChain.getOperand(0);
@@ -418,7 +492,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}
-/// isCalleeLoad - Return true if call address is a load and it can be
+/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
@@ -461,12 +535,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
- // OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ // OptFor[Min]Size are used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->optForSize();
+ OptForMinSize = MF->getFunction()->optForMinSize();
+ assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register indirect
@@ -500,7 +576,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain, HasCallSeq))
continue;
- MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
@@ -577,9 +653,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
-/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
-/// the main function.
-void X86DAGToDAGISel::EmitSpecialCodeForMain() {
+/// Emit any code that needs to be executed only in the main function.
+void X86DAGToDAGISel::emitSpecialCodeForMain() {
if (Subtarget->isTargetCygMing()) {
TargetLowering::ArgListTy Args;
auto &DL = CurDAG->getDataLayout();
@@ -599,7 +674,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
// If this is main, emit special code for main.
if (const Function *Fn = MF->getFunction())
if (Fn->hasExternalLinkage() && Fn->getName() == "main")
- EmitSpecialCodeForMain();
+ emitSpecialCodeForMain();
}
static bool isDispSafeForFrameIndex(int64_t Val) {
@@ -612,7 +687,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
return isInt<31>(Val);
}
-bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
// Cannot combine ExternalSymbol displacements with integer offsets.
if (Offset != 0 && (AM.ES || AM.MCSym))
@@ -634,7 +709,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
}
-bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
// load gs:0 -> GS segment register.
@@ -658,11 +733,10 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
return true;
}
-/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
-/// into an addressing mode. These wrap things that will resolve down into a
-/// symbol reference. If no match is possible, this returns true, otherwise it
-/// returns false.
-bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
+/// mode. These wrap things that will resolve down into a symbol reference.
+/// If no match is possible, this returns true, otherwise it returns false.
+bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
// If the addressing mode already has a symbol as the displacement, we can
// never match another symbol.
if (AM.hasSymbolicDisplacement())
@@ -685,7 +759,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
AM.SymbolFlags = G->getTargetFlags();
- if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(G->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -694,7 +768,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.SymbolFlags = CP->getTargetFlags();
- if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -710,7 +784,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
X86ISelAddressMode Backup = AM;
AM.BlockAddr = BA->getBlockAddress();
AM.SymbolFlags = BA->getTargetFlags();
- if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -758,11 +832,10 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
return true;
}
-/// MatchAddress - Add the specified node to the specified addressing mode,
-/// returning true if it cannot be done. This just pattern matches for the
-/// addressing mode.
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
- if (MatchAddressRecursively(N, AM, 0))
+/// Add the specified node to the specified addressing mode, returning true if
+/// it cannot be done. This just pattern matches for the addressing mode.
+bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (matchAddressRecursively(N, AM, 0))
return true;
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
@@ -790,15 +863,49 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
+bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
+ !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ return true;
+}
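
A hypothetical example of what matchAdd handles (not from this patch): for a simple indexed load, both ADD operands fold into one addressing mode; when neither operand folds, the fallback still packs them as base + index with scale 1 rather than emitting a separate add.

    // Hypothetical C++ snippet: p[i] becomes a single addressing mode with
    // base = p, index = i, scale = sizeof(int).
    int load(const int *p, long i) {
      return p[i];
    }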
+
// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
-static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
if (N.getNode()->getNodeId() == -1 ||
N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
- DAG.RepositionNode(Pos.getNode(), N.getNode());
+ DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
N.getNode()->setNodeId(Pos.getNode()->getNodeId());
}
}
@@ -807,7 +914,7 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
-static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
+static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -835,12 +942,12 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, Eight);
- InsertDAGNode(DAG, N, Srl);
- InsertDAGNode(DAG, N, NewMask);
- InsertDAGNode(DAG, N, And);
- InsertDAGNode(DAG, N, ShlCount);
- InsertDAGNode(DAG, N, Shl);
+ insertDAGNode(DAG, N, Eight);
+ insertDAGNode(DAG, N, Srl);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, And);
+ insertDAGNode(DAG, N, ShlCount);
+ insertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
@@ -850,7 +957,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
-static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -880,9 +987,9 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, NewMask);
- InsertDAGNode(DAG, N, NewAnd);
- InsertDAGNode(DAG, N, NewShift);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewShift);
DAG.ReplaceAllUsesWith(N, NewShift);
AM.Scale = 1 << ShiftAmt;
@@ -917,7 +1024,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
-static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -973,7 +1080,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
assert(X.getValueType() != VT);
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
- InsertDAGNode(DAG, N, NewX);
+ insertDAGNode(DAG, N, NewX);
X = NewX;
}
SDLoc DL(N);
@@ -987,10 +1094,10 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, NewSRLAmt);
- InsertDAGNode(DAG, N, NewSRL);
- InsertDAGNode(DAG, N, NewSHLAmt);
- InsertDAGNode(DAG, N, NewSHL);
+ insertDAGNode(DAG, N, NewSRLAmt);
+ insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewSHLAmt);
+ insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
AM.Scale = 1 << AMShiftAmt;
@@ -998,7 +1105,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return false;
}
-bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
DEBUG({
@@ -1007,7 +1114,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
});
// Limit recursion.
if (Depth > 5)
- return MatchAddressBase(N, AM);
+ return matchAddressBase(N, AM);
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
@@ -1020,7 +1127,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
return true;
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
- if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
return false;
return true;
}
@@ -1038,19 +1145,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!FoldOffsetIntoAddress(Val, AM))
+ if (!foldOffsetIntoAddress(Val, AM))
return false;
break;
}
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
- if (!MatchWrapper(N, AM))
+ if (!matchWrapper(N, AM))
return false;
break;
case ISD::LOAD:
- if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
@@ -1087,7 +1194,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
- if (!FoldOffsetIntoAddress(Disp, AM))
+ if (!foldOffsetIntoAddress(Disp, AM))
return false;
}
@@ -1119,7 +1226,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Try to fold the mask and shift into the scale, and return false if we
// succeed.
- if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
return false;
break;
}
@@ -1153,7 +1260,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
- if (FoldOffsetIntoAddress(Disp, AM))
+ if (foldOffsetIntoAddress(Disp, AM))
Reg = N.getNode()->getOperand(0);
} else {
Reg = N.getNode()->getOperand(0);
@@ -1179,7 +1286,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
@@ -1227,56 +1334,26 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- InsertDAGNode(*CurDAG, N, Zero);
- InsertDAGNode(*CurDAG, N, Neg);
+ insertDAGNode(*CurDAG, N, Zero);
+ insertDAGNode(*CurDAG, N, Neg);
return false;
}
- case ISD::ADD: {
- // Add an artificial use to this node so that we can keep track of
- // it if it gets CSE'd with a different node.
- HandleSDNode Handle(N);
-
- X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
- return false;
- AM = Backup;
-
- // Try again after commuting the operands.
- if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
- !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ case ISD::ADD:
+ if (!matchAdd(N, AM, Depth))
return false;
- AM = Backup;
-
- // If we couldn't fold both operands into the address at the same time,
- // see if we can just put each operand into a register and fold at least
- // the add.
- if (AM.BaseType == X86ISelAddressMode::RegBase &&
- !AM.Base_Reg.getNode() &&
- !AM.IndexReg.getNode()) {
- N = Handle.getValue();
- AM.Base_Reg = N.getOperand(0);
- AM.IndexReg = N.getOperand(1);
- AM.Scale = 1;
- return false;
- }
- N = Handle.getValue();
break;
- }
case ISD::OR:
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (CurDAG->isBaseWithConstantOffset(N)) {
- X86ISelAddressMode Backup = AM;
- ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
-
- // Start with the LHS as an addr mode.
- if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
- return false;
- AM = Backup;
- }
+ // We want to look through a transform in InstCombine and DAGCombiner that
+ // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
+ // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
+ // An 'lea' can then be used to match the shift (multiply) and add:
+ // and $1, %esi
+ // lea (%rsi, %rdi, 8), %rax
+ if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
+ !matchAdd(N, AM, Depth))
+ return false;
break;
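
A hypothetical source pattern that benefits from this (not from this patch): when the operands of the 'or' provably share no set bits, the 'or' folds like an add and can be matched by a single LEA.

    // Hypothetical C++ snippet: (x & 1) and (y << 3) have disjoint bits,
    // so the 'or' is treated as an add during address matching.
    long combine(long x, long y) {
      return (x & 1) | (y << 3);
    }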
case ISD::AND: {
@@ -1299,27 +1376,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
uint64_t Mask = N.getConstantOperandVal(1);
// Try to fold the mask and shift into an extract and scale.
- if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
return false;
// Try to fold the mask and shift directly into the scale.
- if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
return false;
// Try to swap the mask and shift to place shifts which can be done as
// a scale on the outside of the mask.
- if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
break;
}
}
- return MatchAddressBase(N, AM);
+ return matchAddressBase(N, AM);
}
-/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// Helper for matchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
-bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
// If so, check to see if the scale index register is set.
@@ -1339,7 +1416,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
@@ -1362,7 +1439,7 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
// If Base is 0, the whole address is in index and the Scale is 1
if (isa<ConstantSDNode>(Base)) {
- assert(dyn_cast<ConstantSDNode>(Base)->isNullValue() &&
+ assert(cast<ConstantSDNode>(Base)->isNullValue() &&
"Unexpected base in gather/scatter");
Scale = getI8Imm(1, DL);
Base = CurDAG->getRegister(0, MVT::i32);
@@ -1375,14 +1452,14 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
-/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
@@ -1404,7 +1481,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
- if (MatchAddress(N, AM))
+ if (matchAddress(N, AM))
return false;
MVT VT = N.getSimpleValueType();
@@ -1420,14 +1497,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
-/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
-/// match a load whose top elements are either undef or zeros. The load flavor
-/// is derived from the type of N, which is either v4f32 or v2f64.
+/// Match a scalar SSE load. In particular, we want to match a load whose top
+/// elements are either undef or zeros. The load flavor is derived from the
+/// type of N, which is either v4f32 or v2f64.
///
/// We also return:
/// PatternChainNode: this is the matched node that has a chain input and
/// output.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
@@ -1439,7 +1516,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
return true;
}
@@ -1457,7 +1534,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
PatternNodeWithChain = SDValue(LD, 0);
return true;
@@ -1466,7 +1543,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
}
-bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
+bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CN->getZExtValue();
if ((uint32_t)ImmVal != (uint64_t)ImmVal)
@@ -1495,10 +1572,10 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
return TM.getCodeModel() == CodeModel::Small;
}
-bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
- if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
+ if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
return false;
SDLoc DL(N);
@@ -1533,9 +1610,9 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
return true;
}
-/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
+/// Calls selectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1546,7 +1623,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue Copy = AM.Segment;
SDValue T = CurDAG->getRegister(0, MVT::i32);
AM.Segment = T;
- if (MatchAddress(N, AM))
+ if (matchAddress(N, AM))
return false;
assert (T == AM.Segment);
AM.Segment = Copy;
@@ -1572,13 +1649,12 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
Complexity++;
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
- // to a LEA. This is determined with some expermentation but is by no means
+ // to a LEA. This is determined with some experimentation but is by no means
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.hasSymbolicDisplacement()) {
- // For X86-64, we should always use lea to materialize RIP relative
- // addresses.
+ // For X86-64, always use LEA to materialize RIP-relative addresses.
if (Subtarget->is64Bit())
Complexity = 4;
else
@@ -1596,8 +1672,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
return true;
}
-/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+/// This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1621,7 +1697,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
}
-bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1630,14 +1706,13 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsLegalToFold(N, P, P, OptLevel))
return false;
- return SelectAddr(N.getNode(),
+ return selectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
-/// getGlobalBaseReg - Return an SDNode that returns the value of
-/// the global base register. Output instructions required to
-/// initialize the global base register, if necessary.
-///
+/// Return an SDNode that returns the value of the global base register.
+/// Output instructions required to initialize the global base register,
+/// if necessary.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
auto &DL = MF->getDataLayout();
@@ -1828,7 +1903,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
return Val;
}
-SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
+SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
return nullptr;
@@ -1841,7 +1916,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Base, Scale, Index, Disp, Segment;
- if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
return nullptr;
// Which index into the table.
@@ -1933,9 +2008,9 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
return CurDAG->getMergeValues(RetVals, dl).getNode();
}
-/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
-/// any uses which require the SF or OF bits to be accurate.
-static bool HasNoSignedComparisonUses(SDNode *N) {
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// or OF bits to be accurate.
+static bool hasNoSignedComparisonUses(SDNode *N) {
// Examine each user of the node.
for (SDNode::use_iterator UI = N->use_begin(),
UE = N->use_end(); UI != UE; ++UI) {
@@ -1995,9 +2070,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
-/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
-/// is suitable for doing the {load; increment or decrement; store} to modify
-/// transformation.
+/// Check whether or not the chain ending in StoreNode is suitable for the
+/// {load; increment or decrement; store} read-modify-write transformation.
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
@@ -2081,8 +2155,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
return true;
}
-/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
-/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+/// Get the appropriate X86 opcode for an in-memory increment or decrement.
+/// Opc should be X86ISD::DEC or X86ISD::INC.
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
if (Opc == X86ISD::DEC) {
if (LdVT == MVT::i64) return X86::DEC64m;
@@ -2099,9 +2173,8 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
-/// SelectGather - Customized ISel for GATHER operations.
-///
-SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+/// Customized ISel for GATHER operations.
+SDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) {
// Operands of Gather: VSrc, Base, VIdx, VMask, Scale
SDValue Chain = Node->getOperand(0);
SDValue VSrc = Node->getOperand(2);
@@ -2148,6 +2221,27 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::BRIND: {
+ if (Subtarget->isTargetNaCl())
+      // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
+ // leave the instruction alone.
+ break;
+ if (Subtarget->isTarget64BitILP32()) {
+ // Converts a 32-bit register to a 64-bit, zero-extended version of
+ // it. This is needed because x86-64 can do many things, but jmp %r32
+ // ain't one of them.
+ const SDValue &Target = Node->getOperand(1);
+ assert(Target.getSimpleValueType() == llvm::MVT::i32);
+ SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
+ SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
+ Node->getOperand(0), ZextTarget);
+ ReplaceUses(SDValue(Node, 0), Brind);
+ SelectCode(ZextTarget.getNode());
+ SelectCode(Brind.getNode());
+ return nullptr;
+ }
+ break;
+ }
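
A hypothetical way to reach this path (not from this patch): an indirect branch on an x32 (ILP32-on-x86-64) target, e.g. via the GNU labels-as-values extension, carries a 32-bit target that must be widened before the 64-bit jmp.

    // Hypothetical snippet (GNU extension): tbl holds 32-bit pointers on
    // x32; the selector zero-extends the i32 target to i64 for the BRIND.
    void dispatch(int op) {
      static void *tbl[] = { &&done, &&more };
      goto *tbl[op & 1];
    more:
      ;
    done:
      return;
    }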
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -2190,7 +2284,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
}
- SDNode *RetVal = SelectGather(Node, Opc);
+ SDNode *RetVal = selectGather(Node, Opc);
if (RetVal)
// We already called ReplaceUses inside SelectGather.
return nullptr;
@@ -2217,7 +2311,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_ADD: {
- SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ SDNode *RetVal = selectAtomicLoadArith(Node, NVT);
if (RetVal)
return RetVal;
break;
@@ -2404,10 +2498,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      // Multiply is commutative.
if (!foldedLoad) {
- foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
@@ -2549,7 +2643,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
@@ -2557,7 +2651,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
@@ -2692,7 +2786,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- HasNoSignedComparisonUses(Node))
+ hasNoSignedComparisonUses(Node))
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -2709,7 +2803,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
(!(C->getZExtValue() & 0x80) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2743,7 +2837,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// For example, "testl %eax, $2048" to "testb %ah, $8".
if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
(!(C->getZExtValue() & 0x8000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
// Shift the immediate right by 8 bits.
SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
dl, MVT::i8);
@@ -2781,7 +2875,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
N0.getValueType() != MVT::i16 &&
(!(C->getZExtValue() & 0x8000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
MVT::i16);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2804,7 +2898,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
N0.getValueType() == MVT::i64 &&
(!(C->getZExtValue() & 0x80000000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
MVT::i32);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2854,7 +2948,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
SDValue Base, Scale, Index, Disp, Segment;
- if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ if (!selectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
break;
@@ -2903,7 +2997,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_v: // not offsetable ??
case InlineAsm::Constraint_m: // memory
case InlineAsm::Constraint_X:
- if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
+ if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
@@ -2916,9 +3010,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into an X86-specific DAG,
+/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f29b51..0927c2f 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -67,19 +68,14 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
-// Forward declarations.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
- SDValue V2);
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- TD = TM.getDataLayout();
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
- static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -118,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
-
- // The _ftol2 runtime function has an unusual calling conv, which
- // is modeled by a special pseudo-instruction.
- setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}
if (Subtarget->isTargetDarwin()) {
@@ -175,14 +164,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
+ // f32/f64 are legal, f80 is custom.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ else
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!Subtarget->useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
- // FILD for other targets.
+ // FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
@@ -206,23 +199,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
- // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
- // are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
- setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
-
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
- if (X86ScalarSSEf32) {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
- // f32 and f64 cases are Legal, f80 case is not
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ if (!Subtarget->useSoftFloat()) {
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
} else {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
@@ -232,8 +231,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+ // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ }
} else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
@@ -242,14 +247,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
+ // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
- }
- if (isTargetFTOL()) {
- // Use the _ftol2 runtime function, which has a pseudo-instruction
- // to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
@@ -274,8 +276,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -295,6 +296,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f128, Expand);
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
@@ -302,6 +304,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
@@ -312,7 +315,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
- setOperationAction(ISD::FREM , MVT::f32 , Expand);
+
+ if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
+    // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
+    // is. We should promote the value to 64 bits to solve this.
+ // This is what the CRT headers do - `fmodf` is an inline header
+ // function casting to f64 and calling `fmod`.
+ setOperationAction(ISD::FREM , MVT::f32 , Promote);
+ } else {
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ }
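
The promotion mirrors what the MSVC CRT headers do at the source level; a hypothetical equivalent (not from this patch):

    // Hypothetical sketch of the f32 frem promotion: widen to f64, call
    // fmod, and truncate the result back to f32.
    #include <cmath>
    float fmodf_promoted(float x, float y) {
      return static_cast<float>(std::fmod(static_cast<double>(x),
                                          static_cast<double>(y)));
    }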
+
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
@@ -404,15 +417,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f128 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f128 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
@@ -456,8 +475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
@@ -473,13 +491,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
- if (Subtarget->is64Bit()) {
- setExceptionPointerRegister(X86::RAX);
- setExceptionSelectorRegister(X86::RDX);
- } else {
- setExceptionPointerRegister(X86::EAX);
- setExceptionSelectorRegister(X86::EDX);
- }
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
@@ -492,8 +503,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
- if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
- // TargetInfo::X86_64ABIBuiltinVaList
+ if (Subtarget->is64Bit()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
@@ -505,7 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
@@ -613,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87.
+ // Long double always uses X87, except f128, which lives in XMM (FR128).
if (!Subtarget->useSoftFloat()) {
+ if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::f128, &X86::FR128RegClass);
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ setOperationAction(ISD::FABS , MVT::f128, Custom);
+ setOperationAction(ISD::FNEG , MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+ }
+
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -846,15 +864,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
+ // ISD::CTTZ v2i64 - scalarization is faster.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
+
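For the element types marked Custom above, one common way to lower vector CTTZ in terms of vector CTPOP is the identity cttz(x) = ctpop(~x & (x - 1)). A scalar sketch of the identity (illustrative; not the actual DAG lowering code):

    // For x == 0, the mask ~x & (x - 1) is all ones, so the result is the
    // full bit width (32 here), matching ISD::CTTZ semantics.
    unsigned cttz_via_ctpop(unsigned x) {
      return __builtin_popcount(~x & (x - 1));
    }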
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
- continue;
- // Do not attempt to custom lower non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -892,13 +912,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
-
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v2i64);
setOperationAction(ISD::OR, VT, Promote);
@@ -1036,6 +1050,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
+ if (Subtarget->hasXOP()) {
+ setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+ setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
+ setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
+ setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
+ }
+
if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
@@ -1126,7 +1151,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
+ setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
+
+ if (Subtarget->hasAnyFMA()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -1202,6 +1236,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
+ setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
+ setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
+ setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
+ setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
+ setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
+ setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
+ setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
+ setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
+ setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
}
// In the customized shift lowering, the legal cases in AVX2 will be
@@ -1243,15 +1290,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget->hasInt256())
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
-
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-256-bit vectors
- if (!VT.is256BitVector())
- continue;
-
+ for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v4i64);
setOperationAction(ISD::OR, VT, Promote);
@@ -1293,6 +1333,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
@@ -1311,6 +1352,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
@@ -1318,19 +1360,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
@@ -1348,12 +1381,62 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ if (Subtarget->hasVLX()) {
+ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
+
+ setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+ } else {
+ setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
+ }
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
if (Subtarget->hasDQI()) {
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ }
+ }
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
@@ -1386,7 +1469,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
@@ -1395,6 +1478,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
@@ -1439,9 +1523,49 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
if (Subtarget->hasCDI()) {
- setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
- }
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
+
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ } else {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
+ }
+ } // Subtarget->hasCDI()
+
if (Subtarget->hasDQI()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
@@ -1455,7 +1579,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
}
- if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
+ if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
@@ -1481,15 +1605,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::MGATHER, VT, Legal);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
}
}
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
-
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
@@ -1515,22 +1635,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
@@ -1541,19 +1674,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- const MVT VT = (MVT::SimpleValueType)i;
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
+ if (Subtarget->hasVLX())
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
- const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand);
+ }
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
+ for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
- if (EltSize < 32) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Legal);
- }
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v8i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v8i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
}
}
@@ -1571,6 +1716,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -1595,8 +1742,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- if (!Subtarget->is64Bit())
+ if (!Subtarget->is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ }
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -1604,9 +1753,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ if (VT == MVT::i64 && !Subtarget->is64Bit())
+ continue;
// Add/Sub/Mul with overflow operations are custom lowered.
- MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
@@ -1615,7 +1765,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, VT, Custom);
}
-
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -1658,12 +1807,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
@@ -1671,24 +1824,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::MSCATTER);
+ setTargetDAGCombine(ISD::MGATHER);
computeRegisterProperties(Subtarget->getRegisterInfo());
- // On Darwin, -Os means optimize for size without hurting performance,
- // do not reduce the limit.
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmoveOptSize = 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- // Predictable cmov don't hurt on atom because it's in-order.
+ // A predictable cmov does not hurt on an in-order CPU.
+ // FIXME: Use a CPU attribute to trigger this, not a CPU model.
PredictableSelectIsExpensive = !Subtarget->isAtom();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
@@ -1716,40 +1869,43 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
- const unsigned NumElts = VT.getVectorNumElements();
- const EVT EltVT = VT.getVectorElementType();
- if (VT.is512BitVector()) {
- if (Subtarget->hasAVX512())
- if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
- EltVT == MVT::f32 || EltVT == MVT::f64)
- switch(NumElts) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
- }
- if (Subtarget->hasBWI())
- if (EltVT == MVT::i8 || EltVT == MVT::i16)
- switch(NumElts) {
- case 32: return MVT::v32i1;
- case 64: return MVT::v64i1;
- }
- }
+ if (VT.isSimple()) {
+ MVT VVT = VT.getSimpleVT();
+ const unsigned NumElts = VVT.getVectorNumElements();
+ const MVT EltVT = VVT.getVectorElementType();
+ if (VVT.is512BitVector()) {
+ if (Subtarget->hasAVX512())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+ if (Subtarget->hasBWI())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 32: return MVT::v32i1;
+ case 64: return MVT::v64i1;
+ }
+ }
- if (VT.is256BitVector() || VT.is128BitVector()) {
- if (Subtarget->hasVLX())
- if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
- EltVT == MVT::f32 || EltVT == MVT::f64)
- switch(NumElts) {
- case 2: return MVT::v2i1;
- case 4: return MVT::v4i1;
- case 8: return MVT::v8i1;
- }
- if (Subtarget->hasBWI() && Subtarget->hasVLX())
- if (EltVT == MVT::i8 || EltVT == MVT::i16)
- switch(NumElts) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
- case 32: return MVT::v32i1;
- }
+ if (VVT.is256BitVector() || VVT.is128BitVector()) {
+ if (Subtarget->hasVLX())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 2: return MVT::v2i1;
+ case 4: return MVT::v4i1;
+ case 8: return MVT::v8i1;
+ }
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ case 32: return MVT::v32i1;
+ }
+ }
}
return VT.changeVectorElementTypeToInteger();
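Illustrative results of the rules above, assuming the named subtarget features (a sketch, not exhaustive):

    // getSetCCResultType(DL, Ctx, MVT::v16f32) == MVT::v16i1  (AVX512)
    // getSetCCResultType(DL, Ctx, MVT::v32i16) == MVT::v32i1  (AVX512 + BWI)
    // getSetCCResultType(DL, Ctx, MVT::v4f32)  == MVT::v4i1   (AVX512 + VLX)
    // Anything unmatched falls through to changeVectorElementTypeToInteger,
    // e.g. v4f32 -> v4i32 without AVX512.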
@@ -1769,9 +1925,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
- getMaxByValAlign(STy->getElementType(i), EltAlign);
+ getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
@@ -1821,10 +1977,11 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
- (Subtarget->isUnalignedMemAccessFast() ||
+ (!Subtarget->isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
+ // FIXME: Check if unaligned 32-byte accesses are slow.
if (Subtarget->hasInt256())
return MVT::v8i32;
if (Subtarget->hasFp256())
@@ -1842,6 +1999,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::f64;
}
}
+ // This is a compromise. If we reach here, unaligned accesses may be slow on
+ // this target. However, creating smaller, aligned accesses could be even
+ // slower and would certainly be a lot more code.
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
@@ -1860,8 +2020,22 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
- if (Fast)
- *Fast = Subtarget->isUnalignedMemAccessFast();
+ if (Fast) {
+ switch (VT.getSizeInBits()) {
+ default:
+ // 8-byte and under are always assumed to be fast.
+ *Fast = true;
+ break;
+ case 128:
+ *Fast = !Subtarget->isUnalignedMem16Slow();
+ break;
+ case 256:
+ *Fast = !Subtarget->isUnalignedMem32Slow();
+ break;
+ // TODO: What about AVX-512 (512-bit) accesses?
+ }
+ }
+ // Misaligned accesses of any size are always allowed.
return true;
}
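A caller-side sketch of how this hook is consumed (`TLI` names this lowering object and is an assumption of the sketch):

    bool Fast = false;
    // A misaligned 256-bit access is always allowed by the hook, but is only
    // worth keeping as one wide op when the subtarget reports it as fast.
    if (TLI.allowsMisalignedMemoryAccesses(MVT::v8i32, /*AddrSpace=*/0,
                                           /*Align=*/1, &Fast) && Fast) {
      // ... keep the single unaligned 32-byte load/store ...
    }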
@@ -1964,6 +2138,32 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
+Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!Subtarget->isTargetAndroid())
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ unsigned AddressSpace, Offset;
+ if (Subtarget->is64Bit()) {
+ // %fs:0x48, unless we're using a Kernel code model, in which case it's
+ // %gs:0x48.
+ Offset = 0x48;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x24 on i386
+ Offset = 0x24;
+ AddressSpace = 256;
+ }
+
+ return ConstantExpr::getIntToPtr(
+ ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
+ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
+}
+
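For orientation: in the x86 backend, address space 256 is %gs-relative and 257 is %fs-relative, so the constant built above denotes e.g. %fs:0x48 on 64-bit Android. A user-space sketch of reading that slot (inline asm, illustrative only):

    static void *ReadUnsafeStackPtr64() {
      void *UnsafeSP;
      // TLS_SLOT_SAFESTACK in 64-bit bionic lives at %fs:0x48.
      __asm__ __volatile__("movq %%fs:0x48, %0" : "=r"(UnsafeSP));
      return UnsafeSP;
    }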
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
@@ -1977,11 +2177,9 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
#include "X86GenCallingConv.inc"
-bool
-X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const {
+bool X86TargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
@@ -2001,6 +2199,9 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ if (CallConv == CallingConv::X86_INTR && !Outs.empty())
+ report_fatal_error("X86 interrupts may not return any value");
+
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -2025,7 +2226,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
- if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
+ if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
@@ -2114,7 +2315,10 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
+ X86ISD::NodeType opcode = X86ISD::RET_FLAG;
+ if (CallConv == CallingConv::X86_INTR)
+ opcode = X86ISD::IRET;
+ return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2193,7 +2397,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
EVT CopyVT = VA.getLocVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
- if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -2244,28 +2448,28 @@ enum StructReturnType {
StackStructReturn
};
static StructReturnType
-callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
if (Outs.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
- if (Flags.isInReg())
+ if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Determines whether a function uses struct return semantics.
static StructReturnType
-argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
if (Ins.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
- if (Flags.isInReg())
+ if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
@@ -2285,17 +2489,34 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
MachinePointerInfo(), MachinePointerInfo());
}
-/// Return true if the calling convention is one that
-/// supports tail call optimization.
-static bool IsTailCallConvention(CallingConv::ID CC) {
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::HiPE);
+ CC == CallingConv::HiPE || CC == CallingConv::HHVM);
}
-/// \brief Return true if the calling convention is a C calling convention.
-static bool IsCCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
- CC == CallingConv::X86_64_SysV);
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ // C calling conventions:
+ case CallingConv::C:
+ case CallingConv::X86_64_Win64:
+ case CallingConv::X86_64_SysV:
+ // Callee pop conventions:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ case CallingConv::X86_FastCall:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+/// Return true if the function is being made into a tailcall target by
+/// changing its ABI.
+static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
+ return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
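A quick sketch of how these predicates relate (expected values are stated per the conventions listed above):

    // fastcc under -tailcallopt: the ABI is adjusted so TCO is guaranteed.
    bool Must = shouldGuaranteeTCO(CallingConv::Fast, /*TailCallOpt=*/true);
    // stdcall is callee-pop: it may be tail-called opportunistically even
    // though TCO is never guaranteed for it.
    bool May = mayTailCallThisCC(CallingConv::X86_StdCall);       // true
    bool Never = canGuaranteeTCO(CallingConv::X86_StdCall);      // false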
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -2306,19 +2527,12 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
- if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
+ if (!mayTailCallThisCC(CalleeCC))
return false;
return true;
}
-/// Return true if the function is being made into
-/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
- bool GuaranteedTailCallOpt) {
- return GuaranteedTailCallOpt && IsTailCallConvention(CC);
-}
-
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
@@ -2329,7 +2543,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
+ bool AlwaysUseMutable = shouldGuaranteeTCO(
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -2344,6 +2558,19 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
else
ValVT = VA.getValVT();
+ // Calculate the SP offset of an interrupt parameter, which occupies the
+ // slot normally taken by the return address.
+ int Offset = 0;
+ if (CallConv == CallingConv::X86_INTR) {
+ const X86Subtarget& Subtarget =
+ static_cast<const X86Subtarget&>(DAG.getSubtarget());
+ // X86 interrupts may take one or two arguments.
+ // On the stack there is no return address as there would be in a regular
+ // call. The offset of the last argument needs to be set to -4/-8 bytes,
+ // while the offset of the first argument (when there are two) is 0 bytes.
+ Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
+ }
+
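The one- and two-argument shapes this offset math (and the argument-count check in LowerFormalArguments below) anticipates correspond roughly to Clang's x86 interrupt handlers (the `isr*` names and the opaque `interrupt_frame` are illustrative):

    struct interrupt_frame;
    // One argument: only the CPU-pushed frame, no error code.
    __attribute__((interrupt)) void isr(struct interrupt_frame *frame) {}
    // Two arguments: exceptions that also push an error code
    // (i64 in 64-bit mode, i32 in 32-bit mode).
    __attribute__((interrupt)) void isr_err(struct interrupt_frame *frame,
                                            unsigned long error_code) {}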
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
@@ -2352,14 +2579,24 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI->setObjectOffset(FI, Offset);
+ }
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI->setObjectOffset(FI, Offset);
+ }
+
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
return ExtendedInMem ?
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
}
@@ -2413,15 +2650,10 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
-SDValue
-X86TargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
+SDValue X86TargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
@@ -2436,9 +2668,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
+ if (CallConv == CallingConv::X86_INTR) {
+ bool isLegal = Ins.size() == 1 ||
+ (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
+ (!Is64Bit && Ins[1].VT == MVT::i32)));
+ if (!isLegal)
+ report_fatal_error("X86 interrupts may take one or two arguments");
+ }
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
@@ -2471,6 +2711,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
+ else if (RegVT == MVT::f128)
+ RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
@@ -2547,8 +2789,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt))
+ if (shouldGuaranteeTCO(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -2561,13 +2803,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MFI->CreateFixedObject(1, StackSize, true));
}
- MachineModuleInfo &MMI = MF.getMMI();
- const Function *WinEHParent = nullptr;
- if (MMI.hasWinEHFuncInfo(Fn))
- WinEHParent = MMI.getWinEHParent(Fn);
- bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
- bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
-
// Figure out if XMM registers are in use.
assert(!(Subtarget->useSoftFloat() &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
@@ -2631,10 +2866,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(
- FuncInfo->getRegSaveFrameIndex(), Offset),
- false, false, 0);
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
@@ -2656,27 +2892,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- } else if (IsWin64 && IsWinEHOutlined) {
- // Get to the caller-allocated home save location. Add 8 to account
- // for the return address.
- int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
- FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
- /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
-
- MMI.getWinEHFuncInfo(Fn)
- .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
- FuncInfo->getRegSaveFrameIndex();
-
- // Store the second integer parameter (rdx) into rsp+16 relative to the
- // stack pointer at the entry of the function.
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy(DAG.getDataLayout()));
- unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
- Chain = DAG.getStore(
- Val.getValue(1), dl, Val, RSFIN,
- MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
- /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
}
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
@@ -2723,12 +2938,15 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
+ // X86 interrupts must pop the error code if present
+ FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
- argsAreStructReturn(Ins) == StackStructReturn)
+ argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2743,21 +2961,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
- if (IsWinEHParent) {
- if (Is64Bit) {
- int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
- SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
- MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
- SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
- Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
- MachinePointerInfo::getFixedStack(UnwindHelpFI),
- /*isVolatile=*/true,
- /*isNonTemporal=*/false, /*Alignment=*/0);
- } else {
- // Functions using Win32 EH are considered to have opaque SP adjustments
- // to force local variables to be addressed from the frame or base
- // pointers.
- MFI->setHasOpaqueSPAdjustment(true);
+ if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
+ EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ if (Personality == EHPersonality::CoreCLR) {
+ assert(Is64Bit);
+ // TODO: Add a mechanism to frame lowering that will allow us to indicate
+ // that we'd prefer this slot be allocated towards the bottom of the frame
+ // (i.e. near the stack pointer after allocating the frame). Every
+ // funclet needs a copy of this slot in its (mostly empty) frame, and the
+ // offset from the bottom of this and each funclet's frame must be the
+ // same, so the size of funclets' (mostly empty) frames is dictated by
+ // how far this slot is from the bottom (since they allocate just enough
+ // space to accommodate holding this slot at the correct offset).
+ int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
@@ -2777,9 +2994,10 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- return DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
}
/// Emit a load of return address if tail call
@@ -2813,11 +3031,24 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), NewReturnAddrFI),
false, false, 0);
return Chain;
}
+/// Returns a vector_shuffle mask for a movs{s|d} or movd
+/// operation of the specified width.
+static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
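A concrete reading of the mask construction above:

    // getMOVL(DAG, dl, MVT::v4f32, V1, V2) yields the mask {4, 1, 2, 3}:
    // index 4 (== NumElems) selects element 0 of V2, while indices 1-3 keep
    // V1's upper lanes - exactly the MOVSS write-low-lane semantics.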
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2835,11 +3066,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- StructReturnType SR = callIsStructReturn(Outs);
+ StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+ if (CallConv == CallingConv::X86_INTR)
+ report_fatal_error("X86 interrupts may not be called directly");
+
if (Attr.getValueAsString() == "true")
isTailCall = false;
@@ -2878,7 +3112,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
++NumTailCalls;
}
- assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
@@ -2892,13 +3126,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- IsTailCallConvention(CallConv))
+ canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2970,7 +3204,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
break;
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
- Arg.getValueType().getScalarType() == MVT::i1)
+ Arg.getValueType().getVectorElementType() == MVT::i1)
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
@@ -2987,9 +3221,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, 0);
Arg = SpillSlot;
break;
}
@@ -3125,10 +3360,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
- MemOpChains2.push_back(
- DAG.getStore(ArgChain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains2.push_back(DAG.getStore(
+ ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, 0));
}
}
@@ -3207,7 +3442,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ExtraLoad)
Callee = DAG.getLoad(
getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false,
+ false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -3261,9 +3497,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
- // If this is an invoke in a 32-bit function using an MSVC personality, assume
- // the function clobbers all registers. If an exception is thrown, the runtime
- // will not restore CSRs.
+ // If this is an invoke in a 32-bit function using a funclet-based
+ // personality, assume the function clobbers all registers. If an exception
+ // is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
@@ -3272,7 +3508,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallerFn->hasPersonalityFn()
? classifyEHPersonality(CallerFn->getPersonalityFn())
: EHPersonality::Unknown;
- if (isMSVCEHPersonality(Pers))
+ if (isFuncletEHPersonality(Pers))
Mask = RegInfo->getNoPreservedMask();
}
@@ -3300,7 +3536,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
- else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
@@ -3358,8 +3594,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// EDI
// local1 ..
-/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
-/// for a 16 byte align requirement.
+/// Align the stack size, e.g. to 16n + 12, to satisfy a 16-byte alignment
+/// requirement.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
@@ -3380,9 +3616,8 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
return Offset;
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
+/// Return true if the given stack call argument is already available in the
+/// same position (relatively) of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
@@ -3435,25 +3670,19 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization. Targets which want to do tail call
-/// optimization should implement this function.
-bool
-X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG &DAG) const {
- if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
+/// Check whether the call is eligible for tail call optimization. Targets
+/// that want to do tail call optimization should implement this function.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ if (!mayTailCallThisCC(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
- const MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
@@ -3474,7 +3703,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
- if (IsTailCallConvention(CalleeCC) && CCMatch)
+ if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
}
@@ -3493,19 +3722,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // An stdcall/thiscall caller is expected to clean up its arguments; the
- // callee isn't going to do that.
- // FIXME: this is more restrictive than needed. We could produce a tailcall
- // when the stack adjustment matches. For example, with a thiscall that takes
- // only one argument.
- if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
- CallerCC == CallingConv::X86_ThisCall))
- return false;
-
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
if (isVarArg && !Outs.empty()) {
-
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
@@ -3573,6 +3792,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ unsigned StackArgsSize = 0;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -3587,11 +3808,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- if (CCInfo.getNextStackOffset()) {
- MachineFunction &MF = DAG.getMachineFunction();
- if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
- return false;
+ StackArgsSize = CCInfo.getNextStackOffset();
+ if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3642,6 +3861,21 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ bool CalleeWillPop =
+ X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt);
+
+ if (unsigned BytesToPop =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+ // If we have bytes to pop, the callee must pop them.
+ bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
+ if (!CalleePopMatches)
+ return false;
+ } else if (CalleeWillPop && StackArgsSize > 0) {
+ // If we don't have bytes to pop, make sure the callee doesn't pop any.
+ return false;
+ }
+
return true;
}
@@ -3688,11 +3922,13 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
+ case X86ISD::VPERMV:
+ case X86ISD::VPERMV3:
return true;
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
@@ -3707,7 +3943,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
@@ -3772,23 +4008,23 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
return false;
}
-/// isCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
+/// Determines whether the callee is required to pop its own arguments.
+/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
- bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
+ // If GuaranteeTCO is true, we force some calls to be callee pop so that we
+ // can guarantee TCO.
+ if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
+ return true;
+
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
+ case CallingConv::X86_VectorCall:
return !is64Bit;
- case CallingConv::Fast:
- case CallingConv::GHC:
- case CallingConv::HiPE:
- if (IsVarArg)
- return false;
- return TailCallOpt;
}
}
@@ -3807,11 +4043,26 @@ static bool isX86CCUnsigned(unsigned X86CC) {
case X86::COND_BE: return true;
case X86::COND_AE: return true;
}
- llvm_unreachable("covered switch fell through?!");
}
-/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
-/// specific condition code, returning the condition code and the LHS/RHS of the
+static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+}
+
+/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
+/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
@@ -3833,19 +4084,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
}
}
- switch (SetCCOpcode) {
- default: llvm_unreachable("Invalid integer condition!");
- case ISD::SETEQ: return X86::COND_E;
- case ISD::SETGT: return X86::COND_G;
- case ISD::SETGE: return X86::COND_GE;
- case ISD::SETLT: return X86::COND_L;
- case ISD::SETLE: return X86::COND_LE;
- case ISD::SETNE: return X86::COND_NE;
- case ISD::SETULT: return X86::COND_B;
- case ISD::SETUGT: return X86::COND_A;
- case ISD::SETULE: return X86::COND_BE;
- case ISD::SETUGE: return X86::COND_AE;
- }
+ return TranslateIntegerX86CC(SetCCOpcode);
}
// First determine if it is required or is profitable to flip the operands.
@@ -3898,8 +4137,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
}
}
-/// hasFPCMov - is there a floating point cmov for the specific X86 condition
-/// code. Current x86 isa includes the following FP cmov instructions:
+/// Is there a floating point cmov for the specific X86 condition code?
+/// Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
@@ -3917,7 +4156,7 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
-/// isFPImmLegal - Returns true if the target can instruction select the
+/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
@@ -3970,7 +4209,7 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasLZCNT();
}
-/// isUndefInRange - Return true if every element in Mask, beginning
+/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
@@ -3979,19 +4218,18 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
return true;
}
-/// isUndefOrInRange - Return true if Val is undef or if its value falls within
-/// the specified range (L, H].
+/// Return true if Val is undef or if its value falls within the
+/// specified half-open range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
-/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
-/// specified value.
+/// Val is either less than zero (undef) or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return (Val < 0 || Val == CmpVal);
}
-/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
+/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
@@ -4002,9 +4240,8 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
-/// isVEXTRACTIndex - Return true if the specified
-/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
-/// suitable for instruction that extract 128 or 256 bit vectors
+/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
+/// extract suitable for instructions that extract 128- or 256-bit vectors.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -4021,7 +4258,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
return Result;
}
-/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
+/// Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for input to
/// insertion of 128 or 256-bit subvectors
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
@@ -4057,8 +4294,8 @@ bool X86::isVEXTRACT256Index(SDNode *N) {
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
- if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
- llvm_unreachable("Illegal extract subvector for VEXTRACT");
+ assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
+ "Illegal extract subvector for VEXTRACT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
@@ -4072,8 +4309,8 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
- if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
- llvm_unreachable("Illegal insert subvector for VINSERT");
+ assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
+ "Illegal insert subvector for VINSERT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
@@ -4085,53 +4322,71 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
return Index / NumElemsPerChunk;
}
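
The immediate computed by getExtractVEXTRACTImmediate and getInsertVINSERTImmediate is just the number of the 128- or 256-bit chunk that the element index falls into. A standalone sketch with invented names and illustrative element widths:

    // Sketch of the chunk-immediate computation above (not LLVM code).
    #include <cassert>
    #include <cstdio>

    static unsigned chunkImmediate(unsigned Index, unsigned ElemBits,
                                   unsigned vecWidth) {
      assert(vecWidth == 128 || vecWidth == 256);
      unsigned NumElemsPerChunk = vecWidth / ElemBits;
      return Index / NumElemsPerChunk; // which chunk the element lives in
    }

    int main() {
      std::printf("%u\n", chunkImmediate(6, 32, 128));  // v8f32 elt 6 -> imm 1
      std::printf("%u\n", chunkImmediate(12, 32, 256)); // v16f32 elt 12 -> imm 1
    }
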
-/// getExtractVEXTRACT128Immediate - Return the appropriate immediate
-/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
-/// and VINSERTI128 instructions.
+/// Return the appropriate immediate to extract the specified
+/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VINSERTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 128);
}
-/// getExtractVEXTRACT256Immediate - Return the appropriate immediate
-/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to extract the specified
+/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 256);
}
-/// getInsertVINSERT128Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
-/// and VINSERTI128 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 128);
}
-/// getInsertVINSERT256Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF46x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
-/// isZero - Returns true if Elt is a constant integer zero
-static bool isZero(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isNullValue();
-}
-
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
+/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- if (isZero(Elt))
- return true;
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
- return CFP->getValueAPF().isPosZero();
- return false;
+ return isNullConstant(Elt) || isNullFPConstant(Elt);
}
-/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
-static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+// Build a vector of constants.
+// Uses an UNDEF node if MaskElt == -1.
+// Split 64-bit constants in 32-bit mode.
+static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
+ SelectionDAG &DAG,
+ SDLoc dl, bool IsMask = false) {
+
+ SmallVector<SDValue, 32> Ops;
+ bool Split = false;
+
+ MVT ConstVecVT = VT;
+ unsigned NumElts = VT.getVectorNumElements();
+ bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+ if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+ ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+ Split = true;
+ }
+
+ MVT EltVT = ConstVecVT.getVectorElementType();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ bool IsUndef = Values[i] < 0 && IsMask;
+ SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
+ DAG.getConstant(Values[i], dl, EltVT);
+ Ops.push_back(OpNode);
+ if (Split)
+ Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
+ DAG.getConstant(0, dl, EltVT));
+ }
+ SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops);
+ if (Split)
+ ConstsNode = DAG.getBitcast(VT, ConstsNode);
+ return ConstsNode;
+}
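
The splitting behaviour of getConstVector can be modelled without SelectionDAG: in 32-bit mode each 64-bit element becomes a pair of 32-bit elements (low word carries the value, high word is zero), and a negative mask element stays undef. A hedged sketch in plain C++; Elt and splitConstVector are invented names:

    // Sketch of getConstVector's 64-bit split (not LLVM code).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Elt { bool Undef; uint32_t Val; };

    static std::vector<Elt> splitConstVector(const std::vector<int> &Values,
                                             bool Int64Legal) {
      std::vector<Elt> Out;
      for (int V : Values) {
        bool Undef = V < 0;                           // IsMask semantics
        Out.push_back({Undef, Undef ? 0u : (uint32_t)V});
        if (!Int64Legal)                              // split i64 -> two i32
          Out.push_back({Undef, 0u});                 // high word is zero
      }
      return Out;
    }

    int main() {
      for (const Elt &E : splitConstVector({3, -1, 7}, /*Int64Legal=*/false))
        std::printf(E.Undef ? "u " : "%u ", E.Val);
      std::printf("\n"); // 3 0 u u 7 0
    }
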
+
+/// Returns a vector of specified type with all zero elements.
+static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -4163,7 +4418,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
- } else if (VT.getScalarType() == MVT::i1) {
+ } else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
&& "Unexpected vector type");
@@ -4195,19 +4450,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
+ makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
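
The hunk above replaces the multiply/divide normalization with a bit-clear. The two are equivalent whenever ElemsPerChunk is a power of two, which the new assert guarantees; a small self-checking sketch (index range chosen for illustration):

    // Verifies: ((Idx * ElemBits) / vecWidth) * ElemsPerChunk
    //        == Idx & ~(ElemsPerChunk - 1), for power-of-2 ElemsPerChunk.
    #include <cassert>
    #include <initializer_list>

    int main() {
      for (unsigned ElemBits : {8u, 16u, 32u, 64u})
        for (unsigned vecWidth : {128u, 256u}) {
          unsigned ElemsPerChunk = vecWidth / ElemBits;
          for (unsigned IdxVal = 0; IdxVal < 64; ++IdxVal) {
            unsigned Old = ((IdxVal * ElemBits) / vecWidth) * ElemsPerChunk;
            unsigned New = IdxVal & ~(ElemsPerChunk - 1);
            assert(Old == New);
          }
        }
      return 0;
    }
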
@@ -4245,13 +4499,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
@@ -4279,7 +4533,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
Vec, ZeroIndex);
// The blend instruction, and therefore its mask, depend on the data type.
- MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT();
if (ScalarType.isFloatingPoint()) {
// Choose either vblendps (float) or vblendpd (double).
unsigned ScalarSize = ScalarType.getSizeInBits();
@@ -4316,6 +4570,81 @@ static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
+/// Insert i1-subvector to i1-vector.
+static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) {
+
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue SubVec = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
+ return Op;
+
+ MVT OpVT = Op.getSimpleValueType();
+ MVT SubVecVT = SubVec.getSimpleValueType();
+ unsigned NumElems = OpVT.getVectorNumElements();
+ unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
+
+ assert(IdxVal + SubVecNumElems <= NumElems &&
+ IdxVal % SubVecVT.getSizeInBits() == 0 &&
+ "Unexpected index value in INSERT_SUBVECTOR");
+
+ // There are 3 possible cases:
+ // 1. Subvector should be inserted in the lower part (IdxVal == 0)
+ // 2. Subvector should be inserted in the upper part
+ // (IdxVal + SubVecNumElems == NumElems)
+ // 3. Subvector should be inserted in the middle (for example v2i1
+ // to v16i1, index 2)
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(OpVT);
+ SDValue WideSubVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx);
+ if (Vec.isUndef())
+ return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+
+ if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ return ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec;
+ }
+
+ if (IdxVal == 0) {
+ // Zero lower bits of the Vec
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ // Merge them together
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+ }
+
+ // Simple case when we put subvector in the upper part
+ if (IdxVal + SubVecNumElems == NumElems) {
+ // Zero upper bits of the Vec
+ WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+ }
+ // Subvector should be inserted in the middle - use shuffle
+ SmallVector<int, 64> Mask;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
+ i : i + NumElems);
+ return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+}
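
The shift arithmetic in Insert1BitVector is easiest to follow on a scalar model of a mask register, where element i is bit i and VSHLI/VSRLI move bits toward higher/lower indices. A sketch of the all-zero-Vec case only; insertIntoZero is an invented name:

    // Model of the shift trick above for inserting into a zero vXi1 vector.
    #include <cassert>
    #include <cstdint>

    static uint64_t insertIntoZero(uint64_t Sub, unsigned NumElems,
                                   unsigned SubNumElems, unsigned IdxVal) {
      uint64_t RegMask = (NumElems == 64) ? ~0ull : ((1ull << NumElems) - 1);
      unsigned ShiftLeft = NumElems - SubNumElems;           // clear high bits
      unsigned ShiftRight = NumElems - SubNumElems - IdxVal; // land at IdxVal
      uint64_t Wide = (Sub << ShiftLeft) & RegMask;
      return ShiftRight ? (Wide >> ShiftRight) : Wide;
    }

    int main() {
      // Insert v2i1 value 0b11 into a zero v16i1 at index 2: bits 2 and 3 set.
      assert(insertIntoZero(0b11, 16, 2, 2) == 0b1100);
      return 0;
    }
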
+
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
@@ -4334,18 +4663,22 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
- SDLoc dl) {
+static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
SDValue Vec;
- if (VT.is256BitVector()) {
- if (HasInt256) { // AVX2
+ if (VT.is512BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
+ } else if (VT.is256BitVector()) {
+ if (Subtarget->hasInt256()) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else { // AVX
@@ -4360,19 +4693,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
return DAG.getBitcast(VT, Vec);
}
-/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
-/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask;
- Mask.push_back(NumElems);
- for (unsigned i = 1; i != NumElems; ++i)
- Mask.push_back(i);
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
-}
-
-/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4384,7 +4705,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
+/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4396,10 +4717,10 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector. This produces a shuffle where the low
-/// element of V2 is swizzled into the zero/undef vector, landing at element
-/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+/// Return a vector_shuffle of the specified vector and a zero or undef vector.
+/// This produces a shuffle where the low element of V2 is swizzled into the
+/// zero/undef vector, landing at element Idx.
+/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
const X86Subtarget *Subtarget,
@@ -4415,10 +4736,10 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
}
-/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
-/// target specific opcode. Returns true if the Mask could be calculated. Sets
-/// IsUnary to true if only uses one source. Note that this will set IsUnary for
-/// shuffles which use a single input multiple times, and in those cases it will
+/// Calculates the shuffle mask corresponding to the target-specific opcode.
+/// Returns true if the Mask could be calculated. Sets IsUnary to true if only
+/// uses one source. Note that this will set IsUnary for shuffles which use a
+/// single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
@@ -4482,7 +4803,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
// If we have a build-vector, then things are easy.
- EVT VT = MaskNode.getValueType();
+ MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() &&
"Can't produce a non-vector with a build_vector!");
if (!VT.isInteger())
@@ -4572,6 +4893,119 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
// Not yet implemented
return false;
+ case X86ISD::VPERMV: {
+ IsUnary = true;
+ SDValue MaskNode = N->getOperand(0);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+ MaskNode = MaskNode->getOperand(0);
+
+ unsigned MaskLoBits = Log2_64(VT.getVectorNumElements());
+ SmallVector<uint64_t, 32> RawMask;
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ assert(MaskNode.getSimpleValueType().isInteger() &&
+ MaskNode.getSimpleValueType().getVectorNumElements() ==
+ VT.getVectorNumElements());
+
+ for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF)
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ else if (isa<ConstantSDNode>(Op)) {
+ APInt MaskElement = cast<ConstantSDNode>(Op)->getAPIntValue();
+ RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
+ } else
+ return false;
+ }
+ DecodeVPERMVMask(RawMask, Mask);
+ break;
+ }
+ if (MaskNode->getOpcode() == X86ISD::VBROADCAST) {
+ unsigned NumEltsInMask = MaskNode->getNumOperands();
+ MaskNode = MaskNode->getOperand(0);
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode)) {
+ APInt MaskEltValue = CN->getAPIntValue();
+ for (unsigned i = 0; i < NumEltsInMask; ++i)
+ RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue());
+ DecodeVPERMVMask(RawMask, Mask);
+ break;
+ }
+ // It may be a scalar load
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ DecodeVPERMVMask(C, VT, Mask);
+ if (Mask.empty())
+ return false;
+ break;
+ }
+ return false;
+ }
+ case X86ISD::VPERMV3: {
+ IsUnary = false;
+ SDValue MaskNode = N->getOperand(1);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+    MaskNode = MaskNode->getOperand(0);
+
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ assert(MaskNode.getSimpleValueType().isInteger() &&
+ MaskNode.getSimpleValueType().getVectorNumElements() ==
+ VT.getVectorNumElements());
+
+ SmallVector<uint64_t, 32> RawMask;
+ unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2);
+
+ for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF)
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ else {
+ auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
+ if (!CN)
+ return false;
+ APInt MaskElement = CN->getAPIntValue();
+ RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
+ }
+ }
+ DecodeVPERMV3Mask(RawMask, Mask);
+ break;
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ DecodeVPERMV3Mask(C, VT, Mask);
+ if (Mask.empty())
+ return false;
+ break;
+ }
+ return false;
+ }
default: llvm_unreachable("unknown target shuffle node");
}
@@ -4586,7 +5020,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
return true;
}
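
The VPERMV/VPERMV3 cases added above keep only the low log2(NumElts) (or log2(2*NumElts)) bits of each constant mask element. A standalone sketch of that truncation for the unary case; decodeVPermMask is invented, while DecodeVPERMVMask is the real helper named in the patch:

    // Sketch of the constant VPERMV mask decode above (not LLVM code).
    #include <cstdio>
    #include <vector>

    static std::vector<int> decodeVPermMask(const std::vector<long long> &Raw,
                                            unsigned NumElts) {
      unsigned LoBits = 0;
      while ((1u << LoBits) < NumElts) ++LoBits;   // like Log2_64(NumElts)
      std::vector<int> Mask;
      for (long long E : Raw)                      // -1 stands in for undef
        Mask.push_back(E < 0 ? -1 : (int)(E & ((1u << LoBits) - 1)));
      return Mask;
    }

    int main() {
      for (int M : decodeVPermMask({7, 9, -1, 130}, 8))
        std::printf("%d ", M);                     // 7 1 -1 2
      std::printf("\n");
    }
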
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
@@ -4650,8 +5084,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
return SDValue();
}
-/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
-///
+/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
@@ -4721,8 +5154,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
return DAG.getBitcast(MVT::v16i8, V);
}
-/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
-///
+/// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
@@ -4753,7 +5185,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return V;
}
-/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
+/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
@@ -4924,7 +5356,7 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
return SDValue();
if ((Offset % RequiredAlign) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~(RequiredAlign-1);
+ int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -5157,8 +5589,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
- const Function *F = DAG.getMachineFunction().getFunction();
- bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
@@ -5188,9 +5619,10 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ Ld = DAG.getLoad(
+ CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
@@ -5329,7 +5761,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
return NV;
}
-static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
+static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
@@ -5366,7 +5798,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
}
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
+ SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, Imm);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
@@ -5600,7 +6032,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- EVT VT = BV->getValueType(0);
+ MVT VT = BV->getSimpleValueType(0);
if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
@@ -5662,12 +6094,12 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
// Update InVec0 and InVec1.
if (InVec0.getOpcode() == ISD::UNDEF) {
InVec0 = Op0.getOperand(0);
- if (InVec0.getValueType() != VT)
+ if (InVec0.getSimpleValueType() != VT)
return SDValue();
}
if (InVec1.getOpcode() == ISD::UNDEF) {
InVec1 = Op1.getOperand(0);
- if (InVec1.getValueType() != VT)
+ if (InVec1.getSimpleValueType() != VT)
return SDValue();
}
@@ -5703,7 +6135,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = BV->getValueType(0);
+ MVT VT = BV->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
@@ -5845,7 +6277,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned NumElems = Op.getNumOperands();
// Generate vectors for predicate vectors.
- if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5866,7 +6298,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return Op;
if (!VT.is512BitVector())
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ return getOnesVector(VT, Subtarget, DAG, dl);
}
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
@@ -5881,7 +6313,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned NumZero = 0;
unsigned NumNonZero = 0;
- unsigned NonZeros = 0;
+ uint64_t NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
@@ -5895,7 +6327,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (X86::isZeroNode(Elt))
NumZero++;
else {
- NonZeros |= (1 << i);
+ assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
+ NonZeros |= ((uint64_t)1 << i);
NumNonZero++;
}
}
@@ -5919,7 +6352,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
- EVT VecVT = MVT::v4i32;
+ MVT VecVT = MVT::v4i32;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -6051,7 +6484,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
- Op.getOperand(Idx));
+ Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
@@ -6059,13 +6492,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
- if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
+ DAG, Subtarget, *this))
return V;
if (EVTBits == 16 && NumElems == 8)
- if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
+ DAG, Subtarget, *this))
return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
@@ -6077,7 +6510,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
- bool isZero = !(NonZeros & (1 << i));
+ bool isZero = !(NonZeros & (1ULL << i));
if (isZero)
V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -6177,7 +6610,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -6193,8 +6626,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
- ResVT.getVectorNumElements()/2);
+ MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+ ResVT.getVectorNumElements()/2);
SDValue V3 = Op.getOperand(2);
SDValue V4 = Op.getOperand(3);
return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
@@ -6213,8 +6646,27 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
+ SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ // Specialize the cases when all, or all but one, of the operands are undef.
+ unsigned NumOfDefinedOps = 0;
+ unsigned OpIdx = 0;
+ for (unsigned i = 0; i < NumOfOperands; i++)
+ if (!Op.getOperand(i).isUndef()) {
+ NumOfDefinedOps++;
+ OpIdx = i;
+ }
+ if (NumOfDefinedOps == 0)
+ return Undef;
+ if (NumOfDefinedOps == 1) {
+ unsigned SubVecNumElts =
+ Op.getOperand(OpIdx).getValueType().getVectorNumElements();
+ SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef,
+ Op.getOperand(OpIdx), IdxVal);
+ }
+
+ MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SmallVector<SDValue, 2> Ops;
for (unsigned i = 0; i < NumOfOperands/2; i++)
@@ -6227,31 +6679,38 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
+ // 2 operands
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+ assert(V1.getValueType() == V2.getValueType() &&
+ V1.getValueType().getVectorNumElements() == NumElems/2 &&
+ "Unexpected operands in CONCAT_VECTORS");
+
+ if (ResVT.getSizeInBits() >= 16)
+ return Op; // The operation is legal with KUNPCK
+
bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
-
+ SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl);
if (IsZeroV1 && IsZeroV2)
- return getZeroVector(ResVT, Subtarget, DAG, dl);
+ return ZeroVec;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(ResVT);
- unsigned NumElems = ResVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
+ if (V2.isUndef())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ if (IsZeroV2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx);
+
+ SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
+ if (V1.isUndef())
+ V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
- V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx);
- V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits);
if (IsZeroV1)
- return V2;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
- // Zero the upper bits of V1
- V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits);
- V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits);
- if (IsZeroV2)
- return V1;
- return DAG.getNode(ISD::OR, dl, ResVT, V1, V2);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal);
}
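
On the scalar mask model again, the reworked two-operand vXi1 concat above amounts to placing V1 in the low half of the result register and V2 in the high half (which is what the INSERT_SUBVECTOR pair, or KUNPCK, achieves). A sketch; concatMasks is an invented name:

    // Model of the two-operand vXi1 CONCAT_VECTORS lowering (not LLVM code).
    #include <cassert>
    #include <cstdint>

    static uint64_t concatMasks(uint64_t V1, uint64_t V2, unsigned NumElems) {
      uint64_t HalfMask = (1ull << (NumElems / 2)) - 1;
      return (V2 << (NumElems / 2)) | (V1 & HalfMask); // V1 low, V2 high
    }

    int main() {
      // concat v4i1(0b1010) with v4i1(0b0110) -> v8i1 0b01101010
      assert(concatMasks(0b1010, 0b0110, 8) == 0b01101010);
      return 0;
    }
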
static SDValue LowerCONCAT_VECTORS(SDValue Op,
@@ -6272,7 +6731,6 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op,
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
-
//===----------------------------------------------------------------------===//
// Vector shuffle lowering
//
@@ -6422,6 +6880,127 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
}
+/// \brief Compute whether each element of a shuffle is zeroable.
+///
+/// A "zeroable" vector shuffle element is one which can be lowered to zero.
+/// Either it is an undef element in the shuffle mask, the element of the input
+/// referenced is undef, or the element of the input referenced is known to be
+/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
+/// as many lanes with this technique as possible to simplify the remaining
+/// shuffle.
+static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ SmallBitVector Zeroable(Mask.size(), false);
+
+ while (V1.getOpcode() == ISD::BITCAST)
+ V1 = V1->getOperand(0);
+ while (V2.getOpcode() == ISD::BITCAST)
+ V2 = V2->getOperand(0);
+
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
+ // Handle the easy cases.
+ if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ Zeroable[i] = true;
+ continue;
+ }
+
+ // If this is an index into a build_vector node (which has the same number
+ // of elements), dig out the input value and use it.
+ SDValue V = M < Size ? V1 : V2;
+ if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
+ continue;
+
+ SDValue Input = V.getOperand(M % Size);
+ // The UNDEF opcode check really should be dead code here, but not quite
+ // worth asserting on (it isn't invalid, just unexpected).
+ if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
+ Zeroable[i] = true;
+ }
+
+ return Zeroable;
+}
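
For the common case where a whole input is a zero build-vector, the zeroable computation above reduces to a per-element test. A simplified sketch that skips the per-element build_vector digging of the real code; zeroable is an invented name:

    // Simplified model of computeZeroableShuffleElements (not LLVM code).
    #include <cstdio>
    #include <vector>

    static std::vector<bool> zeroable(const std::vector<int> &Mask,
                                      bool V1IsZero, bool V2IsZero) {
      int Size = (int)Mask.size();
      std::vector<bool> Z(Mask.size(), false);
      for (int i = 0; i < Size; ++i) {
        int M = Mask[i]; // undef (-1) or an index into a known-zero input
        Z[i] = M < 0 || (M < Size && V1IsZero) || (M >= Size && V2IsZero);
      }
      return Z;
    }

    int main() {
      // Shuffle of <V1, zeros>: indices >= 4 come from the zero vector.
      for (bool B : zeroable({0, 5, -1, 3}, false, true))
        std::printf("%d", (int)B); // prints 0110
      std::printf("\n");
    }
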
+
+// X86 has dedicated unpack instructions that can handle specific blend
+// operations: UNPCKH and UNPCKL.
+static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+ SmallVector<int, 8> Unpckl;
+ SmallVector<int, 8> Unpckh;
+
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+ int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
+ int HiPos = LoPos + NumEltsInLane / 2;
+ Unpckl.push_back(LoPos);
+ Unpckh.push_back(HiPos);
+ }
+
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+
+ // Commute and try again.
+ ShuffleVectorSDNode::commuteMask(Unpckl);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
+
+ ShuffleVectorSDNode::commuteMask(Unpckh);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
+
+ return SDValue();
+}
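
The Unpckl/Unpckh reference masks built above follow the hardware interleave pattern within each 128-bit lane. A sketch that prints them for v8i16 (parameter values chosen for illustration):

    // Prints the per-lane UNPCKL/UNPCKH reference masks for v8i16.
    #include <cstdio>

    int main() {
      const int NumElts = 8, NumEltsInLane = 8; // v8i16: one 128-bit lane
      for (int i = 0; i < NumElts; ++i) {
        int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
        int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
        std::printf("%d ", LoPos);              // unpckl: 0 8 1 9 2 10 3 11
      }
      std::printf("\n");
      for (int i = 0; i < NumElts; ++i) {
        int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
        int HiPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2)
                    + NumEltsInLane / 2;
        std::printf("%d ", HiPos);              // unpckh: 4 12 5 13 6 14 7 15
      }
      std::printf("\n");
    }
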
+
+/// \brief Try to emit a bitmask instruction for a shuffle.
+///
+/// This handles cases where we can model a blend exactly as a bitmask due to
+/// one of the inputs being zeroable.
+static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ MVT EltVT = VT.getVectorElementType();
+ int NumEltBits = EltVT.getSizeInBits();
+ MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
+ SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
+ IntEltVT);
+ if (EltVT.isFloatingPoint()) {
+ Zero = DAG.getBitcast(EltVT, Zero);
+ AllOnes = DAG.getBitcast(EltVT, AllOnes);
+ }
+ SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Zeroable[i])
+ continue;
+ if (Mask[i] % Size != i)
+ return SDValue(); // Not a blend.
+ if (!V)
+ V = Mask[i] < Size ? V1 : V2;
+ else if (V != (Mask[i] < Size ? V1 : V2))
+ return SDValue(); // Can only let one input through the mask.
+
+ VMaskOps[i] = AllOnes;
+ }
+ if (!V)
+ return SDValue(); // No non-zeroable elements!
+
+ SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
+ V = DAG.getNode(VT.isFloatingPoint()
+ ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
+ DL, VT, V, VMask);
+ return V;
+}
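
lowerVectorShuffleAsBitMask succeeds when every lane is either zeroable or an identity pick from a single input, so the blend becomes an AND with a constant mask. A simplified sketch that omits the single-input bookkeeping of the real code; blendAsBitMask is an invented name:

    // Simplified model of the blend-as-bitmask test above (not LLVM code).
    #include <cstdio>
    #include <vector>

    static bool blendAsBitMask(const std::vector<int> &Mask,
                               const std::vector<bool> &Zeroable,
                               std::vector<bool> &Keep) {
      int Size = (int)Mask.size();
      Keep.assign(Mask.size(), false);
      for (int i = 0; i < Size; ++i) {
        if (Zeroable[i])
          continue;           // this lane is cleared by the AND mask
        if (Mask[i] % Size != i)
          return false;       // not an in-place blend
        Keep[i] = true;       // AND with all-ones keeps the lane
      }
      return true;
    }

    int main() {
      std::vector<bool> Keep;
      bool OK = blendAsBitMask({0, 5, 2, 7}, {false, true, false, true}, Keep);
      std::printf("%d: %d%d%d%d\n", OK, (int)Keep[0], (int)Keep[1],
                  (int)Keep[2], (int)Keep[3]); // 1: 1010
    }
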
+
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
@@ -6431,7 +7010,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
- MVT EltVT = VT.getScalarType();
+ MVT EltVT = VT.getVectorElementType();
int NumEltBits = EltVT.getSizeInBits();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
@@ -6458,22 +7037,62 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
-/// that the shuffle mask is in fact a blend.
+/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
+ SDValue V2, ArrayRef<int> Original,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+ SmallVector<int, 8> Mask(Original.begin(), Original.end());
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ bool ForceV1Zero = false, ForceV2Zero = false;
+
+ // Attempt to generate the binary blend mask. If an input is zero then
+ // we can use any lane.
+ // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
unsigned BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] >= Size) {
- if (Mask[i] != i + Size)
- return SDValue(); // Shuffled V2 input!
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ if (M == i)
+ continue;
+ if (M == i + Size) {
BlendMask |= 1u << i;
continue;
}
- if (Mask[i] >= 0 && Mask[i] != i)
- return SDValue(); // Shuffled V1 input!
+ if (Zeroable[i]) {
+ if (V1IsZero) {
+ ForceV1Zero = true;
+ Mask[i] = i;
+ continue;
+ }
+ if (V2IsZero) {
+ ForceV2Zero = true;
+ BlendMask |= 1u << i;
+ Mask[i] = i + Size;
+ continue;
+ }
+ }
+ return SDValue(); // Shuffled input!
}
+
+ // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
+ if (ForceV1Zero)
+ V1 = getZeroVector(VT, Subtarget, DAG, DL);
+ if (ForceV2Zero)
+ V2 = getZeroVector(VT, Subtarget, DAG, DL);
+
+ auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) {
+ unsigned ScaledMask = 0;
+ for (int i = 0; i != Size; ++i)
+ if (BlendMask & (1u << i))
+ for (int j = 0; j != Scale; ++j)
+ ScaledMask |= 1u << (i * Scale + j);
+ return ScaledMask;
+ };
+
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
@@ -6493,12 +7112,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
if (Subtarget->hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= Size)
- for (int j = 0; j < Scale; ++j)
- BlendMask |= 1u << (i * Scale + j);
-
+ BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
@@ -6511,12 +7125,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
- BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= Size)
- for (int j = 0; j < Scale; ++j)
- BlendMask |= 1u << (i * Scale + j);
-
+ BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
@@ -6541,9 +7150,13 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
- assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+ assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG))
+ return Masked;
+
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -6760,11 +7373,11 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
Hi = DAG.getBitcast(AlignVT, Hi);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi,
DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
}
- assert(VT.getSizeInBits() == 128 &&
+ assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
@@ -6785,92 +7398,6 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
}
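
The operand-order fix in the PALIGNR emission above matters because the rotate conceptually reads a window out of the 32-byte pair [Hi:Lo]. A byte-level model of that read (values illustrative, not LLVM code):

    // Model of a 16-byte rotate out of the pair [Hi:Lo] (sketch only).
    #include <cstdio>

    int main() {
      const int N = 16, Rotation = 5;
      unsigned char Lo[N], Hi[N], Out[N];
      for (int i = 0; i < N; ++i) { Lo[i] = i; Hi[i] = 16 + i; }
      for (int i = 0; i < N; ++i) {
        int Src = i + Rotation;                 // index into the 32-byte pair
        Out[i] = Src < N ? Lo[Src] : Hi[Src - N];
      }
      for (int i = 0; i < N; ++i)
        std::printf("%d ", Out[i]);             // 5..15 then 16..20
      std::printf("\n");
    }
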
-/// \brief Compute whether each element of a shuffle is zeroable.
-///
-/// A "zeroable" vector shuffle element is one which can be lowered to zero.
-/// Either it is an undef element in the shuffle mask, the element of the input
-/// referenced is undef, or the element of the input referenced is known to be
-/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
-/// as many lanes with this technique as possible to simplify the remaining
-/// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- SmallBitVector Zeroable(Mask.size(), false);
-
- while (V1.getOpcode() == ISD::BITCAST)
- V1 = V1->getOperand(0);
- while (V2.getOpcode() == ISD::BITCAST)
- V2 = V2->getOperand(0);
-
- bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
-
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- // Handle the easy cases.
- if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable[i] = true;
- continue;
- }
-
- // If this is an index into a build_vector node (which has the same number
- // of elements), dig out the input value and use it.
- SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
- continue;
-
- SDValue Input = V.getOperand(M % Size);
- // The UNDEF opcode check really should be dead code here, but not quite
- // worth asserting on (it isn't invalid, just unexpected).
- if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
- Zeroable[i] = true;
- }
-
- return Zeroable;
-}
-
-/// \brief Try to emit a bitmask instruction for a shuffle.
-///
-/// This handles cases where we can model a blend exactly as a bitmask due to
-/// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
- MVT EltVT = VT.getScalarType();
- int NumEltBits = EltVT.getSizeInBits();
- MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
- SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
- SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
- IntEltVT);
- if (EltVT.isFloatingPoint()) {
- Zero = DAG.getBitcast(EltVT, Zero);
- AllOnes = DAG.getBitcast(EltVT, AllOnes);
- }
- SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- SDValue V;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Zeroable[i])
- continue;
- if (Mask[i] % Size != i)
- return SDValue(); // Not a blend.
- if (!V)
- V = Mask[i] < Size ? V1 : V2;
- else if (V != (Mask[i] < Size ? V1 : V2))
- return SDValue(); // Can only let one input through the mask.
-
- VMaskOps[i] = AllOnes;
- }
- if (!V)
- return SDValue(); // No non-zeroable elements!
-
- SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
- V = DAG.getNode(VT.isFloatingPoint()
- ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
- DL, VT, V, VMask);
- return V;
-}
-
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -6982,7 +7509,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
- for (; Len >= 0; --Len)
+ for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
assert(Len > 0 && "Zeroable shuffle mask");
@@ -6997,8 +7524,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
- // All mask elements must be in the lower half.
- if (M > HalfSize)
+ // The extracted elements must start at a valid index and all mask
+ // elements must be in the lower half.
+ if (i > M || M >= HalfSize)
return SDValue();
if (Idx < 0 || (Src == V && Idx == (M - i))) {
@@ -7095,64 +7623,104 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
-/// features of the subtarget.
+/// features of the subtarget. The extended elements are consecutive and
+/// can start from an offset element index in the input; to avoid excess
+/// shuffling the offset must either be in the bottom lane or at the start
+/// of a higher lane. All extended elements must come from the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
+ SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
- int NumElements = VT.getVectorNumElements();
int EltBits = VT.getScalarSizeInBits();
+ int NumElements = VT.getVectorNumElements();
+ int NumEltsPerLane = 128 / EltBits;
+ int OffsetLane = Offset / NumEltsPerLane;
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+ assert(0 <= Offset && "Extension offset must be positive.");
+ assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
+ "Extension offset must be in the first lane or start an upper lane.");
+
+ // Check that an index is in same lane as the base offset.
+ auto SafeOffset = [&](int Idx) {
+ return OffsetLane == (Idx / NumEltsPerLane);
+ };
+
+ // Shift along an input so that the offset base moves to the first element.
+ auto ShuffleOffset = [&](SDValue V) {
+ if (!Offset)
+ return V;
+
+ SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+ for (int i = 0; i * Scale < NumElements; ++i) {
+ int SrcIdx = i + Offset;
+ ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
+ }
+ return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
+ };
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget->hasSSE41()) {
+    // Not worth offsetting 128-bit vectors if scale == 2; a pattern using
+ // PUNPCK will catch this in a later shuffle match.
+ if (Offset && Scale == 2 && VT.is128BitVector())
+ return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV));
+ return DAG.getBitcast(VT, InputV);
}
+ assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
+
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
- int PSHUFDMask[4] = {0, -1, 1, -1};
+ int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
+ -1};
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
- int PSHUFDMask[4] = {0, -1, 0, -1};
+ int PSHUFDMask[4] = {Offset / 2, -1,
+ SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
- int PSHUFHWMask[4] = {1, -1, -1, -1};
+ int PSHUFWMask[4] = {1, -1, -1, -1};
+ unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+ getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
}
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
- assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+ assert(VT.is128BitVector() && "Unexpected vector width!");
+ int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(0, DL, MVT::i8)));
- if (isUndefInRange(Mask, NumElements/2, NumElements/2))
+ DAG.getConstant(LoIdx, DL, MVT::i8)));
+
+ if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
+ !SafeOffset(Offset + 1))
return DAG.getNode(ISD::BITCAST, DL, VT, Lo);
- SDValue Hi =
- DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
- DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(EltBits, DL, MVT::i8)));
+ int HiIdx = (Offset + 1) * EltBits;
+ SDValue Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(HiIdx, DL, MVT::i8)));
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
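
For readers unfamiliar with SSE4A, a rough scalar model of the EXTRQI extraction used above may help (a sketch under the assumption that only the low 64-bit result matters here):

#include <cstdint>
#include <cstdio>

// Rough scalar model of EXTRQI: extract Len bits starting at bit Idx of the
// low 64-bit element, zero-extended. Purely illustrative.
static uint64_t extrqiModel(uint64_t V, unsigned Len, unsigned Idx) {
  uint64_t Mask = (Len >= 64) ? ~0ULL : ((1ULL << Len) - 1);
  return (V >> Idx) & Mask;
}

int main() {
  // EltBits = 16, Offset = 1: Lo reads bits [16,32), Hi reads [32,48).
  uint64_t V = 0x4444333322221111ULL;
  std::printf("%llx %llx\n",
              (unsigned long long)extrqiModel(V, 16, 16),  // 2222
              (unsigned long long)extrqiModel(V, 16, 32)); // 3333
}
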
@@ -7163,9 +7731,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
- for (int i = 0; i < 16; ++i)
- PSHUFBMask[i] =
- DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8);
+ for (int i = 0; i < 16; ++i) {
+ int Idx = Offset + (i / Scale);
+ PSHUFBMask[i] = DAG.getConstant(
+ (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8);
+ }
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
@@ -7173,13 +7743,30 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT::v16i8, PSHUFBMask)));
}
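
A standalone sketch of the PSHUFB control bytes the loop above produces, for hypothetical Scale/Offset values (SafeOffset is trivially true on a single 128-bit lane):

#include <cstdio>

int main() {
  // Illustrative values: zero-extend with Scale = 4 from Offset = 2.
  const int Scale = 4, Offset = 2;
  for (int i = 0; i < 16; ++i) {
    int Idx = Offset + (i / Scale);
    int Ctl = (i % Scale == 0) ? Idx : 0x80; // 0x80 makes PSHUFB write 0
    std::printf("%02x ", Ctl);
  }
  std::printf("\n"); // prints: 02 80 80 80 03 80 80 80 04 80 80 80 05 80 80 80
}
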
+ // If we are extending from an offset, ensure we start on a boundary that
+ // we can unpack from.
+ int AlignToUnpack = Offset % (NumElements / Scale);
+ if (AlignToUnpack) {
+ SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+ for (int i = AlignToUnpack; i < NumElements; ++i)
+ ShMask[i - AlignToUnpack] = i;
+ InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
+ Offset -= AlignToUnpack;
+ }
+
// Otherwise emit a sequence of unpacks.
do {
+ unsigned UnpackLoHi = X86ISD::UNPCKL;
+ if (Offset >= (NumElements / 2)) {
+ UnpackLoHi = X86ISD::UNPCKH;
+ Offset -= (NumElements / 2);
+ }
+
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
InputV = DAG.getBitcast(InputVT, InputV);
- InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
+ InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
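
The loop above halves Scale on each iteration; a standalone sketch of the type progression when zero-extending v16i8 by Scale = 4 (illustrative only):

#include <cstdio>

int main() {
  int Scale = 4, EltBits = 8, NumElements = 16;
  // Each unpack against zero doubles the element width and halves the
  // element count, until Scale reaches 1.
  while (Scale > 1) {
    std::printf("unpack v%di%d with zero\n", NumElements, EltBits);
    Scale /= 2;
    EltBits *= 2;
    NumElements /= 2;
  }
  // prints: unpack v16i8 with zero
  //         unpack v8i16 with zero
}
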
@@ -7205,7 +7792,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Bits = VT.getSizeInBits();
+ int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
+ int NumEltsPerLane = NumElements / NumLanes;
assert(VT.getScalarSizeInBits() <= 32 &&
"Exceeds 32-bit integer zero extension limit");
assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
@@ -7215,8 +7804,11 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
auto Lower = [&](int Scale) -> SDValue {
SDValue InputV;
bool AnyExt = true;
+ int Offset = 0;
+ int Matches = 0;
for (int i = 0; i < NumElements; ++i) {
- if (Mask[i] == -1)
+ int M = Mask[i];
+ if (M == -1)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
// Each of the extended elements need to be zeroable.
@@ -7230,14 +7822,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
// Each of the base elements needs to be consecutive indices into the
// same input vector.
- SDValue V = Mask[i] < NumElements ? V1 : V2;
- if (!InputV)
+ SDValue V = M < NumElements ? V1 : V2;
+ M = M % NumElements;
+ if (!InputV) {
InputV = V;
- else if (InputV != V)
+ Offset = M - (i / Scale);
+ } else if (InputV != V)
return SDValue(); // Flip-flopping inputs.
- if (Mask[i] % NumElements != i / Scale)
+ // Offset must start in the lowest 128-bit lane or at the start of an
+ // upper lane.
+ // FIXME: Is it ever worth allowing a negative base offset?
+ if (!((0 <= Offset && Offset < NumEltsPerLane) ||
+ (Offset % NumEltsPerLane) == 0))
+ return SDValue();
+
+ // If we are offsetting, all referenced entries must come from the same
+ // lane.
+ if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
+ return SDValue();
+
+ if ((M % NumElements) != (Offset + (i / Scale)))
return SDValue(); // Non-consecutive strided elements.
+ Matches++;
}
// If we fail to find an input, we have a zero-shuffle which should always
@@ -7246,8 +7853,13 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
if (!InputV)
return SDValue();
+ // If we are offsetting, don't extend if we only match a single input; we
+ // can always do better by using a basic PSHUF or PUNPCK.
+ if (Offset != 0 && Matches < 2)
+ return SDValue();
+
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG);
+ DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -7355,8 +7967,9 @@ static SDValue lowerVectorShuffleAsElementInsertion(
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
- if (SDValue V2S = getScalarValueForVectorElement(
- V2, Mask[V2Index] - Mask.size(), DAG)) {
+ SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
+ DAG);
+ if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
@@ -7431,11 +8044,65 @@ static SDValue lowerVectorShuffleAsElementInsertion(
return V2;
}
+/// \brief Try to lower a broadcast of a single (truncated) integer element,
+/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
+///
+/// This assumes we have AVX2.
+static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
+ int BroadcastIdx,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Subtarget->hasAVX2() &&
+ "We can only lower integer broadcasts with AVX2!");
+
+ EVT EltVT = VT.getVectorElementType();
+ EVT V0VT = V0.getValueType();
+
+ assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
+ assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
+
+ EVT V0EltVT = V0VT.getVectorElementType();
+ if (!V0EltVT.isInteger())
+ return SDValue();
+
+ const unsigned EltSize = EltVT.getSizeInBits();
+ const unsigned V0EltSize = V0EltVT.getSizeInBits();
+
+ // This is only a truncation if the original element type is larger.
+ if (V0EltSize <= EltSize)
+ return SDValue();
+
+ assert(((V0EltSize % EltSize) == 0) &&
+ "Scalar type sizes must all be powers of 2 on x86!");
+
+ const unsigned V0Opc = V0.getOpcode();
+ const unsigned Scale = V0EltSize / EltSize;
+ const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
+
+ if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
+ V0Opc != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ SDValue Scalar = V0.getOperand(V0BroadcastIdx);
+
+ // If we're extracting non-least-significant bits, shift so we can truncate.
+ // Hopefully, we can fold away the trunc/srl/load into the broadcast.
+ // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
+ // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
+ if (const int OffsetIdx = BroadcastIdx % Scale)
+ Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
+ DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType()));
+
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
+}
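
A scalar sketch of the srl + truncate step above, with made-up values: broadcasting the i16 at sub-index 2 of an i64 element (Scale = 4):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Scalar = 0x4444333322221111ULL; // hypothetical i64 element
  const unsigned EltSize = 16, OffsetIdx = 2;
  // Shift the wanted sub-element down, then truncate: this is the
  // ISD::SRL + ISD::TRUNCATE pair emitted above.
  uint16_t Elt = (uint16_t)(Scalar >> (OffsetIdx * EltSize));
  std::printf("%04x\n", Elt); // prints: 3333
}
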
+
/// \brief Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
+/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
@@ -7476,7 +8143,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
int BeginIdx = (int)ConstantIdx->getZExtValue();
int EndIdx =
- BeginIdx + (int)VInner.getValueType().getVectorNumElements();
+ BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
BroadcastIdx -= BeginIdx;
V = VInner;
@@ -7491,6 +8158,15 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
+ // First, look through bitcast: if the original value has a larger element
+ // type than the shuffle, the broadcast element is in essence truncated.
+ // Make that explicit to ease folding.
+ if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
+ if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
+ DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+ return TruncBroadcast;
+
+ // Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
@@ -7499,6 +8175,20 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
+ } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ // If we are broadcasting a load that is only used by the shuffle, then we
+ // can reduce the vector load to the broadcasted scalar load.
+ LoadSDNode *Ld = cast<LoadSDNode>(V);
+ SDValue BaseAddr = Ld->getOperand(1);
+ EVT AddrVT = BaseAddr.getValueType();
+ EVT SVT = VT.getScalarType();
+ unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+ SDValue NewAddr = DAG.getNode(
+ ISD::ADD, DL, AddrVT, BaseAddr,
+ DAG.getConstant(Offset, DL, AddrVT));
+ V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
+ DAG.getMachineFunction().getMachineMemOperand(
+ Ld->getMemOperand(), Offset, SVT.getStoreSize()));
} else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
// We can't broadcast from a vector register without AVX2, and we can only
// broadcast from the zero-element of a vector register.
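
The scalar-load narrowing a few lines up simply rebases the address; a trivial sketch with assumed types (f32 elements, broadcast of element 3):

#include <cstdio>

int main() {
  unsigned BroadcastIdx = 3, StoreSize = 4; // f32 stores 4 bytes
  unsigned Offset = BroadcastIdx * StoreSize;
  // The wide vector load becomes a 4-byte scalar load at base + 12,
  // which the broadcast can then fold.
  std::printf("scalar load at base + %u\n", Offset);
}
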
@@ -7595,9 +8285,10 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(!isSingleInputShuffleMask(Mask) &&
@@ -7774,10 +8465,9 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+ return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
@@ -7869,10 +8559,9 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
@@ -8077,14 +8766,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
+ return V;
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
@@ -8161,14 +8845,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
@@ -8184,8 +8863,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1,
+ V2, Mask, DAG))
return Unpack;
// We implement this with SHUFPS because it can blend from two vectors.
@@ -8218,7 +8897,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- assert(VT.getScalarType() == MVT::i16 && "Bad input type!");
+ assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
@@ -8286,16 +8965,18 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
+ bool ThreeAInputs = AToAInputs.size() == 3;
+
// Compute the index of the dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
int ADWord, BDWord;
- int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord;
- int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord;
- int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset;
- ArrayRef<int> TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs;
- int OneInput = AToAInputs.size() == 3 ? BToAInputs[0] : AToAInputs[0];
+ int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
+ int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
+ int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
+ ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
+ int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
@@ -8364,8 +9045,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
- int APinnedIdx =
- AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
@@ -8751,10 +9431,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V1, Mask, {0, 0, 1, 1, 2, 2, 3, 3}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V1);
- if (isShuffleEquivalent(V1, V1, Mask, {4, 4, 5, 5, 6, 6, 7, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
@@ -8798,10 +9477,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 2, 10, 3, 11}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 12, 5, 13, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
@@ -8812,8 +9490,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
+ V2, Mask, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
@@ -9037,17 +9715,14 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return V;
}
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return Masked;
+
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
- 0, 16, 1, 17, 2, 18, 3, 19,
- // High half.
- 4, 20, 5, 21, 6, 22, 7, 23}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
- 8, 24, 9, 25, 10, 26, 11, 27,
- // High half.
- 12, 28, 13, 29, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
@@ -9086,8 +9761,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v16i8, V1, V2, Mask, DAG))
return Unpack;
}
@@ -9296,7 +9971,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
- MVT ScalarVT = VT.getScalarType();
+ MVT ScalarVT = VT.getVectorElementType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
// Rather than splitting build-vectors, just build two narrower build
@@ -9308,7 +9983,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
MVT OrigVT = V.getSimpleValueType();
int OrigNumElements = OrigVT.getVectorNumElements();
int OrigSplitNumElements = OrigNumElements / 2;
- MVT OrigScalarVT = OrigVT.getScalarType();
+ MVT OrigScalarVT = OrigVT.getVectorElementType();
MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
SDValue LoV, HiV;
@@ -9478,7 +10153,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
- assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+ assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
@@ -9682,6 +10357,108 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
+/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
+/// This allows for fast cases such as subvector extraction/insertion
+/// or shuffling smaller vector types which can lower more efficiently.
+static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+
+ bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
+ bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
+ if (!UndefLower && !UndefUpper)
+ return SDValue();
+
+ // Upper half is undef and lower half is the whole upper subvector.
+ // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (UndefUpper &&
+ isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ // Lower half is undef and upper half is the whole lower subvector.
+ // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (UndefLower &&
+ isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ }
+
+ // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+ if (UndefLower && Subtarget->hasAVX2() &&
+ (VT == MVT::v4f64 || VT == MVT::v4i64))
+ return SDValue();
+
+ // If the shuffle only uses the lower halves of the input operands,
+ // then extract them and perform the 'half' shuffle at half width.
+ // e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
+ int HalfIdx1 = -1, HalfIdx2 = -1;
+ SmallVector<int, 8> HalfMask;
+ unsigned Offset = UndefLower ? HalfNumElts : 0;
+ for (unsigned i = 0; i != HalfNumElts; ++i) {
+ int M = Mask[i + Offset];
+ if (M < 0) {
+ HalfMask.push_back(M);
+ continue;
+ }
+
+ // Determine which of the 4 half vectors this element is from.
+ // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
+ int HalfIdx = M / HalfNumElts;
+
+ // Only shuffle using the lower halves of the inputs.
+ // TODO: Investigate usefulness of shuffling with upper halves.
+ if (HalfIdx != 0 && HalfIdx != 2)
+ return SDValue();
+
+ // Determine the element index into its half vector source.
+ int HalfElt = M % HalfNumElts;
+
+ // We can shuffle with up to 2 half vectors; set the new 'half' shuffle
+ // mask accordingly.
+ if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) {
+ HalfMask.push_back(HalfElt);
+ HalfIdx1 = HalfIdx;
+ continue;
+ }
+ if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) {
+ HalfMask.push_back(HalfElt + HalfNumElts);
+ HalfIdx2 = HalfIdx;
+ continue;
+ }
+
+ // Too many half vectors referenced.
+ return SDValue();
+ }
+ assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+ auto GetHalfVector = [&](int HalfIdx) {
+ if (HalfIdx < 0)
+ return DAG.getUNDEF(HalfVT);
+ SDValue V = (HalfIdx < 2 ? V1 : V2);
+ HalfIdx = (HalfIdx % 2) * HalfNumElts;
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+ DAG.getIntPtrConstant(HalfIdx, DL));
+ };
+
+ SDValue Half1 = GetHalfVector(HalfIdx1);
+ SDValue Half2 = GetHalfVector(HalfIdx2);
+ SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+ DAG.getIntPtrConstant(Offset, DL));
+}
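
To see the HalfIdx bookkeeping above in action, a standalone sketch classifying a hypothetical v8 mask with an undef upper half:

#include <cstdio>

int main() {
  // Mask <0, 10, 3, 9, u, u, u, u> for two v8i32 inputs (u = undef).
  const int HalfNumElts = 4;
  int LowerMask[4] = {0, 10, 3, 9};
  for (int M : LowerMask)
    // 0 = lower V1, 1 = upper V1, 2 = lower V2, 3 = upper V2.
    std::printf("elt %2d -> half %d, idx %d\n", M, M / HalfNumElts,
                M % HalfNumElts);
  // All elements land in halves 0 and 2, so this mask would be lowered as a
  // single v4i32 shuffle of the two extracted lower halves.
}
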
+
/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
@@ -9776,16 +10553,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG);
}
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
@@ -9876,14 +10647,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
+ return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
@@ -9941,14 +10707,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
+ return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends. We also need to squash the
@@ -9974,9 +10735,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (Subtarget->hasAVX2())
return DAG.getNode(
X86ISD::VPERMV, DL, MVT::v8f32,
- DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL,
- MVT::v8i32, VPermMask)),
- V1);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
@@ -10041,14 +10800,9 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
+ return V;
}
// Try to use shift instructions.
@@ -10115,18 +10869,9 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane:
- 0, 16, 1, 17, 2, 18, 3, 19,
- // Second 128-bit lane:
- 8, 24, 9, 25, 10, 26, 11, 27}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane:
- 4, 20, 5, 21, 6, 22, 7, 23,
- // Second 128-bit lane:
- 12, 28, 13, 29, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
+ return V;
// Try to use shift instructions.
if (SDValue Shift =
@@ -10215,22 +10960,9 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- // Note that these are repeated 128-bit lane unpacks, not unpacks across all
- // 256-bit lanes.
- if (isShuffleEquivalent(
- V1, V2, Mask,
- {// First 128-bit lane:
- 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
- // Second 128-bit lane:
- 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
- if (isShuffleEquivalent(
- V1, V2, Mask,
- {// First 128-bit lane:
- 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
- // Second 128-bit lane:
- 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
+ return V;
// Try to use shift instructions.
if (SDValue Shift =
@@ -10296,12 +11028,17 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, VT, V1, V2, Mask, Subtarget, DAG))
return Insertion;
- // There is a really nice hard cut-over between AVX1 and AVX2 that means we can
- // check for those subtargets here and avoid much of the subtarget querying in
- // the per-vector-type lowering routines. With AVX1 we have essentially *zero*
- // ability to manipulate a 256-bit vector with integer types. Since we'll use
- // floating point types there eventually, just immediately cast everything to
- // a float and operate entirely in that domain.
+ // Handle special cases where the lower or upper half is UNDEF.
+ if (SDValue V =
+ lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
+ // There is a really nice hard cut-over between AVX1 and AVX2 that means we
+ // can check for those subtargets here and avoid much of the subtarget
+ // querying in the per-vector-type lowering routines. With AVX1 we have
+ // essentially *zero* ability to manipulate a 256-bit vector with integer
+ // types. Since we'll use floating point types there eventually, just
+ // immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget->hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32)
@@ -10334,6 +11071,57 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
+/// \brief Try to lower a vector shuffle as 128-bit shuffles.
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+ assert(VT.getScalarSizeInBits() == 64 &&
+ "Unexpected element type size for 128-bit shuffle.");
+
+ // Handling a 256-bit vector requires VLX; lowerV2X128VectorShuffle() is
+ // most probably the better solution for that case.
+ assert(VT.is512BitVector() && "Unexpected vector size for 128-bit shuffle.");
+
+ SmallVector<int, 4> WidenedMask;
+ if (!canWidenShuffleElements(Mask, WidenedMask))
+ return SDValue();
+
+ // Form a 128-bit permutation.
+ // Convert the 64-bit shuffle mask selection values into 128-bit selection
+ // bits defined by a vshuf64x2 instruction's immediate control byte.
+ unsigned PermMask = 0, Imm = 0;
+ unsigned ControlBitsNum = WidenedMask.size() / 2;
+
+ for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+ if (WidenedMask[i] == SM_SentinelZero)
+ return SDValue();
+
+ // Use first element in place of undef mask.
+ Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+ PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+}
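
A worked standalone example of the immediate computation above, for a hypothetical v8i64 mask that widens to the 128-bit block selection <0, 2, 4, 6>:

#include <cstdio>

int main() {
  int WidenedMask[4] = {0, 2, 4, 6};  // picks every even 128-bit block
  unsigned ControlBitsNum = 4 / 2;    // two control bits per selection
  unsigned PermMask = 0;
  for (int i = 0; i < 4; ++i)
    PermMask |= (WidenedMask[i] % 4) << (i * ControlBitsNum);
  std::printf("0x%02x\n", PermMask);  // prints: 0x88
}
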
+
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+
+ assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+ SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+}
+
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
@@ -10345,21 +11133,21 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+ if (SDValue Shuf128 =
+ lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ return Shuf128;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ return Unpck;
+
+ return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
@@ -10367,22 +11155,11 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
+ return Unpck;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
@@ -10396,21 +11173,21 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+ if (SDValue Shuf128 =
+ lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Shuf128;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Unpck;
+
+ return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
@@ -10418,22 +11195,11 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+ return Unpck;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10448,8 +11214,7 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
@@ -10517,6 +11282,60 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
}
+// Lower vXi1 vector shuffles.
+// There is no dedicated instruction on AVX-512 that shuffles the masks.
+// The only way to shuffle the bits is to sign-extend the mask vector to a
+// SIMD vector, shuffle it, and then truncate it back.
+static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Subtarget->hasAVX512() &&
+ "Cannot lower 512-bit vectors w/o basic ISA!");
+ MVT ExtVT;
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Expected a vector of i1 elements");
+ case MVT::v2i1:
+ ExtVT = MVT::v2i64;
+ break;
+ case MVT::v4i1:
+ ExtVT = MVT::v4i32;
+ break;
+ case MVT::v8i1:
+ ExtVT = MVT::v8i64; // Take a 512-bit type; more shuffles are available on KNL
+ break;
+ case MVT::v16i1:
+ ExtVT = MVT::v16i32;
+ break;
+ case MVT::v32i1:
+ ExtVT = MVT::v32i16;
+ break;
+ case MVT::v64i1:
+ ExtVT = MVT::v64i8;
+ break;
+ }
+
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+ else if (ISD::isBuildVectorAllOnes(V1.getNode()))
+ V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ else
+ V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
+
+ if (V2.isUndef())
+ V2 = DAG.getUNDEF(ExtVT);
+ else if (ISD::isBuildVectorAllZeros(V2.getNode()))
+ V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+ else if (ISD::isBuildVectorAllOnes(V2.getNode()))
+ V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ else
+ V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask));
+}
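
A standalone sketch of the sign-extend / shuffle / truncate round trip described above, modelling a v8i1 reversal with plain arrays:

#include <cstdio>

int main() {
  int Bits[8] = {1, 0, 1, 1, 0, 0, 1, 0};
  int Mask[8] = {7, 6, 5, 4, 3, 2, 1, 0};  // reverse the mask bits
  long long Wide[8], Res[8];
  for (int i = 0; i < 8; ++i)
    Wide[i] = Bits[i] ? -1LL : 0;          // sign-extended i1 lanes
  for (int i = 0; i < 8; ++i)
    Res[i] = Wide[Mask[i]];                // the SIMD shuffle
  for (int i = 0; i < 8; ++i)
    std::printf("%d", (int)(Res[i] & 1));  // truncate back: prints 01001101
  std::printf("\n");
}
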
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
@@ -10533,8 +11352,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
MVT VT = Op.getSimpleValueType();
int NumElements = VT.getVectorNumElements();
SDLoc dl(Op);
+ bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);
- assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+ assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
+ "Can't lower MMX shuffles");
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
@@ -10572,7 +11393,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
- if (VT.getScalarSizeInBits() < 64 &&
+ if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(Mask, WidenedMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
@@ -10640,17 +11461,17 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
}
// For each vector width, delegate to a specialized lowering routine.
- if (VT.getSizeInBits() == 128)
+ if (VT.is128BitVector())
return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- if (VT.getSizeInBits() == 256)
+ if (VT.is256BitVector())
return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- // Force AVX-512 vectors to be scalarized for now.
- // FIXME: Implement AVX-512 support!
- if (VT.getSizeInBits() == 512)
+ if (VT.is512BitVector())
return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+ if (Is1BitVector)
+ return lower1BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
@@ -10661,11 +11482,16 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
unsigned &MaskValue) {
MaskValue = 0;
unsigned NumElems = BuildVector->getNumOperands();
+
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ // We don't handle the >2 lanes case right now.
unsigned NumLanes = (NumElems - 1) / 8 + 1;
+ if (NumLanes > 2)
+ return false;
+
unsigned NumElemsInLane = NumElems / NumLanes;
- // Blend for v16i16 should be symetric for the both lanes.
+ // Blend for v16i16 should be symmetric for both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
SDValue EltCond = BuildVector->getOperand(i);
SDValue SndLaneEltCond =
@@ -10673,20 +11499,25 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
int Lane1Cond = -1, Lane2Cond = -1;
if (isa<ConstantSDNode>(EltCond))
- Lane1Cond = !isZero(EltCond);
+ Lane1Cond = !isNullConstant(EltCond);
if (isa<ConstantSDNode>(SndLaneEltCond))
- Lane2Cond = !isZero(SndLaneEltCond);
+ Lane2Cond = !isNullConstant(SndLaneEltCond);
+ unsigned LaneMask = 0;
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
// Lane1Cond != 0, means we want the first argument.
// Lane1Cond == 0, means we want the second argument.
// The encoding of this argument is 0 for the first argument, 1
// for the second. Therefore, invert the condition.
- MaskValue |= !Lane1Cond << i;
+ LaneMask = !Lane1Cond << i;
else if (Lane1Cond < 0)
- MaskValue |= !Lane2Cond << i;
+ LaneMask = !Lane2Cond << i;
else
return false;
+
+ MaskValue |= LaneMask;
+ if (NumLanes == 2)
+ MaskValue |= LaneMask << NumElemsInLane;
}
return true;
}
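
A standalone sketch of the fixed accumulation above: for a symmetric two-lane v16i16 blend, each lane bit is now mirrored into the upper lane (values are illustrative):

#include <cstdio>

int main() {
  const unsigned NumElemsInLane = 8, NumLanes = 2;
  // Inverted per-element conditions for one lane (1 = take second arg).
  bool TakeSecond[8] = {true, false, false, true, false, false, false, false};
  unsigned MaskValue = 0;
  for (unsigned i = 0; i < NumElemsInLane; ++i) {
    unsigned LaneMask = (TakeSecond[i] ? 1u : 0u) << i;
    MaskValue |= LaneMask;
    if (NumLanes == 2)
      MaskValue |= LaneMask << NumElemsInLane; // mirror into lane 1
  }
  std::printf("0x%04x\n", MaskValue); // prints: 0x0909
}
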
@@ -10711,7 +11542,8 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
SDValue CondElt = CondBV->getOperand(i);
Mask.push_back(
- isa<ConstantSDNode>(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1);
+ isa<ConstantSDNode>(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0)
+ : -1);
}
return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
}
@@ -10776,9 +11608,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
}
if (VT.getSizeInBits() == 16) {
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return DAG.getNode(
ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -10801,8 +11632,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
- (isa<ConstantSDNode>(Op.getOperand(1)) &&
- cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ isNullConstant(Op.getOperand(1))) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
@@ -10900,10 +11730,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
- //if (IdxVal >= NumElems/2)
- // IdxVal -= NumElems/2;
- IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
+ // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2,
+ // this can be done with a mask.
+ IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
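
The mask rewrite above relies on the usual power-of-two identity; a one-liner check (illustrative):

#include <cassert>

int main() {
  const unsigned ElemsPerChunk = 4; // e.g. 128 / 32
  for (unsigned IdxVal = 0; IdxVal < 64; ++IdxVal)
    // For powers of two, idx & (N - 1) == idx % N.
    assert((IdxVal & (ElemsPerChunk - 1)) == IdxVal % ElemsPerChunk);
}
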
@@ -10918,8 +11749,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
@@ -10951,8 +11781,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
@@ -11039,7 +11868,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
- unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
+ assert(isPowerOf2_32(NumEltsIn128));
+ // Since NumEltsIn128 is a power of 2, we can use a mask instead of a modulo.
+ unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));
@@ -11078,8 +11909,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
- const Function *F = DAG.getMachineFunction().getFunction();
- bool MinSize = F->hasFnAttribute(Attribute::MinSize);
+ bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
@@ -11199,14 +12029,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
// --> load32 addr
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Subtarget->isUnalignedMem32Slow()) {
- SDValue SubVec2 = Vec.getOperand(1);
- if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
- if (Idx2->getZExtValue() == 0) {
- SDValue Ops[] = { SubVec2, SubVec };
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
+ OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through a bitcast to get to the load.
+ if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+ SubVec2 = SubVec2.getOperand(0);
+
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
}
}
}
@@ -11218,37 +12059,9 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
- if (OpVT.getVectorElementType() == MVT::i1) {
- if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal
- return Op;
- SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(OpVT);
- unsigned NumElems = OpVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
-
- if (IdxVal == OpVT.getVectorNumElements() / 2) {
- // Zero upper bits of the Vec
- Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
-
- SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
- return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
- }
- if (IdxVal == 0) {
- SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- // Zero upper bits of the Vec2
- Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
- Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits);
- // Zero lower bits of the Vec
- Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
- // Merge them together
- return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
- }
- }
+ if (OpVT.getVectorElementType() == MVT::i1)
+ return Insert1BitVector(Op, DAG);
+
return SDValue();
}
@@ -11363,7 +12176,8 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
@@ -11430,7 +12244,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -11587,7 +12402,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -11599,10 +12415,18 @@ SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ // Cygwin uses emutls.
+ // FIXME: Generic EmulatedTLS may also be needed for X86-Android.
+ if (Subtarget->isTargetWindowsCygwin())
+ return LowerToTLSEmulatedModel(GA, DAG);
+
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Subtarget->isTargetELF()) {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -11830,10 +12654,10 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ SDValue Chain = DAG.getStore(
+ DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
+ false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
@@ -11855,10 +12679,9 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
MachineMemOperand *MMO;
if (FI) {
int SSFI = FI->getIndex();
- MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
@@ -11884,16 +12707,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, SSFISize, SSFISize);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, Op.getValueType(), MMO);
- Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, false, 0);
+ Result = DAG.getLoad(
+ Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, false, 0);
}
return Result;
@@ -11937,16 +12760,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
- SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue CLod0 =
+ DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
- SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue CLod1 =
+ DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
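
The constant-pool loads above implement the classic two-halves trick for u64 -> f64. A standalone scalar sketch of the same arithmetic (our illustration; the real code keeps both lanes in one XMM register):

#include <cstdint>
#include <cstring>

// Splice the 32-bit halves of V into the mantissas of 2^52 and 2^84,
// subtract the biases exactly, and let only the final add round.
double U64ToDouble(uint64_t V) {
  uint64_t LoBits = 0x4330000000000000ULL | (V & 0xFFFFFFFFULL); // 2^52 + lo
  uint64_t HiBits = 0x4530000000000000ULL | (V >> 32);           // 2^84 + hi*2^32
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  // Reassociating this FSUB/FADD chain would lose the exactness the
  // trick relies on, which is why the fast-math-flags TODO is cautious.
  return (Hi - 0x1.0p84) + (Lo - 0x1.0p52);
}
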
@@ -11996,10 +12822,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
- EVT DestVT = Op.getValueType();
+ MVT DestVT = Op.getSimpleValueType();
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
@@ -12025,14 +12852,23 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
+ // We shouldn't use it when unsafe-fp-math is enabled though: we might later
+ // reassociate the two FADDs, and if we do that, the algorithm fails
+ // spectacularly (PR24512).
+ // FIXME: If we ever have some kind of Machine FMF, this should be marked
+ // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because
+ // there's also the MachineCombiner reassociations happening on Machine IR.
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
SDLoc DL(Op);
SDValue V = Op->getOperand(0);
- EVT VecIntVT = V.getValueType();
+ MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
- EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
+ MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something else than the supported type, e.g., to v4f64,
// abort early.
- if (VecFloatVT != Op->getValueType(0))
+ if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
unsigned NumElts = VecIntVT.getVectorNumElements();
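
A scalar model of the lo/hi algorithm quoted in the comment above (our own sketch; the vector code performs the same per-lane splices with a blend and a PSRLD/OR):

#include <cstdint>
#include <cstring>

// lo = 2^23 + (V & 0xFFFF)       encoded via exponent bits 0x4B000000,
// hi = 2^39 + (V >> 16) * 2^16   encoded via exponent bits 0x53000000.
float U32ToFloat(uint32_t V) {
  uint32_t LoBits = 0x4B000000u | (V & 0xFFFFu);
  uint32_t HiBits = 0x53000000u | (V >> 16);
  float Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  float FHi = Hi - (0x1.0p39f + 0x1.0p23f); // exact: (V >> 16)*2^16 - 2^23
  // Reassociating this pair is precisely the PR24512 failure mode the
  // UnsafeFPMath bail-out above guards against.
  return FHi + Lo; // rounds once, to (float)V
}
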
@@ -12070,7 +12906,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
SDValue Low, High;
if (Subtarget.hasSSE41()) {
- EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
+ MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
@@ -12108,6 +12944,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
@@ -12137,11 +12974,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
case MVT::v16i8:
case MVT::v16i16:
- if (Subtarget->hasAVX512())
- return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
+ assert(Subtarget->hasAVX512());
+ return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
- llvm_unreachable(nullptr);
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
@@ -12150,7 +12986,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (Op.getValueType().isVector())
+ if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
@@ -12161,6 +12997,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
+
+ if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
+ (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) {
+ // Conversions from unsigned i32 to f32/f64 are legal,
+ // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
+ return Op;
+ }
+
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
@@ -12193,10 +13037,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, 8, 8);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
@@ -12223,24 +13066,52 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
- FudgePtr, MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, false, 4);
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
}
+// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
+// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
+// just return an <SDValue(), SDValue()> pair.
+// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
+// to i16, i32 or i64, and we lower it to a legal sequence.
+// If lowered to the final integer result we return a <result, SDValue()> pair.
+// Otherwise we lower it to a sequence ending with a FIST, return a
+// <FIST, StackSlot> pair, and the caller is responsible for loading
+// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
-X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool IsSigned, bool IsReplace) const {
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
+ EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+ if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
+ // f16 must be promoted before using the lowering in this routine.
+ // fp128 does not use this lowering.
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ // If using FIST to compute an unsigned i64, we'll need some fixup
+ // to handle values above the maximum signed i64. A FIST is always
+ // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
+ bool UnsignedFixup = !IsSigned &&
+ DstTy == MVT::i64 &&
+ (!Subtarget->is64Bit() ||
+ !isScalarFPTypeInSSEReg(TheVT));
+
+ if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+ // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
+ // The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
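
In scalar terms the i32 -> i64 widening above amounts to this (a hypothetical standalone helper):

#include <cstdint>

// fp -> uint32 lowered as fp -> int64 (what FIST computes): for inputs
// in [0, 2^32) the signed i64 result is exact, and its low 32 bits are
// the desired uint32 value.
uint32_t DoubleToU32ViaS64(double V) {
  return static_cast<uint32_t>(static_cast<int64_t>(V));
}
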
@@ -12258,42 +13129,87 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->int64 either into FISTP64 followed by a load from a temporary
- // stack slot, or into the FTOL runtime function.
+ // We lower FP->int64 into FISTP64 followed by a load from a temporary
+ // stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
- if (!IsSigned && isIntegerTypeFTOL(DstTy))
- Opc = X86ISD::WIN_FTOL;
- else
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- EVT TheVT = Op.getOperand(0).getValueType();
+ SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
+
+ if (UnsignedFixup) {
+ //
+ // Conversion to unsigned i64 is implemented with a select,
+ // depending on whether the source value fits in the range
+ // of a signed i64. Let Thresh be the FP equivalent of
+ // 0x8000000000000000ULL.
+ //
+ // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
+ // FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
+ // Fist-to-mem64 FistSrc
+ // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
+ // to XOR'ing the high 32 bits with Adjust.
+ //
+ // Being a power of 2, Thresh is exactly representable in all FP formats.
+ // For X87 we'd like to use the smallest FP type for this constant, but
+ // for DAG type consistency we have to match the FP operand type.
+
+ APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
+ LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
+ bool LosesInfo = false;
+ if (TheVT == MVT::f64)
+ // The rounding mode is irrelevant as the conversion should be exact.
+ Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+ else if (TheVT == MVT::f80)
+ Status = Thresh.convert(APFloat::x87DoubleExtended,
+ APFloat::rmNearestTiesToEven, &LosesInfo);
+
+ assert(Status == APFloat::opOK && !LosesInfo &&
+ "FP conversion should have been exact");
+
+ SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
+
+ SDValue Cmp = DAG.getSetCC(DL,
+ getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0x80000000, DL, MVT::i32));
+ SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
+ Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
+ }
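
A scalar model of the select sequence just built (names are ours; the comparison and subtraction mirror Cmp/Sub above):

#include <cstdint>

// Convert double -> uint64 using only a signed conversion, as FIST
// provides. Thresh = 2^63 is exactly representable in every FP format,
// and for in-range values Value - Thresh is exact (Sterbenz lemma).
uint64_t DoubleToU64ViaSigned(double Value) {
  const double Thresh = 0x1.0p63;
  bool Below = Value < Thresh;
  double FistSrc = Below ? Value : Value - Thresh;
  uint64_t Adjust = Below ? 0 : 0x8000000000000000ULL;
  // The int64 cast stands in for the FIST; XOR'ing the sign bit back in
  // matches XOR'ing the high 32 bits with 0x80000000 as described above.
  return static_cast<uint64_t>(static_cast<int64_t>(FistSrc)) ^ Adjust;
}
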
+
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, SSFI), false,
+ false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, MemSize, MemSize);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
@@ -12301,28 +13217,52 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
}
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, MemSize, MemSize);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ if (UnsignedFixup) {
+
+ // Insert the FIST, load its result as two i32's,
+ // and XOR the high i32 with Adjust.
+
+ SDValue FistOps[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ FistOps, DstTy, MMO);
+
+ SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot,
+ DAG.getConstant(4, DL, PtrVT));
- if (Opc != X86ISD::WIN_FTOL) {
+ SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
+
+ if (Subtarget->is64Bit()) {
+ // Join High32 and Low32 into a 64-bit result.
+ // (High32 << 32) | Low32
+ Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
+ High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
+ High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
+ DAG.getConstant(32, DL, MVT::i8));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
+ return std::make_pair(Result, SDValue());
+ }
+
+ SDValue ResultOps[] = { Low32, High32 };
+
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
+ : DAG.getMergeValues(ResultOps, DL);
+ return std::make_pair(pair, SDValue());
+ } else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
- } else {
- SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
- DAG.getVTList(MVT::Other, MVT::Glue),
- Chain, Value);
- SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
- MVT::i32, ftol.getValue(1));
- SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
- MVT::i32, eax.getValue(2));
- SDValue Ops[] = { eax, edx };
- SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
- : DAG.getMergeValues(Ops, DL);
- return std::make_pair(pair, SDValue());
}
}
@@ -12333,7 +13273,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
- if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1)
+ if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In);
// Optimize vectors in AVX mode:
@@ -12426,6 +13366,62 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
+static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");
+
+ // Shift the LSB to the MSB and use VPMOVB2M (SKX).
+ unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
+ if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M
+ ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+ InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
+ Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
+ // Shifting packed bytes is not supported natively; bitcast to words.
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
+ DAG.getBitcast(ExtVT, In),
+ DAG.getConstant(ShiftInx, DL, ExtVT));
+ ShiftNode = DAG.getBitcast(InVT, ShiftNode);
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+ }
+ if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M
+ ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+ InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
+ Subtarget->hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M
+
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+ DAG.getConstant(ShiftInx, DL, InVT));
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+ }
+
+ // Shift LSB to MSB, extend if necessary and use TESTM.
+ unsigned NumElts = InVT.getVectorNumElements();
+ if (InVT.getSizeInBits() < 512 &&
+ (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
+ !Subtarget->hasVLX())) {
+ assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
+
+ // TESTD/Q should be used (with BWI we would have taken the CVT2MASK path
+ // above), so the vector should be extended to packed dword/qword.
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ ShiftInx = InVT.getScalarSizeInBits() - 1;
+ }
+
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+ DAG.getConstant(ShiftInx, DL, InVT));
+ return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
+}
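
The same LSB-to-MSB idea predates AVX-512; an SSE2 analogue (our sketch, not this lowering) shows why the bitcast to a wider element type is needed, since x86 has no packed byte shift:

#include <emmintrin.h>
#include <cstdint>

// Collect the LSB of each byte of V into a 16-bit mask: shift as 16-bit
// lanes (bit 0 -> bit 7 within each byte), then read the byte sign bits.
uint16_t ByteLSBsToMask(__m128i V) {
  __m128i Shifted = _mm_slli_epi16(V, 7);
  return static_cast<uint16_t>(_mm_movemask_epi8(Shifted));
}
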
+
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
@@ -12443,42 +13439,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
- // move vector to mask - truncate solution for SKX
- if (VT.getVectorElementType() == MVT::i1) {
- if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
- Subtarget->hasBWI())
- return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
- && InVT.getScalarSizeInBits() <= 16 &&
- Subtarget->hasBWI() && Subtarget->hasVLX())
- return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
- Subtarget->hasDQI())
- return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
- && InVT.getScalarSizeInBits() >= 32 &&
- Subtarget->hasDQI() && Subtarget->hasVLX())
- return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
- }
- if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
- if (VT.getVectorElementType().getSizeInBits() >=8)
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
-
- assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
- unsigned NumElts = InVT.getVectorNumElements();
- assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
- if (InVT.getSizeInBits() < 512) {
- MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
- In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
- InVT = ExtVT;
- }
-
- SDValue OneV =
- DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT);
- SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
- return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
- }
+ if (VT.getVectorElementType() == MVT::i1)
+ return LowerTruncateVecI1(Op, DAG, Subtarget);
+ // vpmovqb/w/d, vpmovdb/w, vpmovwb
+ if (Subtarget->hasAVX512()) {
+ // word to byte only under BWI
+ if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT,
+ DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+ }
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
@@ -12583,7 +13554,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (!FIST.getNode()) return Op;
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -12600,7 +13572,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
- assert(FIST.getNode() && "Unexpected failure");
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -12643,6 +13617,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ bool IsF128 = (VT == MVT::f128);
+
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
@@ -12650,11 +13626,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT LogicVT;
MVT EltVT;
unsigned NumElts;
-
+
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
+ } else if (IsF128) {
+ // SSE instructions are used for optimized f128 logical operations.
+ LogicVT = MVT::f128;
+ EltVT = VT;
+ NumElts = 1;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
@@ -12675,9 +13656,10 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ SDValue Mask =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
@@ -12685,7 +13667,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
- if (VT.isVector())
+ if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
@@ -12704,6 +13686,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op1.getSimpleValueType();
+ bool IsF128 = (VT == MVT::f128);
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -12718,13 +13701,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
+ assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
+ "Unexpected type in LowerFCOPYSIGN");
const fltSemantics &Sem =
- VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
+ VT == MVT::f64 ? APFloat::IEEEdouble :
+ (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
const unsigned SizeInBits = VT.getSizeInBits();
SmallVector<Constant *, 4> CV(
- VT == MVT::f64 ? 2 : 4,
+ VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
// First, clear all bits but the sign bit from the second operand (sign).
@@ -12737,11 +13723,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// Perform all logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE. This allows load folding of the
// constants into the logic instructions.
- MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
- Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
+ MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
+ SDValue Mask1 =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
+ if (!IsF128)
+ Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
// Next, clear the sign bit from the first operand (magnitude).
@@ -12750,8 +13738,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat APF = Op0CN->getValueAPF();
// If the magnitude is a positive zero, the sign bit alone is enough.
if (APF.isPosZero())
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
- DAG.getIntPtrConstant(0, dl));
+ return IsF128 ? SignBit :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
+ DAG.getIntPtrConstant(0, dl));
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
@@ -12761,18 +13750,21 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, PtrVT, 16);
- SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue Val =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (!isa<ConstantFPSDNode>(Op0)) {
- Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
+ if (!IsF128)
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
}
// OR the magnitude value with the sign bit.
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
- DAG.getIntPtrConstant(0, dl));
+ return IsF128 ? Val :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
+ DAG.getIntPtrConstant(0, dl));
}
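
The whole FAND/FOR dance reduces to plain bit twiddling; a scalar f64 model (illustrative only, the lowering above stays in vector registers to allow load folding):

#include <cstdint>
#include <cstring>

// copysign(Mag, Sgn): clear the sign bit of the magnitude, isolate the
// sign bit of Sgn, and OR the two, exactly as the constant masks do above.
double CopySignViaMasks(double Mag, double Sgn) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sgn, sizeof(S));
  const uint64_t SignBit = 1ULL << 63;
  uint64_t R = (M & ~SignBit) | (S & SignBit);
  double Res;
  std::memcpy(&Res, &R, sizeof(Res));
  return Res;
}
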
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -12859,7 +13851,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
- EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
@@ -12999,14 +13991,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
+ if (C->isOne() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
+ if (C->isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
@@ -13135,13 +14127,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDLoc dl, SelectionDAG &DAG) const {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
- if (C->getAPIntValue() == 0)
- return EmitTest(Op0, X86CC, dl, DAG);
+ if (isNullConstant(Op1))
+ return EmitTest(Op0, X86CC, dl, DAG);
- if (Op0.getValueType() == MVT::i1)
- llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
- }
+ assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
+ "Unexpected comparison operation for MVT::i1 operands");
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
@@ -13150,8 +14140,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
- !DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::MinSize) &&
+ !DAG.getMachineFunction().getFunction()->optForMinSize() &&
!Subtarget->isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
@@ -13188,6 +14177,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+
+ // Some 64-bit targets lack SAHF support, but they do support FCOMI.
+ assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
@@ -13261,13 +14253,8 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
- return NumUsers > 1;
-}
-
-static bool isAllOnes(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isAllOnesValue();
+unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
+ return 2;
}
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
@@ -13285,8 +14272,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
- if (And00C->getZExtValue() == 1) {
+ if (isOneConstant(Op0.getOperand(0))) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
@@ -13423,7 +14409,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType() == MVT::i1 &&
+ assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected type for boolean compare operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
@@ -13467,8 +14453,8 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
- Op.getValueType().getScalarType() == MVT::i1 &&
+ assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 &&
+ Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
@@ -13515,7 +14501,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
for (unsigned i = 0; i < n; ++i) {
ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT)
+ if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
return SDValue();
// Avoid underflow.
@@ -13606,13 +14592,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
- EVT OpVT = Op1.getValueType();
+ MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
if (Subtarget->hasAVX512()) {
- if (Op1.getValueType().is512BitVector() ||
+ if (Op1.getSimpleValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
@@ -13628,6 +14614,33 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
+ // Lower using XOP integer comparisons.
+ if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) {
+ // Translate compare code to XOP PCOM compare mode.
+ unsigned CmpMode = 0;
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETULT:
+ case ISD::SETLT: CmpMode = 0x00; break;
+ case ISD::SETULE:
+ case ISD::SETLE: CmpMode = 0x01; break;
+ case ISD::SETUGT:
+ case ISD::SETGT: CmpMode = 0x02; break;
+ case ISD::SETUGE:
+ case ISD::SETGE: CmpMode = 0x03; break;
+ case ISD::SETEQ: CmpMode = 0x04; break;
+ case ISD::SETNE: CmpMode = 0x05; break;
+ }
+
+ // Are we comparing unsigned or signed integers?
+ unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
+ ? X86ISD::VPCOMU : X86ISD::VPCOM;
+
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getConstant(CmpMode, dl, MVT::i8));
+ }
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -13777,7 +14790,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
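
The FlipSigns conversion in scalar form (our sketch): XOR'ing the sign bit maps unsigned order onto signed order, so the signed-only SSE compares apply.

#include <cstdint>

// A > B (unsigned) computed with a signed compare after biasing both
// operands by 2^31, matching the XOR-with-sign-bit inserted above.
bool UnsignedGT(uint32_t A, uint32_t B) {
  int32_t SA = static_cast<int32_t>(A ^ 0x80000000u);
  int32_t SB = static_cast<int32_t>(B ^ 0x80000000u);
  return SA > SB;
}
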
@@ -13818,11 +14831,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
+ isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
return NewSetCC;
@@ -13831,17 +14842,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
+ bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
if (!Invert)
return Op0;
@@ -13854,8 +14862,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
}
- if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1) &&
+ if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
@@ -13876,6 +14883,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
+SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Carry = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(3);
+ SDLoc DL(Op);
+
+ assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+ X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
+
+ assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+ SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
+ return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(),
+ DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
+}
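
What the SBB-based lowering computes, modeled on the typical use of SETCCE as the second half of a wide unsigned compare (a hypothetical standalone version):

#include <cstdint>

// 128-bit unsigned "less than" split into two 64-bit halves. The low
// SUB produces a borrow; the high half consumes it via SBB, and the
// final borrow-out is the flag the SETCC above reads.
bool ULT128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi) {
  bool Borrow = ALo < BLo;
  return AHi < BHi || (AHi == BHi && Borrow);
}
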
+
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -13918,7 +14942,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
- EVT VT = Op1.getValueType();
+ MVT VT = Op1.getSimpleValueType();
SDValue CC;
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
@@ -13927,7 +14951,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
- VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
+ VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
@@ -13961,12 +14985,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
- EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
+ MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
- EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
+ MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
@@ -13980,26 +15004,26 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (VT.isVector() && VT.getScalarType() == MVT::i1) {
- SDValue Op1Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
- Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
- else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
- Op1Scalar = Op1.getOperand(0);
- SDValue Op2Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
- Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
- else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
- Op2Scalar = Op2.getOperand(0);
- if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
- Op1Scalar.getValueType(),
- Cond, Op1Scalar, Op2Scalar);
- if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, newSelect);
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
- DAG.getIntPtrConstant(0, DL));
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
+ SDValue Op1Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
+ Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
+ else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
+ Op1Scalar = Op1.getOperand(0);
+ SDValue Op2Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
+ Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
+ else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
+ Op2Scalar = Op2.getOperand(0);
+ if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ Op1Scalar.getValueType(),
+ Cond, Op1Scalar, Op2Scalar);
+ if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getBitcast(VT, newSelect);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
+ DAG.getIntPtrConstant(0, DL));
}
}
@@ -14026,22 +15050,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
- isZero(Cond.getOperand(1).getOperand(1))) {
+ isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
- if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
- SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+ SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
- if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
- if (YC->isNullValue() &&
- (isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
+ if (isNullConstant(Y) &&
+ (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
DAG.getConstant(0, DL,
@@ -14061,11 +15084,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
- if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- if (!N2C || !N2C->isNullValue())
+ if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
}
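
A scalar model of the neg & sbb special case noted above (illustrative):

#include <cstdint>

// (X != 0) ? -1 : 0 without a branch: NEG X sets the carry flag iff
// X is non-zero, and SBB of a register with itself yields 0 - CF.
uint32_t AllOnesIfNonZero(uint32_t X) {
  uint32_t Carry = (X != 0);
  return 0u - Carry;
}
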
@@ -14073,11 +15095,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
- Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
- Cond = Cond.getOperand(0);
- }
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
+ isOneConstant(Cond.getOperand(1)))
+ Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
@@ -14136,15 +15156,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate if the high bits are known zero.
+ // Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
- Cond = Cond.getOperand(0);
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
@@ -14166,11 +15185,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
- (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
+ (isNullConstant(Op1) || isNullConstant(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Cond);
- if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
@@ -14256,8 +15276,8 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
MVT InVT = In.getSimpleValueType();
assert(VT.getSizeInBits() == InVT.getSizeInBits());
- MVT InSVT = InVT.getScalarType();
- assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits());
+ MVT InSVT = InVT.getVectorElementType();
+ assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits());
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
@@ -14276,7 +15296,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
- while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) {
+ while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
@@ -14286,7 +15306,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
SDValue SignExt = Curr;
if (CurrVT != InVT) {
unsigned SignExtShift =
- CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits();
+ CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
@@ -14346,7 +15366,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
@@ -14470,7 +15490,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
// memory. In practice, we 'widen' MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarType().getSizeInBits());
+ loadRegZize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -14518,29 +15538,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
return Sext;
}
- // Otherwise we'll shuffle the small elements in the high bits of the
- // larger type and perform an arithmetic shift. If the shift is not legal
- // it's better to scalarize.
- assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) &&
- "We can't implement a sext load without an arithmetic right shift!");
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio + SizeRatio - 1] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(
- WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
-
- Shuff = DAG.getBitcast(RegVT, Shuff);
-
- // Build the arithmetic shift.
- unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
- MemVT.getVectorElementType().getSizeInBits();
- Shuff =
- DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
- DAG.getConstant(Amt, dl, RegVT));
+ // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
+ // lanes.
+ assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
+ "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
+ SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
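
For reference, the semantics of the node this now relies on, sketched on one lane width (our illustration, assuming v8i16 -> v4i32):

#include <array>
#include <cstdint>

// SIGN_EXTEND_VECTOR_INREG: sign-extend the lowest input lanes into
// the wider result lanes; the remaining input lanes are dropped.
std::array<int32_t, 4> SExtVectorInReg(const std::array<int16_t, 8> &In) {
  std::array<int32_t, 4> Out;
  for (int I = 0; I < 4; ++I)
    Out[I] = In[I]; // implicit sign extension per lane
  return Out;
}
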
@@ -14577,11 +15580,9 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
static bool isXor1OfSetCC(SDValue Op) {
if (Op.getOpcode() != ISD::XOR)
return false;
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (N1C && N1C->getAPIntValue() == 1) {
+ if (isOneConstant(Op.getOperand(1)))
return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
- Op.getOperand(0).hasOneUse();
- }
+ Op.getOperand(0).hasOneUse();
return false;
}
@@ -14597,8 +15598,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::SETCC) {
// Check for setcc([su]{add,sub,mul}o == 0).
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
- isa<ConstantSDNode>(Cond.getOperand(1)) &&
- cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
+ isNullConstant(Cond.getOperand(1)) &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
@@ -14625,11 +15625,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
- Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
- Cond = Cond.getOperand(0);
- }
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
+ isOneConstant(Cond.getOperand(1)))
+ Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
@@ -14673,16 +15671,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
break;
}
@@ -14844,8 +15840,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
@@ -14877,54 +15872,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SplitStack;
SDLoc dl(Op);
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Node->getValueType(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+
+ bool Is64Bit = Subtarget->is64Bit();
+ MVT SPTy = getPointerTy(DAG.getDataLayout());
+
+ SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDNode* Node = Op.getNode();
-
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
- " not tell us which reg is the stack pointer!");
+ " not tell us which reg is the stack pointer!");
EVT VT = Node->getValueType(0);
- SDValue Tmp1 = SDValue(Node, 0);
- SDValue Tmp2 = SDValue(Node, 1);
SDValue Tmp3 = Node->getOperand(2);
- SDValue Chain = Tmp1.getOperand(0);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true),
- SDLoc(Node));
- SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
- Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
-
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
- DAG.getIntPtrConstant(0, dl, true), SDValue(),
- SDLoc(Node));
-
- SDValue Ops[2] = { Tmp1, Tmp2 };
- return DAG.getMergeValues(Ops, dl);
- }
-
- // Get the inputs.
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- EVT VT = Op.getNode()->getValueType(0);
-
- bool Is64Bit = Subtarget->is64Bit();
- MVT SPTy = getPointerTy(DAG.getDataLayout());
-
- if (SplitStack) {
+ Result = DAG.getNode(ISD::AND, dl, VT, Result,
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
+ } else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
@@ -14942,10 +15923,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
- SDValue Ops1[2] = { Value, Chain };
- return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
@@ -14967,9 +15946,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
- SDValue Ops1[2] = { SP, Chain };
- return DAG.getMergeValues(Ops1, dl);
+ Result = SP;
}
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+ SDValue Ops[2] = {Result, Chain};
+ return DAG.getMergeValues(Ops, dl);
}
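
The generic (!Lower) path above boils down to a subtract-and-realign on the stack pointer; a sketch under the assumption of a 16-byte ABI stack alignment (StackAlign really comes from TFI.getStackAlignment()):

#include <cstdint>

// Carve Size bytes off SP, then round down when the request exceeds
// the ABI alignment. AND with ~(Align - 1) equals AND with -Align.
uintptr_t AdjustSP(uintptr_t SP, uintptr_t Size, uintptr_t Align) {
  const uintptr_t StackAlign = 16; // placeholder for the target value
  uintptr_t P = SP - Size;
  if (Align > StackAlign)
    P &= ~(Align - 1);
  return P;
}
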
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -14980,7 +15964,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
- if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ if (!Subtarget->is64Bit() ||
+ Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -15019,10 +16004,11 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
+ Subtarget->isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
- Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
- MachinePointerInfo(SV, 16), false, false, 0);
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(
+ SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
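
The record these stores populate is the SysV x86-64 va_list; spelling it out makes the 8-vs-4 increment and the 16-vs-12 offset above concrete:

#include <cstdint>

// Field offsets are 0/4/8/16 under LP64. With 4-byte pointers (X32),
// overflow_arg_area still starts at 8 but reg_save_area moves to 12.
struct VaListX8664 {
  uint32_t gp_offset;      // offset 0
  uint32_t fp_offset;      // offset 4
  void *overflow_arg_area; // offset 8
  void *reg_save_area;     // offset 16 (LP64) or 12 (X32)
};
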
@@ -15030,10 +16016,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
- assert((Subtarget->isTargetLinux() ||
- Subtarget->isTargetDarwin()) &&
- "Unhandled target in LowerVAARG");
assert(Op.getNode()->getNumOperands() == 4);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+ // The Win64 ABI uses char* instead of a structure.
+ return DAG.expandVAArg(Op.getNode());
+
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -15061,8 +16050,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!Subtarget->useSoftFloat() &&
- !(DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::NoImplicitFloat)) &&
+ !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -15091,8 +16079,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
+ // where a va_list is still an i8*.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ if (Subtarget->isCallingConvWin64(
+ DAG.getMachineFunction().getFunction()->getCallingConv()))
+ // Win64 calling conventions use a plain i8* va_list, so this va_copy
+ // is just a pointer copy.
+ return DAG.expandVACopy(Op.getNode());
+
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
@@ -15230,72 +16224,126 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
-/// \brief Return (and \p Op, \p Mask) for compare instructions or
-/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
-/// necessary casting for \p Mask when lowering masking intrinsics.
-static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
- SDValue PreservedSrc,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
- MVT::i1, VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
+/// \brief Return \p Mask with the casting or extending required by
+/// \p MaskVT when lowering masking intrinsics.
+static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, SDLoc dl) {
- assert(MaskVT.isSimple() && "invalid mask type");
+ if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
+ // Mask should be extended
+ Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
+ MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
+ }
- if (isAllOnes(Mask))
- return Op;
+ if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) {
+ if (MaskVT == MVT::v64i1) {
+ assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
+ // In 32-bit mode a bitcast of i64 is illegal; extend or split it instead.
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
+ DAG.getConstant(0, dl, MVT::i32));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ Lo = DAG.getBitcast(MVT::v32i1, Lo);
+ Hi = DAG.getBitcast(MVT::v32i1, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
+ } else {
+ // MaskVT requires fewer than 64 bits; truncate the mask (this always
+ // succeeds) and bitcast it.
+ MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits());
+ return DAG.getBitcast(MaskVT,
+ DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask));
+ }
+
+ } else {
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
+}
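The i64-on-32-bit split above has a simple scalar model. A sketch, with plain integers standing in for the v32i1/v64i1 halves:

    #include <cstdint>
    // On a 32-bit target an i64 mask can't be bitcast directly, so it is
    // split into two i32 halves; concat(v32i1(Lo), v32i1(Hi)) == v64i1(Mask).
    void splitMask64(uint64_t Mask, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(Mask);       // lanes 0..31
      Hi = static_cast<uint32_t>(Mask >> 32); // lanes 32..63
    }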
- switch (Op.getOpcode()) {
- default: break;
- case X86ISD::PCMPEQM:
- case X86ISD::PCMPGTM:
- case X86ISD::CMPM:
- case X86ISD::CMPMU:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- }
- if (PreservedSrc.getOpcode() == ISD::UNDEF)
- PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
+/// \brief Return (and \p Op, \p Mask) for compare instructions or
+/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
+/// necessary casting or extending for \p Mask when lowering masking intrinsics
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ unsigned OpcodeSelect = ISD::VSELECT;
+ SDLoc dl(Op);
+
+ if (isAllOnesConstant(Mask))
+ return Op;
+
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+ case X86ISD::VFPCLASS:
+ case X86ISD::VFPCLASSS:
+ return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
+ case X86ISD::VTRUNC:
+ case X86ISD::VTRUNCS:
+ case X86ISD::VTRUNCUS:
+ // We can't use ISD::VSELECT here because it is not always "Legal"
+ // for the destination type. For example, vpmovqb requires only AVX512,
+ // while a vselect on byte elements requires BWI.
+ OpcodeSelect = X86ISD::SELECT;
+ break;
+ }
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}
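Element-wise, the three forms this function can emit behave as follows; a one-lane sketch with illustrative names:

    // One lane of the masking forms above (sketch):
    //   compares (PCMPEQM/PCMPGTM/CMPM/CMPMU): res = op & mask
    //   fpclass  (VFPCLASS/VFPCLASSS):         res = op | mask
    //   everything else:                       res = mask ? op : passthru
    template <typename T>
    T maskedLane(bool MaskBit, T OpLane, T PassThruLane) {
      return MaskBit ? OpLane : PassThruLane; // the VSELECT/X86ISD::SELECT form
    }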
/// \brief Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
-/// The mask is comming as MVT::i8 and it should be truncated
+/// The mask is coming as MVT::i8 and it should be truncated
/// to MVT::i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
-/// "X86select" instead of "vselect". We just can't create the "vselect" node for
-/// a scalar instruction.
+/// "X86select" instead of "vselect". We just can't create the "vselect" node
+/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (isAllOnes(Mask))
- return Op;
+ if (isAllOnesConstant(Mask))
+ return Op;
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- // The mask should be of type MVT::i1
- SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+ MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);
+ // The mask should be of type MVT::i1
+ SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
- if (PreservedSrc.getOpcode() == ISD::UNDEF)
- PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+ if (Op.getOpcode() == X86ISD::FSETCC)
+ return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+ if (Op.getOpcode() == X86ISD::VFPCLASS ||
+ Op.getOpcode() == X86ISD::VFPCLASSS)
+ return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
+
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
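The scalar variant consumes only bit 0 of the incoming i8 mask, matching the i1 truncation above. A sketch:

    #include <cstdint>
    // Scalar masking sketch: the i8 mask is truncated to i1, so only bit 0
    // participates; FSETCC/VFPCLASS instead AND/OR that bit into the result.
    double scalarMaskedResult(uint8_t Mask, double OpRes, double PassThru) {
      return (Mask & 1) ? OpRes : PassThru;
    }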
static int getSEHRegistrationNodeSize(const Function *Fn) {
@@ -15309,15 +16357,16 @@ static int getSEHRegistrationNodeSize(const Function *Fn) {
case EHPersonality::MSVC_CXX: return 16;
default: break;
}
- report_fatal_error("can only recover FP for MSVC EH personality functions");
+ report_fatal_error(
+ "can only recover FP for 32-bit MSVC EH personality functions");
}
-/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
+/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
/// Here's the math:
/// RegNodeBase = EntryEBP - RegNodeSize
-/// ParentFP = RegNodeBase - RegNodeFrameOffset
+/// ParentFP = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node, and
/// subtracting the offset (negative on x86) takes us back to the parent FP.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
@@ -15334,29 +16383,35 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
if (!Fn->hasPersonalityFn())
return EntryEBP;
- int RegNodeSize = getSEHRegistrationNodeSize(Fn);
-
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
- // registration.
+ // registration, or the .set_setframe offset.
MCSymbol *OffsetSym =
MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
GlobalValue::getRealLinkageName(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
- SDValue RegNodeFrameOffset =
+ SDValue ParentFrameOffset =
DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
+ // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
+ // prologue to RBP in the parent function.
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ if (Subtarget.is64Bit())
+ return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
+
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// RegNodeBase = EntryEBP - RegNodeSize
- // ParentFP = RegNodeBase - RegNodeFrameOffset
+ // ParentFP = RegNodeBase - ParentFrameOffset
SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
DAG.getConstant(RegNodeSize, dl, PtrVT));
- return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset);
+ return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
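Collapsing the comments above into arithmetic, both recovery paths are one or two adds/subs. A sketch, with intptr_t standing in for the DAG pointer type and illustrative function names:

    #include <cstdint>
    // 32-bit: ParentFP = (EntryEBP - RegNodeSize) - ParentFrameOffset
    intptr_t recoverParentFP32(intptr_t EntryEBP, int RegNodeSize,
                               intptr_t ParentFrameOffset) {
      intptr_t RegNodeBase = EntryEBP - RegNodeSize;
      return RegNodeBase - ParentFrameOffset;
    }
    // 64-bit: ParentFP = EntryRSP + ParentFrameOffset (adjusting from RSP
    // after the prologue to RBP in the parent, per the comment above).
    intptr_t recoverParentFP64(intptr_t EntryRSP, intptr_t ParentFrameOffset) {
      return EntryRSP + ParentFrameOffset;
    }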
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
@@ -15365,6 +16420,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
+ case INTR_TYPE_2OP_IMM8:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2)));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
@@ -15376,28 +16434,53 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
+ // We always add the rounding mode to the node.
+ // If no rounding mode is specified, we add the
+ // "current direction" mode.
if (Op.getNumOperands() == 4)
- RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ RoundingMode =
+ DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (IntrWithRoundingModeOpcode != 0)
+ if (cast<ConstantSDNode>(RoundingMode)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(), Src, RoundingMode),
Mask, PassThru, Subtarget, DAG);
- }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
- SDValue Passthru = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
+ // We add the rounding mode to the node when
+ // - an RM opcode is specified, and
+ // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(4);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
- Mask, Passthru, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_SCALAR_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
+ Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
@@ -15405,7 +16488,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// There are 2 kinds of intrinsics in this group:
- // (1) With supress-all-exceptions (sae) or rounding mode- 6 operands
+ // (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands
// (2) With rounding mode and sae - 7 operands.
if (Op.getNumOperands() == 6) {
SDValue Sae = Op.getOperand(5);
@@ -15421,11 +16504,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
- case INTR_TYPE_2OP_MASK: {
+ case INTR_TYPE_2OP_MASK:
+ case INTR_TYPE_2OP_IMM8_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+
+ if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
+ Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15440,8 +16528,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
}
}
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1,Src2),
+ // TODO: Intrinsics should have fast-math-flags to propagate.
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_RM: {
@@ -15449,7 +16537,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- // We specify 2 possible modes for intrinsics, with/without rounding modes.
+ // We specify 2 possible modes for these intrinsics: with and without
+ // rounding modes.
// First, we check whether the intrinsic has a rounding mode (6 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
@@ -15461,12 +16550,56 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
- case INTR_TYPE_3OP_MASK: {
+ case INTR_TYPE_3OP_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
+ SDValue Sae = Op.getOperand(6);
+
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
+ Src2, Src3, Sae),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_3OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Imm = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ // We specify 2 possible modes for these intrinsics: with and without
+ // rounding modes.
+ // First, we check whether the intrinsic has a rounding mode (7 operands);
+ // if not, we set the rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 7)
+ Rnd = Op.getOperand(6);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Imm, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_3OP_IMM8_MASK:
+ case INTR_TYPE_3OP_MASK:
+ case INSERT_SUBVEC: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+
+ if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
+ Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+ else if (IntrData->Type == INSERT_SUBVEC) {
+ // Convert the subvector index in imm to the element index expected by
+ // ISD::INSERT_SUBVECTOR.
+ assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
+ unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
+ Imm *= Src2.getSimpleValueType().getVectorNumElements();
+ Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
+ }
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15486,7 +16619,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
}
case VPERM_3OP_MASKZ:
- case VPERM_3OP_MASK:
+ case VPERM_3OP_MASK:{
+ // Src2 is the PassThru
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+ SDValue PassThru = SDValue();
+
+ // Set the PassThru operand.
+ if (IntrData->Type == VPERM_3OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+ else
+ PassThru = DAG.getBitcast(VT, Src2);
+
+ // Swap Src1 and Src2 in the node creation
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Src2, Src1, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
@@ -15494,11 +16647,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// set PassThru element
- if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+ if (IntrData->Type == FMA_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_MASK3)
PassThru = Src3;
@@ -15523,6 +16676,50 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case TERLOG_OP_MASK:
+ case TERLOG_OP_MASKZ: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
+ SDValue Mask = Op.getOperand(5);
+ MVT VT = Op.getSimpleValueType();
+ SDValue PassThru = Src1;
+ // Set the PassThru operand.
+ if (IntrData->Type == TERLOG_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3, Src4),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case FPCLASS: {
+ // FPclass intrinsics with mask
+ SDValue Src1 = Op.getOperand(1);
+ MVT VT = Src1.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue Imm = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
+ SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask,
+ DAG.getTargetConstant(0, dl, MaskVT),
+ Subtarget, DAG);
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), FPclassMask,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getBitcast(Op.getValueType(), Res);
+ }
+ case FPCLASSS: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Imm = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm);
+ SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
+ DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask);
+ }
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
@@ -15534,12 +16731,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
// (v2i1 (and (PCMPEQM %a, %b),
// (extract_subvector
// (v8i1 (bitcast %mask)), 0))), 0))))
- EVT VT = Op.getOperand(1).getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
+ MVT VT = Op.getOperand(1).getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
SDValue CC = Op.getOperand(3);
@@ -15573,6 +16769,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
+ case CMP_MASK_SCALAR_CC: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+ SDValue Mask = Op.getOperand(4);
+
+ SDValue Cmp;
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+ }
+ // Default rounding mode.
+ if (!Cmp.getNode())
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+
+ SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
+ DAG.getTargetConstant(0, dl,
+ MVT::i1),
+ Subtarget, DAG);
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8,
+ DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask),
+ DAG.getValueType(MVT::i1));
+ }
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
@@ -15584,6 +16806,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
DAG.getConstant(X86CC, dl, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case COMI_RM: { // Comparison intrinsics with Sae
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ SDValue CC = Op.getOperand(3);
+ SDValue Sae = Op.getOperand(4);
+ auto ComiType = TranslateX86ConstCondToX86CC(CC);
+ // Choose between ordered and unordered (comi/ucomi).
+ unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
+ SDValue Cond;
+ if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+ else
+ Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
@@ -15598,27 +16838,75 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
- if (isAllOnes(Mask)) // return data as is
+ if (isAllOnesConstant(Mask)) // return data as is
return Op.getOperand(1);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
DataToCompress),
Mask, PassThru, Subtarget, DAG);
}
+ case BROADCASTM: {
+ SDValue Mask = Op.getOperand(1);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ Mask = DAG.getBitcast(MaskVT, Mask);
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
+ }
case BLEND: {
SDValue Mask = Op.getOperand(3);
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
+ case KUNPCK: {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);
+
+ SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
+ SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
+ // Arguments should be swapped.
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl,
+ MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
+ Src2, Src1);
+ return DAG.getBitcast(VT, Res);
+ }
+ case CONVERT_TO_MASK: {
+ MVT SrcVT = Op.getOperand(1).getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
+
+ SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT,
+ Op.getOperand(1));
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), CvtMask,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getBitcast(Op.getValueType(), Res);
+ }
+ case CONVERT_MASK_TO_VEC: {
+ SDValue Mask = Op.getOperand(1);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc0, dl, VT, VMask);
+ }
+ case BRCST_SUBVEC_TO_VEC: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ EVT resVT = Passthru.getValueType();
+ SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT,
+ DAG.getUNDEF(resVT), Src,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue immVal;
+ if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector())
+ immVal = DAG.getConstant(0x44, dl, MVT::i8);
+ else
+ immVal = DAG.getConstant(0, dl, MVT::i8);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ subVec, subVec, immVal),
+ Mask, Passthru, Subtarget, DAG);
+ }
default:
break;
}
@@ -15832,23 +17120,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- if (!C)
- llvm_unreachable("Invalid scale type");
- unsigned ScaleVal = C->getZExtValue();
- if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
- llvm_unreachable("Valid scale values are 1, 2, 4, 8");
-
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else {
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
@@ -15860,7 +17142,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
if (Src.getOpcode() == ISD::UNDEF)
- Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
@@ -15871,25 +17153,19 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- if (!C)
- llvm_unreachable("Invalid scale type");
- unsigned ScaleVal = C->getZExtValue();
- if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
- llvm_unreachable("Valid scale values are 1, 2, 4, 8");
-
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else {
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
@@ -15907,12 +17183,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Mask, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT =
+ MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
@@ -16034,64 +17309,59 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
- const Function *Fn = MF.getFunction();
- SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
+ SDValue RegNode = Op.getOperand(2);
+ WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
+ if (!EHInfo)
+ report_fatal_error("EH registrations only live in functions using WinEH");
+
+ // Cast the operand to an alloca, and remember the frame index.
+ auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
+ if (!FINode)
+ report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca");
+ EHInfo->EHRegNodeFrameIndex = FINode->getIndex();
+
+ // Return the chain operand without making any DAG nodes.
+ return Chain;
+}
- assert(Subtarget->getFrameLowering()->hasFP(MF) &&
- "using llvm.x86.seh.restoreframe requires a frame pointer");
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VT = TLI.getPointerTy(DAG.getDataLayout());
-
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- unsigned FrameReg =
- RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
- unsigned SPReg = RegInfo->getStackRegister();
- unsigned SlotSize = RegInfo->getSlotSize();
+/// \brief Lower intrinsics for the TRUNCATE_TO_MEM case;
+/// returns a truncating Store/MaskedStore node.
+static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue &Op,
+ SelectionDAG &DAG,
+ MVT ElementType) {
+ SDLoc dl(Op);
+ SDValue Mask = Op.getOperand(4);
+ SDValue DataToTruncate = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
- // Get incoming EBP.
- SDValue IncomingEBP =
- DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+ MVT VT = DataToTruncate.getSimpleValueType();
+ MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements());
- // SP is saved in the first field of every registration node, so load
- // [EBP-RegNodeSize] into SP.
- int RegNodeSize = getSEHRegistrationNodeSize(Fn);
- SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP,
- DAG.getConstant(-RegNodeSize, dl, VT));
- SDValue NewSP =
- DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
- false, VT.getScalarSizeInBits() / 8);
- Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
-
- if (!RegInfo->needsStackRealignment(MF)) {
- // Adjust EBP to point back to the original frame position.
- SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
- } else {
- assert(RegInfo->hasBasePointer(MF) &&
- "functions with Win32 EH must use frame or base pointer register");
+ if (isAllOnesConstant(Mask)) // return just a truncate store
+ return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr,
+ MachinePointerInfo(), SVT, false, false,
+ SVT.getScalarSizeInBits()/8);
- // Reload the base pointer (ESI) with the adjusted incoming EBP.
- SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ // When MaskVT equals v2i1 or v4i1, the low 2 or 4 elements are
+ // extracted by EXTRACT_SUBVECTOR.
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
- // Reload the spilled EBP value, now that the stack and base pointers are
- // set up.
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- X86FI->setHasSEHFramePtrSave(true);
- int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize);
- X86FI->setSEHFramePtrSaveIndex(FI);
- SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT),
- MachinePointerInfo(), false, false, false,
- VT.getScalarSizeInBits() / 8);
- Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP);
- }
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, SVT.getStoreSize(),
+ SVT.getScalarSizeInBits()/8);
- return Chain;
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr,
+ VMask, SVT, MMO, true);
}
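Per element, the masked truncating store built above behaves like the scalar loop below; a sketch, with the wide and narrow element types as template parameters:

    // Masked truncating store, one element at a time (sketch). SrcT is the
    // wide source element type, DstT the truncated in-memory type.
    template <typename SrcT, typename DstT>
    void maskedTruncStore(DstT *Addr, const SrcT *Data, const bool *Mask,
                          int NumElts) {
      for (int I = 0; I != NumElts; ++I)
        if (Mask[I]) // masked-off lanes leave memory untouched
          Addr[I] = static_cast<DstT>(Data[I]); // truncate on store
    }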
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -16100,16 +17370,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
- if (IntNo == llvm::Intrinsic::x86_seh_restoreframe)
- return LowerSEHRESTOREFRAME(Op, Subtarget, DAG);
+ if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
+ return MarkEHRegistrationNode(Op, DAG);
return SDValue();
}
SDLoc dl(Op);
switch(IntrData->Type) {
- default:
- llvm_unreachable("Unknown Intrinsic Type");
- break;
+ default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
@@ -16214,8 +17482,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
- EVT VT = DataToCompress.getValueType();
- if (isAllOnes(Mask)) // return just a store
+ MVT VT = DataToCompress.getSimpleValueType();
+ if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
@@ -16227,15 +17495,21 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
}
+ case TRUNCATE_TO_MEM_VI8:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8);
+ case TRUNCATE_TO_MEM_VI16:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16);
+ case TRUNCATE_TO_MEM_VI32:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
- if (isAllOnes(Mask)) // return just a load
+ if (isAllOnesConstant(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits()/8);
@@ -16359,6 +17633,21 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
+unsigned X86TargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
+ return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+
+ return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
+}
+
+unsigned X86TargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Funclet personalities don't use selectors (the runtime does the selection).
+ assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
+ return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+}
+
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
@@ -16497,9 +17786,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.hasAttribute(Idx, Attribute::InReg))
+ if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
+ auto &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
- InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+ InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
+ }
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
@@ -16588,8 +17879,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, 2, 2);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
@@ -16623,12 +17914,75 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
-static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction.
+//
+// 1. i32/i64 128/256-bit vectors (native support requires VLX) are widened
+// to 512-bit vectors.
+// 2. i8/i16 vectors are implemented using the dword LZCNT vector instruction
+// ( sub(trunc(lzcnt(zext32(x)))) ). If zext32(x) is illegal,
+// split the vector, perform the operation on its Lo and Hi parts, and
+// concatenate the results.
+static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- EVT OpVT = VT;
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (EltVT == MVT::i64 || EltVT == MVT::i32) {
+ // Extend to 512 bit vector.
+ assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ "Unsupported value type for operation");
+
+ MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits());
+ SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
+ DAG.getUNDEF(NewVT),
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ "Unsupported element type");
+
+ if (16 < NumElems) {
+ // Split the vector; its Lo and Hi parts will be handled in the next
+ // iteration.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
+ MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
+ MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
+
+ assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
+ "Unsupported value type for operation");
+
+ // Use the natively supported vector instruction vplzcntd.
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
+ SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
+ SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
+
+ return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
+}
+
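The i8/i16 trick in point 2 above has a direct scalar analogue. A sketch, with the compiler builtin standing in for vplzcntd (which defines lzcnt(0) = 32):

    #include <cstdint>
    // ctlz for a narrow element via the dword instruction: widen to i32,
    // count there, then subtract the widening delta (32 - 8 for i8).
    uint8_t ctlz8ViaDword(uint8_t X) {
      unsigned Wide = X;                                // zext32(x)
      unsigned Count = Wide ? __builtin_clz(Wide) : 32; // vplzcntd semantics
      return static_cast<uint8_t>(Count - (32 - 8));    // sub(trunc(lzcnt), delta)
    }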
+static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
+ if (VT.isVector() && Subtarget->hasAVX512())
+ return LowerVectorCTLZ_AVX512(Op, DAG);
+
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
@@ -16658,7 +18012,8 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -16686,13 +18041,39 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- unsigned NumBits = VT.getSizeInBits();
+ unsigned NumBits = VT.getScalarSizeInBits();
SDLoc dl(Op);
- Op = Op.getOperand(0);
+
+ if (VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ // lsb(x) = (x & -x)
+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
+
+ // cttz_undef(x) = (width - 1) - ctlz(lsb)
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ TLI.isOperationLegal(ISD::CTLZ, VT)) {
+ SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
+ DAG.getNode(ISD::CTLZ, dl, VT, LSB));
+ }
+
+ // cttz(x) = ctpop(lsb - 1)
+ SDValue One = DAG.getConstant(1, dl, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT,
+ DAG.getNode(ISD::SUB, dl, VT, LSB, One));
+ }
+
+ assert(Op.getOpcode() == ISD::CTTZ &&
+ "Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
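Both vector CTTZ identities used earlier in this hunk check out in scalar form. A sketch over 32-bit lanes, with builtins standing in for ctpop/ctlz:

    #include <cstdint>
    // cttz(x) = ctpop((x & -x) - 1); also yields 32 for x == 0.
    unsigned cttzViaCtpop(uint32_t X) {
      uint32_t LSB = X & -X;              // isolate the lowest set bit
      return __builtin_popcount(LSB - 1); // bits below it count the zeros
    }
    // cttz_undef(x) = (width - 1) - ctlz(x & -x); valid only for x != 0.
    unsigned cttzUndefViaCtlz(uint32_t X) {
      uint32_t LSB = X & -X;
      return 31 - __builtin_clz(LSB);
    }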
@@ -16753,6 +18134,13 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
+static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -16885,7 +18273,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue AhiBlo = Ahi;
SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getBitcast(MulVT, A);
B = DAG.getBitcast(MulVT, B);
@@ -16962,7 +18350,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- EVT VT = Op0.getValueType();
+ MVT VT = Op0.getSimpleValueType();
SDLoc dl(Op);
assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
@@ -17034,7 +18422,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Ops, dl);
}
-// Return true if the requred (according to Opcode) shift-imm form is natively
+// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
@@ -17054,14 +18442,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
}
// The shift amount is a variable, but it is the same for all vector lanes.
-// These instrcutions are defined together with shift-immediate.
+// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
-// Return true if the requred (according to Opcode) variable-shift form is
+// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
@@ -17133,27 +18521,37 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// i64 SRA needs to be performed as partial shifts.
if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA)
+ Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP())
return ArithmeticShiftRight64(ShiftAmt);
- if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
+ if (VT == MVT::v16i8 ||
+ (Subtarget->hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- if (Op.getOpcode() == ISD::SHL) {
- // Simple i8 add case
- if (ShiftAmt == 1)
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
+ // Simple i8 add case
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
+
+ // ashr(R, 7) === cmp_slt(R, 0)
+ if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+ if (VT == MVT::v16i8 && Subtarget->hasXOP())
+ return SDValue();
+ if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
@@ -17161,24 +18559,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
R, ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
- if (ShiftAmt == 7) {
- // R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
-
- // R s>> a === ((R u>> a) ^ m) - m
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 32> V(NumElts,
- DAG.getConstant(128 >> ShiftAmt, dl,
- MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
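The mask constant above (128 >> amt) is the shifted-down sign bit, and the xor/sub pair re-sign-extends the logically shifted value. A scalar check of the identity for i8 lanes; the helper name is illustrative:

    #include <cstdint>
    // ashr(x, amt) == sub(xor(lshr(x, amt), m), m) with m = 128 >> amt.
    int8_t ashr8ViaLshr(uint8_t X, unsigned Amt) {
      uint8_t M = static_cast<uint8_t>(128u >> Amt);
      uint8_t R = static_cast<uint8_t>(X >> Amt);
      return static_cast<int8_t>((R ^ M) - M); // xor flips, sub extends the sign
    }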
@@ -17189,35 +18577,51 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
- if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
- Amt.getOpcode() == ISD::BITCAST &&
- Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (!Subtarget->is64Bit() && !Subtarget->hasXOP() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+
+ // Peek through any splat that was introduced for i64 shift vectorization.
+ int SplatIndex = -1;
+ if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
+ if (SVN->isSplat()) {
+ SplatIndex = SVN->getSplatIndex();
+ Amt = Amt.getOperand(0);
+ assert(SplatIndex < (int)VT.getVectorNumElements() &&
+ "Splat shuffle referencing second operand");
+ }
+
+ if (Amt.getOpcode() != ISD::BITCAST ||
+ Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
+ unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
for (unsigned i = 0; i != Ratio; ++i) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
if (!C)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
- // Check remaining shift amounts.
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
- uint64_t ShAmt = 0;
- for (unsigned j = 0; j != Ratio; ++j) {
- ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (!C)
+
+ // Check remaining shift amounts (if not a splat).
+ if (SplatIndex < 0) {
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (!C)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
return SDValue();
- // 6 == Log2(64)
- ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
- if (ShAmt != ShiftAmt)
- return SDValue();
}
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
@@ -17245,7 +18649,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) {
SDValue BaseShAmt;
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
// Check if this build_vector node is doing a splat.
@@ -17262,7 +18666,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- assert((SplatIdx < InVec.getValueType().getVectorNumElements()) &&
+ assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) &&
"Unexpected shuffle index found!");
BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
@@ -17327,11 +18731,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return V;
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
- return V;
+ return V;
if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
return Op;
+ // XOP has 128-bit variable logical/arithmetic shifts.
+ // +ve/-ve Amt = shift left/right.
+ if (Subtarget->hasXOP() &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v8i16 || VT == MVT::v16i8)) {
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+ SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
+ }
+ if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
+ if (Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
+ }
+
// 2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
@@ -17343,6 +18762,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
+ // i64 vector arithmetic shift can be emulated with the transform:
+ // M = lshr(SIGN_BIT, Amt)
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
+ if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) &&
+ Op.getOpcode() == ISD::SRA) {
+ SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
+ SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
+ R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ R = DAG.getNode(ISD::XOR, dl, VT, R, M);
+ R = DAG.getNode(ISD::SUB, dl, VT, R, M);
+ return R;
+ }
+
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
@@ -17351,9 +18783,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
(Subtarget->hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
SmallVector<SDValue, 8> Elts;
- EVT SVT = VT.getScalarType();
+ MVT SVT = VT.getVectorElementType();
unsigned SVTBits = SVT.getSizeInBits();
- const APInt &One = APInt(SVTBits, 1);
+ APInt One(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i=0; i !=NumElems; ++i) {
@@ -17364,7 +18796,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+ APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
@@ -17443,7 +18875,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD.
- EVT CastVT = MVT::v4i32;
+ MVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
@@ -17507,7 +18939,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
- if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+ if (VT == MVT::v16i8 ||
+ (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
@@ -17627,7 +19060,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
- if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+ if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -17710,7 +19143,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
@@ -17743,6 +19176,40 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
}
+static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ assert(VT.isVector() && "Custom lowering only for vector rotates!");
+ assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+ assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
+
+ // XOP has 128-bit vector variable + immediate rotates.
+ // +ve/-ve Amt = rotate left/right.
+
+ // Split 256-bit integers.
+ if (VT.is256BitVector())
+ return Lower256IntArith(Op, DAG);
+
+ assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
+
+ // Attempt to rotate by immediate.
+ if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+ if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
+ uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
+ assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
+ return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
+ }
+ }
+
+ // Use general rotate by variable (per-element).
+ return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
+}
+
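A scalar sketch of the per-lane behavior the comments above describe for XOP rotates (a positive amount rotates left, a negative amount rotates right). This is illustrative only, with a made-up function name and a 32-bit lane width:

#include <cassert>
#include <cstdint>

// Model of one variable-rotate lane: the amount is reduced modulo the
// element width, so a negative amount wraps around to a right rotate.
static uint32_t rotate_lane(uint32_t R, int Amt) {
  unsigned A = static_cast<unsigned>(Amt) % 32u;
  return A ? (R << A) | (R >> (32u - A)) : R;
}

int main() {
  assert(rotate_lane(0x80000001u, 1) == 0x00000003u);  // rotate left
  assert(rotate_lane(0x00000003u, -1) == 0x80000001u); // rotate right
  return 0;
}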
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@@ -17759,8 +19226,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
@@ -17775,8 +19241,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
@@ -17827,7 +19292,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
@@ -17844,21 +19309,23 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32-bit x86, fild/movq might be faster than lock cmpxchg8b.
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
- return needsCmpXchgNb(PTy->getElementType());
+ return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
- const Type *MemType = AI->getType();
+ Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
- return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
- : AtomicRMWExpansionKind::None;
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
AtomicRMWInst::BinOp Op = AI->getOperation();
@@ -17869,14 +19336,14 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
- return AtomicRMWExpansionKind::None;
+ return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
- return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
- : AtomicRMWExpansionKind::None;
+ return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -17884,7 +19351,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
- return AtomicRMWExpansionKind::CmpXChg;
+ return AtomicExpansionKind::CmpXChg;
}
}
@@ -17898,7 +19365,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) {
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
- const Type *MemType = AI->getType();
+ Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
// harmful as it introduces an mfence.
@@ -17926,7 +19393,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// lowered to just a load without a fence. An mfence flushes the store buffer,
// making the optimization clearly correct.
// FIXME: it is required if isAtLeastRelease(Order) but it is not clear
- // otherwise, we might be able to be more agressive on relaxed idempotent
+ // otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
if (SynchScope == SingleThread)
@@ -18043,7 +19510,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
SDValue InVec = Op->getOperand(0);
SDLoc dl(Op);
unsigned NumElts = SrcVT.getVectorNumElements();
- EVT SVT = SrcVT.getVectorElementType();
+ MVT SVT = SrcVT.getVectorElementType();
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
@@ -18103,7 +19570,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// chunks, thus directly computes the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
- V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+ MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+ V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
}
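The PSADBW steps above lean on the fact that a sum of absolute differences against zero is simply a horizontal byte sum, which collapses packed per-byte popcounts into a per-i64-lane population count. A scalar sketch of one lane (illustrative, not patch code):

#include <cassert>
#include <cstdint>

// One PSADBW lane with a zero second operand: sum |byte - 0| over the
// eight bytes of the lane.
static uint64_t psadbw_lane(uint64_t PackedBytePopcnts) {
  uint64_t Sum = 0;
  for (int i = 0; i < 8; ++i)
    Sum += (PackedBytePopcnts >> (8 * i)) & 0xFF;
  return Sum;
}

int main() {
  // Per-byte popcounts of 0xFF00FF00FF00FF00, packed one per byte.
  assert(psadbw_lane(0x0800080008000800ULL) == 32);
  return 0;
}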
@@ -18119,9 +19587,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// Do the horizontal sums into two v2i64s.
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
- Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+ Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
- High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, High), Zeros);
// Merge them together.
@@ -18311,7 +19780,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- assert(Op.getValueType().isVector() &&
+ assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
@@ -18357,7 +19826,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getNode()->getSimpleValueType(0);
+ MVT VT = Op.getNode()->getSimpleValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -18435,31 +19904,203 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
+/// Widen a vector input to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
+ bool FillWithZeroes = false) {
+ // Check if InOp already has the right width.
+ MVT InVT = InOp.getSimpleValueType();
+ if (InVT == NVT)
+ return InOp;
+
+ if (InOp.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
+ "Unexpected request for vector widening");
+
+ EVT EltVT = NVT.getVectorElementType();
+
+ SDLoc dl(InOp);
+ if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
+ InOp.getNumOperands() == 2) {
+ SDValue N1 = InOp.getOperand(1);
+ if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
+ N1.isUndef()) {
+ InOp = InOp.getOperand(0);
+ InVT = InOp.getSimpleValueType();
+ InNumElts = InVT.getVectorNumElements();
+ }
+ }
+ if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i < InNumElts; ++i)
+ Ops.push_back(InOp.getOperand(i));
+
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
+ for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
+ Ops.push_back(FillVal);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+ }
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
+ DAG.getUNDEF(NVT);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
+ InOp, DAG.getIntPtrConstant(0, dl));
+}
+
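A scalar sketch of what ExtendToType produces on its constant BUILD_VECTOR path: the input elements are kept and the tail is padded, with zeroes when FillWithZeroes is set. The std::vector stand-in and the -1-for-undef convention are illustrative assumptions, not patch code:

#include <cassert>
#include <cstdint>
#include <vector>

// Widen a vector of elements to WidenNumElts, padding with zeroes or
// with -1 as a stand-in for undef elements.
static std::vector<int32_t> extendToType(const std::vector<int32_t> &In,
                                         unsigned WidenNumElts,
                                         bool FillWithZeroes) {
  assert(WidenNumElts > In.size() && WidenNumElts % In.size() == 0);
  std::vector<int32_t> Out(In);
  Out.resize(WidenNumElts, FillWithZeroes ? 0 : -1);
  return Out;
}

int main() {
  assert(extendToType({7, 9}, 4, true) == (std::vector<int32_t>{7, 9, 0, 0}));
  return 0;
}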
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Subtarget->hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
+ // X86 scatter kills the mask register, so its type should be added to
+ // the list of return values.
+ // If the "scatter" has 2 return values, it is already handled.
+ if (Op.getNode()->getNumValues() == 2)
+ return Op;
+
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
- EVT VT = N->getValue().getValueType();
+ SDValue Src = N->getValue();
+ MVT VT = Src.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
SDLoc dl(Op);
- // X86 scatter kills mask register, so its type should be added to
- // the list of return values
- if (N->getNumValues() == 1) {
- SDValue Index = N->getIndex();
- if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
- !Index.getValueType().is512BitVector())
+ SDValue NewScatter;
+ SDValue Index = N->getIndex();
+ SDValue Mask = N->getMask();
+ SDValue Chain = N->getChain();
+ SDValue BasePtr = N->getBasePtr();
+ MVT MemVT = N->getMemoryVT().getSimpleVT();
+ MVT IndexVT = Index.getSimpleValueType();
+ MVT MaskVT = Mask.getSimpleValueType();
+
+ if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
+ // The v2i32 value was promoted to v2i64.
+ // Now we "redo" the type legalizer's work and widen the original
+ // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
+ // with a shuffle.
+ assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
+ "Unexpected memory type");
+ int ShuffleMask[] = {0, 2, -1, -1};
+ Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
+ DAG.getUNDEF(MVT::v4i32), ShuffleMask);
+ // Now we have 4 elements instead of 2.
+ // Expand the index.
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
+ Index = ExtendToType(Index, NewIndexVT, DAG);
+
+ // Expand the mask with zeroes.
+ // The mask may be <2 x i64> or <2 x i1> at this point.
+ assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
+ "Unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
+ Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+ VT = MVT::v4i32;
+ }
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ !Index.getSimpleValueType().is512BitVector()) {
+ // AVX512F supports only 512-bit vectors. Either the data or the index
+ // should be 512 bits wide. If both the index and the data are currently
+ // 256-bit but the vector contains 8 elements, we just sign-extend the index.
+ if (IndexVT == MVT::v8i32)
+ // Just extend index
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+ else {
+ // The minimum number of elements in a scatter is 8.
+ NumElts = 8;
+ // Index
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+ // Use the original index here; do not modify the index twice.
+ Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
+ if (IndexVT.getScalarType() == MVT::i32)
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+ // Mask
+ // At this point we have a promoted mask operand.
+ assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+ // Use the original mask here; do not modify the mask twice.
+ Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
+
+ // The value that should be stored
+ MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+ Src = ExtendToType(Src, NewVT, DAG);
+ }
+ }
+ // If the mask is "wide" at this point - truncate it to i1 vector
+ MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
+
+ // The mask is killed by the scatter; add it to the values.
+ SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
+ SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
+ NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
+ return SDValue(NewScatter.getNode(), 0);
+}
+
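The "redo the type legalizer's work" step in LowerMSCATTER above can be pictured at the byte level: on a little-endian target, the payload of a v2i32 that was promoted to v2i64 sits in lanes 0 and 2 of the v4i32 bitcast, which is exactly what the {0, 2, -1, -1} shuffle extracts. A scalar sketch (illustrative, little-endian assumed):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Each promoted i64 lane holds the original i32 in its low half.
  uint64_t Promoted[2] = {0xDEADBEEFu, 0xCAFEF00Du};
  uint32_t Lanes[4];
  std::memcpy(Lanes, Promoted, sizeof(Lanes)); // models the v4i32 bitcast
  assert(Lanes[0] == 0xDEADBEEFu && Lanes[2] == 0xCAFEF00Du);
  return 0;
}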
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
- SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), Index };
+ MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+ MVT VT = Op.getSimpleValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(Op);
- SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
- DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
- return SDValue(NewScatter.getNode(), 0);
+ if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+ // This operation is legal for targets with VLX, but without
+ // VLX the vector should be widened to 512 bits.
+ unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+ SDValue Src0 = N->getSrc0();
+ Src0 = ExtendToType(Src0, WideDataVT, DAG);
+ Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
+ N->getBasePtr(), Mask, Src0,
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getExtensionType());
+
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ NewLoad.getValue(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
+ return DAG.getMergeValues(RetOps, dl);
+ }
+ return Op;
+}
+
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
+ SDValue DataToStore = N->getValue();
+ MVT VT = DataToStore.getSimpleValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(Op);
+
+ if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+ // This operation is legal for targets with VLX, but without
+ // VLX the vector should be widened to 512 bits.
+ unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+ DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
+ Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
+ Mask, N->getMemoryVT(), N->getMemOperand(),
+ N->isTruncatingStore());
}
return Op;
}
@@ -18470,17 +20111,59 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
- EVT VT = Op.getValueType();
- assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
SDLoc dl(Op);
-
+ MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
+ SDValue Mask = N->getMask();
+ SDValue Src0 = N->getValue();
+ MVT IndexVT = Index.getSimpleValueType();
+ MVT MaskVT = Mask.getSimpleValueType();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
+
if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
- !Index.getValueType().is512BitVector()) {
- Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), Index };
- DAG.UpdateNodeOperands(N, Ops);
+ !Index.getSimpleValueType().is512BitVector()) {
+ // AVX512F supports only 512-bit vectors. Either the data or the index
+ // should be 512 bits wide. If both the index and the data are currently
+ // 256-bit but the vector contains 8 elements, we just sign-extend the index.
+ if (NumElts == 8) {
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), Index };
+ DAG.UpdateNodeOperands(N, Ops);
+ return Op;
+ }
+
+ // The minimum number of elements in a gather is 8.
+ NumElts = 8;
+ // Index
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+ Index = ExtendToType(Index, NewIndexVT, DAG);
+ if (IndexVT.getScalarType() == MVT::i32)
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+ // Mask
+ MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
+ // At this point we have a promoted mask operand.
+ assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+ Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+ Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
+
+ // The pass-thru value
+ MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+ Src0 = ExtendToType(Src0, NewVT, DAG);
+
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ NewGather.getValue(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue RetOps[] = {Extract, NewGather.getValue(1)};
+ return DAG.getMergeValues(RetOps, dl);
}
return Op;
}
@@ -18572,6 +20255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -18592,12 +20276,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
- case ISD::CTLZ: return LowerCTLZ(Op, DAG);
- case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
- case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, Subtarget, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
+ case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
@@ -18615,7 +20301,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN: return LowerMINMAX(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
+ case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
+ case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
@@ -18634,14 +20326,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case X86ISD::AVG: {
+ // Legalize types for X86ISD::AVG by expanding vectors.
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+
+ auto InVT = N->getValueType(0);
+ auto InVTSize = InVT.getSizeInBits();
+ const unsigned RegSize =
+ (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
+ assert((Subtarget->hasAVX512() || RegSize < 512) &&
+ "512-bit vector requires AVX512");
+ assert((Subtarget->hasAVX2() || RegSize < 256) &&
+ "256-bit vector requires AVX2");
+
+ auto ElemVT = InVT.getVectorElementType();
+ auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ RegSize / ElemVT.getSizeInBits());
+ assert(RegSize % InVT.getSizeInBits() == 0);
+ unsigned NumConcat = RegSize / InVT.getSizeInBits();
+
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ Ops[0] = N->getOperand(0);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ Ops[0] = N->getOperand(1);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+
+ SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
+ Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ DAG.getIntPtrConstant(0, dl)));
+ return;
+ }
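For context on what is being widened here: X86ISD::AVG models the packed rounded average (PAVGB/PAVGW). A per-lane scalar sketch, assuming the usual (a + b + 1) >> 1 instruction semantics, which the patch itself does not spell out:

#include <cassert>
#include <cstdint>

// One PAVGB lane: integer promotion to int keeps the intermediate sum
// from overflowing before the shift.
static uint8_t pavgb_lane(uint8_t A, uint8_t B) {
  return static_cast<uint8_t>((A + B + 1) >> 1);
}

int main() {
  assert(pavgb_lane(1, 2) == 2);       // rounds upward on ties
  assert(pavgb_lane(255, 255) == 255); // no wraparound in the wider sum
  return 0;
}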
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
- if (VT != MVT::v2f32)
- llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
+ assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
@@ -18668,17 +20389,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
- // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
- // (FP_TO_SINT (load f16)) to FP_TO_INT*.
- if (N->getOperand(0).getValueType() == MVT::f16)
- break;
- // fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
- if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
- return;
-
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -18707,6 +20420,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
@@ -18740,6 +20454,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
}
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
@@ -18748,7 +20467,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
- EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+ MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, dl, HalfT));
@@ -18884,6 +20603,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::IRET: return "X86ISD::IRET";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
@@ -18910,6 +20630,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::ABS: return "X86ISD::ABS";
+ case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -18937,12 +20658,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
- case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
+ case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
+ case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
+ case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18978,6 +20701,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
@@ -19000,6 +20724,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
@@ -19009,11 +20734,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
+ case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";
+ case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -19022,10 +20749,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SFENCE: return "X86ISD::SFENCE";
case X86ISD::LFENCE: return "X86ISD::LFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
- case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
+ case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
+ case X86ISD::VPROT: return "X86ISD::VPROT";
+ case X86ISD::VPROTI: return "X86ISD::VPROTI";
+ case X86ISD::VPSHA: return "X86ISD::VPSHA";
+ case X86ISD::VPSHL: return "X86ISD::VPSHL";
+ case X86ISD::VPCOM: return "X86ISD::VPCOM";
+ case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
@@ -19038,7 +20772,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
- case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE";
+ case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
+ case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
@@ -19064,6 +20800,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
+ case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
+ case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
}
return nullptr;
}
@@ -19218,7 +20956,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
+ if (!Subtarget->hasAnyFMA())
return false;
VT = VT.getScalarType();
@@ -19253,11 +20991,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return false;
// Not for i1 vectors
- if (VT.getScalarType() == MVT::i1)
+ if (VT.getSimpleVT().getScalarType() == MVT::i1)
return false;
// Very little shuffling can be done for 64-bit vectors right now.
- if (VT.getSizeInBits() == 64)
+ if (VT.getSimpleVT().getSizeInBits() == 64)
return false;
// We only care that the types being shuffled are legal. The lowering can
@@ -19282,8 +21020,7 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// For the v = xbegin(), we generate
//
@@ -19531,8 +21268,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
@@ -19702,8 +21438,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
// stores were performed.
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
@@ -19727,7 +21462,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
- if (!Subtarget->isTargetWin64()) {
+ if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -19744,9 +21479,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
- MachineMemOperand *MMO =
- F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand *MMO = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
@@ -19800,6 +21534,39 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
return true;
}
+// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
+// together with other CMOV pseudo-opcodes into a single basic-block with
+// a conditional jump around it.
+static bool isCMOVPseudo(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_GR8:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
+ case X86::CMOV_V8I1:
+ case X86::CMOV_V16I1:
+ case X86::CMOV_V32I1:
+ case X86::CMOV_V64I1:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -19811,8 +21578,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -19823,8 +21589,41 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
- // We also lower double CMOVs:
+ // This code lowers all pseudo-CMOV instructions. Generally it lowers these
+ // as described above, by inserting a BB, and then making a PHI at the join
+ // point to select the true and false operands of the CMOV in the PHI.
+ //
+ // The code also handles two different cases of multiple CMOV opcodes
+ // in a row.
+ //
+ // Case 1:
+ // In this case, there are multiple CMOVs in a row, all of which are based on
+ // the same condition setting (or the exact opposite condition setting).
+ // In this case we can lower all the CMOVs using a single inserted BB, and
+ // then make a number of PHIs at the join point to model the CMOVs. The only
+ // trickiness here is that in a case like:
+ //
+ // t2 = CMOV cond1 t1, f1
+ // t3 = CMOV cond1 t2, f2
+ //
+ // when rewriting this into PHIs, we have to perform some renaming on the
+ // temps since you cannot have a PHI operand refer to a PHI result earlier
+ // in the same block. The "simple" but wrong lowering would be:
+ //
+ // t2 = PHI t1(BB1), f1(BB2)
+ // t3 = PHI t2(BB1), f2(BB2)
+ //
+ // but clearly t2 is not defined in BB1, so that is incorrect. The proper
+ // renaming is to note that on the path through BB1, t2 is really just a
+ // copy of t1, and do that renaming, properly generating:
+ //
+ // t2 = PHI t1(BB1), f1(BB2)
+ // t3 = PHI t1(BB1), f2(BB2)
+ //
+ // Case 2: we lower cascaded CMOVs such as
+ //
// (CMOV (CMOV F, T, cc1), T, cc2)
+ //
// to two successive branches. For that, we look for another CMOV as the
// following instruction.
//
@@ -19890,19 +21689,42 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// .LBB5_4:
// retq
//
- MachineInstr *NextCMOV = nullptr;
+ MachineInstr *CascadedCMOV = nullptr;
+ MachineInstr *LastCMOV = MI;
+ X86::CondCode CC = X86::CondCode(MI->getOperand(3).getImm());
+ X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
- if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+
+ // First check for case 1, where there are multiple CMOVs with the same
+ // condition. Of the two cases of multiple CMOV lowerings, case 1 reduces the
+ // number of jumps the most.
+
+ if (isCMOVPseudo(MI)) {
+ // See if we have a string of CMOVs with the same condition.
+ while (NextMIIt != BB->end() &&
+ isCMOVPseudo(NextMIIt) &&
+ (NextMIIt->getOperand(3).getImm() == CC ||
+ NextMIIt->getOperand(3).getImm() == OppCC)) {
+ LastCMOV = &*NextMIIt;
+ ++NextMIIt;
+ }
+ }
+
+ // This checks for case 2, but we only do this if we didn't already find
+ // case 1, as indicated by LastCMOV == MI.
+ if (LastCMOV == MI &&
+ NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
- NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
- NextCMOV = &*NextMIIt;
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) {
+ CascadedCMOV = &*NextMIIt;
+ }
MachineBasicBlock *jcc1MBB = nullptr;
- // If we have a double CMOV, we lower it to two successive branches to
+ // If we have a cascaded CMOV, we lower it to two successive branches to
// the same block. EFLAGS is used by both, so mark it as live in the second.
- if (NextCMOV) {
+ if (CascadedCMOV) {
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, jcc1MBB);
jcc1MBB->addLiveIn(X86::EFLAGS);
@@ -19917,7 +21739,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -19926,12 +21748,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- if (NextCMOV) {
- // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ if (CascadedCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
BB->addSuccessor(jcc1MBB);
// In that case, jcc1MBB will itself fallthrough the copy0MBB, and
@@ -19946,13 +21768,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ unsigned Opc = X86::GetCondBranchFromCond(CC);
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
- if (NextCMOV) {
+ if (CascadedCMOV) {
unsigned Opc2 = X86::GetCondBranchFromCond(
- (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ (X86::CondCode)CascadedCMOV->getOperand(3).getImm());
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
}
@@ -19964,28 +21785,110 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- MachineInstrBuilder MIB =
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator MIItEnd =
+ std::next(MachineBasicBlock::iterator(LastCMOV));
+ MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+ MachineInstrBuilder MIB;
+
+ // As we are creating the PHIs, we have to be careful if there is more than
+ // one. Later CMOVs may reference the results of earlier CMOVs, but later
+ // PHIs have to reference the individual true/false inputs from earlier PHIs.
+ // That also means that PHI construction must work forward from earlier to
+ // later, and that the code must maintain a mapping from each earlier PHI's
+ // destination register to the registers that went into that PHI.
- // If we have a double CMOV, the second Jcc provides the same incoming
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+ unsigned DestReg = MIIt->getOperand(0).getReg();
+ unsigned Op1Reg = MIIt->getOperand(1).getReg();
+ unsigned Op2Reg = MIIt->getOperand(2).getReg();
+
+ // If the CMOV we are generating has the opposite condition from
+ // the jump we generated, then we have to swap the operands for the
+ // PHI that is going to be generated.
+ if (MIIt->getOperand(3).getImm() == OppCC)
+ std::swap(Op1Reg, Op2Reg);
+
+ if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
+ Op1Reg = RegRewriteTable[Op1Reg].first;
+
+ if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
+ Op2Reg = RegRewriteTable[Op2Reg].second;
+
+ MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(Op1Reg).addMBB(copy0MBB)
+ .addReg(Op2Reg).addMBB(thisMBB);
+
+ // Add this PHI to the rewrite table.
+ RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
+ }
+
+ // If we have a cascaded CMOV, the second Jcc provides the same incoming
// value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
- if (NextCMOV) {
+ if (CascadedCMOV) {
MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
// Copy the PHI result to the register defined by the second CMOV.
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
- DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ DL, TII->get(TargetOpcode::COPY),
+ CascadedCMOV->getOperand(0).getReg())
.addReg(MI->getOperand(0).getReg());
- NextCMOV->eraseFromParent();
+ CascadedCMOV->eraseFromParent();
}
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ // Now remove the CMOV(s).
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
+ (MIIt++)->eraseFromParent();
+
return sinkMBB;
}
MachineBasicBlock *
+X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // Combine the following atomic floating-point modification pattern:
+ // a.store(reg OP a.load(acquire), release)
+ // Transform it into:
+ // OPss (%gpr), %xmm
+ // movss %xmm, (%gpr)
+ // or the sd equivalents for 64-bit operations.
+ unsigned MOp, FOp;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
+ case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
+ case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
+ }
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineOperand MSrc = MI->getOperand(0);
+ unsigned VSrc = MI->getOperand(5).getReg();
+ const MachineOperand &Disp = MI->getOperand(3);
+ MachineOperand ZeroDisp = MachineOperand::CreateImm(0);
+ bool hasDisp = Disp.isGlobal() || Disp.isImm();
+ if (hasDisp && MSrc.isReg())
+ MSrc.setIsKill(false);
+ MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp))
+ .addOperand(/*Base=*/MSrc)
+ .addImm(/*Scale=*/1)
+ .addReg(/*Index=*/0)
+ .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+ .addReg(0);
+ MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp),
+ MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
+ .addReg(VSrc)
+ .addOperand(/*Base=*/MSrc)
+ .addImm(/*Scale=*/1)
+ .addReg(/*Index=*/0)
+ .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+ .addReg(/*Segment=*/0);
+ MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
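The source-level shape this lowering targets is, roughly, the following C++: a sketch of the a.store(reg OP a.load(acquire), release) pattern from the comment above, with an illustrative function name.

#include <atomic>

// An atomic float read-modify-write spelled as an acquire load, an FP
// add, and a release store of the result.
void atomic_fadd(std::atomic<float> &A, float X) {
  A.store(A.load(std::memory_order_acquire) + X,
          std::memory_order_release);
}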
+MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
@@ -20032,8 +21935,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
- MachineFunction::iterator MBBIter = BB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
@@ -20120,14 +22022,60 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ assert(!Subtarget->isTargetMachO());
DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+ *BB->getParent(), *BB, MI, DL, false);
+ MachineBasicBlock *ResumeBB = ResumeMI->getParent();
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return ResumeBB;
+}
- assert(!Subtarget->isTargetMachO());
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
+ DebugLoc DL = MI->getDebugLoc();
- Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
- DL);
+ assert(!isAsynchronousEHPersonality(
+ classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
+ "SEH does not use catchret!");
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ // Only 32-bit EH needs to worry about manually restoring stack pointers.
+ if (!Subtarget->is32Bit())
+ return BB;
+
+ // C++ EH creates a new target block to hold the restore code, and wires up
+ // the new block to the return destination with a normal JMP_4.
+ MachineBasicBlock *RestoreMBB =
+ MF->CreateMachineBasicBlock(BB->getBasicBlock());
+ assert(BB->succ_size() == 1);
+ MF->insert(std::next(BB->getIterator()), RestoreMBB);
+ RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(RestoreMBB);
+ MI->getOperand(0).setMBB(RestoreMBB);
+
+ auto RestoreMBBI = RestoreMBB->begin();
+ BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
+ BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const Constant *PerFn = MF->getFunction()->getPersonalityFn();
+ bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
+ // Only 32-bit SEH requires special handling for catchpad.
+ if (IsSEH && Subtarget->is32Bit()) {
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
+ }
+ MI->eraseFromParent();
return BB;
}
@@ -20149,6 +22097,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
+ Subtarget->is64Bit() ?
+ Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -20198,8 +22148,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -20225,7 +22174,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// For v = setjmp(buf), we generate
//
// thisMBB:
- // buf[LabelOffset] = restoreMBB
+ // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
@@ -20245,6 +22194,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
+ restoreMBB->setHasAddressTaken();
MachineInstrBuilder MIB;
@@ -20511,35 +22461,44 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
+ case X86::CATCHRET:
+ return EmitLoweredCatchRet(MI, BB);
+ case X86::CATCHPAD:
+ return EmitLoweredCatchPad(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
- case X86::CMOV_GR8:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
- case X86::CMOV_V4F32:
+ case X86::CMOV_FR128:
+ case X86::CMOV_GR8:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
- case X86::CMOV_V8F32:
+ case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
+ case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
- case X86::CMOV_GR16:
- case X86::CMOV_GR32:
- case X86::CMOV_RFP32:
- case X86::CMOV_RFP64:
- case X86::CMOV_RFP80:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
+ case X86::RELEASE_FADD32mr:
+ case X86::RELEASE_FADD64mr:
+ return EmitLoweredAtomicFP(MI, BB);
+
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
@@ -20793,7 +22752,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getValueType().getScalarType().getSizeInBits();
+ return Op.getValueType().getScalarSizeInBits();
// Fallback case.
return 1;
@@ -20814,39 +22773,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
-/// same as extracting the high 128-bit part of 256-bit vector and then
-/// inserting the result into the low part of a new 256-bit vector
-static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
-/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
-/// same as extracting the low 128-bit part of 256-bit vector and then
-/// inserting the result into the high part of a new 256-bit vector
-static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+/// FIXME: This could be expanded to support 512-bit vectors as well.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget* Subtarget) {
@@ -20854,7 +22782,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
@@ -20920,24 +22848,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, InsV);
}
- //===--------------------------------------------------------------------===//
- // Combine some shuffles into subvector extracts and inserts:
- //
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- if (isShuffleHigh128VectorInsertLow(SVOp)) {
- SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- if (isShuffleLow128VectorInsertHigh(SVOp)) {
- SDValue V = Extract128BitVector(V1, 0, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
return SDValue();
}
@@ -20966,10 +22876,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT RootVT = Root.getSimpleValueType();
SDLoc DL(Root);
- // Just remove no-op shuffle masks.
if (Mask.size() == 1) {
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
- /*AddTo*/ true);
+ int Index = Mask[0];
+ assert((Index >= 0 || Index == SM_SentinelUndef ||
+ Index == SM_SentinelZero) &&
+ "Invalid shuffle index found!");
+
+ // We may end up with an accumulated mask of size 1 as a result of
+ // widening of shuffle operands (see function canWidenShuffleElements).
+ // If the only shuffle index is equal to SM_SentinelZero then propagate
+ // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
+ // mask, and therefore the entire chain of shuffles can be folded away.
+ if (Index == SM_SentinelZero)
+ DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+ else
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+ /*AddTo*/ true);
return true;
}
@@ -20985,7 +22907,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// doesn't preclude something switching to the shorter encoding post-RA.
//
// FIXME: Should teach these routines about AVX vector widths.
- if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (FloatDomain && VT.is128BitVector()) {
if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
@@ -21049,7 +22971,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ if (!FloatDomain && VT.is128BitVector() &&
(Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
@@ -21226,26 +23148,28 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
// See if we can recurse into the operand to combine more things.
switch (Op.getOpcode()) {
- case X86ISD::PSHUFB:
- HasPSHUFB = true;
- case X86ISD::PSHUFD:
- case X86ISD::PSHUFHW:
- case X86ISD::PSHUFLW:
- if (Op.getOperand(0).hasOneUse() &&
- combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
- HasPSHUFB, DAG, DCI, Subtarget))
- return true;
- break;
+ case X86ISD::PSHUFB:
+ HasPSHUFB = true;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ if (Op.getOperand(0).hasOneUse() &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
- case X86ISD::UNPCKL:
- case X86ISD::UNPCKH:
- assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!");
- // We can't check for single use, we have to check that this shuffle is the only user.
- if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
- combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
- HasPSHUFB, DAG, DCI, Subtarget))
- return true;
- break;
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ assert(Op.getOperand(0) == Op.getOperand(1) &&
+ "We only combine unary shuffles!");
+ // We can't check for single use; we have to check that this shuffle is the
+ // only user.
+ if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
}
// Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -21360,8 +23284,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
- if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
- V.getSimpleValueType().getScalarType() != MVT::i16)
+ if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
+ V.getSimpleValueType().getVectorElementType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@@ -21438,7 +23362,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
-/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
+/// \brief Search for a combinable shuffle across a chain ending in pshuflw or
+/// pshufhw.
///
/// We walk up the chain, skipping shuffles of the other half and looking
/// through shuffles which switch halves trying to find a shuffle of the same
@@ -21520,6 +23445,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
+ case X86ISD::UNPCKL: {
+ // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
+ // which X86ISD::UNPCKL has an ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
+ // moves the upper half elements into the lower half. For example:
+ //
+ // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
+ // undef:v16i8
+ // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
+ //
+ // will be combined to:
+ //
+ // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
+
+ // This is done only for 128-bit vectors. From SSE4.1 onward this combine
+ // may not trigger, as more capable instructions are selected instead.
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ auto Op0 = N.getOperand(0);
+ auto Op1 = N.getOperand(1);
+ if (Op0.getOpcode() == ISD::UNDEF &&
+ Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> ExpectedMask(NumElts, -1);
+ std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
+ NumElts / 2);
+
+ auto ShufOp = Op1.getOperand(0);
+ if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
+ }
+ return SDValue();
+ }
default:
return SDValue();
}
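The ExpectedMask built in the UNPCKL case above can be sketched standalone; assuming -1 marks an undef lane, NumElts == 16 produces {8..15, -1 x 8}:

    #include <numeric>
    #include <vector>

    std::vector<int> makeExpectedMask(unsigned NumElts) {
      std::vector<int> Mask(NumElts, -1); // undef lanes stay -1
      // The low half selects the upper-half source lanes NumElts/2, NumElts/2+1, ...
      std::iota(Mask.begin(), Mask.begin() + NumElts / 2, int(NumElts / 2));
      return Mask;
    }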
@@ -21535,7 +23495,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
- assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
+ assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@@ -21624,14 +23584,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
return SDValue();
auto *SVN = cast<ShuffleVectorSDNode>(N);
- ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int, 8> Mask;
+ for (int M : SVN->getMask())
+ Mask.push_back(M);
+
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
- // We require the first shuffle operand to be the SUB node, and the second to
- // be the ADD node.
- // FIXME: We should support the commuted patterns.
- if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+ // We require the first shuffle operand to be the FSUB node and the second to
+ // be the FADD node; the commuted pattern is handled by the swap below.
+ if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
return SDValue();
// If there are other uses of these operations we can't fold them.
@@ -21682,7 +23647,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasFp256() && VT.is256BitVector() &&
+ if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
@@ -21866,21 +23831,45 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
-/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
-/// special and don't usually play with other vector types, it's better to
-/// handle them early to be sure we emit efficient code by avoiding
-/// store-load conversions.
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
- if (N->getValueType(0) != MVT::x86mmx ||
- N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
- N->getOperand(0)->getValueType(0) != MVT::v2i32)
- return SDValue();
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
- SDValue V = N->getOperand(0);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
- return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
- N->getValueType(0), V.getOperand(0));
+ // Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+ // special and don't usually play with other vector types, it's better to
+ // handle them early to be sure we emit efficient code by avoiding
+ // store-load conversions.
+ if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getValueType() == MVT::v2i32 &&
+ isNullConstant(N0.getOperand(1))) {
+ SDValue N00 = N0->getOperand(0);
+ if (N00.getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
+ }
+
+ // Convert a bitcasted integer logic operation that has one bitcasted
+ // floating-point operand and one constant operand into a floating-point
+ // logic operation. This may create a load of the constant, but that is
+ // cheaper than materializing the constant in an integer register and
+ // transferring it to an SSE register or transferring the SSE operand to
+ // integer register and back.
+ unsigned FPOpcode;
+ switch (N0.getOpcode()) {
+ case ISD::AND: FPOpcode = X86ISD::FAND; break;
+ case ISD::OR: FPOpcode = X86ISD::FOR; break;
+ case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+ default: return SDValue();
+ }
+ if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+ (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getOperand(0).getValueType() == VT) {
+ SDValue N000 = N0.getOperand(0).getOperand(0);
+ SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+ return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+ }
return SDValue();
}
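As a scalar model of the new FP-logic combine (illustrative, not the SelectionDAG code): an integer AND of a bitcasted f32 and a constant is bit-for-bit the same as doing the AND in the FP domain, which is what an FP AND (e.g. ANDPS) computes:

    #include <cstdint>
    #include <cstring>

    float fandModel(float X, uint32_t C) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast f32 -> i32
      Bits &= C;                            // the integer logic op
      float R;
      std::memcpy(&R, &Bits, sizeof(R));    // bitcast i32 -> f32
      return R;                             // same bits an FP AND would produce
    }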
@@ -21910,26 +23899,26 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
InputVector.getNode()->getOperand(0));
// The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))).
- SDValue MMXSrcOp = MMXSrc.getOperand(0);
if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
- MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() &&
- MMXSrcOp.getOpcode() == ISD::BITCAST &&
- MMXSrcOp.getValueType() == MVT::v1i64 &&
- MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
- return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
- N->getValueType(0),
- MMXSrcOp.getOperand(0));
+ MMXSrc.getValueType() == MVT::i64) {
+ SDValue MMXSrcOp = MMXSrc.getOperand(0);
+ if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST &&
+ MMXSrcOp.getValueType() == MVT::v1i64 &&
+ MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
+ N->getValueType(0), MMXSrcOp.getOperand(0));
+ }
}
EVT VT = N->getValueType(0);
- if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
+ if (VT == MVT::i1 && isa<ConstantSDNode>(N->getOperand(1)) &&
InputVector.getOpcode() == ISD::BITCAST &&
- dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
+ isa<ConstantSDNode>(InputVector.getOperand(0))) {
uint64_t ExtractedElt =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
uint64_t InputValue =
- cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
@@ -22036,96 +24025,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
-static std::pair<unsigned, bool>
-matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const X86Subtarget *Subtarget) {
- if (!VT.isVector())
- return std::make_pair(0, false);
-
- bool NeedSplit = false;
- switch (VT.getSimpleVT().SimpleTy) {
- default: return std::make_pair(0, false);
- case MVT::v4i64:
- case MVT::v2i64:
- if (!Subtarget->hasVLX())
- return std::make_pair(0, false);
- break;
- case MVT::v64i8:
- case MVT::v32i16:
- if (!Subtarget->hasBWI())
- return std::make_pair(0, false);
- break;
- case MVT::v16i32:
- case MVT::v8i64:
- if (!Subtarget->hasAVX512())
- return std::make_pair(0, false);
- break;
- case MVT::v32i8:
- case MVT::v16i16:
- case MVT::v8i32:
- if (!Subtarget->hasAVX2())
- NeedSplit = true;
- if (!Subtarget->hasAVX())
- return std::make_pair(0, false);
- break;
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- if (!Subtarget->hasSSE2())
- return std::make_pair(0, false);
- }
-
- // SSE2 has only a small subset of the operations.
- bool hasUnsigned = Subtarget->hasSSE41() ||
- (Subtarget->hasSSE2() && VT == MVT::v16i8);
- bool hasSigned = Subtarget->hasSSE41() ||
- (Subtarget->hasSSE2() && VT == MVT::v8i16);
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- unsigned Opc = 0;
- // Check for x CC y ? x : y.
- if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
- DAG.isEqualTo(RHS, Cond.getOperand(1))) {
- switch (CC) {
- default: break;
- case ISD::SETULT:
- case ISD::SETULE:
- Opc = hasUnsigned ? ISD::UMIN : 0; break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- Opc = hasUnsigned ? ISD::UMAX : 0; break;
- case ISD::SETLT:
- case ISD::SETLE:
- Opc = hasSigned ? ISD::SMIN : 0; break;
- case ISD::SETGT:
- case ISD::SETGE:
- Opc = hasSigned ? ISD::SMAX : 0; break;
- }
- // Check for x CC y ? y : x -- a min/max with reversed arms.
- } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
- DAG.isEqualTo(RHS, Cond.getOperand(0))) {
- switch (CC) {
- default: break;
- case ISD::SETULT:
- case ISD::SETULE:
- Opc = hasUnsigned ? ISD::UMAX : 0; break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- Opc = hasUnsigned ? ISD::UMIN : 0; break;
- case ISD::SETLT:
- case ISD::SETLE:
- Opc = hasSigned ? ISD::SMAX : 0; break;
- case ISD::SETGT:
- case ISD::SETGE:
- Opc = hasSigned ? ISD::SMIN : 0; break;
- }
- }
-
- return std::make_pair(Opc, NeedSplit);
-}
-
static SDValue
transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -22189,7 +24088,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
+ VT != MVT::f80 && VT != MVT::f128 &&
+ (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -22535,32 +24435,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Try to match a min/max vector operation.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
- std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
- unsigned Opc = ret.first;
- bool NeedSplit = ret.second;
-
- if (Opc && NeedSplit) {
- unsigned NumElems = VT.getVectorNumElements();
- // Extract the LHS vectors
- SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
- SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
-
- // Extract the RHS vectors
- SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
- SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
-
- // Create min/max for each subvector
- LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
- RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
-
- // Merge the result
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
- } else if (Opc)
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
-
// Simplify vector selection if condition value type matches vselect
// operand type
if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
@@ -22635,7 +24509,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
- unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Cond.getValueType().getScalarSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
@@ -22656,14 +24530,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// FIXME: We don't support i16-element blends currently. We could and
// should support them by making *all* the bits in the condition be set
// rather than just the high bit and using an i8-element blend.
- if (VT.getScalarType() == MVT::i16)
+ if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
- if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+ if (VT.is128BitVector() && !Subtarget->hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
- if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
- !Subtarget->hasAVX2())
+ if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
@@ -22773,12 +24646,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
- ConstantSDNode *CS;
- if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
- CS->getZExtValue() == 1)
+ if (isOneConstant(SetCC.getOperand(0)))
OpIdx = 1;
- if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
- CS->getZExtValue() == 1)
+ if (isOneConstant(SetCC.getOperand(1)))
OpIdx = 0;
if (OpIdx == -1)
break;
@@ -22857,8 +24727,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
X86::CondCode &CC1, SDValue &Flags,
bool &isAnd) {
if (Cond->getOpcode() == X86ISD::CMP) {
- ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
- if (!CondOp1C || !CondOp1C->isNullValue())
+ if (!isNullConstant(Cond->getOperand(1)))
return false;
Cond = Cond->getOperand(0);
@@ -23102,106 +24971,15 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: return SDValue();
- // SSE/AVX/AVX2 blend intrinsics.
- case Intrinsic::x86_avx2_pblendvb:
- // Don't try to simplify this intrinsic if we don't have AVX2.
- if (!Subtarget->hasAVX2())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_avx_blendv_pd_256:
- case Intrinsic::x86_avx_blendv_ps_256:
- // Don't try to simplify this intrinsic if we don't have AVX.
- if (!Subtarget->hasAVX())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_sse41_blendvps:
- case Intrinsic::x86_sse41_blendvpd:
- case Intrinsic::x86_sse41_pblendvb: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- SDValue Mask = N->getOperand(3);
-
- // Don't try to simplify this intrinsic if we don't have SSE4.1.
- if (!Subtarget->hasSSE41())
- return SDValue();
-
- // fold (blend A, A, Mask) -> A
- if (Op0 == Op1)
- return Op0;
- // fold (blend A, B, allZeros) -> A
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return Op0;
- // fold (blend A, B, allOnes) -> B
- if (ISD::isBuildVectorAllOnes(Mask.getNode()))
- return Op1;
-
- // Simplify the case where the mask is a constant i32 value.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
- if (C->isNullValue())
- return Op0;
- if (C->isAllOnesValue())
- return Op1;
- }
-
- return SDValue();
- }
-
- // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- EVT VT = Op0.getValueType();
- assert(VT.isVector() && "Expected a vector type!");
-
- if (isa<BuildVectorSDNode>(Op1))
- Op1 = Op1.getOperand(0);
-
- if (!isa<ConstantSDNode>(Op1))
- return SDValue();
-
- EVT SVT = VT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
-
- ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
- const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue());
- uint64_t ShAmt = C.getZExtValue();
-
- // Don't try to convert this shift into a ISD::SRA if the shift
- // count is bigger than or equal to the element size.
- if (ShAmt >= SVTBits)
- return SDValue();
-
- // Trivial case: if the shift count is zero, then fold this
- // into the first operand.
- if (ShAmt == 0)
- return Op0;
-
- // Replace this packed shift intrinsic with a target independent
- // shift dag node.
- SDLoc DL(N);
- SDValue Splat = DAG.getConstant(C, DL, VT);
- return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat);
- }
- }
-}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ // An imul is usually smaller than the alternative sequence.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -23228,9 +25006,11 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
+
+ SDLoc DL(N);
+ SDValue NewMul;
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
- SDLoc DL(N);
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -23239,7 +25019,6 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
// is an add.
std::swap(MulAmt1, MulAmt2);
- SDValue NewMul;
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
@@ -23253,10 +25032,31 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+ }
+
+ if (!NewMul) {
+ assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
+ && "Both cases that could cause potential overflows should have "
+ "already been handled.");
+ if (isPowerOf2_64(MulAmt - 1))
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt - 1), DL,
+ MVT::i8)));
+ else if (isPowerOf2_64(MulAmt + 1))
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
+ N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt + 1),
+ DL, MVT::i8)), N->getOperand(0));
+ }
+
+ if (NewMul)
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
- }
+
return SDValue();
}
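A hypothetical scalar helper mirroring the new 2^N +/- 1 cases above (names illustrative only):

    #include <cstdint>

    uint64_t mulByConstSketch(uint64_t X, uint64_t Amt) {
      auto IsPow2 = [](uint64_t V) { return V != 0 && (V & (V - 1)) == 0; };
      auto Log2 = [](uint64_t V) { unsigned L = 0; while (V >>= 1) ++L; return L; };
      if (IsPow2(Amt - 1))
        return (X << Log2(Amt - 1)) + X; // (mul x, 2^N + 1) -> (add (shl x, N), x)
      if (IsPow2(Amt + 1))
        return (X << Log2(Amt + 1)) - X; // (mul x, 2^N - 1) -> (sub (shl x, N), x)
      return X * Amt;                    // otherwise keep the imul
    }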
@@ -23272,18 +25072,34 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
- ((N00.getOpcode() == ISD::ANY_EXTEND ||
- N00.getOpcode() == ISD::ZERO_EXTEND) &&
- N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- APInt ShAmt = N1C->getAPIntValue();
- Mask = Mask.shl(ShAmt);
- if (Mask != 0) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT,
- N00, DAG.getConstant(Mask, DL, VT));
- }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ bool MaskOK = false;
+ // We can handle cases concerning bit-widening nodes containing setcc_c if
+ // we carefully interrogate the mask to make sure the transform is
+ // semantics-preserving.
+ // The transform is not safe if the result of C1 << C2 exceeds the bitwidth
+ // of the underlying setcc_c operation if the setcc_c was zero extended.
+ // Consider the following example:
+ // zext(setcc_c) -> i32 0x0000FFFF
+ // c1 -> i32 0x0000FFFF
+ // c2 -> i32 0x00000001
+ // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE
+ // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = true;
+ } else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = true;
+ } else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
+ }
+ if (MaskOK && Mask != 0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
}
}
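The worked example in the comment above checks out at compile time:

    static_assert(((0x0000FFFFu & 0x0000FFFFu) << 1) == 0x0001FFFEu,
                  "shifting the masked value keeps the widened bit");
    static_assert((0x0000FFFFu & (0x0000FFFFu << 1)) == 0x0000FFFEu,
                  "pre-shifting the mask drops it, hence the MaskOK check");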
@@ -23304,6 +25120,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned Size = VT.getSizeInBits();
+
+ // fold (ashr (shl a, [56,48,32,24,16]), SarConst)
+ // into (shl (sext a), [56,48,32,24,16] - SarConst) or
+ // into (lshr (sext a), SarConst - [56,48,32,24,16])
+ // depending on the sign of (SarConst - [56,48,32,24,16]).
+
+ // Sign extensions on x86 are MOVs, which have the same code size as the
+ // shifts above (only a shift by 1 has a smaller encoding). However, a MOV
+ // has two advantages over a shift:
+ // 1. it can write to a register that differs from its source, and
+ // 2. it accepts memory operands.
+
+ if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
+ N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
+ N0.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
+ APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
+ EVT CVT = N1.getValueType();
+
+ if (SarConst.isNegative())
+ return SDValue();
+
+ for (MVT SVT : MVT::integer_valuetypes()) {
+ unsigned ShiftSize = SVT.getSizeInBits();
+ // Skip types without a corresponding sext/zext and ShlConst values that
+ // are not one of [56,48,32,24,16].
+ if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
+ continue;
+ SDLoc DL(N);
+ SDValue NN =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
+ SarConst = SarConst - (Size - ShiftSize);
+ if (SarConst == 0)
+ return NN;
+ else if (SarConst.isNegative())
+ return DAG.getNode(ISD::SHL, DL, VT, NN,
+ DAG.getConstant(-SarConst, DL, CVT));
+ else
+ return DAG.getNode(ISD::SRA, DL, VT, NN,
+ DAG.getConstant(SarConst, DL, CVT));
+ }
+ return SDValue();
+}
+
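Concretely, for i32 with ShlConst == SarConst == 24 the shift pair is just a sign extension of the low byte. A scalar sketch, assuming two's-complement narrowing and arithmetic right shifts as on all mainstream targets:

    #include <cstdint>

    int32_t viaShifts(uint32_t X) { return int32_t(X << 24) >> 24; }
    int32_t viaSext(uint32_t X)   { return int8_t(X); } // movsx-style extension
    // For every X the two agree; the sext form can also take a memory operand
    // and write a destination register different from its source.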
/// \brief Returns a vector of 0s if the node in input is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
@@ -23321,14 +25190,15 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
APInt ShiftAmt = AmtSplat->getAPIntValue();
- unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
+ unsigned MaxAmount =
+ VT.getSimpleVT().getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as a 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
- return getZeroVector(VT, Subtarget, DAG, DL);
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
}
return SDValue();
@@ -23342,6 +25212,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (SDValue V = PerformSHLCombine(N, DAG))
return V;
+ if (N->getOpcode() == ISD::SRA)
+ if (SDValue V = PerformSRACombine(N, DAG))
+ return V;
+
// Try to fold this logical shift into a zero vector.
if (N->getOpcode() != ISD::SRA)
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
@@ -23537,7 +25411,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
- N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
@@ -23552,9 +25426,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
- unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ unsigned InBits = NarrowVT.getScalarSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
- Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ Mask = Mask.zext(VT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, DL, VT));
}
@@ -23656,6 +25530,41 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(N0.getValueType(), NewShuffle);
}
+/// If both input operands of a logic op are being cast from floating point
+/// types, try to convert this into a floating point logic node to avoid
+/// unnecessary moves from SSE to integer registers.
+static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ unsigned FPOpcode = ISD::DELETED_NODE;
+ if (N->getOpcode() == ISD::AND)
+ FPOpcode = X86ISD::FAND;
+ else if (N->getOpcode() == ISD::OR)
+ FPOpcode = X86ISD::FOR;
+ else if (N->getOpcode() == ISD::XOR)
+ FPOpcode = X86ISD::FXOR;
+
+ assert(FPOpcode != ISD::DELETED_NODE &&
+ "Unexpected input node for FP logic conversion");
+
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
+ ((Subtarget->hasSSE1() && VT == MVT::i32) ||
+ (Subtarget->hasSSE2() && VT == MVT::i64))) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+ EVT N00Type = N00.getValueType();
+ EVT N10Type = N10.getValueType();
+ if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -23668,6 +25577,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -23728,6 +25640,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -23799,7 +25714,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasSSE41())
return SDValue();
- EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+ MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
@@ -23813,9 +25728,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -23913,17 +25826,188 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+// Try to turn tests against the signbit in the form of:
+// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
+// into:
+// SETGT(X, -1)
+static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
+ // This is only worth doing if the output type is i8.
+ if (N->getValueType(0) != MVT::i8)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // We should be performing an xor against a truncated shift.
+ if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
+ return SDValue();
+
+ // Make sure we are performing an xor against one.
+ if (!isOneConstant(N1))
+ return SDValue();
+
+ // SetCC on x86 zero extends so only act on this if it's a logical shift.
+ SDValue Shift = N0.getOperand(0);
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
+ return SDValue();
+
+ // Make sure we are truncating from one of i16, i32 or i64.
+ EVT ShiftTy = Shift.getValueType();
+ if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
+ return SDValue();
+
+ // Make sure the shift amount extracts the sign bit.
+ if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
+ return SDValue();
+
+ // Create a greater-than comparison against -1.
+ // N.B. Using SETGE against 0 works but we want a canonical-looking
+ // comparison; using SETGT matches what TranslateX86CC produces.
+ SDLoc DL(N);
+ SDValue ShiftOp = Shift.getOperand(0);
+ EVT ShiftOpTy = ShiftOp.getValueType();
+ SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp,
+ DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
+ return Cond;
+}
+
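A scalar model of this rewrite for an i32 input (two's-complement conversion assumed):

    #include <cstdint>

    uint8_t signTestShift(uint32_t X) { return uint8_t(X >> 31) ^ 1; } // before
    uint8_t signTestCmp(uint32_t X)   { return int32_t(X) > -1; }      // after
    // Both yield 1 exactly when the sign bit of X is clear, so the
    // shift+truncate+xor chain collapses to a single comparison.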
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
+ return RV;
+
if (Subtarget->hasCMov())
if (SDValue RV = performIntegerAbsCombine(N, DAG))
return RV;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
+ return SDValue();
+}
+
+/// This function detects the AVG pattern between vectors of unsigned i8/i16,
+/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
+/// X86ISD::AVG instruction.
+static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, SDLoc DL) {
+ if (!VT.isVector() || !VT.isSimple())
+ return SDValue();
+ EVT InVT = In.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ EVT ScalarVT = VT.getVectorElementType();
+ if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
+ isPowerOf2_32(NumElems)))
+ return SDValue();
+
+ // InScalarVT is the intermediate type in the AVG pattern and it should be
+ // wider than the original input type (i8/i16).
+ EVT InScalarVT = InVT.getVectorElementType();
+ if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+ return SDValue();
+
+ if (Subtarget->hasAVX512()) {
+ if (VT.getSizeInBits() > 512)
+ return SDValue();
+ } else if (Subtarget->hasAVX2()) {
+ if (VT.getSizeInBits() > 256)
+ return SDValue();
+ } else {
+ if (VT.getSizeInBits() > 128)
+ return SDValue();
+ }
+
+ // Detect the following pattern:
+ //
+ // %1 = zext <N x i8> %a to <N x i32>
+ // %2 = zext <N x i8> %b to <N x i32>
+ // %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
+ // %4 = add nuw nsw <N x i32> %3, %2
+ // %5 = lshr <N x i32> %4, <i32 1 x N>
+ // %6 = trunc <N x i32> %5 to <N x i8>
+ //
+ // In AVX512, the last instruction can also be a trunc store.
+
+ if (In.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // A lambda checking that the given SDValue is a constant vector and each
+ // element is in the range [Min, Max].
+ auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
+ if (!BV || !BV->isConstant())
+ return false;
+ for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+ if (!C)
+ return false;
+ uint64_t Val = C->getZExtValue();
+ if (Val < Min || Val > Max)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if each element of the vector is logically right-shifted by one.
+ auto LHS = In.getOperand(0);
+ auto RHS = In.getOperand(1);
+ if (!IsConstVectorInRange(RHS, 1, 1))
+ return SDValue();
+ if (LHS.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // Detect a pattern of a + b + 1 where the order doesn't matter.
+ SDValue Operands[3];
+ Operands[0] = LHS.getOperand(0);
+ Operands[1] = LHS.getOperand(1);
+
+ // Take care of the case when one of the operands is a constant vector whose
+ // element is in the range [1, 256] (or [1, 65536] for i16).
+ if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
+ Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
+ Operands[0].getOperand(0).getValueType() == VT) {
+ // The pattern is detected. Subtract one from the constant vector, then
+ // demote it and emit X86ISD::AVG instruction.
+ SDValue One = DAG.getConstant(1, DL, InScalarVT);
+ SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT,
+ SmallVector<SDValue, 8>(NumElems, One));
+ Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones);
+ Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
+ return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+ Operands[1]);
+ }
+
+ if (Operands[0].getOpcode() == ISD::ADD)
+ std::swap(Operands[0], Operands[1]);
+ else if (Operands[1].getOpcode() != ISD::ADD)
+ return SDValue();
+ Operands[2] = Operands[1].getOperand(0);
+ Operands[1] = Operands[1].getOperand(1);
+
+ // Now we have three operands of two additions. Check that one of them is a
+ // constant vector with ones, and the other two are promoted from i8/i16.
+ for (int i = 0; i < 3; ++i) {
+ if (!IsConstVectorInRange(Operands[i], 1, 1))
+ continue;
+ std::swap(Operands[i], Operands[2]);
+
+ // Check if Operands[0] and Operands[1] are results of type promotion.
+ for (int j = 0; j < 2; ++j)
+ if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+ Operands[j].getOperand(0).getValueType() != VT)
+ return SDValue();
+
+ // The pattern is detected, emit X86ISD::AVG instruction.
+ return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+ Operands[1].getOperand(0));
+ }
+
return SDValue();
}
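Per lane, the matched pattern is the round-up average computed in a wider type, which is exactly what PAVGB/PAVGW implement:

    #include <cstdint>

    uint8_t avgRoundUp(uint8_t A, uint8_t B) {
      return uint8_t((uint16_t(A) + uint16_t(B) + 1) >> 1); // c = (a + b + 1) / 2
    }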
@@ -23940,10 +26024,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations.
ISD::LoadExtType Ext = Ld->getExtensionType();
+ bool Fast;
+ unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
- if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
- !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
+ Ext == ISD::NON_EXTLOAD &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
+ AddressSpace, Alignment, &Fast) && !Fast) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
@@ -24012,8 +26099,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
- && "WideVecVT should be legal");
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
}
@@ -24026,8 +26113,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
- ShuffleVec[i] = NumElems*SizeRatio;
+ for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
+ ShuffleVec[i] = NumElems * SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
&ShuffleVec[0]);
@@ -24055,7 +26142,6 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::NON_EXTLOAD);
SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
-
}
/// PerformMSTORECombine - Resolve truncating stores
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
@@ -24073,6 +26159,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // The truncating store is already legal in some cases. For example,
+ // vpmovqb, vpmovqw, vpmovqd, vpmovdb and vpmovdw
+ // implement a truncating store directly.
+ // In those cases we don't need any further transformation.
+ if (TLI.isTruncStoreLegal(VT, StVT))
+ return SDValue();
+
// From, To sizes and ElemCount must be pow of two
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
@@ -24096,12 +26191,12 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
- && "WideVecVT should be legal");
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
SDValue NewMask;
SDValue Mask = Mst->getMask();
@@ -24133,8 +26228,9 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
- return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
- NewMask, StVT, Mst->getMemOperand(), false);
+ return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
+ Mst->getBasePtr(), NewMask, StVT,
+ Mst->getMemOperand(), false);
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
@@ -24148,10 +26244,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
+ bool Fast;
+ unsigned AddressSpace = St->getAddressSpace();
unsigned Alignment = St->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
- if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
- StVT == VT && !IsAligned) {
+ if (VT.is256BitVector() && StVT == VT &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ AddressSpace, Alignment, &Fast) && !Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
@@ -24178,12 +26276,29 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
+ // Check if we can detect an AVG pattern from the truncation. If yes,
+ // replace the trunc store by a normal store with the result of X86ISD::AVG
+ // instruction.
+ SDValue Avg =
+ detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl);
+ if (Avg.getNode())
+ return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ // The truncating store is already legal in some cases. For example,
+ // vpmovqb, vpmovqw, vpmovqd, vpmovdb and vpmovdw
+ // implement a truncating store directly.
+ // In those cases we don't need any further transformation.
+ if (TLI.isTruncStoreLegal(VT, StVT))
+ return SDValue();
+
// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
@@ -24306,7 +26421,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
- EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
@@ -24539,8 +26654,234 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Truncate a group of v4i32/v2i64 into v16i8/v8i16 using X86ISD::PACKUS.
+static SDValue
+combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
+ SmallVector<SDValue, 8> &Regs) {
+ assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 ||
+ Regs[0].getValueType() == MVT::v2i64));
+ EVT OutVT = N->getValueType(0);
+ EVT OutSVT = OutVT.getVectorElementType();
+ EVT InVT = Regs[0].getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ SDLoc DL(N);
+
+ // First, use mask to unset all bits that won't appear in the result.
+ assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) &&
+ "OutSVT can only be either i8 or i16.");
+ SDValue MaskVal =
+ DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT);
+ SDValue MaskVec = DAG.getNode(
+ ISD::BUILD_VECTOR, DL, InVT,
+ SmallVector<SDValue, 8>(InVT.getVectorNumElements(), MaskVal));
+ for (auto &Reg : Regs)
+ Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg);
+
+ MVT UnpackedVT, PackedVT;
+ if (OutSVT == MVT::i8) {
+ UnpackedVT = MVT::v8i16;
+ PackedVT = MVT::v16i8;
+ } else {
+ UnpackedVT = MVT::v4i32;
+ PackedVT = MVT::v8i16;
+ }
+
+ // In each iteration, halve the element size of the type.
+ auto RegNum = Regs.size();
+ for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits();
+ j < e; j *= 2, RegNum /= 2) {
+ for (unsigned i = 0; i < RegNum; i++)
+ Regs[i] = DAG.getNode(ISD::BITCAST, DL, UnpackedVT, Regs[i]);
+ for (unsigned i = 0; i < RegNum / 2; i++)
+ Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2],
+ Regs[i * 2 + 1]);
+ }
+
+ // If the type of the result is v8i8, we need to do one more X86ISD::PACKUS, and
+ // then extract a subvector as the result since v8i8 is not a legal type.
+ if (OutVT == MVT::v8i8) {
+ Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]);
+ Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0],
+ DAG.getIntPtrConstant(0, DL));
+ return Regs[0];
+ } else if (RegNum > 1) {
+ Regs.resize(RegNum);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
+ } else
+ return Regs[0];
+}
+
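Per lane, the masking step is what makes PACKUS usable as a plain truncation: with the high bits cleared, unsigned saturation can never fire. A scalar sketch of the i16 -> i8 step:

    #include <cstdint>

    uint8_t packusLane(uint16_t V) {
      V &= 0xFF;                         // the AND with MaskVec above
      return V > 255 ? 255 : uint8_t(V); // PACKUSWB saturation, now a no-op
    }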
+/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
+static SDValue
+combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
+ SmallVector<SDValue, 8> &Regs) {
+ assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
+ EVT OutVT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Shift left by 16 bits, then arithmetic-shift right by 16 bits.
+ SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
+ for (auto &Reg : Regs) {
+ Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ }
+
+ for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
+ Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2],
+ Regs[i * 2 + 1]);
+
+ if (Regs.size() > 2) {
+ Regs.resize(Regs.size() / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
+ } else
+ return Regs[0];
+}
+
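The shift pair sign-fills the upper half of each i32 lane, so PACKSS's signed saturation degenerates to truncation. A per-lane scalar sketch (arithmetic right shift of negative values assumed):

    #include <cstdint>

    uint16_t packssLane(uint32_t V) {
      int32_t S = int32_t(V << 16) >> 16; // VSHLI 16 then VSRAI 16: sext low 16
      if (S > 32767) S = 32767;           // PACKSSDW saturation...
      if (S < -32768) S = -32768;         // ...which can no longer trigger
      return uint16_t(S);                 // equals the low 16 bits of V
    }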
+/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
+/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
+/// legalization the truncation will be translated into a BUILD_VECTOR with each
+/// element that is extracted from a vector and then truncated, and it is
+/// difficult to do this optimization based on them.
+static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT OutVT = N->getValueType(0);
+ if (!OutVT.isVector())
+ return SDValue();
+
+ SDValue In = N->getOperand(0);
+ if (!In.getValueType().isSimple())
+ return SDValue();
+
+ EVT InVT = In.getValueType();
+ unsigned NumElems = OutVT.getVectorNumElements();
+
+ // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
+ // SSE2, and we need to take care of it specially.
+ // AVX512 provides vpmovdb.
+ if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+ return SDValue();
+
+ EVT OutSVT = OutVT.getVectorElementType();
+ EVT InSVT = InVT.getVectorElementType();
+ if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
+ (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
+ NumElems >= 8))
+ return SDValue();
+
+ // SSSE3's pshufb results in fewer instructions in the cases below.
+ if (Subtarget->hasSSSE3() && NumElems == 8 &&
+ ((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
+ (InSVT == MVT::i32 && OutSVT == MVT::i16)))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // Split a long vector into vectors of legal type.
+ unsigned RegNum = InVT.getSizeInBits() / 128;
+ SmallVector<SDValue, 8> SubVec(RegNum);
+ if (InSVT == MVT::i32) {
+ for (unsigned i = 0; i < RegNum; i++)
+ SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(i * 4, DL));
+ } else {
+ for (unsigned i = 0; i < RegNum; i++)
+ SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(i * 2, DL));
+ }
+
+ // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
+ // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
+ // truncate 2 x v4i32 to v8i16.
+ if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+ return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
+ else if (InSVT == MVT::i32)
+ return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+ else
+ return SDValue();
+}
+
+static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Try to detect AVG pattern first.
+ SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
+ Subtarget, SDLoc(N));
+ if (Avg.getNode())
+ return Avg;
+
+ return combineVectorTruncation(N, DAG, Subtarget);
+}
+
+/// Do target-specific dag combines on floating point negations.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ SDValue Arg = N->getOperand(0);
+ SDLoc DL(N);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // If we're negating a FMUL node on a target with FMA, then we can avoid the
+ // use of a constant by performing (-0 - A*B) instead.
+ // FIXME: Check rounding control flags as well once it becomes available.
+ if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
+ Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Zero);
+ }
+
+ // If we're negating a FMA node, then we can adjust the
+ // instruction to include the extra negation.
+ if (Arg.hasOneUse()) {
+ switch (Arg.getOpcode()) {
+ case X86ISD::FMADD:
+ return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FMSUB:
+ return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMADD:
+ return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMSUB:
+ return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ }
+ }
+ return SDValue();
+}
+
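The opcode swaps above are plain sign algebra on the fused operation; in scalar terms (ignoring the single rounding of a true FMA):

    double fmadd(double A, double B, double C)  { return A * B + C;    }
    double fnmsub(double A, double B, double C) { return -(A * B) - C; }
    // -(fmadd(A, B, C)) == fnmsub(A, B, C); symmetrically, negating FMSUB
    // gives FNMADD and vice versa, which is the mapping in the switch above.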
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+ // VXORPS, VORPS, VANDPS, VANDNPS are supported only under the DQ extension.
+ // These logic operations may be executed in the integer domain.
+ SDLoc dl(N);
+ MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+ unsigned IntOpcode = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+ }
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+ }
+ return SDValue();
+}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
-static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
@@ -24552,7 +26893,8 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
- return SDValue();
+
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
@@ -24576,8 +26918,65 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
+static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (Subtarget->useSoftFloat())
+ return SDValue();
+
+ // TODO: Check for global or instruction-level "nnan". In that case, we
+ // should be able to lower to FMAX/FMIN alone.
+ // TODO: If an operand is already known to be a NaN or not a NaN, this
+ // should be an optional swap and FMAX/FMIN.
+
+ EVT VT = N->getValueType(0);
+ if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+ return SDValue();
+
+ // This takes at least 3 instructions, so favor a library call when operating
+ // on a scalar and minimizing code size.
+ if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc DL(N);
+ EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // There are 4 possibilities involving NaN inputs, and these are the required
+ // outputs:
+ // Op1
+ // Num NaN
+ // ----------------
+ // Num | Max | Op0 |
+ // Op0 ----------------
+ // NaN | Op1 | NaN |
+ // ----------------
+ //
+ // The SSE FP max/min instructions were not designed for this case, but rather
+ // to implement:
+ // Min = Op1 < Op0 ? Op1 : Op0
+ // Max = Op1 > Op0 ? Op1 : Op0
+ //
+ // So they always return Op0 if either input is a NaN. However, we can still
+ // use those instructions for fmaxnum by selecting away a NaN input.
+
+ // If either operand is NaN, the 2nd source operand (Op0) is passed through.
+ auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;
+ SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
+ SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);
+
+ // If Op0 is a NaN, select Op1. Otherwise, select the min/max. If both
+ // operands are NaN, the NaN value of Op1 is the result.
+ auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
+}
+
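A scalar model of the max path, relying on IEEE comparisons being false when either input is NaN:

    #include <cmath>

    double fmaxnumModel(double Op0, double Op1) {
      // X86ISD::FMAX(Op1, Op0) returns its second operand (Op0) whenever either
      // input is NaN, because the underlying compare is false for NaN.
      double MinOrMax = (Op1 > Op0) ? Op1 : Op0;
      // Select away a NaN Op0; if both are NaN, Op1's NaN is the result.
      return std::isnan(Op0) ? Op1 : MinOrMax;
    }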
/// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FAND(0.0, x) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
static SDValue PerformBTCombine(SDNode *N,
@@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
+/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
+/// to combine math ops, use an LEA, or use a complex addressing mode. This can
+/// eliminate extend, add, and shift instructions.
+static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // TODO: This should be valid for other integer types.
+ EVT VT = Sext->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ // We need an 'add nsw' feeding into the 'sext'.
+ SDValue Add = Sext->getOperand(0);
+ if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+ return SDValue();
+
+ // Having a constant operand to the 'add' ensures that we are not increasing
+ // the instruction count because the constant is extended for free below.
+ // A constant operand can also become the displacement field of an LEA.
+ auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+ if (!AddOp1)
+ return SDValue();
+
+ // Don't make the 'add' bigger if there's no hope of combining it with some
+ // other 'add' or 'shl' instruction.
+ // TODO: It may be profitable to generate simpler LEA instructions in place
+ // of single 'add' instructions, but the cost model for selecting an LEA
+ // currently has a high threshold.
+ bool HasLEAPotential = false;
+ for (auto *User : Sext->uses()) {
+ if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
+ HasLEAPotential = true;
+ break;
+ }
+ }
+ if (!HasLEAPotential)
+ return SDValue();
+
+ // Everything looks good, so pull the 'sext' ahead of the 'add'.
+ int64_t AddConstant = AddOp1->getSExtValue();
+ SDValue AddOp0 = Add.getOperand(0);
+ SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+ SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+
+ // The wider add is guaranteed to not wrap because both operands are
+ // sign-extended.
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(true);
+ return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+}
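[Illustrative aside, not part of the patch: why 'nsw' makes the reassociation safe, checked with plain 32/64-bit integers.]

#include <cassert>
#include <cstdint>

// sext(add_nsw(x, C)): add in 32 bits, then sign-extend. Well-defined
// only when the 32-bit add does not overflow -- exactly what 'nsw'
// promises.
static int64_t sextOfAdd(int32_t X, int32_t C) { return int64_t(X + C); }

// add(sext(x), C_sext): sign-extend both operands, then add in 64 bits.
// The 64-bit add cannot wrap when both inputs fit in 32 signed bits.
static int64_t addOfSext(int32_t X, int32_t C) {
  return int64_t(X) + int64_t(C);
}

int main() {
  // With no signed wrap the two forms agree, so the sext may be hoisted.
  assert(sextOfAdd(100, 23) == addOfSext(100, 23));
  assert(sextOfAdd(-7, 42) == addOfSext(-7, 42));
  return 0;
}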
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -24763,13 +27214,13 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
}
}
- if (!Subtarget->hasFp256())
- return SDValue();
-
- if (VT.isVector() && VT.getSizeInBits() == 256)
+ if (Subtarget->hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+ return NewAdd;
+
return SDValue();
}
@@ -24783,9 +27234,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
- (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
- !Subtarget->hasAVX512()))
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
return SDValue();
SDValue A = N->getOperand(0);
@@ -24830,8 +27279,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C || C->getZExtValue() != 1)
+ if (!isOneConstant(N0.getOperand(1)))
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
@@ -24884,21 +27332,19 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
- if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
- LHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
- }
+ if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
+ LHS.getOperand(1));
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
+ }
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
- if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
- RHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
- }
+ if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
+ RHS.getOperand(1));
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
+ }
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
@@ -24936,52 +27382,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
- SelectionDAG &DAG) {
- SDLoc dl(Load);
- MVT VT = Load->getSimpleValueType(0);
- MVT EVT = VT.getVectorElementType();
- SDValue Addr = Load->getOperand(1);
- SDValue NewAddr = DAG.getNode(
- ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
- DAG.getConstant(Index * EVT.getStoreSize(), dl,
- Addr.getSimpleValueType()));
-
- SDValue NewLoad =
- DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
- DAG.getMachineFunction().getMachineMemOperand(
- Load->getMemOperand(), 0, EVT.getStoreSize()));
- return NewLoad;
-}
-
-static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- SDLoc dl(N);
- MVT VT = N->getOperand(1)->getSimpleValueType(0);
- assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
- "X86insertps is only defined for v4x32");
-
- SDValue Ld = N->getOperand(1);
- if (MayFoldLoad(Ld)) {
- // Extract the countS bits from the immediate so we can get the proper
- // address when narrowing the vector load to a specific element.
- // When the second source op is a memory address, insertps doesn't use
- // countS and just gets an f32 from that address.
- unsigned DestIndex =
- cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
- Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
-
- // Create this as a scalar to vector to match the instruction pattern.
- SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
- // countS bits are ignored when loading from memory on insertps, which
- // means we don't need to explicitly set them to 0.
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
- LoadScalarToVector, N->getOperand(2));
- }
- return SDValue();
-}
-
static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
@@ -25008,6 +27408,20 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ // Gather and Scatter instructions use k-registers for masks. The type of
+ // the masks is v*i1, so the mask will be truncated anyway.
+ // The SIGN_EXTEND_INREG may be dropped.
+ SDValue Mask = N->getOperand(2);
+ if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ NewOps[2] = Mask.getOperand(0);
+ DAG.UpdateNodeOperands(N, NewOps);
+ }
+ return SDValue();
+}
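[Illustrative aside, not part of the patch: the dropped extension is a no-op for the mask because truncation to i1 keeps only bit 0. A scalar model of sign_extend_inreg from i1 on an i32 lane:]

#include <cassert>
#include <cstdint>

// sign_extend_inreg from i1: replicate bit 0 into all higher bits.
// (Arithmetic right shift of a negative value is what mainstream
// compilers do; C++ only guarantees it from C++20.)
static uint32_t sextInRegFromI1(uint32_t X) {
  return uint32_t(int32_t(X << 31) >> 31);
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 0xdeadbeefu}) {
    // Truncating to i1 keeps only bit 0, so the in-register sign
    // extension cannot change the mask value.
    assert((sextInRegFromI1(X) & 1u) == (X & 1u));
  }
  return 0;
}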
+
// Helper function of PerformSETCCCombine. It materializes "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
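[Illustrative aside, not part of the patch: a C++ model of the two ways to materialize the carry flag; setb and sbbSameReg are invented stand-ins for the instructions.]

#include <cassert>
#include <cstdint>

// setb %al: materialize the carry flag as 0 or 1.
static uint32_t setb(bool Carry) { return Carry ? 1u : 0u; }

// sbb %eax, %eax: eax - eax - CF, i.e. 0 or all-ones; the all-ones form
// can feed a wider mask/AND directly, with no zero extension needed.
static uint32_t sbbSameReg(bool Carry) { return Carry ? 0xFFFFFFFFu : 0u; }

int main() {
  assert(setb(true) == 1u && sbbSameReg(true) == 0xFFFFFFFFu);
  assert(setb(false) == 0u && sbbSameReg(false) == 0u);
  return 0;
}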
@@ -25182,7 +27596,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -25357,15 +27771,14 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
}
// Check if we can bypass extracting and re-inserting an element of an input
- // vector. Essentialy:
+ // vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
SDValue OrigV = ExtractedV.getOperand(0);
- if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
- if (ExtractIdx->getZExtValue() == 0) {
+ if (isNullConstant(ExtractedV.getOperand(1))) {
MVT OrigVT = OrigV.getSimpleValueType();
// Extract a subvector if necessary...
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
@@ -25394,7 +27807,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
@@ -25414,12 +27827,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget);
+ case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
- case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
- case X86ISD::FAND: return PerformFANDCombine(N, DAG);
- case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: return performFMinNumFMaxNumCombine(N, DAG,
+ Subtarget);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget);
+ case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
@@ -25447,14 +27865,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
- case X86ISD::INSERTPS: {
- if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return PerformINSERTPSCombine(N, DAG, Subtarget);
- break;
- }
case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
+ case ISD::MGATHER:
+ case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
}
return SDValue();
@@ -26084,6 +28497,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, &X86::FR64RegClass);
+ // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
@@ -26168,17 +28582,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
unsigned Size = VT.getSizeInBits();
- MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
- : Size == 16 ? MVT::i16
- : Size == 32 ? MVT::i32
- : Size == 64 ? MVT::i64
- : MVT::Other;
- unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+ if (Size == 1) Size = 8;
+ unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
Res.first = DestReg;
- Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
- : SimpleTy == MVT::i16 ? &X86::GR16RegClass
- : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+ Res.second = Size == 8 ? &X86::GR8RegClass
+ : Size == 16 ? &X86::GR16RegClass
+ : Size == 32 ? &X86::GR32RegClass
: &X86::GR64RegClass;
assert(Res.second->contains(Res.first) && "Register in register class");
} else {
@@ -26196,6 +28606,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
+ // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
@@ -26244,6 +28655,15 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-bool X86TargetLowering::isTargetFTOL() const {
- return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
+bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+ // Integer division on x86 is expensive. However, when aggressively optimizing
+ // for code size, we prefer to use a div instruction, as it is usually smaller
+ // than the alternative sequence.
+ // The exception to this is vector division. Since x86 doesn't have vector
+ // integer division, leaving the division as-is is a loss even in terms of
+ // size, because it will have to be scalarized, while the alternative code
+ // sequence can be performed in vector form.
+ bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+ return OptSize && !VT.isVector();
}
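[Illustrative aside, not part of the patch: what "the alternative sequence" looks like for division by a constant -- a multiply by a fixed-point reciprocal plus a shift. It takes more instruction bytes than a single div, hence div wins under MinSize; and since x86 has no vector integer div, vectors are excluded.]

#include <cassert>
#include <cstdint>

// n / 3 rewritten as a multiply by ceil(2^33 / 3) = 0xAAAAAAAB followed
// by a 33-bit shift; exact for every 32-bit unsigned n.
static uint32_t divBy3(uint32_t N) {
  return uint32_t((uint64_t(N) * 0xAAAAAAABull) >> 33);
}

int main() {
  for (uint32_t N : {0u, 1u, 2u, 3u, 100u, 0xFFFFFFFFu})
    assert(divBy3(N) == N / 3);
  return 0;
}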
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 723d530..a29dc9a 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -126,6 +126,9 @@ namespace llvm {
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
+ /// Return from interrupt. Operand 0 is the number of bytes to pop.
+ IRET,
+
/// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
@@ -182,6 +185,8 @@ namespace llvm {
/// Compute Sum of Absolute Differences.
PSADBW,
+ /// Compute Double Block Packed Sum-Absolute-Differences
+ DBPSADBW,
/// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
@@ -211,6 +216,8 @@ namespace llvm {
// FP vector get exponent
FGETEXP_RND,
+ // Extract Normalized Mantissas
+ VGETMANT,
// FP Scale
SCALEF,
// Integer add/sub with unsigned saturation.
@@ -236,6 +243,9 @@ namespace llvm {
// Integer absolute value
ABS,
+ // Detect Conflicts Within a Vector
+ CONFLICT,
+
/// Floating point max and min.
FMAX, FMIN,
@@ -282,9 +292,8 @@ namespace llvm {
// Vector integer truncate.
VTRUNC,
-
- // Vector integer truncate with mask.
- VTRUNCM,
+ // Vector integer truncate with unsigned/signed saturation.
+ VTRUNCUS, VTRUNCS,
// Vector FP extend.
VFPEXT,
@@ -295,6 +304,9 @@ namespace llvm {
// Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD,
+ // Convert a vector to a mask, setting bits based on the MSB.
+ CVT2MASK,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -349,6 +361,7 @@ namespace llvm {
// OR/AND test for masks
KORTEST,
+ KTEST,
// Several flavors of instructions with vector shuffle behaviors.
PACKSS,
@@ -382,12 +395,24 @@ namespace llvm {
VPERMIV3,
VPERMI,
VPERM2X128,
- //Fix Up Special Packed Float32/64 values
+ // Bitwise ternary logic
+ VPTERNLOG,
+ // Fix Up Special Packed Float32/64 values
VFIXUPIMM,
- //Range Restriction Calculation For Packed Pairs of Float32/64 values
+ // Range Restriction Calculation For Packed Pairs of Float32/64 values
VRANGE,
+ // Reduce - Perform Reduction Transformation on scalar/packed FP
+ VREDUCE,
+ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+ VRNDSCALE,
+ // VFPCLASS - Tests the type of packed FP values.
+ VFPCLASS,
+ // VFPCLASSS - Tests the type of a scalar FP value.
+ VFPCLASSS,
// Broadcast scalar to vector
VBROADCAST,
+ // Broadcast mask to vector
+ VBROADCASTM,
// Broadcast subvector to vector
SUBV_BROADCAST,
// Insert/Extract vector element
@@ -397,13 +422,21 @@ namespace llvm {
/// SSE4A Extraction and Insertion.
EXTRQI, INSERTQI,
+ // XOP variable/immediate rotations
+ VPROT, VPROTI,
+ // XOP arithmetic/logical shifts
+ VPSHA, VPSHL,
+ // XOP signed/unsigned integer comparisons
+ VPCOM, VPCOMU,
+
// Vector multiply packed unsigned doubleword integers
PMULUDQ,
// Vector multiply packed signed doubleword integers
PMULDQ,
// Vector Multiply Packed Unsigned Integers with Round and Scale
MULHRS,
-
+ // Multiply and Add Packed Integers
+ VPMADDUBSW, VPMADDWD,
// FMA nodes
FMADD,
FNMADD,
@@ -418,7 +451,6 @@ namespace llvm {
FNMSUB_RND,
FMADDSUB_RND,
FMSUBADD_RND,
- RNDSCALE,
// Compress and expand
COMPRESS,
@@ -443,9 +475,6 @@ namespace llvm {
// falls back to heap allocation if not.
SEG_ALLOCA,
- // Windows's _ftol2 runtime routine to do fptoui.
- WIN_FTOL,
-
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -580,15 +609,6 @@ namespace llvm {
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt);
- /// AVX512 static rounding constants. These need to match the values in
- /// avx512fintrin.h.
- enum STATIC_ROUNDING {
- TO_NEAREST_INT = 0,
- TO_NEG_INF = 1,
- TO_POS_INF = 2,
- TO_ZERO = 3,
- CUR_DIRECTION = 4
- };
}
//===--------------------------------------------------------------------===//
@@ -850,16 +870,7 @@ namespace llvm {
/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
- }
-
- /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
- bool isTargetFTOL() const;
-
- /// Return true if the MSVC _ftol2 routine should be used for fptoui to the
- /// given type.
- bool isIntegerTypeFTOL(EVT VT) const {
- return isTargetFTOL() && VT == MVT::i64;
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
/// \brief Returns true if it is beneficial to convert a load of a constant
@@ -879,6 +890,16 @@ namespace llvm {
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
/// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -890,6 +911,11 @@ namespace llvm {
bool getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const override;
+ /// Return the location of the SafeStack pointer; the target may store it
+ /// at a fixed offset in some non-standard address space.
+ Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
@@ -899,6 +925,8 @@ namespace llvm {
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+ bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -908,7 +936,6 @@ namespace llvm {
/// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- const DataLayout *TD;
/// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
@@ -955,7 +982,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, SDLoc dl) const;
@@ -969,7 +995,6 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
@@ -994,9 +1019,9 @@ namespace llvm {
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
SDLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -1042,27 +1067,16 @@ namespace llvm {
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
- bool needsCmpXchgNb(const Type *MemType) const;
-
- /// Utility function to emit atomic-load-arith operations (and, or, xor,
- /// nand, max, min, umax, umin). It takes the corresponding instruction to
- /// expand, the associated machine basic block, and the associated X86
- /// opcodes for reg/reg.
- MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
- /// Utility function to emit atomic-load-arith operations (and, or, xor,
- /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
- MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ bool needsCmpXchgNb(Type *MemType) const;
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *EmitVAARG64WithCustomInserter(
@@ -1077,18 +1091,24 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
@@ -1121,7 +1141,7 @@ namespace llvm {
unsigned &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
};
namespace X86 {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index faa9150..8bf2925 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -79,7 +79,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512),
+ !if (!eq (Size, 512),
!if (!eq (EltSize, 64), "v8i64", "v16i32"),
VTName))), VTName));
@@ -145,6 +145,8 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking logic.
+def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
+def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
@@ -274,6 +276,22 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+// Similar to AVX512_maskable_3src, but in this case the input VT for the tied
+// operand differs from the output VT. This requires a bitconvert on
+// the preserved vector going into the vselect.
+multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
+ X86VectorVTInfo InVT,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, OutVT, Outs,
+ !con((ins InVT.RC:$src1), NonTiedIns),
+ !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
+ !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect InVT.KRCWM:$mask, RHS,
+ (bitconvert InVT.RC:$src1))>;
+
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@@ -471,84 +489,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-
-multiclass vinsert_for_size_no_alt<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> {
+multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
- def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, From.RC:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm)))]>,
- EVEX_4V, EVEX_V512;
+ defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
- let mayLoad = 1 in
- def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, From.MemOp:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- []>,
- EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
- }
-}
-
-multiclass vinsert_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> :
- vinsert_for_size_no_alt<Opcode, From, To,
- vinsert_insert, INSERT_get_vinsert_imm> {
- // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
- // vinserti32x4. Only add this if 64x2 and friends are not supported
- // natively via AVX512DQ.
- let Predicates = [NoDQI] in
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
+ EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ }
+}
+
+multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
+ X86VectorVTInfo To, PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
+ let Predicates = p in {
def : Pat<(vinsert_insert:$ins
- (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
- (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
- VR512:$src1, From.RC:$src2,
- (INSERT_get_vinsert_imm VR512:$ins)))>;
+ (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rr")
+ To.RC:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+
+ def : Pat<(vinsert_insert:$ins
+ (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rm")
+ To.RC:$src1, addr:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+ }
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
- defm NAME # "32x4" : vinsert_for_size<Opcode128,
+
+ let Predicates = [HasVLX] in
+ defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ vinsert128_insert>, EVEX_V256;
+
+ defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
- X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vinsert128_insert>, EVEX_V512;
+
+ defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>;
- let Predicates = [HasDQI] in
- defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ vinsert256_insert>, VEX_W, EVEX_V512;
+
+ let Predicates = [HasVLX, HasDQI] in
+ defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ vinsert128_insert>, VEX_W, EVEX_V256;
+
+ let Predicates = [HasDQI] in {
+ defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>, VEX_W;
- defm NAME # "64x4" : vinsert_for_size<Opcode256,
- X86VectorVTInfo< 4, EltVT64, VR256X>,
- X86VectorVTInfo< 8, EltVT64, VR512>,
- X86VectorVTInfo< 8, EltVT32, VR256>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>, VEX_W;
- let Predicates = [HasDQI] in
- defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
- X86VectorVTInfo< 8, EltVT32, VR256X>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>;
+ vinsert128_insert>, VEX_W, EVEX_V512;
+
+ defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert>, EVEX_V512;
+ }
}
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
+// Codegen patterns with the alternative types.
+// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+
+// Codegen patterns with the alternative types: insert VEC128 into VEC256
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+// Codegen patterns with the alternative types: insert VEC128 into VEC512
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+// Codegen patterns with the alternative types: insert VEC256 into VEC512
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
@@ -566,90 +623,158 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
// AVX-512 VECTOR EXTRACT
//---
+multiclass vextract_for_size_first_position_lowering<X86VectorVTInfo From,
+ X86VectorVTInfo To> {
+ // A subvector extract from the first vector position is
+ // a subregister copy that needs no instruction.
+ def NAME # To.NumElts:
+ Pat<(To.VT (extract_subvector (From.VT From.RC:$src),(iPTR 0))),
+ (To.VT (EXTRACT_SUBREG (From.VT From.RC:$src), To.SubRegIdx))>;
+}
+
multiclass vextract_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
- PatFrag vextract_extract,
- SDNodeXForm EXTRACT_get_vextract_imm> {
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vextract_extract> :
+ vextract_for_size_first_position_lowering<From, To> {
+
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
+ // vextract_extract); we are interested only in patterns without a mask.
+ // The intrinsic patterns are matched below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
- (ins VR512:$src1, u8imm:$idx),
- "vextract" # To.EltTypeName # "x4",
+ (ins From.RC:$src1, i32u8imm:$idx),
+ "vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
- [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
+ [(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
- AVX512AIi8Base, EVEX, EVEX_V512;
- let mayStore = 1 in
- def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
- (ins To.MemOp:$dst, VR512:$src1, u8imm:$src2),
- "vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
- "$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
- }
-
- // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
- // vextracti32x4
- def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
- (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
- VR512:$src1,
- (EXTRACT_get_vextract_imm To.RC:$ext)))>;
-
- // A 128/256-bit subvector extract from the first 512-bit vector position is
- // a subregister copy that needs no instruction.
- def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
- (To.VT
- (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;
-
- // And for the alternative types.
- def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
- (AltTo.VT
- (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+ AVX512AIi8Base, EVEX;
+ let mayStore = 1 in {
+ def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$src2),
+ "vextract" # To.EltTypeName # "x" # To.NumElts #
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX;
+
+ def rmk : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, To.KRCWM:$mask,
+ From.RC:$src1, i32u8imm:$src2),
+ "vextract" # To.EltTypeName # "x" # To.NumElts #
+ "\t{$src2, $src1, $dst {${mask}}|"
+ "$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K, EVEX;
+ } // mayStore = 1
+ }
// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
- (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrk")
+ To.RC:$src0,
+ (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+ From.RC:$src1, imm:$idx)>;
// Intrinsic call with zero-masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
- (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrkz")
+ (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+ From.RC:$src1, imm:$idx)>;
// Intrinsic call without masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
- (!cast<Instruction>(NAME # To.EltSize # "x4rr")
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rr")
+ From.RC:$src1, imm:$idx)>;
+}
+
+// Codegen pattern for the alternative types
+multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
+ X86VectorVTInfo To, PatFrag vextract_extract,
+ SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> :
+ vextract_for_size_first_position_lowering<From, To> {
+
+ let Predicates = p in
+ def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rr")
+ From.RC:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
-multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
- ValueType EltVT64, int Opcode64> {
- defm NAME # "32x4" : vextract_for_size<Opcode32,
+multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
+ ValueType EltVT64, int Opcode256> {
+ defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
+ vextract128_extract>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+ defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ vextract256_extract>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
+ let Predicates = [HasVLX] in
+ defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ vextract128_extract>,
+ EVEX_V256, EVEX_CD8<32, CD8VT4>;
+ let Predicates = [HasVLX, HasDQI] in
+ defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract,
- EXTRACT_get_vextract128_imm>;
- defm NAME # "64x4" : vextract_for_size<Opcode64,
+ vextract128_extract>,
+ VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
+ let Predicates = [HasDQI] in {
+ defm NAME # "64x2Z" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
- X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vextract128_extract>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
+ defm NAME # "32x8Z" : vextract_for_size<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
- X86VectorVTInfo< 8, EltVT32, VR256>,
- vextract256_extract,
- EXTRACT_get_vextract256_imm>, VEX_W;
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ vextract256_extract>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+ }
}
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
+// extract_subvector codegen patterns with the alternative types.
+// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+
+defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+
+// Codegen patterns with the alternative types: extract VEC128 from VEC512
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+// Codegen patterns with the alternative types: extract VEC256 from VEC512
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
@@ -677,6 +802,10 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
@@ -694,50 +823,49 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
- ValueType svt, X86VectorVTInfo _> {
- defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
- "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
- T8PD, EVEX;
- let mayLoad = 1 in {
- defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src),
- "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
- (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
- T8PD, EVEX;
- }
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
+ T8PD, EVEX;
+ let mayLoad = 1 in
+ defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast
+ (SrcInfo.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
-multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
- AVX512VLVectorVTInfo _> {
- defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
- EVEX_V256;
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ EVEX_V256;
}
}
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
+ avx512vl_f32_info>;
let Predicates = [HasVLX] in {
- defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
- v4f32, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss",
+ v4f32x_info, v4f32x_info>, EVEX_V128;
}
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
+ avx512vl_f64_info>, VEX_W;
}
// avx512_broadcast_pat introduces patterns for broadcast with a scalar argument.
-// Later, we can canonize broadcast instructions before ISel phase and
+// Later, we can canonicalize broadcast instructions before the ISel phase and
// eliminate additional patterns during ISel.
// SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar
// representations of source
@@ -834,70 +962,50 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
(VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
-multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
- RegisterClass KRC> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} |${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}} , $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (vselect KRC:$mask,
- (X86VBroadcast (ld_frag addr:$src)),
- (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ;
- }
-}
-
-defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
- loadi32, VR512, v16i32, v4i32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
- loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VT1>;
+// Provide aliases for broadcast from the same register class that
+// automatically do the extract.
+multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
+ (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
+}
+
+multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
+ EVEX_V512;
+ // Defined separately to avoid redefinition.
+ defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+ EVEX_V128;
+ }
+}
+
+defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
+ avx512vl_i8_info, HasBWI>;
+defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
+ avx512vl_i16_info, HasBWI>;
+defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
+ avx512vl_i32_info, HasAVX512>;
+defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
+ avx512vl_i64_info, HasAVX512>, VEX_W;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set _Dst.RC:$dst,
- (_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))))]>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
- _Src.MemOp:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
- _Src.MemOp:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- }
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (X86SubVBroadcast
+ (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ AVX5128IBase, EVEX;
}
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
@@ -944,10 +1052,45 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
-def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
- (VPBROADCASTDZrr VR128X:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
- (VPBROADCASTQZrr VR128X:$src)>;
+multiclass avx512_broadcast_32x2<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _Dst, X86VectorVTInfo _Src,
+ SDNode OpNode = X86SubVBroadcast> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ T8PD, EVEX;
+ let mayLoad = 1 in
+ defm m : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (OpNode
+ (_Src.VT (scalar_to_vector(loadi64 addr:$src)))))>,
+ T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>;
+}
+
+multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> {
+ let Predicates = [HasDQI] in
+ defm Z : avx512_broadcast_32x2<opc, OpcodeStr, _.info512, _.info128>,
+ EVEX_V512;
+ let Predicates = [HasDQI, HasVLX] in
+ defm Z256 : avx512_broadcast_32x2<opc, OpcodeStr, _.info256, _.info128>,
+ EVEX_V256;
+}
+
+multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> :
+ avx512_common_broadcast_32x2<opc, OpcodeStr, _> {
+
+ let Predicates = [HasDQI, HasVLX] in
+ defm Z128 : avx512_broadcast_32x2<opc, OpcodeStr, _.info128, _.info128,
+ X86SubV32x2Broadcast>, EVEX_V128;
+}
+
+defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
+ avx512vl_i32_info>;
+defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
+ avx512vl_f32_info>;
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
@@ -959,21 +1102,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZr VR128X:$src)>;
-def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZr VR128X:$src)>;
-
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
@@ -985,170 +1113,178 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
-
-multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass KRC> {
-let Predicates = [HasCDI] in
-def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V512;
-
-let Predicates = [HasCDI, HasVLX] in {
-def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V128;
-def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256:$dst), (ins KRC:$src),
+multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, RegisterClass KRC> {
+ def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V256;
+ [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
}
+
+multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
+ let Predicates = [HasCDI] in
+ defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
+ let Predicates = [HasCDI, HasVLX] in {
+ defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
+ defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
+ }
}
-let Predicates = [HasCDI] in {
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
- VK16>;
+ avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
- VK8>, VEX_W;
-}
+ avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
-// AVX-512 - VPERM
-//
-// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
- def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
- EVEX;
- def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.MemOp:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (OpNode (_.LdFrag addr:$src1),
- (i8 imm:$src2))))]>,
- EVEX, EVEX_CD8<_.EltSize, CD8VF>;
+// -- VPERMI2 - 3 source operands form --
+multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+let Constraints = "$src1 = $dst" in {
+ defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ AVX5128IBase;
+
+ let mayLoad = 1 in
+ defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+ (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ EVEX_4V, AVX5128IBase;
+ }
}
+multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+ let mayLoad = 1, Constraints = "$src1 = $dst" in
+ defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (_.VT (X86VPermi2X IdxVT.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ AVX5128IBase, EVEX_4V, EVEX_B;
}
-multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
- X86VectorVTInfo Ctrl> :
- avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
- let ExeDomain = _.ExeDomain in {
- def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2),
- !strconcat("vpermil" # _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (X86VPermilpv _.RC:$src1,
- (Ctrl.VT Ctrl.RC:$src2))))]>,
- EVEX_4V;
- def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, Ctrl.MemOp:$src2),
- !strconcat("vpermil" # _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (X86VPermilpv _.RC:$src1,
- (Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>,
- EVEX_4V;
- }
-}
-defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
- EVEX_V512;
-defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
- EVEX_V512, VEX_W;
-
-def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
-// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _> {
+multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo ShuffleMask> {
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
+ }
+}
+
+multiclass avx512_perm_i_sizes_w<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo Idx> {
+ let Predicates = [HasBWI] in
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
+ let Predicates = [HasBWI, HasVLX] in {
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
+ }
+}
+
+defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
+ avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
+ avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w",
+ avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
+ avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
+ avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// VPERMT2
+multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.RC:$src3),
+ (ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>, EVEX_4V,
AVX5128IBase;
let mayLoad = 1 in
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.MemOp:$src3),
+ (ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
- (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
+ (bitconvert (_.LdFrag addr:$src3))))>,
EVEX_4V, AVX5128IBase;
}
}
-multiclass avx512_perm_3src_mb<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _> {
+multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let mayLoad = 1, Constraints = "$src1 = $dst" in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (OpNode _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ (_.VT (X86VPermt2 _.RC:$src1,
+ IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
AVX5128IBase, EVEX_4V, EVEX_B;
}
-multiclass avx512_perm_3src_sizes<bits<8> opc, string OpcodeStr,
- SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
- let Predicates = [HasAVX512] in
- defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo ShuffleMask> {
+ defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
- defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
+ defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
}
}
-multiclass avx512_perm_3src_sizes_w<bits<8> opc, string OpcodeStr,
- SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+
+multiclass avx512_perm_t_sizes_w<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo Idx> {
let Predicates = [HasBWI] in
- defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>,
- EVEX_V512;
+ defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
- defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
- defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
- }
-}
-defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3,
- avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3,
- avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3,
- avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3,
- avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+ defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
+ }
+}
+
+defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
+ avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
+ avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w",
+ avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
+ avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
+ avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
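// Note (illustrative comment only, not part of this change): VPERMI2* and
// VPERMT2* differ only in which input the tied $src1 operand supplies. In
// "vpermi2d zmm0, zmm1, zmm2" the indices live in zmm0 and are overwritten by
// the result, with zmm1:zmm2 forming the two-register table; in
// "vpermt2d zmm0, zmm1, zmm2" zmm0 is the first table half (overwritten) and
// zmm1 holds the indices. For the 512-bit dword forms, bit 4 of each index
// selects the table half.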
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -1265,41 +1401,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
//===----------------------------------------------------------------------===//
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT,
- PatFrag ld_frag, string Suffix> {
- def rr : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", Suffix,
+
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> {
+
+ defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)>, EVEX_4V;
+ let mayLoad = 1 in
+ defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+ defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+ defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+ defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
+ EVEX_4V, EVEX_B;
+ }// let isAsmParserOnly = 1, hasSideEffects = 0
+
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512Ii8<0xC2, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+ _.FRC:$src2,
+ imm:$cc))],
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
- def rm : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VK1:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
let mayLoad = 1 in
- def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- !strconcat("vcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rm : AVX512Ii8<0xC2, MRMSrcMem,
+ (outs _.KRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
}
let Predicates = [HasAVX512] in {
-defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
- XS;
-defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
- XD, VEX_W;
+ defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
+ AVX512XSIi8Base;
+ defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
+ AVX512XDIi8Base, VEX_W;
}
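// Illustrative use (comment only): "vcmpless k1 {k2}, xmm1, xmm2" compares the
// low single-precision elements and writes the 1-bit result into k1[0] under
// write-mask k2; the rrb form "vcmpless k1, xmm1, xmm2, {sae}" suppresses
// floating-point exceptions, and the _alt forms take the raw immediate, e.g.
// "vcmpss k1, xmm1, xmm2, 2" for the same LE predicate.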
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -1700,6 +1880,128 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
+// ----------------------------------------------------------------
+// FPClass
+// Handle the fpclass instruction: mask = op(reg_scalar, imm)
+//                                        op(mem_scalar, imm)
+multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ let mayLoad = 1, AddedComplexity = 20 in {
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ }
+ }
+}
+
+// Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
+//                                        fpclass(reg_vec, mem_vec, imm)
+//                                        fpclass(reg_vec, broadcast(eltVt), imm)
+multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, string mem, string broadcast> {
+ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#
+ "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ let mayLoad = 1 in {
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##mem#
+ "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode
+ (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##mem#
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
+ (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ _.BroadcastStr##", $dst | $dst, ${src1}"
+ ##_.BroadcastStr##", $src2}",
+ [(set _.KRC:$dst,(OpNode
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>, EVEX_B;
+ def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"##
+ _.BroadcastStr##", $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>,
+ EVEX_B, EVEX_K;
+ }
+}
+
+multiclass avx512_vector_fpclass_all<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd,
+ string broadcast> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512, "{z}",
+ broadcast>, EVEX_V512;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128, "{x}",
+ broadcast>, EVEX_V128;
+ defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256, "{y}",
+ broadcast>, EVEX_V256;
+ }
+}
+
+multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
+ bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd> {
+ defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
+ VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
+ VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
+ defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
+ f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
+ defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
+ f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
+}
+
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
+ X86Vfpclasss, HasDQI>, AVX512AIi8Base, EVEX;
+
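// Illustrative use (comment only): the immediate is a bitmask of categories --
// 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal,
// 0x40 finite negative, 0x80 SNaN -- so "vfpclassps k1, zmm0, 0x18" sets a
// mask bit for every element of zmm0 that is +/-infinity.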
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
@@ -1786,6 +2088,11 @@ let Predicates = [HasDQI] in {
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;
+
+ def : Pat<(store VK4:$src, addr:$dst),
+ (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
+ def : Pat<(store VK2:$src, addr:$dst),
+ (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
@@ -1837,10 +2144,15 @@ let Predicates = [HasAVX512] in {
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
+
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
(AND32ri (KMOVWrk
(COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+ def : Pat<(i8 (anyext VK1:$src)),
+ (EXTRACT_SUBREG
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+
def : Pat<(i64 (zext VK1:$src)),
(AND64ri8 (SUBREG_TO_REG (i64 0),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
@@ -1848,17 +2160,19 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
- def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK16)>;
- def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK8)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK32)>;
- def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
+def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK16)>;
+def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK8)>;
+def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK4)>;
+def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK2)>;
+def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK32)>;
+def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK64)>;
// With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
@@ -1955,11 +2269,12 @@ multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode, bit IsCommutable> {
+ SDPatternOperator OpNode, bit IsCommutable,
+ Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512, IsCommutable>, VEX_4V, VEX_L, PS;
+ prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
@@ -1974,6 +2289,7 @@ defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
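// Note (illustrative comment only): kadd sums its operands as integers, e.g.
// "kaddw k1, k2, k3" computes k1 = (k2 + k3) truncated to 16 bits. The
// byte/word forms are AVX512DQ and the dword/qword forms AVX512BW, which is
// why HasDQI is threaded through prdW above.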
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2047,59 +2363,48 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Mask unpacking
-multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
- RegisterClass KRC> {
- let Predicates = [HasAVX512] in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
-}
+multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
+ RegisterClass KRCSrc, Predicate prd> {
+ let Predicates = [prd] in {
+ def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
+ (ins KRC:$src1, KRC:$src2),
+ "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V, VEX_L;
-multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
- defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
- VEX_4V, VEX_L, PD;
+ def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
+ (!cast<Instruction>(NAME##rr)
+ (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
+ (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
+ }
}
-defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
-def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
- (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
- (COPY_TO_REGCLASS VK8:$src1, VK16))>;
-
-
-multiclass avx512_mask_unpck_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
- (i16 GR16:$src1), (i16 GR16:$src2)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
- (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
-}
-defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
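// Illustrative semantics (comment only): "kunpckbw k1, k2, k3" concatenates
// the low bytes of its sources, k1[7:0] = k3[7:0] and k1[15:8] = k2[7:0];
// that is why the concat_vectors pattern above passes the high half ($src2)
// as the instruction's first source operand.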
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode> {
- let Predicates = [HasAVX512], Defs = [EFLAGS] in
+ SDNode OpNode, Predicate prd> {
+ let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
-multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, PS;
- let Predicates = [HasDQI] in
- defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
- VEX, PD;
- let Predicates = [HasBWI] in {
- defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
- VEX, PS, VEX_W;
- defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
- VEX, PD, VEX_W;
- }
+multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Predicate prdW = HasAVX512> {
+ defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
+ VEX, PD;
+ defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
+ VEX, PS;
+ defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
+ VEX, PS, VEX_W;
+ defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
+ VEX, PD, VEX_W;
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
+defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
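// Illustrative semantics (comment only): kortest sets ZF when the OR of the
// two masks is all zeros and CF when it is all ones; ktest sets ZF when
// (src1 & src2) == 0 and CF when (src1 & ~src2) == 0, mirroring the integer
// TEST idiom on mask registers.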
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -2124,7 +2429,7 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
let Predicates = [HasDQI] in
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
VEX, TAPD;
- }
+ }
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
@@ -2167,24 +2472,52 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))),
+ (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>;
+
+def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
+ (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
+
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
(v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
(v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
-let Predicates = [HasVLX] in {
- def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
- (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
- def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
- (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
- def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
- (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
- def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
- (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
- def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
- (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
-}
+def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
+
+def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
+
+def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
+
+def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
+def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+
+def : Pat<(v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>;
+
+def : Pat<(v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>;
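// Note (illustrative comment only, an interpretation of the patterns above):
// every VK* register class maps onto the same k0-k7 registers, so these
// COPY_TO_REGCLASS nodes normally coalesce away and the bits above the source
// width are simply left undefined.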
+
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS
@@ -2304,23 +2637,21 @@ multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag, PatFrag mstore> {
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
- OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
- _.ExeDomain>, EVEX;
- let Constraints = "$src1 = $dst" in
- def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2),
- OpcodeStr #
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
+
+ def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
+ OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
+ [], _.ExeDomain>, EVEX;
+ def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
+ "${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K;
- def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr #
- "\t{$src, ${dst} {${mask}} {z}|" #
+ OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ;
- }
+
let mayStore = 1 in {
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -2425,22 +2756,6 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
-let Predicates = [HasAVX512, NoVLX] in {
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
- (VMOVUPSZmrk addr:$ptr,
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
- (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-}
-
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
@@ -2502,17 +2817,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
-// NoVLX patterns
-let Predicates = [HasAVX512, NoVLX] in {
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
- (VMOVDQU32Zmrk addr:$ptr,
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
- (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
-
-def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-}
// Move Int Doubleword to Packed Double Int
//
@@ -2520,32 +2824,37 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- EVEX, VEX_LIG;
+ EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
+ IIC_SSE_MOVDQ>, EVEX, VEX_W;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>,
+ EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
-def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))],
+ [(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))],
+ [(set GR64:$dst, (bitconvert FR64X:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-}
-def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
EVEX_CD8<64, CD8VT1>;
+}
// Move Int Doubleword to Single Scalar
//
@@ -2553,27 +2862,27 @@ let isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
+ IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
}
// Move doubleword from xmm register to r/m32
//
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
- EVEX, VEX_LIG;
+ EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128X:$src),
+ [(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX, EVEX_CD8<32, CD8VT1>;
// Move quadword from xmm1 register to r/m64
//
@@ -2581,16 +2890,28 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
- IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
+ IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
-def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128X:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
- addr:$dst)], IIC_SSE_MOVDQ>,
- EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
- Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
+ Requires<[HasAVX512, In64BitMode]>;
+
+def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128X:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
+ addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
+ Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+
+let hasSideEffects = 0 in
+def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
+ EVEX, VEX_W;
// Move Scalar Single to Double Int
//
@@ -2599,92 +2920,95 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
- IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
+ IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
}
// Move Quadword Int to Packed Quadword Int
//
-def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, RegisterClass RC,
- SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_pat> {
- let hasSideEffects = 0 in {
- def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
- let Constraints = "$src1 = $dst" in
- def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
- !strconcat(asm,
- "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
- [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
- def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
- EVEX, VEX_LIG;
+multiclass avx512_move_scalar <string asm, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ asm, "$src2, $src1","$src1, $src2",
+ (_.VT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2))),
+ IIC_SSE_MOV_S_RR>, EVEX_4V;
+ let Constraints = "$src1 = $dst" , mayLoad = 1 in
+ defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
+ (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ asm,"$src","$src",
+ (_.VT (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src)))))>, EVEX;
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.FRC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))))],
+ _.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V;
+ let mayLoad = 1 in
+ def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ }
let mayStore = 1 in {
- def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- EVEX, VEX_LIG;
- def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
- !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- [], IIC_SSE_MOV_S_MR>,
- EVEX, VEX_LIG, EVEX_K;
+ def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
+ EVEX;
+ def mrk: AVX512PI<0x11, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
} // mayStore
- } //hasSideEffects = 0
}
-let ExeDomain = SSEPackedSingle in
-defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
- loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+ VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
-let ExeDomain = SSEPackedDouble in
-defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
- loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+ VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
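// Illustrative use (comment only): the masked forms gate the move on bit 0 of
// the mask, e.g. "vmovss xmm1 {k1}{z}, xmm2, xmm3" writes xmm3's low single
// into xmm1[31:0] when k1[0] is set (zero otherwise) and copies xmm1's upper
// elements from xmm2.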
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
- (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
- VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+ (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+ VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
- (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
- VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+ (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+ VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-// For the disassembler
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins VR128X:$src1, FR32X:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, EVEX_4V, VEX_LIG;
- def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins VR128X:$src1, FR64X:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, EVEX_4V, VEX_LIG, VEX_W;
-}
+defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ "vmovss.s", "$src2, $src1", "$src1, $src2", []>,
+ XS, EVEX_4V, VEX_LIG;
+
+defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ "vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
+ XD, EVEX_4V, VEX_LIG, VEX_W;
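// Note (illustrative comment only): the ".s" mnemonic suffix on these _REV
// forms, like the rr_REV store forms above, forces the MRMDestReg encoding so
// the assembler and disassembler can round-trip both byte patterns of the
// otherwise identical register-to-register move.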
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
@@ -2768,10 +3092,10 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
- def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
@@ -2835,7 +3159,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
-let AddedComplexity = 20 in
+let AddedComplexity = 20, isCodeGenOnly = 1 in
def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -2964,7 +3288,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
itins.rr, IsCommutable>,
@@ -2972,7 +3296,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))),
@@ -2986,7 +3310,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
let mayLoad = 1 in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
@@ -3058,20 +3382,20 @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
- defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr, OpNode, itins, prd,
+ defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
IsCommutable>;
- defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr, OpNode, itins, prd,
+ defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
- defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr, OpNode, itins, prd,
+ defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
IsCommutable>;
- defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr, OpNode, itins, prd,
+ defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
IsCommutable>;
}
@@ -3086,15 +3410,15 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
- SDNode OpNode,X86VectorVTInfo _Src,
+ SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, bit IsCommutable = 0> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
- "$src2, $src1","$src1, $src2",
- (_Dst.VT (OpNode
- (_Src.VT _Src.RC:$src1),
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
- itins.rr, IsCommutable>,
+ itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
let mayLoad = 1 in {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
@@ -3106,12 +3430,12 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
AVX512BIBase, EVEX_4V;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
- (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Dst.BroadcastStr##", $src1",
"$src1, ${src2}"##_Dst.BroadcastStr,
- (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Dst.VT (X86VBroadcast
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Dst.VT (X86VBroadcast
(_Dst.ScalarLdFrag addr:$src2)))))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
@@ -3127,24 +3451,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
- SSE_INTALU_ITINS_P, HasBWI, 0>;
-defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P,
- HasBWI, 1>;
-defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P,
+ SSE_INTALU_ITINS_P, HasBWI, 0>;
+defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
+ SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
HasBWI, 1>;
-defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P,
- HasBWI, 1>, T8PD;
+defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
-
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode, bit IsCommutable = 0> {
@@ -3159,7 +3483,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
v4i32x_info, v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
-}
+}
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
X86pmuldq, 1>, T8PD;
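// Illustrative semantics (comment only): "vpmuldq zmm1, zmm2, zmm3" multiplies
// the signed low dword of each qword lane of zmm2 and zmm3 and writes the full
// 64-bit product into the corresponding qword of zmm1.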
@@ -3170,25 +3494,25 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
let mayLoad = 1 in {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
- (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
+ (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Src.BroadcastStr##", $src1",
"$src1, ${src2}"##_Src.BroadcastStr,
- (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Src.VT (X86VBroadcast
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
}
-multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,X86VectorVTInfo _Src,
+multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
- "$src2, $src1","$src1, $src2",
- (_Dst.VT (OpNode
- (_Src.VT _Src.RC:$src1),
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
let mayLoad = 1 in {
@@ -3229,102 +3553,59 @@ multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
v16i8x_info>, EVEX_V128;
}
}
+
+multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo _Src,
+ AVX512VLVectorVTInfo _Dst> {
+ defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
+ _Dst.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
+ _Dst.info256>, EVEX_V256;
+ defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
+ _Dst.info128>, EVEX_V128;
+ }
+}
+
let Predicates = [HasBWI] in {
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
+
+ defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
+ avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
+ defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
+ avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
}
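// Illustrative semantics (comment only): "vpmaddwd zmm1, zmm2, zmm3" multiplies
// adjacent signed word pairs and sums each pair into a signed dword of zmm1;
// "vpmaddubsw zmm1, zmm2, zmm3" multiplies unsigned bytes of zmm2 by signed
// bytes of zmm3 and adds adjacent products with signed saturation into words.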
-defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax,
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax,
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax,
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin,
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin,
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin,
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin,
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Unpack Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
- PatFrag mem_frag, RegisterClass RC,
- X86MemOperand x86memop, string asm,
- Domain d> {
- def rr : AVX512PI<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1, RC:$src2)))],
- d>, EVEX_4V;
- def rm : AVX512PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1,
- (bitconvert (mem_frag addr:$src2)))))],
- d>, EVEX_4V;
-}
-
-defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64,
- VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64,
- VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64,
- VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64,
- VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
- IIC_SSE_UNPCK>, EVEX_4V;
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))],
- IIC_SSE_UNPCK>, EVEX_4V;
-}
-defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
- VR512, loadv16i32, i512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
- VR512, loadv8i64, i512mem>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
- VR512, loadv16i32, i512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
- VR512, loadv8i64, i512mem>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
@@ -3362,12 +3643,12 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
let isCodeGenOnly = 1, isCommutable = IsCommutable,
Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
- (ins _.FRC:$src1, _.FRC:$src2),
+ (ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr>;
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
- (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rr>;
@@ -3375,7 +3656,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, OpndItins itins, bit IsCommutable> {
+ SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@@ -3470,7 +3751,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
EVEX_4V, EVEX_B;
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
IsCommutable>, EVEX_V512, PS,
@@ -3514,7 +3795,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
@@ -3550,13 +3831,34 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}//let mayLoad = 1
}
-multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
+multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>,
+ EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>,
+ EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
@@ -3569,7 +3871,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
-defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
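// Illustrative scalar forms enabled by threading the scalar opcode (0x2D)
// through avx512_fp_scalef_all (Intel syntax; operands chosen arbitrarily):
//   vscalefss xmm1 {k1}, xmm2, xmm3
//   vscalefsd xmm1, xmm2, xmm3, {rn-sae}   // static rounding via EVEX.RC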
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
@@ -3586,7 +3888,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
+ (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))>,
EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
@@ -3748,12 +4050,12 @@ multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>,
- avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>, EVEX_V128;
}
}
-multiclass avx512_shift_rmi_w<bits<8> opcw,
+multiclass avx512_shift_rmi_w<bits<8> opcw,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -3846,6 +4148,27 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
avx512vl_i64_info>, VEX_W;
}
+// Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable (NoVLX).
+multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
+ let Predicates = [HasBWI, NoVLX] in {
+ def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
+ (_.info256.VT _.info256.RC:$src2))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME#"WZrr")
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+ sub_ymm)>;
+
+ def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
+ (_.info128.VT _.info128.RC:$src2))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME#"WZrr")
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
+ sub_xmm)>;
+ }
+}
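// Sketch of what the lowering above produces: a 128/256-bit vpsllvw/vpsravw/
// vpsrlvw is not encodable without VLX, so both operands are placed into zmm
// registers (upper lanes undefined), the 512-bit instruction runs, and the
// low xmm/ymm of the result is extracted, e.g. (registers arbitrary):
//   vpsllvw zmm0, zmm0, zmm1    // result read back from the low ymm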
+
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -3861,11 +4184,14 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
- avx512_var_shift_w<0x12, "vpsllvw", shl>;
+ avx512_var_shift_w<0x12, "vpsllvw", shl>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
- avx512_var_shift_w<0x11, "vpsravw", sra>;
+ avx512_var_shift_w<0x11, "vpsravw", sra>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
@@ -3916,19 +4242,77 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPERMIL
+//===----------------------------------------------------------------------===//
+multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
+ defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1,
+ (Ctrl.VT Ctrl.RC:$src2)))>,
+ T8PD, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode
+ _.RC:$src1,
+ (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
+ T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode
+ _.RC:$src1,
+ (Ctrl.VT (X86VBroadcast
+ (Ctrl.ScalarLdFrag addr:$src2)))))>,
+ T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
+ AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
+ Ctrl.info512>, EVEX_V512;
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
+ Ctrl.info128>, EVEX_V128;
+ defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
+ Ctrl.info256>, EVEX_V256;
+ }
+}
+
+multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
+ AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
+
+ defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
+ defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
+ X86VPermilpi, _>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
+}
+
+defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
+ avx512vl_i32_info>;
+defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
+ avx512vl_i64_info>, VEX_W;
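// Illustrative forms covered by the two instantiations above (Intel syntax;
// operands chosen arbitrarily):
//   vpermilps zmm1 {k1}, zmm2, zmm3    // variable control (opcode 0x0C)
//   vpermilpd ymm1, ymm2, 0x5          // immediate control (opcode 0x05)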
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
- X86PShufd, avx512vl_i32_info>,
+ X86PShufd, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
- X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+ X86PShufhw>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
- X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
-
+ X86PShuflw>, EVEX, AVX512XDIi8Base;
+
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
@@ -3942,55 +4326,6 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
- X86MemOperand x86memop, PatFrag memop_frag> {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst,
- (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPZrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
-// Replicate Single FP - MOVSHDUP and MOVSLDUP
-//===---------------------------------------------------------------------===//
-multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
- ValueType vt, RegisterClass RC, PatFrag mem_frag,
- X86MemOperand x86memop> {
- def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
- let mayLoad = 1 in
- def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
-}
-
-defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
- (VMOVSHDUPZrm addr:$src)>;
-def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
- (VMOVSLDUPZrm addr:$src)>;
-
-//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
@@ -4017,6 +4352,115 @@ let Predicates = [HasAVX512] in {
}
//===----------------------------------------------------------------------===//
+// VMOVHPS/PD and VMOVLPS/PD Instructions
+// All patterns were taken from the SSE implementation.
+//===----------------------------------------------------------------------===//
+multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1,
+ (_.VT (bitconvert
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+}
+
+defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+
+let Predicates = [HasAVX512] in {
+ // VMOVHPS patterns
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVHPD patterns
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPS patterns
+ def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+}
+
+let mayStore = 1 in {
+def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
+ (bc_v2f64 (v4f32 VR128X:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128X:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+}
+let Predicates = [HasAVX512] in {
+ // VMOVHPD patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
+ (iPTR 0))), addr:$dst),
+ (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
+ // VMOVLPS patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+}
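// Illustrative assembly for the 128-bit forms defined above (Intel syntax;
// operands chosen arbitrarily):
//   vmovhps xmm1, xmm2, qword ptr [rax]   // load into the high 64 bits
//   vmovlps qword ptr [rax], xmm1         // store the low 64 bits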
+//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
@@ -4034,7 +4478,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
- AVX512FMA3Base;
+ AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -4435,50 +4879,55 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
-multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
- string asm> {
-let hasSideEffects = 0 in {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
- Requires<[HasAVX512]>;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
- Requires<[HasAVX512]>;
-} // hasSideEffects = 0
+multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat, string asm> {
+ let hasSideEffects = 0, Predicates = [HasAVX512] in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+ def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
+ !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>,
+ EVEX, VEX_LIG, EVEX_B, EVEX_RC;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+ } // hasSideEffects = 0, Predicates = [HasAVX512]
}
-let Predicates = [HasAVX512] in {
+
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+ int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+ int_x86_avx512_cvtss2usi,
ssmem, sse_load_f32, "cvtss2usi">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
int_x86_avx512_cvtss2usi64, ssmem,
sse_load_f32, "cvtss2usi">, XS, VEX_W,
EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
sdmem, sse_load_f64, "cvtsd2si">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+ int_x86_sse2_cvtsd2si64,
sdmem, sse_load_f64, "cvtsd2si">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+ int_x86_avx512_cvtsd2usi,
sdmem, sse_load_f64, "cvtsd2usi">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
int_x86_avx512_cvtsd2usi64, sdmem,
sse_load_f64, "cvtsd2usi">, XD, VEX_W,
EVEX_CD8<64, CD8VT1>;
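// The new "rb" variants take an explicit rounding-mode operand; an
// illustrative encoding (Intel syntax; operands chosen arbitrarily):
//   vcvtss2si eax, xmm0, {rd-sae}   // round toward -inf, no exceptions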
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, EVEX_4V;
@@ -4495,121 +4944,170 @@ let isCodeGenOnly = 1 in {
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
-} // isCodeGenOnly = 1
+} // isCodeGenOnly = 1, Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
-let isCodeGenOnly = 1 in {
- defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
- ssmem, sse_load_f32, "cvttss2si">,
- XS, EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
- sdmem, sse_load_f64, "cvttsd2si">, XD,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
- int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
- "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
- int_x86_avx512_cvttss2usi64, ssmem,
- sse_load_f32, "cvttss2usi">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
- int_x86_avx512_cvttsd2usi,
- sdmem, sse_load_f64, "cvttsd2usi">, XD,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
- int_x86_avx512_cvttsd2usi64, sdmem,
- sse_load_f64, "cvttsd2usi">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
-} // isCodeGenOnly = 1
-
-multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
+ X86VectorVTInfo _DstRC, SDNode OpNode,
+ SDNode OpNodeRnd>{
+let Predicates = [HasAVX512] in {
+ def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
+ def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ []>, EVEX, EVEX_B;
+ def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
-}
-
-defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
- loadf32, "cvttss2si">, XS,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
- loadf32, "cvttss2usi">, XS,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
- loadf32, "cvttss2si">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
- loadf32, "cvttss2usi">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
- loadf64, "cvttsd2si">, XD,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
- loadf64, "cvttsd2usi">, XD,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
- loadf64, "cvttsd2si">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
- loadf64, "cvttsd2usi">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
+ [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
+ EVEX;
+
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+ (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
+ def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+ (i32 FROUND_NO_EXC)))]>,
+ EVEX, VEX_LIG, EVEX_B;
+ let mayLoad = 1 in
+ def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
+ (ins _SrcRC.MemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, VEX_LIG;
+
+ } // isCodeGenOnly = 1, hasSideEffects = 0
+} //HasAVX512
+}
+
+
+defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info,
+ fp_to_sint, X86cvttss2IntRnd>,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
+ fp_to_sint, X86cvttss2IntRnd>,
+ VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
+ fp_to_sint, X86cvttsd2IntRnd>,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
+ fp_to_sint, X86cvttsd2IntRnd>,
+ VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
+ fp_to_uint, X86cvttss2UIntRnd>,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
+ fp_to_uint, X86cvttss2UIntRnd>,
+ XS, VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
+ fp_to_uint, X86cvttsd2UIntRnd>,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
+ fp_to_uint, X86cvttsd2UIntRnd>,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
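// Illustrative use of the truncating converts defined above, including the
// new {sae} form (Intel syntax; operands chosen arbitrarily):
//   vcvttss2si eax, xmm0
//   vcvttsd2usi rax, xmm0, {sae}   // suppress-all-exceptions form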
+let Predicates = [HasAVX512] in {
+ def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+ (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+ (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+ (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+ (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
- (ins FR32X:$src1, FR32X:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
- (ins FR32X:$src1, f32mem:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
- EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
- (ins FR64X:$src1, FR64X:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
- (ins FR64X:$src1, f64mem:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, VEX_W,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
-}
-
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
- Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
-
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNode> {
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2)))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (_Src.VT (scalar_to_vector
+ (_Src.ScalarLdFrag addr:$src2)))))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+// Scalar Conversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2),
+ (i32 FROUND_NO_EXC)))>,
+ EVEX_4V, VEX_LIG, EVEX_B;
+}
+
+// Scalar Conversion with rounding control (RC)
+multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+ EVEX_B, EVEX_RC;
+}
+multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, X86VectorVTInfo _src,
+ X86VectorVTInfo _dst> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
+ OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
+ EVEX_V512, XD;
+ }
+}
+
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, X86VectorVTInfo _src,
+ X86VectorVTInfo _dst> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
+ EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
+ }
+}
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround,
+ X86froundRnd, f64x_info, f32x_info>;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext,
+ X86fpextRnd, f32x_info, f64x_info>;
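// Illustrative scalar conversions produced by the two instantiations above
// (Intel syntax; operands chosen arbitrarily):
//   vcvtsd2ss xmm1 {k1}, xmm2, xmm3, {rn-sae}   // RC form, double -> single
//   vcvtss2sd xmm1, xmm2, xmm3, {sae}           // SAE form, single -> double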
+
+def : Pat<(f64 (fextend FR32X:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+ Requires<[HasAVX512]>;
+def : Pat<(f64 (fextend (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ Requires<[HasAVX512]>;
+
+def : Pat<(f64 (extloadf32 addr:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
- Requires<[HasAVX512, OptForSpeed]>;
+def : Pat<(f64 (extloadf32 addr:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+ Requires<[HasAVX512, OptForSpeed]>;
-def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+def : Pat<(f32 (fround FR64X:$src)),
+ (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;
-
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
@@ -4992,7 +5490,7 @@ defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
-let Predicates = [NoVLX] in {
+let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -5024,40 +5522,102 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}",
- []>, EVEX;
- let hasSideEffects = 0, mayLoad = 1 in
- def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
-}
-
-multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
- (ins srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX;
- let hasSideEffects = 0, mayStore = 1 in
- def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop, PatFrag ld_frag> {
+ defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_CURRENT))>, T8PD;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
+ (i32 FROUND_CURRENT))>, T8PD;
+ }
}
-defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
+multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+ defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "{sae}, $src", "$src, {sae}",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
-def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
- imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
- (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
+}
-def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
- (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
- (VCVTPH2PSZrr VR256X:$src)>;
+let Predicates = [HasAVX512] in {
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
+ loadv2i64>, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
+ loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
+}
+
+multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop> {
+ defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, $src1", "$src1, $src2",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>, AVX512AIi8Base;
+ let hasSideEffects = 0, mayStore = 1 in {
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2), (i32 FROUND_CURRENT) )),
+ addr:$dst)]>;
+ def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K;
+ }
+}
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+ defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base;
+}
+let Predicates = [HasAVX512] in {
+ defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+ EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
+ EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
+}
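// Illustrative half-precision conversions covered above (Intel syntax;
// operands chosen arbitrarily):
//   vcvtph2ps zmm1 {k1}, ymm2
//   vcvtps2ph ymm1, zmm2, 0x4   // immediate selects the rounding control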
+
+// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
+multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
+ string OpcodeStr> {
+ def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
+ (i32 FROUND_NO_EXC)))],
+ IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
+ Sched<[WriteFAdd]>;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX512] in {
+ defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">,
+ AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">,
+ AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">,
+ AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">,
+ AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
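// Illustrative SAE compare enabled above (Intel syntax; operands arbitrary):
//   vucomiss xmm1, xmm2, {sae}   // compare without raising FP exceptions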
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
@@ -5067,10 +5627,10 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
"ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
- defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
"comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
"comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
@@ -5092,50 +5652,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
-multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- let hasSideEffects = 0 in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let hasSideEffects = 0, AddedComplexity = 20, Predicates = [HasAVX512] in {
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
}
}
}
-defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-
-def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
-
-def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
+defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
+ EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
+ VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
+defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
+ EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
+ VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
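// Illustrative scalar approximation forms now defined via X86VectorVTInfo
// (Intel syntax; operands chosen arbitrarily):
//   vrcp14ss xmm1 {k1}{z}, xmm2, xmm3
//   vrsqrt14sd xmm1, xmm2, qword ptr [rax]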
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5183,20 +5724,6 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
-def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VRSQRT14PSZr VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VRSQRT14PDZr VR512:$src)>;
-
-def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VRCP14PSZr VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VRCP14PDZr VR512:$src)>;
-
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
@@ -5232,6 +5759,8 @@ let hasSideEffects = 0, Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}
+
+defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -5322,67 +5851,6 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
}
}
-multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
- Intrinsic F32Int, Intrinsic F64Int,
- OpndItins itins_s, OpndItins itins_d> {
- def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
- (ins FR32X:$src1, FR32X:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins_s.rr>, XS, EVEX_4V;
- let isCodeGenOnly = 1 in
- def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F32Int VR128X:$src1, VR128X:$src2))],
- itins_s.rr>, XS, EVEX_4V;
- let mayLoad = 1 in {
- def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
- (ins FR32X:$src1, f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
- let isCodeGenOnly = 1 in
- def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F32Int VR128X:$src1, sse_load_f32:$src2))],
- itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
- }
- def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
- (ins FR64X:$src1, FR64X:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- XD, EVEX_4V, VEX_W;
- let isCodeGenOnly = 1 in
- def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, VR128X:$src2))],
- itins_s.rr>, XD, EVEX_4V, VEX_W;
- let mayLoad = 1 in {
- def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
- (ins FR64X:$src1, f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
- let isCodeGenOnly = 1 in
- def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
- XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
- }
-}
-
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
@@ -5416,93 +5884,77 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
+multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
+ string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
+
+ defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in
+ defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT))>;
+
+ defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 imm:$rc))>,
+ EVEX_B, EVEX_RC;
+
+ let isCodeGenOnly = 1 in {
+ def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
+ let mayLoad = 1 in
+ def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+ }
+
+ def : Pat<(_.EltVT (OpNode _.FRC:$src)),
+ (!cast<Instruction>(NAME#SUFF#Zr)
+ (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
+
+ def : Pat<(_.EltVT (OpNode (load addr:$src))),
+ (!cast<Instruction>(NAME#SUFF#Zm)
+ (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
+}
+
+multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
+ defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
+ X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
+ defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
+ X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
+}
+
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
-defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
- int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
- SSE_SQRTSS, SSE_SQRTSD>;
+defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
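// Illustrative scalar square-root forms produced above (Intel syntax;
// operands chosen arbitrarily):
//   vsqrtss xmm1 {k1}, xmm2, xmm3
//   vsqrtsd xmm1, xmm2, xmm3, {rz-sae}   // static rounding via EVEX.RC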
let Predicates = [HasAVX512] in {
- def : Pat<(f32 (fsqrt FR32X:$src)),
- (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
- def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[OptForSize]>;
- def : Pat<(f64 (fsqrt FR64X:$src)),
- (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
- def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[OptForSize]>;
-
def : Pat<(f32 (X86frsqrt FR32X:$src)),
- (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
-
def : Pat<(f32 (X86frcp FR32X:$src)),
- (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
-
- def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
- (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128X:$src, FR32)),
- VR128X)>;
- def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
- (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
-
- def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
- (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128X:$src, FR64)),
- VR128X)>;
- def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
- (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
-}
-
-
-multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, RegisterClass RC,
- PatFrag mem_frag, Domain d> {
-let ExeDomain = d in {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def r : AVX512AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX;
-
- // Vector intrinsic operation, mem
- def m : AVX512AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX;
-} // ExeDomain
}
-defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
- loadv16f32, SSEPackedSingle>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
- imm:$src2, (v16f32 VR512:$src1), (i16 -1),
- FROUND_CURRENT)),
- (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
-
-
-defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
- loadv8f64, SSEPackedDouble>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
- imm:$src2, (v8f64 VR512:$src1), (i8 -1),
- FROUND_CURRENT)),
- (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
-
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
@@ -5510,20 +5962,20 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
- "{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
- (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
+ (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
let mayLoad = 1 in
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86RndScale (_.VT _.RC:$src1),
+ (_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
@@ -5568,109 +6020,238 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
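// Illustrative use (Intel syntax; operands chosen arbitrarily). The immediate
// picks the rounding operation, e.g. 0x1 = floor, 0x2 = ceil, 0x3 = trunc,
// 0x4 = rint, 0xC = nearbyint:
//   vrndscaless xmm1 {k1}, xmm2, xmm3, 0x1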
-let Predicates = [HasAVX512] in {
-def : Pat<(v16f32 (ffloor VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
-def : Pat<(v16f32 (fnearbyint VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
-def : Pat<(v16f32 (fceil VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
-def : Pat<(v16f32 (frint VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
-def : Pat<(v16f32 (ftrunc VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
-
-def : Pat<(v8f64 (ffloor VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
-def : Pat<(v8f64 (fnearbyint VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
-def : Pat<(v8f64 (fceil VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
-def : Pat<(v8f64 (frint VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
-def : Pat<(v8f64 (ftrunc VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
-}
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
- RegisterClass dstRC, RegisterClass srcRC,
- RegisterClass KRC, X86MemOperand x86memop> {
- def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins srcRC:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
+ X86MemOperand x86memop> {
+
+ defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
+ EVEX, T8XS;
+
+  // for intrinsic pattern match
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ undef)),
+          (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask,
+ SrcInfo.RC:$src1)>;
+
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ DestInfo.ImmAllZerosV)),
+          (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask,
+ SrcInfo.RC:$src1)>;
+
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
+                 DestInfo.KRCWM:$mask,
+ SrcInfo.RC:$src1)>;
+
+ let mayStore = 1 in {
+ def mr : AVX512XS8I<opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, SrcInfo.RC:$src),
+                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX;
- def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+                    OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
+  } // mayStore = 1
+}
- def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
+multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
+ X86VectorVTInfo DestInfo,
+                                    PatFrag truncFrag, PatFrag mtruncFrag> {
- def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
+ def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
+ addr:$dst, SrcInfo.RC:$src)>;
- def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
+ def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
+ (SrcInfo.VT SrcInfo.RC:$src)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
+ addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
+}
+
+multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
+                                        X86VectorVTInfo DestInfo, string sat> {
+
+ def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+ DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+ addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
+ (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
+ (SrcInfo.VT SrcInfo.RC:$src))>;
+ def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+ DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+ addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
+ (SrcInfo.VT SrcInfo.RC:$src))>;
}
-defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-
-def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
-def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
-def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
-def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
-def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
-
-def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
-def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
+multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+ X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+ X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+ X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
+ Predicate prd = HasAVX512>{
+
+ let Predicates = [HasVLX, prd] in {
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+ DestInfoZ128, x86memopZ128>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+ truncFrag, mtruncFrag>, EVEX_V128;
+
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+ DestInfoZ256, x86memopZ256>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+ truncFrag, mtruncFrag>, EVEX_V256;
+ }
+ let Predicates = [prd] in
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+ DestInfoZ, x86memopZ>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+ truncFrag, mtruncFrag>, EVEX_V512;
+}
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+ X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+ X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+ X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
+
+ let Predicates = [HasVLX, prd] in {
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+ DestInfoZ128, x86memopZ128>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+ sat>, EVEX_V128;
+
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+ DestInfoZ256, x86memopZ256>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+ sat>, EVEX_V256;
+ }
+ let Predicates = [prd] in
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+ DestInfoZ, x86memopZ>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+ sat>, EVEX_V512;
+}
+
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+ truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
+}
+multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+ sat>, EVEX_CD8<8, CD8VO>;
+}
+
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+ truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
+}
+multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
+ v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+ sat>, EVEX_CD8<16, CD8VQ>;
+}
+
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+ truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
+}
+multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
+ v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+ sat>, EVEX_CD8<32, CD8VH>;
+}
+
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+ truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
+}
+multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+ sat>, EVEX_CD8<8, CD8VQ>;
+}
+
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+ truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
+ v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+ sat>, EVEX_CD8<16, CD8VH>;
+}
+
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+ v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+                               truncstorevi8, masked_truncstorevi8, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
+ v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+ sat, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
+defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
+defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
+defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
+defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
+defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
+defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
+defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
+defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
+defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
+defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
+defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
+defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
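
Taken together, the qb/qw/qd/db/dw/wb multiclasses regenerate every VPMOV* truncate that the deleted block spelled out by hand, now with VLX-sized variants and masked-store lowering. A hedged C sketch of the three qb flavors (plain, signed-saturating, unsigned-saturating) plus the masked memory form matched by mtruncFrag:

    #include <immintrin.h>

    __m128i qb(__m512i v)   { return _mm512_cvtepi64_epi8(v);   } /* vpmovqb   */
    __m128i sqb(__m512i v)  { return _mm512_cvtsepi64_epi8(v);  } /* vpmovsqb  */
    __m128i usqb(__m512i v) { return _mm512_cvtusepi64_epi8(v); } /* vpmovusqb */

    /* Masked store form: only the bytes selected by k are written. */
    void qb_store(void *p, __mmask8 k, __m512i v) {
      _mm512_mask_cvtepi64_storeu_epi8(p, k, v);
    }
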
+
+let Predicates = [HasAVX512, NoVLX] in {
+def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
+ (v8i16 (EXTRACT_SUBREG
+ (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm)))), sub_xmm))>;
+def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
+ (v4i32 (EXTRACT_SUBREG
+ (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm)))), sub_xmm))>;
+}
+
+let Predicates = [HasBWI, NoVLX] in {
+def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
+ (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm))), sub_xmm))>;
+}
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
@@ -5985,163 +6566,11 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-//===----------------------------------------------------------------------===//
-// VSHUFPS - VSHUFPD Operations
-
-multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
- ValueType vt, string OpcodeStr, PatFrag mem_frag,
- Domain d> {
- def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
- EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
- def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
- EVEX_4V, Sched<[WriteShuffle]>;
-}
-
-defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", loadv16f32,
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", loadv8f64,
- SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v16i32 (X86Shufp VR512:$src1,
- (loadv16i32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v8i64 (X86Shufp VR512:$src1,
- (loadv8i64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
-multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
- RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop,
- X86MemOperand x86scalar_mop, string BrdcstStr> {
- let hasSideEffects = 0 in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
- []>, EVEX;
- let mayLoad = 1 in
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
- []>, EVEX;
- let mayLoad = 1 in
- def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in
- def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
- BrdcstStr, "}"),
- []>, EVEX, EVEX_KZ, EVEX_B;
-
- let Constraints = "$src1 = $dst" in {
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
- []>, EVEX, EVEX_K, EVEX_B;
- }
- }
-}
-
-let Predicates = [HasCDI] in {
-defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
- i512mem, i32mem, "{1to16}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
- i512mem, i64mem, "{1to8}">,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
- GR16:$mask),
- (VPCONFLICTDrrk VR512:$src1,
- (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
- GR8:$mask),
- (VPCONFLICTQrrk VR512:$src1,
- (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-let Predicates = [HasCDI] in {
-defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
- i512mem, i32mem, "{1to16}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
- i512mem, i64mem, "{1to8}">,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
- GR16:$mask),
- (VPLZCNTDrrk VR512:$src1,
- (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
- GR8:$mask),
- (VPLZCNTQrrk VR512:$src1,
- (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))),
- (VPLZCNTDrm addr:$src)>;
-def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
- (VPLZCNTDrr VR512:$src)>;
-def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))),
- (VPLZCNTQrm addr:$src)>;
-def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
- (VPLZCNTQrr VR512:$src)>;
-
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
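
The three patterns above lower i1 stores to plain byte moves, normalizing true to the byte value 1 whether it arrives as 1 or as the all-ones -1. A trivial C illustration:

    #include <stdbool.h>

    /* Neither store touches a mask register: i1 exists only as a register
       class, so memory traffic degrades to MOV8mi with 1 or 0. */
    void set_flags(bool *t, bool *f) {
      *t = true;
      *f = false;
    }
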
@@ -6197,7 +6626,7 @@ defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set _.KRC:$dst, (trunc (_.VT _.RC:$src)))]>, EVEX;
+ [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
@@ -6230,7 +6659,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1 in {
@@ -6242,7 +6671,7 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(store (_.VT (vselect _.KRCWM:$mask,
+ [(store (_.VT (vselect _.KRCWM:$mask,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
@@ -6272,7 +6701,7 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
let mayLoad = 1 in
@@ -6302,6 +6731,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
+//handle instruction reg_vec1 = op(reg_vec,imm)
+//                              op(mem_vec,imm)
+//                              op(broadcast(eltVt),imm)
+//all instructions are created with FROUND_CURRENT
+multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>;
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
+ "${src1}"##_.BroadcastStr##", $src2",
+ (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>, EVEX_B;
+ }
+}
+
+//handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
+multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
+                      "$src1, {sae}, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+}
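
The rrib form reuses the EVEX.b bit, which for register-register operands means "suppress all exceptions" rather than broadcast. A sketch using the _round_ intrinsic spelling, which exposes that control (immediates are illustrative):

    #include <immintrin.h>

    /* Truncate toward zero (imm8 = 0x3) without raising FP exceptions;
       _MM_FROUND_NO_EXC sets the SAE (EVEX.b) bit in the encoding. */
    __m512 trunc_quiet(__m512 x) {
      return _mm512_roundscale_round_ps(x, 0x3, _MM_FROUND_NO_EXC);
    }
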
+
+multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ EVEX_V512;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128;
+ defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256;
+ }
+}
+
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
@@ -6309,49 +6794,60 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
+multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+
+ defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT SrcInfo.RC:$src2),
+ (i8 imm:$src3)))>;
+ let mayLoad = 1 in
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT (bitconvert
+ (SrcInfo.LdFrag addr:$src2))),
+ (i8 imm:$src3)))>;
+}
+
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (i8 imm:$src3))>;
- let mayLoad = 1 in {
- defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i8 imm:$src3))>;
+ X86VectorVTInfo _>:
+ avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+
+ let mayLoad = 1 in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -6359,7 +6855,6 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B;
- }
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -6369,20 +6864,20 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let isAsmParserOnly = 1 in {
@@ -6398,18 +6893,25 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3,{sae}, $src2, $src1",
"$src1, $src2,{sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
- defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
+ defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
+                      OpcodeStr, "$src3, {sae}, $src2, $src1",
+                      "$src1, $src2, {sae}, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 imm:$src3),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
@@ -6428,6 +6930,20 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
}
}
+multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
+ AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
+ let Predicates = [HasBWI] in {
+ defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+ SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+ }
+ let Predicates = [HasBWI, HasVLX] in {
+ defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+ SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+ defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+ SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+ }
+}
+
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode>{
let Predicates = [HasAVX512] in {
@@ -6447,6 +6963,14 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
}
}
+multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
+ bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{
+ defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
+ opcPs, OpNode, prd>, EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
+ opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
+}
+
defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6461,6 +6985,14 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
0x55, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
+ X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
+defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
+ X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX;
+defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
+ X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX;
+
+
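
VREDUCE (gated on DQI), VRNDSCALE, and VGETMANT all hang off the same unary-with-immediate scaffolding; only opcodes and predicates differ, with VRNDSCALE keeping its 0x08/0x09 packed opcodes. A hedged C sketch of the two less familiar operations:

    #include <immintrin.h>

    /* vreduceps: x minus x rounded at 2^-4 granularity (imm8[7:4] = 4);
       needs -mavx512dq, matching the HasDQI predicate above. */
    __m512 frac4(__m512 x) { return _mm512_reduce_ps(x, 0x40); }

    /* vgetmantps: extract the mantissa, normalized into [1, 2). */
    __m512 mant(__m512 x) {
      return _mm512_getmant_ps(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
    }
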
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6475,6 +7007,19 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
+ 0x57, X86Reduces, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
+ 0x57, X86Reduces, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+
+defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
+ 0x27, X86GetMants, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
+ 0x27, X86GetMants, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode = X86Shuf128>{
@@ -6486,6 +7031,29 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 (ffloor VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
+}
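
These are the rounding patterns deleted earlier, re-pointed at the new Zrri instruction names. The vrndscale imm8 decodes as: bits [1:0] rounding mode, bit 2 defer to MXCSR, bit 3 suppress precision exceptions, hence 0xC for the quiet fnearbyint. A sketch of code that should reach the ffloor pattern:

    #include <math.h>

    /* With -O2 -mavx512f a vectorizing compiler can turn this loop into
       vrndscaleps $1 on zmm registers via the ffloor pattern above. */
    void floor_all(float *x, int n) {
      for (int i = 0; i < n; ++i)
        x[i] = floorf(x[i]);
    }
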
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
@@ -6496,31 +7064,51 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
- AVX512VLVectorVTInfo VTInfo_FP>{
+multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
- let isCodeGenOnly = 1 in {
- defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>,
- AVX512AIi8Base, EVEX_4V;
- }
}
-defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
+defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
EVEX_CD8<32, CD8VF>;
-defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
+defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_vpalign_lowering<X86VectorVTInfo _, list<Predicate> p>{
+ let Predicates = p in
+ def NAME#_.VTName#rri:
+ Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rri)
+ _.RC:$src1, _.RC:$src2, imm:$imm)>;
+}
+
+multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
+ avx512_vpalign_lowering<_.info512, [HasBWI]>,
+ avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
+ avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+
+defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
+ avx512vl_i8_info, avx512vl_i8_info>,
+ avx512_vpalign_lowering_common<avx512vl_i16_info>,
+ avx512_vpalign_lowering_common<avx512vl_i32_info>,
+ avx512_vpalign_lowering_common<avx512vl_f32_info>,
+ avx512_vpalign_lowering_common<avx512vl_i64_info>,
+ avx512_vpalign_lowering_common<avx512vl_f64_info>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
+ avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
+
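
VPALIGNR is defined once over byte vectors, and avx512_vpalign_lowering then re-matches X86PAlignr at the wider element types onto that same instruction; VDBPSADBW shares the two-source-plus-imm8 multiclass with a distinct destination type. A hedged C sketch (requires -mavx512bw; both immediates are illustrative):

    #include <immintrin.h>

    /* Per-128-bit-lane byte-wise concatenate and shift right by 5 bytes. */
    __m512i align5(__m512i a, __m512i b) { return _mm512_alignr_epi8(a, b, 5); }

    /* Double-block packed SAD of unsigned bytes into word accumulators. */
    __m512i dbsad(__m512i a, __m512i b) { return _mm512_dbsad_epu8(a, b, 0x94); }
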
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+ (ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
@@ -6531,7 +7119,7 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
let mayLoad = 1 in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+ (ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
(_.VT (OpNode (X86VBroadcast
@@ -6568,15 +7156,16 @@ multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, Predicate prd> {
- defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+ defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
prd>, VEX_W;
- defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+ defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
+ prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, Predicate prd> {
- defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
- defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+ defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
+ defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
@@ -6598,3 +7187,332 @@ def : Pat<(xor
(bc_v8i64 (v8i1sextv8i64)),
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
(VPABSQZrr VR512:$src)>;
+
+multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
+
+ defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
+}
+
+defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
+defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
+
+//===----------------------------------------------------------------------===//
+// Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===----------------------------------------------------------------------===//
+multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
+ defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
+ HasAVX512>, XS;
+}
+
+defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
+defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
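
Both replicate defms reuse avx512_unary_rm_vl over the f32 layouts; only the opcode differs. Sketch:

    #include <immintrin.h>

    /* Duplicate the odd (high) float of each pair: vmovshdup. */
    __m512 dup_hi(__m512 x) { return _mm512_movehdup_ps(x); }
    /* Duplicate the even (low) float of each pair: vmovsldup. */
    __m512 dup_lo(__m512 x) { return _mm512_moveldup_ps(x); }
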
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src)))))>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+}
+
+multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+
+ defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
+ defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
+ avx512vl_f64_info>, XD, VEX_W;
+}
+
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
+
+def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+ (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
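
The 128-bit MOVDDUP needs its own multiclass because it folds a 64-bit scalar load (CD8VH tuple) instead of a full vector load; the two trailing patterns additionally catch whole-vector and broadcast loads. Sketch:

    #include <immintrin.h>

    __m512d dup_even(__m512d x) { return _mm512_movedup_pd(x); }

    /* Load-and-duplicate one double; with -mavx512vl this corresponds to
       the folded VMOVDDUPZ128rm form matched above. */
    __m128d load_dup(const double *p) { return _mm_loaddup_pd(p); }
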
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>;
+defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>;
+
+defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasBWI>;
+
+defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasAVX512>;
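
The unpacks all funnel through the existing binop multiclasses: byte/word forms gate on HasBWI, dword/qword on base AVX512F. Sketch:

    #include <immintrin.h>

    /* Interleave the low dwords within each 128-bit lane: vpunpckldq. */
    __m512i ilv_lo(__m512i a, __m512i b) { return _mm512_unpacklo_epi32(a, b); }
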
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Extract & Insert Integer Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayStore = 1 in
+ def mr : AVX512Ii8<opc, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (_.EltVT (trunc (assertzext (OpNode (_.VT _.RC:$src1),
+ imm:$src2)))),
+ addr:$dst)]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
+}
+
+multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst,
+ (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, TAPD;
+
+ defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
+ }
+}
+
+multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst,
+ (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, PD;
+
+ def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX, TAPD;
+
+ defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
+ }
+}
+
+multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
+ RegisterClass GRC> {
+ let Predicates = [HasDQI] in {
+ def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GRC:$dst,
+ (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, TAPD;
+
+ let mayStore = 1 in
+ def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (extractelt (_.VT _.RC:$src1),
+                  imm:$src2), addr:$dst)]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
+ }
+}
+
+defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
+defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
+defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
+defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
+
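
These add the EVEX-encoded VPEXTR* forms: b/w under BWI, d/q under DQI, with memory forms that truncate the zero-extended result back to element width. The usual SSE4.1 intrinsics reach them once the features are enabled; a hedged sketch:

    #include <immintrin.h>

    /* With -mavx512bw / -mavx512dq these can select EVEX vpextrb/vpextrq. */
    int byte3(__m128i v)       { return _mm_extract_epi8(v, 3); }
    long long lane1(__m128i v) { return _mm_extract_epi64(v, 1); }
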
+multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, PatFrag LdFrag> {
+ def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+}
+
+multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, PatFrag LdFrag> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V;
+
+ defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
+ }
+}
+
+multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, RegisterClass GRC> {
+ let Predicates = [HasDQI] in {
+ def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
+ EVEX_4V, TAPD;
+
+ defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
+ _.ScalarLdFrag>, TAPD;
+ }
+}
+
+defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
+ extloadi8>, TAPD;
+defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
+ extloadi16>, PD;
+defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
+defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
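
The insert side mirrors the extracts (VPINSRB/W under BWI, VPINSRD/Q under DQI) and shares a memory-form multiclass that folds the scalar load. Sketch:

    #include <immintrin.h>

    /* Replace dword 2 of v: vpinsrd $2 (EVEX form with -mavx512dq). */
    __m128i set2(__m128i v, int x) { return _mm_insert_epi32(v, x, 2); }
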
+//===----------------------------------------------------------------------===//
+// VSHUFPS - VSHUFPD Operations
+//===----------------------------------------------------------------------===//
+multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
+ AVX512VLVectorVTInfo VTInfo_FP>{
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
+ EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
+ AVX512AIi8Base, EVEX_4V;
+}
+
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
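
VSHUFPS/VSHUFPD are rebuilt on avx512_common_3Op_imm8, which supplies register, memory, and broadcast forms with masking at all three widths, replacing the deleted 512-bit-only multiclass and its separate integer-typed patterns. Sketch:

    #include <immintrin.h>

    /* Per-128-bit-lane shuffle taking two floats from each source. */
    __m512 mix(__m512 a, __m512 b) {
      return _mm512_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 3, 2));
    }
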
+//===----------------------------------------------------------------------===//
+// AVX-512 - Byte shift Left/Right
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
+ Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
+ def rr : AVX512<opc, MRMr,
+ (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
+ let mayLoad = 1 in
+ def rm : AVX512<opc, MRMm,
+ (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,(_.VT (OpNode
+ (_.LdFrag addr:$src1), (i8 imm:$src2))))]>;
+}
+
+multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
+ Format MRMm, string OpcodeStr, Predicate prd>{
+ let Predicates = [prd] in
+ defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v8i64_info>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v4i64x_info>, EVEX_V256;
+ defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v2i64x_info>, EVEX_V128;
+ }
+}
+defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
+ HasBWI>, AVX512PDIi8Base, EVEX_4V;
+defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
+ HasBWI>, AVX512PDIi8Base, EVEX_4V;
+
+
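
The whole-byte shifts have no masked or broadcast forms, so they bypass AVX512_maskable and define bare rr/rm pairs over the i64 layouts. Sketch (requires -mavx512bw):

    #include <immintrin.h>

    /* Shift each 128-bit lane left by 4 bytes: vpslldq $4. */
    __m512i shl4(__m512i v) { return _mm512_bslli_epi128(v, 4); }
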
+multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
+ string OpcodeStr, X86VectorVTInfo _dst,
+ X86VectorVTInfo _src>{
+ def rr : AVX512BI<opc, MRMSrcReg,
+ (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _dst.RC:$dst,(_dst.VT
+ (OpNode (_src.VT _src.RC:$src1),
+ (_src.VT _src.RC:$src2))))]>;
+ let mayLoad = 1 in
+ def rm : AVX512BI<opc, MRMSrcMem,
+ (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _dst.RC:$dst,(_dst.VT
+ (OpNode (_src.VT _src.RC:$src1),
+ (_src.VT (bitconvert
+ (_src.LdFrag addr:$src2))))))]>;
+}
+
+multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
+ string OpcodeStr, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
+ v64i8_info>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
+ v32i8x_info>, EVEX_V256;
+ defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
+ v16i8x_info>, EVEX_V128;
+ }
+}
+
+defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
+ HasBWI>, EVEX_4V;
+
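
VPSADBW likewise has no masked form, and takes separate source and destination type parameters because byte inputs accumulate into qword sums. Sketch:

    #include <immintrin.h>

    /* Sum of absolute byte differences, one u64 total per 8-byte group. */
    __m512i sad(__m512i a, __m512i b) { return _mm512_sad_epu8(a, b); }
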
+multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst" in {
+ defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
+                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT _.RC:$src3),
+ (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
+                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT (bitconvert (_.LdFrag addr:$src3))),
+ (i8 imm:$src4))>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
+ OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ (i8 imm:$src4))>, EVEX_B,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ }
+ }// Constraints = "$src1 = $dst"
+}
+
+multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
+ defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
+ }
+}
+
+defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
+defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
+
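
VPTERNLOG's imm8 is a three-input truth table indexed by the bit triple (src1, src2, src3): 0x96 is a three-way XOR, 0xE8 a majority vote. A minimal sketch:

    #include <immintrin.h>

    /* a ^ b ^ c in a single instruction: truth table 0x96. */
    __m512i xor3(__m512i a, __m512i b, __m512i c) {
      return _mm512_ternarylogic_epi32(a, b, c, 0x96);
    }
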
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5e19ad4..1a2e786 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -615,14 +615,14 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
-def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
- Imm8 , i8imm , imm, i8imm , invalid_node,
+def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
+ Imm8, i8imm, imm8_su, i8imm, invalid_node,
0, OpSizeFixed, 0>;
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
- Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su,
1, OpSize16, 0>;
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
- Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
1, OpSize32, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
Imm32S, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
@@ -928,15 +928,22 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let hasSideEffects = 0;
}
-// BinOpAI_FF - Instructions like "adc %eax, %eax, imm", that implicitly define
+// BinOpAI_RFF - Instructions like "adc %eax, %eax, imm", that implicitly define
// and use EFLAGS.
-class BinOpAI_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
+class BinOpAI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
: BinOpAI<opcode, mnemonic, typeinfo, areg, operands,
IIC_BIN_CARRY_NONMEM> {
let Uses = [areg, EFLAGS];
}
+// BinOpAI_F - Instructions like "cmp %eax, %eax, imm", that imp-def EFLAGS.
+class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
+ : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
+ let Defs = [EFLAGS];
+}
+
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
///
@@ -1092,14 +1099,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Uses = [EFLAGS], Defs = [EFLAGS]
- def NAME#8i8 : BinOpAI_FF<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI_FF<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI_FF<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI_FF<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def NAME#8i8 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def NAME#32i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def NAME#64i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
}
/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
@@ -1170,14 +1177,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Defs = [EFLAGS]
- def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
}
@@ -1246,14 +1253,14 @@ let isCompare = 1 in {
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]
- def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
- "{$src, %al|al, $src}">;
- def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
} // isCompare
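
Splitting BinOpAI_F off from BinOpAI lets TEST and CMP carry an explicit EFLAGS def instead of relying on a pattern to imply it. A trivial C sketch of code that ends in a flag-only test:

    #include <stdbool.h>

    /* The comparison compiles to a test instruction whose only output is
       EFLAGS, which the new BinOpAI_F class models via Defs = [EFLAGS]. */
    bool has_flag(int x) { return (x & 0x100) != 0; }
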
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
index 2056056..787f15b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -156,10 +156,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
Flags |= MachineMemOperand::MOLoad;
if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 315f213..c73c950 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
-// SetCC instructions.
+// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [WriteALU] in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 7f850d6..5d7283f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -132,26 +132,6 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
Requires<[In64BitMode]>;
}
-// The MSVC runtime contains an _ftol2 routine for converting floating-point
-// to integer values. It has a strange calling convention: the input is
-// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
-// used as a temporary register. No other registers (aside from flags) are
-// touched.
-// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
-// variant is unnecessary.
-
-let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
- def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
- "# win32 fptoui",
- [(X86WinFTOL RFP32:$src)]>,
- Requires<[Not64BitMode]>;
-
- def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
- "# win32 fptoui",
- [(X86WinFTOL RFP64:$src)]>,
- Requires<[Not64BitMode]>;
-}
-
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
@@ -172,6 +152,29 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ isCodeGenOnly = 1, isReturn = 1 in {
+ def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
+
+ // CATCHRET needs a custom inserter for SEH.
+ let usesCustomInserter = 1 in
+ def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from),
+ "# CATCHRET",
+ [(catchret bb:$dst, bb:$from)]>;
+}
+
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>;
+
+// This instruction is responsible for re-establishing stack pointers after an
+// exception has been caught and we are rejoining normal control flow in the
+// parent function or funclet. It generally sets ESP and EBP, and optionally
+// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us
+// elsewhere.
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in
+def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;
+
let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
usesCustomInserter = 1 in {
def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
@@ -247,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1 in
+ isPseudo = 1, AddedComplexity = 20 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
@@ -259,6 +262,33 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
let AddedComplexity = 20;
}
+let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+ AddedComplexity = 15 in {
+  // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
+  // which only require 3 bytes, compared to MOV32ri, which requires 5.
+ let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
+ def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 1)]>;
+ def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, -1)]>;
+ }
+
+ // MOV16ri is 4 bytes, so the instructions above are smaller.
+ def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
+ def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
+}
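(Editorial aside, a sketch rather than part of the patch: the expected expansion is the two-byte `xorl %eax, %eax` followed by a one-byte `incl`/`decl`, versus the five-byte `movl $1, %eax`. The one-byte inc/dec encodings exist only outside 64-bit mode, which is why the block is guarded by Not64BitMode.)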
+
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
+// FIXME: Add itinerary class and Schedule.
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+ [(set GR32:$dst, i32immSExt8:$src)]>,
+ Requires<[OptForMinSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+ [(set GR64:$dst, i64immSExt8:$src)]>,
+ Requires<[OptForMinSize, NotWin64WithoutFP]>;
+}
+
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
@@ -268,9 +298,9 @@ def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
-// actually the zero-extension of a 32-bit constant, and for labels in the
+// actually the zero-extension of a 32-bit constant and for labels in the
// x86-64 small code model.
-def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
+def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
let AddedComplexity = 1 in
def : Pat<(i64 mov64imm32:$src),
@@ -509,6 +539,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in {
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;
defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
@@ -752,67 +783,111 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
/* The following multiclass tries to make sure that in code like
* x.store (immediate op x.load(acquire), release)
+ * and
+ * x.store (register op x.load(acquire), release)
* an operation directly on memory is generated instead of wasting a register.
* It is not automatic as atomic_store/load are only lowered to MOV instructions
* extremely late to prevent them from being accidentally reordered in the backend
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
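As an editorial illustration (a minimal C++ sketch, not part of the patch; the function name is invented):

    #include <atomic>
    void or_flag_release(std::atomic<int> &x) {
      // With the mi/mr pseudos below, this can lower to a single
      // `or dword ptr [x], 1` instead of a load, an OR through a
      // scratch register, and a store.
      x.store(x.load(std::memory_order_acquire) | 1,
              std::memory_order_release);
    }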
-multiclass RELEASE_BINOP_MI<string op> {
+multiclass RELEASE_BINOP_MI<SDNode op> {
def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"8mi PSEUDO!",
+ [(atomic_store_8 addr:$dst, (op
(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
+ def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
+ "#BINOP "#NAME#"8mr PSEUDO!",
+ [(atomic_store_8 addr:$dst, (op
+ (atomic_load_8 addr:$dst), GR8:$src))]>;
// NAME#16 is not generated as 16-bit arithmetic instructions are considered
// costly and avoided as far as possible by this backend anyway
def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"32mi PSEUDO!",
+ [(atomic_store_32 addr:$dst, (op
(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
+ def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#BINOP "#NAME#"32mr PSEUDO!",
+ [(atomic_store_32 addr:$dst, (op
+ (atomic_load_32 addr:$dst), GR32:$src))]>;
def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"64mi32 PSEUDO!",
+ [(atomic_store_64 addr:$dst, (op
(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
+ def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#BINOP "#NAME#"64mr PSEUDO!",
+ [(atomic_store_64 addr:$dst, (op
+ (atomic_load_64 addr:$dst), GR64:$src))]>;
+}
+let Defs = [EFLAGS] in {
+ defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
+ defm RELEASE_AND : RELEASE_BINOP_MI<and>;
+ defm RELEASE_OR : RELEASE_BINOP_MI<or>;
+ defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
+  // Note: we don't deal with sub, because subtractions of constants are
+  // optimized into additions before this code can run.
+}
+
+// Same as above, but for floating-point.
+// FIXME: imm version.
+// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
+// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
+let usesCustomInserter = 1 in {
+multiclass RELEASE_FP_BINOP_MI<SDNode op> {
+ def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
+ "#BINOP "#NAME#"32mr PSEUDO!",
+ [(atomic_store_32 addr:$dst,
+ (i32 (bitconvert (op
+ (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
+ FR32:$src))))]>, Requires<[HasSSE1]>;
+ def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
+ "#BINOP "#NAME#"64mr PSEUDO!",
+ [(atomic_store_64 addr:$dst,
+ (i64 (bitconvert (op
+ (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
+ FR64:$src))))]>, Requires<[HasSSE2]>;
+}
+defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
+// FIXME: Add fsub, fmul, fdiv, ...
}
-defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
-defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
-defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
-defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
-// Note: we don't deal with sub, because substractions of constants are
-// optimized into additions before this code can run
multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"8m PSEUDO!",
[(atomic_store_8 addr:$dst, dag8)]>;
def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"16m PSEUDO!",
[(atomic_store_16 addr:$dst, dag16)]>;
def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"32m PSEUDO!",
[(atomic_store_32 addr:$dst, dag32)]>;
def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"64m PSEUDO!",
[(atomic_store_64 addr:$dst, dag64)]>;
}
-defm RELEASE_INC : RELEASE_UNOP<
- (add (atomic_load_8 addr:$dst), (i8 1)),
- (add (atomic_load_16 addr:$dst), (i16 1)),
- (add (atomic_load_32 addr:$dst), (i32 1)),
- (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
-defm RELEASE_DEC : RELEASE_UNOP<
- (add (atomic_load_8 addr:$dst), (i8 -1)),
- (add (atomic_load_16 addr:$dst), (i16 -1)),
- (add (atomic_load_32 addr:$dst), (i32 -1)),
- (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+let Defs = [EFLAGS] in {
+ defm RELEASE_INC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 1)),
+ (add (atomic_load_16 addr:$dst), (i16 1)),
+ (add (atomic_load_32 addr:$dst), (i32 1)),
+ (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+ defm RELEASE_DEC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 -1)),
+ (add (atomic_load_16 addr:$dst), (i16 -1)),
+ (add (atomic_load_32 addr:$dst), (i32 -1)),
+ (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+}
/*
TODO: These don't work because the type inference of TableGen fails.
TODO: find a way to fix it.
-defm RELEASE_NEG : RELEASE_UNOP<
- (ineg (atomic_load_8 addr:$dst)),
- (ineg (atomic_load_16 addr:$dst)),
- (ineg (atomic_load_32 addr:$dst)),
- (ineg (atomic_load_64 addr:$dst))>;
+let Defs = [EFLAGS] in {
+ defm RELEASE_NEG : RELEASE_UNOP<
+ (ineg (atomic_load_8 addr:$dst)),
+ (ineg (atomic_load_16 addr:$dst)),
+ (ineg (atomic_load_32 addr:$dst)),
+ (ineg (atomic_load_64 addr:$dst))>;
+}
+// NOT doesn't set flags.
defm RELEASE_NOT : RELEASE_UNOP<
(not (atomic_load_8 addr:$dst)),
(not (atomic_load_16 addr:$dst)),
@@ -821,42 +896,42 @@ defm RELEASE_NOT : RELEASE_UNOP<
*/
def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV8mi PSEUDO!",
[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV16mi PSEUDO!",
[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV32mi PSEUDO!",
[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV64mi32 PSEUDO!",
[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV8mr PSEUDO!",
[(atomic_store_8 addr:$dst, GR8 :$src)]>;
def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV16mr PSEUDO!",
[(atomic_store_16 addr:$dst, GR16:$src)]>;
def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV32mr PSEUDO!",
[(atomic_store_32 addr:$dst, GR32:$src)]>;
def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV64mr PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;
def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV8rm PSEUDO!",
[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV16rm PSEUDO!",
[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV32rm PSEUDO!",
[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV64rm PSEUDO!",
[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
//===----------------------------------------------------------------------===//
@@ -1077,11 +1152,11 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
-def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
-def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
+def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>;
+def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0),
- (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
+ (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
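(Aside: the ri8 forms encode the immediate as a sign-extended 8-bit field, so for the mask 1 they shrink the encoding — by three bytes per instruction in the 32-bit case — relative to the full-immediate AND forms.)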
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
@@ -1298,7 +1373,6 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
-let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 4cd5563..8c351a5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -53,6 +53,19 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
"{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16;
+
+  // The machine instructions for returning from an interrupt. Sometimes we
+  // need to perform a post-epilogue stack adjustment, so codegen emits the
+  // pseudo form, which expands to include an SP adjustment if necessary.
+ def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>,
+ OpSize16;
+ def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [],
+ IIC_IRET>, OpSize32;
+ def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [],
+ IIC_IRET>, Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def IRET : PseudoI<(outs), (ins i16imm:$adj), [(X86iret timm:$adj)]>;
+
}
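(Editorial note: the $adj operand exists because some interrupts push an error code that the handler must pop before returning; the pseudo therefore expands to a stack-pointer adjustment followed by the IRET of the appropriate width.)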
// Unconditional branches.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index 7cc3b59..fd800cf 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -15,13 +15,31 @@
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+// For all FMA opcodes declared in the fma3p_rm and fma3s_rm multiclasses
+// defined
+// below, both the register and memory variants are commutable.
+// For the register form the commutable operands are 1, 2 and 3.
+// For the memory variant the folded operand must be in 3. Thus,
+// in that case, only the operands 1 and 2 can be swapped.
+// Commuting some of the operands may require an opcode change.
+// FMA*213*:
+// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
+// operands 1 and 3 (register forms only): *213* --> *231*;
+// operands 2 and 3 (register forms only): *213* --> *132*.
+// FMA*132*:
+// operands 1 and 2 (memory & register forms): *132* --> *231*;
+// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
+// operands 2 and 3 (register forms only): *132* --> *213*.
+// FMA*231*:
+// operands 1 and 2 (memory & register forms): *231* --> *132*;
+// operands 1 and 3 (register forms only): *231* --> *213*;
+// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
+
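(Editorial orientation for the table above, with operand 1 being the tied $src1/$dst: *132* computes op1*op3 + op2, *213* computes op2*op1 + op3, and *231* computes op2*op3 + op1. Swapping operands 1 and 3 of a *213*, for instance, turns op2*op1 + op3 into op2*op3 + op1, which is exactly the *231* computation.)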
+let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
- bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator Op = null_frag> {
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -29,7 +47,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
@@ -37,7 +55,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -45,7 +63,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
VR256:$src3)))]>, VEX_L;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
@@ -54,34 +72,20 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
(OpVT256 (Op VR256:$src2, VR256:$src1,
(MemFrag256 addr:$src3))))]>, VEX_L;
}
-} // Constraints = "$src1 = $dst"
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
- // For 213, both the register and memory variant are commutable.
- // Indeed, the commutable operands are 1 and 2 and both live in registers
- // for both variants.
defm r213 : fma3p_rm<opc213,
!strconcat(OpcodeStr, "213", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 1,
- Op>;
-let hasSideEffects = 0 in {
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
- // For 231, only the register variant is commutable.
- // For the memory variant the folded operand must be in 3. Thus,
- // in that case, it cannot be swapped with 2.
defm r231 : fma3p_rm<opc231,
!strconcat(OpcodeStr, "231", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 0>;
-} // hasSideEffects = 0
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
}
// Fused Multiply-Add
@@ -126,83 +130,122 @@ let ExeDomain = SSEPackedDouble in {
v4f64>, VEX_W;
}
-let Constraints = "$src1 = $dst" in {
-multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
- RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
- bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
+// All source register operands of FMA opcodes defined in the fma3s_rm
+// multiclass can be commuted. In many cases such a commute transformation
+// requires an opcode adjustment; for example, commuting operands 1 and 2 of
+// the FMA*132 form would require an opcode change to FMA*231:
+// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
+// -->
+// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
+// Please see the more detailed comment at the very beginning of the section
+// defining FMA3 opcodes above.
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode = null_frag> {
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1,
- (mem_frag addr:$src3))))]>;
+ (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>;
+}
+
+// These FMA*_Int instructions are defined specially for use when the scalar
+// FMA intrinsics are lowered to machine instructions; in that sense they are
+// similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc. instructions.
+//
+// All of the FMA*_Int opcodes are defined as commutable here.
+// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
+// and the corresponding optimizations have been developed.
+// Commuting the 1st operand of FMA*_Int requires some additional analysis:
+// the commute optimization is legal only if all users of FMA*_Int use only
+// the lowest element of the FMA*_Int instruction. Even though such analysis
+// may not be implemented yet, we allow the routines doing the actual commute
+// transformation to decide whether one or another instruction is commutable.
+let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
+ hasSideEffects = 0 in
+multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
+ Operand memopr, RegisterClass RC> {
+ def r_Int : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ []>;
+
+ let mayLoad = 1 in
+ def m_Int : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, memopr:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ []>;
}
-} // Constraints = "$src1 = $dst"
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, string PackTy, string PT2, Intrinsic Int,
- SDNode OpNode, RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
- ComplexPattern mem_cpat> {
-let hasSideEffects = 0 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
- x86memop, RC, OpVT, mem_frag>;
- // See the other defm of r231 for the explanation regarding the
- // commutable flags.
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC, OpVT, mem_frag,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 0>;
+ string OpStr, string PackTy,
+ SDNode OpNode, RegisterClass RC,
+ X86MemOperand x86memop> {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC>;
+ defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC,
+ OpNode>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC>;
}
-// See the other defm of r213 for the explanation regarding the
-// commutable flags.
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpVT, mem_frag,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 1,
- OpNode>;
+// The FMA 213 form is created for lowering of scalar FMA intrinsics
+// to machine instructions.
+// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
+// operands of the FMA 213 form.
+// The FMA 231 form can be obtained only by commuting the 1st operand of the
+// 213 or 132 forms, which is possible only after a special analysis of all
+// uses of the initial instruction. Such an analysis does not exist yet, so
+// the 231 form of the FMA*_Int instructions is introduced under the
+// optimistic assumption that such analysis will be implemented eventually.
+multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, string PackTy,
+ RegisterClass RC, Operand memop> {
+ defm r132 : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
+ memop, RC>;
+ defm r213 : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
+ memop, RC>;
+ defm r231 : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
+ memop, RC>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
- defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", IntF32, OpNode,
- FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
- defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
- FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+ let ExeDomain = SSEPackedSingle in
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode,
+ FR32, f32mem>,
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128, ssmem>;
+
+ let ExeDomain = SSEPackedDouble in
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode,
+ FR64, f64mem>,
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128, sdmem>,
+ VEX_W;
-// These patterns use the 123 ordering, instead of 213, even though
-// they match the intrinsic to the 213 version of the instruction.
-// This is because src1 is tied to dest, and the scalar intrinsics
-// require the pass-through values to come from the first source
-// operand, not the second.
+ // These patterns use the 123 ordering, instead of 213, even though
+ // they match the intrinsic to the 213 version of the instruction.
+ // This is because src1 is tied to dest, and the scalar intrinsics
+ // require the pass-through values to come from the first source
+ // operand, not the second.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"SSr213r")
- (COPY_TO_REGCLASS $src1, FR32),
- (COPY_TO_REGCLASS $src2, FR32),
- (COPY_TO_REGCLASS $src3, FR32)),
- VR128)>;
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int")
+ $src1, $src2, $src3), VR128)>;
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"SDr213r")
- (COPY_TO_REGCLASS $src1, FR64),
- (COPY_TO_REGCLASS $src2, FR64),
- (COPY_TO_REGCLASS $src3, FR64)),
- VR128)>;
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int")
+ $src1, $src2, $src3), VR128)>;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
@@ -334,36 +377,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
} // isCodeGenOnly = 1
}
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
- fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
- fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
- X86Fnmadd, loadf32>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
- X86Fnmadd, loadf64>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
- X86Fnmsub, loadf32>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
- X86Fnmsub, loadf64>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
-
let ExeDomain = SSEPackedSingle in {
+ // Scalar Instructions
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+ // Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
@@ -379,6 +409,22 @@ let ExeDomain = SSEPackedSingle in {
}
let ExeDomain = SSEPackedDouble in {
+ // Scalar Instructions
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+ // Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index 49068e9..03ae211 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -137,69 +137,99 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
-multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
+ bit Forward = 1> {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP32:$dst,
- (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2))),
+ (set RFP32:$dst,
+ (OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
- (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2))),
+ (set RFP64:$dst,
+ (OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
- (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
+ (set RFP64:$dst,
+ (OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
- (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
+ (set RFP80:$dst,
+ (OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
- (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
+ (set RFP80:$dst,
+ (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("f", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
- [(set RFP32:$dst, (OpNode RFP32:$src1,
- (X86fild addr:$src2, i16)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
+ (set RFP32:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
- [(set RFP32:$dst, (OpNode RFP32:$src1,
- (X86fild addr:$src2, i32)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
+ (set RFP32:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
- [(set RFP64:$dst, (OpNode RFP64:$src1,
- (X86fild addr:$src2, i16)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
+ (set RFP64:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
- [(set RFP64:$dst, (OpNode RFP64:$src1,
- (X86fild addr:$src2, i32)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
+ (set RFP64:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
- OneArgFPRW,
- [(set RFP80:$dst, (OpNode RFP80:$src1,
- (X86fild addr:$src2, i16)))]>;
+ OneArgFPRW,
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
+ (set RFP80:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
- OneArgFPRW,
- [(set RFP80:$dst, (OpNode RFP80:$src1,
- (X86fild addr:$src2, i32)))]>;
+ OneArgFPRW,
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
+ (set RFP80:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("fi", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("fi", asmstring, "{l}\t$src")>;
}
let Defs = [FPSW] in {
@@ -213,14 +243,14 @@ defm DIV : FPBinary_rr<fdiv>;
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr", 0>;
}
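An editorial note on the new Forward bit: the x87 reverse forms compute [mem] OP ST(0) rather than ST(0) OP [mem], so SUBR and DIVR pass Forward = 0 to swap the pattern operands. A minimal C++ sketch of the shape that benefits (names invented):

    double sub_reversed(double st0, const double *mem) {
      return *mem - st0; // [mem] - ST(0): the fsubr{s,l} operand order
    }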
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
}
}
@@ -306,13 +336,13 @@ def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
-def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs i16mem:$dst), (ins), "fnstsw\t$dst">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
-def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
-def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f80mem:$dst), (ins), "fbstp\t$dst">;
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
@@ -633,16 +663,18 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
-def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
-def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
- IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
-def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
-def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
- IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+let Predicates = [HasFXSR] in {
+ def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+ def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
+ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
+ def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+} // Predicates = [HasFXSR]
} // SchedRW
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1f61ffa..829cedd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
+def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>, SDTCisInt<3>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -58,13 +60,17 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
+def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
@@ -74,11 +80,18 @@ def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psadbw : SDNode<"X86ISD::PSADBW",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
+def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -86,9 +99,11 @@ def X86psign : SDNode<"X86ISD::PSIGN",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
+ SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
+ SDTCisPtrTy<2>]>>;
def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
@@ -114,19 +129,17 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<1, 0>]>>;
-def X86vtrunc : SDNode<"X86ISD::VTRUNC",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<0, 1>]>>;
+def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
+
+def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
+def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
+def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+
def X86trunc : SDNode<"X86ISD::TRUNC",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
-
-def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVec<2>, SDTCisInt<2>,
- SDTCisOpSmallerThanOp<0, 2>]>>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
@@ -136,6 +149,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTCisFP<0>, SDTCisFP<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86fround: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCVecEltisVT<2, f64>,
+ SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86froundRnd: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCVecEltisVT<2, f64>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisInt<3>]>>;
+
+def X86fpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCVecEltisVT<2, f32>,
+ SDTCisOpSmallerThanOp<1, 0>]>>;
+
+def X86fpextRnd : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCVecEltisVT<2, f32>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisInt<3>]>>;
+
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -159,10 +201,15 @@ def X86CmpMaskCCRound :
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86CmpMaskCCScalarRound :
+ SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
+ SDTCisInt<4>]>;
+
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -178,6 +225,29 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86vprot : SDNode<"X86ISD::VPROT",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86vproti : SDNode<"X86ISD::VPROTI",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i8>]>>;
+
+def X86vpshl : SDNode<"X86ISD::VPSHL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86vpsha : SDNode<"X86ISD::VPSHA",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+
+def X86vpcom : SDNode<"X86ISD::VPCOM",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i8>]>>;
+def X86vpcomu : SDNode<"X86ISD::VPCOMU",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i8>]>>;
+
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
@@ -190,6 +260,7 @@ def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
+def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCVecEltisVT<0, i1>,
@@ -201,11 +272,15 @@ def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<1,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i32>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
def X86pmuldq : SDNode<"X86ISD::PMULDQ",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<1,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i32>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
def X86extrqi : SDNode<"X86ISD::EXTRQI",
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
@@ -221,24 +296,30 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
-def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisVec<2>]>;
+ SDTCisSameSizeAs<0,2>,
+ SDTCisSameNumEltsAs<0,2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+ SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
+def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisInt<2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
-def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
+ SDTCisInt<0>, SDTCisInt<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
+ SDTCisVT<4, i8>]>;
+
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
@@ -250,15 +331,17 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<2>]>;
+ SDTCisVec<0>, SDTCisVT<2, i32>]>;
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<3>]>;
+ SDTCisVec<0>, SDTCisVT<3, i32>]>;
def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
+ SDTCisVec<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+
+def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -281,33 +364,74 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>;
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>;
def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
+def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>;
+
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
-def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermv : SDNode<"X86ISD::VPERMV",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0,1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
-def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
-def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
+def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>,
+ SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>,
+ SDTCisSameSizeAs<0,2>,
+ SDTCisSameAs<0,3>]>, []>;
+
+def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
+ SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>, []>;
+
+def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
-def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
-def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
+def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
+def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
+def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>;
+def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisFP<1>,
+ SDTCisSameNumEltsAs<0,1>,
+ SDTCisVT<2, i32>]>, []>;
+def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i1>,
+ SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSubVecOfVec<1, 0>]>, []>;
+// The SDTCisSubVecOfVec restriction cannot be applied to the 128-bit version of VBROADCASTI32x2.
+def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST",
+ SDTypeProfile<1, 1, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>]>, []>;
+
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
+ [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>,
+ SDTCisPtrTy<3>]>, []>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
- [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>;
+ [SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
+ SDTCisPtrTy<2>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
@@ -317,11 +441,13 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
-def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
-def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
-def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
-def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", STDFp2SrcRm>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", STDFp2SrcRm>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
@@ -341,9 +467,11 @@ def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
-def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
-def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
-def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
+def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
+def X86RndScales : SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>;
+def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>;
+def X86GetMants : SDNode<"X86ISD::VGETMANT", STDFp3SrcRm>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -362,7 +490,8 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
- SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
+ SDTCisSameAs<0,1>, SDTCisInt<2>,
+ SDTCisVT<3, i32>]>;
def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
@@ -371,9 +500,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>,
+ SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>;
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
-
+def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>;
def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
SDTCisInt<2>]>;
@@ -392,6 +524,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>;
+def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>;
+def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>;
+def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>;
// Vector with rounding mode
// cvtt fp-to-int stuff
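
The cvtt-with-rounding nodes here (the scalar X86cvtt*IntRnd defs above and the vector forms the next hunk touches) are plausibly fed by the AVX-512 truncating conversion intrinsics; that mapping is an assumption based on the node names, not stated in the patch. A minimal C++ sketch:

    #include <immintrin.h>

    // Sketch: truncating scalar float->int conversion with SAE, the shape
    // X86cvttss2IntRnd appears to model (VCVTTSS2SI {sae}). Compile with
    // -mavx512f; the mapping to the node is an assumption.
    int cvtt_ss(__m128 a) {
      return _mm_cvtt_roundss_si32(a, _MM_FROUND_NO_EXC);
    }
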
@@ -417,17 +553,35 @@ def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>;
def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>;
def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>;
+def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>,
+ SDTCisFP<0>,
+ SDTCisVT<2, i32>]> >;
+
+def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisFP<1>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]> >;
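
For reference, the half-precision conversions these two profiles describe can be exercised from C++ with the F16C intrinsics; the trailing i32 operands in the profiles carry the immediate rounding/size controls. A minimal sketch, assuming -mf16c:

    #include <immintrin.h>

    // Sketch: f16 -> f32 widening and f32 -> f16 narrowing with an
    // immediate rounding control, matching the i32 operand in the profile.
    __m128  widen(__m128i h) { return _mm_cvtph_ps(h); }
    __m128i narrow(__m128 f) { return _mm_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT); }
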
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
SDTCisOpSmallerThanOp<1, 0>,
- SDTCisInt<2>]>>;
+ SDTCisVT<2, i32>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
- SDTCisInt<2>]>>;
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisVT<2, i32>]>>;
+
+def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -436,10 +590,10 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
// These are 'extloads' from a scalar to the low element of a vector, zeroing
// the top elements. These are used for the SSE 'ss' and 'sd' instruction
// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
SDNPWantRoot]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
SDNPWantRoot]>;
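
The 'extload' shape these complex patterns select is the one produced by the scalar-load intrinsics; a one-line sketch:

    #include <xmmintrin.h>

    // Sketch: load one float into lane 0 and zero lanes 1-3, the operand
    // shape the SSE 'ss' instruction forms consume.
    __m128 load_low(const float *p) { return _mm_load_ss(p); }
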
@@ -490,9 +644,9 @@ def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def loadf32_128 : PatFrag<(ops node:$ptr),
- (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
+ (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>;
def loadf64_128 : PatFrag<(ops node:$ptr),
- (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;
+ (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>;
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -590,9 +744,9 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def memopfsf32_128 : PatFrag<(ops node:$ptr),
- (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
+ (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>;
def memopfsf64_128 : PatFrag<(ops node:$ptr),
- (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;
+ (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
@@ -604,32 +758,6 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal();
- return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() && !ST->isTruncatingStore() &&
- ST->getAddressingMode() == ISD::UNINDEXED &&
- ST->getAlignment() >= 16;
- return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() &&
- ST->getAlignment() < 16;
- return false;
-}]>;
-
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -851,29 +979,59 @@ def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return isa<MaskedLoadSDNode>(N);
}]>;
+// Masked store fragments.
+// X86mstore can't be implemented in core DAG files because some targets
+// don't support vector types (llvm-tblgen will fail).
+def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 16;
return false;
}]>;
def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 32;
return false;
}]>;
def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 64;
return false;
}]>;
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
return isa<MaskedStoreSDNode>(N);
}]>;
+// Masked truncstore fragments.
+// X86mtruncstore can't be implemented in core DAG files because some targets
+// don't support vector types (llvm-tblgen will fail).
+def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def masked_truncstorevi8 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def masked_truncstorevi16 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def masked_truncstorevi32 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
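
One masked truncating store that masked_truncstorevi8 would match is the i32-to-i8 down-converting store (VPMOVDB to memory); that this intrinsic is the usual source is an assumption. Sketch, assuming -mavx512f:

    #include <immintrin.h>

    // Sketch: store the 16 i32 lanes of v as i8, under mask k.
    void trunc_store_i8(void *dst, __mmask16 k, __m512i v) {
      _mm512_mask_cvtepi32_storeu_epi8(dst, k, v);
    }
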
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index cf68ef0..63e78de 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
@@ -101,9 +102,11 @@ struct X86MemoryFoldTableEntry {
void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
- : X86GenInstrInfo(
- (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
- (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32),
+ X86::CATCHRET),
Subtarget(STI), RI(STI.getTargetTriple()) {
static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
@@ -332,6 +335,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
{ X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
{ X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
+ { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
+ { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
+ { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
{ X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
{ X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
{ X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
@@ -495,7 +501,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
@@ -605,7 +610,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
{ X86::VPABSDrr128, X86::VPABSDrm128, 0 },
@@ -1647,6 +1651,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PEXT32rr, X86::PEXT32rm, 0 },
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
+ // ADX foldable instructions
+ { X86::ADCX32rr, X86::ADCX32rm, 0 },
+ { X86::ADCX64rr, X86::ADCX64rm, 0 },
+ { X86::ADOX32rr, X86::ADOX32rm, 0 },
+ { X86::ADOX64rr, X86::ADOX64rm, 0 },
+
// AVX-512 foldable instructions
{ X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
@@ -1729,11 +1739,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
// FMA foldable instructions
{ X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
{ X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
@@ -1749,11 +1765,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
{ X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
{ X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
@@ -1769,11 +1791,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
{ X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
{ X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
@@ -1789,11 +1817,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
{ X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
@@ -2282,7 +2316,35 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::FsVMOVAPSrm:
case X86::FsVMOVAPDrm:
case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
+ case X86::FsMOVAPDrm:
+ // AVX-512
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVAPDZ256rm:
+ case X86::VMOVAPDZrm:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVAPSZ256rm:
+ case X86::VMOVAPSZrm:
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQA32Z256rm:
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQA64Z256rm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU16Z128rm:
+ case X86::VMOVDQU16Z256rm:
+ case X86::VMOVDQU16Zrm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQU32Z256rm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQU64Z128rm:
+ case X86::VMOVDQU64Z256rm:
+ case X86::VMOVDQU64Zrm:
+ case X86::VMOVDQU8Z128rm:
+ case X86::VMOVDQU8Z256rm:
+ case X86::VMOVDQU8Zrm:
+ case X86::VMOVUPSZ128rm:
+ case X86::VMOVUPSZ256rm:
+ case X86::VMOVUPSZrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
@@ -2363,9 +2425,8 @@ bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
  // It is safe to clobber EFLAGS at the end of a block if no successor has it
// live in.
if (Iter == E) {
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *S : MBB.successors())
+ if (S->isLiveIn(X86::EFLAGS))
return false;
return true;
}
@@ -2411,13 +2472,29 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
- // MOV32r0 is implemented with a xor which clobbers condition code.
- // Re-materialize it as movri instructions to avoid side effects.
- unsigned Opc = Orig->getOpcode();
- if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+ bool ClobbersEFLAGS = false;
+ for (const MachineOperand &MO : Orig->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
+ ClobbersEFLAGS = true;
+ break;
+ }
+ }
+
+ if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
+ // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
+ // effects.
+ int Value;
+ switch (Orig->getOpcode()) {
+ case X86::MOV32r0: Value = 0; break;
+ case X86::MOV32r1: Value = 1; break;
+ case X86::MOV32r_1: Value = -1; break;
+ default:
+ llvm_unreachable("Unexpected instruction!");
+ }
+
DebugLoc DL = Orig->getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
- .addImm(0);
+ .addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MBB.insert(I, MI);
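
At the machine level, the trade-off the new rematerialization code navigates looks like this (a user-level sketch in GCC/Clang inline asm, not the patch's own code): the xor idiom that MOV32r0 normally becomes clobbers EFLAGS, while a mov with an explicit immediate does not.

    // Sketch: flags-clobbering vs. flags-safe zeroing (x86-64).
    unsigned zero_xor() { unsigned r; asm("xor %0, %0" : "=r"(r) : : "cc"); return r; }
    unsigned zero_mov() { unsigned r; asm("mov $0, %0" : "=r"(r)); return r; }
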
@@ -2428,7 +2505,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
}
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
-static bool hasLiveCondCodeDef(MachineInstr *MI) {
+bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr *MI) const {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() &&
@@ -2453,7 +2530,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
// Left shift instructions can be transformed into load-effective-address
// instructions if we can encode them appropriately.
- // A LEA instruction utilizes a SIB byte to encode it's scale factor.
+ // A LEA instruction utilizes a SIB byte to encode its scale factor.
// The SIB.scale field is two bits wide which means that we can encode any
// shift amount less than 4.
return ShAmt < 4 && ShAmt > 0;
@@ -2493,7 +2570,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
ImplicitOp = Src;
ImplicitOp.setImplicit();
- NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64);
+ NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
MachineBasicBlock::LivenessQueryResult LQR =
MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI);
@@ -2914,10 +2991,162 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMI;
}
-/// We have a few instructions that must be hacked on to commute them.
-///
-MachineInstr *
-X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+/// Returns true if the given instruction opcode is FMA3.
+/// Otherwise, returns false.
+/// The second parameter is optional and serves as a second return value from
+/// the function. It is set to true if the given instruction has an FMA3
+/// opcode that is used for lowering scalar FMA intrinsics, and to false
+/// otherwise.
+static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) {
+ if (IsIntrinsic)
+ *IsIntrinsic = false;
+
+ switch (Opcode) {
+ case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
+ case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
+ case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
+ case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
+ case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
+ case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
+ case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
+ case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
+
+ case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
+ case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
+ case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
+ case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
+ case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
+ case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
+ case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
+ case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
+
+ case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
+ case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
+ case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
+ case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
+ case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
+ case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
+ case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
+ case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
+
+ case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
+ case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
+ case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
+ case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
+ case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
+ case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
+ case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
+ case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
+
+ case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
+ case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
+ case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
+ case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
+ case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
+ case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
+ case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
+ case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
+ case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
+ case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
+ case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
+ case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
+ case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
+ case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
+ case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
+ case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
+
+ case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
+ case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
+ case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
+ case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
+ case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
+ case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
+ case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
+ case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
+
+ case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
+ case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
+ case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
+ case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
+ case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
+ case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
+ case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
+ case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
+ case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
+ case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
+ case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
+ case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
+ case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
+ case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
+ case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
+ case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
+
+ case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
+ case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
+ case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
+ case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
+ case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
+ case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
+ case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
+ case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
+
+ case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
+ case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
+ case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
+ case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
+ case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
+ case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
+ case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
+ case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
+ case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
+ case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
+ case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
+ case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
+ case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
+ case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
+ case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
+ case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
+ return true;
+
+ case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int:
+ case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int:
+ case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int:
+ case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int:
+ case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
+ case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
+ case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
+ case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
+
+ case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int:
+ case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int:
+ case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int:
+ case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int:
+ case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
+ case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
+ case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
+ case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
+
+ case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int:
+ case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int:
+ case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int:
+ case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int:
+ case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
+ case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
+ case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
+ case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
+ if (IsIntrinsic)
+ *IsIntrinsic = true;
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("Opcode not handled by the switch");
+}
+
+MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
switch (MI->getOpcode()) {
case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
@@ -2944,7 +3173,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
MI->setDesc(get(Opc));
MI->getOperand(3).setImm(Size-Amt);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::BLENDPDrri:
case X86::BLENDPSrri:
@@ -2980,7 +3209,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm(Mask ^ Imm);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::PCLMULQDQrr:
case X86::VPCLMULQDQrr:{
@@ -2995,7 +3224,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::CMPPDrri:
case X86::CMPPSrri:
@@ -3016,7 +3245,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI = MF.CloneMachineInstr(MI);
NewMI = false;
}
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
default:
return nullptr;
}
@@ -3045,7 +3274,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm(Imm);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
@@ -3124,11 +3353,272 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Fallthrough intended.
}
default:
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ if (isFMA3(MI->getOpcode())) {
+ unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2);
+ if (Opc == 0)
+ return nullptr;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ }
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ }
+}
+
+bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+
+ unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3;
+
+ // Only the first RegOpsNum operands are commutable.
+ // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
+ // that the operand is not specified/fixed.
+ if (SrcOpIdx1 != CommuteAnyOperandIndex &&
+ (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum))
+ return false;
+ if (SrcOpIdx2 != CommuteAnyOperandIndex &&
+ (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum))
+ return false;
+
+ // Look for two different register operands assumed to be commutable
+ // regardless of the FMA opcode. The FMA opcode is adjusted later.
+ if (SrcOpIdx1 == CommuteAnyOperandIndex ||
+ SrcOpIdx2 == CommuteAnyOperandIndex) {
+ unsigned CommutableOpIdx1 = SrcOpIdx1;
+ unsigned CommutableOpIdx2 = SrcOpIdx2;
+
+    // At least one of the operands to be commuted is not specified, and this
+    // method is free to choose appropriate commutable operands.
+    if (SrcOpIdx1 == SrcOpIdx2)
+      // Neither operand is fixed. By default, set one of the commutable
+      // operands to the last register operand of the instruction.
+      CommutableOpIdx2 = RegOpsNum;
+    else if (SrcOpIdx2 == CommuteAnyOperandIndex)
+      // Only one of the operands is not fixed.
+      CommutableOpIdx2 = SrcOpIdx1;
+
+ // CommutableOpIdx2 is well defined now. Let's choose another commutable
+ // operand and assign its index to CommutableOpIdx1.
+ unsigned Op2Reg = MI->getOperand(CommutableOpIdx2).getReg();
+ for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) {
+      // The commuted operands must have different registers.
+      // Otherwise, the commute transformation does not change anything and
+      // is therefore useless.
+ if (Op2Reg != MI->getOperand(CommutableOpIdx1).getReg())
+ break;
+ }
+
+ // No appropriate commutable operands were found.
+ if (CommutableOpIdx1 == 0)
+ return false;
+
+  // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
+ // to return those values.
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+ }
+
+  // Check if we can adjust the opcode to preserve the semantics when
+  // commuting the register operands.
+ return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2) != 0;
+}
+
+unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) const {
+ unsigned Opc = MI->getOpcode();
+
+  // Define the array that holds the FMA opcodes in groups of three
+  // (the 132, 213, and 231 forms of each operation).
+ static const unsigned RegularOpcodeGroups[][3] = {
+ { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY },
+ { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m },
+ { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m },
+ { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m },
+ { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m },
+ { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY },
+ { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY },
+
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY },
+ { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m },
+ { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m },
+ { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m },
+ { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m },
+ { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY },
+ { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY },
+
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY },
+ { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m },
+ { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m },
+ { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m },
+ { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m },
+ { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY },
+ { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY },
+
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY },
+ { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m },
+ { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m },
+ { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m },
+ { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m },
+ { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY },
+ { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY },
+
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr231rY },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY },
+ { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m },
+ { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m },
+ { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY },
+ { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY },
+
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY },
+ { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m },
+ { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m },
+ { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
+ { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
+ };
+
+  // Define the array that holds the FMA*_Int opcodes in groups of three
+  // (the 132, 213, and 231 forms of each operation).
+ static const unsigned IntrinOpcodeGroups[][3] = {
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int },
+ { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int },
+ { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int },
+
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int },
+ { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int },
+ { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int },
+
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
+ { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
+ { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
+
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
+ { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
+ { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
+ };
+
+ const unsigned Form132Index = 0;
+ const unsigned Form213Index = 1;
+ const unsigned Form231Index = 2;
+ const unsigned FormsNum = 3;
+
+ bool IsIntrinOpcode;
+ isFMA3(Opc, &IsIntrinOpcode);
+
+ size_t GroupsNum;
+ const unsigned (*OpcodeGroups)[3];
+ if (IsIntrinOpcode) {
+ GroupsNum = array_lengthof(IntrinOpcodeGroups);
+ OpcodeGroups = IntrinOpcodeGroups;
+ } else {
+ GroupsNum = array_lengthof(RegularOpcodeGroups);
+ OpcodeGroups = RegularOpcodeGroups;
+ }
+
+ const unsigned *FoundOpcodesGroup = nullptr;
+ size_t FormIndex;
+
+ // Look for the input opcode in the corresponding opcodes table.
+ for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
+ ++GroupIndex) {
+ for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
+ if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
+ FoundOpcodesGroup = OpcodeGroups[GroupIndex];
+ break;
+ }
+ }
}
+
+  // The input opcode does not match any of the opcodes from the tables.
+  // The unsupported FMA opcode must be added to one of the two opcode groups
+  // defined above.
+ assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode");
+
+ // Put the lowest index to SrcOpIdx1 to simplify the checks below.
+ if (SrcOpIdx1 > SrcOpIdx2)
+ std::swap(SrcOpIdx1, SrcOpIdx2);
+
+  // TODO: Commuting the 1st operand of FMA*_Int requires some additional
+  // analysis. The commute optimization is legal only if all users of FMA*_Int
+  // use only the lowest element of the FMA*_Int instruction. Such an analysis
+  // is not implemented yet, so just return 0 in that case.
+  // When such an analysis becomes available, this will be the right place to
+  // call it.
+ if (IsIntrinOpcode && SrcOpIdx1 == 1)
+ return 0;
+
+ unsigned Case;
+ if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
+ Case = 0;
+ else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3)
+ Case = 1;
+ else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3)
+ Case = 2;
+ else
+ return 0;
+
+  // Define the mapping array from the input FMA form to the output FMA form
+  // that preserves the operation's semantics after the operands are
+  // commuted.
+ static const unsigned FormMapping[][3] = {
+ // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
+ // FMA132 A, C, b; ==> FMA231 C, A, b;
+ // FMA213 B, A, c; ==> FMA213 A, B, c;
+ // FMA231 C, A, b; ==> FMA132 A, C, b;
+ { Form231Index, Form213Index, Form132Index },
+ // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
+ // FMA132 A, c, B; ==> FMA132 B, c, A;
+ // FMA213 B, a, C; ==> FMA231 C, a, B;
+ // FMA231 C, a, B; ==> FMA213 B, a, C;
+ { Form132Index, Form231Index, Form213Index },
+ // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
+ // FMA132 a, C, B; ==> FMA213 a, B, C;
+ // FMA213 b, A, C; ==> FMA132 b, C, A;
+ // FMA231 c, A, B; ==> FMA231 c, B, A;
+ { Form213Index, Form132Index, Form231Index }
+ };
+
+ // Everything is ready, just adjust the FMA opcode and return it.
+ FormIndex = FormMapping[Case][FormIndex];
+ return FoundOpcodesGroup[FormIndex];
}
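
The FormMapping rows can be sanity-checked with a standalone model (this mirrors the table above but is not the LLVM API):

    #include <cassert>

    // Model: map an FMA form to the form that preserves semantics after
    // commuting the operand pair identified by Case (0: 1&2, 1: 1&3, 2: 2&3).
    enum Form { F132, F213, F231 };
    Form commuteForm(Form F, int Case) {
      static const Form Map[3][3] = {
          {F231, F213, F132}, // swap operands 1 and 2
          {F132, F231, F213}, // swap operands 1 and 3
          {F213, F132, F231}, // swap operands 2 and 3
      };
      return Map[Case][F];
    }
    int main() {
      assert(commuteForm(F132, 2) == F213); // FMA132 a,C,B ==> FMA213 a,B,C
      assert(commuteForm(F231, 0) == F132); // FMA231 C,A,b ==> FMA132 A,C,b
      return 0;
    }
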
-bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
case X86::CMPPDrri:
@@ -3141,46 +3631,22 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI->getOperand(3).getImm() & 0x7;
switch (Imm) {
- case 0x00: // EQUAL
- case 0x03: // UNORDERED
- case 0x04: // NOT EQUAL
- case 0x07: // ORDERED
- SrcOpIdx1 = 1;
- SrcOpIdx2 = 2;
- return true;
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ // The indices of the commutable operands are 1 and 2.
+ // Assign them to the returned operand indices here.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
}
return false;
}
- case X86::VFMADDPDr231r:
- case X86::VFMADDPSr231r:
- case X86::VFMADDSDr231r:
- case X86::VFMADDSSr231r:
- case X86::VFMSUBPDr231r:
- case X86::VFMSUBPSr231r:
- case X86::VFMSUBSDr231r:
- case X86::VFMSUBSSr231r:
- case X86::VFNMADDPDr231r:
- case X86::VFNMADDPSr231r:
- case X86::VFNMADDSDr231r:
- case X86::VFNMADDSSr231r:
- case X86::VFNMSUBPDr231r:
- case X86::VFNMSUBPSr231r:
- case X86::VFNMSUBSDr231r:
- case X86::VFNMSUBSSr231r:
- case X86::VFMADDPDr231rY:
- case X86::VFMADDPSr231rY:
- case X86::VFMSUBPDr231rY:
- case X86::VFMSUBPSr231rY:
- case X86::VFNMADDPDr231rY:
- case X86::VFNMADDPSr231rY:
- case X86::VFNMSUBPDr231rY:
- case X86::VFNMSUBPSr231rY:
- SrcOpIdx1 = 2;
- SrcOpIdx2 = 3;
- return true;
default:
+ if (isFMA3(MI->getOpcode()))
+ return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
+ return false;
}
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
@@ -3821,15 +4287,58 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
return 0;
}
-inline static bool MaskRegClassContains(unsigned Reg) {
+static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
X86::VK32RegClass.contains(Reg) ||
X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
+
+static bool GRRegClassContains(unsigned Reg) {
+ return X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg) ||
+ X86::GR16RegClass.contains(Reg) ||
+ X86::GR8RegClass.contains(Reg);
+}
+static
+unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
+ return X86::KMOVBrk;
+ }
+ if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
+ return X86::KMOVBkr;
+ }
+ return 0;
+}
+
static
-unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
+unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg))
+ return X86::KMOVQkk;
+ if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg))
+ return X86::KMOVDrk;
+ if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg))
+ return X86::KMOVQrk;
+ if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ return X86::KMOVDkr;
+ if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg))
+ return X86::KMOVQkr;
+ return 0;
+}
+
+static
+unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
+ const X86Subtarget &Subtarget)
+{
+ if (Subtarget.hasDQI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg))
+ return Opc;
+ if (Subtarget.hasBWI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
+ return Opc;
if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
X86::VR256XRegClass.contains(DestReg, SrcReg) ||
X86::VR512RegClass.contains(DestReg, SrcReg)) {
@@ -3837,21 +4346,14 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
SrcReg = get512BitSuperRegister(SrcReg);
return X86::VMOVAPSZrr;
}
- if (MaskRegClassContains(DestReg) &&
- MaskRegClassContains(SrcReg))
+ if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
return X86::KMOVWkk;
- if (MaskRegClassContains(DestReg) &&
- (X86::GR32RegClass.contains(SrcReg) ||
- X86::GR16RegClass.contains(SrcReg) ||
- X86::GR8RegClass.contains(SrcReg))) {
- SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
+ if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
return X86::KMOVWkr;
}
- if ((X86::GR32RegClass.contains(DestReg) ||
- X86::GR16RegClass.contains(DestReg) ||
- X86::GR8RegClass.contains(DestReg)) &&
- MaskRegClassContains(SrcReg)) {
- DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
+ if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
return X86::KMOVWrk;
}
return 0;
@@ -3886,7 +4388,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else if (HasAVX512)
- Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
+ Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
@@ -3900,34 +4402,86 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Moving EFLAGS to / from another register requires a push and a pop.
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
- if (SrcReg == X86::EFLAGS) {
- if (X86::GR64RegClass.contains(DestReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSHF64));
- BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ bool FromEFLAGS = SrcReg == X86::EFLAGS;
+ bool ToEFLAGS = DestReg == X86::EFLAGS;
+ int Reg = FromEFLAGS ? DestReg : SrcReg;
+ bool is32 = X86::GR32RegClass.contains(Reg);
+ bool is64 = X86::GR64RegClass.contains(Reg);
+
+ if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
+ int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
+ int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
+ int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
+ int Pop = is64 ? X86::POP64r : X86::POP32r;
+ int PopF = is64 ? X86::POPF64 : X86::POPF32;
+ int AX = is64 ? X86::RAX : X86::EAX;
+
+ if (!Subtarget.hasLAHFSAHF()) {
+ assert(Subtarget.is64Bit() &&
+ "Not having LAHF/SAHF only happens on 64-bit.");
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - usesTheStack.
+ if (FromEFLAGS) {
+ BuildMI(MBB, MI, DL, get(PushF));
+ BuildMI(MBB, MI, DL, get(Pop), DestReg);
+ }
+ if (ToEFLAGS) {
+ BuildMI(MBB, MI, DL, get(Push))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(PopF));
+ }
return;
}
- if (X86::GR32RegClass.contains(DestReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSHF32));
- BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
- return;
+
+ // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
+ // inefficient. Instead:
+ // - Save the overflow flag OF into AL using SETO, and restore it using a
+ // signed 8-bit addition of AL and INT8_MAX.
+ // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
+ // using LAHF/SAHF.
+ // - When RAX/EAX is live and isn't the destination register, make sure it
+ // isn't clobbered by PUSH/POP'ing it before and after saving/restoring
+ // the flags.
+ // This approach is ~2.25x faster than using PUSHF/POPF.
+ //
+ // This is still somewhat inefficient because we don't know which flags are
+ // actually live inside EFLAGS. Were we able to do a single SETcc instead of
+  // SETO+LAHF / ADDB+SAHF, the code could be 1.02x faster.
+ //
+ // PUSHF/POPF is also potentially incorrect because it affects other flags
+ // such as TF/IF/DF, which LLVM doesn't model.
+ //
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - usesTheStack.
+
+ bool AXDead = (Reg == AX) ||
+ (MachineBasicBlock::LQR_Dead ==
+ MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+ if (!AXDead) {
+ // FIXME: If computeRegisterLiveness() reported LQR_Unknown then AX may
+ // actually be dead. This is not a problem for correctness as we are just
+ // (unnecessarily) saving+restoring a dead register. However the
+ // MachineVerifier expects operands that read from dead registers
+ // to be marked with the "undef" flag.
+ BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
}
- }
- if (DestReg == X86::EFLAGS) {
- if (X86::GR64RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSH64r))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::POPF64));
- return;
+ if (FromEFLAGS) {
+ BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
+ BuildMI(MBB, MI, DL, get(X86::LAHF));
+ BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
}
- if (X86::GR32RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSH32r))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::POPF32));
- return;
+ if (ToEFLAGS) {
+ BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
+ .addReg(X86::AL)
+ .addImm(INT8_MAX);
+ BuildMI(MBB, MI, DL, get(X86::SAHF));
}
+ if (!AXDead)
+ BuildMI(MBB, MI, DL, get(Pop), AX);
+ return;
}
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
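
The same SETO/LAHF and ADDB/SAHF pairing can be written down as a user-level sketch (GCC/Clang inline asm, x86-64, assuming LAHF/SAHF are available); it shows why the 8-bit add regenerates OF before SAHF restores the remaining flags:

    // Sketch: AL=1 iff OF was set; AL+0x7f overflows exactly when AL==1,
    // so the add re-materializes OF, then SAHF restores SF/ZF/AF/PF/CF.
    static inline unsigned long save_flags(void) {
      unsigned long ax;
      asm volatile("seto %%al\n\tlahf" : "=a"(ax) : : "cc");
      return ax;
    }
    static inline void restore_flags(unsigned long ax) {
      asm volatile("addb $0x7f, %%al\n\tsahf" : "+a"(ax) : : "cc");
    }
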
@@ -4602,9 +5156,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// live-out. If it is live-out, do not optimize.
if ((IsCmpZero || IsSwapped) && !IsSafe) {
MachineBasicBlock *MBB = CmpInstr->getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Successor : MBB->successors())
+ if (Successor->isLiveIn(X86::EFLAGS))
return false;
}
@@ -4645,8 +5198,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
CmpInstr->eraseFromParent();
// Modify the condition code of instructions in OpsToUpdate.
- for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
- OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ for (auto &Op : OpsToUpdate)
+ Op.first->setDesc(get(Op.second));
return true;
}
@@ -4694,8 +5247,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
- if (FoldMI) {
+ if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI)) {
FoldAsLoadDefReg = 0;
return FoldMI;
}
@@ -4725,6 +5277,82 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
return true;
}
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two %k0 reads.
+/// This is used for mapping:
+/// %k4 = K_SET1
+/// to:
+/// %k4 = KXNORrr %k0, %k0
+static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc, unsigned Reg) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ MIB->setDesc(Desc);
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ return true;
+}
+
+static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
+ bool MinusOne) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+
+ // Insert the XOR.
+ BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+
+ // Turn the pseudo into an INC or DEC.
+ MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
+ MIB.addReg(Reg);
+
+ return true;
+}
+
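
The sequences expandMOV32r1 produces, sketched with inline asm: xor+inc and xor+dec materialize 1 and -1 without a 32-bit immediate, at the price of clobbering EFLAGS (which is why reMaterialize above falls back to MOV32ri when the flags are live).

    // Sketch (x86-64): the expanded MOV32r1 / MOV32r_1 idioms.
    unsigned one()       { unsigned r; asm("xor %0, %0\n\tinc %0" : "=r"(r) : : "cc"); return r; }
    unsigned minus_one() { unsigned r; asm("xor %0, %0\n\tdec %0" : "=r"(r) : : "cc"); return r; }
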
+bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ int64_t Imm = MIB->getOperand(1).getImm();
+ assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+ MachineBasicBlock::iterator I = MIB.getInstr();
+
+ int StackAdjustment;
+
+ if (Subtarget.is64Bit()) {
+ assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
+ MIB->getOpcode() == X86::MOV32ImmSExti8);
+ // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
+ // widen the register if necessary.
+ StackAdjustment = 8;
+ BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
+ MIB->setDesc(get(X86::POP64r));
+ MIB->getOperand(0)
+ .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+ } else {
+ assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
+ StackAdjustment = 4;
+ BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
+ MIB->setDesc(get(X86::POP32r));
+ }
+
+ // Build CFI if necessary.
+ MachineFunction &MF = *MBB.getParent();
+ const X86FrameLowering *TFL = Subtarget.getFrameLowering();
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsDwarfCFI =
+ !IsWin64Prologue &&
+ (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
+ bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
+ if (EmitCFI) {
+ TFL->BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
+ TFL->BuildCFI(MBB, std::next(I), DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
+ }
+
+ return true;
+}
+
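
A user-level sketch of the push/pop expansion; the point is size, since push imm8 plus pop reg is shorter than the equivalent mov with a 32/64-bit immediate. The sketch briefly moves the stack pointer, so it assumes nothing live sits in the red zone at that spot:

    // Sketch (x86-64): materialize a sign-extended 8-bit immediate via the
    // stack, as the MOV*ImmSExti8 pseudos do.
    long minus_one_small(void) {
      long v;
      asm("push $-1\n\tpop %0" : "=r"(v));
      return v;
    }
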
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -4735,8 +5363,8 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->
- getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 8, 8);
MachineBasicBlock::iterator I = MIB.getInstr();
BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
@@ -4753,6 +5381,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+ case X86::MOV32r1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+ case X86::MOV32r_1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
+ case X86::MOV32ImmSExti8:
+ case X86::MOV64ImmSExti8:
+ return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
@@ -4777,10 +5412,22 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
+
+ // KNL does not recognize dependency-breaking idioms for mask registers,
+ // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
+ // Using %k0 as the undef input register is a performance heuristic based
+ // on the assumption that %k0 is used less frequently than the other mask
+ // registers, since it is not usable as a write mask.
+ // FIXME: A more advanced approach would be to choose the best input mask
+ // register based on context.
case X86::KSET0B:
- case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
+ case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+ case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+ case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
case X86::KSET1B:
- case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
+ case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+ case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+ case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
case TargetOpcode::LOAD_STACK_GUARD:
expandLoadStackGuard(MIB, *this);
return true;
@@ -4788,12 +5435,28 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return false;
}
-static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
+static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
+ int PtrOffset = 0) {
unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+
+ if (NumAddrOps < 4) {
+ // FrameIndex only - add an immediate offset (whether it's zero or not).
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ addOffset(MIB, PtrOffset);
+ } else {
+ // General Memory Addressing - we need to add any offset to an existing
+ // offset.
+ assert(MOs.size() == 5 && "Unexpected memory operand list length");
+ for (unsigned i = 0; i != NumAddrOps; ++i) {
+ const MachineOperand &MO = MOs[i];
+ if (i == 3 && PtrOffset != 0) {
+ MIB.addDisp(MO, PtrOffset);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ }
}
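For context: a full x86 memory reference in a MachineInstr carries five operands in (base, scale, index, displacement, segment) order, so index 3 in the loop above is the displacement, and folding with a nonzero PtrOffset only needs to bump that one field. A rough standalone model of that path (field and function names here are illustrative, not LLVM's):

#include <cstdint>

// Stand-in for LLVM's five-operand (Base, Scale, Index, Disp, Segment) tuple.
struct X86MemRef {
  int Base;
  int Scale;
  int Index;
  int64_t Disp; // operand #3 -- the only field addOperands adjusts
  int Segment;
};

// Copy the reference through, folding the pointer offset into the
// displacement, exactly as the i == 3 case above does with addDisp.
X86MemRef withPtrOffset(X86MemRef M, int64_t PtrOffset) {
  M.Disp += PtrOffset;
  return M;
}

int main() {
  X86MemRef M{/*Base=*/1, /*Scale=*/1, /*Index=*/0, /*Disp=*/16, /*Segment=*/0};
  return withPtrOffset(M, 4).Disp == 20 ? 0 : 1;
}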
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
@@ -4828,7 +5491,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
unsigned OpNo, ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,
- MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *MI, const TargetInstrInfo &TII,
+ int PtrOffset = 0) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
@@ -4838,7 +5502,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
assert(MO.isReg() && "Expected to fold into reg operand!");
- addOperands(MIB, MOs);
+ addOperands(MIB, MOs, PtrOffset);
} else {
MIB.addOperand(MO);
}
@@ -4860,6 +5524,40 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
return MIB.addImm(0);
}
+MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
+ MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
+ ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align) const {
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ case X86::VINSERTPSrr:
+ // Attempt to convert the load of the inserted vector into a folded
+ // load of a single float.
+ if (OpNum == 2) {
+ unsigned Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+ unsigned ZMask = Imm & 15;
+ unsigned DstIdx = (Imm >> 4) & 3;
+ unsigned SrcIdx = (Imm >> 6) & 3;
+
+ unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize();
+ if (Size <= RCSize && 4 <= Align) {
+ int PtrOffset = SrcIdx * 4;
+ unsigned NewImm = (DstIdx << 4) | ZMask;
+ unsigned NewOpCode =
+ (MI->getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm
+ : X86::INSERTPSrm);
+ MachineInstr *NewMI =
+ FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
+ NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
+ return NewMI;
+ }
+ }
+ break;
+ }
+
+ return nullptr;
+}
+
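The INSERTPS immediate packs three fields: bits [3:0] are the zero mask, bits [5:4] the destination lane, and bits [7:6] the source lane. When the source vector comes from memory, only the selected lane is needed, so the fold above loads a single float at SrcIdx * 4 and clears the source-lane field. A small standalone check of that arithmetic (a hypothetical helper mirroring the bit manipulation above):

#include <cstdint>
#include <cstdio>

// Keep the zero mask and destination lane; turn the source lane into a
// byte offset for the scalar load.
static void foldInsertPSImm(uint8_t Imm, unsigned &PtrOffset, uint8_t &NewImm) {
  unsigned ZMask  = Imm & 15;
  unsigned DstIdx = (Imm >> 4) & 3;
  unsigned SrcIdx = (Imm >> 6) & 3;
  PtrOffset = SrcIdx * 4;                  // one f32 lane is 4 bytes
  NewImm = uint8_t((DstIdx << 4) | ZMask); // source lane no longer applies
}

int main() {
  unsigned Off;
  uint8_t NewImm;
  foldInsertPSImm(0x60, Off, NewImm); // source lane 1 -> destination lane 2
  std::printf("offset=%u newimm=0x%02x\n", Off, NewImm); // offset=4 newimm=0x20
  return 0;
}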
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
@@ -4869,10 +5567,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
- // For CPUs that favor the register form of a call,
- // do not fold loads into calls.
- if (isCallRegIndirect &&
- (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
+ // For CPUs that favor the register form of a call or push,
+ // do not fold loads into calls or pushes, unless optimizing for size
+ // aggressively.
+ if (isCallRegIndirect && !MF.getFunction()->optForMinSize() &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r ||
+ MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r ||
+ MI->getOpcode() == X86::PUSH64r))
return nullptr;
unsigned NumOps = MI->getDesc().getNumOperands();
@@ -4886,6 +5587,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
MachineInstr *NewMI = nullptr;
+
+ // Attempt to fold any custom cases we have.
+ if (MachineInstr *CustomMI =
+ foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
+ return CustomMI;
+
// Folding a memory location into the two-address part of a two-address
// instruction is different from folding it in other places. It requires
// replacing the *two* registers with the memory location.
@@ -4963,60 +5670,56 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// If the instruction and target operand are commutable, commute the
// instruction and try again.
if (AllowCommute) {
- unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2;
+ unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
bool HasDef = MI->getDesc().getNumDefs();
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
- bool Tied0 =
- 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied2 =
0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
// If either of the commutable operands are tied to the destination
// then we can not commute + fold.
- if ((HasDef && Reg0 == Reg1 && Tied0) ||
- (HasDef && Reg0 == Reg2 && Tied1))
+ if ((HasDef && Reg0 == Reg1 && Tied1) ||
+ (HasDef && Reg0 == Reg2 && Tied2))
return nullptr;
- if ((CommuteOpIdx1 == OriginalOpIdx) ||
- (CommuteOpIdx2 == OriginalOpIdx)) {
- MachineInstr *CommutedMI = commuteInstruction(MI, false);
- if (!CommutedMI) {
- // Unable to commute.
- return nullptr;
- }
- if (CommutedMI != MI) {
- // New instruction. We can't fold from this.
- CommutedMI->eraseFromParent();
- return nullptr;
- }
+ MachineInstr *CommutedMI =
+ commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
- // Attempt to fold with the commuted version of the instruction.
- unsigned CommuteOp =
- (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
- NewMI =
- foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
- /*AllowCommute=*/false);
- if (NewMI)
- return NewMI;
-
- // Folding failed again - undo the commute before returning.
- MachineInstr *UncommutedMI = commuteInstruction(MI, false);
- if (!UncommutedMI) {
- // Unable to commute.
- return nullptr;
- }
- if (UncommutedMI != MI) {
- // New instruction. It doesn't need to be kept.
- UncommutedMI->eraseFromParent();
- return nullptr;
- }
+ // Attempt to fold with the commuted version of the instruction.
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
+ Size, Align, /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
- // Return here to prevent duplicate fuse failure report.
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI =
+ commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
+ if (!UncommutedMI) {
+ // Unable to commute.
return nullptr;
}
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Return here to prevent duplicate fuse failure report.
+ return nullptr;
}
}
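The rewritten block above follows a try/undo shape: commute with explicit operand indices, attempt the fold once with commuting disabled, and commute back with the same index pair if the fold fails. A generic sketch of that shape, assuming (as the code does) that commuting twice with the same indices restores the original instruction:

// Commute and TryFold are stand-ins for commuteInstruction(MI, false, Idx1,
// Idx2) and the non-recursive foldMemoryOperandImpl attempt.
template <typename Instr, typename Commute, typename TryFold>
Instr *foldViaCommute(Instr *MI, unsigned Idx1, unsigned Idx2,
                      Commute commute, TryFold tryFold) {
  if (!commute(MI, Idx1, Idx2))    // commute in place; bail out if impossible
    return nullptr;
  if (Instr *Folded = tryFold(MI)) // fold against the commuted form
    return Folded;
  commute(MI, Idx1, Idx2);         // fold failed: restore the operand order
  return nullptr;
}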
@@ -5208,13 +5911,14 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
// If MI kills this register, the false dependence is already broken.
if (MI->killsRegister(Reg, TRI))
return;
+
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
- bool HasAVX = Subtarget.hasAVX();
- unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ MI->addRegisterKilled(Reg, TRI, true);
} else if (X86::VR256RegClass.contains(Reg)) {
// Use vxorps to clear the full ymm register.
// It wants to read and write the xmm sub-register.
@@ -5222,21 +5926,20 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
.addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
.addReg(Reg, RegState::ImplicitDefine);
- } else
- return;
- MI->addRegisterKilled(Reg, TRI, true);
+ MI->addRegisterKilled(Reg, TRI, true);
+ }
}
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// Check switch flag
- if (NoFusing) return nullptr;
+ if (NoFusing)
+ return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- hasPartialRegUpdate(MI->getOpcode()))
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -5303,6 +6006,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+ case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+ case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+ case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+ case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+ case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
return false;
default:
return true;
@@ -5318,6 +6027,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+ case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+ case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+ case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+ case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+ case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
return false;
default:
return true;
@@ -5342,10 +6057,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// Check switch flag
if (NoFusing) return nullptr;
- // Unless optimizing for size, don't fold to avoid partial
- // register update stalls
- if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- hasPartialRegUpdate(MI->getOpcode()))
+ // Avoid partial register update stalls unless optimizing for size.
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
// Determine the alignment of the load.
@@ -5460,62 +6173,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
-bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- // Check switch flag
- if (NoFusing) return 0;
-
- if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
- switch (MI->getOpcode()) {
- default: return false;
- case X86::TEST8rr:
- case X86::TEST16rr:
- case X86::TEST32rr:
- case X86::TEST64rr:
- return true;
- case X86::ADD32ri:
- // FIXME: AsmPrinter doesn't know how to handle
- // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
- if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
- return false;
- break;
- }
- }
-
- if (Ops.size() != 1)
- return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- unsigned NumOps = MI->getDesc().getNumOperands();
- bool isTwoAddr = NumOps > 1 &&
- MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
- // Folding a memory location into the two-address part of a two-address
- // instruction is different than folding it other places. It requires
- // replacing the *two* registers with the memory location.
- const DenseMap<unsigned,
- std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
- if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
- OpcodeTablePtr = &RegOp2MemOpTable2Addr;
- } else if (OpNum == 0) {
- if (Opc == X86::MOV32r0)
- return true;
-
- OpcodeTablePtr = &RegOp2MemOpTable0;
- } else if (OpNum == 1) {
- OpcodeTablePtr = &RegOp2MemOpTable1;
- } else if (OpNum == 2) {
- OpcodeTablePtr = &RegOp2MemOpTable2;
- } else if (OpNum == 3) {
- OpcodeTablePtr = &RegOp2MemOpTable3;
- }
-
- if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
- return true;
- return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
-}
-
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
@@ -5536,9 +6193,10 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const MCInstrDesc &MCID = get(Opc);
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ // TODO: Check if 32-byte or greater accesses are slow too?
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@@ -5582,20 +6240,19 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
- for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
- MIB.addOperand(BeforeOps[i]);
+ for (MachineOperand &BeforeOp : BeforeOps)
+ MIB.addOperand(BeforeOp);
if (FoldedLoad)
MIB.addReg(Reg);
- for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
- MIB.addOperand(AfterOps[i]);
- for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
- MachineOperand &MO = ImpOps[i];
- MIB.addReg(MO.getReg(),
- getDefRegState(MO.isDef()) |
+ for (MachineOperand &AfterOp : AfterOps)
+ MIB.addOperand(AfterOp);
+ for (MachineOperand &ImpOp : ImpOps) {
+ MIB.addReg(ImpOp.getReg(),
+ getDefRegState(ImpOp.isDef()) |
RegState::Implicit |
- getKillRegState(MO.isKill()) |
- getDeadRegState(MO.isDead()) |
- getUndefRegState(MO.isUndef()));
+ getKillRegState(ImpOp.isKill()) |
+ getDeadRegState(ImpOp.isDead()) |
+ getUndefRegState(ImpOp.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
switch (DataMI->getOpcode()) {
@@ -5686,9 +6343,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
+ // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+ // memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
@@ -5729,9 +6388,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
+ // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+ // memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
@@ -6192,16 +6853,16 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
// domains, but they require a bit more work than just switching opcodes.
static const uint16_t *lookup(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
- if (ReplaceableInstrs[i][domain-1] == opcode)
- return ReplaceableInstrs[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrs)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}
static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
- if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
- return ReplaceableInstrsAVX2[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}
@@ -6347,230 +7008,181 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
return isHighLatencyDef(DefMI->getOpcode());
}
-static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
- const MachineBasicBlock *MBB) {
- assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
- const MachineOperand &Op1 = Inst.getOperand(1);
- const MachineOperand &Op2 = Inst.getOperand(2);
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- // We need virtual register definitions.
- MachineInstr *MI1 = nullptr;
- MachineInstr *MI2 = nullptr;
- if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
- MI1 = MRI.getUniqueVRegDef(Op1.getReg());
- if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
- MI2 = MRI.getUniqueVRegDef(Op2.getReg());
-
- // And they need to be in the trace (otherwise, they won't have a depth).
- if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
- return true;
+bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) const {
+ assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
+ "Reassociation needs binary operators");
+
+ // Integer binary math/logic instructions have a third source operand:
+ // the EFLAGS register. That operand must be both defined here and never
+ // used; i.e., it must be dead. If the EFLAGS operand is live, then we
+ // cannot change anything, because rearranging the operands could affect other
+ // instructions that depend on the exact status flags (zero, sign, etc.)
+ // that are set by using these particular operands with this operation.
+ if (Inst.getNumOperands() == 4) {
+ assert(Inst.getOperand(3).isReg() &&
+ Inst.getOperand(3).getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+ if (!Inst.getOperand(3).isDead())
+ return false;
+ }
- return false;
-}
-
-static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
- const MachineBasicBlock *MBB = Inst.getParent();
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
- MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
- unsigned AssocOpcode = Inst.getOpcode();
-
- // If only one operand has the same opcode and it's the second source operand,
- // the operands must be commuted.
- Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
- if (Commuted)
- std::swap(MI1, MI2);
-
- // 1. The previous instruction must be the same type as Inst.
- // 2. The previous instruction must have virtual register definitions for its
- // operands in the same basic block as Inst.
- // 3. The previous instruction's result must only be used by Inst.
- if (MI1->getOpcode() == AssocOpcode &&
- hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
- MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
- return true;
-
- return false;
+ return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
}
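Why the dead-EFLAGS requirement matters: reassociation preserves the final sum, but the final add sees different inputs, so the flags it sets can change. A small demonstration with the 8-bit carry bit standing in for EFLAGS.CF (illustrative values):

#include <cstdint>
#include <cstdio>

// Carry-out of an 8-bit add, standing in for the EFLAGS carry bit.
static bool carry8(uint8_t A, uint8_t B) { return unsigned(A) + B > 0xFF; }

int main() {
  uint8_t A = 200, X = 100, Y = 100;
  bool CFSerial  = carry8(uint8_t(A + X), Y); // final add of (A+X)+Y: CF=0
  bool CFReassoc = carry8(A, uint8_t(X + Y)); // final add of A+(X+Y): CF=1
  std::printf("CF serial=%d, reassociated=%d\n", CFSerial, CFReassoc);
  return 0;
}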
// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
-// 2. Other math / logic operations (and, or)
-static bool isAssociativeAndCommutative(unsigned Opcode) {
- switch (Opcode) {
+// 2. Other math / logic operations (xor, or)
+// 3. Other forms of the same operation (intrinsics and other variants)
+bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ case X86::AND8rr:
+ case X86::AND16rr:
+ case X86::AND32rr:
+ case X86::AND64rr:
+ case X86::OR8rr:
+ case X86::OR16rr:
+ case X86::OR32rr:
+ case X86::OR64rr:
+ case X86::XOR8rr:
+ case X86::XOR16rr:
+ case X86::XOR32rr:
+ case X86::XOR64rr:
+ case X86::IMUL16rr:
+ case X86::IMUL32rr:
+ case X86::IMUL64rr:
+ case X86::PANDrr:
+ case X86::PORrr:
+ case X86::PXORrr:
+ case X86::VPANDrr:
+ case X86::VPANDYrr:
+ case X86::VPORrr:
+ case X86::VPORYrr:
+ case X86::VPXORrr:
+ case X86::VPXORYrr:
+ // Normal min/max instructions are not commutative because of NaN and signed
+ // zero semantics, but these are. Thus, there's no need to check for global
+ // relaxed math; the instructions themselves have the properties we need.
+ case X86::MAXCPDrr:
+ case X86::MAXCPSrr:
+ case X86::MAXCSDrr:
+ case X86::MAXCSSrr:
+ case X86::MINCPDrr:
+ case X86::MINCPSrr:
+ case X86::MINCSDrr:
+ case X86::MINCSSrr:
+ case X86::VMAXCPDrr:
+ case X86::VMAXCPSrr:
+ case X86::VMAXCPDYrr:
+ case X86::VMAXCPSYrr:
+ case X86::VMAXCSDrr:
+ case X86::VMAXCSSrr:
+ case X86::VMINCPDrr:
+ case X86::VMINCPSrr:
+ case X86::VMINCPDYrr:
+ case X86::VMINCPSYrr:
+ case X86::VMINCSDrr:
+ case X86::VMINCSSrr:
+ return true;
+ case X86::ADDPDrr:
+ case X86::ADDPSrr:
case X86::ADDSDrr:
case X86::ADDSSrr:
- case X86::VADDSDrr:
- case X86::VADDSSrr:
+ case X86::MULPDrr:
+ case X86::MULPSrr:
case X86::MULSDrr:
case X86::MULSSrr:
+ case X86::VADDPDrr:
+ case X86::VADDPSrr:
+ case X86::VADDPDYrr:
+ case X86::VADDPSYrr:
+ case X86::VADDSDrr:
+ case X86::VADDSSrr:
+ case X86::VMULPDrr:
+ case X86::VMULPSrr:
+ case X86::VMULPDYrr:
+ case X86::VMULPSYrr:
case X86::VMULSDrr:
case X86::VMULSSrr:
- return true;
+ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
default:
return false;
}
}
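The UnsafeFPMath gate on the FP cases exists because reassociating floating-point adds and multiplies changes rounding, unlike the integer and bitwise cases earlier in the switch. A two-line demonstration of the hazard:

#include <cstdio>

int main() {
  float A = 1e20f, X = -1e20f, Y = 1.0f;
  float Serial  = (A + X) + Y; // 1.0f
  float Reassoc = A + (X + Y); // 0.0f: the 1.0f is absorbed into -1e20f
  std::printf("%g vs %g\n", Serial, Reassoc);
  return 0;
}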
-/// Return true if the input instruction is part of a chain of dependent ops
-/// that are suitable for reassociation, otherwise return false.
-/// If the instruction's operands must be commuted to have a previous
-/// instruction of the same type define the first source operand, Commuted will
-/// be set to true.
-static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
- // 1. The operation must be associative and commutative.
- // 2. The instruction must have virtual register definitions for its
- // operands in the same basic block.
- // 3. The instruction must have a reassociable sibling.
- if (isAssociativeAndCommutative(Inst.getOpcode()) &&
- hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
- hasReassocSibling(Inst, Commuted))
- return true;
-
- return false;
-}
-
-// FIXME: This has the potential to be expensive (compile time) while not
-// improving the code at all. Some ways to limit the overhead:
-// 1. Track successful transforms; bail out if hit rate gets too low.
-// 2. Only enable at -O3 or some other non-default optimization level.
-// 3. Pre-screen pattern candidates here: if an operand of the previous
-// instruction is known to not increase the critical path, then don't match
-// that pattern.
-bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
- if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
- return false;
-
- // TODO: There is nothing x86-specific here except the instruction type.
- // This logic could be hoisted into the machine combiner pass itself.
-
- // Look for this reassociation pattern:
- // B = A op X (Prev)
- // C = B op Y (Root)
-
- bool Commute;
- if (isReassocCandidate(Root, Commute)) {
- // We found a sequence of instructions that may be suitable for a
- // reassociation of operands to increase ILP. Specify each commutation
- // possibility for the Prev instruction in the sequence and let the
- // machine combiner decide if changing the operands is worthwhile.
- if (Commute) {
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
- } else {
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
- }
- return true;
- }
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+ MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
+ // Integer instructions define an implicit EFLAGS operand as the third
+ // source (fourth total) operand.
+ if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
+ return;
- return false;
+ assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
+ "Unexpected instruction type for reassociation");
+
+ MachineOperand &OldOp1 = OldMI1.getOperand(3);
+ MachineOperand &OldOp2 = OldMI2.getOperand(3);
+ MachineOperand &NewOp1 = NewMI1.getOperand(3);
+ MachineOperand &NewOp2 = NewMI2.getOperand(3);
+
+ assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
+ "Must have dead EFLAGS operand in reassociable instruction");
+ assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
+ "Must have dead EFLAGS operand in reassociable instruction");
+
+ (void)OldOp1;
+ (void)OldOp2;
+
+ assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+ assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+
+ // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
+ // of this pass or other passes. The EFLAGS operands must be dead in these new
+ // instructions because the EFLAGS operands in the original instructions must
+ // be dead in order for reassociation to occur.
+ NewOp1.setIsDead();
+ NewOp2.setIsDead();
}
-/// Attempt the following reassociation to reduce critical path length:
-/// B = A op X (Prev)
-/// C = B op Y (Root)
-/// ===>
-/// B = X op Y
-/// C = A op B
-static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern::MC_PATTERN Pattern,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
- MachineFunction *MF = Root.getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
-
- // This array encodes the operand index for each parameter because the
- // operands may be commuted. Each row corresponds to a pattern value,
- // and each column specifies the index of A, B, X, Y.
- unsigned OpIdx[4][4] = {
- { 1, 1, 2, 2 },
- { 1, 2, 2, 1 },
- { 2, 1, 1, 2 },
- { 2, 2, 1, 1 }
- };
-
- MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
- MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
- MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
- MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
- MachineOperand &OpC = Root.getOperand(0);
-
- unsigned RegA = OpA.getReg();
- unsigned RegB = OpB.getReg();
- unsigned RegX = OpX.getReg();
- unsigned RegY = OpY.getReg();
- unsigned RegC = OpC.getReg();
-
- if (TargetRegisterInfo::isVirtualRegister(RegA))
- MRI.constrainRegClass(RegA, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegB))
- MRI.constrainRegClass(RegB, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegX))
- MRI.constrainRegClass(RegX, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegY))
- MRI.constrainRegClass(RegY, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegC))
- MRI.constrainRegClass(RegC, RC);
-
- // Create a new virtual register for the result of (X op Y) instead of
- // recycling RegB because the MachineCombiner's computation of the critical
- // path requires a new register definition rather than an existing one.
- unsigned NewVR = MRI.createVirtualRegister(RC);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-
- unsigned Opcode = Root.getOpcode();
- bool KillA = OpA.isKill();
- bool KillX = OpX.isKill();
- bool KillY = OpY.isKill();
-
- // Create new instructions for insertion.
- MachineInstrBuilder MIB1 =
- BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
- .addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY));
- InsInstrs.push_back(MIB1);
-
- MachineInstrBuilder MIB2 =
- BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
- .addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true));
- InsInstrs.push_back(MIB2);
-
- // Record old instructions for deletion.
- DelInstrs.push_back(&Prev);
- DelInstrs.push_back(&Root);
+std::pair<unsigned, unsigned>
+X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ return std::make_pair(TF, 0u);
}
-void X86InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root,
- MachineCombinerPattern::MC_PATTERN Pattern,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
- MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
-
- // Select the previous instruction in the sequence based on the input pattern.
- MachineInstr *Prev = nullptr;
- switch (Pattern) {
- case MachineCombinerPattern::MC_REASSOC_AX_BY:
- case MachineCombinerPattern::MC_REASSOC_XA_BY:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- break;
- case MachineCombinerPattern::MC_REASSOC_AX_YB:
- case MachineCombinerPattern::MC_REASSOC_XA_YB:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
- }
- assert(Prev && "Unknown pattern for machine combiner");
-
- reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
- return;
+ArrayRef<std::pair<unsigned, const char *>>
+X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace X86II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
+ {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
+ {MO_GOT, "x86-got"},
+ {MO_GOTOFF, "x86-gotoff"},
+ {MO_GOTPCREL, "x86-gotpcrel"},
+ {MO_PLT, "x86-plt"},
+ {MO_TLSGD, "x86-tlsgd"},
+ {MO_TLSLD, "x86-tlsld"},
+ {MO_TLSLDM, "x86-tlsldm"},
+ {MO_GOTTPOFF, "x86-gottpoff"},
+ {MO_INDNTPOFF, "x86-indntpoff"},
+ {MO_TPOFF, "x86-tpoff"},
+ {MO_DTPOFF, "x86-dtpoff"},
+ {MO_NTPOFF, "x86-ntpoff"},
+ {MO_GOTNTPOFF, "x86-gotntpoff"},
+ {MO_DLLIMPORT, "x86-dllimport"},
+ {MO_DARWIN_STUB, "x86-darwin-stub"},
+ {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
+ {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
+ {MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, "x86-darwin-hidden-nonlazy-pic-base"},
+ {MO_TLVP, "x86-tlvp"},
+ {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
+ {MO_SECREL, "x86-secrel"}};
+ return makeArrayRef(TargetFlags);
}
namespace {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index bf63336..9d40334 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -23,22 +23,10 @@
#include "X86GenInstrInfo.inc"
namespace llvm {
+ class MachineInstrBuilder;
class X86RegisterInfo;
class X86Subtarget;
- namespace MachineCombinerPattern {
- enum MC_PATTERN : int {
- // These are commutative variants for reassociating a computation chain
- // of the form:
- // B = A op X (Prev)
- // C = B op Y (Root)
- MC_REASSOC_AX_BY = 0,
- MC_REASSOC_AX_YB = 1,
- MC_REASSOC_XA_BY = 2,
- MC_REASSOC_XA_YB = 3,
- };
- } // end namespace MachineCombinerPattern
-
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
@@ -259,14 +247,64 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
- /// commuteInstruction - We have a few instructions that must be hacked on to
- /// commute them.
+ /// Returns true iff the routine could find two commutable operands in the
+ /// given machine instruction.
+ /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their
+ /// input values may be redefined in this method only if they are not
+ /// pre-defined, which is designated by the special value
+ /// 'CommuteAnyOperandIndex' assigned to them.
+ /// If both indices are pre-defined and refer to some operands, then the
+ /// method simply returns true if the corresponding operands are commutable
+ /// and returns false otherwise.
///
- MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
+ /// For example, calling this method this way:
+ /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+ /// findCommutedOpIndices(MI, Op1, Op2);
+ /// can be interpreted as a query asking to find an operand that would be
+ /// commutable with operand #1.
bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ /// Returns true if the routine could find two commutable operands
+ /// in the given FMA instruction. Otherwise, returns false.
+ ///
+ /// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
+ /// The output indices of the commuted operands are returned in these
+ /// arguments. Also, the input values of these arguments may be preset either
+ /// to indices of operands that must be commuted or to the special
+ /// value 'CommuteAnyOperandIndex', which means that the corresponding
+ /// operand index is not set and this method is free to pick any of the
+ /// available commutable operands.
+ ///
+ /// For example, calling this method this way:
+ /// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
+ /// findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+ /// can be interpreted as a query asking if operand #1 can be swapped
+ /// with any other available operand (e.g. operand #2, operand #3, etc.).
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given MI.
+ /// For example, commuting operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results in an instruction with an adjusted opcode:
+ /// FMA231 #3, #2, #1
+ bool findFMA3CommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+
+ /// Returns an adjusted FMA opcode that must be used in FMA instruction that
+ /// performs the same computations as the given MI but which has the operands
+ /// \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+ /// It may return 0 if it is unsafe to commute the operands.
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given \p MI.
+ /// For example, commuting operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results in an instruction with an adjusted opcode:
+ /// FMA231 #3, #2, #1
+ unsigned getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) const;
+
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr* MI) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -342,11 +380,6 @@ public:
MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
- /// canFoldMemoryOperand - Returns true if the specified load / store is
- /// folding is possible.
- bool canFoldMemoryOperand(const MachineInstr *,
- ArrayRef<unsigned>) const override;
-
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instruction. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -406,10 +439,9 @@ public:
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- static bool isX86_64ExtendedReg(const MachineOperand &MO) {
- if (!MO.isReg()) return false;
- return X86II::isX86_64ExtendedReg(MO.getReg());
- }
+ /// True if MI has a condition code def, e.g. EFLAGS, that is
+ /// not marked dead.
+ bool hasLiveCondCodeDef(MachineInstr *MI) const;
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
@@ -452,26 +484,19 @@ public:
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const override;
-
bool useMachineCombiner() const override {
return true;
}
-
- /// Return true when there is potentially a faster code sequence
- /// for an instruction chain ending in <Root>. All potential patterns are
- /// output in the <Pattern> array.
- bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
-
- /// When getMachineCombinerPatterns() finds a pattern, this function generates
- /// the instructions that could replace the original code sequence.
- void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
+ bool hasReassociableOperands(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) const override;
+
+ void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const override;
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -500,16 +525,49 @@ public:
unsigned &FoldAsLoadDefReg,
MachineInstr *&DefMI) const override;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+ /// Commutes the operands in the given instruction by changing their order
+ /// and/or changing the instruction's opcode and/or the immediate value
+ /// operand.
+ ///
+ /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+ /// to be commuted.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI, bool NewMI,
+ unsigned CommuteOpIdx1,
+ unsigned CommuteOpIdx2) const override;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
+ /// Handles memory folding for special-case instructions, for instance those
+ /// requiring custom manipulation of the address.
+ MachineInstr *foldMemoryOperandCustom(MachineFunction &MF, MachineInstr *MI,
+ unsigned OpNum,
+ ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align) const;
+
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;
+
+ /// Expand the MOVImmSExti8 pseudo-instructions.
+ bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 52bab9c..f4ca2b8 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -106,8 +106,6 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -158,6 +156,8 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue]>;
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
@@ -250,9 +250,6 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
- [SDNPHasChain, SDNPOutGlue]>;
-
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -344,18 +341,21 @@ def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
-// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
-// plain GR64, so that it doesn't potentially require a REX prefix.
-def i8mem_NOREX : Operand<i64> {
+// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
+// of a plain GPR, so that it doesn't potentially require a REX prefix.
+def ptr_rc_norex : PointerLikeRegClass<2>;
+def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
+
+def i8mem_NOREX : Operand<iPTR> {
let PrintMethod = "printi8mem";
- let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, i8imm);
let ParserMatchClass = X86Mem8AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
// GPRs available for tailcall.
// It represents GR32_TC, GR64_TC or GR64_TCW64.
-def ptr_rc_tailcall : PointerLikeRegClass<2>;
+def ptr_rc_tailcall : PointerLikeRegClass<4>;
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
@@ -697,34 +697,34 @@ def lea64mem : Operand<i64> {
// X86 Complex Pattern Definitions.
//
-// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
-def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+// Define X86-specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
-def lea64_32addr : ComplexPattern<i32, 5, "SelectLEA64_32Addr",
+def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
[add, sub, mul, X86mul_imm, shl, or,
frameindex, X86WrapperRIP],
[]>;
-def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+def tls32addr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
+def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
@@ -767,12 +767,21 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">,
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">,
AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
+def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+def PKU : Predicate<"Subtarget->hasPKU()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
+def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
+def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
+def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
+def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
def HasFMA : Predicate<"Subtarget->hasFMA()">;
def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
@@ -794,6 +803,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -812,6 +822,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
AssemblerPredicate<"Mode32Bit", "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+ "Subtarget->getFrameLowering()->hasFP(*MF)">;
def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -825,6 +837,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
+def OptForMinSize : Predicate<"OptForMinSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -867,20 +880,54 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
}]>;
-def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
-def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
-def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register, rather than encode the immediate in every instruction.
+// This risks increasing register pressure from stretched live
+// ranges; however, the immediates should be trivial for the RA to
+// rematerialize in the event of high register pressure.
+// TODO: This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2: This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow up on enabling O2 after we fix that
+// issue.
+// TODO3: This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm8_su : PatLeaf<(i8 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm32_su : PatLeaf<(i32 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
-def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
-def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
def i64immZExt32SExt8 : ImmLeaf<i64, [{
- return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+ return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
}]>;
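The ImmLeaf rewrites above replace hand-rolled casts with LLVM's isInt/isUInt helpers. A standalone model of their semantics (simplified from llvm/Support/MathExtras.h, shown for illustration), including a check of the combined zext-32/sext-8 predicate:

#include <cassert>
#include <cstdint>

template <unsigned N> bool isInt(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
template <unsigned N> bool isUInt(uint64_t X) {
  return N >= 64 || X < (UINT64_C(1) << N);
}

int main() {
  assert(isInt<8>(-128) && isInt<8>(127) && !isInt<8>(128));
  // i64immZExt32SExt8: fits an unsigned 32-bit field, and the low 32 bits
  // are an 8-bit sign-extended value. 0xFFFFFFFF qualifies (low bits are -1).
  uint64_t Imm = 0xFFFFFFFFULL;
  assert(isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm)));
  return 0;
}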
// Helper fragments for loads.
@@ -914,11 +961,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1020,12 +1068,8 @@ def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize16;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
- IIC_PUSH_MEM>, OpSize16;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
- IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
"push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@@ -1039,6 +1083,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+ IIC_PUSH_MEM>, OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+ IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@@ -1071,9 +1123,11 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
+} // mayStore, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>;
-} // mayStore, SchedRW
+} // mayLoad, mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
@@ -1275,13 +1329,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
+ [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>;
def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
+ [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
+ [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1457,10 +1511,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
- [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
+ [(set EFLAGS, (X86sahf AH))], IIC_AHF>,
+ Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
- IIC_AHF>; // AH = flags
+ IIC_AHF>, // AH = flags
+ Requires<[HasLAHFSAHF]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1894,37 +1950,38 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
}
// Table lookup instructions
+let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
Sched<[WriteLoad]>;
let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
-// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
Requires<[Not64BitMode]>;
// ASCII Adjust AX Before Division
-// sets AL, AH and EFLAGS and uses AL and AH
+let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
"aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>;
// ASCII Adjust AX After Multiply
-// sets AL, AH and EFLAGS and uses AL
+let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
"aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>;
// ASCII Adjust AL After Subtraction
-// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>,
Requires<[Not64BitMode]>;
// Decimal Adjust AL after Addition
-// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
Requires<[Not64BitMode]>;
// Decimal Adjust AL after Subtraction
-// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[Not64BitMode]>;
} // SchedRW
@@ -2357,6 +2414,32 @@ defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>;
} // HasTBM, EFLAGS
//===----------------------------------------------------------------------===//
+// MONITORX/MWAITX Instructions
+//
+let SchedRW = [WriteSystem] in {
+let Uses = [EAX, ECX, EDX] in
+def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", [],
+ IIC_SSE_MONITOR>, TB;
+let Uses = [ECX, EAX, EBX] in
+def MWAITXrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [], IIC_SSE_MWAIT>,
+ TB;
+} // SchedRW
+
+def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrr)>, Requires<[Not64BitMode]>;
+def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
+ Requires<[Not64BitMode]>;
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// CLZERO Instruction
+//
+let Uses = [EAX] in
+def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+
+//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
//===----------------------------------------------------------------------===//
@@ -2498,8 +2581,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>;
def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"loopz", "loope", "att">;
-def : MnemonicAlias<"loopnz", "loopne", "att">;
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>;
@@ -2532,14 +2615,15 @@ def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"repe", "rep", "att">;
-def : MnemonicAlias<"repz", "rep", "att">;
-def : MnemonicAlias<"repnz", "repne", "att">;
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sal", "shl", "intel">;
def : MnemonicAlias<"salb", "shlb", "att">;
def : MnemonicAlias<"salw", "shlw", "att">;
def : MnemonicAlias<"sall", "shll", "att">;
@@ -2579,14 +2663,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">;
def : MnemonicAlias<"fcmovnae", "fcmovb", "att">;
def : MnemonicAlias<"fcmovna", "fcmovbe", "att">;
def : MnemonicAlias<"fcmovae", "fcmovnb", "att">;
-def : MnemonicAlias<"fcomip", "fcompi", "att">;
+def : MnemonicAlias<"fcomip", "fcompi">;
def : MnemonicAlias<"fildq", "fildll", "att">;
def : MnemonicAlias<"fistpq", "fistpll", "att">;
def : MnemonicAlias<"fisttpq", "fisttpll", "att">;
def : MnemonicAlias<"fldcww", "fldcw", "att">;
def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
-def : MnemonicAlias<"fucomip", "fucompi", "att">;
+def : MnemonicAlias<"fucomip", "fucompi">;
def : MnemonicAlias<"fwait", "wait">;
def : MnemonicAlias<"fxsaveq", "fxsave64", "att">;
@@ -2594,7 +2678,9 @@ def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">;
def : MnemonicAlias<"xsaveq", "xsave64", "att">;
def : MnemonicAlias<"xrstorq", "xrstor64", "att">;
def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">;
-
+def : MnemonicAlias<"xrstorsq", "xrstors64", "att">;
+def : MnemonicAlias<"xsavecq", "xsavec64", "att">;
+def : MnemonicAlias<"xsavesq", "xsaves64", "att">;
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
string VariantName>
@@ -2640,8 +2726,8 @@ defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">;
//===----------------------------------------------------------------------===//
// aad/aam default to base 10 if no operand is specified.
-def : InstAlias<"aad", (AAD8i8 10)>;
-def : InstAlias<"aam", (AAM8i8 10)>;
+def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>;
+def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
// Likewise for btc/btr/bts.
@@ -2719,8 +2805,10 @@ def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>;
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
+def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
@@ -2798,20 +2886,20 @@ def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16Bit
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
-def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
-def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
-def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
-def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
+def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;
def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>;
-def : InstAlias<"inb\t$port", (IN8ri i8imm:$port), 0>;
-def : InstAlias<"inw\t$port", (IN16ri i8imm:$port), 0>;
-def : InstAlias<"inl\t$port", (IN32ri i8imm:$port), 0>;
+def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>;
+def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>;
+def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
@@ -2861,9 +2949,9 @@ def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:
def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>;
def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>;
def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>;
-def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port), 0>;
-def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port), 0>;
-def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>;
+def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>;
+def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>;
+def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>;
// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
@@ -2940,3 +3028,34 @@ def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
(XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>;
def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>;
+
+// These aliases exist to get the parser to prioritize matching 8-bit
+// immediate encodings over matching the implicit ax/eax/rax encodings. By
+// explicitly mentioning the A register here, these entries will be ordered
+// first due to the more explicit immediate type.
+def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>;
+
+def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>;
+
+def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index eaa7894..11dc1e7 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -249,6 +249,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
(MMX_X86movd2w (x86mmx VR64:$src)))],
IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
+let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))],
@@ -262,7 +263,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
-let SchedRW = [WriteMove] in {
+let SchedRW = [WriteMove], isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -303,7 +304,7 @@ def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
- (i64 (vector_extract (v2i64 VR128:$src),
+ (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))],
IIC_MMX_MOVQ_RR>;
@@ -326,6 +327,7 @@ def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
}
} // SchedRW
+let Predicates = [HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
@@ -355,6 +357,7 @@ defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
MMX_INTALU_ITINS, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
MMX_INTALU_ITINS, 1>;
+let Predicates = [HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
MMX_INTALUQ_ITINS, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
@@ -382,6 +385,7 @@ defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
MMX_INTALU_ITINS>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
MMX_INTALU_ITINS>;
+let Predicates = [HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
MMX_INTALUQ_ITINS>;
@@ -408,8 +412,10 @@ defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
MMX_PMUL_ITINS, 1>;
+let Predicates = [HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
MMX_PMUL_ITINS, 1>;
+let Predicates = [HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
MMX_PMUL_ITINS, 1>;
let isCommutable = 1 in
@@ -422,6 +428,7 @@ defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>;
+let Predicates = [HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
MMX_MISC_FUNC_ITINS, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
@@ -439,6 +446,7 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
MMX_PSADBW_ITINS, 1>;
+}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
MMX_MISC_FUNC_ITINS>;
@@ -594,6 +602,7 @@ let Constraints = "$src1 = $dst" in {
}
// Extract / Insert
+let Predicates = [HasSSE1] in
def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -601,6 +610,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
imm:$src2))],
IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
+let Predicates = [HasSSE1] in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
@@ -618,8 +628,10 @@ let Constraints = "$src1 = $dst" in {
imm:$src3))],
IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
+}
// Mask creation
+let Predicates = [HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -639,12 +651,12 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
// Misc.
let SchedRW = [WriteShuffle] in {
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
IIC_MMX_MASKMOV>;
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [HasSSE1,In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
@@ -653,10 +665,6 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
// 64-bit bit convert.
let Predicates = [HasSSE2] in {
-def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 99386b0..7a44212 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -330,9 +330,9 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// A 128-bit subvector extract from the first 256-bit vector position
@@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>;
+ def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>;
}
// Bitcasts between 256-bit vector types. Return the original type since
@@ -650,10 +652,10 @@ let Predicates = [UseAVX] in {
}
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
@@ -736,7 +738,7 @@ let Predicates = [UseSSE1] in {
}
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
@@ -770,7 +772,7 @@ let Predicates = [UseSSE2] in {
}
// Extract and store.
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
@@ -935,22 +937,6 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
-let Predicates = [HasAVX] in {
-def : Pat<(v8i32 (X86vzmovl
- (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v4i64 (X86vzmovl
- (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v8f32 (X86vzmovl
- (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v4f64 (X86vzmovl
- (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-}
-
-
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
@@ -1172,12 +1158,13 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
string base_opc, InstrItinClass itin> {
- defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Predicates = [UseAVX] in
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
itin>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $dst|$dst, $src2}",
itin>;
}
@@ -1188,29 +1175,31 @@ let AddedComplexity = 20 in {
}
let SchedRW = [WriteStore] in {
+let Predicates = [UseAVX] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
+ [(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>, VEX;
+}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
+ [(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
} // SchedRW
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// Shuffle with VMOVLPS
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
@@ -1243,7 +1232,7 @@ let Predicates = [HasAVX] in {
let Predicates = [UseSSE1] in {
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
@@ -1297,31 +1286,33 @@ let AddedComplexity = 20 in {
let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
+let Predicates = [UseAVX] in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW
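// Illustrative note: per the comment above, element 1 of a v2f64 is extracted
// by unpacking high-to-low and then taking element 0, i.e. a dag of the form
//   (extractelt (v2f64 (X86Unpckh V, V)), (iPTR 0))
// which is exactly what the MOVHPDmr/VMOVHPDmr store patterns fold away.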
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// VMOVHPS patterns
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
@@ -1345,7 +1336,7 @@ let Predicates = [HasAVX] in {
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(store (f64 (vector_extract
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
@@ -1377,7 +1368,7 @@ let Predicates = [UseSSE2] in {
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(store (f64 (vector_extract
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
@@ -2073,14 +2064,16 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
(VCVTDQ2PSrm addr:$src)>;
+}
- def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
@@ -2149,7 +2142,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
@@ -2306,7 +2299,9 @@ let Predicates = [HasAVX] in {
(VCVTDQ2PSYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
+}
+let Predicates = [HasAVX, NoVLX] in {
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;
@@ -2452,9 +2447,9 @@ let Defs = [EFLAGS] in {
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd">, PD, VEX, VEX_LIG;
let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss">, PS, VEX, VEX_LIG;
- defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd">, PD, VEX, VEX_LIG;
}
@@ -2475,9 +2470,9 @@ let Defs = [EFLAGS] in {
"ucomisd">, PD;
let Pattern = []<dag> in {
- defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss">, PS;
- defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd">, PD;
}
@@ -2605,19 +2600,20 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
Sched<[WriteFShuffle]>;
}
-defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+let Predicates = [HasAVX, NoVLX] in {
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f32, SSEPackedSingle>, PS, VEX_4V;
-defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
-defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv2f64, SSEPackedDouble>, PD, VEX_4V;
-defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;
-
+}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -2627,7 +2623,7 @@ let Constraints = "$src1 = $dst" in {
memopv2f64, SSEPackedDouble>, PD;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
@@ -2694,6 +2690,7 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
+let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, PS, VEX_4V;
@@ -2719,7 +2716,7 @@ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, VEX_4V, VEX_L;
-
+}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
@@ -2845,8 +2842,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
- OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX, NoVLX] in
+ OpndItins itins, bit IsCommutable = 0, Predicate prd> {
+let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
@@ -2854,7 +2851,7 @@ let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, itins, IsCommutable, 1>;
-let Predicates = [HasAVX2, NoVLX] in
+let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
@@ -2863,13 +2860,13 @@ let Predicates = [HasAVX2, NoVLX] in
// These are ordered here for pattern ordering requirements with the fp versions
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 0>;
+ SSE_VEC_BIT_ITINS_P, 0, NoVLX>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2911,7 +2908,7 @@ let isCodeGenOnly = 1 in {
// Multiclass for vectors using the X86 logical operation aliases for FP.
multiclass sse12_fp_packed_vector_logical_alias<
bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V;
@@ -2923,7 +2920,7 @@ multiclass sse12_fp_packed_vector_logical_alias<
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V, VEX_L;
-
+
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V, VEX_L;
@@ -3183,7 +3180,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE1] in {
// extracted scalar math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3198,7 +3195,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3215,7 +3212,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX] in {
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3241,7 +3238,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE2] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3256,7 +3253,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3271,14 +3268,14 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3449,8 +3446,8 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
-let Predicates = [HasAVX] in {
+ OpndItins itins, list<Predicate> prds> {
+let Predicates = prds in {
def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
@@ -3546,16 +3543,16 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>;
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>;
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>;
// There is no f64 version of the reciprocal approximation instructions.
@@ -4018,39 +4015,43 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
} // ExeDomain = SSEPackedInt
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 1>;
+ SSE_INTALUQ_ITINS_P, 1, NoVLX>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 0>;
+ SSE_INTALUQ_ITINS_P, 0, NoVLX>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
// Intrinsic forms
defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
@@ -4067,26 +4068,18 @@ defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- int_x86_avx2_psad_bw, SSE_PMADD, 1>;
-
-let Predicates = [HasAVX2] in
- def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1),
- (v32i8 VR256:$src2))),
- (VPSADBWYrr VR256:$src2, VR256:$src1)>;
let Predicates = [HasAVX] in
- def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (VPSADBWrr VR128:$src2, VR128:$src1)>;
-
-def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (PSADBWrr VR128:$src2, VR128:$src1)>;
+defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V;
+let Predicates = [HasAVX2] in
+defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
+ loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
+ memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
@@ -4105,9 +4098,6 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX, NoVLX] in {
-defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4115,9 +4105,6 @@ defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4125,14 +4112,26 @@ defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+} // Predicates = [HasAVX, NoVLX]
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
+
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift],
+    Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
@@ -4147,13 +4146,9 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
(v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX]
+} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
let Predicates = [HasAVX2, NoVLX] in {
-defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -4161,9 +4156,6 @@ defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -4171,14 +4163,25 @@ defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+}// Predicates = [HasAVX2, NoVLX]
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+}// Predicates = [HasAVX2, NoVLX_Or_NoBWI]
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0,
+ Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
@@ -4193,8 +4196,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
(v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX2]
+} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
@@ -4247,17 +4249,17 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
//===---------------------------------------------------------------------===//
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
@@ -4511,40 +4513,43 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in {
+
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, loadv2i64, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- bc_v4i32, loadv2i64, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- bc_v2i64, loadv2i64, 0>, VEX_4V;
-
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, loadv2i64, 0>, VEX_4V;
+}
+let Predicates = [HasAVX, NoVLX] in {
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, loadv2i64, 0>, VEX_4V;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V, VEX_L;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
- bc_v8i32>, VEX_4V, VEX_L;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
- bc_v4i64>, VEX_4V, VEX_L;
-
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V, VEX_L;
+}
+let Predicates = [HasAVX2, NoVLX] in {
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
+ bc_v8i32>, VEX_4V, VEX_L;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
+ bc_v4i64>, VEX_4V, VEX_L;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V, VEX_L;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
@@ -4600,7 +4605,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
}
// Extract
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4615,7 +4620,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V;
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
@@ -4683,7 +4688,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
-// SSE2 - Move Doubleword
+// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
@@ -4770,23 +4775,23 @@ let isCodeGenOnly = 1 in {
//
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
Sched<[WriteMove]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128:$src),
+ [(store (i32 (extractelt (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
Sched<[WriteMove]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128:$src),
+ [(store (i32 (extractelt (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
@@ -4808,24 +4813,25 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))],
+ [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
+ (iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
VEX;
def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
} //SchedRW
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst),
- (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}",
+def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src),
+def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
@@ -4883,30 +4889,18 @@ let isCodeGenOnly = 1 in {
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}
-//===---------------------------------------------------------------------===//
-// Patterns and instructions to describe movd/movq to XMM register zero-extends
-//
-let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
-let AddedComplexity = 15 in {
-def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", // X86-64 only
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))],
- IIC_SSE_MOVDQ>,
- VEX, VEX_W;
-def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))],
- IIC_SSE_MOVDQ>;
-}
-} // isCodeGenOnly, SchedRW
-
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in
+ let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIrr GR64:$src)>;
+
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;
+ }
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
@@ -4924,16 +4918,16 @@ let Predicates = [UseAVX] in {
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in
+ let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(MOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (MOV64toPQIrr GR64:$src)>;
+ }
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
@@ -4985,12 +4979,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
+ [(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, VEX;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
+ [(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
} // ExeDomain, SchedRW
@@ -5119,7 +5113,7 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
@@ -5134,7 +5128,7 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
memopv4f32, f128mem>;
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -5190,21 +5184,30 @@ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>,
+ (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
Sched<[WriteLoad]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-let Predicates = [HasAVX] in {
+
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (loadv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+}
+
+let Predicates = [HasAVX] in {
def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
@@ -5212,16 +5215,6 @@ let Predicates = [HasAVX] in {
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-
- // 256-bit version
- def : Pat<(X86Movddup (loadv4f64 addr:$src)),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (loadv4i64 addr:$src)),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4i64 VR256:$src)),
- (VMOVDDUPYrr VR256:$src)>;
}
let Predicates = [UseAVX, OptForSize] in {
@@ -5791,37 +5784,37 @@ let Predicates = [HasAVX2] in
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGN : ssse3_palignr<"palignr">;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
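The rewrites above stop swapping $src1/$src2 when mapping X86PAlignr onto the instruction, so pattern operands now pass through in instruction order (presumably the node's operand order was changed to match). As a reminder of the semantics, palignr concatenates its two inputs and shifts the double-width value right by the immediate; a sketch with the SSSE3 intrinsic:

    #include <tmmintrin.h>

    /* Result = low 16 bytes of (hi:lo) >> (4 * 8), i.e. bytes 4..19 of
       the 32-byte concatenation, so operand order matters. */
    __m128i align_bytes(__m128i hi, __m128i lo) {
        return _mm_alignr_epi8(hi, lo, 4);
    }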
//===---------------------------------------------------------------------===//
@@ -6145,7 +6138,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
imm:$src2)))), addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -6170,7 +6163,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
imm:$src2)))), addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -6194,7 +6187,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
@@ -6217,7 +6210,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, REX_W;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
@@ -6285,7 +6278,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -6311,7 +6304,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -6337,7 +6330,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -6543,71 +6536,71 @@ let Predicates = [HasAVX] in {
let Predicates = [UseAVX] in {
def : Pat<(ffloor FR32:$src),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
def : Pat<(f64 (ffloor FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
def : Pat<(f64 (fceil FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
def : Pat<(f64 (ftrunc FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
}
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x1))>;
+ (VROUNDPSr VR128:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x2))>;
+ (VROUNDPSr VR128:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x3))>;
+ (VROUNDPSr VR128:$src, (i32 0xB))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x1))>;
+ (VROUNDPDr VR128:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x2))>;
+ (VROUNDPDr VR128:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x3))>;
+ (VROUNDPDr VR128:$src, (i32 0xB))>;
def : Pat<(v8f32 (ffloor VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ (VROUNDYPSr VR256:$src, (i32 0x9))>;
def : Pat<(v8f32 (fnearbyint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ (VROUNDYPSr VR256:$src, (i32 0xA))>;
def : Pat<(v8f32 (frint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x3))>;
+ (VROUNDYPSr VR256:$src, (i32 0xB))>;
def : Pat<(v4f64 (ffloor VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x1))>;
+ (VROUNDYPDr VR256:$src, (i32 0x9))>;
def : Pat<(v4f64 (fnearbyint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ (VROUNDYPDr VR256:$src, (i32 0xA))>;
def : Pat<(v4f64 (frint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x3))>;
+ (VROUNDYPDr VR256:$src, (i32 0xB))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6619,47 +6612,47 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
let Predicates = [UseSSE41] in {
def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64:$src)),
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64:$src)),
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
def : Pat<(v4f32 (ffloor VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x1))>;
+ (ROUNDPSr VR128:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x2))>;
+ (ROUNDPSr VR128:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x3))>;
+ (ROUNDPSr VR128:$src, (i32 0xB))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x1))>;
+ (ROUNDPDr VR128:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x2))>;
+ (ROUNDPDr VR128:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x3))>;
+ (ROUNDPDr VR128:$src, (i32 0xB))>;
}
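The immediate changes above (0x1 -> 0x9, 0x2 -> 0xA, 0x3 -> 0xB) set bit 3 of the roundss/roundps control byte, which suppresses the precision (inexact) exception; the rounding-mode bits themselves are unchanged. This matches how the C intrinsics spell floor:

    #include <smmintrin.h>

    /* 0x9 == _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: round toward
       -inf without raising the inexact exception. */
    __m128 floor4(__m128 x) {
        return _mm_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    }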
//===----------------------------------------------------------------------===//
@@ -7815,13 +7808,7 @@ def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
-class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> :
- AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
-
-class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
PatFrag ld_frag, SchedWrite Sched> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
@@ -7832,38 +7819,33 @@ class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
// AVX2 adds register forms
-class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
- Intrinsic Int, SchedWrite Sched> :
+class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
+ [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
+ Sched<[Sched]>, VEX;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+ def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
f32mem, v4f32, loadf32, WriteLoad>;
- def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+ def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
f32mem, v8f32, loadf32,
WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
v4f64, loadf64, WriteFShuffleLd>, VEX_L;
-def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
- int_x86_avx_vbroadcastf128_pd_256,
- WriteFShuffleLd>, VEX_L;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
- int_x86_avx2_vbroadcast_ss_ps,
- WriteFShuffle>;
- def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
- int_x86_avx2_vbroadcast_ss_ps_256,
- WriteFShuffle256>, VEX_L;
+ def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
+ v4f32, v4f32, WriteFShuffle>;
+ def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
+ v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256,
- WriteFShuffle256>, VEX_L;
+def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
+ v4f64, v2f64, WriteFShuffle256>, VEX_L;
let mayLoad = 1, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
@@ -7871,6 +7853,13 @@ def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteLoad]>, VEX, VEX_L;
+def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
+ (ins f128mem:$src),
+ "vbroadcastf128\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>,
+ Sched<[WriteFShuffleLd]>, VEX, VEX_L;
+
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
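VBROADCASTF128 is now defined directly rather than through the dropped avx_broadcast helper class; it loads 128 bits and repeats them in both halves of a YMM register. A sketch of the corresponding AVX intrinsic:

    #include <immintrin.h>

    /* Lowers to the int_x86_avx_vbroadcastf128_ps_256 pattern above. */
    __m256 splat_lane(const __m128 *p) {
        return _mm256_broadcast_ps(p);
    }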
@@ -7891,7 +7880,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
(iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
@@ -8080,17 +8069,19 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(bitconvert (i_frag addr:$src2))))]>, VEX_4V,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
- def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ let Predicates = [HasAVX, NoVLX] in {
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
- def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
+ } // Predicates = [HasAVX, NoVLX]
}
let ExeDomain = SSEPackedSingle in {
@@ -8106,7 +8097,7 @@ let ExeDomain = SSEPackedDouble in {
loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
(VPERMILPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
@@ -8245,11 +8236,11 @@ let Predicates = [HasF16C] in {
def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(store (f64 (vector_extract (bc_v2f64 (v8i16
+ def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (i64 (vector_extract (bc_v2i64 (v8i16
+ def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
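The extractelt stores above fold a vcvtps2ph result straight into a 64-bit store. Roughly what the F16C intrinsics produce:

    #include <immintrin.h>

    /* Pack four f32 to four f16 (the low 64 bits of the result) and
       store lane 0 -- the (store (f64/i64 (extractelt ...))) patterns
       above turn this into a single vcvtps2ph to memory. */
    void store_half4(const float *src, unsigned short *dst) {
        __m128i h = _mm_cvtps_ph(_mm_loadu_ps(src), _MM_FROUND_CUR_DIRECTION);
        _mm_storel_epi64((__m128i *)dst, h);
    }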
@@ -8309,97 +8300,62 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
- Intrinsic Int128, Intrinsic Int256> {
- def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ ValueType OpVT128, ValueType OpVT256, Predicate prd> {
+ let Predicates = [HasAVX2, prd] in {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int128 VR128:$src))]>,
+ [(set VR128:$dst,
+ (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle]>, VEX;
- def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
+ (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX;
- def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int256 VR128:$src))]>,
+ [(set VR256:$dst,
+ (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
+ (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX, VEX_L;
+
+ // Provide aliases for broadcast from the same register class that
+ // automatically do the extract.
+ def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
+ (!cast<Instruction>(NAME#"Yrr")
+ (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src), sub_xmm)))>;
+ }
}
defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
- int_x86_avx2_pbroadcastb_128,
- int_x86_avx2_pbroadcastb_256>;
+ v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
- int_x86_avx2_pbroadcastw_128,
- int_x86_avx2_pbroadcastw_256>;
+ v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
- int_x86_avx2_pbroadcastd_128,
- int_x86_avx2_pbroadcastd_256>;
+ v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
- int_x86_avx2_pbroadcastq_128,
- int_x86_avx2_pbroadcastq_256>;
+ v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2] in {
- def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
- (VPBROADCASTBrm addr:$src)>;
- def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
- (VPBROADCASTBYrm addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWYrm addr:$src)>;
- def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDrm addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDYrm addr:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
- (VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VPBROADCASTQYrm addr:$src)>;
-
- def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
- (VPBROADCASTBrr VR128:$src)>;
- def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
- (VPBROADCASTBYrr VR128:$src)>;
- def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
- (VPBROADCASTWrr VR128:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
- (VPBROADCASTWYrr VR128:$src)>;
- def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
- (VPBROADCASTDrr VR128:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
- (VPBROADCASTDYrr VR128:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
- (VPBROADCASTQrr VR128:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
- (VPBROADCASTQYrr VR128:$src)>;
- def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
- (VBROADCASTSSrr VR128:$src)>;
- def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
- (VBROADCASTSSYrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
- (VPBROADCASTQrr VR128:$src)>;
- def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
- (VBROADCASTSDYrr VR128:$src)>;
+ // loadi16 is tricky to fold, because isTypeDesirableForOp returns false for i16, justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
// Provide aliases for broadcast from the same register class that
// automatically do the extract.
- def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
- (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
- (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
- (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
- (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
- sub_xmm)))>;
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
(VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
sub_xmm)))>;
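The truncated-load patterns above exist because a 16-bit scalar load is legalized to a wider load plus truncate before instruction selection; without them the memory fold for vpbroadcastw would be lost. Typical C source that should hit them (a sketch):

    #include <immintrin.h>

    /* With AVX2 this should compile to one vpbroadcastw from memory. */
    __m256i splat_u16(const unsigned short *p) {
        return _mm256_set1_epi16((short)*p);
    }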
@@ -8598,7 +8554,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
@@ -8722,16 +8678,16 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
-def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
(VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
(VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
(VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
(VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
@@ -8776,10 +8732,10 @@ def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0)
(VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
-def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
(VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
(VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
@@ -8804,10 +8760,10 @@ def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0)
(VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
-def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
(VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
-def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
(VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
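Switching these patterns from the generic masked_store to X86mstore appears intended to restrict them to plain, non-truncating masked stores, which is all vmaskmov/vpmaskmov can express. Usage sketch:

    #include <immintrin.h>

    /* vmaskmovpd writes only the lanes whose mask element has its sign
       bit set; the other lanes of *p are left untouched. */
    void masked_store2(double *p, __m128i mask, __m128d v) {
        _mm_maskstore_pd(p, mask, v);
    }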
def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
@@ -8865,12 +8821,13 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
}
-defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
-defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
-defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
-defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
-defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
+let Predicates = [HasAVX2, NoVLX] in {
+ defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+ defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+}
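Wrapping the variable-shift defs in [HasAVX2, NoVLX] leaves these encodings to the EVEX forms when AVX-512 VLX is available. For reference, each lane shifts by its own count:

    #include <immintrin.h>

    /* vpsllvd: per-lane i32 left shift; counts >= 32 produce 0. */
    __m256i shl_per_lane(__m256i v, __m256i counts) {
        return _mm256_sllv_epi32(v, counts);
    }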
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
@@ -8905,3 +8862,59 @@ let mayLoad = 1, Constraints
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
}
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for FR128, f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+def : Pat<(store (f128 FR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
+
+def : Pat<(loadf128 addr:$src),
+ (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
+
+// andps is shorter than andpd or pand. andps is in SSE and andpd/pand are in SSE2.
+def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86fand FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(and FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86for FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(or FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86fxor FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(xor FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
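These FR128 patterns let fp128 sign-bit manipulations stay in XMM registers instead of becoming libcalls. A minimal sketch (assumes a compiler with the __float128 extension):

    /* Negating an f128 value flips the sign bit, i.e. an xor against a
       sign mask -- selected as XORPSrr through the X86fxor FR128
       pattern above rather than a library call. */
    __float128 negate128(__float128 x) {
        return -x;
    }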
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index caecf70..c1df978 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -31,21 +31,21 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>;
} // Uses = [CL]
-def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>,
OpSize16;
-def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>,
OpSize32;
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -85,19 +85,19 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t{%cl, $dst|$dst, cl}",
[(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
-def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, u8imm:$src),
"shl{w}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, u8imm:$src),
"shl{l}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src),
"shl{q}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -137,18 +137,18 @@ def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>;
}
-def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2),
"shr{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"shr{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"shr{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>;
@@ -185,19 +185,19 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t{%cl, $dst|$dst, cl}",
[(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
-def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, u8imm:$src),
"shr{w}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, u8imm:$src),
"shr{l}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src),
"shr{q}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -241,20 +241,20 @@ def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
IIC_SR>;
}
-def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"sar{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))],
IIC_SR>;
-def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"sar{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"sar{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -298,19 +298,19 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, u8imm:$src),
"sar{w}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, u8imm:$src),
"sar{l}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src),
"sar{q}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -342,7 +342,7 @@ let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
@@ -350,7 +350,7 @@ def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
let Uses = [CL] in
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
@@ -358,7 +358,7 @@ def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
let Uses = [CL] in
def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
@@ -367,7 +367,7 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
"rcl{q}\t$dst", [], IIC_SR>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
@@ -376,7 +376,7 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t$dst", [], IIC_SR>;
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
@@ -384,7 +384,7 @@ def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
let Uses = [CL] in
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
@@ -392,7 +392,7 @@ def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
@@ -400,7 +400,7 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
@@ -411,36 +411,36 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
let SchedRW = [WriteShiftLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
"rcl{q}\t$dst", [], IIC_SR>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
"rcr{b}\t$dst", [], IIC_SR>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t$dst", [], IIC_SR>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in {
@@ -482,19 +482,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>;
}
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -537,19 +537,19 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
[(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>;
-def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, u8imm:$src1),
"rol{w}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>, OpSize16;
-def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, u8imm:$src1),
"rol{l}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>, OpSize32;
-def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1),
"rol{q}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>;
@@ -589,19 +589,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>;
}
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -644,19 +644,19 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, u8imm:$src),
"ror{w}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, u8imm:$src),
"ror{l}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
"ror{q}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -727,42 +727,42 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
let isCommutable = 1 in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ (ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ (ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ (ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ (ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ (ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ (ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
@@ -801,14 +801,14 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
}
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD16_MEM_IM>,
TB, OpSize16;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -816,14 +816,14 @@ def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
TB, OpSize16;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD32_MEM_IM>,
TB, OpSize32;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -831,14 +831,14 @@ def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
TB, OpSize32;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -860,12 +860,12 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
- def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
+ def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ (ins x86memop:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShiftLd]>;
}
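The i8imm -> u8imm switch throughout this file makes shift and rotate counts parse as unsigned 8-bit immediates (0..255) rather than signed ones; the codegen patterns are unchanged. The bmi_rotate multiclass is used for the BMI2 rotate-by-immediate forms such as rorx, e.g.:

    #include <immintrin.h>

    /* rorx rotates by an 8-bit immediate without touching EFLAGS. */
    unsigned rot13(unsigned x) {
        return _rorx_u32(x, 13);
    }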
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 0350566..85e17f5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -44,7 +44,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
let SchedRW = [WriteSystem] in {
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -60,12 +60,6 @@ def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [],
IIC_SYS_ENTER_EXIT>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", [],
IIC_SYS_ENTER_EXIT>, TB, Requires<[In64BitMode]>;
-
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize16;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>,
- OpSize32;
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
- Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
@@ -88,13 +82,13 @@ def IN32rr : I<0xED, RawFrm, (outs), (ins),
"in{l}\t{%dx, %eax|eax, dx}", [], IIC_IN_RR>, OpSize32;
let Defs = [AL] in
-def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", [], IIC_IN_RI>;
let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", [], IIC_IN_RI>, OpSize16;
let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", [], IIC_IN_RI>, OpSize32;
let Uses = [DX, AL] in
@@ -108,13 +102,13 @@ def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
"out{l}\t{%eax, %dx|dx, eax}", [], IIC_OUT_RR>, OpSize32;
let Uses = [AL] in
-def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", [], IIC_OUT_IR>;
let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", [], IIC_OUT_IR>, OpSize16;
let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", [], IIC_OUT_IR>, OpSize32;
} // SchedRW
@@ -478,39 +472,60 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
+let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+}
-let Uses = [RDX, RAX] in {
- def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsave\t$dst", []>, TB;
- def XSAVE64 : RI<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsave64\t$dst", []>, TB, Requires<[In64BitMode]>;
+let Uses = [EDX, EAX] in {
+let Predicates = [HasXSAVE] in {
+ def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsave\t$dst",
+ [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB;
+ def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsave64\t$dst",
+ [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstor\t$dst", []>, TB;
+ "xrstor\t$dst",
+ [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstor64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt\t$dst", []>, PS;
- def XSAVEOPT64 : RI<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt64\t$dst", []>, PS, Requires<[In64BitMode]>;
-
+ "xrstor64\t$dst",
+ [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVEOPT] in {
+ def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
+ "xsaveopt\t$dst",
+ [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB;
+ def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
+ "xsaveopt64\t$dst",
+ [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVEC] in {
+ def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsavec\t$dst",
+ [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB;
+ def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsavec64\t$dst",
+ [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVES] in {
+ def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xsaves\t$dst",
+ [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB;
+ def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xsaves64\t$dst",
+ [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
- "xrstors\t$dst", []>, TB;
+ "xrstors\t$dst",
+ [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
- "xrstors64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVEC : I<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsavec\t$dst", []>, TB;
- def XSAVEC64 : RI<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsavec64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVES : I<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
- "xsaves\t$dst", []>, TB;
- def XSAVES64 : RI<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
- "xsaves64\t$dst", []>, TB, Requires<[In64BitMode]>;
+ "xrstors64\t$dst",
+ [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
}
+} // Uses
} // SchedRW
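Beyond moving the memory operand from outs to ins (these instructions read register state and write memory, not the reverse), the XSAVE family now carries intrinsic patterns so the component mask flows in through EDX:EAX. The matching C-level usage:

    #include <immintrin.h>

    /* _xsave splits the 64-bit feature mask into EDX:EAX, matching the
       (int_x86_xsave addr:$dst, EDX, EAX) pattern above; buf must be a
       64-byte-aligned XSAVE area. */
    void save_state(void *buf, unsigned long long mask) {
        _xsave(buf, mask);
    }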
//===----------------------------------------------------------------------===//
@@ -534,6 +549,12 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
+//===----------------------------------------------------------------------===//
+// PKU - Protection Keys for Userspace (RDPKRU/WRPKRU)
+let Defs = [EAX, EDX], Uses = [ECX] in
+ def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
+let Uses = [EAX, ECX, EDX] in
+ def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
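A hedged sketch of how the RDPKRU/WRPKRU pair is typically driven, assuming GCC/Clang's _rdpkru_u32/_wrpkru intrinsics (compiled with -mpku); the 2-bits-per-key PKRU layout with write-disable in bit 1 follows Intel's definition:

#include <immintrin.h>

unsigned int deny_writes_for_key(unsigned int key) {  // illustrative helper
  unsigned int pkru = _rdpkru_u32();  // RDPKRU: ECX must be 0, result in EAX
  pkru |= 2u << (2 * key);            // set the write-disable (WD) bit for this key
  _wrpkru(pkru);                      // WRPKRU copies EAX back into PKRU
  return pkru;
}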
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index 8455b8d..4cb2304 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -83,57 +83,64 @@ let ExeDomain = SSEPackedDouble in {
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>;
}
-multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, XOP_4VOp3;
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
+ XOP_4VOp3, Sched<[WriteVarVecShift]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2))))]>,
- XOP_4V, VEX_W;
+ (vt128 (OpNode (vt128 VR128:$src1),
+ (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
+ XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (loadv2i64 addr:$src1)), VR128:$src2))]>,
- XOP_4VOp3;
+ (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
+ (vt128 VR128:$src2))))]>,
+ XOP_4VOp3, Sched<[WriteVarVecShift, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
- defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
- defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
- defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
- defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
- defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
- defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
- defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
- defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
- defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
- defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
- defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
- defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+ defm VPROTB : xop3op<0x90, "vprotb", X86vprot, v16i8>;
+ defm VPROTD : xop3op<0x92, "vprotd", X86vprot, v4i32>;
+ defm VPROTQ : xop3op<0x93, "vprotq", X86vprot, v2i64>;
+ defm VPROTW : xop3op<0x91, "vprotw", X86vprot, v8i16>;
+ defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8>;
+ defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64>;
+ defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16>;
}
-multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, imm:$src2))]>, XOP;
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, XOP;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i8imm:$src2),
+ (ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (loadv2i64 addr:$src1)), imm:$src2))]>, XOP;
+ (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>, XOP;
}
let ExeDomain = SSEPackedInt in {
- defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
- defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
- defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
- defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", X86vproti, v16i8>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", X86vproti, v4i32>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vproti, v2i64>;
+ defm VPROTW : xop3opimm<0xC1, "vprotw", X86vproti, v8i16>;
}
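The defm lines above now select on target nodes (X86vprot, X86vpsha, X86vpshl) instead of per-width intrinsics. For reference, a scalar C++ model of one VPROTD lane, assuming (per AMD's XOP reference) that the count is the signed low byte of each element and negative counts rotate right:

#include <cstdint>

static inline std::uint32_t rotl32(std::uint32_t v, std::int8_t amt) {
  unsigned n = static_cast<unsigned>(amt) & 31u;  // negative counts wrap to right rotates
  return n ? (v << n) | (v >> (32u - n)) : v;
}

void vprotd_ref(std::uint32_t dst[4], const std::uint32_t a[4], const std::int32_t b[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = rotl32(a[i], static_cast<std::int8_t>(b[i]));  // low byte holds the count
}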
// Instruction where second source can be memory, but third must be register
@@ -170,30 +177,34 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where second source can be memory, third must be imm8
-multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
+multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> {
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, i8immZExt3:$cc))]>,
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ i8immZExt3:$cc)))]>,
XOP_4V;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- i8immZExt3:$cc))]>, XOP_4V;
+ (vt128 (OpNode (vt128 VR128:$src1),
+ (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ i8immZExt3:$cc)))]>,
+ XOP_4V;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V;
@@ -201,14 +212,14 @@ multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
}
let ExeDomain = SSEPackedInt in { // SSE integer instructions
- defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>;
- defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>;
- defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>;
- defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>;
- defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>;
- defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>;
- defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>;
- defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>;
+ defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8>;
+ defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16>;
+ defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32>;
+ defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64>;
+ defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8>;
+ defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16>;
+ defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32>;
+ defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>;
}
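For reference, the 3-bit predicate that i8immZExt3:$cc encodes (values per AMD's XOP documentation; the enumerator names here are illustrative):

enum XopVpcomCC {   // immediate field of VPCOM{B,W,D,Q} and the unsigned forms
  VPCOM_LT = 0, VPCOM_LE = 1, VPCOM_GT = 2, VPCOM_GE = 3,
  VPCOM_EQ = 4, VPCOM_NE = 5, VPCOM_FALSE = 6, VPCOM_TRUE = 7
};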
// Instruction where either second or third source can be memory
@@ -270,42 +281,52 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let ExeDomain = SSEPackedInt in
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+let Predicates = [HasXOP] in {
+ def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+}
+
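The two Pat<> entries above match a bitwise select into VPCMOV. A scalar C++ model of what they compute (X86andnp(a, b) is ~a & b):

#include <cstdint>

std::uint64_t vpcmov_ref(std::uint64_t src1, std::uint64_t src2, std::uint64_t src3) {
  return (src3 & src1) | (~src3 & src2);  // src3 selects per bit: 1 -> src1, 0 -> src2
}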
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
VEX_W, MemOp4;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
VEX_W, MemOp4, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2c8b95b..dc6d85d 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -18,14 +18,19 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
- CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
- VPERM_3OP_MASKZ,
- INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
- EXPAND_FROM_MEM, BLEND
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
+  CMP_MASK, CMP_MASK_CC, CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
+ INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
+ INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+ FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+ VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
+ TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
+ EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
+ TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
struct IntrinsicData {
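The IntrinsicData definition continues outside this hunk; a hedged sketch of the shape these tables rely on, with field names and the macro body inferred from the initializers below rather than quoted from the patch:

struct IntrinsicData {        // sketch only; the real definition lives in this header
  uint16_t      Id;           // Intrinsic::x86_* enumerator
  IntrinsicType Type;         // dispatch kind from the enum above
  uint16_t      Opc0;         // primary ISD/X86ISD opcode
  uint16_t      Opc1;         // secondary opcode (e.g. a rounding form), 0 if unused
};
#define X86_INTRINSIC_DATA(id, type, op0, op1) \
  { Intrinsic::x86_##id, type, op0, op1 }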
@@ -138,6 +143,42 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
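The TRUNCATE_TO_MEM_VI* rows added above back the masked narrowing stores. A usage sketch for the 512-bit dword-to-byte case, assuming the standard AVX-512F intrinsic name:

#include <immintrin.h>

void store_low_bytes(void *dst, __mmask16 k, __m512i v) {
  _mm512_mask_cvtepi32_storeu_epi8(dst, k, v);  // VPMOVDB to memory, per-element masked
}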
@@ -209,6 +250,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
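avx2_pavg_b/w now lower through X86ISD::AVG; each unsigned lane computes the rounded average (a + b + 1) >> 1. The familiar spelling:

#include <immintrin.h>

__m256i rounded_avg_u8(__m256i a, __m256i b) {
  return _mm256_avg_epu8(a, b);  // VPAVGB: per-byte (a + b + 1) >> 1, unsigned
}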
@@ -241,6 +284,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
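Likewise avx2_psad_bw now maps onto X86ISD::PSADBW, which sums absolute byte differences within each 8-byte group into a 64-bit lane:

#include <immintrin.h>

__m256i sad_rows(__m256i a, __m256i b) {
  return _mm256_sad_epu8(a, b);  // VPSADBW: four u64 sums of |a[i] - b[i]| over 8 bytes each
}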
@@ -274,16 +318,56 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
-
+ X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
+ X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
+ X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
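Modeling avx512_kunpck_* as ISD::CONCAT_VECTORS reflects the instruction's concatenation of two mask registers; at the source level (intrinsic name per the AVX-512 intrinsics guide):

#include <immintrin.h>

__mmask16 concat_masks(__mmask16 lo, __mmask16 hi) {
  return _mm512_kunpackb(hi, lo);  // KUNPCKBW: lo in bits 7:0, hi in bits 15:8
}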
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
@@ -371,6 +455,50 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x8_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x8_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
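The BRCST_SUBVEC_TO_VEC rows added in this hunk reach VBROADCASTF32X4-style instructions through SHUF128; conceptually they replicate a 128-bit subvector across the destination:

#include <immintrin.h>

__m512 splat_lane(__m128 v) {
  return _mm512_broadcast_f32x4(v);  // four copies of the 128-bit source
}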
@@ -388,6 +516,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+ X86ISD::FSETCC),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+ X86ISD::FSETCC),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -415,7 +547,184 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
-
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTDQ2PD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0), // no rm
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
+                     ISD::SINT_TO_FP, ISD::SINT_TO_FP), // er (embedded rounding)
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP_ROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP_ROUND, X86ISD::VFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_EXTEND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_EXTEND, X86ISD::VFPEXT),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTUDQ2PD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0), // no rm
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
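Several cvt rows above carry a second opcode for the embedded-rounding form (marked "er"); at the source level these surface as the _round_ intrinsic family, e.g. for the cvtps2dq_512 row:

#include <immintrin.h>

__m512i to_int_nearest(__m512 v) {
  return _mm512_cvt_roundps_epi32(
      v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  // VCVTPS2DQ {rn-sae}
}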
X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
@@ -452,6 +761,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
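The FPCLASS/FPCLASSS rows feed VFPCLASS*, whose immediate is a category bitmask (bit 0 QNaN, bit 3 +inf, bit 4 -inf, bit 7 SNaN, per the ISA reference). A usage sketch:

#include <immintrin.h>

__mmask8 nan_or_inf(__m512d v) {
  return _mm512_fpclass_pd_mask(v, 0x99);  // 0x99 = QNaN | +inf | -inf | SNaN
}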
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
@@ -464,6 +781,62 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -472,10 +845,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
- X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
- X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
@@ -484,10 +857,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
- X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
- X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
+ X86ISD::MOVSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
+ X86ISD::MOVSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
@@ -554,6 +949,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -596,6 +997,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
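The pmaddubs_w/pmaddw_d rows added above reuse the SSSE3/SSE2 multiply-add nodes. The AVX2 spelling of the unsigned-by-signed form, which sums adjacent byte products with signed saturation:

#include <immintrin.h>

__m256i dot2_u8s8(__m256i u, __m256i s) {
  return _mm256_maddubs_epi16(u, s);  // VPMADDUBSW: u8*s8 products added in pairs, saturating
}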
@@ -644,6 +1057,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
@@ -700,6 +1221,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
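The psrl_w_*/psrl_wi_* additions route the AVX-512BW word shifts through VSRL/VSRLI; the immediate form, for example:

#include <immintrin.h>

__m512i shift_words_right(__m512i v) {
  return _mm512_srli_epi16(v, 3);  // VPSRLW: logical right shift of each u16 lane by 3
}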
@@ -728,16 +1255,98 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
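TERLOG_OP_MASK/MASKZ select VPTERNLOG, a three-input boolean whose imm8 is a truth table indexed by (a, b, c). For instance, 0xE8 encodes the majority function:

#include <immintrin.h>

__m512i majority(__m512i a, __m512i b, __m512i c) {
  return _mm512_ternarylogic_epi32(a, b, c, 0xE8);  // bits 3,5,6,7 set: maj(a, b, c)
}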
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::RNDSCALE, 0),
+ X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::RNDSCALE, 0),
+ X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
@@ -750,6 +1359,38 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
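The shuf_f32x4/f64x2/i32x4/i64x2 rows also use SHUF128; the immediate selects whole 128-bit lanes from the two sources:

#include <immintrin.h>

__m512 pick_lanes(__m512 a, __m512 b) {
  return _mm512_shuffle_f32x4(a, b, 0x4E);  // dst = { a.lane2, a.lane3, b.lane0, b.lane1 }
}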
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -758,6 +1399,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSQRT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSQRT_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
@@ -782,9 +1427,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
@@ -821,7 +1511,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
@@ -852,54 +1541,56 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
@@ -910,7 +1601,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
-
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
@@ -959,14 +1661,59 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
+ X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+ X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -1017,6 +1764,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
@@ -1024,6 +1773,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
@@ -1066,12 +1816,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
@@ -1105,7 +1849,31 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(ssse3_psign_b_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(ssse3_psign_d_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(ssse3_psign_w_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0)
+ X86_INTRINSIC_DATA(ssse3_psign_w_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshaw, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshlb, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshld, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshlq, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshlw, INTR_TYPE_2OP, X86ISD::VPSHL, 0)
};
/*
@@ -1128,6 +1896,102 @@ static void verifyIntrinsicTables() {
std::is_sorted(std::begin(IntrinsicsWithChain),
std::end(IntrinsicsWithChain)) &&
"Intrinsic data tables should be sorted by Intrinsic ID");
+ assert((std::adjacent_find(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain)) ==
+ std::end(IntrinsicsWithoutChain)) &&
+ (std::adjacent_find(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain)) ==
+ std::end(IntrinsicsWithChain)) &&
+ "Intrinsic data tables should have unique entries");
+}
+
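A note on the validation idiom added above: std::is_sorted is what lets the lookup helpers use binary search, and on a sorted range any duplicate entries must be adjacent, so std::adjacent_find doubles as a uniqueness check. A minimal standalone sketch of the same pattern, using a simplified stand-in for IntrinsicData rather than the LLVM definition:

#include <algorithm>
#include <cassert>
#include <iterator>

// Simplified stand-in for the X86 IntrinsicData entries: ordered and
// compared by intrinsic ID only.
struct IntrinsicData {
  unsigned Id;
  bool operator<(const IntrinsicData &RHS) const { return Id < RHS.Id; }
  bool operator==(const IntrinsicData &RHS) const { return Id == RHS.Id; }
};

static const IntrinsicData Table[] = {{1}, {2}, {5}, {9}};

int main() {
  // Sortedness is required so lookups can use std::lower_bound.
  assert(std::is_sorted(std::begin(Table), std::end(Table)) &&
         "Intrinsic data tables should be sorted by Intrinsic ID");
  // On a sorted range duplicates sit next to each other, so
  // adjacent_find is enough to detect them.
  assert(std::adjacent_find(std::begin(Table), std::end(Table)) ==
             std::end(Table) &&
         "Intrinsic data tables should have unique entries");
  return 0;
}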
+// X86-specific compare constants.
+// They must be kept in sync with avxintrin.h.
+#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
+#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
+#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
+#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
+#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
+#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
+#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
+#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
+#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
+#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
+#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
+#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
+#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
+#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
+#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
+#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
+#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
+#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
+#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
+#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */
+#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
+#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
+#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
+#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */
+#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
+#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */
+#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
+#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
+#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
+#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
+#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
+#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */
+
+/*
+* Get the comparison modifier from the _mm_comi_round_sd/ss intrinsic.
+* Returns a tuple <isOrdered, X86 condcode>.
+*/
+static std::tuple<bool, unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
+ // The immediate is always a constant here; cast<> asserts rather than
+ // returning a null pointer that would then be dereferenced blindly.
+ ConstantSDNode *CImm = cast<ConstantSDNode>(imm);
+ unsigned IntImm = CImm->getZExtValue();
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (IntImm) {
+ default: llvm_unreachable("Invalid floating point compare value for Comi!");
+ case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
+ case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
+ return std::make_tuple(true, X86::COND_E);
+ case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
+ case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
+ return std::make_tuple(false, X86::COND_E);
+ case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
+ case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_B);
+ case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
+ case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_B);
+ case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling)
+ case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_BE);
+ case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
+ case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_BE);
+ case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
+ case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_A);
+ case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered, signaling)
+ case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_A);
+ case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling)
+ case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_AE);
+ case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
+ case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_AE);
+ case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
+ case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
+ return std::make_tuple(true, X86::COND_NE);
+ case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
+ case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
+ return std::make_tuple(false, X86::COND_NE);
+ }
}
} // End llvm namespace
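For readers tracing the new COMI_RM lowering: the switch above collapses the thirty-two _X86_CMP_* predicates into <isOrdered, condcode> pairs, because the signaling bit (bit 4) never changes which flags are tested; the ordered flag is then what chooses between the X86ISD::COMI and X86ISD::UCOMI nodes in the avx512_vcomi table entries above. A standalone sketch of the same collapse, using a hypothetical CondCode enum in place of the X86:: condition codes:

#include <cstdio>
#include <stdexcept>
#include <tuple>

// Hypothetical stand-ins for the X86 condition codes used above.
enum CondCode { COND_E, COND_NE, COND_B, COND_BE, COND_A, COND_AE };

// Bit 4 of the predicate only selects signaling behaviour, which is why
// 0x00/0x10, 0x01/0x11, ... all map to the same <isOrdered, cc> pair.
static std::tuple<bool, CondCode> classify(unsigned Imm) {
  switch (Imm & 0xf) {
  case 0x0: return std::make_tuple(true,  COND_E);  // EQ_OQ  / EQ_OS
  case 0x8: return std::make_tuple(false, COND_E);  // EQ_UQ  / EQ_US
  case 0x1: return std::make_tuple(true,  COND_B);  // LT_OS  / LT_OQ
  case 0x9: return std::make_tuple(false, COND_B);  // NGE_US / NGE_UQ
  case 0x2: return std::make_tuple(true,  COND_BE); // LE_OS  / LE_OQ
  case 0xa: return std::make_tuple(false, COND_BE); // NGT_US / NGT_UQ
  case 0xe: return std::make_tuple(true,  COND_A);  // GT_OS  / GT_OQ
  case 0x6: return std::make_tuple(false, COND_A);  // NLE_US / NLE_UQ
  case 0xd: return std::make_tuple(true,  COND_AE); // GE_OS  / GE_OQ
  case 0x5: return std::make_tuple(false, COND_AE); // NLT_US / NLT_UQ
  case 0xc: return std::make_tuple(true,  COND_NE); // NEQ_OQ / NEQ_OS
  case 0x4: return std::make_tuple(false, COND_NE); // NEQ_UQ / NEQ_US
  default: throw std::invalid_argument("unsupported COMI predicate");
  }
}

int main() {
  bool Ordered;
  CondCode CC;
  std::tie(Ordered, CC) = classify(0x11); // _X86_CMP_LT_OQ
  std::printf("ordered=%d cc=%d\n", Ordered, CC);
  return 0;
}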
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 3415ced..e186f70 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -92,7 +92,6 @@ namespace llvm {
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
CurrentShadowSize += Code.size();
if (CurrentShadowSize >= RequiredShadowSize)
InShadow = false; // The shadow is big enough. Stop counting.
@@ -128,7 +127,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = MF.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
MCSymbol *Sym = nullptr;
@@ -151,7 +150,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
if (!Suffix.empty())
- Name += DL->getPrivateGlobalPrefix();
+ Name += DL.getPrivateGlobalPrefix();
unsigned PrefixLen = Name.size();
@@ -159,7 +158,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
} else if (MO.isSymbol()) {
- Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else if (MO.isMBB()) {
assert(Suffix.empty());
Sym = MO.getMBB()->getSymbol();
@@ -461,6 +460,7 @@ ReSimplify:
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
+ case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
@@ -478,18 +478,19 @@ ReSimplify:
unsigned NewOpc;
switch (OutMI.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
OutMI.setOpcode(NewOpc);
}
@@ -532,6 +533,23 @@ ReSimplify:
break;
}
+ case X86::CLEANUPRET: {
+ // Replace CLEANUPRET with the appropriate RET.
+ OutMI = MCInst();
+ OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
+ break;
+ }
+
+ case X86::CATCHRET: {
+ // Replace CATCHRET with the appropriate RET.
+ const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
+ unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+ OutMI = MCInst();
+ OutMI.setOpcode(getRetOpcode(Subtarget));
+ OutMI.addOperand(MCOperand::createReg(ReturnReg));
+ break;
+ }
+
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr:
case X86::TAILJMPd:
@@ -598,17 +616,29 @@ ReSimplify:
case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
+ case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
+ case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
+ case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
+ case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
+ case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
+ case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
+ case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
+ case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
+ case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
+ case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
+ case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
+ case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
@@ -875,7 +905,10 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
MCInst LoadMI;
LoadMI.setOpcode(LoadOpcode);
- LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+
+ if (LoadDefRegister != X86::NoRegister)
+ LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+
for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
E = MI.operands_end();
I != E; ++I)
@@ -1062,6 +1095,18 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86ATTInstPrinter::getRegisterName(Reg));
break;
}
+ case X86::CLEANUPRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CLEANUPRET");
+ break;
+ }
+
+ case X86::CATCHRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CATCHRET");
+ break;
+ }
+
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
@@ -1095,12 +1140,30 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
.addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+ const X86FrameLowering* FrameLowering =
+ MF->getSubtarget<X86Subtarget>().getFrameLowering();
+ bool hasFP = FrameLowering->hasFP(*MF);
+
+ // TODO: This is needed only if we require precise CFA.
+ bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
+ !OutStreamer->getDwarfFrameInfos().back().End;
+
+ int stackGrowth = -RI->getSlotSize();
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
+ }
+
// Emit the label.
OutStreamer->EmitLabel(PICBase);
// popl $reg
EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
.addReg(MI->getOperand(0).getReg()));
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
+ }
return;
}
@@ -1206,19 +1269,48 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Lower PSHUFB and VPERMILP normally but add a comment if we can find
- // a constant shuffle mask. We won't be able to do this at the MC layer
- // because the mask isn't an immediate.
+ // Lower PSHUFB and VPERMILP normally but add a comment if we can find
+ // a constant shuffle mask. We won't be able to do this at the MC layer
+ // because the mask isn't an immediate.
case X86::PSHUFBrm:
case X86::VPSHUFBrm:
- case X86::VPSHUFBYrm: {
+ case X86::VPSHUFBYrm:
+ case X86::VPSHUFBZ128rm:
+ case X86::VPSHUFBZ128rmk:
+ case X86::VPSHUFBZ128rmkz:
+ case X86::VPSHUFBZ256rm:
+ case X86::VPSHUFBZ256rmk:
+ case X86::VPSHUFBZ256rmkz:
+ case X86::VPSHUFBZrm:
+ case X86::VPSHUFBZrmk:
+ case X86::VPSHUFBZrmkz: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 5 &&
- "We should always have at least 5 operands!");
+ unsigned SrcIdx, MaskIdx;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::PSHUFBrm:
+ case X86::VPSHUFBrm:
+ case X86::VPSHUFBYrm:
+ case X86::VPSHUFBZ128rm:
+ case X86::VPSHUFBZ256rm:
+ case X86::VPSHUFBZrm:
+ SrcIdx = 1; MaskIdx = 5; break;
+ case X86::VPSHUFBZ128rmkz:
+ case X86::VPSHUFBZ256rmkz:
+ case X86::VPSHUFBZrmkz:
+ SrcIdx = 2; MaskIdx = 6; break;
+ case X86::VPSHUFBZ128rmk:
+ case X86::VPSHUFBZ256rmk:
+ case X86::VPSHUFBZrmk:
+ SrcIdx = 3; MaskIdx = 7; break;
+ }
+
+ assert(MI->getNumOperands() >= 6 &&
+ "We should always have at least 6 operands!");
const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(1);
- const MachineOperand &MaskOp = MI->getOperand(5);
+ const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
@@ -1240,35 +1332,53 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &SrcOp = MI->getOperand(1);
const MachineOperand &MaskOp = MI->getOperand(5);
+ unsigned ElSize;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break;
+ case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break;
+ }
+
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, Mask);
+ DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
}
break;
}
- // For loads from a constant pool to a vector register, print the constant
- // loaded.
- case X86::MOVAPDrm:
- case X86::VMOVAPDrm:
- case X86::VMOVAPDYrm:
- case X86::MOVUPDrm:
- case X86::VMOVUPDrm:
- case X86::VMOVUPDYrm:
- case X86::MOVAPSrm:
- case X86::VMOVAPSrm:
- case X86::VMOVAPSYrm:
- case X86::MOVUPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVUPSYrm:
- case X86::MOVDQArm:
- case X86::VMOVDQArm:
- case X86::VMOVDQAYrm:
- case X86::MOVDQUrm:
- case X86::VMOVDQUrm:
- case X86::VMOVDQUYrm:
+#define MOV_CASE(Prefix, Suffix) \
+ case X86::Prefix##MOVAPD##Suffix##rm: \
+ case X86::Prefix##MOVAPS##Suffix##rm: \
+ case X86::Prefix##MOVUPD##Suffix##rm: \
+ case X86::Prefix##MOVUPS##Suffix##rm: \
+ case X86::Prefix##MOVDQA##Suffix##rm: \
+ case X86::Prefix##MOVDQU##Suffix##rm:
+
+#define MOV_AVX512_CASE(Suffix) \
+ case X86::VMOVDQA64##Suffix##rm: \
+ case X86::VMOVDQA32##Suffix##rm: \
+ case X86::VMOVDQU64##Suffix##rm: \
+ case X86::VMOVDQU32##Suffix##rm: \
+ case X86::VMOVDQU16##Suffix##rm: \
+ case X86::VMOVDQU8##Suffix##rm: \
+ case X86::VMOVAPS##Suffix##rm: \
+ case X86::VMOVAPD##Suffix##rm: \
+ case X86::VMOVUPS##Suffix##rm: \
+ case X86::VMOVUPD##Suffix##rm:
+
+#define CASE_ALL_MOV_RM() \
+ MOV_CASE(, ) /* SSE */ \
+ MOV_CASE(V, ) /* AVX-128 */ \
+ MOV_CASE(V, Y) /* AVX-256 */ \
+ MOV_AVX512_CASE(Z) \
+ MOV_AVX512_CASE(Z256) \
+ MOV_AVX512_CASE(Z128)
+
+ // For loads from a constant pool to a vector register, print the constant
+ // loaded.
+ CASE_ALL_MOV_RM()
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() > 4)
@@ -1302,7 +1412,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- CS << CI->getZExtValue();
+ if (CI->getBitWidth() <= 64) {
+ CS << CI->getZExtValue();
+ } else {
+ // Print a multi-word constant as a parenthesized word list (w0,w1,...).
+ auto Val = CI->getValue();
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
SmallString<32> Str;
CF->getValueAPF().toString(Str);
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
index ac2cdc8c..c9e636f 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- X86MachineFuctionInfo.cpp - X86 machine function info -------------===//
+//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index e6db970..3a7a98d 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -84,8 +84,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// of pushes to pass function parameters.
bool HasPushSequences = false;
- /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill
- /// slot for the frame pointer.
+ /// True if the function recovers from an SEH exception, and therefore needs
+ /// to spill and restore the frame pointer.
bool HasSEHFramePtrSave = false;
/// The frame index of a stack object containing the original frame pointer
@@ -100,7 +100,7 @@ private:
public:
X86MachineFunctionInfo() = default;
- explicit X86MachineFunctionInfo(MachineFunction &MF) {};
+ explicit X86MachineFunctionInfo(MachineFunction &MF) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
new file mode 100644
index 0000000..58020d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -0,0 +1,326 @@
+//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass that performs some optimizations with LEA
+// instructions in order to improve code size.
+// Currently, it does one thing:
+// 1) Address calculations in load and store instructions are replaced by
+// existing LEA def registers where possible.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-optimize-LEAs"
+
+static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
+ cl::desc("X86: Enable LEA optimizations."),
+ cl::init(false));
+
+STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+
+namespace {
+class OptimizeLEAPass : public MachineFunctionPass {
+public:
+ OptimizeLEAPass() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override { return "X86 LEA Optimize"; }
+
+ /// \brief Loop over all of the basic blocks, replacing address
+ /// calculations in load and store instructions when the address has
+ /// already been calculated by a LEA. Also remove redundant LEAs.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Returns the distance between two instructions inside one basic
+ /// block. A negative result means that the instructions occur in reverse
+ /// order.
+ int calcInstrDist(const MachineInstr &First, const MachineInstr &Last);
+
+ /// \brief Choose the best \p LEA instruction from the \p List to replace
+ /// the address calculation in the \p MI instruction. Return the address
+ /// displacement and the distance between \p MI and the chosen \p LEA in
+ /// \p AddrDispShift and \p Dist.
+ bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
+ const MachineInstr &MI, MachineInstr *&LEA,
+ int64_t &AddrDispShift, int &Dist);
+
+ /// \brief Returns true if two machine operands are identical and they are
+ /// not physical registers.
+ bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2);
+
+ /// \brief Returns true if the instruction is LEA.
+ bool isLEA(const MachineInstr &MI);
+
+ /// \brief Returns true if two instructions have memory operands that only
+ /// differ by displacement. The indices of the first memory operands for
+ /// both instructions are specified through \p N1 and \p N2. The address
+ /// displacement is returned through \p AddrDispShift.
+ bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
+ const MachineInstr &MI2, unsigned N2,
+ int64_t &AddrDispShift);
+
+ /// \brief Find all LEA instructions in the basic block.
+ void findLEAs(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineInstr *> &List);
+
+ /// \brief Removes redundant address calculations.
+ bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
+
+ MachineRegisterInfo *MRI;
+ const X86InstrInfo *TII;
+ const X86RegisterInfo *TRI;
+
+ static char ID;
+};
+char OptimizeLEAPass::ID = 0;
+}
+
+FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
+
+int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
+ const MachineInstr &Last) {
+ const MachineBasicBlock *MBB = First.getParent();
+
+ // Both instructions must be in the same basic block.
+ assert(Last.getParent() == MBB &&
+ "Instructions are in different basic blocks");
+
+ return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) -
+ std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First));
+}
+
+// Find the best LEA instruction in the List to replace address recalculation in
+// MI. Such LEA must meet these requirements:
+// 1) The address calculated by the LEA differs only by the displacement from
+// the address used in MI.
+// 2) The register class of the definition of the LEA is compatible with the
+// register class of the address base register of MI.
+// 3) Displacement of the new memory operand should fit in 1 byte if possible.
+// 4) The LEA should be as close to MI as possible, and prior to it if
+// possible.
+bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
+ const MachineInstr &MI, MachineInstr *&LEA,
+ int64_t &AddrDispShift, int &Dist) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+ X86II::getOperandBias(Desc);
+
+ LEA = nullptr;
+
+ // Loop over all LEA instructions.
+ for (auto DefMI : List) {
+ int64_t AddrDispShiftTemp = 0;
+
+ // Compare instructions memory operands.
+ if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp))
+ continue;
+
+ // Make sure address displacement fits 4 bytes.
+ if (!isInt<32>(AddrDispShiftTemp))
+ continue;
+
+ // Check that LEA def register can be used as MI address base. Some
+ // instructions can use a limited set of registers as address base, for
+ // example MOV8mr_NOREX. We could constrain the register class of the LEA
+ // def to suit MI, however since this case is very rare and hard to
+ // reproduce in a test it's just more reliable to skip the LEA.
+ if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
+ MRI->getRegClass(DefMI->getOperand(0).getReg()))
+ continue;
+
+ // Choose the closest LEA instruction from the list, prior to MI if
+ // possible. Note that we take the resulting address displacement into
+ // account as well. Also note that the list is sorted by the order in which
+ // the LEAs occur, so the break condition is pretty simple.
+ int DistTemp = calcInstrDist(*DefMI, MI);
+ assert(DistTemp != 0 &&
+ "The distance between two different instructions cannot be zero");
+ if (DistTemp > 0 || LEA == nullptr) {
+ // Do not update return LEA, if the current one provides a displacement
+ // which fits in 1 byte, while the new candidate does not.
+ if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
+ isInt<8>(AddrDispShift))
+ continue;
+
+ LEA = DefMI;
+ AddrDispShift = AddrDispShiftTemp;
+ Dist = DistTemp;
+ }
+
+ // FIXME: Maybe we should not always stop at the first LEA after MI.
+ if (DistTemp < 0)
+ break;
+ }
+
+ return LEA != nullptr;
+}
+
+bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1,
+ const MachineOperand &MO2) {
+ return MO1.isIdenticalTo(MO2) &&
+ (!MO1.isReg() ||
+ !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+}
+
+bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
+ Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
+}
+
+// Check if MI1 and MI2 have memory operands which represent addresses that
+// differ only by displacement.
+bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
+ const MachineInstr &MI2, unsigned N2,
+ int64_t &AddrDispShift) {
+ // Address base, scale, index and segment operands must be identical.
+ static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt,
+ X86::AddrIndexReg, X86::AddrSegmentReg};
+ for (auto &N : IdenticalOpNums)
+ if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N)))
+ return false;
+
+ // Address displacement operands may differ by a constant.
+ const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp);
+ const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp);
+ if (!isIdenticalOp(*Op1, *Op2)) {
+ if (Op1->isImm() && Op2->isImm())
+ AddrDispShift = Op1->getImm() - Op2->getImm();
+ else if (Op1->isGlobal() && Op2->isGlobal() &&
+ Op1->getGlobal() == Op2->getGlobal())
+ AddrDispShift = Op1->getOffset() - Op2->getOffset();
+ else
+ return false;
+ }
+
+ return true;
+}
+
+void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineInstr *> &List) {
+ for (auto &MI : MBB) {
+ if (isLEA(MI))
+ List.push_back(const_cast<MachineInstr *>(&MI));
+ }
+}
+
+// Try to find load and store instructions which recalculate addresses already
+// calculated by some LEA and replace their memory operands with its def
+// register.
+bool OptimizeLEAPass::removeRedundantAddrCalc(
+ const SmallVectorImpl<MachineInstr *> &List) {
+ bool Changed = false;
+
+ assert(List.size() > 0);
+ MachineBasicBlock *MBB = List[0]->getParent();
+
+ // Process all instructions in basic block.
+ for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
+ MachineInstr &MI = *I++;
+ unsigned Opcode = MI.getOpcode();
+
+ // Instruction must be load or store.
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode);
+
+ // If instruction has no memory operand - skip it.
+ if (MemOpNo < 0)
+ continue;
+
+ MemOpNo += X86II::getOperandBias(Desc);
+
+ // Get the best LEA instruction to replace address calculation.
+ MachineInstr *DefMI;
+ int64_t AddrDispShift;
+ int Dist;
+ if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist))
+ continue;
+
+ // If the LEA occurs before the current instruction, we can freely replace
+ // the instruction. If the LEA occurs after it, we can lift the LEA above
+ // the instruction and thereby be able to replace it. Since the LEA and the
+ // instruction have similar memory operands (and thus the same def
+ // instructions for those operands), we can always do that without worrying
+ // about using registers before their defs.
+ if (Dist < 0) {
+ DefMI->removeFromParent();
+ MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
+ }
+
+ // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(DefMI->getOperand(0).getReg());
+
+ ++NumSubstLEAs;
+ DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););
+
+ // Change instruction operands.
+ MI.getOperand(MemOpNo + X86::AddrBaseReg)
+ .ChangeToRegister(DefMI->getOperand(0).getReg(), false);
+ MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1);
+ MI.getOperand(MemOpNo + X86::AddrIndexReg)
+ .ChangeToRegister(X86::NoRegister, false);
+ MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift);
+ MI.getOperand(MemOpNo + X86::AddrSegmentReg)
+ .ChangeToRegister(X86::NoRegister, false);
+
+ DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump(););
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ // Perform this optimization only if we care about code size.
+ if (!EnableX86LEAOpt || !MF.getFunction()->optForSize())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+
+ // Process all basic blocks.
+ for (auto &MBB : MF) {
+ SmallVector<MachineInstr *, 16> LEAs;
+
+ // Find all LEA instructions in basic block.
+ findLEAs(MBB, LEAs);
+
+ // If current basic block has no LEAs, move on to the next one.
+ if (LEAs.empty())
+ continue;
+
+ // Remove redundant address calculations.
+ Changed |= removeRedundantAddrCalc(LEAs);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 143e70b..0f425e2 100644
--- a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -93,8 +93,7 @@ FunctionPass *llvm::createX86PadShortFunctions() {
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// NOOP instructions before early exits.
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize)) {
+ if (MF.getFunction()->optForSize()) {
return false;
}
@@ -107,7 +106,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
VisitedBBs.clear();
- findReturns(MF.begin());
+ findReturns(&MF.front());
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index d8495e5..5840443 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
@@ -44,12 +43,6 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
-cl::opt<bool>
-ForceStackAlign("force-align-stack",
- cl::desc("Force align the stack to the minimum alignment"
- " needed for the function."),
- cl::init(false), cl::Hidden);
-
static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -174,21 +167,34 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
if (Subtarget.isTarget64BitLP64())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
- case 2: // Available for tailcall (not callee-saved GPRs).
- const Function *F = MF.getFunction();
- if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
- return &X86::GR64_TCW64RegClass;
- else if (Is64Bit)
- return &X86::GR64_TCRegClass;
-
- bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
- if (hasHipeCC)
- return &X86::GR32RegClass;
- return &X86::GR32_TCRegClass;
+ case 2: // NOREX GPRs.
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64_NOREXRegClass;
+ return &X86::GR32_NOREXRegClass;
+ case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64_NOREX_NOSPRegClass;
+ return &X86::GR32_NOREX_NOSPRegClass;
+ case 4: // Available for tailcall (not callee-saved GPRs).
+ return getGPRsForTailCall(MF);
}
}
const TargetRegisterClass *
+X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
+ return &X86::GR64_TCW64RegClass;
+ else if (Is64Bit)
+ return &X86::GR64_TCRegClass;
+
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
+ return &X86::GR32_TCRegClass;
+}
+
+const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &X86::CCRRegClass) {
if (Is64Bit)
@@ -222,6 +228,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
+ bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
bool CallsEHReturn = MF->getMMI().callsEHReturn();
@@ -241,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_SaveList;
return CSR_64_RT_AllRegs_SaveList;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_SaveList;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
@@ -254,6 +265,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_Intel_OCL_BI_SaveList;
break;
}
+ case CallingConv::HHVM:
+ return CSR_64_HHVM_SaveList;
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
@@ -264,6 +277,18 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
+ case CallingConv::X86_INTR:
+ if (Is64Bit) {
+ if (HasAVX)
+ return CSR_64_AllRegs_AVX_SaveList;
+ else
+ return CSR_64_AllRegs_SaveList;
+ } else {
+ if (HasSSE)
+ return CSR_32_AllRegs_SSE_SaveList;
+ else
+ return CSR_32_AllRegs_SaveList;
+ }
default:
break;
}
@@ -284,6 +309,7 @@ const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+ bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
@@ -301,6 +327,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_RegMask;
return CSR_64_RT_AllRegs_RegMask;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_RegMask;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
@@ -314,16 +344,30 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_Intel_OCL_BI_RegMask;
break;
}
+ case CallingConv::HHVM:
+ return CSR_64_HHVM_RegMask;
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
break;
- default:
- break;
case CallingConv::X86_64_Win64:
return CSR_Win64_RegMask;
case CallingConv::X86_64_SysV:
return CSR_64_RegMask;
+ case CallingConv::X86_INTR:
+ if (Is64Bit) {
+ if (HasAVX)
+ return CSR_64_AllRegs_AVX_RegMask;
+ else
+ return CSR_64_AllRegs_RegMask;
+ } else {
+ if (HasSSE)
+ return CSR_32_AllRegs_SSE_RegMask;
+ else
+ return CSR_32_AllRegs_RegMask;
+ }
+ default:
+ break;
}
// Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
@@ -341,6 +385,10 @@ X86RegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
+const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
+ return CSR_64_TLS_Darwin_RegMask;
+}
+
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const X86FrameLowering *TFI = getFrameLowering(MF);
@@ -371,8 +419,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
"Stack realignment in presence of dynamic allocas is not supported with"
"this calling convention.");
- unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
- false);
+ unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
I.isValid(); ++I)
Reserved.set(*I);
@@ -439,6 +486,10 @@ void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
+static bool CantUseSP(const MachineFrameInfo *MFI) {
+ return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
+}
+
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -451,13 +502,11 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// reference locals while also adjusting the stack pointer. When we can't
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
- bool CantUseSP =
- MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
- return CantUseFP && CantUseSP;
+ return CantUseFP && CantUseSP(MFI);
}
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ if (!TargetRegisterInfo::canRealignStack(MF))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -470,26 +519,11 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
// If a base pointer is necessary. Check that it isn't too late to reserve
// it.
- if (MFI->hasVarSizedObjects())
+ if (CantUseSP(MFI))
return MRI->canReserveReg(BasePtr);
return true;
}
-bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86FrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- // If we've requested that we force align the stack do so now.
- if (ForceStackAlign)
- return canRealignStack(MF);
-
- return requiresRealignment && canRealignStack(MF);
-}
-
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
// Since X86 defines assignCalleeSavedSpillSlots which always return true
@@ -510,6 +544,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm ||
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64;
+
if (hasBasePointer(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
else if (needsStackRealignment(MF))
@@ -524,14 +559,11 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// offset is from the traditional base pointer location. On 64-bit, the
// offset is from the SP at the end of the prologue, not the FP location. This
// matches the behavior of llvm.frameaddress.
+ unsigned IgnoredFrameReg;
if (Opc == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int Offset;
- if (IsWinEH)
- Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex);
- else
- Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
FI.ChangeToImmediate(Offset);
return;
}
@@ -540,7 +572,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
- BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false);
+ BasePtr = getX86SubSuperRegister(BasePtr, 64);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
@@ -553,7 +585,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const MachineFrameInfo *MFI = MF.getFrameInfo();
FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
- FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
if (BasePtr == StackPtr)
FIOffset += SPAdj;
@@ -592,193 +624,11 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
unsigned FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
- FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
+ FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
-namespace llvm {
-unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
- switch (VT) {
- default: return 0;
- case MVT::i8:
- if (High) {
- switch (Reg) {
- default: return getX86SubSuperRegister(Reg, MVT::i64);
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AH;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DH;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CH;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BH;
- }
- } else {
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AL;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DL;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CL;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BL;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SIL;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DIL;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BPL;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SPL;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8B;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9B;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10B;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11B;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12B;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13B;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14B;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15B;
- }
- }
- case MVT::i16:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8W;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9W;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10W;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11W;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12W;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13W;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14W;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15W;
- }
- case MVT::i32:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::EAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::EDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::ECX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::EBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::ESI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::EDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::EBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::ESP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8D;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9D;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10D;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11D;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12D;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13D;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14D;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15D;
- }
- case MVT::i64:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::RAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::RDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::RCX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::RBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::RSI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::RDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::RBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::RSP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15;
- }
- }
-}
-
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
- unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High);
- if (Res == 0)
- llvm_unreachable("Unexpected register or VT");
- return Res;
-}
-
-unsigned get512BitSuperRegister(unsigned Reg) {
+unsigned llvm::get512BitSuperRegister(unsigned Reg) {
if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
return X86::ZMM0 + (Reg - X86::XMM0);
if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
@@ -787,5 +637,3 @@ unsigned get512BitSuperRegister(unsigned Reg) {
return Reg;
llvm_unreachable("Unexpected SIMD register");
}
-
-}
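
The hunks above replace the MVT-based getX86SubSuperRegister() interface with
one that takes the register width as a plain bit count, and move the big
mapping tables out of this file. A minimal sketch of the new call shape,
assuming the helper remains visible to this translation unit after the move:

  // Old: getX86SubSuperRegister(Reg, MVT::i64, /*High=*/false)
  // New: the width is an explicit bit count (8, 16, 32 or 64).
  unsigned getBasePtr64(const X86RegisterInfo &TRI) {
    return getX86SubSuperRegister(TRI.getBaseRegister(), 64);
  }
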
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 8de1d0b..f014c8f 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -87,6 +87,11 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+ /// getGPRsForTailCall - Returns a register class with registers that can be
+ /// used in forming tail calls.
+ const TargetRegisterClass *
+ getGPRsForTailCall(const MachineFunction &MF) const;
+
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -96,7 +101,11 @@ public:
getCalleeSavedRegs(const MachineFunction* MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
+
+ // Calls involved in thread-local variable lookup save more registers than
+ // normal calls, so they need a different mask to represent this.
+ const uint32_t *getDarwinTLSCallPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
@@ -108,9 +117,7 @@ public:
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
-
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const override;
@@ -128,16 +135,6 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-/// Returns the sub or super register of a specific X86 register.
-/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
-/// Aborts on error.
-unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
-
-/// Returns the sub or super register of a specific X86 register.
-/// Like getX86SubSuperRegister() but returns 0 on error.
-unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType,
- bool High = false);
-
// get512BitSuperRegister - X86 utility - returns the 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
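
For context on the new CXX_FAST_TLS handling: getCallPreservedMask() now
returns CSR_64_TLS_Darwin_RegMask for that convention on 64-bit targets, and
getDarwinTLSCallPreservedMask() hands the same mask to call lowering
directly. A hedged sketch of a consumer; the function and flag names below
are illustrative, not part of the patch:

  const uint32_t *pickCallMask(const X86RegisterInfo &TRI,
                               const MachineFunction &MF, CallingConv::ID CC,
                               bool IsTLSLookupCall) {
    // TLS lookup calls clobber far fewer registers than ordinary calls,
    // so they get the dedicated Darwin TLS mask.
    if (IsTLSLookupCall)
      return TRI.getDarwinTLSCallPreservedMask();
    return TRI.getCallPreservedMask(MF, CC);
  }
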
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index cdb151c..56f0d93 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -225,15 +225,15 @@ let SubRegIndices = [sub_ymm] in {
}
}
- // Mask Registers, used by AVX-512 instructions.
- def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
- def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
- def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
- def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
- def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
- def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
- def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
- def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
+// Mask Registers, used by AVX-512 instructions.
+def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
+def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
+def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
+def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
+def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
+def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
+def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
+def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
@@ -375,7 +375,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
- R8, R9, R11)>;
+ R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>;
+
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
@@ -442,10 +444,11 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
}
// Generic vector registers: VR64 and VR128.
+// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
128, (add FR32)>;
-def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Status flags registers.
@@ -459,8 +462,8 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
}
// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
- (sequence "ZMM%u", 0, 31)>;
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+ 512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
@@ -468,10 +471,10 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
-def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (add FR32X)>;
-def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- 256, (sequence "YMM%u", 0, 31)>;
+def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
+ 128, (add FR32X)>;
+def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+ 256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
@@ -491,4 +494,4 @@ def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
-def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
\ No newline at end of file
+def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index ce79fcf..b1a0161 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
return false;
}
-SDValue
-X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- MachinePointerInfo DstPtrInfo) const {
+SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
@@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- if (const char *bzeroEntry = V &&
+ if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
- EVT IntPtr =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
0)
.setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
@@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
BytesLeft = SizeVal % UBytes;
}
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
- InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
+ InFlag);
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
@@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl,
CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
- X86::ECX,
- Left, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
InFlag = Chain.getValue(1);
Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
@@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI :
- X86::ESI,
- Src, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
+ Src, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
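
The memcpy hunk above only reflows the DAG.getCopyToReg() calls, but it shows
how REP MOVS lowering pins its operands: the count goes in RCX/ECX, the
destination in RDI/EDI, the source in RSI/ESI, and each copy is glued to the
next so the scheduler cannot separate them. A condensed sketch of that
pattern, with names matching the surrounding code:

  SDValue pinRepMovsOperands(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
                             SDValue Chain, SDValue Count, SDValue Dst,
                             SDValue Src, SDValue &InFlag) {
    // Each CopyToReg yields a glue value that orders the next copy after it.
    Chain = DAG.getCopyToReg(Chain, dl, Is64Bit ? X86::RCX : X86::ECX,
                             Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain = DAG.getCopyToReg(Chain, dl, Is64Bit ? X86::RDI : X86::EDI,
                             Dst, InFlag);
    InFlag = Chain.getValue(1);
    Chain = DAG.getCopyToReg(Chain, dl, Is64Bit ? X86::RSI : X86::ESI,
                             Src, InFlag);
    InFlag = Chain.getValue(1);
    return Chain;
  }
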
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index dff3624..8ef08c9 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -44,9 +44,8 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
cl::desc("Enable early if-conversion on X86"));
-/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
-/// current subtarget according to how we should reference it in a non-pcrel
-/// context.
+/// Classify a blockaddress reference for the current subtarget according to how
+/// we should reference it in a non-pcrel context.
unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
@@ -58,9 +57,8 @@ unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
return X86II::MO_NO_FLAG;
}
-/// ClassifyGlobalReference - Classify a global variable reference for the
-/// current subtarget according to how we should reference it in a non-pcrel
-/// context.
+/// Classify a global variable reference for the current subtarget according to
+/// how we should reference it in a non-pcrel context.
unsigned char X86Subtarget::
ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// DLLImport only exists on windows, it is implemented as a load from a
@@ -147,9 +145,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
}
-/// getBZeroEntry - This function returns the name of a function which has an
-/// interface like the non-standard bzero function, if such a function exists on
-/// the current subtarget and it is considered prefereable over memset with zero
+/// This function returns the name of a function which has an interface like
+/// the non-standard bzero function, if such a function exists on the
+/// current subtarget and it is considered preferable over memset with zero
/// passed as the second argument. Otherwise it returns null.
const char *X86Subtarget::getBZeroEntry() const {
// Darwin 10 has a __bzero entry point for this purpose.
@@ -166,8 +164,7 @@ bool X86Subtarget::hasSinCos() const {
is64Bit();
}
-/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
-/// to immediate address.
+/// Return true if the subtarget allows calls to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
@@ -192,9 +189,25 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}
+ // LAHF/SAHF are always supported in non-64-bit mode.
+ if (!In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+sahf," + FullFS;
+ else
+ FullFS = "+sahf";
+ }
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
+  // All CPUs that implement SSE4.2 or SSE4A support reasonably fast
+  // unaligned accesses of 16 bytes and under. These features were
+ // introduced with Intel's Nehalem/Silvermont and AMD's Family10h
+ // micro-architectures respectively.
+ if (hasSSE42() || hasSSE4A())
+ IsUAMem16Slow = false;
+
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -224,13 +237,18 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
void X86Subtarget::initializeEnvironment() {
- X86SSELevel = NoMMXSSE;
+ X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
HasCMov = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;
HasAES = false;
+ HasFXSR = false;
+ HasXSAVE = false;
+ HasXSAVEOPT = false;
+ HasXSAVEC = false;
+ HasXSAVES = false;
HasPCLMUL = false;
HasFMA = false;
HasFMA4 = false;
@@ -252,13 +270,15 @@ void X86Subtarget::initializeEnvironment() {
HasBWI = false;
HasVLX = false;
HasADX = false;
+ HasPKU = false;
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasLAHFSAHF = false;
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
- IsUAMemFast = false;
+ IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
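
The initSubtargetFeatures() hunk prepends "+sahf" whenever the target is not
in 64-bit mode, since LAHF/SAHF are unconditionally available there; in
64-bit mode the bit must come from the CPU model instead. The string handling
in isolation, as a standalone sketch:

  #include <string>

  std::string addSahfFeature(bool In64BitMode, std::string FullFS) {
    // LAHF/SAHF are always supported in non-64-bit mode.
    if (!In64BitMode)
      FullFS = FullFS.empty() ? "+sahf" : "+sahf," + FullFS;
    return FullFS;
  }
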
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index f026d42..13d1026 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -47,11 +47,11 @@ class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
+ NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
- NoThreeDNow, ThreeDNow, ThreeDNowA
+ NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
enum X86ProcFamilyEnum {
@@ -64,10 +64,10 @@ protected:
/// Which PIC style to use
PICStyles::Style PICStyle;
- /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
+ /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
- /// 3DNow, 3DNow Athlon, or none supported.
+ /// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
/// True if this processor has conditional move instructions
@@ -86,6 +86,18 @@ protected:
/// Target has AES instructions
bool HasAES;
+ /// Target has FXSAVE/FXRESTOR instructions
+ bool HasFXSR;
+
+ /// Target has XSAVE instructions
+ bool HasXSAVE;
+ /// Target has XSAVEOPT instructions
+ bool HasXSAVEOPT;
+ /// Target has XSAVEC instructions
+ bool HasXSAVEC;
+ /// Target has XSAVES instructions
+ bool HasXSAVES;
+
/// Target has carry-less multiplication
bool HasPCLMUL;
@@ -140,16 +152,19 @@ protected:
/// Processor has RDSEED instructions.
bool HasRDSEED;
+ /// Processor has LAHF/SAHF instructions.
+ bool HasLAHFSAHF;
+
/// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
- /// True if unaligned memory access is fast.
- bool IsUAMemFast;
+  /// True if unaligned memory accesses of 16 bytes are slow.
+ bool IsUAMem16Slow;
- /// True if unaligned 32-byte memory accesses are slow.
+  /// True if unaligned memory accesses of 32 bytes are slow.
bool IsUAMem32Slow;
/// True if SSE operations can have unaligned memory operands.
@@ -208,6 +223,9 @@ protected:
   /// Processor has AVX-512 Vector Length eXtensions
bool HasVLX;
+  /// Processor has PKU extensions
+ bool HasPKU;
+
   /// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
@@ -319,7 +337,6 @@ public:
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasCMov() const { return HasCMov; }
- bool hasMMX() const { return X86SSELevel >= MMX; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -332,14 +349,22 @@ public:
bool hasFp256() const { return hasAVX(); }
bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
+ bool hasMMX() const { return X863DNowLevel >= MMX; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
+ bool hasFXSR() const { return HasFXSR; }
+ bool hasXSAVE() const { return HasXSAVE; }
+ bool hasXSAVEOPT() const { return HasXSAVEOPT; }
+ bool hasXSAVEC() const { return HasXSAVEC; }
+ bool hasXSAVES() const { return HasXSAVES; }
bool hasPCLMUL() const { return HasPCLMUL; }
- bool hasFMA() const { return HasFMA; }
- // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
- bool hasFMA4() const { return HasFMA4 && !HasFMA; }
+  // Prefer FMA4 to FMA - it's better for commutation/memory folding and
+ // has equal or better performance on all supported targets.
+ bool hasFMA() const { return HasFMA && !HasFMA4; }
+ bool hasFMA4() const { return HasFMA4; }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
@@ -355,9 +380,10 @@ public:
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
+ bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
- bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
@@ -375,6 +401,7 @@ public:
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
@@ -394,9 +421,11 @@ public:
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
+ bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -406,6 +435,10 @@ public:
return TargetTriple.isKnownWindowsMSVCEnvironment();
}
+ bool isTargetWindowsCoreCLR() const {
+ return TargetTriple.isWindowsCoreCLREnvironment();
+ }
+
bool isTargetWindowsCygwin() const {
return TargetTriple.isWindowsCygwinEnvironment();
}
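
The FMA predicate change above inverts the old preference: hasFMA() now
yields to FMA4 when both feature bits are set, and hasAnyFMA() gives callers
a single "some fused multiply-add exists" query. The new relationships in
isolation, with plain bools standing in for the subtarget fields:

  struct FMAFeaturesSketch {
    bool HasFMA = false, HasFMA4 = false, HasAVX512 = false;
    // FMA4 wins when both are present: better commutation/memory folding.
    bool hasFMA() const { return HasFMA && !HasFMA4; }
    bool hasFMA4() const { return HasFMA4; }
    bool hasAnyFMA() const { return hasFMA() || hasFMA4() || HasAVX512; }
  };
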
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index fb9cb4b..0e7e4c0 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -28,10 +28,17 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
+namespace llvm {
+void initializeWinEHStatePassPass(PassRegistry &);
+}
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86TargetMachine> Y(TheX86_64Target);
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeWinEHStatePassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -45,7 +52,7 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSBinFormatELF())
return make_unique<X86ELFTargetObjectFile>();
- if (TT.isKnownWindowsMSVCEnvironment())
+ if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment())
return make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
return make_unique<TargetLoweringObjectFileCOFF>();
@@ -175,8 +182,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
//===----------------------------------------------------------------------===//
TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(X86TTIImpl(this, F));
+ });
}
@@ -246,6 +254,9 @@ bool X86PassConfig::addPreISel() {
}
void X86PassConfig::addPreRegAlloc() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86OptimizeLEAs());
+
addPass(createX86CallFrameOptimization());
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 6f900ea..782768d 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
@@ -152,9 +153,8 @@ static std::string scalarConstantToHexString(const Constant *C) {
}
}
-MCSection *
-X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *X86WindowsTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst() && C) {
const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
@@ -171,5 +171,5 @@ X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
COFF::IMAGE_COMDAT_SELECT_ANY);
}
- return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
+ return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C);
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index 66366b2..6b2448c 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -58,7 +58,7 @@ namespace llvm {
/// \brief Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
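
getSectionForConstant() now receives the DataLayout explicitly instead of
recovering it from global state. A sketch of a conforming override under that
assumption (the subclass name is illustrative):

  class SketchTLOF : public TargetLoweringObjectFile {
    MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
                                     const Constant *C) const override {
      // Any target-specific placement would go here; DL now arrives with
      // the query, so no Module or TargetMachine walk is needed.
      return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C);
    }
  };
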
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7df7260..2e7bbb2 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
+
using namespace llvm;
#define DEBUG_TYPE "x86tti"
@@ -62,8 +63,8 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (ST->is64Bit())
return 64;
- return 32;
+ return 32;
}
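
Most of the churn in the hunks below is mechanical: CostTblEntry loses its
MVT::SimpleValueType template parameter, CostTableLookup() now returns a
pointer to the matching entry (or nullptr) instead of an index, and the cost
interfaces move from unsigned to int. The new lookup idiom, with a toy table
standing in for the real ones:

  static const CostTblEntry ToyCostTable[] = {
    { ISD::SHL, MVT::v4i32, 1 },
    { ISD::SRL, MVT::v4i32, 1 },
  };

  int lookupToyCost(int ISD, std::pair<int, MVT> LT) {
    // nullptr means "no entry"; the old API signalled that with index -1.
    if (const auto *Entry = CostTableLookup(ToyCostTable, ISD, LT.second))
      return LT.first * Entry->Cost;
    return -1; // caller falls back to the generic cost model
  }
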
unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
@@ -84,12 +85,12 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-unsigned X86TTIImpl::getArithmeticInstrCost(
+int X86TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -101,10 +102,9 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence SRA + SRL + ADD + SRA.
    // The OperandValue properties may not be the same as that of the previous
    // operation; conservatively assume OP_None.
- unsigned Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
+ int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -115,8 +115,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
return Cost;
}
- static const CostTblEntry<MVT::SimpleValueType>
- AVX2UniformConstCostTable[] = {
+ static const CostTblEntry AVX2UniformConstCostTable[] = {
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
@@ -127,12 +126,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasAVX2()) {
- int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX2UniformConstCostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ static const CostTblEntry AVX512CostTable[] = {
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
@@ -141,7 +140,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v8i64, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
+ if (ST->hasAVX512()) {
+ if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX2CostTable[] = {
    // Shifts on v4i64/v8i32 on AVX2 are legal even though we declare them as
    // custom in order to detect the cases where the shift amount is a scalar.
{ ISD::SHL, MVT::v4i32, 1 },
@@ -154,7 +158,57 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRL, MVT::v2i64, 1 },
{ ISD::SHL, MVT::v4i64, 1 },
{ ISD::SRL, MVT::v4i64, 1 },
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ // On AVX2, a packed v16i16 shift left by a constant build_vector
+ // is lowered into a vector multiply (vpmullw).
+ return LT.first;
+
+ if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry XOPCostTable[] = {
+    // 128-bit shifts take 1cy, but right shifts require negation beforehand.
+ { ISD::SHL, MVT::v16i8, 1 },
+ { ISD::SRL, MVT::v16i8, 2 },
+ { ISD::SRA, MVT::v16i8, 2 },
+ { ISD::SHL, MVT::v8i16, 1 },
+ { ISD::SRL, MVT::v8i16, 2 },
+ { ISD::SRA, MVT::v8i16, 2 },
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 2 },
+ { ISD::SRA, MVT::v4i32, 2 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 2 },
+ { ISD::SRA, MVT::v2i64, 2 },
+    // 256-bit shifts require splitting if AVX2 didn't catch them above.
+ { ISD::SHL, MVT::v32i8, 2 },
+ { ISD::SRL, MVT::v32i8, 4 },
+ { ISD::SRA, MVT::v32i8, 4 },
+ { ISD::SHL, MVT::v16i16, 2 },
+ { ISD::SRL, MVT::v16i16, 4 },
+ { ISD::SRA, MVT::v16i16, 4 },
+ { ISD::SHL, MVT::v8i32, 2 },
+ { ISD::SRL, MVT::v8i32, 4 },
+ { ISD::SRA, MVT::v8i32, 4 },
+ { ISD::SHL, MVT::v4i64, 2 },
+ { ISD::SRL, MVT::v4i64, 4 },
+ { ISD::SRA, MVT::v4i64, 4 },
+ };
+ // Look for XOP lowering tricks.
+ if (ST->hasXOP()) {
+ if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
@@ -163,7 +217,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
- { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
@@ -176,44 +231,44 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
- if (ST->hasAVX512()) {
- int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX512CostTable[Idx].Cost;
- }
- // Look for AVX2 lowering tricks.
+ // Look for AVX2 lowering tricks for custom cases.
if (ST->hasAVX2()) {
- if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
- (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
- // On AVX2, a packed v16i16 shift left by a constant build_vector
- // is lowered into a vector multiply (vpmullw).
- return LT.first;
-
- int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX2CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType>
+ static const CostTblEntry
SSE2UniformConstCostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// Constant splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
+ { ISD::SHL, MVT::v32i8, 2 }, // psllw.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v16i16, 2 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v8i32, 2 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
+ { ISD::SHL, MVT::v4i64, 2 }, // psllq.
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
+ { ISD::SRL, MVT::v32i8, 2 }, // psrlw.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v16i16, 2 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v8i32, 2 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+ { ISD::SRL, MVT::v4i64, 2 }, // psrlq.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v16i16, 2 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ { ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
+ { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
@@ -227,27 +282,34 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 15;
- int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
if (ISD == ISD::SHL &&
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
- EVT VT = LT.second;
+ MVT VT = LT.second;
+    // Vector shift left by a non-uniform constant can be lowered
+    // into a vector multiply (pmullw/pmulld).
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
(VT == MVT::v4i32 && ST->hasSSE41()))
- // Vector shift left by non uniform constant can be lowered
- // into vector multiply (pmullw/pmulld).
return LT.first;
+
+ // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+ // sequence of extract + two vector multiply + insert.
+ if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
+ (ST->hasAVX() && !ST->hasAVX2()))
+ ISD = ISD::MUL;
+
+    // A vector shift left by a non-uniform constant is converted
+    // into a vector multiply; the new multiply is eventually
+    // lowered into a sequence of shuffles and 2 x pmuludq.
if (VT == MVT::v4i32 && ST->hasSSE2())
- // A vector shift left by non uniform constant is converted
- // into a vector multiply; the new multiply is eventually
- // lowered into a sequence of shuffles and 2 x pmuludq.
ISD = ISD::MUL;
}
- static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
+ static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// For some cases, where the shift amount is a scalar we would be able
@@ -257,20 +319,31 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// used for vectorization and we don't want to make vectorized code worse
// than scalar code.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
+ { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
// It is not a good idea to vectorize division. We have to scalarize it and
    // in the process we will often end up having to spill regular
@@ -289,12 +362,11 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
};
if (ST->hasSSE2()) {
- int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * SSE2CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
+ static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
@@ -314,29 +386,21 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2()) {
- EVT VT = LT.second;
+ MVT VT = LT.second;
- // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
- // sequence of extract + two vector multiply + insert.
- if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
- ISD = ISD::MUL;
-
- int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
- if (Idx != -1)
- return LT.first * AVX1CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT))
+ return LT.first * Entry->Cost;
}
// Custom lowering of vectors.
- static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
+ static const CostTblEntry CustomLowered[] = {
// A v2i64/v4i64 and multiply is custom lowered as a series of long
// multiplies(3), shifts(4) and adds(2).
{ ISD::MUL, MVT::v2i64, 9 },
{ ISD::MUL, MVT::v4i64, 9 },
};
- int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
- if (Idx != -1)
- return LT.first * CustomLowered[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
+ return LT.first * Entry->Cost;
// Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
// 2x pmuludq, 2x shuffle.
@@ -348,15 +412,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
-unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only estimate the cost of reverse and alternate shuffles.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- unsigned Cost = 1;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ int Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.
@@ -367,14 +431,14 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
if (ST->hasAVX2() && LT.second == MVT::v16i16)
return LT.first;
- static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
+ static const CostTblEntry AVXAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
@@ -390,13 +454,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
};
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * AVXAltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
+ static const CostTblEntry SSE41AltShuffleTbl[] = {
// These are lowered into movsd.
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
@@ -414,13 +477,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
};
- if (ST->hasSSE41()) {
- int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSE41AltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
+ static const CostTblEntry SSSE3AltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
@@ -433,13 +495,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
};
- if (ST->hasSSSE3()) {
- int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
+ static const CostTblEntry SSEAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
@@ -454,65 +515,47 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
};
// Fall-back (SSE3 and SSE2).
- int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSEAltShuffleTbl[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
- std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
-
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- SSE2ConvTbl[] = {
- // These are somewhat magic numbers justified by looking at the output of
- // Intel's IACA, running some kernels and making sure when we take
- // legalization into account the throughput will be overestimated.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
- // There are faster sequences for float conversions.
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+  // FIXME: Need a better design of the cost table to handle non-simple types
+  // and the potentially massive combinations (elem_num x src_type x dst_type).
+
+ static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
+
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
};
- if (ST->hasSSE2() && !ST->hasAVX()) {
- int Idx =
- ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
- if (Idx != -1)
- return LTSrc.first * SSE2ConvTbl[Idx].Cost;
- }
-
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVX512ConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
- { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
- { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
// v16i1 -> v16i32 - load + broadcast
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
@@ -522,33 +565,49 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
- { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- };
- if (ST->hasAVX512()) {
- int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
- LTSrc.second);
- if (Idx != -1)
- return AVX512ConversionTbl[Idx].Cost;
- }
- EVT SrcTy = TLI->getValueType(DL, Src);
- EVT DstTy = TLI->getValueType(DL, Dst);
-
- // The function getSimpleVT only handles simple value types.
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
+ };
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVX2ConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
@@ -579,8 +638,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVXConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
@@ -650,34 +708,158 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
};
+ static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
+ };
+
+ static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
+ // These are somewhat magic numbers justified by looking at the output of
+ // Intel's IACA, running some kernels and making sure that, when we take
+ // legalization into account, the throughput will be overestimated.
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ // There are faster sequences for float conversions.
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
+ };
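// (Illustration, not part of the patch.) The table costs above are per
// *legalized* operation, so the SSE2 lookup below scales the entry by the
// legalization split factor. For example, using the v16i8 -> v2f64
// SINT_TO_FP entry (16 elements at roughly 10 ops each):
//
//   int Cost = LTSrc.first * Entry->Cost;   // e.g. 1 * (16*10) = 160
//
// A wider-than-legal source type raises LTSrc.first instead, multiplying
// the per-op cost by the number of legal pieces.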
+
+ std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
+
+ if (ST->hasSSE2() && !ST->hasAVX()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
+ LTDest.second, LTSrc.second))
+ return LTSrc.first * Entry->Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
+
+ // The function getSimpleVT only handles simple value types.
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+
if (ST->hasAVX2()) {
- int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVX2ConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
if (ST->hasAVX()) {
- int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVXConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ if (ST->hasSSE41()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ if (ST->hasSSE2()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
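// (Illustration, not part of the patch.) The refactoring above replaces
// index-based table lookups with helpers that return a pointer to the hit
// entry. A minimal sketch of the idea with a simplified entry type; the
// real helpers are CostTableLookup/ConvertCostTableLookup in
// llvm/Target/CostTable.h and operate on MVT::SimpleValueType:

struct ConvEntry { int ISD; int Dst; int Src; int Cost; };

template <unsigned N>
static const ConvEntry *lookupConv(const ConvEntry (&Tbl)[N], int ISD,
                                   int Dst, int Src) {
  for (const ConvEntry &E : Tbl)
    if (E.ISD == ISD && E.Dst == Dst && E.Src == Src)
      return &E; // caller writes: if (const auto *Entry = ...) Entry->Cost
  return nullptr; // miss: fall through to the next feature level / base impl
}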
-unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
+ static const CostTblEntry SSE42CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 1 },
{ ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
@@ -686,7 +868,7 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i8, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
+ static const CostTblEntry AVX1CostTbl[] = {
{ ISD::SETCC, MVT::v4f64, 1 },
{ ISD::SETCC, MVT::v8f32, 1 },
// AVX1 does not support 8-wide integer compare.
@@ -696,54 +878,45 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 4 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
+ static const CostTblEntry AVX2CostTbl[] = {
{ ISD::SETCC, MVT::v4i64, 1 },
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
{ ISD::SETCC, MVT::v32i8, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ static const CostTblEntry AVX512CostTbl[] = {
{ ISD::SETCC, MVT::v8i64, 1 },
{ ISD::SETCC, MVT::v16i32, 1 },
{ ISD::SETCC, MVT::v8f64, 1 },
{ ISD::SETCC, MVT::v16f32, 1 },
};
- if (ST->hasAVX512()) {
- int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX512CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2()) {
- int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -761,10 +934,9 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) {
+int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
+ int Cost = 0;
for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
if (Insert)
@@ -776,9 +948,8 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
return Cost;
}
-unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
+int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -796,22 +967,21 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
- unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(),
- Alignment, AddressSpace);
- unsigned SplitCost = getScalarizationOverhead(Src,
- Opcode == Instruction::Load,
- Opcode==Instruction::Store);
+ int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
+ AddressSpace);
+ int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load,
+ Opcode == Instruction::Store);
return NumElem * Cost + SplitCost;
}
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
// Each load/store unit costs 1.
- unsigned Cost = LT.first * 1;
+ int Cost = LT.first * 1;
// On Sandybridge 256bit load/stores are double pumped
// (but not on Haswell).
@@ -821,9 +991,9 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return Cost;
}
-unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
- unsigned Alignment,
- unsigned AddressSpace) {
+int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
+ unsigned Alignment,
+ unsigned AddressSpace) {
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
@@ -832,34 +1002,33 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
- if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
- (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
+ if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
+ (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
!isPowerOf2_32(NumElem)) {
// Scalarization
- unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
- unsigned ScalarCompareCost =
- getCmpSelInstrCost(Instruction::ICmp,
- Type::getInt8Ty(getGlobalContext()), NULL);
- unsigned BranchCost = getCFInstrCost(Instruction::Br);
- unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
-
- unsigned ValueSplitCost =
- getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
- Opcode == Instruction::Store);
- unsigned MemopCost =
+ int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
+ int ScalarCompareCost = getCmpSelInstrCost(
+ Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), nullptr);
+ int BranchCost = getCFInstrCost(Instruction::Br);
+ int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
+
+ int ValueSplitCost = getScalarizationOverhead(
+ SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+ int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
- unsigned Cost = 0;
- if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+ auto VT = TLI->getValueType(DL, SrcVTy);
+ int Cost = 0;
+ if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
- Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
- getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0);
+ Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) +
+ getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
@@ -874,7 +1043,7 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
-unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -887,10 +1056,10 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return BaseT::getAddressComputationCost(Ty, IsComplex);
}
-unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
- bool IsPairwise) {
+int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
+ bool IsPairwise) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -900,7 +1069,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
+ static const CostTblEntry SSE42CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -908,7 +1077,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i16, 5 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
+ static const CostTblEntry AVX1CostTblPairWise[] = {
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
{ ISD::FADD, MVT::v8f32, 7 },
@@ -919,7 +1088,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i32, 5 },
};
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
+ static const CostTblEntry SSE42CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -927,7 +1096,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
+ static const CostTblEntry AVX1CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
{ ISD::FADD, MVT::v8f32, 4 },
@@ -939,29 +1108,21 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
};
if (IsPairwise) {
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTblPairWise[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTblPairWise[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
} else {
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
}
return BaseT::getReductionCost(Opcode, ValTy, IsPairwise);
@@ -970,7 +1131,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
+int X86TTIImpl::getIntImmCost(int64_t Val) {
if (Val == 0)
return TTI::TCC_Free;
@@ -980,7 +1141,7 @@ unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
return 2 * TTI::TCC_Basic;
}
-unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1004,18 +1165,18 @@ unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
- unsigned Cost = 0;
+ int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
  // We need at least one instruction to materialize the constant.
- return std::max(1U, Cost);
+ return std::max(1, Cost);
}
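// (Worked example, illustrative.) For BitSize == 128 the loop above runs
// twice, ShiftVal = 0 and ShiftVal = 64, costing the low and high 64-bit
// chunks independently:
//
//   Imm = 0x00000000_00000001_00000000_00000000 (i128)
//     chunk 0 (low 64)  -> 0 -> getIntImmCost(0) == TCC_Free
//     chunk 1 (high 64) -> 1 -> one instruction
//
// The sum is 1; std::max(1, Cost) also guarantees a minimum of one
// instruction when every chunk is free (e.g. Imm == 0).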
-unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1038,6 +1199,26 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::Store:
ImmIdx = 0;
break;
+ case Instruction::ICmp:
+ // This is an imperfect hack to prevent constant hoisting of
+ // compares that might be trying to check if a 64-bit value fits in
+ // 32-bits. The backend can optimize these cases using a right shift by 32.
+ // Ideally we would check the compare predicate here. There are also other
+ // similar immediates the backend can use shifts for.
+ if (Idx == 1 && Imm.getBitWidth() == 64) {
+ uint64_t ImmVal = Imm.getZExtValue();
+ if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
+ return TTI::TCC_Free;
+ }
+ ImmIdx = 1;
+ break;
+ case Instruction::And:
+ // We support 64-bit ANDs with immediates that have 32 bits of leading zeroes
+ // by using a 32-bit operation with implicit zero extension. Detect such
+ // immediates here as the normal path expects bit 31 to be sign extended.
+ if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Fallthrough
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1045,10 +1226,8 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- case Instruction::ICmp:
ImmIdx = 1;
break;
// Always return TCC_Free for the shift value of a shift instruction.
@@ -1073,18 +1252,18 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}
if (Idx == ImmIdx) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
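// (Illustration of the two new special cases above; the assembly is
// approximate and assumes x86-64.)
//
//   icmp ugt i64 %x, 0xffffffff   ; "does %x fit in 32 bits?"
//     The backend can lower this with a shift instead of materializing
//     the constant:   shrq $32, %rax  /  setne %cl
//
//   and i64 %x, 0x00000000ffffffff
//     Encodable as a 32-bit operation with implicit zero extension of
//     the upper half, e.g.:   movl %eax, %eax
//
// In both cases hoisting the immediate into a register would only add
// code, hence TCC_Free.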
-unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1118,23 +1297,181 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
- int DataWidth = DataTy->getPrimitiveSizeInBits();
+// Return an average cost of a Gather / Scatter instruction; may be improved later.
+int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
+ unsigned Alignment, unsigned AddressSpace) {
+
+ assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
+ unsigned VF = SrcVTy->getVectorNumElements();
+
+ // Try to reduce index size from 64 bit (default for GEP)
+ // to 32. It is essential for VF 16. If the index can't be reduced to 32, the
+ // operation will use 16 x 64 indices which do not fit in a zmm and need
+ // to be split. Also check that the base pointer is the same for all lanes,
+ // and that there's at most one variable index.
+ auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) {
+ unsigned IndexSize = DL.getPointerSizeInBits();
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (IndexSize < 64 || !GEP)
+ return IndexSize;
+
+ unsigned NumOfVarIndices = 0;
+ Value *Ptrs = GEP->getPointerOperand();
+ if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs))
+ return IndexSize;
+ for (unsigned i = 1; i < GEP->getNumOperands(); ++i) {
+ if (isa<Constant>(GEP->getOperand(i)))
+ continue;
+ Type *IndxTy = GEP->getOperand(i)->getType();
+ if (IndxTy->isVectorTy())
+ IndxTy = IndxTy->getVectorElementType();
+ if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
+ !isa<SExtInst>(GEP->getOperand(i))) ||
+ ++NumOfVarIndices > 1)
+ return IndexSize; // 64
+ }
+ return (unsigned)32;
+ };
+
+ // Try to reduce IndexSize to 32 bits for 16-element vectors.
+ // By default the IndexSize is equal to pointer size.
+ unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) :
+ DL.getPointerSizeInBits();
+
+ Type *IndexVTy = VectorType::get(IntegerType::get(getGlobalContext(),
+ IndexSize), VF);
+ std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
+ std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+ int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
+ if (SplitFactor > 1) {
+ // Handle splitting of vector of pointers
+ Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
+ return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
+ AddressSpace);
+ }
+
+ // The gather / scatter cost is given by Intel architects. It is a rough
+ // number since we are looking at one instruction at a time.
+ const int GSOverhead = 2;
+ return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+ Alignment, AddressSpace);
+}
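// (Worked example, illustrative.) A gather of <16 x float> whose GEP
// indices stay 64-bit legalizes its index vector as <16 x i64>, i.e. two
// 512-bit registers, so SplitFactor == 2 and the cost recurses as two
// <8 x float> gathers. If getIndexSizeInBits proves 32-bit indices
// suffice, everything fits in one instruction and the cost is:
//
//   GSOverhead + 16 * getMemoryOpCost(Load, float, Alignment, AS)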
+
+/// Return the cost of full scalarization of a gather / scatter operation.
+///
+/// Opcode - Load or Store instruction.
+/// SrcVTy - The type of the data vector that should be gathered or scattered.
+/// VariableMask - The mask is non-constant at compile time.
+/// Alignment - Alignment for one element.
+/// AddressSpace - address space of the pointer(s).
+///
+int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
+ bool VariableMask, unsigned Alignment,
+ unsigned AddressSpace) {
+ unsigned VF = SrcVTy->getVectorNumElements();
+
+ int MaskUnpackCost = 0;
+ if (VariableMask) {
+ VectorType *MaskTy =
+ VectorType::get(Type::getInt1Ty(getGlobalContext()), VF);
+ MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true);
+ int ScalarCompareCost =
+ getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(getGlobalContext()),
+ nullptr);
+ int BranchCost = getCFInstrCost(Instruction::Br);
+ MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
+ }
- // Todo: AVX512 allows gather/scatter, works with strided and random as well
- if ((DataWidth < 32) || (Consecutive == 0))
+ // The cost of the scalar loads/stores.
+ int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+ Alignment, AddressSpace);
+
+ int InsertExtractCost = 0;
+ if (Opcode == Instruction::Load)
+ for (unsigned i = 0; i < VF; ++i)
+ // Add the cost of inserting each scalar load into the vector
+ InsertExtractCost +=
+ getVectorInstrCost(Instruction::InsertElement, SrcVTy, i);
+ else
+ for (unsigned i = 0; i < VF; ++i)
+ // Add the cost of extracting each element out of the data vector
+ InsertExtractCost +=
+ getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i);
+
+ return MemoryOpCost + MaskUnpackCost + InsertExtractCost;
+}
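// (Summary, illustrative.) The scalarized cost assembled above is:
//
//   Cost = VF * scalar load/store
//        + VF * insertelement (gather) or extractelement (scatter)
//        + [if VariableMask] mask extraction + VF * (icmp + branch)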
+
+/// Calculate the cost of a Gather / Scatter operation
+int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) {
+ assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
+ unsigned VF = SrcVTy->getVectorNumElements();
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy && Ptr->getType()->isVectorTy())
+ PtrTy = dyn_cast<PointerType>(Ptr->getType()->getVectorElementType());
+ assert(PtrTy && "Unexpected type for Ptr argument");
+ unsigned AddressSpace = PtrTy->getAddressSpace();
+
+ bool Scalarize = false;
+ if ((Opcode == Instruction::Load && !isLegalMaskedGather(SrcVTy)) ||
+ (Opcode == Instruction::Store && !isLegalMaskedScatter(SrcVTy)))
+ Scalarize = true;
+ // Gather / Scatter for 2-element vectors is not profitable on KNL / SKX.
+ // A vector-4 gather/scatter instruction does not exist on KNL.
+ // We can extend it to 8 elements, but zeroing the upper bits of
+ // the mask vector will add more instructions. Right now we give the scalar
+ // cost of vector-4 for KNL. TODO: check whether the gather/scatter
+ // instruction is better in the VariableMask case.
+ if (VF == 2 || (VF == 4 && !ST->hasVLX()))
+ Scalarize = true;
+
+ if (Scalarize)
+ return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, AddressSpace);
+
+ return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
+}
+
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
+ Type *ScalarTy = DataTy->getScalarType();
+ int DataWidth = isa<PointerType>(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+
+ return (DataWidth >= 32 && ST->hasAVX2());
+}
+
+bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
+ return isLegalMaskedLoad(DataType);
+}
+
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
+ // This function is called now in two cases: from the Loop Vectorizer
+ // and from the Scalarizer.
+ // When the Loop Vectorizer asks about legality of the feature,
+ // the vectorization factor is not calculated yet. The Loop Vectorizer
+ // sends a scalar type and the decision is based on the width of the
+ // scalar element.
+ // Later on, the cost model will estimate usage of this intrinsic based on
+ // the vector type.
+ // The Scalarizer asks again about legality. It sends a vector type.
+ // In this case we can reject non-power-of-2 vectors.
+ if (isa<VectorType>(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements()))
return false;
- if (ST->hasAVX512() || ST->hasAVX2())
- return true;
- return false;
+ Type *ScalarTy = DataTy->getScalarType();
+ int DataWidth = isa<PointerType>(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+
+ // AVX-512 allows gather and scatter
+ return DataWidth >= 32 && ST->hasAVX512();
}
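// (Usage sketch, illustrative; Ctx is a hypothetical LLVMContext.) Both
// call sites described above funnel into the same predicate:
//
//   isLegalMaskedGather(Type::getDoubleTy(Ctx));
//       // vectorizer's scalar query: 64 >= 32 && AVX-512 -> true
//   isLegalMaskedGather(VectorType::get(Type::getDoubleTy(Ctx), 3));
//       // scalarizer's vector query: 3 is not a power of 2 -> false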
-bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
- return isLegalMaskedLoad(DataType, Consecutive);
+bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
+ return isLegalMaskedGather(DataType);
}
-bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const {
+bool X86TTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
// Work this as a subsetting of subtarget features.
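// (Illustration, not part of the patch.) "Subsetting" here means inlining
// is allowed when the callee needs no subtarget feature the caller lacks.
// A minimal sketch over plain bit masks, assuming the features fit in one
// word:

static bool featuresSubsume(uint64_t CallerBits, uint64_t CalleeBits) {
  // Every bit set for the callee must also be set for the caller.
  return (CallerBits & CalleeBits) == CalleeBits;
}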
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index da3f36c..adb745e 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,13 +33,13 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *ST;
const X86TargetLowering *TLI;
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
public:
- explicit X86TTIImpl(const X86TargetMachine *TM, Function &F)
+ explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -62,38 +62,44 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
-
- unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex);
-
- unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
-
- unsigned getIntImmCost(int64_t);
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
- bool isLegalMaskedLoad(Type *DataType, int Consecutive);
- bool isLegalMaskedStore(Type *DataType, int Consecutive);
- bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const;
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask, unsigned Alignment);
+ int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+
+ int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
+
+ int getIntImmCost(int64_t);
+
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ bool isLegalMaskedLoad(Type *DataType);
+ bool isLegalMaskedStore(Type *DataType);
+ bool isLegalMaskedGather(Type *DataType);
+ bool isLegalMaskedScatter(Type *DataType);
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+private:
+ int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
+ unsigned Alignment, unsigned AddressSpace);
+ int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ unsigned Alignment, unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index 9190d0b..dce94a9 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -15,7 +15,8 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -38,12 +39,16 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "winehstate"
+namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); }
+
namespace {
class WinEHStatePass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHStatePass() : FunctionPass(ID) {}
+ WinEHStatePass() : FunctionPass(ID) {
+ initializeWinEHStatePassPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &Fn) override;
@@ -62,18 +67,13 @@ private:
void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler);
void unlinkExceptionRegistration(IRBuilder<> &Builder);
- void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
- void addSEHStateStores(Function &F, MachineModuleInfo &MMI);
- void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
- Function &F, int BaseState);
+ void addStateStores(Function &F, WinEHFuncInfo &FuncInfo);
void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
Value *emitEHLSDA(IRBuilder<> &Builder, Function *F);
Function *generateLSDAInEAXThunk(Function *ParentFunc);
- int escapeRegNode(Function &F);
-
// Module-level type getters.
Type *getEHLinkRegistrationType();
Type *getSEHRegistrationType();
@@ -111,6 +111,9 @@ FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
char WinEHStatePass::ID = 0;
+INITIALIZE_PASS(WinEHStatePass, "x86-winehstate",
+ "Insert stores for EH state numbers", false, false)
+
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape);
@@ -138,14 +141,7 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool WinEHStatePass::runOnFunction(Function &F) {
- // If this is an outlined handler, don't do anything. We'll do state insertion
- // for it in the parent.
- StringRef WinEHParentName =
- F.getFnAttribute("wineh-parent").getValueAsString();
- if (WinEHParentName != F.getName() && !WinEHParentName.empty())
- return false;
-
- // Check the personality. Do nothing if this is not an MSVC personality.
+ // Check the personality. Do nothing if this personality doesn't use funclets.
if (!F.hasPersonalityFn())
return false;
PersonalityFn =
@@ -153,7 +149,19 @@ bool WinEHStatePass::runOnFunction(Function &F) {
if (!PersonalityFn)
return false;
Personality = classifyEHPersonality(PersonalityFn);
- if (!isMSVCEHPersonality(Personality))
+ if (!isFuncletEHPersonality(Personality))
+ return false;
+
+ // Skip this function if there are no EH pads and we aren't using IR-level
+ // outlining.
+ bool HasPads = false;
+ for (BasicBlock &BB : F) {
+ if (BB.isEHPad()) {
+ HasPads = true;
+ break;
+ }
+ }
+ if (!HasPads)
return false;
// Disable frame pointer elimination in this function.
@@ -163,14 +171,13 @@ bool WinEHStatePass::runOnFunction(Function &F) {
emitExceptionRegistrationRecord(&F);
- auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMIPtr && "MachineModuleInfo should always be available");
- MachineModuleInfo &MMI = *MMIPtr;
- switch (Personality) {
- default: llvm_unreachable("unexpected personality function");
- case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break;
- case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break;
- }
+ // The state numbers calculated here in IR must agree with what we calculate
+ // later on for the MachineFunction. In particular, if an IR pass deletes an
+ // unreachable EH pad after this point before machine CFG construction, we
+ // will be in trouble. If this assumption is ever broken, we should turn the
+ // numbers into an immutable analysis pass.
+ WinEHFuncInfo FuncInfo;
+ addStateStores(F, FuncInfo);
// Reset per-function state.
PersonalityFn = nullptr;
@@ -261,7 +268,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
StateFieldIndex = 2;
- insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1);
+ insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), -1);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
@@ -278,7 +285,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -2 / -1
StateFieldIndex = 4;
- insertStateNumberStore(RegNode, Builder.GetInsertPoint(),
+ insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(),
UseStackGuard ? -2 : -1);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
@@ -347,7 +354,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
Value *CastPersonality =
Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
auto AI = Trampoline->arg_begin();
- Value *Args[5] = {LSDA, AI++, AI++, AI++, AI++};
+ Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++};
CallInst *Call = Builder.CreateCall(CastPersonality, Args);
// Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true);
@@ -391,160 +398,53 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
Builder.CreateStore(Next, FSZero);
}
-void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
- calculateWinCXXEHStateNumbers(&F, FuncInfo);
-
- // The base state for the parent is -1.
- addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1);
-
- // Set up RegNodeEscapeIndex
- int RegNodeEscapeIndex = escapeRegNode(F);
- FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
-
- // Only insert stores in catch handlers.
- Constant *FI8 =
- ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext()));
- for (auto P : FuncInfo.HandlerBaseState) {
- Function *Handler = const_cast<Function *>(P.first);
- int BaseState = P.second;
- IRBuilder<> Builder(&Handler->getEntryBlock(),
- Handler->getEntryBlock().begin());
- // FIXME: Find and reuse such a call if present.
- Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)});
- Value *RecoveredRegNode = Builder.CreateCall(
- FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)});
- RecoveredRegNode =
- Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0));
- addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState);
- }
-}
-
-/// Escape RegNode so that we can access it from child handlers. Find the call
-/// to localescape, if any, in the entry block and append RegNode to the list
-/// of arguments.
-int WinEHStatePass::escapeRegNode(Function &F) {
- // Find the call to localescape and extract its arguments.
- IntrinsicInst *EscapeCall = nullptr;
- for (Instruction &I : F.getEntryBlock()) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::localescape) {
- EscapeCall = II;
- break;
- }
- }
- SmallVector<Value *, 8> Args;
- if (EscapeCall) {
- auto Ops = EscapeCall->arg_operands();
- Args.append(Ops.begin(), Ops.end());
- }
- Args.push_back(RegNode);
-
- // Replace the call (if it exists) with new one. Otherwise, insert at the end
- // of the entry block.
- Instruction *InsertPt = EscapeCall;
- if (!EscapeCall)
- InsertPt = F.getEntryBlock().getTerminator();
- IRBuilder<> Builder(&F.getEntryBlock(), InsertPt);
- Builder.CreateCall(FrameEscape, Args);
- if (EscapeCall)
- EscapeCall->eraseFromParent();
- return Args.size() - 1;
-}
+void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
+ // Mark the registration node. The backend needs to know which alloca it is so
+ // that it can recover the original frame pointer.
+ IRBuilder<> Builder(RegNode->getParent(), std::next(RegNode->getIterator()));
+ Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getInt8PtrTy());
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehregnode),
+ {RegNodeI8});
+
+ // Calculate state numbers.
+ if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&F, FuncInfo);
+ else
+ calculateWinCXXEHStateNumbers(&F, FuncInfo);
-void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
- WinEHFuncInfo &FuncInfo,
- Function &F, int BaseState) {
// Iterate all the instructions and emit state number stores.
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
for (BasicBlock &BB : F) {
+ // Figure out what state we should assign calls in this block.
+ int BaseState = -1;
+ auto &BBColors = BlockColors[&BB];
+
+ assert(BBColors.size() == 1 &&
+ "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+ if (auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
+ }
+
for (Instruction &I : BB) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
// Possibly throwing call instructions have no actions to take after
// an unwind. Ensure they are in the -1 state.
if (CI->doesNotThrow())
continue;
- insertStateNumberStore(ParentRegNode, CI, BaseState);
+ insertStateNumberStore(RegNode, CI, BaseState);
} else if (auto *II = dyn_cast<InvokeInst>(&I)) {
// Look up the state number of the landingpad this unwinds to.
- LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
- // FIXME: Why does this assertion fail?
- //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!");
- int State = FuncInfo.LandingPadStateMap[LPI];
- insertStateNumberStore(ParentRegNode, II, State);
- }
- }
- }
-}
-
-/// Assign every distinct landingpad a unique state number for SEH. Unlike C++
-/// EH, we can use this very simple algorithm while C++ EH cannot because catch
-/// handlers aren't outlined and the runtime doesn't have to figure out which
-/// catch handler frame to unwind to.
-/// FIXME: __finally blocks are outlined, so this approach may break down there.
-void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
-
- // Remember and return the index that we used. We save it in WinEHFuncInfo so
- // that we can lower llvm.x86.seh.recoverfp later in filter functions without
- // too much trouble.
- int RegNodeEscapeIndex = escapeRegNode(F);
- FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
-
- // Iterate all the instructions and emit state number stores.
- int CurState = 0;
- SmallPtrSet<BasicBlock *, 4> ExceptBlocks;
- for (BasicBlock &BB : F) {
- for (auto I = BB.begin(), E = BB.end(); I != E; ++I) {
- if (auto *CI = dyn_cast<CallInst>(I)) {
- auto *Intrin = dyn_cast<IntrinsicInst>(CI);
- if (Intrin) {
- // Calls that "don't throw" are considered to be able to throw asynch
- // exceptions, but intrinsics cannot.
- continue;
- }
- insertStateNumberStore(RegNode, CI, -1);
- } else if (auto *II = dyn_cast<InvokeInst>(I)) {
- // Look up the state number of the landingpad this unwinds to.
- LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
- auto InsertionPair =
- FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState));
- auto Iter = InsertionPair.first;
- int &State = Iter->second;
- bool Inserted = InsertionPair.second;
- if (Inserted) {
- // Each action consumes a state number.
- auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode());
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(EHActions, ActionList);
- assert(!ActionList.empty());
- CurState += ActionList.size();
- State += ActionList.size() - 1;
-
- // Remember all the __except block targets.
- for (auto &Handler : ActionList) {
- if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
- auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
-#ifndef NDEBUG
- for (BasicBlock *Pred : predecessors(BA->getBasicBlock()))
- assert(Pred->isLandingPad() &&
- "WinEHPrepare failed to split block");
-#endif
- ExceptBlocks.insert(BA->getBasicBlock());
- }
- }
- }
+ assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
+ int State = FuncInfo.InvokeStateMap[II];
insertStateNumberStore(RegNode, II, State);
}
}
}
-
- // Insert llvm.x86.seh.restoreframe() into each __except block.
- Function *RestoreFrame =
- Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe);
- for (BasicBlock *ExceptBB : ExceptBlocks) {
- IRBuilder<> Builder(ExceptBB->begin());
- Builder.CreateCall(RestoreFrame, {});
- }
}
void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 2e44ac9..aaf267a 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -224,7 +224,7 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val > 11)
return MCDisassembler::Fail;
- static unsigned Values[] = {
+ static const unsigned Values[] = {
32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
};
Inst.addOperand(MCOperand::createImm(Values[Val]));
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 6fd2dec..dc513f7 100644
--- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -19,8 +19,6 @@
namespace llvm {
-class TargetMachine;
-
class XCoreInstPrinter : public MCInstPrinter {
public:
XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 702056d..b00cdd5 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -115,14 +115,14 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitSpecialLLVMGlobal(GV))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
- unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
-
+ unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType());
+
// Mark the start of the global
getTargetStreamer().emitCCTopData(GVSym->getName());
@@ -154,15 +154,15 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
- unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Size = DL.getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
MCConstantExpr::create(Size, OutContext));
}
OutStreamer->EmitLabel(GVSym);
-
- EmitGlobalConstant(C);
+
+ EmitGlobalConstant(DL, C);
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
@@ -208,7 +208,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -224,8 +224,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
getSymbol(MO.getGlobal())->print(O, MAI);
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
break;
case MachineOperand::MO_BlockAddress:
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 76c3d81..ae493de 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -160,27 +160,26 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
/// As offsets are negative, the largest offsets will be first.
static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
MachineFrameInfo *MFI, XCoreFunctionInfo *XFI,
+ const Constant *PersonalityFn,
const TargetLowering *TL) {
assert(XFI->hasEHSpillSlot() && "There are no EH register spill slots");
- const int* EHSlot = XFI->getEHSpillSlot();
- SpillList.push_back(StackSlotInfo(EHSlot[0],
- MFI->getObjectOffset(EHSlot[0]),
- TL->getExceptionPointerRegister()));
- SpillList.push_back(StackSlotInfo(EHSlot[0],
- MFI->getObjectOffset(EHSlot[1]),
- TL->getExceptionSelectorRegister()));
+ const int *EHSlot = XFI->getEHSpillSlot();
+ SpillList.push_back(
+ StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[0]),
+ TL->getExceptionPointerRegister(PersonalityFn)));
+ SpillList.push_back(
+ StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[1]),
+ TL->getExceptionSelectorRegister(PersonalityFn)));
std::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
}
-
static MachineMemOperand *
getFrameIndexMMO(MachineBasicBlock &MBB, int FrameIndex, unsigned flags) {
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- flags, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags,
+ MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex));
return MMO;
}
@@ -323,8 +322,11 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
if (XFI->hasEHSpillSlot()) {
// The unwinder requires stack slot & CFI offsets for the exception info.
// We do not save/spill these registers.
- SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI,
+ const Function *Fn = MF.getFunction();
+ const Constant *PersonalityFn =
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr;
+ SmallVector<StackSlotInfo, 2> SpillList;
+ GetEHSpillList(SpillList, MFI, XFI, PersonalityFn,
MF.getSubtarget().getTargetLowering());
assert(SpillList.size()==2 && "Unexpected SpillList size");
EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
@@ -355,8 +357,12 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
if (RetOpcode == XCore::EH_RETURN) {
// 'Restore' the exception info the unwinder has placed into the stack
// slots.
- SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering());
+ const Function *Fn = MF.getFunction();
+ const Constant *PersonalityFn =
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr;
+ SmallVector<StackSlotInfo, 2> SpillList;
+ GetEHSpillList(SpillList, MFI, XFI, PersonalityFn,
+ MF.getSubtarget().getTargetLowering());
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
// Return to the landing pad.
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 9d4a966..9f61c84 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -151,8 +151,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
MVT::Other, CPIdx,
CurDAG->getEntryNode());
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ MemOp[0] =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
return node;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index d62e742..105b2cf 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -79,9 +79,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget.getRegisterInfo());
- // Division is expensive
- setIntDivIsCheap(false);
-
setStackPointerRegisterToSaveRestore(XCore::SP);
setSchedulingPreference(Sched::Source);
@@ -154,8 +151,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
// Exception handling
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setExceptionPointerRegister(XCore::R0);
- setExceptionSelectorRegister(XCore::R1);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
// Atomic operations
@@ -839,7 +834,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(
getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN,
- MachinePointerInfo::getFixedStack(FI), false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -1367,8 +1362,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
      // from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false,
+ false, false, 0);
}
const ArgDataPair ADP = { ArgIn, Ins[i].Flags };
ArgData.push_back(ADP);
@@ -1517,9 +1512,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Create a SelectionDAG node corresponding to a store
// to this memory location.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN,
- MachinePointerInfo::getFixedStack(FI), false, false,
- 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, dl, OutVals[i], FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, 0));
}
// Transform all store nodes into one single node because
@@ -1567,8 +1563,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -1828,9 +1823,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Chain = ST->getChain();
unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
- if (StoreBits % 8) {
- break;
- }
+ assert((StoreBits % 8) == 0 &&
+ "Store size in bits must be a multiple of 8");
unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
unsigned Alignment = ST->getAlignment();
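
Two smaller cleanups ride along in XCoreISelLowering.cpp: a store whose size is not a byte multiple is now an assertion failure rather than a silent bail-out, and the implicit ilist node-to-iterator conversion is spelled out. The iterator idiom, as a sketch (BB is a MachineBasicBlock*):

    // Nodes no longer convert implicitly to iterators; ask explicitly.
    MachineFunction::iterator It = ++BB->getIterator(); // block after BB
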
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index ddd675c..b6f09ff 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -125,6 +125,20 @@ namespace llvm {
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return XCore::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return XCore::R1;
+ }
+
private:
const TargetMachine &TM;
const XCoreSubtarget &Subtarget;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index ee30344..e4129ae 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -368,11 +368,10 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::STWFI))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FrameIndex)
@@ -391,11 +390,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
.addFrameIndex(FrameIndex)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 996c6f5..f0b7201 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -228,12 +228,9 @@ bool XCoreLowerThreadLocal::runOnModule(Module &M) {
// Find thread local globals.
bool MadeChange = false;
SmallVector<GlobalVariable *, 16> ThreadLocalGlobals;
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ++GVI) {
- GlobalVariable *GV = GVI;
- if (GV->isThreadLocal())
- ThreadLocalGlobals.push_back(GV);
- }
+ for (GlobalVariable &GV : M.globals())
+ if (GV.isThreadLocal())
+ ThreadLocalGlobals.push_back(&GV);
for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) {
MadeChange |= lowerGlobal(ThreadLocalGlobals[I]);
}
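
The loop rewrite above is the recurring modernization in this import: explicit global_iterator walks become range-based loops, taking an address only where a pointer is actually stored. The same shape, minimally:

    // Collect thread-local globals with a range-based walk (sketch).
    SmallVector<GlobalVariable *, 16> ThreadLocals;
    for (GlobalVariable &GV : M.globals())
      if (GV.isThreadLocal())
        ThreadLocals.push_back(&GV);
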
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index 9ef9752..6c77096 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreMachineFuctionInfo.cpp - XCore machine function info ---------===//
+//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 078ffde..cdcc52f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index f420081..4a79dac 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -85,7 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() {
}
TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(XCoreTTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index b5a9905..aa16ecc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -123,18 +123,21 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
if (Kind.isMergeableConst16()) return MergeableConst16Section;
}
Type *ObjType = GV->getType()->getPointerElementType();
+ auto &DL = GV->getParent()->getDataLayout();
if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
- TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
+ DL.getTypeAllocSize(ObjType) < CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
: DataRelROSection;
if (Kind.isBSS() || Kind.isCommon())return BSSSection;
- if (Kind.isDataRel()) return DataSection;
+ if (Kind.isData())
+ return DataSection;
if (Kind.isReadOnlyWithRel()) return DataRelROSection;
} else {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySectionLarge
: DataRelROSectionLarge;
if (Kind.isBSS() || Kind.isCommon())return BSSSectionLarge;
- if (Kind.isDataRel()) return DataSectionLarge;
+ if (Kind.isData())
+ return DataSectionLarge;
if (Kind.isReadOnlyWithRel()) return DataRelROSectionLarge;
}
@@ -142,9 +145,8 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
report_fatal_error("Target does not support TLS or Common sections");
}
-MCSection *
-XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *XCoreTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst4()) return MergeableConst4Section;
if (Kind.isMergeableConst8()) return MergeableConst8Section;
if (Kind.isMergeableConst16()) return MergeableConst16Section;
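
Another theme surfaces here: the TargetMachine no longer hands out a DataLayout pointer, so size queries go through the module that owns the global, and SectionKind::isDataRel collapses into isData. A sketch of the layout query, with GV and ObjType as in the hunk:

    // Query the owning module's DataLayout instead of the TargetMachine.
    const DataLayout &DL = GV->getParent()->getDataLayout();
    uint64_t AllocSize = DL.getTypeAllocSize(ObjType);
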
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index 2a5ac23..6701c66 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -33,7 +33,7 @@ static const unsigned CodeModelLargeSize = 256;
Mangler &Mang,
const TargetMachine &TM) const override;
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
index e23aef3..b2cb889 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -37,7 +37,7 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> {
const XCoreTargetLowering *getTLI() const { return TLI; }
public:
- explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F)
+ explicit XCoreTTIImpl(const XCoreTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4762011..0e05129 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -34,8 +34,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -63,7 +66,8 @@ namespace {
///
struct ArgPromotion : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -81,7 +85,8 @@ namespace {
bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
- bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+ AAResults &AAR) const;
CallGraphNode *DoPromotion(Function *F,
SmallPtrSetImpl<Argument*> &ArgsToPromote,
SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
@@ -90,15 +95,15 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- if (I->getType()->isPointerTy())
- PointerArgs.push_back(I);
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
if (PointerArgs.empty()) return nullptr;
// Second check: make sure that all callers are direct callers. We can't
@@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const DataLayout &DL = F->getParent()->getDataLayout();
+ // We need to manually construct BasicAA directly in order to disable its use
+ // of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// If all the elements are single-value types, we can promote it.
bool AllSimple = true;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (!STy->getElementType(i)->isSingleValueType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
AllSimple = false;
break;
}
@@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (isSelfRecursive) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
bool RecursiveType = false;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- if (STy->getElementType(i) == PtrArg->getType()) {
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
RecursiveType = true;
break;
}
@@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
ArgsToPromote.insert(PtrArg);
}
@@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
- bool isByValOrInAlloca) const {
+ bool isByValOrInAlloca,
+ AAResults &AAR) const {
typedef std::set<IndicesVector> GEPIndicesSet;
// Quick exit for unused arguments
@@ -453,12 +467,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// First, iterate the entry block and mark loads of (geps of) arguments as
// safe.
- BasicBlock *EntryBlock = Arg->getParent()->begin();
+ BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
- I != E; ++I)
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ for (Instruction &I : EntryBlock)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
V = GEP->getPointerOperand();
@@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
- getAnalysis<AliasAnalysis>().deleteValue(GEP);
GEP->eraseFromParent();
// TODO: This runs the above loop over and over again for dead GEPs
        // Couldn't we just increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca);
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
}
// Ensure that all of the indices are constants.
@@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// blocks we know to be transparent to the load.
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
// the load itself.
@@ -572,8 +582,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
- if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
- AliasAnalysis::Mod))
+ if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -581,7 +590,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// loading block.
for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
- if (AA.canBasicBlockModify(*TranspBB, Loc))
+ if (AAR.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
@@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
unsigned ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(I)) {
+ } else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
@@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
Type *SrcTy;
@@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->copyAttributesFrom(F);
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on subsequent argument
- // promotions of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
AttributesVec.clear();
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Get the alias analysis information that we need to update to reflect our
- // changes.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
// Get the callgraph information that we need to update to reflect our
// changes.
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ArgIndex = 1;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
if (CallPAL.hasAttributes(ArgIndex)) {
@@ -783,7 +781,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AttributesVec.
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
}
- } else if (ByValArgsToTransform.count(I)) {
+ } else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
@@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
} else if (!I->use_empty()) {
// Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
// Store the Value* version of the indices in here, but declare it now
// for reuse.
std::vector<Value*> Ops;
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
if (!SI->second.empty()) {
Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
@@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Args.clear();
AttributesVec.clear();
- // Update the alias analysis implementation to know that we are replacing
- // the old call with a new one.
- AA.replaceWithNewValue(Call, New);
-
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
@@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
//
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
- AA.replaceWithNewValue(I, I2);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
continue;
}
- if (ByValArgsToTransform.count(I)) {
+ if (ByValArgsToTransform.count(&*I)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
- Instruction *InsertPt = NF->begin()->begin();
+ Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(I2++, Idx, InsertPt);
+ new StoreInst(&*I2++, Idx, InsertPt);
}
// Anything that used the arg should now use the alloca.
I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(I);
- AA.replaceWithNewValue(I, TheAlloca);
+ TheAlloca->takeName(&*I);
// If the alloca is used in a call, we must clear the tail flag since
// the callee now uses an alloca from the caller.
@@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
continue;
}
- if (I->use_empty()) {
- AA.deleteValue(I);
+ if (I->use_empty())
continue;
- }
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(I2);
- AA.replaceWithNewValue(LI, I2);
+ LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
<< "' in function '" << F->getName() << "'\n");
@@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// the argument specified by ArgNo.
while (!GEP->use_empty()) {
LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(TheArg);
- AA.replaceWithNewValue(L, TheArg);
+ L->replaceAllUsesWith(&*TheArg);
L->eraseFromParent();
}
- AA.deleteValue(GEP);
GEP->eraseFromParent();
}
}
@@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
std::advance(I2, ArgIndices.size());
}
- // Tell the alias analysis that the old function is about to disappear.
- AA.replaceWithNewValue(F, NF);
-
-
NF_CGN->stealCalledFunctionsFrom(CG[F]);
// Now that the old function is dead, delete it. If there is a dangling
@@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
- FunctionDIs = makeSubprogramMap(CG.getModule());
return CallGraphSCCPass::doInitialization(CG);
}
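
The ArgumentPromotion changes replace the retired AliasAnalysis pass group with an AAResults object built by hand, since a legacy CGSCC pass cannot depend on function-level analyses directly. The setup, as used in the hunks above (`this` is the pass, F the function being examined):

    // Build BasicAA stand-alone, then aggregate the available AA results
    // around it; queries such as AAR.canBasicBlockModify(*TranspBB, Loc)
    // replace the old getAnalysis<AliasAnalysis>() calls.
    BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
    AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
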
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8ce7646..0aa49d6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// If this GV is dead, remove it.
GV->removeDeadConstantUsers();
@@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// invalidating the Constant* pointers in CMap.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Only process constants with initializers in the default address space.
if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
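
The `&*GVI++` spelling above is the erase-safe variant of the iterator hardening: dereference explicitly, and advance before the loop body can delete the element. Minimal sketch:

    // Advance first: erasing GV inside the body would invalidate GVI.
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E;) {
      GlobalVariable *GV = &*GVI++;
      GV->removeDeadConstantUsers();
    }
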
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 0000000..5bbb751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitset's found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cross-dso-cfi"
+
+STATISTIC(TypeIds, "Number of unique type identifiers");
+
+namespace {
+
+struct CrossDSOCFI : public ModulePass {
+ static char ID;
+ CrossDSOCFI() : ModulePass(ID) {
+ initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+ MDNode *VeryLikelyWeights;
+
+ ConstantInt *extractBitSetTypeId(MDNode *MD);
+ void buildCFICheck();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false,
+ false)
+INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false)
+char CrossDSOCFI::ID = 0;
+
+ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; }
+
+bool CrossDSOCFI::doInitialization(Module &Mod) {
+ M = &Mod;
+ VeryLikelyWeights =
+ MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1);
+
+ return false;
+}
+
+/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode.
+ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
+ // This check excludes vtables for classes inside anonymous namespaces.
+ auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ if (!TM)
+ return nullptr;
+ auto C = dyn_cast_or_null<ConstantInt>(TM->getValue());
+ if (!C) return nullptr;
+ // We are looking for i64 constants.
+ if (C->getBitWidth() != 64) return nullptr;
+
+ // Sanity check.
+ auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1));
+ // Can be null if a function was removed by an optimization.
+ if (FM) {
+ auto F = dyn_cast<Function>(FM->getValue());
+ // But can never be a function declaration.
+ assert(!F || !F->isDeclaration());
+ (void)F; // Suppress unused variable warning in the no-asserts build.
+ }
+ return C;
+}
+
+/// buildCFICheck - emits __cfi_check for the current module.
+void CrossDSOCFI::buildCFICheck() {
+ // FIXME: verify that __cfi_check ends up near the end of the code section,
+ // but before the jump slots created in LowerBitSets.
+ llvm::DenseSet<uint64_t> BitSetIds;
+ NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ if (BitSetNM)
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I)
+ if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I)))
+ BitSetIds.insert(TypeId->getZExtValue());
+
+ LLVMContext &Ctx = M->getContext();
+ Constant *C = M->getOrInsertFunction(
+ "__cfi_check",
+ FunctionType::get(
+ Type::getVoidTy(Ctx),
+ {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))},
+ false));
+ Function *F = dyn_cast<Function>(C);
+ F->setAlignment(4096);
+ auto args = F->arg_begin();
+ Argument &CallSiteTypeId = *(args++);
+ CallSiteTypeId.setName("CallSiteTypeId");
+ Argument &Addr = *(args++);
+ Addr.setName("Addr");
+ assert(args == F->arg_end());
+
+ BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
+
+ BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F);
+ IRBuilder<> IRBTrap(TrapBB);
+ Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
+ llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ IRBTrap.CreateUnreachable();
+
+ BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
+ IRBuilder<> IRBExit(ExitBB);
+ IRBExit.CreateRetVoid();
+
+ IRBuilder<> IRB(BB);
+ SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size());
+ for (uint64_t TypeId : BitSetIds) {
+ ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
+ BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
+ IRBuilder<> IRBTest(TestBB);
+ Function *BitsetTestFn =
+ Intrinsic::getDeclaration(M, Intrinsic::bitset_test);
+
+ Value *Test = IRBTest.CreateCall(
+ BitsetTestFn, {&Addr, MetadataAsValue::get(
+ Ctx, ConstantAsMetadata::get(CaseTypeId))});
+ BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB);
+ BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);
+
+ SI->addCase(CaseTypeId, TestBB);
+ ++TypeIds;
+ }
+}
+
+bool CrossDSOCFI::runOnModule(Module &M) {
+ if (M.getModuleFlag("Cross-DSO CFI") == nullptr)
+ return false;
+ buildCFICheck();
+ return true;
+}
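
CrossDSOCFI.cpp is new in this import. runOnModule gates on a module flag, so the pass is inert unless a front end opts the module in; a sketch of that opt-in (the flag name comes from the code above; the Override behavior and `using namespace llvm` are illustrative assumptions):

    // Mark the module for cross-DSO CFI so buildCFICheck() emits __cfi_check.
    M.addModuleFlag(Module::Override, "Cross-DSO CFI", 1);
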
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d044764..4de3d95 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -35,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <set>
#include <tuple>
@@ -121,14 +122,6 @@ namespace {
typedef SmallVector<RetOrArg, 5> UseVector;
- // Map each LLVM function to corresponding metadata with debug info. If
- // the function is replaced with another one, we should patch the pointer
- // to LLVM function in metadata.
- // As the code generation for module is finished (and DIBuilder is
- // finalized) we assume that subprogram descriptors won't be changed, and
- // they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
-
protected:
// DAH uses this to specify a different ID.
explicit DAE(char &ID) : ModulePass(ID) {}
@@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (Fn.hasAddressTaken())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked)) {
+ return false;
+ }
+
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
@@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, Fn.getLinkage());
NF->copyAttributesFrom(&Fn);
- Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
NF->takeName(&Fn);
// Loop over all of the callers of the function, transforming the call sites
@@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
I2 = NF->arg_begin(); I != E; ++I, ++I2) {
// Move the name and users over to the new version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(&Fn);
- if (DI != FunctionDIs.end()) {
- DISubprogram *SP = DI->second;
- SP->replaceFunction(NF);
- // Ensure the map is updated so it can be reused on non-varargs argument
- // eliminations of the same function.
- FunctionDIs.erase(DI);
- FunctionDIs[NF] = SP;
- }
+ NF->setSubprogram(Fn.getSubprogram());
// Fix up any BlockAddresses that refer to the function.
Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
@@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked))
+ return false;
+
if (Fn.use_empty())
return false;
SmallVector<unsigned, 8> UnusedArgs;
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
- I != E; ++I) {
- Argument *Arg = I;
-
- if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr())
- UnusedArgs.push_back(Arg->getArgNo());
+ for (Argument &Arg : Fn.args()) {
+ if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ UnusedArgs.push_back(Arg.getArgNo());
}
if (UnusedArgs.empty())
@@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
if (F) {
// Used in a direct call.
+ // The function argument is live if it is used as a bundle operand.
+ if (CS.isBundleOperand(U))
+ return Live;
+
// Find the argument number. We know for sure that this use is an
// argument, since if it was the function argument this would be an
      // indirect call and then we know we can't be looking at a value of the
@@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) {
return;
}
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (F.hasFnAttribute(Attribute::Naked)) {
+ MarkLive(F);
+ return;
+ }
+
unsigned RetCount = NumRetVals(&F);
// Assume all return values are dead
typedef SmallVector<Liveness, 5> RetVals;
@@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) {
} else {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
- Result = SurveyUses(AI, MaybeLiveArgUses);
+ Result = SurveyUses(&*AI, MaybeLiveArgUses);
}
// Mark the result.
@@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NF->setAttributes(NewPAL);
// Insert the new function before the old function, so we won't be processing
// it again.
- F->getParent()->getFunctionList().insert(F, NF);
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
// Loop over all of the callers of the function, transforming the call sites
@@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call);
+ Args, "", Call->getParent());
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(NewCallPAL);
} else {
@@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
" must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock::iterator IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP)) ++IP;
- InsertPt = IP;
+ BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest());
+ InsertPt = &*NewEdge->getFirstInsertionPt();
}
// We used to return a struct or array. Instead of doing smart stuff
@@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
// If this is a live argument, move the name and users over to the new
// version.
- I->replaceAllUsesWith(I2);
- I2->takeName(I);
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
++I2;
} else {
// If this argument is dead, replace any uses of it with null constants
@@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end())
- DI->second->replaceFunction(NF);
+ NF->setSubprogram(F->getSubprogram());
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -1092,9 +1096,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
bool DAE::runOnModule(Module &M) {
bool Changed = false;
- // Collect debug info descriptors for functions.
- FunctionDIs = makeSubprogramMap(M);
-
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
@@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
// Increment now, because the function will probably get removed (ie.
// replaced by a new one).
- Function *F = I++;
+ Function *F = &*I++;
Changed |= RemoveDeadStuffFromFunction(F);
}
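
Three hunks in DeadArgumentElimination add the same early-out; as a minimal guard it reads:

    // Naked functions are effectively all inline assembly; the asm may use
    // an argument or depend on the frame layout in ways IR-level analysis
    // cannot see, so never rewrite their signatures.
    if (Fn.hasFnAttribute(Attribute::Naked))
      return false;
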
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 67ba72d..af313a6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,4 +1,5 @@
-//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===//
+//===-- ElimAvailExtern.cpp - DCE unreachable internal functions
+//----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
namespace {
- struct EliminateAvailableExternally : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- EliminateAvailableExternally() : ModulePass(ID) {
- initializeEliminateAvailableExternallyPass(
- *PassRegistry::getPassRegistry());
- }
+struct EliminateAvailableExternally : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternally() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyPass(
+ *PassRegistry::getPassRegistry());
+ }
- // run - Do the EliminateAvailableExternally pass on the specified module,
- // optionally updating the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) override;
- };
+ // run - Do the EliminateAvailableExternally pass on the specified module,
+ // optionally updating the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M) override;
+};
}
char EliminateAvailableExternally::ID = 0;
@@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) {
bool Changed = false;
// Drop initializers of available externally global variables.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (GlobalVariable &GV : M.globals()) {
+ if (!GV.hasAvailableExternallyLinkage())
continue;
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
- I->removeDeadConstantUsers();
- I->setLinkage(GlobalValue::ExternalLinkage);
+ GV.removeDeadConstantUsers();
+ GV.setLinkage(GlobalValue::ExternalLinkage);
NumVariables++;
+ Changed = true;
}
// Drop the bodies of available externally functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->hasAvailableExternallyLinkage())
+ for (Function &F : M) {
+ if (!F.hasAvailableExternallyLinkage())
continue;
- if (!I->isDeclaration())
+ if (!F.isDeclaration())
// This will set the linkage to external
- I->deleteBody();
- I->removeDeadConstantUsers();
+ F.deleteBody();
+ F.removeDeadConstantUsers();
NumFunctions++;
+ Changed = true;
}
return Changed;
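
Besides the re-indentation, ElimAvailExtern now reports its changed status honestly: before this hunk the pass could drop bodies and initializers yet still return false. The corrected pattern, sketched for the function loop:

    // Report a change whenever a definition is actually dropped.
    bool Changed = false;
    for (Function &F : M) {
      if (!F.hasAvailableExternallyLinkage())
        continue;
      if (!F.isDeclaration())
        F.deleteBody(); // also resets the linkage to external
      F.removeDeadConstantUsers();
      Changed = true;
    }
    return Changed;
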
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index b9462f2..1a3b925 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -83,7 +83,7 @@ namespace {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -103,7 +103,7 @@ namespace {
// Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -124,7 +124,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- bool Delete = deleteStuff == (bool)Named.count(CurI);
+ bool Delete = deleteStuff == (bool)Named.count(&*CurI);
makeVisible(*CurI, Delete);
if (Delete) {
@@ -143,7 +143,7 @@ namespace {
}
CurI->replaceAllUsesWith(Declaration);
- delete CurI;
+ delete &*CurI;
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 0000000..816291d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string>
+ ForceAttributes("force-attribute", cl::Hidden,
+ cl::desc("Add an attribute to a function. This should be a "
+ "pair of 'function-name:attribute-name', for "
+                                     "example -force-attribute=foo:noinline. This "
+ "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+ return StringSwitch<Attribute::AttrKind>(Kind)
+ .Case("alwaysinline", Attribute::AlwaysInline)
+ .Case("builtin", Attribute::Builtin)
+ .Case("cold", Attribute::Cold)
+ .Case("convergent", Attribute::Convergent)
+ .Case("inlinehint", Attribute::InlineHint)
+ .Case("jumptable", Attribute::JumpTable)
+ .Case("minsize", Attribute::MinSize)
+ .Case("naked", Attribute::Naked)
+ .Case("nobuiltin", Attribute::NoBuiltin)
+ .Case("noduplicate", Attribute::NoDuplicate)
+ .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+ .Case("noinline", Attribute::NoInline)
+ .Case("nonlazybind", Attribute::NonLazyBind)
+ .Case("noredzone", Attribute::NoRedZone)
+ .Case("noreturn", Attribute::NoReturn)
+ .Case("norecurse", Attribute::NoRecurse)
+ .Case("nounwind", Attribute::NoUnwind)
+ .Case("optnone", Attribute::OptimizeNone)
+ .Case("optsize", Attribute::OptimizeForSize)
+ .Case("readnone", Attribute::ReadNone)
+ .Case("readonly", Attribute::ReadOnly)
+ .Case("argmemonly", Attribute::ArgMemOnly)
+ .Case("returns_twice", Attribute::ReturnsTwice)
+ .Case("safestack", Attribute::SafeStack)
+ .Case("sanitize_address", Attribute::SanitizeAddress)
+ .Case("sanitize_memory", Attribute::SanitizeMemory)
+ .Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("ssp", Attribute::StackProtect)
+ .Case("sspreq", Attribute::StackProtectReq)
+ .Case("sspstrong", Attribute::StackProtectStrong)
+ .Case("uwtable", Attribute::UWTable)
+ .Default(Attribute::None);
+}
+
+/// If F has any forced attributes given on the command line, add them.
+static void addForcedAttributes(Function &F) {
+ for (auto &S : ForceAttributes) {
+ auto KV = StringRef(S).split(':');
+ if (KV.first != F.getName())
+ continue;
+
+ auto Kind = parseAttrKind(KV.second);
+ if (Kind == Attribute::None) {
+ DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+ << " unknown or not handled!\n");
+ continue;
+ }
+ if (F.hasFnAttribute(Kind))
+ continue;
+ F.addFnAttr(Kind);
+ }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+ if (ForceAttributes.empty())
+ return PreservedAnalyses::all();
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+  // Just conservatively invalidate analyses; this isn't likely to be important.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeForceFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (ForceAttributes.empty())
+ return false;
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+ // Conservatively assume we changed something.
+ return true;
+ }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+ "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() {
+ return new ForceFunctionAttrsLegacyPass();
+}
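
ForceFunctionAttrs.cpp is also new: a debugging aid driven entirely by the -force-attribute option, exposed both as a new-pass-manager pass and as the legacy 'forceattrs' ModulePass registered above. A sketch of the new-PM entry point (surrounding module setup assumed):

    // With no -force-attribute occurrences on the command line this
    // returns PreservedAnalyses::all() without touching the module.
    ForceFunctionAttrsPass P;
    PreservedAnalyses PA = P.run(M);

From opt, the equivalent would be an invocation along the lines of `opt -forceattrs -force-attribute=foo:noinline` (shape assumed from the registration above).
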
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index bb5e64a..6dcfb3f 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,14 +23,21 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
namespace {
- struct FunctionAttrs : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
- }
-
- // runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
- bool AddReadAttrs(const CallGraphSCC &SCC);
-
- // AddArgumentAttrs - Deduce nocapture attributes for the SCC.
- bool AddArgumentAttrs(const CallGraphSCC &SCC);
-
- // IsFunctionMallocLike - Does this function allocate new memory?
- bool IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &) const;
-
- // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
- bool AddNoAliasAttrs(const CallGraphSCC &SCC);
-
- // Utility methods used by inferPrototypeAttributes to add attributes
- // and maintain annotation statistics.
-
- void setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- }
- }
-
- void setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- }
- }
-
- void setOnlyReadsMemory(Function &F, unsigned n) {
- if (!F.onlyReadsMemory(n)) {
- F.setOnlyReadsMemory(n);
- ++NumAnnotated;
- }
- }
-
- void setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- }
- }
-
- // inferPrototypeAttributes - Analyze the name and prototype of the
- // given function and set any applicable attributes. Returns true
- // if any attributes were set and false otherwise.
- bool inferPrototypeAttributes(Function &F);
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
- // annotateLibraryCalls - Adds attributes to well-known standard library
- // call declarations.
- bool annotateLibraryCalls(const CallGraphSCC &SCC);
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ bool doInitialization(CallGraph &CG) override {
+ Revisit.clear();
+ return false;
+ }
+ bool doFinalization(CallGraph &CG) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- private:
- AliasAnalysis *AA;
- TargetLibraryInfo *TLI;
- };
+private:
+ TargetLibraryInfo *TLI;
+ SmallVector<WeakVH,16> Revisit;
+};
}
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+ "Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+namespace {
+/// The three kinds of memory access relevant to 'readonly' and
+/// 'readnone' attributes.
+enum MemoryAccessKind {
+ MAK_ReadNone = 0,
+ MAK_ReadOnly = 1,
+ MAK_MayWrite = 2
+};
+}
-/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
+ if (MRB == FMRB_DoesNotAccessMemory)
+ // Already perfect!
+ return MAK_ReadNone;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F.isDeclaration() || F.mayBeOverridden()) {
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return MAK_ReadOnly;
+
+ // Conservatively assume it writes to memory.
+ return MAK_MayWrite;
+ }
- // Check if any of the functions in the SCC read or write memory. If they
- // write memory then they can't be marked readnone or readonly.
+ // Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - assume it may write
- // memory and give up.
- return false;
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
- if (MRB == AliasAnalysis::DoesNotAccessMemory)
- // Already perfect!
- continue;
+ // If the call doesn't access memory, we're done.
+ if (!(MRB & MRI_ModRef))
+ continue;
- // Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden()) {
- if (!AliasAnalysis::onlyReadsMemory(MRB))
- // May write memory. Just give up.
- return false;
+ if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & MRI_Mod)
+ return MAK_MayWrite;
+ // If it reads, note it.
+ if (MRB & MRI_Ref)
+ ReadsMemory = true;
+ continue;
+ }
- ReadsMemory = true;
- continue;
- }
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
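+ // For example (illustrative), "memset(&Buf, 0, sizeof(Buf))" where Buf is
+ // a local alloca touches no memory visible to our caller.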
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (!Arg->getType()->isPtrOrPtrVectorTy())
+ continue;
- // Scan the function body for instructions that may read or write memory.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+ MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- // Some instructions can be ignored even if they read or write memory.
- // Detect these now, skipping to the next instruction if one is found.
- CallSite CS(cast<Value>(I));
- if (CS) {
- // Ignore calls to functions in the same SCC.
- if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ // Skip accesses to local or constant memory as they don't impact the
+ // externally visible mod/ref behavior.
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
- // If the call doesn't access arbitrary memory, we may be able to
- // figure out something.
- if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- // If the call does access argument pointees, check each argument.
- if (AliasAnalysis::doesAccessArgPointees(MRB))
- // Check whether all pointer arguments point to local memory, and
- // ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
- CI != CE; ++CI) {
- Value *Arg = *CI;
- if (Arg->getType()->isPointerTy()) {
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
-
- MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
- if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
- if (MRB & AliasAnalysis::Mod)
- // Writes non-local memory. Give up.
- return false;
- if (MRB & AliasAnalysis::Ref)
- // Ok, it reads non-local memory.
- ReadsMemory = true;
- }
- }
- }
- continue;
- }
- // The call could access any memory. If that includes writes, give up.
- if (MRB & AliasAnalysis::Mod)
- return false;
- // If it reads, note it.
- if (MRB & AliasAnalysis::Ref)
+
+ if (MRB & MRI_Mod)
+ // Writes non-local memory. Give up.
+ return MAK_MayWrite;
+ if (MRB & MRI_Ref)
+ // Ok, it reads non-local memory.
ReadsMemory = true;
- continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads from local memory. (Atomic is okay here.)
- if (!LI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(LI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore non-volatile stores to local memory. (Atomic is okay here.)
- if (!SI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(SI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
- // Ignore vaargs on local memory.
- MemoryLocation Loc = MemoryLocation::get(VI);
- if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ }
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
+ if (!LI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
+ if (!SI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ MemoryLocation Loc = MemoryLocation::get(VI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
- // Any remaining instructions need to be taken seriously! Check if they
- // read or write memory.
- if (I->mayWriteToMemory())
- // Writes memory. Just give up.
- return false;
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return MAK_MayWrite;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+
+ return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
- // If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (Function *F : SCCNodes) {
+ // Call the callable parameter to look up AA results for this function.
+ AAResults &AAR = AARGetter(*F);
+
+ switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ case MAK_MayWrite:
+ return false;
+ case MAK_ReadOnly:
+ ReadsMemory = true;
+ break;
+ case MAK_ReadNone:
+ // Nothing to do!
+ break;
}
}
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
+ for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
continue;
@@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Clear out any existing attributes.
AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
- F->removeAttributes(AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(),
- AttributeSet::FunctionIndex, B));
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ F->removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
// Add in the new attribute.
F->addAttribute(AttributeSet::FunctionIndex,
@@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
}
namespace {
- // For a given pointer Argument, this retains a list of Arguments of functions
- // in the same SCC that the pointer data flows into. We use this to build an
- // SCC of the arguments.
- struct ArgumentGraphNode {
- Argument *Definition;
- SmallVector<ArgumentGraphNode*, 4> Uses;
- };
-
- class ArgumentGraph {
- // We store pointers to ArgumentGraphNode objects, so it's important that
- // that they not move around upon insert.
- typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode *, 4> Uses;
+};
+
+class ArgumentGraph {
+ // We store pointers to ArgumentGraphNode objects, so it's important that
+ // they not move around upon insert.
+ typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+public:
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+};
- ArgumentMapTy ArgumentMap;
+/// This tracker checks whether callees are in the SCC, and if so it does not
+/// consider that a capture, instead adding it to the "Uses" list and
+/// continuing with the analysis.
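+/// For example (illustrative), given "static void f(int *X) { g(X); }" with g
+/// in the same SCC, passing X to g is recorded in Uses rather than treated as
+/// a capture.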
+struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SCCNodeSet &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
- // There is no root node for the argument graph, in fact:
- // void f(int *x, int *y) { if (...) f(x, y); }
- // is an example where the graph is disconnected. The SCCIterator requires a
- // single entry point, so we maintain a fake ("synthetic") root node that
- // uses every node. Because the graph is directed and nothing points into
- // the root, it will not participate in any SCCs (except for its own).
- ArgumentGraphNode SyntheticRoot;
+ void tooManyUses() override { Captured = true; }
- public:
- ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+ bool captured(const Use *U) override {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) {
+ Captured = true;
+ return true;
+ }
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+ Function *F = CS.getCalledFunction();
+ if (!F || F->isDeclaration() || F->mayBeOverridden() ||
+ !SCCNodes.count(F)) {
+ Captured = true;
+ return true;
+ }
- iterator begin() { return SyntheticRoot.Uses.begin(); }
- iterator end() { return SyntheticRoot.Uses.end(); }
- ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account for
+ // these.
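+ // For example (illustrative IR), in
+ //   invoke void @g(i8* %p) to label %ok unwind label %eh
+ // the use of %p precedes the callee and label operands, so the distance
+ // from arg_begin() is the argument index.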
- ArgumentGraphNode *operator[](Argument *A) {
- ArgumentGraphNode &Node = ArgumentMap[A];
- Node.Definition = A;
- SyntheticRoot.Uses.push_back(&Node);
- return &Node;
- }
- };
+ unsigned UseIndex =
+ std::distance(const_cast<const Use *>(CS.arg_begin()), U);
- // This tracker checks whether callees are in the SCC, and if so it does not
- // consider that a capture, instead adding it to the "Uses" list and
- // continuing with the analysis.
- struct ArgumentUsesTracker : public CaptureTracker {
- ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
- : Captured(false), SCCNodes(SCCNodes) {}
+ assert(UseIndex < CS.data_operands_size() &&
+ "Indirect function calls should have been filtered above!");
- void tooManyUses() override { Captured = true; }
+ if (UseIndex >= CS.getNumArgOperands()) {
+ // Data operand, but not an argument operand -- must be a bundle operand
+ assert(CS.hasOperandBundles() && "Must be!");
- bool captured(const Use *U) override {
- CallSite CS(U->getUser());
- if (!CS.getInstruction()) { Captured = true; return true; }
+ // CaptureTracking told us that we're being captured by an operand bundle
+ // use. In this case it does not matter if the callee is within our SCC
+ // or not -- we've been captured in some unknown way, and we have to be
+ // conservative.
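+ // Such a use looks like (illustrative IR):
+ //   call void @g() [ "deopt"(i32* %p) ]
+ // where %p is a data operand of the call but not an argument of @g.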
+ Captured = true;
+ return true;
+ }
- Function *F = CS.getCalledFunction();
- if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
-
- bool Found = false;
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
- PI != PE; ++PI, ++AI) {
- if (AI == AE) {
- assert(F->isVarArg() && "More params than args in non-varargs call");
- Captured = true;
- return true;
- }
- if (PI == U) {
- Uses.push_back(AI);
- Found = true;
- break;
- }
- }
- assert(Found && "Capturing call-site captured nothing?");
- (void)Found;
- return false;
+ if (UseIndex >= F->arg_size()) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
}
- bool Captured; // True only if certainly captured (used outside our SCC).
- SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+ Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
+ return false;
+ }
- const SmallPtrSet<Function*, 8> &SCCNodes;
- };
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument *, 4> Uses; // Uses within our SCC.
+
+ const SCCNodeSet &SCCNodes;
+};
}
namespace llvm {
- template<> struct GraphTraits<ArgumentGraphNode*> {
- typedef ArgumentGraphNode NodeType;
- typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+template <> struct GraphTraits<ArgumentGraphNode *> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
- static inline NodeType *getEntryNode(NodeType *A) { return A; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->Uses.begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->Uses.end();
- }
- };
- template<> struct GraphTraits<ArgumentGraph*>
- : public GraphTraits<ArgumentGraphNode*> {
- static NodeType *getEntryNode(ArgumentGraph *AG) {
- return AG->getEntryNode();
- }
- static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
- return AG->begin();
- }
- static ChildIteratorType nodes_end(ArgumentGraph *AG) {
- return AG->end();
- }
- };
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+};
+template <>
+struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
+};
}
-// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
+/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
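+/// For example, an argument that is only ever loaded from yields ReadOnly, one
+/// whose pointee is never accessed yields ReadNone, and any argument that may
+/// be written through yields None.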
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
- const SmallPtrSet<Argument*, 8> &SCCNodes) {
-
- SmallVector<Use*, 32> Worklist;
- SmallSet<Use*, 32> Visited;
- int Count = 0;
+ const SmallPtrSet<Argument *, 8> &SCCNodes) {
+
+ SmallVector<Use *, 32> Worklist;
+ SmallSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
if (A->hasInAllocaAttr())
@@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A,
// We don't need to track IsWritten. If A is written to, return immediately.
for (Use &U : A->uses()) {
- if (Count++ >= 20)
- return Attribute::None;
-
Visited.insert(&U);
Worklist.push_back(&U);
}
@@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
- Value *V = U->get();
switch (I->getOpcode()) {
case Instruction::BitCast:
@@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A,
return Attribute::None;
}
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) {
- if (A->get() == V) {
- if (AI == AE) {
- assert(F->isVarArg() &&
- "More params than args in non-varargs call.");
- return Attribute::None;
- }
- Captures &= !CS.doesNotCapture(A - B);
- if (SCCNodes.count(AI))
- continue;
- if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B))
- return Attribute::None;
- if (!CS.doesNotAccessMemory(A - B))
- IsRead = true;
- }
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account
+ // for these.
+
+ unsigned UseIndex = std::distance(CS.arg_begin(), U);
+
+ // U cannot be the callee operand use: since we're exploring the
+ // transitive uses of an Argument, having such a use be a callee would
+ // imply the CallSite is an indirect call or invoke; and we'd take the
+ // early exit above.
+ assert(UseIndex < CS.data_operands_size() &&
+ "Data operand use expected!");
+
+ bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();
+
+ if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ return Attribute::None;
}
+
+ Captures &= !CS.doesNotCapture(UseIndex);
+
+ // Since the optimizer (by design) cannot see the data flow corresponding
+ // to an operand bundle use, these cannot participate in the optimistic SCC
+ // analysis. Instead, we model the operand bundle uses as arguments in a
+ // call to a function external to the SCC.
+ if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
+ IsOperandBundleUse) {
+
+ // The accessors used on CallSite here do the right thing for calls and
+ // invokes with operand bundles.
+
+ if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
+ return Attribute::None;
+ if (!CS.doesNotAccessMemory(UseIndex))
+ IsRead = true;
+ }
+
AddUsersToWorklistIfCapturing();
break;
}
@@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A,
return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}
-/// AddArgumentAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
+/// Deduce nocapture attributes for the SCC.
+static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
- if (F && !F->isDeclaration() && !F->mayBeOverridden() &&
- !F->hasFnAttribute(Attribute::OptimizeNone))
- SCCNodes.insert(F);
- }
-
ArgumentGraph AG;
AttrBuilder B;
@@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Check each function in turn, determining which pointer arguments are not
// captured.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or function we're trying not to optimize - only a problem
- // for arguments that we pass to it.
- continue;
-
+ for (Function *F : SCCNodes) {
// Definitions with weak linkage may be overridden at linktime with
// something that captures pointers, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden())
@@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
F->getReturnType()->isVoidTy()) {
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
@@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
continue;
}
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
- A != E; ++A) {
- if (!A->getType()->isPointerTy()) continue;
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
+ if (!A->getType()->isPointerTy())
+ continue;
bool HasNonLocalUses = false;
if (!A->hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
- PointerMayBeCaptured(A, &Tracker);
+ PointerMayBeCaptured(&*A, &Tracker);
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
+ A->addAttr(
+ AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
- ArgumentGraphNode *Node = AG[A];
- for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
- UE = Tracker.Uses.end(); UI != UE; ++UI) {
+ ArgumentGraphNode *Node = AG[&*A];
+ for (SmallVectorImpl<Argument *>::iterator
+ UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end();
+ UI != UE; ++UI) {
Node->Uses.push_back(AG[*UI]);
if (*UI != A)
HasNonLocalUses = true;
@@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// Note that we don't allow any calls at all here, or else our result
// will be dependent on the iteration order through the functions in the
// SCC.
- SmallPtrSet<Argument*, 8> Self;
- Self.insert(A);
- Attribute::AttrKind R = determinePointerReadAttrs(A, Self);
+ SmallPtrSet<Argument *, 8> Self;
+ Self.insert(&*A);
+ Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
AttrBuilder B;
B.addAttribute(R);
@@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// made. If the definition doesn't have a 'nocapture' attribute by now, it
// captures.
- for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+ for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
- if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+ if (!ArgumentSCC[0]->Definition)
+ continue; // synthetic root node
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
@@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
SCCCaptured = true;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
- SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
@@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
- for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
- UE = N->Uses.end(); UI != UE; ++UI) {
+ for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end();
+ UI != UE; ++UI) {
Argument *A = (*UI)->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
continue;
@@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
break;
}
}
- if (SCCCaptured) continue;
+ if (SCCCaptured)
+ continue;
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
@@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
if (ReadAttr != Attribute::None) {
AttrBuilder B, R;
B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::ReadNone);
+ R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
@@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
return Changed;
}
-/// IsFunctionMallocLike - A function is malloc-like if it returns either null
-/// or a pointer that doesn't alias any other pointer visible to the caller.
-bool FunctionAttrs::IsFunctionMallocLike(Function *F,
- SmallPtrSet<Function*, 8> &SCCNodes) const {
+/// Tests whether a function is "malloc-like".
+///
+/// A function is "malloc-like" if it returns either null or a pointer that
+/// doesn't alias any other pointer visible to the caller.
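+///
+/// For example (illustrative), "char *dup(const char *S) { return S ?
+/// strdup(S) : 0; }" only ever returns null or a fresh allocation, so it is
+/// malloc-like.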
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
SmallSetVector<Value *, 8> FlowsToReturn;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
switch (RVI->getOpcode()) {
- // Extend the analysis by looking upwards.
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::AddrSpaceCast:
- FlowsToReturn.insert(RVI->getOperand(0));
- continue;
- case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(RVI);
- FlowsToReturn.insert(SI->getTrueValue());
- FlowsToReturn.insert(SI->getFalseValue());
- continue;
- }
- case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(RVI);
- for (Value *IncValue : PN->incoming_values())
- FlowsToReturn.insert(IncValue);
- continue;
- }
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
+ continue;
+ }
- // Check whether the pointer came from an allocation.
- case Instruction::Alloca:
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
- case Instruction::Call:
- case Instruction::Invoke: {
- CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
- break;
- if (CS.getCalledFunction() &&
- SCCNodes.count(CS.getCalledFunction()))
- break;
- } // fall-through
- default:
- return false; // Did not come from an allocation.
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
}
if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
@@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
return true;
}
-/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
- SmallPtrSet<Function*, 8> SCCNodes;
-
- // Fill SCCNodes with the elements of the SCC. Used for quickly
- // looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert((*I)->getFunction());
-
+/// Deduce noalias attributes for the SCC.
+static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// Check each function in turn, determining which functions return noalias
// pointers.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
-
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
- // External node or node we don't want to optimize - skip it;
- return false;
-
+ for (Function *F : SCCNodes) {
// Already noalias.
if (F->doesNotAlias(0))
continue;
@@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
if (F->isDeclaration() || F->mayBeOverridden())
return false;
- // We annotate noalias return values, which are only applicable to
+ // We annotate noalias return values, which are only applicable to
// pointer types.
if (!F->getReturnType()->isPointerTy())
continue;
- if (!IsFunctionMallocLike(F, SCCNodes))
+ if (!isFunctionMallocLike(F, SCCNodes))
return false;
}
bool MadeChange = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (Function *F : SCCNodes) {
if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
@@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
-/// inferPrototypeAttributes - Analyze the name and prototype of the
-/// given function and set any applicable attributes. Returns true
-/// if any attributes were set and false otherwise.
-bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
- if (F.hasFnAttribute(Attribute::OptimizeNone))
- return false;
+/// Tests whether this function is known to not return null.
+///
+/// Requires that the function returns a pointer.
+///
+/// Returns true if it believes the function will not return null, and sets
+/// \p Speculative based on whether the conclusion is speculative due to SCC
+/// calls.
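+///
+/// For example (illustrative), "int *f() { static int X; return &X; }" is
+/// known nonnull directly, whereas a function returning the result of a call
+/// into its own SCC is at best speculatively nonnull.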
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI, bool &Speculative) {
+ assert(F->getReturnType()->isPointerTy() &&
+ "nonnull only meaningful on pointer types");
+ Speculative = false;
- FunctionType *FTy = F.getFunctionType();
- LibFunc::Func TheLibFunc;
- if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
- return false;
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (BasicBlock &BB : *F)
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
- switch (TheLibFunc) {
- case LibFunc::strlen:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strchr:
- case LibFunc::strrchr:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strxfrm:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::strcmp: //0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: //0,1
- case LibFunc::strcoll: //0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: //
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::scanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::strdup:
- case LibFunc::strndup:
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat:
- case LibFunc::statvfs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::sscanf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::sprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::snprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::setitimer:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::system:
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::malloc:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::memcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memchr:
- case LibFunc::memrchr:
- if (FTy->getNumParams() != 3)
- return false;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- break;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::memcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::memalign:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::mkdir:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::mktime:
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::realloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::read:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::rewind:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::rename:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::readlink:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::write:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::bcopy:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::bcmp:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::bzero:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::calloc:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::chmod:
- case LibFunc::chown:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::access:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fdopen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::ferror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- break;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::fgets:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- break;
- case LibFunc::fread:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fwrite:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::fputs:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fgetpos:
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::getenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::gets:
- case LibFunc::getchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::getitimer:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::getpwnam:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::ungetc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::uname:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::unlink:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::unsetenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::utime:
- case LibFunc::utimes:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::pread:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pread" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::pwrite:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pwrite" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::putchar:
- setDoesNotThrow(F);
- break;
- case LibFunc::popen:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::pclose:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::vscanf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vsscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vfscanf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::valloc:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::vprintf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::vsnprintf:
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- setOnlyReadsMemory(F, 3);
- break;
- case LibFunc::open:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::opendir:
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::tmpfile:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::times:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- break;
- case LibFunc::lstat:
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::lchown:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::qsort:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return false;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- break;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_strtok_r:
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::under_IO_getc:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::under_IO_putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::dunder_isoc99_scanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::dunder_isoc99_sscanf:
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fopen64:
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- setOnlyReadsMemory(F, 1);
- setOnlyReadsMemory(F, 2);
- break;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- break;
- case LibFunc::tmpfile64:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- break;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- break;
- case LibFunc::open64:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ // If this value is locally known to be non-null, we're good.
+ if (isKnownNonNull(RetVal, &TLI))
+ continue;
+
+ // Otherwise, we need to look upwards since we can't make any local
+ // conclusions.
+ Instruction *RVI = dyn_cast<Instruction>(RetVal);
+ if (!RVI)
return false;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F, 1);
- break;
- case LibFunc::gettimeofday:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ Function *Callee = CS.getCalledFunction();
+ // A call to a node within the SCC is assumed to return null until
+ // proven otherwise.
+ if (Callee && SCCNodes.count(Callee)) {
+ Speculative = true;
+ continue;
+ }
return false;
- // Currently some platforms have the restrict keyword on the arguments to
- // gettimeofday. To be conservative, do not add noalias to gettimeofday's
- // arguments.
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- break;
- default:
- // Didn't mark any attributes.
- return false;
+ }
+ default:
+ return false; // Unknown source, may be null
+ }
+ llvm_unreachable("should have either continued or returned");
}
return true;
}
-/// annotateLibraryCalls - Adds attributes to well-known standard library
-/// call declarations.
-bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+/// Deduce nonnull attributes for the SCC.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI) {
+ // Speculate that all functions in the SCC return only nonnull
+ // pointers. We may refute this as we analyze functions.
+ bool SCCReturnsNonNull = true;
+
bool MadeChange = false;
- // Check each function in turn annotating well-known library function
- // declarations with attributes.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ // Check each function in turn, determining which functions return nonnull
+ // pointers.
+ for (Function *F : SCCNodes) {
+ // Already nonnull.
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate nonnull return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
- if (F && F->isDeclaration())
- MadeChange |= inferPrototypeAttributes(*F);
+ bool Speculative = false;
+ if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+ if (!Speculative) {
+ // Mark the function eagerly since we may discover a function
+ // which prevents us from speculating about the entire SCC.
+ DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
+ continue;
+ }
+ // At least one function returns something which could be null, can't
+ // speculate any more.
+ SCCReturnsNonNull = false;
+ }
+
+ if (SCCReturnsNonNull) {
+ for (Function *F : SCCNodes) {
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull) ||
+ !F->getReturnType()->isPointerTy())
+ continue;
+
+ DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
}
return MadeChange;
}
+static bool setDoesNotRecurse(Function &F) {
+ if (F.doesNotRecurse())
+ return false;
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
+}
+
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+ SmallVectorImpl<WeakVH> &Revisit) {
+ // Try to identify functions that do not recurse.
+
+ // If the SCC contains multiple nodes we know for sure there is recursion.
+ if (!SCC.isSingular())
+ return false;
+
+ const CallGraphNode *CGN = *SCC.begin();
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration() || F->doesNotRecurse())
+ return false;
+
+ // If all of the calls in F are identifiable and are to norecurse functions, F
+ // is norecurse. This check also detects self-recursion as F is not currently
+ // marked norecurse, so any call from F to F will not be marked norecurse.
+ if (std::all_of(CGN->begin(), CGN->end(),
+ [](const CallGraphNode::CallRecord &CR) {
+ Function *F = CR.second->getFunction();
+ return F && F->doesNotRecurse();
+ }))
+ // Every callee is known norecurse (and none is F itself), so F is norecurse.
+ return setDoesNotRecurse(*F);
+
+ // We know that F is not obviously recursive, but we haven't been able to
+ // prove that it doesn't actually recurse. Add it to the Revisit list to try
+ // again top-down later.
+ Revisit.push_back(F);
+ return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+ // If F is internal and all uses are in norecurse functions, then F is also
+ // norecurse.
+ if (F->doesNotRecurse())
+ return false;
+ if (F->hasInternalLinkage()) {
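+ // Every user must be an instruction inside a norecurse function; a
+ // non-instruction user (e.g. a constant expression) could let the
+ // address escape, so bail out conservatively in that case.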
+ for (auto *U : F->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!I->getParent()->getParent()->doesNotRecurse())
+ return false;
+ } else {
+ return false;
+ }
+ return setDoesNotRecurse(*F);
+ }
+ return false;
+}
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
- AA = &getAnalysis<AliasAnalysis>();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ bool Changed = false;
- bool Changed = annotateLibraryCalls(SCC);
- Changed |= AddReadAttrs(SCC);
- Changed |= AddArgumentAttrs(SCC);
- Changed |= AddNoAliasAttrs(SCC);
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
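+ // Note that each call re-creates BAR and AAR, so a returned reference
+ // is only valid until AARGetter is invoked for the next function.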
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+ // whether a given CallGraphNode is in this SCC. Also track whether there are
+ // any external or opt-none nodes that will prevent us from optimizing any
+ // part of the SCC.
+ SCCNodeSet SCCNodes;
+ bool ExternalNode = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+ // External node or a function we're trying not to optimize - we both avoid
+ // transforming it and avoid leveraging the information it provides.
+ ExternalNode = true;
+ continue;
+ }
+
+ SCCNodes.insert(F);
+ }
+
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!ExternalNode) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes, *TLI);
+ }
+
+ Changed |= addNoRecurseAttrs(SCC, Revisit);
+ return Changed;
+}
+
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
+ // the rules we have for identifying norecurse functions work best with a
+ // top-down walk, so look again at all the functions we previously marked as
+ // worth revisiting, in top-down order.
+ for (auto &F : reverse(Revisit))
+ if (F)
+ Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 0000000..d8b677b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Function import based on summaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+static cl::opt<unsigned> ImportInstrLimit(
+ "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+ cl::desc("Only import functions with less than N instructions"));
+
+// Lazily load a module from \p FileName in \p Context.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+ LLVMContext &Context) {
+ SMDiagnostic Err;
+ DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+ std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ if (!Result) {
+ Err.print("function-import", errs());
+ return nullptr;
+ }
+
+ Result->materializeMetadata();
+ UpgradeDebugInfo(*Result);
+
+ return Result;
+}
+
+namespace {
+/// Helper to load a Module from a file on demand and cache it for subsequent
+/// queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+ /// Cache of lazily loaded module for import.
+ StringMap<std::unique_ptr<Module>> ModuleMap;
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+ /// Create the loader; Modules are produced by \p createLazyModule.
+ ModuleLazyLoaderCache(std::function<
+ std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+ : createLazyModule(createLazyModule) {}
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ Module &operator()(StringRef FileName);
+
+ std::unique_ptr<Module> takeModule(StringRef FileName) {
+ auto I = ModuleMap.find(FileName);
+ assert(I != ModuleMap.end());
+ std::unique_ptr<Module> Ret = std::move(I->second);
+ ModuleMap.erase(I);
+ return Ret;
+ }
+};
+
+// Get a Module for \p Identifier from the cache, or load it lazily.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+ auto &Module = ModuleMap[Identifier];
+ if (!Module)
+ Module = createLazyModule(Identifier);
+ return *Module;
+}
+} // anonymous namespace
+
+/// Walk through the instructions in \p F looking for external
+/// calls not already in the \p CalledFunctions set. If any are
+/// found they are added to the \p Worklist for importing.
+static void findExternalCalls(const Module &DestModule, Function &F,
+ const FunctionInfoIndex &Index,
+ StringSet<> &CalledFunctions,
+ SmallVector<StringRef, 64> &Worklist) {
+ // We need to suffix calls to internal functions imported from other
+ // modules; prepare the suffix ahead of time.
+ std::string Suffix;
+ if (F.getParent() != &DestModule)
+ Suffix =
+ (Twine(".llvm.") +
+ Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
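+ // (A local function is promoted when it is imported; appending the
+ // source module's id keeps the promoted name unique across modules.)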
+
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (isa<CallInst>(I)) {
+ auto CalledFunction = cast<CallInst>(I).getCalledFunction();
+ // Insert any new external calls that have not already been
+ // added to set/worklist.
+ if (!CalledFunction || !CalledFunction->hasName())
+ continue;
+ // Ignore intrinsics early
+ if (CalledFunction->isIntrinsic()) {
+ assert(CalledFunction->getIntrinsicID() != 0);
+ continue;
+ }
+ auto ImportedName = CalledFunction->getName();
+ auto Renamed = (ImportedName + Suffix).str();
+ // Rename internal functions
+ if (CalledFunction->hasInternalLinkage()) {
+ ImportedName = Renamed;
+ }
+ auto It = CalledFunctions.insert(ImportedName);
+ if (!It.second) {
+ // This is a call to a function we already considered, skip.
+ continue;
+ }
+ // Ignore functions already present in the destination module
+ auto *SrcGV = DestModule.getNamedValue(ImportedName);
+ if (SrcGV) {
+ assert(isa<Function>(SrcGV) && "Name collision during import");
+ if (!cast<Function>(SrcGV)->isDeclaration()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+ << ImportedName << " already in DestinationModule\n");
+ continue;
+ }
+ }
+
+ Worklist.push_back(It.first->getKey());
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Adding callee for : " << ImportedName << " : "
+ << F.getName() << "\n");
+ }
+ }
+ }
+}
+
+// Helper function: given a worklist and an index, process the entire worklist
+// and decide what to import based on the summary information.
+//
+// Nothing is actually imported; functions are materialized in their source
+// module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Functions to
+// import per Module.
+static void GetImportList(Module &DestModule,
+ SmallVector<StringRef, 64> &Worklist,
+ StringSet<> &CalledFunctions,
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ &ModuleToFunctionsToImportMap,
+ const FunctionInfoIndex &Index,
+ ModuleLazyLoaderCache &ModuleLoaderCache) {
+ while (!Worklist.empty()) {
+ auto CalledFunctionName = Worklist.pop_back_val();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+ << CalledFunctionName << "\n");
+
+ // Try to get a summary for this function call.
+ auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+ if (InfoList == Index.end()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+ << CalledFunctionName << " Ignoring.\n");
+ continue;
+ }
+ assert(!InfoList->second.empty() && "No summary, error at import?");
+
+ // Comdats can have multiple entries. FIXME: what do we do with them?
+ auto &Info = InfoList->second[0];
+ assert(Info && "Nullptr in list, error importing summaries?\n");
+
+ auto *Summary = Info->functionSummary();
+ if (!Summary) {
+ // FIXME: in case we are lazyloading summaries, we can do it now.
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Missing summary for " << CalledFunctionName
+ << ", error at import?\n");
+ llvm_unreachable("Missing summary");
+ }
+
+ if (Summary->instCount() > ImportInstrLimit) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+ << CalledFunctionName << " with " << Summary->instCount()
+ << " instructions (limit " << ImportInstrLimit << ")\n");
+ continue;
+ }
+
+ // Get the module path from the summary.
+ auto ModuleIdentifier = Summary->modulePath();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+ << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+ auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+ // The function that we will import!
+ GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+ if (!SGV) {
+ // The destination module refers to functions by their renamed name
+ // when importing a function that was originally local in the source
+ // module. The source module we have might not have been renamed, so try
+ // removing the suffix added during the renaming to recover the original
+ // name in the source module.
+ std::pair<StringRef, StringRef> Split =
+ CalledFunctionName.split(".llvm.");
+ SGV = SrcModule.getNamedValue(Split.first);
+ assert(SGV && "Can't find function to import in source module");
+ }
+ if (!SGV) {
+ report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+ "' in Module '" + SrcModule.getModuleIdentifier() +
+ "', error in the summary?\n");
+ }
+
+ Function *F = dyn_cast<Function>(SGV);
+ if (!F && isa<GlobalAlias>(SGV)) {
+ auto *SGA = dyn_cast<GlobalAlias>(SGV);
+ F = dyn_cast<Function>(SGA->getBaseObject());
+ CalledFunctionName = F->getName();
+ }
+ assert(F && "Imported Function is ... not a Function");
+
+ // We cannot import weak_any functions/aliases without possibly affecting
+ // the order they are seen and selected by the linker, changing program
+ // semantics.
+ if (SGV->hasWeakAnyLinkage()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Ignoring import request for weak-any "
+ << (isa<Function>(SGV) ? "function " : "alias ")
+ << CalledFunctionName << " from "
+ << SrcModule.getModuleIdentifier() << "\n");
+ continue;
+ }
+
+ // Add the function to the import list
+ auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+ Entry.insert(F);
+
+ // Process the newly imported functions and add callees to the worklist.
+ F->materialize();
+ findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+ }
+}
+
+// Automatically import functions in Module \p DestModule based on the summaries
+// index.
+//
+// The current implementation imports every called function that exists in the
+// summaries index.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+ DEBUG(dbgs() << "Starting import for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+ unsigned ImportedCount = 0;
+
+ /// First step is collecting the called external functions.
+ StringSet<> CalledFunctions;
+ SmallVector<StringRef, 64> Worklist;
+ for (auto &F : DestModule) {
+ if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+ findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+ }
+ if (Worklist.empty())
+ return false;
+
+ /// Second step: for every call to an external function, try to import it.
+
+ // Linker that will be used for importing functions
+ Linker TheLinker(DestModule);
+
+ // Map of Module -> list of Functions to import from that Module
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ ModuleToFunctionsToImportMap;
+
+ // Analyze the summaries and get the list of functions to import by
+ // populating ModuleToFunctionsToImportMap
+ ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+ GetImportList(DestModule, Worklist, CalledFunctions,
+ ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+ assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+ StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+ ModuleToTempMDValsMap;
+
+ // Do the actual import of functions now, one Module at a time
+ for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+ // Get the module for the import
+ auto &FunctionsToImport = FunctionsToImportPerModule.second;
+ std::unique_ptr<Module> SrcModule =
+ ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+ assert(&DestModule.getContext() == &SrcModule->getContext() &&
+ "Context mismatch");
+
+ // Save the mapping of value ids to temporary metadata created when
+ // importing this function. If we have already imported from this module,
+ // add new temporary metadata to the existing mapping.
+ auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+ if (!TempMDVals)
+ TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+ // Link in the specified functions.
+ if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+ &Index, &FunctionsToImport, TempMDVals.get()))
+ report_fatal_error("Function Import: link error");
+
+ ImportedCount += FunctionsToImport.size();
+ }
+
+ // Now link in metadata for all modules from which we imported functions.
+ for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+ ModuleToTempMDValsMap) {
+ // Load the specified source module.
+ auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+ // Link in all necessary metadata from this module.
+ if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+ return ImportedCount;
+}
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+ SummaryFile("summary-file",
+ cl::desc("The summary file to use for function importing."));
+
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+ raw_ostream &OS = errs();
+ DiagnosticPrinterRawOStream DP(OS);
+ DI.print(DP);
+ OS << '\n';
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ std::unique_ptr<MemoryBuffer> Buffer;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Path);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ Buffer = std::move(BufferOrErr.get());
+ ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+ object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
+ DiagnosticHandler);
+ if (std::error_code EC = ObjOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ return (*ObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+ /// Optional function summary index to use for importing; otherwise
+ /// the summary-file option must be specified.
+ const FunctionInfoIndex *Index;
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ const char *getPassName() const override {
+ return "Function Importing";
+ }
+
+ explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+ : ModulePass(ID), Index(Index) {}
+
+ bool runOnModule(Module &M) override {
+ if (SummaryFile.empty() && !Index)
+ report_fatal_error("error: -function-import requires -summary-file or "
+ "file from frontend\n");
+ std::unique_ptr<FunctionInfoIndex> IndexPtr;
+ if (!SummaryFile.empty()) {
+ if (Index)
+ report_fatal_error("error: -summary-file and index from frontend\n");
+ std::string Error;
+ IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+ if (!IndexPtr) {
+ errs() << "Error loading file '" << SummaryFile << "': " << Error
+ << "\n";
+ return false;
+ }
+ Index = IndexPtr.get();
+ }
+
+ // Perform the import now.
+ auto ModuleLoader = [&M](StringRef Identifier) {
+ return loadFile(Identifier, M.getContext());
+ };
+ FunctionImporter Importer(*Index, ModuleLoader);
+ return Importer.importFunctions(M);
+ }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+ return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 61d0ff9..9b276ed 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) {
ComdatMembers.insert(std::make_pair(C, &GA));
// Loop over the module, adding globals which are obviously necessary.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (Function &F : M) {
+ Changed |= RemoveUnusedGlobalValue(F);
// Functions with external linkage are needed if they have a body
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+ if (!F.isDiscardableIfUnused())
+ GlobalIsNeeded(&F);
}
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalVariable &GV : M.globals()) {
+ Changed |= RemoveUnusedGlobalValue(GV);
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
- if (!I->isDiscardableIfUnused())
- GlobalIsNeeded(I);
- }
+ if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+ if (!GV.isDiscardableIfUnused())
+ GlobalIsNeeded(&GV);
}
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- Changed |= RemoveUnusedGlobalValue(*I);
+ for (GlobalAlias &GA : M.aliases()) {
+ Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused()) {
- GlobalIsNeeded(I);
- }
+ if (!GA.isDiscardableIfUnused())
+ GlobalIsNeeded(&GA);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) {
//
// The first pass is to drop initializers of global variables which are dead.
- std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadGlobalVars.push_back(I); // Keep track of dead globals
- if (I->hasInitializer()) {
- Constant *Init = I->getInitializer();
- I->setInitializer(nullptr);
+ std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals
+ for (GlobalVariable &GV : M.globals())
+ if (!AliveGlobals.count(&GV)) {
+ DeadGlobalVars.push_back(&GV); // Keep track of dead globals
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
if (isSafeToDestroyConstant(Init))
Init->destroyConstant();
}
}
// The second pass drops the bodies of functions which are dead...
- std::vector<Function*> DeadFunctions;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!AliveGlobals.count(I)) {
- DeadFunctions.push_back(I); // Keep track of dead globals
- if (!I->isDeclaration())
- I->deleteBody();
+ std::vector<Function *> DeadFunctions;
+ for (Function &F : M)
+ if (!AliveGlobals.count(&F)) {
+ DeadFunctions.push_back(&F); // Keep track of dead globals
+ if (!F.isDeclaration())
+ F.deleteBody();
}
// The third pass drops targets of aliases which are dead...
std::vector<GlobalAlias*> DeadAliases;
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
- ++I)
- if (!AliveGlobals.count(I)) {
- DeadAliases.push_back(I);
- I->setAliasee(nullptr);
+ for (GlobalAlias &GA : M.aliases())
+ if (!AliveGlobals.count(&GA)) {
+ DeadAliases.push_back(&GA);
+ GA.setAliasee(nullptr);
}
if (!DeadFunctions.empty()) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
- for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadFunctions[i]);
- M.getFunctionList().erase(DeadFunctions[i]);
+ for (Function *F : DeadFunctions) {
+ RemoveUnusedGlobalValue(*F);
+ M.getFunctionList().erase(F);
}
NumFunctions += DeadFunctions.size();
Changed = true;
}
if (!DeadGlobalVars.empty()) {
- for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
- M.getGlobalList().erase(DeadGlobalVars[i]);
+ for (GlobalVariable *GV : DeadGlobalVars) {
+ RemoveUnusedGlobalValue(*GV);
+ M.getGlobalList().erase(GV);
}
NumVariables += DeadGlobalVars.size();
Changed = true;
@@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) {
// Now delete any dead aliases.
if (!DeadAliases.empty()) {
- for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
- RemoveUnusedGlobalValue(*DeadAliases[i]);
- M.getAliasList().erase(DeadAliases[i]);
+ for (GlobalAlias *GA : DeadAliases) {
+ RemoveUnusedGlobalValue(*GA);
+ M.getAliasList().erase(GA);
}
NumAliases += DeadAliases.size();
Changed = true;
@@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
- if (F->hasPrefixData())
- MarkUsedGlobalsAsNeeded(F->getPrefixData());
-
- if (F->hasPrologueData())
- MarkUsedGlobalsAsNeeded(F->getPrologueData());
+ for (Use &U : F->operands())
+ MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
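+ // (A Function's operands are its prefix data, prologue data and
+ // personality function, so this single loop covers all three.)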
- if (F->hasPersonalityFn())
- MarkUsedGlobalsAsNeeded(F->getPersonalityFn());
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
- if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB)
+ for (Use &U : I.operands())
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(*U))
+ else if (Constant *C = dyn_cast<Constant>(U))
MarkUsedGlobalsAsNeeded(C);
}
}
@@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ for (Use &U : C->operands()) {
// If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(*I);
+ Constant *Op = dyn_cast<Constant>(U);
if (Op && SeenConstants.insert(Op).second)
MarkUsedGlobalsAsNeeded(Op);
}
@@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// might make it deader.
//
bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
- if (GV.use_empty()) return false;
+ if (GV.use_empty())
+ return false;
GV.removeDeadConstantUsers();
return GV.use_empty();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 5ffe15d..fd77369 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
-STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
@@ -69,6 +69,7 @@ namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -81,11 +82,14 @@ namespace {
bool OptimizeFunctions(Module &M);
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
- bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
- bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const GlobalStatus &GS);
+ bool deleteIfDead(GlobalValue &GV);
+ bool processGlobal(GlobalValue &GV);
+ bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+ bool isPointerValueDeadOnEntryToFunction(const Function *F,
+ GlobalValue *GV);
+
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,13 +99,14 @@ char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
-/// as a root? If so, we might not really want to eliminate the stores to it.
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
// A global variable is a root if it is a pointer, or could plausibly contain
// a pointer. There are two challenges; one is that we could have a struct
@@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
} while (1);
}
-/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users
-/// of the global and clean up any that obviously don't assign the global a
-/// value that isn't dynamically allocated.
-///
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any that obviously don't assign the global a value that isn't dynamically
+/// allocated.
static bool CleanupPointerRootUsers(GlobalVariable *GV,
const TargetLibraryInfo *TLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
@@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
return Changed;
}
-/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
-/// users of the global, cleaning up the obvious ones. This is largely just a
-/// quick scan over the use list to clean up the easy and obvious cruft. This
-/// returns true if it made a change.
+/// We just marked GV constant. Loop over all users of the global, cleaning up
+/// the obvious ones. This is largely just a quick scan over the use list to
+/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
return Changed;
}
-/// isSafeSROAElementUse - Return true if the specified instruction is a safe
-/// user of a derived expression from a global that we want to SROA.
+/// Return true if the specified instruction is a safe user of a derived
+/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
@@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) {
}
-/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
-/// Look at it and its uses and decide whether it is safe to SROA this global.
-///
+/// U is a direct user of the specified global value. Look at it and its uses
+/// and decide whether it is safe to SROA this global.
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
if (!isa<GetElementPtrInst>(U) &&
@@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
return true;
}
-/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
-/// is safe for us to perform this transformation.
-///
+/// Look at all uses of the global and decide whether it is safe for us to
+/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
for (User *U : GV->users())
if (!IsUserOfGlobalSafeForSRA(U, GV))
@@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
}
-/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
-/// variable. This opens the door for other optimizations by exposing the
-/// behavior of the program in a more fine-grained way. We have determined that
-/// this transformation is safe already. We return the first global variable we
+/// Perform scalar replacement of aggregates on the specified global variable.
+/// This opens the door for other optimizations by exposing the behavior of the
+/// program in a more fine-grained way. We have determined that this
+/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
@@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
In, GV->getName()+"."+Twine(i),
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- Globals.insert(GV, NGV);
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
NewGlobals.push_back(NGV);
// Calculate the known alignment of the field. If the original aggregate
@@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (NewGlobals.empty())
return nullptr;
- DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
-/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
-/// phi nodes we've seen to avoid reprocessing them.
+/// Return true if all users of the specified value will trap if the value is
+/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
+/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users())
@@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
return true;
}
-/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
-/// from GV will trap if the loaded value is null. Note that this also permits
-/// comparisons of the loaded value against null, as a special case.
+/// Return true if all uses of any loads from GV will trap if the loaded value
+/// is null. Note that this also permits comparisons of the loaded value
+/// against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
for (const User *U : GV->users())
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
}
-/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
-/// value stored into it. If there are uses of the loaded value that would trap
-/// if the loaded value is dynamically null, then we know that they cannot be
-/// reachable with a null optimize away the load.
+/// The specified global has only one non-null value stored into it. If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reachable with a null
+/// value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
@@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
}
if (Changed) {
- DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
++NumGlobUses;
}
@@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
return Changed;
}
-/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
-/// instructions that are foldable.
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
@@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
}
}
-/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
-/// variable, and transforms the program as if it always contained the result of
-/// the specified malloc. Because it is always the result of the specified
-/// malloc, there is no reason to actually DO the malloc. Instead, turn the
-/// malloc into a global, and any loads of GV as uses of the new global.
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc. Instead, turn the malloc into a global, and any
+/// loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
ConstantInt *NElements, const DataLayout &DL,
@@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- GlobalType, false,
- GlobalValue::InternalLinkage,
- UndefValue::get(GlobalType),
- GV->getName()+".body",
- GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+ GV->getThreadLocalMode());
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
@@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
- GV->getParent()->getGlobalList().insert(GV, InitBool);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
// Now the GV is dead, nuke it and the malloc..
GV->eraseFromParent();
@@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
return NewGV;
}
-/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
-/// to make sure that there are no complex uses of V. We permit simple things
-/// like dereferencing the pointer, but not storing through the address, unless
-/// it is to the specified global.
+/// Scan the use-list of V checking to make sure that there are no complex uses
+/// of V. We permit simple things like dereferencing the pointer, but not
+/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
SmallPtrSetImpl<const PHINode*> &PHIs) {
@@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return true;
}
-/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
-/// somewhere. Transform all uses of the allocation into loads from the
-/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
+/// allocation into loads from the global and uses of the resultant pointer.
+/// Further, delete the store into GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
@@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
}
}
-/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
-/// of a load) are simple enough to perform heap SRA on. This permits GEP's
-/// that index through the array and struct field, icmps of null, and PHIs.
+/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
+/// perform heap SRA on. This permits GEP's that index through the array and
+/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
@@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
}
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
-/// GV are simple enough to perform HeapSRA, return true.
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
@@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
return FieldVals[FieldNo] = Result;
}
-/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
-/// the load, rewrite the derived value to use the HeapSRoA'd load.
+/// Given a load instruction and a value derived from the load, rewrite the
+/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
}
}
-/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
-/// is a value loaded from the global. Eliminate all uses of Ptr, making them
-/// use FieldGlobals instead. All uses of loaded values satisfy
-/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
+/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
+/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
@@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
}
}
-/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
-/// it up into multiple allocations of arrays of the fields.
+/// CI is an allocation of an array of structures. Break it up into multiple
+/// allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
@@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *FieldTy = STy->getElementType(FieldNo);
PointerType *PFieldTy = PointerType::get(FieldTy, AS);
- GlobalVariable *NGV =
- new GlobalVariable(*GV->getParent(),
- PFieldTy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(PFieldTy),
- GV->getName() + ".f" + Twine(FieldNo), GV,
- GV->getThreadLocalMode());
+ GlobalVariable *NGV = new GlobalVariable(
+ *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
+ nullptr, GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
@@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+ BasicBlock *ContBB =
+ OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
@@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// CI is no longer needed, remove it.
CI->eraseFromParent();
- /// InsertedScalarizedLoads - As we process loads, if we can't immediately
- /// update all uses of the load, keep track of what scalarized loads are
- /// inserted for a given load.
+ /// As we process loads, if we can't immediately update all uses of the load,
+ /// keep track of what scalarized loads are inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
@@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
return cast<GlobalVariable>(FieldGlobals[0]);
}
-/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
-/// pointer global variable with a single value stored it that is a malloc or
-/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+/// This function is called when we see a pointer global variable with a single
+/// value stored into it that is a malloc or cast of malloc.
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// If this is a malloc of an abstract type, don't touch it.
@@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+ OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
- DL, TLI);
+ PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+ TLI);
return true;
}
return false;
}
-// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
-// that only one value (besides its initializer) is ever stored to the global.
-static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
- Module::global_iterator &GVI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
@@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
Type *MallocType = getMallocAllocatedType(CI, TLI);
- if (MallocType &&
- TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
- DL, TLI))
+ if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ Ordering, DL, TLI))
return true;
}
}
@@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return false;
}
-/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
-/// two values ever stored into GV are its initializer and OtherVal. See if we
-/// can shrink the global into a boolean and select between the two values
-/// whenever it is used. This exposes the values to other scalar optimizations.
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal. See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used. This exposes
+/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
Type *GVElType = GV->getType()->getElementType();
@@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
- DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
@@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
- GV->getParent()->getGlobalList().insert(GV, NewGV);
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
@@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
+bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
+ GV.removeDeadConstantUsers();
-/// ProcessGlobal - Analyze the specified global variable and optimize it if
-/// possible. If we make a change, return true.
-bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI) {
- // Do more involved optimizations if the global is internal.
- GV->removeDeadConstantUsers();
+ if (!GV.isDiscardableIfUnused())
+ return false;
- if (GV->use_empty()) {
- DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
- GV->eraseFromParent();
- ++NumDeleted;
- return true;
- }
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
+ return false;
- if (!GV->hasLocalLinkage())
+ bool Dead;
+ if (auto *F = dyn_cast<Function>(&GV))
+ Dead = F->isDefTriviallyDead();
+ else
+ Dead = GV.use_empty();
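+ // (For functions, isDefTriviallyDead also tolerates blockaddress
+ // users, which plain use_empty() would reject.)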
+ if (!Dead)
+ return false;
+
+ DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ GV.eraseFromParent();
+ ++NumDeleted;
+ return true;
+}
+
+/// Analyze the specified global variable and optimize it if possible. If we
+/// make a change, return true.
+bool GlobalOpt::processGlobal(GlobalValue &GV) {
+ // Do more involved optimizations if the global is internal.
+ if (!GV.hasLocalLinkage())
return false;
GlobalStatus GS;
- if (GlobalStatus::analyzeGlobal(GV, GS))
+ if (GlobalStatus::analyzeGlobal(&GV, GS))
return false;
- if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
- GV->setUnnamedAddr(true);
+ bool Changed = false;
+ if (!GS.IsCompared && !GV.hasUnnamedAddr()) {
+ GV.setUnnamedAddr(true);
NumUnnamed++;
+ Changed = true;
}
- if (GV->isConstant() || !GV->hasInitializer())
+ auto *GVar = dyn_cast<GlobalVariable>(&GV);
+ if (!GVar)
+ return Changed;
+
+ if (GVar->isConstant() || !GVar->hasInitializer())
+ return Changed;
+
+ return processInternalGlobal(GVar, GS) || Changed;
+}
+
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) {
+ // Find all uses of GV. We expect them all to be in F, and if we can't
+ // identify any of the uses we bail out.
+ //
+ // On each of these uses, identify if the memory that GV points to is
+ // used/required/live at the start of the function. If it is not, for example
+ // if the first thing the function does is store to the GV, the GV can
+ // possibly be demoted.
+ //
+ // We don't do an exhaustive search for memory operations - simply look
+ // through bitcasts as they're quite common and benign.
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ SmallVector<LoadInst *, 4> Loads;
+ SmallVector<StoreInst *, 4> Stores;
+ for (auto *U : GV->users()) {
+ if (Operator::getOpcode(U) == Instruction::BitCast) {
+ for (auto *UU : U->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(UU))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(UU))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+ continue;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ assert(I->getParent()->getParent() == F);
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+
+ // We have identified all uses of GV into loads and stores. Now check if all
+ // of them are known not to depend on the value of the global at the function
+ // entry point. We do this by ensuring that every load is dominated by at
+ // least one store.
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+ .getDomTree();
+
+ // The below check is quadratic. Check we're not going to do too many tests.
+ // FIXME: Even though this will always have worst-case quadratic time, we
+ // could put effort into minimizing the average time by putting stores that
+ // have been shown to dominate at least one load at the beginning of the
+ // Stores array, making subsequent dominance checks more likely to succeed
+ // early.
+ //
+ // The threshold here is fairly large because global->local demotion is a
+ // very powerful optimization should it fire.
+ const unsigned Threshold = 100;
+ if (Loads.size() * Stores.size() > Threshold)
return false;
- return ProcessInternalGlobal(GV, GVI, GS);
+ for (auto *L : Loads) {
+ auto *LTy = L->getType();
+ if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ auto *STy = S->getValueOperand()->getType();
+ // The load is only dominated by the store if DomTree says so
+ // and the number of bits loaded in L is less than or equal to
+ // the number of bits stored in S.
+ return DT.dominates(S, L) &&
+ DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ }))
+ return false;
+ }
+ // All loads have known dependences inside F, so the global can be localized.
+ return true;
+}
+
+/// C may have non-instruction users. Can all of those users be turned into
+/// instructions?
+static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
+ // We don't do this exhaustively. The most common pattern that we really need
+ // to care about is a constant GEP or constant bitcast - so just looking
+ // through one single ConstantExpr.
+ //
+ // Every constant this function returns true for must be handleable by
+ // makeAllConstantUsesInstructions.
+ for (auto *U : C->users()) {
+ if (isa<Instruction>(U))
+ continue;
+ if (!isa<ConstantExpr>(U))
+ // Non-instruction, non-ConstantExpr user; cannot convert this.
+ return false;
+ for (auto *UU : U->users())
+ if (!isa<Instruction>(UU))
+ // A ConstantExpr used by another constant. We don't try to recurse any
+ // further; just bail out at this point.
+ return false;
+ }
+
+ return true;
+}
+
+/// C may have non-instruction users, and
+/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
+/// non-instruction users to instructions.
+static void makeAllConstantUsesInstructions(Constant *C) {
+ SmallVector<ConstantExpr*,4> Users;
+ for (auto *U : C->users()) {
+ if (isa<ConstantExpr>(U))
+ Users.push_back(cast<ConstantExpr>(U));
+ else
+ // We should never get here; allNonInstructionUsersCanBeMadeInstructions
+ // should not have returned true for C.
+ assert(
+ isa<Instruction>(U) &&
+ "Can't transform non-constantexpr non-instruction to instruction!");
+ }
+
+ SmallVector<Value*,4> UUsers;
+ for (auto *U : Users) {
+ UUsers.clear();
+ for (auto *UU : U->users())
+ UUsers.push_back(UU);
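+ // The snapshot above is needed because replaceUsesOfWith below
+ // mutates U's use list while we iterate over it.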
+ for (auto *UU : UUsers) {
+ Instruction *UI = cast<Instruction>(UU);
+ Instruction *NewU = U->getAsInstruction();
+ NewU->insertBefore(UI);
+ UI->replaceUsesOfWith(U, NewU);
+ }
+ U->dropAllReferences();
+ }
}
-/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI,
+bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
const GlobalStatus &GS) {
auto &DL = GV->getParent()->getDataLayout();
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive), we replace
- // the global with a local alloca in this function.
+ // If this is a first class global and has only one accessing function and
+ // this function is non-recursive, we replace the global with a local alloca
+ // in this function.
//
  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory with stack memory.
//
  // If the global is in a different address space, don't bring it to the stack.
if (!GS.HasMultipleAccessingFunctions &&
- GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GS.AccessingFunction &&
GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ GV->getType()->getAddressSpace() == 0 &&
+ !GV->isExternallyInitialized() &&
+ allNonInstructionUsersCanBeMadeInstructions(GV) &&
+ GS.AccessingFunction->doesNotRecurse() &&
+      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV)) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getType()->getElementType();
@@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+ makeAllConstantUsesInstructions(GV);
+
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
@@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
bool Changed;
if (isLeakCheckerRoot(GV)) {
@@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
+ if (SRAGlobal(GV, DL))
return true;
- }
- } else if (GS.StoredType == GlobalStatus::StoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
- } else {
- GVI = GV;
}
++NumSubstitute;
return true;
@@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
- DL, TLI))
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return false;
}
-/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
-/// function, changing them to FastCC.
+/// Walk all of the direct calls of the specified function, changing them to
+/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
@@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool Changed = false;
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = FI++;
+ Function *F = &*FI++;
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
- const Comdat *C = F->getComdat();
- bool inComdat = C && NotDiscardableComdats.count(C);
- F->removeDeadConstantUsers();
- if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
- F->eraseFromParent();
+ if (deleteIfDead(*F)) {
Changed = true;
- ++NumFnDeleted;
- } else if (F->hasLocalLinkage()) {
- if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
- // If this function has a calling convention worth changing, is not a
- // varargs function, and is only called directly, promote it to use the
- // Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
- ++NumFastCallFns;
- Changed = true;
- }
+ continue;
+ }
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
- // The function is not used by a trampoline intrinsic, so it is safe
- // to remove the 'nest' attribute.
- RemoveNestAttribute(F);
- ++NumNestRemoved;
- Changed = true;
- }
+ Changed |= processGlobal(*F);
+
+ if (!F->hasLocalLinkage())
+ continue;
+ if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has a calling convention worth changing, is not a
+ // varargs function, and is only called directly, promote it to use the
+ // Fast calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
}
}
return Changed;
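+
+// A note on the `&*FI++` pattern above: in this LLVM revision ilist iterators
+// no longer convert implicitly to pointers, so the idiom dereferences the
+// iterator, takes the element's address, and advances past it before the
+// element can be erased. A hedged sketch:
+//
+//   Function *F = &*FI++;  // grab pointer, then step past F
+//   if (deleteIfDead(*F))  // safe: FI no longer refers to F
+//     continue;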
@@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
- GlobalVariable *GV = GVI++;
+ GlobalVariable *GV = &*GVI++;
// Global variables without names cannot be referenced outside this module.
if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
GV->setLinkage(GlobalValue::InternalLinkage);
@@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GV->setInitializer(New);
}
- if (GV->isDiscardableIfUnused()) {
- if (const Comdat *C = GV->getComdat())
- if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage())
- continue;
- Changed |= ProcessGlobal(GV, GVI);
+ if (deleteIfDead(*GV)) {
+ Changed = true;
+ continue;
}
+
+ Changed |= processGlobal(*GV);
}
return Changed;
}
@@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSetImpl<Constant *> &SimpleConstants,
const DataLayout &DL);
-/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
-/// handled by the code generator. We don't want to generate something like:
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
/// void *X = &X/42;
/// because the code generator doesn't have a relocation that can handle that.
///
@@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C,
}
-/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand. In particular, if it is a cast to anything
-/// other than from one pointer type to another pointer type, we punt.
-/// We basically just support direct accesses to globals and GEP's of
-/// globals. This should be kept up to date with CommitValueTo.
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast to anything other than from one pointer type to
+/// another pointer type, we punt. We basically just support direct accesses to
+/// globals and GEPs of globals. This should be kept up to date with
+/// CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
@@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
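+
+// For instance (hypothetical sketch): a direct store to @g, or one through
+//   getelementptr inbounds (%struct.S, %struct.S* @g, i32 0, i32 1)
+// with all-constant indices, is simple enough; a cast that is not
+// pointer-to-pointer, or arithmetic on the address, is rejected above.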
-/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
-/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
-/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+/// Evaluate a piece of a constantexpr store into a global initializer. This
+/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
+/// GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
@@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return ConstantVector::get(Elts);
}
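+
+// Worked sketch with hypothetical types: given
+//   @g = global { i32, i32 } { i32 0, i32 0 }
+// a store of 7 through
+//   getelementptr ({ i32, i32 }, { i32, i32 }* @g, i32 0, i32 1)
+// recurses one level and rebuilds the initializer as { i32 0, i32 7 }.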
-/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
@@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
namespace {
-/// Evaluator - This class evaluates LLVM IR, producing the Constant
-/// representing each SSA instruction. Changes to global variables are stored
-/// in a mapping that can be iterated over after the evaluation is complete.
-/// Once an evaluation call fails, the evaluation object should not be reused.
+/// This class evaluates LLVM IR, producing the Constant representing each SSA
+/// instruction. Changes to global variables are stored in a mapping that can
+/// be iterated over after the evaluation is complete. Once an evaluation call
+/// fails, the evaluation object should not be reused.
class Evaluator {
public:
Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
@@ -2180,15 +2320,15 @@ public:
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
}
- /// EvaluateFunction - Evaluate a call to function F, returning true if
- /// successful, false if we can't evaluate it. ActualArgs contains the formal
- /// arguments for the function.
+  /// Evaluate a call to function F, returning true if successful, false if we
+  /// can't evaluate it. ActualArgs contains the values to bind to the
+  /// function's formal arguments.
bool EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs);
- /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
- /// successful, false if we can't evaluate it. NewBB returns the next BB that
- /// control flows into, or null upon return.
+  /// Evaluate all instructions in block BB, returning true if successful,
+  /// false if we can't evaluate it. NextBB returns the next BB that control
+  /// flows into, or null upon return.
bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
Constant *getVal(Value *V) {
@@ -2213,32 +2353,31 @@ public:
private:
Constant *ComputeLoadResult(Constant *P);
- /// ValueStack - As we compute SSA register values, we store their contents
- /// here. The back of the deque contains the current function and the stack
- /// contains the values in the calling frames.
+ /// As we compute SSA register values, we store their contents here. The back
+ /// of the deque contains the current function and the stack contains the
+ /// values in the calling frames.
std::deque<DenseMap<Value*, Constant*>> ValueStack;
- /// CallStack - This is used to detect recursion. In pathological situations
- /// we could hit exponential behavior, but at least there is nothing
- /// unbounded.
+ /// This is used to detect recursion. In pathological situations we could hit
+ /// exponential behavior, but at least there is nothing unbounded.
SmallVector<Function*, 4> CallStack;
- /// MutatedMemory - For each store we execute, we update this map. Loads
- /// check this to get the most up-to-date value. If evaluation is successful,
- /// this state is committed to the process.
+ /// For each store we execute, we update this map. Loads check this to get
+ /// the most up-to-date value. If evaluation is successful, this state is
+ /// committed to the process.
DenseMap<Constant*, Constant*> MutatedMemory;
- /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
- /// to represent its body. This vector is needed so we can delete the
- /// temporary globals when we are done.
+ /// To 'execute' an alloca, we create a temporary global variable to represent
+ /// its body. This vector is needed so we can delete the temporary globals
+ /// when we are done.
SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
- /// Invariants - These global variables have been marked invariant by the
- /// static constructor.
+ /// These global variables have been marked invariant by the static
+ /// constructor.
SmallPtrSet<GlobalVariable*, 8> Invariants;
- /// SimpleConstants - These are constants we have checked and know to be
- /// simple enough to live in a static initializer of a global.
+ /// These are constants we have checked and know to be simple enough to live
+ /// in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
const DataLayout &DL;
@@ -2247,9 +2386,8 @@ private:
} // anonymous namespace
-/// ComputeLoadResult - Return the value that would be computed by a load from
-/// P after the stores reflected by 'memory' have been performed. If we can't
-/// decide, return null.
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'memory' have been performed. If we can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
@@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return nullptr; // don't know how to evaluate.
}
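+
+// Roughly: a load of @g is first answered out of MutatedMemory (stores made
+// earlier in this evaluation); only if no such store was recorded do we fall
+// back to the global's static initializer, and anything we cannot decide
+// returns null and aborts the whole evaluation.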
-/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
-/// successful, false if we can't evaluate it. NewBB returns the next BB that
-/// control flows into, or null upon return.
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NextBB returns the next BB that control flows
+/// into, or null upon return.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
@@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(CurInst);
+ CallSite CS(&*CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
@@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// Continue even if we do nothing.
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
}
DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
@@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
- setVal(CurInst, InstResult);
+ setVal(&*CurInst, InstResult);
}
// If we just processed an invoke, we finished evaluating the block.
@@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
-/// EvaluateFunction - Evaluate a call to function F, returning true if
-/// successful, false if we can't evaluate it. ActualArgs contains the formal
-/// arguments for the function.
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the values to bind to the
+/// function's formal arguments.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs) {
// Check to see if this function is already executing (recursion). If so,
@@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
++AI, ++ArgNo)
- setVal(AI, ActualArgs[ArgNo]);
+ setVal(&*AI, ActualArgs[ArgNo]);
// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
// we can only evaluate any one basic block at most once. This set keeps
@@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
// CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = F->begin();
+ BasicBlock *CurBB = &F->front();
BasicBlock::iterator CurInst = CurBB->begin();
@@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
}
}
-/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
-/// we can. Return true if we can, false otherwise.
+/// Evaluate static constructors in the function, if we can. Return true if we
+/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
@@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- return (*A)->getName().compare((*B)->getName());
+ return (*A)->stripPointerCasts()->getName().compare(
+ (*B)->stripPointerCasts()->getName());
}
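+
+// The stripPointerCasts() added above matters because llvm.used members are
+// stored as i8* constants, so an entry is typically a nameless bitcast and
+// sorting must compare the underlying globals' names. Hypothetical sketch of
+// such an entry:
+//
+//   @llvm.used = appending global [1 x i8*]
+//       [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"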
static void setUsedInitializer(GlobalVariable &V,
@@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V,
}
namespace {
-/// \brief An easy to access representation of llvm.used and llvm.compiler.used.
+/// An easy-to-access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 8> Used;
SmallPtrSet<GlobalValue *, 8> CompilerUsed;
@@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
- Module::alias_iterator J = I++;
+ GlobalAlias *J = &*I++;
+
// Aliases without names cannot be referenced outside this module.
if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
J->setLinkage(GlobalValue::InternalLinkage);
+
+ if (deleteIfDead(*J)) {
+ Changed = true;
+ continue;
+ }
+
// If the aliasee may change at link time, nothing can be done - bail out.
if (J->mayBeOverridden())
continue;
@@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(J);
+ Target->takeName(&*J);
Target->setLinkage(J->getLinkage());
Target->setVisibility(J->getVisibility());
Target->setDLLStorageClass(J->getDLLStorageClass());
- if (Used.usedErase(J))
+ if (Used.usedErase(&*J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(J))
+ if (Used.compilerUsedErase(&*J))
Used.compilerUsedInsert(Target);
} else if (mayHaveOtherReferences(*J, Used))
continue;
@@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return Fn;
}
-/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
-/// destructor and can therefore be eliminated.
+/// Returns whether the given function is an empty C++ destructor and can
+/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
@@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) {
return Changed;
}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 50f56b0..7ea6c08 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the common infrastructure (including C bindings) for
-// libLLVMIPO.a, which implements several transformations over the LLVM
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
// intermediate representation.
//
//===----------------------------------------------------------------------===//
@@ -24,14 +24,17 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeArgPromotionPass(Registry);
initializeConstantMergePass(Registry);
+ initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
+ initializeForceFunctionAttrsLegacyPassPass(Registry);
initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerPass(Registry);
initializeSimpleInlinerPass(Registry);
+ initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizePassPass(Registry);
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
@@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
- initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
initializeStripDebugDeclarePass(Registry);
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
initializeEliminateAvailableExternallyPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
+ initializeFunctionImportPassPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
new file mode 100644
index 0000000..d02c861
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -0,0 +1,937 @@
+//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inferattrs"
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned n) {
+ if (F.doesNotCapture(n))
+ return false;
+ F.setDoesNotCapture(n);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+ if (F.onlyReadsMemory(n))
+ return false;
+ F.setOnlyReadsMemory(n);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned n) {
+ if (F.doesNotAlias(n))
+ return false;
+ F.setDoesNotAlias(n);
+ ++NumNoAlias;
+ return true;
+}
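+
+// In these helpers the index follows the attribute convention of this LLVM
+// revision: 0 denotes the return value and parameters are 1-based. Assuming a
+// declared "char *strdup(const char *)", a sketch of two of the calls below:
+//
+//   setDoesNotAlias(F, 0);   // noalias on the returned pointer
+//   setDoesNotCapture(F, 1); // nocapture on the first (and only) parameter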
+
+/// Analyze the name and prototype of the given function and set any applicable
+/// attributes.
+///
+/// Returns true if any attributes were set and false otherwise.
+static bool inferPrototypeAttributes(Function &F,
+ const TargetLibraryInfo &TLI) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strcmp: // 0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: // 0,1
+ case LibFunc::strcoll: // 0,1
+ case LibFunc::strcasecmp: // 0,1
+  case LibFunc::strncasecmp: // 0,1
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::sscanf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::sprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::mkdir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::rewind:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::rmdir:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::rename:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc::fread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fputs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::uname:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::unlink:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::pread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vsscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::gettimeofday:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
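+
+// Net effect, as a sketch (assuming a target where size_t is i64): from the
+// bare declaration
+//   declare i64 @strlen(i8*)
+// the strlen case above infers roughly
+//   declare i64 @strlen(i8* nocapture) nounwind readonly
+// without ever seeing a function body.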
+
+static bool inferAllPrototypeAttributes(Module &M,
+ const TargetLibraryInfo &TLI) {
+ bool Changed = false;
+
+ for (Function &F : M.functions())
+ // We only infer things using the prototype if the definition isn't around
+ // to analyze directly.
+ if (F.isDeclaration())
+ Changed |= inferPrototypeAttributes(F, TLI);
+
+ return Changed;
+}
+
+PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
+ AnalysisManager<Module> *AM) {
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(M);
+
+ if (!inferAllPrototypeAttributes(M, TLI))
+ // If we didn't infer anything, preserve all analyses.
+ return PreservedAnalyses::all();
+
+ // Otherwise, we may have changed fundamental function attributes, so clear
+ // out all the passes.
+ return PreservedAnalyses::none();
+}
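+
+// Usage sketch: the legacy wrapper below registers this under the name
+// "inferattrs", so it can be exercised as, e.g., "opt -inferattrs -S in.ll";
+// the run() method above is the entry point used by the new pass manager.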
+
+namespace {
+struct InferFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ InferFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeInferFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return inferAllPrototypeAttributes(M, TLI);
+ }
+};
+}
+
+char InferFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+
+Pass *llvm::createInferFunctionAttrsLegacyPass() {
+ return new InferFunctionAttrsLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index dc56a02..1704bfe 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -14,10 +14,10 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -35,17 +35,15 @@ namespace {
/// \brief Inliner pass which only handles "always inline" functions.
class AlwaysInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
- ICA(nullptr) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
+ : Inliner(ID, -2000000000, InsertLifetime) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -53,9 +51,6 @@ public:
InlineCost getInlineCost(CallSite CS) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnSCC(CallGraphSCC &SCC) override;
-
using llvm::Pass::doFinalization;
bool doFinalization(CallGraph &CG) override {
return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
@@ -67,10 +62,9 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
// that are viable for inlining. FIXME: We shouldn't even get here for
// declarations.
if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) &&
- ICA->isInlineViable(*Callee))
+ CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
return InlineCost::getAlways();
return InlineCost::getNever();
}
-
-bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
- return Inliner::runOnSCC(SCC);
-}
-
-void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
- Inliner::getAnalysisUsage(AU);
-}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 9b01d81..45609f8 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
using namespace llvm;
@@ -37,26 +38,30 @@ namespace {
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
class SimpleInliner : public Inliner {
- InlineCostAnalysis *ICA;
public:
- SimpleInliner() : Inliner(ID), ICA(nullptr) {
+ SimpleInliner() : Inliner(ID) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) override {
- return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ Function *Callee = CS.getCalledFunction();
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+ return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
}
bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ TargetTransformInfoWrapperPass *TTIWP;
};
static int computeThresholdFromOptLevels(unsigned OptLevel,
@@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
- ICA = &getAnalysis<InlineCostAnalysis>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
return Inliner::runOnSCC(SCC);
}
void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<InlineCostAnalysis>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
Inliner::getAnalysisUsage(AU);
}
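For illustration, the rewired inliner might be instantiated like this (the
opt-level values are invented; the two-argument factory is the one declared
in the hunk above, with the second parameter assumed to be a size-opt level):

void addInliner(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createFunctionInliningPass(/*OptLevel=*/2, /*SizeOptLevel=*/0));
}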
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 5273c3d..bbe5f876 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
- InlineLimit : Threshold),
- InsertLifetime(InsertLifetime) {}
+ : CallGraphSCCPass(ID),
+ InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+ : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
-/// \brief If the inlined function had a higher stack protection level than the
-/// calling function, then bump up the caller's stack protection level.
-static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
- // If upgrading the SSP attribute, clear out the old SSP Attributes first.
- // Having multiple SSP attributes doesn't actually hurt, but it adds useless
- // clutter to the IR.
- AttrBuilder B;
- B.addAttribute(Attribute::StackProtect)
- .addAttribute(Attribute::StackProtectStrong)
- .addAttribute(Attribute::StackProtectReq);
- AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
- AttributeSet::FunctionIndex,
- B);
-
- if (Callee->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::SafeStack);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::SafeStack)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
- Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
- Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
- !Caller->hasFnAttribute(Attribute::SafeStack) &&
- !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
- !Caller->hasFnAttribute(Attribute::StackProtectStrong))
- Caller->addFnAttr(Attribute::StackProtect);
-}
-
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
@@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
+ // We need to manually construct BasicAA directly in order to disable
+ // its use of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
+
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI, InsertLifetime))
+ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
return false;
- AdjustCallerSSPLevel(Caller, Callee);
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
@@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
<< *AvailableAlloca << '\n');
+ // Move affected dbg.declare calls immediately after the new alloca to
+ // avoid the situation where a dbg.declare precedes its alloca.
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ DDI->moveBefore(AvailableAlloca->getNextNode());
+
AI->replaceAllUsesWith(AvailableAlloca);
if (Align1 != Align2) {
@@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int thres = InlineThreshold; // -inline-threshold or else selected by
- // overall opt level
+ int Threshold = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
// If -inline-threshold is not given, listen to the optsize attribute when it
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
+ // FIXME: Use Function::optForSize().
Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
- OptSizeThreshold < thres)
- thres = OptSizeThreshold;
+ OptSizeThreshold < Threshold)
+ Threshold = OptSizeThreshold;
- // Listen to the inlinehint attribute when it would increase the threshold
- // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
- bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres &&
- !Caller->hasFnAttribute(Attribute::MinSize))
- thres = HintThreshold;
+ if (!Callee || Callee->isDeclaration())
+ return Threshold;
+
+ // If profile information is available, use that to adjust threshold of hot
+ // and cold functions.
+ // FIXME: The heuristics used below for determining hotness and coldness are
+ // based on preliminary SPEC tuning and may not be optimal. Replace this with
+ // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
+ uint64_t FunctionCount = 0, MaxFunctionCount = 0;
+ bool HasPGOCounts = false;
+ if (Callee->getEntryCount() &&
+ Callee->getParent()->getMaximumFunctionCount()) {
+ HasPGOCounts = true;
+ FunctionCount = Callee->getEntryCount().getValue();
+ MaxFunctionCount =
+ Callee->getParent()->getMaximumFunctionCount().getValue();
+ }
- // Listen to the cold attribute when it would decrease the threshold.
- bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::Cold);
+ // Listen to the inlinehint attribute or profile based hotness information
+ // when it would increase the threshold and the caller does not need to
+ // minimize its size.
+ bool InlineHint =
+ Callee->hasFnAttribute(Attribute::InlineHint) ||
+ (HasPGOCounts &&
+ FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
+ if (InlineHint && HintThreshold > Threshold &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
+ Threshold = HintThreshold;
+
+ // Listen to the cold attribute or profile based coldness information
+ // when it would decrease the threshold.
+ bool ColdCallee =
+ Callee->hasFnAttribute(Attribute::Cold) ||
+ (HasPGOCounts &&
+ FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
if ((InlineLimit.getNumOccurrences() == 0 ||
ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
- ColdThreshold < thres)
- thres = ColdThreshold;
+ ColdThreshold < Threshold)
+ Threshold = ColdThreshold;
- return thres;
+ return Threshold;
}
static void emitAnalysis(CallSite CS, const Twine &Msg) {
@@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
- if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
- continue;
+ if (Function *Callee = CS.getCalledFunction())
+ if (Callee->isDeclaration())
+ continue;
CallSites.push_back(std::make_pair(CS, -1));
}
@@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
@@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
}
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
@@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
- for (auto I : CG) {
- CallGraphNode *CGN = I.second;
+ for (const auto &I : CG) {
+ CallGraphNode *CGN = I.second.get();
Function *F = CGN->getFunction();
if (!F || F->isDeclaration())
continue;
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 7950163..21bb5d0 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -60,6 +60,10 @@ namespace {
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
+ bool maybeInternalize(GlobalValue &GV,
+ const std::set<const Comdat *> &ExternalComdats);
+ void checkComdatVisibility(GlobalValue &GV,
+ std::set<const Comdat *> &ExternalComdats);
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-static bool shouldInternalize(const GlobalValue &GV,
- const std::set<std::string> &ExternalNames) {
+static bool isExternallyVisible(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
// Function must be defined here
if (GV.isDeclaration())
- return false;
+ return true;
// Available externally is really just a "declaration with a body".
if (GV.hasAvailableExternallyLinkage())
- return false;
+ return true;
// Assume that dllexported symbols are referenced elsewhere
if (GV.hasDLLExportStorageClass())
- return false;
-
- // Already has internal linkage
- if (GV.hasLocalLinkage())
- return false;
+ return true;
// Marked to keep external?
- if (ExternalNames.count(GV.getName()))
- return false;
+ if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+ return true;
+
+ return false;
+}
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
+bool InternalizePass::maybeInternalize(
+ GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ if (Comdat *C = GV.getComdat()) {
+ if (ExternalComdats.count(C))
+ return false;
+
+ // If a comdat is not externally visible we can drop it.
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+
+ if (GV.hasLocalLinkage())
+ return false;
+ } else {
+ if (GV.hasLocalLinkage())
+ return false;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ return false;
+ }
+
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ GV.setLinkage(GlobalValue::InternalLinkage);
return true;
}
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+ GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ ExternalComdats.insert(C);
+}
+
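// Worked example of the comdat rule above (symbol names invented): comdat C
// holds F (internalizable on its own) and G, whose name is in ExternalNames.
// checkComdatVisibility(G, ...) inserts C into ExternalComdats, so
// maybeInternalize(F, ExternalComdats) returns false: no member of an
// externally visible comdat is internalized, keeping the comdat consistent
// across translation units.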
bool InternalizePass::runOnModule(Module &M) {
CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
- bool Changed = false;
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, false);
+ // Collect comdat visibility information for the module.
+ std::set<const Comdat *> ExternalComdats;
+ if (!M.getComdatSymbolTable().empty()) {
+ for (Function &F : M)
+ checkComdatVisibility(F, ExternalComdats);
+ for (GlobalVariable &GV : M.globals())
+ checkComdatVisibility(GV, ExternalComdats);
+ for (GlobalAlias &GA : M.aliases())
+ checkComdatVisibility(GA, ExternalComdats);
+ }
+
// We must assume that globals in llvm.used have a reference that not even
// the linker can see, so we don't internalize them.
// For llvm.compiler.used the situation is a bit fuzzy. The assembler and
@@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) {
}
// Mark all functions not in the api as internal.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
-
if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
- Changed = true;
++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
}
// Never internalize the llvm.used symbol. It is used to implement
@@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) {
// internal as well.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumGlobals;
DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
}
@@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames))
+ if (!maybeInternalize(*I, ExternalComdats))
continue;
- I->setVisibility(GlobalValue::DefaultVisibility);
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
++NumAliases;
DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
}
- return Changed;
+ // We do not keep track of whether this pass changed the module because
+ // it adds unnecessary complexity:
+ // 1) This pass will generally be near the start of the pass pipeline, so
+ // there will be no analyses to invalidate.
+ // 2) This pass will most likely end up changing the module and it isn't worth
+ // worrying about optimizing the case where the module is unchanged.
+ return true;
}
ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
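A sketch of driving the pass with an export list (the ArrayRef overload of
the factory is assumed to exist alongside the no-argument one shown above;
includes as in the earlier sketch):

// Internalize everything the comdat rules allow, except "main".
void internalizeModule(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  const char *Exports[] = {"main"};
  PM.add(llvm::createInternalizePass(Exports));
  PM.run(M);
}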
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 41334ca..8e4ad64 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -43,12 +43,13 @@ namespace {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
};
}
@@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
//
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = false;
// If there is more than one top-level loop in this function, extract all of
@@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (ShouldExtractLoop) {
- // We must omit landing pads. Landing pads must accompany the invoke
+ // We must omit EH pads. EH pads must accompany the invoke
// instruction. But this would result in a loop in the extracted
// function. An infinite cycle occurs when it tries to extract that loop as
// well.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i]->isLandingPad()) {
+ if (ExitBlocks[i]->isEHPad()) {
ShouldExtractLoop = false;
break;
}
@@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LPM.deleteLoopFromQueue(L);
+ LI.updateUnloop(L);
}
++NumExtracted;
}
@@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Figure out which index the basic block is in its function.
Function::iterator BBI = MF->begin();
std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
- TranslatedBlocksToNotExtract.insert(BBI);
+ TranslatedBlocksToNotExtract.insert(&*BBI);
}
while (!BlocksToNotExtractByName.empty()) {
@@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) {
BasicBlock &BB = *BI;
if (BB.getName() != BlockName) continue;
- TranslatedBlocksToNotExtract.insert(BI);
+ TranslatedBlocksToNotExtract.insert(&*BI);
}
}
@@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) {
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
SplitLandingPadPreds(&*F);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (!TranslatedBlocksToNotExtract.count(BB))
- BlocksToExtract.push_back(BB);
+ if (!TranslatedBlocksToNotExtract.count(&*BB))
+ BlocksToExtract.push_back(&*BB);
}
for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
index c6795c6..7b51574 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -26,6 +28,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
bool BitSetInfo::containsValue(
const DataLayout &DL,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
auto I = GlobalLayout.find(GV);
if (I == GlobalLayout.end())
return false;
@@ -90,6 +94,21 @@ bool BitSetInfo::containsValue(
return false;
}
+void BitSetInfo::print(raw_ostream &OS) const {
+ OS << "offset " << ByteOffset << " size " << BitSize << " align "
+ << (1 << AlignLog2);
+
+ if (isAllOnes()) {
+ OS << " all-ones\n";
+ return;
+ }
+
+ OS << " { ";
+ for (uint64_t B : Bits)
+ OS << B << ' ';
+ OS << "}\n";
+}
+
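// Example of the output format above: a bitset at byte offset 0 covering
// 3 bits with AlignLog2 == 2, all of whose bits are set, prints as
//   offset 0 size 3 align 4 all-ones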
BitSetInfo BitSetBuilder::build() {
if (Min > Max)
Min = 0;
@@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass {
Module *M;
bool LinkerSubsectionsViaSymbols;
+ Triple::ArchType Arch;
+ Triple::ObjectFormatType ObjectFormat;
IntegerType *Int1Ty;
IntegerType *Int8Ty;
IntegerType *Int32Ty;
Type *Int32PtrTy;
IntegerType *Int64Ty;
- Type *IntPtrTy;
+ IntegerType *IntPtrTy;
// The llvm.bitsets named metadata.
NamedMDNode *BitSetNM;
- // Mapping from bitset mdstrings to the call sites that test them.
- DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+ // Mapping from bitset identifiers to the call sites that test them.
+ DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
BitSetInfo
- buildBitSet(MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ buildBitSet(Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
+ void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+ Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
- void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals);
+ Constant *CombinedGlobal,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalVariable *> Globals);
+ unsigned getJumpTableEntrySize();
+ Type *getJumpTableEntryType();
+ Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance);
+ void verifyBitSetMDNode(MDNode *Op);
+ void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions);
+ void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalObject *> Globals);
bool buildBitSets();
bool eraseBitSetMetadata();
@@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass {
bool runOnModule(Module &M) override;
};
-} // namespace
+} // anonymous namespace
INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
"Lower bitset metadata", false, false)
@@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
Triple TargetTriple(M->getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ Arch = TargetTriple.getArch();
+ ObjectFormat = TargetTriple.getObjectFormat();
Int1Ty = Type::getInt1Ty(M->getContext());
Int8Ty = Type::getInt8Ty(M->getContext());
@@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) {
/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
- MDString *BitSet,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
// Compute the byte offset of each element of this bitset.
@@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet(
for (MDNode *Op : BitSetNM->operands()) {
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ Constant *OpConst =
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+ if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+ OpConst = GA->getAliasee();
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
if (!OpGlobal)
continue;
uint64_t Offset =
@@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() {
if (LinkerSubsectionsViaSymbols) {
BAI->ByteArray->replaceAllUsesWith(GEP);
} else {
- GlobalAlias *Alias =
- GlobalAlias::create(PointerType::getUnqual(Int8Ty),
- GlobalValue::PrivateLinkage, "bits", GEP, M);
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
BAI->ByteArray->replaceAllUsesWith(Alias);
}
BAI->ByteArray->eraseFromParent();
@@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
GlobalValue::PrivateLinkage, "bits_use",
ByteArray, M);
}
@@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- GlobalVariable *CombinedGlobal,
- const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M->getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
- return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+ return ConstantInt::getTrue(M->getContext());
- Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+ CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
BasicBlock *InitialBB = CI->getParent();
@@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall(
/// Given a disjoint set of bitsets and globals, layout the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
-void LowerBitSets::buildBitSetsFromGlobals(
- const std::vector<MDString *> &BitSets,
- const std::vector<GlobalVariable *> &Globals) {
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
// Build a new global with the combined contents of the referenced globals.
+ // This global is a struct whose even-indexed elements contain the original
+ // contents of the referenced globals and whose odd-indexed elements contain
+ // any padding required to align the next element to the next power of 2.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M->getDataLayout();
for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
- uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
- // Compute the amount of padding required to align the next element to the
- // next power of 2.
+ // Compute the amount of padding required.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
// Cap at 128 was found experimentally to have a good data/instruction
@@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (!GlobalInits.empty())
GlobalInits.pop_back();
Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
- auto CombinedGlobal =
+ auto *CombinedGlobal =
new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
- const StructLayout *CombinedGlobalLayout =
- DL.getStructLayout(cast<StructType>(NewInit->getType()));
+ StructType *NewTy = cast<StructType>(NewInit->getType());
+ const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
// Compute the offsets of the original globals within the new global.
- DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
- // For each bitset in this disjoint set...
- for (MDString *BS : BitSets) {
- // Build the bitset.
- BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
-
- ByteArrayInfo *BAI = 0;
-
- // Lower each call to llvm.bitset.test for this bitset.
- for (CallInst *CI : BitSetTestCallSites[BS]) {
- ++NumBitSetCallsLowered;
- Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
- }
- }
+ lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references
@@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals(
if (LinkerSubsectionsViaSymbols) {
Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
- GlobalAlias *GAlias =
- GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
- "", CombinedGlobalElemPtr, M);
+ assert(Globals[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
+ Globals[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Globals[I]->getVisibility());
GAlias->takeName(Globals[I]);
Globals[I]->replaceAllUsesWith(GAlias);
}
@@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals(
}
}
+void LowerBitSets::lowerBitSetCalls(
+ ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr =
+ ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+
+ // For each bitset in this disjoint set...
+ for (Metadata *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+ DEBUG({
+ if (auto BSS = dyn_cast<MDString>(BS))
+ dbgs() << BSS->getString() << ": ";
+ else
+ dbgs() << "<unnamed>: ";
+ BSI.print(dbgs());
+ });
+
+ ByteArrayInfo *BAI = nullptr;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+}
+
+void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+ if (!Op->getOperand(1))
+ return;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
+ if (!OpGlobal)
+ return;
+
+ if (OpGlobal->isThreadLocal())
+ report_fatal_error("Bit set element may not be thread-local");
+ if (OpGlobal->hasSection())
+ report_fatal_error("Bit set element may not have an explicit section");
+
+ if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
+ report_fatal_error("Bit set global var element must be a definition");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error("Bit set element offset must be an integer constant");
+}
+
+static const unsigned kX86JumpTableEntrySize = 8;
+
+unsigned LowerBitSets::getJumpTableEntrySize() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return kX86JumpTableEntrySize;
+}
+
+// Create a constant representing a jump table entry for the target. This
+// consists of an instruction sequence containing a relative branch to Dest. The
+// constant will be laid out at address Src+(Len*Distance) where Len is the
+// target-specific jump table entry size.
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance) {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ const unsigned kJmpPCRel32Code = 0xe9;
+ const unsigned kInt3Code = 0xcc;
+
+ ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);
+
+ // Build a constant representing the displacement between the constant's
+ // address and Dest. This will resolve to a PC32 relocation referring to Dest.
+ Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
+ Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
+ Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
+ ConstantInt *DispOffset =
+ ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
+ Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
+ OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);
+
+ ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);
+
+ Constant *Fields[] = {
+ Jmp, OffsetedDisp, Int3, Int3, Int3,
+ };
+ return ConstantStruct::getAnon(Fields, /*Packed=*/true);
+}
+
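// Worked instance of the displacement arithmetic above (addresses invented).
// Each entry is "e9 <disp32> cc cc cc" (8 bytes); for entry I the byte after
// the disp32 field lies at Src + I*8 + 5, and x86 resolves jmp displacements
// relative to that address, hence DispOffset = I*8 + 5:
//   disp = (Dest - Src) - (I*8 + 5)
//   e.g. Src = 0x1000, Dest = 0x2000, I = 1:
//   disp = 0x1000 - 13 = 0x0ff3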
+Type *LowerBitSets::getJumpTableEntryType() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return StructType::get(M->getContext(),
+ {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
+ /*Packed=*/true);
+}
+
+/// Given a disjoint set of bitsets and functions, build a jump table for the
+/// functions, build the bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions) {
+ // Unlike the global bitset builder, the function bitset builder cannot
+ // re-arrange functions in a particular order and base its calculations on the
+ // layout of the functions' entry points, as we have no idea how large a
+ // particular function will end up being (the size could even depend on what
+ // this pass does!) Instead, we build a jump table, which is a block of code
+ // consisting of one branch instruction for each of the functions in the bit
+ // set that branches to the target function, and redirect any taken function
+ // addresses to the corresponding jump table entry. In the object file's
+ // symbol table, the symbols for the target functions also refer to the jump
+ // table entries, so that addresses taken outside the module will pass any
+ // verification done inside the module.
+ //
+ // In more concrete terms, suppose we have three functions f, g, h which are
+ // members of a single bitset, and a function foo that returns their
+ // addresses:
+ //
+ // f:
+ // mov 0, %eax
+ // ret
+ //
+ // g:
+ // mov 1, %eax
+ // ret
+ //
+ // h:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // To create a jump table for these functions, we instruct the LLVM code
+ // generator to output a jump table in the .text section. This is done by
+ // representing the instructions in the jump table as an LLVM constant and
+ // placing them in a global variable in the .text section. The end result will
+ // (conceptually) look like this:
+ //
+ // f:
+ // jmp .Ltmp0 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // g:
+ // jmp .Ltmp1 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // h:
+ // jmp .Ltmp2 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // .Ltmp0:
+ // mov 0, %eax
+ // ret
+ //
+ // .Ltmp1:
+ // mov 1, %eax
+ // ret
+ //
+ // .Ltmp2:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
+ // normal case the check can be carried out using the same kind of simple
+ // arithmetic that we normally use for globals.
+
+ assert(!Functions.empty());
+
+ // Build a simple layout based on the regular layout of jump tables.
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ unsigned EntrySize = getJumpTableEntrySize();
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ GlobalLayout[Functions[I]] = I * EntrySize;
+
+ // Create a constant to hold the jump table.
+ ArrayType *JumpTableType =
+ ArrayType::get(getJumpTableEntryType(), Functions.size());
+ auto JumpTable = new GlobalVariable(*M, JumpTableType,
+ /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr);
+ JumpTable->setSection(ObjectFormat == Triple::MachO
+ ? "__TEXT,__text,regular,pure_instructions"
+ : ".text");
+ lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ Functions[I]->getType());
+ if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
+ Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ assert(Functions[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
+ Functions[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Functions[I]->getVisibility());
+ GAlias->takeName(Functions[I]);
+ Functions[I]->replaceAllUsesWith(GAlias);
+ }
+ if (!Functions[I]->isDeclarationForLinker())
+ Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
+ }
+
+ // Build and set the jump table's initializer.
+ std::vector<Constant *> JumpTableEntries;
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ JumpTableEntries.push_back(
+ createJumpTableEntry(JumpTable, Functions[I], I));
+ JumpTable->setInitializer(
+ ConstantArray::get(JumpTableType, JumpTableEntries));
+}
+
+void LowerBitSets::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
+ llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
+ for (unsigned I = 0; I != BitSets.size(); ++I)
+ BitSetIndices[BitSets[I]] = I;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ GlobalIndices[Globals[I]] = I;
+
+ // For each bitset, build a set of indices that refer to globals referenced by
+ // the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(Op->getOperand(0));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ if (!OpGlobal)
+ continue;
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
+ // close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build the bitsets from this disjoint set.
+ if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+ // Build a vector of global variables with the computed layout.
+ std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+ auto OGI = OrderedGVs.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+ if (!GV)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OGI++ = GV;
+ }
+ }
+
+ buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+ } else {
+ // Build a vector of functions with the computed layout.
+ std::vector<Function *> OrderedFns(Globals.size());
+ auto OFI = OrderedFns.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto Fn = dyn_cast<Function>(Globals[Offset]);
+ if (!Fn)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OFI++ = Fn;
+ }
+ }
+
+ buildBitSetsFromFunctions(BitSets, OrderedFns);
+ }
+}
+
/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =
@@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() {
// Equivalence class set containing bitsets and the globals they reference.
// This is used to partition the set of bitsets in the module into disjoint
// sets.
- typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
+ // Verify the bitset metadata and build a mapping from bitset identifiers to
+ // their last observed index in BitSetNM. This will be used later to
+ // deterministically order the list of bitset identifiers.
+ llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+ if (BitSetNM) {
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+ MDNode *Op = BitSetNM->getOperand(I);
+ verifyBitSetMDNode(Op);
+ BitSetIdIndices[Op->getOperand(0)] = I;
+ }
+ }
+
for (const Use &U : BitSetTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ if (!BitSetMDVal)
report_fatal_error(
- "Second argument of llvm.bitset.test must be metadata string");
- auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+ "Second argument of llvm.bitset.test must be metadata");
+ auto BitSet = BitSetMDVal->getMetadata();
// Add the call site to the list of call sites for this bit set. We also use
// BitSetTestCallSites to keep track of whether we have seen this bit set
// before. If we have, we don't need to re-add the referenced globals to the
// equivalence class.
- std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
bool> Ins =
BitSetTestCallSites.insert(
std::make_pair(BitSet, std::vector<CallInst *>()));
@@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() {
if (!BitSetNM)
continue;
- // Verify the bitset metadata and add the referenced globals to the bitset's
- // equivalence class.
+ // Add the referenced globals to the bitset's equivalence class.
for (MDNode *Op : BitSetNM->operands()) {
- if (Op->getNumOperands() != 3)
- report_fatal_error(
- "All operands of llvm.bitsets metadata must have 3 elements");
-
if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
continue;
- auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
- if (!OpConstMD)
- report_fatal_error("Bit set element must be a constant");
- auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
if (!OpGlobal)
continue;
- auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
- if (!OffsetConstMD)
- report_fatal_error("Bit set element offset must be a constant");
- auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
- if (!OffsetInt)
- report_fatal_error(
- "Bit set element offset must be an integer constant");
-
CurSet = GlobalClasses.unionSets(
CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
}
@@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() {
if (GlobalClasses.empty())
return false;
- // For each disjoint set we found...
+ // Build a list of disjoint sets ordered by their maximum BitSetNM index
+ // for determinism.
+ std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
E = GlobalClasses.end();
I != E; ++I) {
if (!I->isLeader()) continue;
-
++NumBitSetDisjointSets;
- // Build the list of bitsets and referenced globals in this disjoint set.
- std::vector<MDString *> BitSets;
- std::vector<GlobalVariable *> Globals;
- llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
- llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ unsigned MaxIndex = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
- if ((*MI).is<MDString *>()) {
- BitSetIndices[MI->get<MDString *>()] = BitSets.size();
- BitSets.push_back(MI->get<MDString *>());
- } else {
- GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
- Globals.push_back(MI->get<GlobalVariable *>());
- }
+ if ((*MI).is<Metadata *>())
+ MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
}
+ Sets.emplace_back(I, MaxIndex);
+ }
+ std::sort(Sets.begin(), Sets.end(),
+ [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
+ const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
+ return S1.second < S2.second;
+ });
- // For each bitset, build a set of indices that refer to globals referenced
- // by the bitset.
- std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
- if (BitSetNM) {
- for (MDNode *Op : BitSetNM->operands()) {
- // Op = { bitset name, global, offset }
- if (!Op->getOperand(1))
- continue;
- auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
- if (I == BitSetIndices.end())
- continue;
-
- auto OpGlobal = dyn_cast<GlobalVariable>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
- if (!OpGlobal)
- continue;
- BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
- }
+ // For each disjoint set we found...
+ for (const auto &S : Sets) {
+ // Build the list of bitsets in this disjoint set.
+ std::vector<Metadata *> BitSets;
+ std::vector<GlobalObject *> Globals;
+ for (GlobalClassesTy::member_iterator MI =
+ GlobalClasses.member_begin(S.first);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<Metadata *>())
+ BitSets.push_back(MI->get<Metadata *>());
+ else
+ Globals.push_back(MI->get<GlobalObject *>());
}
- // Order the sets of indices by size. The GlobalLayoutBuilder works best
- // when given small index sets first.
- std::stable_sort(
- BitSetMembers.begin(), BitSetMembers.end(),
- [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
- return O1.size() < O2.size();
- });
-
- // Create a GlobalLayoutBuilder and provide it with index sets as layout
- // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
- // as close together as possible.
- GlobalLayoutBuilder GLB(Globals.size());
- for (auto &&MemSet : BitSetMembers)
- GLB.addFragment(MemSet);
-
- // Build a vector of globals with the computed layout.
- std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
- auto OGI = OrderedGlobals.begin();
- for (auto &&F : GLB.Fragments)
- for (auto &&Offset : F)
- *OGI++ = Globals[Offset];
-
- // Order bitsets by name for determinism.
- std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
- return S1->getString() < S2->getString();
+ // Order bitsets by BitSetNM index for determinism. This ordering is stable
+ // as there is a one-to-one mapping between metadata and indices.
+ std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
+ return BitSetIdIndices[M1] < BitSetIdIndices[M2];
});
- // Build the bitsets from this disjoint set.
- buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ // Lower the bitsets in this disjoint set.
+ buildBitSetsFromDisjointSet(BitSets, Globals);
}
allocateByteArrays();
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 2e3519e..8a209a1 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,6 +27,14 @@
// -- We define Function* container class with custom "operator<" (FunctionPtr).
// -- "FunctionPtr" instances are stored in std::set collection, so every
// std::set::insert operation will give you result in log(N) time.
+//
+// As an optimization, a hash of the function structure is calculated first, and
+// two functions are only compared if they have the same hash. This hash is
+// cheap to compute, and has the property that if function F == G according to
+// the comparison function, then hash(F) == hash(G). This consistency property
+// is critical to ensuring all possible merging opportunities are exploited.
+// Collisions in the hash affect the speed of the pass but not the correctness
+// or determinism of the resulting transformation.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
@@ -87,6 +95,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -97,12 +106,14 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mergefunc"
@@ -121,21 +132,64 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
namespace {
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals. This
+/// state must be preserved throughout the pass, because Functions and other
+/// globals need to maintain their relative order. Globals are assigned a number
+/// when they are first visited. This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+ struct Config : ValueMapConfig<GlobalValue*> {
+ enum { FollowRAUW = false };
+ };
+ // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+ // occurs, the mapping does not change. Tracking changes is unnecessary, and
+ // also problematic for weak symbols (which may be overwritten).
+ typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+ ValueNumberMap GlobalNumbers;
+ // The next unused serial number to assign to a global.
+ uint64_t NextNumber;
+ public:
+ GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+ uint64_t getNumber(GlobalValue* Global) {
+ ValueNumberMap::iterator MapIter;
+ bool Inserted;
+ std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+ if (Inserted)
+ NextNumber++;
+ return MapIter->second;
+ }
+ void clear() {
+ GlobalNumbers.clear();
+ }
+};
+
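// Minimal sketch of the numbering contract (F and G are assumed
// GlobalValue pointers):
//   GlobalNumberState GN;
//   uint64_t NF = GN.getNumber(F); // first visit: assigned 0
//   uint64_t NG = GN.getNumber(G); // first visit: assigned 1
//   assert(GN.getNumber(F) == NF); // repeat visits return the same number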
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const Function *F1, const Function *F2)
- : FnL(F1), FnR(F2) {}
+ FunctionComparator(const Function *F1, const Function *F2,
+ GlobalNumberState* GN)
+ : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have different hashes with high probability.
+ typedef uint64_t FunctionHash;
+ static FunctionHash functionHash(Function &);
private:
/// Test whether two basic blocks have equivalent behaviour.
- int compare(const BasicBlock *BBL, const BasicBlock *BBR);
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
/// Constants comparison.
 /// It is analogous to lexicographical comparison between hypothetical numbers
@@ -241,6 +295,10 @@ private:
/// If these properties are equal - compare their contents.
int cmpConstants(const Constant *L, const Constant *R);
+  /// Compares two global values by number. Uses the GlobalNumberState to
+  /// identify the same globals across function calls.
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
/// visited.
@@ -320,8 +378,9 @@ private:
///
/// 1. If types are of different kind (different type IDs).
/// Return result of type IDs comparison, treating them as numbers.
- /// 2. If types are vectors or integers, compare Type* values as numbers.
- /// 3. Types has same ID, so check whether they belongs to the next group:
+ /// 2. If types are integers, check that they have the same width. If they
+ /// are vectors, check that they have the same count and subtype.
+ /// 3. Types have the same ID, so check whether they are one of:
/// * Void
/// * Float
/// * Double
@@ -330,8 +389,7 @@ private:
/// * PPC_FP128
/// * Label
/// * Metadata
- /// If so - return 0, yes - we can treat these types as equal only because
- /// their IDs are same.
+ /// We can treat these types as equal whenever their IDs are same.
/// 4. If Left and Right are pointers, return result of address space
/// comparison (numbers comparison). We can treat pointer types of same
/// address space as equal.
@@ -343,11 +401,13 @@ private:
int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
-
int cmpAPInts(const APInt &L, const APInt &R) const;
int cmpAPFloats(const APFloat &L, const APFloat &R) const;
- int cmpStrings(StringRef L, StringRef R) const;
+ int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+ int cmpMem(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+ int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+ int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
// The two functions undergoing comparison.
const Function *FnL, *FnR;
@@ -386,30 +446,30 @@ private:
/// could be operands from further BBs we didn't scan yet.
/// So it's impossible to use dominance properties in general.
DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+ // The global state we will use
+ GlobalNumberState* GlobalNumbers;
};
class FunctionNode {
mutable AssertingVH<Function> F;
-
+ FunctionComparator::FunctionHash Hash;
public:
- FunctionNode(Function *F) : F(F) {}
+ // Note the hash is recalculated potentially multiple times, but it is cheap.
+ FunctionNode(Function *F)
+ : F(F), Hash(FunctionComparator::functionHash(*F)) {}
Function *getFunc() const { return F; }
+ FunctionComparator::FunctionHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
void replaceBy(Function *G) const {
- assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) &&
- "The two functions must be equal");
-
F = G;
}
- void release() { F = 0; }
- bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
- }
+ void release() { F = nullptr; }
};
-}
+} // end anonymous namespace
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
if (L < R) return -1;
@@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
}
int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
- if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
- (uint64_t)&R.getSemantics()))
+  // Floats are ordered first by semantics (e.g. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
return Res;
return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
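[Editorial sketch] The "semantics first, then bit pattern" ordering above yields a deterministic total order even where IEEE comparison does not. A standalone C++ sketch of the bit-pattern leg (hypothetical names; the real code compares APInts):

    // Sketch only: once two values share the same float semantics, comparing
    // their raw bit patterns gives a total order. Note it distinguishes values
    // that IEEE '==' treats as equal, such as 0.0 and -0.0.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t bits(double D) {
      uint64_t U;
      std::memcpy(&U, &D, sizeof(U)); // bit-cast, no numeric conversion
      return U;
    }

    static int cmpDoubleBits(double L, double R) {
      uint64_t BL = bits(L), BR = bits(R);
      if (BL != BR)
        return BL < BR ? -1 : 1;
      return 0;
    }

    int main() {
      assert(cmpDoubleBits(1.0, 1.0) == 0);
      assert(cmpDoubleBits(0.0, -0.0) != 0); // distinct bit patterns
    }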
-int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
// Prevent heavy comparison, compare sizes first.
if (int Res = cmpNumbers(L.size(), R.size()))
return Res;
@@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
return 0;
}
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+ const MDNode* R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
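[Editorial sketch] cmpOperandBundlesSchema compares only the schema of the two bundle lists, tag names and input counts, leaving the operand values to the usual value-level comparison. A standalone C++ model of that shape (hypothetical BundleSchema type):

    // Sketch only: lexicographic comparison over (tag, arity) pairs; the
    // bundle operands themselves are deliberately not inspected here.
    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    struct BundleSchema {
      std::string Tag;
      std::size_t NumInputs;
    };

    static int cmpSchemas(const std::vector<BundleSchema> &L,
                          const std::vector<BundleSchema> &R) {
      if (L.size() != R.size())
        return L.size() < R.size() ? -1 : 1;
      for (std::size_t I = 0; I != L.size(); ++I) {
        if (int Res = L[I].Tag.compare(R[I].Tag))
          return Res < 0 ? -1 : 1;
        if (L[I].NumInputs != R[I].NumInputs)
          return L[I].NumInputs < R[I].NumInputs ? -1 : 1;
      }
      return 0;
    }

    int main() {
      std::vector<BundleSchema> A{{"deopt", 2}}, B{{"deopt", 3}};
      assert(cmpSchemas(A, A) == 0);
      assert(cmpSchemas(A, B) == -1);
    }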
+
/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
/// type.
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
unsigned TyLWidth = 0;
unsigned TyRWidth = 0;
- if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
TyLWidth = VecTyL->getBitWidth();
- if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
TyRWidth = VecTyR->getBitWidth();
if (TyLWidth != TyRWidth)
@@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
if (!L->isNullValue() && R->isNullValue())
return -1;
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
return Res;
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+    // endianness. This isn't a problem though, because the endianness of a module
+ // will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
switch (L->getValueID()) {
- case Value::UndefValueVal: return TypesRes;
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
case Value::ConstantIntVal: {
const APInt &LInt = cast<ConstantInt>(L)->getValue();
const APInt &RInt = cast<ConstantInt>(R)->getValue();
@@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
}
return 0;
}
- case Value::FunctionVal:
- case Value::GlobalVariableVal:
- case Value::GlobalAliasVal:
- default: // Unknown constant, cast L and R pointers to numbers and compare.
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+      for (BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
}
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+ return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
}
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
@@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
case Type::IntegerTyID:
- case Type::VectorTyID:
- // TyL == TyR would have returned true earlier.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
-
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+ if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+ return Res;
+ return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+ }
+ // TyL == TyR would have returned true earlier, because types are uniqued.
case Type::VoidTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
@@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::PPC_FP128TyID:
case Type::LabelTyID:
case Type::MetadataTyID:
+ case Type::TokenTyID:
return 0;
case Type::PointerTyID: {
@@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
return Res;
- return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
if (int Res =
@@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
if (int Res =
- cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
return Res;
- return cmpNumbers(
- (uint64_t)CI->getMetadata(LLVMContext::MD_range),
- (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
GEPR->accumulateConstantOffset(DL, OffsetR))
return cmpAPInts(OffsetL, OffsetR);
-
- if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
- (uint64_t)GEPR->getPointerOperand()->getType()))
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
return Res;
if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
@@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
return 0;
}
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+  // InlineAsms are uniqued. If they are the same pointer, they are obviously
+  // the same; otherwise compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ llvm_unreachable("InlineAsm blocks were not uniqued.");
+ return 0;
+}
+
/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
@@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
if (InlineAsmL && InlineAsmR)
- return cmpNumbers((uint64_t)L, (uint64_t)R);
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
if (InlineAsmL)
return 1;
if (InlineAsmR)
@@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
// Test whether two basic blocks have equivalent behaviour.
-int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) {
BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
do {
- if (int Res = cmpValues(InstL, InstR))
+ if (int Res = cmpValues(&*InstL, &*InstR))
return Res;
const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
@@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperations(InstL, InstR))
+ if (int Res = cmpOperations(&*InstL, &*InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
@@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
Value *OpR = InstR->getOperand(i);
if (int Res = cmpValues(OpL, OpR))
return Res;
- if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
- return Res;
- // TODO: Already checked in cmpOperation
- if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
- return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
}
}
@@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
-
sn_mapL.clear();
sn_mapR.clear();
@@ -1001,7 +1208,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasGC()) {
- if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC()))
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
return Res;
}
@@ -1009,7 +1216,7 @@ int FunctionComparator::compare() {
return Res;
if (FnL->hasSection()) {
- if (int Res = cmpStrings(FnL->getSection(), FnR->getSection()))
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
return Res;
}
@@ -1033,7 +1240,7 @@ int FunctionComparator::compare() {
ArgRI = FnR->arg_begin(),
ArgLE = FnL->arg_end();
ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
- if (cmpValues(ArgLI, ArgRI) != 0)
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
llvm_unreachable("Arguments repeat!");
}
@@ -1055,7 +1262,7 @@ int FunctionComparator::compare() {
if (int Res = cmpValues(BBL, BBR))
return Res;
- if (int Res = compare(BBL, BBR))
+ if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
const TerminatorInst *TermL = BBL->getTerminator();
@@ -1074,6 +1281,68 @@ int FunctionComparator::compare() {
}
namespace {
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64-bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+ void add(uint64_t V) {
+ Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ }
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+ // accumulating the hash of the function "structure." (BB and opcode sequence)
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+ H.add(45798);
+ for (auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const TerminatorInst *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
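[Editorial sketch] The traversal in functionHash can be modeled standalone: blocks are visited depth-first through their successors, a sentinel value separates blocks, and only opcodes (never operands) feed the hash. Hypothetical names throughout; mix() is a stand-in for hash_16_bytes:

    // Sketch only: the sentinel ensures that moving an opcode across a block
    // boundary changes the hash, not just reordering the opcode stream.
    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <vector>

    struct Block {
      std::vector<int> Opcodes;
      std::vector<int> Succs; // indices into the function's block list
    };

    static uint64_t mix(uint64_t H, uint64_t V) {
      return (H ^ V) * 0x9e3779b97f4a7c15ULL; // stand-in for hash_16_bytes
    }

    static uint64_t structureHash(const std::vector<Block> &Blocks) {
      uint64_t H = 0x6acaa36bef8325c5ULL;
      std::vector<int> Stack{0}; // start at the entry block
      std::set<int> Visited{0};
      while (!Stack.empty()) {
        int B = Stack.back();
        Stack.pop_back();
        H = mix(H, 45798); // block-header sentinel
        for (int Op : Blocks[B].Opcodes)
          H = mix(H, Op);
        for (int S : Blocks[B].Succs)
          if (Visited.insert(S).second)
            Stack.push_back(S);
      }
      return H;
    }

    int main() {
      std::vector<Block> F{{{1, 2}, {1}}, {{3}, {}}};
      std::printf("%llx\n", (unsigned long long)structureHash(F));
    }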
+
+namespace {
/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
@@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass {
public:
static char ID;
MergeFunctions()
- : ModulePass(ID), HasGlobalAliases(false) {
+ : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+ HasGlobalAliases(false) {
initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
private:
- typedef std::set<FunctionNode> FnTreeType;
+ // The function comparison operator is provided here so that FunctionNodes do
+ // not need to become larger with another pointer.
+ class FunctionNodeCmp {
+ GlobalNumberState* GlobalNumbers;
+ public:
+ FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+ bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+ // Order first by hashes, then full function comparison.
+ if (LHS.getHash() != RHS.getHash())
+ return LHS.getHash() < RHS.getHash();
+ FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+ return FCmp.compare() == -1;
+ }
+ };
+ typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+ GlobalNumberState GlobalNumbers;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
@@ -1133,17 +1419,23 @@ private:
void writeAlias(Function *F, Function *G);
/// Replace function F with function G in the function tree.
- void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G);
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G);
/// The set of all distinct functions. Use the insert() and remove() methods
- /// to modify it.
+ /// to modify it. The map allows efficient lookup and deferring of Functions.
FnTreeType FnTree;
+ // Map functions to the iterators of the FunctionNode which contains them
+ // in the FnTree. This must be updated carefully whenever the FnTree is
+ // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
+ // dangling iterators into FnTree. The invariant that preserves this is that
+ // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
+ ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
-} // end anonymous namespace
+} // end anonymous namespace
char MergeFunctions::ID = 0;
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
@@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(F1, F2).compare();
- int Res2 = FunctionComparator(F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
+ int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(F1, F3).compare();
- int Res4 = FunctionComparator(F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
+ int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
bool Transitive = true;
@@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
- Deferred.push_back(WeakVH(I));
+ // All functions in the module, ordered by hash. Functions with a unique
+ // hash value are easily eliminated.
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+ HashedFuncs;
+ for (Function &Func : M) {
+ if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+ }
}
+ std::stable_sort(
+ HashedFuncs.begin(), HashedFuncs.end(),
+ [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+ const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+ return a.first < b.first;
+ });
+
+ auto S = HashedFuncs.begin();
+ for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+ // If the hash value matches the previous value or the next one, we must
+ // consider merging it. Otherwise it is dropped and never considered again.
+ if ((I != S && std::prev(I)->first == I->first) ||
+        (std::next(I) != IE && std::next(I)->first == I->first)) {
+ Deferred.push_back(WeakVH(I->second));
+ }
+ }
+
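[Editorial sketch] The loop above keeps only functions whose hash matches an adjacent entry in the sorted order; a unique hash proves the function can equal nothing else and it is dropped immediately. A runnable standalone model of the same filter:

    // Sketch only: sort candidates by hash, then retain an entry iff its hash
    // equals its predecessor's or successor's.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<uint64_t, std::string>> Funcs{
          {7, "f"}, {3, "g"}, {7, "h"}, {9, "i"}};
      std::stable_sort(
          Funcs.begin(), Funcs.end(),
          [](const auto &A, const auto &B) { return A.first < B.first; });
      for (auto I = Funcs.begin(), E = Funcs.end(); I != E; ++I) {
        bool MatchesPrev = I != Funcs.begin() && std::prev(I)->first == I->first;
        bool MatchesNext = std::next(I) != E && std::next(I)->first == I->first;
        if (MatchesPrev || MatchesNext)
          std::cout << I->second << '\n'; // only "f" and "h" survive
      }
    }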
do {
std::vector<WeakVH> Worklist;
Deferred.swap(Worklist);
@@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) {
} while (!Deferred.empty());
FnTree.clear();
+ GlobalNumbers.clear();
return Changed;
}
@@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
++UI;
CallSite CS(U->getUser());
if (CS && CS.isCallee(U)) {
+ // Transfer the called function's attributes to the call site. Due to the
+      // bitcast we will 'lose' ABI-changing attributes because the 'called
+ // function' is no longer a Function* but the bitcast. Code that looks up
+ // the attributes from the called function will fail.
+
+ // FIXME: This is not actually true, at least not anymore. The callsite
+      // will always have the same ABI-affecting attributes as the callee,
+ // because otherwise the original input has UB. Note that Old and New
+ // always have matching ABI, so no attributes need to be changed.
+ // Transferring other attributes may help other optimizations, but that
+ // should be done uniformly and not in this ad-hoc way.
+ auto &Context = New->getContext();
+ auto NewFuncAttrs = New->getAttributes();
+ auto CallSiteAttrs = CS.getAttributes();
+
+ CallSiteAttrs = CallSiteAttrs.addAttributes(
+ Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
+ AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
+ if (Attrs.getNumSlots())
+ CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
+ }
+
+ CS.setAttributes(CallSiteAttrs);
+
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
}
@@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
- AI != AE; ++AI) {
- Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
+  for (Argument &AI : NewG->args()) {
+ Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
+ CI->setAttributes(F->getAttributes());
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
@@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
- PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
++NumFunctionsMerged;
}
-/// Replace function F for function G in the map.
-void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF,
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
Function *G) {
- Function *F = IterToF->getFunc();
-
- // A total order is already guaranteed otherwise because we process strong
- // functions before weak functions.
- assert(((F->mayBeOverridden() && G->mayBeOverridden()) ||
- (!F->mayBeOverridden() && !G->mayBeOverridden())) &&
- "Only change functions if both are strong or both are weak");
- (void)F;
-
- IterToF->replaceBy(G);
+ Function *F = FN.getFunc();
+ assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+ "The two functions must be equal");
+
+ auto I = FNodesInTree.find(F);
+ assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+ assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+ FnTreeType::iterator IterToFNInFnTree = I->second;
+ assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+ // Remove F -> FN and insert G -> FN
+ FNodesInTree.erase(I);
+ FNodesInTree.insert({G, IterToFNInFnTree});
+ // Replace F with G in FN, which is stored inside the FnTree.
+ FN.replaceBy(G);
}
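[Editorial sketch] The FNodesInTree bookkeeping above relies on std::set iterator stability: iterators stay valid unless their element is erased, so re-keying the side map suffices when the node it points at is updated in place. A standalone illustration (hypothetical types):

    // Sketch only: move the mapping from key "f" to key "g" while the stored
    // set iterator remains valid and still refers to the same node.
    #include <cassert>
    #include <map>
    #include <set>
    #include <string>

    int main() {
      std::set<int> Tree{10, 20};
      std::map<std::string, std::set<int>::iterator> Index;
      Index["f"] = Tree.find(10);

      auto It = Index["f"];
      Index.erase("f");
      Index["g"] = It; // the set itself was never touched
      assert(*Index["g"] == 10);
    }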
// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
@@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) {
FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
+ assert(FNodesInTree.count(NewFunction) == 0);
+ FNodesInTree.insert({NewFunction, Result.first});
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
@@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
if (OldF.getFunc()->getName() > NewFunction->getName()) {
// Swap the two functions.
Function *F = OldF.getFunc();
- replaceFunctionInTree(Result.first, NewFunction);
+ replaceFunctionInTree(*Result.first, NewFunction);
NewFunction = F;
assert(OldF.getFunc() != F && "Must have swapped the functions.");
}
@@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) {
// Remove a function from FnTree. If it was already in FnTree, add
// it to Deferred so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
- // We need to make sure we remove F, not a function "equal" to F per the
- // function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F));
- size_t Erased = 0;
- if (found != FnTree.end() && found->getFunc() == F) {
- Erased = 1;
- FnTree.erase(found);
- }
-
- if (Erased) {
- DEBUG(dbgs() << "Removed " << F->getName()
- << " from set and deferred it.\n");
+ auto I = FNodesInTree.find(F);
+ if (I != FNodesInTree.end()) {
+ DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n");
+ FnTree.erase(I->second);
+ // I->second has been invalidated, remove it from the FNodesInTree map to
+ // preserve the invariant.
+ FNodesInTree.erase(I);
Deferred.emplace_back(F);
}
}
@@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) {
void MergeFunctions::removeUsers(Value *V) {
std::vector<Value *> Worklist;
Worklist.push_back(V);
+ SmallSet<Value*, 8> Visited;
+ Visited.insert(V);
while (!Worklist.empty()) {
Value *V = Worklist.back();
Worklist.pop_back();
@@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) {
} else if (isa<GlobalValue>(U)) {
// do nothing
} else if (Constant *C = dyn_cast<Constant>(U)) {
- for (User *UU : C->users())
- Worklist.push_back(UU);
+ for (User *UU : C->users()) {
+          // Enqueue each user only the first time it is seen, so shared
+          // constants are not walked repeatedly.
+          if (Visited.insert(UU).second)
+            Worklist.push_back(UU);
+ }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4a7cb7b..0c5c84b 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
Function* PartialInliner::unswitchFunction(Function* F) {
// First, verify that this function is an unswitching candidate...
- BasicBlock* entryBlock = F->begin();
+ BasicBlock *entryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return nullptr;
@@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// of which will go outside.
BasicBlock* preReturn = newReturnBlock;
newReturnBlock = newReturnBlock->splitBasicBlock(
- newReturnBlock->getFirstNonPHI());
+ newReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = preReturn->begin();
- BasicBlock::iterator Ins = newReturnBlock->begin();
+ Instruction *Ins = &newReturnBlock->front();
while (I != preReturn->end()) {
PHINode* OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi) break;
-
- PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+
+ PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
OldPhi->replaceAllUsesWith(retPhi);
Ins = newReturnBlock->getFirstNonPHI();
-
- retPhi->addIncoming(I, preReturn);
+
+ retPhi->addIncoming(&*I, preReturn);
retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
newEntryBlock);
OldPhi->removeIncomingValue(newEntryBlock);
@@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
FE = duplicateFunction->end(); FI != FE; ++FI)
if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
&*FI != newNonReturnBlock)
- toExtract.push_back(FI);
-
+ toExtract.push_back(&*FI);
+
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
DT.recalculate(*duplicateFunction);
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 909baae..9876efa 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -12,19 +12,26 @@
//
//===----------------------------------------------------------------------===//
-
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -89,11 +96,21 @@ static cl::opt<bool> EnableLoopDistribute(
"enable-loop-distribute", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopDistribution Pass"));
+static cl::opt<bool> EnableNonLTOGlobalsModRef(
+ "enable-non-lto-gmr", cl::init(true), cl::Hidden,
+ cl::desc(
+ "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
+
+static cl::opt<bool> EnableLoopLoadElim(
+ "enable-loop-load-elim", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopLoadElimination Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
+ FunctionIndex = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses(
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
if (UseCFLAA)
- PM.add(createCFLAliasAnalysisPass());
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createScopedNoAliasAAPass());
- PM.add(createBasicAliasAnalysisPass());
+ PM.add(createCFLAAWrapperPass());
+ PM.add(createTypeBasedAAWrapperPass());
+ PM.add(createScopedNoAliasAAWrapperPass());
}
void PassManagerBuilder::populateFunctionPassManager(
@@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager(
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ // Allow forcing function attributes as a debugging and tuning aid.
+ MPM.add(createForceFunctionAttrsLegacyPass());
+
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager(
addInitialAliasAnalysisPasses(MPM);
if (!DisableUnitAtATime) {
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
+
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars
+ MPM.add(createPromoteMemoryToRegisterPass());
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
@@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
+ if (EnableNonLTOGlobalsModRef)
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
+
// Start of CallGraph SCC passes.
if (!DisableUnitAtATime)
MPM.add(createPruneEHPass()); // Remove dead EH info
@@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
@@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
+ // Remove avail extern fns and globals definitions if we aren't
+ // compiling an object file for later LTO. For LTO we want to preserve
+ // these so they are eligible for inlining at link-time. Note if they
+ // are unreferenced they will be removed by GlobalDCE later, so
+ // this only impacts referenced available externally globals.
+ // Eventually they will be suppressed during codegen, but eliminating
+ // here enables more opportunity for GlobalDCE as it may make
+ // globals referenced by available external functions dead
+ // and saves running remaining passes on the eliminated functions.
+ MPM.add(createEliminateAvailableExternallyPass());
+ }
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a fresh GlobalsModRef run at this point. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ //
+ // Note that this relies on a bug in the pass manager which preserves
+ // a module analysis into a function pass pipeline (and throughout it) so
+ // long as the first function pass doesn't invalidate the module analysis.
+ // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
+ // this to work. Fortunately, it is trivial to preserve AliasAnalysis
+ // (doing nothing preserves it as it is required to be conservatively
+ // correct in the face of IR changes).
+ MPM.add(createGlobalsAAWrapperPass());
+
if (RunFloat2Int)
MPM.add(createFloat2IntPass());
+ addExtensionsToPM(EP_VectorizerStart, MPM);
+
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
@@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ if (EnableLoopLoadElim)
+ MPM.add(createLoopLoadEliminationPass());
+
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
// on -O1 and no #pragma is found). Would be good to have these two passes
@@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager(
// GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
if (OptLevel > 1) {
- if (!PrepareForLTO) {
- // Remove avail extern fns and globals definitions if we aren't
- // compiling an object file for later LTO. For LTO we want to preserve
- // these so they are eligible for inlining at link-time. Note if they
- // are unreferenced they will be removed by GlobalDCE below, so
- // this only impacts referenced available externally globals.
- // Eventually they will be suppressed during codegen, but eliminating
- // here enables more opportunity for GlobalDCE as it may make
- // globals referenced by available external functions dead.
- MPM.add(createEliminateAvailableExternallyPass());
- }
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
MPM.add(createConstantMergePass()); // Merge dup global constants
}
@@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
+ if (FunctionIndex)
+ PM.add(createFunctionImportPass(FunctionIndex));
+
+ // Allow forcing function attributes as a debugging and tuning aid.
+ PM.add(createForceFunctionAttrsLegacyPass());
+
+ // Infer attributes about declarations if possible.
+ PM.add(createInferFunctionAttrsLegacyPass());
+
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
+ PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
PM.add(createGlobalOptimizerPass());
+ // Promote any localized global vars.
+ PM.add(createPromoteMemoryToRegisterPass());
// Linking modules together can lead to duplicated global constants, only
// keep one copy of each constant.
@@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createFunctionAttrsPass()); // Add nocapture.
- PM.add(createGlobalsModRefPass()); // IP alias analysis.
+ PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
if (EnableMLSM)
@@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopVectorizePass(true, LoopVectorize));
+ // Now that we've optimized loops (in particular loop induction variables),
+ // we may have exposed more scalar opportunities. Run parts of the scalar
+ // optimizer again at this point.
+ PM.add(createInstructionCombiningPass()); // Initial cleanup
+ PM.add(createCFGSimplificationPass()); // if-convert
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+
// More scalar chains could be vectorized due to more alias information
if (RunSLPAfterLoopVectorization)
if (SLPVectorize)
@@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
+ // Drop bodies of available externally objects to improve GlobalDCE.
+ PM.add(createEliminateAvailableExternallyPass());
+
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
@@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel > 1)
addLTOOptimizationPasses(PM);
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ PM.add(createCrossDSOCFIPass());
+
// Lower bit sets to globals. This pass supports Clang's control flow
// integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
// is enabled. The pass does nothing if CFI is disabled.
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index b2f1010..3af4afb 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- AttrBuilder NewAttributes;
-
- if (!SCCMightUnwind)
- NewAttributes.addAttribute(Attribute::NoUnwind);
- if (!SCCMightReturn)
- NewAttributes.addAttribute(Attribute::NoReturn);
-
Function *F = (*I)->getFunction();
- const AttributeSet &PAL = F->getAttributes().getFnAttributes();
- const AttributeSet &NPAL = AttributeSet::get(
- F->getContext(), AttributeSet::FunctionIndex, NewAttributes);
- if (PAL != NPAL) {
+ if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+ F->addFnAttr(Attribute::NoUnwind);
+ MadeChange = true;
+ }
+
+ if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+ F->addFnAttr(Attribute::NoReturn);
MadeChange = true;
- F->addAttributes(AttributeSet::FunctionIndex, NPAL);
}
}
@@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
// Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
@@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
// Remove the uncond branch and add an unreachable.
BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
+ new UnreachableInst(BB->getContext(), &*BB);
DeleteBasicBlock(New); // Delete the new BB.
MadeChange = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index c8dfa54..928d92e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -22,7 +22,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -44,7 +43,11 @@
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
using namespace llvm;
@@ -61,27 +64,51 @@ static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
"sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+ "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+ cl::desc("Inlined functions that account for more than N% of all samples "
+ "collected in the parent function, will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+ "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for more than N% of all samples "
+ "collected in the profile, will be marked as hot for the inliner "
+ "to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+ "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for less than N% of all samples "
+ "collected in the profile, will be marked as cold for the inliner "
+ "to consider."));
namespace {
-typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
-typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
-typedef std::pair<BasicBlock *, BasicBlock *> Edge;
-typedef DenseMap<Edge, unsigned> EdgeWeightMap;
-typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+ BlockEdgeMap;
/// \brief Sample profile pass.
///
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
-class SampleProfileLoader : public FunctionPass {
+class SampleProfileLoader : public ModulePass {
public:
// Class identification, replacement for typeinfo
static char ID;
SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
- Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
+ : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
+ Samples(nullptr), Filename(Name), ProfileIsValid(false),
+ TotalCollectedSamples(0) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
@@ -91,36 +118,37 @@ public:
const char *getPassName() const override { return "Sample profile pass"; }
- bool runOnFunction(Function &F) override;
+ bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
}
protected:
+ bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
- unsigned getInstWeight(Instruction &I);
- unsigned getBlockWeight(BasicBlock *BB);
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineHotFunctions(Function &F);
+ bool emitInlineHints(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
- void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
- void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
+ void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
+ void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
void findEquivalencesFor(BasicBlock *BB1,
SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree);
void propagateWeights(Function &F);
- unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F);
-
- /// \brief Line number for the function header. Used to compute absolute
- /// line numbers from the relative line numbers found in the profile.
- unsigned HeaderLineno;
+ void computeDominanceAndLoopInfo(Function &F);
+ unsigned getOffset(unsigned L, unsigned H) const;
+ void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
///
@@ -135,7 +163,7 @@ protected:
EdgeWeightMap EdgeWeights;
/// \brief Set of visited blocks during propagation.
- SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
+ SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
/// \brief Set of visited edges during propagation.
SmallSet<Edge, 128> VisitedEdges;
@@ -149,9 +177,9 @@ protected:
EquivalenceClassMap EquivalenceClass;
/// \brief Dominance, post-dominance and loop information.
- DominatorTree *DT;
- PostDominatorTree *PDT;
- LoopInfo *LI;
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<LoopInfo> LI;
/// \brief Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -159,9 +187,6 @@ protected:
/// \brief Successors for each basic block in the CFG.
BlockEdgeMap Successors;
- /// \brief LLVM context holding the debug data we need.
- LLVMContext *Ctx;
-
/// \brief Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
@@ -173,7 +198,207 @@ protected:
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
+
+ /// \brief Total number of samples collected in this profile.
+ ///
+ /// This is the sum of all the samples collected in all the functions executed
+ /// at runtime.
+ uint64_t TotalCollectedSamples;
};
+
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+ /// profiles (see flag -sample-profile-check-coverage).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction is larger than the default given by
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+ if (ParentTotalSamples == 0)
+ return false; // Avoid division by zero.
+
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (CallsiteTotalSamples == 0)
+ return false; // Callsite is trivially cold.
+
+ double PercentSamples =
+ (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+ return PercentSamples >= SampleProfileHotThreshold;
+}
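To make the threshold test above concrete, here is a minimal standalone C++ sketch of the same arithmetic. HotThresholdPercent stands in for the SampleProfileHotThreshold option; its value here is an assumption for illustration, not the pass's actual default.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the SampleProfileHotThreshold option.
static const double HotThresholdPercent = 5.0;

// A callsite is hot when it accounts for at least HotThresholdPercent
// of all samples collected in its caller.
static bool callsiteIsHotSketch(uint64_t CallerTotal, uint64_t CallsiteTotal) {
  if (CallerTotal == 0 || CallsiteTotal == 0)
    return false; // no caller data, or a trivially cold callsite
  double Percent = (double)CallsiteTotal / (double)CallerTotal * 100.0;
  return Percent >= HotThresholdPercent;
}

int main() {
  assert(callsiteIsHotSketch(1000, 60));  // 6% of the caller's samples: hot
  assert(!callsiteIsHotSketch(1000, 10)); // 1%: cold
  assert(!callsiteIsHotSketch(0, 10));    // no division by zero
}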
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
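The first-use detection above hinges on std::map::operator[] value-initializing a missing counter to zero, so the pre-increment yields 1 exactly once per (offset, discriminator) location. A simplified standalone sketch of that idiom (types and names are illustrative, not the pass's):

#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

int main() {
  std::map<std::pair<uint32_t, uint32_t>, unsigned> Coverage;
  uint64_t TotalUsed = 0;
  auto Mark = [&](uint32_t Off, uint32_t Disc, uint64_t Samples) {
    unsigned &Count = Coverage[{Off, Disc}]; // value-initialized to 0
    bool FirstTime = (++Count == 1);
    if (FirstTime)
      TotalUsed += Samples; // each record's samples are counted once
    return FirstTime;
  };
  assert(Mark(4, 0, 100));  // first use of record (4,0)
  assert(!Mark(4, 0, 100)); // repeated use is not accumulated again
  assert(TotalUsed == 100);
}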
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the samples found
+ // in the respective bodies. However, do not bother counting callees with 0
+ // total samples, these are callees that were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
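As a worked example of the formula above: 7 used records out of 10 yields 7 * 100 / 10 = 70% coverage; the integer division truncates, so 9 of 16 reports 56% rather than 56.25%; and an empty profile (Total == 0) deliberately reports 100% so that it never trips the coverage warnings emitted in emitAnnotations below.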
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ VisitedBlocks.clear();
+ VisitedEdges.clear();
+ EquivalenceClass.clear();
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ Predecessors.clear();
+ Successors.clear();
+ CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L relative to head_lineno \p H.
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns the offset to the header lineno. 16 bits are used to represent
+/// the offset; we assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+ return (L - H) & 0xffff;
}
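For example, with a function header at line H = 1020 and an instruction at line L = 1032, the stored offset is (1032 - 1020) & 0xffff = 12. Because the subtraction is unsigned and masked to 16 bits, an instruction attributed above the header (say, L = 1018) wraps around to 65534 instead of going negative, which is why the 65535-line assumption matters.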
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -190,8 +415,8 @@ void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
- BasicBlock *BB) {
- BasicBlock *Equiv = EquivalenceClass[BB];
+ const BasicBlock *BB) {
+ const BasicBlock *Equiv = EquivalenceClass[BB];
OS << "equivalence[" << BB->getName()
<< "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
}
@@ -200,8 +425,11 @@ void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
///
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
-void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
- OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+ const BasicBlock *BB) const {
+ const auto &I = BlockWeights.find(BB);
+ uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+ OS << "weight[" << BB->getName() << "]: " << W << "\n";
}
/// \brief Get the weight for an instruction.
@@ -214,51 +442,67 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
///
/// \param Inst Instruction to query.
///
-/// \returns The profiled weight of I.
-unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
DebugLoc DLoc = Inst.getDebugLoc();
if (!DLoc)
- return 0;
+ return std::error_code();
- unsigned Lineno = DLoc.getLine();
- if (Lineno < HeaderLineno)
- return 0;
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
const DILocation *DIL = DLoc;
- int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL->getDiscriminator();
- unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
- DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
- << " (line offset: " << LOffset << "." << Discriminator
- << " - weight: " << Weight << ")\n");
- return Weight;
+ unsigned Lineno = DLoc.getLine();
+ unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+ uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+ uint32_t Discriminator = DIL->getDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ if (R) {
+ bool FirstMark =
+ CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+ if (FirstMark) {
+ const Function *F = Inst.getParent()->getParent();
+ LLVMContext &Ctx = F->getContext();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, *F, DLoc,
+ Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+ Twine(LineOffset) +
+ ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ }
+ DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
+ << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+ << DIL->getDiscriminator() << " - weight: " << R.get()
+ << ")\n");
+ }
+ return R;
}
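The switch from returning 0 to returning ErrorOr<uint64_t> lets callers distinguish "no sample attached" from a genuine weight of zero. A minimal sketch of the calling convention, assuming LLVM's Support headers are on the include path (the helper and its argument are hypothetical):

#include "llvm/Support/ErrorOr.h"
#include <cassert>
#include <cstdint>
#include <system_error>

// An error_code result means "unknown", which is distinct from a
// known weight of 0.
static llvm::ErrorOr<uint64_t> weightOf(int Tag) {
  if (Tag < 0)
    return std::error_code(); // no debug location / no samples
  return (uint64_t)Tag;
}

int main() {
  assert(!weightOf(-1));                         // unknown converts to false
  assert(weightOf(0) && weightOf(0).get() == 0); // known zero is still a value
}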
/// \brief Compute the weight of a basic block.
///
/// The weight of basic block \p BB is the maximum weight of all the
-/// instructions in BB. The weight of \p BB is computed and cached in
-/// the BlockWeights map.
+/// instructions in BB.
///
/// \param BB The basic block to query.
///
-/// \returns The computed weight of BB.
-unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
- // If we've computed BB's weight before, return it.
- std::pair<BlockWeightMap::iterator, bool> Entry =
- BlockWeights.insert(std::make_pair(BB, 0));
- if (!Entry.second)
- return Entry.first->second;
-
- // Otherwise, compute and cache BB's weight.
- unsigned Weight = 0;
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+ bool Found = false;
+ uint64_t Weight = 0;
for (auto &I : BB->getInstList()) {
- unsigned InstWeight = getInstWeight(I);
- if (InstWeight > Weight)
- Weight = InstWeight;
+ const ErrorOr<uint64_t> &R = getInstWeight(I);
+ if (R && R.get() >= Weight) {
+ Weight = R.get();
+ Found = true;
+ }
}
- Entry.first->second = Weight;
- return Weight;
+ if (Found)
+ return Weight;
+ else
+ return std::error_code();
}
/// \brief Compute and store the weights of every basic block.
@@ -270,15 +514,199 @@ unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
bool SampleProfileLoader::computeBlockWeights(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "Block weights\n");
- for (auto &BB : F) {
- unsigned Weight = getBlockWeight(&BB);
- Changed |= (Weight > 0);
+ for (const auto &BB : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+ if (Weight) {
+ BlockWeights[&BB] = Weight.get();
+ VisitedBlocks.insert(&BB);
+ Changed = true;
+ }
DEBUG(printBlockWeight(dbgs(), &BB));
}
return Changed;
}
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains
+/// all samples that reside in that inlined instance. We first find the
+/// inlined instance that the call instruction belongs to, then we
+/// traverse its children to find the callsite with the matching
+/// location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return nullptr;
+ }
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+
+ Function *CalleeFunc = Inst.getCalledFunction();
+ if (!CalleeFunc) {
+ return nullptr;
+ }
+
+ StringRef CalleeName = CalleeFunc->getName();
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return nullptr;
+
+ return FS->findFunctionSamplesAt(
+ CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// that the instruction comes from. We traverse the inline stack of that
+/// instruction and match it against the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+ SmallVector<CallsiteLocation, 10> S;
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return Samples;
+ }
+ StringRef CalleeName;
+ for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+ DIL = DIL->getInlinedAt()) {
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+ if (!CalleeName.empty()) {
+ S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+ }
+ CalleeName = SP->getLinkageName();
+ }
+ if (S.size() == 0)
+ return Samples;
+ const FunctionSamples *FS = Samples;
+ for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+ FS = FS->findFunctionSamplesAt(S[i]);
+ }
+ return FS;
+}
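For example, suppose foo was inlined into bar at line offset 4 and bar is the function being annotated. Walking the debug-location chain from the instruction, the innermost frame (foo's) pushes nothing but records "foo" as CalleeName; the next frame (bar's) then pushes the callsite (offset 4, discriminator, "foo"). The final loop walks that stack outermost-first, descending from bar's profile into the callsite samples recorded for foo.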
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+ if (TotalCollectedSamples == 0)
+ return false;
+
+ uint64_t FunctionSamples = Samples->getTotalSamples();
+ double SamplesPercent =
+ (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+ // If the function collected more samples than the hot threshold, mark
+ // it globally hot.
+ if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied inline hint to globally hot function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ // If the function collected fewer samples than the cold threshold, mark
+ // it globally cold.
+ if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+ F.addFnAttr(llvm::Attribute::Cold);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied cold hint to globally cold function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ return false;
+}
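To make the arithmetic concrete: if the profile collected 1,000,000 samples module-wide and \p F accounts for 400,000 of them, SamplesPercent is 40.0, so with a hot threshold of, say, 30% the function receives InlineHint; a function with only 1,000 samples (0.1%) would instead be marked Cold under a cold threshold of, say, 0.5%. Both threshold values here are illustrative; the actual defaults come from the SampleProfileGlobalHotThreshold and SampleProfileGlobalColdThreshold options.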
+
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Iteratively traverse all callsites of the function \p F, and check
+/// whether the corresponding inlined instance exists and is hot in the
+/// profile. If it is hot enough, inline the callsite; this adds the
+/// callee's own callsites to the caller for the next iteration.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F Function to perform iterative inlining on.
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+ bool Changed = false;
+ LLVMContext &Ctx = F.getContext();
+ while (true) {
+ bool LocalChanged = false;
+ SmallVector<CallInst *, 10> CIS;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+ CIS.push_back(CI);
+ }
+ }
+ for (auto CI : CIS) {
+ InlineFunctionInfo IFI;
+ Function *CalledFunction = CI->getCalledFunction();
+ DebugLoc DLoc = CI->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+ if (InlineFunction(CI, IFI)) {
+ LocalChanged = true;
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+ Twine("inlined hot callee '") +
+ CalledFunction->getName() + "' with " +
+ Twine(NumSamples) + " samples into '" +
+ F.getName() + "'");
+ }
+ }
+ if (LocalChanged) {
+ Changed = true;
+ } else {
+ break;
+ }
+ }
+ return Changed;
+}
+
/// \brief Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
@@ -305,12 +733,13 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree) {
- for (auto *BB2 : Descendants) {
+ const BasicBlock *EC = EquivalenceClass[BB1];
+ uint64_t Weight = BlockWeights[EC];
+ for (const auto *BB2 : Descendants) {
bool IsDomParent = DomTree->dominates(BB2, BB1);
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
- if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
- IsInSameLoop) {
- EquivalenceClass[BB2] = BB1;
+ if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+ EquivalenceClass[BB2] = EC;
// If BB2 is heavier than BB1, make BB2 have the same weight
// as BB1.
@@ -320,11 +749,10 @@ void SampleProfileLoader::findEquivalencesFor(
// during the propagation phase. Right now, we just want to
// make sure that BB1 has the largest weight of all the
// members of its equivalence set.
- unsigned &BB1Weight = BlockWeights[BB1];
- unsigned &BB2Weight = BlockWeights[BB2];
- BB1Weight = std::max(BB1Weight, BB2Weight);
+ Weight = std::max(Weight, BlockWeights[BB2]);
}
}
+ BlockWeights[EC] = Weight;
}
/// \brief Find equivalence classes.
@@ -364,19 +792,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
// class by making BB2's equivalence class be BB1.
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
-
- // Repeat the same logic for all the blocks post-dominated by BB1.
- // We are looking for every basic block BB2 such that:
- //
- // 1- BB1 post-dominates BB2.
- // 2- BB2 dominates BB1.
- // 3- BB1 and BB2 are in the same loop nest.
- //
- // If all those conditions hold, BB2's equivalence class is BB1.
- DominatedBBs.clear();
- PDT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, DT);
+ findEquivalencesFor(BB1, DominatedBBs, PDT.get());
DEBUG(printBlockEquivalence(dbgs(), BB1));
}
@@ -389,8 +805,8 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
// to all the blocks in that equivalence class.
DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
for (auto &BI : F) {
- BasicBlock *BB = &BI;
- BasicBlock *EquivBB = EquivalenceClass[BB];
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EquivBB = EquivalenceClass[BB];
if (BB != EquivBB)
BlockWeights[BB] = BlockWeights[EquivBB];
DEBUG(printBlockWeight(dbgs(), BB));
@@ -407,7 +823,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
/// \param UnknownEdge Set if E has not been visited before.
///
/// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
Edge *UnknownEdge) {
if (!VisitedEdges.count(E)) {
(*NumUnknownEdges)++;
@@ -432,8 +848,9 @@ unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
bool SampleProfileLoader::propagateThroughEdges(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
- for (auto &BI : F) {
- BasicBlock *BB = &BI;
+ for (const auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EC = EquivalenceClass[BB];
// Visit all the predecessor and successor edges to determine
// which ones have a weight assigned already. Note that it doesn't
@@ -441,7 +858,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
// only case we are interested in handling is when only a single
// edge is unknown (see setEdgeOrBlockWeight).
for (unsigned i = 0; i < 2; i++) {
- unsigned TotalWeight = 0;
+ uint64_t TotalWeight = 0;
unsigned NumUnknownEdges = 0;
Edge UnknownEdge, SelfReferentialEdge;
@@ -485,7 +902,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
// all edges will get a weight, or iteration will stop when
// it reaches SampleProfileMaxPropagateIterations.
if (NumUnknownEdges <= 1) {
- unsigned &BBWeight = BlockWeights[BB];
+ uint64_t &BBWeight = BlockWeights[EC];
if (NumUnknownEdges == 0) {
// If we already know the weight of all edges, the weight of the
// basic block can be computed. It should be no larger than the sum
@@ -497,9 +914,9 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
<< " known. Set weight for block: ";
printBlockWeight(dbgs(), BB););
}
- if (VisitedBlocks.insert(BB).second)
+ if (VisitedBlocks.insert(EC).second)
Changed = true;
- } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
// If there is a single unknown edge and the block has been
// visited, then we can compute E's weight.
if (BBWeight >= TotalWeight)
@@ -511,8 +928,8 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
- } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
- unsigned &BBWeight = BlockWeights[BB];
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+ uint64_t &BBWeight = BlockWeights[BB];
// We have a self-referential edge and the weight of BB is known.
if (BBWeight >= TotalWeight)
EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
@@ -578,7 +995,7 @@ void SampleProfileLoader::buildEdges(Function &F) {
/// known).
void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
- unsigned i = 0;
+ unsigned I = 0;
// Add an entry count to the function using the samples gathered
// at the function entry.
@@ -592,14 +1009,15 @@ void SampleProfileLoader::propagateWeights(Function &F) {
buildEdges(F);
// Propagate until we converge or we go past the iteration limit.
- while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F);
}
// Generate MD_prof metadata for every branch instruction using the
// edge weights computed during propagation.
DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
- MDBuilder MDB(F.getContext());
+ LLVMContext &Ctx = F.getContext();
+ MDBuilder MDB(Ctx);
for (auto &BI : F) {
BasicBlock *BB = &BI;
TerminatorInst *TI = BB->getTerminator();
@@ -610,24 +1028,44 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << "\nGetting weights for branch at line "
<< TI->getDebugLoc().getLine() << ".\n");
- SmallVector<unsigned, 4> Weights;
- bool AllWeightsZero = true;
+ SmallVector<uint32_t, 4> Weights;
+ uint32_t MaxWeight = 0;
+ DebugLoc MaxDestLoc;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
- unsigned Weight = EdgeWeights[E];
+ uint64_t Weight = EdgeWeights[E];
DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
- Weights.push_back(Weight);
- if (Weight != 0)
- AllWeightsZero = false;
+ // Use uint32_t saturated arithmetic to adjust the incoming weights,
+ // if needed. Sample counts in profiles are 64-bit unsigned values,
+ // but internally branch weights are expressed as 32-bit values.
+ if (Weight > std::numeric_limits<uint32_t>::max()) {
+ DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+ Weight = std::numeric_limits<uint32_t>::max();
+ }
+ Weights.push_back(static_cast<uint32_t>(Weight));
+ if (Weight != 0) {
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ }
+ }
}
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (!AllWeightsZero) {
+ if (MaxWeight > 0) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
+ DebugLoc BranchLoc = TI->getDebugLoc();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, F, MaxDestLoc,
+ Twine("most popular destination for conditional branches at ") +
+ ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+ Twine(BranchLoc.getLine()) + ":" +
+ Twine(BranchLoc.getCol()))
+ : Twine("<UNKNOWN LOCATION>")));
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
@@ -649,7 +1087,7 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
if (DISubprogram *S = getDISubprogram(&F))
return S->getLine();
- // If could not find the start of \p F, emit a diagnostic to inform the user
+ // If the start of \p F is missing, emit a diagnostic to inform the user
// about the missed opportunity.
F.getContext().diagnose(DiagnosticInfoSampleProfile(
"No debug information found in function " + F.getName() +
@@ -658,6 +1096,17 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
return 0;
}
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+ DT.reset(new DominatorTree);
+ DT->recalculate(F);
+
+ PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+ PDT->recalculate(F);
+
+ LI.reset(new LoopInfo);
+ LI->analyze(*DT);
+}
+
/// \brief Generate branch weight metadata for all branches in \p F.
///
/// Branch weights are computed out of instruction samples using a
@@ -710,18 +1159,23 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
- // Initialize invariants used during computation and propagation.
- HeaderLineno = getFunctionLoc(F);
- if (HeaderLineno == 0)
+ if (getFunctionLoc(F) == 0)
return false;
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
- << ": " << HeaderLineno << "\n");
+ << ": " << getFunctionLoc(F) << "\n");
+
+ Changed |= emitInlineHints(F);
+
+ Changed |= inlineHotFunctions(F);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Compute dominance and loop info needed for propagation.
+ computeDominanceAndLoopInfo(F);
+
// Find equivalence classes.
findEquivalenceClasses(F);
@@ -729,24 +1183,48 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
propagateWeights(F);
}
+ // If coverage checking was requested, compute it now.
+ if (SampleProfileRecordCoverage) {
+ unsigned Used = CoverageTracker.countUsedRecords(Samples);
+ unsigned Total = CoverageTracker.countBodyRecords(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileRecordCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+
+ if (SampleProfileSampleCoverage) {
+ uint64_t Used = CoverageTracker.getTotalUsedSamples();
+ uint64_t Total = CoverageTracker.countBodySamples(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileSampleCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
return Changed;
}
char SampleProfileLoader::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
- auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
+ auto &Ctx = M.getContext();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
- M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
@@ -754,22 +1232,32 @@ bool SampleProfileLoader::doInitialization(Module &M) {
return true;
}
-FunctionPass *llvm::createSampleProfileLoaderPass() {
+ModulePass *llvm::createSampleProfileLoaderPass() {
return new SampleProfileLoader(SampleProfileFile);
}
-FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoader(Name);
}
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnModule(Module &M) {
if (!ProfileIsValid)
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PDT = &getAnalysis<PostDominatorTree>();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- Ctx = &F.getParent()->getContext();
+ // Compute the total number of samples collected in this profile.
+ for (const auto &I : Reader->getProfiles())
+ TotalCollectedSamples += I.second.getTotalSamples();
+
+ bool retval = false;
+ for (auto &F : M)
+ if (!F.isDeclaration()) {
+ clearFunctionData();
+ retval |= runOnFunction(F);
+ }
+ return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
return emitAnnotations(F);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 956991a..c94cc7c 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -7,47 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass loops over all of the functions in the input module, looking for
+// This pass loops over all of the functions in the input module, looking for
// dead declarations and removes them. Dead declarations are declarations of
// functions for which no implementation is available (i.e., declarations for
// unused library functions).
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
#define DEBUG_TYPE "strip-dead-prototypes"
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
-namespace {
-
-/// @brief Pass to remove unused function declarations.
-class StripDeadPrototypesPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(ID) {
- initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false)
-
-bool StripDeadPrototypesPass::runOnModule(Module &M) {
+static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
-
+
// Erase dead function prototypes.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = I++;
+ Function *F = &*I++;
// Function must be a prototype and unused.
if (F->isDeclaration() && F->use_empty()) {
F->eraseFromParent();
@@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) {
// Erase dead global var prototypes.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
// Global must be a prototype and unused.
if (GV->isDeclaration() && GV->use_empty())
GV->eraseFromParent();
}
-
+
// Return an indication of whether we changed anything or not.
return MadeChange;
}
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+ if (stripDeadPrototypes(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ return stripDeadPrototypes(M);
+ }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
ModulePass *llvm::createStripDeadPrototypesPass() {
- return new StripDeadPrototypesPass();
+ return new StripDeadPrototypesLegacyPass();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index a4f30c5..46f352f 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
@@ -305,6 +305,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
+ std::set<DISubprogram *> LiveSPs;
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ LiveSPs.insert(SP);
+ }
+
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
bool SubprogramChange = false;
@@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP->getFunction())
+ if (LiveSPs.count(DISP))
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d2c109f..6f49399 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,4 +1,4 @@
-//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
+
using namespace llvm;
using namespace PatternMatch;
@@ -67,17 +68,17 @@ namespace {
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
- APFloat *getFpValPtr(void)
+ APFloat *getFpValPtr()
{ return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
- const APFloat *getFpValPtr(void) const
+ const APFloat *getFpValPtr() const
{ return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
- const APFloat &getFpVal(void) const {
+ const APFloat &getFpVal() const {
assert(IsFp && BufHasFpVal && "Incorret state");
return *getFpValPtr();
}
- APFloat &getFpVal(void) {
+ APFloat &getFpVal() {
assert(IsFp && BufHasFpVal && "Incorret state");
return *getFpValPtr();
}
@@ -92,8 +93,8 @@ namespace {
// TODO: We should get rid of this function when APFloat can be constructed
// from an *SIGNED* integer.
APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
- private:
+ private:
bool IsFp;
// True iff FpValBuf contains an instance of APFloat.
@@ -114,10 +115,10 @@ namespace {
///
class FAddend {
public:
- FAddend() { Val = nullptr; }
+ FAddend() : Val(nullptr) {}
- Value *getSymVal (void) const { return Val; }
- const FAddendCoef &getCoef(void) const { return Coeff; }
+ Value *getSymVal() const { return Val; }
+ const FAddendCoef &getCoef() const { return Coeff; }
bool isConstant() const { return Val == nullptr; }
bool isZero() const { return Coeff.isZero(); }
@@ -182,7 +183,6 @@ namespace {
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
- private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
unsigned CreateInstrNum;
@@ -193,7 +193,8 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+
+} // anonymous namespace
//===----------------------------------------------------------------------===//
//
@@ -602,7 +603,6 @@ Value *FAddCombine::simplify(Instruction *I) {
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
-
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
@@ -886,7 +886,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
return Op0ZeroPosition >= Op1OnePosition;
}
-/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// Return true if we can prove that:
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
@@ -1118,8 +1118,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
// transform them into (X + (signbit ^ C))
if (XorRHS->getValue().isSignBit())
- return BinaryOperator::CreateAdd(XorLHS,
- ConstantExpr::getXor(XorRHS, CI));
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
@@ -1421,7 +1421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
@@ -1589,7 +1588,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
}
-
{
Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
@@ -1611,32 +1609,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(A, B);
}
- // (sub (select (a, c, b)), (select (a, d, b))) -> (select (a, (sub c, d), 0))
- // (sub (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (sub c, d)))
- if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
- if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
- if (SI0->getCondition() == SI1->getCondition()) {
- if (Value *V = SimplifySubInst(
- SI0->getFalseValue(), SI1->getFalseValue(), I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return SelectInst::Create(
- SI0->getCondition(),
- Builder->CreateSub(SI0->getTrueValue(), SI1->getTrueValue(), "",
- /*HasNUW=*/I.hasNoUnsignedWrap(),
- /*HasNSW=*/I.hasNoSignedWrap()),
- V);
- if (Value *V = SimplifySubInst(SI0->getTrueValue(), SI1->getTrueValue(),
- I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return SelectInst::Create(
- SI0->getCondition(), V,
- Builder->CreateSub(SI0->getFalseValue(), SI1->getFalseValue(), "",
- /*HasNUW=*/I.hasNoUnsignedWrap(),
- /*HasNSW=*/I.hasNoSignedWrap()));
- }
- }
- }
-
if (Op0->hasOneUse()) {
Value *Y = nullptr;
// ((X | Y) - X) --> (~X & Y)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 15e0889..95c50d3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -37,9 +37,9 @@ static inline Value *dyn_castNotVal(Value *V) {
return nullptr;
}
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
+/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
+/// a three bit mask. It also returns whether it is an ordered predicate by
+/// reference.
static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
isOrdered = false;
switch (CC) {
@@ -64,10 +64,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
}
}
-/// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
+/// This is the complement of getICmpCode, which turns an opcode and two
+/// operands into either a constant true or false, or a brand new ICmp
+/// instruction. The sign is passed in to determine which kind of predicate to
+/// use in the new icmp instruction.
static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
InstCombiner::BuilderTy *Builder) {
ICmpInst::Predicate NewPred;
@@ -76,9 +76,9 @@ static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
return Builder->CreateICmp(NewPred, LHS, RHS);
}
-/// getFCmpValue - This is the complement of getFCmpCode, which turns an
-/// opcode and two operands into either a FCmp instruction. isordered is passed
-/// in to determine which kind of predicate to use in the new fcmp instruction.
+/// This is the complement of getFCmpCode, which turns an opcode and two
+/// operands into either a FCmp instruction. isordered is passed in to determine
+/// which kind of predicate to use in the new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
Value *LHS, Value *RHS,
InstCombiner::BuilderTy *Builder) {
@@ -150,14 +150,13 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
else //if (Op == Instruction::Xor)
BinOp = Builder->CreateXor(NewLHS, NewRHS);
- Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
return Builder->CreateCall(F, BinOp);
}
-// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
-// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
-// guaranteed to be a binary operator.
+/// This handles expressions of the form ((val OP C1) & C2). Where
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+/// guaranteed to be a binary operator.
Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *OpRHS,
ConstantInt *AndRHS,
@@ -341,10 +340,10 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
return Builder->CreateICmpUGT(Add, LowerBound);
}
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side. The 1s are allowed to wrap from LSB to
-// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
-// not, since all 1s are not contiguous.
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB,
+/// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+/// not, since all 1s are not contiguous.
static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
const APInt& V = Val->getValue();
uint32_t BitWidth = Val->getType()->getBitWidth();
@@ -357,9 +356,8 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
return true;
}
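The same predicate (without the MB/ME mask-bound outputs computed by the in-tree version) can be expressed with two classic bit tricks: a value is one contiguous run of 1s iff adding 1 to value | (value - 1) clears every bit of the value, and a wrapped run of 1s is exactly a value whose complement is a contiguous run. A standalone sketch built on those observations:

#include <cassert>
#include <cstdint>

// True iff V is one contiguous, non-wrapping run of 1s.
static bool isContiguousOnes(uint32_t V) {
  return V != 0 && (((V | (V - 1)) + 1) & V) == 0;
}

// True iff V is a run of 1s that may wrap from LSB to MSB: either the
// ones are contiguous, or the zeros are (complement is a plain run).
static bool isRunOfOnesSketch(uint32_t V) {
  return V != 0 && (isContiguousOnes(V) || isContiguousOnes(~V));
}

int main() {
  assert(isRunOfOnesSketch(0x0000FFFFu));
  assert(isRunOfOnesSketch(0xFF0000FFu));  // wraps MSB -> LSB
  assert(!isRunOfOnesSketch(0x0F0F0000u)); // 1s are not contiguous
}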
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub. If we can fold one of
-/// the following xforms:
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub. If we can fold one of the following xforms:
///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -449,8 +447,8 @@ enum MaskedICmpType {
FoldMskICmp_BMask_NotMixed = 512
};
-/// return the set of pattern classes (from MaskedICmpType)
-/// that (icmp SCC (A & B), C) satisfies
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
ICmpInst::Predicate SCC)
{
@@ -538,8 +536,8 @@ static unsigned conjugateICmpMask(unsigned Mask) {
return NewMask;
}
-/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
-/// if possible. The returned predicate is either == or !=. Returns false if
+/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
+/// The returned predicate is either == or !=. Returns false if
/// decomposition fails.
static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
Value *&X, Value *&Y, Value *&Z) {
@@ -585,10 +583,9 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
return true;
}
-/// foldLogOpOfMaskedICmpsHelper:
-/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// return the set of pattern classes (from MaskedICmpType)
-/// that both LHS and RHS satisfy
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value*& B, Value*& C,
Value*& D, Value*& E,
@@ -700,9 +697,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
return left_type & right_type;
}
-/// foldLogOpOfMaskedICmps:
-/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// into a single (icmp(A & X) ==/!= Y)
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy *Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
@@ -879,7 +876,7 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder->CreateICmp(NewPred, Input, RangeEnd);
}
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1123,9 +1120,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return nullptr;
}
-/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
@@ -1203,6 +1199,54 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return nullptr;
}
+/// Match De Morgan's Laws:
+/// (~A & ~B) == (~(A | B))
+/// (~A | ~B) == (~(A & B))
+static Instruction *matchDeMorgansLaws(BinaryOperator &I,
+ InstCombiner::BuilderTy *Builder) {
+ auto Opcode = I.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Trying to match De Morgan's Laws with something other than and/or");
+ // Flip the logic operation.
+ if (Opcode == Instruction::And)
+ Opcode = Instruction::Or;
+ else
+ Opcode = Instruction::And;
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ // TODO: Use pattern matchers instead of dyn_cast.
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal,
+ I.getName() + ".demorgan");
+ return BinaryOperator::CreateNot(LogicOp);
+ }
+
+ // De Morgan's Law in disguise:
+ // (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
+ // (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
+ Value *A = nullptr;
+ Value *B = nullptr;
+ ConstantInt *C1 = nullptr;
+ if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
+ match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
+ // TODO: This check could be loosened to handle different type sizes.
+ // Alternatively, we could fix the definition of m_Not to recognize a not
+ // operation hidden by a zext?
+ if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
+ C1->isOne()) {
+ Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
+ I.getName() + ".demorgan");
+ Value *Not = Builder->CreateNot(LogicOp);
+ return CastInst::CreateZExtOrBitCast(Not, I.getType());
+ }
+ }
+
+ return nullptr;
+}
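The identities being matched can be checked exhaustively for small widths; a standalone sketch that also exercises the zext(bool)-xor-1 disguise handled above:

#include <cassert>
#include <cstdint>

int main() {
  // De Morgan's laws on all pairs of 8-bit values.
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t a = (uint8_t)A, b = (uint8_t)B;
      assert((uint8_t)(~a & ~b) == (uint8_t)~(a | b));
      assert((uint8_t)(~a | ~b) == (uint8_t)~(a & b));
    }
  // The disguised form: xor-with-1 acts as "not" on a zext'ed bool.
  for (unsigned a = 0; a < 2; ++a)
    for (unsigned b = 0; b < 2; ++b)
      assert(((a ^ 1u) & (b ^ 1u)) == (unsigned)!(a | b));
}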
+
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1273,6 +1317,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
return BinaryOperator::CreateAnd(V, AndRHS);
+ // -x & 1 -> x & 1
+ if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+ return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
+
// (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
// has 1's for all bits that the subtraction with A might affect.
if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
@@ -1329,15 +1377,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
-
- // (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
{
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
@@ -1446,14 +1487,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Res);
- // fold (and (cast A), (cast B)) -> (cast (and A, B))
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ Value *Op0COp = Op0C->getOperand(0);
+ Type *SrcTy = Op0COp->getType();
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
- Type *SrcTy = Op0C->getOperand(0)->getType();
if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVectorTy()) {
- Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+ Value *Op1COp = Op1C->getOperand(0);
// Only do this if the casts both really cause code to be generated.
if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
@@ -1478,6 +1520,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
+ // If we are masking off the sign bit of a floating-point value, convert
+ // this to the canonical fabs intrinsic call and cast back to integer.
+ // The backend should know how to optimize fabs().
+ // TODO: This transform should also apply to vectors.
+ ConstantInt *CI;
+ if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
+ match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
+ Module *M = I.getModule();
+ Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
+ Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
+ return CastInst::CreateBitOrPointerCast(Call, I.getType());
+ }
+ }
+
{
Value *X = nullptr;
bool OpsSwapped = false;
@@ -1509,163 +1565,195 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap. The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero. This expression is
-/// accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
+/// validate that all `value`s are identical and that the bitnumber to bitnumber
+/// mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted,
+/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
///
/// This function returns true if the match was unsuccessful and false if it succeeded.
/// On entry to the function the "OverallLeftShift" is a signed integer value
-/// indicating the number of bytes that the subexpression is later shifted. For
+/// indicating the number of bits that the subexpression is later shifted. For
/// example, if the expression is later right shifted by 16 bits, the
-/// OverallLeftShift value would be -2 on entry. This is used to specify which
-/// byte of ByteValues is actually being set.
+/// OverallLeftShift value would be -16 on entry. This is used to specify which
+/// bits of BitValues are actually being set.
///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user. For example, in (X & 255), X will be
-/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values. ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
+/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
+/// bit is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bitmask of 255. BitMask is always in the local
+/// (OverallLeftShift) coordinate space.
///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
- SmallVectorImpl<Value *> &ByteValues) {
+static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
+ SmallVectorImpl<Value *> &BitValues,
+ SmallVectorImpl<int> &BitProvenance) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues) ||
- CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
- ByteValues);
- }
-
- // If this is a logical shift by a constant multiple of 8, recurse with
- // OverallLeftShift and ByteMask adjusted.
+ if (I->getOpcode() == Instruction::Or)
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance) ||
+ CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
+
+ // If this is a logical shift by a constant, recurse with OverallLeftShift
+ // and BitMask adjusted.
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
unsigned ShAmt =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
- // Ensure the shift amount is defined and of a byte value.
- if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined.
+ if (ShAmt > BitValues.size())
return true;
- unsigned ByteShift = ShAmt >> 3;
+ unsigned BitShift = ShAmt;
if (I->getOpcode() == Instruction::Shl) {
- // X << 2 -> collect(X, +2)
- OverallLeftShift += ByteShift;
- ByteMask >>= ByteShift;
+ // X << C -> collect(X, +C)
+ OverallLeftShift += BitShift;
+ BitMask = BitMask.lshr(BitShift);
} else {
- // X >>u 2 -> collect(X, -2)
- OverallLeftShift -= ByteShift;
- ByteMask <<= ByteShift;
- ByteMask &= (~0U >> (32-ByteValues.size()));
+ // X >>u C -> collect(X, -C)
+ OverallLeftShift -= BitShift;
+ BitMask = BitMask.shl(BitShift);
}
- if (OverallLeftShift >= (int)ByteValues.size()) return true;
- if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+ if (OverallLeftShift >= (int)BitValues.size())
+ return true;
+ if (OverallLeftShift <= -(int)BitValues.size())
+ return true;
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
}
- // If this is a logical 'and' with a mask that clears bytes, clear the
- // corresponding bytes in ByteMask.
+ // If this is a logical 'and' with a mask that clears bits, clear the
+ // corresponding bits in BitMask.
if (I->getOpcode() == Instruction::And &&
isa<ConstantInt>(I->getOperand(1))) {
- // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
- unsigned NumBytes = ByteValues.size();
- APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ unsigned NumBits = BitValues.size();
+ APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
- for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
- // If this byte is masked out by a later operation, we don't care what
+ for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
+ // If this bit is masked out by a later operation, we don't care what
// the and mask is.
- if ((ByteMask & (1 << i)) == 0)
+ if (BitMask[i] == 0)
continue;
- // If the AndMask is all zeros for this byte, clear the bit.
- APInt MaskB = AndMask & Byte;
+ // If the AndMask is zero for this bit, clear the bit.
+ APInt MaskB = AndMask & Bit;
if (MaskB == 0) {
- ByteMask &= ~(1U << i);
+ BitMask.clearBit(i);
continue;
}
- // If the AndMask is not all ones for this byte, it's not a bytezap.
- if (MaskB != Byte)
- return true;
-
- // Otherwise, this byte is kept.
+ // Otherwise, this bit is kept.
}
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
+ return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+ BitValues, BitProvenance);
}
}
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
- // the input value to the bswap. Some observations: 1) if more than one byte
- // is demanded from this input, then it could not be successfully assembled
- // into a byteswap. At least one of the two bytes would not be aligned with
- // their ultimate destination.
- if (!isPowerOf2_32(ByteMask)) return true;
- unsigned InputByteNo = countTrailingZeros(ByteMask);
-
- // 2) The input and ultimate destinations must line up: if byte 3 of an i32
- // is demanded, it needs to go into byte 0 of the result. This means that the
- // byte needs to be shifted until it lands in the right byte bucket. The
- // shift amount depends on the position: if the byte is coming from the high
- // part of the value (e.g. byte 3) then it must be shifted right. If from the
- // low part, it must be shifted left.
- unsigned DestByteNo = InputByteNo + OverallLeftShift;
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ // the input value to the bswap/bitreverse. To be part of a bswap or
+ // bitreverse we must be demanding a contiguous range of bits from it.
+ unsigned InputBitLen = BitMask.countPopulation();
+ unsigned InputBitNo = BitMask.countTrailingZeros();
+ if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo !=
+ InputBitLen)
+ // Not a contiguous range of set bits!
return true;
- // If the destination byte value is already defined, the values are or'd
- // together, which isn't a bswap (unless it's an or of the same bits).
- if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ // We know we're moving a contiguous range of bits from the input to the
+ // output. Record which bits in the output came from which bits in the input.
+ unsigned DestBitNo = InputBitNo + OverallLeftShift;
+ for (unsigned I = 0; I < InputBitLen; ++I)
+ BitProvenance[DestBitNo + I] = InputBitNo + I;
+
+ // If the destination bit value is already defined, the values are or'd
+ // together, which isn't a bswap/bitreverse (unless it's an or of the same
+ // bits).
+ if (BitValues[DestBitNo] && BitValues[DestBitNo] != V)
return true;
- ByteValues[DestByteNo] = V;
+ for (unsigned I = 0; I < InputBitLen; ++I)
+ BitValues[DestBitNo + I] = V;
+
return false;
}
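As a rough scalar illustration of the bookkeeping above (my own sketch, not from the patch): for a logical right shift by C, output bit i is provided by input bit i + C, which is exactly the relationship the OverallLeftShift/BitProvenance adjustment records.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t X = 0xAABBCCDDu;
    uint32_t Y = X >> 16; // OverallLeftShift becomes -16 for the operand
    for (unsigned I = 0; I < 16; ++I)
      assert(((Y >> I) & 1u) == ((X >> (I + 16)) & 1u)); // bit i <- bit i+16
    return 0;
  }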
-/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
-/// If so, insert the new bswap intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
- if (!ITy || ITy->getBitWidth() % 16 ||
- // ByteMask only allows up to 32-byte values.
- ITy->getBitWidth() > 32*8)
- return nullptr; // Can only bswap pairs of bytes. Can't do vectors.
+static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ if (From % 8 != To % 8)
+ return false;
+ // Convert from bit indices to byte indices and check for a byte reversal.
+ From >>= 3;
+ To >>= 3;
+ BitWidth >>= 3;
+ return From == BitWidth - To - 1;
+}
- /// ByteValues - For each byte of the result, we keep track of which value
- /// defines each byte.
- SmallVector<Value*, 8> ByteValues;
- ByteValues.resize(ITy->getBitWidth()/8);
+static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ return From == BitWidth - To - 1;
+}
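A quick standalone re-statement of the two predicates and the mappings they accept (test values assumed, for illustration only):

  #include <cassert>

  static bool bswapOK(unsigned From, unsigned To, unsigned BitWidth) {
    if (From % 8 != To % 8)
      return false;
    return From / 8 == BitWidth / 8 - To / 8 - 1; // byte positions reversed
  }

  static bool bitReverseOK(unsigned From, unsigned To, unsigned BitWidth) {
    return From == BitWidth - To - 1; // bit positions reversed
  }

  int main() {
    // 32-bit bswap: output byte 0 is input byte 3, so bit 0 comes from bit 24.
    assert(bswapOK(24, 0, 32));
    assert(!bswapOK(25, 0, 32)); // offsets within the byte must agree
    // 32-bit bitreverse: output bit 0 comes from bit 31.
    assert(bitReverseOK(31, 0, 32));
    return 0;
  }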
+/// Given an OR instruction, check to see if this is a bswap or bitreverse
+/// idiom. If so, insert the new intrinsic and return it.
+Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy)
+ return nullptr; // Can't do vectors.
+ unsigned BW = ITy->getBitWidth();
+
+ /// We keep track of which bit (BitProvenance) inside which value (BitValues)
+ /// defines each bit in the result.
+ SmallVector<Value *, 8> BitValues(BW, nullptr);
+ SmallVector<int, 8> BitProvenance(BW, -1);
+
// Try to find all the pieces corresponding to the bswap.
- uint32_t ByteMask = ~0U >> (32-ByteValues.size());
- if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ APInt BitMask = APInt::getAllOnesValue(BitValues.size());
+ if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
return nullptr;
- // Check to see if all of the bytes come from the same value.
- Value *V = ByteValues[0];
- if (!V) return nullptr; // Didn't find a byte? Must be zero.
+ // Check to see if all of the bits come from the same value.
+ Value *V = BitValues[0];
+ if (!V) return nullptr; // Didn't find a bit? Must be zero.
- // Check to make sure that all of the bytes come from the same value.
- for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
- if (ByteValues[i] != V)
- return nullptr;
- Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+ if (!std::all_of(BitValues.begin(), BitValues.end(),
+ [&](const Value *X) { return X == V; }))
+ return nullptr;
+
+ // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+ // only byteswap values with an even number of bytes.
+ bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
+ for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
+ OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+ OKForBitReverse &=
+ bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+ }
+
+ Intrinsic::ID Intrin;
+ if (OKForBSwap)
+ Intrin = Intrinsic::bswap;
+ else if (OKForBitReverse)
+ Intrin = Intrinsic::bitreverse;
+ else
+ return nullptr;
+
+ Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
return CallInst::Create(F, V);
}
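For reference, the sort of source-level idiom this matcher is meant to collapse into a single bswap intrinsic; a sketch with my own test value:

  #include <cassert>
  #include <cstdint>

  static uint32_t bswapIdiom(uint32_t X) {
    return (X >> 24) | ((X >> 8) & 0x0000FF00u) |
           ((X << 8) & 0x00FF0000u) | (X << 24);
  }

  int main() {
    assert(bswapIdiom(0x11223344u) == 0x44332211u);
    return 0;
  }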
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
-/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
-/// we can simplify this expression to "cond ? C : D or B".
+/// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
+/// and either B or D is ~(cond?-1,0) or (cond?0,-1), then we can simplify this
+/// expression to "cond ? C : D or B".
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
Value *C, Value *D) {
// If A is not a select of -1/0, this cannot match.
@@ -1688,7 +1776,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return nullptr;
}
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+/// Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1905,14 +1993,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
case ICmpInst::ICMP_EQ:
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
- // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+ // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
assert(LHSCst->getValue().ule(RHSCst->getValue()));
APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
if (Xor.isPowerOf2()) {
- Value *NegCst = Builder->getInt(~Xor);
- Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
- return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+ Value *Cst = Builder->getInt(Xor);
+ Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
}
}
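A spot-check of the rewritten fold under the assumption C1 = 5 and C2 = 7, so C1 ^ C2 = 2 is a power of two and the compare keeps the larger constant:

  #include <cassert>
  #include <cstdint>

  int main() {
    // (A == 5 || A == 7)  becomes  (A | 2) == 7
    for (uint32_t A = 0; A < 64; ++A)
      assert(((A == 5u) || (A == 7u)) == ((A | 2u) == 7u));
    return 0;
  }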
@@ -2020,9 +2108,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
-/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
RHS->getPredicate() == FCmpInst::FCMP_UNO &&
@@ -2080,7 +2167,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return nullptr;
}
-/// FoldOrWithConstants - This helper function folds:
+/// This helper function folds:
///
/// ((A | B) & C1) | (B & C2)
///
@@ -2199,14 +2286,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ConstantInt *C1 = nullptr, *C2 = nullptr;
// (A | B) | C and A | (B | C) -> bswap if possible.
+ bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value()));
// (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
- if (match(Op0, m_Or(m_Value(), m_Value())) ||
- match(Op1, m_Or(m_Value(), m_Value())) ||
- (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
- match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
- if (Instruction *BSwap = MatchBSwap(I))
+ bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value()));
+ // (A & B) | (C & D) -> bswap if possible.
+ bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+ match(Op1, m_And(m_Value(), m_Value()));
+
+ if (OrOfOrs || OrOfShifts || OrOfAnds)
+ if (Instruction *BSwap = MatchBSwapOrBitReverse(I))
return BSwap;
- }
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() &&
@@ -2360,14 +2451,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
+ if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+ return DeMorgan;
// Canonicalize xor to the RHS.
bool SwappedForXor = false;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6de380b..e3634f2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -67,8 +67,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
- MinAlign, false));
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
return MI;
}
@@ -198,12 +197,140 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86immshift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ LogicalShift = false; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ LogicalShift = true; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ LogicalShift = true; ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ // Simplify if count is constant.
+ auto Arg1 = II.getArgOperand(1);
+ auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
+ auto CDV = dyn_cast<ConstantDataVector>(Arg1);
+ auto CInt = dyn_cast<ConstantInt>(Arg1);
+ if (!CAZ && !CDV && !CInt)
+ return nullptr;
+
+ APInt Count(64, 0);
+ if (CDV) {
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ auto VT = cast<VectorType>(CDV->getType());
+ unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
+ assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
+ unsigned NumSubElts = 64 / BitWidth;
+
+ // Concatenate the sub-elements to create the 64-bit value.
+ for (unsigned i = 0; i != NumSubElts; ++i) {
+ unsigned SubEltIdx = (NumSubElts - 1) - i;
+ auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+ Count = Count.shl(BitWidth);
+ Count |= SubElt->getValue().zextOrTrunc(64);
+ }
+ } else if (CInt)
+ Count = CInt->getValue();
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ unsigned VWidth = VT->getNumElements();
+ unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+ // If shift-by-zero then just return the original value.
+ if (Count == 0)
+ return Vec;
+
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
+
+ // Get a constant vector of the same type as the first operand.
+ auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+ auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
+
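The out-of-range handling above matches x86 packed-shift semantics: a logical shift produces zero once the count reaches the element width, while an arithmetic shift behaves as if clamped to BitWidth - 1. A scalar model of one 16-bit lane (my naming; relies on arithmetic >> for negative ints, which mainstream compilers provide):

  #include <cassert>
  #include <cstdint>

  static int16_t psrawLane(int16_t V, uint64_t Count) {
    if (Count >= 16)
      Count = 15;            // arithmetic shift clamps to BitWidth - 1
    return static_cast<int16_t>(V >> Count);
  }

  static uint16_t psrlwLane(uint16_t V, uint64_t Count) {
    return Count >= 16 ? 0 : static_cast<uint16_t>(V >> Count); // logical -> 0
  }

  int main() {
    assert(psrawLane(-4, 100) == -1);     // sign bit fills the lane
    assert(psrlwLane(0xFFFFu, 100) == 0); // over-wide logical shift zeroes it
    return 0;
  }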
+static Value *SimplifyX86extend(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder,
+ bool SignExtend) {
+ VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
+ VectorType *DstTy = cast<VectorType>(II.getType());
+ unsigned NumDstElts = DstTy->getNumElements();
+
+ // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
+ SmallVector<int, 8> ShuffleMask;
+ for (int i = 0; i != (int)NumDstElts; ++i)
+ ShuffleMask.push_back(i);
+
+ Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0),
+ UndefValue::get(SrcTy), ShuffleMask);
+ return SignExtend ? Builder.CreateSExt(SV, DstTy)
+ : Builder.CreateZExt(SV, DstTy);
+}
+
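What the shuffle-plus-extend above reproduces, restated per lane for the pmovsxbw case (a sketch; the array contents are made up): only the low NumDstElts input lanes matter, and each is widened with sign extension.

  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t Src[16] = {-1, 2, -3, 4, -5, 6, -7, 8}; // upper 8 lanes ignored
    int16_t Dst[8];
    for (int I = 0; I < 8; ++I)
      Dst[I] = static_cast<int16_t>(Src[I]); // per-lane sign extension
    assert(Dst[0] == -1 && Dst[7] == 8);
    return 0;
  }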
static Value *SimplifyX86insertps(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
VectorType *VecTy = cast<VectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
-
+
// The immediate permute control byte looks like this:
// [3:0] - zero mask for each 32-bit lane
// [5:4] - select one 32-bit destination lane
@@ -248,12 +375,202 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
// Replace the selected destination lane with the selected source lane.
ShuffleMask[DestLane] = SourceLane + 4;
}
-
+
return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}
return nullptr;
}
+/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
+/// or conversion to a shuffle vector.
+static Value *SimplifyX86extrq(IntrinsicInst &II, Value *Op0,
+ ConstantInt *CILength, ConstantInt *CIIndex,
+ InstCombiner::BuilderTy &Builder) {
+ auto LowConstantHighUndef = [&](uint64_t Val) {
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ };
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ ConstantInt *CI0 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Attempt to constant fold.
+ if (CILength && CIIndex) {
+ // From AMD documentation: "The bit index and field length are each six
+ // bits in length; other bits of the field are ignored."
+ APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
+ APInt APLength = CILength->getValue().zextOrTrunc(6);
+
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize EXTRQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
+ for (int i = Length; i != 8; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(
+ Builder.CreateBitCast(Op0, ShufTy),
+ ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // Constant Fold - shift Index'th bit to lowest position and mask off
+ // Length bits.
+ if (CI0) {
+ APInt Elt = CI0->getValue();
+ Elt = Elt.lshr(Index).zextOrTrunc(Length);
+ return LowConstantHighUndef(Elt.getZExtValue());
+ }
+
+ // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
+ Value *Args[] = {Op0, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ return Builder.CreateCall(F, Args);
+ }
+ }
+
+ // Constant Fold - extraction from zero is always {zero, undef}.
+ if (CI0 && CI0->equalsInt(0))
+ return LowConstantHighUndef(0);
+
+ return nullptr;
+}
+
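A standalone scalar model of the EXTRQ constant fold, following the AMD rules quoted above (function name and test value are mine): shift the Index'th bit down to position zero and keep Length bits.

  #include <cassert>
  #include <cstdint>

  static uint64_t extrqLow(uint64_t V, unsigned Length, unsigned Index) {
    if (Length == 0)
      Length = 64; // "a value of zero ... is defined as length of 64"
    uint64_t Mask = Length == 64 ? ~0ull : ((1ull << Length) - 1);
    return (V >> Index) & Mask;
  }

  int main() {
    assert(extrqLow(0xAABBCCDDEEFF0011ull, 16, 8) == 0xFF00ull);
    return 0;
  }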
+/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
+/// folding or conversion to a shuffle vector.
+static Value *SimplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
+ APInt APLength, APInt APIndex,
+ InstCombiner::BuilderTy &Builder) {
+
+ // From AMD documentation: "The bit index and field length are each six bits
+ // in length; other bits of the field are ignored."
+ APIndex = APIndex.zextOrTrunc(6);
+ APLength = APLength.zextOrTrunc(6);
+
+ // Attempt to constant fold.
+ unsigned Index = APIndex.getZExtValue();
+
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if (End > 64)
+ return UndefValue::get(II.getType());
+
+ // If we are inserting whole bytes, we can convert this to a shuffle.
+ // Lowering can recognize INSERTQI shuffle masks.
+ if ((Length % 8) == 0 && (Index % 8) == 0) {
+ // Convert bit indices to byte indices.
+ Length /= 8;
+ Index /= 8;
+
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Type *IntTy32 = Type::getInt32Ty(II.getContext());
+ VectorType *ShufTy = VectorType::get(IntTy8, 16);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (int i = 0; i != (int)Index; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 0; i != (int)Length; ++i)
+ ShuffleMask.push_back(
+ Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
+ for (int i = Index + Length; i != 8; ++i)
+ ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(UndefValue::get(IntTy32));
+
+ Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
+ Builder.CreateBitCast(Op1, ShufTy),
+ ConstantVector::get(ShuffleMask));
+ return Builder.CreateBitCast(SV, II.getType());
+ }
+
+ // See if we're dealing with constant values.
+ Constant *C0 = dyn_cast<Constant>(Op0);
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI00 =
+ C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CI10 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+
+ // Constant Fold - insert bottom Length bits starting at the Index'th bit.
+ if (CI00 && CI10) {
+ APInt V00 = CI00->getValue();
+ APInt V10 = CI10->getValue();
+ APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
+ V00 = V00 & ~Mask;
+ V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
+ APInt Val = V00 | V10;
+ Type *IntTy64 = Type::getInt64Ty(II.getContext());
+ Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
+ UndefValue::get(IntTy64)};
+ return ConstantVector::get(Args);
+ }
+
+ // If we were an INSERTQ call, we'll save demanded elements if we convert to
+ // INSERTQI.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
+ Type *IntTy8 = Type::getInt8Ty(II.getContext());
+ Constant *CILength = ConstantInt::get(IntTy8, Length, false);
+ Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
+
+ Value *Args[] = {Op0, Op1, CILength, CIIndex};
+ Module *M = II.getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return Builder.CreateCall(F, Args);
+ }
+
+ return nullptr;
+}
+
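The matching scalar sketch for the INSERTQ fold (assumed semantics, my naming): clear Length destination bits starting at Index, then OR in the low Length bits of the source, mirroring the V00/V10/Mask arithmetic above.

  #include <cassert>
  #include <cstdint>

  static uint64_t insertqLow(uint64_t Dst, uint64_t Src, unsigned Length,
                             unsigned Index) {
    if (Length == 0)
      Length = 64;
    uint64_t Low = Length == 64 ? ~0ull : ((1ull << Length) - 1);
    return (Dst & ~(Low << Index)) | ((Src & Low) << Index);
  }

  int main() {
    assert(insertqLow(0xFFFFFFFFull, 0xAB, 8, 4) == 0xFFFFFABFull);
    return 0;
  }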
/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
/// source vectors, unless a zero bit is set. If a zero bit is set,
/// then ignore that half of the mask and clear that half of the vector.
@@ -289,7 +606,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// The high bit of the selection field chooses the 1st or 2nd operand.
bool LowInputSelect = Imm & 0x02;
bool HighInputSelect = Imm & 0x20;
-
+
// The low bit of the selection field chooses the low or high half
// of the selected operand.
bool LowHalfSelect = Imm & 0x01;
@@ -298,11 +615,11 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// Determine which operand(s) are actually in use for this instruction.
Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
-
+
// If needed, replace operands based on zero mask.
V0 = LowHalfZero ? ZeroVector : V0;
V1 = HighHalfZero ? ZeroVector : V1;
-
+
// Permute low half of result.
unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
@@ -319,6 +636,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
return nullptr;
}
+/// Decode XOP integer vector comparison intrinsics.
+static Value *SimplifyX86vpcom(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ uint64_t Imm = CInt->getZExtValue() & 0x7;
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+
+ switch (Imm) {
+ case 0x0:
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ break;
+ case 0x1:
+ Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ break;
+ case 0x2:
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ break;
+ case 0x3:
+ Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ break;
+ case 0x4:
+ Pred = ICmpInst::ICMP_EQ; break;
+ case 0x5:
+ Pred = ICmpInst::ICMP_NE; break;
+ case 0x6:
+ return ConstantInt::getSigned(VecTy, 0); // FALSE
+ case 0x7:
+ return ConstantInt::getSigned(VecTy, -1); // TRUE
+ }
+
+ if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
+ II.getArgOperand(1)))
+ return Builder.CreateSExtOrTrunc(Cmp, VecTy);
+ }
+ return nullptr;
+}
+
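The immediate decoding above, restated as a per-lane predicate for the signed byte variant; a sketch for illustration only:

  #include <cassert>
  #include <cstdint>

  static bool vpcomLane(int8_t A, int8_t B, unsigned Imm) {
    switch (Imm & 0x7) {
    case 0x0: return A < B;
    case 0x1: return A <= B;
    case 0x2: return A > B;
    case 0x3: return A >= B;
    case 0x4: return A == B;
    case 0x5: return A != B;
    case 0x6: return false; // FALSE
    default:  return true;  // TRUE
    }
  }

  int main() {
    assert(vpcomLane(-2, 3, 0x0));  // signed less-than
    assert(!vpcomLane(-2, 3, 0x2)); // signed greater-than
    assert(vpcomLane(0, 0, 0x7));   // immediate 7 is always true
    return 0;
  }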
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
@@ -371,7 +725,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
- Module *M = CI.getParent()->getParent()->getParent();
+ Module *M = CI.getModule();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
Type *Tys[3] = { CI.getArgOperand(0)->getType(),
CI.getArgOperand(1)->getType(),
@@ -400,6 +754,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
+ auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
+ unsigned DemandedWidth) {
+ APInt UndefElts(Width, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
+ return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+ };
+
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
@@ -427,6 +788,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::bitreverse: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = nullptr;
+
+ // bitreverse(bitreverse(x)) -> x
+ if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
+ return ReplaceInstUsesWith(CI, X);
+ break;
+ }
+
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
@@ -669,6 +1040,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
+
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
@@ -682,6 +1054,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
+ case Intrinsic::x86_vcvtph2ps_128:
+ case Intrinsic::x86_vcvtph2ps_256: {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ auto RetType = cast<VectorType>(II->getType());
+ unsigned ArgWidth = ArgType->getNumElements();
+ unsigned RetWidth = RetType->getNumElements();
+ assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
+ assert(ArgType->isIntOrIntVectorTy() &&
+ ArgType->getScalarSizeInBits() == 16 &&
+ "CVTPH2PS input type should be 16-bit integer vector");
+ assert(RetType->getScalarType()->isFloatTy() &&
+ "CVTPH2PS output type should be 32-bit float vector");
+
+ // Constant folding: Convert to generic half to single conversion.
+ if (isa<ConstantAggregateZero>(Arg))
+ return ReplaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
+
+ if (isa<ConstantDataVector>(Arg)) {
+ auto VectorHalfAsShorts = Arg;
+ if (RetWidth < ArgWidth) {
+ SmallVector<int, 8> SubVecMask;
+ for (unsigned i = 0; i != RetWidth; ++i)
+ SubVecMask.push_back((int)i);
+ VectorHalfAsShorts = Builder->CreateShuffleVector(
+ Arg, UndefValue::get(ArgType), SubVecMask);
+ }
+
+ auto VectorHalfType =
+ VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
+ auto VectorHalfs =
+ Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
+ auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
+ return ReplaceInstUsesWith(*II, VectorFloats);
+ }
+
+ // We only use the lowest lanes of the argument.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
@@ -692,194 +1108,229 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_cvttsd2si64: {
// These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
- unsigned VWidth =
- cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
- APInt DemandedElts(VWidth, 1);
- APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
- DemandedElts, UndefElts)) {
+ Value *Arg = II->getArgOperand(0);
+ unsigned VWidth = Arg->getType()->getVectorNumElements();
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
II->setArgOperand(0, V);
return II;
}
break;
}
- // Constant fold <A x Bi> << Ci.
- // FIXME: We don't handle _dq because it's a shift of an i128, but is
- // represented in the IR as <2 x i64>. A per element shift is wrong.
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_sse2_psll_w:
+ // Constant fold ashr( <A x Bi>, Ci ).
+ // Constant fold lshr( <A x Bi>, Ci ).
+ // Constant fold shl( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: {
- // Simplify if count is constant. To 0 if >= BitWidth,
- // otherwise to shl/lshr.
- auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
- auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!CDV && !CInt)
- break;
- ConstantInt *Count;
- if (CDV)
- Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
- else
- Count = CInt;
-
- auto Vec = II->getArgOperand(0);
- auto VT = cast<VectorType>(Vec->getType());
- if (Count->getZExtValue() >
- VT->getElementType()->getPrimitiveSizeInBits() - 1)
- return ReplaceInstUsesWith(
- CI, ConstantAggregateZero::get(Vec->getType()));
-
- bool isPackedShiftLeft = true;
- switch (II->getIntrinsicID()) {
- default : break;
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
- }
-
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- if (isPackedShiftLeft)
- return BinaryOperator::CreateShl(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
-
- return BinaryOperator::CreateLShr(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w: {
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ Value *Arg1 = II->getArgOperand(1);
+ assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected packed shift size");
+ unsigned VWidth = Arg1->getType()->getVectorNumElements();
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
+ II->setArgOperand(1, V);
+ return II;
+ }
+ break;
}
- case Intrinsic::x86_sse41_pmovsxbw:
- case Intrinsic::x86_sse41_pmovsxwd:
- case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_avx2_pmovsxbd:
+ case Intrinsic::x86_avx2_pmovsxbq:
+ case Intrinsic::x86_avx2_pmovsxbw:
+ case Intrinsic::x86_avx2_pmovsxdq:
+ case Intrinsic::x86_avx2_pmovsxwd:
+ case Intrinsic::x86_avx2_pmovsxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_pmovzxbd:
+ case Intrinsic::x86_sse41_pmovzxbq:
case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxdq:
case Intrinsic::x86_sse41_pmovzxwd:
- case Intrinsic::x86_sse41_pmovzxdq: {
- // pmov{s|z}x ignores the upper half of their input vectors.
- unsigned VWidth =
- cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
- unsigned LowHalfElts = VWidth / 2;
- APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
- APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(
- II->getArgOperand(0), InputDemandedElts, UndefElts)) {
- II->setArgOperand(0, TmpV);
+ case Intrinsic::x86_sse41_pmovzxwq:
+ case Intrinsic::x86_avx2_pmovzxbd:
+ case Intrinsic::x86_avx2_pmovzxbq:
+ case Intrinsic::x86_avx2_pmovzxbw:
+ case Intrinsic::x86_avx2_pmovzxdq:
+ case Intrinsic::x86_avx2_pmovzxwd:
+ case Intrinsic::x86_avx2_pmovzxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse4a_extrq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 16 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CILength =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ : nullptr;
+ ConstantInt *CIIndex =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
+ if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+
+ // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
+ // operands and the lowest 16-bits of the second.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
+ II->setArgOperand(1, V);
return II;
}
break;
}
- case Intrinsic::x86_sse41_insertps:
- if (Value *V = SimplifyX86insertps(*II, *Builder))
+
+ case Intrinsic::x86_sse4a_extrqi: {
+ // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
+ // bits of the lower 64-bits. The upper 64-bits are undefined.
+ Value *Op0 = II->getArgOperand(0);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder))
return ReplaceInstUsesWith(*II, V);
+
+ // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::x86_sse4a_insertq: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
+ Op1->getType()->getVectorNumElements() == 2 &&
+ "Unexpected operand size");
+
+ // See if we're dealing with constant values.
+ Constant *C1 = dyn_cast<Constant>(Op1);
+ ConstantInt *CI11 =
+ C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ : nullptr;
+
+ // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
+ if (CI11) {
+ APInt V11 = CI11->getValue();
+ APInt Len = V11.zextOrTrunc(6);
+ APInt Idx = V11.lshr(8).zextOrTrunc(6);
+ if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
+ // operand.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
break;
-
+ }
+
case Intrinsic::x86_sse4a_insertqi: {
- // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
- // ones undef
- // TODO: eventually we should lower this intrinsic to IR
- if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
- unsigned Index = CIStart->getZExtValue();
- // From AMD documentation: "a value of zero in the field length is
- // defined as length of 64".
- unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
-
- // From AMD documentation: "If the sum of the bit index + length field
- // is greater than 64, the results are undefined".
-
- // Note that both field index and field length are 8-bit quantities.
- // Since variables 'Index' and 'Length' are unsigned values
- // obtained from zero-extending field index and field length
- // respectively, their sum should never wrap around.
- if ((Index + Length) > 64)
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (Length == 64 && Index == 0) {
- Value *Vec = II->getArgOperand(1);
- Value *Undef = UndefValue::get(Vec->getType());
- const uint32_t Mask[] = { 0, 2 };
- return ReplaceInstUsesWith(
- CI,
- Builder->CreateShuffleVector(
- Vec, Undef, ConstantDataVector::get(
- II->getContext(), makeArrayRef(Mask))));
-
- } else if (auto Source =
- dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
- if (Source->hasOneUse() &&
- Source->getArgOperand(1) == II->getArgOperand(1)) {
- // If the source of the insert has only one use and it's another
- // insert (and they're both inserting from the same vector), try to
- // bundle both together.
- auto CISourceWidth =
- dyn_cast<ConstantInt>(Source->getArgOperand(2));
- auto CISourceStart =
- dyn_cast<ConstantInt>(Source->getArgOperand(3));
- if (CISourceStart && CISourceWidth) {
- unsigned Start = CIStart->getZExtValue();
- unsigned Width = CIWidth->getZExtValue();
- unsigned End = Start + Width;
- unsigned SourceStart = CISourceStart->getZExtValue();
- unsigned SourceWidth = CISourceWidth->getZExtValue();
- unsigned SourceEnd = SourceStart + SourceWidth;
- unsigned NewStart, NewWidth;
- bool ShouldReplace = false;
- if (Start <= SourceStart && SourceStart <= End) {
- NewStart = Start;
- NewWidth = std::max(End, SourceEnd) - NewStart;
- ShouldReplace = true;
- } else if (SourceStart <= Start && Start <= SourceEnd) {
- NewStart = SourceStart;
- NewWidth = std::max(SourceEnd, End) - NewStart;
- ShouldReplace = true;
- }
-
- if (ShouldReplace) {
- Constant *ConstantWidth = ConstantInt::get(
- II->getArgOperand(2)->getType(), NewWidth, false);
- Constant *ConstantStart = ConstantInt::get(
- II->getArgOperand(3)->getType(), NewStart, false);
- Value *Args[4] = { Source->getArgOperand(0),
- II->getArgOperand(1), ConstantWidth,
- ConstantStart };
- Module *M = CI.getParent()->getParent()->getParent();
- Value *F =
- Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
- return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
- }
- }
- }
- }
- }
+ // INSERTQI: Extract lowest Length bits from lower half of second source and
+ // insert over first source starting at Index bit. The upper 64-bits are
+ // undefined.
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+ unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+ assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
+ Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
+ VWidth1 == 2 && "Unexpected operand sizes");
+
+ // See if we're dealing with constant values.
+ ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
+ ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
+
+ // Attempt to simplify to a constant or shuffle vector.
+ if (CILength && CIIndex) {
+ APInt Len = CILength->getValue().zextOrTrunc(6);
+ APInt Idx = CIIndex->getValue().zextOrTrunc(6);
+ if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ }
+
+ // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
+ // operands.
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
+ II->setArgOperand(1, V);
+ return II;
}
break;
}
@@ -894,7 +1345,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// This optimization is convoluted because the intrinsic is defined as
// getting a vector of floats or doubles for the ps and pd versions.
// FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (isa<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
auto Tyi1 = Builder->getInt1Ty();
auto SelectorType = cast<VectorType>(Mask->getType());
@@ -917,11 +1381,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
}
auto NewSelector = ConstantVector::get(Selectors);
- return SelectInst::Create(NewSelector, II->getArgOperand(1),
- II->getArgOperand(0), "blendv");
- } else {
- break;
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+ break;
+ }
+
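A byte-wise model of the mask semantics these blendv folds rely on (assumed from the comments above; names are mine): a set top bit in a mask lane selects the second operand, so an all-zero mask trivially selects the first.

  #include <cassert>
  #include <cstdint>

  static uint8_t blendvLane(uint8_t A, uint8_t B, uint8_t Mask) {
    return (Mask & 0x80) ? B : A; // top bit of the mask lane selects B
  }

  int main() {
    assert(blendvLane(1, 2, 0x00) == 1); // zero mask keeps the first operand
    assert(blendvLane(1, 2, 0xFF) == 2);
    return 0;
  }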
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b: {
+ // Turn pshufb(V1,mask) -> shuffle(V1,Zero,mask) if mask is a constant.
+ auto *V = II->getArgOperand(1);
+ auto *VTy = cast<VectorType>(V->getType());
+ unsigned NumElts = VTy->getNumElements();
+ assert((NumElts == 16 || NumElts == 32) &&
+ "Unexpected number of elements in shuffle mask!");
+ // Initialize the resulting shuffle mask to all zeroes.
+ uint32_t Indexes[32] = {0};
+
+ if (auto *Mask = dyn_cast<ConstantDataVector>(V)) {
+ // Each byte in the shuffle control mask forms an index to permute the
+ // corresponding byte in the destination operand.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ int8_t Index = Mask->getElementAsInteger(I);
+ // If the most significant bit (bit[7]) of each byte of the shuffle
+ // control mask is set, then zero is written in the result byte.
+ // The zero vector is in the right-hand side of the resulting
+ // shufflevector.
+
+ // The value of each index is the least significant 4 bits of the
+ // shuffle control byte.
+ Indexes[I] = (Index < 0) ? NumElts : Index & 0xF;
+ }
+ } else if (!isa<ConstantAggregateZero>(V))
+ break;
+
+ // The value of each index for the high 128-bit lane is the least
+ // significant 4 bits of the respective shuffle control byte.
+ for (unsigned I = 16; I < NumElts; ++I)
+ Indexes[I] += I & 0xF0;
+
+ auto NewC = ConstantDataVector::get(V->getContext(),
+ makeArrayRef(Indexes, NumElts));
+ auto V1 = II->getArgOperand(0);
+ auto V2 = Constant::getNullValue(II->getType());
+ auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
+ return ReplaceInstUsesWith(CI, Shuffle);
}
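A 16-lane reference model of the pshufb semantics described in the comments above (my own sketch): a set top bit in the control byte zeroes the destination lane; otherwise its low four bits index the source.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t Src[16], Mask[16], Dst[16];
    for (int I = 0; I < 16; ++I) {
      Src[I] = static_cast<uint8_t>(I * 3);
      Mask[I] = static_cast<uint8_t>(15 - I); // reverse the lanes
    }
    Mask[0] = 0x80; // top bit set -> zero the lane
    for (int I = 0; I < 16; ++I)
      Dst[I] = (Mask[I] & 0x80) ? 0 : Src[Mask[I] & 0x0F];
    assert(Dst[0] == 0);
    assert(Dst[1] == Src[14]);
    return 0;
  }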
case Intrinsic::x86_avx_vpermilvar_ps:
@@ -972,6 +1475,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(*II, V);
break;
+ case Intrinsic::x86_xop_vpcomb:
+ case Intrinsic::x86_xop_vpcomd:
+ case Intrinsic::x86_xop_vpcomq:
+ case Intrinsic::x86_xop_vpcomw:
+ if (Value *V = SimplifyX86vpcom(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_xop_vpcomub:
+ case Intrinsic::x86_xop_vpcomud:
+ case Intrinsic::x86_xop_vpcomuq:
+ case Intrinsic::x86_xop_vpcomuw:
+ if (Value *V = SimplifyX86vpcom(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -1115,15 +1634,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// happen when variable allocas are DCE'd.
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
+ if (&*++SS->getIterator() == II)
return EraseInstFromFunction(CI);
}
}
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
- BasicBlock::iterator BI = II;
+ BasicBlock::iterator BI(II);
TerminatorInst *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
@@ -1153,6 +1671,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return EraseInstFromFunction(CI);
break;
}
+ case Intrinsic::lifetime_start: {
+ // Remove trivially empty lifetime_start/end ranges, i.e. a start
+ // immediately followed by an end (ignoring debuginfo or other
+ // lifetime markers in between).
+ BasicBlock::iterator BI = II->getIterator(), BE = II->getParent()->end();
+ for (++BI; BI != BE; ++BI) {
+ if (IntrinsicInst *LTE = dyn_cast<IntrinsicInst>(BI)) {
+ if (isa<DbgInfoIntrinsic>(LTE) ||
+ LTE->getIntrinsicID() == Intrinsic::lifetime_start)
+ continue;
+ if (LTE->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->getOperand(0) == LTE->getOperand(0) &&
+ II->getOperand(1) == LTE->getOperand(1)) {
+ EraseInstFromFunction(*LTE);
+ return EraseInstFromFunction(*II);
+ }
+ continue;
+ }
+ }
+ break;
+ }
+ break;
+ }
case Intrinsic::assume: {
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
@@ -1233,7 +1774,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
// isKnownNonNull -> nonnull attribute
- if (isKnownNonNull(DerivedPtr))
+ if (isKnownNonNullAt(DerivedPtr, II, DT, TLI))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
// isDereferenceablePointer -> deref attribute
@@ -1355,9 +1896,10 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
Value *TrampMem) {
// Visit all the previous instructions in the basic block, and try to find a
// init.trampoline which has a direct path to the adjust.trampoline.
- for (BasicBlock::iterator I = AdjustTramp,
- E = AdjustTramp->getParent()->begin(); I != E; ) {
- Instruction *Inst = --I;
+ for (BasicBlock::iterator I = AdjustTramp->getIterator(),
+ E = AdjustTramp->getParent()->begin();
+ I != E;) {
+ Instruction *Inst = &*--I;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
II->getOperand(0) == TrampMem)
@@ -1400,20 +1942,27 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
// checks on their arguments.
+ SmallVector<unsigned, 4> Indices;
unsigned ArgNo = 0;
+
for (Value *V : CS.args()) {
- if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
- isKnownNonNull(V)) {
- AttributeSet AS = CS.getAttributes();
- AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
- Attribute::NonNull);
- CS.setAttributes(AS);
- Changed = true;
- }
+ if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
+ isKnownNonNullAt(V, CS.getInstruction(), DT, TLI))
+ Indices.push_back(ArgNo + 1);
ArgNo++;
}
+
assert(ArgNo == CS.arg_size() && "sanity check");
+ if (!Indices.empty()) {
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices,
+ Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
@@ -1725,16 +2274,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
attrVec);
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args);
+ NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles);
NC->takeName(II);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
} else {
CallInst *CI = cast<CallInst>(Caller);
- NC = Builder->CreateCall(Callee, Args);
+ NC = Builder->CreateCall(Callee, Args, OpBundles);
NC->takeName(CI);
if (CI->isTailCall())
cast<CallInst>(NC)->setTailCall();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 48ab0eb..da835a1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -21,11 +21,11 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
-/// expression. If so, decompose it, returning some value X, such that Val is
+/// Analyze 'Val', seeing if it is a simple linear expression.
+/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
-static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
uint64_t &Offset) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
@@ -62,7 +62,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
// where C1 is divisible by C2.
unsigned SubScale;
Value *SubVal =
- DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
return SubVal;
@@ -76,14 +76,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
return Val;
}
-/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
-/// try to eliminate the cast by moving the type information into the alloc.
+/// If we find a cast of an allocation instruction, try to eliminate the cast by
+/// moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
- AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+ AllocaBuilder.SetInsertPoint(&AI);
// Get the type really allocated and the type casted to.
Type *AllocElTy = AI.getAllocatedType();
@@ -114,7 +114,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
unsigned ArraySizeScale;
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
- DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+ decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
@@ -154,9 +154,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
-/// insert the code to evaluate the expression.
+/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
+/// true for, actually insert the code to evaluate the expression.
Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
@@ -261,9 +260,9 @@ isEliminableCastPair(const CastInst *CI, ///< First cast instruction
return Instruction::CastOps(Res);
}
-/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
-/// results in any code being generated and is interesting to optimize out. If
-/// the cast can be eliminated by some other simple transformation, we prefer
+/// Return true if the cast from "V to Ty" actually results in any code being
+/// generated and is interesting to optimize out.
+/// If the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
@@ -318,9 +317,9 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return nullptr;
}
-/// CanEvaluateTruncated - Return true if we can evaluate the specified
-/// expression tree as type Ty instead of its larger type, and arrive with the
-/// same value. This is used by code that tries to eliminate truncates.
+/// Return true if we can evaluate the specified expression tree as type Ty
+/// instead of its larger type, and arrive with the same value.
+/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
@@ -329,7 +328,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
Instruction *CxtI) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
@@ -359,8 +358,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
case Instruction::Or:
case Instruction::Xor:
// These operators can all arbitrarily be extended or truncated.
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
case Instruction::UDiv:
case Instruction::URem: {
@@ -371,8 +370,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
}
}
break;
@@ -383,7 +382,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (CI->getLimitedValue(BitWidth) < BitWidth)
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
break;
case Instruction::LShr:
@@ -396,7 +395,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
if (IC.MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+ return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
}
break;
@@ -410,8 +409,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
- CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
+ return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+ canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -419,7 +418,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
+ if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
@@ -431,6 +430,50 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
return false;
}
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (big endian):
+/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+/// --->
+/// extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+ const DataLayout &DL) {
+ Value *TruncOp = Trunc.getOperand(0);
+ Type *DestType = Trunc.getType();
+ if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+ return nullptr;
+
+ Value *VecInput = nullptr;
+ ConstantInt *ShiftVal = nullptr;
+ if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+ m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShiftVal)))) ||
+ !isa<VectorType>(VecInput->getType()))
+ return nullptr;
+
+ VectorType *VecType = cast<VectorType>(VecInput->getType());
+ unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+ unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+ unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+ if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+ return nullptr;
+
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to a vector type that we can extract from.
+ unsigned NumVecElts = VecWidth / DestWidth;
+ if (VecType->getElementType() != DestType) {
+ VecType = VectorType::get(DestType, NumVecElts);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+ }
+
+ unsigned Elt = ShiftAmount / DestWidth;
+ if (DL.isBigEndian())
+ Elt = NumVecElts - 1 - Elt;
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
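
A little-endian counterpart to the big-endian example in the new function's comment (function and value names are hypothetical):

    define i32 @f(<4 x i32> %x) {
      %bc = bitcast <4 x i32> %x to i128
      %sh = lshr i128 %bc, 32
      ; on a little-endian target this folds to:
      ;   extractelement <4 x i32> %x, i32 1
      %tr = trunc i128 %sh to i32
      ret i32 %tr
    }
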
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
@@ -441,7 +484,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// min/max.
Value *LHS, *RHS;
if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
- if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+ if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
// See if we can simplify any instructions used by the input whose sole
@@ -457,7 +500,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
+ canEvaluateTruncated(Src, DestTy, *this, &CI)) {
    // If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
@@ -470,7 +513,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
- Constant *One = ConstantInt::get(Src->getType(), 1);
+ Constant *One = ConstantInt::get(SrcTy, 1);
Src = Builder->CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
@@ -489,31 +532,54 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
- return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));
// Since we're doing an lshr and a zero extend, and know that the shift
// amount is smaller than ASize, it is always safe to do the shift in A's
// type, then zero extend or truncate to the result.
Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ return CastInst::CreateIntegerCast(Shift, DestTy, false);
+ }
+
+ // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+ // conversion.
+ // It works because bits coming from sign extension have the same value as
+ // the sign bit of the original value; performing ashr instead of lshr
+ // generates bits of the same value as the sign bit.
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+ const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    // This optimization can only be performed when zero bits generated by
+ // the original lshr aren't pulled into the value after truncation, so we
+ // can only shift by values smaller than the size of destination type (in
+ // bits).
+ if (Cst->getValue().ult(ASize)) {
+ Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
}
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
- if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
- ShouldChangeType(Src->getType(), CI.getType()) &&
+ if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
+ ShouldChangeType(SrcTy, DestTy) &&
match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
- Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
return BinaryOperator::CreateAnd(NewTrunc,
- ConstantExpr::getTrunc(Cst, CI.getType()));
+ ConstantExpr::getTrunc(Cst, DestTy));
}
+ if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+ return I;
+
return nullptr;
}
-/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (zext icmp) to bitwise / integer operations in order to eliminate
+/// the icmp.
Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
@@ -637,8 +703,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return nullptr;
}
-/// CanEvaluateZExtd - Determine if the specified value can be computed in the
-/// specified wider type and produce the same low bits. If not, return false.
+/// Determine if the specified value can be computed in the specified wider type
+/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
@@ -655,7 +721,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
InstCombiner &IC, Instruction *CxtI) {
BitsToClear = 0;
if (isa<Constant>(V))
@@ -685,8 +751,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
- !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
return false;
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
@@ -713,7 +779,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// We can promote shl(x, cst) if we can promote x. Since shl overwrites the
// upper bits we can reduce BitsToClear by the shift amount.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
uint64_t ShiftAmt = Amt->getZExtValue();
BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
@@ -724,7 +790,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear += Amt->getZExtValue();
if (BitsToClear > V->getType()->getScalarSizeInBits())
@@ -734,8 +800,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// Cannot promote variable LSHR.
return false;
case Instruction::Select:
- if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
- !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
+ if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear are
// known zero in the disagreeing side.
Tmp != BitsToClear)
@@ -747,10 +813,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
+ if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear
// are known zero in the disagreeing input.
Tmp != BitsToClear)
@@ -787,13 +853,13 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
+ canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid zero extend: " << CI);
+ " to avoid zero extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
@@ -897,8 +963,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return nullptr;
}
-/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
+/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
@@ -985,15 +1050,14 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
return nullptr;
}
-/// CanEvaluateSExtd - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the value of the common low bits. This is used by code that tries
-/// to promote integer operations to a wider types will allow us to eliminate
-/// the extension.
+/// Return true if we can take the specified value and return it as type Ty
+/// without inserting any new casts and without changing the value of the common
+/// low bits. This is used by code that tries to promote integer operations to
+/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
// If this is a constant, it can be trivially promoted.
@@ -1023,15 +1087,15 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
case Instruction::Sub:
case Instruction::Mul:
// These operators can all arbitrarily be extended if their inputs can.
- return CanEvaluateSExtd(I->getOperand(0), Ty) &&
- CanEvaluateSExtd(I->getOperand(1), Ty);
+ return canEvaluateSExtd(I->getOperand(0), Ty) &&
+ canEvaluateSExtd(I->getOperand(1), Ty);
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
case Instruction::Select:
- return CanEvaluateSExtd(I->getOperand(1), Ty) &&
- CanEvaluateSExtd(I->getOperand(2), Ty);
+ return canEvaluateSExtd(I->getOperand(1), Ty) &&
+ canEvaluateSExtd(I->getOperand(2), Ty);
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -1039,7 +1103,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!CanEvaluateSExtd(IncValue, Ty)) return false;
+ if (!canEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
@@ -1081,10 +1145,10 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateSExtd(Src, DestTy)) {
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid sign extend: " << CI);
+ " to avoid sign extend: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, true);
assert(Res->getType() == DestTy);
@@ -1149,9 +1213,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
}
-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// Return a Constant* for the specified floating-point constant if it fits
/// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
bool losesInfo;
APFloat F = CFP->getValueAPF();
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
@@ -1160,12 +1224,12 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
return nullptr;
}
-/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// If this is a floating-point extension instruction, look
/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V) {
+static Value *lookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
- return LookThroughFPExtensions(I->getOperand(0));
+ return lookThroughFPExtensions(I->getOperand(0));
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
@@ -1174,14 +1238,14 @@ static Value *LookThroughFPExtensions(Value *V) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
// See if the value can be truncated to half and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf))
return V;
// See if the value can be truncated to float and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle))
return V;
if (CFP->getType()->isDoubleTy())
return V; // Won't shrink.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble))
return V;
// Don't try to shrink to various long double types.
}
@@ -1193,7 +1257,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
- // simpilify this expression to avoid one or more of the trunc/extend
+ // simplify this expression to avoid one or more of the trunc/extend
// operations if we can do so without changing the numerical results.
//
// The exact manner in which the widths of the operands interact to limit
@@ -1201,8 +1265,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// is explained below in the various case statements.
BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
if (OpI && OpI->hasOneUse()) {
- Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0));
- Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1));
+ Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1));
unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
@@ -1307,10 +1371,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
// (fptrunc (select cond, R1, Cst)) -->
// (select cond, (fptrunc R1), (fptrunc Cst))
+ //
+  // - but only if this isn't part of a min/max operation, else we'll
+  // ruin the min/max canonical form, which is to have the select's and
+  // compare's operands be of the same type with no casts to look through.
+ Value *LHS, *RHS;
SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
if (SI &&
(isa<ConstantFP>(SI->getOperand(1)) ||
- isa<ConstantFP>(SI->getOperand(2)))) {
+ isa<ConstantFP>(SI->getOperand(2))) &&
+ matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) {
Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
CI.getType());
Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
@@ -1327,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
CI.getType());
Type *IntrinsicType[] = { CI.getType() };
- Function *Overload =
- Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
- II->getIntrinsicID(), IntrinsicType);
+ Function *Overload = Intrinsic::getDeclaration(
+ CI.getModule(), II->getIntrinsicID(), IntrinsicType);
Value *Args[] = { InnerTrunc };
return CallInst::Create(Overload, Args, II->getName());
@@ -1483,12 +1552,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
-/// OptimizeVectorResize - This input value (which is known to have vector type)
-/// is being zero extended or truncated to the specified vector type. Try to
-/// replace it with a shuffle (and vector/vector bitcast) if possible.
+/// This input value (which is known to have vector type) is being zero extended
+/// or truncated to the specified vector type.
+/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
+static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
@@ -1548,8 +1617,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
return Value / Ty->getPrimitiveSizeInBits();
}
-/// CollectInsertionElements - V is a value which is inserted into a vector of
-/// VecEltTy. Look through the value to see if we can decompose it into
+/// V is a value which is inserted into a vector of VecEltTy.
+/// Look through the value to see if we can decompose it into
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
@@ -1558,7 +1627,7 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned Shift,
+static bool collectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value *> &Elements,
Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
@@ -1595,7 +1664,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// If the constant is the size of a vector element, we just need to bitcast
// it to the right type so it gets properly inserted.
if (NumElts == 1)
- return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
@@ -1611,7 +1680,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
isBigEndian))
return false;
}
@@ -1625,19 +1694,19 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian) &&
- CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
@@ -1645,7 +1714,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
}
@@ -1653,8 +1722,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
}
-/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
-/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// If the input is an 'or' instruction, we may be doing shifts and ors to
+/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements. This is to
/// optimize code like this:
///
@@ -1667,13 +1736,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
-static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
- if (!CollectInsertionElements(IntInput, 0, Elements,
+ if (!collectInsertionElements(IntInput, 0, Elements,
DestVecTy->getElementType(),
IC.getDataLayout().isBigEndian()))
return nullptr;
@@ -1692,63 +1761,29 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
return Result;
}
-
-/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
-/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
+/// vector followed by extract element. The backend tends to handle bitcasts of
+/// vectors better than bitcasts of scalars because vector registers are
+/// usually not type-specific like scalar integer or scalar floating-point.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+ InstCombiner &IC,
const DataLayout &DL) {
- Value *Src = CI.getOperand(0);
- Type *DestTy = CI.getType();
-
- // If this is a bitcast from int to float, check to see if the int is an
- // extraction from a vector.
- Value *VecInput = nullptr;
- // bitcast(trunc(bitcast(somevector)))
- if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
-
- unsigned Elt = 0;
- if (DL.isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
+ // TODO: Create and use a pattern matcher for ExtractElementInst.
+ auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+ if (!ExtElt || !ExtElt->hasOneUse())
+ return nullptr;
- // bitcast(trunc(lshr(bitcast(somevector), cst))
- ConstantInt *ShAmt = nullptr;
- if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
- m_ConstantInt(ShAmt)))) &&
- isa<VectorType>(VecInput->getType())) {
- VectorType *VecTy = cast<VectorType>(VecInput->getType());
- unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
- if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
- ShAmt->getZExtValue() % DestWidth == 0) {
- // If the element type of the vector doesn't match the result type,
- // bitcast it to be a vector type we can extract from.
- if (VecTy->getElementType() != DestTy) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestWidth);
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
+ // The bitcast must be to a vectorizable type, otherwise we can't make a new
+ // type to extract from.
+ Type *DestType = BitCast.getType();
+ if (!VectorType::isValidElementType(DestType))
+ return nullptr;
- unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (DL.isBigEndian())
- Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
- }
- }
- return nullptr;
+ unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+ auto *NewVecType = VectorType::get(DestType, NumElts);
+ auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+ NewVecType, "bc");
+ return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
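
A sketch of the canonicalization (names hypothetical): the scalar bitcast is hoisted above the extract as a vector bitcast.

    define float @h(<2 x i32> %v) {
      %e = extractelement <2 x i32> %v, i32 0
      ; becomes:
      ;   %bc = bitcast <2 x i32> %v to <2 x float>
      ;   %f  = extractelement <2 x float> %bc, i32 0
      %f = bitcast i32 %e to float
      ret float %f
    }
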
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -1794,11 +1829,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- // Try to optimize int -> float bitcasts.
- if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
- return I;
-
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
@@ -1815,7 +1845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
CastInst *SrcCast = cast<CastInst>(Src);
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
return I;
}
@@ -1823,7 +1853,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
- if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
return ReplaceInstUsesWith(CI, V);
}
}
@@ -1872,6 +1902,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ return I;
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 95bba3c..c0786af 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -216,8 +216,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Max = KnownOne|UnknownBits;
}
-
-
/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
/// cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
@@ -371,7 +369,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
}
-
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
@@ -469,7 +466,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
-
// If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
@@ -496,7 +492,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
return nullptr;
}
-
/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
@@ -562,8 +557,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
}
}
-
-
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
@@ -737,6 +730,83 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return nullptr;
}
+Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
+ Value *Other) {
+ assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
+
+ // It would be tempting to fold away comparisons between allocas and any
+ // pointer not based on that alloca (e.g. an argument). However, even
+ // though such pointers cannot alias, they can still compare equal.
+ //
+ // But LLVM doesn't specify where allocas get their memory, so if the alloca
+ // doesn't escape we can argue that it's impossible to guess its value, and we
+ // can therefore act as if any such guesses are wrong.
+ //
+ // The code below checks that the alloca doesn't escape, and that it's only
+ // used in a comparison once (the current instruction). The
+ // single-comparison-use condition ensures that we're trivially folding all
+ // comparisons against the alloca consistently, and avoids the risk of
+ // erroneously folding a comparison of the pointer with itself.
+
+ unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+
+ SmallVector<Use *, 32> Worklist;
+ for (Use &U : Alloca->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+
+ unsigned NumCmps = 0;
+ while (!Worklist.empty()) {
+ assert(Worklist.size() <= MaxIter);
+ Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
+ --MaxIter;
+
+ if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
+ isa<SelectInst>(V)) {
+ // Track the uses.
+ } else if (isa<LoadInst>(V)) {
+ // Loading from the pointer doesn't escape it.
+ continue;
+ } else if (auto *SI = dyn_cast<StoreInst>(V)) {
+ // Storing *to* the pointer is fine, but storing the pointer escapes it.
+ if (SI->getValueOperand() == U->get())
+ return nullptr;
+ continue;
+ } else if (isa<ICmpInst>(V)) {
+ if (NumCmps++)
+ return nullptr; // Found more than one cmp.
+ continue;
+ } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
+ switch (Intrin->getIntrinsicID()) {
+ // These intrinsics don't escape or compare the pointer. Memset is safe
+ // because we don't allow ptrtoint. Memcpy and memmove are safe because
+ // we don't allow stores, so src cannot point to V.
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
+ continue;
+ default:
+ return nullptr;
+ }
+ } else {
+ return nullptr;
+ }
+ for (Use &U : V->uses()) {
+ if (Worklist.size() >= MaxIter)
+ return nullptr;
+ Worklist.push_back(&U);
+ }
+ }
+
+ Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
+ return ReplaceInstUsesWith(
+ ICI,
+ ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
+}
+
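
A minimal sketch of a case the new fold handles (names hypothetical): the alloca is only stored through and compared exactly once, so the equality can be decided.

    define i1 @k(i8* %other) {
      %a = alloca i8
      store i8 0, i8* %a
      ; %a does not escape and this is its only comparison,
      ; so the icmp folds to false
      %c = icmp eq i8* %a, %other
      ret i1 %c
    }
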
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
@@ -851,7 +921,6 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// to the same result value.
HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
}
-
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
@@ -996,7 +1065,6 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
return Res;
}
-
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
@@ -1074,18 +1142,22 @@ Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
if (AP1 == AP2)
return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
- // Get the distance between the highest bit that's set.
int Shift;
- // Both the constants are negative, take their positive to calculate log.
if (IsAShr && AP1.isNegative())
- // Get the ones' complement of AP2 and AP1 when computing the distance.
- Shift = (~AP2).logBase2() - (~AP1).logBase2();
+ Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
else
- Shift = AP2.logBase2() - AP1.logBase2();
+ Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
if (Shift > 0) {
- if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift))
+ if (IsAShr && AP1 == AP2.ashr(Shift)) {
+ // There are multiple solutions if we are comparing against -1 and the LHS
+ // of the ashr is not a power of two.
+ if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ } else if (AP1 == AP2.lshr(Shift)) {
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ }
}
// Shifting const2 will never be equal to const1.
return getConstant(false);
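
A worked sketch of the multiple-solutions case guarded above (names hypothetical): with AP1 = -1 and AP2 = -8, every in-range shift amount of 3 or more satisfies the equation, since -8 is not a power of two.

    define i1 @m(i32 %a) {
      ; -8 ashr %a equals -1 for every in-range %a >= 3,
      ; so this becomes: icmp uge i32 %a, 3
      %s = ashr i32 -8, %a
      %c = icmp eq i32 %s, -1
      ret i1 %c
    }
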
@@ -1145,6 +1217,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
switch (LHSI->getOpcode()) {
case Instruction::Trunc:
+ if (RHS->isOne() && RHSV.getBitWidth() > 1) {
+ // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI->getOperand(0), m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
if (ICI.isEquality() && LHSI->hasOneUse()) {
// Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
// of the high bits truncated out of x are known.
@@ -1447,9 +1527,35 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
: ICmpInst::ICMP_ULE,
LHSI->getOperand(0), SubOne(RHS));
+
+ // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
+ // iff C is a power of 2
+ if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) {
+ if (auto *CI = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ const APInt &AI = CI->getValue();
+ int32_t ExactLogBase2 = AI.exactLogBase2();
+ if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+ Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1);
+ Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy);
+ return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ
+ ? ICmpInst::ICMP_SGE
+ : ICmpInst::ICMP_SLT,
+ Trunc, Constant::getNullValue(NTy));
+ }
+ }
+ }
break;
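
A sketch of the new power-of-two mask fold, assuming i8 is a legal integer type for the target (names hypothetical):

    define i1 @n(i32 %a) {
      ; C = 128 = 2^7, so this becomes:
      ;   %t = trunc i32 %a to i8
      ;   %c = icmp sge i8 %t, 0
      %m = and i32 %a, 128
      %c = icmp eq i32 %m, 0
      ret i1 %c
    }
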
case Instruction::Or: {
+ if (RHS->isOne()) {
+ // icmp slt signum(V) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
+ match(LHSI, m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+
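
A sketch of the signum fold, assuming the matcher recognizes the usual shift/or signum idiom shown here (names hypothetical):

    define i1 @s(i32 %v) {
      %neg = sub i32 0, %v
      %hi  = ashr i32 %v, 31
      %lo  = lshr i32 %neg, 31
      ; %sig is signum(%v), so this folds to: icmp slt i32 %v, 1
      %sig = or i32 %hi, %lo
      %c   = icmp slt i32 %sig, 1
      ret i1 %c
    }
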
if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
break;
Value *P, *Q;
@@ -2083,11 +2189,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// If the pattern matches, truncate the inputs to the narrower type and
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
- Module *M = I.getParent()->getParent()->getParent();
-
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
- NewType);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sadd_with_overflow, NewType);
InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -2123,6 +2227,12 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return true;
};
+ // If the overflow check was an add followed by a compare, the insertion point
+ // may be pointing to the compare. We want to insert the new instructions
+ // before the add in case there are uses of the add between the add and the
+ // compare.
+ Builder->SetInsertPoint(&OrigI);
+
switch (OCF) {
case OCF_INVALID:
llvm_unreachable("bad overflow check kind!");
@@ -2223,7 +2333,9 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
- Instruction *MulInstr = cast<Instruction>(MulVal);
+ auto *MulInstr = dyn_cast<Instruction>(MulVal);
+ if (!MulInstr)
+ return nullptr;
assert(MulInstr->getOpcode() == Instruction::Mul);
auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
@@ -2357,7 +2469,6 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
InstCombiner::BuilderTy *Builder = IC.Builder;
Builder->SetInsertPoint(MulInstr);
- Module *M = I.getParent()->getParent()->getParent();
// Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
Value *MulA = A, *MulB = B;
@@ -2365,8 +2476,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulA = Builder->CreateZExt(A, MulType);
if (WidthB < MulWidth)
MulB = Builder->CreateZExt(B, MulType);
- Value *F =
- Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::umul_with_overflow, MulType);
CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
@@ -2468,7 +2579,6 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
default:
return APInt::getAllOnesValue(BitWidth);
}
-
}
/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
@@ -2905,7 +3015,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
break;
}
case ICmpInst::ICMP_NE: {
@@ -2950,7 +3059,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
break;
}
case ICmpInst::ICMP_ULT:
@@ -3103,7 +3211,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// comparison into the select arms, which will cause one to be
// constant folded and the select turned into a bitwise or.
Value *Op1 = nullptr, *Op2 = nullptr;
- ConstantInt *CI = 0;
+ ConstantInt *CI = nullptr;
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
CI = dyn_cast<ConstantInt>(Op1);
@@ -3177,6 +3285,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ICmpInst::getSwappedPredicate(I.getPredicate()), I))
return NI;
+ // Try to optimize equality comparisons against alloca-based pointers.
+ if (Op0->getType()->isPointerTy() && I.isEquality()) {
+    assert(Op1->getType()->isPointerTy() &&
+           "Comparing pointer with non-pointer?");
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+ return New;
+ if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+ if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+ return New;
+ }
+
// Test to see if the operands of the icmp are casted versions of other
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
@@ -3304,6 +3423,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
match(B, m_One()))
return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+ // icmp sgt X, (Y + -1) -> icmp sge X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+ // icmp sle X, (Y + -1) -> icmp slt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+ // icmp sge X, (Y + 1) -> icmp sgt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+ // icmp slt X, (Y + 1) -> icmp sle X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
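
One of the four new folds, sketched (names hypothetical); the nsw flag on the add is what makes NoOp1WrapProblem hold:

    define i1 @p(i32 %x, i32 %y) {
      %y1 = add nsw i32 %y, 1
      ; folds to: icmp sgt i32 %x, %y
      %c = icmp sge i32 %x, %y1
      ret i1 %c
    }
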
// if C1 has greater magnitude than C2:
// icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
// s.t. C3 = C1 - C2
@@ -3473,6 +3612,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
}
+
+ if (BO0) {
+ // Transform A & (L - 1) `ult` L --> L != 0
+ auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+ auto BitwiseAnd =
+ m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+ if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+ auto *Zero = Constant::getNullValue(BO0->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
+ }
+ }
}
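
A sketch of the masked-compare fold (names hypothetical): for %l == 0 the mask is all-ones and the unsigned compare fails; for %l != 0 the masked value is at most %l - 1.

    define i1 @q(i32 %a, i32 %l) {
      %lm1 = add i32 %l, -1
      %and = and i32 %a, %lm1
      ; folds to: icmp ne i32 %l, 0
      %c = icmp ult i32 %and, %l
      ret i1 %c
    }
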
{ Value *A, *B;
@@ -3697,15 +3848,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
- // Check to see that the input is converted from an integer type that is small
- // enough that preserves all bits. TODO: check here for "known" sign bits.
- // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = IntTy->getScalarSizeInBits();
-
- // If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
- if (LHSUnsigned)
- ++InputSize;
if (I.isEquality()) {
FCmpInst::Predicate P = I.getPredicate();
@@ -3732,13 +3875,30 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// equality compares as integer?
}
- // Comparisons with zero are a special case where we know we won't lose
- // information.
- bool IsCmpZero = RHS.isPosZero();
+  // Check to see that the input is converted from an integer type that is small
+  // enough that it preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = IntTy->getScalarSizeInBits();
- // If the conversion would lose info, don't hack on this.
- if ((int)InputSize > MantissaWidth && !IsCmpZero)
- return nullptr;
+  // The following test does NOT adjust InputSize downwards for signed inputs,
+ // because the most negative value still requires all the mantissa bits
+ // to distinguish it from one less than that value.
+ if ((int)InputSize > MantissaWidth) {
+ // Conversion would lose accuracy. Check if loss can impact comparison.
+ int Exp = ilogb(RHS);
+ if (Exp == APFloat::IEK_Inf) {
+ int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
+ if (MaxExponent < (int)InputSize - !LHSUnsigned)
+ // Conversion could create infinity.
+ return nullptr;
+ } else {
+ // Note that if RHS is zero or NaN, then Exp is negative
+      // and the first condition is trivially false.
+ if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
+ // Conversion could affect comparison.
+ return nullptr;
+ }
+ }
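
A sketch of a compare the relaxed test now lets through (names hypothetical): i64 exceeds float's 24-bit mantissa, but ilogb(0.5) = -1 falls outside the dangerous exponent range, so the compare can still be decided exactly (here, to false).

    define i1 @t(i64 %x) {
      %f = sitofp i64 %x to float
      %c = fcmp oeq float %f, 0.5
      ret i1 %c
    }
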
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ac934f1..534f670 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -281,6 +281,7 @@ public:
ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other);
Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
@@ -341,6 +342,7 @@ public:
const unsigned SIOpd);
private:
+ bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
@@ -360,6 +362,11 @@ private:
/// \brief Try to optimize a sequence of instructions checking if an operation
/// on LHS and RHS overflows.
///
+ /// If this overflow check is done via one of the overflow check intrinsics,
+ /// then CtxI has to be the call instruction calling that intrinsic. If this
+ /// overflow check is done by arithmetic followed by a compare, then CtxI has
+ /// to be the arithmetic instruction.
+ ///
/// If a simplification is possible, stores the simplified result of the
/// operation in OperationResult and result of the overflow check in
/// OverflowResult, and return true. If no simplification is possible,
@@ -393,7 +400,7 @@ public:
assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
- BB->getInstList().insert(&Old, New); // Insert inst
+ BB->getInstList().insert(Old.getIterator(), New); // Insert inst
Worklist.Add(New);
return New;
}
@@ -539,6 +546,7 @@ private:
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN);
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
@@ -548,7 +556,7 @@ private:
Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
- Instruction *MatchBSwap(BinaryOperator &I);
+ Instruction *MatchBSwapOrBitReverse(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e3179db..47406b9 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
@@ -90,21 +91,23 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (CS.isCallee(&U))
continue;
+ unsigned DataOpNo = CS.getDataOperandNo(&U);
+ bool IsArgOperand = CS.isArgOperand(&U);
+
// Inalloca arguments are clobbered by the call.
- unsigned ArgNo = CS.getArgumentNo(&U);
- if (CS.isInAllocaArgument(ArgNo))
+ if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
return false;
// If this is a readonly/readnone call site, then we know it is just a
// load (but one that potentially returns the value itself), so we can
// ignore it if we know that the value isn't captured.
if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
+ if (IsArgOperand && CS.isByValArgument(DataOpNo))
continue;
}
@@ -186,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Scan to the end of the allocation instructions, to skip over a block of
// allocas if possible...also skip interleaved debug info
//
- BasicBlock::iterator It = New;
+ BasicBlock::iterator It(New);
while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
++It;
@@ -367,7 +370,13 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
MDB.createRange(NonNullInt, NullInt));
}
break;
-
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ // These only directly apply if the new type is also a pointer.
+ if (NewTy->isPointerTy())
+ NewLoad->setMetadata(ID, N);
+ break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
// conversions make it hard. If the new type is a pointer, we could
@@ -418,6 +427,9 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
case LLVMContext::MD_range:
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
// These don't apply for stores.
break;
}
@@ -511,16 +523,46 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
if (!T->isAggregateType())
return nullptr;
- assert(LI.getAlignment() && "Alignement must be set at this point");
+ assert(LI.getAlignment() && "Alignment must be set at this point");
if (auto *ST = dyn_cast<StructType>(T)) {
// If the struct only has one element, we unpack.
- if (ST->getNumElements() == 1) {
+ unsigned Count = ST->getNumElements();
+ if (Count == 1) {
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
".unpack");
return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
UndefValue::get(T), NewLoad, 0, LI.getName()));
}
+
+ // We don't want to break loads with padding here as we'd lose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return nullptr;
+
+ auto Name = LI.getName();
+ SmallString<16> LoadName = Name;
+ LoadName += ".unpack";
+ SmallString<16> EltName = Name;
+ EltName += ".elt";
+ auto *Addr = LI.getPointerOperand();
+ Value *V = UndefValue::get(T);
+ auto *IdxType = Type::getInt32Ty(ST->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+ for (unsigned i = 0; i < Count; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+ auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+ V = IC.Builder->CreateInsertValue(V, L, i);
+ }
+
+ V->setName(Name);
+ return IC.ReplaceInstUsesWith(LI, V);
}
if (auto *AT = dyn_cast<ArrayType>(T)) {
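The hunk above splits a padding-free struct load into one load per element, rebuilt with `insertvalue`. A source-level sketch of the same idea (illustrative only, not the LLVM API):

```cpp
#include <cstdint>
#include <cstdio>

struct Pair { int32_t A; int32_t B; };         // no padding: unpacking is safe

int main() {
  Pair P{1, 2};
  // Before: one aggregate load.
  Pair Whole = P;
  // After: one load per element through its own address, then reassemble.
  int32_t EltA = *(&P.A);                      // like: gep 0,0 ; load ".unpack"
  int32_t EltB = *(&P.B);                      // like: gep 0,1 ; load ".unpack"
  Pair Rebuilt{EltA, EltB};
  std::printf("%d%d == %d%d\n", Whole.A, Whole.B, Rebuilt.A, Rebuilt.B);
}
```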
@@ -681,7 +723,7 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
// FIXME: If the GEP is not inbounds, and there are extra indices after the
// one we'll replace, those could cause the address computation to wrap
// (rendering the IsAllNonNegative() check below insufficient). We can do
- // better, ignoring zero indicies (and other indicies we can prove small
+ // better, ignoring zero indices (and other indices we can prove small
// enough not to wrap).
if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
return false;
@@ -748,19 +790,19 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
- BasicBlock::iterator BBI = &LI;
+ BasicBlock::iterator BBI(LI);
AAMDNodes AATags;
- if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
- 6, AA, &AATags)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+ DefMaxInstsToScan, AA, &AATags)) {
if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull,
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
combineMetadata(NLI, &LI, KnownIDs);
};
@@ -822,7 +864,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
// load (select (cond, null, P)) -> load P
- if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
LI.getPointerAddressSpace() == 0) {
LI.setOperand(0, SI->getOperand(2));
return &LI;
@@ -857,7 +899,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller erase
-/// the store instruction sas otherwise there is no way to signal whether it was
+/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic
@@ -893,11 +935,38 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
if (auto *ST = dyn_cast<StructType>(T)) {
// If the struct only has one element, we unpack.
- if (ST->getNumElements() == 1) {
+ unsigned Count = ST->getNumElements();
+ if (Count == 1) {
V = IC.Builder->CreateExtractValue(V, 0);
combineStoreToNewValue(IC, SI, V);
return true;
}
+
+ // We don't want to break stores with padding here as we'd lose
+ // the knowledge that padding exists for the rest of the pipeline.
+ const DataLayout &DL = IC.getDataLayout();
+ auto *SL = DL.getStructLayout(ST);
+ if (SL->hasPadding())
+ return false;
+
+ SmallString<16> EltName = V->getName();
+ EltName += ".elt";
+ auto *Addr = SI.getPointerOperand();
+ SmallString<16> AddrName = Addr->getName();
+ AddrName += ".repack";
+ auto *IdxType = Type::getInt32Ty(ST->getContext());
+ auto *Zero = ConstantInt::get(IdxType, 0);
+ for (unsigned i = 0; i < Count; i++) {
+ Value *Indices[2] = {
+ Zero,
+ ConstantInt::get(IdxType, i),
+ };
+ auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+ auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+ IC.Builder->CreateStore(Val, Ptr);
+ }
+
+ return true;
}
if (auto *AT = dyn_cast<ArrayType>(T)) {
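The store-side unpacking above bails out when the struct layout has padding, because per-field stores would hide the fact that the padding bytes are dead. A compile-time sketch of that gate with ordinary C++ layouts (helper name hypothetical):

```cpp
#include <cstdint>

struct Packed { int32_t A; int32_t B; };       // 8 bytes, no padding
struct Padded { int8_t  A; int32_t B; };       // 8 bytes, 3 padding bytes

template <typename T, typename FieldA, typename FieldB>
constexpr bool hasPadding() {
  // Padding exists when the fields don't account for the full object size.
  return sizeof(FieldA) + sizeof(FieldB) != sizeof(T);
}

static_assert(!hasPadding<Packed, int32_t, int32_t>(), "safe to unpack");
static_assert(hasPadding<Padded, int8_t, int32_t>(), "bail: keep one store");
int main() {}
```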
@@ -971,9 +1040,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return &SI;
}
- // Don't hack volatile/atomic stores.
- // FIXME: Some bits are legal for atomic stores; needs refactoring.
- if (!SI.isSimple()) return nullptr;
+ // Don't hack volatile/ordered stores.
+ // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+ if (!SI.isUnordered()) return nullptr;
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
@@ -991,7 +1060,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
- BasicBlock::iterator BBI = &SI;
+ BasicBlock::iterator BBI(SI);
for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
--ScanInsts) {
--BBI;
@@ -1005,7 +1074,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
- if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+ if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
SI.getOperand(1))) {
++NumDeadStore;
++BBI;
@@ -1019,9 +1088,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
- LI->isSimple())
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+ assert(SI.isUnordered() && "can't eliminate ordering operation");
return EraseInstFromFunction(SI);
+ }
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
@@ -1047,10 +1117,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return EraseInstFromFunction(SI);
+ // The code below needs to be audited and adjusted for unordered atomics
+ if (!SI.isSimple())
+ return nullptr;
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = SI.getIterator();
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -1106,7 +1180,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// Verify that the other block ends in a branch and is not otherwise empty.
- BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BasicBlock::iterator BBI(OtherBB->getTerminator());
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index a554e9f..7ad0efc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -22,9 +22,9 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// simplifyValueKnownNonZero - The specific integer value is used in a context
-/// where it is known to be non-zero. If this allows us to simplify the
-/// computation, do so and return the new operand, otherwise return null.
+/// The specific integer value is used in a context where it is known to be
+/// non-zero. If this allows us to simplify the computation, do so and return
+/// the new operand; otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
@@ -76,8 +76,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
}
-/// MultiplyOverflows - True if the multiply can not be expressed in an int
-/// this size.
+/// True if the multiply cannot be expressed in an int this size.
static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
bool IsSigned) {
bool Overflow;
@@ -95,6 +94,14 @@ static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
assert(C1.getBitWidth() == C2.getBitWidth() &&
"Inconsistent width of constants!");
+ // Bail if we will divide by zero.
+ if (C2.isMinValue())
+ return false;
+
+ // Bail if we would divide INT_MIN by -1.
+ if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
+ return false;
+
APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
if (IsSigned)
APInt::sdivrem(C1, C2, Quotient, Remainder);
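The two new bail-outs guard the `sdivrem` below them: dividing by zero or dividing the minimum signed value by -1 is undefined (and traps on most hardware), so the exact-multiple check must refuse those constant pairs up front. A standalone sketch, assuming 64-bit constants (function name hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <limits>
#include <optional>

std::optional<int64_t> exactSignedQuotient(int64_t C1, int64_t C2) {
  if (C2 == 0)
    return std::nullopt;  // would divide by zero
  if (C1 == std::numeric_limits<int64_t>::min() && C2 == -1)
    return std::nullopt;  // INT_MIN / -1 overflows
  if (C1 % C2 != 0)
    return std::nullopt;  // nonzero remainder: C1 is not a multiple of C2
  return C1 / C2;
}

int main() {
  if (auto Q = exactSignedQuotient(-42, 7))
    std::printf("quotient = %lld\n", static_cast<long long>(*Q)); // -6
}
```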
@@ -705,8 +712,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
-/// instruction.
+/// Try to fold a divide or remainder of a select instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
@@ -740,7 +746,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
return true;
// Scan the current block backward, looking for other uses of SI.
- BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+ BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
while (BBI != BBFront) {
--BBI;
@@ -754,10 +760,10 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
I != E; ++I) {
if (*I == SI) {
*I = SI->getOperand(NonNullOperand);
- Worklist.Add(BBI);
+ Worklist.Add(&*BBI);
} else if (*I == SelectCond) {
*I = Builder->getInt1(NonNullOperand == 1);
- Worklist.Add(BBI);
+ Worklist.Add(&*BBI);
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 460f6eb..f1aa98b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -245,7 +246,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// non-address-taken alloca. Doing so will cause us to not promote the alloca
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
- BasicBlock::iterator BBI = L, E = L->getParent()->end();
+ BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
@@ -349,24 +350,40 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+ LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ };
- // Add all operands to the new PHI.
+ for (unsigned ID : KnownIDs)
+ NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
+
+ // Add all operands to the new PHI and combine TBAA metadata.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ combineMetadata(NewLI, LI, KnownIDs);
+ Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
- Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
// common, so we handle it intelligently here for compile-time speed.
- PhiVal = InVal;
+ NewLI->setOperand(0, InVal);
delete NewPN;
} else {
InsertNewInstBefore(NewPN, PN);
- PhiVal = NewPN;
}
// If this was a volatile load that we are merging, make sure to loop through
@@ -376,17 +393,94 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
- LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
}
+/// TODO: This function could handle other cast types, but then it might
+/// require special-casing a cast from the 'i1' type. See the comment in
+/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types.
+Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = Phi.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
+ // Early exit for the common case of a phi with two operands. These are
+ // handled elsewhere. See the comment below where we check the count of zexts
+ // and constants for more details.
+ unsigned NumIncomingValues = Phi.getNumIncomingValues();
+ if (NumIncomingValues < 3)
+ return nullptr;
+ // Find the narrower type specified by the first zext.
+ Type *NarrowType = nullptr;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ NarrowType = Zext->getSrcTy();
+ break;
+ }
+ }
+ if (!NarrowType)
+ return nullptr;
+
+ // Walk the phi operands checking that we only have zexts or constants that
+ // we can shrink for free. Store the new operands for the new phi.
+ SmallVector<Value *, 4> NewIncoming;
+ unsigned NumZexts = 0;
+ unsigned NumConsts = 0;
+ for (Value *V : Phi.incoming_values()) {
+ if (auto *Zext = dyn_cast<ZExtInst>(V)) {
+ // All zexts must be identical and have one use.
+ if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse())
+ return nullptr;
+ NewIncoming.push_back(Zext->getOperand(0));
+ NumZexts++;
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ // Make sure that constants can fit in the new type.
+ Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType);
+ if (ConstantExpr::getZExt(Trunc, C->getType()) != C)
+ return nullptr;
+ NewIncoming.push_back(Trunc);
+ NumConsts++;
+ } else {
+ // If it's not a cast or a constant, bail out.
+ return nullptr;
+ }
+ }
+
+ // The more common cases of a phi with no constant operands or just one
+ // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi()
+ // respectively. FoldOpIntoPhi() wants to do the opposite transform that is
+ // performed here. It tries to replicate a cast in the phi operand's basic
+ // block to expose other folding opportunities. Thus, InstCombine would
+ // loop infinitely without this check.
+ if (NumConsts == 0 || NumZexts < 2)
+ return nullptr;
+
+ // All incoming values are zexts or constants that are safe to truncate.
+ // Create a new phi node of the narrow type, phi together all of the new
+ // operands, and zext the result back to the original type.
+ PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
+ Phi.getName() + ".shrunk");
+ for (unsigned i = 0; i != NumIncomingValues; ++i)
+ NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+
+ InsertNewInstBefore(NewPhi, Phi);
+ return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
+}
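The constant screen in the new function relies on a trunc/zext round trip: a constant may ride through the narrow phi only if `zext(trunc(C)) == C`. A sketch with fixed-width integers, assuming an i32-to-i8 shrink (function name hypothetical):

```cpp
#include <cstdint>
#include <cstdio>

bool fitsAfterZextShrink(uint32_t C) {
  uint8_t Trunc = static_cast<uint8_t>(C);     // trunc i32 -> i8
  return static_cast<uint32_t>(Trunc) == C;    // zext i8 -> i32 round trip
}

int main() {
  // 200 survives the round trip (prints 1); 300 does not (prints 0).
  std::printf("%d %d\n", fitsAfterZextShrink(200), fitsAfterZextShrink(300));
}
```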
/// If all operands to a PHI node are the same "unary" operator and they all are
/// only used by the PHI, PHI together their inputs, and do the operation once,
/// to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ // We cannot create a new instruction after the PHI if the terminator is an
+ // EHPad because there is no valid insertion point.
+ if (TerminatorInst *TI = PN.getParent()->getTerminator())
+ if (TI->isEHPad())
+ return nullptr;
+
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
if (isa<GetElementPtrInst>(FirstInst))
@@ -740,7 +834,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Otherwise, do an extract in the predecessor.
- Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Builder->SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
@@ -787,6 +881,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC))
return ReplaceInstUsesWith(PN, V);
+ if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
+ return Result;
+
// If all PHI operands are the same operation, pull them through the PHI,
// reducing code size.
if (isa<Instruction>(PN.getIncomingValue(0)) &&
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f51442a..776704d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -38,7 +38,8 @@ getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
}
}
-static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
+static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
+ bool Ordered=false) {
switch (SPF) {
default:
llvm_unreachable("unhandled!");
@@ -51,17 +52,22 @@ static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
return ICmpInst::ICMP_SGT;
case SPF_UMAX:
return ICmpInst::ICMP_UGT;
+ case SPF_FMINNUM:
+ return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
+ case SPF_FMAXNUM:
+ return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
}
}
static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
SelectPatternFlavor SPF, Value *A,
Value *B) {
- CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
+ assert(CmpInst::isIntPredicate(Pred));
return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
}
-/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// We want to turn code that looks like this:
/// %C = or %A, %B
/// %D = select %cond, %C, %A
/// into:
@@ -90,8 +96,8 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {
}
}
-/// GetSelectFoldableConstant - For the same transformation as the previous
-/// function, return the identity constant that goes into the select.
+/// For the same transformation as the previous function, return the identity
+/// constant that goes into the select.
static Constant *GetSelectFoldableConstant(Instruction *I) {
switch (I->getOpcode()) {
default: llvm_unreachable("This cannot happen!");
@@ -110,7 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
}
}
-/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// Here we have (select c, TI, FI), and we know that TI and FI
/// have the same opcode and only one use each. Try to simplify this.
Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
@@ -197,8 +203,8 @@ static bool isSelect01(Constant *C1, Constant *C2) {
C2I->isOne() || C2I->isAllOnesValue();
}
-/// FoldSelectIntoOp - Try fold the select into one of the operands to
-/// facilitate further optimization.
+/// Try to fold the select into one of the operands to allow further
+/// optimization.
Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *FalseVal) {
// See the comment above GetSelectFoldableOperands for a description of the
@@ -276,7 +282,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}
-/// foldSelectICmpAndOr - We want to turn:
+/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
/// (or (shl (and X, C1), C3), y)
@@ -394,9 +400,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-/// visitSelectInstWithICmp - Visit a SelectInst that has an
-/// ICmpInst as its first operand.
-///
+/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
bool Changed = false;
@@ -595,10 +599,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
-/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
-/// PHI node (but the two may be in different blocks). See if the true/false
-/// values (V) are live in all of the predecessor blocks of the PHI. For
-/// example, cases like this cannot be mapped:
+/// SI is a select whose condition is a PHI node (but the two may be in
+/// different blocks). See if the true/false values (V) are live in all of the
+/// predecessor blocks of the PHI. For example, cases like this can't be mapped:
///
/// X = phi [ C1, BB1], [C2, BB2]
/// Y = add
@@ -632,7 +635,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
return false;
}
-/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// We have an SPF (e.g. a min or max) of an SPF of the form:
/// SPF2(SPF1(A, B), C)
Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SelectPatternFlavor SPF1,
@@ -745,10 +748,10 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
return nullptr;
}
-/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
-/// both be) and we have an icmp instruction with zero, and we have an 'and'
-/// with the non-constant value and a power of two we can turn the select
-/// into a shift on the result of the 'and'.
+/// If one of the constants is zero (we know they can't both be) and we have an
+/// icmp instruction with zero, and we have an 'and' with the non-constant value
+/// and a power of two we can turn the select into a shift on the result of the
+/// 'and'.
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
ConstantInt *FalseVal,
InstCombiner::BuilderTy *Builder) {
@@ -926,6 +929,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->SetFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
FCI->getName() + ".inv");
@@ -967,6 +972,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ Builder->SetFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
FCI->getName() + ".inv");
@@ -1054,35 +1061,50 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isIntOrIntVectorTy()) {
+ if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
Value *LHS, *RHS, *LHS2, *RHS2;
Instruction::CastOps CastOp;
- SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+ SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+ auto SPF = SPR.Flavor;
- if (SPF) {
+ if (SelectPatternResult::isMinOrMax(SPF)) {
// Canonicalize so that type casts are outside select patterns.
if (LHS->getType()->getPrimitiveSizeInBits() !=
SI.getType()->getPrimitiveSizeInBits()) {
- CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
- Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered);
+
+ Value *Cmp;
+ if (CmpInst::isIntPredicate(Pred)) {
+ Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ } else {
+ IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+ auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
+ Builder->SetFastMathFlags(FMF);
+ Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
+ }
+
Value *NewSI = Builder->CreateCast(CastOp,
Builder->CreateSelect(Cmp, LHS, RHS),
SI.getType());
return ReplaceInstUsesWith(SI, NewSI);
}
+ }
+ if (SPF) {
// MAX(MAX(a, b), a) -> MAX(a, b)
// MIN(MIN(a, b), a) -> MIN(a, b)
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
- if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
+ // ABS(ABS(a)) -> ABS(a)
+ // NABS(NABS(a)) -> NABS(a)
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
- if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2))
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
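The canonicalization above pulls casts outside min/max select patterns: the compare and select are performed in the narrow type, and a single cast is applied to the result. A source-level sketch with an unsigned max, assuming a zext from i8 to i32 (function names hypothetical):

```cpp
#include <cstdint>

// Before: widen both sides, then select on the wide values.
uint32_t maxWideFirst(uint8_t A, uint8_t B) {
  uint32_t WA = A, WB = B;
  return WA > WB ? WA : WB;
}

// After: select in the narrow type, cast once on the way out.
uint32_t maxNarrowFirst(uint8_t A, uint8_t B) {
  return static_cast<uint32_t>(A > B ? A : B);
}
```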
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index d04ed58..0c7defa 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -55,7 +55,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return nullptr;
}
-/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits. This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree. This is used to eliminate extraneous shifting from things
@@ -184,7 +184,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
}
}
-/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// When CanEvaluateShifted returned true for an expression,
/// this value inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
InstCombiner &IC, const DataLayout &DL) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 80628b2..743d514 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -410,9 +410,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If this is a select as part of a min/max pattern, don't simplify any
// further in case we break the structure.
Value *LHS, *RHS;
- if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+ if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
-
+
if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
@@ -1057,7 +1057,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
for (unsigned i = 0; i < VWidth; i++) {
- if (CV->getAggregateElement(i)->isNullValue())
+ Constant *CElt = CV->getAggregateElement(i);
+ // Method isNullValue always returns false when called on a
+ // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
+ // avoid propagating incorrect information.
+ if (isa<ConstantExpr>(CElt))
+ continue;
+ if (CElt->isNullValue())
LeftDemanded.clearBit(i);
else
RightDemanded.clearBit(i);
@@ -1082,6 +1088,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!VTy) break;
unsigned InVWidth = VTy->getNumElements();
APInt InputDemandedElts(InVWidth, 0);
+ UndefElts2 = APInt(InVWidth, 0);
unsigned Ratio;
if (VWidth == InVWidth) {
@@ -1089,29 +1096,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// elements as are demanded of us.
Ratio = 1;
InputDemandedElts = DemandedElts;
- } else if (VWidth > InVWidth) {
- // Untested so far.
- break;
-
- // If there are more elements in the result than there are in the source,
- // then an input element is live if any of the corresponding output
- // elements are live.
- Ratio = VWidth/InVWidth;
- for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an input element is live if any of the
+ // corresponding output elements are live.
+ Ratio = VWidth / InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
if (DemandedElts[OutIdx])
- InputDemandedElts.setBit(OutIdx/Ratio);
- }
- } else {
- // Untested so far.
- break;
-
- // If there are more elements in the source than there are in the result,
- // then an input element is live if the corresponding output element is
- // live.
- Ratio = InVWidth/VWidth;
+ InputDemandedElts.setBit(OutIdx / Ratio);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an input element is live if the
+ // corresponding output element is live.
+ Ratio = InVWidth / VWidth;
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (DemandedElts[InIdx/Ratio])
+ if (DemandedElts[InIdx / Ratio])
InputDemandedElts.setBit(InIdx);
+ } else {
+ // Unsupported so far.
+ break;
}
// div/rem demand all inputs, because they don't want divide by zero.
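The rewritten hunk above generalizes the old "untested" paths: when the output element count is a multiple of the input count, an input element is demanded if any of its `Ratio` output slices is demanded. A small sketch of that mapping with bitsets (values are hypothetical):

```cpp
#include <bitset>
#include <cstdio>

int main() {
  constexpr unsigned VWidth = 8, InVWidth = 4, Ratio = VWidth / InVWidth;
  std::bitset<VWidth> Demanded("00010010");    // output elts 1 and 4 demanded
  std::bitset<InVWidth> InputDemanded;
  for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
    if (Demanded[OutIdx])
      InputDemanded.set(OutIdx / Ratio);       // elt 1 -> 0, elt 4 -> 2
  std::printf("%s\n", InputDemanded.to_string().c_str()); // prints "0101"
}
```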
@@ -1122,24 +1125,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
MadeChange = true;
}
- UndefElts = UndefElts2;
- if (VWidth > InVWidth) {
- llvm_unreachable("Unimp");
- // If there are more elements in the result than there are in the source,
- // then an output element is undef if the corresponding input element is
- // undef.
+ if (VWidth == InVWidth) {
+ UndefElts = UndefElts2;
+ } else if ((VWidth % InVWidth) == 0) {
+ // If the number of elements in the output is a multiple of the number of
+ // elements in the input then an output element is undef if the
+ // corresponding input element is undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
- if (UndefElts2[OutIdx/Ratio])
+ if (UndefElts2[OutIdx / Ratio])
+ UndefElts.setBit(OutIdx);
+ } else if ((InVWidth % VWidth) == 0) {
+ // If the number of elements in the input is a multiple of the number of
+ // elements in the output then an output element is undef if all of the
+ // corresponding input elements are undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
+ if (SubUndef.countPopulation() == Ratio)
UndefElts.setBit(OutIdx);
- } else if (VWidth < InVWidth) {
+ }
+ } else {
llvm_unreachable("Unimp");
- // If there are more elements in the source than there are in the result,
- // then a result element is undef if all of the corresponding input
- // elements are undef.
- UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
- for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (!UndefElts2[InIdx]) // Not undef?
- UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
}
break;
}
@@ -1237,6 +1242,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+
+ // SSE4A instructions leave the upper 64 bits of the 128-bit result
+ // in an undefined state.
+ case Intrinsic::x86_sse4a_extrq:
+ case Intrinsic::x86_sse4a_extrqi:
+ case Intrinsic::x86_sse4a_insertq:
+ case Intrinsic::x86_sse4a_insertqi:
+ UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+ break;
}
break;
}
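The new SSE4A case marks the whole upper half of the result vector undef, mirroring `APInt::getHighBitsSet(VWidth, VWidth / 2)`. A plain-integer sketch for an 8-element vector (values hypothetical):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  unsigned VWidth = 8;                          // e.g. an 8-element result
  uint8_t UndefElts = 0x01;                     // element 0 already undef
  uint8_t HighHalf = static_cast<uint8_t>(0xFFu << (VWidth / 2)); // 0xF0
  UndefElts |= HighHalf;                        // upper half now undef
  std::printf("0x%02X\n", UndefElts);           // prints 0xF1
}
```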
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2730472..e25639a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -22,10 +22,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation. isConstant indicates whether we're
-/// extracting one known element. If false we're extracting a variable index.
-static bool CheapToScalarize(Value *V, bool isConstant) {
+/// Return true if the value is cheaper to scalarize than it is to leave as a
+/// vector operation. isConstant indicates whether we're extracting one known
+/// element. If false we're extracting a variable index.
+static bool cheapToScalarize(Value *V, bool isConstant) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
@@ -50,13 +50,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
return true;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
if (BO->hasOneUse() &&
- (CheapToScalarize(BO->getOperand(0), isConstant) ||
- CheapToScalarize(BO->getOperand(1), isConstant)))
+ (cheapToScalarize(BO->getOperand(0), isConstant) ||
+ cheapToScalarize(BO->getOperand(1), isConstant)))
return true;
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->hasOneUse() &&
- (CheapToScalarize(CI->getOperand(0), isConstant) ||
- CheapToScalarize(CI->getOperand(1), isConstant)))
+ (cheapToScalarize(CI->getOperand(0), isConstant) ||
+ cheapToScalarize(CI->getOperand(1), isConstant)))
return true;
return false;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// and that it is a binary operation which is cheap to scalarize;
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
@@ -115,8 +115,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
if (pos && !isa<PHINode>(pos)) {
- InsertPos = pos;
- ++InsertPos;
+ InsertPos = ++pos->getIterator();
} else {
InsertPos = inBB->getFirstInsertionPt();
}
@@ -137,7 +136,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
- if (CheapToScalarize(C, false))
+ if (cheapToScalarize(C, false))
return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
@@ -163,7 +162,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- // If the this extractelement is directly using a bitcast from a vector of
+ // If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
@@ -184,10 +183,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
- // profitable to do so
+ // profitable to do so.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
if (I->hasOneUse() &&
- CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
EI.getName()+".lhs");
@@ -230,8 +229,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
SrcIdx, false));
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // Canonicalize extractelement(cast) -> cast(extractelement)
- // bitcasts can change the number of vector elements and they cost nothing
+ // Canonicalize extractelement(cast) -> cast(extractelement).
+ // Bitcasts can change the number of vector elements, and they cost
+ // nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
@@ -245,7 +245,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// fight the vectorizer.
// If we are extracting an element from a vector select or a select on
- // vectors, a select on the scalars extracted from the vector arguments.
+ // vectors, create a select on the scalars extracted from the vector
+ // arguments.
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
@@ -275,10 +276,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
return nullptr;
}
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+/// If V is a shuffle of values that ONLY returns elements from either LHS or
+/// RHS, return the shuffle mask and true. Otherwise, return false.
+static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(LHS->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
@@ -315,7 +315,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
// We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
@@ -330,7 +330,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
// We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
Mask[InsertedIdx % NumElts] =
@@ -352,6 +352,48 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
return false;
}
+/// If we have insertion into a vector that is wider than the vector that we
+/// are extracting from, try to widen the source vector to allow a single
+/// shufflevector to replace one or more insert/extract pairs.
+static void replaceExtractElements(InsertElementInst *InsElt,
+ ExtractElementInst *ExtElt,
+ InstCombiner &IC) {
+ VectorType *InsVecType = InsElt->getType();
+ VectorType *ExtVecType = ExtElt->getVectorOperandType();
+ unsigned NumInsElts = InsVecType->getVectorNumElements();
+ unsigned NumExtElts = ExtVecType->getVectorNumElements();
+
+ // The inserted-to vector must be wider than the extracted-from vector.
+ if (InsVecType->getElementType() != ExtVecType->getElementType() ||
+ NumExtElts >= NumInsElts)
+ return;
+
+ // Create a shuffle mask to widen the extracted-from vector using undefined
+ // values. The mask selects all of the values of the original vector followed
+ // by as many undefined values as needed to create a vector of the same length
+ // as the inserted-to vector.
+ SmallVector<Constant *, 16> ExtendMask;
+ IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
+ for (unsigned i = 0; i < NumExtElts; ++i)
+ ExtendMask.push_back(ConstantInt::get(IntType, i));
+ for (unsigned i = NumExtElts; i < NumInsElts; ++i)
+ ExtendMask.push_back(UndefValue::get(IntType));
+
+ Value *ExtVecOp = ExtElt->getVectorOperand();
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
+ ConstantVector::get(ExtendMask));
+
+ // Replace all extracts from the original narrow vector with extracts from
+ // the new wide vector.
+ WideVec->insertBefore(ExtElt);
+ for (User *U : ExtVecOp->users()) {
+ if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
+ auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+ NewExt->insertAfter(WideVec);
+ IC.ReplaceInstUsesWith(*OldExt, NewExt);
+ }
+ }
+}
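The widening mask built in `replaceExtractElements` selects all lanes of the original narrow vector and pads the rest with undef. A sketch of the mask construction, modeling undef lanes as -1 (as shufflevector masks conventionally print them; function name hypothetical):

```cpp
#include <cstdio>
#include <vector>

std::vector<int> makeExtendMask(unsigned NumExtElts, unsigned NumInsElts) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    Mask.push_back(static_cast<int>(i));  // take lane i of the narrow vector
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    Mask.push_back(-1);                   // undef filler lane
  return Mask;
}

int main() {
  for (int M : makeExtendMask(4, 8))
    std::printf("%d ", M);                // prints: 0 1 2 3 -1 -1 -1 -1
}
```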
/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
@@ -363,9 +405,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// often been chosen carefully to be efficiently implementable on the target.
typedef std::pair<Value *, Value *> ShuffleOps;
-static ShuffleOps CollectShuffleElements(Value *V,
+static ShuffleOps collectShuffleElements(Value *V,
SmallVectorImpl<Constant *> &Mask,
- Value *PermittedRHS) {
+ Value *PermittedRHS,
+ InstCombiner &IC) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
@@ -396,10 +439,14 @@ static ShuffleOps CollectShuffleElements(Value *V,
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
- ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
+ ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
+ // Although we are giving up for now, see if we can create extracts
+ // that match the inserts for another round of combining.
+ replaceExtractElements(IEI, EI, IC);
+
// We tried our best, but we can't find anything compatible with RHS
// further up the chain. Return a trivial shuffle.
for (unsigned i = 0; i < NumElts; ++i)
@@ -429,14 +476,14 @@ static ShuffleOps CollectShuffleElements(Value *V,
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
- CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+ collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
Mask))
return std::make_pair(EI->getOperand(0), PermittedRHS);
}
}
}
- // Otherwise, can't do anything fancy. Return an identity vector.
+ // Otherwise, we can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return std::make_pair(V, nullptr);
@@ -512,7 +559,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
// The proposed shuffle may be trivial, in which case we shouldn't
// perform the combine.
@@ -588,8 +635,8 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
- for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+ for (Value *Operand : I->operands()) {
+ if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
return false;
}
return true;
@@ -617,7 +664,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
-static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
// We don't want to use the IRBuilder here because we want the replacement
// instructions to appear next to 'I', not the builder's insertion point.
switch (I->getOpcode()) {
@@ -760,7 +807,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
NeedsRebuild |= (V != I->getOperand(i));
}
if (NeedsRebuild) {
- return BuildNew(I, NewOps);
+ return buildNew(I, NewOps);
}
return I;
}
@@ -792,7 +839,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}
-static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
bool &isLHSID, bool &isRHSID) {
isLHSID = isRHSID = true;
@@ -891,7 +938,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (VWidth == LHSWidth) {
// Analyze the shuffle: are the LHS or RHS identity shuffles?
bool isLHSID, isRHSID;
- RecognizeIdentityMask(Mask, isLHSID, isRHSID);
+ recognizeIdentityMask(Mask, isLHSID, isRHSID);
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -1177,7 +1224,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If the result mask is an identity, replace uses of this instruction with
// corresponding argument.
bool isLHSID, isRHSID;
- RecognizeIdentityMask(newMask, isLHSID, isRHSID);
+ recognizeIdentityMask(newMask, isLHSID, isRHSID);
if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index fd34a24..7c46cfd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,8 +42,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -79,14 +80,12 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(Builder, DL, GEP);
}
-/// ShouldChangeType - Return true if it is desirable to convert a computation
-/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
-/// type for example, or from a smaller to a larger illegal type.
-bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
- assert(From->isIntegerTy() && To->isIntegerTy());
-
- unsigned FromWidth = From->getPrimitiveSizeInBits();
- unsigned ToWidth = To->getPrimitiveSizeInBits();
+/// Return true if it is desirable to convert an integer computation from a
+/// given bit width to a new bit width.
+/// We don't want to convert from a legal to an illegal type, for example, or
+/// from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(unsigned FromWidth,
+ unsigned ToWidth) const {
bool FromLegal = DL.isLegalInteger(FromWidth);
bool ToLegal = DL.isLegalInteger(ToWidth);
@@ -103,6 +102,17 @@ bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
return true;
}
+/// Return true if it is desirable to convert a computation from 'From' to 'To'.
+/// We don't want to convert from a legal to an illegal type, for example, or
+/// from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ return ShouldChangeType(FromWidth, ToWidth);
+}
+
// Return true, if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
@@ -156,27 +166,26 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
I.setFastMathFlags(FMF);
}
-/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
-/// operators which are associative or commutative:
-//
-// Commutative operators:
-//
-// 1. Order operands such that they are listed from right (least complex) to
-// left (most complex). This puts constants before unary operators before
-// binary operators.
-//
-// Associative operators:
-//
-// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
-// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
-//
-// Associative and commutative operators:
-//
-// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
-// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
-// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
-// if C1 and C2 are constants.
-//
+/// This performs a few simplifications for operators that are associative or
+/// commutative:
+///
+/// Commutative operators:
+///
+/// 1. Order operands such that they are listed from right (least complex) to
+/// left (most complex). This puts constants before unary operators before
+/// binary operators.
+///
+/// Associative operators:
+///
+/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+///
+/// Associative and commutative operators:
+///
+/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+/// if C1 and C2 are constants.
bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Instruction::BinaryOps Opcode = I.getOpcode();
bool Changed = false;
@@ -322,7 +331,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
} while (1);
}
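Rule 6 in the comment above is the classic constant-regrouping win for an associative, commutative op: reassociating "(A op C1) op (B op C2)" to "(A op B) op (C1 op C2)" lets the two constants fold into one. A sketch with integer add (constants hypothetical):

```cpp
#include <cstdio>

int before(int A, int B) { return (A + 10) + (B + 32); }
int after(int A, int B)  { return (A + B) + 42; }  // 10 + 32 folded to 42

int main() {
  std::printf("%d %d\n", before(3, 4), after(3, 4)); // prints: 49 49
}
```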
-/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
Instruction::BinaryOps ROp) {
@@ -361,7 +370,7 @@ static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
}
}
-/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
Instruction::BinaryOps ROp) {
@@ -519,7 +528,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (isa<OverflowingBinaryOperator>(Op1))
HasNSW &= Op1->hasNoSignedWrap();
- // We can propogate 'nsw' if we know that
+ // We can propagate 'nsw' if we know that
// %Y = mul nsw i16 %X, C
// %Z = add nsw i16 %Y, %X
// =>
@@ -537,11 +546,11 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
return SimplifiedInst;
}
-/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
-/// which some other binary operation distributes over either by factorizing
-/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
-/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
-/// a win). Returns the simplified value, or null if it didn't simplify.
+/// This tries to simplify binary operations which some other binary operation
+/// distributes over either by factorizing out common terms
+/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
+/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
+/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
@@ -623,12 +632,38 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
}
}
+ // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0))
+ // (op (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (op c, d)))
+ if (auto *SI0 = dyn_cast<SelectInst>(LHS)) {
+ if (auto *SI1 = dyn_cast<SelectInst>(RHS)) {
+ if (SI0->getCondition() == SI1->getCondition()) {
+ Value *SI = nullptr;
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(SI0->getCondition(),
+ Builder->CreateBinOp(TopLevelOpcode,
+ SI0->getTrueValue(),
+ SI1->getTrueValue()),
+ V);
+ if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
+ SI1->getTrueValue(), DL, TLI, DT, AC))
+ SI = Builder->CreateSelect(
+ SI0->getCondition(), V,
+ Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
+ SI1->getFalseValue()));
+ if (SI) {
+ SI->takeName(&I);
+ return SI;
+ }
+ }
+ }
+ }
+
return nullptr;
}
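The new select fold above applies when both operands are selects on the same condition and one arm pair simplifies (e.g. `B op B` folds), so the binop can be pushed into the arms. A source-level sketch with subtraction, where the shared false arm cancels (values hypothetical):

```cpp
#include <cstdio>

int before(bool A, int C, int D, int B) {
  return (A ? C : B) - (A ? D : B);
}
int after(bool A, int C, int D, int B) {
  return A ? (C - D) : 0;  // false arms simplified: B - B == 0
}

int main() {
  std::printf("%d %d\n", before(true, 7, 2, 9), after(true, 7, 2, 9));   // 5 5
  std::printf("%d %d\n", before(false, 7, 2, 9), after(false, 7, 2, 9)); // 0 0
}
```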
-// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
-// if the LHS is a constant zero (which is the 'negate' form).
-//
+/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
+/// constant zero (which is the 'negate' form).
Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (BinaryOperator::isNeg(V))
return BinaryOperator::getNegArgument(V);
@@ -644,10 +679,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
return nullptr;
}
-// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
-// instruction if the LHS is a constant negative zero (which is the 'negate'
-// form).
-//
+/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is
+/// a constant negative zero (which is the 'negate' form).
Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
return BinaryOperator::getFNegArgument(V);
@@ -700,10 +733,10 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
llvm_unreachable("Unknown binary instruction type!");
}
-// FoldOpIntoSelect - Given an instruction with a select as one operand and a
-// constant as the other operand, try to fold the binary operator into the
-// select arguments. This also works for Cast instructions, which obviously do
-// not have a second operand.
+/// Given an instruction with a select as one operand and a constant as the
+/// other operand, try to fold the binary operator into the select arguments.
+/// This also works for Cast instructions, which obviously do not have a second
+/// operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Don't modify shared select instructions
if (!SI->hasOneUse()) return nullptr;
@@ -752,10 +785,9 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
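A minimal sketch of the fold described above, using plain ints in place of IR values: applying the constant to both select arms is observationally identical to applying it to the select's result.

#include <cassert>

static int sel(bool c, int t, int f) { return c ? t : f; }

int main() {
  const int K = 4; // the constant operand
  for (int c = 0; c <= 1; ++c)
    for (int t = -3; t <= 3; ++t)
      for (int f = -3; f <= 3; ++f)
        // add (select c, t, f), K  ==  select c, (t+K), (f+K)
        assert(sel(c, t, f) + K == sel(c, t + K, f + K));
  return 0;
}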
-/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
-/// has a PHI node as operand #0, see if we can fold the instruction into the
-/// PHI (which is only possible if all operands to the PHI are constants).
-///
+/// Given a binary operator, cast instruction, or select which has a PHI node as
+/// operand #0, see if we can fold the instruction into the PHI (which is only
+/// possible if all operands to the PHI are constants).
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
@@ -819,7 +851,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
NewPN->takeName(PN);
// If we are going to have to insert a new computation, do so right before the
- // predecessors terminator.
+ // predecessor's terminator.
if (NonConstBB)
Builder->SetInsertPoint(NonConstBB->getTerminator());
@@ -893,10 +925,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a pointer type and a constant offset, determine
-/// whether or not there is a sequence of GEP indices into the pointed type that
-/// will land us at the specified offset. If so, fill them into NewIndices and
-/// return the resultant element type, otherwise return null.
+/// Given a pointer type and a constant offset, determine whether or not there
+/// is a sequence of GEP indices into the pointed type that will land us at the
+/// specified offset. If so, fill them into NewIndices and return the resultant
+/// element type, otherwise return null.
Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
@@ -965,8 +997,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
return true;
}
-/// Descale - Return a value X such that Val = X * Scale, or null if none. If
-/// the multiplication is known not to overflow then NoSignedWrap is set.
+/// Return a value X such that Val = X * Scale, or null if none.
+/// If the multiplication is known not to overflow, then NoSignedWrap is set.
Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
@@ -1008,11 +1040,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// 0'th operand of Val.
std::pair<Instruction*, unsigned> Parent;
- // RequireNoSignedWrap - Set if the transform requires a descaling at deeper
- // levels that doesn't overflow.
+ // Set if the transform requires a descaling at deeper levels that doesn't
+ // overflow.
bool RequireNoSignedWrap = false;
- // logScale - log base 2 of the scale. Negative if not a power of 2.
+ // Log base 2 of the scale. Negative if not a power of 2.
int32_t logScale = Scale.exactLogBase2();
for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
@@ -1213,16 +1245,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
/// specified one but with other operands.
static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
InstCombiner::BuilderTy *B) {
- Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
- if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
- if (isa<OverflowingBinaryOperator>(NewBO)) {
- NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
- NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
- }
- if (isa<PossiblyExactOperator>(NewBO))
- NewBO->setIsExact(Inst.isExact());
- }
- return BORes;
+ Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ // If LHS and RHS are constant, BO won't be a binary operator.
+ if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO))
+ NewBO->copyIRFlags(&Inst);
+ return BO;
}
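The dyn_cast guard exists because IRBuilder's default folder constant-folds eagerly. A hedged sketch of that case, assuming LLVM headers of roughly this vintage are available:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  IRBuilder<> B(Ctx);
  // The default constant folder collapses this to the ConstantInt 5, so
  // no BinaryOperator exists to copy flags onto -- hence the dyn_cast.
  Value *V = B.CreateAdd(B.getInt32(2), B.getInt32(3));
  return isa<ConstantInt>(V) ? 0 : 1;
}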
/// \brief Makes transformation of binary operation specific for vector types.
@@ -1256,9 +1283,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
LShuf->getMask() == RShuf->getMask()) {
Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
RShuf->getOperand(0), Builder);
- Value *Res = Builder->CreateShuffleVector(NewBO,
+ return Builder->CreateShuffleVector(NewBO,
UndefValue::get(NewBO->getType()), LShuf->getMask());
- return Res;
}
}
@@ -1294,18 +1320,11 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
}
if (MayChange) {
Constant *C2 = ConstantVector::get(C2M);
- Value *NewLHS, *NewRHS;
- if (isa<Constant>(LHS)) {
- NewLHS = C2;
- NewRHS = Shuffle->getOperand(0);
- } else {
- NewLHS = Shuffle->getOperand(0);
- NewRHS = C2;
- }
+ Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0);
+ Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2;
Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
- Value *Res = Builder->CreateShuffleVector(NewBO,
+ return Builder->CreateShuffleVector(NewBO,
UndefValue::get(Inst.getType()), Shuffle->getMask());
- return Res;
}
}
@@ -1323,7 +1342,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
bool MadeChange = false;
- Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+ Type *IntPtrTy =
+ DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
@@ -1333,21 +1353,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!SeqTy)
continue;
+ // The index type should have the same width as IntPtrTy
+ Type *IndexTy = (*I)->getType();
+ Type *NewIndexType = IndexTy->isVectorTy() ?
+ VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+
// If the element type has zero size then any index over it is equivalent
// to an index of zero, so replace it with zero if it is not zero already.
if (SeqTy->getElementType()->isSized() &&
DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
+ *I = Constant::getNullValue(NewIndexType);
MadeChange = true;
}
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
+ if (IndexTy != NewIndexType) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ *I = Builder->CreateIntCast(*I, NewIndexType, true);
MadeChange = true;
}
}
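A small sketch of why the explicit cast is safe, assuming a 32-bit pointer width: truncating (or sign-extending) the index to the pointer's scalar width leaves the byte offset unchanged whenever the index is representable at that width.

#include <cassert>
#include <cstdint>

int main() {
  // Assume a 32-bit target: indices are canonicalized to i32.
  int64_t WideIdx = -7;                           // i64 index in the IR
  int32_t Canon = static_cast<int32_t>(WideIdx);  // trunc i64 -> i32
  const int64_t ElemSize = 4;                     // e.g. an i32 element
  // The byte offset is unchanged when the index fits in 32 bits.
  assert(WideIdx * ElemSize == int64_t(Canon) * ElemSize);
  return 0;
}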
@@ -1421,8 +1445,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+ // If not all GEPs are identical we'll have to create a new PHI node.
+ // Check that the old PHI node has only one use so that it will get
+ // removed.
+ if (DI != -1 && !PN->hasOneUse())
+ return nullptr;
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
@@ -1432,11 +1461,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
// set that index.
- Instruction *InsertPt = Builder->GetInsertPoint();
- Builder->SetInsertPoint(PN);
- PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
- PN->getNumOperands());
- Builder->SetInsertPoint(InsertPt);
+ PHINode *NewPN;
+ {
+ IRBuilderBase::InsertPointGuard Guard(*Builder);
+ Builder->SetInsertPoint(PN);
+ NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
+ }
for (auto &I : PN->operands())
NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
@@ -1790,7 +1821,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
I->takeName(BCI);
- BCI->getParent()->getInstList().insert(BCI, I);
+ BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
ReplaceInstUsesWith(*BCI, I);
}
return &GEP;
@@ -1931,7 +1962,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
// Replace invoke with a NOP intrinsic to maintain the original CFG
- Module *M = II->getParent()->getParent()->getParent();
+ Module *M = II->getModule();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
None, "", II->getParent());
@@ -2280,9 +2311,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
}
if (LoadInst *L = dyn_cast<LoadInst>(Agg))
// If the (non-volatile) load only has one use, we can rewrite this to a
- // load from a GEP. This reduces the size of the load.
- // FIXME: If a load is used only by extractvalue instructions then this
- // could be done regardless of having multiple uses.
+ // load from a GEP. This reduces the size of the load. If a load is used
+ // only by extractvalue instructions then it must either have been
+ // optimized before, or it is a struct with padding, in which case we
+ // don't want to do the transformation as it loses padding knowledge.
if (L->isSimple() && L->hasOneUse()) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
@@ -2294,7 +2326,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
- Builder->SetInsertPoint(L->getParent(), L);
+ Builder->SetInsertPoint(L);
Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
@@ -2312,7 +2344,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return nullptr;
}
-/// isCatchAll - Return 'true' if the given typeinfo will match anything.
+/// Return 'true' if the given typeinfo will match anything.
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
switch (Personality) {
case EHPersonality::GNU_C:
@@ -2330,6 +2362,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
case EHPersonality::MSVC_X86SEH:
case EHPersonality::MSVC_Win64SEH:
case EHPersonality::MSVC_CXX:
+ case EHPersonality::CoreCLR:
return TypeInfo->isNullValue();
}
llvm_unreachable("invalid enum");
@@ -2441,10 +2474,24 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
SawCatchAll = true;
break;
}
- if (AlreadyCaught.count(TypeInfo))
- // Already caught by an earlier clause, so having it in the filter
- // is pointless.
- continue;
+
+ // Even if we've seen a type in a catch clause, we don't want to
+ // remove it from the filter. An unexpected type handler may be
+ // set up for a call site which throws an exception of the same
+ // type caught. In order for the exception thrown by the unexpected
+ // handler to propagate correctly, the filter must be correctly
+ // described for the call site.
+ //
+ // Example:
+ //
+ // void unexpected() { throw 1;}
+ // void foo() throw (int) {
+ // std::set_unexpected(unexpected);
+ // try {
+ // throw 2.0;
+ // } catch (int i) {}
+ // }
+
// There is no point in having multiple copies of the same typeinfo in
// a filter, so only add it if we didn't already.
if (SeenInFilter.insert(TypeInfo).second)
@@ -2637,15 +2684,15 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
+/// Try to move the specified instruction from its current block into the
+/// beginning of DestBlock, which can only happen if it's safe to move the
+/// instruction past all of the instructions between it and the end of its
+/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() ||
+ if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
isa<TerminatorInst>(I))
return false;
@@ -2654,17 +2701,24 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
&DestBlock->getParent()->getEntryBlock())
return false;
+ // Do not sink convergent call instructions.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isConvergent())
+ return false;
+ }
+
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
- for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ for (BasicBlock::iterator Scan = I->getIterator(),
+ E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
- I->moveBefore(InsertPos);
+ I->moveBefore(&*InsertPos);
++NumSunkInst;
return true;
}
@@ -2698,6 +2752,27 @@ bool InstCombiner::run() {
}
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!I->use_empty() && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+ " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
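A standalone sketch (plain masks in place of APInt) of the condition this new block tests: once KnownZero | KnownOne covers every bit, the value is exactly the constant KnownOne.

#include <cassert>
#include <cstdint>

int main() {
  // (x | 0xF0) & 0xF0 is 0xF0 for any x: the OR pins the high nibble to
  // ones and the AND pins the low nibble to zeros.
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t v = (uint8_t(x) | 0xF0) & 0xF0;
    const uint8_t KnownOne = 0xF0;
    const uint8_t KnownZero = uint8_t(~0xF0);
    assert(uint8_t(KnownZero | KnownOne) == 0xFF); // every bit is known...
    assert(v == KnownOne);                         // ...so v is the constant
  }
  return 0;
}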
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
@@ -2738,7 +2813,7 @@ bool InstCombiner::run() {
}
// Now that we have an instruction, try combining it to simplify it.
- Builder->SetInsertPoint(I->getParent(), I);
+ Builder->SetInsertPoint(I);
Builder->SetCurrentDebugLocation(I->getDebugLoc());
#ifndef NDEBUG
@@ -2768,7 +2843,7 @@ bool InstCombiner::run() {
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
- BasicBlock::iterator InsertPos = I;
+ BasicBlock::iterator InsertPos = I->getIterator();
// If we replace a PHI with something that isn't a PHI, fix up the
// insertion point.
@@ -2801,8 +2876,8 @@ bool InstCombiner::run() {
return MadeIRChange;
}
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
+/// Walk the function in depth-first order, adding all reachable code to the
+/// worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
@@ -2829,7 +2904,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
continue;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
+ Instruction *Inst = &*BBI++;
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
@@ -2900,8 +2975,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
}
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
+ for (BasicBlock *SuccBB : TI->successors())
+ Worklist.push_back(SuccBB);
} while (!Worklist.empty());
// Once we've found all of the instructions to add to instcombine's worklist,
@@ -2909,8 +2984,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
+ ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
return MadeIRChange;
}
@@ -2930,13 +3004,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// track of which blocks we visit.
SmallPtrSet<BasicBlock *, 64> Visited;
MadeIRChange |=
- AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
+ AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
// the instcombine code from having to deal with some bad special cases.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Visited.count(BB))
+ if (Visited.count(&*BB))
continue;
// Delete the instructions backwards, as it has a reduced likelihood of
@@ -2944,11 +3018,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
while (EndInst != BB->begin()) {
// Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
- if (!Inst->use_empty())
+ Instruction *Inst = &*--EndInst->getIterator();
+ if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
+ if (Inst->isEHPad()) {
EndInst = Inst;
continue;
}
@@ -2956,7 +3029,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
++NumDeadInst;
MadeIRChange = true;
}
- Inst->eraseFromParent();
+ if (!Inst->getType()->isTokenTy())
+ Inst->eraseFromParent();
}
}
@@ -2968,8 +3042,6 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
AliasAnalysis *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, DominatorTree &DT,
LoopInfo *LI = nullptr) {
- // Minimizing size?
- bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
@@ -2992,7 +3064,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
Changed = true;
- InstCombiner IC(Worklist, &Builder, MinimizeSize,
+ InstCombiner IC(Worklist, &Builder, F.optForMinSize(),
AA, &AC, &TLI, &DT, DL, LI);
if (IC.run())
Changed = true;
@@ -3046,11 +3118,12 @@ public:
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3058,7 +3131,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
return false;
// Required analyses.
- auto AA = &getAnalysis<AliasAnalysis>();
+ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -3076,7 +3149,8 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e7ef9f9..a9df5e5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -90,7 +91,9 @@ static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *const kAsanInitName = "__asan_init_v5";
+static const char *const kAsanInitName = "__asan_init";
+static const char *const kAsanVersionCheckName =
+ "__asan_version_mismatch_check_v6";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -119,6 +122,10 @@ static const unsigned kAllocaRzSize = 32;
static cl::opt<bool> ClEnableKasan(
"asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClRecover(
+ "asan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
@@ -177,7 +184,7 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
cl::init("__asan_"));
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
cl::desc("instrument dynamic allocas"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClSkipPromotableAllocas(
"asan-skip-promotable-allocas",
cl::desc("Do not instrument promotable allocas"), cl::Hidden,
@@ -273,6 +280,11 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
+ void reset() {
+ inited_ = false;
+ Entries.clear();
+ }
+
void init(Module &M) {
assert(!inited_);
inited_ = true;
@@ -321,7 +333,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsKasan) {
- bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsLinux = TargetTriple.isOSLinux();
@@ -338,6 +350,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
ShadowMapping Mapping;
if (LongSize == 32) {
+ // Android is always PIE, which means that the beginning of the address
+ // space is always available.
if (IsAndroid)
Mapping.Offset = 0;
else if (IsMIPS32)
@@ -376,7 +390,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
// OR-ing the shadow offset is more efficient (at least on x86) if the offset
// is a power of two, but on ppc64 we have to use add since the shadow
// offset is not necessarily 1/8th of the address space.
- Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1));
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64
+ && !(Mapping.Offset & (Mapping.Offset - 1));
return Mapping;
}
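A minimal sketch of the mapping this function configures, Shadow = (Addr >> Scale) + Offset, and of the power-of-two test that decides OrShadowOffset. The 0x7fff8000 constant is the usual x86-64 offset; the power-of-two value below is made up for illustration.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Scale = 3;               // shadow granularity 1:8
  const uint64_t Offset = 0x7fff8000ULL;  // typical x86-64 offset
  uint64_t Addr = 0x601000ULL;
  uint64_t Shadow = (Addr >> Scale) + Offset;
  assert(Shadow == 0xC0200ULL + 0x7fff8000ULL);
  // OR can replace ADD only for a power-of-two offset that the shifted
  // address stays below -- the !(Offset & (Offset - 1)) test above.
  const uint64_t Pow2Off = 0x100000000ULL; // made-up power-of-two offset
  assert((Pow2Off & (Pow2Off - 1)) == 0);
  assert(((Addr >> Scale) | Pow2Off) == (Addr >> Scale) + Pow2Off);
  return 0;
}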
@@ -389,8 +404,9 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- explicit AddressSanitizer(bool CompileKernel = false)
- : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) {
+ explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan),
+ Recover(Recover || ClRecover) {
initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
}
const char *getPassName() const override {
@@ -437,7 +453,9 @@ struct AddressSanitizer : public FunctionPass {
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void markEscapedLocalAllocas(Function &F);
bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
static char ID; // Pass identification, replacement for typeid
DominatorTree &getDominatorTree() const { return *DT; }
@@ -450,10 +468,21 @@ struct AddressSanitizer : public FunctionPass {
bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
uint64_t TypeSize) const;
+ /// Helper to cleanup per-function state.
+ struct FunctionStateRAII {
+ AddressSanitizer *Pass;
+ FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {
+ assert(Pass->ProcessedAllocas.empty() &&
+ "last pass forgot to clear cache");
+ }
+ ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); }
+ };
+
LLVMContext *C;
Triple TargetTriple;
int LongSize;
bool CompileKernel;
+ bool Recover;
Type *IntptrTy;
ShadowMapping Mapping;
DominatorTree *DT;
@@ -477,8 +506,10 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- explicit AddressSanitizerModule(bool CompileKernel = false)
- : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {}
+ explicit AddressSanitizerModule(bool CompileKernel = false,
+ bool Recover = false)
+ : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan),
+ Recover(Recover || ClRecover) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
const char *getPassName() const override { return "AddressSanitizerModule"; }
@@ -496,6 +527,7 @@ class AddressSanitizerModule : public ModulePass {
GlobalsMetadata GlobalsMD;
bool CompileKernel;
+ bool Recover;
Type *IntptrTy;
LLVMContext *C;
Triple TargetTriple;
@@ -525,6 +557,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
ShadowMapping Mapping;
SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallSetVector<AllocaInst *, 16> NonInstrumentedStaticAllocaVec;
SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
@@ -545,12 +578,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<AllocaInst *, 1> DynamicAllocaVec;
SmallVector<IntrinsicInst *, 1> StackRestoreVec;
AllocaInst *DynamicAllocaLayout = nullptr;
+ IntrinsicInst *LocalEscapeCall = nullptr;
// Maps Value to an AllocaInst from which the Value is originated.
typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
AllocaForValueMapTy AllocaForValue;
- bool HasNonEmptyInlineAsm;
+ bool HasNonEmptyInlineAsm = false;
+ bool HasReturnsTwiceCall = false;
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
@@ -562,7 +597,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
IntptrPtrTy(PointerType::get(IntptrTy, 0)),
Mapping(ASan.Mapping),
StackAlignment(1 << Mapping.Scale),
- HasNonEmptyInlineAsm(false),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
bool runOnFunction() {
@@ -596,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
Value *SavedStack) {
IRBuilder<> IRB(InstBefore);
+ Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy);
+ // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we
+ // need to adjust extracted SP to compute the address of the most recent
+ // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for
+ // this purpose.
+ if (!isa<ReturnInst>(InstBefore)) {
+ Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration(
+ InstBefore->getModule(), Intrinsic::get_dynamic_area_offset,
+ {IntptrTy});
+
+ Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {});
+
+ DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy),
+ DynamicAreaOffset);
+ }
+
IRB.CreateCall(AsanAllocasUnpoisonFunc,
- {IRB.CreateLoad(DynamicAllocaLayout),
- IRB.CreatePtrToInt(SavedStack, IntptrTy)});
+ {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr});
}
// Unpoison dynamic allocas redzones.
@@ -625,7 +674,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!ASan.isInterestingAlloca(AI)) return;
+ if (!ASan.isInterestingAlloca(AI)) {
+ if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI);
+ return;
+ }
StackAlignment = std::max(StackAlignment, AI.getAlignment());
if (ASan.isDynamicAlloca(AI))
@@ -639,6 +691,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void visitIntrinsicInst(IntrinsicInst &II) {
Intrinsic::ID ID = II.getIntrinsicID();
if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
+ if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
if (!ClCheckLifetime) return;
if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
return;
@@ -660,9 +713,13 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaPoisonCallVec.push_back(APC);
}
- void visitCallInst(CallInst &CI) {
- HasNonEmptyInlineAsm |=
- CI.isInlineAsm() && !CI.isIdenticalTo(EmptyInlineAsm.get());
+ void visitCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ HasNonEmptyInlineAsm |=
+ CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get());
+ HasReturnsTwiceCall |= CI->canReturnTwice();
+ }
}
// ---------------------- Helpers.
@@ -689,7 +746,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Instruction *ThenTerm, Value *ValueIfFalse);
};
-} // namespace
+} // anonymous namespace
char AddressSanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
@@ -697,12 +754,15 @@ INITIALIZE_PASS_BEGIN(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
-FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) {
- return new AddressSanitizer(CompileKernel);
+FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
+ bool Recover) {
+ assert(!CompileKernel || Recover);
+ return new AddressSanitizer(CompileKernel, Recover);
}
char AddressSanitizerModule::ID = 0;
@@ -711,8 +771,10 @@ INITIALIZE_PASS(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass",
false, false)
-ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) {
- return new AddressSanitizerModule(CompileKernel);
+ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
+ bool Recover) {
+ assert(!CompileKernel || Recover);
+ return new AddressSanitizerModule(CompileKernel, Recover);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -799,8 +861,10 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
getAllocaSizeInBytes(&AI) > 0 &&
// We are only interested in allocas not promotable to registers.
// Promotable allocas are common under -O0.
- (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) ||
- isDynamicAlloca(AI)));
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
+ // inalloca allocas are not treated as static, and we don't want
+ // dynamic alloca instrumentation for them as well.
+ !AI.isUsedWithInAlloca());
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
@@ -868,10 +932,8 @@ static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
} else {
return false;
}
- if (!isPointerOperand(I->getOperand(0)) ||
- !isPointerOperand(I->getOperand(1)))
- return false;
- return true;
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
}
bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
@@ -919,7 +981,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
- if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
NumOptimizedAccessesToGlobalVar++;
return;
@@ -1041,13 +1103,17 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
- BasicBlock *CrashBlock =
+ if (Recover) {
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false);
+ } else {
+ BasicBlock *CrashBlock =
BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
- CrashTerm = new UnreachableInst(*C, CrashBlock);
- BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
- ReplaceInstWithInst(CheckTerm, NewTerm);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
+ BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+ ReplaceInstWithInst(CheckTerm, NewTerm);
+ }
} else {
- CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover);
}
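A rough sketch of the control flow the Recover flag toggles in the instrumentation above, with stand-in functions rather than the real runtime: without recovery the report block is terminated by unreachable; with recovery execution falls through.

#include <cstdio>
#include <cstdlib>

// Stand-ins for the generated report call and the UnreachableInst.
static void checkAccess(bool Poisoned, bool Recover) {
  if (Poisoned) {
    std::puts("asan: report");  // __asan_report_* (or the _noabort variant)
    if (!Recover)
      std::abort();             // no-recover: block ends in unreachable
  }
  std::puts("continue");        // reached on clean or recovered accesses
}

int main() {
  checkAccess(false, false);
  checkAccess(true, true);      // recovery: report, then keep running
  return 0;
}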
Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
@@ -1084,7 +1150,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
- IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt());
+ IRBuilder<> IRB(&GlobalInit.front(),
+ GlobalInit.front().getFirstInsertionPt());
// Add a call to poison all external globals before the given function starts.
Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
@@ -1147,6 +1214,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// Do not instrument globals from special LLVM sections.
if (Section.find("__llvm") != StringRef::npos) return false;
+ // Do not instrument function pointers to initialization and termination
+ // routines: the dynamic linker will not properly handle redzones.
+ if (Section.startswith(".preinit_array") ||
+ Section.startswith(".init_array") ||
+ Section.startswith(".fini_array")) {
+ return false;
+ }
+
// Callbacks put into the CRT initializer/terminator sections
// should not be instrumented.
// See https://code.google.com/p/address-sanitizer/issues/detail?id=305
@@ -1162,10 +1237,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
bool TAAParsed;
std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
- if (!ErrorCode.empty()) {
- assert(false && "Invalid section specifier.");
- return false;
- }
+ assert(ErrorCode.empty() && "Invalid section specifier.");
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
@@ -1383,13 +1455,11 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string ExpStr = Exp ? "exp_" : "";
const std::string SuffixStr = CompileKernel ? "N" : "_n";
- const std::string EndingStr = CompileKernel ? "_noabort" : "";
- const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
- // TODO(glider): for KASan builds add _noabort to error reporting
- // functions and make them actually noabort (remove the UnreachableInst).
+ const std::string EndingStr = Recover ? "_noabort" : "";
+ Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
AsanErrorCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr,
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1400,7 +1470,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix,
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1448,15 +1518,20 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!CompileKernel) {
std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
+ createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
}
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
+bool AddressSanitizer::doFinalization(Module &M) {
+ GlobalsMD.reset();
+ return false;
+}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -1466,13 +1541,41 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- IRBuilder<> IRB(F.begin()->begin());
+ IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
}
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+ // Find the one possible call to llvm.localescape and pre-mark allocas passed
+ // to it as uninteresting. This assumes we haven't started processing allocas
+ // yet. This check is done up front because iterating the use list in
+ // isInterestingAlloca would be algorithmically slower.
+ assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+ // Try to get the declaration of llvm.localescape. If it's not in the module,
+ // we can exit early.
+ if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+ // Look for a call to llvm.localescape in the entry block. It can't be in
+ // any other block.
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+ // We found a call. Mark all the allocas passed in as uninteresting.
+ for (Value *Arg : II->arg_operands()) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+ assert(AI && AI->isStaticAlloca() &&
+ "non-static alloca arg to localescape");
+ ProcessedAllocas[AI] = false;
+ }
+ break;
+ }
+ }
+}
+
bool AddressSanitizer::runOnFunction(Function &F) {
if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
@@ -1488,6 +1591,12 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
+ FunctionStateRAII CleanupObj(this);
+
+ // We can't instrument allocas used with llvm.localescape. Only static allocas
+ // can be passed to that intrinsic.
+ markEscapedLocalAllocas(F);
+
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
SmallSet<Value *, 16> TempsToInstrument;
@@ -1715,6 +1824,16 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
void FunctionStackPoisoner::poisonStack() {
assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
+ // Insert poison calls for lifetime intrinsics for alloca.
+ bool HavePoisonedAllocas = false;
+ for (const auto &APC : AllocaPoisonCallVec) {
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
+ HavePoisonedAllocas |= APC.DoPoison;
+ }
+
if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) {
// Handle dynamic allocas.
createDynamicAllocasInitStorage();
@@ -1723,7 +1842,7 @@ void FunctionStackPoisoner::poisonStack() {
unpoisonDynamicAllocas();
}
- if (AllocaVec.size() == 0) return;
+ if (AllocaVec.empty()) return;
int StackMallocIdx = -1;
DebugLoc EntryDebugLocation;
@@ -1734,6 +1853,19 @@ void FunctionStackPoisoner::poisonStack() {
IRBuilder<> IRB(InsBefore);
IRB.SetCurrentDebugLocation(EntryDebugLocation);
+ // Make sure non-instrumented allocas stay in the entry block. Otherwise,
+ // debug info is broken, because only entry-block allocas are treated as
+ // regular stack slots.
+ auto InsBeforeB = InsBefore->getParent();
+ assert(InsBeforeB == &F.getEntryBlock());
+ for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I)
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ if (NonInstrumentedStaticAllocaVec.count(AI) > 0)
+ AI->moveBefore(InsBefore);
+
+ // If we have a call to llvm.localescape, keep it in the entry block.
+ if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore);
+
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
@@ -1751,10 +1883,15 @@ void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
LocalStackSize <= kMaxStackMallocSize;
- // Don't do dynamic alloca or stack malloc in presence of inline asm:
- // too often it makes assumptions on which registers are available.
- bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
- DoStackMalloc &= !HasNonEmptyInlineAsm;
+ bool DoDynamicAlloca = ClDynamicAllocaStack;
+ // Don't do dynamic alloca or stack malloc if:
+ // 1) There is inline asm: too often it makes assumptions on which registers
+ // are available.
+ // 2) There is a returns_twice call (typically setjmp), which is
+ // optimization-hostile, and doesn't play well with introduced indirect
+ // register-relative calculation of local variable addresses.
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+ DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1804,16 +1941,6 @@ void FunctionStackPoisoner::poisonStack() {
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
}
- // Insert poison calls for lifetime intrinsics for alloca.
- bool HavePoisonedAllocas = false;
- for (const auto &APC : AllocaPoisonCallVec) {
- assert(APC.InsBefore);
- assert(APC.AI);
- IRBuilder<> IRB(APC.InsBefore);
- poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
- HavePoisonedAllocas |= APC.DoPoison;
- }
-
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index f685803..fd3dfd9 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -106,7 +106,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
}
++ChecksAdded;
- Instruction *Inst = Builder->GetInsertPoint();
+ BasicBlock::iterator Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
OldBB->getTerminator()->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
new file mode 100644
index 0000000..c47fdbf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -0,0 +1,217 @@
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute the Minimum
+// Spanning Tree for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+#define DEBUG_TYPE "cfgmst"
+
+/// \brief A union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute the Minimum Spanning Tree
+/// for a given CFG.
+template <class Edge, class BBInfo> class CFGMST {
+public:
+ Function &F;
+
+ // Store all the edges in the CFG. It may contain some stale edges
+ // when Removed is set.
+ std::vector<std::unique_ptr<Edge>> AllEdges;
+
+ // This map records the auxiliary information for each BB.
+ DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos;
+
+ // Find the root group of G and compress the path from G to the root.
+ BBInfo *findAndCompressGroup(BBInfo *G) {
+ if (G->Group != G)
+ G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group));
+ return static_cast<BBInfo *>(G->Group);
+ }
+
+ // Union BB1 and BB2 into the same group and return true.
+ // Returns false if BB1 and BB2 are already in the same group.
+ bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+ BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+ if (BB1G == BB2G)
+ return false;
+
+ // Make the smaller rank tree a direct child of the root of the high rank tree.
+ if (BB1G->Rank < BB2G->Rank)
+ BB1G->Group = BB2G;
+ else {
+ BB2G->Group = BB1G;
+ // If the ranks are the same, increment the rank of one root by one.
+ if (BB1G->Rank == BB2G->Rank)
+ BB1G->Rank++;
+ }
+ return true;
+ }
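For reference, the same scheme as a self-contained sketch: find with path compression plus union by rank, mirroring findAndCompressGroup and unionGroups above.

#include <cassert>
#include <utility>
#include <vector>

struct DSU {
  std::vector<int> Parent, Rank;
  explicit DSU(int N) : Parent(N), Rank(N, 0) {
    for (int I = 0; I < N; ++I) Parent[I] = I;
  }
  int find(int X) {                  // findAndCompressGroup
    if (Parent[X] != X) Parent[X] = find(Parent[X]);
    return Parent[X];
  }
  bool unite(int A, int B) {         // unionGroups
    A = find(A); B = find(B);
    if (A == B) return false;
    if (Rank[A] < Rank[B]) std::swap(A, B);
    Parent[B] = A;
    if (Rank[A] == Rank[B]) ++Rank[A];
    return true;
  }
};

int main() {
  DSU D(4);
  assert(D.unite(0, 1) && D.unite(2, 3) && D.unite(1, 3));
  assert(!D.unite(0, 2)); // already in the same group
  return 0;
}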
+
+ // Given a BB, return its auxiliary information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ assert(It->second.get() != nullptr);
+ return *It->second.get();
+ }
+
+ // Traverse the CFG using a stack. Find all the edges and assign the weight.
+ // Edges with large weight will be put into MST first so they are less likely
+ // to be instrumented.
+ void buildEdges() {
+ DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+ const BasicBlock *BB = &(F.getEntryBlock());
+ uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+ // Add a fake edge to the entry.
+ addEdge(nullptr, BB, EntryWeight);
+
+ // Special handling for single BB functions.
+ if (succ_empty(BB)) {
+ addEdge(BB, nullptr, EntryWeight);
+ return;
+ }
+
+ static const uint32_t CriticalEdgeMultiplier = 1000;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ uint64_t BBWeight =
+ (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+ uint64_t Weight = 2;
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ BasicBlock *TargetBB = TI->getSuccessor(i);
+ bool Critical = isCriticalEdge(TI, i);
+ uint64_t scaleFactor = BBWeight;
+ if (Critical) {
+ if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+ scaleFactor *= CriticalEdgeMultiplier;
+ else
+ scaleFactor = UINT64_MAX;
+ }
+ if (BPI != nullptr)
+ Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+ addEdge(&*BB, TargetBB, Weight).IsCritical = Critical;
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
+ << TargetBB->getName() << " w=" << Weight << "\n");
+ }
+ } else {
+ addEdge(&*BB, nullptr, BBWeight);
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit"
+ << " w = " << BBWeight << "\n");
+ }
+ }
+ }
+
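The critical-edge handling in buildEdges above is a saturating multiply; a tiny standalone sketch:

#include <cassert>
#include <cstdint>

static uint64_t scaleWeight(uint64_t W, uint32_t Mult) {
  // Multiply unless it would overflow; otherwise clamp to UINT64_MAX.
  return W < UINT64_MAX / Mult ? W * Mult : UINT64_MAX;
}

int main() {
  assert(scaleWeight(10, 1000) == 10000);
  assert(scaleWeight(UINT64_MAX / 2, 1000) == UINT64_MAX); // clamped
  return 0;
}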
+ // Sort CFG edges by their weight, heaviest first.
+ void sortEdgesByWeight() {
+ std::stable_sort(AllEdges.begin(), AllEdges.end(),
+ [](const std::unique_ptr<Edge> &Edge1,
+ const std::unique_ptr<Edge> &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
+ }
+
+ // Traverse all the edges and compute the Minimum Weight Spanning Tree
+ // using union-find algorithm.
+ void computeMinimumSpanningTree() {
+ // First, put every critical edge whose destination is a landing pad into
+ // the MST. This works around the insufficient support for splitting
+ // critical edges whose destination BB is a landing pad.
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (Ei->IsCritical) {
+ if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+ }
+
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+
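A compact Kruskal-style sketch of the selection loop above, over a made-up three-block graph: edges are visited heaviest first, and an edge enters the MST iff it joins two groups, so the hottest edges escape instrumentation.

#include <algorithm>
#include <cassert>
#include <vector>

struct Edge { int Src, Dst; unsigned W; bool InMST; };

static int find(std::vector<int> &P, int X) {
  while (P[X] != X) X = P[X] = P[P[X]]; // find with path halving
  return X;
}

int main() {
  std::vector<int> P{0, 1, 2};          // three basic blocks
  std::vector<Edge> Edges{{0, 1, 90, false}, {1, 2, 50, false},
                          {0, 2, 40, false}};
  std::stable_sort(Edges.begin(), Edges.end(),
                   [](const Edge &A, const Edge &B) { return A.W > B.W; });
  for (Edge &Ei : Edges) {
    int A = find(P, Ei.Src), B = find(P, Ei.Dst);
    if (A != B) { P[B] = A; Ei.InMST = true; }
  }
  // The two heaviest edges form the tree; the lightest one is left over
  // and would be the one chosen for instrumentation.
  assert(Edges[0].InMST && Edges[1].InMST && !Edges[2].InMST);
  return 0;
}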
+ // Dump debug information about the instrumentation.
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const {
+ if (!Message.str().empty())
+ OS << Message << "\n";
+ OS << " Number of Basic Blocks: " << BBInfos.size() << "\n";
+ for (auto &BI : BBInfos) {
+ const BasicBlock *BB = BI.first;
+ OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " "
+ << BI.second->infoString() << "\n";
+ }
+
+ OS << " Number of Edges: " << AllEdges.size()
+ << " (*: Instrument, C: CriticalEdge, -: Removed)\n";
+ uint32_t Count = 0;
+ for (auto &EI : AllEdges)
+ OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->"
+ << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n";
+ }
+
+ // Add an edge to AllEdges with weight W.
+ Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
+ uint32_t Index = BBInfos.size();
+ auto Iter = BBInfos.end();
+ bool Inserted;
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
+ if (Inserted) {
+ // Newly inserted, update the real info.
+ Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ Index++;
+ }
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
+ if (Inserted)
+ // Newly inserted, update the real info.
+ Iter->second = std::move(llvm::make_unique<BBInfo>(Index));
+ AllEdges.emplace_back(new Edge(Src, Dest, W));
+ return *AllEdges.back();
+ }
+
+ BranchProbabilityInfo *BPI;
+ BlockFrequencyInfo *BFI;
+
+public:
+ CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr,
+ BlockFrequencyInfo *BFI_ = nullptr)
+ : F(Func), BPI(BPI_), BFI(BFI_) {
+ buildEdges();
+ sortEdgesByWeight();
+ computeMinimumSpanningTree();
+ }
+};
+
+#undef DEBUG_TYPE // "cfgmst"
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 2de6e1a..d459fc5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -72,6 +72,11 @@
using namespace llvm;
+// External symbol to be used when generating the shadow address for
+// architectures with multiple VMAs. Instead of using a constant integer,
+// the runtime will set the external mask based on the VMA range.
+static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask";
+
// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
// if the input IR contains a load with alignment 8, this flag will cause
@@ -124,6 +129,7 @@ static cl::opt<bool> ClDebugNonzeroLabels(
"load or return with a nonzero label"),
cl::Hidden);
+
namespace {
StringRef GetGlobalTypeString(const GlobalValue &G) {
@@ -231,6 +237,7 @@ class DataFlowSanitizer : public ModulePass {
void *(*GetRetvalTLSPtr)();
Constant *GetArgTLS;
Constant *GetRetvalTLS;
+ Constant *ExternalShadowMask;
FunctionType *DFSanUnionFnTy;
FunctionType *DFSanUnionLoadFnTy;
FunctionType *DFSanUnimplementedFnTy;
@@ -248,7 +255,7 @@ class DataFlowSanitizer : public ModulePass {
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
- DenseMap<const Function *, DISubprogram *> FunctionDIs;
+ bool DFSanRuntimeShadowMask;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
bool isInstrumented(const Function *F);
@@ -362,7 +369,8 @@ llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
DataFlowSanitizer::DataFlowSanitizer(
const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
void *(*getRetValTLS)())
- : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
+ DFSanRuntimeShadowMask(false) {
std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
ClABIListFiles.end());
@@ -420,6 +428,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
+ bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 ||
+ TargetTriple.getArch() == llvm::Triple::aarch64_be;
const DataLayout &DL = M.getDataLayout();
@@ -434,6 +444,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
else if (IsMIPS64)
ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
+ // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
+ else if (IsAArch64)
+ DFSanRuntimeShadowMask = true;
else
report_fatal_error("unsupported triple");
@@ -578,7 +591,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
- DFSF.ValShadowMap[ValAI] = ShadowAI;
+ DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
DFSanVisitor(DFSF).visitCallInst(*CI);
if (!FT->getReturnType()->isVoidTy())
new StoreInst(DFSF.getShadow(RI->getReturnValue()),
@@ -592,8 +605,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (ABIList.isIn(M, "skip"))
return false;
- FunctionDIs = makeSubprogramMap(M);
-
if (!GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
@@ -606,6 +617,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
}
+ ExternalShadowMask =
+ Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
+
DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
@@ -643,16 +657,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
std::vector<Function *> FnsToInstrument;
llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
- for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
- if (!i->isIntrinsic() &&
- i != DFSanUnionFn &&
- i != DFSanCheckedUnionFn &&
- i != DFSanUnionLoadFn &&
- i != DFSanUnimplementedFn &&
- i != DFSanSetLabelFn &&
- i != DFSanNonzeroLabelFn &&
- i != DFSanVarargWrapperFn)
- FnsToInstrument.push_back(&*i);
+ for (Function &i : M) {
+ if (!i.isIntrinsic() &&
+ &i != DFSanUnionFn &&
+ &i != DFSanCheckedUnionFn &&
+ &i != DFSanUnionLoadFn &&
+ &i != DFSanUnimplementedFn &&
+ &i != DFSanSetLabelFn &&
+ &i != DFSanNonzeroLabelFn &&
+ &i != DFSanVarargWrapperFn)
+ FnsToInstrument.push_back(&i);
}
// Give function aliases prefixes when necessary, and build wrappers where the
@@ -710,7 +724,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
NewFArg = NewF->arg_begin(),
FArgEnd = F.arg_end();
FArg != FArgEnd; ++FArg, ++NewFArg) {
- FArg->replaceAllUsesWith(NewFArg);
+ FArg->replaceAllUsesWith(&*NewFArg);
}
NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
@@ -750,11 +764,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
F.replaceAllUsesWith(WrappedFnCst);
- // Patch the pointer to LLVM function in debug info descriptor.
- auto DI = FunctionDIs.find(&F);
- if (DI != FunctionDIs.end())
- DI->second->replaceFunction(&F);
-
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -842,7 +851,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (Instruction *I = dyn_cast<Instruction>(V))
Pos = I->getNextNode();
else
- Pos = DFSF.F->getEntryBlock().begin();
+ Pos = &DFSF.F->getEntryBlock().front();
while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
Pos = Pos->getNextNode();
IRBuilder<> IRB(Pos);
@@ -864,7 +873,7 @@ Value *DFSanFunction::getArgTLSPtr() {
if (DFS.ArgTLS)
return ArgTLSPtr = DFS.ArgTLS;
- IRBuilder<> IRB(F->getEntryBlock().begin());
+ IRBuilder<> IRB(&F->getEntryBlock().front());
return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
}
@@ -874,7 +883,7 @@ Value *DFSanFunction::getRetvalTLS() {
if (DFS.RetvalTLS)
return RetvalTLSPtr = DFS.RetvalTLS;
- IRBuilder<> IRB(F->getEntryBlock().begin());
+ IRBuilder<> IRB(&F->getEntryBlock().front());
return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
}
@@ -906,7 +915,7 @@ Value *DFSanFunction::getShadow(Value *V) {
Function::arg_iterator i = F->arg_begin();
while (ArgIdx--)
++i;
- Shadow = i;
+ Shadow = &*i;
assert(Shadow->getType() == DFS.ShadowTy);
break;
}
@@ -928,9 +937,15 @@ void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
assert(Addr != RetvalTLS && "Reinstrumenting?");
IRBuilder<> IRB(Pos);
+ Value *ShadowPtrMaskValue;
+ if (DFSanRuntimeShadowMask)
+ ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
+ else
+ ShadowPtrMaskValue = ShadowPtrMask;
return IRB.CreateIntToPtr(
IRB.CreateMul(
- IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
+ IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
ShadowPtrMul),
ShadowPtrTy);
}
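getShadowAddress computes shadow = (addr & mask) * mul, where the mask is now either the compile-time ShadowPtrMask or, on AArch64, a value loaded at runtime from the global named by kDFSanExternShadowPtrMask. A sketch of the arithmetic with the x86-64 mask from doInitialization; the multiplier of 2 (DFSan's 16-bit labels) is an assumption here:

    #include <cstdint>
    // Mirrors the and/mul sequence emitted as IR above.
    uint64_t dfsanShadowAddr(uint64_t Addr) {
      const uint64_t Mask = ~0x700000000000ULL; // x86-64 ShadowPtrMask
      const uint64_t Mul = 2;                   // shadow bytes per app byte (assumed)
      return (Addr & Mask) * Mul;
    }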
@@ -991,7 +1006,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
Call->addAttribute(2, Attribute::ZExt);
BasicBlock *Tail = BI->getSuccessor(0);
- PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin());
+ PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
Phi->addIncoming(Call, Call->getParent());
Phi->addIncoming(V1, Head);
@@ -1105,7 +1120,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
BasicBlock *Head = Pos->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(Pos);
+ BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
if (DomTreeNode *OldNode = DT.getNode(Head)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -1475,8 +1490,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (FT->isVarArg()) {
auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
CS.arg_size() - FT->getNumParams());
- auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva",
- DFSF.F->getEntryBlock().begin());
+ auto *LabelVAAlloca = new AllocaInst(
+ LabelVATy, "labelva", &DFSF.F->getEntryBlock().front());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
@@ -1490,7 +1505,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (!DFSF.LabelReturnAlloca) {
DFSF.LabelReturnAlloca =
new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
- DFSF.F->getEntryBlock().begin());
+ &DFSF.F->getEntryBlock().front());
}
Args.push_back(DFSF.LabelReturnAlloca);
}
@@ -1529,13 +1544,14 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (!CS.getType()->isVoidTy()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
if (II->getNormalDest()->getSinglePredecessor()) {
- Next = II->getNormalDest()->begin();
+ Next = &II->getNormalDest()->front();
} else {
BasicBlock *NewBB =
SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
- Next = NewBB->begin();
+ Next = &NewBB->front();
}
} else {
+ assert(CS->getIterator() != CS->getParent()->end());
Next = CS->getNextNode();
}
@@ -1568,7 +1584,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
- new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
+ new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front());
Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
IRB.CreateStore(
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9a3ed5c..fa939ae 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -138,6 +138,7 @@ namespace {
Module *M;
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+ DenseMap<DISubprogram *, Function *> FnMap;
};
}
@@ -309,13 +310,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident,
- bool UseCfgChecksum, bool ExitBlockBeforeBody)
+ GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os,
+ uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
- Function *F = SP->getFunction();
DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
@@ -347,8 +347,8 @@ namespace {
std::string EdgeDestinations;
raw_string_ostream EDOS(EdgeDestinations);
Function *F = Blocks.begin()->first->getParent();
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- GCOVBlock &Block = getBlock(I);
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i)
EDOS << Block.OutEdges[i]->Number;
}
@@ -389,8 +389,8 @@ namespace {
// Emit edges between blocks.
if (Blocks.empty()) return;
Function *F = Blocks.begin()->first->getParent();
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- GCOVBlock &Block = getBlock(I);
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
if (Block.OutEdges.empty()) continue;
writeBytes(EdgeTag, 4);
@@ -405,9 +405,8 @@ namespace {
}
// Emit lines for each block.
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- getBlock(I).writeOut();
- }
+ for (BasicBlock &I : *F)
+ getBlock(&I).writeOut();
}
private:
@@ -451,6 +450,12 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
+ FnMap.clear();
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ FnMap[SP] = &F;
+ }
+
if (Options.EmitNotes) emitProfileNotes();
if (Options.EmitData) return emitProfileArcs();
return false;
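The map built in the hunk above replaces the removed DISubprogram-to-Function link in debug info: the pass now inverts Function::getSubprogram() once per module and looks functions up from it. A sketch of the lookup side (CU stands for a DICompileUnit, as in emitProfileNotes below; lookup() is used here instead of the patch's operator[] only to avoid inserting null entries):

    for (auto *SP : CU->getSubprograms()) {
      llvm::Function *F = FnMap.lookup(SP); // null if SP has no IR function
      if (!F)
        continue;
      // ... instrument F as before ...
    }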
@@ -495,7 +500,7 @@ void GCOVProfiler::emitProfileNotes() {
unsigned FunctionIdent = 0;
for (auto *SP : CU->getSubprograms()) {
- Function *F = SP->getFunction();
+ Function *F = FnMap[SP];
if (!F) continue;
if (!functionHasLines(F)) continue;
@@ -507,13 +512,13 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
+ Funcs.push_back(make_unique<GCOVFunction>(SP, F, &out, FunctionIdent++,
Options.UseCfgChecksum,
Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
+ GCOVBlock &Block = Func.getBlock(&*BB);
TerminatorInst *TI = BB->getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
@@ -574,7 +579,7 @@ bool GCOVProfiler::emitProfileArcs() {
auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
for (auto *SP : CU->getSubprograms()) {
- Function *F = SP->getFunction();
+ Function *F = FnMap[SP];
if (!F) continue;
if (!functionHasLines(F)) continue;
if (!Result) Result = true;
@@ -605,7 +610,7 @@ bool GCOVProfiler::emitProfileArcs() {
int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
if (Successors) {
if (Successors == 1) {
- IRBuilder<> Builder(BB->getFirstInsertionPt());
+ IRBuilder<> Builder(&*BB->getFirstInsertionPt());
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
@@ -625,7 +630,7 @@ bool GCOVProfiler::emitProfileArcs() {
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else {
- ComplexEdgePreds.insert(BB);
+ ComplexEdgePreds.insert(&*BB);
for (int i = 0; i != Successors; ++i)
ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
@@ -641,13 +646,13 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalVariable *EdgeState = getEdgeStateValue();
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
+ IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
// Call runtime to perform increment.
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
+ IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt());
Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
@@ -731,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
IRBuilder<> Builder(Succ);
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge + i);
- EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
- (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+ EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) +
+ (Preds.idFor(&*BB) - 1)] = cast<Constant>(Counter);
}
}
Edge += Successors;
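The EdgeTable write above linearizes a (successor, predecessor) pair into a row-major array; UniqueVector::idFor returns 1-based ids, hence the two decrements. A small sketch of the indexing:

    #include <cstddef>
    // One row per successor, one column per predecessor.
    size_t edgeTableIndex(size_t SuccId, size_t PredId, size_t NumPreds) {
      return (SuccId - 1) * NumPreds + (PredId - 1); // ids are 1-based
    }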
@@ -901,7 +906,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
// uint32_t pred = *predecessor;
// if (pred == 0xffffffff) return;
- Argument *Arg = Fn->arg_begin();
+ Argument *Arg = &*Fn->arg_begin();
Arg->setName("predecessor");
Value *Pred = Builder.CreateLoad(Arg, "pred");
Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
@@ -912,7 +917,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
// uint64_t *counter = counters[pred];
// if (!counter) return;
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
- Arg = std::next(Fn->arg_begin());
+ Arg = &*std::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 712bf8e..92e41ee 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -7,18 +7,18 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers instrprof_increment intrinsics emitted by a frontend for
-// profiling. It also builds the data structures and initialization code needed
-// for updating execution counts and emitting the profile at runtime.
+// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
+// It also builds the data structures and initialization code needed for
+// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Instrumentation.h"
-
#include "llvm/ADT/Triple.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -49,7 +49,15 @@ public:
private:
InstrProfOptions Options;
Module *M;
- DenseMap<GlobalVariable *, GlobalVariable *> RegionCounters;
+ typedef struct PerFunctionProfileData {
+ uint32_t NumValueSites[IPVK_Last+1];
+ GlobalVariable* RegionCounters;
+ GlobalVariable* DataVar;
+ PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) {
+ memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1));
+ }
+ } PerFunctionProfileData;
+ DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
std::vector<Value *> UsedVars;
bool isMachO() const {
@@ -58,24 +66,30 @@ private:
/// Get the section name for the counter variables.
StringRef getCountersSection() const {
- return isMachO() ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
+ return getInstrProfCountersSectionName(isMachO());
}
/// Get the section name for the name variables.
StringRef getNameSection() const {
- return isMachO() ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
+ return getInstrProfNameSectionName(isMachO());
}
/// Get the section name for the profile data variables.
StringRef getDataSection() const {
- return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
+ return getInstrProfDataSectionName(isMachO());
}
/// Get the section name for the coverage mapping data.
StringRef getCoverageSection() const {
- return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap";
+ return getInstrProfCoverageSectionName(isMachO());
}
+ /// Count the number of instrumented value sites for the function.
+ void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
+
+ /// Replace instrprof_value_profile with a call to runtime library.
+ void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
+
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
@@ -117,20 +131,37 @@ bool InstrProfiling::runOnModule(Module &M) {
bool MadeChange = false;
this->M = &M;
- RegionCounters.clear();
+ ProfileDataMap.clear();
UsedVars.clear();
+ // We did not know how many value sites there would be inside
+ // the instrumented function when instrumenting it. Count the number of
+ // instrumented target value sites here so it can be entered as a field
+ // in the profile data variable.
for (Function &F : M)
for (BasicBlock &BB : F)
for (auto I = BB.begin(), E = BB.end(); I != E;)
- if (auto *Inc = dyn_cast<InstrProfIncrementInst>(I++)) {
+ if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I++))
+ computeNumValueSiteCounts(Ind);
+
+ for (Function &F : M)
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(), E = BB.end(); I != E;) {
+ auto Instr = I++;
+ if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
lowerIncrement(Inc);
MadeChange = true;
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ lowerValueProfileInst(Ind);
+ MadeChange = true;
}
- if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) {
+ }
+
+ if (GlobalVariable *Coverage =
+ M.getNamedGlobal(getCoverageMappingVarName())) {
lowerCoverageData(Coverage);
MadeChange = true;
}
+
if (!MadeChange)
return false;
@@ -141,10 +172,59 @@ bool InstrProfiling::runOnModule(Module &M) {
return true;
}
+static Constant *getOrInsertValueProfilingCall(Module &M) {
+ LLVMContext &Ctx = M.getContext();
+ auto *ReturnTy = Type::getVoidTy(M.getContext());
+ Type *ParamTypes[] = {
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *ValueProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
+ return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy);
+}
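getOrInsertValueProfilingCall builds the callee's parameter list with an X-macro: InstrProfData.inc expands VALUE_PROF_FUNC_PARAM once per parameter, so the compiler and the profile runtime share a single definition. A self-contained sketch of the same pattern (the field list here is illustrative, not the real InstrProfData.inc contents):

    // Single definition of a hypothetical field list.
    #define MY_FIELDS(X) X(NumCounters) X(FuncHash) X(CounterPtr)
    // Expansion 1: an enum of field indices.
    #define AS_ENUM(Name) FI_##Name,
    enum FieldIndex { MY_FIELDS(AS_ENUM) FI_Count };
    #undef AS_ENUM
    // Expansion 2: matching printable names, guaranteed to stay in sync.
    #define AS_STRING(Name) #Name,
    static const char *FieldNames[] = { MY_FIELDS(AS_STRING) };
    #undef AS_STRING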
+
+void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
+
+ GlobalVariable *Name = Ind->getName();
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ auto It = ProfileDataMap.find(Name);
+ if (It == ProfileDataMap.end()) {
+ PerFunctionProfileData PD;
+ PD.NumValueSites[ValueKind] = Index + 1;
+ ProfileDataMap[Name] = PD;
+ } else if (It->second.NumValueSites[ValueKind] <= Index)
+ It->second.NumValueSites[ValueKind] = Index + 1;
+}
+
+void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+
+ GlobalVariable *Name = Ind->getName();
+ auto It = ProfileDataMap.find(Name);
+ assert(It != ProfileDataMap.end() && It->second.DataVar &&
+ "value profiling detected in function with no counter incerement");
+
+ GlobalVariable *DataVar = It->second.DataVar;
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
+ Index += It->second.NumValueSites[Kind];
+
+ IRBuilder<> Builder(Ind);
+ Value* Args[3] = {Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index)};
+ Ind->replaceAllUsesWith(
+ Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args));
+ Ind->eraseFromParent();
+}
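lowerValueProfileInst flattens the per-kind site index into a single counter space: all sites of kinds below ValueKind come first, so the runtime index is the in-kind index plus the totals of every earlier kind. A worked sketch, assuming kinds are numbered from 0 as IPVK_First suggests:

    #include <cstdint>
    // NumValueSites[k] is the per-kind site count, as in PerFunctionProfileData.
    // With 3 sites of kind 0 and 2 of kind 1, site (kind=1, index=0) maps to 3.
    uint32_t flatSiteIndex(const uint32_t *NumValueSites, uint32_t ValueKind,
                           uint32_t Index) {
      for (uint32_t Kind = 0; Kind < ValueKind; ++Kind)
        Index += NumValueSites[Kind];
      return Index;
    }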
+
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
- IRBuilder<> Builder(Inc->getParent(), *Inc);
+ IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
Value *Count = Builder.CreateLoad(Addr, "pgocount");
@@ -172,9 +252,10 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
GlobalVariable *Name = cast<GlobalVariable>(V);
// If we have region counters for this name, we've already handled it.
- auto It = RegionCounters.find(Name);
- if (It != RegionCounters.end())
- continue;
+ auto It = ProfileDataMap.find(Name);
+ if (It != ProfileDataMap.end())
+ if (It->second.RegionCounters)
+ continue;
// Move the name variable to the right section.
Name->setSection(getNameSection());
@@ -183,69 +264,108 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
}
/// Get the name of a profiling variable for a particular function.
-static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) {
- auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer());
- StringRef Name = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
- return ("__llvm_profile_" + VarName + "_" + Name).str();
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+ StringRef NamePrefix = getInstrProfNameVarPrefix();
+ StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
+ return (Prefix + Name).str();
+}
+
+static inline bool shouldRecordFunctionAddr(Function *F) {
+ // Check the linkage
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ return true;
+ // Check uses of this function for other than direct calls or invokes to it.
+ return F->hasAddressTaken();
+}
+
+static inline Comdat *getOrCreateProfileComdat(Module &M,
+ InstrProfIncrementInst *Inc) {
+ // COFF format requires a COMDAT section to have a key symbol with the same
+ // name. The linker targeting COFF also requires that the COMDAT section
+ // that a section is associated with must precede the associating section.
+ // For this reason, we must choose the name var's name as the name of the
+ // comdat.
+ StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
+ ? getInstrProfNameVarPrefix()
+ : getInstrProfComdatPrefix());
+ return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
}
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
- GlobalVariable *Name = Inc->getName();
- auto It = RegionCounters.find(Name);
- if (It != RegionCounters.end())
- return It->second;
-
- // Move the name variable to the right section. Make sure it is placed in the
- // same comdat as its associated function. Otherwise, we may get multiple
- // counters for the same function in certain cases.
+ GlobalVariable *NamePtr = Inc->getName();
+ auto It = ProfileDataMap.find(NamePtr);
+ PerFunctionProfileData PD;
+ if (It != ProfileDataMap.end()) {
+ if (It->second.RegionCounters)
+ return It->second.RegionCounters;
+ PD = It->second;
+ }
+
+ // Move the name variable to the right section. Place them in a COMDAT group
+ // if the associated function is a COMDAT. This will make sure that
+ // only one copy of counters of the COMDAT function will be emitted after
+ // linking.
Function *Fn = Inc->getParent()->getParent();
- Name->setSection(getNameSection());
- Name->setAlignment(1);
- Name->setComdat(Fn->getComdat());
+ Comdat *ProfileVarsComdat = nullptr;
+ if (Fn->hasComdat())
+ ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc);
+ NamePtr->setSection(getNameSection());
+ NamePtr->setAlignment(1);
+ NamePtr->setComdat(ProfileVarsComdat);
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
// Create the counters variable.
- auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
- Constant::getNullValue(CounterTy),
- getVarName(Inc, "counters"));
- Counters->setVisibility(Name->getVisibility());
- Counters->setSection(getCountersSection());
- Counters->setAlignment(8);
- Counters->setComdat(Fn->getComdat());
-
- RegionCounters[Inc->getName()] = Counters;
+ auto *CounterPtr =
+ new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+ Constant::getNullValue(CounterTy),
+ getVarName(Inc, getInstrProfCountersVarPrefix()));
+ CounterPtr->setVisibility(NamePtr->getVisibility());
+ CounterPtr->setSection(getCountersSection());
+ CounterPtr->setAlignment(8);
+ CounterPtr->setComdat(ProfileVarsComdat);
// Create data variable.
- auto *NameArrayTy = Name->getType()->getPointerElementType();
- auto *Int32Ty = Type::getInt32Ty(Ctx);
- auto *Int64Ty = Type::getInt64Ty(Ctx);
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
- auto *Int64PtrTy = Type::getInt64PtrTy(Ctx);
-
- Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy};
+ auto *Int16Ty = Type::getInt16Ty(Ctx);
+ auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1);
+ Type *DataTypes[] = {
+ #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
+ #include "llvm/ProfileData/InstrProfData.inc"
+ };
auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
+
+ Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ?
+ ConstantExpr::getBitCast(Fn, Int8PtrTy) :
+ ConstantPointerNull::get(Int8PtrTy);
+
+ Constant *Int16ArrayVals[IPVK_Last+1];
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+
Constant *DataVals[] = {
- ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()),
- ConstantInt::get(Int32Ty, NumCounters),
- ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()),
- ConstantExpr::getBitCast(Name, Int8PtrTy),
- ConstantExpr::getBitCast(Counters, Int64PtrTy)};
- auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(),
+ #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+ #include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
ConstantStruct::get(DataTy, DataVals),
- getVarName(Inc, "data"));
- Data->setVisibility(Name->getVisibility());
+ getVarName(Inc, getInstrProfDataVarPrefix()));
+ Data->setVisibility(NamePtr->getVisibility());
Data->setSection(getDataSection());
- Data->setAlignment(8);
- Data->setComdat(Fn->getComdat());
+ Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
+ Data->setComdat(ProfileVarsComdat);
+
+ PD.RegionCounters = CounterPtr;
+ PD.DataVar = Data;
+ ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
- return Counters;
+ return CounterPtr;
}
void InstrProfiling::emitRegistration() {
@@ -253,20 +373,24 @@ void InstrProfiling::emitRegistration() {
if (Triple(M->getTargetTriple()).isOSDarwin())
return;
+ // Use linker script magic to get data/cnts/name start/end.
+ if (Triple(M->getTargetTriple()).isOSLinux() ||
+ Triple(M->getTargetTriple()).isOSFreeBSD())
+ return;
+
// Construct the function.
auto *VoidTy = Type::getVoidTy(M->getContext());
auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
auto *RegisterFTy = FunctionType::get(VoidTy, false);
auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
- "__llvm_profile_register_functions", M);
+ getInstrProfRegFuncsName(), M);
RegisterF->setUnnamedAddr(true);
- if (Options.NoRedZone)
- RegisterF->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone);
auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
auto *RuntimeRegisterF =
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
- "__llvm_profile_register_function", M);
+ getInstrProfRegFuncName(), M);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
for (Value *Data : UsedVars)
@@ -275,26 +399,27 @@ void InstrProfiling::emitRegistration() {
}
void InstrProfiling::emitRuntimeHook() {
- const char *const RuntimeVarName = "__llvm_profile_runtime";
- const char *const RuntimeUserName = "__llvm_profile_runtime_user";
- // If the module's provided its own runtime, we don't need to do anything.
- if (M->getGlobalVariable(RuntimeVarName))
+ // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+ // in which case there is no need to emit the user function.
+ if (Triple(M->getTargetTriple()).isOSLinux())
return;
+ // If the module's provided its own runtime, we don't need to do anything.
+ if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return;
+
// Declare an external variable that will pull in the runtime initialization.
auto *Int32Ty = Type::getInt32Ty(M->getContext());
auto *Var =
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
- nullptr, RuntimeVarName);
+ nullptr, getInstrProfRuntimeHookVarName());
// Make a function that uses it.
- auto *User =
- Function::Create(FunctionType::get(Int32Ty, false),
- GlobalValue::LinkOnceODRLinkage, RuntimeUserName, M);
+ auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+ GlobalValue::LinkOnceODRLinkage,
+ getInstrProfRuntimeHookVarUseFuncName(), M);
User->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- User->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone);
User->setVisibility(GlobalValue::HiddenVisibility);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
@@ -330,26 +455,23 @@ void InstrProfiling::emitUses() {
LLVMUsed =
new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, MergedVars), "llvm.used");
-
LLVMUsed->setSection("llvm.metadata");
}
void InstrProfiling::emitInitialization() {
std::string InstrProfileOutput = Options.InstrProfileOutput;
- Constant *RegisterF = M->getFunction("__llvm_profile_register_functions");
- if (!RegisterF && InstrProfileOutput.empty())
- return;
+ Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+ if (!RegisterF && InstrProfileOutput.empty()) return;
// Create the initialization function.
auto *VoidTy = Type::getVoidTy(M->getContext());
- auto *F =
- Function::Create(FunctionType::get(VoidTy, false),
- GlobalValue::InternalLinkage, "__llvm_profile_init", M);
+ auto *F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage,
+ getInstrProfInitFuncName(), M);
F->setUnnamedAddr(true);
F->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
+ if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone);
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
@@ -358,9 +480,8 @@ void InstrProfiling::emitInitialization() {
if (!InstrProfileOutput.empty()) {
auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- auto *SetNameF =
- Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
- "__llvm_profile_override_default_filename", M);
+ auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
+ getInstrProfFileOverriderFuncName(), M);
// Create variable for profile name.
Constant *ProfileNameConst =
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 2750585..a05a5fa 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -12,12 +12,47 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm-c/Initialization.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
+/// Moves I before IP. Returns new insert point.
+static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
+ // If I is IP, move the insert point down.
+ if (I == IP)
+ return ++IP;
+ // Otherwise, move I before IP and return IP.
+ I->moveBefore(&*IP);
+ return IP;
+}
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
+BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
+ BasicBlock::iterator IP) {
+ assert(&BB.getParent()->getEntryBlock() == &BB);
+ for (auto I = IP, E = BB.end(); I != E; ++I) {
+ bool KeepInEntry = false;
+ if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ if (AI->isStaticAlloca())
+ KeepInEntry = true;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == llvm::Intrinsic::localescape)
+ KeepInEntry = true;
+ }
+ if (KeepInEntry)
+ IP = moveBeforeInsertPoint(I, IP);
+ }
+ return IP;
+}
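A typical use of PrepareToSplitEntryBlock, sketched as a hypothetical fragment inside an instrumentation pass: before branching at the top of the entry block, hoist everything that must stay in the entry block above the split point.

    // BB is the function's entry block; IP is the intended split point.
    BasicBlock::iterator IP = BB.getFirstInsertionPt();
    IP = PrepareToSplitEntryBlock(BB, IP);             // allocas/localescape stay above
    BasicBlock *Cont = BB.splitBasicBlock(IP, "cont"); // now safe to split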
+
/// initializeInstrumentation - Initialize all passes in the Instrumentation
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
@@ -25,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
initializeGCOVProfilerPass(Registry);
+ initializePGOInstrumentationGenPass(Registry);
+ initializePGOInstrumentationUsePass(Registry);
initializeInstrProfilingPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 286a563..5a7bce5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -148,7 +148,7 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
cl::desc("poison uninitialized stack variables with a call"),
cl::Hidden, cl::init(false));
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
- cl::desc("poison uninitialized stack variables with the given patter"),
+ cl::desc("poison uninitialized stack variables with the given pattern"),
cl::Hidden, cl::init(0xff));
static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
cl::desc("poison undef temps"),
@@ -222,10 +222,17 @@ static const MemoryMapParams Linux_I386_MemoryMapParams = {
// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
+#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
0x400000000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
0x200000000000, // OriginBase
+#else
+ 0, // AndMask (not used)
+ 0x500000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x100000000000, // OriginBase
+#endif
};
// mips64 Linux
@@ -244,6 +251,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
0x1C0000000000, // OriginBase
};
+// aarch64 Linux
+static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x06000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x01000000000, // OriginBase
+};
+
// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
0x000180000000, // AndMask
@@ -266,15 +281,20 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
};
static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
- NULL,
+ nullptr,
&Linux_MIPS64_MemoryMapParams,
};
static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
- NULL,
+ nullptr,
&Linux_PowerPC64_MemoryMapParams,
};
+static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
+ nullptr,
+ &Linux_AArch64_MemoryMapParams,
+};
+
static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
&FreeBSD_I386_MemoryMapParams,
&FreeBSD_X86_64_MemoryMapParams,
@@ -353,8 +373,9 @@ class MemorySanitizer : public FunctionPass {
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
friend struct VarArgMIPS64Helper;
+ friend struct VarArgAArch64Helper;
};
-} // namespace
+} // anonymous namespace
char MemorySanitizer::ID = 0;
INITIALIZE_PASS(MemorySanitizer, "msan",
@@ -377,7 +398,6 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
GlobalValue::PrivateLinkage, StrConst, "");
}
-
/// \brief Insert extern declaration of runtime-provided functions and globals.
void MemorySanitizer::initializeCallbacks(Module &M) {
// Only do this once.
@@ -496,6 +516,10 @@ bool MemorySanitizer::doInitialization(Module &M) {
case Triple::ppc64le:
MapParams = Linux_PowerPC_MemoryMapParams.bits64;
break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ MapParams = Linux_ARM_MemoryMapParams.bits64;
+ break;
default:
report_fatal_error("unsupported architecture");
}
@@ -697,7 +721,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Cmp = IRB.CreateICmpNE(
ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
- Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
IRBuilder<> IRBNew(CheckTerm);
paintOrigin(IRBNew, updateOrigin(Origin, IRBNew),
getOriginPtr(Addr, IRBNew, Alignment), StoreSize,
@@ -893,16 +917,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Offset = (Addr & ~AndMask) ^ XorMask
Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
+ Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
+
uint64_t AndMask = MS.MapParams->AndMask;
- assert(AndMask != 0 && "AndMask shall be specified");
- Value *OffsetLong =
- IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, ~AndMask));
+ if (AndMask)
+ OffsetLong =
+ IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
uint64_t XorMask = MS.MapParams->XorMask;
- if (XorMask != 0)
- OffsetLong = IRB.CreateXor(OffsetLong,
- ConstantInt::get(MS.IntptrTy, XorMask));
+ if (XorMask)
+ OffsetLong =
+ IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
return OffsetLong;
}
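getShadowPtrOffset now tolerates either mask being absent: Offset = (Addr & ~AndMask) ^ XorMask, with each step skipped when its mask is zero. A worked sketch using the new x86-64 Linux parameters from this patch (AndMask 0, XorMask 0x500000000000):

    #include <cstdint>
    uint64_t msanShadowOffset(uint64_t Addr, uint64_t AndMask, uint64_t XorMask) {
      uint64_t Off = Addr;
      if (AndMask) Off &= ~AndMask; // old-style mappings
      if (XorMask) Off ^= XorMask;  // new-style mappings
      return Off;
    }
    // msanShadowOffset(0x700000001000, 0, 0x500000000000) == 0x200000001000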
@@ -1339,6 +1364,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitBitCastInst(BitCastInst &I) {
+ // Special case: if this is the bitcast (there is exactly 1 allowed) between
+ // a musttail call and a ret, don't instrument. New instructions are not
+ // allowed after a musttail call.
+ if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
+ if (CI->isMustTailCall())
+ return;
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
setOrigin(&I, getOrigin(&I, 0));
@@ -1570,18 +1601,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Type *EltTy = Ty->getSequentialElementType();
SmallVector<Constant *, 16> Elements;
for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
- ConstantInt *Elt =
- dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx));
- APInt V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
- Elements.push_back(ConstantInt::get(EltTy, V2));
+ if (ConstantInt *Elt =
+ dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ Elements.push_back(ConstantInt::get(EltTy, V2));
+ } else {
+ Elements.push_back(ConstantInt::get(EltTy, 1));
+ }
}
ShadowMul = ConstantVector::get(Elements);
} else {
- ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg);
- APInt V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
- ShadowMul = ConstantInt::get(Elt->getType(), V2);
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ ShadowMul = ConstantInt::get(Ty, V2);
+ } else {
+ ShadowMul = ConstantInt::get(Ty, 1);
+ }
}
IRBuilder<> IRB(&I);
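The rewritten multiply case propagates shadow through the zeros a constant multiplier forces into the product: a multiplier with k trailing zero bits makes the low k result bits defined, so the shadow is multiplied by 2^k (and left unchanged, multiplier 1, when the element is not a ConstantInt). A worked sketch of the multiplier computation:

    #include <cstdint>
    // Shadow multiplier for constant C: 1 << countTrailingZeros(C).
    uint64_t shadowMulFor(uint64_t C) {
      uint64_t K = 0;
      while (C && !(C & 1)) { C >>= 1; ++K; }
      return 1ULL << K; // e.g. C == 8 -> 8, clearing the low 3 shadow bits
    }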
@@ -1730,25 +1767,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Instrument signed relational comparisons.
///
- /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
- /// propagating the highest bit of the shadow. Everything else is delegated
- /// to handleShadowOr().
+ /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
+ /// bit of the shadow. Everything else is delegated to handleShadowOr().
void handleSignedRelationalComparison(ICmpInst &I) {
- Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
- Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
- Value* op = nullptr;
- CmpInst::Predicate pre = I.getPredicate();
- if (constOp0 && constOp0->isNullValue() &&
- (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
- op = I.getOperand(1);
- } else if (constOp1 && constOp1->isNullValue() &&
- (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+ Constant *constOp;
+ Value *op = nullptr;
+ CmpInst::Predicate pre;
+ if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
op = I.getOperand(0);
+ pre = I.getPredicate();
+ } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
+ op = I.getOperand(1);
+ pre = I.getSwappedPredicate();
+ } else {
+ handleShadowOr(I);
+ return;
}
- if (op) {
+
+ if ((constOp->isNullValue() &&
+ (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
+ (constOp->isAllOnesValue() &&
+ (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
IRBuilder<> IRB(&I);
- Value* Shadow =
- IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+ Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
+ "_msprop_icmp_s");
setShadow(&I, Shadow);
setOrigin(&I, getOrigin(op));
} else {
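The rewritten handleSignedRelationalComparison covers all four sign-bit tests (x<0, x>=0, x<=-1, x>-1) with one rule: the result is poisoned exactly when the operand's sign bit is poisoned, which the pass checks with a signed less-than of the shadow against the clean (zero) shadow. A sketch of that bit-level reasoning:

    #include <cstdint>
    // Mirrors IRB.CreateICmpSLT(Shadow, 0): true iff the top shadow bit is set.
    bool signTestResultPoisoned(uint64_t Shadow) {
      return (int64_t)Shadow < 0;
    }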
@@ -1860,25 +1902,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->visitVACopyInst(I);
}
- enum IntrinsicKind {
- IK_DoesNotAccessMemory,
- IK_OnlyReadsMemory,
- IK_WritesMemory
- };
-
- static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
- const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
- const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
- const int OnlyReadsMemory = IK_OnlyReadsMemory;
- const int OnlyAccessesArgumentPointees = IK_WritesMemory;
- const int UnknownModRefBehavior = IK_WritesMemory;
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#define ModRefBehavior IntrinsicKind
-#include "llvm/IR/Intrinsics.gen"
-#undef ModRefBehavior
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
- }
-
/// \brief Handle vector store-like intrinsics.
///
/// Instrument intrinsics that look like a simple SIMD store: writes memory,
@@ -1978,17 +2001,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (NumArgOperands == 0)
return false;
- Intrinsic::ID iid = I.getIntrinsicID();
- IntrinsicKind IK = getIntrinsicKind(iid);
- bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
- bool WritesMemory = IK == IK_WritesMemory;
- assert(!(OnlyReadsMemory && WritesMemory));
-
if (NumArgOperands == 2 &&
I.getArgOperand(0)->getType()->isPointerTy() &&
I.getArgOperand(1)->getType()->isVectorTy() &&
I.getType()->isVoidTy() &&
- WritesMemory) {
+ !I.onlyReadsMemory()) {
// This looks like a vector store.
return handleVectorStoreIntrinsic(I);
}
@@ -1996,12 +2013,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (NumArgOperands == 1 &&
I.getArgOperand(0)->getType()->isPointerTy() &&
I.getType()->isVectorTy() &&
- OnlyReadsMemory) {
+ I.onlyReadsMemory()) {
// This looks like a vector load.
return handleVectorLoadIntrinsic(I);
}
- if (!OnlyReadsMemory && !WritesMemory)
+ if (I.doesNotAccessMemory())
if (maybeHandleSimpleNomemIntrinsic(I))
return true;
@@ -2493,13 +2510,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Now, get the shadow for the RetVal.
if (!I.getType()->isSized()) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
IRBuilder<> IRBBefore(&I);
// Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
- Instruction *NextInsn = nullptr;
+ BasicBlock::iterator NextInsn;
if (CS.isCall()) {
- NextInsn = I.getNextNode();
+ NextInsn = ++I.getIterator();
+ assert(NextInsn != I.getParent()->end());
} else {
BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
if (!NormalDest->getSinglePredecessor()) {
@@ -2511,10 +2531,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return;
}
NextInsn = NormalDest->getFirstInsertionPt();
- assert(NextInsn &&
+ assert(NextInsn != NormalDest->end() &&
"Could not find insertion point for retval shadow load");
}
- IRBuilder<> IRBAfter(NextInsn);
+ IRBuilder<> IRBAfter(&*NextInsn);
Value *RetvalShadow =
IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
kShadowTLSAlignment, "_msret");
@@ -2523,10 +2543,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
}
+ bool isAMustTailRetVal(Value *RetVal) {
+ if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
+ RetVal = I->getOperand(0);
+ }
+ if (auto *I = dyn_cast<CallInst>(RetVal)) {
+ return I->isMustTailCall();
+ }
+ return false;
+ }
+
void visitReturnInst(ReturnInst &I) {
IRBuilder<> IRB(&I);
Value *RetVal = I.getReturnValue();
if (!RetVal) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (isAMustTailRetVal(RetVal)) return;
Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
if (CheckReturnValue) {
insertShadowCheck(RetVal, &I);
@@ -2653,6 +2685,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
}
+ void visitCatchSwitchInst(CatchSwitchInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitFuncletPadInst(FuncletPadInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &I) {
handleShadowOr(I);
}
@@ -2696,6 +2738,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Nothing to do here.
}
+ void visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
+ void visitCatchReturnInst(CatchReturnInst &CRI) {
+ DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
void visitInstruction(Instruction &I) {
// Everything else: stop propagating and check for poisoned shadow.
if (ClDumpStrictInstructions)
@@ -2808,6 +2860,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVAStartInst(VAStartInst &I) override {
+ if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ return;
IRBuilder<> IRB(&I);
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
@@ -2820,6 +2874,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVACopyInst(VACopyInst &I) override {
+ if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ return;
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
@@ -2979,6 +3035,242 @@ struct VarArgMIPS64Helper : public VarArgHelper {
}
};
+
+/// \brief AArch64-specific implementation of VarArgHelper.
+struct VarArgAArch64Helper : public VarArgHelper {
+ static const unsigned kAArch64GrArgSize = 56;
+ static const unsigned kAArch64VrArgSize = 128;
+
+ static const unsigned AArch64GrBegOffset = 0;
+ static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
+ // Make VR space aligned to 16 bytes.
+ static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8;
+ static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+ + kAArch64VrArgSize;
+ static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgOverflowSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgOverflowSize(nullptr) {}
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ ArgKind classifyArgument(Value* arg) {
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy())
+ return AK_FloatingPoint;
+ if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ || (T->isPointerTy()))
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
+
+ // The instrumentation stores the argument shadow in a non-ABI-specific
+ // format because it does not know which arguments are named (since Clang,
+ // as in the x86_64 case, lowers the va_args in the frontend and this pass
+ // only sees the low-level code that deals with va_list internals).
+ // The first seven GR registers are saved in the first 56 bytes of the
+ // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+ // the remaining arguments.
+ // Using constant offsets within the va_arg TLS array allows a fast copy
+ // in finalizeInstrumentation.
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned GrOffset = AArch64GrBegOffset;
+ unsigned VrOffset = AArch64VrBegOffset;
+ unsigned OverflowOffset = AArch64VAEndOffset;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+ GrOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+ VrOffset += 16;
+ break;
+ case AK_Memory:
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ OverflowOffset += RoundUpToAlignment(ArgSize, 8);
+ break;
+ }
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants (size of va_list).
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */32, /* alignment */8, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants (size of va_list).
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */32, /* alignment */8, false);
+ }
+
+ // Retrieve a va_list field of 'void*' size.
+ Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt64PtrTy(*MS.C));
+ return IRB.CreateLoad(SaveAreaPtrPtr);
+ }
+
+ // Retrieve a va_list field of 'int' size.
+ Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt32PtrTy(*MS.C));
+ Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+ return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+ Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+ // Instrument va_start, copy va_list shadow from the backup copy of
+ // the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+ // The variadic ABI for AArch64 creates two areas to save the incoming
+ // argument registers (one for the 64-bit general registers xn-x7 and
+ // another for the 128-bit FP/SIMD registers vn-v7).
+ // We then need to propagate the shadow arguments to both regions,
+ // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+ // The remaining arguments are saved in the shadow for 'va::stack'.
+ // One caveat: only the non-named arguments need to be propagated, but
+ // at the call site instrumentation 'all' the arguments are saved. So to
+ // copy the shadow values from the va_arg TLS array we need to adjust
+ // the offsets for both the GR and VR fields based on the __{gr,vr}_offs
+ // values (since they are stored based on the incoming named arguments).
+
+ // Read the stack pointer from the va_list.
+ Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+ // Read both the __gr_top and __gr_off and add them up.
+ Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+ Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+ Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+ // Read both the __vr_top and __vr_off and add them up.
+ Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+ Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+ Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+ // The pass does not know how many named arguments are being used and,
+ // at the call site, all the arguments were saved. Since __gr_off is
+ // defined as '0 - ((8 - named_gr) * 8)', the idea is to propagate only
+ // the variadic arguments by ignoring the bytes of shadow from named
+ // arguments.
+ Value *GrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+ Value *GrRegSaveAreaShadowPtr =
+ MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ GrRegSaveAreaShadowPtrOff);
+ Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8);
+
+ // Again, but for FP/SIMD values.
+ Value *VrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+ Value *VrRegSaveAreaShadowPtr =
+ MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+ IRB.getInt8Ty(),
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VrBegOffset)),
+ VrRegSaveAreaShadowPtrOff);
+ Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8);
+
+ // And finally for remaining arguments.
+ Value *StackSaveAreaShadowPtr =
+ MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+ Value *StackSrcPtr =
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VAEndOffset));
+
+ IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr,
+ VAArgOverflowSize, 16);
+ }
+ }
+};
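A worked example of the offset adjustment finalizeInstrumentation performs, under the __gr_offs definition quoted in the comment above (0 - ((8 - named_gr) * 8)): with two named GR arguments, __gr_offs is -48, so GrRegSaveAreaShadowPtrOff = 56 + (-48) = 8, i.e. the copy skips the 8 bytes of named-argument shadow that the call-site instrumentation saved anyway.

    #include <cstdint>
    // GrArgSize matches kAArch64GrArgSize above (56).
    int64_t grShadowSkipBytes(int64_t GrOffs /* e.g. -48 for 2 named args */) {
      const int64_t GrArgSize = 56;
      return GrArgSize + GrOffs; // bytes of named-arg shadow to skip
    }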
+
/// \brief A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3003,11 +3295,13 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el)
return new VarArgMIPS64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == llvm::Triple::aarch64)
+ return new VarArgAArch64Helper(Func, Msan, Visitor);
else
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
-} // namespace
+} // anonymous namespace
bool MemorySanitizer::runOnFunction(Function &F) {
if (&F == MsanCtorFunction)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 0000000..4b59b93
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,718 @@
+//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+// Issue 3, pp 313-322
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// that this method instruments the minimum number of edges.
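+// As a small illustration of that fact: if a node's two incoming edges carry
+// counts 10 and 5, and the one instrumented outgoing edge reads 12, flow
+// conservation forces the remaining outgoing edge count to (10 + 5) - 12 = 3.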
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights.
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc, and
+// the profile is opened at module level and passed to each PGOUseFunc
+// instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are used
+// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
+// from PGOEdge and BBInfo, respectively. They contain the extra data
+// structures used in populating profile counters.
+// The MST implementation is in Class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "CFGMST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatched profiles.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+ PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("Specify the path of profile data file. This is"
+ "mainly for test purpose."));
+
+namespace {
+class PGOInstrumentationGen : public ModulePass {
+public:
+ static char ID;
+
+ PGOInstrumentationGen() : ModulePass(ID) {
+ initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "PGOInstrumentationGenPass";
+ }
+
+private:
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+
+class PGOInstrumentationUse : public ModulePass {
+public:
+ static char ID;
+
+ // Provide the profile filename as the parameter.
+ PGOInstrumentationUse(std::string Filename = "")
+ : ModulePass(ID), ProfileFileName(Filename) {
+ if (!PGOTestProfileFile.empty())
+ ProfileFileName = PGOTestProfileFile;
+ initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "PGOInstrumentationUsePass";
+ }
+
+private:
+ std::string ProfileFileName;
+ std::unique_ptr<IndexedInstrProfReader> PGOReader;
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char PGOInstrumentationGen::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenPass() {
+ return new PGOInstrumentationGen();
+}
+
+char PGOInstrumentationUse::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) {
+ return new PGOInstrumentationUse(Filename.str());
+}
+
+namespace {
+/// \brief An MST-based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+  // This class represents a CFG edge. Note the CFG can be a multi-graph, so
+  // there might be multiple edges with the same SrcBB and DestBB.
+ const BasicBlock *SrcBB;
+ const BasicBlock *DestBB;
+ uint64_t Weight;
+ bool InMST;
+ bool Removed;
+ bool IsCritical;
+ PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+ : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
+ IsCritical(false) {}
+ // Return the information string of an edge.
+ const std::string infoString() const {
+ return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+ (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+ }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+ BBInfo *Group;
+ uint32_t Index;
+ uint32_t Rank;
+
+ BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ return (Twine("Index=") + Twine(Index)).str();
+ }
+};
+
+// This class implements the PGO instrumentation functionality shared by the
+// PGOInstrumentationGen and PGOInstrumentationUse passes.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+ Function &F;
+ void computeCFGHash();
+
+public:
+ std::string FuncName;
+ GlobalVariable *FuncNameVar;
+ // CFG hash value for this function.
+ uint64_t FunctionHash;
+
+ // The Minimum Spanning Tree of function CFG.
+ CFGMST<Edge, BBInfo> MST;
+
+  // Given an edge, find the BB that will be instrumented.
+ // Return nullptr if there is no BB to be instrumented.
+ BasicBlock *getInstrBB(Edge *E);
+
+ // Return the auxiliary BB information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+ // Dump edges and BB information.
+ void dumpInfo(std::string Str = "") const {
+ MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+ Twine(FunctionHash) + "\t" + Str);
+ }
+
+ FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
+ BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr)
+ : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+ FuncName = getPGOFuncName(F);
+ computeCFGHash();
+ DEBUG(dumpInfo("after CFGMST"));
+
+ NumOfPGOBB += MST.BBInfos.size();
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ NumOfPGOEdge++;
+ if (!E->InMST)
+ NumOfPGOInstrument++;
+ }
+
+ if (CreateGlobalVar)
+ FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  }
+};
+
+// Compute the hash value for the CFG: the lower 32 bits are the CRC32 of the
+// index values of each BB's successors. The higher 32 bits record the number
+// of edges.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+ std::vector<char> Indexes;
+ JamCRC JC;
+ for (auto &BB : F) {
+ const TerminatorInst *TI = BB.getTerminator();
+ for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ uint32_t Index = getBBInfo(Succ).Index;
+ for (int J = 0; J < 4; J++)
+ Indexes.push_back((char)(Index >> (J * 8)));
+ }
+ }
+ JC.update(Indexes);
+ FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
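+// For instance (illustrative numbers), a function with 5 edges in
+// MST.AllEdges whose successor-index bytes CRC to 0x1234 gets
+// FunctionHash = ((uint64_t)5 << 32) | 0x1234.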
+
+// Given a CFG edge E to be instrumented, find the BB to place the instrumented
+// code. The function will split the critical edge if necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+ if (E->InMST || E->Removed)
+ return nullptr;
+
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ // For a fake edge, instrument the real BB.
+ if (SrcBB == nullptr)
+ return DestBB;
+ if (DestBB == nullptr)
+ return SrcBB;
+
+ // Instrument the SrcBB if it has a single successor,
+ // otherwise, the DestBB if this is not a critical edge.
+ TerminatorInst *TI = SrcBB->getTerminator();
+ if (TI->getNumSuccessors() <= 1)
+ return SrcBB;
+ if (!E->IsCritical)
+ return DestBB;
+
+ // For a critical edge, we have to split. Instrument the newly
+ // created BB.
+ NumOfPGOSplit++;
+ DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
+ << getBBInfo(DestBB).Index << "\n");
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ assert(InstrBB && "Critical edge is not split");
+
+ E->Removed = true;
+ return InstrBB;
+}
+
+// Visit all edges and instrument those not in the MST.
+// Critical edges will be split.
+static void instrumentOneFunc(Function &F, Module *M,
+ BranchProbabilityInfo *BPI,
+ BlockFrequencyInfo *BFI) {
+ unsigned NumCounters = 0;
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (!E->InMST && !E->Removed)
+ NumCounters++;
+ }
+
+ uint32_t I = 0;
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
+ if (!InstrBB)
+ continue;
+
+ IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+ assert(Builder.GetInsertPoint() != InstrBB->end() &&
+ "Cannot get the Instrumentation point");
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+ {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+ Builder.getInt32(I++)});
+ }
+}
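+// The call created above yields IR of roughly this shape (illustrative; the
+// name variable, hash, and counts are per-function):
+//   call void @llvm.instrprof.increment(i8* bitcast (... @<FuncNameVar> ...),
+//                                       i64 <FunctionHash>, i32 <NumCounters>,
+//                                       i32 <Index>)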
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+ bool CountValid;
+ uint64_t CountValue;
+ PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+ : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
+
+ // Set edge count value
+ void setEdgeCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string for this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return PGOEdge::infoString();
+ return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str();
+ }
+};
+
+typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+ uint64_t CountValue;
+ bool CountValid;
+ int32_t UnknownCountInEdge;
+ int32_t UnknownCountOutEdge;
+ DirectEdges InEdges;
+ DirectEdges OutEdges;
+ UseBBInfo(unsigned IX)
+ : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
+ UnknownCountOutEdge(0) {}
+ UseBBInfo(unsigned IX, uint64_t C)
+ : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
+ UnknownCountOutEdge(0) {}
+
+ // Set the profile count value for this BB.
+ void setBBInfoCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return BBInfo::infoString();
+ return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+ }
+};
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+ uint64_t Total = 0;
+ for (auto &E : Edges) {
+ if (E->Removed)
+ continue;
+ Total += E->CountValue;
+ }
+ return Total;
+}
+
+class PGOUseFunc {
+private:
+ Function &F;
+ Module *M;
+  // This member stores the instrumentation information shared with the
+  // PGOInstrumentationGen pass (see class FuncPGOInstrumentation).
+ FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+ // Return the auxiliary BB information.
+ UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.getBBInfo(BB);
+ }
+
+ // The maximum count value in the profile. This is only used in PGO use
+ // compilation.
+ uint64_t ProgramMaxCount;
+
+ // Find the Instrumented BB and set the value.
+ void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+ // Set the edge counter value for the unknown edge -- there should be only
+ // one unknown edge.
+ void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return the FuncName string.
+ const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+ // Set the hot/cold inline hints based on the count values.
+ // FIXME: This function should be removed once the functionality in
+ // the inliner is implemented.
+ void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+ if (ProgramMaxCount == 0)
+ return;
+ // Threshold of the hot functions.
+ const BranchProbability HotFunctionThreshold(1, 100);
+ // Threshold of the cold functions.
+ const BranchProbability ColdFunctionThreshold(2, 10000);
+ if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+ F.addFnAttr(llvm::Attribute::Cold);
+ }
+
+public:
+ PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr)
+ : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {}
+
+ // Read counts for the instrumented BB from profile.
+ bool readCounters(IndexedInstrProfReader *PGOReader);
+
+ // Populate the counts for all BBs.
+ void populateCounters();
+
+ // Set the branch weights based on the count values.
+ void setBranchWeights();
+};
+
+// Visit all the edges and assign the count value for the instrumented
+// edges and BBs.
+void PGOUseFunc::setInstrumentedCounts(
+ const std::vector<uint64_t> &CountFromProfile) {
+
+ // Use a worklist as we will update the vector during the iteration.
+ std::vector<PGOUseEdge *> WorkList;
+ for (auto &E : FuncInfo.MST.AllEdges)
+ WorkList.push_back(E.get());
+
+ uint32_t I = 0;
+ for (auto &E : WorkList) {
+ BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
+ if (!InstrBB)
+ continue;
+ uint64_t CountValue = CountFromProfile[I++];
+ if (!E->Removed) {
+ getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ E->setEdgeCount(CountValue);
+ continue;
+ }
+
+ // Need to add two new edges.
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ // Add new edge of SrcBB->InstrBB.
+ PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
+ NewEdge.setEdgeCount(CountValue);
+ // Add new edge of InstrBB->DestBB.
+ PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
+ NewEdge1.setEdgeCount(CountValue);
+ NewEdge1.InMST = true;
+ getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ }
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+ for (auto &E : Edges) {
+ if (E->CountValid)
+ continue;
+ E->setEdgeCount(Value);
+
+ getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ getBBInfo(E->DestBB).UnknownCountInEdge--;
+ return;
+ }
+ llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the values to the
+// instrumented BBs and edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+ auto &Ctx = M->getContext();
+ ErrorOr<InstrProfRecord> Result =
+ PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+ if (std::error_code EC = Result.getError()) {
+ if (EC == instrprof_error::unknown_function)
+ NumOfPGOMissing++;
+ else if (EC == instrprof_error::hash_mismatch ||
+ EC == llvm::instrprof_error::malformed)
+ NumOfPGOMismatch++;
+
+ std::string Msg = EC.message() + std::string(" ") + F.getName().str();
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ return false;
+ }
+ std::vector<uint64_t> &CountFromProfile = Result.get().Counts;
+
+ NumOfPGOFunc++;
+ DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ uint64_t ValueSum = 0;
+ for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+ DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
+ ValueSum += CountFromProfile[I];
+ }
+
+ DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
+
+ getBBInfo(nullptr).UnknownCountOutEdge = 2;
+ getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+ setInstrumentedCounts(CountFromProfile);
+ ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+ return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+ // First set up Count variable for all BBs.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed)
+ continue;
+
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.OutEdges.push_back(E.get());
+ DestInfo.InEdges.push_back(E.get());
+ SrcInfo.UnknownCountOutEdge++;
+ DestInfo.UnknownCountInEdge++;
+
+ if (!E->CountValid)
+ continue;
+ DestInfo.UnknownCountInEdge--;
+ SrcInfo.UnknownCountOutEdge--;
+ }
+
+ bool Changes = true;
+ unsigned NumPasses = 0;
+ while (Changes) {
+ NumPasses++;
+ Changes = false;
+
+ // For efficient traversal, it's better to start from the end as most
+ // of the instrumented edges are at the end.
+ for (auto &BB : reverse(F)) {
+ UseBBInfo &Count = getBBInfo(&BB);
+ if (!Count.CountValid) {
+ if (Count.UnknownCountOutEdge == 0) {
+ Count.CountValue = sumEdgeCount(Count.OutEdges);
+ Count.CountValid = true;
+ Changes = true;
+ } else if (Count.UnknownCountInEdge == 0) {
+ Count.CountValue = sumEdgeCount(Count.InEdges);
+ Count.CountValid = true;
+ Changes = true;
+ }
+ }
+ if (Count.CountValid) {
+ if (Count.UnknownCountOutEdge == 1) {
+ uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
+ setEdgeCount(Count.OutEdges, Total);
+ Changes = true;
+ }
+ if (Count.UnknownCountInEdge == 1) {
+ uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
+ setEdgeCount(Count.InEdges, Total);
+ Changes = true;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+ // Assert every BB has a valid counter.
+ uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+ uint64_t FuncMaxCount = FuncEntryCount;
+ for (auto &BB : F) {
+ assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+ uint64_t Count = getBBInfo(&BB).CountValue;
+ if (Count > FuncMaxCount)
+ FuncMaxCount = Count;
+ }
+ applyFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+ DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
+
+// Assign the scaled count values to the BBs with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+ // Generate MD_prof metadata for every branch instruction.
+ DEBUG(dbgs() << "\nSetting branch weights.\n");
+ MDBuilder MDB(M->getContext());
+ for (auto &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() < 2)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+ if (getBBInfo(&BB).CountValue == 0)
+ continue;
+
+    // We have a branching BB with a non-zero count.
+ const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ unsigned Size = BBCountInfo.OutEdges.size();
+ SmallVector<unsigned, 2> EdgeCounts(Size, 0);
+ uint64_t MaxCount = 0;
+ for (unsigned s = 0; s < Size; s++) {
+ const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+      if (DestBB == nullptr)
+ continue;
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ uint64_t EdgeCount = E->CountValue;
+ if (EdgeCount > MaxCount)
+ MaxCount = EdgeCount;
+ EdgeCounts[SuccNum] = EdgeCount;
+ }
+ assert(MaxCount > 0 && "Bad max count");
+ uint64_t Scale = calculateCountScale(MaxCount);
+ SmallVector<unsigned, 4> Weights;
+ for (const auto &ECI : EdgeCounts)
+ Weights.push_back(scaleBranchCount(ECI, Scale));
+
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
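+    // Illustratively, a two-successor branch with out-edge counts 15 and 3
+    // receives metadata of the form !prof !{!"branch_weights", i32 15, i32 3}
+    // (after any scaling needed to fit the 32-bit weight range).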
+ DEBUG(dbgs() << "Weight is: ";
+ for (const auto &W : Weights) { dbgs() << W << " "; }
+ dbgs() << "\n";);
+ }
+}
+} // end anonymous namespace
+
+bool PGOInstrumentationGen::runOnModule(Module &M) {
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ BranchProbabilityInfo *BPI =
+ &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+ BlockFrequencyInfo *BFI =
+ &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+ instrumentOneFunc(F, &M, BPI, BFI);
+ }
+ return true;
+}
+
+static void setPGOCountOnFunc(PGOUseFunc &Func,
+ IndexedInstrProfReader *PGOReader) {
+ if (Func.readCounters(PGOReader)) {
+ Func.populateCounters();
+ Func.setBranchWeights();
+ }
+}
+
+bool PGOInstrumentationUse::runOnModule(Module &M) {
+ DEBUG(dbgs() << "Read in profile counters: ");
+ auto &Ctx = M.getContext();
+ // Read the counter array from file.
+ auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message()));
+ return false;
+ }
+
+ PGOReader = std::move(ReaderOrErr.get());
+ if (!PGOReader) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+ "Cannot get PGOReader"));
+ return false;
+ }
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ BranchProbabilityInfo *BPI =
+ &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+ BlockFrequencyInfo *BFI =
+ &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+ PGOUseFunc Func(F, &M, BPI, BFI);
+ setPGOCountOnFunc(Func, PGOReader.get());
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
index 6b185a2..abed465 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -18,8 +18,9 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -37,6 +38,8 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -44,6 +47,17 @@ using namespace llvm;
#define DEBUG_TYPE "safestack"
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+ cl::Hidden, cl::init(ThreadLocalUSP),
+ cl::desc("Type of storage for the unsafe stack pointer"),
+ cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+ "Thread-local storage"),
+ clEnumValN(SingleThreadUSP, "single-thread",
+ "Non-thread-local storage"),
+ clEnumValEnd));
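+// Illustrative usage (e.g. under opt, or via clang with -mllvm):
+//   -safe-stack-usp-storage=single-thread
+// selects non-thread-local storage; the default is thread-local.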
+
namespace llvm {
STATISTIC(NumFunctions, "Total number of functions");
@@ -54,118 +68,48 @@ STATISTIC(NumUnsafeStackRestorePointsFunctions,
STATISTIC(NumAllocas, "Total number of allocas");
STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
} // namespace llvm
namespace {
-/// Check whether a given alloca instruction (AI) should be put on the safe
-/// stack or not. The function analyzes all uses of AI and checks whether it is
-/// only accessed in a memory safe way (as decided statically).
-bool IsSafeStackAlloca(const AllocaInst *AI) {
- // Go through all uses of this alloca and check whether all accesses to the
- // allocated object are statically known to be memory safe and, hence, the
- // object can be placed on the safe stack.
-
- SmallPtrSet<const Value *, 16> Visited;
- SmallVector<const Instruction *, 8> WorkList;
- WorkList.push_back(AI);
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents the offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
- // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
- while (!WorkList.empty()) {
- const Instruction *V = WorkList.pop_back_val();
- for (const Use &UI : V->uses()) {
- auto I = cast<const Instruction>(UI.getUser());
- assert(V == UI.get());
-
- switch (I->getOpcode()) {
- case Instruction::Load:
- // Loading from a pointer is safe.
- break;
- case Instruction::VAArg:
- // "va-arg" from a pointer is safe.
- break;
- case Instruction::Store:
- if (V == I->getOperand(0))
- // Stored the pointer - conservatively assume it may be unsafe.
- return false;
- // Storing to the pointee is safe.
- break;
-
- case Instruction::GetElementPtr:
- if (!cast<const GetElementPtrInst>(I)->hasAllConstantIndices())
- // GEP with non-constant indices can lead to memory errors.
- // This also applies to inbounds GEPs, as the inbounds attribute
- // represents an assumption that the address is in bounds, rather than
- // an assertion that it is.
- return false;
-
- // We assume that GEP on static alloca with constant indices is safe,
- // otherwise a compiler would detect it and warn during compilation.
-
- if (!isa<const ConstantInt>(AI->getArraySize()))
- // However, if the array size itself is not constant, the access
- // might still be unsafe at runtime.
- return false;
-
- /* fallthrough */
-
- case Instruction::BitCast:
- case Instruction::IntToPtr:
- case Instruction::PHI:
- case Instruction::PtrToInt:
- case Instruction::Select:
- // The object can be safe or not, depending on how the result of the
- // instruction is used.
- if (Visited.insert(I).second)
- WorkList.push_back(cast<const Instruction>(I));
- break;
-
- case Instruction::Call:
- case Instruction::Invoke: {
- // FIXME: add support for memset and memcpy intrinsics.
- ImmutableCallSite CS(I);
-
- // LLVM 'nocapture' attribute is only set for arguments whose address
- // is not stored, passed around, or used in any other non-trivial way.
- // We assume that passing a pointer to an object as a 'nocapture'
- // argument is safe.
- // FIXME: a more precise solution would require an interprocedural
- // analysis here, which would look at all uses of an argument inside
- // the function being called.
- ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
- for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.doesNotCapture(A - B))
- // The parameter is not marked 'nocapture' - unsafe.
- return false;
- continue;
- }
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
- default:
- // The object is unsafe if it is used in any other way.
- return false;
- }
- }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
}
+};
- // All uses of the alloca are safe, we can place it on the safe stack.
- return true;
-}
-
-/// The SafeStack pass splits the stack of each function into the
-/// safe stack, which is only accessed through memory safe dereferences
-/// (as determined statically), and the unsafe stack, which contains all
-/// local variables that are accessed in unsafe ways.
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
class SafeStack : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TL;
const DataLayout *DL;
+ ScalarEvolution *SE;
Type *StackPtrTy;
Type *IntPtrTy;
Type *Int32Ty;
Type *Int8Ty;
- Constant *UnsafeStackPtr = nullptr;
+ Value *UnsafeStackPtr = nullptr;
/// Unsafe stack alignment. Each stack frame must ensure that the stack is
/// aligned to this value. We need to re-align the unsafe stack if the
@@ -175,26 +119,31 @@ class SafeStack : public FunctionPass {
/// might expect to appear on the stack on most common targets.
enum { StackAlignment = 16 };
- /// \brief Build a constant representing a pointer to the unsafe stack
- /// pointer.
- Constant *getOrCreateUnsafeStackPtr(Module &M);
+ /// \brief Build a value representing a pointer to the unsafe stack pointer.
+ Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
/// \brief Find all static allocas, dynamic allocas, return instructions and
/// stack restore points (exception unwind blocks and setjmp calls) in the
/// given function and append them to the respective vectors.
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
SmallVectorImpl<ReturnInst *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints);
+ /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+  /// size cannot be statically determined.
+ uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
/// \brief Allocate space for all static allocas in \p StaticAllocas,
/// replace allocas with pointers into the unsafe stack and generate code to
/// restore the stack pointer before all return instructions in \p Returns.
///
/// \returns A pointer to the top of the unsafe stack after all unsafe static
/// allocas are allocated.
- Value *moveStaticAllocasToUnsafeStack(Function &F,
+ Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments,
ArrayRef<ReturnInst *> Returns);
/// \brief Generate code to restore the stack after all stack restore points
@@ -203,7 +152,7 @@ class SafeStack : public FunctionPass {
/// \returns A local variable in which to maintain the dynamic top of the
/// unsafe stack if needed.
AllocaInst *
- createStackRestorePoints(Function &F,
+ createStackRestorePoints(IRBuilder<> &IRB, Function &F,
ArrayRef<Instruction *> StackRestorePoints,
Value *StaticTop, bool NeedDynamicTop);
@@ -214,17 +163,26 @@ class SafeStack : public FunctionPass {
AllocaInst *DynamicTop,
ArrayRef<AllocaInst *> DynamicAllocas);
+ bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+ bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr, uint64_t AllocaSize);
+ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+ uint64_t AllocaSize);
+
public:
static char ID; // Pass identification, replacement for typeid.
- SafeStack() : FunctionPass(ID), DL(nullptr) {
+ SafeStack(const TargetMachine *TM)
+ : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
initializeSafeStackPass(*PassRegistry::getPassRegistry());
}
+ SafeStack() : SafeStack(nullptr) {}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
- virtual bool doInitialization(Module &M) {
+ bool doInitialization(Module &M) override {
DL = &M.getDataLayout();
StackPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -235,51 +193,203 @@ public:
return false;
}
- bool runOnFunction(Function &F);
-
+ bool runOnFunction(Function &F) override;
}; // class SafeStack
-Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
- // The unsafe stack pointer is stored in a global variable with a magic name.
- const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+ uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+ const Value *AllocaPtr, uint64_t AllocaSize) {
+ AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+
+ uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ ConstantRange SizeRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ ConstantRange AllocaRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+ bool Safe = AllocaRange.contains(AccessRange);
+
+ DEBUG(dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE->getUnsignedRange(Expr)
+ << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
+
+ return Safe;
+}
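+// An illustrative instance of the check above: for a 40-byte alloca and a
+// 4-byte load whose rewritten start offset lies in [0, 36], the access range
+// [0, 40) is contained in the alloca range [0, 40), so the access is safe; a
+// start offset that could reach 37 would make it unsafe.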
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr,
+ uint64_t AllocaSize) {
+ // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+ if (MI->getRawDest() != U) return true;
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len) return false;
+ return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation must be put on the safe
+/// stack or not. The function analyzes all uses of AllocaPtr and checks
+/// whether it is only accessed in a memory-safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AllocaPtr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
+ return false;
+ }
+
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ AllocaPtr, AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::Ret: {
+ // Information leak.
+ return false;
+ }
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I
+ << "\n");
+ return false;
+ }
+ continue;
+ }
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture
+ // readnone' argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V)
+ if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+ CS.doesNotAccessMemory()))) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
+
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+ // Check if there is a target-specific location for the unsafe stack pointer.
+ if (TL)
+ if (Value *V = TL->getSafeStackPointerLocation(IRB))
+ return V;
+
+ // Otherwise, assume the target links with compiler-rt, which provides a
+ // thread-local variable with a magic name.
+ Module &M = *F.getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
auto UnsafeStackPtr =
- dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar));
+ dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+ bool UseTLS = USPStorage == ThreadLocalUSP;
if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
// The global variable is not defined yet, define it ourselves.
- // We use the initial-exec TLS model because we do not support the variable
- // living anywhere other than in the main executable.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
UnsafeStackPtr = new GlobalVariable(
- /*Module=*/M, /*Type=*/StackPtrTy,
- /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar,
- /*InsertBefore=*/nullptr,
- /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel);
+ M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
} else {
// The variable exists, check its type and attributes.
- if (UnsafeStackPtr->getValueType() != StackPtrTy) {
- report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type");
- }
-
- if (!UnsafeStackPtr->isThreadLocal()) {
- report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local");
- }
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
}
-
return UnsafeStackPtr;
}
void SafeStack::findInsts(Function &F,
SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
SmallVectorImpl<ReturnInst *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints) {
- for (Instruction &I : inst_range(&F)) {
+ for (Instruction &I : instructions(&F)) {
if (auto AI = dyn_cast<AllocaInst>(&I)) {
++NumAllocas;
- if (IsSafeStackAlloca(AI))
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (IsSafeStackAlloca(AI, Size))
continue;
if (AI->isStaticAlloca()) {
@@ -304,19 +414,26 @@ void SafeStack::findInsts(Function &F,
"gcroot intrinsic not compatible with safestack attribute");
}
}
+ for (Argument &Arg : F.args()) {
+ if (!Arg.hasByValAttr())
+ continue;
+ uint64_t Size =
+ DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ if (IsSafeStackAlloca(&Arg, Size))
+ continue;
+
+ ++NumUnsafeByValArguments;
+ ByValArguments.push_back(&Arg);
+ }
}
AllocaInst *
-SafeStack::createStackRestorePoints(Function &F,
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
ArrayRef<Instruction *> StackRestorePoints,
Value *StaticTop, bool NeedDynamicTop) {
if (StackRestorePoints.empty())
return nullptr;
- IRBuilder<> IRB(StaticTop
- ? cast<Instruction>(StaticTop)->getNextNode()
- : (Instruction *)F.getEntryBlock().getFirstInsertionPt());
-
// We need the current value of the shadow stack pointer to restore
// after longjmp or exception catching.
@@ -342,7 +459,7 @@ SafeStack::createStackRestorePoints(Function &F,
for (Instruction *I : StackRestorePoints) {
++NumUnsafeStackRestorePoints;
- IRB.SetInsertPoint(cast<Instruction>(I->getNextNode()));
+ IRB.SetInsertPoint(I->getNextNode());
Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
IRB.CreateStore(CurrentTop, UnsafeStackPtr);
}
@@ -350,14 +467,12 @@ SafeStack::createStackRestorePoints(Function &F,
return DynamicTop;
}
-Value *
-SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
- ArrayRef<AllocaInst *> StaticAllocas,
- ArrayRef<ReturnInst *> Returns) {
- if (StaticAllocas.empty())
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+ IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns) {
+ if (StaticAllocas.empty() && ByValArguments.empty())
return nullptr;
- IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
DIBuilder DIB(*F.getParent());
// We explicitly compute and set the unsafe stack layout for all unsafe
@@ -377,6 +492,13 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
// Compute maximum alignment among static objects on the unsafe stack.
unsigned MaxAlignment = 0;
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+ if (Align > MaxAlignment)
+ MaxAlignment = Align;
+ }
for (AllocaInst *AI : StaticAllocas) {
Type *Ty = AI->getAllocatedType();
unsigned Align =
@@ -388,22 +510,51 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
if (MaxAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
assert(isPowerOf2_32(MaxAlignment));
- IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))),
StackPtrTy));
}
- // Allocate space for every unsafe static AllocaInst on the unsafe stack.
int64_t StaticOffset = 0; // Current stack top.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+
+ // Add alignment.
+ // NOTE: we ensure that BasePointer itself is aligned to >= Align.
+ StaticOffset += Size;
+ StaticOffset = RoundUpToAlignment(StaticOffset, Align);
+
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -StaticOffset));
+ Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+ Arg->getName() + ".unsafe-byval");
+
+ // Replace alloc with the new location.
+ replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
+ /*Deref=*/true, -StaticOffset);
+ Arg->replaceAllUsesWith(NewArg);
+ IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+ IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
for (AllocaInst *AI : StaticAllocas) {
IRB.SetInsertPoint(AI);
- auto CArraySize = cast<ConstantInt>(AI->getArraySize());
Type *Ty = AI->getAllocatedType();
-
- uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue();
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
@@ -423,7 +574,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
cast<Instruction>(NewAI)->takeName(AI);
// Replace alloc with the new location.
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -434,7 +585,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment);
// Update shadow stack pointer in the function epilogue.
- IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
Value *StaticTop =
IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset),
@@ -478,7 +629,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (DynamicTop)
IRB.CreateStore(NewTop, DynamicTop);
- Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType());
+ Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
@@ -513,8 +664,6 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
bool SafeStack::runOnFunction(Function &F) {
- auto AA = &getAnalysis<AliasAnalysis>();
-
DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
if (!F.hasFnAttribute(Attribute::SafeStack)) {
@@ -529,6 +678,9 @@ bool SafeStack::runOnFunction(Function &F) {
return false;
}
+ TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
{
// Make sure the regular stack protector won't run on this function
// (safestack attribute takes precedence).
@@ -541,16 +693,11 @@ bool SafeStack::runOnFunction(Function &F) {
AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B));
}
- if (AA->onlyReadsMemory(&F)) {
- // XXX: we don't protect against information leak attacks for now.
- DEBUG(dbgs() << "[SafeStack] function only reads memory\n");
- return false;
- }
-
++NumFunctions;
SmallVector<AllocaInst *, 16> StaticAllocas;
SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<Argument *, 4> ByValArguments;
SmallVector<ReturnInst *, 4> Returns;
// Collect all points where stack gets unwound and needs to be restored
@@ -562,23 +709,26 @@ bool SafeStack::runOnFunction(Function &F) {
// Find all static and dynamic alloca instructions that must be moved to the
// unsafe stack, all return instructions and stack restore points.
- findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints);
+ findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns,
+ StackRestorePoints);
if (StaticAllocas.empty() && DynamicAllocas.empty() &&
- StackRestorePoints.empty())
+ ByValArguments.empty() && StackRestorePoints.empty())
return false; // Nothing to do in this function.
- if (!StaticAllocas.empty() || !DynamicAllocas.empty())
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty() ||
+ !ByValArguments.empty())
++NumUnsafeStackFunctions; // This function has the unsafe stack.
if (!StackRestorePoints.empty())
++NumUnsafeStackRestorePointsFunctions;
- if (!UnsafeStackPtr)
- UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent());
+ IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
// The top of the unsafe stack after all unsafe static allocas are allocated.
- Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns);
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas,
+ ByValArguments, Returns);
// Safe stack object that stores the current unsafe stack top. It is updated
// as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
@@ -587,7 +737,7 @@ bool SafeStack::runOnFunction(Function &F) {
// FIXME: a better alternative might be to store the unsafe stack pointer
// before setjmp / invoke instructions.
AllocaInst *DynamicTop = createStackRestorePoints(
- F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+ IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
// Handle dynamic allocas.
moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
@@ -597,13 +747,14 @@ bool SafeStack::runOnFunction(Function &F) {
return true;
}
-} // end anonymous namespace
+} // anonymous namespace
char SafeStack::ID = 0;
-INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass",
- false, false)
+INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
-FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); }
+FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
+ return new SafeStack(TM);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 7a5b4cb..09de7a2 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -31,6 +31,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -59,6 +60,7 @@ static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp";
+static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch";
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
static const uint64_t kSanCtorAndDtorPriority = 2;
@@ -148,19 +150,25 @@ class SanitizerCoverageModule : public ModulePass {
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ void InjectTraceForSwitch(Function &F,
+ ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
void SetNoSanitizeMetadata(Instruction *I);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
unsigned NumberOfInstrumentedBlocks() {
- return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses();
+ return SanCovFunction->getNumUses() +
+ SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
+ SanCovTraceEnter->getNumUses();
}
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction;
Function *SanCovTraceEnter, *SanCovTraceBB;
Function *SanCovTraceCmpFunction;
+ Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *Int64Ty;
+ Type *IntptrTy, *Int64Ty, *Int64PtrTy;
+ Module *CurModule;
LLVMContext *C;
const DataLayout *DL;
@@ -177,11 +185,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
return false;
C = &(M.getContext());
DL = &M.getDataLayout();
+ CurModule = &M;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int64Ty = IRB.getInt64Ty();
SanCovFunction = checkSanitizerInterfaceFunction(
@@ -194,18 +204,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovTraceCmpFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+ SanCovTraceSwitchFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr));
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- if (Options.TraceBB) {
- SanCovTraceEnter = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
- SanCovTraceBB = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
- }
+ SanCovTraceEnter = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
+ SanCovTraceBB = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
@@ -280,11 +291,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty()) return false;
if (F.getName().find(".module_ctor") != std::string::npos)
return false; // Should not instrument sanitizer init functions.
+ // Don't instrument functions using SEH for now. Splitting basic blocks like
+ // we do for coverage breaks WinEHPrepare.
+ // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+ if (F.hasPersonalityFn() &&
+ isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ return false;
if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
SplitAllCriticalEdges(F);
SmallVector<Instruction*, 8> IndirCalls;
SmallVector<BasicBlock*, 16> AllBlocks;
SmallVector<Instruction*, 8> CmpTraceTargets;
+ SmallVector<Instruction*, 8> SwitchTraceTargets;
for (auto &BB : F) {
AllBlocks.push_back(&BB);
for (auto &Inst : BB) {
@@ -293,13 +311,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (CS && !CS.getCalledFunction())
IndirCalls.push_back(&Inst);
}
- if (Options.TraceCmp && isa<ICmpInst>(&Inst))
- CmpTraceTargets.push_back(&Inst);
+ if (Options.TraceCmp) {
+ if (isa<ICmpInst>(&Inst))
+ CmpTraceTargets.push_back(&Inst);
+ if (isa<SwitchInst>(&Inst))
+ SwitchTraceTargets.push_back(&Inst);
+ }
}
}
InjectCoverage(F, AllBlocks);
InjectCoverageForIndirectCalls(F, IndirCalls);
InjectTraceForCmp(F, CmpTraceTargets);
+ InjectTraceForSwitch(F, SwitchTraceTargets);
return true;
}
@@ -348,6 +371,45 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
}
}
+// For every switch statement we insert a call:
+// __sanitizer_cov_trace_switch(CondValue,
+// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
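+// For example (illustrative), a 'switch i32 %x' with cases 1 and 42 gets a
+// constant array {2, 32, 1, 42} (values zero-extended to i64) and the call
+//   __sanitizer_cov_trace_switch(zext %x, @__sancov_gen_cov_switch_values)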
+
+void SanitizerCoverageModule::InjectTraceForSwitch(
+ Function &F, ArrayRef<Instruction *> SwitchTraceTargets) {
+ for (auto I : SwitchTraceTargets) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ IRBuilder<> IRB(I);
+ SmallVector<Constant *, 16> Initializers;
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->getScalarSizeInBits() >
+ Int64Ty->getScalarSizeInBits())
+ continue;
+ Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases()));
+ Initializers.push_back(
+ ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits()));
+ if (Cond->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
+ for (auto It: SI->cases()) {
+ Constant *C = It.getCaseValue();
+ if (C->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+ Initializers.push_back(C);
+ }
+ ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
+ GlobalVariable *GV = new GlobalVariable(
+ *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
+ ConstantArray::get(ArrayOfInt64Ty, Initializers),
+ "__sancov_gen_cov_switch_values");
+ IRB.CreateCall(SanCovTraceSwitchFunction,
+ {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+ }
+ }
+}
+
+
void SanitizerCoverageModule::InjectTraceForCmp(
Function &F, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
@@ -369,8 +431,7 @@ void SanitizerCoverageModule::InjectTraceForCmp(
void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
I->setMetadata(
- I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
+ I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None));
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
@@ -382,34 +443,31 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
// locations.
if (isa<UnreachableInst>(BB.getTerminator()))
return;
- BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
- // Skip static allocas at the top of the entry block so they don't become
- // dynamic when we split the block. If we used our optimized stack layout,
- // then there will only be one alloca and it will come first.
- for (; IP != BE; ++IP) {
- AllocaInst *AI = dyn_cast<AllocaInst>(IP);
- if (!AI || !AI->isStaticAlloca())
- break;
- }
+ BasicBlock::iterator IP = BB.getFirstInsertionPt();
bool IsEntryBB = &BB == &F.getEntryBlock();
DebugLoc EntryLoc;
if (IsEntryBB) {
if (auto SP = getDISubprogram(&F))
EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ // Keep static allocas and llvm.localescape calls in the entry block. Even
+ // if we aren't splitting the block, it's nice for allocas to be before
+ // calls.
+ IP = PrepareToSplitEntryBlock(BB, IP);
} else {
EntryLoc = IP->getDebugLoc();
}
- IRBuilder<> IRB(IP);
+ IRBuilder<> IRB(&*IP);
IRB.SetCurrentDebugLocation(EntryLoc);
- SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- if (UseCalls) {
+ if (Options.TraceBB) {
+ IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
+ } else if (UseCalls) {
IRB.CreateCall(SanCovWithCheckFunction, GuardP);
} else {
LoadInst *Load = IRB.CreateLoad(GuardP);
@@ -418,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
SetNoSanitizeMetadata(Load);
Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(Ins);
IRB.SetCurrentDebugLocation(EntryLoc);
// __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
@@ -427,7 +485,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
if (Options.Use8bitCounters) {
- IRB.SetInsertPoint(IP);
+ IRB.SetInsertPoint(&*IP);
Value *P = IRB.CreateAdd(
IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
@@ -438,13 +496,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
SetNoSanitizeMetadata(LI);
SetNoSanitizeMetadata(SI);
}
-
- if (Options.TraceBB) {
- // Experimental support for tracing.
- // Insert a callback with the same guard variable as used for coverage.
- IRB.SetInsertPoint(IP);
- IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
- }
}
char SanitizerCoverageModule::ID = 0;
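[Editor's sketch] The new InjectTraceForSwitch hook above lowers each switch into a call whose second argument points at a constant i64 array: element 0 holds the number of cases, element 1 the bit width of the condition, and the remaining elements the case values zero-extended to 64 bits. For illustration of the consumer side, here is a minimal C++ sketch of a runtime handler matching that layout; the real __sanitizer_cov_trace_switch lives in compiler-rt and feeds fuzzer value profiles, so the logging body below is an assumption, not the actual implementation.

#include <cstdint>
#include <cstdio>

// Hedged sketch: decodes the {NumCases, ValueSizeInBits, Case0, ...} array
// the pass emits as __sancov_gen_cov_switch_values. Body is illustrative.
extern "C" void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
  uint64_t NumCases = Cases[0];      // element 0: number of case values
  uint64_t BitWidth = Cases[1];      // element 1: width of the condition
  const uint64_t *Vals = Cases + 2;  // elements 2..: zero-extended cases
  for (uint64_t I = 0; I < NumCases; ++I)
    if (Val == Vals[I])
      std::fprintf(stderr, "switch hit case %llu (%llu-bit cond)\n",
                   (unsigned long long)Vals[I], (unsigned long long)BitWidth);
}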
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 1a46bbb..9331e1d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -142,37 +142,35 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
- const size_t ByteSize = 1 << i;
- const size_t BitSize = ByteSize * 8;
- SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
+ const unsigned ByteSize = 1U << i;
+ const unsigned BitSize = ByteSize * 8;
+ std::string ByteSizeStr = utostr(ByteSize);
+ std::string BitSizeStr = utostr(BitSize);
+ SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
+ SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<64> UnalignedReadName("__tsan_unaligned_read" +
- itostr(ByteSize));
+ SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
TsanUnalignedRead[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +
- itostr(ByteSize));
+ SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
TsanUnalignedWrite[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
- SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
- "_load");
+ SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
- SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
- "_store");
+ SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
@@ -201,7 +199,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
- SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
+ SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
@@ -513,8 +511,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -527,8 +525,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -544,8 +542,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
if (!F)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
@@ -558,8 +556,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- const size_t ByteSize = 1 << Idx;
- const size_t BitSize = ByteSize * 8;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
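[Editor's sketch] Throughout these ThreadSanitizer hunks, the callback tables (TsanRead, TsanWrite, TsanAtomicLoad, ...) are indexed by log2 of the access size, with kNumberOfAccessSizes slots covering 1/2/4/8/16-byte accesses; the `1U << i` / `1U << Idx` expressions recover the byte size from the slot. A minimal sketch of that mapping, mirroring what getMemoryAccessFuncIndex computes (the helper name below is hypothetical):

#include <cstdint>

// Hypothetical helper mirroring the size-to-slot mapping of the __tsan_*
// tables: sizes 1,2,4,8,16 map to indices 0..4; anything else returns -1.
static int accessFuncIndex(unsigned ByteSize) {
  if (ByteSize == 0 || ByteSize > 16 || (ByteSize & (ByteSize - 1)) != 0)
    return -1;                       // not a power of two in [1,16]
  int Idx = 0;
  while ((1u << Idx) != ByteSize)    // log2(ByteSize) without intrinsics
    ++Idx;
  return Idx;                        // e.g. a 4-byte read -> __tsan_read4, slot 2
}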
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4edd029..9d78e5a 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -49,7 +49,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
assert(CS && "Only calls can alter reference counts!");
// See if AliasAnalysis can help us with the call.
- AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
@@ -226,7 +226,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
SmallPtrSetImpl<Instruction *> &DependingInsts,
SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
- BasicBlock::iterator StartPos = StartInst;
+ BasicBlock::iterator StartPos = StartInst->getIterator();
SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
Worklist.push_back(std::make_pair(StartBB, StartPos));
@@ -252,7 +252,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
break;
}
- Instruction *Inst = --LocalStartPos;
+ Instruction *Inst = &*--LocalStartPos;
if (Depends(Flavor, Inst, Arg, PA)) {
DependingInsts.insert(Inst);
break;
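[Editor's sketch] The `&*--LocalStartPos` change above is one instance of a pattern repeated across this entire import: with this version's ilist changes, BasicBlock::iterator no longer converts implicitly to Instruction*, so every iterator-to-pointer (and pointer-to-iterator) conversion must be spelled out. A minimal before/after sketch (the function name is illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative only: the explicit conversions this import introduces.
static Instruction *firstInst(BasicBlock &BB) {
  BasicBlock::iterator It = BB.begin();
  // Old code relied on implicit conversion: Instruction *I = It;
  Instruction *I = &*It;              // now: dereference, then take address
  BasicBlock::iterator Back = I->getIterator(); // pointer back to iterator
  (void)Back;
  return I;
}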
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 6ea038b..d860723 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -26,18 +26,10 @@ namespace llvm {
using namespace llvm;
using namespace llvm::objcarc;
-/// \brief A handy option to enable/disable all ARC Optimizations.
-bool llvm::objcarc::EnableARCOpts;
-static cl::opt<bool, true>
-EnableARCOptimizations("enable-objc-arc-opts",
- cl::desc("enable/disable all ARC Optimizations"),
- cl::location(EnableARCOpts),
- cl::init(true));
-
/// initializeObjCARCOptsPasses - Initialize all passes linked into the
/// ObjCARCOpts library.
void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
- initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAAWrapperPassPass(Registry);
initializeObjCARCAPElimPass(Registry);
initializeObjCARCExpandPass(Registry);
initializeObjCARCContractPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 7595e2d..5fd45b0 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -26,6 +26,8 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
@@ -34,7 +36,6 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "ARCInstKind.h"
namespace llvm {
class raw_ostream;
@@ -43,99 +44,6 @@ class raw_ostream;
namespace llvm {
namespace objcarc {
-/// \brief A handy option to enable/disable all ARC Optimizations.
-extern bool EnableARCOpts;
-
-/// \brief Test if the given module looks interesting to run ARC optimization
-/// on.
-static inline bool ModuleHasARC(const Module &M) {
- return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer") ||
- M.getNamedValue("clang.arc.use");
-}
-
-/// \brief This is a wrapper around getUnderlyingObject which also knows how to
-/// look through objc_retain and objc_autorelease calls, which we know to return
-/// their argument verbatim.
-static inline const Value *GetUnderlyingObjCPtr(const Value *V,
- const DataLayout &DL) {
- for (;;) {
- V = GetUnderlyingObject(V, DL);
- if (!IsForwarding(GetBasicARCInstKind(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
-
- return V;
-}
-
-/// The RCIdentity root of a value \p V is a dominating value U for which
-/// retaining or releasing U is equivalent to retaining or releasing V. In other
-/// words, ARC operations on \p V are equivalent to ARC operations on \p U.
-///
-/// We use this in the ARC optimizer to make it easier to match up ARC
-/// operations by always mapping ARC operations to RCIdentityRoots instead of
-/// pointers themselves.
-///
-/// The two ways that we see RCIdentical values in ObjC are via:
-///
-/// 1. PointerCasts
-/// 2. Forwarding Calls that return their argument verbatim.
-///
-/// Thus this function strips off pointer casts and forwarding calls. *NOTE*
-/// This implies that two RCIdentical values must alias.
-static inline const Value *GetRCIdentityRoot(const Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicARCInstKind(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just
-/// casts away the const of the result. For documentation about what an
-/// RCIdentityRoot is (and by extension what GetRCIdentityRoot does), look at that
-/// function.
-static inline Value *GetRCIdentityRoot(Value *V) {
- return const_cast<Value *>(GetRCIdentityRoot((const Value *)V));
-}
-
-/// \brief Assuming the given instruction is one of the special calls such as
-/// objc_retain or objc_release, return the RCIdentity root of the argument of
-/// the call.
-static inline Value *GetArgRCIdentityRoot(Value *Inst) {
- return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0));
-}
-
-static inline bool IsNullOrUndef(const Value *V) {
- return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
-}
-
-static inline bool IsNoopInstruction(const Instruction *I) {
- return isa<BitCastInst>(I) ||
- (isa<GetElementPtrInst>(I) &&
- cast<GetElementPtrInst>(I)->hasAllZeroIndices());
-}
-
-
/// \brief Erase the given instruction.
///
/// Many ObjC calls return their argument verbatim,
@@ -162,152 +70,6 @@ static inline void EraseInstruction(Instruction *CI) {
RecursivelyDeleteTriviallyDeadInstructions(OldArg);
}
-/// \brief Test whether the given value is possibly a retainable object pointer.
-static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
- // Pointers to static or stack storage are not valid retainable object
- // pointers.
- if (isa<Constant>(Op) || isa<AllocaInst>(Op))
- return false;
- // Special arguments cannot be valid retainable object pointers.
- if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasByValAttr() ||
- Arg->hasInAllocaAttr() ||
- Arg->hasNestAttr() ||
- Arg->hasStructRetAttr())
- return false;
- // Only consider values with pointer types.
- //
- // It seems intuitive to exclude function pointer types as well, since
- // functions are never retainable object pointers, however clang occasionally
- // bitcasts retainable object pointers to function-pointer type temporarily.
- PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty)
- return false;
- // Conservatively assume anything else is a potential retainable object
- // pointer.
- return true;
-}
-
-static inline bool IsPotentialRetainableObjPtr(const Value *Op,
- AliasAnalysis &AA) {
- // First make the rudimentary check.
- if (!IsPotentialRetainableObjPtr(Op))
- return false;
-
- // Objects in constant memory are not reference-counted.
- if (AA.pointsToConstantMemory(Op))
- return false;
-
- // Pointers in constant memory are not pointing to reference-counted objects.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
- if (AA.pointsToConstantMemory(LI->getPointerOperand()))
- return false;
-
- // Otherwise assume the worst.
- return true;
-}
-
-/// \brief Helper for GetARCInstKind. Determines what kind of construct CS
-/// is.
-static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (IsPotentialRetainableObjPtr(*I))
- return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
-
- return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
-}
-
-/// \brief Return true if this value refers to a distinct and identifiable
-/// object.
-///
-/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
-/// special knowledge of ObjC conventions.
-static inline bool IsObjCIdentifiedObject(const Value *V) {
- // Assume that call results and arguments have their own "provenance".
- // Constants (including GlobalVariables) and Allocas are never
- // reference-counted.
- if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
- isa<Argument>(V) || isa<Constant>(V) ||
- isa<AllocaInst>(V))
- return true;
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
- const Value *Pointer =
- GetRCIdentityRoot(LI->getPointerOperand());
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (GV->isConstant())
- return true;
- StringRef Name = GV->getName();
- // These special variables are known to hold values which are not
- // reference-counted pointers.
- if (Name.startswith("\01l_objc_msgSend_fixup_"))
- return true;
-
- StringRef Section = GV->getSection();
- if (Section.find("__message_refs") != StringRef::npos ||
- Section.find("__objc_classrefs") != StringRef::npos ||
- Section.find("__objc_superrefs") != StringRef::npos ||
- Section.find("__objc_methname") != StringRef::npos ||
- Section.find("__cstring") != StringRef::npos)
- return true;
- }
- }
-
- return false;
-}
-
-enum class ARCMDKindID {
- ImpreciseRelease,
- CopyOnEscape,
- NoObjCARCExceptions,
-};
-
-/// A cache of MDKinds used by various ARC optimizations.
-class ARCMDKindCache {
- Module *M;
-
- /// The Metadata Kind for clang.imprecise_release metadata.
- llvm::Optional<unsigned> ImpreciseReleaseMDKind;
-
- /// The Metadata Kind for clang.arc.copy_on_escape metadata.
- llvm::Optional<unsigned> CopyOnEscapeMDKind;
-
- /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
- llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
-
-public:
- void init(Module *Mod) {
- M = Mod;
- ImpreciseReleaseMDKind = NoneType::None;
- CopyOnEscapeMDKind = NoneType::None;
- NoObjCARCExceptionsMDKind = NoneType::None;
- }
-
- unsigned get(ARCMDKindID ID) {
- switch (ID) {
- case ARCMDKindID::ImpreciseRelease:
- if (!ImpreciseReleaseMDKind)
- ImpreciseReleaseMDKind =
- M->getContext().getMDKindID("clang.imprecise_release");
- return *ImpreciseReleaseMDKind;
- case ARCMDKindID::CopyOnEscape:
- if (!CopyOnEscapeMDKind)
- CopyOnEscapeMDKind =
- M->getContext().getMDKindID("clang.arc.copy_on_escape");
- return *CopyOnEscapeMDKind;
- case ARCMDKindID::NoObjCARCExceptions:
- if (!NoObjCARCExceptionsMDKind)
- NoObjCARCExceptionsMDKind =
- M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
- return *NoObjCARCExceptionsMDKind;
- }
- llvm_unreachable("Covered switch isn't covered?!");
- }
-};
-
} // end namespace objcarc
} // end namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index d318643..969e77c 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -72,12 +72,9 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
if (const Function *Callee = CS.getCalledFunction()) {
if (Callee->isDeclaration() || Callee->mayBeOverridden())
return true;
- for (Function::const_iterator I = Callee->begin(), E = Callee->end();
- I != E; ++I) {
- const BasicBlock *BB = I;
- for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
- J != F; ++J)
- if (ImmutableCallSite JCS = ImmutableCallSite(J))
+ for (const BasicBlock &BB : *Callee) {
+ for (const Instruction &I : BB)
+ if (ImmutableCallSite JCS = ImmutableCallSite(&I))
// This recursion depth limit is arbitrary. It's just great
// enough to cover known interesting testcases.
if (Depth < 3 &&
@@ -96,7 +93,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
switch (GetBasicARCInstKind(Inst)) {
case ARCInstKind::AutoreleasepoolPush:
Push = Inst;
@@ -169,7 +166,7 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
if (std::next(F->begin()) != F->end())
continue;
// Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(F->begin());
+ Changed |= OptimizeBB(&F->front());
}
return Changed;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
deleted file mode 100644
index eecc82f..0000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- C++ -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
-/// of Objective C to enhance other optimization passes which rely on the Alias
-/// Analysis infrastructure.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
-namespace objcarc {
-
- /// \brief This is a simple alias analysis implementation that uses knowledge
- /// of ARC constructs to answer queries.
- ///
- /// TODO: This class could be generalized to know about other ObjC-specific
- /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
- /// even though their offsets are dynamic.
- class ObjCARCAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- ObjCARCAliasAnalysis() : ImmutablePass(ID) {
- initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- bool doInitialization(Module &M) override;
-
- /// This method is used when a pass implements an analysis interface through
- /// multiple inheritance. If needed, it should override this to adjust the
- /// this pointer as needed for the specified pass info.
- void *getAdjustedAnalysisPointer(const void *PI) override {
- if (PI == &AliasAnalysis::ID)
- return static_cast<AliasAnalysis *>(this);
- return this;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) override;
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
- ModRefBehavior getModRefBehavior(const Function *F) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override;
- };
-
-} // namespace objcarc
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index baca76b..1cdf568 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -119,9 +119,9 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
return false;
// Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (IsNoopInstruction(I)) ++I;
+ BasicBlock::const_iterator I = ++Call->getIterator();
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I != Retain)
return false;
@@ -247,7 +247,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
// Ok, now we know we have not seen a store yet. See if Inst can write to
// our load location, if it can not, just ignore the instruction.
- if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+ if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
continue;
Store = dyn_cast<StoreInst>(Inst);
@@ -282,9 +282,9 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
Instruction *Release,
ProvenanceAnalysis &PA) {
// Walk up from the Store to find the retain.
- BasicBlock::iterator I = Store;
+ BasicBlock::iterator I = Store->getIterator();
BasicBlock::iterator Begin = Store->getParent()->begin();
- while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) {
+ while (I != Begin && GetBasicARCInstKind(&*I) != ARCInstKind::Retain) {
Instruction *Inst = &*I;
// It is only safe to move the retain to the store if we can prove
@@ -294,7 +294,7 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
return nullptr;
--I;
}
- Instruction *Retain = I;
+ Instruction *Retain = &*I;
if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain)
return nullptr;
if (GetArgRCIdentityRoot(Retain) != New)
@@ -429,7 +429,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
// insert it now.
if (!RetainRVMarker)
return false;
- BasicBlock::iterator BBI = Inst;
+ BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
// Step up to see if the call immediately precedes the RetainRV call.
@@ -440,11 +440,11 @@ bool ObjCARCContract::tryToPeepholeInstruction(
BasicBlock *Pred = InstParent->getSinglePredecessor();
if (!Pred)
goto decline_rv_optimization;
- BBI = Pred->getTerminator();
+ BBI = Pred->getTerminator()->getIterator();
break;
}
--BBI;
- } while (IsNoopInstruction(BBI));
+ } while (IsNoopInstruction(&*BBI));
if (&*BBI == GetArgRCIdentityRoot(Inst)) {
DEBUG(dbgs() << "Adding inline asm marker for "
@@ -511,10 +511,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
return false;
Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
@@ -629,13 +629,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
char ObjCARCContract::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
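[Editor's sketch] These ObjCARC hunks swap the old AliasAnalysis analysis group (INITIALIZE_AG_DEPENDENCY / getAnalysis<AliasAnalysis>()) for the AAResultsWrapperPass introduced by the AA rewrite earlier in this import. A minimal sketch of the updated consumer pattern, using a hypothetical pass name:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Hypothetical consumer showing the new way to reach alias analysis.
struct AAConsumerSketch : public FunctionPass {
  static char ID;
  AAConsumerSketch() : FunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>(); // was: AU.addRequired<AliasAnalysis>();
    AU.setPreservesCFG();
  }
  bool runOnFunction(Function &F) override {
    AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // query via AA.alias(...), AA.getModRefInfo(...), etc.
    return false;
  }
};
}
char AAConsumerSketch::ID = 0;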
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9edbb17..f0ee6e2 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -28,7 +28,6 @@
#include "ARCRuntimeEntryPoints.h"
#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
-#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
@@ -36,6 +35,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -482,7 +482,7 @@ namespace {
/// A flag indicating whether this optimization pass should run.
bool Run;
- /// Flags which determine whether each of the interesting runtine functions
+ /// Flags which determine whether each of the interesting runtime functions
/// is in fact used in the current function.
unsigned UsedInThisFunction;
@@ -556,7 +556,7 @@ namespace {
char ObjCARCOpt::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCOpt,
"objc-arc", "ObjC ARC optimization", false, false)
-INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
INITIALIZE_PASS_END(ObjCARCOpt,
"objc-arc", "ObjC ARC optimization", false, false)
@@ -565,8 +565,8 @@ Pass *llvm::createObjCARCOptPass() {
}
void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ObjCARCAliasAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ObjCARCAAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
// ARC optimization doesn't currently split critical edges.
AU.setPreservesCFG();
}
@@ -581,16 +581,18 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
ImmutableCallSite CS(Arg);
if (const Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
- BasicBlock::const_iterator I = Call;
+ BasicBlock::const_iterator I(Call);
++I;
- while (IsNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I == RetainRV)
return false;
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock *RetainRVParent = RetainRV->getParent();
if (II->getNormalDest() == RetainRVParent) {
BasicBlock::const_iterator I = RetainRVParent->begin();
- while (IsNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(&*I))
+ ++I;
if (&*I == RetainRV)
return false;
}
@@ -599,18 +601,21 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
- BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ BasicBlock::iterator I = RetainRV->getIterator(),
+ Begin = RetainRV->getParent()->begin();
if (I != Begin) {
- do --I; while (I != Begin && IsNoopInstruction(I));
- if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV &&
- GetArgRCIdentityRoot(I) == Arg) {
+ do
+ --I;
+ while (I != Begin && IsNoopInstruction(&*I));
+ if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
+ GetArgRCIdentityRoot(&*I) == Arg) {
Changed = true;
++NumPeeps;
DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
<< "Erasing " << *RetainRV << "\n");
- EraseInstruction(I);
+ EraseInstruction(&*I);
EraseInstruction(RetainRV);
return true;
}
@@ -1216,7 +1221,7 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = std::prev(I);
+ Instruction *Inst = &*std::prev(I);
// Invoke instructions are visited as part of their successors (below).
if (isa<InvokeInst>(Inst))
@@ -1264,7 +1269,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetArgRCIdentityRoot(Inst);
TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
NestingDetected |= S.InitTopDown(Class, Inst);
- // A retain can be a potential use; procede to the generic checking
+ // A retain can be a potential use; proceed to the generic checking
// code below.
break;
}
@@ -1342,12 +1347,10 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
<< "Performing Dataflow:\n");
// Visit all the instructions, top-down.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- Instruction *Inst = I;
+ for (Instruction &Inst : *BB) {
+ DEBUG(dbgs() << " Visiting " << Inst << "\n");
- DEBUG(dbgs() << " Visiting " << *Inst << "\n");
-
- NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
}
DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n"
@@ -1413,16 +1416,15 @@ ComputePostOrders(Function &F,
// Functions may have many exits, and there also blocks which we treat
// as exits due to ignored edges.
SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *ExitBB = I;
- BBState &MyStates = BBStates[ExitBB];
+ for (BasicBlock &ExitBB : F) {
+ BBState &MyStates = BBStates[&ExitBB];
if (!MyStates.isExit())
continue;
MyStates.SetAsExit();
- PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
- Visited.insert(ExitBB);
+ PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
+ Visited.insert(&ExitBB);
while (!PredStack.empty()) {
reverse_dfs_next_succ:
BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
@@ -1830,7 +1832,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// analysis too, but that would want caching. A better approach would be to
// use the technique that EarlyCSE uses.
inst_iterator Current = std::prev(I);
- BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
for (BasicBlock::iterator B = CurrentBB->begin(),
J = Current.getInstructionIterator();
J != B; --J) {
@@ -2008,10 +2010,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
// Check that the call is a regular call.
ARCInstKind Class = GetBasicARCInstKind(Call);
- if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call)
- return false;
-
- return true;
+ return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call;
}
/// Find a dependent retain that precedes the given autorelease for which there
@@ -2081,9 +2080,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- BasicBlock *BB = FI;
- ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ for (BasicBlock &BB: F) {
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
@@ -2095,19 +2093,16 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
// that need a positive ref count in between the autorelease and Ret.
- CallInst *Autorelease =
- FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
- DependingInstructions, Visited,
- PA);
+ CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(
+ Arg, &BB, Ret, DependingInstructions, Visited, PA);
DependingInstructions.clear();
Visited.clear();
if (!Autorelease)
continue;
- CallInst *Retain =
- FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
- DependingInstructions, Visited, PA);
+ CallInst *Retain = FindPredecessorRetainWithSafePath(
+ Arg, &BB, Autorelease, DependingInstructions, Visited, PA);
DependingInstructions.clear();
Visited.clear();
@@ -2192,7 +2187,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
"\n");
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
#ifndef NDEBUG
if (AreStatisticsEnabled()) {
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 0ac41d3..1a12b659 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -26,10 +26,10 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
class Value;
- class AliasAnalysis;
class DataLayout;
class PHINode;
class SelectInst;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 0be75af..c274e81 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -35,7 +35,7 @@ char PAEval::ID = 0;
PAEval::PAEval() : FunctionPass(ID) {}
void PAEval::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
static StringRef getName(Value *V) {
@@ -65,7 +65,7 @@ bool PAEval::runOnFunction(Function &F) {
}
ProvenanceAnalysis PA;
- PA.setAA(&getAnalysis<AliasAnalysis>());
+ PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
@@ -89,6 +89,6 @@ FunctionPass *llvm::createPAEvalPass() { return new PAEval(); }
INITIALIZE_PASS_BEGIN(PAEval, "pa-eval",
"Evaluate ProvenanceAnalysis on all pairs", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(PAEval, "pa-eval",
"Evaluate ProvenanceAnalysis on all pairs", false, true)
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
index ae20e7e..df64fa3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -256,9 +256,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
// one of its successor blocks, since we can't insert code after it
// in its own block, and we don't want to split critical edges.
if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(BB->getFirstInsertionPt());
+ InsertReverseInsertPt(&*BB->getFirstInsertionPt());
else
- InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ InsertReverseInsertPt(&*++Inst->getIterator());
SetSeq(S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
@@ -268,9 +268,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
assert(!HasReverseInsertPts());
// As above; handle invoke specially.
if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(BB->getFirstInsertionPt());
+ InsertReverseInsertPt(&*BB->getFirstInsertionPt());
else
- InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ InsertReverseInsertPt(&*++Inst->getIterator());
}
break;
case S_Stop:
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
index e45e1ea..9749e44 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -17,8 +17,8 @@
#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
-#include "ARCInstKind.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
@@ -96,7 +96,7 @@ struct RRInfo {
};
/// \brief This class summarizes several per-pointer runtime properties which
-/// are propogated through the flow graph.
+/// are propagated through the flow graph.
class PtrState {
protected:
/// True if the reference count is known to be incremented.
@@ -172,7 +172,7 @@ struct BottomUpPtrState : PtrState {
bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
/// Return true if this set of releases can be paired with a release. Modifies
- /// state appropriately to reflect that the matching occured if it is
+ /// state appropriately to reflect that the matching occurred if it is
/// successful.
///
/// It is assumed that one has already checked that the RCIdentity of the
@@ -194,7 +194,7 @@ struct TopDownPtrState : PtrState {
/// Return true if this set of retains can be paired with the given
/// release. Modifies state appropriately to reflect that the matching
- /// occured.
+ /// occurred.
bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release);
void HandlePotentialUse(Instruction *Inst, const Value *Ptr,
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index d6fc916..590a52d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -1,4 +1,4 @@
-//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,52 +14,33 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "adce"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace {
-struct ADCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {
- initializeADCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
-
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- }
-};
-}
-
-char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
-
-bool ADCE::runOnFunction(Function& F) {
- if (skipOptnoneFunction(F))
- return false;
-
+static bool aggressiveDCE(Function& F) {
SmallPtrSet<Instruction*, 128> Alive;
SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I.mayHaveSideEffects()) {
+ for (Instruction &I : instructions(F)) {
+ if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || I.isEHPad() ||
+ I.mayHaveSideEffects()) {
Alive.insert(&I);
Worklist.push_back(&I);
}
@@ -79,7 +60,7 @@ bool ADCE::runOnFunction(Function& F) {
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (!Alive.count(&I)) {
Worklist.push_back(&I);
I.dropAllReferences();
@@ -94,6 +75,34 @@ bool ADCE::runOnFunction(Function& F) {
return !Worklist.empty();
}
-FunctionPass *llvm::createAggressiveDCEPass() {
- return new ADCE();
+PreservedAnalyses ADCEPass::run(Function &F) {
+ if (aggressiveDCE(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
+
+namespace {
+struct ADCELegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCELegacyPass() : FunctionPass(ID) {
+ initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+ return aggressiveDCE(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char ADCELegacyPass::ID = 0;
+INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+ false, false)
+
+FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
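[Editor's sketch] The ADCE rewrite above is this import's template for porting a pass to the new pass manager: the transform becomes a free function, a thin ADCEPass::run adapts it for the new manager, and ADCELegacyPass keeps the old entry points alive. A minimal sketch of the same shape with hypothetical names (legacy registration and the skipOptnoneFunction check elided):

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Shared implementation; returns true if it changed the function.
static bool runSketchImpl(Function &F) { return false; }

// New-PM adapter, mirroring ADCEPass::run above: translate the changed
// flag into a PreservedAnalyses set (none if modified, all otherwise).
struct SketchPass {
  PreservedAnalyses run(Function &F) {
    return runSketchImpl(F) ? PreservedAnalyses::none()
                            : PreservedAnalyses::all();
  }
};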
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 8918909..4b721d3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -21,6 +21,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -54,13 +56,15 @@ struct AlignmentFromAssumptions : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
}
// For memory transfers, we need a common alignment for both the source and
@@ -84,7 +88,7 @@ INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
@@ -249,8 +253,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
// The mask must have some trailing ones (otherwise the condition is
// trivial and tells us nothing about the alignment of the left operand).
- unsigned TrailingOnes =
- MaskSCEV->getValue()->getValue().countTrailingOnes();
+ unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
if (!TrailingOnes)
return false;
@@ -270,7 +273,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
OffSCEV = nullptr;
if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
AAPtr = PToI->getPointerOperand();
- OffSCEV = SE->getConstant(Int64Ty, 0);
+ OffSCEV = SE->getZero(Int64Ty);
} else if (const SCEVAddExpr* AndLHSAddSCEV =
dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
// Try to find the ptrtoint; subtract it and the rest is the offset.
@@ -410,7 +413,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
bool Changed = false;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
NewDestAlignments.clear();
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 09c605e..cb9b8b6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -15,26 +15,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -53,342 +45,42 @@ struct BDCE : public FunctionPass {
void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<DemandedBits>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
-
- void determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2);
-
- AssumptionCache *AC;
- DominatorTree *DT;
};
}
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I->mayHaveSideEffects();
-}
-
-void BDCE::determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2) {
- unsigned BitWidth = AB.getBitWidth();
-
- // We're called once per operand, but for some instructions, we need to
- // compute known bits of both operands in order to determine the live bits of
- // either (when both operands are instructions themselves). We don't,
- // however, want to do this twice, so we cache the result in APInts that live
- // in the caller. For the two-relevant-operands case, both operand values are
- // provided here.
- auto ComputeKnownBits =
- [&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
- AC, UserI, DT);
-
- if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
- 0, AC, UserI, DT);
- }
- };
-
- switch (UserI->getOpcode()) {
- default: break;
- case Instruction::Call:
- case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap:
- // The alive bits of the input are the swapped alive bits of
- // the output.
- AB = AOut.byteSwap();
- break;
- case Intrinsic::ctlz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the left of, and including, the leftmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countLeadingZeros()+1));
- }
- break;
- case Intrinsic::cttz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the right of, and including, the rightmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countTrailingZeros()+1));
- }
- break;
- }
- break;
- case Instruction::Add:
- case Instruction::Sub:
- // Find the highest live output bit. We don't need any more input
- // bits than that (adds, and thus subtracts, ripple only to the
- // left).
- AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
- break;
- case Instruction::Shl:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.lshr(ShiftAmt);
-
- // If the shift is nuw/nsw, then the high bits are not dead
- // (because we've promised that they *must* be zero).
- const ShlOperator *S = cast<ShlOperator>(UserI);
- if (S->hasNoSignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
- else if (S->hasNoUnsignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::LShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<LShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::AShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
- .getBoolValue())
- AB.setBit(BitWidth-1);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<AShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::And:
- AB = AOut;
-
- // For bits that are known zero, the corresponding bits in the
- // other operand are dead (unless they're both zero, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownZero2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownZero & ~KnownZero2);
- }
- break;
- case Instruction::Or:
- AB = AOut;
-
- // For bits that are known one, the corresponding bits in the
- // other operand are dead (unless they're both one, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownOne2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownOne & ~KnownOne2);
- }
- break;
- case Instruction::Xor:
- case Instruction::PHI:
- AB = AOut;
- break;
- case Instruction::Trunc:
- AB = AOut.zext(BitWidth);
- break;
- case Instruction::ZExt:
- AB = AOut.trunc(BitWidth);
- break;
- case Instruction::SExt:
- AB = AOut.trunc(BitWidth);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
- AOut.getBitWidth() - BitWidth))
- .getBoolValue())
- AB.setBit(BitWidth-1);
- break;
- case Instruction::Select:
- if (OperandNo != 0)
- AB = AOut;
- break;
- }
-}
-
bool BDCE::runOnFunction(Function& F) {
if (skipOptnoneFunction(F))
return false;
+ DemandedBits &DB = getAnalysis<DemandedBits>();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- DenseMap<Instruction *, APInt> AliveBits;
SmallVector<Instruction*, 128> Worklist;
-
- // The set of visited instructions (non-integer-typed only).
- SmallPtrSet<Instruction*, 128> Visited;
-
- // Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (!isAlwaysLive(&I))
- continue;
-
- DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
- // For integer-valued instructions, set up an initial empty set of alive
- // bits and add the instruction to the work list. For other instructions
- // add their operands to the work list (for integer-valued operands, mark
- // all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
- Worklist.push_back(&I);
- }
-
- continue;
- }
-
- // Non-integer-typed instructions...
- for (Use &OI : I.operands()) {
- if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
- }
- }
- // To save memory, we don't add I to the Visited set here. Instead, we
- // check isAlwaysLive on every instruction when searching for dead
- // instructions later (we need to check isAlwaysLive for the
- // integer-typed instructions anyway).
- }
-
- // Propagate liveness backwards to operands.
- while (!Worklist.empty()) {
- Instruction *UserI = Worklist.pop_back_val();
-
- DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
- APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
- AOut = AliveBits[UserI];
- DEBUG(dbgs() << " Alive Out: " << AOut);
- }
- DEBUG(dbgs() << "\n");
-
- if (!UserI->getType()->isIntegerTy())
- Visited.insert(UserI);
-
- APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
- // Compute the set of alive bits for each operand. These are ANDed into the
- // existing set, if any, and if that changes the set of alive bits, the
- // operand is added to the work-list.
- for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // are dead as well. Otherwise, the bits of each operand that are
- // used to compute alive bits of the output are alive, and all
- // others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- KnownZero, KnownOne,
- KnownZero2, KnownOne2);
- }
-
- // If we've added to the set of alive bits (or the operand has not
- // been previously visited), then re-queue the operand to be visited
- // again.
- APInt ABPrev(BitWidth, 0);
- auto ABI = AliveBits.find(I);
- if (ABI != AliveBits.end())
- ABPrev = ABI->second;
-
- APInt ABNew = AB | ABPrev;
- if (ABNew != ABPrev || ABI == AliveBits.end()) {
- AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
- }
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
- }
- }
- }
- }
-
bool Changed = false;
- // The inverse of the live set is the dead set. These are those instructions
- // which have no side effects and do not influence the control flow or return
- // value of the function, and may therefore be deleted safely.
- // NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
- // For live instructions that have all dead bits, first make them dead by
- // replacing all uses with something else. Then, if they don't need to
- // remain live (because they have side effects, etc.) we can remove them.
- if (I.getType()->isIntegerTy()) {
- auto ABI = AliveBits.find(&I);
- if (ABI != AliveBits.end()) {
- if (ABI->second.getBoolValue())
- continue;
-
- DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
- // FIXME: In theory we could substitute undef here instead of zero.
- // This should be reconsidered once we settle on the semantics of
- // undef, poison, etc.
- Value *Zero = ConstantInt::get(I.getType(), 0);
- ++NumSimplified;
- I.replaceAllUsesWith(Zero);
- Changed = true;
- }
- } else if (Visited.count(&I)) {
- continue;
+ for (Instruction &I : instructions(F)) {
+ if (I.getType()->isIntegerTy() &&
+ !DB.getDemandedBits(&I).getBoolValue()) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+ // remain live (because they have side effects, etc.) we can remove them.
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
}
-
- if (isAlwaysLive(&I))
+ if (!DB.isInstructionDead(&I))
continue;
Worklist.push_back(&I);
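The BDCE hunk above swaps the pass's hand-rolled liveness propagation for the shared DemandedBits analysis, but the underlying idea is unchanged: walk uses backward and compute which bits of each operand can influence the live bits of the result. Below is a minimal standalone sketch of that propagation for two of the cases handled in the removed determineLiveOperandBits (and-with-constant and lshr), using plain uint32_t masks in place of APInt; the Op enum and function name are hypothetical scaffolding, not LLVM API.

#include <cstdint>
#include <iostream>

// Toy model of determineLiveOperandBits: given the bits of the *result*
// that are alive (AOut), compute which bits of operand 0 are alive.
enum class Op { AndWithConst, LShrByConst };

uint32_t liveOperandBits(Op Opcode, uint32_t AOut, uint32_t Const) {
  switch (Opcode) {
  case Op::AndWithConst:
    // A result bit can only be nonzero where the constant mask is 1, so
    // operand bits under a zero mask bit are dead (cf. Instruction::And,
    // where AB is masked by ~KnownZero of the other operand).
    return AOut & Const;
  case Op::LShrByConst:
    // lshr moves operand bit (i + ShiftAmt) into result bit i, so the
    // alive operand bits are the alive result bits shifted back left
    // (cf. Instruction::LShr above; Const is the shift amount).
    return AOut << Const;
  }
  return ~0u; // conservatively, everything alive
}

int main() {
  uint32_t AOut = 0xFF; // only the low byte of the result is demanded
  std::cout << std::hex
            << liveOperandBits(Op::AndWithConst, AOut, 0x0F) << '\n'  // f
            << liveOperandBits(Op::LShrByConst, AOut, 8) << '\n';     // ff00
}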
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 4288742..84f7f5f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -223,10 +223,10 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
}
// The simple and common case. This also includes constant expressions.
- if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
+ if (!isa<PHINode>(Inst) && !Inst->isEHPad())
return Inst;
- // We can't insert directly before a phi node or landing pad. Insert before
+ // We can't insert directly before a phi node or an eh pad. Insert before
// the terminator of the incoming or dominating block.
assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
if (Idx != ~0U && isa<PHINode>(Inst))
@@ -365,9 +365,9 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
/// into an instruction itself.
void ConstantHoisting::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
- for (Function::iterator BB : Fn)
- for (BasicBlock::iterator Inst : *BB)
- collectConstantCandidates(ConstCandMap, Inst);
+ for (BasicBlock &BB : Fn)
+ for (Instruction &Inst : BB)
+ collectConstantCandidates(ConstCandMap, &Inst);
}
/// \brief Find the base constant within the given range and rebase all other
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 79624b2..686bd40 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/CFG.h"
@@ -32,6 +33,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
@@ -43,6 +45,11 @@ namespace {
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
bool processSwitch(SwitchInst *SI);
+ bool processCallSite(CallSite CS);
+
+ /// Return a constant value for V usable at At and everything it
+ /// dominates. If no such Constant can be found, return nullptr.
+ Constant *getConstantAt(Value *V, Instruction *At);
public:
static char ID;
@@ -54,6 +61,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
@@ -178,44 +186,33 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
return true;
}
-/// processCmp - If the value of this comparison could be determined locally,
-/// constant propagation would already have figured it out. Instead, walk
-/// the predecessors and statically evaluate the comparison based on information
-/// available on that edge. If a given static evaluation is true on ALL
-/// incoming edges, then it's true universally and we can simplify the compare.
+/// processCmp - See if LazyValueInfo's ability to exploit edge conditions
+/// or range information is sufficient to prove this comparison. Even for
+/// local conditions, this can sometimes prove conditions instcombine can't by
+/// exploiting range information.
bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
Value *Op0 = C->getOperand(0);
- if (isa<Instruction>(Op0) &&
- cast<Instruction>(Op0)->getParent() == C->getParent())
- return false;
-
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
- pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
- if (PI == PE) return false;
+ // As a policy choice, we do not waste compile time on anything where
+ // the comparison is testing local values. While LVI can sometimes reason
+ // about such cases, it's not its primary purpose. We do make sure to do
+ // the block local query for uses from terminator instructions, but that's
+ // handled in the code for each terminator.
+ auto *I = dyn_cast<Instruction>(Op0);
+ if (I && I->getParent() == C->getParent())
+ return false;
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C);
if (Result == LazyValueInfo::Unknown) return false;
- ++PI;
- while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
- if (Res != Result) return false;
- ++PI;
- }
-
++NumCmps;
-
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
C->eraseFromParent();
return true;
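For context on the processCmp rewrite above: the removed code proved a comparison by statically evaluating it on every incoming edge and requiring all edges to agree, while the new code asks LVI for the predicate directly at the use point via getPredicateAt. A standalone sketch of the old all-edges merge over a three-valued result follows; the Tristate type and per-edge facts are stand-ins, not LVI's API.

#include <iostream>
#include <optional>
#include <vector>

// Three-valued result of evaluating a predicate along one incoming edge.
enum class Tristate { False, True, Unknown };

// The removed loop in processCmp, in miniature: the comparison folds only
// if every incoming edge yields the same known value.
std::optional<bool> foldOverEdges(const std::vector<Tristate> &PerEdge) {
  if (PerEdge.empty())
    return std::nullopt;
  Tristate First = PerEdge.front();
  if (First == Tristate::Unknown)
    return std::nullopt;
  for (Tristate T : PerEdge)
    if (T != First)
      return std::nullopt; // edges disagree, or one is unknown
  return First == Tristate::True;
}

int main() {
  // Both predecessors prove the predicate true -> the compare folds.
  auto R = foldOverEdges({Tristate::True, Tristate::True});
  std::cout << (R ? (*R ? "true" : "false") : "unknown") << '\n';
  // One edge can't prove it -> no fold.
  R = foldOverEdges({Tristate::True, Tristate::Unknown});
  std::cout << (R ? (*R ? "true" : "false") : "unknown") << '\n';
}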
@@ -307,6 +304,59 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
return Changed;
}
+/// processCallSite - Infer nonnull attributes for the arguments at the
+/// specified callsite.
+bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
+ SmallVector<unsigned, 4> Indices;
+ unsigned ArgNo = 0;
+
+ for (Value *V : CS.args()) {
+ PointerType *Type = dyn_cast<PointerType>(V->getType());
+
+ if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
+ ConstantPointerNull::get(Type),
+ CS.getInstruction()) == LazyValueInfo::False)
+ Indices.push_back(ArgNo + 1);
+ ArgNo++;
+ }
+
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
+ if (Indices.empty())
+ return false;
+
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+
+ return true;
+}
+
+Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) {
+ if (Constant *C = LVI->getConstant(V, At->getParent(), At))
+ return C;
+
+ // TODO: The following really should be sunk inside LVI's core algorithm, or
+ // at least the outer shims around such.
+ auto *C = dyn_cast<CmpInst>(V);
+ if (!C) return nullptr;
+
+ Value *Op0 = C->getOperand(0);
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return nullptr;
+
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
+ if (Result == LazyValueInfo::Unknown)
+ return nullptr;
+
+ return (Result == LazyValueInfo::True) ?
+ ConstantInt::getTrue(C->getContext()) :
+ ConstantInt::getFalse(C->getContext());
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -318,7 +368,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
- Instruction *II = BI++;
+ Instruction *II = &*BI++;
switch (II->getOpcode()) {
case Instruction::Select:
BBChanged |= processSelect(cast<SelectInst>(II));
@@ -334,6 +384,10 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Store:
BBChanged |= processMemAccess(II);
break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ BBChanged |= processCallSite(CallSite(II));
+ break;
}
}
@@ -342,7 +396,21 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Switch:
BBChanged |= processSwitch(cast<SwitchInst>(Term));
break;
+ case Instruction::Ret: {
+ auto *RI = cast<ReturnInst>(Term);
+ // Try to determine the return value if we can. This is mainly here to
+ // simplify the writing of unit tests, but also helps to enable IPO by
+ // constant folding the return values of callees.
+ auto *RetVal = RI->getReturnValue();
+ if (!RetVal) break; // handle "ret void"
+ if (isa<Constant>(RetVal)) break; // nothing to do
+ if (auto *C = getConstantAt(RetVal, RI)) {
+ ++NumReturns;
+ RI->replaceUsesOfWith(RetVal, C);
+ BBChanged = true;
+ }
}
+ };
FnChanged |= BBChanged;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 3b262a2..b67c3c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
@@ -46,7 +47,7 @@ namespace {
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
@@ -92,6 +93,34 @@ namespace {
char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+static bool DCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+ ++DCEEliminated;
+ return true;
+ }
+ return false;
+}
+
bool DCE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -99,39 +128,24 @@ bool DCE::runOnFunction(Function &F) {
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- // Start out with all of the instructions in the worklist...
- std::vector<Instruction*> WorkList;
- for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
- WorkList.push_back(&*i);
-
- // Loop over the worklist finding instructions that are dead. If they are
- // dead make them drop all of their uses, making other instructions
- // potentially dead, and work until the worklist is empty.
- //
bool MadeChange = false;
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (inst_iterator FI = inst_begin(F), FE = inst_end(F); FI != FE;) {
+ Instruction *I = &*FI;
+ ++FI;
+
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
+ }
+
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
-
- if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
- // Loop over all of the values that the instruction uses, if there are
- // instructions being used, add them to the worklist, because they might
- // go dead after this one is removed.
- //
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *Used = dyn_cast<Instruction>(*OI))
- WorkList.push_back(Used);
-
- // Remove the instruction.
- I->eraseFromParent();
-
- // Remove the instruction from the worklist if it still exists in it.
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
- WorkList.end());
-
- MadeChange = true;
- ++DCEEliminated;
- }
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
}
return MadeChange;
}
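The rewritten DCE above visits each instruction once and only queues an instruction for revisiting when nulling out a dead instruction's operands may have made it dead too, instead of pre-seeding the worklist with the whole function. A standalone model over a toy def-use graph is sketched below; instructions are just indices with use counts and a side-effect flag, and all the names are hypothetical.

#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

// Toy IR: each "instruction" lists its operands (indices of other
// instructions). It is trivially dead when unused and side-effect free.
struct Inst {
  std::vector<size_t> Ops;
  bool SideEffects = false;
  size_t Uses = 0;
  bool Erased = false;
};

static bool triviallyDead(const Inst &I) {
  return !I.SideEffects && I.Uses == 0 && !I.Erased;
}

// Mirrors DCEInstruction: erase I, drop its operand uses, and queue any
// operand that just became trivially dead (std::set stands in for
// SmallSetVector's dedup behavior).
static bool dceInstruction(size_t I, std::vector<Inst> &IR,
                           std::set<size_t> &WorkList) {
  if (!triviallyDead(IR[I]))
    return false;
  for (size_t Op : IR[I].Ops)
    if (--IR[Op].Uses == 0 && triviallyDead(IR[Op]))
      WorkList.insert(Op);
  IR[I].Erased = true;
  return true;
}

int main() {
  // Inst 0 is used by inst 1; inst 1 is unused; inst 2 has side effects.
  std::vector<Inst> IR(3);
  IR[1].Ops = {0};
  IR[0].Uses = 1;
  IR[2].SideEffects = true;

  std::set<size_t> WorkList;
  for (size_t I = 0; I < IR.size(); ++I)   // single pass, like the new loop
    if (!WorkList.count(I))
      dceInstruction(I, IR, WorkList);
  while (!WorkList.empty()) {              // then drain the worklist
    size_t I = *WorkList.begin();
    WorkList.erase(WorkList.begin());
    dceInstruction(I, IR, WorkList);
  }
  for (size_t I = 0; I < IR.size(); ++I)
    std::cout << I << (IR[I].Erased ? ": erased\n" : ": kept\n");
}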
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c505584..36ad0a5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -40,6 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "dse"
+STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -59,23 +61,24 @@ namespace {
if (skipOptnoneFunction(F))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = AA->getTargetLibraryInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock &I : F)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
- if (DT->isReachableFromEntry(I))
- Changed |= runOnBasicBlock(*I);
+ if (DT->isReachableFromEntry(&I))
+ Changed |= runOnBasicBlock(I);
AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
+ bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
@@ -85,10 +88,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
@@ -97,8 +101,10 @@ namespace {
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -115,7 +121,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -140,7 +146,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI, TLI))
+ if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
@@ -153,7 +159,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -170,20 +176,20 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- if (TLI && TLI->has(LibFunc::strcpy) &&
- F->getName() == TLI->getName(LibFunc::strcpy)) {
+ if (TLI.has(LibFunc::strcpy) &&
+ F->getName() == TLI.getName(LibFunc::strcpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncpy) &&
- F->getName() == TLI->getName(LibFunc::strncpy)) {
+ if (TLI.has(LibFunc::strncpy) &&
+ F->getName() == TLI.getName(LibFunc::strncpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strcat) &&
- F->getName() == TLI->getName(LibFunc::strcat)) {
+ if (TLI.has(LibFunc::strcat) &&
+ F->getName() == TLI.getName(LibFunc::strcat)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncat) &&
- F->getName() == TLI->getName(LibFunc::strncat)) {
+ if (TLI.has(LibFunc::strncat) &&
+ F->getName() == TLI.getName(LibFunc::strncat)) {
return true;
}
}
@@ -224,9 +230,9 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
- "Unknown instruction case");
+static MemoryLocation getLocForRead(Instruction *Inst,
+ const TargetLibraryInfo &TLI) {
+ assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -313,9 +319,9 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo &TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, DL, TLI))
+ if (getObjectSize(V, Size, DL, &TLI))
return Size;
return MemoryLocation::UnknownSize;
}
@@ -336,7 +342,7 @@ namespace {
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -442,10 +448,12 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
const MemoryLocation &InstStoreLoc,
- Instruction *DepWrite, AliasAnalysis &AA) {
+ Instruction *DepWrite,
+ const TargetLibraryInfo &TLI,
+ AliasAnalysis &AA) {
// Self reads can only happen for instructions that read memory. Get the
// location read.
- MemoryLocation InstReadLoc = getLocForRead(Inst, AA);
+ MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
@@ -459,7 +467,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Here we don't know if A/B may alias, but we do know that B/B are must
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one).
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA);
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
@@ -475,11 +483,12 @@ static bool isPossibleSelfRead(Instruction *Inst,
//===----------------------------------------------------------------------===//
bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
- Instruction *Inst = BBI++;
+ Instruction *Inst = &*BBI++;
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst, TLI)) {
@@ -488,42 +497,68 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst, TLI))
- continue;
-
- MemDepResult InstDep = MD->getDependency(Inst);
-
- // Ignore any store where we can't find a local dependence.
- // FIXME: cross-block DSE would be fun. :)
- if (!InstDep.isDef() && !InstDep.isClobber())
+ if (!hasMemoryWrite(Inst, *TLI))
continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+
+ auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(&*BBI);
+
+ DeleteDeadInstruction(DeadInst, *MD, *TLI);
+
+ if (!NextInst) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumRedundantStores;
+ MadeChange = true;
+ };
+
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && isRemovable(SI)) {
+ isRemovable(SI) &&
+ MemoryIsNotModifiedBetween(DepLoad, SI)) {
+
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
+ RemoveDeadInstAndUpdateBBI(SI);
+ continue;
+ }
+ }
- DeleteDeadInstruction(SI, *MD, TLI);
+ // Remove null stores into the calloc'ed objects
+ Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
- if (!NextInst) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- ++NumFastStores;
- MadeChange = true;
+ if (StoredConstant && StoredConstant->isNullValue() &&
+ isRemovable(SI)) {
+ Instruction *UnderlyingPointer = dyn_cast<Instruction>(
+ GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+ if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
+ MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {
+ DEBUG(dbgs()
+ << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
+ << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
+
+ RemoveDeadInstAndUpdateBBI(SI);
continue;
}
}
}
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (!InstDep.isDef() && !InstDep.isClobber())
+ continue;
+
// Figure out what location is being stored to.
MemoryLocation Loc = getLocForWrite(Inst, *AA);
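The first store case added in the hunk above removes a store of a value back to the pointer it was just loaded from, once MemoryIsNotModifiedBetween shows no intervening write. The source-level shape of that pattern, to make the before/after concrete (a sketch, not anything DSE emits):

#include <iostream>

// The redundant-store pattern targeted above: the store of 'v' back
// through 'p' is a no-op if nothing wrote *p in between. Here that holds
// only when q does not alias p, which is exactly what the pass must prove
// via MemoryIsNotModifiedBetween and alias analysis.
int sink(int *p, int *q) {
  int v = *p;   // load
  *q = 1;       // intervening write; harmless iff q != p
  *p = v;       // candidate dead store
  return v;
}

int main() {
  int a = 5, b = 0;
  std::cout << sink(&a, &b) << ' ' << a << ' ' << b << '\n'; // 5 5 1
}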
@@ -549,24 +584,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
if (isRemovable(DepWrite) &&
- !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- const DataLayout &DL = BB.getModule()->getDataLayout();
OverwriteResult OR =
- isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
- DepWriteOffset, InstWriteOffset);
+ isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, TLI);
+ DeleteDeadInstruction(DepWrite, *MD, *TLI);
++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
- BBI = Inst;
+ BBI = Inst->getIterator();
if (BBI != BB.begin())
--BBI;
break;
@@ -609,10 +642,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (DepWrite == &BB.front()) break;
// Can't look past this instruction if it might read 'Loc'.
- if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
break;
- InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, false,
+ DepWrite->getIterator(), &BB);
}
}
@@ -624,6 +658,64 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
+/// Returns true if the memory accessed by the second instruction is not
+/// modified between the first and the second instruction.
+/// Precondition: Second instruction must be dominated by the first
+/// instruction.
+bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
+ Instruction *SecondI) {
+ SmallVector<BasicBlock *, 16> WorkList;
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ BasicBlock::iterator FirstBBI(FirstI);
+ ++FirstBBI;
+ BasicBlock::iterator SecondBBI(SecondI);
+ BasicBlock *FirstBB = FirstI->getParent();
+ BasicBlock *SecondBB = SecondI->getParent();
+ MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+
+ // Start checking the store-block.
+ WorkList.push_back(SecondBB);
+ bool isFirstBlock = true;
+
+ // Check all blocks going backward until we reach the load-block.
+ while (!WorkList.empty()) {
+ BasicBlock *B = WorkList.pop_back_val();
+
+ // Ignore instructions before FirstI if this is the FirstBB.
+ BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
+
+ BasicBlock::iterator EI;
+ if (isFirstBlock) {
+ // Ignore instructions after SecondI if this is the first visit of SecondBB.
+ assert(B == SecondBB && "first block is not the store block");
+ EI = SecondBBI;
+ isFirstBlock = false;
+ } else {
+ // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
+ // In this case we also have to look at instructions after SecondI.
+ EI = B->end();
+ }
+ for (; BI != EI; ++BI) {
+ Instruction *I = &*BI;
+ if (I->mayWriteToMemory() && I != SecondI) {
+ auto Res = AA->getModRefInfo(I, MemLoc);
+ if (Res != MRI_NoModRef)
+ return false;
+ }
+ }
+ if (B != FirstBB) {
+ assert(B != &FirstBB->getParent()->getEntryBlock() &&
+ "Should not hit the entry block because SI must be dominated by LI");
+ for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
+ if (!Visited.insert(*PredI).second)
+ continue;
+ WorkList.push_back(*PredI);
+ }
+ }
+ }
+ return true;
+}
+
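MemoryIsNotModifiedBetween above walks the CFG backward from the store's block toward the load's block, scanning each block for anything that may write the queried location. Below is a standalone sketch of the same traversal over a toy CFG where each block just carries a "may clobber" flag, eliding the per-instruction iterator bookkeeping inside the first and last blocks; the graph shape and flags are made up.

#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

struct Block {
  std::vector<size_t> Preds;
  bool MayClobber = false; // stands in for getModRefInfo != MRI_NoModRef
};

// True if no block on any path from First (exclusive) to Second
// (exclusive) may clobber the location; mirrors the worklist walk above.
bool notModifiedBetween(const std::vector<Block> &CFG, size_t First,
                        size_t Second) {
  std::vector<size_t> WorkList{Second};
  std::set<size_t> Visited;
  while (!WorkList.empty()) {
    size_t B = WorkList.back();
    WorkList.pop_back();
    if (B != Second && B != First && CFG[B].MayClobber)
      return false;
    if (B == First)
      continue; // don't walk past the load's block
    for (size_t P : CFG[B].Preds)
      if (Visited.insert(P).second)
        WorkList.push_back(P);
  }
  return true;
}

int main() {
  // Diamond: 0 -> {1, 2} -> 3; block 2 clobbers the location.
  std::vector<Block> CFG(4);
  CFG[1].Preds = {0};
  CFG[2].Preds = {0};
  CFG[3].Preds = {1, 2};
  CFG[2].MayClobber = true;
  std::cout << notModifiedBetween(CFG, 0, 3) << '\n'; // 0: path via 2 clobbers
  CFG[2].MayClobber = false;
  std::cout << notModifiedBetween(CFG, 0, 3) << '\n'; // 1: now safe
}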
/// Find all blocks that will unconditionally lead to the block BB and append
/// them to F.
static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
@@ -655,10 +747,11 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *InstPt = BB->getTerminator();
if (BB == F->getParent()) InstPt = F;
- MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ MemDepResult Dep =
+ MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -668,10 +761,10 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- Instruction *Next = std::next(BasicBlock::iterator(Dependency));
+ auto Next = ++Dependency->getIterator();
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, TLI);
+ DeleteDeadInstruction(Dependency, *MD, *TLI);
++NumFastStores;
MadeChange = true;
@@ -704,23 +797,22 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
SmallSetVector<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
- BasicBlock *Entry = BB.getParent()->begin();
- for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
- if (isa<AllocaInst>(I))
- DeadStackObjects.insert(I);
+ BasicBlock &Entry = BB.getParent()->front();
+ for (Instruction &I : Entry) {
+ if (isa<AllocaInst>(&I))
+ DeadStackObjects.insert(&I);
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
- DeadStackObjects.insert(I);
+ else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
+ DeadStackObjects.insert(&I);
}
// Treat byval or inalloca arguments the same, stores to them are dead at the
// end of the function.
- for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
- AE = BB.getParent()->arg_end(); AI != AE; ++AI)
- if (AI->hasByValOrInAllocaAttr())
- DeadStackObjects.insert(AI);
+ for (Argument &AI : BB.getParent()->args())
+ if (AI.hasByValOrInAllocaAttr())
+ DeadStackObjects.insert(&AI);
const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -729,10 +821,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
+ GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -744,7 +836,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
if (AllDead) {
- Instruction *Dead = BBI++;
+ Instruction *Dead = &*BBI++;
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
@@ -757,7 +849,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -765,9 +857,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI, TLI)) {
- Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
+ if (isInstructionTriviallyDead(&*BBI, TLI)) {
+ Instruction *Inst = &*BBI++;
+ DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -776,15 +868,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (isa<AllocaInst>(BBI)) {
// Remove allocas from the list of dead stack objects; there can't be
// any references before the definition.
- DeadStackObjects.remove(BBI);
+ DeadStackObjects.remove(&*BBI);
continue;
}
- if (auto CS = CallSite(BBI)) {
+ if (auto CS = CallSite(&*BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI, TLI))
- DeadStackObjects.remove(BBI);
+ if (isAllocLikeFn(&*BBI, TLI))
+ DeadStackObjects.remove(&*BBI);
// If this call does not access memory, it can't be loading any of our
// pointers.
@@ -795,10 +887,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A = AA->getModRefInfo(
- CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));
- return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ return A == MRI_ModRef || A == MRI_Ref;
});
// If all of the allocas were clobbered by the call then we're not going
@@ -864,8 +955,7 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- MemoryLocation StackLoc(I,
- getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 029b44c..7ef062e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -263,7 +264,6 @@ namespace {
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
- Function &F;
const TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
DominatorTree &DT;
@@ -281,20 +281,37 @@ public:
/// that dominated values can succeed in their lookup.
ScopedHTType AvailableValues;
- /// \brief A scoped hash table of the current values of loads.
+ /// A scoped hash table of the current values of previously encountered memory
+ /// locations.
///
- /// This allows us to get efficient access to dominating loads when we have
- /// a fully redundant load. In addition to the most recent load, we keep
- /// track of a generation count of the read, which is compared against the
- /// current generation count. The current generation count is incremented
+ /// This allows us to get efficient access to dominating loads or stores when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
- /// CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<
- BumpPtrAllocator,
- ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ /// CSE loads with other loads that have no intervening store. Ordering
+ /// events (such as fences or atomic instructions) increment the generation
+ /// count as well; essentially, we model these as writes to all possible
+ /// locations. Note that atomic and/or volatile loads and stores can be
+ /// present in the table; it is the responsibility of the consumer to inspect
+ /// the atomicity/volatility if needed.
+ struct LoadValue {
+ Value *Data;
+ unsigned Generation;
+ int MatchingId;
+ bool IsAtomic;
+ LoadValue()
+ : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+ LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic)
+ : Data(Data), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic) {}
+ };
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
- typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
- DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+ LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
@@ -308,10 +325,9 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
- const TargetTransformInfo &TTI, DominatorTree &DT,
- AssumptionCache &AC)
- : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
+ EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
+ DominatorTree &DT, AssumptionCache &AC)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
bool run();
@@ -382,57 +398,91 @@ private:
class ParseMemoryInst {
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
- : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
- MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
- MayReadFromMemory = Inst->mayReadFromMemory();
- MayWriteToMemory = Inst->mayWriteToMemory();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- MemIntrinsicInfo Info;
- if (!TTI.getTgtMemIntrinsic(II, Info))
- return;
- if (Info.NumMemRefs == 1) {
- Store = Info.WriteMem;
- Load = Info.ReadMem;
- MatchingId = Info.MatchingId;
- MayReadFromMemory = Info.ReadMem;
- MayWriteToMemory = Info.WriteMem;
- Vol = Info.Vol;
- Ptr = Info.PtrVal;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Load = true;
- Vol = !LI->isSimple();
- Ptr = LI->getPointerOperand();
+ : IsTargetMemInst(false), Inst(Inst) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ IsTargetMemInst = true;
+ }
+ bool isLoad() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return isa<LoadInst>(Inst);
+ }
+ bool isStore() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return isa<StoreInst>(Inst);
+ }
+ bool isAtomic() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ return Inst->isAtomic();
+ }
+ bool isUnordered() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isUnordered();
+ }
+ // Conservative answer
+ return !Inst->isAtomic();
+ }
+
+ bool isVolatile() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Store = true;
- Vol = !SI->isSimple();
- Ptr = SI->getPointerOperand();
+ return SI->isVolatile();
}
+ // Conservative answer
+ return true;
}
- bool isLoad() { return Load; }
- bool isStore() { return Store; }
- bool isVolatile() { return Vol; }
- bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
- return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+
+
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+ return (getPointerOperand() == Inst.getPointerOperand() &&
+ getMatchingId() == Inst.getMatchingId());
}
- bool isValid() { return Ptr != nullptr; }
- int getMatchingId() { return MatchingId; }
- Value *getPtr() { return Ptr; }
- bool mayReadFromMemory() { return MayReadFromMemory; }
- bool mayWriteToMemory() { return MayWriteToMemory; }
+ bool isValid() const { return getPointerOperand() != nullptr; }
- private:
- bool Load;
- bool Store;
- bool Vol;
- bool MayReadFromMemory;
- bool MayWriteToMemory;
// For regular (non-intrinsic) loads/stores, this is set to -1. For
// intrinsic loads/stores, the id is retrieved from the corresponding
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
- int MatchingId;
- Value *Ptr;
+ int getMatchingId() const {
+ if (IsTargetMemInst) return Info.MatchingId;
+ return -1;
+ }
+ Value *getPointerOperand() const {
+ if (IsTargetMemInst) return Info.PtrVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->getPointerOperand();
+ }
+ return nullptr;
+ }
+ bool mayReadFromMemory() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return Inst->mayReadFromMemory();
+ }
+ bool mayWriteToMemory() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return Inst->mayWriteToMemory();
+ }
+
+ private:
+ bool IsTargetMemInst;
+ MemIntrinsicInfo Info;
+ Instruction *Inst;
};
bool processNode(DomTreeNode *Node);
@@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
@@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
- // Ignore volatile loads.
- if (MemInst.isVolatile()) {
+ // Conservatively, we can't peek past the ordering implied by this
+ // operation, but we can add this load to our set of available values.
+ if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
- // Don't CSE across synchronization boundaries.
- if (Inst->mayWriteToMemory())
- ++CurrentGeneration;
- continue;
+ ++CurrentGeneration;
}
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal =
- AvailableLoads.lookup(MemInst.getPtr());
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing loads with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ // We can't replace an atomic load with one which isn't also atomic.
+ InVal.IsAtomic >= MemInst.isAtomic()) {
+ Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.first << '\n');
+ << " to: " << *InVal.Data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
LastStore = nullptr;
continue;
}
@@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // A release fence requires that all stores complete before it, but does
+ // not prevent the reordering of following loads 'before' the fence. As a
+ // result, we don't need to consider it as writing to memory and don't need
+ // to advance the generation. We do need to prevent DSE across the fence,
+ // but that's handled above.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ if (FI->getOrdering() == Release) {
+ assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+ continue;
+ }
+
+ // Write-back DSE - If we write back the same value we just loaded from
+ // the same location and haven't passed any intervening writes or ordering
+ // operations, we can remove the write. The primary benefit is in allowing
+ // the available load table to remain valid and to forward values past where
+ // the store originally was.
+ if (MemInst.isValid() && MemInst.isStore()) {
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data &&
+ InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+ InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing stores with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered()) {
+ assert((!LastStore ||
+ ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+ MemInst.getPointerOperand()) &&
+ "can't have an intervening store!");
+ DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ // We can avoid incrementing the generation count since we were able
+ // to eliminate this store.
+ continue;
+ }
+ }
+
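The write-back DSE block just added is the dual of the load lookup: a store whose value equals the table's entry for that pointer, in the current generation, writes what the location already holds and can be dropped without bumping the generation. A compact restatement under the same toy table model as the sketch above (hypothetical names, not LLVM API):

#include <cstdint>
#include <iostream>
#include <unordered_map>

struct LoadValue { uint64_t Data; unsigned Gen; };

int main() {
  std::unordered_map<const void *, LoadValue> Table;
  unsigned CurrentGeneration = 0;
  int X = 7;

  // "load X" -> record the value for this generation.
  Table[&X] = {7, CurrentGeneration};

  // "store 7, X" -> redundant write-back: same pointer, same value, same
  // generation. The store is dropped and the generation is *not* bumped,
  // so the table entry stays usable for later loads.
  uint64_t StoredVal = 7;
  auto It = Table.find(&X);
  bool Redundant = It != Table.end() && It->second.Data == StoredVal &&
                   It->second.Gen == CurrentGeneration;
  std::cout << (Redundant ? "store elided" : "store kept") << '\n';
}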
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
@@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we're going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ assert(LastStoreMemInst.isUnordered() &&
+ !LastStoreMemInst.isVolatile() &&
+ "Violated invariant");
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
@@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
-
- // Remember that this was the last store we saw for DSE.
- if (!MemInst.isVolatile())
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
+
+ // Remember that this was the last unordered store we saw for DSE. We
+ // don't yet handle DSE on ordered or volatile stores since we don't
+ // have a good way to model the ordering requirement for following
+ // passes once the store is removed. We could insert a fence, but
+ // since fences are slightly stronger than stores in their ordering,
+ // it's not clear this is a profitable transform. Another option would
+ // be to merge the ordering with that of the post dominating store.
+ if (MemInst.isUnordered() && !MemInst.isVolatile())
LastStore = Inst;
+ else
+ LastStore = nullptr;
}
}
}
@@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -751,7 +861,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
return CSE.run();
}
@@ -761,6 +871,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 0430c18..185cdbd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -30,7 +30,7 @@ public:
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
private:
@@ -41,7 +41,7 @@ private:
char FlattenCFGPass::ID = 0;
INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
@@ -59,7 +59,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
- if (FlattenCFG(BBIt++, AA)) {
+ if (FlattenCFG(&*BBIt++, AA)) {
LocalChange = true;
}
}
@@ -69,7 +69,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
}
bool FlattenCFGPass::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverChanged = false;
// iterativelyFlattenCFG can make some blocks dead.
while (iterativelyFlattenCFG(F, AA)) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index c931422..7f5d786 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
@@ -41,7 +43,7 @@ using namespace llvm;
// integer domain inputs, produce an integer output; fadd, for example.
//
// If a non-mappable instruction is seen, this entire def-use graph is marked
-// as non-transformable. If we see an instruction that converts from the
+// as non-transformable. If we see an instruction that converts from the
// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
/// The largest integer type worth dealing with.
@@ -60,6 +62,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
@@ -82,7 +85,9 @@ namespace {
}
char Float2Int::ID = 0;
-INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false)
// Given a FCmp predicate, return a matching ICmp predicate if one
// exists, otherwise return BAD_ICMP_PREDICATE.
@@ -125,7 +130,9 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
// Find the roots - instructions that convert from the FP domain to
// integer domain.
void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
+ if (isa<VectorType>(I.getType()))
+ continue;
switch (I.getOpcode()) {
default: break;
case Instruction::FPToUI:
@@ -133,7 +140,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
Roots.insert(&I);
break;
case Instruction::FCmp:
- if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
CmpInst::BAD_ICMP_PREDICATE)
Roots.insert(&I);
break;
@@ -176,7 +183,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) {
// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
// defs before their uses. Calculate the real range info.
-// Breadth-first walk of the use-def graph; determine the set of nodes
+// Breadth-first walk of the use-def graph; determine the set of nodes
// we care about and eagerly determine if some of them are poisonous.
void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
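
Aside: two hunks below (walkForwards and cleanup) replace explicit reverse-iterator loops with make_range(rbegin(), rend()). A self-contained sketch of the idiom, with a two-iterator wrapper standing in for llvm::iterator_range:

    #include <iostream>
    #include <vector>

    // Minimal stand-in for llvm::make_range / llvm::iterator_range.
    template <typename It> struct Range {
      It B, E;
      It begin() const { return B; }
      It end() const { return E; }
    };
    template <typename It> Range<It> make_range(It B, It E) { return {B, E}; }

    int main() {
      std::vector<int> V{1, 2, 3};
      // Visit elements in reverse without naming the iterator type.
      for (int X : make_range(V.rbegin(), V.rend()))
        std::cout << X << ' ';        // prints: 3 2 1
      return 0;
    }
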
@@ -222,14 +229,14 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
seen(I, unknownRange());
break;
}
-
+
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
ECs.unionSets(I, OI);
- if (SeenInsts.find(I)->second != badRange())
+ if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
- } else if (!isa<ConstantFP>(O)) {
+ } else if (!isa<ConstantFP>(O)) {
// Not an instruction or ConstantFP? We can't do anything.
seen(I, badRange());
}
@@ -240,11 +247,11 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
void Float2Int::walkForwards() {
- for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
- if (It->second != unknownRange())
+ for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) {
+ if (It.second != unknownRange())
continue;
- Instruction *I = It->first;
+ Instruction *I = It.first;
std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
switch (I->getOpcode()) {
// FIXME: Handle select and phi nodes.
@@ -299,7 +306,7 @@ void Float2Int::walkForwards() {
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
assert(SeenInsts.find(OI) != SeenInsts.end() &&
- "def not seen before use!");
+ "def not seen before use!");
OpRanges.push_back(SeenInsts.find(OI)->second);
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
// Work out if the floating point number can be losslessly represented
@@ -314,11 +321,11 @@ void Float2Int::walkForwards() {
APFloat F = CF->getValueAPF();
// First, weed out obviously incorrect values. Non-finite numbers
- // can't be represented and neither can negative zero, unless
+ // can't be represented and neither can negative zero, unless
// we're in fast math mode.
if (!F.isFinite() ||
(F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
- !I->hasNoSignedZeros())) {
+ !I->hasNoSignedZeros())) {
seen(I, badRange());
Abort = true;
break;
@@ -345,7 +352,7 @@ void Float2Int::walkForwards() {
// Reduce the operands' ranges to a single range and return.
if (!Abort)
- seen(I, Op(OpRanges));
+ seen(I, Op(OpRanges));
}
}
@@ -395,7 +402,7 @@ bool Float2Int::validateAndTransform() {
R.isFullSet() || R.isSignWrappedSet())
continue;
assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
-
+
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
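
Aside: a worked instance of the bit-width computation above (illustrative C++; the helper mirrors what APInt::getMinSignedBits is assumed to compute, the width of the minimal two's-complement encoding):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Bits in the minimal two's-complement encoding of V. Relies on
    // arithmetic right shift of negative values (guaranteed in C++20,
    // universal in practice).
    unsigned minSignedBits(int64_t V) {
      unsigned Bits = 1; // the sign bit
      while (V != 0 && V != -1) { V >>= 1; ++Bits; }
      return Bits;
    }

    int main() {
      // For a range with limits -200 and 100: take the wider of the two
      // encodings, plus one so the result can stay signed.
      assert(minSignedBits(-200) == 9 && minSignedBits(100) == 8);
      unsigned MinBW = std::max(minSignedBits(-200), minSignedBits(100)) + 1;
      assert(MinBW == 10);
      return 0;
    }
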
@@ -505,9 +512,8 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) {
// Perform dead code elimination on the instructions we just modified.
void Float2Int::cleanup() {
- for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
- I != E; ++I)
- I->first->eraseFromParent();
+ for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend()))
+ I.first->eraseFromParent();
}
bool Float2Int::runOnFunction(Function &F) {
@@ -534,7 +540,4 @@ bool Float2Int::runOnFunction(Function &F) {
return Modified;
}
-FunctionPass *llvm::createFloat2IntPass() {
- return new Float2Int();
-}
-
+FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 89a0d0a..a028b8c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -128,6 +129,7 @@ namespace {
uint32_t lookup(Value *V) const;
uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
Value *LHS, Value *RHS);
+ bool exists(Value *V) const;
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
@@ -388,6 +390,9 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
}
}
+/// Returns true if a value number exists for the specified value.
+bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
@@ -608,6 +613,10 @@ namespace {
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
+ // Block-local map of equivalent values to their leader; it does not
+ // propagate to any successors. Entries added mid-block are applied
+ // to the remaining instructions in the block.
+ SmallMapVector<llvm::Value *, llvm::Constant *, 4> ReplaceWithConstMap;
SmallVector<Instruction*, 8> InstrsToErase;
typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
@@ -689,16 +698,17 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
- // Helper fuctions of redundant load elimination
+ // Helper functions of redundant load elimination
bool processLoad(LoadInst *L);
bool processNonLocalLoad(LoadInst *L);
+ bool processAssumeIntrinsic(IntrinsicInst *II);
void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);
@@ -719,7 +729,9 @@ namespace {
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool replaceOperandsWithConsts(Instruction *I) const;
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge);
bool processFoldableCondBr(BranchInst *BI);
void addDeadBlock(BasicBlock *BB);
void assignValNumForDeadCode();
@@ -738,7 +750,8 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1290,8 +1303,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ for (const AvailableValueInBlock &AV : ValuesPerBlock) {
BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
@@ -1301,24 +1313,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
}
// Perform PHI construction.
- Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
-
- // If new PHI nodes were created, notify alias analysis.
- if (V->getType()->getScalarType()->isPointerTy()) {
- AliasAnalysis *AA = gvn.getAliasAnalysis();
-
- // Scan the new PHIs and inform alias analysis that we've added potentially
- // escaping uses to any values that are operands to these PHIs.
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
- PHINode *P = NewPHIs[i];
- for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- AA->addEscapingUse(P->getOperandUse(jj));
- }
- }
- }
-
- return V;
+ return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}
Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
@@ -1518,9 +1513,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
- SmallPtrSet<BasicBlock *, 4> Blockers;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- Blockers.insert(UnavailableBlocks[i]);
+ SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
+ UnavailableBlocks.end());
// Let's find the first basic block with more than one predecessor. Walk
// backwards through predecessors if needed.
@@ -1550,15 +1544,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// available.
MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+ for (const AvailableValueInBlock &AV : ValuesPerBlock)
+ FullyAvailableBlocks[AV.BB] = true;
+ for (BasicBlock *UnavailableBB : UnavailableBlocks)
+ FullyAvailableBlocks[UnavailableBB] = false;
SmallVector<BasicBlock *, 4> CriticalEdgePred;
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
- PI != E; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : predecessors(LoadBB)) {
+ // If any predecessor block is an EH pad that does not allow non-PHI
+ // instructions before the terminator, we can't PRE the load.
+ if (Pred->getTerminator()->isEHPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
@@ -1570,9 +1571,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- if (LoadBB->isLandingPad()) {
+ if (LoadBB->isEHPad()) {
DEBUG(dbgs()
- << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
@@ -1655,12 +1656,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
<< *NewInsts.back() << '\n');
// Assign value numbers to the new instructions.
- for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ for (Instruction *I : NewInsts) {
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
- VN.lookup_or_add(NewInsts[i]);
+ VN.lookup_or_add(I);
}
for (const auto &PredLoad : PredLoads) {
@@ -1677,6 +1678,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (Tags)
NewLoad->setAAMetadata(Tags);
+ if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
+ if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
+
// Transfer DebugLoc.
NewLoad->setDebugLoc(LI->getDebugLoc());
@@ -1704,6 +1710,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Non-local speculations are not allowed under ASan.
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
@@ -1777,6 +1787,63 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
+ assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
+ "This function can only be called with llvm.assume intrinsic");
+ Value *V = IntrinsicI->getArgOperand(0);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
+ if (Cond->isZero()) {
+ Type *Int8Ty = Type::getInt8Ty(V->getContext());
+ // Insert a new store to null before the intrinsic to indicate that
+ // this code is not reachable. FIXME: We could insert an unreachable
+ // instruction directly because we can modify the CFG.
+ new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ IntrinsicI);
+ }
+ markInstructionForDeletion(IntrinsicI);
+ return false;
+ }
+
+ Constant *True = ConstantInt::getTrue(V->getContext());
+ bool Changed = false;
+
+ for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
+ BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
+
+ // This property is only true in dominated successors; propagateEquality
+ // will check dominance for us.
+ Changed |= propagateEquality(V, True, Edge, false);
+ }
+
+ // We can replace assume value with true, which covers cases like this:
+ // call void @llvm.assume(i1 %cmp)
+ // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
+ ReplaceWithConstMap[V] = True;
+
+ // If one operand of a *cmp *eq is a constant, adding it to the map will cover this:
+ // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
+ // call void @llvm.assume(i1 %cmp)
+ // ret float %0 ; will change it to ret float 3.000000e+00
+ if (auto *CmpI = dyn_cast<CmpInst>(V)) {
+ if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
+ CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+ (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+ CmpI->getFastMathFlags().noNaNs())) {
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ if (isa<Constant>(CmpLHS))
+ std::swap(CmpLHS, CmpRHS);
+ auto *RHSConst = dyn_cast<Constant>(CmpRHS);
+
+ // If only one operand is constant.
+ if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
+ ReplaceWithConstMap[CmpLHS] = RHSConst;
+ }
+ }
+ return Changed;
+}
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
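
Aside: the processAssumeIntrinsic/ReplaceWithConstMap machinery added above boils down to a block-local substitution map consulted before each remaining instruction. A minimal sketch of that mechanism on a toy instruction stream (plain C++, not the LLVM API; the names are illustrative):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct Inst {
      std::string Op;
      std::vector<std::string> Operands;
    };

    int main() {
      // assume(%cmp), with %cmp defined as (%x == 3), seeds two facts for
      // the rest of the block: %cmp -> true and %x -> 3.
      std::map<std::string, std::string> ReplaceWithConstMap{
          {"%cmp", "true"}, {"%x", "3"}};

      std::vector<Inst> Block{{"br", {"%cmp"}}, {"ret", {"%x"}}};

      // Mirror of replaceOperandsWithConsts(): rewrite any operand with a
      // known constant leader; the map is cleared at the next block.
      for (Inst &I : Block)
        for (std::string &Op : I.Operands) {
          auto It = ReplaceWithConstMap.find(Op);
          if (It != ReplaceWithConstMap.end())
            Op = It->second;
        }

      for (const Inst &I : Block)
        std::cout << I.Op << ' ' << I.Operands[0] << '\n'; // br true / ret 3
      return 0;
    }
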
@@ -1789,7 +1856,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
// FIXME: If both the original and replacement value are part of the
// same control-flow region (meaning that the execution of one
- // guarentees the executation of the other), then we can combine the
+ // guarantees the execution of the other), then we can combine the
// noalias scopes here and do better than the general conservative
// answer used in combineMetadata().
@@ -1797,13 +1864,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// regions, and so we need a conservative combination of the noalias
// scopes.
static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
combineMetadata(ReplInst, I, KnownIDs);
}
}
@@ -1890,10 +1954,8 @@ bool GVN::processLoad(LoadInst *L) {
++NumGVNLoad;
return true;
}
- }
- // If the value isn't available, don't do anything!
- if (Dep.isClobber()) {
+ // If the value isn't available, don't do anything!
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -2049,11 +2111,31 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
+// Tries to replace an instruction's operands with constants, using
+// information from ReplaceWithConstMap.
+bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
+ bool Changed = false;
+ for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
+ Value *Operand = Instr->getOperand(OpNum);
+ auto it = ReplaceWithConstMap.find(Operand);
+ if (it != ReplaceWithConstMap.end()) {
+ assert(!isa<Constant>(Operand) &&
+ "Replacing constants with constants is invalid");
+ DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
+ << " in instruction " << *Instr << '\n');
+ Instr->setOperand(OpNum, it->second);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS,
- const BasicBlockEdge &Root) {
+/// If DominatesByEdge is false, the scope is instead the blocks dominated by Root.End.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2065,11 +2147,13 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
LHS = Item.first; RHS = Item.second;
- if (LHS == RHS) continue;
+ if (LHS == RHS)
+ continue;
assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
// Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+ if (isa<Constant>(LHS) && isa<Constant>(RHS))
+ continue;
// Prefer a constant on the right-hand side, or an Argument if no constants.
if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
@@ -2108,7 +2192,11 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
+ unsigned NumReplacements =
+ DominatesByEdge
+ ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
+ : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd());
+
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2180,7 +2268,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
- replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root);
+ DominatesByEdge
+ ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
+ : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
+ Root.getEnd());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2220,6 +2311,10 @@ bool GVN::processInstruction(Instruction *I) {
return true;
}
+ if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
+ if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
+ return processAssumeIntrinsic(IntrinsicI);
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (processLoad(LI))
return true;
@@ -2250,11 +2345,11 @@ bool GVN::processInstruction(Instruction *I) {
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
- Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
- Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
return Changed;
}
@@ -2276,7 +2371,7 @@ bool GVN::processInstruction(Instruction *I) {
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
}
}
return Changed;
@@ -2284,7 +2379,8 @@ bool GVN::processInstruction(Instruction *I) {
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
- if (I->getType()->isVoidTy()) return false;
+ if (I->getType()->isVoidTy())
+ return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
@@ -2306,17 +2402,21 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- Value *repl = findLeader(I->getParent(), Num);
- if (!repl) {
+ Value *Repl = findLeader(I->getParent(), Num);
+ if (!Repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
+ } else if (Repl == I) {
+ // If I was the result of a shortcut PRE, it might already be in the table
+ // and the best replacement for itself. Nothing to do.
+ return false;
}
// Remove it!
- patchAndReplaceAllUsesWith(I, repl);
- if (MD && repl->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
+ patchAndReplaceAllUsesWith(I, Repl);
+ if (MD && Repl->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Repl);
markInstructionForDeletion(I);
return true;
}
@@ -2331,7 +2431,7 @@ bool GVN::runOnFunction(Function& F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setAliasAnalysis(&getAnalysis<AAResultsWrapperPass>().getAAResults());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2341,10 +2441,10 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(
- BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD);
+ bool removedBlock =
+ MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD);
if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
@@ -2382,7 +2482,6 @@ bool GVN::runOnFunction(Function& F) {
return Changed;
}
-
bool GVN::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
@@ -2391,11 +2490,16 @@ bool GVN::processBlock(BasicBlock *BB) {
if (DeadBlocks.count(BB))
return false;
+ // Clear the map before every BB because its entries are only valid within a single BB.
+ ReplaceWithConstMap.clear();
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- ChangedFunction |= processInstruction(BI);
+ if (!ReplaceWithConstMap.empty())
+ ChangedFunction |= replaceOperandsWithConsts(&*BI);
+ ChangedFunction |= processInstruction(&*BI);
+
if (InstrsToErase.empty()) {
++BI;
continue;
@@ -2439,7 +2543,14 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Value *Op = Instr->getOperand(i);
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
-
+ // This could be a newly inserted instruction, in which case, we won't
+ // find a value number, and should give up before we hurt ourselves.
+ // FIXME: Rewrite the infrastructure to make it easier to value number
+ // and process newly inserted instructions.
+ if (!VN.exists(Op)) {
+ success = false;
+ break;
+ }
if (Value *V = findLeader(Pred, VN.lookup(Op))) {
Instr->setOperand(i, V);
} else {
@@ -2499,9 +2610,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
BasicBlock *CurrentBlock = CurInst->getParent();
predMap.clear();
- for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
@@ -2570,7 +2679,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// Create a PHI to make the value available in this block.
PHINode *Phi =
PHINode::Create(CurInst->getType(), predMap.size(),
- CurInst->getName() + ".pre-phi", CurrentBlock->begin());
+ CurInst->getName() + ".pre-phi", &CurrentBlock->front());
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
if (Value *V = predMap[i].first)
Phi->addIncoming(V, predMap[i].second);
@@ -2582,18 +2691,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->getScalarType()->isPointerTy()) {
- // Because we have added a PHI-use of the pointer value, it has now
- // "escaped" from alias analysis' perspective. We need to inform
- // AA of this.
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
- }
-
- if (MD)
- MD->invalidateCachedPointerInfo(Phi);
- }
+ if (MD && Phi->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
@@ -2616,15 +2715,15 @@ bool GVN::performPRE(Function &F) {
if (CurrentBlock == &F.getEntryBlock())
continue;
- // Don't perform PRE on a landing pad.
- if (CurrentBlock->isLandingPad())
+ // Don't perform PRE on an EH pad.
+ if (CurrentBlock->isEHPad())
continue;
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end();
BI != BE;) {
- Instruction *CurInst = BI++;
- Changed = performScalarPRE(CurInst);
+ Instruction *CurInst = &*BI++;
+ Changed |= performScalarPRE(CurInst);
}
}
@@ -2637,8 +2736,8 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
- BasicBlock *BB = SplitCriticalEdge(
- Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ BasicBlock *BB =
+ SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
if (MD)
MD->invalidateCachedPredecessors();
return BB;
@@ -2652,7 +2751,7 @@ bool GVN::splitCriticalEdges() {
do {
std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ CriticalEdgeSplittingOptions(DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
return true;
@@ -2728,17 +2827,14 @@ void GVN::addDeadBlock(BasicBlock *BB) {
DeadBlocks.insert(Dom.begin(), Dom.end());
// Figure out the dominance-frontier(D).
- for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
- E = Dom.end(); I != E; I++) {
- BasicBlock *B = *I;
- for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
- BasicBlock *S = *SI;
+ for (BasicBlock *B : Dom) {
+ for (BasicBlock *S : successors(B)) {
if (DeadBlocks.count(S))
continue;
bool AllPredDead = true;
- for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
- if (!DeadBlocks.count(*PI)) {
+ for (BasicBlock *P : predecessors(S))
+ if (!DeadBlocks.count(P)) {
AllPredDead = false;
break;
}
@@ -2766,10 +2862,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
continue;
SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
- for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; PI++) {
- BasicBlock *P = *PI;
-
+ for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
@@ -2794,7 +2887,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// R be the target of the dead out-coming edge.
// 1) Identify the set of dead blocks implied by the branch's dead outcoming
// edge. The result of this step will be {X| X is dominated by R}
-// 2) Identify those blocks which haves at least one dead prodecessor. The
+// 2) Identify those blocks which have at least one dead predecessor. The
// result of this step will be dominance-frontier(R).
// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
@@ -2829,14 +2922,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than checking if instruction involved is dead or not.
void GVN::assignValNumForDeadCode() {
- for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
- E = DeadBlocks.end(); I != E; I++) {
- BasicBlock *BB = *I;
- for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
- II != EE; II++) {
- Instruction *Inst = &*II;
- unsigned ValNum = VN.lookup_or_add(Inst);
- addToLeaderTable(ValNum, Inst, BB);
+ for (BasicBlock *BB : DeadBlocks) {
+ for (Instruction &Inst : *BB) {
+ unsigned ValNum = VN.lookup_or_add(&Inst);
+ addToLeaderTable(ValNum, &Inst, BB);
}
}
}
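
Aside: the numbered comment in the addDeadBlock hunk above describes a small fixed-point computation. A compact sketch over a toy CFG (plain C++; the dominated-set query of step 1 is approximated by an all-predecessors-dead fixed point, and the block names are invented):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using CFG = std::map<std::string, std::vector<std::string>>;

    // Grow the dead set from R: a successor whose predecessors are all
    // dead is dead too; otherwise it lies on the dominance frontier DF(R),
    // where only the PHI operands from dead blocks need updating.
    std::set<std::string> deadBlocks(const CFG &Succs, const CFG &Preds,
                                     const std::string &R) {
      std::set<std::string> Dead{R};
      std::vector<std::string> Work{R};
      while (!Work.empty()) {
        std::string B = Work.back();
        Work.pop_back();
        for (const std::string &S : Succs.at(B)) {
          if (Dead.count(S))
            continue;
          bool AllPredDead = true;
          for (const std::string &P : Preds.at(S))
            AllPredDead &= Dead.count(P) != 0;
          if (AllPredDead) {
            Dead.insert(S);
            Work.push_back(S);
          }
        }
      }
      return Dead;
    }

    int main() {
      CFG Succs{{"R", {"A"}}, {"A", {"B"}}, {"B", {}}, {"C", {"B"}}};
      CFG Preds{{"R", {}}, {"A", {"R"}}, {"B", {"A", "C"}}, {"C", {}}};
      return deadBlocks(Succs, Preds, "R").size() == 2 ? 0 : 1; // B in DF(R)
    }
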
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 2a954d9..ec5e15f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -28,9 +28,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -48,6 +50,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -83,64 +86,62 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
namespace {
struct RewritePhi;
-}
-namespace {
- class IndVarSimplify : public LoopPass {
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
-
- SmallVector<WeakVH, 16> DeadInsts;
- bool Changed;
- public:
-
- static char ID; // Pass identification, replacement for typeid
- IndVarSimplify()
- : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
- initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
- }
+class IndVarSimplify : public LoopPass {
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+public:
- private:
- void releaseMemory() override {
- DeadInsts.clear();
- }
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
- bool isValidRewrite(Value *FromVal, Value *ToVal);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
- void RewriteNonIntegerIVs(Loop *L);
+private:
+ void releaseMemory() override {
+ DeadInsts.clear();
+ }
- void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
- bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ void handleFloatingPointIV(Loop *L, PHINode *PH);
+ void rewriteNonIntegerIVs(Loop *L);
- Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar, SCEVExpander &Rewriter);
+ void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
- void SinkUnusedInvariants(Loop *L);
+ bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
+ void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
- Instruction *InsertPt, Type *Ty,
- bool &IsHighCostExpansion);
- };
+ Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
+
+ void sinkUnusedInvariants(Loop *L);
+
+ Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
+ Instruction *InsertPt, Type *Ty);
+};
}
char IndVarSimplify::ID = 0;
@@ -148,7 +149,7 @@ INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
@@ -158,10 +159,10 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
-/// isValidRewrite - Return true if the SCEV expansion generated by the
-/// rewriter can replace the original value. SCEV guarantees that it
-/// produces the same value, but the way it is produced may be illegal IR.
-/// Ideally, this function will only be called for verification.
+/// Return true if the SCEV expansion generated by the rewriter can replace the
+/// original value. SCEV guarantees that it produces the same value, but the way
+/// it is produced may be illegal IR. Ideally, this function will only be
+/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// If an SCEV expression subsumed multiple pointers, its expansion could
// reassociate the GEP changing the base pointer. This is illegal because the
@@ -175,10 +176,10 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// because it understands lcssa phis while SCEV does not.
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
FromPtr = GEP->getPointerOperand();
}
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
ToPtr = GEP->getPointerOperand();
}
if (FromPtr != FromVal || ToPtr != ToVal) {
@@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
- DominatorTree *DT) {
+ DominatorTree *DT, LoopInfo *LI) {
PHINode *PHI = dyn_cast<PHINode>(User);
if (!PHI)
return User;
@@ -234,17 +235,28 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertPt = InsertBB->getTerminator();
}
assert(InsertPt && "Missing phi operand");
- assert((!isa<Instruction>(Def) ||
- DT->dominates(cast<Instruction>(Def), InsertPt)) &&
- "def does not dominate all uses");
- return InsertPt;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
}
//===----------------------------------------------------------------------===//
-// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
-/// ConvertToSInt - Convert APF to an integer, if possible.
+/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
// See if we can convert this to an int64_t
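
Aside: the new tail of getInsertPointForUses above walks the immediate-dominator chain until it reaches a block in the def's loop. A toy rendering of that walk (plain C++; the Node fields stand in for the DomTree and LoopInfo queries):

    #include <cassert>

    struct Node {
      Node *IDom = nullptr; // immediate dominator; null at the root
      int LoopId = 0;       // 0 = not inside any loop
    };

    // Climb from InsertPt toward the dominator-tree root and return the
    // first node whose loop matches DefLoop -- hoisting the insertion
    // point into the def's loop, as the pass does with terminators.
    Node *hoistToLoop(Node *InsertPt, int DefLoop) {
      for (Node *N = InsertPt; N; N = N->IDom)
        if (N->LoopId == DefLoop)
          return N;
      return nullptr; // unreachable when the def dominates the use
    }

    int main() {
      Node Root{nullptr, 0}, Outer{&Root, 1}, Inner{&Outer, 2};
      assert(hoistToLoop(&Inner, 1) == &Outer); // climbs one level
      assert(hoistToLoop(&Inner, 2) == &Inner); // already in the loop
      return 0;
    }
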
@@ -256,8 +268,8 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
return true;
}
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
+/// If the loop has a floating-point induction variable, then insert a corresponding
+/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
/// bar(i)
@@ -265,13 +277,12 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
// Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+ auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
@@ -279,8 +290,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check IV increment. Reject this PN if increment operation is not
// an add or increment value can not be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
@@ -456,14 +466,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstInsertionPt());
+ &*PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
// First step. Check to see if there are any floating-point recurrences.
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
@@ -477,7 +487,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
@@ -488,7 +498,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
namespace {
// Collect information about PHI nodes which can be transformed in
-// RewriteLoopExitValues.
+// rewriteLoopExitValues.
struct RewritePhi {
PHINode *PN;
unsigned Ith; // Ith incoming value.
@@ -501,70 +511,37 @@ struct RewritePhi {
};
}
-Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
+Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
Loop *L, Instruction *InsertPt,
- Type *ResultTy,
- bool &IsHighCostExpansion) {
- using namespace llvm::PatternMatch;
-
- if (!Rewriter.isHighCostExpansion(S, L)) {
- IsHighCostExpansion = false;
- return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
- }
-
+ Type *ResultTy) {
// Before expanding S into an expensive LLVM expression, see if we can use an
- // already existing value as the expansion for S. There is potential to make
- // this significantly smarter, but this simple heuristic already gets some
- // interesting cases.
-
- SmallVector<BasicBlock *, 4> Latches;
- L->getLoopLatches(Latches);
-
- for (BasicBlock *BB : Latches) {
- ICmpInst::Predicate Pred;
- Instruction *LHS, *RHS;
- BasicBlock *TrueBB, *FalseBB;
-
- if (!match(BB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
- TrueBB, FalseBB)))
- continue;
-
- if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
- IsHighCostExpansion = false;
- return LHS;
- }
-
- if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
- IsHighCostExpansion = false;
- return RHS;
- }
- }
+ // already existing value as the expansion for S.
+ if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
+ if (ExistingValue->getType() == ResultTy)
+ return ExistingValue;
// We didn't find anything, fall back to using SCEVExpander.
- assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
- IsHighCostExpansion = true;
return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}
//===----------------------------------------------------------------------===//
-// RewriteLoopExitValues - Optimize IV users outside the loop.
+// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
-/// RewriteLoopExitValues - Check to see if this loop has a computable
-/// loop-invariant execution count. If so, this means that we can compute the
-/// final value of any expressions that are recurrent in the loop, and
-/// substitute the exit values from the loop into any instructions outside of
-/// the loop that use the final values of the current expressions.
+/// Check to see if this loop has a computable loop-invariant execution count.
+/// If so, this means that we can compute the final value of any expressions
+/// that are recurrent in the loop, and substitute the exit values from the loop
+/// into any instructions outside of the loop that use the final values of the
+/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
- // Verify the input to the pass in already in LCSSA form.
- assert(L->isLCSSAForm(*DT));
+void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Check a pre-condition.
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -679,9 +656,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
- bool HighCost = false;
- Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
- PN->getType(), HighCost);
+ bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
+ Value *ExitVal =
+ expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
@@ -698,7 +675,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
}
- bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
@@ -735,10 +712,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
-/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
-/// do rewriting aggressively.
-bool IndVarSimplify::CanLoopBeDeleted(
+/// Check whether it is possible to delete the loop after rewriting exit
+/// value. If it is possible, ignore ReplaceExitValue and do rewriting
+/// aggressively.
+bool IndVarSimplify::canLoopBeDeleted(
Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
BasicBlock *Preheader = L->getLoopPreheader();
@@ -782,14 +759,9 @@ bool IndVarSimplify::CanLoopBeDeleted(
++BI;
}
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI) {
- for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
- ++BI) {
- if (BI->mayHaveSideEffects())
- return false;
- }
- }
+ for (auto *BB : L->blocks())
+ if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ return false;
return true;
}
@@ -799,22 +771,19 @@ bool IndVarSimplify::CanLoopBeDeleted(
//===----------------------------------------------------------------------===//
namespace {
- // Collect information about induction variables that are used by sign/zero
- // extend operations. This information is recorded by CollectExtend and
- // provides the input to WidenIV.
- struct WideIVInfo {
- PHINode *NarrowIV;
- Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was a sext user seen before a zext?
-
- WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
- IsSigned(false) {}
- };
+// Collect information about induction variables that are used by sign/zero
+// extend operations. This information is recorded by CollectExtend and provides
+// the input to WidenIV.
+struct WideIVInfo {
+ PHINode *NarrowIV = nullptr;
+ Type *WidestNativeType = nullptr; // Widest integer type created by [sz]ext
+ bool IsSigned = false; // Was a sext user seen before a zext?
+};
}
-/// visitCast - Update information about the induction variable that is
-/// extended by this sign or zero extend operation. This is used to determine
-/// the final width of the IV before actually widening it.
+/// Update information about the induction variable that is extended by this
+/// sign or zero extend operation. This is used to determine the final width of
+/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
@@ -855,24 +824,29 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
namespace {
-/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
-/// WideIV that computes the same value as the Narrow IV def. This avoids
-/// caching Use* pointers.
+/// Record a link in the Narrow IV def-use chain along with the WideIV that
+/// computes the same value as the Narrow IV def. This avoids caching Use*
+/// pointers.
struct NarrowIVDefUse {
- Instruction *NarrowDef;
- Instruction *NarrowUse;
- Instruction *WideDef;
-
- NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
-
- NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
- NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
};
-/// WidenIV - The goal of this transform is to remove sign and zero extends
-/// without creating any new induction variables. To do this, it creates a new
-/// phi of the wider type and redirects all users, either removing extends or
-/// inserting truncs whenever we stop propagating the type.
+/// The goal of this transform is to remove sign and zero extends without
+/// creating any new induction variables. To do this, it creates a new phi of
+/// the wider type and redirects all users, either removing extends or inserting
+/// truncs whenever we stop propagating the type.
///
class WidenIV {
// Parameters
@@ -913,32 +887,35 @@ public:
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- PHINode *CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *createWideIV(SCEVExpander &Rewriter);
protected:
- Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use);
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
- Instruction *CloneIVUser(NarrowIVDefUse DU);
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+ const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
- const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+ const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
- const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const;
- Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
- bool WidenLoopCompare(NarrowIVDefUse DU);
+ bool widenLoopCompare(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance
-/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
-/// gratuitous for this purpose.
+/// Perform a quick domtree based check for loop invariance assuming that V is
+/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
+/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst)
@@ -947,8 +924,8 @@ static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
return DT->properlyDominates(Inst->getParent(), L->getHeader());
}
-Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use) {
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
// Set the debug location and conservative insertion point.
IRBuilder<> Builder(Use);
// Hoist the insertion point into loop preheaders as far as possible.
@@ -961,10 +938,11 @@ Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Builder.CreateZExt(NarrowOper, WideType);
}
-/// CloneIVUser - Instantiate a wide operation to replace a narrow
-/// operation. This only needs to handle operations that can evaluation to
-/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
-Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
@@ -973,40 +951,140 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
-
- // Replace NarrowDef operands with WideDef. Otherwise, we don't know
- // anything about the narrow operand yet so must insert a [sz]ext. It is
- // probably loop invariant and will be folded or hoisted. If it actually
- // comes from a widened IV, it should be removed during a future call to
- // WidenIVUse.
- Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
- Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
-
- BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
- BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
- LHS, RHS,
- NarrowBO->getName());
- IRBuilder<> Builder(DU.NarrowUse);
- Builder.Insert(WideBO);
- if (const OverflowingBinaryOperator *OBO =
- dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
- if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse = nullptr;
+
+ switch (NarrowUse->getOpcode()) {
+ default:
+ llvm_unreachable("No other possibility!");
+
+ case Instruction::Add:
+ WideUse = SE->getAddExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Mul:
+ WideUse = SE->getMulExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::UDiv:
+ WideUse = SE->getUDivExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Sub:
+ WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
+ break;
}
- return WideBO;
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = IsSigned;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
}
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
}
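
The reason GuessNonIVOperand is tried with both extension kinds: sext and zext of the non-IV operand agree exactly when that operand is non-negative, so for negative values at most one guess can reproduce the widened recurrence. A standalone numeric check of that fact, independent of SCEV (sext8/zext8 are illustrative names):

#include <cstdint>
#include <iostream>

int32_t sext8(int8_t V) { return static_cast<int32_t>(V); }
int32_t zext8(int8_t V) { return static_cast<int32_t>(static_cast<uint8_t>(V)); }

int main() {
  // Exhaustively confirm: the two guesses coincide iff the value is >= 0.
  for (int V = -128; V <= 127; ++V) {
    int8_t N = static_cast<int8_t>(V);
    if ((sext8(N) == zext8(N)) != (V >= 0))
      std::cout << "counterexample at " << V << "\n"; // never printed
  }
  std::cout << "sext and zext agree exactly on non-negative values\n";
}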
-const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const {
if (OpCode == Instruction::Add)
return SE->getAddExpr(LHS, RHS);
@@ -1022,7 +1100,7 @@ const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
-const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
const unsigned OpCode = DU.NarrowUse->getOpcode();
@@ -1062,19 +1140,18 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
if (ExtendOperIdx == 0)
std::swap(lhs, rhs);
const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
return nullptr;
return AddRec;
}
-/// GetWideRecurrence - Is this instruction potentially interesting for further
-/// simplification after widening it's type? In other words, can the
-/// extend be safely hoisted out of the loop with SCEV reducing the value to a
-/// recurrence on the same loop. If so, return the sign or zero extended
-/// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop with SCEV reducing the value to a recurrence on the same loop? If
+/// so, return the sign or zero extended recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return nullptr;
@@ -1097,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
-static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
<< " for user " << *DU.NarrowUse << "\n");
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
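
The fallback is sound because truncating the wide def recovers the narrow value exactly, whichever extension produced it. A quick standalone check of that round-trip for i64 -> i32 (plain casts stand in for the inserted CreateTrunc):

#include <cstdint>
#include <iostream>

int main() {
  for (int32_t Narrow : {0, 7, -3, INT32_MIN}) {
    int64_t Wide = static_cast<int64_t>(Narrow); // sext-widened IV value
    int32_t Back = static_cast<int32_t>(Wide);   // the inserted trunc
    std::cout << (Back == Narrow ? "round-trip ok" : "mismatch") << "\n";
  }
}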
@@ -1108,13 +1186,27 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
-bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
+bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
if (!Cmp)
return false;
- // Sign of IV user and compare must match.
- if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is always positive (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is always positive. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
return false;
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
@@ -1123,20 +1215,21 @@ bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
if (CastWidth < IVWidth) {
- Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
}
return true;
}
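
The NeverNegative case can be checked exhaustively at narrow widths. A standalone sketch for i8 -> i32, mirroring case (A) above with ordinary casts in place of zext/sext (no LLVM types involved):

#include <cstdint>
#include <iostream>

int main() {
  bool AllAgree = true;
  for (int A = 0; A <= 127; ++A)      // A is the always-positive narrow IV
    for (int B = -128; B <= 127; ++B) {
      bool Narrow = static_cast<int8_t>(A) < static_cast<int8_t>(B);
      bool Wide = static_cast<int32_t>(static_cast<uint8_t>(A)) <  // zext(IV)
                  static_cast<int32_t>(static_cast<int8_t>(B));    // sext(other)
      AllAgree &= (Narrow == Wide);
    }
  std::cout << (AllAgree ? "signed compare widens safely\n" : "bug\n");
}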
-/// WidenIVUse - Determine whether an individual user of the narrow IV can be
-/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1145,13 +1238,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// After SimplifyCFG most loop exit targets have a single predecessor.
// Otherwise fall back to a truncate within the loop.
if (UsePhi->getNumOperands() != 1)
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
else {
PHINode *WidePhi =
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
@@ -1200,20 +1293,20 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
if (!WideAddRec)
- WideAddRec = GetExtendedOperandRecurrence(DU);
+ WideAddRec = getExtendedOperandRecurrence(DU);
if (!WideAddRec) {
// If use is a loop condition, try to promote the condition instead of
// truncating the IV first.
- if (WidenLoopCompare(DU))
+ if (widenLoopCompare(DU))
return nullptr;
  // This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
@@ -1228,7 +1321,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
- WideUse = CloneIVUser(DU);
+ WideUse = cloneIVUser(DU, WideAddRec);
if (!WideUse)
return nullptr;
}
@@ -1248,9 +1341,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
return WideUse;
}
-/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+/// Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NeverNegative =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getConstant(NarrowSCEV->getType(), 0));
for (User *U : NarrowDef->users()) {
Instruction *NarrowUser = cast<Instruction>(U);
@@ -1258,21 +1355,21 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
if (!Widened.insert(NarrowUser).second)
continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
+ NarrowIVUsers.push_back(
+ NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative));
}
}
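
For an affine recurrence, the non-negativity fact that isKnownPredicate establishes has a simple shape: if the start and the step are both non-negative and the IV does not wrap, every value is non-negative. A toy standalone version of that sufficient condition (neverNegativeAffine is an illustrative name; real SCEV reasoning is far more general):

#include <iostream>

// Sufficient (not necessary) condition for {Start,+,Step} >= 0, assuming
// the recurrence does not wrap.
bool neverNegativeAffine(long Start, long Step) {
  return Start >= 0 && Step >= 0;
}

int main() {
  std::cout << neverNegativeAffine(0, 1) << "\n";  // 1: {0,+,1} stays >= 0
  std::cout << neverNegativeAffine(-1, 1) << "\n"; // 0: first value is negative
}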
-/// CreateWideIV - Process a single induction variable. First use the
-/// SCEVExpander to create a wide induction variable that evaluates to the same
-/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
-/// interesting IV users, the narrow IV will be isolated for removal by
-/// DeleteDeadPHIs.
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
-PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
@@ -1302,11 +1399,11 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// either find an existing phi or materialize a new one. Either way, we
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
// of the phi-SCC dominates the loop entry.
- Instruction *InsertPt = L->getHeader()->begin();
+ Instruction *InsertPt = &L->getHeader()->front();
WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
// Remembering the WideIV increment generated by SCEVExpander allows
- // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
// employ a general reuse mechanism because the call above is the only call to
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
@@ -1329,13 +1426,13 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *WideUse = WidenIVUse(DU, Rewriter);
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
pushNarrowIVUsers(DU.NarrowUse, WideUse);
- // WidenIVUse may have removed the def-use edge.
+ // widenIVUse may have removed the def-use edge.
if (DU.NarrowDef->use_empty())
DeadInsts.emplace_back(DU.NarrowDef);
}
@@ -1352,38 +1449,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
namespace {
- class IndVarSimplifyVisitor : public IVVisitor {
- ScalarEvolution *SE;
- const TargetTransformInfo *TTI;
- PHINode *IVPhi;
-
- public:
- WideIVInfo WI;
-
- IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const TargetTransformInfo *TTI,
- const DominatorTree *DTree)
- : SE(SCEV), TTI(TTI), IVPhi(IV) {
- DT = DTree;
- WI.NarrowIV = IVPhi;
- if (ReduceLiveIVs)
- setSplitOverflowIntrinsics();
- }
+class IndVarSimplifyVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ PHINode *IVPhi;
- // Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
- };
+public:
+ WideIVInfo WI;
+
+ IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+ const TargetTransformInfo *TTI,
+ const DominatorTree *DTree)
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
+ DT = DTree;
+ WI.NarrowIV = IVPhi;
+ if (ReduceLiveIVs)
+ setSplitOverflowIntrinsics();
+ }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
+};
}
-/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
-/// users. Each successive simplification may push more users which may
-/// themselves be candidates for simplification.
+/// Iteratively perform simplification on a worklist of IV users. Each
+/// successive simplification may push more users which may themselves be
+/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-void IndVarSimplify::SimplifyAndExtend(Loop *L,
+void IndVarSimplify::simplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
- LPPassManager &LPM) {
+ LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
@@ -1400,14 +1497,14 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
// evaluation of sign/zero extend evaluation until needed, and avoid running
- // other SCEV based analysis prior to SimplifyAndExtend.
+ // other SCEV based analysis prior to simplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
@@ -1416,7 +1513,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
for (; !WideIVs.empty(); WideIVs.pop_back()) {
WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
- if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
@@ -1425,12 +1522,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
}
//===----------------------------------------------------------------------===//
-// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
+/// Return true if this loop's backedge taken count expression can be safely and
+/// cheaply expanded into an instruction sequence that can be used by
+/// linearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
@@ -1461,8 +1558,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
return true;
}
-/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
-/// invariant value to the phi.
+/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
@@ -1513,8 +1609,8 @@ static ICmpInst *getLoopTest(Loop *L) {
return dyn_cast<ICmpInst>(BI->getCondition());
}
-/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
-/// that the current exit test is already sufficiently canonical.
+/// linearFunctionTestReplace policy. Return true unless we can show that the
+/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR to simplify the exit condition to an ICMP.
ICmpInst *Cond = getLoopTest(L);
@@ -1574,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
return false;
// Optimistically handle other instructions.
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
- if (!Visited.insert(*OI).second)
+ for (Value *Op : I->operands()) {
+ if (!Visited.insert(Op).second)
continue;
- if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ if (!hasConcreteDefImpl(Op, Visited, Depth+1))
return false;
}
return true;
@@ -1594,8 +1690,8 @@ static bool hasConcreteDef(Value *V) {
return hasConcreteDefImpl(V, Visited, 0);
}
-/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
-/// be rewritten) loop exit test.
+/// Return true if this IV has no uses other than the (soon to be rewritten)
+/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
@@ -1608,7 +1704,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
return true;
}
-/// FindLoopCounter - Find an affine IV in canonical form.
+/// Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// a valid count without scaling the address stride, so it remains a pointer
@@ -1702,8 +1798,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
-/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
-/// holds the RHS of the new loop test.
+/// Help linearFunctionTestReplace by generating a value that holds the RHS of
+/// the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -1785,13 +1881,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
}
}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
+/// This method rewrites the exit condition of the loop to be a canonical !=
+/// comparison against the incremented loop induction variable. This pass is
+/// able to rewrite the exit tests of any loop where the SCEV analysis can
+/// determine a loop-invariant trip count of the loop, which is actually a much
+/// broader range than just linear tests.
Value *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
+linearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
@@ -1809,7 +1905,7 @@ LinearFunctionTestReplace(Loop *L,
// This addition may overflow, which is valid as long as the comparison is
// truncated to BackedgeTakenCount->getType().
IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
+ SE->getOne(BackedgeTakenCount->getType()));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
@@ -1847,8 +1943,8 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *ARStep = AR->getStepRecurrence(*SE);
// For constant IVCount, avoid truncation.
if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
- const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
- APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+ const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
+ APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
// Note that the post-inc value of BackedgeTakenCount may have overflowed
// above such that IVCount is now zero.
if (IVCount != BackedgeTakenCount && Count == 0) {
@@ -1886,21 +1982,21 @@ LinearFunctionTestReplace(Loop *L,
}
//===----------------------------------------------------------------------===//
-// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+// sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
- BasicBlock::iterator I = Preheader->getTerminator();
+ Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
+ BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
--I;
// New instructions were inserted at the end of the preheader.
@@ -1920,8 +2016,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Skip landingpad instructions.
- if (isa<LandingPadInst>(I))
+ // Skip eh pad instructions.
+ if (I->isEHPad())
continue;
// Don't sink alloca: we never want to sink static alloca's out of the
@@ -1953,7 +2049,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
continue;
// Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
+ Instruction *ToMove = &*I;
bool Done = false;
if (I != Preheader->begin()) {
@@ -1994,7 +2090,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -2007,7 +2103,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
- RewriteNonIntegerIVs(L);
+ rewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
@@ -2024,7 +2120,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
+ simplifyAndExtend(L, Rewriter, LI);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -2034,7 +2130,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
//
if (ReplaceExitValue != NeverRepl &&
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- RewriteLoopExitValues(L, Rewriter);
+ rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -2054,7 +2150,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// explicitly check any assumptions made by SCEV. Brittle.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
Rewriter);
}
}
@@ -2074,13 +2170,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
- SinkUnusedInvariants(L);
+ sinkUnusedInvariants(L);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
+
// Check a post-condition.
- assert(L->isLCSSAForm(*DT) &&
- "Indvars did not leave the loop in lcssa form!");
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index cbdacad..dea61f6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -214,8 +214,8 @@ public:
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -224,8 +224,15 @@ public:
char InductiveRangeCheckElimination::ID = 0;
}
-INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
- "Inductive range check elimination", false, false)
+INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
const char *InductiveRangeCheck::rangeCheckKindToStr(
InductiveRangeCheck::RangeCheckKind RCK) {
@@ -1044,9 +1051,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, BBInsertLocation);
+ &F, &*BBInsertLocation);
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- BBInsertLocation);
+ &*BBInsertLocation);
BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
bool Increasing = LS.IndVarIncreasing;
@@ -1399,8 +1406,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
LLVMContext &Context = Preheader->getContext();
InductiveRangeCheck::AllocatorTy IRCAlloc;
SmallVector<InductiveRangeCheck *, 16> RangeChecks;
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1130d22..087ce8a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,15 +18,22 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -36,6 +43,8 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
#define DEBUG_TYPE "jump-threading"
@@ -49,6 +58,13 @@ BBDuplicateThreshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+static cl::opt<unsigned>
+ImplicationSearchThreshold(
+ "jump-threading-implication-search-threshold",
+ cl::desc("The number of predecessors to search for a stronger "
+ "condition to use to thread over a weaker condition"),
+ cl::init(3), cl::Hidden);
+
namespace {
// These are at global scope so static functions can use them too.
typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
@@ -80,6 +96,9 @@ namespace {
class JumpThreading : public FunctionPass {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -114,9 +133,15 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
+ void releaseMemory() override {
+ BFI.reset();
+ BPI.reset();
+ }
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
@@ -134,9 +159,16 @@ namespace {
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
+ bool ProcessImpliedCondition(BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+
+ private:
+ BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix);
+ void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
+ BasicBlock *NewBB, BasicBlock *SuccBB);
};
}
@@ -160,11 +192,21 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
+ BFI.reset();
+ BPI.reset();
+ // When profile data is available, we need to update edge weights after
+ // successful jump threading, which requires both BPI and BFI being available.
+ HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
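
Since BPI and BFI are costly to build, the pass constructs them only when the function actually carries profile data and drops them in releaseMemory. A stand-in sketch of that lazy-ownership pattern with placeholder types (ProbInfo/FreqInfo are not LLVM classes):

#include <iostream>
#include <memory>

struct ProbInfo {};                 // stands in for BranchProbabilityInfo
struct FreqInfo { ProbInfo &BPI; }; // stands in for BlockFrequencyInfo

int main() {
  bool HasProfileData = true; // F.getEntryCount().hasValue() in the pass
  std::unique_ptr<ProbInfo> BPI;
  std::unique_ptr<FreqInfo> BFI;
  if (HasProfileData) {
    BPI.reset(new ProbInfo());
    BFI.reset(new FreqInfo{*BPI}); // BFI depends on BPI, so build BPI first
  }
  std::cout << (BFI ? "profile-guided weight updates enabled\n"
                    : "no profile data; skipped\n");
}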
// Remove unreachable blocks from function as they may result in infinite
// loop. We do threading if we found something profitable. Jump threading a
// branch can create other opportunities. If these opportunities form a cycle
- // i.e. if any jump treading is undoing previous threading in the path, then
+ // i.e. if any jump threading is undoing previous threading in the path, then
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have cycle with no back edge.
@@ -176,7 +218,7 @@ bool JumpThreading::runOnFunction(Function &F) {
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I;
+ BasicBlock *BB = &*I;
// Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -239,11 +281,26 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
unsigned Threshold) {
  /// Ignore PHI nodes; these will be flattened when duplication happens.
- BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ BasicBlock::const_iterator I(BB->getFirstNonPHI());
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
+ unsigned Bonus = 0;
+ const TerminatorInst *BBTerm = BB->getTerminator();
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(BBTerm))
+ Bonus = 6;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(BBTerm))
+ Bonus = 8;
+
+ // Bump the threshold up so the early exit from the loop doesn't skip the
+ // terminator-based Size adjustment at the end.
+ Threshold += Bonus;
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -260,6 +317,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
+ // Bail out if this instruction gives back a token type, it is not possible
+ // to duplicate it if it is used outside this BB.
+ if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
+ return ~0U;
+
// All other instructions count for at least one unit.
++Size;
@@ -268,7 +330,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate())
+ if (CI->cannotDuplicate() || CI->isConvergent())
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
@@ -279,16 +341,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
}
}
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(I))
- Size = Size > 6 ? Size-6 : 0;
-
- // The same holds for indirect branches, but slightly more so.
- if (isa<IndirectBrInst>(I))
- Size = Size > 8 ? Size-8 : 0;
-
- return Size;
+ return Size > Bonus ? Size - Bonus : 0;
}
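
Why the bonus is added to the threshold up front rather than only subtracted at the end: the scan can bail out early once Size exceeds Threshold, and without the bump that early exit would reject blocks whose adjusted cost is acceptable. A standalone model of the comparison (assuming the loop's early exit tests Size > Threshold, as in the surrounding code):

#include <iostream>

unsigned costWithBonus(unsigned RawSize, unsigned Threshold, unsigned Bonus) {
  Threshold += Bonus;      // the up-front bump from the patch
  if (RawSize > Threshold) // models the early bail-out during the scan
    return ~0U;
  return RawSize > Bonus ? RawSize - Bonus : 0;
}

int main() {
  // Raw size 8, threshold 6, switch bonus 6: without the bump the scan
  // would bail at 8 > 6, even though the adjusted cost 8 - 6 = 2 is fine.
  std::cout << costWithBonus(8, 6, 6) << "\n"; // prints 2
}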
/// FindLoopHeaders - We do not want jump threading to turn proper loop
@@ -669,7 +722,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ const TerminatorInst *TI = SinglePred->getTerminator();
+ if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
@@ -761,7 +815,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// If we're branching on a conditional, LVI might be able to determine
  // its value at the branch instruction. We only handle comparisons
// against a constant at this time.
- // TODO: This should be extended to handle switches as well.
+ // TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
if (CondBr && CondConst && CondBr->isConditional()) {
@@ -829,9 +883,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+ // Search for a stronger dominating condition that can be used to simplify a
+ // conditional branch leaving BB.
+ if (ProcessImpliedCondition(BB))
+ return true;
+
+ return false;
+}
+
+bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ BasicBlock *CurrentBB = BB;
+ BasicBlock *CurrentPred = BB->getSinglePredecessor();
+ unsigned Iter = 0;
+
+ auto &DL = BB->getModule()->getDataLayout();
+
+ while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
+ auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
+ if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB)
+ return false;
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
+ if (isImpliedCondition(PBI->getCondition(), Cond, DL)) {
+ BI->getSuccessor(1)->removePredecessor(BB);
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ BI->eraseFromParent();
+ return true;
+ }
+ CurrentBB = CurrentPred;
+ CurrentPred = CurrentBB->getSinglePredecessor();
+ }
return false;
}
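
The transform rests on ordinary logical implication: if control reached BB along the true edge of a stronger predecessor condition, a weaker branch condition in BB is a constant true. A tiny standalone model of the "(X == 4) implies (X > 0)" example from the removed TODO:

#include <iostream>

bool stronger(int X) { return X == 4; } // predecessor's branch condition
bool weaker(int X) { return X > 0; }    // BB's own branch condition

int main() {
  for (int X = -4; X <= 8; ++X)
    if (stronger(X) && !weaker(X))
      std::cout << "counterexample\n"; // never printed
  std::cout << "on the true edge of X == 4, the branch on X > 0 folds\n";
}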
@@ -850,10 +935,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
- // If the load is defined in a landing pad, it can't be partially redundant,
- // because the edges between the invoke and the landing pad cannot have other
+ // If the load is defined in an EH pad, it can't be partially redundant,
+ // because the edges between the invoke and the EH pad cannot have other
// instructions between them.
- if (LoadBB->isLandingPad())
+ if (LoadBB->isEHPad())
return false;
Value *LoadedPtr = LI->getOperand(0);
@@ -866,11 +951,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
- BasicBlock::iterator BBIt = LI;
+ BasicBlock::iterator BBIt(LI);
if (Value *AvailableVal =
- FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
- // If the value if the load is locally available within the block, just use
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) {
+ // If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -914,7 +999,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
AAMDNodes ThisAATags;
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt,
+ DefMaxInstsToScan,
nullptr, &ThisAATags);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
@@ -968,8 +1054,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
}
// Split them out to their own block.
- UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -995,7 +1080,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Create a PHI node at the start of the block for the PRE'd load value.
pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
- LoadBB->begin());
+ &LoadBB->front());
PN->takeName(LI);
PN->setDebugLoc(LI->getDebugLoc());
@@ -1262,7 +1347,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Into:
// BB':
// %Y = icmp ne i32 %A, %B
- // br i1 %Z, ...
+ // br i1 %Y, ...
PredValueInfoTy XorOpValues;
bool isLHS = true;
@@ -1387,14 +1472,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -1415,6 +1500,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ auto NewBBFreq =
+ BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -1425,7 +1517,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
- ValueMapping[BI] = New;
+ ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -1438,7 +1530,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
@@ -1475,8 +1567,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1499,11 +1591,98 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TLI);
+ // Update the edge weight from BB to SuccBB, which should be less than before.
+ UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+
// Threaded an edge!
++NumThreads;
return true;
}
+/// Create a new basic block that will be the predecessor of BB and successor of
+/// all blocks in Preds. When profile data is available, update the frequency of
+/// this new block.
+BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix) {
+ // Collect the frequencies of all predecessors of BB, which will be used to
+ // update the edge weight on BB->SuccBB.
+ BlockFrequency PredBBFreq(0);
+ if (HasProfileData)
+ for (auto Pred : Preds)
+ PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
+
+ BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
+
+ // Set the block frequency of the newly created PredBB, which is the sum of
+ // frequencies of Preds.
+ if (HasProfileData)
+ BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
+ return PredBB;
+}
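
The frequency assigned to the merged predecessor block is the sum of the incoming edge frequencies, each being freq(Pred) * prob(Pred -> BB). A standalone arithmetic sketch with made-up numbers:

#include <iostream>
#include <utility>
#include <vector>

int main() {
  // (block frequency, edge probability into BB) for each split predecessor.
  std::vector<std::pair<double, double>> Preds = {{100, 0.5}, {40, 0.25}, {8, 1.0}};
  double NewFreq = 0;
  for (const auto &P : Preds)
    NewFreq += P.first * P.second; // BFI->getBlockFreq * BPI->getEdgeProbability
  std::cout << NewFreq << "\n"; // 50 + 10 + 8 = 68
}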
+
+/// Update the block frequency of BB and the branch weight metadata on the
+/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
+/// Freq(PredBB->BB) / Freq(BB->SuccBB).
+void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *NewBB,
+ BasicBlock *SuccBB) {
+ if (!HasProfileData)
+ return;
+
+ assert(BFI && BPI && "BFI & BPI should have been created here");
+
+ // As the edge from PredBB to BB is deleted, we have to update the block
+ // frequency of BB.
+ auto BBOrigFreq = BFI->getBlockFreq(BB);
+ auto NewBBFreq = BFI->getBlockFreq(NewBB);
+ auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
+ auto BBNewFreq = BBOrigFreq - NewBBFreq;
+ BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
+
+ // Collect updated outgoing edges' frequencies from BB and use them to update
+ // edge probabilities.
+ SmallVector<uint64_t, 4> BBSuccFreq;
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ auto SuccFreq = (*I == SuccBB)
+ ? BB2SuccBBFreq - NewBBFreq
+ : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ BBSuccFreq.push_back(SuccFreq.getFrequency());
+ }
+
+ uint64_t MaxBBSuccFreq =
+ *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
+
+ SmallVector<BranchProbability, 4> BBSuccProbs;
+ if (MaxBBSuccFreq == 0)
+ BBSuccProbs.assign(BBSuccFreq.size(),
+ {1, static_cast<unsigned>(BBSuccFreq.size())});
+ else {
+ for (uint64_t Freq : BBSuccFreq)
+ BBSuccProbs.push_back(
+ BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
+ // Normalize edge probabilities so that they sum up to one.
+ BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
+ BBSuccProbs.end());
+ }
+
+ // Update edge probabilities in BPI.
+ for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
+ BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
+
+ if (BBSuccProbs.size() >= 2) {
+ SmallVector<uint32_t, 4> Weights;
+ for (auto Prob : BBSuccProbs)
+ Weights.push_back(Prob.getNumerator());
+
+ auto TI = BB->getTerminator();
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
+ }
+}
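
The renormalization step turns the updated outgoing frequencies back into probabilities that sum to one; the scale-by-max step above exists only because BranchProbability is a fixed-point type. With plain doubles the whole computation collapses to one division (values illustrative):

#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> SuccFreq = {30.0, 10.0}; // BB's updated outgoing freqs
  double Total = std::accumulate(SuccFreq.begin(), SuccFreq.end(), 0.0);
  for (double F : SuccFreq) // uniform fallback mirrors the MaxBBSuccFreq==0 case
    std::cout << (Total == 0 ? 1.0 / SuccFreq.size() : F / Total) << "\n";
}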
+
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
@@ -1530,14 +1709,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
@@ -1581,12 +1760,12 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (Value *IV =
SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
- ValueMapping[BI] = IV;
+ ValueMapping[&*BI] = IV;
} else {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
+ PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
+ ValueMapping[&*BI] = New;
}
}
@@ -1628,8 +1807,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 43fc50e..6d70cdc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -34,10 +34,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -118,9 +121,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -164,9 +170,12 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -183,7 +192,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -264,9 +273,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// FIXME: This is really heavy handed. It would be a bit better to use an
// SSAUpdater strategy during promotion that was LCSSA aware and reformed
// it as it went.
- if (Changed)
- formLCSSARecursively(*L, *DT, LI,
- getAnalysisIfAvailable<ScalarEvolution>());
+ if (Changed) {
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr);
+ }
}
// Check that neither this loop nor its parent have had LCSSA broken. LICM is
@@ -402,7 +412,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
/// Computes loop safety information, checks loop body & header
-/// for the possiblity of may throw exception.
+/// for the possibility of a may-throw exception.
///
void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
  assert(CurLoop != nullptr && "CurLoop can't be null");
@@ -410,7 +420,7 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
// Setting default safety values.
SafetyInfo->MayThrow = false;
SafetyInfo->HeaderMayThrow = false;
- // Iterate over header and compute dafety info.
+ // Iterate over header and compute safety info.
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
SafetyInfo->HeaderMayThrow |= I->mayThrow();
@@ -445,7 +455,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
- Size = AA->getTypeStoreSize(LI->getType());
+ Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
@@ -457,10 +467,21 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
return false;
// Handle simple cases by querying alias analysis.
- AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
- if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == FMRB_DoesNotAccessMemory)
return true;
if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // A readonly argmemonly function only reads from memory pointed to by
+ // its arguments with arbitrary offsets. If we can prove there are no
+ // writes to this memory in the loop, we can hoist or sink.
+ if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
+ for (Value *Op : CI->arg_operands())
+ if (Op->getType()->isPointerTy() &&
+ pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
+ AAMDNodes(), CurAST))
+ return false;
+ return true;
+ }
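
The new case hoists a read-only call that touches only its pointer arguments, provided none of those pointers can alias memory written in the loop. A standalone model of that per-argument check, with a set of names standing in for pointerInvalidatedByLoop's alias query:

#include <iostream>
#include <set>
#include <string>
#include <vector>

bool canHoistReadOnlyArgMemOnly(const std::vector<std::string> &PtrArgs,
                                const std::set<std::string> &WrittenInLoop) {
  for (const auto &Arg : PtrArgs)
    if (WrittenInLoop.count(Arg)) // toy stand-in for the alias query
      return false;               // some argument may be invalidated
  return true;                    // safe to hoist or sink the call
}

int main() {
  std::cout << canHoistReadOnlyArgMemOnly({"p", "q"}, {"r"}) << "\n"; // 1
  std::cout << canHoistReadOnlyArgMemOnly({"p"}, {"p"}) << "\n";      // 0
}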
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
@@ -566,7 +587,7 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
if (!OLoop->contains(&PN)) {
PHINode *OpPN =
PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
- OInst->getName() + ".lcssa", ExitBlock.begin());
+ OInst->getName() + ".lcssa", &ExitBlock.front());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
*OI = OpPN;
@@ -651,6 +672,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+ I.dropUnknownNonDebugMetadata();
+
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumHoisted;
@@ -730,9 +755,9 @@ namespace {
if (!L->contains(BB)) {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
- PHINode *PN = PHINode::Create(
- I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", BB->begin());
+ PHINode *PN =
+ PHINode::Create(I->getType(), PredCache.size(BB),
+ I->getName() + ".lcssa", &BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PN->addIncoming(I, Pred);
return PN;
@@ -942,7 +967,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
CurLoop->getUniqueExitBlocks(ExitBlocks);
InsertPts.resize(ExitBlocks.size());
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
}
// We use the SSAUpdater interface to insert phi nodes as required.
@@ -973,7 +998,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
return Changed;
}
-/// Simple Analysis hook. Clone alias set info.
+/// Simple analysis hook. Clone alias set info.
///
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index c19cd19..1648878 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -56,7 +57,7 @@ class LoadCombine : public BasicBlockPass {
public:
LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
+ initializeLoadCombinePass(*PassRegistry::getPassRegistry());
}
using llvm::Pass::doInitialization;
@@ -223,7 +224,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
if (skipOptnoneFunction(BB))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
IRBuilder<true, TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
@@ -262,8 +263,8 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
char LoadCombine::ID = 0;
@@ -274,7 +275,8 @@ BasicBlockPass *llvm::createLoadCombinePass() {
INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 98b068e..bc00ff3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
@@ -35,18 +36,19 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
@@ -64,7 +66,7 @@ INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
@@ -130,7 +132,7 @@ bool LoopDeletion::isLoopDead(Loop *L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -169,7 +171,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
@@ -242,9 +244,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
for (BasicBlock *BB : blocks)
loopInfo.removeBlock(BB);
- // The last step is to inform the loop pass manager that we've
- // eliminated this loop.
- LPM.deleteLoopFromQueue(L);
+ // The last step is to update LoopInfo now that we've eliminated this loop.
+ loopInfo.updateUnloop(L);
Changed = true;
++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 1b9859b..3d3cf3e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <list>
@@ -54,6 +55,11 @@ static cl::opt<bool> DistributeNonIfConvertible(
"if-convertible by the loop vectorizer"),
cl::init(false));
+static cl::opt<unsigned> DistributeSCEVCheckThreshold(
+ "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Distribution"));
+
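
(Being a cl::opt, the new threshold is tunable from the command line when the pass is run standalone, e.g. `opt -loop-distribute -loop-distribute-scev-check-threshold=4 ...`; 8 is only the default chosen here, and cl::Hidden merely keeps the flag out of -help.)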
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
namespace {
@@ -164,9 +170,7 @@ public:
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
- for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
- auto *Inst = *I;
-
+ for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) {
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
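
For reference, make_range (from llvm/ADT/iterator_range.h) only packages an iterator pair so that range-based for can consume it, as in the reverse traversal above. A minimal stand-alone equivalent, for illustration only:

    // Sketch of llvm::make_range: wrap (begin, end) so range-based for works.
    template <typename It> struct IterRange {
      It B, E;
      It begin() const { return B; }
      It end() const { return E; }
    };
    template <typename It> IterRange<It> makeRange(It B, It E) { return {B, E}; }

    // Usage mirrors the loop above:
    //   for (auto *Inst : makeRange(Unused.rbegin(), Unused.rend())) ...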
@@ -373,7 +377,7 @@ public:
/// \brief This performs the main chunk of the work of cloning the loops for
/// the partitions.
- void cloneLoops(Pass *P) {
+ void cloneLoops() {
BasicBlock *OrigPH = L->getLoopPreheader();
// At this point the predecessor of the preheader is either the memcheck
// block or the top part of the original preheader.
@@ -547,11 +551,11 @@ public:
MemoryInstructionDependences(
const SmallVectorImpl<Instruction *> &Instructions,
- const SmallVectorImpl<Dependence> &InterestingDependences) {
+ const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
DEBUG(dbgs() << "Backward dependences:\n");
- for (auto &Dep : InterestingDependences)
+ for (auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
// order, i.e. source is always first. (The direction is given by the
@@ -567,25 +571,6 @@ private:
AccessesType Accesses;
};
-/// \brief Returns the instructions that use values defined in the loop.
-static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
- SmallVector<Instruction *, 8> UsedOutside;
-
- for (auto *Block : L->getBlocks())
- // FIXME: I believe that this could use copy_if if the Inst reference could
- // be adapted into a pointer.
- for (auto &Inst : *Block) {
- auto Users = Inst.users();
- if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
- auto *Use = cast<Instruction>(U);
- return !L->contains(Use->getParent());
- }))
- UsedOutside.push_back(&Inst);
- }
-
- return UsedOutside;
-}
-
/// \brief The pass class.
class LoopDistribute : public FunctionPass {
public:
@@ -597,6 +582,7 @@ public:
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LAA = &getAnalysis<LoopAccessAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Build up a worklist of inner-loops to vectorize. This is necessary as the
// act of distributing a loop creates new loops and can invalidate iterators
@@ -619,6 +605,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<LoopAccessAnalysis>();
@@ -629,6 +616,45 @@ public:
static char ID;
private:
+ /// \brief Filter out checks between pointers from the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers. Partition
+ /// number -1 means that the pointer is used in multiple partitions. In this
+ /// case we can't safely omit the check.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4>
+ includeOnlyCrossPartitionChecks(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
+ const SmallVectorImpl<int> &PtrToPartition,
+ const RuntimePointerChecking *RtPtrChecking) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+ // that each pair of pointers within the two groups needs
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
+
+ return Checks;
+ }
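
For illustration, the filtering rule reduces to the stand-alone predicate below; plain vectors stand in for the LAA types, the names are hypothetical, and the real lambda additionally requires RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) before a pair counts:

    #include <utility>
    #include <vector>

    using Group = std::vector<unsigned>;   // pointer indices in one check group
    using Check = std::pair<Group, Group>; // one runtime check: group vs. group

    // Partition -1 marks a pointer used by several partitions; it can never be
    // proven co-located with anything, so its checks must be kept.
    static bool samePartition(const std::vector<int> &PtrToPartition,
                              unsigned A, unsigned B) {
      return PtrToPartition[A] != -1 && PtrToPartition[A] == PtrToPartition[B];
    }

    // Keep a check iff some pair of member pointers crosses partitions.
    static bool keepCheck(const Check &C, const std::vector<int> &PtrToPartition) {
      for (unsigned P1 : C.first)
        for (unsigned P2 : C.second)
          if (!samePartition(PtrToPartition, P1, P2))
            return true;
      return false;
    }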
+
/// \brief Try to distribute an inner-most loop.
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -655,9 +681,8 @@ private:
DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
return false;
}
- auto *InterestingDependences =
- LAI.getDepChecker().getInterestingDependences();
- if (!InterestingDependences || InterestingDependences->empty()) {
+ auto *Dependences = LAI.getDepChecker().getDependences();
+ if (!Dependences || Dependences->empty()) {
DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
return false;
}
@@ -685,7 +710,7 @@ private:
// NumUnsafeDependencesActive reaches 0.
const MemoryDepChecker &DepChecker = LAI.getDepChecker();
MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
- *InterestingDependences);
+ *Dependences);
int NumUnsafeDependencesActive = 0;
for (auto &InstDep : MID) {
@@ -735,6 +760,13 @@ private:
return false;
}
+ // Don't distribute the loop if we need too many SCEV run-time checks.
+ const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions; set up the reverse mapping from
// instructions to partitions.
@@ -746,20 +778,25 @@ private:
if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
SplitBlock(PH, PH->getTerminator(), DT, LI);
- // If we need run-time checks to disambiguate pointers are run-time, version
- // the loop now.
+ // If we need run-time checks, version the loop now.
auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
- LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
- if (LVer.needsRuntimeChecks()) {
+ const auto *RtPtrChecking = LAI.getRuntimePointerChecking();
+ const auto &AllChecks = RtPtrChecking->getChecks();
+ auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
+ RtPtrChecking);
+
+ if (!Pred.isAlwaysTrue() || !Checks.empty()) {
DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition));
- LVer.versionLoop(this);
- LVer.addPHINodes(DefsUsedOutside);
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+ LoopVersioning LVer(LAI, L, LI, DT, SE, false);
+ LVer.setAliasChecks(std::move(Checks));
+ LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LVer.versionLoop(DefsUsedOutside);
}
// Create identical copies of the original loop for each partition and hook
// them up sequentially.
- Partitions.cloneLoops(this);
+ Partitions.cloneLoops();
// Now, we remove the instructions from each loop that don't belong to that
// partition.
@@ -780,6 +817,7 @@ private:
LoopInfo *LI;
LoopAccessAnalysis *LAA;
DominatorTree *DT;
+ ScalarEvolution *SE;
};
} // anonymous namespace
@@ -790,6 +828,7 @@ INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
namespace llvm {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a21ca24..2d577de 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -31,11 +31,6 @@
// void foo(_Complex float *P)
// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
//
-// We should enhance this to handle negative strides through memory.
-// Alternatively (and perhaps better) we could rely on an earlier pass to force
-// forward iteration through memory, which is generally better for cache
-// behavior. Negative strides *do* happen for memset/memcpy loops.
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
@@ -44,7 +39,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -67,149 +65,85 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
- class LoopIdiomRecognize;
+class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+
+public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
- /// This class defines some utility functions for loop idiom recognization.
- class LIRUtil {
- public:
- /// Return true iff the block contains nothing but an uncondition branch
- /// (aka goto instruction).
- static bool isAlmostEmpty(BasicBlock *);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
- static BranchInst *getBranch(BasicBlock *BB) {
- return dyn_cast<BranchInst>(BB->getTerminator());
- }
+private:
+ typedef SmallVector<StoreInst *, 8> StoreList;
+ StoreList StoreRefs;
- /// Derive the precondition block (i.e the block that guards the loop
- /// preheader) from the given preheader.
- static BasicBlock *getPrecondBb(BasicBlock *PreHead);
- };
-
- /// This class is to recoginize idioms of population-count conducted in
- /// a noncountable loop. Currently it only recognizes this pattern:
- /// \code
- /// while(x) {cnt++; ...; x &= x - 1; ...}
- /// \endcode
- class NclPopcountRecognize {
- LoopIdiomRecognize &LIR;
- Loop *CurLoop;
- BasicBlock *PreCondBB;
-
- typedef IRBuilder<> IRBuilderTy;
-
- public:
- explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
- bool recognize();
-
- private:
- /// Take a glimpse of the loop to see if we need to go ahead recoginizing
- /// the idiom.
- bool preliminaryScreen();
-
- /// Check if the given conditional branch is based on the comparison
- /// between a variable and zero, and if the variable is non-zero, the
- /// control yields to the loop entry. If the branch matches the behavior,
- /// the variable involved in the comparion is returned. This function will
- /// be called to see if the precondition and postcondition of the loop
- /// are in desirable form.
- Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
-
- /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
- /// is set to the instruction counting the population bit. 2) \p CntPhi
- /// is set to the corresponding phi node. 3) \p Var is set to the value
- /// whose population bits are being counted.
- bool detectIdiom
- (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
-
- /// Insert ctpop intrinsic function and some obviously dead instructions.
- void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
-
- /// Create llvm.ctpop.* intrinsic function.
- CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
- };
-
- class LoopIdiomRecognize : public LoopPass {
- Loop *CurLoop;
- DominatorTree *DT;
- ScalarEvolution *SE;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
- public:
- static char ID;
- explicit LoopIdiomRecognize() : LoopPass(ID) {
- initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DT = nullptr;
- SE = nullptr;
- TLI = nullptr;
- TTI = nullptr;
- }
+ /// \name Countable Loop Idiom Handling
+ /// @{
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks);
-
- bool processLoopStore(StoreInst *SI, const SCEV *BECount);
- bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment,
- Value *SplatValue, Instruction *TheStore,
- const SCEVAddRecExpr *Ev,
- const SCEV *BECount);
- bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
+ bool runOnCountableLoop();
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks);
- DominatorTree *getDominatorTree() {
- return DT ? DT
- : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- }
+ void collectStores(BasicBlock *BB);
+ bool isLegalStore(StoreInst *SI);
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- ScalarEvolution *getScalarEvolution() {
- return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
- }
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *SplatValue,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride);
- TargetLibraryInfo *getTargetLibraryInfo() {
- if (!TLI)
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ /// @}
+ /// \name Noncountable Loop Idiom Handling
+ /// @{
- return TLI;
- }
+ bool runOnNoncountableLoop();
- const TargetTransformInfo *getTargetTransformInfo() {
- return TTI ? TTI
- : (TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurLoop->getHeader()->getParent()));
- }
+ bool recognizePopcount();
+ void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
+ PHINode *CntPhi, Value *Var);
- Loop *getLoop() const { return CurLoop; }
+ /// @}
+};
- private:
- bool runOnNoncountableLoop();
- bool runOnCountableLoop();
- };
-}
+} // End anonymous namespace.
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
@@ -218,9 +152,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -242,406 +179,64 @@ static void deleteDeadInstruction(Instruction *I,
//===----------------------------------------------------------------------===//
//
-// Implementation of LIRUtil
-//
-//===----------------------------------------------------------------------===//
-
-// This function will return true iff the given block contains nothing but goto.
-// A typical usage of this function is to check if the preheader function is
-// "almost" empty such that generated intrinsic functions can be moved across
-// the preheader and be placed at the end of the precondition block without
-// the concern of breaking data dependence.
-bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
- if (BranchInst *Br = getBranch(BB)) {
- return Br->isUnconditional() && Br == BB->begin();
- }
- return false;
-}
-
-BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
- if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
- BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : nullptr;
- }
- return nullptr;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Implementation of NclPopcountRecognize
+// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//
-NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
-}
-
-bool NclPopcountRecognize::preliminaryScreen() {
- const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
- if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
- return false;
-
- // Counting population are usually conducted by few arithmetic instructions.
- // Such instructions can be easilly "absorbed" by vacant slots in a
- // non-compact loop. Therefore, recognizing popcount idiom only makes sense
- // in a compact loop.
-
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
-
- BasicBlock *LoopBody = *(CurLoop->block_begin());
- if (LoopBody->size() >= 20) {
- // The loop is too big, bail out.
- return false;
- }
-
- // It should have a preheader containing nothing but a goto instruction.
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
return false;
- // It should have a precondition block where the generated popcount instrinsic
- // function will be inserted.
- PreCondBB = LIRUtil::getPrecondBb(PreHead);
- if (!PreCondBB)
+ CurLoop = L;
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it; just give up.
+ if (!L->getLoopPreheader())
return false;
- return true;
-}
-
-Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
- BasicBlock *LoopEntry) const {
- if (!Br || !Br->isConditional())
- return nullptr;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
- if (!Cond)
- return nullptr;
-
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpZero || !CmpZero->isZero())
- return nullptr;
-
- ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
- return Cond->getOperand(0);
-
- return nullptr;
-}
-
-bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
- PHINode *&CntPhi,
- Value *&Var) const {
- // Following code tries to detect this idiom:
- //
- // if (x0 != 0)
- // goto loop-exit // the precondition of the loop
- // cnt0 = init-val;
- // do {
- // x1 = phi (x0, x2);
- // cnt1 = phi(cnt0, cnt2);
- //
- // cnt2 = cnt1 + 1;
- // ...
- // x2 = x1 & (x1 - 1);
- // ...
- // } while(x != 0);
- //
- // loop-exit:
- //
-
- // step 1: Check to see if the look-back branch match this pattern:
- // "if (a!=0) goto loop-entry".
- BasicBlock *LoopEntry;
- Instruction *DefX2, *CountInst;
- Value *VarX1, *VarX0;
- PHINode *PhiX, *CountPhi;
-
- DefX2 = CountInst = nullptr;
- VarX1 = VarX0 = nullptr;
- PhiX = CountPhi = nullptr;
- LoopEntry = *(CurLoop->block_begin());
-
- // step 1: Check if the loop-back branch is in desirable form.
- {
- if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
- DefX2 = dyn_cast<Instruction>(T);
- else
- return false;
- }
-
- // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
- {
- if (!DefX2 || DefX2->getOpcode() != Instruction::And)
- return false;
-
- BinaryOperator *SubOneOp;
-
- if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
- VarX1 = DefX2->getOperand(1);
- else {
- VarX1 = DefX2->getOperand(0);
- SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
- }
- if (!SubOneOp)
- return false;
-
- Instruction *SubInst = cast<Instruction>(SubOneOp);
- ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
- if (!Dec ||
- !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
- (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
- return false;
- }
- }
-
- // step 3: Check the recurrence of variable X
- {
- PhiX = dyn_cast<PHINode>(VarX1);
- if (!PhiX ||
- (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
- return false;
- }
- }
-
- // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
- {
- CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
- IterE = LoopEntry->end(); Iter != IterE; Iter++) {
- Instruction *Inst = Iter;
- if (Inst->getOpcode() != Instruction::Add)
- continue;
-
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || !Inc->isOne())
- continue;
-
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
- continue;
-
- // Check if the result of the instruction is live of the loop.
- bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
- if ((cast<Instruction>(U))->getParent() != LoopEntry) {
- LiveOutLoop = true; break;
- }
- }
-
- if (LiveOutLoop) {
- CountInst = Inst;
- CountPhi = Phi;
- break;
- }
- }
-
- if (!CountInst)
- return false;
- }
-
- // step 5: check if the precondition is in this form:
- // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
- {
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
- if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
- return false;
-
- CntInst = CountInst;
- CntPhi = CountPhi;
- Var = T;
- }
-
- return true;
-}
-
-void NclPopcountRecognize::transform(Instruction *CntInst,
- PHINode *CntPhi, Value *Var) {
-
- ScalarEvolution *SE = LIR.getScalarEvolution();
- TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- const DebugLoc DL = CntInst->getDebugLoc();
-
- // Assuming before transformation, the loop is following:
- // if (x) // the precondition
- // do { cnt++; x &= x - 1; } while(x);
-
- // Step 1: Insert the ctpop instruction at the end of the precondition block
- IRBuilderTy Builder(PreCondBr);
- Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
- {
- PopCnt = createPopcntIntrinsic(Builder, Var, DL);
- NewCount = PopCntZext =
- Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
-
- if (NewCount != PopCnt)
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
-
- // TripCnt is exactly the number of iterations the loop has
- TripCnt = NewCount;
-
- // If the population counter's initial value is not zero, insert Add Inst.
- Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero()) {
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
- }
- }
-
- // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
- // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
- // function would be partial dead code, and downstream passes will drag
- // it back from the precondition block to the preheader.
- {
- ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
-
- Value *Opnd0 = PopCntZext;
- Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
- if (PreCond->getOperand(0) != Var)
- std::swap(Opnd0, Opnd1);
-
- ICmpInst *NewPreCond =
- cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
- PreCondBr->setCondition(NewPreCond);
-
- RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
- }
-
- // Step 3: Note that the population count is exactly the trip count of the
- // loop in question, which enble us to to convert the loop from noncountable
- // loop into a countable one. The benefit is twofold:
- //
- // - If the loop only counts population, the entire loop become dead after
- // the transformation. It is lots easier to prove a countable loop dead
- // than to prove a noncountable one. (In some C dialects, a infite loop
- // isn't dead even if it computes nothing useful. In general, DCE needs
- // to prove a noncountable loop finite before safely delete it.)
- //
- // - If the loop also performs something else, it remains alive.
- // Since it is transformed to countable form, it can be aggressively
- // optimized by some optimizations which are in general not applicable
- // to a noncountable loop.
- //
- // After this step, this loop (conceptually) would look like following:
- // newcnt = __builtin_ctpop(x);
- // t = newcnt;
- // if (x)
- // do { cnt++; x &= x-1; t--) } while (t > 0);
- BasicBlock *Body = *(CurLoop->block_begin());
- {
- BranchInst *LbBr = LIRUtil::getBranch(Body);
- ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
- Type *Ty = TripCnt->getType();
-
- PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
-
- Builder.SetInsertPoint(LbCond);
- Value *Opnd1 = cast<Value>(TcPhi);
- Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
- Instruction *TcDec =
- cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
-
- TcPhi->addIncoming(TripCnt, PreHead);
- TcPhi->addIncoming(TcDec, Body);
-
- CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
- CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
- LbCond->setPredicate(Pred);
- LbCond->setOperand(0, TcDec);
- LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
- }
-
- // Step 4: All the references to the original population counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctpop().
- CntInst->replaceUsesOutsideBlock(NewCount, Body);
-
- // step 5: Forget the "non-computable" trip-count SCEV associated with the
- // loop. The loop would otherwise not be deleted even if it becomes empty.
- SE->forgetLoop(CurLoop);
-}
-
-CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
- Value *Val, DebugLoc DL) {
- Value *Ops[] = { Val };
- Type *Tys[] = { Val->getType() };
-
- Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
- CallInst *CI = IRBuilder.CreateCall(Func, Ops);
- CI->setDebugLoc(DL);
-
- return CI;
-}
-
-/// recognize - detect population count idiom in a non-countable loop. If
-/// detected, transform the relevant code to popcount intrinsic function
-/// call, and return true; otherwise, return false.
-bool NclPopcountRecognize::recognize() {
-
- if (!LIR.getTargetTransformInfo())
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
return false;
- LIR.getScalarEvolution();
-
- if (!preliminaryScreen())
- return false;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent());
+ DL = &CurLoop->getHeader()->getModule()->getDataLayout();
- Instruction *CntInst;
- PHINode *CntPhi;
- Value *Val;
- if (!detectIdiom(CntInst, CntPhi, Val))
- return false;
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
- transform(CntInst, CntPhi, Val);
- return true;
+ return runOnNoncountableLoop();
}
-//===----------------------------------------------------------------------===//
-//
-// Implementation of LoopIdiomRecognize
-//
-//===----------------------------------------------------------------------===//
-
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
assert(!isa<SCEVCouldNotCompute>(BECount) &&
- "runOnCountableLoop() called on a loop without a predictable"
- "backedge-taken count");
+ "runOnCountableLoop() called on a loop without a predictable"
+ "backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- if (BECst->getValue()->getValue() == 0)
+ if (BECst->getAPInt() == 0)
return false;
- // set DT
- (void)getDominatorTree();
-
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- // set TLI
- (void)getTargetLibraryInfo();
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
- << CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ << CurLoop->getHeader()->getParent()->getName() << "] Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
- if (LI.getLoopFor(BB) != CurLoop)
+ if (LI->getLoopFor(BB) != CurLoop)
continue;
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
@@ -649,41 +244,109 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
return MadeChange;
}
-bool LoopIdiomRecognize::runOnNoncountableLoop() {
- NclPopcountRecognize Popcount(*this);
- if (Popcount.recognize())
- return true;
+static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
+ "Don't overflow unsigned.");
+ return (unsigned)SizeInBits >> 3;
+}
- return false;
+static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) {
+ const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
+ return ConstStride->getAPInt().getZExtValue();
}
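
(Worked example: with `int *A`, the loop `for (i = 0; i < n; ++i) A[i] = 0;` stores i32 values, so SizeInBits is 32 and getStoreSizeInBytes returns 4; the pointer SCEV is the affine AddRec {A,+,4}, so getStoreStride likewise returns 4, and the stride-equals-store-size test in processLoopStore succeeds.)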
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return nullptr;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = DL->getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ return nullptr;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (DL->isBigEndian())
+ return nullptr;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16)
+ return nullptr;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16)
+ return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16 / Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
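
For illustration, a stand-alone sketch of the byte pattern the returned ConstantArray encodes for a 4-byte constant: the value is simply replicated until it fills the 16 bytes that memset_pattern16 consumes (little-endian assumed, matching the big-endian bail-out above):

    #include <array>
    #include <cstdint>
    #include <cstring>

    // Replicate a 4-byte constant into a 16-byte memset_pattern16 pattern.
    static std::array<uint8_t, 16> makePattern16(uint32_t V) {
      std::array<uint8_t, 16> Pat;
      for (int I = 0; I < 4; ++I)             // 16 / 4 = 4 copies
        std::memcpy(Pat.data() + 4 * I, &V, sizeof(V));
      return Pat;
    }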
+
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+ // Don't touch volatile stores.
+ if (!SI->isSimple())
return false;
- CurLoop = L;
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!L->getLoopPreheader())
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
- // Disable loop idiom recognition if the function's name is a common idiom.
- StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
- SE = &getAnalysis<ScalarEvolution>();
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
- return runOnNoncountableLoop();
+ // Check to see if we have a constant stride.
+ if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
+ return false;
+
+ return true;
+}
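
A store shape this predicate accepts, for reference; the address expression for `&A[I]` is the affine AddRec {A,+,4} on the loop, with a constant stride of 4:

    // Recognizable: simple (non-volatile) store, affine address, constant stride.
    void storeLoop(int *A, int N) {
      for (int I = 0; I < N; ++I)
        A[I] = 0; // address SCEV: {A,+,4}<loop>
    }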
+
+void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
+ StoreRefs.clear();
+ for (Instruction &I : *BB) {
+ StoreInst *SI = dyn_cast<StoreInst>(&I);
+ if (!SI)
+ continue;
+
+ // Make sure this is a strided store with a constant stride.
+ if (!isLegalStore(SI))
+ continue;
+
+ // Save the store locations.
+ StoreRefs.push_back(SI);
+ }
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
-bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+bool LoopIdiomRecognize::runOnLoopBlock(
+ BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
@@ -692,25 +355,18 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return false;
bool MadeChange = false;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
- // Look for store instructions, which may be optimized to memset/memcpy.
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopStore(SI, BECount)) continue;
- MadeChange = true;
-
- // If processing the store invalidated our iterator, start over from the
- // top of the block.
- if (!InstPtr)
- I = BB->begin();
- continue;
- }
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ collectStores(BB);
+ for (auto &SI : StoreRefs)
+ MadeChange |= processLoopStore(SI, BECount);
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopMemSet(MSI, BECount)) continue;
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(&*I);
+ if (!processLoopMemSet(MSI, BECount))
+ continue;
MadeChange = true;
// If processing the memset invalidated our iterator, start over from the
@@ -724,71 +380,38 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return MadeChange;
}
-
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (!SI->isSimple()) return false;
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
- // Reject stores that are so large that they overflow an unsigned.
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
- uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
- if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
- return false;
-
- // See if the pointer expression is an AddRec like {base,+,1} on the current
- // loop, which indicates a strided store. If we have something else, it's a
- // random store we can't handle.
- const SCEVAddRecExpr *StoreEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
- return false;
-
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned StoreSize = (unsigned)SizeInBits >> 3;
- const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
- if (!Stride || StoreSize != Stride->getValue()->getValue()) {
- // TODO: Could also handle negative stride here someday, that will require
- // the validity check in mayLoopAccessLocation to be updated though.
- // Enable this to print exact negative strides.
- if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
- dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
- dbgs() << "BB: " << *SI->getParent();
- }
-
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
return false;
- }
+
+ bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount))
+ StoredVal, SI, StoreEv, BECount, NegStride))
return true;
- // If the stored value is a strided load in the same loop with the same stride
- // this this may be transformable into a memcpy. This kicks in for stuff like
- // for (i) A[i] = B[i];
- if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
- const SCEVAddRecExpr *LoadEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
- if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
- if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
- return true;
- }
- //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
-
- return false;
+ // Optimize the store into a memcpy if it feeds a similarly strided load.
+ return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
-bool LoopIdiomRecognize::
-processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
+ const SCEV *BECount) {
// We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ return false;
// If we're not allowed to hack on memset, we fail.
if (!TLI->has(LibFunc::memset))
@@ -818,17 +441,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(),
- MSI, Ev, BECount);
+ MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+ BECount, /*NegStride=*/false);
}
-
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
- Loop *L, const SCEV *BECount,
- unsigned StoreSize, AliasAnalysis &AA,
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
Instruction *IgnoredStore) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
@@ -838,7 +460,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+ AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -849,59 +471,31 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
- if (&*I != IgnoredStore &&
- (AA.getModRefInfo(I, StoreLoc) & Access))
+ if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
return true;
return false;
}
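
(Worked example: a loop storing 4-byte elements with BECount = 99 performs 100 stores, so the refined AccessSize is (99 + 1) * 4 = 400 bytes; any other instruction in the loop whose mod/ref effect can touch those 400 bytes starting at the base pointer makes this return true and blocks the transform.)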
-/// getMemSetPatternValue - If a strided store of the specified value is safe to
-/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
-/// be passed in. Otherwise, return null.
-///
-/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
-/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
- // If the value isn't a constant, we can't promote it to being in a constant
- // array. We could theoretically do a store to an alloca or something, but
- // that doesn't seem worthwhile.
- Constant *C = dyn_cast<Constant>(V);
- if (!C) return nullptr;
-
- // Only handle simple values that are a power of two bytes in size.
- uint64_t Size = DL.getTypeSizeInBits(V->getType());
- if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return nullptr;
-
- // Don't care enough about darwin/ppc to implement this.
- if (DL.isBigEndian())
- return nullptr;
-
- // Convert to size in bytes.
- Size /= 8;
-
- // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
- // if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return nullptr;
-
- // If the constant is exactly 16 bytes, just use it.
- if (Size == 16) return C;
-
- // Otherwise, we'll use an array of the constants.
- unsigned ArraySize = 16/Size;
- ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
- return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+// If we have a negative stride, Start refers to the end of the memory location
+// we're trying to memset. Therefore, we need to recompute the base pointer,
+// which is just Start - BECount*Size.
+static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
+ Type *IntPtr, unsigned StoreSize,
+ ScalarEvolution *SE) {
+ const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ if (StoreSize != 1)
+ Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+ return SE->getMinusSCEV(Start, Index);
}
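
(Worked example: for `for (i = N-1; i >= 0; --i) A[i] = 0;` the store AddRec starts at &A[N-1] and strides by -4; with BECount = N-1 and StoreSize = 4, the recomputed start is &A[N-1] - (N-1)*4, i.e. &A[0], exactly where the equivalent forward memset has to begin.)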
-
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
-bool LoopIdiomRecognize::
-processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopStridedStore(
+ Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
+ Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride) {
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
@@ -909,7 +503,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -936,9 +529,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
+
+ const SCEV *Start = Ev->getStart();
+ // Handle negative strided loops.
+ if (NegStride)
+ Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -946,12 +545,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
+ Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+ if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
+ *AA, TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -962,36 +558,30 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW);
if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
}
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr,
- SplatValue,
- NumBytes,
- StoreAlignment);
+ NewCall =
+ Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
- Module *M = TheStore->getParent()->getParent()->getParent();
- Value *MSP = M->getOrInsertFunction("memset_pattern16",
- Builder.getVoidTy(),
- Int8PtrTy,
- Int8PtrTy,
- IntPtr,
- (void*)nullptr);
+ Module *M = TheStore->getModule();
+ Value *MSP =
+ M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
+ Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
@@ -1015,26 +605,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
return true;
}
-/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
-/// same-strided load.
-bool LoopIdiomRecognize::
-processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+/// If the stored value is a strided load in the same loop with the same stride,
+/// this may be transformable into a memcpy. This kicks in for stuff like
+/// for (i) A[i] = B[i];
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
+ StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
- LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- const DataLayout &DL = Preheader->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
+
+ const SCEV *StrStart = StoreEv->getStart();
+ unsigned StrAS = SI->getPointerAddressSpace();
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+
+ // Handle negative strided loops.
+ if (NegStride)
+ StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1042,29 +653,31 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// or write the memory region we're storing to. This includes the load that
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
- Value *StoreBasePtr =
- Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI)) {
+ Value *StoreBasePtr = Expander.expandCodeFor(
+ StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
+ const SCEV *LdStart = LoadEv->getStart();
+ unsigned LdAS = LI->getPointerAddressSpace();
+
+ // Handle negative strided loops.
+ if (NegStride)
+ LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- Value *LoadBasePtr =
- Expander.expandCodeFor(LoadEv->getStart(),
- Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ Value *LoadBasePtr = Expander.expandCodeFor(
+ LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
- if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
+ *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -1074,34 +687,368 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, everything is safe, we can transform this!
-
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
- Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
- std::min(SI->getAlignment(), LI->getAlignment()));
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
NewCall->setDebugLoc(SI->getDebugLoc());
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
- // Okay, the memset has been formed. Zap the original store and anything that
+ // Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
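
At the source level, the idiom this function rewrites is the plain element-copy
loop. A minimal sketch (illustrative C++, not part of the patch; the array
names and helper functions are assumptions drawn from the comments above):

    #include <cstring>
    #include <cstddef>

    // Before: a strided store fed by a same-strided, non-volatile load.
    void copyLoop(int *A, const int *B, int n) {
      for (int i = 0; i != n; ++i)
        A[i] = B[i];
    }

    // After: one call emitted in the loop preheader; the byte count is
    // (BECount + 1) * StoreSize and the alignment is the smaller of the
    // store's and the load's alignment.
    void copyMemcpy(int *A, const int *B, int n) {
      std::memcpy(A, B, static_cast<std::size_t>(n) * sizeof(int));
    }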
+
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ return recognizePopcount();
+}
+
+/// Check if the given conditional branch is based on the comparison between
+/// a variable and zero, and if the variable is non-zero, the control yields to
+/// the loop entry. If the branch matches the behavior, the variable involved
+/// in the comparison is returned. This function will be called to see if the
+/// precondition and postcondition of the loop are in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ return Cond->getOperand(0);
+
+ return nullptr;
+}
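
In IR terms, the two shapes matchCondition accepts look roughly like this
(hand-written sketch; value and label names are illustrative):

    ; returns %x: the non-zero case falls through into the loop
    %c = icmp ne i32 %x, 0
    br i1 %c, label %loop.entry, label %exit

    ; the inverted form is also accepted
    %c = icmp eq i32 %x, 0
    br i1 %c, label %exit, label %loop.entry

A compare against any non-zero constant, or a non-icmp condition, makes the
matcher return nullptr.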
+
+/// Return true iff the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction counting the population bit.
+/// 2) \p CntPhi is set to the corresponding phi node.
+/// 3) \p Var is set to the value whose population bits are being counted.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 != 0)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val;
+/// do {
+/// x1 = phi (x0, x2);
+/// cnt1 = phi(cnt0, cnt2);
+///
+/// cnt2 = cnt1 + 1;
+/// ...
+/// x2 = x1 & (x1 - 1);
+/// ...
+/// } while(x != 0);
+///
+/// loop-exit:
+/// \endcode
+static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Value *&Var) {
+  // Check to see if the loop-back branch matches this pattern:
+ // "if (a!=0) goto loop-entry".
+ BasicBlock *LoopEntry;
+ Instruction *DefX2, *CountInst;
+ Value *VarX1, *VarX0;
+ PHINode *PhiX, *CountPhi;
+
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ {
+ if (Value *T = matchCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
+ DefX2 = dyn_cast<Instruction>(T);
+ else
+ return false;
+ }
+
+ // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+ {
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
+ return false;
+
+ BinaryOperator *SubOneOp;
+
+ if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+ VarX1 = DefX2->getOperand(1);
+ else {
+ VarX1 = DefX2->getOperand(0);
+ SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+ }
+ if (!SubOneOp)
+ return false;
+
+ Instruction *SubInst = cast<Instruction>(SubOneOp);
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ if (!Dec ||
+ !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubInst->getOpcode() == Instruction::Add &&
+ Dec->isAllOnesValue()))) {
+ return false;
+ }
+ }
+
+ // step 3: Check the recurrence of variable X
+ {
+ PhiX = dyn_cast<PHINode>(VarX1);
+ if (!PhiX ||
+ (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+ return false;
+ }
+ }
+
+  // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
+ {
+ CountInst = nullptr;
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
+ IterE = LoopEntry->end();
+ Iter != IterE; Iter++) {
+ Instruction *Inst = &*Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+      // Check if the result of the instruction is live out of the loop.
+ bool LiveOutLoop = false;
+ for (User *U : Inst->users()) {
+ if ((cast<Instruction>(U))->getParent() != LoopEntry) {
+ LiveOutLoop = true;
+ break;
+ }
+ }
+
+ if (LiveOutLoop) {
+ CountInst = Inst;
+ CountPhi = Phi;
+ break;
+ }
+ }
+
+ if (!CountInst)
+ return false;
+ }
+
+ // step 5: check if the precondition is in this form:
+ // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+ {
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
+ if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+ return false;
+
+ CntInst = CountInst;
+ CntPhi = CountPhi;
+ Var = T;
+ }
+
+ return true;
+}
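
The reason this shape has a known trip count: `x &= x - 1` clears exactly one
set bit per iteration, so the body runs popcount(x0) times. A standalone
sketch (illustrative, not from the patch):

    // Kernighan's trick: each x &= x - 1 drops the lowest set bit.
    unsigned popcountLoop(unsigned x) {
      unsigned cnt = 0;
      while (x != 0) {
        x &= x - 1; // one set bit disappears per iteration
        ++cnt;
      }
      return cnt;   // equals the ctpop of the original x
    }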
+
+/// Recognizes a population count idiom in a non-countable loop.
+///
+/// If detected, transforms the relevant code to issue the popcount intrinsic
+/// function call, and returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizePopcount() {
+ if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
+ return false;
+
+  // Counting the population is usually done with a few arithmetic
+  // instructions. Such instructions can easily be "absorbed" by vacant slots
+  // in a non-compact loop. Therefore, recognizing the popcount idiom only
+  // makes sense in a compact loop.
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ BasicBlock *LoopBody = *(CurLoop->block_begin());
+ if (LoopBody->size() >= 20) {
+ // The loop is too big, bail out.
+ return false;
+ }
+
+ // It should have a preheader containing nothing but an unconditional branch.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ if (!PH)
+ return false;
+ if (&PH->front() != PH->getTerminator())
+ return false;
+ auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
+ if (!EntryBI || EntryBI->isConditional())
+ return false;
+
+  // It should have a precondition block where the generated popcount intrinsic
+ // function can be inserted.
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI || PreCondBI->isUnconditional())
+ return false;
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
+ return false;
+
+ transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
+ return true;
+}
+
+static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ DebugLoc DL) {
+ Value *Ops[] = {Val};
+ Type *Tys[] = {Val->getType()};
+
+ Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
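
For a 32-bit Var, the helper above boils down to a single intrinsic call (IR
sketch in the style of the comments elsewhere in this file; value names are
illustrative):

    %cnt = call i32 @llvm.ctpop.i32(i32 %x)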
+
+void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
+ Instruction *CntInst,
+ PHINode *CntPhi, Value *Var) {
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ const DebugLoc DL = CntInst->getDebugLoc();
+
+  // Assume that before the transformation, the loop looks as follows:
+ // if (x) // the precondition
+ // do { cnt++; x &= x - 1; } while(x);
+
+ // Step 1: Insert the ctpop instruction at the end of the precondition block
+ IRBuilder<> Builder(PreCondBr);
+ Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+ {
+ PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+ NewCount = PopCntZext =
+ Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+ if (NewCount != PopCnt)
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+ // TripCnt is exactly the number of iterations the loop has
+ TripCnt = NewCount;
+
+ // If the population counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+  // Step 2: Replace the precondition "if (x == 0) goto loop-exit" with
+  // "if (NewCount == 0) goto loop-exit". Without this change, the intrinsic
+  // call would be partially dead code, and downstream passes would drag
+  // it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond = cast<ICmpInst>(
+ Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCondBr->setCondition(NewPreCond);
+
+ RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
+ }
+
+  // Step 3: Note that the population count is exactly the trip count of the
+  // loop in question, which enables us to convert the loop from a noncountable
+  // one into a countable one. The benefit is twofold:
+ //
+ // - If the loop only counts population, the entire loop becomes dead after
+ // the transformation. It is a lot easier to prove a countable loop dead
+ // than to prove a noncountable one. (In some C dialects, an infinite loop
+ // isn't dead even if it computes nothing useful. In general, DCE needs
+  //    to prove a noncountable loop finite before safely deleting it.)
+ //
+ // - If the loop also performs something else, it remains alive.
+ // Since it is transformed to countable form, it can be aggressively
+ // optimized by some optimizations which are in general not applicable
+ // to a noncountable loop.
+ //
+  // After this step, this loop (conceptually) would look like the following:
+ // newcnt = __builtin_ctpop(x);
+ // t = newcnt;
+ // if (x)
+  //      do { cnt++; x &= x-1; t--; } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
+
+ Builder.SetInsertPoint(LbCond);
+ Instruction *TcDec = cast<Instruction>(
+ Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
+ "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred =
+ (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, ConstantInt::get(Ty, 0));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctpop().
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
+
+  // Step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index e125026..b4102fe 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -48,7 +48,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -112,7 +112,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9d7e57f..4295235 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -99,7 +99,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
return false;
if (St && !St->isSimple())
return false;
- MemInstr.push_back(I);
+ MemInstr.push_back(&*I);
}
}
@@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
}
}
- // We don't have a DepMatrix to check legality return false
+  // We don't have a DepMatrix to check legality; return false.
if (DepMatrix.size() == 0)
return false;
return true;
@@ -331,9 +331,9 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInterchange *Pass)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
- InnerLoopHasReduction(false) {}
+ LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -357,9 +357,10 @@ private:
Loop *OuterLoop;
Loop *InnerLoop;
- /// Scev analysis.
ScalarEvolution *SE;
- LoopInterchange *CurrentPass;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ bool PreserveLCSSA;
bool InnerLoopHasReduction;
};
@@ -371,7 +372,7 @@ public:
LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
- /// Check if the loop interchange is profitable
+ /// Check if the loop interchange is profitable.
bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix);
@@ -385,12 +386,12 @@ private:
ScalarEvolution *SE;
};
-/// LoopInterchangeTransform interchanges the loop
+/// LoopInterchangeTransform interchanges the loop.
class LoopInterchangeTransform {
public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
- LoopInterchange *Pass, BasicBlock *LoopNestExit,
+ BasicBlock *LoopNestExit,
bool InnerLoopContainsReductions)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
LoopExit(LoopNestExit),
@@ -424,21 +425,22 @@ private:
bool InnerLoopHasReduction;
};
-// Main LoopInterchange Pass
+// Main LoopInterchange Pass.
struct LoopInterchange : public FunctionPass {
static char ID;
ScalarEvolution *SE;
LoopInfo *LI;
DependenceAnalysis *DA;
DominatorTree *DT;
+ bool PreserveLCSSA;
LoopInterchange()
: FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysis>();
@@ -447,11 +449,13 @@ struct LoopInterchange : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<DependenceAnalysis>();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
// Build up a worklist of loop pairs to analyze.
SmallVector<LoopVector, 8> Worklist;
@@ -489,7 +493,7 @@ struct LoopInterchange : public FunctionPass {
unsigned selectLoopForInterchange(LoopVector LoopList) {
// TODO: Add a better heuristic to select the loop to be interchanged based
- // on the dependece matrix. Currently we select the innermost loop.
+ // on the dependence matrix. Currently we select the innermost loop.
return LoopList.size() - 1;
}
@@ -544,7 +548,7 @@ struct LoopInterchange : public FunctionPass {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Move the selected loop outwards to the best posible position.
+ // Move the selected loop outwards to the best possible position.
for (unsigned i = SelecLoopId; i > 0; i--) {
bool Interchanged =
processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
@@ -574,7 +578,8 @@ struct LoopInterchange : public FunctionPass {
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
- LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
+ PreserveLCSSA);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
return false;
@@ -586,7 +591,7 @@ struct LoopInterchange : public FunctionPass {
return false;
}
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
LoopNestExit, LIL.hasInnerLoopReduction());
LIT.transform();
DEBUG(dbgs() << "Loops interchanged\n");
@@ -655,7 +660,7 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
// We do not have any basic block in between; now make sure the outer header
- // and outer loop latch doesnt contain any unsafe instructions.
+ // and outer loop latch doesn't contain any unsafe instructions.
if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
containsUnsafeInstructionsInLatch(OuterLoopLatch))
return false;
@@ -698,9 +703,9 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
RecurrenceDescriptor RD;
+ InductionDescriptor ID;
PHINode *PHI = cast<PHINode>(I);
- ConstantInt *StepValue = nullptr;
- if (isInductionPHI(PHI, SE, StepValue))
+ if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
Inductions.push_back(PHI);
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
@@ -836,7 +841,7 @@ bool LoopInterchangeLegality::currentLimitations() {
else
FoundInduction = true;
}
- // The loop latch ended and we didnt find the induction variable return as
+  // The loop latch ended and we didn't find the induction variable; return as
// current limitation.
if (!FoundInduction)
return true;
@@ -867,12 +872,14 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
isa<PHINode>(OuterLoopPreHeader->begin()) ||
!OuterLoopPreHeader->getUniquePredecessor()) {
- OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ OuterLoopPreHeader =
+ InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
}
if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
InnerLoopPreHeader == OuterLoop->getHeader()) {
- InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ InnerLoopPreHeader =
+ InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
}
// TODO: The loops could not be interchanged due to current limitations in the
@@ -966,7 +973,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
unsigned OuterLoopId,
CharMatrix &DepMatrix) {
- // TODO: Add Better Profitibility checks.
+ // TODO: Add better profitability checks.
// e.g
// 1) Construct dependency matrix and move the one with no loop carried dep
// inside to enable vectorization.
@@ -980,7 +987,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
if (Cost < 0)
return true;
- // It is not profitable as per current cache profitibility model. But check if
+ // It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
bool ImprovesPar =
isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
@@ -996,7 +1003,7 @@ void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
return;
}
}
- assert(false && "Couldn't find loop");
+ llvm_unreachable("Couldn't find loop");
}
void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
@@ -1045,7 +1052,7 @@ bool LoopInterchangeTransform::transform() {
splitInnerLoopLatch(InnerIndexVar);
DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
- // Splits the inner loops phi nodes out into a seperate basic block.
+ // Splits the inner loops phi nodes out into a separate basic block.
splitInnerLoopHeader();
DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
}
@@ -1113,8 +1120,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
auto &ToList = InsertBefore->getParent()->getInstList();
auto &FromList = FromBB->getInstList();
- ToList.splice(InsertBefore, FromList, FromList.begin(),
- FromBB->getTerminator());
+ ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
+ FromBB->getTerminator()->getIterator());
}
void LoopInterchangeTransform::adjustOuterLoopPreheader() {
@@ -1181,8 +1188,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
return false;
- BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor();
- if (!InnerLoopHeaderSucessor)
+ BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
+ if (!InnerLoopHeaderSuccessor)
return false;
// Adjust Loop Preheader and headers
@@ -1198,11 +1205,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
OuterLoopHeaderBI->setSuccessor(i, LoopExit);
else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
- OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor);
+ OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
}
// Adjust reduction PHI's now that the incoming block has changed.
- updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader,
+ updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
OuterLoopHeader);
BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
@@ -1286,10 +1293,10 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
new file mode 100644
index 0000000..1064d08
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -0,0 +1,566 @@
+//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a loop-aware load elimination pass.
+//
+// It uses LoopAccessAnalysis to identify loop-carried dependences with a
+// distance of one between stores and loads. These form the candidates for the
+// transformation. The source value of each store is then propagated to the user
+// of the corresponding load. This makes the load dead.
+//
+// The pass can also version the loop and add memchecks in order to prove that
+// may-aliasing stores can't change the value in memory before it's read by the
+// load.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include <forward_list>
+
+#define LLE_OPTION "loop-load-elim"
+#define DEBUG_TYPE LLE_OPTION
+
+using namespace llvm;
+
+static cl::opt<unsigned> CheckPerElim(
+ "runtime-check-per-loop-load-elim", cl::Hidden,
+ cl::desc("Max number of memchecks allowed per eliminated load on average"),
+ cl::init(1));
+
+static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
+ "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Load Elimination"));
+
+
+STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
+
+namespace {
+
+/// \brief Represent a store-to-forwarding candidate.
+struct StoreToLoadForwardingCandidate {
+ LoadInst *Load;
+ StoreInst *Store;
+
+ StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
+ : Load(Load), Store(Store) {}
+
+ /// \brief Return true if the dependence from the store to the load has a
+ /// distance of one. E.g. A[i+1] = A[i]
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
+ Value *LoadPtr = Load->getPointerOperand();
+ Value *StorePtr = Store->getPointerOperand();
+ Type *LoadPtrType = LoadPtr->getType();
+ Type *LoadType = LoadPtrType->getPointerElementType();
+
+ assert(LoadPtrType->getPointerAddressSpace() ==
+ StorePtr->getType()->getPointerAddressSpace() &&
+ LoadType == StorePtr->getType()->getPointerElementType() &&
+ "Should be a known dependence");
+
+ auto &DL = Load->getParent()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
+
+ auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
+ auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
+
+ // We don't need to check non-wrapping here because forward/backward
+ // dependence wouldn't be valid if these weren't monotonic accesses.
+ auto *Dist = cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+ const APInt &Val = Dist->getAPInt();
+ return Val.abs() == TypeByteSize;
+ }
+
+ Value *getLoadPtr() const { return Load->getPointerOperand(); }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const StoreToLoadForwardingCandidate &Cand) {
+ OS << *Cand.Store << " -->\n";
+ OS.indent(2) << *Cand.Load << "\n";
+ return OS;
+ }
+#endif
+};
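
Concretely, for a 4-byte element type the distance test above accepts the
classic recurrence (hedged C++ sketch; the array name is illustrative):

    // A[i+1] = f(A[i]): StorePtr - LoadPtr == 4 == the alloc size of int,
    // so the value stored in iteration i is the one loaded in iteration i+1.
    void recurrence(int *A, int n) {
      for (int i = 0; i < n - 1; ++i)
        A[i + 1] = A[i] + 1;
    }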
+
+/// \brief Check if the store dominates all latches, so that, as long as there
+/// is no intervening store, this value will be loaded in the next iteration.
+bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
+ DominatorTree *DT) {
+ SmallVector<BasicBlock *, 8> Latches;
+ L->getLoopLatches(Latches);
+ return std::all_of(Latches.begin(), Latches.end(),
+ [&](const BasicBlock *Latch) {
+ return DT->dominates(StoreBlock, Latch);
+ });
+}
+
+/// \brief The per-loop class that does most of the work.
+class LoadEliminationForLoop {
+public:
+ LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
+ DominatorTree *DT)
+ : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {}
+
+ /// \brief Look through the loop-carried and loop-independent dependences in
+ /// this loop and find store->load dependences.
+ ///
+ /// Note that no candidate is returned if LAA has failed to analyze the loop
+ /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
+ std::forward_list<StoreToLoadForwardingCandidate>
+ findStoreToLoadDependences(const LoopAccessInfo &LAI) {
+ std::forward_list<StoreToLoadForwardingCandidate> Candidates;
+
+ const auto *Deps = LAI.getDepChecker().getDependences();
+ if (!Deps)
+ return Candidates;
+
+    // Find store->load dependences (consequently true dependences). Both
+    // lexically
+ // forward and backward dependences qualify. Disqualify loads that have
+ // other unknown dependences.
+
+ SmallSet<Instruction *, 4> LoadsWithUnknownDepedence;
+
+ for (const auto &Dep : *Deps) {
+ Instruction *Source = Dep.getSource(LAI);
+ Instruction *Destination = Dep.getDestination(LAI);
+
+ if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
+ if (isa<LoadInst>(Source))
+ LoadsWithUnknownDepedence.insert(Source);
+ if (isa<LoadInst>(Destination))
+ LoadsWithUnknownDepedence.insert(Destination);
+ continue;
+ }
+
+ if (Dep.isBackward())
+ // Note that the designations source and destination follow the program
+ // order, i.e. source is always first. (The direction is given by the
+ // DepType.)
+ std::swap(Source, Destination);
+ else
+ assert(Dep.isForward() && "Needs to be a forward dependence");
+
+ auto *Store = dyn_cast<StoreInst>(Source);
+ if (!Store)
+ continue;
+ auto *Load = dyn_cast<LoadInst>(Destination);
+ if (!Load)
+ continue;
+ Candidates.emplace_front(Load, Store);
+ }
+
+ if (!LoadsWithUnknownDepedence.empty())
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
+ return LoadsWithUnknownDepedence.count(C.Load);
+ });
+
+ return Candidates;
+ }
+
+ /// \brief Return the index of the instruction according to program order.
+ unsigned getInstrIndex(Instruction *Inst) {
+ auto I = InstOrder.find(Inst);
+ assert(I != InstOrder.end() && "No index for instruction");
+ return I->second;
+ }
+
+ /// \brief If a load has multiple candidates associated (i.e. different
+ /// stores), it means that it could be forwarding from multiple stores
+ /// depending on control flow. Remove these candidates.
+ ///
+ /// Here, we rely on LAA to include the relevant loop-independent dependences.
+ /// LAA is known to omit these in the very simple case when the read and the
+  /// write within an alias set always take place using the *same* pointer.
+ ///
+ /// However, we know that this is not the case here, i.e. we can rely on LAA
+  /// to provide us with loop-independent dependences for the cases we're
+  /// interested in. Consider, for example, the case where a loop-independent
+  /// dependence S1->S2 invalidates the forwarding S3->S2.
+ ///
+ /// A[i] = ... (S1)
+ /// ... = A[i] (S2)
+ /// A[i+1] = ... (S3)
+ ///
+ /// LAA will perform dependence analysis here because there are two
+ /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
+ void removeDependencesFromMultipleStores(
+ std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
+ // If Store is nullptr it means that we have multiple stores forwarding to
+    // this load.
+ typedef DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>
+ LoadToSingleCandT;
+ LoadToSingleCandT LoadToSingleCand;
+
+ for (const auto &Cand : Candidates) {
+ bool NewElt;
+ LoadToSingleCandT::iterator Iter;
+
+ std::tie(Iter, NewElt) =
+ LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
+ if (!NewElt) {
+ const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
+ // Already multiple stores forward to this load.
+ if (OtherCand == nullptr)
+ continue;
+
+        // Handle the very basic case when the two stores are in the same
+ // block so deciding which one forwards is easy. The later one forwards
+ // as long as they both have a dependence distance of one to the load.
+ if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
+ Cand.isDependenceDistanceOfOne(PSE) &&
+ OtherCand->isDependenceDistanceOfOne(PSE)) {
+          // They are in the same block; the later one will forward to the load.
+ if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
+ OtherCand = &Cand;
+ } else
+ OtherCand = nullptr;
+ }
+ }
+
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
+ if (LoadToSingleCand[Cand.Load] != &Cand) {
+ DEBUG(dbgs() << "Removing from candidates: \n" << Cand
+ << " The load may have multiple stores forwarding to "
+ << "it\n");
+ return true;
+ }
+ return false;
+ });
+ }
+
+  /// \brief Given two pointer operations by their RuntimePointerChecking
+ /// indices, return true if they require an alias check.
+ ///
+ /// We need a check if one is a pointer for a candidate load and the other is
+ /// a pointer for a possibly intervening store.
+ bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
+ const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
+ const std::set<Value *> &CandLoadPtrs) {
+ Value *Ptr1 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
+ Value *Ptr2 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
+ return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
+ (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
+ }
+
+ /// \brief Return pointers that are possibly written to on the path from a
+ /// forwarding store to a load.
+ ///
+ /// These pointers need to be alias-checked against the forwarding candidates.
+ SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+ // From FirstStore to LastLoad neither of the elimination candidate loads
+ // should overlap with any of the stores.
+ //
+ // E.g.:
+ //
+ // st1 C[i]
+ // ld1 B[i] <-------,
+ // ld0 A[i] <----, | * LastLoad
+ // ... | |
+ // st2 E[i] | |
+ // st3 B[i+1] -- | -' * FirstStore
+ // st0 A[i+1] ---'
+ // st4 D[i]
+ //
+ // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
+ // ld0.
+
+ LoadInst *LastLoad =
+ std::max_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Load) < getInstrIndex(B.Load);
+ })
+ ->Load;
+ StoreInst *FirstStore =
+ std::min_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Store) <
+ getInstrIndex(B.Store);
+ })
+ ->Store;
+
+ // We're looking for stores after the first forwarding store until the end
+ // of the loop, then from the beginning of the loop until the last
+    // forwarded-to load. Collect the pointers of these stores.
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
+
+ auto InsertStorePtr = [&](Instruction *I) {
+ if (auto *S = dyn_cast<StoreInst>(I))
+ PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
+ };
+ const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
+ std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
+ MemInstrs.end(), InsertStorePtr);
+ std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
+ InsertStorePtr);
+
+ return PtrsWrittenOnFwdingPath;
+ }
+
+ /// \brief Determine the pointer alias checks to prove that there are no
+ /// intervening stores.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
+ findPointersWrittenOnForwardingPath(Candidates);
+
+ // Collect the pointers of the candidate loads.
+ // FIXME: SmallSet does not work with std::inserter.
+ std::set<Value *> CandLoadPtrs;
+ std::transform(Candidates.begin(), Candidates.end(),
+ std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
+ std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+
+ const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (auto PtrIdx1 : Check.first->Members)
+ for (auto PtrIdx2 : Check.second->Members)
+ if (needsChecking(PtrIdx1, PtrIdx2,
+ PtrsWrittenOnFwdingPath, CandLoadPtrs))
+ return true;
+ return false;
+ });
+
+ DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+
+ return Checks;
+ }
+
+ /// \brief Perform the transformation for a candidate.
+ void
+ propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
+ SCEVExpander &SEE) {
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ Value *Ptr = Cand.Load->getPointerOperand();
+ auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
+ auto *PH = L->getLoopPreheader();
+ Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
+ PH->getTerminator());
+ Value *Initial =
+ new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
+ PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
+ &L->getHeader()->front());
+ PHI->addIncoming(Initial, PH);
+ PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
+
+ Cand.Load->replaceAllUsesWith(PHI);
+ }
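
Source-level view of the same rewrite (hedged C++ sketch mirroring the IR
comment above; function and array names are illustrative):

    // Before: the load A[i] re-reads what the previous iteration stored.
    void before(int *A, int *Out, int n) {
      for (int i = 0; i < n; ++i) {
        Out[i] = A[i] * 2;   // %x = load %gep_i
        A[i + 1] = Out[i];   // store %y, %gep_i_plus_1
      }
    }

    // After: the initial load moves to the preheader and a rotating value
    // (the PHI) forwards each stored value into the next iteration.
    void after(int *A, int *Out, int n) {
      int fwd = A[0];        // %x.initial in the preheader
      for (int i = 0; i < n; ++i) {
        Out[i] = fwd * 2;    // uses %x.storeforward instead of the dead load
        A[i + 1] = Out[i];
        fwd = Out[i];        // incoming value from the latch
      }
    }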
+
+ /// \brief Top-level driver for each loop: find store->load forwarding
+ /// candidates, add run-time checks and perform transformation.
+ bool processLoop() {
+ DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
+ << "\" checking " << *L << "\n");
+ // Look for store-to-load forwarding cases across the
+ // backedge. E.g.:
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ // First start with store->load dependences.
+ auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Generate an index for each load and store according to the original
+ // program order. This will be used later.
+ InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
+
+ // To keep things simple for now, remove those where the load is potentially
+ // fed by multiple stores.
+ removeDependencesFromMultipleStores(StoreToLoadDependences);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Filter the candidates further.
+ SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
+ unsigned NumForwarding = 0;
+    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
+ DEBUG(dbgs() << "Candidate " << Cand);
+      // Make sure that the stored value is available everywhere in the loop in
+ // the next iteration.
+ if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
+ continue;
+
+ // Check whether the SCEV difference is the same as the induction step,
+ // thus we load the value in the next iteration.
+ if (!Cand.isDependenceDistanceOfOne(PSE))
+ continue;
+
+ ++NumForwarding;
+ DEBUG(dbgs()
+ << NumForwarding
+ << ". Valid store-to-load forwarding across the loop backedge\n");
+ Candidates.push_back(Cand);
+ }
+ if (Candidates.empty())
+ return false;
+
+ // Check intervening may-alias stores. These need runtime checks for alias
+ // disambiguation.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
+ collectMemchecks(Candidates);
+
+ // Too many checks are likely to outweigh the benefits of forwarding.
+ if (Checks.size() > Candidates.size() * CheckPerElim) {
+ DEBUG(dbgs() << "Too many run-time checks needed.\n");
+ return false;
+ }
+
+ if (LAI.PSE.getUnionPredicate().getComplexity() >
+ LoadElimSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
+    // Point of no return; start the transformation. First, version the loop if
+ // necessary.
+ if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+ LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
+ LV.setAliasChecks(std::move(Checks));
+ LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LV.versionLoop();
+ }
+
+ // Next, propagate the value stored by the store to the users of the load.
+ // Also for the first iteration, generate the initial value of the load.
+ SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+ "storeforward");
+ for (const auto &Cand : Candidates)
+ propagateStoredValueToLoadUsers(Cand, SEE);
+ NumLoopLoadEliminted += NumForwarding;
+
+ return true;
+ }
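
With the defaults above, the cost gate is simple arithmetic:
runtime-check-per-loop-load-elim is 1, so forwarding, say, three loads is
abandoned as soon as a fourth memcheck would be required, and a union
predicate whose complexity exceeds the 8-check SCEV threshold likewise aborts
the transformation.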
+
+private:
+ Loop *L;
+
+ /// \brief Maps the load/store instructions to their index according to
+ /// program order.
+ DenseMap<Instruction *, unsigned> InstOrder;
+
+ // Analyses used.
+ LoopInfo *LI;
+ const LoopAccessInfo &LAI;
+ DominatorTree *DT;
+ PredicatedScalarEvolution PSE;
+};
+
+/// \brief The pass. Most of the work is delegated to the per-loop
+/// LoadEliminationForLoop class.
+class LoopLoadElimination : public FunctionPass {
+public:
+ LoopLoadElimination() : FunctionPass(ID) {
+ initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+    // Build up a worklist of inner loops to process. This is necessary as
+    // versioning a loop creates new loops and can invalidate iterators across
+    // the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+        // We only handle innermost loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ // The actual work is performed by LoadEliminationForLoop.
+ LoadEliminationForLoop LEL(L, LI, LAI, DT);
+ Changed |= LEL.processLoop();
+ }
+
+ // Process each loop nest in the function.
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+char LoopLoadElimination::ID;
+static const char LLE_name[] = "Loop Load Elimination";
+
+INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopLoadEliminationPass() {
+ return new LoopLoadElimination();
+}
+}
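
Since the pass registers under LLE_OPTION, it can be exercised in isolation
from opt; a typical invocation (the input file name is illustrative):

    opt -loop-load-elim -S input.ll -o output.ll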
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6..27c2d88 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -147,12 +147,12 @@ namespace {
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -162,11 +162,15 @@ namespace {
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
DominatorTree *DT;
+ bool PreserveLCSSA;
typedef SmallVector<Instruction *, 16> SmallInstructionVector;
typedef SmallSet<Instruction *, 16> SmallInstructionSet;
- // A chain of isomorphic instructions, indentified by a single-use PHI,
+ // Map between induction variable and its increment
+ DenseMap<Instruction *, int64_t> IVToIncMap;
+
+ // A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
// loop.
struct SimpleLoopReduction {
@@ -300,22 +304,6 @@ namespace {
// The functions below can be called after we've finished processing all
// instructions in the loop, and we know which reductions were selected.
- // Is the provided instruction the PHI of a reduction selected for
- // rerolling?
- bool isSelectedPHI(Instruction *J) {
- if (!isa<PHINode>(J))
- return false;
-
- for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
- RI != RIE; ++RI) {
- int i = *RI;
- if (cast<Instruction>(J) == PossibleReds[i].getPHI())
- return true;
- }
-
- return false;
- }
-
bool validateSelected();
void replaceSelected();
@@ -335,7 +323,7 @@ namespace {
// x[i*3+1] = y2
// x[i*3+2] = y3
//
- // Base instruction -> i*3
+ // Base instruction -> i*3
// +---+----+
// / | \
// ST[y1] +1 +2 <-- Roots
@@ -366,8 +354,11 @@ namespace {
struct DAGRootTracker {
DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
ScalarEvolution *SE, AliasAnalysis *AA,
- TargetLibraryInfo *TLI)
- : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
+ TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA,
+ DenseMap<Instruction *, int64_t> &IncrMap)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
+ PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -413,11 +404,14 @@ namespace {
ScalarEvolution *SE;
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ bool PreserveLCSSA;
// The loop induction variable.
Instruction *IV;
// Loop step amount.
- uint64_t Inc;
+ int64_t Inc;
// Loop reroll count; if Inc == 1, this records the scaling applied
// to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
// If Inc is not 1, Scale = Inc.
@@ -430,6 +424,8 @@ namespace {
// they are used in (or specially, IL_All for instructions
// used in the loop increment mechanism).
UsesTy Uses;
+ // Map between induction variable and its increment
+ DenseMap<Instruction *, int64_t> &IVToIncMap;
};
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
@@ -442,10 +438,10 @@ namespace {
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -477,21 +473,20 @@ void LoopReroll::collectPossibleIVs(Loop *L,
continue;
if (const SCEVAddRecExpr *PHISCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
if (PHISCEV->getLoop() != L)
continue;
if (!PHISCEV->isAffine())
continue;
if (const SCEVConstant *IncSCEV =
dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
- if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ const APInt &AInt = IncSCEV->getAPInt().abs();
+ if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
continue;
- if (IncSCEV->getValue()->uge(MaxInc))
- continue;
-
- DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
- *PHISCEV << "\n");
- PossibleIVs.push_back(I);
+ IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
+ << "\n");
+ PossibleIVs.push_back(&*I);
}
}
}
@@ -552,7 +547,7 @@ void LoopReroll::collectPossibleReductions(Loop *L,
if (!I->getType()->isSingleValueType())
continue;
- SimpleLoopReduction SLR(I, L);
+ SimpleLoopReduction SLR(&*I, L);
if (!SLR.valid())
continue;
@@ -699,17 +694,11 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- int64_t V = CI->getValue().getSExtValue();
+ int64_t V = std::abs(CI->getValue().getSExtValue());
if (Roots.find(V) != Roots.end())
// No duplicates, please.
return false;
- // FIXME: Add support for negative values.
- if (V < 0) {
- DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
- return false;
- }
-
Roots[V] = cast<Instruction>(I);
}
@@ -731,7 +720,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
unsigned NumBaseUses = BaseUsers.size();
if (NumBaseUses == 0)
NumBaseUses = Roots.begin()->second->getNumUses();
-
+
// Check that every node has the same number of users.
for (auto &KV : Roots) {
if (KV.first == 0)
@@ -744,7 +733,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- return true;
+ return true;
}
bool LoopReroll::DAGRootTracker::
@@ -787,7 +776,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
if (!collectPossibleRoots(IVU, V))
return false;
- // If we didn't get a root for index zero, then IVU must be
+ // If we didn't get a root for index zero, then IVU must be
// subsumed.
if (V.find(0) == V.end())
SubsumedInsts.insert(IVU);
@@ -818,13 +807,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
}
bool LoopReroll::DAGRootTracker::findRoots() {
-
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
- Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
- getValue()->getZExtValue();
+ Inc = IVToIncMap[IV];
assert(RootSets.empty() && "Unclean state!");
- if (Inc == 1) {
+ if (std::abs(Inc) == 1) {
for (auto *IVU : IV->users()) {
if (isLoopIncrement(IVU, IV))
LoopIncs.push_back(cast<Instruction>(IVU));
@@ -996,6 +982,25 @@ bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
return false;
}
+static bool isIgnorableInst(const Instruction *I) {
+ if (isa<DbgInfoIntrinsic>(I))
+ return true;
+ const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case llvm::Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // TODO: the following intrinsics may also be whitelisted:
+ // lifetime_start, lifetime_end, invariant_start, invariant_end
+ return true;
+ }
+ return false;
+}
+
bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// We now need to check for equivalence of the use graph of each root with
// that of the primary induction variable (excluding the roots). Our goal
@@ -1029,7 +1034,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
- if (KV.second.count() != 1) {
+ if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
<< *KV.first << " (#uses=" << KV.second.count() << ")\n");
return false;
@@ -1103,15 +1108,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
" vs. " << *RootInst << "\n");
return false;
}
-
+
RootIt = TryIt;
RootInst = TryIt->first;
}
// All instructions between the last root and this root
- // may belong to some other iteration. If they belong to a
+ // may belong to some other iteration. If they belong to a
// future iteration, then they're dangerous to alias with.
- //
+ //
// Note that because we allow a limited amount of flexibility in the order
// that we visit nodes, LastRootIt might be *before* RootIt, in which
// case we've already checked this set of instructions so we shouldn't
@@ -1267,6 +1272,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}
+ bool Negative = IVToIncMap[IV] < 0;
const DataLayout &DL = Header->getModule()->getDataLayout();
// We need to create a new induction variable for each different BaseInst.
@@ -1275,13 +1281,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEVAddRecExpr *RealIVSCEV =
cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
const SCEV *Start = RealIVSCEV->getStart();
- const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
- (SE->getAddRecExpr(Start,
- SE->getConstant(RealIVSCEV->getType(), 1),
- L, SCEV::FlagAnyWrap));
+ const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(
+ Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
+ SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front());
for (auto &KV : Uses) {
if (KV.second.find_first() == 0)
@@ -1294,8 +1299,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
// Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV =
- SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+ const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
+ ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
Value *ICMinus1; // Iteration count minus 1
if (isa<SCEVConstant>(ICMinus1SCEV)) {
@@ -1303,7 +1308,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, Parent);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
Preheader->getTerminator());
@@ -1444,13 +1449,14 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
+ IVToIncMap);
if (!DAGRoots.findRoots())
return false;
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
*IV << "\n");
-
+
if (!DAGRoots.validate(Reductions))
return false;
if (!Reductions.validateSelected())
@@ -1469,11 +1475,12 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
@@ -1490,13 +1497,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
- const SCEV *IterCount =
- SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
// First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
+ IVToIncMap.clear();
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index a675e12..5e6c2da 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,11 +13,15 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -41,95 +45,6 @@ DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
-namespace {
-
- class LoopRotate : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
- initializeLoopRotatePass(*PassRegistry::getPassRegistry());
- if (SpecifiedMaxHeaderSize == -1)
- MaxHeaderSize = DefaultRotationThreshold;
- else
- MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
- }
-
- // LCSSA form makes instruction renaming easier.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L, bool SimplifiedLatch);
-
- private:
- unsigned MaxHeaderSize;
- LoopInfo *LI;
- const TargetTransformInfo *TTI;
- AssumptionCache *AC;
- DominatorTree *DT;
- };
-}
-
-char LoopRotate::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
- return new LoopRotate(MaxHeaderSize);
-}
-
-/// Rotate Loop L as many times as possible. Return true if
-/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
- // Save the loop metadata.
- MDNode *LoopMD = L->getLoopID();
-
- Function &F = *L->getHeader()->getParent();
-
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
- // Simplify the loop latch before attempting to rotate the header
- // upward. Rotation may not be needed if the loop tail can be folded into the
- // loop exit.
- bool SimplifiedLatch = simplifyLoopLatch(L);
-
- // One loop can be rotated multiple times.
- bool MadeChange = false;
- while (rotateLoop(L, SimplifiedLatch)) {
- MadeChange = true;
- SimplifiedLatch = false;
- }
-
- // Restore the loop metadata.
- // NB! We presume LoopRotation DOESN'T ADD its own metadata.
- if ((MadeChange || SimplifiedLatch) && LoopMD)
- L->setLoopID(LoopMD);
-
- return MadeChange;
-}
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
/// old header into the preheader. If there were uses of the values produced by
@@ -147,7 +62,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// as necessary.
SSAUpdater SSA;
for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
+ Value *OrigHeaderVal = &*I;
// If there are no uses of the value (e.g. because it returns void), there
// is nothing to rewrite.
@@ -196,127 +111,6 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
-/// Determine whether the instructions in this range may be safely and cheaply
-/// speculated. This is not an important enough situation to develop complex
-/// heuristics. We handle a single arithmetic instruction along with any type
-/// conversions.
-static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
- BasicBlock::iterator End, Loop *L) {
- bool seenIncrement = false;
- bool MultiExitLoop = false;
-
- if (!L->getExitingBlock())
- MultiExitLoop = true;
-
- for (BasicBlock::iterator I = Begin; I != End; ++I) {
-
- if (!isSafeToSpeculativelyExecute(I))
- return false;
-
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
- default:
- return false;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return false;
- // fall-thru to increment case
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr: {
- Value *IVOpnd = !isa<Constant>(I->getOperand(0))
- ? I->getOperand(0)
- : !isa<Constant>(I->getOperand(1))
- ? I->getOperand(1)
- : nullptr;
- if (!IVOpnd)
- return false;
-
- // If increment operand is used outside of the loop, this speculation
- // could cause extra live range interference.
- if (MultiExitLoop) {
- for (User *UseI : IVOpnd->users()) {
- auto *UserInst = cast<Instruction>(UseI);
- if (!L->contains(UserInst))
- return false;
- }
- }
-
- if (seenIncrement)
- return false;
- seenIncrement = true;
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- // ignore type conversions
- break;
- }
- }
- return true;
-}
-
-/// Fold the loop tail into the loop exit by speculating the loop tail
-/// instructions. Typically, this is a single post-increment. In the case of a
-/// simple 2-block loop, hoisting the increment can be much better than
-/// duplicating the entire loop header. In the case of loops with early exits,
-/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
-/// canonical form so downstream passes can handle it.
-///
-/// I don't believe this invalidates SCEV.
-bool LoopRotate::simplifyLoopLatch(Loop *L) {
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch || Latch->hasAddressTaken())
- return false;
-
- BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!Jmp || !Jmp->isUnconditional())
- return false;
-
- BasicBlock *LastExit = Latch->getSinglePredecessor();
- if (!LastExit || !L->isLoopExiting(LastExit))
- return false;
-
- BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
- if (!BI)
- return false;
-
- if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L))
- return false;
-
- DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
- << LastExit->getName() << "\n");
-
- // Hoist the instructions from Latch into LastExit.
- LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
- return true;
-}
-
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -327,7 +121,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
/// rotation. LoopRotate should be repeatable and converge to a canonical
/// form. This property is satisfied because simplifying the loop latch can only
/// happen once across multiple invocations of the LoopRotate pass.
-bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
+static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -382,7 +179,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ if (SE)
SE->forgetLoop(L);
DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
@@ -420,7 +217,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// If the instruction's operands are invariant and it doesn't read or write
// memory, then it is safe to hoist. Doing this doesn't change the order of
@@ -465,8 +262,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
TerminatorInst *TI = OrigHeader->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -607,3 +404,221 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
++NumRotated;
return true;
}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End, Loop *L) {
+ bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(&*I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ Value *IVOpnd = !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1))
+ ? I->getOperand(1)
+ : nullptr;
+ if (!IVOpnd)
+ return false;
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
+ return false;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
+ Latch->begin(), Jmp->getIterator());
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+ return true;
+}
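// [Editor's illustration, not part of the patch] The shape simplifyLoopLatch
// targets is a latch holding only a cheap, speculable increment. Hoisting that
// increment into the exiting block empties the latch, which can then be
// deleted, leaving a single-block loop in canonical form:
//
//   exiting: ...; br %cond, latch, exit        exiting: ...; i++
//   latch:   i++; br header               =>            br %cond, header, exit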
+
+/// Rotate \c L as many times as possible. Return true if the loop is rotated
+/// at least once.
+static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE) {
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT);
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) {
+ MadeChange = true;
+ SimplifiedLatch = false;
+ }
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
+ return MadeChange;
+}
+
+namespace {
+
+class LoopRotate : public LoopPass {
+ unsigned MaxHeaderSize;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipOptnoneFunction(L))
+ return false;
+ Function &F = *L->getHeader()->getParent();
+
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ auto *SE = SEWP ? &SEWP->getSE() : nullptr;
+
+ return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
+ }
+};
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
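// [Editor's illustration, not part of the patch] The source-level effect of
// rotation, sketched with hypothetical functions: a top-tested loop becomes a
// guarded do-while, moving the exit test into the latch so the header no
// longer has to run before every trip.
static long sumTopTested(const long *a, int n) {
  long s = 0;
  for (int i = 0; i < n; ++i) // header carries the test
    s += a[i];
  return s;
}
static long sumRotated(const long *a, int n) {
  long s = 0;
  int i = 0;
  if (i < n) {   // guard: the one duplicated copy of the header test
    do {
      s += a[i]; // rotated body
      ++i;
    } while (i < n); // test now sits in the latch
  }
  return s;
}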
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4b59f3d..2101225 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -105,10 +105,33 @@ static bool StressIVChain = false;
namespace {
-/// RegSortData - This class holds data which is used to order reuse candidates.
+struct MemAccessTy {
+ /// Used in situations where the accessed memory type is unknown.
+ static const unsigned UnknownAddressSpace = ~0u;
+
+ Type *MemTy;
+ unsigned AddrSpace;
+
+ MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
+
+ MemAccessTy(Type *Ty, unsigned AS) :
+ MemTy(Ty), AddrSpace(AS) {}
+
+ bool operator==(MemAccessTy Other) const {
+ return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
+ }
+
+ bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
+
+ static MemAccessTy getUnknown(LLVMContext &Ctx) {
+ return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+ }
+};
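// [Editor's illustration, not part of the patch] Why the address space rides
// along with the type: on targets with multiple address spaces, two accesses
// of the same IR type can have different addressing-mode legality, so keying
// on Type* alone would conflate them. Hypothetical usage (Int32Ty assumed):
//
//   MemAccessTy A(Int32Ty, /*AddrSpace=*/0); // generic pointer
//   MemAccessTy B(Int32Ty, /*AddrSpace=*/3); // e.g. GPU local memory
//   assert(A != B); // distinct for LSR despite identical MemTy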
+
+/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
- /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// This represents the set of LSRUse indices which reference
/// a particular register.
SmallBitVector UsedByIndices;
@@ -122,16 +145,14 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// RegUseTracker - Map register candidates to information about how they are
-/// used.
+/// Map register candidates to information about how they are used.
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
@@ -139,9 +160,9 @@ class RegUseTracker {
SmallVector<const SCEV *, 16> RegSequence;
public:
- void CountRegister(const SCEV *Reg, size_t LUIdx);
- void DropRegister(const SCEV *Reg, size_t LUIdx);
- void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+ void countRegister(const SCEV *Reg, size_t LUIdx);
+ void dropRegister(const SCEV *Reg, size_t LUIdx);
+ void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -160,7 +181,7 @@ public:
}
void
-RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
@@ -171,7 +192,7 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
@@ -180,7 +201,7 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
// Update RegUses. The data structure is not optimized for this purpose;
@@ -219,9 +240,8 @@ void RegUseTracker::clear() {
namespace {
-/// Formula - This class holds information that describes a formula for
-/// computing satisfying a use. It may include broken-out immediates and scaled
-/// registers.
+/// This class holds information that describes a formula for computing a
+/// value satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
/// Global base address used for complex addressing.
GlobalValue *BaseGV;
@@ -235,8 +255,8 @@ struct Formula {
/// The scale of any complex addressing.
int64_t Scale;
- /// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty. The canonical representation of a formula is
+ /// The list of "base" registers for this use. When this is non-empty, the
+ /// canonical representation of a formula is
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
/// #1 enforces that the scaled register is always used when at least two
@@ -247,31 +267,31 @@ struct Formula {
/// form.
SmallVector<const SCEV *, 4> BaseRegs;
- /// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when Scale is not zero.
+ /// The 'scaled' register for this use. This should be non-null when Scale is
+ /// not zero.
const SCEV *ScaledReg;
- /// UnfoldedOffset - An additional constant offset which added near the
- /// use. This requires a temporary register, but the offset itself can
- /// live in an add immediate field rather than a register.
+ /// An additional constant offset which is added near the use. This requires a
+ /// temporary register, but the offset itself can live in an add immediate
+ /// field rather than a register.
int64_t UnfoldedOffset;
Formula()
: BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
ScaledReg(nullptr), UnfoldedOffset(0) {}
- void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+ void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
bool isCanonical() const;
- void Canonicalize();
+ void canonicalize();
- bool Unscale();
+ bool unscale();
size_t getNumRegs() const;
Type *getType() const;
- void DeleteBaseReg(const SCEV *&S);
+ void deleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
@@ -283,7 +303,7 @@ struct Formula {
}
-/// DoInitialMatch - Recursion helper for InitialMatch.
+/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
@@ -336,10 +356,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
Bad.push_back(S);
}
-/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
-/// attempting to keep all loop-invariant and loop-computable values in a
-/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+/// Incorporate loop-variant parts of S into this Formula, attempting to keep
+/// all loop-invariant and loop-computable values in a single base register.
+void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
@@ -355,7 +374,7 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
- Canonicalize();
+ canonicalize();
}
/// \brief Check whether or not this formula satisfies the canonical
@@ -373,7 +392,7 @@ bool Formula::isCanonical() const {
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
-void Formula::Canonicalize() {
+void Formula::canonicalize() {
if (isCanonical())
return;
// So far we did not need this case. This is easy to implement but it is
@@ -394,7 +413,7 @@ void Formula::Canonicalize() {
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
-bool Formula::Unscale() {
+bool Formula::unscale() {
if (Scale != 1)
return false;
Scale = 0;
@@ -403,15 +422,14 @@ bool Formula::Unscale() {
return true;
}
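// [Editor's illustration, not part of the patch] The canonical-form invariants
// quoted above, restated over a toy formula (hypothetical MiniFormula; the
// real class stores SCEV pointers):
struct MiniFormula {
  unsigned NumBaseRegs = 0;
  bool HasScaledReg = false;
  long long Scale = 0;
  bool isCanonical() const {
    if (!HasScaledReg)
      return NumBaseRegs <= 1;            // reg1 + reg2 must use the scale slot
    return Scale != 1 || NumBaseRegs > 0; // a lone 1*reg canonicalizes to reg
  }
};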
-/// getNumRegs - Return the total number of register operands used by this
-/// formula. This does not include register uses implied by non-constant
-/// addrec strides.
+/// Return the total number of register operands used by this formula. This does
+/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
-/// getType - Return the type of this formula, if it has one, or null
-/// otherwise. This type is meaningless except for the bit size.
+/// Return the type of this formula, if it has one, or null otherwise. This type
+/// is meaningless except for the bit size.
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
@@ -419,21 +437,21 @@ Type *Formula::getType() const {
nullptr;
}
-/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
-void Formula::DeleteBaseReg(const SCEV *&S) {
+/// Delete the given base reg from the BaseRegs list.
+void Formula::deleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
-/// referencesReg - Test if this formula references the given register.
+/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
-/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
-/// which are used by uses other than the use with the given index.
+/// Test whether this formula uses registers which are used by uses other than
+/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
@@ -481,30 +499,29 @@ void Formula::print(raw_ostream &OS) const {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Formula::dump() const {
print(errs()); errs() << '\n';
}
-#endif
-/// isAddRecSExtable - Return true if the given addrec can be sign-extended
-/// without changing its value.
+/// Return true if the given addrec can be sign-extended without changing its
+/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
-/// isAddSExtable - Return true if the given add can be sign-extended
-/// without changing its value.
+/// Return true if the given add can be sign-extended without changing its
+/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given mul can be sign-extended
-/// without changing its value.
+/// Return true if the given mul can be sign-extended without changing its
+/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(),
@@ -512,12 +529,11 @@ static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
-/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
-/// and if the remainder is known to be zero, or null otherwise. If
-/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
-/// to Y, ignoring that the multiplication may overflow, which is useful when
-/// the result will be used in a context where the most significant bits are
-/// ignored.
+/// Return an expression for LHS /s RHS, if it can be determined and if the
+/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
+/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
+/// the multiplication may overflow, which is useful when the result will be
+/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
@@ -528,7 +544,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Handle a few RHS special cases.
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
if (RA.isAllOnesValue())
@@ -542,8 +558,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return nullptr;
- const APInt &LA = C->getValue()->getValue();
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &LA = C->getAPInt();
+ const APInt &RA = RC->getAPInt();
if (LA.srem(RA) != 0)
return nullptr;
return SE.getConstant(LA.sdiv(RA));
@@ -603,12 +619,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
return nullptr;
}
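// [Editor's illustration, not part of the patch] The constant/constant case of
// getExactSDiv reduced to plain integers: division is "exact" only when the
// remainder is zero; otherwise the caller receives null and gives up.
static bool exactSDiv(long long LA, long long RA, long long &Out) {
  if (RA == 0 || LA % RA != 0)
    return false; // inexact (or undefined): no folding
  Out = LA / RA;  // e.g. 12 /s 4 == 3, while 10 /s 4 is rejected
  return true;
}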
-/// ExtractImmediate - If S involves the addition of a constant integer value,
-/// return that integer value, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a constant integer value, return that integer
+/// value, and mutate S to point to a new SCEV with that value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ if (C->getAPInt().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
@@ -630,9 +645,8 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
return 0;
}
-/// ExtractSymbol - If S involves the addition of a GlobalValue address,
-/// return that symbol, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a GlobalValue address, return that symbol, and
+/// mutate S to point to a new SCEV with that value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
@@ -657,8 +671,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
return nullptr;
}
-/// isAddressUse - Returns true if the specified instruction is using the
-/// specified value as an address.
+/// Returns true if the specified instruction is using the specified value as an
+/// address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -682,12 +696,15 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
return isAddress;
}
-/// getAccessType - Return the type of the memory being accessed.
-static Type *getAccessType(const Instruction *Inst) {
- Type *AccessTy = Inst->getType();
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
- AccessTy = SI->getOperand(0)->getType();
- else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+/// Return the type of the memory being accessed.
+static MemAccessTy getAccessType(const Instruction *Inst) {
+ MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ AccessTy.MemTy = SI->getOperand(0)->getType();
+ AccessTy.AddrSpace = SI->getPointerAddressSpace();
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
@@ -696,21 +713,21 @@ static Type *getAccessType(const Instruction *Inst) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getArgOperand(0)->getType();
+ AccessTy.MemTy = II->getArgOperand(0)->getType();
break;
}
}
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
- AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace());
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
+ AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
return AccessTy;
}
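// [Editor's illustration, not part of the patch] Intent of the getAccessType
// change in rough pseudo-IR: same value type, different pointer address space,
// now yield distinct access types:
//
//   %x = load i32, i32* %p              -> MemAccessTy { i32, addrspace 0 }
//   store i32 %v, i32 addrspace(3)* %q  -> MemAccessTy { i32, addrspace 3 }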
-/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
@@ -793,9 +810,8 @@ static bool isHighCostExpansion(const SCEV *S,
return true;
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions is the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
+/// If any of the instructions in the specified set are trivially dead, delete
+/// them and see if this makes any of their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
@@ -842,7 +858,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
namespace {
-/// Cost - This class is used to measure and compare candidate formulae.
+/// This class is used to measure and compare candidate formulae.
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
@@ -905,7 +921,7 @@ private:
}
-/// RateRegister - Tally up interesting quantities from the given register.
+/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -951,9 +967,9 @@ void Cost::RateRegister(const SCEV *Reg,
SE.hasComputableLoopEvolution(Reg, L);
}
-/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it. Optional LoserRegs provides a way to declare any formula
-/// that refers to one of those regs an instant loser.
+/// Record this register in the set. If we haven't seen it before, rate
+/// it. Optional LoserRegs provides a way to declare any formula that refers to
+/// one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -1024,7 +1040,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
assert(isValid() && "invalid cost");
}
-/// Lose - Set this cost to a losing value.
+/// Set this cost to a losing value.
void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
@@ -1035,7 +1051,7 @@ void Cost::Lose() {
ScaleCost = ~0u;
}
-/// operator< - Choose the lower cost.
+/// Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
@@ -1061,37 +1077,35 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Cost::dump() const {
print(errs()); errs() << '\n';
}
-#endif
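// [Editor's illustration, not part of the patch] The "choose the lower cost"
// comparison above is lexicographic via std::tie; a self-contained sketch with
// hypothetical fields:
#include <tuple>
struct MiniCost {
  unsigned NumRegs, AddRecCost, ImmCost;
  bool operator<(const MiniCost &O) const {
    // Field-by-field comparison; the earliest differing field decides.
    return std::tie(NumRegs, AddRecCost, ImmCost) <
           std::tie(O.NumRegs, O.AddRecCost, O.ImmCost);
  }
};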
namespace {
-/// LSRFixup - An operand value in an instruction which is to be replaced
-/// with some equivalent, possibly strength-reduced, replacement.
+/// An operand value in an instruction which is to be replaced with some
+/// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
- /// UserInst - The instruction which will be updated.
+ /// The instruction which will be updated.
Instruction *UserInst;
- /// OperandValToReplace - The operand of the instruction which will
- /// be replaced. The operand may be used more than once; every instance
- /// will be replaced.
+ /// The operand of the instruction which will be replaced. The operand may be
+ /// used more than once; every instance will be replaced.
Value *OperandValToReplace;
- /// PostIncLoops - If this user is to use the post-incremented value of an
- /// induction variable, this variable is non-null and holds the loop
- /// associated with the induction variable.
+ /// If this user is to use the post-incremented value of an induction
+ /// variable, this variable is non-null and holds the loop associated with the
+ /// induction variable.
PostIncLoopSet PostIncLoops;
- /// LUIdx - The index of the LSRUse describing the expression which
- /// this fixup needs, minus an offset (below).
+ /// The index of the LSRUse describing the expression which this fixup needs,
+ /// minus an offset (below).
size_t LUIdx;
- /// Offset - A constant offset to be added to the LSRUse expression.
- /// This allows multiple fixups to share the same LSRUse with different
- /// offsets, for example in an unrolled loop.
+ /// A constant offset to be added to the LSRUse expression. This allows
+ /// multiple fixups to share the same LSRUse with different offsets, for
+ /// example in an unrolled loop.
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
@@ -1108,8 +1122,7 @@ LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
Offset(0) {}
-/// isUseFullyOutsideLoop - Test whether this fixup always uses its
-/// value outside of the given loop.
+/// Test whether this fixup always uses its value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
@@ -1149,16 +1162,15 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
-/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
+/// SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 4> getEmptyKey() {
SmallVector<const SCEV *, 4> V;
@@ -1182,17 +1194,17 @@ struct UniquifierDenseMapInfo {
}
};
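// [Editor's illustration, not part of the patch] A DenseMapInfo specialization
// supplies four static members; simplified here for int keys (the real one
// above builds sentinel SmallVectors and hashes their SCEV* elements):
struct IntDenseMapInfo {
  static int getEmptyKey() { return -1; }     // reserved, never a real key
  static int getTombstoneKey() { return -2; } // marks erased slots
  static unsigned getHashValue(int V) { return static_cast<unsigned>(V) * 37u; }
  static bool isEqual(int A, int B) { return A == B; }
};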
-/// LSRUse - This class holds the state that LSR keeps for each use in
-/// IVUsers, as well as uses invented by LSR itself. It includes information
-/// about what kinds of things can be folded into the user, information about
-/// the user itself, and information about how the use may be satisfied.
-/// TODO: Represent multiple users of the same expression in common?
+/// This class holds the state that LSR keeps for each use in IVUsers, as well
+/// as uses invented by LSR itself. It includes information about what kinds of
+/// things can be folded into the user, information about the user itself, and
+/// information about how the use may be satisfied. TODO: Represent multiple
+/// users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
- /// KindType - An enum for a kind of use, indicating what types of
- /// scaled and immediate operands it might support.
+ /// An enum for a kind of use, indicating what types of scaled and immediate
+ /// operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
@@ -1204,15 +1216,14 @@ public:
typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
KindType Kind;
- Type *AccessTy;
+ MemAccessTy AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
- /// AllFixupsOutsideLoop - This records whether all of the fixups using this
- /// LSRUse are outside of the loop, in which case some special-case heuristics
- /// may be used.
+ /// This records whether all of the fixups using this LSRUse are outside of
+ /// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop;
/// RigidFormula is set to true to guarantee that this use will be associated
@@ -1222,26 +1233,24 @@ public:
/// changing the formula.
bool RigidFormula;
- /// WidestFixupType - This records the widest use type for any fixup using
- /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
- /// max fixup widths to be equivalent, because the narrower one may be relying
- /// on the implicit truncation to truncate away bogus bits.
+ /// This records the widest use type for any fixup using this
+ /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
+ /// fixup widths to be equivalent, because the narrower one may be relying on
+ /// the implicit truncation to truncate away bogus bits.
Type *WidestFixupType;
- /// Formulae - A list of ways to build a value that can satisfy this user.
- /// After the list is populated, one of these is selected heuristically and
- /// used to formulate a replacement for OperandValToReplace in UserInst.
+ /// A list of ways to build a value that can satisfy this user. After the
+ /// list is populated, one of these is selected heuristically and used to
+ /// formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
- /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ /// The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
- MinOffset(INT64_MAX),
- MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true),
- RigidFormula(false),
- WidestFixupType(nullptr) {}
+ LSRUse(KindType K, MemAccessTy AT)
+ : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true), RigidFormula(false),
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1254,8 +1263,8 @@ public:
}
-/// HasFormula - Test whether this use as a formula which has the same
-/// registers as the given formula.
+/// Test whether this use has a formula with the same registers as the given
+/// formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
@@ -1264,9 +1273,8 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
return Uniquifier.count(Key);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
-/// The formula must be in canonical form.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise. The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
assert(F.isCanonical() && "Invalid canonical representation");
@@ -1300,14 +1308,14 @@ bool LSRUse::InsertFormula(const Formula &F) {
return true;
}
-/// DeleteFormula - Remove the given formula from this use's list.
+/// Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
}
-/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+/// Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
@@ -1320,7 +1328,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Update the RegTracker.
for (const SCEV *S : OldRegs)
if (!Regs.count(S))
- RegUses.DropRegister(S, LUIdx);
+ RegUses.dropRegister(S, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
@@ -1331,10 +1339,13 @@ void LSRUse::print(raw_ostream &OS) const {
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
- if (AccessTy->isPointerTy())
+ if (AccessTy.MemTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
- else
- OS << *AccessTy;
+ else {
+ OS << *AccessTy.MemTy;
+ }
+
+ OS << " in addrspace(" << AccessTy.AddrSpace << ')';
}
OS << ", Offsets={";
@@ -1353,19 +1364,19 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
-#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale, AccessTy.AddrSpace);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
@@ -1412,7 +1423,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
// Check for overflow.
@@ -1433,7 +1444,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
const Formula &F) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
// or a scale not equal to zero is correct.
@@ -1447,11 +1458,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
-/// isLegalUse - Test whether we know how to expand the current formula.
+/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
// We know how to expand completely foldable formulae.
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale) ||
@@ -1463,8 +1474,8 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- const Formula &F) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
@@ -1490,14 +1501,12 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
switch (LU.Kind) {
case LSRUse::Address: {
// Check the scaling factor cost with both the min and max offsets.
- int ScaleCostMinOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MinOffset,
- F.HasBaseReg, F.Scale);
- int ScaleCostMaxOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MaxOffset,
- F.HasBaseReg, F.Scale);
+ int ScaleCostMinOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
+ int ScaleCostMaxOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
"Legal addressing mode has an illegal cost!");
@@ -1515,7 +1524,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
@@ -1539,7 +1548,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
- Type *AccessTy, const SCEV *S, bool HasBaseReg) {
+ MemAccessTy AccessTy, const SCEV *S,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1564,9 +1574,9 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
namespace {
-/// IVInc - An individual increment in a Chain of IV increments.
-/// Relate an IV user to an expression that computes the IV it uses from the IV
-/// used by the previous link in the Chain.
+/// An individual increment in a Chain of IV increments. Relate an IV user to
+/// an expression that computes the IV it uses from the IV used by the previous
+/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
@@ -1582,8 +1592,8 @@ struct IVInc {
UserInst(U), IVOperand(O), IncExpr(E) {}
};
-// IVChain - The list of IV increments in program order.
-// We typically add the head of a chain without finding subsequent links.
+// The list of IV increments in program order. We typically add the head of a
+// chain without finding subsequent links.
struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
@@ -1595,7 +1605,7 @@ struct IVChain {
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
- // begin - return the first increment in the chain.
+ // Return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
return std::next(Incs.begin());
@@ -1604,32 +1614,30 @@ struct IVChain {
return Incs.end();
}
- // hasIncs - Returns true if this chain contains any increments.
+ // Returns true if this chain contains any increments.
bool hasIncs() const { return Incs.size() >= 2; }
- // add - Add an IVInc to the end of this chain.
+ // Add an IVInc to the end of this chain.
void add(const IVInc &X) { Incs.push_back(X); }
- // tailUserInst - Returns the last UserInst in the chain.
+ // Returns the last UserInst in the chain.
Instruction *tailUserInst() const { return Incs.back().UserInst; }
- // isProfitableIncrement - Returns true if IncExpr can be profitably added to
- // this chain.
+ // Returns true if IncExpr can be profitably added to this chain.
bool isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution&);
};
-/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
-/// Distinguish between FarUsers that definitely cross IV increments and
-/// NearUsers that may be used between IV increments.
+/// Helper for CollectChains to track multiple IV increment uses. Distinguish
+/// between FarUsers that definitely cross IV increments and NearUsers that may
+/// be used between IV increments.
struct ChainUsers {
SmallPtrSet<Instruction*, 4> FarUsers;
SmallPtrSet<Instruction*, 4> NearUsers;
};
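// [Editor's illustration, not part of the patch] The access pattern IV
// chaining exploits: each address is a small step from the previous one, so it
// can be formed by an increment of the prior IV value rather than an
// independent base + i*stride computation.
static long sumChained(const long *p, int n) {
  long s = 0;
  for (int i = 0; i + 2 < n; i += 3) {
    s += p[i];     // chain head
    s += p[i + 1]; // +1 from the previous link
    s += p[i + 2]; // +1 again
  }
  return s;
}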
-/// LSRInstance - This class holds state for the main loop strength reduction
-/// logic.
+/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
@@ -1639,25 +1647,25 @@ class LSRInstance {
Loop *const L;
bool Changed;
- /// IVIncInsertPos - This is the insert position that the current loop's
- /// induction variable increment should be placed. In simple loops, this is
- /// the latch block's terminator. But in more complicated cases, this is a
- /// position which will dominate all the in-loop post-increment users.
+ /// This is the insert position that the current loop's induction variable
+ /// increment should be placed. In simple loops, this is the latch block's
+ /// terminator. But in more complicated cases, this is a position which will
+ /// dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
- /// Factors - Interesting factors between use strides.
+ /// Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
- /// Types - Interesting use types, to facilitate truncation reuse.
+ /// Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
- /// Fixups - The list of operands which are to be replaced.
+ /// The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
- /// Uses - The list of interesting uses.
+ /// The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
- /// RegUses - Track which uses use which register candidates.
+ /// Track which uses use which register candidates.
RegUseTracker RegUses;
// Limit the number of chains to avoid quadratic behavior. We don't expect to
@@ -1665,10 +1673,10 @@ class LSRInstance {
// back to normal LSR behavior for those uses.
static const unsigned MaxChains = 8;
- /// IVChainVec - IV users can form a chain of IV increments.
+ /// IV users can form a chain of IV increments.
SmallVector<IVChain, MaxChains> IVChainVec;
- /// IVIncSet - IV users that belong to profitable IVChains.
+ /// IV users that belong to profitable IVChains.
SmallPtrSet<Use*, MaxChains> IVIncSet;
void OptimizeShadowIV();
@@ -1696,11 +1704,10 @@ class LSRInstance {
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy);
+ LSRUse::KindType Kind, MemAccessTy AccessTy);
- std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
- LSRUse::KindType Kind,
- Type *AccessTy);
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
+ MemAccessTy AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -1769,18 +1776,16 @@ class LSRInstance {
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
- void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P);
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
- LSRInstance(Loop *L, Pass *P);
+ LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
+ LoopInfo &LI, const TargetTransformInfo &TTI);
bool getChanged() const { return Changed; }
@@ -1793,8 +1798,8 @@ public:
}
-/// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast operation.
+/// If IV is used in an int-to-float cast inside the loop, then try to
+/// eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
@@ -1902,9 +1907,8 @@ void LSRInstance::OptimizeShadowIV() {
}
}
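
A minimal sketch of the rewrite OptimizeShadowIV performs; the shape follows
the long-standing LSR example rather than anything specific to this patch:

    // Before: an int-to-float cast of the IV executes on every iteration.
    for (int i = 0; i < n; ++i)
      sum += (double)i;
    // After: a parallel "shadow" floating-point IV eliminates the cast.
    double d = 0.0;
    for (int i = 0; i < n; ++i, d += 1.0)
      sum += d;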
-/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
-/// set the IV user and stride information and return true, otherwise return
-/// false.
+/// If Cond has an operand that is an expression of an IV, set the IV user and
+/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVStrideUse &U : IU)
if (U.getUser() == Cond) {
@@ -1917,8 +1921,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
return false;
}
-/// OptimizeMax - Rewrite the loop's terminating condition if it uses
-/// a max computation.
+/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2076,8 +2079,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
return NewCond;
}
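
The hunk elides the body of this comment; the kind of loop it refers to looks
roughly like the following (p and n are placeholder names):

    int i = 0;
    do {
      p[i] = 0.0;
    } while (++i < n);
    // The trip count is max(n, 1), not n: the body runs at least once even
    // when n <= 0, which is where the max computation comes from.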
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
+/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
@@ -2152,16 +2154,18 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
- Type *AccessTy = getAccessType(UI->getUser());
+ MemAccessTy AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
}
}
@@ -2180,7 +2184,7 @@ LSRInstance::OptimizeLoopTermCond() {
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
- ExitingBlock->getInstList().insert(TermBr, Cond);
+ ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
@@ -2213,15 +2217,14 @@ LSRInstance::OptimizeLoopTermCond() {
}
}
-/// reconcileNewOffset - Determine if the given use can accommodate a fixup
-/// at the given offset and other details. If so, update the use and
-/// return true.
-bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Determine if the given use can accommodate a fixup at the given offset and
+/// other details. If so, update the use and return true.
+bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ bool HasBaseReg, LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- Type *NewAccessTy = AccessTy;
+ MemAccessTy NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
@@ -2232,8 +2235,10 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+ if (Kind == LSRUse::Address) {
+ if (AccessTy != LU.AccessTy)
+ NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+ }
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
@@ -2257,12 +2262,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return true;
}
-/// getUse - Return an LSRUse index and an offset value for a fixup which
-/// needs the given expression, with the given kind and optional access type.
-/// Either reuse an existing use or create a new one, as needed.
-std::pair<size_t, int64_t>
-LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Return an LSRUse index and an offset value for a fixup which needs the given
+/// expression, with the given kind and optional access type. Either reuse an
+/// existing use or create a new one, as needed.
+std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2300,18 +2305,18 @@ LSRInstance::getUse(const SCEV *&Expr,
return std::make_pair(LUIdx, Offset);
}
-/// DeleteUse - Delete the given use from the Uses list.
+/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
// Update RegUses.
- RegUses.SwapAndDropUse(LUIdx, Uses.size());
+ RegUses.swapAndDropUse(LUIdx, Uses.size());
}
-/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
-/// a formula that has the same registers as the given formula.
+/// Look for a use distinct from OrigLU which has a formula with the same
+/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
@@ -2396,14 +2401,14 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
}
}
@@ -2415,9 +2420,9 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
DEBUG(print_factors_and_types(dbgs()));
}
-/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
-/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
-/// Instructions to IVStrideUses, we could partially skip this.
+/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
+/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
+/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
Loop *L, ScalarEvolution &SE) {
@@ -2436,29 +2441,28 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE,
return OI;
}
-/// getWideOperand - IVChain logic must consistenctly peek base TruncInst
-/// operands, so wrap it in a convenient helper.
+/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
+/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
return Trunc->getOperand(0);
return Oper;
}
-/// isCompatibleIVType - Return true if we allow an IV chain to include both
-/// types.
+/// Return true if we allow an IV chain to include both types.
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
}
-/// getExprBase - Return an approximation of this SCEV expression's "base", or
-/// NULL for any constant. Returning the expression itself is
-/// conservative. Returning a deeper subexpression is more precise and valid as
-/// long as it isn't less complex than another subexpression. For expressions
-/// involving multiple unscaled values, we need to return the pointer-type
-/// SCEVUnknown. This avoids forming chains across objects, such as:
-/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+/// Return an approximation of this SCEV expression's "base", or NULL for any
+/// constant. Returning the expression itself is conservative. Returning a
+/// deeper subexpression is more precise and valid as long as it isn't less
+/// complex than another subexpression. For expressions involving multiple
+/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
+/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
+/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
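
A small worked form of the a[i]/b[i] case the comment names (illustration
only):

    for (int i = 0; i < n; ++i) {
      x += a[i]; // PrevOper == a[i]
      y += b[i]; // IVOper == b[i]; chaining it onto a[i] would require
                 // IVInc == b - a, an increment spanning two objects.
    }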
@@ -2601,8 +2605,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
return cost < 0;
}
-/// ChainInstruction - Add this IV user to an existing chain or make it the head
-/// of a new chain.
+/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
@@ -2714,7 +2717,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
-/// CollectChains - Populate the vector of Chains.
+/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
@@ -2755,19 +2758,19 @@ void LSRInstance::CollectChains() {
for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
I != E; ++I) {
// Skip instructions that weren't seen by IVUsers analysis.
- if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
- if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
- ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
@@ -2776,7 +2779,7 @@ void LSRInstance::CollectChains() {
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
- ChainInstruction(I, IVOpInst, ChainUsersVec);
+ ChainInstruction(&*I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
@@ -2828,20 +2831,20 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
- if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
+ MemAccessTy AccessTy = getAccessType(UserInst);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ nullptr,
- IncOffset, /*HaseBaseReg=*/ false))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
+                        IncOffset, /*HasBaseReg=*/false))
return false;
return true;
}
-/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to
-/// materialize the IV user's operand from the previous IV user's operand.
+/// Generate an add or subtract for each IVInc in a chain to materialize the IV
+/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
@@ -2961,7 +2964,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = U.getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = nullptr;
+ MemAccessTy AccessTy;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -3027,9 +3030,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
DEBUG(print_fixups(dbgs()));
}
-/// InsertInitialFormula - Insert a formula for the given expression into
-/// the given use, separating out loop-variant portions from loop-invariant
-/// and loop-computable portions.
+/// Insert a formula for the given expression into the given use, separating out
+/// loop-variant portions from loop-invariant and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
@@ -3037,13 +3039,13 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
LU.RigidFormula = true;
Formula F;
- F.InitialMatch(S, L, SE);
+ F.initialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
-/// InsertSupplementalFormula - Insert a simple single-register formula for
-/// the given expression into the given use.
+/// Insert a simple single-register formula for the given expression into the
+/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
@@ -3054,17 +3056,16 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
-/// CountRegisters - Note which registers are used by the given formula,
-/// updating RegUses.
+/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
- RegUses.CountRegister(F.ScaledReg, LUIdx);
+ RegUses.countRegister(F.ScaledReg, LUIdx);
for (const SCEV *BaseReg : F.BaseRegs)
- RegUses.CountRegister(BaseReg, LUIdx);
+ RegUses.countRegister(BaseReg, LUIdx);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
// Do not insert formula that we will not be able to expand.
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
@@ -3076,9 +3077,9 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
-/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
-/// loop-invariant values which we're tracking. These other uses will pin these
-/// values in registers, making them less profitable for elimination.
+/// Check for other uses of loop-invariant values which we're tracking. These
+/// other uses will pin these values in registers, making them less profitable
+/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
@@ -3124,6 +3125,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
+ // Don't bother if the instruction is in a BB which ends in an EHPad.
+ if (UseBB->getTerminator()->isEHPad())
+ continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
@@ -3148,7 +3152,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
+ std::pair<size_t, int64_t> P = getUse(
+ S, LSRUse::Basic, MemAccessTy());
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3165,8 +3170,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
}
}
-/// CollectSubexprs - Split S into subexpressions which can be pulled out into
-/// separate registers. If C is non-null, multiply each subexpression by C.
+/// Split S into subexpressions which can be pulled out into separate
+/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
@@ -3300,7 +3305,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
// formula accordingly.
- F.Canonicalize();
+ F.canonicalize();
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -3309,8 +3314,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
}
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
+/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
assert(Base.isCanonical() && "Input must be in the canonical form");
@@ -3326,8 +3330,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
/* Idx */ -1, /* IsScaledReg */ true);
}
-/// GenerateCombinations - Generate a formula consisting of all of the
-/// loop-dominating registers added into a single register.
+/// Generate a formula consisting of all of the loop-dominating registers added
+/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
@@ -3336,7 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
- Base.Unscale();
+ Base.unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3354,7 +3358,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
- F.Canonicalize();
+ F.canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3379,7 +3383,7 @@ void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
(void)InsertFormula(LU, LUIdx, F);
}
-/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
@@ -3410,8 +3414,8 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.Scale = 0;
F.ScaledReg = nullptr;
} else
- F.DeleteBaseReg(F.BaseRegs[Idx]);
- F.Canonicalize();
+ F.deleteBaseReg(F.BaseRegs[Idx]);
+ F.canonicalize();
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
@@ -3452,8 +3456,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
/* IsScaledReg */ true);
}
-/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
-/// the comparison. For example, x == y -> x*c == y*c.
+/// For ICmpZero, check to see if we can scale up the comparison. For example,
+/// x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
@@ -3538,8 +3542,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
}
}
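
A hedged arithmetic sketch of the scaling described above, assuming the scaled
subtraction cannot overflow:

    // (x - y) == 0 holds exactly when c*x - c*y == 0 for nonzero c, so an
    // ICmpZero use can adopt a stride factor c that other uses already want.
    bool Eq  = (x - y) == 0;
    bool Eq4 = (4 * x - 4 * y) == 0; // same predicate when 4*(x - y) can't wrap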
-/// GenerateScales - Generate stride factor reuse formulae by making use of
-/// scaled-offset address modes, for example.
+/// Generate stride factor reuse formulae by making use of scaled-offset address
+/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
Type *IntTy = Base.getType();
@@ -3547,10 +3551,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// If this Formula already has a scaled register, we can't add another one.
// Try to unscale the formula to generate a better scale.
- if (Base.Scale != 0 && !Base.Unscale())
+ if (Base.Scale != 0 && !Base.unscale())
return;
- assert(Base.Scale == 0 && "Unscale did not did its job!");
+  assert(Base.Scale == 0 && "unscale did not do its job!");
// Check each interesting stride.
for (int64_t Factor : Factors) {
@@ -3587,7 +3591,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
- F.DeleteBaseReg(F.BaseRegs[i]);
+ F.deleteBaseReg(F.BaseRegs[i]);
// The canonical representation of 1*reg is reg, which is already in
// Base. In that case, do not try to insert the formula, it will be
// rejected anyway.
@@ -3599,7 +3603,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
}
}
-/// GenerateTruncates - Generate reuse formulae from different IV types.
+/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Don't bother truncating symbolic values.
if (Base.BaseGV) return;
@@ -3629,9 +3633,9 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
namespace {
-/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
-/// defer modifications so that the search phase doesn't have to worry about
-/// the data structures moving underneath it.
+/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
+/// modifications so that the search phase doesn't have to worry about the data
+/// structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
@@ -3651,14 +3655,13 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
-#endif
-/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
-/// distance apart and try to form reuse opportunities between them.
+/// Look for registers which are a constant distance apart and try to form reuse
+/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
@@ -3751,7 +3754,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// very similar but slightly different. Investigate if they
// could be merged. That way, we would not have to unscale the
// Formula.
- F.Unscale();
+ F.unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3770,14 +3773,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->isNegative() !=
- (NewF.BaseOffset < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(std::abs(NewF.BaseOffset)))
+ if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
+ (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3801,15 +3803,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// zero than the immediate itself, then the formula isn't worthwhile.
for (const SCEV *NewReg : NewF.BaseRegs)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
- if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- std::abs(NewF.BaseOffset)) &&
- (C->getValue()->getValue() +
- NewF.BaseOffset).countTrailingZeros() >=
- countTrailingZeros<uint64_t>(NewF.BaseOffset))
+ if ((C->getAPInt() + NewF.BaseOffset)
+ .abs()
+ .slt(std::abs(NewF.BaseOffset)) &&
+ (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3819,7 +3821,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
}
}
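
A sketch of the reuse opportunity this routine hunts for; the array and element
size are assumptions made for the illustration:

    for (int i = 0; i < n; ++i)
      sum += a[i] + a[i + 4]; // &a[i] and &a[i + 4] are a constant 16 bytes
                              // apart (4-byte ints), so one register plus an
                              // immediate offset can address both uses.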
-/// GenerateAllReuseFormulae - Generate formulae for each use.
+/// Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
@@ -3959,10 +3961,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
-/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
-/// solutions the solver might have to consider. It almost never considers
-/// this many solutions because it prune the search space, but the pruning
-/// isn't always sufficient.
+/// Estimate the worst-case number of solutions the solver might have to
+/// consider. It almost never considers this many solutions because it prunes
+/// the search space, but the pruning isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
for (const LSRUse &LU : Uses) {
@@ -3978,10 +3979,9 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
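
A toy model of the estimate, mirroring the loop above (assumed shape, not the
patch's code, and omitting the early cap at ComplexityLimit):

    #include <cstddef>
    #include <vector>

    std::size_t estimateSearchSpace(const std::vector<std::size_t> &FormulaCountPerUse) {
      std::size_t Power = 1;
      for (std::size_t N : FormulaCountPerUse)
        Power *= N; // e.g. {4, 5, 2} -> 40 worst-case candidate solutions
      return Power;
    }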
-/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
-/// of the registers of another formula, it won't help reduce register
-/// pressure (though it may not necessarily hurt register pressure); remove
-/// it to simplify the system.
+/// When one formula uses a superset of the registers of another formula, it
+/// won't help reduce register pressure (though it may not necessarily hurt
+/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -4042,9 +4042,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
}
}
-/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
-/// for expressions like A, A+1, A+2, etc., allocate a single register for
-/// them.
+/// When there are many registers for expressions like A, A+1, A+2, etc.,
+/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
@@ -4121,8 +4120,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
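
The pattern targeted here, in miniature (the unroll factor and names are
illustrative):

    // After 4x unrolling, naive LSR would give A, A+1, A+2, A+3 a register
    // each; collapsing lets one register plus small offsets serve all four.
    sum += p[A];
    sum += p[A + 1];
    sum += p[A + 2];
    sum += p[A + 3];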
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
-/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
@@ -4139,9 +4137,9 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
-/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
-/// to be profitable, and then in any use which has any reference to that
-/// register, delete all formulae which do not reference that register.
+/// Pick a register which seems likely to be profitable, and then in any use
+/// which has any reference to that register, delete all formulae which do not
+/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
@@ -4202,10 +4200,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
}
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
+/// If there are an extraordinary number of formulae to choose from, use some
+/// rough heuristics to prune down the number of formulae. This keeps the main
+/// solver from taking an extraordinary amount of time in some worst-case
+/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
@@ -4213,7 +4211,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByPickingWinnerRegs();
}
-/// SolveRecurse - This is the recursive solver.
+/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
@@ -4291,8 +4289,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
}
}
-/// Solve - Choose one formula from each use. Return the results in the given
-/// Solution vector.
+/// Choose one formula from each use. Return the results in the given Solution
+/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
@@ -4326,10 +4324,9 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
-/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
-/// the dominator tree far as we can go while still being dominated by the
-/// input positions. This helps canonicalize the insert position, which
-/// encourages sharing.
+/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
+/// far as we can go while still being dominated by the input positions. This
+/// helps canonicalize the insert position, which encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
@@ -4365,21 +4362,21 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
- BetterPos = std::next(BasicBlock::iterator(Inst));
+ BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
- IP = BetterPos;
+ IP = BetterPos->getIterator();
else
- IP = Tentative;
+ IP = Tentative->getIterator();
}
return IP;
}
-/// AdjustInsertPositionForExpand - Determine an input position which will be
-/// dominated by the operands and which will dominate the result.
+/// Determine an input position which will be dominated by the operands and
+/// which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
@@ -4417,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
}
}
- assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
@@ -4429,7 +4426,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
- while (isa<LandingPadInst>(IP)) ++IP;
+ while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -4437,13 +4434,14 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
// Set IP below instructions recently inserted by SCEVExpander. This keeps the
// IP consistent across expansions and allows the previously inserted
// instructions to be reused by subsequent expansion.
- while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+ while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
+ ++IP;
return IP;
}
-/// Expand - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding").
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
@@ -4487,7 +4485,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
}
// Expand the ScaledReg portion.
@@ -4505,14 +4503,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
- SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@@ -4522,11 +4520,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4538,7 +4536,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4548,7 +4546,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4584,7 +4582,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
- Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
@@ -4626,15 +4624,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
return FullV;
}
-/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
-/// of their operands effectively happens in their predecessor blocks, so the
-/// expression may need to be expanded in multiple places.
+/// Helper for Rewrite. PHI nodes are special because the use of their operands
+/// effectively happens in their predecessor blocks, so the expression may need
+/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -4658,8 +4655,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
.setDontDeleteUselessPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
- /*AliasAnalysis*/ nullptr, &DT, &LI);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -4685,7 +4681,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
- Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+ Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(),
+ Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4702,20 +4699,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
}
-/// Rewrite - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding"), and update the UserInst to reference
-/// the newly expanded value.
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding"), and update the UserInst to reference the newly
+/// expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts);
} else {
- Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+ Value *FullV =
+ Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4740,11 +4737,10 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
DeadInsts.emplace_back(LF.OperandValToReplace);
}
-/// ImplementSolution - Rewrite all the fixup locations with new values,
-/// following the chosen solution.
-void
-LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P) {
+/// Rewrite all the fixup locations with new values, following the chosen
+/// solution.
+void LSRInstance::ImplementSolution(
+ const SmallVectorImpl<const Formula *> &Solution) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
@@ -4766,7 +4762,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// Expand the new value definitions and update the users.
for (const LSRFixup &Fixup : Fixups) {
- Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts);
Changed = true;
}
@@ -4782,13 +4778,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(Loop *L, Pass *P)
- : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
- LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
- TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent())),
- L(L), Changed(false), IVIncInsertPos(nullptr) {
+LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
+ DominatorTree &DT, LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4879,7 +4873,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#endif
// Now that we've decided what we want, make it so.
- ImplementSolution(Solution, P);
+ ImplementSolution(Solution);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -4931,11 +4925,10 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
@@ -4956,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -4982,8 +4975,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
@@ -4996,17 +4989,24 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
if (skipOptnoneFunction(L))
return false;
+ auto &IU = getAnalysis<IVUsers>();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(L, this).getChanged();
+ Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL,
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d78db6c..56ae5c0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -130,27 +131,29 @@ namespace {
bool UserAllowPartial;
bool UserRuntime;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
// Fill in the UnrollingPreferences parameter with values from the
@@ -186,7 +189,7 @@ namespace {
// total unrolled size. Parameters Threshold and PartialThreshold
// are set to the maximum unrolled size for fully and partially
// unrolled loops respectively.
- void selectThresholds(const Loop *L, bool HasPragma,
+ void selectThresholds(const Loop *L, bool UsePragmaThreshold,
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned &Threshold, unsigned &PartialThreshold,
unsigned &PercentDynamicCostSavedThreshold,
@@ -207,12 +210,13 @@ namespace {
: UP.DynamicCostSavingsDiscount;
if (!UserThreshold &&
+ // FIXME: Use Function::optForSize().
L->getHeader()->getParent()->hasFnAttribute(
Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
- if (HasPragma) {
+ if (UsePragmaThreshold) {
// If the loop has an unrolling pragma, we want to be more
// aggressive with unrolling limits. Set thresholds to at
// least the PragmaTheshold value which is larger than the
@@ -235,10 +239,11 @@ char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
@@ -278,8 +283,8 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
public:
UnrolledInstAnalyzer(unsigned Iteration,
DenseMap<Value *, Constant *> &SimplifiedValues,
- const Loop *L, ScalarEvolution &SE)
- : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) {
+ ScalarEvolution &SE)
+ : SimplifiedValues(SimplifiedValues), SE(SE) {
IterationNumber = SE.getConstant(APInt(64, Iteration));
}
@@ -295,13 +300,6 @@ private:
/// results saved.
DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
- /// \brief Number of currently simulated iteration.
- ///
- /// If an expression is ConstAddress+Constant, then the Constant is
- /// Start + Iteration*Step, where Start and Step could be obtained from
- /// SCEVGEPCache.
- unsigned Iteration;
-
/// \brief SCEV expression corresponding to number of currently simulated
/// iteration.
const SCEV *IterationNumber;
@@ -316,7 +314,6 @@ private:
/// post-unrolling.
DenseMap<Value *, Constant *> &SimplifiedValues;
- const Loop *L;
ScalarEvolution &SE;
/// \brief Try to simplify instruction \param I using its SCEV expression.
@@ -368,11 +365,9 @@ private:
return simplifyInstWithSCEV(&I);
}
- /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
-
/// Try to simplify binary operator I.
///
- /// TODO: Probaly it's worth to hoist the code for estimating the
+  /// TODO: Probably it's worth hoisting the code for estimating the
  /// simplification effects to a separate class, since we have a very similar
/// code in InlineCost already.
bool visitBinaryOperator(BinaryOperator &I) {
@@ -412,7 +407,7 @@ private:
auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
// We're only interested in loads that can be completely folded to a
// constant.
- if (!GV || !GV->hasInitializer())
+ if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
return false;
ConstantDataSequential *CDS =
@@ -420,6 +415,12 @@ private:
if (!CDS)
return false;
+ // We might have a vector load from an array. FIXME: for now we just bail
+ // out in this case, but we should be able to resolve and simplify such
+ // loads.
+    if (!CDS->isElementTypeCompatible(I.getType()))
+ return false;
+
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
"Unexpectedly large index value.");
@@ -436,6 +437,59 @@ private:
return true;
}
+
+ bool visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ return Base::visitCastInst(I);
+ }
+
+ bool visitCmpInst(CmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+
+ if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
+ auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
+ if (SimplifiedLHS != SimplifiedAddresses.end()) {
+ auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
+ if (SimplifiedRHS != SimplifiedAddresses.end()) {
+ SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
+ SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
+ if (LHSAddr.Base == RHSAddr.Base) {
+ LHS = LHSAddr.Offset;
+ RHS = RHSAddr.Offset;
+ }
+ }
+ }
+ }
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+ }
+
+ return Base::visitCmpInst(I);
+ }
};
} // namespace
@@ -443,11 +497,11 @@ private:
namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
- unsigned UnrolledCost;
+ int UnrolledCost;
/// \brief The estimated dynamic cost of executing the instructions in the
/// rolled form.
- unsigned RolledDynamicCost;
+ int RolledDynamicCost;
};
}
@@ -464,10 +518,10 @@ struct EstimatedUnrollCost {
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
-Optional<EstimatedUnrollCost>
-analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
- const TargetTransformInfo &TTI,
- unsigned MaxUnrolledLoopSize) {
+static Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ int MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -481,24 +535,61 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
SmallSetVector<BasicBlock *, 16> BBWorklist;
DenseMap<Value *, Constant *> SimplifiedValues;
+ SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
- unsigned UnrolledCost = 0;
+ int UnrolledCost = 0;
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
// instructions due to the simplifications which we expect to occur after
// unrolling.
- unsigned RolledDynamicCost = 0;
+ int RolledDynamicCost = 0;
+
+ // Ensure that we don't violate the loop structure invariants relied on by
+ // this analysis.
+ assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
+ assert(L->isLCSSAForm(DT) &&
+ "Must have loops in LCSSA form to track live-out values.");
+
+ DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+ DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
+
+ // Prepare for the iteration by collecting any simplified entry or backedge
+ // inputs.
+ for (Instruction &I : *L->getHeader()) {
+ auto *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+
+      // The loop header PHI nodes must have exactly two inputs: one from the
+ // loop preheader and one from the loop latch.
+ assert(
+ PHI->getNumIncomingValues() == 2 &&
+ "Must have an incoming value only for the preheader and the latch.");
+
+ Value *V = PHI->getIncomingValueForBlock(
+ Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
+ Constant *C = dyn_cast<Constant>(V);
+ if (Iteration != 0 && !C)
+ C = SimplifiedValues.lookup(V);
+ if (C)
+ SimplifiedInputValues.push_back({PHI, C});
+ }
+
+ // Now clear and re-populate the map for the next iteration.
SimplifiedValues.clear();
- UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
+ while (!SimplifiedInputValues.empty())
+ SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
+
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);
BBWorklist.clear();
BBWorklist.insert(L->getHeader());
@@ -510,21 +601,67 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- unsigned InstCost = TTI.getUserCost(&I);
+ int InstCost = TTI.getUserCost(&I);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns false, include this instruction in the
// unrolled cost.
if (!Analyzer.visit(I))
UnrolledCost += InstCost;
+ else {
+ DEBUG(dbgs() << " " << I
+ << " would be simplified if loop is unrolled.\n");
+ (void)0;
+ }
      // Also track this instruction's expected cost when executing the rolled
// loop form.
RolledDynamicCost += InstCost;
// If unrolled body turns out to be too big, bail out.
- if (UnrolledCost > MaxUnrolledLoopSize)
+ if (UnrolledCost > MaxUnrolledLoopSize) {
+ DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost
+ << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
+ << "\n");
return None;
+ }
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+
+ // Add in the live successors by first checking whether we have a
+ // terminator that may be simplified based on the values simplified by
+ // this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(BI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if the condition is undef.
+ if (isa<UndefValue>(SimpleCond))
+ Succ = BI->getSuccessor(0);
+ else
+ Succ = BI->getSuccessor(
+ cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(SI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if the condition is undef.
+ if (isa<UndefValue>(SimpleCond))
+ Succ = SI->getSuccessor(0);
+ else
+ Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
+ .getCaseSuccessor();
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
}
// Add BB's successors to the worklist.
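When the analyzer has already folded a terminator's condition to a constant, only one successor can execute, so only that successor is queued. The branch half of that rule as a standalone sketch, with the same undef convention as above (take successor 0):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"

    // Single live successor of a conditional branch whose condition folded
    // to a constant; an undef condition defaults to successor 0.
    static llvm::BasicBlock *liveSuccessor(llvm::BranchInst &BI,
                                           llvm::Constant &SimpleCond) {
      if (llvm::isa<llvm::UndefValue>(&SimpleCond))
        return BI.getSuccessor(0);
      return BI.getSuccessor(
          llvm::cast<llvm::ConstantInt>(&SimpleCond)->isZero() ? 1 : 0);
    }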
@@ -535,9 +672,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones too.
- if (UnrolledCost == RolledDynamicCost)
+ if (UnrolledCost == RolledDynamicCost) {
+ DEBUG(dbgs() << " No opportunities found.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost << "\n");
return None;
+ }
}
+ DEBUG(dbgs() << "Analysis finished:\n"
+ << "UnrolledCost: " << UnrolledCost << ", "
+ << "RolledDynamicCost: " << RolledDynamicCost << "\n");
return {{UnrolledCost, RolledDynamicCost}};
}
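The double-braced return above builds the result struct directly inside the Optional: the inner braces aggregate-initialize the struct, the outer ones convert it to the Optional. A standalone sketch of the idiom (EstimatedCost is a stand-in for the pass's EstimatedUnrollCost):

    #include "llvm/ADT/Optional.h"

    struct EstimatedCost { int Unrolled, Rolled; };

    // Same shape as the return statement above: an Optional wrapping
    // EstimatedCost{4, 9}.
    llvm::Optional<EstimatedCost> make() { return {{4, 9}}; }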
@@ -583,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
+// Returns true if the loop has an unroll(enable) pragma. This metadata is used
+// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
+static bool HasUnrollEnablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
+}
+
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
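For reference, the metadata strings these helpers look up are what Clang emits for the unroll pragmas. An illustrative C++ snippet (the pragma-to-metadata mapping shown is the conventional one, not something this patch defines):

    // Illustrative only: typical pragma -> loop-metadata correspondence.
    void saxpy(float *y, const float *x, float a, int n) {
    #pragma clang loop unroll(enable) // -> llvm.loop.unroll.enable
      for (int i = 0; i < n; ++i)
        y[i] += a * x[i];
    }

    void scale(float *y, float a, int n) {
    #pragma unroll 4 // -> llvm.loop.unroll.count with a count of 4
      for (int i = 0; i < n; ++i)
        y[i] *= a;
    }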
@@ -708,7 +857,7 @@ unsigned LoopUnroll::selectUnrollCount(
unsigned Count = UserCount ? CurrentCount : 0;
// If there is no user-specified count, unroll pragmas have the next
- // highest precendence.
+ // highest precedence.
if (Count == 0) {
if (PragmaCount) {
Count = PragmaCount;
@@ -737,17 +886,19 @@ unsigned LoopUnroll::selectUnrollCount(
return Count;
}
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
Function &F = *L->getHeader()->getParent();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -757,8 +908,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
+ bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
+ bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
getUnrollingPreferences(L, TTI, UP);
@@ -806,7 +958,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
unsigned Threshold, PartialThreshold;
unsigned PercentDynamicCostSavedThreshold;
unsigned DynamicCostSavingsDiscount;
- selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
+ // Only use the high pragma threshold when we have a target unroll factor,
+ // such as with "#pragma unroll N", or when a pragma indicates full
+ // unrolling and the trip count is known. Otherwise we rely on the standard
+ // threshold to heuristically select a reasonable unroll count.
+ bool UsePragmaThreshold =
+ PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
+
+ selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount);
@@ -824,8 +984,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
+ if (Optional<EstimatedUnrollCost> Cost =
+ analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
+ Threshold + DynamicCostSavingsDiscount))
if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount, Cost->UnrolledCost,
Cost->RolledDynamicCost)) {
@@ -840,14 +1001,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime =
- (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
+ (UserRuntime ? CurrentRuntime : UP.Runtime);
// Don't do runtime unrolling if the loop disables it or asks for full
// unrolling.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ bool AllowPartial = PragmaEnableUnroll ||
+ (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
@@ -887,23 +1049,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DebugLoc LoopLoc = L->getStartLoc();
Function *F = Header->getParent();
LLVMContext &Ctx = F->getContext();
- if (PragmaFullUnroll && PragmaCount == 0) {
- if (TripCount && Count != TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because unrolled size is too large.");
- } else if (!TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
- }
- } else if (PragmaCount > 0 && Count != OriginalCount) {
+ if ((PragmaCount > 0) && Count != OriginalCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
"Unable to unroll loop the number of times directed by "
"unroll_count pragma because unrolled size is too large.");
+ } else if (PragmaFullUnroll && !TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+ } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop as directed by unroll(enable) pragma because "
+ "unrolled size is too large.");
+ } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ Count != TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
}
}
@@ -915,7 +1081,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Unroll the loop.
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
- TripMultiple, LI, this, &LPM, &AC))
+ TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index cbc563b..95d7f8a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -37,6 +38,10 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -70,6 +75,19 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics to minimize code growth in cold regions."));
+
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Coldness threshold in percentage. The loop header frequency "
+ "(relative to the entry frequency) is compared with this "
+ "threshold to determine if non-trivial unswitching should be "
+ "enabled."));
+
namespace {
class LUAnalysisCache {
@@ -148,12 +166,19 @@ namespace {
LPPassManager *LPM;
AssumptionCache *AC;
- // LoopProcessWorklist - Used to check if second loop needs processing
- // after RewriteLoopBodyWithConditionConstant rewrites first loop.
+ // Used to check if the second loop needs processing after
+ // RewriteLoopBodyWithConditionConstant rewrites the first loop.
std::vector<Loop*> LoopProcessWorklist;
LUAnalysisCache BranchesInfo;
+ bool EnabledPGO;
+
+ // BFI and ColdEntryFreq are only used when PGO and
+ // LoopUnswitchWithBlockFrequency are enabled.
+ BlockFrequencyInfo BFI;
+ BlockFrequency ColdEntryFreq;
+
bool OptimizeForSize;
bool redoLoop;
@@ -192,9 +217,11 @@ namespace {
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
@@ -210,7 +237,10 @@ namespace {
/// Split all of the edges from inside the loop to their exit blocks.
/// Update the appropriate Phi nodes as we do so.
- void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+ void SplitExitEdges(Loop *L,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+
+ bool TryTrivialLoopUnswitch(bool &Changed);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI = nullptr);
@@ -229,9 +259,6 @@ namespace {
TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
- BasicBlock **LoopExit = nullptr);
-
};
}
@@ -367,9 +394,8 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
}
-/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
-/// invariant in the loop, or has an invariant piece, return the invariant.
-/// Otherwise, return null.
+/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
+/// an invariant piece, return the invariant. Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We start analyzing a new instruction; increment the scanned-instructions
// counter.
@@ -411,11 +437,23 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
*L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+
+ EnabledPGO = F->getEntryCount().hasValue();
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ BranchProbabilityInfo BPI(*F, *LI);
+ BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+ // Use BranchProbability to compute a minimum frequency based on the
+ // function entry baseline frequency. Loops whose headers fall below
+ // this frequency are considered cold.
+ const BranchProbability ColdProb(ColdnessThreshold, 100);
+ ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+ }
+
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
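The cold-entry cutoff computed above scales the function's entry frequency by ColdnessThreshold percent. A worked example of the BlockFrequency-times-BranchProbability arithmetic, using the default threshold of 1:

    #include "llvm/Support/BlockFrequency.h"
    #include "llvm/Support/BranchProbability.h"

    // With an entry frequency of 1000 and a 1% coldness threshold, the
    // cutoff lands at roughly 1% of the entry frequency (about 10 here);
    // loop headers whose frequency falls below it are treated as cold.
    llvm::BlockFrequency coldCutoff() {
      llvm::BranchProbability ColdProb(1, 100); // ColdnessThreshold = 1%
      return llvm::BlockFrequency(1000) * ColdProb;
    }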
@@ -423,16 +461,13 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
Changed |= processCurrentLoop();
} while(redoLoop);
- if (Changed) {
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DT)
- DT->recalculate(*F);
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (Changed)
+ DT->recalculate(*F);
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
-/// and profitable.
+/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -452,14 +487,48 @@ bool LoopUnswitch::processCurrentLoop() {
LLVMContext &Context = loopHeader->getContext();
- // Probably we reach the quota of branches for this loop. If so
- // stop unswitching.
+ // Analyze loop cost, and stop unswitching if the loop contents cannot be
+ // duplicated.
if (!BranchesInfo.countLoop(
currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*currentLoop->getHeader()->getParent()),
AC))
return false;
+ // Try trivial unswitching first, before looping over the other basic
+ // blocks in the loop.
+ if (TryTrivialLoopUnswitch(Changed)) {
+ return true;
+ }
+
+ // Do not unswitch loops containing convergent operations, as we might be
+ // making them control dependent on the unswitch value when they were not
+ // before.
+ // FIXME: This could be refined to only bail if the convergent operation is
+ // not already control-dependent on the unswitch value.
+ for (const auto BB : currentLoop->blocks()) {
+ for (auto &I : *BB) {
+ auto CS = CallSite(&I);
+ if (!CS) continue;
+ if (CS.hasFnAttr(Attribute::Convergent))
+ return false;
+ }
+ }
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ // FIXME: Use Function::optForSize().
+ if (OptimizeForSize ||
+ loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ // Compute the weighted frequency of the hottest block in the
+ // loop (loopHeader in this case, since inner loops are processed
+ // before outer loops). If it is less than ColdEntryFreq, we should
+ // not unswitch.
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+ if (LoopEntryFreq < ColdEntryFreq)
+ return false;
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
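The convergent-call bail-out above can be read as a loop-wide predicate; a minimal standalone sketch of the same scan (hypothetical helper, mirroring the CallSite walk in the patch):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/CallSite.h"

    // True if any call in the loop carries the convergent attribute, in
    // which case unswitching could add an illegal control dependence.
    static bool containsConvergentCall(llvm::Loop &L) {
      for (llvm::BasicBlock *BB : L.blocks())
        for (llvm::Instruction &I : *BB)
          if (auto CS = llvm::CallSite(&I))
            if (CS.hasFnAttr(llvm::Attribute::Convergent))
              return true;
      return false;
    }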
@@ -528,8 +597,8 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
-/// loop with no side effects (including infinite loops).
+/// Check to see if all paths from BB exit the loop with no side effects
+/// (including infinite loops).
///
/// If true, we return true and set ExitBB to the block we
/// exit through.
@@ -566,9 +635,9 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
return true;
}
-/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
-/// process. If so, return the block that is exited to, otherwise return null.
+/// Return true if the specified block unconditionally leads to an exit from
+/// the specified loop, and has no side-effects in the process. If so, return
+/// the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
@@ -578,105 +647,11 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
return nullptr;
}
-/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
-/// trivial: that is, that the condition controls whether or not the loop does
-/// anything at all. If this is a trivial condition, unswitching produces no
-/// code duplications (equivalently, it produces a simpler loop and a new empty
-/// loop, which gets deleted).
-///
-/// If this is a trivial condition, return true, otherwise return false. When
-/// returning true, this sets Cond and Val to the condition that controls the
-/// trivial condition: when Cond dynamically equals Val, the loop is known to
-/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
-/// Cond == Val.
-///
-bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
- BasicBlock **LoopExit) {
- BasicBlock *Header = currentLoop->getHeader();
- TerminatorInst *HeaderTerm = Header->getTerminator();
- LLVMContext &Context = Header->getContext();
-
- BasicBlock *LoopExitBB = nullptr;
- if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
- // If the header block doesn't end with a conditional branch on Cond, we
- // can't handle it.
- if (!BI->isConditional() || BI->getCondition() != Cond)
- return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(0)))) {
- if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(1)))) {
- if (Val) *Val = ConstantInt::getFalse(Context);
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
- // If this isn't a switch on Cond, we can't handle it.
- if (SI->getCondition() != Cond) return false;
-
- // Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- // Note that we can't trivially unswitch on the default case or
- // on already unswitched cases.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- i.getCaseSuccessor()))) {
- // Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = i.getCaseValue();
-
- // Check that it was not unswitched before, since already unswitched
- // trivial vals are looks trivial too.
- if (BranchesInfo.isUnswitched(SI, CaseVal))
- continue;
- LoopExitBB = LoopExitCandidate;
- if (Val) *Val = CaseVal;
- break;
- }
- }
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit block
- // contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- if (LoopExit) *LoopExit = LoopExitBB;
-
- // We already know that nothing uses any scalar values defined inside of this
- // loop. As such, we just have to check to see if this loop will execute any
- // side-effecting instructions (e.g. stores, calls, volatile loads) in the
- // part of the loop that the code *would* execute. We already checked the
- // tail, check the header now.
- for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
- if (I->mayHaveSideEffects())
- return false;
- return true;
-}
-
-/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
-/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// We have found that we can unswitch currentLoop when LoopCond == Val to
+/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI) {
- Function *F = loopHeader->getParent();
- Constant *CondVal = nullptr;
- BasicBlock *ExitBlock = nullptr;
-
- if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
- // If the condition is trivial, always unswitch. There is no code growth
- // for this case.
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI);
- return true;
- }
-
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.CostAllowsUnswitching()) {
DEBUG(dbgs() << "NOT unswitching loop %"
@@ -687,32 +662,27 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- // Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
- return false;
-
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
}
-/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop *New = new Loop();
- LPM->insertLoop(New, PL);
+ Loop &New = LPM->addLoop(PL);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- CloneLoop(*I, New, VM, LI, LPM);
+ CloneLoop(*I, &New, VM, LI, LPM);
- return New;
+ return &New;
}
static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
@@ -744,15 +714,15 @@ static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
}
}
// fallthrough.
+ case LLVMContext::MD_make_implicit:
case LLVMContext::MD_dbg:
DstInst->setMetadata(MD.first, MD.second);
}
}
}
-/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
-/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
-/// code immediately before InsertPt.
+/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
+/// otherwise branch to FalseDest. Insert the code immediately before InsertPt.
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
@@ -782,11 +752,11 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
SplitCriticalEdge(BI, 1, Options);
}
-/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
-/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
-/// side-effects), unswitch it. This doesn't involve any code duplication, just
-/// moving the conditional branch outside of the loop and updating loop info.
+/// Given a loop that has a trivial unswitchable condition in it (a cond branch
+/// from its header block to its latch block, where the path through the loop
+/// that doesn't execute its body has no side-effects), unswitch it. This
+/// doesn't involve any code duplication, just moving the conditional branch
+/// outside of the loop and updating loop info.
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock,
TerminatorInst *TI) {
@@ -810,7 +780,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -829,8 +799,155 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
++NumTrivial;
}
-/// SplitExitEdges - Split all of the edges from inside the loop to their exit
-/// blocks. Update the appropriate Phi nodes as we do so.
+/// Check if the first non-constant condition starting from the loop header is
+/// a trivial unswitch condition: that is, a condition that controls whether or
+/// not the loop does anything at all. If it is a trivial condition, unswitching
+/// produces no code duplication (equivalently, it produces a simpler loop and a
+/// new empty loop, which gets deleted). Therefore, always unswitch a trivial
+/// condition.
+bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
+ BasicBlock *CurrentBB = currentLoop->getHeader();
+ TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ LLVMContext &Context = CurrentBB->getContext();
+
+ // If the loop header has only one reachable successor (currently via an
+ // unconditional branch or a constant-foldable conditional branch; constant-
+ // foldable switch instructions should also be handled in the future), we
+ // should keep looking for trivial condition candidates in that successor
+ // as well. An alternative is to constant-fold the conditions and merge the
+ // successors into the loop header (then we would only need to check the
+ // header's terminator). The reason for not doing this in the LoopUnswitch
+ // pass is that it could potentially break the LoopPassManager's invariants:
+ // folding dead branches could either eliminate the current loop or make
+ // other loops unreachable, and LCSSA form might not be preserved after
+ // deleting branches. The following code instead keeps traversing the loop
+ // header's successors until it finds a trivial condition candidate (a
+ // condition that is not a constant). Since unswitching generates branches
+ // with constant conditions, this scenario is very common in practice.
+ SmallSet<BasicBlock*, 8> Visited;
+
+ while (true) {
+ // If we exit the loop or reach a previously visited block, then we
+ // cannot reach any trivial condition candidates (unfoldable branch
+ // instructions or switch instructions) and no unswitch can happen.
+ // Exit and return false.
+ if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
+ return false;
+
+ // Check if this loop will execute any side-effecting instructions (e.g.
+ // stores, calls, volatile loads) in the part of the loop that the code
+ // *would* execute. Check the header first.
+ for (Instruction &I : *CurrentBB)
+ if (I.mayHaveSideEffects())
+ return false;
+
+ // FIXME: add check for constant foldable switch instructions.
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ if (BI->isUnconditional()) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
+ CurrentBB = BI->getSuccessor(1);
+ } else {
+ // Found a trivial condition candidate: non-foldable conditional branch.
+ break;
+ }
+ } else {
+ break;
+ }
+
+ CurrentTerm = CurrentBB->getTerminator();
+ }
+
+ // CondVal is the constant value of the condition for which the loop is
+ // known to exit. LoopExitBB is the BasicBlock the loop exits through
+ // when the condition equals CondVal.
+ Constant *CondVal = nullptr;
+ BasicBlock *LoopExitBB = nullptr;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ // If this isn't branching on an invariant condition, we can't unswitch it.
+ if (!BI->isConditional())
+ return false;
+
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+
+ // Unswitch only if the trivial condition itself is an LIV (not a
+ // partial LIV, which could occur in and/or).
+ if (!LoopCond || LoopCond != BI->getCondition())
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes
+ // it to do this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ CondVal = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ CondVal = ConstantInt::getFalse(Context);
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ CurrentTerm);
+ ++NumBranches;
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+ // If this isn't switching on an invariant condition, we can't unswitch it.
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+
+ // Unswitch only if the trivial condition itself is an LIV (not a
+ // partial LIV, which could occur in and/or).
+ if (!LoopCond || LoopCond != SI->getCondition())
+ return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a single exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ ConstantInt *CaseVal = i.getCaseValue();
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial values look trivial too.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ CondVal = CaseVal;
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ nullptr);
+ ++NumSwitches;
+ return true;
+ }
+ return false;
+}
+
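The successor walk above exists because earlier unswitches leave behind branches on constant conditions, so the first genuine condition may sit several blocks below the header. The constant-folding step of that walk, as a sketch:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"

    // Returns the only reachable successor when the branch is unconditional
    // or its condition is a known constant; returns null when a genuine
    // (non-constant) condition is found -- the trivial-unswitch candidate.
    static llvm::BasicBlock *foldBranch(llvm::BranchInst &BI,
                                        llvm::LLVMContext &Ctx) {
      if (BI.isUnconditional())
        return BI.getSuccessor(0);
      if (BI.getCondition() == llvm::ConstantInt::getTrue(Ctx))
        return BI.getSuccessor(0);
      if (BI.getCondition() == llvm::ConstantInt::getFalse(Ctx))
        return BI.getSuccessor(1);
      return nullptr;
    }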
+/// Split all of the edges from inside the loop to their exit blocks.
+/// Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVectorImpl<BasicBlock *> &ExitBlocks){
@@ -841,15 +958,14 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa",
- /*AliasAnalysis*/ nullptr, DT, LI,
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI,
/*PreserveLCSSA*/ true);
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
-/// condition outside of either loop. Return the loops created as Out1/Out2.
+/// We determined that the loop is profitable to unswitch when LIC equals Val.
+/// Split it into loop versions and test the condition outside of either loop.
+/// Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L, TerminatorInst *TI) {
Function *F = loopHeader->getParent();
@@ -858,8 +974,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
- SE->forgetLoop(L);
+ if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
+ SEWP->getSE().forgetLoop(L);
LoopBlocks.clear();
NewBlocks.clear();
@@ -901,8 +1017,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Splice the newly inserted blocks into the function right before the
// original preheader.
- F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
- NewBlocks[0], F->end());
+ F->getBasicBlockList().splice(NewPreheader->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -944,7 +1061,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- ExitSucc->getFirstInsertionPt());
+ &*ExitSucc->getFirstInsertionPt());
for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
I != E; ++I) {
@@ -960,7 +1077,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ RemapInstruction(&*I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -994,8 +1112,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
-/// RemoveFromWorklist - Remove all instances of I from the worklist vector
-/// specified.
+/// Remove all instances of I from the worklist vector specified.
static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
@@ -1003,7 +1120,7 @@ static void RemoveFromWorklist(Instruction *I,
Worklist.end());
}
-/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
@@ -1025,9 +1142,9 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
-// the value specified by Val in the specified loop, or we know it does NOT have
-// that value. Rewrite any uses of LIC or of properties correlated to it.
+/// We know either that the value LIC has the value specified by Val in the
+/// specified loop, or we know it does NOT have that value.
+/// Rewrite any uses of LIC or of properties correlated to it.
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
@@ -1138,18 +1255,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
// new blocks.
- if (DT)
- DT->addNewBlock(Abort, NewSISucc);
+ DT->addNewBlock(Abort, NewSISucc);
}
SimplifyCode(Worklist, L);
}
-/// SimplifyCode - Okay, now that we have simplified some instructions in the
-/// loop, walk over it and constant prop, dce, and fold control flow where
-/// possible. Note that this is effectively a very simple loop-structure-aware
-/// optimizer. During processing of this loop, L could very well be deleted, so
-/// it must not be used.
+/// Now that we have simplified some instructions in the loop, walk over it and
+/// constant prop, dce, and fold control flow where possible. Note that this is
+/// effectively a very simple loop-structure-aware optimizer. During processing
+/// of this loop, L could very well be deleted, so it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
@@ -1207,8 +1322,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Succ->replaceAllUsesWith(Pred);
// Move all of the successor contents from Succ to Pred.
- Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
- Succ->end());
+ Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
+ Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 3314e1e..41511bc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "loweratomic"
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
- IRBuilder<> Builder(CXI->getParent(), CXI);
+ IRBuilder<> Builder(CXI);
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
@@ -41,7 +41,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
}
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
@@ -120,7 +120,7 @@ namespace {
return false;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0c47cbd..2ace902 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -139,7 +139,7 @@ static bool lowerExpectIntrinsic(Function &F) {
ExpectIntrinsicsHandled++;
}
- // remove llvm.expect intrinsics.
+ // Remove llvm.expect intrinsics.
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
CallInst *CI = dyn_cast<CallInst>(BI++);
if (!CI)
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 85012af..0333bf2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,7 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <list>
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
return Offset;
}
-/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
-/// constant offset, and return that constant offset. For example, Ptr1 might
-/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
+/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
+/// might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
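A quick arithmetic check of the example in the doc comment, assuming 4-byte ints; the stated result of -8 implies the convention Ptr2 == Ptr1 + Offset:

    #include <cstddef>

    // &A[42] sits 8 bytes past &A[40] when sizeof(int) == 4, so going from
    // Ptr1 = &A[42] to Ptr2 = &A[40] is an offset of -8 bytes.
    std::ptrdiff_t example() {
      static int A[64];
      return (char *)&A[40] - (char *)&A[42]; // == -8
    }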
@@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
}
-/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
@@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If any of the stores are a memset, then it is always good to extend the
// memset.
- for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
- if (!isa<StoreInst>(TheStores[i]))
+ for (Instruction *SI : TheStores)
+ if (!isa<StoreInst>(SI))
return true;
// Assume that the code generator is capable of merging pairs of stores
@@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes, if any, are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
+ unsigned NumByteStores = Bytes % MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
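The NumByteStores rewrite above is a pure identity for unsigned arithmetic: Bytes - (Bytes / N) * N is exactly Bytes % N. A one-line check:

    // With Bytes = 13 and MaxIntSize = 4: 13 / 4 = 3 pointer-sized stores,
    // leaving 13 - 3 * 4 = 1 == 13 % 4 byte-sized store.
    static_assert(13u - (13u / 4u) * 4u == 13u % 4u, "remainder identity");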
@@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
namespace {
class MemsetRanges {
- /// Ranges - A sorted list of the memset ranges. We use std::list here
- /// because each element is relatively large and expensive to copy.
- std::list<MemsetRange> Ranges;
- typedef std::list<MemsetRange>::iterator range_iterator;
+ /// A sorted list of the memset ranges.
+ SmallVector<MemsetRange, 8> Ranges;
+ typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
- typedef std::list<MemsetRange>::const_iterator const_iterator;
+ typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
} // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = Ranges.begin(), E = Ranges.end();
- while (I != E && Start > I->End)
- ++I;
+ range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+ [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
// We now know either that I == Ranges.end(), in which case we didn't find
// anything to merge with, or that Start <= I->End. If End < I->Start or
// I == Ranges.end(), then we need to insert a new range. Handle this now.
- if (I == E || End < I->Start) {
+ if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
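The std::lower_bound call above relies on Ranges staying sorted by End; the heterogeneous comparator finds the first range whose End is not below the new Start. The same search over plain data, as a sketch of the invariant:

    #include <algorithm>
    #include <vector>

    struct Range { long Start, End; };

    // First range that could abut or overlap a store beginning at Start,
    // assuming the vector is kept sorted by End (MemsetRanges' invariant).
    std::vector<Range>::iterator firstCandidate(std::vector<Range> &Rs,
                                                long Start) {
      return std::lower_bound(
          Rs.begin(), Rs.end(), Start,
          [](const Range &LHS, long RHS) { return LHS.End < RHS; });
    }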
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
- while (++NextI != E && End >= NextI->Start) {
+ while (++NextI != Ranges.end() && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
@@ -331,9 +325,9 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -357,7 +351,7 @@ namespace {
char MemCpyOpt::ID = 0;
}
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away. In particular, this looks for stores to neighboring locations of
+/// memory. If it sees enough consecutive ones, it attempts to merge them
+/// together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// are stored.
MemsetRanges Ranges(DL);
- BasicBlock::iterator BI = StartInst;
+ BasicBlock::iterator BI(StartInst);
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we create any memsets, we put it right before the first instruction that
// isn't part of the memset block. This ensures that the memset is dominated
// by any addressing instruction needed by the start of the block.
- IRBuilder<> Builder(BI);
+ IRBuilder<> Builder(&*BI);
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
- for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
- I != E; ++I) {
- const MemsetRange &Range = *I;
+ for (const MemsetRange &Range : Ranges) {
if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
- for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i] << '\n';
+ for (Instruction *SI : Range.TheStores)
+ dbgs() << *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVectorImpl<Instruction *>::const_iterator
- SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI) {
- MD->removeInstruction(*SI);
- (*SI)->eraseFromParent();
+ for (Instruction *SI : Range.TheStores) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
}
++NumMemSetInfer;
}
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
+
+ // Avoid merging nontemporal stores, since the resulting
+ // memcpy/memset would not be able to preserve the nontemporal hint.
+ // In theory we could teach this pass to propagate the !nontemporal
+ // metadata to the memset calls it creates, but that change would force
+ // the backend to conservatively expand !nontemporal memset calls back
+ // into sequences of store instructions (effectively undoing the merging).
+ if (SI->getMetadata(LLVMContext::MD_nontemporal))
+ return false;
+
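At the source level, the stores this new bail-out protects are typically produced with Clang's nontemporal builtin; folding them into a memset would silently drop the hint. Illustrative only:

    // Each store below carries !nontemporal metadata, so processStore now
    // declines to merge the sequence into a memset.
    void zero4(int *p) {
      __builtin_nontemporal_store(0, p + 0);
      __builtin_nontemporal_store(0, p + 1);
      __builtin_nontemporal_store(0, p + 2);
      __builtin_nontemporal_store(0, p + 3);
    }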
const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
- E = C; I != E; --I) {
- if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+ I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
C = nullptr;
break;
}
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
MSI->getValue())) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
return false;
}
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
// If necessary, perform additional analysis.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_invariant_group};
combineMetadata(C, cpy, KnownIDs);
// Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return true;
}
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transform memcpy's where the dest of one is the source of the
// other.
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false, M, M->getParent());
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return false;
// Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+ MemCpy->getIterator(), MemCpy->getParent());
if (DstDepInfo.getInst() != MemSet)
return false;
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
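Concretely, the rewrite this comment describes looks as follows (illustrative C-level view; X, Y, Z follow the comment's naming):

    #include <string.h>

    // B's source is A's destination, so B can read from X directly,
    // leaving A removable once Y has no other uses.
    void before(char *X, char *Y, char *Z, size_t N) {
      memcpy(Y, X, N); // A
      memcpy(Z, Y, N); // B
    }

    void after(char *X, char *Y, char *Z, size_t N) {
      memcpy(Y, X, N); // A: now dead if Y is otherwise unused
      memcpy(Z, X, N); // B rewritten to copy from X
    }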
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
- M, M->getParent());
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return false;
}
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
if (!TLI->has(LibFunc::memmove))
return false;
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
// If not, then we know we can transform this.
- Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys));
+ M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+ Intrinsic::memcpy, ArgTys));
// MemDep may have overly conservative information about this instruction;
// just conservatively flush it from the cache.
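The memmove-to-memcpy strengthening performed here applies when alias analysis proves the source and destination cannot overlap, so the copy direction no longer matters. Illustrative only:

    #include <string.h>

    // a and b are distinct locals, so the ranges provably do not alias and
    // the memmove is eligible to become a memcpy.
    char f(void) {
      char a[16] = {1}, b[16];
      memmove(b, a, sizeof(a));
      return b[0];
    }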
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
return true;
}
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
- CS.getInstruction()->getParent());
+ MemoryLocation(ByValArg, ByValSize), true,
+ CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
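
For orientation, a hypothetical C++ picture (not from the patch) of the byval
forwarding processByValArgument performs: when nothing clobbers the
copied-from buffer between the memcpy and the call, the byval argument can be
fed from the original buffer and the temporary copy becomes dead.

    struct S { int A[16]; };
    void Callee(S ByVal);   // pass-by-value lowers to a byval arg plus memcpy
    void Caller(S *Src) {
      S Tmp = *Src;         // front end emits a memcpy into Tmp
      Callee(Tmp);          // MemCpyOpt can make the byval copy read *Src
    }                       // directly, letting Tmp be removed
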
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
//
  // NOTE: This is conservative; it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- CS.getInstruction(), MDep->getParent());
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CS.getInstruction()->getIterator(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
return true;
}
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
bool RepeatInstruction = false;
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
return MadeChange;
}
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
bool MemCpyOpt::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 643f374..c812d61 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -91,6 +92,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mldst-motion"
@@ -106,7 +108,7 @@ class MergedLoadStoreMotion : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit MergedLoadStoreMotion(void)
+ MergedLoadStoreMotion()
: FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
}
@@ -116,10 +118,11 @@ public:
private:
  // This transformation requires dominator and postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
}
// Helper routines
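
The AU changes above follow the new-style alias-analysis plumbing for legacy
passes; a self-contained sketch of the pattern under the same 3.8-era API (the
pass itself is hypothetical):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/GlobalsModRef.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AAResultsWrapperPass>();  // replaces the old AA group
        AU.addPreserved<GlobalsAAWrapperPass>(); // keep globals AA valid
      }
      bool runOnFunction(Function &F) override {
        AliasAnalysis &AA =
            getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AA; // query AA here
        return false;
      }
    };
    }
    char ExamplePass::ID = 0;
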
@@ -156,7 +159,7 @@ private:
};
char MergedLoadStoreMotion::ID = 0;
-}
+} // anonymous namespace
///
/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -169,7 +172,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
@@ -236,12 +240,11 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
/// being loaded or prevent the load from happening,
/// it is considered a hoist barrier.
///
-
bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
const Instruction& End,
LoadInst* LI) {
MemoryLocation Loc = MemoryLocation::get(LI);
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
}
///
@@ -256,7 +259,7 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
++BBI) {
- Instruction *Inst = BBI;
+ Instruction *Inst = &*BBI;
    // Only merge and hoist loads when their result is used only in BB
if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
@@ -293,7 +296,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
// Intersect optional metadata.
HoistCand->intersectOptionalDataWith(ElseInst);
- HoistCand->dropUnknownMetadata();
+ HoistCand->dropUnknownNonDebugMetadata();
// Prepend point for instruction insert
Instruction *HoistPt = BB->getTerminator();
@@ -363,8 +366,7 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
int NLoads = 0;
for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
BBI != BBE;) {
-
- Instruction *I = BBI;
+ Instruction *I = &*BBI;
++BBI;
// Only move non-simple (atomic, volatile) loads.
@@ -394,11 +396,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
/// value being stored or prevent the store from
/// happening, it is considered a sink barrier.
///
-
bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
const Instruction &End,
MemoryLocation Loc) {
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
}
///
@@ -438,23 +439,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
StoreInst *S1) {
// Create a phi if the values mismatch.
- PHINode *NewPN = 0;
+ PHINode *NewPN = nullptr;
Value *Opd1 = S0->getValueOperand();
Value *Opd2 = S1->getValueOperand();
if (Opd1 != Opd2) {
NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- BB->begin());
+ &BB->front());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
- if (NewPN->getType()->getScalarType()->isPointerTy()) {
- // AA needs to be informed when a PHI-use of the pointer value is added
- for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
- unsigned J = PHINode::getOperandNumForIncomingValue(I);
- AA->addEscapingUse(NewPN->getOperandUse(J));
- }
- if (MD)
- MD->invalidateCachedPointerInfo(NewPN);
- }
+ if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(NewPN);
}
return NewPN;
}
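
The deleted escaping-use bookkeeping (addEscapingUse) has no equivalent in the
new AA infrastructure, as the hunk above suggests; the one cache that still
needs a nudge when a new pointer PHI appears is MemoryDependenceAnalysis. A
minimal sketch of the surviving obligation (helper name hypothetical):

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // After building a PHI over pointer values, drop MemDep's cached
    // pointer info so later queries do not see stale results.
    static void noteNewPointerPHI(MemoryDependenceAnalysis *MD, PHINode *PN) {
      if (MD && PN->getType()->getScalarType()->isPointerTy())
        MD->invalidateCachedPointerInfo(PN);
    }
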
@@ -479,12 +473,12 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
// Intersect optional metadata.
S0->intersectOptionalDataWith(S1);
- S0->dropUnknownMetadata();
+ S0->dropUnknownNonDebugMetadata();
// Create the new store to be inserted at the join point.
StoreInst *SNew = (StoreInst *)(S0->clone());
Instruction *ANew = A0->clone();
- SNew->insertBefore(InsertPt);
+ SNew->insertBefore(&*InsertPt);
ANew->insertBefore(SNew);
assert(S0->getParent() == A0->getParent());
@@ -566,12 +560,13 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
}
return MergedStores;
}
+
///
/// \brief Run the transformation for each function
///
bool MergedLoadStoreMotion::runOnFunction(Function &F) {
MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Changed = false;
DEBUG(dbgs() << "Instruction Merger\n");
@@ -579,7 +574,7 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
// Hoist equivalent loads and sink stores
// outside diamonds when possible
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index f42f830..c8f885e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -71,8 +71,8 @@
//
// Limitations and TODO items:
//
-// 1) We only considers n-ary adds for now. This should be extended and
-// generalized.
+// 1) We only consider n-ary adds and muls for now. This should be extended
+// and generalized.
//
//===----------------------------------------------------------------------===//
@@ -110,11 +110,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
@@ -145,12 +145,23 @@ private:
unsigned I, Value *LHS,
Value *RHS, Type *IndexedType);
- // Reassociate Add for better CSE.
- Instruction *tryReassociateAdd(BinaryOperator *I);
- // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
- Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
- // Rewrites I to LHS + RHS if LHS is computed already.
- Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+ // Reassociate binary operators for better CSE.
+ Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+ // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+ // passed.
+ Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I);
+ // Rewrites I to (LHS op RHS) if LHS is computed already.
+ Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+ BinaryOperator *I);
+
+ // Tries to match Op1 and Op2 by using V.
+ bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+ // Gets SCEV for (LHS op RHS).
+ const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS);
// Returns the closest dominator of \c Dominatee that computes
// \c CandidateExpr. Returns null if not found.
@@ -161,11 +172,6 @@ private:
// GEP's pointer size, i.e., whether Index needs to be sign-extended in order
// to be an index of GEP.
bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
- // Returns whether V is known to be non-negative at context \c Ctxt.
- bool isKnownNonNegative(Value *V, Instruction *Ctxt);
- // Returns whether AO may sign overflow at context \c Ctxt. It computes a
- // conservative result -- it answers true when not sure.
- bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
AssumptionCache *AC;
const DataLayout *DL;
@@ -182,7 +188,7 @@ private:
// foo(a + b);
// if (p2)
// bar(a + b);
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+ DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
};
} // anonymous namespace
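
SeenExprs now stores WeakVH because cached instructions can be erased by
RecursivelyDeleteTriviallyDeadInstructions while still in the map; a WeakVH
nulls itself on deletion instead of dangling. A minimal sketch of the check
this enables (helper name hypothetical):

    #include "llvm/IR/ValueHandle.h"
    using namespace llvm;

    // A WeakVH tracks its Value and becomes null once the Value is deleted,
    // so stale cache entries can be recognized and skipped.
    static bool candidateStillAlive(const WeakVH &VH) {
      Value *V = VH; // implicit conversion; null if the value died
      return V != nullptr;
    }
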
@@ -191,7 +197,7 @@ INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
@@ -207,7 +213,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -224,6 +230,7 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::GetElementPtr:
+ case Instruction::Mul:
return true;
default:
return false;
@@ -239,19 +246,21 @@ bool NaryReassociate::doOneIteration(Function &F) {
Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
- if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
- const SCEV *OldSCEV = SE->getSCEV(I);
- if (Instruction *NewI = tryReassociate(I)) {
+ if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
+ const SCEV *OldSCEV = SE->getSCEV(&*I);
+ if (Instruction *NewI = tryReassociate(&*I)) {
Changed = true;
- SE->forgetValue(I);
+ SE->forgetValue(&*I);
I->replaceAllUsesWith(NewI);
- RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- I = NewI;
+        // If SeenExprs contains I's WeakVH, that entry will be replaced with
+ // nullptr.
+ RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
+ I = NewI->getIterator();
}
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
- const SCEV *NewSCEV = SE->getSCEV(I);
- SeenExprs[NewSCEV].push_back(I);
+ const SCEV *NewSCEV = SE->getSCEV(&*I);
+ SeenExprs[NewSCEV].push_back(WeakVH(&*I));
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
// weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -271,7 +280,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
//
// This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(I);
+ SeenExprs[OldSCEV].push_back(WeakVH(&*I));
}
}
}
@@ -281,7 +290,8 @@ bool NaryReassociate::doOneIteration(Function &F) {
Instruction *NaryReassociate::tryReassociate(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
- return tryReassociateAdd(cast<BinaryOperator>(I));
+ case Instruction::Mul:
+ return tryReassociateBinaryOp(cast<BinaryOperator>(I));
case Instruction::GetElementPtr:
return tryReassociateGEP(cast<GetElementPtrInst>(I));
default:
@@ -352,27 +362,6 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}
-bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
- bool NonNegative, Negative;
- // TODO: ComputeSignBits is expensive. Consider caching the results.
- ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
- return NonNegative;
-}
-
-bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
- if (AO->hasNoSignedWrap())
- return false;
-
- Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
- // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
- // TODO: handle the negative case as well.
- if (isKnownNonNegative(AO, Ctxt) &&
- (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
- return false;
-
- return true;
-}
-
GetElementPtrInst *
NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
Type *IndexedType) {
@@ -381,7 +370,7 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
IndexToSplit = SExt->getOperand(0);
} else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
// zext can be treated as sext if the source is non-negative.
- if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+ if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
IndexToSplit = ZExt->getOperand(0);
}
@@ -389,8 +378,11 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
// If the I-th index needs sext and the underlying add is not equipped with
// nsw, we cannot split the add because
// sext(LHS + RHS) != sext(LHS) + sext(RHS).
- if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
+ if (requiresSignExtension(IndexToSplit, GEP) &&
+ computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
+ OverflowResult::NeverOverflows)
return nullptr;
+
Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
// IndexToSplit = LHS + RHS.
if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
@@ -415,7 +407,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
IndexExprs.push_back(SE->getSCEV(*Index));
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
- if (isKnownNonNegative(LHS, GEP) &&
+ if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
DL->getTypeSizeInBits(LHS->getType()) <
DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
// Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
@@ -429,19 +421,20 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
IndexExprs, GEP->isInBounds());
- auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+ Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
if (Candidate == nullptr)
return nullptr;
- PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
- // Pretty rare but theoretically possible when a numeric value happens to
- // share CandidateExpr.
- if (TypeOfCandidate == nullptr)
- return nullptr;
+ IRBuilder<> Builder(GEP);
+ // Candidate does not necessarily have the same pointer type as GEP. Use
+ // bitcast or pointer cast to make sure they have the same type, so that the
+ // later RAUW doesn't complain.
+ Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
+ assert(Candidate->getType() == GEP->getType());
// NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
- Type *ElementType = TypeOfCandidate->getElementType();
+ Type *ElementType = GEP->getType()->getElementType();
uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
// Another less rare case: because I is not necessarily the last index of the
// GEP, the size of the type at the I-th index (IndexedSize) is not
@@ -461,8 +454,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return nullptr;
// NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
- IRBuilder<> Builder(GEP);
- Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (RHS->getType() != IntPtrTy)
RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
if (IndexedSize != ElementSize) {
@@ -476,54 +468,89 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return NewGEP;
}
-Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
return NewI;
- if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
return NewI;
return nullptr;
}
-Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
- Instruction *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I) {
Value *A = nullptr, *B = nullptr;
- // To be conservative, we reassociate I only when it is the only user of A+B.
- if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
- // I = (A + B) + RHS
- // = (A + RHS) + B or (B + RHS) + A
+  // To be conservative, we reassociate I only when it is the only user of
+  // (A op B).
+ if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
+ // I = (A op B) op RHS
+ // = (A op RHS) op B or (B op RHS) op A
const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
const SCEV *RHSExpr = SE->getSCEV(RHS);
if (BExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
return NewI;
}
if (AExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
return NewI;
}
}
return nullptr;
}
-Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
- Value *RHS, Instruction *I) {
- auto Pos = SeenExprs.find(LHSExpr);
- // Bail out if LHSExpr is not previously seen.
- if (Pos == SeenExprs.end())
- return nullptr;
-
+Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+ Value *RHS,
+ BinaryOperator *I) {
// Look for the closest dominator LHS of I that computes LHSExpr, and replace
- // I with LHS + RHS.
+ // I with LHS op RHS.
auto *LHS = findClosestMatchingDominator(LHSExpr, I);
if (LHS == nullptr)
return nullptr;
- Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ Instruction *NewI = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ break;
+ case Instruction::Mul:
+ NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+ break;
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
NewI->takeName(I);
return NewI;
}
+bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
+ Value *&Op2) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
+ case Instruction::Mul:
+ return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return false;
+}
+
+const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return nullptr;
+}
+
Instruction *
NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
Instruction *Dominatee) {
@@ -537,9 +564,13 @@ NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
// future instruction either. Therefore, we pop it out of the stack. This
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
- Instruction *Candidate = Candidates.back();
- if (DT->dominates(Candidate, Dominatee))
- return Candidate;
+ // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+ // during rewriting.
+ if (Value *Candidate = Candidates.back()) {
+ Instruction *CandidateInstruction = cast<Instruction>(Candidate);
+ if (DT->dominates(CandidateInstruction, Dominatee))
+ return CandidateInstruction;
+ }
Candidates.pop_back();
}
return nullptr;
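
What the generalization in this file buys, in source terms (a hypothetical
illustration, not from the patch): with muls handled, an expression such as
(A * B) * C can be rewritten against an already-computed A * C that dominates
it.

    // Hypothetical before/after of the new mul reassociation:
    long Before(long A, long B, long C) {
      long T1 = A * C;         // computed earlier, dominates the use below
      return T1 + (A * B) * C; // (A * B) * C recomputes A * C implicitly
    }
    long After(long A, long B, long C) {
      long T1 = A * C;
      return T1 + T1 * B;      // reassociated to (A * C) * B, reusing T1
    }
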
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df3..9f26f78 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -154,7 +154,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
Phi->addIncoming(Call, &CurrBB);
Phi->addIncoming(LibCall, LibCallBB);
- BB = JoinBB;
+ BB = JoinBB->getIterator();
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 366301a..28c610c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -27,7 +27,7 @@
// well defined state for inspection by the collector. In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops. We try to avoid inserting more polls than
-// are neccessary to ensure a finite period between poll sites. This is not
+// are necessary to ensure a finite period between poll sites. This is not
// because the poll itself is expensive in the generated code; it's not. Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
@@ -91,13 +91,15 @@ STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
using namespace llvm;
-// Ignore oppurtunities to avoid placing safepoints on backedges, useful for
+// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
cl::init(false));
-/// If true, do not place backedge safepoints in counted loops.
-static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+/// How narrow does the trip count of a loop have to be for it to be considered
+/// "counted"? Counted loops do not get safepoints at backedges.
+static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
+ cl::Hidden, cl::init(32));
// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself. Both are useful to support for
@@ -121,7 +123,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
std::vector<TerminatorInst *> PollLocations;
/// True unless we're running spp-no-calls in which case we need to disable
- /// the call dependend placement opts.
+ /// the call-dependent placement opts.
bool CallSafepointsEnabled;
ScalarEvolution *SE = nullptr;
@@ -142,7 +144,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
@@ -153,7 +155,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
// We no longer modify the IR at all in this pass. Thus all
    // analyses are preserved.
@@ -190,10 +192,8 @@ static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/);
-static bool isGCLeafFunction(const CallSite &CS);
-
static bool needsStatepoint(const CallSite &CS) {
- if (isGCLeafFunction(CS))
+ if (callsGCLeafFunction(CS))
return false;
if (CS.isCall()) {
CallInst *call = cast<CallInst>(CS.getInstruction());
@@ -206,7 +206,7 @@ static bool needsStatepoint(const CallSite &CS) {
return true;
}
-static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+static Value *ReplaceWithStatepoint(const CallSite &CS);
/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
@@ -220,7 +220,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
// For the moment, we look only for the 'cuts' that consist of a single call
// instruction in a block which is dominated by the Header and dominates the
// loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
- // of such dominating blocks gets substaintially more occurences than just
+ // of such dominating blocks gets substantially more occurrences than just
// checking the Pred and Header blocks themselves. This may be due to the
// density of loop exit conditions caused by range and null checks.
// TODO: structure this as an analysis pass, cache the result for subloops,
@@ -255,18 +255,12 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
/// conservatism in the analysis.
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
- // Only used when SkipCounted is off
- const unsigned upperTripBound = 8192;
-
// A conservative bound on the loop as a whole.
const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
- if (MaxTrips != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
- return true;
- }
+ if (MaxTrips != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
+ return true;
// If this is a conditional branch to the header with the alternate path
// being outside the loop, we can ask questions about the execution frequency
@@ -275,13 +269,10 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
// This returns an exact expression only. TODO: We really only need an
// upper bound here, but SE doesn't expose that.
const SCEV *MaxExec = SE->getExitCount(L, Pred);
- if (MaxExec != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
+ if (MaxExec != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
return true;
- }
}
return /* not finite */ false;
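
The replacement logic above collapses the old two-threshold scheme into one
knob: a loop is "counted" exactly when its maximum trip count fits in
CountedLoopTripWidth bits. A minimal sketch of the predicate, assuming
APInt::isIntN semantics:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // True when the max trip count fits in Width bits (default 32, via
    // -spp-counted-loop-trip-width).
    static bool fitsTripWidth(const APInt &MaxTrips, unsigned Width) {
      return MaxTrips.isIntN(Width);
    }
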
@@ -432,14 +423,14 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
assert(hasNextInstruction(I) &&
"first check if there is a next instruction!");
if (I->isTerminator()) {
- return I->getParent()->getUniqueSuccessor()->begin();
+ return &I->getParent()->getUniqueSuccessor()->front();
} else {
- return std::next(BasicBlock::iterator(I));
+ return &*++I->getIterator();
}
};
Instruction *cursor = nullptr;
- for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+ for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
cursor = nextInstruction(cursor)) {
// We need to ensure a safepoint poll occurs before any 'real' call. The
@@ -466,7 +457,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
static void findCallSafepoints(Function &F,
std::vector<CallSite> &Found /*rval*/) {
assert(Found.empty() && "must be empty!");
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
Instruction *inst = &I;
if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
CallSite CS(inst);
@@ -713,7 +704,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
Invoke->getParent());
}
- Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+ Value *GCResult = ReplaceWithStatepoint(CS);
Results.push_back(GCResult);
}
assert(Results.size() == ParsePointNeeded.size());
@@ -747,7 +738,7 @@ FunctionPass *llvm::createPlaceSafepointsPass() {
INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
"place-backedge-safepoints-impl",
"Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
@@ -759,31 +750,6 @@ INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
false, false)
-static bool isGCLeafFunction(const CallSite &CS) {
- Instruction *inst = CS.getInstruction();
- if (isa<IntrinsicInst>(inst)) {
- // Most LLVM intrinsics are things which can never take a safepoint.
- // As a result, we don't need to have the stack parsable at the
- // callsite. This is a highly useful optimization since intrinsic
- // calls are fairly prevelent, particularly in debug builds.
- return true;
- }
-
- // If this function is marked explicitly as a leaf call, we don't need to
- // place a safepoint of it. In fact, for correctness we *can't* in many
- // cases. Note: Indirect calls return Null for the called function,
- // these obviously aren't runtime functions with attributes
- // TODO: Support attributes on the call site as well.
- const Function *F = CS.getCalledFunction();
- bool isLeaf =
- F &&
- F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
- if (isLeaf) {
- return true;
- }
- return false;
-}
-
static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
@@ -796,6 +762,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
// path call - where we need to insert a safepoint (parsepoint).
auto *F = M->getFunction(GCSafepointPollName);
+ assert(F && "gc.safepoint_poll function is missing");
assert(F->getType()->getElementType() ==
FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
"gc.safepoint_poll declared with wrong type");
@@ -864,10 +831,8 @@ InsertSafepointPoll(Instruction *InsertBefore,
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list. This does
/// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
- Pass *P) {
- assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
- "must be set");
+static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
+ assert(CS.getInstruction()->getModule() && "must be set");
// TODO: technically, a pass is not allowed to get functions from within a
// function pass since it might trigger a new function addition. Refactor
@@ -917,15 +882,10 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
AttrsToRemove);
- Value *StatepointTarget = NumPatchBytes == 0
- ? CS.getCalledValue()
- : ConstantPointerNull::get(cast<PointerType>(
- CS.getCalledValue()->getType()));
-
if (CS.isCall()) {
CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
CallInst *Call = Builder.CreateGCStatepointCall(
- ID, NumPatchBytes, StatepointTarget,
+ ID, NumPatchBytes, CS.getCalledValue(),
makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
"safepoint_token");
Call->setTailCall(ToReplace->isTailCall());
@@ -938,7 +898,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
Token = Call;
- // Put the following gc_result and gc_relocate calls immediately after the
+ // Put the following gc_result and gc_relocate calls immediately after
// the old call (which we're about to delete).
assert(ToReplace->getNextNode() && "not a terminator, must have next");
Builder.SetInsertPoint(ToReplace->getNextNode());
@@ -951,7 +911,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// original block.
Builder.SetInsertPoint(ToReplace->getParent());
InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+ ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
None, None, "safepoint_token");
@@ -967,7 +927,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// We'll insert the gc.result into the normal block
BasicBlock *NormalDest = ToReplace->getNormalDest();
    // Cannot insert gc.result in case of phi nodes present.
- // Should have removed this cases prior to runnning this function
+    // Should have removed these cases prior to running this function
assert(!isa<PHINode>(NormalDest->begin()));
Instruction *IP = &*(NormalDest->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index d1acf78..fb970c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,6 +26,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -62,7 +64,7 @@ namespace {
/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
- Module *M = I->getParent()->getParent()->getParent();
+ Module *M = I->getModule();
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -82,20 +84,6 @@ namespace {
Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
- /// \brief Sort factors by their Base.
- struct BaseSorter {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base < RHS.Base;
- }
- };
-
- /// \brief Compare factors for equal bases.
- struct BaseEqual {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base == RHS.Base;
- }
- };
-
/// \brief Sort factors in descending order by their power.
struct PowerDescendingSorter {
bool operator()(const Factor &LHS, const Factor &RHS) {
@@ -172,6 +160,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
void BuildRankMap(Function &F);
@@ -255,27 +244,6 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-static bool isUnmovableInstruction(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::PHI:
- case Instruction::LandingPad:
- case Instruction::Alloca:
- case Instruction::Load:
- case Instruction::Invoke:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- return true;
- case Instruction::Call:
- return !isa<DbgInfoIntrinsic>(I);
- default:
- return false;
- }
-}
-
void Reassociate::BuildRankMap(Function &F) {
unsigned i = 2;
@@ -295,7 +263,7 @@ void Reassociate::BuildRankMap(Function &F) {
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (isUnmovableInstruction(I))
+ if (mayBeMemoryDependent(*I))
ValueRankMap[&*I] = ++BBRank;
}
}
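
The hand-maintained opcode list is replaced by ValueTracking's
mayBeMemoryDependent, which conservatively answers whether an instruction's
result could change if it were moved relative to memory operations. A sketch
of the call as now used (wrapper name hypothetical):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Pin the rank of anything whose value may depend on where it executes
    // (loads, most calls, operations that may fault, and the like).
    static bool mustPinRank(Instruction &I) {
      return mayBeMemoryDependent(I);
    }
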
@@ -913,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add to the redo list any intermediate instructions that are modified
+/// while pushing the negates through adds. These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+ SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
@@ -934,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
@@ -948,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
+
+ // Add the intermediate negates to the redo list as processing them later
+ // could expose more reassociating opportunities.
+ ToRedo.insert(I);
return I;
}
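
The redo list threaded through NegateValue is a SetVector of AssertingVH, so
queued instructions are deduplicated, kept in insertion order, and (in asserts
builds) flag any deletion while still queued. A minimal sketch of the
safe-erase discipline this implies (helper name hypothetical):

    #include "llvm/ADT/SetVector.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/ValueHandle.h"
    using namespace llvm;

    // Drop the tracking handle before deleting, or AssertingVH will assert.
    static void eraseQueued(SetVector<AssertingVH<Instruction>> &ToRedo,
                            Instruction *I) {
      ToRedo.remove(I);     // remove the handle first
      I->eraseFromParent(); // now the instruction can be deleted safely
    }
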
@@ -972,26 +948,28 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
InsertPt = II->getNormalDest()->begin();
} else {
- InsertPt = InstInput;
- ++InsertPt;
+ InsertPt = ++InstInput->getIterator();
}
while (isa<PHINode>(InsertPt)) ++InsertPt;
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
- TheNeg->moveBefore(InsertPt);
+ TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
} else {
TheNeg->andIRFlags(BI);
}
+ ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- return CreateNeg(V, V->getName() + ".neg", BI, BI);
+ BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+ ToRedo.insert(NewNeg);
+ return NewNeg;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1025,14 +1003,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
- Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -1166,7 +1145,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
}
- BasicBlock::iterator InsertPt = BO; ++InsertPt;
+ BasicBlock::iterator InsertPt = ++BO->getIterator();
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
@@ -1179,7 +1158,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
if (NeedsNegate)
- V = CreateNeg(V, "neg", InsertPt, BO);
+ V = CreateNeg(V, "neg", &*InsertPt, BO);
return V;
}
@@ -1250,7 +1229,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return nullptr;
}
-/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and return the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
@@ -2083,7 +2062,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
- if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
+ if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
@@ -2099,7 +2078,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2110,6 +2089,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2117,7 +2102,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2127,7 +2112,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2142,8 +2133,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+ // During the initial run we will get to the root of the tree.
+ // But if we get here while we are redoing instructions, there is no
+ // guarantee that the root will be visited. So Redo later
+ if (BO->user_back() != BO)
+ RedoInsts.insert(BO->user_back());
return;
+ }
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
@@ -2250,10 +2247,10 @@ bool Reassociate::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
- if (isInstructionTriviallyDead(II)) {
- EraseInst(II++);
+ if (isInstructionTriviallyDead(&*II)) {
+ EraseInst(&*II++);
} else {
- OptimizeInst(II);
+ OptimizeInst(&*II);
assert(II->getParent() == BI && "Moved to a different block!");
++II;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727..915f897 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -82,10 +82,9 @@ bool RegToMem::runOnFunction(Function &F) {
BasicBlock::iterator I = BBEntry->begin();
while (isa<AllocaInst>(I)) ++I;
- CastInst *AllocaInsertionPoint =
- new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- Type::getInt32Ty(F.getContext()),
- "reg2mem alloca point", I);
+ CastInst *AllocaInsertionPoint = new BitCastInst(
+ Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
@@ -95,7 +94,7 @@ bool RegToMem::runOnFunction(Function &F) {
for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
iib != iie; ++iib) {
if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
- valueEscapes(iib)) {
+ valueEscapes(&*iib)) {
WorkList.push_front(&*iib);
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ae2ae3a..db127c3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,12 +14,14 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -46,10 +48,6 @@
using namespace llvm;
-// Print tracing output
-static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
- cl::init(false));
-
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
@@ -74,6 +72,12 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
cl::location(ClobberNonLive),
cl::Hidden);
+static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool>
+ AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
+ cl::Hidden, cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -88,10 +92,10 @@ struct RewriteStatepointsForGC : public ModulePass {
Changed |= runOnFunction(F);
if (Changed) {
- // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+ // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
// returns true for at least one function in the module. Since at least
// one function changed, we know that the precondition is satisfied.
- stripDereferenceabilityInfo(M);
+ stripNonValidAttributes(M);
}
return Changed;
@@ -108,15 +112,16 @@ struct RewriteStatepointsForGC : public ModulePass {
/// dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
- /// heap. stripDereferenceabilityInfo (conservatively) restores correctness
+ /// heap. stripNonValidAttributes (conservatively) restores correctness
/// by erasing all attributes in the module that externally imply
/// dereferenceability.
- ///
- void stripDereferenceabilityInfo(Module &M);
+ /// Similar reasoning also applies to the noalias attributes. gc.statepoint
+ /// can touch the entire heap including noalias objects.
+ void stripNonValidAttributes(Module &M);
- // Helpers for stripDereferenceabilityInfo
- void stripDereferenceabilityInfoFromBody(Function &F);
- void stripDereferenceabilityInfoFromPrototype(Function &F);
+ // Helpers for stripNonValidAttributes
+ void stripNonValidAttributesFromBody(Function &F);
+ void stripNonValidAttributesFromPrototype(Function &F);
};
} // namespace
@@ -160,15 +165,16 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
-typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+typedef DenseSet<Value *> StatepointLiveSetTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+ RematerializedValueMapTy;
struct PartiallyConstructedSafepointRecord {
- /// The set of values known to be live accross this safepoint
- StatepointLiveSetTy liveset;
+ /// The set of values known to be live across this safepoint
+ StatepointLiveSetTy LiveSet;
/// Mapping from live pointers to a base-defining-value
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+ DenseMap<Value *, Value *> PointerToBase;
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
@@ -179,12 +185,26 @@ struct PartiallyConstructedSafepointRecord {
Instruction *UnwindToken;
  /// Record live values we rematerialized instead of relocating.
- /// They are not included into 'liveset' field.
+  /// They are not included in the 'LiveSet' field.
  /// Maps a rematerialized copy to its original value.
RematerializedValueMapTy RematerializedValues;
};
}
+static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+ assert(UseDeoptBundles && "Should not be called otherwise!");
+
+ Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+
+ if (!DeoptBundle.hasValue()) {
+ assert(AllowStatepointWithNoDeoptInfo &&
+ "Found non-leaf call without deopt info!");
+ return None;
+ }
+
+ return DeoptBundle.getValue().Inputs;
+}
+
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
GCPtrLivenessData &Data);
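
GetDeoptBundleOperands above reads a call's deopt state through the new
operand bundle API; the general access pattern, sketched under the same
3.8-era interface (helper name hypothetical):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Fetch the "deopt" bundle's operands, or an empty list if the call
    // carries no deopt state.
    static ArrayRef<Use> deoptInputsOrEmpty(ImmutableCallSite CS) {
      if (Optional<OperandBundleUse> B = CS.getOperandBundle("deopt"))
        return B->Inputs;
      return None;
    }
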
@@ -195,10 +215,10 @@ static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
StatepointLiveSetTy &out);
// TODO: Once we can get to the GCStrategy, this becomes
-// Optional<bool> isGCManagedPointer(const Value *V) const override {
+// Optional<bool> isGCManagedPointer(const Type *Ty) const override {
-static bool isGCPointerType(const Type *T) {
- if (const PointerType *PT = dyn_cast<PointerType>(T))
+static bool isGCPointerType(Type *T) {
+ if (auto *PT = dyn_cast<PointerType>(T))
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does.
@@ -233,9 +253,8 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(
- ST->subtypes().begin(), ST->subtypes().end(),
- [](Type *SubType) { return containsGCPtrType(SubType); });
+ return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+ containsGCPtrType);
return false;
}
@@ -247,7 +266,7 @@ static bool isUnhandledGCPointerType(Type *Ty) {
}
#endif
-static bool order_by_name(llvm::Value *a, llvm::Value *b) {
+static bool order_by_name(Value *a, Value *b) {
if (a->hasName() && b->hasName()) {
return -1 == a->getName().compare(b->getName());
} else if (a->hasName() && !b->hasName()) {
@@ -260,6 +279,13 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
}
}
+// Return the name of the value suffixed with the provided suffix, or, if the
+// value didn't have a name, the specified default name.
+static std::string suffixed_name_or(Value *V, StringRef Suffix,
+ StringRef DefaultName) {
+ return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
+}
+
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
@@ -269,30 +295,56 @@ static void analyzeParsePointLiveness(
const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
Instruction *inst = CS.getInstruction();
- StatepointLiveSetTy liveset;
- findLiveSetAtInst(inst, OriginalLivenessData, liveset);
+ StatepointLiveSetTy LiveSet;
+ findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
// Note: This output is used by several of the test cases
- // The order of elemtns in a set is not stable, put them in a vec and sort
+    // The order of elements in a set is not stable; put them in a vec and sort
// by name
- SmallVector<Value *, 64> temp;
- temp.insert(temp.end(), liveset.begin(), liveset.end());
- std::sort(temp.begin(), temp.end(), order_by_name);
+ SmallVector<Value *, 64> Temp;
+ Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
errs() << "Live Variables:\n";
- for (Value *V : temp) {
- errs() << " " << V->getName(); // no newline
- V->dump();
- }
+ for (Value *V : Temp)
+ dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
- errs() << "Number live values: " << liveset.size() << "\n";
+ errs() << "Number live values: " << LiveSet.size() << "\n";
+ }
+ result.LiveSet = LiveSet;
+}
+
+static bool isKnownBaseResult(Value *V);
+namespace {
+/// A single base defining value - An immediate base defining value for an
+/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
+/// For instructions which have multiple pointer [vector] inputs or that
+/// transition between vector and scalar types, there is no immediate base
+/// defining value. The 'base defining value' for 'Def' is the transitive
+/// closure of this relation stopping at the first instruction which has no
+/// immediate base defining value. The b.d.v. might itself be a base pointer,
+/// but it can also be an arbitrary derived pointer.
+struct BaseDefiningValueResult {
+ /// Contains the value which is the base defining value.
+ Value * const BDV;
+ /// True if the base defining value is also known to be an actual base
+ /// pointer.
+ const bool IsKnownBase;
+ BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
+ : BDV(BDV), IsKnownBase(IsKnownBase) {
+#ifndef NDEBUG
+ // Check consistency between new and old means of checking whether a BDV is
+ // a base.
+ bool MustBeBase = isKnownBaseResult(BDV);
+ assert(!MustBeBase || MustBeBase == IsKnownBase);
+#endif
}
- result.liveset = liveset;
+};
}
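To make the transitive-closure wording above concrete, a toy standalone model (not the pass itself): each value maps to its immediate base defining input, and the BDV is found by following that relation until it stops.

    #include <cassert>
    #include <map>
    #include <string>

    // Toy relation: a value maps to its immediate base defining input; values
    // with no entry (phi/select/vector transitions) are their own stopping
    // point and thus the base defining value.
    static std::string findBDV(const std::map<std::string, std::string> &ImmBDV,
                               std::string V) {
      while (true) {
        auto It = ImmBDV.find(V);
        if (It == ImmBDV.end() || It->second == V)
          return V; // no immediate BDV: this is the base defining value
        V = It->second;
      }
    }

    int main() {
      // %gep2 -> %gep1 -> %phi, and %phi has no immediate BDV.
      std::map<std::string, std::string> ImmBDV = {
          {"gep2", "gep1"}, {"gep1", "phi"}};
      assert(findBDV(ImmBDV, "gep2") == "phi");
    }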
-static Value *findBaseDefiningValue(Value *I);
+static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
@@ -303,8 +355,8 @@ static Value *findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the return pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static std::pair<Value *, bool>
-findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
+static BaseDefiningValueResult
+findBaseDefiningValueOfVector(Value *I) {
assert(I->getType()->isVectorTy() &&
cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
@@ -314,7 +366,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// We shouldn't see the address of a global as a vector value?
assert(!isa<GlobalVariable>(I) &&
@@ -325,7 +377,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with partially optimized
// code.
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
@@ -333,31 +385,17 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
assert(Con->isNullValue() && "null is the only case which makes sense");
- return std::make_pair(Con, true);
+ return BaseDefiningValueResult(Con, true);
}
if (isa<LoadInst>(I))
- return std::make_pair(I, true);
-
- // For an insert element, we might be able to look through it if we know
- // something about the indexes.
- if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
- if (Index) {
- Value *InsertIndex = IEI->getOperand(2);
- // This index is inserting the value, look for its BDV
- if (InsertIndex == Index)
- return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
- // Both constant, and can't be equal per above. This insert is definitely
- // not relevant, look back at the rest of the vector and keep trying.
- if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
- return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
- }
-
+ return BaseDefiningValueResult(I, true);
+
+ if (isa<InsertElementInst>(I))
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return std::make_pair(IEI, false);
- }
+ return BaseDefiningValueResult(I, false);
if (isa<ShuffleVectorInst>(I))
// We don't know whether this vector contains entirely base pointers or
@@ -365,105 +403,62 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
// duplicate code as needed to construct a parallel vector of bases.
 // TODO: There are a number of local optimizations which could be applied here
 // for particular shufflevector patterns.
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
}
-static bool isKnownBaseResult(Value *V);
-
/// Helper function for findBasePointer - Will return a value which either a)
-/// defines the base pointer for the input or b) blocks the simple search
-/// (i.e. a PHI or Select of two derived pointers)
-static Value *findBaseDefiningValue(Value *I) {
+/// defines the base pointer for the input, b) blocks the simple search
+/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
+/// from pointer to vector type or back.
+static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I).first;
+ return findBaseDefiningValueOfVector(I);
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
- // This case is a bit of a hack - it only handles extracts from vectors which
- // trivially contain only base pointers or cases where we can directly match
- // the index of the original extract element to an insertion into the vector.
- // See note inside the function for how to improve this.
- if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
- Value *VectorOperand = EEI->getVectorOperand();
- Value *Index = EEI->getIndexOperand();
- std::pair<Value *, bool> pair =
- findBaseDefiningValueOfVector(VectorOperand, Index);
- Value *VectorBase = pair.first;
- if (VectorBase->getType()->isPointerTy())
- // We found a BDV for this specific element with the vector. This is an
- // optimization, but in practice it covers most of the useful cases
- // created via scalarization.
- return VectorBase;
- else {
- assert(VectorBase->getType()->isVectorTy());
- if (pair.second)
- // If the entire vector returned is known to be entirely base pointers,
- // then the extractelement is valid base for this value.
- return EEI;
- else {
- // Otherwise, we have an instruction which potentially produces a
- // derived pointer and we need findBasePointers to clone code for us
- // such that we can create an instruction which produces the
- // accompanying base pointer.
- // Note: This code is currently rather incomplete. We don't currently
- // support the general form of shufflevector of insertelement.
- // Conceptually, these are just 'base defining values' of the same
- // variety as phi or select instructions. We need to update the
- // findBasePointers algorithm to insert new 'base-only' versions of the
- // original instructions. This is relative straight forward to do, but
- // the case which would motivate the work hasn't shown up in real
- // workloads yet.
- assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
- "need to extend findBasePointers for generic vector"
- "instruction cases");
- return VectorBase;
- }
- }
- }
-
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
 // We should never have reached here if this argument isn't a gc value
- return I;
+ return BaseDefiningValueResult(I, true);
if (isa<GlobalVariable>(I))
// base case
- return I;
+ return BaseDefiningValueResult(I, true);
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with
// partially optimized code.
- return I;
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
+ if (isa<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
- // Note: Finding a constant base for something marked for relocation
- // doesn't really make sense. The most likely case is either a) some
- // screwed up the address space usage or b) your validating against
- // compiled C++ code w/o the proper separation. The only real exception
- // is a null pointer. You could have generic code written to index of
- // off a potentially null value and have proven it null. We also use
- // null pointers in dead paths of relocation phis (which we might later
- // want to find a base pointer for).
- assert(isa<ConstantPointerNull>(Con) &&
- "null is the only case which makes sense");
- return Con;
+ // Note: Even for frontends which don't have constant references, we can
+ // see constants appearing after optimizations. A simple example is
+ // specialization of an address computation on null feeding into a merge
+ // point where the actual use of the now-constant input is protected by
+ // another null check. (e.g. test4 in constants.ll)
+ return BaseDefiningValueResult(I, true);
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
+ // If stripping pointer casts changes the address space there is an
+ // addrspacecast in between.
+ assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
+ cast<PointerType>(CI->getType())->getAddressSpace() &&
+ "unsupported addrspacecast");
// If we find a cast instruction here, it means we've found a cast which is
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
@@ -472,7 +467,9 @@ static Value *findBaseDefiningValue(Value *I) {
}
if (isa<LoadInst>(I))
- return I; // The value loaded is an gc base itself
+ // The value loaded is a gc base itself
+ return BaseDefiningValueResult(I, true);
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
// The base of this GEP is the base
@@ -480,14 +477,11 @@ static Value *findBaseDefiningValue(Value *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::experimental_gc_result_ptr:
default:
// fall through to general call handling
break;
case Intrinsic::experimental_gc_statepoint:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_int:
- llvm_unreachable("these don't produce pointers");
+ llvm_unreachable("statepoints don't produce pointers");
case Intrinsic::experimental_gc_relocate: {
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
@@ -506,17 +500,17 @@ static Value *findBaseDefiningValue(Value *I) {
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// I have absolutely no idea how to implement this part yet. It's not
- // neccessarily hard, I just haven't really looked at it yet.
+ // necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
if (isa<AtomicCmpXchgInst>(I))
// A CAS is effectively a atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return I;
+ return BaseDefiningValueResult(I, true);
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -525,34 +519,41 @@ static Value *findBaseDefiningValue(Value *I) {
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
if (isa<ExtractValueInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
assert(!isa<InsertValueInst>(I) &&
"Base pointer for a struct is meaningless");
+ // An extractelement produces a base result exactly when its input does.
+ // We may need to insert a parallel instruction to extract the appropriate
+ // element out of the base vector corresponding to the input. Given this,
+ // it's analogous to the phi and select case even though it's not a merge.
+ if (isa<ExtractElementInst>(I))
+ // Note: There are a lot of obvious peephole cases here. These are deliberately
+ // handled after the main base pointer inference algorithm to make writing
+ // test cases to exercise that code easier.
+ return BaseDefiningValueResult(I, false);
+
// The last two cases here don't return a base pointer. Instead, they
- // return a value which dynamically selects from amoung several base
+ // return a value which dynamically selects from among several base
 // derived pointers (each potentially with its own base). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"missing instruction case in findBaseDefiningValing");
- return I;
+ return BaseDefiningValueResult(I, false);
}
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
Value *&Cached = Cache[I];
if (!Cached) {
- Cached = findBaseDefiningValue(I);
+ Cached = findBaseDefiningValue(I).BDV;
+ DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
+ << Cached->getName() << "\n");
}
assert(Cache[I] != nullptr);
-
- if (TraceLSP) {
- dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
- << "\n";
- }
return Cached;
}
@@ -572,7 +573,9 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
static bool isKnownBaseResult(Value *V) {
- if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
+ !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
+ !isa<ShuffleVectorInst>(V)) {
// no recursion possible
return true;
}
@@ -587,17 +590,19 @@ static bool isKnownBaseResult(Value *V) {
return false;
}
-// TODO: find a better name for this
namespace {
-class PhiState {
+/// Models the state of a single base defining value in the findBasePointer
+/// algorithm for determining where a new instruction is needed to propagate
+/// the base of this BDV.
+class BDVState {
public:
enum Status { Unknown, Base, Conflict };
- PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
+ BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
assert(status != Base || b);
}
- PhiState(Value *b) : status(Base), base(b) {}
- PhiState() : status(Unknown), base(nullptr) {}
+ explicit BDVState(Value *b) : status(Base), base(b) {}
+ BDVState() : status(Unknown), base(nullptr) {}
Status getStatus() const { return status; }
Value *getBase() const { return base; }
@@ -606,72 +611,80 @@ public:
bool isUnknown() const { return getStatus() == Unknown; }
bool isConflict() const { return getStatus() == Conflict; }
- bool operator==(const PhiState &other) const {
+ bool operator==(const BDVState &other) const {
return base == other.base && status == other.status;
}
- bool operator!=(const PhiState &other) const { return !(*this == other); }
+ bool operator!=(const BDVState &other) const { return !(*this == other); }
- void dump() {
- errs() << status << " (" << base << " - "
- << (base ? base->getName() : "nullptr") << "): ";
+ LLVM_DUMP_METHOD
+ void dump() const { print(dbgs()); dbgs() << '\n'; }
+
+ void print(raw_ostream &OS) const {
+ switch (status) {
+ case Unknown:
+ OS << "U";
+ break;
+ case Base:
+ OS << "B";
+ break;
+ case Conflict:
+ OS << "C";
+ break;
+ }
+ OS << " (" << base << " - "
+ << (base ? base->getName() : "nullptr") << "): ";
}
private:
Status status;
- Value *base; // non null only if status == base
+ AssertingVH<Value> base; // non null only if status == base
};
+}
-typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
-// Values of type PhiState form a lattice, and this is a helper
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
+ State.print(OS);
+ return OS;
+}
+#endif
+
+namespace {
+// Values of type BDVState form a lattice, and this is a helper
// class that implements the meet operation. The meat of the meet
-// operation is implemented in MeetPhiStates::pureMeet
-class MeetPhiStates {
+// operation is implemented in MeetBDVStates::pureMeet
+class MeetBDVStates {
public:
- // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
- explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
- : phiStates(phiStates) {}
-
- // Destructively meet the current result with the base V. V can
- // either be a merge instruction (SelectInst / PHINode), in which
- // case its status is looked up in the phiStates map; or a regular
- // SSA value, in which case it is assumed to be a base.
- void meetWith(Value *V) {
- PhiState otherState = getStateForBDV(V);
- assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
- MeetPhiStates::pureMeet(currentResult, otherState)) &&
- "math is wrong: meet does not commute!");
- currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+ /// Initializes the currentResult to the TOP state so that it can be met with
+ /// any other state to produce that state.
+ MeetBDVStates() {}
+
+ // Destructively meet the current result with the given BDVState
+ void meetWith(BDVState otherState) {
+ currentResult = meet(otherState, currentResult);
}
- PhiState getResult() const { return currentResult; }
+ BDVState getResult() const { return currentResult; }
private:
- const ConflictStateMapTy &phiStates;
- PhiState currentResult;
-
- /// Return a phi state for a base defining value. We'll generate a new
- /// base state for known bases and expect to find a cached state otherwise
- PhiState getStateForBDV(Value *baseValue) {
- if (isKnownBaseResult(baseValue)) {
- return PhiState(baseValue);
- } else {
- return lookupFromMap(baseValue);
- }
- }
+ BDVState currentResult;
- PhiState lookupFromMap(Value *V) {
- auto I = phiStates.find(V);
- assert(I != phiStates.end() && "lookup failed!");
- return I->second;
+ /// Perform a meet operation on two elements of the BDVState lattice.
+ static BDVState meet(BDVState LHS, BDVState RHS) {
+ assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
+ "math is wrong: meet does not commute!");
+ BDVState Result = pureMeet(LHS, RHS);
+ DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
+ << " produced " << Result << "\n");
+ return Result;
}
- static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
+ static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
switch (stateA.getStatus()) {
- case PhiState::Unknown:
+ case BDVState::Unknown:
return stateB;
- case PhiState::Base:
+ case BDVState::Base:
assert(stateA.getBase() && "can't be null");
if (stateB.isUnknown())
return stateA;
@@ -681,18 +694,20 @@ private:
assert(stateA == stateB && "equality broken!");
return stateA;
}
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
}
assert(stateB.isConflict() && "only three states!");
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
- case PhiState::Conflict:
+ case BDVState::Conflict:
return stateA;
}
llvm_unreachable("only three states!");
}
};
}
+
+
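A minimal standalone rendering of this three-point lattice and its meet, using plain strings for bases (an assumed simplification; the real code meets BDVState values):

    #include <cassert>
    #include <string>

    enum class Status { Unknown, Base, Conflict };

    struct State {
      Status S = Status::Unknown;
      std::string Base; // meaningful only when S == Status::Base
      bool operator==(const State &O) const { return S == O.S && Base == O.Base; }
    };

    // Unknown is TOP, Conflict is BOTTOM, and two distinct bases meet to Conflict.
    static State meet(const State &A, const State &B) {
      if (A.S == Status::Unknown) return B;
      if (B.S == Status::Unknown) return A;
      if (A.S == Status::Conflict || B.S == Status::Conflict)
        return {Status::Conflict, ""};
      return A.Base == B.Base ? A : State{Status::Conflict, ""};
    }

    int main() {
      State P{Status::Base, "p"}, Q{Status::Base, "q"};
      assert(meet(P, P) == P);                            // idempotent
      assert(meet(P, Q).S == Status::Conflict);           // distinct bases conflict
      assert(meet(meet(P, Q), P) == meet(P, meet(Q, P))); // commutes
    }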
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
@@ -723,171 +738,252 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
//
// Note: A simpler form of this would be to add the conflict form of all
// PHIs without running the optimistic algorithm. This would be
- // analougous to pessimistic data flow and would likely lead to an
+ // analogous to pessimistic data flow and would likely lead to an
// overall worse solution.
- ConflictStateMapTy states;
- states[def] = PhiState();
- // Recursively fill in all phis & selects reachable from the initial one
- // for which we don't already know a definite base value for
- // TODO: This should be rewritten with a worklist
- bool done = false;
- while (!done) {
- done = true;
- // Since we're adding elements to 'states' as we run, we can't keep
- // iterators into the set.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- for (Value *v : Keys) {
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (PHINode *phi = dyn_cast<PHINode>(v)) {
- assert(phi->getNumIncomingValues() > 0 &&
- "zero input phis are illegal");
- for (Value *InVal : phi->incoming_values()) {
- Value *local = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- }
- } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
- Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- local = findBaseOrBDV(sel->getFalseValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
+#ifndef NDEBUG
+ auto isExpectedBDVType = [](Value *BDV) {
+ return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
+ isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
+ };
+#endif
+
+ // Once populated, will contain a mapping from each potentially non-base BDV
+ // to a lattice value (described above) which corresponds to that BDV.
+ // We use the order of insertion (DFS over the def/use graph) to provide a
+ // stable deterministic ordering for visiting DenseMaps (which are unordered)
+ // below. This is important for deterministic compilation.
+ MapVector<Value *, BDVState> States;
+
+ // Recursively fill in all base defining values reachable from the initial
+ // one for which we don't already know a definite base value for
+ /* scope */ {
+ SmallVector<Value*, 16> Worklist;
+ Worklist.push_back(def);
+ States.insert(std::make_pair(def, BDVState()));
+ while (!Worklist.empty()) {
+ Value *Current = Worklist.pop_back_val();
+ assert(!isKnownBaseResult(Current) && "why did it get added?");
+
+ auto visitIncomingValue = [&](Value *InVal) {
+ Value *Base = findBaseOrBDV(InVal, cache);
+ if (isKnownBaseResult(Base))
+ // Known bases won't need new instructions introduced and can be
+ // ignored safely
+ return;
+ assert(isExpectedBDVType(Base) && "the only non-base values "
+ "we see should be base defining values");
+ if (States.insert(std::make_pair(Base, BDVState())).second)
+ Worklist.push_back(Base);
+ };
+ if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
+ for (Value *InVal : Phi->incoming_values())
+ visitIncomingValue(InVal);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
+ visitIncomingValue(Sel->getTrueValue());
+ visitIncomingValue(Sel->getFalseValue());
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
+ visitIncomingValue(EE->getVectorOperand());
+ } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
+ visitIncomingValue(IE->getOperand(0)); // vector operand
+ visitIncomingValue(IE->getOperand(1)); // scalar operand
+ } else {
+ // There is one class of instructions we know we don't handle.
+ assert(isa<ShuffleVectorInst>(Current));
+ llvm_unreachable("unimplemented instruction case");
}
}
}
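The discovery loop above is the standard insert-if-new worklist walk. A self-contained sketch of the same pattern over a toy operand relation:

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Toy "inputs of a BDV" relation, analogous to phi/select/vector operands.
      std::map<std::string, std::vector<std::string>> Inputs = {
          {"phi", {"sel", "gep"}}, {"sel", {"gep", "phi"}}};

      std::set<std::string> Seen; // plays the role of the States map keys
      std::vector<std::string> Worklist;
      Seen.insert("phi");
      Worklist.push_back("phi");
      while (!Worklist.empty()) {
        std::string Cur = Worklist.back();
        Worklist.pop_back();
        for (const std::string &In : Inputs[Cur])
          if (Seen.insert(In).second) // only enqueue values seen the first time
            Worklist.push_back(In);
      }
      assert(Seen.count("gep") && Seen.size() == 3); // phi, sel, gep
    }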
- if (TraceLSP) {
- errs() << "States after initialization:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after initialization:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
+#endif
- // TODO: come back and revisit the state transitions around inputs which
- // have reached conflict state. The current version seems too conservative.
+ // Return a phi state for a base defining value. We'll generate a new
+ // base state for known bases and expect to find a cached state otherwise.
+ auto getStateForBDV = [&](Value *baseValue) {
+ if (isKnownBaseResult(baseValue))
+ return BDVState(baseValue);
+ auto I = States.find(baseValue);
+ assert(I != States.end() && "lookup failed!");
+ return I->second;
+ };
bool progress = true;
while (progress) {
#ifndef NDEBUG
- size_t oldSize = states.size();
+ const size_t oldSize = States.size();
#endif
progress = false;
- // We're only changing keys in this loop, thus safe to keep iterators
- for (auto Pair : states) {
- MeetPhiStates calculateMeet(states);
- Value *v = Pair.first;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (SelectInst *select = dyn_cast<SelectInst>(v)) {
- calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
- calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
- } else
- for (Value *Val : cast<PHINode>(v)->incoming_values())
- calculateMeet.meetWith(findBaseOrBDV(Val, cache));
-
- PhiState oldState = states[v];
- PhiState newState = calculateMeet.getResult();
+ // We're only changing values in this loop, thus safe to keep iterators.
+ // Since this is computing a fixed point, the order of visit does not
+ // affect the result. TODO: We could use a worklist here and make this run
+ // much faster.
+ for (auto Pair : States) {
+ Value *BDV = Pair.first;
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+
+ // Given an input value for the current instruction, return a BDVState
+ // instance which represents the BDV of that value.
+ auto getStateForInput = [&](Value *V) mutable {
+ Value *BDV = findBaseOrBDV(V, cache);
+ return getStateForBDV(BDV);
+ };
+
+ MeetBDVStates calculateMeet;
+ if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
+ calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
+ calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
+ } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
+ for (Value *Val : Phi->incoming_values())
+ calculateMeet.meetWith(getStateForInput(Val));
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
+ // The 'meet' for an extractelement is essentially trivial, but it's still
+ // useful in that it drives us to conflict if our input is.
+ calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
+ } else {
+ // Given there's an inherent type mismatch between the operands, this will
+ // *always* produce Conflict.
+ auto *IE = cast<InsertElementInst>(BDV);
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
+ }
+
+ BDVState oldState = States[BDV];
+ BDVState newState = calculateMeet.getResult();
if (oldState != newState) {
progress = true;
- states[v] = newState;
+ States[BDV] = newState;
}
}
- assert(oldSize <= states.size());
- assert(oldSize == states.size() || progress);
+ assert(oldSize == States.size() &&
+ "fixed point shouldn't be adding any new nodes to state");
}
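The fixed-point loop itself, sketched standalone: recompute each node's state from its inputs until nothing changes. The meet here is deliberately simplified to "unknown is the identity, two distinct bases conflict":

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::map<std::string, std::vector<std::string>> Inputs = {
          {"a", {"p"}}, {"b", {"a", "q"}}, {"c", {"b"}}};
      // 'p' and 'q' are known bases; everything else starts out unknown ("").
      std::map<std::string, std::string> State = {{"a", ""}, {"b", ""}, {"c", ""}};

      bool Progress = true;
      while (Progress) {
        Progress = false;
        for (auto &Pair : State) {
          std::string New; // meet over the inputs' states
          for (const std::string &In : Inputs[Pair.first]) {
            std::string InState = State.count(In) ? State[In] : In; // known base
            if (New.empty())
              New = InState; // unknown is the identity of the meet
            else if (!InState.empty() && New != InState)
              New = "conflict";
          }
          if (Pair.second != New) {
            Pair.second = New;
            Progress = true;
          }
        }
      }
      assert(State["a"] == "p" && State["b"] == "conflict" &&
             State["c"] == "conflict");
    }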
- if (TraceLSP) {
- errs() << "States after meet iteration:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after meet iteration:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
-
+#endif
+
// Insert Phis for all conflicts
- // We want to keep naming deterministic in the loop that follows, so
- // sort the keys before iteration. This is useful in allowing us to
- // write stable tests. Note that there is no invalidation issue here.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- std::sort(Keys.begin(), Keys.end(), order_by_name);
// TODO: adjust naming patterns to avoid this order of iteration dependency
- for (Value *V : Keys) {
- Instruction *v = cast<Instruction>(V);
- PhiState state = states[V];
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ for (auto Pair : States) {
+ Instruction *I = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
+ assert(!isKnownBaseResult(I) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+
+ // extractelement instructions are a bit special in that we may need to
+ // insert an extract even when we know an exact base for the instruction.
+ // The problem is that we need to convert from a vector base to a scalar
+ // base for the particular index we're interested in.
+ if (State.isBase() && isa<ExtractElementInst>(I) &&
+ isa<VectorType>(State.getBase()->getType())) {
+ auto *EE = cast<ExtractElementInst>(I);
+ // TODO: In many cases, the new instruction is just EE itself. We should
+ // exploit this, but can't do it here since it would break the invariant
+ // about the BDV not being known to be a base.
+ auto *BaseInst = ExtractElementInst::Create(State.getBase(),
+ EE->getIndexOperand(),
+ "base_ee", EE);
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Base, BaseInst);
+ }
+
+ // Since we're joining a vector and scalar base, they can never be the
+ // same. As a result, we should always see an insertelement having reached
+ // the conflict state.
+ if (isa<InsertElementInst>(I)) {
+ assert(State.isConflict());
+ }
+
+ if (!State.isConflict())
continue;
- if (isa<PHINode>(v)) {
- int num_preds =
- std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
- assert(num_preds > 0 && "how did we reach here");
- PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- phi->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, phi);
+ /// Create and insert a new instruction which will represent the base of
+ /// the given instruction 'I'.
+ auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
+ if (isa<PHINode>(I)) {
+ BasicBlock *BB = I->getParent();
+ int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
+ assert(NumPreds > 0 && "how did we reach here");
+ std::string Name = suffixed_name_or(I, ".base", "base_phi");
+ return PHINode::Create(I->getType(), NumPreds, Name, I);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
+ // The undef will be replaced later
+ UndefValue *Undef = UndefValue::get(Sel->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_select");
+ return SelectInst::Create(Sel->getCondition(), Undef,
+ Undef, Name, Sel);
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
+ UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ee");
+ return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
+ EE);
+ } else {
+ auto *IE = cast<InsertElementInst>(I);
+ UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
+ UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ie");
+ return InsertElementInst::Create(VecUndef, ScalarUndef,
+ IE->getOperand(2), Name, IE);
+ }
+
+ };
+ Instruction *BaseInst = MakeBaseInstPlaceholder(I);
+ // Add metadata marking this as a base value
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Conflict, BaseInst);
+ }
+
+ // Returns an instruction which produces the base pointer for a given
+ // instruction. The instruction is assumed to be an input to one of the BDVs
+ // seen in the inference algorithm above. As such, we must either already
+ // know its base defining value is a base, or have inserted a new
+ // instruction to propagate the base of its BDV and have entered that newly
+ // introduced instruction into the state table. In either case, we are
+ // assured to be able to determine an instruction which produces its base
+ // pointer.
+ auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
+ Value *BDV = findBaseOrBDV(Input, cache);
+ Value *Base = nullptr;
+ if (isKnownBaseResult(BDV)) {
+ Base = BDV;
} else {
- SelectInst *sel = cast<SelectInst>(v);
- // The undef will be replaced later
- UndefValue *undef = UndefValue::get(sel->getType());
- SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
- undef, "base_select", sel);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- basesel->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, basesel);
+ // Either conflict or base.
+ assert(States.count(BDV));
+ Base = States[BDV].getBase();
}
- }
+ assert(Base && "can't be null");
+ // The cast is needed since base traversal may strip away bitcasts
+ if (Base->getType() != Input->getType() && InsertPt)
+ Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
+ return Base;
+ };
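Stripped of the IR types and the bitcast, getBaseForInput reduces to a two-way lookup; a toy version with hypothetical names:

    #include <cassert>
    #include <map>
    #include <string>

    // Either the BDV is already known to be a base, or the state table holds
    // the placeholder instruction that was inserted to produce its base.
    static std::string
    getBaseFor(const std::string &BDV, bool KnownBase,
               const std::map<std::string, std::string> &Bases) {
      if (KnownBase)
        return BDV;
      auto It = Bases.find(BDV);
      assert(It != Bases.end() && "conflict BDV must have an inserted base");
      return It->second;
    }

    int main() {
      std::map<std::string, std::string> Bases = {{"phi", "phi.base"}};
      assert(getBaseFor("obj", /*KnownBase=*/true, Bases) == "obj");
      assert(getBaseFor("phi", /*KnownBase=*/false, Bases) == "phi.base");
    }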
- // Fixup all the inputs of the new PHIs
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
+ // Fixup all the inputs of the new PHIs. Visit order needs to be
+ // deterministic and predictable because we're naming newly created
+ // instructions.
+ for (auto Pair : States) {
+ Instruction *BDV = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!State.isConflict())
continue;
- if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
- PHINode *phi = cast<PHINode>(v);
+ if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
+ PHINode *phi = cast<PHINode>(BDV);
unsigned NumPHIValues = phi->getNumIncomingValues();
for (unsigned i = 0; i < NumPHIValues; i++) {
Value *InVal = phi->getIncomingValue(i);
@@ -906,104 +1002,145 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
if (blockIndex != -1) {
Value *oldBase = basephi->getIncomingValue(blockIndex);
basephi->addIncoming(oldBase, InBB);
+
#ifndef NDEBUG
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
-
- // In essense this assert states: the only way two
+ Value *Base = getBaseForInput(InVal, nullptr);
+ // In essence this assert states: the only way two
// values incoming from the same basic block may be
// different is by being different bitcasts of the same
// value. A cleanup that remains TODO is changing
// findBaseOrBDV to return an llvm::Value of the correct
// type (and still remain pure). This will remove the
// need to add bitcasts.
- assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
"sanity -- findBaseOrBDV should be pure!");
#endif
continue;
}
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basephi->getType()) {
- base = new BitCastInst(base, basephi->getType(), "cast",
- InBB->getTerminator());
- }
- basephi->addIncoming(base, InBB);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast in the incoming block.
+ // TODO: Need to split critical edges if insertion is needed
+ Value *Base = getBaseForInput(InVal, InBB->getTerminator());
+ basephi->addIncoming(Base, InBB);
}
assert(basephi->getNumIncomingValues() == NumPHIValues);
- } else {
- SelectInst *basesel = cast<SelectInst>(state.getBase());
- SelectInst *sel = cast<SelectInst>(v);
+ } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
+ SelectInst *Sel = cast<SelectInst>(BDV);
// Operand 1 & 2 are true, false path respectively. TODO: refactor to
// something more safe and less hacky.
for (int i = 1; i <= 2; i++) {
- Value *InVal = sel->getOperand(i);
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basesel->getType()) {
- base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- }
- basesel->setOperand(i, base);
+ Value *InVal = Sel->getOperand(i);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseSel);
+ BaseSel->setOperand(i, Base);
}
+ } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
+ Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseEE);
+ BaseEE->setOperand(0, Base);
+ } else {
+ auto *BaseIE = cast<InsertElementInst>(State.getBase());
+ auto *BdvIE = cast<InsertElementInst>(BDV);
+ auto UpdateOperand = [&](int OperandIdx) {
+ Value *InVal = BdvIE->getOperand(OperandIdx);
+ Value *Base = getBaseForInput(InVal, BaseIE);
+ BaseIE->setOperand(OperandIdx, Base);
+ };
+ UpdateOperand(0); // vector operand
+ UpdateOperand(1); // scalar operand
+ }
+
+ }
+
+ // Now that we're done with the algorithm, see if we can optimize the
+ // results slightly by reducing the number of new instructions needed.
+ // Arguably, this should be integrated into the algorithm above, but
+ // doing it as a post-process step is easier to reason about for the moment.
+ DenseMap<Value *, Value *> ReverseMap;
+ SmallPtrSet<Instruction *, 16> NewInsts;
+ SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
+ // Note: We need to visit the states in a deterministic order. We use the
+ // insertion order of the States map for this purpose. Note that we are
+ // papering over a bigger problem with the algorithm above - its visit
+ // order is not fully deterministic. A larger change is needed to fix this.
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ auto State = Pair.second;
+ Value *Base = State.getBase();
+ assert(BDV && Base);
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(isKnownBaseResult(Base) &&
+ "must be something we 'know' is a base pointer");
+ if (!State.isConflict())
+ continue;
+
+ ReverseMap[Base] = BDV;
+ if (auto *BaseI = dyn_cast<Instruction>(Base)) {
+ NewInsts.insert(BaseI);
+ Worklist.insert(BaseI);
+ }
+ }
+ auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
+ Value *Replacement) {
+ // Add users which are new instructions (excluding self references)
+ for (User *U : BaseI->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (NewInsts.count(UI) && UI != BaseI)
+ Worklist.insert(UI);
+ // Then do the actual replacement
+ NewInsts.erase(BaseI);
+ ReverseMap.erase(BaseI);
+ BaseI->replaceAllUsesWith(Replacement);
+ assert(States.count(BDV));
+ assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
+ States[BDV] = BDVState(BDVState::Conflict, Replacement);
+ BaseI->eraseFromParent();
+ };
+ const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
+ while (!Worklist.empty()) {
+ Instruction *BaseI = Worklist.pop_back_val();
+ assert(NewInsts.count(BaseI));
+ Value *Bdv = ReverseMap[BaseI];
+ if (auto *BdvI = dyn_cast<Instruction>(Bdv))
+ if (BaseI->isIdenticalTo(BdvI)) {
+ DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, Bdv);
+ continue;
+ }
+ if (Value *V = SimplifyInstruction(BaseI, DL)) {
+ DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, V);
+ continue;
}
}
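The cleanup loop follows the classic "replace, then requeue the users" shape. A self-contained sketch with strings standing in for instructions (the simplification table is invented for illustration):

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Toy user lists: replacing a value may let its users simplify too.
      std::map<std::string, std::vector<std::string>> Users = {
          {"ee.base", {"phi.base"}}, {"phi.base", {}}};
      // Pretend both placeholders turn out to be redundant.
      std::map<std::string, std::string> SimplifiesTo = {
          {"ee.base", "ee"}, {"phi.base", "phi"}};

      std::set<std::string> Erased;
      std::vector<std::string> Worklist = {"ee.base"};
      while (!Worklist.empty()) {
        std::string I = Worklist.back();
        Worklist.pop_back();
        if (!SimplifiesTo.count(I))
          continue;
        for (const std::string &U : Users[I]) // requeue users before replacing
          if (!Erased.count(U))
            Worklist.push_back(U);
        Erased.insert(I); // stands in for RAUW + eraseFromParent
      }
      assert(Erased.count("ee.base") && Erased.count("phi.base"));
    }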
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
- for (auto item : states) {
- Value *v = item.first;
- Value *base = item.second.getBase();
- assert(v && base);
- assert(!isKnownBaseResult(v) && "why did it get added?");
-
- if (TraceLSP) {
- std::string fromstr =
- cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
- : "none";
- errs() << "Updating base value cache"
- << " for: " << (v->hasName() ? v->getName() : "")
- << " from: " << fromstr
- << " to: " << (base->hasName() ? base->getName() : "") << "\n";
- }
-
- assert(isKnownBaseResult(base) &&
- "must be something we 'know' is a base pointer");
- if (cache.count(v)) {
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ Value *base = Pair.second.getBase();
+ assert(BDV && base);
+
+ std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
+ DEBUG(dbgs() << "Updating base value cache"
+ << " for: " << BDV->getName()
+ << " from: " << fromstr
+ << " to: " << base->getName() << "\n");
+
+ if (cache.count(BDV)) {
 // Once we transition from the BDV relation being stored in the cache to
// the base relation being stored, it must be stable
- assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+ assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
"base relation should be stable");
}
- cache[v] = base;
+ cache[BDV] = base;
}
- assert(cache.find(def) != cache.end());
+ assert(cache.count(def));
return cache[def];
}
@@ -1024,7 +1161,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DenseMap<Value *, Value *> &PointerToBase,
DominatorTree *DT, DefiningValueMapTy &DVCache) {
// For the naming of values inserted to be deterministic - which makes for
// much cleaner and more stable tests - we need to assign an order to the
@@ -1043,7 +1180,7 @@ findBasePointers(const StatepointLiveSetTy &live,
// If you see this trip and like to live really dangerously, the code should
// be correct, just with idioms the verifier can't handle. You can try
- // disabling the verifier at your own substaintial risk.
+ // disabling the verifier at your own substantial risk.
assert(!isa<ConstantPointerNull>(base) &&
"the relocation code needs adjustment to handle the relocation of "
"a null pointer constant without causing false positives in the "
@@ -1056,8 +1193,8 @@ findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
const CallSite &CS,
PartiallyConstructedSafepointRecord &result) {
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
+ DenseMap<Value *, Value *> PointerToBase;
+ findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
// Note: Need to print these in a stable order since this is checked in
@@ -1071,8 +1208,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *Ptr : Temp) {
Value *Base = PointerToBase[Ptr];
- errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
- << "\n";
+ errs() << " derived ";
+ Ptr->printAsOperand(errs(), false);
+ errs() << " base ";
+ Base->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
@@ -1086,10 +1226,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
- // again. The old values are still live and will help it stablize quickly.
+ // again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
@@ -1099,69 +1239,66 @@ static void recomputeLiveInValues(
}
}
-// When inserting gc.relocate calls, we need to ensure there are no uses
-// of the original value between the gc.statepoint and the gc.relocate call.
-// One case which can arise is a phi node starting one of the successor blocks.
-// We also need to be able to insert the gc.relocates only on the path which
-// goes through the statepoint. We might need to split an edge to make this
-// possible.
+// When inserting gc.relocate and gc.result calls, we need to ensure there are
+// no uses of the original value / return value between the gc.statepoint and
+// the gc.relocate / gc.result call. One case which can arise is a phi node
+// starting one of the successor blocks. We also need to be able to insert the
+// gc.relocates only on the path which goes through the statepoint. We might
+// need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
DominatorTree &DT) {
BasicBlock *Ret = BB;
- if (!BB->getUniquePredecessor()) {
- Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT);
- }
+ if (!BB->getUniquePredecessor())
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
- // Now that 'ret' has unique predecessor we can safely remove all phi nodes
+ // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
// from it
FoldSingleEntryPHINodes(Ret);
- assert(!isa<PHINode>(Ret->begin()));
+ assert(!isa<PHINode>(Ret->begin()) &&
+ "All PHI nodes should have been removed!");
- // At this point, we can safely insert a gc.relocate as the first instruction
- // in Ret if needed.
+ // At this point, we can safely insert a gc.relocate or gc.result as the first
+ // instruction in Ret if needed.
return Ret;
}
-static int find_index(ArrayRef<Value *> livevec, Value *val) {
- auto itr = std::find(livevec.begin(), livevec.end(), val);
- assert(livevec.end() != itr);
- size_t index = std::distance(livevec.begin(), itr);
- assert(index < livevec.size());
- return index;
-}
-
-// Create new attribute set containing only attributes which can be transfered
+// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeSet legalizeCallAttributes(AttributeSet AS) {
- AttributeSet ret;
+ AttributeSet Ret;
for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
- unsigned index = AS.getSlotIndex(Slot);
+ unsigned Index = AS.getSlotIndex(Slot);
- if (index == AttributeSet::ReturnIndex ||
- index == AttributeSet::FunctionIndex) {
+ if (Index == AttributeSet::ReturnIndex ||
+ Index == AttributeSet::FunctionIndex) {
- for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
- ++it) {
- Attribute attr = *it;
+ for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
// Do not allow certain attributes - just skip them
// Safepoint can not be read only or read none.
- if (attr.hasAttribute(Attribute::ReadNone) ||
- attr.hasAttribute(Attribute::ReadOnly))
+ if (Attr.hasAttribute(Attribute::ReadNone) ||
+ Attr.hasAttribute(Attribute::ReadOnly))
+ continue;
+
+ // These attributes control the generation of the gc.statepoint call /
+ // invoke itself; and once the gc.statepoint is in place, they're of no
+ // use.
+ if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
+ Attr.hasAttribute("statepoint-id"))
continue;
- ret = ret.addAttributes(
- AS.getContext(), index,
- AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
+ Ret = Ret.addAttributes(
+ AS.getContext(), Index,
+ AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
}
}
// Just skip parameter attributes for now
}
- return ret;
+ return Ret;
}
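The transfer is essentially a filter-and-copy. A standalone sketch with attribute names as strings; the four dropped names are taken from the checks above, the rest is scaffolding:

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    static std::vector<std::string>
    legalizeAttrs(const std::vector<std::string> &Attrs) {
      // Attributes that must not survive onto the gc.statepoint itself.
      static const std::set<std::string> Dropped = {
          "readnone", "readonly", "statepoint-id",
          "statepoint-num-patch-bytes"};
      std::vector<std::string> Ret;
      for (const std::string &A : Attrs)
        if (!Dropped.count(A))
          Ret.push_back(A);
      return Ret;
    }

    int main() {
      auto Out = legalizeAttrs({"noinline", "readonly", "statepoint-id"});
      assert(Out.size() == 1 && Out[0] == "noinline");
    }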
/// Helper function to place all gc relocates necessary for the given
@@ -1173,225 +1310,290 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
/// statepointToken - statepoint instruction to which relocates should be
/// bound.
/// Builder - Llvm IR builder to be used to construct new calls.
-static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
+static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
const int LiveStart,
- ArrayRef<llvm::Value *> BasePtrs,
+ ArrayRef<Value *> BasePtrs,
Instruction *StatepointToken,
IRBuilder<> Builder) {
- SmallVector<Instruction *, 64> NewDefs;
- NewDefs.reserve(LiveVariables.size());
+ if (LiveVariables.empty())
+ return;
- Module *M = StatepointToken->getParent()->getParent()->getParent();
+ auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
+ auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
+ assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
+ size_t Index = std::distance(LiveVec.begin(), ValIt);
+ assert(Index < LiveVec.size() && "Bug in std::find?");
+ return Index;
+ };
- for (unsigned i = 0; i < LiveVariables.size(); i++) {
- // We generate a (potentially) unique declaration for every pointer type
- // combination. This results is some blow up the function declarations in
- // the IR, but removes the need for argument bitcasts which shrinks the IR
- // greatly and makes it much more readable.
- SmallVector<Type *, 1> Types; // one per 'any' type
- // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid
- // cases where the actual value's type mangling is not supported by llvm. A
- // bitcast is added later to convert gc_relocate to the actual value's type.
- Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
- Value *GCRelocateDecl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_gc_relocate, Types);
+ // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
+ // unique declarations for each pointer type, but this proved problematic
+ // because the intrinsic mangling code is incomplete and fragile. Since
+ // we're moving towards a single unified pointer type anyways, we can just
+ // cast everything to an i8* of the right address space. A bitcast is added
+ // later to convert gc_relocate to the actual value's type.
+ Module *M = StatepointToken->getModule();
+ auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
+ Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
+ Value *GCRelocateDecl =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
Value *BaseIdx =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, BasePtrs[i]));
- Value *LiveIdx = ConstantInt::get(
- Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, LiveVariables[i]));
+ Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
+ Value *LiveIdx = Builder.getInt32(LiveStart + i);
// only specify a debug name if we can give a useful one
- Value *Reloc = Builder.CreateCall(
+ CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
- LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
- : "");
+ suffixed_name_or(LiveVariables[i], ".relocated", ""));
// Trick CodeGen into thinking there are lots of free registers at this
// fake call.
- cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
+ Reloc->setCallingConv(CallingConv::Cold);
+ }
+}
- NewDefs.push_back(cast<Instruction>(Reloc));
+namespace {
+
+/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this
+/// avoids having to worry about keeping around dangling pointers to Values.
+class DeferredReplacement {
+ AssertingVH<Instruction> Old;
+ AssertingVH<Instruction> New;
+
+public:
+ explicit DeferredReplacement(Instruction *Old, Instruction *New) :
+ Old(Old), New(New) {
+ assert(Old != New && "Not allowed!");
}
- assert(NewDefs.size() == LiveVariables.size() &&
- "missing or extra redefinition at safepoint");
+
+ /// Does the task represented by this instance.
+ void doReplacement() {
+ Instruction *OldI = Old;
+ Instruction *NewI = New;
+
+ assert(OldI != NewI && "Disallowed at construction?!");
+
+ Old = nullptr;
+ New = nullptr;
+
+ if (NewI)
+ OldI->replaceAllUsesWith(NewI);
+ OldI->eraseFromParent();
+ }
+};
}
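Deferring the RAUW and erase is what lets callers finish iterating over the old calls before any of them disappear. A minimal standalone model of that two-phase shape:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::string> Insts = {"call1", "call2", "keep"};
      std::vector<std::pair<std::string, std::string>> Deferred;

      // Phase 1: record replacements while iterating; do not mutate Insts yet.
      for (const std::string &I : Insts)
        if (I.rfind("call", 0) == 0) // starts with "call"
          Deferred.emplace_back(I, I + ".statepoint");

      // Phase 2: apply everything after iteration is finished.
      for (const auto &Rep : Deferred)
        for (std::string &I : Insts)
          if (I == Rep.first)
            I = Rep.second; // stands in for replaceAllUsesWith + erase

      assert(Insts[0] == "call1.statepoint" && Insts[2] == "keep");
    }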
static void
-makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
- const SmallVectorImpl<llvm::Value *> &basePtrs,
- const SmallVectorImpl<llvm::Value *> &liveVariables,
- Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- assert(basePtrs.size() == liveVariables.size());
- assert(isStatepoint(CS) &&
+makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+ const SmallVectorImpl<Value *> &BasePtrs,
+ const SmallVectorImpl<Value *> &LiveVariables,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ assert(BasePtrs.size() == LiveVariables.size());
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
"This method expects to be rewriting a statepoint");
- BasicBlock *BB = CS.getInstruction()->getParent();
- assert(BB);
- Function *F = BB->getParent();
- assert(F && "must be set");
- Module *M = F->getParent();
- (void)M;
- assert(M && "must be set");
-
- // We're not changing the function signature of the statepoint since the gc
- // arguments go into the var args section.
- Function *gc_statepoint_decl = CS.getCalledFunction();
-
// Then go ahead and use the builder do actually do the inserts. We insert
// immediately before the previous instruction under the assumption that all
// arguments will be available here. We can't insert afterwards since we may
// be replacing a terminator.
- Instruction *insertBefore = CS.getInstruction();
- IRBuilder<> Builder(insertBefore);
- // Copy all of the arguments from the original statepoint - this includes the
- // target, call args, and deopt args
- SmallVector<llvm::Value *, 64> args;
- args.insert(args.end(), CS.arg_begin(), CS.arg_end());
- // TODO: Clear the 'needs rewrite' flag
-
- // add all the pointers to be relocated (gc arguments)
- // Capture the start of the live variable list for use in the gc_relocates
- const int live_start = args.size();
- args.insert(args.end(), liveVariables.begin(), liveVariables.end());
+ Instruction *InsertBefore = CS.getInstruction();
+ IRBuilder<> Builder(InsertBefore);
+
+ ArrayRef<Value *> GCArgs(LiveVariables);
+ uint64_t StatepointID = 0xABCDEF00;
+ uint32_t NumPatchBytes = 0;
+ uint32_t Flags = uint32_t(StatepointFlags::None);
+
+ ArrayRef<Use> CallArgs;
+ ArrayRef<Use> DeoptArgs;
+ ArrayRef<Use> TransitionArgs;
+
+ Value *CallTarget = nullptr;
+
+ if (UseDeoptBundles) {
+ CallArgs = {CS.arg_begin(), CS.arg_end()};
+ DeoptArgs = GetDeoptBundleOperands(CS);
+ // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
+ // could have an operand bundle for that too.
+ AttributeSet OriginalAttrs = CS.getAttributes();
+
+ Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "statepoint-id");
+ if (AttrID.isStringAttribute())
+ AttrID.getValueAsString().getAsInteger(10, StatepointID);
+
+ Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
+ AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
+ if (AttrNumPatchBytes.isStringAttribute())
+ AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
+
+ CallTarget = CS.getCalledValue();
+ } else {
+ // This branch will be gone soon, and we will soon only support the
+ // UseDeoptBundles == true configuration.
+ Statepoint OldSP(CS);
+ StatepointID = OldSP.getID();
+ NumPatchBytes = OldSP.getNumPatchBytes();
+ Flags = OldSP.getFlags();
+
+ CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
+ DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
+ TransitionArgs = {OldSP.gc_transition_args_begin(),
+ OldSP.gc_transition_args_end()};
+ CallTarget = OldSP.getCalledValue();
+ }
// Create the statepoint given all the arguments
- Instruction *token = nullptr;
- AttributeSet return_attributes;
+ Instruction *Token = nullptr;
+ AttributeSet ReturnAttrs;
if (CS.isCall()) {
- CallInst *toReplace = cast<CallInst>(CS.getInstruction());
- CallInst *call =
- Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
- call->setTailCall(toReplace->isTailCall());
- call->setCallingConv(toReplace->getCallingConv());
+ CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *Call = Builder.CreateGCStatepointCall(
+ StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
+ TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
+
+ Call->setTailCall(ToReplace->isTailCall());
+ Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ // If we can handle this set of attributes, set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- call->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Call->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = call;
+ Token = Call;
// Put the following gc_result and gc_relocate calls immediately after
// the old call (which we're about to delete).
- BasicBlock::iterator next(toReplace);
- assert(BB->end() != next && "not a terminator, must have next");
- next++;
- Instruction *IP = &*(next);
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(IP->getDebugLoc());
-
+ assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
+ Builder.SetInsertPoint(ToReplace->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
} else {
- InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
+ InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
// Insert the new invoke into the old block. We'll remove the old one in a
// moment at which point this will become the new terminator for the
// original block.
- InvokeInst *invoke = InvokeInst::Create(
- gc_statepoint_decl, toReplace->getNormalDest(),
- toReplace->getUnwindDest(), args, "", toReplace->getParent());
- invoke->setCallingConv(toReplace->getCallingConv());
+ InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
+ StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
+ ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
+ GCArgs, "statepoint_token");
+
+ Invoke->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ // If we can handle this set of attributes, set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- invoke->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Invoke->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = invoke;
+ Token = Invoke;
// Generate gc relocates in exceptional path
- BasicBlock *unwindBlock = toReplace->getUnwindDest();
- assert(!isa<PHINode>(unwindBlock->begin()) &&
- unwindBlock->getUniquePredecessor() &&
+ BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+ assert(!isa<PHINode>(UnwindBlock->begin()) &&
+ UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
- Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
+ Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- // Extract second element from landingpad return value. We will attach
- // exceptional gc relocates to it.
- const unsigned idx = 1;
- Instruction *exceptional_token =
- cast<Instruction>(Builder.CreateExtractValue(
- unwindBlock->getLandingPadInst(), idx, "relocate_token"));
- result.UnwindToken = exceptional_token;
+ // Attach exceptional gc relocates to the landingpad.
+ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
+ Result.UnwindToken = ExceptionalToken;
- // Just throw away return value. We will use the one we got for normal
- // block.
- (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
- exceptional_token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
+ Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *normalDest = toReplace->getNormalDest();
- assert(!isa<PHINode>(normalDest->begin()) &&
- normalDest->getUniquePredecessor() &&
+ BasicBlock *NormalDest = ToReplace->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()) &&
+ NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
- IP = &*(normalDest->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
+ Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
// gc relocates will be generated later as if it were a regular call
// statepoint.
}
- assert(token);
-
- // Take the name of the original value call if it had one.
- token->takeName(CS.getInstruction());
+ assert(Token && "Should be set in one of the above branches!");
+
+ if (UseDeoptBundles) {
+ Token->setName("statepoint_token");
+ if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+ StringRef Name =
+ CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+ GCResult->setAttributes(CS.getAttributes().getRetAttributes());
+
+ // We cannot RAUW or delete CS.getInstruction() because it could be in the
+ // live set of some other safepoint, in which case that safepoint's
+ // PartiallyConstructedSafepointRecord will hold a raw pointer to this
+ // llvm::Instruction. Instead, we defer the replacement and deletion to
+ // after the live sets have been made explicit in the IR, and we no longer
+ // have raw pointers to worry about.
+ Replacements.emplace_back(CS.getInstruction(), GCResult);
+ } else {
+ Replacements.emplace_back(CS.getInstruction(), nullptr);
+ }
+ } else {
+ assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
+ "only valid use before rewrite is gc.result");
+ assert(!CS.getInstruction()->hasOneUse() ||
+ isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
-// The GCResult is already inserted, we just need to find it
-#ifndef NDEBUG
- Instruction *toReplace = CS.getInstruction();
- assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
- "only valid use before rewrite is gc.result");
- assert(!toReplace->hasOneUse() ||
- isGCResult(cast<Instruction>(*toReplace->user_begin())));
-#endif
+ // Take the name of the original statepoint token if there was one.
+ Token->takeName(CS.getInstruction());
- // Update the gc.result of the original statepoint (if any) to use the newly
- // inserted statepoint. This is safe to do here since the token can't be
- // considered a live reference.
- CS.getInstruction()->replaceAllUsesWith(token);
+ // Update the gc.result of the original statepoint (if any) to use the newly
+ // inserted statepoint. This is safe to do here since the token can't be
+ // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(Token);
+ CS.getInstruction()->eraseFromParent();
+ }
- result.StatepointToken = token;
+ Result.StatepointToken = Token;
// Second, create a gc.relocate for every live variable
- CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
}
namespace {
-struct name_ordering {
- Value *base;
- Value *derived;
- bool operator()(name_ordering const &a, name_ordering const &b) {
- return -1 == a.derived->getName().compare(b.derived->getName());
+struct NameOrdering {
+ Value *Base;
+ Value *Derived;
+
+ bool operator()(NameOrdering const &a, NameOrdering const &b) {
+ return -1 == a.Derived->getName().compare(b.Derived->getName());
}
};
}
-static void stablize_order(SmallVectorImpl<Value *> &basevec,
- SmallVectorImpl<Value *> &livevec) {
- assert(basevec.size() == livevec.size());
-
- SmallVector<name_ordering, 64> temp;
- for (size_t i = 0; i < basevec.size(); i++) {
- name_ordering v;
- v.base = basevec[i];
- v.derived = livevec[i];
- temp.push_back(v);
- }
- std::sort(temp.begin(), temp.end(), name_ordering());
- for (size_t i = 0; i < basevec.size(); i++) {
- basevec[i] = temp[i].base;
- livevec[i] = temp[i].derived;
+
+static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
+ SmallVectorImpl<Value *> &LiveVec) {
+ assert(BaseVec.size() == LiveVec.size());
+
+ SmallVector<NameOrdering, 64> Temp;
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ NameOrdering v;
+ v.Base = BaseVec[i];
+ v.Derived = LiveVec[i];
+ Temp.push_back(v);
+ }
+
+ std::sort(Temp.begin(), Temp.end(), NameOrdering());
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ BaseVec[i] = Temp[i].Base;
+ LiveVec[i] = Temp[i].Derived;
}
}
@@ -1401,40 +1603,39 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- auto liveset = result.liveset;
- auto PointerToBase = result.PointerToBase;
+makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ const auto &LiveSet = Result.LiveSet;
+ const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
- SmallVector<Value *, 64> basevec, livevec;
- livevec.reserve(liveset.size());
- basevec.reserve(liveset.size());
- for (Value *L : liveset) {
- livevec.push_back(L);
-
- assert(PointerToBase.find(L) != PointerToBase.end());
- Value *base = PointerToBase[L];
- basevec.push_back(base);
+ SmallVector<Value *, 64> BaseVec, LiveVec;
+ LiveVec.reserve(LiveSet.size());
+ BaseVec.reserve(LiveSet.size());
+ for (Value *L : LiveSet) {
+ LiveVec.push_back(L);
+ assert(PointerToBase.count(L));
+ Value *Base = PointerToBase.find(L)->second;
+ BaseVec.push_back(Base);
}
- assert(livevec.size() == basevec.size());
+ assert(LiveVec.size() == BaseVec.size());
// To make the output IR slightly more stable (for use in diffs), ensure a
// fixed order of the values in the safepoint (by sorting on the value names).
// The order is otherwise meaningless.
- stablize_order(basevec, livevec);
+ StabilizeOrder(BaseVec, LiveVec);
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
- CS.getInstruction()->eraseFromParent();
+ makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for the relocationViaAlloca.
-// It receives iterator to the statepoint gc relocates and emits store to the
-// assigned
-// location (via allocaMap) for the each one of them.
-// Add visited values into the visitedLiveValues set we will later use them
-// for sanity check.
+//
+// It receives an iterator range over the statepoint's gc relocates and emits a
+// store to the assigned location (via AllocaMap) for each one of them. It adds
+// the visited values to the VisitedLiveValues set, which we later use for
+// sanity checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, Value *> &AllocaMap,
@@ -1459,13 +1660,15 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
Value *Alloca = AllocaMap[OriginalValue];
// Emit store into the related alloca
- // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+ // All gc_relocates are i8 addrspace(1)* typed, and must be bitcast to
// the correct type according to alloca.
- assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+ assert(RelocatedValue->getNextNode() &&
+ "Should always have one since it's not a terminator");
IRBuilder<> Builder(RelocatedValue->getNextNode());
Value *CastedRelocatedValue =
- Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
- RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+ Builder.CreateBitCast(RelocatedValue,
+ cast<AllocaInst>(Alloca)->getAllocatedType(),
+ suffixed_name_or(RelocatedValue, ".casted", ""));
StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
@@ -1501,10 +1704,10 @@ insertRematerializationStores(
}
}
-/// do all the relocation update via allocas and mem2reg
+/// Do all the relocation updates via allocas and mem2reg
static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
- ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+ ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
// record initial number of (static) allocas; we'll check we have the same
// number when we get done.
@@ -1531,15 +1734,12 @@ static void relocationViaAlloca(
PromotableAllocas.push_back(Alloca);
};
- // emit alloca for each live gc pointer
- for (unsigned i = 0; i < Live.size(); i++) {
- emitAllocaFor(Live[i]);
- }
-
- // emit allocas for rematerialized values
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Emit alloca for each live gc pointer
+ for (Value *V : Live)
+ emitAllocaFor(V);
+ // Emit allocas for rematerialized values
+ for (const auto &Info : Records)
for (auto RematerializedValuePair : Info.RematerializedValues) {
Value *OriginalValue = RematerializedValuePair.second;
if (AllocaMap.count(OriginalValue) != 0)
@@ -1548,20 +1748,17 @@ static void relocationViaAlloca(
emitAllocaFor(OriginalValue);
++NumRematerializedValues;
}
- }
// The next two loops are part of the same conceptual operation. We need to
// insert a store to the alloca after the original def and at each
// redefinition. We need to insert a load before each use. These are split
// into distinct loops for performance reasons.
- // update gc pointer after each statepoint
- // either store a relocated value or null (if no relocated value found for
- // this gc pointer and it is not a gc_result)
- // this must happen before we update the statepoint with load of alloca
- // otherwise we lose the link between statepoint and old def
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Update gc pointer after each statepoint: either store a relocated value or
+ // null (if no relocated value was found for this gc pointer and it is not a
+ // gc_result). This must happen before we update the statepoint with a load
+ // of the alloca, otherwise we lose the link between statepoint and old def.
+ for (const auto &Info : Records) {
Value *Statepoint = Info.StatepointToken;
// This will be used for a consistency check
@@ -1582,7 +1779,7 @@ static void relocationViaAlloca(
VisitedLiveValues);
if (ClobberNonLive) {
- // As a debuging aid, pretend that an unrelocated pointer becomes null at
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
// the gc.statepoint. This will turn some subtle GC problems into
// slightly easier to debug SEGVs. Note that on large IR files with
// lots of gc.statepoints this is extremely costly both memory and time
@@ -1612,23 +1809,22 @@ static void relocationViaAlloca(
// Insert the clobbering stores. These may get intermixed with the
// gc.results and gc.relocates, but that's fine.
if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
} else {
- BasicBlock::iterator Next(cast<CallInst>(Statepoint));
- Next++;
- InsertClobbersAt(Next);
+ InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
}
}
}
- // update use with load allocas and add store for gc_relocated
+
+ // Update use with load allocas and add store for gc_relocated.
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
Value *Alloca = Pair.second;
- // we pre-record the uses of allocas so that we dont have to worry about
- // later update
- // that change the user information.
+ // We pre-record the uses of allocas so that we don't have to worry about
+ // later updates that change the user information.
+
SmallVector<Instruction *, 20> Uses;
// PERF: trade a linear scan for repeated reallocation
Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
@@ -1663,9 +1859,9 @@ static void relocationViaAlloca(
}
}
- // emit store for the initial gc value
- // store must be inserted after load, otherwise store will be in alloca's
- // use list and an extra load will be inserted before it
+ // Emit store for the initial gc value. The store must be inserted after
+ // the load, otherwise the store will be in the alloca's use list and an
+ // extra load will be inserted before it.
StoreInst *Store = new StoreInst(Def, Alloca);
if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
@@ -1688,14 +1884,13 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
if (!PromotableAllocas.empty()) {
- // apply mem2reg to promote alloca to SSA
+ // Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
}
#ifndef NDEBUG
- for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
- I++)
- if (isa<AllocaInst>(*I))
+ for (auto &I : F.getEntryBlock())
+ if (isa<AllocaInst>(I))
InitialAllocaNum--;
assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
@@ -1719,28 +1914,27 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
// No values to hold live, might as well not insert the empty holder
return;
- Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ Module *M = CS.getInstruction()->getModule();
// Use a dummy vararg function to actually hold the values live
Function *Func = cast<Function>(M->getOrInsertFunction(
"__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
if (CS.isCall()) {
// For call safepoints insert dummy calls right after safepoint
- BasicBlock::iterator Next(CS.getInstruction());
- Next++;
- Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ Holders.push_back(CallInst::Create(Func, Values, "",
+ &*++CS.getInstruction()->getIterator()));
return;
}
// For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
auto *II = cast<InvokeInst>(CS.getInstruction());
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
}
static void findLiveReferences(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
GCPtrLivenessData OriginalLivenessData;
computeLiveInValues(DT, F, OriginalLivenessData);
@@ -1751,12 +1945,12 @@ static void findLiveReferences(
}
}
-/// Remove any vector of pointers from the liveset by scalarizing them over the
-/// statepoint instruction. Adds the scalarized pieces to the liveset. It
-/// would be preferrable to include the vector in the statepoint itself, but
+/// Remove any vector of pointers from the live set by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the live set. It
+/// would be preferable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that. Extending it would be
/// slightly non-trivial since it requires a format change. Given how rare
-/// such cases are (for the moment?) scalarizing is an acceptable comprimise.
+/// such cases are (for the moment?) scalarizing is an acceptable compromise.
static void splitVectorValues(Instruction *StatepointInst,
StatepointLiveSetTy &LiveSet,
DenseMap<Value *, Value *>& PointerToBase,
@@ -1887,7 +2081,7 @@ static void splitVectorValues(Instruction *StatepointInst,
// Helper function for "rematerializeLiveValues". It walks the use chain
// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
// values are visited (currently GEPs and casts). Returns true if it
-// sucessfully reached "BaseValue" and false otherwise.
+// successfully reached "BaseValue" and false otherwise.
// Fills "ChainToBase" array with all visited values. "BaseValue" is not
// recorded.
static bool findRematerializableChainToBasePointer(
@@ -1907,16 +2101,12 @@ static bool findRematerializableChainToBasePointer(
}
if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
- Value *Def = CI->stripPointerCasts();
-
- // This two checks are basically similar. First one is here for the
- // consistency with findBasePointers logic.
- assert(!isa<CastInst>(Def) && "not a pointer cast found");
if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
return false;
ChainToBase.push_back(CI);
- return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ CI->getOperand(0), BaseValue);
}
// Unsupported instruction in the chain
@@ -1957,8 +2147,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
return Cost;
}
-// From the statepoint liveset pick values that are cheaper to recompute then to
-// relocate. Remove this values from the liveset, rematerialize them after
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
@@ -1970,10 +2160,10 @@ static void rematerializeLiveValues(CallSite CS,
// We cannot do this in the following loop due to iterator invalidation.
SmallVector<Value *, 32> LiveValuesToBeDeleted;
- for (Value *LiveValue: Info.liveset) {
+ for (Value *LiveValue: Info.LiveSet) {
// For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ assert(Info.PointerToBase.count(LiveValue));
bool FoundChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue,
@@ -2059,9 +2249,9 @@ static void rematerializeLiveValues(CallSite CS,
InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
Instruction *NormalInsertBefore =
- Invoke->getNormalDest()->getFirstInsertionPt();
+ &*Invoke->getNormalDest()->getFirstInsertionPt();
Instruction *UnwindInsertBefore =
- Invoke->getUnwindDest()->getFirstInsertionPt();
+ &*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue =
rematerializeChain(NormalInsertBefore);
@@ -2075,22 +2265,23 @@ static void rematerializeLiveValues(CallSite CS,
// Remove rematerialized values from the live set
for (auto LiveValue: LiveValuesToBeDeleted) {
- Info.liveset.erase(LiveValue);
+ Info.LiveSet.erase(LiveValue);
}
}
-static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
- SmallVectorImpl<CallSite> &toUpdate) {
+static bool insertParsePoints(Function &F, DominatorTree &DT,
+ TargetTransformInfo &TTI,
+ SmallVectorImpl<CallSite> &ToUpdate) {
#ifndef NDEBUG
// sanity check the input
- std::set<CallSite> uniqued;
- uniqued.insert(toUpdate.begin(), toUpdate.end());
- assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+ std::set<CallSite> Uniqued;
+ Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
+ assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
+ for (CallSite CS : ToUpdate) {
assert(CS.getInstruction()->getParent()->getParent() == &F);
- assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
+ "expected to already be a deopt statepoint");
}
#endif
@@ -2098,50 +2289,45 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the top of the successor blocks. See the comment on
// normalizeForInvokeSafepoint for exactly what is needed. Note that this step
// may restructure the CFG.
- for (CallSite CS : toUpdate) {
+ for (CallSite CS : ToUpdate) {
if (!CS.isInvoke())
continue;
- InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
- normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
- DT);
- normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
- DT);
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
+ normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
}
// A list of dummy calls added to the IR to keep various values obviously
// live in the IR. We'll remove all of these when done.
- SmallVector<CallInst *, 64> holders;
+ SmallVector<CallInst *, 64> Holders;
// Insert a dummy call with all of the arguments to the vm_state we'll need
// for the actual safepoint insertion. This ensures reference arguments in
// the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
- Statepoint StatepointCS(CS);
-
+ for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- for (Use &U : StatepointCS.vm_state_args()) {
- Value *Arg = cast<Value>(&U);
+
+ iterator_range<const Use *> DeoptStateRange =
+ UseDeoptBundles
+ ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
+ : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
+
+ for (Value *Arg : DeoptStateRange) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
- insertUseHolderAfter(CS, DeoptValues, holders);
- }
- SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
- records.reserve(toUpdate.size());
- for (size_t i = 0; i < toUpdate.size(); i++) {
- struct PartiallyConstructedSafepointRecord info;
- records.push_back(info);
+ insertUseHolderAfter(CS, DeoptValues, Holders);
}
- assert(records.size() == toUpdate.size());
- // A) Identify all gc pointers which are staticly live at the given call
+ SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
+
+ // A) Identify all gc pointers which are statically live at the given call
// site.
- findLiveReferences(F, DT, P, toUpdate, records);
+ findLiveReferences(F, DT, ToUpdate, Records);
// B) Find the base pointers for each live pointer
/* scope for caching */ {
@@ -2150,10 +2336,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// large numbers of duplicate base_phis.
DefiningValueMapTy DVCache;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- findBasePointers(DT, DVCache, CS, info);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &info = Records[i];
+ findBasePointers(DT, DVCache, ToUpdate[i], info);
}
} // end of cache scope
@@ -2170,63 +2355,75 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the base pointers which were identified for that safepoint. We'll then
// ask liveness for _every_ base inserted to see what is now live. Then we
// remove the dummy calls.
- holders.reserve(holders.size() + records.size());
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
+ Holders.reserve(Holders.size() + Records.size());
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : info.PointerToBase) {
+ for (auto Pair : Info.PointerToBase)
Bases.push_back(Pair.second);
- }
- insertUseHolderAfter(CS, Bases, holders);
+
+ insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, P, toUpdate, records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records);
if (PrintBasePointers) {
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ for (auto &Info : Records) {
errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : info.PointerToBase) {
- errs() << " derived %" << Pair.first->getName() << " base %"
- << Pair.second->getName() << "\n";
+ for (auto Pair : Info.PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
}
- for (size_t i = 0; i < holders.size(); i++) {
- holders[i]->eraseFromParent();
- holders[i] = nullptr;
- }
- holders.clear();
+
+ // It is possible that non-constant live variables have a constant base. For
+ // example, a GEP with a variable offset from a global. In this case we can
+ // remove it from the live set. We already don't add constants to the live
+ // set because we assume they won't move at runtime and the GC doesn't need
+ // to be informed about them. The same reasoning applies if the base is
+ // constant. Note that the relocation placement code relies on this filtering
+ // for correctness as it expects the base to be in the live set, which isn't
+ // true if the base is constant.
+ for (auto &Info : Records)
+ for (auto &BasePair : Info.PointerToBase)
+ if (isa<Constant>(BasePair.second))
+ Info.LiveSet.erase(BasePair.first);
+
+ for (CallInst *CI : Holders)
+ CI->eraseFromParent();
+
+ Holders.clear();
// Do a limited scalarization of any live-at-safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
// the cost of some possible performance loss. TODO: it would be nice to
// natively support vectors all the way through the backend so we don't need
// to scalarize here.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- Instruction *statepoint = toUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(statepoint), info.liveset,
- info.PointerToBase, DT);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
}
// In order to reduce live set of statepoint we might choose to rematerialize
- // some values instead of relocating them. This is purelly an optimization and
+ // some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
- TargetTransformInfo &TTI =
- P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ for (size_t i = 0; i < Records.size(); i++)
+ rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
-
- rematerializeLiveValues(CS, info, TTI);
- }
+ // We need this to safely RAUW and delete call or invoke return values that
+ // may themselves be live over a statepoint. For details, please see usage in
+ // makeStatepointExplicitImpl.
+ std::vector<DeferredReplacement> Replacements;
// Now run through and replace the existing statepoints with new ones with
// the live variables listed. We do not yet update uses of the values being
@@ -2234,61 +2431,77 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// survive to the last iteration of this loop. (By construction, the
// previous statepoint cannot be a live variable, thus we can safely remove
// the old statepoint calls as we go.)
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- makeStatepointExplicit(DT, CS, P, info);
+ for (size_t i = 0; i < Records.size(); i++)
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+
+ ToUpdate.clear(); // prevent accidental use of invalid CallSites
+
+ for (auto &PR : Replacements)
+ PR.doReplacement();
+
+ Replacements.clear();
+
+ for (auto &Info : Records) {
+ // These live sets may contain stale Value pointers, since we replaced calls
+ // with operand bundles with calls wrapped in gc.statepoint, and some of
+ // those calls may have been def'ing live gc pointers. Clear these out to
+ // avoid accidentally using them.
+ //
+ // TODO: We should create a separate data structure that does not contain
+ // these live sets, and migrate to using that data structure from this point
+ // onward.
+ Info.LiveSet.clear();
+ Info.PointerToBase.clear();
}
- toUpdate.clear(); // prevent accident use of invalid CallSites
// Do all the fixups of the original live variables to their relocated selves
- SmallVector<Value *, 128> live;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ SmallVector<Value *, 128> Live;
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+
// We can't simply save the live set from the original insertion. One of
// the live values might be the result of a call which needs a safepoint.
// That Value* no longer exists and we need to use the new gc_result.
- // Thankfully, the liveset is embedded in the statepoint (and updated), so
+ // Thankfully, the live set is embedded in the statepoint (and updated), so
// we just grab that.
- Statepoint statepoint(info.StatepointToken);
- live.insert(live.end(), statepoint.gc_args_begin(),
- statepoint.gc_args_end());
+ Statepoint Statepoint(Info.StatepointToken);
+ Live.insert(Live.end(), Statepoint.gc_args_begin(),
+ Statepoint.gc_args_end());
#ifndef NDEBUG
// Do some basic sanity checks on our liveness results before performing
// relocation. Relocation can and will turn mistakes in liveness results
// into nonsensical code which is much harder to debug.
// TODO: It would be nice to test consistency as well
- assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
- for (Value *V : statepoint.gc_args()) {
+ for (Value *V : Statepoint.gc_args()) {
if (!isa<Instruction>(V))
// Non-instruction values trivially dominate all possible uses
continue;
- auto LiveInst = cast<Instruction>(V);
+ auto *LiveInst = cast<Instruction>(V);
assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
"unreachable values should never be live");
- assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ assert(DT.dominates(LiveInst, Info.StatepointToken) &&
"basic SSA liveness expectation violated by liveness analysis");
}
#endif
}
- unique_unsorted(live);
+ unique_unsorted(Live);
#ifndef NDEBUG
// sanity check
- for (auto ptr : live) {
- assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
- }
+ for (auto *Ptr : Live)
+ assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
#endif
- relocationViaAlloca(F, DT, live, records);
- return !records.empty();
+ relocationViaAlloca(F, DT, Live, Records);
+ return !Records.empty();
}
// Handles both return values and arguments for Functions and CallSites.
template <typename AttrHolder>
-static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
- unsigned Index) {
+static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+ unsigned Index) {
AttrBuilder R;
if (AH.getDereferenceableBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
@@ -2296,6 +2509,8 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
if (AH.getDereferenceableOrNullBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
AH.getDereferenceableOrNullBytes(Index)));
+ if (AH.doesNotAlias(Index))
+ R.addAttribute(Attribute::NoAlias);
if (!R.empty())
AH.setAttributes(AH.getAttributes().removeAttributes(
@@ -2303,25 +2518,25 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
}
void
-RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
LLVMContext &Ctx = F.getContext();
for (Argument &A : F.args())
if (isa<PointerType>(A.getType()))
- RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+ RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
if (isa<PointerType>(F.getReturnType()))
- RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
if (F.empty())
return;
LLVMContext &Ctx = F.getContext();
MDBuilder Builder(Ctx);
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
bool IsImmutableTBAA =
@@ -2344,9 +2559,9 @@ void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
if (CallSite CS = CallSite(&I)) {
for (int i = 0, e = CS.arg_size(); i != e; i++)
if (isa<PointerType>(CS.getArgument(i)->getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+ RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
if (isa<PointerType>(CS.getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
}
}
}
@@ -2365,17 +2580,17 @@ static bool shouldRewriteStatepointsIn(Function &F) {
return false;
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) {
+void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
#ifndef NDEBUG
assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
"precondition!");
#endif
for (Function &F : M)
- stripDereferenceabilityInfoFromPrototype(F);
+ stripNonValidAttributesFromPrototype(F);
for (Function &F : M)
- stripDereferenceabilityInfoFromBody(F);
+ stripNonValidAttributesFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
@@ -2389,15 +2604,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ auto NeedsRewrite = [](Instruction &I) {
+ if (UseDeoptBundles) {
+ if (ImmutableCallSite CS = ImmutableCallSite(&I))
+ return !callsGCLeafFunction(CS);
+ return false;
+ }
+
+ return isStatepoint(I);
+ };
// Gather all the statepoints which need rewriting. Be careful to only
// consider those in reachable code since we need to ask dominance queries
// when rewriting. We'll delete the unreachable ones in a moment.
SmallVector<CallSite, 64> ParsePointNeeded;
bool HasUnreachableStatepoint = false;
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
// TODO: only the ones with the flag set!
- if (isStatepoint(I)) {
+ if (NeedsRewrite(I)) {
if (DT.isReachableFromEntry(I.getParent()))
ParsePointNeeded.push_back(CallSite(&I));
else
@@ -2428,7 +2655,38 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
FoldSingleEntryPHINodes(&BB);
}
- MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ // Before we start introducing relocations, we want to tweak the IR a bit to
+ // avoid unfortunate code generation effects. The main example is that we
+ // want to try to make sure the comparison feeding a branch is after any
+ // safepoints. Otherwise, we end up with a comparison of pre-relocation
+ // values feeding a branch after relocation. This is semantically correct,
+ // but results in extra register pressure since both the pre-relocation and
+ // post-relocation copies must be available in registers. For code without
+ // relocations this is handled elsewhere, but teaching the scheduler to
+ // reverse the transform we're about to do would be slightly complex.
+ // Note: This may extend the live range of the inputs to the icmp and thus
+ // increase the liveset of any statepoint we move over. This is profitable
+ // as long as all statepoints are in rare blocks. If we had in-register
+ // lowering for live values this would be a much safer transform.
+ auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
+ if (auto *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional())
+ return dyn_cast<Instruction>(BI->getCondition());
+ // TODO: Extend this to handle switches
+ return nullptr;
+ };
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (auto *Cond = getConditionInst(TI))
+ // TODO: Handle more than just ICmps here. We should be able to move
+ // most instructions without side effects or memory access.
+ if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
+ MadeChange = true;
+ Cond->moveBefore(TI);
+ }
+ }
+
+ MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
return MadeChange;
}
@@ -2461,7 +2719,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
"support for FCA unimplemented");
if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
// The choice to exclude all things constant here is slightly subtle.
- // There are two idependent reasons:
+ // There are two independent reasons:
// - We assume that things which are constant (from LLVM's definition)
// do not move at runtime. For example, the address of a global
// variable is fixed, even though its contents may not be.
@@ -2599,7 +2857,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while( !worklist.empty() )
#ifndef NDEBUG
- // Sanity check our ouput against SSA properties. This helps catch any
+ // Sanity check our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F) {
checkBasicSSA(DT, Data, BB);
@@ -2620,7 +2878,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// call result is not live (normal), nor are its arguments
// (unless they're used again later). This adjustment is
// specifically what we need to relocate
- BasicBlock::reverse_iterator rend(Inst);
+ BasicBlock::reverse_iterator rend(Inst->getIterator());
computeLiveInValues(BB->rbegin(), rend, LiveOut);
LiveOut.erase(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
@@ -2669,5 +2927,5 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
assert(Updated.count(KVPair.first) && "record for non-live value");
#endif
- Info.liveset = Updated;
+ Info.LiveSet = Updated;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 4d3a708..2fca803 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
@@ -479,6 +480,13 @@ private:
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+ void visitFuncletPadInst(FuncletPadInst &FPI) {
+ markAnythingOverdefined(&FPI);
+ }
+ void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+ markAnythingOverdefined(&CPI);
+ visitTerminatorInst(CPI);
+ }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -539,9 +547,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- if (isa<InvokeInst>(TI)) {
- // Invoke instructions successors are always executable.
- Succs[0] = Succs[1] = true;
+ // Unwinding instructions' successors are always executable.
+ if (TI.isExceptional()) {
+ Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -605,8 +613,8 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return BI->getSuccessor(CI->isZero()) == To;
}
- // Invoke instructions successors are always executable.
- if (isa<InvokeInst>(TI))
+ // Unwinding instructions' successors are always executable.
+ if (TI->isExceptional())
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -630,7 +638,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(nullptr);
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
// visit Implementations - Something changed in this instruction, either an
@@ -1126,7 +1134,7 @@ CallOverdefined:
// entry block executable and merge in the actual arguments to the call into
// the formal arguments of the function.
if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
- MarkBlockExecutable(F->begin());
+ MarkBlockExecutable(&F->front());
// Propagate information from this call site into the callee.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1135,17 +1143,17 @@ CallOverdefined:
// If this argument is byval, and if the function is not readonly, there
// will be an implicit copy formed of the input aggregate.
if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
- markOverdefined(AI);
+ markOverdefined(&*AI);
continue;
}
if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
- mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg);
}
} else {
- mergeInValue(AI, getValueState(*CAI));
+ mergeInValue(&*AI, getValueState(*CAI));
}
}
}
@@ -1246,18 +1254,18 @@ void SCCPSolver::Solve() {
/// even if X isn't defined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(BB))
+ if (!BBExecutable.count(&*BB))
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// Look for instructions which produce undef values.
- if (I->getType()->isVoidTy()) continue;
+ if (I.getType()->isVoidTy()) continue;
- if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(I.getType())) {
// Only a few things that can be structs matter for undef.
// Tracked calls must never be marked overdefined in ResolvedUndefsIn.
- if (CallSite CS = CallSite(I))
+ if (CallSite CS = CallSite(&I))
if (Function *F = CS.getCalledFunction())
if (MRVFunctionsTracked.count(F))
continue;
@@ -1270,14 +1278,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Send the results of everything else to overdefined. We could be
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
+ LatticeVal &LV = getStructValueState(&I, i);
if (LV.isUndefined())
- markOverdefined(LV, I);
+ markOverdefined(LV, &I);
}
continue;
}
- LatticeVal &LV = getValueState(I);
+ LatticeVal &LV = getValueState(&I);
if (!LV.isUndefined()) continue;
// extractvalue is safe; check here because the argument is a struct.
@@ -1287,24 +1295,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Compute the operand LatticeVals, for convenience below.
// Anything taking a struct is conservatively assumed to require
// overdefined markings.
- if (I->getOperand(0)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op0LV = getValueState(I.getOperand(0));
LatticeVal Op1LV;
- if (I->getNumOperands() == 2) {
- if (I->getOperand(1)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getNumOperands() == 2) {
+ if (I.getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
}
// If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
- Type *ITy = I->getType();
- switch (I->getOpcode()) {
+ Type *ITy = I.getType();
+ switch (I.getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Trunc:
@@ -1318,9 +1326,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::FRem:
// Floating-point binary operation: be conservative.
if (Op0LV.isUndefined() && Op1LV.isUndefined())
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::ZExt:
case Instruction::SExt:
@@ -1332,7 +1340,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::SIToFP:
case Instruction::UIToFP:
// undef -> 0; some outputs are impossible
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
@@ -1341,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
@@ -1349,7 +1357,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op0LV.isUndefined() && Op1LV.isUndefined())
break;
// undef | X -> -1. X could be -1.
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::Xor:
@@ -1357,7 +1365,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// necessary, but we try to be nice to people who expect this
// behavior in simple cases
if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
}
// undef ^ X -> undef
@@ -1373,7 +1381,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::AShr:
@@ -1381,7 +1389,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op1LV.isUndefined()) break;
// undef >>a X -> all ones
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
@@ -1391,17 +1399,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef << X -> 0
// undef >> X -> 0
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
} else if (Op1LV.isUndefined()) {
// c ? undef : undef -> undef. No change.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
if (Op1LV.isUndefined())
break;
// Otherwise, c ? undef : x -> x.
@@ -1410,9 +1418,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
if (Op1LV.isConstant())
- markForcedConstant(I, Op1LV.getConstant());
+ markForcedConstant(&I, Op1LV.getConstant());
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Load:
// A load here means one of two things: a load of undef from a global,
@@ -1421,9 +1429,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
case Instruction::ICmp:
// X == undef -> undef. Other comparisons get more complicated.
- if (cast<ICmpInst>(I)->isEquality())
+ if (cast<ICmpInst>(&I)->isEquality())
break;
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Call:
case Instruction::Invoke: {
@@ -1432,19 +1440,19 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// 2. It could be constant-foldable.
// Because of the way we solve return values, tracked calls must
// never be marked overdefined in ResolvedUndefsIn.
- if (Function *F = CallSite(I).getCalledFunction())
+ if (Function *F = CallSite(&I).getCalledFunction())
if (TrackedRetVals.count(F))
break;
// If the call is constant-foldable, we mark it overdefined because
// we do not know what return values are valid.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
default:
// If we don't know what should happen here, conservatively mark it
// overdefined.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
}
@@ -1462,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// false.
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- markEdgeExecutable(BB, TI->getSuccessor(1));
+ markEdgeExecutable(&*BB, TI->getSuccessor(1));
return true;
}
@@ -1484,7 +1492,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
+ markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor());
return true;
}
@@ -1506,6 +1514,7 @@ namespace {
struct SCCP : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
@@ -1541,11 +1550,10 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
while (EndInst != BB->begin()) {
// Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
+ Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
+ if (Inst->isEHPad()) {
EndInst = Inst;
continue;
}
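
Two smaller idioms land in this hunk: Instruction::getIterator() recovers an ilist iterator from a node so it can be stepped, and Instruction::isEHPad() generalizes the old isa<LandingPadInst> check to all exception-handling pads (landingpad plus the newer catchpad/cleanuppad family). A sketch under the same 3.8-era API assumptions; previousNonEHPad is a hypothetical helper:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Return the instruction before I that is not an EH pad, or null if we
    // reach the start of the block first.
    static Instruction *previousNonEHPad(Instruction *I) {
      BasicBlock *BB = I->getParent();
      for (BasicBlock::iterator It = I->getIterator(); It != BB->begin();) {
        Instruction *Prev = &*--It;       // step the recovered iterator
        if (!Prev->isEHPad())             // broader than isa<LandingPadInst>
          return Prev;
      }
      return nullptr;
    }
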
@@ -1568,11 +1576,11 @@ bool SCCP::runOnFunction(Function &F) {
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
- Solver.MarkBlockExecutable(F.begin());
+ Solver.MarkBlockExecutable(&F.front());
// Mark all arguments to the function as being overdefined.
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F.args())
+ Solver.markAnythingOverdefined(&AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1589,8 +1597,8 @@ bool SCCP::runOnFunction(Function &F) {
// as we cannot modify the CFG of the function.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
continue;
}
@@ -1599,7 +1607,7 @@ bool SCCP::runOnFunction(Function &F) {
// constants if we have found them to be of constant values.
//
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
@@ -1713,36 +1721,34 @@ bool IPSCCP::runOnModule(Module &M) {
// If this is a strong or ODR definition of this function, then we can
// propagate information about its result into callsites of it.
if (!F->mayBeOverridden())
- Solver.AddTrackedFunction(F);
+ Solver.AddTrackedFunction(&*F);
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
if (F->hasLocalLinkage()) {
- if (AddressIsTaken(F))
- AddressTakenFunctions.insert(F);
+ if (AddressIsTaken(&*F))
+ AddressTakenFunctions.insert(&*F);
else {
- Solver.AddArgumentTrackedFunction(F);
+ Solver.AddArgumentTrackedFunction(&*F);
continue;
}
}
// Assume the function is called.
- Solver.MarkBlockExecutable(F->begin());
+ Solver.MarkBlockExecutable(&F->front());
// Assume nothing about the incoming arguments.
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F->args())
+ Solver.markAnythingOverdefined(&AI);
}
// Loop over global variables. We inform the solver about any internal global
// variables that do not have their 'addresses taken'. If they don't have
// their addresses taken, we can propagate constants through them.
- for (Module::global_iterator G = M.global_begin(), E = M.global_end();
- G != E; ++G)
- if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
- Solver.TrackValueOfGlobalVariable(G);
+ for (GlobalVariable &G : M.globals())
+ if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G))
+ Solver.TrackValueOfGlobalVariable(&G);
// Solve for constants.
bool ResolvedUndefs = true;
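
The IPSCCP hunks also switch from explicit iterator pairs to range-based loops; Function::args() and Module::globals() yield ranges of references, and the address of the loop variable replaces the old decayed iterator. A small sketch of that shape, with markOverdefined as a stand-in for the solver calls:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void markOverdefined(Value *V) { (void)V; } // stand-in for the solver

    static void seed(Module &M) {
      for (Function &F : M)
        for (Argument &AI : F.args())        // was: arg_begin()/arg_end()
          markOverdefined(&AI);
      for (GlobalVariable &G : M.globals())  // was: global_begin()/global_end()
        if (!G.isConstant() && G.hasLocalLinkage())
          markOverdefined(&G);
    }
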
@@ -1763,7 +1769,10 @@ bool IPSCCP::runOnModule(Module &M) {
SmallVector<BasicBlock*, 512> BlocksToErase;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (Solver.isBlockExecutable(F->begin())) {
+ if (F->isDeclaration())
+ continue;
+
+ if (Solver.isBlockExecutable(&F->front())) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
if (AI->use_empty() || AI->getType()->isStructTy()) continue;
@@ -1771,7 +1780,7 @@ bool IPSCCP::runOnModule(Module &M) {
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
- LatticeVal IV = Solver.getLatticeValueFor(AI);
+ LatticeVal IV = Solver.getLatticeValueFor(&*AI);
if (IV.isOverdefined()) continue;
Constant *CST = IV.isConstant() ?
@@ -1786,28 +1795,27 @@ bool IPSCCP::runOnModule(Module &M) {
}
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
TerminatorInst *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
+ for (BasicBlock *Succ : TI->successors()) {
if (!Succ->empty() && isa<PHINode>(Succ->begin()))
- TI->getSuccessor(i)->removePredecessor(BB);
+ Succ->removePredecessor(&*BB);
}
if (!TI->use_empty())
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
TI->eraseFromParent();
- new UnreachableInst(M.getContext(), BB);
+ new UnreachableInst(M.getContext(), &*BB);
if (&*BB != &F->front())
- BlocksToErase.push_back(BB);
+ BlocksToErase.push_back(&*BB);
continue;
}
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 947513a..a7361b5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -23,12 +23,12 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -37,8 +37,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -53,9 +51,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#if __cplusplus >= 201103L && !defined(NDEBUG)
// We only use this for a debug check in C++11
@@ -63,6 +61,7 @@
#endif
using namespace llvm;
+using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
@@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
-/// Hidden option to force the pass to not use DomTree and mem2reg, instead
-/// forming SSA values through the SSAUpdater infrastructure.
-static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
- cl::Hidden);
-
/// Hidden option to enable randomly shuffling the slices to help uncover
/// instability in their order.
static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
@@ -205,7 +199,6 @@ template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> { static const bool value = true; };
}
-namespace {
/// \brief Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -213,7 +206,7 @@ namespace {
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
-class AllocaSlices {
+class llvm::sroa::AllocaSlices {
public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
@@ -253,281 +246,10 @@ public:
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
- // Forward declare an iterator to befriend it.
+ // Forward declare the iterator and range accessor for walking the
+ // partitions.
class partition_iterator;
-
- /// \brief A partition of the slices.
- ///
- /// An ephemeral representation for a range of slices which can be viewed as
- /// a partition of the alloca. This range represents a span of the alloca's
- /// memory which cannot be split, and provides access to all of the slices
- /// overlapping some part of the partition.
- ///
- /// Objects of this type are produced by traversing the alloca's slices, but
- /// are only ephemeral and not persistent.
- class Partition {
- private:
- friend class AllocaSlices;
- friend class AllocaSlices::partition_iterator;
-
- /// \brief The begining and ending offsets of the alloca for this partition.
- uint64_t BeginOffset, EndOffset;
-
- /// \brief The start end end iterators of this partition.
- iterator SI, SJ;
-
- /// \brief A collection of split slice tails overlapping the partition.
- SmallVector<Slice *, 4> SplitTails;
-
- /// \brief Raw constructor builds an empty partition starting and ending at
- /// the given iterator.
- Partition(iterator SI) : SI(SI), SJ(SI) {}
-
- public:
- /// \brief The start offset of this partition.
- ///
- /// All of the contained slices start at or after this offset.
- uint64_t beginOffset() const { return BeginOffset; }
-
- /// \brief The end offset of this partition.
- ///
- /// All of the contained slices end at or before this offset.
- uint64_t endOffset() const { return EndOffset; }
-
- /// \brief The size of the partition.
- ///
- /// Note that this can never be zero.
- uint64_t size() const {
- assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
- return EndOffset - BeginOffset;
- }
-
- /// \brief Test whether this partition contains no slices, and merely spans
- /// a region occupied by split slices.
- bool empty() const { return SI == SJ; }
-
- /// \name Iterate slices that start within the partition.
- /// These may be splittable or unsplittable. They have a begin offset >= the
- /// partition begin offset.
- /// @{
- // FIXME: We should probably define a "concat_iterator" helper and use that
- // to stitch together pointee_iterators over the split tails and the
- // contiguous iterators of the partition. That would give a much nicer
- // interface here. We could then additionally expose filtered iterators for
- // split, unsplit, and unsplittable splices based on the usage patterns.
- iterator begin() const { return SI; }
- iterator end() const { return SJ; }
- /// @}
-
- /// \brief Get the sequence of split slice tails.
- ///
- /// These tails are of slices which start before this partition but are
- /// split and overlap into the partition. We accumulate these while forming
- /// partitions.
- ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
- };
-
- /// \brief An iterator over partitions of the alloca's slices.
- ///
- /// This iterator implements the core algorithm for partitioning the alloca's
- /// slices. It is a forward iterator as we don't support backtracking for
- /// efficiency reasons, and re-use a single storage area to maintain the
- /// current set of split slices.
- ///
- /// It is templated on the slice iterator type to use so that it can operate
- /// with either const or non-const slice iterators.
- class partition_iterator
- : public iterator_facade_base<partition_iterator,
- std::forward_iterator_tag, Partition> {
- friend class AllocaSlices;
-
- /// \brief Most of the state for walking the partitions is held in a class
- /// with a nice interface for examining them.
- Partition P;
-
- /// \brief We need to keep the end of the slices to know when to stop.
- AllocaSlices::iterator SE;
-
- /// \brief We also need to keep track of the maximum split end offset seen.
- /// FIXME: Do we really?
- uint64_t MaxSplitSliceEndOffset;
-
- /// \brief Sets the partition to be empty at given iterator, and sets the
- /// end iterator.
- partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
- : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
- // If not already at the end, advance our state to form the initial
- // partition.
- if (SI != SE)
- advance();
- }
-
- /// \brief Advance the iterator to the next partition.
- ///
- /// Requires that the iterator not be at the end of the slices.
- void advance() {
- assert((P.SI != SE || !P.SplitTails.empty()) &&
- "Cannot advance past the end of the slices!");
-
- // Clear out any split uses which have ended.
- if (!P.SplitTails.empty()) {
- if (P.EndOffset >= MaxSplitSliceEndOffset) {
- // If we've finished all splits, this is easy.
- P.SplitTails.clear();
- MaxSplitSliceEndOffset = 0;
- } else {
- // Remove the uses which have ended in the prior partition. This
- // cannot change the max split slice end because we just checked that
- // the prior partition ended prior to that max.
- P.SplitTails.erase(
- std::remove_if(
- P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
- P.SplitTails.end());
- assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() == MaxSplitSliceEndOffset;
- }) &&
- "Could not find the current max split slice offset!");
- assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() <= MaxSplitSliceEndOffset;
- }) &&
- "Max split slice end offset is not actually the max!");
- }
- }
-
- // If P.SI is already at the end, then we've cleared the split tail and
- // now have an end iterator.
- if (P.SI == SE) {
- assert(P.SplitTails.empty() && "Failed to clear the split slices!");
- return;
- }
-
- // If we had a non-empty partition previously, set up the state for
- // subsequent partitions.
- if (P.SI != P.SJ) {
- // Accumulate all the splittable slices which started in the old
- // partition into the split list.
- for (Slice &S : P)
- if (S.isSplittable() && S.endOffset() > P.EndOffset) {
- P.SplitTails.push_back(&S);
- MaxSplitSliceEndOffset =
- std::max(S.endOffset(), MaxSplitSliceEndOffset);
- }
-
- // Start from the end of the previous partition.
- P.SI = P.SJ;
-
- // If P.SI is now at the end, we at most have a tail of split slices.
- if (P.SI == SE) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = MaxSplitSliceEndOffset;
- return;
- }
-
- // If the we have split slices and the next slice is after a gap and is
- // not splittable immediately form an empty partition for the split
- // slices up until the next slice begins.
- if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
- !P.SI->isSplittable()) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = P.SI->beginOffset();
- return;
- }
- }
-
- // OK, we need to consume new slices. Set the end offset based on the
- // current slice, and step SJ past it. The beginning offset of the
- // parttion is the beginning offset of the next slice unless we have
- // pre-existing split slices that are continuing, in which case we begin
- // at the prior end offset.
- P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
- P.EndOffset = P.SI->endOffset();
- ++P.SJ;
-
- // There are two strategies to form a partition based on whether the
- // partition starts with an unsplittable slice or a splittable slice.
- if (!P.SI->isSplittable()) {
- // When we're forming an unsplittable region, it must always start at
- // the first slice and will extend through its end.
- assert(P.BeginOffset == P.SI->beginOffset());
-
- // Form a partition including all of the overlapping slices with this
- // unsplittable slice.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- if (!P.SJ->isSplittable())
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // We have a partition across a set of overlapping unsplittable
- // partitions.
- return;
- }
-
- // If we're starting with a splittable slice, then we need to form
- // a synthetic partition spanning it and any other overlapping splittable
- // splices.
- assert(P.SI->isSplittable() && "Forming a splittable partition!");
-
- // Collect all of the overlapping splittable slices.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
- P.SJ->isSplittable()) {
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // Back upiP.EndOffset if we ended the span early when encountering an
- // unsplittable slice. This synthesizes the early end offset of
- // a partition spanning only splittable slices.
- if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- assert(!P.SJ->isSplittable());
- P.EndOffset = P.SJ->beginOffset();
- }
- }
-
- public:
- bool operator==(const partition_iterator &RHS) const {
- assert(SE == RHS.SE &&
- "End iterators don't match between compared partition iterators!");
-
- // The observed positions of partitions is marked by the P.SI iterator and
- // the emptyness of the split slices. The latter is only relevant when
- // P.SI == SE, as the end iterator will additionally have an empty split
- // slices list, but the prior may have the same P.SI and a tail of split
- // slices.
- if (P.SI == RHS.P.SI &&
- P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
- assert(P.SJ == RHS.P.SJ &&
- "Same set of slices formed two different sized partitions!");
- assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
- "Same slice position with differently sized non-empty split "
- "slice tails!");
- return true;
- }
- return false;
- }
-
- partition_iterator &operator++() {
- advance();
- return *this;
- }
-
- Partition &operator*() { return P; }
- };
-
- /// \brief A forward range over the partitions of the alloca's slices.
- ///
- /// This accesses an iterator range over the partitions of the alloca's
- /// slices. It computes these partitions on the fly based on the overlapping
- /// offsets of the slices and the ability to split them. It will visit "empty"
- /// partitions to cover regions of the alloca only accessed via split
- /// slices.
- iterator_range<partition_iterator> partitions() {
- return make_range(partition_iterator(begin(), end()),
- partition_iterator(end(), end()));
- }
+ iterator_range<partition_iterator> partitions();
/// \brief Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
@@ -595,6 +317,280 @@ private:
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
+
+/// \brief A partition of the slices.
+///
+/// An ephemeral representation for a range of slices which can be viewed as
+/// a partition of the alloca. This range represents a span of the alloca's
+/// memory which cannot be split, and provides access to all of the slices
+/// overlapping some part of the partition.
+///
+/// Objects of this type are produced by traversing the alloca's slices, but
+/// are only ephemeral and not persistent.
+class llvm::sroa::Partition {
+private:
+ friend class AllocaSlices;
+ friend class AllocaSlices::partition_iterator;
+
+ typedef AllocaSlices::iterator iterator;
+
+ /// \brief The beginning and ending offsets of the alloca for this
+ /// partition.
+ uint64_t BeginOffset, EndOffset;
+
+ /// \brief The start and end iterators of this partition.
+ iterator SI, SJ;
+
+ /// \brief A collection of split slice tails overlapping the partition.
+ SmallVector<Slice *, 4> SplitTails;
+
+ /// \brief Raw constructor builds an empty partition starting and ending at
+ /// the given iterator.
+ Partition(iterator SI) : SI(SI), SJ(SI) {}
+
+public:
+ /// \brief The start offset of this partition.
+ ///
+ /// All of the contained slices start at or after this offset.
+ uint64_t beginOffset() const { return BeginOffset; }
+
+ /// \brief The end offset of this partition.
+ ///
+ /// All of the contained slices end at or before this offset.
+ uint64_t endOffset() const { return EndOffset; }
+
+ /// \brief The size of the partition.
+ ///
+ /// Note that this can never be zero.
+ uint64_t size() const {
+ assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
+ return EndOffset - BeginOffset;
+ }
+
+ /// \brief Test whether this partition contains no slices, and merely spans
+ /// a region occupied by split slices.
+ bool empty() const { return SI == SJ; }
+
+ /// \name Iterate slices that start within the partition.
+ /// These may be splittable or unsplittable. They have a begin offset >= the
+ /// partition begin offset.
+ /// @{
+ // FIXME: We should probably define a "concat_iterator" helper and use that
+ // to stitch together pointee_iterators over the split tails and the
+ // contiguous iterators of the partition. That would give a much nicer
+ // interface here. We could then additionally expose filtered iterators for
+ // split, unsplit, and unsplittable slices based on the usage patterns.
+ iterator begin() const { return SI; }
+ iterator end() const { return SJ; }
+ /// @}
+
+ /// \brief Get the sequence of split slice tails.
+ ///
+ /// These tails are of slices which start before this partition but are
+ /// split and overlap into the partition. We accumulate these while forming
+ /// partitions.
+ ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
+};
+
+/// \brief An iterator over partitions of the alloca's slices.
+///
+/// This iterator implements the core algorithm for partitioning the alloca's
+/// slices. It is a forward iterator as we don't support backtracking for
+/// efficiency reasons, and re-use a single storage area to maintain the
+/// current set of split slices.
+///
+/// It is templated on the slice iterator type to use so that it can operate
+/// with either const or non-const slice iterators.
+class AllocaSlices::partition_iterator
+ : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
+ Partition> {
+ friend class AllocaSlices;
+
+ /// \brief Most of the state for walking the partitions is held in a class
+ /// with a nice interface for examining them.
+ Partition P;
+
+ /// \brief We need to keep the end of the slices to know when to stop.
+ AllocaSlices::iterator SE;
+
+ /// \brief We also need to keep track of the maximum split end offset seen.
+ /// FIXME: Do we really?
+ uint64_t MaxSplitSliceEndOffset;
+
+ /// \brief Sets the partition to be empty at the given iterator, and sets the
+ /// end iterator.
+ partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
+ : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
+ // If not already at the end, advance our state to form the initial
+ // partition.
+ if (SI != SE)
+ advance();
+ }
+
+ /// \brief Advance the iterator to the next partition.
+ ///
+ /// Requires that the iterator not be at the end of the slices.
+ void advance() {
+ assert((P.SI != SE || !P.SplitTails.empty()) &&
+ "Cannot advance past the end of the slices!");
+
+ // Clear out any split uses which have ended.
+ if (!P.SplitTails.empty()) {
+ if (P.EndOffset >= MaxSplitSliceEndOffset) {
+ // If we've finished all splits, this is easy.
+ P.SplitTails.clear();
+ MaxSplitSliceEndOffset = 0;
+ } else {
+ // Remove the uses which have ended in the prior partition. This
+ // cannot change the max split slice end because we just checked that
+ // the prior partition ended prior to that max.
+ P.SplitTails.erase(
+ std::remove_if(
+ P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
+ P.SplitTails.end());
+ assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() == MaxSplitSliceEndOffset;
+ }) &&
+ "Could not find the current max split slice offset!");
+ assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() <= MaxSplitSliceEndOffset;
+ }) &&
+ "Max split slice end offset is not actually the max!");
+ }
+ }
+
+ // If P.SI is already at the end, then we've cleared the split tail and
+ // now have an end iterator.
+ if (P.SI == SE) {
+ assert(P.SplitTails.empty() && "Failed to clear the split slices!");
+ return;
+ }
+
+ // If we had a non-empty partition previously, set up the state for
+ // subsequent partitions.
+ if (P.SI != P.SJ) {
+ // Accumulate all the splittable slices which started in the old
+ // partition into the split list.
+ for (Slice &S : P)
+ if (S.isSplittable() && S.endOffset() > P.EndOffset) {
+ P.SplitTails.push_back(&S);
+ MaxSplitSliceEndOffset =
+ std::max(S.endOffset(), MaxSplitSliceEndOffset);
+ }
+
+ // Start from the end of the previous partition.
+ P.SI = P.SJ;
+
+ // If P.SI is now at the end, we at most have a tail of split slices.
+ if (P.SI == SE) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = MaxSplitSliceEndOffset;
+ return;
+ }
+
+ // If we have split slices and the next slice is after a gap and is
+ // not splittable, immediately form an empty partition for the split
+ // slices up until the next slice begins.
+ if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
+ !P.SI->isSplittable()) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = P.SI->beginOffset();
+ return;
+ }
+ }
+
+ // OK, we need to consume new slices. Set the end offset based on the
+ // current slice, and step SJ past it. The beginning offset of the
+ // partition is the beginning offset of the next slice unless we have
+ // pre-existing split slices that are continuing, in which case we begin
+ // at the prior end offset.
+ P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
+ P.EndOffset = P.SI->endOffset();
+ ++P.SJ;
+
+ // There are two strategies to form a partition based on whether the
+ // partition starts with an unsplittable slice or a splittable slice.
+ if (!P.SI->isSplittable()) {
+ // When we're forming an unsplittable region, it must always start at
+ // the first slice and will extend through its end.
+ assert(P.BeginOffset == P.SI->beginOffset());
+
+ // Form a partition including all of the overlapping slices with this
+ // unsplittable slice.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ if (!P.SJ->isSplittable())
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // We have a partition across a set of overlapping unsplittable
+ // partitions.
+ return;
+ }
+
+ // If we're starting with a splittable slice, then we need to form
+ // a synthetic partition spanning it and any other overlapping splittable
+ // slices.
+ assert(P.SI->isSplittable() && "Forming a splittable partition!");
+
+ // Collect all of the overlapping splittable slices.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
+ P.SJ->isSplittable()) {
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // Back up P.EndOffset if we ended the span early when encountering an
+ // unsplittable slice. This synthesizes the early end offset of
+ // a partition spanning only splittable slices.
+ if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ assert(!P.SJ->isSplittable());
+ P.EndOffset = P.SJ->beginOffset();
+ }
+ }
+
+public:
+ bool operator==(const partition_iterator &RHS) const {
+ assert(SE == RHS.SE &&
+ "End iterators don't match between compared partition iterators!");
+
+ // The observed position of a partition is marked by the P.SI iterator and
+ // the emptiness of the split slices. The latter is only relevant when
+ // P.SI == SE, as the end iterator will additionally have an empty split
+ // slices list, but the prior may have the same P.SI and a tail of split
+ // slices.
+ if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
+ assert(P.SJ == RHS.P.SJ &&
+ "Same set of slices formed two different sized partitions!");
+ assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
+ "Same slice position with differently sized non-empty split "
+ "slice tails!");
+ return true;
+ }
+ return false;
+ }
+
+ partition_iterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ Partition &operator*() { return P; }
+};
+
+/// \brief A forward range over the partitions of the alloca's slices.
+///
+/// This accesses an iterator range over the partitions of the alloca's
+/// slices. It computes these partitions on the fly based on the overlapping
+/// offsets of the slices and the ability to split them. It will visit "empty"
+/// partitions to cover regions of the alloca only accessed via split
+/// slices.
+iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
+ return make_range(partition_iterator(begin(), end()),
+ partition_iterator(end(), end()));
}
static Value *foldSelectInst(SelectInst &SI) {
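
Hoisting Partition and partition_iterator to namespace scope (llvm::sroa) does not change the consumer side: a client still walks the on-the-fly partitions() range and inspects each Partition through its accessors. A sketch, which would have to live in this translation unit since Slice is file-local:

    #include <cassert>

    // Count partitions that contain slices, checking a documented invariant:
    // every contained slice starts at or after the partition's begin offset.
    static unsigned countNonEmptyPartitions(AllocaSlices &AS) {
      unsigned N = 0;
      for (Partition &P : AS.partitions()) {
        if (P.empty())
          continue;                 // spans only a region of split slice tails
        for (Slice &S : P)
          assert(S.beginOffset() >= P.beginOffset() && "slice before partition");
        (void)P.splitSliceTails();  // tails of earlier slices overlapping P
        ++N;
      }
      return N;
    }
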
@@ -1072,217 +1068,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-namespace {
-/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
-///
-/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
-/// the loads and stores of an alloca instruction, as well as updating its
-/// debug information. This is used when a domtree is unavailable and thus
-/// mem2reg in its full form can't be used to handle promotion of allocas to
-/// scalar values.
-class AllocaPromoter : public LoadAndStorePromoter {
- AllocaInst &AI;
- DIBuilder &DIB;
-
- SmallVector<DbgDeclareInst *, 4> DDIs;
- SmallVector<DbgValueInst *, 4> DVIs;
-
-public:
- AllocaPromoter(ArrayRef<const Instruction *> Insts,
- SSAUpdater &S,
- AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
-
- void run(const SmallVectorImpl<Instruction *> &Insts) {
- // Retain the debug information attached to the alloca for use when
- // rewriting loads and stores.
- if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DINode->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DVIs.push_back(DVI);
- }
- }
-
- LoadAndStorePromoter::run(Insts);
-
- // While we have the debug information, clear it off of the alloca. The
- // caller takes care of deleting the alloca.
- while (!DDIs.empty())
- DDIs.pop_back_val()->eraseFromParent();
- while (!DVIs.empty())
- DVIs.pop_back_val()->eraseFromParent();
- }
-
- bool
- isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction *> &Insts) const override {
- Value *Ptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- Ptr = LI->getOperand(0);
- else
- Ptr = cast<StoreInst>(I)->getPointerOperand();
-
- // Only used to detect cycles, which will be rare and quickly found as
- // we're walking up a chain of defs rather than down through uses.
- SmallPtrSet<Value *, 4> Visited;
-
- do {
- if (Ptr == &AI)
- return true;
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
- Ptr = BCI->getOperand(0);
- else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEPI->getPointerOperand();
- else
- return false;
-
- } while (Visited.insert(Ptr).second);
-
- return false;
- }
-
- void updateDebugInfo(Instruction *Inst) const override {
- for (DbgDeclareInst *DDI : DDIs)
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- for (DbgValueInst *DVI : DVIs) {
- Value *Arg = nullptr;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(SExt->getOperand(0));
- if (!Arg)
- Arg = SI->getValueOperand();
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getPointerOperand();
- } else {
- continue;
- }
- DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(),
- Inst);
- }
- }
-};
-} // end anon namespace
-
-namespace {
-/// \brief An optimization pass providing Scalar Replacement of Aggregates.
-///
-/// This pass takes allocations which can be completely analyzed (that is, they
-/// don't escape) and tries to turn them into scalar SSA values. There are
-/// a few steps to this process.
-///
-/// 1) It takes allocations of aggregates and analyzes the ways in which they
-/// are used to try to split them into smaller allocations, ideally of
-/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate individual scalar accesses.
-/// 2) It will transform accesses into forms which are suitable for SSA value
-/// promotion. This can be replacing a memset with a scalar store of an
-/// integer value, or it can involve speculating operations on a PHI or
-/// select to be a PHI or select of the results.
-/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
-/// onto insert and extract operations on a vector value, and convert them to
-/// this form. By doing so, it will enable promotion of vector aggregates to
-/// SSA vector values.
-class SROA : public FunctionPass {
- const bool RequiresDomTree;
-
- LLVMContext *C;
- DominatorTree *DT;
- AssumptionCache *AC;
-
- /// \brief Worklist of alloca instructions to simplify.
- ///
- /// Each alloca in the function is added to this. Each new alloca formed gets
- /// added to it as well to recursively simplify unless that alloca can be
- /// directly promoted. Finally, each time we rewrite a use of an alloca other
- /// the one being actively rewritten, we add it back onto the list if not
- /// already present to ensure it is re-visited.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
-
- /// \brief A collection of instructions to delete.
- /// We try to batch deletions to simplify code and make things a bit more
- /// efficient.
- SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
-
- /// \brief Post-promotion worklist.
- ///
- /// Sometimes we discover an alloca which has a high probability of becoming
- /// viable for SROA after a round of promotion takes place. In those cases,
- /// the alloca is enqueued here for re-processing.
- ///
- /// Note that we have to be very careful to clear allocas out of this list in
- /// the event they are deleted.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
-
- /// \brief A collection of alloca instructions we can directly promote.
- std::vector<AllocaInst *> PromotableAllocas;
-
- /// \brief A worklist of PHIs to speculate prior to promoting allocas.
- ///
- /// All of these PHIs have been checked for the safety of speculation and by
- /// being speculated will allow promoting allocas currently in the promotable
- /// queue.
- SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs;
-
- /// \brief A worklist of select instructions to speculate prior to promoting
- /// allocas.
- ///
- /// All of these select instructions have been checked for the safety of
- /// speculation and by being speculated will allow promoting allocas
- /// currently in the promotable queue.
- SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
-
-public:
- SROA(bool RequiresDomTree = true)
- : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DT(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- const char *getPassName() const override { return "SROA"; }
- static char ID;
-
-private:
- friend class PHIOrSelectSpeculator;
- friend class AllocaSliceRewriter;
-
- bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
- bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
- bool runOnAlloca(AllocaInst &AI);
- void clobberUse(Use &U);
- void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
- bool promoteAllocas(Function &F);
-};
-}
-
-char SROA::ID = 0;
-
-FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
- return new SROA(RequiresDomTree);
-}
-
-INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static Type *findCommonType(AllocaSlices::const_iterator B,
@@ -1373,7 +1158,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
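
The same rewrite appears here: constructing a BasicBlock::iterator directly from an instruction reference replaces the old implicit pointer-to-iterator conversion. A sketch of the scan as a hypothetical standalone helper:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Scan from From (inclusive) to To (exclusive); both are assumed to be in
    // the same block, with From preceding To.
    static bool mayWriteBetween(Instruction &From, const Instruction *To) {
      for (BasicBlock::iterator BBI(From); &*BBI != To; ++BBI)
        if (BBI->mayWriteToMemory())
          return true;
      return false;
    }
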
@@ -1934,10 +1719,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
/// \brief Test whether the given slice use can be promoted to a vector.
///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
/// for a single slice.
-static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
- const Slice &S, VectorType *Ty,
+static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
+ VectorType *Ty,
uint64_t ElementSize,
const DataLayout &DL) {
// First validate the slice offsets.
@@ -2012,8 +1797,7 @@ static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
- const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
@@ -2130,7 +1914,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
// We can't reasonably handle cases where the load or store extends past
- // the end of the aloca's type and into its padding.
+ // the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
@@ -2199,7 +1983,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
-static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy,
+static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
const DataLayout &DL) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
// Don't create integer types larger than the maximum bitwidth.
@@ -2368,14 +2152,14 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
return V;
}
-namespace {
/// \brief Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
-class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
+class llvm::sroa::AllocaSliceRewriter
+ : public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
@@ -2583,9 +2367,19 @@ private:
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
- if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
- V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- "extract");
+ if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
+ IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
+ V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
+ }
+ // It is possible that the extracted type is not the load type. This
+ // happens if there is a load past the end of the alloca, and as
+ // a consequence the slice is narrower but still a candidate for integer
+ // lowering. To handle this case, we just zero extend the extracted
+ // integer.
+ assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
+ "Can only handle an extract for an overly wide load");
+ if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
+ V = IRB.CreateZExt(V, LI.getType());
return V;
}
@@ -2648,7 +2442,7 @@ private:
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
+ IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -3126,7 +2920,7 @@ private:
// dominate the PHI.
IRBuilderTy PtrBuilder(IRB);
if (isa<PHINode>(OldPtr))
- PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+ PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
else
PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
@@ -3169,7 +2963,6 @@ private:
return true;
}
};
-}
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3181,8 +2974,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
- const DataLayout &DL;
-
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
@@ -3194,8 +2985,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Use *U;
public:
- AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
-
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3711,7 +3500,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
return true;
}),
Stores.end());
- // Now we have to go *back* through all te stores, because a later store may
+ // Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
@@ -3773,7 +3562,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
@@ -3825,7 +3614,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
Value *StoreBasePtr = SI->getPointerOperand();
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
@@ -3914,7 +3703,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
if (SplitLoads) {
PLoad = (*SplitLoads)[Idx];
} else {
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3924,7 +3713,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// And store this partition.
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3972,7 +3761,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Mark the original store as dead now that we've split it up and kill its
// slice. Note that we leave the original load in place unless this store
- // was its ownly use. It may in turn be split up if it is an alloca load
+ // was its only use. It may in turn be split up if it is an alloca load
// for some other alloca, but it may be a normal load. This may introduce
// redundant loads, but where those can be merged the rest of the optimizer
// should handle the merging, and this uncovers SSA splits which is more
@@ -4024,7 +3813,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4230,12 +4019,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
// Migrate debug information from the old alloca to the new alloca(s)
- // and the individial partitions.
+ // and the individual partitions.
if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
auto *Var = DbgDecl->getVariable();
auto *Expr = DbgDecl->getExpression();
- DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
bool IsSplit = Pieces.size() > 1;
for (auto Piece : Pieces) {
// Create a piece expression describing the new partition or reuse AI's
@@ -4308,7 +4096,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(DL);
+ AggLoadStoreRewriter AggRewriter;
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4388,107 +4176,29 @@ void SROA::deleteDeadInstructions(
}
}
-static void enqueueUsersInWorklist(Instruction &I,
- SmallVectorImpl<Instruction *> &Worklist,
- SmallPtrSetImpl<Instruction *> &Visited) {
- for (User *U : I.users())
- if (Visited.insert(cast<Instruction>(U)).second)
- Worklist.push_back(cast<Instruction>(U));
-}
-
/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
-/// If there is a domtree available, we attempt to promote using the full power
-/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
-/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occurred.
+/// This function returns whether any promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
- if (DT && !ForceSSAUpdater) {
- DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
- PromotableAllocas.clear();
- return true;
- }
-
- DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
- SSAUpdater SSA;
- DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
- SmallVector<Instruction *, 64> Insts;
-
- // We need a worklist to walk the uses of each alloca.
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- SmallVector<Instruction *, 32> DeadInsts;
-
- for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
- AllocaInst *AI = PromotableAllocas[Idx];
- Insts.clear();
- Worklist.clear();
- Visited.clear();
-
- enqueueUsersInWorklist(*AI, Worklist, Visited);
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
- // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
- // leading to them) here. Eventually it should use them to optimize the
- // scalar values produced.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end);
- II->eraseFromParent();
- continue;
- }
-
- // Push the loads and stores we find onto the list. SROA will already
- // have validated that all loads and stores are viable candidates for
- // promotion.
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- assert(LI->getType() == AI->getAllocatedType());
- Insts.push_back(LI);
- continue;
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
- Insts.push_back(SI);
- continue;
- }
-
- // For everything else, we know that only no-op bitcasts and GEPs will
- // make it this far, just recurse through them and recall them for later
- // removal.
- DeadInsts.push_back(I);
- enqueueUsersInWorklist(*I, Worklist, Visited);
- }
- AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- while (!DeadInsts.empty())
- DeadInsts.pop_back_val()->eraseFromParent();
- AI->eraseFromParent();
- }
-
+ DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
PromotableAllocas.clear();
return true;
}
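
promoteAllocas is now mem2reg-only; the SSAUpdater fallback and its AllocaPromoter are gone, so a DominatorTree is required. The call shape is visible above; a hedged usage sketch for that 3.8-era signature:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    using namespace llvm;

    // Promote allocas already known to satisfy isAllocaPromotable().
    static void promote(Function &F, ArrayRef<AllocaInst *> Allocas,
                        AssumptionCache *AC) {
      if (Allocas.empty())
        return;
      DominatorTree DT;
      DT.recalculate(F);  // mem2reg unconditionally needs a domtree now
      PromoteMemToReg(Allocas, DT, /*AliasSetTracker*/ nullptr, AC);
    }
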
-bool SROA::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
+PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &RunDT;
+ AC = &RunAC;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
@@ -4527,12 +4237,55 @@ bool SROA::runOnFunction(Function &F) {
PostPromotionWorklist.clear();
} while (!Worklist.empty());
- return Changed;
+ // FIXME: Even when promoting allocas we should preserve some abstract set of
+ // CFG-specific analyses.
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AssumptionCacheTracker>();
- if (RequiresDomTree)
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
+PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> *AM) {
+ return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F));
}
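
This run() overload is the new pass manager entry point; note that in this snapshot the AnalysisManager is still passed by pointer. A sketch of driving it directly, assuming the analyses it queries have been registered (the registration lambdas are ordinary setup boilerplate, not anything this diff adds):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/SROA.h"
    using namespace llvm;

    static void runSROAOn(Function &F) {
      FunctionAnalysisManager FAM;
      FAM.registerPass([] { return DominatorTreeAnalysis(); });
      FAM.registerPass([] { return AssumptionAnalysis(); });
      FunctionPassManager FPM;
      FPM.addPass(SROA());
      FPM.run(F, &FAM);  // AnalysisManager by pointer in this snapshot
    }
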
+
+/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
+///
+/// This is in the llvm namespace purely to allow it to be a friend of the \c
+/// SROA pass.
+class llvm::sroa::SROALegacyPass : public FunctionPass {
+ /// The SROA implementation.
+ SROA Impl;
+
+public:
+ SROALegacyPass() : FunctionPass(ID) {
+ initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ auto PA = Impl.runImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+ return !PA.areAllPreserved();
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ const char *getPassName() const override { return "SROA"; }
+ static char ID;
+};
+
+char SROALegacyPass::ID = 0;
+
+FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
+
+INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
+ "Scalar Replacement Of Aggregates", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
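
For old pass manager users nothing changes at the API surface: createSROAPass() (now parameterless, since the SSAUpdater path is gone) returns the legacy wrapper. A usage sketch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    static void addScalarOpts(legacy::FunctionPassManager &FPM) {
      // The wrapper's getAnalysisUsage pulls in the domtree and assumption
      // cache, so nothing else needs to be scheduled by hand.
      FPM.add(createSROAPass());
    }
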
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index d5d3605..52d477c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -16,7 +16,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -27,10 +30,9 @@ using namespace llvm;
/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
- initializeADCEPass(Registry);
+ initializeADCELegacyPassPass(Registry);
initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
- initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -66,7 +68,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeRewriteStatepointsForGCPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
- initializeSROAPass(Registry);
+ initializeSROALegacyPassPass(Registry);
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
@@ -81,6 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePlaceSafepointsPass(Registry);
initializeFloat2IntPass(Registry);
initializeLoopDistributePass(Registry);
+ initializeLoopLoadEliminationPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -225,15 +228,15 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
}
void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+ unwrap(PM)->add(createTypeBasedAAWrapperPass());
}
void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScopedNoAliasAAPass());
+ unwrap(PM)->add(createScopedNoAliasAAWrapperPass());
}
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBasicAliasAnalysisPass());
+ unwrap(PM)->add(createBasicAAWrapperPass());
}
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
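
The C API keeps its entry-point names while the implementations move to the new AA wrapper passes, so existing bindings continue to work unchanged, e.g.:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    static LLVMPassManagerRef makeAAPassManager(void) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddBasicAliasAnalysisPass(PM);     // now createBasicAAWrapperPass()
      LLVMAddScopedNoAliasAAPass(PM);        // now createScopedNoAliasAAWrapperPass()
      LLVMAddTypeBasedAliasAnalysisPass(PM); // now createTypeBasedAAWrapperPass()
      return PM;
    }
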
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d955da7..114d22d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -60,6 +60,7 @@ STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
namespace {
+#define SROA SROA_
struct SROA : public FunctionPass {
SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
: FunctionPass(ID), HasDomTree(hasDT) {
@@ -382,8 +383,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
- AI->getParent()->begin());
+ AllocaInst *NewAI =
+ new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -1195,7 +1196,7 @@ static bool isSafePHIToSpeculate(PHINode *PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 0493003..054bacd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -253,10 +253,10 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
- Instruction *I = II;
+ assert(Gathered.empty() && Scattered.empty());
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+ Instruction *I = &*II;
bool Done = visit(I);
++II;
if (Done && I->getType()->isVoidTy())
@@ -285,7 +285,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point, V);
+ return Scatterer(Point->getParent(), Point->getIterator(), V);
}
// Replace Op with the gathered form of the components in CV. Defer the
@@ -377,7 +377,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(I.getParent(), &I);
+ IRBuilder<> Builder(&I);
Scatterer Op0 = scatter(&I, I.getOperand(0));
Scatterer Op1 = scatter(&I, I.getOperand(1));
assert(Op0.size() == NumElems && "Mismatched binary operation");
@@ -397,7 +397,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Op1 = scatter(&SI, SI.getOperand(1));
Scatterer Op2 = scatter(&SI, SI.getOperand(2));
assert(Op1.size() == NumElems && "Mismatched select");
@@ -438,7 +438,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (!VT)
return false;
- IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+ IRBuilder<> Builder(&GEPI);
unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
@@ -472,7 +472,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(CI.getParent(), &CI);
+ IRBuilder<> Builder(&CI);
Scatterer Op0 = scatter(&CI, CI.getOperand(0));
assert(Op0.size() == NumElems && "Mismatched cast");
ValueVector Res;
@@ -492,7 +492,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
unsigned DstNumElems = DstVT->getNumElements();
unsigned SrcNumElems = SrcVT->getNumElements();
- IRBuilder<> Builder(BCI.getParent(), &BCI);
+ IRBuilder<> Builder(&BCI);
Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
ValueVector Res;
Res.resize(DstNumElems);
@@ -569,7 +569,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(PHI.getParent(), &PHI);
+ IRBuilder<> Builder(&PHI);
ValueVector Res;
Res.resize(NumElems);
@@ -600,7 +600,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(LI.getParent(), &LI);
+ IRBuilder<> Builder(&LI);
Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
ValueVector Res;
Res.resize(NumElems);
@@ -625,7 +625,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
Scatterer Val = scatter(&SI, FullValue);
@@ -642,7 +642,9 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
- if (Gathered.empty())
+ // The presence of data in Gathered or Scattered indicates changes
+ // made to the Function.
+ if (Gathered.empty() && Scattered.empty())
return false;
for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
GMI != GME; ++GMI) {
@@ -655,7 +657,7 @@ bool Scalarizer::finish() {
Value *Res = UndefValue::get(Ty);
BasicBlock *BB = Op->getParent();
unsigned Count = Ty->getVectorNumElements();
- IRBuilder<> Builder(BB, Op);
+ IRBuilder<> Builder(Op);
if (isa<PHINode>(Op))
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
for (unsigned I = 0; I < Count; ++I)
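The repeated IRBuilder change in this file is mechanical: the single-argument constructor already places the builder immediately before the given instruction, so the (parent, instruction) form was redundant. Sketch:

    // Both builders insert new instructions immediately before I.
    IRBuilder<> B1(&I);
    IRBuilder<> B2(I.getParent(), I.getIterator());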
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4a87531..86a10d2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,6 +156,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -164,6 +168,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -174,6 +179,7 @@
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
@@ -319,8 +325,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool doInitialization(Module &M) override {
@@ -373,15 +382,42 @@ private:
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
+ /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
+ /// the constant offset. After extraction, it becomes desirable to reunite the
+ /// distributed sexts. For example,
+ ///
+ /// &a[sext(i +nsw (j +nsw 5))]
+ /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
+ /// => constant extraction &a[sext(i) + sext(j)] + 5
+ /// => reunion &a[sext(i +nsw j)] + 5
+ bool reuniteExts(Function &F);
+ /// A helper that reunites sexts in an instruction.
+ bool reuniteExts(Instruction *I);
+ /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
+ Instruction *findClosestMatchingDominator(const SCEV *Key,
+ Instruction *Dominatee);
/// Verify F is free of dead code.
void verifyNoDeadCode(Function &F);
+ bool hasMoreThanOneUseInLoop(Value *V, Loop *L);
+ // Swap the index operands of two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
+ // Check if it is safe to swap the operands of two GEPs.
+ bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
+ Loop *CurLoop);
+
const DataLayout *DL;
- const DominatorTree *DT;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
const TargetMachine *TM;
+
+ LoopInfo *LI;
+ TargetLibraryInfo *TLI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
bool LowerGEP;
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};
} // anonymous namespace
@@ -391,7 +427,10 @@ INITIALIZE_PASS_BEGIN(
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -734,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Type *I8PtrTy =
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
Value *ResultPtr = Variadic->getOperand(0);
+ Loop *L = LI->getLoopFor(Variadic->getParent());
+ // The base is a swap candidate only if it is loop invariant and used at
+ // most once inside the loop.
+ bool isSwapCandidate =
+ L && L->isLoopInvariant(ResultPtr) &&
+ !hasMoreThanOneUseInLoop(ResultPtr, L);
+ Value *FirstResult = nullptr;
+
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
@@ -762,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// Create an ugly GEP with a single index for each index.
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
+ if (FirstResult == nullptr)
+ FirstResult = ResultPtr;
}
}
@@ -770,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
- }
+ } else
+ isSwapCandidate = false;
+
+ // If we created a GEP with a constant index, and the base is loop
+ // invariant, swap it with the first GEP so that LICM can later move the
+ // constant GEP out of the loop.
+ GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult);
+ GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
+ swapGEPOperand(FirstGEP, SecondGEP);
+
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
@@ -891,13 +949,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Clear the inbounds attribute because the new index may be off-bound.
// e.g.,
//
- // b = add i64 a, 5
- // addr = gep inbounds float* p, i64 b
+ // b = add i64 a, 5
+ // addr = gep inbounds float, float* p, i64 b
//
// is transformed to:
//
- // addr2 = gep float* p, i64 a
- // addr = gep float* addr2, i64 5
+ // addr2 = gep float, float* p, i64 a ; inbounds removed
+ // addr = gep inbounds float, float* addr2, i64 5
//
// If a is -4, although the old index b is in bounds, the new index a is
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -907,6 +965,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
//
// TODO(jingyue): do some range analysis to keep as many inbounds as
// possible. GEPs with inbounds are more friendly to alias analysis.
+ bool GEPWasInBounds = GEP->isInBounds();
GEP->setIsInBounds(false);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -968,6 +1027,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
ConstantInt::get(IntPtrTy, Index, true),
GEP->getName(), GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -990,6 +1051,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Type::getInt8Ty(GEP->getContext()), NewGEP,
ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
@@ -1008,24 +1071,96 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
return false;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
Changed |= splitGEP(GEP);
- }
- // No need to split GEP ConstantExprs because all its indices are constant
- // already.
- }
+ // No need to split GEP ConstantExprs because all their indices are
+ // already constant.
}
+ Changed |= reuniteExts(F);
+
if (VerifyNoDeadCode)
verifyNoDeadCode(F);
return Changed;
}
+Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
+ const SCEV *Key, Instruction *Dominatee) {
+ auto Pos = DominatingExprs.find(Key);
+ if (Pos == DominatingExprs.end())
+ return nullptr;
+
+ auto &Candidates = Pos->second;
+ // Because we process the basic blocks in pre-order of the dominator tree, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!Candidates.empty()) {
+ Instruction *Candidate = Candidates.back();
+ if (DT->dominates(Candidate, Dominatee))
+ return Candidate;
+ Candidates.pop_back();
+ }
+ return nullptr;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Dom: LHS+RHS
+ // I: sext(LHS)+sext(RHS)
+ // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
+ // TODO: handle zext
+ Value *LHS = nullptr, *RHS = nullptr;
+ if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+ match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+ if (LHS->getType() == RHS->getType()) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+ Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+ NewSExt->takeName(I);
+ I->replaceAllUsesWith(NewSExt);
+ RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ }
+
+ // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+ if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+ match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+ if (isKnownNotFullPoison(I)) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ DominatingExprs[Key].push_back(I);
+ }
+ }
+ return false;
+}
+
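A hedged before/after sketch of the rewrite reuniteExts performs, with invented IR names; %dom must carry nsw and dominate %sum for the match to fire:

    // before: a dominating nsw add plus an add of the two sign extensions
    //   %dom = add nsw i32 %a, %b
    //   %sa  = sext i32 %a to i64
    //   %sb  = sext i32 %b to i64
    //   %sum = add i64 %sa, %sb
    // after: %sum is replaced by one extension of the dominating add
    //   %sum.reunited = sext i32 %dom to i64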
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+ bool Changed = false;
+ DominatingExprs.clear();
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ) {
+ Instruction *Cur = &*I++;
+ Changed |= reuniteExts(Cur);
+ }
+ }
+ return Changed;
+}
+
void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
for (auto &B : F) {
for (auto &I : B) {
@@ -1038,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
}
}
}
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+ GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+ if (!FirstGEP || !FirstGEP->hasOneUse())
+ return false;
+
+ if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+ return false;
+
+ if (FirstGEP == SecondGEP)
+ return false;
+
+ unsigned FirstNum = FirstGEP->getNumOperands();
+ unsigned SecondNum = SecondGEP->getNumOperands();
+ // Give up if the number of operands is not 2.
+ if (FirstNum != SecondNum || FirstNum != 2)
+ return false;
+
+ Value *FirstBase = FirstGEP->getOperand(0);
+ Value *SecondBase = SecondGEP->getOperand(0);
+ Value *FirstOffset = FirstGEP->getOperand(1);
+ // Give up if the index of the first GEP is loop invariant.
+ if (CurLoop->isLoopInvariant(FirstOffset))
+ return false;
+
+ // Give up if the bases don't have the same type.
+ if (FirstBase->getType() != SecondBase->getType())
+ return false;
+
+ Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+ // Check if the second operand of the first GEP has a constant coefficient.
+ // For example, in the following code we won't gain anything by hoisting
+ // the second GEP out because the second GEP can be folded away:
+ // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+ // %67 = shl i64 %scevgep.sum.ur159, 2
+ // %uglygep160 = getelementptr i8* %65, i64 %67
+ // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
+
+ // Skip a constant shift instruction that may have been generated by
+ // splitting GEPs.
+ if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+ isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+ FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+ // Give up if FirstOffsetDef is an Add or Sub with a constant, since the
+ // transformation may not be profitable at all due to constant folding.
+ if (FirstOffsetDef)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
+ unsigned opc = BO->getOpcode();
+ if ((opc == Instruction::Add || opc == Instruction::Sub) &&
+ (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1))))
+ return false;
+ }
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
+ int UsesInLoop = 0;
+ for (User *U : V->users()) {
+ if (Instruction *User = dyn_cast<Instruction>(U))
+ if (L->contains(User))
+ if (++UsesInLoop > 1)
+ return true;
+ }
+ return false;
+}
+
+void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
+ GetElementPtrInst *Second) {
+ Value *Offset1 = First->getOperand(1);
+ Value *Offset2 = Second->getOperand(1);
+ First->setOperand(1, Offset2);
+ Second->setOperand(1, Offset1);
+
+ // We changed p+o+c to p+c+o; p+c may no longer be inbounds.
+ const DataLayout &DAL = First->getModule()->getDataLayout();
+ APInt Offset(DAL.getPointerSizeInBits(
+ cast<PointerType>(First->getType())->getAddressSpace()),
+ 0);
+ Value *NewBase =
+ First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
+ uint64_t ObjectSize;
+ if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
+ Offset.ugt(ObjectSize)) {
+ First->setIsInBounds(false);
+ Second->setIsInBounds(false);
+ } else
+ First->setIsInBounds(true);
+}
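A hedged sketch (invented names) of the pointer chains swapGEPOperand produces, and why it helps LICM:

    // before: the constant-offset GEP depends on the loop-varying one
    //   %g1 = getelementptr i8, i8* %base, i64 %i    ; loop-varying
    //   %g2 = getelementptr i8, i8* %g1, i64 -1024   ; constant offset
    // after the swap: the constant GEP feeds the varying one, so LICM can
    // hoist %g1 (now %base - 1024) out of the loop
    //   %g1 = getelementptr i8, i8* %base, i64 -1024 ; loop-invariant
    //   %g2 = getelementptr i8, i8* %g1, i64 %i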
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 231411a..63c8836 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
@@ -67,15 +68,14 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
// Check for something else in the block.
- BasicBlock::iterator I = Ret;
+ BasicBlock::iterator I(Ret);
--I;
// Skip over debug info.
while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
--I;
if (!isa<DbgInfoIntrinsic>(I) &&
- (!isa<PHINode>(I) || I != BB.begin() ||
- Ret->getNumOperands() == 0 ||
- Ret->getOperand(0) != I))
+ (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != &*I))
continue;
}
@@ -136,7 +136,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -217,6 +217,7 @@ struct CFGSimplifyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index f49f4ea..64109b2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -48,7 +48,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -66,7 +66,7 @@ char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -99,7 +99,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool MadeChange, EverMadeChange = false;
@@ -119,7 +119,7 @@ bool Sinking::runOnFunction(Function &F) {
bool Sinking::ProcessBlock(BasicBlock &BB) {
// Can't sink anything out of a block that has less than two successors.
- if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+ if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
@@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
bool ProcessedBegin = false;
SmallPtrSet<Instruction *, 8> Stores;
do {
- Instruction *Inst = I; // The instruction to sink.
+ Instruction *Inst = &*I; // The instruction to sink.
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
@@ -165,14 +165,16 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
MemoryLocation Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
- if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
+ if (AA->getModRefInfo(S, Loc) & MRI_Mod)
return false;
}
- if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
+ Inst->mayThrow())
return false;
- // Convergent operations can only be moved to control equivalent blocks.
+ // Convergent operations cannot be made control-dependent on additional
+ // values.
if (auto CS = CallSite(Inst)) {
if (CS.hasFnAttr(Attribute::Convergent))
return false;
@@ -193,6 +195,11 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (Inst->getParent() == SuccToSinkTo)
return false;
+ // It's never legal to sink an instruction into a block which terminates in an
+ // EH-pad.
+ if (SuccToSinkTo->getTerminator()->isExceptional())
+ return false;
+
// If the block has multiple predecessors, this would introduce computation
// on different code paths. We could split the critical edge, but for now we
// just punt.
@@ -278,6 +285,6 @@ bool Sinking::SinkInstruction(Instruction *Inst,
dbgs() << ")\n");
// Move the instruction.
- Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
+ Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt());
return true;
}
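The getModRefInfo change reflects the flattened ModRefInfo enumeration: results are plain MRI_* bitmask values rather than members nested inside AliasAnalysis. A minimal sketch of the new query style:

    // True if store S may modify the memory at Loc.
    static bool mayClobber(Instruction *S, const MemoryLocation &Loc,
                           AliasAnalysis *AA) {
      return AA->getModRefInfo(S, Loc) & MRI_Mod;
    }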
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ff3f00a..147d615 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,7 +227,7 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
// changes the list that I is iterating through.
auto Current = I;
++I;
- if (!NotHoisted.count(Current)) {
+ if (!NotHoisted.count(&*Current)) {
Current->moveBefore(ToBlock.getTerminator());
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 6d9d417..1faa65e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -131,7 +131,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// We do not modify the shape of the CFG.
AU.setPreservesCFG();
@@ -212,7 +212,7 @@ char StraightLineStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
@@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
+// TODO: use TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI,
const DataLayout *DL) {
@@ -523,7 +524,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
continue;
const SCEV *OrigIndexExpr = IndexExprs[I - 1];
- IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0);
+ IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
// The base of this candidate is GEP's base plus the offsets of all
// indices except this current one.
@@ -689,7 +690,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
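The ScalarEvolution hunks follow the same wrapper-pass migration as the AA ones above: the analysis result now sits behind a wrapper pass and is reached through a getter. Usage pattern inside a legacy FunctionPass:

    // In getAnalysisUsage:
    AU.addRequired<ScalarEvolutionWrapperPass>();
    // In runOnFunction:
    ScalarEvolution *SE =
        &getAnalysis<ScalarEvolutionWrapperPass>().getSE();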
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 4f23e20..662513c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -358,13 +358,9 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = Term->getSuccessor(i);
-
- if (Visited.count(Succ)) {
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
Loops[Succ] = BB;
- }
- }
}
}
@@ -903,14 +899,14 @@ void StructurizeCFG::rebuildSSA() {
continue;
}
- if (DT->dominates(II, User))
+ if (DT->dominates(&*II, User))
continue;
if (!Initialized) {
Value *Undef = UndefValue::get(II->getType());
Updater.Initialize(II->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(BB, II);
+ Updater.AddAvailableValue(BB, &*II);
Initialized = true;
}
Updater.RewriteUseAfterInsertions(U);
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index c7de2e2..0e0b00d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,6 +54,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
@@ -136,6 +137,7 @@ FunctionPass *llvm::createTailCallEliminationPass() {
void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
/// \brief Scan the specified function for alloca instructions.
@@ -195,8 +197,8 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
- bool IsNocapture = !CS.isCallee(U) &&
- CS.doesNotCapture(CS.getArgumentNo(U));
+ bool IsNocapture =
+ CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
if (IsNocapture) {
// If the alloca-derived argument is passed in as nocapture, then it
@@ -302,7 +304,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
if (!CI || CI->isTailCall())
continue;
- if (CI->doesNotAccessMemory()) {
+ bool IsNoTail = CI->isNoTailCall();
+
+ if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
// outside this function can be marked tail. Even if you stored the
// alloca address into a global, a readnone function can't load the
@@ -330,7 +334,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
}
}
- if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
DeferredTails.push_back(CI);
} else {
AllCallsAreTailCalls = false;
@@ -404,7 +408,7 @@ bool TailCallElim::runTRE(Function &F) {
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
- BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
+ BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
@@ -574,7 +578,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = nullptr;
- BasicBlock::iterator BBI = TI;
+ BasicBlock::iterator BBI(TI);
while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
@@ -595,9 +599,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- FirstNonDbg(BB->front()) == CI &&
- FirstNonDbg(std::next(BB->begin())) == TI &&
- CI->getCalledFunction() &&
+ FirstNonDbg(BB->front().getIterator()) == CI &&
+ FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -636,19 +639,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- BasicBlock::iterator BBI = CI;
+ BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (CanMoveAboveCall(BBI, CI)) continue;
+ if (CanMoveAboveCall(&*BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
+ CanTransformAccumulatorRecursion(&*BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
// accumulates.
- AccumulatorRecursionInstr = BBI;
+ AccumulatorRecursionInstr = &*BBI;
} else {
return false; // Otherwise, we cannot eliminate the tail recursion!
}
@@ -698,19 +701,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
NEBI = NewEntry->begin(); OEBI != E; )
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
if (isa<ConstantInt>(AI->getArraySize()))
- AI->moveBefore(NEBI);
+ AI->moveBefore(&*NEBI);
// Now that we have created a new block, which jumps to the entry
// block, insert a PHI node for each argument of the function.
// For now, we initialize each PHI to only have the real arguments
// which are passed in.
- Instruction *InsertPos = OldEntry->begin();
+ Instruction *InsertPos = &OldEntry->front();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
- PN->addIncoming(I, NewEntry);
+ PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
}
@@ -739,10 +742,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PHINode *AccPN =
- PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
- std::distance(PB, PE) + 1,
- "accumulator.tr", OldEntry->begin());
+ PHINode *AccPN = PHINode::Create(
+ AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the initial value,
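For orientation, a hedged source-level sketch of what accumulator recursion elimination accomplishes (illustrative only, not the pass's literal output):

    // before: the multiply happens after the recursive call
    //   int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
    // after: the pending multiply becomes a PHI-carried accumulator,
    // seeded with the operation's identity value
    //   int fac(int n) {
    //     int acc = 1;
    //     for (; n > 1; --n) acc *= n;
    //     return acc;
    //   }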
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 03c3a80..409326e 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
namespace llvm {
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e9f6239..0262358f 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,32 +52,34 @@
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "add-discriminators"
namespace {
- struct AddDiscriminators : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- AddDiscriminators() : FunctionPass(ID) {
- initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
- }
+struct AddDiscriminators : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ AddDiscriminators() : FunctionPass(ID) {
+ initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override;
- };
+ bool runOnFunction(Function &F) override;
+};
}
char AddDiscriminators::ID = 0;
@@ -89,17 +91,17 @@ INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
// Command line option to disable discriminator generation even in the
// presence of debug information. This is only needed when debugging
// debug info generation issues.
-static cl::opt<bool>
-NoDiscriminators("no-discriminators", cl::init(false),
- cl::desc("Disable generation of discriminator information."));
+static cl::opt<bool> NoDiscriminators(
+ "no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminators();
}
static bool hasDebugInfo(const Function &F) {
- NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
- return CUNodes != nullptr;
+ DISubprogram *S = getDISubprogram(&F);
+ return S != nullptr;
}
/// \brief Assign DWARF discriminators.
@@ -159,8 +161,7 @@ bool AddDiscriminators::runOnFunction(Function &F) {
// Similarly, if the function has no debug info, do nothing.
// Finally, if this module is built with dwarf versions earlier than 4,
// do nothing (discriminator support is a DWARF 4 feature).
- if (NoDiscriminators ||
- !hasDebugInfo(F) ||
+ if (NoDiscriminators || !hasDebugInfo(F) ||
F.getParent()->getDwarfVersion() < 4)
return false;
@@ -169,59 +170,77 @@ bool AddDiscriminators::runOnFunction(Function &F) {
LLVMContext &Ctx = M->getContext();
DIBuilder Builder(*M, /*AllowUnresolved*/ false);
- // Traverse all the blocks looking for instructions in different
- // blocks that are at the same file:line location.
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *B = I;
- TerminatorInst *Last = B->getTerminator();
- const DILocation *LastDIL = Last->getDebugLoc();
- if (!LastDIL)
- continue;
-
- for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
- BasicBlock *Succ = Last->getSuccessor(I);
- Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
- const DILocation *FirstDIL = First->getDebugLoc();
- if (!FirstDIL)
+ typedef std::pair<StringRef, unsigned> Location;
+ typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap;
+ typedef DenseMap<Location, BBScopeMap> LocationBBMap;
+
+ LocationBBMap LBM;
+
+ // Traverse all instructions in the function. If the source line location
+ // of the instruction appears in another basic block, assign a new
+ // discriminator to this instruction.
+ for (BasicBlock &B : F) {
+ for (auto &I : B.getInstList()) {
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+ const DILocation *DIL = I.getDebugLoc();
+ if (!DIL)
+ continue;
+ Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
+ auto &BBMap = LBM[L];
+ auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr));
+ if (BBMap.size() == 1)
+ continue;
+ bool InsertSuccess = R.second;
+ Metadata *&NewScope = R.first->second;
+ // If we just inserted this block for a location that other blocks already
+ // occupy, a discriminator is needed to distinguish their instructions.
+ if (InsertSuccess) {
+ auto *Scope = DIL->getScope();
+ auto *File =
+ Builder.createFile(DIL->getFilename(), Scope->getDirectory());
+ NewScope = Builder.createLexicalBlockFile(
+ Scope, File, DIL->computeNewDiscriminator());
+ }
+ I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
+ NewScope, DIL->getInlinedAt()));
+ DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":"
+ << dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator()
+ << I << "\n");
+ Changed = true;
+ }
+ }
+
+ // Traverse all instructions and assign new discriminators to call
+ // instructions with the same line number that are in the same basic block.
+ // Sample-based profiling needs to distinguish different function calls
+ // within the same source line for correct profile annotation.
+ for (BasicBlock &B : F) {
+ const DILocation *FirstDIL = nullptr;
+ for (auto &I : B.getInstList()) {
+ CallInst *Current = dyn_cast<CallInst>(&I);
+ if (!Current || isa<DbgInfoIntrinsic>(&I))
continue;
- // If the first instruction (First) of Succ is at the same file
- // location as B's last instruction (Last), add a new
- // discriminator for First's location and all the instructions
- // in Succ that share the same location with First.
- if (!FirstDIL->canDiscriminate(*LastDIL)) {
- // Create a new lexical scope and compute a new discriminator
- // number for it.
- StringRef Filename = FirstDIL->getFilename();
- auto *Scope = FirstDIL->getScope();
- auto *File = Builder.createFile(Filename, Scope->getDirectory());
-
- // FIXME: Calculate the discriminator here, based on local information,
- // and delete DILocation::computeNewDiscriminator(). The current
- // solution gives different results depending on other modules in the
- // same context. All we really need is to discriminate between
- // FirstDIL and LastDIL -- a local map would suffice.
- unsigned Discriminator = FirstDIL->computeNewDiscriminator();
- auto *NewScope =
- Builder.createLexicalBlockFile(Scope, File, Discriminator);
- auto *NewDIL =
- DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
- NewScope, FirstDIL->getInlinedAt());
- DebugLoc newDebugLoc = NewDIL;
-
- // Attach this new debug location to First and every
- // instruction following First that shares the same location.
- for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
- ++I1) {
- if (I1->getDebugLoc().get() != FirstDIL)
- break;
- I1->setDebugLoc(newDebugLoc);
- DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
- << ":" << NewDIL->getColumn() << ":"
- << NewDIL->getDiscriminator() << *I1 << "\n");
+ DILocation *CurrentDIL = Current->getDebugLoc();
+ if (FirstDIL) {
+ if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() &&
+ CurrentDIL->getFilename() == FirstDIL->getFilename()) {
+ auto *Scope = FirstDIL->getScope();
+ auto *File = Builder.createFile(FirstDIL->getFilename(),
+ Scope->getDirectory());
+ auto *NewScope = Builder.createLexicalBlockFile(
+ Scope, File, FirstDIL->computeNewDiscriminator());
+ Current->setDebugLoc(DILocation::get(
+ Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope,
+ CurrentDIL->getInlinedAt()));
+ Changed = true;
+ } else {
+ FirstDIL = CurrentDIL;
}
- DEBUG(dbgs() << "\n");
- Changed = true;
+ } else {
+ FirstDIL = CurrentDIL;
}
}
}
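The new bookkeeping leans on DenseMap::insert returning an {iterator, bool} pair. A hedged sketch of the idiom, detached from the pass:

    // R.second is true iff &B was not yet recorded for this file:line.
    auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr));
    Metadata *&NewScope = R.first->second; // cached scope, filled lazily
    if (R.second) {
      // First time this block appears at a location other blocks already
      // occupy: build a DILexicalBlockFile with a fresh discriminator and
      // cache it in NewScope for the block's remaining instructions.
    }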
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ef7daca..a5137e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -41,8 +41,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
- for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ for (BasicBlock *Succ : BBTerm->successors())
+ Succ->removePredecessor(BB);
// Zap all the instructions in the block.
while (!BB->empty()) {
@@ -65,7 +65,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceAnalysis *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
@@ -77,8 +77,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
if (MemDep)
MemDep->removeInstruction(PN); // Memdep updates AA itself.
- else if (AA && isa<PointerType>(PN->getType()))
- AA->deleteValue(PN);
PN->eraseFromParent();
}
@@ -108,7 +106,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
- LoopInfo *LI, AliasAnalysis *AA,
+ LoopInfo *LI,
MemoryDependenceAnalysis *MemDep) {
// Don't merge away blocks who have their address taken.
if (BB->hasAddressTaken()) return false;
@@ -119,8 +117,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Don't break self-loops.
if (PredBB == BB) return false;
- // Don't break invokes.
- if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+ // Don't break unwinding instructions.
+ if (PredBB->getTerminator()->isExceptional())
+ return false;
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
BasicBlock *OnlySucc = BB;
@@ -145,7 +144,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
- FoldSingleEntryPHINodes(BB, AA, MemDep);
+ FoldSingleEntryPHINodes(BB, MemDep);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -253,7 +252,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, Succ->begin(), DT, LI);
+ return SplitBlock(Succ, &Succ->front(), DT, LI);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
@@ -284,8 +283,8 @@ llvm::SplitAllCriticalEdges(Function &F,
///
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT, LoopInfo *LI) {
- BasicBlock::iterator SplitIt = SplitPt;
- while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
@@ -393,7 +392,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
/// from NewBB. This also updates AliasAnalysis, if available.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds, BranchInst *BI,
- AliasAnalysis *AA, bool HasLoopExit) {
+ bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
@@ -474,17 +473,20 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
- const char *Suffix, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA) {
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ // Do not attempt to split that which cannot be split.
+ if (!BB->canSplitPredecessors())
+ return nullptr;
+
// For the landingpads we need to act a bit differently.
// Delegate this work to the SplitLandingPadPredecessors.
if (BB->isLandingPad()) {
SmallVector<BasicBlock*, 2> NewBBs;
std::string NewName = std::string(Suffix) + ".split-lp";
- SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(),
- NewBBs, AA, DT, LI, PreserveLCSSA);
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
+ LI, PreserveLCSSA);
return NewBBs[0];
}
@@ -523,7 +525,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
return NewBB;
}
@@ -544,8 +546,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -574,7 +576,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
// Move the remaining edges from OrigBB to point to NewBB2.
SmallVector<BasicBlock*, 8> NewBB2Preds;
@@ -611,7 +613,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit);
}
LandingPadInst *LPad = OrigBB->getLandingPadInst();
@@ -661,7 +663,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// return instruction.
V = BCI->getOperand(0);
NewBC = BCI->clone();
- Pred->getInstList().insert(NewRet, NewBC);
+ Pred->getInstList().insert(NewRet->getIterator(), NewBC);
*i = NewBC;
}
if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -707,7 +709,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
MDNode *BranchWeights,
DominatorTree *DT) {
BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
TerminatorInst *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
@@ -757,7 +759,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
TerminatorInst **ElseTerm,
MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
TerminatorInst *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 7e83c9e..9582599 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -101,10 +101,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
continue;
// Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN =
- PHINode::Create(PN->getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ?
- SplitBB->begin() : SplitBB->getTerminator());
+ PHINode *NewPN = PHINode::Create(
+ PN->getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
NewPN->addIncoming(V, Preds[i]);
@@ -141,9 +140,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
- // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // Splitting the critical edge to a pad block is non-trivial. Don't do
// it in this generic function.
- if (DestBB->isLandingPad()) return nullptr;
+ if (DestBB->isEHPad()) return nullptr;
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
@@ -157,7 +156,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
- Function::iterator FBBI = TIBB;
+ Function::iterator FBBI = TIBB->getIterator();
F.getBasicBlockList().insert(++FBBI, NewBB);
// If there are any PHI nodes in DestBB, we need to update them so that they
@@ -197,7 +196,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// If we have nothing to update, just return.
- auto *AA = Options.AA;
auto *DT = Options.DT;
auto *LI = Options.LI;
if (!DT && !LI)
@@ -319,10 +317,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
LoopPreds.push_back(P);
}
if (!LoopPreds.empty()) {
- assert(!DestBB->isLandingPad() &&
- "We don't split edges to landing pads!");
+ assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA);
+ DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 8aa7b2a..64b44a6 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -21,7 +22,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -55,32 +55,6 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
return CI;
}
-/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
-/// specified pointer. Ptr is required to be some pointer type, MaxLen must
-/// be of size_t type, and the return value has 'intptr_t' type.
-Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strnlen))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen =
- M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS),
- DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen");
- if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
/// EmitStrChr - Emit a call to the strchr function to the builder, for the
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index f2d5e07..0914699 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -82,7 +82,7 @@ static bool insertFastDiv(Function &F,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = J;
+ Instruction *Instr = &*J;
Value *Dividend = Instr->getOperand(0);
Value *Divisor = Instr->getOperand(1);
@@ -94,7 +94,7 @@ static bool insertFastDiv(Function &F,
}
// Basic Block is split before divide
- BasicBlock *MainBB = I;
+ BasicBlock *MainBB = &*I;
BasicBlock *SuccessorBB = I->splitBasicBlock(J);
++I; //advance iterator I to successorBB
@@ -190,7 +190,7 @@ static bool reuseOrInsertFastDiv(Function &F,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
- Instruction *Instr = J;
+ Instruction *Instr = &*J;
DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index cc4d6c6..854a3b8 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -52,8 +52,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- VMap[II] = NewInst; // Add instruction map to value.
-
+ VMap[&*II] = NewInst; // Add instruction map to value.
+
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
@@ -85,9 +85,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); I != E; ++I)
- assert(VMap.count(I) && "No mapping from source argument specified!");
+ for (const Argument &I : OldFunc->args())
+ assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
// Copy all attributes other than those stored in the AttributeSet. We need
@@ -96,6 +95,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
+ // Fix up the personality function that got copied over.
+ if (OldFunc->hasPersonalityFn())
+ NewFunc->setPersonalityFn(
+ MapValue(OldFunc->getPersonalityFn(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+
AttributeSet OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
for (const Argument &OldArg : OldFunc->args())
@@ -136,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (BB.hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(&BB));
- VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
}
// Note return instructions for the caller.
@@ -146,11 +152,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
- for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
- BE = NewFunc->end(); BB != BE; ++BB)
+ for (Function::iterator BB =
+ cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+ BE = NewFunc->end();
+ BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
- for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap,
+ for (Instruction &II : *BB)
+ RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
@@ -187,11 +195,9 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
if (!OldSubprogramMDNode) return;
- // Ensure that OldFunc appears in the map.
- // (if it's already there it must point to NewFunc anyway)
- VMap[OldFunc] = NewFunc;
auto *NewSubprogram =
cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
+ NewFunc->setSubprogram(NewSubprogram);
for (auto *CU : Finder.compile_units()) {
auto Subprograms = CU->getSubprograms();
@@ -222,10 +228,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we need to not add the arguments to the arg ty vector
//
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
- ArgTypes.push_back(I->getType());
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I.getType());
// Create a new function type...
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
@@ -236,11 +241,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (VMap.count(I) == 0) { // Is this argument preserved?
- DestI->setName(I->getName()); // Copy the name over...
- VMap[I] = DestI++; // Add mapping to VMap
+ for (const Argument & I : F->args())
+ if (VMap.count(&I) == 0) { // Is this argument preserved?
+ DestI->setName(I.getName()); // Copy the name over...
+ VMap[&I] = &*DestI++; // Add mapping to VMap
}
if (ModuleLevelChanges)
@@ -330,8 +334,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
II != IE; ++II) {
// If the "Director" remaps the instruction, don't clone it.
if (Director) {
- CloningDirector::CloningAction Action
- = Director->handleInstruction(VMap, II, NewBB);
+ CloningDirector::CloningAction Action =
+ Director->handleInstruction(VMap, &*II, NewBB);
// If the cloning director says stop, we want to stop everything, not
// just break out of the loop (which would cause the terminator to be
// cloned). The cloning director is responsible for inserting a proper
@@ -365,7 +369,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (Value *MappedV = VMap.lookup(V))
V = MappedV;
- VMap[II] = V;
+ VMap[&*II] = V;
delete NewInst;
continue;
}
@@ -373,9 +377,15 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
- VMap[II] = NewInst; // Add instruction map to value.
+ VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(&*II))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
@@ -400,8 +410,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the director says to skip with a terminate instruction, we still
// need to clone this block's successors.
const TerminatorInst *TI = NewBB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- ToClone.push_back(TI->getSuccessor(i));
+ for (const BasicBlock *Succ : TI->successors())
+ ToClone.push_back(Succ);
return;
}
assert(Action != CloningDirector::SkipInstruction &&
@@ -447,11 +457,16 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[OldTI] = NewInst; // Add instruction map to value.
-
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(OldTI))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- ToClone.push_back(TI->getSuccessor(i));
+ for (const BasicBlock *Succ : TI->successors())
+ ToClone.push_back(Succ);
}
if (CodeInfo) {
@@ -484,12 +499,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
#ifndef NDEBUG
- // If the cloning starts at the begining of the function, verify that
+ // If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
if (!StartingInst)
- for (Function::const_arg_iterator II = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); II != E; ++II)
- assert(VMap.count(II) && "No mapping from source argument specified!");
+ for (const Argument &II : OldFunc->args())
+ assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
@@ -499,12 +513,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
StartingBB = StartingInst->getParent();
else {
StartingBB = &OldFunc->getEntryBlock();
- StartingInst = StartingBB->begin();
+ StartingInst = &StartingBB->front();
}
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist);
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
@@ -517,9 +531,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
//
// Defer PHI resolution until rest of function is resolved.
SmallVector<const PHINode*, 16> PHIToResolve;
- for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
- BI != BE; ++BI) {
- Value *V = VMap[BI];
+ for (const BasicBlock &BI : *OldFunc) {
+ Value *V = VMap[&BI];
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (!NewBB) continue; // Dead block.
@@ -528,7 +541,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
// PHI nodes may have been remapped to non-PHI nodes by the caller or
// during the cloning process.
if (const PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -621,8 +634,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
- assert(VMap[OldI] == PN && "VMap mismatch");
- VMap[OldI] = NV;
+ assert(VMap[&*OldI] == PN && "VMap mismatch");
+ VMap[&*OldI] = NV;
PN->eraseFromParent();
++OldI;
}
@@ -644,15 +657,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
- if (I != Begin && (pred_begin(I) == pred_end(I) ||
- I->getSinglePredecessor() == I)) {
- BasicBlock *DeadBB = I++;
+ if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
+ I->getSinglePredecessor() == &*I)) {
+ BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
}
@@ -662,7 +675,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// simplification required looking through PHI nodes, those are only
// available after forming the full basic block. That may leave some here,
// and we still want to prune the dead code as early as possible.
- ConstantFoldTerminator(I);
+ ConstantFoldTerminator(&*I);
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
@@ -681,7 +694,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
BI->eraseFromParent();
// Make all PHI nodes that referred to Dest now refer to I as their source.
- Dest->replaceAllUsesWith(I);
+ Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
@@ -695,7 +708,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -717,7 +730,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
- CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap,
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
nullptr);
}
@@ -780,9 +793,10 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
}
// Move them physically from the end of the block list.
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
- NewLoop->getHeader(), F->end());
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewPH);
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewLoop->getHeader()->getIterator(), F->end());
return NewLoop;
}
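The splice calls at the end of cloneLoopWithPreheader now name both positions as explicit iterators rather than relying on implicit block-pointer conversion. A sketch under the same API assumptions (moveBlockBefore is hypothetical; both blocks are assumed to belong to F):

    #include "llvm/IR/Function.h"

    void moveBlockBefore(llvm::Function &F, llvm::BasicBlock *BB,
                         llvm::BasicBlock *Before) {
      // Unlink BB and re-link it immediately before Before, within F.
      F.getBasicBlockList().splice(Before->getIterator(),
                                   F.getBasicBlockList(), BB->getIterator());
    }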
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index 61f1811..ab08335 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -20,21 +20,28 @@
#include "llvm-c/Core.h"
using namespace llvm;
-/// CloneModule - Return an exact copy of the specified module. This is not as
-/// easy as it might seem because we have to worry about making copies of global
-/// variables and functions, and making their (initializers and references,
-/// respectively) refer to the right globals.
+/// This is not as easy as it might seem because we have to worry about making
+/// copies of global variables and functions, and making their (initializers and
+/// references, respectively) refer to the right globals.
///
-Module *llvm::CloneModule(const Module *M) {
+std::unique_ptr<Module> llvm::CloneModule(const Module *M) {
// Create the value map that maps things from the old module over to the new
// module.
ValueToValueMapTy VMap;
return CloneModule(M, VMap);
}
-Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+std::unique_ptr<Module> llvm::CloneModule(const Module *M,
+ ValueToValueMapTy &VMap) {
+ return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
+}
+
+std::unique_ptr<Module> llvm::CloneModule(
+ const Module *M, ValueToValueMapTy &VMap,
+ std::function<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
- Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ std::unique_ptr<Module> New =
+ llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
@@ -52,26 +59,48 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
(GlobalVariable*) nullptr,
I->getThreadLocalMode(),
I->getType()->getAddressSpace());
- GV->copyAttributesFrom(I);
- VMap[I] = GV;
+ GV->copyAttributesFrom(&*I);
+ VMap[&*I] = GV;
}
// Loop over the functions in the module, making external functions as before
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
Function *NF =
- Function::Create(cast<FunctionType>(I->getType()->getElementType()),
- I->getLinkage(), I->getName(), New);
- NF->copyAttributesFrom(I);
- VMap[I] = NF;
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ I->getLinkage(), I->getName(), New.get());
+ NF->copyAttributesFrom(&*I);
+ VMap[&*I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- auto *PTy = cast<PointerType>(I->getType());
- auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New);
- GA->copyAttributesFrom(I);
- VMap[I] = GA;
+ if (!ShouldCloneDefinition(&*I)) {
+ // An alias cannot act as an external reference, so we need to create
+ // either a function or a global variable depending on the value type.
+ // FIXME: Once pointee types are gone we can probably pick one or the
+ // other.
+ GlobalValue *GV;
+ if (I->getValueType()->isFunctionTy())
+ GV = Function::Create(cast<FunctionType>(I->getValueType()),
+ GlobalValue::ExternalLinkage, I->getName(),
+ New.get());
+ else
+ GV = new GlobalVariable(
+ *New, I->getValueType(), false, GlobalValue::ExternalLinkage,
+ (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr,
+ I->getThreadLocalMode(), I->getType()->getAddressSpace());
+ VMap[&*I] = GV;
+ // We do not copy attributes (mainly because copying between different
+ // kinds of globals is forbidden), but this is generally not required for
+ // correctness.
+ continue;
+ }
+ auto *GA = GlobalAlias::create(I->getValueType(),
+ I->getType()->getPointerAddressSpace(),
+ I->getLinkage(), I->getName(), New.get());
+ GA->copyAttributesFrom(&*I);
+ VMap[&*I] = GA;
}
// Now that all of the things that global variable initializer can refer to
@@ -80,7 +109,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+ if (!ShouldCloneDefinition(&*I)) {
+ // Skip after setting the correct linkage for an external reference.
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
}
@@ -88,18 +122,22 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// Similarly, copy over function bodies now...
//
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *F = cast<Function>(VMap[I]);
+ Function *F = cast<Function>(VMap[&*I]);
+ if (!ShouldCloneDefinition(&*I)) {
+ // Skip after setting the correct linkage for an external reference.
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
if (!I->isDeclaration()) {
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
++J) {
DestI->setName(J->getName());
- VMap[J] = DestI++;
+ VMap[&*J] = &*DestI++;
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
-
+ CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
if (I->hasPersonalityFn())
@@ -109,7 +147,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ // We already dealt with undefined aliases above.
+ if (!ShouldCloneDefinition(&*I))
+ continue;
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]);
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));
}
@@ -129,7 +170,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
extern "C" {
LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
- return wrap(CloneModule(unwrap(M)));
+ return wrap(CloneModule(unwrap(M)).release());
}
}
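The new CloneModule overload takes a per-global predicate and returns std::unique_ptr<Module>; globals whose definitions are not cloned become external references, as the hunks above show. A usage sketch that keeps only function definitions (cloneFunctionsOnly is a hypothetical name):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include <memory>

    std::unique_ptr<llvm::Module> cloneFunctionsOnly(const llvm::Module *M) {
      llvm::ValueToValueMapTy VMap;
      // Keep function definitions; global variables and aliases are turned
      // into external references in the clone.
      return llvm::CloneModule(M, VMap, [](const llvm::GlobalValue *GV) {
        return llvm::isa<llvm::Function>(GV);
      });
    }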
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index ab89b41..823696d 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -51,7 +51,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
/// \brief Test whether a block is valid for extraction.
static bool isBlockValidForExtraction(const BasicBlock &BB) {
// Landing pads must be in the function where they were inserted for cleanup.
- if (BB.isLandingPad())
+ if (BB.isEHPad())
return false;
// Don't hoist code containing allocas, invokes, or vastarts.
@@ -175,7 +175,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
for (User *U : II->users())
if (!definedInRegion(Blocks, U)) {
- Outputs.insert(II);
+ Outputs.insert(&*II);
break;
}
}
@@ -211,7 +211,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// containing PHI nodes merging values from outside of the region, and a
// second that contains all of the code for the block and merges back any
// incoming values from inside of the region.
- BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
Header->getName()+".ce");
@@ -246,7 +246,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
- PN->getName()+".ce", NewBB->begin());
+ PN->getName() + ".ce", &NewBB->front());
NewPN->addIncoming(PN, OldPred);
// Loop over all of the incoming value in PN, moving them to NewPN if they
@@ -266,7 +266,8 @@ void CodeExtractor::splitReturnBlocks() {
for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
- BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ BasicBlock *New =
+ (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret");
if (DT) {
// Old dominates New. New node dominates all other nodes dominated
// by Old.
@@ -365,10 +366,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
- RewriteVal = AI++;
+ RewriteVal = &*AI++;
std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
@@ -440,8 +441,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(*i);
} else {
AllocaInst *alloca =
- new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc",
- codeReplacer->getParent()->begin()->begin());
+ new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
@@ -457,9 +458,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct =
- new AllocaInst(StructArgTy, nullptr, "structArg",
- codeReplacer->getParent()->begin()->begin());
+ Struct = new AllocaInst(StructArgTy, nullptr, "structArg",
+ &codeReplacer->getParent()->front().front());
params.push_back(Struct);
for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
@@ -566,8 +566,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
bool DominatesDef = true;
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
- DefBlock = Invoke->getNormalDest();
+ BasicBlock *NormalDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
+ NormalDest = Invoke->getNormalDest();
+
+ if (NormalDest) {
+ DefBlock = NormalDest;
// Make sure we are looking at the original successor block, not
// at a newly inserted exit block, which won't be in the dominator
@@ -606,11 +610,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(),
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(),
NTRet);
new StoreInst(outputs[out], GEP, NTRet);
} else {
- new StoreInst(outputs[out], OAI, NTRet);
+ new StoreInst(outputs[out], &*OAI, NTRet);
}
}
// Advance output iterator even if we don't emit a store
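Several hunks above replace `codeReplacer->getParent()->begin()->begin()` with `&...front().front()` as the insertion point for entry-block allocas. A sketch of that placement, assuming a well-formed function (makeEntryAlloca is hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    llvm::AllocaInst *makeEntryAlloca(llvm::Function &F, llvm::Type *Ty,
                                      const llvm::Twine &Name) {
      // Insert before the entry block's first instruction; a well-formed
      // function always has at least a terminator, so front() is safe.
      return new llvm::AllocaInst(Ty, /*ArraySize=*/nullptr, Name,
                                  &F.getEntryBlock().front());
    }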
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index dc95089..b56ff68 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -50,7 +50,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
GlobalVariable *NGV =
new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
CA, "", GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 003da58..75a1dde 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -35,8 +35,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
I.getName()+".reg2mem", AllocaPoint);
} else {
Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem",
- F->getEntryBlock().begin());
+ Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem",
+ &F->getEntryBlock().front());
}
// We cannot demote invoke instructions to the stack if their normal edge
@@ -89,16 +89,15 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
if (!isa<TerminatorInst>(I)) {
- InsertPt = &I;
- ++InsertPt;
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ InsertPt = ++I.getIterator();
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
- new StoreInst(&I, Slot, InsertPt);
+ new StoreInst(&I, Slot, &*InsertPt);
return Slot;
}
@@ -118,8 +117,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem",
- F->getEntryBlock().begin());
+ Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem",
+ &F->getEntryBlock().front());
}
// Iterate over each operand inserting a store in each predecessor.
@@ -133,12 +132,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
}
// Insert a load in place of the PHI and replace all uses.
- BasicBlock::iterator InsertPt = P;
+ BasicBlock::iterator InsertPt = P->getIterator();
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
- Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
+ Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
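Both demotion paths above scan past leading PHI nodes and EH pads before inserting the reload; the patch broadens the old landingpad-only test to `isEHPad()`. A standalone sketch of that scan (firstNonPHIOrEHPad is hypothetical; the invoke path instead uses getFirstInsertionPt on the normal destination):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    llvm::Instruction *firstNonPHIOrEHPad(llvm::BasicBlock &BB) {
      llvm::BasicBlock::iterator It = BB.begin();
      // Skip PHIs and EH pad instructions; nothing may be inserted above them.
      while (llvm::isa<llvm::PHINode>(*It) || It->isEHPad())
        ++It;
      return &*It;
    }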
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 4eb3e3d..492ae9f 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -28,12 +28,11 @@ class FlattenCFGOpt {
AliasAnalysis *AA;
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P = nullptr);
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr);
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
@@ -122,8 +121,7 @@ public:
/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
/// as its predecessors.
///
-bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P) {
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
PHINode *PHI = dyn_cast<PHINode>(BB->begin());
if (PHI)
return false; // For simplicity, avoid cases containing PHI nodes.
@@ -177,8 +175,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
// Instructions in the internal condition blocks should be safe
// to hoist up.
- for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
- Instruction *CI = BI++;
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
+ BI != BE;) {
+ Instruction *CI = &*BI++;
if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
return false;
}
@@ -315,7 +314,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
TerminatorInst *PTI2 = Head2->getTerminator();
- Instruction *PBI2 = Head2->begin();
+ Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
bool eq2 = (Block2 == Head2);
@@ -327,9 +326,9 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
// Check whether instructions in Block1 and Block2 are identical
// and do not alias with instructions in Head2.
BasicBlock::iterator iter1 = Block1->begin();
- BasicBlock::iterator end1 = Block1->getTerminator();
+ BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
BasicBlock::iterator iter2 = Block2->begin();
- BasicBlock::iterator end2 = Block2->getTerminator();
+ BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
while (1) {
if (iter1 == end1) {
@@ -338,7 +337,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
break;
}
- if (!iter1->isIdenticalTo(iter2))
+ if (!iter1->isIdenticalTo(&*iter2))
return false;
// Illegal to remove instructions with side effects except
@@ -356,10 +355,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
return false;
if (iter1->mayWriteToMemory()) {
- for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
// Check alias with Head2.
- if (!AA || AA->alias(iter1, BI))
+ if (!AA || AA->alias(&*iter1, &*BI))
return false;
}
}
@@ -386,8 +385,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// if (a || b)
/// statement;
///
-bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
- Pass *P) {
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
@@ -413,7 +411,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
return false;
TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
- Instruction *PBI2 = SecondEntryBlock->begin();
+ Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
IfTrue2))
@@ -425,8 +423,8 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
// Check whether \param SecondEntryBlock has side-effect and is safe to
// speculate.
- for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
- Instruction *CI = BI;
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ Instruction *CI = &*BI;
if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
!isSafeToSpeculativelyExecute(CI))
return false;
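CompareIfRegionBlock walks the two candidate blocks in lockstep up to their terminators, using Instruction::isIdenticalTo with the now-explicit `&*` conversions. A condensed sketch of that walk (blocksMatch is hypothetical and omits the side-effect and aliasing checks the real code performs):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    bool blocksMatch(llvm::BasicBlock &B1, llvm::BasicBlock &B2) {
      llvm::BasicBlock::iterator I1 = B1.begin();
      llvm::BasicBlock::iterator E1 = B1.getTerminator()->getIterator();
      llvm::BasicBlock::iterator I2 = B2.begin();
      llvm::BasicBlock::iterator E2 = B2.getTerminator()->getIterator();
      for (; I1 != E1 && I2 != E2; ++I1, ++I2)
        if (!I1->isIdenticalTo(&*I2))
          return false;
      // Identical only if both blocks ran out at the same time.
      return I1 == E1 && I2 == E2;
    }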
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 44b7d25..3893a75 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -49,6 +49,10 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
SmallPtrSetImpl<const PHINode *> &PhiUsers) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (GV->isExternallyInitialized())
+ GS.StoredType = GlobalStatus::StoredOnce;
+
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
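The added lines treat an externally-initialized global as already StoredOnce, so later analysis cannot conclude that the visible initializer is the only value the global ever holds. A usage sketch of the surrounding API, assuming GlobalStatus as declared in llvm/Transforms/Utils/GlobalStatus.h (mayBeStoredMoreThanOnce is hypothetical):

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/Transforms/Utils/GlobalStatus.h"

    bool mayBeStoredMoreThanOnce(const llvm::GlobalVariable *GV) {
      llvm::GlobalStatus GS;
      // analyzeGlobal returns true when it cannot prove anything useful.
      if (llvm::GlobalStatus::analyzeGlobal(GV, GS))
        return true;
      return GS.StoredType == llvm::GlobalStatus::Stored;
    }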
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index d2d60d7..1457411 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
@@ -41,6 +42,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
+
using namespace llvm;
static cl::opt<bool>
@@ -54,17 +56,17 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::desc("Convert align attributes to assumptions during inlining."));
bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CI), IFI, InsertLifetime);
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
- return InlineFunction(CallSite(II), IFI, InsertLifetime);
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
namespace {
- /// A class for recording information about inlining through an invoke.
- class InvokeInliningInfo {
+ /// A class for recording information about inlining a landing pad.
+ class LandingPadInliningInfo {
BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
@@ -72,7 +74,7 @@ namespace {
SmallVector<Value*, 8> UnwindDestPHIValues;
public:
- InvokeInliningInfo(InvokeInst *II)
+ LandingPadInliningInfo(InvokeInst *II)
: OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
// If there are PHI nodes in the unwind destination block, we need to keep
@@ -121,14 +123,14 @@ namespace {
}
}
};
-}
+} // anonymous namespace
/// Get or create a target for the branch from ResumeInsts.
-BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
+BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
// Split the landing pad.
- BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
InnerResumeDest =
OuterResumeDest->splitBasicBlock(SplitPoint,
OuterResumeDest->getName() + ".body");
@@ -137,7 +139,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
const unsigned PHICapacity = 2;
// Create corresponding new PHIs for all the PHIs in the outer landing pad.
- BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ Instruction *InsertPoint = &InnerResumeDest->front();
BasicBlock::iterator I = OuterResumeDest->begin();
for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
PHINode *OuterPHI = cast<PHINode>(I);
@@ -162,8 +164,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI,
- SmallPtrSetImpl<LandingPadInst*> &InlinedLPads) {
+void LandingPadInliningInfo::forwardResume(
+ ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
BasicBlock *Src = RI->getParent();
@@ -182,33 +184,39 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
- InvokeInliningInfo &Invoke) {
+static BasicBlock *
+HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *I = BBI++;
+ Instruction *I = &*BBI++;
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- // If this call cannot unwind, don't convert it to an invoke.
- // Inline asm calls cannot throw.
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
// basic block.
- BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+ BasicBlock *Split =
+ BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
// Delete the unconditional branch inserted by splitBasicBlock
BB->getInstList().pop_back();
// Create the new invoke instruction.
- ImmutableCallSite CS(CI);
- SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
- Invoke.getOuterResumeDest(),
- InvokeArgs, CI->getName(), BB);
+ SmallVector<Value*, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ // Note: we're round tripping operand bundles through memory here, and that
+ // can potentially be avoided with a cleverer API design that we do not have
+ // as of this time.
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
+ OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -219,12 +227,9 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Delete the original call
Split->getInstList().pop_front();
-
- // Update any PHI nodes in the exceptional block to indicate that there is
- // now a new entry in them.
- Invoke.addIncomingPHIValuesFor(BB);
- return;
+ return BB;
}
+ return nullptr;
}
/// If we inlined an invoke site, we need to convert calls
@@ -233,8 +238,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
-static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
- ClonedCodeInfo &InlinedCodeInfo) {
+static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
BasicBlock *InvokeDest = II->getUnwindDest();
Function *Caller = FirstNewBlock->getParent();
@@ -242,11 +247,12 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
// rewrite.
- InvokeInliningInfo Invoke(II);
+ LandingPadInliningInfo Invoke(II);
// Get all of the inlined landing pad instructions.
SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
- for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
+ I != E; ++I)
if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
InlinedLPads.insert(II->getLandingPadInst());
@@ -262,9 +268,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InlinedLPad->setCleanup(true);
}
- for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
if (InlinedCodeInfo.ContainsCalls)
- HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, Invoke.getOuterResumeDest()))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(NewBB);
// Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
@@ -278,6 +289,99 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Function *Caller = FirstNewBlock->getParent();
+
+ assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
+
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing the
+ // edge from this block.
+ SmallVector<Value *, 8> UnwindDestPHIValues;
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ for (Instruction &I : *UnwindDest) {
+ // Save the value to use for this edge.
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ // Add incoming-PHI values to the unwind destination block for the given basic
+ // block, using the values for the original invoke's source block.
+ auto UpdatePHINodes = [&](BasicBlock *Src) {
+ BasicBlock::iterator I = UnwindDest->begin();
+ for (Value *V : UnwindDestPHIValues) {
+ PHINode *PHI = cast<PHINode>(I);
+ PHI->addIncoming(V, Src);
+ ++I;
+ }
+ };
+
+ // This connects all the instructions which 'unwind to caller' to the invoke
+ // destination.
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (CRI->unwindsToCaller()) {
+ CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI);
+ CRI->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ Instruction *Replacement = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (CatchSwitch->unwindsToCaller()) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), UnwindDest,
+ CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
+ CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+ Replacement = NewCatchSwitch;
+ }
+ } else if (!isa<FuncletPadInst>(I)) {
+ llvm_unreachable("unexpected EHPad!");
+ }
+
+ if (Replacement) {
+ Replacement->takeName(I);
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ if (InlinedCodeInfo.ContainsCalls)
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB)
+ if (BasicBlock *NewBB =
+ HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ UpdatePHINodes(NewBB);
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ UnwindDest->removePredecessor(InvokeBB);
+}
+
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
@@ -395,17 +499,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
- const DataLayout &DL, AliasAnalysis *AA) {
+ const DataLayout &DL, AAResults *CalleeAAR) {
if (!EnableNoAliasConversion)
return;
const Function *CalledFunc = CS.getCalledFunction();
SmallVector<const Argument *, 4> NoAliasArgs;
- for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end(); I != E; ++I) {
- if (I->hasNoAliasAttr() && !I->hasNUses(0))
- NoAliasArgs.push_back(I);
+ for (const Argument &I : CalledFunc->args()) {
+ if (I.hasNoAliasAttr() && !I.hasNUses(0))
+ NoAliasArgs.push_back(&I);
}
if (NoAliasArgs.empty())
@@ -480,10 +583,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
continue;
IsFuncCall = true;
- if (AA) {
- AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS);
- if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees ||
- MRB == AliasAnalysis::OnlyReadsArgumentPointees)
+ if (CalleeAAR) {
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ if (MRB == FMRB_OnlyAccessesArgumentPointees ||
+ MRB == FMRB_OnlyReadsArgumentPointees)
IsArgMemOnlyCall = true;
}
@@ -518,7 +621,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
SmallVector<Value *, 4> Objects;
GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
- Objects, DL, /* MaxLookup = */ 0);
+ Objects, DL, /* LI = */ nullptr);
for (Value *O : Objects)
ObjSet.insert(O);
@@ -646,7 +749,7 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
if (getKnownAlignment(Arg, DL, CS.getInstruction(),
- &IFI.ACT->getAssumptionCache(*CalledFunc),
+ &IFI.ACT->getAssumptionCache(*CS.getCaller()),
&DT) >= Align)
continue;
@@ -731,7 +834,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
BasicBlock *InsertBlock,
InlineFunctionInfo &IFI) {
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
- IRBuilder<> Builder(InsertBlock->begin());
+ IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
@@ -851,9 +954,8 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
// Starting from the top, rebuild the nodes to point to the new inlined-at
// location (then rebuilding the rest of the chain behind it) and update the
// map of already-constructed inlined-at nodes.
- for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend();
- I != E; ++I) {
- const DILocation *MD = *I;
+ for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(),
+ InlinedAtLocations.rend())) {
Last = IANodes[MD] = DILocation::getDistinct(
Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
}
@@ -917,7 +1019,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- bool InsertLifetime) {
+ AAResults *CalleeAAR, bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
@@ -930,6 +1032,22 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
+ // The inliner does not know how to inline through calls with operand bundles
+ // in general ...
+ if (CS.hasOperandBundles()) {
+ for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+ uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
+ // ... but it knows how to inline through "deopt" operand bundles ...
+ if (Tag == LLVMContext::OB_deopt)
+ continue;
+ // ... and "funclet" operand bundles.
+ if (Tag == LLVMContext::OB_funclet)
+ continue;
+
+ return false;
+ }
+ }
+
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CS.doesNotThrow();
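The gate added above lets inlining proceed only when every operand bundle on the call site is one the inliner understands ("deopt" or "funclet"). The same check, extracted into a sketch (hasOnlyInlinableBundles is hypothetical):

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cstdint>

    bool hasOnlyInlinableBundles(llvm::ImmutableCallSite CS) {
      for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
        uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
        // Any bundle other than "deopt" or "funclet" blocks inlining.
        if (Tag != llvm::LLVMContext::OB_deopt &&
            Tag != llvm::LLVMContext::OB_funclet)
          return false;
      }
      return true;
    }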
@@ -950,13 +1068,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Get the personality function from the callee if it contains a landing pad.
Constant *CalledPersonality =
- CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr;
+ CalledFunc->hasPersonalityFn()
+ ? CalledFunc->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
// Find the personality function used by the landing pads of the caller. If it
// exists, then check to see that it matches the personality function used in
// the callee.
Constant *CallerPersonality =
- Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr;
+ Caller->hasPersonalityFn()
+ ? Caller->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
if (CalledPersonality) {
if (!CallerPersonality)
Caller->setPersonalityFn(CalledPersonality);
@@ -968,9 +1090,46 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
return false;
}
+ // We need to figure out which funclet the callsite was in so that we may
+ // properly nest the callee.
+ Instruction *CallSiteEHPad = nullptr;
+ if (CallerPersonality) {
+ EHPersonality Personality = classifyEHPersonality(CallerPersonality);
+ if (isFuncletEHPersonality(Personality)) {
+ Optional<OperandBundleUse> ParentFunclet =
+ CS.getOperandBundle(LLVMContext::OB_funclet);
+ if (ParentFunclet)
+ CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
+
+ // OK, the inlining site is legal. What about the target function?
+
+ if (CallSiteEHPad) {
+ if (Personality == EHPersonality::MSVC_CXX) {
+ // The MSVC personality cannot tolerate catches getting inlined into
+ // cleanup funclets.
+ if (isa<CleanupPadInst>(CallSiteEHPad)) {
+ // Ok, the call site is within a cleanuppad. Let's check the callee
+ // for catchpads.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
+ return false;
+ }
+ }
+ } else if (isAsynchronousEHPersonality(Personality)) {
+ // SEH is even less tolerant; there may not be any sort of exceptional
+ // funclet in the callee.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (CalledBB.isEHPad())
+ return false;
+ }
+ }
+ }
+ }
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
- Function::iterator LastBlock = &Caller->back();
+ Function::iterator LastBlock = --Caller->end();
// Make sure to capture all of the return instructions from the cloned
// function.
@@ -1007,7 +1166,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
- VMap[I] = ActualArg;
+ VMap[&*I] = ActualArg;
}
// Add alignment assumptions if necessary. We do this before the inlined
@@ -1029,7 +1188,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
- FirstNewBlock, IFI);
+ &*FirstNewBlock, IFI);
+
+ Optional<OperandBundleUse> ParentDeopt =
+ CS.getOperandBundle(LLVMContext::OB_deopt);
+ if (ParentDeopt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+
+ for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+ Instruction *I = dyn_cast_or_null<Instruction>(VH);
+ if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
+
+ OpDefs.clear();
+
+ CallSite ICS(I);
+ OpDefs.reserve(ICS.getNumOperandBundles());
+
+ for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = ICS.getOperandBundleAt(i);
+ if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+ // If the inlined call has other operand bundles, let them be
+ OpDefs.emplace_back(ChildOB);
+ continue;
+ }
+
+ // It may be useful to separate this logic (of handling operand
+ // bundles) out to a separate "policy" component if this gets crowded.
+ // Prepend the parent's deoptimization continuation to the newly
+ // inlined call's deoptimization continuation.
+ std::vector<Value *> MergedDeoptArgs;
+ MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
+ ChildOB.Inputs.size());
+
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(),
+ ParentDeopt->Inputs.begin(),
+ ParentDeopt->Inputs.end());
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+ ChildOB.Inputs.end());
+
+ OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+ }
+
+ Instruction *NewI = nullptr;
+ if (isa<CallInst>(I))
+ NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else
+ NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+
+ // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+ // this even if the call returns void.
+ I->replaceAllUsesWith(NewI);
+
+ VH = nullptr;
+ I->eraseFromParent();
+ }
+ }
// Update the callgraph if requested.
if (IFI.CG)
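When the parent call site carries a "deopt" bundle, the block above rebuilds each inlined call site's bundles, prepending the parent's deoptimization inputs to the child's. A sketch of just the merge step (mergeDeoptBundles is hypothetical):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/InstrTypes.h"
    #include <vector>

    llvm::OperandBundleDef
    mergeDeoptBundles(llvm::ArrayRef<llvm::Value *> Parent,
                      llvm::ArrayRef<llvm::Value *> Child) {
      // Parent's deopt continuation comes first, then the child's.
      std::vector<llvm::Value *> Merged;
      Merged.reserve(Parent.size() + Child.size());
      Merged.insert(Merged.end(), Parent.begin(), Parent.end());
      Merged.insert(Merged.end(), Child.begin(), Child.end());
      return llvm::OperandBundleDef("deopt", std::move(Merged));
    }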
@@ -1042,7 +1255,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAliasScopeMetadata(CS, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, DL, IFI.AA);
+ AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -1085,9 +1298,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Transfer all of the allocas over in a block. Using splice means
// that the instructions aren't removed from the symbol table, then
// reinserted.
- Caller->getEntryBlock().getInstList().splice(InsertPoint,
- FirstNewBlock->getInstList(),
- AI, I);
+ Caller->getEntryBlock().getInstList().splice(
+ InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
}
// Move any dbg.declares describing the allocas into the entry basic block.
DIBuilder DIB(*Caller->getParent());
@@ -1137,7 +1349,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
if (InsertLifetime && !IFI.StaticAllocas.empty()) {
- IRBuilder<> builder(FirstNewBlock->begin());
+ IRBuilder<> builder(&FirstNewBlock->front());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -1189,7 +1401,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
// Insert the llvm.stacksave.
- CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
.CreateCall(StackSave, {}, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
@@ -1203,10 +1415,74 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ // Update the lexical scopes of the new funclets and callsites.
+ // Anything that had 'none' as its parent is now nested inside the callsite's
+ // EHPad.
+
+ if (CallSiteEHPad) {
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB) {
+ // Add bundle operands to any top-level call sites.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
+ Instruction *I = &*BBI++;
+ CallSite CS(I);
+ if (!CS)
+ continue;
+
+ // Skip call sites which are nounwind intrinsics.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+ continue;
+
+ // Skip call sites which already have a "funclet" bundle.
+ if (CS.getOperandBundle(LLVMContext::OB_funclet))
+ continue;
+
+ CS.getOperandBundlesAsDefs(OpBundles);
+ OpBundles.emplace_back("funclet", CallSiteEHPad);
+
+ Instruction *NewInst;
+ if (CS.isCall())
+ NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+ else
+ NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
+ NewInst->setDebugLoc(I->getDebugLoc());
+ NewInst->takeName(I);
+ I->replaceAllUsesWith(NewInst);
+ I->eraseFromParent();
+
+ OpBundles.clear();
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
+ CatchSwitch->setParentPad(CallSiteEHPad);
+ } else {
+ auto *FPI = cast<FuncletPadInst>(I);
+ if (isa<ConstantTokenNone>(FPI->getParentPad()))
+ FPI->setParentPad(CallSiteEHPad);
+ }
+ }
+ }
+
// If we are inlining for an invoke instruction, we must make sure to rewrite
// any call instructions into invoke instructions.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+ if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+ if (isa<LandingPadInst>(FirstNonPHI)) {
+ HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ } else {
+ HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ }
+ }
// Handle any inlined musttail call sites. In order for a new call site to be
// musttail, the source of the clone and the inlined call site must have been
@@ -1250,7 +1526,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the calling basic block.
if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
// Move all of the instructions right before the call.
- OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ OrigBB->getInstList().splice(TheCall->getIterator(),
+ FirstNewBlock->getInstList(),
FirstNewBlock->begin(), FirstNewBlock->end());
// Remove the cloned basic block.
Caller->getBasicBlockList().pop_back();
@@ -1297,15 +1574,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
- CalledFunc->getName()+".exit");
+ AfterCallBB =
+ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
+ CalledFunc->getName() + ".exit");
} else { // It's a call
// If this is a call instruction, we need to split the basic block that
// the call lives in.
//
- AfterCallBB = OrigBB->splitBasicBlock(TheCall,
- CalledFunc->getName()+".exit");
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
+ CalledFunc->getName() + ".exit");
}
// Change the branch that used to go to AfterCallBB to branch to the first
@@ -1314,14 +1592,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
TerminatorInst *Br = OrigBB->getTerminator();
assert(Br && Br->getOpcode() == Instruction::Br &&
"splitBasicBlock broken!");
- Br->setOperand(0, FirstNewBlock);
-
+ Br->setOperand(0, &*FirstNewBlock);
// Now that the function is correct, make it a little bit nicer. In
// particular, move the basic blocks inserted from the end of the function
// into the space made by splitting the source basic block.
- Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
- FirstNewBlock, Caller->end());
+ Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
+ Caller->getBasicBlockList(), FirstNewBlock,
+ Caller->end());
// Handle all of the return instructions that we just cloned in, and eliminate
// any users of the original call/invoke instruction.
@@ -1333,7 +1611,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// possible incoming values.
if (!TheCall->use_empty()) {
PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
- AfterCallBB->begin());
+ &AfterCallBB->front());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
TheCall->replaceAllUsesWith(PHI);
@@ -1350,7 +1628,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
-
// Add a branch to the merge points and remove return instructions.
DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
@@ -1413,7 +1690,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Splice the code entry block into calling block, right before the
// unconditional branch.
CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
- OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+ OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());
// Remove the unconditional branch.
OrigBB->getInstList().erase(Br);
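Most of the remaining churn in this file is mechanical: after the ilist changes in this import, an Instruction* no longer converts implicitly to a BasicBlock::iterator, so the splice and splitBasicBlock call sites spell the conversion out. A minimal sketch of the two directions (assumes the getIterator() API shown in the hunks):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    void iteratorConversions(Instruction *I) {
      BasicBlock::iterator It = I->getIterator(); // pointer -> iterator
      Instruction *Back = &*It;                   // iterator -> pointer
      (void)Back;
    }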
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 30edf3b..5687afa 100644
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -380,14 +380,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
IRBuilder<> Builder(Rem);
- Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
-
- unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
-
- if (RemTyBitWidth != 32 && RemTyBitWidth != 64)
- llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+ assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
+ assert((Rem->getType()->getIntegerBitWidth() == 32 ||
+ Rem->getType()->getIntegerBitWidth() == 64) &&
+ "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed remainder
if (Rem->getOpcode() == Instruction::SRem) {
@@ -401,7 +397,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
// If we didn't actually generate a urem instruction, we're done.
// This happens, for example, if the inputs were constant; in that case
// the Builder insertion point was unchanged.
- if (Rem == Builder.GetInsertPoint())
+ if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked())
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -440,14 +436,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {
IRBuilder<> Builder(Div);
- Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
-
- unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
-
- if (DivTyBitWidth != 32 && DivTyBitWidth != 64)
- llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+ assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
+ assert((Div->getType()->getIntegerBitWidth() == 32 ||
+ Div->getType()->getIntegerBitWidth() == 64) &&
+ "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed division
if (Div->getOpcode() == Instruction::SDiv) {
@@ -461,7 +453,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
// If we didn't actually generate a udiv instruction, we're done.
// This happens, for example, if the inputs were constant; in that case
// the Builder insertion point was unchanged.
- if (Div == Builder.GetInsertPoint())
+ if (Div == Builder.GetInsertPoint().getNodePtrUnchecked())
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -492,15 +484,14 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
"Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
- if (RemTyBitWidth > 32)
- llvm_unreachable("Div of bitwidth greater than 32 not supported");
+ assert(RemTyBitWidth <= 32 &&
+ "Div of bitwidth greater than 32 not supported");
- if (RemTyBitWidth == 32)
+ if (RemTyBitWidth == 32)
return expandRemainder(Rem);
// If bitwidth smaller than 32 extend inputs, extend output and proceed
@@ -542,15 +533,13 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
"Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
- if (RemTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!RemTy->isVectorTy() && "Div over vectors not supported");
unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
- if (RemTyBitWidth > 64)
- llvm_unreachable("Div of bitwidth greater than 64 not supported");
+ assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported");
- if (RemTyBitWidth == 64)
+ if (RemTyBitWidth == 64)
return expandRemainder(Rem);
// If bitwidth smaller than 64 extend inputs, extend output and proceed
@@ -593,13 +582,11 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
"Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
- if (DivTyBitWidth > 32)
- llvm_unreachable("Div of bitwidth greater than 32 not supported");
+ assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported");
if (DivTyBitWidth == 32)
return expandDivision(Div);
@@ -643,13 +630,12 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
"Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
- if (DivTy->isVectorTy())
- llvm_unreachable("Div over vectors not supported");
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
- if (DivTyBitWidth > 64)
- llvm_unreachable("Div of bitwidth greater than 64 not supported");
+ assert(DivTyBitWidth <= 64 &&
+ "Div of bitwidth greater than 64 not supported");
if (DivTyBitWidth == 64)
return expandDivision(Div);
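The IntegerDivision hunks turn guarded llvm_unreachable calls into asserts. The practical difference: an assert compiles away under NDEBUG, while llvm_unreachable remains an optimizer hint, so unsupported widths are now unchecked preconditions in release builds. A toy illustration of the resulting contract (hypothetical function, not from this commit):

    #include <cassert>

    // Precondition style used after this change: callers must pass a
    // 32- or 64-bit width; release builds do not check.
    unsigned expansionKind(unsigned BitWidth) {
      assert((BitWidth == 32 || BitWidth == 64) &&
             "Div of bitwidth other than 32 or 64 not supported");
      return BitWidth == 32 ? 0 : 1;
    }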
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9d40b69..b4b2e14 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -31,8 +31,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -64,6 +66,13 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
PredIteratorCache &PredCache, LoopInfo *LI) {
SmallVector<Use *, 16> UsesToRewrite;
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (Inst.getType()->isTokenTy())
+ return false;
+
BasicBlock *InstBB = Inst.getParent();
for (Use &U : Inst.uses()) {
@@ -84,9 +93,8 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Invoke instructions are special in that their result value is not available
// along their unwind edge. The code below tests to see whether DomBB
- // dominates
- // the value, so adjust DomBB to the normal destination block, which is
- // effectively where the value is first usable.
+ // dominates the value, so adjust DomBB to the normal destination block,
+ // which is effectively where the value is first usable.
BasicBlock *DomBB = Inst.getParent();
if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
DomBB = Inv->getNormalDest();
@@ -101,10 +109,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
- for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
- BBE = ExitBlocks.end();
- BBI != BBE; ++BBI) {
- BasicBlock *ExitBB = *BBI;
+ for (BasicBlock *ExitBB : ExitBlocks) {
if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
continue;
@@ -113,7 +118,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
continue;
PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
- Inst.getName() + ".lcssa", ExitBB->begin());
+ Inst.getName() + ".lcssa", &ExitBB->front());
// Add inputs from inside the loop for this PHI.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
@@ -148,26 +153,26 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
- for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ for (Use *UseToRewrite : UsesToRewrite) {
// If this use is in an exit block, rewrite to use the newly inserted PHI.
// This is required for correctness because SSAUpdate doesn't handle uses in
// the same block. It assumes the PHI we inserted is at the end of the
// block.
- Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+ UserBB = PN->getIncomingBlock(*UseToRewrite);
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
- if (UsesToRewrite[i]->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
- UsesToRewrite[i]->set(UserBB->begin());
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
+ UseToRewrite->set(&UserBB->front());
continue;
}
// Otherwise, do full PHI insertion.
- SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ SSAUpdate.RewriteUse(*UseToRewrite);
}
// Post process PHI instructions that were inserted into another disjoint loop
@@ -190,10 +195,9 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
}
// Remove PHI nodes that did not have any uses rewritten.
- for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
- if (AddedPHIs[i]->use_empty())
- AddedPHIs[i]->eraseFromParent();
- }
+ for (PHINode *PN : AddedPHIs)
+ if (PN->use_empty())
+ PN->eraseFromParent();
return true;
}
@@ -205,8 +209,8 @@ blockDominatesAnExit(BasicBlock *BB,
DominatorTree &DT,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
DomTreeNode *DomNode = DT.getNode(BB);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i])))
+ for (BasicBlock *ExitBB : ExitBlocks)
+ if (DT.dominates(DomNode, DT.getNode(ExitBB)))
return true;
return false;
@@ -227,25 +231,22 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
- for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end();
- BBI != BBE; ++BBI) {
- BasicBlock *BB = *BBI;
-
+ for (BasicBlock *BB : L.blocks()) {
// For large loops, avoid use-scanning by using dominance information: In
// particular, if a block does not dominate any of the loop exits, then none
// of the values defined in the block could be used outside the loop.
if (!blockDominatesAnExit(BB, DT, ExitBlocks))
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
- if (I->use_empty() ||
- (I->hasOneUse() && I->user_back()->getParent() == BB &&
- !isa<PHINode>(I->user_back())))
+ if (I.use_empty() ||
+ (I.hasOneUse() && I.user_back()->getParent() == BB &&
+ !isa<PHINode>(I.user_back())))
continue;
- Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI);
+ Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI);
}
}
@@ -266,8 +267,8 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
bool Changed = false;
// Recurse depth-first through inner loops.
- for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I)
- Changed |= formLCSSARecursively(**I, DT, LI, SE);
+ for (Loop *SubLoop : L.getSubLoops())
+ Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
Changed |= formLCSSA(L, DT, LI, SE);
return Changed;
@@ -296,8 +297,10 @@ struct LCSSA : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
}
};
}
@@ -306,6 +309,8 @@ char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
@@ -317,7 +322,8 @@ bool LCSSA::runOnFunction(Function &F) {
bool Changed = false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
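The early-out for token-typed instructions in processInstruction exists because PHI nodes of token type are not legal IR, so a token that is live out of a loop (as with a catchswitch whose handlers straddle the loop boundary) cannot be rewritten into LCSSA form. The guard in isolation (a sketch; the helper name is hypothetical):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Tokens may not feed PHI nodes, so LCSSA must leave them alone.
    static bool canFormLCSSAPhiFor(const Instruction &Inst) {
      return !Inst.getType()->isTokenTy();
    }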
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ba8af47..e75163f 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -17,10 +17,11 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -188,9 +189,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *BB = SI->getParent();
// Remove entries from PHI nodes which we no longer branch to...
- for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ for (BasicBlock *Succ : SI->successors()) {
// Found case matching a constant operand?
- BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == TheOnlyDest)
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
else
@@ -230,6 +230,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SIDef->getValue().getZExtValue()));
}
+ // Update make.implicit metadata to the newly-created conditional branch.
+ MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
+ if (MakeImplicitMD)
+ NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
+
// Delete the old switch.
SI->eraseFromParent();
return true;
@@ -283,8 +288,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
- // We don't want the landingpad instruction removed by anything this general.
- if (isa<LandingPadInst>(I))
+ // We don't want the landingpad-like instructions removed by anything this
+ // general.
+ if (I->isEHPad())
return false;
// We don't want debug info removed by anything this general, unless
@@ -414,6 +420,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
return false;
}
+static bool
+simplifyAndDCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+
+ return true;
+ }
+
+ if (Value *SimpleV = SimplifyInstruction(I, DL)) {
+ // Add the users to the worklist. CAREFUL: an instruction can use itself,
+ // in the case of a phi node.
+ for (User *U : I->users())
+ if (U != I)
+ WorkList.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+ I->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
/// simplify any instructions in it and recursively delete dead instructions.
///
@@ -422,30 +471,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI) {
bool MadeChange = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
#ifndef NDEBUG
// In debug builds, ensure that the terminator of the block is never replaced
// or deleted by these simplifications. The idea of simplification is that it
// cannot introduce new instructions, and there is no way to replace the
// terminator of a block without introducing a new instruction.
- AssertingVH<Instruction> TerminatorVH(--BB->end());
+ AssertingVH<Instruction> TerminatorVH(&BB->back());
#endif
- for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+      BI != E;) {
assert(!BI->isTerminator());
- Instruction *Inst = BI++;
+ Instruction *I = &*BI;
+ ++BI;
- WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TLI)) {
- MadeChange = true;
- if (BIHandle != BI)
- BI = BB->begin();
- continue;
- }
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
- MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
- if (BIHandle != BI)
- BI = BB->begin();
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
}
return MadeChange;
}
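The rewritten SimplifyInstructionsInBlock makes one linear pass that seeds a SmallSetVector worklist and then drains it, instead of rewinding to the block head whenever an iterator is invalidated. The seed-then-drain shape in miniature (a toy stand-in using plain containers, not the commit's code):

    #include <set>
    #include <vector>

    int main() {
      std::vector<int> Work = {6, 10, 15};
      std::set<int> InWork(Work.begin(), Work.end());
      while (!Work.empty()) {
        int N = Work.back();
        Work.pop_back();
        InWork.erase(N);
        // A "simplification" of N may enqueue new work, deduplicated the
        // same way SmallSetVector deduplicates instructions.
        if (N % 2 == 0 && InWork.insert(N / 2).second)
          Work.push_back(N / 2);
      }
    }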
@@ -808,7 +861,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// Copy over any phi, debug or lifetime instruction.
BB->getTerminator()->eraseFromParent();
- Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(),
+ BB->getInstList());
} else {
while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
// We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
@@ -1017,8 +1071,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (LdStHasDebugValue(DIVar, LI))
return true;
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
- DDI->getDebugLoc(), LI);
+ // We are now tracking the loaded value instead of the address. In the
+ // future if multi-location support is added to the IR, it might be
+ // preferable to keep tracking both the loaded value and the original
+ // address in case the alloca can not be elided.
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
+ LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
+ DbgValue->insertAfter(LI);
return true;
}
@@ -1034,8 +1093,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
SmallVector<DbgDeclareInst *, 4> Dbgs;
for (auto &FI : F)
- for (BasicBlock::iterator BI : FI)
- if (auto DDI = dyn_cast<DbgDeclareInst>(BI))
+ for (Instruction &BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
Dbgs.push_back(DDI);
if (Dbgs.empty())
@@ -1060,9 +1119,13 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
+ SmallVector<uint64_t, 1> NewDIExpr;
+ auto *DIExpr = DDI->getExpression();
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
- DDI->getExpression(), DDI->getDebugLoc(),
- CI);
+ DIB.createExpression(NewDIExpr),
+ DDI->getDebugLoc(), CI);
}
DDI->eraseFromParent();
}
@@ -1082,9 +1145,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
-bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool Deref) {
- DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+ Instruction *InsertBefore, DIBuilder &Builder,
+ bool Deref, int Offset) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address);
if (!DDI)
return false;
DebugLoc Loc = DDI->getDebugLoc();
@@ -1092,29 +1156,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- if (Deref) {
+ if (Deref || Offset) {
// Create a copy of the original DIDescriptor for the user variable,
// prepending a "deref" operation to the list of address elements, as the
// new llvm.dbg.declare will take a value storing the address of the
// memory for the variable, not the alloca itself.
SmallVector<uint64_t, 4> NewDIExpr;
- NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (Deref)
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (Offset > 0) {
+ NewDIExpr.push_back(dwarf::DW_OP_plus);
+ NewDIExpr.push_back(Offset);
+ } else if (Offset < 0) {
+ NewDIExpr.push_back(dwarf::DW_OP_minus);
+ NewDIExpr.push_back(-Offset);
+ }
if (DIExpr)
NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpr = Builder.createExpression(NewDIExpr);
}
- // Insert llvm.dbg.declare in the same basic block as the original alloca,
- // and remove old llvm.dbg.declare.
- BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
+ // Insert llvm.dbg.declare immediately after the original alloca, and remove
+ // old llvm.dbg.declare.
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
DDI->eraseFromParent();
return true;
}
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, bool Deref, int Offset) {
+ return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
+ Deref, Offset);
+}
+
+void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -1132,7 +1207,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
new UnreachableInst(I->getContext(), I);
// All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
+ BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
while (BBI != BBE) {
if (!BBI->use_empty())
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
@@ -1142,8 +1217,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
/// changeToCall - Convert the specified invoke into a normal call.
static void changeToCall(InvokeInst *II) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -1162,7 +1240,7 @@ static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock*> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
- BasicBlock *BB = F.begin();
+ BasicBlock *BB = &F.front();
Worklist.push_back(BB);
Reachable.insert(BB);
bool Changed = false;
@@ -1187,7 +1265,7 @@ static bool markAliveBlocks(Function &F,
if (MakeUnreachable) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
+ changeToUnreachable(&*BBI, false);
Changed = true;
break;
}
@@ -1201,7 +1279,7 @@ static bool markAliveBlocks(Function &F,
++BBI;
if (!isa<UnreachableInst>(BBI)) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
+ changeToUnreachable(&*BBI, false);
Changed = true;
}
break;
@@ -1253,6 +1331,40 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
+void llvm::removeUnwindEdge(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ changeToCall(II);
+ return;
+ }
+
+ TerminatorInst *NewTI;
+ BasicBlock *UnwindDest;
+
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
+ CatchSwitch->getName(), CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+
+ NewTI = NewCatchSwitch;
+ UnwindDest = CatchSwitch->getUnwindDest();
+ } else {
+ llvm_unreachable("Could not find unwind successor");
+ }
+
+ NewTI->takeName(TI);
+ NewTI->setDebugLoc(TI->getDebugLoc());
+ UnwindDest->removePredecessor(BB);
+ TI->replaceAllUsesWith(NewTI);
+ TI->eraseFromParent();
+}
+
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
@@ -1270,17 +1382,18 @@ bool llvm::removeUnreachableBlocks(Function &F) {
// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references...
for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
+ if (Reachable.count(&*BB))
continue;
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE;
+ ++SI)
if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
+ (*SI)->removePredecessor(&*BB);
BB->dropAllReferences();
}
for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
+ if (!Reachable.count(&*I))
I = F.getBasicBlockList().erase(I);
else
++I;
@@ -1288,9 +1401,10 @@ bool llvm::removeUnreachableBlocks(Function &F) {
return true;
}
-void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs) {
+void llvm::combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
- K->dropUnknownMetadata(KnownIDs);
+ K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
unsigned Kind = Metadata[i].first;
@@ -1326,8 +1440,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
// Only set the !nonnull if it is present in both instructions.
K->setMetadata(Kind, JMD);
break;
+ case LLVMContext::MD_invariant_group:
+ // Preserve !invariant.group in K.
+ break;
+ case LLVMContext::MD_align:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
}
}
+ // Set !invariant.group from J if J has it. If both instructions have it
+ // then we will just pick it from J - even when they are different.
+ // Also make sure that K is a load or store - e.g. combining a bitcast
+ // with a load could produce a bitcast with invariant.group metadata,
+ // which is invalid.
+ // FIXME: we should try to preserve both invariant.group md if they are
+ // different, but right now instruction can only have one invariant.group.
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
+ if (isa<LoadInst>(K) || isa<StoreInst>(K))
+ K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
@@ -1349,3 +1484,40 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
}
return Count;
}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlock *BB) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *I = cast<Instruction>(U.getUser());
+ if (DT.dominates(BB, I->getParent())) {
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
+ if (isa<IntrinsicInst>(CS.getInstruction()))
+ // Most LLVM intrinsics can never take a safepoint, so we don't need
+ // the stack to be parsable at the callsite. This is a highly useful
+ // optimization since intrinsic calls are fairly prevalent, particularly
+ // in debug builds.
+ return true;
+
+ // Check if the function is specifically marked as a gc leaf function.
+ //
+ // TODO: we should be checking the attributes on the call site as well.
+ if (const Function *F = CS.getCalledFunction())
+ return F->hasFnAttribute("gc-leaf-function");
+
+ return false;
+}
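callsGCLeafFunction is new utility API: a call site is treated as a GC leaf when it is an intrinsic call or when its callee carries the "gc-leaf-function" string attribute. A hedged usage sketch (the client function is hypothetical; the declaration is assumed to live in llvm/Transforms/Utils/Local.h alongside this definition):

    #include "llvm/IR/CallSite.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Hypothetical client: safepoint insertion can skip GC leaf calls.
    static bool needsSafepoint(ImmutableCallSite CS) {
      return !callsGCLeafFunction(CS);
    }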
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 5c98043..1fa4695 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -44,11 +44,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -78,7 +81,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock *> &SplitPreds,
Loop *L) {
// Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
+ Function::iterator BBI = --NewBB->getIterator();
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
if (&*BBI == SplitPreds[i])
return;
@@ -92,9 +95,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
// block that neighbors a BB actually in the loop.
BasicBlock *FoundBB = nullptr;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
+ Function::iterator BBI = SplitPreds[i]->getIterator();
+ if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
FoundBB = SplitPreds[i];
break;
}
@@ -112,17 +114,10 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Header = L->getHeader();
- // Get analyses that we try to update.
- auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>();
- auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>();
- auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -141,8 +136,10 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- AA, DT, LI, PreserveLCSSA);
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
+ LI, PreserveLCSSA);
+ if (!PreheaderBB)
+ return nullptr;
DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
<< PreheaderBB->getName() << "\n");
@@ -159,8 +156,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
/// This method is used to split exit blocks that have predecessors outside of
/// the loop.
static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, Pass *PP) {
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -175,10 +172,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewExitBB = nullptr;
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT,
- LI, PreserveLCSSA);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI,
+ PreserveLCSSA);
+ if (!NewExitBB)
+ return nullptr;
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
<< NewExitBB->getName() << "\n");
@@ -206,8 +203,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// \brief The first part of loop-nestification is to find a PHI node that tells
/// us how to partition the loops.
-static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
- DominatorTree *DT,
+static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
AssumptionCache *AC) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
@@ -216,7 +212,6 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
- if (AA) AA->deleteValue(PN);
PN->eraseFromParent();
continue;
}
@@ -251,18 +246,18 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
/// created.
///
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
- AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE, Pass *PP,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, bool PreserveLCSSA,
AssumptionCache *AC) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
- assert(!L->getHeader()->isLandingPad() &&
- "Can't insert backedge to landing pad");
+ BasicBlock *Header = L->getHeader();
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
- PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC);
+ PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -286,11 +281,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (SE)
SE->forgetLoop(L);
- bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
-
- BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- AA, DT, LI, PreserveLCSSA);
+ DT, LI, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -357,7 +349,6 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
/// and have that block branch to the loop header. This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
- AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
@@ -369,8 +360,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
if (!Preheader)
return nullptr;
- // The header is not a landing pad; preheader insertion should ensure this.
- assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+ // The header is not an EH pad; preheader insertion should ensure this.
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
@@ -394,7 +385,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
<< BEBlock->getName() << "\n");
// Move the new backedge block to right after the last backedge block.
- Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
// Now that the block has been inserted into the function, create PHI nodes in
@@ -443,7 +434,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// eliminate the PHI Node.
if (HasUniqueIncomingValue) {
NewPN->replaceAllUsesWith(UniqueValue);
- if (AA) AA->deleteValue(NewPN);
BEBlock->getInstList().erase(NewPN);
}
}
@@ -470,15 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
}
/// \brief Simplify one loop and queue further loops for simplification.
-///
-/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
-/// Pass pointer. The Pass pointer is used by numerous utilities to update
-/// specific analyses. Rather than a pass it would be much cleaner and more
-/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
- AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP,
- AssumptionCache *AC) {
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
bool Changed = false;
ReprocessLoop:
@@ -544,7 +529,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, PP);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (Preheader) {
++NumInserted;
Changed = true;
@@ -568,7 +553,7 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) {
++NumInserted;
Changed = true;
}
@@ -585,7 +570,7 @@ ReprocessLoop:
// common backedge instead.
if (L->getNumBackEdges() < 8) {
if (Loop *OuterL =
- separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) {
+ separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -602,7 +587,7 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
if (LoopLatch) {
++NumInserted;
Changed = true;
@@ -618,7 +603,6 @@ ReprocessLoop:
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
- if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -654,7 +638,7 @@ ReprocessLoop:
bool AllInvariant = true;
bool AnyInvariant = false;
for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Skip debug info intrinsics.
if (isa<DbgInfoIntrinsic>(Inst))
continue;
@@ -716,9 +700,9 @@ ReprocessLoop:
return Changed;
}
-bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
- AliasAnalysis *AA, ScalarEvolution *SE,
- AssumptionCache *AC) {
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -734,8 +718,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
}
while (!Worklist.empty())
- Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, AC);
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+ AC, PreserveLCSSA);
return Changed;
}
@@ -747,9 +731,6 @@ namespace {
initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
}
- // AA - If we have an alias analysis object to update, this is it, otherwise
- // this is null.
- AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
@@ -767,8 +748,11 @@ namespace {
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addPreserved<DependenceAnalysis>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
@@ -784,6 +768,9 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -796,15 +783,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
///
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
- AA = getAnalysisIfAvailable<AliasAnalysis>();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
return Changed;
}
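Throughout LoopSimplify the utility entry points stop taking a raw Pass* and instead accept the analyses they actually update, plus an explicit PreserveLCSSA flag. A sketch of the new calling convention from a hypothetical client (the header location for simplifyLoop is assumed; SE may be null when ScalarEvolution is unavailable, matching the wrapper-pass lookup above):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    bool canonicalizeNest(Loop *L, DominatorTree &DT, LoopInfo &LI,
                          ScalarEvolution *SE, AssumptionCache &AC) {
      return simplifyLoop(L, &DT, &LI, SE, &AC, /*PreserveLCSSA=*/false);
    }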
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 1dbce47..2499b88 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -73,7 +73,7 @@ static inline void RemapInstruction(Instruction *I,
/// of loops that have already been forgotten to prevent redundant, expensive
/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
static BasicBlock *
-FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
+FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
SmallPtrSetImpl<Loop *> &ForgottenLoops) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
@@ -109,12 +109,10 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
// Erase basic block from the function...
// ScalarEvolution holds references to loop exit blocks.
- if (LPM) {
- if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (ForgottenLoops.insert(L).second)
- SE->forgetLoop(L);
- }
+ if (SE) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (ForgottenLoops.insert(L).second)
+ SE->forgetLoop(L);
}
}
LI->removeBlock(BB);
@@ -155,15 +153,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
-/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
-/// removed from the LoopPassManager as well. LPM can also be NULL.
-///
-/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
-/// available from the Pass it must also preserve those analyses.
+/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
+/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool AllowRuntime, bool AllowExpensiveTripCount,
- unsigned TripMultiple, LoopInfo *LI, Pass *PP,
- LPPassManager *LPM, AssumptionCache *AC) {
+ unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -220,6 +216,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ Loop *ParentL = L->getParentLoop();
+ bool AllExitsAreInsideParentLoop = !ParentL ||
+ std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
+ [&](BasicBlock *BB) { return ParentL->contains(BB); });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
@@ -227,13 +229,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
if (RuntimeTripCount &&
- !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
+ !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
+ PreserveLCSSA))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- ScalarEvolution *SE =
- PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr;
if (SE)
SE->forgetLoop(L);
@@ -392,7 +393,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- ::RemapInstruction(I, LastValueMap);
+ ::RemapInstruction(&*I, LastValueMap);
}
// Loop over the PHI nodes in the original block, setting incoming values.
@@ -432,8 +433,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
- if (CompletelyUnroll && j == 0) {
- Dest = LoopExit;
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
NeedConditional = false;
}
@@ -473,7 +475,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM,
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
ForgottenLoops))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
}
@@ -483,29 +485,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// whole function's cache.
AC->clear();
- DominatorTree *DT = nullptr;
- if (PP) {
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- // Incrementally updating domtree after loop unrolling would be easy.
- if (DominatorTreeWrapperPass *DTWP =
- PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DT = &DTWP->getDomTree();
- DT->recalculate(*L->getHeader()->getParent());
- }
-
- // Simplify any new induction variables in the partially unrolled loop.
- if (SE && !CompletelyUnroll) {
- SmallVector<WeakVH, 16> DeadInsts;
- simplifyLoopIVs(L, SE, LPM, DeadInsts);
-
- // Aggressively clean up dead instructions that simplifyLoopIVs already
- // identified. Any remaining should be cleaned up below.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DT)
+ DT->recalculate(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
+
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
@@ -514,7 +511,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
@@ -529,29 +526,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
++NumUnrolled;
Loop *OuterL = L->getParentLoop();
- // Remove the loop from the LoopPassManager if it's completely removed.
- if (CompletelyUnroll && LPM != nullptr)
- LPM->deleteLoopFromQueue(L);
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->updateUnloop(L);
// If we have a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
- if (PP && DT) {
+ if (DT) {
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
+ bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
- // deleteLoopFromQueue updates LoopInfo.
+ // LoopInfo's been updated by updateUnloop.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
if (!OuterL->contains(LatchLoop))
while (OuterL->getParentLoop() != LatchLoop)
OuterL = OuterL->getParentLoop();
- formLCSSARecursively(*OuterL, *DT, LI, SE);
+ if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
+ formLCSSARecursively(*OuterL, *DT, LI, SE);
+ else
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
}
}
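The hunk above makes the last unrolled copy branch straight to the exit when the unroll is complete, then recomputes the dominator tree and re-establishes loop-simplify/LCSSA form on the affected outer loop. For readers less familiar with unrolling, here is a minimal standalone sketch (plain C++, not the LLVM API) of what complete unrolling does to a trip-count-3 loop:

    #include <cassert>

    // Sketch: complete unrolling of a trip-count-3 loop replaces the
    // backedge with straight-line code; the "last iteration" falls through
    // to the exit, which is why the branch above becomes unconditional.
    int sum3(const int *a) {
      int s = 0;
      // original: for (int i = 0; i < 3; ++i) s += a[i];
      s += a[0];  // iteration 0
      s += a[1];  // iteration 1
      s += a[2];  // iteration 2; no backedge, control reaches the exit
      return s;
    }

    int main() {
      int a[3] = {1, 2, 3};
      assert(sum3(a) == 6);
    }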
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index add5432..0d68f18 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -62,8 +62,8 @@ STATISTIC(NumRuntimeUnrolled,
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI, Pass *P) {
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -127,8 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
- P->mustPreserveAnalysisID(LCSSAID));
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
+ PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPH);
InsertPt->eraseFromParent();
@@ -150,7 +150,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
Function *F = Header->getParent();
LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *NewLoop = 0;
+ Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
if (!UnrollProlog) {
NewLoop = new Loop();
@@ -206,9 +206,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// Change the incoming values to the ones defined in the preheader or
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
if (UnrollProlog) {
- VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
@@ -279,7 +279,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
bool AllowExpensiveTripCount, LoopInfo *LI,
- LPPassManager *LPM) {
+ ScalarEvolution *SE, DominatorTree *DT,
+ bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
@@ -291,9 +292,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Use Scalar Evolution to compute the trip count. This allows more
// loops to be unrolled than relying on induction var simplification
- if (!LPM)
- return false;
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
if (!SE)
return false;
@@ -308,7 +306,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
- SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
@@ -333,10 +331,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
- // Grab analyses that we preserve.
- auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
// It helps to split the original preheader twice, one for the end of the
@@ -397,8 +391,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
VMap, LI);
// Insert the cloned blocks into function just before the original loop
- F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],
- F->end());
+ F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// Rewrite the cloned instruction operands to use the values
// created when the clone is created.
@@ -406,7 +400,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end();
I != E; ++I) {
- RemapInstruction(I, VMap,
+ RemapInstruction(&*I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
}
}
@@ -414,8 +408,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
- /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
+ ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
+ PreserveLCSSA);
NumRuntimeUnrolled++;
return true;
}
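UnrollRuntimeLoopProlog, as reworked above, takes ScalarEvolution, the DominatorTree, and a PreserveLCSSA flag directly instead of digging them out of a LoopPassManager. The transformation itself is unchanged: SCEV supplies TripCount = BECount + 1, and a prolog loop peels TripCount % Count iterations so the unrolled main loop never needs a remainder check. A hedged source-level sketch of that shape, with Count = 4 (illustrative code, not the pass's output):

    #include <cassert>
    #include <cstddef>

    // Sketch of runtime prolog unrolling by Count = 4: the prolog peels
    // n % 4 iterations so the main body can be unrolled without a
    // remainder check.
    long sum(const long *a, std::size_t n) {
      long s = 0;
      std::size_t i = 0;
      for (std::size_t prolog = n % 4; i < prolog; ++i)  // prolog loop
        s += a[i];
      for (; i < n; i += 4) {                            // unrolled main loop
        s += a[i];
        s += a[i + 1];
        s += a[i + 2];
        s += a[i + 3];
      }
      return s;
    }

    int main() {
      long a[7] = {1, 1, 1, 1, 1, 1, 1};
      assert(sum(a, 7) == 7);
    }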
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 5cbde94..e038805 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -34,6 +34,124 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
return true;
}
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+Instruction *
+RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
+ if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)),
+ m_And(m_APInt(M), m_Instruction(I))))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
+
+bool RecurrenceDescriptor::getSourceExtensionKind(
+ Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ bool FoundOneOperand = false;
+ unsigned DstSize = RT->getPrimitiveSizeInBits();
+ Worklist.push_back(Exit);
+
+ // Traverse the instructions in the reduction expression, beginning with the
+ // exit value.
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (Use &U : I->operands()) {
+
+ // Terminate the traversal if the operand is not an instruction, or we
+ // reach the starting value.
+ Instruction *J = dyn_cast<Instruction>(U.get());
+ if (!J || J == Start)
+ continue;
+
+ // Otherwise, investigate the operation if it is also in the expression.
+ if (Visited.count(J)) {
+ Worklist.push_back(J);
+ continue;
+ }
+
+ // If the operand is not in Visited, it is not a reduction operation, but
+ // it does feed into one. Make sure it is either a single-use sign- or
+ // zero-extend instruction.
+ CastInst *Cast = dyn_cast<CastInst>(J);
+ bool IsSExtInst = isa<SExtInst>(J);
+ if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
+ return false;
+
+ // Ensure the source type of the extend is no larger than the reduction
+ // type. It is not necessary for the types to be identical.
+ unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ if (SrcSize > DstSize)
+ return false;
+
+ // Furthermore, ensure that all such extends are of the same kind.
+ if (FoundOneOperand) {
+ if (IsSigned != IsSExtInst)
+ return false;
+ } else {
+ FoundOneOperand = true;
+ IsSigned = IsSExtInst;
+ }
+
+ // Lastly, if the source type of the extend matches the reduction type,
+ // add the extend to CI so that we can avoid accounting for it in the
+ // cost model.
+ if (SrcSize == DstSize)
+ CI.insert(Cast);
+ }
+ }
+ return true;
+}
+
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
Loop *TheLoop, bool HasFunNoNaNAttr,
RecurrenceDescriptor &RedDes) {
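lookThroughAnd above only fires when the AND mask M satisfies M == 2^x - 1, which is exactly when (*M + 1).exactLogBase2() is positive; x then becomes the promoted recurrence's bit width. The same predicate can be stated in portable C++ as follows (a sketch assuming masks fit in 64 bits, whereas APInt is arbitrary-width):

    #include <cassert>
    #include <cstdint>

    // Sketch of the mask test used above: m is a low-bit mask exactly when
    // m + 1 is a power of two; the exponent is the recurrence bit width.
    int maskBits(uint64_t m) {
      uint64_t p = m + 1;
      if (p == 0 || (p & (p - 1)) != 0)
        return 0;                 // m + 1 is not a power of two, no match
      int bits = 0;
      while (p > 1) {
        p >>= 1;
        ++bits;
      }
      return bits;                // m == 2^bits - 1
    }

    int main() {
      assert(maskBits(0xFF) == 8);   // i & 255: an 8-bit promoted recurrence
      assert(maskBits(0x06) == 0);   // not of the form 2^x - 1
    }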
@@ -68,10 +186,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
unsigned NumCmpSelectPatternInst = 0;
InstDesc ReduxDesc(false, nullptr);
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
- Worklist.push_back(Phi);
- VisitedInsts.insert(Phi);
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
// A value in the reduction can be used:
// - By the reduction:
@@ -110,10 +250,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
!VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
return false;
- // Any reduction instruction must be of one of the allowed kinds.
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isRecurrence())
- return false;
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
@@ -131,7 +275,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi;
+ FoundReduxOp |= !IsAPhi && Cur != Start;
// Process users of current instruction. Push non-PHI nodes after PHI nodes
// onto the stack. This way we are going to have seen all inputs to PHI
@@ -193,6 +337,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
+ // If we think Phi may have been type-promoted, we also need to ensure that
+ // all source operands of the reduction are either SExtInsts or ZExtInsts. If
+ // so, we will be able to evaluate the reduction in the narrower bit width.
+ if (Start != Phi)
+ if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+ IsSigned, VisitedInsts, CastInsts))
+ return false;
+
// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.
@@ -200,9 +352,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
- RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.getMinMaxKind());
-
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;
return true;
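The extra bookkeeping above (RecurrenceType, IsSigned, CastInsts) exists so the vectorizer can evaluate a type-promoted reduction in its narrow width. The underlying identity is that a masked wide add reduction equals the wraparound narrow one, as this small self-contained check illustrates (illustrative values only):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch: an add reduction masked to 8 bits after type promotion gives
    // the same result whether computed in 32-bit arithmetic and masked, or
    // directly in wraparound 8-bit arithmetic. This is what lets the
    // reduction be evaluated in the narrower width.
    int main() {
      std::vector<uint8_t> data = {200, 100, 57, 13};
      uint32_t wide = 0;
      uint8_t narrow = 0;
      for (uint8_t v : data) {
        wide = (wide + v) & 0xFF;  // promoted form: add in i32, mask with AND
        narrow = narrow + v;       // narrow form: wraparound i8 add
      }
      assert(narrow == wide);
    }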
@@ -263,14 +415,14 @@ RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
bool FP = I->getType()->isFloatingPointTy();
- bool FastMath = FP && I->hasUnsafeAlgebra();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->hasUnsafeAlgebra())
+ UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
switch (I->getOpcode()) {
default:
return InstDesc(false, I);
case Instruction::PHI:
- if (FP &&
- (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
- return InstDesc(false, I);
return InstDesc(I, Prev.getMinMaxKind());
case Instruction::Sub:
case Instruction::Add:
@@ -284,10 +436,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
case Instruction::Xor:
return InstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult && FastMath, I);
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
case Instruction::FSub:
case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd && FastMath, I);
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
@@ -442,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
break;
}
+ // We only match FP sequences with unsafe algebra, so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(FMF);
+
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
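createMinMaxOp emits a min/max as a compare feeding a select, and the inserted FastMathFlagGuard stamps unsafe algebra on both since only fast-math FP sequences are matched in the first place. The scalar shape of the emitted pair, sketched in plain C++ (the comments mirror the IR value names above; the code is otherwise illustrative):

    #include <cassert>

    // Sketch of the compare/select pair: under unsafe algebra the compare
    // may ignore NaN ordering, which is why the flags are set above.
    float fminSelect(float a, float b) {
      bool cmp = a < b;          // rdx.minmax.cmp
      return cmp ? a : b;        // rdx.minmax.select
    }

    int main() {
      assert(fminSelect(1.0f, 2.0f) == 1.0f);
      assert(fminSelect(2.0f, 1.0f) == 1.0f);
    }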
@@ -452,8 +611,54 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
-bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
- ConstantInt *&StepValue) {
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ ConstantInt *Step)
+ : StartValue(Start), IK(K), StepValue(Step) {
+ assert(IK != IK_NoInduction && "Not an induction");
+ assert(StartValue && "StartValue is null");
+ assert(StepValue && !StepValue->isZero() && "StepValue is zero");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+ assert(StepValue->getType()->isIntegerTy() &&
+ "StepValue is not an integer");
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
+ return StepValue->getSExtValue();
+ return 0;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ switch (IK) {
+ case IK_IntInduction:
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (StepValue->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateAdd(StartValue, Index);
+
+ case IK_PtrInduction:
+ assert(Index->getType() == StepValue->getType() &&
+ "Index type does not match StepValue type");
+ if (StepValue->isMinusOne())
+ Index = B.CreateNeg(Index);
+ else if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateGEP(nullptr, StartValue, Index);
+
+ case IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ InductionDescriptor &D) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer inductions variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
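The new InductionDescriptor::transform computes the induction's value at a given iteration as Start + Index * Step, strength-reducing the multiply away when the step is 1 or -1 and using a GEP for pointer inductions. A minimal integer-case sketch (not the LLVM API):

    #include <cassert>

    // Sketch of transform() for the integer case: the value at iteration
    // Index is Start + Index * Step, with the multiply skipped when Step
    // is 1 and replaced by a subtraction when Step is -1, mirroring the
    // isOne()/isMinusOne() special cases above.
    long inductionAt(long start, long step, long index) {
      if (step == -1)
        return start - index;        // B.CreateSub(StartValue, Index)
      if (step != 1)
        index = index * step;        // B.CreateMul(Index, StepValue)
      return start + index;          // B.CreateAdd(StartValue, Index)
    }

    int main() {
      assert(inductionAt(10, 1, 3) == 13);
      assert(inductionAt(10, -1, 3) == 7);
      assert(inductionAt(10, 4, 3) == 22);
    }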
@@ -467,6 +672,10 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
return false;
}
+ assert(AR->getLoop()->getHeader() == Phi->getParent() &&
+ "PHI is an AddRec for a different loop?!");
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -475,7 +684,7 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
- StepValue = CV;
+ D = InductionDescriptor(StartValue, IK_IntInduction, CV);
return true;
}
@@ -494,6 +703,27 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
- StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+ auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
return true;
}
+
+/// \brief Returns the instructions that use values defined in the loop.
+SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
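findDefsUsedOutsideOfLoop walks every instruction in the loop and keeps those with at least one user outside it, using std::any_of over the user list. The same pattern with stand-in types, as a self-contained sketch (Inst and the block-id set here are hypothetical simplifications, not LLVM's types):

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    // Sketch: collect items defined inside a region that have at least one
    // user outside it, using the same any_of pattern as above.
    struct Inst {
      int block;                       // block the instruction lives in
      std::vector<const Inst *> users; // instructions that use this one
    };

    std::vector<const Inst *> defsUsedOutside(const std::vector<Inst> &insts,
                                              const std::set<int> &loopBlocks) {
      std::vector<const Inst *> usedOutside;
      for (const Inst &I : insts)
        if (loopBlocks.count(I.block) &&
            std::any_of(I.users.begin(), I.users.end(), [&](const Inst *U) {
              return !loopBlocks.count(U->block);
            }))
          usedOutside.push_back(&I);
      return usedOutside;
    }

    int main() {
      Inst outsideUser{99, {}};
      Inst def{1, {&outsideUser}};       // defined in block 1, used in block 99
      std::vector<Inst> insts = {def, outsideUser};
      assert(defsUsedOutside(insts, {1, 2}).size() == 1);
    }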
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 832079d..9a2a06c 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -13,43 +13,81 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopVersioning.h"
using namespace llvm;
LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT,
- const SmallVector<int, 8> *PtrToPartition)
- : VersionedLoop(L), NonVersionedLoop(nullptr),
- PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool UseLAIChecks)
+ : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
+ SE(SE) {
assert(L->getExitBlock() && "No single exit block");
assert(L->getLoopPreheader() && "No preheader");
+ if (UseLAIChecks) {
+ setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
+ setSCEVChecks(LAI.PSE.getUnionPredicate());
+ }
}
-bool LoopVersioning::needsRuntimeChecks() const {
- return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition);
+void LoopVersioning::setAliasChecks(
+ const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
+ AliasChecks = std::move(Checks);
}
-void LoopVersioning::versionLoop(Pass *P) {
+void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
+ Preds = std::move(Check);
+}
+
+void LoopVersioning::versionLoop(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
+ Value *SCEVRuntimeCheck;
+ Value *RuntimeCheck = nullptr;
+
// Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
+ BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
+ LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
+ const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
+ "scev.check");
+ SCEVRuntimeCheck =
+ Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
+ auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
+
+ // Discard the SCEV runtime check if it is always true.
+ if (CI && CI->isZero())
+ SCEVRuntimeCheck = nullptr;
+
+ if (MemRuntimeCheck && SCEVRuntimeCheck) {
+ RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
+ SCEVRuntimeCheck, "ldist.safe");
+ if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
+ I->insertBefore(RuntimeCheckBB->getTerminator());
+ } else
+ RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+ assert(RuntimeCheck && "called even though we don't need "
+ "any runtime checks");
+
// Rename the block to make the IR more readable.
- MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck");
+ RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+ ".lver.check");
// Create empty preheader for the loop (and after cloning for the
// non-versioned loop).
- BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+ BasicBlock *PH =
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
// Clone the loop including the preheader.
@@ -58,20 +96,23 @@ void LoopVersioning::versionLoop(Pass *P) {
// block is a join between the two loops.
SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
NonVersionedLoop =
- cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig",
- LI, DT, NonVersionedLoopBlocks);
+ cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+ ".lver.orig", LI, DT, NonVersionedLoopBlocks);
remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
// Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = MemCheckBB->getTerminator();
+ Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
- OrigTerm);
+ VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
// memchecking block.
- DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+ // Adds the necessary PHI nodes for the versioned loops based on the
+ // loop-defined values used outside of the loop.
+ addPHINodes(DefsUsedOutside);
}
void LoopVersioning::addPHINodes(
@@ -94,7 +135,7 @@ void LoopVersioning::addPHINodes(
// If not create it.
if (!PN) {
PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- PHIBlock->begin());
+ &PHIBlock->front());
for (auto *User : Inst->users())
if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
User->replaceUsesOfWith(Inst, PN);
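versionLoop above ORs the memcheck with the expanded SCEV predicate and branches on the result: a true combined check means the no-alias/SCEV assumptions failed, so control reaches the untouched original loop, otherwise the versioned clone runs. In outline (a sketch; the flag names are illustrative):

    #include <cassert>

    // Sketch of the dispatch versionLoop builds: the combined runtime
    // check is MemRuntimeCheck | SCEVRuntimeCheck; a true result means the
    // assumptions failed, so the original loop runs.
    int runVersioned(bool memCheckFailed, bool scevCheckFailed) {
      bool runtimeCheck = memCheckFailed || scevCheckFailed;  // the OR above
      if (runtimeCheck)
        return 0;  // non-versioned (original) loop
      return 1;    // versioned loop, free to assume the checked predicates
    }

    int main() {
      assert(runVersioned(false, false) == 1);
      assert(runVersioned(true, false) == 0);
    }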
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 66d57b0..b0ad4d5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -69,7 +69,7 @@ bool LowerInvoke::runOnFunction(Function &F) {
BranchInst::Create(II->getNormalDest(), II);
// Remove any PHI node entries from the exception destination.
- II->getUnwindDest()->removePredecessor(BB);
+ II->getUnwindDest()->removePredecessor(&*BB);
// Remove the invoke instruction now.
BB->getInstList().erase(II);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 4acd988..52beb15 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -49,8 +49,7 @@ namespace {
return I != Ranges.end() && I->Low <= R.Low;
}
- /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
- /// instructions.
+ /// Replace all SwitchInst instructions with chained branch instructions.
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
@@ -78,7 +77,7 @@ namespace {
typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange>::iterator CaseItr;
private:
- void processSwitchInst(SwitchInst *SI);
+ void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
@@ -116,21 +115,30 @@ FunctionPass *llvm::createLowerSwitchPass() {
bool LowerSwitch::runOnFunction(Function &F) {
bool Changed = false;
+ SmallPtrSet<BasicBlock*, 8> DeleteList;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+ BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
+
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(Cur))
+ continue;
if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
Changed = true;
- processSwitchInst(SI);
+ processSwitchInst(SI, DeleteList);
}
}
+ for (BasicBlock* BB: DeleteList) {
+ DeleteDeadBlock(BB);
+ }
+
return Changed;
}
-// operator<< - Used for debugging purposes.
-//
+/// Used for debugging purposes.
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C)
LLVM_ATTRIBUTE_USED;
@@ -147,23 +155,24 @@ static raw_ostream& operator<<(raw_ostream &O,
return O << "]";
}
-// \brief Update the first occurrence of the "switch statement" BB in the PHI
-// node with the "new" BB. The other occurrences will:
-//
-// 1) Be updated by subsequent calls to this function. Switch statements may
-// have more than one outcoming edge into the same BB if they all have the same
-// value. When the switch statement is converted these incoming edges are now
-// coming from multiple BBs.
-// 2) Removed if subsequent incoming values now share the same case, i.e.,
-// multiple outcome edges are condensed into one. This is necessary to keep the
-// number of phi values equal to the number of branches to SuccBB.
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outgoing edge into the same BB if they all have the same
+/// value. When the switch statement is converted these incoming edges are now
+/// coming from multiple BBs.
+/// 2) Removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outgoing edges are condensed into one. This is necessary to keep the
+/// number of phi values equal to the number of branches to SuccBB.
static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
unsigned NumMergedCases) {
- for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI();
+ for (BasicBlock::iterator I = SuccBB->begin(),
+ IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
PHINode *PN = cast<PHINode>(I);
- // Only update the first occurence.
+ // Only update the first occurrence.
unsigned Idx = 0, E = PN->getNumIncomingValues();
unsigned LocalNumMergedCases = NumMergedCases;
for (; Idx != E; ++Idx) {
@@ -173,7 +182,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
- // Remove additional occurences coming from condensed cases and keep the
+ // Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
SmallVector<unsigned, 8> Indices;
for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
@@ -188,11 +197,11 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-// switchConvert - Convert the switch statement into a binary lookup of
-// the case values. The function recursively builds this tree.
-// LowerBound and UpperBound are used to keep track of the bounds for Val
-// that have already been checked by a block emitted by one of the previous
-// calls to switchConvert in the call stack.
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to switchConvert in the call
+/// stack.
BasicBlock *
LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
ConstantInt *UpperBound, Value *Val,
@@ -278,28 +287,24 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
UpperBound, Val, NewNode, OrigBlock,
Default, UnreachableRanges);
- Function::iterator FI = OrigBlock;
- F->getBasicBlockList().insert(++FI, NewNode);
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
return NewNode;
}
-// newLeafBlock - Create a new leaf block for the binary lookup tree. It
-// checks if the switch's value == the case's value. If not, then it
-// jumps to the default branch. At this point in the tree, the value
-// can't be another valid case value, so the jump to the "default" branch
-// is warranted.
-//
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
BasicBlock* OrigBlock,
BasicBlock* Default)
{
Function* F = OrigBlock->getParent();
BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
- Function::iterator FI = OrigBlock;
- F->getBasicBlockList().insert(++FI, NewLeaf);
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
// Emit comparison
ICmpInst* Comp = nullptr;
@@ -352,7 +357,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
return NewLeaf;
}
-// Clusterify - Transform simple list of Cases into list of CaseRange's
+/// Transform simple list of Cases into list of CaseRange's.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
@@ -394,10 +399,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
return numCmps;
}
-// processSwitchInst - Replace the specified switch instruction with a sequence
-// of chained if-then insts in a balanced binary search.
-//
-void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void LowerSwitch::processSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock*> &DeleteList) {
BasicBlock *CurBlock = SI->getParent();
BasicBlock *OrigBlock = CurBlock;
Function *F = CurBlock->getParent();
@@ -424,7 +429,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
std::vector<IntRange> UnreachableRanges;
if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
- // Make the bounds tightly fitted around the case value range, becase we
+ // Make the bounds tightly fitted around the case value range, because we
// know that the value passed to the switch must be exactly one of the case
// values.
assert(!Cases.empty());
@@ -495,7 +500,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
+ F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
BranchInst::Create(Default, NewDefault);
// If there is an entry in any PHI nodes for the default edge, make sure
@@ -518,7 +523,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock *OldDefault = SI->getDefaultDest();
CurBlock->getInstList().erase(SI);
- // If the Default block has no more predecessors just remove it.
+ // If the Default block has no more predecessors just add it to DeleteList.
if (pred_begin(OldDefault) == pred_end(OldDefault))
- DeleteDeadBlock(OldDefault);
+ DeleteList.insert(OldDefault);
}
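switchConvert recursively splits the sorted case list around a pivot, so the lowered code performs a balanced binary search whose leaves each do one equality test before falling through to the default. Hand-written, the result for four cases looks roughly like this (illustrative handler values):

    #include <cassert>

    // Sketch of the shape LowerSwitch produces for
    // switch (v) { case 1: case 3: case 5: case 7: ... }: a balanced
    // binary search over the sorted case values, ending in
    // equality-testing leaf blocks.
    int dispatch(int v) {
      if (v < 5) {              // pivot node
        if (v == 1) return 10;  // leaf for case 1
        if (v == 3) return 30;  // leaf for case 3
      } else {
        if (v == 5) return 50;  // leaf for case 5
        if (v == 7) return 70;  // leaf for case 7
      }
      return -1;                // default
    }

    int main() {
      assert(dispatch(3) == 30);
      assert(dispatch(7) == 70);
      assert(dispatch(4) == -1);
    }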
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 00cf4e6..aa1e35d 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -63,6 +63,9 @@ bool PromotePass::runOnFunction(Function &F) {
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 395a46b..c999bd0 100644
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -42,6 +42,24 @@ namespace {
}
};
+ static const char *const metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+ };
+
+ struct Renamer {
+ Renamer(unsigned int seed) {
+ prng.srand(seed);
+ }
+
+ const char *newName() {
+ return metaNames[prng.rand() % array_lengthof(metaNames)];
+ }
+
+ PRNG prng;
+ };
+
struct MetaRenamer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
MetaRenamer() : ModulePass(ID) {
@@ -53,36 +71,26 @@ namespace {
}
bool runOnModule(Module &M) override {
- static const char *const metaNames[] = {
- // See http://en.wikipedia.org/wiki/Metasyntactic_variable
- "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
- "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
- };
-
// Seed our PRNG with simple additive sum of ModuleID. We're looking to
// simply avoid always having the same function names, and we need to
// remain deterministic.
unsigned int randSeed = 0;
- for (std::string::const_iterator I = M.getModuleIdentifier().begin(),
- E = M.getModuleIdentifier().end(); I != E; ++I)
- randSeed += *I;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
- PRNG prng;
- prng.srand(randSeed);
+ Renamer renamer(randSeed);
// Rename all aliases
- for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE; ++AI) {
+ for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
StringRef Name = AI->getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
AI->setName("alias");
}
-
+
// Rename all global variables
- for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
- GI != GE; ++GI) {
+ for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
StringRef Name = GI->getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
@@ -93,40 +101,37 @@ namespace {
// Rename all struct types
TypeFinder StructTypes;
StructTypes.run(M, true);
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
- StructType *STy = StructTypes[i];
+ for (StructType *STy : StructTypes) {
if (STy->isLiteral() || STy->getName().empty()) continue;
SmallString<128> NameStorage;
- STy->setName((Twine("struct.") + metaNames[prng.rand() %
- array_lengthof(metaNames)]).toStringRef(NameStorage));
+ STy->setName((Twine("struct.") +
+ renamer.newName()).toStringRef(NameStorage));
}
// Rename all functions
- for (Module::iterator FI = M.begin(), FE = M.end();
- FI != FE; ++FI) {
- StringRef Name = FI->getName();
+ for (auto &F : M) {
+ StringRef Name = F.getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
- FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
- runOnFunction(*FI);
+ F.setName(renamer.newName());
+ runOnFunction(F);
}
return true;
}
bool runOnFunction(Function &F) {
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
- AI != AE; ++AI)
+ for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
if (!AI->getType()->isVoidTy())
AI->setName("arg");
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- BB->setName("bb");
+ for (auto &BB : F) {
+ BB.setName("bb");
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->getType()->isVoidTy())
- I->setName("tmp");
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
}
return true;
}
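The extracted Renamer keeps the pass deterministic: the PRNG seed is just the byte sum of the module identifier, so renaming the same module always yields the same names. In miniature (a sketch, not the pass itself):

    #include <cassert>
    #include <string>

    // Sketch of the deterministic seeding used above: the seed is the
    // additive sum of the module identifier's bytes, so the same module
    // always gets the same sequence of metasyntactic names.
    unsigned seedFor(const std::string &moduleId) {
      unsigned seed = 0;
      for (char c : moduleId)
        seed += static_cast<unsigned char>(c);
      return seed;
    }

    int main() {
      assert(seedFor("a.ll") == seedFor("a.ll"));  // deterministic
      assert(seedFor("a.ll") != seedFor("b.ll"));  // usually distinct
    }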
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index d69a81e..9ec28a3 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array,
}
GVCtor->eraseFromParent();
} else {
- // Use a simple two-field struct if there isn't one already.
+ // Use the new three-field struct if there isn't one already.
EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- nullptr);
+ IRB.getInt8PtrTy(), nullptr);
}
// Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
@@ -107,7 +107,8 @@ Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
- ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) {
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ StringRef VersionCheckName) {
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
@@ -122,6 +123,13 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
AttributeSet()));
InitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(InitFunction, InitArgs);
+ if (!VersionCheckName.empty()) {
+ Function *VersionCheckFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeSet()));
+ IRB.CreateCall(VersionCheckFunction, {});
+ }
return std::make_pair(Ctor, InitFunction);
}
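Two things change in ModuleUtils above: new llvm.global_ctors entries use the three-field form, which appends an i8* of associated data to the {priority, function} pair, and sanitizer ctors can now call an optional version-check function. A C-level model of the three-field entry (a sketch of the IR struct, not an LLVM C++ type):

    #include <cstdint>

    // Model of the new three-field llvm.global_ctors entry built above:
    // { i32 priority, void ()* ctor, i8* associated data }. The third
    // field lets an entry be tied to a specific global (e.g. for comdat
    // handling).
    struct GlobalCtorEntry {
      int32_t priority;   // i32: run order, lower runs first
      void (*ctor)();     // void ()*: the constructor to call
      void *data;         // i8*: associated global, or null
    };

    static void init() {}

    // One entry at priority 65535 with no associated data.
    static GlobalCtorEntry entry = {65535, &init, nullptr};

    int main() {
      entry.ctor();  // the runtime walks the array and calls each ctor
    }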
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a87f850..c4f9b9f 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -205,10 +205,9 @@ public:
// avoid gratuitous rescans.
const BasicBlock *BB = I->getParent();
unsigned InstNo = 0;
- for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
- ++BBI)
- if (isInterestingInstruction(BBI))
- InstNumbers[BBI] = InstNo++;
+ for (const Instruction &BBI : *BB)
+ if (isInterestingInstruction(&BBI))
+ InstNumbers[&BBI] = InstNo++;
It = InstNumbers.find(I);
assert(It != InstNumbers.end() && "Didn't insert instruction?");
@@ -402,8 +401,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
LBI.deleteValue(DDI);
@@ -425,14 +423,17 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
-static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths. e.g. code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+/// int t = *A;
+/// if (!first_iteration)
+/// use(t);
+/// *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
AliasSetTracker *AST) {
// The trickiest case to handle is when we have large blocks. Because of this,
@@ -467,10 +468,15 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
std::make_pair(LoadIdx,
static_cast<StoreInst *>(nullptr)),
less_first());
-
- if (I == StoresByIndex.begin())
- // If there is no store before this load, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (I == StoresByIndex.begin()) {
+ if (StoresByIndex.empty())
+ // If there are no stores, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+ }
else
// Otherwise, there was a store before this load, the load takes its value.
LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
@@ -486,8 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
}
SI->eraseFromParent();
@@ -506,6 +511,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
}
++NumLocalPromoted;
+ return true;
}
void PromoteMem2Reg::run() {
@@ -557,9 +563,8 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
- if (Info.OnlyUsedInOneBlock) {
- promoteSingleBlockAlloca(AI, Info, LBI, AST);
-
+ if (Info.OnlyUsedInOneBlock &&
+ promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -636,7 +641,7 @@ void PromoteMem2Reg::run() {
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values));
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
@@ -854,7 +859,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- BB->begin());
+ &BB->front());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
@@ -919,7 +924,7 @@ NextIteration:
return;
for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
- Instruction *I = II++; // get the instruction, increment iterator
+ Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
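promoteSingleBlockAlloca keeps the block's stores sorted by instruction number and binary-searches for the store nearest below each load; the fix above makes it bail out (returning false) when a load precedes every store yet stores exist later in the block, since such a load is not simply undef. The lookup in isolation (a sketch using plain standard-library types):

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <utility>
    #include <vector>

    // Sketch of the StoresByIndex lookup: stores are kept sorted by
    // instruction number; lower_bound on the load's number finds the first
    // store at or after it, so std::prev gives the store whose value the
    // load takes. Landing at begin() with a non-empty store list is the
    // "read before written" case the patch above now rejects.
    int main() {
      // (instruction number, stored value)
      std::vector<std::pair<unsigned, int>> storesByIndex = {{2, 10}, {5, 20}};
      unsigned loadIdx = 4;
      auto it = std::lower_bound(
          storesByIndex.begin(), storesByIndex.end(), std::make_pair(loadIdx, 0),
          [](const std::pair<unsigned, int> &a,
             const std::pair<unsigned, int> &b) {
            return a.first < b.first;  // less_first: compare indices only
          });
      assert(it != storesByIndex.begin());  // a store precedes the load
      assert(std::prev(it)->second == 10);  // load takes the value stored at 2
    }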
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 36781c1..d0932f83 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -43,7 +44,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <map>
@@ -73,6 +73,22 @@ static cl::opt<bool> HoistCondStores(
"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
cl::desc("Hoist conditional stores if an unconditional store precedes"));
+static cl::opt<bool> MergeCondStores(
+ "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores even if an unconditional store does not "
+ "precede - hoist multiple conditional stores into a single "
+ "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+ "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+ cl::desc("When merging conditional stores, do so even if the resultant "
+ "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+ "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+ cl::desc("Allow exactly one expensive instruction to be speculatively "
+ "executed"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -83,13 +99,13 @@ STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
// The first field contains the value that the switch produces when a certain
- // case group is selected, and the second field is a vector containing the cases
- // composing the case group.
+ // case group is selected, and the second field is a vector containing the
+ // cases composing the case group.
typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
SwitchCaseResultVectorTy;
// The first field contains the phi node that generates a result of the switch
- // and the second field contains the value generated for a certain case in the switch
- // for that PHI.
+ // and the second field contains the value generated for a certain case in the
+ // switch for that PHI.
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -124,6 +140,7 @@ class SimplifyCFGOpt {
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifyCleanupReturn(CleanupReturnInst *RI);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool SimplifyIndirectBr(IndirectBrInst *IBI);
@@ -226,6 +243,7 @@ static unsigned ComputeSpeculationCost(const User *I,
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I);
}
+
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -246,7 +264,8 @@ static unsigned ComputeSpeculationCost(const User *I,
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ unsigned Depth = 0) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -284,15 +303,24 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
unsigned Cost = ComputeSpeculationCost(I, TTI);
- if (Cost > CostRemaining)
+ // Allow exactly one instruction to be speculated regardless of its cost
+ // (as long as it is safe to do so).
+ // This is intended to flatten the CFG even if the instruction is a division
+ // or other expensive operation. The speculation of an expensive instruction
+ // is expected to be undone in CodeGenPrepare if the speculation has not
+ // enabled further IR optimizations.
+ if (Cost > CostRemaining &&
+ (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
return false;
- CostRemaining -= Cost;
+ // Avoid unsigned wrap.
+ CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+ Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
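DominatesMergePoint now exempts exactly one over-budget instruction (only at depth 0, before anything has been collected, and only when speculate-one-expensive-inst is on) and clamps the cost subtraction so the unsigned budget cannot wrap. The clamp matters, as this small sketch shows:

    #include <cassert>

    // Sketch of the budget arithmetic above: unsigned subtraction must be
    // clamped, otherwise one expensive instruction would wrap CostRemaining
    // to a huge value and let everything after it be speculated for free.
    unsigned spend(unsigned budget, unsigned cost) {
      return cost > budget ? 0 : budget - cost;
    }

    int main() {
      assert(spend(10, 4) == 6);
      assert(spend(10, 40) == 0);  // clamped, not wrapped to ~4e9
    }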
@@ -970,8 +998,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Okay, at this point, we know which new successor Pred will get. Make
// sure we update the number of entries in the PHI nodes for these
// successors.
- for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
- AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+ for (BasicBlock *NewSuccessor : NewSuccessors)
+ AddPredecessorToBlock(NewSuccessor, Pred, BB);
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
@@ -984,8 +1012,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
PredCases.size());
NewSI->setDebugLoc(PTI->getDebugLoc());
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
if (PredHasWeights || SuccHasWeights) {
// Halve the weights if any of them cannot fit in an uint32_t
@@ -1059,15 +1087,15 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
BasicBlock::iterator BB1_Itr = BB1->begin();
BasicBlock::iterator BB2_Itr = BB2->begin();
- Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
// Skip debug info if it is not identical.
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
+ I1 = &*BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
+ I2 = &*BB2_Itr++;
}
if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
@@ -1088,31 +1116,30 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
// For a normal instruction, we just move one to right before the branch,
// then replace all uses of the other with the first. Finally, we remove
// the now redundant second instruction.
- BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1);
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align, LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
combineMetadata(I1, I2, KnownIDs);
I2->eraseFromParent();
Changed = true;
- I1 = BB1_Itr++;
- I2 = BB2_Itr++;
+ I1 = &*BB1_Itr++;
+ I2 = &*BB2_Itr++;
// Skip debug info if it is not identical.
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
while (isa<DbgInfoIntrinsic>(I1))
- I1 = BB1_Itr++;
+ I1 = &*BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
- I2 = BB2_Itr++;
+ I2 = &*BB2_Itr++;
}
} while (I1->isIdenticalToWhenDefined(I2));
@@ -1147,7 +1174,7 @@ HoistTerminator:
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
- BIParent->getInstList().insert(BI, NT);
+ BIParent->getInstList().insert(BI->getIterator(), NT);
if (!NT->getType()->isVoidTy()) {
I1->replaceAllUsesWith(NT);
I2->replaceAllUsesWith(NT);
@@ -1265,7 +1292,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
- isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ I1->isEHPad() || I2->isEHPad() ||
isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
@@ -1324,7 +1351,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
if (!NewPN) {
NewPN =
PHINode::Create(DifferentOp1->getType(), 2,
- DifferentOp1->getName() + ".sink", BBEnd->begin());
+ DifferentOp1->getName() + ".sink", &BBEnd->front());
NewPN->addIncoming(DifferentOp1, BB1);
NewPN->addIncoming(DifferentOp2, BB2);
DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
@@ -1339,7 +1366,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// instruction in the basic block down.
bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
// Sink the instruction.
- BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
+ BB1->getInstList(), I1);
if (!OldPN->use_empty())
OldPN->replaceAllUsesWith(I1);
OldPN->eraseFromParent();
@@ -1355,7 +1383,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
RE1 = BB1->getInstList().rend();
if (UpdateRE2)
RE2 = BB2->getInstList().rend();
- FirstNonPhiInBBEnd = I1;
+ FirstNonPhiInBBEnd = &*I1;
NumSinkCommons++;
Changed = true;
}
@@ -1491,7 +1519,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
- Instruction *I = BBI;
+ Instruction *I = &*BBI;
// Skip debug info.
if (isa<DbgInfoIntrinsic>(I))
continue;
@@ -1604,9 +1632,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
SpeculatedStore->setOperand(0, S);
}
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+  for (auto &I : *ThenBB)
+ I.dropUnknownNonDebugMetadata();
+
// Hoist the instructions.
- BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
- std::prev(ThenBB->end()));
+ BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
+ ThenBB->begin(), std::prev(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
@@ -1747,13 +1780,13 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, DL)) {
- TranslateMap[BBI] = V;
+ TranslateMap[&*BBI] = V;
delete N; // Instruction folded away, don't need actual inst
} else {
// Insert the new instruction into its new home.
EdgeBB->getInstList().insert(InsertPt, N);
if (!BBI->use_empty())
- TranslateMap[BBI] = N;
+ TranslateMap[&*BBI] = N;
}
}
@@ -1850,7 +1883,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
- if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
// flow, so the xform is not worth it.
@@ -1863,7 +1896,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
- if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
// flow, so the xform is not worth it.
@@ -1882,13 +1915,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
if (IfBlock1)
- DomBlock->getInstList().splice(InsertPt,
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator());
+ IfBlock1->getTerminator()->getIterator());
if (IfBlock2)
- DomBlock->getInstList().splice(InsertPt,
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator());
+ IfBlock2->getTerminator()->getIterator());
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
@@ -2057,7 +2090,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BI->getSuccessor(0) == PBI->getSuccessor(1))) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end();
I != E; ) {
- Instruction *Curr = I++;
+ Instruction *Curr = &*I++;
if (isa<CmpInst>(Curr)) {
Cond = Curr;
break;
@@ -2077,7 +2110,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
return false;
// Make sure the instruction after the condition is the cond branch.
- BasicBlock::iterator CondIt = Cond; ++CondIt;
+ BasicBlock::iterator CondIt = ++Cond->getIterator();
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
@@ -2095,7 +2128,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
return false;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
@@ -2192,17 +2225,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *NewBonusInst = BonusInst->clone();
RemapInstruction(NewBonusInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
- VMap[BonusInst] = NewBonusInst;
+ VMap[&*BonusInst] = NewBonusInst;
// If we moved a load, we cannot any longer claim any knowledge about
// its potential value. The previous information might have been valid
// only given the branch precondition.
// For an analogous reason, we must also drop all the metadata whose
// semantics we don't understand.
- NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg);
+ NewBonusInst->dropUnknownNonDebugMetadata();
- PredBlock->getInstList().insert(PBI, NewBonusInst);
- NewBonusInst->takeName(BonusInst);
+ PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
+ NewBonusInst->takeName(&*BonusInst);
BonusInst->setName(BonusInst->getName() + ".old");
}
@@ -2211,7 +2244,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *New = Cond->clone();
RemapInstruction(New, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
- PredBlock->getInstList().insert(PBI, New);
+ PredBlock->getInstList().insert(PBI->getIterator(), New);
New->takeName(Cond);
Cond->setName(New->getName() + ".old");
@@ -2332,11 +2365,297 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
return false;
}
+// If there is exactly one store in BB1 and BB2 combined, return it;
+// otherwise return nullptr.
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+ StoreInst *S = nullptr;
+ for (auto *BB : {BB1, BB2}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (S)
+ // Multiple stores seen.
+ return nullptr;
+ else
+ S = SI;
+ }
+ }
+ return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+ Value *AlternativeV = nullptr) {
+ // PHI is going to be a PHI node that allows the value V that is defined in
+ // BB to be referenced in BB's only successor.
+ //
+ // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+ // doesn't matter to us what the other operand is (it'll never get used). We
+ // could just create a new PHI with an undef incoming value, but that could
+ // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+ // other PHI. So here we directly look for some PHI in BB's successor with V
+ // as an incoming operand. If we find one, we use it, else we create a new
+ // one.
+ //
+ // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+  // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+ // where OtherBB is the single other predecessor of BB's only successor.
+ PHINode *PHI = nullptr;
+ BasicBlock *Succ = BB->getSingleSuccessor();
+
+ for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+ if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+ PHI = cast<PHINode>(I);
+ if (!AlternativeV)
+ break;
+
+ assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+ auto PredI = pred_begin(Succ);
+ BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+ if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+ break;
+ PHI = nullptr;
+ }
+ if (PHI)
+ return PHI;
+
+ // If V is not an instruction defined in BB, just return it.
+ if (!AlternativeV &&
+ (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+ return V;
+
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI->addIncoming(V, BB);
+ for (BasicBlock *PredBB : predecessors(Succ))
+ if (PredBB != BB)
+ PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()),
+ PredBB);
+ return PHI;
+}
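As a rough source-level analogy (a hedged sketch, not part of the patch and not LLVM API), the PHI node this helper finds or creates acts as a merge variable at the join point:

// Hypothetical names; 'merged' plays the role of the PHI in Succ.
int joinValue(bool reachedThroughBB, int v, int alternativeV) {
  int merged = alternativeV; // incoming value from the other predecessor
  if (reachedThroughBB)
    merged = v;              // incoming value from BB
  return merged;             // uses in the successor read 'merged'
}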
+
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+ BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address,
+ bool InvertPCond, bool InvertQCond) {
+ auto IsaBitcastOfPointerType = [](const Instruction &I) {
+ return Operator::getOpcode(&I) == Instruction::BitCast &&
+ I.getType()->isPointerTy();
+ };
+
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ unsigned N = 0;
+ for (auto &I : *BB) {
+ // Cheap instructions viable for folding.
+ if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I))
+ ++N;
+ // Free instructions.
+ else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ IsaBitcastOfPointerType(I))
+ continue;
+ else
+ return false;
+ }
+ return N <= PHINodeFoldingThreshold;
+ };
+
+ if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) ||
+ !IsWorthwhile(PFB) ||
+ !IsWorthwhile(QTB) ||
+ !IsWorthwhile(QFB)))
+ return false;
+
+ // For every pointer, there must be exactly two stores, one coming from
+ // PTB or PFB, and the other from QTB or QFB. We don't support more than one
+ // store (to any address) in PTB,PFB or QTB,QFB.
+ // FIXME: We could relax this restriction with a bit more work and performance
+ // testing.
+ StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
+ StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
+ if (!PStore || !QStore)
+ return false;
+
+ // Now check the stores are compatible.
+ if (!QStore->isUnordered() || !PStore->isUnordered())
+ return false;
+
+ // Check that sinking the store won't cause program behavior changes. Sinking
+ // the store out of the Q blocks won't change any behavior as we're sinking
+ // from a block to its unconditional successor. But we're moving a store from
+ // the P blocks down through the middle block (QBI) and past both QFB and QTB.
+ // So we need to check that there are no aliasing loads or stores in
+ // QBI, QTB and QFB. We also need to check there are no conflicting memory
+ // operations between PStore and the end of its parent block.
+ //
+ // The ideal way to do this is to query AliasAnalysis, but we don't
+ // preserve AA currently so that is dangerous. Be super safe and just
+ // check there are no other memory operations at all.
+ for (auto &I : *QFB->getSinglePredecessor())
+ if (I.mayReadOrWriteMemory())
+ return false;
+ for (auto &I : *QFB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ if (QTB)
+ for (auto &I : *QTB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
+ I != E; ++I)
+ if (&*I != PStore && I->mayReadOrWriteMemory())
+ return false;
+
+ // OK, we're going to sink the stores to PostBB. The store has to be
+ // conditional though, so first create the predicate.
+ Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+ Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+
+ Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
+ PStore->getParent());
+ Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
+ QStore->getParent(), PPHI);
+
+ IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+
+ Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+ Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+ if (InvertPCond)
+ PPred = QB.CreateNot(PPred);
+ if (InvertQCond)
+ QPred = QB.CreateNot(QPred);
+ Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+ auto *T =
+ SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+ QB.SetInsertPoint(T);
+ StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
+ AAMDNodes AAMD;
+ PStore->getAAMetadata(AAMD, /*Merge=*/false);
+  QStore->getAAMetadata(AAMD, /*Merge=*/true);
+ SI->setAAMetadata(AAMD);
+
+ QStore->eraseFromParent();
+ PStore->eraseFromParent();
+
+ return true;
+}
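A hedged source-level sketch of the transform above (illustrative C++ with hypothetical names; the pass itself operates on IR):

// Before: two stores to the same address, each under its own condition.
void before(bool p, bool q, int x, int y, int *a) {
  if (p)
    *a = x; // PStore
  if (q)
    *a = y; // QStore
}

// After: the stored value is carried through merge variables (the PPHI/QPHI
// chain) and a single store is predicated on the union of the conditions.
void after(bool p, bool q, int x, int y, int *a) {
  int v = 0; // dead unless p or q is set
  if (p)
    v = x;
  if (q)
    v = y;   // later store wins, as in 'before'
  if (p | q) // CombinedPred
    *a = v;
}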
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
+ // The intention here is to find diamonds or triangles (see below) where each
+ // conditional block contains a store to the same address. Both of these
+ // stores are conditional, so they can't be unconditionally sunk. But it may
+ // be profitable to speculatively sink the stores into one merged store at the
+ // end, and predicate the merged store on the union of the two conditions of
+ // PBI and QBI.
+ //
+ // This can reduce the number of stores executed if both of the conditions are
+ // true, and can allow the blocks to become small enough to be if-converted.
+ // This optimization will also chain, so that ladders of test-and-set
+ // sequences can be if-converted away.
+ //
+ // We only deal with simple diamonds or triangles:
+ //
+  //     PBI          or        PBI          or a combination of the two
+  //    /   \                    |  \
+  //  PTB    PFB                 |   PFB
+  //    \   /                    |  /
+  //     QBI                    QBI
+  //    /   \                    |  \
+  //  QTB    QFB                 |   QFB
+  //    \   /                    |  /
+  //    PostBB                 PostBB
+ //
+ // We model triangles as a type of diamond with a nullptr "true" block.
+ // Triangles are canonicalized so that the fallthrough edge is represented by
+ // a true condition, as in the diagram above.
+ //
+ BasicBlock *PTB = PBI->getSuccessor(0);
+ BasicBlock *PFB = PBI->getSuccessor(1);
+ BasicBlock *QTB = QBI->getSuccessor(0);
+ BasicBlock *QFB = QBI->getSuccessor(1);
+ BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+ bool InvertPCond = false, InvertQCond = false;
+ // Canonicalize fallthroughs to the true branches.
+ if (PFB == QBI->getParent()) {
+ std::swap(PFB, PTB);
+ InvertPCond = true;
+ }
+ if (QFB == PostBB) {
+ std::swap(QFB, QTB);
+ InvertQCond = true;
+ }
+
+ // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+ // and QFB may not. Model fallthroughs as a nullptr block.
+ if (PTB == QBI->getParent())
+ PTB = nullptr;
+ if (QTB == PostBB)
+ QTB = nullptr;
+
+ // Legality bailouts. We must have at least the non-fallthrough blocks and
+ // the post-dominating block, and the non-fallthroughs must only have one
+ // predecessor.
+ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
+ return BB->getSinglePredecessor() == P &&
+ BB->getSingleSuccessor() == S;
+ };
+ if (!PostBB ||
+ !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
+ return false;
+ if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
+ (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
+ return false;
+ if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2)
+ return false;
+
+ // OK, this is a sequence of two diamonds or triangles.
+ // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
+  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
+ for (auto *BB : {PTB, PFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ PStoreAddresses.insert(SI->getPointerOperand());
+ }
+ for (auto *BB : {QTB, QFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ QStoreAddresses.insert(SI->getPointerOperand());
+ }
+
+ set_intersect(PStoreAddresses, QStoreAddresses);
+ // set_intersect mutates PStoreAddresses in place. Rename it here to make it
+ // clear what it contains.
+ auto &CommonAddresses = PStoreAddresses;
+
+ bool Changed = false;
+ for (auto *Address : CommonAddresses)
+ Changed |= mergeConditionalStoreToAddress(
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond);
+ return Changed;
+}
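To make the chaining claim concrete (a hedged sketch, not from the patch): a ladder of test-and-set stores collapses one rung at a time.

void ladder(bool a, bool b, bool c, int *flag) {
  if (a) *flag = 1;
  if (b) *flag = 1;
  if (c) *flag = 1;
  // One application merges the first two stores into 'if (a | b) *flag = 1;';
  // reapplying it yields roughly 'if (a | b | c) *flag = 1;', which is small
  // enough to be if-converted.
}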
+
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ const DataLayout &DL) {
assert(PBI->isConditional() && BI->isConditional());
BasicBlock *BB = BI->getParent();
@@ -2360,10 +2679,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// simplifycfg will thread the block.
if (BlockIsSimpleEnoughToThreadThrough(BB)) {
pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
- std::distance(PB, PE),
- BI->getCondition()->getName() + ".pr",
- BB->begin());
+ PHINode *NewPN = PHINode::Create(
+ Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr", &BB->front());
// Okay, we're going to insert the PHI node. Since PBI is not the only
// predecessor, compute the PHI'd conditional value for all of the preds.
// Any predecessor where the condition is not computable we keep symbolic.
@@ -2386,6 +2704,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
}
}
+ if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ // If BI is reached from the true path of PBI and PBI's condition implies
+ // BI's condition, we know the direction of the BI branch.
+ if (PBI->getSuccessor(0) == BI->getParent() &&
+ isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
+ BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(ConstantInt::getTrue(BB->getContext()));
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return true; // Nuke the branch on constant.
+ }
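A hypothetical source-level view of this fold:

int impliedExample(int x) {
  if (x > 10) {   // PBI's condition
    if (x > 5)    // BI's condition: implied by (x > 10), rewritten to 'true'
      return 1;
    return 2;     // dead once BI branches on a constant
  }
  return 0;
}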
+
+ // If both branches are conditional and both contain stores to the same
+ // address, remove the stores from the conditionals and create a conditional
+ // merged store at the end.
+ if (MergeCondStores && mergeConditionalStores(PBI, BI))
+ return true;
+
// If this is a conditional branch in an empty block, and if any
// predecessors are a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
@@ -2396,11 +2737,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (&*BBI != BI)
return false;
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
- if (CE->canTrap())
- return false;
-
int PBIOp, BIOp;
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
PBIOp = BIOp = 0;
@@ -2565,15 +2901,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
- for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
- BasicBlock *Succ = OldTerm->getSuccessor(I);
+ for (BasicBlock *Succ : OldTerm->successors()) {
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
KeepEdge2 = nullptr;
else
- Succ->removePredecessor(OldTerm->getParent());
+ Succ->removePredecessor(OldTerm->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
}
IRBuilder<> Builder(OldTerm);
@@ -2827,7 +3163,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
// If Extra was used, we require at least two switch values to do the
- // transformation. A switch with one value is just an cond branch.
+ // transformation. A switch with one value is just a conditional branch.
if (ExtraCase && Values.size() < 2) return false;
// TODO: Preserve branch weight metadata, similarly to how
@@ -2847,7 +3183,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
- BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
// Remove the uncond branch added to the old block.
TerminatorInst *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
@@ -2911,34 +3248,15 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Check that there are no other instructions except for debug intrinsics.
- BasicBlock::iterator I = LPInst, E = RI;
+ BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
while (++I != E)
if (!isa<DbgInfoIntrinsic>(I))
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
- InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- // Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
- Call->takeName(II);
- Call->setCallingConv(II->getCallingConv());
- Call->setAttributes(II->getAttributes());
- Call->setDebugLoc(II->getDebugLoc());
-
- // Anything that used the value produced by the invoke instruction now uses
- // the value produced by the call instruction. Note that we do this even
- // for void functions and calls with no uses so that the callgraph edge is
- // updated.
- II->replaceAllUsesWith(Call);
- BB->removePredecessor(II->getParent());
-
- // Insert a branch to the normal destination right before the invoke.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Finally, delete the invoke instruction!
- II->eraseFromParent();
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
}
// The landingpad is now unreachable. Zap it.
@@ -2946,6 +3264,124 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return true;
}
+bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+ // If this is a trivial cleanup pad that executes no instructions, it can be
+ // eliminated. If the cleanup pad continues to the caller, any predecessor
+ // that is an EH pad will be updated to continue to the caller and any
+ // predecessor that terminates with an invoke instruction will have its invoke
+ // instruction converted to a call instruction. If the cleanup pad being
+ // simplified does not continue to the caller, each predecessor will be
+ // updated to continue to the unwind destination of the cleanup pad being
+ // simplified.
+ BasicBlock *BB = RI->getParent();
+ CleanupPadInst *CPInst = RI->getCleanupPad();
+ if (CPInst->getParent() != BB)
+ // This isn't an empty cleanup.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // If the cleanup return we are simplifying unwinds to the caller, this will
+ // set UnwindDest to nullptr.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
+
+ // We're about to remove BB from the control flow. Before we do, sink any
+ // PHINodes into the unwind destination. Doing this before changing the
+ // control flow avoids some potentially slow checks, since we can currently
+ // be certain that UnwindDest and BB have no common predecessors (since they
+ // are both EH pads).
+ if (UnwindDest) {
+ // First, go through the PHI nodes in UnwindDest and update any nodes that
+ // reference the block we are removing
+ for (BasicBlock::iterator I = UnwindDest->begin(),
+ IE = DestEHPad->getIterator();
+ I != IE; ++I) {
+ PHINode *DestPN = cast<PHINode>(I);
+
+ int Idx = DestPN->getBasicBlockIndex(BB);
+ // Since BB unwinds to UnwindDest, it has to be in the PHI node.
+ assert(Idx != -1);
+ // This PHI node has an incoming value that corresponds to a control
+ // path through the cleanup pad we are removing. If the incoming
+ // value is in the cleanup pad, it must be a PHINode (because we
+ // verified above that the block is otherwise empty). Otherwise, the
+ // value is either a constant or a value that dominates the cleanup
+ // pad being removed.
+ //
+ // Because BB and UnwindDest are both EH pads, all of their
+ // predecessors must unwind to these blocks, and since no instruction
+ // can have multiple unwind destinations, there will be no overlap in
+ // incoming blocks between SrcPN and DestPN.
+ Value *SrcVal = DestPN->getIncomingValue(Idx);
+ PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
+
+ // Remove the entry for the block we are deleting.
+ DestPN->removeIncomingValue(Idx, false);
+
+ if (SrcPN && SrcPN->getParent() == BB) {
+ // If the incoming value was a PHI node in the cleanup pad we are
+ // removing, we need to merge that PHI node's incoming values into
+ // DestPN.
+ for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
+ SrcIdx != SrcE; ++SrcIdx) {
+ DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
+ SrcPN->getIncomingBlock(SrcIdx));
+ }
+ } else {
+ // Otherwise, the incoming value came from above BB and
+ // so we can just reuse it. We must associate all of BB's
+ // predecessors with this value.
+ for (auto *pred : predecessors(BB)) {
+ DestPN->addIncoming(SrcVal, pred);
+ }
+ }
+ }
+
+ // Sink any remaining PHI nodes directly into UnwindDest.
+ Instruction *InsertPt = DestEHPad;
+ for (BasicBlock::iterator I = BB->begin(),
+ IE = BB->getFirstNonPHI()->getIterator();
+ I != IE;) {
+ // The iterator must be incremented here because the instructions are
+ // being moved to another block.
+ PHINode *PN = cast<PHINode>(I++);
+ if (PN->use_empty())
+ // If the PHI node has no uses, just leave it. It will be erased
+ // when we erase BB below.
+ continue;
+
+ // Otherwise, sink this PHI node into UnwindDest.
+ // Any predecessors to UnwindDest which are not already represented
+ // must be back edges which inherit the value from the path through
+ // BB. In this case, the PHI value must reference itself.
+ for (auto *pred : predecessors(UnwindDest))
+ if (pred != BB)
+ PN->addIncoming(PN, pred);
+ PN->moveBefore(InsertPt);
+ }
+ }
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ // The iterator must be updated here because we are removing this pred.
+ BasicBlock *PredBB = *PI++;
+ if (UnwindDest == nullptr) {
+ removeUnwindEdge(PredBB);
+ } else {
+ TerminatorInst *TI = PredBB->getTerminator();
+ TI->replaceUsesOfWith(BB, UnwindDest);
+ }
+ }
+
+ // The cleanup pad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
@@ -3003,8 +3439,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
- while (UI != BB->begin()) {
- BasicBlock::iterator BBI = UI;
+ while (UI->getIterator() != BB->begin()) {
+ BasicBlock::iterator BBI = UI->getIterator();
--BBI;
// Do not delete instructions that can have side effects which might cause
// the unreachable to not be reachable; specifically, calls and volatile
@@ -3075,26 +3511,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
- if (II->getUnwindDest() == BB) {
- // Convert the invoke to a call instruction. This would be a good
- // place to note that the call does not throw though.
- BranchInst *BI = Builder.CreateBr(II->getNormalDest());
- II->removeFromParent(); // Take out of symbol table
-
- // Insert the call now...
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
- Builder.SetInsertPoint(BI);
- CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args, II->getName());
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the call does now instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
+ } else if ((isa<InvokeInst>(TI) &&
+ cast<InvokeInst>(TI)->getUnwindDest() == BB) ||
+ isa<CatchSwitchInst>(TI)) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ } else if (isa<CleanupReturnInst>(TI)) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
}
+    // TODO: We can remove a catchswitch if all its catchpads end in
+ // unreachable.
}
// If this block is now dead, remove it.
@@ -3249,6 +3677,29 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
}
}
+ // If we can prove that the cases must cover all possible values, the
+ // default destination becomes dead and we can remove it. If we know some
+ // of the bits in the value, we can use that to more precisely compute the
+ // number of possible unique case values.
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const unsigned NumUnknownBits = Bits -
+ (KnownZero.Or(KnownOne)).countPopulation();
+ assert(NumUnknownBits <= Bits);
+ if (HasDefault && DeadCases.empty() &&
+ NumUnknownBits < 64 /* avoid overflow */ &&
+ SI->getNumCases() == (1ULL << NumUnknownBits)) {
+ DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(),
+ SI->getParent(), "");
+ SI->setDefaultDest(&*NewDefault);
+ SplitBlock(&*NewDefault, &NewDefault->front());
+ auto *OldTI = NewDefault->getTerminator();
+ new UnreachableInst(SI->getContext(), OldTI);
+ EraseTerminatorInstAndDCECond(OldTI);
+ return true;
+ }
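The counting argument can be sketched standalone (assumed semantics: KnownZero and KnownOne are masks of bits proven zero or one; C++20 for std::popcount):

#include <bit>
#include <cstdint>

// True when a switch over 'Bits'-wide values with 'NumCases' distinct cases
// must cover every value the condition can still take.
bool defaultIsDead(uint64_t KnownZero, uint64_t KnownOne, unsigned Bits,
                   uint64_t NumCases) {
  unsigned NumUnknownBits =
      Bits - std::popcount(KnownZero | KnownOne); // bits not pinned down
  return NumUnknownBits < 64 && NumCases == (1ULL << NumUnknownBits);
}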
+
SmallVector<uint64_t, 8> Weights;
bool HasWeight = HasBranchWeights(SI);
if (HasWeight) {
@@ -3439,7 +3890,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
@@ -3456,7 +3907,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
return false;
}
- ConstantPool.insert(std::make_pair(I, C));
+ ConstantPool.insert(std::make_pair(&*I, C));
} else {
break;
}
@@ -3664,7 +4115,7 @@ namespace {
/// Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
- const Type *ElementType);
+ Type *ElementType);
private:
// Depending on the contents of the table, it can be represented in
@@ -3880,8 +4331,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
- const Type *ElementType) {
- const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ Type *ElementType) {
+ auto *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
// FIXME: If the type is wider than it needs to be, e.g. i8 but all values
@@ -3992,7 +4443,7 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
"Expect true or false as compare result.");
}
-
+
// Check if the branch instruction dominates the phi node. It's a simple
// dominance check, but sufficient for our needs.
// Although this check is invariant in the calling loops, it's better to do it
@@ -4422,7 +4873,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
@@ -4457,6 +4908,16 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return false;
}
+static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
+ BasicBlock *PredPred = nullptr;
+ for (auto *P : predecessors(BB)) {
+ BasicBlock *PPred = P->getSinglePredecessor();
+ if (!PPred || (PredPred && PredPred != PPred))
+ return nullptr;
+ PredPred = PPred;
+ }
+ return PredPred;
+}
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
@@ -4537,9 +4998,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ // Look for diamond patterns.
+ if (MergeCondStores)
+ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (mergeConditionalStores(PBI, BI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
return false;
}
@@ -4663,6 +5132,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (SimplifyReturn(RI, Builder)) return true;
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
if (SimplifyResume(RI, Builder)) return true;
+ } else if (CleanupReturnInst *RI =
+ dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (SimplifyCleanupReturn(RI)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder)) return true;
} else if (UnreachableInst *UI =
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ab30aa1..ddd8775 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -47,15 +47,16 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
+ DominatorTree *DT;
SmallVectorImpl<WeakVH> &DeadInsts;
bool Changed;
public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI,
- SmallVectorImpl<WeakVH> &Dead)
- : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) {
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
+                 LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -63,11 +64,13 @@ namespace {
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
- /// all simplicitions to users of an IV.
+ /// all simplifications to users of an IV.
void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+ bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
@@ -166,19 +169,65 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
S = SE->getSCEVAtScope(S, ICmpLoop);
X = SE->getSCEVAtScope(X, ICmpLoop);
+ ICmpInst::Predicate InvariantPredicate;
+ const SCEV *InvariantLHS, *InvariantRHS;
+
// If the condition is always true or always false, replace it with
// a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
+ if (SE->isKnownPredicate(Pred, S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (isa<PHINode>(IVOperand) &&
+ SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop,
+ InvariantPredicate, InvariantLHS,
+ InvariantRHS)) {
+
+ // Rewrite the comparison to a loop invariant comparison if it can be done
+ // cheaply, where cheaply means "we don't need to emit any new
+ // instructions".
+
+ Value *NewLHS = nullptr, *NewRHS = nullptr;
+
+ if (S == InvariantLHS || X == InvariantLHS)
+ NewLHS =
+ ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx));
+
+ if (S == InvariantRHS || X == InvariantRHS)
+ NewRHS =
+ ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx));
+
+ for (Value *Incoming : cast<PHINode>(IVOperand)->incoming_values()) {
+ if (NewLHS && NewRHS)
+ break;
+
+ const SCEV *IncomingS = SE->getSCEV(Incoming);
+
+ if (!NewLHS && IncomingS == InvariantLHS)
+ NewLHS = Incoming;
+ if (!NewRHS && IncomingS == InvariantRHS)
+ NewRHS = Incoming;
+ }
+
+ if (!NewLHS || !NewRHS)
+ // We could not find an existing value to replace either LHS or RHS.
+ // Generating new instructions has subtler tradeoffs, so avoid doing that
+ // for now.
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(InvariantPredicate);
+ ICmp->setOperand(0, NewLHS);
+ ICmp->setOperand(1, NewRHS);
+ } else
return;
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
++NumElimCmp;
Changed = true;
- DeadInsts.emplace_back(ICmp);
}
/// SimplifyIVUsers helper for eliminating useless
@@ -207,8 +256,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
Rem->replaceAllUsesWith(Rem->getOperand(0));
else {
// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType()));
if (IsSigned && !SE->isKnownNonNegative(LessOne))
return;
@@ -232,9 +280,9 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DeadInsts.emplace_back(Rem);
}
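The rewrite quoted in the comment above, (i+1) % n --> (i+1)==n?0:(i+1) for i in [0,n), is easy to spot-check in isolation:

#include <cassert>

int main() {
  for (int n = 1; n < 50; ++n)
    for (int i = 0; i < n; ++i) // i in [0, n)
      assert((i + 1) % n == ((i + 1 == n) ? 0 : i + 1));
}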
-/// Eliminate an operation that consumes a simple IV and has
-/// no observable side-effect given the range of IV values.
-/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+/// Eliminate an operation that consumes a simple IV and has no observable
+/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
+/// but UseInst may not be.
bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
Instruction *IVOperand) {
if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
@@ -249,12 +297,45 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
}
}
- // Eliminate any operation that SCEV can prove is an identity function.
+ if (eliminateIdentitySCEV(UseInst, IVOperand))
+ return true;
+
+ return false;
+}
+
+/// Eliminate any operation that SCEV can prove is an identity function.
+bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
+ Instruction *IVOperand) {
if (!SE->isSCEVable(UseInst->getType()) ||
(UseInst->getType() != IVOperand->getType()) ||
(SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
return false;
+ // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
+ // dominator tree, even if X is an operand to Y. For instance, in
+ //
+ // %iv = phi i32 {0,+,1}
+ // br %cond, label %left, label %merge
+ //
+ // left:
+ // %X = add i32 %iv, 0
+ // br label %merge
+ //
+ // merge:
+ // %M = phi (%X, %iv)
+ //
+ // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
+ // %M.replaceAllUsesWith(%X) would be incorrect.
+
+ if (isa<PHINode>(UseInst))
+ // If UseInst is not a PHI node then we know that IVOperand dominates
+ // UseInst directly from the legality of SSA.
+ if (!DT || !DT->dominates(IVOperand, UseInst))
+ return false;
+
+ if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
+ return false;
+
DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
UseInst->replaceAllUsesWith(IVOperand);
@@ -436,8 +517,8 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
/// This algorithm does not require IVUsers analysis. Instead, it simplifies
/// instructions in-place during analysis. Rather than rewriting induction
/// variables bottom-up from their users, it transforms a chain of IVUsers
-/// top-down, updating the IR only when it encouters a clear optimization
-/// opportunitiy.
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
///
/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
///
@@ -513,22 +594,21 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
-{
- LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead);
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+ IVVisitor *V) {
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead) {
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) {
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead);
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
}
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index c499c87..d5377f9 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -20,12 +20,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -64,7 +64,7 @@ namespace {
// Here be subtlety: the iterator must be incremented before the loop
// body (not sure why), so a range-for loop won't work here.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
// empty and we only bother simplifying instructions that are in it.
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6bbf828..81dea6d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -30,8 +31,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
@@ -52,16 +53,8 @@ static cl::opt<bool>
//===----------------------------------------------------------------------===//
static bool ignoreCallingConv(LibFunc::Func Func) {
- switch (Func) {
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
- case LibFunc::strlen:
- return true;
- default:
- return false;
- }
- llvm_unreachable("All cases should be covered in the switch.");
+ return Func == LibFunc::abs || Func == LibFunc::labs ||
+ Func == LibFunc::llabs || Func == LibFunc::strlen;
}
/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -93,16 +86,13 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
}
static bool callHasFloatingPointArgument(const CallInst *CI) {
- for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
- it != e; ++it) {
- if ((*it)->getType()->isFloatingPointTy())
- return true;
- }
- return false;
+ return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) {
+ return OI->getType()->isFloatingPointTy();
+ });
}
/// \brief Check whether the overloaded unary floating point function
-/// corresponing to \a Ty is available.
+/// corresponding to \a Ty is available.
static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
LibFunc::Func LongDoubleFn) {
@@ -116,6 +106,23 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
+/// \brief Check whether we can use unsafe floating point math for
+/// the function passed as input.
+static bool canUseUnsafeFPMath(Function *F) {
+
+ // FIXME: For finer-grain optimization, we need intrinsics to have the same
+ // fast-math flag decorations that are applied to FP instructions. For now,
+ // we have to rely on the function-level unsafe-fp-math attribute to do this
+ // optimization because there's no other way to express that the call can be
+ // relaxed.
+ if (F->hasFnAttribute("unsafe-fp-math")) {
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() == "true")
+ return true;
+ }
+ return false;
+}
+
/// \brief Returns whether \p F matches the signature expected for the
/// string/memory copying library function \p Func.
+/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
@@ -467,9 +474,6 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Verify the "stpcpy" function prototype.
- FunctionType *FT = Callee->getFunctionType();
-
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
return nullptr;
@@ -484,7 +488,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return nullptr;
- Type *PT = FT->getParamType(0);
+ Type *PT = Callee->getFunctionType()->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
@@ -497,8 +501,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
-
if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
return nullptr;
@@ -531,7 +533,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (Len > SrcLen + 1)
return nullptr;
- Type *PT = FT->getParamType(0);
+ Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
@@ -862,6 +864,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
return B.CreateSub(LHSV, RHSV, "chardiff");
}
+ // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+ IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+ unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+ if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+ getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+ Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+ return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+ }
+ }
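A hedged illustration of this fold for N == 8, i.e. memcmp(a, b, 8) used only in an equality test (memcpy stands in for the aligned i64 loads the simplifier emits; names are hypothetical):

#include <cstdint>
#include <cstring>

bool eqBefore(const void *a, const void *b) {
  return std::memcmp(a, b, 8) == 0;
}

bool eqAfter(const void *a, const void *b) {
  uint64_t x, y;
  std::memcpy(&x, a, 8); // i64 load of *a
  std::memcpy(&y, b, 8); // i64 load of *b
  return !(x != y);      // zext(icmp ne ...) compared against zero
}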
+
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
StringRef LHSStr, RHSStr;
if (getConstantStringInfo(LHS, LHSStr) &&
@@ -972,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
- Module *M = CI->getParent()->getParent()->getParent();
+ Module *M = CI->getModule();
Intrinsic::ID IID = Callee->getIntrinsicID();
Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
V = B.CreateCall(F, V);
@@ -1015,9 +1038,9 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
@@ -1035,13 +1058,37 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
+static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
+ // Multiplications calculated using Addition Chains.
+ // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
+
+ assert(Exp != 0 && "Incorrect exponent 0 not handled");
+
+ if (InnerChain[Exp])
+ return InnerChain[Exp];
+
+ static const unsigned AddChain[33][2] = {
+ {0, 0}, // Unused.
+ {0, 0}, // Unused (base case = pow1).
+ {1, 1}, // Unused (pre-computed).
+ {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
+ {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
+ {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
+ {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
+ {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
+ };
+
+ InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
+ getPow(InnerChain, AddChain[Exp][1], B));
+ return InnerChain[Exp];
+}
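For instance, following the AddChain table, x^7 is assembled from the memoized squares and products (a minimal sketch with doubles standing in for fmul instructions):

double pow7(double x) {
  double x2 = x * x;   // InnerChain[2], precomputed by the caller
  double x3 = x2 * x;  // AddChain[3] = {1, 2}
  double x5 = x2 * x3; // AddChain[5] = {2, 3}
  return x2 * x5;      // AddChain[7] = {2, 5} -- four multiplies in total
}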
+
Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
-
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
@@ -1060,7 +1107,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (Op1C->isExactlyValue(2.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
LibFunc::exp2l))
- return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B,
+ Callee->getAttributes());
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
@@ -1069,6 +1117,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
+ bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
+
+ // pow(exp(x), y) -> exp(x*y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // We enable these only under fast-math. Besides rounding
+ // differences the transformation changes overflow and
+ // underflow behavior quite dramatically.
+ // Example: x = 1000, y = 0.001.
+ // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
+ if (unsafeFPMath) {
+ if (auto *OpC = dyn_cast<CallInst>(Op1)) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ B.SetFastMathFlags(FMF);
+
+ LibFunc::Func Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2))
+ return EmitUnaryFloatFnCall(
+ B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"),
+ OpCCallee->getName(), B, OpCCallee->getAttributes());
+ }
+ }
+
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
if (!Op2C)
return Ret;
@@ -1081,10 +1155,15 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::sqrtl) &&
hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
LibFunc::fabsl)) {
+
+ // In -ffast-math, pow(x, 0.5) -> sqrt(x).
+ if (unsafeFPMath)
+ return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ Callee->getAttributes());
+
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
- // TODO: In fast-math mode, this could be just sqrt(x).
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
@@ -1102,18 +1181,42 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return B.CreateFMul(Op1, Op1, "pow2");
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+
+ // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
+ if (unsafeFPMath) {
+ APFloat V = abs(Op2C->getValueAPF());
+ // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+ // This transformation applies to integer exponents only.
+ if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
+ !V.isInteger())
+ return nullptr;
+
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Op1;
+ InnerChain[2] = B.CreateFMul(Op1, Op1);
+
+    // APFloat::convertToDouble is only defined for values with IEEEdouble
+    // semantics, so first convert V to IEEEdouble.
+ bool ignored;
+ V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
+ // For negative exponents simply compute the reciprocal.
+ if (Op2C->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ return FMul;
+ }
+
return nullptr;
}
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Function *Caller = CI->getParent()->getParent();
-
Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2f)) {
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- }
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
@@ -1162,11 +1265,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
-
Value *Ret = nullptr;
- if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) {
+ StringRef Name = Callee->getName();
+ if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
- }
FunctionType *FT = Callee->getFunctionType();
// Make sure this has 1 argument of FP type which matches the result type.
@@ -1184,6 +1286,105 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ if ((Name == "fmin" && hasFloatVersion(Name)) ||
+ (Name == "fmax" && hasFloatVersion(Name))) {
+ Value *Ret = optimizeBinaryDoubleFP(CI, B);
+ if (Ret)
+ return Ret;
+ }
+
+ // Make sure this has 2 arguments of FP type which match the result type.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return nullptr;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ Function *F = CI->getParent()->getParent();
+ if (canUseUnsafeFPMath(F)) {
+ // Unsafe algebra sets all fast-math-flags to true.
+ FMF.setUnsafeAlgebra();
+ } else {
+ // At a minimum, no-nans-fp-math must be true.
+ Attribute Attr = F->getFnAttribute("no-nans-fp-math");
+ if (Attr.getValueAsString() != "true")
+ return nullptr;
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+    // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
+ FMF.setNoSignedZeros();
+ FMF.setNoNaNs();
+ }
+ B.SetFastMathFlags(FMF);
+
+ // We have a relaxed floating-point environment. We can ignore NaN-handling
+ // and transform to a compare and select. We do not have to consider errno or
+ // exceptions, because fmin/fmax do not have those.
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Cmp = Callee->getName().startswith("fmin") ?
+ B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+ return B.CreateSelect(Cmp, Op0, Op1);
+}
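In source terms, the fold above is the classic branch-free minimum/maximum: fcmp olt (or ogt) plus select. A hedged C++ illustration (not the in-tree code) of what the emitted sequence computes for fmin, and why no-NaNs is required:

    #include <cmath>
    #include <cstdio>

    // What "fcmp olt + select" computes for fmin: with a NaN operand the
    // ordered compare is false and the second operand is returned, so the
    // result can differ from libm fmin (which returns the non-NaN operand).
    static double fminAsSelect(double A, double B) {
      return A < B ? A : B;
    }

    int main() {
      std::printf("%g\n", fminAsSelect(1.0, 2.0)); // 1
      std::printf("%g\n", fminAsSelect(NAN, 2.0)); // 2 (NaN discarded)
      std::printf("%g\n", fminAsSelect(2.0, NAN)); // nan (NaN kept!)
    }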
+
+Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ FunctionType *FT = Callee->getFunctionType();
+
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // log(pow(x,y)) -> y*log(x)
+ // This is only applicable to log, log2, log10.
+ if (Name != "log" && Name != "log2" && Name != "log10")
+ return Ret;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ B.SetFastMathFlags(FMF);
+
+ LibFunc::Func Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
+ return B.CreateFMul(OpC->getArgOperand(1),
+ EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
+ Callee->getAttributes()), "mul");
+
+ // log(exp2(y)) -> y*log(2)
+ if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
+ TLI->has(Func) && Func == LibFunc::exp2)
+ return B.CreateFMul(
+ OpC->getArgOperand(0),
+ EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
+ Callee->getName(), B, Callee->getAttributes()),
+ "logmul");
+ return Ret;
+}
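Both folds are instances of log_b(x^y) == y*log_b(x): pow(x, y) is x^y, and exp2(y) is 2^y, so log(exp2(y)) becomes y*log(2). A quick numeric sanity check in plain C++ (illustrative only):

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 3.7, y = 2.5;
      // log(pow(x, y)) -> y*log(x)
      std::printf("%.17g vs %.17g\n", std::log(std::pow(x, y)),
                  y * std::log(x));
      // log(exp2(y)) -> y*log(2)
      std::printf("%.17g vs %.17g\n", std::log(std::exp2(y)),
                  y * std::log(2.0));
      // The two sides need not be bit-identical, which is why the fold is
      // guarded by canUseUnsafeFPMath() above.
    }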
+
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
@@ -1191,19 +1392,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
- // FIXME: For finer-grain optimization, we need intrinsics to have the same
- // fast-math flag decorations that are applied to FP instructions. For now,
- // we have to rely on the function-level unsafe-fp-math attribute to do this
- // optimization because there's no other way to express that the sqrt can be
- // reassociated.
- Function *F = CI->getParent()->getParent();
- if (F->hasFnAttribute("unsafe-fp-math")) {
- // Check for unsafe-fp-math = true.
- Attribute Attr = F->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() != "true")
- return Ret;
- }
Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
@@ -1238,8 +1429,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
// and multiply.
// FIXME: We're not checking the sqrt because it doesn't have
// fast-math-flags (see earlier comment).
- IRBuilder<true, ConstantFolder,
- IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
+ IRBuilder<>::FastMathFlagGuard Guard(B);
B.SetFastMathFlags(I->getFastMathFlags());
// If we found a repeated factor, hoist it out of the square root and
// replace it with the fabs of that factor.
@@ -1262,6 +1452,40 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
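The surrounding hunks implement sqrt(x*x*y) -> fabs(x)*sqrt(y) (and sqrt(x*x) -> fabs(x)): a repeated factor is hoisted out of the square root, with fabs needed because sqrt always returns the non-negative root even when x is negative. A small numeric illustration (plain C++, not the in-tree code):

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = -3.0, y = 2.0;
      // Hoist the repeated factor: sqrt((x*x)*y) == fabs(x)*sqrt(y).
      // fabs, not x, because the left-hand side is always non-negative.
      std::printf("%f\n", std::sqrt((x * x) * y));      // 4.242641
      std::printf("%f\n", std::fabs(x) * std::sqrt(y)); // 4.242641
    }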
+Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ FunctionType *FT = Callee->getFunctionType();
+
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ return Ret;
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // tan(atan(x)) -> x
+ // tanf(atanf(x)) -> x
+ // tanl(atanl(x)) -> x
+ LibFunc::Func Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ ((Func == LibFunc::atan && Callee->getName() == "tan") ||
+ (Func == LibFunc::atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc::atanl && Callee->getName() == "tanl")))
+ Ret = OpC->getArgOperand(0);
+ return Ret;
+}
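tan(atan(x)) == x for every finite x in exact arithmetic, since atan maps into (-pi/2, pi/2) where tan is its inverse; in floating point the round trip only comes back approximately, which is why the fold sits behind canUseUnsafeFPMath(). A quick check (plain C++, illustrative only):

    #include <cmath>
    #include <cstdio>

    int main() {
      // Exact in real arithmetic; nearly exact after two roundings.
      const double xs[] = {0.5, -17.25, 1e9};
      for (double x : xs)
        std::printf("%.17g -> %.17g\n", x, std::tan(std::atan(x)));
    }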
+
static bool isTrigLibCall(CallInst *CI);
static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
@@ -1329,9 +1553,9 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
return;
Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
LibFunc::Func Func;
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI))
+ if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
return;
if (IsFloat) {
@@ -1353,10 +1577,8 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls,
Value *Res) {
- for (SmallVectorImpl<CallInst *>::iterator I = Calls.begin(), E = Calls.end();
- I != E; ++I) {
- replaceAllUsesWith(*I, Res);
- }
+ for (CallInst *C : Calls)
+ replaceAllUsesWith(C, Res);
}
void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
@@ -1387,8 +1609,7 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
// sincos call there.
- BasicBlock::iterator Loc = ArgInst;
- B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
} else {
// Otherwise (e.g. for a constant) the beginning of the function is as
// good a place as any.
@@ -1413,15 +1634,16 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
+static bool checkIntUnaryReturnAndParam(Function *Callee) {
+ FunctionType *FT = Callee->getFunctionType();
+ return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isIntegerTy();
+}
+
Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) ||
- !FT->getParamType(0)->isIntegerTy())
+ if (!checkIntUnaryReturnAndParam(Callee))
return nullptr;
-
Value *Op = CI->getArgOperand(0);
// Constant fold.
@@ -1436,7 +1658,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
+ Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
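ffs(x) is one plus the index of the least significant set bit, or 0 when x == 0. The expansion computes cttz(x, /*is_zero_undef=*/true) + 1 and handles the zero case separately (a select on x != 0 follows later in the full function), which is what makes the switch from getFalse() to getTrue() safe here. A C++ sketch of the same lowering, using the GCC/Clang builtin (hypothetical helper name):

    #include <cstdio>

    // __builtin_ctz is undefined at 0 -- the same contract as cttz with
    // is_zero_undef=true -- so guard it with the x != 0 select.
    static int ffsSketch(unsigned x) {
      return x != 0 ? __builtin_ctz(x) + 1 : 0;
    }

    int main() {
      // 40 == 0b101000: lowest set bit at index 3, so ffs returns 4.
      std::printf("%d %d %d\n", ffsSketch(0), ffsSketch(1), ffsSketch(40));
    }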
@@ -1461,11 +1683,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// isdigit(c) -> (c-'0') <u 10
@@ -1476,11 +1694,7 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// isascii(c) -> c <u 128
@@ -1490,11 +1704,7 @@ Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require i32(i32)
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isIntegerTy(32))
+ if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
return nullptr;
// toascii(c) -> c & 0x7f
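All three ctype folds come down to unsigned range tricks: subtracting '0' maps the digit range onto [0, 10) so a single unsigned compare checks both bounds (anything below '0' wraps to a huge unsigned value), isascii is the range [0, 128), and toascii masks to the low 7 bits. A hedged C++ rendering of the emitted compares and mask (helper names are invented):

    #include <cstdio>

    // isdigit(c) -> (c - '0') <u 10: one unsigned compare covers both ends
    // of the range because underflow wraps around.
    static bool isDigitFold(unsigned c) { return c - '0' < 10u; }
    // isascii(c) -> c <u 128
    static bool isAsciiFold(unsigned c) { return c < 128u; }
    // toascii(c) -> c & 0x7f
    static unsigned toAsciiFold(unsigned c) { return c & 0x7f; }

    int main() {
      std::printf("%d %d %u\n", isDigitFold('7'), isAsciiFold(200),
                  toAsciiFold(0x1C1)); // 1 0 65 ('A')
    }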
@@ -1529,10 +1739,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
}
static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
- if (!ColdErrorCalls)
- return false;
-
- if (!Callee || !Callee->isDeclaration())
+ if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
return false;
if (StreamArg < 0)
@@ -1968,16 +2175,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
// Command-line parameter overrides function attribute.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (Callee->hasFnAttribute("unsafe-fp-math")) {
- // FIXME: This is the same problem as described in optimizeSqrt().
- // If calls gain access to IR-level FMF, then use that instead of a
- // function attribute.
-
- // Check for unsafe-fp-math = true.
- Attribute Attr = Callee->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() == "true")
- UnsafeFPShrink = true;
- }
+ else if (canUseUnsafeFPMath(Callee))
+ UnsafeFPShrink = true;
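canUseUnsafeFPMath() is the helper this diff introduces to replace the open-coded attribute checks, like the block removed just above; presumably it reads the same function attribute, along these lines (a sketch reconstructed from the removed code, not quoted from the commit):

    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Equivalent of the removed open-coded check: the "unsafe-fp-math"
    // string attribute must be present and set to "true".
    static bool canUseUnsafeFPMath(Function *F) {
      if (F->hasFnAttribute("unsafe-fp-math")) {
        Attribute Attr = F->getFnAttribute("unsafe-fp-math");
        if (Attr.getValueAsString() == "true")
          return true;
      }
      return false;
    }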
// First, check for intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
@@ -1990,6 +2189,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeExp2(CI, Builder);
case Intrinsic::fabs:
return optimizeFabs(CI, Builder);
+ case Intrinsic::log:
+ return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
default:
@@ -2001,13 +2202,17 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
// Try to further simplify the result.
CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
- if (SimplifiedCI && SimplifiedCI->getCalledFunction())
- if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
+ if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
+ // Use an IR Builder from SimplifiedCI if available instead of CI
+ // to guarantee we reach all uses we might replace later on.
+ IRBuilder<> TmpBuilder(SimplifiedCI);
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
SimplifiedCI->eraseFromParent();
return V;
}
+ }
return SimplifiedFortifiedCI;
}
@@ -2068,8 +2273,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeFWrite(CI, Builder);
case LibFunc::fputs:
return optimizeFPuts(CI, Builder);
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ return optimizeLog(CI, Builder);
case LibFunc::puts:
return optimizePuts(CI, Builder);
+ case LibFunc::tan:
+ case LibFunc::tanf:
+ case LibFunc::tanl:
+ return optimizeTan(CI, Builder);
case LibFunc::perror:
return optimizeErrorReporting(CI, Builder);
case LibFunc::vfprintf:
@@ -2097,24 +2312,23 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc::exp:
case LibFunc::exp10:
case LibFunc::expm1:
- case LibFunc::log:
- case LibFunc::log10:
- case LibFunc::log1p:
- case LibFunc::log2:
- case LibFunc::logb:
case LibFunc::sin:
case LibFunc::sinh:
- case LibFunc::tan:
case LibFunc::tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
case LibFunc::copysign:
- case LibFunc::fmin:
- case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
+ case LibFunc::fminf:
+ case LibFunc::fmin:
+ case LibFunc::fminl:
+ case LibFunc::fmaxf:
+ case LibFunc::fmax:
+ case LibFunc::fmaxl:
+ return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
}
@@ -2133,37 +2347,27 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
Replacer(I, With);
}
-/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I,
- Value *With) {
- I->replaceAllUsesWith(With);
- I->eraseFromParent();
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
// cbrt:
// * cbrt(expN(X)) -> expN(x/3)
// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(sqrt(x)) -> pow(x,1/9)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
//
// exp, expf, expl:
// * exp(log(x)) -> x
//
// log, logf, logl:
// * log(exp(x)) -> x
-// * log(x**y) -> y*log(x)
// * log(exp(y)) -> y*log(e)
-// * log(exp2(y)) -> y*log(2)
// * log(exp10(y)) -> y*log(10)
// * log(sqrt(x)) -> 0.5*log(x)
-// * log(pow(x,y)) -> y*log(x)
//
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
// pow, powf, powl:
-// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
@@ -2179,9 +2383,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// tan, tanf, tanl:
-// * tan(atan(x)) -> x
-//
// trunc, truncf, truncl:
// * trunc(cnst) -> cnst'
//
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
new file mode 100644
index 0000000..ad6b782
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -0,0 +1,85 @@
+//===- SplitModule.cpp - Split a module into partitions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+static void externalize(GlobalValue *GV) {
+ if (GV->hasLocalLinkage()) {
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
+ // Unnamed entities must be named consistently between modules. setName will
+ // give a distinct name to each such entity.
+ if (!GV->hasName())
+ GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+ if (auto GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalObject *Base = GA->getBaseObject())
+ GV = Base;
+
+ StringRef Name;
+ if (const Comdat *C = GV->getComdat())
+ Name = C->getName();
+ else
+ Name = GV->getName();
+
+ // Partition by MD5 hash. We only need a few bits for evenness as the number
+ // of partitions will generally be small (one or two digits); the low 16
+ // bits are enough.
+ MD5 H;
+ MD5::MD5Result R;
+ H.update(Name);
+ H.final(R);
+ return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+ std::unique_ptr<Module> M, unsigned N,
+ std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+ for (Function &F : *M)
+ externalize(&F);
+ for (GlobalVariable &GV : M->globals())
+ externalize(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ externalize(&GA);
+
+ // FIXME: We should be able to reuse M as the last partition instead of
+ // cloning it.
+ for (unsigned I = 0; I != N; ++I) {
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MPart(
+ CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
+ return isInPartition(GV, I, N);
+ }));
+ if (I != 0)
+ MPart->setModuleInlineAsm("");
+ ModuleCallback(std::move(MPart));
+ }
+}
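A hedged sketch of how a caller might drive the new llvm::SplitModule, e.g. to fan partitions out to parallel codegen (illustrative only; the helper name and the four-way split are arbitrary):

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/SplitModule.h"
    #include <memory>
    #include <vector>

    using namespace llvm;

    // Each callback invocation receives one self-contained, linkable
    // partition; externalize() gave locals hidden external linkage, so
    // cross-partition references still resolve at link time.
    static std::vector<std::unique_ptr<Module>>
    splitIntoFour(std::unique_ptr<Module> M) {
      std::vector<std::unique_ptr<Module>> Parts;
      SplitModule(std::move(M), 4, [&](std::unique_ptr<Module> MPart) {
        Parts.push_back(std::move(MPart)); // or hand off to a codegen thread
      });
      return Parts;
    }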
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index a2a54da..1d1f602 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,7 +69,6 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7e00a80..6b1d1da 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,11 +50,11 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
//
std::vector<BasicBlock*> ReturningBlocks;
std::vector<BasicBlock*> UnreachableBlocks;
- for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (isa<ReturnInst>(I->getTerminator()))
- ReturningBlocks.push_back(I);
- else if (isa<UnreachableInst>(I->getTerminator()))
- UnreachableBlocks.push_back(I);
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+ else if (isa<UnreachableInst>(I.getTerminator()))
+ UnreachableBlocks.push_back(&I);
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 8c72641..1add78e 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -19,11 +19,14 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
void ValueMaterializer::anchor() {}
+void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) {
+}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
@@ -35,15 +38,28 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// If we have a materializer and it can materialize a value, use that.
if (Materializer) {
- if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
- return VM[V] = NewV;
+ if (Value *NewV =
+ Materializer->materializeDeclFor(const_cast<Value *>(V))) {
+ VM[V] = NewV;
+ if (auto *NewGV = dyn_cast<GlobalValue>(NewV))
+ Materializer->materializeInitFor(
+ NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V)));
+ return NewV;
+ }
}
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V))
+ if (isa<GlobalValue>(V)) {
+ if (Flags & RF_NullMapMissingGlobalValues) {
+ assert(!(Flags & RF_IgnoreMissingEntries) &&
+ "Illegal to specify both RF_NullMapMissingGlobalValues and "
+ "RF_IgnoreMissingEntries");
+ return nullptr;
+ }
return VM[V] = const_cast<Value*>(V);
-
+ }
+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
// Inline asm may need *type* remapping.
FunctionType *NewTy = IA->getFunctionType();
@@ -73,7 +89,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // assert(MappedMD && "Referenced metadata value not in value map");
+ // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) &&
+ // "Referenced metadata value not in value map");
return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD);
}
@@ -127,9 +144,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
Flags, TypeMapper, Materializer));
}
-
+ Type *NewSrcTy = nullptr;
+ if (TypeMapper)
+ if (auto *GEPO = dyn_cast<GEPOperator>(C))
+ NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return VM[V] = CE->getWithOperands(Ops, NewTy);
+ return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
if (isa<ConstantArray>(C))
return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
if (isa<ConstantStruct>(C))
@@ -146,29 +167,42 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
}
static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key,
- Metadata *Val) {
+ Metadata *Val, ValueMaterializer *Materializer,
+ RemapFlags Flags) {
VM.MD()[Key].reset(Val);
+ if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) {
+ auto *N = dyn_cast_or_null<MDNode>(Val);
+ // Need to invoke this once we have non-temporary MD.
+ if (!N || !N->isTemporary())
+ Materializer->replaceTemporaryMetadata(Key, Val);
+ }
return Val;
}
-static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
- return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));
+static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD,
+ ValueMaterializer *Materializer, RemapFlags Flags) {
+ return mapToMetadata(VM, MD, const_cast<Metadata *>(MD), Materializer, Flags);
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer);
-static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
+static Metadata *mapMetadataOp(Metadata *Op,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
if (!Op)
return nullptr;
- if (Metadata *MappedOp =
- MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))
+
+ if (Materializer && !Materializer->isMetadataNeeded(Op))
+ return nullptr;
+
+ if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags,
+ TypeMapper, Materializer))
return MappedOp;
// Use identity map if MappedOp is null and we can ignore missing entries.
if (Flags & RF_IgnoreMissingEntries)
@@ -178,89 +212,113 @@ static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // llvm_unreachable("Referenced metadata not in value map!");
+ // assert((Flags & RF_NullMapMissingGlobalValues) &&
+ // "Referenced metadata not in value map!");
return nullptr;
}
-/// \brief Remap nodes.
+/// Resolve uniquing cycles involving the given metadata.
+static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+ if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
+ if (!MDMaterialized && N->isTemporary())
+ return;
+ if (!N->isResolved())
+ N->resolveCycles(MDMaterialized);
+ }
+}
+
+/// Remap the operands of an MDNode.
///
-/// Insert \c NewNode in the value map, and then remap \c OldNode's operands.
-/// Assumes that \c NewNode is already a clone of \c OldNode.
+/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is
+/// distinct, uniquing cycles are resolved as they're found.
///
-/// \pre \c NewNode is a clone of \c OldNode.
-static bool remap(const MDNode *OldNode, MDNode *NewNode,
- SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
- "Expected nodes to match");
- assert(OldNode->isResolved() && "Expected resolved node");
- assert(!NewNode->isUniqued() && "Expected non-uniqued node");
-
- // Map the node upfront so it's available for cyclic references.
- mapToMetadata(VM, OldNode, NewNode);
- bool AnyChanged = false;
- for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
- Metadata *Old = OldNode->getOperand(I);
- assert(NewNode->getOperand(I) == Old &&
- "Expected old operands to already be in place");
+/// \pre \c Node.isDistinct() or \c Node.isTemporary().
+static bool remapOperands(MDNode &Node,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
+ ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(!Node.isUniqued() && "Expected temporary or distinct node");
+ const bool IsDistinct = Node.isDistinct();
- Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
- TypeMapper, Materializer);
+ bool AnyChanged = false;
+ for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) {
+ Metadata *Old = Node.getOperand(I);
+ Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
if (Old != New) {
AnyChanged = true;
- NewNode->replaceOperandWith(I, New);
+ Node.replaceOperandWith(I, New);
+
+ // Resolve uniquing cycles underneath distinct nodes on the fly so they
+ // don't infect later operands.
+ if (IsDistinct)
+ resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata));
}
}
return AnyChanged;
}
-/// \brief Map a distinct MDNode.
+/// Map a distinct MDNode.
///
-/// Distinct nodes are not uniqued, so they must always recreated.
+/// Whether distinct nodes change is independent of their operands. If \a
+/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in
+/// place; effectively, they're moved from one graph to another. Otherwise,
+/// they're cloned/duplicated, and the new copy's operands are remapped.
static Metadata *mapDistinctNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(Node->isDistinct() && "Expected distinct node");
- MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
- remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
+ MDNode *NewMD;
+ if (Flags & RF_MoveDistinctMDs)
+ NewMD = const_cast<MDNode *>(Node);
+ else
+ NewMD = MDNode::replaceWithDistinct(Node->clone());
- // Track any cycles beneath this node.
- for (Metadata *Op : NewMD->operands())
- if (auto *Node = dyn_cast_or_null<MDNode>(Op))
- if (!Node->isResolved())
- Cycles.push_back(Node);
-
- return NewMD;
+ // Remap operands later.
+ DistinctWorklist.push_back(NewMD);
+ return mapToMetadata(VM, Node, NewMD, Materializer, Flags);
}
/// \brief Map a uniqued MDNode.
///
/// Uniqued nodes may not need to be recreated (they may map to themselves).
static Metadata *mapUniquedNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- assert(Node->isUniqued() && "Expected uniqued node");
+ assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) &&
+ "Expected uniqued node");
- // Create a temporary node upfront in case we have a metadata cycle.
+ // Create a temporary node and map it upfront in case we have a uniquing
+ // cycle. If necessary, this mapping will get updated by RAUW logic before
+ // returning.
auto ClonedMD = Node->clone();
- if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
- // No operands changed, so use the identity mapping.
- return mapToSelf(VM, Node);
+ mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags);
+ if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer)) {
+ // No operands changed, so use the original.
+ ClonedMD->replaceAllUsesWith(const_cast<MDNode *>(Node));
+ // Even though replaceAllUsesWith would have replaced the value map
+ // entry, we need to explicitly map with the final non-temporary node
+ // to replace any temporary metadata via the callback.
+ return mapToSelf(VM, Node, Materializer, Flags);
+ }
- // At least one operand has changed, so uniquify the cloned node.
+ // Uniquify the cloned node. Explicitly map it with the final non-temporary
+ // node so that replacement of temporary metadata via the callback occurs.
return mapToMetadata(VM, Node,
- MDNode::replaceWithUniqued(std::move(ClonedMD)));
+ MDNode::replaceWithUniqued(std::move(ClonedMD)),
+ Materializer, Flags);
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &DistinctWorklist,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -269,26 +327,28 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
return NewMD;
if (isa<MDString>(MD))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
if (isa<ConstantAsMetadata>(MD))
if ((Flags & RF_NoModuleLevelChanges))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) {
Value *MappedV =
MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer);
if (VMD->getValue() == MappedV ||
(!MappedV && (Flags & RF_IgnoreMissingEntries)))
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
// FIXME: This assert crashes during bootstrap, but I think it should be
// correct. For now, just match behaviour from before the metadata/value
// split.
//
- // assert(MappedV && "Referenced metadata not in value map!");
+ // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) &&
+ // "Referenced metadata not in value map!");
if (MappedV)
- return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV));
+ return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer,
+ Flags);
return nullptr;
}
@@ -299,37 +359,54 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
// If this is a module-level metadata and we know that nothing at the
// module level is changing, then use an identity mapping.
if (Flags & RF_NoModuleLevelChanges)
- return mapToSelf(VM, MD);
+ return mapToSelf(VM, MD, Materializer, Flags);
// Require resolved nodes whenever metadata might be remapped.
- assert(Node->isResolved() && "Unexpected unresolved node");
+ assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) &&
+ "Unexpected unresolved node");
+
+ if (Materializer && Node->isTemporary()) {
+ assert(Flags & RF_HaveUnmaterializedMetadata);
+ Metadata *TempMD =
+ Materializer->mapTemporaryMetadata(const_cast<Metadata *>(MD));
+ // If the above callback returned an existing temporary node, use it
+ // instead of the current temporary node. This happens when earlier
+ // function importing passes already created and saved a temporary
+ // metadata node for the same value id.
+ if (TempMD) {
+ mapToMetadata(VM, MD, TempMD, Materializer, Flags);
+ return TempMD;
+ }
+ }
if (Node->isDistinct())
- return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+ return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
- return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
+ return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
}
Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- SmallVector<MDNode *, 8> Cycles;
- Metadata *NewMD =
- MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
-
- // Resolve cycles underneath MD.
- if (NewMD && NewMD != MD) {
- if (auto *N = dyn_cast<MDNode>(NewMD))
- if (!N->isResolved())
- N->resolveCycles();
-
- for (MDNode *N : Cycles)
- if (!N->isResolved())
- N->resolveCycles();
- } else {
- // Shouldn't get unresolved cycles if nothing was remapped.
- assert(Cycles.empty() && "Expected no unresolved cycles");
- }
+ SmallVector<MDNode *, 8> DistinctWorklist;
+ Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper,
+ Materializer);
+
+ // When there are no module-level changes, it's possible that the metadata
+ // graph has temporaries. Skip the logic to resolve cycles, since it's
+ // unnecessary (and invalid) in that case.
+ if (Flags & RF_NoModuleLevelChanges)
+ return NewMD;
+
+ // Resolve cycles involving the entry metadata.
+ resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+
+ // Remap the operands of distinct MDNodes.
+ while (!DistinctWorklist.empty())
+ remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags,
+ TypeMapper, Materializer);
return NewMD;
}
@@ -374,14 +451,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
// Remap attached metadata.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
I->getAllMetadata(MDs);
- for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
- MI = MDs.begin(),
- ME = MDs.end();
- MI != ME; ++MI) {
- MDNode *Old = MI->second;
+ for (const auto &MI : MDs) {
+ MDNode *Old = MI.second;
MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
- I->setMetadata(MI->first, New);
+ I->setMetadata(MI.first, New);
}
if (!TypeMapper)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 215d6f9..8844d57 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -25,8 +25,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -204,9 +207,10 @@ namespace {
BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
- AA = &P->getAnalysis<AliasAnalysis>();
+ AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &P->getAnalysis<ScalarEvolution>();
+ SE = &P->getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &P->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = IgnoreTargetInfo
? nullptr
: &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -221,6 +225,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
+ const TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -437,9 +442,10 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB) override {
// OptimizeNone check deferred to vectorizeBB().
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = IgnoreTargetInfo
? nullptr
: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
@@ -450,13 +456,15 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
BasicBlockPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.setPreservesCFG();
}
@@ -842,7 +850,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, TLI);
return true;
}
@@ -1239,20 +1247,23 @@ namespace {
if (I == Start) IAfterStart = true;
bool IsSimpleLoadStore;
- if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
+ if (!isInstVectorizable(&*I, IsSimpleLoadStore))
+ continue;
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
+ if (I->mayWriteToMemory())
+ WriteSet.add(&*I);
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = std::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
- if (J == Start) JAfterStart = true;
+ if (&*J == Start)
+ JAfterStart = true;
// Determine if J uses I, if so, exit the loop.
- bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep);
if (Config.FastDep) {
// Note: For this heuristic to be effective, independent operations
// must tend to be intermixed. This is likely to be true from some
@@ -1269,25 +1280,26 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible.
int CostSavings, FixedOrder;
- if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
- CostSavings, FixedOrder)) continue;
+ if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len,
+ CostSavings, FixedOrder))
+ continue;
// J is a candidate for merging with I.
if (PairableInsts.empty() ||
- PairableInsts[PairableInsts.size()-1] != I) {
- PairableInsts.push_back(I);
+ PairableInsts[PairableInsts.size() - 1] != &*I) {
+ PairableInsts.push_back(&*I);
}
- CandidatePairs[I].push_back(J);
+ CandidatePairs[&*I].push_back(&*J);
++TotalPairs;
if (TTI)
- CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
- CostSavings));
+ CandidatePairCostSavings.insert(
+ ValuePairWithCost(ValuePair(&*I, &*J), CostSavings));
if (FixedOrder == 1)
- FixedOrderPairs.insert(ValuePair(I, J));
+ FixedOrderPairs.insert(ValuePair(&*I, &*J));
else if (FixedOrder == -1)
- FixedOrderPairs.insert(ValuePair(J, I));
+ FixedOrderPairs.insert(ValuePair(&*J, &*I));
// The next call to this function must start after the last instruction
// selected during this invocation.
@@ -1468,14 +1480,16 @@ namespace {
BasicBlock::iterator E = BB.end(), EL =
BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
- if (IsInPair.find(I) == IsInPair.end()) continue;
+ if (IsInPair.find(&*I) == IsInPair.end())
+ continue;
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- if (I->mayWriteToMemory()) WriteSet.add(I);
+ if (I->mayWriteToMemory())
+ WriteSet.add(&*I);
for (BasicBlock::iterator J = std::next(I); J != E; ++J) {
- (void) trackUsesOfI(Users, WriteSet, I, J);
+ (void)trackUsesOfI(Users, WriteSet, &*I, &*J);
if (J == EL)
break;
@@ -1484,7 +1498,7 @@ namespace {
for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
U != E; ++U) {
if (IsInPair.find(*U) == IsInPair.end()) continue;
- PairableInstUsers.insert(ValuePair(I, *U));
+ PairableInstUsers.insert(ValuePair(&*I, *U));
}
if (I == EL)
@@ -2806,55 +2820,51 @@ namespace {
Instruction *J, Instruction *K,
Instruction *&InsertionPt,
Instruction *&K1, Instruction *&K2) {
- if (isa<StoreInst>(I)) {
- AA->replaceWithNewValue(I, K);
- AA->replaceWithNewValue(J, K);
- } else {
- Type *IType = I->getType();
- Type *JType = J->getType();
+ if (isa<StoreInst>(I))
+ return;
- VectorType *VType = getVecTypeForPair(IType, JType);
- unsigned numElem = VType->getNumElements();
+ Type *IType = I->getType();
+ Type *JType = J->getType();
- unsigned numElemI = getNumScalarElements(IType);
- unsigned numElemJ = getNumScalarElements(JType);
+ VectorType *VType = getVecTypeForPair(IType, JType);
+ unsigned numElem = VType->getNumElements();
- if (IType->isVectorTy()) {
- std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
- for (unsigned v = 0; v < numElemI; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v);
- }
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
- K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get( Mask1),
- getReplacementName(K, false, 1));
- } else {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- K1 = ExtractElementInst::Create(K, CV0,
- getReplacementName(K, false, 1));
+ if (IType->isVectorTy()) {
+ std::vector<Constant *> Mask1(numElemI), Mask2(numElemI);
+ for (unsigned v = 0; v < numElemI; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v);
}
- if (JType->isVectorTy()) {
- std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
- for (unsigned v = 0; v < numElemJ; ++v) {
- Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
- Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v);
- }
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(Mask1),
+ getReplacementName(K, false, 1));
+ } else {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1));
+ }
- K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get( Mask2),
- getReplacementName(K, false, 2));
- } else {
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
- K2 = ExtractElementInst::Create(K, CV1,
- getReplacementName(K, false, 2));
+ if (JType->isVectorTy()) {
+ std::vector<Constant *> Mask1(numElemJ), Mask2(numElemJ);
+ for (unsigned v = 0; v < numElemJ; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v);
}
- K1->insertAfter(K);
- K2->insertAfter(K1);
- InsertionPt = K2;
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1);
+ K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2));
}
+
+ K1->insertAfter(K);
+ K2->insertAfter(K1);
+ InsertionPt = K2;
}
// Move all uses of the function I (including pairing-induced uses) after J.
@@ -2869,7 +2879,7 @@ namespace {
if (I->mayWriteToMemory()) WriteSet.add(I);
for (; cast<Instruction>(L) != J; ++L)
- (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
+ (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs);
assert(cast<Instruction>(L) == J &&
"Tracking has not proceeded far enough to check for dependencies");
@@ -2891,9 +2901,9 @@ namespace {
if (I->mayWriteToMemory()) WriteSet.add(I);
for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
+ if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) {
// Move this instruction
- Instruction *InstToMove = L; ++L;
+ Instruction *InstToMove = &*L++;
DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
" to after " << *InsertionPt << "\n");
@@ -2924,11 +2934,11 @@ namespace {
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J
// could be before I if this is an inverted input.
- for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
- if (trackUsesOfI(Users, WriteSet, I, L)) {
+ for (BasicBlock::iterator E = BB.end(); L != E; ++L) {
+ if (trackUsesOfI(Users, WriteSet, I, &*L)) {
if (L->mayReadFromMemory()) {
- LoadMoveSet[L].push_back(I);
- LoadMoveSetPairs.insert(ValuePair(L, I));
+ LoadMoveSet[&*L].push_back(I);
+ LoadMoveSetPairs.insert(ValuePair(&*L, I));
}
}
}
@@ -2991,7 +3001,7 @@ namespace {
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(&*PI);
if (P == ChosenPairs.end()) {
++PI;
continue;
@@ -3116,12 +3126,9 @@ namespace {
} else if (!isa<StoreInst>(K))
K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_fpmath
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_group};
combineMetadata(K, H, KnownIDs);
K->intersectOptionalDataWith(H);
@@ -3145,8 +3152,6 @@ namespace {
if (!isa<StoreInst>(I)) {
L->replaceAllUsesWith(K1);
H->replaceAllUsesWith(K2);
- AA->replaceWithNewValue(L, K1);
- AA->replaceWithNewValue(H, K2);
}
// Instructions that may read from memory may be in the load move set.
@@ -3197,10 +3202,14 @@ namespace {
char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 69ca268..a627dd6 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -48,7 +48,6 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
@@ -58,10 +57,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -99,6 +101,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
+#include <functional>
#include <map>
#include <tuple>
@@ -123,6 +126,11 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
"trip count that is smaller than this "
"value."));
+static cl::opt<bool> MaximizeBandwidth(
+ "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
+ cl::desc("Maximize bandwidth when selecting vectorization factor which "
+ "will be determined by the smallest type in loop."));
+
/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
/// for (i = 0; i < N; ++i)
@@ -136,7 +144,7 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
/// ...
static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
- cl::desc("Enable symblic stride memory access versioning"));
+ cl::desc("Enable symbolic stride memory access versioning"));
static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
@@ -214,12 +222,27 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
+static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
+ "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum allowed number of runtime memory checks with a "
+ "vectorize(enable) pragma."));
+
+static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
+ "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed."));
+
+static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
+ "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed with a "
+ "vectorize(enable) pragma"));
+
namespace {
// Forward declarations.
+class LoopVectorizeHints;
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
-class LoopVectorizeHints;
+class LoopVectorizationRequirements;
/// \brief This modifies LoopAccessReport to initialize message with
/// loop-vectorizer-specific part.
@@ -245,6 +268,32 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
+/// A helper function that returns the GEP instruction and knows to skip a
+/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination
+/// pointee types of the 'bitcast' have the same size.
+/// For example:
+///   bitcast double** %var to i64* - can be skipped
+///   bitcast double** %var to i8*  - cannot
+static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
+
+ if (isa<GetElementPtrInst>(Ptr))
+ return cast<GetElementPtrInst>(Ptr);
+
+ if (isa<BitCastInst>(Ptr) &&
+ isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) {
+ Type *BitcastTy = Ptr->getType();
+ Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy();
+ if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy))
+ return nullptr;
+ Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType();
+ Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType();
+ const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout();
+ if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty))
+ return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
+ }
+ return nullptr;
+}
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -261,25 +310,30 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) {
/// and reduction variables that were found to a given vectorization factor.
class InnerLoopVectorizer {
public:
- InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
+ InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+ : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
+ VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr), AddedSafetyChecks(false) {}
+ TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
+ AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
- void vectorize(LoopVectorizationLegality *L) {
+ // MinimumBitWidths maps scalar integer values to the smallest bitwidth they
+ // can be validly truncated to. The cost model has assumed this truncation
+ // will happen when vectorizing.
+ void vectorize(LoopVectorizationLegality *L,
+ MapVector<Instruction*,uint64_t> MinimumBitWidths) {
+ MinBWs = MinimumBitWidths;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop();
// Widen each instruction in the old loop to a new one in the new loop.
// Use the Legality module to find the induction and reduction variables.
vectorizeLoop();
- // Register the new loop and update the analysis passes.
- updateAnalysis();
}
// Return true if any runtime check is added.
@@ -302,14 +356,11 @@ protected:
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// \brief Add checks for strides that were assumed to be 1.
- ///
- /// Returns the last check instruction and the first check instruction in the
- /// pair as (first, last).
- std::pair<Instruction *, Instruction *> addStrideCheck(Instruction *Loc);
-
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
+ /// Create a new induction variable inside L.
+ PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
+ Value *Step, Instruction *DL);
/// Copy and widen the instructions from the old loop.
virtual void vectorizeLoop();
@@ -319,6 +370,9 @@ protected:
/// See PR14725.
void fixLCSSAPHIs();
+ /// Shrinks vector element sizes based on information in "MinBWs".
+ void truncateToMinimalBitwidths();
+
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
/// mask for the block BB.
@@ -329,7 +383,7 @@ protected:
/// A helper function to vectorize a single BB within the innermost loop.
void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV);
-
+
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
@@ -374,6 +428,23 @@ protected:
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
+ /// Returns (and creates if needed) the original loop trip count.
+ Value *getOrCreateTripCount(Loop *NewLoop);
+
+ /// Returns (and creates if needed) the trip count of the widened loop.
+ Value *getOrCreateVectorTripCount(Loop *NewLoop);
+
+ /// Emit a bypass check to see if the trip count would overflow, or we
+  /// wouldn't have enough iterations to execute even one vector iteration
+  /// (i.e. VF * UF scalar iterations).
+ void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
+ /// Emit a bypass check to see if the vector trip count is nonzero.
+ void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
+ /// Emit a bypass check to see if all of the SCEV assumptions we've
+ /// had to make are correct.
+ void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
+ /// Emit bypass checks to check any memory assumptions we may have made.
+ void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+
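+  // A sketch of intended use (assuming the createEmptyLoop() flow in this
+  // patch): the guards above are emitted in declaration order, each one
+  // branching to the scalar preheader (Bypass) when its check fails.
+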
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
/// then a single scalar value is mapped to multiple vector parts. The parts
@@ -416,8 +487,10 @@ protected:
/// The original loop.
Loop *OrigLoop;
- /// Scev analysis to use.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies
+ /// dynamic knowledge to simplify SCEV expressions and converts them to a
+ /// more usable form.
+ PredicatedScalarEvolution &PSE;
/// Loop Info.
LoopInfo *LI;
/// Dominator Tree.
@@ -462,12 +535,21 @@ protected:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
- /// Holds the extended (to the widest induction type) start index.
- Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
+ /// Store instructions that should be predicated, as a pair
+ /// <StoreInst, Predicate>
+ SmallVector<std::pair<StoreInst*,Value*>, 4> PredicatedStores;
EdgeMaskCache MaskCache;
-
+ /// Trip count of the original loop.
+ Value *TripCount;
+ /// Trip count of the widened loop (TripCount - TripCount % (VF*UF))
+ Value *VectorTripCount;
+
+ /// Map of scalar integer values to the smallest bitwidth they can be legally
+ /// represented as. The vector equivalents of these values should be truncated
+ /// to this type.
+ MapVector<Instruction*,uint64_t> MinBWs;
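+  // e.g. MinBWs[I] == 8 for an i32 instruction I means its vector form may be
+  // computed as <VF x i8> and re-extended afterwards; see
+  // truncateToMinimalBitwidths().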
LoopVectorizationLegality *Legal;
// Record whether runtime check is added.
@@ -476,10 +558,11 @@ protected:
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
- InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
+ InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned UnrollFactor)
- : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
@@ -551,7 +634,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) {
if (Kind != LLVMContext::MD_tbaa &&
Kind != LLVMContext::MD_alias_scope &&
Kind != LLVMContext::MD_noalias &&
- Kind != LLVMContext::MD_fpmath)
+ Kind != LLVMContext::MD_fpmath &&
+ Kind != LLVMContext::MD_nontemporal)
continue;
To->setMetadata(Kind, M.second);
@@ -559,7 +643,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) {
}
/// \brief Propagate known metadata from one instruction to a vector of others.
-static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *From) {
+static void propagateMetadata(SmallVectorImpl<Value *> &To,
+ const Instruction *From) {
for (Value *V : To)
if (Instruction *I = dyn_cast<Instruction>(V))
propagateMetadata(I, From);
@@ -699,8 +784,9 @@ private:
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
- InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT)
- : SE(SE), TheLoop(L), DT(DT) {}
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT)
+ : PSE(PSE), TheLoop(L), DT(DT) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
@@ -730,7 +816,11 @@ public:
}
private:
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
Loop *TheLoop;
DominatorTree *DT;
@@ -778,6 +868,304 @@ private:
const ValueToValueMap &Strides);
};
+/// Utility class for getting and setting loop vectorizer hints in the form
+/// of loop metadata.
+/// This class keeps a number of loop annotations locally (as member variables)
+/// and can, upon request, write them back as metadata on the loop. It will
+/// initially scan the loop for existing metadata, and will update the local
+/// values based on information in the loop.
+/// We cannot write all values to metadata, as the mere presence of some info,
+/// for example 'force', means a decision has been made. So, we need to be
+/// careful NOT to add them if the user hasn't specifically asked for them.
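+///
+/// For illustration, hints are stored as self-referential loop metadata of
+/// roughly this shape (names match Prefix() and the Hint names below):
+///   br i1 %cond, label %header, label %exit, !llvm.loop !0
+///   !0 = distinct !{!0, !1, !2}
+///   !1 = !{!"llvm.loop.vectorize.width", i32 4}
+///   !2 = !{!"llvm.loop.interleave.count", i32 2}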
+class LoopVectorizeHints {
+ enum HintKind {
+ HK_WIDTH,
+ HK_UNROLL,
+ HK_FORCE
+ };
+
+ /// Hint - associates name and validation with the hint value.
+ struct Hint {
+ const char * Name;
+ unsigned Value; // This may have to change for non-numeric values.
+ HintKind Kind;
+
+ Hint(const char * Name, unsigned Value, HintKind Kind)
+ : Name(Name), Value(Value), Kind(Kind) { }
+
+ bool validate(unsigned Val) {
+ switch (Kind) {
+ case HK_WIDTH:
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
+ case HK_UNROLL:
+ return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
+ case HK_FORCE:
+ return (Val <= 1);
+ }
+ return false;
+ }
+ };
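+
+  // For example, validate(4) succeeds for HK_WIDTH (a power of two no larger
+  // than VectorizerParams::MaxVectorWidth), while validate(3) fails and the
+  // offending hint is ignored by setHint() below.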
+
+ /// Vectorization width.
+ Hint Width;
+ /// Vectorization interleave factor.
+ Hint Interleave;
+ /// Vectorization forced
+ Hint Force;
+
+ /// Return the loop metadata prefix.
+ static StringRef Prefix() { return "llvm.loop."; }
+
+public:
+ enum ForceKind {
+ FK_Undefined = -1, ///< Not selected.
+ FK_Disabled = 0, ///< Forcing disabled.
+ FK_Enabled = 1, ///< Forcing enabled.
+ };
+
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor,
+ HK_WIDTH),
+ Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Force("vectorize.enable", FK_Undefined, HK_FORCE),
+ TheLoop(L) {
+ // Populate values with existing loop metadata.
+ getHintsFromMetadata();
+
+ // force-vector-interleave overrides DisableInterleaving.
+ if (VectorizerParams::isInterleaveForced())
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
+
+ DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ << "LV: Interleaving disabled by the pass manager\n");
+ }
+
+ /// Mark the loop L as already vectorized by setting the width to 1.
+ void setAlreadyVectorized() {
+ Width.Value = Interleave.Value = 1;
+ Hint Hints[] = {Width, Interleave};
+ writeHintsToMetadata(Hints);
+ }
+
+ bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
+ if (getForce() == LoopVectorizeHints::FK_Disabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(),
+ vectorizeAnalysisPassName(), *F,
+ L->getStartLoc(), emitRemark());
+ return false;
+ }
+
+ if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(),
+ vectorizeAnalysisPassName(), *F,
+ L->getStartLoc(), emitRemark());
+ return false;
+ }
+
+ if (getWidth() == 1 && getInterleave() == 1) {
+ // FIXME: Add a separate metadata to indicate when the loop has already
+ // been vectorized instead of setting width and count to 1.
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ // FIXME: Add interleave.disable metadata. This will allow
+ // vectorize.disable to be used without disabling the pass and errors
+ // to differentiate between disabled vectorization and a width of 1.
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(),
+ "loop not vectorized: vectorization and interleaving are explicitly "
+ "disabled, or vectorize width and interleave count are both set to "
+ "1");
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Dumps all the hint information.
+ std::string emitRemark() const {
+ VectorizationReport R;
+ if (Force.Value == LoopVectorizeHints::FK_Disabled)
+ R << "vectorization is explicitly disabled";
+ else {
+ R << "use -Rpass-analysis=loop-vectorize for more info";
+ if (Force.Value == LoopVectorizeHints::FK_Enabled) {
+ R << " (Force=true";
+ if (Width.Value != 0)
+ R << ", Vector Width=" << Width.Value;
+ if (Interleave.Value != 0)
+ R << ", Interleave Count=" << Interleave.Value;
+ R << ")";
+ }
+ }
+
+ return R.str();
+ }
+
+ unsigned getWidth() const { return Width.Value; }
+ unsigned getInterleave() const { return Interleave.Value; }
+ enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+ const char *vectorizeAnalysisPassName() const {
+    // If hints are provided that don't disable vectorization, use the
+ // AlwaysPrint pass name to force the frontend to print the diagnostic.
+ if (getWidth() == 1)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Disabled)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+ return LV_NAME;
+ return DiagnosticInfo::AlwaysPrint;
+ }
+
+ bool allowReordering() const {
+    // When hints that enable vectorization are provided, we allow the
+    // vectorizer to change the order of operations given by the scalar loop.
+    // This is not enabled by default because it can be unsafe or inefficient.
+    // For example,
+ // reordering floating-point operations will change the way round-off
+ // error accumulates in the loop.
+ return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+ }
+
+private:
+ /// Find hints specified in the loop metadata and update local values.
+ void getHintsFromMetadata() {
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (!LoopID)
+ return;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ const MDString *S = nullptr;
+ SmallVector<Metadata *, 4> Args;
+
+ // The expected hint is either a MDString or a MDNode with the first
+ // operand a MDString.
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ S = dyn_cast<MDString>(MD->getOperand(0));
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
+ Args.push_back(MD->getOperand(i));
+ } else {
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
+ assert(Args.size() == 0 && "too many arguments for MDString");
+ }
+
+ if (!S)
+ continue;
+
+ // Check if the hint starts with the loop metadata prefix.
+ StringRef Name = S->getString();
+ if (Args.size() == 1)
+ setHint(Name, Args[0]);
+ }
+ }
+
+  /// Checks a string hint with one operand and sets the value if valid.
+ void setHint(StringRef Name, Metadata *Arg) {
+ if (!Name.startswith(Prefix()))
+ return;
+ Name = Name.substr(Prefix().size(), StringRef::npos);
+
+ const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
+ if (!C) return;
+ unsigned Val = C->getZExtValue();
+
+ Hint *Hints[] = {&Width, &Interleave, &Force};
+ for (auto H : Hints) {
+ if (Name == H->Name) {
+ if (H->validate(Val))
+ H->Value = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
+ break;
+ }
+ }
+ }
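+
+  // e.g. the metadata name "llvm.loop.vectorize.width" is stripped of the
+  // "llvm.loop." prefix above and then matched against Width.Name
+  // ("vectorize.width").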
+
+ /// Create a new hint from name / value pair.
+ MDNode *createHintMetadata(StringRef Name, unsigned V) const {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {MDString::get(Context, Name),
+ ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+ }
+
+ /// Matches metadata with hint name.
+ bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
+ MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
+ if (!Name)
+ return false;
+
+ for (auto H : HintTypes)
+ if (Name->getString().endswith(H.Name))
+ return true;
+ return false;
+ }
+
+ /// Sets current hints into loop metadata, keeping other values intact.
+ void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
+ if (HintTypes.size() == 0)
+ return;
+
+    // Reserve the first element for LoopID (see below).
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, then ignore the existing operands.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+        // If the node is in the update list, ignore its old value.
+ if (!matchesHintMetadataName(Node, HintTypes))
+ MDs.push_back(Node);
+ }
+ }
+
+ // Now, add the missing hints.
+ for (auto H : HintTypes)
+ MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
+
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ TheLoop->setLoopID(NewLoopID);
+ }
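+
+  // Illustrative effect: starting from !0 = distinct !{!0, !A}, where !A is
+  // an unrelated hint node, setAlreadyVectorized() yields a fresh
+  // self-referential node
+  //   !1 = distinct !{!1, !A, !{!"llvm.loop.vectorize.width", i32 1},
+  //                   !{!"llvm.loop.interleave.count", i32 1}}
+  // and installs it as the loop ID.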
+
+ /// The loop these hints belong to.
+ const Loop *TheLoop;
+};
+
+static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop,
+ const LoopVectorizeHints &Hints,
+ const LoopAccessReport &Message) {
+ const char *Name = Hints.vectorizeAnalysisPassName();
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name);
+}
+
+static void emitMissedWarning(Function *F, Loop *L,
+ const LoopVectorizeHints &LH) {
+ emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(),
+ LH.emitRemark());
+
+ if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
+ if (LH.getWidth() != 1)
+ emitLoopVectorizeWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop vectorization");
+ else if (LH.getInterleave() != 1)
+ emitLoopInterleaveWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop interleaving");
+ }
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -793,87 +1181,17 @@ private:
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- TargetLibraryInfo *TLI, AliasAnalysis *AA,
- Function *F, const TargetTransformInfo *TTI,
- LoopAccessAnalysis *LAA)
- : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
- TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
-
- /// This enum represents the kinds of inductions that we support.
- enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
- };
-
- /// A struct for saving information about induction variables.
- struct InductionInfo {
- InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
- : StartValue(Start), IK(K), StepValue(Step) {
- assert(IK != IK_NoInduction && "Not an induction");
- assert(StartValue && "StartValue is null");
- assert(StepValue && !StepValue->isZero() && "StepValue is zero");
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
- "StartValue is not a pointer for pointer induction");
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
- "StartValue is not an integer for integer induction");
- assert(StepValue->getType()->isIntegerTy() &&
- "StepValue is not an integer");
- }
- InductionInfo()
- : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
-
- /// Get the consecutive direction. Returns:
- /// 0 - unknown or non-consecutive.
- /// 1 - consecutive and increasing.
- /// -1 - consecutive and decreasing.
- int getConsecutiveDirection() const {
- if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
- return StepValue->getSExtValue();
- return 0;
- }
-
- /// Compute the transformed value of Index at offset StartValue using step
- /// StepValue.
- /// For integer induction, returns StartValue + Index * StepValue.
- /// For pointer induction, returns StartValue[Index * StepValue].
- /// FIXME: The newly created binary instructions should contain nsw/nuw
- /// flags, which can be found from the original scalar operations.
- Value *transform(IRBuilder<> &B, Value *Index) const {
- switch (IK) {
- case IK_IntInduction:
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
- if (StepValue->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateAdd(StartValue, Index);
-
- case IK_PtrInduction:
- assert(Index->getType() == StepValue->getType() &&
- "Index type does not match StepValue type");
- if (StepValue->isMinusOne())
- Index = B.CreateNeg(Index);
- else if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateGEP(nullptr, StartValue, Index);
-
- case IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
- }
-
- /// Start value.
- TrackingVH<Value> StartValue;
- /// Induction kind.
- InductionKind IK;
- /// Step value.
- ConstantInt *StepValue;
- };
+ LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ AliasAnalysis *AA, Function *F,
+ const TargetTransformInfo *TTI,
+ LoopAccessAnalysis *LAA,
+ LoopVectorizationRequirements *R,
+ const LoopVectorizeHints *H)
+ : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+ Requirements(R), Hints(H) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -881,7 +1199,7 @@ public:
/// InductionList saves induction variables and maps them to the
/// induction descriptor.
- typedef MapVector<PHINode*, InductionInfo> InductionList;
+ typedef MapVector<PHINode*, InductionDescriptor> InductionList;
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
@@ -903,6 +1221,9 @@ public:
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
+ /// Returns True if PN is a reduction variable in this loop.
+ bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
+
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);
@@ -954,12 +1275,12 @@ public:
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
- return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
+ return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
- return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
+ return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType);
}
/// Returns true if vector representation of the instruction \p I
/// requires mask.
@@ -999,10 +1320,6 @@ private:
/// and we know that we can read from them without segfault.
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
- /// Returns the induction kind of Phi and record the step. This function may
- /// return NoInduction if the PHI is not an induction variable.
- InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
-
  /// \brief Collect memory accesses with loop-invariant strides.
///
/// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
@@ -1013,16 +1330,20 @@ private:
/// not vectorized. These are handled as LoopAccessReport rather than
/// VectorizationReport because the << operator of VectorizationReport returns
/// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) {
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
+ void emitAnalysis(const LoopAccessReport &Message) const {
+ emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
}
unsigned NumPredStores;
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
+ /// Applies dynamic knowledge to simplify SCEV expressions in the context
+ /// of existing SCEV assumptions. The analysis will also add a minimal set
+ /// of new predicates if this is required to enable vectorization and
+ /// unrolling.
+ PredicatedScalarEvolution &PSE;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Parent function
@@ -1065,12 +1386,18 @@ private:
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+ /// Vectorization requirements that will go through late-evaluation.
+ LoopVectorizationRequirements *Requirements;
+
+ /// Used to emit an analysis of any legality issues.
+ const LoopVectorizeHints *Hints;
+
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
- SmallPtrSet<const Instruction*, 8> MaskedOp;
+ SmallPtrSet<const Instruction *, 8> MaskedOp;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1082,15 +1409,14 @@ private:
/// different operations.
class LoopVectorizationCostModel {
public:
- LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
- LoopVectorizationLegality *Legal,
+ LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI, AssumptionCache *AC,
- const Function *F, const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
- TheFunction(F), Hints(Hints) {
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
- }
+ const TargetLibraryInfo *TLI, DemandedBits *DB,
+ AssumptionCache *AC, const Function *F,
+ const LoopVectorizeHints *Hints)
+ : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+ AC(AC), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -1103,10 +1429,10 @@ public:
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize);
- /// \return The size (in bits) of the widest type in the code that
- /// needs to be vectorized. We ignore values that remain scalar such as
+ /// \return The size (in bits) of the smallest and widest types in the code
+ /// that needs to be vectorized. We ignore values that remain scalar such as
/// 64 bit loop indices.
- unsigned getWidestType();
+ std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
/// \return The desired interleave count.
/// If interleave count has been specified by metadata it will be returned.
@@ -1133,8 +1459,13 @@ public:
unsigned NumInstructions;
};
- /// \return information about the register usage of the loop.
- RegisterUsage calculateRegisterUsage();
+ /// \return Returns information about the register usages of the loop for the
+ /// given vectorization factors.
+ SmallVector<RegisterUsage, 8>
+ calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+
+ /// Collect values we want to ignore in the cost model.
+ void collectValuesToIgnore();
private:
/// Returns the expected execution cost. The unit of the cost does
@@ -1155,17 +1486,20 @@ private:
/// not vectorized. These are handled as LoopAccessReport rather than
/// VectorizationReport because the << operator of VectorizationReport returns
/// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) {
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
+ void emitAnalysis(const LoopAccessReport &Message) const {
+ emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
}
- /// Values used only by @llvm.assume calls.
- SmallPtrSet<const Value *, 32> EphValues;
+public:
+ /// Map of scalar integer values to the smallest bitwidth they can be legally
+ /// represented as. The vector equivalents of these values should be truncated
+ /// to this type.
+ MapVector<Instruction*,uint64_t> MinBWs;
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// Predicated scalar evolution analysis.
+ PredicatedScalarEvolution &PSE;
/// Loop Info analysis.
LoopInfo *LI;
/// Vectorization legality.
@@ -1174,247 +1508,78 @@ private:
const TargetTransformInfo &TTI;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Demanded bits analysis.
+ DemandedBits *DB;
+ /// Assumption cache.
+ AssumptionCache *AC;
const Function *TheFunction;
- // Loop Vectorize Hint.
+ /// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
+ /// Values to ignore in the cost model.
+ SmallPtrSet<const Value *, 16> ValuesToIgnore;
+ /// Values to ignore in the cost model when VF > 1.
+ SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};
-/// Utility class for getting and setting loop vectorizer hints in the form
-/// of loop metadata.
-/// This class keeps a number of loop annotations locally (as member variables)
-/// and can, upon request, write them back as metadata on the loop. It will
-/// initially scan the loop for existing metadata, and will update the local
-/// values based on information in the loop.
-/// We cannot write all values to metadata, as the mere presence of some info,
-/// for example 'force', means a decision has been made. So, we need to be
-/// careful NOT to add them if the user hasn't specifically asked so.
-class LoopVectorizeHints {
- enum HintKind {
- HK_WIDTH,
- HK_UNROLL,
- HK_FORCE
- };
-
- /// Hint - associates name and validation with the hint value.
- struct Hint {
- const char * Name;
- unsigned Value; // This may have to change for non-numeric values.
- HintKind Kind;
-
- Hint(const char * Name, unsigned Value, HintKind Kind)
- : Name(Name), Value(Value), Kind(Kind) { }
-
- bool validate(unsigned Val) {
- switch (Kind) {
- case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
- case HK_UNROLL:
- return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
- case HK_FORCE:
- return (Val <= 1);
- }
- return false;
- }
- };
-
- /// Vectorization width.
- Hint Width;
- /// Vectorization interleave factor.
- Hint Interleave;
- /// Vectorization forced
- Hint Force;
-
- /// Return the loop metadata prefix.
- static StringRef Prefix() { return "llvm.loop."; }
-
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by legalize and costmodel. Once
+/// vectorization has been determined to be possible and profitable the
+/// requirements can be verified by looking for metadata or compiler options.
+/// For example, some loops require FP commutativity which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tell the user what needs to be done to vectorize the loop,
+/// for example by specifying #pragma clang loop vectorize or -ffast-math.
+/// Late evaluation should be used only when diagnostics can be generated
+/// that a non-expert user can follow.
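+///
+/// For example, a floating-point reduction such as "s += a[i]" can only be
+/// vectorized by reordering (reassociating) the additions; doesNotMeet()
+/// below then reports "cannot prove it is safe to reorder floating-point
+/// operations" unless the hints or fast-math allow reordering.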
+class LoopVectorizationRequirements {
public:
- enum ForceKind {
- FK_Undefined = -1, ///< Not selected.
- FK_Disabled = 0, ///< Forcing disabled.
- FK_Enabled = 1, ///< Forcing enabled.
- };
-
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
- : Width("vectorize.width", VectorizerParams::VectorizationFactor,
- HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
- Force("vectorize.enable", FK_Undefined, HK_FORCE),
- TheLoop(L) {
- // Populate values with existing loop metadata.
- getHintsFromMetadata();
-
- // force-vector-interleave overrides DisableInterleaving.
- if (VectorizerParams::isInterleaveForced())
- Interleave.Value = VectorizerParams::VectorizationInterleave;
-
- DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
- << "LV: Interleaving disabled by the pass manager\n");
- }
-
- /// Mark the loop L as already vectorized by setting the width to 1.
- void setAlreadyVectorized() {
- Width.Value = Interleave.Value = 1;
- Hint Hints[] = {Width, Interleave};
- writeHintsToMetadata(Hints);
- }
-
- /// Dumps all the hint information.
- std::string emitRemark() const {
- VectorizationReport R;
- if (Force.Value == LoopVectorizeHints::FK_Disabled)
- R << "vectorization is explicitly disabled";
- else {
- R << "use -Rpass-analysis=loop-vectorize for more info";
- if (Force.Value == LoopVectorizeHints::FK_Enabled) {
- R << " (Force=true";
- if (Width.Value != 0)
- R << ", Vector Width=" << Width.Value;
- if (Interleave.Value != 0)
- R << ", Interleave Count=" << Interleave.Value;
- R << ")";
- }
- }
-
- return R.str();
- }
-
- unsigned getWidth() const { return Width.Value; }
- unsigned getInterleave() const { return Interleave.Value; }
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
-
-private:
- /// Find hints specified in the loop metadata and update local values.
- void getHintsFromMetadata() {
- MDNode *LoopID = TheLoop->getLoopID();
- if (!LoopID)
- return;
-
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- const MDString *S = nullptr;
- SmallVector<Metadata *, 4> Args;
-
- // The expected hint is either a MDString or a MDNode with the first
- // operand a MDString.
- if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
- if (!MD || MD->getNumOperands() == 0)
- continue;
- S = dyn_cast<MDString>(MD->getOperand(0));
- for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
- Args.push_back(MD->getOperand(i));
- } else {
- S = dyn_cast<MDString>(LoopID->getOperand(i));
- assert(Args.size() == 0 && "too many arguments for MDString");
- }
-
- if (!S)
- continue;
-
- // Check if the hint starts with the loop metadata prefix.
- StringRef Name = S->getString();
- if (Args.size() == 1)
- setHint(Name, Args[0]);
+ LoopVectorizationRequirements()
+ : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {}
+
+ void addUnsafeAlgebraInst(Instruction *I) {
+ // First unsafe algebra instruction.
+ if (!UnsafeAlgebraInst)
+ UnsafeAlgebraInst = I;
+ }
+
+ void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
+
+ bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+ const char *Name = Hints.vectorizeAnalysisPassName();
+ bool Failed = false;
+ if (UnsafeAlgebraInst && !Hints.allowReordering()) {
+ emitOptimizationRemarkAnalysisFPCommute(
+ F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
+ VectorizationReport() << "cannot prove it is safe to reorder "
+ "floating-point operations");
+ Failed = true;
}
- }
-
- /// Checks string hint with one operand and set value if valid.
- void setHint(StringRef Name, Metadata *Arg) {
- if (!Name.startswith(Prefix()))
- return;
- Name = Name.substr(Prefix().size(), StringRef::npos);
-
- const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
- if (!C) return;
- unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force};
- for (auto H : Hints) {
- if (Name == H->Name) {
- if (H->validate(Val))
- H->Value = Val;
- else
- DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
- break;
- }
+ // Test if runtime memcheck thresholds are exceeded.
+ bool PragmaThresholdReached =
+ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+ bool ThresholdReached =
+ NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
+ if ((ThresholdReached && !Hints.allowReordering()) ||
+ PragmaThresholdReached) {
+ emitOptimizationRemarkAnalysisAliasing(
+ F->getContext(), Name, *F, L->getStartLoc(),
+ VectorizationReport()
+ << "cannot prove it is safe to reorder memory operations");
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ Failed = true;
}
- }
- /// Create a new hint from name / value pair.
- MDNode *createHintMetadata(StringRef Name, unsigned V) const {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {MDString::get(Context, Name),
- ConstantAsMetadata::get(
- ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
+ return Failed;
}
- /// Matches metadata with hint name.
- bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
- MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
- if (!Name)
- return false;
-
- for (auto H : HintTypes)
- if (Name->getString().endswith(H.Name))
- return true;
- return false;
- }
-
- /// Sets current hints into loop metadata, keeping other values intact.
- void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
- if (HintTypes.size() == 0)
- return;
-
- // Reserve the first element to LoopID (see below).
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, then ignore the existing operands.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- // If node in update list, ignore old value.
- if (!matchesHintMetadataName(Node, HintTypes))
- MDs.push_back(Node);
- }
- }
-
- // Now, add the missing hints.
- for (auto H : HintTypes)
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- TheLoop->setLoopID(NewLoopID);
- }
-
- /// The loop these hints belong to.
- const Loop *TheLoop;
+private:
+ unsigned NumRuntimePointerChecks;
+ Instruction *UnsafeAlgebraInst;
};
-static void emitMissedWarning(Function *F, Loop *L,
- const LoopVectorizeHints &LH) {
- emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), LH.emitRemark());
-
- if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
- if (LH.getWidth() != 1)
- emitLoopVectorizeWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop vectorization");
- else if (LH.getInterleave() != 1)
- emitLoopInterleaveWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop interleaving");
- }
-}
-
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
if (L.empty())
return V.push_back(&L);
@@ -1441,6 +1606,7 @@ struct LoopVectorize : public FunctionPass {
DominatorTree *DT;
BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
+ DemandedBits *DB;
AliasAnalysis *AA;
AssumptionCache *AC;
LoopAccessAnalysis *LAA;
@@ -1450,16 +1616,17 @@ struct LoopVectorize : public FunctionPass {
BlockFrequency ColdEntryFreq;
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- BFI = &getAnalysis<BlockFrequencyInfo>();
+ BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LAA = &getAnalysis<LoopAccessAnalysis>();
+ DB = &getAnalysis<DemandedBits>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -1562,26 +1729,8 @@ struct LoopVectorize : public FunctionPass {
// less verbose reporting vectorized loops and unvectorized loops that may
// benefit from vectorization, respectively.
- if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
- return false;
- }
-
- if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
- return false;
- }
-
- if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) {
- DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "loop not vectorized: vector width and interleave count are "
- "explicitly set to 1");
+ if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
+ DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
return false;
}
@@ -1595,15 +1744,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "vectorization is not beneficial and is not explicitly forced");
+ emitAnalysisDiag(F, L, Hints, VectorizationReport()
+ << "vectorization is not beneficial "
+ "and is not explicitly forced");
return false;
}
}
+ PredicatedScalarEvolution PSE(*SE);
+
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
+ LoopVectorizationRequirements Requirements;
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA,
+ &Requirements, &Hints);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1611,16 +1764,18 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
+ &Hints);
+ CM.collectValuesToIgnore();
// Check the function attributes to find out if this function should be
// optimized for size.
bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- F->hasFnAttribute(Attribute::OptimizeForSize);
+ F->optForSize();
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
- // always have a canonical loop here because we think we *can* vectoriez.
+ // always have a canonical loop here because we think we *can* vectorize.
// FIXME: This is hidden behind a flag due to pervasive problems with
// exactly what block frequency models.
if (LoopVectorizeWithBlockFrequency) {
@@ -1630,16 +1785,17 @@ struct LoopVectorize : public FunctionPass {
OptForSize = true;
}
- // Check the function attributes to see if implicit floats are allowed.a
+ // Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
// vector instructions?
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "loop not vectorized due to NoImplicitFloat attribute");
+ emitAnalysisDiag(
+ F, L, Hints,
+ VectorizationReport()
+ << "loop not vectorized due to NoImplicitFloat attribute");
emitMissedWarning(F, L, Hints);
return false;
}
@@ -1651,32 +1807,86 @@ struct LoopVectorize : public FunctionPass {
// Select the interleave count.
unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
- DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
- << DebugLocStr << '\n');
- DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ // Get user interleave count.
+ unsigned UserIC = Hints.getInterleave();
+
+ // Identify the diagnostic messages that should be produced.
+ std::string VecDiagMsg, IntDiagMsg;
+ bool VectorizeLoop = true, InterleaveLoop = true;
+
+ if (Requirements.doesNotMeet(F, L, Hints)) {
+ DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
+ "requirements.\n");
+ emitMissedWarning(F, L, Hints);
+ return false;
+ }
if (VF.Width == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ VecDiagMsg =
+ "the cost-model indicates that vectorization is not beneficial";
+ VectorizeLoop = false;
+ }
- if (IC == 1) {
- emitOptimizationRemarkAnalysis(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- "not beneficial to vectorize and user disabled interleaving");
- return false;
- }
- DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
+ if (IC == 1 && UserIC <= 1) {
+ // Tell the user interleaving is not beneficial.
+ DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
+ IntDiagMsg =
+ "the cost-model indicates that interleaving is not beneficial";
+ InterleaveLoop = false;
+ if (UserIC == 1)
+ IntDiagMsg +=
+ " and is explicitly disabled or interleave count is set to 1";
+ } else if (IC > 1 && UserIC == 1) {
+      // Tell the user that interleaving is beneficial but explicitly
+      // disabled.
+ DEBUG(dbgs()
+ << "LV: Interleaving is beneficial but is explicitly disabled.");
+ IntDiagMsg = "the cost-model indicates that interleaving is beneficial "
+ "but is explicitly disabled or interleave count is set to 1";
+ InterleaveLoop = false;
+ }
- // Report the unrolling decision.
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- Twine("interleaved by " + Twine(IC) +
- " (vectorization not beneficial)"));
+ // Override IC if user provided an interleave count.
+ IC = UserIC > 0 ? UserIC : IC;
+
+ // Emit diagnostic messages, if any.
+ const char *VAPassName = Hints.vectorizeAnalysisPassName();
+ if (!VectorizeLoop && !InterleaveLoop) {
+    // Do not vectorize or interleave the loop.
+ emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
+ L->getStartLoc(), VecDiagMsg);
+ emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
+ L->getStartLoc(), IntDiagMsg);
+ return false;
+ } else if (!VectorizeLoop && InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
+ L->getStartLoc(), VecDiagMsg);
+ } else if (VectorizeLoop && !InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
+ emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
+ L->getStartLoc(), IntDiagMsg);
+ } else if (VectorizeLoop && InterleaveLoop) {
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
+ DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
+ }
+
+ if (!VectorizeLoop) {
+ assert(IC > 1 && "interleave count should not be 1 or 0");
+ // If we decided that it is not legal to vectorize the loop then
+ // interleave it.
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC);
+ Unroller.vectorize(&LVL, CM.MinBWs);
- InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC);
- Unroller.vectorize(&LVL);
+ emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
+ Twine("interleaved loop (interleaved count: ") +
+ Twine(IC) + ")");
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC);
- LB.vectorize(&LVL);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC);
+ LB.vectorize(&LVL, CM.MinBWs);
++LoopsVectorized;
// Add metadata to disable runtime unrolling scalar loop when there's no
@@ -1686,7 +1896,7 @@ struct LoopVectorize : public FunctionPass {
AddRuntimeUnrollDisableMetaData(L);
// Report the vectorization decision.
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
Twine("vectorized loop (vectorization width: ") +
Twine(VF.Width) + ", interleaved count: " +
Twine(IC) + ")");
@@ -1703,16 +1913,19 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<BlockFrequencyInfo>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<DemandedBits>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
@@ -1773,6 +1986,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
+ auto *SE = PSE.getSE();
// Make sure that the pointer does not point to structs.
if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
@@ -1780,11 +1994,11 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// If this value is a pointer induction variable we know it is consecutive.
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
- InductionInfo II = Inductions[Phi];
+ InductionDescriptor II = Inductions[Phi];
return II.getConsecutiveDirection();
}
- GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
+ GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (!Gep)
return 0;
@@ -1802,10 +2016,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// Make sure that all of the index operands are loop invariant.
for (unsigned i = 1; i < NumOperands; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
- InductionInfo II = Inductions[Phi];
+ InductionDescriptor II = Inductions[Phi];
return II.getConsecutiveDirection();
}
@@ -1815,14 +2029,14 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// operand.
for (unsigned i = 0; i != NumOperands; ++i)
if (i != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
const SCEV *Last = nullptr;
if (!Strides.count(Gep))
- Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+ Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
else {
// Because of the multiplication by a stride we can have a s/zext cast.
// We are going to replace this stride by 1 so the cast is safe to ignore.
@@ -1833,7 +2047,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// %idxprom = zext i32 %mul to i64 << Safe cast.
// %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
//
- Last = replaceSymbolicStrideSCEV(SE, Strides,
+ Last = replaceSymbolicStrideSCEV(PSE, Strides,
Gep->getOperand(InductionOperand), Gep);
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
Last =
@@ -2177,7 +2391,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
VectorParts &Entry = WidenMap.get(Instr);
// Handle consecutive loads/stores.
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
setDebugLocFromInst(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
@@ -2191,8 +2405,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
setDebugLocFromInst(Builder, Gep);
- assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
- OrigLoop) && "Base ptr must be invariant");
+ assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+ OrigLoop) &&
+ "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
@@ -2209,7 +2424,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
assert((i == InductionOperand ||
- SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
+ PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
+ OrigLoop)) &&
"Must be last index or loop invariant");
VectorParts &GEPParts = getVectorValue(GepOperand);
@@ -2237,14 +2453,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't use a reference type for "StoredVal".
VectorParts StoredVal = getVectorValue(SI->getValueOperand());
-
+
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr =
Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
- // If we store to reverse consecutive memory locations then we need
+ // If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
@@ -2298,7 +2514,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
}
}
-void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) {
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
+ bool IfPredicateStore) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
@@ -2318,7 +2535,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Try using previously calculated values.
Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
- // If the src is an instruction that appeared earlier in the basic block
+ // If the src is an instruction that appeared earlier in the basic block,
// then it should already be vectorized.
if (SrcInst && OrigLoop->contains(SrcInst)) {
assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
@@ -2343,19 +2560,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
@@ -2367,12 +2577,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
Value *Cmp = nullptr;
if (IfPredicateStore) {
Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
- Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
+ Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
+ ConstantInt::get(Cmp->getType(), 1));
}
Instruction *Cloned = Instr->clone();
@@ -2396,85 +2602,223 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
Builder.getInt32(Width));
// End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
}
-static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
- Instruction *Loc) {
- if (FirstInst)
- return FirstInst;
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : nullptr;
- return nullptr;
+PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
+ Value *End, Value *Step,
+ Instruction *DL) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // As we're just creating this loop, it's possible no latch exists
+ // yet. If so, use the header as this will be a single block loop.
+ if (!Latch)
+ Latch = Header;
+
+ IRBuilder<> Builder(&*Header->getFirstInsertionPt());
+ setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
+ auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index");
+
+ Builder.SetInsertPoint(Latch->getTerminator());
+
+ // Create i+1 and fill the PHINode.
+ Value *Next = Builder.CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(Start, L->getLoopPreheader());
+ Induction->addIncoming(Next, Latch);
+ // Create the compare.
+ Value *ICmp = Builder.CreateICmpEQ(Next, End);
+ Builder.CreateCondBr(ICmp, L->getExitBlock(), Header);
+
+ // Now we have two terminators. Remove the old one from the block.
+ Latch->getTerminator()->eraseFromParent();
+
+ return Induction;
}
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
- Instruction *tnullptr = nullptr;
- if (!Legal->mustCheckStrides())
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
- IRBuilder<> ChkBuilder(Loc);
-
- // Emit checks.
- Value *Check = nullptr;
- Instruction *FirstInst = nullptr;
- for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(),
- SE = Legal->strides_end();
- SI != SE; ++SI) {
- Value *Ptr = stripIntegerCast(*SI);
- Value *C = ChkBuilder.CreateICmpNE(Ptr, ConstantInt::get(Ptr->getType(), 1),
- "stride.chk");
- // Store the first instruction we create.
- FirstInst = getFirstInst(FirstInst, C, Loc);
- if (Check)
- Check = ChkBuilder.CreateOr(Check, C);
- else
- Check = C;
- }
+Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
+ if (TripCount)
+ return TripCount;
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- LLVMContext &Ctx = Loc->getContext();
- Instruction *TheCheck =
- BinaryOperator::CreateAnd(Check, ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(TheCheck, "stride.not.one");
- FirstInst = getFirstInst(FirstInst, TheCheck, Loc);
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ // Find the loop boundaries.
+ ScalarEvolution *SE = PSE.getSE();
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
+ assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
+ "Invalid loop count");
- return std::make_pair(FirstInst, TheCheck);
+ Type *IdxTy = Legal->getWidestInductionType();
+
+ // The exit count might have type i64 while the phi has type i32. This can
+ // happen if the induction variable is sign-extended before the compare. The
+ // only way we obtain a backedge-taken count in that case is if the
+ // induction variable was signed and therefore does not overflow, so the
+ // truncation is legal.
+ if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() >
+ IdxTy->getPrimitiveSizeInBits())
+ BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy);
+ BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy);
+
+ // Get the total trip count from the count by adding 1.
+ const SCEV *ExitCount = SE->getAddExpr(
+ BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, DL, "induction");
+
+ // Count holds the overall loop count (N).
+ TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ L->getLoopPreheader()->getTerminator());
+
+ if (TripCount->getType()->isPointerTy())
+ TripCount =
+ CastInst::CreatePointerCast(TripCount, IdxTy,
+ "exitcount.ptrcnt.to.int",
+ L->getLoopPreheader()->getTerminator());
+
+ return TripCount;
}
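+// For example, for "for (i = 0; i < n; ++i)" the backedge-taken count is
+// n - 1, so the trip count expanded above is (n - 1) + 1 == n, materialized
+// as instructions in the preheader.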
+Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
+ if (VectorTripCount)
+ return VectorTripCount;
+
+ Value *TC = getOrCreateTripCount(L);
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+
+ // Now we need to generate the expression for N - (N % Step), which is the
+ // part that the vectorized body will execute. Here the loop step is the
+ // vectorization factor (number of SIMD elements) times the unroll factor
+ // (number of SIMD instructions).
+ Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
+ Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
+ VectorTripCount = Builder.CreateSub(TC, R, "n.vec");
+
+ return VectorTripCount;
+}
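+// Worked example (assuming VF = 4 and UF = 2, so Step = 8): a trip count of
+// n = 13 gives n.mod.vf = 13 % 8 = 5 and n.vec = 13 - 5 = 8, i.e. one
+// iteration of the vector body, with the remaining 5 iterations left to the
+// scalar remainder loop.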
+
+void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
+ BasicBlock *Bypass) {
+ Value *Count = getOrCreateTripCount(L);
+ BasicBlock *BB = L->getLoopPreheader();
+ IRBuilder<> Builder(BB->getTerminator());
+
+ // Generate code to check that the loop runs at least VF * UF iterations;
+ // this also catches the case where adding one to the backedge-taken count
+ // overflowed the trip count computation.
+ Value *CheckMinIters =
+ Builder.CreateICmpULT(Count,
+ ConstantInt::get(Count->getType(), VF * UF),
+ "min.iters.check");
+
+ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
+ "min.iters.checked");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, CheckMinIters));
+ LoopBypassBlocks.push_back(BB);
+}
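+// For example (assuming VF = 4 and UF = 2): a loop of only 5 iterations
+// takes the bypass, since 5 < 8; and if the backedge-taken count was
+// UINT64_MAX, Count wrapped to 0, so 0 < 8 likewise sends execution to the
+// scalar loop.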
+
+void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
+ BasicBlock *Bypass) {
+ Value *TC = getOrCreateVectorTripCount(L);
+ BasicBlock *BB = L->getLoopPreheader();
+ IRBuilder<> Builder(BB->getTerminator());
+
+ // Now, compare the new count to zero. If it is zero, skip the vector loop
+ // and jump to the scalar loop.
+ Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
+ "cmp.zero");
+
+ // Split the preheader; if the vector trip count is zero, the branch created
+ // below bypasses the vector loop and goes straight to the scalar loop.
+ BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
+ "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, Cmp));
+ LoopBypassBlocks.push_back(BB);
+}
+
+void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
+ BasicBlock *BB = L->getLoopPreheader();
+
+ // Generate the code to check the SCEV assumptions that we made.
+ // We want the new basic block to start at the first instruction in a
+ // sequence of instructions that form a check.
+ SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
+ "scev.check");
+ Value *SCEVCheck =
+ Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator());
+
+ if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
+ if (C->isZero())
+ return;
+
+ // Create a new block containing the SCEV check.
+ BB->setName("vector.scevcheck");
+ auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, SCEVCheck));
+ LoopBypassBlocks.push_back(BB);
+ AddedSafetyChecks = true;
+}
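+// A typical predicate checked here is a symbolic stride assumed to be one;
+// conceptually the expander materializes a test of the shape
+// "%stride.check = icmp ne i64 %stride, 1" (an IR sketch; names are
+// illustrative), and the branch above bypasses the vector loop whenever any
+// assumption fails to hold.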
+
+void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
+ BasicBlock *Bypass) {
+ BasicBlock *BB = L->getLoopPreheader();
+
+ // Generate the code that checks at run time whether arrays overlap. We put
+ // the checks into a separate block to keep the more common case of few
+ // elements fast.
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ Legal->getLAI()->addRuntimeChecks(BB->getTerminator());
+ if (!MemRuntimeCheck)
+ return;
+
+ // Create a new block containing the memory check.
+ BB->setName("vector.memcheck");
+ auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+ if (L->getParentLoop())
+ L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
+ ReplaceInstWithInst(BB->getTerminator(),
+ BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
+ LoopBypassBlocks.push_back(BB);
+ AddedSafetyChecks = true;
+}
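+// For two accessed ranges [A, A+LenA) and [B, B+LenB), the check produced by
+// addRuntimeChecks is conceptually the negation of
+// "A + LenA <= B || B + LenB <= A", so the bypass branch is taken whenever
+// the ranges may overlap (a sketch of the intent, not the literal output).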
+
+
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- Back-edge taken count overflow check.
+ [ ] <-- loop iteration number check.
/ |
/ v
| [ ] <-- vector loop bypass (may consist of multiple blocks).
| / |
| / v
|| [ ] <-- vector pre header.
- || |
- || v
- || [ ] \
- || [ ]_| <-- vector loop.
- || |
- | \ v
- | >[ ] <--- middle-block.
+ |/ |
+ | v
+ | [ ] \
+ | [ ]_| <-- vector loop.
+ | |
+ | v
+ | -[ ] <--- middle-block.
| / |
| / v
-|- >[ ] <--- new preheader.
@@ -2498,65 +2842,16 @@ void InnerLoopVectorizer::createEmptyLoop() {
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
+ //
+ // We try as hard as possible to reuse an induction variable from the
+ // original loop. However, if we don't find one that:
+ // - is an integer
+ // - counts from zero, stepping by one
+ // - is the size of the widest induction variable type
+ // then we create a new one.
OldInduction = Legal->getInduction();
Type *IdxTy = Legal->getWidestInductionType();
- // Find the loop boundaries.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
- assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
-
- // The exit count might have the type of i64 while the phi is i32. This can
- // happen if we have an induction variable that is sign extended before the
- // compare. The only way that we get a backedge taken count is that the
- // induction variable was signed and as such will not overflow. In such a case
- // truncation is legal.
- if (ExitCount->getType()->getPrimitiveSizeInBits() >
- IdxTy->getPrimitiveSizeInBits())
- ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
-
- const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
- // Get the total trip count from the count by adding 1.
- ExitCount = SE->getAddExpr(BackedgeTakeCount,
- SE->getConstant(BackedgeTakeCount->getType(), 1));
-
- const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
-
- // Expand the trip count and place the new instructions in the preheader.
- // Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, DL, "induction");
-
- // We need to test whether the backedge-taken count is uint##_max. Adding one
- // to it will cause overflow and an incorrect loop trip count in the vector
- // body. In case of overflow we want to directly jump to the scalar remainder
- // loop.
- Value *BackedgeCount =
- Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
- VectorPH->getTerminator());
- if (BackedgeCount->getType()->isPointerTy())
- BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
- "backedge.ptrcnt.to.int",
- VectorPH->getTerminator());
- Instruction *CheckBCOverflow =
- CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
- Constant::getAllOnesValue(BackedgeCount->getType()),
- "backedge.overflow", VectorPH->getTerminator());
-
- // The loop index does not have to start at Zero. Find the original start
- // value from the induction PHI node. If we don't have an induction variable
- // then we know that it starts at zero.
- Builder.SetInsertPoint(VectorPH->getTerminator());
- Value *StartIdx = ExtendedIdx =
- OldInduction
- ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH),
- IdxTy)
- : ConstantInt::get(IdxTy, 0);
-
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- VectorPH->getTerminator());
-
- LoopBypassBlocks.push_back(VectorPH);
-
// Split the single-block loop into the two-loop structure described above.
BasicBlock *VecBody =
VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
@@ -2580,118 +2875,36 @@ void InnerLoopVectorizer::createEmptyLoop() {
}
Lp->addBasicBlockToLoop(VecBody, *LI);
- // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
- // inside the loop.
- Builder.SetInsertPoint(VecBody->getFirstNonPHI());
-
- // Generate the induction variable.
- setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
- Induction = Builder.CreatePHI(IdxTy, 2, "index");
- // The loop step is equal to the vectorization factor (num of SIMD elements)
- // times the unroll factor (num of SIMD instructions).
- Constant *Step = ConstantInt::get(IdxTy, VF * UF);
-
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
- BasicBlock *NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow));
- VectorPH = NewVectorPH;
-
- // This is the IR builder that we use to add all of the logic for bypassing
- // the new vector loop.
- IRBuilder<> BypassBuilder(VectorPH->getTerminator());
- setDebugLocFromInst(BypassBuilder,
- getDebugLocFromInstOrOperands(OldInduction));
-
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- if (Count->getType() != IdxTy) {
- // The exit count can be of pointer type. Convert it to the correct
- // integer type.
- if (ExitCount->getType()->isPointerTy())
- Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
- else
- Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
- }
-
- // Add the start index to the loop count to get the new end index.
- Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
+ // Find the loop boundaries.
+ Value *Count = getOrCreateTripCount(Lp);
- // Now we need to generate the expression for N - (N % VF), which is
- // the part that the vectorized body will execute.
- Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
- Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
- Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
- "end.idx.rnd.down");
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
+ // We need to test whether the backedge-taken count is uint##_max. Adding one
+ // to it will cause overflow and an incorrect loop trip count in the vector
+ // body. In case of overflow we want to directly jump to the scalar remainder
+ // loop.
+ emitMinimumIterationCountCheck(Lp, ScalarPH);
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp =
- BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
- ReplaceInstWithInst(VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, Cmp));
- VectorPH = NewVectorPH;
-
- // Generate the code to check that the strides we assumed to be one are really
- // one. We want the new basic block to start at the first instruction in a
- // sequence of instructions that form a check.
- Instruction *StrideCheck;
- Instruction *FirstCheckInst;
- std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(VectorPH->getTerminator());
- if (StrideCheck) {
- AddedSafetyChecks = true;
- // Create a new block containing the stride check.
- VectorPH->setName("vector.stridecheck");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
-
- // Replace the branch into the memory check block with a conditional branch
- // for the "few elements case".
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck));
-
- VectorPH = NewVectorPH;
- }
+ emitVectorLoopEnteredCheck(Lp, ScalarPH);
+ // Generate the code to check any assumptions that we've made for SCEV
+ // expressions.
+ emitSCEVChecks(Lp, ScalarPH);
// Generate the code that checks at run time whether arrays overlap. We put
// the checks into a separate block to keep the more common case of few
// elements fast.
- Instruction *MemRuntimeCheck;
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator());
- if (MemRuntimeCheck) {
- AddedSafetyChecks = true;
- // Create a new block containing the memory check.
- VectorPH->setName("vector.memcheck");
- NewVectorPH =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
- LoopBypassBlocks.push_back(VectorPH);
-
- // Replace the branch into the memory check block with a conditional branch
- // for the "few elements case".
- ReplaceInstWithInst(
- VectorPH->getTerminator(),
- BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck));
-
- VectorPH = NewVectorPH;
- }
+ emitMemRuntimeChecks(Lp, ScalarPH);
+
+ // Generate the induction variable.
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
+ Constant *Step = ConstantInt::get(IdxTy, VF * UF);
+ Induction =
+ createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
+ getDebugLocFromInstOrOperands(OldInduction));
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
@@ -2701,152 +2914,60 @@ void InnerLoopVectorizer::createEmptyLoop() {
// If we come from a bypass edge then we need to start from the original
// start value.
- // This variable saves the new starting index for the scalar loop.
- PHINode *ResumeIndex = nullptr;
+ // Each induction variable gets a new "bc.resume.val" PHI that holds the
+ // index at which the scalar loop should resume once the vector loop has
+ // completed.
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
- // Set builder to point to last bypass block.
- BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
- LoopVectorizationLegality::InductionInfo II = I->second;
-
- Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
- PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
- MiddleBlock->getTerminator());
- // We might have extended the type of the induction variable but we need a
- // truncated version for the scalar loop.
- PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
- PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
- MiddleBlock->getTerminator()) : nullptr;
+ InductionDescriptor II = I->second;
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3,
+ "bc.resume.val",
ScalarPH->getTerminator());
- BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
-
- PHINode *BCTruncResumeVal = nullptr;
+ Value *EndValue;
if (OrigPhi == OldInduction) {
- BCTruncResumeVal =
- PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
- ScalarPH->getTerminator());
- BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
- }
-
- Value *EndValue = nullptr;
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter.
- assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
-
- // We have the canonical induction variable.
- if (OrigPhi == OldInduction) {
- // Create a truncated version of the resume value for the scalar loop,
- // we might have promoted the type to a larger width.
- EndValue =
- BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
- // The new PHI merges the original incoming value, in case of a bypass,
- // or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
- TruncResumeVal->addIncoming(EndValue, VecBody);
-
- BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
-
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
- break;
- }
-
- // Not the canonical induction variable - add the vector loop count to the
- // start value.
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StartValue->getType(),
- "cast.crd");
- EndValue = II.transform(BypassBuilder, CRD);
+ // We know what the end value is.
+ EndValue = CountRoundDown;
+ } else {
+ IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
+ Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
+ II.getStepValue()->getType(),
+ "cast.crd");
+ EndValue = II.transform(B, CRD);
EndValue->setName("ind.end");
- break;
}
- case LoopVectorizationLegality::IK_PtrInduction: {
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StepValue->getType(),
- "cast.crd");
- EndValue = II.transform(BypassBuilder, CRD);
- EndValue->setName("ptr.ind.end");
- break;
- }
- }// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) {
- if (OrigPhi == OldInduction)
- ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
- else
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
- }
- ResumeVal->addIncoming(EndValue, VecBody);
+ BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
// The old induction's phi node in the scalar body needs the truncated
// value.
- if (OrigPhi == OldInduction) {
- BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal);
- } else {
- BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
- }
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]);
+ OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
}
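+ // At this point every original induction PHI resumes from its
+ // "bc.resume.val": the end value of the vector loop if the vector body
+ // executed, or the original start value if a bypass branch was taken.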
- // If we are generating a new induction variable then we also need to
- // generate the code that calculates the exit value. This value is not
- // simply the end of the counter because we may skip the vectorized body
- // in case of a runtime check.
- if (!OldInduction){
- assert(!ResumeIndex && "Unexpected resume value found");
- ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
- MiddleBlock->getTerminator());
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
- ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
- }
-
- // Make sure that we found the index where scalar loop needs to continue.
- assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
- "Invalid resume Index");
-
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
- Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
- ResumeIndex, "cmp.n",
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n",
MiddleBlock->getTerminator());
ReplaceInstWithInst(MiddleBlock->getTerminator(),
BranchInst::Create(ExitBlock, ScalarPH, CmpN));
- // Create i+1 and fill the PHINode.
- Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
- Induction->addIncoming(StartIdx, VectorPH);
- Induction->addIncoming(NextIdx, VecBody);
- // Create the compare.
- Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
- Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
-
- // Now we have two terminators. Remove the old one from the block.
- VecBody->getTerminator()->eraseFromParent();
-
// Get ready to start creating new instructions into the vectorized body.
- Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
// Save the state.
- LoopVectorPreHeader = VectorPH;
+ LoopVectorPreHeader = Lp->getLoopPreheader();
LoopScalarPreHeader = ScalarPH;
LoopMiddleBlock = MiddleBlock;
LoopExitBlock = ExitBlock;
@@ -2899,7 +3020,7 @@ static void cse(SmallVector<BasicBlock *, 4> &BBs) {
for (unsigned i = 0, e = BBs.size(); i != e; ++i) {
BasicBlock *BB = BBs[i];
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *In = I++;
+ Instruction *In = &*I++;
if (!CSEDenseMapInfo::canHandle(In))
continue;
@@ -3021,6 +3142,117 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
}
+static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
+ IntegerType *I1 = cast<IntegerType>(T1->getVectorElementType());
+ IntegerType *I2 = cast<IntegerType>(T2->getVectorElementType());
+ return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
+}
+static Type *largestIntegerVectorType(Type *T1, Type *T2) {
+ IntegerType *I1 = cast<IntegerType>(T1->getVectorElementType());
+ IntegerType *I2 = cast<IntegerType>(T2->getVectorElementType());
+ return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
+}
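+// E.g. for T1 = <4 x i8> and T2 = <4 x i32>, smallestIntegerVectorType
+// returns <4 x i8> and largestIntegerVectorType returns <4 x i32>.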
+
+void InnerLoopVectorizer::truncateToMinimalBitwidths() {
+ // For every instruction `I` in MinBWs, truncate the operands, create a
+ // truncated version of `I`, and re-extend its result. InstCombine runs
+ // later and will remove any ext/trunc pairs.
+ for (auto &KV : MinBWs) {
+ VectorParts &Parts = WidenMap.get(KV.first);
+ for (Value *&I : Parts) {
+ if (I->use_empty())
+ continue;
+ Type *OriginalTy = I->getType();
+ Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(),
+ KV.second);
+ Type *TruncatedTy = VectorType::get(ScalarTruncatedTy,
+ OriginalTy->getVectorNumElements());
+ if (TruncatedTy == OriginalTy)
+ continue;
+
+ IRBuilder<> B(cast<Instruction>(I));
+ auto ShrinkOperand = [&](Value *V) -> Value* {
+ if (auto *ZI = dyn_cast<ZExtInst>(V))
+ if (ZI->getSrcTy() == TruncatedTy)
+ return ZI->getOperand(0);
+ return B.CreateZExtOrTrunc(V, TruncatedTy);
+ };
+
+ // The actual instruction modification depends on the instruction type,
+ // unfortunately.
+ Value *NewI = nullptr;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ NewI = B.CreateBinOp(BO->getOpcode(),
+ ShrinkOperand(BO->getOperand(0)),
+ ShrinkOperand(BO->getOperand(1)));
+ cast<BinaryOperator>(NewI)->copyIRFlags(I);
+ } else if (ICmpInst *CI = dyn_cast<ICmpInst>(I)) {
+ NewI = B.CreateICmp(CI->getPredicate(),
+ ShrinkOperand(CI->getOperand(0)),
+ ShrinkOperand(CI->getOperand(1)));
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ NewI = B.CreateSelect(SI->getCondition(),
+ ShrinkOperand(SI->getTrueValue()),
+ ShrinkOperand(SI->getFalseValue()));
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ switch (CI->getOpcode()) {
+ default: llvm_unreachable("Unhandled cast!");
+ case Instruction::Trunc:
+ NewI = ShrinkOperand(CI->getOperand(0));
+ break;
+ case Instruction::SExt:
+ NewI = B.CreateSExtOrTrunc(CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy,
+ TruncatedTy));
+ break;
+ case Instruction::ZExt:
+ NewI = B.CreateZExtOrTrunc(CI->getOperand(0),
+ smallestIntegerVectorType(OriginalTy,
+ TruncatedTy));
+ break;
+ }
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements();
+ auto *O0 =
+ B.CreateZExtOrTrunc(SI->getOperand(0),
+ VectorType::get(ScalarTruncatedTy, Elements0));
+ auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements();
+ auto *O1 =
+ B.CreateZExtOrTrunc(SI->getOperand(1),
+ VectorType::get(ScalarTruncatedTy, Elements1));
+
+ NewI = B.CreateShuffleVector(O0, O1, SI->getMask());
+ } else if (isa<LoadInst>(I)) {
+ // Don't do anything with the operands, just extend the result.
+ continue;
+ } else {
+ llvm_unreachable("Unhandled instruction type!");
+ }
+
+ // Lastly, extend the result.
+ NewI->takeName(cast<Instruction>(I));
+ Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy);
+ I->replaceAllUsesWith(Res);
+ cast<Instruction>(I)->eraseFromParent();
+ I = Res;
+ }
+ }
+
+ // We'll have created a number of zext instructions that are now unused.
+ // Clean them up.
+ for (auto &KV : MinBWs) {
+ VectorParts &Parts = WidenMap.get(KV.first);
+ for (Value *&I : Parts) {
+ ZExtInst *Inst = dyn_cast<ZExtInst>(I);
+ if (Inst && Inst->use_empty()) {
+ Value *NewI = Inst->getOperand(0);
+ Inst->eraseFromParent();
+ I = NewI;
+ }
+ }
+ }
+}
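+// For example, if MinBWs records that an i32 add only needs 8 bits, the
+// rewrite above turns (an IR sketch; value names are illustrative)
+//   %a = add <4 x i32> %x, %y
+// into
+//   %x.tr = trunc <4 x i32> %x to <4 x i8>
+//   %y.tr = trunc <4 x i32> %y to <4 x i8>
+//   %a.tr = add <4 x i8> %x.tr, %y.tr
+//   %a = zext <4 x i8> %a.tr to <4 x i32>
+// and relies on InstCombine to erase ext/trunc pairs that cancel out.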
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -3051,6 +3283,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
be = DFS.endRPO(); bb != be; ++bb)
vectorizeBlockInLoop(*bb, &RdxPHIsToFix);
+ // Insert truncates and extends for any truncated instructions as hints to
+ // InstCombine.
+ if (VF > 1)
+ truncateToMinimalBitwidths();
+
// At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
// that we vectorized. The PHI nodes are currently empty because we did
@@ -3066,7 +3303,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(RdxPhi && "Unable to recover vectorized PHI");
// Find the reduction variable descriptor.
- assert(Legal->getReductionVars()->count(RdxPhi) &&
+ assert(Legal->isReductionVariable(RdxPhi) &&
"Unable to find the reduction variable");
RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
@@ -3141,21 +3378,33 @@ void InnerLoopVectorizer::vectorizeLoop() {
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
// instructions.
- Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- VectorParts RdxParts;
+ VectorParts RdxParts = getVectorValue(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
- for (unsigned part = 0; part < UF; ++part) {
- // This PHINode contains the vectorized reduction variable, or
- // the initial value vector, if we bypass the vector loop.
- VectorParts &RdxExitVal = getVectorValue(LoopExitInst);
- PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
- Value *StartVal = (part == 0) ? VectorStart : Identity;
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
- NewPhi->addIncoming(RdxExitVal[part],
- LoopVectorBody.back());
- RdxParts.push_back(NewPhi);
+
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ if (VF > 1 && RdxPhi->getType() != RdxDesc.getRecurrenceType()) {
+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+ Builder.SetInsertPoint(LoopVectorBody.back()->getTerminator());
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[part]->user_begin();
+ UI != RdxParts[part]->user_end();)
+ if (*UI != Trunc) {
+ (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
+ RdxParts[part] = Extnd;
+ } else {
+ ++UI;
+ }
+ }
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+ for (unsigned part = 0; part < UF; ++part)
+ RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
}
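+ // E.g. an i8 reduction computed in <VF x i32> (assuming RdxDesc records
+ // i8): each unrolled part is truncated to <VF x i8> in the vector body,
+ // re-extended for any other users, and truncated again in the middle block
+ // so the final reduction is evaluated in the narrow type.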
// Reduce all of the unrolled parts into a single vector.
@@ -3208,13 +3457,22 @@ void InnerLoopVectorizer::vectorizeLoop() {
// The result is in the first element of the vector.
ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
Builder.getInt32(0));
+
+ // If the reduction can be performed in a smaller type, we need to extend
+ // the reduction to the wider type before we branch to the original loop.
+ if (RdxPhi->getType() != RdxDesc.getRecurrenceType())
+ ReducedPartRdx =
+ RdxDesc.isSigned()
+ ? Builder.CreateSExt(ReducedPartRdx, RdxPhi->getType())
+ : Builder.CreateZExt(ReducedPartRdx, RdxPhi->getType());
}
// Create a phi node that merges control-flow from the backedge-taken check
// block and the middle block.
PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
LoopScalarPreHeader->getTerminator());
- BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]);
BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Now, we need to fix the users of the reduction variable
@@ -3252,6 +3510,20 @@ void InnerLoopVectorizer::vectorizeLoop() {
fixLCSSAPHIs();
+ // Make sure DomTree is updated.
+ updateAnalysis();
+
+ // Predicate any stores.
+ for (auto KV : PredicatedStores) {
+ BasicBlock::iterator I(KV.first);
+ auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI);
+ auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DT);
+ I->moveBefore(T);
+ I->getParent()->setName("pred.store.if");
+ BB->setName("pred.store.continue");
+ }
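+ // Each predicated store now ends up in control flow of the shape
+ // "br i1 %cond, label %pred.store.if, label %pred.store.continue", with
+ // the store sunk into the "pred.store.if" block.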
+ DEBUG(DT->verifyDomTree());
// Remove redundant induction instructions.
cse(LoopVectorBody);
}
@@ -3326,18 +3598,18 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
- InnerLoopVectorizer::VectorParts &Entry,
- unsigned UF, unsigned VF, PhiVector *PV) {
+void InnerLoopVectorizer::widenPHIInstruction(
+ Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
+ unsigned VF, PhiVector *PV) {
PHINode* P = cast<PHINode>(PN);
// Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
+ if (Legal->isReductionVariable(P)) {
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy = (VF == 1) ? PN->getType() :
VectorType::get(PN->getType(), VF);
- Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody.back()-> getFirstInsertionPt());
+ Entry[part] = PHINode::Create(
+ VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt());
}
PV->push_back(P);
return;
@@ -3385,53 +3657,44 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
assert(Legal->getInductionVars()->count(P) &&
"Not an induction variable");
- LoopVectorizationLegality::InductionInfo II =
- Legal->getInductionVars()->lookup(P);
+ InductionDescriptor II = Legal->getInductionVars()->lookup(P);
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
+ switch (II.getKind()) {
+ case InductionDescriptor::IK_NoInduction:
llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- assert(P->getType() == II.StartValue->getType() && "Types must match");
- Type *PhiTy = P->getType();
- Value *Broadcasted;
- if (P == OldInduction) {
- // Handle the canonical induction variable. We might have had to
- // extend the type.
- Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
- } else {
- // Handle other induction variables that are now based on the
- // canonical one.
- Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
- "normalized.idx");
- NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = II.transform(Builder, NormalizedIdx);
- Broadcasted->setName("offset.idx");
+ case InductionDescriptor::IK_IntInduction: {
+ assert(P->getType() == II.getStartValue()->getType() &&
+ "Types must match");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *V = Induction;
+ if (P != OldInduction) {
+ V = Builder.CreateSExtOrTrunc(Induction, P->getType());
+ V = II.transform(Builder, V);
+ V->setName("offset.idx");
}
- Broadcasted = getBroadcastInstrs(Broadcasted);
+ Value *Broadcasted = getBroadcastInstrs(V);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
+ Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
return;
}
- case LoopVectorizationLegality::IK_PtrInduction:
+ case InductionDescriptor::IK_PtrInduction:
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx =
- Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
- NormalizedIdx =
- Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType());
+ Value *PtrInd = Induction;
+ PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = part;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
Entry[part] = SclrGep;
@@ -3441,8 +3704,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
@@ -3458,7 +3721,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- VectorParts &Entry = WidenMap.get(it);
+ VectorParts &Entry = WidenMap.get(&*it);
+
switch (it->getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
@@ -3466,7 +3730,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
continue;
case Instruction::PHI: {
// Vectorize PHINodes.
- widenPHIInstruction(it, Entry, UF, VF, PV);
+ widenPHIInstruction(&*it, Entry, UF, VF, PV);
continue;
}// End of PHI.
@@ -3504,16 +3768,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = V;
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
case Instruction::Select: {
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
- bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
- OrigLoop);
- setDebugLocFromInst(Builder, it);
+ auto *SE = PSE.getSE();
+ bool InvariantCond =
+ SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop);
+ setDebugLocFromInst(Builder, &*it);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@@ -3522,7 +3787,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
-
+
Value *ScalarCond = (VF == 1) ? Cond[0] :
Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
@@ -3533,7 +3798,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Op1[Part]);
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
@@ -3542,25 +3807,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Widen compares. Generate vector compares.
bool FCmp = (it->getOpcode() == Instruction::FCmp);
CmpInst *Cmp = dyn_cast<CmpInst>(it);
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
Value *C = nullptr;
- if (FCmp)
+ if (FCmp) {
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
- else
+ cast<FCmpInst>(C)->copyFastMathFlags(&*it);
+ } else {
C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ }
Entry[Part] = C;
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
case Instruction::Store:
case Instruction::Load:
- vectorizeMemoryInstruction(it);
+ vectorizeMemoryInstruction(&*it);
break;
case Instruction::ZExt:
case Instruction::SExt:
@@ -3575,7 +3842,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
/// Optimize the special case where the source is the induction
/// variable. Notice that we can only optimize the 'trunc' case
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
@@ -3585,13 +3852,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
- LoopVectorizationLegality::InductionInfo II =
+ InductionDescriptor II =
Legal->getInductionVars()->lookup(OldInduction);
- Constant *Step =
- ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
+ Constant *Step = ConstantInt::getSigned(
+ CI->getType(), II.getStepValue()->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
/// Vectorize casts.
@@ -3601,7 +3868,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
@@ -3609,7 +3876,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(it))
break;
- setDebugLocFromInst(Builder, it);
+ setDebugLocFromInst(Builder, &*it);
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
@@ -3625,7 +3892,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
if (ID &&
(ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
ID == Intrinsic::lifetime_start)) {
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}
// The flag shows whether we use Intrinsic or a usual Call for vectorized
@@ -3636,7 +3903,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
bool UseVectorIntrinsic =
ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
if (!UseVectorIntrinsic && NeedToScalarize) {
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}
@@ -3677,13 +3944,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = Builder.CreateCall(VectorF, Args);
}
- propagateMetadata(Entry, it);
+ propagateMetadata(Entry, &*it);
break;
}
default:
// All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(it);
+ scalarizeInstruction(&*it);
break;
}// end of switch.
}// end of for_each instr.
@@ -3691,7 +3958,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
- SE->forgetLoop(OrigLoop);
+ PSE.getSE()->forgetLoop(OrigLoop);
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
@@ -3701,19 +3968,12 @@ void InnerLoopVectorizer::updateAnalysis() {
DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
- // Due to if predication of stores we might create a sequence of "if(pred)
- // a[i] = ...; " blocks.
- for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
- if (i == 0)
- DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
- else if (isPredicatedBlock(i)) {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]);
- } else {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]);
- }
- }
+ // We don't predicate stores by this point, so the vector body should be a
+ // single basic-block loop.
+ assert(LoopVectorBody.size() == 1 && "Expected single block loop!");
+ DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
+ DT->addNewBlock(LoopMiddleBlock, LoopVectorBody.back());
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
@@ -3850,10 +4110,10 @@ bool LoopVectorizationLegality::canVectorize() {
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(VectorizationReport() <<
- "could not determine number of loop iterations");
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(VectorizationReport()
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3879,10 +4139,28 @@ bool LoopVectorizationLegality::canVectorize() {
: "")
<< "!\n");
+ bool UseInterleaved = TTI->enableInterleavedAccessVectorization();
+
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableInterleavedMemAccesses.getNumOccurrences() > 0)
+ UseInterleaved = EnableInterleavedMemAccesses;
+
// Analyze interleaved memory accesses.
- if (EnableInterleavedMemAccesses)
+ if (UseInterleaved)
InterleaveInfo.analyzeInterleaving(Strides);
+ unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
+ if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
+ SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
+
+ if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
+ emitAnalysis(VectorizationReport()
+ << "Too many SCEV assumptions need to be made and checked "
+ << "at runtime");
+ DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+ return false;
+ }
+
// Okay! We can vectorize. At this point we don't have any other mem analysis
// which may limit our maximum vectorization factor, so just return true with
// no restrictions.
@@ -3929,7 +4207,6 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
- BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
// Look for the attribute signaling the absence of NaNs.
@@ -3953,7 +4230,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
@@ -3965,9 +4242,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit))
continue;
- emitAnalysis(VectorizationReport(it) <<
+ emitAnalysis(VectorizationReport(&*it) <<
"value could not be identified as "
"an induction or reduction variable");
return false;
@@ -3975,19 +4252,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
- // This is the value coming from the preheader.
- Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
- ConstantInt *StepValue = nullptr;
- // Check if this is an induction variable.
- InductionKind IK = isInductionVariable(Phi, StepValue);
-
- if (IK_NoInduction != IK) {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) {
+ Inductions[Phi] = ID;
// Get the widest type.
if (!WidestIndTy)
WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
@@ -3995,21 +4268,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
- if (IK == IK_IntInduction && StepValue->isOne()) {
+ if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
+ ID.getStepValue()->isOne() &&
+ isa<Constant>(ID.getStartValue()) &&
+ cast<Constant>(ID.getStartValue())->isNullValue()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
- // than it is expedient).
+ // than it is expedient). We've checked that it begins at zero and
+ // steps by one, so this is a canonical induction variable.
if (!Induction || PhiTy == WidestIndTy)
Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
- Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(VectorizationReport(it) <<
+ if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
+ emitAnalysis(VectorizationReport(&*it) <<
"use of induction value outside of the "
"loop is not handled by vectorizer");
return false;
@@ -4020,11 +4296,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
Reductions[Phi])) {
+ if (Reductions[Phi].hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(
+ Reductions[Phi].getUnsafeAlgebraInst());
AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
continue;
}
- emitAnalysis(VectorizationReport(it) <<
+ emitAnalysis(VectorizationReport(&*it) <<
"value that could not be identified as "
"reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
@@ -4039,8 +4318,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- emitAnalysis(VectorizationReport(it) <<
- "call instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(&*it)
+ << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -4049,8 +4328,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// second argument is the same (i.e. loop invariant)
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
- if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
- emitAnalysis(VectorizationReport(it)
+ auto *SE = PSE.getSE();
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
+ emitAnalysis(VectorizationReport(&*it)
<< "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
@@ -4061,7 +4341,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
- emitAnalysis(VectorizationReport(it)
+ emitAnalysis(VectorizationReport(&*it)
<< "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
@@ -4085,8 +4365,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(VectorizationReport(it) <<
+ if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) {
+ emitAnalysis(VectorizationReport(&*it) <<
"value cannot be used outside the loop");
return false;
}
@@ -4104,6 +4384,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
+ // Now we know the widest induction type, check if our found induction
+ // is the same size. If it's not, unset it here and InnerLoopVectorizer
+ // will create another.
+ if (Induction && WidestIndTy != Induction->getType())
+ Induction = nullptr;
+
return true;
}
@@ -4116,7 +4402,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop);
if (!Stride)
return;
@@ -4142,7 +4428,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
BE = TheLoop->block_end(); B != BE; ++B)
for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
I != IE; ++I)
- if (I->getType()->isPointerTy() && isConsecutivePtr(I))
+ if (I->getType()->isPointerTy() && isConsecutivePtr(&*I))
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
while (!Worklist.empty()) {
@@ -4179,30 +4465,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
- if (LAI->getNumRuntimePointerChecks() >
- VectorizerParams::RuntimeMemoryCheckThreshold) {
- emitAnalysis(VectorizationReport()
- << LAI->getNumRuntimePointerChecks() << " exceeds limit of "
- << VectorizerParams::RuntimeMemoryCheckThreshold
- << " dependent memory operations checked at runtime");
- DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- return false;
- }
- return true;
-}
+ Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
+ PSE.addPredicate(LAI->PSE.getUnionPredicate());
-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
- ConstantInt *&StepValue) {
- if (!isInductionPHI(Phi, SE, StepValue))
- return IK_NoInduction;
-
- Type *PhiTy = Phi->getType();
- // Found an Integer induction variable.
- if (PhiTy->isIntegerTy())
- return IK_IntInduction;
- // Found an Pointer induction variable.
- return IK_PtrInduction;
+ return true;
}
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@@ -4256,8 +4522,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr ||
!isSinglePredecessor) {
- // Build a masked store if it is legal for the target, otherwise scalarize
- // the block.
+ // Build a masked store if it is legal for the target, otherwise
+ // scalarize the block.
bool isLegalMaskedOp =
isLegalMaskedStore(SI->getValueOperand()->getType(),
SI->getPointerOperand());
@@ -4315,7 +4581,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses(
StoreInst *SI = dyn_cast<StoreInst>(I);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
- int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides);
+ int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides);
// The factor of the corresponding interleave group.
unsigned Factor = std::abs(Stride);
@@ -4324,7 +4590,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses(
if (Factor < 2 || Factor > MaxInterleaveGroupFactor)
continue;
- const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
@@ -4411,12 +4677,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
continue;
// Calculate the distance and prepare for the rule 3.
- const SCEVConstant *DistToA =
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+ const SCEVConstant *DistToA = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev));
if (!DistToA)
continue;
- int DistanceToA = DistToA->getValue()->getValue().getSExtValue();
+ int DistanceToA = DistToA->getAPInt().getSExtValue();
// Skip if the distance is not a multiple of the size, as the accesses are
// not in the same group.
@@ -4454,8 +4720,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
emitAnalysis(VectorizationReport() <<
"runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os");
- DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+ "compiling with -Os/-Oz");
+ DEBUG(dbgs() <<
+ "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
return Factor;
}
@@ -4467,10 +4734,12 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
}
// Find the trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
- unsigned WidestType = getWidestType();
+ MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
+ unsigned SmallestType, WidestType;
+ std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
unsigned MaxSafeDepDist = -1U;
if (Legal->getMaxSafeDepDistBytes() != -1U)
@@ -4478,7 +4747,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
- DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+
+ DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
+ << WidestType << " bits.\n");
DEBUG(dbgs() << "LV: The Widest register is: "
<< WidestRegister << " bits.\n");
@@ -4491,6 +4762,26 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
" into one vector!");
unsigned VF = MaxVectorSize;
+ if (MaximizeBandwidth && !OptForSize) {
+ // Collect all viable vectorization factors.
+ SmallVector<unsigned, 8> VFs;
+ unsigned NewMaxVectorSize = WidestRegister / SmallestType;
+ for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2)
+ VFs.push_back(VS);
+
+ // For each VF calculate its register usage.
+ auto RUs = calculateRegisterUsage(VFs);
+
+ // Select the largest VF which doesn't require more registers than existing
+ // ones.
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
+ for (int i = RUs.size() - 1; i >= 0; --i) {
+ if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
+ VF = VFs[i];
+ break;
+ }
+ }
+ }
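For illustration, a minimal standalone sketch of the candidate-VF enumeration above; the helper name and the concrete widths (a 256-bit register, 32-bit widest and 8-bit smallest element types) are assumptions for the example, not from the patch:

    #include <vector>

    // Enumerate power-of-two VFs between the factor implied by the widest
    // element type and the factor implied by the smallest one (sketch only;
    // the concrete numbers in the comments are illustrative).
    std::vector<unsigned> collectCandidateVFs(unsigned WidestRegister, // e.g. 256
                                              unsigned WidestType,     // e.g. 32
                                              unsigned SmallestType) { // e.g. 8
      unsigned MaxVectorSize = WidestRegister / WidestType;      // 8
      unsigned NewMaxVectorSize = WidestRegister / SmallestType; // 32
      std::vector<unsigned> VFs;
      for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2)
        VFs.push_back(VS);                                       // {8, 16, 32}
      return VFs;
    }

Register usage is then computed for each candidate, and the largest VF whose MaxLocalUsers stays within the target's register count wins, as in the loop above.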
// If we optimize the program for size, avoid creating the tail loop.
if (OptForSize) {
@@ -4499,7 +4790,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
emitAnalysis
(VectorizationReport() <<
"unable to calculate the loop count due to complex control flow");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
@@ -4515,8 +4806,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
"cannot optimize for size and vectorize at the "
"same time. Enable vectorization of this loop "
"with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ "when compiling with -Os/-Oz");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
}
@@ -4566,7 +4857,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
return Factor;
}
-unsigned LoopVectorizationCostModel::getWidestType() {
+std::pair<unsigned, unsigned>
+LoopVectorizationCostModel::getSmallestAndWidestTypes() {
+ unsigned MinWidth = -1U;
unsigned MaxWidth = 8;
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
@@ -4579,18 +4872,22 @@ unsigned LoopVectorizationCostModel::getWidestType() {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Type *T = it->getType();
- // Ignore ephemeral values.
- if (EphValues.count(it))
+ // Skip ignored values.
+ if (ValuesToIgnore.count(&*it))
continue;
// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
- // Examine PHI nodes that are reduction variables.
- if (PHINode *PN = dyn_cast<PHINode>(it))
- if (!Legal->getReductionVars()->count(PN))
+ // Examine PHI nodes that are reduction variables. Update the type to
+ // account for the recurrence type.
+ if (PHINode *PN = dyn_cast<PHINode>(it)) {
+ if (!Legal->isReductionVariable(PN))
continue;
+ RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN];
+ T = RdxDesc.getRecurrenceType();
+ }
// Examine the stored values.
if (StoreInst *ST = dyn_cast<StoreInst>(it))
@@ -4599,15 +4896,17 @@ unsigned LoopVectorizationCostModel::getWidestType() {
// Ignore loaded pointer types and stored pointer types that are not
// consecutive. However, we do want to take consecutive stores/loads of
// pointer vectors into account.
- if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it))
continue;
+ MinWidth = std::min(MinWidth,
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
MaxWidth = std::max(MaxWidth,
(unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
- return MaxWidth;
+ return {MinWidth, MaxWidth};
}
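For example (loop contents assumed, not from the patch): a loop whose memory traffic is i8 loads feeding i32 stores reports both bounds, letting the bandwidth-maximizing code above size candidate VFs by the narrow side.

    //   for (i = 0; i < n; ++i)
    //     a32[i] = (int32_t)b8[i];       // i8 loads, i32 stores
    //
    //   getSmallestAndWidestTypes()  ->  {8, 32}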
unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
@@ -4628,11 +4927,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// 3. We don't interleave if we think that we will spill registers to memory
// due to the increased register pressure.
- // Use the user preference, unless 'auto' is selected.
- int UserUF = Hints->getInterleave();
- if (UserUF != 0)
- return UserUF;
-
// When we optimize for size, we don't interleave.
if (OptForSize)
return 1;
@@ -4642,7 +4936,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
// Do not interleave loops with a relatively small trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
@@ -4658,7 +4952,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
TargetNumRegisters = ForceTargetNumVectorRegs;
}
- LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
+ RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
@@ -4756,8 +5050,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
}
// Interleave if this is a large loop (small loops are already dealt with by
- // this
- // point) that could benefit from interleaving.
+ // this point) that could benefit from interleaving.
bool HasReductions = (Legal->getReductionVars()->size() > 0);
if (TTI.enableAggressiveInterleaving(HasReductions)) {
DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
@@ -4768,8 +5061,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
}
-LoopVectorizationCostModel::RegisterUsage
-LoopVectorizationCostModel::calculateRegisterUsage() {
+SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
+LoopVectorizationCostModel::calculateRegisterUsage(
+ const SmallVector<unsigned, 8> &VFs) {
// This function calculates the register usage by measuring the highest number
// of values that are alive at a single location. Obviously, this is a very
// rough estimation. We scan the loop in a topological order in order and
@@ -4790,8 +5084,8 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
- RegisterUsage R;
- R.NumInstructions = 0;
+ RegisterUsage RU;
+ RU.NumInstructions = 0;
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
@@ -4810,15 +5104,13 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
unsigned Index = 0;
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb) {
- R.NumInstructions += (*bb)->size();
- for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
- ++it) {
- Instruction *I = it;
- IdxToInstr[Index++] = I;
+ RU.NumInstructions += (*bb)->size();
+ for (Instruction &I : **bb) {
+ IdxToInstr[Index++] = &I;
// Save the end location of each USE.
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *U = I->getOperand(i);
+ for (unsigned i = 0; i < I.getNumOperands(); ++i) {
+ Value *U = I.getOperand(i);
Instruction *Instr = dyn_cast<Instruction>(U);
// Ignore non-instruction values such as arguments, constants, etc.
@@ -4847,42 +5139,85 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
TransposeEnds[it->second].push_back(it->first);
SmallSet<Instruction*, 8> OpenIntervals;
- unsigned MaxUsage = 0;
+ // Get the size of the widest register.
+ unsigned MaxSafeDepDist = -1U;
+ if (Legal->getMaxSafeDepDistBytes() != -1U)
+ MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
+ unsigned WidestRegister =
+ std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+
+ SmallVector<RegisterUsage, 8> RUs(VFs.size());
+ SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+
+ // A lambda that gets the register usage for the given type and VF.
+ auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) {
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
+ return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
+ };
+
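A worked reading of the GetRegUsage lambda above; the free-function form and the numbers are illustrative:

    #include <algorithm>

    // A vector value is assumed to occupy VF * TypeSize bits, spread across
    // WidestRegister-bit registers, and never costs less than one register.
    unsigned regUsage(unsigned TypeSize, unsigned VF, unsigned WidestRegister) {
      return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
    }
    // regUsage(32, 16, 256) == 2  -- a <16 x i32> spans two 256-bit registers.
    // regUsage(8, 4, 256)   == 1  -- a <4 x i8> still occupies one register.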
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
- // Ignore ephemeral values.
- if (EphValues.count(I))
- continue;
-
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
- for (unsigned int j=0, e = List.size(); j < e; ++j)
+ for (unsigned int j = 0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
- // Count the number of live interals.
- MaxUsage = std::max(MaxUsage, OpenIntervals.size());
+ // Skip ignored values.
+ if (ValuesToIgnore.count(I))
+ continue;
- DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
- OpenIntervals.size() << '\n');
+ // For each VF find the maximum usage of registers.
+ for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
+ if (VFs[j] == 1) {
+ MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+ continue;
+ }
+
+ // Count the number of live intervals.
+ unsigned RegUsage = 0;
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.count(Inst))
+ continue;
+ RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
+ }
+ MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
+ }
+
+ DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
+ << OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
- unsigned Invariant = LoopInvariants.size();
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
- DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
+ for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
+ unsigned Invariant = 0;
+ if (VFs[i] == 1)
+ Invariant = LoopInvariants.size();
+ else {
+ for (auto Inst : LoopInvariants)
+ Invariant += GetRegUsage(Inst->getType(), VFs[i]);
+ }
+
+ DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');
- R.LoopInvariantRegs = Invariant;
- R.MaxLocalUsers = MaxUsage;
- return R;
+ RU.LoopInvariantRegs = Invariant;
+ RU.MaxLocalUsers = MaxUsages[i];
+ RUs[i] = RU;
+ }
+
+ return RUs;
}
unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
@@ -4900,11 +5235,11 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
if (isa<DbgInfoIntrinsic>(it))
continue;
- // Ignore ephemeral values.
- if (EphValues.count(it))
+ // Skip ignored values.
+ if (ValuesToIgnore.count(&*it))
continue;
- unsigned C = getInstructionCost(it, VF);
+ unsigned C = getInstructionCost(&*it, VF);
// Check if we should override the cost.
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
@@ -4969,7 +5304,7 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
if (!C)
return true;
- const APInt &APStepVal = C->getValue()->getValue();
+ const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
@@ -4981,9 +5316,8 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
- if (Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1)))
- return true;
- return false;
+ return Legal->hasStride(I->getOperand(0)) ||
+ Legal->hasStride(I->getOperand(1));
}
unsigned
@@ -4994,7 +5328,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
VF = 1;
Type *RetTy = I->getType();
+ if (VF > 1 && MinBWs.count(I))
+ RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
Type *VectorTy = ToVectorTy(RetTy, VF);
+ auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
@@ -5076,6 +5413,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
+ Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
+ auto It = MinBWs.find(Op0AsInstruction);
+ if (VF > 1 && It != MinBWs.end())
+ ValTy = IntegerType::get(ValTy->getContext(), It->second);
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
@@ -5199,8 +5540,28 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Legal->isInductionVariable(I->getOperand(0)))
return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
I->getOperand(0)->getType());
-
- Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+
+ Type *SrcScalarTy = I->getOperand(0)->getType();
+ Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
+ if (VF > 1 && MinBWs.count(I)) {
+ // This cast is going to be shrunk. This may remove the cast or it might
+ // turn it into slightly different cast. For example, if MinBW == 16,
+ // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
+ //
+ // Calculate the modified src and dest types.
+ Type *MinVecTy = VectorTy;
+ if (I->getOpcode() == Instruction::Trunc) {
+ SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
+ VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF),
+ MinVecTy);
+ } else if (I->getOpcode() == Instruction::ZExt ||
+ I->getOpcode() == Instruction::SExt) {
+ SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
+ VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF),
+ MinVecTy);
+ }
+ }
+
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
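The smallestIntegerVectorType/largestIntegerVectorType helpers used above are added elsewhere in this patch (outside this hunk); a sketch consistent with how they are called here, choosing the vector type with the narrower integer elements:

    // Sketch, assuming both arguments are integer vector types.
    static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
      auto *I1 = cast<IntegerType>(T1->getVectorElementType());
      auto *I2 = cast<IntegerType>(T2->getVectorElementType());
      return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
    }
    // largestIntegerVectorType is the mirror image, comparing with '>'.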
case Instruction::Call: {
@@ -5240,15 +5601,18 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -5269,6 +5633,79 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
+void LoopVectorizationCostModel::collectValuesToIgnore() {
+ // Ignore ephemeral values.
+ CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
+
+ // Ignore type-promoting instructions we identified during reduction
+ // detection.
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ RecurrenceDescriptor &RedDes = Reduction.second;
+ SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+ VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+ }
+
+ // Ignore induction phis that are only used in either a GetElementPtr or an
+ // ICmp instruction to exit the loop. Induction variables usually have large
+ // types and can have a big impact when estimating register usage.
+ // This is for when VF > 1.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ auto *PN = Induction.first;
+ auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
+
+ // Check that the PHI is only used by the induction increment (UpdateV) or
+ // by GEPs. Then check that UpdateV is only used by a compare instruction or
+ // the loop header PHI.
+ // FIXME: Need precise def-use analysis to determine if this induction
+ // variable will be vectorized.
+ if (std::all_of(PN->user_begin(), PN->user_end(),
+ [&](const User *U) -> bool {
+ return U == UpdateV || isa<GetElementPtrInst>(U);
+ }) &&
+ std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
+ [&](const User *U) -> bool {
+ return U == PN || isa<ICmpInst>(U);
+ })) {
+ VecValuesToIgnore.insert(PN);
+ VecValuesToIgnore.insert(UpdateV);
+ }
+ }
+
+ // Ignore instructions that will not be vectorized.
+ // This is for when VF > 1.
+ for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
+ ++bb) {
+ for (auto &Inst : **bb) {
+ switch (Inst.getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Ignore the GEP if its last operand is an induction variable so that it
+ // is a consecutive load/store and won't be vectorized as a scatter/gather
+ // pattern.
+
+ GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
+ unsigned NumOperands = Gep->getNumOperands();
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
+ bool GepToIgnore = true;
+
+ // Check that all of the gep indices are uniform except for the
+ // induction operand.
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ if (i != InductionOperand &&
+ !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
+ TheLoop)) {
+ GepToIgnore = false;
+ break;
+ }
+ }
+
+ if (GepToIgnore)
+ VecValuesToIgnore.insert(&Inst);
+ break;
+ }
+ }
+ }
+ }
+}
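An illustrative shape of the induction PHI pattern the code above ignores for VF > 1; the IR is a made-up example, not from the patch:

    // loop:
    //   %iv      = phi i64 [ 0, %ph ], [ %iv.next, %loop ]  ; PN
    //   %gep     = getelementptr i32, i32* %a, i64 %iv      ; GEP user of PN
    //   ...
    //   %iv.next = add nuw i64 %iv, 1                       ; UpdateV
    //   %cmp     = icmp ult i64 %iv.next, %n                ; ICmp user
    //   br i1 %cmp, label %loop, label %exit
    //
    // %iv is used only by %gep and %iv.next, and %iv.next only by %cmp and
    // the header PHI, so both are inserted into VecValuesToIgnore.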
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore) {
@@ -5316,19 +5753,12 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
@@ -5343,11 +5773,6 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part],
ConstantInt::get(Cond[Part]->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
}
Instruction *Cloned = Instr->clone();
@@ -5367,16 +5792,10 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
if (!IsVoidRetTy)
VecResults[Part] = Cloned;
- // End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ // End if-block.
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b180c97..9ed44d1 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -61,7 +62,7 @@ static cl::opt<int>
"number "));
static cl::opt<bool>
-ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
@@ -73,6 +74,14 @@ static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
+/// Limits the size of scheduling regions in a block.
+/// It avoids long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+ cl::desc("Limit the size of the SLP scheduling region per block"));
+
namespace {
// FIXME: Set this via cl::opt to allow overriding.
@@ -89,6 +98,10 @@ static const unsigned AliasedCheckLimit = 10;
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -156,13 +169,11 @@ static unsigned getAltOpcode(unsigned Op) {
/// of an alternate sequence which can later be merged as
/// a ShuffleVector instruction.
static bool canCombineAsAltInst(unsigned Op) {
- if (Op == Instruction::FAdd || Op == Instruction::FSub ||
- Op == Instruction::Sub || Op == Instruction::Add)
- return true;
- return false;
+ return Op == Instruction::FAdd || Op == Instruction::FSub ||
+ Op == Instruction::Sub || Op == Instruction::Add;
}
-/// \returns ShuffleVector instruction if intructions in \p VL have
+/// \returns ShuffleVector instruction if instructions in \p VL have
/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
static unsigned isAltInst(ArrayRef<Value *> VL) {
@@ -242,6 +253,9 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
+ case LLVMContext::MD_nontemporal:
+ MD = MDNode::intersect(MD, IMD);
+ break;
}
}
I->setMetadata(Kind, MD);
@@ -393,7 +407,7 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns true if it is benefitial to reverse the vector order.
+ /// \returns true if it is beneficial to reverse the vector order.
bool shouldReorder() const {
return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
}
@@ -441,7 +455,7 @@ private:
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
- /// \returns whether the VectorizableTree is fully vectoriable and will
+ /// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
bool isFullyVectorizableTinyTree();
@@ -506,7 +520,7 @@ private:
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser (Value *S, llvm::User *U, int L) :
- Scalar(S), User(U), Lane(L){};
+ Scalar(S), User(U), Lane(L){}
// Which scalar in our function.
Value *Scalar;
// Which user that uses the scalar.
@@ -717,6 +731,8 @@ private:
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
ScheduleStart(nullptr), ScheduleEnd(nullptr),
FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
+ ScheduleRegionSize(0),
+ ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
// Make sure that the initial SchedulingRegionID is greater than the
// initial SchedulingRegionID in ScheduleData (which is 0).
SchedulingRegionID(1) {}
@@ -728,6 +744,13 @@ private:
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
+ // Reduce the maximum schedule region size by the size of the
+ // previous scheduling run.
+ ScheduleRegionSizeLimit -= ScheduleRegionSize;
+ if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
+ ScheduleRegionSizeLimit = MinScheduleRegionSize;
+ ScheduleRegionSize = 0;
+
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
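A minimal sketch of the budget bookkeeping above; the function name is hypothetical and the constants are the defaults introduced by this patch:

    // Each scheduling run consumes its region size from the per-block budget;
    // the budget is clamped so that small regions can still be formed.
    int nextRegionLimit(int CurrentLimit, int LastRegionSize) {
      const int MinScheduleRegionSize = 16; // floor from this patch
      int NewLimit = CurrentLimit - LastRegionSize;
      return NewLimit < MinScheduleRegionSize ? MinScheduleRegionSize : NewLimit;
    }
    // Starting from the 100000 default, a 99990-instruction region leaves a
    // limit of 16 (the floor), not 10.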
@@ -804,7 +827,8 @@ private:
void cancelScheduling(ArrayRef<Value *> VL);
/// Extends the scheduling region so that V is inside the region.
- void extendSchedulingRegion(Value *V);
+ /// \returns true if the region size is within the limit.
+ bool extendSchedulingRegion(Value *V);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
@@ -858,6 +882,12 @@ private:
/// (can be null).
ScheduleData *LastLoadStoreInRegion;
+ /// The current size of the scheduling region.
+ int ScheduleRegionSize;
+
+ /// The maximum size allowed for the scheduling region.
+ int ScheduleRegionSizeLimit;
+
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
int SchedulingRegionID;
@@ -1077,7 +1107,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (!BS.tryScheduleBundle(VL, this)) {
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
- BS.cancelScheduling(VL);
+ assert((!BS.getScheduleData(VL[0]) ||
+ !BS.getScheduleData(VL[0])->isPartOfBundle()) &&
+ "tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, false);
return;
}
@@ -1125,6 +1157,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Load: {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load.
+ // For example, we don't want to vectorize loads that are smaller than
+ // 8 bits. Even though we have a packed struct {<i2, i2, i2, i2>}, LLVM treats
+ // loading/storing it as an i8 struct. If we vectorize loads/stores from
+ // such a struct we read/write packed bits disagreeing with the
+ // unvectorized version.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *ScalarTy = VL[0]->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) !=
+ DL.getTypeAllocSizeInBits(ScalarTy)) {
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ return;
+ }
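A concrete instance of the check above, with a hypothetical helper name: for an i2 element, getTypeSizeInBits returns 2 while getTypeAllocSizeInBits returns 8, the two disagree, and the loads are gathered instead of vectorized.

    // Sketch: a scalar type is safe here only if it has no padding bits,
    // i.e. its value size equals its in-memory allocation size.
    static bool hasNoPaddingBits(const DataLayout &DL, Type *ScalarTy) {
      return DL.getTypeSizeInBits(ScalarTy) ==
             DL.getTypeAllocSizeInBits(ScalarTy);
    }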
// Check if the loads are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1134,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- const DataLayout &DL = F->getParent()->getDataLayout();
+
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
@@ -1690,7 +1739,8 @@ int BoUpSLP::getSpillCost() {
}
// Now find the sequence of instructions between PrevInst and Inst.
- BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+ BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
+ PrevInstIt(PrevInst->getIterator());
--PrevInstIt;
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
@@ -1890,106 +1940,126 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
}
}
+// Return true if I should be commuted before adding its left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying either to have all elements on one side be
+// instructions with the same opcode to enable further vectorization, or to
+// have a splat to lower the vectorization cost.
+static bool shouldReorderOperands(int i, Instruction &I,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ bool AllSameOpcodeLeft,
+ bool AllSameOpcodeRight, bool SplatLeft,
+ bool SplatRight) {
+ Value *VLeft = I.getOperand(0);
+ Value *VRight = I.getOperand(1);
+ // If we have "SplatRight", try to see if commuting is needed to preserve it.
+ if (SplatRight) {
+ if (VRight == Right[i - 1])
+ // Preserve SplatRight
+ return false;
+ if (VLeft == Right[i - 1]) {
+ // Commuting would preserve SplatRight, but we don't want to break
+ // SplatLeft either, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (SplatLeft && VLeft == Left[i - 1])
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Right side.
+ if (SplatLeft) {
+ if (VLeft == Left[i - 1])
+ // Preserve SplatLeft
+ return false;
+ if (VRight == Left[i - 1])
+ return true;
+ }
+
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // If we have "AllSameOpcodeRight", try to see if the left operand preserves
+ // it while the right does not; in this case we want to commute.
+ if (AllSameOpcodeRight) {
+ unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
+ if (IRight && RightPrevOpcode == IRight->getOpcode())
+ // Do not commute, a match on the right preserves AllSameOpcodeRight
+ return false;
+ if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
+ // We have a match and may want to commute, but first check if there is
+ // not also a match on the existing operands on the Left to preserve
+ // AllSameOpcodeLeft, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (AllSameOpcodeLeft && ILeft &&
+ cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Left side.
+ if (AllSameOpcodeLeft) {
+ unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
+ if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
+ return false;
+ if (IRight && LeftPrevOpcode == IRight->getOpcode())
+ return true;
+ }
+ return false;
+}
+
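A worked trace of the splat rule above, on made-up values:

    //   VL[0]: %1 = fmul float %a, %x
    //   VL[1]: %2 = fmul float %x, %b
    //
    // After peeling VL[0]: Left = {%a}, Right = {%x}. At i == 1,
    // VRight (%b) != Right[0], but VLeft (%x) == Right[0], so the operands
    // of %2 are commuted: Left = {%a, %b}, Right = {%x, %x}. SplatRight
    // survives and the %x side can be materialized as a single broadcast.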
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
- SmallVector<Value *, 16> OrigLeft, OrigRight;
-
- bool AllSameOpcodeLeft = true;
- bool AllSameOpcodeRight = true;
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- Value *VLeft = I->getOperand(0);
- Value *VRight = I->getOperand(1);
-
- OrigLeft.push_back(VLeft);
- OrigRight.push_back(VRight);
-
- Instruction *ILeft = dyn_cast<Instruction>(VLeft);
- Instruction *IRight = dyn_cast<Instruction>(VRight);
-
- // Check whether all operands on one side have the same opcode. In this case
- // we want to preserve the original order and not make things worse by
- // reordering.
- if (i && AllSameOpcodeLeft && ILeft) {
- if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
- if (PLeft->getOpcode() != ILeft->getOpcode())
- AllSameOpcodeLeft = false;
- } else
- AllSameOpcodeLeft = false;
- }
- if (i && AllSameOpcodeRight && IRight) {
- if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
- if (PRight->getOpcode() != IRight->getOpcode())
- AllSameOpcodeRight = false;
- } else
- AllSameOpcodeRight = false;
- }
-
- // Sort two opcodes. In the code below we try to preserve the ability to use
- // broadcast of values instead of individual inserts.
- // vl1 = load
- // vl2 = phi
- // vr1 = load
- // vr2 = vr2
- // = vl1 x vr1
- // = vl2 x vr2
- // If we just sorted according to opcode we would leave the first line in
- // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
- // = vl1 x vr1
- // = vr2 x vl2
- // Because vr2 and vr1 are from the same load we loose the opportunity of a
- // broadcast for the packed right side in the backend: we have [vr1, vl2]
- // instead of [vr1, vr2=vr1].
- if (ILeft && IRight) {
- if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
- Right[i - 1] != IRight) {
- // Try not to destroy a broad cast for no apparent benefit.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
- Right[i - 1] == ILeft) {
- // Try preserve broadcasts.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
- Left[i - 1] == IRight) {
- // Try preserve broadcasts.
- Left.push_back(IRight);
- Right.push_back(ILeft);
- } else {
- Left.push_back(ILeft);
- Right.push_back(IRight);
- }
- continue;
- }
- // One opcode, put the instruction on the right.
- if (ILeft) {
- Left.push_back(VRight);
- Right.push_back(ILeft);
- continue;
- }
+ if (VL.size()) {
+ // Peel the first iteration out of the loop since there's nothing
+ // interesting to do anyway and it simplifies the checks in the loop.
+ auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+ auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+ if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+ // Favor having the instruction to the right. FIXME: why?
+ std::swap(VLeft, VRight);
Left.push_back(VLeft);
Right.push_back(VRight);
}
- bool LeftBroadcast = isSplat(Left);
- bool RightBroadcast = isSplat(Right);
-
- // If operands end up being broadcast return this operand order.
- if (LeftBroadcast || RightBroadcast)
- return;
+ // Keep track of whether we have instructions with all the same opcode on one side.
+ bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+ bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+ // Keep track of whether we have one side with all the same value (broadcast).
+ bool SplatLeft = true;
+ bool SplatRight = true;
- // Don't reorder if the operands where good to begin.
- if (AllSameOpcodeRight || AllSameOpcodeLeft) {
- Left = OrigLeft;
- Right = OrigRight;
+ for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ assert(I->isCommutative() && "Can only process commutative instruction");
+ // Commute to favor either a splat or maximizing having the same opcodes on
+ // one side.
+ if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
+ AllSameOpcodeRight, SplatLeft, SplatRight)) {
+ Left.push_back(I->getOperand(1));
+ Right.push_back(I->getOperand(0));
+ } else {
+ Left.push_back(I->getOperand(0));
+ Right.push_back(I->getOperand(1));
+ }
+ // Update Splat* and AllSameOpcode* after the insertion.
+ SplatRight = SplatRight && (Right[i - 1] == Right[i]);
+ SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
+ AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
+ (cast<Instruction>(Left[i - 1])->getOpcode() ==
+ cast<Instruction>(Left[i])->getOpcode());
+ AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
+ (cast<Instruction>(Right[i - 1])->getOpcode() ==
+ cast<Instruction>(Right[i])->getOpcode());
}
+ // If one operand ends up being a broadcast, return this operand order.
+ if (SplatRight || SplatLeft)
+ return;
+
const DataLayout &DL = F->getParent()->getDataLayout();
// Finally check if we can get longer vectorizable chain by reordering
@@ -2030,7 +2100,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
- BasicBlock::iterator NextInst = VL0;
+ BasicBlock::iterator NextInst(VL0);
++NextInst;
Builder.SetInsertPoint(VL0->getParent(), NextInst);
Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
@@ -2487,7 +2557,7 @@ Value *BoUpSLP::vectorizeTree() {
scheduleBlock(BSIter.second.get());
}
- Builder.SetInsertPoint(F->getEntryBlock().begin());
+ Builder.SetInsertPoint(&F->getEntryBlock().front());
vectorizeTree(&VectorizableTree[0]);
DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
@@ -2532,7 +2602,7 @@ Value *BoUpSLP::vectorizeTree() {
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
- Builder.SetInsertPoint(F->getEntryBlock().begin());
+ Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
@@ -2641,7 +2711,7 @@ void BoUpSLP::optimizeGatherSequence() {
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
- Instruction *In = it++;
+ Instruction *In = &*it++;
if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
continue;
@@ -2681,8 +2751,15 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+
+ // Make sure that the scheduling region contains all
+ // instructions of the bundle.
+ for (Value *V : VL) {
+ if (!extendSchedulingRegion(V))
+ return false;
+ }
+
for (Value *V : VL) {
- extendSchedulingRegion(V);
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
@@ -2743,7 +2820,11 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
schedule(pickedSD, ReadyInsts);
}
}
- return Bundle->isReady();
+ if (!Bundle->isReady()) {
+ cancelScheduling(VL);
+ return false;
+ }
+ return true;
}
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
@@ -2772,9 +2853,9 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
}
}
-void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
if (getScheduleData(V))
- return;
+ return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
@@ -2785,21 +2866,26 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
- return;
+ return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
- BasicBlock::reverse_iterator UpIter(ScheduleStart);
+ BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter(ScheduleEnd);
BasicBlock::iterator LowerEnd = BB->end();
for (;;) {
+ if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+ DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+ return false;
+ }
+
if (UpIter != UpperEnd) {
if (&*UpIter == I) {
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
- return;
+ return true;
}
UpIter++;
}
@@ -2810,13 +2896,14 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
ScheduleEnd = I->getNextNode();
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
- return;
+ return true;
}
DownIter++;
}
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
"instruction not found in block");
}
+ return true;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
@@ -2896,8 +2983,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
} else {
// I'm not sure if this can ever happen. But we need to be safe.
- // This lets the instruction/bundle never be scheduled and eventally
- // disable vectorization.
+ // This lets the instruction/bundle never be scheduled and
+ // eventually disable vectorization.
BundleMember->Dependencies++;
BundleMember->incrementUnscheduledDeps(1);
}
@@ -3003,7 +3090,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
};
std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
- // Ensure that all depencency data is updated and fill the ready-list with
+ // Ensure that all dependency data is updated and fill the ready-list with
// initial instructions.
int Idx = 0;
int NumToSchedule = 0;
@@ -3035,7 +3122,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
Instruction *pickedInst = BundleMember->Inst;
if (LastScheduledInst->getNextNode() != pickedInst) {
BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
+ BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
+ pickedInst);
}
LastScheduledInst = pickedInst;
BundleMember = BundleMember->NextInBundle;
@@ -3074,11 +3162,11 @@ struct SLPVectorizer : public FunctionPass {
if (skipOptnoneFunction(F))
return false;
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3139,13 +3227,15 @@ struct SLPVectorizer : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
@@ -3260,15 +3350,26 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// Do a quadratic search on all of the given stores and find
// all of the pairs of stores that follow each other.
+ SmallVector<unsigned, 16> IndexQueue;
for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
- for (unsigned j = 0; j < e; ++j) {
- if (i == j)
- continue;
- const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
- if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
- Tails.insert(Stores[j]);
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ IndexQueue.clear();
+ // If a store has multiple consecutive store candidates, search the Stores
+ // array according to the sequence: from i+1 to e, then from i-1 to 0. This
+ // is because pairing with the immediately succeeding or preceding candidate
+ // usually creates the best chance to find an SLP vectorization opportunity.
+ unsigned j = 0;
+ for (j = i + 1; j < e; ++j)
+ IndexQueue.push_back(j);
+ for (j = i; j > 0; --j)
+ IndexQueue.push_back(j - 1);
+
+ for (auto &k : IndexQueue) {
+ if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+ Tails.insert(Stores[k]);
Heads.insert(Stores[i]);
- ConsecutiveChain[Stores[i]] = Stores[j];
+ ConsecutiveChain[Stores[i]] = Stores[k];
+ break;
}
}
}
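The probe order built above, as a standalone sketch (the function name is assumed):

    #include <vector>

    // Nearest following candidates first (i+1 .. e-1), then preceding ones
    // walking backwards (i-1 .. 0).
    std::vector<unsigned> probeOrder(unsigned i, unsigned e) {
      std::vector<unsigned> IndexQueue;
      for (unsigned j = i + 1; j < e; ++j)
        IndexQueue.push_back(j);
      for (unsigned j = i; j > 0; --j)
        IndexQueue.push_back(j - 1);
      return IndexQueue;
    }
    // probeOrder(3, 6) yields {4, 5, 2, 1, 0}.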
@@ -3428,7 +3529,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned VecIdx = 0;
for (auto &V : BuildVectorSlice) {
IRBuilder<true, NoFolder> Builder(
- ++BasicBlock::iterator(InsertAfter));
+ InsertAfter->getParent(), ++BasicBlock::iterator(InsertAfter));
InsertElementInst *IE = cast<InsertElementInst>(V);
Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
VectorizedRoot, Builder.getInt32(VecIdx++)));
@@ -3552,16 +3653,17 @@ class HorizontalReduction {
unsigned ReductionOpcode;
/// The opcode of the values we perform a reduction on.
unsigned ReducedValueOpcode;
- /// The width of one full horizontal reduction operation.
- unsigned ReduxWidth;
/// Should we model this reduction as a pairwise reduction tree or a tree that
/// splits the vector in halves and adds those halves.
bool IsPairwiseReduction;
public:
+ /// The width of one full horizontal reduction operation.
+ unsigned ReduxWidth;
+
HorizontalReduction()
: ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
- ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+ ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0) {}
/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
@@ -3607,11 +3709,11 @@ public:
return false;
// Post order traverse the reduction tree starting at B. We only handle true
- // trees containing only binary operators.
- SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+ // trees containing only binary operators or selects.
+ SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
Stack.push_back(std::make_pair(B, 0));
while (!Stack.empty()) {
- BinaryOperator *TreeN = Stack.back().first;
+ Instruction *TreeN = Stack.back().first;
unsigned EdgeToVist = Stack.back().second++;
bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
@@ -3647,9 +3749,10 @@ public:
// Visit left or right.
Value *NextV = TreeN->getOperand(EdgeToVist);
- BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
- if (Next)
- Stack.push_back(std::make_pair(Next, 0));
+ // We currently only allow BinaryOperators and SelectInsts as reduction
+ // values in our tree.
+ if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
+ Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
else if (NextV != Phi)
return false;
}
@@ -3717,9 +3820,12 @@ public:
return VectorizedTree != nullptr;
}
-private:
+ unsigned numReductionValues() const {
+ return ReducedVals.size();
+ }
- /// \brief Calcuate the cost of a reduction.
+private:
+ /// \brief Calculate the cost of a reduction.
int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
Type *ScalarTy = FirstReducedVal->getType();
Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
@@ -3825,6 +3931,82 @@ static bool PhiTypeSorterFunc(Value *V, Value *V2) {
return V->getType() < V2->getType();
}
+/// \brief Try and get a reduction value from a phi node.
+///
+/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
+/// if they come from either \p ParentBB or a containing loop latch.
+///
+/// \returns A candidate reduction value if possible, or \code nullptr \endcode
+/// if not possible.
+static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
+ BasicBlock *ParentBB, LoopInfo *LI) {
+ // There are situations where the reduction value is not dominated by the
+ // reduction phi. Vectorizing such cases has been reported to cause
+ // miscompiles. See PR25787.
+ auto DominatedReduxValue = [&](Value *R) {
+ return (
+ dyn_cast<Instruction>(R) &&
+ DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent()));
+ };
+
+ Value *Rdx = nullptr;
+
+ // Return the incoming value if it comes from the same BB as the phi node.
+ if (P->getIncomingBlock(0) == ParentBB) {
+ Rdx = P->getIncomingValue(0);
+ } else if (P->getIncomingBlock(1) == ParentBB) {
+ Rdx = P->getIncomingValue(1);
+ }
+
+ if (Rdx && DominatedReduxValue(Rdx))
+ return Rdx;
+
+ // Otherwise, check whether we have a loop latch to look at.
+ Loop *BBL = LI->getLoopFor(ParentBB);
+ if (!BBL)
+ return nullptr;
+ BasicBlock *BBLatch = BBL->getLoopLatch();
+ if (!BBLatch)
+ return nullptr;
+
+ // There is a loop latch, return the incoming value if it comes from
+ // that. This reduction pattern occasionally turns up.
+ if (P->getIncomingBlock(0) == BBLatch) {
+ Rdx = P->getIncomingValue(0);
+ } else if (P->getIncomingBlock(1) == BBLatch) {
+ Rdx = P->getIncomingValue(1);
+ }
+
+ if (Rdx && DominatedReduxValue(Rdx))
+ return Rdx;
+
+ return nullptr;
+}
+
+/// \brief Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
+/// \returns true if a horizontal reduction was matched and reduced.
+/// \returns false if a horizontal reduction was not matched.
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+ BoUpSLP &R, TargetTransformInfo *TTI) {
+ if (!ShouldVectorizeHor)
+ return false;
+
+ HorizontalReduction HorRdx;
+ if (!HorRdx.matchAssociativeReduction(P, BI))
+ return false;
+
+ // If there is a sufficient number of reduction values, reduce
+ // to a nearby power-of-2. Can safely generate oversized
+ // vectors and rely on the backend to split them to legal sizes.
+ HorRdx.ReduxWidth =
+ std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+ return HorRdx.tryToReduce(R, TTI);
+}
+
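Worked instances of the width choice above; the value counts are illustrative:

    //   numReductionValues() == 7   ->  max(4, PowerOf2Floor(7))  == 4
    //   numReductionValues() == 35  ->  max(4, PowerOf2Floor(35)) == 32
    //   numReductionValues() == 3   ->  max(4, PowerOf2Floor(3))  == 4
    //
    // Oversized widths are deliberate; the backend legalizes them by
    // splitting.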
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
@@ -3881,7 +4063,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.
- if (!VisitedInstrs.insert(it).second)
+ if (!VisitedInstrs.insert(&*it).second)
continue;
if (isa<DbgInfoIntrinsic>(it))
@@ -3892,20 +4074,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() != 2)
return Changed;
- Value *Rdx =
- (P->getIncomingBlock(0) == BB
- ? (P->getIncomingValue(0))
- : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
- : nullptr));
+
+ Value *Rdx = getReductionValue(DT, P, BB, LI);
+
// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
continue;
// Try to match and vectorize a horizontal reduction.
- HorizontalReduction HorRdx;
- if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
- HorRdx.tryToReduce(R, TTI)) {
+ if (canMatchHorizontalReduction(P, BI, R, TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
@@ -3928,15 +4106,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- // Try to vectorize horizontal reductions feeding into a store.
if (ShouldStartVectorizeHorAtStore)
if (StoreInst *SI = dyn_cast<StoreInst>(it))
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
- HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
- HorRdx.tryToReduce(R, TTI)) ||
- tryToVectorize(BinOp, R))) {
+ if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI) ||
+ tryToVectorize(BinOp, R)) {
Changed = true;
it = BB->begin();
e = BB->end();
@@ -4037,10 +4212,10 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
diff --git a/contrib/llvm/tools/bugpoint/BugDriver.cpp b/contrib/llvm/tools/bugpoint/BugDriver.cpp
index 43f4c29..030749f 100644
--- a/contrib/llvm/tools/bugpoint/BugDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/BugDriver.cpp
@@ -72,7 +72,7 @@ BugDriver::BugDriver(const char *toolname, bool find_bugs,
LLVMContext& ctxt)
: Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
Program(nullptr), Interpreter(nullptr), SafeInterpreter(nullptr),
- gcc(nullptr), run_find_bugs(find_bugs), Timeout(timeout),
+ cc(nullptr), run_find_bugs(find_bugs), Timeout(timeout),
MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
BugDriver::~BugDriver() {
@@ -80,7 +80,7 @@ BugDriver::~BugDriver() {
if (Interpreter != SafeInterpreter)
delete Interpreter;
delete SafeInterpreter;
- delete gcc;
+ delete cc;
}
std::unique_ptr<Module> llvm::parseInputFile(StringRef Filename,
@@ -132,7 +132,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
if (!M.get()) return true;
outs() << "Linking in input file: '" << Filenames[i] << "'\n";
- if (Linker::LinkModules(Program, M.get()))
+ if (Linker::linkModules(*Program, std::move(M)))
return true;
}
diff --git a/contrib/llvm/tools/bugpoint/BugDriver.h b/contrib/llvm/tools/bugpoint/BugDriver.h
index 5797812..20efff3 100644
--- a/contrib/llvm/tools/bugpoint/BugDriver.h
+++ b/contrib/llvm/tools/bugpoint/BugDriver.h
@@ -36,7 +36,7 @@ class LLVMContext;
class DebugCrashes;
-class GCC;
+class CC;
extern bool DisableSimplifyCFG;
@@ -52,7 +52,7 @@ class BugDriver {
std::vector<std::string> PassesToRun;
AbstractInterpreter *Interpreter; // How to run the program
AbstractInterpreter *SafeInterpreter; // To generate reference output, etc.
- GCC *gcc;
+ CC *cc;
bool run_find_bugs;
unsigned Timeout;
unsigned MemoryLimit;
@@ -321,16 +321,21 @@ void PrintFunctionList(const std::vector<Function*> &Funcs);
///
void PrintGlobalVariableList(const std::vector<GlobalVariable*> &GVs);
+// DeleteGlobalInitializer - "Remove" the global variable by deleting its
+// initializer, making it external.
+//
+void DeleteGlobalInitializer(GlobalVariable *GV);
+
// DeleteFunctionBody - "Remove" the function by deleting all of its basic
// blocks, making it external.
//
void DeleteFunctionBody(Function *F);
-/// SplitFunctionsOutOfModule - Given a module and a list of functions in the
-/// module, split the functions OUT of the specified module, and place them in
-/// the new module.
-Module *SplitFunctionsOutOfModule(Module *M, const std::vector<Function*> &F,
- ValueToValueMapTy &VMap);
+/// Given a module and a list of functions in the module, split the functions
+/// OUT of the specified module, and place them in the new module.
+std::unique_ptr<Module>
+SplitFunctionsOutOfModule(Module *M, const std::vector<Function *> &F,
+ ValueToValueMapTy &VMap);
} // End llvm namespace
diff --git a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
index e2aaf6b..6cdc43ab 100644
--- a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -15,6 +15,7 @@
#include "ListReducer.h"
#include "ToolRunner.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -49,6 +50,10 @@ namespace {
DontReducePassList("disable-pass-list-reduction",
cl::desc("Skip pass list reduction steps"),
cl::init(false));
+
+ cl::opt<bool> NoNamedMDRM("disable-namedmd-remove",
+ cl::desc("Do not remove global named metadata"),
+ cl::init(false));
}
namespace llvm {
@@ -138,7 +143,7 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
std::vector<GlobalVariable*> &GVs) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
- Module *M = CloneModule(BD.getProgram(), VMap);
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
std::set<GlobalVariable*> GVSet;
@@ -155,11 +160,10 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
// Loop over and delete any global variables which we aren't supposed to be
// playing with...
- for (Module::global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I)
- if (I->hasInitializer() && !GVSet.count(I)) {
- I->setInitializer(nullptr);
- I->setLinkage(GlobalValue::ExternalLinkage);
+ for (GlobalVariable &I : M->globals())
+ if (I.hasInitializer() && !GVSet.count(&I)) {
+ DeleteGlobalInitializer(&I);
+ I.setLinkage(GlobalValue::ExternalLinkage);
}
// Try running the hacked up program...
@@ -235,7 +239,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
- Module *M = CloneModule(BD.getProgram(), VMap);
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
std::set<Function*> Functions;
@@ -253,9 +257,9 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
if (!ReplaceFuncsWithNull) {
// Loop over and delete any functions which we aren't supposed to be playing
// with...
- for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
- if (!I->isDeclaration() && !Functions.count(I))
- DeleteFunctionBody(I);
+ for (Function &I : *M)
+ if (!I.isDeclaration() && !Functions.count(&I))
+ DeleteFunctionBody(&I);
} else {
std::vector<GlobalValue*> ToRemove;
// First, remove aliases to functions we're about to purge.
@@ -280,12 +284,12 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
ToRemove.push_back(&Alias);
}
- for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
- if (!I->isDeclaration() && !Functions.count(I)) {
- PointerType *Ty = cast<PointerType>(I->getType());
+ for (Function &I : *M) {
+ if (!I.isDeclaration() && !Functions.count(&I)) {
+ PointerType *Ty = cast<PointerType>(I.getType());
Constant *Replacement = ConstantPointerNull::get(Ty);
- I->replaceAllUsesWith(Replacement);
- ToRemove.push_back(I);
+ I.replaceAllUsesWith(Replacement);
+ ToRemove.push_back(&I);
}
}
@@ -342,7 +346,7 @@ namespace {
bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
- Module *M = CloneModule(BD.getProgram(), VMap);
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
SmallPtrSet<BasicBlock*, 8> Blocks;
@@ -361,20 +365,22 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// Loop over and hack up any blocks that are not listed...
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
- if (!Blocks.count(BB) && BB->getTerminator()->getNumSuccessors()) {
+ if (!Blocks.count(&*BB) && BB->getTerminator()->getNumSuccessors()) {
// Loop over all of the successors of this block, deleting any PHI nodes
// that might include it.
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- (*SI)->removePredecessor(BB);
+ for (succ_iterator SI = succ_begin(&*BB), E = succ_end(&*BB); SI != E;
+ ++SI)
+ (*SI)->removePredecessor(&*BB);
TerminatorInst *BBTerm = BB->getTerminator();
-
- if (!BB->getTerminator()->getType()->isVoidTy())
+ if (BBTerm->isEHPad())
+ continue;
+ if (!BBTerm->getType()->isVoidTy() && !BBTerm->getType()->isTokenTy())
BBTerm->replaceAllUsesWith(Constant::getNullValue(BBTerm->getType()));
// Replace the old terminator instruction.
BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
+ new UnreachableInst(BB->getContext(), &*BB);
}
// The CFG Simplifier pass may delete one of the basic blocks we are
@@ -450,7 +456,7 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
&Insts) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
- Module *M = CloneModule(BD.getProgram(), VMap);
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
SmallPtrSet<Instruction*, 64> Instructions;
@@ -468,10 +474,10 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
for (Module::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst) &&
- !isa<LandingPadInst>(Inst)) {
- if (!Inst->getType()->isVoidTy())
+ !Inst->isEHPad()) {
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
}
@@ -497,6 +503,149 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
return false;
}
+namespace {
+// Reduce the list of Named Metadata nodes. We keep this as a list of
+// names to avoid having to convert back and forth every time.
+class ReduceCrashingNamedMD : public ListReducer<std::string> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingNamedMD(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
+ : BD(bd), TestFn(testFn) {}
+
+ TestResult doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Kept,
+ std::string &Error) override {
+ if (!Kept.empty() && TestNamedMDs(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestNamedMDs(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestNamedMDs(std::vector<std::string> &NamedMDs);
+};
+}
+
+bool ReduceCrashingNamedMD::TestNamedMDs(std::vector<std::string> &NamedMDs) {
+
+ ValueToValueMapTy VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
+
+ outs() << "Checking for crash with only these named metadata nodes:";
+ unsigned NumPrint = std::min<size_t>(NamedMDs.size(), 10);
+ for (unsigned i = 0, e = NumPrint; i != e; ++i)
+ outs() << " " << NamedMDs[i];
+ if (NumPrint < NamedMDs.size())
+ outs() << "... <" << NamedMDs.size() << " total>";
+ outs() << ": ";
+
+ // Make a StringSet for faster lookup
+ StringSet<> Names;
+ for (const std::string &Name : NamedMDs)
+ Names.insert(Name);
+
+ // First collect all the metadata to delete in a vector, then
+ // delete them all at once to avoid invalidating the iterator
+ std::vector<NamedMDNode *> ToDelete;
+ ToDelete.reserve(M->named_metadata_size() - Names.size());
+ for (auto &NamedMD : M->named_metadata())
+ if (!Names.count(NamedMD.getName()))
+ ToDelete.push_back(&NamedMD);
+
+ for (auto *NamedMD : ToDelete)
+ NamedMD->eraseFromParent();
+
+ // Verify that this is still valid.
+ legacy::PassManager Passes;
+ Passes.add(createVerifierPass());
+ Passes.run(*M);
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M)) {
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ return true;
+ }
+ delete M; // It didn't crash, try something else.
+ return false;
+}
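
The collect-then-erase pattern above matters because erasing a NamedMDNode while walking named_metadata() would invalidate the iterator. A standalone sketch of the same pattern (the function name is hypothetical):

#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Module.h"
#include <vector>
using namespace llvm;

static void dropNamedMDsExcept(Module &M, const StringSet<> &Keep) {
  std::vector<NamedMDNode *> ToDelete;
  for (NamedMDNode &NMD : M.named_metadata())
    if (!Keep.count(NMD.getName()))
      ToDelete.push_back(&NMD); // defer: erasing here would invalidate
                                // the iterator we are walking
  for (NamedMDNode *NMD : ToDelete)
    NMD->eraseFromParent();
}
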
+
+namespace {
+// Reduce the list of operands to named metadata nodes
+class ReduceCrashingNamedMDOps : public ListReducer<const MDNode *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingNamedMDOps(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
+ : BD(bd), TestFn(testFn) {}
+
+ TestResult doTest(std::vector<const MDNode *> &Prefix,
+ std::vector<const MDNode *> &Kept,
+ std::string &Error) override {
+ if (!Kept.empty() && TestNamedMDOps(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestNamedMDOps(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestNamedMDOps(std::vector<const MDNode *> &NamedMDOps);
+};
+}
+
+bool ReduceCrashingNamedMDOps::TestNamedMDOps(
+ std::vector<const MDNode *> &NamedMDOps) {
+ // Convert list to set for fast lookup...
+ SmallPtrSet<const MDNode *, 64> OldMDNodeOps;
+ for (unsigned i = 0, e = NamedMDOps.size(); i != e; ++i) {
+ OldMDNodeOps.insert(NamedMDOps[i]);
+ }
+
+ outs() << "Checking for crash with only " << OldMDNodeOps.size();
+ if (OldMDNodeOps.size() == 1)
+ outs() << " named metadata operand: ";
+ else
+ outs() << " named metadata operands: ";
+
+ ValueToValueMapTy VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
+
+ // This is a little wasteful. In the future it might be good if we could have
+ // these dropped during cloning.
+ for (auto &NamedMD : BD.getProgram()->named_metadata()) {
+ // Drop the old one and create a new one
+ M->eraseNamedMetadata(M->getNamedMetadata(NamedMD.getName()));
+ NamedMDNode *NewNamedMDNode =
+ M->getOrInsertNamedMetadata(NamedMD.getName());
+ for (MDNode *op : NamedMD.operands())
+ if (OldMDNodeOps.count(op))
+ NewNamedMDNode->addOperand(cast<MDNode>(MapMetadata(op, VMap)));
+ }
+
+ // Verify that this is still valid.
+ legacy::PassManager Passes;
+ Passes.add(createVerifierPass());
+ Passes.run(*M);
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M)) {
+ // Make sure to use MDNode pointers that point into the now-current
+ // module, and that they don't include any deleted nodes.
+ NamedMDOps.clear();
+ for (const MDNode *Node : OldMDNodeOps)
+ NamedMDOps.push_back(cast<MDNode>(VMap.MD()[Node].get()));
+
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ return true;
+ }
+ delete M; // It didn't crash, try something else.
+ return false;
+}
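
Because the kept operands were collected from the original module, they must be translated through the clone's VMap before being re-attached; MapMetadata performs that translation. A sketch of remapping one operand list, with M, KeptOps, and VMap standing in for the locals above (the metadata name is only an example):

NamedMDNode *NewNMD = M->getOrInsertNamedMetadata("llvm.dbg.cu");
for (MDNode *Op : KeptOps)
  // Translate the original-module node to its clone before attaching.
  NewNMD->addOperand(cast<MDNode>(MapMetadata(Op, VMap)));
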
+
/// DebugACrash - Given a predicate that determines whether a component crashes
/// on a program, try to destructively reduce the program while still keeping
/// the predicate true.
@@ -509,13 +658,13 @@ static bool DebugACrash(BugDriver &BD,
BD.getProgram()->global_begin() != BD.getProgram()->global_end()) {
// Now try to reduce the number of global variable initializers in the
// module to something small.
- Module *M = CloneModule(BD.getProgram());
+ Module *M = CloneModule(BD.getProgram()).release();
bool DeletedInit = false;
for (Module::global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I)
if (I->hasInitializer()) {
- I->setInitializer(nullptr);
+ DeleteGlobalInitializer(&*I);
I->setLinkage(GlobalValue::ExternalLinkage);
DeletedInit = true;
}
@@ -538,7 +687,7 @@ static bool DebugACrash(BugDriver &BD,
for (Module::global_iterator I = BD.getProgram()->global_begin(),
E = BD.getProgram()->global_end(); I != E; ++I)
if (I->hasInitializer())
- GVs.push_back(I);
+ GVs.push_back(&*I);
if (GVs.size() > 1 && !BugpointIsInterrupted) {
outs() << "\n*** Attempting to reduce the number of global "
@@ -558,10 +707,9 @@ static bool DebugACrash(BugDriver &BD,
// Now try to reduce the number of functions in the module to something small.
std::vector<Function*> Functions;
- for (Module::iterator I = BD.getProgram()->begin(),
- E = BD.getProgram()->end(); I != E; ++I)
- if (!I->isDeclaration())
- Functions.push_back(I);
+ for (Function &F : *BD.getProgram())
+ if (!F.isDeclaration())
+ Functions.push_back(&F);
if (Functions.size() > 1 && !BugpointIsInterrupted) {
outs() << "\n*** Attempting to reduce the number of functions "
@@ -581,10 +729,9 @@ static bool DebugACrash(BugDriver &BD,
//
if (!DisableSimplifyCFG && !BugpointIsInterrupted) {
std::vector<const BasicBlock*> Blocks;
- for (Module::const_iterator I = BD.getProgram()->begin(),
- E = BD.getProgram()->end(); I != E; ++I)
- for (Function::const_iterator FI = I->begin(), E = I->end(); FI !=E; ++FI)
- Blocks.push_back(FI);
+ for (Function &F : *BD.getProgram())
+ for (BasicBlock &BB : F)
+ Blocks.push_back(&BB);
unsigned OldSize = Blocks.size();
ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks, Error);
if (Blocks.size() < OldSize)
@@ -595,14 +742,11 @@ static bool DebugACrash(BugDriver &BD,
// cases with large basic blocks where the problem is at one end.
if (!BugpointIsInterrupted) {
std::vector<const Instruction*> Insts;
- for (Module::const_iterator MI = BD.getProgram()->begin(),
- ME = BD.getProgram()->end(); MI != ME; ++MI)
- for (Function::const_iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::const_iterator I = FI->begin(), E = FI->end();
- I != E; ++I)
- if (!isa<TerminatorInst>(I))
- Insts.push_back(I);
+ for (const Function &F : *BD.getProgram())
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (!isa<TerminatorInst>(&I))
+ Insts.push_back(&I);
ReduceCrashingInstructions(BD, TestFn).reduceList(Insts, Error);
}
@@ -642,12 +786,12 @@ static bool DebugACrash(BugDriver &BD,
} else {
if (BugpointIsInterrupted) goto ExitLoops;
- if (isa<LandingPadInst>(I))
+ if (I->isEHPad() || I->getType()->isTokenTy())
continue;
outs() << "Checking instruction: " << *I;
std::unique_ptr<Module> M =
- BD.deleteInstructionFromProgram(I, Simplification);
+ BD.deleteInstructionFromProgram(&*I, Simplification);
// Find out if the pass still crashes on this pass...
if (TestFn(BD, M.get())) {
@@ -666,12 +810,37 @@ static bool DebugACrash(BugDriver &BD,
}
} while (Simplification);
+
+ if (!NoNamedMDRM) {
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-instructions");
+
+ if (!BugpointIsInterrupted) {
+ // Try to reduce the amount of global metadata (particularly debug info),
+ // by dropping global named metadata that anchors them
+ outs() << "\n*** Attempting to remove named metadata: ";
+ std::vector<std::string> NamedMDNames;
+ for (auto &NamedMD : BD.getProgram()->named_metadata())
+ NamedMDNames.push_back(NamedMD.getName().str());
+ ReduceCrashingNamedMD(BD, TestFn).reduceList(NamedMDNames, Error);
+ }
+
+ if (!BugpointIsInterrupted) {
+ // Now that we quickly dropped all the named metadata that doesn't
+ // contribute to the crash, bisect the operands of the remaining ones
+ std::vector<const MDNode *> NamedMDOps;
+ for (auto &NamedMD : BD.getProgram()->named_metadata())
+ for (auto op : NamedMD.operands())
+ NamedMDOps.push_back(op);
+ ReduceCrashingNamedMDOps(BD, TestFn).reduceList(NamedMDOps, Error);
+ }
+ }
+
ExitLoops:
// Try to clean up the testcase by running funcresolve and globaldce...
if (!BugpointIsInterrupted) {
outs() << "\n*** Attempting to perform final cleanups: ";
- Module *M = CloneModule(BD.getProgram());
+ Module *M = CloneModule(BD.getProgram()).release();
M = BD.performFinalCleanups(M, true).release();
// Find out if the pass still crashes on the cleaned up program...
diff --git a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
index 25813b3..41b8ccc 100644
--- a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/raw_ostream.h"
#include <fstream>
@@ -124,11 +125,10 @@ namespace {
cl::ZeroOrMore, cl::PositionalEatsArgs);
cl::opt<std::string>
- GCCBinary("gcc", cl::init("gcc"),
- cl::desc("The gcc binary to use. (default 'gcc')"));
+ CCBinary("gcc", cl::init(""), cl::desc("The gcc binary to use."));
cl::list<std::string>
- GCCToolArgv("gcc-tool-args", cl::Positional,
+ CCToolArgv("gcc-tool-args", cl::Positional,
cl::desc("<gcc-tool arguments>..."),
cl::ZeroOrMore, cl::PositionalEatsArgs);
}
@@ -148,6 +148,13 @@ bool BugDriver::initializeExecutionEnvironment() {
SafeInterpreter = nullptr;
std::string Message;
+ if (CCBinary.empty()) {
+ if (sys::findProgramByName("clang"))
+ CCBinary = "clang";
+ else
+ CCBinary = "gcc";
+ }
+
switch (InterpreterSel) {
case AutoPick:
if (!Interpreter) {
@@ -158,8 +165,8 @@ bool BugDriver::initializeExecutionEnvironment() {
if (!Interpreter) {
InterpreterSel = RunLLC;
Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
- GCCBinary, &ToolArgv,
- &GCCToolArgv);
+ CCBinary, &ToolArgv,
+ &CCToolArgv);
}
if (!Interpreter) {
InterpreterSel = RunLLI;
@@ -179,8 +186,8 @@ bool BugDriver::initializeExecutionEnvironment() {
case RunLLCIA:
case LLC_Safe:
Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
- GCCBinary, &ToolArgv,
- &GCCToolArgv,
+ CCBinary, &ToolArgv,
+ &CCToolArgv,
InterpreterSel == RunLLCIA);
break;
case RunJIT:
@@ -213,9 +220,9 @@ bool BugDriver::initializeExecutionEnvironment() {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- GCCBinary,
+ CCBinary,
&SafeToolArgs,
- &GCCToolArgv);
+ &CCToolArgv);
}
if (!SafeInterpreter &&
@@ -224,9 +231,9 @@ bool BugDriver::initializeExecutionEnvironment() {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- GCCBinary,
+ CCBinary,
&SafeToolArgs,
- &GCCToolArgv);
+ &CCToolArgv);
}
if (!SafeInterpreter) {
SafeInterpreterSel = AutoPick;
@@ -237,8 +244,8 @@ bool BugDriver::initializeExecutionEnvironment() {
case RunLLCIA:
SafeToolArgs.push_back("--relocation-model=pic");
SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- GCCBinary, &SafeToolArgs,
- &GCCToolArgv,
+ CCBinary, &SafeToolArgs,
+ &CCToolArgv,
SafeInterpreterSel == RunLLCIA);
break;
case Custom:
@@ -252,8 +259,8 @@ bool BugDriver::initializeExecutionEnvironment() {
}
if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); }
- gcc = GCC::create(Message, GCCBinary, &GCCToolArgv);
- if (!gcc) { outs() << Message << "\nExiting.\n"; exit(1); }
+ cc = CC::create(Message, CCBinary, &CCToolArgv);
+ if (!cc) { outs() << Message << "\nExiting.\n"; exit(1); }
// If there was an error creating the selected interpreter, quit with error.
return Interpreter == nullptr;
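
The effect of the new empty default for -gcc is that bugpoint now prefers clang when it can be found on PATH, falling back to gcc otherwise. A standalone sketch of the pick (the function name is hypothetical):

#include "llvm/Support/Program.h"
#include <string>
using namespace llvm;

static std::string pickDefaultCC() {
  // findProgramByName returns ErrorOr<std::string>; success means the
  // binary was found on PATH.
  if (sys::findProgramByName("clang"))
    return "clang";
  return "gcc";
}
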
@@ -388,13 +395,13 @@ std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
std::string OutputFile;
// Using the known-good backend.
- GCC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile,
+ CC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile,
Error);
if (!Error.empty())
return "";
std::string SharedObjectFile;
- bool Failure = gcc->MakeSharedObject(OutputFile, FT, SharedObjectFile,
+ bool Failure = cc->MakeSharedObject(OutputFile, FT, SharedObjectFile,
AdditionalLinkerArgs, Error);
if (!Error.empty())
return "";
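
compileSharedObject is a two-stage pipeline under the renamed interface: the known-good backend lowers bitcode to assembly or an object file, and CC then links the result into a shared object. A condensed sketch, with variables mirroring the code above and error handling elided:

std::string OutFile, Err;
CC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutFile, Err);
std::string SharedObj;
std::vector<std::string> ExtraArgs; // e.g. AdditionalLinkerArgs
if (Err.empty())
  cc->MakeSharedObject(OutFile, FT, SharedObj, ExtraArgs, Err);
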
diff --git a/contrib/llvm/tools/bugpoint/ExtractFunction.cpp b/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
index 238cbbc..fe0ab69 100644
--- a/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
+++ b/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
@@ -86,7 +86,7 @@ std::unique_ptr<Module>
BugDriver::deleteInstructionFromProgram(const Instruction *I,
unsigned Simplification) {
// FIXME, use vmap?
- Module *Clone = CloneModule(Program);
+ Module *Clone = CloneModule(Program).release();
const BasicBlock *PBB = I->getParent();
const Function *PF = PBB->getParent();
@@ -100,7 +100,7 @@ BugDriver::deleteInstructionFromProgram(const Instruction *I,
BasicBlock::iterator RI = RBI->begin(); // Get iterator to corresponding inst
std::advance(RI, std::distance(PBB->begin(), BasicBlock::const_iterator(I)));
- Instruction *TheInst = RI; // Got the corresponding instruction!
+ Instruction *TheInst = &*RI; // Got the corresponding instruction!
// If this instruction produces a value, replace any users with null values
if (!TheInst->getType()->isVoidTy())
@@ -179,11 +179,43 @@ std::unique_ptr<Module> BugDriver::extractLoop(Module *M) {
return NewM;
}
+static void eliminateAliases(GlobalValue *GV) {
+ // First, check whether a GlobalAlias references this definition.
+ // GlobalAlias MAY NOT reference declarations.
+ for (;;) {
+ // 1. Find aliases
+ SmallVector<GlobalAlias*,1> aliases;
+ Module *M = GV->getParent();
+ for (Module::alias_iterator I=M->alias_begin(), E=M->alias_end(); I!=E; ++I)
+ if (I->getAliasee()->stripPointerCasts() == GV)
+ aliases.push_back(&*I);
+ if (aliases.empty())
+ break;
+ // 2. Resolve aliases
+ for (unsigned i=0, e=aliases.size(); i<e; ++i) {
+ aliases[i]->replaceAllUsesWith(aliases[i]->getAliasee());
+ aliases[i]->eraseFromParent();
+ }
+ // 3. Repeat until no more aliases found; there might
+ // be an alias to an alias...
+ }
+}
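
The outer for(;;) loop is needed because resolving one alias can expose another: stripPointerCasts does not look through aliases, so an alias-to-an-alias is only detected after its inner alias has been folded away. Hypothetical LLVM IR showing the two-pass case:

@g = global i32 0
@a = alias i32, i32* @g   ; pass 1: aliasee is @g, so @a is resolved
@b = alias i32, i32* @a   ; skipped in pass 1 (aliasee is @a, not @g);
                          ; once @a is RAUW'd to @g, pass 2 finds @b
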
+
+//
+// DeleteGlobalInitializer - "Remove" the global variable by deleting its
+// initializer, making it external.
+//
+void llvm::DeleteGlobalInitializer(GlobalVariable *GV) {
+ eliminateAliases(GV);
+ GV->setInitializer(nullptr);
+}
// DeleteFunctionBody - "Remove" the function by deleting all of its basic
// blocks, making it external.
//
void llvm::DeleteFunctionBody(Function *F) {
+ eliminateAliases(F);
+
// delete the body of the function...
F->deleteBody();
assert(F->isDeclaration() && "This didn't make the function external!");
@@ -271,13 +303,8 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
}
}
-
-/// SplitFunctionsOutOfModule - Given a module and a list of functions in the
-/// module, split the functions OUT of the specified module, and place them in
-/// the new module.
-Module *
-llvm::SplitFunctionsOutOfModule(Module *M,
- const std::vector<Function*> &F,
+std::unique_ptr<Module>
+llvm::SplitFunctionsOutOfModule(Module *M, const std::vector<Function *> &F,
ValueToValueMapTy &VMap) {
// Make sure functions & globals are all external so that linkage
// between the two modules will work.
@@ -291,7 +318,7 @@ llvm::SplitFunctionsOutOfModule(Module *M,
}
ValueToValueMapTy NewVMap;
- Module *New = CloneModule(M, NewVMap);
+ std::unique_ptr<Module> New = CloneModule(M, NewVMap);
// Remove the Test functions from the Safe module
std::set<Function *> TestFunctions;
@@ -306,16 +333,14 @@ llvm::SplitFunctionsOutOfModule(Module *M,
// Remove the Safe functions from the Test module
- for (Module::iterator I = New->begin(), E = New->end(); I != E; ++I)
- if (!TestFunctions.count(I))
- DeleteFunctionBody(I);
-
+ for (Function &I : *New)
+ if (!TestFunctions.count(&I))
+ DeleteFunctionBody(&I);
// Try to split the global initializers evenly
- for (Module::global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(NewVMap[I]);
- if (Function *TestFn = globalInitUsesExternalBA(I)) {
+ for (GlobalVariable &I : M->globals()) {
+ GlobalVariable *GV = cast<GlobalVariable>(NewVMap[&I]);
+ if (Function *TestFn = globalInitUsesExternalBA(&I)) {
if (Function *SafeFn = globalInitUsesExternalBA(GV)) {
errs() << "*** Error: when reducing functions, encountered "
"the global '";
@@ -325,18 +350,18 @@ llvm::SplitFunctionsOutOfModule(Module *M,
<< "' and from test function '" << TestFn->getName() << "'.\n";
exit(1);
}
- I->setInitializer(nullptr); // Delete the initializer to make it external
+ DeleteGlobalInitializer(&I); // Delete the initializer to make it external
} else {
// If we keep it in the safe module, then delete it in the test module
- GV->setInitializer(nullptr);
+ DeleteGlobalInitializer(GV);
}
}
// Make sure that there is a global ctor/dtor array in both halves of the
// module if they both have static ctor/dtor functions.
- SplitStaticCtorDtor("llvm.global_ctors", M, New, NewVMap);
- SplitStaticCtorDtor("llvm.global_dtors", M, New, NewVMap);
-
+ SplitStaticCtorDtor("llvm.global_ctors", M, New.get(), NewVMap);
+ SplitStaticCtorDtor("llvm.global_dtors", M, New.get(), NewVMap);
+
return New;
}
diff --git a/contrib/llvm/tools/bugpoint/ListReducer.h b/contrib/llvm/tools/bugpoint/ListReducer.h
index a0bb570..f08bc97 100644
--- a/contrib/llvm/tools/bugpoint/ListReducer.h
+++ b/contrib/llvm/tools/bugpoint/ListReducer.h
@@ -75,6 +75,11 @@ struct ListReducer {
// Maximal number of allowed splitting iterations,
// before the elements are randomly shuffled.
const unsigned MaxIterationsWithoutProgress = 3;
+
+ // Maximal number of allowed single-element trim iterations. We add a
+ // threshold here as single-element reductions may otherwise take a
+ // very long time to complete.
+ const unsigned MaxTrimIterationsWithoutBackJump = 3;
bool ShufflingEnabled = true;
Backjump:
@@ -157,6 +162,7 @@ Backjump:
if (TheList.size() > 2) {
bool Changed = true;
std::vector<ElTy> EmptyList;
+ unsigned TrimIterations = 0;
while (Changed) { // Trimming loop.
Changed = false;
@@ -186,9 +192,9 @@ Backjump:
if (!Error.empty())
return true;
}
- // This can take a long time if left uncontrolled. For now, don't
- // iterate.
- break;
+ if (TrimIterations >= MaxTrimIterationsWithoutBackJump)
+ break;
+ TrimIterations++;
}
}
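
The new constant turns the previously unbounded (and therefore disabled) single-element trimming into a bounded loop. Its control flow, restated as a minimal sketch where tryTrimOneElement is a hypothetical stand-in for the loop body above:

unsigned TrimIterations = 0;
bool Changed = true;
while (Changed) {
  Changed = tryTrimOneElement(); // hypothetical: one single-element pass
  if (TrimIterations >= MaxTrimIterationsWithoutBackJump)
    break;                       // bound the worst case
  ++TrimIterations;
}
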
diff --git a/contrib/llvm/tools/bugpoint/Miscompilation.cpp b/contrib/llvm/tools/bugpoint/Miscompilation.cpp
index fad1636..16919f5 100644
--- a/contrib/llvm/tools/bugpoint/Miscompilation.cpp
+++ b/contrib/llvm/tools/bugpoint/Miscompilation.cpp
@@ -176,12 +176,15 @@ ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
namespace {
class ReduceMiscompilingFunctions : public ListReducer<Function*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *, Module *, std::string &);
+ bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>, std::string &);
+
public:
ReduceMiscompilingFunctions(BugDriver &bd,
- bool (*F)(BugDriver &, Module *, Module *,
+ bool (*F)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>,
std::string &))
- : BD(bd), TestFn(F) {}
+ : BD(bd), TestFn(F) {}
TestResult doTest(std::vector<Function*> &Prefix,
std::vector<Function*> &Suffix,
@@ -207,32 +210,24 @@ namespace {
};
}
-/// TestMergedProgram - Given two modules, link them together and run the
-/// program, checking to see if the program matches the diff. If there is
-/// an error, return NULL. If not, return the merged module. The Broken argument
-/// will be set to true if the output is different. If the DeleteInputs
-/// argument is set to true then this function deletes both input
-/// modules before it returns.
+/// Given two modules, link them together and run the program, checking to see
+/// if the program matches the diff. If there is an error, return NULL. If not,
+/// return the merged module. The Broken argument will be set to true if the
+/// output is different. If the DeleteInputs argument is set to true then this
+/// function deletes both input modules before it returns.
///
-static Module *TestMergedProgram(const BugDriver &BD, Module *M1, Module *M2,
- bool DeleteInputs, std::string &Error,
- bool &Broken) {
- // Link the two portions of the program back to together.
- if (!DeleteInputs) {
- M1 = CloneModule(M1);
- M2 = CloneModule(M2);
- }
- if (Linker::LinkModules(M1, M2))
+static std::unique_ptr<Module> testMergedProgram(const BugDriver &BD,
+ std::unique_ptr<Module> M1,
+ std::unique_ptr<Module> M2,
+ std::string &Error,
+ bool &Broken) {
+ if (Linker::linkModules(*M1, std::move(M2)))
exit(1);
- delete M2; // We are done with this module.
// Execute the program.
- Broken = BD.diffProgram(M1, "", "", false, &Error);
- if (!Error.empty()) {
- // Delete the linked module
- delete M1;
+ Broken = BD.diffProgram(M1.get(), "", "", false, &Error);
+ if (!Error.empty())
return nullptr;
- }
return M1;
}
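
The switch from Linker::LinkModules to Linker::linkModules changes the contract: the destination is taken by reference and the source is consumed by value, which is why the old manual 'delete M2' disappears. A sketch of the new call shape (Dst and Src are hypothetical):

#include "llvm/Linker/Linker.h"
std::unique_ptr<Module> Dst = /* ... */, Src = /* ... */;
// Src is moved into the linker and destroyed there whether or not the
// link succeeds; only Dst remains valid afterwards.
if (Linker::linkModules(*Dst, std::move(Src)))
  exit(1); // as the call sites above do
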
@@ -259,7 +254,7 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
// we can conclude that a function triggers the bug when in fact one
// needs a larger set of original functions to do so.
ValueToValueMapTy VMap;
- Module *Clone = CloneModule(BD.getProgram(), VMap);
+ Module *Clone = CloneModule(BD.getProgram(), VMap).release();
Module *Orig = BD.swapProgramIn(Clone);
std::vector<Function*> FuncsOnClone;
@@ -270,12 +265,12 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
// Split the module into the two halves of the program we want.
VMap.clear();
- Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
- Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize, FuncsOnClone,
- VMap);
+ std::unique_ptr<Module> ToNotOptimize = CloneModule(BD.getProgram(), VMap);
+ std::unique_ptr<Module> ToOptimize =
+ SplitFunctionsOutOfModule(ToNotOptimize.get(), FuncsOnClone, VMap);
- // Run the predicate, note that the predicate will delete both input modules.
- bool Broken = TestFn(BD, ToOptimize, ToNotOptimize, Error);
+ bool Broken =
+ TestFn(BD, std::move(ToOptimize), std::move(ToNotOptimize), Error);
delete BD.swapProgramIn(Orig);
@@ -294,29 +289,29 @@ static void DisambiguateGlobalSymbols(Module *M) {
I->setName("anon_fn");
}
-/// ExtractLoops - Given a reduced list of functions that still exposed the bug,
-/// check to see if we can extract the loops in the region without obscuring the
-/// bug. If so, it reduces the amount of code identified.
+/// Given a reduced list of functions that still exposed the bug, check to see
+/// if we can extract the loops in the region without obscuring the bug. If so,
+/// it reduces the amount of code identified.
///
static bool ExtractLoops(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *,
- std::string &),
- std::vector<Function*> &MiscompiledFunctions,
+ bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>, std::string &),
+ std::vector<Function *> &MiscompiledFunctions,
std::string &Error) {
bool MadeChange = false;
while (1) {
if (BugpointIsInterrupted) return MadeChange;
ValueToValueMapTy VMap;
- Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
- Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
- MiscompiledFunctions,
- VMap);
- Module *ToOptimizeLoopExtracted = BD.extractLoop(ToOptimize).release();
+ std::unique_ptr<Module> ToNotOptimize = CloneModule(BD.getProgram(), VMap);
+ Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize.get(),
+ MiscompiledFunctions, VMap)
+ .release();
+ std::unique_ptr<Module> ToOptimizeLoopExtracted =
+ BD.extractLoop(ToOptimize);
if (!ToOptimizeLoopExtracted) {
// If the loop extractor crashed or if there were no extractible loops,
// then this chapter of our odyssey is over with.
- delete ToNotOptimize;
delete ToOptimize;
return MadeChange;
}
@@ -330,13 +325,14 @@ static bool ExtractLoops(BugDriver &BD,
// extraction.
AbstractInterpreter *AI = BD.switchToSafeInterpreter();
bool Failure;
- Module *New = TestMergedProgram(BD, ToOptimizeLoopExtracted,
- ToNotOptimize, false, Error, Failure);
+ std::unique_ptr<Module> New =
+ testMergedProgram(BD, std::move(ToOptimizeLoopExtracted),
+ std::move(ToNotOptimize), Error, Failure);
if (!New)
return false;
// Delete the original and set the new program.
- Module *Old = BD.swapProgramIn(New);
+ Module *Old = BD.swapProgramIn(New.release());
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
MiscompiledFunctions[i] = cast<Function>(VMap[MiscompiledFunctions[i]]);
delete Old;
@@ -350,16 +346,15 @@ static bool ExtractLoops(BugDriver &BD,
errs() << " Continuing on with un-loop-extracted version.\n";
BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-tno.bc",
- ToNotOptimize);
+ ToNotOptimize.get());
BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to.bc",
ToOptimize);
BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to-le.bc",
- ToOptimizeLoopExtracted);
+ ToOptimizeLoopExtracted.get());
errs() << "Please submit the "
<< OutputPrefix << "-loop-extract-fail-*.bc files.\n";
delete ToOptimize;
- delete ToNotOptimize;
return MadeChange;
}
delete ToOptimize;
@@ -367,18 +362,20 @@ static bool ExtractLoops(BugDriver &BD,
outs() << " Testing after loop extraction:\n";
// Clone modules, the tester function will free them.
- Module *TOLEBackup = CloneModule(ToOptimizeLoopExtracted, VMap);
- Module *TNOBackup = CloneModule(ToNotOptimize, VMap);
+ std::unique_ptr<Module> TOLEBackup =
+ CloneModule(ToOptimizeLoopExtracted.get(), VMap);
+ std::unique_ptr<Module> TNOBackup = CloneModule(ToNotOptimize.get(), VMap);
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
MiscompiledFunctions[i] = cast<Function>(VMap[MiscompiledFunctions[i]]);
- Failure = TestFn(BD, ToOptimizeLoopExtracted, ToNotOptimize, Error);
+ Failure = TestFn(BD, std::move(ToOptimizeLoopExtracted),
+ std::move(ToNotOptimize), Error);
if (!Error.empty())
return false;
- ToOptimizeLoopExtracted = TOLEBackup;
- ToNotOptimize = TNOBackup;
+ ToOptimizeLoopExtracted = std::move(TOLEBackup);
+ ToNotOptimize = std::move(TNOBackup);
if (!Failure) {
outs() << "*** Loop extraction masked the problem. Undoing.\n";
@@ -390,7 +387,8 @@ static bool ExtractLoops(BugDriver &BD,
MisCompFunctions.emplace_back(F->getName(), F->getFunctionType());
}
- if (Linker::LinkModules(ToNotOptimize, ToOptimizeLoopExtracted))
+ if (Linker::linkModules(*ToNotOptimize,
+ std::move(ToOptimizeLoopExtracted)))
exit(1);
MiscompiledFunctions.clear();
@@ -401,8 +399,7 @@ static bool ExtractLoops(BugDriver &BD,
MiscompiledFunctions.push_back(NewF);
}
- delete ToOptimizeLoopExtracted;
- BD.setNewProgram(ToNotOptimize);
+ BD.setNewProgram(ToNotOptimize.release());
return MadeChange;
}
@@ -418,11 +415,9 @@ static bool ExtractLoops(BugDriver &BD,
// extraction both didn't break the program, and didn't mask the problem.
// Replace the current program with the loop extracted version, and try to
// extract another loop.
- if (Linker::LinkModules(ToNotOptimize, ToOptimizeLoopExtracted))
+ if (Linker::linkModules(*ToNotOptimize, std::move(ToOptimizeLoopExtracted)))
exit(1);
- delete ToOptimizeLoopExtracted;
-
// All of the Function*'s in the MiscompiledFunctions list are in the old
// module. Update this list to include all of the functions in the
// optimized and loop extracted module.
@@ -434,7 +429,7 @@ static bool ExtractLoops(BugDriver &BD,
MiscompiledFunctions.push_back(NewF);
}
- BD.setNewProgram(ToNotOptimize);
+ BD.setNewProgram(ToNotOptimize.release());
MadeChange = true;
}
}
@@ -442,14 +437,15 @@ static bool ExtractLoops(BugDriver &BD,
namespace {
class ReduceMiscompiledBlocks : public ListReducer<BasicBlock*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *, Module *, std::string &);
+ bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>, std::string &);
std::vector<Function*> FunctionsBeingTested;
public:
ReduceMiscompiledBlocks(BugDriver &bd,
- bool (*F)(BugDriver &, Module *, Module *,
- std::string &),
- const std::vector<Function*> &Fns)
- : BD(bd), TestFn(F), FunctionsBeingTested(Fns) {}
+ bool (*F)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>, std::string &),
+ const std::vector<Function *> &Fns)
+ : BD(bd), TestFn(F), FunctionsBeingTested(Fns) {}
TestResult doTest(std::vector<BasicBlock*> &Prefix,
std::vector<BasicBlock*> &Suffix,
@@ -495,7 +491,7 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
// Split the module into the two halves of the program we want.
ValueToValueMapTy VMap;
- Module *Clone = CloneModule(BD.getProgram(), VMap);
+ Module *Clone = CloneModule(BD.getProgram(), VMap).release();
Module *Orig = BD.swapProgramIn(Clone);
std::vector<Function*> FuncsOnClone;
std::vector<BasicBlock*> BBsOnClone;
@@ -509,45 +505,37 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
}
VMap.clear();
- Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
- Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
- FuncsOnClone,
- VMap);
+ std::unique_ptr<Module> ToNotOptimize = CloneModule(BD.getProgram(), VMap);
+ std::unique_ptr<Module> ToOptimize =
+ SplitFunctionsOutOfModule(ToNotOptimize.get(), FuncsOnClone, VMap);
// Try the extraction. If it doesn't work, then the block extractor crashed
// or something, in which case bugpoint can't chase down this possibility.
if (std::unique_ptr<Module> New =
- BD.extractMappedBlocksFromModule(BBsOnClone, ToOptimize)) {
- delete ToOptimize;
- // Run the predicate,
- // note that the predicate will delete both input modules.
- bool Ret = TestFn(BD, New.get(), ToNotOptimize, Error);
+ BD.extractMappedBlocksFromModule(BBsOnClone, ToOptimize.get())) {
+ bool Ret = TestFn(BD, std::move(New), std::move(ToNotOptimize), Error);
delete BD.swapProgramIn(Orig);
return Ret;
}
delete BD.swapProgramIn(Orig);
- delete ToOptimize;
- delete ToNotOptimize;
return false;
}
-
-/// ExtractBlocks - Given a reduced list of functions that still expose the bug,
-/// extract as many basic blocks from the region as possible without obscuring
-/// the bug.
+/// Given a reduced list of functions that still expose the bug, extract as many
+/// basic blocks from the region as possible without obscuring the bug.
///
static bool ExtractBlocks(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *,
+ bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>,
std::string &),
- std::vector<Function*> &MiscompiledFunctions,
+ std::vector<Function *> &MiscompiledFunctions,
std::string &Error) {
if (BugpointIsInterrupted) return false;
std::vector<BasicBlock*> Blocks;
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
- for (Function::iterator I = MiscompiledFunctions[i]->begin(),
- E = MiscompiledFunctions[i]->end(); I != E; ++I)
- Blocks.push_back(I);
+ for (BasicBlock &BB : *MiscompiledFunctions[i])
+ Blocks.push_back(&BB);
// Use the list reducer to identify blocks that can be extracted without
// obscuring the bug. The Blocks list will end up containing blocks that must
@@ -571,10 +559,10 @@ static bool ExtractBlocks(BugDriver &BD,
}
ValueToValueMapTy VMap;
- Module *ProgClone = CloneModule(BD.getProgram(), VMap);
- Module *ToExtract = SplitFunctionsOutOfModule(ProgClone,
- MiscompiledFunctions,
- VMap);
+ Module *ProgClone = CloneModule(BD.getProgram(), VMap).release();
+ Module *ToExtract =
+ SplitFunctionsOutOfModule(ProgClone, MiscompiledFunctions, VMap)
+ .release();
std::unique_ptr<Module> Extracted =
BD.extractMappedBlocksFromModule(Blocks, ToExtract);
if (!Extracted) {
@@ -595,7 +583,7 @@ static bool ExtractBlocks(BugDriver &BD,
if (!I->isDeclaration())
MisCompFunctions.emplace_back(I->getName(), I->getFunctionType());
- if (Linker::LinkModules(ProgClone, Extracted.get()))
+ if (Linker::linkModules(*ProgClone, std::move(Extracted)))
exit(1);
// Set the new program and delete the old one.
@@ -613,14 +601,13 @@ static bool ExtractBlocks(BugDriver &BD,
return true;
}
-
-/// DebugAMiscompilation - This is a generic driver to narrow down
-/// miscompilations, either in an optimization or a code generator.
+/// This is a generic driver to narrow down miscompilations, either in an
+/// optimization or a code generator.
///
-static std::vector<Function*>
+static std::vector<Function *>
DebugAMiscompilation(BugDriver &BD,
- bool (*TestFn)(BugDriver &, Module *, Module *,
- std::string &),
+ bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>, std::string &),
std::string &Error) {
// Okay, now that we have reduced the list of passes which are causing the
// failure, see if we can pin down which functions are being
@@ -628,9 +615,9 @@ DebugAMiscompilation(BugDriver &BD,
// the program.
std::vector<Function*> MiscompiledFunctions;
Module *Prog = BD.getProgram();
- for (Module::iterator I = Prog->begin(), E = Prog->end(); I != E; ++I)
- if (!I->isDeclaration())
- MiscompiledFunctions.push_back(I);
+ for (Function &F : *Prog)
+ if (!F.isDeclaration())
+ MiscompiledFunctions.push_back(&F);
// Do the reduction...
if (!BugpointIsInterrupted)
@@ -699,28 +686,28 @@ DebugAMiscompilation(BugDriver &BD,
return MiscompiledFunctions;
}
-/// TestOptimizer - This is the predicate function used to check to see if the
-/// "Test" portion of the program is misoptimized. If so, return true. In any
-/// case, both module arguments are deleted.
+/// This is the predicate function used to check to see if the "Test" portion of
+/// the program is misoptimized. If so, return true. In any case, both module
+/// arguments are deleted.
///
-static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe,
- std::string &Error) {
+static bool TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
+ std::unique_ptr<Module> Safe, std::string &Error) {
// Run the optimization passes on ToOptimize, producing a transformed version
// of the functions being tested.
outs() << " Optimizing functions being tested: ";
- std::unique_ptr<Module> Optimized = BD.runPassesOn(Test, BD.getPassesToRun(),
- /*AutoDebugCrashes*/ true);
+ std::unique_ptr<Module> Optimized =
+ BD.runPassesOn(Test.get(), BD.getPassesToRun(),
+ /*AutoDebugCrashes*/ true);
outs() << "done.\n";
- delete Test;
outs() << " Checking to see if the merged program executes correctly: ";
bool Broken;
- Module *New =
- TestMergedProgram(BD, Optimized.get(), Safe, true, Error, Broken);
+ std::unique_ptr<Module> New = testMergedProgram(
+ BD, std::move(Optimized), std::move(Safe), Error, Broken);
if (New) {
outs() << (Broken ? " nope.\n" : " yup.\n");
// Delete the original and set the new program.
- delete BD.swapProgramIn(New);
+ delete BD.swapProgramIn(New.release());
}
return Broken;
}
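
With the predicate signature now taking std::unique_ptr parameters, the explicit deletes that used to follow every TestFn call are gone; the modules die when the parameters go out of scope. A conforming stub (hypothetical) makes the contract visible:

static bool alwaysBroken(BugDriver &, std::unique_ptr<Module> Test,
                         std::unique_ptr<Module> Safe, std::string &) {
  // Both modules are destroyed automatically when Test and Safe go out
  // of scope -- no manual delete needed.
  return true;
}
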
@@ -753,10 +740,10 @@ void BugDriver::debugMiscompilation(std::string *Error) {
// Output a bunch of bitcode files for the user...
outs() << "Outputting reduced bitcode files which expose the problem:\n";
ValueToValueMapTy VMap;
- Module *ToNotOptimize = CloneModule(getProgram(), VMap);
- Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
- MiscompiledFunctions,
- VMap);
+ Module *ToNotOptimize = CloneModule(getProgram(), VMap).release();
+ Module *ToOptimize =
+ SplitFunctionsOutOfModule(ToNotOptimize, MiscompiledFunctions, VMap)
+ .release();
outs() << " Non-optimized portion: ";
EmitProgressBitcode(ToNotOptimize, "tonotoptimize", true);
@@ -769,13 +756,13 @@ void BugDriver::debugMiscompilation(std::string *Error) {
return;
}
-/// CleanupAndPrepareModules - Get the specified modules ready for code
-/// generator testing.
+/// Get the specified modules ready for code generator testing.
///
-static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
+static void CleanupAndPrepareModules(BugDriver &BD,
+ std::unique_ptr<Module> &Test,
Module *Safe) {
// Clean up the modules, removing extra cruft that we don't need anymore...
- Test = BD.performFinalCleanups(Test).release();
+ Test = BD.performFinalCleanups(Test.get());
// If we are executing the JIT, we have several nasty issues to take care of.
if (!BD.isExecutingJIT()) return;
@@ -788,21 +775,21 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Rename it
oldMain->setName("llvm_bugpoint_old_main");
// Create a NEW `main' function with same type in the test module.
- Function *newMain = Function::Create(oldMain->getFunctionType(),
- GlobalValue::ExternalLinkage,
- "main", Test);
+ Function *newMain =
+ Function::Create(oldMain->getFunctionType(),
+ GlobalValue::ExternalLinkage, "main", Test.get());
// Create an `oldmain' prototype in the test module, which will
// correspond to the real main function in the same module.
Function *oldMainProto = Function::Create(oldMain->getFunctionType(),
GlobalValue::ExternalLinkage,
- oldMain->getName(), Test);
+ oldMain->getName(), Test.get());
// Set up and remember the argument list for the main function.
std::vector<Value*> args;
for (Function::arg_iterator
I = newMain->arg_begin(), E = newMain->arg_end(),
OI = oldMain->arg_begin(); I != E; ++I, ++OI) {
I->setName(OI->getName()); // Copy argument names from oldMain
- args.push_back(I);
+ args.push_back(&*I);
}
// Call the old main function and return its result
@@ -905,9 +892,8 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Save the argument list.
std::vector<Value*> Args;
- for (Function::arg_iterator i = FuncWrapper->arg_begin(),
- e = FuncWrapper->arg_end(); i != e; ++i)
- Args.push_back(i);
+ for (Argument &A : FuncWrapper->args())
+ Args.push_back(&A);
// Pass on the arguments to the real function, return its result
if (F->getReturnType()->isVoidTy()) {
@@ -932,15 +918,14 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
}
}
-
-
-/// TestCodeGenerator - This is the predicate function used to check to see if
-/// the "Test" portion of the program is miscompiled by the code generator under
-/// test. If so, return true. In any case, both module arguments are deleted.
+/// This is the predicate function used to check to see if the "Test" portion of
+/// the program is miscompiled by the code generator under test. If so, return
+/// true. In any case, both module arguments are deleted.
///
-static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
+static bool TestCodeGenerator(BugDriver &BD, std::unique_ptr<Module> Test,
+ std::unique_ptr<Module> Safe,
std::string &Error) {
- CleanupAndPrepareModules(BD, Test, Safe);
+ CleanupAndPrepareModules(BD, Test, Safe.get());
SmallString<128> TestModuleBC;
int TestModuleFD;
@@ -951,12 +936,11 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
<< EC.message() << "\n";
exit(1);
}
- if (BD.writeProgramToFile(TestModuleBC.str(), TestModuleFD, Test)) {
+ if (BD.writeProgramToFile(TestModuleBC.str(), TestModuleFD, Test.get())) {
errs() << "Error writing bitcode to `" << TestModuleBC.str()
<< "'\nExiting.";
exit(1);
}
- delete Test;
FileRemover TestModuleBCRemover(TestModuleBC.str(), !SaveTemps);
@@ -971,7 +955,7 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
exit(1);
}
- if (BD.writeProgramToFile(SafeModuleBC.str(), SafeModuleFD, Safe)) {
+ if (BD.writeProgramToFile(SafeModuleBC.str(), SafeModuleFD, Safe.get())) {
errs() << "Error writing bitcode to `" << SafeModuleBC
<< "'\nExiting.";
exit(1);
@@ -982,7 +966,6 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
if (!Error.empty())
return false;
- delete Safe;
FileRemover SharedObjectRemover(SharedObject, !SaveTemps);
@@ -1030,11 +1013,12 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
// Split the module into the two halves of the program we want.
ValueToValueMapTy VMap;
- Module *ToNotCodeGen = CloneModule(getProgram(), VMap);
- Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, VMap);
+ std::unique_ptr<Module> ToNotCodeGen = CloneModule(getProgram(), VMap);
+ std::unique_ptr<Module> ToCodeGen =
+ SplitFunctionsOutOfModule(ToNotCodeGen.get(), Funcs, VMap);
// Condition the modules
- CleanupAndPrepareModules(*this, ToCodeGen, ToNotCodeGen);
+ CleanupAndPrepareModules(*this, ToCodeGen, ToNotCodeGen.get());
SmallString<128> TestModuleBC;
int TestModuleFD;
@@ -1046,12 +1030,11 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
exit(1);
}
- if (writeProgramToFile(TestModuleBC.str(), TestModuleFD, ToCodeGen)) {
+ if (writeProgramToFile(TestModuleBC.str(), TestModuleFD, ToCodeGen.get())) {
errs() << "Error writing bitcode to `" << TestModuleBC
<< "'\nExiting.";
exit(1);
}
- delete ToCodeGen;
// Make the shared library
SmallString<128> SafeModuleBC;
@@ -1064,7 +1047,8 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
exit(1);
}
- if (writeProgramToFile(SafeModuleBC.str(), SafeModuleFD, ToNotCodeGen)) {
+ if (writeProgramToFile(SafeModuleBC.str(), SafeModuleFD,
+ ToNotCodeGen.get())) {
errs() << "Error writing bitcode to `" << SafeModuleBC
<< "'\nExiting.";
exit(1);
@@ -1072,7 +1056,6 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
std::string SharedObject = compileSharedObject(SafeModuleBC.str(), *Error);
if (!Error->empty())
return true;
- delete ToNotCodeGen;
outs() << "You can reproduce the problem with the command line: \n";
if (isExecutingJIT()) {
@@ -1080,7 +1063,7 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
} else {
outs() << " llc " << TestModuleBC << " -o " << TestModuleBC
<< ".s\n";
- outs() << " gcc " << SharedObject << " " << TestModuleBC.str()
+ outs() << " cc " << SharedObject << " " << TestModuleBC.str()
<< ".s -o " << TestModuleBC << ".exe";
#if defined (HAVE_LINK_R)
outs() << " -Wl,-R.";
@@ -1093,7 +1076,7 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
outs() << '\n';
outs() << "The shared object was created with:\n llc -march=c "
<< SafeModuleBC.str() << " -o temporary.c\n"
- << " gcc -xc temporary.c -O2 -o " << SharedObject;
+ << " cc -xc temporary.c -O2 -o " << SharedObject;
if (TargetTriple.getArch() == Triple::sparc)
outs() << " -G"; // Compile a shared library, `-G' for Sparc
else
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.cpp b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
index 51091e2..2ccd649 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
@@ -64,16 +64,6 @@ static int RunProgramWithTimeout(StringRef ProgramPath,
unsigned MemoryLimit = 0,
std::string *ErrMsg = nullptr) {
const StringRef *Redirects[3] = { &StdInFile, &StdOutFile, &StdErrFile };
-
-#if 0 // For debug purposes
- {
- errs() << "RUN:";
- for (unsigned i = 0; Args[i]; ++i)
- errs() << " " << Args[i];
- errs() << "\n";
- }
-#endif
-
return sys::ExecuteAndWait(ProgramPath, Args, nullptr, Redirects,
NumSeconds, MemoryLimit, ErrMsg);
}
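
The deleted #if 0 blocks were dead tracing code. Should the same trace ever be wanted again, the DEBUG macro this file already uses elsewhere is the idiomatic replacement, enabled at runtime with -debug; a sketch, with Args as in the function above and a hypothetical debug type:

#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "toolrunner" // hypothetical debug type

DEBUG(errs() << "RUN:";
      for (unsigned i = 0; Args[i]; ++i)
        errs() << " " << Args[i];
      errs() << "\n");
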
@@ -93,15 +83,6 @@ static int RunProgramRemotelyWithTimeout(StringRef RemoteClientPath,
unsigned MemoryLimit = 0) {
const StringRef *Redirects[3] = { &StdInFile, &StdOutFile, &StdErrFile };
-#if 0 // For debug purposes
- {
- errs() << "RUN:";
- for (unsigned i = 0; Args[i]; ++i)
- errs() << " " << Args[i];
- errs() << "\n";
- }
-#endif
-
// Run the program remotely with the remote client
int ReturnCode = sys::ExecuteAndWait(RemoteClientPath, Args, nullptr,
Redirects, NumSeconds, MemoryLimit);
@@ -152,7 +133,7 @@ static std::string ProcessFailure(StringRef ProgPath, const char** Args,
ErrorFilename.str(), Timeout, MemoryLimit);
// FIXME: check return code ?
- // Print out the error messages generated by GCC if possible...
+ // Print out the error messages generated by CC if possible...
std::ifstream ErrorFile(ErrorFilename.c_str());
if (ErrorFile) {
std::copy(std::istreambuf_iterator<char>(ErrorFile),
@@ -184,7 +165,7 @@ namespace {
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs,
+ const std::vector<std::string> &CCArgs,
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
unsigned Timeout = 0,
@@ -197,7 +178,7 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs,
+ const std::vector<std::string> &CCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
@@ -305,7 +286,7 @@ namespace {
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs =
+ const std::vector<std::string> &CCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
@@ -361,7 +342,7 @@ namespace {
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs,
+ const std::vector<std::string> &CCArgs,
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
unsigned Timeout = 0,
@@ -374,7 +355,7 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs,
+ const std::vector<std::string> &CCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
@@ -473,7 +454,7 @@ AbstractInterpreter *AbstractInterpreter::createCustomExecutor(
//===----------------------------------------------------------------------===//
// LLC Implementation of AbstractIntepreter interface
//
-GCC::FileType LLC::OutputCode(const std::string &Bitcode,
+CC::FileType LLC::OutputCode(const std::string &Bitcode,
std::string &OutputAsmFile, std::string &Error,
unsigned Timeout, unsigned MemoryLimit) {
const char *Suffix = (UseIntegratedAssembler ? ".llc.o" : ".llc.s");
@@ -514,7 +495,7 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
Timeout, MemoryLimit))
Error = ProcessFailure(LLCPath, &LLCArgs[0],
Timeout, MemoryLimit);
- return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;
+ return UseIntegratedAssembler ? CC::ObjectFile : CC::AsmFile;
}
void LLC::compileProgram(const std::string &Bitcode, std::string *Error,
@@ -529,22 +510,22 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &ArgsForGCC,
+ const std::vector<std::string> &ArgsForCC,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
std::string OutputAsmFile;
- GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error, Timeout,
+ CC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error, Timeout,
MemoryLimit);
FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
- std::vector<std::string> GCCArgs(ArgsForGCC);
- GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
+ std::vector<std::string> CCArgs(ArgsForCC);
+ CCArgs.insert(CCArgs.end(), SharedLibs.begin(), SharedLibs.end());
- // Assuming LLC worked, compile the result with GCC and run it.
- return gcc->ExecuteProgram(OutputAsmFile, Args, FileKind,
- InputFile, OutputFile, Error, GCCArgs,
+ // Assuming LLC worked, compile the result with CC and run it.
+ return cc->ExecuteProgram(OutputAsmFile, Args, FileKind,
+ InputFile, OutputFile, Error, CCArgs,
Timeout, MemoryLimit);
}
@@ -552,9 +533,9 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
///
LLC *AbstractInterpreter::createLLC(const char *Argv0,
std::string &Message,
- const std::string &GCCBinary,
+ const std::string &CCBinary,
const std::vector<std::string> *Args,
- const std::vector<std::string> *GCCArgs,
+ const std::vector<std::string> *CCArgs,
bool UseIntegratedAssembler) {
std::string LLCPath =
PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t) & createLLC);
@@ -563,13 +544,13 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0,
return nullptr;
}
- GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs);
- if (!gcc) {
+ CC *cc = CC::create(Message, CCBinary, CCArgs);
+ if (!cc) {
errs() << Message << "\n";
exit(1);
}
Message = "Found llc: " + LLCPath + "\n";
- return new LLC(LLCPath, gcc, Args, UseIntegratedAssembler);
+ return new LLC(LLCPath, cc, Args, UseIntegratedAssembler);
}
//===---------------------------------------------------------------------===//
@@ -591,7 +572,7 @@ namespace {
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs =
+ const std::vector<std::string> &CCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
@@ -605,7 +586,7 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs,
+ const std::vector<std::string> &CCArgs,
const std::vector<std::string> &SharedLibs,
unsigned Timeout,
unsigned MemoryLimit) {
@@ -656,7 +637,7 @@ AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
}
//===---------------------------------------------------------------------===//
-// GCC abstraction
+// CC abstraction
//
static bool IsARMArchitecture(std::vector<const char*> Args) {
@@ -672,82 +653,82 @@ static bool IsARMArchitecture(std::vector<const char*> Args) {
return false;
}
-int GCC::ExecuteProgram(const std::string &ProgramFile,
+int CC::ExecuteProgram(const std::string &ProgramFile,
const std::vector<std::string> &Args,
FileType fileType,
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &ArgsForGCC,
+ const std::vector<std::string> &ArgsForCC,
unsigned Timeout,
unsigned MemoryLimit) {
- std::vector<const char*> GCCArgs;
+ std::vector<const char*> CCArgs;
- GCCArgs.push_back(GCCPath.c_str());
+ CCArgs.push_back(CCPath.c_str());
if (TargetTriple.getArch() == Triple::x86)
- GCCArgs.push_back("-m32");
+ CCArgs.push_back("-m32");
for (std::vector<std::string>::const_iterator
- I = gccArgs.begin(), E = gccArgs.end(); I != E; ++I)
- GCCArgs.push_back(I->c_str());
+ I = ccArgs.begin(), E = ccArgs.end(); I != E; ++I)
+ CCArgs.push_back(I->c_str());
// Specify -x explicitly in case the extension is wonky
if (fileType != ObjectFile) {
- GCCArgs.push_back("-x");
+ CCArgs.push_back("-x");
if (fileType == CFile) {
- GCCArgs.push_back("c");
- GCCArgs.push_back("-fno-strict-aliasing");
+ CCArgs.push_back("c");
+ CCArgs.push_back("-fno-strict-aliasing");
} else {
- GCCArgs.push_back("assembler");
+ CCArgs.push_back("assembler");
// For ARM architectures we don't want this flag. bugpoint isn't
// explicitly told what architecture it is working on, so we get
- // it from gcc flags
- if (TargetTriple.isOSDarwin() && !IsARMArchitecture(GCCArgs))
- GCCArgs.push_back("-force_cpusubtype_ALL");
+ // it from cc flags
+ if (TargetTriple.isOSDarwin() && !IsARMArchitecture(CCArgs))
+ CCArgs.push_back("-force_cpusubtype_ALL");
}
}
- GCCArgs.push_back(ProgramFile.c_str()); // Specify the input filename.
+ CCArgs.push_back(ProgramFile.c_str()); // Specify the input filename.
- GCCArgs.push_back("-x");
- GCCArgs.push_back("none");
- GCCArgs.push_back("-o");
+ CCArgs.push_back("-x");
+ CCArgs.push_back("none");
+ CCArgs.push_back("-o");
SmallString<128> OutputBinary;
std::error_code EC =
- sys::fs::createUniqueFile(ProgramFile + "-%%%%%%%.gcc.exe", OutputBinary);
+ sys::fs::createUniqueFile(ProgramFile + "-%%%%%%%.cc.exe", OutputBinary);
if (EC) {
errs() << "Error making unique filename: " << EC.message() << "\n";
exit(1);
}
- GCCArgs.push_back(OutputBinary.c_str()); // Output to the right file...
+ CCArgs.push_back(OutputBinary.c_str()); // Output to the right file...
- // Add any arguments intended for GCC. We locate them here because this is
+ // Add any arguments intended for CC. We locate them here because this is
// most likely -L and -l options that need to come before other libraries but
// after the source. Other options won't be sensitive to placement on the
// command line, so this should be safe.
- for (unsigned i = 0, e = ArgsForGCC.size(); i != e; ++i)
- GCCArgs.push_back(ArgsForGCC[i].c_str());
+ for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i)
+ CCArgs.push_back(ArgsForCC[i].c_str());
- GCCArgs.push_back("-lm"); // Hard-code the math library...
- GCCArgs.push_back("-O2"); // Optimize the program a bit...
+ CCArgs.push_back("-lm"); // Hard-code the math library...
+ CCArgs.push_back("-O2"); // Optimize the program a bit...
#if defined (HAVE_LINK_R)
- GCCArgs.push_back("-Wl,-R."); // Search this dir for .so files
+ CCArgs.push_back("-Wl,-R."); // Search this dir for .so files
#endif
if (TargetTriple.getArch() == Triple::sparc)
- GCCArgs.push_back("-mcpu=v9");
- GCCArgs.push_back(nullptr); // NULL terminator
+ CCArgs.push_back("-mcpu=v9");
+ CCArgs.push_back(nullptr); // NULL terminator
- outs() << "<gcc>"; outs().flush();
+ outs() << "<CC>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = GCCArgs.size()-1; i != e; ++i)
- errs() << " " << GCCArgs[i];
+ for (unsigned i = 0, e = CCArgs.size()-1; i != e; ++i)
+ errs() << " " << CCArgs[i];
errs() << "\n";
);
- if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], "", "", "")) {
- *Error = ProcessFailure(GCCPath, &GCCArgs[0]);
+ if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", "")) {
+ *Error = ProcessFailure(CCPath, &CCArgs[0]);
return -1;
}
@@ -821,9 +802,9 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
}
}
-int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
+int CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
std::string &OutputFile,
- const std::vector<std::string> &ArgsForGCC,
+ const std::vector<std::string> &ArgsForCC,
std::string &Error) {
SmallString<128> UniqueFilename;
std::error_code EC = sys::fs::createUniqueFile(
@@ -834,84 +815,84 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
}
OutputFile = UniqueFilename.str();
- std::vector<const char*> GCCArgs;
+ std::vector<const char*> CCArgs;
- GCCArgs.push_back(GCCPath.c_str());
+ CCArgs.push_back(CCPath.c_str());
if (TargetTriple.getArch() == Triple::x86)
- GCCArgs.push_back("-m32");
+ CCArgs.push_back("-m32");
for (std::vector<std::string>::const_iterator
- I = gccArgs.begin(), E = gccArgs.end(); I != E; ++I)
- GCCArgs.push_back(I->c_str());
+ I = ccArgs.begin(), E = ccArgs.end(); I != E; ++I)
+ CCArgs.push_back(I->c_str());
// Compile the C/asm file into a shared object
if (fileType != ObjectFile) {
- GCCArgs.push_back("-x");
- GCCArgs.push_back(fileType == AsmFile ? "assembler" : "c");
+ CCArgs.push_back("-x");
+ CCArgs.push_back(fileType == AsmFile ? "assembler" : "c");
}
- GCCArgs.push_back("-fno-strict-aliasing");
- GCCArgs.push_back(InputFile.c_str()); // Specify the input filename.
- GCCArgs.push_back("-x");
- GCCArgs.push_back("none");
+ CCArgs.push_back("-fno-strict-aliasing");
+ CCArgs.push_back(InputFile.c_str()); // Specify the input filename.
+ CCArgs.push_back("-x");
+ CCArgs.push_back("none");
if (TargetTriple.getArch() == Triple::sparc)
- GCCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
+ CCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
else if (TargetTriple.isOSDarwin()) {
// link all source files into a single module in data segment, rather than
// generating blocks. dynamic_lookup requires that you set
// MACOSX_DEPLOYMENT_TARGET=10.3 in your env. FIXME: it would be better for
- // bugpoint to just pass that in the environment of GCC.
- GCCArgs.push_back("-single_module");
- GCCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
- GCCArgs.push_back("-undefined");
- GCCArgs.push_back("dynamic_lookup");
+ // bugpoint to just pass that in the environment of CC.
+ CCArgs.push_back("-single_module");
+ CCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
+ CCArgs.push_back("-undefined");
+ CCArgs.push_back("dynamic_lookup");
} else
- GCCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
+ CCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
if (TargetTriple.getArch() == Triple::x86_64)
- GCCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
+ CCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
if (TargetTriple.getArch() == Triple::sparc)
- GCCArgs.push_back("-mcpu=v9");
+ CCArgs.push_back("-mcpu=v9");
- GCCArgs.push_back("-o");
- GCCArgs.push_back(OutputFile.c_str()); // Output to the right filename.
- GCCArgs.push_back("-O2"); // Optimize the program a bit.
+ CCArgs.push_back("-o");
+ CCArgs.push_back(OutputFile.c_str()); // Output to the right filename.
+ CCArgs.push_back("-O2"); // Optimize the program a bit.
- // Add any arguments intended for GCC. We locate them here because this is
+ // Add any arguments intended for CC. We locate them here because this is
// most likely -L and -l options that need to come before other libraries but
// after the source. Other options won't be sensitive to placement on the
// command line, so this should be safe.
- for (unsigned i = 0, e = ArgsForGCC.size(); i != e; ++i)
- GCCArgs.push_back(ArgsForGCC[i].c_str());
- GCCArgs.push_back(nullptr); // NULL terminator
+ for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i)
+ CCArgs.push_back(ArgsForCC[i].c_str());
+ CCArgs.push_back(nullptr); // NULL terminator
- outs() << "<gcc>"; outs().flush();
+ outs() << "<CC>"; outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = GCCArgs.size()-1; i != e; ++i)
- errs() << " " << GCCArgs[i];
+ for (unsigned i = 0, e = CCArgs.size()-1; i != e; ++i)
+ errs() << " " << CCArgs[i];
errs() << "\n";
);
- if (RunProgramWithTimeout(GCCPath, &GCCArgs[0], "", "", "")) {
- Error = ProcessFailure(GCCPath, &GCCArgs[0]);
+ if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", "")) {
+ Error = ProcessFailure(CCPath, &CCArgs[0]);
return 1;
}
return 0;
}
-/// create - Try to find the `gcc' executable
+/// create - Try to find the CC executable
///
-GCC *GCC::create(std::string &Message,
- const std::string &GCCBinary,
+CC *CC::create(std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args) {
- auto GCCPath = sys::findProgramByName(GCCBinary);
- if (!GCCPath) {
- Message = "Cannot find `" + GCCBinary + "' in PATH: " +
- GCCPath.getError().message() + "\n";
+ auto CCPath = sys::findProgramByName(CCBinary);
+ if (!CCPath) {
+ Message = "Cannot find `" + CCBinary + "' in PATH: " +
+ CCPath.getError().message() + "\n";
return nullptr;
}
@@ -926,6 +907,6 @@ GCC *GCC::create(std::string &Message,
RemoteClientPath = *Path;
}
- Message = "Found gcc: " + *GCCPath + "\n";
- return new GCC(*GCCPath, RemoteClientPath, Args);
+ Message = "Found CC: " + *CCPath + "\n";
+ return new CC(*CCPath, RemoteClientPath, Args);
}
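
The ExecuteProgram and MakeSharedObject changes above keep bugpoint's long-standing pattern: collect the tool arguments as owned std::string values, then build an execv-style, NULL-terminated std::vector<const char*> view over them. A minimal standalone sketch of that idiom (makeArgv is a hypothetical helper, not part of the patch):

#include <string>
#include <vector>

// Build an execv-style argv: pointers into strings that must outlive the
// vector, terminated by a null pointer as RunProgramWithTimeout expects.
static std::vector<const char *>
makeArgv(const std::string &Tool, const std::vector<std::string> &Args) {
  std::vector<const char *> Argv;
  Argv.push_back(Tool.c_str());
  for (const std::string &A : Args)
    Argv.push_back(A.c_str());
  Argv.push_back(nullptr); // NULL terminator, as with CCArgs above
  return Argv;
}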
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.h b/contrib/llvm/tools/bugpoint/ToolRunner.h
index 5d67a94..3accd70 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.h
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.h
@@ -33,22 +33,22 @@ extern Triple TargetTriple;
class LLC;
//===---------------------------------------------------------------------===//
-// GCC abstraction
+// CC abstraction
//
-class GCC {
- std::string GCCPath; // The path to the gcc executable.
+class CC {
+ std::string CCPath; // The path to the cc executable.
std::string RemoteClientPath; // The path to the rsh / ssh executable.
- std::vector<std::string> gccArgs; // GCC-specific arguments.
- GCC(StringRef gccPath, StringRef RemotePath,
- const std::vector<std::string> *GCCArgs)
- : GCCPath(gccPath), RemoteClientPath(RemotePath) {
- if (GCCArgs) gccArgs = *GCCArgs;
+ std::vector<std::string> ccArgs; // CC-specific arguments.
+ CC(StringRef ccPath, StringRef RemotePath,
+ const std::vector<std::string> *CCArgs)
+ : CCPath(ccPath), RemoteClientPath(RemotePath) {
+ if (CCArgs) ccArgs = *CCArgs;
}
public:
enum FileType { AsmFile, ObjectFile, CFile };
- static GCC *create(std::string &Message,
- const std::string &GCCBinary,
+ static CC *create(std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args);
/// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
@@ -64,7 +64,7 @@ public:
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error = nullptr,
- const std::vector<std::string> &GCCArgs =
+ const std::vector<std::string> &CCArgs =
std::vector<std::string>(),
unsigned Timeout = 0,
unsigned MemoryLimit = 0);
@@ -74,7 +74,7 @@ public:
///
int MakeSharedObject(const std::string &InputFile, FileType fileType,
std::string &OutputFile,
- const std::vector<std::string> &ArgsForGCC,
+ const std::vector<std::string> &ArgsForCC,
std::string &Error);
};
@@ -88,9 +88,9 @@ class AbstractInterpreter {
virtual void anchor();
public:
static LLC *createLLC(const char *Argv0, std::string &Message,
- const std::string &GCCBinary,
+ const std::string &CCBinary,
const std::vector<std::string> *Args = nullptr,
- const std::vector<std::string> *GCCArgs = nullptr,
+ const std::vector<std::string> *CCArgs = nullptr,
bool UseIntegratedAssembler = false);
static AbstractInterpreter*
@@ -119,15 +119,15 @@ public:
unsigned Timeout = 0, unsigned MemoryLimit = 0) {}
/// OutputCode - Compile the specified program from bitcode to code
- /// understood by the GCC driver (either C or asm). If the code generator
+ /// understood by the CC driver (either C or asm). If the code generator
/// fails, it sets Error, otherwise, this function returns the type of code
/// emitted.
- virtual GCC::FileType OutputCode(const std::string &Bitcode,
+ virtual CC::FileType OutputCode(const std::string &Bitcode,
std::string &OutFile, std::string &Error,
unsigned Timeout = 0,
unsigned MemoryLimit = 0) {
Error = "OutputCode not supported by this AbstractInterpreter!";
- return GCC::AsmFile;
+ return CC::AsmFile;
}
/// ExecuteProgram - Run the specified bitcode file, emitting output to the
@@ -140,7 +140,7 @@ public:
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs =
+ const std::vector<std::string> &CCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
@@ -154,18 +154,18 @@ public:
class LLC : public AbstractInterpreter {
std::string LLCPath; // The path to the LLC executable.
std::vector<std::string> ToolArgs; // Extra args to pass to LLC.
- GCC *gcc;
+ CC *cc;
bool UseIntegratedAssembler;
public:
- LLC(const std::string &llcPath, GCC *Gcc,
+ LLC(const std::string &llcPath, CC *cc,
const std::vector<std::string> *Args,
bool useIntegratedAssembler)
- : LLCPath(llcPath), gcc(Gcc),
+ : LLCPath(llcPath), cc(cc),
UseIntegratedAssembler(useIntegratedAssembler) {
ToolArgs.clear();
if (Args) ToolArgs = *Args;
}
- ~LLC() override { delete gcc; }
+ ~LLC() override { delete cc; }
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
@@ -178,7 +178,7 @@ public:
const std::string &InputFile,
const std::string &OutputFile,
std::string *Error,
- const std::vector<std::string> &GCCArgs =
+ const std::vector<std::string> &CCArgs =
std::vector<std::string>(),
const std::vector<std::string> &SharedLibs =
std::vector<std::string>(),
@@ -186,10 +186,10 @@ public:
unsigned MemoryLimit = 0) override;
/// OutputCode - Compile the specified program from bitcode to code
- /// understood by the GCC driver (either C or asm). If the code generator
+ /// understood by the CC driver (either C or asm). If the code generator
/// fails, it sets Error, otherwise, this function returns the type of code
/// emitted.
- GCC::FileType OutputCode(const std::string &Bitcode,
+ CC::FileType OutputCode(const std::string &Bitcode,
std::string &OutFile, std::string &Error,
unsigned Timeout = 0,
unsigned MemoryLimit = 0) override;
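
LLC above still owns its compiler wrapper through a raw CC* that is freed in ~LLC. The same ownership expressed with std::unique_ptr, as a sketch of the alternative design (not what the patch does; LLCOwner is a hypothetical name):

#include <memory>

struct CC {}; // stand-in for the CC wrapper declared above

struct LLCOwner {
  std::unique_ptr<CC> Compiler; // no manual delete in the destructor
  explicit LLCOwner(std::unique_ptr<CC> C) : Compiler(std::move(C)) {}
};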
diff --git a/contrib/llvm/tools/bugpoint/bugpoint.cpp b/contrib/llvm/tools/bugpoint/bugpoint.cpp
index af6d9fc..48f30e6 100644
--- a/contrib/llvm/tools/bugpoint/bugpoint.cpp
+++ b/contrib/llvm/tools/bugpoint/bugpoint.cpp
@@ -126,7 +126,6 @@ int main(int argc, char **argv) {
initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
- initializeIPA(Registry);
initializeTransformUtils(Registry);
initializeInstCombine(Registry);
initializeInstrumentation(Registry);
@@ -181,19 +180,12 @@ int main(int argc, char **argv) {
Builder.Inliner = createFunctionInliningPass(225);
else
Builder.Inliner = createFunctionInliningPass(275);
-
- // Note that although clang/llvm-gcc use two separate passmanagers
- // here, it shouldn't normally make a difference.
Builder.populateFunctionPassManager(PM);
Builder.populateModulePassManager(PM);
}
- for (std::vector<const PassInfo*>::iterator I = PassList.begin(),
- E = PassList.end();
- I != E; ++I) {
- const PassInfo* PI = *I;
+ for (const PassInfo *PI : PassList)
D.addPass(PI->getPassArgument());
- }
// Bugpoint has the ability of generating a plethora of core files, so to
// avoid filling up the disk, we prevent it
diff --git a/contrib/llvm/tools/llc/llc.cpp b/contrib/llvm/tools/llc/llc.cpp
index e33cd79..bffa39f 100644
--- a/contrib/llvm/tools/llc/llc.cpp
+++ b/contrib/llvm/tools/llc/llc.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <memory>
using namespace llvm;
@@ -96,6 +97,12 @@ static cl::opt<bool> AsmVerbose("asm-verbose",
cl::desc("Add comments to directives."),
cl::init(true));
+static cl::opt<bool>
+ CompileTwice("compile-twice", cl::Hidden,
+ cl::desc("Run everything twice, re-using the same pass "
+ "manager and verify the result is the same."),
+ cl::init(false));
+
static int compileModule(char **, LLVMContext &);
static std::unique_ptr<tool_output_file>
@@ -312,8 +319,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
PM.add(new TargetLibraryInfoWrapperPass(TLII));
// Add the target data from the target machine, if it exists, or the module.
- if (const DataLayout *DL = Target->getDataLayout())
- M->setDataLayout(*DL);
+ M->setDataLayout(Target->createDataLayout());
// Override function attributes based on CPUStr, FeaturesStr, and command line
// flags.
@@ -326,10 +332,15 @@ static int compileModule(char **argv, LLVMContext &Context) {
{
raw_pwrite_stream *OS = &Out->os();
- std::unique_ptr<buffer_ostream> BOS;
- if (FileType != TargetMachine::CGFT_AssemblyFile &&
- !Out->os().supportsSeeking()) {
- BOS = make_unique<buffer_ostream>(*OS);
+
+ // Manually do the buffering rather than using buffer_ostream,
+ // so we can memcmp the contents in CompileTwice mode
+ SmallVector<char, 0> Buffer;
+ std::unique_ptr<raw_svector_ostream> BOS;
+ if ((FileType != TargetMachine::CGFT_AssemblyFile &&
+ !Out->os().supportsSeeking()) ||
+ CompileTwice) {
+ BOS = make_unique<raw_svector_ostream>(Buffer);
OS = BOS.get();
}
@@ -379,7 +390,39 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Before executing passes, print the final values of the LLVM options.
cl::PrintOptionValues();
+ // If requested, run the pass manager over the same module again,
+ // to catch any bugs due to persistent state in the passes. Note that
+ // opt has the same functionality, so it may be worth abstracting this out
+ // in the future.
+ SmallVector<char, 0> CompileTwiceBuffer;
+ if (CompileTwice) {
+ std::unique_ptr<Module> M2(llvm::CloneModule(M.get()));
+ PM.run(*M2);
+ CompileTwiceBuffer = Buffer;
+ Buffer.clear();
+ }
+
PM.run(*M);
+
+ // Compare the two outputs and make sure they're the same
+ if (CompileTwice) {
+ if (Buffer.size() != CompileTwiceBuffer.size() ||
+ (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) !=
+ 0)) {
+ errs()
+ << "Running the pass manager twice changed the output.\n"
+ "Writing the result of the second run to the specified output\n"
+ "To generate the one-run comparison binary, just run without\n"
+ "the compile-twice option\n";
+ Out->os() << Buffer;
+ Out->keep();
+ return 1;
+ }
+ }
+
+ if (BOS) {
+ Out->os() << Buffer;
+ }
}
// Declare success.
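
The -compile-twice path above clones the module, runs the identical pass manager over the clone first, and then byte-compares the two output buffers. Reduced to its core (outputsMatch is a hypothetical name; Buffer and CompileTwiceBuffer play these roles in llc):

#include <cstring>
#include <vector>

// Determinism check: two runs of the same pipeline must produce
// byte-identical output, compared by size and then memcmp.
static bool outputsMatch(const std::vector<char> &A,
                         const std::vector<char> &B) {
  return A.size() == B.size() &&
         (A.empty() || std::memcmp(A.data(), B.data(), A.size()) == 0);
}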
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.cpp b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
index ae276e6..4235145 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.cpp
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
@@ -38,26 +38,39 @@ namespace {
"Dump modules to the current "
"working directory. (WARNING: "
"will overwrite existing files)."),
- clEnumValEnd));
+ clEnumValEnd),
+ cl::Hidden);
+
+ cl::opt<bool> OrcInlineStubs("orc-lazy-inline-stubs",
+ cl::desc("Try to inline stubs"),
+ cl::init(true), cl::Hidden);
}
-OrcLazyJIT::CallbackManagerBuilder
-OrcLazyJIT::createCallbackManagerBuilder(Triple T) {
+std::unique_ptr<OrcLazyJIT::CompileCallbackMgr>
+OrcLazyJIT::createCompileCallbackMgr(Triple T) {
switch (T.getArch()) {
default: return nullptr;
case Triple::x86_64: {
- typedef orc::JITCompileCallbackManager<IRDumpLayerT,
- orc::OrcX86_64> CCMgrT;
- return [](IRDumpLayerT &IRDumpLayer, RuntimeDyld::MemoryManager &MemMgr,
- LLVMContext &Context) {
- return llvm::make_unique<CCMgrT>(IRDumpLayer, MemMgr, Context, 0,
- 64);
- };
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
+ return llvm::make_unique<CCMgrT>(0);
}
}
}
+OrcLazyJIT::IndirectStubsManagerBuilder
+OrcLazyJIT::createIndirectStubsMgrBuilder(Triple T) {
+ switch (T.getArch()) {
+ default: return nullptr;
+
+ case Triple::x86_64:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
+ };
+ }
+}
+
OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() {
switch (OrcDumpKind) {
@@ -111,6 +124,12 @@ OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() {
// Defined in lli.cpp.
CodeGenOpt::Level getOptLevel();
+
+template <typename PtrTy>
+static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+ return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
+}
+
int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
// Add the program's symbols into the JIT's search space.
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
@@ -123,20 +142,31 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
EngineBuilder EB;
EB.setOptLevel(getOptLevel());
auto TM = std::unique_ptr<TargetMachine>(EB.selectTarget());
- auto &Context = getGlobalContext();
- auto CallbackMgrBuilder =
- OrcLazyJIT::createCallbackManagerBuilder(Triple(TM->getTargetTriple()));
+ auto CompileCallbackMgr =
+ OrcLazyJIT::createCompileCallbackMgr(Triple(TM->getTargetTriple()));
// If we couldn't build the factory function then there must not be a callback
// manager for this target. Bail out.
- if (!CallbackMgrBuilder) {
+ if (!CompileCallbackMgr) {
errs() << "No callback manager available for target '"
<< TM->getTargetTriple().str() << "'.\n";
return 1;
}
+ auto IndirectStubsMgrBuilder =
+ OrcLazyJIT::createIndirectStubsMgrBuilder(Triple(TM->getTargetTriple()));
+
+ // If we couldn't build a stubs-manager-builder for this target then bail out.
+ if (!IndirectStubsMgrBuilder) {
+ errs() << "No indirect stubs manager available for target '"
+ << TM->getTargetTriple().str() << "'.\n";
+ return 1;
+ }
+
// Everything looks good. Build the JIT.
- OrcLazyJIT J(std::move(TM), Context, CallbackMgrBuilder);
+ OrcLazyJIT J(std::move(TM), std::move(CompileCallbackMgr),
+ std::move(IndirectStubsMgrBuilder),
+ OrcInlineStubs);
// Add the module, look up main and run it.
auto MainHandle = J.addModule(std::move(M));
@@ -148,6 +178,6 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
}
typedef int (*MainFnPtr)(int, char*[]);
- auto Main = OrcLazyJIT::fromTargetAddress<MainFnPtr>(MainSym.getAddress());
+ auto Main = fromTargetAddress<MainFnPtr>(MainSym.getAddress());
return Main(ArgC, ArgV);
}
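
fromTargetAddress is now a file-local helper in OrcLazyJIT.cpp; the double cast goes through uintptr_t so the 64-bit JIT address is explicitly narrowed before becoming a function pointer. The conversion in isolation (toMainPtr is a hypothetical name):

#include <cstdint>

using MainFnPtr = int (*)(int, char *[]);

// Turn a JIT'd symbol address into a callable pointer. The uintptr_t
// step makes any truncation on 32-bit hosts explicit rather than a
// direct integer-to-pointer pun.
static MainFnPtr toMainPtr(uint64_t Addr) {
  return reinterpret_cast<MainFnPtr>(static_cast<uintptr_t>(Addr));
}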
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.h b/contrib/llvm/tools/lli/OrcLazyJIT.h
index fe86adb..bb4da33 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.h
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.h
@@ -23,39 +23,36 @@
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/IR/LLVMContext.h"
namespace llvm {
class OrcLazyJIT {
public:
- typedef orc::JITCompileCallbackManagerBase CompileCallbackMgr;
+ typedef orc::JITCompileCallbackManager CompileCallbackMgr;
typedef orc::ObjectLinkingLayer<> ObjLayerT;
typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
TransformFtor;
typedef orc::IRTransformLayer<CompileLayerT, TransformFtor> IRDumpLayerT;
typedef orc::CompileOnDemandLayer<IRDumpLayerT, CompileCallbackMgr> CODLayerT;
+ typedef CODLayerT::IndirectStubsManagerBuilderT
+ IndirectStubsManagerBuilder;
typedef CODLayerT::ModuleSetHandleT ModuleHandleT;
- typedef std::function<
- std::unique_ptr<CompileCallbackMgr>(IRDumpLayerT&,
- RuntimeDyld::MemoryManager&,
- LLVMContext&)>
- CallbackManagerBuilder;
-
- static CallbackManagerBuilder createCallbackManagerBuilder(Triple T);
-
- OrcLazyJIT(std::unique_ptr<TargetMachine> TM, LLVMContext &Context,
- CallbackManagerBuilder &BuildCallbackMgr)
- : TM(std::move(TM)),
- ObjectLayer(),
- CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)),
- IRDumpLayer(CompileLayer, createDebugDumper()),
- CCMgr(BuildCallbackMgr(IRDumpLayer, CCMgrMemMgr, Context)),
- CODLayer(IRDumpLayer, *CCMgr, false),
- CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
+ OrcLazyJIT(std::unique_ptr<TargetMachine> TM,
+ std::unique_ptr<CompileCallbackMgr> CCMgr,
+ IndirectStubsManagerBuilder IndirectStubsMgrBuilder,
+ bool InlineStubs)
+ : TM(std::move(TM)), DL(this->TM->createDataLayout()),
+ CCMgr(std::move(CCMgr)),
+ ObjectLayer(),
+ CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)),
+ IRDumpLayer(CompileLayer, createDebugDumper()),
+ CODLayer(IRDumpLayer, extractSingleFunction, *this->CCMgr,
+ std::move(IndirectStubsMgrBuilder), InlineStubs),
+ CXXRuntimeOverrides(
+ [this](const std::string &S) { return mangle(S); }) {}
~OrcLazyJIT() {
// Run any destructors registered with __cxa_atexit.
@@ -65,15 +62,13 @@ public:
DtorRunner.runViaLayer(CODLayer);
}
- template <typename PtrTy>
- static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
- return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
- }
+ static std::unique_ptr<CompileCallbackMgr> createCompileCallbackMgr(Triple T);
+ static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T);
ModuleHandleT addModule(std::unique_ptr<Module> M) {
// Attach a data-layout if one isn't already present.
if (M->getDataLayout().isDefault())
- M->setDataLayout(*TM->getDataLayout());
+ M->setDataLayout(DL);
// Record the static constructors and destructors. We have to do this before
// we hand over ownership of the module to the JIT.
@@ -136,20 +131,27 @@ private:
std::string MangledName;
{
raw_string_ostream MangledNameStream(MangledName);
- Mangler::getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
+ Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
}
return MangledName;
}
+ static std::set<Function*> extractSingleFunction(Function &F) {
+ std::set<Function*> Partition;
+ Partition.insert(&F);
+ return Partition;
+ }
+
static TransformFtor createDebugDumper();
std::unique_ptr<TargetMachine> TM;
+ DataLayout DL;
SectionMemoryManager CCMgrMemMgr;
+ std::unique_ptr<CompileCallbackMgr> CCMgr;
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
IRDumpLayerT IRDumpLayer;
- std::unique_ptr<CompileCallbackMgr> CCMgr;
CODLayerT CODLayer;
orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
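
Note the member reordering at the end of the hunk: CCMgr now precedes CODLayer because the CODLayerT constructor dereferences *this->CCMgr, and non-static data members are initialized in declaration order regardless of the mem-initializer list. The rule in miniature (hypothetical types):

// Members initialize top to bottom, so anything a later member's
// constructor reads must be declared earlier.
struct Callback {
  explicit Callback(int &Mgr) : M(Mgr) {}
  int &M;
};

struct Jit {
  int Mgr = 0;      // must come first...
  Callback CB{Mgr}; // ...because this member's constructor uses it
};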
diff --git a/contrib/llvm/tools/lli/RemoteTarget.cpp b/contrib/llvm/tools/lli/RemoteTarget.cpp
index 850fdc5..95e1511 100644
--- a/contrib/llvm/tools/lli/RemoteTarget.cpp
+++ b/contrib/llvm/tools/lli/RemoteTarget.cpp
@@ -1,4 +1,4 @@
-//===- RemoteTarget.cpp - LLVM Remote process JIT execution --------------===//
+//===- RemoteTarget.cpp - LLVM Remote process JIT execution -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -56,7 +56,7 @@ bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) {
}
bool RemoteTarget::executeCode(uint64_t Address, int &RetVal) {
- int (*fn)(void) = (int(*)(void))Address;
+ int (*fn)() = (int(*)())Address;
RetVal = fn();
return true;
}
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 057841f..9f71406 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -262,8 +262,7 @@ public:
if (!getCacheFilename(ModuleID, CacheName))
return;
if (!CacheDir.empty()) { // Create user-defined cache dir.
- SmallString<128> dir(CacheName);
- sys::path::remove_filename(dir);
+ SmallString<128> dir(sys::path::parent_path(CacheName));
sys::fs::create_directories(Twine(dir));
}
std::error_code EC;
@@ -422,7 +421,7 @@ int main(int argc, char **argv, char * const *envp) {
// If not jitting lazily, load the whole bitcode file eagerly too.
if (NoLazyCompilation) {
- if (std::error_code EC = Mod->materializeAllPermanently()) {
+ if (std::error_code EC = Mod->materializeAll()) {
errs() << argv[0] << ": bitcode didn't read correctly.\n";
errs() << "Reason: " << EC.message() << "\n";
exit(1);
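
The cache-directory fix above replaces the copy-then-remove_filename dance with a direct parent_path call; both produce the containing directory, but the new form avoids mutating a copy. Side by side, a sketch using the llvm::sys::path API as lli does:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

llvm::SmallString<128> viaRemove(llvm::StringRef CacheName) {
  llvm::SmallString<128> Dir(CacheName);
  llvm::sys::path::remove_filename(Dir); // strip last component in place
  return Dir;
}

llvm::SmallString<128> viaParent(llvm::StringRef CacheName) {
  return llvm::SmallString<128>(llvm::sys::path::parent_path(CacheName));
}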
diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
index 2c9668c..ef5fab6 100644
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -81,7 +81,7 @@ static cl::opt<Format>
clEnumValN(GNU, "gnu", "gnu"),
clEnumValN(BSD, "bsd", "bsd"), clEnumValEnd));
-std::string Options;
+static std::string Options;
// Provide additional help output explaining the operations and modifiers of
// llvm-ar. This object instructs the CommandLine library to print the text of
@@ -130,6 +130,7 @@ static bool OnlyUpdate = false; ///< 'u' modifier
static bool Verbose = false; ///< 'v' modifier
static bool Symtab = true; ///< 's' modifier
static bool Deterministic = true; ///< 'D' and 'U' modifiers
+static bool Thin = false; ///< 'T' modifier
// Relative Positional Argument (for insert/move). This variable holds
// the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -252,6 +253,9 @@ static ArchiveOperation parseCommandLine() {
case 'U':
Deterministic = false;
break;
+ case 'T':
+ Thin = true;
+ break;
default:
cl::PrintHelpMessage();
}
@@ -308,18 +312,9 @@ static void doPrint(StringRef Name, const object::Archive::Child &C) {
// Utility function for printing out the file mode when the 't' operation is in
// verbose mode.
static void printMode(unsigned mode) {
- if (mode & 004)
- outs() << "r";
- else
- outs() << "-";
- if (mode & 002)
- outs() << "w";
- else
- outs() << "-";
- if (mode & 001)
- outs() << "x";
- else
- outs() << "-";
+ outs() << ((mode & 004) ? "r" : "-");
+ outs() << ((mode & 002) ? "w" : "-");
+ outs() << ((mode & 001) ? "x" : "-");
}
// Implement the 't' operation. This function prints out just
@@ -334,7 +329,9 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
printMode(Mode & 007);
outs() << ' ' << C.getUID();
outs() << '/' << C.getGID();
- outs() << ' ' << format("%6llu", C.getSize());
+ ErrorOr<uint64_t> Size = C.getSize();
+ failIfError(Size.getError());
+ outs() << ' ' << format("%6llu", Size.get());
outs() << ' ' << C.getLastModified().str();
outs() << ' ';
}
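
The printMode rewrite above collapses three if/else ladders into ternaries over the octal permission bits. The idiom standing alone (printRWX is a hypothetical reduction):

#include <cstdio>

// Print one permission triplet, e.g. mode 5 -> "r-x".
static void printRWX(unsigned Mode) {
  std::printf("%c%c%c", (Mode & 04) ? 'r' : '-',
                        (Mode & 02) ? 'w' : '-',
                        (Mode & 01) ? 'x' : '-');
}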
@@ -393,13 +390,14 @@ static bool shouldCreateArchive(ArchiveOperation Op) {
static void performReadOperation(ArchiveOperation Operation,
object::Archive *OldArchive) {
- if (Operation == Extract && OldArchive->isThin()) {
- errs() << "extracting from a thin archive is not supported\n";
- std::exit(1);
- }
+ if (Operation == Extract && OldArchive->isThin())
+ fail("extracting from a thin archive is not supported");
bool Filter = !Members.empty();
- for (const object::Archive::Child &C : OldArchive->children()) {
+ for (auto &ChildOrErr : OldArchive->children()) {
+ failIfError(ChildOrErr.getError());
+ const object::Archive::Child &C = *ChildOrErr;
+
ErrorOr<StringRef> NameOrErr = C.getName();
failIfError(NameOrErr.getError());
StringRef Name = NameOrErr.get();
@@ -432,10 +430,21 @@ static void performReadOperation(ArchiveOperation Operation,
std::exit(1);
}
-template <typename T>
-void addMember(std::vector<NewArchiveIterator> &Members, T I, StringRef Name,
- int Pos = -1) {
- NewArchiveIterator NI(I, Name);
+static void addMember(std::vector<NewArchiveIterator> &Members,
+ StringRef FileName, int Pos = -1) {
+ NewArchiveIterator NI(FileName);
+ if (Pos == -1)
+ Members.push_back(NI);
+ else
+ Members[Pos] = NI;
+}
+
+static void addMember(std::vector<NewArchiveIterator> &Members,
+ const object::Archive::Child &M, StringRef Name,
+ int Pos = -1) {
+ if (Thin && !M.getParent()->isThin())
+ fail("Cannot convert a regular archive to a thin one");
+ NewArchiveIterator NI(M, Name);
if (Pos == -1)
Members.push_back(NI);
else
@@ -451,7 +460,7 @@ enum InsertAction {
};
static InsertAction computeInsertAction(ArchiveOperation Operation,
- object::Archive::child_iterator I,
+ const object::Archive::Child &Member,
StringRef Name,
std::vector<StringRef>::iterator &Pos) {
if (Operation == QuickAppend || Members.empty())
@@ -485,7 +494,7 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
// operation.
sys::fs::file_status Status;
failIfError(sys::fs::status(*MI, Status), *MI);
- if (Status.getLastModificationTime() < I->getLastModified()) {
+ if (Status.getLastModificationTime() < Member.getLastModified()) {
if (PosName.empty())
return IA_AddOldMember;
return IA_MoveOldMember;
@@ -508,7 +517,9 @@ computeNewArchiveMembers(ArchiveOperation Operation,
int InsertPos = -1;
StringRef PosName = sys::path::filename(RelPos);
if (OldArchive) {
- for (auto &Child : OldArchive->children()) {
+ for (auto &ChildOrErr : OldArchive->children()) {
+ failIfError(ChildOrErr.getError());
+ auto &Child = ChildOrErr.get();
int Pos = Ret.size();
ErrorOr<StringRef> NameOrErr = Child.getName();
failIfError(NameOrErr.getError());
@@ -529,7 +540,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
addMember(Ret, Child, Name);
break;
case IA_AddNewMeber:
- addMember(Ret, *MemberI, Name);
+ addMember(Ret, *MemberI);
break;
case IA_Delete:
break;
@@ -537,7 +548,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
addMember(Moved, Child, Name);
break;
case IA_MoveNewMember:
- addMember(Moved, *MemberI, Name);
+ addMember(Moved, *MemberI);
break;
}
if (MemberI != Members.end())
@@ -557,12 +568,10 @@ computeNewArchiveMembers(ArchiveOperation Operation,
assert(unsigned(InsertPos) <= Ret.size());
Ret.insert(Ret.begin() + InsertPos, Moved.begin(), Moved.end());
- Ret.insert(Ret.begin() + InsertPos, Members.size(),
- NewArchiveIterator("", ""));
+ Ret.insert(Ret.begin() + InsertPos, Members.size(), NewArchiveIterator(""));
int Pos = InsertPos;
for (auto &Member : Members) {
- StringRef Name = sys::path::filename(Member);
- addMember(Ret, Member, Name, Pos);
+ addMember(Ret, Member, Pos);
++Pos;
}
@@ -590,15 +599,15 @@ performWriteOperation(ArchiveOperation Operation, object::Archive *OldArchive,
break;
}
if (NewMembersP) {
- std::pair<StringRef, std::error_code> Result =
- writeArchive(ArchiveName, *NewMembersP, Symtab, Kind, Deterministic);
+ std::pair<StringRef, std::error_code> Result = writeArchive(
+ ArchiveName, *NewMembersP, Symtab, Kind, Deterministic, Thin);
failIfError(Result.second, Result.first);
return;
}
std::vector<NewArchiveIterator> NewMembers =
computeNewArchiveMembers(Operation, OldArchive);
auto Result =
- writeArchive(ArchiveName, NewMembers, Symtab, Kind, Deterministic);
+ writeArchive(ArchiveName, NewMembers, Symtab, Kind, Deterministic, Thin);
failIfError(Result.second, Result.first);
}
@@ -644,20 +653,13 @@ static int performOperation(ArchiveOperation Operation,
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
MemoryBuffer::getFile(ArchiveName, -1, false);
std::error_code EC = Buf.getError();
- if (EC && EC != errc::no_such_file_or_directory) {
- errs() << ToolName << ": error opening '" << ArchiveName
- << "': " << EC.message() << "!\n";
- return 1;
- }
+ if (EC && EC != errc::no_such_file_or_directory)
+ fail("error opening '" + ArchiveName + "': " + EC.message() + "!");
if (!EC) {
object::Archive Archive(Buf.get()->getMemBufferRef(), EC);
-
- if (EC) {
- errs() << ToolName << ": error loading '" << ArchiveName
- << "': " << EC.message() << "!\n";
- return 1;
- }
+ failIfError(EC,
+ "error loading '" + ArchiveName + "': " + EC.message() + "!");
performOperation(Operation, &Archive, NewMembers);
return 0;
}
@@ -713,7 +715,9 @@ static void runMRIScript() {
failIfError(LibOrErr.getError(), "Could not parse library");
Archives.push_back(std::move(*LibOrErr));
object::Archive &Lib = *Archives.back();
- for (auto &Member : Lib.children()) {
+ for (auto &MemberOrErr : Lib.children()) {
+ failIfError(MemberOrErr.getError());
+ auto &Member = MemberOrErr.get();
ErrorOr<StringRef> NameOrErr = Member.getName();
failIfError(NameOrErr.getError());
addMember(NewMembers, Member, *NameOrErr);
@@ -721,7 +725,7 @@ static void runMRIScript() {
break;
}
case MRICommand::AddMod:
- addMember(NewMembers, Rest, sys::path::filename(Rest));
+ addMember(NewMembers, Rest);
break;
case MRICommand::Create:
Create = true;
@@ -784,9 +788,9 @@ int main(int argc, char **argv) {
" This program archives bitcode files into single libraries\n"
);
- if (Stem.find("ar") != StringRef::npos)
- return ar_main();
if (Stem.find("ranlib") != StringRef::npos)
return ranlib_main();
+ if (Stem.find("ar") != StringRef::npos)
+ return ar_main();
fail("Not ranlib, ar or lib!");
}
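
The dispatch reorder at the end matters because these are plain substring tests on the tool name: a stem such as arm-none-eabi-ranlib contains "ar" (inside "arm"), so, presumably, testing "ranlib" first keeps such names out of ar_main. The shape of the logic (dispatch is a hypothetical reduction):

#include <string>

static int dispatch(const std::string &Stem) {
  // More specific pattern first: "arm-none-eabi-ranlib" also matches "ar".
  if (Stem.find("ranlib") != std::string::npos)
    return 2; // ranlib_main()
  if (Stem.find("ar") != std::string::npos)
    return 1; // ar_main()
  return -1;  // "Not ranlib, ar or lib!"
}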
diff --git a/contrib/llvm/tools/llvm-as/llvm-as.cpp b/contrib/llvm/tools/llvm-as/llvm-as.cpp
index 4455d24..d4e4d8d 100644
--- a/contrib/llvm/tools/llvm-as/llvm-as.cpp
+++ b/contrib/llvm/tools/llvm-as/llvm-as.cpp
@@ -45,6 +45,10 @@ static cl::opt<bool>
DisableOutput("disable-output", cl::desc("Disable output"), cl::init(false));
static cl::opt<bool>
+EmitFunctionSummary("function-summary", cl::desc("Emit function summary index"),
+ cl::init(false));
+
+static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
static cl::opt<bool>
@@ -77,7 +81,8 @@ static void WriteOutputFile(const Module *M) {
}
if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
- WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder);
+ WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
+ EmitFunctionSummary);
// Declare success.
Out->keep();
diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 7672951..fe68689 100644
--- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -110,10 +110,16 @@ static const char *GetBlockName(unsigned BlockID,
case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
+ case bitc::IDENTIFICATION_BLOCK_ID:
+ return "IDENTIFICATION_BLOCK_ID";
case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
+ case bitc::METADATA_KIND_BLOCK_ID: return "METADATA_KIND_BLOCK";
case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
+ case bitc::FUNCTION_SUMMARY_BLOCK_ID:
+ return "FUNCTION_SUMMARY_BLOCK";
+ case bitc::MODULE_STRTAB_BLOCK_ID: return "MODULE_STRTAB_BLOCK";
}
}
@@ -165,6 +171,15 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, ALIAS)
STRINGIFY_CODE(MODULE_CODE, PURGEVALS)
STRINGIFY_CODE(MODULE_CODE, GCNAME)
+ STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
+ STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES)
+ }
+ case bitc::IDENTIFICATION_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return nullptr;
+ STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
+ STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
}
case bitc::PARAMATTR_BLOCK_ID:
switch (CodeID) {
@@ -241,6 +256,9 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
+ STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
+ STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
+ STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
STRINGIFY_CODE(FUNC_CODE, INST_PHI)
STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
@@ -260,6 +278,21 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: return nullptr;
STRINGIFY_CODE(VST_CODE, ENTRY)
STRINGIFY_CODE(VST_CODE, BBENTRY)
+ STRINGIFY_CODE(VST_CODE, FNENTRY)
+ STRINGIFY_CODE(VST_CODE, COMBINED_FNENTRY)
+ }
+ case bitc::MODULE_STRTAB_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return nullptr;
+ STRINGIFY_CODE(MST_CODE, ENTRY)
+ }
+ case bitc::FUNCTION_SUMMARY_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return nullptr;
+ STRINGIFY_CODE(FS_CODE, PERMODULE_ENTRY)
+ STRINGIFY_CODE(FS_CODE, COMBINED_ENTRY)
}
case bitc::METADATA_ATTACHMENT_ID:
switch(CodeID) {
@@ -271,7 +304,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default:return nullptr;
STRINGIFY_CODE(METADATA, STRING)
STRINGIFY_CODE(METADATA, NAME)
- STRINGIFY_CODE(METADATA, KIND)
+ STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
STRINGIFY_CODE(METADATA, NODE)
STRINGIFY_CODE(METADATA, VALUE)
STRINGIFY_CODE(METADATA, OLD_NODE)
@@ -301,6 +334,12 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
STRINGIFY_CODE(METADATA, MODULE)
}
+ case bitc::METADATA_KIND_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return nullptr;
+ STRINGIFY_CODE(METADATA, KIND)
+ }
case bitc::USELIST_BLOCK_ID:
switch(CodeID) {
default:return nullptr;
@@ -476,14 +515,38 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
GetCodeName(Code, BlockID, *Stream.getBitStreamReader(),
CurStreamType))
outs() << " codeid=" << Code;
- if (Entry.ID != bitc::UNABBREV_RECORD)
+ const BitCodeAbbrev *Abbv = nullptr;
+ if (Entry.ID != bitc::UNABBREV_RECORD) {
+ Abbv = Stream.getAbbrev(Entry.ID);
outs() << " abbrevid=" << Entry.ID;
+ }
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];
outs() << "/>";
+ if (Abbv) {
+ for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
+ continue;
+ assert(i + 2 == e && "Array op not second to last");
+ std::string Str;
+ bool ArrayIsPrintable = true;
+ for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
+ if (!isprint(static_cast<unsigned char>(Record[j]))) {
+ ArrayIsPrintable = false;
+ break;
+ }
+ Str += (char)Record[j];
+ }
+ if (ArrayIsPrintable)
+ outs() << " record string = '" << Str << "'";
+ break;
+ }
+ }
+
if (Blob.data()) {
outs() << " blob data = ";
if (ShowBinaryBlobs) {
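
The new abbrev handling above walks the abbreviation's operands, finds the Array operand, and prints the record tail as text only when every element passes isprint. A standalone version of that test (asPrintableString is a hypothetical name; the explicit range guard here stands in for the unsigned char truncation the tool relies on):

#include <cctype>
#include <cstdint>
#include <string>
#include <vector>

// Render record values as a string only if all of them are printable
// characters; otherwise the caller falls back to numeric output.
static bool asPrintableString(const std::vector<uint64_t> &Vals,
                              std::string &Out) {
  for (uint64_t V : Vals) {
    if (V > 0xFF || !std::isprint(static_cast<unsigned char>(V)))
      return false;
    Out += static_cast<char>(V);
  }
  return true;
}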
diff --git a/contrib/llvm/tools/llvm-cov/CoverageReport.cpp b/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
index 497c2f8..ed01a2e 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
namespace {
/// \brief Helper struct which prints trimmed and aligned columns.
struct Column {
- enum TrimKind { NoTrim, LeftTrim, RightTrim };
+ enum TrimKind { NoTrim, WidthTrim, LeftTrim, RightTrim };
enum AlignmentKind { LeftAlignment, RightAlignment };
@@ -30,7 +30,7 @@ struct Column {
AlignmentKind Alignment;
Column(StringRef Str, unsigned Width)
- : Str(Str), Width(Width), Trim(NoTrim), Alignment(LeftAlignment) {}
+ : Str(Str), Width(Width), Trim(WidthTrim), Alignment(LeftAlignment) {}
Column &set(TrimKind Value) {
Trim = Value;
@@ -44,6 +44,7 @@ struct Column {
void render(raw_ostream &OS) const;
};
+
raw_ostream &operator<<(raw_ostream &OS, const Column &Value) {
Value.render(OS);
return OS;
@@ -64,6 +65,9 @@ void Column::render(raw_ostream &OS) const {
switch (Trim) {
case NoTrim:
+ OS << Str;
+ break;
+ case WidthTrim:
OS << Str.substr(0, Width);
break;
case LeftTrim:
@@ -84,8 +88,19 @@ static Column column(StringRef Str, unsigned Width, const T &Value) {
return Column(Str, Width).set(Value);
}
-static const unsigned FileReportColumns[] = {25, 10, 8, 8, 10, 10};
-static const unsigned FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8};
+static size_t FileReportColumns[] = {25, 10, 8, 8, 10, 10};
+static size_t FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8};
+
+/// \brief Adjust column widths to fit long file paths and function names.
+static void adjustColumnWidths(coverage::CoverageMapping *CM) {
+ for (StringRef Filename : CM->getUniqueSourceFiles()) {
+ FileReportColumns[0] = std::max(FileReportColumns[0], Filename.size());
+ for (const auto &F : CM->getCoveredFunctions(Filename)) {
+ FunctionReportColumns[0] =
+ std::max(FunctionReportColumns[0], F.Name.size());
+ }
+ }
+}
/// \brief Prints a horizontal divider which spans across the given columns.
template <typename T, size_t N>
@@ -108,8 +123,9 @@ static raw_ostream::Colors determineCoveragePercentageColor(const T &Info) {
}
void CoverageReport::render(const FileCoverageSummary &File, raw_ostream &OS) {
- OS << column(File.Name, FileReportColumns[0], Column::LeftTrim)
- << format("%*u", FileReportColumns[1], (unsigned)File.RegionCoverage.NumRegions);
+ OS << column(File.Name, FileReportColumns[0], Column::NoTrim)
+ << format("%*u", FileReportColumns[1],
+ (unsigned)File.RegionCoverage.NumRegions);
Options.colored_ostream(OS, File.RegionCoverage.isFullyCovered()
? raw_ostream::GREEN
: raw_ostream::RED)
@@ -157,6 +173,7 @@ void CoverageReport::render(const FunctionCoverageSummary &Function,
void CoverageReport::renderFunctionReports(ArrayRef<std::string> Files,
raw_ostream &OS) {
+ adjustColumnWidths(Coverage.get());
bool isFirst = true;
for (StringRef Filename : Files) {
if (isFirst)
@@ -191,6 +208,7 @@ void CoverageReport::renderFunctionReports(ArrayRef<std::string> Files,
}
void CoverageReport::renderFileReports(raw_ostream &OS) {
+ adjustColumnWidths(Coverage.get());
OS << column("Filename", FileReportColumns[0])
<< column("Regions", FileReportColumns[1], Column::RightAlignment)
<< column("Miss", FileReportColumns[2], Column::RightAlignment)
@@ -200,6 +218,7 @@ void CoverageReport::renderFileReports(raw_ostream &OS) {
<< "\n";
renderDivider(FileReportColumns, OS);
OS << "\n";
+
FileCoverageSummary Totals("TOTAL");
for (StringRef Filename : Coverage->getUniqueSourceFiles()) {
FileCoverageSummary Summary(Filename);
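
adjustColumnWidths above drops the fixed 25-character name columns and grows them to the longest filename or function name before anything is rendered. The pattern, reduced (fitColumn is a hypothetical helper):

#include <algorithm>
#include <string>
#include <vector>

// Widen a column to the longest cell it must display, never below
// its minimum width.
static size_t fitColumn(size_t MinWidth,
                        const std::vector<std::string> &Cells) {
  size_t W = MinWidth;
  for (const std::string &C : Cells)
    W = std::max(W, C.size());
  return W;
}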
diff --git a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
index 94b55fe..1208fad 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -24,6 +24,7 @@ struct CoverageViewOptions {
bool ShowLineStatsOrRegionMarkers;
bool ShowExpandedRegions;
bool ShowFunctionInstantiations;
+ bool ShowFullFilenames;
/// \brief Change the output's stream color if the colors are enabled.
ColoredRawOstream colored_ostream(raw_ostream &OS,
diff --git a/contrib/llvm/tools/llvm-cov/gcov.cpp b/contrib/llvm/tools/llvm-cov/gcov.cpp
index 4377a50..a5343fa 100644
--- a/contrib/llvm/tools/llvm-cov/gcov.cpp
+++ b/contrib/llvm/tools/llvm-cov/gcov.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
static void reportCoverage(StringRef SourceFile, StringRef ObjectDir,
const std::string &InputGCNO,
const std::string &InputGCDA, bool DumpGCOV,
- const GCOVOptions &Options) {
+ const GCOV::Options &Options) {
SmallString<128> CoverageFileStem(ObjectDir);
if (CoverageFileStem.empty()) {
// If no directory was specified with -o, look next to the source file.
@@ -143,8 +143,8 @@ int gcovMain(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n");
- GCOVOptions Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
- PreservePaths, UncondBranch, LongNames, NoOutput);
+ GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
+ PreservePaths, UncondBranch, LongNames, NoOutput);
for (const auto &SourceFile : SourceFiles)
reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV,
diff --git a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 4e06be9e..3dda692 100644
--- a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -40,18 +40,14 @@ cl::list<std::string> InputFilenames(cl::Positional,
cl::ZeroOrMore);
} // namespace opts
-static int ReturnValue = EXIT_SUCCESS;
-
namespace llvm {
-static bool error(std::error_code EC) {
+static void error(std::error_code EC) {
if (!EC)
- return false;
-
- ReturnValue = EXIT_FAILURE;
+ return;
outs() << "\nError reading file: " << EC.message() << ".\n";
outs().flush();
- return true;
+ exit(1);
}
} // namespace llvm
@@ -59,38 +55,24 @@ static bool error(std::error_code EC) {
static void reportError(StringRef Input, StringRef Message) {
if (Input == "-")
Input = "<stdin>";
-
errs() << Input << ": " << Message << "\n";
errs().flush();
- ReturnValue = EXIT_FAILURE;
+ exit(1);
}
static void reportError(StringRef Input, std::error_code EC) {
reportError(Input, EC.message());
}
-static SmallVectorImpl<SectionRef> &getRelocSections(const ObjectFile *Obj,
- const SectionRef &Sec) {
- static bool MappingDone = false;
- static std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
- if (!MappingDone) {
- for (const SectionRef &Section : Obj->sections()) {
- section_iterator Sec2 = Section.getRelocatedSection();
- if (Sec2 != Obj->section_end())
- SectionRelocMap[*Sec2].push_back(Section);
- }
- MappingDone = true;
- }
- return SectionRelocMap[Sec];
-}
+static std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
-static bool collectRelocatedSymbols(const ObjectFile *Obj,
+static void collectRelocatedSymbols(const ObjectFile *Obj,
const SectionRef &Sec, uint64_t SecAddress,
uint64_t SymAddress, uint64_t SymSize,
StringRef *I, StringRef *E) {
uint64_t SymOffset = SymAddress - SecAddress;
uint64_t SymEnd = SymOffset + SymSize;
- for (const SectionRef &SR : getRelocSections(Obj, Sec)) {
+ for (const SectionRef &SR : SectionRelocMap[Sec]) {
for (const object::RelocationRef &Reloc : SR.relocations()) {
if (I == E)
break;
@@ -98,8 +80,7 @@ static bool collectRelocatedSymbols(const ObjectFile *Obj,
if (RelocSymI == Obj->symbol_end())
continue;
ErrorOr<StringRef> RelocSymName = RelocSymI->getName();
- if (error(RelocSymName.getError()))
- return true;
+ error(RelocSymName.getError());
uint64_t Offset = Reloc.getOffset();
if (Offset >= SymOffset && Offset < SymEnd) {
*I = *RelocSymName;
@@ -107,29 +88,26 @@ static bool collectRelocatedSymbols(const ObjectFile *Obj,
}
}
}
- return false;
}
-static bool collectRelocationOffsets(
+static void collectRelocationOffsets(
const ObjectFile *Obj, const SectionRef &Sec, uint64_t SecAddress,
uint64_t SymAddress, uint64_t SymSize, StringRef SymName,
std::map<std::pair<StringRef, uint64_t>, StringRef> &Collection) {
uint64_t SymOffset = SymAddress - SecAddress;
uint64_t SymEnd = SymOffset + SymSize;
- for (const SectionRef &SR : getRelocSections(Obj, Sec)) {
+ for (const SectionRef &SR : SectionRelocMap[Sec]) {
for (const object::RelocationRef &Reloc : SR.relocations()) {
const object::symbol_iterator RelocSymI = Reloc.getSymbol();
if (RelocSymI == Obj->symbol_end())
continue;
ErrorOr<StringRef> RelocSymName = RelocSymI->getName();
- if (error(RelocSymName.getError()))
- return true;
+ error(RelocSymName.getError());
uint64_t Offset = Reloc.getOffset();
if (Offset >= SymOffset && Offset < SymEnd)
Collection[std::make_pair(SymName, Offset - SymOffset)] = *RelocSymName;
}
}
- return false;
}
static void dumpCXXData(const ObjectFile *Obj) {
@@ -182,6 +160,13 @@ static void dumpCXXData(const ObjectFile *Obj) {
std::map<std::pair<StringRef, uint64_t>, StringRef> VTTEntries;
std::map<StringRef, StringRef> TINames;
+ SectionRelocMap.clear();
+ for (const SectionRef &Section : Obj->sections()) {
+ section_iterator Sec2 = Section.getRelocatedSection();
+ if (Sec2 != Obj->section_end())
+ SectionRelocMap[*Sec2].push_back(Section);
+ }
+
uint8_t BytesInAddress = Obj->getBytesInAddress();
std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
@@ -191,12 +176,11 @@ static void dumpCXXData(const ObjectFile *Obj) {
object::SymbolRef Sym = P.first;
uint64_t SymSize = P.second;
ErrorOr<StringRef> SymNameOrErr = Sym.getName();
- if (error(SymNameOrErr.getError()))
- return;
+ error(SymNameOrErr.getError());
StringRef SymName = *SymNameOrErr;
- object::section_iterator SecI(Obj->section_begin());
- if (error(Sym.getSection(SecI)))
- return;
+ ErrorOr<object::section_iterator> SecIOrErr = Sym.getSection();
+ error(SecIOrErr.getError());
+ object::section_iterator SecI = *SecIOrErr;
// Skip external symbols.
if (SecI == Obj->section_end())
continue;
@@ -205,11 +189,9 @@ static void dumpCXXData(const ObjectFile *Obj) {
if (Sec.isBSS() || Sec.isVirtual())
continue;
StringRef SecContents;
- if (error(Sec.getContents(SecContents)))
- return;
+ error(Sec.getContents(SecContents));
ErrorOr<uint64_t> SymAddressOrErr = Sym.getAddress();
- if (error(SymAddressOrErr.getError()))
- return;
+ error(SymAddressOrErr.getError());
uint64_t SymAddress = *SymAddressOrErr;
uint64_t SecAddress = Sec.getAddress();
uint64_t SecSize = Sec.getSize();
@@ -236,23 +218,19 @@ static void dumpCXXData(const ObjectFile *Obj) {
// Complete object locators in the MS-ABI start with '??_R4'
else if (SymName.startswith("??_R4")) {
CompleteObjectLocator COL;
- COL.Data = ArrayRef<little32_t>(
+ COL.Data = makeArrayRef(
reinterpret_cast<const little32_t *>(SymContents.data()), 3);
StringRef *I = std::begin(COL.Symbols), *E = std::end(COL.Symbols);
- if (collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I,
- E))
- return;
+ collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I, E);
COLs[SymName] = COL;
}
// Class hierarchy descriptors in the MS-ABI start with '??_R3'
else if (SymName.startswith("??_R3")) {
ClassHierarchyDescriptor CHD;
- CHD.Data = ArrayRef<little32_t>(
+ CHD.Data = makeArrayRef(
reinterpret_cast<const little32_t *>(SymContents.data()), 3);
StringRef *I = std::begin(CHD.Symbols), *E = std::end(CHD.Symbols);
- if (collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I,
- E))
- return;
+ collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I, E);
CHDs[SymName] = CHD;
}
// Class hierarchy descriptors in the MS-ABI start with '??_R2'
@@ -265,12 +243,10 @@ static void dumpCXXData(const ObjectFile *Obj) {
// Base class descriptors in the MS-ABI start with '??_R1'
else if (SymName.startswith("??_R1")) {
BaseClassDescriptor BCD;
- BCD.Data = ArrayRef<little32_t>(
+ BCD.Data = makeArrayRef(
reinterpret_cast<const little32_t *>(SymContents.data()) + 1, 5);
StringRef *I = std::begin(BCD.Symbols), *E = std::end(BCD.Symbols);
- if (collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I,
- E))
- return;
+ collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I, E);
BCDs[SymName] = BCD;
}
// Type descriptors in the MS-ABI start with '??_R0'
@@ -283,9 +259,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
TD.AlwaysZero = *reinterpret_cast<const little32_t *>(DataPtr);
TD.MangledName = SymContents.drop_front(BytesInAddress * 2);
StringRef *I = std::begin(TD.Symbols), *E = std::end(TD.Symbols);
- if (collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I,
- E))
- return;
+ collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I, E);
TDs[SymName] = TD;
}
// Throw descriptors in the MS-ABI start with '_TI'
@@ -316,9 +290,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
CT.VirtualBaseAdjustmentOffset = DataPtr[4];
CT.Size = DataPtr[5];
StringRef *I = std::begin(CT.Symbols), *E = std::end(CT.Symbols);
- if (collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I,
- E))
- return;
+ collectRelocatedSymbols(Obj, Sec, SecAddress, SymAddress, SymSize, I, E);
CTs[SymName] = CT;
}
// Construction vtables in the Itanium ABI start with '_ZTT' or '__ZTT'.
@@ -510,7 +482,9 @@ static void dumpCXXData(const ObjectFile *Obj) {
}
static void dumpArchive(const Archive *Arc) {
- for (const Archive::Child &ArcC : Arc->children()) {
+ for (auto &ErrorOrChild : Arc->children()) {
+ error(ErrorOrChild.getError());
+ const Archive::Child &ArcC = *ErrorOrChild;
ErrorOr<std::unique_ptr<Binary>> ChildOrErr = ArcC.getAsBinary();
if (std::error_code EC = ChildOrErr.getError()) {
// Ignore non-object files.
@@ -527,12 +501,6 @@ static void dumpArchive(const Archive *Arc) {
}
static void dumpInput(StringRef File) {
- // If file isn't stdin, check that it exists.
- if (File != "-" && !sys::fs::exists(File)) {
- reportError(File, cxxdump_error::file_not_found);
- return;
- }
-
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
if (std::error_code EC = BinaryOrErr.getError()) {
@@ -569,5 +537,5 @@ int main(int argc, const char *argv[]) {
std::for_each(opts::InputFilenames.begin(), opts::InputFilenames.end(),
dumpInput);
- return ReturnValue;
+ return EXIT_SUCCESS;
}
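
llvm-cxxdump's error() above changes from returning a bool that callers must propagate (plus a global ReturnValue) to exiting on the spot, which is what lets every "if (error(...)) return;" collapse to a bare call. The shape of the helper (checkError is a hypothetical stand-in):

#include <cstdio>
#include <cstdlib>
#include <system_error>

// Exit-on-error: callers just call it; there is nothing to propagate.
static void checkError(std::error_code EC) {
  if (!EC)
    return;
  std::fprintf(stderr, "Error reading file: %s.\n", EC.message().c_str());
  std::exit(1);
}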
diff --git a/contrib/llvm/tools/llvm-diff/DiffLog.cpp b/contrib/llvm/tools/llvm-diff/DiffLog.cpp
index 24a1b08..ed86058 100644
--- a/contrib/llvm/tools/llvm-diff/DiffLog.cpp
+++ b/contrib/llvm/tools/llvm-diff/DiffLog.cpp
@@ -20,7 +20,8 @@
using namespace llvm;
LogBuilder::~LogBuilder() {
- consumer.logf(*this);
+ if (consumer)
+ consumer->logf(*this);
}
StringRef LogBuilder::getFormat() const { return Format; }
diff --git a/contrib/llvm/tools/llvm-diff/DiffLog.h b/contrib/llvm/tools/llvm-diff/DiffLog.h
index 8eb53ff..8f28461 100644
--- a/contrib/llvm/tools/llvm-diff/DiffLog.h
+++ b/contrib/llvm/tools/llvm-diff/DiffLog.h
@@ -27,7 +27,7 @@ namespace llvm {
/// A temporary-object class for building up log messages.
class LogBuilder {
- Consumer &consumer;
+ Consumer *consumer;
/// The use of a stored StringRef here is okay because
/// LogBuilder should be used only as a temporary, and as a
@@ -38,8 +38,12 @@ namespace llvm {
SmallVector<Value*, 4> Arguments;
public:
- LogBuilder(Consumer &c, StringRef Format)
- : consumer(c), Format(Format) {}
+ LogBuilder(Consumer &c, StringRef Format) : consumer(&c), Format(Format) {}
+ LogBuilder(LogBuilder &&L)
+ : consumer(L.consumer), Format(L.Format),
+ Arguments(std::move(L.Arguments)) {
+ L.consumer = nullptr;
+ }
LogBuilder &operator<<(Value *V) {
Arguments.push_back(V);
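
Switching LogBuilder's consumer to a pointer is what makes the new move constructor safe: the source is disarmed by nulling consumer, and the destructor (see DiffLog.cpp above) logs only through a non-null pointer, so a moved-from temporary cannot emit the message twice. The idiom in miniature (Notifier is a hypothetical type):

// A type whose destructor does work, made movable by disarming the
// moved-from object.
struct Notifier {
  struct Sink { void fire() { /* log once */ } };
  Sink *S;
  explicit Notifier(Sink &Sk) : S(&Sk) {}
  Notifier(Notifier &&O) : S(O.S) { O.S = nullptr; } // steal and disarm
  ~Notifier() { if (S) S->fire(); }                  // fires at most once
};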
diff --git a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
index 7d379ef..456560b 100644
--- a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
@@ -599,7 +599,7 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
TerminatorInst *RTerm = RStart->getParent()->getTerminator();
if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
if (cast<BranchInst>(LTerm)->isConditional()) return;
- BasicBlock::iterator I = LTerm;
+ BasicBlock::iterator I = LTerm->getIterator();
if (I == LStart->getParent()->begin()) return;
--I;
if (!isa<CallInst>(*I)) return;
@@ -612,7 +612,7 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
} else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
if (cast<BranchInst>(RTerm)->isConditional()) return;
- BasicBlock::iterator I = RTerm;
+ BasicBlock::iterator I = RTerm->getIterator();
if (I == RStart->getParent()->begin()) return;
--I;
if (!isa<CallInst>(*I)) return;
diff --git a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
index 4b7d94d..9fdfcd4 100644
--- a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -159,7 +159,7 @@ int main(int argc, char **argv) {
ErrorOr<std::unique_ptr<Module>> MOrErr =
getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context);
M = std::move(*MOrErr);
- M->materializeAllPermanently();
+ M->materializeAll();
} else {
errs() << argv[0] << ": " << ErrorMessage << '\n';
return 1;
diff --git a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index db3fcf6..eaacc7c 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocVisitor.h"
#include "llvm/Support/CommandLine.h"
@@ -22,6 +23,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
@@ -35,19 +37,19 @@ using namespace llvm;
using namespace object;
static cl::list<std::string>
-InputFilenames(cl::Positional, cl::desc("<input object files>"),
+InputFilenames(cl::Positional, cl::desc("<input object files or .dSYM bundles>"),
cl::ZeroOrMore);
-static cl::opt<DIDumpType>
-DumpType("debug-dump", cl::init(DIDT_All),
- cl::desc("Dump of debug sections:"),
- cl::values(
+static cl::opt<DIDumpType> DumpType(
+ "debug-dump", cl::init(DIDT_All), cl::desc("Dump of debug sections:"),
+ cl::values(
clEnumValN(DIDT_All, "all", "Dump all debug sections"),
clEnumValN(DIDT_Abbrev, "abbrev", ".debug_abbrev"),
clEnumValN(DIDT_AbbrevDwo, "abbrev.dwo", ".debug_abbrev.dwo"),
clEnumValN(DIDT_AppleNames, "apple_names", ".apple_names"),
clEnumValN(DIDT_AppleTypes, "apple_types", ".apple_types"),
- clEnumValN(DIDT_AppleNamespaces, "apple_namespaces", ".apple_namespaces"),
+ clEnumValN(DIDT_AppleNamespaces, "apple_namespaces",
+ ".apple_namespaces"),
clEnumValN(DIDT_AppleObjC, "apple_objc", ".apple_objc"),
clEnumValN(DIDT_Aranges, "aranges", ".debug_aranges"),
clEnumValN(DIDT_Info, "info", ".debug_info"),
@@ -59,6 +61,7 @@ DumpType("debug-dump", cl::init(DIDT_All),
clEnumValN(DIDT_Loc, "loc", ".debug_loc"),
clEnumValN(DIDT_LocDwo, "loc.dwo", ".debug_loc.dwo"),
clEnumValN(DIDT_Frames, "frames", ".debug_frame"),
+ clEnumValN(DIDT_Macro, "macro", ".debug_macinfo"),
clEnumValN(DIDT_Ranges, "ranges", ".debug_ranges"),
clEnumValN(DIDT_Pubnames, "pubnames", ".debug_pubnames"),
clEnumValN(DIDT_Pubtypes, "pubtypes", ".debug_pubtypes"),
@@ -66,38 +69,79 @@ DumpType("debug-dump", cl::init(DIDT_All),
clEnumValN(DIDT_GnuPubtypes, "gnu_pubtypes", ".debug_gnu_pubtypes"),
clEnumValN(DIDT_Str, "str", ".debug_str"),
clEnumValN(DIDT_StrDwo, "str.dwo", ".debug_str.dwo"),
- clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo", ".debug_str_offsets.dwo"),
- clEnumValEnd));
+ clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo",
+ ".debug_str_offsets.dwo"),
+ clEnumValN(DIDT_CUIndex, "cu_index", ".debug_cu_index"),
+ clEnumValN(DIDT_TUIndex, "tu_index", ".debug_tu_index"), clEnumValEnd));
-static int ReturnValue = EXIT_SUCCESS;
-
-static bool error(StringRef Filename, std::error_code EC) {
+static void error(StringRef Filename, std::error_code EC) {
if (!EC)
- return false;
+ return;
errs() << Filename << ": " << EC.message() << "\n";
- ReturnValue = EXIT_FAILURE;
- return true;
+ exit(1);
+}
+
+static void DumpObjectFile(ObjectFile &Obj, Twine Filename) {
+ std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
+
+ outs() << Filename.str() << ":\tfile format " << Obj.getFileFormatName()
+ << "\n\n";
+ // Dump the complete DWARF structure.
+ DICtx->dump(outs(), DumpType);
}
static void DumpInput(StringRef Filename) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
- if (error(Filename, BuffOrErr.getError()))
- return;
+ error(Filename, BuffOrErr.getError());
std::unique_ptr<MemoryBuffer> Buff = std::move(BuffOrErr.get());
- ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
- ObjectFile::createObjectFile(Buff->getMemBufferRef());
- if (error(Filename, ObjOrErr.getError()))
- return;
- ObjectFile &Obj = *ObjOrErr.get();
+ ErrorOr<std::unique_ptr<Binary>> BinOrErr =
+ object::createBinary(Buff->getMemBufferRef());
+ error(Filename, BinOrErr.getError());
- std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
+ if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get()))
+ DumpObjectFile(*Obj, Filename);
+ else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get()))
+ for (auto &ObjForArch : Fat->objects()) {
+ auto MachOOrErr = ObjForArch.getAsObjectFile();
+ error(Filename, MachOOrErr.getError());
+ DumpObjectFile(**MachOOrErr,
+ Filename + " (" + ObjForArch.getArchTypeName() + ")");
+ }
+}
- outs() << Filename
- << ":\tfile format " << Obj.getFileFormatName() << "\n\n";
- // Dump the complete DWARF structure.
- DICtx->dump(outs(), DumpType);
+/// If the input path is a .dSYM bundle (as created by the dsymutil tool),
+/// replace it with individual entries for each of the object files inside the
+/// bundle; otherwise return the input path.
+static std::vector<std::string> expandBundle(std::string InputPath) {
+ std::vector<std::string> BundlePaths;
+ SmallString<256> BundlePath(InputPath);
+ // Manually open up the bundle to avoid introducing additional dependencies.
+ if (sys::fs::is_directory(BundlePath) &&
+ sys::path::extension(BundlePath) == ".dSYM") {
+ std::error_code EC;
+ sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
+ for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ const std::string &Path = Dir->path();
+ sys::fs::file_status Status;
+ EC = sys::fs::status(Path, Status);
+ error(Path, EC);
+ switch (Status.type()) {
+ case sys::fs::file_type::regular_file:
+ case sys::fs::file_type::symlink_file:
+ case sys::fs::file_type::type_unknown:
+ BundlePaths.push_back(Path);
+ break;
+ default: /*ignore*/;
+ }
+ }
+ error(BundlePath, EC);
+ }
+ if (!BundlePaths.size())
+ BundlePaths.push_back(InputPath);
+ return BundlePaths;
}
int main(int argc, char **argv) {
@@ -112,7 +156,14 @@ int main(int argc, char **argv) {
if (InputFilenames.size() == 0)
InputFilenames.push_back("a.out");
- std::for_each(InputFilenames.begin(), InputFilenames.end(), DumpInput);
+ // Expand any .dSYM bundles to the individual object files contained therein.
+ std::vector<std::string> Objects;
+ for (auto F : InputFilenames) {
+ auto Objs = expandBundle(F);
+ Objects.insert(Objects.end(), Objs.begin(), Objs.end());
+ }
+
+ std::for_each(Objects.begin(), Objects.end(), DumpInput);
- return ReturnValue;
+ return EXIT_SUCCESS;
}
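
expandBundle() above only walks the fixed dsymutil layout Foo.dSYM/Contents/Resources/DWARF and otherwise passes the input through. A self-contained restatement using std::filesystem instead of llvm::sys (simplified: the type_unknown case and the final error check are dropped):

    #include <filesystem>
    #include <string>
    #include <system_error>
    #include <vector>

    std::vector<std::string> expandBundle(const std::string &InputPath) {
      namespace fs = std::filesystem;
      std::vector<std::string> Paths;
      fs::path P(InputPath);
      if (fs::is_directory(P) && P.extension() == ".dSYM") {
        std::error_code EC;
        fs::directory_iterator It(P / "Contents" / "Resources" / "DWARF", EC);
        if (!EC)
          for (const fs::directory_entry &Entry : It)
            if (Entry.is_regular_file() || Entry.is_symlink())
              Paths.push_back(Entry.path().string());
      }
      if (Paths.empty())        // not a bundle: pass the input through unchanged
        Paths.push_back(InputPath);
      return Paths;
    }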
diff --git a/contrib/llvm/tools/llvm-extract/llvm-extract.cpp b/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
index 936496c..1da456d 100644
--- a/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
+++ b/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
@@ -222,45 +222,42 @@ int main(int argc, char **argv) {
}
}
- // Materialize requisite global values.
- if (!DeleteFn)
- for (size_t i = 0, e = GVs.size(); i != e; ++i) {
- GlobalValue *GV = GVs[i];
- if (std::error_code EC = GV->materialize()) {
- errs() << argv[0] << ": error reading input: " << EC.message() << "\n";
- return 1;
- }
+ auto Materialize = [&](GlobalValue &GV) {
+ if (std::error_code EC = GV.materialize()) {
+ errs() << argv[0] << ": error reading input: " << EC.message() << "\n";
+ exit(1);
}
- else {
+ };
+
+ // Materialize requisite global values.
+ if (!DeleteFn) {
+ for (size_t i = 0, e = GVs.size(); i != e; ++i)
+ Materialize(*GVs[i]);
+ } else {
// Deleting. Materialize every GV that's *not* in GVs.
SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end());
- for (auto &G : M->globals()) {
- if (!GVSet.count(&G)) {
- if (std::error_code EC = G.materialize()) {
- errs() << argv[0] << ": error reading input: " << EC.message()
- << "\n";
- return 1;
- }
- }
- }
for (auto &F : *M) {
- if (!GVSet.count(&F)) {
- if (std::error_code EC = F.materialize()) {
- errs() << argv[0] << ": error reading input: " << EC.message()
- << "\n";
- return 1;
- }
- }
+ if (!GVSet.count(&F))
+ Materialize(F);
}
}
+ {
+ std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end());
+ legacy::PassManager Extract;
+ Extract.add(createGVExtractionPass(Gvs, DeleteFn));
+ Extract.run(*M);
+
+ // Now that we have all the GVs we want, mark the module as fully
+ // materialized.
+ // FIXME: should the GVExtractionPass handle this?
+ M->materializeAll();
+ }
+
// In addition to deleting all other functions, we also want to spiff it
// up a little bit. Do this now.
legacy::PassManager Passes;
- std::vector<GlobalValue*> Gvs(GVs.begin(), GVs.end());
-
- Passes.add(createGVExtractionPass(Gvs, DeleteFn));
if (!DeleteFn)
Passes.add(createGlobalDCEPass()); // Delete unreachable globals
Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info
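
The llvm-extract rewrite above hoists three copies of the same materialize-or-die block into a single local lambda capturing by reference. The shape of that refactor, reduced to a standalone sketch (process() stands in for GlobalValue::materialize()):

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    static int process(int Item) { return Item < 0 ? 1 : 0; } // stand-in

    void processAll(const std::vector<int> &Items, const char *Tool) {
      // One copy of the error handling, shared by every call site below.
      auto Checked = [&](int Item) {
        if (process(Item) != 0) {
          std::fprintf(stderr, "%s: error reading input\n", Tool);
          std::exit(1);
        }
      };
      for (int Item : Items)
        Checked(Item);
    }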
diff --git a/contrib/llvm/tools/llvm-link/llvm-link.cpp b/contrib/llvm/tools/llvm-link/llvm-link.cpp
index 369f347..a3238302 100644
--- a/contrib/llvm/tools/llvm-link/llvm-link.cpp
+++ b/contrib/llvm/tools/llvm-link/llvm-link.cpp
@@ -18,10 +18,12 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
@@ -43,11 +45,34 @@ static cl::list<std::string> OverridingInputs(
cl::desc(
"input bitcode file which can override previously defined symbol(s)"));
+// Option to simulate function importing for testing. This enables using
+// llvm-link to simulate ThinLTO backend processes.
+static cl::list<std::string> Imports(
+ "import", cl::ZeroOrMore, cl::value_desc("function:filename"),
+ cl::desc("Pair of function name and filename, where function should be "
+ "imported from bitcode in filename"));
+
+// Option to support testing of function importing. The function index
+// must be specified in the case where we request imports via the -import
+// option, as well as when compiling any module with functions that may be
+// exported (imported by a different llvm-link -import invocation), to ensure
+// consistent promotion and renaming of locals.
+static cl::opt<std::string> FunctionIndex("functionindex",
+ cl::desc("Function index filename"),
+ cl::init(""),
+ cl::value_desc("filename"));
+
static cl::opt<std::string>
OutputFilename("o", cl::desc("Override output filename"), cl::init("-"),
cl::value_desc("filename"));
static cl::opt<bool>
+Internalize("internalize", cl::desc("Internalize linked symbols"));
+
+static cl::opt<bool>
+OnlyNeeded("only-needed", cl::desc("Link only needed symbols"));
+
+static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
@@ -64,6 +89,10 @@ static cl::opt<bool>
SuppressWarnings("suppress-warnings", cl::desc("Suppress all linking warnings"),
cl::init(false));
+static cl::opt<bool>
+ PreserveModules("preserve-modules",
+ cl::desc("Preserve linked modules for testing"));
+
static cl::opt<bool> PreserveBitcodeUseListOrder(
"preserve-bc-uselistorder",
cl::desc("Preserve use-list order when writing LLVM bitcode."),
@@ -77,16 +106,21 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
// Read the specified bitcode file in and return it. This routine searches the
// link path for the specified file to try to find it...
//
-static std::unique_ptr<Module>
-loadFile(const char *argv0, const std::string &FN, LLVMContext &Context) {
+static std::unique_ptr<Module> loadFile(const char *argv0,
+ const std::string &FN,
+ LLVMContext &Context,
+ bool MaterializeMetadata = true) {
SMDiagnostic Err;
if (Verbose) errs() << "Loading '" << FN << "'\n";
- std::unique_ptr<Module> Result = getLazyIRFileModule(FN, Err, Context);
+ std::unique_ptr<Module> Result =
+ getLazyIRFileModule(FN, Err, Context, !MaterializeMetadata);
if (!Result)
Err.print(argv0, errs());
- Result->materializeMetadata();
- UpgradeDebugInfo(*Result);
+ if (MaterializeMetadata) {
+ Result->materializeMetadata();
+ UpgradeDebugInfo(*Result);
+ }
return Result;
}
@@ -112,9 +146,111 @@ static void diagnosticHandler(const DiagnosticInfo &DI) {
errs() << '\n';
}
+static void diagnosticHandlerWithContext(const DiagnosticInfo &DI, void *C) {
+ diagnosticHandler(DI);
+}
+
+/// Import any functions requested via the -import option.
+static bool importFunctions(const char *argv0, LLVMContext &Context,
+ Linker &L) {
+ StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+ ModuleToTempMDValsMap;
+ for (const auto &Import : Imports) {
+ // Identify the requested function and its bitcode source file.
+ size_t Idx = Import.find(':');
+ if (Idx == std::string::npos) {
+ errs() << "Import parameter bad format: " << Import << "\n";
+ return false;
+ }
+ std::string FunctionName = Import.substr(0, Idx);
+ std::string FileName = Import.substr(Idx + 1, std::string::npos);
+
+ // Load the specified source module.
+ std::unique_ptr<Module> M = loadFile(argv0, FileName, Context, false);
+ if (!M.get()) {
+ errs() << argv0 << ": error loading file '" << FileName << "'\n";
+ return false;
+ }
+
+ if (verifyModule(*M, &errs())) {
+ errs() << argv0 << ": " << FileName
+ << ": error: input module is broken!\n";
+ return false;
+ }
+
+ Function *F = M->getFunction(FunctionName);
+ if (!F) {
+ errs() << "Ignoring import request for non-existent function "
+ << FunctionName << " from " << FileName << "\n";
+ continue;
+ }
+ // We cannot import weak_any functions without possibly affecting the
+ // order they are seen and selected by the linker, changing program
+ // semantics.
+ if (F->hasWeakAnyLinkage()) {
+ errs() << "Ignoring import request for weak-any function " << FunctionName
+ << " from " << FileName << "\n";
+ continue;
+ }
+
+ if (Verbose)
+ errs() << "Importing " << FunctionName << " from " << FileName << "\n";
+
+ std::unique_ptr<FunctionInfoIndex> Index;
+ if (!FunctionIndex.empty()) {
+ ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
+ llvm::getFunctionIndexForFile(FunctionIndex, diagnosticHandler);
+ std::error_code EC = IndexOrErr.getError();
+ if (EC) {
+ errs() << EC.message() << '\n';
+ return false;
+ }
+ Index = std::move(IndexOrErr.get());
+ }
+
+ // Save the mapping of value ids to temporary metadata created when
+ // importing this function. If we have already imported from this module,
+ // add new temporary metadata to the existing mapping.
+ auto &TempMDVals = ModuleToTempMDValsMap[FileName];
+ if (!TempMDVals)
+ TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+ // Link in the specified function.
+ DenseSet<const GlobalValue *> FunctionsToImport;
+ FunctionsToImport.insert(F);
+ if (L.linkInModule(std::move(M), Linker::Flags::None, Index.get(),
+ &FunctionsToImport, TempMDVals.get()))
+ return false;
+ }
+
+ // Now link in metadata for all modules from which we imported functions.
+ for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+ ModuleToTempMDValsMap) {
+ // Load the specified source module.
+ std::unique_ptr<Module> M = loadFile(argv0, SME.getKey(), Context, true);
+ if (!M.get()) {
+ errs() << argv0 << ": error loading file '" << SME.getKey() << "'\n";
+ return false;
+ }
+
+ if (verifyModule(*M, &errs())) {
+ errs() << argv0 << ": " << SME.getKey()
+ << ": error: input module is broken!\n";
+ return false;
+ }
+
+ // Link in all necessary metadata from this module.
+ if (L.linkInMetadata(*M, SME.getValue().get()))
+ return false;
+ }
+ return true;
+}
+
static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L,
const cl::list<std::string> &Files,
- bool OverrideDuplicateSymbols) {
+ unsigned Flags) {
+ // Filter out flags that don't apply to the first file we load.
+ unsigned ApplicableFlags = Flags & Linker::Flags::OverrideFromSrc;
for (const auto &File : Files) {
std::unique_ptr<Module> M = loadFile(argv0, File, Context);
if (!M.get()) {
@@ -127,11 +263,36 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L,
return false;
}
+ // If a function index is supplied, load it so linkInModule can treat
+ // local functions/variables as exported and promote if necessary.
+ std::unique_ptr<FunctionInfoIndex> Index;
+ if (!FunctionIndex.empty()) {
+ ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
+ llvm::getFunctionIndexForFile(FunctionIndex, diagnosticHandler);
+ std::error_code EC = IndexOrErr.getError();
+ if (EC) {
+ errs() << EC.message() << '\n';
+ return false;
+ }
+ Index = std::move(IndexOrErr.get());
+ }
+
if (Verbose)
errs() << "Linking in '" << File << "'\n";
- if (L.linkInModule(M.get(), OverrideDuplicateSymbols))
+ if (L.linkInModule(std::move(M), ApplicableFlags, Index.get()))
return false;
+ // All linker flags apply to linking of subsequent files.
+ ApplicableFlags = Flags;
+
+ // If requested for testing, preserve modules by releasing them from
+      // the unique_ptr before they are freed. This can help catch any
+ // cross-module references from e.g. unneeded metadata references
+ // that aren't properly set to null but instead mapped to the source
+ // module version. The bitcode writer will assert if it finds any such
+ // cross-module references.
+ if (PreserveModules)
+ M.release();
}
return true;
@@ -143,18 +304,31 @@ int main(int argc, char **argv) {
PrettyStackTraceProgram X(argc, argv);
LLVMContext &Context = getGlobalContext();
+ Context.setDiagnosticHandler(diagnosticHandlerWithContext, nullptr, true);
+
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm linker\n");
auto Composite = make_unique<Module>("llvm-link", Context);
- Linker L(Composite.get(), diagnosticHandler);
+ Linker L(*Composite);
+
+ unsigned Flags = Linker::Flags::None;
+ if (Internalize)
+ Flags |= Linker::Flags::InternalizeLinkedSymbols;
+ if (OnlyNeeded)
+ Flags |= Linker::Flags::LinkOnlyNeeded;
// First add all the regular input files
- if (!linkFiles(argv[0], Context, L, InputFilenames, false))
+ if (!linkFiles(argv[0], Context, L, InputFilenames, Flags))
return 1;
// Next the -override ones.
- if (!linkFiles(argv[0], Context, L, OverridingInputs, true))
+ if (!linkFiles(argv[0], Context, L, OverridingInputs,
+ Flags | Linker::Flags::OverrideFromSrc))
+ return 1;
+
+ // Import any functions requested via -import
+ if (!importFunctions(argv[0], Context, L))
return 1;
if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
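
A subtle point in the linkFiles() change: Internalize and OnlyNeeded must not apply to the first module, which effectively becomes the destination, so ApplicableFlags starts as the OverrideFromSrc bit alone and widens to the full set afterwards. A reduced sketch of that bitmask idiom (the enumerators mirror Linker::Flags but are defined locally):

    enum LinkFlags : unsigned {
      None = 0,
      OverrideFromSrc = 1u << 0,
      LinkOnlyNeeded = 1u << 1,
      InternalizeLinkedSymbols = 1u << 2,
    };

    void linkAll(unsigned Flags, unsigned NumFiles) {
      // Only OverrideFromSrc may apply to the first file; internalizing or
      // pruning the base module would be wrong.
      unsigned ApplicableFlags = Flags & OverrideFromSrc;
      for (unsigned I = 0; I != NumFiles; ++I) {
        // linkInModule(Files[I], ApplicableFlags);  // hypothetical call site
        ApplicableFlags = Flags;  // the full set applies from the second file on
      }
    }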
diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
index 0821898..4bc6922 100644
--- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -13,16 +13,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringSet.h"
+#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/LTO/LTOModule.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
+#include <list>
using namespace llvm;
@@ -34,6 +40,10 @@ OptLevel("O",
cl::ZeroOrMore,
cl::init('2'));
+static cl::opt<bool> DisableVerify(
+ "disable-verify", cl::init(false),
+ cl::desc("Do not run the verifier during the optimization pipeline"));
+
static cl::opt<bool>
DisableInline("disable-inlining", cl::init(false),
cl::desc("Do not run the inliner pass"));
@@ -50,6 +60,14 @@ static cl::opt<bool>
UseDiagnosticHandler("use-diagnostic-handler", cl::init(false),
cl::desc("Use a diagnostic handler to test the handler interface"));
+static cl::opt<bool>
+ ThinLTO("thinlto", cl::init(false),
+ cl::desc("Only write combined global index for ThinLTO backends"));
+
+static cl::opt<bool>
+SaveModuleFile("save-merged-module", cl::init(false),
+ cl::desc("Write merged LTO module to file before CodeGen"));
+
static cl::list<std::string>
InputFilenames(cl::Positional, cl::OneOrMore,
cl::desc("<input bitcode files>"));
@@ -77,6 +95,9 @@ static cl::opt<bool> SetMergedModule(
"set-merged-module", cl::init(false),
cl::desc("Use the first input module as the merged module"));
+static cl::opt<unsigned> Parallelism("j", cl::Prefix, cl::init(1),
+ cl::desc("Number of backend threads"));
+
namespace {
struct ModuleInfo {
std::vector<bool> CanBeHidden;
@@ -85,6 +106,7 @@ struct ModuleInfo {
static void handleDiagnostics(lto_codegen_diagnostic_severity_t Severity,
const char *Msg, void *) {
+ errs() << "llvm-lto: ";
switch (Severity) {
case LTO_DS_NOTE:
errs() << "note: ";
@@ -102,18 +124,68 @@ static void handleDiagnostics(lto_codegen_diagnostic_severity_t Severity,
errs() << Msg << "\n";
}
+static std::string CurrentActivity;
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+ raw_ostream &OS = errs();
+ OS << "llvm-lto: ";
+ switch (DI.getSeverity()) {
+ case DS_Error:
+ OS << "error";
+ break;
+ case DS_Warning:
+ OS << "warning";
+ break;
+ case DS_Remark:
+ OS << "remark";
+ break;
+ case DS_Note:
+ OS << "note";
+ break;
+ }
+ if (!CurrentActivity.empty())
+ OS << ' ' << CurrentActivity;
+ OS << ": ";
+
+ DiagnosticPrinterRawOStream DP(OS);
+ DI.print(DP);
+ OS << '\n';
+
+ if (DI.getSeverity() == DS_Error)
+ exit(1);
+}
+
+static void diagnosticHandlerWithContext(const DiagnosticInfo &DI,
+ void *Context) {
+ diagnosticHandler(DI);
+}
+
+static void error(const Twine &Msg) {
+ errs() << "llvm-lto: " << Msg << '\n';
+ exit(1);
+}
+
+static void error(std::error_code EC, const Twine &Prefix) {
+ if (EC)
+ error(Prefix + ": " + EC.message());
+}
+
+template <typename T>
+static void error(const ErrorOr<T> &V, const Twine &Prefix) {
+ error(V.getError(), Prefix);
+}
+
static std::unique_ptr<LTOModule>
getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
- const TargetOptions &Options, std::string &Error) {
+ const TargetOptions &Options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(Path);
- if (std::error_code EC = BufferOrErr.getError()) {
- Error = EC.message();
- return nullptr;
- }
+ error(BufferOrErr, "error loading file '" + Path + "'");
Buffer = std::move(BufferOrErr.get());
- return std::unique_ptr<LTOModule>(LTOModule::createInLocalContext(
- Buffer->getBufferStart(), Buffer->getBufferSize(), Options, Error, Path));
+ CurrentActivity = ("loading file '" + Path + "'").str();
+ ErrorOr<std::unique_ptr<LTOModule>> Ret = LTOModule::createInLocalContext(
+ Buffer->getBufferStart(), Buffer->getBufferSize(), Options, Path);
+ CurrentActivity = "";
+ return std::move(*Ret);
}
/// \brief List symbols in each IR file.
@@ -122,24 +194,44 @@ getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
/// functionality that's exposed by the C API to list symbols. Moreover, this
/// provides testing coverage for modules that have been created in their own
/// contexts.
-static int listSymbols(StringRef Command, const TargetOptions &Options) {
+static void listSymbols(const TargetOptions &Options) {
for (auto &Filename : InputFilenames) {
- std::string Error;
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<LTOModule> Module =
- getLocalLTOModule(Filename, Buffer, Options, Error);
- if (!Module) {
- errs() << Command << ": error loading file '" << Filename
- << "': " << Error << "\n";
- return 1;
- }
+ getLocalLTOModule(Filename, Buffer, Options);
// List the symbols.
outs() << Filename << ":\n";
for (int I = 0, E = Module->getSymbolCount(); I != E; ++I)
outs() << Module->getSymbolName(I) << "\n";
}
- return 0;
+}
+
+/// Create a combined index file from the input IR files and write it.
+///
+/// This is meant to enable testing of ThinLTO combined index generation,
+/// currently available in the gold plugin via -thinlto.
+static void createCombinedFunctionIndex() {
+ FunctionInfoIndex CombinedIndex;
+ uint64_t NextModuleId = 0;
+ for (auto &Filename : InputFilenames) {
+ CurrentActivity = "loading file '" + Filename + "'";
+ ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
+ llvm::getFunctionIndexForFile(Filename, diagnosticHandler);
+ std::unique_ptr<FunctionInfoIndex> Index = std::move(IndexOrErr.get());
+ CurrentActivity = "";
+ // Skip files without a function summary.
+ if (!Index)
+ continue;
+ CombinedIndex.mergeFrom(std::move(Index), ++NextModuleId);
+ }
+ std::error_code EC;
+ assert(!OutputFilename.empty());
+ raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
+ sys::fs::OpenFlags::F_None);
+ error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'");
+ WriteFunctionSummaryToFile(CombinedIndex, OS);
+ OS.close();
}
int main(int argc, char **argv) {
@@ -150,10 +242,8 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm LTO linker\n");
- if (OptLevel < '0' || OptLevel > '3') {
- errs() << argv[0] << ": optimization level must be between 0 and 3\n";
- return 1;
- }
+ if (OptLevel < '0' || OptLevel > '3')
+ error("optimization level must be between 0 and 3");
// Initialize the configured targets.
InitializeAllTargets();
@@ -164,29 +254,27 @@ int main(int argc, char **argv) {
// set up the TargetOptions for the machine
TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
- if (ListSymbolsOnly)
- return listSymbols(argv[0], Options);
+ if (ListSymbolsOnly) {
+ listSymbols(Options);
+ return 0;
+ }
+
+ if (ThinLTO) {
+ createCombinedFunctionIndex();
+ return 0;
+ }
unsigned BaseArg = 0;
- LTOCodeGenerator CodeGen;
+ LLVMContext Context;
+  Context.setDiagnosticHandler(diagnosticHandlerWithContext, nullptr, true);
+
+ LTOCodeGenerator CodeGen(Context);
if (UseDiagnosticHandler)
CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr);
- switch (RelocModel) {
- case Reloc::Static:
- CodeGen.setCodePICModel(LTO_CODEGEN_PIC_MODEL_STATIC);
- break;
- case Reloc::PIC_:
- CodeGen.setCodePICModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC);
- break;
- case Reloc::DynamicNoPIC:
- CodeGen.setCodePICModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC);
- break;
- default:
- CodeGen.setCodePICModel(LTO_CODEGEN_PIC_MODEL_DEFAULT);
- }
+ CodeGen.setCodePICModel(RelocModel);
CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF);
CodeGen.setTargetOptions(Options);
@@ -198,38 +286,33 @@ int main(int argc, char **argv) {
std::vector<std::string> KeptDSOSyms;
for (unsigned i = BaseArg; i < InputFilenames.size(); ++i) {
- std::string error;
- std::unique_ptr<LTOModule> Module(
- LTOModule::createFromFile(InputFilenames[i].c_str(), Options, error));
- if (!error.empty()) {
- errs() << argv[0] << ": error loading file '" << InputFilenames[i]
- << "': " << error << "\n";
- return 1;
- }
+ CurrentActivity = "loading file '" + InputFilenames[i] + "'";
+ ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr =
+ LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options);
+ std::unique_ptr<LTOModule> &Module = *ModuleOrErr;
+ CurrentActivity = "";
- LTOModule *LTOMod = Module.get();
+ unsigned NumSyms = Module->getSymbolCount();
+ for (unsigned I = 0; I < NumSyms; ++I) {
+ StringRef Name = Module->getSymbolName(I);
+ if (!DSOSymbolsSet.count(Name))
+ continue;
+ lto_symbol_attributes Attrs = Module->getSymbolAttributes(I);
+ unsigned Scope = Attrs & LTO_SYMBOL_SCOPE_MASK;
+ if (Scope != LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN)
+ KeptDSOSyms.push_back(Name);
+ }
// We use the first input module as the destination module when
// SetMergedModule is true.
if (SetMergedModule && i == BaseArg) {
// Transfer ownership to the code generator.
- CodeGen.setModule(Module.release());
+ CodeGen.setModule(std::move(Module));
} else if (!CodeGen.addModule(Module.get())) {
// Print a message here so that we know addModule() did not abort.
errs() << argv[0] << ": error adding file '" << InputFilenames[i] << "'\n";
return 1;
}
-
- unsigned NumSyms = LTOMod->getSymbolCount();
- for (unsigned I = 0; I < NumSyms; ++I) {
- StringRef Name = LTOMod->getSymbolName(I);
- if (!DSOSymbolsSet.count(Name))
- continue;
- lto_symbol_attributes Attrs = LTOMod->getSymbolAttributes(I);
- unsigned Scope = Attrs & LTO_SYMBOL_SCOPE_MASK;
- if (Scope != LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN)
- KeptDSOSyms.push_back(Name);
- }
}
// Add all the exported symbols to the table of symbols to preserve.
@@ -255,34 +338,68 @@ int main(int argc, char **argv) {
if (!attrs.empty())
CodeGen.setAttr(attrs.c_str());
+ if (FileType.getNumOccurrences())
+ CodeGen.setFileType(FileType);
+
if (!OutputFilename.empty()) {
- std::string ErrorInfo;
- std::unique_ptr<MemoryBuffer> Code = CodeGen.compile(
- DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo);
- if (!Code) {
- errs() << argv[0]
- << ": error compiling the code: " << ErrorInfo << "\n";
+ if (!CodeGen.optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
+ DisableLTOVectorization)) {
+ // Diagnostic messages should have been printed by the handler.
+ errs() << argv[0] << ": error optimizing the code\n";
return 1;
}
- std::error_code EC;
- raw_fd_ostream FileStream(OutputFilename, EC, sys::fs::F_None);
- if (EC) {
- errs() << argv[0] << ": error opening the file '" << OutputFilename
- << "': " << EC.message() << "\n";
+ if (SaveModuleFile) {
+ std::string ModuleFilename = OutputFilename;
+ ModuleFilename += ".merged.bc";
+ std::string ErrMsg;
+
+ if (!CodeGen.writeMergedModules(ModuleFilename.c_str())) {
+ errs() << argv[0] << ": writing merged module failed.\n";
+ return 1;
+ }
+ }
+
+ std::list<tool_output_file> OSs;
+ std::vector<raw_pwrite_stream *> OSPtrs;
+ for (unsigned I = 0; I != Parallelism; ++I) {
+ std::string PartFilename = OutputFilename;
+ if (Parallelism != 1)
+ PartFilename += "." + utostr(I);
+ std::error_code EC;
+ OSs.emplace_back(PartFilename, EC, sys::fs::F_None);
+ if (EC) {
+ errs() << argv[0] << ": error opening the file '" << PartFilename
+ << "': " << EC.message() << "\n";
+ return 1;
+ }
+ OSPtrs.push_back(&OSs.back().os());
+ }
+
+ if (!CodeGen.compileOptimized(OSPtrs)) {
+ // Diagnostic messages should have been printed by the handler.
+ errs() << argv[0] << ": error compiling the code\n";
return 1;
}
- FileStream.write(Code->getBufferStart(), Code->getBufferSize());
+ for (tool_output_file &OS : OSs)
+ OS.keep();
} else {
- std::string ErrorInfo;
+ if (Parallelism != 1) {
+ errs() << argv[0] << ": -j must be specified together with -o\n";
+ return 1;
+ }
+
+ if (SaveModuleFile) {
+ errs() << argv[0] << ": -save-merged-module must be specified with -o\n";
+ return 1;
+ }
+
const char *OutputName = nullptr;
- if (!CodeGen.compile_to_file(&OutputName, DisableInline,
- DisableGVNLoadPRE, DisableLTOVectorization,
- ErrorInfo)) {
- errs() << argv[0]
- << ": error compiling the code: " << ErrorInfo
- << "\n";
+ if (!CodeGen.compile_to_file(&OutputName, DisableVerify, DisableInline,
+ DisableGVNLoadPRE, DisableLTOVectorization)) {
+ // Diagnostic messages should have been printed by the handler.
+ errs() << argv[0] << ": error compiling the code\n";
return 1;
}
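
The parallel code-generation path above keeps the tool_output_file objects in a std::list rather than a std::vector for a reason: list nodes never move, so the raw stream pointers collected for compileOptimized() stay valid as more files are emplaced. A standalone illustration of that stability guarantee (ostringstream standing in for tool_output_file):

    #include <list>
    #include <sstream>
    #include <vector>

    int main() {
      std::list<std::ostringstream> Streams;  // stable addresses across growth
      std::vector<std::ostream *> Ptrs;
      for (unsigned I = 0; I != 4; ++I) {
        Streams.emplace_back();
        Ptrs.push_back(&Streams.back());      // safe: list nodes never relocate
      }
      for (std::ostream *OS : Ptrs)           // a vector could have dangled here
        *OS << "chunk\n";
    }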
diff --git a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
index 6ecdb2e..96e3f7c 100644
--- a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -1,4 +1,4 @@
-//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===//
+//===-- llvm-mc.cpp - Machine Code Hacking Driver ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,6 +39,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+
using namespace llvm;
static cl::opt<std::string>
@@ -234,7 +235,7 @@ static void setDwarfDebugFlags(int argc, char **argv) {
}
static std::string DwarfDebugProducer;
-static void setDwarfDebugProducer(void) {
+static void setDwarfDebugProducer() {
if(!getenv("DEBUG_PRODUCER"))
return;
DwarfDebugProducer += getenv("DEBUG_PRODUCER");
@@ -398,7 +399,7 @@ int main(int argc, char **argv) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
MemoryBuffer::getFileOrSTDIN(InputFilename);
if (std::error_code EC = BufferPtr.getError()) {
- errs() << ProgName << ": " << EC.message() << '\n';
+ errs() << InputFilename << ": " << EC.message() << '\n';
return 1;
}
MemoryBuffer *Buffer = BufferPtr->get();
@@ -510,9 +511,10 @@ int main(int argc, char **argv) {
MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU);
- Str.reset(TheTarget->createMCObjectStreamer(TheTriple, Ctx, *MAB, *OS, CE,
- *STI, RelaxAll,
- /*DWARFMustBeAtTheEnd*/ false));
+ Str.reset(TheTarget->createMCObjectStreamer(
+ TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
+ MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ false));
if (NoExecStack)
Str->InitSections(true);
}
diff --git a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
index e7ee312..b812233 100644
--- a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -44,6 +45,7 @@
#include <cstring>
#include <system_error>
#include <vector>
+
using namespace llvm;
using namespace object;
@@ -64,27 +66,30 @@ cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input files>"),
cl::opt<bool> UndefinedOnly("undefined-only",
cl::desc("Show only undefined symbols"));
cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
- cl::aliasopt(UndefinedOnly));
+ cl::aliasopt(UndefinedOnly), cl::Grouping);
cl::opt<bool> DynamicSyms("dynamic",
cl::desc("Display the dynamic symbols instead "
"of normal symbols."));
cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
- cl::aliasopt(DynamicSyms));
+ cl::aliasopt(DynamicSyms), cl::Grouping);
cl::opt<bool> DefinedOnly("defined-only",
cl::desc("Show only defined symbols"));
cl::alias DefinedOnly2("U", cl::desc("Alias for --defined-only"),
- cl::aliasopt(DefinedOnly));
+ cl::aliasopt(DefinedOnly), cl::Grouping);
cl::opt<bool> ExternalOnly("extern-only",
cl::desc("Show only external symbols"));
cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"),
- cl::aliasopt(ExternalOnly));
+ cl::aliasopt(ExternalOnly), cl::Grouping);
-cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"));
-cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"));
-cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"));
+cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"),
+ cl::Grouping);
+cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
+ cl::Grouping);
+cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
+ cl::Grouping);
static cl::list<std::string>
ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
@@ -96,32 +101,33 @@ cl::opt<bool> PrintFileName(
cl::desc("Precede each symbol with the object file it came from"));
cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
- cl::aliasopt(PrintFileName));
+ cl::aliasopt(PrintFileName), cl::Grouping);
cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
- cl::aliasopt(PrintFileName));
+ cl::aliasopt(PrintFileName), cl::Grouping);
cl::opt<bool> DebugSyms("debug-syms",
cl::desc("Show all symbols, even debugger only"));
cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
- cl::aliasopt(DebugSyms));
+ cl::aliasopt(DebugSyms), cl::Grouping);
cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"));
cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
- cl::aliasopt(NumericSort));
+ cl::aliasopt(NumericSort), cl::Grouping);
cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
- cl::aliasopt(NumericSort));
+ cl::aliasopt(NumericSort), cl::Grouping);
cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"));
-cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort));
+cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
+ cl::Grouping);
cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"));
cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
- cl::aliasopt(ReverseSort));
+ cl::aliasopt(ReverseSort), cl::Grouping);
cl::opt<bool> PrintSize("print-size",
cl::desc("Show symbol size instead of address"));
cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
- cl::aliasopt(PrintSize));
+ cl::aliasopt(PrintSize), cl::Grouping);
cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"));
@@ -130,12 +136,12 @@ cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"));
cl::alias ArchiveMaps("M", cl::desc("Alias for --print-armap"),
- cl::aliasopt(ArchiveMap));
+ cl::aliasopt(ArchiveMap), cl::Grouping);
cl::opt<bool> JustSymbolName("just-symbol-name",
cl::desc("Print just the symbol's name"));
cl::alias JustSymbolNames("j", cl::desc("Alias for --just-symbol-name"),
- cl::aliasopt(JustSymbolName));
+ cl::aliasopt(JustSymbolName), cl::Grouping);
// FIXME: This option takes exactly two strings and should be allowed anywhere
// on the command line. Such that "llvm-nm -s __TEXT __text foo.o" would work.
@@ -147,7 +153,7 @@ cl::list<std::string> SegSect("s", cl::Positional, cl::ZeroOrMore,
"and section name, Mach-O only"));
cl::opt<bool> FormatMachOasHex("x", cl::desc("Print symbol entry in hex, "
- "Mach-O only"));
+ "Mach-O only"), cl::Grouping);
cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
cl::desc("Disable LLVM bitcode reader"));
@@ -159,7 +165,7 @@ bool MultipleFiles = false;
bool HadError = false;
std::string ToolName;
-}
+} // anonymous namespace
static void error(Twine Message, Twine Path = Twine()) {
HadError = true;
@@ -182,7 +188,7 @@ struct NMSymbol {
StringRef Name;
BasicSymbolRef Sym;
};
-}
+} // anonymous namespace
static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) {
bool ADefined = !(A.Sym.getFlags() & SymbolRef::SF_Undefined);
@@ -202,8 +208,14 @@ static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) {
}
static char isSymbolList64Bit(SymbolicFile &Obj) {
- if (isa<IRObjectFile>(Obj))
- return false;
+ if (isa<IRObjectFile>(Obj)) {
+ IRObjectFile *IRobj = dyn_cast<IRObjectFile>(&Obj);
+ Module &M = IRobj->getModule();
+ if (M.getTargetTriple().empty())
+ return false;
+ Triple T(M.getTargetTriple());
+ return T.isArch64Bit();
+ }
if (isa<COFFObjectFile>(Obj))
return false;
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
@@ -215,54 +227,80 @@ static StringRef CurrentFilename;
typedef std::vector<NMSymbol> SymbolListT;
static SymbolListT SymbolList;
+static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
+
// darwinPrintSymbol() is used to print a symbol from a Mach-O file when the
// the OutputFormat is darwin or we are printing Mach-O symbols in hex. For
// the darwin format it produces the same output as darwin's nm(1) -m output
// and when printing Mach-O symbols in hex it produces the same output as
// darwin's nm(1) -x format.
-static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I,
- char *SymbolAddrStr, const char *printBlanks) {
+static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
+ char *SymbolAddrStr, const char *printBlanks,
+ const char *printDashes, const char *printFormat) {
MachO::mach_header H;
MachO::mach_header_64 H_64;
- uint32_t Filetype, Flags;
- MachO::nlist_64 STE_64;
- MachO::nlist STE;
- uint8_t NType;
- uint8_t NSect;
- uint16_t NDesc;
- uint32_t NStrx;
- uint64_t NValue;
- DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
- if (MachO->is64Bit()) {
- H_64 = MachO->MachOObjectFile::getHeader64();
- Filetype = H_64.filetype;
- Flags = H_64.flags;
- STE_64 = MachO->getSymbol64TableEntry(SymDRI);
- NType = STE_64.n_type;
- NSect = STE_64.n_sect;
- NDesc = STE_64.n_desc;
- NStrx = STE_64.n_strx;
- NValue = STE_64.n_value;
+ uint32_t Filetype = MachO::MH_OBJECT;
+ uint32_t Flags = 0;
+ uint8_t NType = 0;
+ uint8_t NSect = 0;
+ uint16_t NDesc = 0;
+ uint32_t NStrx = 0;
+ uint64_t NValue = 0;
+ MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
+ if (Obj.isIR()) {
+ uint32_t SymFlags = I->Sym.getFlags();
+ if (SymFlags & SymbolRef::SF_Global)
+ NType |= MachO::N_EXT;
+ if (SymFlags & SymbolRef::SF_Hidden)
+ NType |= MachO::N_PEXT;
+ if (SymFlags & SymbolRef::SF_Undefined)
+ NType |= MachO::N_EXT | MachO::N_UNDF;
+ else {
+ // Here we have a symbol definition. So to fake out a section name we
+ // use 1, 2 and 3 for section numbers. See below where they are used to
+ // print out fake section names.
+ NType |= MachO::N_SECT;
+ if(SymFlags & SymbolRef::SF_Const)
+ NSect = 3;
+ else {
+ IRObjectFile *IRobj = dyn_cast<IRObjectFile>(&Obj);
+ char c = getSymbolNMTypeChar(*IRobj, I->Sym);
+ if (c == 't')
+ NSect = 1;
+ else
+ NSect = 2;
+ }
+ }
+ if (SymFlags & SymbolRef::SF_Weak)
+ NDesc |= MachO::N_WEAK_DEF;
} else {
- H = MachO->MachOObjectFile::getHeader();
- Filetype = H.filetype;
- Flags = H.flags;
- STE = MachO->getSymbolTableEntry(SymDRI);
- NType = STE.n_type;
- NSect = STE.n_sect;
- NDesc = STE.n_desc;
- NStrx = STE.n_strx;
- NValue = STE.n_value;
+ DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
+ if (MachO->is64Bit()) {
+ H_64 = MachO->MachOObjectFile::getHeader64();
+ Filetype = H_64.filetype;
+ Flags = H_64.flags;
+ MachO::nlist_64 STE_64 = MachO->getSymbol64TableEntry(SymDRI);
+ NType = STE_64.n_type;
+ NSect = STE_64.n_sect;
+ NDesc = STE_64.n_desc;
+ NStrx = STE_64.n_strx;
+ NValue = STE_64.n_value;
+ } else {
+ H = MachO->MachOObjectFile::getHeader();
+ Filetype = H.filetype;
+ Flags = H.flags;
+ MachO::nlist STE = MachO->getSymbolTableEntry(SymDRI);
+ NType = STE.n_type;
+ NSect = STE.n_sect;
+ NDesc = STE.n_desc;
+ NStrx = STE.n_strx;
+ NValue = STE.n_value;
+ }
}
// If we are printing Mach-O symbols in hex do that and return.
if (FormatMachOasHex) {
char Str[18] = "";
- const char *printFormat;
- if (MachO->is64Bit())
- printFormat = "%016" PRIx64;
- else
- printFormat = "%08" PRIx64;
format(printFormat, NValue).print(Str, sizeof(Str));
outs() << Str << ' ';
format("%02x", NType).print(Str, sizeof(Str));
@@ -280,6 +318,8 @@ static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I,
if (PrintAddress) {
if ((NType & MachO::N_TYPE) == MachO::N_INDR)
strcpy(SymbolAddrStr, printBlanks);
+ if (Obj.isIR() && (NType & MachO::N_TYPE) == MachO::N_TYPE)
+ strcpy(SymbolAddrStr, printDashes);
outs() << SymbolAddrStr << ' ';
}
@@ -314,8 +354,20 @@ static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I,
outs() << "(indirect) ";
break;
case MachO::N_SECT: {
- section_iterator Sec = MachO->section_end();
- MachO->getSymbolSection(I->Sym.getRawDataRefImpl(), Sec);
+ if (Obj.isIR()) {
+ // For llvm bitcode files print out a fake section name using the values
+      // 1, 2 and 3 for section numbers as set above.
+ if (NSect == 1)
+ outs() << "(LTO,CODE) ";
+ else if (NSect == 2)
+ outs() << "(LTO,DATA) ";
+ else if (NSect == 3)
+ outs() << "(LTO,RODATA) ";
+ else
+ outs() << "(?,?) ";
+ break;
+ }
+ section_iterator Sec = *MachO->getSymbolSection(I->Sym.getRawDataRefImpl());
DataRefImpl Ref = Sec->getRawDataRefImpl();
StringRef SectionName;
MachO->getSectionName(Ref, SectionName);
@@ -374,7 +426,8 @@ static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I,
if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
outs() << I->Name << " (for ";
StringRef IndirectName;
- if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+ if (!MachO ||
+ MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
outs() << "?)";
else
outs() << IndirectName << ")";
@@ -392,7 +445,8 @@ static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I,
outs() << " (dynamically looked up)";
else {
StringRef LibraryName;
- if (MachO->getLibraryShortNameByIndex(LibraryOrdinal - 1, LibraryName))
+ if (!MachO ||
+ MachO->getLibraryShortNameByIndex(LibraryOrdinal - 1, LibraryName))
outs() << " (from bad library ordinal " << LibraryOrdinal << ")";
else
outs() << " (from " << LibraryName << ")";
@@ -440,13 +494,14 @@ static const struct DarwinStabName DarwinStabNames[] = {
{MachO::N_ECOMM, "ECOMM"},
{MachO::N_ECOML, "ECOML"},
{MachO::N_LENG, "LENG"},
- {0, 0}};
+ {0, nullptr}};
+
static const char *getDarwinStabString(uint8_t NType) {
for (unsigned i = 0; DarwinStabNames[i].Name; i++) {
if (DarwinStabNames[i].NType == NType)
return DarwinStabNames[i].Name;
}
- return 0;
+ return nullptr;
}
// darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
@@ -511,12 +566,14 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
}
- const char *printBlanks, *printFormat;
+ const char *printBlanks, *printDashes, *printFormat;
if (isSymbolList64Bit(Obj)) {
printBlanks = " ";
+ printDashes = "----------------";
printFormat = "%016" PRIx64;
} else {
printBlanks = " ";
+ printDashes = "--------";
printFormat = "%08" PRIx64;
}
@@ -528,6 +585,9 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
continue;
if (Undefined && DefinedOnly)
continue;
+ bool Global = SymFlags & SymbolRef::SF_Global;
+ if (!Global && ExternalOnly)
+ continue;
if (SizeSort && !PrintAddress)
continue;
if (PrintFileName) {
@@ -537,7 +597,8 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
outs() << ArchiveName << ":";
outs() << CurrentFilename << ": ";
}
- if (JustSymbolName || (UndefinedOnly && isa<MachOObjectFile>(Obj))) {
+ if ((JustSymbolName || (UndefinedOnly && isa<MachOObjectFile>(Obj) &&
+ OutputFormat != darwin)) && OutputFormat != posix) {
outs() << I->Name << "\n";
continue;
}
@@ -550,9 +611,13 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
if (OutputFormat == sysv)
strcpy(SymbolSizeStr, printBlanks);
- if (I->TypeChar != 'U')
- format(printFormat, I->Address)
+ if (I->TypeChar != 'U') {
+ if (Obj.isIR())
+ strcpy(SymbolAddrStr, printDashes);
+ else
+ format(printFormat, I->Address)
.print(SymbolAddrStr, sizeof(SymbolAddrStr));
+ }
format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
// If OutputFormat is darwin or we are printing Mach-O symbols in hex and
@@ -561,11 +626,15 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
// printing Mach-O symbols in hex and not a Mach-O object fall back to
// OutputFormat bsd (see below).
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
- if ((OutputFormat == darwin || FormatMachOasHex) && MachO) {
- darwinPrintSymbol(MachO, I, SymbolAddrStr, printBlanks);
+ if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
+ darwinPrintSymbol(Obj, I, SymbolAddrStr, printBlanks, printDashes,
+ printFormat);
} else if (OutputFormat == posix) {
- outs() << I->Name << " " << I->TypeChar << " " << SymbolAddrStr
- << SymbolSizeStr << "\n";
+ outs() << I->Name << " " << I->TypeChar << " ";
+ if (MachO)
+ outs() << I->Address << " " << "0" /* SymbolSizeStr */ << "\n";
+ else
+ outs() << SymbolAddrStr << SymbolSizeStr << "\n";
} else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
if (PrintAddress)
outs() << SymbolAddrStr << ' ';
@@ -594,10 +663,11 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
// OK, this is ELF
elf_symbol_iterator SymI(I);
- elf_section_iterator SecI = Obj.section_end();
- if (error(SymI->getSection(SecI)))
+ ErrorOr<elf_section_iterator> SecIOrErr = SymI->getSection();
+ if (error(SecIOrErr.getError()))
return '?';
+ elf_section_iterator SecI = *SecIOrErr;
if (SecI != Obj.section_end()) {
switch (SecI->getType()) {
case ELF::SHT_PROGBITS:
@@ -651,9 +721,10 @@ static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
uint32_t Characteristics = 0;
if (!COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
- section_iterator SecI = Obj.section_end();
- if (error(SymI->getSection(SecI)))
+ ErrorOr<section_iterator> SecIOrErr = SymI->getSection();
+ if (error(SecIOrErr.getError()))
return '?';
+ section_iterator SecI = *SecIOrErr;
const coff_section *Section = Obj.getCOFFSection(*SecI);
Characteristics = Section->Characteristics;
}
@@ -701,8 +772,7 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
case MachO::N_INDR:
return 'i';
case MachO::N_SECT: {
- section_iterator Sec = Obj.section_end();
- Obj.getSymbolSection(Symb, Sec);
+ section_iterator Sec = *Obj.getSymbolSection(Symb);
DataRefImpl Ref = Sec->getRawDataRefImpl();
StringRef SectionName;
Obj.getSectionName(Ref, SectionName);
@@ -762,8 +832,14 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
char Ret = '?';
if (Symflags & object::SymbolRef::SF_Absolute)
Ret = 'a';
- else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj))
+ else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj)) {
Ret = getSymbolNMTypeChar(*IR, I);
+ Triple Host(sys::getDefaultTargetTriple());
+ if (Ret == 'd' && Host.isOSDarwin()) {
+ if(Symflags & SymbolRef::SF_Const)
+ Ret = 's';
+ }
+ }
else if (COFFObjectFile *COFF = dyn_cast<COFFObjectFile>(&Obj))
Ret = getSymbolNMTypeChar(*COFF, I);
else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
@@ -943,10 +1019,10 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (I != E) {
outs() << "Archive map\n";
for (; I != E; ++I) {
- ErrorOr<Archive::child_iterator> C = I->getMember();
+ ErrorOr<Archive::Child> C = I->getMember();
if (error(C.getError()))
return;
- ErrorOr<StringRef> FileNameOrErr = C.get()->getName();
+ ErrorOr<StringRef> FileNameOrErr = C->getName();
if (error(FileNameOrErr.getError()))
return;
StringRef SymName = I->getName();
@@ -958,7 +1034,10 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (Archive::child_iterator I = A->child_begin(), E = A->child_end();
I != E; ++I) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = I->getAsBinary(&Context);
+ if (error(I->getError()))
+ return;
+ auto &C = I->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(&Context);
if (ChildOrErr.getError())
continue;
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
@@ -1013,8 +1092,11 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
+ if (error(AI->getError()))
+ return;
+ auto &C = AI->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr =
- AI->getAsBinary(&Context);
+ C.getAsBinary(&Context);
if (ChildOrErr.getError())
continue;
if (SymbolicFile *O =
@@ -1067,8 +1149,11 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
+ if (error(AI->getError()))
+ return;
+ auto &C = AI->get();
ErrorOr<std::unique_ptr<Binary>> ChildOrErr =
- AI->getAsBinary(&Context);
+ C.getAsBinary(&Context);
if (ChildOrErr.getError())
continue;
if (SymbolicFile *O =
@@ -1116,8 +1201,10 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
std::unique_ptr<Archive> &A = *AOrErr;
for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end();
AI != AE; ++AI) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr =
- AI->getAsBinary(&Context);
+ if (error(AI->getError()))
+ return;
+ auto &C = AI->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(&Context);
if (ChildOrErr.getError())
continue;
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
@@ -1150,7 +1237,6 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
return;
}
error("unrecognizable file type", Filename);
- return;
}
int main(int argc, char **argv) {
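
Throughout the llvm-nm changes, APIs that filled an out-parameter and returned bool now return ErrorOr<T>: check getError() first, then dereference the wrapper. A self-contained sketch of consuming that wrapper, assuming the 3.8-era ErrorOr header:

    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    #include <system_error>

    llvm::ErrorOr<std::string> readName(bool Ok) {
      if (!Ok)
        return std::make_error_code(std::errc::invalid_argument);
      return std::string("symbol");
    }

    bool printName(bool Ok) {
      llvm::ErrorOr<std::string> NameOrErr = readName(Ok);
      if (std::error_code EC = NameOrErr.getError()) { // test for failure first
        llvm::errs() << "error: " << EC.message() << '\n';
        return false;
      }
      llvm::errs() << *NameOrErr << '\n';              // safe to dereference now
      return true;
    }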
diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
index 8b94a50..f286351 100644
--- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -151,7 +151,7 @@ static void printAllUnwindCodes(ArrayRef<UnwindCode> UCs) {
<< " remaining in buffer";
return ;
}
- printUnwindCode(ArrayRef<UnwindCode>(I, E));
+ printUnwindCode(makeArrayRef(I, E));
I += UsedSlots;
}
}
@@ -165,10 +165,10 @@ resolveSectionAndAddress(const COFFObjectFile *Obj, const SymbolRef &Sym,
if (std::error_code EC = ResolvedAddrOrErr.getError())
return EC;
ResolvedAddr = *ResolvedAddrOrErr;
- section_iterator iter(Obj->section_begin());
- if (std::error_code EC = Sym.getSection(iter))
+ ErrorOr<section_iterator> Iter = Sym.getSection();
+ if (std::error_code EC = Iter.getError())
return EC;
- ResolvedSection = Obj->getCOFFSection(*iter);
+ ResolvedSection = Obj->getCOFFSection(**Iter);
return std::error_code();
}
@@ -241,12 +241,10 @@ printSEHTable(const COFFObjectFile *Obj, uint32_t TableVA, int Count) {
return;
const pe32_header *PE32Header;
- if (error(Obj->getPE32Header(PE32Header)))
- return;
+ error(Obj->getPE32Header(PE32Header));
uint32_t ImageBase = PE32Header->ImageBase;
uintptr_t IntPtr = 0;
- if (error(Obj->getVaPtr(TableVA, IntPtr)))
- return;
+ error(Obj->getVaPtr(TableVA, IntPtr));
const support::ulittle32_t *P = (const support::ulittle32_t *)IntPtr;
outs() << "SEH Table:";
for (int I = 0; I < Count; ++I)
@@ -257,8 +255,7 @@ printSEHTable(const COFFObjectFile *Obj, uint32_t TableVA, int Count) {
static void printLoadConfiguration(const COFFObjectFile *Obj) {
// Skip if it's not executable.
const pe32_header *PE32Header;
- if (error(Obj->getPE32Header(PE32Header)))
- return;
+ error(Obj->getPE32Header(PE32Header));
if (!PE32Header)
return;
@@ -267,13 +264,11 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) {
return;
const data_directory *DataDir;
- if (error(Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir)))
- return;
+ error(Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir));
uintptr_t IntPtr = 0;
if (DataDir->RelativeVirtualAddress == 0)
return;
- if (error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)))
- return;
+ error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr));
auto *LoadConf = reinterpret_cast<const coff_load_configuration32 *>(IntPtr);
outs() << "Load configuration:"
@@ -381,8 +376,7 @@ static bool getPDataSection(const COFFObjectFile *Obj,
const RuntimeFunction *&RFStart, int &NumRFs) {
for (const SectionRef &Section : Obj->sections()) {
StringRef Name;
- if (error(Section.getName(Name)))
- continue;
+ error(Section.getName(Name));
if (Name != ".pdata")
continue;
@@ -394,8 +388,7 @@ static bool getPDataSection(const COFFObjectFile *Obj,
std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
ArrayRef<uint8_t> Contents;
- if (error(Obj->getSectionContents(Pdata, Contents)))
- continue;
+ error(Obj->getSectionContents(Pdata, Contents));
if (Contents.empty())
continue;
@@ -440,7 +433,7 @@ static void printWin64EHUnwindInfo(const Win64EH::UnwindInfo *UI) {
if (UI->NumCodes)
outs() << " Unwind Codes:\n";
- printAllUnwindCodes(ArrayRef<UnwindCode>(&UI->UnwindCodes[0], UI->NumCodes));
+ printAllUnwindCodes(makeArrayRef(&UI->UnwindCodes[0], UI->NumCodes));
outs() << "\n";
outs().flush();
@@ -499,11 +492,10 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
ArrayRef<uint8_t> XContents;
uint64_t UnwindInfoOffset = 0;
- if (error(getSectionContents(
+ error(getSectionContents(
Obj, Rels, SectionOffset +
/*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
- XContents, UnwindInfoOffset)))
- return;
+ XContents, UnwindInfoOffset));
if (XContents.empty())
return;
@@ -550,3 +542,52 @@ void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
printImportTables(file);
printExportTable(file);
}
+
+void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
+ for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
+ ErrorOr<COFFSymbolRef> Symbol = coff->getSymbol(SI);
+ StringRef Name;
+ error(Symbol.getError());
+ error(coff->getSymbolName(*Symbol, Name));
+
+ outs() << "[" << format("%2d", SI) << "]"
+ << "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")"
+ << "(fl 0x00)" // Flag bits, which COFF doesn't have.
+ << "(ty " << format("%3x", unsigned(Symbol->getType())) << ")"
+ << "(scl " << format("%3x", unsigned(Symbol->getStorageClass())) << ") "
+ << "(nx " << unsigned(Symbol->getNumberOfAuxSymbols()) << ") "
+ << "0x" << format("%08x", unsigned(Symbol->getValue())) << " "
+ << Name << "\n";
+
+ for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) {
+ if (Symbol->isSectionDefinition()) {
+ const coff_aux_section_definition *asd;
+ error(coff->getAuxSymbol<coff_aux_section_definition>(SI + 1, asd));
+
+ int32_t AuxNumber = asd->getNumber(Symbol->isBigObj());
+
+ outs() << "AUX "
+ << format("scnlen 0x%x nreloc %d nlnno %d checksum 0x%x "
+ , unsigned(asd->Length)
+ , unsigned(asd->NumberOfRelocations)
+ , unsigned(asd->NumberOfLinenumbers)
+ , unsigned(asd->CheckSum))
+ << format("assoc %d comdat %d\n"
+ , unsigned(AuxNumber)
+ , unsigned(asd->Selection));
+ } else if (Symbol->isFileRecord()) {
+ const char *FileName;
+ error(coff->getAuxSymbol<char>(SI + 1, FileName));
+
+ StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() *
+ coff->getSymbolTableEntrySize());
+ outs() << "AUX " << Name.rtrim(StringRef("\0", 1)) << '\n';
+
+ SI = SI + Symbol->getNumberOfAuxSymbols();
+ break;
+ } else {
+ outs() << "AUX Unknown\n";
+ }
+ }
+ }
+}
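
In a COFF symbol table, each symbol record may be followed by auxiliary records, which is why the printer above bumps SI past getNumberOfAuxSymbols() entries. A standalone sketch of that walk over a hypothetical flattened table (the names here are illustrative, not the LLVM API):

    #include <cstdio>

    // Hypothetical flattened symbol table: each entry carries the number of
    // auxiliary records that immediately follow it, as in COFF. The walker
    // must step over those records or it would misread them as symbols.
    struct Entry {
      const char *Name;
      unsigned NumAux;
    };

    int main() {
      Entry Table[] = {{".text", 1}, {"<aux>", 0}, {"main", 0}};
      const unsigned Size = sizeof(Table) / sizeof(Table[0]);
      for (unsigned SI = 0; SI != Size; ++SI) {
        printf("[%2u] %s (nx %u)\n", SI, Table[SI].Name, Table[SI].NumAux);
        SI += Table[SI].NumAux; // skip the auxiliary records
      }
      return 0;
    }
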
diff --git a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
index 2d0d7d7..7b44e39 100644
--- a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -24,10 +24,8 @@ using namespace llvm::object;
template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
typedef ELFFile<ELFT> ELFO;
outs() << "Program Header:\n";
- for (typename ELFO::Elf_Phdr_Iter pi = o->program_header_begin(),
- pe = o->program_header_end();
- pi != pe; ++pi) {
- switch (pi->p_type) {
+ for (const typename ELFO::Elf_Phdr &Phdr : o->program_headers()) {
+ switch (Phdr.p_type) {
case ELF::PT_LOAD:
outs() << " LOAD ";
break;
@@ -55,22 +53,16 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
const char *Fmt = ELFT::Is64Bits ? "0x%016" PRIx64 " " : "0x%08" PRIx64 " ";
- outs() << "off "
- << format(Fmt, (uint64_t)pi->p_offset)
- << "vaddr "
- << format(Fmt, (uint64_t)pi->p_vaddr)
- << "paddr "
- << format(Fmt, (uint64_t)pi->p_paddr)
- << format("align 2**%u\n", countTrailingZeros<uint64_t>(pi->p_align))
- << " filesz "
- << format(Fmt, (uint64_t)pi->p_filesz)
- << "memsz "
- << format(Fmt, (uint64_t)pi->p_memsz)
- << "flags "
- << ((pi->p_flags & ELF::PF_R) ? "r" : "-")
- << ((pi->p_flags & ELF::PF_W) ? "w" : "-")
- << ((pi->p_flags & ELF::PF_X) ? "x" : "-")
- << "\n";
+ outs() << "off " << format(Fmt, (uint64_t)Phdr.p_offset) << "vaddr "
+ << format(Fmt, (uint64_t)Phdr.p_vaddr) << "paddr "
+ << format(Fmt, (uint64_t)Phdr.p_paddr)
+ << format("align 2**%u\n",
+ countTrailingZeros<uint64_t>(Phdr.p_align))
+ << " filesz " << format(Fmt, (uint64_t)Phdr.p_filesz)
+ << "memsz " << format(Fmt, (uint64_t)Phdr.p_memsz) << "flags "
+ << ((Phdr.p_flags & ELF::PF_R) ? "r" : "-")
+ << ((Phdr.p_flags & ELF::PF_W) ? "w" : "-")
+ << ((Phdr.p_flags & ELF::PF_X) ? "x" : "-") << "\n";
}
outs() << "\n";
}
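
The flags column decodes the ELF p_flags bits (PF_X=1, PF_W=2, PF_R=4), and "align 2**n" comes from counting the trailing zero bits of p_align. A standalone sketch, assuming the GCC/Clang __builtin_ctzll builtin for the trailing-zero count:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // PF_R | PF_X, and a 4 KiB alignment, as sample data.
      uint32_t p_flags = 0x4 | 0x1;
      uint64_t p_align = 0x1000;
      printf("flags %c%c%c align 2**%u\n",
             (p_flags & 0x4) ? 'r' : '-',
             (p_flags & 0x2) ? 'w' : '-',
             (p_flags & 0x1) ? 'x' : '-',
             (unsigned)__builtin_ctzll(p_align)); // flags r-x align 2**12
      return 0;
    }
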
diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
index 04c72f48..a2f3bc8 100644
--- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
@@ -13,6 +13,7 @@
#include "llvm-objdump.h"
#include "llvm-c/Disassembler.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
@@ -97,11 +98,6 @@ cl::opt<bool>
cl::desc("Print the linker optimization hints for "
"Mach-O objects (requires -macho)"));
-cl::list<std::string>
- llvm::DumpSections("section",
- cl::desc("Prints the specified segment,section for "
- "Mach-O objects (requires -macho)"));
-
cl::opt<bool>
llvm::InfoPlist("info-plist",
cl::desc("Print the info plist section as strings for "
@@ -138,6 +134,7 @@ static cl::opt<bool> NoSymbolicOperands(
static cl::list<std::string>
ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
cl::ZeroOrMore);
+
bool ArchAll = false;
static std::string ThumbTripleName;
@@ -210,19 +207,19 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
case MachO::DICE_KIND_DATA:
if (Length >= 4) {
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 4), outs());
+ dumpBytes(makeArrayRef(bytes, 4), outs());
Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0];
outs() << "\t.long " << Value;
Size = 4;
} else if (Length >= 2) {
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
+ dumpBytes(makeArrayRef(bytes, 2), outs());
Value = bytes[1] << 8 | bytes[0];
outs() << "\t.short " << Value;
Size = 2;
} else {
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
+ dumpBytes(makeArrayRef(bytes, 1), outs());
Value = bytes[0];
outs() << "\t.byte " << Value;
Size = 1;
@@ -234,14 +231,14 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
break;
case MachO::DICE_KIND_JUMP_TABLE8:
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 1), outs());
+ dumpBytes(makeArrayRef(bytes, 1), outs());
Value = bytes[0];
outs() << "\t.byte " << format("%3u", Value) << "\t@ KIND_JUMP_TABLE8\n";
Size = 1;
break;
case MachO::DICE_KIND_JUMP_TABLE16:
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 2), outs());
+ dumpBytes(makeArrayRef(bytes, 2), outs());
Value = bytes[1] << 8 | bytes[0];
outs() << "\t.short " << format("%5u", Value & 0xffff)
<< "\t@ KIND_JUMP_TABLE16\n";
@@ -250,7 +247,7 @@ static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length,
case MachO::DICE_KIND_JUMP_TABLE32:
case MachO::DICE_KIND_ABS_JUMP_TABLE32:
if (!NoShowRawInsn)
- dumpBytes(ArrayRef<uint8_t>(bytes, 4), outs());
+ dumpBytes(makeArrayRef(bytes, 4), outs());
Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0];
outs() << "\t.long " << Value;
if (Kind == MachO::DICE_KIND_JUMP_TABLE32)
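
All of the data-in-code cases above reassemble a little-endian value by shifting byte 0 in as the least significant byte. A standalone sketch of the same shift-and-or pattern:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Data-in-code entries are stored little-endian, so byte 0 is the
      // least significant; the same pattern appears above for the .long,
      // .short and jump-table cases.
      unsigned char bytes[4] = {0x78, 0x56, 0x34, 0x12};
      uint32_t Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0];
      printf("0x%08x\n", Value); // prints 0x12345678
      return 0;
    }
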
@@ -670,13 +667,9 @@ static void DumpLiteral8(MachOObjectFile *O, uint32_t l0, uint32_t l1,
double d) {
outs() << format("0x%08" PRIx32, l0) << " " << format("0x%08" PRIx32, l1);
uint32_t Hi, Lo;
- if (O->isLittleEndian()) {
- Hi = l1;
- Lo = l0;
- } else {
- Hi = l0;
- Lo = l1;
- }
+ Hi = (O->isLittleEndian()) ? l1 : l0;
+ Lo = (O->isLittleEndian()) ? l0 : l1;
+
// Hi is the high word, so this is equivalent to if(isfinite(d))
if ((Hi & 0x7ff00000) != 0x7ff00000)
outs() << format(" (%.16e)\n", d);
@@ -921,10 +914,7 @@ static void DumpInitTermPointerSection(MachOObjectFile *O, const char *sect,
SymbolAddressMap *AddrMap,
bool verbose) {
uint32_t stride;
- if (O->is64Bit())
- stride = sizeof(uint64_t);
- else
- stride = sizeof(uint32_t);
+ stride = (O->is64Bit()) ? sizeof(uint64_t) : sizeof(uint32_t);
for (uint32_t i = 0; i < sect_size; i += stride) {
const char *SymbolName = nullptr;
if (O->is64Bit()) {
@@ -1006,8 +996,8 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
if (verbose)
CreateSymbolAddressMap(O, &AddrMap);
- for (unsigned i = 0; i < DumpSections.size(); ++i) {
- StringRef DumpSection = DumpSections[i];
+ for (unsigned i = 0; i < FilterSections.size(); ++i) {
+ StringRef DumpSection = FilterSections[i];
std::pair<StringRef, StringRef> DumpSegSectName;
DumpSegSectName = DumpSection.split(',');
StringRef DumpSegName, DumpSectName;
@@ -1171,7 +1161,7 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
// UniversalHeaders or ArchiveHeaders.
if (Disassemble || PrivateHeaders || ExportsTrie || Rebase || Bind ||
LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints ||
- DylibsUsed || DylibId || ObjcMetaData || (DumpSections.size() != 0)) {
+ DylibsUsed || DylibId || ObjcMetaData || (FilterSections.size() != 0)) {
outs() << Filename;
if (!ArchiveMemberName.empty())
outs() << '(' << ArchiveMemberName << ')';
@@ -1194,7 +1184,7 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
PrintSectionHeaders(MachOOF);
if (SectionContents)
PrintSectionContents(MachOOF);
- if (DumpSections.size() != 0)
+ if (FilterSections.size() != 0)
DumpSectionContents(Filename, MachOOF, !NonVerbose);
if (InfoPlist)
DumpInfoPlistSectionContents(Filename, MachOOF);
@@ -1395,7 +1385,7 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
}
}
-static void printArchiveChild(Archive::Child &C, bool verbose,
+static void printArchiveChild(const Archive::Child &C, bool verbose,
bool print_offset) {
if (print_offset)
outs() << C.getChildOffset() << "\t";
@@ -1404,42 +1394,15 @@ static void printArchiveChild(Archive::Child &C, bool verbose,
// FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
// But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
outs() << "-";
- if (Mode & sys::fs::owner_read)
- outs() << "r";
- else
- outs() << "-";
- if (Mode & sys::fs::owner_write)
- outs() << "w";
- else
- outs() << "-";
- if (Mode & sys::fs::owner_exe)
- outs() << "x";
- else
- outs() << "-";
- if (Mode & sys::fs::group_read)
- outs() << "r";
- else
- outs() << "-";
- if (Mode & sys::fs::group_write)
- outs() << "w";
- else
- outs() << "-";
- if (Mode & sys::fs::group_exe)
- outs() << "x";
- else
- outs() << "-";
- if (Mode & sys::fs::others_read)
- outs() << "r";
- else
- outs() << "-";
- if (Mode & sys::fs::others_write)
- outs() << "w";
- else
- outs() << "-";
- if (Mode & sys::fs::others_exe)
- outs() << "x";
- else
- outs() << "-";
+ outs() << ((Mode & sys::fs::owner_read) ? "r" : "-");
+ outs() << ((Mode & sys::fs::owner_write) ? "w" : "-");
+ outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-");
+ outs() << ((Mode & sys::fs::group_read) ? "r" : "-");
+ outs() << ((Mode & sys::fs::group_write) ? "w" : "-");
+ outs() << ((Mode & sys::fs::group_exe) ? "x" : "-");
+ outs() << ((Mode & sys::fs::others_read) ? "r" : "-");
+ outs() << ((Mode & sys::fs::others_write) ? "w" : "-");
+ outs() << ((Mode & sys::fs::others_exe) ? "x" : "-");
} else {
outs() << format("0%o ", Mode);
}
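
The ternary chain above renders the nine permission bits in ls(1) style. The same rendering can be written as a loop over the bit masks; a standalone sketch (0644 is just sample data):

    #include <cstdio>

    int main() {
      unsigned Mode = 0644; // typical archive member mode
      const char *rwx = "rwxrwxrwx";
      char buf[10];
      // Walk the nine permission bits from owner-read down to others-exe.
      for (int i = 0; i < 9; ++i)
        buf[i] = (Mode & (0400 >> i)) ? rwx[i] : '-';
      buf[9] = '\0';
      printf("-%s\n", buf); // prints -rw-r--r--
      return 0;
    }
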
@@ -1448,8 +1411,10 @@ static void printArchiveChild(Archive::Child &C, bool verbose,
outs() << format("%3d/", UID);
unsigned GID = C.getGID();
outs() << format("%-3d ", GID);
- uint64_t Size = C.getRawSize();
- outs() << format("%5" PRId64, Size) << " ";
+ ErrorOr<uint64_t> Size = C.getRawSize();
+ if (std::error_code EC = Size.getError())
+ report_fatal_error(EC.message());
+ outs() << format("%5" PRId64, Size.get()) << " ";
StringRef RawLastModified = C.getRawLastModified();
if (verbose) {
@@ -1483,14 +1448,11 @@ static void printArchiveChild(Archive::Child &C, bool verbose,
}
static void printArchiveHeaders(Archive *A, bool verbose, bool print_offset) {
- if (A->hasSymbolTable()) {
- Archive::child_iterator S = A->getSymbolTableChild();
- Archive::Child C = *S;
- printArchiveChild(C, verbose, print_offset);
- }
- for (Archive::child_iterator I = A->child_begin(), E = A->child_end(); I != E;
- ++I) {
- Archive::Child C = *I;
+ for (Archive::child_iterator I = A->child_begin(false), E = A->child_end();
+ I != E; ++I) {
+ if (std::error_code EC = I->getError())
+ report_fatal_error(EC.message());
+ const Archive::Child &C = **I;
printArchiveChild(C, verbose, print_offset);
}
}
@@ -1527,7 +1489,10 @@ void llvm::ParseInputMachO(StringRef Filename) {
printArchiveHeaders(A, !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator I = A->child_begin(), E = A->child_end();
I != E; ++I) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = I->getAsBinary();
+ if (std::error_code EC = I->getError())
+ report_error(Filename, EC);
+ auto &C = I->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O = dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
@@ -1575,7 +1540,10 @@ void llvm::ParseInputMachO(StringRef Filename) {
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = AI->getAsBinary();
+ if (std::error_code EC = AI->getError())
+ report_error(Filename, EC);
+ auto &C = AI->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
@@ -1617,7 +1585,10 @@ void llvm::ParseInputMachO(StringRef Filename) {
for (Archive::child_iterator AI = A->child_begin(),
AE = A->child_end();
AI != AE; ++AI) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = AI->getAsBinary();
+ if (std::error_code EC = AI->getError())
+ report_error(Filename, EC);
+ auto &C = AI->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
@@ -1653,7 +1624,10 @@ void llvm::ParseInputMachO(StringRef Filename) {
printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end();
AI != AE; ++AI) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = AI->getAsBinary();
+ if (std::error_code EC = AI->getError())
+ report_error(Filename, EC);
+ auto &C = AI->get();
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (ChildOrErr.getError())
continue;
if (MachOObjectFile *O =
@@ -1676,8 +1650,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
errs() << "llvm-objdump: '" << Filename << "': "
<< "Object is not a Mach-O file type.\n";
} else
- errs() << "llvm-objdump: '" << Filename << "': "
- << "Unrecognized file type.\n";
+ report_error(Filename, object_error::invalid_file_type);
}
typedef std::pair<uint64_t, const char *> BindInfoEntry;
@@ -1698,6 +1671,7 @@ struct DisassembleInfo {
uint64_t adrp_addr;
uint32_t adrp_inst;
BindTable *bindtable;
+ uint32_t depth;
};
// SymbolizerGetOpInfo() is the operand information call back function.
@@ -1735,8 +1709,15 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
if (Arch == Triple::x86) {
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
return 0;
- // First search the section's relocation entries (if any) for an entry
- // for this section offset.
+ if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
+ // TODO:
+ // Search the external relocation entries of a fully linked image
+ // (if any) for an entry that matches this segment offset.
+ // uint32_t seg_offset = (Pc + Offset);
+ return 0;
+ }
+ // In MH_OBJECT filetypes, search the section's relocation entries (if any)
+ // for an entry for this section offset.
uint32_t sect_addr = info->S.getAddress();
uint32_t sect_offset = (Pc + Offset) - sect_addr;
bool reloc_found = false;
@@ -1806,17 +1787,20 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
op_info->Value = offset;
return 1;
}
- // TODO:
- // Second search the external relocation entries of a fully linked image
- // (if any) for an entry that matches this segment offset.
- // uint32_t seg_offset = (Pc + Offset);
return 0;
}
if (Arch == Triple::x86_64) {
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
return 0;
- // First search the section's relocation entries (if any) for an entry
- // for this section offset.
+ if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
+ // TODO:
+ // Search the external relocation entries of a fully linked image
+ // (if any) for an entry that matches this segment offset.
+ // uint64_t seg_offset = (Pc + Offset);
+ return 0;
+ }
+ // In MH_OBJECT filetypes, search the section's relocation entries (if any)
+ // for an entry for this section offset.
uint64_t sect_addr = info->S.getAddress();
uint64_t sect_offset = (Pc + Offset) - sect_addr;
bool reloc_found = false;
@@ -1874,17 +1858,20 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
op_info->AddSymbol.Name = name;
return 1;
}
- // TODO:
- // Second search the external relocation entries of a fully linked image
- // (if any) for an entry that matches this segment offset.
- // uint64_t seg_offset = (Pc + Offset);
return 0;
}
if (Arch == Triple::arm) {
if (Offset != 0 || (Size != 4 && Size != 2))
return 0;
- // First search the section's relocation entries (if any) for an entry
- // for this section offset.
+ if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
+ // TODO:
+ // Search the external relocation entries of a fully linked image
+ // (if any) for an entry that matches this segment offset.
+ // uint32_t seg_offset = (Pc + Offset);
+ return 0;
+ }
+ // In MH_OBJECT filetypes, search the section's relocation entries (if any)
+ // for an entry for this section offset.
uint32_t sect_addr = info->S.getAddress();
uint32_t sect_offset = (Pc + Offset) - sect_addr;
DataRefImpl Rel;
@@ -2016,8 +2003,15 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
if (Arch == Triple::aarch64) {
if (Offset != 0 || Size != 4)
return 0;
- // First search the section's relocation entries (if any) for an entry
- // for this section offset.
+ if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
+ // TODO:
+ // Search the external relocation entries of a fully linked image
+ // (if any) for an entry that matches this segment offset.
+ // uint64_t seg_offset = (Pc + Offset);
+ return 0;
+ }
+ // In MH_OBJECT filetypes, search the section's relocation entries (if any)
+ // for an entry for this section offset.
uint64_t sect_addr = info->S.getAddress();
uint64_t sect_offset = (Pc + Offset) - sect_addr;
auto Reloc =
@@ -2370,6 +2364,8 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
+ if (SectSize == 0)
+ continue;
if (objc_only) {
StringRef SectName;
((*(info->Sections))[SectIdx]).getName(SectName);
@@ -3267,6 +3263,8 @@ walk_pointer_list_32(const char *listname, const SectionRef S,
}
static void print_layout_map(const char *layout_map, uint32_t left) {
+ if (layout_map == nullptr)
+ return;
outs() << " layout map: ";
do {
outs() << format("0x%02" PRIx32, (*layout_map) & 0xff) << " ";
@@ -3330,8 +3328,8 @@ static void print_method_list64_t(uint64_t p, struct DisassembleInfo *info,
return;
memset(&m, '\0', sizeof(struct method64_t));
if (left < sizeof(struct method64_t)) {
- memcpy(&ml, r, left);
- outs() << indent << " (method_t entends past the end of the section)\n";
+ memcpy(&m, r, left);
+ outs() << indent << " (method_t extends past the end of the section)\n";
} else
memcpy(&m, r, sizeof(struct method64_t));
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
@@ -4222,7 +4220,7 @@ static void print_objc_property_list32(uint32_t p,
}
}
-static void print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
+static bool print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
bool &is_meta_class) {
struct class_ro64_t cro;
const char *r;
@@ -4233,7 +4231,7 @@ static void print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
r = get_pointer_64(p, offset, left, S, info);
if (r == nullptr || left < sizeof(struct class_ro64_t))
- return;
+ return false;
memset(&cro, '\0', sizeof(struct class_ro64_t));
if (left < sizeof(struct class_ro64_t)) {
memcpy(&cro, r, left);
@@ -4357,10 +4355,11 @@ static void print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
if (cro.baseProperties + n_value != 0)
print_objc_property_list64(cro.baseProperties + n_value, info);
- is_meta_class = (cro.flags & RO_META) ? true : false;
+ is_meta_class = (cro.flags & RO_META) != 0;
+ return true;
}
-static void print_class_ro32_t(uint32_t p, struct DisassembleInfo *info,
+static bool print_class_ro32_t(uint32_t p, struct DisassembleInfo *info,
bool &is_meta_class) {
struct class_ro32_t cro;
const char *r;
@@ -4370,7 +4369,7 @@ static void print_class_ro32_t(uint32_t p, struct DisassembleInfo *info,
r = get_pointer_32(p, offset, left, S, info);
if (r == nullptr)
- return;
+ return false;
memset(&cro, '\0', sizeof(struct class_ro32_t));
if (left < sizeof(struct class_ro32_t)) {
memcpy(&cro, r, left);
@@ -4420,7 +4419,8 @@ static void print_class_ro32_t(uint32_t p, struct DisassembleInfo *info,
<< format("0x%" PRIx32, cro.baseProperties) << "\n";
if (cro.baseProperties != 0)
print_objc_property_list32(cro.baseProperties, info);
- is_meta_class = (cro.flags & RO_META) ? true : false;
+ is_meta_class = (cro.flags & RO_META) != 0;
+ return true;
}
static void print_class64_t(uint64_t p, struct DisassembleInfo *info) {
@@ -4490,11 +4490,16 @@ static void print_class64_t(uint64_t p, struct DisassembleInfo *info) {
outs() << " Swift class";
outs() << "\n";
bool is_meta_class;
- print_class_ro64_t((c.data + n_value) & ~0x7, info, is_meta_class);
+ if (!print_class_ro64_t((c.data + n_value) & ~0x7, info, is_meta_class))
+ return;
- if (is_meta_class == false) {
- outs() << "Meta Class\n";
- print_class64_t(c.isa + isa_n_value, info);
+ if (!is_meta_class &&
+ c.isa + isa_n_value != p &&
+ c.isa + isa_n_value != 0 &&
+ info->depth < 100) {
+ info->depth++;
+ outs() << "Meta Class\n";
+ print_class64_t(c.isa + isa_n_value, info);
}
}
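
The new conditions guard the isa walk: a root Objective-C meta class may point back at itself, so the recursion stops on a self-reference, a null pointer, or a depth cap of 100. A simplified standalone sketch of that guard (types and names illustrative):

    #include <cstdio>

    struct Class {
      const Class *isa;
      const char *name;
    };

    // Stop on null, on a pointer that loops back, or at a depth cap, so a
    // self-referential isa chain cannot recurse forever.
    static void walk(const Class *c, const Class *prev, unsigned depth) {
      if (!c || c == prev || depth >= 100)
        return;
      printf("%s (depth %u)\n", c->name, depth);
      walk(c->isa, c, depth + 1);
    }

    int main() {
      Class meta{nullptr, "meta"};
      meta.isa = &meta; // self-referential, as root meta classes can be
      Class cls{&meta, "cls"};
      walk(&cls, nullptr, 0);
      return 0; // prints cls then meta, then stops at the self-reference
    }
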
@@ -4555,9 +4560,10 @@ static void print_class32_t(uint32_t p, struct DisassembleInfo *info) {
outs() << " Swift class";
outs() << "\n";
bool is_meta_class;
- print_class_ro32_t(c.data & ~0x3, info, is_meta_class);
+ if (!print_class_ro32_t(c.data & ~0x3, info, is_meta_class))
+ return;
- if (is_meta_class == false) {
+ if (!is_meta_class) {
outs() << "Meta Class\n";
print_class32_t(c.isa, info);
}
@@ -4865,7 +4871,7 @@ static void print_category32_t(uint32_t p, struct DisassembleInfo *info) {
outs() << " name " << format("0x%" PRIx32, c.name);
name = get_symbol_32(offset + offsetof(struct category32_t, name), S, info,
c.name);
- if (name != NULL)
+ if (name)
outs() << " " << name;
outs() << "\n";
@@ -5006,6 +5012,9 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) {
struct objc_image_info64 o;
const char *r;
+ if (S == SectionRef())
+ return;
+
StringRef SectName;
S.getName(SectName);
DataRefImpl Ref = S.getRawDataRefImpl();
@@ -5142,75 +5151,48 @@ static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) {
info.adrp_addr = 0;
info.adrp_inst = 0;
- const SectionRef CL = get_section(O, "__OBJC2", "__class_list");
- if (CL != SectionRef()) {
- info.S = CL;
- walk_pointer_list_64("class", CL, O, &info, print_class64_t);
- } else {
- const SectionRef CL = get_section(O, "__DATA", "__objc_classlist");
- info.S = CL;
- walk_pointer_list_64("class", CL, O, &info, print_class64_t);
- }
-
- const SectionRef CR = get_section(O, "__OBJC2", "__class_refs");
- if (CR != SectionRef()) {
- info.S = CR;
- walk_pointer_list_64("class refs", CR, O, &info, nullptr);
- } else {
- const SectionRef CR = get_section(O, "__DATA", "__objc_classrefs");
- info.S = CR;
- walk_pointer_list_64("class refs", CR, O, &info, nullptr);
- }
-
- const SectionRef SR = get_section(O, "__OBJC2", "__super_refs");
- if (SR != SectionRef()) {
- info.S = SR;
- walk_pointer_list_64("super refs", SR, O, &info, nullptr);
- } else {
- const SectionRef SR = get_section(O, "__DATA", "__objc_superrefs");
- info.S = SR;
- walk_pointer_list_64("super refs", SR, O, &info, nullptr);
- }
-
- const SectionRef CA = get_section(O, "__OBJC2", "__category_list");
- if (CA != SectionRef()) {
- info.S = CA;
- walk_pointer_list_64("category", CA, O, &info, print_category64_t);
- } else {
- const SectionRef CA = get_section(O, "__DATA", "__objc_catlist");
- info.S = CA;
- walk_pointer_list_64("category", CA, O, &info, print_category64_t);
- }
-
- const SectionRef PL = get_section(O, "__OBJC2", "__protocol_list");
- if (PL != SectionRef()) {
- info.S = PL;
- walk_pointer_list_64("protocol", PL, O, &info, nullptr);
- } else {
- const SectionRef PL = get_section(O, "__DATA", "__objc_protolist");
- info.S = PL;
- walk_pointer_list_64("protocol", PL, O, &info, nullptr);
- }
-
- const SectionRef MR = get_section(O, "__OBJC2", "__message_refs");
- if (MR != SectionRef()) {
- info.S = MR;
- print_message_refs64(MR, &info);
- } else {
- const SectionRef MR = get_section(O, "__DATA", "__objc_msgrefs");
- info.S = MR;
- print_message_refs64(MR, &info);
- }
-
- const SectionRef II = get_section(O, "__OBJC2", "__image_info");
- if (II != SectionRef()) {
- info.S = II;
- print_image_info64(II, &info);
- } else {
- const SectionRef II = get_section(O, "__DATA", "__objc_imageinfo");
- info.S = II;
- print_image_info64(II, &info);
- }
+ info.depth = 0;
+ SectionRef CL = get_section(O, "__OBJC2", "__class_list");
+ if (CL == SectionRef())
+ CL = get_section(O, "__DATA", "__objc_classlist");
+ info.S = CL;
+ walk_pointer_list_64("class", CL, O, &info, print_class64_t);
+
+ SectionRef CR = get_section(O, "__OBJC2", "__class_refs");
+ if (CR == SectionRef())
+ CR = get_section(O, "__DATA", "__objc_classrefs");
+ info.S = CR;
+ walk_pointer_list_64("class refs", CR, O, &info, nullptr);
+
+ SectionRef SR = get_section(O, "__OBJC2", "__super_refs");
+ if (SR == SectionRef())
+ SR = get_section(O, "__DATA", "__objc_superrefs");
+ info.S = SR;
+ walk_pointer_list_64("super refs", SR, O, &info, nullptr);
+
+ SectionRef CA = get_section(O, "__OBJC2", "__category_list");
+ if (CA == SectionRef())
+ CA = get_section(O, "__DATA", "__objc_catlist");
+ info.S = CA;
+ walk_pointer_list_64("category", CA, O, &info, print_category64_t);
+
+ SectionRef PL = get_section(O, "__OBJC2", "__protocol_list");
+ if (PL == SectionRef())
+ PL = get_section(O, "__DATA", "__objc_protolist");
+ info.S = PL;
+ walk_pointer_list_64("protocol", PL, O, &info, nullptr);
+
+ SectionRef MR = get_section(O, "__OBJC2", "__message_refs");
+ if (MR == SectionRef())
+ MR = get_section(O, "__DATA", "__objc_msgrefs");
+ info.S = MR;
+ print_message_refs64(MR, &info);
+
+ SectionRef II = get_section(O, "__OBJC2", "__image_info");
+ if (II == SectionRef())
+ II = get_section(O, "__DATA", "__objc_imageinfo");
+ info.S = II;
+ print_image_info64(II, &info);
if (info.bindtable != nullptr)
delete info.bindtable;
@@ -5559,7 +5541,7 @@ static void printObjcMetaData(MachOObjectFile *O, bool verbose) {
// binary for the iOS simulator which is the second Objective-C
// ABI. In that case printObjc1_32bit_MetaData() will determine that
// and return false.
- if (printObjc1_32bit_MetaData(O, verbose) == false)
+ if (!printObjc1_32bit_MetaData(O, verbose))
printObjc2_32bit_MetaData(O, verbose);
}
}
@@ -5588,36 +5570,38 @@ static const char *GuessLiteralPointer(uint64_t ReferenceValue,
uint64_t *ReferenceType,
struct DisassembleInfo *info) {
// First see if there is an external relocation entry at the ReferencePC.
- uint64_t sect_addr = info->S.getAddress();
- uint64_t sect_offset = ReferencePC - sect_addr;
- bool reloc_found = false;
- DataRefImpl Rel;
- MachO::any_relocation_info RE;
- bool isExtern = false;
- SymbolRef Symbol;
- for (const RelocationRef &Reloc : info->S.relocations()) {
- uint64_t RelocOffset = Reloc.getOffset();
- if (RelocOffset == sect_offset) {
- Rel = Reloc.getRawDataRefImpl();
- RE = info->O->getRelocation(Rel);
- if (info->O->isRelocationScattered(RE))
- continue;
- isExtern = info->O->getPlainRelocationExternal(RE);
- if (isExtern) {
- symbol_iterator RelocSym = Reloc.getSymbol();
- Symbol = *RelocSym;
+ if (info->O->getHeader().filetype == MachO::MH_OBJECT) {
+ uint64_t sect_addr = info->S.getAddress();
+ uint64_t sect_offset = ReferencePC - sect_addr;
+ bool reloc_found = false;
+ DataRefImpl Rel;
+ MachO::any_relocation_info RE;
+ bool isExtern = false;
+ SymbolRef Symbol;
+ for (const RelocationRef &Reloc : info->S.relocations()) {
+ uint64_t RelocOffset = Reloc.getOffset();
+ if (RelocOffset == sect_offset) {
+ Rel = Reloc.getRawDataRefImpl();
+ RE = info->O->getRelocation(Rel);
+ if (info->O->isRelocationScattered(RE))
+ continue;
+ isExtern = info->O->getPlainRelocationExternal(RE);
+ if (isExtern) {
+ symbol_iterator RelocSym = Reloc.getSymbol();
+ Symbol = *RelocSym;
+ }
+ reloc_found = true;
+ break;
}
- reloc_found = true;
- break;
}
- }
- // If there is an external relocation entry for a symbol in a section
- // then used that symbol's value for the value of the reference.
- if (reloc_found && isExtern) {
- if (info->O->getAnyRelocationPCRel(RE)) {
- unsigned Type = info->O->getAnyRelocationType(RE);
- if (Type == MachO::X86_64_RELOC_SIGNED) {
- ReferenceValue = Symbol.getValue();
+ // If there is an external relocation entry for a symbol in a section
+ // then use that symbol's value for the value of the reference.
+ if (reloc_found && isExtern) {
+ if (info->O->getAnyRelocationPCRel(RE)) {
+ unsigned Type = info->O->getAnyRelocationType(RE);
+ if (Type == MachO::X86_64_RELOC_SIGNED) {
+ ReferenceValue = Symbol.getValue();
+ }
}
}
}
@@ -5872,7 +5856,6 @@ static void emitComments(raw_svector_ostream &CommentStream,
formatted_raw_ostream &FormattedOS,
const MCAsmInfo &MAI) {
// Flush the stream before taking its content.
- CommentStream.flush();
StringRef Comments = CommentsToEmit.str();
// Get the default information for printing a comment.
const char *CommentBegin = MAI.getCommentString();
@@ -5893,7 +5876,6 @@ static void emitComments(raw_svector_ostream &CommentStream,
// Tell the comment stream that the vector changed underneath it.
CommentsToEmit.clear();
- CommentStream.resync();
}
static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
@@ -6065,7 +6047,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
diContext.reset(new DWARFContextInMemory(*DbgObj));
}
- if (DumpSections.size() == 0)
+ if (FilterSections.size() == 0)
outs() << "(" << DisSegName << "," << DisSectName << ") section\n";
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
@@ -6087,19 +6069,6 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
bool symbolTableWorked = false;
- // Parse relocations.
- std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
- for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) {
- uint64_t RelocOffset = Reloc.getOffset();
- uint64_t SectionAddress = Sections[SectIdx].getAddress();
- RelocOffset -= SectionAddress;
-
- symbol_iterator RelocSym = Reloc.getSymbol();
-
- Relocs.push_back(std::make_pair(RelocOffset, *RelocSym));
- }
- array_pod_sort(Relocs.begin(), Relocs.end());
-
// Create a map of symbol addresses to symbol names for use by
// the SymbolizerSymbolLookUp() routine.
SymbolAddressMap AddrMap;
@@ -6157,7 +6126,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
StringRef SymName = *SymNameOrErr;
SymbolRef::Type ST = Symbols[SymIdx].getType();
- if (ST != SymbolRef::ST_Function)
+ if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
continue;
// Make sure the symbol is defined in this section.
@@ -6251,10 +6220,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
DebugOut, Annotations);
if (gotInst) {
if (!NoShowRawInsn) {
- dumpBytes(ArrayRef<uint8_t>(Bytes.data() + Index, Size), outs());
+ dumpBytes(makeArrayRef(Bytes.data() + Index, Size), outs());
}
formatted_raw_ostream FormattedOS(outs());
- Annotations.flush();
StringRef AnnotationsStr = Annotations.str();
if (isThumb)
ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr, *ThumbSTI);
@@ -6316,7 +6284,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
if (!NoShowRawInsn) {
outs() << "\t";
- dumpBytes(ArrayRef<uint8_t>(Bytes.data() + Index, InstSize), outs());
+ dumpBytes(makeArrayRef(Bytes.data() + Index, InstSize), outs());
}
IP->printInst(&Inst, outs(), "", *STI);
outs() << "\n";
@@ -6441,8 +6409,7 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
// Go back one so that SymbolAddress <= Addr.
--Sym;
- section_iterator SymSection = Obj->section_end();
- Sym->second.getSection(SymSection);
+ section_iterator SymSection = *Sym->second.getSection();
if (RelocSection == *SymSection) {
// There's a valid symbol in the same section before this reference.
ErrorOr<StringRef> NameOrErr = Sym->second.getName();
@@ -6780,13 +6747,268 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
}
}
+static unsigned getSizeForEncoding(bool is64Bit,
+ unsigned symbolEncoding) {
+ unsigned format = symbolEncoding & 0x0f;
+ switch (format) {
+ default: llvm_unreachable("Unknown Encoding");
+ case dwarf::DW_EH_PE_absptr:
+ case dwarf::DW_EH_PE_signed:
+ return is64Bit ? 8 : 4;
+ case dwarf::DW_EH_PE_udata2:
+ case dwarf::DW_EH_PE_sdata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ case dwarf::DW_EH_PE_sdata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ case dwarf::DW_EH_PE_sdata8:
+ return 8;
+ }
+}
+
+static uint64_t readPointer(const char *&Pos, bool is64Bit, unsigned Encoding) {
+ switch (getSizeForEncoding(is64Bit, Encoding)) {
+ case 2:
+ return readNext<uint16_t>(Pos);
+ break;
+ case 4:
+ return readNext<uint32_t>(Pos);
+ break;
+ case 8:
+ return readNext<uint64_t>(Pos);
+ break;
+ default:
+ llvm_unreachable("Illegal data size");
+ }
+}
+
+static void printMachOEHFrameSection(const MachOObjectFile *Obj,
+ std::map<uint64_t, SymbolRef> &Symbols,
+ const SectionRef &EHFrame) {
+ if (!Obj->isLittleEndian()) {
+ outs() << "warning: cannot handle big endian __eh_frame section\n";
+ return;
+ }
+
+ bool is64Bit = Obj->is64Bit();
+
+ outs() << "Contents of __eh_frame section:\n";
+
+ StringRef Contents;
+ EHFrame.getContents(Contents);
+
+ /// A few fields of the CIE are used when decoding the FDE's. This struct
+ /// will cache those fields we need so that we don't have to decode it
+ /// repeatedly for each FDE that references it.
+ struct DecodedCIE {
+ Optional<uint32_t> FDEPointerEncoding;
+ Optional<uint32_t> LSDAPointerEncoding;
+ bool hasAugmentationLength;
+ };
+
+ // Map from the start offset of the CIE to the cached data for that CIE.
+ DenseMap<uint64_t, DecodedCIE> CachedCIEs;
+
+ for (const char *Pos = Contents.data(), *End = Contents.end(); Pos != End; ) {
+
+ const char *EntryStartPos = Pos;
+
+ uint64_t Length = readNext<uint32_t>(Pos);
+ if (Length == 0xffffffff)
+ Length = readNext<uint64_t>(Pos);
+
+ // Save the Pos so that we can check the length we encoded against what we
+ // end up decoding.
+ const char *PosAfterLength = Pos;
+ const char *EntryEndPos = PosAfterLength + Length;
+
+ assert(EntryEndPos <= End &&
+ "__eh_frame entry length exceeds section size");
+
+ uint32_t ID = readNext<uint32_t>(Pos);
+ if (ID == 0) {
+ // This is a CIE.
+
+ uint32_t Version = readNext<uint8_t>(Pos);
+
+ // Parse a null terminated augmentation string
+ SmallString<8> AugmentationString;
+ for (uint8_t Char = readNext<uint8_t>(Pos); Char;
+ Char = readNext<uint8_t>(Pos))
+ AugmentationString.push_back(Char);
+
+ // Optionally parse the EH data if the augmentation string says it's there.
+ Optional<uint64_t> EHData;
+ if (StringRef(AugmentationString).count("eh"))
+ EHData = is64Bit ? readNext<uint64_t>(Pos) : readNext<uint32_t>(Pos);
+
+ unsigned ULEBByteCount;
+ uint64_t CodeAlignmentFactor = decodeULEB128((const uint8_t *)Pos,
+ &ULEBByteCount);
+ Pos += ULEBByteCount;
+
+ int64_t DataAlignmentFactor = decodeSLEB128((const uint8_t *)Pos,
+ &ULEBByteCount);
+ Pos += ULEBByteCount;
+
+ uint32_t ReturnAddressRegister = readNext<uint8_t>(Pos);
+
+ Optional<uint64_t> AugmentationLength;
+ Optional<uint32_t> LSDAPointerEncoding;
+ Optional<uint32_t> PersonalityEncoding;
+ Optional<uint64_t> Personality;
+ Optional<uint32_t> FDEPointerEncoding;
+ if (!AugmentationString.empty() && AugmentationString.front() == 'z') {
+ AugmentationLength = decodeULEB128((const uint8_t *)Pos,
+ &ULEBByteCount);
+ Pos += ULEBByteCount;
+
+ // Walk the augmentation string to get all the augmentation data.
+ for (unsigned i = 1, e = AugmentationString.size(); i != e; ++i) {
+ char Char = AugmentationString[i];
+ switch (Char) {
+ case 'e':
+ assert((i + 1) != e && AugmentationString[i + 1] == 'h' &&
+ "Expected 'eh' in augmentation string");
+ break;
+ case 'L':
+ assert(!LSDAPointerEncoding && "Duplicate LSDA encoding");
+ LSDAPointerEncoding = readNext<uint8_t>(Pos);
+ break;
+ case 'P': {
+ assert(!Personality && "Duplicate personality");
+ PersonalityEncoding = readNext<uint8_t>(Pos);
+ Personality = readPointer(Pos, is64Bit, *PersonalityEncoding);
+ break;
+ }
+ case 'R':
+ assert(!FDEPointerEncoding && "Duplicate FDE encoding");
+ FDEPointerEncoding = readNext<uint8_t>(Pos);
+ break;
+ case 'z':
+ llvm_unreachable("'z' must be first in the augmentation string");
+ }
+ }
+ }
+
+ outs() << "CIE:\n";
+ outs() << " Length: " << Length << "\n";
+ outs() << " CIE ID: " << ID << "\n";
+ outs() << " Version: " << Version << "\n";
+ outs() << " Augmentation String: " << AugmentationString << "\n";
+ if (EHData)
+ outs() << " EHData: " << *EHData << "\n";
+ outs() << " Code Alignment Factor: " << CodeAlignmentFactor << "\n";
+ outs() << " Data Alignment Factor: " << DataAlignmentFactor << "\n";
+ outs() << " Return Address Register: " << ReturnAddressRegister << "\n";
+ if (AugmentationLength) {
+ outs() << " Augmentation Data Length: " << *AugmentationLength << "\n";
+ if (LSDAPointerEncoding) {
+ outs() << " FDE LSDA Pointer Encoding: "
+ << *LSDAPointerEncoding << "\n";
+ }
+ if (Personality) {
+ outs() << " Personality Encoding: " << *PersonalityEncoding << "\n";
+ outs() << " Personality: " << *Personality << "\n";
+ }
+ if (FDEPointerEncoding) {
+ outs() << " FDE Address Pointer Encoding: "
+ << *FDEPointerEncoding << "\n";
+ }
+ }
+ // FIXME: Handle instructions.
+ // For now just emit some bytes
+ outs() << " Instructions:\n ";
+ dumpBytes(makeArrayRef((const uint8_t*)Pos, (const uint8_t*)EntryEndPos),
+ outs());
+ outs() << "\n";
+ Pos = EntryEndPos;
+
+ // Cache this entry.
+ uint64_t Offset = EntryStartPos - Contents.data();
+ CachedCIEs[Offset] = { FDEPointerEncoding, LSDAPointerEncoding,
+ AugmentationLength.hasValue() };
+ continue;
+ }
+
+ // This is an FDE.
+ // The CIE pointer for an FDE is the same location as the ID which we
+ // already read.
+ uint32_t CIEPointer = ID;
+
+ const char *CIEStart = PosAfterLength - CIEPointer;
+ assert(CIEStart >= Contents.data() &&
+ "FDE points to CIE before the __eh_frame start");
+
+ uint64_t CIEOffset = CIEStart - Contents.data();
+ auto CIEIt = CachedCIEs.find(CIEOffset);
+ if (CIEIt == CachedCIEs.end())
+ llvm_unreachable("Couldn't find CIE at offset in to __eh_frame section");
+
+ const DecodedCIE &CIE = CIEIt->getSecond();
+ assert(CIE.FDEPointerEncoding &&
+ "FDE references CIE which did not set pointer encoding");
+
+ uint64_t PCPointerSize = getSizeForEncoding(is64Bit,
+ *CIE.FDEPointerEncoding);
+
+ uint64_t PCBegin = readPointer(Pos, is64Bit, *CIE.FDEPointerEncoding);
+ uint64_t PCRange = readPointer(Pos, is64Bit, *CIE.FDEPointerEncoding);
+
+ Optional<uint64_t> AugmentationLength;
+ uint32_t LSDAPointerSize;
+ Optional<uint64_t> LSDAPointer;
+ if (CIE.hasAugmentationLength) {
+ unsigned ULEBByteCount;
+ AugmentationLength = decodeULEB128((const uint8_t *)Pos,
+ &ULEBByteCount);
+ Pos += ULEBByteCount;
+
+ // Decode the LSDA if the CIE augmentation string said we should.
+ if (CIE.LSDAPointerEncoding) {
+ LSDAPointerSize = getSizeForEncoding(is64Bit, *CIE.LSDAPointerEncoding);
+ LSDAPointer = readPointer(Pos, is64Bit, *CIE.LSDAPointerEncoding);
+ }
+ }
+
+ outs() << "FDE:\n";
+ outs() << " Length: " << Length << "\n";
+ outs() << " CIE Offset: " << CIEOffset << "\n";
+
+ if (PCPointerSize == 8) {
+ outs() << format(" PC Begin: %016" PRIx64, PCBegin) << "\n";
+ outs() << format(" PC Range: %016" PRIx64, PCRange) << "\n";
+ } else {
+ outs() << format(" PC Begin: %08" PRIx64, PCBegin) << "\n";
+ outs() << format(" PC Range: %08" PRIx64, PCRange) << "\n";
+ }
+ if (AugmentationLength) {
+ outs() << " Augmentation Data Length: " << *AugmentationLength << "\n";
+ if (LSDAPointer) {
+ if (LSDAPointerSize == 8)
+ outs() << format(" LSDA Pointer: %016\n" PRIx64, *LSDAPointer);
+ else
+ outs() << format(" LSDA Pointer: %08\n" PRIx64, *LSDAPointer);
+ }
+ }
+
+ // FIXME: Handle instructions.
+ // For now just emit some bytes
+ outs() << " Instructions:\n ";
+ dumpBytes(makeArrayRef((const uint8_t*)Pos, (const uint8_t*)EntryEndPos),
+ outs());
+ outs() << "\n";
+ Pos = EntryEndPos;
+ }
+}
+
void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) {
std::map<uint64_t, SymbolRef> Symbols;
for (const SymbolRef &SymRef : Obj->symbols()) {
// Discard any undefined or absolute symbols. They're not going to take part
// in the convenience lookup for unwind info and just take up resources.
- section_iterator Section = Obj->section_end();
- SymRef.getSection(Section);
+ section_iterator Section = *SymRef.getSection();
if (Section == Obj->section_end())
continue;
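
The CIE parser above leans on ULEB128/SLEB128 decoding: each byte contributes its low seven bits, least significant group first, and a set high bit means another byte follows. A minimal decoder, re-implemented here for illustration only (the real code uses llvm/Support/LEB128.h):

    #include <cstdint>
    #include <cstdio>

    // Minimal ULEB128 decoder: accumulate 7 bits per byte until a byte
    // with a clear high bit terminates the value.
    static uint64_t uleb128(const uint8_t *p, unsigned *n) {
      uint64_t value = 0;
      unsigned shift = 0, count = 0;
      uint8_t byte;
      do {
        byte = p[count++];
        value |= uint64_t(byte & 0x7f) << shift;
        shift += 7;
      } while (byte & 0x80);
      if (n)
        *n = count;
      return value;
    }

    int main() {
      const uint8_t buf[] = {0xE5, 0x8E, 0x26}; // the DWARF spec's example
      unsigned n;
      printf("%llu in %u bytes\n", (unsigned long long)uleb128(buf, &n), n);
      return 0; // prints: 624485 in 3 bytes
    }
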
@@ -6802,7 +7024,7 @@ void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) {
else if (SectName == "__unwind_info")
printMachOUnwindInfoSection(Obj, Symbols, Section);
else if (SectName == "__eh_frame")
- outs() << "llvm-objdump: warning: unhandled __eh_frame section\n";
+ printMachOEHFrameSection(Obj, Symbols, Section);
}
}
@@ -7128,36 +7350,20 @@ static void PrintSegmentCommand(uint32_t cmd, uint32_t cmdsize,
MachO::VM_PROT_EXECUTE)) != 0)
outs() << " maxprot ?" << format("0x%08" PRIx32, maxprot) << "\n";
else {
- if (maxprot & MachO::VM_PROT_READ)
- outs() << " maxprot r";
- else
- outs() << " maxprot -";
- if (maxprot & MachO::VM_PROT_WRITE)
- outs() << "w";
- else
- outs() << "-";
- if (maxprot & MachO::VM_PROT_EXECUTE)
- outs() << "x\n";
- else
- outs() << "-\n";
+ outs() << " maxprot ";
+ outs() << ((maxprot & MachO::VM_PROT_READ) ? "r" : "-");
+ outs() << ((maxprot & MachO::VM_PROT_WRITE) ? "w" : "-");
+ outs() << ((maxprot & MachO::VM_PROT_EXECUTE) ? "x\n" : "-\n");
}
if ((initprot &
~(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE |
MachO::VM_PROT_EXECUTE)) != 0)
outs() << " initprot ?" << format("0x%08" PRIx32, initprot) << "\n";
else {
- if (initprot & MachO::VM_PROT_READ)
- outs() << " initprot r";
- else
- outs() << " initprot -";
- if (initprot & MachO::VM_PROT_WRITE)
- outs() << "w";
- else
- outs() << "-";
- if (initprot & MachO::VM_PROT_EXECUTE)
- outs() << "x\n";
- else
- outs() << "-\n";
+ outs() << " initprot ";
+ outs() << ((initprot & MachO::VM_PROT_READ) ? "r" : "-");
+ outs() << ((initprot & MachO::VM_PROT_WRITE) ? "w" : "-");
+ outs() << ((initprot & MachO::VM_PROT_EXECUTE) ? "x\n" : "-\n");
}
} else {
outs() << " maxprot " << format("0x%08" PRIx32, maxprot) << "\n";
@@ -7611,26 +7817,11 @@ static void PrintUuidLoadCommand(MachO::uuid_command uuid) {
else
outs() << "\n";
outs() << " uuid ";
- outs() << format("%02" PRIX32, uuid.uuid[0]);
- outs() << format("%02" PRIX32, uuid.uuid[1]);
- outs() << format("%02" PRIX32, uuid.uuid[2]);
- outs() << format("%02" PRIX32, uuid.uuid[3]);
- outs() << "-";
- outs() << format("%02" PRIX32, uuid.uuid[4]);
- outs() << format("%02" PRIX32, uuid.uuid[5]);
- outs() << "-";
- outs() << format("%02" PRIX32, uuid.uuid[6]);
- outs() << format("%02" PRIX32, uuid.uuid[7]);
- outs() << "-";
- outs() << format("%02" PRIX32, uuid.uuid[8]);
- outs() << format("%02" PRIX32, uuid.uuid[9]);
- outs() << "-";
- outs() << format("%02" PRIX32, uuid.uuid[10]);
- outs() << format("%02" PRIX32, uuid.uuid[11]);
- outs() << format("%02" PRIX32, uuid.uuid[12]);
- outs() << format("%02" PRIX32, uuid.uuid[13]);
- outs() << format("%02" PRIX32, uuid.uuid[14]);
- outs() << format("%02" PRIX32, uuid.uuid[15]);
+ for (int i = 0; i < 16; ++i) {
+ outs() << format("%02" PRIX32, uuid.uuid[i]);
+ if (i == 3 || i == 5 || i == 7 || i == 9)
+ outs() << "-";
+ }
outs() << "\n";
}
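
The loop prints the 16 UUID bytes in the canonical 8-4-4-4-12 grouping by emitting a dash after bytes 3, 5, 7 and 9. A standalone sketch with sample data:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t uuid[16] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                          0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef};
      // Dashes after bytes 3, 5, 7 and 9 yield the 8-4-4-4-12 layout.
      for (int i = 0; i < 16; ++i) {
        printf("%02X", unsigned(uuid[i]));
        if (i == 3 || i == 5 || i == 7 || i == 9)
          printf("-");
      }
      printf("\n"); // 12345678-9ABC-DEF0-0123-456789ABCDEF
      return 0;
    }
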
@@ -7650,30 +7841,47 @@ static void PrintRpathLoadCommand(MachO::rpath_command rpath, const char *Ptr) {
}
static void PrintVersionMinLoadCommand(MachO::version_min_command vd) {
- if (vd.cmd == MachO::LC_VERSION_MIN_MACOSX)
- outs() << " cmd LC_VERSION_MIN_MACOSX\n";
- else if (vd.cmd == MachO::LC_VERSION_MIN_IPHONEOS)
- outs() << " cmd LC_VERSION_MIN_IPHONEOS\n";
- else
- outs() << " cmd " << vd.cmd << " (?)\n";
+ StringRef LoadCmdName;
+ switch (vd.cmd) {
+ case MachO::LC_VERSION_MIN_MACOSX:
+ LoadCmdName = "LC_VERSION_MIN_MACOSX";
+ break;
+ case MachO::LC_VERSION_MIN_IPHONEOS:
+ LoadCmdName = "LC_VERSION_MIN_IPHONEOS";
+ break;
+ case MachO::LC_VERSION_MIN_TVOS:
+ LoadCmdName = "LC_VERSION_MIN_TVOS";
+ break;
+ case MachO::LC_VERSION_MIN_WATCHOS:
+ LoadCmdName = "LC_VERSION_MIN_WATCHOS";
+ break;
+ default:
+ llvm_unreachable("Unknown version min load command");
+ }
+
+ outs() << " cmd " << LoadCmdName << '\n';
outs() << " cmdsize " << vd.cmdsize;
if (vd.cmdsize != sizeof(struct MachO::version_min_command))
outs() << " Incorrect size\n";
else
outs() << "\n";
- outs() << " version " << ((vd.version >> 16) & 0xffff) << "."
- << ((vd.version >> 8) & 0xff);
- if ((vd.version & 0xff) != 0)
- outs() << "." << (vd.version & 0xff);
+ outs() << " version "
+ << MachOObjectFile::getVersionMinMajor(vd, false) << "."
+ << MachOObjectFile::getVersionMinMinor(vd, false);
+ uint32_t Update = MachOObjectFile::getVersionMinUpdate(vd, false);
+ if (Update != 0)
+ outs() << "." << Update;
outs() << "\n";
if (vd.sdk == 0)
outs() << " sdk n/a";
else {
- outs() << " sdk " << ((vd.sdk >> 16) & 0xffff) << "."
- << ((vd.sdk >> 8) & 0xff);
+ outs() << " sdk "
+ << MachOObjectFile::getVersionMinMajor(vd, true) << "."
+ << MachOObjectFile::getVersionMinMinor(vd, true);
}
- if ((vd.sdk & 0xff) != 0)
- outs() << "." << (vd.sdk & 0xff);
+ Update = MachOObjectFile::getVersionMinUpdate(vd, true);
+ if (Update != 0)
+ outs() << "." << Update;
outs() << "\n";
}
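
LC_VERSION_MIN_* load commands pack a version as xxxx.yy.zz into a single 32-bit word; the removed shift/mask lines show the layout that the getVersionMin{Major,Minor,Update} helpers now wrap. A standalone sketch of the decode:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Major in the high 16 bits, minor and update in one byte each.
      uint32_t version = 0x000A0B02; // sample value: 10.11.2
      unsigned major = (version >> 16) & 0xffff;
      unsigned minor = (version >> 8) & 0xff;
      unsigned update = version & 0xff;
      printf("%u.%u.%u\n", major, minor, update); // prints 10.11.2
      return 0;
    }
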
@@ -8366,7 +8574,9 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
MachO::rpath_command Rpath = Obj->getRpathCommand(Command);
PrintRpathLoadCommand(Rpath, Command.Ptr);
} else if (Command.C.cmd == MachO::LC_VERSION_MIN_MACOSX ||
- Command.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) {
+ Command.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS ||
+ Command.C.cmd == MachO::LC_VERSION_MIN_TVOS ||
+ Command.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) {
MachO::version_min_command Vd = Obj->getVersionMinLoadCommand(Command);
PrintVersionMinLoadCommand(Vd);
} else if (Command.C.cmd == MachO::LC_SOURCE_VERSION) {
@@ -8536,6 +8746,7 @@ public:
StringRef segmentName(uint32_t SegIndex);
StringRef sectionName(uint32_t SegIndex, uint64_t SegOffset);
uint64_t address(uint32_t SegIndex, uint64_t SegOffset);
+ bool isValidSegIndexAndOffset(uint32_t SegIndex, uint64_t SegOffset);
private:
struct SectionInfo {
@@ -8559,8 +8770,7 @@ SegInfo::SegInfo(const object::MachOObjectFile *Obj) {
uint64_t CurSegAddress;
for (const SectionRef &Section : Obj->sections()) {
SectionInfo Info;
- if (error(Section.getName(Info.SectionName)))
- return;
+ error(Section.getName(Info.SectionName));
Info.Address = Section.getAddress();
Info.Size = Section.getSize();
Info.SegmentName =
@@ -8585,6 +8795,20 @@ StringRef SegInfo::segmentName(uint32_t SegIndex) {
llvm_unreachable("invalid segIndex");
}
+bool SegInfo::isValidSegIndexAndOffset(uint32_t SegIndex,
+ uint64_t OffsetInSeg) {
+ for (const SectionInfo &SI : Sections) {
+ if (SI.SegmentIndex != SegIndex)
+ continue;
+ if (SI.OffsetInSegment > OffsetInSeg)
+ continue;
+ if (OffsetInSeg >= (SI.OffsetInSegment + SI.Size))
+ continue;
+ return true;
+ }
+ return false;
+}
+
const SegInfo::SectionInfo &SegInfo::findSection(uint32_t SegIndex,
uint64_t OffsetInSeg) {
for (const SectionInfo &SI : Sections) {
@@ -8753,6 +8977,8 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable()) {
uint32_t SegIndex = Entry.segmentIndex();
uint64_t OffsetInSeg = Entry.segmentOffset();
+ if (!sectionTable.isValidSegIndexAndOffset(SegIndex, OffsetInSeg))
+ continue;
uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg);
const char *SymbolName = nullptr;
StringRef name = Entry.symbolName();
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 275eb9c..22167c7 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -73,6 +73,13 @@ Disassembled("d", cl::desc("Alias for --disassemble"),
cl::aliasopt(Disassemble));
cl::opt<bool>
+llvm::DisassembleAll("disassemble-all",
+ cl::desc("Display assembler mnemonics for the machine instructions"));
+static cl::alias
+DisassembleAlld("D", cl::desc("Alias for --disassemble-all"),
+ cl::aliasopt(DisassembleAll));
+
+cl::opt<bool>
llvm::Relocations("r", cl::desc("Display the relocation entries in the file"));
cl::opt<bool>
@@ -130,6 +137,13 @@ SectionHeadersShorter("h", cl::desc("Alias for --section-headers"),
cl::aliasopt(SectionHeaders));
cl::list<std::string>
+llvm::FilterSections("section", cl::desc("Operate on the specified sections only. "
+ "With -macho dump segment,section"));
+cl::alias
+static FilterSectionsj("j", cl::desc("Alias for --section"),
+ cl::aliasopt(llvm::FilterSections));
+
+cl::list<std::string>
llvm::MAttrs("mattr",
cl::CommaSeparated,
cl::desc("Target specific attributes"),
@@ -163,22 +177,86 @@ cl::opt<bool> PrintFaultMaps("fault-map-section",
cl::desc("Display contents of faultmap section"));
static StringRef ToolName;
-static int ReturnValue = EXIT_SUCCESS;
-bool llvm::error(std::error_code EC) {
+namespace {
+typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+
+class SectionFilterIterator {
+public:
+ SectionFilterIterator(FilterPredicate P,
+ llvm::object::section_iterator const &I,
+ llvm::object::section_iterator const &E)
+ : Predicate(P), Iterator(I), End(E) {
+ ScanPredicate();
+ }
+ const llvm::object::SectionRef &operator*() const { return *Iterator; }
+ SectionFilterIterator &operator++() {
+ ++Iterator;
+ ScanPredicate();
+ return *this;
+ }
+ bool operator!=(SectionFilterIterator const &Other) const {
+ return Iterator != Other.Iterator;
+ }
+
+private:
+ void ScanPredicate() {
+ while (Iterator != End && !Predicate(*Iterator)) {
+ ++Iterator;
+ }
+ }
+ FilterPredicate Predicate;
+ llvm::object::section_iterator Iterator;
+ llvm::object::section_iterator End;
+};
+
+class SectionFilter {
+public:
+ SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
+ : Predicate(P), Object(O) {}
+ SectionFilterIterator begin() {
+ return SectionFilterIterator(Predicate, Object.section_begin(),
+ Object.section_end());
+ }
+ SectionFilterIterator end() {
+ return SectionFilterIterator(Predicate, Object.section_end(),
+ Object.section_end());
+ }
+
+private:
+ FilterPredicate Predicate;
+ llvm::object::ObjectFile const &Object;
+};
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
+ return SectionFilter([](llvm::object::SectionRef const &S) {
+ if(FilterSections.empty())
+ return true;
+ llvm::StringRef String;
+ std::error_code error = S.getName(String);
+ if (error)
+ return false;
+ return std::find(FilterSections.begin(),
+ FilterSections.end(),
+ String) != FilterSections.end();
+ },
+ O);
+}
+}
+
+void llvm::error(std::error_code EC) {
if (!EC)
- return false;
+ return;
- outs() << ToolName << ": error reading file: " << EC.message() << ".\n";
- outs().flush();
- ReturnValue = EXIT_FAILURE;
- return true;
+ errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
+ errs().flush();
+ exit(1);
}
-static void report_error(StringRef File, std::error_code EC) {
+LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
+ std::error_code EC) {
assert(EC);
errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
- ReturnValue = EXIT_FAILURE;
+ exit(1);
}
static const Target *getTarget(const ObjectFile *Obj = nullptr) {
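
SectionFilterIterator and SectionFilter above implement a predicate-skipping iterator: advancing scans forward until the predicate accepts an element, so a range-for sees only matching sections. A standalone sketch of the pattern, with std::vector<int> standing in for the object file's section list:

    #include <cstdio>
    #include <functional>
    #include <vector>

    // Iterator that skips elements the predicate rejects, both on
    // construction and on each increment.
    class FilterIter {
      std::function<bool(int)> Pred;
      std::vector<int>::const_iterator It, End;
      void scan() {
        while (It != End && !Pred(*It))
          ++It;
      }

    public:
      FilterIter(std::function<bool(int)> P, std::vector<int>::const_iterator I,
                 std::vector<int>::const_iterator E)
          : Pred(std::move(P)), It(I), End(E) {
        scan();
      }
      int operator*() const { return *It; }
      FilterIter &operator++() {
        ++It;
        scan();
        return *this;
      }
      bool operator!=(const FilterIter &O) const { return It != O.It; }
    };

    // Range wrapper so the filter composes with range-based for loops.
    struct Filtered {
      std::function<bool(int)> Pred;
      const std::vector<int> &V;
      FilterIter begin() const { return FilterIter(Pred, V.begin(), V.end()); }
      FilterIter end() const { return FilterIter(Pred, V.end(), V.end()); }
    };

    int main() {
      std::vector<int> Sections{1, 2, 3, 4, 5, 6};
      for (int S : Filtered{[](int S) { return S % 2 == 0; }, Sections})
        printf("%d ", S); // prints: 2 4 6
      printf("\n");
      return 0;
    }
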
@@ -205,10 +283,8 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
Error);
- if (!TheTarget) {
- errs() << ToolName << ": " << Error;
- return nullptr;
- }
+ if (!TheTarget)
+ report_fatal_error("can't find target: " + Error);
// Update the triple name and return the found target.
TripleName = TheTriple.getTriple();
@@ -301,11 +377,12 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
template <class ELFT>
static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
- DataRefImpl Rel,
+ const RelocationRef &RelRef,
SmallVectorImpl<char> &Result) {
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
+
typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
- typedef typename ELFObjectFile<ELFT>::Elf_Rel Elf_Rel;
typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
const ELFFile<ELFT> &EF = *Obj->getELFFile();
@@ -327,36 +404,31 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
if (std::error_code EC = StrTabOrErr.getError())
return EC;
StringRef StrTab = *StrTabOrErr;
- uint8_t type;
+ uint8_t type = RelRef.getType();
StringRef res;
int64_t addend = 0;
- uint16_t symbol_index = 0;
switch (Sec->sh_type) {
default:
return object_error::parse_failed;
case ELF::SHT_REL: {
- const Elf_Rel *ERel = Obj->getRel(Rel);
- type = ERel->getType(EF.isMips64EL());
- symbol_index = ERel->getSymbol(EF.isMips64EL());
// TODO: Read implicit addend from section data.
break;
}
case ELF::SHT_RELA: {
const Elf_Rela *ERela = Obj->getRela(Rel);
- type = ERela->getType(EF.isMips64EL());
- symbol_index = ERela->getSymbol(EF.isMips64EL());
addend = ERela->r_addend;
break;
}
}
- const Elf_Sym *symb =
- EF.template getEntry<Elf_Sym>(Sec->sh_link, symbol_index);
+ symbol_iterator SI = RelRef.getSymbol();
+ const Elf_Sym *symb = Obj->getSymbol(SI->getRawDataRefImpl());
StringRef Target;
- ErrorOr<const Elf_Shdr *> SymSec = EF.getSection(symb);
- if (std::error_code EC = SymSec.getError())
- return EC;
if (symb->getType() == ELF::STT_SECTION) {
- ErrorOr<StringRef> SecName = EF.getSectionName(*SymSec);
+ ErrorOr<section_iterator> SymSI = SI->getSection();
+ if (std::error_code EC = SymSI.getError())
+ return EC;
+ const Elf_Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl());
+ ErrorOr<StringRef> SecName = EF.getSectionName(SymSec);
if (std::error_code EC = SecName.getError())
return EC;
Target = *SecName;
@@ -404,6 +476,7 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
break;
}
case ELF::EM_386:
+ case ELF::EM_IAMCU:
case ELF::EM_ARM:
case ELF::EM_HEXAGON:
case ELF::EM_MIPS:
@@ -418,9 +491,8 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
}
static std::error_code getRelocationValueString(const ELFObjectFileBase *Obj,
- const RelocationRef &RelRef,
+ const RelocationRef &Rel,
SmallVectorImpl<char> &Result) {
- DataRefImpl Rel = RelRef.getRawDataRefImpl();
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
return getRelocationValueString(ELF32LE, Rel, Result);
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
@@ -471,7 +543,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
// If we couldn't find a symbol that this relocation refers to, try
// to find a section beginning instead.
- for (const SectionRef &Section : O->sections()) {
+ for (const SectionRef &Section : ToolSectionFilter(*O)) {
std::error_code ec;
StringRef Name;
@@ -496,8 +568,8 @@ static void printRelocationTargetName(const MachOObjectFile *O,
symbol_iterator SI = O->symbol_begin();
advance(SI, Val);
ErrorOr<StringRef> SOrErr = SI->getName();
- if (!error(SOrErr.getError()))
- S = *SOrErr;
+ error(SOrErr.getError());
+ S = *SOrErr;
} else {
section_iterator SI = O->section_begin();
// Adjust for the fact that sections are 1-indexed.
@@ -732,10 +804,6 @@ static bool getHidden(RelocationRef RelRef) {
static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
const Target *TheTarget = getTarget(Obj);
- // getTarget() will have already issued a diagnostic if necessary, so
- // just bail here if it failed.
- if (!TheTarget)
- return;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
@@ -748,42 +816,28 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
- if (!MRI) {
- errs() << "error: no register info for target " << TripleName << "\n";
- return;
- }
+ if (!MRI)
+ report_fatal_error("error: no register info for target " + TripleName);
// Set up disassembler.
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName));
- if (!AsmInfo) {
- errs() << "error: no assembly info for target " << TripleName << "\n";
- return;
- }
-
+ if (!AsmInfo)
+ report_fatal_error("error: no assembly info for target " + TripleName);
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
- if (!STI) {
- errs() << "error: no subtarget info for target " << TripleName << "\n";
- return;
- }
-
+ if (!STI)
+ report_fatal_error("error: no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
- if (!MII) {
- errs() << "error: no instruction info for target " << TripleName << "\n";
- return;
- }
-
+ if (!MII)
+ report_fatal_error("error: no instruction info for target " + TripleName);
std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
-
- if (!DisAsm) {
- errs() << "error: no disassembler for target " << TripleName << "\n";
- return;
- }
+ if (!DisAsm)
+ report_fatal_error("error: no disassembler for target " + TripleName);
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
@@ -791,11 +845,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
- if (!IP) {
- errs() << "error: no instruction printer for target " << TripleName
- << '\n';
- return;
- }
+ if (!IP)
+ report_fatal_error("error: no instruction printer for target " +
+ TripleName);
IP->setPrintImmHex(PrintImmHex);
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
@@ -806,38 +858,75 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// in RelocSecs contain the relocations for section S.
std::error_code EC;
std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
- for (const SectionRef &Section : Obj->sections()) {
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
section_iterator Sec2 = Section.getRelocatedSection();
if (Sec2 != Obj->section_end())
SectionRelocMap[*Sec2].push_back(Section);
}
// Create a mapping from virtual address to symbol name. This is used to
- // pretty print the target of a call.
- std::vector<std::pair<uint64_t, StringRef>> AllSymbols;
- if (MIA) {
- for (const SymbolRef &Symbol : Obj->symbols()) {
- if (Symbol.getType() != SymbolRef::ST_Function)
- continue;
+ // pretty print the symbols while disassembling.
+ typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy;
+ std::map<SectionRef, SectionSymbolsTy> AllSymbols;
+ for (const SymbolRef &Symbol : Obj->symbols()) {
+ ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
+ error(AddressOrErr.getError());
+ uint64_t Address = *AddressOrErr;
+
+ ErrorOr<StringRef> Name = Symbol.getName();
+ error(Name.getError());
+ if (Name->empty())
+ continue;
- ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
- if (error(AddressOrErr.getError()))
- break;
- uint64_t Address = *AddressOrErr;
+ ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
+ error(SectionOrErr.getError());
+ section_iterator SecI = *SectionOrErr;
+ if (SecI == Obj->section_end())
+ continue;
- ErrorOr<StringRef> Name = Symbol.getName();
- if (error(Name.getError()))
- break;
- if (Name->empty())
+ AllSymbols[*SecI].emplace_back(Address, *Name);
+ }
+
+ // Create a mapping from virtual address to section.
+ std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
+ for (SectionRef Sec : Obj->sections())
+ SectionAddresses.emplace_back(Sec.getAddress(), Sec);
+ array_pod_sort(SectionAddresses.begin(), SectionAddresses.end());
+
+ // Linked executables (.exe and .dll files) typically don't include a real
+ // symbol table, but they might contain an export table.
+ if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
+ for (const auto &ExportEntry : COFFObj->export_directories()) {
+ StringRef Name;
+ error(ExportEntry.getSymbolName(Name));
+ if (Name.empty())
continue;
- AllSymbols.push_back(std::make_pair(Address, *Name));
- }
+ uint32_t RVA;
+ error(ExportEntry.getExportRVA(RVA));
+
+ uint64_t VA = COFFObj->getImageBase() + RVA;
+ auto Sec = std::upper_bound(
+ SectionAddresses.begin(), SectionAddresses.end(), VA,
+ [](uint64_t LHS, const std::pair<uint64_t, SectionRef> &RHS) {
+ return LHS < RHS.first;
+ });
+ if (Sec != SectionAddresses.begin())
+ --Sec;
+ else
+ Sec = SectionAddresses.end();
- array_pod_sort(AllSymbols.begin(), AllSymbols.end());
+ if (Sec != SectionAddresses.end())
+ AllSymbols[Sec->second].emplace_back(VA, Name);
+ }
}
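// Hedged sketch of the lookup idiom used above and reused below: keep
// (start address, value) pairs sorted, std::upper_bound the query address,
// then step back one entry to land on the last entry starting at or before
// it. findByAddress is illustrative only, not a helper in llvm-objdump.
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

template <typename T>
static const T *findByAddress(
    const std::vector<std::pair<uint64_t, T>> &Sorted, uint64_t VA) {
  auto It = std::upper_bound(
      Sorted.begin(), Sorted.end(), VA,
      [](uint64_t LHS, const std::pair<uint64_t, T> &RHS) {
        return LHS < RHS.first;
      });
  if (It == Sorted.begin())
    return nullptr; // VA precedes every entry.
  --It;             // Last entry whose start is <= VA.
  return &It->second;
}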
- for (const SectionRef &Section : Obj->sections()) {
- if (!Section.isText() || Section.isVirtual())
+ // Sort all the symbols so we can use a simple binary search to find a
+ // symbol near an address.
+ for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
+ array_pod_sort(SecSyms.second.begin(), SecSyms.second.end());
+
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
+ if (!DisassembleAll && (!Section.isText() || Section.isVirtual()))
continue;
uint64_t SectionAddr = Section.getAddress();
@@ -845,27 +934,23 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (!SectSize)
continue;
- // Make a list of all the symbols in this section.
- std::vector<std::pair<uint64_t, StringRef>> Symbols;
- for (const SymbolRef &Symbol : Obj->symbols()) {
- if (Section.containsSymbol(Symbol)) {
- ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
- if (error(AddressOrErr.getError()))
- break;
- uint64_t Address = *AddressOrErr;
- Address -= SectionAddr;
- if (Address >= SectSize)
- continue;
-
- ErrorOr<StringRef> Name = Symbol.getName();
- if (error(Name.getError()))
- break;
- Symbols.push_back(std::make_pair(Address, *Name));
+ // Get the list of all the symbols in this section.
+ SectionSymbolsTy &Symbols = AllSymbols[Section];
+ std::vector<uint64_t> DataMappingSymsAddr;
+ std::vector<uint64_t> TextMappingSymsAddr;
+ if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+ for (const auto &Symb : Symbols) {
+ uint64_t Address = Symb.first;
+ StringRef Name = Symb.second;
+ if (Name.startswith("$d"))
+ DataMappingSymsAddr.push_back(Address - SectionAddr);
+ if (Name.startswith("$x"))
+ TextMappingSymsAddr.push_back(Address - SectionAddr);
}
}
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
+ std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
+ std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
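// One way to read the $d/$x bookkeeping above (a sketch under the assumption
// that mapping symbols partition the section): the most recent marker at or
// before an offset decides whether the bytes there are data or code.
// isDataAt is a made-up helper, not part of llvm-objdump.
#include <algorithm>
#include <cstdint>
#include <vector>

static bool isDataAt(const std::vector<uint64_t> &DataSymsAddr,
                     const std::vector<uint64_t> &TextSymsAddr, uint64_t Off) {
  auto LastAtOrBefore = [Off](const std::vector<uint64_t> &V) -> int64_t {
    auto It = std::upper_bound(V.begin(), V.end(), Off);
    return It == V.begin() ? -1 : int64_t(*--It);
  };
  // An offset is marked $d or $x, not both; with no preceding marker the
  // section is assumed to start as text.
  return LastAtOrBefore(DataSymsAddr) > LastAtOrBefore(TextSymsAddr);
}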
// Make a list of all the relocations for this section.
std::vector<RelocationRef> Rels;
@@ -886,8 +971,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
SegmentName = MachO->getSectionFinalSegmentName(DR);
}
StringRef name;
- if (error(Section.getName(name)))
- break;
+ error(Section.getName(name));
outs() << "Disassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
@@ -895,14 +979,13 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || Symbols[0].first != 0)
- Symbols.insert(Symbols.begin(), std::make_pair(0, name));
+ Symbols.insert(Symbols.begin(), std::make_pair(SectionAddr, name));
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
StringRef BytesStr;
- if (error(Section.getContents(BytesStr)))
- break;
+ error(Section.getContents(BytesStr));
ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
BytesStr.size());
@@ -914,11 +997,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Disassemble symbol by symbol.
for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
- uint64_t Start = Symbols[si].first;
- // The end is either the section end or the beginning of the next symbol.
- uint64_t End = (si == se - 1) ? SectSize : Symbols[si + 1].first;
+ uint64_t Start = Symbols[si].first - SectionAddr;
+ // The end is either the section end or the beginning of the next
+ // symbol.
+ uint64_t End =
+ (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr;
+ // Don't try to disassemble beyond the end of section contents.
+ if (End > SectSize)
+ End = SectSize;
// If this symbol has the same address as the next symbol, then skip it.
- if (Start == End)
+ if (Start >= End)
continue;
outs() << '\n' << Symbols[si].second << ":\n";
@@ -932,6 +1020,45 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
for (Index = Start; Index < End; Index += Size) {
MCInst Inst;
+ // AArch64 ELF binaries can interleave data and text in the
+ // same section. We rely on the markers introduced to
+ // understand what we need to dump.
+ if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+ uint64_t Stride = 0;
+
+ auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
+ DataMappingSymsAddr.end(), Index);
+ if (DAI != DataMappingSymsAddr.end() && *DAI == Index) {
+ // Switch to data.
+ while (Index < End) {
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ outs() << "\t";
+ if (Index + 4 <= End) {
+ Stride = 4;
+ dumpBytes(Bytes.slice(Index, 4), outs());
+ outs() << "\t.word";
+ } else if (Index + 2 <= End) {
+ Stride = 2;
+ dumpBytes(Bytes.slice(Index, 2), outs());
+ outs() << "\t.short";
+ } else {
+ Stride = 1;
+ dumpBytes(Bytes.slice(Index, 1), outs());
+ outs() << "\t.byte";
+ }
+ Index += Stride;
+ outs() << "\n";
+ auto TAI = std::lower_bound(TextMappingSymsAddr.begin(),
+ TextMappingSymsAddr.end(), Index);
+ if (TAI != TextMappingSymsAddr.end() && *TAI == Index)
+ break;
+ }
+ }
+ }
+
+ if (Index >= End)
+ break;
+
if (DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, DebugOut,
CommentStream)) {
@@ -940,26 +1067,55 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
SectionAddr + Index, outs(), "", *STI);
outs() << CommentStream.str();
Comments.clear();
+
+ // Try to resolve the target of a call, tail call, etc. to a specific
+ // symbol.
if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) ||
MIA->isConditionalBranch(Inst))) {
uint64_t Target;
if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) {
- auto TargetSym = std::upper_bound(
- AllSymbols.begin(), AllSymbols.end(), Target,
- [](uint64_t LHS, const std::pair<uint64_t, StringRef> &RHS) {
- return LHS < RHS.first;
- });
- if (TargetSym != AllSymbols.begin())
- --TargetSym;
- else
- TargetSym = AllSymbols.end();
-
- if (TargetSym != AllSymbols.end()) {
- outs() << " <" << TargetSym->second;
- uint64_t Disp = Target - TargetSym->first;
- if (Disp)
- outs() << '+' << utohexstr(Disp);
- outs() << '>';
+ // In a relocatable object, the target's section must reside in
+ // the same section as the call instruction or it is accessed
+ // through a relocation.
+ //
+ // In a non-relocatable object, the target may be in any section.
+ //
+ // N.B. We don't walk the relocations in the relocatable case yet.
+ auto *TargetSectionSymbols = &Symbols;
+ if (!Obj->isRelocatableObject()) {
+ auto SectionAddress = std::upper_bound(
+ SectionAddresses.begin(), SectionAddresses.end(), Target,
+ [](uint64_t LHS,
+ const std::pair<uint64_t, SectionRef> &RHS) {
+ return LHS < RHS.first;
+ });
+ if (SectionAddress != SectionAddresses.begin()) {
+ --SectionAddress;
+ TargetSectionSymbols = &AllSymbols[SectionAddress->second];
+ } else {
+ TargetSectionSymbols = nullptr;
+ }
+ }
+
+ // Find the first symbol in the section whose offset is less than
+ // or equal to the target.
+ if (TargetSectionSymbols) {
+ auto TargetSym = std::upper_bound(
+ TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
+ Target, [](uint64_t LHS,
+ const std::pair<uint64_t, StringRef> &RHS) {
+ return LHS < RHS.first;
+ });
+ if (TargetSym != TargetSectionSymbols->begin()) {
+ --TargetSym;
+ uint64_t TargetAddress = std::get<0>(*TargetSym);
+ StringRef TargetName = std::get<1>(*TargetSym);
+ outs() << " <" << TargetName;
+ uint64_t Disp = Target - TargetAddress;
+ if (Disp)
+ outs() << '+' << utohexstr(Disp);
+ outs() << '>';
+ }
}
}
}
@@ -983,8 +1139,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Stop when rel_cur's address is past the current instruction.
if (addr >= Index + Size) break;
rel_cur->getTypeName(name);
- if (error(getRelocationValueString(*rel_cur, val)))
- goto skip_print_rel;
+ error(getRelocationValueString(*rel_cur, val));
outs() << format(Fmt.data(), SectionAddr + addr) << name
<< "\t" << val << "\n";
@@ -1004,12 +1159,11 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
if (!Obj->isRelocatableObject())
return;
- for (const SectionRef &Section : Obj->sections()) {
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
StringRef secname;
- if (error(Section.getName(secname)))
- continue;
+ error(Section.getName(secname));
outs() << "RELOCATION RECORDS FOR [" << secname << "]:\n";
for (const RelocationRef &Reloc : Section.relocations()) {
bool hidden = getHidden(Reloc);
@@ -1019,8 +1173,7 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
if (hidden)
continue;
Reloc.getTypeName(relocname);
- if (error(getRelocationValueString(Reloc, valuestr)))
- continue;
+ error(getRelocationValueString(Reloc, valuestr));
outs() << format(Fmt.data(), address) << " " << relocname << " "
<< valuestr << "\n";
}
@@ -1032,10 +1185,9 @@ void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
outs() << "Sections:\n"
"Idx Name Size Address Type\n";
unsigned i = 0;
- for (const SectionRef &Section : Obj->sections()) {
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
- if (error(Section.getName(Name)))
- return;
+ error(Section.getName(Name));
uint64_t Address = Section.getAddress();
uint64_t Size = Section.getSize();
bool Text = Section.isText();
@@ -1051,11 +1203,10 @@ void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
void llvm::PrintSectionContents(const ObjectFile *Obj) {
std::error_code EC;
- for (const SectionRef &Section : Obj->sections()) {
+ for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
StringRef Contents;
- if (error(Section.getName(Name)))
- continue;
+ error(Section.getName(Name));
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if (!Size)
@@ -1069,8 +1220,7 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
continue;
}
- if (error(Section.getContents(Contents)))
- continue;
+ error(Section.getContents(Contents));
// Dump out the content as hex and printable ascii characters.
for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
@@ -1098,84 +1248,28 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
}
}
-static void PrintCOFFSymbolTable(const COFFObjectFile *coff) {
- for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
- ErrorOr<COFFSymbolRef> Symbol = coff->getSymbol(SI);
- StringRef Name;
- if (error(Symbol.getError()))
- return;
-
- if (error(coff->getSymbolName(*Symbol, Name)))
- return;
-
- outs() << "[" << format("%2d", SI) << "]"
- << "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")"
- << "(fl 0x00)" // Flag bits, which COFF doesn't have.
- << "(ty " << format("%3x", unsigned(Symbol->getType())) << ")"
- << "(scl " << format("%3x", unsigned(Symbol->getStorageClass())) << ") "
- << "(nx " << unsigned(Symbol->getNumberOfAuxSymbols()) << ") "
- << "0x" << format("%08x", unsigned(Symbol->getValue())) << " "
- << Name << "\n";
-
- for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) {
- if (Symbol->isSectionDefinition()) {
- const coff_aux_section_definition *asd;
- if (error(coff->getAuxSymbol<coff_aux_section_definition>(SI + 1, asd)))
- return;
-
- int32_t AuxNumber = asd->getNumber(Symbol->isBigObj());
-
- outs() << "AUX "
- << format("scnlen 0x%x nreloc %d nlnno %d checksum 0x%x "
- , unsigned(asd->Length)
- , unsigned(asd->NumberOfRelocations)
- , unsigned(asd->NumberOfLinenumbers)
- , unsigned(asd->CheckSum))
- << format("assoc %d comdat %d\n"
- , unsigned(AuxNumber)
- , unsigned(asd->Selection));
- } else if (Symbol->isFileRecord()) {
- const char *FileName;
- if (error(coff->getAuxSymbol<char>(SI + 1, FileName)))
- return;
-
- StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() *
- coff->getSymbolTableEntrySize());
- outs() << "AUX " << Name.rtrim(StringRef("\0", 1)) << '\n';
-
- SI = SI + Symbol->getNumberOfAuxSymbols();
- break;
- } else {
- outs() << "AUX Unknown\n";
- }
- }
- }
-}
-
void llvm::PrintSymbolTable(const ObjectFile *o) {
outs() << "SYMBOL TABLE:\n";
if (const COFFObjectFile *coff = dyn_cast<const COFFObjectFile>(o)) {
- PrintCOFFSymbolTable(coff);
+ printCOFFSymbolTable(coff);
return;
}
for (const SymbolRef &Symbol : o->symbols()) {
ErrorOr<uint64_t> AddressOrError = Symbol.getAddress();
- if (error(AddressOrError.getError()))
- continue;
+ error(AddressOrError.getError());
uint64_t Address = *AddressOrError;
SymbolRef::Type Type = Symbol.getType();
uint32_t Flags = Symbol.getFlags();
- section_iterator Section = o->section_end();
- if (error(Symbol.getSection(Section)))
- continue;
+ ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
+ error(SectionOrErr.getError());
+ section_iterator Section = *SectionOrErr;
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
Section->getName(Name);
} else {
ErrorOr<StringRef> NameOrErr = Symbol.getName();
- if (error(NameOrErr.getError()))
- continue;
+ error(NameOrErr.getError());
Name = *NameOrErr;
}
@@ -1222,8 +1316,7 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
outs() << SegmentName << ",";
}
StringRef SectionName;
- if (error(Section->getName(SectionName)))
- SectionName = "";
+ error(Section->getName(SectionName));
outs() << SectionName;
}
@@ -1329,7 +1422,7 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
}
Optional<object::SectionRef> ClangASTSection;
- for (auto Sec : Obj->sections()) {
+ for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
Sec.getName(Name);
if (Name == ClangASTSectionName) {
@@ -1341,11 +1434,7 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
return;
StringRef ClangASTContents;
- if (error(ClangASTSection.getValue().getContents(ClangASTContents))) {
- errs() << "Could not read the " << ClangASTSectionName << " section!\n";
- return;
- }
-
+ error(ClangASTSection.getValue().getContents(ClangASTContents));
outs().write(ClangASTContents.data(), ClangASTContents.size());
}
@@ -1364,7 +1453,7 @@ static void printFaultMaps(const ObjectFile *Obj) {
Optional<object::SectionRef> FaultMapSection;
- for (auto Sec : Obj->sections()) {
+ for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
Sec.getName(Name);
if (Name == FaultMapSectionName) {
@@ -1381,10 +1470,7 @@ static void printFaultMaps(const ObjectFile *Obj) {
}
StringRef FaultMapContents;
- if (error(FaultMapSection.getValue().getContents(FaultMapContents))) {
- errs() << "Could not read the " << FaultMapContents << " section!\n";
- return;
- }
+ error(FaultMapSection.getValue().getContents(FaultMapContents));
FaultMapParser FMP(FaultMapContents.bytes_begin(),
FaultMapContents.bytes_end());
@@ -1393,13 +1479,14 @@ static void printFaultMaps(const ObjectFile *Obj) {
}
static void printPrivateFileHeader(const ObjectFile *o) {
- if (o->isELF()) {
+ if (o->isELF())
printELFFileHeader(o);
- } else if (o->isCOFF()) {
+ else if (o->isCOFF())
printCOFFFileHeader(o);
- } else if (o->isMachO()) {
+ else if (o->isMachO())
printMachOFileHeader(o);
- }
+ else
+ report_fatal_error("Invalid/Unsupported object file format");
}
static void DumpObject(const ObjectFile *o) {
@@ -1442,15 +1529,14 @@ static void DumpObject(const ObjectFile *o) {
/// @brief Dump each object file in \a a.
static void DumpArchive(const Archive *a) {
- for (Archive::child_iterator i = a->child_begin(), e = a->child_end(); i != e;
- ++i) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = i->getAsBinary();
- if (std::error_code EC = ChildOrErr.getError()) {
- // Ignore non-object files.
+ for (auto &ErrorOrChild : a->children()) {
+ if (std::error_code EC = ErrorOrChild.getError())
+ report_error(a->getFileName(), EC);
+ const Archive::Child &C = *ErrorOrChild;
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
+ if (std::error_code EC = ChildOrErr.getError())
if (EC != object_error::invalid_file_type)
report_error(a->getFileName(), EC);
- continue;
- }
if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
DumpObject(o);
else
@@ -1460,11 +1546,6 @@ static void DumpArchive(const Archive *a) {
/// @brief Open file and figure out how to dump it.
static void DumpInput(StringRef file) {
- // If file isn't stdin, check that it exists.
- if (file != "-" && !sys::fs::exists(file)) {
- report_error(file, errc::no_such_file_or_directory);
- return;
- }
// If we are using the Mach-O specific object file parser, then let it parse
// the file and process the command line options. So the -arch flags can
@@ -1476,10 +1557,8 @@ static void DumpInput(StringRef file) {
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(file);
- if (std::error_code EC = BinaryOrErr.getError()) {
+ if (std::error_code EC = BinaryOrErr.getError())
report_error(file, EC);
- return;
- }
Binary &Binary = *BinaryOrErr.get().getBinary();
if (Archive *a = dyn_cast<Archive>(&Binary))
@@ -1499,7 +1578,6 @@ int main(int argc, char **argv) {
// Initialize targets and assembly printers/parsers.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
// Register the target printer for --version.
@@ -1514,6 +1592,8 @@ int main(int argc, char **argv) {
if (InputFilenames.size() == 0)
InputFilenames.push_back("a.out");
+ if (DisassembleAll)
+ Disassemble = true;
if (!Disassemble
&& !Relocations
&& !SectionHeaders
@@ -1536,7 +1616,7 @@ int main(int argc, char **argv) {
&& !(DylibsUsed && MachOOpt)
&& !(DylibId && MachOOpt)
&& !(ObjcMetaData && MachOOpt)
- && !(DumpSections.size() != 0 && MachOOpt)
+ && !(FilterSections.size() != 0 && MachOOpt)
&& !PrintFaultMaps) {
cl::PrintHelpMessage();
return 2;
@@ -1545,5 +1625,5 @@ int main(int argc, char **argv) {
std::for_each(InputFilenames.begin(), InputFilenames.end(),
DumpInput);
- return ReturnValue;
+ return EXIT_SUCCESS;
}
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
index eb10d83..6e8ad6b 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -25,8 +26,9 @@ extern cl::opt<std::string> TripleName;
extern cl::opt<std::string> ArchName;
extern cl::opt<std::string> MCPU;
extern cl::list<std::string> MAttrs;
-extern cl::list<std::string> DumpSections;
+extern cl::list<std::string> FilterSections;
extern cl::opt<bool> Disassemble;
+extern cl::opt<bool> DisassembleAll;
extern cl::opt<bool> NoShowRawInsn;
extern cl::opt<bool> PrivateHeaders;
extern cl::opt<bool> ExportsTrie;
@@ -54,7 +56,7 @@ extern cl::opt<bool> UnwindInfo;
extern cl::opt<bool> PrintImmHex;
// Various helper functions.
-bool error(std::error_code ec);
+void error(std::error_code ec);
bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
void ParseInputMachO(StringRef Filename);
void printCOFFUnwindInfo(const object::COFFObjectFile* o);
@@ -66,6 +68,7 @@ void printMachOLazyBindTable(const object::MachOObjectFile* o);
void printMachOWeakBindTable(const object::MachOObjectFile* o);
void printELFFileHeader(const object::ObjectFile *o);
void printCOFFFileHeader(const object::ObjectFile *o);
+void printCOFFSymbolTable(const object::COFFObjectFile *o);
void printMachOFileHeader(const object::ObjectFile *o);
void printExportsTrie(const object::ObjectFile *o);
void printRebaseTable(const object::ObjectFile *o);
@@ -77,6 +80,7 @@ void PrintRelocations(const object::ObjectFile *o);
void PrintSectionHeaders(const object::ObjectFile *o);
void PrintSectionContents(const object::ObjectFile *o);
void PrintSymbolTable(const object::ObjectFile *o);
+LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, std::error_code EC);
} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.cpp b/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.cpp
index d808298..4327054 100644
--- a/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.cpp
@@ -19,69 +19,53 @@ BuiltinDumper::BuiltinDumper(LinePrinter &P)
: PDBSymDumper(false), Printer(P) {}
void BuiltinDumper::start(const PDBSymbolTypeBuiltin &Symbol) {
+ WithColor(Printer, PDB_ColorItem::Type).get() << getTypeName(Symbol);
+}
+
+StringRef BuiltinDumper::getTypeName(const PDBSymbolTypeBuiltin &Symbol) {
PDB_BuiltinType Type = Symbol.getBuiltinType();
switch (Type) {
case PDB_BuiltinType::Float:
if (Symbol.getLength() == 4)
- WithColor(Printer, PDB_ColorItem::Type).get() << "float";
- else
- WithColor(Printer, PDB_ColorItem::Type).get() << "double";
- break;
+ return "float";
+ return "double";
case PDB_BuiltinType::UInt:
- WithColor(Printer, PDB_ColorItem::Type).get() << "unsigned";
if (Symbol.getLength() == 8)
- WithColor(Printer, PDB_ColorItem::Type).get() << " __int64";
- break;
+ return "unsigned __int64";
+ return "unsigned";
case PDB_BuiltinType::Int:
if (Symbol.getLength() == 4)
- WithColor(Printer, PDB_ColorItem::Type).get() << "int";
- else
- WithColor(Printer, PDB_ColorItem::Type).get() << "__int64";
- break;
+ return "int";
+ return "__int64";
case PDB_BuiltinType::Char:
- WithColor(Printer, PDB_ColorItem::Type).get() << "char";
- break;
+ return "char";
case PDB_BuiltinType::WCharT:
- WithColor(Printer, PDB_ColorItem::Type).get() << "wchar_t";
- break;
+ return "wchar_t";
case PDB_BuiltinType::Void:
- WithColor(Printer, PDB_ColorItem::Type).get() << "void";
- break;
+ return "void";
case PDB_BuiltinType::Long:
- WithColor(Printer, PDB_ColorItem::Type).get() << "long";
- break;
+ return "long";
case PDB_BuiltinType::ULong:
- WithColor(Printer, PDB_ColorItem::Type).get() << "unsigned long";
- break;
+ return "unsigned long";
case PDB_BuiltinType::Bool:
- WithColor(Printer, PDB_ColorItem::Type).get() << "bool";
- break;
+ return "bool";
case PDB_BuiltinType::Currency:
- WithColor(Printer, PDB_ColorItem::Type).get() << "CURRENCY";
- break;
+ return "CURRENCY";
case PDB_BuiltinType::Date:
- WithColor(Printer, PDB_ColorItem::Type).get() << "DATE";
- break;
+ return "DATE";
case PDB_BuiltinType::Variant:
- WithColor(Printer, PDB_ColorItem::Type).get() << "VARIANT";
- break;
+ return "VARIANT";
case PDB_BuiltinType::Complex:
- WithColor(Printer, PDB_ColorItem::Type).get() << "complex";
- break;
+ return "complex";
case PDB_BuiltinType::Bitfield:
- WithColor(Printer, PDB_ColorItem::Type).get() << "bitfield";
- break;
+ return "bitfield";
case PDB_BuiltinType::BSTR:
- WithColor(Printer, PDB_ColorItem::Type).get() << "BSTR";
- break;
+ return "BSTR";
case PDB_BuiltinType::HResult:
- WithColor(Printer, PDB_ColorItem::Type).get() << "HRESULT";
- break;
+ return "HRESULT";
case PDB_BuiltinType::BCD:
- WithColor(Printer, PDB_ColorItem::Type).get() << "HRESULT";
- break;
+ return "HRESULT";
default:
- WithColor(Printer, PDB_ColorItem::Type).get() << "void";
- break;
+ return "void";
}
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.h b/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.h
index 8cf984a0..ac666db 100644
--- a/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.h
+++ b/contrib/llvm/tools/llvm-pdbdump/BuiltinDumper.h
@@ -23,6 +23,8 @@ public:
void start(const PDBSymbolTypeBuiltin &Symbol);
private:
+ StringRef getTypeName(const PDBSymbolTypeBuiltin &Symbol);
+
LinePrinter &Printer;
};
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/LinePrinter.cpp b/contrib/llvm/tools/llvm-pdbdump/LinePrinter.cpp
index 6bbc403..a43727f 100644
--- a/contrib/llvm/tools/llvm-pdbdump/LinePrinter.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/LinePrinter.cpp
@@ -11,19 +11,49 @@
#include "llvm-pdbdump.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Regex.h"
#include <algorithm>
+namespace {
+bool IsItemExcluded(llvm::StringRef Item,
+ std::list<llvm::Regex> &IncludeFilters,
+ std::list<llvm::Regex> &ExcludeFilters) {
+ if (Item.empty())
+ return false;
+
+ auto match_pred = [Item](llvm::Regex &R) { return R.match(Item); };
+
+ // Include takes priority over exclude. If the user specified include
+ // filters, and none of them include this item, them item is gone.
+ if (!IncludeFilters.empty() && !any_of(IncludeFilters, match_pred))
+ return true;
+
+ if (any_of(ExcludeFilters, match_pred))
+ return true;
+
+ return false;
+}
+}
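// Usage sketch for the precedence implemented above: include filters, when
// present, act as an allow-list consulted before the exclude list. The
// patterns below are examples only.
//
//   std::list<llvm::Regex> Includes, Excludes;
//   Includes.emplace_back("^std::");    // keep only std:: items...
//   Excludes.emplace_back("_Hidden$");  // ...unless they end in _Hidden.
//   IsItemExcluded("std::vector", Includes, Excludes);  // false
//   IsItemExcluded("std::_Hidden", Includes, Excludes); // true
//   IsItemExcluded("boost::any", Includes, Excludes);   // true (no include
//                                                       // filter matches)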
+
using namespace llvm;
LinePrinter::LinePrinter(int Indent, llvm::raw_ostream &Stream)
: OS(Stream), IndentSpaces(Indent), CurrentIndent(0) {
- SetFilters(TypeFilters, opts::ExcludeTypes.begin(), opts::ExcludeTypes.end());
- SetFilters(SymbolFilters, opts::ExcludeSymbols.begin(),
+ SetFilters(ExcludeTypeFilters, opts::ExcludeTypes.begin(),
+ opts::ExcludeTypes.end());
+ SetFilters(ExcludeSymbolFilters, opts::ExcludeSymbols.begin(),
opts::ExcludeSymbols.end());
- SetFilters(CompilandFilters, opts::ExcludeCompilands.begin(),
+ SetFilters(ExcludeCompilandFilters, opts::ExcludeCompilands.begin(),
opts::ExcludeCompilands.end());
+
+ SetFilters(IncludeTypeFilters, opts::IncludeTypes.begin(),
+ opts::IncludeTypes.end());
+ SetFilters(IncludeSymbolFilters, opts::IncludeSymbols.begin(),
+ opts::IncludeSymbols.end());
+ SetFilters(IncludeCompilandFilters, opts::IncludeCompilands.begin(),
+ opts::IncludeCompilands.end());
}
void LinePrinter::Indent() { CurrentIndent += IndentSpaces; }
@@ -38,87 +68,53 @@ void LinePrinter::NewLine() {
}
bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName) {
- if (TypeName.empty())
- return false;
-
- for (auto &Expr : TypeFilters) {
- if (Expr.match(TypeName))
- return true;
- }
- return false;
+ return IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters);
}
bool LinePrinter::IsSymbolExcluded(llvm::StringRef SymbolName) {
- if (SymbolName.empty())
- return false;
-
- for (auto &Expr : SymbolFilters) {
- if (Expr.match(SymbolName))
- return true;
- }
- return false;
+ return IsItemExcluded(SymbolName, IncludeSymbolFilters, ExcludeSymbolFilters);
}
bool LinePrinter::IsCompilandExcluded(llvm::StringRef CompilandName) {
- if (CompilandName.empty())
- return false;
-
- for (auto &Expr : CompilandFilters) {
- if (Expr.match(CompilandName))
- return true;
- }
- return false;
+ return IsItemExcluded(CompilandName, IncludeCompilandFilters,
+ ExcludeCompilandFilters);
}
WithColor::WithColor(LinePrinter &P, PDB_ColorItem C) : OS(P.OS) {
- if (C == PDB_ColorItem::None)
- OS.resetColor();
- else {
- raw_ostream::Colors Color;
- bool Bold;
- translateColor(C, Color, Bold);
- OS.changeColor(Color, Bold);
- }
+ applyColor(C);
}
WithColor::~WithColor() { OS.resetColor(); }
-void WithColor::translateColor(PDB_ColorItem C, raw_ostream::Colors &Color,
- bool &Bold) const {
+void WithColor::applyColor(PDB_ColorItem C) {
switch (C) {
+ case PDB_ColorItem::None:
+ OS.resetColor();
+ return;
case PDB_ColorItem::Address:
- Color = raw_ostream::YELLOW;
- Bold = true;
+ OS.changeColor(raw_ostream::YELLOW, /*bold=*/true);
return;
case PDB_ColorItem::Keyword:
- Color = raw_ostream::MAGENTA;
- Bold = true;
+ OS.changeColor(raw_ostream::MAGENTA, true);
return;
case PDB_ColorItem::Register:
case PDB_ColorItem::Offset:
- Color = raw_ostream::YELLOW;
- Bold = false;
+ OS.changeColor(raw_ostream::YELLOW, false);
return;
case PDB_ColorItem::Type:
- Color = raw_ostream::CYAN;
- Bold = true;
+ OS.changeColor(raw_ostream::CYAN, true);
return;
case PDB_ColorItem::Identifier:
- Color = raw_ostream::CYAN;
- Bold = false;
+ OS.changeColor(raw_ostream::CYAN, false);
return;
case PDB_ColorItem::Path:
- Color = raw_ostream::CYAN;
- Bold = false;
+ OS.changeColor(raw_ostream::CYAN, false);
return;
case PDB_ColorItem::SectionHeader:
- Color = raw_ostream::RED;
- Bold = true;
+ OS.changeColor(raw_ostream::RED, true);
return;
case PDB_ColorItem::LiteralValue:
- Color = raw_ostream::GREEN;
- Bold = true;
- default:
+ OS.changeColor(raw_ostream::GREEN, true);
return;
}
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/LinePrinter.h b/contrib/llvm/tools/llvm-pdbdump/LinePrinter.h
index b985e93..b0a9d2c 100644
--- a/contrib/llvm/tools/llvm-pdbdump/LinePrinter.h
+++ b/contrib/llvm/tools/llvm-pdbdump/LinePrinter.h
@@ -48,9 +48,13 @@ private:
int IndentSpaces;
int CurrentIndent;
- std::list<Regex> CompilandFilters;
- std::list<Regex> TypeFilters;
- std::list<Regex> SymbolFilters;
+ std::list<Regex> ExcludeCompilandFilters;
+ std::list<Regex> ExcludeTypeFilters;
+ std::list<Regex> ExcludeSymbolFilters;
+
+ std::list<Regex> IncludeCompilandFilters;
+ std::list<Regex> IncludeTypeFilters;
+ std::list<Regex> IncludeSymbolFilters;
};
template <class T>
@@ -80,8 +84,7 @@ public:
raw_ostream &get() { return OS; }
private:
- void translateColor(PDB_ColorItem C, raw_ostream::Colors &Color,
- bool &Bold) const;
+ void applyColor(PDB_ColorItem C);
raw_ostream &OS;
};
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
index 4a4c64b..0e3f0b2 100644
--- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -22,6 +22,8 @@
#include "VariableDumper.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
@@ -38,12 +40,16 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#if defined(HAVE_DIA_SDK)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
#include <Windows.h>
#endif
@@ -79,6 +85,17 @@ cl::opt<uint64_t> LoadAddress(
cl::desc("Assume the module is loaded at the specified address"),
cl::cat(OtherOptions));
+cl::opt<bool> DumpHeaders("dump-headers", cl::desc("dump PDB headers"),
+ cl::cat(OtherOptions));
+cl::opt<bool> DumpStreamSizes("dump-stream-sizes",
+ cl::desc("dump PDB stream sizes"),
+ cl::cat(OtherOptions));
+cl::opt<bool> DumpStreamBlocks("dump-stream-blocks",
+ cl::desc("dump PDB stream blocks"),
+ cl::cat(OtherOptions));
+cl::opt<std::string> DumpStreamData("dump-stream", cl::desc("dump stream data"),
+ cl::cat(OtherOptions));
+
cl::list<std::string>
ExcludeTypes("exclude-types",
cl::desc("Exclude types by regular expression"),
@@ -91,6 +108,20 @@ cl::list<std::string>
ExcludeCompilands("exclude-compilands",
cl::desc("Exclude compilands by regular expression"),
cl::ZeroOrMore, cl::cat(FilterCategory));
+
+cl::list<std::string> IncludeTypes(
+ "include-types",
+ cl::desc("Include only types which match a regular expression"),
+ cl::ZeroOrMore, cl::cat(FilterCategory));
+cl::list<std::string> IncludeSymbols(
+ "include-symbols",
+ cl::desc("Include only symbols which match a regular expression"),
+ cl::ZeroOrMore, cl::cat(FilterCategory));
+cl::list<std::string> IncludeCompilands(
+ "include-compilands",
+ cl::desc("Include only compilands those which match a regular expression"),
+ cl::ZeroOrMore, cl::cat(FilterCategory));
+
cl::opt<bool> ExcludeCompilerGenerated(
"no-compiler-generated",
cl::desc("Don't show compiler generated types and symbols"),
@@ -107,10 +138,264 @@ cl::opt<bool> NoEnumDefs("no-enum-definitions",
cl::cat(FilterCategory));
}
+
+static void reportError(StringRef Input, StringRef Message) {
+ if (Input == "-")
+ Input = "<stdin>";
+ errs() << Input << ": " << Message << "\n";
+ errs().flush();
+ exit(1);
+}
+
+static void reportError(StringRef Input, std::error_code EC) {
+ reportError(Input, EC.message());
+}
+
+static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr,
+ const uint64_t Size) {
+ if (Addr + Size < Addr || Addr + Size < Size ||
+ Addr + Size > uintptr_t(M.getBufferEnd()) ||
+ Addr < uintptr_t(M.getBufferStart())) {
+ return std::make_error_code(std::errc::bad_address);
+ }
+ return std::error_code();
+}
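// A hedged aside on the first two comparisons above: Addr + Size is computed
// in 64 bits, so a corrupt Size can wrap the sum back into apparent bounds.
// Example values are made up:
//
//   uintptr_t Addr = 0x1000;
//   uint64_t  Size = UINT64_MAX;  // Addr + Size wraps around to 0xFFF
//   // "Addr + Size < Size" (and "< Addr") is true, so the range is rejected
//   // before the wrapped sum can slip past the buffer-bounds checks.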
+
+template <typename T>
+static std::error_code checkOffset(MemoryBufferRef M, ArrayRef<T> AR) {
+ return checkOffset(M, uintptr_t(AR.data()), (uint64_t)AR.size() * sizeof(T));
+}
+
+static std::error_code checkOffset(MemoryBufferRef M, StringRef SR) {
+ return checkOffset(M, uintptr_t(SR.data()), SR.size());
+}
+
+// Sets Obj unless any bytes in [Addr, Addr + Size) fall outside of M.
+// Returns a bad_address error code on failure.
+template <typename T>
+static std::error_code getObject(const T *&Obj, MemoryBufferRef M,
+ const void *Ptr,
+ const uint64_t Size = sizeof(T)) {
+ uintptr_t Addr = uintptr_t(Ptr);
+ if (std::error_code EC = checkOffset(M, Addr, Size))
+ return EC;
+ Obj = reinterpret_cast<const T *>(Addr);
+ return std::error_code();
+}
+
+static uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize) {
+ return RoundUpToAlignment(NumBytes, BlockSize) / BlockSize;
+}
+
+static uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize) {
+ return BlockNumber * BlockSize;
+}
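// Worked example for the two helpers above (values illustrative): with a
// 4096-byte block size, a 10000-byte directory occupies
//   bytesToBlocks(10000, 4096) == RoundUpToAlignment(10000, 4096) / 4096
//                              == 12288 / 4096 == 3 blocks,
// and block 3 of the file begins at blockToOffset(3, 4096) == 12288.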
+
+static void dumpStructure(MemoryBufferRef M) {
+ const PDB::SuperBlock *SB;
+ if (auto EC = getObject(SB, M, M.getBufferStart()))
+ reportError(M.getBufferIdentifier(), EC);
+
+ if (opts::DumpHeaders) {
+ outs() << "BlockSize: " << SB->BlockSize << '\n';
+ outs() << "Unknown0: " << SB->Unknown0 << '\n';
+ outs() << "NumBlocks: " << SB->NumBlocks << '\n';
+ outs() << "NumDirectoryBytes: " << SB->NumDirectoryBytes << '\n';
+ outs() << "Unknown1: " << SB->Unknown1 << '\n';
+ outs() << "BlockMapAddr: " << SB->BlockMapAddr << '\n';
+ }
+
+ // We don't support block sizes which aren't a multiple of four bytes.
+ if (SB->BlockSize % sizeof(support::ulittle32_t) != 0)
+ reportError(M.getBufferIdentifier(),
+ std::make_error_code(std::errc::illegal_byte_sequence));
+
+ // We don't support directories whose sizes aren't a multiple of four bytes.
+ if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0)
+ reportError(M.getBufferIdentifier(),
+ std::make_error_code(std::errc::illegal_byte_sequence));
+
+ // The number of blocks which comprise the directory is a simple function of
+ // the number of bytes it contains.
+ uint64_t NumDirectoryBlocks =
+ bytesToBlocks(SB->NumDirectoryBytes, SB->BlockSize);
+ if (opts::DumpHeaders)
+ outs() << "NumDirectoryBlocks: " << NumDirectoryBlocks << '\n';
+
+ // The block map, as we understand it, is a block which consists of a list of
+ // block numbers.
+ // It is unclear what would happen if the list of block numbers couldn't
+ // fit in a single block.
+ if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t))
+ reportError(M.getBufferIdentifier(),
+ std::make_error_code(std::errc::illegal_byte_sequence));
+
+
+ uint64_t BlockMapOffset = (uint64_t)SB->BlockMapAddr * SB->BlockSize;
+ if (opts::DumpHeaders)
+ outs() << "BlockMapOffset: " << BlockMapOffset << '\n';
+
+ // The directory is not contiguous. Instead, the block map contains a
+ // contiguous list of block numbers whose contents, when concatenated in
+ // order, make up the directory.
+ auto DirectoryBlocks =
+ makeArrayRef(reinterpret_cast<const support::ulittle32_t *>(
+ M.getBufferStart() + BlockMapOffset),
+ NumDirectoryBlocks);
+ if (auto EC = checkOffset(M, DirectoryBlocks))
+ reportError(M.getBufferIdentifier(), EC);
+
+ if (opts::DumpHeaders) {
+ outs() << "DirectoryBlocks: [";
+ for (const support::ulittle32_t &DirectoryBlockAddr : DirectoryBlocks) {
+ if (&DirectoryBlockAddr != &DirectoryBlocks.front())
+ outs() << ", ";
+ outs() << DirectoryBlockAddr;
+ }
+ outs() << "]\n";
+ }
+
+ bool SeenNumStreams = false;
+ uint32_t NumStreams = 0;
+ std::vector<uint32_t> StreamSizes;
+ DenseMap<uint32_t, std::vector<uint32_t>> StreamMap;
+ uint32_t StreamIdx = 0;
+ uint64_t DirectoryBytesRead = 0;
+ // The structure of the directory is as follows:
+ // struct PDBDirectory {
+ // uint32_t NumStreams;
+ // uint32_t StreamSizes[NumStreams];
+ // uint32_t StreamMap[NumStreams][];
+ // };
+ //
+ // Empty streams don't consume entries in the StreamMap.
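// A made-up instance of the layout sketched above, with BlockSize 4096 and
// three streams:
//
//   NumStreams  = 3
//   StreamSizes = { 5000, 0, 100 }
//   StreamMap   = { 10, 11, 12 }  // 10, 11 -> stream 0; 12 -> stream 2
//
// Stream 0 spans two blocks, stream 1 is empty and consumes no map entries,
// and stream 2 fits in one block.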
+ for (uint32_t DirectoryBlockAddr : DirectoryBlocks) {
+ uint64_t DirectoryBlockOffset =
+ blockToOffset(DirectoryBlockAddr, SB->BlockSize);
+ auto DirectoryBlock =
+ makeArrayRef(reinterpret_cast<const support::ulittle32_t *>(
+ M.getBufferStart() + DirectoryBlockOffset),
+ SB->BlockSize / sizeof(support::ulittle32_t));
+ if (auto EC = checkOffset(M, DirectoryBlock))
+ reportError(M.getBufferIdentifier(), EC);
+
+ // We read data out of the directory four bytes at a time. Depending on
+ // where we are in the directory, the contents may be: the number of streams
+ // in the directory, a stream's size, or a block in the stream map.
+ for (uint32_t Data : DirectoryBlock) {
+ // Don't read beyond the end of the directory.
+ if (DirectoryBytesRead == SB->NumDirectoryBytes)
+ break;
+
+ DirectoryBytesRead += sizeof(Data);
+
+ // This data must be the number of streams if we haven't seen it yet.
+ if (!SeenNumStreams) {
+ NumStreams = Data;
+ SeenNumStreams = true;
+ continue;
+ }
+ // This data must be a stream size if we have not seen them all yet.
+ if (StreamSizes.size() < NumStreams) {
+ // It seems like some streams have their size set to -1 when their
+ // contents are not present. Treat them like empty streams for now.
+ if (Data == UINT32_MAX)
+ StreamSizes.push_back(0);
+ else
+ StreamSizes.push_back(Data);
+ continue;
+ }
+
+ // This data must be a stream block number if we have seen all of the
+ // stream sizes.
+ std::vector<uint32_t> *StreamBlocks = nullptr;
+ // Figure out which stream this block number belongs to.
+ while (StreamIdx < NumStreams) {
+ uint64_t NumExpectedStreamBlocks =
+ bytesToBlocks(StreamSizes[StreamIdx], SB->BlockSize);
+ StreamBlocks = &StreamMap[StreamIdx];
+ if (NumExpectedStreamBlocks > StreamBlocks->size())
+ break;
+ ++StreamIdx;
+ }
+ // If this block doesn't belong to any stream, the directory is either
+ // corrupt or something more mysterious is going on.
+ if (StreamIdx == NumStreams)
+ reportError(M.getBufferIdentifier(),
+ std::make_error_code(std::errc::illegal_byte_sequence));
+
+ StreamBlocks->push_back(Data);
+ }
+ }
+
+ // We should have read exactly SB->NumDirectoryBytes bytes.
+ assert(DirectoryBytesRead == SB->NumDirectoryBytes);
+
+ if (opts::DumpHeaders)
+ outs() << "NumStreams: " << NumStreams << '\n';
+ if (opts::DumpStreamSizes)
+ for (uint32_t StreamIdx = 0; StreamIdx < NumStreams; ++StreamIdx)
+ outs() << "StreamSizes[" << StreamIdx << "]: " << StreamSizes[StreamIdx]
+ << '\n';
+
+ if (opts::DumpStreamBlocks) {
+ for (uint32_t StreamIdx = 0; StreamIdx < NumStreams; ++StreamIdx) {
+ outs() << "StreamBlocks[" << StreamIdx << "]: [";
+ std::vector<uint32_t> &StreamBlocks = StreamMap[StreamIdx];
+ for (uint32_t &StreamBlock : StreamBlocks) {
+ if (&StreamBlock != &StreamBlocks.front())
+ outs() << ", ";
+ outs() << StreamBlock;
+ }
+ outs() << "]\n";
+ }
+ }
+
+ StringRef DumpStreamStr = opts::DumpStreamData;
+ uint32_t DumpStreamNum;
+ if (!DumpStreamStr.getAsInteger(/*Radix=*/0U, DumpStreamNum) &&
+ DumpStreamNum < NumStreams) {
+ uint32_t StreamBytesRead = 0;
+ uint32_t StreamSize = StreamSizes[DumpStreamNum];
+ std::vector<uint32_t> &StreamBlocks = StreamMap[DumpStreamNum];
+ for (uint32_t &StreamBlockAddr : StreamBlocks) {
+ uint64_t StreamBlockOffset = blockToOffset(StreamBlockAddr, SB->BlockSize);
+ uint32_t BytesLeftToReadInStream = StreamSize - StreamBytesRead;
+ if (BytesLeftToReadInStream == 0)
+ break;
+
+ uint32_t BytesToReadInBlock = std::min(
+ BytesLeftToReadInStream, static_cast<uint32_t>(SB->BlockSize));
+ auto StreamBlockData =
+ StringRef(M.getBufferStart() + StreamBlockOffset, BytesToReadInBlock);
+ if (auto EC = checkOffset(M, StreamBlockData))
+ reportError(M.getBufferIdentifier(), EC);
+
+ outs() << StreamBlockData;
+ StreamBytesRead += StreamBlockData.size();
+ }
+ }
+}
+
static void dumpInput(StringRef Path) {
+ if (opts::DumpHeaders || !opts::DumpStreamData.empty()) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
+ MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
+
+ if (std::error_code EC = ErrorOrBuffer.getError())
+ reportError(Path, EC);
+
+ std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get();
+
+ dumpStructure(Buffer->getMemBufferRef());
+
+ outs().flush();
+ return;
+ }
+
std::unique_ptr<IPDBSession> Session;
- PDB_ErrorCode Error =
- llvm::loadDataForPDB(PDB_ReaderType::DIA, Path, Session);
+ PDB_ErrorCode Error = loadDataForPDB(PDB_ReaderType::DIA, Path, Session);
switch (Error) {
case PDB_ErrorCode::Success:
break;
@@ -145,7 +430,7 @@ static void dumpInput(StringRef Path) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::Identifier).get() << "Size";
- if (!llvm::sys::fs::file_size(FileName, FileSize)) {
+ if (!sys::fs::file_size(FileName, FileSize)) {
Printer << ": " << FileSize << " bytes";
} else {
Printer << ": (Unable to obtain file size)";
@@ -242,11 +527,11 @@ int main(int argc_, const char *argv_[]) {
PrettyStackTraceProgram X(argc_, argv_);
SmallVector<const char *, 256> argv;
- llvm::SpecificBumpPtrAllocator<char> ArgAllocator;
- std::error_code EC = llvm::sys::Process::GetArgumentVector(
- argv, llvm::makeArrayRef(argv_, argc_), ArgAllocator);
+ SpecificBumpPtrAllocator<char> ArgAllocator;
+ std::error_code EC = sys::Process::GetArgumentVector(
+ argv, makeArrayRef(argv_, argc_), ArgAllocator);
if (EC) {
- llvm::errs() << "error: couldn't get arguments: " << EC.message() << '\n';
+ errs() << "error: couldn't get arguments: " << EC.message() << '\n';
return 1;
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
index 586a9ea..cb5bec6 100644
--- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
+++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
@@ -27,6 +27,9 @@ extern llvm::cl::opt<bool> NoEnumDefs;
extern llvm::cl::list<std::string> ExcludeTypes;
extern llvm::cl::list<std::string> ExcludeSymbols;
extern llvm::cl::list<std::string> ExcludeCompilands;
+extern llvm::cl::list<std::string> IncludeTypes;
+extern llvm::cl::list<std::string> IncludeSymbols;
+extern llvm::cl::list<std::string> IncludeCompilands;
}
#endif
\ No newline at end of file
diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 6fb48d8..dc6cd0a 100644
--- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/ProfileData/InstrProfReader.h"
@@ -18,6 +20,7 @@
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ProfileData/SampleProfWriter.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
@@ -26,67 +29,150 @@
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <tuple>
using namespace llvm;
-static void exitWithError(const Twine &Message, StringRef Whence = "") {
+enum ProfileFormat { PF_None = 0, PF_Text, PF_Binary, PF_GCC };
+
+static void exitWithError(const Twine &Message, StringRef Whence = "",
+ StringRef Hint = "") {
errs() << "error: ";
if (!Whence.empty())
errs() << Whence << ": ";
errs() << Message << "\n";
+ if (!Hint.empty())
+ errs() << Hint << "\n";
::exit(1);
}
+static void exitWithErrorCode(const std::error_code &Error,
+ StringRef Whence = "") {
+ if (Error.category() == instrprof_category()) {
+ instrprof_error instrError = static_cast<instrprof_error>(Error.value());
+ if (instrError == instrprof_error::unrecognized_format) {
+ // Hint for common error of forgetting -sample for sample profiles.
+ exitWithError(Error.message(), Whence,
+ "Perhaps you forgot to use the -sample option?");
+ }
+ }
+ exitWithError(Error.message(), Whence);
+}
+
namespace {
enum ProfileKinds { instr, sample };
}
-static void mergeInstrProfile(const cl::list<std::string> &Inputs,
- StringRef OutputFilename) {
+static void handleMergeWriterError(std::error_code &Error,
+ StringRef WhenceFile = "",
+ StringRef WhenceFunction = "",
+ bool ShowHint = true) {
+ if (!WhenceFile.empty())
+ errs() << WhenceFile << ": ";
+ if (!WhenceFunction.empty())
+ errs() << WhenceFunction << ": ";
+ errs() << Error.message() << "\n";
+
+ if (ShowHint) {
+ StringRef Hint = "";
+ if (Error.category() == instrprof_category()) {
+ instrprof_error instrError = static_cast<instrprof_error>(Error.value());
+ switch (instrError) {
+ case instrprof_error::hash_mismatch:
+ case instrprof_error::count_mismatch:
+ case instrprof_error::value_site_count_mismatch:
+ Hint = "Make sure that all profile data to be merged is generated "
+ "from the same binary.";
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Hint.empty())
+ errs() << Hint << "\n";
+ }
+}
+
+struct WeightedFile {
+ StringRef Filename;
+ uint64_t Weight;
+
+ WeightedFile() {}
+
+ WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
+};
+typedef SmallVector<WeightedFile, 5> WeightedFileVector;
+
+static void mergeInstrProfile(const WeightedFileVector &Inputs,
+ StringRef OutputFilename,
+ ProfileFormat OutputFormat) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
+ if (OutputFormat != PF_Binary && OutputFormat != PF_Text)
+ exitWithError("Unknown format is specified.");
+
std::error_code EC;
raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
if (EC)
- exitWithError(EC.message(), OutputFilename);
+ exitWithErrorCode(EC, OutputFilename);
InstrProfWriter Writer;
- for (const auto &Filename : Inputs) {
- auto ReaderOrErr = InstrProfReader::create(Filename);
+ SmallSet<std::error_code, 4> WriterErrorCodes;
+ for (const auto &Input : Inputs) {
+ auto ReaderOrErr = InstrProfReader::create(Input.Filename);
if (std::error_code ec = ReaderOrErr.getError())
- exitWithError(ec.message(), Filename);
+ exitWithErrorCode(ec, Input.Filename);
auto Reader = std::move(ReaderOrErr.get());
- for (const auto &I : *Reader)
- if (std::error_code EC =
- Writer.addFunctionCounts(I.Name, I.Hash, I.Counts))
- errs() << Filename << ": " << I.Name << ": " << EC.message() << "\n";
+ for (auto &I : *Reader) {
+ if (std::error_code EC = Writer.addRecord(std::move(I), Input.Weight)) {
+ // Only show hint the first time an error occurs.
+ bool firstTime = WriterErrorCodes.insert(EC).second;
+ handleMergeWriterError(EC, Input.Filename, I.Name, firstTime);
+ }
+ }
if (Reader->hasError())
- exitWithError(Reader->getError().message(), Filename);
+ exitWithErrorCode(Reader->getError(), Input.Filename);
}
- Writer.write(Output);
+ if (OutputFormat == PF_Text)
+ Writer.writeText(Output);
+ else
+ Writer.write(Output);
}
-static void mergeSampleProfile(const cl::list<std::string> &Inputs,
+static sampleprof::SampleProfileFormat FormatMap[] = {
+ sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Binary,
+ sampleprof::SPF_GCC};
+
+static void mergeSampleProfile(const WeightedFileVector &Inputs,
StringRef OutputFilename,
- sampleprof::SampleProfileFormat OutputFormat) {
+ ProfileFormat OutputFormat) {
using namespace sampleprof;
- auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
+ auto WriterOrErr =
+ SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
if (std::error_code EC = WriterOrErr.getError())
- exitWithError(EC.message(), OutputFilename);
+ exitWithErrorCode(EC, OutputFilename);
auto Writer = std::move(WriterOrErr.get());
StringMap<FunctionSamples> ProfileMap;
- for (const auto &Filename : Inputs) {
+ SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
+ for (const auto &Input : Inputs) {
auto ReaderOrErr =
- SampleProfileReader::create(Filename, getGlobalContext());
+ SampleProfileReader::create(Input.Filename, getGlobalContext());
if (std::error_code EC = ReaderOrErr.getError())
- exitWithError(EC.message(), Filename);
-
- auto Reader = std::move(ReaderOrErr.get());
+ exitWithErrorCode(EC, Input.Filename);
+
+ // We need to keep the readers around until after all the files are
+ // read so that we do not lose the function names stored in each
+ // reader's memory. The function names are needed to write out the
+ // merged profile map.
+ Readers.push_back(std::move(ReaderOrErr.get()));
+ const auto Reader = Readers.back().get();
if (std::error_code EC = Reader->read())
- exitWithError(EC.message(), Filename);
+ exitWithErrorCode(EC, Input.Filename);
StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
@@ -94,16 +180,36 @@ static void mergeSampleProfile(const cl::list<std::string> &Inputs,
I != E; ++I) {
StringRef FName = I->first();
FunctionSamples &Samples = I->second;
- ProfileMap[FName].merge(Samples);
+ sampleprof_error Result = ProfileMap[FName].merge(Samples, Input.Weight);
+ if (Result != sampleprof_error::success) {
+ std::error_code EC = make_error_code(Result);
+ handleMergeWriterError(EC, Input.Filename, FName);
+ }
}
}
Writer->write(ProfileMap);
}
-static int merge_main(int argc, const char *argv[]) {
- cl::list<std::string> Inputs(cl::Positional, cl::Required, cl::OneOrMore,
- cl::desc("<filenames...>"));
+static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
+ StringRef WeightStr, FileName;
+ std::tie(WeightStr, FileName) = WeightedFilename.split(',');
+
+ uint64_t Weight;
+ if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
+ exitWithError("Input weight must be a positive integer.");
+
+ if (!sys::fs::exists(FileName))
+ exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
+ FileName);
+ return WeightedFile(FileName, Weight);
+}
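// Usage sketch for the parser above (the path is illustrative):
//
//   WeightedFile WF = parseWeightedFile("3,foo.profdata");
//   // WF.Filename == "foo.profdata", WF.Weight == 3. A malformed weight or
//   // a missing file exits with an error instead of returning.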
+
+static int merge_main(int argc, const char *argv[]) {
+ cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<filename...>"));
+ cl::list<std::string> WeightedInputFilenames("weighted-input",
+ cl::desc("<weight>,<filename>"));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required,
cl::desc("Output file"));
@@ -114,31 +220,41 @@ static int merge_main(int argc, const char *argv[]) {
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
clEnumVal(sample, "Sample profile"), clEnumValEnd));
- cl::opt<sampleprof::SampleProfileFormat> OutputFormat(
- cl::desc("Format of output profile (only meaningful with --sample)"),
- cl::init(sampleprof::SPF_Binary),
- cl::values(clEnumValN(sampleprof::SPF_Binary, "binary",
- "Binary encoding (default)"),
- clEnumValN(sampleprof::SPF_Text, "text", "Text encoding"),
- clEnumValN(sampleprof::SPF_GCC, "gcc", "GCC encoding"),
+ cl::opt<ProfileFormat> OutputFormat(
+ cl::desc("Format of output profile"), cl::init(PF_Binary),
+ cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
+ clEnumValN(PF_Text, "text", "Text encoding"),
+ clEnumValN(PF_GCC, "gcc",
+ "GCC encoding (only meaningful for -sample)"),
clEnumValEnd));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
+ if (InputFilenames.empty() && WeightedInputFilenames.empty())
+ exitWithError("No input files specified. See " +
+ sys::path::filename(argv[0]) + " -help");
+
+ WeightedFileVector WeightedInputs;
+ for (StringRef Filename : InputFilenames)
+ WeightedInputs.push_back(WeightedFile(Filename, 1));
+ for (StringRef WeightedFilename : WeightedInputFilenames)
+ WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
+
if (ProfileKind == instr)
- mergeInstrProfile(Inputs, OutputFilename);
+ mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat);
else
- mergeSampleProfile(Inputs, OutputFilename, OutputFormat);
+ mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
return 0;
}
static int showInstrProfile(std::string Filename, bool ShowCounts,
- bool ShowAllFunctions, std::string ShowFunction,
+ bool ShowIndirectCallTargets, bool ShowAllFunctions,
+ std::string ShowFunction, bool TextFormat,
raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
if (std::error_code EC = ReaderOrErr.getError())
- exitWithError(EC.message(), Filename);
+ exitWithErrorCode(EC, Filename);
auto Reader = std::move(ReaderOrErr.get());
uint64_t MaxFunctionCount = 0, MaxBlockCount = 0;
@@ -148,35 +264,71 @@ static int showInstrProfile(std::string Filename, bool ShowCounts,
ShowAllFunctions || (!ShowFunction.empty() &&
Func.Name.find(ShowFunction) != Func.Name.npos);
+ bool doTextFormatDump = (Show && ShowCounts && TextFormat);
+
+ if (doTextFormatDump) {
+ InstrProfSymtab &Symtab = Reader->getSymtab();
+ InstrProfWriter::writeRecordInText(Func, Symtab, OS);
+ continue;
+ }
+
++TotalFunctions;
assert(Func.Counts.size() > 0 && "function missing entry counter");
if (Func.Counts[0] > MaxFunctionCount)
MaxFunctionCount = Func.Counts[0];
+ for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
+ if (Func.Counts[I] > MaxBlockCount)
+ MaxBlockCount = Func.Counts[I];
+ }
+
if (Show) {
+
if (!ShownFunctions)
OS << "Counters:\n";
+
++ShownFunctions;
OS << " " << Func.Name << ":\n"
<< " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
<< " Counters: " << Func.Counts.size() << "\n"
<< " Function count: " << Func.Counts[0] << "\n";
- }
- if (Show && ShowCounts)
- OS << " Block counts: [";
- for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
- if (Func.Counts[I] > MaxBlockCount)
- MaxBlockCount = Func.Counts[I];
- if (Show && ShowCounts)
- OS << (I == 1 ? "" : ", ") << Func.Counts[I];
+ if (ShowIndirectCallTargets)
+ OS << " Indirect Call Site Count: "
+ << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
+
+ if (ShowCounts) {
+ OS << " Block counts: [";
+ for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
+ OS << (I == 1 ? "" : ", ") << Func.Counts[I];
+ }
+ OS << "]\n";
+ }
+
+ if (ShowIndirectCallTargets) {
+ InstrProfSymtab &Symtab = Reader->getSymtab();
+ uint32_t NS = Func.getNumValueSites(IPVK_IndirectCallTarget);
+ OS << " Indirect Target Results: \n";
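+ // Each line lists [ call-site index, target function, count ].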
+ for (size_t I = 0; I < NS; ++I) {
+ uint32_t NV = Func.getNumValueDataForSite(IPVK_IndirectCallTarget, I);
+ std::unique_ptr<InstrProfValueData[]> VD =
+ Func.getValueForSite(IPVK_IndirectCallTarget, I);
+ for (uint32_t V = 0; V < NV; V++) {
+ OS << "\t[ " << I << ", ";
+ OS << Symtab.getFuncName(VD[V].Value) << ", " << VD[V].Count
+ << " ]\n";
+ }
+ }
+ }
}
- if (Show && ShowCounts)
- OS << "]\n";
}
+
if (Reader->hasError())
- exitWithError(Reader->getError().message(), Filename);
+ exitWithErrorCode(Reader->getError(), Filename);
+
+ if (ShowCounts && TextFormat)
+ return 0;
if (ShowAllFunctions || !ShowFunction.empty())
OS << "Functions shown: " << ShownFunctions << "\n";
@@ -192,10 +344,12 @@ static int showSampleProfile(std::string Filename, bool ShowCounts,
using namespace sampleprof;
auto ReaderOrErr = SampleProfileReader::create(Filename, getGlobalContext());
if (std::error_code EC = ReaderOrErr.getError())
- exitWithError(EC.message(), Filename);
+ exitWithErrorCode(EC, Filename);
auto Reader = std::move(ReaderOrErr.get());
- Reader->read();
+ if (std::error_code EC = Reader->read())
+ exitWithErrorCode(EC, Filename);
+
if (ShowAllFunctions || ShowFunction.empty())
Reader->dump(OS);
else
@@ -210,6 +364,12 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<bool> ShowCounts("counts", cl::init(false),
cl::desc("Show counter values for shown functions"));
+ cl::opt<bool> TextFormat(
+ "text", cl::init(false),
+ cl::desc("Show instr profile data in text dump format"));
+ cl::opt<bool> ShowIndirectCallTargets(
+ "ic-targets", cl::init(false),
+ cl::desc("Show indirect call site target values for shown functions"));
cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
cl::desc("Details for every function"));
cl::opt<std::string> ShowFunction("function",
@@ -232,14 +392,14 @@ static int show_main(int argc, const char *argv[]) {
std::error_code EC;
raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
if (EC)
- exitWithError(EC.message(), OutputFilename);
+ exitWithErrorCode(EC, OutputFilename);
if (ShowAllFunctions && !ShowFunction.empty())
errs() << "warning: -function argument ignored: showing all functions\n";
if (ProfileKind == instr)
- return showInstrProfile(Filename, ShowCounts, ShowAllFunctions,
- ShowFunction, OS);
+ return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets,
+ ShowAllFunctions, ShowFunction, TextFormat, OS);
else
return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
ShowFunction, OS);
@@ -266,8 +426,7 @@ int main(int argc, const char *argv[]) {
return func(argc - 1, argv + 1);
}
- if (strcmp(argv[1], "-h") == 0 ||
- strcmp(argv[1], "-help") == 0 ||
+ if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
strcmp(argv[1], "--help") == 0) {
errs() << "OVERVIEW: LLVM profile data tools\n\n"
diff --git a/contrib/llvm/tools/llvm-readobj/ARMAttributeParser.cpp b/contrib/llvm/tools/llvm-readobj/ARMAttributeParser.cpp
index e2d7191..688d349 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMAttributeParser.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ARMAttributeParser.cpp
@@ -118,7 +118,7 @@ void ARMAttributeParser::PrintAttribute(unsigned Tag, unsigned Value,
void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
"ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
"ARM v7E-M", "ARM v8"
@@ -149,7 +149,7 @@ void ARMAttributeParser::CPU_arch_profile(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ARM_ISA_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "Permitted" };
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -159,7 +159,7 @@ void ARMAttributeParser::ARM_ISA_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::THUMB_ISA_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "Thumb-1", "Thumb-2" };
+ static const char *const Strings[] = { "Not Permitted", "Thumb-1", "Thumb-2" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -169,7 +169,7 @@ void ARMAttributeParser::THUMB_ISA_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::FP_arch(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16", "VFPv4",
"VFPv4-D16", "ARMv8-a FP", "ARMv8-a FP-D16"
};
@@ -182,7 +182,7 @@ void ARMAttributeParser::FP_arch(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::WMMX_arch(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "WMMXv1", "WMMXv2" };
+ static const char *const Strings[] = { "Not Permitted", "WMMXv1", "WMMXv2" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -192,8 +192,8 @@ void ARMAttributeParser::WMMX_arch(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
- "Not Permitted", "NEONv1", "NEONv2+FMA", "ARMv8-a NEON"
+ static const char *const Strings[] = {
+ "Not Permitted", "NEONv1", "NEONv2+FMA", "ARMv8-a NEON", "ARMv8.1-a NEON"
};
uint64_t Value = ParseInteger(Data, Offset);
@@ -204,7 +204,7 @@ void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"None", "Bare Platform", "Linux Application", "Linux DSO", "Palm OS 2004",
"Reserved (Palm OS)", "Symbian OS 2004", "Reserved (Symbian OS)"
};
@@ -217,7 +217,7 @@ void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_PCS_R9_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "v6", "Static Base", "TLS", "Unused" };
+ static const char *const Strings[] = { "v6", "Static Base", "TLS", "Unused" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -227,7 +227,7 @@ void ARMAttributeParser::ABI_PCS_R9_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_PCS_RW_data(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Absolute", "PC-relative", "SB-relative", "Not Permitted"
};
@@ -239,7 +239,9 @@ void ARMAttributeParser::ABI_PCS_RW_data(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_PCS_RO_data(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Absolute", "PC-relative", "Not Permitted" };
+ static const char *const Strings[] = {
+ "Absolute", "PC-relative", "Not Permitted"
+ };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -249,7 +251,9 @@ void ARMAttributeParser::ABI_PCS_RO_data(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_PCS_GOT_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "Direct", "GOT-Indirect" };
+ static const char *const Strings[] = {
+ "Not Permitted", "Direct", "GOT-Indirect"
+ };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -259,7 +263,7 @@ void ARMAttributeParser::ABI_PCS_GOT_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_PCS_wchar_t(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "Unknown", "2-byte", "Unknown", "4-byte"
};
@@ -271,7 +275,7 @@ void ARMAttributeParser::ABI_PCS_wchar_t(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_FP_rounding(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "IEEE-754", "Runtime" };
+ static const char *const Strings[] = { "IEEE-754", "Runtime" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -281,7 +285,9 @@ void ARMAttributeParser::ABI_FP_rounding(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_FP_denormal(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Unsupported", "IEEE-754", "Sign Only" };
+ static const char *const Strings[] = {
+ "Unsupported", "IEEE-754", "Sign Only"
+ };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -291,7 +297,7 @@ void ARMAttributeParser::ABI_FP_denormal(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_FP_exceptions(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "IEEE-754" };
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -302,7 +308,7 @@ void ARMAttributeParser::ABI_FP_exceptions(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_FP_user_exceptions(AttrType Tag,
const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "IEEE-754" };
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -312,7 +318,7 @@ void ARMAttributeParser::ABI_FP_user_exceptions(AttrType Tag,
void ARMAttributeParser::ABI_FP_number_model(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "Finite Only", "RTABI", "IEEE-754"
};
@@ -324,7 +330,7 @@ void ARMAttributeParser::ABI_FP_number_model(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_align_needed(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "8-byte alignment", "4-byte alignment", "Reserved"
};
@@ -344,7 +350,7 @@ void ARMAttributeParser::ABI_align_needed(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_align_preserved(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Required", "8-byte data alignment", "8-byte data and code alignment",
"Reserved"
};
@@ -365,7 +371,7 @@ void ARMAttributeParser::ABI_align_preserved(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_enum_size(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "Packed", "Int32", "External Int32"
};
@@ -377,7 +383,7 @@ void ARMAttributeParser::ABI_enum_size(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_HardFP_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Tag_FP_arch", "Single-Precision", "Reserved", "Tag_FP_arch (deprecated)"
};
@@ -389,7 +395,7 @@ void ARMAttributeParser::ABI_HardFP_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_VFP_args(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"AAPCS", "AAPCS VFP", "Custom", "Not Permitted"
};
@@ -401,7 +407,7 @@ void ARMAttributeParser::ABI_VFP_args(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_WMMX_args(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "AAPCS", "iWMMX", "Custom" };
+ static const char *const Strings[] = { "AAPCS", "iWMMX", "Custom" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -412,7 +418,7 @@ void ARMAttributeParser::ABI_WMMX_args(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_optimization_goals(AttrType Tag,
const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Debugging",
"Best Debugging"
};
@@ -426,7 +432,7 @@ void ARMAttributeParser::ABI_optimization_goals(AttrType Tag,
void ARMAttributeParser::ABI_FP_optimization_goals(AttrType Tag,
const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Accuracy",
"Best Accuracy"
};
@@ -461,7 +467,7 @@ void ARMAttributeParser::compatibility(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::CPU_unaligned_access(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "v6-style" };
+ static const char *const Strings[] = { "Not Permitted", "v6-style" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -471,7 +477,7 @@ void ARMAttributeParser::CPU_unaligned_access(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::FP_HP_extension(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "If Available", "Permitted" };
+ static const char *const Strings[] = { "If Available", "Permitted" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -481,7 +487,7 @@ void ARMAttributeParser::FP_HP_extension(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::ABI_FP_16bit_format(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "IEEE-754", "VFPv3" };
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754", "VFPv3" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -491,7 +497,7 @@ void ARMAttributeParser::ABI_FP_16bit_format(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::MPextension_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "Permitted" };
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -501,7 +507,7 @@ void ARMAttributeParser::MPextension_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::DIV_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"If Available", "Not Permitted", "Permitted"
};
@@ -513,7 +519,7 @@ void ARMAttributeParser::DIV_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::T2EE_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = { "Not Permitted", "Permitted" };
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
uint64_t Value = ParseInteger(Data, Offset);
StringRef ValueDesc =
@@ -523,7 +529,7 @@ void ARMAttributeParser::T2EE_use(AttrType Tag, const uint8_t *Data,
void ARMAttributeParser::Virtualization_use(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
- static const char *Strings[] = {
+ static const char *const Strings[] = {
"Not Permitted", "TrustZone", "Virtualization Extensions",
"TrustZone + Virtualization Extensions"
};
diff --git a/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index dd2490d..beb5fd4 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -305,13 +305,15 @@ void OpcodeDecoder::Decode(const uint8_t *Opcodes, off_t Offset, size_t Length)
template <typename ET>
class PrinterContext {
- StreamWriter &SW;
- const object::ELFFile<ET> *ELF;
-
typedef typename object::ELFFile<ET>::Elf_Sym Elf_Sym;
typedef typename object::ELFFile<ET>::Elf_Shdr Elf_Shdr;
+ typedef typename object::ELFFile<ET>::Elf_Rel Elf_Rel;
+ typedef typename object::ELFFile<ET>::Elf_Word Elf_Word;
- typedef typename object::ELFFile<ET>::Elf_Rel_Iter Elf_Rel_iterator;
+ StreamWriter &SW;
+ const object::ELFFile<ET> *ELF;
+ const Elf_Shdr *Symtab;
+ ArrayRef<Elf_Word> ShndxTable;
static const size_t IndexTableEntrySize;
@@ -332,8 +334,9 @@ class PrinterContext {
void PrintOpcodes(const uint8_t *Entry, size_t Length, off_t Offset) const;
public:
- PrinterContext(StreamWriter &Writer, const object::ELFFile<ET> *File)
- : SW(Writer), ELF(File) {}
+ PrinterContext(StreamWriter &SW, const object::ELFFile<ET> *ELF,
+ const Elf_Shdr *Symtab)
+ : SW(SW), ELF(ELF), Symtab(Symtab) {}
void PrintUnwindInformation() const;
};
@@ -345,10 +348,14 @@ template <typename ET>
ErrorOr<StringRef>
PrinterContext<ET>::FunctionAtAddress(unsigned Section,
uint64_t Address) const {
- for (const Elf_Sym &Sym : ELF->symbols())
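+ // Symbol names are resolved through the string table linked to the given symbol table.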
+ ErrorOr<StringRef> StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
+ error(StrTableOrErr.getError());
+ StringRef StrTable = *StrTableOrErr;
+
+ for (const Elf_Sym &Sym : ELF->symbols(Symtab))
if (Sym.st_shndx == Section && Sym.st_value == Address &&
Sym.getType() == ELF::STT_FUNC)
- return ELF->getSymbolName(&Sym, false);
+ return Sym.getName(StrTable);
return readobj_error::unknown_symbol;
}
@@ -365,24 +372,29 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
/// table.
for (const Elf_Shdr &Sec : ELF->sections()) {
- if (Sec.sh_type == ELF::SHT_REL && Sec.sh_info == IndexSectionIndex) {
- for (Elf_Rel_iterator RI = ELF->rel_begin(&Sec), RE = ELF->rel_end(&Sec);
- RI != RE; ++RI) {
- if (RI->r_offset == static_cast<unsigned>(IndexTableOffset)) {
- typename object::ELFFile<ET>::Elf_Rela RelA;
- RelA.r_offset = RI->r_offset;
- RelA.r_info = RI->r_info;
- RelA.r_addend = 0;
-
- std::pair<const Elf_Shdr *, const Elf_Sym *> Symbol =
- ELF->getRelocationSymbol(&Sec, &RelA);
-
- ErrorOr<const Elf_Shdr *> Ret = ELF->getSection(Symbol.second);
- if (std::error_code EC = Ret.getError())
- report_fatal_error(EC.message());
- return *Ret;
- }
- }
+ if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex)
+ continue;
+
+ ErrorOr<const Elf_Shdr *> SymTabOrErr = ELF->getSection(Sec.sh_link);
+ error(SymTabOrErr.getError());
+ const Elf_Shdr *SymTab = *SymTabOrErr;
+
+ for (const Elf_Rel &R : ELF->rels(&Sec)) {
+ if (R.r_offset != static_cast<unsigned>(IndexTableOffset))
+ continue;
+
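+ // SHT_REL entries carry no addend; widen to an Elf_Rela with r_addend = 0 so the symbol lookup is uniform.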
+ typename object::ELFFile<ET>::Elf_Rela RelA;
+ RelA.r_offset = R.r_offset;
+ RelA.r_info = R.r_info;
+ RelA.r_addend = 0;
+
+ const Elf_Sym *Symbol = ELF->getRelocationSymbol(&RelA, SymTab);
+
+ ErrorOr<const Elf_Shdr *> Ret =
+ ELF->getSection(Symbol, SymTab, ShndxTable);
+ if (std::error_code EC = Ret.getError())
+ report_fatal_error(EC.message());
+ return *Ret;
}
}
return nullptr;
diff --git a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index bf5ff8e..650955d 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -630,9 +630,10 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
SW.printString("ExceptionRecord", formatSymbol(*Name, Address));
- section_iterator SI = COFF.section_end();
- if (XDataRecord->getSection(SI))
+ ErrorOr<section_iterator> SIOrErr = XDataRecord->getSection();
+ if (!SIOrErr)
return false;
+ section_iterator SI = *SIOrErr;
return dumpXDataRecord(COFF, *SI, FunctionAddress, Address);
} else {
diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
index cf897d7..516d1cf 100644
--- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -60,6 +60,7 @@ public:
void printCOFFExports() override;
void printCOFFDirectives() override;
void printCOFFBaseReloc() override;
+ void printCodeViewDebugInfo() override;
void printStackMap() const override;
private:
void printSymbol(const SymbolRef &Sym);
@@ -71,7 +72,7 @@ private:
void printBaseOfDataField(const pe32_header *Hdr);
void printBaseOfDataField(const pe32plus_header *Hdr);
- void printCodeViewDebugInfo(const SectionRef &Section);
+ void printCodeViewSection(const SectionRef &Section);
void printCodeViewSymbolsSubsection(StringRef Subsection,
const SectionRef &Section,
@@ -219,6 +220,7 @@ static const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
static const EnumEntry<COFF::SectionCharacteristics>
ImageSectionCharacteristics[] = {
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_TYPE_NOLOAD ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_TYPE_NO_PAD ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_CODE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_INITIALIZED_DATA ),
@@ -385,14 +387,12 @@ void COFFDumper::printFileHeaders() {
// Print PE header. This header does not exist if this is an object file and
// not an executable.
const pe32_header *PEHeader = nullptr;
- if (error(Obj->getPE32Header(PEHeader)))
- return;
+ error(Obj->getPE32Header(PEHeader));
if (PEHeader)
printPEHeader<pe32_header>(PEHeader);
const pe32plus_header *PEPlusHeader = nullptr;
- if (error(Obj->getPE32PlusHeader(PEPlusHeader)))
- return;
+ error(Obj->getPE32PlusHeader(PEPlusHeader));
if (PEPlusHeader)
printPEHeader<pe32plus_header>(PEPlusHeader);
@@ -475,10 +475,18 @@ void COFFDumper::printBaseOfDataField(const pe32_header *Hdr) {
void COFFDumper::printBaseOfDataField(const pe32plus_header *) {}
-void COFFDumper::printCodeViewDebugInfo(const SectionRef &Section) {
+void COFFDumper::printCodeViewDebugInfo() {
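+ // CodeView symbol and line information lives in .debug$S sections; dump each one.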
+ for (const SectionRef &S : Obj->sections()) {
+ StringRef SecName;
+ error(S.getName(SecName));
+ if (SecName == ".debug$S")
+ printCodeViewSection(S);
+ }
+}
+
+void COFFDumper::printCodeViewSection(const SectionRef &Section) {
StringRef Data;
- if (error(Section.getContents(Data)))
- return;
+ error(Section.getContents(Data));
SmallVector<StringRef, 10> FunctionNames;
StringMap<StringRef> FunctionLineTables;
@@ -518,8 +526,7 @@ void COFFDumper::printCodeViewDebugInfo(const SectionRef &Section) {
switch (SubSectionType) {
case COFF::DEBUG_SYMBOL_SUBSECTION:
- if (opts::SectionSymbols)
- printCodeViewSymbolsSubsection(Contents, Section, Offset);
+ printCodeViewSymbolsSubsection(Contents, Section, Offset);
break;
case COFF::DEBUG_LINE_TABLE_SUBSECTION: {
// Holds a PC to file:line table. Some data to parse this subsection is
@@ -533,19 +540,18 @@ void COFFDumper::printCodeViewDebugInfo(const SectionRef &Section) {
return;
}
- StringRef FunctionName;
- if (error(resolveSymbolName(Obj->getCOFFSection(Section), Offset,
- FunctionName)))
- return;
- W.printString("FunctionName", FunctionName);
- if (FunctionLineTables.count(FunctionName) != 0) {
+ StringRef LinkageName;
+ error(resolveSymbolName(Obj->getCOFFSection(Section), Offset,
+ LinkageName));
+ W.printString("LinkageName", LinkageName);
+ if (FunctionLineTables.count(LinkageName) != 0) {
// Saw debug info for this function already?
error(object_error::parse_failed);
return;
}
- FunctionLineTables[FunctionName] = Contents;
- FunctionNames.push_back(FunctionName);
+ FunctionLineTables[LinkageName] = Contents;
+ FunctionNames.push_back(LinkageName);
break;
}
case COFF::DEBUG_STRING_TABLE_SUBSECTION:
@@ -582,7 +588,7 @@ void COFFDumper::printCodeViewDebugInfo(const SectionRef &Section) {
for (unsigned I = 0, E = FunctionNames.size(); I != E; ++I) {
StringRef Name = FunctionNames[I];
ListScope S(W, "FunctionLineTable");
- W.printString("FunctionName", Name);
+ W.printString("LinkageName", Name);
DataExtractor DE(FunctionLineTables[Name], true, 4);
uint32_t Offset = 6; // Skip relocations.
@@ -695,9 +701,8 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
uint32_t CodeSize = DE.getU32(&Offset);
DE.getU8(&Offset, Unused, 12);
StringRef SectionName;
- if (error(resolveSymbolName(Obj->getCOFFSection(Section),
- OffsetInSection + Offset, SectionName)))
- return;
+ error(resolveSymbolName(Obj->getCOFFSection(Section),
+ OffsetInSection + Offset, SectionName));
Offset += 4;
DE.getU8(&Offset, Unused, 3);
StringRef DisplayName = DE.getCStr(&Offset);
@@ -748,8 +753,7 @@ void COFFDumper::printSections() {
const coff_section *Section = Obj->getCOFFSection(Sec);
StringRef Name;
- if (error(Sec.getName(Name)))
- Name = "";
+ error(Sec.getName(Name));
DictScope D(W, "Section");
W.printNumber("Number", SectionNumber);
@@ -782,14 +786,10 @@ void COFFDumper::printSections() {
}
}
- if (Name == ".debug$S" && opts::CodeView)
- printCodeViewDebugInfo(Sec);
-
if (opts::SectionData &&
!(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) {
StringRef Data;
- if (error(Sec.getContents(Data)))
- break;
+ error(Sec.getContents(Data));
W.printBinaryBlock("SectionData", Data);
}
@@ -803,8 +803,7 @@ void COFFDumper::printRelocations() {
for (const SectionRef &Section : Obj->sections()) {
++SectionNumber;
StringRef Name;
- if (error(Section.getName(Name)))
- continue;
+ error(Section.getName(Name));
bool PrintedGroup = false;
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -834,8 +833,7 @@ void COFFDumper::printRelocation(const SectionRef &Section,
symbol_iterator Symbol = Reloc.getSymbol();
if (Symbol != Obj->symbol_end()) {
ErrorOr<StringRef> SymbolNameOrErr = Symbol->getName();
- if (error(SymbolNameOrErr.getError()))
- return;
+ error(SymbolNameOrErr.getError());
SymbolName = *SymbolNameOrErr;
}
@@ -914,8 +912,7 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
for (uint8_t I = 0; I < Symbol.getNumberOfAuxSymbols(); ++I) {
if (Symbol.isFunctionDefinition()) {
const coff_aux_function_definition *Aux;
- if (error(getSymbolAuxData(Obj, Symbol, I, Aux)))
- break;
+ error(getSymbolAuxData(Obj, Symbol, I, Aux));
DictScope AS(W, "AuxFunctionDef");
W.printNumber("TagIndex", Aux->TagIndex);
@@ -925,8 +922,7 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
} else if (Symbol.isAnyUndefined()) {
const coff_aux_weak_external *Aux;
- if (error(getSymbolAuxData(Obj, Symbol, I, Aux)))
- break;
+ error(getSymbolAuxData(Obj, Symbol, I, Aux));
ErrorOr<COFFSymbolRef> Linked = Obj->getSymbol(Aux->TagIndex);
StringRef LinkedName;
@@ -943,8 +939,7 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
} else if (Symbol.isFileRecord()) {
const char *FileName;
- if (error(getSymbolAuxData(Obj, Symbol, I, FileName)))
- break;
+ error(getSymbolAuxData(Obj, Symbol, I, FileName));
DictScope AS(W, "AuxFileRecord");
@@ -954,8 +949,7 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
break;
} else if (Symbol.isSectionDefinition()) {
const coff_aux_section_definition *Aux;
- if (error(getSymbolAuxData(Obj, Symbol, I, Aux)))
- break;
+ error(getSymbolAuxData(Obj, Symbol, I, Aux));
int32_t AuxNumber = Aux->getNumber(Symbol.isBigObj());
@@ -986,8 +980,7 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
}
} else if (Symbol.isCLRToken()) {
const coff_aux_clr_token *Aux;
- if (error(getSymbolAuxData(Obj, Symbol, I, Aux)))
- break;
+ error(getSymbolAuxData(Obj, Symbol, I, Aux));
ErrorOr<COFFSymbolRef> ReferredSym =
Obj->getSymbol(Aux->SymbolTableIndex);
@@ -1040,9 +1033,9 @@ void COFFDumper::printImportedSymbols(
iterator_range<imported_symbol_iterator> Range) {
for (const ImportedSymbolRef &I : Range) {
StringRef Sym;
- if (error(I.getSymbolName(Sym))) return;
+ error(I.getSymbolName(Sym));
uint16_t Ordinal;
- if (error(I.getOrdinal(Ordinal))) return;
+ error(I.getOrdinal(Ordinal));
W.printNumber("Symbol", Sym, Ordinal);
}
}
@@ -1054,12 +1047,12 @@ void COFFDumper::printDelayImportedSymbols(
for (const ImportedSymbolRef &S : Range) {
DictScope Import(W, "Import");
StringRef Sym;
- if (error(S.getSymbolName(Sym))) return;
+ error(S.getSymbolName(Sym));
uint16_t Ordinal;
- if (error(S.getOrdinal(Ordinal))) return;
+ error(S.getOrdinal(Ordinal));
W.printNumber("Symbol", Sym, Ordinal);
uint64_t Addr;
- if (error(I.getImportAddress(Index++, Addr))) return;
+ error(I.getImportAddress(Index++, Addr));
W.printHex("Address", Addr);
}
}
@@ -1069,12 +1062,12 @@ void COFFDumper::printCOFFImports() {
for (const ImportDirectoryEntryRef &I : Obj->import_directories()) {
DictScope Import(W, "Import");
StringRef Name;
- if (error(I.getName(Name))) return;
+ error(I.getName(Name));
W.printString("Name", Name);
uint32_t Addr;
- if (error(I.getImportLookupTableRVA(Addr))) return;
+ error(I.getImportLookupTableRVA(Addr));
W.printHex("ImportLookupTableRVA", Addr);
- if (error(I.getImportAddressTableRVA(Addr))) return;
+ error(I.getImportAddressTableRVA(Addr));
W.printHex("ImportAddressTableRVA", Addr);
printImportedSymbols(I.imported_symbols());
}
@@ -1083,10 +1076,10 @@ void COFFDumper::printCOFFImports() {
for (const DelayImportDirectoryEntryRef &I : Obj->delay_import_directories()) {
DictScope Import(W, "DelayImport");
StringRef Name;
- if (error(I.getName(Name))) return;
+ error(I.getName(Name));
W.printString("Name", Name);
const delay_import_directory_table_entry *Table;
- if (error(I.getDelayImportTable(Table))) return;
+ error(I.getDelayImportTable(Table));
W.printHex("Attributes", Table->Attributes);
W.printHex("ModuleHandle", Table->ModuleHandle);
W.printHex("ImportAddressTable", Table->DelayImportAddressTable);
@@ -1104,12 +1097,9 @@ void COFFDumper::printCOFFExports() {
StringRef Name;
uint32_t Ordinal, RVA;
- if (error(E.getSymbolName(Name)))
- continue;
- if (error(E.getOrdinal(Ordinal)))
- continue;
- if (error(E.getExportRVA(RVA)))
- continue;
+ error(E.getSymbolName(Name));
+ error(E.getOrdinal(Ordinal));
+ error(E.getExportRVA(RVA));
W.printNumber("Ordinal", Ordinal);
W.printString("Name", Name);
@@ -1122,13 +1112,11 @@ void COFFDumper::printCOFFDirectives() {
StringRef Contents;
StringRef Name;
- if (error(Section.getName(Name)))
- continue;
+ error(Section.getName(Name));
if (Name != ".drectve")
continue;
- if (error(Section.getContents(Contents)))
- return;
+ error(Section.getContents(Contents));
W.printString("Directive(s)", Contents);
}
@@ -1152,10 +1140,8 @@ void COFFDumper::printCOFFBaseReloc() {
for (const BaseRelocRef &I : Obj->base_relocs()) {
uint8_t Type;
uint32_t RVA;
- if (error(I.getRVA(RVA)))
- continue;
- if (error(I.getType(Type)))
- continue;
+ error(I.getRVA(RVA));
+ error(I.getType(Type));
DictScope Import(W, "Entry");
W.printString("Type", getBaseRelocTypeName(Type));
W.printHex("Address", RVA);
diff --git a/contrib/llvm/tools/llvm-readobj/COFFImportDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFImportDumper.cpp
new file mode 100644
index 0000000..83715e6
--- /dev/null
+++ b/contrib/llvm/tools/llvm-readobj/COFFImportDumper.cpp
@@ -0,0 +1,52 @@
+//===-- COFFImportDumper.cpp - COFF import library dumper -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the COFF import library dumper for llvm-readobj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "ObjDumper.h"
+#include "llvm-readobj.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Support/COFF.h"
+
+using namespace llvm::object;
+
+namespace llvm {
+
+void dumpCOFFImportFile(const COFFImportFile *File) {
+ outs() << '\n';
+ outs() << "File: " << File->getFileName() << "\n";
+ outs() << "Format: COFF-import-file\n";
+
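+ // The import header gives the import kind (code/data/const) and the name encoding used for the symbol.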
+ const coff_import_header *H = File->getCOFFImportHeader();
+ switch (H->getType()) {
+ case COFF::IMPORT_CODE: outs() << "Type: code\n"; break;
+ case COFF::IMPORT_DATA: outs() << "Type: data\n"; break;
+ case COFF::IMPORT_CONST: outs() << "Type: const\n"; break;
+ }
+
+ switch (H->getNameType()) {
+ case COFF::IMPORT_ORDINAL: outs() << "Name type: ordinal\n"; break;
+ case COFF::IMPORT_NAME: outs() << "Name type: name\n"; break;
+ case COFF::IMPORT_NAME_NOPREFIX: outs() << "Name type: noprefix\n"; break;
+ case COFF::IMPORT_NAME_UNDECORATE: outs() << "Name type: undecorate\n"; break;
+ }
+
+ for (const object::BasicSymbolRef &Sym : File->symbols()) {
+ outs() << "Symbol: ";
+ Sym.printName(outs());
+ outs() << "\n";
+ }
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
index 1cdf552..02397f3 100644
--- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -42,8 +42,7 @@ namespace {
template<typename ELFT>
class ELFDumper : public ObjDumper {
public:
- ELFDumper(const ELFFile<ELFT> *Obj, StreamWriter &Writer)
- : ObjDumper(Writer), Obj(Obj) {}
+ ELFDumper(const ELFFile<ELFT> *Obj, StreamWriter &Writer);
void printFileHeaders() override;
void printSections() override;
@@ -57,6 +56,9 @@ public:
void printNeededLibraries() override;
void printProgramHeaders() override;
void printHashTable() override;
+ void printGnuHashTable() override;
+ void printLoadName() override;
+ void printVersionInfo() override;
void printAttributes() override;
void printMipsPLTGOT() override;
@@ -69,13 +71,114 @@ private:
typedef ELFFile<ELFT> ELFO;
typedef typename ELFO::Elf_Shdr Elf_Shdr;
typedef typename ELFO::Elf_Sym Elf_Sym;
-
- void printSymbol(const Elf_Sym *Symbol, bool IsDynamic);
+ typedef typename ELFO::Elf_Dyn Elf_Dyn;
+ typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range;
+ typedef typename ELFO::Elf_Rel Elf_Rel;
+ typedef typename ELFO::Elf_Rela Elf_Rela;
+ typedef typename ELFO::Elf_Rela_Range Elf_Rela_Range;
+ typedef typename ELFO::Elf_Phdr Elf_Phdr;
+ typedef typename ELFO::Elf_Half Elf_Half;
+ typedef typename ELFO::Elf_Hash Elf_Hash;
+ typedef typename ELFO::Elf_GnuHash Elf_GnuHash;
+ typedef typename ELFO::Elf_Ehdr Elf_Ehdr;
+ typedef typename ELFO::Elf_Word Elf_Word;
+ typedef typename ELFO::uintX_t uintX_t;
+ typedef typename ELFO::Elf_Versym Elf_Versym;
+ typedef typename ELFO::Elf_Verneed Elf_Verneed;
+ typedef typename ELFO::Elf_Vernaux Elf_Vernaux;
+ typedef typename ELFO::Elf_Verdef Elf_Verdef;
+ typedef typename ELFO::Elf_Verdaux Elf_Verdaux;
+
+ /// \brief Represents a region described by entries in the .dynamic table.
+ struct DynRegionInfo {
+ DynRegionInfo() : Addr(nullptr), Size(0), EntSize(0) {}
+ /// \brief Address in current address space.
+ const void *Addr;
+ /// \brief Size in bytes of the region.
+ uintX_t Size;
+ /// \brief Size of each entity in the region.
+ uintX_t EntSize;
+ };
+
+ void printSymbolsHelper(bool IsDynamic);
+ void printSymbol(const Elf_Sym *Symbol, const Elf_Shdr *SymTab,
+ StringRef StrTable, bool IsDynamic);
void printRelocations(const Elf_Shdr *Sec);
- void printRelocation(const Elf_Shdr *Sec, typename ELFO::Elf_Rela Rel);
+ void printRelocation(Elf_Rela Rel, const Elf_Shdr *SymTab);
+ void printValue(uint64_t Type, uint64_t Value);
+
+ const Elf_Rela *dyn_rela_begin() const;
+ const Elf_Rela *dyn_rela_end() const;
+ Elf_Rela_Range dyn_relas() const;
+ StringRef getDynamicString(uint64_t Offset) const;
+ const Elf_Dyn *dynamic_table_begin() const {
+ ErrorOr<const Elf_Dyn *> Ret = Obj->dynamic_table_begin(DynamicProgHeader);
+ error(Ret.getError());
+ return *Ret;
+ }
+ const Elf_Dyn *dynamic_table_end() const {
+ ErrorOr<const Elf_Dyn *> Ret = Obj->dynamic_table_end(DynamicProgHeader);
+ error(Ret.getError());
+ return *Ret;
+ }
+ StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb,
+ bool &IsDefault);
+ void LoadVersionMap();
+ void LoadVersionNeeds(const Elf_Shdr *ec) const;
+ void LoadVersionDefs(const Elf_Shdr *sec) const;
const ELFO *Obj;
+ DynRegionInfo DynRelaRegion;
+ const Elf_Phdr *DynamicProgHeader = nullptr;
+ StringRef DynamicStringTable;
+ const Elf_Sym *DynSymStart = nullptr;
+ StringRef SOName;
+ const Elf_Hash *HashTable = nullptr;
+ const Elf_GnuHash *GnuHashTable = nullptr;
+ const Elf_Shdr *DotDynSymSec = nullptr;
+ const Elf_Shdr *DotSymtabSec = nullptr;
+ ArrayRef<Elf_Word> ShndxTable;
+
+ const Elf_Shdr *dot_gnu_version_sec = nullptr; // .gnu.version
+ const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r
+ const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d
+
+ // Records for each version index the corresponding Verdef or Vernaux entry.
+ // This is filled the first time LoadVersionMap() is called.
+ class VersionMapEntry : public PointerIntPair<const void *, 1> {
+ public:
+ // If the integer is 0, this is an Elf_Verdef*.
+ // If the integer is 1, this is an Elf_Vernaux*.
+ VersionMapEntry() : PointerIntPair<const void *, 1>(nullptr, 0) {}
+ VersionMapEntry(const Elf_Verdef *verdef)
+ : PointerIntPair<const void *, 1>(verdef, 0) {}
+ VersionMapEntry(const Elf_Vernaux *vernaux)
+ : PointerIntPair<const void *, 1>(vernaux, 1) {}
+ bool isNull() const { return getPointer() == nullptr; }
+ bool isVerdef() const { return !isNull() && getInt() == 0; }
+ bool isVernaux() const { return !isNull() && getInt() == 1; }
+ const Elf_Verdef *getVerdef() const {
+ return isVerdef() ? (const Elf_Verdef *)getPointer() : nullptr;
+ }
+ const Elf_Vernaux *getVernaux() const {
+ return isVernaux() ? (const Elf_Vernaux *)getPointer() : nullptr;
+ }
+ };
+ mutable SmallVector<VersionMapEntry, 16> VersionMap;
+
+public:
+ Elf_Dyn_Range dynamic_table() const {
+ ErrorOr<Elf_Dyn_Range> Ret = Obj->dynamic_table(DynamicProgHeader);
+ error(Ret.getError());
+ return *Ret;
+ }
+
+ std::string getFullSymbolName(const Elf_Sym *Symbol, StringRef StrTable,
+ bool IsDynamic);
+ const Elf_Shdr *getDotDynSymSec() const { return DotDynSymSec; }
+ const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
+ ArrayRef<Elf_Word> getShndxTable() { return ShndxTable; }
};
template <class T> T errorOrDefault(ErrorOr<T> Val, T Default = T()) {
@@ -122,30 +225,246 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
} // namespace llvm
-template <typename ELFO>
-static std::string getFullSymbolName(const ELFO &Obj,
- const typename ELFO::Elf_Sym *Symbol,
- bool IsDynamic) {
- StringRef SymbolName = errorOrDefault(Obj.getSymbolName(Symbol, IsDynamic));
+// Iterate through the versions needed section, and place each Elf_Vernaux
+// in the VersionMap according to its index.
+template <class ELFT>
+void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
+ unsigned vn_size = sec->sh_size; // Size of section in bytes
+ unsigned vn_count = sec->sh_info; // Number of Verneed entries
+ const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_end = sec_start + vn_size;
+ // The first Verneed entry is at the start of the section.
+ const char *p = sec_start;
+ for (unsigned i = 0; i < vn_count; i++) {
+ if (p + sizeof(Elf_Verneed) > sec_end)
+ report_fatal_error("Section ended unexpectedly while scanning "
+ "version needed records.");
+ const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
+ if (vn->vn_version != ELF::VER_NEED_CURRENT)
+ report_fatal_error("Unexpected verneed version");
+ // Iterate through the Vernaux entries
+ const char *paux = p + vn->vn_aux;
+ for (unsigned j = 0; j < vn->vn_cnt; j++) {
+ if (paux + sizeof(Elf_Vernaux) > sec_end)
+ report_fatal_error("Section ended unexpected while scanning auxiliary "
+ "version needed records.");
+ const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
+ size_t index = vna->vna_other & ELF::VERSYM_VERSION;
+ if (index >= VersionMap.size())
+ VersionMap.resize(index + 1);
+ VersionMap[index] = VersionMapEntry(vna);
+ paux += vna->vna_next;
+ }
+ p += vn->vn_next;
+ }
+}
+
+// Iterate through the version definitions, and place each Elf_Verdef
+// in the VersionMap according to its index.
+template <class ELFT>
+void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
+ unsigned vd_size = sec->sh_size; // Size of section in bytes
+ unsigned vd_count = sec->sh_info; // Number of Verdef entries
+ const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_end = sec_start + vd_size;
+ // The first Verdef entry is at the start of the section.
+ const char *p = sec_start;
+ for (unsigned i = 0; i < vd_count; i++) {
+ if (p + sizeof(Elf_Verdef) > sec_end)
+ report_fatal_error("Section ended unexpectedly while scanning "
+ "version definitions.");
+ const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
+ if (vd->vd_version != ELF::VER_DEF_CURRENT)
+ report_fatal_error("Unexpected verdef version");
+ size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
+ if (index >= VersionMap.size())
+ VersionMap.resize(index + 1);
+ VersionMap[index] = VersionMapEntry(vd);
+ p += vd->vd_next;
+ }
+}
+
+template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() {
+ // If there is no dynamic symtab or version table, there is nothing to do.
+ if (!DynSymStart || !dot_gnu_version_sec)
+ return;
+
+ // Has the VersionMap already been loaded?
+ if (VersionMap.size() > 0)
+ return;
+
+ // The first two version indexes are reserved.
+ // Index 0 is LOCAL, index 1 is GLOBAL.
+ VersionMap.push_back(VersionMapEntry());
+ VersionMap.push_back(VersionMapEntry());
+
+ if (dot_gnu_version_d_sec)
+ LoadVersionDefs(dot_gnu_version_d_sec);
+
+ if (dot_gnu_version_r_sec)
+ LoadVersionNeeds(dot_gnu_version_r_sec);
+}
+
+
+template <typename ELFO, class ELFT>
+static void printVersionSymbolSection(ELFDumper<ELFT> *Dumper,
+ const ELFO *Obj,
+ const typename ELFO::Elf_Shdr *Sec,
+ StreamWriter &W) {
+ DictScope SS(W, "Version symbols");
+ if (!Sec)
+ return;
+ StringRef Name = errorOrDefault(Obj->getSectionName(Sec));
+ W.printNumber("Section Name", Name, Sec->sh_name);
+ W.printHex("Address", Sec->sh_addr);
+ W.printHex("Offset", Sec->sh_offset);
+ W.printNumber("Link", Sec->sh_link);
+
+ const typename ELFO::Elf_Shdr *DynSymSec = Dumper->getDotDynSymSec();
+ const uint8_t *P = (const uint8_t *)Obj->base() + Sec->sh_offset;
+ ErrorOr<StringRef> StrTableOrErr =
+ Obj->getStringTableForSymtab(*DynSymSec);
+ error(StrTableOrErr.getError());
+
+ // The section holds one entry for each symbol in the dynamic symbol table (DT_SYMTAB).
+ ListScope Syms(W, "Symbols");
+ for (const typename ELFO::Elf_Sym &Sym : Obj->symbols(DynSymSec)) {
+ DictScope S(W, "Symbol");
+ std::string FullSymbolName =
+ Dumper->getFullSymbolName(&Sym, *StrTableOrErr, true /* IsDynamic */);
+ W.printNumber("Version", *P);
+ W.printString("Name", FullSymbolName);
+ P += sizeof(typename ELFO::Elf_Half);
+ }
+}
+
+template <typename ELFO, class ELFT>
+static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
+ const ELFO *Obj,
+ const typename ELFO::Elf_Shdr *Sec,
+ StreamWriter &W) {
+ DictScope SD(W, "Version definition");
+ if (!Sec)
+ return;
+ StringRef Name = errorOrDefault(Obj->getSectionName(Sec));
+ W.printNumber("Section Name", Name, Sec->sh_name);
+ W.printHex("Address", Sec->sh_addr);
+ W.printHex("Offset", Sec->sh_offset);
+ W.printNumber("Link", Sec->sh_link);
+
+ unsigned verdef_entries = 0;
+ // The number of entries in the SHT_GNU_verdef section
+ // is given by the DT_VERDEFNUM dynamic tag.
+ for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
+ if (Dyn.d_tag == DT_VERDEFNUM)
+ verdef_entries = Dyn.d_un.d_val;
+ }
+ const uint8_t *SecStartAddress =
+ (const uint8_t *)Obj->base() + Sec->sh_offset;
+ const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
+ const uint8_t *P = SecStartAddress;
+ ErrorOr<const typename ELFO::Elf_Shdr *> StrTabOrErr =
+ Obj->getSection(Sec->sh_link);
+ error(StrTabOrErr.getError());
+
+ ListScope Entries(W, "Entries");
+ for (unsigned i = 0; i < verdef_entries; ++i) {
+ if (P + sizeof(typename ELFO::Elf_Verdef) > SecEndAddress)
+ report_fatal_error("invalid offset in the section");
+ auto *VD = reinterpret_cast<const typename ELFO::Elf_Verdef *>(P);
+ DictScope Entry(W, "Entry");
+ W.printHex("Offset", (uintptr_t)P - (uintptr_t)SecStartAddress);
+ W.printNumber("Rev", VD->vd_version);
+ // FIXME: print something more readable.
+ W.printNumber("Flags", VD->vd_flags);
+ W.printNumber("Index", VD->vd_ndx);
+ W.printNumber("Cnt", VD->vd_cnt);
+ W.printString("Name", StringRef((const char *)(Obj->base() +
+ (*StrTabOrErr)->sh_offset +
+ VD->getAux()->vda_name)));
+ P += VD->vd_next;
+ }
+}
+
+template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
+ // Dump version symbol section.
+ printVersionSymbolSection(this, Obj, dot_gnu_version_sec, W);
+
+ // Dump version definition section.
+ printVersionDefinitionSection(this, Obj, dot_gnu_version_d_sec, W);
+}
+
+template <typename ELFT>
+StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
+ const Elf_Sym *symb,
+ bool &IsDefault) {
+ // This is a dynamic symbol. Look in the GNU symbol version table.
+ if (!dot_gnu_version_sec) {
+ // No version table.
+ IsDefault = false;
+ return StringRef("");
+ }
+
+ // Determine the position in the symbol table of this entry.
+ size_t entry_index = (reinterpret_cast<uintptr_t>(symb) -
+ reinterpret_cast<uintptr_t>(DynSymStart)) /
+ sizeof(Elf_Sym);
+
+ // Get the corresponding version index entry
+ const Elf_Versym *vs =
+ Obj->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
+ size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
+
+ // Special markers for unversioned symbols.
+ if (version_index == ELF::VER_NDX_LOCAL ||
+ version_index == ELF::VER_NDX_GLOBAL) {
+ IsDefault = false;
+ return StringRef("");
+ }
+
+ // Look up this symbol in the version table.
+ LoadVersionMap();
+ if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
+ reportError("Invalid version entry");
+ const VersionMapEntry &entry = VersionMap[version_index];
+
+ // Get the version name string
+ size_t name_offset;
+ if (entry.isVerdef()) {
+ // The first Verdaux entry holds the name.
+ name_offset = entry.getVerdef()->getAux()->vda_name;
+ IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
+ } else {
+ name_offset = entry.getVernaux()->vna_name;
+ IsDefault = false;
+ }
+ if (name_offset >= StrTab.size())
+ reportError("Invalid string offset");
+ return StringRef(StrTab.data() + name_offset);
+}
+
+template <typename ELFT>
+std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
+ StringRef StrTable,
+ bool IsDynamic) {
+ StringRef SymbolName = errorOrDefault(Symbol->getName(StrTable));
if (!IsDynamic)
return SymbolName;
std::string FullSymbolName(SymbolName);
bool IsDefault;
- ErrorOr<StringRef> Version =
- Obj.getSymbolVersion(nullptr, &*Symbol, IsDefault);
- if (Version) {
- FullSymbolName += (IsDefault ? "@@" : "@");
- FullSymbolName += *Version;
- } else
- error(Version.getError());
+ StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
+ FullSymbolName += (IsDefault ? "@@" : "@");
+ FullSymbolName += Version;
return FullSymbolName;
}
template <typename ELFO>
static void
getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
+ const typename ELFO::Elf_Shdr *SymTab,
+ ArrayRef<typename ELFO::Elf_Word> ShndxTable,
StringRef &SectionName, unsigned &SectionIndex) {
SectionIndex = Symbol->st_shndx;
if (Symbol->isUndefined())
@@ -162,25 +481,26 @@ getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
SectionName = "Reserved";
else {
if (SectionIndex == SHN_XINDEX)
- SectionIndex = Obj.getExtendedSymbolTableIndex(&*Symbol);
+ SectionIndex =
+ Obj.getExtendedSymbolTableIndex(Symbol, SymTab, ShndxTable);
ErrorOr<const typename ELFO::Elf_Shdr *> Sec = Obj.getSection(SectionIndex);
- if (!error(Sec.getError()))
- SectionName = errorOrDefault(Obj.getSectionName(*Sec));
+ error(Sec.getError());
+ SectionName = errorOrDefault(Obj.getSectionName(*Sec));
}
}
-template <class ELFT>
-static const typename ELFFile<ELFT>::Elf_Shdr *
-findSectionByAddress(const ELFFile<ELFT> *Obj, uint64_t Addr) {
+template <class ELFO>
+static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj,
+ uint64_t Addr) {
for (const auto &Shdr : Obj->sections())
if (Shdr.sh_addr == Addr)
return &Shdr;
return nullptr;
}
-template <class ELFT>
-static const typename ELFFile<ELFT>::Elf_Shdr *
-findSectionByName(const ELFFile<ELFT> &Obj, StringRef Name) {
+template <class ELFO>
+static const typename ELFO::Elf_Shdr *findSectionByName(const ELFO &Obj,
+ StringRef Name) {
for (const auto &Shdr : Obj.sections()) {
if (Name == errorOrDefault(Obj.getSectionName(&Shdr)))
return &Shdr;
@@ -409,6 +729,12 @@ static const EnumEntry<unsigned> ElfSymbolTypes[] = {
{ "GNU_IFunc", ELF::STT_GNU_IFUNC }
};
+static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
+ { "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL },
+ { "AMDGPU_HSA_INDIRECT_FUNCTION", ELF::STT_AMDGPU_HSA_INDIRECT_FUNCTION },
+ { "AMDGPU_HSA_METADATA", ELF::STT_AMDGPU_HSA_METADATA }
+};
+
static const char *getElfSectionType(unsigned Arch, unsigned Type) {
switch (Arch) {
case ELF::EM_ARM:
@@ -473,13 +799,24 @@ static const EnumEntry<unsigned> ElfSectionFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, SHF_TLS ),
LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_CP_SECTION),
LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_DP_SECTION),
- LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP )
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_GLOBAL),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_READONLY),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_CODE),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_AGENT)
};
static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
// Check potentially overlapped processor-specific
// program header type.
switch (Arch) {
+ case ELF::EM_AMDGPU:
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_READONLY_AGENT);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_CODE_AGENT);
+ }
case ELF::EM_ARM:
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
@@ -565,9 +902,138 @@ static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64R6)
};
+template <typename ELFT>
+ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, StreamWriter &Writer)
+ : ObjDumper(Writer), Obj(Obj) {
+
+ SmallVector<const Elf_Phdr *, 4> LoadSegments;
+ for (const Elf_Phdr &Phdr : Obj->program_headers()) {
+ if (Phdr.p_type == ELF::PT_DYNAMIC) {
+ DynamicProgHeader = &Phdr;
+ continue;
+ }
+ if (Phdr.p_type != ELF::PT_LOAD || Phdr.p_filesz == 0)
+ continue;
+ LoadSegments.push_back(&Phdr);
+ }
+
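+ // Map a virtual address to a pointer into the file image via the PT_LOAD segment that contains it.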
+ auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
+ const Elf_Phdr **I = std::upper_bound(
+ LoadSegments.begin(), LoadSegments.end(), VAddr, compareAddr<ELFT>);
+ if (I == LoadSegments.begin())
+ report_fatal_error("Virtual address is not in any segment");
+ --I;
+ const Elf_Phdr &Phdr = **I;
+ uint64_t Delta = VAddr - Phdr.p_vaddr;
+ if (Delta >= Phdr.p_filesz)
+ report_fatal_error("Virtual address is not in any segment");
+ return Obj->base() + Phdr.p_offset + Delta;
+ };
+
+ uint64_t SONameOffset = 0;
+ const char *StringTableBegin = nullptr;
+ uint64_t StringTableSize = 0;
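+ // Scan the dynamic table once, caching the hash tables, the RELA region, the string and symbol tables, and the SONAME offset.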
+ for (const Elf_Dyn &Dyn : dynamic_table()) {
+ switch (Dyn.d_tag) {
+ case ELF::DT_HASH:
+ HashTable =
+ reinterpret_cast<const Elf_Hash *>(toMappedAddr(Dyn.getPtr()));
+ break;
+ case ELF::DT_GNU_HASH:
+ GnuHashTable =
+ reinterpret_cast<const Elf_GnuHash *>(toMappedAddr(Dyn.getPtr()));
+ break;
+ case ELF::DT_RELA:
+ DynRelaRegion.Addr = toMappedAddr(Dyn.getPtr());
+ break;
+ case ELF::DT_RELASZ:
+ DynRelaRegion.Size = Dyn.getVal();
+ break;
+ case ELF::DT_RELAENT:
+ DynRelaRegion.EntSize = Dyn.getVal();
+ break;
+ case ELF::DT_SONAME:
+ SONameOffset = Dyn.getVal();
+ break;
+ case ELF::DT_STRTAB:
+ StringTableBegin = (const char *)toMappedAddr(Dyn.getPtr());
+ break;
+ case ELF::DT_STRSZ:
+ StringTableSize = Dyn.getVal();
+ break;
+ case ELF::DT_SYMTAB:
+ DynSymStart =
+ reinterpret_cast<const Elf_Sym *>(toMappedAddr(Dyn.getPtr()));
+ break;
+ }
+ }
+ if (StringTableBegin)
+ DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
+ if (SONameOffset)
+ SOName = getDynamicString(SONameOffset);
+
+ for (const Elf_Shdr &Sec : Obj->sections()) {
+ switch (Sec.sh_type) {
+ case ELF::SHT_GNU_versym:
+ if (dot_gnu_version_sec != nullptr)
+ reportError("Multiple SHT_GNU_versym");
+ dot_gnu_version_sec = &Sec;
+ break;
+ case ELF::SHT_GNU_verdef:
+ if (dot_gnu_version_d_sec != nullptr)
+ reportError("Multiple SHT_GNU_verdef");
+ dot_gnu_version_d_sec = &Sec;
+ break;
+ case ELF::SHT_GNU_verneed:
+ if (dot_gnu_version_r_sec != nullptr)
+ reportError("Multilpe SHT_GNU_verneed");
+ dot_gnu_version_r_sec = &Sec;
+ break;
+ case ELF::SHT_DYNSYM:
+ if (DotDynSymSec != nullptr)
+ reportError("Multilpe SHT_DYNSYM");
+ DotDynSymSec = &Sec;
+ break;
+ case ELF::SHT_SYMTAB:
+ if (DotSymtabSec != nullptr)
+ reportError("Multilpe SHT_SYMTAB");
+ DotSymtabSec = &Sec;
+ break;
+ case ELF::SHT_SYMTAB_SHNDX: {
+ ErrorOr<ArrayRef<Elf_Word>> TableOrErr = Obj->getSHNDXTable(Sec);
+ error(TableOrErr.getError());
+ ShndxTable = *TableOrErr;
+ break;
+ }
+ }
+ }
+}
+
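+// The dynamic RELA region comes from DT_RELA/DT_RELASZ/DT_RELAENT; verify the
+// advertised entry size and total size before iterating it.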
+template <typename ELFT>
+const typename ELFDumper<ELFT>::Elf_Rela *
+ELFDumper<ELFT>::dyn_rela_begin() const {
+ if (DynRelaRegion.Size && DynRelaRegion.EntSize != sizeof(Elf_Rela))
+ report_fatal_error("Invalid relocation entry size");
+ return reinterpret_cast<const Elf_Rela *>(DynRelaRegion.Addr);
+}
+
+template <typename ELFT>
+const typename ELFDumper<ELFT>::Elf_Rela *
+ELFDumper<ELFT>::dyn_rela_end() const {
+ uint64_t Size = DynRelaRegion.Size;
+ if (Size % sizeof(Elf_Rela))
+ report_fatal_error("Invalid relocation table size");
+ return dyn_rela_begin() + Size / sizeof(Elf_Rela);
+}
+
+template <typename ELFT>
+typename ELFDumper<ELFT>::Elf_Rela_Range ELFDumper<ELFT>::dyn_relas() const {
+ return make_range(dyn_rela_begin(), dyn_rela_end());
+}
+
template<class ELFT>
void ELFDumper<ELFT>::printFileHeaders() {
- const typename ELFO::Elf_Ehdr *Header = Obj->getHeader();
+ const Elf_Ehdr *Header = Obj->getHeader();
{
DictScope D(W, "ElfHeader");
@@ -618,7 +1084,7 @@ void ELFDumper<ELFT>::printSections() {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
- for (const typename ELFO::Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : Obj->sections()) {
++SectionIndex;
StringRef Name = errorOrDefault(Obj->getSectionName(&Sec));
@@ -645,12 +1111,18 @@ void ELFDumper<ELFT>::printSections() {
if (opts::SectionSymbols) {
ListScope D(W, "Symbols");
- for (const typename ELFO::Elf_Sym &Sym : Obj->symbols()) {
- ErrorOr<const Elf_Shdr *> SymSec = Obj->getSection(&Sym);
+ const Elf_Shdr *Symtab = DotSymtabSec;
+ ErrorOr<StringRef> StrTableOrErr = Obj->getStringTableForSymtab(*Symtab);
+ error(StrTableOrErr.getError());
+ StringRef StrTable = *StrTableOrErr;
+
+ for (const Elf_Sym &Sym : Obj->symbols(Symtab)) {
+ ErrorOr<const Elf_Shdr *> SymSec =
+ Obj->getSection(&Sym, Symtab, ShndxTable);
if (!SymSec)
continue;
if (*SymSec == &Sec)
- printSymbol(&Sym, false);
+ printSymbol(&Sym, Symtab, StrTable, false);
}
}
@@ -667,7 +1139,7 @@ void ELFDumper<ELFT>::printRelocations() {
ListScope D(W, "Relocations");
int SectionNumber = -1;
- for (const typename ELFO::Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : Obj->sections()) {
++SectionNumber;
if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA)
@@ -689,29 +1161,25 @@ template<class ELFT>
void ELFDumper<ELFT>::printDynamicRelocations() {
W.startLine() << "Dynamic Relocations {\n";
W.indent();
- for (typename ELFO::Elf_Rela_Iter RelI = Obj->dyn_rela_begin(),
- RelE = Obj->dyn_rela_end();
- RelI != RelE; ++RelI) {
+ for (const Elf_Rela &Rel : dyn_relas()) {
SmallString<32> RelocName;
- Obj->getRelocationTypeName(RelI->getType(Obj->isMips64EL()), RelocName);
+ Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
StringRef SymbolName;
- uint32_t SymIndex = RelI->getSymbol(Obj->isMips64EL());
- const typename ELFO::Elf_Sym *Sym = Obj->dynamic_symbol_begin() + SymIndex;
- SymbolName = errorOrDefault(Obj->getSymbolName(Sym, true));
+ uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL());
+ const Elf_Sym *Sym = DynSymStart + SymIndex;
+ SymbolName = errorOrDefault(Sym->getName(DynamicStringTable));
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
- W.printHex("Offset", RelI->r_offset);
- W.printNumber("Type", RelocName, (int)RelI->getType(Obj->isMips64EL()));
+ W.printHex("Offset", Rel.r_offset);
+ W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
- W.printHex("Addend", RelI->r_addend);
+ W.printHex("Addend", Rel.r_addend);
}
else {
raw_ostream& OS = W.startLine();
- OS << W.hex(RelI->r_offset)
- << " " << RelocName
- << " " << (SymbolName.size() > 0 ? SymbolName : "-")
- << " " << W.hex(RelI->r_addend)
- << "\n";
+ OS << W.hex(Rel.r_offset) << " " << RelocName << " "
+ << (SymbolName.size() > 0 ? SymbolName : "-") << " "
+ << W.hex(Rel.r_addend) << "\n";
}
}
W.unindent();
@@ -720,51 +1188,43 @@ void ELFDumper<ELFT>::printDynamicRelocations() {
template <class ELFT>
void ELFDumper<ELFT>::printRelocations(const Elf_Shdr *Sec) {
+ ErrorOr<const Elf_Shdr *> SymTabOrErr = Obj->getSection(Sec->sh_link);
+ error(SymTabOrErr.getError());
+ const Elf_Shdr *SymTab = *SymTabOrErr;
+
switch (Sec->sh_type) {
case ELF::SHT_REL:
- for (typename ELFO::Elf_Rel_Iter RI = Obj->rel_begin(Sec),
- RE = Obj->rel_end(Sec);
- RI != RE; ++RI) {
- typename ELFO::Elf_Rela Rela;
- Rela.r_offset = RI->r_offset;
- Rela.r_info = RI->r_info;
+ for (const Elf_Rel &R : Obj->rels(Sec)) {
+ Elf_Rela Rela;
+ Rela.r_offset = R.r_offset;
+ Rela.r_info = R.r_info;
Rela.r_addend = 0;
- printRelocation(Sec, Rela);
+ printRelocation(Rela, SymTab);
}
break;
case ELF::SHT_RELA:
- for (typename ELFO::Elf_Rela_Iter RI = Obj->rela_begin(Sec),
- RE = Obj->rela_end(Sec);
- RI != RE; ++RI) {
- printRelocation(Sec, *RI);
- }
+ for (const Elf_Rela &R : Obj->relas(Sec))
+ printRelocation(R, SymTab);
break;
}
}
template <class ELFT>
-void ELFDumper<ELFT>::printRelocation(const Elf_Shdr *Sec,
- typename ELFO::Elf_Rela Rel) {
+void ELFDumper<ELFT>::printRelocation(Elf_Rela Rel, const Elf_Shdr *SymTab) {
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
StringRef TargetName;
- std::pair<const Elf_Shdr *, const Elf_Sym *> Sym =
- Obj->getRelocationSymbol(Sec, &Rel);
- if (Sym.second && Sym.second->getType() == ELF::STT_SECTION) {
- ErrorOr<const Elf_Shdr *> Sec = Obj->getSection(Sym.second);
- if (!error(Sec.getError())) {
- ErrorOr<StringRef> SecName = Obj->getSectionName(*Sec);
- if (SecName)
- TargetName = SecName.get();
- }
- } else if (Sym.first) {
- const Elf_Shdr *SymTable = Sym.first;
- ErrorOr<const Elf_Shdr *> StrTableSec = Obj->getSection(SymTable->sh_link);
- if (!error(StrTableSec.getError())) {
- ErrorOr<StringRef> StrTableOrErr = Obj->getStringTable(*StrTableSec);
- if (!error(StrTableOrErr.getError()))
- TargetName = errorOrDefault(Sym.second->getName(*StrTableOrErr));
- }
+ const Elf_Sym *Sym = Obj->getRelocationSymbol(&Rel, SymTab);
+ if (Sym && Sym->getType() == ELF::STT_SECTION) {
+ ErrorOr<const Elf_Shdr *> Sec = Obj->getSection(Sym, SymTab, ShndxTable);
+ error(Sec.getError());
+ ErrorOr<StringRef> SecName = Obj->getSectionName(*Sec);
+ if (SecName)
+ TargetName = SecName.get();
+ } else if (Sym) {
+ ErrorOr<StringRef> StrTableOrErr = Obj->getStringTableForSymtab(*SymTab);
+ error(StrTableOrErr.getError());
+ TargetName = errorOrDefault(Sym->getName(*StrTableOrErr));
}
if (opts::ExpandRelocs) {
@@ -783,27 +1243,38 @@ void ELFDumper<ELFT>::printRelocation(const Elf_Shdr *Sec,
}
template<class ELFT>
+void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) {
+ const Elf_Shdr *Symtab = (IsDynamic) ? DotDynSymSec : DotSymtabSec;
+ if (!Symtab)
+ return;
+ ErrorOr<StringRef> StrTableOrErr = Obj->getStringTableForSymtab(*Symtab);
+ error(StrTableOrErr.getError());
+ StringRef StrTable = *StrTableOrErr;
+ for (const Elf_Sym &Sym : Obj->symbols(Symtab))
+ printSymbol(&Sym, Symtab, StrTable, IsDynamic);
+}
+
+template<class ELFT>
void ELFDumper<ELFT>::printSymbols() {
ListScope Group(W, "Symbols");
- for (const typename ELFO::Elf_Sym &Sym : Obj->symbols())
- printSymbol(&Sym, false);
+ printSymbolsHelper(false);
}
template<class ELFT>
void ELFDumper<ELFT>::printDynamicSymbols() {
ListScope Group(W, "DynamicSymbols");
-
- for (const typename ELFO::Elf_Sym &Sym : Obj->dynamic_symbols())
- printSymbol(&Sym, true);
+ printSymbolsHelper(true);
}
template <class ELFT>
-void ELFDumper<ELFT>::printSymbol(const typename ELFO::Elf_Sym *Symbol,
- bool IsDynamic) {
+void ELFDumper<ELFT>::printSymbol(const Elf_Sym *Symbol, const Elf_Shdr *SymTab,
+ StringRef StrTable, bool IsDynamic) {
unsigned SectionIndex = 0;
StringRef SectionName;
- getSectionNameIndex(*Obj, Symbol, SectionName, SectionIndex);
- std::string FullSymbolName = getFullSymbolName(*Obj, Symbol, IsDynamic);
+ getSectionNameIndex(*Obj, Symbol, SymTab, ShndxTable, SectionName,
+ SectionIndex);
+ std::string FullSymbolName = getFullSymbolName(Symbol, StrTable, IsDynamic);
+ unsigned char SymbolType = Symbol->getType();
DictScope D(W, "Symbol");
W.printNumber("Name", FullSymbolName, Symbol->st_name);
@@ -811,7 +1282,11 @@ void ELFDumper<ELFT>::printSymbol(const typename ELFO::Elf_Sym *Symbol,
W.printNumber("Size", Symbol->st_size);
W.printEnum ("Binding", Symbol->getBinding(),
makeArrayRef(ElfSymbolBindings));
- W.printEnum ("Type", Symbol->getType(), makeArrayRef(ElfSymbolTypes));
+ if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
+ SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
+ W.printEnum ("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+ else
+ W.printEnum ("Type", SymbolType, makeArrayRef(ElfSymbolTypes));
W.printNumber("Other", Symbol->st_other);
W.printHex("Section", SectionName, SectionIndex);
}
@@ -855,12 +1330,15 @@ static const char *getTypeString(uint64_t Type) {
LLVM_READOBJ_TYPE_CASE(SYMENT);
LLVM_READOBJ_TYPE_CASE(SYMTAB);
LLVM_READOBJ_TYPE_CASE(TEXTREL);
+ LLVM_READOBJ_TYPE_CASE(VERDEF);
+ LLVM_READOBJ_TYPE_CASE(VERDEFNUM);
LLVM_READOBJ_TYPE_CASE(VERNEED);
LLVM_READOBJ_TYPE_CASE(VERNEEDNUM);
LLVM_READOBJ_TYPE_CASE(VERSYM);
LLVM_READOBJ_TYPE_CASE(RELCOUNT);
LLVM_READOBJ_TYPE_CASE(GNU_HASH);
LLVM_READOBJ_TYPE_CASE(MIPS_RLD_VERSION);
+ LLVM_READOBJ_TYPE_CASE(MIPS_RLD_MAP_REL);
LLVM_READOBJ_TYPE_CASE(MIPS_FLAGS);
LLVM_READOBJ_TYPE_CASE(MIPS_BASE_ADDRESS);
LLVM_READOBJ_TYPE_CASE(MIPS_LOCAL_GOTNO);
@@ -956,8 +1434,15 @@ void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
}
template <class ELFT>
-static void printValue(const ELFFile<ELFT> *O, uint64_t Type, uint64_t Value,
- bool Is64, raw_ostream &OS) {
+StringRef ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
+ if (Value >= DynamicStringTable.size())
+ reportError("Invalid dynamic string table reference");
+ return StringRef(DynamicStringTable.data() + Value);
+}
+
+template <class ELFT>
+void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
+ raw_ostream &OS = W.getOStream();
switch (Type) {
case DT_PLTREL:
if (Value == DT_REL) {
@@ -981,6 +1466,7 @@ static void printValue(const ELFFile<ELFT> *O, uint64_t Type, uint64_t Value,
case DT_FINI_ARRAY:
case DT_PREINIT_ARRAY:
case DT_DEBUG:
+ case DT_VERDEF:
case DT_VERNEED:
case DT_VERSYM:
case DT_GNU_HASH:
@@ -988,11 +1474,13 @@ static void printValue(const ELFFile<ELFT> *O, uint64_t Type, uint64_t Value,
case DT_MIPS_BASE_ADDRESS:
case DT_MIPS_GOTSYM:
case DT_MIPS_RLD_MAP:
+ case DT_MIPS_RLD_MAP_REL:
case DT_MIPS_PLTGOT:
case DT_MIPS_OPTIONS:
OS << format("0x%" PRIX64, Value);
break;
case DT_RELCOUNT:
+ case DT_VERDEFNUM:
case DT_VERNEEDNUM:
case DT_MIPS_RLD_VERSION:
case DT_MIPS_LOCAL_GOTNO:
@@ -1013,14 +1501,14 @@ static void printValue(const ELFFile<ELFT> *O, uint64_t Type, uint64_t Value,
OS << Value << " (bytes)";
break;
case DT_NEEDED:
- OS << "SharedLibrary (" << O->getDynamicString(Value) << ")";
+ OS << "SharedLibrary (" << getDynamicString(Value) << ")";
break;
case DT_SONAME:
- OS << "LibrarySoname (" << O->getDynamicString(Value) << ")";
+ OS << "LibrarySoname (" << getDynamicString(Value) << ")";
break;
case DT_RPATH:
case DT_RUNPATH:
- OS << O->getDynamicString(Value);
+ OS << getDynamicString(Value);
break;
case DT_MIPS_FLAGS:
printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
@@ -1046,7 +1534,8 @@ namespace {
template <> void ELFDumper<ELFType<support::little, false>>::printUnwindInfo() {
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
- ARM::EHABI::PrinterContext<ELFType<support::little, false>> Ctx(W, Obj);
+ ARM::EHABI::PrinterContext<ELFType<support::little, false>> Ctx(
+ W, Obj, DotSymtabSec);
return Ctx.PrintUnwindInformation();
}
W.startLine() << "UnwindInfo not implemented.\n";
@@ -1055,9 +1544,20 @@ template <> void ELFDumper<ELFType<support::little, false>>::printUnwindInfo() {
template<class ELFT>
void ELFDumper<ELFT>::printDynamicTable() {
- auto DynTable = Obj->dynamic_table(true);
+ auto I = dynamic_table_begin();
+ auto E = dynamic_table_end();
- ptrdiff_t Total = std::distance(DynTable.begin(), DynTable.end());
+ if (I == E)
+ return;
+
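+ // The table ends with DT_NULL; drop redundant trailing DT_NULL entries but
+ // keep a single terminator so it still shows up in the listing.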
+ --E;
+ while (I != E && E->getTag() == ELF::DT_NULL)
+ --E;
+ if (E->getTag() != ELF::DT_NULL)
+ ++E;
+ ++E;
+
+ ptrdiff_t Total = std::distance(I, E);
if (Total == 0)
return;
@@ -1069,12 +1569,13 @@ void ELFDumper<ELFT>::printDynamicTable() {
W.startLine()
<< " Tag" << (Is64 ? " " : " ") << "Type"
<< " " << "Name/Value\n";
- for (const auto &Entry : DynTable) {
- W.startLine()
- << " "
- << format(Is64 ? "0x%016" PRIX64 : "0x%08" PRIX64, Entry.getTag())
- << " " << format("%-21s", getTypeString(Entry.getTag()));
- printValue(Obj, Entry.getTag(), Entry.getVal(), Is64, OS);
+ while (I != E) {
+ const Elf_Dyn &Entry = *I;
+ uintX_t Tag = Entry.getTag();
+ ++I;
+ W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, true) << " "
+ << format("%-21s", getTypeString(Tag));
+ printValue(Tag, Entry.getVal());
OS << "\n";
}
@@ -1088,14 +1589,14 @@ void ELFDumper<ELFT>::printNeededLibraries() {
typedef std::vector<StringRef> LibsTy;
LibsTy Libs;
- for (const auto &Entry : Obj->dynamic_table())
+ for (const auto &Entry : dynamic_table())
if (Entry.d_tag == ELF::DT_NEEDED)
- Libs.push_back(Obj->getDynamicString(Entry.d_un.d_val));
+ Libs.push_back(getDynamicString(Entry.d_un.d_val));
std::stable_sort(Libs.begin(), Libs.end());
- for (LibsTy::const_iterator I = Libs.begin(), E = Libs.end(); I != E; ++I) {
- outs() << " " << *I << "\n";
+ for (const auto &L : Libs) {
+ outs() << " " << L << "\n";
}
}
@@ -1103,33 +1604,51 @@ template<class ELFT>
void ELFDumper<ELFT>::printProgramHeaders() {
ListScope L(W, "ProgramHeaders");
- for (typename ELFO::Elf_Phdr_Iter PI = Obj->program_header_begin(),
- PE = Obj->program_header_end();
- PI != PE; ++PI) {
+ for (const Elf_Phdr &Phdr : Obj->program_headers()) {
DictScope P(W, "ProgramHeader");
- W.printHex ("Type",
- getElfSegmentType(Obj->getHeader()->e_machine, PI->p_type),
- PI->p_type);
- W.printHex ("Offset", PI->p_offset);
- W.printHex ("VirtualAddress", PI->p_vaddr);
- W.printHex ("PhysicalAddress", PI->p_paddr);
- W.printNumber("FileSize", PI->p_filesz);
- W.printNumber("MemSize", PI->p_memsz);
- W.printFlags ("Flags", PI->p_flags, makeArrayRef(ElfSegmentFlags));
- W.printNumber("Alignment", PI->p_align);
+ W.printHex("Type",
+ getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type),
+ Phdr.p_type);
+ W.printHex("Offset", Phdr.p_offset);
+ W.printHex("VirtualAddress", Phdr.p_vaddr);
+ W.printHex("PhysicalAddress", Phdr.p_paddr);
+ W.printNumber("FileSize", Phdr.p_filesz);
+ W.printNumber("MemSize", Phdr.p_memsz);
+ W.printFlags("Flags", Phdr.p_flags, makeArrayRef(ElfSegmentFlags));
+ W.printNumber("Alignment", Phdr.p_align);
}
}
template <typename ELFT>
void ELFDumper<ELFT>::printHashTable() {
DictScope D(W, "HashTable");
- auto HT = Obj->getHashTable();
- if (!HT)
+ if (!HashTable)
+ return;
+ W.printNumber("Num Buckets", HashTable->nbucket);
+ W.printNumber("Num Chains", HashTable->nchain);
+ W.printList("Buckets", HashTable->buckets());
+ W.printList("Chains", HashTable->chains());
+}
+
+template <typename ELFT>
+void ELFDumper<ELFT>::printGnuHashTable() {
+ DictScope D(W, "GnuHashTable");
+ if (!GnuHashTable)
return;
- W.printNumber("Num Buckets", HT->nbucket);
- W.printNumber("Num Chains", HT->nchain);
- W.printList("Buckets", HT->buckets());
- W.printList("Chains", HT->chains());
+ W.printNumber("Num Buckets", GnuHashTable->nbuckets);
+ W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx);
+ W.printNumber("Num Mask Words", GnuHashTable->maskwords);
+ W.printNumber("Shift Count", GnuHashTable->shift2);
+ W.printHexList("Bloom Filter", GnuHashTable->filter());
+ W.printList("Buckets", GnuHashTable->buckets());
+ if (!DotDynSymSec)
+ reportError("No dynamic symbol section");
+ W.printHexList("Values",
+ GnuHashTable->values(DotDynSymSec->getEntityCount()));
+}
+
+template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
+ outs() << "LoadName: " << SOName << '\n';
}
template <class ELFT>
@@ -1171,21 +1690,23 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
namespace {
template <class ELFT> class MipsGOTParser {
public:
- typedef object::ELFFile<ELFT> ObjectFile;
- typedef typename ObjectFile::Elf_Shdr Elf_Shdr;
- typedef typename ObjectFile::Elf_Sym Elf_Sym;
+ typedef object::ELFFile<ELFT> ELFO;
+ typedef typename ELFO::Elf_Shdr Elf_Shdr;
+ typedef typename ELFO::Elf_Sym Elf_Sym;
+ typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range;
+ typedef typename ELFO::Elf_Addr GOTEntry;
+ typedef typename ELFO::Elf_Rel Elf_Rel;
+ typedef typename ELFO::Elf_Rela Elf_Rela;
- MipsGOTParser(const ObjectFile *Obj, StreamWriter &W);
+ MipsGOTParser(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
+ Elf_Dyn_Range DynTable, StreamWriter &W);
void parseGOT();
void parsePLT();
private:
- typedef typename ObjectFile::Elf_Addr GOTEntry;
- typedef typename ObjectFile::template ELFEntityIterator<const GOTEntry>
- GOTIter;
-
- const ObjectFile *Obj;
+ ELFDumper<ELFT> *Dumper;
+ const ELFO *Obj;
StreamWriter &W;
llvm::Optional<uint64_t> DtPltGot;
llvm::Optional<uint64_t> DtLocalGotNum;
@@ -1194,22 +1715,26 @@ private:
llvm::Optional<uint64_t> DtJmpRel;
std::size_t getGOTTotal(ArrayRef<uint8_t> GOT) const;
- GOTIter makeGOTIter(ArrayRef<uint8_t> GOT, std::size_t EntryNum);
-
- void printGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It);
- void printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It,
- const Elf_Sym *Sym, bool IsDynamic);
- void printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt, GOTIter It,
- StringRef Purpose);
- void printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt, GOTIter It,
+ const GOTEntry *makeGOTIter(ArrayRef<uint8_t> GOT, std::size_t EntryNum);
+
+ void printGotEntry(uint64_t GotAddr, const GOTEntry *BeginIt,
+ const GOTEntry *It);
+ void printGlobalGotEntry(uint64_t GotAddr, const GOTEntry *BeginIt,
+ const GOTEntry *It, const Elf_Sym *Sym,
+ StringRef StrTable, bool IsDynamic);
+ void printPLTEntry(uint64_t PLTAddr, const GOTEntry *BeginIt,
+ const GOTEntry *It, StringRef Purpose);
+ void printPLTEntry(uint64_t PLTAddr, const GOTEntry *BeginIt,
+ const GOTEntry *It, StringRef StrTable,
const Elf_Sym *Sym);
};
}
template <class ELFT>
-MipsGOTParser<ELFT>::MipsGOTParser(const ObjectFile *Obj, StreamWriter &W)
- : Obj(Obj), W(W) {
- for (const auto &Entry : Obj->dynamic_table()) {
+MipsGOTParser<ELFT>::MipsGOTParser(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
+ Elf_Dyn_Range DynTable, StreamWriter &W)
+ : Dumper(Dumper), Obj(Obj), W(W) {
+ for (const auto &Entry : DynTable) {
switch (Entry.getTag()) {
case ELF::DT_PLTGOT:
DtPltGot = Entry.getVal();
@@ -1264,8 +1789,11 @@ template <class ELFT> void MipsGOTParser<ELFT>::parseGOT() {
return;
}
- const Elf_Sym *DynSymBegin = Obj->dynamic_symbol_begin();
- const Elf_Sym *DynSymEnd = Obj->dynamic_symbol_end();
+ const Elf_Shdr *DynSymSec = Dumper->getDotDynSymSec();
+ ErrorOr<StringRef> StrTable = Obj->getStringTableForSymtab(*DynSymSec);
+ error(StrTable.getError());
+ const Elf_Sym *DynSymBegin = Obj->symbol_begin(DynSymSec);
+ const Elf_Sym *DynSymEnd = Obj->symbol_end(DynSymSec);
std::size_t DynSymTotal = std::size_t(std::distance(DynSymBegin, DynSymEnd));
if (*DtGotSym > DynSymTotal) {
@@ -1280,9 +1808,9 @@ template <class ELFT> void MipsGOTParser<ELFT>::parseGOT() {
return;
}
- GOTIter GotBegin = makeGOTIter(*GOT, 0);
- GOTIter GotLocalEnd = makeGOTIter(*GOT, *DtLocalGotNum);
- GOTIter It = GotBegin;
+ const GOTEntry *GotBegin = makeGOTIter(*GOT, 0);
+ const GOTEntry *GotLocalEnd = makeGOTIter(*GOT, *DtLocalGotNum);
+ const GOTEntry *It = GotBegin;
DictScope GS(W, "Primary GOT");
@@ -1312,11 +1840,13 @@ template <class ELFT> void MipsGOTParser<ELFT>::parseGOT() {
{
ListScope GS(W, "Global entries");
- GOTIter GotGlobalEnd = makeGOTIter(*GOT, *DtLocalGotNum + GlobalGotNum);
+ const GOTEntry *GotGlobalEnd =
+ makeGOTIter(*GOT, *DtLocalGotNum + GlobalGotNum);
const Elf_Sym *GotDynSym = DynSymBegin + *DtGotSym;
for (; It != GotGlobalEnd; ++It) {
DictScope D(W, "Entry");
- printGlobalGotEntry(GOTShdr->sh_addr, GotBegin, It, GotDynSym++, true);
+ printGlobalGotEntry(GOTShdr->sh_addr, GotBegin, It, GotDynSym++,
+ *StrTable, true);
}
}
@@ -1350,10 +1880,16 @@ template <class ELFT> void MipsGOTParser<ELFT>::parsePLT() {
W.startLine() << "There is no .rel.plt section in the file.\n";
return;
}
+ ErrorOr<const Elf_Shdr *> SymTableOrErr =
+ Obj->getSection(PLTRelShdr->sh_link);
+ error(SymTableOrErr.getError());
+ const Elf_Shdr *SymTable = *SymTableOrErr;
+ ErrorOr<StringRef> StrTable = Obj->getStringTableForSymtab(*SymTable);
+ error(StrTable.getError());
- GOTIter PLTBegin = makeGOTIter(*PLT, 0);
- GOTIter PLTEnd = makeGOTIter(*PLT, getGOTTotal(*PLT));
- GOTIter It = PLTBegin;
+ const GOTEntry *PLTBegin = makeGOTIter(*PLT, 0);
+ const GOTEntry *PLTEnd = makeGOTIter(*PLT, getGOTTotal(*PLT));
+ const GOTEntry *It = PLTBegin;
DictScope GS(W, "PLT GOT");
{
@@ -1367,21 +1903,19 @@ template <class ELFT> void MipsGOTParser<ELFT>::parsePLT() {
switch (PLTRelShdr->sh_type) {
case ELF::SHT_REL:
- for (typename ObjectFile::Elf_Rel_Iter RI = Obj->rel_begin(PLTRelShdr),
- RE = Obj->rel_end(PLTRelShdr);
+ for (const Elf_Rel *RI = Obj->rel_begin(PLTRelShdr),
+ *RE = Obj->rel_end(PLTRelShdr);
RI != RE && It != PLTEnd; ++RI, ++It) {
- const Elf_Sym *Sym =
- Obj->getRelocationSymbol(&*PLTRelShdr, &*RI).second;
- printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, Sym);
+ const Elf_Sym *Sym = Obj->getRelocationSymbol(&*RI, SymTable);
+ printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, *StrTable, Sym);
}
break;
case ELF::SHT_RELA:
- for (typename ObjectFile::Elf_Rela_Iter RI = Obj->rela_begin(PLTRelShdr),
- RE = Obj->rela_end(PLTRelShdr);
+ for (const Elf_Rela *RI = Obj->rela_begin(PLTRelShdr),
+ *RE = Obj->rela_end(PLTRelShdr);
RI != RE && It != PLTEnd; ++RI, ++It) {
- const Elf_Sym *Sym =
- Obj->getRelocationSymbol(&*PLTRelShdr, &*RI).second;
- printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, Sym);
+ const Elf_Sym *Sym = Obj->getRelocationSymbol(&*RI, SymTable);
+ printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, *StrTable, Sym);
}
break;
}
@@ -1394,15 +1928,16 @@ std::size_t MipsGOTParser<ELFT>::getGOTTotal(ArrayRef<uint8_t> GOT) const {
}
template <class ELFT>
-typename MipsGOTParser<ELFT>::GOTIter
+const typename MipsGOTParser<ELFT>::GOTEntry *
MipsGOTParser<ELFT>::makeGOTIter(ArrayRef<uint8_t> GOT, std::size_t EntryNum) {
const char *Data = reinterpret_cast<const char *>(GOT.data());
- return GOTIter(sizeof(GOTEntry), Data + EntryNum * sizeof(GOTEntry));
+ return reinterpret_cast<const GOTEntry *>(Data + EntryNum * sizeof(GOTEntry));
}
template <class ELFT>
-void MipsGOTParser<ELFT>::printGotEntry(uint64_t GotAddr, GOTIter BeginIt,
- GOTIter It) {
+void MipsGOTParser<ELFT>::printGotEntry(uint64_t GotAddr,
+ const GOTEntry *BeginIt,
+ const GOTEntry *It) {
int64_t Offset = std::distance(BeginIt, It) * sizeof(GOTEntry);
W.printHex("Address", GotAddr + Offset);
W.printNumber("Access", Offset - 0x7ff0);
@@ -1410,9 +1945,9 @@ void MipsGOTParser<ELFT>::printGotEntry(uint64_t GotAddr, GOTIter BeginIt,
}
template <class ELFT>
-void MipsGOTParser<ELFT>::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt,
- GOTIter It, const Elf_Sym *Sym,
- bool IsDynamic) {
+void MipsGOTParser<ELFT>::printGlobalGotEntry(
+ uint64_t GotAddr, const GOTEntry *BeginIt, const GOTEntry *It,
+ const Elf_Sym *Sym, StringRef StrTable, bool IsDynamic) {
printGotEntry(GotAddr, BeginIt, It);
W.printHex("Value", Sym->st_value);
@@ -1420,16 +1955,19 @@ void MipsGOTParser<ELFT>::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt,
unsigned SectionIndex = 0;
StringRef SectionName;
- getSectionNameIndex(*Obj, Sym, SectionName, SectionIndex);
+ getSectionNameIndex(*Obj, Sym, Dumper->getDotDynSymSec(),
+ Dumper->getShndxTable(), SectionName, SectionIndex);
W.printHex("Section", SectionName, SectionIndex);
- std::string FullSymbolName = getFullSymbolName(*Obj, Sym, IsDynamic);
+ std::string FullSymbolName =
+ Dumper->getFullSymbolName(Sym, StrTable, IsDynamic);
W.printNumber("Name", FullSymbolName, Sym->st_name);
}
template <class ELFT>
-void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt,
- GOTIter It, StringRef Purpose) {
+void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr,
+ const GOTEntry *BeginIt,
+ const GOTEntry *It, StringRef Purpose) {
DictScope D(W, "Entry");
int64_t Offset = std::distance(BeginIt, It) * sizeof(GOTEntry);
W.printHex("Address", PLTAddr + Offset);
@@ -1438,8 +1976,10 @@ void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt,
}
template <class ELFT>
-void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt,
- GOTIter It, const Elf_Sym *Sym) {
+void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr,
+ const GOTEntry *BeginIt,
+ const GOTEntry *It, StringRef StrTable,
+ const Elf_Sym *Sym) {
DictScope D(W, "Entry");
int64_t Offset = std::distance(BeginIt, It) * sizeof(GOTEntry);
W.printHex("Address", PLTAddr + Offset);
@@ -1449,10 +1989,11 @@ void MipsGOTParser<ELFT>::printPLTEntry(uint64_t PLTAddr, GOTIter BeginIt,
unsigned SectionIndex = 0;
StringRef SectionName;
- getSectionNameIndex(*Obj, Sym, SectionName, SectionIndex);
+ getSectionNameIndex(*Obj, Sym, Dumper->getDotDynSymSec(),
+ Dumper->getShndxTable(), SectionName, SectionIndex);
W.printHex("Section", SectionName, SectionIndex);
- std::string FullSymbolName = getFullSymbolName(*Obj, Sym, true);
+ std::string FullSymbolName = Dumper->getFullSymbolName(Sym, StrTable, true);
W.printNumber("Name", FullSymbolName, Sym->st_name);
}
@@ -1462,7 +2003,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsPLTGOT() {
return;
}
- MipsGOTParser<ELFT> GOTParser(Obj, W);
+ MipsGOTParser<ELFT> GOTParser(this, Obj, dynamic_table(), W);
GOTParser.parseGOT();
GOTParser.parsePLT();
}
@@ -1604,7 +2145,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
}
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
- const typename ELFFile<ELFT>::Elf_Shdr *StackMapSection = nullptr;
+ const Elf_Shdr *StackMapSection = nullptr;
for (const auto &Sec : Obj->sections()) {
ErrorOr<StringRef> Name = Obj->getSectionName(&Sec);
if (*Name == ".llvm_stackmaps") {
diff --git a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
index adb99b0..58d2c9f 100644
--- a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -40,6 +40,14 @@ public:
void printUnwindInfo() override;
void printStackMap() const override;
+ // MachO-specific.
+ void printMachODataInCode() override;
+ void printMachOVersionMin() override;
+ void printMachODysymtab() override;
+ void printMachOSegment() override;
+ void printMachOIndirectSymbols() override;
+ void printMachOLinkerOptions() override;
+
private:
template<class MachHeader>
void printFileHeaders(const MachHeader &Header);
@@ -255,6 +263,21 @@ namespace {
uint32_t Flags;
uint32_t Reserved1;
uint32_t Reserved2;
+ uint32_t Reserved3;
+ };
+
+ struct MachOSegment {
+ std::string CmdName;
+ std::string SegName;
+ uint64_t cmdsize;
+ uint64_t vmaddr;
+ uint64_t vmsize;
+ uint64_t fileoff;
+ uint64_t filesize;
+ uint32_t maxprot;
+ uint32_t initprot;
+ uint32_t nsects;
+ uint32_t flags;
};
struct MachOSymbol {
@@ -266,6 +289,18 @@ namespace {
};
}
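+// Render MachO vm_prot bits as an "rwx"-style permission string.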
+static std::string getMask(uint32_t prot) {
+ // TODO (davide): This always assumes prot is valid.
+ // Catch mistakes and report if needed.
+ std::string Prot;
+ Prot += (prot & MachO::VM_PROT_READ) ? "r" : "-";
+ Prot += (prot & MachO::VM_PROT_WRITE) ? "w" : "-";
+ Prot += (prot & MachO::VM_PROT_EXECUTE) ? "x" : "-";
+ return Prot;
+}
+
static void getSection(const MachOObjectFile *Obj,
DataRefImpl Sec,
MachOSection &Section) {
@@ -292,8 +327,40 @@ static void getSection(const MachOObjectFile *Obj,
Section.Flags = Sect.flags;
Section.Reserved1 = Sect.reserved1;
Section.Reserved2 = Sect.reserved2;
+ Section.Reserved3 = Sect.reserved3;
}
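+// Normalize LC_SEGMENT and LC_SEGMENT_64 load commands into a MachOSegment
+// so the printing code does not have to care about the word size.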
+static void getSegment(const MachOObjectFile *Obj,
+ const MachOObjectFile::LoadCommandInfo &L,
+ MachOSegment &Segment) {
+ if (!Obj->is64Bit()) {
+ MachO::segment_command SC = Obj->getSegmentLoadCommand(L);
+ Segment.CmdName = "LC_SEGMENT";
+ Segment.SegName = SC.segname;
+ Segment.cmdsize = SC.cmdsize;
+ Segment.vmaddr = SC.vmaddr;
+ Segment.vmsize = SC.vmsize;
+ Segment.fileoff = SC.fileoff;
+ Segment.filesize = SC.filesize;
+ Segment.maxprot = SC.maxprot;
+ Segment.initprot = SC.initprot;
+ Segment.nsects = SC.nsects;
+ Segment.flags = SC.flags;
+ return;
+ }
+ MachO::segment_command_64 SC = Obj->getSegment64LoadCommand(L);
+ Segment.CmdName = "LC_SEGMENT_64";
+ Segment.SegName = SC.segname;
+ Segment.cmdsize = SC.cmdsize;
+ Segment.vmaddr = SC.vmaddr;
+ Segment.vmsize = SC.vmsize;
+ Segment.fileoff = SC.fileoff;
+ Segment.filesize = SC.filesize;
+ Segment.maxprot = SC.maxprot;
+ Segment.initprot = SC.initprot;
+ Segment.nsects = SC.nsects;
+ Segment.flags = SC.flags;
+}
static void getSymbol(const MachOObjectFile *Obj,
DataRefImpl DRI,
@@ -375,8 +442,7 @@ void MachODumper::printSections(const MachOObjectFile *Obj) {
DataRefImpl DR = Section.getRawDataRefImpl();
StringRef Name;
- if (error(Section.getName(Name)))
- Name = "";
+ error(Section.getName(Name));
ArrayRef<char> RawName = Obj->getSectionRawName(DR);
StringRef SegmentName = Obj->getSectionFinalSegmentName(DR);
@@ -398,6 +464,8 @@ void MachODumper::printSections(const MachOObjectFile *Obj) {
makeArrayRef(MachOSectionAttributes));
W.printHex("Reserved1", MOSection.Reserved1);
W.printHex("Reserved2", MOSection.Reserved2);
+ if (Obj->is64Bit())
+ W.printHex("Reserved3", MOSection.Reserved3);
if (opts::SectionRelocations) {
ListScope D(W, "Relocations");
@@ -419,8 +487,7 @@ void MachODumper::printSections(const MachOObjectFile *Obj) {
bool IsBSS = Section.isBSS();
if (!IsBSS) {
StringRef Data;
- if (error(Section.getContents(Data)))
- break;
+ error(Section.getContents(Data));
W.printBinaryBlock("SectionData", Data);
}
@@ -434,8 +501,7 @@ void MachODumper::printRelocations() {
std::error_code EC;
for (const SectionRef &Section : Obj->sections()) {
StringRef Name;
- if (error(Section.getName(Name)))
- continue;
+ error(Section.getName(Name));
bool PrintedGroup = false;
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -475,15 +541,13 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj,
symbol_iterator Symbol = Reloc.getSymbol();
if (Symbol != Obj->symbol_end()) {
ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
- if (error(TargetNameOrErr.getError()))
- return;
+ error(TargetNameOrErr.getError());
TargetName = *TargetNameOrErr;
}
} else if (!IsScattered) {
section_iterator SecI = Obj->getRelocationSection(DR);
if (SecI != Obj->section_end()) {
- if (error(SecI->getName(TargetName)))
- return;
+ error(SecI->getName(TargetName));
}
}
if (TargetName.empty())
@@ -547,8 +611,10 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) {
getSymbol(Obj, Symbol.getRawDataRefImpl(), MOSymbol);
StringRef SectionName = "";
- section_iterator SecI(Obj->section_begin());
- if (!error(Symbol.getSection(SecI)) && SecI != Obj->section_end())
+ ErrorOr<section_iterator> SecIOrErr = Symbol.getSection();
+ error(SecIOrErr.getError());
+ section_iterator SecI = *SecIOrErr;
+ if (SecI != Obj->section_end())
error(SecI->getName(SectionName));
DictScope D(W, "Symbol");
@@ -603,3 +669,153 @@ void MachODumper::printStackMap() const {
prettyPrintStackMap(llvm::outs(),
StackMapV1Parser<support::big>(StackMapContentsArray));
}
+
+void MachODumper::printMachODataInCode() {
+ for (const auto &Load : Obj->load_commands()) {
+ if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
+ MachO::linkedit_data_command LLC = Obj->getLinkeditDataLoadCommand(Load);
+ DictScope Group(W, "DataInCode");
+ W.printNumber("Data offset", LLC.dataoff);
+ W.printNumber("Data size", LLC.datasize);
+ ListScope D(W, "Data entries");
+ unsigned NumRegions = LLC.datasize / sizeof(MachO::data_in_code_entry);
+ for (unsigned i = 0; i < NumRegions; ++i) {
+ MachO::data_in_code_entry DICE = Obj->getDataInCodeTableEntry(
+ LLC.dataoff, i);
+ DictScope Group(W, "Entry");
+ W.printNumber("Index", i);
+ W.printNumber("Offset", DICE.offset);
+ W.printNumber("Length", DICE.length);
+ W.printNumber("Kind", DICE.kind);
+ }
+ }
+ }
+}
+
+void MachODumper::printMachOVersionMin() {
+ for (const auto &Load : Obj->load_commands()) {
+ StringRef Cmd;
+ switch (Load.C.cmd) {
+ case MachO::LC_VERSION_MIN_MACOSX:
+ Cmd = "LC_VERSION_MIN_MACOSX";
+ break;
+ case MachO::LC_VERSION_MIN_IPHONEOS:
+ Cmd = "LC_VERSION_MIN_IPHONEOS";
+ break;
+ case MachO::LC_VERSION_MIN_TVOS:
+ Cmd = "LC_VERSION_MIN_TVOS";
+ break;
+ case MachO::LC_VERSION_MIN_WATCHOS:
+ Cmd = "LC_VERSION_MIN_WATCHOS";
+ break;
+ default:
+ continue;
+ }
+
+ MachO::version_min_command VMC = Obj->getVersionMinLoadCommand(Load);
+ DictScope Group(W, "MinVersion");
+ W.printString("Cmd", Cmd);
+ W.printNumber("Size", VMC.cmdsize);
+ SmallString<32> Version;
+ Version = utostr(MachOObjectFile::getVersionMinMajor(VMC, false)) + "." +
+ utostr(MachOObjectFile::getVersionMinMinor(VMC, false));
+ uint32_t Update = MachOObjectFile::getVersionMinUpdate(VMC, false);
+ if (Update != 0)
+ Version += "." + utostr(MachOObjectFile::getVersionMinUpdate(VMC, false));
+ W.printString("Version", Version);
+ SmallString<32> SDK;
+ if (VMC.sdk == 0)
+ SDK = "n/a";
+ else {
+ SDK = utostr(MachOObjectFile::getVersionMinMajor(VMC, true)) + "." +
+ utostr(MachOObjectFile::getVersionMinMinor(VMC, true));
+ uint32_t Update = MachOObjectFile::getVersionMinUpdate(VMC, true);
+ if (Update != 0)
+ SDK += "." + utostr(MachOObjectFile::getVersionMinUpdate(VMC, true));
+ }
+ W.printString("SDK", SDK);
+ }
+}
+
+void MachODumper::printMachODysymtab() {
+ for (const auto &Load : Obj->load_commands()) {
+ if (Load.C.cmd == MachO::LC_DYSYMTAB) {
+ MachO::dysymtab_command DLC = Obj->getDysymtabLoadCommand();
+ DictScope Group(W, "Dysymtab");
+ W.printNumber("ilocalsym", DLC.ilocalsym);
+ W.printNumber("nlocalsym", DLC.nlocalsym);
+ W.printNumber("iextdefsym", DLC.iextdefsym);
+ W.printNumber("nextdefsym", DLC.nextdefsym);
+ W.printNumber("iundefsym", DLC.iundefsym);
+ W.printNumber("nundefsym", DLC.nundefsym);
+ W.printNumber("tocoff", DLC.tocoff);
+ W.printNumber("ntoc", DLC.ntoc);
+ W.printNumber("modtaboff", DLC.modtaboff);
+ W.printNumber("nmodtab", DLC.nmodtab);
+ W.printNumber("extrefsymoff", DLC.extrefsymoff);
+ W.printNumber("nextrefsyms", DLC.nextrefsyms);
+ W.printNumber("indirectsymoff", DLC.indirectsymoff);
+ W.printNumber("nindirectsyms", DLC.nindirectsyms);
+ W.printNumber("extreloff", DLC.extreloff);
+ W.printNumber("nextrel", DLC.nextrel);
+ W.printNumber("locreloff", DLC.locreloff);
+ W.printNumber("nlocrel", DLC.nlocrel);
+ }
+ }
+}
+
+void MachODumper::printMachOSegment() {
+ for (const auto &Load : Obj->load_commands()) {
+ if (Load.C.cmd == MachO::LC_SEGMENT || Load.C.cmd == MachO::LC_SEGMENT_64) {
+ MachOSegment MOSegment;
+ getSegment(Obj, Load, MOSegment);
+ DictScope Group(W, "Segment");
+ W.printString("Cmd", MOSegment.CmdName);
+ W.printString("Name", MOSegment.SegName);
+ W.printNumber("Size", MOSegment.cmdsize);
+ W.printHex("vmaddr", MOSegment.vmaddr);
+ W.printHex("vmsize", MOSegment.vmsize);
+ W.printNumber("fileoff", MOSegment.fileoff);
+ W.printNumber("filesize", MOSegment.filesize);
+ W.printString("maxprot", getMask(MOSegment.maxprot));
+ W.printString("initprot", getMask(MOSegment.initprot));
+ W.printNumber("nsects", MOSegment.nsects);
+ W.printHex("flags", MOSegment.flags);
+ }
+ }
+}
+
+void MachODumper::printMachOIndirectSymbols() {
+ for (const auto &Load : Obj->load_commands()) {
+ if (Load.C.cmd == MachO::LC_DYSYMTAB) {
+ MachO::dysymtab_command DLC = Obj->getDysymtabLoadCommand();
+ DictScope Group(W, "Indirect Symbols");
+ W.printNumber("Number", DLC.nindirectsyms);
+ ListScope D(W, "Symbols");
+ for (unsigned i = 0; i < DLC.nindirectsyms; ++i) {
+ DictScope Group(W, "Entry");
+ W.printNumber("Entry Index", i);
+ W.printHex("Symbol Index", Obj->getIndirectSymbolTableEntry(DLC, i));
+ }
+ }
+ }
+}
+
+void MachODumper::printMachOLinkerOptions() {
+ for (const auto &Load : Obj->load_commands()) {
+ if (Load.C.cmd == MachO::LC_LINKER_OPTION) {
+ MachO::linker_option_command LOLC = Obj->getLinkerOptionLoadCommand(Load);
+ DictScope Group(W, "Linker Options");
+ W.printNumber("Size", LOLC.cmdsize);
+ ListScope D(W, "Strings");
+ uint64_t DataSize = LOLC.cmdsize - sizeof(MachO::linker_option_command);
+ const char *P = Load.Ptr + sizeof(MachO::linker_option_command);
+ StringRef Data(P, DataSize);
+ for (unsigned i = 0; i < LOLC.count; ++i) {
+ std::pair<StringRef,StringRef> Split = Data.split('\0');
+ W.printString("Value", Split.first);
+ Data = Split.second;
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
index 5ecf0ec..db26d69 100644
--- a/contrib/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
@@ -1,4 +1,4 @@
-//===-- ObjDumper.h -------------------------------------------------------===//
+//===-- ObjDumper.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,8 @@
namespace llvm {
namespace object {
- class ObjectFile;
+class COFFImportFile;
+class ObjectFile;
}
class StreamWriter;
@@ -38,6 +39,9 @@ public:
virtual void printNeededLibraries() { }
virtual void printProgramHeaders() { }
virtual void printHashTable() { }
+ virtual void printGnuHashTable() { }
+ virtual void printLoadName() {}
+ virtual void printVersionInfo() {}
// Only implemented for ARM ELF at this time.
virtual void printAttributes() { }
@@ -52,6 +56,15 @@ public:
virtual void printCOFFExports() { }
virtual void printCOFFDirectives() { }
virtual void printCOFFBaseReloc() { }
+ virtual void printCodeViewDebugInfo() { }
+
+ // Only implemented for MachO.
+ virtual void printMachODataInCode() { }
+ virtual void printMachOVersionMin() { }
+ virtual void printMachODysymtab() { }
+ virtual void printMachOSegment() { }
+ virtual void printMachOIndirectSymbols() { }
+ virtual void printMachOLinkerOptions() { }
virtual void printStackMap() const = 0;
@@ -71,6 +84,8 @@ std::error_code createMachODumper(const object::ObjectFile *Obj,
StreamWriter &Writer,
std::unique_ptr<ObjDumper> &Result);
+void dumpCOFFImportFile(const object::COFFImportFile *File);
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-readobj/StreamWriter.h b/contrib/llvm/tools/llvm-readobj/StreamWriter.h
index f3cc57e..d2dbb07 100644
--- a/contrib/llvm/tools/llvm-readobj/StreamWriter.h
+++ b/contrib/llvm/tools/llvm-readobj/StreamWriter.h
@@ -34,14 +34,17 @@ struct HexNumber {
// unsigned type. The overloads are here so that every type that is implicitly
// convertible to an integer (including enums and endian helpers) can be used
// without requiring type traits or call-site changes.
- HexNumber(int8_t Value) : Value(static_cast<uint8_t >(Value)) { }
- HexNumber(int16_t Value) : Value(static_cast<uint16_t>(Value)) { }
- HexNumber(int32_t Value) : Value(static_cast<uint32_t>(Value)) { }
- HexNumber(int64_t Value) : Value(static_cast<uint64_t>(Value)) { }
- HexNumber(uint8_t Value) : Value(Value) { }
- HexNumber(uint16_t Value) : Value(Value) { }
- HexNumber(uint32_t Value) : Value(Value) { }
- HexNumber(uint64_t Value) : Value(Value) { }
+ HexNumber(char Value) : Value(static_cast<unsigned char>(Value)) { }
+ HexNumber(signed char Value) : Value(static_cast<unsigned char>(Value)) { }
+ HexNumber(signed short Value) : Value(static_cast<unsigned short>(Value)) { }
+ HexNumber(signed int Value) : Value(static_cast<unsigned int>(Value)) { }
+ HexNumber(signed long Value) : Value(static_cast<unsigned long>(Value)) { }
+ HexNumber(signed long long Value)
+ : Value(static_cast<unsigned long long>(Value)) { }
+ HexNumber(unsigned char Value) : Value(Value) { }
+ HexNumber(unsigned short Value) : Value(Value) { }
+ HexNumber(unsigned int Value) : Value(Value) { }
+ HexNumber(unsigned long Value) : Value(Value) { }
+ HexNumber(unsigned long long Value) : Value(Value) { }
uint64_t Value;
};
@@ -194,6 +197,19 @@ public:
OS << "]\n";
}
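+ // Like printList, but formats each element as a hex value.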
+ template <typename T>
+ void printHexList(StringRef Label, const T &List) {
+ startLine() << Label << ": [";
+ bool Comma = false;
+ for (const auto &Item : List) {
+ if (Comma)
+ OS << ", ";
+ OS << hex(Item);
+ Comma = true;
+ }
+ OS << "]\n";
+ }
+
template<typename T>
void printHex(StringRef Label, T Value) {
startLine() << Label << ": " << hex(Value) << "\n";
diff --git a/contrib/llvm/tools/llvm-readobj/Win64EHDumper.cpp b/contrib/llvm/tools/llvm-readobj/Win64EHDumper.cpp
index f57eea2..2da5ae3 100644
--- a/contrib/llvm/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/Win64EHDumper.cpp
@@ -149,11 +149,8 @@ static std::error_code resolveRelocation(const Dumper::Context &Ctx,
return EC;
ResolvedAddress = *ResolvedAddressOrErr;
- section_iterator SI = Ctx.COFF.section_begin();
- if (std::error_code EC = Symbol.getSection(SI))
- return EC;
-
- ResolvedSection = Ctx.COFF.getCOFFSection(*SI);
+ ErrorOr<section_iterator> SI = Symbol.getSection();
+ ResolvedSection = Ctx.COFF.getCOFFSection(**SI);
return std::error_code();
}
@@ -257,7 +254,7 @@ void Dumper::printUnwindInfo(const Context &Ctx, const coff_section *Section,
return;
}
- printUnwindCode(UI, ArrayRef<UnwindCode>(UCI, UCE));
+ printUnwindCode(UI, makeArrayRef(UCI, UCE));
UCI = UCI + UsedSlots - 1;
}
}
@@ -284,11 +281,11 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
const coff_section *XData;
uint64_t Offset;
- if (error(resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset)))
- return;
+ resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset);
ArrayRef<uint8_t> Contents;
- if (error(Ctx.COFF.getSectionContents(XData, Contents)) || Contents.empty())
+ error(Ctx.COFF.getSectionContents(XData, Contents));
+ if (Contents.empty())
return;
Offset = Offset + RF.UnwindInfoOffset;
@@ -302,15 +299,15 @@ void Dumper::printRuntimeFunction(const Context &Ctx,
void Dumper::printData(const Context &Ctx) {
for (const auto &Section : Ctx.COFF.sections()) {
StringRef Name;
- if (error(Section.getName(Name)))
- continue;
+ Section.getName(Name);
if (Name != ".pdata" && !Name.startswith(".pdata$"))
continue;
const coff_section *PData = Ctx.COFF.getCOFFSection(Section);
ArrayRef<uint8_t> Contents;
- if (error(Ctx.COFF.getSectionContents(PData, Contents)) || Contents.empty())
+ error(Ctx.COFF.getSectionContents(PData, Contents));
+ if (Contents.empty())
continue;
const RuntimeFunction *Entries =
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 12afacb..fa8fee2 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -24,6 +24,7 @@
#include "ObjDumper.h"
#include "StreamWriter.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
@@ -131,6 +132,10 @@ namespace opts {
cl::opt<bool> HashTable("hash-table",
cl::desc("Display ELF hash table"));
+ // -gnu-hash-table
+ cl::opt<bool> GnuHashTable("gnu-hash-table",
+ cl::desc("Display ELF .gnu.hash section"));
+
// -expand-relocs
cl::opt<bool> ExpandRelocs("expand-relocs",
cl::desc("Expand each shown relocation to multiple lines"));
@@ -181,25 +186,62 @@ namespace opts {
COFFBaseRelocs("coff-basereloc",
cl::desc("Display the PE/COFF .reloc section"));
+ // -macho-data-in-code
+ cl::opt<bool>
+ MachODataInCode("macho-data-in-code",
+ cl::desc("Display MachO Data in Code command"));
+
+ // -macho-indirect-symbols
+ cl::opt<bool>
+ MachOIndirectSymbols("macho-indirect-symbols",
+ cl::desc("Display MachO indirect symbols"));
+
+ // -macho-linker-options
+ cl::opt<bool>
+ MachOLinkerOptions("macho-linker-options",
+ cl::desc("Display MachO linker options"));
+
+ // -macho-segment
+ cl::opt<bool>
+ MachOSegment("macho-segment",
+ cl::desc("Display MachO Segment command"));
+
+ // -macho-version-min
+ cl::opt<bool>
+ MachOVersionMin("macho-version-min",
+ cl::desc("Display MachO version min command"));
+
+ // -macho-dysymtab
+ cl::opt<bool>
+ MachODysymtab("macho-dysymtab",
+ cl::desc("Display MachO Dysymtab command"));
+
// -stackmap
cl::opt<bool>
PrintStackMap("stackmap",
cl::desc("Display contents of stackmap section"));
+ // -version-info
+ cl::opt<bool>
+ VersionInfo("version-info",
+ cl::desc("Display ELF version sections (if present)"));
+ cl::alias VersionInfoShort("V", cl::desc("Alias for -version-info"),
+ cl::aliasopt(VersionInfo));
} // namespace opts
-static int ReturnValue = EXIT_SUCCESS;
-
namespace llvm {
-bool error(std::error_code EC) {
+LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
+ errs() << "\nError reading file: " << Msg << ".\n";
+ errs().flush();
+ exit(1);
+}
+
+void error(std::error_code EC) {
if (!EC)
- return false;
+ return;
- ReturnValue = EXIT_FAILURE;
- outs() << "\nError reading file: " << EC.message() << ".\n";
- outs().flush();
- return true;
+ reportError(EC.message());
}
bool relocAddressLess(RelocationRef a, RelocationRef b) {
@@ -212,17 +254,14 @@ static void reportError(StringRef Input, std::error_code EC) {
if (Input == "-")
Input = "<stdin>";
- errs() << Input << ": " << EC.message() << "\n";
- errs().flush();
- ReturnValue = EXIT_FAILURE;
+ reportError(Twine(Input) + ": " + EC.message());
}
static void reportError(StringRef Input, StringRef Message) {
if (Input == "-")
Input = "<stdin>";
- errs() << Input << ": " << Message << "\n";
- ReturnValue = EXIT_FAILURE;
+ reportError(Twine(Input) + ": " + Message);
}
static bool isMipsArch(unsigned Arch) {
@@ -253,26 +292,12 @@ static std::error_code createDumper(const ObjectFile *Obj, StreamWriter &Writer,
return readobj_error::unsupported_obj_file_format;
}
-static StringRef getLoadName(const ObjectFile *Obj) {
- if (auto *ELF = dyn_cast<ELF32LEObjectFile>(Obj))
- return ELF->getLoadName();
- if (auto *ELF = dyn_cast<ELF64LEObjectFile>(Obj))
- return ELF->getLoadName();
- if (auto *ELF = dyn_cast<ELF32BEObjectFile>(Obj))
- return ELF->getLoadName();
- if (auto *ELF = dyn_cast<ELF64BEObjectFile>(Obj))
- return ELF->getLoadName();
- llvm_unreachable("Not ELF");
-}
-
/// @brief Dumps the specified object file.
static void dumpObject(const ObjectFile *Obj) {
StreamWriter Writer(outs());
std::unique_ptr<ObjDumper> Dumper;
- if (std::error_code EC = createDumper(Obj, Writer, Dumper)) {
+ if (std::error_code EC = createDumper(Obj, Writer, Dumper))
reportError(Obj->getFileName(), EC);
- return;
- }
outs() << '\n';
outs() << "File: " << Obj->getFileName() << "\n";
@@ -281,8 +306,7 @@ static void dumpObject(const ObjectFile *Obj) {
<< Triple::getArchTypeName((llvm::Triple::ArchType)Obj->getArch())
<< "\n";
outs() << "AddressSize: " << (8*Obj->getBytesInAddress()) << "bit\n";
- if (Obj->isELF())
- outs() << "LoadName: " << getLoadName(Obj) << "\n";
+ Dumper->printLoadName();
if (opts::FileHeaders)
Dumper->printFileHeaders();
@@ -306,6 +330,10 @@ static void dumpObject(const ObjectFile *Obj) {
Dumper->printProgramHeaders();
if (opts::HashTable)
Dumper->printHashTable();
+ if (opts::GnuHashTable)
+ Dumper->printGnuHashTable();
+ if (opts::VersionInfo)
+ Dumper->printVersionInfo();
if (Obj->getArch() == llvm::Triple::arm && Obj->isELF())
if (opts::ARMAttributes)
Dumper->printAttributes();
@@ -317,25 +345,43 @@ static void dumpObject(const ObjectFile *Obj) {
if (opts::MipsReginfo)
Dumper->printMipsReginfo();
}
- if (opts::COFFImports)
- Dumper->printCOFFImports();
- if (opts::COFFExports)
- Dumper->printCOFFExports();
- if (opts::COFFDirectives)
- Dumper->printCOFFDirectives();
- if (opts::COFFBaseRelocs)
- Dumper->printCOFFBaseReloc();
-
+ if (Obj->isCOFF()) {
+ if (opts::COFFImports)
+ Dumper->printCOFFImports();
+ if (opts::COFFExports)
+ Dumper->printCOFFExports();
+ if (opts::COFFDirectives)
+ Dumper->printCOFFDirectives();
+ if (opts::COFFBaseRelocs)
+ Dumper->printCOFFBaseReloc();
+ if (opts::CodeView)
+ Dumper->printCodeViewDebugInfo();
+ }
+ if (Obj->isMachO()) {
+ if (opts::MachODataInCode)
+ Dumper->printMachODataInCode();
+ if (opts::MachOIndirectSymbols)
+ Dumper->printMachOIndirectSymbols();
+ if (opts::MachOLinkerOptions)
+ Dumper->printMachOLinkerOptions();
+ if (opts::MachOSegment)
+ Dumper->printMachOSegment();
+ if (opts::MachOVersionMin)
+ Dumper->printMachOVersionMin();
+ if (opts::MachODysymtab)
+ Dumper->printMachODysymtab();
+ }
if (opts::PrintStackMap)
Dumper->printStackMap();
}
/// @brief Dumps each object file in \a Arc.
static void dumpArchive(const Archive *Arc) {
- for (Archive::child_iterator ArcI = Arc->child_begin(),
- ArcE = Arc->child_end();
- ArcI != ArcE; ++ArcI) {
- ErrorOr<std::unique_ptr<Binary>> ChildOrErr = ArcI->getAsBinary();
+ for (auto &ErrorOrChild : Arc->children()) {
+ if (std::error_code EC = ErrorOrChild.getError())
+ reportError(Arc->getFileName(), EC.message());
+ const auto &Child = *ErrorOrChild;
+ ErrorOr<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (std::error_code EC = ChildOrErr.getError()) {
// Ignore non-object files.
if (EC != object_error::invalid_file_type)
@@ -365,18 +411,11 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary) {
/// @brief Opens \a File and dumps it.
static void dumpInput(StringRef File) {
- // If file isn't stdin, check that it exists.
- if (File != "-" && !sys::fs::exists(File)) {
- reportError(File, readobj_error::file_not_found);
- return;
- }
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
- if (std::error_code EC = BinaryOrErr.getError()) {
+ if (std::error_code EC = BinaryOrErr.getError())
reportError(File, EC);
- return;
- }
Binary &Binary = *BinaryOrErr.get().getBinary();
if (Archive *Arc = dyn_cast<Archive>(&Binary))
@@ -386,6 +425,8 @@ static void dumpInput(StringRef File) {
dumpMachOUniversalBinary(UBinary);
else if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
dumpObject(Obj);
+ else if (COFFImportFile *Import = dyn_cast<COFFImportFile>(&Binary))
+ dumpCOFFImportFile(Import);
else
reportError(File, readobj_error::unrecognized_file_format);
}
@@ -407,5 +448,5 @@ int main(int argc, const char *argv[]) {
std::for_each(opts::InputFilenames.begin(), opts::InputFilenames.end(),
dumpInput);
- return ReturnValue;
+ return 0;
}
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
index 74b9a60..5a10392 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -11,6 +11,7 @@
#define LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include <string>
namespace llvm {
@@ -19,7 +20,8 @@ namespace llvm {
}
// Various helper functions.
- bool error(std::error_code ec);
+ LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
+ void error(std::error_code ec);
bool relocAddressLess(object::RelocationRef A,
object::RelocationRef B);
} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 86f66f8..6ee3a44 100644
--- a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -94,6 +94,11 @@ CheckFiles("check",
cl::ZeroOrMore);
static cl::opt<uint64_t>
+PreallocMemory("preallocate",
+ cl::desc("Allocate memory upfront rather than on-demand"),
+ cl::init(0));
+
+static cl::opt<uint64_t>
TargetAddrStart("target-addr-start",
cl::desc("For -verify only: start of phony target address "
"range."),
@@ -127,6 +132,12 @@ DummySymbolMappings("dummy-extern",
cl::ZeroOrMore,
cl::Hidden);
+static cl::opt<bool>
+PrintAllocationRequests("print-alloc-requests",
+ cl::desc("Print allocation requests made to the memory "
+ "manager by RuntimeDyld"),
+ cl::Hidden);
+
/* *** */
// A trivial memory manager that doesn't do anything fancy, just uses the
@@ -150,12 +161,6 @@ public:
bool finalizeMemory(std::string *ErrMsg) override { return false; }
- // Invalidate instruction cache for sections with execute permissions.
- // Some platforms with separate data cache and instruction cache require
- // explicit cache flush, otherwise JIT code manipulations (like resolved
- // relocations) will get to the data cache but not to the instruction cache.
- virtual void invalidateInstructionCache();
-
void addDummySymbol(const std::string &Name, uint64_t Addr) {
DummyExterns[Name] = Addr;
}
@@ -173,15 +178,56 @@ public:
size_t Size) override {}
void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
size_t Size) override {}
+
+ void preallocateSlab(uint64_t Size) {
+ std::string Err;
+ sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, nullptr, &Err);
+ if (!MB.base())
+ report_fatal_error("Can't allocate enough memory: " + Err);
+
+ PreallocSlab = MB;
+ UsePreallocation = true;
+ SlabSize = Size;
+ // Start handing out memory from the base of the slab.
+ CurrentSlabOffset = (uintptr_t)MB.base();
+ }
+
+ uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode) {
+ Size = RoundUpToAlignment(Size, Alignment);
+ // CurrentSlabOffset is an absolute address inside the slab; make sure the
+ // request still fits before the slab's end.
+ if (CurrentSlabOffset + Size > (uintptr_t)PreallocSlab.base() + SlabSize)
+ report_fatal_error("Can't allocate enough memory. Tune --preallocate");
+
+ uintptr_t OldSlabOffset = CurrentSlabOffset;
+ sys::MemoryBlock MB((void *)OldSlabOffset, Size);
+ if (isCode)
+ FunctionMemory.push_back(MB);
+ else
+ DataMemory.push_back(MB);
+ CurrentSlabOffset += Size;
+ return (uint8_t*)OldSlabOffset;
+ }
+
private:
std::map<std::string, uint64_t> DummyExterns;
+ sys::MemoryBlock PreallocSlab;
+ bool UsePreallocation = false;
+ uintptr_t SlabSize = 0;
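+ // Next free address inside PreallocSlab; only meaningful when
+ // UsePreallocation is true.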
+ uintptr_t CurrentSlabOffset = 0;
};
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
unsigned Alignment,
unsigned SectionID,
StringRef SectionName) {
- sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, nullptr, nullptr);
+ if (PrintAllocationRequests)
+ outs() << "allocateCodeSection(Size = " << Size << ", Alignment = "
+ << Alignment << ", SectionName = " << SectionName << ")\n";
+
+ if (UsePreallocation)
+ return allocateFromSlab(Size, Alignment, true /* isCode */);
+
+ std::string Err;
+ sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, nullptr, &Err);
+ if (!MB.base())
+ report_fatal_error("MemoryManager allocation failed: " + Err);
FunctionMemory.push_back(MB);
return (uint8_t*)MB.base();
}
@@ -191,41 +237,35 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
unsigned SectionID,
StringRef SectionName,
bool IsReadOnly) {
- sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, nullptr, nullptr);
- DataMemory.push_back(MB);
- return (uint8_t*)MB.base();
-}
+ if (PrintAllocationRequests)
+ outs() << "allocateDataSection(Size = " << Size << ", Alignment = "
+ << Alignment << ", SectionName = " << SectionName << ")\n";
-void TrivialMemoryManager::invalidateInstructionCache() {
- for (int i = 0, e = FunctionMemory.size(); i != e; ++i)
- sys::Memory::InvalidateInstructionCache(FunctionMemory[i].base(),
- FunctionMemory[i].size());
+ if (UsePreallocation)
+ return allocateFromSlab(Size, Alignment, false /* isCode */);
- for (int i = 0, e = DataMemory.size(); i != e; ++i)
- sys::Memory::InvalidateInstructionCache(DataMemory[i].base(),
- DataMemory[i].size());
+ std::string Err;
+ sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, nullptr, &Err);
+ if (!MB.base())
+ report_fatal_error("MemoryManager allocation failed: " + Err);
+ DataMemory.push_back(MB);
+ return (uint8_t*)MB.base();
}
static const char *ProgramName;
-static void Message(const char *Type, const Twine &Msg) {
- errs() << ProgramName << ": " << Type << ": " << Msg << "\n";
-}
-
static int Error(const Twine &Msg) {
- Message("error", Msg);
+ errs() << ProgramName << ": error: " << Msg << "\n";
return 1;
}
static void loadDylibs() {
for (const std::string &Dylib : Dylibs) {
- if (sys::fs::is_regular_file(Dylib)) {
- std::string ErrMsg;
- if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
- llvm::errs() << "Error loading '" << Dylib << "': "
- << ErrMsg << "\n";
- } else
- llvm::errs() << "Dylib not found: '" << Dylib << "'.\n";
+ if (!sys::fs::is_regular_file(Dylib))
+ report_fatal_error("Dylib not found: '" + Dylib + "'.");
+ std::string ErrMsg;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
+ report_fatal_error("Error loading '" + Dylib + "': " + ErrMsg);
}
}
@@ -240,7 +280,7 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
- for(unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
+ for (auto &File : InputFileList) {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
RuntimeDyld Dyld(MemMgr, MemMgr);
@@ -248,7 +288,7 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
- MemoryBuffer::getFileOrSTDIN(InputFileList[i]);
+ MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = InputBuffer.getError())
return Error("unable to read input: '" + EC.message() + "'");
@@ -277,6 +317,7 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
if (UseDebugObj) {
DebugObj = LoadedObjInfo->getObjectForDebug(Obj);
SymbolObj = DebugObj.getBinary();
+ LoadedObjInfo.reset();
}
}
@@ -303,12 +344,11 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
// symbol in memory (rather than that in the unrelocated object file)
// and use that to query the DWARFContext.
if (!UseDebugObj && LoadObjects) {
- object::section_iterator Sec(SymbolObj->section_end());
- Sym.getSection(Sec);
+ object::section_iterator Sec = *Sym.getSection();
StringRef SecName;
Sec->getName(SecName);
uint64_t SectionLoadAddress =
- LoadedObjInfo->getSectionLoadAddress(SecName);
+ LoadedObjInfo->getSectionLoadAddress(*Sec);
if (SectionLoadAddress != 0)
Addr += SectionLoadAddress - Sec->getAddress();
}
@@ -317,11 +357,9 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
<< ", Addr = " << Addr << "\n";
DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
- DILineInfoTable::iterator Begin = Lines.begin();
- DILineInfoTable::iterator End = Lines.end();
- for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
- outs() << " Line info @ " << It->first - Addr << ": "
- << It->second.FileName << ", line:" << It->second.Line << "\n";
+ for (auto &D : Lines) {
+ outs() << " Line info @ " << D.first - Addr << ": "
+ << D.second.FileName << ", line:" << D.second.Line << "\n";
}
}
}
@@ -330,26 +368,33 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
return 0;
}
+static void doPreallocation(TrivialMemoryManager &MemMgr) {
+ // Allocate a slab of memory upfront, if required. This is used if
+ // we want to test small code models.
+ if (static_cast<intptr_t>(PreallocMemory) < 0)
+ report_fatal_error("Pre-allocated bytes of memory must be a positive integer.");
+
+ // FIXME: Limit the amount of memory that can be preallocated?
+ if (PreallocMemory != 0)
+ MemMgr.preallocateSlab(PreallocMemory);
+}
+
static int executeInput() {
// Load any dylibs requested on the command line.
loadDylibs();
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
+ doPreallocation(MemMgr);
RuntimeDyld Dyld(MemMgr, MemMgr);
- // FIXME: Preserve buffers until resolveRelocations time to work around a bug
- // in RuntimeDyldELF.
- // This fixme should be fixed ASAP. This is a very brittle workaround.
- std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
-
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
- for(unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
+ for (auto &File : InputFileList) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
- MemoryBuffer::getFileOrSTDIN(InputFileList[i]);
+ MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = InputBuffer.getError())
return Error("unable to read input: '" + EC.message() + "'");
ErrorOr<std::unique_ptr<ObjectFile>> MaybeObj(
@@ -359,7 +404,6 @@ static int executeInput() {
return Error("unable to create object file: '" + EC.message() + "'");
ObjectFile &Obj = **MaybeObj;
- InputBuffers.push_back(std::move(*InputBuffer));
// Load the object file
Dyld.loadObject(Obj);
@@ -368,12 +412,9 @@ static int executeInput() {
}
}
- // Resolve all the relocations we can.
- Dyld.resolveRelocations();
- // Clear instruction cache before code will be executed.
- MemMgr.invalidateInstructionCache();
-
+ // Resolve all the relocations we can.
// FIXME: Error out if there are unresolved relocations.
+ Dyld.resolveRelocations();
// Get the address of the entry point (_main by default).
void *MainAddress = Dyld.getSymbolLocalAddress(EntryPoint);
@@ -381,12 +422,12 @@ static int executeInput() {
return Error("no definition for '" + EntryPoint + "'");
// Invalidate the instruction cache for each loaded function.
- for (unsigned i = 0, e = MemMgr.FunctionMemory.size(); i != e; ++i) {
- sys::MemoryBlock &Data = MemMgr.FunctionMemory[i];
+ for (auto &FM : MemMgr.FunctionMemory) {
+
// Make sure the memory is executable.
+ // setExecutable will call InvalidateInstructionCache.
std::string ErrorStr;
- sys::Memory::InvalidateInstructionCache(Data.base(), Data.size());
- if (!sys::Memory::setExecutable(Data, &ErrorStr))
+ if (!sys::Memory::setExecutable(FM, &ErrorStr))
return Error("unable to mark function executable: '" + ErrorStr + "'");
}
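
Note: as the new comment says, sys::Memory::setExecutable also invalidates the instruction cache, which is why the separate InvalidateInstructionCache call could be dropped. Conceptually, the permission flip looks like this on a POSIX system (a rough sketch of the idea, not LLVM's implementation):

    #include <sys/mman.h>
    #include <cstddef>

    // Rough POSIX analogue of marking a JITed block executable. Base must be
    // page-aligned for mprotect; real code must also flush the icache on
    // architectures that need it, which is what setExecutable bundles in.
    static bool makeExecutable(void *Base, size_t Size) {
      // W^X: execute permission is added and write permission dropped together.
      return mprotect(Base, Size, PROT_READ | PROT_EXEC) == 0;
    }
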
@@ -428,11 +469,9 @@ applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
std::string SectionIDStr = Mapping.substr(0, EqualsIdx);
size_t ComaIdx = Mapping.find_first_of(",");
- if (ComaIdx == StringRef::npos) {
- errs() << "Invalid section specification '" << Mapping
- << "'. Should be '<file name>,<section name>=<addr>'\n";
- exit(1);
- }
+ if (ComaIdx == StringRef::npos)
+ report_fatal_error("Invalid section specification '" + Mapping +
+ "'. Should be '<file name>,<section name>=<addr>'");
std::string FileName = SectionIDStr.substr(0, ComaIdx);
std::string SectionName = SectionIDStr.substr(ComaIdx + 1);
@@ -442,20 +481,17 @@ applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
std::tie(OldAddrInt, ErrorMsg) =
Checker.getSectionAddr(FileName, SectionName, true);
- if (ErrorMsg != "") {
- errs() << ErrorMsg;
- exit(1);
- }
+ if (ErrorMsg != "")
+ report_fatal_error(ErrorMsg);
void* OldAddr = reinterpret_cast<void*>(static_cast<uintptr_t>(OldAddrInt));
std::string NewAddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t NewAddr;
- if (StringRef(NewAddrStr).getAsInteger(0, NewAddr)) {
- errs() << "Invalid section address in mapping '" << Mapping << "'.\n";
- exit(1);
- }
+ if (StringRef(NewAddrStr).getAsInteger(0, NewAddr))
+ report_fatal_error("Invalid section address in mapping '" + Mapping +
+ "'.");
Checker.getRTDyld().mapSectionAddress(OldAddr, NewAddr);
SpecificMappings[OldAddr] = NewAddr;
@@ -544,20 +580,16 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
for (const auto &Mapping : DummySymbolMappings) {
size_t EqualsIdx = Mapping.find_first_of("=");
- if (EqualsIdx == StringRef::npos) {
- errs() << "Invalid dummy symbol specification '" << Mapping
- << "'. Should be '<symbol name>=<addr>'\n";
- exit(1);
- }
+ if (EqualsIdx == StringRef::npos)
+ report_fatal_error("Invalid dummy symbol specification '" + Mapping +
+ "'. Should be '<symbol name>=<addr>'");
std::string Symbol = Mapping.substr(0, EqualsIdx);
std::string AddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t Addr;
- if (StringRef(AddrStr).getAsInteger(0, Addr)) {
- errs() << "Invalid symbol mapping '" << Mapping << "'.\n";
- exit(1);
- }
+ if (StringRef(AddrStr).getAsInteger(0, Addr))
+ report_fatal_error("Invalid symbol mapping '" + Mapping + "'.");
MemMgr.addDummySymbol(Symbol, Addr);
}
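
Note: each mapping string is split at '=' and the address parsed with base auto-detection (StringRef::getAsInteger(0, ...) accepts decimal, 0x-prefixed hex, and octal). A standalone sketch of that parse using std::strtoull in place of StringRef (parseMapping is hypothetical, and it returns false rather than calling report_fatal_error):

    #include <cstdint>
    #include <cstdlib>
    #include <string>

    // Hypothetical parser mirroring the "<symbol name>=<addr>" handling above.
    static bool parseMapping(const std::string &Mapping, std::string &Symbol,
                             uint64_t &Addr) {
      size_t EqualsIdx = Mapping.find('=');
      if (EqualsIdx == std::string::npos)
        return false; // invalid dummy symbol specification
      Symbol = Mapping.substr(0, EqualsIdx);
      const std::string AddrStr = Mapping.substr(EqualsIdx + 1);
      char *End = nullptr;
      Addr = std::strtoull(AddrStr.c_str(), &End, /*base=*/0); // 0 = auto-detect
      return !AddrStr.empty() && End && *End == '\0';
    }
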
@@ -569,38 +601,38 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
static int linkAndVerify() {
// Check for missing triple.
- if (TripleName == "") {
- llvm::errs() << "Error: -triple required when running in -verify mode.\n";
- return 1;
- }
+ if (TripleName == "")
+ return Error("-triple required when running in -verify mode.");
// Look up the target and build the disassembler.
Triple TheTriple(Triple::normalize(TripleName));
std::string ErrorStr;
const Target *TheTarget =
TargetRegistry::lookupTarget("", TheTriple, ErrorStr);
- if (!TheTarget) {
- llvm::errs() << "Error accessing target '" << TripleName << "': "
- << ErrorStr << "\n";
- return 1;
- }
+ if (!TheTarget)
+ return Error("Error accessing target '" + TripleName + "': " + ErrorStr);
+
TripleName = TheTriple.getTriple();
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, ""));
- assert(STI && "Unable to create subtarget info!");
+ if (!STI)
+ return Error("Unable to create subtarget info!");
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
- assert(MRI && "Unable to create target register info!");
+ if (!MRI)
+ return Error("Unable to create target register info!");
std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
- assert(MAI && "Unable to create target asm info!");
+ if (!MAI)
+ return Error("Unable to create target asm info!");
MCContext Ctx(MAI.get(), MRI.get(), nullptr);
std::unique_ptr<MCDisassembler> Disassembler(
TheTarget->createMCDisassembler(*STI, Ctx));
- assert(Disassembler && "Unable to create disassembler!");
+ if (!Disassembler)
+ return Error("Unable to create disassembler!");
std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
@@ -612,23 +644,19 @@ static int linkAndVerify() {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
+ doPreallocation(MemMgr);
RuntimeDyld Dyld(MemMgr, MemMgr);
Dyld.setProcessAllSections(true);
RuntimeDyldChecker Checker(Dyld, Disassembler.get(), InstPrinter.get(),
llvm::dbgs());
- // FIXME: Preserve buffers until resolveRelocations time to work around a bug
- // in RuntimeDyldELF.
- // This fixme should be fixed ASAP. This is a very brittle workaround.
- std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
-
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
- for(unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
+ for (auto &Filename : InputFileList) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
- MemoryBuffer::getFileOrSTDIN(InputFileList[i]);
+ MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = InputBuffer.getError())
return Error("unable to read input: '" + EC.message() + "'");
@@ -640,7 +668,6 @@ static int linkAndVerify() {
return Error("unable to create object file: '" + EC.message() + "'");
ObjectFile &Obj = **MaybeObj;
- InputBuffers.push_back(std::move(*InputBuffer));
// Load the object file
Dyld.loadObject(Obj);
@@ -660,11 +687,9 @@ static int linkAndVerify() {
Dyld.registerEHFrames();
int ErrorCode = checkAllExpressions(Checker);
- if (Dyld.hasError()) {
- errs() << "RTDyld reported an error applying relocations:\n "
- << Dyld.getErrorString() << "\n";
- ErrorCode = 1;
- }
+ if (Dyld.hasError())
+ return Error("RTDyld reported an error applying relocations:\n " +
+ Dyld.getErrorString());
return ErrorCode;
}
diff --git a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
index 6a1a248..99d2afd 100644
--- a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -612,7 +612,8 @@ struct CmpModifier: public Modifier {
}
Value *V = CmpInst::Create(fp ? Instruction::FCmp : Instruction::ICmp,
- op, Val0, Val1, "Cmp", BB->getTerminator());
+ (CmpInst::Predicate)op, Val0, Val1, "Cmp",
+ BB->getTerminator());
return PT->push_back(V);
}
};
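
Note: the llvm-stress change tracks a signature change in CmpInst::Create, which now takes the predicate as a CmpInst::Predicate rather than a raw integer, so the randomly chosen value needs an explicit cast. A hypothetical helper showing the call shape (the range clamp is added here for illustration; it is not llvm-stress's code):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Pick predicate bits at random (as llvm-stress does with its Random
    // source), clamp them into the valid ICmp range, and cast before handing
    // them to CmpInst::Create.
    static Value *makeRandomICmp(unsigned PredBits, Value *L, Value *R,
                                 Instruction *InsertBefore) {
      unsigned Op = CmpInst::FIRST_ICMP_PREDICATE +
                    PredBits % (CmpInst::LAST_ICMP_PREDICATE -
                                CmpInst::FIRST_ICMP_PREDICATE + 1);
      return CmpInst::Create(Instruction::ICmp, (CmpInst::Predicate)Op, L, R,
                             "Cmp", InsertBefore);
    }
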
@@ -666,7 +667,7 @@ static void IntroduceControlFlow(Function *F, Random &R) {
for (auto *Instr : BoolInst) {
BasicBlock *Curr = Instr->getParent();
- BasicBlock::iterator Loc = Instr;
+ BasicBlock::iterator Loc = Instr->getIterator();
BasicBlock *Next = Curr->splitBasicBlock(Loc, "CF");
Instr->moveBefore(Curr->getTerminator());
if (Curr != &F->getEntryBlock()) {
diff --git a/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp b/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
deleted file mode 100644
index c57c219..0000000
--- a/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ /dev/null
@@ -1,532 +0,0 @@
-//===-- LLVMSymbolize.cpp -------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implementation for LLVM symbolization library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMSymbolize.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Config/config.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/PDB/PDB.h"
-#include "llvm/DebugInfo/PDB/PDBContext.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/SymbolSize.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include <sstream>
-#include <stdlib.h>
-
-#if defined(_MSC_VER)
-#include <Windows.h>
-#include <DbgHelp.h>
-#pragma comment(lib, "dbghelp.lib")
-#endif
-
-namespace llvm {
-namespace symbolize {
-
-static bool error(std::error_code ec) {
- if (!ec)
- return false;
- errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
- return true;
-}
-
-static DILineInfoSpecifier
-getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) {
- return DILineInfoSpecifier(
- DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
- Opts.PrintFunctions);
-}
-
-ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
- : Module(Obj), DebugInfoContext(DICtx) {
- std::unique_ptr<DataExtractor> OpdExtractor;
- uint64_t OpdAddress = 0;
- // Find the .opd (function descriptor) section if any, for big-endian
- // PowerPC64 ELF.
- if (Module->getArch() == Triple::ppc64) {
- for (section_iterator Section : Module->sections()) {
- StringRef Name;
- if (!error(Section->getName(Name)) && Name == ".opd") {
- StringRef Data;
- if (!error(Section->getContents(Data))) {
- OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(),
- Module->getBytesInAddress()));
- OpdAddress = Section->getAddress();
- }
- break;
- }
- }
- }
- std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
- computeSymbolSizes(*Module);
- for (auto &P : Symbols)
- addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
-}
-
-void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
- DataExtractor *OpdExtractor, uint64_t OpdAddress) {
- SymbolRef::Type SymbolType = Symbol.getType();
- if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
- return;
- ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
- if (error(SymbolAddressOrErr.getError()))
- return;
- uint64_t SymbolAddress = *SymbolAddressOrErr;
- if (OpdExtractor) {
- // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
- // function descriptors. The first word of the descriptor is a pointer to
- // the function's code.
- // For the purposes of symbolization, pretend the symbol's address is that
- // of the function's code, not the descriptor.
- uint64_t OpdOffset = SymbolAddress - OpdAddress;
- uint32_t OpdOffset32 = OpdOffset;
- if (OpdOffset == OpdOffset32 &&
- OpdExtractor->isValidOffsetForAddress(OpdOffset32))
- SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
- }
- ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
- if (error(SymbolNameOrErr.getError()))
- return;
- StringRef SymbolName = *SymbolNameOrErr;
- // Mach-O symbol table names have leading underscore, skip it.
- if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
- SymbolName = SymbolName.drop_front();
- // FIXME: If a function has alias, there are two entries in symbol table
- // with same address size. Make sure we choose the correct one.
- auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
- SymbolDesc SD = { SymbolAddress, SymbolSize };
- M.insert(std::make_pair(SD, SymbolName));
-}
-
-bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
- std::string &Name, uint64_t &Addr,
- uint64_t &Size) const {
- const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
- if (SymbolMap.empty())
- return false;
- SymbolDesc SD = { Address, Address };
- auto SymbolIterator = SymbolMap.upper_bound(SD);
- if (SymbolIterator == SymbolMap.begin())
- return false;
- --SymbolIterator;
- if (SymbolIterator->first.Size != 0 &&
- SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
- return false;
- Name = SymbolIterator->second.str();
- Addr = SymbolIterator->first.Addr;
- Size = SymbolIterator->first.Size;
- return true;
-}
-
-DILineInfo ModuleInfo::symbolizeCode(
- uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
- DILineInfo LineInfo;
- if (DebugInfoContext) {
- LineInfo = DebugInfoContext->getLineInfoForAddress(
- ModuleOffset, getDILineInfoSpecifier(Opts));
- }
- // Override function name from symbol table if necessary.
- if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
- std::string FunctionName;
- uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
- FunctionName, Start, Size)) {
- LineInfo.FunctionName = FunctionName;
- }
- }
- return LineInfo;
-}
-
-DIInliningInfo ModuleInfo::symbolizeInlinedCode(
- uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
- DIInliningInfo InlinedContext;
-
- if (DebugInfoContext) {
- InlinedContext = DebugInfoContext->getInliningInfoForAddress(
- ModuleOffset, getDILineInfoSpecifier(Opts));
- }
- // Make sure there is at least one frame in context.
- if (InlinedContext.getNumberOfFrames() == 0) {
- InlinedContext.addFrame(DILineInfo());
- }
- // Override the function name in lower frame with name from symbol table.
- if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
- DIInliningInfo PatchedInlinedContext;
- for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
- DILineInfo LineInfo = InlinedContext.getFrame(i);
- if (i == n - 1) {
- std::string FunctionName;
- uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
- FunctionName, Start, Size)) {
- LineInfo.FunctionName = FunctionName;
- }
- }
- PatchedInlinedContext.addFrame(LineInfo);
- }
- InlinedContext = PatchedInlinedContext;
- }
- return InlinedContext;
-}
-
-bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
- uint64_t &Start, uint64_t &Size) const {
- return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
- Size);
-}
-
-const char LLVMSymbolizer::kBadString[] = "??";
-
-std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
- uint64_t ModuleOffset) {
- ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
- if (!Info)
- return printDILineInfo(DILineInfo());
- if (Opts.PrintInlining) {
- DIInliningInfo InlinedContext =
- Info->symbolizeInlinedCode(ModuleOffset, Opts);
- uint32_t FramesNum = InlinedContext.getNumberOfFrames();
- assert(FramesNum > 0);
- std::string Result;
- for (uint32_t i = 0; i < FramesNum; i++) {
- DILineInfo LineInfo = InlinedContext.getFrame(i);
- Result += printDILineInfo(LineInfo);
- }
- return Result;
- }
- DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
- return printDILineInfo(LineInfo);
-}
-
-std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
- uint64_t ModuleOffset) {
- std::string Name = kBadString;
- uint64_t Start = 0;
- uint64_t Size = 0;
- if (Opts.UseSymbolTable) {
- if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
- if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
- Name = DemangleName(Name);
- }
- }
- std::stringstream ss;
- ss << Name << "\n" << Start << " " << Size << "\n";
- return ss.str();
-}
-
-void LLVMSymbolizer::flush() {
- DeleteContainerSeconds(Modules);
- ObjectPairForPathArch.clear();
- ObjectFileForArch.clear();
-}
-
-// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
-// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
-// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
-// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
-static
-std::string getDarwinDWARFResourceForPath(
- const std::string &Path, const std::string &Basename) {
- SmallString<16> ResourceName = StringRef(Path);
- if (sys::path::extension(Path) != ".dSYM") {
- ResourceName += ".dSYM";
- }
- sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
- sys::path::append(ResourceName, Basename);
- return ResourceName.str();
-}
-
-static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
- MemoryBuffer::getFileOrSTDIN(Path);
- if (!MB)
- return false;
- return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
-}
-
-static bool findDebugBinary(const std::string &OrigPath,
- const std::string &DebuglinkName, uint32_t CRCHash,
- std::string &Result) {
- std::string OrigRealPath = OrigPath;
-#if defined(HAVE_REALPATH)
- if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
- OrigRealPath = RP;
- free(RP);
- }
-#endif
- SmallString<16> OrigDir(OrigRealPath);
- llvm::sys::path::remove_filename(OrigDir);
- SmallString<16> DebugPath = OrigDir;
- // Try /path/to/original_binary/debuglink_name
- llvm::sys::path::append(DebugPath, DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = DebugPath.str();
- return true;
- }
- // Try /path/to/original_binary/.debug/debuglink_name
- DebugPath = OrigRealPath;
- llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = DebugPath.str();
- return true;
- }
- // Try /usr/lib/debug/path/to/original_binary/debuglink_name
- DebugPath = "/usr/lib/debug";
- llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
- DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = DebugPath.str();
- return true;
- }
- return false;
-}
-
-static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
- uint32_t &CRCHash) {
- if (!Obj)
- return false;
- for (const SectionRef &Section : Obj->sections()) {
- StringRef Name;
- Section.getName(Name);
- Name = Name.substr(Name.find_first_not_of("._"));
- if (Name == "gnu_debuglink") {
- StringRef Data;
- Section.getContents(Data);
- DataExtractor DE(Data, Obj->isLittleEndian(), 0);
- uint32_t Offset = 0;
- if (const char *DebugNameStr = DE.getCStr(&Offset)) {
- // 4-byte align the offset.
- Offset = (Offset + 3) & ~0x3;
- if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
- DebugName = DebugNameStr;
- CRCHash = DE.getU32(&Offset);
- return true;
- }
- }
- break;
- }
- }
- return false;
-}
-
-static
-bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
- const MachOObjectFile *Obj) {
- ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
- ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
- if (dbg_uuid.empty() || bin_uuid.empty())
- return false;
- return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
-}
-
-ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
- const MachOObjectFile *MachExeObj, const std::string &ArchName) {
- // On Darwin we may find DWARF in a separate object file in a
- // resource directory.
- std::vector<std::string> DsymPaths;
- StringRef Filename = sys::path::filename(ExePath);
- DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
- for (const auto &Path : Opts.DsymHints) {
- DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
- }
- for (const auto &path : DsymPaths) {
- ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(path);
- std::error_code EC = BinaryOrErr.getError();
- if (EC != errc::no_such_file_or_directory && !error(EC)) {
- OwningBinary<Binary> B = std::move(BinaryOrErr.get());
- ObjectFile *DbgObj =
- getObjectFileFromBinary(B.getBinary(), ArchName);
- const MachOObjectFile *MachDbgObj =
- dyn_cast<const MachOObjectFile>(DbgObj);
- if (!MachDbgObj) continue;
- if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) {
- addOwningBinary(std::move(B));
- return DbgObj;
- }
- }
- }
- return nullptr;
-}
-
-LLVMSymbolizer::ObjectPair
-LLVMSymbolizer::getOrCreateObjects(const std::string &Path,
- const std::string &ArchName) {
- const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
- if (I != ObjectPairForPathArch.end())
- return I->second;
- ObjectFile *Obj = nullptr;
- ObjectFile *DbgObj = nullptr;
- ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
- if (!error(BinaryOrErr.getError())) {
- OwningBinary<Binary> &B = BinaryOrErr.get();
- Obj = getObjectFileFromBinary(B.getBinary(), ArchName);
- if (!Obj) {
- ObjectPair Res = std::make_pair(nullptr, nullptr);
- ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res;
- return Res;
- }
- addOwningBinary(std::move(B));
- if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
- DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
- // Try to locate the debug binary using .gnu_debuglink section.
- if (!DbgObj) {
- std::string DebuglinkName;
- uint32_t CRCHash;
- std::string DebugBinaryPath;
- if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) &&
- findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) {
- BinaryOrErr = createBinary(DebugBinaryPath);
- if (!error(BinaryOrErr.getError())) {
- OwningBinary<Binary> B = std::move(BinaryOrErr.get());
- DbgObj = getObjectFileFromBinary(B.getBinary(), ArchName);
- addOwningBinary(std::move(B));
- }
- }
- }
- }
- if (!DbgObj)
- DbgObj = Obj;
- ObjectPair Res = std::make_pair(Obj, DbgObj);
- ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res;
- return Res;
-}
-
-ObjectFile *
-LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin,
- const std::string &ArchName) {
- if (!Bin)
- return nullptr;
- ObjectFile *Res = nullptr;
- if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
- const auto &I = ObjectFileForArch.find(
- std::make_pair(UB, ArchName));
- if (I != ObjectFileForArch.end())
- return I->second;
- ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
- UB->getObjectForArch(ArchName);
- if (ParsedObj) {
- Res = ParsedObj.get().get();
- ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
- }
- ObjectFileForArch[std::make_pair(UB, ArchName)] = Res;
- } else if (Bin->isObject()) {
- Res = cast<ObjectFile>(Bin);
- }
- return Res;
-}
-
-ModuleInfo *
-LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
- const auto &I = Modules.find(ModuleName);
- if (I != Modules.end())
- return I->second;
- std::string BinaryName = ModuleName;
- std::string ArchName = Opts.DefaultArch;
- size_t ColonPos = ModuleName.find_last_of(':');
- // Verify that the substring after the colon forms a valid arch name.
- if (ColonPos != std::string::npos) {
- std::string ArchStr = ModuleName.substr(ColonPos + 1);
- if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
- BinaryName = ModuleName.substr(0, ColonPos);
- ArchName = ArchStr;
- }
- }
- ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName);
-
- if (!Objects.first) {
- // Failed to find valid object file.
- Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr));
- return nullptr;
- }
- DIContext *Context = nullptr;
- if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
- // If this is a COFF object, assume it contains PDB debug information. If
- // we don't find any we will fall back to the DWARF case.
- std::unique_ptr<IPDBSession> Session;
- PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
- Objects.first->getFileName(), Session);
- if (Error == PDB_ErrorCode::Success) {
- Context = new PDBContext(*CoffObject, std::move(Session),
- Opts.RelativeAddresses);
- }
- }
- if (!Context)
- Context = new DWARFContextInMemory(*Objects.second);
- assert(Context);
- ModuleInfo *Info = new ModuleInfo(Objects.first, Context);
- Modules.insert(make_pair(ModuleName, Info));
- return Info;
-}
-
-std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
- // By default, DILineInfo contains "<invalid>" for function/filename it
- // cannot fetch. We replace it with "??" to make our output closer to addr2line.
- static const std::string kDILineInfoBadString = "<invalid>";
- std::stringstream Result;
- if (Opts.PrintFunctions != FunctionNameKind::None) {
- std::string FunctionName = LineInfo.FunctionName;
- if (FunctionName == kDILineInfoBadString)
- FunctionName = kBadString;
- else if (Opts.Demangle)
- FunctionName = DemangleName(FunctionName);
- Result << FunctionName << "\n";
- }
- std::string Filename = LineInfo.FileName;
- if (Filename == kDILineInfoBadString)
- Filename = kBadString;
- Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n";
- return Result.str();
-}
-
-#if !defined(_MSC_VER)
-// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
-extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
- size_t *length, int *status);
-#endif
-
-std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
-#if !defined(_MSC_VER)
- // We can spoil names of symbols with C linkage, so use a heuristic
- // approach to check if the name should be demangled.
- if (Name.substr(0, 2) != "_Z")
- return Name;
- int status = 0;
- char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
- if (status != 0)
- return Name;
- std::string Result = DemangledName;
- free(DemangledName);
- return Result;
-#else
- char DemangledName[1024] = {0};
- DWORD result = ::UnDecorateSymbolName(
- Name.c_str(), DemangledName, 1023,
- UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
- UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
- UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
- UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
- UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
- UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
-
- return (result == 0) ? Name : std::string(DemangledName);
-#endif
-}
-
-} // namespace symbolize
-} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.h b/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.h
deleted file mode 100644
index be246c3..0000000
--- a/contrib/llvm/tools/llvm-symbolizer/LLVMSymbolize.h
+++ /dev/null
@@ -1,144 +0,0 @@
-//===-- LLVMSymbolize.h ----------------------------------------- C++ -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Header for LLVM symbolization library.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_SYMBOLIZER_LLVMSYMBOLIZE_H
-#define LLVM_TOOLS_LLVM_SYMBOLIZER_LLVMSYMBOLIZE_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/Object/MachOUniversal.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <map>
-#include <memory>
-#include <string>
-
-namespace llvm {
-
-typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
-using namespace object;
-
-namespace symbolize {
-
-class ModuleInfo;
-
-class LLVMSymbolizer {
-public:
- struct Options {
- FunctionNameKind PrintFunctions;
- bool UseSymbolTable : 1;
- bool PrintInlining : 1;
- bool Demangle : 1;
- bool RelativeAddresses : 1;
- std::string DefaultArch;
- std::vector<std::string> DsymHints;
- Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName,
- bool UseSymbolTable = true, bool PrintInlining = true,
- bool Demangle = true, bool RelativeAddresses = false,
- std::string DefaultArch = "")
- : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable),
- PrintInlining(PrintInlining), Demangle(Demangle),
- RelativeAddresses(RelativeAddresses), DefaultArch(DefaultArch) {}
- };
-
- LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
- ~LLVMSymbolizer() {
- flush();
- }
-
- // Returns the result of symbolization for module name/offset as
- // a string (possibly containing newlines).
- std::string
- symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset);
- std::string
- symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset);
- void flush();
- static std::string DemangleName(const std::string &Name);
-private:
- typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair;
-
- ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName);
- ObjectFile *lookUpDsymFile(const std::string &Path, const MachOObjectFile *ExeObj,
- const std::string &ArchName);
-
- /// \brief Returns pair of pointers to object and debug object.
- ObjectPair getOrCreateObjects(const std::string &Path,
- const std::string &ArchName);
- /// \brief Returns a parsed object file for a given architecture in a
- /// universal binary (or the binary itself if it is an object file).
- ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName);
-
- std::string printDILineInfo(DILineInfo LineInfo) const;
-
- // Owns all the parsed binaries and object files.
- SmallVector<std::unique_ptr<Binary>, 4> ParsedBinariesAndObjects;
- SmallVector<std::unique_ptr<MemoryBuffer>, 4> MemoryBuffers;
- void addOwningBinary(OwningBinary<Binary> OwningBin) {
- std::unique_ptr<Binary> Bin;
- std::unique_ptr<MemoryBuffer> MemBuf;
- std::tie(Bin, MemBuf) = OwningBin.takeBinary();
- ParsedBinariesAndObjects.push_back(std::move(Bin));
- MemoryBuffers.push_back(std::move(MemBuf));
- }
-
- // Owns module info objects.
- std::map<std::string, ModuleInfo *> Modules;
- std::map<std::pair<MachOUniversalBinary *, std::string>, ObjectFile *>
- ObjectFileForArch;
- std::map<std::pair<std::string, std::string>, ObjectPair>
- ObjectPairForPathArch;
-
- Options Opts;
- static const char kBadString[];
-};
-
-class ModuleInfo {
-public:
- ModuleInfo(ObjectFile *Obj, DIContext *DICtx);
-
- DILineInfo symbolizeCode(uint64_t ModuleOffset,
- const LLVMSymbolizer::Options &Opts) const;
- DIInliningInfo symbolizeInlinedCode(
- uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const;
- bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start,
- uint64_t &Size) const;
-
-private:
- bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
- std::string &Name, uint64_t &Addr,
- uint64_t &Size) const;
- // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
- // (function descriptor) section and OpdExtractor refers to its contents.
- void addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
- DataExtractor *OpdExtractor = nullptr,
- uint64_t OpdAddress = 0);
- ObjectFile *Module;
- std::unique_ptr<DIContext> DebugInfoContext;
-
- struct SymbolDesc {
- uint64_t Addr;
- // If size is 0, assume that symbol occupies the whole memory range up to
- // the following symbol.
- uint64_t Size;
- friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
- return s1.Addr < s2.Addr;
- }
- };
- std::map<SymbolDesc, StringRef> Functions;
- std::map<SymbolDesc, StringRef> Objects;
-};
-
-} // namespace symbolize
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 9c9f3ad..e45660c 100644
--- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -15,8 +15,9 @@
//
//===----------------------------------------------------------------------===//
-#include "LLVMSymbolize.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Support/COM.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -73,6 +74,20 @@ static cl::list<std::string>
ClDsymHint("dsym-hint", cl::ZeroOrMore,
cl::desc("Path to .dSYM bundles to search for debug info for the "
"object files"));
+static cl::opt<bool>
+ ClPrintAddress("print-address", cl::init(false),
+ cl::desc("Show address before line information"));
+
+static cl::opt<bool>
+ ClPrettyPrint("pretty-print", cl::init(false),
+ cl::desc("Make the output more human friendly"));
+
+static bool error(std::error_code ec) {
+ if (!ec)
+ return false;
+ errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
+ return true;
+}
static bool parseCommand(bool &IsData, std::string &ModuleName,
uint64_t &ModuleOffset) {
@@ -118,9 +133,7 @@ static bool parseCommand(bool &IsData, std::string &ModuleName,
// Skip delimiters and parse module offset.
pos += strspn(pos, kDelimiters);
int offset_length = strcspn(pos, kDelimiters);
- if (StringRef(pos, offset_length).getAsInteger(0, ModuleOffset))
- return false;
- return true;
+ return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset);
}
int main(int argc, char **argv) {
@@ -132,9 +145,9 @@ int main(int argc, char **argv) {
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
cl::ParseCommandLineOptions(argc, argv, "llvm-symbolizer\n");
- LLVMSymbolizer::Options Opts(ClPrintFunctions, ClUseSymbolTable,
- ClPrintInlining, ClDemangle,
+ LLVMSymbolizer::Options Opts(ClPrintFunctions, ClUseSymbolTable, ClDemangle,
ClUseRelativeAddress, ClDefaultArch);
+
for (const auto &hint : ClDsymHint) {
if (sys::path::extension(hint) == ".dSYM") {
Opts.DsymHints.push_back(hint);
@@ -148,11 +161,28 @@ int main(int argc, char **argv) {
bool IsData = false;
std::string ModuleName;
uint64_t ModuleOffset;
+ DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
+ ClPrettyPrint);
+
while (parseCommand(IsData, ModuleName, ModuleOffset)) {
- std::string Result =
- IsData ? Symbolizer.symbolizeData(ModuleName, ModuleOffset)
- : Symbolizer.symbolizeCode(ModuleName, ModuleOffset);
- outs() << Result << "\n";
+ if (ClPrintAddress) {
+ outs() << "0x";
+ outs().write_hex(ModuleOffset);
+ StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
+ outs() << Delimiter;
+ }
+ if (IsData) {
+ auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
+ Printer << (error(ResOrErr.getError()) ? DIGlobal() : ResOrErr.get());
+ } else if (ClPrintInlining) {
+ auto ResOrErr = Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset);
+ Printer << (error(ResOrErr.getError()) ? DIInliningInfo()
+ : ResOrErr.get());
+ } else {
+ auto ResOrErr = Symbolizer.symbolizeCode(ModuleName, ModuleOffset);
+ Printer << (error(ResOrErr.getError()) ? DILineInfo() : ResOrErr.get());
+ }
+ outs() << "\n";
outs().flush();
}
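
Note: the rewritten loop consumes the symbolizer's new ErrorOr<T> returns: on failure it reports the error and feeds the DIPrinter a default-constructed result so the output stays well-formed. The pattern in isolation (tryCompute is a placeholder producer, not a symbolizer API):

    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>
    using namespace llvm;

    // Placeholder producer: succeeds for even inputs, fails for odd ones.
    static ErrorOr<int> tryCompute(int X) {
      if (X % 2 == 0)
        return X / 2;
      return std::make_error_code(std::errc::invalid_argument);
    }

    int main() {
      auto ResOrErr = tryCompute(7);
      // Same shape as the symbolizer loop: report the error, then fall back
      // to a default value so downstream printing still emits a record.
      if (std::error_code EC = ResOrErr.getError())
        errs() << "error: " << EC.message() << "\n";
      int Value = ResOrErr.getError() ? 0 : ResOrErr.get();
      outs() << Value << "\n";
      return 0;
    }
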
diff --git a/contrib/llvm/tools/macho-dump/macho-dump.cpp b/contrib/llvm/tools/macho-dump/macho-dump.cpp
deleted file mode 100644
index 39c2860..0000000
--- a/contrib/llvm/tools/macho-dump/macho-dump.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-//===-- macho-dump.cpp - Mach Object Dumping Tool -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a testing tool for use with the MC/Mach-O LLVM components.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Object/MachO.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <system_error>
-using namespace llvm;
-using namespace llvm::object;
-
-static cl::opt<std::string>
-InputFile(cl::Positional, cl::desc("<input file>"), cl::init("-"));
-
-static cl::opt<bool>
-ShowSectionData("dump-section-data", cl::desc("Dump the contents of sections"),
- cl::init(false));
-
-///
-
-static const char *ProgramName;
-
-static void Message(const char *Type, const Twine &Msg) {
- errs() << ProgramName << ": " << Type << ": " << Msg << "\n";
-}
-
-static int Error(const Twine &Msg) {
- Message("error", Msg);
- return 1;
-}
-
-static void Warning(const Twine &Msg) {
- Message("warning", Msg);
-}
-
-///
-
-static void DumpSegmentCommandData(StringRef Name,
- uint64_t VMAddr, uint64_t VMSize,
- uint64_t FileOffset, uint64_t FileSize,
- uint32_t MaxProt, uint32_t InitProt,
- uint32_t NumSections, uint32_t Flags) {
- outs() << " ('segment_name', '";
- outs().write_escaped(Name, /*UseHexEscapes=*/true) << "')\n";
- outs() << " ('vm_addr', " << VMAddr << ")\n";
- outs() << " ('vm_size', " << VMSize << ")\n";
- outs() << " ('file_offset', " << FileOffset << ")\n";
- outs() << " ('file_size', " << FileSize << ")\n";
- outs() << " ('maxprot', " << MaxProt << ")\n";
- outs() << " ('initprot', " << InitProt << ")\n";
- outs() << " ('num_sections', " << NumSections << ")\n";
- outs() << " ('flags', " << Flags << ")\n";
-}
-
-static int DumpSectionData(const MachOObjectFile &Obj, unsigned Index,
- StringRef Name,
- StringRef SegmentName, uint64_t Address,
- uint64_t Size, uint32_t Offset,
- uint32_t Align, uint32_t RelocationTableOffset,
- uint32_t NumRelocationTableEntries,
- uint32_t Flags, uint32_t Reserved1,
- uint32_t Reserved2, uint64_t Reserved3 = ~0ULL) {
- outs() << " # Section " << Index << "\n";
- outs() << " (('section_name', '";
- outs().write_escaped(Name, /*UseHexEscapes=*/true) << "')\n";
- outs() << " ('segment_name', '";
- outs().write_escaped(SegmentName, /*UseHexEscapes=*/true) << "')\n";
- outs() << " ('address', " << Address << ")\n";
- outs() << " ('size', " << Size << ")\n";
- outs() << " ('offset', " << Offset << ")\n";
- outs() << " ('alignment', " << Align << ")\n";
- outs() << " ('reloc_offset', " << RelocationTableOffset << ")\n";
- outs() << " ('num_reloc', " << NumRelocationTableEntries << ")\n";
- outs() << " ('flags', " << format("0x%x", Flags) << ")\n";
- outs() << " ('reserved1', " << Reserved1 << ")\n";
- outs() << " ('reserved2', " << Reserved2 << ")\n";
- if (Reserved3 != ~0ULL)
- outs() << " ('reserved3', " << Reserved3 << ")\n";
- outs() << " ),\n";
-
- // Dump the relocation entries.
- outs() << " ('_relocations', [\n";
- unsigned RelNum = 0;
- for (relocation_iterator I = Obj.section_rel_begin(Index),
- E = Obj.section_rel_end(Index);
- I != E; ++I, ++RelNum) {
- MachO::any_relocation_info RE = Obj.getRelocation(I->getRawDataRefImpl());
- outs() << " # Relocation " << RelNum << "\n";
- outs() << " (('word-0', " << format("0x%x", RE.r_word0) << "),\n";
- outs() << " ('word-1', " << format("0x%x", RE.r_word1) << ")),\n";
- }
- outs() << " ])\n";
-
- // Dump the section data, if requested.
- if (ShowSectionData) {
- outs() << " ('_section_data', '";
- StringRef Data = Obj.getData().substr(Offset, Size);
- for (unsigned i = 0; i != Data.size(); ++i) {
- if (i && (i % 4) == 0)
- outs() << ' ';
- outs() << hexdigit((Data[i] >> 4) & 0xF, /*LowerCase=*/true);
- outs() << hexdigit((Data[i] >> 0) & 0xF, /*LowerCase=*/true);
- }
- outs() << "')\n";
- }
-
- return 0;
-}
-
-static int DumpSegmentCommand(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::segment_command SLC = Obj.getSegmentLoadCommand(LCI);
-
- DumpSegmentCommandData(StringRef(SLC.segname, 16), SLC.vmaddr,
- SLC.vmsize, SLC.fileoff, SLC.filesize,
- SLC.maxprot, SLC.initprot, SLC.nsects, SLC.flags);
-
- // Dump the sections.
- outs() << " ('sections', [\n";
- for (unsigned i = 0; i != SLC.nsects; ++i) {
- MachO::section Sect = Obj.getSection(LCI, i);
- DumpSectionData(Obj, i, StringRef(Sect.sectname, 16),
- StringRef(Sect.segname, 16), Sect.addr,
- Sect.size, Sect.offset, Sect.align,
- Sect.reloff, Sect.nreloc, Sect.flags,
- Sect.reserved1, Sect.reserved2);
- }
- outs() << " ])\n";
-
- return 0;
-}
-
-static int DumpSegment64Command(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::segment_command_64 SLC = Obj.getSegment64LoadCommand(LCI);
- DumpSegmentCommandData(StringRef(SLC.segname, 16), SLC.vmaddr,
- SLC.vmsize, SLC.fileoff, SLC.filesize,
- SLC.maxprot, SLC.initprot, SLC.nsects, SLC.flags);
-
- // Dump the sections.
- outs() << " ('sections', [\n";
- for (unsigned i = 0; i != SLC.nsects; ++i) {
- MachO::section_64 Sect = Obj.getSection64(LCI, i);
-
- DumpSectionData(Obj, i, StringRef(Sect.sectname, 16),
- StringRef(Sect.segname, 16), Sect.addr,
- Sect.size, Sect.offset, Sect.align,
- Sect.reloff, Sect.nreloc, Sect.flags,
- Sect.reserved1, Sect.reserved2,
- Sect.reserved3);
- }
- outs() << " ])\n";
-
- return 0;
-}
-
-static void DumpSymbolTableEntryData(const MachOObjectFile &Obj,
- unsigned Index, uint32_t StringIndex,
- uint8_t Type, uint8_t SectionIndex,
- uint16_t Flags, uint64_t Value,
- StringRef StringTable) {
- const char *Name = &StringTable.data()[StringIndex];
- outs() << " # Symbol " << Index << "\n";
- outs() << " (('n_strx', " << StringIndex << ")\n";
- outs() << " ('n_type', " << format("0x%x", Type) << ")\n";
- outs() << " ('n_sect', " << uint32_t(SectionIndex) << ")\n";
- outs() << " ('n_desc', " << Flags << ")\n";
- outs() << " ('n_value', " << Value << ")\n";
- outs() << " ('_string', '" << Name << "')\n";
- outs() << " ),\n";
-}
-
-static int DumpSymtabCommand(const MachOObjectFile &Obj) {
- MachO::symtab_command SLC = Obj.getSymtabLoadCommand();
-
- outs() << " ('symoff', " << SLC.symoff << ")\n";
- outs() << " ('nsyms', " << SLC.nsyms << ")\n";
- outs() << " ('stroff', " << SLC.stroff << ")\n";
- outs() << " ('strsize', " << SLC.strsize << ")\n";
-
- // Dump the string data.
- outs() << " ('_string_data', '";
- StringRef StringTable = Obj.getStringTableData();
- outs().write_escaped(StringTable,
- /*UseHexEscapes=*/true) << "')\n";
-
- // Dump the symbol table.
- outs() << " ('_symbols', [\n";
- unsigned SymNum = 0;
- for (const SymbolRef &Symbol : Obj.symbols()) {
- DataRefImpl DRI = Symbol.getRawDataRefImpl();
- if (Obj.is64Bit()) {
- MachO::nlist_64 STE = Obj.getSymbol64TableEntry(DRI);
- DumpSymbolTableEntryData(Obj, SymNum, STE.n_strx, STE.n_type,
- STE.n_sect, STE.n_desc, STE.n_value,
- StringTable);
- } else {
- MachO::nlist STE = Obj.getSymbolTableEntry(DRI);
- DumpSymbolTableEntryData(Obj, SymNum, STE.n_strx, STE.n_type,
- STE.n_sect, STE.n_desc, STE.n_value,
- StringTable);
- }
- SymNum++;
- }
- outs() << " ])\n";
-
- return 0;
-}
-
-static int DumpDysymtabCommand(const MachOObjectFile &Obj) {
- MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
-
- outs() << " ('ilocalsym', " << DLC.ilocalsym << ")\n";
- outs() << " ('nlocalsym', " << DLC.nlocalsym << ")\n";
- outs() << " ('iextdefsym', " << DLC.iextdefsym << ")\n";
- outs() << " ('nextdefsym', " << DLC.nextdefsym << ")\n";
- outs() << " ('iundefsym', " << DLC.iundefsym << ")\n";
- outs() << " ('nundefsym', " << DLC.nundefsym << ")\n";
- outs() << " ('tocoff', " << DLC.tocoff << ")\n";
- outs() << " ('ntoc', " << DLC.ntoc << ")\n";
- outs() << " ('modtaboff', " << DLC.modtaboff << ")\n";
- outs() << " ('nmodtab', " << DLC.nmodtab << ")\n";
- outs() << " ('extrefsymoff', " << DLC.extrefsymoff << ")\n";
- outs() << " ('nextrefsyms', " << DLC.nextrefsyms << ")\n";
- outs() << " ('indirectsymoff', " << DLC.indirectsymoff << ")\n";
- outs() << " ('nindirectsyms', " << DLC.nindirectsyms << ")\n";
- outs() << " ('extreloff', " << DLC.extreloff << ")\n";
- outs() << " ('nextrel', " << DLC.nextrel << ")\n";
- outs() << " ('locreloff', " << DLC.locreloff << ")\n";
- outs() << " ('nlocrel', " << DLC.nlocrel << ")\n";
-
- // Dump the indirect symbol table.
- outs() << " ('_indirect_symbols', [\n";
- for (unsigned i = 0; i != DLC.nindirectsyms; ++i) {
- uint32_t ISTE = Obj.getIndirectSymbolTableEntry(DLC, i);
- outs() << " # Indirect Symbol " << i << "\n";
- outs() << " (('symbol_index', " << format("0x%x", ISTE) << "),),\n";
- }
- outs() << " ])\n";
-
- return 0;
-}
-
-static int
-DumpLinkeditDataCommand(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::linkedit_data_command LLC = Obj.getLinkeditDataLoadCommand(LCI);
- outs() << " ('dataoff', " << LLC.dataoff << ")\n"
- << " ('datasize', " << LLC.datasize << ")\n"
- << " ('_addresses', [\n";
-
- SmallVector<uint64_t, 8> Addresses;
- Obj.ReadULEB128s(LLC.dataoff, Addresses);
- for (unsigned i = 0, e = Addresses.size(); i != e; ++i)
- outs() << " # Address " << i << '\n'
- << " ('address', " << format("0x%x", Addresses[i]) << "),\n";
-
- outs() << " ])\n";
-
- return 0;
-}
-
-static int
-DumpDataInCodeDataCommand(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::linkedit_data_command LLC = Obj.getLinkeditDataLoadCommand(LCI);
- outs() << " ('dataoff', " << LLC.dataoff << ")\n"
- << " ('datasize', " << LLC.datasize << ")\n"
- << " ('_data_regions', [\n";
-
- unsigned NumRegions = LLC.datasize / sizeof(MachO::data_in_code_entry);
- for (unsigned i = 0; i < NumRegions; ++i) {
- MachO::data_in_code_entry DICE= Obj.getDataInCodeTableEntry(LLC.dataoff, i);
- outs() << " # DICE " << i << "\n"
- << " ('offset', " << DICE.offset << ")\n"
- << " ('length', " << DICE.length << ")\n"
- << " ('kind', " << DICE.kind << ")\n";
- }
-
- outs() <<" ])\n";
-
- return 0;
-}
-
-static int
-DumpLinkerOptionsCommand(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::linker_option_command LOLC = Obj.getLinkerOptionLoadCommand(LCI);
- outs() << " ('count', " << LOLC.count << ")\n"
- << " ('_strings', [\n";
-
- uint64_t DataSize = LOLC.cmdsize - sizeof(MachO::linker_option_command);
- const char *P = LCI.Ptr + sizeof(MachO::linker_option_command);
- StringRef Data(P, DataSize);
- for (unsigned i = 0; i != LOLC.count; ++i) {
- std::pair<StringRef,StringRef> Split = Data.split('\0');
- outs() << "\t\"";
- outs().write_escaped(Split.first);
- outs() << "\",\n";
- Data = Split.second;
- }
- outs() <<" ])\n";
-
- return 0;
-}
-
-static int
-DumpVersionMin(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::version_min_command VMLC = Obj.getVersionMinLoadCommand(LCI);
- outs() << " ('version, " << VMLC.version << ")\n"
- << " ('sdk, " << VMLC.sdk << ")\n";
- return 0;
-}
-
-static int
-DumpDylibID(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- MachO::dylib_command DLLC = Obj.getDylibIDLoadCommand(LCI);
- outs() << " ('install_name', '" << LCI.Ptr + DLLC.dylib.name << "')\n"
- << " ('timestamp, " << DLLC.dylib.timestamp << ")\n"
- << " ('cur_version, " << DLLC.dylib.current_version << ")\n"
- << " ('compat_version, " << DLLC.dylib.compatibility_version << ")\n";
- return 0;
-}
-
-static int DumpLoadCommand(const MachOObjectFile &Obj,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- switch (LCI.C.cmd) {
- case MachO::LC_SEGMENT:
- return DumpSegmentCommand(Obj, LCI);
- case MachO::LC_SEGMENT_64:
- return DumpSegment64Command(Obj, LCI);
- case MachO::LC_SYMTAB:
- return DumpSymtabCommand(Obj);
- case MachO::LC_DYSYMTAB:
- return DumpDysymtabCommand(Obj);
- case MachO::LC_CODE_SIGNATURE:
- case MachO::LC_SEGMENT_SPLIT_INFO:
- case MachO::LC_FUNCTION_STARTS:
- return DumpLinkeditDataCommand(Obj, LCI);
- case MachO::LC_DATA_IN_CODE:
- return DumpDataInCodeDataCommand(Obj, LCI);
- case MachO::LC_LINKER_OPTION:
- return DumpLinkerOptionsCommand(Obj, LCI);
- case MachO::LC_VERSION_MIN_IPHONEOS:
- case MachO::LC_VERSION_MIN_MACOSX:
- return DumpVersionMin(Obj, LCI);
- case MachO::LC_ID_DYLIB:
- return DumpDylibID(Obj, LCI);
- default:
- Warning("unknown load command: " + Twine(LCI.C.cmd));
- return 0;
- }
-}
-
-static int DumpLoadCommand(const MachOObjectFile &Obj, unsigned Index,
- const MachOObjectFile::LoadCommandInfo &LCI) {
- outs() << " # Load Command " << Index << "\n"
- << " (('command', " << LCI.C.cmd << ")\n"
- << " ('size', " << LCI.C.cmdsize << ")\n";
- int Res = DumpLoadCommand(Obj, LCI);
- outs() << " ),\n";
- return Res;
-}
-
-static void printHeader(const MachOObjectFile *Obj,
- const MachO::mach_header &Header) {
- outs() << "('cputype', " << Header.cputype << ")\n";
- outs() << "('cpusubtype', " << Header.cpusubtype << ")\n";
- outs() << "('filetype', " << Header.filetype << ")\n";
- outs() << "('num_load_commands', " << Header.ncmds << ")\n";
- outs() << "('load_commands_size', " << Header.sizeofcmds << ")\n";
- outs() << "('flag', " << Header.flags << ")\n";
-
- // Print extended header if 64-bit.
- if (Obj->is64Bit()) {
- const MachO::mach_header_64 *Header64 =
- reinterpret_cast<const MachO::mach_header_64 *>(&Header);
- outs() << "('reserved', " << Header64->reserved << ")\n";
- }
-}
-
-int main(int argc, char **argv) {
- ProgramName = argv[0];
- llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
-
- cl::ParseCommandLineOptions(argc, argv, "llvm Mach-O dumping tool\n");
-
- ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(InputFile);
- if (std::error_code EC = BinaryOrErr.getError())
- return Error("unable to read input: '" + EC.message() + "'");
- Binary &Binary = *BinaryOrErr.get().getBinary();
-
- const MachOObjectFile *InputObject = dyn_cast<MachOObjectFile>(&Binary);
- if (!InputObject)
- return Error("Not a MachO object");
-
- // Print the header
- MachO::mach_header_64 Header64;
- MachO::mach_header *Header = reinterpret_cast<MachO::mach_header*>(&Header64);
- if (InputObject->is64Bit())
- Header64 = InputObject->getHeader64();
- else
- *Header = InputObject->getHeader();
- printHeader(InputObject, *Header);
-
- // Print the load commands.
- int Res = 0;
- unsigned Index = 0;
- outs() << "('load_commands', [\n";
- for (const auto &Load : InputObject->load_commands()) {
- if (DumpLoadCommand(*InputObject, Index++, Load))
- break;
- }
- outs() << "])\n";
-
- return Res;
-}
diff --git a/contrib/llvm/tools/opt/opt.cpp b/contrib/llvm/tools/opt/opt.cpp
index 0db60d1..fe1605a 100644
--- a/contrib/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm/tools/opt/opt.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
@@ -36,7 +37,6 @@
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -51,6 +51,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
#include <memory>
using namespace llvm;
@@ -190,6 +191,11 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
cl::desc("Preserve use-list order when writing LLVM assembly."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ RunTwice("run-twice",
+ cl::desc("Run all passes twice, re-using the same pass manager."),
+ cl::init(false), cl::Hidden);
+
static inline void addPass(legacy::PassManagerBase &PM, Pass *P) {
// Add the pass to the pass manager...
PM.add(P);
@@ -312,7 +318,6 @@ int main(int argc, char **argv) {
initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
- initializeIPA(Registry);
initializeTransformUtils(Registry);
initializeInstCombine(Registry);
initializeInstrumentation(Registry);
@@ -583,22 +588,61 @@ int main(int argc, char **argv) {
if (!NoVerify && !VerifyEach)
Passes.add(createVerifierPass());
+ // In run-twice mode, we want to make sure the output is bit-by-bit
+ // equivalent if we run the pass manager again, so set up two buffers and
+ // a stream to write to them. Note that llc does something similar and it
+ // may be worth abstracting this out in the future.
+ SmallVector<char, 0> Buffer;
+ SmallVector<char, 0> CompileTwiceBuffer;
+ std::unique_ptr<raw_svector_ostream> BOS;
+ raw_ostream *OS = nullptr;
+
// Write bitcode or assembly to the output as the last step...
if (!NoOutput && !AnalyzeOnly) {
+ assert(Out);
+ OS = &Out->os();
+ if (RunTwice) {
+ BOS = make_unique<raw_svector_ostream>(Buffer);
+ OS = BOS.get();
+ }
if (OutputAssembly)
- Passes.add(
- createPrintModulePass(Out->os(), "", PreserveAssemblyUseListOrder));
+ Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder));
else
- Passes.add(
- createBitcodeWriterPass(Out->os(), PreserveBitcodeUseListOrder));
+ Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder));
}
// Before executing passes, print the final values of the LLVM options.
cl::PrintOptionValues();
+ // If requested, run all passes again with the same pass manager to catch
+ // bugs caused by persistent state in the passes.
+ if (RunTwice) {
+ std::unique_ptr<Module> M2(CloneModule(M.get()));
+ Passes.run(*M2);
+ CompileTwiceBuffer = Buffer;
+ Buffer.clear();
+ }
+
// Now that we have all of the passes ready, run them.
Passes.run(*M);
+ // Compare the two outputs and make sure they're the same
+ if (RunTwice) {
+ assert(Out);
+ if (Buffer.size() != CompileTwiceBuffer.size() ||
+ (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) !=
+ 0)) {
+ errs() << "Running the pass manager twice changed the output.\n"
+ "Writing the result of the second run to the specified output.\n"
+ "To generate the one-run comparison binary, just run without\n"
+ "the compile-twice option\n";
+ Out->os() << BOS->str();
+ Out->keep();
+ return 1;
+ }
+ Out->os() << BOS->str();
+ }
+
// Declare success.
if (!NoOutput || PrintBreakpoints)
Out->keep();
diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 0d7c5ff..4177388 100644
--- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -294,6 +294,15 @@ public:
}
};
+class AsmVariantInfo {
+public:
+ std::string RegisterPrefix;
+ std::string TokenizingCharacters;
+ std::string SeparatorCharacters;
+ std::string BreakCharacters;
+ int AsmVariantNo;
+};
+
/// MatchableInfo - Helper class for storing the necessary information for an
/// instruction or alias which is capable of being matched.
struct MatchableInfo {
@@ -460,6 +469,20 @@ struct MatchableInfo {
TheDef->getValueAsBit("UseInstAsmMatchConverter")) {
}
+ // Could remove this and the dtor if PointerUnion supported unique_ptr
+ // elements with a dynamic failure/assertion (like the one below) in the case
+ // where it was copied while in an owning state.
+ MatchableInfo(const MatchableInfo &RHS)
+ : AsmVariantID(RHS.AsmVariantID), AsmString(RHS.AsmString),
+ TheDef(RHS.TheDef), DefRec(RHS.DefRec), ResOperands(RHS.ResOperands),
+ Mnemonic(RHS.Mnemonic), AsmOperands(RHS.AsmOperands),
+ RequiredFeatures(RHS.RequiredFeatures),
+ ConversionFnKind(RHS.ConversionFnKind),
+ HasDeprecation(RHS.HasDeprecation),
+ UseInstAsmMatchConverter(RHS.UseInstAsmMatchConverter) {
+ assert(!DefRec.is<const CodeGenInstAlias *>());
+ }
+
~MatchableInfo() {
delete DefRec.dyn_cast<const CodeGenInstAlias*>();
}
@@ -470,18 +493,12 @@ struct MatchableInfo {
void initialize(const AsmMatcherInfo &Info,
SmallPtrSetImpl<Record*> &SingletonRegisters,
- int AsmVariantNo, std::string &RegisterPrefix);
+ AsmVariantInfo const &Variant);
/// validate - Return true if this matchable is a valid thing to match against
/// and perform a bunch of validity checking.
bool validate(StringRef CommentDelimiter, bool Hack) const;
- /// extractSingletonRegisterForAsmOperand - Extract singleton register,
- /// if present, from specified token.
- void
- extractSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info,
- std::string &RegisterPrefix);
-
/// findAsmOperand - Find the AsmOperand with the specified name and
/// suboperand index.
int findAsmOperand(StringRef N, int SubOpIdx) const {
@@ -576,8 +593,10 @@ struct MatchableInfo {
void dump() const;
private:
- void tokenizeAsmString(const AsmMatcherInfo &Info);
- void addAsmOperand(size_t Start, size_t End);
+ void tokenizeAsmString(AsmMatcherInfo const &Info,
+ AsmVariantInfo const &Variant);
+ void addAsmOperand(size_t Start, size_t End,
+ std::string const &SeparatorCharacters);
};
/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -756,8 +775,7 @@ void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {
// Find the ResOperand that refers to the operand we're aliasing away
// and update it to refer to the combined operand instead.
- for (unsigned i = 0, e = ResOperands.size(); i != e; ++i) {
- ResOperand &Op = ResOperands[i];
+ for (ResOperand &Op : ResOperands) {
if (Op.Kind == ResOperand::RenderAsmOperand &&
Op.AsmOperandNum == (unsigned)SrcAsmOperand) {
Op.AsmOperandNum = DstAsmOperand;
@@ -768,8 +786,7 @@ void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {
AsmOperands.erase(AsmOperands.begin() + SrcAsmOperand);
// Adjust the ResOperand references to any AsmOperands that followed
// the one we just deleted.
- for (unsigned i = 0, e = ResOperands.size(); i != e; ++i) {
- ResOperand &Op = ResOperands[i];
+ for (ResOperand &Op : ResOperands) {
switch(Op.Kind) {
default:
// Nothing to do for operands that don't reference AsmOperands.
@@ -786,26 +803,58 @@ void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {
}
}
+/// extractSingletonRegisterForAsmOperand - Extract singleton register,
+/// if present, from specified token.
+static void
+extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op,
+ const AsmMatcherInfo &Info,
+ StringRef RegisterPrefix) {
+ StringRef Tok = Op.Token;
+
+ // If this token is not an isolated token, i.e., it isn't separated from
+ // other tokens (e.g. with whitespace), don't interpret it as a register name.
+ if (!Op.IsIsolatedToken)
+ return;
+
+ if (RegisterPrefix.empty()) {
+ std::string LoweredTok = Tok.lower();
+ if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok))
+ Op.SingletonReg = Reg->TheDef;
+ return;
+ }
+
+ if (!Tok.startswith(RegisterPrefix))
+ return;
+
+ StringRef RegName = Tok.substr(RegisterPrefix.size());
+ if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(RegName))
+ Op.SingletonReg = Reg->TheDef;
+
+ // If there is no register prefix (i.e. "%" in "%eax"), then this may
+ // be some random non-register token, just ignore it.
+ return;
+}
+
void MatchableInfo::initialize(const AsmMatcherInfo &Info,
SmallPtrSetImpl<Record*> &SingletonRegisters,
- int AsmVariantNo, std::string &RegisterPrefix) {
- AsmVariantID = AsmVariantNo;
+ AsmVariantInfo const &Variant) {
+ AsmVariantID = Variant.AsmVariantNo;
AsmString =
- CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo);
+ CodeGenInstruction::FlattenAsmStringVariants(AsmString,
+ Variant.AsmVariantNo);
- tokenizeAsmString(Info);
+ tokenizeAsmString(Info, Variant);
// Compute the require features.
- std::vector<Record*> Predicates =TheDef->getValueAsListOfDefs("Predicates");
- for (unsigned i = 0, e = Predicates.size(); i != e; ++i)
+ for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
if (const SubtargetFeatureInfo *Feature =
- Info.getSubtargetFeature(Predicates[i]))
+ Info.getSubtargetFeature(Predicate))
RequiredFeatures.push_back(Feature);
// Collect singleton registers, if used.
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- extractSingletonRegisterForAsmOperand(i, Info, RegisterPrefix);
- if (Record *Reg = AsmOperands[i].SingletonReg)
+ for (MatchableInfo::AsmOperand &Op : AsmOperands) {
+ extractSingletonRegisterForAsmOperand(Op, Info, Variant.RegisterPrefix);
+ if (Record *Reg = Op.SingletonReg)
SingletonRegisters.insert(Reg);
}
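
The new free function above strips an optional register prefix before consulting the register table. A self-contained sketch of the same lookup, with std::string_view standing in for llvm::StringRef and a plain map standing in for CodeGenTarget::getRegisterByName (both are assumptions for illustration):

    #include <iostream>
    #include <map>
    #include <string>
    #include <string_view>

    // Prefix-stripping register lookup.
    static bool lookUpRegister(std::string_view Tok, std::string_view Prefix,
                               const std::map<std::string, int> &Regs) {
      if (Prefix.empty())
        return Regs.count(std::string(Tok)) != 0; // (lowercasing step omitted)
      if (Tok.substr(0, Prefix.size()) != Prefix) // StringRef::startswith
        return false;
      return Regs.count(std::string(Tok.substr(Prefix.size()))) != 0;
    }

    int main() {
      const std::map<std::string, int> Regs{{"eax", 0}, {"ebx", 1}};
      std::cout << lookUpRegister("%eax", "%", Regs) << '\n'; // 1: prefix stripped
      std::cout << lookUpRegister("eax", "%", Regs) << '\n';  // 0: prefix required
    }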
@@ -818,9 +867,9 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info,
}
/// Append an AsmOperand for the given substring of AsmString.
-void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
+void MatchableInfo::addAsmOperand(size_t Start, size_t End,
+ std::string const &Separators) {
StringRef String = AsmString;
- StringRef Separators = "[]*! \t,";
  // Look for separators before and after to figure out if this token is
// isolated. Accept '$$' as that's how we escape '$'.
bool IsIsolatedToken =
@@ -831,92 +880,86 @@ void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
}
/// tokenizeAsmString - Tokenize a simplified assembly string.
-void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
+void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
+ AsmVariantInfo const &Variant) {
StringRef String = AsmString;
- unsigned Prev = 0;
- bool InTok = true;
- for (unsigned i = 0, e = String.size(); i != e; ++i) {
- switch (String[i]) {
- case '[':
- case ']':
- case '*':
- case '!':
- case ' ':
- case '\t':
- case ',':
- if (InTok) {
- addAsmOperand(Prev, i);
+ size_t Prev = 0;
+ bool InTok = false;
+ std::string Separators = Variant.TokenizingCharacters +
+ Variant.SeparatorCharacters;
+ for (size_t i = 0, e = String.size(); i != e; ++i) {
+    if (Variant.BreakCharacters.find(String[i]) != std::string::npos) {
+      if (InTok) {
+ addAsmOperand(Prev, i, Separators);
+ Prev = i;
+ }
+ InTok = true;
+ continue;
+ }
+    if (Variant.TokenizingCharacters.find(String[i]) != std::string::npos) {
+      if (InTok) {
+ addAsmOperand(Prev, i, Separators);
InTok = false;
}
- if (!isspace(String[i]) && String[i] != ',')
- addAsmOperand(i, i + 1);
+ addAsmOperand(i, i + 1, Separators);
Prev = i + 1;
- break;
-
+ continue;
+ }
+    if (Variant.SeparatorCharacters.find(String[i]) != std::string::npos) {
+      if (InTok) {
+ addAsmOperand(Prev, i, Separators);
+ InTok = false;
+ }
+ Prev = i + 1;
+ continue;
+ }
+ switch (String[i]) {
case '\\':
if (InTok) {
- addAsmOperand(Prev, i);
+ addAsmOperand(Prev, i, Separators);
InTok = false;
}
++i;
assert(i != String.size() && "Invalid quoted character");
- addAsmOperand(i, i + 1);
+ addAsmOperand(i, i + 1, Separators);
Prev = i + 1;
break;
case '$': {
- if (InTok) {
- addAsmOperand(Prev, i);
+ if (InTok && Prev != i) {
+ addAsmOperand(Prev, i, Separators);
InTok = false;
}
- // If this isn't "${", treat like a normal token.
+      // If this isn't "${", start a new identifier looking like "$xxx".
if (i + 1 == String.size() || String[i + 1] != '{') {
Prev = i;
break;
}
- StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
- assert(End != String.end() && "Missing brace in operand reference!");
- size_t EndPos = End - String.begin();
- addAsmOperand(i, EndPos+1);
+ size_t EndPos = String.find('}', i);
+ assert(EndPos != StringRef::npos &&
+ "Missing brace in operand reference!");
+ addAsmOperand(i, EndPos+1, Separators);
Prev = EndPos + 1;
i = EndPos;
break;
}
-
- case '.':
- if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) {
- if (InTok)
- addAsmOperand(Prev, i);
- Prev = i;
- }
- InTok = true;
- break;
-
default:
InTok = true;
}
}
if (InTok && Prev != String.size())
- addAsmOperand(Prev, StringRef::npos);
+ addAsmOperand(Prev, StringRef::npos, Separators);
// The first token of the instruction is the mnemonic, which must be a
// simple string, not a $foo variable or a singleton register.
if (AsmOperands.empty())
PrintFatalError(TheDef->getLoc(),
"Instruction '" + TheDef->getName() + "' has no tokens");
- Mnemonic = AsmOperands[0].Token;
- if (Mnemonic.empty())
- PrintFatalError(TheDef->getLoc(),
- "Missing instruction mnemonic");
- // FIXME : Check and raise an error if it is a register.
- if (Mnemonic[0] == '$')
- PrintFatalError(TheDef->getLoc(),
- "Invalid instruction mnemonic '" + Mnemonic + "'!");
-
- // Remove the first operand, it is tracked in the mnemonic field.
- AsmOperands.erase(AsmOperands.begin());
+ assert(!AsmOperands[0].Token.empty());
+ if (AsmOperands[0].Token[0] != '$')
+ Mnemonic = AsmOperands[0].Token;
}
bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
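
The rewritten tokenizer above is driven by three per-variant character classes instead of a hard-coded separator set: break characters end the current token but begin the next one, tokenizing characters become one-character tokens of their own, and separator characters are discarded. A standalone sketch of that classification loop (names and the Flush helper are illustrative):

    #include <iostream>
    #include <string>
    #include <vector>

    // Tokenizer driven by three configurable character classes, mirroring
    // TokenizingCharacters / SeparatorCharacters / BreakCharacters.
    static std::vector<std::string> tokenize(const std::string &S,
                                             const std::string &Tokenizing,
                                             const std::string &Separators,
                                             const std::string &Breaks) {
      std::vector<std::string> Toks;
      size_t Prev = 0;
      bool InTok = false;
      auto Flush = [&](size_t End) {
        if (InTok && End > Prev)
          Toks.push_back(S.substr(Prev, End - Prev));
        InTok = false;
      };
      for (size_t i = 0, e = S.size(); i != e; ++i) {
        if (Breaks.find(S[i]) != std::string::npos) {
          Flush(i);        // end the current token...
          Prev = i;        // ...but keep the break character in the next one
          InTok = true;
        } else if (Tokenizing.find(S[i]) != std::string::npos) {
          Flush(i);
          Toks.push_back(S.substr(i, 1)); // emitted as a token of its own
          Prev = i + 1;
        } else if (Separators.find(S[i]) != std::string::npos) {
          Flush(i);        // separators are discarded entirely
          Prev = i + 1;
        } else if (!InTok) {
          Prev = i;
          InTok = true;
        }
      }
      Flush(S.size());
      return Toks;
    }

    int main() {
      for (const auto &T : tokenize("ld [r0], r1", "[]", " ,", ""))
        std::cout << '"' << T << "\"\n"; // "ld" "[" "r0" "]" "r1"
    }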
@@ -946,8 +989,8 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
// Also, check for instructions which reference the operand multiple times;
// this implies a constraint we would not honor.
std::set<std::string> OperandNames;
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- StringRef Tok = AsmOperands[i].Token;
+ for (const AsmOperand &Op : AsmOperands) {
+ StringRef Tok = Op.Token;
if (Tok[0] == '$' && Tok.find(':') != StringRef::npos)
PrintFatalError(TheDef->getLoc(),
"matchable with operand modifier '" + Tok +
@@ -974,38 +1017,6 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
return true;
}
-/// extractSingletonRegisterForAsmOperand - Extract singleton register,
-/// if present, from specified token.
-void MatchableInfo::
-extractSingletonRegisterForAsmOperand(unsigned OperandNo,
- const AsmMatcherInfo &Info,
- std::string &RegisterPrefix) {
- StringRef Tok = AsmOperands[OperandNo].Token;
-
- // If this token is not an isolated token, i.e., it isn't separated from
- // other tokens (e.g. with whitespace), don't interpret it as a register name.
- if (!AsmOperands[OperandNo].IsIsolatedToken)
- return;
-
- if (RegisterPrefix.empty()) {
- std::string LoweredTok = Tok.lower();
- if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok))
- AsmOperands[OperandNo].SingletonReg = Reg->TheDef;
- return;
- }
-
- if (!Tok.startswith(RegisterPrefix))
- return;
-
- StringRef RegName = Tok.substr(RegisterPrefix.size());
- if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(RegName))
- AsmOperands[OperandNo].SingletonReg = Reg->TheDef;
-
- // If there is no register prefix (i.e. "%" in "%eax"), then this may
- // be some random non-register token, just ignore it.
- return;
-}
-
static std::string getEnumNameForToken(StringRef Str) {
std::string Res;
@@ -1364,8 +1375,15 @@ void AsmMatcherInfo::buildInfo() {
Record *AsmVariant = Target.getAsmParserVariant(VC);
std::string CommentDelimiter =
AsmVariant->getValueAsString("CommentDelimiter");
- std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
- int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
+ AsmVariantInfo Variant;
+ Variant.RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
+ Variant.TokenizingCharacters =
+ AsmVariant->getValueAsString("TokenizingCharacters");
+ Variant.SeparatorCharacters =
+ AsmVariant->getValueAsString("SeparatorCharacters");
+ Variant.BreakCharacters =
+ AsmVariant->getValueAsString("BreakCharacters");
+ Variant.AsmVariantNo = AsmVariant->getValueAsInt("Variant");
for (const CodeGenInstruction *CGI : Target.instructions()) {
@@ -1378,9 +1396,9 @@ void AsmMatcherInfo::buildInfo() {
if (CGI->TheDef->getValueAsBit("isCodeGenOnly"))
continue;
- std::unique_ptr<MatchableInfo> II(new MatchableInfo(*CGI));
+ auto II = llvm::make_unique<MatchableInfo>(*CGI);
- II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
+ II->initialize(*this, SingletonRegisters, Variant);
// Ignore instructions which shouldn't be matched and diagnose invalid
// instruction definitions with an error.
@@ -1396,7 +1414,8 @@ void AsmMatcherInfo::buildInfo() {
Records.getAllDerivedDefinitions("InstAlias");
for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
auto Alias = llvm::make_unique<CodeGenInstAlias>(AllInstAliases[i],
- AsmVariantNo, Target);
+ Variant.AsmVariantNo,
+ Target);
// If the tblgen -match-prefix option is specified (for tblgen hackers),
// filter the set of instruction aliases we consider, based on the target
@@ -1405,9 +1424,9 @@ void AsmMatcherInfo::buildInfo() {
.startswith( MatchPrefix))
continue;
- std::unique_ptr<MatchableInfo> II(new MatchableInfo(std::move(Alias)));
+ auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
- II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
+ II->initialize(*this, SingletonRegisters, Variant);
// Validate the alias definitions.
II->validate(CommentDelimiter, false);
@@ -1434,7 +1453,7 @@ void AsmMatcherInfo::buildInfo() {
StringRef Token = Op.Token;
// Check for singleton registers.
- if (Record *RegRecord = II->AsmOperands[i].SingletonReg) {
+ if (Record *RegRecord = Op.SingletonReg) {
Op.Class = RegisterClasses[RegRecord];
assert(Op.Class && Op.Class->Registers.size() == 1 &&
"Unexpected class for singleton register");
@@ -1474,7 +1493,7 @@ void AsmMatcherInfo::buildInfo() {
II->TheDef->getValueAsString("TwoOperandAliasConstraint");
if (Constraint != "") {
// Start by making a copy of the original matchable.
- std::unique_ptr<MatchableInfo> AliasII(new MatchableInfo(*II));
+ auto AliasII = llvm::make_unique<MatchableInfo>(*II);
// Adjust it to be a two-operand alias.
AliasII->formTwoOperandAlias(Constraint);
@@ -1494,8 +1513,7 @@ void AsmMatcherInfo::buildInfo() {
// information.
std::vector<Record*> AllTokenAliases =
Records.getAllDerivedDefinitions("TokenAlias");
- for (unsigned i = 0, e = AllTokenAliases.size(); i != e; ++i) {
- Record *Rec = AllTokenAliases[i];
+ for (Record *Rec : AllTokenAliases) {
ClassInfo *FromClass = getTokenClass(Rec->getValueAsString("FromToken"));
ClassInfo *ToClass = getTokenClass(Rec->getValueAsString("ToToken"));
if (FromClass == ToClass)
@@ -1603,9 +1621,7 @@ void MatchableInfo::buildInstructionResultOperands() {
// Loop over all operands of the result instruction, determining how to
// populate them.
- for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
- const CGIOperandList::OperandInfo &OpInfo = ResultInst->Operands[i];
-
+ for (const CGIOperandList::OperandInfo &OpInfo : ResultInst->Operands) {
// If this is a tied operand, just copy from the previously handled operand.
int TiedOp = -1;
if (OpInfo.MINumOperands == 1)
@@ -1701,7 +1717,7 @@ void MatchableInfo::buildAliasResultOperands() {
}
static unsigned getConverterOperandID(const std::string &Name,
- SetVector<std::string> &Table,
+ SmallSetVector<std::string, 16> &Table,
bool &IsNew) {
IsNew = Table.insert(Name);
@@ -1717,8 +1733,8 @@ static unsigned getConverterOperandID(const std::string &Name,
static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
std::vector<std::unique_ptr<MatchableInfo>> &Infos,
raw_ostream &OS) {
- SetVector<std::string> OperandConversionKinds;
- SetVector<std::string> InstructionConversionKinds;
+ SmallSetVector<std::string, 16> OperandConversionKinds;
+ SmallSetVector<std::string, 16> InstructionConversionKinds;
std::vector<std::vector<uint8_t> > ConversionTable;
size_t MaxRowLength = 2; // minimum is custom converter plus terminator.
@@ -1850,7 +1866,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Add the operand entry to the instruction kind conversion row.
ConversionRow.push_back(ID);
- ConversionRow.push_back(OpInfo.AsmOperandNum + 1);
+ ConversionRow.push_back(OpInfo.AsmOperandNum);
if (!IsNewConverter)
break;
@@ -1959,7 +1975,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
continue;
// Add the row to the table.
- ConversionTable.push_back(ConversionRow);
+ ConversionTable.push_back(std::move(ConversionRow));
}
// Finish up the converter driver function.
@@ -1979,10 +1995,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Output the instruction conversion kind enum.
OS << "enum InstructionConversionKind {\n";
- for (SetVector<std::string>::const_iterator
- i = InstructionConversionKinds.begin(),
- e = InstructionConversionKinds.end(); i != e; ++i)
- OS << " " << *i << ",\n";
+ for (const std::string &Signature : InstructionConversionKinds)
+ OS << " " << Signature << ",\n";
OS << " CVT_NUM_SIGNATURES\n";
OS << "};\n\n";
@@ -2105,12 +2119,7 @@ static void emitIsSubclass(CodeGenTarget &Target,
OS << " if (A == B)\n";
OS << " return true;\n\n";
- std::string OStr;
- raw_string_ostream SS(OStr);
- unsigned Count = 0;
- SS << " switch (A) {\n";
- SS << " default:\n";
- SS << " return false;\n";
+ bool EmittedSwitch = false;
for (const auto &A : Infos) {
std::vector<StringRef> SuperClasses;
for (const auto &B : Infos) {
@@ -2120,33 +2129,38 @@ static void emitIsSubclass(CodeGenTarget &Target,
if (SuperClasses.empty())
continue;
- ++Count;
- SS << "\n case " << A.Name << ":\n";
+ // If this is the first SuperClass, emit the switch header.
+ if (!EmittedSwitch) {
+ OS << " switch (A) {\n";
+ OS << " default:\n";
+ OS << " return false;\n";
+ EmittedSwitch = true;
+ }
+
+ OS << "\n case " << A.Name << ":\n";
if (SuperClasses.size() == 1) {
- SS << " return B == " << SuperClasses.back().str() << ";\n";
+ OS << " return B == " << SuperClasses.back() << ";\n";
continue;
}
if (!SuperClasses.empty()) {
- SS << " switch (B) {\n";
- SS << " default: return false;\n";
- for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- SS << " case " << SuperClasses[i].str() << ": return true;\n";
- SS << " }\n";
+ OS << " switch (B) {\n";
+ OS << " default: return false;\n";
+ for (StringRef SC : SuperClasses)
+ OS << " case " << SC << ": return true;\n";
+ OS << " }\n";
} else {
// No case statement to emit
- SS << " return false;\n";
+ OS << " return false;\n";
}
}
- SS << " }\n";
+  if (EmittedSwitch)
+    OS << "  }\n";
- // If there were case statements emitted into the string stream, write them
- // to the output stream, otherwise write the default.
- if (Count)
- OS << SS.str();
- else
+  // If no case statements were emitted, write the default return instead.
+ if (!EmittedSwitch)
OS << " return false;\n";
OS << "}\n\n";
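
The refactor above drops the intermediate raw_string_ostream: instead of buffering the whole switch body and checking a counter afterwards, the switch header is emitted lazily when the first case appears. A distilled sketch of the lazily-opened-block pattern:

    #include <iostream>
    #include <vector>

    // Emit a switch only if at least one case exists, without buffering
    // the whole body in a string first.
    static void emitSwitch(std::ostream &OS, const std::vector<int> &Cases) {
      bool EmittedSwitch = false;
      for (int C : Cases) {
        if (!EmittedSwitch) {     // open the block lazily, on first use
          OS << "switch (A) {\n  default: return false;\n";
          EmittedSwitch = true;
        }
        OS << "  case " << C << ": return true;\n";
      }
      if (EmittedSwitch)
        OS << "}\n";
      else
        OS << "return false;\n";  // fallback when nothing was emitted
    }

    int main() {
      emitSwitch(std::cout, {1, 3});
      emitSwitch(std::cout, {});
    }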
@@ -2575,13 +2589,17 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the next operand index.\n";
- OS << " unsigned NextOpNum = Operands.size()-1;\n";
+ OS << " unsigned NextOpNum = Operands.size();\n";
// Emit code to search the table.
OS << " // Search the table.\n";
OS << " std::pair<const OperandMatchEntry*, const OperandMatchEntry*>";
- OS << " MnemonicRange =\n";
- OS << " std::equal_range(OperandMatchTable, OperandMatchTable+"
+ OS << " MnemonicRange\n";
+ OS << " (OperandMatchTable, OperandMatchTable+";
+ OS << Info.OperandMatchInfo.size() << ");\n";
+ OS << " if(!Mnemonic.empty())\n";
+ OS << " MnemonicRange = std::equal_range(OperandMatchTable,";
+ OS << " OperandMatchTable+"
<< Info.OperandMatchInfo.size() << ", Mnemonic,\n"
<< " LessOpcodeOperand());\n\n";
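
The generated custom-operand parser now starts with a range covering the whole table and only narrows it via std::equal_range when the mnemonic is non-empty. For reference, a compilable sketch of equal_range over a sorted table with a heterogeneous comparator; the MatchEntry layout here is illustrative, not the generated one:

    #include <algorithm>
    #include <cstring>
    #include <iostream>

    struct MatchEntry { const char *Mnemonic; int Opcode; };

    // The comparator must be callable in both argument orders.
    struct LessOpcode {
      bool operator()(const MatchEntry &E, const char *M) const {
        return std::strcmp(E.Mnemonic, M) < 0;
      }
      bool operator()(const char *M, const MatchEntry &E) const {
        return std::strcmp(M, E.Mnemonic) < 0;
      }
    };

    int main() {
      // Must be sorted by mnemonic for equal_range to be valid.
      static const MatchEntry Table[] = {
          {"add", 1}, {"add", 2}, {"mov", 3}, {"sub", 4}};
      auto Range = std::equal_range(std::begin(Table), std::end(Table), "add",
                                    LessOpcode());
      for (auto *I = Range.first; I != Range.second; ++I)
        std::cout << I->Mnemonic << " -> opcode " << I->Opcode << "\n";
    }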
@@ -2682,7 +2700,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "&Operands);\n";
OS << " void convertToMapAndConstraints(unsigned Kind,\n ";
OS << " const OperandVector &Operands) override;\n";
- OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) override;\n";
+ OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n";
OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n"
<< " MCInst &Inst,\n"
<< " uint64_t &ErrorInfo,"
@@ -2891,8 +2909,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< " bool matchingInlineAsm, unsigned VariantID) {\n";
OS << " // Eliminate obvious mismatches.\n";
- OS << " if (Operands.size() > " << (MaxNumOperands+1) << ") {\n";
- OS << " ErrorInfo = " << (MaxNumOperands+1) << ";\n";
+ OS << " if (Operands.size() > " << MaxNumOperands << ") {\n";
+ OS << " ErrorInfo = " << MaxNumOperands << ";\n";
OS << " return Match_InvalidOperand;\n";
OS << " }\n\n";
@@ -2901,7 +2919,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the instruction mnemonic, which is the first token.\n";
- OS << " StringRef Mnemonic = ((" << Target.getName()
+ OS << " StringRef Mnemonic;\n";
+ OS << " if (Operands[0]->isToken())\n";
+ OS << " Mnemonic = ((" << Target.getName()
<< "Operand&)*Operands[0]).getToken();\n\n";
if (HasMnemonicAliases) {
@@ -2932,8 +2952,11 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
}
OS << " }\n";
OS << " // Search the table.\n";
- OS << " std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n";
- OS << " std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n";
+ OS << " std::pair<const MatchEntry*, const MatchEntry*> "
+ "MnemonicRange(Start, End);\n";
+ OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n";
+ OS << " if (!Mnemonic.empty())\n";
+ OS << " MnemonicRange = std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n";
OS << " // Return a more specific error code if no mnemonics match.\n";
OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
@@ -2943,28 +2966,23 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< "*ie = MnemonicRange.second;\n";
OS << " it != ie; ++it) {\n";
- OS << " // equal_range guarantees that instruction mnemonic matches.\n";
- OS << " assert(Mnemonic == it->getMnemonic());\n";
-
// Emit check that the subclasses match.
OS << " bool OperandsValid = true;\n";
- OS << " for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n";
- OS << " if (i + 1 >= Operands.size()) {\n";
- OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n";
- OS << " if (!OperandsValid) ErrorInfo = i + 1;\n";
+ OS << " for (unsigned i = SIndex; i != " << MaxNumOperands << "; ++i) {\n";
+ OS << " auto Formal = static_cast<MatchClassKind>(it->Classes[i]);\n";
+ OS << " if (i >= Operands.size()) {\n";
+ OS << " OperandsValid = (Formal == " <<"InvalidMatchClass);\n";
+ OS << " if (!OperandsValid) ErrorInfo = i;\n";
OS << " break;\n";
OS << " }\n";
- OS << " unsigned Diag = validateOperandClass(*Operands[i+1],\n";
- OS.indent(43);
- OS << "(MatchClassKind)it->Classes[i]);\n";
+ OS << " MCParsedAsmOperand &Actual = *Operands[i];\n";
+ OS << " unsigned Diag = validateOperandClass(Actual, Formal);\n";
OS << " if (Diag == Match_Success)\n";
OS << " continue;\n";
OS << " // If the generic handler indicates an invalid operand\n";
OS << " // failure, check for a special case.\n";
OS << " if (Diag == Match_InvalidOperand) {\n";
- OS << " Diag = validateTargetOperandClass(*Operands[i+1],\n";
- OS.indent(43);
- OS << "(MatchClassKind)it->Classes[i]);\n";
+ OS << " Diag = validateTargetOperandClass(Actual, Formal);\n";
OS << " if (Diag == Match_Success)\n";
OS << " continue;\n";
OS << " }\n";
@@ -2973,8 +2991,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " // If we already had a match that only failed due to a\n";
OS << " // target predicate, that diagnostic is preferred.\n";
OS << " if (!HadMatchOtherThanPredicate &&\n";
- OS << " (it == MnemonicRange.first || ErrorInfo <= i+1)) {\n";
- OS << " ErrorInfo = i+1;\n";
+ OS << " (it == MnemonicRange.first || ErrorInfo <= i)) {\n";
+ OS << " ErrorInfo = i;\n";
OS << " // InvalidOperand is the default. Prefer specificity.\n";
OS << " if (Diag != Match_InvalidOperand)\n";
OS << " RetCode = Diag;\n";
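
With the mnemonic no longer occupying operand slot 0 unconditionally, the emitted matching loop starts at SIndex and validates each actual operand against the formal class, falling back to a target hook when the generic check reports Match_InvalidOperand. A sketch of that two-stage validation; every name here, and the wildcard rule in the target hook, are hypothetical stand-ins:

    #include <iostream>
    #include <vector>

    enum Diag { Match_Success, Match_InvalidOperand };

    static Diag validateOperandClass(int Actual, int Formal) {
      return Actual == Formal ? Match_Success : Match_InvalidOperand;
    }
    static Diag validateTargetOperandClass(int Actual, int Formal) {
      // Target hook: accept formal class 0 as a wildcard, say.
      return Formal == 0 ? Match_Success : Match_InvalidOperand;
    }

    static bool operandsValid(const std::vector<int> &Actuals,
                              const std::vector<int> &Formals, unsigned SIndex) {
      for (unsigned i = SIndex; i != Formals.size(); ++i) {
        if (i >= Actuals.size())
          return false; // ran out of actual operands
        Diag D = validateOperandClass(Actuals[i], Formals[i]);
        if (D == Match_Success)
          continue;
        if (D == Match_InvalidOperand &&
            validateTargetOperandClass(Actuals[i], Formals[i]) == Match_Success)
          continue; // special case accepted by the target
        return false;
      }
      return true;
    }

    int main() {
      std::cout << operandsValid({5, 7}, {5, 0}, 0) << "\n"; // 1: wildcard hits
      std::cout << operandsValid({5, 7}, {5, 9}, 0) << "\n"; // 0: no match
    }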
@@ -3029,7 +3047,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasDeprecation) {
OS << " std::string Info;\n";
- OS << " if (MII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, STI, Info)) {\n";
+ OS << " if (MII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
OS << " SMLoc Loc = ((" << Target.getName()
<< "Operand&)*Operands[0]).getStartLoc();\n";
OS << " getParser().Warning(Loc, Info, None);\n";
diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 8163f68..cc74f9e 100644
--- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -586,6 +586,8 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
const auto &Registers = Target.getRegBank().getRegisters();
std::vector<Record*> AltNameIndices = Target.getRegAltNameIndices();
bool hasAltNames = AltNameIndices.size() > 1;
+ std::string Namespace =
+ Registers.front().TheDef->getValueAsString("Namespace");
O <<
"\n\n/// getRegisterName - This method is automatically generated by tblgen\n"
@@ -610,9 +612,9 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
O << " switch(AltIdx) {\n"
<< " default: llvm_unreachable(\"Invalid register alt name index!\");\n";
for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) {
- std::string Namespace = AltNameIndices[1]->getValueAsString("Namespace");
std::string AltName(AltNameIndices[i]->getName());
- O << " case " << Namespace << "::" << AltName << ":\n"
+ std::string Prefix = !Namespace.empty() ? Namespace + "::" : "";
+ O << " case " << Prefix << AltName << ":\n"
<< " assert(*(AsmStrs" << AltName << "+RegAsmOffset"
<< AltName << "[RegNo-1]) &&\n"
<< " \"Invalid alt name index for register!\");\n"
@@ -727,7 +729,6 @@ public:
++I;
}
}
- OS.flush();
// Emit the string.
O.indent(6) << "AsmString = \"" << OutString << "\";\n";
@@ -736,14 +737,13 @@ public:
O.indent(4) << '}';
}
- bool operator==(const IAPrinter &RHS) {
+ bool operator==(const IAPrinter &RHS) const {
if (Conds.size() != RHS.Conds.size())
return false;
unsigned Idx = 0;
- for (std::vector<std::string>::iterator
- I = Conds.begin(), E = Conds.end(); I != E; ++I)
- if (*I != RHS.Conds[Idx++])
+ for (const auto &str : Conds)
+ if (str != RHS.Conds[Idx++])
return false;
return true;
@@ -762,12 +762,12 @@ static unsigned CountNumOperands(StringRef AsmString, unsigned Variant) {
namespace {
struct AliasPriorityComparator {
- typedef std::pair<CodeGenInstAlias *, int> ValueType;
+ typedef std::pair<CodeGenInstAlias, int> ValueType;
bool operator()(const ValueType &LHS, const ValueType &RHS) {
if (LHS.second == RHS.second) {
// We don't actually care about the order, but for consistency it
// shouldn't depend on pointer comparisons.
- return LHS.first->TheDef->getName() < RHS.first->TheDef->getName();
+ return LHS.first.TheDef->getName() < RHS.first.TheDef->getName();
}
// Aliases with larger priorities should be considered first.
@@ -796,12 +796,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
Records.getAllDerivedDefinitions("InstAlias");
// Create a map from the qualified name to a list of potential matches.
- typedef std::set<std::pair<CodeGenInstAlias*, int>, AliasPriorityComparator>
+ typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator>
AliasWithPriority;
std::map<std::string, AliasWithPriority> AliasMap;
for (std::vector<Record*>::iterator
I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) {
- CodeGenInstAlias *Alias = new CodeGenInstAlias(*I, Variant, Target);
const Record *R = *I;
int Priority = R->getValueAsInt("EmitPriority");
if (Priority < 1)
@@ -809,13 +808,13 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
const DagInit *DI = R->getValueAsDag("ResultInst");
const DefInit *Op = cast<DefInit>(DI->getOperator());
- AliasMap[getQualifiedName(Op->getDef())].insert(std::make_pair(Alias,
- Priority));
+ AliasMap[getQualifiedName(Op->getDef())].insert(
+ std::make_pair(CodeGenInstAlias(*I, Variant, Target), Priority));
}
// A map of which conditions need to be met for each instruction operand
// before it can be matched to the mnemonic.
- std::map<std::string, std::vector<IAPrinter*> > IAPrinterMap;
+ std::map<std::string, std::vector<IAPrinter>> IAPrinterMap;
// A list of MCOperandPredicates for all operands in use, and the reverse map
std::vector<const Record*> MCOpPredicates;
@@ -823,25 +822,24 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (auto &Aliases : AliasMap) {
for (auto &Alias : Aliases.second) {
- const CodeGenInstAlias *CGA = Alias.first;
- unsigned LastOpNo = CGA->ResultInstOperandIndex.size();
+ const CodeGenInstAlias &CGA = Alias.first;
+ unsigned LastOpNo = CGA.ResultInstOperandIndex.size();
unsigned NumResultOps =
- CountNumOperands(CGA->ResultInst->AsmString, Variant);
+ CountNumOperands(CGA.ResultInst->AsmString, Variant);
// Don't emit the alias if it has more operands than what it's aliasing.
- if (NumResultOps < CountNumOperands(CGA->AsmString, Variant))
+ if (NumResultOps < CountNumOperands(CGA.AsmString, Variant))
continue;
- IAPrinter *IAP = new IAPrinter(CGA->Result->getAsString(),
- CGA->AsmString);
+ IAPrinter IAP(CGA.Result->getAsString(), CGA.AsmString);
unsigned NumMIOps = 0;
- for (auto &Operand : CGA->ResultOperands)
+ for (auto &Operand : CGA.ResultOperands)
NumMIOps += Operand.getMINumOperands();
std::string Cond;
Cond = std::string("MI->getNumOperands() == ") + llvm::utostr(NumMIOps);
- IAP->addCond(Cond);
+ IAP.addCond(Cond);
bool CantHandle = false;
@@ -849,7 +847,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (unsigned i = 0, e = LastOpNo; i != e; ++i) {
std::string Op = "MI->getOperand(" + llvm::utostr(MIOpNum) + ")";
- const CodeGenInstAlias::ResultOperand &RO = CGA->ResultOperands[i];
+ const CodeGenInstAlias::ResultOperand &RO = CGA.ResultOperands[i];
switch (RO.Kind) {
case CodeGenInstAlias::ResultOperand::K_Record: {
@@ -875,11 +873,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (Rec->isSubClassOf("RegisterOperand"))
Rec = Rec->getValueAsDef("RegClass");
if (Rec->isSubClassOf("RegisterClass")) {
- IAP->addCond(Op + ".isReg()");
+ IAP.addCond(Op + ".isReg()");
- if (!IAP->isOpMapped(ROName)) {
- IAP->addOperand(ROName, MIOpNum, PrintMethodIdx);
- Record *R = CGA->ResultOperands[i].getRecord();
+ if (!IAP.isOpMapped(ROName)) {
+ IAP.addOperand(ROName, MIOpNum, PrintMethodIdx);
+ Record *R = CGA.ResultOperands[i].getRecord();
if (R->isSubClassOf("RegisterOperand"))
R = R->getValueAsDef("RegClass");
Cond = std::string("MRI.getRegClass(") + Target.getName() + "::" +
@@ -887,12 +885,12 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
".contains(" + Op + ".getReg())";
} else {
Cond = Op + ".getReg() == MI->getOperand(" +
- llvm::utostr(IAP->getOpIndex(ROName)) + ").getReg()";
+ llvm::utostr(IAP.getOpIndex(ROName)) + ").getReg()";
}
} else {
// Assume all printable operands are desired for now. This can be
// overridden in the InstAlias instantiation if necessary.
- IAP->addOperand(ROName, MIOpNum, PrintMethodIdx);
+ IAP.addOperand(ROName, MIOpNum, PrintMethodIdx);
// There might be an additional predicate on the MCOperand
unsigned Entry = MCOpPredicateMap[Rec];
@@ -905,42 +903,41 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break; // No conditions on this operand at all
}
Cond = Target.getName() + ClassName + "ValidateMCOperand(" +
- Op + ", " + llvm::utostr(Entry) + ")";
+ Op + ", STI, " + llvm::utostr(Entry) + ")";
}
// for all subcases of ResultOperand::K_Record:
- IAP->addCond(Cond);
+ IAP.addCond(Cond);
break;
}
case CodeGenInstAlias::ResultOperand::K_Imm: {
// Just because the alias has an immediate result, doesn't mean the
// MCInst will. An MCExpr could be present, for example.
- IAP->addCond(Op + ".isImm()");
+ IAP.addCond(Op + ".isImm()");
- Cond = Op + ".getImm() == "
- + llvm::utostr(CGA->ResultOperands[i].getImm());
- IAP->addCond(Cond);
+ Cond = Op + ".getImm() == " +
+ llvm::utostr(CGA.ResultOperands[i].getImm());
+ IAP.addCond(Cond);
break;
}
case CodeGenInstAlias::ResultOperand::K_Reg:
// If this is zero_reg, something's playing tricks we're not
// equipped to handle.
- if (!CGA->ResultOperands[i].getRegister()) {
+ if (!CGA.ResultOperands[i].getRegister()) {
CantHandle = true;
break;
}
- Cond = Op + ".getReg() == " + Target.getName() +
- "::" + CGA->ResultOperands[i].getRegister()->getName();
- IAP->addCond(Cond);
+ Cond = Op + ".getReg() == " + Target.getName() + "::" +
+ CGA.ResultOperands[i].getRegister()->getName();
+ IAP.addCond(Cond);
break;
}
- if (!IAP) break;
MIOpNum += RO.getMINumOperands();
}
if (CantHandle) continue;
- IAPrinterMap[Aliases.first].push_back(IAP);
+ IAPrinterMap[Aliases.first].push_back(std::move(IAP));
}
}
@@ -959,30 +956,26 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
std::string Cases;
raw_string_ostream CasesO(Cases);
- for (std::map<std::string, std::vector<IAPrinter*> >::iterator
- I = IAPrinterMap.begin(), E = IAPrinterMap.end(); I != E; ++I) {
- std::vector<IAPrinter*> &IAPs = I->second;
+ for (auto &Entry : IAPrinterMap) {
+ std::vector<IAPrinter> &IAPs = Entry.second;
std::vector<IAPrinter*> UniqueIAPs;
- for (std::vector<IAPrinter*>::iterator
- II = IAPs.begin(), IE = IAPs.end(); II != IE; ++II) {
- IAPrinter *LHS = *II;
+ for (auto &LHS : IAPs) {
bool IsDup = false;
- for (std::vector<IAPrinter*>::iterator
- III = IAPs.begin(), IIE = IAPs.end(); III != IIE; ++III) {
- IAPrinter *RHS = *III;
- if (LHS != RHS && *LHS == *RHS) {
+ for (const auto &RHS : IAPs) {
+ if (&LHS != &RHS && LHS == RHS) {
IsDup = true;
break;
}
}
- if (!IsDup) UniqueIAPs.push_back(LHS);
+ if (!IsDup)
+ UniqueIAPs.push_back(&LHS);
}
if (UniqueIAPs.empty()) continue;
- CasesO.indent(2) << "case " << I->first << ":\n";
+ CasesO.indent(2) << "case " << Entry.first << ":\n";
for (std::vector<IAPrinter*>::iterator
II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) {
@@ -1005,8 +998,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (!MCOpPredicates.empty())
O << "static bool " << Target.getName() << ClassName
- << "ValidateMCOperand(\n"
- << " const MCOperand &MCOp, unsigned PredicateIndex);\n";
+ << "ValidateMCOperand(const MCOperand &MCOp,\n"
+ << " const MCSubtargetInfo &STI,\n"
+ << " unsigned PredicateIndex);\n";
O << HeaderO.str();
O.indent(2) << "const char *AsmString;\n";
@@ -1078,8 +1072,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (!MCOpPredicates.empty()) {
O << "static bool " << Target.getName() << ClassName
- << "ValidateMCOperand(\n"
- << " const MCOperand &MCOp, unsigned PredicateIndex) {\n"
+ << "ValidateMCOperand(const MCOperand &MCOp,\n"
+ << " const MCSubtargetInfo &STI,\n"
+ << " unsigned PredicateIndex) {\n"
<< " switch (PredicateIndex) {\n"
<< " default:\n"
<< " llvm_unreachable(\"Unknown MCOperandPredicate kind\");\n"
diff --git a/contrib/llvm/utils/TableGen/Attributes.cpp b/contrib/llvm/utils/TableGen/Attributes.cpp
new file mode 100644
index 0000000..7b001bf
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/Attributes.cpp
@@ -0,0 +1,156 @@
+//===- Attributes.cpp - Generate attributes -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "attr-enum"
+
+namespace {
+
+class Attributes {
+public:
+ Attributes(RecordKeeper &R) : Records(R) {}
+ void emit(raw_ostream &OS);
+
+private:
+ void emitTargetIndependentEnums(raw_ostream &OS);
+ void emitFnAttrCompatCheck(raw_ostream &OS, bool IsStringAttr);
+
+ void printEnumAttrClasses(raw_ostream &OS,
+ const std::vector<Record *> &Records);
+ void printStrBoolAttrClasses(raw_ostream &OS,
+ const std::vector<Record *> &Records);
+
+ RecordKeeper &Records;
+};
+
+} // End anonymous namespace.
+
+void Attributes::emitTargetIndependentEnums(raw_ostream &OS) {
+ OS << "#ifdef GET_ATTR_ENUM\n";
+ OS << "#undef GET_ATTR_ENUM\n";
+
+ std::vector<Record*> Attrs =
+ Records.getAllDerivedDefinitions("EnumAttr");
+
+ for (auto A : Attrs)
+ OS << A->getName() << ",\n";
+
+ OS << "#endif\n";
+}
+
+void Attributes::emitFnAttrCompatCheck(raw_ostream &OS, bool IsStringAttr) {
+ OS << "#ifdef GET_ATTR_COMPAT_FUNC\n";
+ OS << "#undef GET_ATTR_COMPAT_FUNC\n";
+
+ OS << "struct EnumAttr {\n";
+ OS << " static bool isSet(const Function &Fn,\n";
+ OS << " Attribute::AttrKind Kind) {\n";
+ OS << " return Fn.hasFnAttribute(Kind);\n";
+ OS << " }\n\n";
+ OS << " static void set(Function &Fn,\n";
+ OS << " Attribute::AttrKind Kind, bool Val) {\n";
+ OS << " if (Val)\n";
+ OS << " Fn.addFnAttr(Kind);\n";
+ OS << " else\n";
+ OS << " Fn.removeFnAttr(Kind);\n";
+ OS << " }\n";
+ OS << "};\n\n";
+
+ OS << "struct StrBoolAttr {\n";
+ OS << " static bool isSet(const Function &Fn,\n";
+ OS << " StringRef Kind) {\n";
+ OS << " auto A = Fn.getFnAttribute(Kind);\n";
+ OS << " return A.getValueAsString().equals(\"true\");\n";
+ OS << " }\n\n";
+ OS << " static void set(Function &Fn,\n";
+ OS << " StringRef Kind, bool Val) {\n";
+ OS << " Fn.addFnAttr(Kind, Val ? \"true\" : \"false\");\n";
+ OS << " }\n";
+ OS << "};\n\n";
+
+  printEnumAttrClasses(OS, Records.getAllDerivedDefinitions("EnumAttr"));
+  printStrBoolAttrClasses(OS, Records.getAllDerivedDefinitions("StrBoolAttr"));
+
+ OS << "static inline bool hasCompatibleFnAttrs(const Function &Caller,\n"
+ << " const Function &Callee) {\n";
+ OS << " bool Ret = true;\n\n";
+
+ std::vector<Record *> CompatRules =
+ Records.getAllDerivedDefinitions("CompatRule");
+
+ for (auto *Rule : CompatRules) {
+ std::string FuncName = Rule->getValueAsString("CompatFunc");
+ OS << " Ret &= " << FuncName << "(Caller, Callee);\n";
+ }
+
+ OS << "\n";
+ OS << " return Ret;\n";
+ OS << "}\n\n";
+
+ std::vector<Record *> MergeRules =
+ Records.getAllDerivedDefinitions("MergeRule");
+ OS << "static inline void mergeFnAttrs(Function &Caller,\n"
+ << " const Function &Callee) {\n";
+
+ for (auto *Rule : MergeRules) {
+ std::string FuncName = Rule->getValueAsString("MergeFunc");
+ OS << " " << FuncName << "(Caller, Callee);\n";
+ }
+
+ OS << "}\n\n";
+
+ OS << "#endif\n";
+}
+
+void Attributes::printEnumAttrClasses(raw_ostream &OS,
+ const std::vector<Record *> &Records) {
+ OS << "// EnumAttr classes\n";
+ for (const auto *R : Records) {
+ OS << "struct " << R->getName() << "Attr : EnumAttr {\n";
+ OS << " static enum Attribute::AttrKind getKind() {\n";
+ OS << " return llvm::Attribute::" << R->getName() << ";\n";
+ OS << " }\n";
+ OS << "};\n";
+ }
+ OS << "\n";
+}
+
+void Attributes::printStrBoolAttrClasses(raw_ostream &OS,
+ const std::vector<Record *> &Records) {
+ OS << "// StrBoolAttr classes\n";
+ for (const auto *R : Records) {
+ OS << "struct " << R->getName() << "Attr : StrBoolAttr {\n";
+ OS << " static const char *getKind() {\n";
+ OS << " return \"" << R->getValueAsString("AttrString") << "\";\n";
+ OS << " }\n";
+ OS << "};\n";
+ }
+ OS << "\n";
+}
+
+void Attributes::emit(raw_ostream &OS) {
+ emitTargetIndependentEnums(OS);
+ emitFnAttrCompatCheck(OS, false);
+}
+
+namespace llvm {
+
+void EmitAttributes(RecordKeeper &RK, raw_ostream &OS) {
+ Attributes(RK).emit(OS);
+}
+
+} // End llvm namespace.
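
Like other TableGen backends, this one wraps each block in an #ifdef guard so that a consumer opts in by defining the macro before including the generated file. A runnable sketch of the emit side of that convention:

    #include <iostream>
    #include <string>
    #include <vector>

    // Emit side of the guard-macro convention: the block is inert unless the
    // consumer defines the macro before including the generated file.
    static void emitEnum(std::ostream &OS, const std::vector<std::string> &Attrs) {
      OS << "#ifdef GET_ATTR_ENUM\n#undef GET_ATTR_ENUM\n";
      for (const auto &A : Attrs)
        OS << A << ",\n";
      OS << "#endif\n";
    }

    int main() { emitEnum(std::cout, {"NoInline", "NoUnwind"}); }

A consumer defines GET_ATTR_ENUM just before including the generated file inside an enum body; the #undef keeps a second expansion in the same translation unit from emitting the block again.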
diff --git a/contrib/llvm/utils/TableGen/CallingConvEmitter.cpp b/contrib/llvm/utils/TableGen/CallingConvEmitter.cpp
index c7519b3..a47662b 100644
--- a/contrib/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -181,15 +181,15 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << Size << ", ";
else
O << "\n" << IndentStr
- << " State.getMachineFunction().getTarget().getDataLayout()"
- "->getTypeAllocSize(EVT(LocVT).getTypeForEVT(State.getContext())),"
+ << " State.getMachineFunction().getDataLayout()."
+ "getTypeAllocSize(EVT(LocVT).getTypeForEVT(State.getContext())),"
" ";
if (Align)
O << Align;
else
O << "\n" << IndentStr
- << " State.getMachineFunction().getTarget().getDataLayout()"
- "->getABITypeAlignment(EVT(LocVT).getTypeForEVT(State.getContext()"
+ << " State.getMachineFunction().getDataLayout()."
+ "getABITypeAlignment(EVT(LocVT).getTypeForEVT(State.getContext()"
"))";
O << ");\n" << IndentStr
<< "State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset"
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index ae1cc0c..3ebe51e 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -14,6 +14,7 @@
#include "CodeGenDAGPatterns.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
@@ -84,9 +85,9 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
if (TP.hasError())
return false;
- for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i)
- if (!Pred || Pred(LegalTypes[i]))
- TypeVec.push_back(LegalTypes[i]);
+ for (MVT::SimpleValueType VT : LegalTypes)
+ if (!Pred || Pred(VT))
+ TypeVec.push_back(VT);
// If we have nothing that matches the predicate, bail out.
if (TypeVec.empty()) {
@@ -107,36 +108,24 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
/// hasIntegerTypes - Return true if this TypeSet contains iAny or an
/// integer value type.
bool EEVT::TypeSet::hasIntegerTypes() const {
- for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
- if (isInteger(TypeVec[i]))
- return true;
- return false;
+ return std::any_of(TypeVec.begin(), TypeVec.end(), isInteger);
}
/// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or
/// a floating point value type.
bool EEVT::TypeSet::hasFloatingPointTypes() const {
- for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
- if (isFloatingPoint(TypeVec[i]))
- return true;
- return false;
+ return std::any_of(TypeVec.begin(), TypeVec.end(), isFloatingPoint);
}
/// hasScalarTypes - Return true if this TypeSet contains a scalar value type.
bool EEVT::TypeSet::hasScalarTypes() const {
- for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
- if (isScalar(TypeVec[i]))
- return true;
- return false;
+ return std::any_of(TypeVec.begin(), TypeVec.end(), isScalar);
}
/// hasVectorTypes - Return true if this TypeSet contains a vAny or a vector
/// value type.
bool EEVT::TypeSet::hasVectorTypes() const {
- for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
- if (isVector(TypeVec[i]))
- return true;
- return false;
+ return std::any_of(TypeVec.begin(), TypeVec.end(), isVector);
}
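
Each of the four hand-rolled existence loops above collapses to std::any_of. A minimal equivalent of the transformation, with a stand-in predicate:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    static bool isEven(int V) { return V % 2 == 0; }

    int main() {
      std::vector<int> TypeVec{1, 3, 4};
      // Replaces the hand-rolled loop: true if any element satisfies the predicate.
      bool HasEven = std::any_of(TypeVec.begin(), TypeVec.end(), isEven);
      std::cout << HasEven << "\n"; // 1
    }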
@@ -171,7 +160,7 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
return true;
}
- assert(TypeVec.size() >= 1 && InVT.TypeVec.size() >= 1 && "No unknowns");
+ assert(!TypeVec.empty() && !InVT.TypeVec.empty() && "No unknowns");
// Handle the abstract cases, seeing if we can resolve them better.
switch (TypeVec[0]) {
@@ -206,8 +195,7 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
// multiple different integer types, replace them with a single iPTR.
if ((InVT.TypeVec[0] == MVT::iPTR || InVT.TypeVec[0] == MVT::iPTRAny) &&
TypeVec.size() != 1) {
- TypeVec.resize(1);
- TypeVec[0] = InVT.TypeVec[0];
+ TypeVec.assign(1, InVT.TypeVec[0]);
MadeChange = true;
}
@@ -216,25 +204,20 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
// If this is a type list and the RHS is a typelist as well, eliminate entries
// from this list that aren't in the other one.
- bool MadeChange = false;
TypeSet InputSet(*this);
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- bool InInVT = false;
- for (unsigned j = 0, e = InVT.TypeVec.size(); j != e; ++j)
- if (TypeVec[i] == InVT.TypeVec[j]) {
- InInVT = true;
- break;
- }
+ TypeVec.clear();
+ std::set_intersection(InputSet.TypeVec.begin(), InputSet.TypeVec.end(),
+ InVT.TypeVec.begin(), InVT.TypeVec.end(),
+ std::back_inserter(TypeVec));
- if (InInVT) continue;
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
+ // If the intersection is the same size as the original set then we're done.
+ if (TypeVec.size() == InputSet.TypeVec.size())
+ return false;
// If we removed all of our types, we have a type contradiction.
if (!TypeVec.empty())
- return MadeChange;
+ return true;
// FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, merging '" +
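
In the hunk above, the element-by-element erase loop becomes a single std::set_intersection into a cleared vector, with "did anything change" recovered afterwards by comparing sizes. Note that std::set_intersection requires both input ranges to be sorted. A self-contained sketch:

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <vector>

    int main() {
      // Both inputs must be sorted for std::set_intersection to be valid.
      std::vector<int> A{1, 2, 4, 8};
      std::vector<int> B{2, 3, 4};
      std::vector<int> Out;
      std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                            std::back_inserter(Out));
      for (int V : Out)
        std::cout << V << ' '; // 2 4
      std::cout << '\n';
      // Detecting "no change", as the patch does: compare sizes.
      bool Changed = Out.size() != A.size();
      std::cout << Changed << '\n'; // 1
    }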
@@ -249,15 +232,16 @@ bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) {
// If we know nothing, then get the full set.
if (TypeVec.empty())
return FillWithPossibleTypes(TP, isInteger, "integer");
+
if (!hasFloatingPointTypes())
return false;
TypeSet InputSet(*this);
// Filter out all the fp types.
- for (unsigned i = 0; i != TypeVec.size(); ++i)
- if (!isInteger(TypeVec[i]))
- TypeVec.erase(TypeVec.begin()+i--);
+ TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
+ std::not1(std::ptr_fun(isInteger))),
+ TypeVec.end());
if (TypeVec.empty()) {
TP.error("Type inference contradiction found, '" +
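
The filters above use the erase-remove idiom: std::remove_if partitions the survivors to the front and returns the new logical end, and erase trims the tail in one call. A sketch using a lambda in place of std::not1(std::ptr_fun(...)), adaptors that later C++ standards deprecated and removed (std::ptr_fun in C++17, std::not1 in C++20), with a stand-in predicate:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    static bool isInteger(int V) { return V >= 0; } // stand-in predicate

    int main() {
      std::vector<int> TypeVec{3, -1, 7, -5};
      // Erase-remove idiom: keep only elements satisfying the predicate.
      TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
                                   [](int V) { return !isInteger(V); }),
                    TypeVec.end());
      for (int V : TypeVec)
        std::cout << V << ' '; // 3 7
      std::cout << '\n';
    }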
@@ -280,10 +264,10 @@ bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) {
TypeSet InputSet(*this);
- // Filter out all the fp types.
- for (unsigned i = 0; i != TypeVec.size(); ++i)
- if (!isFloatingPoint(TypeVec[i]))
- TypeVec.erase(TypeVec.begin()+i--);
+ // Filter out all the integer types.
+ TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
+ std::not1(std::ptr_fun(isFloatingPoint))),
+ TypeVec.end());
if (TypeVec.empty()) {
TP.error("Type inference contradiction found, '" +
@@ -308,9 +292,9 @@ bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) {
TypeSet InputSet(*this);
// Filter out all the vector types.
- for (unsigned i = 0; i != TypeVec.size(); ++i)
- if (!isScalar(TypeVec[i]))
- TypeVec.erase(TypeVec.begin()+i--);
+ TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
+ std::not1(std::ptr_fun(isScalar))),
+ TypeVec.end());
if (TypeVec.empty()) {
TP.error("Type inference contradiction found, '" +
@@ -333,11 +317,9 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
bool MadeChange = false;
// Filter out all the scalar types.
- for (unsigned i = 0; i != TypeVec.size(); ++i)
- if (!isVector(TypeVec[i])) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
+ TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
+ std::not1(std::ptr_fun(isVector))),
+ TypeVec.end());
if (TypeVec.empty()) {
TP.error("Type inference contradiction found, '" +
@@ -350,7 +332,7 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
/// EnforceSmallerThan - 'this' must be a smaller VT than Other. For vectors
-/// this shoud be based on the element type. Update this and other based on
+/// this should be based on the element type. Update this and other based on
/// this information.
bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
if (TP.hasError())
@@ -404,59 +386,70 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
// type size is smaller than the scalar size of the smallest type. For
// vectors, we also need to make sure that the total size is no larger than
// the size of the smallest type.
- TypeSet InputSet(Other);
- MVT Smallest = TypeVec[0];
- for (unsigned i = 0; i != Other.TypeVec.size(); ++i) {
- MVT OtherVT = Other.TypeVec[i];
- // Don't compare vector and non-vector types.
- if (OtherVT.isVector() != Smallest.isVector())
- continue;
- // The getSizeInBits() check here is only needed for vectors, but is
- // a subset of the scalar check for scalars so no need to qualify.
- if (OtherVT.getScalarSizeInBits() <= Smallest.getScalarSizeInBits() ||
- OtherVT.getSizeInBits() < Smallest.getSizeInBits()) {
- Other.TypeVec.erase(Other.TypeVec.begin()+i--);
- MadeChange = true;
+ {
+ TypeSet InputSet(Other);
+ MVT Smallest = *std::min_element(TypeVec.begin(), TypeVec.end(),
+ [](MVT A, MVT B) {
+ return A.getScalarSizeInBits() < B.getScalarSizeInBits() ||
+ (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
+ A.getSizeInBits() < B.getSizeInBits());
+ });
+
+ auto I = std::remove_if(Other.TypeVec.begin(), Other.TypeVec.end(),
+ [Smallest](MVT OtherVT) {
+ // Don't compare vector and non-vector types.
+ if (OtherVT.isVector() != Smallest.isVector())
+ return false;
+ // The getSizeInBits() check here is only needed for vectors, but is
+ // a subset of the scalar check for scalars so no need to qualify.
+ return OtherVT.getScalarSizeInBits() <= Smallest.getScalarSizeInBits()||
+ OtherVT.getSizeInBits() < Smallest.getSizeInBits();
+ });
+ MadeChange |= I != Other.TypeVec.end(); // If we're about to remove types.
+ Other.TypeVec.erase(I, Other.TypeVec.end());
+
+ if (Other.TypeVec.empty()) {
+ TP.error("Type inference contradiction found, '" + InputSet.getName() +
+ "' has nothing larger than '" + getName() +"'!");
+ return false;
}
}
- if (Other.TypeVec.empty()) {
- TP.error("Type inference contradiction found, '" + InputSet.getName() +
- "' has nothing larger than '" + getName() +"'!");
- return false;
- }
-
// Okay, find the largest type from the other set and remove anything the
// same or smaller from the current set. We need to ensure that the scalar
// type size is larger than the scalar size of the largest type. For
// vectors, we also need to make sure that the total size is no smaller than
// the size of the largest type.
- InputSet = TypeSet(*this);
- MVT Largest = Other.TypeVec[Other.TypeVec.size()-1];
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- MVT OtherVT = TypeVec[i];
- // Don't compare vector and non-vector types.
- if (OtherVT.isVector() != Largest.isVector())
- continue;
- // The getSizeInBits() check here is only needed for vectors, but is
- // a subset of the scalar check for scalars so no need to qualify.
- if (OtherVT.getScalarSizeInBits() >= Largest.getScalarSizeInBits() ||
- OtherVT.getSizeInBits() > Largest.getSizeInBits()) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
+ {
+ TypeSet InputSet(*this);
+ MVT Largest = *std::max_element(Other.TypeVec.begin(), Other.TypeVec.end(),
+ [](MVT A, MVT B) {
+ return A.getScalarSizeInBits() < B.getScalarSizeInBits() ||
+ (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
+ A.getSizeInBits() < B.getSizeInBits());
+ });
+ auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
+ [Largest](MVT OtherVT) {
+ // Don't compare vector and non-vector types.
+ if (OtherVT.isVector() != Largest.isVector())
+ return false;
+ return OtherVT.getScalarSizeInBits() >= Largest.getScalarSizeInBits() ||
+ OtherVT.getSizeInBits() > Largest.getSizeInBits();
+ });
+ MadeChange |= I != TypeVec.end(); // If we're about to remove types.
+ TypeVec.erase(I, TypeVec.end());
+
+ if (TypeVec.empty()) {
+ TP.error("Type inference contradiction found, '" + InputSet.getName() +
+ "' has nothing smaller than '" + Other.getName() +"'!");
+ return false;
}
}
- if (TypeVec.empty()) {
- TP.error("Type inference contradiction found, '" + InputSet.getName() +
- "' has nothing smaller than '" + Other.getName() +"'!");
- return false;
- }
-
return MadeChange;
}
-/// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
+/// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type
/// whose element is specified by VTOperand.
bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT,
TreePattern &TP) {
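
In the EnforceSmallerThan rewrite above, the smallest and largest candidates are now found with std::min_element and std::max_element under a two-key comparison: scalar size first, total size as a tiebreaker. A compilable sketch of that comparator, with (scalar size, total size) pairs standing in for MVTs:

    #include <algorithm>
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
      // Pairs stand in for (scalar size in bits, total size in bits) of an MVT.
      std::vector<std::pair<int, int>> Types{{32, 128}, {16, 64}, {16, 32}};
      auto Cmp = [](const auto &A, const auto &B) {
        return A.first < B.first ||
               (A.first == B.first && A.second < B.second);
      };
      auto Smallest = *std::min_element(Types.begin(), Types.end(), Cmp);
      auto Largest = *std::max_element(Types.begin(), Types.end(), Cmp);
      std::cout << Smallest.first << '/' << Smallest.second << ' '
                << Largest.first << '/' << Largest.second << '\n'; // 16/32 32/128
    }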
@@ -467,24 +460,24 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT,
TypeSet InputSet(*this);
// Filter out all the types which don't have the right element type.
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- assert(isVector(TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(TypeVec[i]).getVectorElementType().SimpleTy != VT) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
+ [VT](MVT VVT) {
+ return VVT.getVectorElementType().SimpleTy != VT;
+ });
+ MadeChange |= I != TypeVec.end();
+ TypeVec.erase(I, TypeVec.end());
if (TypeVec.empty()) { // FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, forcing '" +
- InputSet.getName() + "' to have a vector element");
+ InputSet.getName() + "' to have a vector element of type " +
+ getEnumName(VT));
return false;
}
return MadeChange;
}
-/// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
+/// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type
/// whose element is specified by VTOperand.
bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
TreePattern &TP) {
@@ -500,8 +493,7 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
if (isConcrete()) {
MVT IVT = getConcrete();
IVT = IVT.getVectorElementType();
- return MadeChange |
- VTOperand.MergeInTypeInfo(IVT.SimpleTy, TP);
+ return MadeChange || VTOperand.MergeInTypeInfo(IVT.SimpleTy, TP);
}
// If the scalar type is known, filter out vector types whose element types
@@ -511,26 +503,12 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
MVT::SimpleValueType VT = VTOperand.getConcrete();
- TypeSet InputSet(*this);
-
- // Filter out all the types which don't have the right element type.
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- assert(isVector(TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(TypeVec[i]).getVectorElementType().SimpleTy != VT) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ MadeChange |= EnforceVectorEltTypeIs(VT, TP);
- if (TypeVec.empty()) { // FIXME: Really want an SMLoc here!
- TP.error("Type inference contradiction found, forcing '" +
- InputSet.getName() + "' to have a vector element");
- return false;
- }
return MadeChange;
}
-/// EnforceVectorSubVectorTypeIs - 'this' is now constrainted to be a
+/// EnforceVectorSubVectorTypeIs - 'this' is now constrained to be a
/// vector type specified by VTOperand.
bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
TreePattern &TP) {
@@ -569,13 +547,13 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
// Only keep types that have less elements than VTOperand.
TypeSet InputSet(VTOperand);
- for (unsigned i = 0; i != VTOperand.TypeVec.size(); ++i) {
- assert(isVector(VTOperand.TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(VTOperand.TypeVec[i]).getVectorNumElements() >= NumElems) {
- VTOperand.TypeVec.erase(VTOperand.TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
+ [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() >= NumElems;
+ });
+ MadeChange |= I != VTOperand.TypeVec.end();
+ VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
+
if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, forcing '" +
InputSet.getName() + "' to have less vector elements than '" +
@@ -593,13 +571,13 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
// Only keep types that have more elements than 'this'.
TypeSet InputSet(*this);
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- assert(isVector(TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(TypeVec[i]).getVectorNumElements() <= NumElems) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
+ [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() <= NumElems;
+ });
+ MadeChange |= I != TypeVec.end();
+ TypeVec.erase(I, TypeVec.end());
+
if (TypeVec.empty()) { // FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, forcing '" +
InputSet.getName() + "' to have more vector elements than '" +
@@ -611,7 +589,7 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
return MadeChange;
}
-/// EnforceVectorSameNumElts - 'this' is now constrainted to
+/// EnforceVectorSameNumElts - 'this' is now constrained to
/// be a vector with same num elements as VTOperand.
bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
TreePattern &TP) {
@@ -628,16 +606,16 @@ bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
MVT IVT = getConcrete();
unsigned NumElems = IVT.getVectorNumElements();
- // Only keep types that have same elements as VTOperand.
+ // Only keep types that have same elements as 'this'.
TypeSet InputSet(VTOperand);
- for (unsigned i = 0; i != VTOperand.TypeVec.size(); ++i) {
- assert(isVector(VTOperand.TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(VTOperand.TypeVec[i]).getVectorNumElements() != NumElems) {
- VTOperand.TypeVec.erase(VTOperand.TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
+ [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() != NumElems;
+ });
+ MadeChange |= I != VTOperand.TypeVec.end();
+ VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
+
if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, forcing '" +
InputSet.getName() + "' to have same number elements as '" +
@@ -648,16 +626,16 @@ bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
MVT IVT = VTOperand.getConcrete();
unsigned NumElems = IVT.getVectorNumElements();
- // Only keep types that have same elements as 'this'.
+ // Only keep types that have same elements as VTOperand.
TypeSet InputSet(*this);
- for (unsigned i = 0; i != TypeVec.size(); ++i) {
- assert(isVector(TypeVec[i]) && "EnforceVector didn't work");
- if (MVT(TypeVec[i]).getVectorNumElements() != NumElems) {
- TypeVec.erase(TypeVec.begin()+i--);
- MadeChange = true;
- }
- }
+ auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
+ [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() != NumElems;
+ });
+ MadeChange |= I != TypeVec.end();
+ TypeVec.erase(I, TypeVec.end());
+
if (TypeVec.empty()) { // FIXME: Really want an SMLoc here!
TP.error("Type inference contradiction found, forcing '" +
InputSet.getName() + "' to have same number elements than '" +
@@ -669,15 +647,66 @@ bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
return MadeChange;
}
+/// EnforceSameSize - 'this' is now constrained to be the same size as VTOperand.
+bool EEVT::TypeSet::EnforceSameSize(EEVT::TypeSet &VTOperand,
+ TreePattern &TP) {
+ if (TP.hasError())
+ return false;
+
+ bool MadeChange = false;
+
+ // If we know one of the types, it forces the other type to agree.
+ if (isConcrete()) {
+ MVT IVT = getConcrete();
+ unsigned Size = IVT.getSizeInBits();
+
+ // Only keep types that have the same size as 'this'.
+ TypeSet InputSet(VTOperand);
+
+ auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
+ [&](MVT VT) {
+ return VT.getSizeInBits() != Size;
+ });
+ MadeChange |= I != VTOperand.TypeVec.end();
+ VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
+
+ if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here!
+ TP.error("Type inference contradiction found, forcing '" +
+ InputSet.getName() + "' to have the same size as '" +
+ getName() + "'");
+ return false;
+ }
+ } else if (VTOperand.isConcrete()) {
+ MVT IVT = VTOperand.getConcrete();
+ unsigned Size = IVT.getSizeInBits();
+
+ // Only keep types that have the same size as VTOperand.
+ TypeSet InputSet(*this);
+
+ auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
+ [&](MVT VT) {
+ return VT.getSizeInBits() != Size;
+ });
+ MadeChange |= I != TypeVec.end();
+ TypeVec.erase(I, TypeVec.end());
+
+ if (TypeVec.empty()) { // FIXME: Really want an SMLoc here!
+ TP.error("Type inference contradiction found, forcing '" +
+ InputSet.getName() + "' to have the same size as '" +
+ VTOperand.getName() + "'");
+ return false;
+ }
+ }
+
+ return MadeChange;
+}
+
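The hunks above all apply the same transformation: an index-juggling erase loop (with the i-- fix-up after each erase) becomes a single erase-remove pass, and the change flag falls out of comparing the returned iterator against end(). A minimal self-contained sketch of that idiom, independent of the patch:

    #include <algorithm>
    #include <vector>

    // Drop every odd element in one pass; report whether anything changed.
    static bool keepOnlyEven(std::vector<unsigned> &V) {
      auto I = std::remove_if(V.begin(), V.end(),
                              [](unsigned N) { return N % 2 != 0; });
      bool MadeChange = I != V.end(); // something was shifted out
      V.erase(I, V.end());            // one bulk erase, no index fix-ups
      return MadeChange;
    }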
//===----------------------------------------------------------------------===//
// Helpers for working with extended types.
/// Dependent variable map for CodeGenDAGPattern variant generation
typedef std::map<std::string, int> DepVarMap;
-/// Const iterator shorthand for DepVarMap
-typedef DepVarMap::const_iterator DepVarMap_citer;
-
static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) {
if (N->isLeaf()) {
if (isa<DefInit>(N->getLeafValue()))
@@ -692,9 +721,9 @@ static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) {
static void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
DepVarMap depcounts;
FindDepVarsOf(N, depcounts);
- for (DepVarMap_citer i = depcounts.begin(); i != depcounts.end(); ++i) {
- if (i->second > 1) // std::pair<std::string, int>
- DepVars.insert(i->first);
+ for (const auto &Pair : depcounts) {
+ if (Pair.second > 1)
+ DepVars.insert(Pair.first);
}
}
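FindDepVars is a count-then-filter pass: FindDepVarsOf tallies each name, and only names seen more than once become dependent variables. One detail worth knowing when writing the range-for: a std::map's value_type is pair<const Key, Value>, so spelling the element type by hand with a non-const key silently copies each entry (hence the auto above). A hedged sketch of the same shape:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Keep only the names that occur more than once in the input.
    static std::set<std::string>
    namesUsedTwice(const std::vector<std::string> &Names) {
      std::map<std::string, int> Counts;
      for (const std::string &N : Names)
        ++Counts[N];
      std::set<std::string> Result;
      for (const auto &P : Counts)   // pair<const std::string, int>
        if (P.second > 1)
          Result.insert(P.first);
      return Result;
    }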
@@ -705,9 +734,8 @@ static void DumpDepVars(MultipleUseVarSet &DepVars) {
DEBUG(errs() << "<empty set>");
} else {
DEBUG(errs() << "[ ");
- for (MultipleUseVarSet::const_iterator i = DepVars.begin(),
- e = DepVars.end(); i != e; ++i) {
- DEBUG(errs() << (*i) << " ");
+ for (const std::string &DepVar : DepVars) {
+ DEBUG(errs() << DepVar << " ");
}
DEBUG(errs() << "]");
}
@@ -771,7 +799,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
if (ClassName == "SDNode")
Result = " SDNode *N = Node;\n";
else
- Result = " " + ClassName + "*N = cast<" + ClassName + ">(Node);\n";
+ Result = " auto *N = cast<" + ClassName + ">(Node);\n";
return Result + getPredCode();
}
@@ -841,7 +869,7 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const {
/// pattern's predicates concatenated with "&&" operators.
///
std::string PatternToMatch::getPredicateCheck() const {
- std::string PredicateCheck;
+ SmallVector<Record *, 4> PredicateRecs;
for (Init *I : Predicates->getValues()) {
if (DefInit *Pred = dyn_cast<DefInit>(I)) {
Record *Def = Pred->getDef();
@@ -851,13 +879,20 @@ std::string PatternToMatch::getPredicateCheck() const {
#endif
llvm_unreachable("Unknown predicate type!");
}
- if (!PredicateCheck.empty())
- PredicateCheck += " && ";
- PredicateCheck += "(" + Def->getValueAsString("CondString") + ")";
+ PredicateRecs.push_back(Def);
}
}
+ // Sort so that different orders get canonicalized to the same string.
+ std::sort(PredicateRecs.begin(), PredicateRecs.end(), LessRecord());
+
+ SmallString<128> PredicateCheck;
+ for (Record *Pred : PredicateRecs) {
+ if (!PredicateCheck.empty())
+ PredicateCheck += " && ";
+ PredicateCheck += "(" + Pred->getValueAsString("CondString") + ")";
+ }
- return PredicateCheck;
+ return PredicateCheck.str();
}
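The getPredicateCheck change fixes a canonicalization problem: two patterns carrying the same predicates in different orders used to serialize to different strings. Sorting first makes the emitted check order-independent. A sketch of the same idea over plain strings (the real code sorts Record pointers with LessRecord):

    #include <algorithm>
    #include <string>
    #include <vector>

    static std::string joinPredicates(std::vector<std::string> Conds) {
      std::sort(Conds.begin(), Conds.end()); // canonical order first
      std::string Check;
      for (const std::string &C : Conds) {
        if (!Check.empty())
          Check += " && ";
        Check += "(" + C + ")";
      }
      return Check; // same string for any input permutation
    }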
//===----------------------------------------------------------------------===//
@@ -912,6 +947,10 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
ConstraintType = SDTCisSameNumEltsAs;
x.SDTCisSameNumEltsAs_Info.OtherOperandNum =
R->getValueAsInt("OtherOperandNum");
+ } else if (R->isSubClassOf("SDTCisSameSizeAs")) {
+ ConstraintType = SDTCisSameSizeAs;
+ x.SDTCisSameSizeAs_Info.OtherOperandNum =
+ R->getValueAsInt("OtherOperandNum");
} else {
PrintFatalError("Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
}
@@ -1041,6 +1080,14 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
return OtherNode->getExtType(OResNo).
EnforceVectorSameNumElts(NodeToApply->getExtType(ResNo), TP);
}
+ case SDTCisSameSizeAs: {
+ unsigned OResNo = 0;
+ TreePatternNode *OtherNode =
+ getOperandNum(x.SDTCisSameSizeAs_Info.OtherOperandNum,
+ N, NodeInfo, OResNo);
+ return OtherNode->getExtType(OResNo).
+ EnforceSameSize(NodeToApply->getExtType(ResNo), TP);
+ }
}
llvm_unreachable("Invalid ConstraintType!");
}
@@ -1091,33 +1138,32 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
// Parse the properties.
Properties = 0;
- std::vector<Record*> PropList = R->getValueAsListOfDefs("Properties");
- for (unsigned i = 0, e = PropList.size(); i != e; ++i) {
- if (PropList[i]->getName() == "SDNPCommutative") {
+ for (Record *Property : R->getValueAsListOfDefs("Properties")) {
+ if (Property->getName() == "SDNPCommutative") {
Properties |= 1 << SDNPCommutative;
- } else if (PropList[i]->getName() == "SDNPAssociative") {
+ } else if (Property->getName() == "SDNPAssociative") {
Properties |= 1 << SDNPAssociative;
- } else if (PropList[i]->getName() == "SDNPHasChain") {
+ } else if (Property->getName() == "SDNPHasChain") {
Properties |= 1 << SDNPHasChain;
- } else if (PropList[i]->getName() == "SDNPOutGlue") {
+ } else if (Property->getName() == "SDNPOutGlue") {
Properties |= 1 << SDNPOutGlue;
- } else if (PropList[i]->getName() == "SDNPInGlue") {
+ } else if (Property->getName() == "SDNPInGlue") {
Properties |= 1 << SDNPInGlue;
- } else if (PropList[i]->getName() == "SDNPOptInGlue") {
+ } else if (Property->getName() == "SDNPOptInGlue") {
Properties |= 1 << SDNPOptInGlue;
- } else if (PropList[i]->getName() == "SDNPMayStore") {
+ } else if (Property->getName() == "SDNPMayStore") {
Properties |= 1 << SDNPMayStore;
- } else if (PropList[i]->getName() == "SDNPMayLoad") {
+ } else if (Property->getName() == "SDNPMayLoad") {
Properties |= 1 << SDNPMayLoad;
- } else if (PropList[i]->getName() == "SDNPSideEffect") {
+ } else if (Property->getName() == "SDNPSideEffect") {
Properties |= 1 << SDNPSideEffect;
- } else if (PropList[i]->getName() == "SDNPMemOperand") {
+ } else if (Property->getName() == "SDNPMemOperand") {
Properties |= 1 << SDNPMemOperand;
- } else if (PropList[i]->getName() == "SDNPVariadic") {
+ } else if (Property->getName() == "SDNPVariadic") {
Properties |= 1 << SDNPVariadic;
} else {
PrintFatalError("Unknown SD Node property '" +
- PropList[i]->getName() + "' on node '" +
+ Property->getName() + "' on node '" +
R->getName() + "'!");
}
}
@@ -1138,15 +1184,15 @@ MVT::SimpleValueType SDNodeInfo::getKnownType(unsigned ResNo) const {
"We only work with nodes with zero or one result so far!");
assert(ResNo == 0 && "Only handles single result nodes so far");
- for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i) {
+ for (const SDTypeConstraint &Constraint : TypeConstraints) {
// Make sure that this applies to the correct node result.
- if (TypeConstraints[i].OperandNo >= NumResults) // FIXME: need value #
+ if (Constraint.OperandNo >= NumResults) // FIXME: need value #
continue;
- switch (TypeConstraints[i].ConstraintType) {
+ switch (Constraint.ConstraintType) {
default: break;
case SDTypeConstraint::SDTCisVT:
- return TypeConstraints[i].x.SDTCisVT_Info.VT;
+ return Constraint.x.SDTCisVT_Info.VT;
case SDTypeConstraint::SDTCisPtrTy:
return MVT::iPTR;
}
@@ -1247,8 +1293,8 @@ void TreePatternNode::print(raw_ostream &OS) const {
OS << ")";
}
- for (unsigned i = 0, e = PredicateFns.size(); i != e; ++i)
- OS << "<<P:" << PredicateFns[i].getFnName() << ">>";
+ for (const TreePredicateFn &Pred : PredicateFns)
+ OS << "<<P:" << Pred.getFnName() << ">>";
if (TransformFn)
OS << "<<X:" << TransformFn->getName() << ">>";
if (!getName().empty())
@@ -1315,8 +1361,8 @@ TreePatternNode *TreePatternNode::clone() const {
/// RemoveAllTypes - Recursively strip all the types of this tree.
void TreePatternNode::RemoveAllTypes() {
- for (unsigned i = 0, e = Types.size(); i != e; ++i)
- Types[i] = EEVT::TypeSet(); // Reset to unknown type.
+ // Reset to unknown type.
+ std::fill(Types.begin(), Types.end(), EEVT::TypeSet());
if (isLeaf()) return;
for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
getChild(i)->RemoveAllTypes();
@@ -1410,8 +1456,8 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
FragTree->UpdateNodeType(i, getExtType(i), TP);
// Transfer in the old predicates.
- for (unsigned i = 0, e = getPredicateFns().size(); i != e; ++i)
- FragTree->addPredicateFn(getPredicateFns()[i]);
+ for (const TreePredicateFn &Pred : getPredicateFns())
+ FragTree->addPredicateFn(Pred);
// Get a new copy of this fragment to stitch into here.
//delete this; // FIXME: implement refcounting!
@@ -2024,8 +2070,8 @@ void TreePattern::error(const Twine &Msg) {
}
void TreePattern::ComputeNamedNodes() {
- for (unsigned i = 0, e = Trees.size(); i != e; ++i)
- ComputeNamedNodes(Trees[i]);
+ for (TreePatternNode *Tree : Trees)
+ ComputeNamedNodes(Tree);
}
void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
@@ -2251,53 +2297,52 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (unsigned i = 0, e = Trees.size(); i != e; ++i) {
- MadeChange |= Trees[i]->ApplyTypeConstraints(*this, false);
- MadeChange |= SimplifyTree(Trees[i]);
+ for (TreePatternNode *Tree : Trees) {
+ MadeChange |= Tree->ApplyTypeConstraints(*this, false);
+ MadeChange |= SimplifyTree(Tree);
}
// If there are constraints on our named nodes, apply them.
- for (StringMap<SmallVector<TreePatternNode*,1> >::iterator
- I = NamedNodes.begin(), E = NamedNodes.end(); I != E; ++I) {
- SmallVectorImpl<TreePatternNode*> &Nodes = I->second;
+ for (auto &Entry : NamedNodes) {
+ SmallVectorImpl<TreePatternNode*> &Nodes = Entry.second;
// If we have input named node types, propagate their types to the named
// values here.
if (InNamedTypes) {
- if (!InNamedTypes->count(I->getKey())) {
- error("Node '" + std::string(I->getKey()) +
+ if (!InNamedTypes->count(Entry.getKey())) {
+ error("Node '" + std::string(Entry.getKey()) +
"' in output pattern but not input pattern");
return true;
}
const SmallVectorImpl<TreePatternNode*> &InNodes =
- InNamedTypes->find(I->getKey())->second;
+ InNamedTypes->find(Entry.getKey())->second;
// The input types should be fully resolved by now.
- for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ for (TreePatternNode *Node : Nodes) {
// If this node is a register class, and it is the root of the pattern
// then we're mapping something onto an input register. We allow
// changing the type of the input register in this case. This allows
// us to match things like:
// def : Pat<(v1i64 (bitconvert(v2i32 DPR:$src))), (v1i64 DPR:$src)>;
- if (Nodes[i] == Trees[0] && Nodes[i]->isLeaf()) {
- DefInit *DI = dyn_cast<DefInit>(Nodes[i]->getLeafValue());
+ if (Node == Trees[0] && Node->isLeaf()) {
+ DefInit *DI = dyn_cast<DefInit>(Node->getLeafValue());
if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
DI->getDef()->isSubClassOf("RegisterOperand")))
continue;
}
- assert(Nodes[i]->getNumTypes() == 1 &&
+ assert(Node->getNumTypes() == 1 &&
InNodes[0]->getNumTypes() == 1 &&
"FIXME: cannot name multiple result nodes yet");
- MadeChange |= Nodes[i]->UpdateNodeType(0, InNodes[0]->getExtType(0),
- *this);
+ MadeChange |= Node->UpdateNodeType(0, InNodes[0]->getExtType(0),
+ *this);
}
}
// If there are multiple nodes with the same name, they must all have the
// same type.
- if (I->second.size() > 1) {
+ if (Entry.second.size() > 1) {
for (unsigned i = 0, e = Nodes.size()-1; i != e; ++i) {
TreePatternNode *N1 = Nodes[i], *N2 = Nodes[i+1];
assert(N1->getNumTypes() == 1 && N2->getNumTypes() == 1 &&
@@ -2311,8 +2356,8 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
}
bool HasUnresolvedTypes = false;
- for (unsigned i = 0, e = Trees.size(); i != e; ++i)
- HasUnresolvedTypes |= Trees[i]->ContainsUnresolvedType();
+ for (const TreePatternNode *Tree : Trees)
+ HasUnresolvedTypes |= Tree->ContainsUnresolvedType();
return !HasUnresolvedTypes;
}
@@ -2328,9 +2373,9 @@ void TreePattern::print(raw_ostream &OS) const {
if (Trees.size() > 1)
OS << "[\n";
- for (unsigned i = 0, e = Trees.size(); i != e; ++i) {
+ for (const TreePatternNode *Tree : Trees) {
OS << "\t";
- Trees[i]->print(OS);
+ Tree->print(OS);
OS << "\n";
}
@@ -2425,14 +2470,14 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
std::vector<Record*> Fragments = Records.getAllDerivedDefinitions("PatFrag");
// First step, parse all of the fragments.
- for (unsigned i = 0, e = Fragments.size(); i != e; ++i) {
- if (OutFrags != Fragments[i]->isSubClassOf("OutPatFrag"))
+ for (Record *Frag : Fragments) {
+ if (OutFrags != Frag->isSubClassOf("OutPatFrag"))
continue;
- DagInit *Tree = Fragments[i]->getValueAsDag("Fragment");
+ DagInit *Tree = Frag->getValueAsDag("Fragment");
TreePattern *P =
- (PatternFragments[Fragments[i]] = llvm::make_unique<TreePattern>(
- Fragments[i], Tree, !Fragments[i]->isSubClassOf("OutPatFrag"),
+ (PatternFragments[Frag] = llvm::make_unique<TreePattern>(
+ Frag, Tree, !Frag->isSubClassOf("OutPatFrag"),
*this)).get();
// Validate the argument list, converting it to a set to discard duplicates.
@@ -2443,7 +2488,7 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
P->error("Cannot have unnamed 'node' values in pattern fragment!");
// Parse the operands list.
- DagInit *OpsList = Fragments[i]->getValueAsDag("Operands");
+ DagInit *OpsList = Frag->getValueAsDag("Operands");
DefInit *OpsOp = dyn_cast<DefInit>(OpsList->getOperator());
// Special cases: ops == outs == ins. Different names are used to
// improve readability.
@@ -2480,18 +2525,18 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
// If there is a node transformation corresponding to this, keep track of
// it.
- Record *Transform = Fragments[i]->getValueAsDef("OperandTransform");
+ Record *Transform = Frag->getValueAsDef("OperandTransform");
if (!getSDNodeTransform(Transform).second.empty()) // not noop xform?
P->getOnlyTree()->setTransformFn(Transform);
}
// Now that we've parsed all of the tree fragments, do a closure on them so
// that there are no references to PatFrags left inside of them.
- for (unsigned i = 0, e = Fragments.size(); i != e; ++i) {
- if (OutFrags != Fragments[i]->isSubClassOf("OutPatFrag"))
+ for (Record *Frag : Fragments) {
+ if (OutFrags != Frag->isSubClassOf("OutPatFrag"))
continue;
- TreePattern &ThePat = *PatternFragments[Fragments[i]];
+ TreePattern &ThePat = *PatternFragments[Frag];
ThePat.InlinePatternFragments();
// Infer as many types as possible. Don't worry about it if we don't infer
@@ -2815,7 +2860,7 @@ static bool InferFromPattern(CodeGenInstruction &InstInfo,
if (InstInfo.mayLoad != PatInfo.mayLoad && !InstInfo.mayLoad_Unset) {
// Allow explicitly setting mayLoad = 1, even when the pattern has no loads.
- // Some targets translate imediates to loads.
+ // Some targets translate immediates to loads.
if (!InstInfo.mayLoad) {
Error = true;
PrintError(PatDef->getLoc(), "Pattern doesn't match mayLoad = " +
@@ -3065,11 +3110,11 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
void CodeGenDAGPatterns::ParseInstructions() {
std::vector<Record*> Instrs = Records.getAllDerivedDefinitions("Instruction");
- for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
+ for (Record *Instr : Instrs) {
ListInit *LI = nullptr;
- if (isa<ListInit>(Instrs[i]->getValueInit("Pattern")))
- LI = Instrs[i]->getValueAsListInit("Pattern");
+ if (isa<ListInit>(Instr->getValueInit("Pattern")))
+ LI = Instr->getValueAsListInit("Pattern");
// If there is no pattern, only collect minimal information about the
// instruction for its operand list. We have to assume that there is one
@@ -3081,7 +3126,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
std::vector<Record*> Results;
std::vector<Record*> Operands;
- CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
+ CodeGenInstruction &InstInfo = Target.getInstruction(Instr);
if (InstInfo.Operands.size() != 0) {
for (unsigned j = 0, e = InstInfo.Operands.NumDefs; j < e; ++j)
@@ -3095,12 +3140,12 @@ void CodeGenDAGPatterns::ParseInstructions() {
// Create and insert the instruction.
std::vector<Record*> ImpResults;
- Instructions.insert(std::make_pair(Instrs[i],
+ Instructions.insert(std::make_pair(Instr,
DAGInstruction(nullptr, Results, Operands, ImpResults)));
continue; // no pattern.
}
- CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]);
+ CodeGenInstruction &CGI = Target.getInstruction(Instr);
const DAGInstruction &DI = parseInstructionPattern(CGI, LI, Instructions);
(void)DI;
@@ -3108,10 +3153,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
}
// If we can, convert the instructions to be patterns that are matched!
- for (std::map<Record*, DAGInstruction, LessRecordByID>::iterator II =
- Instructions.begin(),
- E = Instructions.end(); II != E; ++II) {
- DAGInstruction &TheInst = II->second;
+ for (auto &Entry : Instructions) {
+ DAGInstruction &TheInst = Entry.second;
TreePattern *I = TheInst.getPattern();
if (!I) continue; // No pattern.
@@ -3126,7 +3169,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
SrcPattern = Pattern;
}
- Record *Instr = II->first;
+ Record *Instr = Entry.first;
AddPatternToMatch(I,
PatternToMatch(Instr,
Instr->getValueAsListInit("Predicates"),
@@ -3187,19 +3230,18 @@ void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern,
// Scan all of the named values in the destination pattern, rejecting them if
// they don't exist in the input pattern.
- for (std::map<std::string, NameRecord>::iterator
- I = DstNames.begin(), E = DstNames.end(); I != E; ++I) {
- if (SrcNames[I->first].first == nullptr)
+ for (const auto &Entry : DstNames) {
+ if (SrcNames[Entry.first].first == nullptr)
Pattern->error("Pattern has input without matching name in output: $" +
- I->first);
+ Entry.first);
}
// Scan all of the named values in the source pattern, rejecting them if the
// name isn't used in the dest, and isn't used to tie two values together.
- for (std::map<std::string, NameRecord>::iterator
- I = SrcNames.begin(), E = SrcNames.end(); I != E; ++I)
- if (DstNames[I->first].first == nullptr && SrcNames[I->first].second == 1)
- Pattern->error("Pattern has dead named input: $" + I->first);
+ for (const auto &Entry : SrcNames)
+ if (DstNames[Entry.first].first == nullptr &&
+ SrcNames[Entry.first].second == 1)
+ Pattern->error("Pattern has dead named input: $" + Entry.first);
PatternsToMatch.push_back(PTM);
}
@@ -3258,31 +3300,29 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
// Revisit instructions with undefined flags and no pattern.
if (Target.guessInstructionProperties()) {
- for (unsigned i = 0, e = Revisit.size(); i != e; ++i) {
- CodeGenInstruction &InstInfo = *Revisit[i];
- if (InstInfo.InferredFrom)
+ for (CodeGenInstruction *InstInfo : Revisit) {
+ if (InstInfo->InferredFrom)
continue;
// The mayLoad and mayStore flags default to false.
// Conservatively assume hasSideEffects if it wasn't explicit.
- if (InstInfo.hasSideEffects_Unset)
- InstInfo.hasSideEffects = true;
+ if (InstInfo->hasSideEffects_Unset)
+ InstInfo->hasSideEffects = true;
}
return;
}
// Complain about any flags that are still undefined.
- for (unsigned i = 0, e = Revisit.size(); i != e; ++i) {
- CodeGenInstruction &InstInfo = *Revisit[i];
- if (InstInfo.InferredFrom)
+ for (CodeGenInstruction *InstInfo : Revisit) {
+ if (InstInfo->InferredFrom)
continue;
- if (InstInfo.hasSideEffects_Unset)
- PrintError(InstInfo.TheDef->getLoc(),
+ if (InstInfo->hasSideEffects_Unset)
+ PrintError(InstInfo->TheDef->getLoc(),
"Can't infer hasSideEffects from patterns");
- if (InstInfo.mayStore_Unset)
- PrintError(InstInfo.TheDef->getLoc(),
+ if (InstInfo->mayStore_Unset)
+ PrintError(InstInfo->TheDef->getLoc(),
"Can't infer mayStore from patterns");
- if (InstInfo.mayLoad_Unset)
- PrintError(InstInfo.TheDef->getLoc(),
+ if (InstInfo->mayLoad_Unset)
+ PrintError(InstInfo->TheDef->getLoc(),
"Can't infer mayLoad from patterns");
}
}
@@ -3302,8 +3342,8 @@ void CodeGenDAGPatterns::VerifyInstructionFlags() {
unsigned NumSideEffects = 0;
unsigned NumStores = 0;
unsigned NumLoads = 0;
- for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
- const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
+ for (const Record *Instr : Instrs) {
+ const CodeGenInstruction &InstInfo = Target.getInstruction(Instr);
NumSideEffects += InstInfo.hasSideEffects;
NumStores += InstInfo.mayStore;
NumLoads += InstInfo.mayLoad;
@@ -3335,19 +3375,19 @@ void CodeGenDAGPatterns::VerifyInstructionFlags() {
continue;
++Errors;
- for (unsigned i = 0, e = Msgs.size(); i != e; ++i)
- PrintError(PTM.getSrcRecord()->getLoc(), Twine(Msgs[i]) + " on the " +
+ for (const std::string &Msg : Msgs)
+ PrintError(PTM.getSrcRecord()->getLoc(), Twine(Msg) + " on the " +
(Instrs.size() == 1 ?
"instruction" : "output instructions"));
// Provide the location of the relevant instruction definitions.
- for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
- if (Instrs[i] != PTM.getSrcRecord())
- PrintError(Instrs[i]->getLoc(), "defined here");
- const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
+ for (const Record *Instr : Instrs) {
+ if (Instr != PTM.getSrcRecord())
+ PrintError(Instr->getLoc(), "defined here");
+ const CodeGenInstruction &InstInfo = Target.getInstruction(Instr);
if (InstInfo.InferredFrom &&
InstInfo.InferredFrom != InstInfo.TheDef &&
InstInfo.InferredFrom != PTM.getSrcRecord())
- PrintError(InstInfo.InferredFrom->getLoc(), "inferred from patttern");
+ PrintError(InstInfo.InferredFrom->getLoc(), "inferred from pattern");
}
}
if (Errors)
@@ -3386,8 +3426,7 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
void CodeGenDAGPatterns::ParsePatterns() {
std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
- for (unsigned i = 0, e = Patterns.size(); i != e; ++i) {
- Record *CurPattern = Patterns[i];
+ for (Record *CurPattern : Patterns) {
DagInit *Tree = CurPattern->getValueAsDag("PatternToMatch");
// If the pattern references the null_frag, there's nothing to do.
@@ -3517,8 +3556,8 @@ static void CombineChildVariants(TreePatternNode *Orig,
CodeGenDAGPatterns &CDP,
const MultipleUseVarSet &DepVars) {
// Make sure that each operand has at least one variant to choose from.
- for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
- if (ChildVariants[i].empty())
+ for (const auto &Variants : ChildVariants)
+ if (Variants.empty())
return;
// The end result is an all-pairs construction of the resultant pattern.
@@ -3529,8 +3568,8 @@ static void CombineChildVariants(TreePatternNode *Orig,
#ifndef NDEBUG
DEBUG(if (!Idxs.empty()) {
errs() << Orig->getOperator()->getName() << ": Idxs = [ ";
- for (unsigned i = 0; i < Idxs.size(); ++i) {
- errs() << Idxs[i] << " ";
+ for (unsigned Idx : Idxs) {
+ errs() << Idx << " ";
}
errs() << "]\n";
});
@@ -3539,8 +3578,8 @@ static void CombineChildVariants(TreePatternNode *Orig,
std::vector<TreePatternNode*> NewChildren;
for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
NewChildren.push_back(ChildVariants[i][Idxs[i]]);
- TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren,
- Orig->getNumTypes());
+ auto R = llvm::make_unique<TreePatternNode>(
+ Orig->getOperator(), NewChildren, Orig->getNumTypes());
// Copy over properties.
R->setName(Orig->getName());
@@ -3551,29 +3590,19 @@ static void CombineChildVariants(TreePatternNode *Orig,
// If this pattern cannot match, do not include it as a variant.
std::string ErrString;
- if (!R->canPatternMatch(ErrString, CDP)) {
- delete R;
- } else {
- bool AlreadyExists = false;
-
- // Scan to see if this pattern has already been emitted. We can get
- // duplication due to things like commuting:
- // (and GPRC:$a, GPRC:$b) -> (and GPRC:$b, GPRC:$a)
- // which are the same pattern. Ignore the dups.
- for (unsigned i = 0, e = OutVariants.size(); i != e; ++i)
- if (R->isIsomorphicTo(OutVariants[i], DepVars)) {
- AlreadyExists = true;
- break;
- }
-
- if (AlreadyExists)
- delete R;
- else
- OutVariants.push_back(R);
- }
+ // Scan to see if this pattern has already been emitted. We can get
+ // duplication due to things like commuting:
+ // (and GPRC:$a, GPRC:$b) -> (and GPRC:$b, GPRC:$a)
+ // which are the same pattern. Ignore the dups.
+ if (R->canPatternMatch(ErrString, CDP) &&
+ std::none_of(OutVariants.begin(), OutVariants.end(),
+ [&](TreePatternNode *Variant) {
+ return R->isIsomorphicTo(Variant, DepVars);
+ }))
+ OutVariants.push_back(R.release());
// Increment indices to the next permutation by incrementing the
- // indicies from last index backward, e.g., generate the sequence
+ // indices from last index backward, e.g., generate the sequence
// [0, 0], [0, 1], [1, 0], [1, 1].
int IdxsIdx;
for (IdxsIdx = Idxs.size() - 1; IdxsIdx >= 0; --IdxsIdx) {
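The loop that begins here advances Idxs like an odometer: bump the last digit, and on overflow reset it and carry leftwards, so every combination of child variants is visited exactly once. A self-contained sketch (two slots of two variants each, purely illustrative):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> Idxs(2, 0);  // index into each child's variant list
      const unsigned Radix[2] = {2, 2};  // two variants per child
      while (true) {
        std::printf("[%u, %u]\n", Idxs[0], Idxs[1]);
        int I;
        for (I = Idxs.size() - 1; I >= 0; --I) {
          if (++Idxs[I] < Radix[I])
            break;        // no carry needed
          Idxs[I] = 0;    // overflowed: reset and carry left
        }
        if (I < 0)
          break;          // carried past the first digit: all done
      }
      return 0; // prints [0, 0], [0, 1], [1, 0], [1, 1]
    }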
@@ -3724,7 +3753,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
// operands are the commutative operands, and there might be more operands
// after those.
assert(NC >= 3 &&
- "Commutative intrinsic should have at least 3 childrean!");
+ "Commutative intrinsic should have at least 3 children!");
std::vector<std::vector<TreePatternNode*> > Variants;
Variants.push_back(ChildVariants[0]); // Intrinsic id.
Variants.push_back(ChildVariants[2]);
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 9ce3cdf..76c9cef 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -132,22 +132,25 @@ namespace EEVT {
/// this and Other based on this information.
bool EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP);
- /// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
+ /// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type
/// whose element is VT.
bool EnforceVectorEltTypeIs(EEVT::TypeSet &VT, TreePattern &TP);
- /// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
+ /// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type
/// whose element is VT.
bool EnforceVectorEltTypeIs(MVT::SimpleValueType VT, TreePattern &TP);
- /// EnforceVectorSubVectorTypeIs - 'this' is now constrainted to
+ /// EnforceVectorSubVectorTypeIs - 'this' is now constrained to
/// be a vector type VT.
bool EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VT, TreePattern &TP);
- /// EnforceVectorSameNumElts - 'this' is now constrainted to
+ /// EnforceVectorSameNumElts - 'this' is now constrained to
/// be a vector with the same number of elements as VT.
bool EnforceVectorSameNumElts(EEVT::TypeSet &VT, TreePattern &TP);
+ /// EnforceSameSize - 'this' is now constrained to be the same size as VT.
+ bool EnforceSameSize(EEVT::TypeSet &VT, TreePattern &TP);
+
bool operator!=(const TypeSet &RHS) const { return TypeVec != RHS.TypeVec; }
bool operator==(const TypeSet &RHS) const { return TypeVec == RHS.TypeVec; }
@@ -173,7 +176,7 @@ struct SDTypeConstraint {
enum {
SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisVec, SDTCisSameAs,
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec,
- SDTCisSubVecOfVec, SDTCVecEltisVT, SDTCisSameNumEltsAs
+ SDTCisSubVecOfVec, SDTCVecEltisVT, SDTCisSameNumEltsAs, SDTCisSameSizeAs
} ConstraintType;
union { // The discriminated union.
@@ -201,6 +204,9 @@ struct SDTypeConstraint {
struct {
unsigned OtherOperandNum;
} SDTCisSameNumEltsAs_Info;
+ struct {
+ unsigned OtherOperandNum;
+ } SDTCisSameSizeAs_Info;
} x;
/// ApplyTypeConstraint - Given a node in a pattern, apply this type
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
index e83d503..366e8ec 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -78,6 +78,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
} else if (Rec->isSubClassOf("Operand")) {
PrintMethod = Rec->getValueAsString("PrintMethod");
OperandType = Rec->getValueAsString("OperandType");
+ OperandNamespace = Rec->getValueAsString("OperandNamespace");
// If there is an explicit encoder method, use it.
EncoderMethod = Rec->getValueAsString("EncoderMethod");
MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
diff --git a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
index f405557..7bdb7e1 100644
--- a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -60,9 +60,10 @@ namespace llvm {
IntrinsicSignature IS;
// Memory mod/ref behavior of this intrinsic.
- enum {
+ enum ModRefKind {
NoMem, ReadArgMem, ReadMem, ReadWriteArgMem, ReadWriteMem
- } ModRef;
+ };
+ ModRefKind ModRef;
/// This is set to true if the intrinsic is overloaded by its argument
/// types.
diff --git a/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp b/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
index 48df439..f66dd08 100644
--- a/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -471,7 +471,7 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
ListInit *ColFields = InstrMapDesc.getColFields();
const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
- OS << "// "<< InstrMapDesc.getName() << "\n";
+ OS << "// "<< InstrMapDesc.getName() << "\nLLVM_READONLY\n";
OS << "int "<< InstrMapDesc.getName() << "(uint16_t Opcode";
if (ValueCols.size() > 1) {
for (Init *CF : ColFields->getValues()) {
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
index c9e6d1d..ca316e9 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -1171,20 +1171,13 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
CoveringLanes = ~0u;
for (auto &Idx : SubRegIndices) {
if (Idx.getComposites().empty()) {
+ if (Bit > 31) {
+ PrintFatalError(
+ Twine("Ran out of lanemask bits to represent subregister ")
+ + Idx.getName());
+ }
Idx.LaneMask = 1u << Bit;
- // Share bit 31 in the unlikely case there are more than 32 leafs.
- //
- // Sharing bits is harmless; it allows graceful degradation in targets
- // with more than 32 vector lanes. They simply get a limited resolution
- // view of lanes beyond the 32nd.
- //
- // See also the comment for getSubRegIndexLaneMask().
- if (Bit < 31)
- ++Bit;
- else
- // Once bit 31 is shared among multiple leafs, the 'lane' it represents
- // is no longer covering its registers.
- CoveringLanes &= ~(1u << Bit);
+ ++Bit;
} else {
Idx.LaneMask = 0;
}
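With the grace-bit fallback gone, the invariant is simple: every leaf sub-register index owns exactly one distinct lane bit, and running out of bits is now a hard tablegen error rather than a silent loss of resolution. A minimal sketch of that allocation (hypothetical types, not the emitter's):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct SubRegIdx { uint32_t LaneMask = 0; };

    int main() {
      std::vector<SubRegIdx> Leaves(4); // stand-in for composite-free indices
      unsigned Bit = 0;
      for (SubRegIdx &Idx : Leaves) {
        if (Bit > 31) {                 // previously: share bit 31 and degrade
          std::fprintf(stderr, "ran out of lanemask bits\n");
          return 1;
        }
        Idx.LaneMask = 1u << Bit++;     // one distinct bit per leaf index
      }
      std::printf("last mask: 0x%x\n", Leaves.back().LaneMask); // 0x8
      return 0;
    }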
@@ -1274,6 +1267,12 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
continue;
LaneMask |= SubRegIndex.LaneMask;
}
+
+ // For classes without any subregisters, set LaneMask to ~0u instead of 0.
+ // This makes it easier for client code to handle classes uniformly.
+ if (LaneMask == 0)
+ LaneMask = ~0u;
+
RegClass.LaneMask = LaneMask;
}
}
@@ -1568,6 +1567,12 @@ void CodeGenRegBank::pruneUnitSets() {
&& UnitWeight == RegUnits[SuperSet.Units.back()].Weight) {
DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx
<< "\n");
+ // We can pick any of the set names for the merged set. Go for the
+ // shortest one to avoid picking the name of one of the classes that are
+ // artificially created by tablegen. So "FPR128_lo" instead of
+ // "QQQQ_with_qsub3_in_FPR128_lo".
+ if (RegUnitSets[SubIdx].Name.size() < RegUnitSets[SuperIdx].Name.size())
+ RegUnitSets[SuperIdx].Name = RegUnitSets[SubIdx].Name;
break;
}
}
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
index bc27481..c98f623 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -24,15 +24,9 @@ using namespace llvm;
#define DEBUG_TYPE "subtarget-emitter"
#ifndef NDEBUG
-static void dumpIdxVec(const IdxVec &V) {
- for (unsigned i = 0, e = V.size(); i < e; ++i) {
- dbgs() << V[i] << ", ";
- }
-}
-static void dumpIdxVec(const SmallVectorImpl<unsigned> &V) {
- for (unsigned i = 0, e = V.size(); i < e; ++i) {
- dbgs() << V[i] << ", ";
- }
+static void dumpIdxVec(ArrayRef<unsigned> V) {
+ for (unsigned Idx : V)
+ dbgs() << Idx << ", ";
}
#endif
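This is the first of many IdxVec-to-ArrayRef conversions in this file: a single non-owning ArrayRef<unsigned> parameter accepts std::vector, SmallVector, and C arrays alike, which is why the two dumpIdxVec overloads collapse into one. A small sketch of the call-site flexibility (illustrative only):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include <vector>

    static unsigned sum(llvm::ArrayRef<unsigned> V) {
      unsigned S = 0;
      for (unsigned N : V)
        S += N;
      return S;
    }

    int main() {
      std::vector<unsigned> A = {1, 2, 3};
      llvm::SmallVector<unsigned, 4> B = {4, 5};
      return sum(A) + sum(B) == 15 ? 0 : 1; // both bind to the same parameter
    }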
@@ -326,9 +320,9 @@ void CodeGenSchedModels::collectSchedRW() {
}
/// Compute a SchedWrite name from a sequence of writes.
-std::string CodeGenSchedModels::genRWName(const IdxVec& Seq, bool IsRead) {
+std::string CodeGenSchedModels::genRWName(ArrayRef<unsigned> Seq, bool IsRead) {
std::string Name("(");
- for (IdxIter I = Seq.begin(), E = Seq.end(); I != E; ++I) {
+ for (auto I = Seq.begin(), E = Seq.end(); I != E; ++I) {
if (I != Seq.begin())
Name += '_';
Name += getSchedRW(*I, IsRead).Name;
@@ -457,13 +451,13 @@ void CodeGenSchedModels::expandRWSeqForProc(
}
// Find the existing SchedWrite that models this sequence of writes.
-unsigned CodeGenSchedModels::findRWForSequence(const IdxVec &Seq,
+unsigned CodeGenSchedModels::findRWForSequence(ArrayRef<unsigned> Seq,
bool IsRead) {
std::vector<CodeGenSchedRW> &RWVec = IsRead ? SchedReads : SchedWrites;
for (std::vector<CodeGenSchedRW>::iterator I = RWVec.begin(), E = RWVec.end();
I != E; ++I) {
- if (I->Sequence == Seq)
+ if (makeArrayRef(I->Sequence) == Seq)
return I - RWVec.begin();
}
// Index zero reserved for invalid RW.
@@ -585,11 +579,11 @@ void CodeGenSchedModels::collectSchedClasses() {
/// Find an SchedClass that has been inferred from a per-operand list of
/// SchedWrites and SchedReads.
unsigned CodeGenSchedModels::findSchedClassIdx(Record *ItinClassDef,
- const IdxVec &Writes,
- const IdxVec &Reads) const {
+ ArrayRef<unsigned> Writes,
+ ArrayRef<unsigned> Reads) const {
for (SchedClassIter I = schedClassBegin(), E = schedClassEnd(); I != E; ++I) {
- if (I->ItinClassDef == ItinClassDef
- && I->Writes == Writes && I->Reads == Reads) {
+ if (I->ItinClassDef == ItinClassDef && makeArrayRef(I->Writes) == Writes &&
+ makeArrayRef(I->Reads) == Reads) {
return I - schedClassBegin();
}
}
@@ -603,20 +597,22 @@ unsigned CodeGenSchedModels::getSchedClassIdx(
return InstrClassMap.lookup(Inst.TheDef);
}
-std::string CodeGenSchedModels::createSchedClassName(
- Record *ItinClassDef, const IdxVec &OperWrites, const IdxVec &OperReads) {
+std::string
+CodeGenSchedModels::createSchedClassName(Record *ItinClassDef,
+ ArrayRef<unsigned> OperWrites,
+ ArrayRef<unsigned> OperReads) {
std::string Name;
if (ItinClassDef && ItinClassDef->getName() != "NoItinerary")
Name = ItinClassDef->getName();
- for (IdxIter WI = OperWrites.begin(), WE = OperWrites.end(); WI != WE; ++WI) {
+ for (unsigned Idx : OperWrites) {
if (!Name.empty())
Name += '_';
- Name += SchedWrites[*WI].Name;
+ Name += SchedWrites[Idx].Name;
}
- for (IdxIter RI = OperReads.begin(), RE = OperReads.end(); RI != RE; ++RI) {
+ for (unsigned Idx : OperReads) {
Name += '_';
- Name += SchedReads[*RI].Name;
+ Name += SchedReads[Idx].Name;
}
return Name;
}
@@ -636,10 +632,9 @@ std::string CodeGenSchedModels::createSchedClassName(const RecVec &InstDefs) {
/// SchedWrites and SchedReads. ProcIndices contains the set of IDs of
/// processors that may utilize this class.
unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef,
- const IdxVec &OperWrites,
- const IdxVec &OperReads,
- const IdxVec &ProcIndices)
-{
+ ArrayRef<unsigned> OperWrites,
+ ArrayRef<unsigned> OperReads,
+ ArrayRef<unsigned> ProcIndices) {
assert(!ProcIndices.empty() && "expect at least one ProcIdx");
unsigned Idx = findSchedClassIdx(ItinClassDef, OperWrites, OperReads);
@@ -1322,10 +1317,10 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
-void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
- const IdxVec &OperReads,
+void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
+ ArrayRef<unsigned> OperReads,
unsigned FromClassIdx,
- const IdxVec &ProcIndices) {
+ ArrayRef<unsigned> ProcIndices) {
DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") ");
// Create a seed transition with an empty PredTerm and the expanded sequences
@@ -1335,9 +1330,9 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
LastTransitions.back().ProcIndices.append(ProcIndices.begin(),
ProcIndices.end());
- for (IdxIter I = OperWrites.begin(), E = OperWrites.end(); I != E; ++I) {
+ for (unsigned WriteIdx : OperWrites) {
IdxVec WriteSeq;
- expandRWSequence(*I, WriteSeq, /*IsRead=*/false);
+ expandRWSequence(WriteIdx, WriteSeq, /*IsRead=*/false);
unsigned Idx = LastTransitions[0].WriteSequences.size();
LastTransitions[0].WriteSequences.resize(Idx + 1);
SmallVectorImpl<unsigned> &Seq = LastTransitions[0].WriteSequences[Idx];
@@ -1346,9 +1341,9 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") ");
}
DEBUG(dbgs() << " Reads: ");
- for (IdxIter I = OperReads.begin(), E = OperReads.end(); I != E; ++I) {
+ for (unsigned ReadIdx : OperReads) {
IdxVec ReadSeq;
- expandRWSequence(*I, ReadSeq, /*IsRead=*/true);
+ expandRWSequence(ReadIdx, ReadSeq, /*IsRead=*/true);
unsigned Idx = LastTransitions[0].ReadSequences.size();
LastTransitions[0].ReadSequences.resize(Idx + 1);
SmallVectorImpl<unsigned> &Seq = LastTransitions[0].ReadSequences[Idx];
@@ -1552,20 +1547,16 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) {
}
void CodeGenSchedModels::collectRWResources(unsigned RWIdx, bool IsRead,
- const IdxVec &ProcIndices) {
+ ArrayRef<unsigned> ProcIndices) {
const CodeGenSchedRW &SchedRW = getSchedRW(RWIdx, IsRead);
if (SchedRW.TheDef) {
if (!IsRead && SchedRW.TheDef->isSubClassOf("SchedWriteRes")) {
- for (IdxIter PI = ProcIndices.begin(), PE = ProcIndices.end();
- PI != PE; ++PI) {
- addWriteRes(SchedRW.TheDef, *PI);
- }
+ for (unsigned Idx : ProcIndices)
+ addWriteRes(SchedRW.TheDef, Idx);
}
else if (IsRead && SchedRW.TheDef->isSubClassOf("SchedReadAdvance")) {
- for (IdxIter PI = ProcIndices.begin(), PE = ProcIndices.end();
- PI != PE; ++PI) {
- addReadAdvance(SchedRW.TheDef, *PI);
- }
+ for (unsigned Idx : ProcIndices)
+ addReadAdvance(SchedRW.TheDef, Idx);
}
}
for (RecIter AI = SchedRW.Aliases.begin(), AE = SchedRW.Aliases.end();
@@ -1590,15 +1581,15 @@ void CodeGenSchedModels::collectRWResources(unsigned RWIdx, bool IsRead,
}
// Collect resources for a set of read/write types and processor indices.
-void CodeGenSchedModels::collectRWResources(const IdxVec &Writes,
- const IdxVec &Reads,
- const IdxVec &ProcIndices) {
+void CodeGenSchedModels::collectRWResources(ArrayRef<unsigned> Writes,
+ ArrayRef<unsigned> Reads,
+ ArrayRef<unsigned> ProcIndices) {
- for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI)
- collectRWResources(*WI, /*IsRead=*/false, ProcIndices);
+ for (unsigned Idx : Writes)
+ collectRWResources(Idx, /*IsRead=*/false, ProcIndices);
- for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI)
- collectRWResources(*RI, /*IsRead=*/true, ProcIndices);
+ for (unsigned Idx : Reads)
+ collectRWResources(Idx, /*IsRead=*/true, ProcIndices);
}
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.h b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
index e5241b9..f5c50c9 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
@@ -72,10 +72,10 @@ struct CodeGenSchedRW {
IsSequence = Def->isSubClassOf("WriteSequence");
}
- CodeGenSchedRW(unsigned Idx, bool Read, const IdxVec &Seq,
+ CodeGenSchedRW(unsigned Idx, bool Read, ArrayRef<unsigned> Seq,
const std::string &Name)
- : Index(Idx), Name(Name), TheDef(nullptr), IsRead(Read), IsAlias(false),
- HasVariants(false), IsVariadic(false), IsSequence(true), Sequence(Seq) {
+ : Index(Idx), Name(Name), TheDef(nullptr), IsRead(Read), IsAlias(false),
+ HasVariants(false), IsVariadic(false), IsSequence(true), Sequence(Seq) {
assert(Sequence.size() > 1 && "implied sequence needs >1 RWs");
}
@@ -144,8 +144,9 @@ struct CodeGenSchedClass {
CodeGenSchedClass(): Index(0), ItinClassDef(nullptr) {}
- bool isKeyEqual(Record *IC, const IdxVec &W, const IdxVec &R) {
- return ItinClassDef == IC && Writes == W && Reads == R;
+ bool isKeyEqual(Record *IC, ArrayRef<unsigned> W, ArrayRef<unsigned> R) {
+ return ItinClassDef == IC && makeArrayRef(Writes) == W &&
+ makeArrayRef(Reads) == R;
}
// Is this class generated from variants of existing classes? Instructions
@@ -256,18 +257,16 @@ public:
class_iterator classes_end() { return SchedClasses.end(); }
const_class_iterator classes_end() const { return SchedClasses.end(); }
iterator_range<class_iterator> classes() {
- return iterator_range<class_iterator>(classes_begin(), classes_end());
+ return make_range(classes_begin(), classes_end());
}
iterator_range<const_class_iterator> classes() const {
- return iterator_range<const_class_iterator>(classes_begin(), classes_end());
+ return make_range(classes_begin(), classes_end());
}
iterator_range<class_iterator> explicit_classes() {
- return iterator_range<class_iterator>(
- classes_begin(), classes_begin() + NumInstrSchedClasses);
+ return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses);
}
iterator_range<const_class_iterator> explicit_classes() const {
- return iterator_range<const_class_iterator>(
- classes_begin(), classes_begin() + NumInstrSchedClasses);
+ return make_range(classes_begin(), classes_begin() + NumInstrSchedClasses);
}
Record *getModelOrItinDef(Record *ProcDef) const {
@@ -363,14 +362,14 @@ public:
void expandRWSeqForProc(unsigned RWIdx, IdxVec &RWSeq, bool IsRead,
const CodeGenProcModel &ProcModel) const;
- unsigned addSchedClass(Record *ItinDef, const IdxVec &OperWrites,
- const IdxVec &OperReads, const IdxVec &ProcIndices);
+ unsigned addSchedClass(Record *ItinDef, ArrayRef<unsigned> OperWrites,
+ ArrayRef<unsigned> OperReads,
+ ArrayRef<unsigned> ProcIndices);
unsigned findOrInsertRW(ArrayRef<unsigned> Seq, bool IsRead);
- unsigned findSchedClassIdx(Record *ItinClassDef,
- const IdxVec &Writes,
- const IdxVec &Reads) const;
+ unsigned findSchedClassIdx(Record *ItinClassDef, ArrayRef<unsigned> Writes,
+ ArrayRef<unsigned> Reads) const;
Record *findProcResUnits(Record *ProcResKind,
const CodeGenProcModel &PM) const;
@@ -383,14 +382,14 @@ private:
void collectSchedRW();
- std::string genRWName(const IdxVec& Seq, bool IsRead);
- unsigned findRWForSequence(const IdxVec &Seq, bool IsRead);
+ std::string genRWName(ArrayRef<unsigned> Seq, bool IsRead);
+ unsigned findRWForSequence(ArrayRef<unsigned> Seq, bool IsRead);
void collectSchedClasses();
std::string createSchedClassName(Record *ItinClassDef,
- const IdxVec &OperWrites,
- const IdxVec &OperReads);
+ ArrayRef<unsigned> OperWrites,
+ ArrayRef<unsigned> OperReads);
std::string createSchedClassName(const RecVec &InstDefs);
void createInstRWClass(Record *InstRWDef);
@@ -400,8 +399,8 @@ private:
void inferSchedClasses();
- void inferFromRW(const IdxVec &OperWrites, const IdxVec &OperReads,
- unsigned FromClassIdx, const IdxVec &ProcIndices);
+ void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
+ unsigned FromClassIdx, ArrayRef<unsigned> ProcIndices);
void inferFromItinClass(Record *ItinClassDef, unsigned FromClassIdx);
void inferFromInstRWs(unsigned SCIdx);
@@ -413,10 +412,10 @@ private:
void collectItinProcResources(Record *ItinClassDef);
void collectRWResources(unsigned RWIdx, bool IsRead,
- const IdxVec &ProcIndices);
+ ArrayRef<unsigned> ProcIndices);
- void collectRWResources(const IdxVec &Writes, const IdxVec &Reads,
- const IdxVec &ProcIndices);
+ void collectRWResources(ArrayRef<unsigned> Writes, ArrayRef<unsigned> Reads,
+ ArrayRef<unsigned> ProcIndices);
void addProcResource(Record *ProcResourceKind, CodeGenProcModel &PM);
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
index 661975e..aaad422 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -76,6 +76,8 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i1: return "MVT::v16i1";
case MVT::v32i1: return "MVT::v32i1";
case MVT::v64i1: return "MVT::v64i1";
+ case MVT::v512i1: return "MVT::v512i1";
+ case MVT::v1024i1: return "MVT::v1024i1";
case MVT::v1i8: return "MVT::v1i8";
case MVT::v2i8: return "MVT::v2i8";
case MVT::v4i8: return "MVT::v4i8";
@@ -83,22 +85,29 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i8: return "MVT::v16i8";
case MVT::v32i8: return "MVT::v32i8";
case MVT::v64i8: return "MVT::v64i8";
+ case MVT::v128i8: return "MVT::v128i8";
+ case MVT::v256i8: return "MVT::v256i8";
case MVT::v1i16: return "MVT::v1i16";
case MVT::v2i16: return "MVT::v2i16";
case MVT::v4i16: return "MVT::v4i16";
case MVT::v8i16: return "MVT::v8i16";
case MVT::v16i16: return "MVT::v16i16";
case MVT::v32i16: return "MVT::v32i16";
+ case MVT::v64i16: return "MVT::v64i16";
+ case MVT::v128i16: return "MVT::v128i16";
case MVT::v1i32: return "MVT::v1i32";
case MVT::v2i32: return "MVT::v2i32";
case MVT::v4i32: return "MVT::v4i32";
case MVT::v8i32: return "MVT::v8i32";
case MVT::v16i32: return "MVT::v16i32";
+ case MVT::v32i32: return "MVT::v32i32";
+ case MVT::v64i32: return "MVT::v64i32";
case MVT::v1i64: return "MVT::v1i64";
case MVT::v2i64: return "MVT::v2i64";
case MVT::v4i64: return "MVT::v4i64";
case MVT::v8i64: return "MVT::v8i64";
case MVT::v16i64: return "MVT::v16i64";
+ case MVT::v32i64: return "MVT::v32i64";
case MVT::v1i128: return "MVT::v1i128";
case MVT::v2f16: return "MVT::v2f16";
case MVT::v4f16: return "MVT::v4f16";
@@ -112,6 +121,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64";
case MVT::v8f64: return "MVT::v8f64";
+ case MVT::token: return "MVT::token";
case MVT::Metadata: return "MVT::Metadata";
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
@@ -252,7 +262,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end());
// Remove duplicates.
- std::sort(LegalValueTypes.begin(), LegalValueTypes.end());
+ array_pod_sort(LegalValueTypes.begin(), LegalValueTypes.end());
LegalValueTypes.erase(std::unique(LegalValueTypes.begin(),
LegalValueTypes.end()),
LegalValueTypes.end());
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.h b/contrib/llvm/utils/TableGen/CodeGenTarget.h
index 24b3851..cf4a0bb 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.h
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.h
@@ -173,7 +173,7 @@ public:
inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();}
inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); }
iterator_range<inst_iterator> instructions() const {
- return iterator_range<inst_iterator>(inst_begin(), inst_end());
+ return make_range(inst_begin(), inst_end());
}
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 4659dc1..26f53dc 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/TableGen/Record.h"
@@ -36,6 +37,10 @@ class MatcherTableEmitter {
DenseMap<TreePattern *, unsigned> NodePredicateMap;
std::vector<TreePredicateFn> NodePredicates;
+
+ // We de-duplicate the predicates by code string, and use this map to track
+ // all the patterns with "identical" predicates.
+ StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun;
StringMap<unsigned> PatternPredicateMap;
std::vector<std::string> PatternPredicates;
@@ -62,10 +67,23 @@ private:
formatted_raw_ostream &OS);
unsigned getNodePredicate(TreePredicateFn Pred) {
- unsigned &Entry = NodePredicateMap[Pred.getOrigPatFragRecord()];
+ TreePattern *TP = Pred.getOrigPatFragRecord();
+ unsigned &Entry = NodePredicateMap[TP];
if (Entry == 0) {
- NodePredicates.push_back(Pred);
- Entry = NodePredicates.size();
+ TinyPtrVector<TreePattern *> &SameCodePreds =
+ NodePredicatesByCodeToRun[Pred.getCodeToRunOnSDNode()];
+ if (SameCodePreds.empty()) {
+ // We've never seen a predicate with the same code: allocate an entry.
+ NodePredicates.push_back(Pred);
+ Entry = NodePredicates.size();
+ } else {
+ // We did see an identical predicate: re-use it.
+ Entry = NodePredicateMap[SameCodePreds.front()];
+ assert(Entry != 0);
+ }
+ // In both cases, we've never seen this particular predicate before, so
+ // mark it in the list of predicates sharing the same code.
+ SameCodePreds.push_back(TP);
}
return Entry-1;
}
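The dedup above keys predicates by the exact code string they run, so textually identical PatFrag predicates share one CheckNodePredicate case instead of each getting their own. A simplified sketch of the interning scheme (plain strings standing in for TreePattern pointers, and without the same-code list the emitter keeps for comments):

    #include <map>
    #include <string>
    #include <vector>

    // Return a stable index for Code, allocating a table slot only the
    // first time a given code string is seen.
    static unsigned internPredicate(const std::string &Code,
                                    std::map<std::string, unsigned> &ByCode,
                                    std::vector<std::string> &Table) {
      auto It = ByCode.find(Code);
      if (It != ByCode.end())
        return It->second;              // identical code: re-use the entry
      Table.push_back(Code);            // first occurrence: allocate
      return ByCode[Code] = Table.size() - 1;
    }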
@@ -625,13 +643,6 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
}
// Emit Node predicates.
- // FIXME: Annoyingly, these are stored by name, which we never even emit. Yay?
- StringMap<TreePattern*> PFsByName;
-
- for (CodeGenDAGPatterns::pf_iterator I = CGP.pf_begin(), E = CGP.pf_end();
- I != E; ++I)
- PFsByName[I->first->getName()] = I->second.get();
-
if (!NodePredicates.empty()) {
OS << "bool CheckNodePredicate(SDNode *Node,\n";
OS << " unsigned PredNo) const override {\n";
@@ -642,7 +653,10 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
TreePredicateFn PredFn = NodePredicates[i];
assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
- OS << " case " << i << ": { // " << NodePredicates[i].getFnName() <<'\n';
+ OS << " case " << i << ": { \n";
+ for (auto *SimilarPred :
+ NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
+ OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
}
diff --git a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index 5060b6e..77afff7 100644
--- a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -15,16 +15,83 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "dfa-emitter"
+
#include "CodeGenTarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/Support/Debug.h"
#include <list>
#include <map>
#include <string>
+#include <queue>
using namespace llvm;
+// --------------------------------------------------------------------
+// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
+
+// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
+// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer.
+//
+// e.g. terms x resource bit combinations that fit in uint32_t:
+// 4 terms x 8 bits = 32 bits
+// 3 terms x 10 bits = 30 bits
+// 2 terms x 16 bits = 32 bits
+//
+// e.g. terms x resource bit combinations that fit in uint64_t:
+// 8 terms x 8 bits = 64 bits
+// 7 terms x 9 bits = 63 bits
+// 6 terms x 10 bits = 60 bits
+// 5 terms x 12 bits = 60 bits
+// 4 terms x 16 bits = 64 bits <--- current
+// 3 terms x 21 bits = 63 bits
+// 2 terms x 32 bits = 64 bits
+//
+#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
+#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.
+
+typedef uint64_t DFAInput;
+typedef int64_t DFAStateInput;
+#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
+
+namespace {
+ DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+ return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+ }
+
+ /// Return the DFAInput for an instruction class input vector.
+ /// This function is used in both DFAPacketizer.cpp and in
+ /// DFAPacketizerEmitter.cpp.
+ DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+ DFAInput InsnInput = 0;
+ assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
+ for (auto U : InsnClass)
+ InsnInput = addDFAFuncUnits(InsnInput, U);
+ return InsnInput;
+ }
+}
+// --------------------------------------------------------------------
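A worked example of the packing defined above, with the current 4-terms-by-16-bits split: each AND'ed resource term is shifted into its own 16-bit field, so an instruction class whose stages use the func-unit masks {0x3, 0x5} packs to (((0 << 16) | 0x3) << 16) | 0x5 == 0x30005. The values are illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    typedef uint64_t DFAInput;

    static DFAInput addFuncUnits(DFAInput Inp, unsigned FuncUnits) {
      return (Inp << 16) | FuncUnits; // shift in one 16-bit resource term
    }

    int main() {
      const std::vector<unsigned> InsnClass = {0x3, 0x5};
      DFAInput In = 0;
      for (unsigned U : InsnClass)
        In = addFuncUnits(In, U);
      assert(In == 0x30005);
      return 0;
    }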
+
+#ifndef NDEBUG
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo);
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent);
+#endif
+
//
// class DFAPacketizerEmitter: class that generates and prints out the DFA
// for resource tracking.
@@ -37,20 +104,48 @@ private:
// allInsnClasses is the set of all possible resources consumed by an
// InstrStage.
//
- DenseSet<unsigned> allInsnClasses;
+ std::vector<std::vector<unsigned>> allInsnClasses;
RecordKeeper &Records;
public:
DFAPacketizerEmitter(RecordKeeper &R);
//
- // collectAllInsnClasses: Populate allInsnClasses which is a set of units
+ // collectAllFuncUnits - Construct a map of function unit names to bits.
+ //
+ int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ int &maxResources,
+ raw_ostream &OS);
+
+ //
+ // collectAllComboFuncs - Construct a map from a combo function unit bit to
+ // the bits of all included functional units.
+ //
+ int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ raw_ostream &OS);
+
+ //
+ // collectOneInsnClass - Populate allInsnClasses with one instruction class.
+ //
+ int collectOneInsnClass(const std::string &ProcName,
+ std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ Record *ItinData,
+ raw_ostream &OS);
+
+ //
+ // collectAllInsnClasses - Populate allInsnClasses which is a set of units
// used in each stage.
//
- void collectAllInsnClasses(const std::string &Name,
- Record *ItinData,
- unsigned &NStages,
- raw_ostream &OS);
+ int collectAllInsnClasses(const std::string &ProcName,
+ std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ std::vector<Record*> &ItinDataList,
+ int &maxStages,
+ raw_ostream &OS);
void run(raw_ostream &OS);
};
@@ -87,7 +182,7 @@ class State {
const int stateNum;
mutable bool isInitial;
mutable std::set<unsigned> stateInfo;
- typedef std::map<unsigned, const State *> TransitionMap;
+ typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
mutable TransitionMap Transitions;
State();
@@ -97,28 +192,47 @@ class State {
}
//
- // canAddInsnClass - Returns true if an instruction of type InsnClass is a
- // valid transition from this state, i.e., can an instruction of type InsnClass
- // be added to the packet represented by this state.
+ // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+ // may be a valid transition from this state, i.e., can an instruction of type
+ // InsnClass be added to the packet represented by this state.
+ //
+ // Note that for multiple stages, this quick check does not take into account
+ // any possible resource competition between the stages themselves. That is
+ // enforced in AddInsnClassStages, which checks the cross product of all
+ // stages for resource availability (which is a more involved check).
+ //
+ bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
+ //
+ // AddInsnClass - Return all combinations of resource reservation
+ // which are possible from this state (PossibleStates).
//
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
//
- bool canAddInsnClass(unsigned InsnClass) const;
+ void AddInsnClass(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ std::set<unsigned> &PossibleStates) const;
//
- // AddInsnClass - Return all combinations of resource reservation
+ // AddInsnClassStages - Return all combinations of resource reservation
+ // resulting from the cross product of all stages for this InsnClass
// which are possible from this state (PossibleStates).
//
- void AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates) const;
- //
+ void AddInsnClassStages(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ unsigned chkstage, unsigned numstages,
+ unsigned prevState, unsigned origState,
+ DenseSet<unsigned> &VisitedResourceStates,
+ std::set<unsigned> &PossibleStates) const;
+ //
// addTransition - Add a transition from this state given the input InsnClass
//
- void addTransition(unsigned InsnClass, const State *To) const;
+ void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
//
// hasTransition - Returns true if there is a transition from this state
// given the input InsnClass
//
- bool hasTransition(unsigned InsnClass) const;
+ bool hasTransition(std::vector<unsigned> InsnClass) const;
};
} // End anonymous namespace.
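+
+// Editorial note: transitions are now keyed by the full per-stage resource
+// vector rather than a single unsigned, so a hypothetical two-stage class
+// is recorded and queried as, e.g.,
+//
+//   current->addTransition({0x1, 0x4}, NewState);
+//   current->hasTransition({0x1, 0x4});  // true only for that exact vector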
@@ -144,10 +258,54 @@ public:
//
// writeTable: Print out a table representing the DFA.
//
- void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
+ void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName,
+ int numInsnClasses = 0,
+ int maxResources = 0, int numCombos = 0, int maxStages = 0);
};
} // End anonymous namespace.
+#ifndef NDEBUG
+// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
+//
+// dbgsInsnClass - When debugging, print instruction class stages.
+//
+void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
+ DEBUG(dbgs() << "InsnClass: ");
+ for (unsigned i = 0; i < InsnClass.size(); ++i) {
+ if (i > 0) {
+ DEBUG(dbgs() << ", ");
+ }
+ DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i]));
+ }
+ DFAInput InsnInput = getDFAInsnInput(InsnClass);
+ DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")");
+}
+
+//
+// dbgsStateInfo - When debugging, print the set of state info.
+//
+void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
+ DEBUG(dbgs() << "StateInfo: ");
+ unsigned i = 0;
+ for (std::set<unsigned>::iterator SI = stateInfo.begin();
+ SI != stateInfo.end(); ++SI, ++i) {
+ unsigned thisState = *SI;
+ if (i > 0) {
+ DEBUG(dbgs() << ", ");
+ }
+ DEBUG(dbgs() << "0x" << utohexstr(thisState));
+ }
+}
+
+//
+// dbgsIndent - When debugging, indent by the specified amount.
+//
+void dbgsIndent(unsigned indent) {
+ for (unsigned i = 0; i < indent; ++i) {
+ DEBUG(dbgs() << " ");
+ }
+}
+#endif
//
// Constructors and destructors for State and DFA
@@ -157,10 +315,11 @@ State::State() :
DFA::DFA(): currentState(nullptr) {}
-//
+//
// addTransition - Add a transition from this state given the input InsnClass
//
-void State::addTransition(unsigned InsnClass, const State *To) const {
+void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
+ const {
assert(!Transitions.count(InsnClass) &&
"Cannot have multiple transitions for the same input");
Transitions[InsnClass] = To;
@@ -170,7 +329,7 @@ void State::addTransition(unsigned InsnClass, const State *To) const {
// hasTransition - Returns true if there is a transition from this state
// given the input InsnClass
//
-bool State::hasTransition(unsigned InsnClass) const {
+bool State::hasTransition(std::vector<unsigned> InsnClass) const {
return Transitions.count(InsnClass) > 0;
}
@@ -178,61 +337,170 @@ bool State::hasTransition(unsigned InsnClass) const {
// AddInsnClass - Return all combinations of resource reservation
// which are possible from this state (PossibleStates).
//
-void State::AddInsnClass(unsigned InsnClass,
- std::set<unsigned> &PossibleStates) const {
+// PossibleStates is the set of valid resource states that ensue from valid
+// transitions.
+//
+void State::AddInsnClass(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ std::set<unsigned> &PossibleStates) const {
//
// Iterate over all resource states in currentState.
//
+ unsigned numstages = InsnClass.size();
+ assert((numstages > 0) && "InsnClass has no stages");
for (std::set<unsigned>::iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
unsigned thisState = *SI;
- //
- // Iterate over all possible resources used in InsnClass.
- // For ex: for InsnClass = 0x11, all resources = {0x01, 0x10}.
- //
-
DenseSet<unsigned> VisitedResourceStates;
- for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
- if ((0x1 << j) & InsnClass) {
- //
- // For each possible resource used in InsnClass, generate the
- // resource state if that resource was used.
- //
- unsigned ResultingResourceState = thisState | (0x1 << j);
+
+ DEBUG(dbgs() << " thisState: 0x" << utohexstr(thisState) << "\n");
+ AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+ numstages - 1, numstages,
+ thisState, thisState,
+ VisitedResourceStates, PossibleStates);
+ }
+}
+
+void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ unsigned chkstage, unsigned numstages,
+ unsigned prevState, unsigned origState,
+ DenseSet<unsigned> &VisitedResourceStates,
+ std::set<unsigned> &PossibleStates) const {
+
+ assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
+ unsigned thisStage = InsnClass[chkstage];
+
+ DEBUG({
+ dbgsIndent((1 + numstages - chkstage) << 1);
+ dbgs() << "AddInsnClassStages " << chkstage << " (0x"
+ << utohexstr(thisStage) << ") from ";
+ dbgsInsnClass(InsnClass);
+ dbgs() << "\n";
+ });
+
+ //
+ // Iterate over all possible resources used in thisStage.
+ // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}.
+ //
+ for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) {
+ unsigned resourceMask = (0x1 << j);
+ if (resourceMask & thisStage) {
+ unsigned combo = ComboBitToBitsMap[resourceMask];
+ if (combo && ((~prevState & combo) != combo)) {
+ DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState)
+ << " - combo op 0x" << utohexstr(resourceMask)
+ << " (0x" << utohexstr(combo) << ") cannot be scheduled\n");
+ continue;
+ }
+ //
+ // For each possible resource used in thisStage, generate the
+ // resource state if that resource was used.
+ //
+ unsigned ResultingResourceState = prevState | resourceMask | combo;
+ DEBUG({
+ dbgsIndent((2 + numstages - chkstage) << 1);
+ dbgs() << "0x" << utohexstr(prevState)
+ << " | 0x" << utohexstr(resourceMask);
+ if (combo)
+ dbgs() << " | 0x" << utohexstr(combo);
+ dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " ";
+ });
+
+ //
+ // If this is the final stage for this class
+ //
+ if (chkstage == 0) {
//
// Check if the resulting resource state can be accommodated in this
// packet.
- // We compute ResultingResourceState OR thisState.
- // If the result of the OR is different than thisState, it implies
+ // We compute resource OR prevState (originally started as origState).
+ // If the result of the OR is different than origState, it implies
// that there is at least one resource that can be used to schedule
- // InsnClass in the current packet.
+ // thisStage in the current packet.
// Insert ResultingResourceState into PossibleStates only if we haven't
// processed ResultingResourceState before.
//
- if ((ResultingResourceState != thisState) &&
- (VisitedResourceStates.count(ResultingResourceState) == 0)) {
- VisitedResourceStates.insert(ResultingResourceState);
- PossibleStates.insert(ResultingResourceState);
+ if (ResultingResourceState != prevState) {
+ if (VisitedResourceStates.count(ResultingResourceState) == 0) {
+ VisitedResourceStates.insert(ResultingResourceState);
+ PossibleStates.insert(ResultingResourceState);
+ DEBUG(dbgs() << "\tResultingResourceState: 0x"
+ << utohexstr(ResultingResourceState) << "\n");
+ } else {
+ DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
+ }
+ } else {
+ DEBUG(dbgs() << "\tSkipped Add - no final resources available\n");
+ }
+ } else {
+ //
+ // If the current resource can be accommodated, check the next
+ // stage in InsnClass for available resources.
+ //
+ if (ResultingResourceState != prevState) {
+ DEBUG(dbgs() << "\n");
+ AddInsnClassStages(InsnClass, ComboBitToBitsMap,
+ chkstage - 1, numstages,
+ ResultingResourceState, origState,
+ VisitedResourceStates, PossibleStates);
+ } else {
+ DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
}
}
}
}
-
}
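+
+// Illustrative example (hypothetical two-unit machine): for a two-stage
+// class {0x1, 0x2} starting from resource state 0x0, the recursion first
+// reserves the last stage (0x2), giving 0x0 | 0x2 == 0x2, then recurses
+// into stage 0 (0x1), giving 0x2 | 0x1 == 0x3, so PossibleStates becomes
+// {0x3}. Any OR that leaves the state unchanged prunes that branch.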
//
-// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a
-// valid transition from this state i.e., can an instruction of type InsnClass
-// be added to the packet represented by this state.
+// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
+// may be a valid transition from this state, i.e., can an instruction of type
+// InsnClass be added to the packet represented by this state.
+//
+// Note that this routine performs conservative checks that can be
+// executed quickly, acting as a filter before calling AddInsnClassStages.
+// Any cases allowed through here are caught later in AddInsnClassStages,
+// which performs the more expensive exact check.
//
-bool State::canAddInsnClass(unsigned InsnClass) const {
+bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
- if (~*SI & InsnClass)
+
+ // Check to see if all required resources are available.
+ bool available = true;
+
+ // Inspect each stage independently.
+ // Note: this is a conservative check, as we aren't checking for
+ // possible resource competition between the stages themselves.
+ // The full cross product is examined later in AddInsnClass.
+ for (unsigned i = 0; i < InsnClass.size(); ++i) {
+ unsigned resources = *SI;
+ if ((~resources & InsnClass[i]) == 0) {
+ available = false;
+ break;
+ }
+ // Make sure _all_ resources for a combo function are available.
+ // Note: this is a quick conservative check, as it won't catch an
+ // unschedulable combo if this stage is an OR expression
+ // containing a combo.
+ // These cases are caught later in AddInsnClass.
+ unsigned combo = ComboBitToBitsMap[InsnClass[i]];
+ if (combo && ((~resources & combo) != combo)) {
+ DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources)
+ << " - combo op 0x" << utohexstr(InsnClass[i])
+ << " (0x" << utohexstr(combo) << ") cannot be scheduled\n");
+ available = false;
+ break;
+ }
+ }
+
+ if (available) {
return true;
+ }
}
return false;
}
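+
+// Illustrative example (hypothetical two-unit machine): if a resource
+// state in stateInfo is 0x1 (unit 0 busy) and a stage requests 0x3
+// (either unit), then (~0x1 & 0x3) != 0, so the class passes this filter
+// and the exact check runs later. If the state were 0x3 (both busy),
+// (~0x3 & 0x3) == 0 for that stage and the class is rejected here.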
@@ -244,7 +512,6 @@ const State &DFA::newState() {
return *IterPair.first;
}
-
int State::currentStateNum = 0;
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
@@ -263,57 +530,100 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
// the ith state.
//
//
-void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
- static const std::string SentinelEntry = "{-1, -1}";
- DFA::StateSet::iterator SI = states.begin();
+void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
+ int numInsnClasses,
+ int maxResources, int numCombos, int maxStages) {
+
+ unsigned numStates = states.size();
+
+ DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+ DEBUG(dbgs() << "writeTableAndAPI\n");
+ DEBUG(dbgs() << "Total states: " << numStates << "\n");
+
+ OS << "namespace llvm {\n";
+
+ OS << "\n// Input format:\n";
+ OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
+ << "\t// maximum AND'ed resource terms\n";
+ OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
+ << "\t// maximum resource bits in one term\n";
+
+ OS << "\n// " << TargetName << "DFAStateInputTable[][2] = "
+ << "pairs of <Input, NextState> for all valid\n";
+ OS << "// transitions.\n";
+ OS << "// " << numStates << "\tstates\n";
+ OS << "// " << numInsnClasses << "\tinstruction classes\n";
+ OS << "// " << maxResources << "\tresources max\n";
+ OS << "// " << numCombos << "\tcombo resources\n";
+ OS << "// " << maxStages << "\tstages max\n";
+ OS << "const " << DFA_TBLTYPE << " "
+ << TargetName << "DFAStateInputTable[][2] = {\n";
+
// This table provides a map to the beginning of the transitions for State s
// in DFAStateInputTable.
- std::vector<int> StateEntry(states.size());
-
- OS << "namespace llvm {\n\n";
- OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
+ std::vector<int> StateEntry(numStates+1);
+ static const std::string SentinelEntry = "{-1, -1}";
// Tracks the total valid transitions encountered so far. It is used
// to construct the StateEntry table.
int ValidTransitions = 0;
- for (unsigned i = 0; i < states.size(); ++i, ++SI) {
+ DFA::StateSet::iterator SI = states.begin();
+ for (unsigned i = 0; i < numStates; ++i, ++SI) {
assert ((SI->stateNum == (int) i) && "Mismatch in state numbers");
StateEntry[i] = ValidTransitions;
for (State::TransitionMap::iterator
II = SI->Transitions.begin(), IE = SI->Transitions.end();
II != IE; ++II) {
- OS << "{" << II->first << ", "
+ OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", "
<< II->second->stateNum
- << "}, ";
+ << "},\t";
}
ValidTransitions += SI->Transitions.size();
// If there are no valid transitions from this stage, we need a sentinel
// transition.
if (ValidTransitions == StateEntry[i]) {
- OS << SentinelEntry << ",";
+ OS << SentinelEntry << ",\t";
++ValidTransitions;
}
+ OS << " // state " << i << ": " << StateEntry[i];
+ if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition.
+ OS << "-" << (ValidTransitions-1);
+ }
OS << "\n";
}
// Print out a sentinel entry at the end of the StateInputTable. This is
// needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
- OS << SentinelEntry << "\n";
-
+ OS << SentinelEntry << "\t";
+ OS << " // state " << numStates << ": " << ValidTransitions;
+ OS << "\n";
+
OS << "};\n\n";
+ OS << "// " << TargetName << "DFAStateEntryTable[i] = "
+ << "Index of the first entry in DFAStateInputTable for\n";
+ OS << "// "
+ << "the ith state.\n";
+ OS << "// " << numStates << " states\n";
OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
// Multiply i by 2 since each entry in DFAStateInputTable is a set of
// two numbers.
- for (unsigned i = 0; i < states.size(); ++i)
+ unsigned lastState = 0;
+ for (unsigned i = 0; i < numStates; ++i) {
+ if (i && ((i % 10) == 0)) {
+ lastState = i-1;
+ OS << " // states " << (i-10) << ":" << lastState << "\n";
+ }
OS << StateEntry[i] << ", ";
+ }
// Print out the index to the sentinel entry in StateInputTable
OS << ValidTransitions << ", ";
+ OS << " // states " << (lastState+1) << ":" << numStates << "\n";
- OS << "\n};\n";
+ OS << "};\n";
OS << "} // namespace\n";
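+
+// For reference, the generated output now looks roughly like this
+// (hypothetical "Foo" target; values depend entirely on the itineraries):
+//
+//   const <DFA_TBLTYPE> FooDFAStateInputTable[][2] = {
+//     {0x1, 1},  {0x2, 2},   // state 0: 0-1
+//     {-1, -1},              // state 1: 2
+//   };
+//   const unsigned int FooDFAStateEntryTable[] = {
+//     0, 2, 3,               // states 1:2
+//   };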
@@ -332,40 +642,118 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
//
-// collectAllInsnClasses - Populate allInsnClasses which is a set of units
-// used in each stage.
+// collectAllFuncUnits - Construct a map of function unit names to bits.
//
-void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
- Record *ItinData,
- unsigned &NStages,
- raw_ostream &OS) {
- // Collect processor itineraries.
- std::vector<Record*> ProcItinList =
- Records.getAllDerivedDefinitions("ProcessorItineraries");
-
- // If just no itinerary then don't bother.
- if (ProcItinList.size() < 2)
- return;
- std::map<std::string, unsigned> NameToBitsMap;
+int DFAPacketizerEmitter::collectAllFuncUnits(
+ std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ int &maxFUs,
+ raw_ostream &OS) {
+ DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+ DEBUG(dbgs() << "collectAllFuncUnits");
+ DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
+ int totalFUs = 0;
// Parse functional units for all the itineraries.
for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
Record *Proc = ProcItinList[i];
std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+ DEBUG(dbgs() << " FU:" << i
+ << " (" << FUs.size() << " FUs) "
+ << Proc->getName());
+
// Convert macros to bits for each stage.
- for (unsigned i = 0, N = FUs.size(); i < N; ++i)
- NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
+ unsigned numFUs = FUs.size();
+ for (unsigned j = 0; j < numFUs; ++j) {
+ assert ((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of representable resources");
+ unsigned FuncResources = (unsigned) (1U << j);
+ FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
+ DEBUG(dbgs() << " " << FUs[j]->getName()
+ << ":0x" << utohexstr(FuncResources));
+ }
+ if (((int) numFUs) > maxFUs) {
+ maxFUs = numFUs;
+ }
+ totalFUs += numFUs;
+ DEBUG(dbgs() << "\n");
}
+ return totalFUs;
+}
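+
+// Illustrative mapping (hypothetical unit names): an itinerary with
+// functional units [SLOT0, SLOT1, SLOT2, SLOT3] produces
+//
+//   FUNameToBitsMap["SLOT0"] == 0x1    FUNameToBitsMap["SLOT2"] == 0x4
+//   FUNameToBitsMap["SLOT1"] == 0x2    FUNameToBitsMap["SLOT3"] == 0x8
+//
+// with maxFUs updated to 4 and totalFUs (the return value) of 4.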
+
+//
+// collectAllComboFuncs - Construct a map from a combo function unit bit to
+// the bits of all included functional units.
+//
+int DFAPacketizerEmitter::collectAllComboFuncs(
+ std::vector<Record*> &ComboFuncList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ std::map<unsigned, unsigned> &ComboBitToBitsMap,
+ raw_ostream &OS) {
+ DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
+ DEBUG(dbgs() << "collectAllComboFuncs");
+ DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
+ int numCombos = 0;
+ for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
+ Record *Func = ComboFuncList[i];
+ std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
+
+ DEBUG(dbgs() << " CFD:" << i
+ << " (" << FUs.size() << " combo FUs) "
+ << Func->getName() << "\n");
+
+ // Convert macros to bits for each stage.
+ for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
+ assert ((j < DFA_MAX_RESOURCES) &&
+ "Exceeded maximum number of DFA resources");
+ Record *FuncData = FUs[j];
+ Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
+ const std::vector<Record*> &FuncList =
+ FuncData->getValueAsListOfDefs("FuncList");
+ std::string ComboFuncName = ComboFunc->getName();
+ unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
+ unsigned ComboResources = ComboBit;
+ DEBUG(dbgs() << " combo: " << ComboFuncName
+ << ":0x" << utohexstr(ComboResources) << "\n");
+ for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
+ std::string FuncName = FuncList[k]->getName();
+ unsigned FuncResources = FUNameToBitsMap[FuncName];
+ DEBUG(dbgs() << " " << FuncName
+ << ":0x" << utohexstr(FuncResources) << "\n");
+ ComboResources |= FuncResources;
+ }
+ ComboBitToBitsMap[ComboBit] = ComboResources;
+ numCombos++;
+ DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x"
+ << utohexstr(ComboBit) << " = 0x"
+ << utohexstr(ComboResources) << "\n");
+ }
+ }
+ return numCombos;
+}
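+
+// Illustrative mapping (hypothetical names, continuing the SLOT example):
+// a combo unit SLOT23 with its own bit 0x10, covering [SLOT2, SLOT3],
+// yields
+//
+//   ComboBitToBitsMap[0x10] == 0x10 | 0x4 | 0x8 == 0x1C
+//
+// i.e. reserving the combo bit implicitly reserves every included unit.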
+
+//
+// collectOneInsnClass - Populate allInsnClasses with one instruction class.
+//
+int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
+ std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ Record *ItinData,
+ raw_ostream &OS) {
const std::vector<Record*> &StageList =
ItinData->getValueAsListOfDefs("Stages");
// The number of stages.
- NStages = StageList.size();
+ unsigned NStages = StageList.size();
+
+ DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName()
+ << "\n");
- // For each unit.
- unsigned UnitBitValue = 0;
+ std::vector<unsigned> UnitBits;
// Compute the bitwise or of each unit used in this stage.
for (unsigned i = 0; i < NStages; ++i) {
@@ -375,18 +763,74 @@ void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
const std::vector<Record*> &UnitList =
Stage->getValueAsListOfDefs("Units");
+ DEBUG(dbgs() << " stage:" << i
+ << " [" << UnitList.size() << " units]:");
+ unsigned dbglen = 26; // cursor after stage dbgs
+
+ // Compute the bitwise or of each unit used in this stage.
+ unsigned UnitBitValue = 0;
for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
// Conduct bitwise or.
std::string UnitName = UnitList[j]->getName();
- assert(NameToBitsMap.count(UnitName));
- UnitBitValue |= NameToBitsMap[UnitName];
+ DEBUG(dbgs() << " " << j << ":" << UnitName);
+ dbglen += 3 + UnitName.length();
+ assert(FUNameToBitsMap.count(UnitName));
+ UnitBitValue |= FUNameToBitsMap[UnitName];
}
if (UnitBitValue != 0)
- allInsnClasses.insert(UnitBitValue);
+ UnitBits.push_back(UnitBitValue);
+
+ while (dbglen <= 64) { // line up bits dbgs
+ dbglen += 8;
+ DEBUG(dbgs() << "\t");
+ }
+ DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
}
+
+ if (!UnitBits.empty())
+ allInsnClasses.push_back(UnitBits);
+
+ DEBUG({
+ dbgs() << " ";
+ dbgsInsnClass(UnitBits);
+ dbgs() << "\n";
+ });
+
+ return NStages;
}
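+
+// Illustrative example (hypothetical itinerary): a class whose stages are
+// [SLOT0 | SLOT1] followed by [SLOT2] collects UnitBits == {0x3, 0x4},
+// appends that vector to allInsnClasses, and returns NStages == 2.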
+//
+// collectAllInsnClasses - Populate allInsnClasses which is a set of units
+// used in each stage.
+//
+int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
+ std::vector<Record*> &ProcItinList,
+ std::map<std::string, unsigned> &FUNameToBitsMap,
+ std::vector<Record*> &ItinDataList,
+ int &maxStages,
+ raw_ostream &OS) {
+ // Collect all instruction classes.
+ unsigned M = ItinDataList.size();
+
+ int numInsnClasses = 0;
+ DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"
+ << "collectAllInsnClasses "
+ << ProcName
+ << " (" << M << " classes)\n");
+
+ // Collect stages for each instruction class for all itinerary data
+ for (unsigned j = 0; j < M; j++) {
+ Record *ItinData = ItinDataList[j];
+ int NStages = collectOneInsnClass(ProcName, ProcItinList,
+ FUNameToBitsMap, ItinData, OS);
+ if (NStages > maxStages) {
+ maxStages = NStages;
+ }
+ numInsnClasses++;
+ }
+ return numInsnClasses;
+}
//
// Run the worklist algorithm to generate the DFA.
@@ -398,16 +842,35 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
Records.getAllDerivedDefinitions("ProcessorItineraries");
//
- // Collect the instruction classes.
+ // Collect the Functional units.
+ //
+ std::map<std::string, unsigned> FUNameToBitsMap;
+ int maxResources = 0;
+ collectAllFuncUnits(ProcItinList,
+ FUNameToBitsMap, maxResources, OS);
+
+ //
+ // Collect the Combo Functional units.
//
+ std::map<unsigned, unsigned> ComboBitToBitsMap;
+ std::vector<Record*> ComboFuncList =
+ Records.getAllDerivedDefinitions("ComboFuncUnits");
+ int numCombos = collectAllComboFuncs(ComboFuncList,
+ FUNameToBitsMap, ComboBitToBitsMap, OS);
+
+ //
+ // Collect the itineraries.
+ //
+ int maxStages = 0;
+ int numInsnClasses = 0;
for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
Record *Proc = ProcItinList[i];
// Get processor itinerary name.
- const std::string &Name = Proc->getName();
+ const std::string &ProcName = Proc->getName();
// Skip default.
- if (Name == "NoItineraries")
+ if (ProcName == "NoItineraries")
continue;
// Sanity check for at least one instruction itinerary class.
@@ -419,15 +882,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
// Get itinerary data list.
std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
- // Collect instruction classes for all itinerary data.
- for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
- Record *ItinData = ItinDataList[j];
- unsigned NStages;
- collectAllInsnClasses(Name, ItinData, NStages, OS);
- }
+ // Collect all instruction classes
+ numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
+ FUNameToBitsMap, ItinDataList, maxStages, OS);
}
-
//
// Run a worklist algorithm to generate the DFA.
//
@@ -436,6 +895,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
Initial->isInitial = true;
Initial->stateInfo.insert(0x0);
SmallVector<const State*, 32> WorkList;
std::map<std::set<unsigned>, const State*> Visited;
WorkList.push_back(Initial);
@@ -459,9 +919,19 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
//
while (!WorkList.empty()) {
const State *current = WorkList.pop_back_val();
- for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
- CE = allInsnClasses.end(); CI != CE; ++CI) {
- unsigned InsnClass = *CI;
+ DEBUG({
+ dbgs() << "---------------------\n";
+ dbgs() << "Processing state: " << current->stateNum << " - ";
+ dbgsStateInfo(current->stateInfo);
+ dbgs() << "\n";
+ });
+ for (unsigned i = 0; i < allInsnClasses.size(); i++) {
+ std::vector<unsigned> InsnClass = allInsnClasses[i];
+ DEBUG({
+ dbgs() << i << " ";
+ dbgsInsnClass(InsnClass);
+ dbgs() << "\n";
+ });
std::set<unsigned> NewStateResources;
//
@@ -469,32 +939,52 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
// and the state can accommodate this InsnClass, create a transition.
//
if (!current->hasTransition(InsnClass) &&
- current->canAddInsnClass(InsnClass)) {
- const State *NewState;
- current->AddInsnClass(InsnClass, NewStateResources);
- assert(!NewStateResources.empty() && "New states must be generated");
+ current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
+ const State *NewState = nullptr;
+ current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
+ if (NewStateResources.empty()) {
+ DEBUG(dbgs() << " Skipped - no new states generated\n");
+ continue;
+ }
+
+ DEBUG({
+ dbgs() << "\t";
+ dbgsStateInfo(NewStateResources);
+ dbgs() << "\n";
+ });
//
// If we have seen this state before, then do not create a new state.
//
- //
auto VI = Visited.find(NewStateResources);
- if (VI != Visited.end())
+ if (VI != Visited.end()) {
NewState = VI->second;
- else {
+ DEBUG({
+ dbgs() << "\tFound existing state: " << NewState->stateNum
+ << " - ";
+ dbgsStateInfo(NewState->stateInfo);
+ dbgs() << "\n";
+ });
+ } else {
NewState = &D.newState();
NewState->stateInfo = NewStateResources;
Visited[NewStateResources] = NewState;
WorkList.push_back(NewState);
+ DEBUG({
+ dbgs() << "\tAccepted new state: " << NewState->stateNum << " - ";
+ dbgsStateInfo(NewState->stateInfo);
+ dbgs() << "\n";
+ });
}
-
+
current->addTransition(InsnClass, NewState);
}
}
}
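+
+// Sketch of one worklist iteration (hypothetical single-unit machine with
+// one class {0x1}): from the initial state {0x0}, AddInsnClass yields
+// NewStateResources == {0x1}; a state for {0x1} is created, the transition
+// {0x1} -> NewState is recorded, and the state is pushed. From {0x1} the
+// class no longer passes canMaybeAddInsnClass, so the worklist drains and
+// the algorithm terminates with two states.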
// Print out the table.
- D.writeTableAndAPI(OS, TargetName);
+ D.writeTableAndAPI(OS, TargetName,
+ numInsnClasses, maxResources, numCombos, maxStages);
}
namespace llvm {
diff --git a/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp b/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
index f02051a..e859527 100644
--- a/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -134,7 +134,7 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
PredicateNamespace = "ARM";
EmitFixedLenDecoder(Records, OS, PredicateNamespace,
- "if (!Check(S, ", ")) return MCDisassembler::Fail;",
+ "if (!Check(S, ", "))",
"S", "MCDisassembler::Fail",
" MCDisassembler::DecodeStatus S = "
"MCDisassembler::Success;\n(void)S;");
@@ -142,8 +142,7 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
}
EmitFixedLenDecoder(Records, OS, Target.getName(),
- "if (", " == MCDisassembler::Fail)"
- " return MCDisassembler::Fail;",
+ "if (", " == MCDisassembler::Fail)",
"MCDisassembler::Success", "MCDisassembler::Fail", "");
}
diff --git a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index c4df283..8ca4a1b 100644
--- a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -44,9 +44,10 @@ struct EncodingField {
struct OperandInfo {
std::vector<EncodingField> Fields;
std::string Decoder;
+ bool HasCompleteDecoder;
- OperandInfo(std::string D)
- : Decoder(D) { }
+ OperandInfo(std::string D, bool HCD)
+ : Decoder(D), HasCompleteDecoder(HCD) { }
void addField(unsigned Base, unsigned Width, unsigned Offset) {
Fields.push_back(EncodingField(Base, Width, Offset));
@@ -64,8 +65,8 @@ typedef std::vector<uint8_t> DecoderTable;
typedef uint32_t DecoderFixup;
typedef std::vector<DecoderFixup> FixupList;
typedef std::vector<FixupList> FixupScopeList;
-typedef SetVector<std::string> PredicateSet;
-typedef SetVector<std::string> DecoderSet;
+typedef SmallSetVector<std::string, 16> PredicateSet;
+typedef SmallSetVector<std::string, 16> DecoderSet;
struct DecoderTableInfo {
DecoderTable Table;
FixupScopeList FixupStack;
@@ -85,8 +86,7 @@ public:
FixedLenDecoderEmitter(RecordKeeper &R,
std::string PredicateNamespace,
std::string GPrefix = "if (",
- std::string GPostfix = " == MCDisassembler::Fail)"
- " return MCDisassembler::Fail;",
+ std::string GPostfix = " == MCDisassembler::Fail)",
std::string ROK = "MCDisassembler::Success",
std::string RFail = "MCDisassembler::Fail",
std::string L = "") :
@@ -448,10 +448,13 @@ protected:
const Filter &Best) const;
void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
- const OperandInfo &OpInfo) const;
+ const OperandInfo &OpInfo,
+ bool &OpHasCompleteDecoder) const;
- void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc) const;
- unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc) const;
+ void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
+ bool &HasCompleteDecoder) const;
+ unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
+ bool &HasCompleteDecoder) const;
// Assign a single filter and run with it.
void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
@@ -779,7 +782,9 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
break;
}
- case MCD::OPC_Decode: {
+ case MCD::OPC_Decode:
+ case MCD::OPC_TryDecode: {
+ bool IsTry = *I == MCD::OPC_TryDecode;
++I;
// Extract the ULEB128 encoded Opcode to a buffer.
uint8_t Buffer[8], *p = Buffer;
@@ -788,7 +793,8 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
&& "ULEB128 value too large!");
// Decode the Opcode value.
unsigned Opc = decodeULEB128(Buffer);
- OS.indent(Indentation) << "MCD::OPC_Decode, ";
+ OS.indent(Indentation) << "MCD::OPC_" << (IsTry ? "Try" : "")
+ << "Decode, ";
for (p = Buffer; *p >= 128; ++p)
OS << utostr(*p) << ", ";
OS << utostr(*p) << ", ";
@@ -798,8 +804,25 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << utostr(*I) << ", ";
OS << utostr(*I++) << ", ";
+ if (!IsTry) {
+ OS << "// Opcode: "
+ << NumberedInstructions->at(Opc)->TheDef->getName() << "\n";
+ break;
+ }
+
+ // Fallthrough for OPC_TryDecode.
+
+ // 16-bit numtoskip value.
+ uint8_t Byte = *I++;
+ uint32_t NumToSkip = Byte;
+ OS << utostr(Byte) << ", ";
+ Byte = *I++;
+ OS << utostr(Byte) << ", ";
+ NumToSkip |= Byte << 8;
+
OS << "// Opcode: "
- << NumberedInstructions->at(Opc)->TheDef->getName() << "\n";
+ << NumberedInstructions->at(Opc)->TheDef->getName()
+ << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
break;
}
case MCD::OPC_SoftFail: {
@@ -876,8 +899,9 @@ emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
<< " unsigned Idx, InsnType insn, MCInst &MI,\n";
OS.indent(Indentation) << " uint64_t "
- << "Address, const void *Decoder) {\n";
+ << "Address, const void *Decoder, bool &DecodeComplete) {\n";
Indentation += 2;
+ OS.indent(Indentation) << "DecodeComplete = true;\n";
OS.indent(Indentation) << "InsnType tmp;\n";
OS.indent(Indentation) << "switch (Idx) {\n";
OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
@@ -1033,7 +1057,8 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
}
void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
- const OperandInfo &OpInfo) const {
+ const OperandInfo &OpInfo,
+ bool &OpHasCompleteDecoder) const {
const std::string &Decoder = OpInfo.Decoder;
if (OpInfo.numFields() != 1)
@@ -1049,45 +1074,57 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
o << ";\n";
}
- if (Decoder != "")
+ if (Decoder != "") {
+ OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
o.indent(Indentation) << Emitter->GuardPrefix << Decoder
- << "(MI, tmp, Address, Decoder)"
- << Emitter->GuardPostfix << "\n";
- else
+ << "(MI, tmp, Address, Decoder)"
+ << Emitter->GuardPostfix
+ << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
+ << "return MCDisassembler::Fail; }\n";
+ } else {
+ OpHasCompleteDecoder = true;
o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n";
-
+ }
}
void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
- unsigned Opc) const {
+ unsigned Opc, bool &HasCompleteDecoder) const {
+ HasCompleteDecoder = true;
+
for (const auto &Op : Operands.find(Opc)->second) {
// If a custom instruction decoder was specified, use that.
if (Op.numFields() == 0 && Op.Decoder.size()) {
+ HasCompleteDecoder = Op.HasCompleteDecoder;
OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder
<< "(MI, insn, Address, Decoder)"
- << Emitter->GuardPostfix << "\n";
+ << Emitter->GuardPostfix
+ << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
+ << "return MCDisassembler::Fail; }\n";
break;
}
- emitBinaryParser(OS, Indentation, Op);
+ bool OpHasCompleteDecoder;
+ emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder);
+ if (!OpHasCompleteDecoder)
+ HasCompleteDecoder = false;
}
}
unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
- unsigned Opc) const {
+ unsigned Opc,
+ bool &HasCompleteDecoder) const {
// Build up the predicate string.
SmallString<256> Decoder;
// FIXME: emitDecoder() function can take a buffer directly rather than
// a stream.
raw_svector_ostream S(Decoder);
unsigned I = 4;
- emitDecoder(S, I, Opc);
- S.flush();
+ emitDecoder(S, I, Opc, HasCompleteDecoder);
// Using the full decoder string as the key value here is a bit
// heavyweight, but is effective. If the string comparisons become a
// performance concern, we can implement a mangling of the predicate
- // data easilly enough with a map back to the actual string. That's
+ // data easily enough with a map back to the actual string. That's
// overkill for now, though.
// Make sure the predicate is in the table.
@@ -1162,7 +1199,7 @@ unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
// Using the full predicate string as the key value here is a bit
// heavyweight, but is effective. If the string comparisons become a
// performance concern, we can implement a mangling of the predicate
- // data easilly enough with a map back to the actual string. That's
+ // data easily enough with a map back to the actual string. That's
// overkill for now, though.
// Make sure the predicate is in the table.
@@ -1193,7 +1230,6 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
SmallString<16> PBytes;
raw_svector_ostream S(PBytes);
encodeULEB128(PIdx, S);
- S.flush();
TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
// Predicate index
@@ -1252,16 +1288,13 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
raw_svector_ostream S(MaskBytes);
if (NeedPositiveMask) {
encodeULEB128(PositiveMask.getZExtValue(), S);
- S.flush();
for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
TableInfo.Table.push_back(MaskBytes[i]);
} else
TableInfo.Table.push_back(0);
if (NeedNegativeMask) {
MaskBytes.clear();
- S.resync();
encodeULEB128(NegativeMask.getZExtValue(), S);
- S.flush();
for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
TableInfo.Table.push_back(MaskBytes[i]);
} else
@@ -1308,22 +1341,41 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// Check for soft failure of the match.
emitSoftFailTableEntry(TableInfo, Opc);
- TableInfo.Table.push_back(MCD::OPC_Decode);
+ bool HasCompleteDecoder;
+ unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc, HasCompleteDecoder);
+
+ // Produce an OPC_Decode or OPC_TryDecode opcode based on whether the
+ // instruction decoder is complete or not. If it is complete
+ // then it handles all possible values of remaining variable/unfiltered bits
+ // and for any value can determine if the bitpattern is a valid instruction
+ // or not. This means OPC_Decode will be the final step in the decoding
+ // process. If it is not complete, then the Fail return code from the
+ // decoder method indicates that additional processing should be done to see
+ // if there is any other instruction that also matches the bitpattern and
+ // can decode it.
+ TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
+ MCD::OPC_TryDecode);
uint8_t Buffer[8], *p;
encodeULEB128(Opc, Buffer);
for (p = Buffer; *p >= 128 ; ++p)
TableInfo.Table.push_back(*p);
TableInfo.Table.push_back(*p);
- unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc);
SmallString<16> Bytes;
raw_svector_ostream S(Bytes);
encodeULEB128(DIdx, S);
- S.flush();
// Decoder index
for (unsigned i = 0, e = Bytes.size(); i != e; ++i)
TableInfo.Table.push_back(Bytes[i]);
+
+ if (!HasCompleteDecoder) {
+ // Push location for NumToSkip backpatching.
+ TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+ // Allocate the space for the fixup.
+ TableInfo.Table.push_back(0);
+ TableInfo.Table.push_back(0);
+ }
}
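+
+// For reference, an incomplete-decoder entry emitted above is laid out as
+// (sketch; widths follow the encodeULEB128 calls):
+//
+//   MCD::OPC_TryDecode, <Opcode ULEB128>, <DecoderIdx ULEB128>,
+//   <NumToSkip low byte>, <NumToSkip high byte>
+//
+// where the two NumToSkip bytes are emitted as zero here and backpatched
+// later through the location pushed onto FixupStack.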
// Emits table entries to decode the singleton, and then to decode the rest.
@@ -1679,7 +1731,8 @@ static bool populateInstruction(CodeGenTarget &Target,
// of trying to auto-generate the decoder.
std::string InstDecoder = Def.getValueAsString("DecoderMethod");
if (InstDecoder != "") {
- InsnOperands.push_back(OperandInfo(InstDecoder));
+ bool HasCompleteInstDecoder = Def.getValueAsBit("hasCompleteDecoder");
+ InsnOperands.push_back(OperandInfo(InstDecoder, HasCompleteInstDecoder));
Operands[Opc] = InsnOperands;
return true;
}
@@ -1835,7 +1888,14 @@ static bool populateInstruction(CodeGenTarget &Target,
if (!isReg && String && String->getValue() != "")
Decoder = String->getValue();
- OperandInfo OpInfo(Decoder);
+ RecordVal *HasCompleteDecoderVal =
+ TypeRecord->getValue("hasCompleteDecoder");
+ BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
+ dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
+ bool HasCompleteDecoder = HasCompleteDecoderBit ?
+ HasCompleteDecoderBit->getValue() : true;
+
+ OperandInfo OpInfo(Decoder, HasCompleteDecoder);
OpInfo.addField(bitStart, bitWidth, 0);
NumberedInsnOperands[Name].push_back(OpInfo);
@@ -1907,7 +1967,14 @@ static bool populateInstruction(CodeGenTarget &Target,
if (!isReg && String && String->getValue() != "")
Decoder = String->getValue();
- OperandInfo OpInfo(Decoder);
+ RecordVal *HasCompleteDecoderVal =
+ TypeRecord->getValue("hasCompleteDecoder");
+ BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
+ dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
+ bool HasCompleteDecoder = HasCompleteDecoderBit ?
+ HasCompleteDecoderBit->getValue() : true;
+
+ OperandInfo OpInfo(Decoder, HasCompleteDecoder);
unsigned Base = ~0U;
unsigned Width = 0;
unsigned Offset = 0;
@@ -2096,12 +2163,52 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " Ptr += Len;\n"
<< " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
<< " Ptr += Len;\n"
- << " DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
- << " << \", using decoder \" << DecodeIdx << \"\\n\" );\n"
- << " DEBUG(dbgs() << \"----- DECODE SUCCESSFUL -----\\n\");\n"
<< "\n"
+ << " MI.clear();\n"
<< " MI.setOpcode(Opc);\n"
- << " return decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm);\n"
+ << " bool DecodeComplete;\n"
+ << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);\n"
+ << " assert(DecodeComplete);\n"
+ << "\n"
+ << " DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
+ << " << \", using decoder \" << DecodeIdx << \": \"\n"
+ << " << (S != MCDisassembler::Fail ? \"PASS\" : \"FAIL\") << \"\\n\");\n"
+ << " return S;\n"
+ << " }\n"
+ << " case MCD::OPC_TryDecode: {\n"
+ << " unsigned Len;\n"
+ << " // Decode the Opcode value.\n"
+ << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
+ << " Ptr += Len;\n"
+ << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
+ << " Ptr += Len;\n"
+ << " // NumToSkip is a plain 16-bit integer.\n"
+ << " unsigned NumToSkip = *Ptr++;\n"
+ << " NumToSkip |= (*Ptr++) << 8;\n"
+ << "\n"
+ << " // Perform the decode operation.\n"
+ << " MCInst TmpMI;\n"
+ << " TmpMI.setOpcode(Opc);\n"
+ << " bool DecodeComplete;\n"
+ << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);\n"
+ << " DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << Opc\n"
+ << " << \", using decoder \" << DecodeIdx << \": \");\n"
+ << "\n"
+ << " if (DecodeComplete) {\n"
+ << " // Decoding complete.\n"
+ << " DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : \"FAIL\") << \"\\n\");\n"
+ << " MI = TmpMI;\n"
+ << " return S;\n"
+ << " } else {\n"
+ << " assert(S == MCDisassembler::Fail);\n"
+ << " // If the decoding was incomplete, skip.\n"
+ << " Ptr += NumToSkip;\n"
+ << " DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n"
+ << " // Reset decode status. This also drops a SoftFail status that could be\n"
+ << " // set before the decode attempt.\n"
+ << " S = MCDisassembler::Success;\n"
+ << " }\n"
+ << " break;\n"
<< " }\n"
<< " case MCD::OPC_SoftFail: {\n"
<< " // Decode the mask values.\n"
diff --git a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
index e242a96..a658339 100644
--- a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -1,4 +1,4 @@
-//===- InstrInfoEmitter.cpp - Generate a Instruction Set Desc. ------------===//
+//===- InstrInfoEmitter.cpp - Generate a Instruction Set Desc. --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "CodeGenDAGPatterns.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
@@ -26,6 +25,7 @@
#include <cstdio>
#include <map>
#include <vector>
+
using namespace llvm;
namespace {
@@ -70,11 +70,11 @@ private:
void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs);
std::vector<std::string> GetOperandInfo(const CodeGenInstruction &Inst);
};
-} // End anonymous namespace
+} // end anonymous namespace
static void PrintDefList(const std::vector<Record*> &Uses,
unsigned Num, raw_ostream &OS) {
- OS << "static const uint16_t ImplicitList" << Num << "[] = { ";
+ OS << "static const MCPhysReg ImplicitList" << Num << "[] = { ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
OS << getQualifiedName(Uses[i]) << ", ";
OS << "0 };\n";
@@ -190,7 +190,6 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
}
}
-
/// Initialize data structures for generating operand name mappings.
///
/// \param Operands [out] A map used to generate the OpName enum with operand
@@ -257,9 +256,9 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
OS << "OPERAND_LAST";
OS << "\n};\n";
- OS << "} // End namespace OpName\n";
- OS << "} // End namespace " << Namespace << "\n";
- OS << "} // End namespace llvm\n";
+ OS << "} // end namespace OpName\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif //GET_INSTRINFO_OPERAND_ENUM\n";
OS << "#ifdef GET_INSTRINFO_NAMED_OPS\n";
@@ -298,8 +297,8 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
OS << " return -1;\n";
}
OS << "}\n";
- OS << "} // End namespace " << Namespace << "\n";
- OS << "} // End namespace llvm\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif //GET_INSTRINFO_NAMED_OPS\n";
}
@@ -328,9 +327,9 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
}
OS << " OPERAND_TYPE_LIST_END" << "\n};\n";
- OS << "} // End namespace OpTypes\n";
- OS << "} // End namespace " << Namespace << "\n";
- OS << "} // End namespace llvm\n";
+ OS << "} // end namespace OpTypes\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n";
}
@@ -419,7 +418,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n\n";
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace \n";
OS << "#endif // GET_INSTRINFO_MC_DESC\n\n";
@@ -431,10 +430,10 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "namespace llvm {\n";
OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
<< " explicit " << ClassName
- << "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1);\n"
- << " virtual ~" << ClassName << "();\n"
+ << "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1);\n"
+ << " ~" << ClassName << "() override {}\n"
<< "};\n";
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace \n";
OS << "#endif // GET_INSTRINFO_HEADER\n\n";
@@ -446,13 +445,12 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n";
OS << "extern const char " << TargetName << "InstrNameData[];\n";
OS << ClassName << "::" << ClassName
- << "(int CFSetupOpcode, int CFDestroyOpcode)\n"
- << " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode) {\n"
+ << "(int CFSetupOpcode, int CFDestroyOpcode, int CatchRetOpcode)\n"
+ << " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode, CatchRetOpcode) {\n"
<< " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName
<< "InstrNameIndices, " << TargetName << "InstrNameData, "
- << NumberedInstructions.size() << ");\n}\n"
- << ClassName << "::~" << ClassName << "() {}\n";
- OS << "} // End llvm namespace \n";
+ << NumberedInstructions.size() << ");\n}\n";
+ OS << "} // end llvm namespace \n";
OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n";
@@ -596,9 +594,9 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
OS << " " << Class.Name << "\t= " << Num++ << ",\n";
OS << " SCHED_LIST_END = " << SchedModels.numInstrSchedClasses() << "\n";
OS << " };\n";
- OS << "} // End Sched namespace\n";
- OS << "} // End " << Namespace << " namespace\n";
- OS << "} // End llvm namespace \n";
+ OS << "} // end Sched namespace\n";
+ OS << "} // end " << Namespace << " namespace\n";
+ OS << "} // end llvm namespace \n";
OS << "#endif // GET_INSTRINFO_ENUM\n\n";
}
@@ -610,4 +608,4 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
EmitMapTable(RK, OS);
}
-} // End llvm namespace
+} // end llvm namespace
diff --git a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 2b59ee6..42a6a15 100644
--- a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -50,8 +50,6 @@ public:
raw_ostream &OS);
void EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints,
raw_ostream &OS);
- void EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints,
- raw_ostream &OS);
void EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints,
raw_ostream &OS);
void EmitIntrinsicToMSBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints,
@@ -92,9 +90,6 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
// Emit the intrinsic parameter attributes.
EmitAttributes(Ints, OS);
- // Emit intrinsic alias analysis mod/ref behavior.
- EmitModRefBehavior(Ints, OS);
-
// Emit code to translate GCC builtins into LLVM intrinsics.
EmitIntrinsicToGCCBuiltinMap(Ints, OS);
@@ -246,22 +241,25 @@ enum IIT_Info {
// Values from 16+ are only encodable with the inefficient encoding.
IIT_V64 = 16,
IIT_MMX = 17,
- IIT_METADATA = 18,
- IIT_EMPTYSTRUCT = 19,
- IIT_STRUCT2 = 20,
- IIT_STRUCT3 = 21,
- IIT_STRUCT4 = 22,
- IIT_STRUCT5 = 23,
- IIT_EXTEND_ARG = 24,
- IIT_TRUNC_ARG = 25,
- IIT_ANYPTR = 26,
- IIT_V1 = 27,
- IIT_VARARG = 28,
- IIT_HALF_VEC_ARG = 29,
- IIT_SAME_VEC_WIDTH_ARG = 30,
- IIT_PTR_TO_ARG = 31,
- IIT_VEC_OF_PTRS_TO_ELT = 32,
- IIT_I128 = 33
+ IIT_TOKEN = 18,
+ IIT_METADATA = 19,
+ IIT_EMPTYSTRUCT = 20,
+ IIT_STRUCT2 = 21,
+ IIT_STRUCT3 = 22,
+ IIT_STRUCT4 = 23,
+ IIT_STRUCT5 = 24,
+ IIT_EXTEND_ARG = 25,
+ IIT_TRUNC_ARG = 26,
+ IIT_ANYPTR = 27,
+ IIT_V1 = 28,
+ IIT_VARARG = 29,
+ IIT_HALF_VEC_ARG = 30,
+ IIT_SAME_VEC_WIDTH_ARG = 31,
+ IIT_PTR_TO_ARG = 32,
+ IIT_VEC_OF_PTRS_TO_ELT = 33,
+ IIT_I128 = 34,
+ IIT_V512 = 35,
+ IIT_V1024 = 36
};
@@ -285,6 +283,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
case MVT::f16: return Sig.push_back(IIT_F16);
case MVT::f32: return Sig.push_back(IIT_F32);
case MVT::f64: return Sig.push_back(IIT_F64);
+ case MVT::token: return Sig.push_back(IIT_TOKEN);
case MVT::Metadata: return Sig.push_back(IIT_METADATA);
case MVT::x86mmx: return Sig.push_back(IIT_MMX);
// MVT::OtherVT is used to mean the empty struct type here.
@@ -375,6 +374,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
case 16: Sig.push_back(IIT_V16); break;
case 32: Sig.push_back(IIT_V32); break;
case 64: Sig.push_back(IIT_V64); break;
+ case 512: Sig.push_back(IIT_V512); break;
+ case 1024: Sig.push_back(IIT_V1024); break;
}
return EncodeFixedValueType(VVT.getVectorElementType().SimpleTy, Sig);
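+
+// Illustrative encoding (sketch): with the two new width codes, a fixed
+// 512-element vector encodes as its width code followed by the element's
+// code, e.g. a v512i32 parameter becomes the pair <IIT_V512, IIT_I32> in
+// the intrinsic's type signature.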
@@ -503,28 +504,6 @@ void IntrinsicEmitter::EmitGenerator(const std::vector<CodeGenIntrinsic> &Ints,
}
namespace {
-enum ModRefKind {
- MRK_none,
- MRK_readonly,
- MRK_readnone
-};
-}
-
-static ModRefKind getModRefKind(const CodeGenIntrinsic &intrinsic) {
- switch (intrinsic.ModRef) {
- case CodeGenIntrinsic::NoMem:
- return MRK_readnone;
- case CodeGenIntrinsic::ReadArgMem:
- case CodeGenIntrinsic::ReadMem:
- return MRK_readonly;
- case CodeGenIntrinsic::ReadWriteArgMem:
- case CodeGenIntrinsic::ReadWriteMem:
- return MRK_none;
- }
- llvm_unreachable("bad mod-ref kind");
-}
-
-namespace {
struct AttributeComparator {
bool operator()(const CodeGenIntrinsic *L, const CodeGenIntrinsic *R) const {
// Sort throwing intrinsics after non-throwing intrinsics.
@@ -541,8 +520,8 @@ struct AttributeComparator {
return R->isConvergent;
// Try to order by readonly/readnone attribute.
- ModRefKind LK = getModRefKind(*L);
- ModRefKind RK = getModRefKind(*R);
+ CodeGenIntrinsic::ModRefKind LK = L->ModRef;
+ CodeGenIntrinsic::ModRefKind RK = R->ModRef;
if (LK != RK) return (LK > RK);
// Order by argument attributes.
@@ -636,7 +615,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
case CodeGenIntrinsic::ReadNone:
if (addComma)
OS << ",";
- OS << "Attributes::ReadNone";
+ OS << "Attribute::ReadNone";
addComma = true;
break;
}
@@ -649,10 +628,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
}
}
- ModRefKind modRef = getModRefKind(intrinsic);
-
- if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn ||
- intrinsic.isNoDuplicate || intrinsic.isConvergent) {
+ if (!intrinsic.canThrow ||
+ intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
+ intrinsic.isNoReturn || intrinsic.isNoDuplicate ||
+ intrinsic.isConvergent) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
if (!intrinsic.canThrow) {
@@ -678,17 +657,29 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
addComma = true;
}
- switch (modRef) {
- case MRK_none: break;
- case MRK_readonly:
+ switch (intrinsic.ModRef) {
+ case CodeGenIntrinsic::NoMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::ReadNone";
+ break;
+ case CodeGenIntrinsic::ReadArgMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::ReadOnly,";
+ OS << "Attribute::ArgMemOnly";
+ break;
+ case CodeGenIntrinsic::ReadMem:
if (addComma)
OS << ",";
OS << "Attribute::ReadOnly";
break;
- case MRK_readnone:
+ case CodeGenIntrinsic::ReadWriteArgMem:
if (addComma)
OS << ",";
- OS << "Attribute::ReadNone";
+ OS << "Attribute::ArgMemOnly";
+ break;
+ case CodeGenIntrinsic::ReadWriteMem:
break;
}
OS << "};\n";
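+
+// For reference (sketch of the generated output, assuming the elided
+// !canThrow branch above emits Attribute::NoUnwind): a non-throwing
+// ReadArgMem intrinsic now gets
+//
+//   const Attribute::AttrKind Atts[] = {Attribute::NoUnwind,
+//     Attribute::ReadOnly,Attribute::ArgMemOnly};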
@@ -713,41 +704,6 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n";
}
-/// EmitModRefBehavior - Determine intrinsic alias analysis mod/ref behavior.
-void IntrinsicEmitter::
-EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
- OS << "// Determine intrinsic alias analysis mod/ref behavior.\n"
- << "#ifdef GET_INTRINSIC_MODREF_BEHAVIOR\n"
- << "assert(iid <= Intrinsic::" << Ints.back().EnumName << " && "
- << "\"Unknown intrinsic.\");\n\n";
-
- OS << "static const uint8_t IntrinsicModRefBehavior[] = {\n"
- << " /* invalid */ UnknownModRefBehavior,\n";
- for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
- OS << " /* " << TargetPrefix << Ints[i].EnumName << " */ ";
- switch (Ints[i].ModRef) {
- case CodeGenIntrinsic::NoMem:
- OS << "DoesNotAccessMemory,\n";
- break;
- case CodeGenIntrinsic::ReadArgMem:
- OS << "OnlyReadsArgumentPointees,\n";
- break;
- case CodeGenIntrinsic::ReadMem:
- OS << "OnlyReadsMemory,\n";
- break;
- case CodeGenIntrinsic::ReadWriteArgMem:
- OS << "OnlyAccessesArgumentPointees,\n";
- break;
- case CodeGenIntrinsic::ReadWriteMem:
- OS << "UnknownModRefBehavior,\n";
- break;
- }
- }
- OS << "};\n\n"
- << "return static_cast<ModRefBehavior>(IntrinsicModRefBehavior[iid]);\n"
- << "#endif // GET_INTRINSIC_MODREF_BEHAVIOR\n\n";
-}
-
/// EmitTargetBuiltins - All of the builtins in the specified map are for the
/// same target, and we already checked it.
static void EmitTargetBuiltins(const std::map<std::string, std::string> &BIM,
diff --git a/contrib/llvm/utils/TableGen/OptParserEmitter.cpp b/contrib/llvm/utils/TableGen/OptParserEmitter.cpp
index 9262d7c..c1b5e65 100644
--- a/contrib/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -149,10 +149,10 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
PE = I->first.end(); PI != PE; ++PI) {
OS << "\"" << *PI << "\" COMMA ";
}
- OS << "0})\n";
+ OS << "nullptr})\n";
}
OS << "#undef COMMA\n";
- OS << "#endif\n\n";
+ OS << "#endif // PREFIX\n\n";
OS << "/////////\n";
OS << "// Groups\n\n";
@@ -164,7 +164,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "OPTION(";
// The option prefix;
- OS << "0";
+ OS << "nullptr";
// The option string.
OS << ", \"" << R.getValueAsString("Name") << '"';
@@ -183,7 +183,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "INVALID";
// The other option arguments (unused for groups).
- OS << ", INVALID, 0, 0, 0";
+ OS << ", INVALID, nullptr, 0, 0";
// The option help text.
if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
@@ -191,10 +191,10 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << " ";
write_cstring(OS, R.getValueAsString("HelpText"));
} else
- OS << ", 0";
+ OS << ", nullptr";
// The option meta-variable name (unused).
- OS << ", 0)\n";
+ OS << ", nullptr)\n";
}
OS << "\n";
@@ -242,7 +242,7 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << ", ";
std::vector<std::string> AliasArgs = R.getValueAsListOfStrings("AliasArgs");
if (AliasArgs.size() == 0) {
- OS << "0";
+ OS << "nullptr";
} else {
OS << "\"";
for (size_t i = 0, e = AliasArgs.size(); i != e; ++i)
@@ -274,17 +274,17 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << " ";
write_cstring(OS, R.getValueAsString("HelpText"));
} else
- OS << ", 0";
+ OS << ", nullptr";
// The option meta-variable name.
OS << ", ";
if (!isa<UnsetInit>(R.getValueInit("MetaVarName")))
write_cstring(OS, R.getValueAsString("MetaVarName"));
else
- OS << "0";
+ OS << "nullptr";
OS << ")\n";
}
- OS << "#endif\n";
+ OS << "#endif // OPTION\n";
}
} // end namespace llvm
diff --git a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 9619fb9..b727df7 100644
--- a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -240,7 +240,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
MaxRegUnitWeight = std::max(MaxRegUnitWeight, RegUnits.Weight);
OS << " \"" << RegUnits.Name << "\",\n";
}
- OS << " nullptr };\n"
+ OS << " };\n"
<< " return PressureNameTable[Idx];\n"
<< "}\n\n";
@@ -1074,9 +1074,7 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
OS << "struct " << ClassName << " : public TargetRegisterInfo {\n"
<< " explicit " << ClassName
- << "(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);\n"
- << " bool needsStackRealignment(const MachineFunction &) const override\n"
- << " { return false; }\n";
+ << "(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);\n";
if (!RegBank.getSubRegIndices().empty()) {
OS << " unsigned composeSubRegIndicesImpl"
<< "(unsigned, unsigned) const override;\n"
@@ -1454,27 +1452,32 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "ArrayRef<const uint32_t *> " << ClassName
<< "::getRegMasks() const {\n";
- OS << " static const uint32_t *Masks[] = {\n";
- for (Record *CSRSet : CSRSets)
- OS << " " << CSRSet->getName() << "_RegMask, \n";
- OS << " nullptr\n };\n";
- OS << " return ArrayRef<const uint32_t *>(Masks, (size_t)" << CSRSets.size()
- << ");\n";
+ if (!CSRSets.empty()) {
+ OS << " static const uint32_t *const Masks[] = {\n";
+ for (Record *CSRSet : CSRSets)
+ OS << " " << CSRSet->getName() << "_RegMask,\n";
+ OS << " };\n";
+ OS << " return makeArrayRef(Masks);\n";
+ } else {
+ OS << " return None;\n";
+ }
OS << "}\n\n";
OS << "ArrayRef<const char *> " << ClassName
<< "::getRegMaskNames() const {\n";
- OS << " static const char *Names[] = {\n";
- for (Record *CSRSet : CSRSets)
- OS << " " << '"' << CSRSet->getName() << '"' << ",\n";
- OS << " nullptr\n };\n";
- OS << " return ArrayRef<const char *>(Names, (size_t)" << CSRSets.size()
- << ");\n";
+ if (!CSRSets.empty()) {
+ OS << " static const char *const Names[] = {\n";
+ for (Record *CSRSet : CSRSets)
+ OS << " " << '"' << CSRSet->getName() << '"' << ",\n";
+ OS << " };\n";
+ OS << " return makeArrayRef(Names);\n";
+ } else {
+ OS << " return None;\n";
+ }
OS << "}\n\n";
- OS << "const " << TargetName << "FrameLowering *"
- << TargetName << "GenRegisterInfo::\n"
- << " getFrameLowering(const MachineFunction &MF) {\n"
+ OS << "const " << TargetName << "FrameLowering *\n" << TargetName
+ << "GenRegisterInfo::getFrameLowering(const MachineFunction &MF) {\n"
<< " return static_cast<const " << TargetName << "FrameLowering *>(\n"
<< " MF.getSubtarget().getFrameLowering());\n"
<< "}\n\n";
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index 03d7f4e..6246d81 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -26,6 +26,7 @@
#include <map>
#include <string>
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "subtarget-emitter"
@@ -105,9 +106,8 @@ public:
Records(R), SchedModels(TGT.getSchedModels()), Target(TGT.getName()) {}
void run(raw_ostream &o);
-
};
-} // End anonymous namespace
+} // end anonymous namespace
//
// Enumeration - Emit the specified class as an enumeration.
@@ -1199,7 +1199,8 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
<< " " << (SchedModels.schedClassEnd()
- SchedModels.schedClassBegin()) << ",\n";
else
- OS << " 0, 0, 0, 0, // No instruction-level machine model.\n";
+ OS << " nullptr, nullptr, 0, 0,"
+ << " // No instruction-level machine model.\n";
if (PI->hasItineraries())
OS << " " << PI->ItinsDef->getName() << "};\n";
else
@@ -1414,7 +1415,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "namespace llvm {\n";
Enumeration(OS, "SubtargetFeature");
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace\n";
OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n";
@@ -1461,7 +1462,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "0, 0, 0";
OS << ");\n}\n\n";
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace\n";
OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";
@@ -1491,7 +1492,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
<< " const;\n"
<< "};\n";
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace\n";
OS << "#endif // GET_SUBTARGETINFO_HEADER\n\n";
@@ -1543,7 +1544,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
EmitSchedModelHelpers(ClassName, OS);
- OS << "} // End llvm namespace \n";
+ OS << "} // end llvm namespace\n";
OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
}
@@ -1555,4 +1556,4 @@ void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS) {
SubtargetEmitter(RK, CGTarget).run(OS);
}
-} // End llvm namespace
+} // end llvm namespace
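[Note on the SubtargetEmitter hunk above: the "No instruction-level machine model" row changes because its first two fields are pointers, which should be initialized with nullptr rather than the integer literal 0. The struct below is an illustrative stand-in for the generated record, not the real MCSchedModel layout.]

struct SchedModelTables {
  const void *ProcResourceTable;  // pointer slot, now written as nullptr
  const void *SchedClassTable;    // pointer slot, now written as nullptr
  unsigned NumProcResourceKinds;
  unsigned NumSchedClasses;
};

static const SchedModelTables NoMachineModel = {nullptr, nullptr, 0, 0};

int main() { return NoMachineModel.ProcResourceTable ? 1 : 0; }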
diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp
index 02fe4dc..c16a558 100644
--- a/contrib/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm/utils/TableGen/TableGen.cpp
@@ -41,7 +41,8 @@ enum ActionType {
PrintEnums,
PrintSets,
GenOptParserDefs,
- GenCTags
+ GenCTags,
+ GenAttributes
};
namespace {
@@ -85,6 +86,8 @@ namespace {
"Generate option definitions"),
clEnumValN(GenCTags, "gen-ctags",
"Generate ctags-compatible index"),
+ clEnumValN(GenAttributes, "gen-attrs",
+ "Generate attributes"),
clEnumValEnd));
cl::opt<std::string>
@@ -165,6 +168,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenCTags:
EmitCTags(Records, OS);
break;
+ case GenAttributes:
+ EmitAttributes(Records, OS);
+ break;
}
return false;
diff --git a/contrib/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm/utils/TableGen/TableGenBackends.h
index 2dc03ce..d9dd3d1 100644
--- a/contrib/llvm/utils/TableGen/TableGenBackends.h
+++ b/contrib/llvm/utils/TableGen/TableGenBackends.h
@@ -78,6 +78,7 @@ void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
+void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
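[Note on the two hunks above: wiring a new TableGen backend touches three places, namely an ActionType enumerator, a clEnumValN binding for the new -gen-attrs flag, and a dispatch case that calls the emitter declared in TableGenBackends.h. A generic, runnable sketch of that dispatch pattern; the real code uses llvm::cl, so everything here is illustrative.]

#include <cstdio>
#include <cstring>

enum ActionType { GenCTags, GenAttributes }; // (1) enumerator

int main(int argc, char **argv) {
  ActionType Action = GenCTags;
  if (argc > 1 && std::strcmp(argv[1], "-gen-attrs") == 0) // (2) flag binding
    Action = GenAttributes;
  switch (Action) { // (3) dispatch to the backend
  case GenCTags:      std::puts("EmitCTags(Records, OS)");      break;
  case GenAttributes: std::puts("EmitAttributes(Records, OS)"); break;
  }
}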
diff --git a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
index efcb0c8..8a5ae12 100644
--- a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -796,12 +796,12 @@ void RecognizableInstr::emitInstructionSpecifier() {
case X86Local::MRM_E3: case X86Local::MRM_E4: case X86Local::MRM_E5:
case X86Local::MRM_E8: case X86Local::MRM_E9: case X86Local::MRM_EA:
case X86Local::MRM_EB: case X86Local::MRM_EC: case X86Local::MRM_ED:
- case X86Local::MRM_EE: case X86Local::MRM_F0: case X86Local::MRM_F1:
- case X86Local::MRM_F2: case X86Local::MRM_F3: case X86Local::MRM_F4:
- case X86Local::MRM_F5: case X86Local::MRM_F6: case X86Local::MRM_F7:
- case X86Local::MRM_F9: case X86Local::MRM_FA: case X86Local::MRM_FB:
- case X86Local::MRM_FC: case X86Local::MRM_FD: case X86Local::MRM_FE:
- case X86Local::MRM_FF:
+ case X86Local::MRM_EE: case X86Local::MRM_EF: case X86Local::MRM_F0:
+ case X86Local::MRM_F1: case X86Local::MRM_F2: case X86Local::MRM_F3:
+ case X86Local::MRM_F4: case X86Local::MRM_F5: case X86Local::MRM_F6:
+ case X86Local::MRM_F7: case X86Local::MRM_F9: case X86Local::MRM_FA:
+ case X86Local::MRM_FB: case X86Local::MRM_FC: case X86Local::MRM_FD:
+ case X86Local::MRM_FE: case X86Local::MRM_FF:
// Ignored.
break;
}
@@ -951,6 +951,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("f128mem", TYPE_M128)
TYPE("f256mem", TYPE_M256)
TYPE("f512mem", TYPE_M512)
+ TYPE("FR128", TYPE_XMM128)
TYPE("FR64", TYPE_XMM64)
TYPE("FR64X", TYPE_XMM64)
TYPE("f64mem", TYPE_M64FP)
@@ -1069,6 +1070,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
// register IDs in 8-bit immediates nowadays.
ENCODING("FR32", ENCODING_IB)
ENCODING("FR64", ENCODING_IB)
+ ENCODING("FR128", ENCODING_IB)
ENCODING("VR128", ENCODING_IB)
ENCODING("VR256", ENCODING_IB)
ENCODING("FR32X", ENCODING_IB)
@@ -1091,6 +1093,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("GR8", ENCODING_RM)
ENCODING("VR128", ENCODING_RM)
ENCODING("VR128X", ENCODING_RM)
+ ENCODING("FR128", ENCODING_RM)
ENCODING("FR64", ENCODING_RM)
ENCODING("FR32", ENCODING_RM)
ENCODING("FR64X", ENCODING_RM)
@@ -1120,6 +1123,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
ENCODING("GR64", ENCODING_REG)
ENCODING("GR8", ENCODING_REG)
ENCODING("VR128", ENCODING_REG)
+ ENCODING("FR128", ENCODING_REG)
ENCODING("FR64", ENCODING_REG)
ENCODING("FR32", ENCODING_REG)
ENCODING("VR64", ENCODING_REG)
@@ -1157,6 +1161,7 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("GR32", ENCODING_VVVV)
ENCODING("GR64", ENCODING_VVVV)
ENCODING("FR32", ENCODING_VVVV)
+ ENCODING("FR128", ENCODING_VVVV)
ENCODING("FR64", ENCODING_VVVV)
ENCODING("VR128", ENCODING_VVVV)
ENCODING("VR256", ENCODING_VVVV)